commit dbb2eccad0566caa8c8749f00c1ee88e3998c082 Author: Eportant Date: Sun Sep 8 14:08:46 2024 +0800 提交项目 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c8987d8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,165 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# Profiling +*.pclprof + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +.idea +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# VSCode project settings +.vscode/ + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site +mkdocs_github_authors.yaml + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# datasets and projects +datasets/ +runs/ +wandb/ +tests/ +.DS_Store + +# Neural Network weights ----------------------------------------------------------------------------------------------- +weights/ +*.weights +*.pt +*.pb +*.onnx +*.engine +*.mlmodel +*.mlpackage +*.torchscript +*.tflite +*.h5 +*_saved_model/ +*_web_model/ +*_openvino_model/ +*_paddle_model/ +pnnx* + +# Autogenerated files for tests +/ultralytics/assets/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..633b78f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,89 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# Pre-commit hooks. 
For more information see https://github.com/pre-commit/pre-commit-hooks/blob/main/README.md +# Optionally remove from local hooks with 'rm .git/hooks/pre-commit' + +# Define bot property if installed via https://github.com/marketplace/pre-commit-ci +ci: + autofix_prs: true + autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions' + autoupdate_schedule: monthly + submodules: true + +# Exclude directories (optional) +# exclude: 'docs/' + +# Define repos to run +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: end-of-file-fixer + - id: trailing-whitespace + - id: check-case-conflict + # - id: check-yaml + - id: check-docstring-first + - id: double-quote-string-fixer + - id: detect-private-key + + - repo: https://github.com/asottile/pyupgrade + rev: v3.15.0 + hooks: + - id: pyupgrade + name: Upgrade code + + - repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort + name: Sort imports + + - repo: https://github.com/google/yapf + rev: v0.40.2 + hooks: + - id: yapf + name: YAPF formatting + + - repo: https://github.com/executablebooks/mdformat + rev: 0.7.17 + hooks: + - id: mdformat + name: MD formatting + additional_dependencies: + - mdformat-gfm + - mdformat-black + exclude: 'docs/.*\.md' + # exclude: "README.md|README.zh-CN.md|CONTRIBUTING.md" + + - repo: https://github.com/PyCQA/flake8 + rev: 6.1.0 + hooks: + - id: flake8 + name: PEP8 + + - repo: https://github.com/codespell-project/codespell + rev: v2.2.6 + hooks: + - id: codespell + exclude: 'docs/de|docs/fr|docs/pt|docs/es|docs/mkdocs_de.yml' + args: + - --ignore-words-list=crate,nd,strack,dota,ane,segway,fo,gool,winn + + - repo: https://github.com/PyCQA/docformatter + rev: v1.7.5 + hooks: + - id: docformatter + +# - repo: https://github.com/asottile/yesqa +# rev: v1.4.0 +# hooks: +# - id: yesqa + +# - repo: https://github.com/asottile/dead +# rev: v1.5.0 +# hooks: +# - id: dead + +# - repo: https://github.com/ultralytics/pre-commit +# rev: 
bd60a414f80a53fb8f593d3bfed4701fc47e4b23 +# hooks: +# - id: capitalize-comments diff --git a/0.1.1 b/0.1.1 new file mode 100644 index 0000000..e69de29 diff --git a/Arial.ttf b/Arial.ttf new file mode 100644 index 0000000..ab68fb1 Binary files /dev/null and b/Arial.ttf differ diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..8e85b7a --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,20 @@ +cff-version: 1.2.0 +preferred-citation: + type: software + message: If you use this software, please cite it as below. + authors: + - family-names: Jocher + given-names: Glenn + orcid: "https://orcid.org/0000-0001-5950-6979" + - family-names: Chaurasia + given-names: Ayush + orcid: "https://orcid.org/0000-0002-7603-6750" + - family-names: Qiu + given-names: Jing + orcid: "https://orcid.org/0000-0003-3783-7069" + title: "YOLO by Ultralytics" + version: 8.0.0 + # doi: 10.5281/zenodo.3908559 # TODO + date-released: 2023-1-10 + license: AGPL-3.0 + url: "https://github.com/ultralytics/ultralytics" diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..cd7df3e --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,8 @@ +include *.md +include requirements.txt +include LICENSE +include setup.py +include ultralytics/assets/bus.jpg +include ultralytics/assets/zidane.jpg +include tests/*.py +recursive-include ultralytics *.yaml diff --git a/ceshi.py b/ceshi.py new file mode 100644 index 0000000..04704a3 --- /dev/null +++ b/ceshi.py @@ -0,0 +1,17 @@ +#测试训练集 + +from ultralytics import YOLO + +# 创建 YOLOv8 模型实例,选择适合的配置文件 +model = YOLO('yolov8n.yaml') # YOLOv8 Nano 配置文件(这个是从官网上下载的预训练模型) + +# 配置训练参数 +model.train( + data='data.yaml', # 数据配置文件 + epochs=150, # 训练轮数 + batch=8, # 批量大小(适合 CPU 计算) + imgsz=320, # 图像尺寸(较小尺寸适合 CPU) + device='cpu' # 训练设备设置为 CPU(本人用的是cpu) +) + + diff --git a/data.yaml b/data.yaml new file mode 100644 index 0000000..053ef9b --- /dev/null +++ b/data.yaml @@ -0,0 +1,8 @@ + +train: # 训练集路径 +val: # 验证集路径(如果训练集和验证集相同的话,这样写是可以的) + + +nc: # 类别数 + +names: 
['xxxx', 'xxxx'] # 类别名称 diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..e3a32c8 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,82 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# Builds ultralytics/ultralytics:latest image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics +# Image is CUDA-optimized for YOLOv8 single/multi-GPU training and inference + +# Start FROM PyTorch image https://hub.docker.com/r/pytorch/pytorch or nvcr.io/nvidia/pytorch:23.03-py3 +FROM pytorch/pytorch:2.1.0-cuda12.1-cudnn8-runtime +RUN pip install --no-cache nvidia-tensorrt --index-url https://pypi.ngc.nvidia.com + +# Downloads to user config dir +ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/ + +# Install linux packages +# g++ required to build 'tflite_support' and 'lap' packages, libusb-1.0-0 required for 'tflite_support' package +RUN apt update \ + && apt install --no-install-recommends -y gcc git zip curl htop libgl1 libglib2.0-0 libpython3-dev gnupg g++ libusb-1.0-0 + +# Security updates +# https://security.snyk.io/vuln/SNYK-UBUNTU1804-OPENSSL-3314796 +RUN apt upgrade --no-install-recommends -y openssl tar + +# Create working directory +WORKDIR /usr/src/ultralytics + +# Copy contents +# COPY . 
/usr/src/ultralytics # git permission issues inside container +RUN git clone https://github.com/ultralytics/ultralytics -b main /usr/src/ultralytics +ADD https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt /usr/src/ultralytics/ + +# Install pip packages +RUN python3 -m pip install --upgrade pip wheel +RUN pip install --no-cache -e ".[export]" albumentations comet pycocotools pytest-cov + +# Run exports to AutoInstall packages +RUN yolo export model=tmp/yolov8n.pt format=edgetpu imgsz=32 +RUN yolo export model=tmp/yolov8n.pt format=ncnn imgsz=32 +# Requires <= Python 3.10, bug with paddlepaddle==2.5.0 https://github.com/PaddlePaddle/X2Paddle/issues/991 +RUN pip install --no-cache paddlepaddle==2.4.2 x2paddle +# Fix error: `np.bool` was a deprecated alias for the builtin `bool` segmentation error in Tests +RUN pip install --no-cache numpy==1.23.5 +# Remove exported models +RUN rm -rf tmp + +# Set environment variables +ENV OMP_NUM_THREADS=1 +# Avoid DDP error "MKL_THREADING_LAYER=INTEL is incompatible with libgomp.so.1 library" https://github.com/pytorch/pytorch/issues/37377 +ENV MKL_THREADING_LAYER=GNU + + +# Usage Examples ------------------------------------------------------------------------------------------------------- + +# Build and Push +# t=ultralytics/ultralytics:latest && sudo docker build -f docker/Dockerfile -t $t . 
&& sudo docker push $t + +# Pull and Run with access to all GPUs +# t=ultralytics/ultralytics:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all $t + +# Pull and Run with access to GPUs 2 and 3 (inside container CUDA devices will appear as 0 and 1) +# t=ultralytics/ultralytics:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus '"device=2,3"' $t + +# Pull and Run with local directory access +# t=ultralytics/ultralytics:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all -v "$(pwd)"/datasets:/usr/src/datasets $t + +# Kill all +# sudo docker kill $(sudo docker ps -q) + +# Kill all image-based +# sudo docker kill $(sudo docker ps -qa --filter ancestor=ultralytics/ultralytics:latest) + +# DockerHub tag update +# t=ultralytics/ultralytics:latest tnew=ultralytics/ultralytics:v6.2 && sudo docker pull $t && sudo docker tag $t $tnew && sudo docker push $tnew + +# Clean up +# sudo docker system prune -a --volumes + +# Update Ubuntu drivers +# https://www.maketecheasier.com/install-nvidia-drivers-ubuntu/ + +# DDP test +# python -m torch.distributed.run --nproc_per_node 2 --master_port 1 train.py --epochs 3 + +# GCP VM from Image +# docker.io/ultralytics/ultralytics:latest diff --git a/docker/Dockerfile-arm64 b/docker/Dockerfile-arm64 new file mode 100644 index 0000000..aedb4f2 --- /dev/null +++ b/docker/Dockerfile-arm64 @@ -0,0 +1,44 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# Builds ultralytics/ultralytics:latest-arm64 image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics +# Image is aarch64-compatible for Apple M1 and other ARM architectures i.e. 
Jetson Nano and Raspberry Pi + +# Start FROM Ubuntu image https://hub.docker.com/_/ubuntu +FROM arm64v8/ubuntu:22.04 + +# Downloads to user config dir +ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/ + +# Install linux packages +# g++ required to build 'tflite_support' and 'lap' packages, libusb-1.0-0 required for 'tflite_support' package +RUN apt update \ + && apt install --no-install-recommends -y python3-pip git zip curl htop gcc libgl1 libglib2.0-0 libpython3-dev gnupg g++ libusb-1.0-0 + +# Create working directory +WORKDIR /usr/src/ultralytics + +# Copy contents +# COPY . /usr/src/ultralytics # git permission issues inside container +RUN git clone https://github.com/ultralytics/ultralytics -b main /usr/src/ultralytics +ADD https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt /usr/src/ultralytics/ + +# Install pip packages +RUN python3 -m pip install --upgrade pip wheel +RUN pip install --no-cache -e . + +# Creates a symbolic link to make 'python' point to 'python3' +RUN ln -sf /usr/bin/python3 /usr/bin/python + + +# Usage Examples ------------------------------------------------------------------------------------------------------- + +# Build and Push +# t=ultralytics/ultralytics:latest-arm64 && sudo docker build --platform linux/arm64 -f docker/Dockerfile-arm64 -t $t . 
&& sudo docker push $t + +# Run +# t=ultralytics/ultralytics:latest-arm64 && sudo docker run -it --ipc=host $t + +# Pull and Run +# t=ultralytics/ultralytics:latest-arm64 && sudo docker pull $t && sudo docker run -it --ipc=host $t + +# Pull and Run with local volume mounted +# t=ultralytics/ultralytics:latest-arm64 && sudo docker pull $t && sudo docker run -it --ipc=host -v "$(pwd)"/datasets:/usr/src/datasets $t diff --git a/docker/Dockerfile-conda b/docker/Dockerfile-conda new file mode 100644 index 0000000..73d38d6 --- /dev/null +++ b/docker/Dockerfile-conda @@ -0,0 +1,38 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# Builds ultralytics/ultralytics:latest-conda image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics +# Image is optimized for Ultralytics Anaconda (https://anaconda.org/conda-forge/ultralytics) installation and usage + +# Start FROM miniconda3 image https://hub.docker.com/r/continuumio/miniconda3 +FROM continuumio/miniconda3:latest + +# Downloads to user config dir +ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/ + +# Install linux packages +RUN apt update \ + && apt install --no-install-recommends -y libgl1 + +# Copy contents +ADD https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt . + +# Install conda packages +# mkl required to fix 'OSError: libmkl_intel_lp64.so.2: cannot open shared object file: No such file or directory' +RUN conda config --set solver libmamba && \ + conda install pytorch torchvision pytorch-cuda=11.8 -c pytorch -c nvidia && \ + conda install -c conda-forge ultralytics mkl + # conda install -c pytorch -c nvidia -c conda-forge pytorch torchvision pytorch-cuda=11.8 ultralytics mkl + + +# Usage Examples ------------------------------------------------------------------------------------------------------- + +# Build and Push +# t=ultralytics/ultralytics:latest-conda && sudo docker build -f docker/Dockerfile-conda -t $t . 
&& sudo docker push $t + +# Run +# t=ultralytics/ultralytics:latest-conda && sudo docker run -it --ipc=host $t + +# Pull and Run +# t=ultralytics/ultralytics:latest-conda && sudo docker pull $t && sudo docker run -it --ipc=host $t + +# Pull and Run with local volume mounted +# t=ultralytics/ultralytics:latest-conda && sudo docker pull $t && sudo docker run -it --ipc=host -v "$(pwd)"/datasets:/usr/src/datasets $t diff --git a/docker/Dockerfile-cpu b/docker/Dockerfile-cpu new file mode 100644 index 0000000..65326da --- /dev/null +++ b/docker/Dockerfile-cpu @@ -0,0 +1,55 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# Builds ultralytics/ultralytics:latest-cpu image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics +# Image is CPU-optimized for ONNX, OpenVINO and PyTorch YOLOv8 deployments + +# Start FROM Ubuntu image https://hub.docker.com/_/ubuntu +FROM ubuntu:mantic-20231011 + +# Downloads to user config dir +ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/ + +# Install linux packages +# g++ required to build 'tflite_support' and 'lap' packages, libusb-1.0-0 required for 'tflite_support' package +RUN apt update \ + && apt install --no-install-recommends -y python3-pip git zip curl htop libgl1 libglib2.0-0 libpython3-dev gnupg g++ libusb-1.0-0 + +# Create working directory +WORKDIR /usr/src/ultralytics + +# Copy contents +# COPY . 
/usr/src/ultralytics # git permission issues inside container +RUN git clone https://github.com/ultralytics/ultralytics -b main /usr/src/ultralytics +ADD https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt /usr/src/ultralytics/ + +# Remove python3.11/EXTERNALLY-MANAGED or use 'pip install --break-system-packages' avoid 'externally-managed-environment' Ubuntu nightly error +RUN rm -rf /usr/lib/python3.11/EXTERNALLY-MANAGED + +# Install pip packages +RUN python3 -m pip install --upgrade pip wheel +RUN pip install --no-cache -e ".[export]" --extra-index-url https://download.pytorch.org/whl/cpu + +# Run exports to AutoInstall packages +RUN yolo export model=tmp/yolov8n.pt format=edgetpu imgsz=32 +RUN yolo export model=tmp/yolov8n.pt format=ncnn imgsz=32 +# Requires <= Python 3.10, bug with paddlepaddle==2.5.0 https://github.com/PaddlePaddle/X2Paddle/issues/991 +# RUN pip install --no-cache paddlepaddle==2.4.2 x2paddle +# Remove exported models +RUN rm -rf tmp + +# Creates a symbolic link to make 'python' point to 'python3' +RUN ln -sf /usr/bin/python3 /usr/bin/python + + +# Usage Examples ------------------------------------------------------------------------------------------------------- + +# Build and Push +# t=ultralytics/ultralytics:latest-cpu && sudo docker build -f docker/Dockerfile-cpu -t $t . 
&& sudo docker push $t + +# Run +# t=ultralytics/ultralytics:latest-cpu && sudo docker run -it --ipc=host $t + +# Pull and Run +# t=ultralytics/ultralytics:latest-cpu && sudo docker pull $t && sudo docker run -it --ipc=host $t + +# Pull and Run with local volume mounted +# t=ultralytics/ultralytics:latest-cpu && sudo docker pull $t && sudo docker run -it --ipc=host -v "$(pwd)"/datasets:/usr/src/datasets $t diff --git a/docker/Dockerfile-jetson b/docker/Dockerfile-jetson new file mode 100644 index 0000000..c177b8e --- /dev/null +++ b/docker/Dockerfile-jetson @@ -0,0 +1,48 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# Builds ultralytics/ultralytics:jetson image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics +# Supports JetPack for YOLOv8 on Jetson Nano, TX1/TX2, Xavier NX, AGX Xavier, AGX Orin, and Orin NX + +# Start FROM https://catalog.ngc.nvidia.com/orgs/nvidia/containers/l4t-pytorch +FROM nvcr.io/nvidia/l4t-pytorch:r35.2.1-pth2.0-py3 + +# Downloads to user config dir +ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/ + +# Install linux packages +# g++ required to build 'tflite_support' and 'lap' packages, libusb-1.0-0 required for 'tflite_support' package +RUN apt update \ + && apt install --no-install-recommends -y gcc git zip curl htop libgl1 libglib2.0-0 libpython3-dev gnupg g++ libusb-1.0-0 + +# Create working directory +WORKDIR /usr/src/ultralytics + +# Copy contents +# COPY . 
/usr/src/ultralytics # git permission issues inside container +RUN git clone https://github.com/ultralytics/ultralytics -b main /usr/src/ultralytics +ADD https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt /usr/src/ultralytics/ + +# Remove opencv-python from requirements.txt as it conflicts with opencv-python installed in base image +RUN grep -v '^opencv-python' requirements.txt > tmp.txt && mv tmp.txt requirements.txt + +# Install pip packages manually for TensorRT compatibility https://github.com/NVIDIA/TensorRT/issues/2567 +RUN python3 -m pip install --upgrade pip wheel +RUN pip install --no-cache tqdm matplotlib pyyaml psutil pandas onnx "numpy==1.23" +RUN pip install --no-cache -e . + +# Set environment variables +ENV OMP_NUM_THREADS=1 + + +# Usage Examples ------------------------------------------------------------------------------------------------------- + +# Build and Push +# t=ultralytics/ultralytics:latest-jetson && sudo docker build --platform linux/arm64 -f docker/Dockerfile-jetson -t $t . 
&& sudo docker push $t + +# Run +# t=ultralytics/ultralytics:latest-jetson && sudo docker run -it --ipc=host $t + +# Pull and Run +# t=ultralytics/ultralytics:latest-jetson && sudo docker pull $t && sudo docker run -it --ipc=host $t + +# Pull and Run with NVIDIA runtime +# t=ultralytics/ultralytics:latest-jetson && sudo docker pull $t && sudo docker run -it --ipc=host --runtime=nvidia $t diff --git a/docker/Dockerfile-python b/docker/Dockerfile-python new file mode 100644 index 0000000..b227fa6 --- /dev/null +++ b/docker/Dockerfile-python @@ -0,0 +1,52 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# Builds ultralytics/ultralytics:latest-python image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics +# Image is CPU-optimized for ONNX, OpenVINO and PyTorch YOLOv8 deployments + +# Use the official Python 3.10 slim-bookworm as base image +FROM python:3.10-slim-bookworm + +# Downloads to user config dir +ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/ + +# Install linux packages +# g++ required to build 'tflite_support' and 'lap' packages, libusb-1.0-0 required for 'tflite_support' package +RUN apt update \ + && apt install --no-install-recommends -y python3-pip git zip curl htop libgl1 libglib2.0-0 libpython3-dev gnupg g++ libusb-1.0-0 + +# Create working directory +WORKDIR /usr/src/ultralytics + +# Copy contents +# COPY . 
/usr/src/ultralytics # git permission issues inside container +RUN git clone https://github.com/ultralytics/ultralytics -b main /usr/src/ultralytics +ADD https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt /usr/src/ultralytics/ + +# Remove python3.11/EXTERNALLY-MANAGED or use 'pip install --break-system-packages' avoid 'externally-managed-environment' Ubuntu nightly error +# RUN rm -rf /usr/lib/python3.11/EXTERNALLY-MANAGED + +# Install pip packages +RUN python3 -m pip install --upgrade pip wheel +RUN pip install --no-cache -e ".[export]" --extra-index-url https://download.pytorch.org/whl/cpu + +# Run exports to AutoInstall packages +RUN yolo export model=tmp/yolov8n.pt format=edgetpu imgsz=32 +RUN yolo export model=tmp/yolov8n.pt format=ncnn imgsz=32 +# Requires <= Python 3.10, bug with paddlepaddle==2.5.0 https://github.com/PaddlePaddle/X2Paddle/issues/991 +RUN pip install --no-cache paddlepaddle==2.4.2 x2paddle +# Remove exported models +RUN rm -rf tmp + + +# Usage Examples ------------------------------------------------------------------------------------------------------- + +# Build and Push +# t=ultralytics/ultralytics:latest-python && sudo docker build -f docker/Dockerfile-python -t $t . 
&& sudo docker push $t + +# Run +# t=ultralytics/ultralytics:latest-python && sudo docker run -it --ipc=host $t + +# Pull and Run +# t=ultralytics/ultralytics:latest-python && sudo docker pull $t && sudo docker run -it --ipc=host $t + +# Pull and Run with local volume mounted +# t=ultralytics/ultralytics:latest-python && sudo docker pull $t && sudo docker run -it --ipc=host -v "$(pwd)"/datasets:/usr/src/datasets $t diff --git a/docker/Dockerfile-runner b/docker/Dockerfile-runner new file mode 100644 index 0000000..c0f8659 --- /dev/null +++ b/docker/Dockerfile-runner @@ -0,0 +1,38 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# Builds GitHub actions CI runner image for deployment to DockerHub https://hub.docker.com/r/ultralytics/ultralytics +# Image is CUDA-optimized for YOLOv8 single/multi-GPU training and inference tests + +# Start FROM Ultralytics GPU image +FROM ultralytics/ultralytics:latest + +# Set the working directory +WORKDIR /actions-runner + +# Download and unpack the latest runner from https://github.com/actions/runner +RUN FILENAME=actions-runner-linux-x64-2.309.0.tar.gz && \ + curl -o $FILENAME -L https://github.com/actions/runner/releases/download/v2.309.0/$FILENAME && \ + tar xzf $FILENAME && \ + rm $FILENAME + +# Install runner dependencies +ENV RUNNER_ALLOW_RUNASROOT=1 +ENV DEBIAN_FRONTEND=noninteractive +RUN ./bin/installdependencies.sh && \ + apt-get -y install libicu-dev + +# Inline ENTRYPOINT command to configure and start runner with default TOKEN and NAME +ENTRYPOINT sh -c './config.sh --url https://github.com/ultralytics/ultralytics \ + --token ${GITHUB_RUNNER_TOKEN:-TOKEN} \ + --name ${GITHUB_RUNNER_NAME:-NAME} \ + --labels gpu-latest \ + --replace && \ + ./run.sh' + + +# Usage Examples ------------------------------------------------------------------------------------------------------- + +# Build and Push +# t=ultralytics/ultralytics:latest-runner && sudo docker build -f docker/Dockerfile-runner -t $t . 
&& sudo docker push $t + +# Pull and Run in detached mode with access to GPUs 0 and 1 +# t=ultralytics/ultralytics:latest-runner && sudo docker run -d -e GITHUB_RUNNER_TOKEN=TOKEN -e GITHUB_RUNNER_NAME=NAME --ipc=host --gpus '"device=0,1"' $t diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..bcf7e0f --- /dev/null +++ b/docs/README.md @@ -0,0 +1,102 @@ +# Ultralytics Docs + +Ultralytics Docs are deployed to [https://docs.ultralytics.com](https://docs.ultralytics.com). + +[![pages-build-deployment](https://github.com/ultralytics/docs/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/pages/pages-build-deployment) [![Check Broken links](https://github.com/ultralytics/docs/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/links.yml) + +### Install Ultralytics package + +[![PyPI version](https://badge.fury.io/py/ultralytics.svg)](https://badge.fury.io/py/ultralytics) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) + +To install the ultralytics package in developer mode, you will need to have Git and Python 3 installed on your system. Then, follow these steps: + +1. Clone the ultralytics repository to your local machine using Git: + + ```bash + git clone https://github.com/ultralytics/ultralytics.git + ``` + +2. Navigate to the root directory of the repository: + + ```bash + cd ultralytics + ``` + +3. Install the package in developer mode using pip: + + ```bash + pip install -e '.[dev]' + ``` + +This will install the ultralytics package and its dependencies in developer mode, allowing you to make changes to the package code and have them reflected immediately in your Python environment. + +Note that you may need to use the pip3 command instead of pip if you have multiple versions of Python installed on your system. 
+ +### Building and Serving Locally + +The `mkdocs serve` command is used to build and serve a local version of the MkDocs documentation site. It is typically used during the development and testing phase of a documentation project. + +```bash +mkdocs serve +``` + +Here is a breakdown of what this command does: + +- `mkdocs`: This is the command-line interface (CLI) for the MkDocs static site generator. It is used to build and serve MkDocs sites. +- `serve`: This is a subcommand of the `mkdocs` CLI that tells it to build and serve the documentation site locally. +- `-a`: This flag specifies the hostname and port number to bind the server to. The default value is `localhost:8000`. +- `-t`: This flag specifies the theme to use for the documentation site. The default value is `mkdocs`. +- `-s`: This flag enables strict mode, which causes MkDocs to abort the build on any warnings. When you run the `mkdocs serve` command, it will build the documentation site using the files in the `docs/` directory and serve it at the specified hostname and port number. You can then view the site by going to the URL in your web browser. + +While the site is being served, you can make changes to the documentation files and see them reflected in the live site immediately. This is useful for testing and debugging your documentation before deploying it to a live server. + +To stop the serve command and terminate the local server, you can use the `CTRL+C` keyboard shortcut. + +### Building and Serving Multi-Language + +For multi-language MkDocs sites use the following additional steps: + +1. Add all new language *.md files to git commit: `git add docs/**/*.md -f` +2. Build all languages to the `/site` directory. Verify that the top-level `/site` directory contains `CNAME`, `robots.txt` and `sitemap.xml` files, if applicable. 
+ + ```bash + # Remove existing /site directory + rm -rf site + + # Loop through all *.yml files in the docs directory + mkdocs build -f docs/mkdocs.yml + for file in docs/mkdocs_*.yml; do + echo "Building MkDocs site with configuration file: $file" + mkdocs build -f "$file" + done + ``` + +3. Preview in web browser with: + + ```bash + cd site + python -m http.server + open http://localhost:8000 # on macOS + ``` + +Note the above steps are combined into the Ultralytics [build_docs.py](https://github.com/ultralytics/ultralytics/blob/main/docs/build_docs.py) script. + +### Deploying Your Documentation Site + +To deploy your MkDocs documentation site, you will need to choose a hosting provider and a deployment method. Some popular options include GitHub Pages, GitLab Pages, and Amazon S3. + +Before you can deploy your site, you will need to configure your `mkdocs.yml` file to specify the remote host and any other necessary deployment settings. + +Once you have configured your `mkdocs.yml` file, you can use the `mkdocs gh-deploy` command to build and deploy your site to GitHub Pages; for other hosting providers, run `mkdocs build` and upload the generated `site/` directory. The `gh-deploy` command will build the documentation site using the files in the `docs/` directory and the specified configuration file and theme, and then push the result to the `gh-pages` branch of the repository's remote. + +For example, to deploy your site to GitHub Pages using the built-in `gh-deploy` command, you can use the following command: + +```bash +mkdocs gh-deploy +``` + +If you are using GitHub Pages, you can set a custom domain for your documentation site by going to the "Settings" page for your repository and updating the "Custom domain" field in the "GitHub Pages" section. + +![196814117-fc16e711-d2be-4722-9536-b7c6d78fd167](https://user-images.githubusercontent.com/26833433/210150206-9e86dcd7-10af-43e4-9eb2-9518b3799eac.png) + +For more information on deploying your MkDocs documentation site, see the [MkDocs documentation](https://www.mkdocs.org/user-guide/deploying-your-docs/). 
diff --git a/docs/ar/index.md b/docs/ar/index.md new file mode 100644 index 0000000..211f667 --- /dev/null +++ b/docs/ar/index.md @@ -0,0 +1,82 @@ +--- +comments: true +description: استكشف دليل كامل لـ Ultralytics YOLOv8 ، نموذج كشف الكائنات وتجزئة الصور ذو السرعة العالية والدقة العالية. تثبيت المحررة ، والتنبؤ ، والتدريب والمزيد. +keywords: Ultralytics، YOLOv8، كشف الكائنات، تجزئة الصور، التعلم الآلي، التعلم العميق، الرؤية الحاسوبية، YOLOv8 installation، YOLOv8 prediction، YOLOv8 training، تاريخ YOLO، تراخيص YOLO +--- + +
+

+ + Ultralytics YOLO banner +

+ Ultralytics GitHub + space + Ultralytics LinkedIn + space + Ultralytics Twitter + space + Ultralytics YouTube + space + Ultralytics TikTok + space + Ultralytics Instagram + space + Ultralytics Discord +
+
+ Ultralytics CI + Ultralytics Code Coverage + YOLOv8 Citation + Docker Pulls +
+ Run on Gradient + Open In Colab + Open In Kaggle +
+ +يتم تقديم [Ultralytics](https://ultralytics.com) [YOLOv8](https://github.com/ultralytics/ultralytics) ، أحدث إصدار من نموذج كشف الكائنات وتجزئة الصور المشهورة للوقت الفعلي. يعتمد YOLOv8 على التطورات المتقدمة في التعلم العميق والرؤية الحاسوبية ، ويقدم أداءً فائقًا من حيث السرعة والدقة. يجعل التصميم البسيط له مناسبًا لمختلف التطبيقات وقابلًا للتكيف بسهولة مع منصات الأجهزة المختلفة ، من الأجهزة الحافة إلى واجهات برمجة التطبيقات في السحابة. + +استكشف أدلة YOLOv8 ، وهي مورد شامل يهدف إلى مساعدتك في فهم واستخدام ميزاته وقدراته. سواء كنت ممارسًا في مجال التعلم الآلي من ذوي الخبرة أو جديدًا في هذا المجال ، فإن الهدف من هذا المركز هو تحقيق الحد الأقصى لإمكانات YOLOv8 في مشاريعك. + +!!! Note "ملاحظة" + + 🚧 تم تطوير وثائقنا متعددة اللغات حاليًا ، ونعمل بجد لتحسينها. شكراً لصبرك! 🙏 + +## من أين أبدأ + +- **تثبيت** `ultralytics` بواسطة pip والبدء في العمل في دقائق   [:material-clock-fast: ابدأ الآن](quickstart.md){ .md-button } +- **توقع** الصور ومقاطع الفيديو الجديدة بواسطة YOLOv8   [:octicons-image-16: توقع على الصور](modes/predict.md){ .md-button } +- **تدريب** نموذج YOLOv8 الجديد على مجموعة البيانات المخصصة الخاصة بك   [:fontawesome-solid-brain: قم بتدريب نموذج](modes/train.md){ .md-button } +- **استكشاف** مهام YOLOv8 مثل التجزئة والتصنيف والوضع والتتبع   [:material-magnify-expand: استكشاف المهام](tasks/index.md){ .md-button } + +

+
+ +
+ مشاهدة: كيفية تدريب نموذج YOLOv8 على مجموعة بيانات مخصصة في جوجل كولاب. +

+ +## YOLO: نبذة تاريخية + +تم تطوير [YOLO](https://arxiv.org/abs/1506.02640) (You Only Look Once) ، نموذج شهير لكشف الكائنات وتجزئة الصور ، من قبل جوزيف ريدمون وعلي فرهادي في جامعة واشنطن. في عام 2015 ، حققت YOLO شهرة سريعة بفضل سرعتها العالية ودقتها. + +- [YOLOv2](https://arxiv.org/abs/1612.08242) ، الذي تم إصداره في عام 2016 ، قام بتحسين النموذج الأصلي من خلال دمج التطبيع التشغيلي ، ومربعات الربط ، ومجموعات الأبعاد. +- [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf) ، الذي تم إطلاقه في عام 2018 ، قدم تحسينات إضافية لأداء النموذج باستخدام شبكة ظهر أكثر كفاءة ومرشحات متعددة وتجميع هرم المساحة. +- تم إصدار [YOLOv4](https://arxiv.org/abs/2004.10934) في عام 2020 ، وقدم ابتكارات مثل زيادة المساعدات في البيانات ، ورأس جديد للكشف غير المرتبط بالمرابط ، ووظيفة فقدان جديدة. +- [YOLOv5](https://github.com/ultralytics/yolov5) قام بتحسين أداء النموذج وأضاف ميزات جديدة مثل تحسين ثوابت النموذج ، وتعقب التجارب المتكامل والتصدير التلقائي إلى تنسيقات التصدير الشهيرة. +- [YOLOv6](https://github.com/meituan/YOLOv6) تم تَوْزيعه على [Meituan](https://about.meituan.com/) في عام 2022 وهو قيد الاستخدام في العديد من روبوتات التسليم الذاتي للشركة. +- [YOLOv7](https://github.com/WongKinYiu/yolov7) أضاف مهمات إضافية مثل تقدير الوضع على مجموعة بيانات نقاط COCO الرئيسية. +- [YOLOv8](https://github.com/ultralytics/ultralytics) هو أحدث إصدار من YOLO بواسطة Ultralytics. باعتباره نموذجًا حديثًا وفريدًا من نوعه ، فإن YOLOv8 يبني على نجاح الإصدارات السابقة ، ويقدم ميزات وتحسينات جديدة لتحسين الأداء والمرونة والكفاءة. يدعم YOLOv8 مجموعة كاملة من مهام الذكاء الصناعي للرؤية ، بما في ذلك [الكشف](tasks/detect.md) ، [التجزئة](tasks/segment.md) ، [تقدير الوضع](tasks/pose.md) ، [التتبع](modes/track.md) ، و [التصنيف](tasks/classify.md). تتيح هذه القابلية للتكيف للمستخدمين استغلال قدرات YOLOv8 في تطبيقات ومجالات متنوعة. 
+ +## تراخيص YOLO: كيف يتم ترخيص Ultralytics YOLO؟ + +يوفر Ultralytics خيارين للترخيص لاستيعاب الحالات الاستخدام المتنوعة: + +- **ترخيص AGPL-3.0**: هذا الترخيص مفتوح المصدر والمعتمد من [OSI](https://opensource.org/licenses/) وهو مثالي للطلاب والهواة ، ويشجع على التعاون المفتوح ومشاركة المعرفة. راجع ملف [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) لمزيد من التفاصيل. +- **ترخيص المؤسسة**: صمم للاستخدام التجاري ، يسمح هذا الترخيص بدمج سلس للبرمجيات ونماذج AI الخاصة بشركة Ultralytics في السلع والخدمات التجارية ، وتفادي متطلبات المصدر المفتوح لـ AGPL-3.0. إذا تشمل سيناريو الخاص بك تضمين حلولنا في عرض تجاري ، فيرجى التواصل من خلال [Ultralytics Licensing](https://ultralytics.com/license). + +تم تصميم استراتيجية الترخيص الخاصة بنا لضمان أن أي تحسينات على مشاريعنا مفتوحة المصدر يتم إرجاعها إلى المجتمع. نحمل مبادئ المصدر المفتوح قريبة من قلوبنا ❤️ ، ومهمتنا هي ضمان أن يمكن استخدام وتوسيع مساهماتنا بطرق تعود بالنفع على الجميع. diff --git a/docs/ar/models/fast-sam.md b/docs/ar/models/fast-sam.md new file mode 100644 index 0000000..68b39fc --- /dev/null +++ b/docs/ar/models/fast-sam.md @@ -0,0 +1,191 @@ +--- +comments: true +description: استكشف FastSAM ، وهو حلاً مبنيًا على الشبكات العصبية السريعة لتجزئة الكائنات في الوقت الحقيقي في الصور. تفاعل المستخدم المحسّن ، والكفاءة الحسابية ، والقابلية للتكيف في مهام الرؤية المختلفة. +keywords: FastSAM ، التعلم الآلي ، حلاً مبنيًا على الشبكات العصبية السريعة ، قسيمة الكائنات ، حلاً في الوقت الحقيقي ، Ultralytics ، مهام الرؤية ، معالجة الصور ، تطبيقات صناعية ، تفاعل المستخدم +--- + +# نموذج تجزئة أي شيء بسرعة عالية (FastSAM) + +نموذج تجزئة أي شيء بسرعة عالية (FastSAM) هو حلاً مبتكرًا للعصب الشبكي يعمل بالزمن الحقيقي لمهمة تجزئة أي كائن داخل صورة ما. تم تصميم هذه المهمة لتجزئة أي كائن داخل صورة بناءً على إشارات تفاعل المستخدم المختلفة الممكنة. يقلل الـ FastSAM من الاحتياجات الحسابية بشكل كبير مع الحفاظ على أداء تنافسي ، مما يجعله خيارًا عمليًا لمجموعة متنوعة من مهام الرؤية. 
+ +![نظرة عامة على تصميم نموذج تجزئة أي شيء بسرعة عالية (FastSAM)](https://user-images.githubusercontent.com/26833433/248551984-d98f0f6d-7535-45d0-b380-2e1440b52ad7.jpg) + +## نظرة عامة + +تم تصميم FastSAM للتغلب على القيود الموجودة في [نموذج تجزئة ما شيء (SAM)](sam.md) ، وهو نموذج تحويل ثقيل يتطلب موارد حسابية كبيرة. يفصل FastSAM عملية تجزئة أي شيء إلى مرحلتين متسلسلتين: تجزئة جميع الأمثلة واختيار موجه بناءً على التعليمات. تستخدم المرحلة الأولى [YOLOv8-seg](../tasks/segment.md) لإنتاج قناع التجزئة لجميع الأمثلة في الصورة. في المرحلة الثانية ، يتم إخراج منطقة الاهتمام المتعلقة بالتعليمة. + +## المميزات الرئيسية + +1. **حلاً في الوقت الحقيقي**: من خلال استغلال كفاءة الشبكات العصبية الحاسوبية ، يوفر FastSAM حلاً في الوقت الحقيقي لمهمة تجزئة أي شيء ، مما يجعله قيمًا للتطبيقات الصناعية التي تتطلب نتائج سريعة. + +2. **كفاءة وأداء**: يقدم FastSAM تقليل كبير في الاحتياجات الحسابية واستخدام الموارد دون التنازل عن جودة الأداء. يحقق أداءً قابلاً للمقارنة مع SAM ولكن بموارد حسابية مخفضة بشكل كبير ، مما يمكن من تطبيقه في الوقت الحقيقي. + +3. **تجزئة يستند إلى الموجه**: يمكن لـ FastSAM تجزئة أي كائن داخل صورة ترشده مختلف إشارات تفاعل المستخدم الممكنة ، مما يوفر مرونة وقابلية للتكيف في سيناريوهات مختلفة. + +4. **يستند إلى YOLOv8-seg**: يستند FastSAM إلى [YOLOv8-seg](../tasks/segment.md) ، وهو كاشف كائنات مجهز بفرع تجزئة المثيلات. يمكنه بشكل فعال إنتاج قناع التجزئة لجميع الأمثلة في صورة. + +5. **نتائج تنافسية في الاختبارات التحضيرية**: في مهمة اقتراح الكائن على MS COCO ، يحقق FastSAM درجات عالية بسرعة أسرع بكثير من [SAM](sam.md) على بطاقة NVIDIA RTX 3090 واحدة ، مما يدل على كفاءته وقدرته. + +6. **تطبيقات عملية**: توفر الطريقة المقترحة حلاً جديدًا وعمليًا لعدد كبير من مهام الرؤية بسرعة عالية حقًا ، بمعدلات سرعة عشرات أو مئات المرات أسرع من الطرق الحالية. + +7. **جدوى ضغط النموذج**: يظهر FastSAM إمكانية تقليل الجهد الحسابي بشكل كبير من خلال إدخال سابق اصطناعي للهيكل ، مما يفتح إمكانيات جديدة لهندسة هيكل النموذج الكبير لمهام الرؤية العامة. 
+ +## النماذج المتاحة ، المهام المدعومة ، وأوضاع التشغيل + +يعرض هذا الجدول النماذج المتاحة مع أوزانها المحددة ، والمهام التي تدعمها ، ومدى توافقها مع أوضاع التشغيل المختلفة مثل [الاستنتاج](../modes/predict.md) ، [التحقق](../modes/val.md) ، [التدريب](../modes/train.md) ، و[التصدير](../modes/export.md) ، مشار إليها برموز الـ✅ للأوضاع المدعومة والرموز ❌ للأوضاع غير المدعومة. + +| نوع النموذج | أوزان تم تدريبها مسبقًا | المهام المدعومة | الاستنتاج | التحقق | التدريب | التصدير | +|-------------|-------------------------|---------------------------------------|-----------|--------|---------|---------| +| FastSAM-s | `FastSAM-s.pt` | [تجزئة المثيلات](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| FastSAM-x | `FastSAM-x.pt` | [تجزئة المثيلات](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## أمثلة الاستخدام + +يسهل دمج نماذج FastSAM في تطبيقات Python الخاصة بك. يوفر Ultralytics واجهة برمجة تطبيقات Python سهلة الاستخدام وأوامر CLI لتسهيل التطوير. + +### استخدام التوقعات + +للقيام بكشف الكائنات في صورة ، استخدم طريقة `predict` كما هو موضح أدناه: + +!!! 
Example "مثال" + + === "بايثون" + ```python + from ultralytics import FastSAM + from ultralytics.models.fastsam import FastSAMPrompt + + # حدد مصدر التوقع + source = 'path/to/bus.jpg' + + # قم بإنشاء نموذج FastSAM + model = FastSAM('FastSAM-s.pt') # or FastSAM-x.pt + + # تنفيذ توقعات على صورة + everything_results = model(source, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9) + + # قم بتجهيز كائن معالج مع قواعد التوقع + prompt_process = FastSAMPrompt(source, everything_results, device='cpu') + + # التوقع باستخدام كل شيء + ann = prompt_process.everything_prompt() + + # bbox الشكل الافتراضي [0,0,0,0] -> [x1,y1,x2,y2] + ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300]) + + # التوقع النصي + ann = prompt_process.text_prompt(text='صورة لكلب') + + # التوقع النقطي + ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1]) + prompt_process.plot(annotations=ann, output='./') + ``` + + === "CLI" + ```bash + # قم بتحميل نموذج FastSAM وتجزئة كل شيء به + yolo segment predict model=FastSAM-s.pt source=path/to/bus.jpg imgsz=640 + ``` + +توضح هذه المقاطع البساطة في تحميل نموذج مدرب مسبقًا وتنفيذ توقع على صورة. + +### استخدام مهام التحقق + +يمكن تنفيذ التحقق من النموذج على مجموعة بيانات على النحو التالي: + +!!! Example "مثال" + + === "بايثون" + ```python + from ultralytics import FastSAM + + # قم بإنشاء نموذج FastSAM + model = FastSAM('FastSAM-s.pt') # or FastSAM-x.pt + + # قم بتنفيذ التحقق من النموذج + results = model.val(data='coco8-seg.yaml') + ``` + + === "CLI" + ```bash + # قم بتحميل نموذج FastSAM وأجرِ التحقق منه بخصوص مجموعة البيانات مثال كوكو 8 بحجم صورة 640 + yolo segment val model=FastSAM-s.pt data=coco8.yaml imgsz=640 + ``` + +يرجى ملاحظة أن الـ FastSAM يدعم فقط الكشف والتجزئة لفئة واحدة من الكائن. هذا يعني أنه سيتعرف ويجزء جميع الكائنات على أنها نفس الفئة. لذلك ، عند إعداد مجموعة البيانات ، يجب تحويل جميع معرفات فئة الكائن إلى 0. 
+ +## استخدام FastSAM الرسمي + +يتوفر نموذج FastSAM مباشرةً من مستودع [https://github.com/CASIA-IVA-Lab/FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM). فيما يلي نظرة عامة موجزة على الخطوات المعتادة التي قد تتخذها لاستخدام FastSAM: + +### التثبيت + +1. استنسخ مستودع FastSAM: + ```shell + git clone https://github.com/CASIA-IVA-Lab/FastSAM.git + ``` + +2. أنشئ بيئة Conda وفعّلها بـ Python 3.9: + ```shell + conda create -n FastSAM python=3.9 + conda activate FastSAM + ``` + +3. انتقل إلى المستودع المستنسخ وقم بتثبيت الحزم المطلوبة: + ```shell + cd FastSAM + pip install -r requirements.txt + ``` + +4. قم بتثبيت نموذج CLIP: + ```shell + pip install git+https://github.com/openai/CLIP.git + ``` + +### مثال الاستخدام + +1. قم بتنزيل [نقطة تفتيش النموذج (checkpoint)](https://drive.google.com/file/d/1m1sjY4ihXBU1fZXdQ-Xdj-mDltW-2Rqv/view?usp=sharing). + +2. استخدم FastSAM للتوقع. أمثلة الأوامر: + + - تجزئة كل شيء في صورة: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg + ``` + + - تجزئة كائنات محددة باستخدام تعليمات النص: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --text_prompt "الكلب الأصفر" + ``` + + - تجزئة كائنات داخل مربع محدد (تقديم إحداثيات الصندوق في تنسيق xywh): + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --box_prompt "[570,200,230,400]" + ``` + + - تجزئة كائنات قرب النقاط المحددة: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --point_prompt "[[520,360],[620,300]]" --point_label "[1,0]" + ``` + +بالإضافة إلى ذلك ، يمكنك تجربة FastSAM من خلال [Colab demo](https://colab.research.google.com/drive/1oX14f6IneGGw612WgVlAiy91UHwFAvr9?usp=sharing) أو على [HuggingFace web demo](https://huggingface.co/spaces/An-619/FastSAM) لتجربة بصرية. + +## الاقتباسات والشكر + +نود أن نشكر مؤلفي FastSAM على مساهماتهم الهامة في مجال تجزئة المثيلات في الوقت الحقيقي: + +!!! 
Quote "" + + === "بيب تيكس" + + ```bibtex + @misc{zhao2023fast, + title={Fast Segment Anything}, + author={Xu Zhao and Wenchao Ding and Yongqi An and Yinglong Du and Tao Yu and Min Li and Ming Tang and Jinqiao Wang}, + year={2023}, + eprint={2306.12156}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +يمكن العثور على ورقة FastSAM الأصلية على [arXiv](https://arxiv.org/abs/2306.12156). قام الأباء بجعل أعمالهم متاحة للجمهور ، ويمكن الوصول إلى قاعدة الكود على [GitHub](https://github.com/CASIA-IVA-Lab/FastSAM). نقدر جهودهم في تطوير المجال وجعل أعمالهم متاحة للمجتمع الأوسع. diff --git a/docs/ar/models/index.md b/docs/ar/models/index.md new file mode 100644 index 0000000..a399f0d --- /dev/null +++ b/docs/ar/models/index.md @@ -0,0 +1,98 @@ +--- +comments: true +description: استكشف مجموعة متنوعة من عائلة YOLO، ونماذج SAM وMobileSAM وFastSAM وYOLO-NAS وRT-DETR المدعومة من Ultralytics. ابدأ بأمثلة لكل من استخدام واجهة الأوامر وPython. +keywords: Ultralytics, documentation, YOLO, SAM, MobileSAM, FastSAM, YOLO-NAS, RT-DETR, models, architectures, Python, CLI +--- + +# النماذج المدعومة من Ultralytics + +أهلاً بك في وثائق نماذج Ultralytics! نحن نقدم الدعم لمجموعة واسعة من النماذج، كل منها مُصمم لمهام محددة مثل [الكشف عن الأجسام](../tasks/detect.md)، [تقطيع الحالات](../tasks/segment.md)، [تصنيف الصور](../tasks/classify.md)، [تقدير الوضعيات](../tasks/pose.md)، و[تتبع الأجسام المتعددة](../modes/track.md). إذا كنت مهتمًا بالمساهمة في هندسة نموذجك مع Ultralytics، راجع دليل [المساهمة](../../help/contributing.md). + +!!! Note "ملاحظة" + + 🚧 تحت الإنشاء: وثائقنا بلغات متعددة قيد الإنشاء حاليًا، ونحن نعمل بجد لتحسينها. شكرًا لصبرك! 🙏 + +## النماذج المميزة + +إليك بعض النماذج الرئيسية المدعومة: + +1. **[YOLOv3](yolov3.md)**: الإصدار الثالث من عائلة نموذج YOLO، الذي أنشأه أصلاً Joseph Redmon، والمعروف بقدراته الفعالة في الكشف عن الأجسام في الوقت الفعلي. +2. **[YOLOv4](yolov4.md)**: تحديث محلي لـ YOLOv3، تم إصداره بواسطة Alexey Bochkovskiy في 2020. +3. 
**[YOLOv5](yolov5.md)**: نسخة مُحسنة من هندسة YOLO من قبل Ultralytics، توفر أداءً أفضل وتوازن في السرعة مقارنة بالإصدارات السابقة. +4. **[YOLOv6](yolov6.md)**: أُصدرت بواسطة [Meituan](https://about.meituan.com/) في 2022، ويُستخدم في العديد من روبوتات التوصيل الذاتية للشركة. +5. **[YOLOv7](yolov7.md)**: تم إصدار نماذج YOLO المحدثة في 2022 بواسطة مؤلفي YOLOv4. +6. **[YOLOv8](yolov8.md) جديد 🚀**: الإصدار الأحدث من عائلة YOLO، يتميز بقدرات مُعززة مثل تقطيع الحالات، تقدير الوضعيات/النقاط الرئيسية، والتصنيف. +7. **[Segment Anything Model (SAM)](sam.md)**: نموذج Segment Anything Model (SAM) من Meta. +8. **[Mobile Segment Anything Model (MobileSAM)](mobile-sam.md)**: نموذج MobileSAM للتطبيقات المحمولة، من جامعة Kyung Hee. +9. **[Fast Segment Anything Model (FastSAM)](fast-sam.md)**: نموذج FastSAM من مجموعة تحليل الصور والفيديو، والمعهد الصيني للأتمتة، وأكاديمية العلوم الصينية. +10. **[YOLO-NAS](yolo-nas.md)**: نماذج YOLO Neural Architecture Search (NAS). +11. **[Realtime Detection Transformers (RT-DETR)](rtdetr.md)**: نماذج Realtime Detection Transformer (RT-DETR) من PaddlePaddle التابعة لشركة Baidu. + +

+
+ +
+ شاهد: تشغيل نماذج YOLO من Ultralytics في بضعة أسطر من الكود فقط. +

+ +## البدء في الاستخدام: أمثلة على الاستخدام + +يوفر هذا المثال أمثلة مبسطة على التدريب والاستدلال باستخدام YOLO. للحصول على الوثائق الكاملة عن هذه وغيرها من [الأوضاع](../modes/index.md)، انظر صفحات وثائق [التنبؤ](../modes/predict.md)، و[التدريب](../modes/train.md)، و[التقييم](../modes/val.md) و[التصدير](../modes/export.md). + +لاحظ أن المثال أدناه هو لنماذج [Detect](../tasks/detect.md) YOLOv8 لكشف الأجسام. للاطلاع على المهام الإضافية المدعومة، راجع وثائق [Segment](../tasks/segment.md)، و[Classify](../tasks/classify.md) و[Pose](../tasks/pose.md). + +!!! Example "مثال" + + === "Python" + + نماذج `*.pt` المُدربة مسبقًا وملفات الإعداد `*.yaml` يمكن أن تُمرر إلى فئات `YOLO()`, `SAM()`, `NAS()` و `RTDETR()` لإنشاء مثيل للنموذج في Python: + + ```python + from ultralytics import YOLO + + # تحميل نموذج YOLOv8n المُدرب مسبقًا على COCO + model = YOLO('yolov8n.pt') + + # عرض معلومات النموذج (اختياري) + model.info() + + # تدريب النموذج على مجموعة البيانات المثالية COCO8 لمدة 100 عصر + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # تشغيل الاستدلال بنموذج YOLOv8n على صورة 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + الأوامر CLI متاحة لتشغيل النماذج مباشرة: + + ```bash + # تحميل نموذج YOLOv8n المُدرب مسبقًا على COCO وتدريبه على مجموعة البيانات المثالية COCO8 لمدة 100 عصر + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # تحميل نموذج YOLOv8n المُدرب مسبقًا على COCO وتشغيل الاستدلال على صورة 'bus.jpg' + yolo predict model=yolov8n.pt source=path/to/bus.jpg + ``` + +## المساهمة بنماذج جديدة + +هل أنت مهتم بالمساهمة بنموذجك في Ultralytics؟ رائع! نحن دائمًا منفتحون على توسيع محفظة النماذج لدينا. + +1. **احفظ نسخة عن المستودع**: ابدأ بحفظ نسخة عن [مستودع Ultralytics على GitHub](https://github.com/ultralytics/ultralytics). + +2. **استنسخ نسختك**: انسخ نسختك إلى جهازك المحلي وأنشئ فرعًا جديدًا للعمل عليه. + +3. 
**طبق نموذجك**: أضف نموذجك متبعًا معايير وإرشادات البرمجة الموفرة في دليل [المساهمة](../../help/contributing.md) لدينا. + +4. **اختبر بدقة**: تأكد من اختبار نموذجك بشكل مكثف، سواء بشكل منفصل أو كجزء من المسار البرمجي. + +5. **أنشئ Pull Request**: بمجرد أن تكون راضًيا عن نموذجك، قم بإنشاء طلب سحب إلى المستودع الرئيسي للمراجعة. + +6. **مراجعة الكود والدمج**: بعد المراجعة، إذا كان نموذجك يلبي معاييرنا، سيتم دمجه في المستودع الرئيسي. + +للخطوات التفصيلية، يرجى الرجوع إلى دليل [المساهمة](../../help/contributing.md). diff --git a/docs/ar/models/mobile-sam.md b/docs/ar/models/mobile-sam.md new file mode 100644 index 0000000..ec671da --- /dev/null +++ b/docs/ar/models/mobile-sam.md @@ -0,0 +1,116 @@ +--- +comments: true +description: تعرّف على MobileSAM وتطبيقه، وقارنه مع SAM الأصلي، وكيفية تنزيله واختباره في إطار Ultralytics. قم بتحسين تطبيقاتك المحمولة اليوم. +keywords: MobileSAM، Ultralytics، SAM، التطبيقات المحمولة، Arxiv، GPU، API، مُشفّر الصورة، فك تشفير القناع، تنزيل النموذج، طريقة الاختبار +--- + +![شعار MobileSAM](https://github.com/ChaoningZhang/MobileSAM/blob/master/assets/logo2.png?raw=true) + +# التمييز المحمول لأي شيء (MobileSAM) + +الآن يمكنك الاطّلاع على ورقة MobileSAM في [arXiv](https://arxiv.org/pdf/2306.14289.pdf). + +يمكن الوصول إلى عرض مباشر لـ MobileSAM يعمل على وحدة المعالجة المركزية CPU من [هنا](https://huggingface.co/spaces/dhkim2810/MobileSAM). يستغرق الأداء على وحدة المعالجة المركزية Mac i5 تقريبًا 3 ثوانٍ. في عرض الواجهة التفاعلية الخاص بهنغ فيس، تؤدي واجهة المستخدم ووحدات المعالجة المركزية ذات الأداء المنخفض إلى استجابة أبطأ، لكنها تواصل العمل بفعالية. + +تم تنفيذ MobileSAM في عدة مشاريع بما في ذلك [Grounding-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything) و [AnyLabeling](https://github.com/vietanhdev/anylabeling) و [Segment Anything in 3D](https://github.com/Jumpat/SegmentAnythingin3D). 
+ +تم تدريب MobileSAM على وحدة المعالجة الرسومية (GPU) الواحدة باستخدام مجموعة بيانات تحتوي على 100000 صورة (1% من الصور الأصلية) في أقل من يوم واحد. سيتم توفير الشفرة المصدرية لعملية التدريب هذه في المستقبل. + +## النماذج المتاحة، المهام المدعومة، وأوضاع التشغيل + +يُعرض في هذا الجدول النماذج المتاحة مع وزنها المدرب مسبقًا، والمهام التي تدعمها، وتوافقها مع أوضاع التشغيل المختلفة مثل [الاستدلال](../modes/predict.md)، [التحقق](../modes/val.md)، [التدريب](../modes/train.md)، و [التصدير](../modes/export.md)، حيث يُشير إيموجي ✅ للأوضاع المدعومة وإيموجي ❌ للأوضاع غير المدعومة. + +| نوع النموذج | الأوزان المدربة مسبقًا | المهام المدعومة | الاستدلال | التحقق | التدريب | التصدير | +|-------------|------------------------|--------------------------------------|-----------|--------|---------|---------| +| MobileSAM | `mobile_sam.pt` | [تجزئة العناصر](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## التحويل من SAM إلى MobileSAM + +نظرًا لأن MobileSAM يحتفظ بنفس سير العمل لـ SAM الأصلي، قمنا بدمج التجهيزات المسبقة والتجهيزات اللاحقة للنموذج الأصلي وجميع الواجهات الأخرى. نتيجة لذلك، يمكن لأولئك الذين يستخدمون حاليًا SAM الأصلي الانتقال إلى MobileSAM بقدر أدنى من الجهد. + +يؤدي MobileSAM بشكل مقارب لـ SAM الأصلي ويحتفظ بنفس سير العمل باستثناء تغيير في مُشفر الصورة. على وحدة المعالجة الرسومية (GPU) الواحدة، يعمل MobileSAM بمعدل 12 مللي ثانية لكل صورة: 8 مللي ثانية لمُشفر الصورة و4 مللي ثانية لفك تشفير القناع. 
+ +يوفر الجدول التالي مقارنة بين مُشفرات الصور القائمة على ViT: + +| مُشفّر الصورة | SAM الأصلي | MobileSAM | +|---------------|----------------|--------------| +| العوامل | 611 مليون | 5 مليون | +| السرعة | 452 مللي ثانية | 8 مللي ثانية | + +يستخدم SَM الأصلي و MobileSAM نفس فك تشفير القناع الذي يعتمد على التوجيه بواسطة الرموز: + +| فك تشفير القناع | SAM الأصلي | MobileSAM | +|-----------------|--------------|--------------| +| العوامل | 3.876 مليون | 3.876 مليون | +| السرعة | 4 مللي ثانية | 4 مللي ثانية | + +فيما يلي مقارنة لكامل سير العمل: + +| السير الكامل (التشفير+الفك) | SAM الأصلي | MobileSAM | +|-----------------------------|----------------|---------------| +| العوامل | 615 مليون | 9.66 مليون | +| السرعة | 456 مللي ثانية | 12 مللي ثانية | + +يتم عرض أداء MobileSAM و SAM الأصلي باستخدام كل من النقطة ومربع كلمة المحفز. + +![صورة بالنقطة ككلمة محفز](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/mask_box.jpg?raw=true) + +![صورة بالمربع ككلمة محفز](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/mask_box.jpg?raw=true) + +بفضل أدائه المتفوق، يكون MobileSAM أصغر بحوالي 5 أضعاف وأسرع بحوالي 7 أضعاف من FastSAM الحالي. يتوفر مزيد من التفاصيل على [صفحة مشروع MobileSAM](https://github.com/ChaoningZhang/MobileSAM). + +## اختبار MobileSAM في Ultralytics + +مثل SAM الأصلي، نقدم طريقة اختبار مبسّطة في Ultralytics، بما في ذلك وضعي النقطة والصندوق. + +### تنزيل النموذج + +يمكنك تنزيل النموذج [هنا](https://github.com/ChaoningZhang/MobileSAM/blob/master/weights/mobile_sam.pt). + +### النقطة ككلمة محفز + +!!! Example "مثال" + + === "Python" + ```python + from ultralytics import SAM + + # تحميل النموذج + model = SAM('mobile_sam.pt') + + # توقع جزء بناءً على نقطة محفز + model.predict('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +### الصندوق ككلمة محفز + +!!! 
Example "مثال" + + === "Python" + ```python + from ultralytics import SAM + + # تحميل النموذج + model = SAM('mobile_sam.pt') + + # توقع جزء بناءً على صندوق محفز + model.predict('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + ``` + +لقد قمنا بتنفيذ "MobileSAM" و "SAM" باستخدام نفس API. لمزيد من معلومات الاستخدام، يُرجى الاطّلاع على [صفحة SAM](sam.md). + +## الاقتباس والشكر + +إذا وجدت MobileSAM مفيدًا في أبحاثك أو عملك التطويري، يُرجى النظر في استشهاد ورقتنا: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{mobile_sam, + title={Faster Segment Anything: Towards Lightweight SAM for Mobile Applications}, + author={Zhang, Chaoning and Han, Dongshen and Qiao, Yu and Kim, Jung Uk and Bae, Sung Ho and Lee, Seungkyu and Hong, Choong Seon}, + journal={arXiv preprint arXiv:2306.14289}, + year={2023} + } diff --git a/docs/ar/models/rtdetr.md b/docs/ar/models/rtdetr.md new file mode 100644 index 0000000..f60d049 --- /dev/null +++ b/docs/ar/models/rtdetr.md @@ -0,0 +1,93 @@ +--- +comments: true +description: اكتشف ميزات وفوائد RT-DETR من Baidu، وهو كاشف كائنات فعال وقابل للتكيف في الوقت الفعلي يعتمد على Vision Transformers، بما في ذلك النماذج المدربة مسبقًا. +keywords: RT-DETR, Baidu, Vision Transformers, كشف كائنات, أداء فوري, CUDA, TensorRT, اختيار الاستعلام المرتبط بـ IoU, Ultralytics, واجهة برمجة التطبيقات الخاصة بلغة Python, PaddlePaddle +--- + +# RT-DETR من Baidu: اكتشاف كائنات في الوقت الفعلي يعتمد على Vision Transformer + +## النظرة العامة + +Real-Time Detection Transformer (RT-DETR)، المطور من قبل Baidu، هو كاشف حديث الطراز يوفر أداءً فعليًا في الوقت الفعلي مع الحفاظ على دقة عالية. يستفيد من قوة Vision Transformers (ViT) في معالجة الميزات متعددة المقياس عن طريق فصل التفاعلات داخل المقياس ودمج التفاعلات بين المقاييس المختلفة. يتكيف RT-DETR بشكل كبير ويدعم ضبط سرعة الاستعلام باستخدام طبقات مختلفة في المفكرة بدون إعادة التدريب. يتفوق هذا النموذج على العديد من كاشفات الكائنات في الوقت الفعلي الأخرى، ويستفيد من المنصات القوية مثل CUDA مع TensorRT. 
+ +![نموذج مثال](https://user-images.githubusercontent.com/26833433/238963168-90e8483f-90aa-4eb6-a5e1-0d408b23dd33.png) +**نظرة عامة على RT-DETR من Baidu.** يعرض مخطط معمارية نموذج RT-DETR مراحل الظهر الثلاث الأخيرة {S3، S4، S5} كإدخال للمشفر. يحول المشفر الهجين الفعال الميزات متعددة المقياس إلى تسلسل من ميزات الصورة من خلال تفاعل الميزات داخل المقياس (AIFI) ووحدة دمج الميزات بين المقاييس المختلفة (CCFM). يتم استخدام اختيار الاستعلام المرتبط بـ IoU لاختيار عدد ثابت من ميزات الصورة لتكون استعلامات الكائن الأولية لفك الترميز. أخيرًا، يحسن فك الترميز مع رؤوس التنبؤ الإضافية الاستعلامات الكائنية لتوليد المربعات وتقييمات الثقة ([المصدر](https://arxiv.org/pdf/2304.08069.pdf)). + +### الميزات الرئيسية + +- **مشفر هجين فعال:** يستخدم RT-DETR من Baidu مشفر هجين فعال يعمل على معالجة الميزات متعددة المقياس من خلال فصل التفاعلات داخل المقياس ودمج التفاعلات بين المقاييس المختلفة. يقلل هذا التصميم الفريد القائم على Vision Transformers من تكاليف الحسابات ويتيح الكشف عن الكائنات في الوقت الفعلي. +- **اختيار الاستعلام المرتبط بـ IoU:** يعمل RT-DETR من Baidu على تحسين بدء استعلام الكائنات من خلال استخدام اختيار الاستعلام المرتبط بـ IoU. يتيح هذا للنموذج التركيز على الكائنات الأكثر صلة في السيناريو، مما يعزز دقة الكشف. +- **سرعة الاستنتاج قابلة للتكيف:** يدعم RT-DETR من Baidu ضبط سرعة الاستنتاج بشكل مرن باستخدام طبقات مختلفة في المفكرة دون الحاجة لإعادة التدريب. يسهل هذا التكيف التطبيق العملي في العديد من سيناريوهات كشف الكائنات في الوقت الفعلي. + +## النماذج المدربة مسبقًا + +تقدم واجهة برمجة التطبيقات الخاصة بلغة Python في Ultralytics نماذج PaddlePaddle RT-DETR مدربة مسبقًا بمقاييس مختلفة: + +- RT-DETR-L: 53.0% AP على COCO val2017، 114 FPS على GPU T4 +- RT-DETR-X: 54.8% AP على COCO val2017، 74 FPS على GPU T4 + +## أمثلة الاستخدام + +يوفر هذا المثال أمثلة بسيطة لتدريب واختبار RT-DETRR. 
للحصول على وثائق كاملة حول هذه الأمثلة وأوضاع أخرى [انقر هنا](../modes/index.md) للاطلاع على صفحات الوثائق [التنبؤ](../modes/predict.md)، [التدريب](../modes/train.md)، [التحقق](../modes/val.md) و [التصدير](../modes/export.md). + +!!! Example "مثال" + + === "Python" + + ```python + from ultralytics import RTDETR + + # تحميل نموذج RT-DETR-l مُدرَّب مسبقًا على COCO + model = RTDETR('rtdetr-l.pt') + + # عرض معلومات النموذج (اختياري) + model.info() + + # تدريب النموذج على مجموعة بيانات المثال COCO8 لـ 100 دورة + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # تشغيل الاستدلال باستخدام النموذج RT-DETR-l على صورة 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + ```bash + # تحميل نموذج RT-DETR-l مُدرَّب مسبقًا على COCO وتدريبه على مجموعة بيانات المثال COCO8 لـ 100 دورة + yolo train model=rtdetr-l.pt data=coco8.yaml epochs=100 imgsz=640 + + # تحميل نموذج RT-DETR-l مُدرَّب مسبقًا على COCO وتشغيل الاستدلال على صورة 'bus.jpg' + yolo predict model=rtdetr-l.pt source=path/to/bus.jpg + ``` + +## المهام والأوضاع المدعومة + +يقدم هذا الجدول أنواع النماذج والأوزان المدربة مسبقًا المحددة والمهام المدعومة بواسطة كل نموذج، والأوضاع المختلفة ([التدريب](../modes/train.md)، [التحقق](../modes/val.md)، [التنبؤ](../modes/predict.md)، [التصدير](../modes/export.md)) التي يتم دعمها، ممثلة برموز الـ ✅. + +| نوع النموذج | الأوزان المدربة مسبقًا | المهام المدعومة | استنتاج | تحقق صحة | تدريب | تصدير   | +|-----------------------|------------------------|----------------------------------|---------|----------|-------|---------| +| RT-DETR الكبير | `rtdetr-l.pt` | [كشف كائنات](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| RT-DETR الكبير الزائد | `rtdetr-x.pt` | [كشف كائنات](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +## الاستشهادات والتقديرات + +إذا استخدمت RT-DETR من Baidu في أعمال البحث أو التطوير الخاصة بك، يرجى الاستشهاد بـ [الورقة الأصلية](https://arxiv.org/abs/2304.08069): + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{lv2023detrs, + title={DETRs Beat YOLOs on Real-time Object Detection}, + author={Wenyu Lv and Shangliang Xu and Yian Zhao and Guanzhong Wang and Jinman Wei and Cheng Cui and Yuning Du and Qingqing Dang and Yi Liu}, + year={2023}, + eprint={2304.08069}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +نحن نود أن نعرب عن امتناننا لـ Baidu وفريق [PaddlePaddle](https://github.com/PaddlePaddle/PaddleDetection) لإنشاء وصيانة هذ المورد القيم لمجتمع الرؤية الحاسوبية. نقدر تفاعلهم مع المجال من خلال تطوير كاشف الكائنات الحقيقي في الوقت الفعلي القائم على Vision Transformers، RT-DETR. + +*keywords: RT-DETR، الناقل، Vision Transformers، Baidu RT-DETR، PaddlePaddle، Paddle Paddle RT-DETR، كشف كائنات في الوقت الفعلي، كشف كائنات قائم على Vision Transformers، نماذج PaddlePaddle RT-DETR مدربة مسبقًا، استخدام Baidu's RT-DETR، واجهة برمجة التطبيقات الخاصة بلغة Python في Ultralytics* diff --git a/docs/ar/models/sam.md b/docs/ar/models/sam.md new file mode 100644 index 0000000..b47d5ec --- /dev/null +++ b/docs/ar/models/sam.md @@ -0,0 +1,225 @@ +--- +comments: true +description: استكشف النموذج القاطع للشيء أيا كان (SAM) الحديث من Ultralytics الذي يتيح الت segment تشفير صور الوقت الحقيقي. تعرف على مرونته في مجال الت segment، وأداء نقل انيفورم زيرو شوت، وكيفية استخدامه. +keywords: Ultralytics, قسيمة الصور, Segment Anything Model, SAM, سلسلة بيانات SA-1B, مرونة الصور في الوقت الحقيقي, نقل الانيفورم زيرو شوت, الكشف عن الكائنات, تحليل الصور, التعلم الآلي +--- + +# نموذج القطعة شيء ما (SAM) + +مرحبًا بك في الجبهة الأولى لقطع الصور مع نموذج القطعة شيء ما ، أو SAM. هذا النموذج الثوري قد غير اللعبة من خلال إدخال التشفير القراءة للصور مع أداء في الوقت الحقيقي، وتحديد معايير جديدة في هذا المجال. + +## مقدمة إلى SAM: القطعة شيء ما نموذج + +نموذج القطعة شيء ما ، أو SAM، هو نموذج شفاف اول في فصل الصور الرقمية التي تتيح قدرة شهير على التشفير، توفر مرونة فريدة من نوعها في مهام التحليل اللازمة للصور. 
نموذج SAM هو أساس مشروع 'أي شيء في شيء' الابتكاري و هو مشروع يقدم نموذجا جديدا ، مهمة وسلسلة بيانات مبتكرة للفصل البصري. + +يتيح تصميم SAM المتقدم له التكيف مع توزيعات صور جديدة ومهام جديدة دون الحاجة إلى معرفة مسبقة، وهذه الميزة تعرف بالكفاءة المطلوبة. حيث يتم تدريبه على سلسلة البيانات الواسعة [سلسلة SA-1B](https://ai.facebook.com/datasets/segment-anything/)، التي تحتوي على أكثر من ملياري قناع معروض على 11 مليون صورة تمت المحافظة عليها بعناية، وقد عرض SAM أداء مثير للإعجاب مع نقل انيفورم زيرو شوت فاق النتائج المراقبة السابقة بالتدريب الكامل في العديد من الحالات. + +![صورة مثالية لسلسة البيانات](https://user-images.githubusercontent.com/26833433/238056229-0e8ffbeb-f81a-477e-a490-aff3d82fd8ce.jpg) +صور مثالية مع قناع محاط بها من سلسلة البيانات التي قدمناها حديثًا ، SA-1B. يحتوي سلسلة SA-1B على 11 مليون صورة متنوعة ، عالية الدقة ، مرخصة وتحمي الخصوصية و 1.1 مليار قناع فصل جودة عالية. تم توجيه هذه القناع تمامًا بتقويم آلي من قبل SAM وتم التحقق من جودتها وتنوعها من خلال تصنيفات بشرية وتجارب عديدة. يتم تجميع الصور حسب عدد الأقنعة في كل صورة للتصوير (هناك حوالي 100 قناع في الصورة في المتوسط). + +## السمات الرئيسية لنموذج القطعة شيء ما (SAM) + +- **مهمة التشفير القضائية:** تم تصميم SAM بهدف مهمة التشفير القابلة للتشفير ، مما يتيح له إنشاء قناع تشفير صالح من أي تلميح معين ، مثل الدلائل المكانية أو النصية التي تحدد الكائن. +- **بنية متقدمة:** يستخدم نموذج القطعة شيء ما مُشفر صورة قوي ، مشفر تشفير ومُشفر بسهولة الويغورة. تمكن هذه البنية الفريدة من فتح المجال للتشفير المرن ، وحساب القناع في الوقت الحقيقي ، والاستعداد للغموض في مهام التشفير. +- **سلسلة البيانات SA-1B:** التي قدمها مشروع أي شيء في شيء، تعرض سلسلة البيانات SA-1B أكثر من ملياري قناع على 11 مليون صورة. كأكبر سلسلة بيانات للفصل حتى الآن، توفر نموذج SAM مصدر تدريب ضخم ومتنوع. +- **أداء نقل الانيفورم زيرو شوت:** يعرض نموذج SAM أداء رائع في نقل الانيفورم زيرو شوت في مهام القطع المختلفة، مما يجعله أداة قوية جاهزة للاستخدام في تطبيقات متنوعة مع حاجة قليلة جدًا لهندسة التشفير الخاصة. 
+ +للحصول على نظرة شاملة على نموذج القطعة شيء ما وسلسلة SA-1B، يرجى زيارة [موقع أي شيء في شيء](https://segment-anything.com) واطلع على بحث [أي شيء في شيء](https://arxiv.org/abs/2304.02643). + +## النماذج المتاحة والمهام المدعومة ووضعيات العمل + +تقدم هذه الجدول النماذج المتاحة مع أوزان محددة مسبقًا والمهام التي يدعمونها وتوافقهم مع وضعيات العمل المختلفة مثل [قراءة الصورة](../modes/predict.md)، [التحقق](../modes/val.md)، [التدريب](../modes/train.md)، و [التصدير](../modes/export.md) ، مما يشير إلى ✅ رموز الدعم و ❌ للوضعيات غير المدعومة. + +| نوع النموذج | الأوزان المدربة مسبقًا | المهام المدعومة | قراءة الصورة | التحقق | التدريب | التصدير | +|-------------|------------------------|------------------------------------|--------------|--------|---------|---------| +| SAM الأساسي | `sam_b.pt` | [تجزئة النسخ](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| SAM الكبير | `sam_l.pt` | [تجزئة النسخ](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## كيفية استخدام SAM: مرونة وقوة في تجزئة الصورة + +يمكن استخدام نموذج القطعة شيء من أجل العديد من المهام التابعة إلى تدريبه. يشمل ذلك الكشف عن الحافة، إنشاء ترشيح للكائنات، تجزئة نسخة وتوقع نص مبدئي للتشفير. مع التشفير المهني ، يمكن لـ SAM التكيف بسرعة مع المهمات وتوزيعات البيانات الجديدة بطريقة transfer zero-shot، وبالتالي يعتبر أداة متعددة الاستخدامات وفعالة لجميع احتياجات تجزئة الصورة. + +### مثال لدمج SAM + +!!! Example "القسم بالاشارات" + + تقسيم الصورة مع الإشارات المعطاة. + + === "البايثون" + + ```python + from ultralytics import SAM + + # تحميل النموذج + model = SAM('sam_b.pt') + + # عرض معلومات النموذج (اختياري) + model.info() + + # تشغيل التنبوء بواسطة الدلائل + model('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + + # تشغيل التنبوء بواسطة نقاط الإشارة + model('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +!!! Example "قطع كل الشيء" + + قم بتجزئة الصورة بأكملها. 
+ + === "البايثون" + + ```python + from ultralytics import SAM + + # تحميل النموذج + model = SAM('sam_b.pt') + + # عرض معلومات النموذج (اختياري) + model.info() + + # تشغيل التنبوء + model('مسار/إلى/صورة.jpg') + ``` + + === "CLI" + + ```bash + # تشغيل التنبوء بنموذج SAM + yolo predict model=sam_b.pt source=path/to/image.jpg + ``` + +- المنطق هنا هو تجزئة الصورة كلها إذا لم تمرر أي إشارات (bboxes / points / masks). + +!!! Example "مثال على SAMPredictor" + + بواسطة هذا الطريق ، يمكنك تعيين الصورة مرة واحدة وتشغيل الإشارات مرارًا وتكرارًا دون تشغيل مشفر الصورة مرة أخرى. + + === "التنبؤ بالإشارة" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # إنشاء SAMPredictor + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # تعيين الصورة + predictor.set_image("ultralytics/assets/zidane.jpg") # تعيين بواسطة ملف صورة + predictor.set_image(cv2.imread("ultralytics/assets/zidane.jpg")) # تعيين مع np.ndarray + results = predictor(bboxes=[439, 437, 524, 709]) + results = predictor(points=[900, 370], labels=[1]) + + # إعادة تعيين الصورة + predictor.reset_image() + ``` + + قطع كل شيء مع وجود معطيات اختيارية. + + === "تقطيع كل شيء" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # إنشاء SAMPredictor + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # تجزئة مع بيانات إضافية + results = predictor(source="ultralytics/assets/zidane.jpg", crop_n_layers=1, points_stride=64) + ``` + +- للمزيد من الوسائط (args) الإضافية الخاصة بتقطيع كل شيء، راجع [مرجع `Predictor/generate`](../../../reference/models/sam/predict.md).
+ +## مقارنة SAM مقابل YOLOv8 + +في هذا المكان نقارن نموذج SAM الأصغر سام، SAM-b ، مع نموذج التجزئة YOLOv8 الصغيرة Ultralytics، [YOLOv8n-seg](../tasks/segment.md): + +| النموذج | الحجم | المعلمات | السرعة (المعالج) | +|----------------------------------------------|----------------------------|-----------------------|--------------------------| +| سام SAM-b | 358 م.بايت | 94.7 M | 51096 ms/im | +| [MobileSAM](mobile-sam.md) | 40.7 MB | 10.1 M | 46122 ms/im | +| [FastSAM-s](fast-sam.md) with YOLOv8 حافظة | 23.7 MB | 11.8 M | 115 ms/im | +| [YOLOv8n-seg](../tasks/segment.md) ل Ultraly | **6.7 MB** (53.4 مرة أصغر) | **3.4 M** (27.9x أقل) | **59 ms/im** (866x أسرع) | + +هذه المقارنة تظهر الاختلافات في أمر المقدار والسرعة بين النماذج. في حين يقدم SAM قدرات فريدة للتجزئة التلقائية ، إلا أنه ليس منافسًا مباشرًا لنماذج التجزئة YOLOv8 ، حيث تكون أصغر وأسرع وأكثر كفاءة. + +اكتنزات التجريب على ماكينة Apple M2 Macbook 2023 مع 16GB من الذاكرة. لإعادة إنتاج هذا الاختبار: + +!!! Example "مثال" + + === "البايثون" + ```البايثون + from ultralytics import FastSAM, SAM, YOLO + + # تحليل يام-b + model = SAM('sam_b.pt') + model.info() + model('ultralytics/assets') + + # تحليل MobileSAM + model = SAM('mobile_sam.pt') + model.info() + model('ultralytics/assets') + + # تحليل FastSAM-s + model = FastSAM('FastSAM-s.pt') + model.info() + model('ultralytics/assets') + + # تحليل YOLOv8n-seg + model = YOLO('yolov8n-seg.pt') + model.info() + model('ultralytics/assets') + ``` + +## تعلم تلقائي: مسار سريع إلى سلاسل البيانات الخاصة بالتجزئة + +التعلم التلقائي هو ميزة رئيسية لـ SAM، حيث يسمح للمستخدمين بإنشاء [سلاسل بيانات تجزئة](https://docs.ultralytics.com/datasets/segment) باستخدام نموذج الكشف الجاهز. يتيح هذا الميزة إنشاء وتحديث سريع ودقيق لعدد كبير من الصور بدون الحاجة إلى عملية التسمية اليدوية البطيئة. 
+ +### إنشاء سلاسل البيانات الخاصة بالتجزئة باستخدام نموذج الكشف + +للتعليق التلقائي على سلاسل البيانات الخاصة بالتجزئة باستخدام إطار العمل Ultralytics ، استخدم وظيفة 'auto_annotate' كما هو موضح أدناه: + +!!! Example "مثال" + + === "البايثون" + ```python + from ultralytics.data.annotator import auto_annotate + + auto_annotate(data="مسار/إلى/صور", det_model="yolov8x.pt", sam_model='sam_b.pt') + ``` + +| الوسيطة | النوع | الوصف | الافتراضي | +|------------|------------------------|---------------------------------------------------------------------------------------------------------------------------|--------------| +| data | str | المسار إلى المجلد الذي يحتوي على الصور التي سيتم التعليق عليها. | | +| det_model | str، اختياري | نموذج الكشف المدرب مسبقًا لـ YOLO. الافتراضي هو 'yolov8x.pt'. | 'yolov8x.pt' | +| sam_model | str، اختياري | نموذج التجزئة SAM المدرب مسبقًا من Ultralytics. الافتراضي هو 'sam_b.pt'. | 'sam_b.pt' | +| device | str، اختياري | الجهاز الذي يتم تشغيل النماذج عليه. الافتراضي هو سلسلة فارغة (وحدة المعالجة المركزية أو وحدة معالجة الرسومات إذا توافرت). | | +| output_dir | str أو None، اختياري | الدليل لحفظ النتائج المعلَّق عليها. الافتراضي هو مجلد 'labels' في نفس دليل 'data'. | None | + +يأخذ تابع 'auto_annotate' المسار إلى الصور الخاصة بك مع وسيطات اختيارية لتحديد نموذج الكشف المدرب مسبقًا ونموذج التجزئة SAM والجهاز الذي سيتم تشغيل النماذج عليه ودليل الإخراج لحفظ النتائج المعلَّق عليها. + +التعليق التلقائي باستخدام نماذج مدربة مسبقًا يمكن أن يقلل بشكل كبير من الوقت والجهد المطلوب لإنشاء سلاسل بيانات تجزئة عالية الجودة. يكون هذا الأمر مفيدًا خصوصًا للباحثين والمطورين الذين يتعاملون مع مجموعات صور كبيرة ، حيث يتيح لهم التركيز على تطوير النماذج وتقييمها بدلاً من التسمية اليدوية البطيئة. + +## الاقتباسات والتقديرات + +إذا وجدت SAM مفيدًا في البحث أو العمل التطويري الخاص بك ، يرجى النظر في استشهاد بحثنا: + +!!!
Quote "" + + === "البيبتيكس" + ```البيبتيكس + @misc{kirillov2023segment, + title={Segment Anything}, + author={Alexander Kirillov and Eric Mintun and Nikhila Ravi and Hanzi Mao and Chloe Rolland and Laura Gustafson and Tete Xiao and Spencer Whitehead and Alexander C. Berg and Wan-Yen Lo and Piotr Dollár and Ross Girshick}, + year={2023}, + eprint={2304.02643}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +نود أن نعبر عن امتناننا لـ Meta AI لإنشاء وصيانة هذا المورد القيم لمجتمع البصريات الحواسيبية. + +*أكلمات دالة: سلسلة المفعولة, نموذج القطعة شيء ما, SAM, Meta SAM, التجزئة, التشفير المميز, آلة آي, segment, Ultralytics, نماذج مدربة مسبقا, SAM الاساسي, SAM الكبير, تجزئة الكيانات, الرؤية الكمبيوترية, آي الاصطناعية, التعلم الآلي, تسمية بيانات, قناع التجزئة, نموذج الكشف, نموذج الكشف YOLO, البيبتكس, Meta AI.* diff --git a/docs/ar/models/yolo-nas.md b/docs/ar/models/yolo-nas.md new file mode 100644 index 0000000..29ea0cb --- /dev/null +++ b/docs/ar/models/yolo-nas.md @@ -0,0 +1,121 @@ +--- +comments: true +description: استكشف التوثيق المفصل لـ YOLO-NAS ، وهو نموذج كشف الكائنات المتطور. تعلم المزيد عن ميزاته والطرز المدربة مسبقًا واستخدامه مع واجهة برمجة Ultralytics Python وأكثر من ذلك. +keywords: YOLO-NAS, Deci AI, كشف الكائنات, deep learning, البحث في الهندسة العصبية, واجهة برمجة Ultralytics Python, نموذج YOLO, الطرز المدربة مسبقًا, كمّية, التحسين, COCO, Objects365, Roboflow 100 +--- + +# YOLO-NAS + +## نظرة عامة + +تم تطوير YOLO-NAS بواسطة ديسي ايه اي ، وهو نموذج استشعار الكائنات الطائرة للأمام الذي يقدم تطورًا مبتكرًا. إنه منتج تكنولوجيا بحث الهندسة العصبية المتقدمة ، المصممة بعناية لمعالجة القيود التي كانت تعاني منها النماذج السابقة YOLO. مع تحسينات كبيرة في دعم التمثيل الكموني وتنازلات الدقة والتأخير ، يمثل YOLO-NAS قفزة كبيرة في كشف الكائنات. + +![نموذج صورة مثال](https://learnopencv.com/wp-content/uploads/2023/05/yolo-nas_COCO_map_metrics.png) +**نظرة عامة على YOLO-NAS.** يستخدم YOLO-NAS كتلًا تفاعلية للتمثيل الكموني وتمثيل كمي للحصول على أداء مثلى. 
يواجه النموذج ، عند تحويله إلى الإصدار المكون من 8 بت ، انخفاضًا طفيفًا في الدقة ، وهو تحسين كبير على النماذج الأخرى. تتوج هذه التطورات بتصميم متفوق ذي قدرات استشعار للكائنات لا مثيل لها وأداء متميز. + +### المزايا الرئيسية + +- **كتلة أساسية ودية للتمثيل الكموني:** يقدم YOLO-NAS كتلة أساسية جديدة ودية للتمثيل الكموني ، مما يعالج أحد القيود الرئيسية للنماذج السابقة YOLO. +- **تدريب متطور وتمثيل كمي:** يستخدم YOLO-NAS نظم تدريب متقدمة وتمثيلًا للكم بعد التدريب لتعزيز الأداء. +- **تحسين AutoNAC والتدريب المسبق:** يستخدم YOLO-NAS تحسين AutoNAC ويتم تدريبه مسبقًا على مجموعات بيانات بارزة مثل COCO و Objects365 و Roboflow 100. يجعل هذا التدريب المسبق مناسبًا لمهام استشعار الكائنات الفرعية في بيئات الإنتاج. + +## الطرز المدربة مسبقًا + +استمتع بقوة كشف الكائنات من الجيل القادم مع الطرز المدربة مسبقًا لـ YOLO-NAS التي يوفرها Ultralytics. تم تصميم هذه الطرز لتقديم أداء متفوق من حيث السرعة والدقة. اختر من بين مجموعة متنوعة من الخيارات المصممة وفقًا لاحتياجاتك الخاصة: + +| الطراز | مؤشر التقدير المتوسط (mAP) | تأخر الوقت (ms) | +|------------------|----------------------------|-----------------| +| YOLO-NAS S | 47.5 | 3.21 | +| YOLO-NAS M | 51.55 | 5.85 | +| YOLO-NAS L | 52.22 | 7.87 | +| YOLO-NAS S INT-8 | 47.03 | 2.36 | +| YOLO-NAS M INT-8 | 51.0 | 3.78 | +| YOLO-NAS L INT-8 | 52.1 | 4.78 | + +تم تصميم كل نسخة من النموذج لتقديم توازن بين متوسط الدقة (mAP) وتأخير الوقت ، مما يساعدك في تحسين مهام كشف الكائنات الخاصة بك من حيث الأداء والسرعة. + +## أمثلة الاستخدام + +قام Ultralytics بجعل طرز YOLO-NAS سهلة الدمج في تطبيقات Python الخاصة بك عبر حزمة `ultralytics` الخاصة بنا. توفر الحزمة واجهة برمجة التطبيقات بسيطة الاستخدام لتسهيل العملية. + +توضح الأمثلة التالية كيفية استخدام طرز YOLO-NAS مع حزمة `ultralytics` للكشف والتحقق: + +### أمثلة الكشف والتحقق + +في هذا المثال ، نقوم بالتحقق من صحة YOLO-NAS-s على مجموعة بيانات COCO8. + +!!! Example "مثال" + + يوفر هذا المثال رمز بسيط لعملية الكشف والتحقق لـ YOLO-NAS. لمعالجة نتائج الاستدلال ، انظر وضع [توقع](../modes/predict.md). 
لاستخدام YOLO-NAS مع وضعيات إضافية ، انظر [التحقق](../modes/val.md) و[التصدير](../modes/export.md). لا يدعم نظام YOLO-NAS على حزمة `ultralytics` عملية التدريب. + + === "Python" + + يمكن تمرير نماذج PyTorch المدربة مسبقًا `*.pt` إلى فئة `NAS()` لإنشاء نموذج في Python: + + ```python + from ultralytics import NAS + + # تحميل نموذج YOLO-NAS-s المدرب مسبقًا على COCO + model = NAS('yolo_nas_s.pt') + + # عرض معلومات النموذج (اختياري) + model.info() + + # التحقق من صحة النموذج على مجموعة بيانات مثال COCO8 + results = model.val(data='coco8.yaml') + + # تشغيل استدلال باستخدام نموذج YOLO-NAS-s على صورة 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + تتوفر أوامر CLI لتشغيل النماذج مباشرة: + + ```bash + # تحميل نموذج YOLO-NAS-s المدرب مسبقًا على COCO والتحقق من أدائه على مجموعة بيانات مثال COCO8 + yolo val model=yolo_nas_s.pt data=coco8.yaml + + # تحميل نموذج YOLO-NAS-s المدرب مسبقًا على COCO والتنبؤ بالاستدلال على صورة 'bus.jpg' + yolo predict model=yolo_nas_s.pt source=path/to/bus.jpg + ``` + +## المهام والأوضاع المدعومة + +نحن نقدم ثلاثة أنواع من نماذج YOLO-NAS: الصغير (s) ، المتوسط (m) ، والكبير (l). يتم تصميم كل نسخة لتلبية احتياجات الحوسبة والأداء المختلفة: + +- **YOLO-NAS-s**: محسنة للبيئات التي تكون فيها الموارد الحسابية محدودة والكفاءة هي الأهم. +- **YOLO-NAS-m**: يقدم نهجًا متوازنًا ، مناسبًا لكشف الكائنات العامة بدقة أعلى. +- **YOLO-NAS-l**: مصممة للسيناريوهات التي تتطلب أعلى درجة من الدقة ، حيث الموارد الحسابية أقل قيدًا. + +أدناه نظرة عامة مفصلة عن كل نموذج ، بما في ذلك روابط أوزانها المدربة مسبقًا ، والمهام التي تدعمها ، وتوافقها مع وضعيات التشغيل المختلفة.
+ +| نوع النموذج | أوزان مدربة مسبقًا | المهام المدعومة | الاستدلال | التحقق | التدريب | التصدير | +|-------------|-----------------------------------------------------------------------------------------------|------------------------------------|-----------|--------|---------|---------| +| YOLO-NAS-s | [yolo_nas_s.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_s.pt) | [كشف الكائنات](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-m | [yolo_nas_m.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_m.pt) | [كشف الكائنات](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-l | [yolo_nas_l.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_l.pt) | [كشف الكائنات](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | + +## الاقتباسات والشكر + +إذا استخدمت YOLO-NAS في أعمالك البحثية أو التطويرية ، يرجى الاستشهاد بمشروع SuperGradients: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{supergradients, + doi = {10.5281/ZENODO.7789328}, + url = {https://zenodo.org/record/7789328}, + author = {Aharon, Shay and {Louis-Dupont} and {Ofri Masad} and Yurkova, Kate and {Lotem Fridman} and {Lkdci} and Khvedchenya, Eugene and Rubin, Ran and Bagrov, Natan and Tymchenko, Borys and Keren, Tomer and Zhilko, Alexander and {Eran-Deci}}, + title = {Super-Gradients}, + publisher = {GitHub}, + journal = {GitHub repository}, + year = {2021}, + } + ``` + +نعبر عن امتناننا لفريق [SuperGradients](https://github.com/Deci-AI/super-gradients/) في Deci AI لجهودهم في إنشاء وصيانة هذة الموارد القيمة لمجتمع رؤية الحاسوب. نعتقد أن YOLO-NAS ، بتصميمه المبتكر وقدرته الاستشعار المتفوقة للكائنات ، سيصبح أداة حاسمة للمطورين والباحثين على حد سواء. 
+ +*keywords: YOLO-NAS, Deci AI, كشف الكائنات, deep learning, البحث في الهندسة العصبية, واجهة برمجة Ultralytics Python, نموذج YOLO, SuperGradients, الطرز المدربة مسبقًا, كتلة أساسية ودية للتمثيل الكموني, أنظمة تدريب متطورة, تمثيل كمي بعد التدريب, تحسين AutoNAC, COCO, Objects365, Roboflow 100* diff --git a/docs/ar/models/yolov3.md b/docs/ar/models/yolov3.md new file mode 100644 index 0000000..d34e915 --- /dev/null +++ b/docs/ar/models/yolov3.md @@ -0,0 +1,98 @@ +--- +comments: true +description: احصل على نظرة عامة حول YOLOv3 و YOLOv3-Ultralytics و YOLOv3u. تعرف على ميزاتها الرئيسية واستخدامها والمهام المدعومة للكشف عن الكائنات. +keywords: YOLOv3، YOLOv3-Ultralytics، YOLOv3u، الكشف عن الكائنات، إجراء، التدريب، Ultralytics +--- + +# YOLOv3 و YOLOv3-Ultralytics و YOLOv3u + +## النظرة العامة + +يقدم هذا الوثيقة نظرة عامة على ثلاث نماذج مرتبطة بكشف الكائنات ، وهي [YOLOv3](https://pjreddie.com/darknet/yolo/) و [YOLOv3-Ultralytics](https://github.com/ultralytics/yolov3) و [YOLOv3u](https://github.com/ultralytics/ultralytics). + +1. **YOLOv3:** هذه هي الإصدار الثالث من خوارزمية You Only Look Once (YOLO) للكشف عن الكائنات. قام جوزيف ريدمون بتطويرها بالأصل ، وقد قامت YOLOv3 بتحسين سابقيها من خلال إدخال ميزات مثل التنبؤات متعددة المقياس وثلاثة أحجام مختلفة من نوى الكشف. + +2. **YOLOv3-Ultralytics:** هذه هي تنفيذ Ultralytics لنموذج YOLOv3. يقوم بإعادة إنتاج بنية YOLOv3 الأصلية ويقدم وظائف إضافية ، مثل دعم المزيد من النماذج المدربة مسبقًا وخيارات تخصيص أسهل. + +3. **YOLOv3u:** هذا هو الإصدار المُحدّث لـ YOLOv3-Ultralytics الذي يدمج رأس الكشف بدون مشتركات وبدون مستخدم الكائن الذي يستخدم في نماذج YOLOv8. يحتفظ YOLOv3u بنفس بنية العمود الفقري والعنق مثل YOLOv3 ولكن برأس الكشف المُحدث من YOLOv8. + +![Ultralytics YOLOv3](https://raw.githubusercontent.com/ultralytics/assets/main/yolov3/banner-yolov3.png) + +## الميزات الرئيسية + +- **YOLOv3:** قدم استخدام ثلاث مقياسات مختلفة للكشف ، باستخدام ثلاثة أحجام مختلفة من نوى الكشف: 13x13 ، 26x26 و 52x52. 
هذا يحسن بشكل كبير دقة الكشف للكائنات ذات الأحجام المختلفة. بالإضافة إلى ذلك ، أضاف YOLOv3 ميزات مثل التنبؤات متعددة العلامات لكل مربع محاذاة وشبكة استخراج سمات أفضل. + +- **YOLOv3-Ultralytics:** توفر تنفيذ Ultralytics لـ YOLOv3 نفس الأداء مثل النموذج الأصلي ولكنه يأتي مع دعم إضافي للمزيد من النماذج المدربة مسبقًا وطرق تدريب إضافية وخيارات أسهل للتخصيص. هذا يجعلها أكثر مرونة وسهولة استخداماً للتطبيقات العملية. + +- **YOLOv3u:** يدمج هذا النموذج المُحدّث رأس الكشف بدون مشتركات وبدون مستخدم الكائن من YOLOv8. من خلال إزالة الحاجة إلى صناديق المرجع المحددة مسبقًا ودرجات تكون الكائن ، يمكن أن يحسن تصميم رأس الكشف هذا قدرة النموذج على كشف الكائنات ذات الأحجام والأشكال المتنوعة. هذا يجعل YOLOv3u أكثر مرونة ودقة لمهام كشف الكائنات. + +## المهام المدعومة والأوضاع + +تم تصميم سلسلة YOLOv3 ، بما في ذلك YOLOv3 و YOLOv3-Ultralytics و YOLOv3u ، خصيصًا لمهام الكشف عن الكائنات. يشتهر هذه النماذج بفعاليتها في سيناريوهات العالم الحقيقي المختلفة ، مع توازن دقة الكشف والسرعة. يوفر كل طراز ميزات وتحسينات فريدة ، مما يجعلها مناسبة لمجموعة متنوعة من التطبيقات. + +يدعم النماذج الثلاثة وضعًا شاملاً من الأوضاع ، مما يضمن مرونة في مراحل مختلفة من نموذج النشر والتطوير. هذه الأوضاع تشمل [التمييز](../modes/predict.md) ، [التحقق](../modes/val.md) ، [التدريب](../modes/train.md) و [التصدير](../modes/export.md) ، مما يوفر للمستخدمين مجموعة كاملة من أدوات فعالة للكشف عن الكائنات. + +| نوع النموذج | المهام المدعومة | التمييز | التحقق | التدريب | التصدير | +|--------------------|------------------------------------|---------|--------|---------|---------| +| YOLOv3 | [كشف الكائنات](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3-Ultralytics | [كشف الكائنات](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3u | [كشف الكائنات](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +توفر هذه الجدولة نظرة فورية على إمكانات كل نسخة من YOLOv3 ، مما يسلط الضوء على مرونتها وملاءمتها لمختلف المهام وأوضاع العمل العملية في سير العمل لكشف الكائنات. 
+ +## أمثلة الاستخدام + +يقدم هذا المثال أمثلة بسيطة للتدريب والتنبؤ باستخدام YOLOv3. للحصول على وثائق كاملة حول هذه وغيرها من [الأوضاع](../modes/index.md) انظر صفحات الوثائق: [التنبؤ](../modes/predict.md) ، [التدريب](../modes/train.md) ، [التحقق](../modes/val.md) و [التصدير](../modes/export.md). + +!!! Example "مثال" + + === "بيثون" + + يمكن تمرير نماذج PyTorch المدربة مسبقًا `*.pt` وملفات التكوين `*.yaml` إلى فئة `YOLO()` لإنشاء نموذج في Python: + + ```python + from ultralytics import YOLO + + # تحميل نموذج YOLOv3n المدرب مسبقًا على COCO + model = YOLO('yolov3n.pt') + + # عرض معلومات النموذج (اختياري) + model.info() + + # تدريب النموذج على مجموعة البيانات المثالية Coco8 لمدة 100 دورة تدريب + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # قم بتشغيل التنبؤ باستخدام نموذج YOLOv3n على صورة 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + تتوفر أوامر CLI لتشغيل النماذج مباشرة: + + ```bash + # تحميل نموذج YOLOv3n المدرب مسبقًا على COCO وقم بتدريبه على مجموعة البيانات المثالية Coco8 لمدة 100 دورة تدريب + yolo train model=yolov3n.pt data=coco8.yaml epochs=100 imgsz=640 + + # تحميل نموذج YOLOv3n المدرب مسبقًا على COCO وتشغيل التنبؤ على صورة 'bus.jpg' + yolo predict model=yolov3n.pt source=path/to/bus.jpg + ``` + +## الاقتباسات والشكر + +إذا قمت باستخدام YOLOv3 في بحثك ، فيرجى الاقتباس لأوراق YOLO الأصلية ومستودع Ultralytics YOLOv3: + +!!! Quote "" + + === "بيب تيكس" + + ```bibtex + @article{redmon2018yolov3, + title={YOLOv3: An Incremental Improvement}, + author={Redmon, Joseph and Farhadi, Ali}, + journal={arXiv preprint arXiv:1804.02767}, + year={2018} + } + ``` + +شكراً لجوزيف ريدمون وعلي فرهادي على تطوير YOLOv3 الأصلي. diff --git a/docs/ar/models/yolov4.md b/docs/ar/models/yolov4.md new file mode 100644 index 0000000..cab8cac --- /dev/null +++ b/docs/ar/models/yolov4.md @@ -0,0 +1,72 @@ +--- +comments: true +description: استكشف دليلنا التفصيلي على YOLOv4 ، وهو جهاز كشف الكائنات الحديثة في الوقت الحقيقي.
فهم أبرز معالم التصميم المعماري الخاصة به ، والميزات المبتكرة ، وأمثلة التطبيق. +keywords: ultralytics ، yolo v4 ، كشف الكائنات ، شبكة عصبية ، كشف في الوقت الحقيقي ، كاشف الكائنات ، تعلم الآلة + +--- + +# YOLOv4: الكشف العالي السرعة والدقة للكائنات + +أهلاً بك في صفحة وثائق Ultralytics لـ YOLOv4 ، جهاز كشف الكائنات الحديث في الوقت الحقيقي الذي تم إطلاقه في عام 2020 من قبل Alexey Bochkovskiy على [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet). تم تصميم YOLOv4 لتوفير التوازن المثالي بين السرعة والدقة ، مما يجعله خيارًا ممتازًا للعديد من التطبيقات. + +![رسم توضيحي لهندسة YOLOv4](https://user-images.githubusercontent.com/26833433/246185689-530b7fe8-737b-4bb0-b5dd-de10ef5aface.png) +**رسم توضيحي لهندسة YOLOv4**. يعرض التصميم المعماري المعقد لشبكة YOLOv4 ، بما في ذلك المكونات الرئيسية والرقبة والرأس ، والطبقات المترابطة للكشف الفعال في الوقت الحقيقي. + +## مقدمة + +تعني YOLOv4 "فقط تنظر مرة واحدة النسخة 4". هو نموذج كشف الكائنات الحقيقي الزمني الذي تم تطويره لمعالجة قيود الإصدارات السابقة لـ YOLO مثل [YOLOv3](yolov3.md) ونماذج كشف الكائنات الأخرى. على عكس كاشفات الكائنات الأخرى القائمة على الشبكات العصبية المتزاحمة المستخدمة للكشف عن الكائنات ، يمكن تطبيق YOLOv4 لأنظمة الوصية النصحية وكذلك لإدارة العملية المستقلة وتقليل الإدخالات البشرية. يتيح تشغيله على وحدات معالجة الرسومات القياسية (GPUs) الاستخدام الشامل بتكلفة معقولة ، وتم تصميمه للعمل في الوقت الفعلي على وحدة معالجة الرسومات التقليدية مع الحاجة إلى وحدة واحدة فقط من هذا النوع للتدريب. + +## الهندسة + +تستغل YOLOv4 العديد من الميزات المبتكرة التي تعمل معًا لتحسين أدائها. تشمل هذه الميزات الاتصالات المتبقية المرجحة (WRC) ، والاتصالات الجزئية عبر المرحلة المتقاطعة (CSP) ، والتطبيع المتقاطع المصغر لدُفع (CmBN) ، والتدريب المتنازع لنفسه (SAT) ، وتنشيط Mish ، وزيادة بيانات الزخم ، وتنظيم DropBlock ، وخسارة CIoU. يتم دمج هذه الميزات لتحقيق أحدث النتائج. + +يتألف كاشف الكائنات النموذجي من عدة أجزاء بما في ذلك المدخل والظهر والرقبة والرأس. 
يتم تدريب الظهرية لـ YOLOv4 سلفًا على ImageNet ويستخدم لتوقع فئات ومربعات محيطة للكائنات. يمكن أن يكون الظهرية من عدة نماذج بما في ذلك VGG و ResNet و ResNeXt أو DenseNet. يتم استخدام جزء الرقبة من الكاشف لجمع خرائط الميزات من مراحل مختلفة وعادة ما يتضمن عدة مسارات لأسفل وعدة مسارات للأعلى. جزء الرأس هو ما يستخدم لإجراء اكتشاف الكائنات والتصنيف النهائي. + +## الحقيبة المجانية + +يستخدم YOLOv4 أيضًا طرقًا تعرف باسم "حقيبة المجانيات" وهي تقنيات تحسِّن دقة النموذج أثناء التدريب دون زيادة تكلفة الاستنتاج. تعد التعديلات في البيانات تقنية شائعة في كشف الكائنات ، والتي تزيد من تنوع صور الإدخال لتحسين قوة الموديل. بعض أمثلة التعديل في البيانات تشمل التشويهات البصرية (ضبط السطوع والتباين والدرجة والتشبع والضوضاء في الصورة) والتشويهات الهندسية (إضافة توزيع عشوائي للتغيير المقياسي والاقتصاص والانعكاس والتدوير). تساعد هذه التقنيات الموديل في التعميم على أنواع مختلفة من الصور. + +## الميزات والأداء + +تم تصميم YOLOv4 لتحقيق سرعة ودقة مثلى في كشف الكائنات. يتضمن تصميم YOLOv4 CSPDarknet53 كظهر ، PANet كرقبة ، و YOLOv3 كرأس كشف. يسمح هذا التصميم لـ YOLOv4 بأداء كشف الكائنات بسرعة مذهلة ، مما يجعله مناسبًا لتطبيقات الوقت الحقيقي. يتفوق YOLOv4 أيضًا في الدقة ، ويحقق نتائج عالية في مقاييس كشف الكائنات. + +## أمثلة الاستخدام + +في وقت كتابة هذا النص ، لا يدعم Ultralytics حاليًا نماذج YOLOv4. لذلك ، سيحتاج أي مستخدمين مهتمين باستخدام YOLOv4 إلى الرجوع مباشرة إلى مستودع YOLOv4 على GitHub للحصول على تعليمات التثبيت والاستخدام. + +إليك نظرة عامة موجزة على الخطوات النموذجية التي يمكن أن تتخذها لاستخدام YOLOv4: + +1. قم بزيارة مستودع YOLOv4 على GitHub: [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet). + +2. اتبع التعليمات المقدمة في ملف README لعملية التثبيت. ينطوي هذا عادة على استنساخ المستودع ، وتثبيت التبعيات اللازمة ، وإعداد أي متغيرات بيئة ضرورية. + +3. بمجرد الانتهاء من التثبيت ، يمكنك تدريب واستخدام النموذج وفقًا لتعليمات الاستخدام المقدمة في المستودع. 
يتضمن ذلك عادة إعداد مجموعة البيانات الخاصة بك ، وتكوين معاملات النموذج ، وتدريب النموذج ، ثم استخدام النموذج المدرب لأداء اكتشاف الكائنات. + +يرجى ملاحظة أن الخطوات النموذجية قد تختلف اعتمادًا على حالة الاستخدام الخاصة بك وحالة مستودع YOLOv4 الحالي. لذلك ، يُنصح بشدة بالرجوع مباشرة إلى التعليمات المقدمة في مستودع YOLOv4 على GitHub. + +نؤسف على أي إزعاج ، وسنسعى لتحديث هذا المستند بأمثلة استخدام لـ Ultralytics بمجرد تنفيذ الدعم لـ YOLOv4. + +## الاستنتاج + +YOLOv4 هو نموذج قوي وفعال لكشف الكائنات يجمع بين السرعة والدقة. يستخدم الميزات الفريدة وتقنيات الزخم في التدريب للأداء بشكل ممتاز في مهام اكتشاف الكائنات في الوقت الفعلي. يمكن لأي شخص يمتلك وحدة معالجة رسومية تقليدية تدريب YOLOv4 واستخدامها ، مما يجعلها سهلة الوصول وعملية لمجموعة واسعة من التطبيقات. + +## التنويه والتقديرات + +نود أن نعترف بمساهمة أصحاب YOLOv4 في مجال كشف الكائنات الحقيقية الزمنية: + +!!! Quote "" + + === "بيب تكس" + + ```bibtex + @misc{bochkovskiy2020yolov4, + title={YOLOv4: Optimal Speed and Accuracy of Object Detection}, + author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, + year={2020}, + eprint={2004.10934}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +يمكن العثور على ورقة YOLOv4 الأصلية على [arXiv](https://arxiv.org/pdf/2004.10934.pdf). قام المؤلفون بتوفير عملهم بشكل عام ، ويمكن الوصول إلى قاعدة الشفرات على [GitHub](https://github.com/AlexeyAB/darknet). نقدر جهودهم في تعزيز الميدان وتوفير عملهم للمجتمع العريض. diff --git a/docs/ar/models/yolov5.md b/docs/ar/models/yolov5.md new file mode 100644 index 0000000..32481a3 --- /dev/null +++ b/docs/ar/models/yolov5.md @@ -0,0 +1,107 @@ +--- +comments: true +description: اكتشف YOLOv5u، وهو إصدار معزز لنموذج YOLOv5 يوفر توازنًا محسنًا بين الدقة والسرعة والعديد من النماذج المدربة مسبقًا لمهام كشف الكائنات المختلفة. 
+keywords: YOLOv5u، كشف الكائنات، النماذج المدربة مسبقًا، Ultralytics، التشخيص، التحقق، YOLOv5، YOLOv8، بدون قاعدة تثبيت العقدة الرئيسية، بدون قيمة الكائن، التطبيقات الفعلية، تعلم الآلة +--- + +# YOLOv5 + +## نظرة عامة + +يمثل YOLOv5u تقدمًا في منهجيات كشف الكائنات. يندرج YOLOv5u تحت البنية المعمارية الأساسية لنموذج [YOLOv5](https://github.com/ultralytics/yolov5) الذي طورته شركة Ultralytics، و يدمج نموذج YOLOv5u ميزة القسمة على جزئين للكائنات المستقلة عن القاعدة التي تم تقديمها في نماذج [YOLOv8](yolov8.md). تحسين هذا النمط يحسن نمط النموذج، مما يؤدي إلى تحسين التوازن بين الدقة والسرعة في مهام كشف الكائنات. بناءً على النتائج التجريبية والمزايا المشتقة منها، يقدم YOLOv5u بديلاً فعالًا لأولئك الذين يسعون لإيجاد حلول قوية في الأبحاث والتطبيقات العملية. + +![Ultralytics YOLOv5](https://raw.githubusercontent.com/ultralytics/assets/main/yolov5/v70/splash.png) + +## المزايا الرئيسية + +- **رأس Ultralytics للقسمة بدون قاعدة تثبيت العقدة:** يعتمد نماذج كشف الكائنات التقليدية على صناديق قاعدة محددة مسبقًا لتوقع مواقع الكائنات. ومع ذلك، يحدث تحديث في نهج YOLOv5u هذا. من خلال اعتماد رأس Ultralytics المُقسم بدون قاعدة تثبيت العقدة، يضمن هذا النمط آلية كشف أكثر مرونة واندفاعًا، مما يعزز الأداء في سيناريوهات متنوعة. + +- **توازن محسن بين الدقة والسرعة:** تتصارع السرعة والدقة في العديد من الأحيان. ولكن YOLOv5u يتحدى هذا التوازن. يقدم توازنًا معايرًا، ويضمن كشفًا في الوقت الفعلي دون المساومة على الدقة. تعد هذه الميزة ذات قيمة خاصة للتطبيقات التي تتطلب استجابة سريعة، مثل المركبات المستقلة والروبوتات وتحليل الفيديو في الوقت الفعلي. + +- **مجموعة متنوعة من النماذج المدربة مسبقًا:** على فهم الأمور التي تحتاج إلى مجموعات أدوات مختلفة YOLOv5u يوفر العديد من النماذج المدربة مسبقًا. سواء كنت تركز على التشخيص أو التحقق أو التدريب، هناك نموذج مصمم خصيصًا ينتظرك. يضمن هذا التنوع أنك لا تستخدم حلاً من نوع واحد يناسب الجميع، ولكن نموذج موازن حسب حاجتك الفريدة. 
+ +## المهام والأوضاع المدعومة + +تتفوق نماذج YOLOv5u، مع مجموعة متنوعة من الأوزان المدربة مسبقًا، في مهام [كشف الكائنات](../tasks/detect.md). تدعم هذه النماذج مجموعة شاملة من الأوضاع، مما يجعلها مناسبة لتطبيقات متنوعة، من التطوير إلى التنفيذ. + +| نوع النموذج | الأوزان المدربة مسبقًا | المهمة | التشخيص | التحقق | التدريب | التصدير | +|-------------|-----------------------------------------------------------------------------------------------------------------------------|------------------------------------|---------|--------|---------|---------| +| YOLOv5u | `yolov5nu`, `yolov5su`, `yolov5mu`, `yolov5lu`, `yolov5xu`, `yolov5n6u`, `yolov5s6u`, `yolov5m6u`, `yolov5l6u`, `yolov5x6u` | [كشف الكائنات](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +يوفر هذا الجدول نظرة عامة مفصلة عن البدائل من نماذج نموذج YOLOv5u، ويسلط الضوء على تطبيقاتها في مهام كشف الكائنات ودعمها لأوضاع تشغيل متنوعة مثل [التشخيص](../modes/predict.md)، [التحقق](../modes/val.md)، [التدريب](../modes/train.md)، و[التصدير](../modes/export.md). يضمن هذا الدعم الشامل أن يمكن للمستخدمين استغلال قدرات نماذج YOLOv5u بشكل كامل في مجموعة واسعة من سيناريوهات كشف الكائنات. + +## الأداء + +!!! الأداء + + === "كشف" + + راجع [وثائق الكشف](https://docs.ultralytics.com/tasks/detect/) للحصول على أمثلة استخدام مع هذه النماذج المدربة على [COCO](https://docs.ultralytics.com/datasets/detect/coco/)، التي تشمل 80 فئة مدربة مسبقًا. + + | النموذج | يامل | حجم
(بكسل) | mAPval
50-95 | سرعة
معالج الجهاز ONNX
(مللي ثانية) | سرعة
حويصلة A100 TensorRT
(مللي ثانية) | المعلمات
(مليون) | FLOPs
(بليون) | + |---------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|-----------------------|----------------------|--------------------------------|-------------------------------------|--------------------|-------------------| + | [yolov5nu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5nu.pt) | [yolov5n.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 34.3 | 73.6 | 1.06 | 2.6 | 7.7 | + | [yolov5su.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5su.pt) | [yolov5s.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 43.0 | 120.7 | 1.27 | 9.1 | 24.0 | + | [yolov5mu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5mu.pt) | [yolov5m.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 49.0 | 233.9 | 1.86 | 25.1 | 64.2 | + | [yolov5lu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5lu.pt) | [yolov5l.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 52.2 | 408.4 | 2.50 | 53.2 | 135.0 | + | [yolov5xu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5xu.pt) | [yolov5x.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 53.2 | 763.2 | 3.81 | 97.2 | 246.4 | + | | | | | | | | | + | [yolov5n6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5n6u.pt) | [yolov5n6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 42.1 | 211.0 | 1.83 | 4.3 | 7.8 | + | [yolov5s6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5s6u.pt) | 
[yolov5s6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 48.6 | 422.6 | 2.34 | 15.3 | 24.6 | + | [yolov5m6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5m6u.pt) | [yolov5m6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 53.6 | 810.9 | 4.36 | 41.2 | 65.7 | + | [yolov5l6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5l6u.pt) | [yolov5l6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 55.7 | 1470.9 | 5.47 | 86.1 | 137.4 | + | [yolov5x6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5x6u.pt) | [yolov5x6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 56.8 | 2436.5 | 8.98 | 155.4 | 250.7 | + +## أمثلة للاستخدام + +يقدم هذا المثال أمثلة بسيطة للغاية للتدريب والتشخيص باستخدام YOLOv5. 
يُمكن إنشاء نموذج مثيل في البرمجة باستخدام نماذج PyTorch المدربة مسبقًا في صيغة `*.pt` وملفات التكوين `*.yaml`: + +```python +from ultralytics import YOLO + +#قم بتحميل نموذج YOLOv5n المدرب مسبقًا على مجموعة بيانات COCO +model = YOLO('yolov5n.pt') + +# قم بعرض معلومات النموذج (اختياري) +model.info() + +# قم بتدريب النموذج على مجموعة البيانات COCO8 لمدة 100 دورة +results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + +# قم بتشغيل التشخيص بنموذج YOLOv5n على صورة 'bus.jpg' +results = model('path/to/bus.jpg') +``` + +=== "سطر الأوامر" + + يتاح سطر الأوامر لتشغيل النماذج مباشرة: + + ```bash + # قم بتحميل نموذج YOLOv5n المدرب مسبقًا على مجموعة بيانات COCO8 وقم بتدريبه لمدة 100 دورة + yolo train model=yolov5n.pt data=coco8.yaml epochs=100 imgsz=640 + + # قم بتحميل نموذج YOLOv5n المدرب مسبقًا على مجموعة بيانات COCO8 وتشغيل حالة التشخيص على صورة 'bus.jpg' + yolo predict model=yolov5n.pt source=path/to/bus.jpg + ``` + +## الاستشهادات والتقدير + +إذا قمت باستخدام YOLOv5 أو YOLOv5u في بحثك، يرجى استشهاد نموذج Ultralytics YOLOv5 بطريقة الاقتباس التالية: + +!!! Quote "" + + === "BibTeX" + ```bibtex + @software{yolov5, + title = {Ultralytics YOLOv5}, + author = {Glenn Jocher}, + year = {2020}, + version = {7.0}, + license = {AGPL-3.0}, + url = {https://github.com/ultralytics/yolov5}, + doi = {10.5281/zenodo.3908559}, + orcid = {0000-0001-5950-6979} + } + ``` + +يرجى ملاحظة أن نماذج YOLOv5 متاحة بترخيص [AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) و[Enterprise](https://ultralytics.com/license). diff --git a/docs/ar/models/yolov6.md b/docs/ar/models/yolov6.md new file mode 100644 index 0000000..12dd557 --- /dev/null +++ b/docs/ar/models/yolov6.md @@ -0,0 +1,107 @@ +--- +comments: true +description: استكشف نموذج Meituan YOLOv6 للكشف عن الكائنات الحديثة، والذي يوفر توازنًا مذهلاً بين السرعة والدقة، مما يجعله الخيار الأمثل لتطبيقات الوقت الحقيقي. تعرّف على الميزات والنماذج المُدربة مسبقًا واستخدام Python. 
+keywords: Meituan YOLOv6، الكشف عن الكائنات، Ultralytics، YOLOv6 docs، Bi-directional Concatenation، تدريب بمساعدة العناصر، النماذج المدربة مسبقا، تطبيقات الوقت الحقيقي +--- + +# Meituan YOLOv6 + +## نظرة عامة + +[Meituan](https://about.meituan.com/) YOLOv6 هو منظّف الكائنات الحديثة الحديثة الذي يُقدم توازنًا ملحوظًا بين السرعة والدقة، مما يجعله خيارًا شائعًا لتطبيقات الوقت الحقيقي. يُقدم هذا النموذج العديد من التحسينات الملحوظة في بنيته ونظام التدريب، بما في ذلك تطبيق وحدة Bi-directional Concatenation (BiC)، واستراتيجية AAT (anchor-aided training) التي تعتمد على العناصر، وتصميم محسّن للأساس والرقبة لتحقيق أداء على مجموعة بيانات COCO يفوق جميع النماذج الأخرى. + +![Meituan YOLOv6](https://user-images.githubusercontent.com/26833433/240750495-4da954ce-8b3b-41c4-8afd-ddb74361d3c2.png) +![Model example image](https://user-images.githubusercontent.com/26833433/240750557-3e9ec4f0-0598-49a8-83ea-f33c91eb6d68.png) +**نظرة عامة على YOLOv6.** مخطط بنية النموذج يوضح المكونات المعاد تصميمها واستراتيجيات التدريب التي أدت إلى تحسينات أداء كبيرة. (أ) الرقبة الخاصة بـ YOLOv6 (N و S معروضان). لاحظ أنه بالنسبة لم/n، يتم استبدال RepBlocks بـ CSPStackRep. (ب) هيكل وحدة BiC. (ج) مكون SimCSPSPPF. ([المصدر](https://arxiv.org/pdf/2301.05586.pdf)). + +### ميزات رئيسية + +- **وحدة Bi-directional Concatenation (BiC):** يقدم YOLOv6 وحدة BiC في الرقبة التابعة للكاشف، مما يعزز إشارات التحديد المحلية ويؤدي إلى زيادة الأداء دون التأثير على السرعة. +- **استراتيجية التدريب بمساعدة العناصر (AAT):** يقدم هذا النموذج استراتيجية AAT للاستفادة من فوائد النماذج المستندة إلى العناصر وغير المستندة إليها دون التضحية في كفاءة الاستدلال. +- **تصميم أساس ورقبة محسّن:** من خلال تعميق YOLOv6 لتشمل مرحلة أخرى في الأساس والرقبة، يحقق هذا النموذج أداءً يفوق جميع النماذج الأخرى على مجموعة بيانات COCO لإدخال عالي الدقة. 
+- **استراتيجية التقطير الذاتي:** يتم تنفيذ استراتيجية تقطير ذاتي جديدة لتعزيز أداء النماذج الصغيرة من YOLOv6، وذلك عن طريق تعزيز فرع الانحدار المساعد خلال التدريب وإزالته في الاستنتاج لتجنب انخفاض السرعة الواضح. + +## معايير الأداء + +يوفر YOLOv6 مجموعة متنوعة من النماذج المدرّبة مسبقًا بمقاييس مختلفة: + +- YOLOv6-N: ٣٧.٥٪ AP في COCO val2017 عندما يتم استخدام بطاقة NVIDIA Tesla T4 GPU وسرعة ١١٨٧ إطار في الثانية. +- YOLOv6-S: ٤٥.٠٪ AP وسرعة ٤٨٤ إطار في الثانية. +- YOLOv6-M: ٥٠.٠٪ AP وسرعة ٢٢٦ إطار في الثانية. +- YOLOv6-L: ٥٢.٨٪ AP وسرعة ١١٦ إطار في الثانية. +- YOLOv6-L6: دقة حديثة في الزمن الحقيقي. + +كما يوفر YOLOv6 نماذج مكمَّمة (quantized models) بدقات مختلفة ونماذج محسنة للمنصات المحمولة. + +## أمثلة عن الاستخدام + +يقدم هذا المثال أمثلة بسيطة لتدريب YOLOv6 واستنتاجه. للحصول على وثائق كاملة حول هذه [الأوضاع](../modes/index.md) وغيرها، انظر صفحات وثائق [توقع](../modes/predict.md) ، [تدريب](../modes/train.md) ، [التحقق](../modes/val.md) و [التصدير](../modes/export.md). + +!!! 
Example "مثال" + + === "Python" + + يمكن تمرير النماذج المدرّبة مسبقًا بتنسيق `*.pt` في PyTorch وملفات التكوين `*.yaml` لفئة `YOLO()` لإنشاء نموذج في Python: + + ```python + from ultralytics import YOLO + + # إنشاء نموذج YOLOv6n من البداية + model = YOLO('yolov6n.yaml') + + # عرض معلومات النموذج (اختياري) + model.info() + + # تدريب النموذج على مجموعة بيانات مثال COCO8 لمدة 100 دورة تدريب + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # تشغيل الاستنتاج بنموذج YOLOv6n على صورة 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + يمكن استخدام أوامر CLI لتشغيل النماذج مباشرةً: + + ```bash + # إنشاء نموذج YOLOv6n من البداية وتدريبه باستخدام مجموعة بيانات مثال COCO8 لمدة 100 دورة تدريب + yolo train model=yolov6n.yaml data=coco8.yaml epochs=100 imgsz=640 + + # إنشاء نموذج YOLOv6n من البداية وتشغيل الاستنتاج على صورة 'bus.jpg' + yolo predict model=yolov6n.yaml source=path/to/bus.jpg + ``` + +## المهام والأوضاع المدعومة + +تقدم سلسلة YOLOv6 مجموعة من النماذج، والتي تم تحسينها للكشف عن الكائنات عالي الأداء. تلبي هذه النماذج الاحتياجات الحسابية المتنوعة ومتطلبات الدقة، مما يجعلها متعددة الاستخدامات في مجموعة واسعة من التطبيقات. 
+ +| نوع النموذج | الأوزان المدربة مسبقًا | المهام المدعومة | الاستنتاج | التحقق | التدريب | التصدير | +|-------------|------------------------|-----------------------------------------|-----------|--------|---------|---------| +| YOLOv6-N | `yolov6-n.pt` | [الكشف عن الكائنات](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-S | `yolov6-s.pt` | [الكشف عن الكائنات](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-M | `yolov6-m.pt` | [الكشف عن الكائنات](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L | `yolov6-l.pt` | [الكشف عن الكائنات](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L6 | `yolov6-l6.pt` | [الكشف عن الكائنات](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +يوفر هذا الجدول نظرة عامة مفصلة على النماذج المختلفة لـ YOLOv6، مع تسليط الضوء على قدراتها في مهام الكشف عن الكائنات وتوافقها مع الأوضاع التشغيلية المختلفة مثل [الاستنتاج](../modes/predict.md) و [التحقق](../modes/val.md) و [التدريب](../modes/train.md) و [التصدير](../modes/export.md). هذا الدعم الشامل يضمن أن يتمكن المستخدمون من الاستفادة الكاملة من قدرات نماذج YOLOv6 في مجموعة واسعة من سيناريوهات الكشف عن الكائنات. + +## الاقتباسات والتقديرات + +نحن نود أن نقدّم الشكر للمؤلفين على مساهماتهم الهامة في مجال كشف الكائنات في الوقت الحقيقي: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{li2023yolov6, + title={YOLOv6 v3.0: A Full-Scale Reloading}, + author={Chuyi Li and Lulu Li and Yifei Geng and Hongliang Jiang and Meng Cheng and Bo Zhang and Zaidan Ke and Xiaoming Xu and Xiangxiang Chu}, + year={2023}, + eprint={2301.05586}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +يمكن العثور على الورقة الأصلية لـ YOLOv6 على [arXiv](https://arxiv.org/abs/2301.05586). نشر المؤلفون عملهم بشكل عام، ويمكن الوصول إلى الشيفرة المصدرية على [GitHub](https://github.com/meituan/YOLOv6). نحن نقدّر جهودهم في تطوير هذا المجال وجعل عملهم متاحًا للمجتمع بأسره. 
diff --git a/docs/ar/models/yolov7.md b/docs/ar/models/yolov7.md new file mode 100644 index 0000000..1cdcc63 --- /dev/null +++ b/docs/ar/models/yolov7.md @@ -0,0 +1,66 @@ +--- +comments: true +description: استكشف YOLOv7 ، جهاز كشف الكائنات في الوقت الحقيقي. تعرف على سرعته الفائقة، ودقته المذهلة، وتركيزه الفريد على تحسين الأمتعة التدريبية تدريبياً. +keywords: YOLOv7، كاشف الكائنات في الوقت الحقيقي، الحالة الفنية، Ultralytics، مجموعة بيانات MS COCO، المعيار المعاد تعريفه للنموذج، التسمية الديناميكية، التحجيم الموسع، التحجيم المركب +--- + +# YOLOv7: حقيبة مجانية قابلة للتدريب + +YOLOv7 هو كاشف الكائنات في الوقت الحقيقي الحديث الحالي الذي يتفوق على جميع كاشفات الكائنات المعروفة من حيث السرعة والدقة في النطاق من 5 إطارات في الثانية إلى 160 إطارًا في الثانية. إنه يتمتع بأعلى دقة (٥٦.٨٪ AP) بين جميع كاشفات الكائنات الحالية في الوقت الحقيقي بسرعة ٣٠ إطارًا في الثانية أو أعلى على GPU V100. علاوة على ذلك, يتفوق YOLOv7 على كاشفات الكائنات الأخرى مثل YOLOR, YOLOX, Scaled-YOLOv4, YOLOv5 والعديد من الآخرين من حيث السرعة والدقة. النموذج مدرب على مجموعة بيانات MS COCO من البداية دون استخدام أي مجموعات بيانات أخرى أو وزن مُعين مُسبقًا. رمز المصدر لـ YOLOv7 متاح على GitHub. + +![مقارنة YOLOv7 مع كاشفات الكائنات الأعلى الفنية](https://github.com/ultralytics/ultralytics/assets/26833433/5e1e0420-8122-4c79-b8d0-2860aa79af92) + +**مقارنة بين كاشفات الكائنات الأعلى الفنية.** من النتائج في الجدول 2 نتعرف على أن الطريقة المقترحة لديها أفضل توازن بين السرعة والدقة بشكل شامل. إذا قارنا بين YOLOv7-tiny-SiLU و YOLOv5-N (r6.1) ، يكون الطريقة الحالية أسرع بـ ١٢٧ إطارًا في الثانية وأكثر دقة بنسبة ١٠.٧٪ من حيث AP. بالإضافة إلى ذلك ، YOLOv7 لديها AP بنسبة ٥١.٤٪ في معدل إطار ١٦١ في الثانية ، في حين يكون لـ PPYOLOE-L نفس AP فقط بمعدل إطار ٧٨ في الثانية. من حيث استخدام العوامل ، يكون YOLOv7 أقل بنسبة ٤١٪ من العوامل مقارنةً بـ PPYOLOE-L. إذا قارنا YOLOv7-X بسرعة تواصل بيانات ١١٤ إطارًا في الثانية مع YOLOv5-L (r6.1) مع سرعة تحليل ٩٩ إطارًا في الثانية ، يمكن أن يحسن YOLOv7-X AP بمقدار ٣.٩٪. 
إذا قورن YOLOv7-X بــ YOLOv5-X (r6.1) بنفس الحجم ، فإن سرعة تواصل البيانات في YOLOv7-X تكون أسرع بـ ٣١ إطارًا في الثانية. بالإضافة إلى ذلك ، من حيث كمية المعاملات والحسابات ، يقلل YOLOv7-X بنسبة ٢٢٪ من المعاملات و٨٪ من الحساب مقارنةً بـ YOLOv5-X (r6.1) ، ولكنه يحسن AP بنسبة ٢.٢٪ ([المصدر](https://arxiv.org/pdf/2207.02696.pdf)). + +## النظرة العامة + +كاشف الكائنات في الوقت الحقيقي هو جزء مهم في العديد من أنظمة رؤية الحاسوب ، بما في ذلك التتبع متعدد الكائنات والقيادة التلقائية والروبوتات وتحليل صور الأعضاء. في السنوات الأخيرة ، تركز تطوير كاشفات الكائنات في الوقت الحقيقي على تصميم هياكل فعالة وتحسين سرعة التحليل لمعالجات الكمبيوتر المركزية ومعالجات الرسومات ووحدات معالجة الأعصاب (NPUs). يدعم YOLOv7 كلاً من GPU المحمول وأجهزة الـ GPU ، من الحواف إلى السحابة. + +على عكس كاشفات الكائنات في الوقت الحقيقي التقليدية التي تركز على تحسين الهياكل ، يُقدم YOLOv7 تركيزًا على تحسين عملية التدريب. يتضمن ذلك وحدات وطرق تحسين تُصمم لتحسين دقة كشف الكائنات دون زيادة تكلفة التحليل ، وهو مفهوم يُعرف بـ "الحقيبة القابلة للتدريب للمجانيات". + +## الميزات الرئيسية + +تُقدم YOLOv7 عدة ميزات رئيسية: + +1. **إعادة تعيين نموذج المعاملات**: يقترح YOLOv7 نموذج معاملات معين مخطط له ، وهو استراتيجية قابلة للتطبيق على الطبقات في شبكات مختلفة باستخدام مفهوم مسار انتشار التدرج. + +2. **التسمية الديناميكية**: تدريب النموذج مع عدة طبقات إخراج يبرز قضية جديدة: "كيفية تعيين أهداف ديناميكية لإخراج الفروع المختلفة؟" لحل هذه المشكلة ، يقدم YOLOv7 طريقة تسمية جديدة تسمى تسمية الهدف المرشدة من الخشن إلى الدقيقة. + +3. **التحجيم الموسع والمركب**: يقترح YOLOv7 طرق "التحجيم الموسع" و "التحجيم المركب" لكاشف الكائنات في الوقت الحقيقي التي يمكن أن تستخدم بشكل فعال في المعاملات والحسابات. + +4. **الكفاءة**: يمكن للطريقة المقترحة بواسطة YOLOv7 تقليل بشكل فعال حوالي 40٪ من المعاملات و 50٪ من الحساب لكاشف الكائنات في الوقت الحقيقي الأولى من حيث الدقة والسرعة في التحليل. + +## أمثلة على الاستخدام + +في وقت كتابة هذا النص ، لا تدعم Ultralytics حاليًا نماذج YOLOv7. 
لذلك ، سيحتاج أي مستخدمين مهتمين باستخدام YOLOv7 إلى الرجوع مباشرة إلى مستودع YOLOv7 على GitHub للحصول على تعليمات التثبيت والاستخدام. + +وفيما يلي نظرة عامة على الخطوات النموذجية التي يمكنك اتباعها لاستخدام YOLOv7: + +1. قم بزيارة مستودع YOLOv7 على GitHub: [https://github.com/WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7). + +2. اتبع التعليمات الموجودة في ملف README لعملية التثبيت. يتضمن ذلك عادةً استنساخ المستودع ، وتثبيت التبعيات اللازمة ، وإعداد أي متغيرات بيئة ضرورية. + +3. بمجرد الانتهاء من عملية التثبيت ، يمكنك تدريب النموذج واستخدامه وفقًا لتعليمات الاستخدام الموجودة في المستودع. ينطوي ذلك عادةً على إعداد مجموعة البيانات الخاصة بك ، وتكوين معلمات النموذج ، وتدريب النموذج ، ثم استخدام النموذج المدرب لأداء كشف الكائنات. + +يرجى ملاحظة أن الخطوات المحددة قد تختلف اعتمادًا على حالة الاستخدام الخاصة بك والحالة الحالية لمستودع YOLOv7. لذا ، يُوصى بشدة بالرجوع مباشرة إلى التعليمات المقدمة في مستودع YOLOv7 على GitHub. + +نأسف على أي إزعاج قد يسببه ذلك وسنسعى لتحديث هذا المستند بأمثلة على الاستخدام لـ Ultralytics عندما يتم تنفيذ الدعم لـ YOLOv7. + +## الاقتباسات والشكر + +نود أن نشكر كتاب YOLOv7 على مساهماتهم الهامة في مجال اكتشاف الكائنات في الوقت الحقيقي: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{wang2022yolov7, + title={{YOLOv7}: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors}, + author={Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + journal={arXiv preprint arXiv:2207.02696}, + year={2022} + } + ``` + +يمكن العثور على ورقة YOLOv7 الأصلية على [arXiv](https://arxiv.org/pdf/2207.02696.pdf). قدم الكتاب عملهم علنياً، ويمكن الوصول إلى قاعدة الشيفرة على [GitHub](https://github.com/WongKinYiu/yolov7). نحن نقدر جهودهم في تقدم المجال وتوفير عملهم للمجتمع بشكل عام. 
diff --git a/docs/ar/models/yolov8.md b/docs/ar/models/yolov8.md new file mode 100644 index 0000000..7b2082f --- /dev/null +++ b/docs/ar/models/yolov8.md @@ -0,0 +1,166 @@ +--- +comments: true +description: استكشف الميزات المثيرة لـ YOLOv8 ، أحدث إصدار من مكتشف الكائنات الحية الخاص بنا في الوقت الحقيقي! تعرّف على العمارات المتقدمة والنماذج المدرّبة مسبقًا والتوازن المثلى بين الدقة والسرعة التي تجعل YOLOv8 الخيار المثالي لمهام الكشف عن الكائنات الخاصة بك. +keywords: YOLOv8, Ultralytics, مكتشف الكائنات الحية الخاص بنا في الوقت الحقيقي, النماذج المدرّبة مسبقًا, وثائق, الكشف عن الكائنات, سلسلة YOLO, العمارات المتقدمة, الدقة, السرعة +--- + +# YOLOv8 + +## نظرة عامة + +YOLOv8 هو التطور الأخير في سلسلة YOLO لمكتشفات الكائنات الحية الخاصة بنا في الوقت الحقيقي ، والذي يقدم أداءً متقدمًا في مجال الدقة والسرعة. بناءً على التقدمات التي تم إحرازها في إصدارات YOLO السابقة ، يقدم YOLOv8 ميزات وتحسينات جديدة تجعله الخيار المثالي لمهام الكشف عن الكائنات في مجموعة واسعة من التطبيقات. + +![YOLOv8 المقدمة من Ultralytics](https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/yolo-comparison-plots.png) + +## الميزات الرئيسية + +- **العمارات المتقدمة للظهر والعنق:** يعتمد YOLOv8 على عمارات الظهر والعنق على أحدث طراز ، مما يؤدي إلى تحسين استخراج الميزات وأداء الكشف عن الكائنات. +- **Ultralytics Head بدون إثبات خطافي:** يعتمد YOLOv8 على Ultralytics Head بدون إثبات خطافي ، مما يسهم في زيادة الدقة وتوفير وقت مكشف أكثر كفاءة مقارنةً بالطرق التي تعتمد على الإثبات. +- **توازن مثالي بين الدقة والسرعة محسَّن:** بتركيزه على الحفاظ على توازن مثالي بين الدقة والسرعة ، فإن YOLOv8 مناسب لمهام الكشف عن الكائنات في الوقت الحقيقي في مجموعة متنوعة من المجالات التطبيقية. +- **تشكيلة من النماذج المدرّبة مسبقًا:** يقدم YOLOv8 مجموعة من النماذج المدرّبة مسبقًا لتلبية متطلبات المهام المختلفة ومتطلبات الأداء ، مما يجعل من السهل إيجاد النموذج المناسب لحالتك الاستخدامية الخاصة. 
+ +## المهام والأوضاع المدعومة + +تقدم سلسلة YOLOv8 مجموعة متنوعة من النماذج ، يتم تخصيص كلًا منها للمهام المحددة في رؤية الحاسوب. تم تصميم هذه النماذج لتلبية متطلبات مختلفة ، بدءًا من الكشف عن الكائنات إلى مهام أكثر تعقيدًا مثل تقسيم الصور إلى أجزاء واكتشاف نقاط المفاتيح والتصنيف. + +تمت تحسين كل نوع من سلسلة YOLOv8 للمهام التي تخصها ، مما يضمن أداء ودقة عاليين. بالإضافة إلى ذلك ، تتوافق هذه النماذج مع أوضاع تشغيل مختلفة بما في ذلك [الاستدلال](../modes/predict.md) ، [التحقق](../modes/val.md) ، [التدريب](../modes/train.md) و [التصدير](../modes/export.md) ، مما يسهل استخدامها في مراحل مختلفة من عملية التطوير والتنفيذ. + +| النموذج | أسماء الملف | المهمة | استدلال | التحقق | التدريب | التصدير | +|-------------|----------------------------------------------------------------------------------------------------------------|----------------------------------------------|---------|--------|---------|---------| +| YOLOv8 | `yolov8n.pt` `yolov8s.pt` `yolov8m.pt` `yolov8l.pt` `yolov8x.pt` | [الكشف](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-seg | `yolov8n-seg.pt` `yolov8s-seg.pt` `yolov8m-seg.pt` `yolov8l-seg.pt` `yolov8x-seg.pt` | [تقسيم الصور إلى أجزاء](../tasks/segment.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-pose | `yolov8n-pose.pt` `yolov8s-pose.pt` `yolov8m-pose.pt` `yolov8l-pose.pt` `yolov8x-pose.pt` `yolov8x-pose-p6.pt` | [المواقق/نقاط المفاتيح](../tasks/pose.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-cls | `yolov8n-cls.pt` `yolov8s-cls.pt` `yolov8m-cls.pt` `yolov8l-cls.pt` `yolov8x-cls.pt` | [التصنيف](../tasks/classify.md) | ✅ | ✅ | ✅ | ✅ | + +توفر هذه الجدولة نظرة عامة على متغيرات نموذج YOLOv8 ، مما يسلط الضوء على قابليتها للتطبيق في مهام محددة وتوافقها مع أوضاع تشغيل مختلفة مثل الاستدلال والتحقق والتدريب والتصدير. يعرض مرونة وقوة سلسلة YOLOv8 ، مما يجعلها مناسبة لمجموعة متنوعة من التطبيقات في رؤية الحاسوب. + +## مقاييس الأداء + +!!! 
الأداء + + === "الكشف (COCO)" + + انظر إلى [وثائق الكشف](https://docs.ultralytics.com/tasks/detect/) لأمثلة عن الاستخدام مع هذه النماذج المدربة مسبقًا على [COCO](https://docs.ultralytics.com/datasets/detect/coco/) ، التي تضم 80 فئة مدربة مسبقًا. + + | النموذج | حجم
(بيكسل) | معدل الكشف (التحقق)
50-95 | سرعة
CPU ONNX
(متوسط) | سرعة
A100 TensorRT
(متوسط) | معلمات
(مليون) | FLOPs
(مليون) | + | ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | + + === "الكشف (صور مفتوحة V7)" + + انظر إلى [وثائق الكشف](https://docs.ultralytics.com/tasks/detect/) لأمثلة عن الاستخدام مع هذه النماذج المدربة مسبقًا على [Open Image V7](https://docs.ultralytics.com/datasets/detect/open-images-v7/)، والتي تضم 600 فئة مدربة مسبقًا. + + | النموذج | حجم
(بيكسل) | معدل الكشف (التحقق)
50-95 | سرعة
CPU ONNX
(متوسط) | سرعة
A100 TensorRT
(متوسط) | معلمات
(مليون) | FLOPs
(مليون) | + | ----------------------------------------------------------------------------------------- | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-oiv7.pt) | 640 | 18.4 | 142.4 | 1.21 | 3.5 | 10.5 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-oiv7.pt) | 640 | 27.7 | 183.1 | 1.40 | 11.4 | 29.7 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-oiv7.pt) | 640 | 33.6 | 408.5 | 2.26 | 26.2 | 80.6 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-oiv7.pt) | 640 | 34.9 | 596.9 | 2.43 | 44.1 | 167.4 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-oiv7.pt) | 640 | 36.3 | 860.6 | 3.56 | 68.7 | 260.6 | + + === "تقسيم الصور إلى أجزاء (COCO)" + + انظر إلى [وثائق التقسيم](https://docs.ultralytics.com/tasks/segment/) لأمثلة عن الاستخدام مع هذه النماذج المدرّبة مسبقًا على [COCO](https://docs.ultralytics.com/datasets/segment/coco/)، والتي تضم 80 فئة مدربة مسبقًا. + + | النموذج | حجم
(بيكسل) | معدل التقسيم (التحقق)
50-95 | معدل التقسيم (الأقنعة)
50-95 | سرعة
CPU ONNX
(متوسط) | سرعة
A100 TensorRT
(متوسط) | معلمات
(مليون) | FLOPs
(مليون) | + | -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | + | [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | + | [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | + | [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | + | [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | + + === "التصنيف (ImageNet)" + + انظر إلى [وثائق التصنيف](https://docs.ultralytics.com/tasks/classify/) لأمثلة عن الاستخدام مع هذه النماذج المدرّبة مسبقًا على [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/)، والتي تضم 1000 فئة مدربة مسبقًا. + + | النموذج | حجم
(بيكسل) | دقة أعلى
أعلى 1 | دقة أعلى
أعلى 5 | سرعة
CPU ONNX
(متوسط) | سرعة
A100 TensorRT
(متوسط) | معلمات
(مليون) | FLOPs
(مليون) عند 640 | + | -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ------------------------------ | ----------------------------------- | ------------------ | ------------------------ | + | [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | + | [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | + | [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | + | [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | + | [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | + + === "المواقف (COCO)" + + انظر إلى [وثائق تقدير المواقع](https://docs.ultralytics.com/tasks/pose/) لأمثلة على الاستخدام مع هذه النماذج المدربة مسبقًا على [COCO](https://docs.ultralytics.com/datasets/pose/coco/)، والتي تتضمن فئة واحدة مدربة مسبقًا ، 'شخص'. + + | النموذج | حجم
(بيكسل) | معدل التوضيح (التحقق)
50-95 | معدل التوضيح 50 | سرعة
CPU ONNX
(متوسط) | سرعة
A100 TensorRT
(متوسط) | معلمات
(مليون) | FLOPs
(مليون) | + | ---------------------------------------------------------------------------------------------------- | --------------------- | --------------------- | ------------------ | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | + | [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | + | [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | + | [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | + | [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | + | [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | + +## أمثلة استخدام + +يوفر هذا المثال أمثلة بسيطة للتدريب والتنبؤ باستخدام YOLOv8. للحصول على وثائق كاملة حول هذه وغيرها من [الأوضاع](../modes/index.md) ، انظر إلى صفحات وثائق [تنبؤ](../modes/predict.md) ، [تدريب](../modes/train.md) ، [التحقق](../modes/val.md) و [التصدير](../modes/export.md) . + +يرجى ملاحظة أن المثال أدناه يتعلق بطراز YOLOv8 [Detect](../tasks/detect.md) للكشف عن الكائنات. لمهام مدعومة إضافية ، انظر إلى وثائق [تقسيم](../tasks/segment.md) ، [تحديد إنتماء](../tasks/classify.md) و [تصوير (Pose)](../tasks/pose.md) . + +!!! + +!!! 
Example "مثال" + + === "Python" + + يمكن تمرير نماذج الـ PyTorch المُدرّبة المُحفوظة بالامتداد `*.pt` بالإضافة إلى ملفات التكوين بامتداد `*.yaml` إلى فئة `YOLO()` لإنشاء نموذج في لغة بايثون: + + ```python + from ultralytics import YOLO + + # تحميل نموذج YOLOv8n المدرّب مسبقًا على COCO + model = YOLO('yolov8n.pt') + + # عرض معلومات النموذج (اختياري) + model.info() + + # تدريب النموذج على مجموعة بيانات المُدخلات coco8 على سبيل المثال لمدة 100 دورة + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # تشغيل التنبؤ باستخدام نموذج YOLOv8n على صورة 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + تتوفر أوامر CLI لتشغيل النماذج مباشرة: + + ```bash + # تحميل نموذج YOLOv8n المدرّب مسبقًا على COCO واختباره على مجموعة بيانات coco8 + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # تحميل نموذج YOLOv8n المدرّب مسبقًا على COCO والتنبؤ به على صورة 'bus.jpg' + yolo predict model=yolov8n.pt source=path/to/bus.jpg + ``` + +## الاقتباسات والتقديرات + +إذا استخدمت نموذج YOLOv8 أو أي برنامج آخر من هذا المستودع في عملك ، فيرجى استشهاده باستخدام التنسيق التالي: + +!!!، + +!!! quote "" + + === "BibTeX" + + ```bibtex + @software{yolov8_ultralytics, + author = {Glenn Jocher and Ayush Chaurasia and Jing Qiu}, + title = {Ultralytics YOLOv8}, + version = {8.0.0}, + year = {2023}, + url = {https://github.com/ultralytics/ultralytics}, + orcid = {0000-0001-5950-6979, 0000-0002-7603-6750, 0000-0003-3783-7069}, + license = {AGPL-3.0} + } + ``` + +يرجى ملاحظة أن وجود معرف الكائن الرقمي (DOI) قيد الانتظار وسيتم إضافته إلى الاقتباس بمجرد توفره. تُقدم نماذج YOLOv8 بموجب [رخصة AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) و [الرخصة المؤسسية](https://ultralytics.com/license). 
diff --git a/docs/ar/modes/benchmark.md b/docs/ar/modes/benchmark.md new file mode 100644 index 0000000..506240a --- /dev/null +++ b/docs/ar/modes/benchmark.md @@ -0,0 +1,94 @@ +--- +comments: true +description: تعرف على كيفية قياس سرعة ودقة YOLOv8 عبر تنسيقات التصدير المختلفة. احصل على رؤى حول مقاييس mAP50-95 وaccuracy_top5 والمزيد. +keywords: Ultralytics، YOLOv8، اختبار الأداء، قياس السرعة، قياس الدقة، مقاييس mAP50-95 وaccuracy_top5، ONNX، OpenVINO، TensorRT، تنسيقات تصدير YOLO +--- + +# اختبار النموذج باستخدام Ultralytics YOLO + +Ultralytics YOLO ecosystem and integrations + +## المقدمة + +بمجرد أن يتم تدريب نموذجك وتحقق صحته ، فإن الخطوة التالية بشكل منطقي هي تقييم أدائه في سيناريوهات العالم الحقيقي المختلفة. يوفر وضع الاختبار في Ultralytics YOLOv8 هذا الهدف من خلال توفير إطار قوي لتقييم سرعة ودقة النموذج عبر مجموعة من صيغ التصدير. + +## لماذا هو اختبار الأداء مهم؟ + +- **قرارات مستنيرة:** اكتساب رؤى حول التنازلات بين السرعة والدقة. +- **تخصيص الموارد:** فهم كيفية أداء تنسيقات التصدير المختلفة على أجهزة مختلفة. +- **تحسين:** تعلم أي تنسيق تصدير يقدم أفضل أداء لحالتك الاستخدامية المحددة. +- **كفاءة التكلفة:** استخدام الموارد الأجهزة بشكل أكثر كفاءة بناءً على نتائج الاختبار. + +### المقاييس الرئيسية في وضع الاختبار + +- **mAP50-95:** لكشف الكائنات وتقسيمها وتحديد الوضع. +- **accuracy_top5:** لتصنيف الصور. +- **وقت التتبع:** الوقت المستغرق لكل صورة بالميلي ثانية. + +### تنسيقات التصدير المدعومة + +- **ONNX:** لأفضل أداء على وحدة المعالجة المركزية. +- **TensorRT:** لأقصى استفادة من وحدة المعالجة الرسومية. +- **OpenVINO:** لتحسين الأجهزة من إنتل. +- **CoreML و TensorFlow SavedModel وما إلى ذلك:** لتلبية احتياجات النشر المتنوعة. + +!!! Tip "نصيحة" + + * قم بتصدير إلى نموذج ONNX أو OpenVINO لزيادة سرعة وحدة المعالجة المركزية بمقدار 3 مرات. + * قم بتصدير إلى نموذج TensorRT لزيادة سرعة وحدة المعالجة الرسومية بمقدار 5 مرات. + +## أمثلة على الاستخدام + +قم بتشغيل اختبارات YOLOv8n على جميع تنسيقات التصدير المدعومة بما في ذلك ONNX و TensorRT وما إلى ذلك. 
انظر القسم الموجود أدناه للحصول على قائمة كاملة من وسيطات التصدير. + +!!! Example "مثال" + + === "Python" + + ```python + from ultralytics.utils.benchmarks import benchmark + + # اختبار على وحدة المعالجة الرسومية + benchmark(model='yolov8n.pt', data='coco8.yaml', imgsz=640, half=False, device=0) + ``` + === "CLI" + + ```bash + yolo benchmark model=yolov8n.pt data='coco8.yaml' imgsz=640 half=False device=0 + ``` + +## وسيطات + +توفر الوسائط مثل `model` و `data` و `imgsz` و `half` و `device` و `verbose` مرونة للمستخدمين لضبط الاختبارات حسب احتياجاتهم المحددة ومقارنة أداء تنسيقات التصدير المختلفة بسهولة. + +| المفتاح | القيمة | الوصف | +|-----------|---------|---------------------------------------------------------------------------------------------------| +| `model` | `None` | مسار إلى ملف النموذج ، على سبيل المثال yolov8n.pt ، yolov8n.yaml | +| `data` | `None` | مسار إلى YAML يشير إلى مجموعة بيانات اختبار الأداء (بتحتوى على بيانات `val`) | +| `imgsz` | `640` | حجم الصورة كرقم ، أو قائمة (h ، w) ، على سبيل المثال (640، 480) | +| `half` | `False` | تقليل دقة العدد العشرى للأبعاد (FP16 quantization) | +| `int8` | `False` | تقليل دقة العدد الصحيح 8 بت (INT8 quantization) | +| `device` | `None` | الجهاز الذى ستعمل عليه العملية ، على سبيل المثال cuda device=0 أو device=0,1,2,3 أو device=cpu | +| `verbose` | `False` | عدم المتابعة عند حدوث خطأ (مقدار منطقى)، أو مستوى الكشف عند تجاوز حد القيمة المطلوبة (قيمة عائمة) | + +## صيغ التصدير + +سيحاول التطبيق تشغيل الاختبارات تلقائيًا على جميع صيغ التصدير الممكنة الموجودة أدناه. 
+ +| Format | `format` Argument | Model | Metadata | Arguments | +|--------------------------------------------------------------------|-------------------|---------------------------|----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +انظر تفاصيل التصدير الكاملة في الصفحة [Export](https://docs.ultralytics.com/modes/export/) diff --git a/docs/ar/modes/export.md b/docs/ar/modes/export.md new file mode 100644 index 0000000..f555ec0 --- /dev/null +++ b/docs/ar/modes/export.md @@ -0,0 +1,108 @@ +--- +comments: true 
+description: دليل خطوة بخطوة حول تصدير نماذج YOLOv8 الخاصة بك إلى تنسيقات مختلفة مثل ONNX و TensorRT و CoreML وغيرها للنشر. استكشف الآن!. +keywords: YOLO، YOLOv8، Ultralytics، تصدير النموذج، ONNX، TensorRT، CoreML، TensorFlow SavedModel، OpenVINO، PyTorch، تصدير النموذج +--- + +# تصدير النموذج باستخدام يولو من Ultralytics + +إكوسيستم يولو من Ultralytics والتكاملات + +## مقدمة + +الهدف النهائي لتدريب نموذج هو نشره لتطبيقات العالم الحقيقي. يوفر وضع التصدير في يولو من Ultralytics مجموعة متنوعة من الخيارات لتصدير النموذج المدرب إلى تنسيقات مختلفة، مما يجعله يمكن استخدامه في مختلف الأنظمة والأجهزة. يهدف هذا الدليل الشامل إلى مساعدتك في فهم تفاصيل تصدير النموذج، ويعرض كيفية تحقيق أقصى توافق وأداء. + +

+
+ +
+ شاهد: كيفية تصدير نموذج Ultralytics YOLOv8 مدرَّب خصيصًا وتشغيل الاستدلال المباشر على كاميرا الويب.

+ +## لماذا اختيار وضع تصدير YOLOv8؟ + +- **التنوع:** تصدير إلى تنسيقات متعددة بما في ذلك ONNX و TensorRT و CoreML ، وغيرها. +- **الأداء:** الحصول على سرعة تسريع تصل إلى 5 أضعاف باستخدام TensorRT وسرعة تسريع معالج الكمبيوتر المركزي بنسبة 3 أضعاف باستخدام ONNX أو OpenVINO. +- **التوافقية:** جعل النموذج قابلاً للنشر على الأجهزة والبرامج المختلفة. +- **سهولة الاستخدام:** واجهة سطر الأوامر البسيطة وواجهة برمجة Python لتصدير النموذج بسرعة وسهولة. + +### الميزات الرئيسية لوضع التصدير + +إليك بعض من الميزات المميزة: + +- **تصدير بنقرة واحدة:** أوامر بسيطة لتصدير إلى تنسيقات مختلفة. +- **تصدير الدُفعات:** تصدير نماذج قادرة على العمل مع الدُفعات. +- **تنفيذ محسَّن:** يتم تحسين النماذج المصدرة لتوفير وقت تنفيذ أسرع. +- **فيديوهات تعليمية:** مرشدين وفيديوهات تعليمية لتجربة تصدير سلسة. + +!!! Tip "نصيحة" + + * صدّر إلى ONNX أو OpenVINO للحصول على تسريع معالج الكمبيوتر المركزي بنسبة 3 أضعاف. + * صدّر إلى TensorRT للحصول على تسريع وحدة المعالجة الرسومية بنسبة 5 أضعاف. + +## أمثلة للاستخدام + +قم بتصدير نموذج YOLOv8n إلى تنسيق مختلف مثل ONNX أو TensorRT. انظر الجدول أدناه للحصول على قائمة كاملة من وسائط التصدير. + +!!! Example "مثال" + + === "بايثون" + + ```python + from ultralytics import YOLO + + # قم بتحميل نموذج + model = YOLO('yolov8n.pt') # تحميل نموذج رسمي + model = YOLO('path/to/best.pt') # تحميل نموذج مدرب مخصص + + # قم بتصدير النموذج + model.export(format='onnx') + ``` + === "واجهة سطر الأوامر" + + ```bash + yolo export model=yolov8n.pt format=onnx # تصدير نموذج رسمي + yolo export model=path/to/best.pt format=onnx # تصدير نموذج مدرب مخصص + ``` + +## الوسائط + +تشير إعدادات تصدير YOLO إلى التكوينات والخيارات المختلفة المستخدمة لحفظ أو تصدير النموذج للاستخدام في بيئات أو منصات أخرى. يمكن أن تؤثر هذه الإعدادات على أداء النموذج وحجمه وتوافقه مع الأنظمة المختلفة. 
تشمل بعض إعدادات تصدير YOLO الشائعة تنسيق ملف النموذج المصدر (مثل ONNX وتنسيق TensorFlow SavedModel) والجهاز الذي سيتم تشغيل النموذج عليه (مثل المعالج المركزي أو وحدة المعالجة الرسومية) ووجود ميزات إضافية مثل الأقنعة أو التسميات المتعددة لكل مربع. وتشمل العوامل الأخرى التي قد تؤثر في عملية التصدير المهمة المحددة التي يُستخدم النموذج لها ومتطلبات أو قيود البيئة أو المنصة المستهدفة. من المهم أن ننظر بعناية ونقوم بتكوين هذه الإعدادات لضمان أن النموذج المصدر محسَّن لحالة الاستخدام المقصودة ويمكن استخدامه بشكل فعال في البيئة المستهدفة. + +| المفتاح | القيمة | الوصف | +|-------------|-----------------|-----------------------------------------------------------------------| +| `format` | `'torchscript'` | التنسيق المراد تصديره | +| `imgsz` | `640` | حجم الصورة كعدد مفرد (scalar) أو قائمة (h ، w) ، على سبيل المثال (640 ، 480) | +| `keras` | `False` | استخدام Keras لتصدير TF SavedModel | +| `optimize` | `False` | TorchScript: التحسين للأجهزة المحمولة | +| `half` | `False` | تكميم FP16 | +| `int8` | `False` | تكميم INT8 | +| `dynamic` | `False` | ONNX/TensorRT: المحاور الديناميكية | +| `simplify` | `False` | ONNX/TensorRT: تبسيط النموذج | +| `opset` | `None` | ONNX: إصدار opset (اختياري ، الافتراضي هو الأحدث) | +| `workspace` | `4` | TensorRT: حجم مساحة العمل (GB) | +| `nms` | `False` | CoreML: إضافة NMS | + +## تنسيقات التصدير + +صيغ تصدير YOLOv8 المتاحة في الجدول أدناه. يمكنك التصدير إلى أي تنسيق باستخدام الوسيطة `format` ، مثل `format='onnx'` أو `format='engine'`.
+ +| التنسيق | وسيطة format | النموذج | البيانات الوصفية | الوسائط | +|--------------------------------------------------------------------|---------------|---------------------------|------------------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | diff --git a/docs/ar/modes/index.md b/docs/ar/modes/index.md new file mode 100644 index 0000000..86e2c4d --- /dev/null +++ b/docs/ar/modes/index.md @@ -0,0 +1,77 @@ +--- +comments: true +description: من التدريب إلى التتبع، استفد من YOLOv8 مع Ultralytics.
احصل على نصائح وأمثلة لكل وضع مدعوم بما في ذلك التحقق والتصدير واختبار الأداء. +keywords: Ultralytics, YOLOv8, التعلم الآلي، كشف الكائنات، التدريب، التحقق، التنبؤ، التصدير، التتبع، اختبار الأداء +--- + +# أوضاع Ultralytics YOLOv8 + +بيئة عمل Ultralytics YOLO والتكاملات + +## المقدمة + +YOLOv8 من Ultralytics ليست مجرد نموذج لكشف الكائنات آخر؛ إنها إطار متعدد الاستخدامات مصمم لتغطية دورة حياة نماذج التعلم الآلي بأكملها - من امتصاص البيانات وتدريب النموذج إلى التحقق والنشر وتتبع الواقع الحقيقي. يخدم كل وضع غرضًا محددًا وهو مصمم لتوفير المرونة والكفاءة المطلوبة للمهام والحالات الاستخدام المختلفة. + +!!! Note "ملاحظة" + + 🚧 توثيقنا متعدد اللغات قيد الإنشاء حاليًا، ونحن نعمل بجهد لتحسينه. شكرًا لك على صبرك! 🙏 + +

+
+ +
+ شاهد: الدليل التعليمي لأوضاع Ultralytics: التدريب، التحقق، التنبؤ، التصدير، واختبار الأداء. +

+ +### أوضاع مختصرة + +فهم ال**أوضاع** المختلفة المدعومة بواسطة Ultralytics YOLOv8 مهم جدًا للاستفادة القصوى من النماذج الخاصة بك: + +- وضع **التدريب**: قم بضبط نموذجك على مجموعة بيانات مخصصة أو محملة مسبقًا. +- وضع **التحقق**: نقطة فحص بعد التدريب لتقييم أداء النموذج. +- وضع **التنبؤ**: اطلق قوة التنبؤ الخاصة بنموذجك على البيانات الحقيقية. +- وضع **التصدير**: قم بتجهيز نموذجك للاستخدام في صيغ مختلفة. +- وضع **التتبع**: قم بتوسيع نموذج الكشف عن الكائنات الخاص بك إلى تطبيقات التتبع في الوقت الحقيقي. +- وضع **اختبار الأداء**: قم بتحليل سرعة ودقة نموذجك في بيئات نشر متنوعة. + +يهدف هذا الدليل الشامل إلى تقديم لمحة عامة ونصائح عملية حول كل وضع، لمساعدتك في استغلال كامل إمكانات YOLOv8. + +## [وضع التدريب](train.md) + +يتم استخدام وضع التدريب لتدريب نموذج YOLOv8 على مجموعة بيانات مخصصة. في هذا الوضع، يتم تدريب النموذج باستخدام مجموعة البيانات والمعلمات الهايبر للحصول على دقة في توقع الفئات ومواقع الكائنات في الصورة. + +[أمثلة التدريب](train.md){ .md-button } + +## [وضع التحقق](val.md) + +يتم استخدام وضع التحقق للتحقق من نموذج YOLOv8 بعد تدريبه. في هذا الوضع، يتم تقييم النموذج على مجموعة التحقق لقياس دقته وأداء التعميم. يمكن استخدام هذا الوضع لتعديل المعلمات الهايبر للنموذج لتحسين أدائه. + +[أمثلة التحقق](val.md){ .md-button } + +## [وضع التنبؤ](predict.md) + +يتم استخدام وضع التنبؤ لإجراء تنبؤات باستخدام نموذج YOLOv8 المدرب على صور أو فيديوهات جديدة. في هذا الوضع، يتم تحميل النموذج من ملف الفحص، ويمكن للمستخدم توفير الصور أو مقاطع الفيديو لإجراء استدلال. يقوم النموذج بتوقع الفئات ومواقع الكائنات في الصور أو مقاطع الفيديو المدخلة. + +[أمثلة التنبؤ](predict.md){ .md-button } + +## [وضع التصدير](export.md) + +يتم استخدام وضع التصدير لتصدير نموذج YOLOv8 إلى صيغة يمكن استخدامها للنشر. في هذا الوضع، يتم تحويل النموذج إلى صيغة يمكن استخدامها من قبل تطبيقات البرامج الأخرى أو الأجهزة الأجهزة. يكون هذا الوضع مفيدًا عند نشر النموذج في بيئات الإنتاج. 
+ +[أمثلة التصدير](export.md){ .md-button } + +## [وضع التتبع](track.md) + +يتم استخدام وضع التتبع لتتبع الكائنات في الوقت الحقيقي باستخدام نموذج YOLOv8. في هذا الوضع، يتم تحميل النموذج من ملف الفحص، ويمكن للمستخدم توفير تيار فيديو مباشر لأداء تتبع الكائنات في الوقت الفعلي. يكون هذا الوضع مفيدًا لتطبيقات مثل أنظمة المراقبة أو السيارات ذاتية القيادة. + +[أمثلة التتبع](track.md){ .md-button } + +## [وضع اختبار الأداء](benchmark.md) + +يتم استخدام وضع اختبار الأداء لتقييم سرعة ودقة صيغ التصدير المختلفة لـ YOLOv8. تقدم الاختبارات معلومات حول حجم الصيغة المصدر، معيار الأداء `mAP50-95` (لكشف الكائنات والتقسيم والتصوير) أو المعيار `accuracy_top5` (للتصنيف)، ووقت الاستدلال بالملي ثانية لكل صورة في صيغ التصدير المختلفة مثل ONNX و OpenVINO و TensorRT وغيرها. يمكن لهذه المعلومات مساعدة المستخدمين على اختيار صيغة التصدير الأمثل لحالتهم الاستخدامية المحددة بناءً على متطلبات السرعة والدقة. + +[أمثلة اختبار الأداء](benchmark.md){ .md-button } diff --git a/docs/ar/modes/predict.md b/docs/ar/modes/predict.md new file mode 100644 index 0000000..69a9cb8 --- /dev/null +++ b/docs/ar/modes/predict.md @@ -0,0 +1,217 @@ +--- +comments: true +description: اكتشف كيفية استخدام وضع التنبؤ YOLOv8 لمهام مختلفة. تعرّف على مصادر التنبؤ المختلفة مثل الصور ومقاطع الفيديو وتنسيقات البيانات المختلفة. +keywords: Ultralytics، YOLOv8، وضع التنبؤ، مصادر التنبؤ، مهام التنبؤ، وضع التدفق، معالجة الصور، معالجة الفيديو، التعلم الآلي، الذكاء الاصطناعي +--- + +# التنبؤ بالنموذج باستخدام Ultralytics YOLO + +البيئة والتكامل الخاصة بنظام Ultralytics YOLO + +## المقدمة + +في عالم التعلم الآلي ورؤية الحاسوب، يُطلق على عملية استخلاص المعنى من البيانات البصرية اسم "الاستدلال" أو "التنبؤ". يوفر YOLOv8 من Ultralytics ميزة قوية تُعرف بـ**وضع التنبؤ** والتي تم تصميمها خصيصًا للاستدلال في الوقت الحقيقي وبأداء عال على مجموعة واسعة من مصادر البيانات. + +

+
+ +
+ شاهد: كيفية استخراج النتائج من نموذج Ultralytics YOLOv8 لمشاريع مخصصة. +

+ +## التطبيقات في العالم الحقيقي + +| التصنيع | الرياضة | السلامة | +|:-------------------------------------------------:|:----------------------------------------------------:|:-------------------------------------------:| +| ![Vehicle Spare Parts Detection][car spare parts] | ![Football Player Detection][football player detect] | ![People Fall Detection][human fall detect] | +| كشف قطع غيار المركبات | كشف لاعبي كرة القدم | كشف سقوط الأشخاص | + +## لماذا يجب استخدام Ultralytics YOLO لعمليات التنبؤ؟ + +فيما يلي الأسباب التي يجب أخذها في الاعتبار عند الاستفادة من وضع التنبؤ YOLOv8 لاحتياجات التنبؤ المختلفة: + +- **التنوع:** قادر على التنبؤ على الصور ومقاطع الفيديو، وحتى التدفقات الحية. +- **الأداء:** مصمم للتطبيقات في الوقت الحقيقي والمعالجة عالية السرعة دون التضحية بالدقة. +- **سهولة الاستخدام:** واجهات Python والواجهة السطرية لتسريع النشر والاختبار. +- **قابلية التخصيص العالية:** إعدادات ومعلمات مختلفة لضبط سلوك التنبؤ النموذج وفقًا لمتطلباتك المحددة. + +### الميزات الرئيسية لوضع التنبؤ + +تم تصميم وضع التنبؤ الخاص بـ YOLOv8 ليكون قويًا ومتعدد الاستخدامات، ويتميز بما يلي: + +- **توافق متعدد مصادر البيانات:** سواء كانت بياناتك عبارة عن صور فردية أو مجموعة من الصور أو ملفات فيديو أو تدفقات فيديو في الوقت الحقيقي، سيتمكن وضع التنبؤ من التعامل معها جميعًا. +- **وضع التدفق الحي:** استخدم ميزة التدفق لإنشاء مولد فعّال لكائنات "النتائج" باستخدام الذاكرة. قم بتمكين هذا بتعيين `stream=True` في طريقة استدعاء المتنبئ. +- **معالجة دُفعات:** القدرة على معالجة العديد من الصور أو إطارات الفيديو في دُفعة واحدة، مما يزيد أكثر من سرعة التنبؤ. +- **سهل التكامل:** يسهل الدمج مع خطوط الأنابيب البيانية الحالية ومكونات البرامج الأخرى بفضل واجهة برمجة التطبيقات المرنة. + +تُرجع نماذج Ultralytics YOLO إما قائمة Python من كائنات "النتائج" أو مُنشئ برمجياً فعّال لكائنات الـ "النتائج" في حال تم تمرير `stream=True` إلى النموذج أثناء عملية التنبؤ: + +!!! 
Example "التنبؤ" + + === "العودة بقائمة واحدة باستخدام `stream=False`" + ```python + from ultralytics import YOLO + + # تحميل نموذج + model = YOLO('yolov8n.pt') # نموذج YOLOv8n المُدرَّب مسبقًا + + # تشغيل التنبؤ بدُفعة على قائمة من الصور + results = model(['im1.jpg', 'im2.jpg']) # العودة بقائمة من كائنات 'النتائج' + + # معالجة قائمة النتائج + for result in results: + boxes = result.boxes # كائن Boxes لمخرجات bbox + masks = result.masks # كائن Masks لمخرجات قنوات الفصل العنقودي + keypoints = result.keypoints # كائن Keypoints لمخرجات الاتجاهات + probs = result.probs # كائن Probs لمخرجات التصنيف + ``` + + === "العودة بمُنشئ فعال مع `stream=True`" + ```python + from ultralytics import YOLO + + # تحميل نموذج + model = YOLO('yolov8n.pt') # نموذج YOLOv8n المُدرَّب مسبقًا + + # تشغيل التنبؤ بدُفعة على قائمة من الصور + results = model(['im1.jpg', 'im2.jpg'], stream=True) # العودة بمُنشئ فعال لكائنات 'النتائج' + + # معالجة المُنشئ الفعال + for result in results: + boxes = result.boxes # كائن Boxes لمخرجات bbox + masks = result.masks # كائن Masks لمخرجات قنوات الفصل العنقودي + keypoints = result.keypoints # كائن Keypoints لمخرجات الاتجاهات + probs = result.probs # كائن Probs لمخرجات التصنيف + ``` + +## مصادر التنبؤ + +يمكن لـ YOLOv8 معالجة أنواع مختلفة من مصادر الإدخال لعملية الاستدلال، على النحو الموضح في الجدول أدناه. تشمل المصادر الصور الثابتة وتيارات الفيديو وتنسيقات مختلفة للبيانات. يشير الجدول أيضًا إلى ما إذا كان يمكن استخدام كل مصدر في وضع التدفق باستخدام الوسيط `stream=True` ✅. يعتبر وضع التدفق مفيدًا لمعالجة مقاطع الفيديو أو التدفقات الحية حيث يقوم بإنشاء مُنشئ للنتائج بدلاً من تحميل جميع الإطارات في الذاكرة. + +!!! Tip "طراز" + + استخدم `stream=True` لمعالجة مقاطع الفيديو الطويلة أو مجموعات البيانات الكبيرة لإدارة الذاكرة بكفاءة. عندما تكون القيمة مساوية لـ `stream=False`، يتم تخزين النتائج لجميع الإطارات أو نقاط البيانات في الذاكرة، والتي يمكن أن تتراكم بسرعة وتُسبِّب أخطاء الذاكرة غير الكافية للمدخلات الكبيرة. 
على النقيض من ذلك، يستخدم التدفق `stream=True` مولدًا يُبقي نتائج الإطار الحالي أو نقطة البيانات الحالية في الذاكرة فقط، مما يقلل بشكل كبير من استهلاك الذاكرة ويمنع مشكلات عدم كفاية الذاكرة. + +| مصدر | الوسيط | النوع | الملاحظات | +|------------------|--------------------------------------------|-----------------|----------------------------------------------------------------------------------------------| +| صورة | `'صورة.jpg'` | `str` or `Path` | ملف صورة واحدة. | +| رابط URL | `'https://ultralytics.com/images/bus.jpg'` | `str` | رابط URL لصورة ما. | +| لقطة شاشة برمجية | `'screen'` | `str` | قم بالتقاط لقطة شاشة برمجية. | +| PIL | `Image.open('im.jpg')` | `PIL.Image` | الصيغة HWC مع قنوات RGB. | +| OpenCV | `cv2.imread('im.jpg')` | `np.ndarray` | الصيغة HWC مع قنوات BGR `uint8 (0-255)`. | +| numpy | `np.zeros((640,1280,3))` | `np.ndarray` | الصيغة HWC مع قنوات BGR `uint8 (0-255)`. | +| torch | `torch.zeros(16,3,320,640)` | `torch.Tensor` | الصيغة BCHW مع قنوات RGB `float32 (0.0-1.0)`. | +| CSV | `'المصادر.csv'` | `str` or `Path` | ملف CSV يحتوي على مسارات الصور أو مقاطع الفيديو أو المجلدات. | +| فيديو ✅ | `'الفيديو.mp4'` | `str` or `Path` | ملف فيديو بتنسيقات مثل MP4 و AVI وما إلى ذلك. | +| الدليل ✅ | `'المسار/'` | `str` or `Path` | مسار إلى مجلد يحتوي على صور أو مقاطع فيديو. | +| glob ✅ | `'المسار/*.jpg'` | `str` | نمط glob لمطابقة عدة ملفات. استخدم حرف `*` كحرف بدل (wildcard). | +| يوتيوب ✅ | `'https://youtu.be/LNwODJXcvt4'` | `str` | رابط URL إلى فيديو يوتيوب. | +| تدفق ✅ | `'rtsp://example.com/media.mp4'` | `str` | عنوان URL لبروتوكولات التدفق مثل RTSP و RTMP و TCP أو عنوان IP. | +| تدفق متعدد ✅ | `'list.streams'` | `str` or `Path` | ملف نصي `*.streams` مع عنوان تدفق URL في كل صف، على سبيل المثال 8 تدفقات ستعمل بحجم دُفعة 8. | + +فيما يلي أمثلة تعليمات برمجية لاستخدام كل نوع من أنواع المصادر: + +!!! Example "مصادر التنبؤ" + + === "الصورة" + قم بإجراء عملية التنبؤ على ملف صورة.
+ ```python + from ultralytics import YOLO + + # تحميل نموذج YOLOv8n المدرب مسبقًا + model = YOLO('yolov8n.pt') + + # تحديد مسار ملف الصورة + source = 'المسار/إلى/الصورة.jpg' + + # تشغيل الاستدلال على المصدر + results = model(source) # قائمة كائنات النتائج + ``` + + === "لقطة شاشة برمجية" + قم بإجراء عملية التنبؤ على محتوى الشاشة الحالي كلقطة شاشة. + ```python + from ultralytics import YOLO + + # تحميل نموذج YOLOv8n المدرب مسبقًا + model = YOLO('yolov8n.pt') + + # تعريف اللقطة الحالية كمصدر + source = 'screen' + + # تشغيل الاستدلال على المصدر + results = model(source) # قائمة كائنات النتائج + ``` + + === "رابط URL" + قم بإجراء عملية التنبؤ على صورة موجودة على الإنترنت أو فيديو. + ```python + from ultralytics import YOLO + + # تحميل نموذج YOLOv8n المدرب مسبقًا + model = YOLO('yolov8n.pt') + + # تعريف رابط الصورة أو الفيديو على الإنترنت + source = 'https://ultralytics.com/images/bus.jpg' + + # تشغيل الاستدلال على المصدر + results = model(source) # قائمة كائنات النتائج + ``` + + === "PIL" + قم بإجراء عملية التنبؤ على صورة مفتوحة بواسطة مكتبة Python Imaging Library (PIL). + ```python + from PIL import Image + from ultralytics import YOLO + + # تحميل نموذج YOLOv8n المدرب مسبقًا + model = YOLO('yolov8n.pt') + + # فتح صورة باستخدام PIL + source = Image.open('المسار/إلى/الصورة.jpg') + + # تشغيل الاستدلال على المصدر + results = model(source) # قائمة كائنات النتائج + ``` + + === "OpenCV" + قم بإجراء عملية التنبؤ على صورة مُقروءة بواسطة OpenCV. + ```python + import cv2 + from ultralytics import YOLO + + # تحميل نموذج YOLOv8n المدرب مسبقًا + model = YOLO('yolov8n.pt') + + # قراءة صورة باستخدام OpenCV + source = cv2.imread('المسار/إلى/الصورة.jpg') + + # تشغيل الاستدلال على المصدر + results = model(source) # قائمة كائنات النتائج + ``` + + === "numpy" + قم بإجراء عملية التنبؤ على صورة مُمثلة كمصفوفة numpy.
+ ```python + import numpy as np + from ultralytics import YOLO + + # تحميل نموذج YOLOv8n المدرب مسبقًا + model = YOLO('yolov8n.pt') + + # إنشاء مصفوفة numpy عشوائية في صيغة HWC (640, 640, 3) بقيم بين [0, 255] ونوع uint8 + source = np.random.randint(low=0, high=255, size=(640, 640, 3), dtype='uint8') + + # الجمع بين التنبؤ على المصدر + results = model(source) # قائمة كائنات النتائج + ``` + +[car spare parts]: https://github.com/RizwanMunawar/ultralytics/assets/62513924/a0f802a8-0776-44cf-8f17-93974a4a28a1 + +[football player detect]: https://github.com/RizwanMunawar/ultralytics/assets/62513924/7d320e1f-fc57-4d7f-a691-78ee579c3442 + +[human fall detect]: https://github.com/RizwanMunawar/ultralytics/assets/62513924/86437c4a-3227-4eee-90ef-9efb697bdb43 diff --git a/docs/ar/modes/track.md b/docs/ar/modes/track.md new file mode 100644 index 0000000..5300ecf --- /dev/null +++ b/docs/ar/modes/track.md @@ -0,0 +1,360 @@ +--- +comments: true +description: تعرف على كيفية استخدام Ultralytics YOLO لتتبع الكائنات في تدفقات الفيديو. أدلة لاستخدام مختلف المتتبعين وتخصيص إعدادات المتتبع. +keywords: Ultralytics، YOLO، تتبع الكائنات، تدفقات الفيديو، BoT-SORT، ByteTrack، دليل Python، دليل خط الأوامر (CLI) +--- + +# تتبع عدة كائنات باستخدام Ultralytics YOLO + +Multi-object tracking examples + +يعد تتبع الكائنات في مجال تحليل الفيديو مهمة حرجة ليس فقط في تحديد موقع وفئة الكائنات داخل الإطار، ولكن أيضًا في الحفاظ على هوية فريدة لكل كائن يتم اكتشافه مع تقدم الفيديو. تكاد التطبيقات لا تعد ولا تحصى - تتراوح من المراقبة والأمان إلى تحليل الرياضة الفورية. + +## لماذا يجب اختيار Ultralytics YOLO لتتبع الكائنات؟ + +إن مخرجات المتتبعين في Ultralytics متسقة مع كشف الكائنات القياسي ولها قيمة مضافة من هويات الكائنات. هذا يجعل من السهل تتبع الكائنات في تدفقات الفيديو وأداء التحليلات التالية. إليك لماذا يجب أن تفكر في استخدام Ultralytics YOLO لتلبية احتياجات تتبع الكائنات الخاصة بك: + +- **الكفاءة:** معالجة تدفقات الفيديو في الوقت الحقيقي دون المساومة على الدقة. 
+- **المرونة:** يدعم العديد من خوارزميات التتبع والتكوينات. +- **سهولة الاستخدام:** واجهة برمجة تطبيقات بسيطة للغاية وخيارات سطر الأوامر للدمج السريع والنشر. +- **إمكانية التخصيص:** سهل الاستخدام مع نماذج YOLO المدرَّبة المخصصة، مما يسمح بالدمج في التطبيقات الخاصة بمجالات محددة. + +

+
+ +
+ شاهد: كشف الكائنات وتتبعها باستخدام Ultralytics YOLOv8. +

+ +## تطبيقات في العالم الحقيقي + +| النقل | البيع بالتجزئة | الاستزراع المائي | +|:----------------------------------:|:--------------------------------:|:----------------------------:| +| ![Vehicle Tracking][vehicle track] | ![People Tracking][people track] | ![Fish Tracking][fish track] | +| تتبع المركبات | تتبع الأشخاص | تتبع الأسماك | + +## ملامح بلمحة + +يوفر Ultralytics YOLO ميزات كشف الكائنات لتوفير تتبع فعال ومتعدد الاستخدامات للكائنات: + +- **تتبع فوري:** تتبع الكائنات بسلاسة في مقاطع الفيديو ذات معدل الإطارات العالي. +- **دعم عدة متتبعين:** اختيار بين مجموعة متنوعة من خوارزميات التتبع المعتمدة. +- **تخصيص تكوينات المتتبع المتاحة:** ضبط خوارزمية التتبع لتلبية المتطلبات المحددة عن طريق ضبط مختلف المعلمات. + +## متتبعون متاحون + +يدعم Ultralytics YOLO الخوارزميات التالية للتتبع. يمكن تمكينها عن طريق تمرير ملف تكوين YAML ذي الصلة مثل "tracker=tracker_type.yaml": + +* [BoT-SORT](https://github.com/NirAharon/BoT-SORT) - استخدم `botsort.yaml` لتمكين هذا المتتبع. +* [ByteTrack](https://github.com/ifzhang/ByteTrack) - استخدم `bytetrack.yaml` لتمكين هذا المتتبع. + +المتتبع الافتراضي هو BoT-SORT. + +## تتبع + +لتشغيل المتتبع على تدفقات الفيديو، استخدم نموذج تحديد (Detect) أو قطع (Segment) أو وضع (Pose) مدرب مثل YOLOv8n و YOLOv8n-seg و YOLOv8n-pose. + +!!! 
Example "مثال" + + === "Python" + + ```python + from ultralytics import YOLO + + # قم بتحميل نموذج رسمي أو مخصص + model = YOLO('yolov8n.pt') # قم بتحميل نموذج رسمي Detect + model = YOLO('yolov8n-seg.pt') # قم بتحميل نموذج رسمي Segment + model = YOLO('yolov8n-pose.pt') # قم بتحميل نموذج رسمي Pose + model = YOLO('path/to/best.pt') # قم بتحميل نموذج مخصص مدرب + + # قم بتنفيذ التتبع باستخدام النموذج + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True) # التتبع باستخدام المتتبع الافتراضي + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True, tracker="bytetrack.yaml") # التتبع باستخدام متتبع ByteTrack + ``` + + === "CLI" + + ```bash + # قم بتنفيذ التتبع باستخدام مختلف النماذج باستخدام واجهة سطر الأوامر + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" # نموذج Detect رسمي + yolo track model=yolov8n-seg.pt source="https://youtu.be/LNwODJXcvt4" # نموذج Segment رسمي + yolo track model=yolov8n-pose.pt source="https://youtu.be/LNwODJXcvt4" # نموذج Pose رسمي + yolo track model=path/to/best.pt source="https://youtu.be/LNwODJXcvt4" # تم تدريب نموذج مخصص + + # تتبع عن طريق ByteTrack متتبع + yolo track model=path/to/best.pt tracker="bytetrack.yaml" + ``` + +كما يظهر في الاستخدام أعلاه، يتوفر التتبع لجميع نماذج Detect و Segment و Pose التي تعمل على مقاطع الفيديو أو مصادر البث. + +## الاعدادات + +### معاملات التتبع + +تتشارك إعدادات التتبع الخصائص مع وضع التوقعات (Predict)، مثل `conf` و `iou` و `show`. للحصول على مزيد من التكوينات، راجع صفحة النموذج [Predict](../modes/predict.md#inference-arguments). + +!!! 
Example "مثال" + + === "Python" + + ```python + from ultralytics import YOLO + + # قم بتكوين معلمات التتبع وقم بتشغيل التتبع + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True) + ``` + + === "CLI" + + ```bash + # قم بتكوين معلمات التتبع وقم بتشغيل التتبع باستخدام واجهة سطر الأوامر + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3, iou=0.5 show + ``` + +### اختيار المتتبع + +يتيح لك Ultralytics أيضًا استخدام ملف تكوين متتبع معدل. للقيام بذلك، أنقل نسخة من ملف تكوين المتتبع (مثل `custom_tracker.yaml`) من [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers) وقم بتعديل أي تكوينات (باستثناء `tracker_type`) حسب احتياجاتك. + +!!! Example "مثال" + + === "Python" + + ```python + from ultralytics import YOLO + + # قم بتحميل النموذج وتشغيل التتبع باستخدام ملف تكوين مخصص + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", tracker='custom_tracker.yaml') + ``` + + === "CLI" + + ```bash + # قم بتحميل النموذج وتشغيل التتبع باستخدام ملف تكوين مخصص باستخدام واجهة سطر الأوامر + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml' + ``` + +للحصول على قائمة شاملة من وسائط تتبع، راجع الصفحة [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers). + +## أمثلة Python + +### الحفاظ على المسارات التكرارية + +فيما يلي سكريبت Python باستخدام OpenCV (cv2) و YOLOv8 لتشغيل تتبع الكائنات على إطارات الفيديو. يفترض هذا السكريبت أنك قد قمت بالفعل بتثبيت الحزم اللازمة (opencv-python و ultralytics). المعامل `persist=True` يخبر المتتبع أن الصورة الحالية أو الإطار التالي في التسلسل ومن المتوقع أن يتوفر مسارات من الصورة السابقة في الصورة الحالية. + +!!! 
Example "For-loop للتدفق مع التتبع" + + ```python + import cv2 + from ultralytics import YOLO + + # حمّل نموذج YOLOv8 + model = YOLO('yolov8n.pt') + + # افتح ملف الفيديو + video_path = "path/to/video.mp4" + cap = cv2.VideoCapture(video_path) + + # تحلق عبر إطارات الفيديو + while cap.isOpened(): + # قراءة الإطار من الفيديو + success, frame = cap.read() + + if success: + # تشغيل تتبع YOLOv8 على الإطار ، وحفظ المسارات بين الإطارات + results = model.track(frame, persist=True) + + # تصور النتائج على الإطار + annotated_frame = results[0].plot() + + # عرض الإطار المعلق + cv2.imshow("YOLOv8 Tracking", annotated_frame) + + # كسر اللوب في حالة الضغط على 'q' + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # كسر اللوب في نهاية الفيديو + break + + # إطلاق كائن التقاط الفيديو وإغلاق نافذة العرض + cap.release() + cv2.destroyAllWindows() + ``` + +يرجى ملاحظة التغيير من `model(frame)` إلى `model.track(frame)` ، مما يمكن التتبع بدلاً من الكشف البسيط. سيتم تشغيل البرنامج المعدل على كل إطار فيديو وتصور النتائج وعرضها في نافذة. يمكن الخروج من الحلقة عن طريق الضغط على 'q'. + +### رسم المسارات عبر الوقت + +يمكن أن يوفر رسم المسارات الكائنية عبر الإطارات المتتالية إشارات قيمة حول أنماط الحركة والسلوك للكائنات المكتشفة في الفيديو. باستخدام Ultralytics YOLOv8 ، يعد تصوير هذه المسارات عملية سلسة وفعالة. + +في المثال التالي ، نوضح كيفية استخدام قدرات يوكو 8 YOLO لتتبع الكائنات لرسم حركة الكائنات المكتشفة عبر إطارات الفيديو المتعددة. يتضمن هذا البرنامج فتح ملف فيديو وقراءته إطارًا بإطار ، واستخدام نموذج YOLO لتحديد وتتبع العديد من الكائنات. عن طريق الاحتفاظ بنقاط الوسط لمربعات الحدود المكتشفة وتوصيلها ، يمكننا رسم خطوط تمثل المسارات التي تم اتباعها بواسطة الكائنات التي تمت متابعتها. + +!!! 
Example "رسم المسارات عبر إطارات الفيديو المتعددة" + + ```python + from collections import defaultdict + + import cv2 + import numpy as np + + from ultralytics import YOLO + + # حمّل نموذج YOLOv8 + model = YOLO('yolov8n.pt') + + # افتح ملف الفيديو + video_path = "path/to/video.mp4" + cap = cv2.VideoCapture(video_path) + + # احفظ تاريخ المسارات + track_history = defaultdict(lambda: []) + + # تحلق عبر إطارات الفيديو + while cap.isOpened(): + # قراءة الإطار من الفيديو + success, frame = cap.read() + + if success: + # تشغيل تتبع YOLOv8 على الإطار ، وحفظ المسارات بين الإطارات + results = model.track(frame, persist=True) + + # الحصول على المربعات ومعرفات المسار + boxes = results[0].boxes.xywh.cpu() + track_ids = results[0].boxes.id.int().cpu().tolist() + + # تصور النتائج على الإطار + annotated_frame = results[0].plot() + + # رسم المسارات + for box, track_id in zip(boxes, track_ids): + x, y, w, h = box + track = track_history[track_id] + track.append((float(x), float(y))) # x, y نقطة الوسط + if len(track) > 30: # احتفظ بـ 90 مسارًا لـ 90 إطارًا + track.pop(0) + + # رسم خطوط التتبع + points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2)) + cv2.polylines(annotated_frame, [points], isClosed=False, color=(230, 230, 230), thickness=10) + + # عرض الإطار المعلق + cv2.imshow("YOLOv8 Tracking", annotated_frame) + + # كسر اللوب في حالة الضغط على 'q' + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # كسر اللوب في نهاية الفيديو + break + + # إطلاق كائن التقاط الفيديو وإغلاق نافذة العرض + cap.release() + cv2.destroyAllWindows() + ``` + +### التتبع متعدد الخيوط + +يوفر التتبع متعدد الخيوط القدرة على تشغيل تتبع الكائنات على عدة تدفقات فيديو في وقت واحد. هذا مفيد بشكل خاص عند التعامل مع مدخلات فيديو متعددة ، مثل من كاميرات المراقبة المتعددة ، حيث يمكن أن يعزز المعالجة المتزامنة الكفاءة والأداء بشكل كبير. + +في السكريبت البايثون المقدم ، نستخدم وحدة `threading` في Python لتشغيل عدة نسخ متزامنة من المتتبع. 
يكون لكل خيط مسؤولية تشغيل المتتبع على ملف فيديو واحد ، وتعمل جميع الخيوط في الخلفية في نفس الوقت. + +للتأكد من أن كل خيط يتلقى المعلمات الصحيحة (ملف الفيديو والنموذج المستخدم وفهرس الملف) ، نحدد وظيفة `run_tracker_in_thread` التي تقبل هذه المعلمات وتحتوي على حلقة المتابعة الرئيسية. هذه الوظيفة تقرأ إطارات الفيديو مباشرة من مصدر الملف الواحد ، وتشغّل المتتبع ، وتعرض النتائج. + +يُستخدم في هذا المثال نموذجان مختلفان: 'yolov8n.pt' و 'yolov8n-seg.pt' ، يقوم كل منهما بتتبع الكائنات في ملف فيديو مختلف. تم تحديد ملفات الفيديو في `video_file1` و `video_file2`. + +تعيين المعامل `daemon=True` في `threading.Thread` يعني أن هذه الخيوط سيتم إغلاقها بمجرد انتهاء البرنامج الرئيسي. ثم نبدأ الخيوط باستخدام `start()` ونستخدم `join()` لجعل الخيط الرئيسي ينتظر حتى ينتهي خيطا المتتبع. + +أخيرًا ، بعد اكتمال جميع الخيوط لمهمتها ، يتم إغلاق النوافذ التي تعرض النتائج باستخدام `cv2.destroyAllWindows()`. + +!!! Example "Streaming for-loop with tracking" + + ```python + import threading + import cv2 + from ultralytics import YOLO + + + def run_tracker_in_thread(filename, model, file_index): + """ + يشغل ملف فيديو أو مصدر تيار الكاميرا بالتزامن مع YOLOv8 النموذج باستخدام تعدد الخيوط. + + هذه الوظيفة تلتقط إطارات الفيديو من ملف أو مصدر الكاميرا المعروف ، وتستخدم نموذج YOLOv8 لتتبع الكائنات. + يعمل البرنامج في خيطه الخاص للمعالجة المتزامنة. + + Args: + filename (str): مسار ملف الفيديو أو معرف مصدر كاميرا الويب / خارجية. + model (obj): كائن نموذج YOLOv8. + file_index (int): مؤشر لتحديد الملف بشكل فريد ، يُستخدم لأغراض العرض. + + ملاحظة: + اضغط على 'q' لإنهاء نافذة عرض الفيديو.
+ """ + video = cv2.VideoCapture(filename) # قراءة ملف الفيديو + + while True: + ret, frame = video.read() # قراءة إطارات الفيديو + + # إنهاء الدورة إذا لم يتبقى إطارات على الفيديوين + if not ret: + break + + # تتبع كائنات في الإطارات إذا توفرت + results = model.track(frame, persist=True) + res_plotted = results[0].plot() + cv2.imshow(f"Tracking_Stream_{file_index}", res_plotted) + + key = cv2.waitKey(1) + if key == ord('q'): + break + + # إطلاق مصدري الفيديو + video.release() + + + # حمّل النماذج + model1 = YOLO('yolov8n.pt') + model2 = YOLO('yolov8n-seg.pt') + + # حدد ملفات الفيديو للمتابعين + video_file1 = "path/to/video1.mp4" # مسار ملف الفيديو ، 0 لكاميرا الويب + video_file2 = 0 # مسار ملف الفيديو ، 0 لكاميرا الويب ، 1 لكاميرا خارجية + + # إنشاء خيوط المتابع + tracker_thread1 = threading.Thread(target=run_tracker_in_thread, args=(video_file1, model1 ,1), daemon=True) + tracker_thread2 = threading.Thread(target=run_tracker_in_thread, args=(video_file2, model2, 2), daemon=True) + + # بدء خيوط المتابع + tracker_thread1.start() + tracker_thread2.start() + + # انتظر حتى ينتهي خيط المتابع + tracker_thread1.join() + tracker_thread2.join() + + # Clean up and close windows + cv2.destroyAllWindows() + ``` + +يمكن بسهولة توسيع هذا المثال للتعامل مع ملفات فيديو ونماذج أخرى من خلال إنشاء مزيد من الخيوط وتطبيق نفس المنهجية. + +## المساهمة في المتتبعون الجديدون + +هل أنت ماهر في التتبع متعدد الكائنات وقد نفذت أو صيغت بنجاح خوارزمية تتبع باستخدام Ultralytics YOLO؟ ندعوك للمشاركة في قسم المتتبعين لدينا في [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers)! قد تكون التطبيقات في العالم الحقيقي والحلول التي تقدمها لا تقدر بثمن للمستخدمين العاملين على مهام التتبع. + +من خلال المساهمة في هذا القسم ، تساعد في توسيع نطاق حلول التتبع المتاحة في إطار Ultralytics YOLO ، مضيفًا طبقة أخرى من الوظائف والفعالية للمجتمع. 
+ +لبدء المساهمة ، يرجى الرجوع إلى [دليل المساهمة الخاص بنا](https://docs.ultralytics.com/help/contributing) للحصول على تعليمات شاملة حول تقديم طلب سحب (PR) 🛠️. نتطلع بشكل كبير إلى ما ستجلبه للطاولة! + +لنعزز معًا قدرات عملية التتبع لأجهزة Ultralytics YOLO 🙏! + +[vehicle track]: https://github.com/RizwanMunawar/ultralytics/assets/62513924/ee6e6038-383b-4f21-ac29-b2a1c7d386ab + +[people track]: https://github.com/RizwanMunawar/ultralytics/assets/62513924/93bb4ee2-77a0-4e4e-8eb6-eb8f527f0527 + +[fish track]: https://github.com/RizwanMunawar/ultralytics/assets/62513924/a5146d0f-bfa8-4e0a-b7df-3c1446cd8142 diff --git a/docs/ar/modes/train.md b/docs/ar/modes/train.md new file mode 100644 index 0000000..54881f1 --- /dev/null +++ b/docs/ar/modes/train.md @@ -0,0 +1,286 @@ +--- +comments: true +description: دليل خطوة بخطوة لتدريب نماذج YOLOv8 باستخدام Ultralytics YOLO بما في ذلك أمثلة على التدريب باستخدام بطاقة رسومات منفصلة ومتعددة البطاقات الرسومية +keywords: Ultralytics، YOLOv8، YOLO، كشف الكائنات، وضع تدريب، مجموعة بيانات مخصصة، تدريب بطاقة رسومات، متعددة البطاقات الرسومية، معلمات تكبير، أمثلة سطر الأوامر، أمثلة بايثون +--- + +# تدريب النموذج باستخدام Ultralytics YOLO + +بيئة ومدمجات Ultralytics YOLO + +## المقدمة + +يتضمن تدريب نموذج التعلم العميق تزويده بالبيانات وضبط معلماته بحيث يتمكن من إجراء توقعات دقيقة. يتم تصميم وضع التدريب في Ultralytics YOLOv8 لتدريب فعال وفعال لنماذج كشف الكائنات، مستغلاً تمامًا إمكانات الأجهزة الحديثة. يهدف هذا الدليل إلى شرح جميع التفاصيل التي تحتاج إلى البدء في تدريب النماذج الخاصة بك باستخدام مجموعة متينة من ميزات YOLOv8. + +

+
+ +
+ شاهد: كيفية تدريب نموذج YOLOv8 على مجموعة البيانات المخصصة الخاصة بك في Google Colab. +

+ +## لماذا اختيار Ultralytics YOLO للتدريب؟ + +إليك بعض الأسباب المقنعة لاختيار وضع التدريب في YOLOv8: + +- **الكفاءة:** استفد إلى أقصى حد من الأجهزة الخاصة بك، سواء كنت تستخدم بطاقة رسومات واحدة أو توسيعها عبر عدة بطاقات رسومات. +- **تعدد الاستخدامات:** قم بالتدريب على مجموعات البيانات المخصصة بالإضافة إلى المجموعات المتاحة بسهولة مثل COCO و VOC و ImageNet. +- **سهل الاستخدام:** واجهة سطر الأوامر CLI وواجهة Python البسيطة والقوية لتجربة تدريب مباشرة. +- **مرونة المعلمات:** مجموعة واسعة من المعلمات القابلة للتخصيص لضبط أداء النموذج. + +### الميزات الرئيسية لوضع التدريب + +تتمثل الميزات البارزة لوضع التدريب في YOLOv8 في ما يلي: + +- **تنزيل مجموعة البيانات تلقائيًا:** تقوم مجموعات البيانات القياسية مثل COCO و VOC و ImageNet بالتنزيل تلقائيًا عند أول استخدام. +- **دعم متعدد البطاقات الرسومية:** قم بتوزيع العمليات التدريبية بسلاسة عبر عدة بطاقات رسومات لتسريع العملية. +- **ضبط المعلمات:** الخيار لتعديل المعلمات التكبير من خلال ملفات تكوين YAML أو وسائط سطر الأوامر. +- **مراقبة وتتبع:** تتبع في الوقت الفعلي لمقاييس التدريب وتصور عملية التعلم لتحقيق رؤى أفضل. + +!!! Example "نصيحة" + + * يتم تنزيل مجموعات YOLOv8 القياسية مثل COCO و VOC و ImageNet وغيرها تلقائيًا عند الاستخدام الأول، على سبيل المثال: `yolo train data=coco.yaml` + +## أمثلة استخدام + +تدريب YOLOv8n على مجموعة بيانات COCO128 لمدة 100 حقبة بحجم صورة 640. يمكن تحديد جهاز التدريب باستخدام الوسيطة `device`. إذا لم يتم تمرير وسيطة، سيتم استخدام الجهاز بطاقة الرسومات "device=0" إذا كانت متاحة، وإلا سيتم استخدام `device=cpu`. استعرض الجدول الزمني أدناه للحصول على قائمة كاملة بوسائط التدريب. + +!!! Example "أمثلة سطر الأوامر للتدريب باستخدام بطاقة رسومات مستقلة ومعالج مركزي" + + يتم تحديد الجهاز تلقائيًا. إذا كانت بطاقة رسومات متاحة، سيتم استخدامها، وإلا ستبدأ التدريب على المعالج المركزي. 
+ + === "بايثون" + ```python + from ultralytics import YOLO + + # تحميل النموذج + model = YOLO('yolov8n.yaml') # إنشاء نموذج جديد من ملف YAML + model = YOLO('yolov8n.pt') # تحميل نموذج مدرب مسبقًا (الأكثر توصية للتدريب) + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # إنشاء من ملف YAML ونقل الأوزان + + # تدريب النموذج + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + + === "سطر الأوامر" + ```bash + # إنشاء نموذج جديد من ملف YAML وبدء التدريب من البداية + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # بدء التدريب من نموذج *.pt مدرب مسبقًا + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # إنشاء نموذج جديد من ملف YAML ونقل الأوزان المدربة مسبقًا وبدء التدريب + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### التدريب متعدد البطاقات الرسومية + +يتيح التدريب متعدد البطاقات الرسومية استخدام الموارد الأجهزة المتاحة بكفاءة أكبر من خلال توزيع أعباء التدريب عبر عدة بطاقات رسومية. هذه الميزة متاحة من خلال واجهة برمجة التطبيقات باستخدام Python وسطر الأوامر. لتمكين التدريب متعدد البطاقات الرسومية، حدد معرفات أجهزة GPU التي ترغب في استخدامها. + +!!! Example "أمثلة على التدريب متعدد البطاقات الرسومية" + + للتدريب باستخدام أجهزتي GPU، جهاز CUDA 0 و 1، استخدم الأوامر التالية. قم بتوسيعها لاستخدام المزيد من البطاقات. 
+ + === "بايثون" + ```python + from ultralytics import YOLO + + # تحميل النموذج + model = YOLO('yolov8n.pt') # تحميل نموذج مدرب مسبقًا (الأكثر توصية للتدريب) + + # تدريب النموذج باستخدام جهازي GPU + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device=[0, 1]) + ``` + + === "سطر الأوامر" + ```bash + # بدء التدريب من نموذج *.pt مدرب مسبقًا باستخدام بطاقات GPU 0 و 1 + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=0,1 + ``` + +### التدريب باستخدام Apple M1 و M2 MPS + +مع دعم شرائح Apple M1 و M2 المدمج في نماذج Ultralytics YOLO، يمكنك الآن تدريب نماذجك على الأجهزة التي تستخدم نظام Metal Performance Shaders (MPS) القوي. يوفر MPS طريقة عالية الأداء لتنفيذ المهام الحسابية ومعالجة الصور على شرائح السيليكون المخصصة من Apple. + +لتمكين التدريب على شرائح Apple M1 و M2، يجب عليك تحديد "mps" كجهازك عند بدء عملية التدريب. فيما يلي مثال لكيفية القيام بذلك في بايثون وعبر سطر الأوامر: + +!!! Example "مثال على التدريب بواسطة MPS" + + === "بايثون" + ```python + from ultralytics import YOLO + + # تحميل النموذج + model = YOLO('yolov8n.pt') # تحميل نموذج مدرب مسبقًا (الأكثر توصية للتدريب) + + # تدريب النموذج باستخدام MPS + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device='mps') + ``` + + === "سطر الأوامر" + ```bash + # بدء التدريب من نموذج *.pt مدرب مسبقًا باستخدام MPS + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=mps + ``` + +عند الاستفادة من قدرة المعالجة الحاسوبية لشرائح M1/M2، يتيح ذلك معالجة أكثر كفاءة لمهام التدريب. للحصول على إرشادات أكثر تفصيلاً وخيارات تكوين متقدمة، يرجى الرجوع إلى [وثائق PyTorch MPS](https://pytorch.org/docs/stable/notes/mps.html). + +### استئناف التدريب المقطوع + +يعتبر استئناف التدريب من حالة محفوظة سابقًا ميزة حاسمة عند العمل مع نماذج التعلم العميق. يمكن أن يكون هذا مفيدًا في العديد من السيناريوهات، مثل عند تعطل عملية التدريب بشكل غير متوقع، أو عند الرغبة في متابعة تدريب نموذج على بيانات جديدة أو لفترة زمنية أطول. 
+ +عند استئناف التدريب، يقوم Ultralytics YOLO بتحميل الأوزان من آخر نموذج محفوظ وأيضًا استعادة حالة المحسن، وجدولة معدل التعلم، وعدد الحقبة. هذا يتيح لك متابعة عملية التدريب بشكل سلس من حيث توقفت. + +يمكنك بسهولة استئناف التدريب في Ultralytics YOLO عن طريق تعيين الوسيطة `resume` إلى `True` عند استدعاء طريقة `train`، وتحديد المسار إلى ملف `.pt` الذي يحتوي على أوزان النموذج المدرب جزئيًا. + +فيما يلي مثال لكيفية استئناف تدريب مقطوع باستخدام بايثون وعبر سطر الأوامر: + +!!! Example "مثال على استئناف التدريب" + + === "بايثون" + ```python + from ultralytics import YOLO + + # تحميل النموذج + model = YOLO('path/to/last.pt') # تحميل نموذج مدرب جزئيًا + + # استئناف التدريب + results = model.train(resume=True) + ``` + + === "سطر الأوامر" + ```bash + # استئناف تدريب متقطع + yolo train resume model=path/to/last.pt + ``` + +من خلال تعيين `resume=True`، ستواصل وظيفة 'train' التدريب من حيث توقفت، باستخدام الحالة المخزنة في ملف 'path/to/last.pt'. إذا تم حذف الوسيطة `resume` أو تعيينها على `False`، ستبدأ وظيفة 'train' جلسة تدريب جديدة. + +تذكر أنه يتم حفظ نقاط التفتيش في نهاية كل حقبة افتراضياً، أو في فترة ثابتة باستخدام وسيطة 'save_period'، لذا يجب عليك إتمام حقبة واحدة على الأقل لاستئناف تشغيل تدريب. + +## الوسائط + +تتعلق إعدادات التدريب لنماذج YOLO بالمعلمات والتكوينات المختلفة المستخدمة لتدريب النموذج على مجموعة بيانات ما. يمكن أن تؤثر هذه الإعدادات على أداء النموذج وسرعته ودقته. تتضمن بعض إعدادات YOLO التدريب الشائعة حجم الدُفعات، معدل التعلم، الزخم، والتقليل القيمي للأوزان. العوامل الأخرى التي قد تؤثر في عملية التدريب تشمل اختيار المحسن، اختيار دالة الخسارة، وحجم وتركيب مجموعة البيانات التدريب. من المهم ضبط وتجربة هذه الإعدادات بعناية لتحقيق أفضل أداء ممكن لمهمة معينة. 
+ +| المفتاح | القيمة | الوصف | +|-------------------|----------|---------------------------------------------------------------------------------------------------------------------------| +| `model` | `None` | مسار إلى ملف النموذج، على سبيل المثال yolov8n.pt، yolov8n.yaml | +| `data` | `None` | مسار إلى ملف البيانات، على سبيل المثال coco128.yaml | +| `epochs` | `100` | عدد الحقب للتدريب ل | +| `patience` | `50` | حقب للانتظار بدون تحسن ظاهر لإيقاف التدريب مبكرا | +| `batch` | `16` | عدد الصور في كل دُفعة (-1 for AutoBatch) | +| `imgsz` | `640` | حجم الصور الدخل بصورة مثالية | +| `save` | `True` | حال إنقاذ النقاط المفتوحة للتدريب ونتائج الكشف | +| `save_period` | `-1` | حفظ النقطة الفاصلة كل x حقبة (تكون معطلة إذا كانت < 1) | +| `cache` | `False` | صحيح / ذاكرة عشوائية أو قرص / غير صحيح. استخدم ذاكرة التخزين المؤقت في تحميل البيانات | +| `device` | `None` | الجهاز لتشغيل التدريب عليه، على سبيل المثال جهاز الرسومات cuda=0 أو جهاز الرسومات cuda=0,1,2,3 أو جهاز المعالج المركزيcpu | +| `workers` | `8` | عدد خيوط العاملة لتحميل البيانات (لكل RANK إذا كان DDP) | +| `project` | `None` | اسم المشروع | +| `name` | `None` | اسم التجربة | +| `exist_ok` | `False` | ما إذا كان سيتم الكتابة فوق تجربة موجودة | +| `pretrained` | `True` | (bool أو str) ما إذا كان سيتم استخدام نموذج متدرب مسبقًا (bool) أو نموذج لتحميل الأوزان منه (str) | +| `optimizer` | `'auto'` | المحسن لاستخدامه، الخيارات=[SGD، Adam، Adamax، AdamW، NAdam، RAdam، RMSProp، Auto] | +| `verbose` | `False` | ما إذا كان سيتم طباعة مخرجات مفصلة | +| `seed` | `0` | البذرة العشوائية لإعادة الإنتاجية | +| `deterministic` | `True` | ما إذا كان يتم تمكين الوضع المحدد | +| `single_cls` | `False` | يجب تدريب بيانات متعددة الفئات كفئة واحدة | +| `rect` | `False` | تدريب مستطيل باستخدام تجميع الدُفعات للحد الأدنى من الحشو | +| `cos_lr` | `False` | استخدم جدولة معدل التعلم بتوقيت الكوسا | +| `close_mosaic` | `10` | (int) تعطيل التكبير التجانبي للحجم للحقب النهائية (0 للتعطيل) | +| `resume` | `False` | استأنف التدريب من النقطة 
الأخيرة | +| `amp` | `True` | تدريب دقة مختلطة تلقائية (AMP)، الخيارات=[True، False] | +| `fraction` | `1.0` | نسبة مجموعة البيانات المراد تدريبها (الافتراضي هو 1.0، جميع الصور في مجموعة التدريب) | +| `profile` | `False` | قم بتشغيل بروفايل السرعة لمشغلات ONNX و TensorRT أثناء التدريب للمسجلات | +| `freeze` | `None` | (int أو list، اختياري) تجميد أول n طبقة، أو قائمة طبقات الفهرس خلال التدريب | +| `lr0` | `0.01` | معدل التعلم الأولي (على سبيل المثال SGD=1E-2، Adam=1E-3) | +| `lrf` | `0.01` | معدل التعلم النهائي (lr0 * lrf) | +| `momentum` | `0.937` | الزخم SGD / Adam beta1 | +| `weight_decay` | `0.0005` | تقليل الأوزان للمحسن (5e-4) | +| `warmup_epochs` | `3.0` | حقب الاحماء (الأجزاء المئوية مقبولة) | +| `warmup_momentum` | `0.8` | الزخم الأولي للتدفق الأعلى | +| `warmup_bias_lr` | `0.1` | نسبة تعلم الانحياز الأولي للتدفق العلوي | +| `box` | `7.5` | وزن فاقد الصندوق | +| `cls` | `0.5` | وزن فاقد التصنيف (تناسب مع البكسل) | +| `dfl` | `1.5` | وزن الخسارة الأمامية للتصنيف والصندوق | +| `pose` | `12.0` | وزن فاقد الوضع (الوضع فقط) | +| `kobj` | `2.0` | وزن فاقد نقطة المفتاح (الوضع فقط) | +| `label_smoothing` | `0.0` | التسوية الغموض (كسر) | +| `nbs` | `64` | حجم الدُفعة الاسمي | +| `overlap_mask` | `True` | التحجيم يجب أن يتداخل أقنعة التدريب (التدريب الفصلي فقط) | +| `mask_ratio` | `4` | معدل تحجيم أقنعة (التدريب الفصلي فقط) | +| `dropout` | `0.0` | استخدام تنظيم الإسقاط (التدريب التطبيقي فقط) | +| `val` | `True` | التحقق/الاختبار خلال التدريب | + +## تسجيل + +عند تدريب نموذج YOLOv8، قد تجد أنه من المفيد تتبع أداء النموذج مع مرور الوقت. هنا يأتي دور تسجيل. يوفر Ultralytics' YOLO دعمًا لثلاثة أنواع من أجهزة السجل - Comet و ClearML و TensorBoard. + +لاستخدام سجل، حدده من قائمة السحب أسفل الكود وقم بتشغيله. سيتم تثبيت السجل المختار وتهيئته. + +### Comet + +[Comet](../../../integrations/comet.md) هو منصة تسمح لعلماء البيانات والمطورين بمتابعة ومقارنة وشرح وتحسين التجارب والنماذج. يوفر وظائف مثل المقاييس الزمنية في الوقت الحقيقي وفروقات الشفرة وتتبع المعلمات. 
+ +لاستخدام Comet: + +!!! Example "أمثلة بايثون" + + === "بايثون" + ```python + # pip install comet_ml + import comet_ml + + comet_ml.init() + ``` + +تذكر تسجيل الدخول إلى حسابك في Comet على موقعهم على الويب والحصول على مفتاح API الخاص بك. ستحتاج إلى إضافته إلى الإعدادات المتغيرة في البيئة الخاصة بك أو برنامج النص الخاص بك لتسجيل التجارب الخاصة بك. + +### ClearML + +[ClearML](https://www.clear.ml/) هي منصة مفتوحة المصدر تعمل على تتبع التجارب وتسهيل مشاركة الموارد بكفاءة. تم تصميمه لمساعدة الفرق في إدارة وتنفيذ وإعادة إنتاج عملهم في مجال تعلم الآلة بكفاءة أكبر. + +لاستخدام ClearML: + +!!! Example "أمثلة بايثون" + + === "بايثون" + ```python + # pip install clearml + import clearml + + clearml.browser_login() + ``` + +بعد تشغيل هذا السكريبت، ستحتاج إلى تسجيل الدخول إلى حساب ClearML الخاص بك على المستعرض ومصادقة جلستك. + +## TensorBoard + +[TensorBoard](https://www.tensorflow.org/tensorboard) هي مجموعة أدوات لتصور TensorFlow ، تسمح لك بتصور نموذج TensorFlow الخاص بك ، ورسم المقاييس الكمية حول تنفيذ النموذج الخاص بك ، وعرض بيانات إضافية مثل الصور التي تمر عبرها. + +للاستفادة من TensorBoard في [Google Colab](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb): + +!!! Example "أمثلة سطر الأوامر" + + === "سطر الأوامر" + ```bash + load_ext tensorboard + tensorboard --logdir ultralytics/runs # استبدل بالدليل 'runs' + ``` + +لاستخدام TensorBoard محليًا، قم بتشغيل الأمر أدناه واعرض النتائج على الرابط http://localhost:6006/. + +!!! Example "أمثلة سطر الأوامر" + + === "سطر الأوامر" + ```bash + tensorboard --logdir ultralytics/runs # استبدل بالدليل 'runs' + ``` + +سيتم تحميل TensorBoard وتوجيهه إلى الدليل الذي يتم حفظ سجلات التدريب فيه. + +بعد إعداد السجل الخاص بك، يمكنك الاستمرار في تدريب النموذج. سيتم سجل جميع مقاييس التدريب تلقائيًا في المنصة التي اخترتها، ويمكنك الوصول إلى هذه السجلات لمراقبة أداء النموذج الخاص بك مع مرور الوقت ومقارنة نماذج مختلفة وتحديد المجالات التي يمكن تحسينها. 
diff --git a/docs/ar/modes/val.md b/docs/ar/modes/val.md new file mode 100644 index 0000000..ed95432 --- /dev/null +++ b/docs/ar/modes/val.md @@ -0,0 +1,86 @@ +--- +comments: true +description: دليل لاختبار نماذج YOLOv8 الصحيحة. تعرف على كيفية تقييم أداء نماذج YOLO الخاصة بك باستخدام إعدادات ومقاييس التحقق من الصحة مع أمثلة برمجية باللغة البايثون وواجهة سطر الأوامر. +keywords: Ultralytics, YOLO Docs, YOLOv8, التحقق من الصحة, تقييم النموذج, المعلمات الفرعية, الدقة, المقاييس, البايثون, واجهة سطر الأوامر +--- + +# التحقق من النماذج باستخدام Ultralytics YOLO + +بيئة النظام البيئي والتكاملات لـ Ultralytics YOLO + +## مقدمة + +يعتبر التحقق من النموذج خطوة حاسمة في خط أنابيب التعلم الآلي، حيث يتيح لك تقييم جودة النماذج المدربة. يوفر وضع الـ Val في Ultralytics YOLOv8 مجموعة أدوات ومقاييس قوية لتقييم أداء نماذج الكشف عن الكائنات الخاصة بك. يعمل هذا الدليل كمصدر كامل لفهم كيفية استخدام وضع الـ Val بشكل فعال لضمان أن نماذجك دقيقة وموثوقة. + +## لماذا يوفر Ultralytics YOLO التحقق من الصحة + +هنا هي الأسباب التي تجعل استخدام وضع الـ Val في YOLOv8 مفيدًا: + +- **الدقة:** الحصول على مقاييس دقيقة مثل mAP50 و mAP75 و mAP50-95 لتقييم نموذجك بشكل شامل. +- **الراحة:** استخدم الميزات المدمجة التي تتذكر إعدادات التدريب، مما يبسط عملية التحقق من الصحة. +- **مرونة:** قم بالتحقق من النموذج باستخدام نفس المجموعات البيانات وأحجام الصور أو مجموعات بيانات وأحجام صور مختلفة. +- **ضبط المعلمات الفرعية:** استخدم المقاييس التحقق لضبط نموذجك لتحسين الأداء. + +### الميزات الرئيسية لوضع الـ Val + +هذه هي الوظائف المميزة التي يوفرها وضع الـ Val في YOLOv8: + +- **الإعدادات التلقائية:** يتذكر النماذج إعدادات التدريب الخاصة بها للتحقق من الصحة بسهولة. +- **دعم متعدد المقاييس:** قيم نموذجك بناءً على مجموعة من مقاييس الدقة. +- **واجهة سطر الأوامر وواجهة برمجة Python:** اختر بين واجهة سطر الأوامر أو واجهة برمجة Python حسب تفضيلك للتحقق من الصحة. +- **توافق البيانات:** يعمل بسلاسة مع مجموعات البيانات المستخدمة خلال مرحلة التدريب بالإضافة إلى مجموعات البيانات المخصصة. + +!!! 
Tip "نصيحة" + + * تتذكر نماذج YOLOv8 إعدادات التدريب تلقائيًا، لذا يمكنك التحقق من النموذج بنفس حجم الصورة وعلى مجموعة البيانات الأصلية بسهولة باستخدام "yolo val model=yolov8n.pt" أو "model('yolov8n.pt').val()" + +## أمثلة الاستخدام + +تحقق من دقة النموذج المدرب YOLOv8n على مجموعة بيانات COCO128. لا يلزم تمرير أي وسيطة حيث يحتفظ الـ model ببيانات التدريب والوسيطات كسمات النموذج. انظر الجدول أدناه للحصول على قائمة كاملة من وسيطات التحقق. + +!!! Example "مثال" + + === "البايثون" + + ```python + from ultralytics import YOLO + + # تحميل النموذج + model = YOLO('yolov8n.pt') # تحميل النموذج الرسمي + model = YOLO('path/to/best.pt') # تحميل نموذج مخصص + + # التحقق من النموذج + metrics = model.val() # لا يلزم أي وسيطات، يتذكر التكوين والوسيطات + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # قائمة تحتوي على map50-95 لكل فئة + ``` + === "واجهة سطر الأوامر" + + ```bash + yolo detect val model=yolov8n.pt # تجريب نموذج رسمي + yolo detect val model=path/to/best.pt # تجريب نموذج مخصص + ``` + +## الوسيطات + +تشير إعدادات التحقق بالنسبة لنماذج YOLO إلى المعلمات الفرعية والتكوينات المختلفة المستخدمة لتقييم أداء النموذج على مجموعة بيانات التحقق. هذه الإعدادات يمكن أن تؤثر على أداء النموذج وسرعته ودقته. تشمل بعض إعدادات التحقق الشائعة في YOLO حجم الدفعة وتكرارات تنفيذ التحقق أثناء التدريب والمقاييس المستخدمة لتقييم أداء النموذج. العوامل الأخرى التي قد تؤثر على العملية الخاصة بالتحقق تشمل حجم وتركيب مجموعة بيانات التحقق والمهمة المحددة التي يتم استخدام النموذج فيها. من المهم ضبط هذه الإعدادات وتجربتها بعناية لضمان أداء جيد للنموذج على مجموعة بيانات التحقق ولاكتشاف فرط التخصيص (overfitting) ومنعه. 
+ +| مفتاح | القيمة | الوصف | +|---------------|---------|------------------------------------------------------------------------------------| +| `data` | `None` | مسار إلى ملف البيانات، على سبيل المثال coco128.yaml | +| `imgsz` | `640` | حجم الصور الداخلية باعتبارها عدد صحيح | +| `batch` | `16` | عدد الصور لكل دفعة (-1 للدفع الآلي) | +| `save_json` | `False` | حفظ النتائج في ملف JSON | +| `save_hybrid` | `False` | حفظ النسخة المختلطة للتسميات (التسميات + التنبؤات الإضافية) | +| `conf` | `0.001` | حد الثقة في كشف الكائن | +| `iou` | `0.6` | حد تداخل على المتحدة (IoU) لعملية الجمع والطرح | +| `max_det` | `300` | العدد الأقصى من الكشفات لكل صورة | +| `half` | `True` | استخدم التنصت نصف الدقة (FP16) | +| `device` | `None` | الجهاز الذي يتم تشغيله عليه، على سبيل المثال جهاز Cuda=0/1/2/3 أو جهاز=معالج (CPU) | +| `dnn` | `False` | استخدم OpenCV DNN لعملية التنصت الأمثل | +| `plots` | `False` | إظهار الرسوم البيانية أثناء التدريب | +| `rect` | `False` | تحقق صيغة *rectangular* مع تجميع كل دفعة للحصول على الحد الأدنى من التعبئة | +| `split` | `val` | اختر تقسيم البيانات للتحقق من الصحة، على سبيل المثال "val"، "test" أو "train" | +| diff --git a/docs/ar/quickstart.md b/docs/ar/quickstart.md new file mode 100644 index 0000000..2364115 --- /dev/null +++ b/docs/ar/quickstart.md @@ -0,0 +1,326 @@ +--- +comments: true +description: استكشف أساليب مختلفة لتثبيت Ultralytics باستخدام pip و conda و git و Docker. تعرّف على كيفية استخدام Ultralytics مع واجهة سطر الأوامر أو ضمن مشاريع Python الخاصة بك. +keywords: تثبيت Ultralytics, pip install Ultralytics, Docker install Ultralytics, Ultralytics command line interface, Ultralytics Python interface +--- + +## تثبيت Ultralytics + +يوفر Ultralytics طرق تثبيت مختلفة بما في ذلك pip و conda و Docker. يمكنك تثبيت YOLOv8 عن طريق حزمة `ultralytics` من خلال pip للإصدار الأحدث والمستقر أو من خلال استنساخ [مستودع Ultralytics على GitHub](https://github.com/ultralytics/ultralytics) للحصول على الإصدار الأحدث. 
يمكن استخدام Docker لتنفيذ الحزمة في حاوية معزولة، وتجنب التثبيت المحلي. + +!!! Note "ملاحظة" + + 🚧 تم بناء وثائقنا متعددة اللغات حاليًا، ونعمل بجد لتحسينها. شكرًا لك على صبرك! 🙏 + +!!! Example "تثبيت" + + === "تثبيت باستخدام pip (الموصَى به)" + قم بتثبيت حزمة `ultralytics` باستخدام pip، أو قم بتحديث التثبيت الحالي عن طريق تشغيل `pip install -U ultralytics`. قم بزيارة مؤشر Python Package Index (PyPI) للحصول على مزيد من التفاصيل حول حزمة `ultralytics`: [https://pypi.org/project/ultralytics/](https://pypi.org/project/ultralytics/). + + [![نسخة PyPI](https://badge.fury.io/py/ultralytics.svg)](https://badge.fury.io/py/ultralytics) [![التنزيلات](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) + + ```bash + # قم بتثبيت حزمة ultralytics من PyPI + pip install ultralytics + ``` + + يمكنك أيضًا تثبيت حزمة `ultralytics` مباشرة من مستودع GitHub [repository](https://github.com/ultralytics/ultralytics). قد يكون ذلك مفيدًا إذا كنت ترغب في الحصول على الإصدار التجريبي الأحدث. تأكد من تثبيت أداة الأوامر Git على نظامك. يُثبّت الأمر `@main` الفرع `main` ويمكن تعديله إلى فرع آخر، على سبيل المثال `@my-branch`، أو يمكن إزالته تمامًا للانتقال إلى الفرع الرئيسي `main`. + + ```bash + # قم بتثبيت حزمة ultralytics من GitHub + pip install git+https://github.com/ultralytics/ultralytics.git@main + ``` + + + === "تثبيت باستخدام conda" + Conda هو مدير حزم بديل لـ pip ويمكن استخدامه أيضًا للتثبيت. قم بزيارة Anaconda للحصول على مزيد من التفاصيل على [https://anaconda.org/conda-forge/ultralytics](https://anaconda.org/conda-forge/ultralytics). يمكن العثور على مستودع Ultralytics feedstock لتحديث حزمة conda على [https://github.com/conda-forge/ultralytics-feedstock/](https://github.com/conda-forge/ultralytics-feedstock/). 
+ + + [![وصفة conda](https://img.shields.io/badge/recipe-ultralytics-green.svg)](https://anaconda.org/conda-forge/ultralytics) [![تنزيلات conda](https://img.shields.io/conda/dn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![إصدار conda](https://img.shields.io/conda/vn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![منصات conda](https://img.shields.io/conda/pn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) + + ```bash + # قم بتثبيت حزمة ultralytics باستخدام conda + conda install -c conda-forge ultralytics + ``` + + !!! Note "ملاحظة" + + إذا كنت تقوم بالتثبيت في بيئة CUDA، فإن الممارسة الجيدة هي تثبيت `ultralytics`, `pytorch` و `pytorch-cuda` في نفس الأمر للسماح لمدير حزم conda بحل أي تعارضات، وإلا فقم بتثبيت `pytorch-cuda` أخيرًا للسماح له بتجاوز حزمة `pytorch` المحددة لوحدة المعالجة المركزية إذا لزم الأمر. + ```bash + # قم بتثبيت كافة الحزم معًا باستخدام conda + conda install -c pytorch -c nvidia -c conda-forge pytorch torchvision pytorch-cuda=11.8 ultralytics + ``` + + ### صورة Docker في Conda + + تتوفر أيضًا صور Docker لـ Conda لـ Ultralytics من [DockerHub](https://hub.docker.com/r/ultralytics/ultralytics). تستند هذه الصور إلى [Miniconda3](https://docs.conda.io/projects/miniconda/en/latest/) وهي وسيلة بسيطة لبدء استخدام `ultralytics` في بيئة Conda. + + ```bash + # قم بتعيين اسم الصورة بوصفه متغيرًا + t=ultralytics/ultralytics:latest-conda + + # اسحب أحدث صورة ultralytics من Docker Hub + sudo docker pull $t + + # قم بتشغيل صورة ultralytics في حاوية مع دعم GPU + sudo docker run -it --ipc=host --gpus all $t # all GPUs + sudo docker run -it --ipc=host --gpus '"device=2,3"' $t # تحديد وحدات GPU بعينها + ``` + + === "استنساخ Git" + قم بنسخ مستودع `ultralytics` إذا كنت مهتمًا بالمساهمة في التطوير أو ترغب في تجربة الشفرة المصدرية الأحدث. بعد الاستنساخ، انتقل إلى الدليل وقم بتثبيت الحزمة في وضع التحرير `-e` باستخدام pip. 
+ ```bash + # قم بنسخ مستودع ultralytics + git clone https://github.com/ultralytics/ultralytics + + # انتقل إلى الدليل المنسوخ + cd ultralytics + + # قم بتثبيت الحزمة في وضع التحرير + pip install -e . + ``` + + === "Docker" + + تمكنك من استخدام Docker بسهولة لتنفيذ حزمة `ultralytics` في حاوية معزولة، مما يضمن أداءً سلسًا ومتسقًا في مختلف البيئات. عن طريق اختيار إحدى صور Docker الأصلية لـ `ultralytics` من [Docker Hub](https://hub.docker.com/r/ultralytics/ultralytics)، لن تتجنب فقط تعقيد التثبيت المحلي ولكنك ستستفيد أيضًا من وصول إلى بيئة عمل متحققة وفعالة. يقدم Ultralytics 5 صور Docker مدعومة رئيسية، يتم تصميم كل منها لتوفير توافق عالي وكفاءة لمنصات وحالات استخدام مختلفة: + + Docker Pulls + + - **Dockerfile:** صورة GPU الموصى بها للتدريب. + - **Dockerfile-arm64:** محسّن لبنية ARM64، مما يتيح النشر على أجهزة مثل Raspberry Pi ومنصات أخرى تعتمد على ARM64. + - **Dockerfile-cpu:** إصدار مناسب للتحكم بوحدة المعالجة المركزية فقط بدون دعم لل GPU. + - **Dockerfile-jetson:** مصمم خصيصًا لأجهزة NVIDIA Jetson، ويدمج دعمًا لل GPU المحسن لهذه المنصات. + - **Dockerfile-python:** صورة صغيرة بها فقط Python والتبعيات الضرورية، مثالية للتطبيقات والتطوير الخفيف. + - **Dockerfile-conda:** قائمة على Miniconda3 مع تثبيت conda لحزمة ultralytics. + + فيما يلي الأوامر للحصول على أحدث صورة وتشغيلها: + + ```bash + # قم بتعيين اسم الصورة بوصفه متغير + t=ultralytics/ultralytics:latest + + # اسحب أحدث صورة ultralytics من Docker Hub + sudo docker pull $t + + # قم بتشغيل صورة ultralytics في حاوية مع دعم GPU + sudo docker run -it --ipc=host --gpus all $t # all GPUs + sudo docker run -it --ipc=host --gpus '"device=2,3"' $t # قد يتم تحديد GPUs + ``` + + يقوم الأمر أعلاه بتهيئة حاوية Docker بأحدث صورة `ultralytics`. يُسند العلامة `-it` جهازًا افتراضيًا TTY ويحافظ على فتح stdin لتمكينك من التفاعل مع الحاوية. تعيين العلامة `--ipc=host` مساحة اسم IPC (Inter-Process Communication) إلى المضيف، وهو أمر ضروري لمشاركة الذاكرة بين العمليات. 
تُمكّن العلامة `--gpus all` الوصول إلى كل وحدات معالجة الرسومات (GPU) المتاحة داخل الحاوية، وهو أمر حاسم للمهام التي تتطلب حسابات GPU. + + ملاحظة: للعمل مع الملفات على جهازك المحلي داخل الحاوية، استخدم وحدات تخزين Docker (volumes) لتوصيل دليل محلي بالحاوية: + + ```bash + # ربط الدليل المحلي بالحاوية + sudo docker run -it --ipc=host --gpus all -v /path/on/host:/path/in/container $t + ``` + + استبدل `/path/on/host` بمسار الدليل على جهازك المحلي، و `/path/in/container` بالمسار المطلوب داخل حاوية Docker للوصول إليه. + + للاستفادة القصوى من استخدام Docker المتقدم، لا تتردد في استكشاف [دليل Ultralytics Docker](https://docs.ultralytics.com/guides/docker-quickstart/). + +راجع ملف `requirements.txt` الخاص بـ `ultralytics` [هنا](https://github.com/ultralytics/ultralytics/blob/main/requirements.txt) للحصول على قائمة المتطلبات. يُرجى ملاحظة أن جميع الأمثلة أعلاه تقوم بتثبيت جميع المتطلبات المطلوبة. + +

+
+ +
+ شاهد: دليل البدء السريع لـ Ultralytics YOLO +

+ +!!! Tip "نصيحة" + + يختلف متطلبات PyTorch حسب نظام التشغيل ومتطلبات CUDA، لذا يُوصَى بتثبيت PyTorch أولاً باستخدام التعليمات الموجودة في [https://pytorch.org/get-started/locally](https://pytorch.org/get-started/locally). + + + PyTorch تعليمات التثبيت + + +## استخدم Ultralytics مع واجهة سطر الأوامر (CLI) + +تتيح واجهة سطر الأوامر (CLI) في Ultralytics تشغيل أوامر بسيطة بدون الحاجة إلى بيئة Python. لا تحتاج CLI إلى أي تخصيص أو كود Python. يمكنك ببساطة تشغيل جميع المهام من الطرفية باستخدام الأمر `yolo`. تحقق من [دليل CLI](/../usage/cli.md) لمعرفة المزيد حول استخدام YOLOv8 من سطر الأوامر. + +!!! Example "مثال" + + === "الصيغة" + تستخدم أوامر Ultralytics `yolo` الصيغة التالية: + ```bash + yolo TASK MODE ARGS + ``` + + - `TASK` (اختياري) أحد التالي ([detect](tasks/detect.md), [segment](tasks/segment.md), [classify](tasks/classify.md), [pose](tasks/pose.md)) + - `MODE` (مطلوب) واحد من ([train](modes/train.md), [val](modes/val.md), [predict](modes/predict.md), [export](modes/export.md), [track](modes/track.md)) + - `ARGS` (اختياري) أزواج "arg=value" مثل `imgsz=640` التي تستبدل القيم الافتراضية. + + راجع جميع `ARGS` [هنا](/../usage/cfg.md) أو باستخدام الأمر `yolo cfg` في سطر الأوامر. 
+ + === "التدريب" + قم بتدريب نموذج اكتشاف لمدة 10 حقب بمعدل تعلم أولي 0.01 + ```bash + yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01 + ``` + + === "التنبؤ" + تنبؤ بفيديو YouTube باستخدام نموذج تجزئة معتمد مسبقًا عند حجم الصورة 320: + ```bash + yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 + ``` + + === "التحقق" + التحقق من نموذج اكتشاف معتمد مسبقًا على دُفعَة واحدة وحجم صورة قدره 640: + ```bash + yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640 + ``` + + === "التصدير" + قم بتصدير نموذج تصنيف YOLOv8n إلى تنسيق ONNX على حجم صورة 224 بواسطة 128 (لا يلزم TASK) + ```bash + yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128 + ``` + + === "خاص" + قم بتشغيل أوامر خاصة لعرض الإصدار وعرض الإعدادات وتشغيل عمليات التحقق والمزيد: + ```bash + yolo help + yolo checks + yolo version + yolo settings + yolo copy-cfg + yolo cfg + ``` + +!!! Warning "تحذير" +يجب تمرير الوسوم كأزواج "arg=val"، وأن تُفصل بعلامة تساوي `=` وأن تُفصل بمسافات بين الأزواج. لا تستخدم بادئات الوسوم `--` أو فواصل `,` بين الوسوم. + + - `yolo predict model=yolov8n.pt imgsz=640 conf=0.25` ✅ + - `yolo predict model yolov8n.pt imgsz 640 conf 0.25` ❌ (علامة المساواة `=` مفقودة) + - `yolo predict model=yolov8n.pt, imgsz=640, conf=0.25` ❌ (لا تستخدم `,`) + - `yolo predict --model yolov8n.pt --imgsz 640 --conf 0.25` ❌ (لا تستخدم `--`) + +[دليل CLI](/../usage/cli.md){ .md-button } + +## استخدم Ultralytics مع Python + +تسمح واجهة Python في YOLOv8 بالتكامل السلس في مشاريع Python الخاصة بك، مما يجعل من السهل تحميل النموذج وتشغيله ومعالجة نتائجه. صُممت واجهة Python مع مراعاة البساطة وسهولة الاستخدام، وهي تمكّن المستخدمين من تنفيذ الكشف على الكائنات والتجزئة والتصنيف في مشاريعهم. يجعل هذا واجهة YOLOv8 Python أداة قيمة لأي شخص يرغب في دمج هذه الوظائف في مشاريع Python الخاصة به. + +على سبيل المثال، يمكن للمستخدمين تحميل نموذج، تدريبه، تقييم أدائه على مجموعة التحقق، وحتى تصديره إلى تنسيق ONNX ببضعة أسطر فقط من الشفرة. 
تحقق من [دليل Python](/../usage/python.md) لمعرفة المزيد حول استخدام YOLOv8 داخل مشاريعك الخاصة. + +!!! Example "مثال" + + ```python + from ultralytics import YOLO + + # أنشئ نموذج YOLO جديد من البداية + model = YOLO('yolov8n.yaml') + + # قم بتحميل نموذج YOLO معتمد مسبقًا (موصَى به للتدريب) + model = YOLO('yolov8n.pt') + + # قم بتدريب النموذج باستخدام مجموعة البيانات 'coco128.yaml' لمدة 3 حلقات + results = model.train(data='coco128.yaml', epochs=3) + + # قم بتقييم أداء النموذج على مجموعة التحقق + results = model.val() + + # قم بإجراء الكشف على صورة باستخدام النموذج + results = model('https://ultralytics.com/images/bus.jpg') + + # قم بتصدير النموذج إلى تنسيق ONNX + success = model.export(format='onnx') + ``` + +[دليل Python](/../usage/python.md){.md-button .md-button--primary} + +## إعدادات Ultralytics + +يوفر مكتبة Ultralytics نظامًا قويًا لإدارة الإعدادات لتمكين التحكم بمحاكاة تفصيلية لتجاربك. من خلال استخدام `SettingsManager` في الوحدة `ultralytics.utils`، يمكن للمستخدمين الوصول بسهولة إلى إعداداتهم وتعديلها. يتم تخزينها في ملف YAML ويمكن عرضها أو تعديلها إما مباشرة في بيئة Python أو من خلال واجهة سطر الأوامر (CLI). + +### فحص الإعدادات + +للحصول على فهم للتكوين الحالي لإعداداتك، يمكنك عرضها مباشرةً: + +!!! Example "عرض الإعدادات" + + === "Python" + يُمكنك استخدام Python لعرض الإعدادات الخاصة بك. ابدأ بـاستيراد الكائن `settings` من وحدة `ultralytics`. استخدم الأوامر التالية لطباعة الإعدادات والعودة منها: + ```python + from ultralytics import settings + + # عرض كل الإعدادات + print(settings) + + # إرجاع إعداد محدد + value = settings['runs_dir'] + ``` + + === "CLI" + بدلاً من ذلك، واجهة سطر الأوامر تسمح لك بالتحقق من الإعدادات الخاصة بك باستخدام أمر بسيط: + ```bash + yolo settings + ``` + +### تعديل الإعدادات + +يسمح لك Ultralytics بتعديل الإعدادات بسهولة. يمكن تنفيذ التغييرات بالطرق التالية: + +!!! 
Example "تحديث الإعدادات" + + === "Python" + داخل بيئة Python، اطلب الطريقة `update` على الكائن `settings` لتغيير إعداداتك: + + ```python + from ultralytics import settings + + # تحديث إعداد واحد + settings.update({'runs_dir': '/path/to/runs'}) + + # تحديث إعدادات متعددة + settings.update({'runs_dir': '/path/to/runs', 'tensorboard': False}) + + # إعادة الإعدادات إلى القيم الافتراضية + settings.reset() + ``` + + === "CLI" + إذا كنت تفضل استخدام واجهة سطر الأوامر، يمكنك استخدام الأوامر التالية لتعديل إعداداتك: + + ```bash + # تحديث إعداد واحد + yolo settings runs_dir='/path/to/runs' + + # تحديث إعدادات متعددة + yolo settings runs_dir='/path/to/runs' tensorboard=False + + # إعادة الإعدادات إلى القيم الافتراضية + yolo settings reset + ``` + +### فهم الإعدادات + +يوفر الجدول أدناه نظرة عامة على الإعدادات المتاحة للضبط في Ultralytics. يتم توضيح كل إعداد بالإضافة إلى قيمة مثالية ونوع البيانات ووصف موجز. + +| الاسم | القيمة المثالية | نوع البيانات | الوصف | +|--------------------|-----------------------|--------------|-------------------------------------------------------------------------------------------------------------| +| `settings_version` | `'0.0.4'` | `str` | إصدار إعدادات Ultralytics (مختلف عن إصدار Ultralytics [pip](https://pypi.org/project/ultralytics/)) | +| `datasets_dir` | `'/path/to/datasets'` | `str` | المسار الذي يتم تخزينه فيه مجموعات البيانات | +| `weights_dir` | `'/path/to/weights'` | `str` | المسار الذي يتم تخزينه فيه أوزان النموذج | +| `runs_dir` | `'/path/to/runs'` | `str` | المسار الذي يتم تخزينه فيه تشغيل التجارب | +| `uuid` | `'a1b2c3d4'` | `str` | مُعرِّف فريد لإعدادات الحالية | +| `sync` | `True` | `bool` | ما إذا كان يتم مزامنة التحليلات وحوادث الأعطال إلى HUB | +| `api_key` | `''` | `str` | HUB الخاص بـ Ultralytics [API Key](https://hub.ultralytics.com/settings?tab=api+keys) | +| `clearml` | `True` | `bool` | ما إذا كان يتم استخدام ClearML لتسجيل التجارب | +| `comet` | `True` | `bool` | ما إذا كان يتم استخدام [Comet 
ML](https://bit.ly/yolov8-readme-comet) لتتبع وتصور التجارب | +| `dvc` | `True` | `bool` | ما إذا كان يتم استخدام [DVC لتتبع التجارب](https://dvc.org/doc/dvclive/ml-frameworks/yolo) والتحكم في النسخ | +| `hub` | `True` | `bool` | ما إذا كان يتم استخدام [Ultralytics HUB](https://hub.ultralytics.com) للتكامل | +| `mlflow` | `True` | `bool` | ما إذا كان يتم استخدام MLFlow لتتبع التجارب | +| `neptune` | `True` | `bool` | ما إذا كان يتم استخدام Neptune لتتبع التجارب | +| `raytune` | `True` | `bool` | ما إذا كان يتم استخدام Ray Tune لضبط الحساسية | +| `tensorboard` | `True` | `bool` | ما إذا كان يتم استخدام TensorBoard للتصور | +| `wandb` | `True` | `bool` | ما إذا كان يتم استخدام Weights & Biases لتسجيل البيانات | + +أثناء تنقلك في مشاريعك أو تجاربك، تأكد من مراجعة هذه الإعدادات لضمان تكوينها بشكل مثالي وفقًا لاحتياجاتك. diff --git a/docs/ar/tasks/classify.md b/docs/ar/tasks/classify.md new file mode 100644 index 0000000..b0dadf4 --- /dev/null +++ b/docs/ar/tasks/classify.md @@ -0,0 +1,172 @@ +--- +comments: true +description: تعرّف على نماذج YOLOv8 Classify لتصنيف الصور. احصل على معلومات مفصلة حول قائمة النماذج المدرّبة مسبقًا وكيفية التدريب والتحقق والتنبؤ وتصدير النماذج. +keywords: Ultralytics، YOLOv8، تصنيف الصور، النماذج المدربة مسبقًا، YOLOv8n-cls، التدريب، التحقق، التنبؤ، تصدير النماذج +--- + +# تصنيف الصور + +أمثلة على تصنيف الصور + +تعتبر عملية تصنيف الصور أبسط المهام الثلاثة وتنطوي على تصنيف صورة كاملة في إحدى الفئات المحددة سابقًا. + +ناتج نموذج تصنيف الصور هو تسمية فئة واحدة ودرجة ثقة. يكون تصنيف الصور مفيدًا عندما تحتاج فقط إلى معرفة فئة الصورة ولا تحتاج إلى معرفة موقع الكائنات التابعة لتلك الفئة أو شكلها الدقيق. + +!!! Tip "نصيحة" + + تستخدم نماذج YOLOv8 Classify اللاحقة "-cls"، مثالًا "yolov8n-cls.pt" وتم تدريبها على [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). 
+ +## [النماذج](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +تظهر هنا النماذج المدرّبة مسبقًا لـ YOLOv8 للتصنيف. تم تدريب نماذج الكشف والشعبة والموضع على مجموعة البيانات [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml)، بينما تم تدريب نماذج التصنيف مسبقًا على مجموعة البيانات [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +يتم تنزيل [النماذج](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) تلقائيًا من أحدث إصدار لـ Ultralytics [releases](https://github.com/ultralytics/assets/releases) عند الاستخدام الأول. + +| النموذج | الحجم
(بكسل) | دقة (أعلى 1)
acc | دقة (أعلى 5)
acc | سرعة التنفيذ
ONNX للوحدة المركزية
(مللي ثانية) | سرعة التنفيذ
A100 TensorRT
(مللي ثانية) | المعلمات
(مليون) | FLOPs
(مليار) لحجم 640 | +|----------------------------------------------------------------------------------------------|----------------------|--------------------------|--------------------------|-----------------------------------------------------------|----------------------------------------------------|--------------------------|--------------------------------| +| [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | +| [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | +| [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | +| [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | +| [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | + +- قيمة **acc** هي دقة النماذج على مجموعة بيانات التحقق [ImageNet](https://www.image-net.org/). +
لإعادة إنتاج ذلك، استخدم `yolo val classify data=path/to/ImageNet device=0` +- يتم حساب سرعة **Speed** بناءً على متوسط صور التحقق من ImageNet باستخدام [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/). +
لإعادة إنتاج ذلك، استخدم `yolo val classify data=path/to/ImageNet batch=1 device=0|cpu` + +## التدريب + +قم بتدريب YOLOv8n-cls على مجموعة بيانات MNIST160 لمدة 100 دورة عند حجم الصورة 64 بكسل. للحصول على قائمة كاملة بالوسائط المتاحة، اطلع على صفحة [تكوين](/../usage/cfg.md). + +!!! Example "مثال" + + === "Python" + + ```python + from ultralytics import YOLO + + # تحميل نموذج + model = YOLO('yolov8n-cls.yaml') # إنشاء نموذج جديد من نموذج YAML + model = YOLO('yolov8n-cls.pt') # تحميل نموذج مدرّب مسبقًا (موصى به للتدريب) + model = YOLO('yolov8n-cls.yaml').load('yolov8n-cls.pt') # إنشاء من YAML ونقل الأوزان + + # تدريب النموذج + results = model.train(data='mnist160', epochs=100, imgsz=64) + ``` + + === "CLI" + + ```bash + # إنشاء نموذج جديد من YAML وبدء التدريب من البداية + yolo classify train data=mnist160 model=yolov8n-cls.yaml epochs=100 imgsz=64 + + # بدء التدريب من نموذج مدرّب بصيغة pt + yolo classify train data=mnist160 model=yolov8n-cls.pt epochs=100 imgsz=64 + + # إنشاء نموذج جديد من YAML ونقل الأوزان المدرّبة مسبقًا وبدء التدريب + yolo classify train data=mnist160 model=yolov8n-cls.yaml pretrained=yolov8n-cls.pt epochs=100 imgsz=64 + ``` + +### تنسيق مجموعة البيانات + +يمكن العثور على تنسيق مجموعة بيانات تصنيف YOLO بالتفصيل في [مرشد المجموعة](../../../datasets/classify/index.md). + +## التحقق + +قم بتحديد دقة النموذج YOLOv8n-cls المدرّب على مجموعة بيانات MNIST160. لا يلزم تمرير أي وسيطة حيث يحتفظ `model` ببيانات التدريب والوسائط كسمات النموذج. + +!!! 
Example "مثال" + + === "Python" + + ```python + from ultralytics import YOLO + + # تحميل نموذج + model = YOLO('yolov8n-cls.pt') # تحميل نموذج رسمي + model = YOLO('path/to/best.pt') # تحميل نموذج مخصص + + # التحقق من النموذج + metrics = model.val() # لا تحتاج إلى وسائط، يتم تذكر مجموعة البيانات والإعدادات النموذج + metrics.top1 # دقة أعلى 1 + metrics.top5 # دقة أعلى 5 + ``` + === "CLI" + + ```bash + yolo classify val model=yolov8n-cls.pt # تحقق من النموذج الرسمي + yolo classify val model=path/to/best.pt # تحقق من النموذج المخصص + ``` + +## التنبؤ + +استخدم نموذج YOLOv8n-cls المدرّب لتنفيذ تنبؤات على الصور. + +!!! Example "مثال" + + === "Python" + + ```python + from ultralytics import YOLO + + # تحميل نموذج + model = YOLO('yolov8n-cls.pt') # تحميل نموذج رسمي + model = YOLO('path/to/best.pt') # تحميل نموذج مخصص + + # تنبؤ باستخدام النموذج + results = model('https://ultralytics.com/images/bus.jpg') # تنبؤ على صورة + ``` + === "CLI" + + ```bash + yolo classify predict model=yolov8n-cls.pt source='https://ultralytics.com/images/bus.jpg' # تنبؤ باستخدام النموذج الرسمي + yolo classify predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # تنبؤ باستخدام النموذج المخصص + ``` + +راجع تفاصيل كاملة حول وضع `predict` في الصفحة [Predict](https://docs.ultralytics.com/modes/predict/). + +## تصدير + +قم بتصدير نموذج YOLOv8n-cls إلى تنسيق مختلف مثل ONNX، CoreML، وما إلى ذلك. + +!!! Example "مثال" + + === "Python" + + ```python + from ultralytics import YOLO + + # تحميل نموذج + model = YOLO('yolov8n-cls.pt') # تحميل نموذج رسمي + model = YOLO('path/to/best.pt') # تحميل نموذج مدرّب مخصص + + # تصدير النموذج + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-cls.pt format=onnx # تصدير النموذج الرسمي + yolo export model=path/to/best.pt format=onnx # تصدير نموذج مدرّب مخصص + ``` + +تتوفر صيغ تصدير YOLOv8-cls في الجدول أدناه. يمكنك تنبؤ أو التحقق من الصحة مباشرةً على النماذج المصدر، أي "yolo predict model=yolov8n-cls.onnx". 
يتم عرض أمثلة لاستخدام النموذج الخاص بك بعد الانتهاء من التصدير. + +| الصيغة | وسيطة الصيغة | النموذج | البيانات الوصفية | الوسيطات | +|--------------------------------------------------------------------|---------------|-------------------------------|------------------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-cls.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-cls.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-cls.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-cls_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-cls.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-cls.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-cls_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-cls.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-cls.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-cls_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-cls_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-cls_ncnn_model/` | ✅ | `imgsz`, `half` | + +راجع التفاصيل الكاملة حول `export` في الصفحة [Export](https://docs.ultralytics.com/modes/export/). 
diff --git a/docs/ar/tasks/detect.md b/docs/ar/tasks/detect.md new file mode 100644 index 0000000..644269a --- /dev/null +++ b/docs/ar/tasks/detect.md @@ -0,0 +1,185 @@ +--- +comments: true +description: وثائق رسمية لـ YOLOv8 بواسطة Ultralytics. تعلم كيفية تدريب و التحقق من صحة و التنبؤ و تصدير النماذج بتنسيقات مختلفة. تتضمن إحصائيات الأداء التفصيلية. +keywords: YOLOv8, Ultralytics, التعرف على الكائنات, النماذج المدربة من قبل, التدريب, التحقق من الصحة, التنبؤ, تصدير النماذج, COCO, ImageNet, PyTorch, ONNX, CoreML +--- + +# التعرف على الكائنات + +أمثلة على التعرف على الكائنات + +مهمة التعرف على الكائنات هي تحديد موقع وفئة الكائنات في صورة أو فيديو. + +مخرجات الكاشف هي مجموعة من المربعات التي تحيط بالكائنات في الصورة، مع تصنيف الفئة ودرجة الثقة لكل مربع. التعرف على الكائنات هو اختيار جيد عندما تحتاج إلى تحديد كائنات مهمة في مشهد، ولكنك لا تحتاج إلى معرفة بالضبط أين يكمن الكائن أو شكله الدقيق. + +

+
+ +
+ شاهد: التعرف على الكائنات باستخدام نموذج Ultralytics YOLOv8 مع تدريب مسبق. +

+ +!!! Tip "تلميح" + + نماذج YOLOv8 Detect هي النماذج الافتراضية YOLOv8، أي `yolov8n.pt` و هي مدربة مسبقًا على [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml). + +## [النماذج](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +تُعرض هنا النماذج المدربة مسبقًا لـ YOLOv8 Detect. النماذج Detect و Segment و Pose معتمدة على مجموعة البيانات [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml)، بينما النماذج Classify معتمدة على مجموعة البيانات [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +تُقوم النماذج بالتنزيل تلقائيًا من أحدث [إصدار Ultralytics](https://github.com/ultralytics/assets/releases) عند الاستخدام لأول مرة. + +| النموذج | الحجم
(بكسل) | mAPval
50-95 | السرعة
CPU ONNX
(مللي ثانية) | السرعة
A100 TensorRT
(مللي ثانية) | الوزن
(ميغا) | FLOPs
(مليار) | +|--------------------------------------------------------------------------------------|----------------------|----------------------|-----------------------------------------|----------------------------------------------|----------------------|-----------------------| +| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | +| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | +| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | +| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | +| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | + +- قيم mAPval تنطبق على مقياس نموذج واحد-مقياس واحد على مجموعة بيانات [COCO val2017](http://cocodataset.org). +
اعيد حسابها بواسطة `yolo val detect data=coco.yaml device=0` +- **السرعة** محسوبة كمتوسط على صور COCO val باستخدام [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) + instance. +
اعيد حسابها بواسطة `yolo val detect data=coco128.yaml batch=1 device=0|cpu` + +## تدريب + +قم بتدريب YOLOv8n على مجموعة البيانات COCO128 لمدة 100 دورة على حجم صورة 640. للحصول على قائمة كاملة بالوسائط المتاحة انظر الصفحة [التكوين](/../usage/cfg.md). + +!!! Example "مثال" + + === "Python" + + ```python + from ultralytics import YOLO + + # قم بتحميل نموذج + model = YOLO('yolov8n.yaml') # بناء نموذج جديد من YAML + model = YOLO('yolov8n.pt') # قم بتحميل نموذج مدرب مسبقًا (موصى به للتدريب) + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # بناء من YAML و نقل الأوزان + + # قم بتدريب النموذج + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # قم ببناء نموذج جديد من YAML وابدأ التدريب من الصفر + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # ابدأ التدريب من نموذج *.pt مدرب مسبقًا + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # بناء نموذج جديد من YAML، ونقل الأوزان المدربة مسبقاً إلى النموذج وابدأ التدريب + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### تنسيق مجموعة بيانات + +يمكن العثور على تنسيق مجموعة بيانات التعرف على الكائنات بالتفصيل في [دليل مجموعة البيانات](../../../datasets/detect/index.md). لتحويل مجموعة البيانات الحالية من تنسيقات أخرى (مثل COCO الخ) إلى تنسيق YOLO، يرجى استخدام أداة [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) المقدمة من Ultralytics. + +## التحقق من الصحة + +قم بتحقق من دقة النموذج المدرب مسبقًا YOLOv8n على مجموعة البيانات COCO128. ليس هناك حاجة إلى تمرير أي وسيطات حيث يحتفظ النموذج ببياناته التدريبية والوسيطات كسمات النموذج. + +!!! 
Example "مثال" + + === "Python" + + ```python + from ultralytics import YOLO + + # قم بتحميل نموذج + model = YOLO('yolov8n.pt') # تحميل نموذج رسمي + model = YOLO('path/to/best.pt') # تحميل نموذج مخصص + + # قم بالتحقق من النموذج + metrics = model.val() # لا حاجة لأي بيانات، يتذكر النموذج بيانات التدريب و الوسيطات + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # قائمة تحتوي map50-95 لكل فئة + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # التحقق من النموذج الرسمي + yolo detect val model=path/to/best.pt # التحقق من النموذج المخصص + ``` + +## التنبؤ + +استخدم نموذج YOLOv8n المدرب مسبقًا لتشغيل التنبؤات على الصور. + +!!! Example "مثال" + + === "Python" + + ```python + from ultralytics import YOLO + + # قم بتحميل نموذج + model = YOLO('yolov8n.pt') # قم بتحميل نموذج رسمي + model = YOLO('path/to/best.pt') # قم بتحميل نموذج مخصص + + # أجرِ التنبؤ باستخدام النموذج + results = model('https://ultralytics.com/images/bus.jpg') # التنبؤ على صورة + ``` + === "CLI" + + ```bash + yolo detect predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' # التنبؤ باستخدام النموذج الرسمي + yolo detect predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # التنبؤ بالنموذج المخصص + ``` + +انظر تفاصيل وضع الـ `predict` الكامل في صفحة [Predict](https://docs.ultralytics.com/modes/predict/). + +## تصدير + +قم بتصدير نموذج YOLOv8n إلى تنسيق مختلف مثل ONNX، CoreML وغيرها. + +!!! Example "مثال" + + === "Python" + + ```python + from ultralytics import YOLO + + # قم بتحميل نموذج + model = YOLO('yolov8n.pt') # تحميل نموذج رسمي + model = YOLO('path/to/best.pt') # تحميل نموذج مدرب مخصص + + # قم بتصدير النموذج + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # تصدير النموذج الرسمي + yolo export model=path/to/best.pt format=onnx # تصدير النموذج المدرب مخصص + ``` + +التنسيقات المدعومة لتصدير YOLOv8 مدرجة في الجدول أدناه. 
يمكنك التنبؤ أو التحقق من صحة النماذج المصدرة مباشرة، على سبيل المثال `yolo predict model=yolov8n.onnx`. سيتم عرض أمثلة استخدام لنموذجك بعد اكتمال التصدير. + +| الشكل | مسافة `format` | النموذج | بيانات الوصف | وسيطات | +|--------------------------------------------------------------------|----------------|---------------------------|--------------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - أو | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +انظر تفاصيل كاملة للـ `export` في صفحة [Export](https://docs.ultralytics.com/modes/export/). 
diff --git a/docs/ar/tasks/index.md b/docs/ar/tasks/index.md new file mode 100644 index 0000000..dfb2f43 --- /dev/null +++ b/docs/ar/tasks/index.md @@ -0,0 +1,55 @@ +--- +comments: true +description: تعرّف على المهام الأساسية لتقنية YOLOv8 للرؤية الحاسوبية والتي تشمل الكشف، التجزئة، التصنيف وتقدير الوضعية. تعرف على استخداماتها في مشاريع الذكاء الاصطناعي الخاصة بك. +keywords: Ultralytics، YOLOv8، الكشف، التجزئة، التصنيف، تقدير الوضعية، الإطار الذكي للذكاء الاصطناعي، المهام الرؤية الحاسوبية +--- + +# مهام Ultralytics YOLOv8 + +
+مهام Ultralytics YOLOv8 المدعومة + +YOLOv8 هو إطار ذكاء اصطناعي يدعم عدة **مهام** للرؤية الحاسوبية. يمكن استخدام الإطار لأداء [الكشف](detect.md)، [التجزئة](segment.md)، [التصنيف](classify.md)، و[تقدير الوضعية](pose.md). كل من هذه المهام لها هدف مختلف واستخدام محدد. + +!!! Note "ملاحظة" + + 🚧 يجري بناء وثائقنا متعددة اللغات حاليًا، ونعمل جاهدين على تحسينها. شكرًا لصبرك! 🙏 + +

+
+ +
+ شاهد: استكشف مهام Ultralytics YOLO: كشف الكائنات، التجزئة، التتبع وتقدير الوضعية. +

+ +## [الكشف](detect.md) + +الكشف هو المهمة الأساسية المدعومة بواسطة YOLOv8. يتضمن الكشف اكتشاف الكائنات في صورة أو إطار فيديو ورسم مربعات محيطة حولها. يتم تصنيف الكائنات المكتشفة إلى فئات مختلفة استنادًا إلى ميزاتها. يمكن لـ YOLOv8 اكتشاف أكثر من كائن واحد في صورة أو إطار فيديو واحد بدقة وسرعة عالية. + +[أمثلة للكشف](detect.md){ .md-button } + +## [التجزئة](segment.md) + +التجزئة هي مهمة تتضمن تقسيم صورة إلى مناطق مختلفة استنادًا إلى محتوى الصورة. يتم تعيين علامة لكل منطقة استنادًا إلى محتواها. تعتبر هذه المهمة مفيدة في تطبيقات مثل تجزئة الصور وتصوير الطبية. يستخدم YOLOv8 نسخة معدلة من هندسة U-Net لأداء التجزئة. + +[أمثلة للتجزئة](segment.md){ .md-button } + +## [التصنيف](classify.md) + +التصنيف هو مهمة تتضمن تصنيف صورة إلى فئات مختلفة. يمكن استخدام YOLOv8 لتصنيف الصور استنادًا إلى محتواها. يستخدم نسخة معدلة من هندسة EfficientNet لأداء التصنيف. + +[أمثلة للتصنيف](classify.md){ .md-button } + +## [تقدير الوضعية](pose.md) + +تقدير الوضعية/النقاط الرئيسية هو مهمة تتضمن اكتشاف نقاط محددة في صورة أو إطار فيديو. يُشار إلى هذه النقاط بمصطلح النقاط الرئيسية وتُستخدم لتتبع الحركة أو تقدير الوضعية. يمكن لـ YOLOv8 اكتشاف النقاط الرئيسية في صورة أو إطار فيديو بدقة وسرعة عالية. + +[أمثلة لتقدير الوضعية](pose.md){ .md-button } + +## الاستنتاج + +يدعم YOLOv8 مهام متعددة، بما في ذلك الكشف، التجزئة، التصنيف، وكشف النقاط الرئيسية. لكل من هذه المهام أهداف واستخدامات مختلفة. عن طريق فهم الاختلافات بين هذه المهام، يمكنك اختيار المهمة المناسبة لتطبيق الرؤية الحاسوبية الخاص بك. diff --git a/docs/ar/tasks/pose.md b/docs/ar/tasks/pose.md new file mode 100644 index 0000000..8ac1771 --- /dev/null +++ b/docs/ar/tasks/pose.md @@ -0,0 +1,186 @@ +--- +comments: true +description: تعرّف على كيفية استخدام Ultralytics YOLOv8 لمهام تقدير الوضعية. اعثر على نماذج مدرّبة مسبقًا، وتعلم كيفية التدريب والتحقق والتنبؤ وتصدير نموذجك الخاص. 
+keywords: Ultralytics، YOLO، YOLOv8، تقدير الوضعية ، كشف نقاط المفاتيح ، كشف الكائنات ، نماذج مدرّبة مسبقًا ، تعلم الآلة ، الذكاء الاصطناعي +--- + +# تقدير الوضعية + +تقدير الوضعية هو مهمة تنطوي على تحديد موقع نقاط محددة في الصورة ، وعادةً ما يشار إليها بنقاط الوضوح. يمكن أن تمثل نقاط الوضوح أجزاءً مختلفةً من الكائن مثل المفاصل أو العلامات المميزة أو الميزات البارزة الأخرى. عادةً ما يتم تمثيل مواقع نقاط الوضوح كمجموعة من الإحداثيات 2D `[x ، y]` أو 3D `[x ، y ، visible]`. + +يكون ناتج نموذج تقدير الوضعية مجموعة من النقاط التي تمثل نقاط الوضوح على كائن في الصورة ، عادةً مع نقاط الثقة لكل نقطة. تقدير الوضعية هو خيار جيد عندما تحتاج إلى تحديد أجزاء محددة من كائن في مشهد، وموقعها بالنسبة لبعضها البعض. + +

+
+ +
+ شاهد: تقدير الوضعية مع Ultralytics YOLOv8. +

+ +!!! Tip "نصيحة" + + النماذج التي تحتوي على اللاحقة "-pose" تستخدم لنماذج YOLOv8 pose ، على سبيل المثال `yolov8n-pose.pt`. هذه النماذج مدربة على [مجموعة بيانات نقاط الوضوح COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco-pose.yaml) وهي مناسبة لمجموعة متنوعة من مهام تقدير الوضعية. + +## [النماذج](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +تعرض نماذج مدرّبة مسبقًا لـ YOLOv8 التي تستخدم لتقدير الوضعية هنا. النماذج للكشف والتجزئة والوضعية يتم تدريبها على [مجموعة بيانات COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml)، بينما يتم تدريب نماذج التصنيف على مجموعة بيانات ImageNet. + +يتم تنزيل النماذج من [آخر إصدار Ultralytics](https://github.com/ultralytics/assets/releases) تلقائيًا عند استخدامها لأول مرة. + +| النموذج | الحجم (بالبكسل) | mAPالوضعية 50-95 | mAPالوضعية 50 | سرعةالوحدة المركزية ONNX(ms) | سرعةA100 TensorRT(ms) | المعلمات (مليون) | FLOPs (بالمليار) | +|------------------------------------------------------------------------------------------------------|-----------------|-----------------------|--------------------|----------------------------------------|---------------------------------|------------------|------------------| +| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | +| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | +| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | +| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | +| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 
1607.1 | 3.73 | 69.4 | 263.2 | +| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | + +- تعتبر القيم **mAPval** لنموذج واحد ومقياس واحد فقط على [COCO Keypoints val2017](http://cocodataset.org) + مجموعة البيانات. +
يمكن إعادة إنتاجه بواسطة `yolo val pose data=coco-pose.yaml device=0` +- يتم حساب **السرعة** من خلال متوسط صور COCO val باستخدام [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) + مثيل. +
يمكن إعادة إنتاجه بواسطة `yolo val pose data=coco8-pose.yaml batch=1 device=0|cpu` + +## التدريب + +يتم تدريب نموذج YOLOv8-pose على مجموعة بيانات COCO128-pose. + +!!! Example "مثال" + + === "Python" + + ```python + from ultralytics import YOLO + + # تحميل النموذج + model = YOLO('yolov8n-pose.yaml') # بناء نموذج جديد من ملف YAML + model = YOLO('yolov8n-pose.pt') # تحميل نموذج مدرّب مسبقًا (موصى به للتدريب) + model = YOLO('yolov8n-pose.yaml').load('yolov8n-pose.pt') # بناء نموذج من YAML ونقل الوزن + + # تدريب النموذج + results = model.train(data='coco8-pose.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # بناء نموذج جديد من YAML وبدء التدريب من البداية. + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml epochs=100 imgsz=640 + + # البدء في التدريب من نموذج مدرب مسبقًا *.pt + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + + # بناء نموذج جديد من YAML ، ونقل الأوزان المدرّبة مسبقًا إليه ، والبدء في التدريب. + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml pretrained=yolov8n-pose.pt epochs=100 imgsz=640 + ``` + +### تنسيق مجموعة البيانات + +يمكن العثور على تنسيق مجموعات بيانات نقاط الوضوح YOLO في [دليل مجموعة البيانات](../../../datasets/pose/index.md). لتحويل مجموعة البيانات الحالية التي لديك من تنسيقات أخرى (مثل COCO إلخ) إلى تنسيق YOLO ، يرجى استخدام أداة [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) من Ultralytics. + +## التحقق من الصحة + +تحقق من دقة نموذج YOLOv8n-pose المدرّب على مجموعة بيانات COCO128-pose. لا يلزم تمرير أي وسيطة، إذ يحتفظ `model` +ببيانات التدريب والوسيطات كسمات للنموذج. + +!!! 
Example "مثال" + + === "Python" + + ```python + from ultralytics import YOLO + + # تحميل النموذج + model = YOLO('yolov8n-pose.pt') # تحميل نموذج رسمي + model = YOLO('path/to/best.pt') # تحميل نموذج مخصص + + # التحقق من النموذج + metrics = model.val() # لا يوجد حاجة لأي سبب، يتذكر النموذج البيانات والوسائط كمجالات للنموذج + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # قائمة تحتوي على map50-95 لكل فئة + ``` + === "CLI" + + ```bash + yolo pose val model=yolov8n-pose.pt # التحقق من النموذج الرسمي + yolo pose val model=path/to/best.pt # التحقق من النموذج المخصص + ``` + +## التنبؤ + +استخدم نموذج YOLOv8n-pose المدرّب لتشغيل توقعات على الصور. + +!!! Example "مثال" + + === "Python" + + ```python + from ultralytics import YOLO + + # تحميل النموذج + model = YOLO('yolov8n-pose.pt') # تحميل نموذج رسمي + model = YOLO('path/to/best.pt') # تحميل نموذج مخصص + + # التنبؤ باستخدام النموذج + results = model('https://ultralytics.com/images/bus.jpg') # التنبؤ بصورة + ``` + === "CLI" + + ```bash + yolo pose predict model=yolov8n-pose.pt source='https://ultralytics.com/images/bus.jpg' # التنبؤ باستخدام النموذج الرسمي + yolo pose predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # التنبؤ باستخدام النموذج المخصص + ``` + +انظر تفاصيل `predict` كاملة في [صفحة التنبؤ](https://docs.ultralytics.com/modes/predict/). + +## التصدير + +قم بتصدير نموذج YOLOv8n-pose إلى تنسيق مختلف مثل ONNX، CoreML، الخ. + +!!! Example "مثال" + + === "Python" + + ```python + from ultralytics import YOLO + + # تحميل النموذج + model = YOLO('yolov8n-pose.pt') # تحميل نموذج رسمي + model = YOLO('path/to/best.pt') # تحميل نموذج مدرب مخصص + + # تصدير النموذج + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-pose.pt format=onnx # تصدير نموذج رسمي + yolo export model=path/to/best.pt format=onnx # تصدير نموذج مخصص + ``` + +تتوفر تنسيقات تصدير YOLOv8-pose في الجدول أدناه. 
يمكنك التنبؤ أو التحقق مباشرةً على النماذج المصدرة ، على سبيل المثال `yolo predict model=yolov8n-pose.onnx`. توجد أمثلة استخدام متاحة لنموذجك بعد اكتمال عملية التصدير. + +| تنسيق | إجراء `format` | النموذج | البيانات الوصفية | الوسائط | +|--------------------------------------------------------------------|----------------|--------------------------------|------------------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-pose.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-pose.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-pose.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-pose_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-pose.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-pose.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-pose_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-pose.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-pose.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-pose_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-pose_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-pose_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-pose_ncnn_model/` | ✅ | `imgsz`, `half` | + +انظر تفاصيل `export` كاملة في [صفحة 
التصدير](https://docs.ultralytics.com/modes/export/). diff --git a/docs/ar/tasks/segment.md b/docs/ar/tasks/segment.md new file mode 100644 index 0000000..85f7d39 --- /dev/null +++ b/docs/ar/tasks/segment.md @@ -0,0 +1,189 @@ +--- +comments: true +description: تعلم كيفية استخدام نماذج فصل الأشكال الفردية مع Ultralytics YOLO. تعليمات حول التدريب والتحقق من الصحة وتوقع الصورة وتصدير النموذج. +keywords: yolov8 ، فصل الأشكال الفردية ، Ultralytics ، مجموعة بيانات COCO ، تجزئة الصورة ، كشف الكائنات ، تدريب النموذج ، التحقق من صحة النموذج ، توقع الصورة ، تصدير النموذج +--- + +# فصل الأشكال الفردية + +أمثلة على فصل الأشكال الفردية + +يذهب فصل الأشكال الفردية خطوة أبعد من كشف الكائنات وينطوي على تحديد الكائنات الفردية في صورة وتجزيئها عن بقية الصورة. + +ناتج نموذج فصل الأشكال الفردية هو مجموعة من الأقنعة أو الحدود التي تحدد كل كائن في الصورة ، جنبًا إلى جنب مع تصنيف الصنف ونقاط الثقة لكل كائن. يكون فصل الأشكال الفردية مفيدًا عندما تحتاج إلى معرفة ليس فقط أين توجد الكائنات في الصورة ، ولكن أيضًا ما هو شكلها الدقيق. + +

+
+ +
+ المشاهدة: تشغيل فصل الأشكال مع نموذج Ultralytics YOLOv8 مدرب مسبقًا باستخدام Python. +

+ +!!! Tip "نصيحة" + + تستخدم نماذج YOLOv8 Seg اللاحقة `-seg`، أي `yolov8n-seg.pt` وتكون مدربة مسبقًا على [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml). + +## [النماذج](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +تُعرض هنا النماذج الجاهزة المدربة مسبقًا لـ YOLOv8 Segment. يتم تدريب نماذج الكشف والتجزيء والمواقف على مجموعة البيانات [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) ، بينما تدرب نماذج التصنيف على مجموعة البيانات [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +تتم تنزيل [النماذج](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) تلقائيًا من [الإصدار](https://github.com/ultralytics/assets/releases) الأخير لـ Ultralytics عند أول استخدام. + +| النموذج | الحجم
بكسل | mAPbox
50-95 | mAPmask
50-95 | السرعة
CPU ONNX
(مللي ثانية) | السرعة
A100 TensorRT
(مللي ثانية) | المعلمات
(مليون) | FLOPs
(مليار) | +|----------------------------------------------------------------------------------------------|--------------------|----------------------|-----------------------|-----------------------------------------|----------------------------------------------|--------------------------|-----------------------| +| [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | +| [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | +| [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | +| [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | +| [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | + +- تُستخدم قيم **mAPval** لنموذج واحد وحجم واحد على مجموعة بيانات [COCO val2017](http://cocodataset.org). +
يمكن إعادة إنتاجها باستخدام `yolo val segment data=coco.yaml device=0` +- **تُحسب السرعة** كمتوسط على صور COCO val باستخدام [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) + instance. +
يمكن إعادة إنتاجها باستخدام `yolo val segment data=coco128-seg.yaml batch=1 device=0|cpu` + +## التدريب + +قم بتدريب YOLOv8n-seg على مجموعة بيانات COCO128-seg لمدة 100 دورة عند حجم صورة 640. للحصول على قائمة كاملة بالوسائط المتاحة ، راجع صفحة [التكوين](/../usage/cfg.md). + +!!! Example "مثال" + + === "Python" + + ```python + from ultralytics import YOLO + + # قم بتحميل النموذج + model = YOLO('yolov8n-seg.yaml') # قم ببناء نموذج جديد من ملف YAML + model = YOLO('yolov8n-seg.pt') # قم بتحميل نموذج مدرب مسبقًا (موصى به للتدريب) + model = YOLO('yolov8n-seg.yaml').load('yolov8n.pt') # قم ببنائه من YAML ونقل الوزن + + # قم بتدريب النموذج + results = model.train(data='coco128-seg.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # قم ببناء نموذج جديد من ملف YAML وبدء التدريب من البداية + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml epochs=100 imgsz=640 + + # قم ببدء التدريب من نموذج *.pt مدرب مسبقًا + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + + # قم ببناء نموذج جديد من YAML ونقل الأوزان المدربة مسبَقًا إليه وابدأ التدريب + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml pretrained=yolov8n-seg.pt epochs=100 imgsz=640 + ``` + +### تنسيق مجموعة البيانات + +يمكن العثور على تنسيق مجموعة بيانات تجزيء YOLO بالتفصيل في [دليل مجموعة البيانات](../../../datasets/segment/index.md). لتحويل مجموعة البيانات الحالية التي تتبع تنسيقات أخرى (مثل COCO إلخ) إلى تنسيق YOLO ، يُرجى استخدام أداة [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) من Ultralytics. + +## التحقق من الصحة + +قم بالتحقق من دقة نموذج YOLOv8n-seg المدرب على مجموعة بيانات COCO128-seg. لا حاجة لتمرير أي وسيطة كما يحتفظ النموذج ببيانات "تدريبه" والوسيطات كسمات النموذج. + +!!! 
Example "مثال" + + === "Python" + + ```python + from ultralytics import YOLO + + # قم بتحميل النموذج + model = YOLO('yolov8n-seg.pt') # قم بتحميل نموذج رسمي + model = YOLO('path/to/best.pt') # قم بتحميل نموذج مخصص + + # قم بالتحقق من النموذج + metrics = model.val() # لا حاجة إلى أي وسيطة ، يتذكر النموذج بيانات التدريب والوسيطات كسمات النموذج + metrics.box.map # map50-95(B) + metrics.box.map50 # map50(B) + metrics.box.map75 # map75(B) + metrics.box.maps # قائمة تحتوي على map50-95(B) لكل فئة + metrics.seg.map # map50-95(M) + metrics.seg.map50 # map50(M) + metrics.seg.map75 # map75(M) + metrics.seg.maps # قائمة تحتوي على map50-95(M) لكل فئة + ``` + === "CLI" + + ```bash + yolo segment val model=yolov8n-seg.pt # التحقق من النموذج الرسمي + yolo segment val model=path/to/best.pt # التحقق من النموذج المخصص + ``` + +## التنبؤ + +استخدم نموذج YOLOv8n-seg المدرب للقيام بالتنبؤات على الصور. + +!!! Example "مثال" + + === "Python" + + ```python + from ultralytics import YOLO + + # قم بتحميل النموذج + model = YOLO('yolov8n-seg.pt') # قم بتحميل نموذج رسمي + model = YOLO('path/to/best.pt') # قم بتحميل نموذج مخصص + + # التنبؤ باستخدام النموذج + results = model('https://ultralytics.com/images/bus.jpg') # التنبؤ على صورة + ``` + === "CLI" + + ```bash + yolo segment predict model=yolov8n-seg.pt source='https://ultralytics.com/images/bus.jpg' # التنبؤ باستخدام النموذج الرسمي + yolo segment predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # التنبؤ باستخدام النموذج المخصص + ``` + +انظر تفاصيل "التنبؤ" الكاملة في [الصفحة](https://docs.ultralytics.com/modes/predict/). + +## التصدير + +قم بتصدير نموذج YOLOv8n-seg إلى تنسيق مختلف مثل ONNX و CoreML وما إلى ذلك. + +!!! 
Example "مثال" + + === "Python" + + ```python + from ultralytics import YOLO + + # قم بتحميل النموذج + model = YOLO('yolov8n-seg.pt') # قم بتحميل نموذج رسمي + model = YOLO('path/to/best.pt') # قم بتحميل نموذج مدرب مخصص + + # قم بتصدير النموذج + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-seg.pt format=onnx # تصدير نموذج رسمي + yolo export model=path/to/best.pt format=onnx # تصدير نموذج مدرب مخصص + ``` + +صيغ تصدير YOLOv8-seg المتاحة في الجدول أدناه. يمكنك التنبؤ أو التحقق من صحة الموديل المصدر بشكل مباشر ، أي `yolo predict model=yolov8n-seg.onnx`. يتم عرض أمثلة عن الاستخدام لنموذجك بعد اكتمال التصدير. + +| الصيغة | `format` Argument | النموذج | التعليمات | الخيارات | +|--------------------------------------------------------------------|-------------------|-------------------------------|-----------|-------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-seg.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-seg.torchscript` | ✅ | `الحجم ، الأمان` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-seg.onnx` | ✅ | `الحجم ، half ، dynamic ، simplify ، opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-seg_openvino_model/` | ✅ | `الحجم ، half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-seg.engine` | ✅ | `الحجم ، half ، dynamic ، simplify ، workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-seg.mlpackage` | ✅ | `الحجم ، half ، int8 ، nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-seg_saved_model/` | ✅ | `الحجم ، keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-seg.pb` | ❌ | `الحجم` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-seg.tflite` | ✅ | `الحجم ، half ، int8` | +| [TF Edge 
TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-seg_edgetpu.tflite` | ✅ | `الحجم` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-seg_web_model/` | ✅ | `الحجم` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-seg_paddle_model/` | ✅ | `الحجم` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-seg_ncnn_model/` | ✅ | `الحجم ، half` | + +انظر تفاصيل "التصدير" الكاملة في [الصفحة](https://docs.ultralytics.com/modes/export/). diff --git a/docs/build_docs.py b/docs/build_docs.py new file mode 100644 index 0000000..914f2fe --- /dev/null +++ b/docs/build_docs.py @@ -0,0 +1,116 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +""" +This Python script is designed to automate the building and post-processing of MkDocs documentation, particularly for +projects with multilingual content. It streamlines the workflow for generating localized versions of the documentation +and updating HTML links to ensure they are correctly formatted. + +Key Features: +- Automated building of MkDocs documentation: The script compiles both the main documentation and + any localized versions specified in separate MkDocs configuration files. +- Post-processing of generated HTML files: After the documentation is built, the script updates all + HTML files to remove the '.md' extension from internal links. This ensures that links in the built + HTML documentation correctly point to other HTML pages rather than Markdown files, which is crucial + for proper navigation within the web-based documentation. + +Usage: +- Run the script from the root directory of your MkDocs project. +- Ensure that MkDocs is installed and that all MkDocs configuration files (main and localized versions) + are present in the project directory. +- The script first builds the documentation using MkDocs, then scans the generated HTML files in the 'site' + directory to update the internal links. 
+- It's ideal for projects where the documentation is written in Markdown and needs to be served as a static website. + +Note: +- This script is built to be run in an environment where Python and MkDocs are installed and properly configured. +""" + +import re +import shutil +import subprocess +from pathlib import Path + +DOCS = Path(__file__).parent.resolve() +SITE = DOCS.parent / 'site' + + +def build_docs(): + """Build docs using mkdocs.""" + if SITE.exists(): + print(f'Removing existing {SITE}') + shutil.rmtree(SITE) + + # Build the main documentation + print(f'Building docs from {DOCS}') + subprocess.run(f'mkdocs build -f {DOCS}/mkdocs.yml', check=True, shell=True) + + # Build other localized documentations + for file in DOCS.glob('mkdocs_*.yml'): + print(f'Building MkDocs site with configuration file: {file}') + subprocess.run(f'mkdocs build -f {file}', check=True, shell=True) + print(f'Site built at {SITE}') + + +def update_html_links(): + """Update href links in HTML files to remove '.md' and '/index.md', excluding links starting with 'https://'.""" + html_files = Path(SITE).rglob('*.html') + total_updated_links = 0 + + for html_file in html_files: + with open(html_file, 'r+', encoding='utf-8') as file: + content = file.read() + # Find all links to be updated, excluding those starting with 'https://' + links_to_update = re.findall(r'href="(?!https://)([^"]+?)(/index)?\.md"', content) + + # Update the content and count the number of links updated + updated_content, number_of_links_updated = re.subn(r'href="(?!https://)([^"]+?)(/index)?\.md"', + r'href="\1"', content) + total_updated_links += number_of_links_updated + + # Special handling for '/index' links + updated_content, number_of_index_links_updated = re.subn(r'href="([^"]+)/index"', r'href="\1/"', + updated_content) + total_updated_links += number_of_index_links_updated + + # Write the updated content back to the file + file.seek(0) + file.write(updated_content) + file.truncate() + + # Print updated 
links for this file + for link in links_to_update: + print(f'Updated link in {html_file}: {link[0]}') + + print(f'Total number of links updated: {total_updated_links}') + + +def update_page_title(file_path: Path, new_title: str): + """Update the title of an HTML file.""" + + # Read the content of the file + with open(file_path, encoding='utf-8') as file: + content = file.read() + + # Replace the existing title with the new title + updated_content = re.sub(r'<title>.*?</title>', f'<title>{new_title}</title>', content) + + # Write the updated content back to the file + with open(file_path, 'w', encoding='utf-8') as file: + file.write(updated_content) + + +def main(): + # Build the docs + build_docs() + + # Update .md in href links + update_html_links() + + # Show command to serve built website + print('Serve site at http://localhost:8000 with "python -m http.server --directory site"') + + # Update titles + update_page_title(SITE / '404.html', new_title='Ultralytics Docs - Not Found') + + +if __name__ == '__main__': + main() diff --git a/docs/build_reference.py b/docs/build_reference.py new file mode 100644 index 0000000..cb15d34 --- /dev/null +++ b/docs/build_reference.py @@ -0,0 +1,128 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +""" +Helper file to build Ultralytics Docs reference section. Recursively walks through ultralytics dir and builds an MkDocs +reference section of *.md files composed of classes and functions, and also creates a nav menu for use in mkdocs.yaml. + +Note: Must be run from repository root directory. Do not run from docs directory. 
+""" + +import re +from collections import defaultdict +from pathlib import Path + +from ultralytics.utils import ROOT + +NEW_YAML_DIR = ROOT.parent +CODE_DIR = ROOT +REFERENCE_DIR = ROOT.parent / 'docs/en/reference' + + +def extract_classes_and_functions(filepath: Path) -> tuple: + """Extracts class and function names from a given Python file.""" + content = filepath.read_text() + class_pattern = r'(?:^|\n)class\s(\w+)(?:\(|:)' + func_pattern = r'(?:^|\n)def\s(\w+)\(' + + classes = re.findall(class_pattern, content) + functions = re.findall(func_pattern, content) + + return classes, functions + + +def create_markdown(py_filepath: Path, module_path: str, classes: list, functions: list): + """Creates a Markdown file containing the API reference for the given Python module.""" + md_filepath = py_filepath.with_suffix('.md') + + # Read existing content and keep header content between first two --- + header_content = '' + if md_filepath.exists(): + existing_content = md_filepath.read_text() + header_parts = existing_content.split('---') + for part in header_parts: + if 'description:' in part or 'comments:' in part: + header_content += f'---{part}---\n\n' + + module_name = module_path.replace('.__init__', '') + module_path = module_path.replace('.', '/') + url = f'https://github.com/ultralytics/ultralytics/blob/main/{module_path}.py' + edit = f'https://github.com/ultralytics/ultralytics/edit/main/{module_path}.py' + title_content = ( + f'# Reference for `{module_path}.py`\n\n' + f'!!! Note\n\n' + f' This file is available at [{url}]({url}). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request]({edit}) 🛠️. Thank you 🙏!\n\n' + ) + md_content = ['

<br><br>\n'] + [f'## ::: {module_name}.{class_name}\n\n<br><br>\n' for class_name in classes] + md_content.extend(f'## ::: {module_name}.{func_name}\n\n<br><br>

\n' for func_name in functions) + md_content = header_content + title_content + '\n'.join(md_content) + if not md_content.endswith('\n'): + md_content += '\n' + + md_filepath.parent.mkdir(parents=True, exist_ok=True) + md_filepath.write_text(md_content) + + return md_filepath.relative_to(NEW_YAML_DIR) + + +def nested_dict() -> defaultdict: + """Creates and returns a nested defaultdict.""" + return defaultdict(nested_dict) + + +def sort_nested_dict(d: dict) -> dict: + """Sorts a nested dictionary recursively.""" + return {key: sort_nested_dict(value) if isinstance(value, dict) else value for key, value in sorted(d.items())} + + +def create_nav_menu_yaml(nav_items: list): + """Creates a YAML file for the navigation menu based on the provided list of items.""" + nav_tree = nested_dict() + + for item_str in nav_items: + item = Path(item_str) + parts = item.parts + current_level = nav_tree['reference'] + for part in parts[2:-1]: # skip the first two parts (docs and reference) and the last part (filename) + current_level = current_level[part] + + md_file_name = parts[-1].replace('.md', '') + current_level[md_file_name] = item + + nav_tree_sorted = sort_nested_dict(nav_tree) + + def _dict_to_yaml(d, level=0): + """Converts a nested dictionary to a YAML-formatted string with indentation.""" + yaml_str = '' + indent = ' ' * level + for k, v in d.items(): + if isinstance(v, dict): + yaml_str += f'{indent}- {k}:\n{_dict_to_yaml(v, level + 1)}' + else: + yaml_str += f"{indent}- {k}: {str(v).replace('docs/en/', '')}\n" + return yaml_str + + # Print updated YAML reference section + print('Scan complete, new mkdocs.yaml reference section is:\n\n', _dict_to_yaml(nav_tree_sorted)) + + # Save new YAML reference section + # (NEW_YAML_DIR / 'nav_menu_updated.yml').write_text(_dict_to_yaml(nav_tree_sorted)) + + +def main(): + """Main function to extract class and function names, create Markdown files, and generate a YAML navigation menu.""" + nav_items = [] + + for py_filepath in 
CODE_DIR.rglob('*.py'): + classes, functions = extract_classes_and_functions(py_filepath) + + if classes or functions: + py_filepath_rel = py_filepath.relative_to(CODE_DIR) + md_filepath = REFERENCE_DIR / py_filepath_rel + module_path = f"ultralytics.{py_filepath_rel.with_suffix('').as_posix().replace('/', '.')}" + md_rel_filepath = create_markdown(md_filepath, module_path, classes, functions) + nav_items.append(str(md_rel_filepath)) + + create_nav_menu_yaml(nav_items) + + +if __name__ == '__main__': + main() diff --git a/docs/de/index.md b/docs/de/index.md new file mode 100644 index 0000000..1216d92 --- /dev/null +++ b/docs/de/index.md @@ -0,0 +1,82 @@ +--- +comments: true +description: Entdecken Sie einen vollständigen Leitfaden zu Ultralytics YOLOv8, einem schnellen und präzisen Modell zur Objekterkennung und Bildsegmentierung. Installations-, Vorhersage-, Trainingstutorials und mehr. +keywords: Ultralytics, YOLOv8, Objekterkennung, Bildsegmentierung, maschinelles Lernen, Deep Learning, Computer Vision, YOLOv8 Installation, YOLOv8 Vorhersage, YOLOv8 Training, YOLO-Geschichte, YOLO-Lizenzen +--- + +
+

+ + Ultralytics YOLO Banner +

+ Ultralytics GitHub + space + Ultralytics LinkedIn + space + Ultralytics Twitter + space + Ultralytics YouTube + space + Ultralytics TikTok + space + Ultralytics Instagram + space + Ultralytics Discord +
+
+ Ultralytics CI + Ultralytics Code Coverage + YOLOv8 Zitation + Docker Pulls +
+ Auf Gradient ausführen + In Colab öffnen + In Kaggle öffnen +
+ +Wir stellen [Ultralytics](https://ultralytics.com) [YOLOv8](https://github.com/ultralytics/ultralytics) vor, die neueste Version des renommierten Echtzeit-Modells zur Objekterkennung und Bildsegmentierung. YOLOv8 basiert auf den neuesten Erkenntnissen im Bereich Deep Learning und Computer Vision und bietet eine unvergleichliche Leistung hinsichtlich Geschwindigkeit und Genauigkeit. Sein optimiertes Design macht es für verschiedene Anwendungen geeignet und leicht an verschiedene Hardwareplattformen anpassbar, von Edge-Geräten bis hin zu Cloud-APIs. + +Erkunden Sie die YOLOv8-Dokumentation, eine umfassende Ressource, die Ihnen helfen soll, seine Funktionen und Fähigkeiten zu verstehen und zu nutzen. Ob Sie ein erfahrener Machine-Learning-Praktiker sind oder neu in diesem Bereich, dieses Hub zielt darauf ab, das Potenzial von YOLOv8 in Ihren Projekten zu maximieren + +!!! Note "Hinweis" + + 🚧 Unsere mehrsprachige Dokumentation wird derzeit entwickelt und wir arbeiten intensiv an ihrer Verbesserung. Wir danken für Ihre Geduld! 🙏 + +## Wo Sie beginnen sollten + +- **Installieren** Sie `ultralytics` mit pip und starten Sie in wenigen Minuten   [:material-clock-fast: Loslegen](quickstart.md){ .md-button } +- **Vorhersagen** Sie neue Bilder und Videos mit YOLOv8   [:octicons-image-16: Auf Bilder vorhersagen](modes/predict.md){ .md-button } +- **Trainieren** Sie ein neues YOLOv8-Modell mit Ihrem eigenen benutzerdefinierten Datensatz   [:fontawesome-solid-brain: Ein Modell trainieren](modes/train.md){ .md-button } +- **Erforschen** Sie YOLOv8-Aufgaben wie Segmentieren, Klassifizieren, Posenschätzung und Verfolgen   [:material-magnify-expand: Aufgaben erkunden](tasks/index.md){ .md-button } + +

+
+ +
+ Ansehen: Wie Sie ein YOLOv8-Modell auf Ihrem eigenen Datensatz in Google Colab trainieren. +

+ +## YOLO: Eine kurze Geschichte + +[YOLO](https://arxiv.org/abs/1506.02640) (You Only Look Once), ein beliebtes Modell zur Objekterkennung und Bildsegmentierung, wurde von Joseph Redmon und Ali Farhadi an der Universität von Washington entwickelt. Seit seiner Einführung im Jahr 2015 erfreut es sich aufgrund seiner hohen Geschwindigkeit und Genauigkeit großer Beliebtheit. + +- [YOLOv2](https://arxiv.org/abs/1612.08242), veröffentlicht im Jahr 2016, verbesserte das Originalmodell durch die Einführung von Batch-Normalisierung, Ankerkästen und Dimensionsclustern. +- [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf), eingeführt im Jahr 2018, erhöhte die Leistung des Modells weiter mit einem effizienteren Backbone-Netzwerk, mehreren Ankern und räumlichem Pyramid-Pooling. +- [YOLOv4](https://arxiv.org/abs/2004.10934) wurde 2020 veröffentlicht und brachte Neuerungen wie Mosaic-Datenerweiterung, einen neuen ankerfreien Erkennungskopf und eine neue Verlustfunktion. +- [YOLOv5](https://github.com/ultralytics/yolov5) verbesserte die Leistung des Modells weiter und führte neue Funktionen ein, wie Hyperparameter-Optimierung, integriertes Experiment-Tracking und automatischen Export in beliebte Exportformate. +- [YOLOv6](https://github.com/meituan/YOLOv6) wurde 2022 von [Meituan](https://about.meituan.com/) als Open Source zur Verfügung gestellt und wird in vielen autonomen Lieferrobotern des Unternehmens eingesetzt. +- [YOLOv7](https://github.com/WongKinYiu/yolov7) führte zusätzliche Aufgaben ein, wie Posenschätzung auf dem COCO-Keypoints-Datensatz. +- [YOLOv8](https://github.com/ultralytics/ultralytics) ist die neueste Version von YOLO von Ultralytics. Als Spitzenmodell der neuesten Generation baut YOLOv8 auf dem Erfolg vorheriger Versionen auf und führt neue Funktionen und Verbesserungen für erhöhte Leistung, Flexibilität und Effizienz ein. 
YOLOv8 unterstützt eine vollständige Palette an Vision-KI-Aufgaben, einschließlich [Erkennung](tasks/detect.md), [Segmentierung](tasks/segment.md), [Posenschätzung](tasks/pose.md), [Verfolgung](modes/track.md) und [Klassifizierung](tasks/classify.md). Diese Vielseitigkeit ermöglicht es Benutzern, die Fähigkeiten von YOLOv8 in verschiedenen Anwendungen und Domänen zu nutzen. + +## YOLO-Lizenzen: Wie wird Ultralytics YOLO lizenziert? + +Ultralytics bietet zwei Lizenzoptionen, um unterschiedliche Einsatzszenarien zu berücksichtigen: + +- **AGPL-3.0-Lizenz**: Diese [OSI-geprüfte](https://opensource.org/licenses/) Open-Source-Lizenz ist ideal für Studenten und Enthusiasten und fördert offene Zusammenarbeit und Wissensaustausch. Weitere Details finden Sie in der [LIZENZ](https://github.com/ultralytics/ultralytics/blob/main/LICENSE)-Datei. +- **Enterprise-Lizenz**: Für die kommerzielle Nutzung konzipiert, ermöglicht diese Lizenz die problemlose Integration von Ultralytics-Software und KI-Modellen in kommerzielle Produkte und Dienstleistungen und umgeht die Open-Source-Anforderungen der AGPL-3.0. Wenn Ihr Szenario die Einbettung unserer Lösungen in ein kommerzielles Angebot beinhaltet, kontaktieren Sie uns über [Ultralytics-Lizenzierung](https://ultralytics.com/license). + +Unsere Lizenzstrategie ist darauf ausgerichtet sicherzustellen, dass jegliche Verbesserungen an unseren Open-Source-Projekten der Gemeinschaft zurückgegeben werden. Wir halten die Prinzipien von Open Source in Ehren ❤️ und es ist unser Anliegen, dass unsere Beiträge auf Weisen genutzt und erweitert werden können, die für alle vorteilhaft sind. diff --git a/docs/de/models/fast-sam.md b/docs/de/models/fast-sam.md new file mode 100644 index 0000000..e726c5c --- /dev/null +++ b/docs/de/models/fast-sam.md @@ -0,0 +1,193 @@ +--- +comments: true +description: Erkunden Sie FastSAM, eine CNN-basierte Lösung zur Echtzeit-Segmentierung von Objekten in Bildern. 
Verbesserte Benutzerinteraktion, Recheneffizienz und anpassbar für verschiedene Vision-Aufgaben. +keywords: FastSAM, maschinelles Lernen, CNN-basierte Lösung, Objektsegmentierung, Echtzeitlösung, Ultralytics, Vision-Aufgaben, Bildverarbeitung, industrielle Anwendungen, Benutzerinteraktion +--- + +# Fast Segment Anything Model (FastSAM) + +Das Fast Segment Anything Model (FastSAM) ist eine neuartige, Echtzeit-CNN-basierte Lösung für die Segment Anything Aufgabe. Diese Aufgabe zielt darauf ab, jedes Objekt in einem Bild auf Basis verschiedener möglicher Benutzerinteraktionen zu segmentieren. FastSAM reduziert signifikant den Rechenbedarf, während es eine wettbewerbsfähige Leistung beibehält und somit für eine Vielzahl von Vision-Aufgaben praktisch einsetzbar ist. + +![Übersicht über die Architektur des Fast Segment Anything Model (FastSAM)](https://user-images.githubusercontent.com/26833433/248551984-d98f0f6d-7535-45d0-b380-2e1440b52ad7.jpg) + +## Überblick + +FastSAM wurde entwickelt, um die Einschränkungen des [Segment Anything Model (SAM)](sam.md) zu beheben, einem schweren Transformer-Modell mit erheblichem Rechenressourcenbedarf. Das FastSAM teilt die Segment Anything Aufgabe in zwei aufeinanderfolgende Stufen auf: die Instanzsegmentierung und die promptgesteuerte Auswahl. In der ersten Stufe wird [YOLOv8-seg](../tasks/segment.md) verwendet, um die Segmentierungsmasken aller Instanzen im Bild zu erzeugen. In der zweiten Stufe gibt es den Bereich von Interesse aus, der dem Prompt entspricht. + +## Hauptmerkmale + +1. **Echtzeitlösung:** Durch die Nutzung der Recheneffizienz von CNNs bietet FastSAM eine Echtzeitlösung für die Segment Anything Aufgabe und eignet sich somit für industrielle Anwendungen, die schnelle Ergebnisse erfordern. + +2. **Effizienz und Leistung:** FastSAM bietet eine signifikante Reduzierung des Rechen- und Ressourcenbedarfs, ohne die Leistungsqualität zu beeinträchtigen. 
Es erzielt eine vergleichbare Leistung wie SAM, verwendet jedoch drastisch reduzierte Rechenressourcen und ermöglicht so eine Echtzeitanwendung. + +3. **Promptgesteuerte Segmentierung:** FastSAM kann jedes Objekt in einem Bild anhand verschiedener möglicher Benutzerinteraktionsaufforderungen segmentieren. Dies ermöglicht Flexibilität und Anpassungsfähigkeit in verschiedenen Szenarien. + +4. **Basierend auf YOLOv8-seg:** FastSAM basiert auf [YOLOv8-seg](../tasks/segment.md), einem Objektdetektor mit einem Instanzsegmentierungsmodul. Dadurch ist es in der Lage, die Segmentierungsmasken aller Instanzen in einem Bild effektiv zu erzeugen. + +5. **Wettbewerbsfähige Ergebnisse auf Benchmarks:** Bei der Objektvorschlagsaufgabe auf MS COCO erzielt FastSAM hohe Punktzahlen bei deutlich schnellerem Tempo als [SAM](sam.md) auf einer einzelnen NVIDIA RTX 3090. Dies demonstriert seine Effizienz und Leistungsfähigkeit. + +6. **Praktische Anwendungen:** Der vorgeschlagene Ansatz bietet eine neue, praktische Lösung für eine Vielzahl von Vision-Aufgaben mit sehr hoher Geschwindigkeit, die zehn- oder hundertmal schneller ist als vorhandene Methoden. + +7. **Möglichkeit zur Modellkompression:** FastSAM zeigt, dass der Rechenaufwand erheblich reduziert werden kann, indem ein künstlicher Prior in die Struktur eingeführt wird. Dadurch eröffnen sich neue Möglichkeiten für große Modellarchitekturen für allgemeine Vision-Aufgaben. + +## Verfügbare Modelle, unterstützte Aufgaben und Betriebsmodi + +In dieser Tabelle werden die verfügbaren Modelle mit ihren spezifischen vorab trainierten Gewichten, den unterstützten Aufgaben und ihrer Kompatibilität mit verschiedenen Betriebsmodi wie [Inferenz](../modes/predict.md), [Validierung](../modes/val.md), [Training](../modes/train.md) und [Export](../modes/export.md) angezeigt. Dabei stehen ✅ Emojis für unterstützte Modi und ❌ Emojis für nicht unterstützte Modi. 
+ +| Modelltyp | Vorab trainierte Gewichte | Unterstützte Aufgaben | Inferenz | Validierung | Training | Export | +|-----------|---------------------------|---------------------------------------------|----------|-------------|----------|--------| +| FastSAM-s | `FastSAM-s.pt` | [Instanzsegmentierung](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| FastSAM-x | `FastSAM-x.pt` | [Instanzsegmentierung](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## Beispiele für die Verwendung + +Die FastSAM-Modelle lassen sich problemlos in Ihre Python-Anwendungen integrieren. Ultralytics bietet eine benutzerfreundliche Python-API und CLI-Befehle zur Vereinfachung der Entwicklung. + +### Verwendung der Methode `predict` + +Um eine Objekterkennung auf einem Bild durchzuführen, verwenden Sie die Methode `predict` wie folgt: + +!!! Example "Beispiel" + + === "Python" + ```python + from ultralytics import FastSAM + from ultralytics.models.fastsam import FastSAMPrompt + + # Definieren Sie die Quelle für die Inferenz + source = 'Pfad/zum/bus.jpg' + + # Erstellen Sie ein FastSAM-Modell + model = FastSAM('FastSAM-s.pt') # oder FastSAM-x.pt + + # Führen Sie die Inferenz auf einem Bild durch + everything_results = model(source, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9) + + # Bereiten Sie ein Prompt-Process-Objekt vor + prompt_process = FastSAMPrompt(source, everything_results, device='cpu') + + # Alles-Prompt + ann = prompt_process.everything_prompt() + + # Bbox Standardform [0,0,0,0] -> [x1,y1,x2,y2] + ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300]) + + # Text-Prompt + ann = prompt_process.text_prompt(text='ein Foto von einem Hund') + + # Punkt-Prompt + # Punkte Standard [[0,0]] [[x1,y1],[x2,y2]] + # Punktbezeichnung Standard [0] [1,0] 0:Hintergrund, 1:Vordergrund + ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1]) + prompt_process.plot(annotations=ann, output='./') + ``` + + === "CLI" + ```bash + # Laden Sie ein FastSAM-Modell und segmentieren 
Sie alles damit + yolo segment predict model=FastSAM-s.pt source=Pfad/zum/bus.jpg imgsz=640 + ``` + +Dieser Code-Ausschnitt zeigt die Einfachheit des Ladens eines vorab trainierten Modells und das Durchführen einer Vorhersage auf einem Bild. + +### Verwendung von `val` + +Die Validierung des Modells auf einem Datensatz kann wie folgt durchgeführt werden: + +!!! Example "Beispiel" + + === "Python" + ```python + from ultralytics import FastSAM + + # Erstellen Sie ein FastSAM-Modell + model = FastSAM('FastSAM-s.pt') # oder FastSAM-x.pt + + # Validieren Sie das Modell + results = model.val(data='coco8-seg.yaml') + ``` + + === "CLI" + ```bash + # Laden Sie ein FastSAM-Modell und validieren Sie es auf dem COCO8-Beispieldatensatz mit Bildgröße 640 + yolo segment val model=FastSAM-s.pt data=coco8-seg.yaml imgsz=640 + ``` + +Bitte beachten Sie, dass FastSAM nur die Erkennung und Segmentierung einer einzigen Objektklasse unterstützt. Das bedeutet, dass es alle Objekte als dieselbe Klasse erkennt und segmentiert. Daher müssen Sie beim Vorbereiten des Datensatzes alle Objektkategorie-IDs in 0 umwandeln. + +## Offizielle Verwendung von FastSAM + +FastSAM ist auch direkt aus dem [https://github.com/CASIA-IVA-Lab/FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM) Repository erhältlich. Hier ist ein kurzer Überblick über die typischen Schritte, die Sie unternehmen könnten, um FastSAM zu verwenden: + +### Installation + +1. Klonen Sie das FastSAM-Repository: + ```shell + git clone https://github.com/CASIA-IVA-Lab/FastSAM.git + ``` + +2. Erstellen und aktivieren Sie eine Conda-Umgebung mit Python 3.9: + ```shell + conda create -n FastSAM python=3.9 + conda activate FastSAM + ``` + +3. Navigieren Sie zum geklonten Repository und installieren Sie die erforderlichen Pakete: + ```shell + cd FastSAM + pip install -r requirements.txt + ``` + +4. Installieren Sie das CLIP-Modell: + ```shell + pip install git+https://github.com/openai/CLIP.git + ``` + +### Beispielverwendung + +1. 
Laden Sie eine [Modell-Sicherung](https://drive.google.com/file/d/1m1sjY4ihXBU1fZXdQ-Xdj-mDltW-2Rqv/view?usp=sharing) herunter. + +2. Verwenden Sie FastSAM für Inferenz. Beispielbefehle: + + - Segmentieren Sie alles in einem Bild: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg + ``` + + - Segmentieren Sie bestimmte Objekte anhand eines Textprompts: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --text_prompt "der gelbe Hund" + ``` + + - Segmentieren Sie Objekte innerhalb eines Begrenzungsrahmens (geben Sie die Boxkoordinaten im xywh-Format an): + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --box_prompt "[570,200,230,400]" + ``` + + - Segmentieren Sie Objekte in der Nähe bestimmter Punkte: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --point_prompt "[[520,360],[620,300]]" --point_label "[1,0]" + ``` + +Sie können FastSAM auch über eine [Colab-Demo](https://colab.research.google.com/drive/1oX14f6IneGGw612WgVlAiy91UHwFAvr9?usp=sharing) oder die [HuggingFace-Web-Demo](https://huggingface.co/spaces/An-619/FastSAM) testen, um eine visuelle Erfahrung zu machen. + +## Zitate und Danksagungen + +Wir möchten den Autoren von FastSAM für ihre bedeutenden Beiträge auf dem Gebiet der Echtzeit-Instanzsegmentierung danken: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{zhao2023fast, + title={Fast Segment Anything}, + author={Xu Zhao and Wenchao Ding and Yongqi An and Yinglong Du and Tao Yu and Min Li and Ming Tang and Jinqiao Wang}, + year={2023}, + eprint={2306.12156}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +Die ursprüngliche FastSAM-Arbeit ist auf [arXiv](https://arxiv.org/abs/2306.12156) zu finden. Die Autoren haben ihre Arbeit öffentlich zugänglich gemacht, und der Code ist auf [GitHub](https://github.com/CASIA-IVA-Lab/FastSAM) verfügbar. 
Wir schätzen ihre Bemühungen, das Fachgebiet voranzutreiben und ihre Arbeit der breiteren Gemeinschaft zugänglich zu machen. diff --git a/docs/de/models/index.md b/docs/de/models/index.md new file mode 100644 index 0000000..4482ccc --- /dev/null +++ b/docs/de/models/index.md @@ -0,0 +1,98 @@ +--- +comments: true +description: Entdecken Sie die vielfältige Palette an Modellen der YOLO-Familie, SAM, MobileSAM, FastSAM, YOLO-NAS und RT-DETR, die von Ultralytics unterstützt werden. Beginnen Sie mit Beispielen für die CLI- und Python-Nutzung. +keywords: Ultralytics, Dokumentation, YOLO, SAM, MobileSAM, FastSAM, YOLO-NAS, RT-DETR, Modelle, Architekturen, Python, CLI +--- + +# Von Ultralytics unterstützte Modelle + +Willkommen bei der Modell-Dokumentation von Ultralytics! Wir bieten Unterstützung für eine breite Palette von Modellen, die jeweils für spezifische Aufgaben wie [Objekterkennung](../tasks/detect.md), [Instanzsegmentierung](../tasks/segment.md), [Bildklassifizierung](../tasks/classify.md), [Posenschätzung](../tasks/pose.md) und [Multi-Objekt-Tracking](../modes/track.md) maßgeschneidert sind. Wenn Sie daran interessiert sind, Ihre Modellarchitektur bei Ultralytics beizutragen, sehen Sie sich unseren [Beitragenden-Leitfaden](../../help/contributing.md) an. + +!!! Note "Hinweis" + + 🚧 Unsere Dokumentation in verschiedenen Sprachen ist derzeit im Aufbau und wir arbeiten hart daran, sie zu verbessern. Vielen Dank für Ihre Geduld! 🙏 + +## Vorgestellte Modelle + +Hier sind einige der wichtigsten unterstützten Modelle: + +1. **[YOLOv3](yolov3.md)**: Die dritte Iteration der YOLO-Modellfamilie, ursprünglich von Joseph Redmon, bekannt für ihre effizienten Echtzeit-Objekterkennungsfähigkeiten. +2. **[YOLOv4](yolov4.md)**: Ein Darknet-natives Update von YOLOv3, veröffentlicht von Alexey Bochkovskiy im Jahr 2020. +3. 
**[YOLOv5](yolov5.md)**: Eine verbesserte Version der YOLO-Architektur von Ultralytics, die bessere Leistungs- und Geschwindigkeitskompromisse im Vergleich zu früheren Versionen bietet. +4. **[YOLOv6](yolov6.md)**: Veröffentlicht von [Meituan](https://about.meituan.com/) im Jahr 2022 und in vielen autonomen Lieferrobotern des Unternehmens im Einsatz. +5. **[YOLOv7](yolov7.md)**: Aktualisierte YOLO-Modelle, die 2022 von den Autoren von YOLOv4 veröffentlicht wurden. +6. **[YOLOv8](yolov8.md) NEU 🚀**: Die neueste Version der YOLO-Familie, mit erweiterten Fähigkeiten wie Instanzsegmentierung, Pose/Schlüsselpunktschätzung und Klassifizierung. +7. **[Segment Anything Model (SAM)](sam.md)**: Metas Segment Anything Model (SAM). +8. **[Mobile Segment Anything Model (MobileSAM)](mobile-sam.md)**: MobileSAM für mobile Anwendungen, von der Kyung Hee University. +9. **[Fast Segment Anything Model (FastSAM)](fast-sam.md)**: FastSAM von der Image & Video Analysis Group, Institute of Automation, Chinesische Akademie der Wissenschaften. +10. **[YOLO-NAS](yolo-nas.md)**: YOLO Neural Architecture Search (NAS) Modelle. +11. **[Realtime Detection Transformers (RT-DETR)](rtdetr.md)**: Baidus PaddlePaddle Realtime Detection Transformer (RT-DETR) Modelle. + +

+
+ +
+ Anschauen: Führen Sie Ultralytics YOLO-Modelle in nur wenigen Codezeilen aus. +

## Einstieg: Nutzungsbeispiele
**Repository forken**: Beginnen Sie mit dem Forken des [Ultralytics GitHub-Repositorys](https://github.com/ultralytics/ultralytics). + +2. **Ihren Fork klonen**: Klonen Sie Ihren Fork auf Ihre lokale Maschine und erstellen Sie einen neuen Branch, um daran zu arbeiten. + +3. **Ihr Modell implementieren**: Fügen Sie Ihr Modell entsprechend den in unserem [Beitragenden-Leitfaden](../../help/contributing.md) bereitgestellten Kodierungsstandards und Richtlinien hinzu. + +4. **Gründlich testen**: Stellen Sie sicher, dass Sie Ihr Modell sowohl isoliert als auch als Teil des Pipelines gründlich testen. + +5. **Eine Pull-Anfrage erstellen**: Sobald Sie mit Ihrem Modell zufrieden sind, erstellen Sie eine Pull-Anfrage zum Hauptrepository zur Überprüfung. + +6. **Code-Review & Zusammenführen**: Nach der Überprüfung, wenn Ihr Modell unseren Kriterien entspricht, wird es in das Hauptrepository zusammengeführt. + +Für detaillierte Schritte konsultieren Sie unseren [Beitragenden-Leitfaden](../../help/contributing.md). diff --git a/docs/de/models/mobile-sam.md b/docs/de/models/mobile-sam.md new file mode 100644 index 0000000..7f30175 --- /dev/null +++ b/docs/de/models/mobile-sam.md @@ -0,0 +1,116 @@ +--- +comments: true +description: Erfahren Sie mehr über MobileSAM, dessen Implementierung, den Vergleich mit dem Original-SAM und wie Sie es im Ultralytics-Framework herunterladen und testen können. Verbessern Sie Ihre mobilen Anwendungen heute. +keywords: MobileSAM, Ultralytics, SAM, mobile Anwendungen, Arxiv, GPU, API, Bildencoder, Maskendekoder, Modell-Download, Testmethode +--- + +![MobileSAM Logo](https://github.com/ChaoningZhang/MobileSAM/blob/master/assets/logo2.png?raw=true) + +# Mobile Segment Anything (MobileSAM) + +Das MobileSAM-Paper ist jetzt auf [arXiv](https://arxiv.org/pdf/2306.14289.pdf) verfügbar. + +Eine Demonstration von MobileSAM, das auf einer CPU ausgeführt wird, finden Sie unter diesem [Demo-Link](https://huggingface.co/spaces/dhkim2810/MobileSAM). 
Die Laufzeit auf einer Mac-i5-CPU beträgt etwa 3 Sekunden. In der Hugging-Face-Demo führen die Benutzeroberfläche und CPUs mit niedrigerer Leistung zu einer langsameren Reaktion, aber die Funktion bleibt effektiv.
Konkret ersetzen wir den ursprünglichen, leistungsstarken ViT-H-Encoder (632M) durch einen kleineren Tiny-ViT-Encoder (5M). Auf einer einzelnen GPU benötigt MobileSAM etwa 12 ms pro Bild: 8 ms für den Bildencoder und 4 ms für den Maskendekoder.
Example "Beispiel" + + === "Python" + ```python + from ultralytics import SAM + + # Laden Sie das Modell + model = SAM('mobile_sam.pt') + + # Vorhersage einer Segmentierung basierend auf einem Punkt-Prompt + model.predict('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +### Kasten-Prompt + +!!! Example "Beispiel" + + === "Python" + ```python + from ultralytics import SAM + + # Laden Sie das Modell + model = SAM('mobile_sam.pt') + + # Vorhersage einer Segmentierung basierend auf einem Kasten-Prompt + model.predict('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + ``` + +Wir haben `MobileSAM` und `SAM` mit derselben API implementiert. Für weitere Verwendungsinformationen sehen Sie bitte die [SAM-Seite](sam.md). + +## Zitate und Danksagungen + +Wenn Sie MobileSAM in Ihrer Forschungs- oder Entwicklungsarbeit nützlich finden, zitieren Sie bitte unser Paper: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{mobile_sam, + title={Faster Segment Anything: Towards Lightweight SAM for Mobile Applications}, + author={Zhang, Chaoning and Han, Dongshen and Qiao, Yu and Kim, Jung Uk and Bae, Sung Ho and Lee, Seungkyu and Hong, Choong Seon}, + journal={arXiv preprint arXiv:2306.14289}, + year={2023} + } diff --git a/docs/de/models/rtdetr.md b/docs/de/models/rtdetr.md new file mode 100644 index 0000000..dbc0b41 --- /dev/null +++ b/docs/de/models/rtdetr.md @@ -0,0 +1,93 @@ +--- +comments: true +description: Entdecken Sie die Funktionen und Vorteile von RT-DETR, dem effizienten und anpassungsfähigen Echtzeitobjektdetektor von Baidu, der von Vision Transformers unterstützt wird, einschließlich vortrainierter Modelle. 
+keywords: RT-DETR, Baidu, Vision Transformers, Objekterkennung, Echtzeitleistung, CUDA, TensorRT, IoU-bewusste Query-Auswahl, Ultralytics, Python API, PaddlePaddle +--- + +# Baidus RT-DETR: Ein Echtzeit-Objektdetektor auf Basis von Vision Transformers + +## Überblick + +Der Real-Time Detection Transformer (RT-DETR), entwickelt von Baidu, ist ein moderner End-to-End-Objektdetektor, der Echtzeitleistung mit hoher Genauigkeit bietet. Er nutzt die Leistung von Vision Transformers (ViT), um Multiskalen-Funktionen effizient zu verarbeiten, indem intra-skaliere Interaktion und eine skalenübergreifende Fusion entkoppelt werden. RT-DETR ist hoch anpassungsfähig und unterstützt flexible Anpassung der Inferenzgeschwindigkeit durch Verwendung verschiedener Decoder-Schichten ohne erneutes Training. Das Modell übertrifft viele andere Echtzeit-Objektdetektoren auf beschleunigten Backends wie CUDA mit TensorRT. + +![Beispielbild des Modells](https://user-images.githubusercontent.com/26833433/238963168-90e8483f-90aa-4eb6-a5e1-0d408b23dd33.png) +**Übersicht von Baidus RT-DETR.** Die Modellarchitekturdiagramm des RT-DETR zeigt die letzten drei Stufen des Backbone {S3, S4, S5} als Eingabe für den Encoder. Der effiziente Hybrid-Encoder verwandelt Multiskalen-Funktionen durch intraskalare Feature-Interaktion (AIFI) und das skalenübergreifende Feature-Fusion-Modul (CCFM) in eine Sequenz von Bildmerkmalen. Die IoU-bewusste Query-Auswahl wird verwendet, um eine feste Anzahl von Bildmerkmalen als anfängliche Objekt-Queries für den Decoder auszuwählen. Der Decoder optimiert iterativ Objekt-Queries, um Boxen und Vertrauenswerte zu generieren ([Quelle](https://arxiv.org/pdf/2304.08069.pdf)). + +### Hauptmerkmale + +- **Effizienter Hybrid-Encoder:** Baidus RT-DETR verwendet einen effizienten Hybrid-Encoder, der Multiskalen-Funktionen verarbeitet, indem intra-skaliere Interaktion und eine skalenübergreifende Fusion entkoppelt werden. 
Dieses einzigartige Design auf Basis von Vision Transformers reduziert die Rechenkosten und ermöglicht die Echtzeit-Objekterkennung. +- **IoU-bewusste Query-Auswahl:** Baidus RT-DETR verbessert die Initialisierung von Objekt-Queries, indem IoU-bewusste Query-Auswahl verwendet wird. Dadurch kann das Modell sich auf die relevantesten Objekte in der Szene konzentrieren und die Erkennungsgenauigkeit verbessern. +- **Anpassbare Inferenzgeschwindigkeit:** Baidus RT-DETR ermöglicht flexible Anpassungen der Inferenzgeschwindigkeit durch Verwendung unterschiedlicher Decoder-Schichten ohne erneutes Training. Diese Anpassungsfähigkeit erleichtert den praktischen Einsatz in verschiedenen Echtzeit-Objekterkennungsszenarien. + +## Vortrainierte Modelle + +Die Ultralytics Python API bietet vortrainierte PaddlePaddle RT-DETR-Modelle in verschiedenen Skalierungen: + +- RT-DETR-L: 53,0% AP auf COCO val2017, 114 FPS auf T4 GPU +- RT-DETR-X: 54,8% AP auf COCO val2017, 74 FPS auf T4 GPU + +## Beispiele für die Verwendung + +Das folgende Beispiel enthält einfache Trainings- und Inferenzbeispiele für RT-DETRR. Für die vollständige Dokumentation zu diesen und anderen [Modi](../modes/index.md) siehe die Dokumentationsseiten für [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) und [Export](../modes/export.md). + +!!! 
Example "Beispiel" + + === "Python" + + ```python + from ultralytics import RTDETR + + # Laden Sie ein vortrainiertes RT-DETR-l Modell auf COCO + model = RTDETR('rtdetr-l.pt') + + # Zeigen Sie Informationen über das Modell an (optional) + model.info() + + # Trainieren Sie das Modell auf dem COCO8-Beispiel-Datensatz für 100 Epochen + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Führen Sie die Inferenz mit dem RT-DETR-l Modell auf dem Bild 'bus.jpg' aus + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + ```bash + # Laden Sie ein vortrainiertes RT-DETR-l Modell auf COCO und trainieren Sie es auf dem COCO8-Beispiel-Datensatz für 100 Epochen + yolo train model=rtdetr-l.pt data=coco8.yaml epochs=100 imgsz=640 + + # Laden Sie ein vortrainiertes RT-DETR-l Modell auf COCO und führen Sie die Inferenz auf dem Bild 'bus.jpg' aus + yolo predict model=rtdetr-l.pt source=path/to/bus.jpg + ``` + +## Unterstützte Aufgaben und Modi + +In dieser Tabelle werden die Modelltypen, die spezifischen vortrainierten Gewichte, die von jedem Modell unterstützten Aufgaben und die verschiedenen Modi ([Train](../modes/train.md), [Val](../modes/val.md), [Predict](../modes/predict.md), [Export](../modes/export.md)), die unterstützt werden, mit ✅-Emoji angezeigt. + +| Modelltyp | Vortrainierte Gewichte | Unterstützte Aufgaben | Inferenz | Validierung | Training | Exportieren | +|--------------------|------------------------|---------------------------------------|----------|-------------|----------|-------------| +| RT-DETR Groß | `rtdetr-l.pt` | [Objekterkennung](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| RT-DETR Extra-Groß | `rtdetr-x.pt` | [Objekterkennung](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +## Zitate und Danksagungen + +Wenn Sie Baidus RT-DETR in Ihrer Forschungs- oder Entwicklungsarbeit verwenden, zitieren Sie bitte das [ursprüngliche Papier](https://arxiv.org/abs/2304.08069): + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{lv2023detrs, + title={DETRs Beat YOLOs on Real-time Object Detection}, + author={Wenyu Lv and Shangliang Xu and Yian Zhao and Guanzhong Wang and Jinman Wei and Cheng Cui and Yuning Du and Qingqing Dang and Yi Liu}, + year={2023}, + eprint={2304.08069}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +Wir möchten Baidu und dem [PaddlePaddle](https://github.com/PaddlePaddle/PaddleDetection)-Team für die Erstellung und Pflege dieser wertvollen Ressource für die Computer-Vision-Community danken. Ihre Beitrag zum Gebiet der Entwicklung des Echtzeit-Objekterkenners auf Basis von Vision Transformers, RT-DETR, wird sehr geschätzt. + +*Keywords: RT-DETR, Transformer, ViT, Vision Transformers, Baidu RT-DETR, PaddlePaddle, Paddle Paddle RT-DETR, Objekterkennung in Echtzeit, objekterkennung basierend auf Vision Transformers, vortrainierte PaddlePaddle RT-DETR Modelle, Verwendung von Baidus RT-DETR, Ultralytics Python API* diff --git a/docs/de/models/sam.md b/docs/de/models/sam.md new file mode 100644 index 0000000..9ca50b9 --- /dev/null +++ b/docs/de/models/sam.md @@ -0,0 +1,226 @@ +--- +comments: true +description: Erkunden Sie das innovative Segment Anything Model (SAM) von Ultralytics, das Echtzeit-Bildsegmentierung ermöglicht. Erfahren Sie mehr über die promptable Segmentierung, die Zero-Shot-Performance und die Anwendung. +keywords: Ultralytics, Bildsegmentierung, Segment Anything Model, SAM, SA-1B-Datensatz, Echtzeit-Performance, Zero-Shot-Transfer, Objekterkennung, Bildanalyse, maschinelles Lernen +--- + +# Segment Anything Model (SAM) + +Willkommen an der Spitze der Bildsegmentierung mit dem Segment Anything Model (SAM). Dieses revolutionäre Modell hat mit promptabler Bildsegmentierung und Echtzeit-Performance neue Standards in diesem Bereich gesetzt. 
+ +## Einführung in SAM: Das Segment Anything Model + +Das Segment Anything Model (SAM) ist ein innovatives Bildsegmentierungsmodell, das promptable Segmentierung ermöglicht und so eine beispiellose Vielseitigkeit bei der Bildanalyse bietet. SAM bildet das Herzstück der Segment Anything Initiative, einem bahnbrechenden Projekt, das ein neuartiges Modell, eine neue Aufgabe und einen neuen Datensatz für die Bildsegmentierung einführt. + +Dank seiner fortschrittlichen Konstruktion kann SAM sich an neue Bildverteilungen und Aufgaben anpassen, auch ohne Vorwissen. Das wird als Zero-Shot-Transfer bezeichnet. Trainiert wurde SAM auf dem umfangreichen [SA-1B-Datensatz](https://ai.facebook.com/datasets/segment-anything/), der über 1 Milliarde Masken auf 11 Millionen sorgfältig kuratierten Bildern enthält. SAM hat beeindruckende Zero-Shot-Performance gezeigt und in vielen Fällen frühere vollständig überwachte Ergebnisse übertroffen. + +![Beispielbild aus dem Datensatz](https://user-images.githubusercontent.com/26833433/238056229-0e8ffbeb-f81a-477e-a490-aff3d82fd8ce.jpg) +Beispielimagen mit überlagernden Masken aus unserem neu eingeführten Datensatz SA-1B. SA-1B enthält 11 Millionen diverse, hochauflösende, lizenzierte und die Privatsphäre schützende Bilder und 1,1 Milliarden qualitativ hochwertige Segmentierungsmasken. Diese wurden vollautomatisch von SAM annotiert und sind nach menschlichen Bewertungen und zahlreichen Experimenten von hoher Qualität und Vielfalt. Die Bilder sind nach der Anzahl der Masken pro Bild gruppiert (im Durchschnitt sind es etwa 100 Masken pro Bild). + +## Hauptmerkmale des Segment Anything Model (SAM) + +- **Promptable Segmentierungsaufgabe:** SAM wurde mit der Ausführung einer promptable Segmentierungsaufgabe entwickelt, wodurch es valide Segmentierungsmasken aus beliebigen Prompts generieren kann, z. B. räumlichen oder textuellen Hinweisen zur Identifizierung eines Objekts. 
+- **Fortgeschrittene Architektur:** Das Segment Anything Model verwendet einen leistungsfähigen Bild-Encoder, einen Prompt-Encoder und einen leichten Masken-Decoder. Diese einzigartige Architektur ermöglicht flexibles Prompting, Echtzeitmaskenberechnung und Berücksichtigung von Mehrdeutigkeiten in Segmentierungsaufgaben. +- **Der SA-1B-Datensatz:** Eingeführt durch das Segment Anything Projekt, enthält der SA-1B-Datensatz über 1 Milliarde Masken auf 11 Millionen Bildern. Als bisher größter Segmentierungsdatensatz liefert er SAM eine vielfältige und umfangreiche Datenquelle für das Training. +- **Zero-Shot-Performance:** SAM zeigt herausragende Zero-Shot-Performance in verschiedenen Segmentierungsaufgaben und ist damit ein einsatzbereites Werkzeug für vielfältige Anwendungen mit minimalem Bedarf an prompt engineering. + +Für eine detaillierte Betrachtung des Segment Anything Models und des SA-1B-Datensatzes besuchen Sie bitte die [Segment Anything Website](https://segment-anything.com) und lesen Sie das Forschungspapier [Segment Anything](https://arxiv.org/abs/2304.02643). + +## Verfügbare Modelle, unterstützte Aufgaben und Betriebsmodi + +Diese Tabelle zeigt die verfügbaren Modelle mit ihren spezifischen vortrainierten Gewichten, die unterstützten Aufgaben und ihre Kompatibilität mit verschiedenen Betriebsmodi wie [Inference](../modes/predict.md), [Validierung](../modes/val.md), [Training](../modes/train.md) und [Export](../modes/export.md), wobei ✅ Emojis für unterstützte Modi und ❌ Emojis für nicht unterstützte Modi verwendet werden. 
+ +| Modelltyp | Vortrainierte Gewichte | Unterstützte Aufgaben | Inference | Validierung | Training | Export | +|-----------|------------------------|---------------------------------------------|-----------|-------------|----------|--------| +| SAM base | `sam_b.pt` | [Instanzsegmentierung](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| SAM large | `sam_l.pt` | [Instanzsegmentierung](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## Wie man SAM verwendet: Vielseitigkeit und Power in der Bildsegmentierung + +Das Segment Anything Model kann für eine Vielzahl von Aufgaben verwendet werden, die über die Trainingsdaten hinausgehen. Dazu gehören Kantenerkennung, Generierung von Objektvorschlägen, Instanzsegmentierung und vorläufige Text-to-Mask-Vorhersage. Mit prompt engineering kann SAM sich schnell an neue Aufgaben und Datenverteilungen anpassen und sich so als vielseitiges und leistungsstarkes Werkzeug für alle Anforderungen der Bildsegmentierung etablieren. + +### Beispiel für SAM-Vorhersage + +!!! Example "Segmentierung mit Prompts" + + Bildsegmentierung mit gegebenen Prompts. + + === "Python" + + ```python + from ultralytics import SAM + + # Modell laden + model = SAM('sam_b.pt') + + # Modellinformationen anzeigen (optional) + model.info() + + # Inferenz mit Bounding Box Prompt + model('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + + # Inferenz mit Point Prompt + model('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +!!! Example "Alles segmentieren" + + Das ganze Bild segmentieren. 
+ + === "Python" + + ```python + from ultralytics import SAM + + # Modell laden + model = SAM('sam_b.pt') + + # Modellinformationen anzeigen (optional) + model.info() + + # Inferenz + model('Pfad/zum/Bild.jpg') + ``` + + === "CLI" + + ```bash + # Inferenz mit einem SAM-Modell + yolo predict model=sam_b.pt source=Pfad/zum/Bild.jpg + ``` + +- Die Logik hier besteht darin, das gesamte Bild zu segmentieren, wenn keine Prompts (Bounding Box/Point/Maske) übergeben werden. + +!!! Example "Beispiel SAMPredictor" + + Dadurch können Sie das Bild einmal festlegen und mehrmals Inferenz mit Prompts ausführen, ohne den Bild-Encoder mehrfach auszuführen. + + === "Prompt-Inferenz" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # SAMPredictor erstellen + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # Bild festlegen + predictor.set_image("ultralytics/assets/zidane.jpg") # Festlegung mit Bild-Datei + predictor.set_image(cv2.imread("ultralytics/assets/zidane.jpg")) # Festlegung mit np.ndarray + results = predictor(bboxes=[439, 437, 524, 709]) + results = predictor(points=[900, 370], labels=[1]) + + # Bild zurücksetzen + predictor.reset_image() + ``` + + Alles segmentieren mit zusätzlichen Argumenten. + + === "Alles segmentieren" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # SAMPredictor erstellen + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # Mit zusätzlichen Argumenten segmentieren + results = predictor(source="ultralytics/assets/zidane.jpg", crop_n_layers=1, points_stride=64) + ``` + +- Weitere zusätzliche Argumente für `Alles segmentieren` finden Sie in der [`Predictor/generate` Referenz](../../../reference/models/sam/predict.md). 
+ +## Vergleich von SAM und YOLOv8 + +Hier vergleichen wir Meta's kleinstes SAM-Modell, SAM-b, mit Ultralytics kleinstem Segmentierungsmodell, [YOLOv8n-seg](../tasks/segment.md): + +| Modell | Größe | Parameter | Geschwindigkeit (CPU) | +|------------------------------------------------|-------------------------------|------------------------------|----------------------------------------| +| Meta's SAM-b | 358 MB | 94,7 M | 51096 ms/pro Bild | +| [MobileSAM](mobile-sam.md) | 40,7 MB | 10,1 M | 46122 ms/pro Bild | +| [FastSAM-s](fast-sam.md) mit YOLOv8-Backbone | 23,7 MB | 11,8 M | 115 ms/pro Bild | +| Ultralytics [YOLOv8n-seg](../tasks/segment.md) | **6,7 MB** (53,4-mal kleiner) | **3,4 M** (27,9-mal kleiner) | **59 ms/pro Bild** (866-mal schneller) | + +Dieser Vergleich zeigt die Größen- und Geschwindigkeitsunterschiede zwischen den Modellen. Während SAM einzigartige Fähigkeiten für die automatische Segmentierung bietet, konkurriert es nicht direkt mit YOLOv8-Segmentierungsmodellen, die kleiner, schneller und effizienter sind. + +Die Tests wurden auf einem Apple M2 MacBook aus dem Jahr 2023 mit 16 GB RAM durchgeführt. Um diesen Test zu reproduzieren: + +!!! Example "Beispiel" + + === "Python" + ```python + from ultralytics import FastSAM, SAM, YOLO + + # SAM-b profilieren + model = SAM('sam_b.pt') + model.info() + model('ultralytics/assets') + + # MobileSAM profilieren + model = SAM('mobile_sam.pt') + model.info() + model('ultralytics/assets') + + # FastSAM-s profilieren + model = FastSAM('FastSAM-s.pt') + model.info() + model('ultralytics/assets') + + # YOLOv8n-seg profilieren + model = YOLO('yolov8n-seg.pt') + model.info() + model('ultralytics/assets') + ``` + +## Auto-Annotierung: Der schnelle Weg zu Segmentierungsdatensätzen + +Die Auto-Annotierung ist eine wichtige Funktion von SAM, mit der Benutzer mithilfe eines vortrainierten Detektionsmodells einen [Segmentierungsdatensatz](https://docs.ultralytics.com/datasets/segment) generieren können. 
Diese Funktion ermöglicht eine schnelle und genaue Annotation einer großen Anzahl von Bildern, ohne dass zeitaufwändiges manuelles Labeling erforderlich ist. + +### Generieren Sie Ihren Segmentierungsdatensatz mit einem Detektionsmodell + +Um Ihren Datensatz mit dem Ultralytics-Framework automatisch zu annotieren, verwenden Sie die `auto_annotate` Funktion wie folgt: + +!!! Example "Beispiel" + + === "Python" + ```python + from ultralytics.data.annotator import auto_annotate + + auto_annotate(data="Pfad/zum/Bilderordner", det_model="yolov8x.pt", sam_model='sam_b.pt') + ``` + +| Argument | Typ | Beschreibung | Standard | +|------------|---------------------|---------------------------------------------------------------------------------------------------------------------------|--------------| +| data | str | Pfad zu einem Ordner, der die zu annotierenden Bilder enthält. | | +| det_model | str, optional | Vortrainiertes YOLO-Detektionsmodell. Standardmäßig 'yolov8x.pt'. | 'yolov8x.pt' | +| sam_model | str, optional | Vortrainiertes SAM-Segmentierungsmodell. Standardmäßig 'sam_b.pt'. | 'sam_b.pt' | +| device | str, optional | Gerät, auf dem die Modelle ausgeführt werden. Standardmäßig ein leerer String (CPU oder GPU, falls verfügbar). | | +| output_dir | str, None, optional | Verzeichnis zum Speichern der annotierten Ergebnisse. Standardmäßig ein 'labels'-Ordner im selben Verzeichnis wie 'data'. | None | + +Die `auto_annotate` Funktion nimmt den Pfad zu Ihren Bildern entgegen, mit optionalen Argumenten für das vortrainierte Detektions- und SAM-Segmentierungsmodell, das Gerät, auf dem die Modelle ausgeführt werden sollen, und das Ausgabeverzeichnis, in dem die annotierten Ergebnisse gespeichert werden sollen. + +Die Auto-Annotierung mit vortrainierten Modellen kann die Zeit und den Aufwand für die Erstellung hochwertiger Segmentierungsdatensätze erheblich reduzieren. 
Diese Funktion ist besonders vorteilhaft für Forscher und Entwickler, die mit großen Bildersammlungen arbeiten. Sie ermöglicht es ihnen, sich auf die Modellentwicklung und -bewertung zu konzentrieren, anstatt auf die manuelle Annotation. + +## Zitate und Danksagungen + +Wenn Sie SAM in Ihrer Forschungs- oder Entwicklungsarbeit nützlich finden, erwägen Sie bitte, unser Paper zu zitieren: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{kirillov2023segment, + title={Segment Anything}, + author={Alexander Kirillov and Eric Mintun and Nikhila Ravi and Hanzi Mao and Chloe Rolland and Laura Gustafson and Tete Xiao and Spencer Whitehead and Alexander C. Berg and Wan-Yen Lo and Piotr Dollár and Ross Girshick}, + year={2023}, + eprint={2304.02643}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +Wir möchten Meta AI für die Erstellung und Pflege dieser wertvollen Ressource für die Computer Vision Community danken. + +*Stichworte: Segment Anything, Segment Anything Model, SAM, Meta SAM, Bildsegmentierung, Promptable Segmentierung, Zero-Shot-Performance, SA-1B-Datensatz, fortschrittliche Architektur, Auto-Annotierung, Ultralytics, vortrainierte Modelle, SAM Base, SAM Large, Instanzsegmentierung, Computer Vision, Künstliche Intelligenz, maschinelles Lernen, Datenannotation, Segmentierungsmasken, Detektionsmodell, YOLO Detektionsmodell, Bibtex, Meta AI.* diff --git a/docs/de/models/yolo-nas.md b/docs/de/models/yolo-nas.md new file mode 100644 index 0000000..c1d8ed7 --- /dev/null +++ b/docs/de/models/yolo-nas.md @@ -0,0 +1,121 @@ +--- +comments: true +description: Erfahren Sie mehr über YOLO-NAS, ein herausragendes Modell für die Objekterkennung. Erfahren Sie mehr über seine Funktionen, vortrainierte Modelle, Nutzung mit der Ultralytics Python API und vieles mehr. 
+keywords: YOLO-NAS, Deci AI, Objekterkennung, Deep Learning, Neural Architecture Search, Ultralytics Python API, YOLO-Modell, vortrainierte Modelle, Quantisierung, Optimierung, COCO, Objects365, Roboflow 100 +--- + +# YOLO-NAS + +## Übersicht + +Entwickelt von Deci AI, ist YOLO-NAS ein bahnbrechendes Modell für die Objekterkennung. Es ist das Ergebnis fortschrittlicher Technologien zur Neural Architecture Search und wurde sorgfältig entworfen, um die Einschränkungen früherer YOLO-Modelle zu überwinden. Mit signifikanten Verbesserungen in der Quantisierungsunterstützung und Abwägung von Genauigkeit und Latenz stellt YOLO-NAS einen großen Fortschritt in der Objekterkennung dar. + +![Modellbeispielbild](https://learnopencv.com/wp-content/uploads/2023/05/yolo-nas_COCO_map_metrics.png) +**Übersicht über YOLO-NAS.** YOLO-NAS verwendet Quantisierungsblöcke und selektive Quantisierung für optimale Leistung. Das Modell weist bei der Konvertierung in seine quantisierte Version mit INT8 einen minimalen Präzisionsverlust auf, was im Vergleich zu anderen Modellen eine signifikante Verbesserung darstellt. Diese Entwicklungen führen zu einer überlegenen Architektur mit beispiellosen Fähigkeiten zur Objekterkennung und herausragender Leistung. + +### Schlüsselfunktionen + +- **Quantisierungsfreundlicher Basiselement:** YOLO-NAS führt ein neues Basiselement ein, das für Quantisierung geeignet ist und eine der wesentlichen Einschränkungen früherer YOLO-Modelle angeht. +- **Raffiniertes Training und Quantisierung:** YOLO-NAS nutzt fortschrittliche Trainingsschemata und post-training Quantisierung zur Leistungsverbesserung. +- **AutoNAC-Optimierung und Vortraining:** YOLO-NAS verwendet die AutoNAC-Optimierung und wird auf prominenten Datensätzen wie COCO, Objects365 und Roboflow 100 vortrainiert. Dieses Vortraining macht es äußerst geeignet für die Objekterkennung in Produktionsumgebungen. 
+ +## Vortrainierte Modelle + +Erleben Sie die Leistungsfähigkeit der Objekterkennung der nächsten Generation mit den vortrainierten YOLO-NAS-Modellen von Ultralytics. Diese Modelle sind darauf ausgelegt, sowohl bei Geschwindigkeit als auch bei Genauigkeit hervorragende Leistung zu liefern. Wählen Sie aus einer Vielzahl von Optionen, die auf Ihre spezifischen Anforderungen zugeschnitten sind: + +| Modell | mAP | Latenz (ms) | +|------------------|-------|-------------| +| YOLO-NAS S | 47,5 | 3,21 | +| YOLO-NAS M | 51,55 | 5,85 | +| YOLO-NAS L | 52,22 | 7,87 | +| YOLO-NAS S INT-8 | 47,03 | 2,36 | +| YOLO-NAS M INT-8 | 51,0 | 3,78 | +| YOLO-NAS L INT-8 | 52,1 | 4,78 | + +Jede Modellvariante ist darauf ausgelegt, eine Balance zwischen Mean Average Precision (mAP) und Latenz zu bieten und Ihre Objekterkennungsaufgaben für Performance und Geschwindigkeit zu optimieren. + +## Beispiele zur Verwendung + +Ultralytics hat es einfach gemacht, YOLO-NAS-Modelle in Ihre Python-Anwendungen über unser `ultralytics` Python-Paket zu integrieren. Das Paket bietet eine benutzerfreundliche Python-API, um den Prozess zu optimieren. + +Die folgenden Beispiele zeigen, wie Sie YOLO-NAS-Modelle mit dem `ultralytics`-Paket für Inferenz und Validierung verwenden: + +### Beispiele für Inferenz und Validierung + +In diesem Beispiel validieren wir YOLO-NAS-s auf dem COCO8-Datensatz. + +!!! Example "Beispiel" + + Dieses Beispiel bietet einfachen Code für Inferenz und Validierung für YOLO-NAS. Für die Verarbeitung von Inferenzergebnissen siehe den [Predict](../modes/predict.md)-Modus. Für die Verwendung von YOLO-NAS mit zusätzlichen Modi siehe [Val](../modes/val.md) und [Export](../modes/export.md). Das YOLO-NAS-Modell im `ultralytics`-Paket unterstützt kein Training. 
+ + === "Python" + + Vorab trainierte `*.pt`-Modelldateien von PyTorch können der Klasse `NAS()` übergeben werden, um eine Modellinstanz in Python zu erstellen: + + ```python + from ultralytics import NAS + + # Laden Sie ein auf COCO vortrainiertes YOLO-NAS-s-Modell + model = NAS('yolo_nas_s.pt') + + # Modellinformationen anzeigen (optional) + model.info() + + # Validieren Sie das Modell am Beispiel des COCO8-Datensatzes + results = model.val(data='coco8.yaml') + + # Führen Sie Inferenz mit dem YOLO-NAS-s-Modell auf dem Bild 'bus.jpg' aus + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLI-Befehle sind verfügbar, um die Modelle direkt auszuführen: + + ```bash + # Laden Sie ein auf COCO vortrainiertes YOLO-NAS-s-Modell und validieren Sie die Leistung am Beispiel des COCO8-Datensatzes + yolo val model=yolo_nas_s.pt data=coco8.yaml + + # Laden Sie ein auf COCO vortrainiertes YOLO-NAS-s-Modell und führen Sie Inferenz auf dem Bild 'bus.jpg' aus + yolo predict model=yolo_nas_s.pt source=path/to/bus.jpg + ``` + +## Unterstützte Aufgaben und Modi + +Wir bieten drei Varianten der YOLO-NAS-Modelle an: Small (s), Medium (m) und Large (l). Jede Variante ist dazu gedacht, unterschiedliche Berechnungs- und Leistungsanforderungen zu erfüllen: + +- **YOLO-NAS-s**: Optimiert für Umgebungen mit begrenzten Rechenressourcen, bei denen Effizienz entscheidend ist. +- **YOLO-NAS-m**: Bietet einen ausgewogenen Ansatz und ist für die Objekterkennung im Allgemeinen mit höherer Genauigkeit geeignet. +- **YOLO-NAS-l**: Maßgeschneidert für Szenarien, bei denen höchste Genauigkeit gefordert ist und Rechenressourcen weniger einschränkend sind. + +Im Folgenden finden Sie eine detaillierte Übersicht über jedes Modell, einschließlich Links zu den vortrainierten Gewichten, den unterstützten Aufgaben und deren Kompatibilität mit verschiedenen Betriebsmodi. 
+ +| Modelltyp | Vortrainierte Gewichte | Unterstützte Aufgaben | Inferenz | Validierung | Training | Export | +|------------|-----------------------------------------------------------------------------------------------|---------------------------------------|----------|-------------|----------|--------| +| YOLO-NAS-s | [yolo_nas_s.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_s.pt) | [Objekterkennung](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-m | [yolo_nas_m.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_m.pt) | [Objekterkennung](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-l | [yolo_nas_l.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_l.pt) | [Objekterkennung](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | + +## Zitierungen und Danksagungen + +Wenn Sie YOLO-NAS in Ihrer Forschungs- oder Entwicklungsarbeit verwenden, zitieren Sie bitte SuperGradients: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{supergradients, + doi = {10.5281/ZENODO.7789328}, + url = {https://zenodo.org/record/7789328}, + author = {Aharon, Shay and {Louis-Dupont} and {Ofri Masad} and Yurkova, Kate and {Lotem Fridman} and {Lkdci} and Khvedchenya, Eugene and Rubin, Ran and Bagrov, Natan and Tymchenko, Borys and Keren, Tomer and Zhilko, Alexander and {Eran-Deci}}, + title = {Super-Gradients}, + publisher = {GitHub}, + journal = {GitHub repository}, + year = {2021}, + } + ``` + +Wir möchten dem [SuperGradients](https://github.com/Deci-AI/super-gradients/)-Team von Deci AI für ihre Bemühungen bei der Erstellung und Pflege dieser wertvollen Ressource für die Computer Vision Community danken. Wir sind der Meinung, dass YOLO-NAS mit seiner innovativen Architektur und seinen herausragenden Fähigkeiten zur Objekterkennung ein wichtiges Werkzeug für Entwickler und Forscher gleichermaßen wird. 
+ +*Keywords: YOLO-NAS, Deci AI, Objekterkennung, Deep Learning, Neural Architecture Search, Ultralytics Python API, YOLO-Modell, SuperGradients, vortrainierte Modelle, quantisierungsfreundliches Basiselement, fortschrittliche Trainingsschemata, post-training Quantisierung, AutoNAC-Optimierung, COCO, Objects365, Roboflow 100* diff --git a/docs/de/models/yolov3.md b/docs/de/models/yolov3.md new file mode 100644 index 0000000..de1cf46 --- /dev/null +++ b/docs/de/models/yolov3.md @@ -0,0 +1,98 @@ +--- +comments: true +description: Erhalten Sie eine Übersicht über YOLOv3, YOLOv3-Ultralytics und YOLOv3u. Erfahren Sie mehr über ihre wichtigsten Funktionen, Verwendung und unterstützte Aufgaben für die Objekterkennung. +keywords: YOLOv3, YOLOv3-Ultralytics, YOLOv3u, Objekterkennung, Inferenz, Training, Ultralytics +--- + +# YOLOv3, YOLOv3-Ultralytics und YOLOv3u + +## Übersicht + +Dieses Dokument bietet eine Übersicht über drei eng verwandte Modelle zur Objekterkennung, nämlich [YOLOv3](https://pjreddie.com/darknet/yolo/), [YOLOv3-Ultralytics](https://github.com/ultralytics/yolov3) und [YOLOv3u](https://github.com/ultralytics/ultralytics). + +1. **YOLOv3:** Dies ist die dritte Version des You Only Look Once (YOLO) Objekterkennungsalgorithmus. Ursprünglich entwickelt von Joseph Redmon, verbesserte YOLOv3 seine Vorgängermodelle durch die Einführung von Funktionen wie mehrskaligen Vorhersagen und drei verschiedenen Größen von Erkennungskernen. + +2. **YOLOv3-Ultralytics:** Dies ist die Implementierung des YOLOv3-Modells von Ultralytics. Es reproduziert die ursprüngliche YOLOv3-Architektur und bietet zusätzliche Funktionalitäten, wie die Unterstützung für weitere vortrainierte Modelle und einfachere Anpassungsoptionen. + +3. **YOLOv3u:** Dies ist eine aktualisierte Version von YOLOv3-Ultralytics, die den anchor-freien, objektfreien Split Head aus den YOLOv8-Modellen einbezieht. 
YOLOv3u verwendet die gleiche Backbone- und Neck-Architektur wie YOLOv3, aber mit dem aktualisierten Erkennungskopf von YOLOv8. + +![Ultralytics YOLOv3](https://raw.githubusercontent.com/ultralytics/assets/main/yolov3/banner-yolov3.png) + +## Wichtigste Funktionen + +- **YOLOv3:** Einführung der Verwendung von drei unterschiedlichen Skalen für die Erkennung unter Verwendung von drei verschiedenen Größen von Erkennungskernen: 13x13, 26x26 und 52x52. Dadurch wurde die Erkennungsgenauigkeit für Objekte unterschiedlicher Größe erheblich verbessert. Darüber hinaus fügte YOLOv3 Funktionen wie Mehrfachkennzeichnungen für jeden Begrenzungsrahmen und ein besseres Feature-Extraktionsnetzwerk hinzu. + +- **YOLOv3-Ultralytics:** Ultralytics' Implementierung von YOLOv3 bietet die gleiche Leistung wie das ursprüngliche Modell, bietet jedoch zusätzliche Unterstützung für weitere vortrainierte Modelle, zusätzliche Trainingsmethoden und einfachere Anpassungsoptionen. Dadurch wird es vielseitiger und benutzerfreundlicher für praktische Anwendungen. + +- **YOLOv3u:** Dieses aktualisierte Modell enthält den anchor-freien, objektfreien Split Head aus YOLOv8. Durch die Beseitigung der Notwendigkeit vordefinierter Ankerfelder und Objektheitsscores kann dieses Entwurfsmerkmal für den Erkennungskopf die Fähigkeit des Modells verbessern, Objekte unterschiedlicher Größe und Form zu erkennen. Dadurch wird YOLOv3u robuster und genauer für Aufgaben der Objekterkennung. + +## Unterstützte Aufgaben und Modi + +Die YOLOv3-Serie, einschließlich YOLOv3, YOLOv3-Ultralytics und YOLOv3u, ist speziell für Aufgaben der Objekterkennung konzipiert. Diese Modelle sind bekannt für ihre Effektivität in verschiedenen realen Szenarien und kombinieren Genauigkeit und Geschwindigkeit. Jede Variante bietet einzigartige Funktionen und Optimierungen, die sie für eine Vielzahl von Anwendungen geeignet machen. 
+ +Alle drei Modelle unterstützen einen umfangreichen Satz von Modi, um Vielseitigkeit in verschiedenen Phasen der Modellbereitstellung und -entwicklung zu gewährleisten. Zu diesen Modi gehören [Inferenz](../modes/predict.md), [Validierung](../modes/val.md), [Training](../modes/train.md) und [Export](../modes/export.md), was den Benutzern ein vollständiges Toolkit für eine effektive Objekterkennung bietet. + +| Modelltyp | Unterstützte Aufgaben | Inferenz | Validierung | Training | Export | +|--------------------|---------------------------------------|----------|-------------|----------|--------| +| YOLOv3 | [Objekterkennung](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3-Ultralytics | [Objekterkennung](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3u | [Objekterkennung](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +Diese Tabelle bietet einen schnellen Überblick über die Fähigkeiten jeder YOLOv3-Variante und hebt ihre Vielseitigkeit und Eignung für verschiedene Aufgaben und Betriebsmodi in Workflows zur Objekterkennung hervor. + +## Beispiele zur Verwendung + +Dieses Beispiel enthält einfache Trainings- und Inferenzbeispiele für YOLOv3. Für die vollständige Dokumentation zu diesen und anderen [Modi](../modes/index.md) siehe die Seiten zur [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) und [Export](../modes/export.md). + +!!! 
Example "Beispiel" + + === "Python" + + Vorgefertigte PyTorch-Modelle im `*.pt`-Format sowie Konfigurationsdateien im `*.yaml`-Format können an die `YOLO()`-Klasse übergeben werden, um eine Modellinstanz in Python zu erstellen: + + ```python + from ultralytics import YOLO + + # Lade ein vortrainiertes YOLOv3n-Modell für COCO + model = YOLO('yolov3n.pt') + + # Zeige Informationen zum Modell an (optional) + model.info() + + # Trainiere das Modell mit dem COCO8-Beispieldatensatz für 100 Epochen + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Führe Inferenz mit dem YOLOv3n-Modell auf dem Bild "bus.jpg" durch + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLI-Befehle stehen zur Verfügung, um die Modelle direkt auszuführen: + + ```bash + # Lade ein vortrainiertes YOLOv3n-Modell und trainiere es mit dem COCO8-Beispieldatensatz für 100 Epochen + yolo train model=yolov3n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Lade ein vortrainiertes YOLOv3n-Modell und führe Inferenz auf dem Bild "bus.jpg" aus + yolo predict model=yolov3n.pt source=path/to/bus.jpg + ``` + +## Zitate und Anerkennungen + +Wenn Sie YOLOv3 in Ihrer Forschung verwenden, zitieren Sie bitte die ursprünglichen YOLO-Papiere und das Ultralytics YOLOv3-Repository: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{redmon2018yolov3, + title={YOLOv3: An Incremental Improvement}, + author={Redmon, Joseph and Farhadi, Ali}, + journal={arXiv preprint arXiv:1804.02767}, + year={2018} + } + ``` + +Vielen Dank an Joseph Redmon und Ali Farhadi für die Entwicklung des originalen YOLOv3. diff --git a/docs/de/models/yolov4.md b/docs/de/models/yolov4.md new file mode 100644 index 0000000..a866f28 --- /dev/null +++ b/docs/de/models/yolov4.md @@ -0,0 +1,71 @@ +--- +comments: true +description: Erforschen Sie unseren detaillierten Leitfaden zu YOLOv4, einem hochmodernen Echtzeit-Objektdetektor. 
Erfahren Sie mehr über seine architektonischen Highlights, innovativen Funktionen und Anwendungsbeispiele. +keywords: ultralytics, YOLOv4, Objekterkennung, neuronales Netzwerk, Echtzeit-Erkennung, Objektdetektor, maschinelles Lernen +--- + +# YOLOv4: Schnelle und präzise Objekterkennung + +Willkommen auf der Ultralytics-Dokumentationsseite für YOLOv4, einem hochmodernen, Echtzeit-Objektdetektor, der 2020 von Alexey Bochkovskiy unter [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet) veröffentlicht wurde. YOLOv4 wurde entwickelt, um das optimale Gleichgewicht zwischen Geschwindigkeit und Genauigkeit zu bieten und ist somit eine ausgezeichnete Wahl für viele Anwendungen. + +![YOLOv4 Architekturdiagramm](https://user-images.githubusercontent.com/26833433/246185689-530b7fe8-737b-4bb0-b5dd-de10ef5aface.png) +**YOLOv4 Architekturdiagramm**. Zeigt das komplexe Netzwerkdesign von YOLOv4, einschließlich der Backbone-, Neck- und Head-Komponenten sowie ihrer verbundenen Schichten für eine optimale Echtzeit-Objekterkennung. + +## Einleitung + +YOLOv4 steht für You Only Look Once Version 4. Es handelt sich um ein Echtzeit-Objekterkennungsmodell, das entwickelt wurde, um die Grenzen früherer YOLO-Versionen wie [YOLOv3](yolov3.md) und anderer Objekterkennungsmodelle zu überwinden. Im Gegensatz zu anderen konvolutionellen neuronalen Netzwerken (CNN), die auf Objekterkennung basieren, ist YOLOv4 nicht nur für Empfehlungssysteme geeignet, sondern auch für eigenständiges Prozessmanagement und Reduzierung der Benutzereingabe. Durch den Einsatz von herkömmlichen Grafikprozessoreinheiten (GPUs) ermöglicht es YOLOv4 eine Massennutzung zu einem erschwinglichen Preis und ist so konzipiert, dass es in Echtzeit auf einer herkömmlichen GPU funktioniert, wobei nur eine solche GPU für das Training erforderlich ist. + +## Architektur + +YOLOv4 nutzt mehrere innovative Funktionen, die zusammenarbeiten, um seine Leistung zu optimieren. 
Dazu gehören Weighted-Residual-Connections (WRC), Cross-Stage-Partial-connections (CSP), Cross mini-Batch Normalization (CmBN), Self-adversarial-training (SAT), Mish-Aktivierung, Mosaic-Datenaugmentation, DropBlock-Regularisierung und CIoU-Verlust. Diese Funktionen werden kombiniert, um erstklassige Ergebnisse zu erzielen. + +Ein typischer Objektdetektor besteht aus mehreren Teilen, darunter der Eingabe, dem Backbone, dem Neck und dem Head. Das Backbone von YOLOv4 ist auf ImageNet vorgeschult und wird zur Vorhersage von Klassen und Begrenzungsrahmen von Objekten verwendet. Das Backbone kann aus verschiedenen Modellen wie VGG, ResNet, ResNeXt oder DenseNet stammen. Der Neck-Teil des Detektors wird verwendet, um Merkmalskarten von verschiedenen Stufen zu sammeln und umfasst normalerweise mehrere Bottom-up-Pfade und mehrere Top-down-Pfade. Der Head-Teil wird schließlich zur Durchführung der endgültigen Objekterkennung und Klassifizierung verwendet. + +## Bag of Freebies + +YOLOv4 verwendet auch Methoden, die als "Bag of Freebies" bekannt sind. Dabei handelt es sich um Techniken, die die Genauigkeit des Modells während des Trainings verbessern, ohne die Kosten der Inferenz zu erhöhen. Datenaugmentation ist eine häufige Bag of Freebies-Technik, die in der Objekterkennung verwendet wird, um die Variabilität der Eingabebilder zu erhöhen und die Robustheit des Modells zu verbessern. Beispiele für Datenaugmentation sind photometrische Verzerrungen (Anpassung von Helligkeit, Kontrast, Farbton, Sättigung und Rauschen eines Bildes) und geometrische Verzerrungen (Hinzufügen von zufälliger Skalierung, Ausschnitt, Spiegelung und Rotation). Diese Techniken helfen dem Modell, sich besser an verschiedene Arten von Bildern anzupassen. + +## Funktionen und Leistung + +YOLOv4 ist für optimale Geschwindigkeit und Genauigkeit in der Objekterkennung konzipiert. Die Architektur von YOLOv4 umfasst CSPDarknet53 als Backbone, PANet als Neck und YOLOv3 als Detektionskopf. 
Diese Konstruktion ermöglicht es YOLOv4, beeindruckend schnelle Objekterkennungen durchzuführen und ist somit für Echtzeitanwendungen geeignet. YOLOv4 zeichnet sich auch durch Genauigkeit aus und erzielt erstklassige Ergebnisse in Objekterkennungs-Benchmarks. + +## Beispiele für die Verwendung + +Zum Zeitpunkt der Erstellung dieser Dokumentation unterstützt Ultralytics derzeit keine YOLOv4-Modelle. Daher müssen sich Benutzer, die YOLOv4 verwenden möchten, direkt an das YOLOv4 GitHub-Repository für Installations- und Verwendungshinweise wenden. + +Hier ist ein kurzer Überblick über die typischen Schritte, die Sie unternehmen könnten, um YOLOv4 zu verwenden: + +1. Besuchen Sie das YOLOv4 GitHub-Repository: [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet). + +2. Befolgen Sie die in der README-Datei bereitgestellten Anweisungen zur Installation. Dies beinhaltet in der Regel das Klonen des Repositories, die Installation der erforderlichen Abhängigkeiten und das Einrichten der erforderlichen Umgebungsvariablen. + +3. Sobald die Installation abgeschlossen ist, können Sie das Modell gemäß den in dem Repository bereitgestellten Verwendungshinweisen trainieren und verwenden. Dies beinhaltet in der Regel die Vorbereitung des Datensatzes, die Konfiguration der Modellparameter, das Training des Modells und die anschließende Verwendung des trainierten Modells zur Durchführung der Objekterkennung. + +Bitte beachten Sie, dass die spezifischen Schritte je nach Ihrer spezifischen Anwendung und dem aktuellen Stand des YOLOv4-Repositories variieren können. Es wird daher dringend empfohlen, sich direkt an die Anweisungen im YOLOv4-GitHub-Repository zu halten. + +Wir bedauern etwaige Unannehmlichkeiten und werden uns bemühen, dieses Dokument mit Verwendungsbeispielen für Ultralytics zu aktualisieren, sobald die Unterstützung für YOLOv4 implementiert ist. 
+ +## Fazit + +YOLOv4 ist ein leistungsstarkes und effizientes Modell zur Objekterkennung, das eine Balance zwischen Geschwindigkeit und Genauigkeit bietet. Durch den Einsatz einzigartiger Funktionen und Bag of Freebies-Techniken während des Trainings erzielt es hervorragende Ergebnisse in Echtzeit-Objekterkennungsaufgaben. YOLOv4 kann von jedem mit einer herkömmlichen GPU trainiert und verwendet werden, was es für eine Vielzahl von Anwendungen zugänglich und praktisch macht. + +## Zitate und Anerkennungen + +Wir möchten den Autoren von YOLOv4 für ihren bedeutenden Beitrag auf dem Gebiet der Echtzeit-Objekterkennung danken: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{bochkovskiy2020yolov4, + title={YOLOv4: Optimal Speed and Accuracy of Object Detection}, + author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, + year={2020}, + eprint={2004.10934}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +Die originale YOLOv4-Publikation finden Sie auf [arXiv](https://arxiv.org/pdf/2004.10934.pdf). Die Autoren haben ihre Arbeit öffentlich zugänglich gemacht und der Code kann auf [GitHub](https://github.com/AlexeyAB/darknet) abgerufen werden. Wir schätzen ihre Bemühungen, das Fachgebiet voranzubringen und ihre Arbeit der breiteren Community zugänglich zu machen. diff --git a/docs/de/models/yolov5.md b/docs/de/models/yolov5.md new file mode 100644 index 0000000..b194f12 --- /dev/null +++ b/docs/de/models/yolov5.md @@ -0,0 +1,113 @@ +--- +comments: true +description: Entdecken Sie YOLOv5u, eine verbesserte Version des YOLOv5-Modells mit einem optimierten Verhältnis von Genauigkeit und Geschwindigkeit sowie zahlreiche vorab trainierte Modelle für verschiedene Objekterkennungsaufgaben. 
+keywords: YOLOv5u, Objekterkennung, vorab trainierte Modelle, Ultralytics, Inferenz, Validierung, YOLOv5, YOLOv8, Ankerfrei, Objektlos, Echtzeitanwendungen, Maschinelles Lernen +--- + +# YOLOv5 + +## Übersicht + +YOLOv5u steht für eine Weiterentwicklung der Methoden zur Objekterkennung. Basierend auf der grundlegenden Architektur des von Ultralytics entwickelten YOLOv5-Modells integriert YOLOv5u den ankerfreien, objektlosen Split-Kopf, ein Feature, das zuvor in den YOLOv8-Modellen eingeführt wurde. Diese Anpassung verfeinert die Architektur des Modells und führt zu einem optimierten Verhältnis von Genauigkeit und Geschwindigkeit bei der Objekterkennung. Basierend auf den empirischen Ergebnissen und den abgeleiteten Features bietet YOLOv5u eine effiziente Alternative für diejenigen, die robuste Lösungen sowohl in der Forschung als auch in praktischen Anwendungen suchen. + +![Ultralytics YOLOv5](https://raw.githubusercontent.com/ultralytics/assets/main/yolov5/v70/splash.png) + +## Hauptmerkmale + +- **Ankerfreier Split-Ultralytics-Kopf:** Herkömmliche Objekterkennungsmodelle verwenden vordefinierte Ankerboxen, um die Position von Objekten vorherzusagen. YOLOv5u modernisiert diesen Ansatz. Durch die Verwendung eines ankerfreien Split-Ultralytics-Kopfes wird ein flexiblerer und anpassungsfähigerer Detektionsmechanismus gewährleistet, der die Leistung in verschiedenen Szenarien verbessert. + +- **Optimiertes Verhältnis von Genauigkeit und Geschwindigkeit:** Geschwindigkeit und Genauigkeit ziehen oft in entgegengesetzte Richtungen. Aber YOLOv5u stellt diese Abwägung in Frage. Es bietet eine ausgewogene Balance, die Echtzeitdetektionen ohne Einbußen bei der Genauigkeit ermöglicht. Diese Funktion ist besonders wertvoll für Anwendungen, die schnelle Reaktionen erfordern, wie autonome Fahrzeuge, Robotik und Echtzeitanalyse von Videos. 
+ +- **Vielfalt an vorab trainierten Modellen:** YOLOv5u bietet eine Vielzahl von vorab trainierten Modellen, da verschiedene Aufgaben unterschiedliche Werkzeuge erfordern. Ob Sie sich auf Inferenz, Validierung oder Training konzentrieren, es wartet ein maßgeschneidertes Modell auf Sie. Diese Vielfalt gewährleistet, dass Sie nicht nur eine Einheitslösung verwenden, sondern ein speziell für Ihre einzigartige Herausforderung feinabgestimmtes Modell. + +## Unterstützte Aufgaben und Modi + +Die YOLOv5u-Modelle mit verschiedenen vorab trainierten Gewichten eignen sich hervorragend für Aufgaben zur [Objekterkennung](../tasks/detect.md). Sie unterstützen eine umfassende Palette von Modi, die sie für verschiedene Anwendungen von der Entwicklung bis zur Bereitstellung geeignet machen. + +| Modelltyp | Vorab trainierte Gewichte | Aufgabe | Inferenz | Validierung | Training | Export | +|-----------|-----------------------------------------------------------------------------------------------------------------------------|---------------------------------------|----------|-------------|----------|--------| +| YOLOv5u | `yolov5nu`, `yolov5su`, `yolov5mu`, `yolov5lu`, `yolov5xu`, `yolov5n6u`, `yolov5s6u`, `yolov5m6u`, `yolov5l6u`, `yolov5x6u` | [Objekterkennung](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +Diese Tabelle bietet eine detaillierte Übersicht über die verschiedenen Varianten des YOLOv5u-Modells und hebt ihre Anwendbarkeit in der Objekterkennung sowie die Unterstützung unterschiedlicher Betriebsmodi wie [Inferenz](../modes/predict.md), [Validierung](../modes/val.md), [Training](../modes/train.md) und [Export](../modes/export.md) hervor. Diese umfassende Unterstützung ermöglicht es Benutzern, die Fähigkeiten der YOLOv5u-Modelle in einer Vielzahl von Objekterkennungsszenarien voll auszuschöpfen. + +## Leistungskennzahlen + +!!! 
Leistung + + === "Erkennung" + + Siehe [Erkennungsdokumentation](https://docs.ultralytics.com/tasks/detect/) für Beispiele zur Verwendung dieser Modelle, die auf [COCO](https://docs.ultralytics.com/datasets/detect/coco/) trainiert wurden und 80 vorab trainierte Klassen enthalten. + + | Modell | YAML | Größe
(Pixel) | mAPval
50-95 | Geschwindigkeit
CPU ONNX
(ms) | Geschwindigkeit
A100 TensorRT
(ms) | Parameter
(M) | FLOPs
(B) | + |---------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|-----------------------|----------------------|------------------------------------------|-----------------------------------------------|--------------------|-------------------| + | [yolov5nu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5nu.pt) | [yolov5n.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 34,3 | 73,6 | 1,06 | 2,6 | 7,7 | + | [yolov5su.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5su.pt) | [yolov5s.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 43,0 | 120,7 | 1,27 | 9,1 | 24,0 | + | [yolov5mu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5mu.pt) | [yolov5m.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 49,0 | 233,9 | 1,86 | 25,1 | 64,2 | + | [yolov5lu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5lu.pt) | [yolov5l.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 52,2 | 408,4 | 2,50 | 53,2 | 135,0 | + | [yolov5xu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5xu.pt) | [yolov5x.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 53,2 | 763,2 | 3,81 | 97,2 | 246,4 | + | | | | | | | | | + | [yolov5n6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5n6u.pt) | [yolov5n6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1.280 | 42,1 | 211,0 | 1,83 | 4,3 | 7,8 | + | [yolov5s6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5s6u.pt) | 
[yolov5s6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1.280 | 48,6 | 422,6 | 2,34 | 15,3 | 24,6 | + | [yolov5m6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5m6u.pt) | [yolov5m6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1.280 | 53,6 | 810,9 | 4,36 | 41,2 | 65,7 | + | [yolov5l6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5l6u.pt) | [yolov5l6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1.280 | 55,7 | 1.470,9 | 5,47 | 86,1 | 137,4 | + | [yolov5x6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5x6u.pt) | [yolov5x6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1.280 | 56,8 | 2.436,5 | 8,98 | 155,4 | 250,7 | + +## Beispiele zur Verwendung + +Dieses Beispiel enthält einfache Beispiele zur Schulung und Inferenz mit YOLOv5. Die vollständige Dokumentation zu diesen und anderen [Modi](../modes/index.md) finden Sie in den Seiten [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) und [Export](../modes/export.md). + +!!! 
Example "Beispiel" + + === "Python" + + PyTorch-vortrainierte `*.pt`-Modelle sowie Konfigurationsdateien `*.yaml` können an die `YOLO()`-Klasse übergeben werden, um eine Modellinstanz in Python zu erstellen: + + ```python + from ultralytics import YOLO + + # Laden Sie ein vortrainiertes YOLOv5n-Modell für COCO-Daten + model = YOLO('yolov5n.pt') + + # Informationen zum Modell anzeigen (optional) + model.info() + + # Trainieren Sie das Modell anhand des COCO8-Beispieldatensatzes für 100 Epochen + ergebnisse = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Führen Sie die Inferenz mit dem YOLOv5n-Modell auf dem Bild 'bus.jpg' durch + ergebnisse = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLI-Befehle sind verfügbar, um die Modelle direkt auszuführen: + + ```bash + # Laden Sie ein vortrainiertes YOLOv5n-Modell und trainieren Sie es anhand des COCO8-Beispieldatensatzes für 100 Epochen + yolo train model=yolov5n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Laden Sie ein vortrainiertes YOLOv5n-Modell und führen Sie die Inferenz auf dem Bild 'bus.jpg' durch + yolo predict model=yolov5n.pt source=path/to/bus.jpg + ``` + +## Zitate und Danksagungen + +Wenn Sie YOLOv5 oder YOLOv5u in Ihrer Forschung verwenden, zitieren Sie bitte das Ultralytics YOLOv5-Repository wie folgt: + +!!! Quote "" + + === "BibTeX" + ```bibtex + @software{yolov5, + title = {Ultralytics YOLOv5}, + author = {Glenn Jocher}, + year = {2020}, + version = {7.0}, + license = {AGPL-3.0}, + url = {https://github.com/ultralytics/yolov5}, + doi = {10.5281/zenodo.3908559}, + orcid = {0000-0001-5950-6979} + } + ``` + +Bitte beachten Sie, dass die YOLOv5-Modelle unter den Lizenzen [AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) und [Enterprise](https://ultralytics.com/license) bereitgestellt werden. 
diff --git a/docs/de/models/yolov6.md b/docs/de/models/yolov6.md new file mode 100644 index 0000000..0246170 --- /dev/null +++ b/docs/de/models/yolov6.md @@ -0,0 +1,107 @@ +--- +comments: true +description: Erforschen Sie Meituan YOLOv6, ein modernes Objekterkennungsmodell, das eine ausgewogene Kombination aus Geschwindigkeit und Genauigkeit bietet. Tauchen Sie ein in Funktionen, vorab trainierte Modelle und die Verwendung von Python. +keywords: Meituan YOLOv6, Objekterkennung, Ultralytics, YOLOv6 Dokumentation, Bi-direktionale Konkatenation, Anchor-Aided Training, vorab trainierte Modelle, Echtzeitanwendungen +--- + +# Meituan YOLOv6 + +## Überblick + +[Meituan](https://about.meituan.com/) YOLOv6 ist ein moderner Objekterkenner, der eine bemerkenswerte Balance zwischen Geschwindigkeit und Genauigkeit bietet und somit eine beliebte Wahl für Echtzeitanwendungen darstellt. Dieses Modell bietet mehrere bemerkenswerte Verbesserungen in seiner Architektur und seinem Trainingsschema, einschließlich der Implementierung eines Bi-direktionalen Konkatenationsmoduls (BiC), einer anchor-aided training (AAT)-Strategie und einem verbesserten Backbone- und Neck-Design für Spitzenleistungen auf dem COCO-Datensatz. + +![Meituan YOLOv6](https://user-images.githubusercontent.com/26833433/240750495-4da954ce-8b3b-41c4-8afd-ddb74361d3c2.png) +![Modellbeispielbild](https://user-images.githubusercontent.com/26833433/240750557-3e9ec4f0-0598-49a8-83ea-f33c91eb6d68.png) +**Übersicht über YOLOv6.** Diagramm der Modellarchitektur, das die neu gestalteten Netzwerkkomponenten und Trainingsstrategien zeigt, die zu signifikanten Leistungsverbesserungen geführt haben. (a) Der Nacken von YOLOv6 (N und S sind dargestellt). Beachten Sie, dass bei M/L RepBlocks durch CSPStackRep ersetzt wird. (b) Die Struktur eines BiC-Moduls. (c) Ein SimCSPSPPF-Block. ([Quelle](https://arxiv.org/pdf/2301.05586.pdf)). 
+ +### Hauptmerkmale + +- **Bi-direktionales Konkatenations (BiC) Modul:** YOLOv6 führt ein BiC-Modul im Nacken des Erkenners ein, das die Lokalisierungssignale verbessert und eine Leistungssteigerung bei vernachlässigbarem Geschwindigkeitsabfall liefert. +- **Anchor-aided Training (AAT) Strategie:** Dieses Modell schlägt AAT vor, um die Vorteile sowohl von ankerbasierten als auch von ankerfreien Paradigmen zu nutzen, ohne die Inferenzeffizienz zu beeinträchtigen. +- **Verbessertes Backbone- und Neck-Design:** Durch Vertiefung von YOLOv6 um eine weitere Stufe im Backbone und Nacken erreicht dieses Modell Spitzenleistungen auf dem COCO-Datensatz bei hochauflösenden Eingaben. +- **Self-Distillation Strategie:** Eine neue Self-Distillation-Strategie wird implementiert, um die Leistung von kleineren Modellen von YOLOv6 zu steigern, indem der Hilfsregressionszweig während des Trainings verstärkt und bei der Inferenz entfernt wird, um einen deutlichen Geschwindigkeitsabfall zu vermeiden. + +## Leistungsmetriken + +YOLOv6 bietet verschiedene vorab trainierte Modelle mit unterschiedlichen Maßstäben: + +- YOLOv6-N: 37,5% AP auf COCO val2017 bei 1187 FPS mit NVIDIA Tesla T4 GPU. +- YOLOv6-S: 45,0% AP bei 484 FPS. +- YOLOv6-M: 50,0% AP bei 226 FPS. +- YOLOv6-L: 52,8% AP bei 116 FPS. +- YOLOv6-L6: Spitzenleistung in Echtzeit. + +YOLOv6 bietet auch quantisierte Modelle für verschiedene Genauigkeiten sowie Modelle, die für mobile Plattformen optimiert sind. + +## Beispiele zur Verwendung + +In diesem Beispiel werden einfache Trainings- und Inferenzbeispiele für YOLOv6 bereitgestellt. Weitere Dokumentation zu diesen und anderen [Modi](../modes/index.md) finden Sie auf den Seiten [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) und [Export](../modes/export.md). + +!!! 
Example "Beispiel" + + === "Python" + + In Python können PyTorch-vorab trainierte `*.pt`-Modelle sowie Konfigurations-`*.yaml`-Dateien an die `YOLO()`-Klasse übergeben werden, um eine Modellinstanz zu erstellen: + + ```python + from ultralytics import YOLO + + # Erstellen Sie ein YOLOv6n-Modell von Grund auf + model = YOLO('yolov6n.yaml') + + # Zeigen Sie Informationen zum Modell an (optional) + model.info() + + # Trainieren Sie das Modell am Beispiel des COCO8-Datensatzes für 100 Epochen + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Führen Sie Inferenz mit dem YOLOv6n-Modell auf dem Bild 'bus.jpg' durch + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLI-Befehle stehen zur Verfügung, um die Modelle direkt auszuführen: + + ```bash + # Erstellen Sie ein YOLOv6n-Modell von Grund auf und trainieren Sie es am Beispiel des COCO8-Datensatzes für 100 Epochen + yolo train model=yolov6n.yaml data=coco8.yaml epochs=100 imgsz=640 + + # Erstellen Sie ein YOLOv6n-Modell von Grund auf und führen Sie Inferenz auf dem Bild 'bus.jpg' durch + yolo predict model=yolov6n.yaml source=path/to/bus.jpg + ``` + +## Unterstützte Aufgaben und Modi + +Die YOLOv6-Serie bietet eine Reihe von Modellen, die jeweils für die Hochleistungs-[Objekterkennung](../tasks/detect.md) optimiert sind. Diese Modelle erfüllen unterschiedliche Rechenanforderungen und Genauigkeitsanforderungen und sind daher vielseitig für eine Vielzahl von Anwendungen einsetzbar. 
+ +| Modelltyp | Vorab trainierte Gewichte | Unterstützte Aufgaben | Inferenz | Validierung | Training | Exportieren | +|-----------|---------------------------|---------------------------------------|----------|-------------|----------|-------------| +| YOLOv6-N | `yolov6-n.pt` | [Objekterkennung](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-S | `yolov6-s.pt` | [Objekterkennung](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-M | `yolov6-m.pt` | [Objekterkennung](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L | `yolov6-l.pt` | [Objekterkennung](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L6 | `yolov6-l6.pt` | [Objekterkennung](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +Diese Tabelle bietet einen detaillierten Überblick über die YOLOv6-Modellvarianten und hebt ihre Fähigkeiten bei der Objekterkennung sowie ihre Kompatibilität mit verschiedenen Betriebsmodi wie [Inferenz](../modes/predict.md), [Validierung](../modes/val.md), [Training](../modes/train.md) und [Exportieren](../modes/export.md) hervor. Diese umfassende Unterstützung ermöglicht es den Benutzern, die Fähigkeiten von YOLOv6-Modellen in einer Vielzahl von Objekterkennungsszenarien vollständig zu nutzen. + +## Zitate und Anerkennungen + +Wir möchten den Autoren für ihre bedeutenden Beiträge auf dem Gebiet der Echtzeit-Objekterkennung danken: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{li2023yolov6, + title={YOLOv6 v3.0: A Full-Scale Reloading}, + author={Chuyi Li and Lulu Li and Yifei Geng and Hongliang Jiang and Meng Cheng and Bo Zhang and Zaidan Ke and Xiaoming Xu and Xiangxiang Chu}, + year={2023}, + eprint={2301.05586}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +Das ursprüngliche YOLOv6-Papier finden Sie auf [arXiv](https://arxiv.org/abs/2301.05586). Die Autoren haben ihre Arbeit öffentlich zugänglich gemacht, und der Code kann auf [GitHub](https://github.com/meituan/YOLOv6) abgerufen werden. 
Wir schätzen ihre Bemühungen zur Weiterentwicklung des Fachgebiets und zur Zugänglichmachung ihrer Arbeit für die breitere Gemeinschaft. diff --git a/docs/de/models/yolov7.md b/docs/de/models/yolov7.md new file mode 100644 index 0000000..4edb969 --- /dev/null +++ b/docs/de/models/yolov7.md @@ -0,0 +1,66 @@ +--- +comments: true +description: Erforsche den YOLOv7, einen echtzeitfähigen Objektdetektor. Verstehe seine überlegene Geschwindigkeit, beeindruckende Genauigkeit und seinen einzigartigen Fokus auf die optimierte Ausbildung mit "trainable bag-of-freebies". +keywords: YOLOv7, echtzeitfähiger Objektdetektor, State-of-the-Art, Ultralytics, MS COCO Datensatz, Modellumparameterisierung, dynamische Labelzuweisung, erweiterte Skalierung, umfassende Skalierung +--- + +# YOLOv7: Trainable Bag-of-Freebies + +YOLOv7 ist ein echtzeitfähiger Objektdetektor der Spitzenklasse, der alle bekannten Objektdetektoren in Bezug auf Geschwindigkeit und Genauigkeit im Bereich von 5 FPS bis 160 FPS übertrifft. Mit einer Genauigkeit von 56,8% AP ist er der präziseste Echtzeit-Objektdetektor unter allen bekannten Modellen mit einer FPS von 30 oder höher auf der GPU V100. Darüber hinaus übertrifft YOLOv7 andere Objektdetektoren wie YOLOR, YOLOX, Scaled-YOLOv4, YOLOv5 und viele andere in Bezug auf Geschwindigkeit und Genauigkeit. Das Modell wird ausschließlich auf dem MS COCO-Datensatz trainiert, ohne andere Datensätze oder vortrainierte Gewichte zu verwenden. Sourcecode für YOLOv7 ist auf GitHub verfügbar. + +![Vergleich von YOLOv7 mit SOTA-Objektdetektoren](https://github.com/ultralytics/ultralytics/assets/26833433/5e1e0420-8122-4c79-b8d0-2860aa79af92) +**Vergleich von Spitzen-Objektdetektoren.** +Aus den Ergebnissen in Tabelle 2 wissen wir, dass die vorgeschlagene Methode das beste Verhältnis von Geschwindigkeit und Genauigkeit umfassend aufweist. Vergleichen wir YOLOv7-tiny-SiLU mit YOLOv5-N (r6.1), so ist unsere Methode 127 FPS schneller und um 10,7% genauer beim AP.
Darüber hinaus erreicht YOLOv7 bei einer Bildrate von 161 FPS einen AP von 51,4%, während PPYOLOE-L mit demselben AP nur eine Bildrate von 78 FPS aufweist. In Bezug auf die Parameterverwendung ist YOLOv7 um 41% geringer als PPYOLOE-L. Vergleicht man YOLOv7-X mit 114 FPS Inferenzgeschwindigkeit mit YOLOv5-L (r6.1) mit 99 FPS Inferenzgeschwindigkeit, kann YOLOv7-X den AP um 3,9% verbessern. Wenn YOLOv7-X mit YOLOv5-X (r6.1) in ähnlichem Maßstab verglichen wird, ist die Inferenzgeschwindigkeit von YOLOv7-X 31 FPS schneller. Darüber hinaus reduziert YOLOv7-X in Bezug auf die Anzahl der Parameter und Berechnungen 22% der Parameter und 8% der Berechnungen im Vergleich zu YOLOv5-X (r6.1), verbessert jedoch den AP um 2,2% ([Source](https://arxiv.org/pdf/2207.02696.pdf)). + +## Übersicht + +Echtzeit-Objekterkennung ist eine wichtige Komponente vieler Computersysteme für Bildverarbeitung, einschließlich Multi-Object-Tracking, autonomes Fahren, Robotik und medizinische Bildanalyse. In den letzten Jahren konzentrierte sich die Entwicklung der Echtzeit-Objekterkennung auf die Gestaltung effizienter Architekturen und die Verbesserung der Inferenzgeschwindigkeit verschiedener CPUs, GPUs und Neural Processing Units (NPUs). YOLOv7 unterstützt sowohl mobile GPUs als auch GPU-Geräte, von der Edge bis zur Cloud. + +Im Gegensatz zu herkömmlichen, echtzeitfähigen Objektdetektoren, die sich auf die Architekturoptimierung konzentrieren, führt YOLOv7 eine Fokussierung auf die Optimierung des Schulungsprozesses ein. Dazu gehören Module und Optimierungsmethoden, die darauf abzielen, die Genauigkeit der Objekterkennung zu verbessern, ohne die Inferenzkosten zu erhöhen - ein Konzept, das als "trainable bag-of-freebies" bekannt ist. + +## Hauptmerkmale + +YOLOv7 führt mehrere Schlüsselfunktionen ein: + +1. 
**Modellumparameterisierung**: YOLOv7 schlägt ein geplantes umparameterisiertes Modell vor, das eine in verschiedenen Netzwerken anwendbare Strategie darstellt und auf dem Konzept des Gradientenpropagationspfades basiert. + +2. **Dynamische Labelzuweisung**: Das Training des Modells mit mehreren Ausgabeschichten stellt ein neues Problem dar: "Wie weist man dynamische Ziele für die Ausgaben der verschiedenen Zweige zu?" Zur Lösung dieses Problems führt YOLOv7 eine neue Methode zur Labelzuweisung ein, die als coarse-to-fine lead guided label assignment bekannt ist. + +3. **Erweiterte und umfassende Skalierung**: YOLOv7 schlägt Methoden zur "erweiterten" und "umfassenden Skalierung" des echtzeitfähigen Objektdetektors vor, die Parameter und Berechnungen effektiv nutzen können. + +4. **Effizienz**: Die von YOLOv7 vorgeschlagene Methode kann etwa 40 % der Parameter und 50 % der Berechnungen des state-of-the-art echtzeitfähigen Objektdetektors wirksam reduzieren und weist eine schnellere Inferenzgeschwindigkeit und eine höhere Detektionsgenauigkeit auf. + +## Beispiele zur Nutzung + +Zum Zeitpunkt der Erstellung dieses Textes unterstützt Ultralytics derzeit keine YOLOv7-Modelle. Daher müssen sich alle Benutzer, die YOLOv7 verwenden möchten, direkt an das YOLOv7 GitHub-Repository für Installations- und Nutzungshinweise wenden. + +Hier ist ein kurzer Überblick über die typischen Schritte, die Sie unternehmen könnten, um YOLOv7 zu verwenden: + +1. Besuchen Sie das YOLOv7 GitHub-Repository: [https://github.com/WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7). + +2. Befolgen Sie die in der README-Datei bereitgestellten Anweisungen zur Installation. Dies beinhaltet in der Regel das Klonen des Repositories, die Installation der erforderlichen Abhängigkeiten und das Einrichten eventuell notwendiger Umgebungsvariablen. + +3. 
Sobald die Installation abgeschlossen ist, können Sie das Modell entsprechend den im Repository bereitgestellten Anleitungen trainieren und verwenden. Dies umfasst in der Regel die Vorbereitung des Datensatzes, das Konfigurieren der Modellparameter, das Training des Modells und anschließend die Verwendung des trainierten Modells zur Durchführung der Objekterkennung. + +Bitte beachten Sie, dass die spezifischen Schritte je nach Ihrem spezifischen Anwendungsfall und dem aktuellen Stand des YOLOv7-Repositories variieren können. Es wird daher dringend empfohlen, sich direkt an die im YOLOv7 GitHub-Repository bereitgestellten Anweisungen zu halten. + +Wir bedauern etwaige Unannehmlichkeiten und werden uns bemühen, dieses Dokument mit Anwendungsbeispielen für Ultralytics zu aktualisieren, sobald die Unterstützung für YOLOv7 implementiert ist. + +## Zitationen und Danksagungen + +Wir möchten den Autoren von YOLOv7 für ihre bedeutenden Beiträge im Bereich der echtzeitfähigen Objekterkennung danken: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{wang2022yolov7, + title={{YOLOv7}: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors}, + author={Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + journal={arXiv preprint arXiv:2207.02696}, + year={2022} + } + ``` + +Die ursprüngliche YOLOv7-Studie kann auf [arXiv](https://arxiv.org/pdf/2207.02696.pdf) gefunden werden. Die Autoren haben ihre Arbeit öffentlich zugänglich gemacht, und der Code kann auf [GitHub](https://github.com/WongKinYiu/yolov7) abgerufen werden. Wir schätzen ihre Bemühungen, das Feld voranzubringen und ihre Arbeit der breiteren Gemeinschaft zugänglich zu machen. 
diff --git a/docs/de/models/yolov8.md b/docs/de/models/yolov8.md new file mode 100644 index 0000000..e24ca96 --- /dev/null +++ b/docs/de/models/yolov8.md @@ -0,0 +1,162 @@ +--- +comments: true +description: Erfahren Sie mehr über die aufregenden Funktionen von YOLOv8, der neuesten Version unseres Echtzeit-Objekterkenners! Erfahren Sie, wie fortschrittliche Architekturen, vortrainierte Modelle und die optimale Balance zwischen Genauigkeit und Geschwindigkeit YOLOv8 zur perfekten Wahl für Ihre Objekterkennungsaufgaben machen. +keywords: YOLOv8, Ultralytics, Echtzeit-Objekterkennung, vortrainierte Modelle, Dokumentation, Objekterkennung, YOLO-Serie, fortschrittliche Architekturen, Genauigkeit, Geschwindigkeit +--- + +# YOLOv8 + +## Übersicht + +YOLOv8 ist die neueste Version der YOLO-Serie von Echtzeit-Objekterkennern und bietet modernste Leistung in Bezug auf Genauigkeit und Geschwindigkeit. Basierend auf den Fortschritten früherer YOLO-Versionen bringt YOLOv8 neue Funktionen und Optimierungen mit sich, die ihn zu einer idealen Wahl für verschiedene Objekterkennungsaufgaben in einer Vielzahl von Anwendungen machen. + +![Ultralytics YOLOv8](https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/yolo-comparison-plots.png) + +## Schlüsselfunktionen + +- **Fortschrittliche Backbone- und Neck-Architekturen:** YOLOv8 verwendet modernste Backbone- und Neck-Architekturen, die zu einer verbesserten Merkmalsextraktion und Objekterkennungsleistung führen. +- **Ankerfreier Split Ultralytics Head:** YOLOv8 verwendet einen ankerfreien Split Ultralytics Head, der zu einer besseren Genauigkeit und einem effizienteren Erkennungsprozess im Vergleich zu ankerbasierten Ansätzen führt. +- **Optimale Genauigkeits-Geschwindigkeits-Balance:** Mit dem Fokus auf die Aufrechterhaltung einer optimalen Balance zwischen Genauigkeit und Geschwindigkeit eignet sich YOLOv8 für Echtzeit-Objekterkennungsaufgaben in verschiedenen Anwendungsbereichen. 
+- **Vielfalt an vortrainierten Modellen:** YOLOv8 bietet eine Vielzahl von vortrainierten Modellen, um verschiedenen Aufgaben und Leistungsanforderungen gerecht zu werden. Dies erleichtert die Suche nach dem richtigen Modell für Ihren spezifischen Anwendungsfall. + +## Unterstützte Aufgaben und Modi + +Die YOLOv8-Serie bietet eine Vielzahl von Modellen, von denen jedes auf bestimmte Aufgaben in der Computer Vision spezialisiert ist. Diese Modelle sind so konzipiert, dass sie verschiedenen Anforderungen gerecht werden, von der Objekterkennung bis hin zu komplexeren Aufgaben wie Instanzsegmentierung, Pose/Keypoint-Erkennung und Klassifikation. + +Jede Variante der YOLOv8-Serie ist auf ihre jeweilige Aufgabe optimiert und gewährleistet damit hohe Leistung und Genauigkeit. Darüber hinaus sind diese Modelle kompatibel mit verschiedenen Betriebsmodi, einschließlich [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md) und [Export](../modes/export.md). Dadurch wird ihre Verwendung in verschiedenen Phasen der Bereitstellung und Entwicklung erleichtert. 
+ +| Modell | Dateinamen | Aufgabe | Inference | Validation | Training | Export | +|-------------|----------------------------------------------------------------------------------------------------------------|---------------------------------------------|-----------|------------|----------|--------| +| YOLOv8 | `yolov8n.pt` `yolov8s.pt` `yolov8m.pt` `yolov8l.pt` `yolov8x.pt` | [Objekterkennung](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-seg | `yolov8n-seg.pt` `yolov8s-seg.pt` `yolov8m-seg.pt` `yolov8l-seg.pt` `yolov8x-seg.pt` | [Instanzsegmentierung](../tasks/segment.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-pose | `yolov8n-pose.pt` `yolov8s-pose.pt` `yolov8m-pose.pt` `yolov8l-pose.pt` `yolov8x-pose.pt` `yolov8x-pose-p6.pt` | [Pose/Keypoints](../tasks/pose.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-cls | `yolov8n-cls.pt` `yolov8s-cls.pt` `yolov8m-cls.pt` `yolov8l-cls.pt` `yolov8x-cls.pt` | [Klassifikation](../tasks/classify.md) | ✅ | ✅ | ✅ | ✅ | + +Diese Tabelle gibt einen Überblick über die verschiedenen Varianten des YOLOv8-Modells und deren Anwendungsbereiche sowie deren Kompatibilität mit verschiedenen Betriebsmodi wie Inference, Validation, Training und Export. Sie zeigt die Vielseitigkeit und Robustheit der YOLOv8-Serie, was sie für verschiedene Anwendungen in der Computer Vision geeignet macht. + +## Leistungskennzahlen + +!!! Performance + + === "Objekterkennung (COCO)" + + Siehe [Objekterkennungsdokumentation](https://docs.ultralytics.com/tasks/detect/) für Beispiele zur Verwendung dieser Modelle, die auf [COCO](https://docs.ultralytics.com/datasets/detect/coco/) trainiert wurden und 80 vortrainierte Klassen enthalten. + + | Modell | Größe
(Pixel) | mAPval
50-95 | Geschwindigkeit
CPU ONNX
(ms) | Geschwindigkeit
A100 TensorRT
(ms) | Parameter
(M) | FLOPs
(B) | + | ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ---------------------------------------- | --------------------------------------------- | ------------------ | ----------------- | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37,3 | 80,4 | 0,99 | 3,2 | 8,7 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44,9 | 128,4 | 1,20 | 11,2 | 28,6 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50,2 | 234,7 | 1,83 | 25,9 | 78,9 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52,9 | 375,2 | 2,39 | 43,7 | 165,2 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53,9 | 479,1 | 3,53 | 68,2 | 257,8 | + + === "Objekterkennung (Open Images V7)" + + Siehe [Objekterkennungsdokumentation](https://docs.ultralytics.com/tasks/detect/) für Beispiele zur Verwendung dieser Modelle, die auf [Open Image V7](https://docs.ultralytics.com/datasets/detect/open-images-v7/) trainiert wurden und 600 vortrainierte Klassen enthalten. + + | Modell | Größe
(Pixel) | mAPval
50-95 | Geschwindigkeit
CPU ONNX
(ms) | Geschwindigkeit
A100 TensorRT
(ms) | Parameter
(M) | FLOPs
(B) | + | ----------------------------------------------------------------------------------------- | --------------------- | -------------------- | ---------------------------------------- | --------------------------------------------- | ------------------ | ----------------- | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-oiv7.pt) | 640 | 18,4 | 142,4 | 1,21 | 3,5 | 10,5 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-oiv7.pt) | 640 | 27,7 | 183,1 | 1,40 | 11,4 | 29,7 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-oiv7.pt) | 640 | 33,6 | 408,5 | 2,26 | 26,2 | 80,6 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-oiv7.pt) | 640 | 34,9 | 596,9 | 2,43 | 44,1 | 167,4 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-oiv7.pt) | 640 | 36,3 | 860,6 | 3,56 | 68,7 | 260,6 | + + === "Segmentierung (COCO)" + + Siehe [Segmentierungsdokumentation](https://docs.ultralytics.com/tasks/segment/) für Beispiele zur Verwendung dieser Modelle, die auf [COCO](https://docs.ultralytics.com/datasets/segment/coco/) trainiert wurden und 80 vortrainierte Klassen enthalten. + + | Modell | Größe
(Pixel) | mAPbox
50-95 | mAPmask
50-95 | Geschwindigkeit
CPU ONNX
(ms) | Geschwindigkeit
A100 TensorRT
(ms) | Parameter
(M) | FLOPs
(B) | + | -------------------------------------------------------------------------------------------- | --------------------- | --------------------- | --------------------- | ---------------------------------------- | --------------------------------------------- | ------------------ | ----------------- | + | [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36,7 | 30,5 | 96,1 | 1,21 | 3,4 | 12,6 | + | [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44,6 | 36,8 | 155,7 | 1,47 | 11,8 | 42,6 | + | [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49,9 | 40,8 | 317,0 | 2,18 | 27,3 | 110,2 | + | [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52,3 | 42,6 | 572,4 | 2,79 | 46,0 | 220,5 | + | [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53,4 | 43,4 | 712,1 | 4,02 | 71,8 | 344,1 | + + === "Klassifikation (ImageNet)" + + Siehe [Klassifikationsdokumentation](https://docs.ultralytics.com/tasks/classify/) für Beispiele zur Verwendung dieser Modelle, die auf [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/) trainiert wurden und 1000 vortrainierte Klassen enthalten. + + | Modell | Größe
(Pixel) | acc
top1 | acc
top5 | Geschwindigkeit
CPU ONNX
(ms) | Geschwindigkeit
A100 TensorRT
(ms) | Parameter
(M) | FLOPs
(B) bei 640 | + | -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ---------------------------------------- | --------------------------------------------- | ------------------ | ------------------------ | + | [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66,6 | 87,0 | 12,9 | 0,31 | 2,7 | 4,3 | + | [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72,3 | 91,1 | 23,4 | 0,35 | 6,4 | 13,5 | + | [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76,4 | 93,2 | 85,4 | 0,62 | 17,0 | 42,7 | + | [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78,0 | 94,1 | 163,0 | 0,87 | 37,5 | 99,7 | + | [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78,4 | 94,3 | 232,0 | 1,01 | 57,4 | 154,8 | + + === "Pose (COCO)" + + Siehe [Pose Estimation Docs](https://docs.ultralytics.com/tasks/pose/) für Beispiele zur Verwendung dieser Modelle, die auf [COCO](https://docs.ultralytics.com/datasets/pose/coco/) trainiert wurden und 1 vortrainierte Klasse, 'person', enthalten. + + | Modell | Größe
(Pixel) | mAPpose
50-95 | mAPpose
50 | Geschwindigkeit
CPU ONNX
(ms) | Geschwindigkeit
A100 TensorRT
(ms) | Parameter
(M) | FLOPs
(B) | + | ---------------------------------------------------------------------------------------------------- | --------------------- | --------------------- | ------------------ | ---------------------------------------- | --------------------------------------------- | ------------------ | ----------------- | + | [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50,4 | 80,1 | 131,8 | 1,18 | 3,3 | 9,2 | + | [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60,0 | 86,2 | 233,2 | 1,42 | 11,6 | 30,2 | + | [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65,0 | 88,8 | 456,3 | 2,00 | 26,4 | 81,0 | + | [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67,6 | 90,0 | 784,5 | 2,59 | 44,4 | 168,6 | + | [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69,2 | 90,2 | 1607,1 | 3,73 | 69,4 | 263,2 | + | [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71,6 | 91,2 | 4088,7 | 10,04 | 99,1 | 1066,4 | + +## Beispiele zur Verwendung + +Dieses Beispiel liefert einfache Trainings- und Inferenzbeispiele für YOLOv8. Für die vollständige Dokumentation zu diesen und anderen [Modi](../modes/index.md) siehe die Seiten [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) und [Export](../modes/export.md). + +Beachten Sie, dass das folgende Beispiel für YOLOv8 [Detect](../tasks/detect.md) Modelle für die Objekterkennung verwendet. Für zusätzliche unterstützte Aufgaben siehe die Dokumentation zur [Segmentation](../tasks/segment.md), [Classification](../tasks/classify.md) und [Pose](../tasks/pose.md). + +!!! 
Example "Beispiel" + + === "Python" + + Vortrainierte PyTorch-`*.pt`-Modelle sowie Konfigurations-`*.yaml`-Dateien können der Klasse `YOLO()` in Python übergeben werden, um eine Modellinstanz zu erstellen: + + ```python + from ultralytics import YOLO + + # Laden Sie ein vortrainiertes YOLOv8n-Modell für COCO + model = YOLO('yolov8n.pt') + + # Zeigen Sie Informationen zum Modell an (optional) + model.info() + + # Trainieren Sie das Modell mit dem COCO8-Beispieldatensatz für 100 Epochen + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Führen Sie eine Inferenz mit dem YOLOv8n-Modell auf dem Bild 'bus.jpg' aus + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLI-Befehle stehen zur direkten Ausführung der Modelle zur Verfügung: + + ```bash + # Laden Sie ein vortrainiertes YOLOv8n-Modell für COCO und trainieren Sie es mit dem COCO8-Beispieldatensatz für 100 Epochen + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Laden Sie ein vortrainiertes YOLOv8n-Modell für COCO und führen Sie eine Inferenz auf dem Bild 'bus.jpg' aus + yolo predict model=yolov8n.pt source=path/to/bus.jpg + ``` + +## Zitate und Danksagungen + +Wenn Sie das YOLOv8-Modell oder eine andere Software aus diesem Repository in Ihrer Arbeit verwenden, zitieren Sie es bitte in folgendem Format: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @software{yolov8_ultralytics, + author = {Glenn Jocher and Ayush Chaurasia and Jing Qiu}, + title = {Ultralytics YOLOv8}, + version = {8.0.0}, + year = {2023}, + url = {https://github.com/ultralytics/ultralytics}, + orcid = {0000-0001-5950-6979, 0000-0002-7603-6750, 0000-0003-3783-7069}, + license = {AGPL-3.0} + } + ``` + +Bitte beachten Sie, dass die DOI noch aussteht und der Zitation hinzugefügt wird, sobald sie verfügbar ist. YOLOv8-Modelle werden unter den Lizenzen [AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) und [Enterprise](https://ultralytics.com/license) bereitgestellt.
diff --git a/docs/de/modes/benchmark.md b/docs/de/modes/benchmark.md new file mode 100644 index 0000000..93908a1 --- /dev/null +++ b/docs/de/modes/benchmark.md @@ -0,0 +1,94 @@ +--- +comments: true +description: Lernen Sie, wie Sie die Geschwindigkeit und Genauigkeit von YOLOv8 über verschiedene Exportformate hinweg profilieren können; erhalten Sie Einblicke in die Kennzahlen mAP50-95, accuracy_top5 und mehr. +keywords: Ultralytics, YOLOv8, Benchmarking, Geschwindigkeitsprofilierung, Genauigkeitsprofilierung, mAP50-95, accuracy_top5, ONNX, OpenVINO, TensorRT, YOLO-Exportformate +--- + +# Modell-Benchmarking mit Ultralytics YOLO + +Ultralytics YOLO-Ökosystem und Integrationen + +## Einführung + +Nachdem Ihr Modell trainiert und validiert wurde, ist der nächste logische Schritt, seine Leistung in verschiedenen realen Szenarien zu bewerten. Der Benchmark-Modus in Ultralytics YOLOv8 dient diesem Zweck, indem er einen robusten Rahmen für die Beurteilung von Geschwindigkeit und Genauigkeit Ihres Modells über eine Reihe von Exportformaten hinweg bietet. + +## Warum ist Benchmarking entscheidend? + +- **Informierte Entscheidungen:** Erhalten Sie Einblicke in die Kompromisse zwischen Geschwindigkeit und Genauigkeit. +- **Ressourcenzuweisung:** Verstehen Sie, wie sich verschiedene Exportformate auf unterschiedlicher Hardware verhalten. +- **Optimierung:** Erfahren Sie, welches Exportformat die beste Leistung für Ihren spezifischen Anwendungsfall bietet. +- **Kosteneffizienz:** Nutzen Sie Hardware-Ressourcen basierend auf den Benchmark-Ergebnissen effizienter. + +### Schlüsselmetriken im Benchmark-Modus + +- **mAP50-95:** Für Objekterkennung, Segmentierung und Posenschätzung. +- **accuracy_top5:** Für die Bildklassifizierung. +- **Inferenzzeit:** Zeit, die für jedes Bild in Millisekunden benötigt wird.
+ +### Unterstützte Exportformate + +- **ONNX:** Für optimale CPU-Leistung +- **TensorRT:** Für maximale GPU-Effizienz +- **OpenVINO:** Für die Optimierung von Intel-Hardware +- **CoreML, TensorFlow SavedModel, und mehr:** Für vielfältige Deployment-Anforderungen. + +!!! Tip "Tipp" + + * Exportieren Sie in ONNX oder OpenVINO für bis zu 3x CPU-Beschleunigung. + * Exportieren Sie in TensorRT für bis zu 5x GPU-Beschleunigung. + +## Anwendungsbeispiele + +Führen Sie YOLOv8n-Benchmarks auf allen unterstützten Exportformaten einschließlich ONNX, TensorRT usw. durch. Siehe den Abschnitt Argumente unten für eine vollständige Liste der Exportargumente. + +!!! Example "Beispiel" + + === "Python" + + ```python + from ultralytics.utils.benchmarks import benchmark + + # Benchmark auf GPU + benchmark(model='yolov8n.pt', data='coco8.yaml', imgsz=640, half=False, device=0) + ``` + === "CLI" + + ```bash + yolo benchmark model=yolov8n.pt data='coco8.yaml' imgsz=640 half=False device=0 + ``` + +## Argumente + +Argumente wie `model`, `data`, `imgsz`, `half`, `device` und `verbose` bieten Benutzern die Flexibilität, die Benchmarks auf ihre spezifischen Bedürfnisse abzustimmen und die Leistung verschiedener Exportformate mühelos zu vergleichen. + +| Schlüssel | Wert | Beschreibung | +|-----------|---------|--------------------------------------------------------------------------------------| +| `model` | `None` | Pfad zur Modelldatei, z. B. yolov8n.pt, yolov8n.yaml | +| `data` | `None` | Pfad zur YAML, die das Benchmarking-Dataset referenziert (unter `val`-Kennzeichnung) | +| `imgsz` | `640` | Bildgröße als Skalar oder Liste (h, w), z. B. (640, 480) | +| `half` | `False` | FP16-Quantisierung | +| `int8` | `False` | INT8-Quantisierung | +| `device` | `None` | Gerät zum Ausführen, z. B. 
CUDA device=0 oder device=0,1,2,3 oder device=cpu | +| `verbose` | `False` | bei Fehlern nicht fortsetzen (bool), oder Wertebereichsschwelle (float) | + +## Exportformate + +Benchmarks werden automatisch auf allen möglichen Exportformaten unten ausgeführt. + +| Format | `format`-Argument | Modell | Metadaten | Argumente | +|--------------------------------------------------------------------|-------------------|---------------------------|-----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +Vollständige Details zum `export` 
finden Sie auf der [Export](https://docs.ultralytics.com/modes/export/)-Seite. diff --git a/docs/de/modes/export.md b/docs/de/modes/export.md new file mode 100644 index 0000000..c62a1d6 --- /dev/null +++ b/docs/de/modes/export.md @@ -0,0 +1,108 @@ +--- +comments: true +description: Schritt-für-Schritt-Anleitung zum Exportieren Ihrer YOLOv8-Modelle in verschiedene Formate wie ONNX, TensorRT, CoreML und mehr für den Einsatz. +keywords: YOLO, YOLOv8, Ultralytics, Modell-Export, ONNX, TensorRT, CoreML, TensorFlow SavedModel, OpenVINO, PyTorch, Modell exportieren +--- + +# Modell-Export mit Ultralytics YOLO + +Ultralytics YOLO Ökosystem und Integrationen + +## Einführung + +Das ultimative Ziel des Trainierens eines Modells besteht darin, es für reale Anwendungen einzusetzen. Der Exportmodus in Ultralytics YOLOv8 bietet eine vielseitige Palette von Optionen für den Export Ihres trainierten Modells in verschiedene Formate, sodass es auf verschiedenen Plattformen und Geräten eingesetzt werden kann. Dieser umfassende Leitfaden soll Sie durch die Nuancen des Modell-Exports führen und zeigen, wie Sie maximale Kompatibilität und Leistung erzielen können. + +

+
+ +
+ Ansehen: Wie man ein benutzerdefiniertes trainiertes Ultralytics YOLOv8-Modell exportiert und Live-Inferenz auf der Webcam ausführt. +

+ +## Warum den Exportmodus von YOLOv8 wählen? + +- **Vielseitigkeit:** Export in verschiedene Formate einschließlich ONNX, TensorRT, CoreML und mehr. +- **Leistung:** Bis zu 5-fache GPU-Beschleunigung mit TensorRT und 3-fache CPU-Beschleunigung mit ONNX oder OpenVINO. +- **Kompatibilität:** Machen Sie Ihr Modell universell einsetzbar in zahlreichen Hardware- und Softwareumgebungen. +- **Benutzerfreundlichkeit:** Einfache CLI- und Python-API für schnellen und unkomplizierten Modell-Export. + +### Schlüsselfunktionen des Exportmodus + +Hier sind einige der herausragenden Funktionen: + +- **Ein-Klick-Export:** Einfache Befehle für den Export in verschiedene Formate. +- **Batch-Export:** Export von Modellen, die Batch-Inferenz unterstützen. +- **Optimiertes Inferenzverhalten:** Exportierte Modelle sind für schnellere Inferenzzeiten optimiert. +- **Tutorial-Videos:** Ausführliche Anleitungen und Tutorials für ein reibungsloses Exporterlebnis. + +!!! Tip "Tipp" + + * Exportieren Sie nach ONNX oder OpenVINO für bis zu 3-fache CPU-Beschleunigung. + * Exportieren Sie nach TensorRT für bis zu 5-fache GPU-Beschleunigung. + +## Nutzungsbeispiele + +Exportieren Sie ein YOLOv8n-Modell in ein anderes Format wie ONNX oder TensorRT. Weitere Informationen zu den Exportargumenten finden Sie im Abschnitt „Argumente“ unten. + +!!! 
Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Laden eines Modells + model = YOLO('yolov8n.pt') # offizielles Modell laden + model = YOLO('path/to/best.pt') # benutzerdefiniertes trainiertes Modell laden + + # Exportieren des Modells + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # offizielles Modell exportieren + yolo export model=path/to/best.pt format=onnx # benutzerdefiniertes trainiertes Modell exportieren + ``` + +## Argumente + +Exporteinstellungen für YOLO-Modelle beziehen sich auf verschiedene Konfigurationen und Optionen, die verwendet werden, um das Modell zu speichern oder für den Einsatz in anderen Umgebungen oder Plattformen zu exportieren. Diese Einstellungen können die Leistung, Größe und Kompatibilität des Modells mit verschiedenen Systemen beeinflussen. Zu den gängigen Exporteinstellungen von YOLO gehören das Format der exportierten Modelldatei (z. B. ONNX, TensorFlow SavedModel), das Gerät, auf dem das Modell ausgeführt wird (z. B. CPU, GPU) und das Vorhandensein zusätzlicher Funktionen wie Masken oder mehrere Labels pro Box. Andere Faktoren, die den Exportprozess beeinflussen können, sind die spezifische Aufgabe, für die das Modell verwendet wird, und die Anforderungen oder Einschränkungen der Zielumgebung oder -plattform. Es ist wichtig, diese Einstellungen sorgfältig zu berücksichtigen und zu konfigurieren, um sicherzustellen, dass das exportierte Modell für den beabsichtigten Einsatzzweck optimiert ist und in der Zielumgebung effektiv eingesetzt werden kann. + +| Schlüssel | Wert | Beschreibung | +|-------------|-----------------|----------------------------------------------------------| +| `format` | `'torchscript'` | Format für den Export | +| `imgsz` | `640` | Bildgröße als Skalar oder (h, w)-Liste, z.B. 
(640, 480) | +| `keras` | `False` | Verwendung von Keras für TensorFlow SavedModel-Export | +| `optimize` | `False` | TorchScript: Optimierung für mobile Geräte | +| `half` | `False` | FP16-Quantisierung | +| `int8` | `False` | INT8-Quantisierung | +| `dynamic` | `False` | ONNX/TensorRT: dynamische Achsen | +| `simplify` | `False` | ONNX/TensorRT: Vereinfachung des Modells | +| `opset` | `None` | ONNX: Opset-Version (optional, Standardwert ist neueste) | +| `workspace` | `4` | TensorRT: Arbeitsbereichgröße (GB) | +| `nms` | `False` | CoreML: Hinzufügen von NMS | + +## Exportformate + +Verfügbare YOLOv8-Exportformate finden Sie in der Tabelle unten. Sie können in jedes Format exportieren, indem Sie das `format`-Argument verwenden, z. B. `format='onnx'` oder `format='engine'`. + +| Format | `format`-Argument | Modell | Metadaten | Argumente | +|--------------------------------------------------------------------|-------------------|---------------------------|-----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF 
Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | diff --git a/docs/de/modes/index.md b/docs/de/modes/index.md new file mode 100644 index 0000000..c73b488 --- /dev/null +++ b/docs/de/modes/index.md @@ -0,0 +1,74 @@ +--- +comments: true +description: Vom Training bis zum Tracking - Nutzen Sie YOLOv8 von Ultralytics optimal. Erhalten Sie Einblicke und Beispiele für jeden unterstützten Modus, einschließlich Validierung, Export und Benchmarking. +keywords: Ultralytics, YOLOv8, Maschinelles Lernen, Objekterkennung, Training, Validierung, Vorhersage, Export, Tracking, Benchmarking +--- + +# Ultralytics YOLOv8 Modi + +Ultralytics YOLO-Ökosystem und Integrationen + +## Einführung + +Ultralytics YOLOv8 ist nicht nur ein weiteres Objekterkennungsmodell; es ist ein vielseitiges Framework, das den gesamten Lebenszyklus von Machine-Learning-Modellen abdeckt - von der Dateneingabe und dem Modelltraining über die Validierung und Bereitstellung bis hin zum Tracking in der realen Welt. Jeder Modus dient einem bestimmten Zweck und ist darauf ausgelegt, Ihnen die Flexibilität und Effizienz zu bieten, die für verschiedene Aufgaben und Anwendungsfälle erforderlich ist. + +

+
+ +
+ Anschauen: Ultralytics Modi Tutorial: Trainieren, Validieren, Vorhersagen, Exportieren & Benchmarking. +

+ +### Modi im Überblick + +Das Verständnis der verschiedenen **Modi**, die Ultralytics YOLOv8 unterstützt, ist entscheidend, um das Beste aus Ihren Modellen herauszuholen: + +- **Train**-Modus: Verfeinern Sie Ihr Modell mit angepassten oder vorgeladenen Datensätzen. +- **Val**-Modus: Eine Nachtrainingsprüfung zur Validierung der Modellleistung. +- **Predict**-Modus: Entfesseln Sie die Vorhersagekraft Ihres Modells mit realen Daten. +- **Export**-Modus: Machen Sie Ihr Modell in verschiedenen Formaten einsatzbereit. +- **Track**-Modus: Erweitern Sie Ihr Objekterkennungsmodell um Echtzeit-Tracking-Anwendungen. +- **Benchmark**-Modus: Analysieren Sie die Geschwindigkeit und Genauigkeit Ihres Modells in verschiedenen Einsatzumgebungen. + +Dieser umfassende Leitfaden soll Ihnen einen Überblick und praktische Einblicke in jeden Modus geben, um Ihnen zu helfen, das volle Potenzial von YOLOv8 zu nutzen. + +## [Trainieren](train.md) + +Der Trainingsmodus wird verwendet, um ein YOLOv8-Modell mit einem angepassten Datensatz zu trainieren. In diesem Modus wird das Modell mit dem angegebenen Datensatz und den Hyperparametern trainiert. Der Trainingsprozess beinhaltet die Optimierung der Modellparameter, damit es die Klassen und Standorte von Objekten in einem Bild genau vorhersagen kann. + +[Trainingsbeispiele](train.md){ .md-button } + +## [Validieren](val.md) + +Der Validierungsmodus wird genutzt, um ein YOLOv8-Modell nach dem Training zu bewerten. In diesem Modus wird das Modell auf einem Validierungsset getestet, um seine Genauigkeit und Generalisierungsleistung zu messen. Dieser Modus kann verwendet werden, um die Hyperparameter des Modells für eine bessere Leistung zu optimieren. + +[Validierungsbeispiele](val.md){ .md-button } + +## [Vorhersagen](predict.md) + +Der Vorhersagemodus wird verwendet, um mit einem trainierten YOLOv8-Modell Vorhersagen für neue Bilder oder Videos zu treffen. 
In diesem Modus wird das Modell aus einer Checkpoint-Datei geladen, und der Benutzer kann Bilder oder Videos zur Inferenz bereitstellen. Das Modell sagt die Klassen und Standorte von Objekten in den Eingabebildern oder -videos voraus. + +[Vorhersagebeispiele](predict.md){ .md-button } + +## [Exportieren](export.md) + +Der Exportmodus wird verwendet, um ein YOLOv8-Modell in ein Format zu exportieren, das für die Bereitstellung verwendet werden kann. In diesem Modus wird das Modell in ein Format konvertiert, das von anderen Softwareanwendungen oder Hardwaregeräten verwendet werden kann. Dieser Modus ist nützlich, wenn das Modell in Produktionsumgebungen eingesetzt wird. + +[Exportbeispiele](export.md){ .md-button } + +## [Verfolgen](track.md) + +Der Trackingmodus wird zur Echtzeitverfolgung von Objekten mit einem YOLOv8-Modell verwendet. In diesem Modus wird das Modell aus einer Checkpoint-Datei geladen, und der Benutzer kann einen Live-Videostream für das Echtzeitobjekttracking bereitstellen. Dieser Modus ist nützlich für Anwendungen wie Überwachungssysteme oder selbstfahrende Autos. + +[Trackingbeispiele](track.md){ .md-button } + +## [Benchmarking](benchmark.md) + +Der Benchmark-Modus wird verwendet, um die Geschwindigkeit und Genauigkeit verschiedener Exportformate für YOLOv8 zu profilieren. Die Benchmarks liefern Informationen über die Größe des exportierten Formats, seine `mAP50-95`-Metriken (für Objekterkennung, Segmentierung und Pose) +oder `accuracy_top5`-Metriken (für Klassifizierung) und die Inferenzzeit in Millisekunden pro Bild für verschiedene Exportformate wie ONNX, OpenVINO, TensorRT und andere. Diese Informationen können den Benutzern dabei helfen, das optimale Exportformat für ihren spezifischen Anwendungsfall basierend auf ihren Anforderungen an Geschwindigkeit und Genauigkeit auszuwählen. 
+ +[Benchmarkbeispiele](benchmark.md){ .md-button } diff --git a/docs/de/modes/predict.md b/docs/de/modes/predict.md new file mode 100644 index 0000000..feb64f0 --- /dev/null +++ b/docs/de/modes/predict.md @@ -0,0 +1,226 @@ +--- +comments: true +description: Erkunden Sie, wie der YOLOv8-Prognosemodus für verschiedene Aufgaben verwendet werden kann. Erfahren Sie mehr über verschiedene Inferenzquellen wie Bilder, Videos und Datenformate. +keywords: Ultralytics, YOLOv8, Vorhersagemodus, Inferenzquellen, Vorhersageaufgaben, Streaming-Modus, Bildverarbeitung, Videoverarbeitung, maschinelles Lernen, KI +--- + +# Modellvorhersage mit Ultralytics YOLO + +Ultralytics YOLO Ökosystem und Integrationen + +## Einführung + +Im Bereich des maschinellen Lernens und der Computer Vision wird der Prozess des Verstehens visueller Daten als 'Inferenz' oder 'Vorhersage' bezeichnet. Ultralytics YOLOv8 bietet eine leistungsstarke Funktion, die als **Prognosemodus** bekannt ist und für eine hochleistungsfähige, echtzeitfähige Inferenz auf einer breiten Palette von Datenquellen zugeschnitten ist. + +

+
+ +
+ Anschauen: Wie man die Ausgaben vom Ultralytics YOLOv8 Modell für individuelle Projekte extrahiert. +

+ +## Anwendungen in der realen Welt + +| Herstellung | Sport | Sicherheit | +|:---------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------:| +| ![Ersatzteilerkennung für Fahrzeuge](https://github.com/RizwanMunawar/ultralytics/assets/62513924/a0f802a8-0776-44cf-8f17-93974a4a28a1) | ![Erkennung von Fußballspielern](https://github.com/RizwanMunawar/ultralytics/assets/62513924/7d320e1f-fc57-4d7f-a691-78ee579c3442) | ![Erkennung von stürzenden Personen](https://github.com/RizwanMunawar/ultralytics/assets/62513924/86437c4a-3227-4eee-90ef-9efb697bdb43) | +| Erkennung von Fahrzeugersatzteilen | Erkennung von Fußballspielern | Erkennung von stürzenden Personen | + +## Warum Ultralytics YOLO für Inferenz nutzen? + +Hier sind Gründe, warum Sie den Prognosemodus von YOLOv8 für Ihre verschiedenen Inferenzanforderungen in Betracht ziehen sollten: + +- **Vielseitigkeit:** Fähig, Inferenzen auf Bilder, Videos und sogar Live-Streams zu machen. +- **Leistung:** Entwickelt für Echtzeit-Hochgeschwindigkeitsverarbeitung ohne Genauigkeitsverlust. +- **Einfache Bedienung:** Intuitive Python- und CLI-Schnittstellen für schnelle Einsatzbereitschaft und Tests. +- **Hohe Anpassbarkeit:** Verschiedene Einstellungen und Parameter, um das Verhalten der Modellinferenz entsprechend Ihren spezifischen Anforderungen zu optimieren. 
+ +### Schlüsselfunktionen des Prognosemodus + +Der Prognosemodus von YOLOv8 ist robust und vielseitig konzipiert und verfügt über: + +- **Kompatibilität mit mehreren Datenquellen:** Ganz gleich, ob Ihre Daten in Form von Einzelbildern, einer Bildersammlung, Videodateien oder Echtzeit-Videostreams vorliegen, der Prognosemodus deckt alles ab. +- **Streaming-Modus:** Nutzen Sie die Streaming-Funktion, um einen speichereffizienten Generator von `Results`-Objekten zu erzeugen. Aktivieren Sie dies, indem Sie `stream=True` in der Aufrufmethode des Predictors einstellen. +- **Batchverarbeitung:** Die Möglichkeit, mehrere Bilder oder Videoframes in einem einzigen Batch zu verarbeiten, wodurch die Inferenzzeit weiter verkürzt wird. +- **Integrationsfreundlich:** Dank der flexiblen API leicht in bestehende Datenpipelines und andere Softwarekomponenten zu integrieren. + +Ultralytics YOLO-Modelle geben entweder eine Python-Liste von `Results`-Objekten zurück, oder einen speichereffizienten Python-Generator von `Results`-Objekten, wenn `stream=True` beim Inferenzvorgang an das Modell übergeben wird: + +!!! 
Example "Predict" + + === "Gibt eine Liste mit `stream=False` zurück" + ```python + from ultralytics import YOLO + + # Ein Modell laden + model = YOLO('yolov8n.pt') # vortrainiertes YOLOv8n Modell + + # Batch-Inferenz auf einer Liste von Bildern ausführen + results = model(['im1.jpg', 'im2.jpg']) # gibt eine Liste von Results-Objekten zurück + + # Ergebnisliste verarbeiten + for result in results: + boxes = result.boxes # Boxes-Objekt für Bbox-Ausgaben + masks = result.masks # Masks-Objekt für Segmentierungsmasken-Ausgaben + keypoints = result.keypoints # Keypoints-Objekt für Pose-Ausgaben + probs = result.probs # Probs-Objekt für Klassifizierungs-Ausgaben + ``` + + === "Gibt einen Generator mit `stream=True` zurück" + ```python + from ultralytics import YOLO + + # Ein Modell laden + model = YOLO('yolov8n.pt') # vortrainiertes YOLOv8n Modell + + # Batch-Inferenz auf einer Liste von Bildern ausführen + results = model(['im1.jpg', 'im2.jpg'], stream=True) # gibt einen Generator von Results-Objekten zurück + + # Generator von Ergebnissen verarbeiten + for result in results: + boxes = result.boxes # Boxes-Objekt für Bbox-Ausgaben + masks = result.masks # Masks-Objekt für Segmentierungsmasken-Ausgaben + keypoints = result.keypoints # Keypoints-Objekt für Pose-Ausgaben + probs = result.probs # Probs-Objekt für Klassifizierungs-Ausgaben + ``` + +## Inferenzquellen + +YOLOv8 kann verschiedene Arten von Eingabequellen für die Inferenz verarbeiten, wie in der folgenden Tabelle gezeigt. Die Quellen umfassen statische Bilder, Videostreams und verschiedene Datenformate. Die Tabelle gibt ebenfalls an, ob jede Quelle im Streaming-Modus mit dem Argument `stream=True` ✅ verwendet werden kann. Der Streaming-Modus ist vorteilhaft für die Verarbeitung von Videos oder Live-Streams, da er einen Generator von Ergebnissen statt das Laden aller Frames in den Speicher erzeugt. + +!!! 
Tip "Tipp" + + Verwenden Sie `stream=True` für die Verarbeitung langer Videos oder großer Datensätze, um den Speicher effizient zu verwalten. Bei `stream=False` werden die Ergebnisse für alle Frames oder Datenpunkte im Speicher gehalten, was bei großen Eingaben schnell zu Speicherüberläufen führen kann. Im Gegensatz dazu verwendet `stream=True` einen Generator, der nur die Ergebnisse des aktuellen Frames oder Datenpunkts im Speicher behält, was den Speicherverbrauch erheblich reduziert und Speicherüberlaufprobleme verhindert. + +| Quelle | Argument | Typ | Hinweise | +|--------------------|--------------------------------------------|-------------------|------------------------------------------------------------------------------------------------| +| Bild | `'image.jpg'` | `str` oder `Path` | Einzelbilddatei. | +| URL | `'https://ultralytics.com/images/bus.jpg'` | `str` | URL zu einem Bild. | +| Bildschirmaufnahme | `'screen'` | `str` | Eine Bildschirmaufnahme erstellen. | +| PIL | `Image.open('im.jpg')` | `PIL.Image` | HWC-Format mit RGB-Kanälen. | +| OpenCV | `cv2.imread('im.jpg')` | `np.ndarray` | HWC-Format mit BGR-Kanälen `uint8 (0-255)`. | +| numpy | `np.zeros((640,1280,3))` | `np.ndarray` | HWC-Format mit BGR-Kanälen `uint8 (0-255)`. | +| torch | `torch.zeros(16,3,320,640)` | `torch.Tensor` | BCHW-Format mit RGB-Kanälen `float32 (0.0-1.0)`. | +| CSV | `'sources.csv'` | `str` oder `Path` | CSV-Datei mit Pfaden zu Bildern, Videos oder Verzeichnissen. | +| video ✅ | `'video.mp4'` | `str` oder `Path` | Videodatei in Formaten wie MP4, AVI, usw. | +| Verzeichnis ✅ | `'path/'` | `str` oder `Path` | Pfad zu einem Verzeichnis mit Bildern oder Videos. | +| glob ✅ | `'path/*.jpg'` | `str` | Glob-Muster, um mehrere Dateien zu finden. Verwenden Sie das `*` Zeichen als Platzhalter. | +| YouTube ✅ | `'https://youtu.be/LNwODJXcvt4'` | `str` | URL zu einem YouTube-Video. 
| +| stream ✅ | `'rtsp://example.com/media.mp4'` | `str` | URL für Streaming-Protokolle wie RTSP, RTMP, TCP oder eine IP-Adresse. | +| Multi-Stream ✅ | `'list.streams'` | `str` oder `Path` | `*.streams` Textdatei mit einer Stream-URL pro Zeile, z.B. 8 Streams laufen bei Batch-Größe 8. | + +Untenstehend finden Sie Codebeispiele für die Verwendung jedes Quelltyps: + +!!! Example "Vorhersagequellen" + + === "Bild" + Führen Sie die Inferenz auf einer Bilddatei aus. + ```python + from ultralytics import YOLO + + # Ein vortrainiertes YOLOv8n Modell laden + model = YOLO('yolov8n.pt') + + # Pfad zur Bilddatei definieren + quell = 'Pfad/zum/Bild.jpg' + + # Inferenz auf der Quelle ausführen + ergebnisse = model(quell) # Liste von Results-Objekten + ``` + + === "Bildschirmaufnahme" + Führen Sie die Inferenz auf dem aktuellen Bildschirminhalt als Screenshot aus. + ```python + from ultralytics import YOLO + + # Ein vortrainiertes YOLOv8n Modell laden + model = YOLO('yolov8n.pt') + + # Aktuellen Screenshot als Quelle definieren + quell = 'screen' + + # Inferenz auf der Quelle ausführen + ergebnisse = model(quell) # Liste von Results-Objekten + ``` + + === "URL" + Führen Sie die Inferenz auf einem Bild oder Video aus, das über eine URL remote gehostet wird. + ```python + from ultralytics import YOLO + + # Ein vortrainiertes YOLOv8n Modell laden + model = YOLO('yolov8n.pt') + + # Remote-Bild- oder Video-URL definieren + quell = 'https://ultralytics.com/images/bus.jpg' + + # Inferenz auf der Quelle ausführen + ergebnisse = model(quell) # Liste von Results-Objekten + ``` + + === "PIL" + Führen Sie die Inferenz auf einem Bild aus, das mit der Python Imaging Library (PIL) geöffnet wurde. 
+ ```python + from PIL import Image + from ultralytics import YOLO + + # Ein vortrainiertes YOLOv8n Modell laden + model = YOLO('yolov8n.pt') + + # Ein Bild mit PIL öffnen + quell = Image.open('Pfad/zum/Bild.jpg') + + # Inferenz auf der Quelle ausführen + ergebnisse = model(quell) # Liste von Results-Objekten + ``` + + === "OpenCV" + Führen Sie die Inferenz auf einem Bild aus, das mit OpenCV gelesen wurde. + ```python + import cv2 + from ultralytics import YOLO + + # Ein vortrainiertes YOLOv8n Modell laden + model = YOLO('yolov8n.pt') + + # Ein Bild mit OpenCV lesen + quell = cv2.imread('Pfad/zum/Bild.jpg') + + # Inferenz auf der Quelle ausführen + ergebnisse = model(quell) # Liste von Results-Objekten + ``` + + === "numpy" + Führen Sie die Inferenz auf einem Bild aus, das als numpy-Array dargestellt wird. + ```python + import numpy as np + from ultralytics import YOLO + + # Ein vortrainiertes YOLOv8n Modell laden + model = YOLO('yolov8n.pt') + + # Ein zufälliges numpy-Array der HWC-Form (640, 640, 3) mit Werten im Bereich [0, 255] und Typ uint8 erstellen + quell = np.random.randint(low=0, high=255, size=(640, 640, 3), dtype='uint8') + + # Inferenz auf der Quelle ausführen + ergebnisse = model(quell) # Liste von Results-Objekten + ``` + + === "torch" + Führen Sie die Inferenz auf einem Bild aus, das als PyTorch-Tensor dargestellt wird. 
+ ```python + import torch + from ultralytics import YOLO + + # Ein vortrainiertes YOLOv8n Modell laden + model = YOLO('yolov8n.pt') + + # Ein zufälliger torch-Tensor der BCHW-Form (1, 3, 640, 640) mit Werten im Bereich [0, 1] und Typ float32 erstellen + quell = torch.rand(1, 3, 640, 640, dtype=torch.float32) + + # Inferenz auf der Quelle ausführen + ergebnisse = model(quell) # Liste von Results-Objekten diff --git a/docs/de/modes/track.md b/docs/de/modes/track.md new file mode 100644 index 0000000..f29dae9 --- /dev/null +++ b/docs/de/modes/track.md @@ -0,0 +1,200 @@ +--- +comments: true +description: Erfahren Sie, wie Sie Ultralytics YOLO für Objektverfolgung in Videostreams verwenden. Anleitungen zum Einsatz verschiedener Tracker und zur Anpassung von Tracker-Konfigurationen. +keywords: Ultralytics, YOLO, Objektverfolgung, Videostreams, BoT-SORT, ByteTrack, Python-Anleitung, CLI-Anleitung +--- + +# Multi-Objektverfolgung mit Ultralytics YOLO + +Beispiele für Multi-Objektverfolgung + +Objektverfolgung im Bereich der Videoanalytik ist eine essentielle Aufgabe, die nicht nur den Standort und die Klasse von Objekten innerhalb des Frames identifiziert, sondern auch für jedes erkannte Objekt eine eindeutige ID beibehält, während das Video fortschreitet. Die Anwendungsmöglichkeiten sind grenzenlos – von Überwachung und Sicherheit bis hin zur Echtzeitsportanalytik. + +## Warum Ultralytics YOLO für Objektverfolgung wählen? + +Die Ausgabe von Ultralytics Trackern ist konsistent mit der standardmäßigen Objekterkennung, bietet aber zusätzlich Objekt-IDs. Dies erleichtert das Verfolgen von Objekten in Videostreams und das Durchführen nachfolgender Analysen. Hier sind einige Gründe, warum Sie Ultralytics YOLO für Ihre Objektverfolgungsaufgaben in Betracht ziehen sollten: + +- **Effizienz:** Verarbeitung von Videostreams in Echtzeit ohne Einbußen bei der Genauigkeit. +- **Flexibilität:** Unterstützt mehrere Tracking-Algorithmen und -Konfigurationen. 
+- **Benutzerfreundlichkeit:** Einfache Python-API und CLI-Optionen für schnelle Integration und Bereitstellung. +- **Anpassbarkeit:** Einfache Verwendung mit individuell trainierten YOLO-Modellen, ermöglicht Integration in branchenspezifische Anwendungen. + +

+
+ +
+ Ansehen: Objekterkennung und -verfolgung mit Ultralytics YOLOv8. +

+ +## Anwendungen in der realen Welt + +| Transportwesen | Einzelhandel | Aquakultur | +|:------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------------------:| +| ![Fahrzeugverfolgung](https://github.com/RizwanMunawar/ultralytics/assets/62513924/ee6e6038-383b-4f21-ac29-b2a1c7d386ab) | ![Personenverfolgung](https://github.com/RizwanMunawar/ultralytics/assets/62513924/93bb4ee2-77a0-4e4e-8eb6-eb8f527f0527) | ![Fischverfolgung](https://github.com/RizwanMunawar/ultralytics/assets/62513924/a5146d0f-bfa8-4e0a-b7df-3c1446cd8142) | +| Fahrzeugverfolgung | Personenverfolgung | Fischverfolgung | + +## Eigenschaften auf einen Blick + +Ultralytics YOLO erweitert seine Objekterkennungsfunktionen, um eine robuste und vielseitige Objektverfolgung bereitzustellen: + +- **Echtzeitverfolgung:** Nahtloses Verfolgen von Objekten in Videos mit hoher Bildfrequenz. +- **Unterstützung mehrerer Tracker:** Auswahl aus einer Vielzahl etablierter Tracking-Algorithmen. +- **Anpassbare Tracker-Konfigurationen:** Anpassen des Tracking-Algorithmus an spezifische Anforderungen durch Einstellung verschiedener Parameter. + +## Verfügbare Tracker + +Ultralytics YOLO unterstützt die folgenden Tracking-Algorithmen. Sie können aktiviert werden, indem Sie die entsprechende YAML-Konfigurationsdatei wie `tracker=tracker_type.yaml` übergeben: + +* [BoT-SORT](https://github.com/NirAharon/BoT-SORT) - Verwenden Sie `botsort.yaml`, um diesen Tracker zu aktivieren. +* [ByteTrack](https://github.com/ifzhang/ByteTrack) - Verwenden Sie `bytetrack.yaml`, um diesen Tracker zu aktivieren. + +Der Standardtracker ist BoT-SORT. 
+ +## Verfolgung + +Um den Tracker auf Videostreams auszuführen, verwenden Sie ein trainiertes Erkennungs-, Segmentierungs- oder Posierungsmodell wie YOLOv8n, YOLOv8n-seg und YOLOv8n-pose. + +!!! Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Laden Sie ein offizielles oder individuelles Modell + model = YOLO('yolov8n.pt') # Laden Sie ein offizielles Erkennungsmodell + model = YOLO('yolov8n-seg.pt') # Laden Sie ein offizielles Segmentierungsmodell + model = YOLO('yolov8n-pose.pt') # Laden Sie ein offizielles Posierungsmodell + model = YOLO('path/to/best.pt') # Laden Sie ein individuell trainiertes Modell + + # Führen Sie die Verfolgung mit dem Modell durch + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True) # Verfolgung mit Standardtracker + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True, tracker="bytetrack.yaml") # Verfolgung mit ByteTrack-Tracker + ``` + + === "CLI" + + ```bash + # Führen Sie die Verfolgung mit verschiedenen Modellen über die Befehlszeilenschnittstelle durch + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" # Offizielles Erkennungsmodell + yolo track model=yolov8n-seg.pt source="https://youtu.be/LNwODJXcvt4" # Offizielles Segmentierungsmodell + yolo track model=yolov8n-pose.pt source="https://youtu.be/LNwODJXcvt4" # Offizielles Posierungsmodell + yolo track model=path/to/best.pt source="https://youtu.be/LNwODJXcvt4" # Individuell trainiertes Modell + + # Verfolgung mit ByteTrack-Tracker + yolo track model=path/to/best.pt tracker="bytetrack.yaml" + ``` + +Wie in der obigen Nutzung zu sehen ist, ist die Verfolgung für alle Detect-, Segment- und Pose-Modelle verfügbar, die auf Videos oder Streaming-Quellen ausgeführt werden. + +## Konfiguration + +### Tracking-Argumente + +Die Tracking-Konfiguration teilt Eigenschaften mit dem Predict-Modus, wie `conf`, `iou` und `show`. 
Für weitere Konfigurationen siehe die Seite zum [Predict](https://docs.ultralytics.com/modes/predict/)-Modus. + +!!! Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Konfigurieren Sie die Tracking-Parameter und führen Sie den Tracker aus + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True) + ``` + + === "CLI" + + ```bash + # Konfigurieren Sie die Tracking-Parameter und führen Sie den Tracker über die Befehlszeilenschnittstelle aus + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3 iou=0.5 show + ``` + +### Tracker-Auswahl + +Ultralytics ermöglicht es Ihnen auch, eine modifizierte Tracker-Konfigurationsdatei zu verwenden. Hierfür kopieren Sie einfach eine Tracker-Konfigurationsdatei (zum Beispiel `custom_tracker.yaml`) von [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers) und ändern jede Konfiguration (außer dem `tracker_type`), wie es Ihren Bedürfnissen entspricht. + +!!! Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Laden Sie das Modell und führen Sie den Tracker mit einer individuellen Konfigurationsdatei aus + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", tracker='custom_tracker.yaml') + ``` + + === "CLI" + + ```bash + # Laden Sie das Modell und führen Sie den Tracker mit einer individuellen Konfigurationsdatei über die Befehlszeilenschnittstelle aus + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml' + ``` + +Für eine umfassende Liste der Tracking-Argumente siehe die Seite [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers). 
+ +## Python-Beispiele + +### Persistierende Tracks-Schleife + +Hier ist ein Python-Skript, das OpenCV (`cv2`) und YOLOv8 verwendet, um Objektverfolgung in Videoframes durchzuführen. Dieses Skript setzt voraus, dass Sie die notwendigen Pakete (`opencv-python` und `ultralytics`) bereits installiert haben. Das Argument `persist=True` teilt dem Tracker mit, dass das aktuelle Bild oder Frame das nächste in einer Sequenz ist und Tracks aus dem vorherigen Bild im aktuellen Bild erwartet werden. + +!!! Example "Streaming-For-Schleife mit Tracking" + + ```python + import cv2 + from ultralytics import YOLO + + # Laden Sie das YOLOv8-Modell + model = YOLO('yolov8n.pt') + + # Öffnen Sie die Videodatei + video_path = "path/to/video.mp4" + cap = cv2.VideoCapture(video_path) + + # Schleife durch die Videoframes + while cap.isOpened(): + # Einen Frame aus dem Video lesen + success, frame = cap.read() + + if success: + # Führen Sie YOLOv8-Tracking im Frame aus, wobei Tracks zwischen Frames beibehalten werden + results = model.track(frame, persist=True) + + # Visualisieren Sie die Ergebnisse im Frame + annotated_frame = results[0].plot() + + # Zeigen Sie den kommentierten Frame an + cv2.imshow("YOLOv8-Tracking", annotated_frame) + + # Beenden Sie die Schleife, wenn 'q' gedrückt wird + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # Beenden Sie die Schleife, wenn das Ende des Videos erreicht ist + break + + # Geben Sie das Videoaufnahmeobjekt frei und schließen Sie das Anzeigefenster + cap.release() + cv2.destroyAllWindows() + ``` + +Bitte beachten Sie die Änderung von `model(frame)` zu `model.track(frame)`, welche die Objektverfolgung anstelle der einfachen Erkennung aktiviert. Dieses modifizierte Skript führt den Tracker auf jedem Frame des Videos aus, visualisiert die Ergebnisse und zeigt sie in einem Fenster an. Die Schleife kann durch Drücken von 'q' beendet werden. 
+ +## Neue Tracker beisteuern + +Sind Sie versiert in der Multi-Objektverfolgung und haben erfolgreich einen Tracking-Algorithmus mit Ultralytics YOLO implementiert oder angepasst? Wir laden Sie ein, zu unserem Trackers-Bereich in [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers) beizutragen! Ihre realen Anwendungen und Lösungen könnten für Benutzer, die an Tracking-Aufgaben arbeiten, von unschätzbarem Wert sein. + +Indem Sie zu diesem Bereich beitragen, helfen Sie, das Spektrum verfügbarer Tracking-Lösungen innerhalb des Ultralytics YOLO-Frameworks zu erweitern und fügen eine weitere Funktionsschicht für die Gemeinschaft hinzu. + +Um Ihren Beitrag einzuleiten, lesen Sie bitte unseren [Contributing Guide](https://docs.ultralytics.com/help/contributing); dort finden Sie umfassende Anweisungen zur Einreichung eines Pull Requests (PR) 🛠️. Wir sind gespannt darauf, was Sie beitragen! + +Gemeinsam verbessern wir die Tracking-Fähigkeiten des Ultralytics YOLO-Ökosystems 🙏! diff --git a/docs/de/modes/train.md b/docs/de/modes/train.md new file mode 100644 index 0000000..b7ee65e --- /dev/null +++ b/docs/de/modes/train.md @@ -0,0 +1,206 @@ +--- +comments: true +description: Schritt-für-Schritt-Leitfaden zum Trainieren von YOLOv8-Modellen mit Ultralytics YOLO, einschließlich Beispielen für Single-GPU- und Multi-GPU-Training +keywords: Ultralytics, YOLOv8, YOLO, Objekterkennung, Trainingsmodus, benutzerdefinierter Datensatz, GPU-Training, Multi-GPU, Hyperparameter, CLI-Beispiele, Python-Beispiele +--- + +# Modelltraining mit Ultralytics YOLO + +Ultralytics YOLO Ökosystem und Integrationen + +## Einleitung + +Das Training eines Deep-Learning-Modells beinhaltet das Einspeisen von Daten und die Anpassung seiner Parameter, so dass es genaue Vorhersagen treffen kann. 
Der Trainingsmodus in Ultralytics YOLOv8 ist für das effektive und effiziente Training von Objekterkennungsmodellen konzipiert und nutzt dabei die Fähigkeiten moderner Hardware voll aus. Dieser Leitfaden zielt darauf ab, alle Details zu vermitteln, die Sie benötigen, um mit dem Training Ihrer eigenen Modelle unter Verwendung des robusten Funktionssatzes von YOLOv8 zu beginnen. + +

+
+ +
+ Video anschauen: Wie man ein YOLOv8-Modell auf Ihrem benutzerdefinierten Datensatz in Google Colab trainiert. +

+ +## Warum Ultralytics YOLO für das Training wählen? + +Hier einige überzeugende Gründe, sich für den Trainingsmodus von YOLOv8 zu entscheiden: + +- **Effizienz:** Machen Sie das Beste aus Ihrer Hardware, egal ob Sie auf einem Single-GPU-Setup sind oder über mehrere GPUs skalieren. +- **Vielseitigkeit:** Training auf benutzerdefinierten Datensätzen zusätzlich zu den bereits verfügbaren Datensätzen wie COCO, VOC und ImageNet. +- **Benutzerfreundlich:** Einfache, aber leistungsstarke CLI- und Python-Schnittstellen für ein unkompliziertes Trainingserlebnis. +- **Flexibilität der Hyperparameter:** Eine breite Palette von anpassbaren Hyperparametern, um die Modellleistung zu optimieren. + +### Schlüsselfunktionen des Trainingsmodus + +Die folgenden sind einige bemerkenswerte Funktionen von YOLOv8s Trainingsmodus: + +- **Automatischer Datensatz-Download:** Standarddatensätze wie COCO, VOC und ImageNet werden bei der ersten Verwendung automatisch heruntergeladen. +- **Multi-GPU-Unterstützung:** Skalieren Sie Ihr Training nahtlos über mehrere GPUs, um den Prozess zu beschleunigen. +- **Konfiguration der Hyperparameter:** Die Möglichkeit zur Modifikation der Hyperparameter über YAML-Konfigurationsdateien oder CLI-Argumente. +- **Visualisierung und Überwachung:** Echtzeit-Tracking von Trainingsmetriken und Visualisierung des Lernprozesses für bessere Einsichten. + +!!! Tip "Tipp" + + * YOLOv8-Datensätze wie COCO, VOC, ImageNet und viele andere werden automatisch bei der ersten Verwendung heruntergeladen, d.h. `yolo train data=coco.yaml` + +## Nutzungsbeispiele + +Trainieren Sie YOLOv8n auf dem COCO128-Datensatz für 100 Epochen bei einer Bildgröße von 640. Das Trainingsgerät kann mit dem Argument `device` spezifiziert werden. Wenn kein Argument übergeben wird, wird GPU `device=0` verwendet, wenn verfügbar, sonst wird `device=cpu` verwendet. Siehe den Abschnitt Argumente unten für eine vollständige Liste der Trainingsargumente. + +!!! 
Example "Beispiel für Single-GPU- und CPU-Training" + + Das Gerät wird automatisch ermittelt. Wenn eine GPU verfügbar ist, dann wird diese verwendet, sonst beginnt das Training auf der CPU. + + === "Python" + + ```python + from ultralytics import YOLO + + # Laden Sie ein Modell + model = YOLO('yolov8n.yaml') # bauen Sie ein neues Modell aus YAML + model = YOLO('yolov8n.pt') # laden Sie ein vortrainiertes Modell (empfohlen für das Training) + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # bauen Sie aus YAML und übertragen Sie Gewichte + + # Trainieren Sie das Modell + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + # Bauen Sie ein neues Modell aus YAML und beginnen Sie das Training von Grund auf + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # Beginnen Sie das Training von einem vortrainierten *.pt Modell + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # Bauen Sie ein neues Modell aus YAML, übertragen Sie vortrainierte Gewichte darauf und beginnen Sie das Training + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### Multi-GPU-Training + +Multi-GPU-Training ermöglicht eine effizientere Nutzung von verfügbaren Hardware-Ressourcen, indem die Trainingslast über mehrere GPUs verteilt wird. Diese Funktion ist über sowohl die Python-API als auch die Befehlszeilenschnittstelle verfügbar. Um das Multi-GPU-Training zu aktivieren, geben Sie die GPU-Geräte-IDs an, die Sie verwenden möchten. + +!!! Example "Beispiel für Multi-GPU-Training" + + Um mit 2 GPUs zu trainieren, verwenden Sie die folgenden Befehle für CUDA-Geräte 0 und 1. Erweitern Sie dies bei Bedarf auf zusätzliche GPUs. 
+ + === "Python" + + ```python + from ultralytics import YOLO + + # Laden Sie ein Modell + model = YOLO('yolov8n.pt') # laden Sie ein vortrainiertes Modell (empfohlen für das Training) + + # Trainieren Sie das Modell mit 2 GPUs + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device=[0, 1]) + ``` + + === "CLI" + + ```bash + # Beginnen Sie das Training von einem vortrainierten *.pt Modell unter Verwendung der GPUs 0 und 1 + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=0,1 + ``` + +### Apple M1- und M2-MPS-Training + +Mit der Unterstützung für Apple M1- und M2-Chips, die in den Ultralytics YOLO-Modellen integriert ist, ist es jetzt möglich, Ihre Modelle auf Geräten zu trainieren, die das leistungsstarke Metal Performance Shaders (MPS)-Framework nutzen. MPS bietet eine leistungsstarke Methode zur Ausführung von Berechnungs- und Bildverarbeitungsaufgaben auf Apples benutzerdefinierten Siliziumchips. + +Um das Training auf Apple M1- und M2-Chips zu ermöglichen, sollten Sie 'mps' als Ihr Gerät angeben, wenn Sie den Trainingsprozess starten. Unten ist ein Beispiel, wie Sie dies in Python und über die Befehlszeile tun könnten: + +!!! Example "MPS-Training Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Laden Sie ein Modell + model = YOLO('yolov8n.pt') # laden Sie ein vortrainiertes Modell (empfohlen für das Training) + + # Trainieren Sie das Modell mit MPS + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device='mps') + ``` + + === "CLI" + + ```bash + # Beginnen Sie das Training von einem vortrainierten *.pt Modell unter Verwendung von MPS + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=mps + ``` + +Die Nutzung der Rechenleistung der M1/M2-Chips ermöglicht eine effizientere Verarbeitung der Trainingsaufgaben. 
Für detailliertere Anleitungen und fortgeschrittene Konfigurationsoptionen beziehen Sie sich bitte auf die [PyTorch MPS-Dokumentation](https://pytorch.org/docs/stable/notes/mps.html). + +## Protokollierung + +Beim Training eines YOLOv8-Modells kann es wertvoll sein, die Leistung des Modells im Laufe der Zeit zu verfolgen. Hier kommt die Protokollierung ins Spiel. Ultralytics' YOLO unterstützt drei Typen von Loggern - Comet, ClearML und TensorBoard. + +Um einen Logger zu verwenden, wählen Sie ihn aus dem Dropdown-Menü im obigen Codeausschnitt aus und führen ihn aus. Der ausgewählte Logger wird installiert und initialisiert. + +### Comet + +[Comet](https://www.comet.ml/site/) ist eine Plattform, die Datenwissenschaftlern und Entwicklern erlaubt, Experimente und Modelle zu verfolgen, zu vergleichen, zu erklären und zu optimieren. Es bietet Funktionen wie Echtzeitmetriken, Code-Diffs und das Verfolgen von Hyperparametern. + +Um Comet zu verwenden: + +!!! Example "Beispiel" + + === "Python" + ```python + # pip installieren comet_ml + import comet_ml + + comet_ml.init() + ``` + +Vergessen Sie nicht, sich auf der Comet-Website anzumelden und Ihren API-Schlüssel zu erhalten. Sie müssen diesen zu Ihren Umgebungsvariablen oder Ihrem Skript hinzufügen, um Ihre Experimente zu protokollieren. + +### ClearML + +[ClearML](https://www.clear.ml/) ist eine Open-Source-Plattform, die das Verfolgen von Experimenten automatisiert und hilft, Ressourcen effizient zu teilen. Sie ist darauf ausgelegt, Teams bei der Verwaltung, Ausführung und Reproduktion ihrer ML-Arbeiten effizienter zu unterstützen. + +Um ClearML zu verwenden: + +!!! Example "Beispiel" + + === "Python" + ```python + # pip installieren clearml + import clearml + + clearml.browser_login() + ``` + +Nach dem Ausführen dieses Skripts müssen Sie sich auf dem Browser bei Ihrem ClearML-Konto anmelden und Ihre Sitzung authentifizieren. 
+ +### TensorBoard + +[TensorBoard](https://www.tensorflow.org/tensorboard) ist ein Visualisierungstoolset für TensorFlow. Es ermöglicht Ihnen, Ihren TensorFlow-Graphen zu visualisieren, quantitative Metriken über die Ausführung Ihres Graphen zu plotten und zusätzliche Daten wie Bilder zu zeigen, die durch ihn hindurchgehen. + +Um TensorBoard in [Google Colab](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb) zu verwenden: + +!!! Example "Beispiel" + + === "CLI" + ```bash + load_ext tensorboard + tensorboard --logdir ultralytics/runs # ersetzen Sie mit Ihrem 'runs' Verzeichnis + ``` + +Um TensorBoard lokal auszuführen, führen Sie den folgenden Befehl aus und betrachten Sie die Ergebnisse unter http://localhost:6006/. + +!!! Example "Beispiel" + + === "CLI" + ```bash + tensorboard --logdir ultralytics/runs # ersetzen Sie mit Ihrem 'runs' Verzeichnis + ``` + +Dies lädt TensorBoard und weist es an, das Verzeichnis zu verwenden, in dem Ihre Trainingsprotokolle gespeichert sind. + +Nachdem Sie Ihren Logger eingerichtet haben, können Sie mit Ihrem Modelltraining fortfahren. Alle Trainingsmetriken werden automatisch in Ihrer gewählten Plattform protokolliert, und Sie können auf diese Protokolle zugreifen, um die Leistung Ihres Modells im Laufe der Zeit zu überwachen, verschiedene Modelle zu vergleichen und Bereiche für Verbesserungen zu identifizieren. diff --git a/docs/de/modes/val.md b/docs/de/modes/val.md new file mode 100644 index 0000000..3f0c188 --- /dev/null +++ b/docs/de/modes/val.md @@ -0,0 +1,86 @@ +--- +comments: true +description: Anleitung zur Validierung von YOLOv8-Modellen. Erfahren Sie, wie Sie die Leistung Ihrer YOLO-Modelle mit Validierungseinstellungen und Metriken in Python und CLI-Beispielen bewerten können. 
+keywords: Ultralytics, YOLO-Dokumente, YOLOv8, Validierung, Modellbewertung, Hyperparameter, Genauigkeit, Metriken, Python, CLI +--- + +# Modellvalidierung mit Ultralytics YOLO + +Ultralytics YOLO-Ökosystem und Integrationen + +## Einführung + +Die Validierung ist ein kritischer Schritt im Machine-Learning-Prozess, der es Ihnen ermöglicht, die Qualität Ihrer trainierten Modelle zu bewerten. Der Val-Modus in Ultralytics YOLOv8 bietet eine robuste Suite von Tools und Metriken zur Bewertung der Leistung Ihrer Objekterkennungsmodelle. Dieser Leitfaden dient als umfassende Ressource, um zu verstehen, wie Sie den Val-Modus effektiv nutzen können, um sicherzustellen, dass Ihre Modelle sowohl genau als auch zuverlässig sind. + +## Warum mit Ultralytics YOLO validieren? + +Hier sind die Vorteile der Verwendung des Val-Modus von YOLOv8: + +- **Präzision:** Erhalten Sie genaue Metriken wie mAP50, mAP75 und mAP50-95, um Ihr Modell umfassend zu bewerten. +- **Bequemlichkeit:** Nutzen Sie integrierte Funktionen, die Trainingseinstellungen speichern und so den Validierungsprozess vereinfachen. +- **Flexibilität:** Validieren Sie Ihr Modell mit den gleichen oder verschiedenen Datensätzen und Bildgrößen. +- **Hyperparameter-Tuning:** Verwenden Sie Validierungsmetriken, um Ihr Modell für eine bessere Leistung zu optimieren. + +### Schlüsselfunktionen des Val-Modus + +Dies sind die bemerkenswerten Funktionen, die der Val-Modus von YOLOv8 bietet: + +- **Automatisierte Einstellungen:** Modelle erinnern sich an ihre Trainingskonfigurationen für eine unkomplizierte Validierung. +- **Unterstützung mehrerer Metriken:** Bewerten Sie Ihr Modell anhand einer Reihe von Genauigkeitsmetriken. +- **CLI- und Python-API:** Wählen Sie zwischen Befehlszeilenschnittstelle oder Python-API basierend auf Ihrer Präferenz für die Validierung. +- **Datenkompatibilität:** Funktioniert nahtlos mit Datensätzen, die während der Trainingsphase sowie mit benutzerdefinierten Datensätzen verwendet wurden. + +!!! 
Tip "Tipp" + + * YOLOv8-Modelle speichern automatisch ihre Trainingseinstellungen, sodass Sie ein Modell mit der gleichen Bildgröße und dem ursprünglichen Datensatz leicht validieren können, indem Sie einfach `yolo val model=yolov8n.pt` oder `model('yolov8n.pt').val()` ausführen + +## Beispielverwendung + +Validieren Sie die Genauigkeit des trainierten YOLOv8n-Modells auf dem COCO128-Datensatz. Es muss kein Argument übergeben werden, da das `model` seine Trainings-`data` und Argumente als Modellattribute speichert. Siehe Abschnitt „Argumente“ unten für eine vollständige Liste der Exportargumente. + +!!! Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Modell laden + model = YOLO('yolov8n.pt') # ein offizielles Modell laden + model = YOLO('path/to/best.pt') # ein benutzerdefiniertes Modell laden + + # Modell validieren + metrics = model.val() # keine Argumente benötigt, Datensatz und Einstellungen gespeichert + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # eine Liste enthält map50-95 jeder Kategorie + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # offizielles Modell validieren + yolo detect val model=path/to/best.pt # benutzerdefiniertes Modell validieren + ``` + +## Argumente + +Validierungseinstellungen für YOLO-Modelle beziehen sich auf verschiedene Hyperparameter und Konfigurationen, die verwendet werden, um die Leistung des Modells an einem Validierungsdatensatz zu bewerten. Diese Einstellungen können die Leistung, Geschwindigkeit und Genauigkeit des Modells beeinflussen. Einige gängige YOLO-Validierungseinstellungen umfassen die Batch-Größe, die Häufigkeit der Validierung während des Trainings und die Metriken zur Bewertung der Modellleistung. Andere Faktoren, die den Validierungsprozess beeinflussen können, sind die Größe und Zusammensetzung des Validierungsdatensatzes und die spezifische Aufgabe, für die das Modell verwendet wird. 
Es ist wichtig, diese Einstellungen sorgfältig abzustimmen und zu experimentieren, um sicherzustellen, dass das Modell auf dem Validierungsdatensatz gut funktioniert sowie Überanpassung zu erkennen und zu verhindern. + +| Key | Value | Beschreibung | +|---------------|---------|---------------------------------------------------------------------------------| +| `data` | `None` | Pfad zur Datendatei, z.B. coco128.yaml | +| `imgsz` | `640` | Größe der Eingabebilder als ganzzahlige Zahl | +| `batch` | `16` | Anzahl der Bilder pro Batch (-1 für AutoBatch) | +| `save_json` | `False` | Ergebnisse in JSON-Datei speichern | +| `save_hybrid` | `False` | hybride Version der Labels speichern (Labels + zusätzliche Vorhersagen) | +| `conf` | `0.001` | Objekterkennungsschwelle für Zuversichtlichkeit | +| `iou` | `0.6` | Schwellenwert für IoU (Intersection over Union) für NMS | +| `max_det` | `300` | maximale Anzahl an Vorhersagen pro Bild | +| `half` | `True` | Halbpräzision verwenden (FP16) | +| `device` | `None` | Gerät zur Ausführung, z.B. CUDA device=0/1/2/3 oder device=cpu | +| `dnn` | `False` | OpenCV DNN für ONNX-Inferenz nutzen | +| `plots` | `False` | Diagramme während des Trainings anzeigen | +| `rect` | `False` | rechteckige Validierung mit jeder Batch-Charge für minimale Polsterung | +| `split` | `val` | Zu verwendende Daten-Teilmenge für Validierung, z.B. 'val', 'test' oder 'train' | +| diff --git a/docs/de/quickstart.md b/docs/de/quickstart.md new file mode 100644 index 0000000..1bfbaf7 --- /dev/null +++ b/docs/de/quickstart.md @@ -0,0 +1,198 @@ +--- +comments: true +description: Entdecken Sie verschiedene Methoden zur Installation von Ultralytics mit Pip, Conda, Git und Docker. Erfahren Sie, wie Sie Ultralytics über die Befehlszeilenschnittstelle oder innerhalb Ihrer Python-Projekte verwenden können. 
+keywords: Ultralytics-Installation, pip installieren Ultralytics, Docker installieren Ultralytics, Ultralytics-Befehlszeilenschnittstelle, Ultralytics Python-Schnittstelle +--- + +## Ultralytics installieren + +Ultralytics bietet verschiedene Installationsmethoden, darunter Pip, Conda und Docker. Installiere YOLOv8 über das `ultralytics` Pip-Paket für die neueste stabile Veröffentlichung oder indem du das [Ultralytics GitHub-Repository](https://github.com/ultralytics/ultralytics) klonst für die aktuellste Version. Docker kann verwendet werden, um das Paket in einem isolierten Container auszuführen, ohne eine lokale Installation vornehmen zu müssen. + +!!! Example "Installieren" + + === "Pip-Installation (empfohlen)" + Installieren Sie das `ultralytics` Paket mit Pip oder aktualisieren Sie eine bestehende Installation, indem Sie `pip install -U ultralytics` ausführen. Besuchen Sie den Python Package Index (PyPI) für weitere Details zum `ultralytics` Paket: [https://pypi.org/project/ultralytics/](https://pypi.org/project/ultralytics/). + + [![PyPI-Version](https://badge.fury.io/py/ultralytics.svg)](https://badge.fury.io/py/ultralytics) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) + + ```bash + # Installiere das ultralytics Paket von PyPI + pip install ultralytics + ``` + + Sie können auch das `ultralytics` Paket direkt vom GitHub [Repository](https://github.com/ultralytics/ultralytics) installieren. Dies könnte nützlich sein, wenn Sie die neueste Entwicklerversion möchten. Stellen Sie sicher, dass das Git-Kommandozeilen-Tool auf Ihrem System installiert ist. Der Befehl `@main` installiert den `main` Branch und kann zu einem anderen Branch geändert werden, z. B. `@my-branch`, oder ganz entfernt werden, um auf den `main` Branch standardmäßig zurückzugreifen. 
+ + ```bash + # Installiere das ultralytics Paket von GitHub + pip install git+https://github.com/ultralytics/ultralytics.git@main + ``` + + + === "Conda-Installation" + Conda ist ein alternativer Paketmanager zu Pip, der ebenfalls für die Installation verwendet werden kann. Besuche Anaconda für weitere Details unter [https://anaconda.org/conda-forge/ultralytics](https://anaconda.org/conda-forge/ultralytics). Ultralytics Feedstock Repository für die Aktualisierung des Conda-Pakets befindet sich unter [https://github.com/conda-forge/ultralytics-feedstock/](https://github.com/conda-forge/ultralytics-feedstock/). + + + [![Conda Rezept](https://img.shields.io/badge/recipe-ultralytics-green.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda Version](https://img.shields.io/conda/vn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda Plattformen](https://img.shields.io/conda/pn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) + + ```bash + # Installiere das ultralytics Paket mit Conda + conda install -c conda-forge ultralytics + ``` + + !!! Note "Hinweis" + + Wenn Sie in einer CUDA-Umgebung installieren, ist es am besten, `ultralytics`, `pytorch` und `pytorch-cuda` im selben Befehl zu installieren, um dem Conda-Paketmanager zu ermöglichen, Konflikte zu lösen, oder `pytorch-cuda` als letztes zu installieren, damit es das CPU-spezifische `pytorch` Paket bei Bedarf überschreiben kann. + ```bash + # Installiere alle Pakete zusammen mit Conda + conda install -c pytorch -c nvidia -c conda-forge pytorch torchvision pytorch-cuda=11.8 ultralytics + ``` + + ### Conda Docker-Image + + Ultralytics Conda Docker-Images sind ebenfalls von [DockerHub](https://hub.docker.com/r/ultralytics/ultralytics) verfügbar. 
Diese Bilder basieren auf [Miniconda3](https://docs.conda.io/projects/miniconda/en/latest/) und bieten eine einfache Möglichkeit, `ultralytics` in einer Conda-Umgebung zu nutzen. + + ```bash + # Setze Image-Name als Variable + t=ultralytics/ultralytics:latest-conda + + # Ziehe das neuste ultralytics Image von Docker Hub + sudo docker pull $t + + # Führe das ultralytics Image in einem Container mit GPU-Unterstützung aus + sudo docker run -it --ipc=host --gpus all $t # alle GPUs + sudo docker run -it --ipc=host --gpus '"device=2,3"' $t # spezifische GPUs angeben + ``` + + === "Git klonen" + Klonen Sie das `ultralytics` Repository, wenn Sie einen Beitrag zur Entwicklung leisten möchten oder mit dem neuesten Quellcode experimentieren wollen. Nach dem Klonen navigieren Sie in das Verzeichnis und installieren das Paket im editierbaren Modus `-e` mit Pip. + ```bash + # Klonen Sie das ultralytics Repository + git clone https://github.com/ultralytics/ultralytics + + # Navigiere zum geklonten Verzeichnis + cd ultralytics + + # Installiere das Paket im editierbaren Modus für die Entwicklung + pip install -e . + ``` + +Siehe die `ultralytics` [requirements.txt](https://github.com/ultralytics/ultralytics/blob/main/requirements.txt) Datei für eine Liste der Abhängigkeiten. Beachten Sie, dass alle oben genannten Beispiele alle erforderlichen Abhängigkeiten installieren. + +

+
+ +
+ Video anschauen: Ultralytics YOLO Schnellstart-Anleitung +

+ +!!! Tip "Tipp" + + PyTorch-Anforderungen variieren je nach Betriebssystem und CUDA-Anforderungen, daher wird empfohlen, PyTorch zuerst gemäß den Anweisungen unter [https://pytorch.org/get-started/locally](https://pytorch.org/get-started/locally) zu installieren. + + + PyTorch Installationsanweisungen + + +## Ultralytics mit CLI verwenden + +Die Befehlszeilenschnittstelle (CLI) von Ultralytics ermöglicht einfache Einzeilige Befehle ohne die Notwendigkeit einer Python-Umgebung. CLI erfordert keine Anpassung oder Python-Code. Sie können alle Aufgaben einfach vom Terminal aus mit dem `yolo` Befehl ausführen. Schauen Sie sich den [CLI-Leitfaden](/../usage/cli.md) an, um mehr über die Verwendung von YOLOv8 über die Befehlszeile zu erfahren. + +!!! Example "Beispiel" + + === "Syntax" + + Ultralytics `yolo` Befehle verwenden die folgende Syntax: + ```bash + yolo TASK MODE ARGS + + Wo TASK (optional) einer von [detect, segment, classify] ist + MODE (erforderlich) einer von [train, val, predict, export, track] ist + ARGS (optional) eine beliebige Anzahl von benutzerdefinierten 'arg=value' Paaren wie 'imgsz=320', die Vorgaben überschreiben. 
+ ``` + Sehen Sie alle ARGS im vollständigen [Konfigurationsleitfaden](/../usage/cfg.md) oder mit `yolo cfg` + + === "Trainieren" + + Trainieren Sie ein Erkennungsmodell für 10 Epochen mit einer Anfangslernerate von 0.01 + ```bash + yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01 + ``` + + === "Vorhersagen" + + Vorhersagen eines YouTube-Videos mit einem vortrainierten Segmentierungsmodell bei einer Bildgröße von 320: + ```bash + yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 + ``` + + === "Val" + + Val ein vortrainiertes Erkennungsmodell bei Batch-Größe 1 und Bildgröße 640: + ```bash + yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640 + ``` + + === "Exportieren" + + Exportieren Sie ein YOLOv8n-Klassifikationsmodell im ONNX-Format bei einer Bildgröße von 224 mal 128 (kein TASK erforderlich) + ```bash + yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128 + ``` + + === "Speziell" + + Führen Sie spezielle Befehle aus, um Version, Einstellungen zu sehen, Checks auszuführen und mehr: + ```bash + yolo help + yolo checks + yolo version + yolo settings + yolo copy-cfg + yolo cfg + ``` + +!!! Warning "Warnung" + + Argumente müssen als `arg=val` Paare übergeben werden, getrennt durch ein Gleichheitszeichen `=` und durch Leerzeichen ` ` zwischen den Paaren. Verwenden Sie keine `--` Argumentpräfixe oder Kommata `,` zwischen den Argumenten. + + - `yolo predict model=yolov8n.pt imgsz=640 conf=0.25`   ✅ + - `yolo predict model yolov8n.pt imgsz 640 conf 0.25`   ❌ + - `yolo predict --model yolov8n.pt --imgsz 640 --conf 0.25`   ❌ + +[CLI-Leitfaden](/../usage/cli.md){ .md-button } + +## Ultralytics mit Python verwenden + +Die Python-Schnittstelle von YOLOv8 ermöglicht eine nahtlose Integration in Ihre Python-Projekte und erleichtert das Laden, Ausführen und Verarbeiten der Modellausgabe. 
Konzipiert für Einfachheit und Benutzerfreundlichkeit, ermöglicht die Python-Schnittstelle Benutzern, Objekterkennung, Segmentierung und Klassifizierung schnell in ihren Projekten zu implementieren. Dies macht die Python-Schnittstelle von YOLOv8 zu einem unschätzbaren Werkzeug für jeden, der diese Funktionalitäten in seine Python-Projekte integrieren möchte. + +Benutzer können beispielsweise ein Modell laden, es trainieren, seine Leistung an einem Validierungsset auswerten und sogar in das ONNX-Format exportieren, und das alles mit nur wenigen Codezeilen. Schauen Sie sich den [Python-Leitfaden](/../usage/python.md) an, um mehr über die Verwendung von YOLOv8 in Ihren Python-Projekten zu erfahren. + +!!! Example "Beispiel" + + ```python + from ultralytics import YOLO + + # Erstellen Sie ein neues YOLO Modell von Grund auf + model = YOLO('yolov8n.yaml') + + # Laden Sie ein vortrainiertes YOLO Modell (empfohlen für das Training) + model = YOLO('yolov8n.pt') + + # Trainieren Sie das Modell mit dem Datensatz 'coco128.yaml' für 3 Epochen + results = model.train(data='coco128.yaml', epochs=3) + + # Bewerten Sie die Leistung des Modells am Validierungssatz + results = model.val() + + # Führen Sie eine Objekterkennung an einem Bild mit dem Modell durch + results = model('https://ultralytics.com/images/bus.jpg') + + # Exportieren Sie das Modell ins ONNX-Format + success = model.export(format='onnx') + ``` + +[Python-Leitfaden](/../usage/python.md){.md-button .md-button--primary} diff --git a/docs/de/tasks/classify.md b/docs/de/tasks/classify.md new file mode 100644 index 0000000..09622a0 --- /dev/null +++ b/docs/de/tasks/classify.md @@ -0,0 +1,172 @@ +--- +comments: true +description: Erfahren Sie mehr über YOLOv8 Classify-Modelle zur Bildklassifizierung. Erhalten Sie detaillierte Informationen über die Liste vortrainierter Modelle und wie man Modelle trainiert, validiert, vorhersagt und exportiert. 
+keywords: Ultralytics, YOLOv8, Bildklassifizierung, Vortrainierte Modelle, YOLOv8n-cls, Training, Validierung, Vorhersage, Modellexport +--- + +# Bildklassifizierung + +Beispiele für Bildklassifizierung + +Bildklassifizierung ist die einfachste der drei Aufgaben und besteht darin, ein ganzes Bild in eine von einem Satz vordefinierter Klassen zu klassifizieren. + +Die Ausgabe eines Bildklassifizierers ist ein einzelnes Klassenlabel und eine Vertrauenspunktzahl. Bildklassifizierung ist nützlich, wenn Sie nur wissen müssen, zu welcher Klasse ein Bild gehört, und nicht wissen müssen, wo sich Objekte dieser Klasse befinden oder wie ihre genaue Form ist. + +!!! Tip "Tipp" + + YOLOv8 Classify-Modelle verwenden den Suffix `-cls`, z.B. `yolov8n-cls.pt` und sind auf [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) vortrainiert. + +## [Modelle](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Hier werden vortrainierte YOLOv8 Classify-Modelle gezeigt. Detect-, Segment- und Pose-Modelle sind auf dem [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml)-Datensatz vortrainiert, während Classify-Modelle auf dem [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml)-Datensatz vortrainiert sind. + +[Modelle](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) werden automatisch vom neuesten Ultralytics-[Release](https://github.com/ultralytics/assets/releases) beim ersten Gebrauch heruntergeladen. + +| Modell | Größe
(Pixel) | Genauigkeit
top1 | Genauigkeit
top5 | Geschwindigkeit
CPU ONNX
(ms) | Geschwindigkeit
A100 TensorRT
(ms) | Parameter
(M) | FLOPs
(B) bei 640 | +|----------------------------------------------------------------------------------------------|-----------------------|--------------------------|--------------------------|------------------------------------------|-----------------------------------------------|-----------------------|---------------------------| +| [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | +| [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | +| [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | +| [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | +| [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | + +- **Genauigkeit**-Werte sind Modellgenauigkeiten auf dem [ImageNet](https://www.image-net.org/)-Datensatz Validierungsset. +
Zur Reproduktion `yolo val classify data=pfad/zu/ImageNet device=0` verwenden +- **Geschwindigkeit** Durchschnitt über ImageNet-Validierungsbilder mit einer [Amazon EC2 P4d](https://aws.amazon.com/de/ec2/instance-types/p4/)-Instanz. +
Zur Reproduktion `yolo val classify data=pfad/zu/ImageNet batch=1 device=0|cpu` verwenden + +## Trainieren + +Trainieren Sie das YOLOv8n-cls-Modell auf dem MNIST160-Datensatz für 100 Epochen bei Bildgröße 64. Eine vollständige Liste der verfügbaren Argumente finden Sie auf der Seite [Konfiguration](/../usage/cfg.md). + +!!! Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Ein Modell laden + model = YOLO('yolov8n-cls.yaml') # ein neues Modell aus YAML erstellen + model = YOLO('yolov8n-cls.pt') # ein vortrainiertes Modell laden (empfohlen für das Training) + model = YOLO('yolov8n-cls.yaml').load('yolov8n-cls.pt') # aus YAML erstellen und Gewichte übertragen + + # Das Modell trainieren + results = model.train(data='mnist160', epochs=100, imgsz=64) + ``` + + === "CLI" + + ```bash + # Ein neues Modell aus YAML erstellen und das Training von Grund auf starten + yolo classify train data=mnist160 model=yolov8n-cls.yaml epochs=100 imgsz=64 + + # Das Training von einem vortrainierten *.pt Modell starten + yolo classify train data=mnist160 model=yolov8n-cls.pt epochs=100 imgsz=64 + + # Ein neues Modell aus YAML erstellen, vortrainierte Gewichte übertragen und das Training starten + yolo classify train data=mnist160 model=yolov8n-cls.yaml pretrained=yolov8n-cls.pt epochs=100 imgsz=64 + ``` + +### Datenformat + +Das Datenformat für YOLO-Klassifizierungsdatensätze finden Sie im Detail im [Datenleitfaden](../../../datasets/classify/index.md). + +## Validieren + +Validieren Sie die Genauigkeit des trainierten YOLOv8n-cls-Modells auf dem MNIST160-Datensatz. Kein Argument muss übergeben werden, da das `model` seine Trainings-`data` und Argumente als Modellattribute behält. + +!!! 
Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Ein Modell laden + model = YOLO('yolov8n-cls.pt') # ein offizielles Modell laden + model = YOLO('pfad/zu/best.pt') # ein benutzerdefiniertes Modell laden + + # Das Modell validieren + metrics = model.val() # keine Argumente benötigt, Datensatz und Einstellungen gespeichert + metrics.top1 # top1 Genauigkeit + metrics.top5 # top5 Genauigkeit + ``` + === "CLI" + + ```bash + yolo classify val model=yolov8n-cls.pt # ein offizielles Modell validieren + yolo classify val model=pfad/zu/best.pt # ein benutzerdefiniertes Modell validieren + ``` + +## Vorhersagen + +Verwenden Sie ein trainiertes YOLOv8n-cls-Modell, um Vorhersagen auf Bildern durchzuführen. + +!!! Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Ein Modell laden + model = YOLO('yolov8n-cls.pt') # ein offizielles Modell laden + model = YOLO('pfad/zu/best.pt') # ein benutzerdefiniertes Modell laden + + # Mit dem Modell vorhersagen + results = model('https://ultralytics.com/images/bus.jpg') # Vorhersage auf einem Bild + ``` + === "CLI" + + ```bash + yolo classify predict model=yolov8n-cls.pt source='https://ultralytics.com/images/bus.jpg' # mit offiziellem Modell vorhersagen + yolo classify predict model=pfad/zu/best.pt source='https://ultralytics.com/images/bus.jpg' # mit benutzerdefiniertem Modell vorhersagen + ``` + +Vollständige Details zum `predict`-Modus finden Sie auf der Seite [Vorhersage](https://docs.ultralytics.com/modes/predict/). + +## Exportieren + +Exportieren Sie ein YOLOv8n-cls-Modell in ein anderes Format wie ONNX, CoreML usw. + +!!! 
Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Ein Modell laden + model = YOLO('yolov8n-cls.pt') # ein offizielles Modell laden + model = YOLO('pfad/zu/best.pt') # ein benutzerdefiniertes trainiertes Modell laden + + # Das Modell exportieren + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-cls.pt format=onnx # offizielles Modell exportieren + yolo export model=pfad/zu/best.pt format=onnx # benutzerdefiniertes trainiertes Modell exportieren + ``` + +Verfügbare YOLOv8-cls Exportformate stehen in der folgenden Tabelle. Sie können direkt auf exportierten Modellen vorhersagen oder validieren, d.h. `yolo predict model=yolov8n-cls.onnx`. Nutzungsexempel werden für Ihr Modell nach Abschluss des Exports angezeigt. + +| Format | `format`-Argument | Modell | Metadaten | Argumente | +|--------------------------------------------------------------------|-------------------|-------------------------------|-----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-cls.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-cls.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-cls.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-cls_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-cls.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-cls.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-cls_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-cls.pb` | ❌ 
| `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-cls.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-cls_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-cls_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-cls_ncnn_model/` | ✅ | `imgsz`, `half` | + +Vollständige Details zum `export` finden Sie auf der Seite [Export](https://docs.ultralytics.com/modes/export/). diff --git a/docs/de/tasks/detect.md b/docs/de/tasks/detect.md new file mode 100644 index 0000000..ab2848c --- /dev/null +++ b/docs/de/tasks/detect.md @@ -0,0 +1,184 @@ +--- +comments: true +description: Offizielle Dokumentation für YOLOv8 von Ultralytics. Erfahren Sie, wie Sie Modelle trainieren, validieren, vorhersagen und in verschiedenen Formaten exportieren. Einschließlich detaillierter Leistungsstatistiken. +keywords: YOLOv8, Ultralytics, Objekterkennung, vortrainierte Modelle, Training, Validierung, Vorhersage, Modell-Export, COCO, ImageNet, PyTorch, ONNX, CoreML +--- + +# Objekterkennung + +Beispiele für Objekterkennung + +Objekterkennung ist eine Aufgabe, die das Identifizieren der Position und Klasse von Objekten in einem Bild oder Videostream umfasst. + +Die Ausgabe eines Objekterkenners ist eine Menge von Begrenzungsrahmen, die die Objekte im Bild umschließen, zusammen mit Klassenbezeichnungen und Vertrauenswerten für jedes Feld. Objekterkennung ist eine gute Wahl, wenn Sie Objekte von Interesse in einer Szene identifizieren müssen, aber nicht genau wissen müssen, wo das Objekt ist oder wie seine genaue Form ist. + +

+
+ +
+ Ansehen: Objekterkennung mit einem vortrainierten Ultralytics YOLOv8-Modell. +

+ +!!! Tip "Tipp" + + YOLOv8 Detect Modelle sind die Standard YOLOv8 Modelle, zum Beispiel `yolov8n.pt`, und sind vortrainiert auf dem [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml)-Datensatz. + +## [Modelle](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Hier werden die vortrainierten YOLOv8 Detect Modelle gezeigt. Detect, Segment und Pose Modelle sind vortrainiert auf dem [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml)-Datensatz, während die Classify Modelle vortrainiert sind auf dem [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml)-Datensatz. + +[Modelle](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) werden automatisch von der neuesten Ultralytics [Veröffentlichung](https://github.com/ultralytics/assets/releases) bei Erstbenutzung heruntergeladen. + +| Modell | Größe
(Pixel) | mAPval
50-95 | Geschwindigkeit
CPU ONNX
(ms) | Geschwindigkeit
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | +|--------------------------------------------------------------------------------------|-----------------------|----------------------|------------------------------------------|-----------------------------------------------|--------------------|-------------------| +| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | +| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | +| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | +| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | +| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | + +- **mAPval** Werte sind für Single-Modell Single-Scale auf dem [COCO val2017](http://cocodataset.org) Datensatz. +
Reproduzieren mit `yolo val detect data=coco.yaml device=0` +- **Geschwindigkeit** gemittelt über COCO Val Bilder mit einer [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/)-Instanz. +
Reproduzieren mit `yolo val detect data=coco128.yaml batch=1 device=0|cpu` + +## Training + +YOLOv8n auf dem COCO128-Datensatz für 100 Epochen bei Bildgröße 640 trainieren. Für eine vollständige Liste verfügbarer Argumente siehe die [Konfigurationsseite](/../usage/cfg.md). + +!!! Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Modell laden + model = YOLO('yolov8n.yaml') # ein neues Modell aus YAML aufbauen + model = YOLO('yolov8n.pt') # ein vortrainiertes Modell laden (empfohlen für Training) + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # aus YAML aufbauen und Gewichte übertragen + + # Das Modell trainieren + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # Ein neues Modell aus YAML aufbauen und Training von Grund auf starten + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # Training von einem vortrainierten *.pt Modell starten + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # Ein neues Modell aus YAML aufbauen, vortrainierte Gewichte übertragen und Training starten + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### Datenformat + +Das Datenformat für YOLO-Erkennungsdatensätze finden Sie detailliert im [Dataset Guide](../../../datasets/detect/index.md). Um Ihren vorhandenen Datensatz von anderen Formaten (wie COCO etc.) in das YOLO-Format zu konvertieren, verwenden Sie bitte das [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO)-Tool von Ultralytics. + +## Validierung + +Genauigkeit des trainierten YOLOv8n-Modells auf dem COCO128-Datensatz validieren. Es müssen keine Argumente übergeben werden, da das `modell` seine Trainingsdaten und Argumente als Modellattribute beibehält. + +!!! 
Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Modell laden + model = YOLO('yolov8n.pt') # ein offizielles Modell laden + model = YOLO('pfad/zu/best.pt') # ein benutzerdefiniertes Modell laden + + # Das Modell validieren + metrics = model.val() # keine Argumente nötig, Datensatz und Einstellungen sind gespeichert + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # eine Liste mit den map50-95-Werten jeder Kategorie + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # offizielles Modell validieren + yolo detect val model=pfad/zu/best.pt # benutzerdefiniertes Modell validieren + ``` + +## Vorhersage + +Ein trainiertes YOLOv8n-Modell verwenden, um Vorhersagen auf Bildern durchzuführen. + +!!! Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Modell laden + model = YOLO('yolov8n.pt') # ein offizielles Modell laden + model = YOLO('pfad/zu/best.pt') # ein benutzerdefiniertes Modell laden + + # Mit dem Modell vorhersagen + results = model('https://ultralytics.com/images/bus.jpg') # Vorhersage auf einem Bild + ``` + === "CLI" + + ```bash + yolo detect predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' # Vorhersage mit offiziellem Modell + yolo detect predict model=pfad/zu/best.pt source='https://ultralytics.com/images/bus.jpg' # Vorhersage mit benutzerdefiniertem Modell + ``` + +Vollständige Details zum `predict`-Modus finden Sie auf der [Predict-Seite](https://docs.ultralytics.com/modes/predict/). + +## Export + +Ein YOLOv8n-Modell in ein anderes Format wie ONNX, CoreML usw. exportieren. + +!!! 
Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Modell laden + model = YOLO('yolov8n.pt') # ein offizielles Modell laden + model = YOLO('pfad/zum/besten.pt') # ein benutzerdefiniert trainiertes Modell laden + + # Das Modell exportieren + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # offizielles Modell exportieren + yolo export model=pfad/zum/besten.pt format=onnx # benutzerdefiniert trainiertes Modell exportieren + ``` + +Verfügbare YOLOv8 Exportformate sind in der untenstehenden Tabelle aufgeführt. Sie können direkt auf den exportierten Modellen Vorhersagen treffen oder diese validieren, zum Beispiel `yolo predict model=yolov8n.onnx`. Verwendungsbeispiele werden für Ihr Modell nach Abschluss des Exports angezeigt. + +| Format | `format`-Argument | Modell | Metadaten | Argumente | +|--------------------------------------------------------------------|-------------------|---------------------------|-----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` 
| +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +Volle Details zum `export` finden Sie auf der [Export-Seite](https://docs.ultralytics.com/modes/export/). diff --git a/docs/de/tasks/index.md b/docs/de/tasks/index.md new file mode 100644 index 0000000..4077c5e --- /dev/null +++ b/docs/de/tasks/index.md @@ -0,0 +1,55 @@ +--- +comments: true +description: Erfahren Sie, welche grundlegenden Aufgaben im Bereich der Computer Vision YOLOv8 durchführen kann, einschließlich Erkennung, Segmentierung, Klassifizierung und Haltungsschätzung, und wie sie in Ihren KI-Projekten verwendet werden können. +keywords: Ultralytics, YOLOv8, Erkennung, Segmentierung, Klassifizierung, Pose-Schätzung, KI-Framework, Computer Vision-Aufgaben +--- + +# Ultralytics YOLOv8 Aufgaben + +
+Ultralytics YOLO unterstützte Aufgaben + +YOLOv8 ist ein KI-Framework, das mehrere Aufgaben im Bereich der Computer Vision **unterstützt**. Das Framework kann für die [Erkennung](detect.md), [Segmentierung](segment.md), [Klassifizierung](classify.md) und die [Pose](pose.md)-Schätzung verwendet werden. Jede dieser Aufgaben hat ein eigenes Ziel und eigene Anwendungsgebiete. + +!!! Note "Hinweis" + + 🚧 Unsere mehrsprachige Dokumentation befindet sich derzeit im Aufbau und wir arbeiten hart daran, sie zu verbessern. Danke für Ihre Geduld! 🙏 + +

+
+ +
+ Ansehen: Entdecken Sie die Ultralytics YOLO-Aufgaben: Objekterkennung, Segmentierung, Verfolgung und Pose-Schätzung. +

+ +## [Erkennung](detect.md) + +Erkennung ist die primäre von YOLOv8 unterstützte Aufgabe. Sie beinhaltet das Erkennen von Objekten in einem Bild oder Videobild und das Zeichnen von Rahmen um sie herum. Die erkannten Objekte werden anhand ihrer Merkmale in verschiedene Kategorien klassifiziert. YOLOv8 kann mehrere Objekte in einem einzelnen Bild oder Videobild mit hoher Genauigkeit und Geschwindigkeit erkennen. + +[Beispiele für Erkennung](detect.md){ .md-button } + +## [Segmentierung](segment.md) + +Segmentierung ist eine Aufgabe, die das Aufteilen eines Bildes in unterschiedliche Regionen anhand des Bildinhalts beinhaltet. Jeder Region wird basierend auf ihrem Inhalt eine Markierung zugewiesen. Diese Aufgabe ist nützlich in Anwendungen wie der Bildsegmentierung und medizinischen Bildgebung. YOLOv8 verwendet eine Variante der U-Net-Architektur, um die Segmentierung durchzuführen. + +[Beispiele für Segmentierung](segment.md){ .md-button } + +## [Klassifizierung](classify.md) + +Klassifizierung ist eine Aufgabe, die das Einordnen eines Bildes in verschiedene Kategorien umfasst. YOLOv8 kann genutzt werden, um Bilder anhand ihres Inhalts zu klassifizieren. Es verwendet eine Variante der EfficientNet-Architektur, um die Klassifizierung durchzuführen. + +[Beispiele für Klassifizierung](classify.md){ .md-button } + +## [Pose](pose.md) + +Die Pose-/Keypoint-Erkennung ist eine Aufgabe, die das Erkennen von spezifischen Punkten in einem Bild oder Videobild beinhaltet. Diese Punkte werden als Keypoints bezeichnet und werden zur Bewegungsverfolgung oder Pose-Schätzung verwendet. YOLOv8 kann Keypoints in einem Bild oder Videobild mit hoher Genauigkeit und Geschwindigkeit erkennen. + +[Beispiele für Posen](pose.md){ .md-button } + +## Fazit + +YOLOv8 unterstützt mehrere Aufgaben, einschließlich Erkennung, Segmentierung, Klassifizierung und Keypoint-Erkennung. Jede dieser Aufgaben hat unterschiedliche Ziele und Anwendungsgebiete. 
Durch das Verständnis der Unterschiede zwischen diesen Aufgaben können Sie die geeignete Aufgabe für Ihre Anwendung im Bereich der Computer Vision auswählen. diff --git a/docs/de/tasks/pose.md b/docs/de/tasks/pose.md new file mode 100644 index 0000000..14d0f25 --- /dev/null +++ b/docs/de/tasks/pose.md @@ -0,0 +1,185 @@ +--- +comments: true +description: Erfahren Sie, wie Sie Ultralytics YOLOv8 für Aufgaben der Pose-Schätzung verwenden können. Finden Sie vortrainierte Modelle, lernen Sie, wie man eigene trainiert, validiert, vorhersagt und exportiert. +keywords: Ultralytics, YOLO, YOLOv8, Pose-Schätzung, Erkennung von Schlüsselpunkten, Objekterkennung, vortrainierte Modelle, maschinelles Lernen, künstliche Intelligenz +--- + +# Pose-Schätzung + +![Beispiele für die Pose-Schätzung](https://user-images.githubusercontent.com/26833433/243418616-9811ac0b-a4a7-452a-8aba-484ba32bb4a8.png) + +Die Pose-Schätzung ist eine Aufgabe, die das Identifizieren der Lage spezifischer Punkte in einem Bild beinhaltet, die normalerweise als Schlüsselpunkte bezeichnet werden. Die Schlüsselpunkte können verschiedene Teile des Objekts wie Gelenke, Landmarken oder andere charakteristische Merkmale repräsentieren. Die Positionen der Schlüsselpunkte sind üblicherweise als eine Gruppe von 2D `[x, y]` oder 3D `[x, y, sichtbar]` Koordinaten dargestellt. + +Das Ergebnis eines Pose-Schätzungsmodells ist eine Gruppe von Punkten, die die Schlüsselpunkte auf einem Objekt im Bild darstellen, normalerweise zusammen mit den Konfidenzwerten für jeden Punkt. Die Pose-Schätzung eignet sich gut, wenn Sie spezifische Teile eines Objekts in einer Szene identifizieren müssen und deren Lage zueinander. + +

+
+ +
+ Ansehen: Pose-Schätzung mit Ultralytics YOLOv8. +

+ +!!! Tip "Tipp" + + YOLOv8 _pose_-Modelle verwenden den Suffix `-pose`, z. B. `yolov8n-pose.pt`. Diese Modelle sind auf dem [COCO-Schlüsselpunkte](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco-pose.yaml)-Datensatz trainiert und für eine Vielzahl von Pose-Schätzungsaufgaben geeignet. + +## [Modelle](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Hier werden vortrainierte YOLOv8 Pose-Modelle gezeigt. Erkennungs-, Segmentierungs- und Pose-Modelle sind auf dem [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml)-Datensatz vortrainiert, während Klassifizierungsmodelle auf dem [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml)-Datensatz vortrainiert sind. + +[Modelle](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) werden automatisch aus der neuesten Ultralytics-[Veröffentlichung](https://github.com/ultralytics/assets/releases) bei erstmaliger Verwendung heruntergeladen. + +| Modell | Größe
(Pixel) | mAPpose
50-95 | mAPpose
50 | Geschwindigkeit
CPU ONNX
(ms) | Geschwindigkeit
A100 TensorRT
(ms) | Parameter
(M) | FLOPs
(B) | +|------------------------------------------------------------------------------------------------------|-----------------------|-----------------------|--------------------|------------------------------------------|-----------------------------------------------|-----------------------|-------------------| +| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50,4 | 80,1 | 131,8 | 1,18 | 3,3 | 9,2 | +| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60,0 | 86,2 | 233,2 | 1,42 | 11,6 | 30,2 | +| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65,0 | 88,8 | 456,3 | 2,00 | 26,4 | 81,0 | +| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67,6 | 90,0 | 784,5 | 2,59 | 44,4 | 168,6 | +| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69,2 | 90,2 | 1607,1 | 3,73 | 69,4 | 263,2 | +| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71,6 | 91,2 | 4088,7 | 10,04 | 99,1 | 1066,4 | + +- **mAPval** Werte gelten für ein einzelnes Modell mit einfacher Skala auf dem [COCO Keypoints val2017](http://cocodataset.org)-Datensatz. +
Zu reproduzieren mit `yolo val pose data=coco-pose.yaml device=0`. +- **Geschwindigkeit** gemittelt über COCO-Validierungsbilder mit einer [Amazon EC2 P4d](https://aws.amazon.com/de/ec2/instance-types/p4/)-Instanz. +
Zu reproduzieren mit `yolo val pose data=coco8-pose.yaml batch=1 device=0|cpu`. + +## Trainieren + +Trainieren Sie ein YOLOv8-Pose-Modell auf dem COCO128-Pose-Datensatz. + +!!! Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Modell laden + model = YOLO('yolov8n-pose.yaml') # ein neues Modell aus YAML bauen + model = YOLO('yolov8n-pose.pt') # ein vortrainiertes Modell laden (empfohlen für das Training) + model = YOLO('yolov8n-pose.yaml').load('yolov8n-pose.pt') # aus YAML bauen und Gewichte übertragen + + # Modell trainieren + results = model.train(data='coco8-pose.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # Ein neues Modell aus YAML bauen und das Training von Grund auf starten + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml epochs=100 imgsz=640 + + # Training von einem vortrainierten *.pt Modell starten + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + + # Ein neues Modell aus YAML bauen, vortrainierte Gewichte übertragen und das Training starten + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml pretrained=yolov8n-pose.pt epochs=100 imgsz=640 + ``` + +### Datensatzformat + +Das YOLO-Pose-Datensatzformat finden Sie detailliert im [Datensatz-Leitfaden](../../../datasets/pose/index.md). Um Ihren bestehenden Datensatz aus anderen Formaten (wie COCO usw.) in das YOLO-Format zu konvertieren, verwenden Sie bitte das [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO)-Tool von Ultralytics. + +## Validieren + +Die Genauigkeit des trainierten YOLOv8n-Pose-Modells auf dem COCO128-Pose-Datensatz validieren. Es müssen keine Argumente übergeben werden, da das `Modell` seine Trainings`daten` und Argumente als Modellattribute beibehält. + +!!! 
Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Modell laden + model = YOLO('yolov8n-pose.pt') # ein offizielles Modell laden + model = YOLO('pfad/zu/best.pt') # ein benutzerdefiniertes Modell laden + + # Modell validieren + metrics = model.val() # keine Argumente nötig, Datensatz und Einstellungen sind gespeichert + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # Liste enthält map50-95 jeder Kategorie + ``` + === "CLI" + + ```bash + yolo pose val model=yolov8n-pose.pt # offizielles Modell validieren + yolo pose val model=pfad/zu/best.pt # benutzerdefiniertes Modell validieren + ``` + +## Vorhersagen + +Ein trainiertes YOLOv8n-Pose-Modell verwenden, um Vorhersagen auf Bildern zu machen. + +!!! Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Modell laden + model = YOLO('yolov8n-pose.pt') # ein offizielles Modell laden + model = YOLO('pfad/zu/best.pt') # ein benutzerdefiniertes Modell laden + + # Mit dem Modell Vorhersagen machen + results = model('https://ultralytics.com/images/bus.jpg') # Vorhersage auf einem Bild machen + ``` + === "CLI" + + ```bash + yolo pose predict model=yolov8n-pose.pt source='https://ultralytics.com/images/bus.jpg' # Vorhersage mit dem offiziellen Modell machen + yolo pose predict model=pfad/zu/best.pt source='https://ultralytics.com/images/bus.jpg' # Vorhersage mit dem benutzerdefinierten Modell machen + ``` + +Vollständige `predict`-Modusdetails finden Sie auf der [Vorhersage](https://docs.ultralytics.com/modes/predict/)-Seite. + +## Exportieren + +Ein YOLOv8n-Pose-Modell in ein anderes Format wie ONNX, CoreML usw. exportieren. + +!!! 
Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Modell laden + model = YOLO('yolov8n-pose.pt') # ein offizielles Modell laden + model = YOLO('pfad/zu/best.pt') # ein benutzerdefiniertes Modell laden + + # Modell exportieren + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-pose.pt format=onnx # offizielles Modell exportieren + yolo export model=pfad/zu/best.pt format=onnx # benutzerdefiniertes Modell exportieren + ``` + +Verfügbare YOLOv8-Pose-Exportformate sind in der folgenden Tabelle aufgeführt. Sie können direkt auf exportierten Modellen vorhersagen oder validieren, z. B. `yolo predict model=yolov8n-pose.onnx`. Verwendungsbeispiele werden für Ihr Modell nach Abschluss des Exports angezeigt. + +| Format | `format` Argument | Modell | Metadaten | Argumente | +|--------------------------------------------------------------------|-------------------|--------------------------------|-----------|-----------------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-pose.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-pose.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-pose.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-pose_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-pose.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-pose.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-pose_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | 
`yolov8n-pose.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-pose.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-pose_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-pose_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-pose_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-pose_ncnn_model/` | ✅ | `imgsz`, `half` | + +Vollständige `export`-Details finden Sie auf der [Export](https://docs.ultralytics.com/modes/export/)-Seite. diff --git a/docs/de/tasks/segment.md b/docs/de/tasks/segment.md new file mode 100644 index 0000000..a70909f --- /dev/null +++ b/docs/de/tasks/segment.md @@ -0,0 +1,188 @@ +--- +comments: true +description: Erfahren Sie, wie Sie Instanzsegmentierungsmodelle mit Ultralytics YOLO verwenden. Anleitungen zum Training, zur Validierung, zur Bildvorhersage und zum Export von Modellen. +keywords: yolov8, Instanzsegmentierung, Ultralytics, COCO-Datensatz, Bildsegmentierung, Objekterkennung, Modelltraining, Modellvalidierung, Bildvorhersage, Modellexport +--- + +# Instanzsegmentierung + +![Beispiele für Instanzsegmentierung](https://user-images.githubusercontent.com/26833433/243418644-7df320b8-098d-47f1-85c5-26604d761286.png) + +Instanzsegmentierung geht einen Schritt weiter als die Objekterkennung und beinhaltet die Identifizierung einzelner Objekte in einem Bild und deren Abtrennung vom Rest des Bildes. + +Das Ergebnis eines Instanzsegmentierungsmodells ist eine Reihe von Masken oder Konturen, die jedes Objekt im Bild umreißen, zusammen mit Klassenbezeichnungen und Konfidenzwerten für jedes Objekt. Instanzsegmentierung ist nützlich, wenn man nicht nur wissen muss, wo sich Objekte in einem Bild befinden, sondern auch, welche genaue Form sie haben. + +

+
+ +
+ Ansehen: Segmentierung mit einem vortrainierten Ultralytics YOLOv8-Modell in Python ausführen. +

+ +!!! Tip "Tipp" + + YOLOv8 Segment-Modelle verwenden das Suffix `-seg`, d.h. `yolov8n-seg.pt` und sind auf dem [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml)-Datensatz vortrainiert. + +## [Modelle](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Hier werden vortrainierte YOLOv8 Segment-Modelle gezeigt. Detect-, Segment- und Pose-Modelle sind auf dem [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml)-Datensatz vortrainiert, während Classify-Modelle auf dem [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml)-Datensatz vortrainiert sind. + +[Modelle](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) laden sich automatisch von der neuesten Ultralytics [Veröffentlichung](https://github.com/ultralytics/assets/releases) beim ersten Gebrauch herunter. + +| Modell | Größe
(Pixel) | mAPKasten
50-95 | mAPMasken
50-95 | Geschwindigkeit
CPU ONNX
(ms) | Geschwindigkeit
A100 TensorRT
(ms) | Parameter
(M) | FLOPs
(B) | +|----------------------------------------------------------------------------------------------|-----------------------|-------------------------|-------------------------|------------------------------------------|-----------------------------------------------|-----------------------|-------------------| +| [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | +| [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | +| [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | +| [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | +| [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | + +- Die **mAPval**-Werte sind für ein einzelnes Modell, einzelne Skala auf dem [COCO val2017](http://cocodataset.org)-Datensatz. +
Zum Reproduzieren nutzen Sie `yolo val segment data=coco.yaml device=0` +- Die **Geschwindigkeit** ist über die COCO-Validierungsbilder gemittelt und verwendet eine [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/)-Instanz. +
Zum Reproduzieren `yolo val segment data=coco128-seg.yaml batch=1 device=0|cpu` + +## Training + +Trainieren Sie YOLOv8n-seg auf dem COCO128-seg-Datensatz für 100 Epochen mit einer Bildgröße von 640. Eine vollständige Liste der verfügbaren Argumente finden Sie auf der Seite [Konfiguration](/../usage/cfg.md). + +!!! Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Modell laden + model = YOLO('yolov8n-seg.yaml') # ein neues Modell aus YAML erstellen + model = YOLO('yolov8n-seg.pt') # ein vortrainiertes Modell laden (empfohlen für das Training) + model = YOLO('yolov8n-seg.yaml').load('yolov8n.pt') # aus YAML erstellen und Gewichte übertragen + + # Das Modell trainieren + results = model.train(data='coco128-seg.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # Ein neues Modell aus YAML erstellen und das Training von vorne beginnen + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml epochs=100 imgsz=640 + + # Das Training von einem vortrainierten *.pt Modell aus starten + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + + # Ein neues Modell aus YAML erstellen, vortrainierte Gewichte darauf übertragen und das Training beginnen + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml pretrained=yolov8n-seg.pt epochs=100 imgsz=640 + ``` + +### Datenformat + +Das YOLO Segmentierungsdatenformat finden Sie detailliert im [Dataset Guide](../../../datasets/segment/index.md). Um Ihre vorhandenen Daten aus anderen Formaten (wie COCO usw.) in das YOLO-Format umzuwandeln, verwenden Sie bitte das [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO)-Tool von Ultralytics. + +## Val + +Validieren Sie die Genauigkeit des trainierten YOLOv8n-seg-Modells auf dem COCO128-seg-Datensatz. Es müssen keine Argumente übergeben werden, da das `Modell` seine Trainingsdaten und -argumente als Modellattribute behält. + +!!! 
Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Modell laden + model = YOLO('yolov8n-seg.pt') # offizielles Modell laden + model = YOLO('pfad/zu/best.pt') # benutzerdefiniertes Modell laden + + # Das Modell validieren + metrics = model.val() # Keine Argumente erforderlich, Datensatz und Einstellungen werden behalten + metrics.box.map # mAP50-95(B) + metrics.box.map50 # mAP50(B) + metrics.box.map75 # mAP75(B) + metrics.box.maps # eine Liste enthält mAP50-95(B) für jede Kategorie + metrics.seg.map # mAP50-95(M) + metrics.seg.map50 # mAP50(M) + metrics.seg.map75 # mAP75(M) + metrics.seg.maps # eine Liste enthält mAP50-95(M) für jede Kategorie + ``` + === "CLI" + + ```bash + yolo segment val model=yolov8n-seg.pt # offizielles Modell validieren + yolo segment val model=pfad/zu/best.pt # benutzerdefiniertes Modell validieren + ``` + +## Predict + +Verwenden Sie ein trainiertes YOLOv8n-seg-Modell für Vorhersagen auf Bildern. + +!!! Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Modell laden + model = YOLO('yolov8n-seg.pt') # offizielles Modell laden + model = YOLO('pfad/zu/best.pt') # benutzerdefiniertes Modell laden + + # Mit dem Modell Vorhersagen treffen + results = model('https://ultralytics.com/images/bus.jpg') # Vorhersage auf einem Bild + ``` + === "CLI" + + ```bash + yolo segment predict model=yolov8n-seg.pt source='https://ultralytics.com/images/bus.jpg' # Vorhersage mit offiziellem Modell treffen + yolo segment predict model=pfad/zu/best.pt source='https://ultralytics.com/images/bus.jpg' # Vorhersage mit benutzerdefiniertem Modell treffen + ``` + +Die vollständigen Details zum `predict`-Modus finden Sie auf der Seite [Predict](https://docs.ultralytics.com/modes/predict/). + +## Export + +Exportieren Sie ein YOLOv8n-seg-Modell in ein anderes Format wie ONNX, CoreML usw. + +!!! 
Example "Beispiel" + + === "Python" + + ```python + from ultralytics import YOLO + + # Modell laden + model = YOLO('yolov8n-seg.pt') # offizielles Modell laden + model = YOLO('pfad/zu/best.pt') # benutzerdefiniertes trainiertes Modell laden + + # Das Modell exportieren + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-seg.pt format=onnx # offizielles Modell exportieren + yolo export model=pfad/zu/best.pt format=onnx # benutzerdefiniertes trainiertes Modell exportieren + ``` + +Die verfügbaren YOLOv8-seg-Exportformate sind in der folgenden Tabelle aufgeführt. Sie können direkt auf exportierten Modellen Vorhersagen treffen oder sie validieren, z.B. `yolo predict model=yolov8n-seg.onnx`. Verwendungsbeispiele werden für Ihr Modell nach dem Export angezeigt. + +| Format | `format`-Argument | Modell | Metadaten | Argumente | +|--------------------------------------------------------------------|-------------------|-------------------------------|-----------|-----------------------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-seg.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-seg.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-seg.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-seg_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-seg.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-seg.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-seg_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF 
GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-seg.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-seg.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-seg_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-seg_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-seg_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-seg_ncnn_model/` | ✅ | `imgsz`, `half` | + +Die vollständigen Details zum `export` finden Sie auf der Seite [Export](https://docs.ultralytics.com/modes/export/). diff --git a/docs/en/CNAME b/docs/en/CNAME new file mode 100644 index 0000000..339382a --- /dev/null +++ b/docs/en/CNAME @@ -0,0 +1 @@ +docs.ultralytics.com diff --git a/docs/en/guides/azureml-quickstart.md b/docs/en/guides/azureml-quickstart.md new file mode 100644 index 0000000..56b1cea --- /dev/null +++ b/docs/en/guides/azureml-quickstart.md @@ -0,0 +1,152 @@ +--- +comments: true +description: Step-by-step Quickstart Guide to Running YOLOv8 Object Detection Models on AzureML for Fast Prototyping and Testing +keywords: Ultralytics, YOLOv8, Object Detection, Azure Machine Learning, Quickstart Guide, Prototype, Compute Instance, Terminal, Notebook, IPython Kernel, CLI, Python SDK +--- + +# YOLOv8 🚀 on AzureML + +## What is Azure? + +[Azure](https://azure.microsoft.com/) is Microsoft's cloud computing platform, designed to help organizations move their workloads to the cloud from on-premises data centers. With the full spectrum of cloud services including those for computing, databases, analytics, machine learning, and networking, users can pick and choose from these services to develop and scale new applications, or run existing applications, in the public cloud. + +## What is Azure Machine Learning (AzureML)? 
+ +Azure Machine Learning, commonly referred to as AzureML, is a fully managed cloud service that enables data scientists and developers to efficiently embed predictive analytics into their applications, helping organizations use massive data sets and bring all the benefits of the cloud to machine learning. AzureML offers a variety of services and capabilities aimed at making machine learning accessible, easy to use, and scalable. It provides capabilities like automated machine learning, drag-and-drop model training, as well as a robust Python SDK so that developers can make the most out of their machine learning models. + +## How Does AzureML Benefit YOLO Users? + +For users of YOLO (You Only Look Once), AzureML provides a robust, scalable, and efficient platform to both train and deploy machine learning models. Whether you are looking to run quick prototypes or scale up to handle more extensive data, AzureML's flexible and user-friendly environment offers various tools and services to fit your needs. You can leverage AzureML to: + +- Easily manage large datasets and computational resources for training. +- Utilize built-in tools for data preprocessing, feature selection, and model training. +- Collaborate more efficiently with capabilities for MLOps (Machine Learning Operations), including but not limited to monitoring, auditing, and versioning of models and data. + +In the subsequent sections, you will find a quickstart guide detailing how to run YOLOv8 object detection models using AzureML, either from a compute terminal or a notebook. + +## Prerequisites + +Before you can get started, make sure you have access to an AzureML workspace. If you don't have one, you can create a new [AzureML workspace](https://learn.microsoft.com/azure/machine-learning/concept-workspace?view=azureml-api-2) by following Azure's official documentation. This workspace acts as a centralized place to manage all AzureML resources. 
+ +## Create a compute instance + +From your AzureML workspace, select Compute > Compute instances > New, then select the instance with the resources you need. + +

+ Create Azure Compute Instance +

+ +## Quickstart from Terminal + +Start your compute and open a Terminal: + +

+ Open Terminal +

+ +### Create virtualenv + +Create your conda virtualenv and install pip in it: + +```bash +conda create --name yolov8env -y +conda activate yolov8env +conda install pip -y +``` + +Install the required dependencies: + +```bash +cd ultralytics +pip install -r requirements.txt +pip install ultralytics +pip install "onnx>=1.12.0" +``` + +### Perform YOLOv8 tasks + +Predict: + +```bash +yolo predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' +``` + +Train a detection model for 10 epochs with an initial learning_rate of 0.01: + +```bash +yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01 +``` + +You can find more [instructions to use the Ultralytics CLI here](../quickstart.md#use-ultralytics-with-cli). + +## Quickstart from a Notebook + +### Create a new IPython kernel + +Open the compute Terminal. + +

+ Open Terminal +

+ +From your compute terminal, you need to create a new ipykernel that will be used by your notebook to manage your dependencies: + +```bash +conda create --name yolov8env -y +conda activate yolov8env +conda install pip -y +conda install ipykernel -y +python -m ipykernel install --user --name yolov8env --display-name "yolov8env" +``` + +Close your terminal and create a new notebook. From your Notebook, you can select the new kernel. + +Then you can open a Notebook cell and install the required dependencies: + +```bash +%%bash +source activate yolov8env +cd ultralytics +pip install -r requirements.txt +pip install ultralytics +pip install "onnx>=1.12.0" +``` + +Note that `source activate yolov8env` must be run at the top of every %%bash cell to make sure the cell uses the environment we want. + +Run some predictions using the [Ultralytics CLI](../quickstart.md#use-ultralytics-with-cli): + +```bash +%%bash +source activate yolov8env +yolo predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' +``` + +Or with the [Ultralytics Python interface](../quickstart.md#use-ultralytics-with-python), for example to train the model: + +```python +from ultralytics import YOLO + +# Load a model +model = YOLO("yolov8n.pt") # load an official YOLOv8n model + +# Use the model +model.train(data="coco128.yaml", epochs=3) # train the model +metrics = model.val() # evaluate model performance on the validation set +results = model("https://ultralytics.com/images/bus.jpg") # predict on an image +path = model.export(format="onnx") # export the model to ONNX format +``` + +You can use either the Ultralytics CLI or Python interface for running YOLOv8 tasks, as described in the terminal section above. + +By following these steps, you should be able to get YOLOv8 running quickly on AzureML for quick trials. For more advanced uses, you may refer to the full AzureML documentation linked at the beginning of this guide. 
+ +## Explore More with AzureML + +This guide serves as an introduction to get you up and running with YOLOv8 on AzureML. However, it only scratches the surface of what AzureML can offer. To delve deeper and unlock the full potential of AzureML for your machine learning projects, consider exploring the following resources: + +- [Create a Data Asset](https://learn.microsoft.com/azure/machine-learning/how-to-create-data-assets): Learn how to set up and manage your data assets effectively within the AzureML environment. +- [Initiate an AzureML Job](https://learn.microsoft.com/azure/machine-learning/how-to-train-model): Get a comprehensive understanding of how to kickstart your machine learning training jobs on AzureML. +- [Register a Model](https://learn.microsoft.com/azure/machine-learning/how-to-manage-models): Familiarize yourself with model management practices including registration, versioning, and deployment. +- [Train YOLOv8 with AzureML Python SDK](https://medium.com/@ouphi/how-to-train-the-yolov8-model-with-azure-machine-learning-python-sdk-8268696be8ba): Explore a step-by-step guide on using the AzureML Python SDK to train your YOLOv8 models. +- [Train YOLOv8 with AzureML CLI](https://medium.com/@ouphi/how-to-train-the-yolov8-model-with-azureml-and-the-az-cli-73d3c870ba8e): Discover how to utilize the command-line interface for streamlined training and management of YOLOv8 models on AzureML. diff --git a/docs/en/guides/conda-quickstart.md b/docs/en/guides/conda-quickstart.md new file mode 100644 index 0000000..21f3b89 --- /dev/null +++ b/docs/en/guides/conda-quickstart.md @@ -0,0 +1,132 @@ +--- +comments: true +description: Comprehensive guide to setting up and using Ultralytics YOLO models in a Conda environment. Learn how to install the package, manage dependencies, and get started with object detection projects. 
+keywords: Ultralytics, YOLO, Conda, environment setup, object detection, package installation, deep learning, machine learning, guide +--- + +# Conda Quickstart Guide for Ultralytics + +

+ Ultralytics Conda Package Visual +

+ +This guide provides a comprehensive introduction to setting up a Conda environment for your Ultralytics projects. Conda is an open-source package and environment management system that offers an excellent alternative to pip for installing packages and dependencies. Its isolated environments make it particularly well-suited for data science and machine learning endeavors. For more details, visit the Ultralytics Conda package on [Anaconda](https://anaconda.org/conda-forge/ultralytics) and check out the Ultralytics feedstock repository for package updates on [GitHub](https://github.com/conda-forge/ultralytics-feedstock/). + +[![Conda Recipe](https://img.shields.io/badge/recipe-ultralytics-green.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda Version](https://img.shields.io/conda/vn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) + +## What You Will Learn + +- Setting up a Conda environment +- Installing Ultralytics via Conda +- Initializing Ultralytics in your environment +- Using Ultralytics Docker images with Conda + +--- + +## Prerequisites + +- You should have Anaconda or Miniconda installed on your system. If not, download and install it from [Anaconda](https://www.anaconda.com/) or [Miniconda](https://docs.conda.io/projects/miniconda/en/latest/). + +--- + +## Setting up a Conda Environment + +First, let's create a new Conda environment. Open your terminal and run the following command: + +```bash +conda create --name ultralytics-env python=3.8 -y +``` + +Activate the new environment: + +```bash +conda activate ultralytics-env +``` + +--- + +## Installing Ultralytics + +You can install the Ultralytics package from the conda-forge channel. 
Execute the following command: + +```bash +conda install -c conda-forge ultralytics +``` + +### Note on CUDA Environment + +If you're working in a CUDA-enabled environment, it's a good practice to install `ultralytics`, `pytorch`, and `pytorch-cuda` together to resolve any conflicts: + +```bash +conda install -c pytorch -c nvidia -c conda-forge pytorch torchvision pytorch-cuda=11.8 ultralytics +``` + +--- + +## Using Ultralytics + +With Ultralytics installed, you can now start using its robust features for object detection, instance segmentation, and more. For example, to predict an image, you can run: + +```python +from ultralytics import YOLO + +model = YOLO('yolov8n.pt') # initialize model +results = model('path/to/image.jpg') # perform inference +results[0].show() # display results +``` + +--- + +## Ultralytics Conda Docker Image + +If you prefer using Docker, Ultralytics offers Docker images with a Conda environment included. You can pull these images from [DockerHub](https://hub.docker.com/r/ultralytics/ultralytics). + +Pull the latest Ultralytics image: + +```bash +# Set image name as a variable +t=ultralytics/ultralytics:latest-conda + +# Pull the latest Ultralytics image from Docker Hub +sudo docker pull $t +``` + +Run the image: + +```bash +# Run the Ultralytics image in a container with GPU support +sudo docker run -it --ipc=host --gpus all $t # all GPUs +sudo docker run -it --ipc=host --gpus '"device=2,3"' $t # specify GPUs +``` + +--- + +The following section explains how to speed up package installation in Conda using `libmamba`. + +--- + +## Speeding Up Installation with Libmamba + +If you're looking to [speed up the package installation](https://www.anaconda.com/blog/a-faster-conda-for-a-growing-community) process in Conda, you can opt to use `libmamba`, a fast, cross-platform, and dependency-aware package manager that serves as an alternative solver to Conda's default. 
+ +### How to Enable Libmamba + +To enable `libmamba` as the solver for Conda, you can perform the following steps: + +1. First, install the `conda-libmamba-solver` package. This can be skipped if your Conda version is 4.11 or above, as `libmamba` is included by default. + + ```bash + conda install conda-libmamba-solver + ``` + +2. Next, configure Conda to use `libmamba` as the solver: + + ```bash + conda config --set solver libmamba + ``` + +And that's it! Your Conda installation will now use `libmamba` as the solver, which should result in a faster package installation process. + +--- + +Congratulations! You have successfully set up a Conda environment, installed the Ultralytics package, and are now ready to explore its rich functionalities. Feel free to dive deeper into the [Ultralytics documentation](../index.md) for more advanced tutorials and examples. diff --git a/docs/en/guides/docker-quickstart.md b/docs/en/guides/docker-quickstart.md new file mode 100644 index 0000000..6c45268 --- /dev/null +++ b/docs/en/guides/docker-quickstart.md @@ -0,0 +1,119 @@ +--- +comments: true +description: Complete guide to setting up and using Ultralytics YOLO models with Docker. Learn how to install Docker, manage GPU support, and run YOLO models in isolated containers. +keywords: Ultralytics, YOLO, Docker, GPU, containerization, object detection, package installation, deep learning, machine learning, guide +--- + +# Docker Quickstart Guide for Ultralytics + +

+ Ultralytics Docker Package Visual +

+ +This guide serves as a comprehensive introduction to setting up a Docker environment for your Ultralytics projects. [Docker](https://docker.com/) is a platform for developing, shipping, and running applications in containers. It is particularly beneficial for ensuring that the software will always run the same, regardless of where it's deployed. For more details, visit the Ultralytics Docker repository on [Docker Hub](https://hub.docker.com/r/ultralytics/ultralytics). + +[![Docker Pulls](https://img.shields.io/docker/pulls/ultralytics/ultralytics?logo=docker)](https://hub.docker.com/r/ultralytics/ultralytics) + +## What You Will Learn + +- Setting up Docker with NVIDIA support +- Installing Ultralytics Docker images +- Running Ultralytics in a Docker container +- Mounting local directories into the container + +--- + +## Prerequisites + +- Make sure Docker is installed on your system. If not, you can download and install it from [Docker's website](https://www.docker.com/products/docker-desktop). +- Ensure that your system has an NVIDIA GPU and NVIDIA drivers are installed. 
+ +--- + +## Setting up Docker with NVIDIA Support + +First, verify that the NVIDIA drivers are properly installed by running: + +```bash +nvidia-smi +``` + +### Installing NVIDIA Docker Runtime + +Now, let's install the NVIDIA Docker runtime to enable GPU support in Docker containers: + +```bash +# Add NVIDIA package repositories +curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - +distribution=$(. /etc/os-release; echo $ID$VERSION_ID) +curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list + +# Install NVIDIA Docker runtime +sudo apt-get update +sudo apt-get install -y nvidia-docker2 + +# Restart Docker service to apply changes +sudo systemctl restart docker +``` + +### Verify NVIDIA Runtime with Docker + +Run `docker info | grep -i runtime` to ensure that `nvidia` appears in the list of runtimes: + +```bash +docker info | grep -i runtime +``` + +--- + +## Installing Ultralytics Docker Images + +Ultralytics offers several Docker images optimized for various platforms and use-cases: + +- **Dockerfile:** GPU image, ideal for training. +- **Dockerfile-arm64:** For ARM64 architecture, suitable for devices like [Raspberry Pi](raspberry-pi.md). +- **Dockerfile-cpu:** CPU-only version for inference and non-GPU environments. +- **Dockerfile-jetson:** Optimized for NVIDIA Jetson devices. +- **Dockerfile-python:** Minimal Python environment for lightweight applications. +- **Dockerfile-conda:** Includes [Miniconda3](https://docs.conda.io/projects/miniconda/en/latest/) and Ultralytics package installed via Conda. 
+ +To pull the latest image: + +```bash +# Set image name as a variable +t=ultralytics/ultralytics:latest + +# Pull the latest Ultralytics image from Docker Hub +sudo docker pull $t +``` + +--- + +## Running Ultralytics in Docker Container + +Here's how to execute the Ultralytics Docker container: + +```bash +# Run with all GPUs +sudo docker run -it --ipc=host --gpus all $t + +# Run specifying which GPUs to use +sudo docker run -it --ipc=host --gpus '"device=2,3"' $t +``` + +The `-it` flag assigns a pseudo-TTY and keeps stdin open, allowing you to interact with the container. The `--ipc=host` flag enables sharing of host's IPC namespace, essential for sharing memory between processes. The `--gpus` flag allows the container to access the host's GPUs. + +### Note on File Accessibility + +To work with files on your local machine within the container, you can use Docker volumes: + +```bash +# Mount a local directory into the container +sudo docker run -it --ipc=host --gpus all -v /path/on/host:/path/in/container $t +``` + +Replace `/path/on/host` with the directory path on your local machine and `/path/in/container` with the desired path inside the Docker container. + +--- + +Congratulations! You're now set up to use Ultralytics with Docker and ready to take advantage of its powerful capabilities. For alternate installation methods, feel free to explore the [Ultralytics quickstart documentation](../quickstart.md). diff --git a/docs/en/guides/hyperparameter-tuning.md b/docs/en/guides/hyperparameter-tuning.md new file mode 100644 index 0000000..f7d2b56 --- /dev/null +++ b/docs/en/guides/hyperparameter-tuning.md @@ -0,0 +1,206 @@ +--- +comments: true +description: Dive into hyperparameter tuning in Ultralytics YOLO models. Learn how to optimize performance using the Tuner class and genetic evolution. 
+keywords: Ultralytics, YOLO, Hyperparameter Tuning, Tuner Class, Genetic Evolution, Optimization +--- + +# Ultralytics YOLO Hyperparameter Tuning Guide + +## Introduction + +Hyperparameter tuning is not just a one-time set-up but an iterative process aimed at optimizing the machine learning model's performance metrics, such as accuracy, precision, and recall. In the context of Ultralytics YOLO, these hyperparameters could range from learning rate to architectural details, such as the number of layers or types of activation functions used. + +### What are Hyperparameters? + +Hyperparameters are high-level, structural settings for the algorithm. They are set prior to the training phase and remain constant during it. Here are some commonly tuned hyperparameters in Ultralytics YOLO: + +- **Learning Rate** `lr0`: Determines the step size at each iteration while moving towards a minimum in the loss function. +- **Batch Size** `batch`: Number of images processed simultaneously in a forward pass. +- **Number of Epochs** `epochs`: An epoch is one complete forward and backward pass of all the training examples. +- **Architecture Specifics**: Such as channel counts, number of layers, types of activation functions, etc. + +

+ Hyperparameter Tuning Visual +

+ +For a full list of augmentation hyperparameters used in YOLOv8 please refer to the [configurations page](../usage/cfg.md#augmentation). + +### Genetic Evolution and Mutation + +Ultralytics YOLO uses genetic algorithms to optimize hyperparameters. Genetic algorithms are inspired by the mechanism of natural selection and genetics. + +- **Mutation**: In the context of Ultralytics YOLO, mutation helps in locally searching the hyperparameter space by applying small, random changes to existing hyperparameters, producing new candidates for evaluation. +- **Crossover**: Although crossover is a popular genetic algorithm technique, it is not currently used in Ultralytics YOLO for hyperparameter tuning. The focus is mainly on mutation for generating new hyperparameter sets. + +## Preparing for Hyperparameter Tuning + +Before you begin the tuning process, it's important to: + +1. **Identify the Metrics**: Determine the metrics you will use to evaluate the model's performance. This could be AP50, F1-score, or others. +2. **Set the Tuning Budget**: Define how much computational resources you're willing to allocate. Hyperparameter tuning can be computationally intensive. + +## Steps Involved + +### Initialize Hyperparameters + +Start with a reasonable set of initial hyperparameters. This could either be the default hyperparameters set by Ultralytics YOLO or something based on your domain knowledge or previous experiments. + +### Mutate Hyperparameters + +Use the `_mutate` method to produce a new set of hyperparameters based on the existing set. + +### Train Model + +Training is performed using the mutated set of hyperparameters. The training performance is then assessed. + +### Evaluate Model + +Use metrics like AP50, F1-score, or custom metrics to evaluate the model's performance. + +### Log Results + +It's crucial to log both the performance metrics and the corresponding hyperparameters for future reference. 
+ +### Repeat + +The process is repeated until either the set number of iterations is reached or the performance metric is satisfactory. + +## Usage Example + +Here's how to use the `model.tune()` method to utilize the `Tuner` class for hyperparameter tuning of YOLOv8n on COCO8 for 30 epochs with an AdamW optimizer and skipping plotting, checkpointing and validation other than on final epoch for faster Tuning. + +!!! Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Initialize the YOLO model + model = YOLO('yolov8n.pt') + + # Tune hyperparameters on COCO8 for 30 epochs + model.tune(data='coco8.yaml', epochs=30, iterations=300, optimizer='AdamW', plots=False, save=False, val=False) + ``` + +## Results + +After you've successfully completed the hyperparameter tuning process, you will obtain several files and directories that encapsulate the results of the tuning. The following describes each: + +### File Structure + +Here's what the directory structure of the results will look like. Training directories like `train1/` contain individual tuning iterations, i.e. one model trained with one set of hyperparameters. The `tune/` directory contains tuning results from all the individual model trainings: + +```plaintext +runs/ +└── detect/ + ├── train1/ + ├── train2/ + ├── ... + └── tune/ + ├── best_hyperparameters.yaml + ├── best_fitness.png + ├── tune_results.csv + ├── tune_scatter_plots.png + └── weights/ + ├── last.pt + └── best.pt +``` + +### File Descriptions + +#### best_hyperparameters.yaml + +This YAML file contains the best-performing hyperparameters found during the tuning process. You can use this file to initialize future trainings with these optimized settings. 
+ +- **Format**: YAML +- **Usage**: Hyperparameter results +- **Example**: + ```yaml + # 558/900 iterations complete ✅ (45536.81s) + # Results saved to /usr/src/ultralytics/runs/detect/tune + # Best fitness=0.64297 observed at iteration 498 + # Best fitness metrics are {'metrics/precision(B)': 0.87247, 'metrics/recall(B)': 0.71387, 'metrics/mAP50(B)': 0.79106, 'metrics/mAP50-95(B)': 0.62651, 'val/box_loss': 2.79884, 'val/cls_loss': 2.72386, 'val/dfl_loss': 0.68503, 'fitness': 0.64297} + # Best fitness model is /usr/src/ultralytics/runs/detect/train498 + # Best fitness hyperparameters are printed below. + + lr0: 0.00269 + lrf: 0.00288 + momentum: 0.73375 + weight_decay: 0.00015 + warmup_epochs: 1.22935 + warmup_momentum: 0.1525 + box: 18.27875 + cls: 1.32899 + dfl: 0.56016 + hsv_h: 0.01148 + hsv_s: 0.53554 + hsv_v: 0.13636 + degrees: 0.0 + translate: 0.12431 + scale: 0.07643 + shear: 0.0 + perspective: 0.0 + flipud: 0.0 + fliplr: 0.08631 + mosaic: 0.42551 + mixup: 0.0 + copy_paste: 0.0 + ``` + +#### best_fitness.png + +This is a plot displaying fitness (typically a performance metric like AP50) against the number of iterations. It helps you visualize how well the genetic algorithm performed over time. + +- **Format**: PNG +- **Usage**: Performance visualization + +

+ Hyperparameter Tuning Fitness vs Iteration +

+ +#### tune_results.csv + +A CSV file containing detailed results of each iteration during the tuning. Each row in the file represents one iteration, and it includes metrics like fitness score, precision, recall, as well as the hyperparameters used. + +- **Format**: CSV +- **Usage**: Per-iteration results tracking. +- **Example**: + ```csv + fitness,lr0,lrf,momentum,weight_decay,warmup_epochs,warmup_momentum,box,cls,dfl,hsv_h,hsv_s,hsv_v,degrees,translate,scale,shear,perspective,flipud,fliplr,mosaic,mixup,copy_paste + 0.05021,0.01,0.01,0.937,0.0005,3.0,0.8,7.5,0.5,1.5,0.015,0.7,0.4,0.0,0.1,0.5,0.0,0.0,0.0,0.5,1.0,0.0,0.0 + 0.07217,0.01003,0.00967,0.93897,0.00049,2.79757,0.81075,7.5,0.50746,1.44826,0.01503,0.72948,0.40658,0.0,0.0987,0.4922,0.0,0.0,0.0,0.49729,1.0,0.0,0.0 + 0.06584,0.01003,0.00855,0.91009,0.00073,3.42176,0.95,8.64301,0.54594,1.72261,0.01503,0.59179,0.40658,0.0,0.0987,0.46955,0.0,0.0,0.0,0.49729,0.80187,0.0,0.0 + ``` + +#### tune_scatter_plots.png + +This file contains scatter plots generated from `tune_results.csv`, helping you visualize relationships between different hyperparameters and performance metrics. Note that hyperparameters initialized to 0 will not be tuned, such as `degrees` and `shear` below. + +- **Format**: PNG +- **Usage**: Exploratory data analysis + +

+ Hyperparameter Tuning Scatter Plots +

+ +#### weights/ + +This directory contains the saved PyTorch models for the last and the best iterations during the hyperparameter tuning process. + +- **`last.pt`**: The weights from the last epoch of training. +- **`best.pt`**: The weights from the iteration that achieved the best fitness score. + +Using these results, you can make more informed decisions for your future model trainings and analyses. Feel free to consult these artifacts to understand how well your model performed and how you might improve it further. + +## Conclusion + +The hyperparameter tuning process in Ultralytics YOLO is simplified yet powerful, thanks to its genetic algorithm-based approach focused on mutation. Following the steps outlined in this guide will assist you in systematically tuning your model to achieve better performance. + +### Further Reading + +1. [Hyperparameter Optimization in Wikipedia](https://en.wikipedia.org/wiki/Hyperparameter_optimization) +2. [YOLOv5 Hyperparameter Evolution Guide](../yolov5/tutorials/hyperparameter_evolution.md) +3. [Efficient Hyperparameter Tuning with Ray Tune and YOLOv8](../integrations/ray-tune.md) + +For deeper insights, you can explore the `Tuner` class source code and accompanying documentation. Should you have any questions, feature requests, or need further assistance, feel free to reach out to us on [GitHub](https://github.com/ultralytics/ultralytics/issues/new/choose) or [Discord](https://ultralytics.com/discord). diff --git a/docs/en/guides/index.md b/docs/en/guides/index.md new file mode 100644 index 0000000..a01e08a --- /dev/null +++ b/docs/en/guides/index.md @@ -0,0 +1,37 @@ +--- +comments: true +description: In-depth exploration of Ultralytics' YOLO. Learn about the YOLO object detection model, how to train it on custom data, multi-GPU training, exporting, predicting, deploying, and more. 
+keywords: Ultralytics, YOLO, Deep Learning, Object detection, PyTorch, Tutorial, Multi-GPU training, Custom data training, SAHI, Tiled Inference +--- + +# Comprehensive Tutorials to Ultralytics YOLO + +Welcome to the Ultralytics' YOLO 🚀 Guides! Our comprehensive tutorials cover various aspects of the YOLO object detection model, ranging from training and prediction to deployment. Built on PyTorch, YOLO stands out for its exceptional speed and accuracy in real-time object detection tasks. + +Whether you're a beginner or an expert in deep learning, our tutorials offer valuable insights into the implementation and optimization of YOLO for your computer vision projects. Let's dive in! + +## Guides + +Here's a compilation of in-depth guides to help you master different aspects of Ultralytics YOLO. + +* [YOLO Common Issues](yolo-common-issues.md) ⭐ RECOMMENDED: Practical solutions and troubleshooting tips to the most frequently encountered issues when working with Ultralytics YOLO models. +* [YOLO Performance Metrics](yolo-performance-metrics.md) ⭐ ESSENTIAL: Understand the key metrics like mAP, IoU, and F1 score used to evaluate the performance of your YOLO models. Includes practical examples and tips on how to improve detection accuracy and speed. +* [Model Deployment Options](model-deployment-options.md): Overview of YOLO model deployment formats like ONNX, OpenVINO, and TensorRT, with pros and cons for each to inform your deployment strategy. +* [K-Fold Cross Validation](kfold-cross-validation.md) 🚀 NEW: Learn how to improve model generalization using K-Fold cross-validation technique. +* [Hyperparameter Tuning](hyperparameter-tuning.md) 🚀 NEW: Discover how to optimize your YOLO models by fine-tuning hyperparameters using the Tuner class and genetic evolution algorithms. +* [SAHI Tiled Inference](sahi-tiled-inference.md) 🚀 NEW: Comprehensive guide on leveraging SAHI's sliced inference capabilities with YOLOv8 for object detection in high-resolution images. 
+* [AzureML Quickstart](azureml-quickstart.md) 🚀 NEW: Get up and running with Ultralytics YOLO models on Microsoft's Azure Machine Learning platform. Learn how to train, deploy, and scale your object detection projects in the cloud. +* [Conda Quickstart](conda-quickstart.md) 🚀 NEW: Step-by-step guide to setting up a [Conda](https://anaconda.org/conda-forge/ultralytics) environment for Ultralytics. Learn how to install and start using the Ultralytics package efficiently with Conda. +* [Docker Quickstart](docker-quickstart.md) 🚀 NEW: Complete guide to setting up and using Ultralytics YOLO models with [Docker](https://hub.docker.com/r/ultralytics/ultralytics). Learn how to install Docker, manage GPU support, and run YOLO models in isolated containers for consistent development and deployment. +* [Raspberry Pi](raspberry-pi.md) 🚀 NEW: Quickstart tutorial to run YOLO models on the latest Raspberry Pi hardware. +* [Triton Inference Server Integration](triton-inference-server.md) 🚀 NEW: Dive into the integration of Ultralytics YOLOv8 with NVIDIA's Triton Inference Server for scalable and efficient deep learning inference deployments. +* [YOLO Thread-Safe Inference](yolo-thread-safe-inference.md) 🚀 NEW: Guidelines for performing inference with YOLO models in a thread-safe manner. Learn the importance of thread safety and best practices to prevent race conditions and ensure consistent predictions. +* [Isolating Segmentation Objects](isolating-segmentation-objects.md) 🚀 NEW: Step-by-step recipe and explanation on how to extract and/or isolate objects from images using Ultralytics Segmentation. + +## Contribute to Our Guides + +We welcome contributions from the community! If you've mastered a particular aspect of Ultralytics YOLO that's not yet covered in our guides, we encourage you to share your expertise. Writing a guide is a great way to give back to the community and help us make our documentation more comprehensive and user-friendly.
+ +To get started, please read our [Contributing Guide](../help/contributing.md) for guidelines on how to open up a Pull Request (PR) 🛠️. We look forward to your contributions! + +Let's work together to make the Ultralytics YOLO ecosystem more robust and versatile 🙏! diff --git a/docs/en/guides/isolating-segmentation-objects.md b/docs/en/guides/isolating-segmentation-objects.md new file mode 100644 index 0000000..334367f --- /dev/null +++ b/docs/en/guides/isolating-segmentation-objects.md @@ -0,0 +1,319 @@ +--- +comments: true +description: A concise guide on isolating segmented objects using Ultralytics. +keywords: Ultralytics, YOLO, segmentation, Python, object detection, inference, dataset, prediction, instance segmentation, contours, binary mask, object mask, image processing +--- + +# Isolating Segmentation Objects + +After performing the [Segment Task](../tasks/segment.md), it's sometimes desirable to extract the isolated objects from the inference results. This guide provides a generic recipe on how to accomplish this using the Ultralytics [Predict Mode](../modes/predict.md). + +

+ Example Isolated Object Segmentation +

+ +## Recipe Walk Through + +1. Begin with the necessary imports + + ```py + from pathlib import Path + + import cv2 as cv + import numpy as np + from ultralytics import YOLO + ``` + + ???+ tip "Ultralytics Install" + + See the Ultralytics [Quickstart](../quickstart.md/#install-ultralytics) Installation section for a quick walkthrough on installing the required libraries. + + --- + +1. Load a model and run `predict()` method on a source. + + ```py + m = YOLO('yolov8n-seg.pt') + res = m.predict() + ``` + + ??? question "No Prediction Arguments?" + + Without specifying a source, the example images from the library will be used: + + ``` + 'ultralytics/assets/bus.jpg' + 'ultralytics/assets/zidane.jpg' + ``` + + This is helpful for rapid testing with the `predict()` method. + + For additional information about Segmentation Models, visit the [Segment Task](../tasks/segment.md/#models) page. To learn more about `predict()` method, see [Predict Mode](../modes/predict.md) section of the Documentation. + + --- + +1. Now iterate over the results and the contours. For workflows that want to save an image to file, the source image `base-name` and the detection `class-label` are retrieved for later use (optional). + + ``` { .py .annotate } + # (2) Iterate detection results (helpful for multiple images) + for r in res: + img = np.copy(r.orig_img) + img_name = Path(r.path).stem # source image base-name + + # Iterate each object contour (multiple detections) + for ci,c in enumerate(r): + # (1) Get detection class name + label = c.names[c.boxes.cls.tolist().pop()] + + ``` + + 1. To learn more about working with detection results, see [Boxes Section for Predict Mode](../modes/predict.md/#boxes). + 2. To learn more about `predict()` results see [Working with Results for Predict Mode](../modes/predict.md/#working-with-results) + + ??? info "For-Loop" + + A single image will only iterate the first loop once. A single image with only a single detection will iterate each loop _only_ once. 
+ + --- + +1. Start with generating a binary mask from the source image and then draw a filled contour onto the mask. This will allow the object to be isolated from the other parts of the image. An example from `bus.jpg` for one of the detected `person` class objects is shown on the right. + + ![Binary Mask Image](https://github.com/ultralytics/ultralytics/assets/62214284/59bce684-fdda-4b17-8104-0b4b51149aca){ width="240", align="right" } + ``` { .py .annotate } + # Create binary mask + b_mask = np.zeros(img.shape[:2], np.uint8) + + # (1) Extract contour result + contour = c.masks.xy.pop() + # (2) Changing the type + contour = contour.astype(np.int32) + # (3) Reshaping + contour = contour.reshape(-1, 1, 2) + + + # Draw contour onto mask + _ = cv.drawContours(b_mask, + [contour], + -1, + (255, 255, 255), + cv.FILLED) + + ``` + + 1. For more info on `c.masks.xy` see [Masks Section from Predict Mode](../modes/predict.md/#masks). + + 2. Here, the values are cast into `np.int32` for compatibility with `drawContours()` function from OpenCV. + + 3. The OpenCV `drawContours()` function expects contours to have a shape of `[N, 1, 2]`; expand the section below for more details. + +
+ Expand to understand what is happening when defining the contour variable. +

+ + - `c.masks.xy` :: Provides the coordinates of the mask contour points in the format `(x, y)`. For more details, refer to the [Masks Section from Predict Mode](../modes/predict.md/#masks). + + - `.pop()` :: As `masks.xy` is a list containing a single element, this element is extracted using the `pop()` method. + + - `.astype(np.int32)` :: Using `masks.xy` will return with a data type of `float32`, but this won't be compatible with the OpenCV `drawContours()` function, so this will change the data type to `int32` for compatibility. + + - `.reshape(-1, 1, 2)` :: Reformats the data into the required shape of `[N, 1, 2]` where `N` is the number of contour points, with each point represented by a single entry `1`, and the entry is composed of `2` values. The `-1` denotes that the number of values along this dimension is flexible. + +

+

+
+ Expand for an explanation of the drawContours() configuration. +

+ + - Encapsulating the `contour` variable within square brackets, `[contour]`, was found to effectively generate the desired contour mask during testing. + + - The value `-1` specified for the `drawContours()` parameter instructs the function to draw all contours present in the image. + + - The `tuple` `(255, 255, 255)` represents the color white, which is the desired color for drawing the contour in this binary mask. + + - The addition of `cv.FILLED` will color all pixels enclosed by the contour boundary the same, in this case, all enclosed pixels will be white. + + - See [OpenCV Documentation on `drawContours()`](https://docs.opencv.org/4.8.0/d6/d6e/group__imgproc__draw.html#ga746c0625f1781f1ffc9056259103edbc) for more information. + +

+

+ + --- + +1. Next, there are 2 options for how to move forward with the image from this point and a subsequent option for each. + + ### Object Isolation Options + + !!! Example "" + + === "Black Background Pixels" + + ```py + # Create 3-channel mask + mask3ch = cv.cvtColor(b_mask, cv.COLOR_GRAY2BGR) + + # Isolate object with binary mask + isolated = cv.bitwise_and(mask3ch, img) + + ``` + + ??? question "How does this work?" + + - First, the binary mask is converted from a single-channel image to a three-channel image. This conversion is necessary for the subsequent step where the mask and the original image are combined. Both images must have the same number of channels to be compatible with the blending operation. + + - The original image and the three-channel binary mask are merged using the OpenCV function `bitwise_and()`. This operation retains only pixel values that are greater than zero `(> 0)` from both images. Since the mask pixels are greater than zero `(> 0)` only within the contour region, the pixels remaining from the original image are those that overlap with the contour. + + ### Isolate with Black Pixels: Sub-options + + ??? info "Full-size Image" + + There are no additional steps required if keeping full size image. + +
+ ![Example Full size Isolated Object Image Black Background](https://github.com/ultralytics/ultralytics/assets/62214284/845c00d0-52a6-4b1e-8010-4ba73e011b99){ width=240 } +
Example full-size output
+
+ + ??? info "Cropped object Image" + + Additional steps required to crop image to only include object region. + + ![Example Crop Isolated Object Image Black Background](https://github.com/ultralytics/ultralytics/assets/62214284/103dbf90-c169-4f77-b791-76cdf09c6f22){ align="right" } + ``` { .py .annotate } + # (1) Bounding box coordinates + x1, y1, x2, y2 = c.boxes.xyxy.cpu().numpy().squeeze().astype(np.int32) + # Crop image to object region + iso_crop = isolated[y1:y2, x1:x2] + + ``` + + 1. For more information on bounding box results, see [Boxes Section from Predict Mode](../modes/predict.md/#boxes) + + ??? question "What does this code do?" + + - The `c.boxes.xyxy.cpu().numpy()` call retrieves the bounding boxes as a NumPy array in the `xyxy` format, where `xmin`, `ymin`, `xmax`, and `ymax` represent the coordinates of the bounding box rectangle. See [Boxes Section from Predict Mode](../modes/predict.md/#boxes) for more details. + + - The `squeeze()` operation removes any unnecessary dimensions from the NumPy array, ensuring it has the expected shape. + + - Converting the coordinate values using `.astype(np.int32)` changes the box coordinates data type from `float32` to `int32`, making them compatible for image cropping using index slices. + + - Finally, the bounding box region is cropped from the image using index slicing. The bounds are defined by the `[ymin:ymax, xmin:xmax]` coordinates of the detection bounding box. + + === "Transparent Background Pixels" + + ```py + # Isolate object with transparent background (when saved as PNG) + isolated = np.dstack([img, b_mask]) + + ``` + + ??? question "How does this work?" + + - Using the NumPy `dstack()` function (array stacking along depth-axis) in conjunction with the binary mask generated, will create an image with four channels. This allows for all pixels outside of the object contour to be transparent when saving as a `PNG` file. + + ### Isolate with Transparent Pixels: Sub-options + + ??? 
info "Full-size Image" + + There are no additional steps required if keeping full size image. + +
+ ![Example Full size Isolated Object Image No Background](https://github.com/ultralytics/ultralytics/assets/62214284/b1043ee0-369a-4019-941a-9447a9771042){ width=240 } +
Example full-size output + transparent background
+
+ + ??? info "Cropped object Image" + + Additional steps required to crop image to only include object region. + + ![Example Crop Isolated Object Image No Background](https://github.com/ultralytics/ultralytics/assets/62214284/5910244f-d1e1-44af-af7f-6dea4c688da8){ align="right" } + ``` { .py .annotate } + # (1) Bounding box coordinates + x1, y1, x2, y2 = c.boxes.xyxy.cpu().numpy().squeeze().astype(np.int32) + # Crop image to object region + iso_crop = isolated[y1:y2, x1:x2] + + ``` + + 1. For more information on bounding box results, see [Boxes Section from Predict Mode](../modes/predict.md/#boxes) + + ??? question "What does this code do?" + + - When using `c.boxes.xyxy.cpu().numpy()`, the bounding boxes are returned as a NumPy array, using the `xyxy` box coordinates format, which correspond to the points `xmin, ymin, xmax, ymax` for the bounding box (rectangle), see [Boxes Section from Predict Mode](../modes/predict.md/#boxes) for more information. + + - Adding `squeeze()` ensures that any extraneous dimensions are removed from the NumPy array. + + - Converting the coordinate values using `.astype(np.int32)` changes the box coordinates data type from `float32` to `int32` which will be compatible when cropping the image using index slices. + + - Finally the image region for the bounding box is cropped using index slicing, where the bounds are set using the `[ymin:ymax, xmin:xmax]` coordinates of the detection bounding box. + + ??? question "What if I want the cropped object **including** the background?" + + This is a built in feature for the Ultralytics library. See the `save_crop` argument for [Predict Mode Inference Arguments](../modes/predict.md/#inference-arguments) for details. + + --- + +1. What to do next is entirely left to you as the developer. A basic example of one possible next step (saving the image to file for future use) is shown. + + - **NOTE:** this step is optional and can be skipped if not required for your specific use case. + + ???
example "Example Final Step" + + ```py + # Save isolated object to file + _ = cv.imwrite(f'{img_name}_{label}-{ci}.png', iso_crop) + ``` + + - In this example, the `img_name` is the base-name of the source image file, `label` is the detected class-name, and `ci` is the index of the object detection (in case of multiple instances with the same class name). + +## Full Example code + +Here, all steps from the previous section are combined into a single block of code. For repeated use, it would be optimal to define a function to do some or all commands contained in the `for`-loops, but that is an exercise left to the reader. + +``` { .py .annotate } +from pathlib import Path + +import cv2 as cv +import numpy as np +from ultralytics import YOLO + +m = YOLO('yolov8n-seg.pt')#(4)! +res = m.predict()#(3)! + +# iterate detection results (5) +for r in res: + img = np.copy(r.orig_img) + img_name = Path(r.path).stem + + # iterate each object contour (6) + for ci,c in enumerate(r): + label = c.names[c.boxes.cls.tolist().pop()] + + b_mask = np.zeros(img.shape[:2], np.uint8) + + # Create contour mask (1) + contour = c.masks.xy.pop().astype(np.int32).reshape(-1, 1, 2) + _ = cv.drawContours(b_mask, [contour], -1, (255, 255, 255), cv.FILLED) + + # Choose one: + + # OPTION-1: Isolate object with black background + mask3ch = cv.cvtColor(b_mask, cv.COLOR_GRAY2BGR) + isolated = cv.bitwise_and(mask3ch, img) + + # OPTION-2: Isolate object with transparent background (when saved as PNG) + isolated = np.dstack([img, b_mask]) + + # OPTIONAL: detection crop (from either OPT1 or OPT2) + x1, y1, x2, y2 = c.boxes.xyxy.cpu().numpy().squeeze().astype(np.int32) + iso_crop = isolated[y1:y2, x1:x2] + + # TODO your actions go here (2) + +``` + +1. The line populating `contour` is combined into a single line here, where it was split to multiple above. +2. {==What goes here is up to you!==} +3. See [Predict Mode](../modes/predict.md) for additional information. +4. 
See [Segment Task](../tasks/segment.md/#models) for more information. +5. Learn more about [Working with Results](../modes/predict.md/#working-with-results) +6. Learn more about [Segmentation Mask Results](../modes/predict.md/#masks) diff --git a/docs/en/guides/kfold-cross-validation.md b/docs/en/guides/kfold-cross-validation.md new file mode 100644 index 0000000..400c420 --- /dev/null +++ b/docs/en/guides/kfold-cross-validation.md @@ -0,0 +1,278 @@ +--- +comments: true +description: An in-depth guide demonstrating the implementation of K-Fold Cross Validation with the Ultralytics ecosystem for object detection datasets, leveraging Python, YOLO, and sklearn. +keywords: K-Fold cross validation, Ultralytics, YOLO detection format, Python, sklearn, object detection +--- + +# K-Fold Cross Validation with Ultralytics + +## Introduction + +This comprehensive guide illustrates the implementation of K-Fold Cross Validation for object detection datasets within the Ultralytics ecosystem. We'll leverage the YOLO detection format and key Python libraries such as sklearn, pandas, and PyYaml to guide you through the necessary setup, the process of generating feature vectors, and the execution of a K-Fold dataset split. + +

+ K-Fold Cross Validation Overview +

+ +Whether your project involves the Fruit Detection dataset or a custom data source, this tutorial aims to help you comprehend and apply K-Fold Cross Validation to bolster the reliability and robustness of your machine learning models. While we're applying `k=5` folds for this tutorial, keep in mind that the optimal number of folds can vary depending on your dataset and the specifics of your project. + +Without further ado, let's dive in! + +## Setup + +- Your annotations should be in the [YOLO detection format](../datasets/detect/index.md). + +- This guide assumes that annotation files are locally available. + +- For our demonstration, we use the [Fruit Detection](https://www.kaggle.com/datasets/lakshaytyagi01/fruit-detection/code) dataset. + - This dataset contains a total of 8479 images. + - It includes 6 class labels, each with its total instance counts listed below. + +| Class Label | Instance Count | +|:------------|:--------------:| +| Apple | 7049 | +| Grapes | 7202 | +| Pineapple | 1613 | +| Orange | 15549 | +| Banana | 3536 | +| Watermelon | 1976 | + +- Necessary Python packages include: + + - `ultralytics` + - `sklearn` + - `pandas` + - `pyyaml` + +- This tutorial operates with `k=5` folds. However, you should determine the best number of folds for your specific dataset. + +1. Initiate a new Python virtual environment (`venv`) for your project and activate it. Use `pip` (or your preferred package manager) to install: + + - The Ultralytics library: `pip install -U ultralytics`. Alternatively, you can clone the official [repo](https://github.com/ultralytics/ultralytics). + - Scikit-learn, pandas, and PyYAML: `pip install -U scikit-learn pandas pyyaml`. + +2. Verify that your annotations are in the [YOLO detection format](../datasets/detect/index.md). + + - For this tutorial, all annotation files are found in the `Fruit-Detection/labels` directory. + +## Generating Feature Vectors for Object Detection Dataset + +1. 
Start by creating a new Python file and import the required libraries. + + ```python + import datetime + import shutil + from pathlib import Path + from collections import Counter + + import yaml + import numpy as np + import pandas as pd + from ultralytics import YOLO + from sklearn.model_selection import KFold + ``` + +2. Proceed to retrieve all label files for your dataset. + + ```python + dataset_path = Path('./Fruit-detection') # replace with 'path/to/dataset' for your custom data + labels = sorted(dataset_path.rglob("*labels/*.txt")) # all data in 'labels' + ``` + +3. Now, read the contents of the dataset YAML file and extract the indices of the class labels. + + ```python + yaml_file = 'path/to/data.yaml' # your data YAML with data directories and names dictionary + with open(yaml_file, 'r', encoding="utf8") as y: + classes = yaml.safe_load(y)['names'] + cls_idx = sorted(classes.keys()) + ``` + +4. Initialize an empty `pandas` DataFrame. + + ```python + indx = [l.stem for l in labels] # uses base filename as ID (no extension) + labels_df = pd.DataFrame([], columns=cls_idx, index=indx) + ``` + +5. Count the instances of each class-label present in the annotation files. + + ```python + for label in labels: + lbl_counter = Counter() + + with open(label,'r') as lf: + lines = lf.readlines() + + for l in lines: + # classes for YOLO label uses integer at first position of each line + lbl_counter[int(l.split(' ')[0])] += 1 + + labels_df.loc[label.stem] = lbl_counter + + labels_df = labels_df.fillna(0.0) # replace `nan` values with `0.0` + ``` + +6. The following is a sample view of the populated DataFrame: + + ```pandas + 0 1 2 3 4 5 + '0000a16e4b057580_jpg.rf.00ab48988370f64f5ca8ea4...' 0.0 0.0 0.0 0.0 0.0 7.0 + '0000a16e4b057580_jpg.rf.7e6dce029fb67f01eb19aa7...' 0.0 0.0 0.0 0.0 0.0 7.0 + '0000a16e4b057580_jpg.rf.bc4d31cdcbe229dd022957a...' 0.0 0.0 0.0 0.0 0.0 7.0 + '00020ebf74c4881c_jpg.rf.508192a0a97aa6c4a3b6882...' 
0.0 0.0 0.0 1.0 0.0 0.0 + '00020ebf74c4881c_jpg.rf.5af192a2254c8ecc4188a25...' 0.0 0.0 0.0 1.0 0.0 0.0 + ... ... ... ... ... ... ... + 'ff4cd45896de38be_jpg.rf.c4b5e967ca10c7ced3b9e97...' 0.0 0.0 0.0 0.0 0.0 2.0 + 'ff4cd45896de38be_jpg.rf.ea4c1d37d2884b3e3cbce08...' 0.0 0.0 0.0 0.0 0.0 2.0 + 'ff5fd9c3c624b7dc_jpg.rf.bb519feaa36fc4bf630a033...' 1.0 0.0 0.0 0.0 0.0 0.0 + 'ff5fd9c3c624b7dc_jpg.rf.f0751c9c3aa4519ea3c9d6a...' 1.0 0.0 0.0 0.0 0.0 0.0 + 'fffe28b31f2a70d4_jpg.rf.7ea16bd637ba0711c53b540...' 0.0 6.0 0.0 0.0 0.0 0.0 + ``` + +The rows index the label files, each corresponding to an image in your dataset, and the columns correspond to your class-label indices. Each row represents a pseudo feature-vector, with the count of each class-label present in your dataset. This data structure enables the application of K-Fold Cross Validation to an object detection dataset. + +## K-Fold Dataset Split + +1. Now we will use the `KFold` class from `sklearn.model_selection` to generate `k` splits of the dataset. + + - Important: + - Setting `shuffle=True` ensures a randomized distribution of classes in your splits. + - By setting `random_state=M` where `M` is a chosen integer, you can obtain repeatable results. + + ```python + ksplit = 5 + kf = KFold(n_splits=ksplit, shuffle=True, random_state=20) # setting random_state for repeatable results + + kfolds = list(kf.split(labels_df)) + ``` + +2. The dataset has now been split into `k` folds, each having a list of `train` and `val` indices. We will construct a DataFrame to display these results more clearly. + + ```python + folds = [f'split_{n}' for n in range(1, ksplit + 1)] + folds_df = pd.DataFrame(index=indx, columns=folds) + + for idx, (train, val) in enumerate(kfolds, start=1): + folds_df[f'split_{idx}'].loc[labels_df.iloc[train].index] = 'train' + folds_df[f'split_{idx}'].loc[labels_df.iloc[val].index] = 'val' + ``` + +3. 
Now we will calculate the distribution of class labels for each fold as a ratio of the classes present in `val` to those present in `train`. + + ```python + fold_lbl_distrb = pd.DataFrame(index=folds, columns=cls_idx) + + for n, (train_indices, val_indices) in enumerate(kfolds, start=1): + train_totals = labels_df.iloc[train_indices].sum() + val_totals = labels_df.iloc[val_indices].sum() + + # To avoid division by zero, we add a small value (1E-7) to the denominator + ratio = val_totals / (train_totals + 1E-7) + fold_lbl_distrb.loc[f'split_{n}'] = ratio + ``` + +The ideal scenario is for all class ratios to be reasonably similar for each split and across classes. This, however, will be subject to the specifics of your dataset. + +4. Next, we create the directories and dataset YAML files for each split. + + ```python + supported_extensions = ['.jpg', '.jpeg', '.png'] + + # Initialize an empty list to store image file paths + images = [] + + # Loop through supported extensions and gather image files + for ext in supported_extensions: + images.extend(sorted((dataset_path / 'images').rglob(f"*{ext}"))) + + # Create the necessary directories and dataset YAML files (unchanged) + save_path = Path(dataset_path / f'{datetime.date.today().isoformat()}_{ksplit}-Fold_Cross-val') + save_path.mkdir(parents=True, exist_ok=True) + ds_yamls = [] + + for split in folds_df.columns: + # Create directories + split_dir = save_path / split + split_dir.mkdir(parents=True, exist_ok=True) + (split_dir / 'train' / 'images').mkdir(parents=True, exist_ok=True) + (split_dir / 'train' / 'labels').mkdir(parents=True, exist_ok=True) + (split_dir / 'val' / 'images').mkdir(parents=True, exist_ok=True) + (split_dir / 'val' / 'labels').mkdir(parents=True, exist_ok=True) + + # Create dataset YAML files + dataset_yaml = split_dir / f'{split}_dataset.yaml' + ds_yamls.append(dataset_yaml) + + with open(dataset_yaml, 'w') as ds_y: + yaml.safe_dump({ + 'path': split_dir.as_posix(), + 'train': 'train', + 
'val': 'val', + 'names': classes + }, ds_y) + ``` + +5. Lastly, copy images and labels into the respective directory ('train' or 'val') for each split. + + - __NOTE:__ The time required for this portion of the code will vary based on the size of your dataset and your system hardware. + + ```python + for image, label in zip(images, labels): + for split, k_split in folds_df.loc[image.stem].items(): + # Destination directory + img_to_path = save_path / split / k_split / 'images' + lbl_to_path = save_path / split / k_split / 'labels' + + # Copy image and label files to new directory (SamefileError if file already exists) + shutil.copy(image, img_to_path / image.name) + shutil.copy(label, lbl_to_path / label.name) + ``` + +## Save Records (Optional) + +Optionally, you can save the records of the K-Fold split and label distribution DataFrames as CSV files for future reference. + +```python +folds_df.to_csv(save_path / "kfold_datasplit.csv") +fold_lbl_distrb.to_csv(save_path / "kfold_label_distribution.csv") +``` + +## Train YOLO using K-Fold Data Splits + +1. First, load the YOLO model. + + ```python + weights_path = 'path/to/weights.pt' + model = YOLO(weights_path, task='detect') + ``` + +2. Next, iterate over the dataset YAML files to run training. The results will be saved to a directory specified by the `project` and `name` arguments. By default, this directory is 'exp/runs#' where # is an integer index. + + ```python + results = {} + + # Define your additional arguments here + batch = 16 + project = 'kfold_demo' + epochs = 100 + + for k in range(ksplit): + dataset_yaml = ds_yamls[k] + model.train(data=dataset_yaml,epochs=epochs, batch=batch, project=project) # include any train arguments + results[k] = model.metrics # save output metrics for further analysis + ``` + +## Conclusion + +In this guide, we have explored the process of using K-Fold cross-validation for training the YOLO object detection model. 
We learned how to split our dataset into K partitions, ensuring a balanced class distribution across the different folds. + +We also explored the procedure for creating report DataFrames to visualize the data splits and label distributions across these splits, providing us a clear insight into the structure of our training and validation sets. + +Optionally, we saved our records for future reference, which could be particularly useful in large-scale projects or when troubleshooting model performance. + +Finally, we implemented the actual model training using each split in a loop, saving our training results for further analysis and comparison. + +This technique of K-Fold cross-validation is a robust way of making the most out of your available data, and it helps to ensure that your model performance is reliable and consistent across different data subsets. This results in a more generalizable and reliable model that is less likely to overfit to specific data patterns. + +Remember that although we used YOLO in this guide, these steps are mostly transferable to other machine learning models. Understanding these steps allows you to apply cross-validation effectively in your own machine learning projects. Happy coding! diff --git a/docs/en/guides/model-deployment-options.md b/docs/en/guides/model-deployment-options.md new file mode 100644 index 0000000..6a4adba --- /dev/null +++ b/docs/en/guides/model-deployment-options.md @@ -0,0 +1,305 @@ +--- +comments: true +description: A guide to help determine which deployment option to choose for your YOLOv8 model, including essential considerations. +keywords: YOLOv8, Deployment, PyTorch, TorchScript, ONNX, OpenVINO, TensorRT, CoreML, TensorFlow, Export +--- + +# Understanding YOLOv8’s Deployment Options + +## Introduction + +*Setting the Scene:* You've come a long way on your journey with YOLOv8. 
You've diligently collected data, meticulously annotated it, and put in the hours to train and rigorously evaluate your custom YOLOv8 model. Now, it’s time to put your model to work for your specific application, use case, or project. But there's a critical decision that stands before you: how to export and deploy your model effectively. + +This guide walks you through YOLOv8’s deployment options and the essential factors to consider to choose the right option for your project. + +## How to Select the Right Deployment Option for Your YOLOv8 Model + +When it's time to deploy your YOLOv8 model, selecting a suitable export format is very important. As outlined in the [Ultralytics YOLOv8 Modes documentation](../modes/export.md#usage-examples), the model.export() function allows for converting your trained model into a variety of formats tailored to diverse environments and performance requirements. + +The ideal format depends on your model's intended operational context, balancing speed, hardware constraints, and ease of integration. In the following section, we'll take a closer look at each export option, understanding when to choose each one. + +### YOLOv8’s Deployment Options + +Let’s walk through the different YOLOv8 deployment options. For a detailed walkthrough of the export process, visit the [Ultralytics documentation page on exporting](../modes/export.md). + +#### PyTorch + +PyTorch is an open-source machine learning library widely used for applications in deep learning and artificial intelligence. It provides a high level of flexibility and speed, which has made it a favorite among researchers and developers. + +- **Performance Benchmarks**: PyTorch is known for its ease of use and flexibility, which may result in a slight trade-off in raw performance when compared to other frameworks that are more specialized and optimized. 
+ +- **Compatibility and Integration**: Offers excellent compatibility with various data science and machine learning libraries in Python. + +- **Community Support and Ecosystem**: One of the most vibrant communities, with extensive resources for learning and troubleshooting. + +- **Case Studies**: Commonly used in research prototypes, many academic papers reference models deployed in PyTorch. + +- **Maintenance and Updates**: Regular updates with active development and support for new features. + +- **Security Considerations**: Regular patches for security issues, but security is largely dependent on the overall environment it’s deployed in. + +- **Hardware Acceleration**: Supports CUDA for GPU acceleration, essential for speeding up model training and inference. + +#### TorchScript + +TorchScript extends PyTorch’s capabilities by allowing models to be exported and run in a C++ runtime environment. This makes it suitable for production environments where Python is unavailable. + +- **Performance Benchmarks**: Can offer improved performance over native PyTorch, especially in production environments. + +- **Compatibility and Integration**: Designed for seamless transition from PyTorch to C++ production environments, though some advanced features might not translate perfectly. + +- **Community Support and Ecosystem**: Benefits from PyTorch’s large community but has a narrower scope of specialized developers. + +- **Case Studies**: Widely used in industry settings where Python’s performance overhead is a bottleneck. + +- **Maintenance and Updates**: Maintained alongside PyTorch with consistent updates. + +- **Security Considerations**: Offers improved security by enabling the running of models in environments without full Python installations. + +- **Hardware Acceleration**: Inherits PyTorch’s CUDA support, ensuring efficient GPU utilization. 
+ +#### ONNX + +The Open Neural Network Exchange (ONNX) is a format that allows for model interoperability across different frameworks, which can be critical when deploying to various platforms. + +- **Performance Benchmarks**: The performance of ONNX models can vary depending on the specific runtime they are deployed on. + +- **Compatibility and Integration**: High interoperability across multiple platforms and hardware due to its framework-agnostic nature. + +- **Community Support and Ecosystem**: Supported by many organizations, leading to a broad ecosystem and a variety of tools for optimization. + +- **Case Studies**: Frequently used to move models between different machine learning frameworks, demonstrating its flexibility. + +- **Maintenance and Updates**: As an open standard, ONNX is regularly updated to support new operations and models. + +- **Security Considerations**: As with any cross-platform tool, it's essential to ensure secure practices in the conversion and deployment pipeline. + +- **Hardware Acceleration**: With ONNX Runtime, models can leverage various hardware optimizations. + +#### OpenVINO + +OpenVINO is an Intel toolkit designed to facilitate the deployment of deep learning models across Intel hardware, enhancing performance and speed. + +- **Performance Benchmarks**: Specifically optimized for Intel CPUs, GPUs, and VPUs, offering significant performance boosts on compatible hardware. + +- **Compatibility and Integration**: Works best within the Intel ecosystem but also supports a range of other platforms. + +- **Community Support and Ecosystem**: Backed by Intel, with a solid user base especially in the computer vision domain. + +- **Case Studies**: Often utilized in IoT and edge computing scenarios where Intel hardware is prevalent. + +- **Maintenance and Updates**: Intel regularly updates OpenVINO to support the latest deep learning models and Intel hardware. 
+ +- **Security Considerations**: Provides robust security features suitable for deployment in sensitive applications. + +- **Hardware Acceleration**: Tailored for acceleration on Intel hardware, leveraging dedicated instruction sets and hardware features. + +For more details on deployment using OpenVINO, refer to the Ultralytics Integration documentation: [Intel OpenVINO Export](../integrations/openvino.md). + +#### TensorRT + +TensorRT is a high-performance deep learning inference optimizer and runtime from NVIDIA, ideal for applications needing speed and efficiency. + +- **Performance Benchmarks**: Delivers top-tier performance on NVIDIA GPUs with support for high-speed inference. + +- **Compatibility and Integration**: Best suited for NVIDIA hardware, with limited support outside this environment. + +- **Community Support and Ecosystem**: Strong support network through NVIDIA’s developer forums and documentation. + +- **Case Studies**: Widely adopted in industries requiring real-time inference on video and image data. + +- **Maintenance and Updates**: NVIDIA maintains TensorRT with frequent updates to enhance performance and support new GPU architectures. + +- **Security Considerations**: Like many NVIDIA products, it has a strong emphasis on security, but specifics depend on the deployment environment. + +- **Hardware Acceleration**: Exclusively designed for NVIDIA GPUs, providing deep optimization and acceleration. + +#### CoreML + +CoreML is Apple’s machine learning framework, optimized for on-device performance in the Apple ecosystem, including iOS, macOS, watchOS, and tvOS. + +- **Performance Benchmarks**: Optimized for on-device performance on Apple hardware with minimal battery usage. + +- **Compatibility and Integration**: Exclusively for Apple's ecosystem, providing a streamlined workflow for iOS and macOS applications. 
+ +- **Community Support and Ecosystem**: Strong support from Apple and a dedicated developer community, with extensive documentation and tools. + +- **Case Studies**: Commonly used in applications that require on-device machine learning capabilities on Apple products. + +- **Maintenance and Updates**: Regularly updated by Apple to support the latest machine learning advancements and Apple hardware. + +- **Security Considerations**: Benefits from Apple's focus on user privacy and data security. + +- **Hardware Acceleration**: Takes full advantage of Apple's neural engine and GPU for accelerated machine learning tasks. + +#### TF SavedModel + +TF SavedModel is TensorFlow’s format for saving and serving machine learning models, particularly suited for scalable server environments. + +- **Performance Benchmarks**: Offers scalable performance in server environments, especially when used with TensorFlow Serving. + +- **Compatibility and Integration**: Wide compatibility across TensorFlow's ecosystem, including cloud and enterprise server deployments. + +- **Community Support and Ecosystem**: Large community support due to TensorFlow's popularity, with a vast array of tools for deployment and optimization. + +- **Case Studies**: Extensively used in production environments for serving deep learning models at scale. + +- **Maintenance and Updates**: Supported by Google and the TensorFlow community, ensuring regular updates and new features. + +- **Security Considerations**: Deployment using TensorFlow Serving includes robust security features for enterprise-grade applications. + +- **Hardware Acceleration**: Supports various hardware accelerations through TensorFlow's backends. + +#### TF GraphDef + +TF GraphDef is a TensorFlow format that represents the model as a graph, which is beneficial for environments where a static computation graph is required. 
+ +- **Performance Benchmarks**: Provides stable performance for static computation graphs, with a focus on consistency and reliability. + +- **Compatibility and Integration**: Easily integrates within TensorFlow's infrastructure but less flexible compared to SavedModel. + +- **Community Support and Ecosystem**: Good support from TensorFlow's ecosystem, with many resources available for optimizing static graphs. + +- **Case Studies**: Useful in scenarios where a static graph is necessary, such as in certain embedded systems. + +- **Maintenance and Updates**: Regular updates alongside TensorFlow's core updates. + +- **Security Considerations**: Ensures safe deployment with TensorFlow's established security practices. + +- **Hardware Acceleration**: Can utilize TensorFlow's hardware acceleration options, though not as flexible as SavedModel. + +#### TF Lite + +TF Lite is TensorFlow’s solution for mobile and embedded device machine learning, providing a lightweight library for on-device inference. + +- **Performance Benchmarks**: Designed for speed and efficiency on mobile and embedded devices. + +- **Compatibility and Integration**: Can be used on a wide range of devices due to its lightweight nature. + +- **Community Support and Ecosystem**: Backed by Google, it has a robust community and a growing number of resources for developers. + +- **Case Studies**: Popular in mobile applications that require on-device inference with minimal footprint. + +- **Maintenance and Updates**: Regularly updated to include the latest features and optimizations for mobile devices. + +- **Security Considerations**: Provides a secure environment for running models on end-user devices. + +- **Hardware Acceleration**: Supports a variety of hardware acceleration options, including GPU and DSP. + +#### TF Edge TPU + +TF Edge TPU is designed for high-speed, efficient computing on Google's Edge TPU hardware, perfect for IoT devices requiring real-time processing. 
+ +- **Performance Benchmarks**: Specifically optimized for high-speed, efficient computing on Google's Edge TPU hardware. + +- **Compatibility and Integration**: Works exclusively with TensorFlow Lite models on Edge TPU devices. + +- **Community Support and Ecosystem**: Growing support with resources provided by Google and third-party developers. + +- **Case Studies**: Used in IoT devices and applications that require real-time processing with low latency. + +- **Maintenance and Updates**: Continually improved upon to leverage the capabilities of new Edge TPU hardware releases. + +- **Security Considerations**: Integrates with Google's robust security for IoT and edge devices. + +- **Hardware Acceleration**: Custom-designed to take full advantage of Google Coral devices. + +#### TF.js + +TensorFlow.js (TF.js) is a library that brings machine learning capabilities directly to the browser, offering a new realm of possibilities for web developers and users alike. It allows for the integration of machine learning models in web applications without the need for back-end infrastructure. + +- **Performance Benchmarks**: Enables machine learning directly in the browser with reasonable performance, depending on the client device. + +- **Compatibility and Integration**: High compatibility with web technologies, allowing for easy integration into web applications. + +- **Community Support and Ecosystem**: Support from a community of web and Node.js developers, with a variety of tools for deploying ML models in browsers. + +- **Case Studies**: Ideal for interactive web applications that benefit from client-side machine learning without the need for server-side processing. + +- **Maintenance and Updates**: Maintained by the TensorFlow team with contributions from the open-source community. + +- **Security Considerations**: Runs within the browser's secure context, utilizing the security model of the web platform. 
+ +- **Hardware Acceleration**: Performance can be enhanced with web-based APIs that access hardware acceleration like WebGL. + +#### PaddlePaddle + +PaddlePaddle is an open-source deep learning framework developed by Baidu. It is designed to be both efficient for researchers and easy to use for developers. It's particularly popular in China and offers specialized support for Chinese language processing. + +- **Performance Benchmarks**: Offers competitive performance with a focus on ease of use and scalability. + +- **Compatibility and Integration**: Well-integrated within Baidu's ecosystem and supports a wide range of applications. + +- **Community Support and Ecosystem**: While the community is smaller globally, it's rapidly growing, especially in China. + +- **Case Studies**: Commonly used in Chinese markets and by developers looking for alternatives to other major frameworks. + +- **Maintenance and Updates**: Regularly updated with a focus on serving Chinese language AI applications and services. + +- **Security Considerations**: Emphasizes data privacy and security, catering to Chinese data governance standards. + +- **Hardware Acceleration**: Supports various hardware accelerations, including Baidu's own Kunlun chips. + +#### ncnn + +ncnn is a high-performance neural network inference framework optimized for the mobile platform. It stands out for its lightweight nature and efficiency, making it particularly well-suited for mobile and embedded devices where resources are limited. + +- **Performance Benchmarks**: Highly optimized for mobile platforms, offering efficient inference on ARM-based devices. + +- **Compatibility and Integration**: Suitable for applications on mobile phones and embedded systems with ARM architecture. + +- **Community Support and Ecosystem**: Supported by a niche but active community focused on mobile and embedded ML applications. 
+ +- **Case Studies**: Favoured for mobile applications where efficiency and speed are critical on Android and other ARM-based systems. + +- **Maintenance and Updates**: Continuously improved to maintain high performance on a range of ARM devices. + +- **Security Considerations**: Focuses on running locally on the device, leveraging the inherent security of on-device processing. + +- **Hardware Acceleration**: Tailored for ARM CPUs and GPUs, with specific optimizations for these architectures. + +## Comparative Analysis of YOLOv8 Deployment Options + +The following table provides a snapshot of the various deployment options available for YOLOv8 models, helping you to assess which may best fit your project needs based on several critical criteria. For an in-depth look at each deployment option's format, please see the [Ultralytics documentation page on export formats](../modes/export.md#export-formats). + +| Deployment Option | Performance Benchmarks | Compatibility and Integration | Community Support and Ecosystem | Case Studies | Maintenance and Updates | Security Considerations | Hardware Acceleration | +|-------------------|-------------------------------------------------|------------------------------------------------|-----------------------------------------------|--------------------------------------------|---------------------------------------------|---------------------------------------------------|------------------------------------| +| PyTorch | Good flexibility; may trade off raw performance | Excellent with Python libraries | Extensive resources and community | Research and prototypes | Regular, active development | Dependent on deployment environment | CUDA support for GPU acceleration | +| TorchScript | Better for production than PyTorch | Smooth transition from PyTorch to C++ | Specialized but narrower than PyTorch | Industry where Python is a bottleneck | Consistent updates with PyTorch | Improved security without full Python | Inherits CUDA 
support from PyTorch | +| ONNX | Variable depending on runtime | High across different frameworks | Broad ecosystem, supported by many orgs | Flexibility across ML frameworks | Regular updates for new operations | Ensure secure conversion and deployment practices | Various hardware optimizations | +| OpenVINO | Optimized for Intel hardware | Best within Intel ecosystem | Solid in computer vision domain | IoT and edge with Intel hardware | Regular updates for Intel hardware | Robust features for sensitive applications | Tailored for Intel hardware | +| TensorRT | Top-tier on NVIDIA GPUs | Best for NVIDIA hardware | Strong network through NVIDIA | Real-time video and image inference | Frequent updates for new GPUs | Emphasis on security | Designed for NVIDIA GPUs | +| CoreML | Optimized for on-device Apple hardware | Exclusive to Apple ecosystem | Strong Apple and developer support | On-device ML on Apple products | Regular Apple updates | Focus on privacy and security | Apple neural engine and GPU | +| TF SavedModel | Scalable in server environments | Wide compatibility in TensorFlow ecosystem | Large support due to TensorFlow popularity | Serving models at scale | Regular updates by Google and community | Robust features for enterprise | Various hardware accelerations | +| TF GraphDef | Stable for static computation graphs | Integrates well with TensorFlow infrastructure | Resources for optimizing static graphs | Scenarios requiring static graphs | Updates alongside TensorFlow core | Established TensorFlow security practices | TensorFlow acceleration options | +| TF Lite | Speed and efficiency on mobile/embedded | Wide range of device support | Robust community, Google backed | Mobile applications with minimal footprint | Latest features for mobile | Secure environment on end-user devices | GPU and DSP among others | +| TF Edge TPU | Optimized for Google's Edge TPU hardware | Exclusive to Edge TPU devices | Growing with Google and third-party resources | IoT 
devices requiring real-time processing | Improvements for new Edge TPU hardware | Google's robust IoT security | Custom-designed for Google Coral | +| TF.js | Reasonable in-browser performance | High with web technologies | Web and Node.js developers support | Interactive web applications | TensorFlow team and community contributions | Web platform security model | Enhanced with WebGL and other APIs | +| PaddlePaddle | Competitive, easy to use and scalable | Baidu ecosystem, wide application support | Rapidly growing, especially in China | Chinese market and language processing | Focus on Chinese AI applications | Emphasizes data privacy and security | Including Baidu's Kunlun chips | +| ncnn | Optimized for mobile ARM-based devices | Mobile and embedded ARM systems | Niche but active mobile/embedded ML community | Android and ARM systems efficiency | High performance maintenance on ARM | On-device security advantages | ARM CPUs and GPUs optimizations | + +This comparative analysis gives you a high-level overview. For deployment, it's essential to consider the specific requirements and constraints of your project, and consult the detailed documentation and resources available for each option. + +## Community and Support + +When you're getting started with YOLOv8, having a helpful community and support can make a significant impact. Here's how to connect with others who share your interests and get the assistance you need. + +### Engage with the Broader Community + +- **GitHub Discussions:** The YOLOv8 repository on GitHub has a "Discussions" section where you can ask questions, report issues, and suggest improvements. + +- **Ultralytics Discord Server:** Ultralytics has a [Discord server](https://ultralytics.com/discord/) where you can interact with other users and developers. 
+ +### Official Documentation and Resources + +- **Ultralytics YOLOv8 Docs:** The [official documentation](../index.md) provides a comprehensive overview of YOLOv8, along with guides on installation, usage, and troubleshooting. + +These resources will help you tackle challenges and stay updated on the latest trends and best practices in the YOLOv8 community. + +## Conclusion + +In this guide, we've explored the different deployment options for YOLOv8. We've also discussed the important factors to consider when making your choice. These options allow you to customize your model for various environments and performance requirements, making it suitable for real-world applications. + +Don't forget that the YOLOv8 and Ultralytics community is a valuable source of help. Connect with other developers and experts to learn unique tips and solutions you might not find in regular documentation. Keep seeking knowledge, exploring new ideas, and sharing your experiences. + +Happy deploying! diff --git a/docs/en/guides/raspberry-pi.md b/docs/en/guides/raspberry-pi.md new file mode 100644 index 0000000..389db0d --- /dev/null +++ b/docs/en/guides/raspberry-pi.md @@ -0,0 +1,196 @@ +--- +comments: true +description: Quick start guide to setting up YOLO on a Raspberry Pi with a Pi Camera using the libcamera stack. Detailed comparison between Raspberry Pi 3, 4 and 5 models. +keywords: Ultralytics, YOLO, Raspberry Pi, Pi Camera, libcamera, quick start guide, Raspberry Pi 4 vs Raspberry Pi 5, YOLO on Raspberry Pi, hardware setup, machine learning, AI +--- + +# Quick Start Guide: Raspberry Pi and Pi Camera with YOLOv5 and YOLOv8 + +This comprehensive guide aims to expedite your journey with YOLO object detection models on a [Raspberry Pi](https://www.raspberrypi.com/) using a [Pi Camera](https://www.raspberrypi.com/products/camera-module-v2/). Whether you're a student, hobbyist, or a professional, this guide is designed to get you up and running in less than 30 minutes. 
The instructions here are rigorously tested to minimize setup issues, allowing you to focus on utilizing YOLO for your specific projects. + +

+
+ +
+ Watch: Raspberry Pi 5 updates and improvements. +

+ +## Prerequisites + +- Raspberry Pi 3, 4 or 5 +- Pi Camera +- 64-bit Raspberry Pi Operating System + +Connect the Pi Camera to your Raspberry Pi via a CSI cable and install the 64-bit Raspberry Pi Operating System. Verify your camera with the following command: + +```bash +libcamera-hello +``` + +You should see a video feed from your camera. + +## Choose Your YOLO Version: YOLOv5 or YOLOv8 + +This guide offers you the flexibility to start with either [YOLOv5](https://github.com/ultralytics/yolov5) or [YOLOv8](https://github.com/ultralytics/ultralytics). Both versions have their unique advantages and use-cases. The choice is yours, but remember, the guide's aim is not just quick setup but also a robust foundation for your future work in object detection. + +## Hardware Specifics: At a Glance + +To assist you in making an informed hardware decision, we've summarized the key hardware specifics of Raspberry Pi 3, 4, and 5 in the table below: + +| Feature | Raspberry Pi 3 | Raspberry Pi 4 | Raspberry Pi 5 | +|----------------------------|------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------|----------------------------------------------------------------------| +| **CPU** | 1.2GHz Quad-Core ARM Cortex-A53 | 1.5GHz Quad-core 64-bit ARM Cortex-A72 | 2.4GHz Quad-core 64-bit Arm Cortex-A76 | +| **RAM** | 1GB LPDDR2 | 2GB, 4GB or 8GB LPDDR4 | *Details not yet available* | +| **USB Ports** | 4 x USB 2.0 | 2 x USB 2.0, 2 x USB 3.0 | 2 x USB 3.0, 2 x USB 2.0 | +| **Network** | Ethernet & Wi-Fi 802.11n | Gigabit Ethernet & Wi-Fi 802.11ac | Gigabit Ethernet with PoE+ support, Dual-band 802.11ac Wi-Fi® | +| **Performance** | Slower, may require lighter YOLO models | Faster, can run complex YOLO models | *Details not yet available* | +| **Power Requirement** | 2.5A power supply | 3.0A USB-C power supply | *Details not yet available* | +| **Official 
Documentation** | [Link](https://www.raspberrypi.org/documentation/hardware/raspberrypi/bcm2837/README.md) | [Link](https://www.raspberrypi.org/documentation/hardware/raspberrypi/bcm2711/README.md) | [Link](https://www.raspberrypi.com/news/introducing-raspberry-pi-5/) | + +Please make sure to follow the instructions specific to your Raspberry Pi model to ensure a smooth setup process. + +## Quick Start with YOLOv5 + +This section outlines how to set up YOLOv5 on a Raspberry Pi with a Pi Camera. These steps are designed to be compatible with the libcamera camera stack introduced in Raspberry Pi OS Bullseye. + +### Install Necessary Packages + +1. Update the Raspberry Pi: + + ```bash + sudo apt-get update + sudo apt-get upgrade -y + sudo apt-get autoremove -y + ``` + +2. Clone the YOLOv5 repository: + + ```bash + cd ~ + git clone https://github.com/Ultralytics/yolov5.git + ``` + +3. Install the required dependencies: + + ```bash + cd ~/yolov5 + pip3 install -r requirements.txt + ``` + +4. For Raspberry Pi 3, install compatible versions of PyTorch and Torchvision (skip for Raspberry Pi 4 and 5): + + ```bash + pip3 uninstall torch torchvision + pip3 install torch==1.11.0 torchvision==0.12.0 + ``` + +### Modify `detect.py` + +To enable TCP streams via SSH or the CLI, minor modifications are needed in `detect.py`. + +1. Open `detect.py`: + + ```bash + sudo nano ~/yolov5/detect.py + ``` + +2. Find and modify the `is_url` line to accept TCP streams: + + ```python + is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://', 'tcp://')) + ``` + +3. Comment out the `view_img` line: + + ```python + # view_img = check_imshow(warn=True) + ``` + +4. Save and exit: + + ```bash + CTRL + O -> ENTER -> CTRL + X + ``` + +### Initiate TCP Stream with Libcamera + +1. Start the TCP stream: + + ```bash + libcamera-vid -n -t 0 --width 1280 --height 960 --framerate 1 --inline --listen -o tcp://127.0.0.1:8888 + ``` + +Keep this terminal session running for the next steps. 
+ +### Perform YOLOv5 Inference + +1. Run the YOLOv5 detection: + + ```bash + cd ~/yolov5 + python3 detect.py --source=tcp://127.0.0.1:8888 + ``` + +## Quick Start with YOLOv8 + +Follow this section if you are interested in setting up YOLOv8 instead. The steps are quite similar but are tailored for YOLOv8's specific needs. + +### Install Necessary Packages + +1. Update the Raspberry Pi: + + ```bash + sudo apt-get update + sudo apt-get upgrade -y + sudo apt-get autoremove -y + ``` + +2. Install the `ultralytics` Python package: + + ```bash + pip3 install ultralytics + ``` + +3. Reboot: + + ```bash + sudo reboot + ``` + +### Initiate TCP Stream with Libcamera + +1. Start the TCP stream: + + ```bash + libcamera-vid -n -t 0 --width 1280 --height 960 --framerate 1 --inline --listen -o tcp://127.0.0.1:8888 + ``` + +### Perform YOLOv8 Inference + +To perform inference with YOLOv8, you can use the following Python code snippet: + +```python +from ultralytics import YOLO + +model = YOLO('yolov8n.pt') +results = model('tcp://127.0.0.1:8888', stream=True) + +while True: + for result in results: + boxes = result.boxes + probs = result.probs +``` + +## Next Steps + +Congratulations on successfully setting up YOLO on your Raspberry Pi! For further learning and support, visit [Ultralytics](https://ultralytics.com/) and [Kashmir World Foundation](https://www.kashmirworldfoundation.org/). + +## Acknowledgements and Citations + +This guide was initially created by Daan Eeltink for Kashmir World Foundation, an organization dedicated to the use of YOLO for the conservation of endangered species. We acknowledge their pioneering work and educational focus in the realm of object detection technologies. + +For more information about Kashmir World Foundation's activities, you can visit their [website](https://www.kashmirworldfoundation.org/). 
diff --git a/docs/en/guides/sahi-tiled-inference.md b/docs/en/guides/sahi-tiled-inference.md new file mode 100644 index 0000000..9728703 --- /dev/null +++ b/docs/en/guides/sahi-tiled-inference.md @@ -0,0 +1,185 @@ +--- +comments: true +description: A comprehensive guide on how to use YOLOv8 with SAHI for standard and sliced inference in object detection tasks. +keywords: YOLOv8, SAHI, Sliced Inference, Object Detection, Ultralytics, Large Scale Image Analysis, High-Resolution Imagery +--- + +# Ultralytics Docs: Using YOLOv8 with SAHI for Sliced Inference + +Welcome to the Ultralytics documentation on how to use YOLOv8 with [SAHI](https://github.com/obss/sahi) (Slicing Aided Hyper Inference). This comprehensive guide aims to furnish you with all the essential knowledge you'll need to implement SAHI alongside YOLOv8. We'll deep-dive into what SAHI is, why sliced inference is critical for large-scale applications, and how to integrate these functionalities with YOLOv8 for enhanced object detection performance. + +

+ SAHI Sliced Inference Overview +

+ +## Introduction to SAHI + +SAHI (Slicing Aided Hyper Inference) is an innovative library designed to optimize object detection algorithms for large-scale and high-resolution imagery. Its core functionality lies in partitioning images into manageable slices, running object detection on each slice, and then stitching the results back together. SAHI is compatible with a range of object detection models, including the YOLO series, thereby offering flexibility while ensuring optimized use of computational resources. + +### Key Features of SAHI + +- **Seamless Integration**: SAHI integrates effortlessly with YOLO models, meaning you can start slicing and detecting without a lot of code modification. +- **Resource Efficiency**: By breaking down large images into smaller parts, SAHI optimizes memory usage, allowing you to run high-quality detection on hardware with limited resources. +- **High Accuracy**: SAHI maintains the detection accuracy by employing smart algorithms to merge overlapping detection boxes during the stitching process. + +## What is Sliced Inference? + +Sliced Inference refers to the practice of subdividing a large or high-resolution image into smaller segments (slices), conducting object detection on these slices, and then merging the per-slice detections to reconstruct the object locations on the original image. This technique is invaluable in scenarios where computational resources are limited or when working with extremely high-resolution images that could otherwise lead to memory issues. + +### Benefits of Sliced Inference + +- **Reduced Computational Burden**: Smaller image slices are faster to process, and they consume less memory, enabling smoother operation on lower-end hardware. + +- **Preserved Detection Quality**: Since each slice is treated independently, there is no reduction in the quality of object detection, provided the slices are large enough to capture the objects of interest. 
+ +- **Enhanced Scalability**: The technique allows for object detection to be more easily scaled across different sizes and resolutions of images, making it ideal for a wide range of applications from satellite imagery to medical diagnostics. + + + + + + + + + + +
YOLOv8 without SAHIYOLOv8 with SAHI
YOLOv8 without SAHIYOLOv8 with SAHI
+ +## Installation and Preparation + +### Installation + +To get started, install the latest versions of SAHI and Ultralytics: + +```bash +pip install -U ultralytics sahi +``` + +### Import Modules and Download Resources + +Here's how to import the necessary modules and download a YOLOv8 model and some test images: + +```python +from sahi.utils.yolov8 import download_yolov8s_model +from sahi import AutoDetectionModel +from sahi.utils.cv import read_image +from sahi.utils.file import download_from_url +from sahi.predict import get_prediction, get_sliced_prediction, predict +from pathlib import Path +from IPython.display import Image + +# Download YOLOv8 model +yolov8_model_path = "models/yolov8s.pt" +download_yolov8s_model(yolov8_model_path) + +# Download test images +download_from_url('https://raw.githubusercontent.com/obss/sahi/main/demo/demo_data/small-vehicles1.jpeg', 'demo_data/small-vehicles1.jpeg') +download_from_url('https://raw.githubusercontent.com/obss/sahi/main/demo/demo_data/terrain2.png', 'demo_data/terrain2.png') +``` + +## Standard Inference with YOLOv8 + +### Instantiate the Model + +You can instantiate a YOLOv8 model for object detection like this: + +```python +detection_model = AutoDetectionModel.from_pretrained( + model_type='yolov8', + model_path=yolov8_model_path, + confidence_threshold=0.3, + device="cpu", # or 'cuda:0' +) +``` + +### Perform Standard Prediction + +Perform standard inference using an image path or a numpy image. 
+ +```python +# With an image path +result = get_prediction("demo_data/small-vehicles1.jpeg", detection_model) + +# With a numpy image +result = get_prediction(read_image("demo_data/small-vehicles1.jpeg"), detection_model) +``` + +### Visualize Results + +Export and visualize the predicted bounding boxes and masks: + +```python +result.export_visuals(export_dir="demo_data/") +Image("demo_data/prediction_visual.png") +``` + +## Sliced Inference with YOLOv8 + +Perform sliced inference by specifying the slice dimensions and overlap ratios: + +```python +result = get_sliced_prediction( + "demo_data/small-vehicles1.jpeg", + detection_model, + slice_height=256, + slice_width=256, + overlap_height_ratio=0.2, + overlap_width_ratio=0.2 +) +``` + +## Handling Prediction Results + +SAHI provides a `PredictionResult` object, which can be converted into various annotation formats: + +```python +# Access the object prediction list +object_prediction_list = result.object_prediction_list + +# Convert to COCO annotation, COCO prediction, imantics, and fiftyone formats +result.to_coco_annotations()[:3] +result.to_coco_predictions(image_id=1)[:3] +result.to_imantics_annotations()[:3] +result.to_fiftyone_detections()[:3] +``` + +## Batch Prediction + +For batch prediction on a directory of images: + +```python +predict( + model_type="yolov8", + model_path="path/to/yolov8n.pt", + model_device="cpu", # or 'cuda:0' + model_confidence_threshold=0.4, + source="path/to/dir", + slice_height=256, + slice_width=256, + overlap_height_ratio=0.2, + overlap_width_ratio=0.2, +) +``` + +That's it! Now you're equipped to use YOLOv8 with SAHI for both standard and sliced inference. + +## Citations and Acknowledgments + +If you use SAHI in your research or development work, please cite the original SAHI paper and acknowledge the authors: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @article{akyon2022sahi, + title={Slicing Aided Hyper Inference and Fine-tuning for Small Object Detection}, + author={Akyon, Fatih Cagatay and Altinuc, Sinan Onur and Temizel, Alptekin}, + journal={2022 IEEE International Conference on Image Processing (ICIP)}, + doi={10.1109/ICIP46576.2022.9897990}, + pages={966-970}, + year={2022} + } + ``` + +We extend our thanks to the SAHI research group for creating and maintaining this invaluable resource for the computer vision community. For more information about SAHI and its creators, visit the [SAHI GitHub repository](https://github.com/obss/sahi). diff --git a/docs/en/guides/triton-inference-server.md b/docs/en/guides/triton-inference-server.md new file mode 100644 index 0000000..9ccc88d --- /dev/null +++ b/docs/en/guides/triton-inference-server.md @@ -0,0 +1,137 @@ +--- +comments: true +description: A step-by-step guide on integrating Ultralytics YOLOv8 with Triton Inference Server for scalable and high-performance deep learning inference deployments. +keywords: YOLOv8, Triton Inference Server, ONNX, Deep Learning Deployment, Scalable Inference, Ultralytics, NVIDIA, Object Detection, Cloud Inferencing +--- + +# Triton Inference Server with Ultralytics YOLOv8 + +The [Triton Inference Server](https://developer.nvidia.com/nvidia-triton-inference-server) (formerly known as TensorRT Inference Server) is an open-source software solution developed by NVIDIA. It provides a cloud inferencing solution optimized for NVIDIA GPUs. Triton simplifies the deployment of AI models at scale in production. Integrating Ultralytics YOLOv8 with Triton Inference Server allows you to deploy scalable, high-performance deep learning inference workloads. This guide provides steps to set up and test the integration. + +

+
+ +
+ Watch: Getting Started with NVIDIA Triton Inference Server. +

+ +## What is Triton Inference Server? + +Triton Inference Server is designed to deploy a variety of AI models in production. It supports a wide range of deep learning and machine learning frameworks, including TensorFlow, PyTorch, ONNX Runtime, and many others. Its primary use cases are: + +- Serving multiple models from a single server instance. +- Dynamic model loading and unloading without server restart. +- Ensemble inferencing, allowing multiple models to be used together to achieve results. +- Model versioning for A/B testing and rolling updates. + +## Prerequisites + +Ensure you have the following prerequisites before proceeding: + +- Docker installed on your machine. +- Install `tritonclient`: + ```bash + pip install tritonclient[all] + ``` + +## Exporting YOLOv8 to ONNX Format + +Before deploying the model on Triton, it must be exported to the ONNX format. ONNX (Open Neural Network Exchange) is a format that allows models to be transferred between different deep learning frameworks. Use the `export` function from the `YOLO` class: + +```python +from ultralytics import YOLO + +# Load a model +model = YOLO('yolov8n.pt') # load an official model + +# Export the model +onnx_file = model.export(format='onnx', dynamic=True) +``` + +## Setting Up Triton Model Repository + +The Triton Model Repository is a storage location where Triton can access and load models. + +1. Create the necessary directory structure: + + ```python + from pathlib import Path + + # Define paths + triton_repo_path = Path('tmp') / 'triton_repo' + triton_model_path = triton_repo_path / 'yolo' + + # Create directories + (triton_model_path / '1').mkdir(parents=True, exist_ok=True) + ``` + +2. 
Move the exported ONNX model to the Triton repository: + + ```python + from pathlib import Path + + # Move ONNX model to Triton Model path + Path(onnx_file).rename(triton_model_path / '1' / 'model.onnx') + + # Create config file + (triton_model_path / 'config.pbtxt').touch() + ``` + +## Running Triton Inference Server + +Run the Triton Inference Server using Docker: + +```python +import contextlib +import subprocess +import time + +from tritonclient.http import InferenceServerClient + +# Define image https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver +tag = 'nvcr.io/nvidia/tritonserver:23.09-py3' # 6.4 GB + +# Pull the image +subprocess.call(f'docker pull {tag}', shell=True) + +# Run the Triton server and capture the container ID +container_id = subprocess.check_output( + f'docker run -d --rm -v {triton_repo_path}:/models -p 8000:8000 {tag} tritonserver --model-repository=/models', + shell=True).decode('utf-8').strip() + +# Wait for the Triton server to start +triton_client = InferenceServerClient(url='localhost:8000', verbose=False, ssl=False) + +# Wait until model is ready (the model name is the 'yolo' directory created in the Triton repository) +model_name = 'yolo' +for _ in range(10): + with contextlib.suppress(Exception): + assert triton_client.is_model_ready(model_name) + break + time.sleep(1) +``` + +Then run inference using the Triton Server model: + +```python +from ultralytics import YOLO + +# Load the Triton Server model +model = YOLO('http://localhost:8000/yolo', task='detect') + +# Run inference on the server +results = model('path/to/image.jpg') +``` + +Clean up the container: + +```python +# Kill and remove the container at the end of the test +subprocess.call(f'docker kill {container_id}', shell=True) +``` + +--- + +By following the above steps, you can deploy and run Ultralytics YOLOv8 models efficiently on Triton Inference Server, providing a scalable and high-performance solution for deep learning inference tasks.
If you face any issues or have further queries, refer to the [official Triton documentation](https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/index.html) or reach out to the Ultralytics community for support. diff --git a/docs/en/guides/yolo-common-issues.md b/docs/en/guides/yolo-common-issues.md new file mode 100644 index 0000000..71083cf --- /dev/null +++ b/docs/en/guides/yolo-common-issues.md @@ -0,0 +1,276 @@ +--- +comments: true +description: A comprehensive guide to troubleshooting common issues encountered while working with YOLOv8 in the Ultralytics ecosystem. +keywords: Troubleshooting, Ultralytics, YOLOv8, Installation Errors, Training Data, Model Performance, Hyperparameter Tuning, Deployment +--- + +# Troubleshooting Common YOLO Issues + +

+ YOLO Common Issues Image +

+ +## Introduction + +This guide serves as a comprehensive aid for troubleshooting common issues encountered while working with YOLOv8 on your Ultralytics projects. Navigating through these issues can be a breeze with the right guidance, ensuring your projects remain on track without unnecessary delays. + +## Common Issues + +### Installation Errors + +Installation errors can arise due to various reasons, such as incompatible versions, missing dependencies, or incorrect environment setups. First, check to make sure you are doing the following: + +- You're using Python 3.8 or later as recommended. + +- Ensure that you have the correct version of PyTorch (1.8 or later) installed. + +- Consider using virtual environments to avoid conflicts. + +- Follow the [official installation guide](../quickstart.md) step by step. + +Additionally, here are some common installation issues users have encountered, along with their respective solutions: + +- Import Errors or Dependency Issues - If you're getting errors during the import of YOLOv8, or you're having issues related to dependencies, consider the following troubleshooting steps: + + - **Fresh Installation**: Sometimes, starting with a fresh installation can resolve unexpected issues. Especially with libraries like Ultralytics, where updates might introduce changes to the file tree structure or functionalities. + + - **Update Regularly**: Ensure you're using the latest version of the library. Older versions might not be compatible with recent updates, leading to potential conflicts or issues. + + - **Check Dependencies**: Verify that all required dependencies are correctly installed and are of the compatible versions. + + - **Review Changes**: If you initially cloned or installed an older version, be aware that significant updates might affect the library's structure or functionalities. Always refer to the official documentation or changelogs to understand any major changes. 
+ + - Remember, keeping your libraries and dependencies up-to-date is crucial for a smooth and error-free experience. + +- Running YOLOv8 on GPU - If you're having trouble running YOLOv8 on GPU, consider the following troubleshooting steps: + + - **Verify CUDA Compatibility and Installation**: Ensure your GPU is CUDA compatible and that CUDA is correctly installed. Use the `nvidia-smi` command to check the status of your NVIDIA GPU and CUDA version. + + - **Check PyTorch and CUDA Integration**: Ensure PyTorch can utilize CUDA by running `import torch; print(torch.cuda.is_available())` in a Python terminal. If it returns 'True', PyTorch is set up to use CUDA. + + - **Environment Activation**: Ensure you're in the correct environment where all necessary packages are installed. + + - **Update Your Packages**: Outdated packages might not be compatible with your GPU. Keep them updated. + + - **Program Configuration**: Check if the program or code specifies GPU usage. In YOLOv8, this might be in the settings or configuration. + +### Model Training Issues + +This section will address common issues faced while training and their respective explanations and solutions. + +#### Verification of Configuration Settings + +**Issue**: You are unsure whether the configuration settings in the `.yaml` file are being applied correctly during model training. + +**Solution**: The configuration settings in the `.yaml` file should be applied when using the `model.train()` function. To ensure that these settings are correctly applied, follow these steps: + +- Confirm that the path to your `.yaml` configuration file is correct. +- Make sure you pass the path to your `.yaml` file as the `data` argument when calling `model.train()`, as shown below: + +```python +model.train(data='/path/to/your/data.yaml', batch=4) +``` + +#### Accelerating Training with Multiple GPUs + +**Issue**: Training is slow on a single GPU, and you want to speed up the process using multiple GPUs. 
+ +**Solution**: Increasing the batch size can accelerate training, but it's essential to consider GPU memory capacity. To speed up training with multiple GPUs, follow these steps: + +- Ensure that you have multiple GPUs available. + +- Specify which GPUs to use through the `device` setting, either in your .yaml configuration file or as a training argument, e.g., `device: 0,1,2,3` to use four GPUs. + +- Increase the batch size accordingly to fully utilize the multiple GPUs without exceeding memory limits. + +- Modify your training command to utilize multiple GPUs: + +```python +# List the GPU indices in `device`; adjust the batch size and other settings as needed to optimize training speed +model.train(data='/path/to/your/data.yaml', batch=32, device=[0, 1, 2, 3], multi_scale=True) +``` + +#### Continuous Monitoring Parameters + +**Issue**: You want to know which parameters should be continuously monitored during training, apart from loss. + +**Solution**: While loss is a crucial metric to monitor, it's also essential to track other metrics for model performance optimization. Some key metrics to monitor during training include: + +- Precision +- Recall +- Mean Average Precision (mAP) + +You can access these metrics from the training logs or by using tools like TensorBoard or wandb for visualization. Implementing early stopping based on these metrics can help you achieve better results. + +#### Tools for Tracking Training Progress + +**Issue**: You are looking for recommendations on tools to track training progress. + +**Solution**: To track and visualize training progress, you can consider using the following tools: + +- [TensorBoard](https://www.tensorflow.org/tensorboard): TensorBoard is a popular choice for visualizing training metrics, including loss, accuracy, and more. You can integrate it with your YOLOv8 training process. +- [Comet](https://bit.ly/yolov8-readme-comet): Comet provides an extensive toolkit for experiment tracking and comparison. It allows you to track metrics, hyperparameters, and even model weights.
Integration with YOLO models is also straightforward, providing you with a complete overview of your experiment cycle. +- [Ultralytics HUB](https://hub.ultralytics.com): Ultralytics HUB offers a specialized environment for tracking YOLO models, giving you a one-stop platform to manage metrics, datasets, and even collaborate with your team. Given its tailored focus on YOLO, it offers more customized tracking options. + +Each of these tools offers its own set of advantages, so you may want to consider the specific needs of your project when making a choice. + +#### How to Check if Training is Happening on the GPU + +**Issue**: The 'device' value in the training logs is 'null,' and you're unsure if training is happening on the GPU. + +**Solution**: The 'device' value being 'null' typically means that the training process is set to automatically use an available GPU, which is the default behavior. To ensure training occurs on a specific GPU, you can manually set the 'device' value to the GPU index (e.g., '0' for the first GPU) in your .yaml configuration file: + +```yaml +device: 0 +``` + +This will explicitly assign the training process to the specified GPU. If you wish to train on the CPU, set 'device' to 'cpu'. + +Keep an eye on the 'runs' folder for logs and metrics to monitor training progress effectively. + +#### Key Considerations for Effective Model Training + +Here are some things to keep in mind, if you are facing issues related to model training. + +**Dataset Format and Labels** + +- Importance: The foundation of any machine learning model lies in the quality and format of the data it is trained on. + +- Recommendation: Ensure that your custom dataset and its associated labels adhere to the expected format. It's crucial to verify that annotations are accurate and of high quality. Incorrect or subpar annotations can derail the model's learning process, leading to unpredictable outcomes. 
+ +**Model Convergence** + +- Importance: Achieving model convergence ensures that the model has sufficiently learned from the training data. + +- Recommendation: When training a model 'from scratch', it's vital to ensure that the model reaches a satisfactory level of convergence. This might necessitate a longer training duration, with more epochs, compared to when you're fine-tuning an existing model. + +**Learning Rate and Batch Size** + +- Importance: These hyperparameters play a pivotal role in determining how the model updates its weights during training. + +- Recommendation: Regularly evaluate if the chosen learning rate and batch size are optimal for your specific dataset. Parameters that are not in harmony with the dataset's characteristics can hinder the model's performance. + +**Class Distribution** + +- Importance: The distribution of classes in your dataset can influence the model's prediction tendencies. + +- Recommendation: Regularly assess the distribution of classes within your dataset. If there's a class imbalance, there's a risk that the model will develop a bias towards the more prevalent class. This bias can be evident in the confusion matrix, where the model might predominantly predict the majority class. + +**Cross-Check with Pretrained Weights** + +- Importance: Leveraging pretrained weights can provide a solid starting point for model training, especially when data is limited. + +- Recommendation: As a diagnostic step, consider training your model using the same data but initializing it with pretrained weights. If this approach yields a well-formed confusion matrix, it could suggest that the 'from scratch' model might require further training or adjustments. + +### Issues Related to Model Predictions + +This section will address common issues faced during model prediction. 
+ +#### Getting Bounding Box Predictions With Your YOLOv8 Custom Model + +**Issue**: When running predictions with a custom YOLOv8 model, there are challenges with the format and visualization of the bounding box coordinates. + +**Solution**: + +- Coordinate Format: YOLOv8 provides bounding box coordinates in absolute pixel values. To convert these to relative coordinates (ranging from 0 to 1), you need to divide by the image dimensions. For example, let’s say your image size is 640x640. Then you would do the following: + +```python +# Convert absolute coordinates to relative coordinates +x1 = x1 / 640 # Divide x-coordinates by image width +x2 = x2 / 640 +y1 = y1 / 640 # Divide y-coordinates by image height +y2 = y2 / 640 +``` + +- File Name: To obtain the file name of the image you're predicting on, access the image file path directly from the result object (e.g., `result.path`) within your prediction loop. + +#### Filtering Objects in YOLOv8 Predictions + +**Issue**: Facing issues with how to filter and display only specific objects in the prediction results when running YOLOv8 using the Ultralytics library. + +**Solution**: To detect specific classes, use the `classes` argument to specify the classes you want to include in the output. For instance, to detect only cars (assuming 'cars' have class index 2): + +```shell +yolo task=segment mode=predict model=yolov8n-seg.pt source='path/to/car.mp4' show=True classes=2 +``` + +#### Understanding Precision Metrics in YOLOv8 + +**Issue**: Confusion regarding the difference between box precision, mask precision, and confusion matrix precision in YOLOv8. + +**Solution**: Box precision measures the accuracy of predicted bounding boxes compared to the actual ground truth boxes using IoU (Intersection over Union) as the metric. Mask precision assesses the agreement between predicted segmentation masks and ground truth masks in pixel-wise object classification.
Confusion matrix precision, on the other hand, focuses on overall classification accuracy across all classes and does not consider the geometric accuracy of predictions. It's important to note that a bounding box can be geometrically accurate (true positive) even if the class prediction is wrong, leading to differences between box precision and confusion matrix precision. These metrics evaluate distinct aspects of a model's performance, reflecting the need for different evaluation metrics in various tasks. + +#### Extracting Object Dimensions in YOLOv8 + +**Issue**: Difficulty in retrieving the width and height of detected objects in YOLOv8, especially when multiple objects are detected in an image. + +**Solution**: To retrieve the bounding box dimensions, first use the Ultralytics YOLOv8 model to predict objects in an image. Then, extract the width and height information of bounding boxes from the prediction results. + +```python +from ultralytics import YOLO + +# Load a pre-trained YOLOv8 model +model = YOLO('yolov8n.pt') + +# Specify the source image +source = 'https://ultralytics.com/images/bus.jpg' + +# Make predictions +results = model.predict(source, save=True, imgsz=320, conf=0.5) + +# Extract bounding box dimensions +boxes = results[0].boxes.xywh.cpu() +for box in boxes: + x, y, w, h = box + print(f"Width of Box: {w}, Height of Box: {h}") +``` + +### Deployment Challenges + +#### GPU Deployment Issues + +**Issue:** Deploying models in a multi-GPU environment can sometimes lead to unexpected behaviors, such as abnormal memory usage or inconsistent results across GPUs. + +**Solution:** Check for default GPU initialization. Some frameworks, like PyTorch, might initialize CUDA operations on a default GPU before transitioning to the designated GPUs. To bypass unexpected default initializations, specify the GPU directly during deployment and prediction. Then, use tools to monitor GPU utilization and memory usage to identify any anomalies in real time.
Also, ensure you're using the latest version of the framework or library. + +#### Model Conversion/Exporting Issues + +**Issue:** During the process of converting or exporting machine learning models to different formats or platforms, users might encounter errors or unexpected behaviors. + +**Solution:** + +- Compatibility Check: Ensure that you are using versions of libraries and frameworks that are compatible with each other. Mismatched versions can lead to unexpected errors during conversion. + +- Environment Reset: If you're using an interactive environment like Jupyter or Colab, consider restarting your environment after making significant changes or installations. A fresh start can sometimes resolve underlying issues. + +- Official Documentation: Always refer to the official documentation of the tool or library you are using for conversion. It often contains specific guidelines and best practices for model exporting. + +- Community Support: Check the library or framework's official repository for similar issues reported by other users. The maintainers or community might have provided solutions or workarounds in discussion threads. + +- Update Regularly: Ensure that you are using the latest version of the tool or library. Developers frequently release updates that fix known bugs or improve functionality. + +- Test Incrementally: Before performing a full conversion, test the process with a smaller model or dataset to identify potential issues early on. + +## Community and Support + +Engaging with a community of like-minded individuals can significantly enhance your experience and success in working with YOLOv8. Below are some channels and resources you may find helpful. + +### Forums and Channels for Getting Help + +**GitHub Issues:** The YOLOv8 repository on GitHub has an [Issues tab](https://github.com/ultralytics/ultralytics/issues) where you can ask questions, report bugs, and suggest new features. 
The community and maintainers are active here, and it’s a great place to get help with specific problems. + +**Ultralytics Discord Server:** Ultralytics has a [Discord server](https://ultralytics.com/discord/) where you can interact with other users and the developers. + +### Official Documentation and Resources + +**Ultralytics YOLOv8 Docs**: The [official documentation](../index.md) provides a comprehensive overview of YOLOv8, along with guides on installation, usage, and troubleshooting. + +These resources should provide a solid foundation for troubleshooting and improving your YOLOv8 projects, as well as connecting with others in the YOLOv8 community. + +## Conclusion + +Troubleshooting is an integral part of any development process, and being equipped with the right knowledge can significantly reduce the time and effort spent in resolving issues. This guide aimed to address the most common challenges faced by users of the YOLOv8 model within the Ultralytics ecosystem. By understanding and addressing these common issues, you can ensure smoother project progress and achieve better results with your computer vision tasks. + +Remember, the Ultralytics community is a valuable resource. Engaging with fellow developers and experts can provide additional insights and solutions that might not be covered in standard documentation. Always keep learning, experimenting, and sharing your experiences to contribute to the collective knowledge of the community. + +Happy troubleshooting! diff --git a/docs/en/guides/yolo-performance-metrics.md b/docs/en/guides/yolo-performance-metrics.md new file mode 100644 index 0000000..6a7a5e6 --- /dev/null +++ b/docs/en/guides/yolo-performance-metrics.md @@ -0,0 +1,165 @@ +--- +comments: true +description: A comprehensive guide on various performance metrics related to YOLOv8, their significance, and how to interpret them. 
+keywords: YOLOv8, Performance metrics, Object detection, Intersection over Union (IoU), Average Precision (AP), Mean Average Precision (mAP), Precision, Recall, Validation mode, Ultralytics +--- + +# Performance Metrics Deep Dive + +## Introduction + +Performance metrics are key tools to evaluate the accuracy and efficiency of object detection models. They shed light on how effectively a model can identify and localize objects within images. Additionally, they help in understanding the model's handling of false positives and false negatives. These insights are crucial for evaluating and enhancing the model's performance. In this guide, we will explore various performance metrics associated with YOLOv8, their significance, and how to interpret them. + +## Object Detection Metrics + +Let’s start by discussing some metrics that are not only important to YOLOv8 but are broadly applicable across different object detection models. + +- **Intersection over Union (IoU):** IoU is a measure that quantifies the overlap between a predicted bounding box and a ground truth bounding box. It plays a fundamental role in evaluating the accuracy of object localization. + +- **Average Precision (AP):** AP computes the area under the precision-recall curve, providing a single value that encapsulates the model's precision and recall performance. + +- **Mean Average Precision (mAP):** mAP extends the concept of AP by calculating the average AP values across multiple object classes. This is useful in multi-class object detection scenarios to provide a comprehensive evaluation of the model's performance. + +- **Precision and Recall:** Precision quantifies the proportion of true positives among all positive predictions, assessing the model's capability to avoid false positives. On the other hand, Recall calculates the proportion of true positives among all actual positives, measuring the model's ability to detect all instances of a class. 
+ +- **F1 Score:** The F1 Score is the harmonic mean of precision and recall, providing a balanced assessment of a model's performance while considering both false positives and false negatives. + +## How to Calculate Metrics for YOLOv8 Model + +Now, we can explore [YOLOv8's Validation mode](../modes/val.md) that can be used to compute the above discussed evaluation metrics. + +Using the validation mode is simple. Once you have a trained model, you can invoke the model.val() function. This function will then process the validation dataset and return a variety of performance metrics. But what do these metrics mean? And how should you interpret them? + +### Interpreting the Output + +Let's break down the output of the model.val() function and understand each segment of the output. + +#### Class-wise Metrics + +One of the sections of the output is the class-wise breakdown of performance metrics. This granular information is useful when you are trying to understand how well the model is doing for each specific class, especially in datasets with a diverse range of object categories. For each class in the dataset the following is provided: + +- **Class**: This denotes the name of the object class, such as "person", "car", or "dog". + +- **Images**: This metric tells you the number of images in the validation set that contain the object class. + +- **Instances**: This provides the count of how many times the class appears across all images in the validation set. + +- **Box(P, R, mAP50, mAP50-95)**: This metric provides insights into the model's performance in detecting objects: + + - **P (Precision)**: The accuracy of the detected objects, indicating how many detections were correct. + + - **R (Recall)**: The ability of the model to identify all instances of objects in the images. + + - **mAP50**: Mean average precision calculated at an intersection over union (IoU) threshold of 0.50. It's a measure of the model's accuracy considering only the "easy" detections. 
+ + - **mAP50-95**: The average of the mean average precision calculated at varying IoU thresholds, ranging from 0.50 to 0.95. It gives a comprehensive view of the model's performance across different levels of detection difficulty. + +#### Speed Metrics + +The speed of inference can be as critical as accuracy, especially in real-time object detection scenarios. This section breaks down the time taken for various stages of the validation process, from preprocessing to post-processing. + +#### COCO Metrics Evaluation + +For users validating on the COCO dataset, additional metrics are calculated using the COCO evaluation script. These metrics give insights into precision and recall at different IoU thresholds and for objects of different sizes. + +#### Visual Outputs + +The model.val() function, apart from producing numeric metrics, also yields visual outputs that can provide a more intuitive understanding of the model's performance. Here's a breakdown of the visual outputs you can expect: + +- **F1 Score Curve (`F1_curve.png`)**: This curve represents the F1 score across various thresholds. Interpreting this curve can offer insights into the model's balance between false positives and false negatives over different thresholds. + +- **Precision-Recall Curve (`PR_curve.png`)**: An integral visualization for any classification problem, this curve showcases the trade-offs between precision and recall at varied thresholds. It becomes especially significant when dealing with imbalanced classes. + +- **Precision Curve (`P_curve.png`)**: A graphical representation of precision values at different thresholds. This curve helps in understanding how precision varies as the threshold changes. + +- **Recall Curve (`R_curve.png`)**: Correspondingly, this graph illustrates how the recall values change across different thresholds. 
+ +- **Confusion Matrix (`confusion_matrix.png`)**: The confusion matrix provides a detailed view of the outcomes, showcasing the counts of true positives, true negatives, false positives, and false negatives for each class. + +- **Normalized Confusion Matrix (`confusion_matrix_normalized.png`)**: This visualization is a normalized version of the confusion matrix. It represents the data in proportions rather than raw counts. This format makes it simpler to compare the performance across classes. + +- **Validation Batch Labels (`val_batchX_labels.jpg`)**: These images depict the ground truth labels for distinct batches from the validation dataset. They provide a clear picture of what the objects are and their respective locations as per the dataset. + +- **Validation Batch Predictions (`val_batchX_pred.jpg`)**: Contrasting the label images, these visuals display the predictions made by the YOLOv8 model for the respective batches. By comparing these to the label images, you can easily assess how well the model detects and classifies objects visually. + +#### Results Storage + +For future reference, the results are saved to a directory, typically named runs/detect/val. + +## Choosing the Right Metrics + +Choosing the right metrics to evaluate often depends on the specific application. + +- **mAP:** Suitable for a broad assessment of model performance. + +- **IoU:** Essential when precise object location is crucial. + +- **Precision:** Important when minimizing false detections is a priority. + +- **Recall:** Vital when it's important to detect every instance of an object. + +- **F1 Score:** Useful when a balance between precision and recall is needed. + +For real-time applications, speed metrics like FPS (Frames Per Second) and latency are crucial to ensure timely results. + +## Interpretation of Results + +It’s important to understand the metrics. 
Here's what some of the commonly observed lower scores might suggest: + +- **Low mAP:** Indicates the model may need general refinements. + +- **Low IoU:** The model might be struggling to pinpoint objects accurately. Different bounding box methods could help. + +- **Low Precision:** The model may be detecting too many non-existent objects. Adjusting confidence thresholds might reduce this. + +- **Low Recall:** The model could be missing real objects. Improving feature extraction or using more data might help. + +- **Imbalanced F1 Score:** There's a disparity between precision and recall. + +- **Class-specific AP:** Low scores here can highlight classes the model struggles with. + +## Case Studies + +Real-world examples can help clarify how these metrics work in practice. + +### Case 1 + +- **Situation:** mAP and F1 Score are suboptimal; Recall is good, but Precision isn't. + +- **Interpretation & Action:** There might be too many incorrect detections. Tightening confidence thresholds could reduce these, though it might also slightly decrease recall. + +### Case 2 + +- **Situation:** mAP and Recall are acceptable, but IoU is lacking. + +- **Interpretation & Action:** The model detects objects well but might not be localizing them precisely. Refining bounding box predictions might help. + +### Case 3 + +- **Situation:** Some classes have a much lower AP than others, even with a decent overall mAP. + +- **Interpretation & Action:** These classes might be more challenging for the model. Using more data for these classes or adjusting class weights during training could be beneficial. + +## Connect and Collaborate + +Tapping into a community of enthusiasts and experts can amplify your journey with YOLOv8. Here are some avenues that can facilitate learning, troubleshooting, and networking.
+ +### Engage with the Broader Community + +- **GitHub Issues:** The YOLOv8 repository on GitHub has an [Issues tab](https://github.com/ultralytics/ultralytics/issues) where you can ask questions, report bugs, and suggest new features. The community and maintainers are active here, and it’s a great place to get help with specific problems. + +- **Ultralytics Discord Server:** Ultralytics has a [Discord server](https://ultralytics.com/discord/) where you can interact with other users and the developers. + +### Official Documentation and Resources: + +- **Ultralytics YOLOv8 Docs:** The [official documentation](../index.md) provides a comprehensive overview of YOLOv8, along with guides on installation, usage, and troubleshooting. + +Using these resources will not only guide you through any challenges but also keep you updated with the latest trends and best practices in the YOLOv8 community. + +## Conclusion + +In this guide, we've taken a close look at the essential performance metrics for YOLOv8. These metrics are key to understanding how well a model is performing and are vital for anyone aiming to fine-tune their models. They offer the necessary insights for improvements and to make sure the model works effectively in real-life situations. + +Remember, the YOLOv8 and Ultralytics community is an invaluable asset. Engaging with fellow developers and experts can open doors to insights and solutions not found in standard documentation. As you journey through object detection, keep the spirit of learning alive, experiment with new strategies, and share your findings. By doing so, you contribute to the community's collective wisdom and ensure its growth. + +Happy object detecting! 
diff --git a/docs/en/guides/yolo-thread-safe-inference.md b/docs/en/guides/yolo-thread-safe-inference.md new file mode 100644 index 0000000..abf7a36 --- /dev/null +++ b/docs/en/guides/yolo-thread-safe-inference.md @@ -0,0 +1,108 @@ +--- +comments: true +description: This guide provides best practices for performing thread-safe inference with YOLO models, ensuring reliable and concurrent predictions in multi-threaded applications. +keywords: thread-safe, YOLO inference, multi-threading, concurrent predictions, YOLO models, Ultralytics, Python threading, safe YOLO usage, AI concurrency +--- + +# Thread-Safe Inference with YOLO Models + +Running YOLO models in a multi-threaded environment requires careful consideration to ensure thread safety. Python's `threading` module allows you to run several threads concurrently, but when it comes to using YOLO models across these threads, there are important safety issues to be aware of. This page will guide you through creating thread-safe YOLO model inference. + +## Understanding Python Threading + +Python threads are a form of concurrency that allows your program to work on multiple operations at once. However, Python's Global Interpreter Lock (GIL) means that only one thread can execute Python bytecode at a time. + +

+ Single vs Multi-Thread Examples +

+ +While this sounds like a limitation, threads can still provide concurrency, especially for I/O-bound operations or when using operations that release the GIL, like those performed by YOLO's underlying C libraries. + +## The Danger of Shared Model Instances + +Instantiating a YOLO model outside your threads and sharing this instance across multiple threads can lead to race conditions, where the internal state of the model is inconsistently modified due to concurrent accesses. This is particularly problematic when the model or its components hold state that is not designed to be thread-safe. + +### Non-Thread-Safe Example: Single Model Instance + +When using threads in Python, it's important to recognize patterns that can lead to concurrency issues. Here is what you should avoid: sharing a single YOLO model instance across multiple threads. + +```python +# Unsafe: Sharing a single model instance across threads +from ultralytics import YOLO +from threading import Thread + +# Instantiate the model outside the thread +shared_model = YOLO("yolov8n.pt") + + +def predict(image_path): + results = shared_model.predict(image_path) + # Process results + + +# Starting threads that share the same model instance +Thread(target=predict, args=("image1.jpg",)).start() +Thread(target=predict, args=("image2.jpg",)).start() +``` + +In the example above, the `shared_model` is used by multiple threads, which can lead to unpredictable results because `predict` could be executed simultaneously by multiple threads. 
+ +### Non-Thread-Safe Example: Multiple Model Instances + +Similarly, here is an unsafe pattern with multiple YOLO model instances: + +```python +# Unsafe: Sharing multiple model instances across threads can still lead to issues +from ultralytics import YOLO +from threading import Thread + +# Instantiate multiple models outside the thread +shared_model_1 = YOLO("yolov8n_1.pt") +shared_model_2 = YOLO("yolov8n_2.pt") + + +def predict(model, image_path): + results = model.predict(image_path) + # Process results + + +# Starting threads with individual model instances +Thread(target=predict, args=(shared_model_1, "image1.jpg")).start() +Thread(target=predict, args=(shared_model_2, "image2.jpg")).start() +``` + +Even though there are two separate model instances, the risk of concurrency issues still exists. If the internal implementation of `YOLO` is not thread-safe, using separate instances might not prevent race conditions, especially if these instances share any underlying resources or states that are not thread-local. + +## Thread-Safe Inference + +To perform thread-safe inference, you should instantiate a separate YOLO model within each thread. This ensures that each thread has its own isolated model instance, eliminating the risk of race conditions. + +### Thread-Safe Example + +Here's how to instantiate a YOLO model inside each thread for safe parallel inference: + +```python +# Safe: Instantiating a single model inside each thread +from ultralytics import YOLO +from threading import Thread + + +def thread_safe_predict(image_path): + # Instantiate a new model inside the thread + local_model = YOLO("yolov8n.pt") + results = local_model.predict(image_path) + # Process results + + +# Starting threads that each have their own model instance +Thread(target=thread_safe_predict, args=("image1.jpg",)).start() +Thread(target=thread_safe_predict, args=("image2.jpg",)).start() +``` + +In this example, each thread creates its own `YOLO` instance. 
This prevents any thread from interfering with the model state of another, thus ensuring that each thread performs inference safely and without unexpected interactions with the other threads. + +## Conclusion + +When using YOLO models with Python's `threading`, always instantiate your models within the thread that will use them to ensure thread safety. This practice avoids race conditions and makes sure that your inference tasks run reliably. + +For more advanced scenarios and to further optimize your multi-threaded inference performance, consider using process-based parallelism with `multiprocessing` or leveraging a task queue with dedicated worker processes. diff --git a/docs/en/help/CI.md b/docs/en/help/CI.md new file mode 100644 index 0000000..60cd01e --- /dev/null +++ b/docs/en/help/CI.md @@ -0,0 +1,61 @@ +--- +comments: true +description: Learn how Ultralytics leverages Continuous Integration (CI) for maintaining high-quality code. Explore our CI tests and the status of these tests for our repositories. +keywords: continuous integration, software development, CI tests, Ultralytics repositories, high-quality code, Docker Deployment, Broken Links, CodeQL, PyPi Publishing +--- + +# Continuous Integration (CI) + +Continuous Integration (CI) is an essential aspect of software development which involves integrating changes and testing them automatically. CI allows us to maintain high-quality code by catching issues early and often in the development process. At Ultralytics, we use various CI tests to ensure the quality and integrity of our codebase. + +## CI Actions + +Here's a brief description of our CI actions: + +- **CI:** This is our primary CI test that involves running unit tests, linting checks, and sometimes more comprehensive tests depending on the repository. +- **Docker Deployment:** This test checks the deployment of the project using Docker to ensure the Dockerfile and related scripts are working correctly. 
+- **Broken Links:** This test scans the codebase for any broken or dead links in our markdown or HTML files. +- **CodeQL:** CodeQL is a tool from GitHub that performs semantic analysis on our code, helping to find potential security vulnerabilities and maintain high-quality code. +- **PyPi Publishing:** This test checks if the project can be packaged and published to PyPi without any errors. + +### CI Results + +Below is the table showing the status of these CI tests for our main repositories: + +| Repository | CI | Docker Deployment | Broken Links | CodeQL | PyPi and Docs Publishing | +|-----------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| [yolov3](https://github.com/ultralytics/yolov3) | [![YOLOv3 CI](https://github.com/ultralytics/yolov3/actions/workflows/ci-testing.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/ci-testing.yml) | [![Publish Docker Images](https://github.com/ultralytics/yolov3/actions/workflows/docker.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/docker.yml) | [![Check Broken 
links](https://github.com/ultralytics/yolov3/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/links.yml) | [![CodeQL](https://github.com/ultralytics/yolov3/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/codeql-analysis.yml) | | +| [yolov5](https://github.com/ultralytics/yolov5) | [![YOLOv5 CI](https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml) | [![Publish Docker Images](https://github.com/ultralytics/yolov5/actions/workflows/docker.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/docker.yml) | [![Check Broken links](https://github.com/ultralytics/yolov5/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/links.yml) | [![CodeQL](https://github.com/ultralytics/yolov5/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/codeql-analysis.yml) | | +| [ultralytics](https://github.com/ultralytics/ultralytics) | [![ultralytics CI](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml) | [![Publish Docker Images](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yaml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yaml) | [![Check Broken links](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml) | [![CodeQL](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml) | [![Publish to PyPI and Deploy 
Docs](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml) | +| [hub](https://github.com/ultralytics/hub) | [![HUB CI](https://github.com/ultralytics/hub/actions/workflows/ci.yaml/badge.svg)](https://github.com/ultralytics/hub/actions/workflows/ci.yaml) | | [![Check Broken links](https://github.com/ultralytics/hub/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/hub/actions/workflows/links.yml) | | | +| [docs](https://github.com/ultralytics/docs) | | | [![Check Broken links](https://github.com/ultralytics/docs/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/links.yml) | | [![pages-build-deployment](https://github.com/ultralytics/docs/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/pages/pages-build-deployment) | + +Each badge shows the status of the last run of the corresponding CI test on the `main` branch of the respective repository. If a test fails, the badge will display a "failing" status, and if it passes, it will display a "passing" status. + +If you notice a test failing, it would be a great help if you could report it through a GitHub issue in the respective repository. + +Remember, a successful CI test does not mean that everything is perfect. It is always recommended to manually review the code before deployment or merging changes. + +## Code Coverage + +Code coverage is a metric that represents the percentage of your codebase that is executed when your tests run. It provides insight into how well your tests exercise your code and can be crucial in identifying untested parts of your application. A high code coverage percentage is often associated with a lower likelihood of bugs. However, it's essential to understand that code coverage doesn't guarantee the absence of defects. 
It merely indicates which parts of the code have been executed by the tests. + +### Integration with [codecov.io](https://codecov.io/) + +At Ultralytics, we have integrated our repositories with [codecov.io](https://codecov.io/), a popular online platform for measuring and visualizing code coverage. Codecov provides detailed insights, coverage comparisons between commits, and visual overlays directly on your code, indicating which lines were covered. + +By integrating with Codecov, we aim to maintain and improve the quality of our code by focusing on areas that might be prone to errors or need further testing. + +### Coverage Results + +To quickly get a glimpse of the code coverage status of the `ultralytics` Python package, we have included a badge and a sunburst visual of the `ultralytics` coverage results. These images show the percentage of code covered by our tests, offering an at-a-glance metric of our testing efforts. For full details please see https://codecov.io/github/ultralytics/ultralytics. + +| Repository | Code Coverage | +|-----------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------| +| [ultralytics](https://github.com/ultralytics/ultralytics) | [![codecov](https://codecov.io/gh/ultralytics/ultralytics/branch/main/graph/badge.svg?token=HHW7IIVFVY)](https://codecov.io/gh/ultralytics/ultralytics) | + +In the sunburst graphic below, the innermost circle is the entire project; moving away from the center are folders and then, finally, individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
+ + + Ultralytics Codecov Image + diff --git a/docs/en/help/CLA.md b/docs/en/help/CLA.md new file mode 100644 index 0000000..b33b488 --- /dev/null +++ b/docs/en/help/CLA.md @@ -0,0 +1,31 @@ +--- +description: Understand terms governing contributions to Ultralytics projects including source code, bug fixes, documentation and more. Read our Contributor License Agreement. +keywords: Ultralytics, Contributor License Agreement, Open Source Software, Contributions, Copyright License, Patent License, Moral Rights +--- + +# Ultralytics Individual Contributor License Agreement + +Thank you for your interest in contributing to open source software projects (“Projects”) made available by Ultralytics SE or its affiliates (“Ultralytics”). This Individual Contributor License Agreement (“Agreement”) sets out the terms governing any source code, object code, bug fixes, configuration changes, tools, specifications, documentation, data, materials, feedback, information or other works of authorship that you submit or have submitted, in any form and in any manner, to Ultralytics in respect of any of the Projects (collectively “Contributions”). If you have any questions respecting this Agreement, please contact hello@ultralytics.com. + +You agree that the following terms apply to all of your past, present and future Contributions. Except for the licenses granted in this Agreement, you retain all of your right, title and interest in and to your Contributions. + +**Copyright License.** You hereby grant, and agree to grant, to Ultralytics a non-exclusive, perpetual, irrevocable, worldwide, fully-paid, royalty-free, transferable copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, and distribute your Contributions and such derivative works, with the right to sublicense the foregoing rights through multiple tiers of sublicensees. 
+ +**Patent License.** You hereby grant, and agree to grant, to Ultralytics a non-exclusive, perpetual, irrevocable, worldwide, fully-paid, royalty-free, transferable patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer your Contributions, where such license applies only to those patent claims licensable by you that are necessarily infringed by your Contributions alone or by combination of your Contributions with the Project to which such Contributions were submitted, with the right to sublicense the foregoing rights through multiple tiers of sublicensees. + +**Moral Rights.** To the fullest extent permitted under applicable law, you hereby waive, and agree not to assert, all of your “moral rights” in or relating to your Contributions for the benefit of Ultralytics, its assigns, and their respective direct and indirect sublicensees. + +**Third Party Content/Rights.** +If your Contribution includes or is based on any source code, object code, bug fixes, configuration changes, tools, specifications, documentation, data, materials, feedback, information or other works of authorship that were not authored by you (“Third Party Content”) or if you are aware of any third party intellectual property or proprietary rights associated with your Contribution (“Third Party Rights”), then you agree to include with the submission of your Contribution full details respecting such Third Party Content and Third Party Rights, including, without limitation, identification of which aspects of your Contribution contain Third Party Content or are associated with Third Party Rights, the owner/author of the Third Party Content and Third Party Rights, where you obtained the Third Party Content, and any applicable third party license terms or restrictions respecting the Third Party Content and Third Party Rights.
For greater certainty, the foregoing obligations respecting the identification of Third Party Content and Third Party Rights do not apply to any portion of a Project that is incorporated into your Contribution to that same Project. + +**Representations.** You represent that, other than the Third Party Content and Third Party Rights identified by you in accordance with this Agreement, you are the sole author of your Contributions and are legally entitled to grant the foregoing licenses and waivers in respect of your Contributions. If your Contributions were created in the course of your employment with your past or present employer(s), you represent that such employer(s) has authorized you to make your Contributions on behalf of such employer(s) or such employer(s) +has waived all of their right, title or interest in or to your Contributions. + +**Disclaimer.** To the fullest extent permitted under applicable law, your Contributions are provided on an "as is" +basis, without any warranties or conditions, express or implied, including, without limitation, any implied warranties or conditions of non-infringement, merchantability or fitness for a particular purpose. You are not required to provide support for your Contributions, except to the extent you desire to provide support. + +**No Obligation.** You acknowledge that Ultralytics is under no obligation to use or incorporate your Contributions into any of the Projects. The decision to use or incorporate your Contributions into any of the Projects will be made at the sole discretion of Ultralytics or its authorized delegates. + +**Disputes.** This Agreement shall be governed by and construed in accordance with the laws of the State of New York, United States of America, without giving effect to its principles or rules regarding conflicts of laws, other than such principles directing application of New York law.
The parties hereby submit to venue in, and jurisdiction of the courts located in New York, New York for purposes relating to this Agreement. In the event that any of the provisions of this Agreement shall be held by a court or other tribunal of competent jurisdiction to be unenforceable, the remaining portions hereof shall remain in full force and effect. + +**Assignment.** You agree that Ultralytics may assign this Agreement, and all of its rights, obligations and licenses hereunder. diff --git a/docs/en/help/FAQ.md b/docs/en/help/FAQ.md new file mode 100644 index 0000000..8e4430a --- /dev/null +++ b/docs/en/help/FAQ.md @@ -0,0 +1,39 @@ +--- +comments: true +description: Find solutions to your common Ultralytics YOLO related queries. Learn about hardware requirements, fine-tuning YOLO models, conversion to ONNX/TensorFlow, and more. +keywords: Ultralytics, YOLO, FAQ, hardware requirements, ONNX, TensorFlow, real-time detection, YOLO accuracy +--- + +# Ultralytics YOLO Frequently Asked Questions (FAQ) + +This FAQ section addresses some common questions and issues users might encounter while working with Ultralytics YOLO repositories. + +## 1. What are the hardware requirements for running Ultralytics YOLO? + +Ultralytics YOLO can be run on a variety of hardware configurations, including CPUs, GPUs, and even some edge devices. However, for optimal performance and faster training and inference, we recommend using a GPU with a minimum of 8GB of memory. NVIDIA GPUs with CUDA support are ideal for this purpose. + +## 2. How do I fine-tune a pre-trained YOLO model on my custom dataset? + +To fine-tune a pre-trained YOLO model on your custom dataset, you'll need to create a dataset configuration file (YAML) that defines the dataset's properties, such as the path to the images, the number of classes, and class names. Next, you'll need to modify the model configuration file to match the number of classes in your dataset. 
Finally, use the `train.py` script to start the training process with your custom dataset and the pre-trained model. You can find a detailed guide on fine-tuning YOLO in the Ultralytics documentation. + +## 3. How do I convert a YOLO model to ONNX or TensorFlow format? + +Ultralytics provides built-in support for converting YOLO models to ONNX format. You can use the `export.py` script to convert a saved model to ONNX format. If you need to convert the model to TensorFlow format, you can use the ONNX model as an intermediary and then use the ONNX-TensorFlow converter to convert the ONNX model to TensorFlow format. + +## 4. Can I use Ultralytics YOLO for real-time object detection? + +Yes, Ultralytics YOLO is designed to be efficient and fast, making it suitable for real-time object detection tasks. The actual performance will depend on your hardware configuration and the complexity of the model. Using a GPU and optimizing the model for your specific use case can help achieve real-time performance. + +## 5. How can I improve the accuracy of my YOLO model? + +Improving the accuracy of a YOLO model may involve several strategies, such as: + +- Fine-tuning the model on more annotated data +- Data augmentation to increase the variety of training samples +- Using a larger or more complex model architecture +- Adjusting the learning rate, batch size, and other hyperparameters +- Using techniques like transfer learning or knowledge distillation + +Remember that there's often a trade-off between accuracy and inference speed, so finding the right balance is crucial for your specific application. + +If you have any more questions or need assistance, don't hesitate to consult the Ultralytics documentation or reach out to the community through GitHub Issues or the official discussion forum. 
diff --git a/docs/en/help/code_of_conduct.md b/docs/en/help/code_of_conduct.md new file mode 100644 index 0000000..c8c7cdc --- /dev/null +++ b/docs/en/help/code_of_conduct.md @@ -0,0 +1,88 @@ +--- +comments: true +description: Explore Ultralytics community’s Code of Conduct, ensuring a supportive, inclusive environment for contributors & members at all levels. Find our guidelines on acceptable behavior & enforcement. +keywords: Ultralytics, code of conduct, community, contribution, behavior guidelines, enforcement, open source contributions +--- + +# Ultralytics Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment for our community include: + +- Demonstrating empathy and kindness toward other people +- Being respectful of differing opinions, viewpoints, and experiences +- Giving and gracefully accepting constructive feedback +- Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience +- Focusing on what is best not just for us as individuals, but for the overall community + +Examples of unacceptable behavior include: + +- The use of sexualized language or imagery, and sexual attention or advances of any kind +- Trolling, insulting or derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or email address, without their explicit permission +- Other conduct which could reasonably be considered inappropriate in a professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at hello@ultralytics.com. All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of actions. + +**Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban. + +### 4. 
Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity). + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. + +[homepage]: https://www.contributor-covenant.org diff --git a/docs/en/help/contributing.md b/docs/en/help/contributing.md new file mode 100644 index 0000000..b798e9c --- /dev/null +++ b/docs/en/help/contributing.md @@ -0,0 +1,76 @@ +--- +comments: true +description: Learn how to contribute to Ultralytics YOLO projects – guidelines for pull requests, reporting bugs, code conduct and CLA signing. +keywords: Ultralytics, YOLO, open-source, contribute, pull request, bug report, coding guidelines, CLA, code of conduct, GitHub +--- + +# Contributing to Ultralytics Open-Source YOLO Repositories + +First of all, thank you for your interest in contributing to Ultralytics open-source YOLO repositories! Your contributions will help improve the project and benefit the community. This document provides guidelines and best practices to get you started. 
+ +## Table of Contents + +- [Code of Conduct](#code-of-conduct) +- [Pull Requests](#pull-requests) + - [CLA Signing](#cla-signing) + - [Google-Style Docstrings](#google-style-docstrings) + - [GitHub Actions CI Tests](#github-actions-ci-tests) +- [Bug Reports](#bug-reports) + - [Minimum Reproducible Example](#minimum-reproducible-example) +- [License and Copyright](#license-and-copyright) + +## Code of Conduct + +All contributors are expected to adhere to the [Code of Conduct](code_of_conduct.md) to ensure a welcoming and inclusive environment for everyone. + +## Pull Requests + +We welcome contributions in the form of pull requests. To make the review process smoother, please follow these guidelines: + +1. **Fork the repository**: Fork the Ultralytics YOLO repository to your own GitHub account. + +2. **Create a branch**: Create a new branch in your forked repository with a descriptive name for your changes. + +3. **Make your changes**: Make the changes you want to contribute. Ensure that your changes follow the coding style of the project and do not introduce new errors or warnings. + +4. **Test your changes**: Test your changes locally to ensure that they work as expected and do not introduce new issues. + +5. **Commit your changes**: Commit your changes with a descriptive commit message. Make sure to include any relevant issue numbers in your commit message. + +6. **Create a pull request**: Create a pull request from your forked repository to the main Ultralytics YOLO repository. In the pull request description, provide a clear explanation of your changes and how they improve the project. + +### CLA Signing + +Before we can accept your pull request, you need to sign a [Contributor License Agreement (CLA)](CLA.md). This is a legal document stating that you agree to the terms of contributing to the Ultralytics YOLO repositories. 
The CLA ensures that your contributions are properly licensed and that the project can continue to be distributed under the AGPL-3.0 license. + +To sign the CLA, follow the instructions provided by the CLA bot after you submit your PR. + +### Google-Style Docstrings + +When adding new functions or classes, please include a [Google-style docstring](https://google.github.io/styleguide/pyguide.html) to provide clear and concise documentation for other developers. This will help ensure that your contributions are easy to understand and maintain. + +Example Google-style docstring: + +```python +def example_function(arg1: int, arg2: int) -> bool: + """ + Example function that demonstrates Google-style docstrings. + + Args: + arg1 (int): The first argument. + arg2 (int): The second argument. + + Returns: + (bool): True if successful, False otherwise. + + Examples: + >>> result = example_function(1, 2) # returns False + """ + if arg1 == arg2: + return True + return False +``` + +### GitHub Actions CI Tests + +Before your pull request can be merged, all GitHub Actions Continuous Integration (CI) tests must pass. These tests include linting, unit tests, and other checks to ensure that your changes meet the quality standards of the project. Make sure to review the output of the GitHub Actions and fix any issues diff --git a/docs/en/help/environmental-health-safety.md b/docs/en/help/environmental-health-safety.md new file mode 100644 index 0000000..9fee240 --- /dev/null +++ b/docs/en/help/environmental-health-safety.md @@ -0,0 +1,37 @@ +--- +comments: false +description: Discover Ultralytics’ EHS policy principles and implementation measures. Committed to safety, environment, and continuous improvement for a sustainable future. 
+keywords: Ultralytics policy, EHS, environment, health and safety, compliance, prevention, continuous improvement, risk management, emergency preparedness, resource allocation, communication +--- + +# Ultralytics Environmental, Health and Safety (EHS) Policy + +At Ultralytics, we recognize that the long-term success of our company relies not only on the products and services we offer, but also on the manner in which we conduct our business. We are committed to ensuring the safety and well-being of our employees, stakeholders, and the environment, and we will continuously strive to mitigate our impact on the environment while promoting health and safety. + +## Policy Principles + +1. **Compliance**: We will comply with all applicable laws, regulations, and standards related to EHS, and we will strive to exceed these standards where possible. + +2. **Prevention**: We will work to prevent accidents, injuries, and environmental harm by implementing risk management measures and ensuring all our operations and procedures are safe. + +3. **Continuous Improvement**: We will continuously improve our EHS performance by setting measurable objectives, monitoring our performance, auditing our operations, and revising our policies and procedures as needed. + +4. **Communication**: We will communicate openly about our EHS performance and will engage with stakeholders to understand and address their concerns and expectations. + +5. **Education and Training**: We will educate and train our employees and contractors in appropriate EHS procedures and practices. + +## Implementation Measures + +1. **Responsibility and Accountability**: Every employee and contractor working at or with Ultralytics is responsible for adhering to this policy. Managers and supervisors are accountable for ensuring this policy is implemented within their areas of control. + +2.
**Risk Management**: We will identify, assess, and manage EHS risks associated with our operations and activities to prevent accidents, injuries, and environmental harm. + +3. **Resource Allocation**: We will allocate the necessary resources to ensure the effective implementation of our EHS policy, including the necessary equipment, personnel, and training. + +4. **Emergency Preparedness and Response**: We will develop, maintain, and test emergency preparedness and response plans to ensure we can respond effectively to EHS incidents. + +5. **Monitoring and Review**: We will monitor and review our EHS performance regularly to identify opportunities for improvement and ensure we are meeting our objectives. + +This policy reflects our commitment to minimizing our environmental footprint, ensuring the safety and well-being of our employees, and continuously improving our performance. + +Please remember that the implementation of an effective EHS policy requires the involvement and commitment of everyone working at or with Ultralytics. We encourage you to take personal responsibility for your safety and the safety of others, and to take care of the environment in which we live and work. diff --git a/docs/en/help/index.md b/docs/en/help/index.md new file mode 100644 index 0000000..e8f7376 --- /dev/null +++ b/docs/en/help/index.md @@ -0,0 +1,19 @@ +--- +comments: true +description: Find comprehensive guides and documents on Ultralytics YOLO tasks. Includes FAQs, contributing guides, CI guide, CLA, MRE guide, code of conduct & more. +keywords: Ultralytics, YOLO, guides, documents, FAQ, contributing, CI guide, CLA, MRE guide, code of conduct, EHS policy, security policy, privacy policy +--- + +Welcome to the Ultralytics Help page! We are dedicated to providing you with detailed resources to enhance your experience with the Ultralytics YOLO models and repositories. 
This page serves as your portal to guides and documentation designed to assist you with various tasks and answer questions you may encounter while engaging with our repositories. + +- [Frequently Asked Questions (FAQ)](FAQ.md): Find answers to common questions and issues encountered by the community of Ultralytics YOLO users and contributors. +- [Contributing Guide](contributing.md): Discover the protocols for making contributions, including how to submit pull requests, report bugs, and more. +- [Continuous Integration (CI) Guide](CI.md): Gain insights into the CI processes we employ, complete with status reports for each Ultralytics repository. +- [Contributor License Agreement (CLA)](CLA.md): Review the CLA to understand the rights and responsibilities associated with contributing to Ultralytics projects. +- [Minimum Reproducible Example (MRE) Guide](minimum_reproducible_example.md): Learn the process for creating an MRE, which is crucial for the timely and effective resolution of bug reports. +- [Code of Conduct](code_of_conduct.md): Our community guidelines support a respectful and open atmosphere for all collaborators. +- [Environmental, Health and Safety (EHS) Policy](environmental-health-safety.md): Delve into our commitment to sustainability and the well-being of all our stakeholders. +- [Security Policy](security.md): Familiarize yourself with our security protocols and the procedure for reporting vulnerabilities. +- [Privacy Policy](privacy.md): Read our privacy policy to understand how we protect your data and respect your privacy in all our services and operations. + +We encourage you to review these resources for a seamless and productive experience. Our aim is to foster a helpful and friendly environment for everyone in the Ultralytics community. Should you require additional support, please feel free to reach out via GitHub Issues or our official discussion forums. Happy coding! 
diff --git a/docs/en/help/minimum_reproducible_example.md b/docs/en/help/minimum_reproducible_example.md new file mode 100644 index 0000000..47a0cdf --- /dev/null +++ b/docs/en/help/minimum_reproducible_example.md @@ -0,0 +1,78 @@ +--- +comments: true +description: Learn how to create minimum reproducible examples (MRE) for efficient bug reporting in Ultralytics YOLO repositories with this step-by-step guide. +keywords: Ultralytics, YOLO, minimum reproducible example, MRE, bug reports, guide, dependencies, code, troubleshooting +--- + +# Creating a Minimum Reproducible Example for Bug Reports in Ultralytics YOLO Repositories + +When submitting a bug report for Ultralytics YOLO repositories, it's essential to provide a [minimum reproducible example](https://docs.ultralytics.com/help/minimum_reproducible_example/) (MRE). An MRE is a small, self-contained piece of code that demonstrates the problem you're experiencing. Providing an MRE helps maintainers and contributors understand the issue and work on a fix more efficiently. This guide explains how to create an MRE when submitting bug reports to Ultralytics YOLO repositories. + +## 1. Isolate the Problem + +The first step in creating an MRE is to isolate the problem. This means removing any unnecessary code or dependencies that are not directly related to the issue. Focus on the specific part of the code that is causing the problem and remove any irrelevant code. + +## 2. Use Public Models and Datasets + +When creating an MRE, use publicly available models and datasets to reproduce the issue. For example, use the 'yolov8n.pt' model and the 'coco8.yaml' dataset. This ensures that the maintainers and contributors can easily run your example and investigate the problem without needing access to proprietary data or custom models. + +## 3. Include All Necessary Dependencies + +Make sure to include all the necessary dependencies in your MRE. 
If your code relies on external libraries, specify the required packages and their versions. Ideally, provide a `requirements.txt` file or list the dependencies in your bug report. + +## 4. Write a Clear Description of the Issue + +Provide a clear and concise description of the issue you're experiencing. Explain the expected behavior and the actual behavior you're encountering. If applicable, include any relevant error messages or logs. + +## 5. Format Your Code Properly + +When submitting an MRE, format your code properly using code blocks in the issue description. This makes it easier for others to read and understand your code. In GitHub, you can create a code block by wrapping your code with triple backticks (\```) and specifying the language: + +<pre>
+```python
+# Your Python code goes here
+```
+</pre>
+ +## 6. Test Your MRE + +Before submitting your MRE, test it to ensure that it accurately reproduces the issue. Make sure that others can run your example without any issues or modifications. + +## Example of an MRE + +Here's an example of an MRE for a hypothetical bug report: + +**Bug description:** + +When running the `detect.py` script on the sample image from the 'coco8.yaml' dataset, I get an error related to the dimensions of the input tensor. + +**MRE:** + +```python +import torch +from ultralytics import YOLO + +# Load the model +model = YOLO("yolov8n.pt") + +# Load a 0-channel image +image = torch.rand(1, 0, 640, 640) + +# Run the model +results = model(image) +``` + +**Error message:** + +``` +RuntimeError: Expected input[1, 0, 640, 640] to have 3 channels, but got 0 channels instead +``` + +**Dependencies:** + +- torch==2.0.0 +- ultralytics==8.0.90 + +In this example, the MRE demonstrates the issue with a minimal amount of code, uses a public model ('yolov8n.pt'), includes all necessary dependencies, and provides a clear description of the problem along with the error message. + +By following these guidelines, you'll help the maintainers and contributors of Ultralytics YOLO repositories to understand and resolve your issue more efficiently. diff --git a/docs/en/help/privacy.md b/docs/en/help/privacy.md new file mode 100644 index 0000000..c9bc3a5 --- /dev/null +++ b/docs/en/help/privacy.md @@ -0,0 +1,137 @@ +--- +description: Learn about how Ultralytics collects and uses data to improve user experience, ensure software stability, and address privacy concerns, with options to opt-out. 
+keywords: Ultralytics, Data Collection, User Privacy, Google Analytics, Sentry, Crash Reporting, Anonymized Data, Privacy Settings, Opt-Out +--- + +# Data Collection for Ultralytics Python Package + +## Overview + +[Ultralytics](https://ultralytics.com) is dedicated to the continuous enhancement of the user experience and the capabilities of our Python package, including the advanced YOLO models we develop. Our approach involves the gathering of anonymized usage statistics and crash reports, helping us identify opportunities for improvement and ensure the reliability of our software. This transparency document outlines what data we collect, its purpose, and the choices you have regarding this data collection. + +## Anonymized Google Analytics + +[Google Analytics](https://developers.google.com/analytics) is a web analytics service offered by Google that tracks and reports website traffic. It allows us to collect data about how our Python package is used, which is crucial for making informed decisions about design and functionality. + +### What We Collect + +- **Usage Metrics**: These metrics help us understand how frequently and in what ways the package is utilized, what features are favored, and the typical command-line arguments that are used. +- **System Information**: We collect general non-identifiable information about your computing environment to ensure our package performs well across various systems. +- **Performance Data**: Understanding the performance of our models during training, validation, and inference helps us in identifying optimization opportunities. + +For more information about Google Analytics and data privacy, visit [Google Analytics Privacy](https://support.google.com/analytics/answer/6004245). + +### How We Use This Data + +- **Feature Improvement**: Insights from usage metrics guide us in enhancing user satisfaction and interface design.
+- **Optimization**: Performance data assist us in fine-tuning our models for better efficiency and speed across diverse hardware and software configurations. +- **Trend Analysis**: By studying usage trends, we can predict and respond to the evolving needs of our community. + +### Privacy Considerations + +We take several measures to ensure the privacy and security of the data you entrust to us: + +- **Anonymization**: We configure Google Analytics to anonymize the data collected, which means no personally identifiable information (PII) is gathered. You can use our services with the assurance that your personal details remain private. +- **Aggregation**: Data is analyzed only in aggregate form. This practice ensures that patterns can be observed without revealing any individual user's activity. +- **No Image Data Collection**: Ultralytics does not collect, process, or view any training or inference images. + +## Sentry Crash Reporting + +[Sentry](https://sentry.io/) is developer-centric error-tracking software that aids in identifying, diagnosing, and resolving issues in real time, ensuring the robustness and reliability of applications. Within our package, it plays a crucial role by providing insights through crash reporting, significantly contributing to the stability and ongoing refinement of our software. + +!!! Note + + Crash reporting via Sentry is activated only if the `sentry-sdk` Python package is pre-installed on your system. This package isn't included in the `ultralytics` prerequisites and won't be installed automatically by Ultralytics. + +### What We Collect + +If the `sentry-sdk` Python package is pre-installed on your system, a crash event may send the following information: + +- **Crash Logs**: Detailed reports on the application's condition at the time of a crash, which are vital for our debugging efforts.
+- **Error Messages**: We record error messages generated during the operation of our package to understand and resolve potential issues quickly. + +To learn more about how Sentry handles data, please visit [Sentry's Privacy Policy](https://sentry.io/privacy/). + +### How We Use This Data + +- **Debugging**: Analyzing crash logs and error messages enables us to swiftly identify and correct software bugs. +- **Stability Metrics**: By constantly monitoring for crashes, we aim to improve the stability and reliability of our package. + +### Privacy Considerations + +- **Sensitive Information**: We ensure that crash logs are scrubbed of any personally identifiable or sensitive user data, safeguarding the confidentiality of your information. +- **Controlled Collection**: Our crash reporting mechanism is meticulously calibrated to gather only what is essential for troubleshooting while respecting user privacy. + +By detailing the tools used for data collection and offering additional background information with URLs to their respective privacy pages, we aim to give users a comprehensive view of our practices, emphasizing transparency and respect for user privacy. + +## Disabling Data Collection + +We believe in providing our users with full control over their data. By default, our package is configured to collect analytics and crash reports to help improve the experience for all users. However, we respect that some users may prefer to opt out of this data collection. + +To opt out of sending analytics and crash reports, you can simply set `sync=False` in your YOLO settings. This ensures that no data is transmitted from your machine to our analytics tools. + +### Inspecting Settings + +To gain insight into the current configuration of your settings, you can view them directly: + +!!! Example "View settings" + + === "Python" + You can use Python to view your settings. Start by importing the `settings` object from the `ultralytics` module.
Print and return settings using the following commands: + ```python + from ultralytics import settings + + # View all settings + print(settings) + + # Return analytics and crash reporting setting + value = settings['sync'] + ``` + + === "CLI" + Alternatively, the command-line interface allows you to check your settings with a simple command: + ```bash + yolo settings + ``` + +### Modifying Settings + +Ultralytics allows users to easily modify their settings. Changes can be performed in the following ways: + +!!! Example "Update settings" + + === "Python" + Within the Python environment, call the `update` method on the `settings` object to change your settings: + ```python + from ultralytics import settings + + # Disable analytics and crash reporting + settings.update({'sync': False}) + + # Reset settings to default values + settings.reset() + ``` + + === "CLI" + If you prefer using the command-line interface, the following commands will allow you to modify your settings: + ```bash + # Disable analytics and crash reporting + yolo settings sync=False + + # Reset settings to default values + yolo settings reset + ``` + +The `sync=False` setting will prevent any data from being sent to Google Analytics or Sentry. Your settings will be respected across all sessions using the Ultralytics package and saved to disk for future sessions. + +## Commitment to Privacy + +Ultralytics takes user privacy seriously. We design our data collection practices with the following principles: + +- **Transparency**: We are open about the data we collect and how it is used. +- **Control**: We give users full control over their data. +- **Security**: We employ industry-standard security measures to protect the data we collect. + +## Questions or Concerns + +If you have any questions or concerns about our data collection practices, please reach out to us via our [contact form](https://ultralytics.com/contact) or via [support@ultralytics.com](mailto:support@ultralytics.com). 
We are dedicated to ensuring our users feel informed and confident in their privacy when using our package. diff --git a/docs/en/help/security.md b/docs/en/help/security.md new file mode 100644 index 0000000..01ea1f8 --- /dev/null +++ b/docs/en/help/security.md @@ -0,0 +1,36 @@ +--- +description: Explore Ultralytics' comprehensive security strategies safeguarding user data and systems. Learn about our diverse security tools, including Snyk, GitHub CodeQL, and Dependabot Alerts. +keywords: Ultralytics, Comprehensive Security, user data protection, Snyk, GitHub CodeQL, Dependabot, vulnerability management, coding security practices +--- + +# Ultralytics Security Policy + +At [Ultralytics](https://ultralytics.com), the security of our users' data and systems is of utmost importance. To ensure the safety and security of our [open-source projects](https://github.com/ultralytics), we have implemented several measures to detect and prevent security vulnerabilities. + +## Snyk Scanning + +We utilize [Snyk](https://snyk.io/advisor/python/ultralytics) to conduct comprehensive security scans on Ultralytics repositories. Snyk's robust scanning capabilities extend beyond dependency checks; it also examines our code and Dockerfiles for various vulnerabilities. By identifying and addressing these issues proactively, we ensure a higher level of security and reliability for our users. + +[![ultralytics](https://snyk.io/advisor/python/ultralytics/badge.svg)](https://snyk.io/advisor/python/ultralytics) + +## GitHub CodeQL Scanning + +Our security strategy includes GitHub's [CodeQL](https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/about-code-scanning-with-codeql) scanning. CodeQL delves deep into our codebase, identifying complex vulnerabilities like SQL injection and XSS by analyzing the code's semantic structure. This advanced level of analysis ensures early detection and resolution of potential security risks. 
+ +[![CodeQL](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml) + +## GitHub Dependabot Alerts + +[Dependabot](https://docs.github.com/en/code-security/dependabot) is integrated into our workflow to monitor dependencies for known vulnerabilities. When a vulnerability is identified in one of our dependencies, Dependabot alerts us, allowing for swift and informed remediation actions. + +## GitHub Secret Scanning Alerts + +We employ GitHub [secret scanning](https://docs.github.com/en/code-security/secret-scanning/managing-alerts-from-secret-scanning) alerts to detect sensitive data, such as credentials and private keys, accidentally pushed to our repositories. This early detection mechanism helps prevent potential security breaches and data exposures. + +## Private Vulnerability Reporting + +We enable private vulnerability reporting, allowing users to discreetly report potential security issues. This approach facilitates responsible disclosure, ensuring vulnerabilities are handled securely and efficiently. + +If you suspect or discover a security vulnerability in any of our repositories, please let us know immediately. You can reach out to us directly via our [contact form](https://ultralytics.com/contact) or via [security@ultralytics.com](mailto:security@ultralytics.com). Our security team will investigate and respond as soon as possible. + +We appreciate your help in keeping all Ultralytics open-source projects secure and safe for everyone 🙏. diff --git a/docs/en/hub/app/android.md b/docs/en/hub/app/android.md new file mode 100644 index 0000000..0bff31c --- /dev/null +++ b/docs/en/hub/app/android.md @@ -0,0 +1,89 @@ +--- +comments: true +description: Learn about the Ultralytics Android App, enabling real-time object detection using YOLO models. Discover in-app features, quantization methods, and delegate options for optimal performance. 
+keywords: Ultralytics, Android App, real-time object detection, YOLO models, TensorFlow Lite, FP16 quantization, INT8 quantization, CPU, GPU, Hexagon, NNAPI +--- + +# Ultralytics Android App: Real-time Object Detection with YOLO Models + + + Ultralytics HUB preview image +
+
+ Ultralytics GitHub + space + Ultralytics LinkedIn + space + Ultralytics Twitter + space + Ultralytics YouTube + space + Ultralytics TikTok + space + Ultralytics Instagram + space + Ultralytics Discord +
+
+ + Google Play store  +
+ +The Ultralytics Android App is a powerful tool that allows you to run YOLO models directly on your Android device for real-time object detection. This app utilizes TensorFlow Lite for model optimization and various hardware delegates for acceleration, enabling fast and efficient object detection. + +## Quantization and Acceleration + +To achieve real-time performance on your Android device, YOLO models are quantized to either FP16 or INT8 precision. Quantization is a process that reduces the numerical precision of the model's weights and biases, thus reducing the model's size and the amount of computation required. This results in faster inference times without significantly affecting the model's accuracy. + +### FP16 Quantization + +FP16 (or half-precision) quantization converts the model's 32-bit floating-point numbers to 16-bit floating-point numbers. This reduces the model's size by half and speeds up the inference process, while maintaining a good balance between accuracy and performance. + +### INT8 Quantization + +INT8 (or 8-bit integer) quantization further reduces the model's size and computation requirements by converting its 32-bit floating-point numbers to 8-bit integers. This quantization method can result in a significant speedup, but it may lead to a slight reduction in mean average precision (mAP) due to the lower numerical precision. + +!!! Tip "mAP Reduction in INT8 Models" + + The reduced numerical precision in INT8 models can lead to some loss of information during the quantization process, which may result in a slight decrease in mAP. However, this trade-off is often acceptable considering the substantial performance gains offered by INT8 quantization. + +## Delegates and Performance Variability + +Different delegates are available on Android devices to accelerate model inference. 
These delegates include CPU, [GPU](https://www.tensorflow.org/lite/android/delegates/gpu), [Hexagon](https://www.tensorflow.org/lite/android/delegates/hexagon) and [NNAPI](https://www.tensorflow.org/lite/android/delegates/nnapi). The performance of these delegates varies depending on the device's hardware vendor, product line, and specific chipsets used in the device. + +1. **CPU**: The default option, with reasonable performance on most devices. +2. **GPU**: Utilizes the device's GPU for faster inference. It can provide a significant performance boost on devices with powerful GPUs. +3. **Hexagon**: Leverages Qualcomm's Hexagon DSP for faster and more efficient processing. This option is available on devices with Qualcomm Snapdragon processors. +4. **NNAPI**: The Android Neural Networks API (NNAPI) serves as an abstraction layer for running ML models on Android devices. NNAPI can utilize various hardware accelerators, such as CPU, GPU, and dedicated AI chips (e.g., Google's Edge TPU, or the Pixel Neural Core). 
+ +Here's a table showing the primary vendors, their product lines, popular devices, and supported delegates: + +| Vendor | Product Lines | Popular Devices | Delegates Supported | +|-----------------------------------------|--------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------| +| [Qualcomm](https://www.qualcomm.com/) | [Snapdragon (e.g., 800 series)](https://www.qualcomm.com/snapdragon) | [Samsung Galaxy S21](https://www.samsung.com/global/galaxy/galaxy-s21-5g/), [OnePlus 9](https://www.oneplus.com/9), [Google Pixel 6](https://store.google.com/product/pixel_6) | CPU, GPU, Hexagon, NNAPI | +| [Samsung](https://www.samsung.com/) | [Exynos (e.g., Exynos 2100)](https://www.samsung.com/semiconductor/minisite/exynos/) | [Samsung Galaxy S21 (Global version)](https://www.samsung.com/global/galaxy/galaxy-s21-5g/) | CPU, GPU, NNAPI | +| [MediaTek](https://i.mediatek.com/) | [Dimensity (e.g., Dimensity 1200)](https://i.mediatek.com/dimensity-1200) | [Realme GT](https://www.realme.com/global/realme-gt), [Xiaomi Redmi Note](https://www.mi.com/en/phone/redmi/note-list) | CPU, GPU, NNAPI | +| [HiSilicon](https://www.hisilicon.com/) | [Kirin (e.g., Kirin 990)](https://www.hisilicon.com/en/products/Kirin) | [Huawei P40 Pro](https://consumer.huawei.com/en/phones/p40-pro/), [Huawei Mate 30 Pro](https://consumer.huawei.com/en/phones/mate30-pro/) | CPU, GPU, NNAPI | +| [NVIDIA](https://www.nvidia.com/) | [Tegra (e.g., Tegra X1)](https://developer.nvidia.com/content/tegra-x1) | [NVIDIA Shield TV](https://www.nvidia.com/en-us/shield/shield-tv/), [Nintendo Switch](https://www.nintendo.com/switch/) | CPU, GPU, NNAPI | + +Please note that the list of devices mentioned is not exhaustive and may vary depending on the specific chipsets and device models. 
Always test your models on your target devices to ensure compatibility and optimal performance. + +Keep in mind that the choice of delegate can affect performance and model compatibility. For example, some models may not work with certain delegates, or a delegate may not be available on a specific device. As such, it's essential to test your model and the chosen delegate on your target devices for the best results. + +## Getting Started with the Ultralytics Android App + +To get started with the Ultralytics Android App, follow these steps: + +1. Download the Ultralytics App from the [Google Play Store](https://play.google.com/store/apps/details?id=com.ultralytics.ultralytics_app). + +2. Launch the app on your Android device and sign in with your Ultralytics account. If you don't have an account yet, create one [here](https://hub.ultralytics.com/). + +3. Once signed in, you will see a list of your trained YOLO models. Select a model to use for object detection. + +4. Grant the app permission to access your device's camera. + +5. Point your device's camera at objects you want to detect. The app will display bounding boxes and class labels in real-time as it detects objects. + +6. Explore the app's settings to adjust the detection threshold, enable or disable specific object classes, and more. + +With the Ultralytics Android App, you now have the power of real-time object detection using YOLO models right at your fingertips. Enjoy exploring the app's features and optimizing its settings to suit your specific use cases. diff --git a/docs/en/hub/app/index.md b/docs/en/hub/app/index.md new file mode 100644 index 0000000..ef962e8 --- /dev/null +++ b/docs/en/hub/app/index.md @@ -0,0 +1,48 @@ +--- +comments: true +description: Explore the Ultralytics HUB App, offering the ability to run YOLOv5 and YOLOv8 models on your iOS and Android devices with optimized performance. 
+keywords: Ultralytics, HUB App, YOLOv5, YOLOv8, mobile AI, real-time object detection, image recognition, mobile device, hardware acceleration, Apple Neural Engine, Android GPU, NNAPI, custom model training +--- + +# Ultralytics HUB App + + + Ultralytics HUB preview image +
+
+ Ultralytics GitHub + space + Ultralytics LinkedIn + space + Ultralytics Twitter + space + Ultralytics YouTube + space + Ultralytics TikTok + space + Ultralytics Instagram + space + Ultralytics Discord +
+
+ + Apple App store + + Google Play store  +
+ +Welcome to the Ultralytics HUB App! We are excited to introduce this powerful mobile app that allows you to run YOLOv5 and YOLOv8 models directly on your [iOS](https://apps.apple.com/xk/app/ultralytics/id1583935240) and [Android](https://play.google.com/store/apps/details?id=com.ultralytics.ultralytics_app) devices. With the HUB App, you can utilize hardware acceleration features like Apple's Neural Engine (ANE) or Android GPU and Neural Network API (NNAPI) delegates to achieve impressive performance on your mobile device. + +## Features + +- **Run YOLOv5 and YOLOv8 models**: Experience the power of YOLO models on your mobile device for real-time object detection and image recognition tasks. +- **Hardware Acceleration**: Benefit from Apple ANE on iOS devices or Android GPU and NNAPI delegates for optimized performance. +- **Custom Model Training**: Train custom models with the Ultralytics HUB platform and preview them live using the HUB App. +- **Mobile Compatibility**: The HUB App supports both iOS and Android devices, bringing the power of YOLO models to a wide range of users. + +## App Documentation + +- [**iOS**](ios.md): Learn about YOLO CoreML models accelerated on Apple's Neural Engine for iPhones and iPads. +- [**Android**](android.md): Explore TFLite acceleration on Android mobile devices. + +Get started today by downloading the Ultralytics HUB App on your mobile device and unlock the potential of YOLOv5 and YOLOv8 models on-the-go. Don't forget to check out our comprehensive [HUB Docs](../index.md) for more information on training, deploying, and using your custom models with the Ultralytics HUB platform. diff --git a/docs/en/hub/app/ios.md b/docs/en/hub/app/ios.md new file mode 100644 index 0000000..ac939c9 --- /dev/null +++ b/docs/en/hub/app/ios.md @@ -0,0 +1,79 @@ +--- +comments: true +description: Execute object detection in real-time on your iOS devices utilizing YOLO models. 
Leverage the power of the Apple Neural Engine and Core ML for fast and efficient object detection. +keywords: Ultralytics, iOS app, object detection, YOLO models, real time, Apple Neural Engine, Core ML, FP16, INT8, quantization +--- + +# Ultralytics iOS App: Real-time Object Detection with YOLO Models + + + Ultralytics HUB preview image +
+
+ Ultralytics GitHub + space + Ultralytics LinkedIn + space + Ultralytics Twitter + space + Ultralytics YouTube + space + Ultralytics TikTok + space + Ultralytics Instagram + space + Ultralytics Discord +
+
+ + Apple App store +
+ +The Ultralytics iOS App is a powerful tool that allows you to run YOLO models directly on your iPhone or iPad for real-time object detection. This app utilizes the Apple Neural Engine and Core ML for model optimization and acceleration, enabling fast and efficient object detection. + +## Quantization and Acceleration + +To achieve real-time performance on your iOS device, YOLO models are quantized to either FP16 or INT8 precision. Quantization is a process that reduces the numerical precision of the model's weights and biases, thus reducing the model's size and the amount of computation required. This results in faster inference times without significantly affecting the model's accuracy. + +### FP16 Quantization + +FP16 (or half-precision) quantization converts the model's 32-bit floating-point numbers to 16-bit floating-point numbers. This reduces the model's size by half and speeds up the inference process, while maintaining a good balance between accuracy and performance. + +### INT8 Quantization + +INT8 (or 8-bit integer) quantization further reduces the model's size and computation requirements by converting its 32-bit floating-point numbers to 8-bit integers. This quantization method can result in a significant speedup, but it may lead to a slight reduction in accuracy. + +## Apple Neural Engine + +The Apple Neural Engine (ANE) is a dedicated hardware component integrated into Apple's A-series and M-series chips. It's designed to accelerate machine learning tasks, particularly for neural networks, allowing for faster and more efficient execution of your YOLO models. + +By combining quantized YOLO models with the Apple Neural Engine, the Ultralytics iOS App achieves real-time object detection on your iOS device without compromising on accuracy or performance. 
+ +| Release Year | iPhone Name | Chipset Name | Node Size | ANE TOPs | +|--------------|------------------------------------------------------|-------------------------------------------------------|-----------|----------| +| 2017 | [iPhone X](https://en.wikipedia.org/wiki/IPhone_X) | [A11 Bionic](https://en.wikipedia.org/wiki/Apple_A11) | 10 nm | 0.6 | +| 2018 | [iPhone XS](https://en.wikipedia.org/wiki/IPhone_XS) | [A12 Bionic](https://en.wikipedia.org/wiki/Apple_A12) | 7 nm | 5 | +| 2019 | [iPhone 11](https://en.wikipedia.org/wiki/IPhone_11) | [A13 Bionic](https://en.wikipedia.org/wiki/Apple_A13) | 7 nm | 6 | +| 2020 | [iPhone 12](https://en.wikipedia.org/wiki/IPhone_12) | [A14 Bionic](https://en.wikipedia.org/wiki/Apple_A14) | 5 nm | 11 | +| 2021 | [iPhone 13](https://en.wikipedia.org/wiki/IPhone_13) | [A15 Bionic](https://en.wikipedia.org/wiki/Apple_A15) | 5 nm | 15.8 | +| 2022 | [iPhone 14](https://en.wikipedia.org/wiki/IPhone_14) | [A16 Bionic](https://en.wikipedia.org/wiki/Apple_A16) | 4 nm | 17.0 | + +Please note that this list only includes iPhone models from 2017 onwards, and the ANE TOPs values are approximate. + +## Getting Started with the Ultralytics iOS App + +To get started with the Ultralytics iOS App, follow these steps: + +1. Download the Ultralytics App from the [App Store](https://apps.apple.com/xk/app/ultralytics/id1583935240). + +2. Launch the app on your iOS device and sign in with your Ultralytics account. If you don't have an account yet, create one [here](https://hub.ultralytics.com/). + +3. Once signed in, you will see a list of your trained YOLO models. Select a model to use for object detection. + +4. Grant the app permission to access your device's camera. + +5. Point your device's camera at objects you want to detect. The app will display bounding boxes and class labels in real-time as it detects objects. + +6. Explore the app's settings to adjust the detection threshold, enable or disable specific object classes, and more. 
+ +With the Ultralytics iOS App, you can now leverage the power of YOLO models for real-time object detection on your iPhone or iPad, powered by the Apple Neural Engine and optimized with FP16 or INT8 quantization. diff --git a/docs/en/hub/datasets.md b/docs/en/hub/datasets.md new file mode 100644 index 0000000..1ab7c45 --- /dev/null +++ b/docs/en/hub/datasets.md @@ -0,0 +1,159 @@ +--- +comments: true +description: Learn how Ultralytics HUB datasets streamline your ML workflow. Upload, format, validate, access, share, edit or delete datasets for Ultralytics YOLO model training. +keywords: Ultralytics, HUB datasets, YOLO model training, upload datasets, dataset validation, ML workflow, share datasets +--- + +# HUB Datasets + +[Ultralytics HUB](https://hub.ultralytics.com/) datasets are a practical solution for managing and leveraging your custom datasets. + +Once uploaded, datasets can be immediately utilized for model training. This integrated approach facilitates a seamless transition from dataset management to model training, significantly simplifying the entire process. + +## Upload Dataset + +Ultralytics HUB datasets are just like YOLOv5 and YOLOv8 🚀 datasets. They use the same structure and the same label formats to keep everything simple. + +Before you upload a dataset to Ultralytics HUB, make sure to **place your dataset YAML file inside the dataset root directory** and that **your dataset YAML, directory and ZIP have the same name**, as shown in the example below, and then zip the dataset directory. 
+ +For example, if your dataset is called "coco8", like our [COCO8](https://docs.ultralytics.com/datasets/detect/coco8) example dataset, then you should have a `coco8.yaml` inside your `coco8/` directory, which will create a `coco8.zip` when zipped: + +```bash +zip -r coco8.zip coco8 +``` + +You can download our [COCO8](https://github.com/ultralytics/hub/blob/main/example_datasets/coco8.zip) example dataset and unzip it to see exactly how to structure your dataset. + +

+ COCO8 Dataset Structure +

+ +The dataset YAML is the same standard YOLOv5 and YOLOv8 YAML format. + +!!! Example "coco8.yaml" + + ```yaml + --8<-- "ultralytics/cfg/datasets/coco8.yaml" + ``` + +After zipping your dataset, you should validate it before uploading it to Ultralytics HUB. Ultralytics HUB conducts the dataset validation check post-upload, so by ensuring your dataset is correctly formatted and error-free ahead of time, you can forestall any setbacks due to dataset rejection. + +```py +from ultralytics.hub import check_dataset + +check_dataset('path/to/coco8.zip') +``` + +Once your dataset ZIP is ready, navigate to the [Datasets](https://hub.ultralytics.com/datasets) page by clicking on the **Datasets** button in the sidebar. + +![Ultralytics HUB screenshot of the Home page with an arrow pointing to the Datasets button in the sidebar](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_upload_dataset_2.jpg) + +??? tip "Tip" + + You can also upload a dataset directly from the [Home](https://hub.ultralytics.com/home) page. + + ![Ultralytics HUB screenshot of the Home page with an arrow pointing to the Upload Dataset card](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_upload_dataset_3.jpg) + +Click on the **Upload Dataset** button on the top right of the page. This action will trigger the **Upload Dataset** dialog. + +![Ultralytics HUB screenshot of the Dataset page with an arrow pointing to the Upload Dataset button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_upload_dataset_4.jpg) + +Upload your dataset in the _Dataset .zip file_ field. + +You have the additional option to set a custom name and description for your Ultralytics HUB dataset. + +When you're happy with your dataset configuration, click **Upload**. 
+ +![Ultralytics HUB screenshot of the Upload Dataset dialog with an arrow pointing to the Upload button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_upload_dataset_5.jpg) + +After your dataset is uploaded and processed, you will be able to access it from the Datasets page. + +![Ultralytics HUB screenshot of the Datasets page with an arrow pointing to one of the datasets](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_upload_dataset_6.jpg) + +You can view the images in your dataset grouped by splits (Train, Validation, Test). + +![Ultralytics HUB screenshot of the Dataset page with an arrow pointing to the Images tab](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_upload_dataset_7.jpg) + +??? tip "Tip" + + Each image can be enlarged for better visualization. + + ![Ultralytics HUB screenshot of the Images tab inside the Dataset page with an arrow pointing to the expand icon](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_upload_dataset_8.jpg) + + ![Ultralytics HUB screenshot of the Images tab inside the Dataset page with one of the images expanded](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_upload_dataset_9.jpg) + +Also, you can analyze your dataset by clicking on the **Overview** tab. + +![Ultralytics HUB screenshot of the Dataset page with an arrow pointing to the Overview tab](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_upload_dataset_10.jpg) + +Next, [train a model](https://docs.ultralytics.com/hub/models/#train-model) on your dataset. + +![Ultralytics HUB screenshot of the Dataset page with an arrow pointing to the Train Model button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_upload_dataset_11.jpg) + +## Share Dataset + +!!!
Info "Info" + + Ultralytics HUB's sharing functionality provides a convenient way to share datasets with others. This feature is designed to accommodate both existing Ultralytics HUB users and those who have yet to create an account. + +??? note "Note" + + You have control over the general access of your datasets. + + You can choose to set the general access to "Private", in which case, only you will have access to it. Alternatively, you can set the general access to "Unlisted" which grants viewing access to anyone who has the direct link to the dataset, regardless of whether they have an Ultralytics HUB account or not. + +Navigate to the Dataset page of the dataset you want to share, open the dataset actions dropdown and click on the **Share** option. This action will trigger the **Share Dataset** dialog. + +![Ultralytics HUB screenshot of the Dataset page with an arrow pointing to the Share option](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_share_dataset_1.jpg) + +??? tip "Tip" + + You can also share a dataset directly from the [Datasets](https://hub.ultralytics.com/datasets) page. + + ![Ultralytics HUB screenshot of the Datasets page with an arrow pointing to the Share option of one of the datasets](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_share_dataset_2.jpg) + +Set the general access to "Unlisted" and click **Save**. + +![Ultralytics HUB screenshot of the Share Dataset dialog with an arrow pointing to the dropdown and one to the Save button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_share_dataset_3.jpg) + +Now, anyone who has the direct link to your dataset can view it. + +??? tip "Tip" + + You can easily click on the dataset's link shown in the **Share Dataset** dialog to copy it. 
+ + ![Ultralytics HUB screenshot of the Share Dataset dialog with an arrow pointing to the dataset's link](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_share_dataset_4.jpg) + +## Edit Dataset + +Navigate to the Dataset page of the dataset you want to edit, open the dataset actions dropdown and click on the **Edit** option. This action will trigger the **Update Dataset** dialog. + +![Ultralytics HUB screenshot of the Dataset page with an arrow pointing to the Edit option](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_edit_dataset_1.jpg) + +??? tip "Tip" + + You can also edit a dataset directly from the [Datasets](https://hub.ultralytics.com/datasets) page. + + ![Ultralytics HUB screenshot of the Datasets page with an arrow pointing to the Edit option of one of the datasets](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_edit_dataset_2.jpg) + +Apply the desired modifications to your dataset and then confirm the changes by clicking **Save**. + +![Ultralytics HUB screenshot of the Update Dataset dialog with an arrow pointing to the Save button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_edit_dataset_3.jpg) + +## Delete Dataset + +Navigate to the Dataset page of the dataset you want to delete, open the dataset actions dropdown and click on the **Delete** option. This action will delete the dataset. + +![Ultralytics HUB screenshot of the Dataset page with an arrow pointing to the Delete option](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_delete_dataset_1.jpg) + +??? tip "Tip" + + You can also delete a dataset directly from the [Datasets](https://hub.ultralytics.com/datasets) page. 
+ + ![Ultralytics HUB screenshot of the Datasets page with an arrow pointing to the Delete option of one of the datasets](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_delete_dataset_2.jpg) + +??? note "Note" + + If you change your mind, you can restore the dataset from the [Trash](https://hub.ultralytics.com/trash) page. + + ![Ultralytics HUB screenshot of the Trash page with an arrow pointing to the Restore option of one of the datasets](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_delete_dataset_3.jpg) diff --git a/docs/en/hub/index.md b/docs/en/hub/index.md new file mode 100644 index 0000000..6920953 --- /dev/null +++ b/docs/en/hub/index.md @@ -0,0 +1,61 @@ +--- +comments: true +description: Gain insights into training and deploying your YOLOv5 and YOLOv8 models with Ultralytics HUB. Explore pre-trained models, templates and various integrations. +keywords: Ultralytics HUB, YOLOv5, YOLOv8, model training, model deployment, pretrained models, model integrations +--- + +# Ultralytics HUB + + + Ultralytics HUB preview image +
+
+ Ultralytics GitHub + space + Ultralytics LinkedIn + space + Ultralytics Twitter + space + Ultralytics YouTube + space + Ultralytics TikTok + space + Ultralytics Instagram + space + Ultralytics Discord +
+
+ + CI CPU + + Open In Colab +
+ +👋 Hello from the [Ultralytics](https://ultralytics.com/) Team! We've been working hard these last few months to launch [Ultralytics HUB](https://bit.ly/ultralytics_hub), a new web tool for training and deploying all your YOLOv5 and YOLOv8 🚀 models from one spot! + +## Introduction + +HUB is designed to be user-friendly and intuitive, with a drag-and-drop interface that allows users to easily upload their data and train new models quickly. It offers a range of pre-trained models and templates to choose from, making it easy for users to get started with training their own models. Once a model is trained, it can be easily deployed and used for real-time object detection, instance segmentation and classification tasks. + +

+
+ +
+ Watch: Train Your Custom YOLO Models In A Few Clicks with Ultralytics HUB. +

+ +We hope that the resources here will help you get the most out of HUB. Please browse the HUB Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions! + +- [**Quickstart**](quickstart.md). Start training and deploying YOLO models with HUB in seconds. +- [**Datasets: Preparing and Uploading**](datasets.md). Learn how to prepare and upload your datasets to HUB in YOLO format. +- [**Projects: Creating and Managing**](projects.md). Group your models into projects for improved organization. +- [**Models: Training and Exporting**](models.md). Train YOLOv5 and YOLOv8 models on your custom datasets and export them to various formats for deployment. +- [**Integrations: Options**](integrations.md). Explore different integration options for your trained models, such as TensorFlow, ONNX, OpenVINO, CoreML, and PaddlePaddle. +- [**Ultralytics HUB App**](app/index.md). Learn about the Ultralytics App for iOS and Android, which allows you to run models directly on your mobile device. + * [**iOS**](app/ios.md). Learn about YOLO CoreML models accelerated on Apple's Neural Engine on iPhones and iPads. + * [**Android**](app/android.md). Explore TFLite acceleration on mobile devices. +- [**Inference API**](inference_api.md). Understand how to use the Inference API for running your trained models in the cloud to generate predictions. diff --git a/docs/en/hub/inference_api.md b/docs/en/hub/inference_api.md new file mode 100644 index 0000000..7ca6822 --- /dev/null +++ b/docs/en/hub/inference_api.md @@ -0,0 +1,458 @@ +--- +comments: true +description: Access object detection capabilities of YOLOv8 via our RESTful API. Learn how to use the YOLO Inference API with Python or CLI for swift object detection. 
+keywords: Ultralytics, YOLOv8, Inference API, object detection, RESTful API, Python, CLI, Quickstart +--- + +# YOLO Inference API + +The YOLO Inference API allows you to access the YOLOv8 object detection capabilities via a RESTful API. This enables you to run object detection on images without the need to install and set up the YOLOv8 environment locally. + +![Inference API Screenshot](https://github.com/ultralytics/ultralytics/assets/26833433/c0109ec0-7bb0-46e1-b0d2-bae687960a01) +Screenshot of the Inference API section in the trained model Preview tab. + +## API URL + +The API URL is the address used to access the YOLO Inference API. In this case, the base URL is: + +``` +https://api.ultralytics.com/v1/predict +``` + +## Example Usage in Python + +To access the YOLO Inference API with the specified model and API key using Python, you can use the following code: + +```python +import requests + +# API URL, use actual MODEL_ID +url = f"https://api.ultralytics.com/v1/predict/MODEL_ID" + +# Headers, use actual API_KEY +headers = {"x-api-key": "API_KEY"} + +# Inference arguments (optional) +data = {"size": 640, "confidence": 0.25, "iou": 0.45} + +# Load image and send request +with open("path/to/image.jpg", "rb") as image_file: + files = {"image": image_file} + response = requests.post(url, headers=headers, files=files, data=data) + +print(response.json()) +``` + +In this example, replace `API_KEY` with your actual API key, `MODEL_ID` with the desired model ID, and `path/to/image.jpg` with the path to the image you want to analyze. + +## Example Usage with CLI + +You can use the YOLO Inference API with the command-line interface (CLI) by utilizing the `curl` command. 
Replace `API_KEY` with your actual API key, `MODEL_ID` with the desired model ID, and `/path/to/image.jpg` with the path to the image you want to analyze: + +```bash +curl -X POST "https://api.ultralytics.com/v1/predict/MODEL_ID" \ + -H "x-api-key: API_KEY" \ + -F "image=@/path/to/image.jpg" \ + -F "size=640" \ + -F "confidence=0.25" \ + -F "iou=0.45" +``` + +## Passing Arguments + +This command sends a POST request to the YOLO Inference API with the specified `MODEL_ID` in the URL and the `API_KEY` in the request `headers`, along with the image file specified by `@/path/to/image.jpg`. + +Here's an example of passing the `size`, `confidence`, and `iou` arguments as form data in the POST request using the `requests` library in Python: + +```python +import requests + +# API URL, use actual MODEL_ID +url = f"https://api.ultralytics.com/v1/predict/MODEL_ID" + +# Headers, use actual API_KEY +headers = {"x-api-key": "API_KEY"} + +# Inference arguments (optional) +data = {"size": 640, "confidence": 0.25, "iou": 0.45} + +# Load image and send request +with open("path/to/image.jpg", "rb") as image_file: + files = {"image": image_file} + response = requests.post(url, headers=headers, files=files, data=data) + +print(response.json()) +``` + +In this example, the `data` dictionary contains the query arguments `size`, `confidence`, and `iou`, which tell the API to run inference at image size 640 with confidence and IoU thresholds of 0.25 and 0.45. + +This will send the query parameters along with the file in the POST request. See the table below for a full list of available inference arguments.
+ +| Inference Argument | Default | Type | Notes | +|--------------------|---------|---------|------------------------------------------------| +| `size` | `640` | `int` | valid range is `32` - `1280` pixels | +| `confidence` | `0.25` | `float` | valid range is `0.01` - `1.0` | +| `iou` | `0.45` | `float` | valid range is `0.0` - `0.95` | +| `url` | `''` | `str` | optional image URL if no image file is passed | +| `normalize` | `False` | `bool` | | + +## Return JSON format + +The YOLO Inference API returns a JSON list with the detection results. The format of the JSON list will be the same as the one produced locally by the `results[0].tojson()` command. + +The JSON list contains information about the detected objects, their coordinates, classes, and confidence scores. + +### Detect Model Format + +YOLO detection models, such as `yolov8n.pt`, can return JSON responses from local inference, CLI API inference, and Python API inference. All of these methods produce the same JSON response format. + +!!!
Example "Detect Model JSON Response" + + === "Local" + ```python + from ultralytics import YOLO + + # Load model + model = YOLO('yolov8n.pt') + + # Run inference + results = model('image.jpg') + + # Print image.jpg results in JSON format + print(results[0].tojson()) + ``` + + === "CLI API" + ```bash + curl -X POST "https://api.ultralytics.com/v1/predict/MODEL_ID" \ + -H "x-api-key: API_KEY" \ + -F "image=@/path/to/image.jpg" \ + -F "size=640" \ + -F "confidence=0.25" \ + -F "iou=0.45" + ``` + + === "Python API" + ```python + import requests + + # API URL, use actual MODEL_ID + url = f"https://api.ultralytics.com/v1/predict/MODEL_ID" + + # Headers, use actual API_KEY + headers = {"x-api-key": "API_KEY"} + + # Inference arguments (optional) + data = {"size": 640, "confidence": 0.25, "iou": 0.45} + + # Load image and send request + with open("path/to/image.jpg", "rb") as image_file: + files = {"image": image_file} + response = requests.post(url, headers=headers, files=files, data=data) + + print(response.json()) + ``` + + === "JSON Response" + ```json + { + "success": True, + "message": "Inference complete.", + "data": [ + { + "name": "person", + "class": 0, + "confidence": 0.8359682559967041, + "box": { + "x1": 0.08974208831787109, + "y1": 0.27418340047200523, + "x2": 0.8706787109375, + "y2": 0.9887352837456598 + } + }, + { + "name": "person", + "class": 0, + "confidence": 0.8189555406570435, + "box": { + "x1": 0.5847355842590332, + "y1": 0.05813225640190972, + "x2": 0.8930277824401855, + "y2": 0.9903111775716146 + } + }, + { + "name": "tie", + "class": 27, + "confidence": 0.2909725308418274, + "box": { + "x1": 0.3433395862579346, + "y1": 0.6070465511745877, + "x2": 0.40964522361755373, + "y2": 0.9849439832899306 + } + } + ] + } + ``` + +### Segment Model Format + +YOLO segmentation models, such as `yolov8n-seg.pt`, can return JSON responses from local inference, CLI API inference, and Python API inference. All of these methods produce the same JSON response format. 
+ +!!! Example "Segment Model JSON Response" + + === "Local" + ```python + from ultralytics import YOLO + + # Load model + model = YOLO('yolov8n-seg.pt') + + # Run inference + results = model('image.jpg') + + # Print image.jpg results in JSON format + print(results[0].tojson()) + ``` + + === "CLI API" + ```bash + curl -X POST "https://api.ultralytics.com/v1/predict/MODEL_ID" \ + -H "x-api-key: API_KEY" \ + -F "image=@/path/to/image.jpg" \ + -F "size=640" \ + -F "confidence=0.25" \ + -F "iou=0.45" + ``` + + === "Python API" + ```python + import requests + + # API URL, use actual MODEL_ID + url = f"https://api.ultralytics.com/v1/predict/MODEL_ID" + + # Headers, use actual API_KEY + headers = {"x-api-key": "API_KEY"} + + # Inference arguments (optional) + data = {"size": 640, "confidence": 0.25, "iou": 0.45} + + # Load image and send request + with open("path/to/image.jpg", "rb") as image_file: + files = {"image": image_file} + response = requests.post(url, headers=headers, files=files, data=data) + + print(response.json()) + ``` + + === "JSON Response" + Note `segments` `x` and `y` lengths may vary from one object to another. Larger or more complex objects may have more segment points. + ```json + { + "success": True, + "message": "Inference complete.", + "data": [ + { + "name": "person", + "class": 0, + "confidence": 0.856913149356842, + "box": { + "x1": 0.1064866065979004, + "y1": 0.2798851860894097, + "x2": 0.8738358497619629, + "y2": 0.9894873725043403 + }, + "segments": { + "x": [ + 0.421875, + 0.4203124940395355, + 0.41718751192092896 + ... + ], + "y": [ + 0.2888889014720917, + 0.2916666567325592, + 0.2916666567325592 + ... + ] + } + }, + { + "name": "person", + "class": 0, + "confidence": 0.8512625694274902, + "box": { + "x1": 0.5757311820983887, + "y1": 0.053943040635850696, + "x2": 0.8960096359252929, + "y2": 0.985154045952691 + }, + "segments": { + "x": [ + 0.7515624761581421, + 0.75, + 0.7437499761581421 + ... 
+ ], + "y": [ + 0.0555555559694767, + 0.05833333358168602, + 0.05833333358168602 + ... + ] + } + }, + { + "name": "tie", + "class": 27, + "confidence": 0.6485961675643921, + "box": { + "x1": 0.33911995887756347, + "y1": 0.6057066175672743, + "x2": 0.4081430912017822, + "y2": 0.9916408962673611 + }, + "segments": { + "x": [ + 0.37187498807907104, + 0.37031251192092896, + 0.3687500059604645 + ... + ], + "y": [ + 0.6111111044883728, + 0.6138888597488403, + 0.6138888597488403 + ... + ] + } + } + ] + } + ``` + +### Pose Model Format + +YOLO pose models, such as `yolov8n-pose.pt`, can return JSON responses from local inference, CLI API inference, and Python API inference. All of these methods produce the same JSON response format. + +!!! Example "Pose Model JSON Response" + + === "Local" + ```python + from ultralytics import YOLO + + # Load model + model = YOLO('yolov8n-pose.pt') + + # Run inference + results = model('image.jpg') + + # Print image.jpg results in JSON format + print(results[0].tojson()) + ``` + + === "CLI API" + ```bash + curl -X POST "https://api.ultralytics.com/v1/predict/MODEL_ID" \ + -H "x-api-key: API_KEY" \ + -F "image=@/path/to/image.jpg" \ + -F "size=640" \ + -F "confidence=0.25" \ + -F "iou=0.45" + ``` + + === "Python API" + ```python + import requests + + # API URL, use actual MODEL_ID + url = f"https://api.ultralytics.com/v1/predict/MODEL_ID" + + # Headers, use actual API_KEY + headers = {"x-api-key": "API_KEY"} + + # Inference arguments (optional) + data = {"size": 640, "confidence": 0.25, "iou": 0.45} + + # Load image and send request + with open("path/to/image.jpg", "rb") as image_file: + files = {"image": image_file} + response = requests.post(url, headers=headers, files=files, data=data) + + print(response.json()) + ``` + + === "JSON Response" + Note COCO-keypoints pretrained models will have 17 human keypoints. The `visible` part of the keypoints indicates whether a keypoint is visible or obscured.
Obscured keypoints may be outside the image or may not be visible, e.g., a person's eyes facing away from the camera. + ```json + { + "success": True, + "message": "Inference complete.", + "data": [ + { + "name": "person", + "class": 0, + "confidence": 0.8439509868621826, + "box": { + "x1": 0.1125, + "y1": 0.28194444444444444, + "x2": 0.7953125, + "y2": 0.9902777777777778 + }, + "keypoints": { + "x": [ + 0.5058594942092896, + 0.5103894472122192, + 0.4920862317085266 + ... + ], + "y": [ + 0.48964157700538635, + 0.4643048942089081, + 0.4465252459049225 + ... + ], + "visible": [ + 0.8726999163627625, + 0.653947651386261, + 0.9130823612213135 + ... + ] + } + }, + { + "name": "person", + "class": 0, + "confidence": 0.7474289536476135, + "box": { + "x1": 0.58125, + "y1": 0.0625, + "x2": 0.8859375, + "y2": 0.9888888888888889 + }, + "keypoints": { + "x": [ + 0.778544008731842, + 0.7976160049438477, + 0.7530890107154846 + ... + ], + "y": [ + 0.27595141530036926, + 0.2378823608160019, + 0.23644638061523438 + ... + ], + "visible": [ + 0.8900790810585022, + 0.789978563785553, + 0.8974530100822449 + ... + ] + } + } + ] + } + ``` diff --git a/docs/en/hub/integrations.md b/docs/en/hub/integrations.md new file mode 100644 index 0000000..271c5e4 --- /dev/null +++ b/docs/en/hub/integrations.md @@ -0,0 +1,62 @@ +--- +comments: true +description: Explore integration options for Ultralytics HUB. Currently featuring Roboflow for dataset integration and multiple export formats for your trained models. +keywords: Ultralytics HUB, Integrations, Roboflow, Dataset, Export, YOLOv5, YOLOv8, ONNX, CoreML, TensorRT, TensorFlow +--- + +# HUB Integrations + +🚧 **Under Construction** 🚧 + +Welcome to the Integrations guide for [Ultralytics HUB](https://hub.ultralytics.com/)! We are in the process of expanding this section to provide you with comprehensive guidance on integrating your YOLOv5 and YOLOv8 models with various platforms and formats.
Currently, Roboflow is our available dataset integration, with a wide range of export integrations for your trained models. + +

+
+ +
+ Watch: Train Your Custom YOLO Models In A Few Clicks with Ultralytics HUB. +

+ +## Available Integrations + +### Dataset Integrations + +- **Roboflow**: Seamlessly import your datasets for training. + +### Export Integrations + +| Format | `format` Argument | Model | Metadata | Arguments | +|--------------------------------------------------------------------|-------------------|---------------------------|----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [NCNN](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +## Coming Soon + +- Additional Dataset Integrations +- Detailed Export Integration Guides +- Step-by-Step Tutorials for Each Integration + +## Need 
Immediate Assistance? + +While we're in the process of creating detailed guides: + +- Browse through other [HUB Docs](https://docs.ultralytics.com/hub/) for detailed guides and tutorials. +- Raise an issue on our [GitHub](https://github.com/ultralytics/hub/) for technical support. +- Join our [Discord Community](https://ultralytics.com/discord/) for live discussions and community support. + +We appreciate your patience as we work to make this section comprehensive and user-friendly. Stay tuned for updates! diff --git a/docs/en/hub/models.md b/docs/en/hub/models.md new file mode 100644 index 0000000..9762f34 --- /dev/null +++ b/docs/en/hub/models.md @@ -0,0 +1,213 @@ +--- +comments: true +description: Learn how to use Ultralytics HUB models for efficient and user-friendly AI model training. For easy model creation, training, evaluation and deployment, follow our detailed guide. +keywords: Ultralytics, HUB Models, AI model training, model creation, model training, model evaluation, model deployment +--- + +# Ultralytics HUB Models + +[Ultralytics HUB](https://hub.ultralytics.com/) models provide a streamlined solution for training vision AI models on your custom datasets. + +The process is user-friendly and efficient, involving a simple three-step creation process and accelerated training powered by Ultralytics YOLOv8. During training, real-time updates on model metrics are available so that you can monitor each step of the progress. Once training is completed, you can preview your model and easily deploy it to real-world applications. Therefore, Ultralytics HUB offers a comprehensive yet straightforward system for model creation, training, evaluation, and deployment. + +## Train Model + +Navigate to the [Models](https://hub.ultralytics.com/models) page by clicking on the **Models** button in the sidebar.
+ +![Ultralytics HUB screenshot of the Home page with an arrow pointing to the Models button in the sidebar](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_train_model_1.jpg) + +??? tip "Tip" + + You can also train a model directly from the [Home](https://hub.ultralytics.com/home) page. + + ![Ultralytics HUB screenshot of the Home page with an arrow pointing to the Train Model card](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_train_model_2.jpg) + +Click on the **Train Model** button on the top right of the page. This action will trigger the **Train Model** dialog. + +![Ultralytics HUB screenshot of the Models page with an arrow pointing to the Train Model button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_train_model_3.jpg) + +The **Train Model** dialog has three simple steps, explained below. + +### 1. Dataset + +In this step, you have to select the dataset you want to train your model on. After you have selected a dataset, click **Continue**. + +![Ultralytics HUB screenshot of the Train Model dialog with an arrow pointing to a dataset and one to the Continue button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_train_model_4.jpg) + +??? tip "Tip" + + You can skip this step if you train a model directly from the Dataset page. + + ![Ultralytics HUB screenshot of the Dataset page with an arrow pointing to the Train Model button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_train_model_5.jpg) + +### 2. Model + +In this step, you have to choose the project in which you want to create your model, the name of your model and your model's architecture. + +??? note "Note" + + Ultralytics HUB will try to pre-select the project. + + If you opened the **Train Model** dialog as described above, Ultralytics HUB will pre-select the last project you used.
+ + If you opened the **Train Model** dialog from the Project page, Ultralytics HUB will pre-select the project you were inside of. + + ![Ultralytics HUB screenshot of the Project page with an arrow pointing to the Train Model button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_train_model_6.jpg) + + In case you don't have a project created yet, you can set the name of your project in this step and it will be created together with your model. + + ![Ultralytics HUB screenshot of the Train Model dialog with an arrow pointing to the project name](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_train_model_7.jpg) + +!!! Info "Info" + + You can read more about the available [YOLOv8](https://docs.ultralytics.com/models/yolov8) (and [YOLOv5](https://docs.ultralytics.com/models/yolov5)) architectures in our documentation. + +When you're happy with your model configuration, click **Continue**. + +![Ultralytics HUB screenshot of the Train Model dialog with an arrow pointing to a model architecture and one to the Continue button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_train_model_8.jpg) + +??? note "Note" + + By default, your model will use a pre-trained model (trained on the [COCO](https://docs.ultralytics.com/datasets/detect/coco) dataset) to reduce training time. + + You can change this behaviour by opening the **Advanced Options** accordion. + +### 3. Train + +In this step, you will start training your model. + +Ultralytics HUB offers three training options: + +- Ultralytics Cloud **(COMING SOON)** +- Google Colab +- Bring your own agent + +In order to start training your model, follow the instructions presented in this step. + +![Ultralytics HUB screenshot of the Train Model dialog with an arrow pointing to each step](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_train_model_9.jpg) + +??? 
note "Note" + + When you are on this step, before the training starts, you can change the default training configuration by opening the **Advanced Options** accordion. + + ![Ultralytics HUB screenshot of the Train Model dialog with an arrow pointing to the Train Advanced Options](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_train_model_10.jpg) + +??? note "Note" + + When you are on this step, you have the option to close the **Train Model** dialog and start training your model from the Model page later. + + ![Ultralytics HUB screenshot of the Model page with an arrow pointing to the Start Training card](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_train_model_11.jpg) + +To start training your model using Google Colab, simply follow the instructions shown above or on the Google Colab notebook. + + + Open In Colab + + +When the training starts, you can click **Done** and monitor the training progress on the Model page. + +![Ultralytics HUB screenshot of the Train Model dialog with an arrow pointing to the Done button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_train_model_12.jpg) + +![Ultralytics HUB screenshot of the Model page of a model that is currently training](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_train_model_13.jpg) + +??? note "Note" + + In case the training stops and a checkpoint was saved, you can resume training your model from the Model page. + + ![Ultralytics HUB screenshot of the Model page with an arrow pointing to the Resume Training card](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_train_model_14.jpg) + +## Preview Model + +Ultralytics HUB offers a variety of ways to preview your trained model. + +You can preview your model if you click on the **Preview** tab and upload an image in the **Test** card. 
+ +![Ultralytics HUB screenshot of the Preview tab (Test card) inside the Model page](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_preview_model_1.jpg) + +You can also use our Ultralytics Cloud API to effortlessly [run inference](https://docs.ultralytics.com/hub/inference_api) with your custom model. + +![Ultralytics HUB screenshot of the Preview tab (Ultralytics Cloud API card) inside the Model page](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_preview_model_2.jpg) + +Furthermore, you can preview your model in real-time directly on your [iOS](https://apps.apple.com/xk/app/ultralytics/id1583935240) or [Android](https://play.google.com/store/apps/details?id=com.ultralytics.ultralytics_app) mobile device by [downloading](https://ultralytics.com/app_install) our [Ultralytics HUB Mobile Application](app/index.md). + +![Ultralytics HUB screenshot of the Deploy tab inside the Model page with arrow pointing to the Real-Time Preview card](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_preview_model_3.jpg) + +## Deploy Model + +You can export your model to 13 different formats, including ONNX, OpenVINO, CoreML, TensorFlow, Paddle and many others. + +![Ultralytics HUB screenshot of the Deploy tab inside the Model page with all formats exported](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_deploy_model_1.jpg) + +??? tip "Tip" + + You can customize the export options of each format if you open the export actions dropdown and click on the **Advanced** option. + + ![Ultralytics HUB screenshot of the Deploy tab inside the Model page with an arrow pointing to the Advanced option](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_deploy_model_2.jpg) + +## Share Model + +!!! Info "Info" + + Ultralytics HUB's sharing functionality provides a convenient way to share models with others. 
This feature is designed to accommodate both existing Ultralytics HUB users and those who have yet to create an account. + +??? note "Note" + + You have control over the general access of your models. + + You can choose to set the general access to "Private", in which case, only you will have access to it. Alternatively, you can set the general access to "Unlisted" which grants viewing access to anyone who has the direct link to the model, regardless of whether they have an Ultralytics HUB account or not. + +Navigate to the Model page of the model you want to share, open the model actions dropdown and click on the **Share** option. This action will trigger the **Share Model** dialog. + +![Ultralytics HUB screenshot of the Model page with an arrow pointing to the Share option](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_share_model_1.jpg) + +??? tip "Tip" + + You can also share a model directly from the [Models](https://hub.ultralytics.com/models) page or from the Project page of the project where your model is located. + + ![Ultralytics HUB screenshot of the Models page with an arrow pointing to the Share option of one of the models](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_share_model_2.jpg) + +Set the general access to "Unlisted" and click **Save**. + +![Ultralytics HUB screenshot of the Share Model dialog with an arrow pointing to the dropdown and one to the Save button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_share_model_3.jpg) + +Now, anyone who has the direct link to your model can view it. + +??? tip "Tip" + + You can easily click on the model's link shown in the **Share Model** dialog to copy it. 
+ + ![Ultralytics HUB screenshot of the Share Model dialog with an arrow pointing to the model's link](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_share_model_4.jpg) + +## Edit Model + +Navigate to the Model page of the model you want to edit, open the model actions dropdown and click on the **Edit** option. This action will trigger the **Update Model** dialog. + +![Ultralytics HUB screenshot of the Model page with an arrow pointing to the Edit option](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_edit_model_1.jpg) + +??? tip "Tip" + + You can also edit a model directly from the [Models](https://hub.ultralytics.com/models) page or from the Project page of the project where your model is located. + + ![Ultralytics HUB screenshot of the Models page with an arrow pointing to the Edit option of one of the models](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_edit_model_2.jpg) + +Apply the desired modifications to your model and then confirm the changes by clicking **Save**. + +![Ultralytics HUB screenshot of the Update Model dialog with an arrow pointing to the Save button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_edit_model_3.jpg) + +## Delete Model + +Navigate to the Model page of the model you want to delete, open the model actions dropdown and click on the **Delete** option. This action will delete the model. + +![Ultralytics HUB screenshot of the Model page with an arrow pointing to the Delete option](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_delete_model_1.jpg) + +??? tip "Tip" + + You can also delete a model directly from the [Models](https://hub.ultralytics.com/models) page or from the Project page of the project where your model is located. 
+ + ![Ultralytics HUB screenshot of the Models page with an arrow pointing to the Delete option of one of the models](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_delete_model_2.jpg) + +??? note "Note" + + If you change your mind, you can restore the model from the [Trash](https://hub.ultralytics.com/trash) page. + + ![Ultralytics HUB screenshot of the Trash page with an arrow pointing to the Restore option of one of the models](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/models/hub_delete_model_3.jpg) diff --git a/docs/en/hub/projects.md b/docs/en/hub/projects.md new file mode 100644 index 0000000..c0cbf20 --- /dev/null +++ b/docs/en/hub/projects.md @@ -0,0 +1,180 @@ +--- +comments: true +description: Learn how to manage Ultralytics HUB projects. Understand effective strategies to create, share, edit, delete, and compare models in an organized workspace. +keywords: Ultralytics, HUB projects, Create project, Edit project, Share project, Delete project, Compare Models, Model Management +--- + +# Ultralytics HUB Projects + +[Ultralytics HUB](https://hub.ultralytics.com/) projects provide an effective solution for consolidating and managing your models. If you are working with several models that perform similar tasks or have related purposes, Ultralytics HUB projects allow you to group these models together. + +This creates a unified and organized workspace that facilitates easier model management, comparison and development. Having similar models or various iterations together can facilitate rapid benchmarking, as you can compare their effectiveness. This can lead to faster, more insightful iterative development and refinement of your models. + +

+
+ +
+ Watch: Train YOLOv8 Pose Model on Tiger-Pose Dataset Using Ultralytics HUB +

+ +## Create Project + +Navigate to the [Projects](https://hub.ultralytics.com/projects) page by clicking on the **Projects** button in the sidebar. + +![Ultralytics HUB screenshot of the Home page with an arrow pointing to the Projects button in the sidebar](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_create_project_1.jpg) + +??? tip "Tip" + + You can also create a project directly from the [Home](https://hub.ultralytics.com/home) page. + + ![Ultralytics HUB screenshot of the Home page with an arrow pointing to the Create Project card](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_create_project_2.jpg) + +Click on the **Create Project** button on the top right of the page. This action will trigger the **Create Project** dialog, opening up a suite of options for tailoring your project to your needs. + +![Ultralytics HUB screenshot of the Projects page with an arrow pointing to the Create Project button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_create_project_3.jpg) + +Type the name of your project in the _Project name_ field or keep the default name and finalize the project creation with a single click. + +You have the additional option to enrich your project with a description and a unique image, enhancing its recognizability on the Projects page. + +When you're happy with your project configuration, click **Create**. + +![Ultralytics HUB screenshot of the Create Project dialog with an arrow pointing to the Create button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_create_project_4.jpg) + +After your project is created, you will be able to access it from the Projects page. 
+ +![Ultralytics HUB screenshot of the Projects page with an arrow pointing to one of the projects](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_create_project_5.jpg) + +Next, [train a model](https://docs.ultralytics.com/hub/models/#train-model) inside your project. + +![Ultralytics HUB screenshot of the Project page with an arrow pointing to the Train Model button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_create_project_6.jpg) + +## Share Project + +!!! Info "Info" + + Ultralytics HUB's sharing functionality provides a convenient way to share projects with others. This feature is designed to accommodate both existing Ultralytics HUB users and those who have yet to create an account. + +??? note "Note" + + You have control over the general access of your projects. + + You can choose to set the general access to "Private", in which case, only you will have access to it. Alternatively, you can set the general access to "Unlisted" which grants viewing access to anyone who has the direct link to the project, regardless of whether they have an Ultralytics HUB account or not. + +Navigate to the Project page of the project you want to share, open the project actions dropdown and click on the **Share** option. This action will trigger the **Share Project** dialog. + +![Ultralytics HUB screenshot of the Project page with an arrow pointing to the Share option](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_share_project_1.jpg) + +??? tip "Tip" + + You can also share a project directly from the [Projects](https://hub.ultralytics.com/projects) page. + + ![Ultralytics HUB screenshot of the Projects page with an arrow pointing to the Share option of one of the projects](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_share_project_2.jpg) + +Set the general access to "Unlisted" and click **Save**. 
+ +![Ultralytics HUB screenshot of the Share Project dialog with an arrow pointing to the dropdown and one to the Save button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_share_project_3.jpg) + +!!! Warning "Warning" + + When changing the general access of a project, the general access of the models inside the project will be changed as well. + +Now, anyone who has the direct link to your project can view it. + +??? tip "Tip" + + You can easily click on the project's link shown in the **Share Project** dialog to copy it. + + ![Ultralytics HUB screenshot of the Share Project dialog with an arrow pointing to the project's link](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_share_project_4.jpg) + +## Edit Project + +Navigate to the Project page of the project you want to edit, open the project actions dropdown and click on the **Edit** option. This action will trigger the **Update Project** dialog. + +![Ultralytics HUB screenshot of the Project page with an arrow pointing to the Edit option](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_edit_project_1.jpg) + +??? tip "Tip" + + You can also edit a project directly from the [Projects](https://hub.ultralytics.com/projects) page. + + ![Ultralytics HUB screenshot of the Projects page with an arrow pointing to the Edit option of one of the projects](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_edit_project_2.jpg) + +Apply the desired modifications to your project and then confirm the changes by clicking **Save**. + +![Ultralytics HUB screenshot of the Update Project dialog with an arrow pointing to the Save button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_edit_project_3.jpg) + +## Delete Project + +Navigate to the Project page of the project you want to delete, open the project actions dropdown and click on the **Delete** option. 
This action will delete the project. + +![Ultralytics HUB screenshot of the Project page with an arrow pointing to the Delete option](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_delete_project_1.jpg) + +??? tip "Tip" + + You can also delete a project directly from the [Projects](https://hub.ultralytics.com/projects) page. + + ![Ultralytics HUB screenshot of the Projects page with an arrow pointing to the Delete option of one of the projects](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_delete_project_2.jpg) + +!!! Warning "Warning" + + When deleting a project, the models inside the project will be deleted as well. + +??? note "Note" + + If you change your mind, you can restore the project from the [Trash](https://hub.ultralytics.com/trash) page. + + ![Ultralytics HUB screenshot of the Trash page with an arrow pointing to the Restore option of one of the projects](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_delete_project_3.jpg) + +## Compare Models + +Navigate to the Project page of the project where the models you want to compare are located. To use the model comparison feature, click on the **Charts** tab. + +![Ultralytics HUB screenshot of the Project page with an arrow pointing to the Charts tab](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_compare_models_1.jpg) + +This will display all the relevant charts. Each chart corresponds to a different metric and contains the performance of each model for that metric. The models are represented by different colors and you can hover over each data point to get more information. + +![Ultralytics HUB screenshot of the Charts tab inside the Project page](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_compare_models_2.jpg) + +??? tip "Tip" + + Each chart can be enlarged for better visualization. 
+ + ![Ultralytics HUB screenshot of the Charts tab inside the Project page with an arrow pointing to the expand icon](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_compare_models_3.jpg) + + ![Ultralytics HUB screenshot of the Charts tab inside the Project page with one of the charts expanded](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_compare_models_4.jpg) + +??? tip "Tip" + + You have the flexibility to customize your view by selectively hiding certain models. This feature allows you to concentrate on the models of interest. + + ![Ultralytics HUB screenshot of the Charts tab inside the Project page with an arrow pointing to the hide/unhide icon of one of the models](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_compare_models_5.jpg) + +## Reorder Models + +??? note "Note" + + Ultralytics HUB's reordering functionality works only inside projects you own. + +Navigate to the Project page of the project where the models you want to reorder are located. Click on the designated reorder icon of the model you want to move and drag it to the desired location. + +![Ultralytics HUB screenshot of the Project page with an arrow pointing to the reorder icon](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_reorder_models_1.jpg) + +## Transfer Models + +Navigate to the Project page of the project where the model you want to move is located, open the project actions dropdown and click on the **Transfer** option. This action will trigger the **Transfer Model** dialog. + +![Ultralytics HUB screenshot of the Project page with an arrow pointing to the Transfer option of one of the models](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_transfer_models_1.jpg) + +??? tip "Tip" + + You can also transfer a model directly from the [Models](https://hub.ultralytics.com/models) page.
+ + ![Ultralytics HUB screenshot of the Models page with an arrow pointing to the Transfer option of one of the models](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_transfer_models_2.jpg) + +Select the project you want to transfer the model to and click **Save**. + +![Ultralytics HUB screenshot of the Transfer Model dialog with an arrow pointing to the dropdown and one to the Save button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/projects/hub_transfer_models_3.jpg) diff --git a/docs/en/hub/quickstart.md b/docs/en/hub/quickstart.md new file mode 100644 index 0000000..3728dc4 --- /dev/null +++ b/docs/en/hub/quickstart.md @@ -0,0 +1,52 @@ +--- +comments: true +description: Kickstart your journey with Ultralytics HUB. Learn how to train and deploy YOLOv5 and YOLOv8 models in seconds with our Quickstart guide. +keywords: Ultralytics HUB, Quickstart, YOLOv5, YOLOv8, model training, quick deployment, drag-and-drop interface, real-time object detection +--- + +# Quickstart Guide for Ultralytics HUB + +🚧 **Under Construction** 🚧 + +Thank you for visiting the Quickstart guide for [Ultralytics HUB](https://hub.ultralytics.com/)! We're currently hard at work building out this page to provide you with step-by-step instructions on how to get up and running with HUB in no time. + +

+
+ +
+ Watch: Train Your Custom YOLO Models In A Few Clicks with Ultralytics HUB. +

+ +In the meantime, here's a brief overview of what you can expect from Ultralytics HUB: + +## What is Ultralytics HUB? + +Ultralytics HUB is your one-stop solution for training and deploying YOLOv5 and YOLOv8 models. It's designed with user experience in mind, featuring a drag-and-drop interface to make uploading data and training new models a breeze. Whether you're a beginner or an experienced machine learning practitioner, HUB has a range of pre-trained models and templates to accelerate your projects. + +## Key Features + +- **User-Friendly Interface**: Simply drag and drop your data to start training. +- **Pre-Trained Models**: Choose from a selection of pre-trained models to kick-start your projects. +- **Real-Time Object Detection**: Deploy trained models easily for real-time object detection, instance segmentation, and classification tasks. + +## Coming Soon + +- Detailed Steps to Start Your First Project +- Guide on Preparing and Uploading Datasets +- Tutorial on Model Training and Exporting +- Integration Options and How-To's +- And much more! + +## Need Help Now? + +While we're polishing this page, feel free to: + +- Browse through other [HUB Docs](https://docs.ultralytics.com/hub/) for detailed guides and tutorials. +- Raise an issue on our [GitHub](https://github.com/ultralytics/hub/) for technical support. +- Join our [Discord Community](https://ultralytics.com/discord/) for live discussions and community support. + +Stay tuned! We'll be back soon with more detailed information to help you get the most out of Ultralytics HUB. Thank you for your patience and interest! diff --git a/docs/en/index.md b/docs/en/index.md new file mode 100644 index 0000000..c69388b --- /dev/null +++ b/docs/en/index.md @@ -0,0 +1,78 @@ +--- +comments: true +description: Explore a complete guide to Ultralytics YOLOv8, a high-speed, high-accuracy object detection & image segmentation model. Installation, prediction, training tutorials and more. 
+keywords: Ultralytics, YOLOv8, object detection, image segmentation, machine learning, deep learning, computer vision, YOLOv8 installation, YOLOv8 prediction, YOLOv8 training, YOLO history, YOLO licenses +--- + +
+

+ + Ultralytics YOLO banner +

+ Ultralytics GitHub + space + Ultralytics LinkedIn + space + Ultralytics Twitter + space + Ultralytics YouTube + space + Ultralytics TikTok + space + Ultralytics Instagram + space + Ultralytics Discord +
+
+ Ultralytics CI + Ultralytics Code Coverage + YOLOv8 Citation + Docker Pulls +
+ Run on Gradient + Open In Colab + Open In Kaggle +
+ +Introducing [Ultralytics](https://ultralytics.com) [YOLOv8](https://github.com/ultralytics/ultralytics), the latest version of the acclaimed real-time object detection and image segmentation model. YOLOv8 is built on cutting-edge advancements in deep learning and computer vision, offering unparalleled performance in terms of speed and accuracy. Its streamlined design makes it suitable for various applications and easily adaptable to different hardware platforms, from edge devices to cloud APIs. + +Explore the YOLOv8 Docs, a comprehensive resource designed to help you understand and utilize its features and capabilities. Whether you are a seasoned machine learning practitioner or new to the field, this hub aims to maximize YOLOv8's potential in your projects. + +## Where to Start + +- **Install** `ultralytics` with pip and get up and running in minutes   [:material-clock-fast: Get Started](quickstart.md){ .md-button } +- **Predict** new images and videos with YOLOv8   [:octicons-image-16: Predict on Images](modes/predict.md){ .md-button } +- **Train** a new YOLOv8 model on your own custom dataset   [:fontawesome-solid-brain: Train a Model](modes/train.md){ .md-button } +- **Explore** YOLOv8 tasks like segment, classify, pose and track   [:material-magnify-expand: Explore Tasks](tasks/index.md){ .md-button } + +

+
+ +
+ Watch: How to Train a YOLOv8 model on Your Custom Dataset in Google Colab. +

+ +## YOLO: A Brief History + +[YOLO](https://arxiv.org/abs/1506.02640) (You Only Look Once), a popular object detection and image segmentation model, was developed by Joseph Redmon and Ali Farhadi at the University of Washington. Launched in 2015, YOLO quickly gained popularity for its high speed and accuracy. + +- [YOLOv2](https://arxiv.org/abs/1612.08242), released in 2016, improved the original model by incorporating batch normalization, anchor boxes, and dimension clusters. +- [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf), launched in 2018, further enhanced the model's performance using a more efficient backbone network, multiple anchors and spatial pyramid pooling. +- [YOLOv4](https://arxiv.org/abs/2004.10934) was released in 2020, introducing innovations like Mosaic data augmentation, a new anchor-free detection head, and a new loss function. +- [YOLOv5](https://github.com/ultralytics/yolov5) further improved the model's performance and added new features such as hyperparameter optimization, integrated experiment tracking and automatic export to popular export formats. +- [YOLOv6](https://github.com/meituan/YOLOv6) was open-sourced by [Meituan](https://about.meituan.com/) in 2022 and is in use in many of the company's autonomous delivery robots. +- [YOLOv7](https://github.com/WongKinYiu/yolov7) added additional tasks such as pose estimation on the COCO keypoints dataset. +- [YOLOv8](https://github.com/ultralytics/ultralytics) is the latest version of YOLO by Ultralytics. As a cutting-edge, state-of-the-art (SOTA) model, YOLOv8 builds on the success of previous versions, introducing new features and improvements for enhanced performance, flexibility, and efficiency. YOLOv8 supports a full range of vision AI tasks, including [detection](tasks/detect.md), [segmentation](tasks/segment.md), [pose estimation](tasks/pose.md), [tracking](modes/track.md), and [classification](tasks/classify.md). 
This versatility allows users to leverage YOLOv8's capabilities across diverse applications and domains. + +## YOLO Licenses: How is Ultralytics YOLO licensed? + +Ultralytics offers two licensing options to accommodate diverse use cases: + +- **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/licenses/) open-source license is ideal for students and enthusiasts, promoting open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for more details. +- **Enterprise License**: Designed for commercial use, this license permits seamless integration of Ultralytics software and AI models into commercial goods and services, bypassing the open-source requirements of AGPL-3.0. If your scenario involves embedding our solutions into a commercial offering, reach out through [Ultralytics Licensing](https://ultralytics.com/license). + +Our licensing strategy is designed to ensure that any improvements to our open-source projects are returned to the community. We hold the principles of open source close to our hearts ❤️, and our mission is to guarantee that our contributions can be utilized and expanded upon in ways that are beneficial to all. diff --git a/docs/en/integrations/clearml.md b/docs/en/integrations/clearml.md new file mode 100644 index 0000000..ca3e85f --- /dev/null +++ b/docs/en/integrations/clearml.md @@ -0,0 +1,184 @@ +--- +comments: true +description: Learn how to streamline and optimize your YOLOv8 model training with ClearML. This guide provides insights into integrating ClearML's MLOps tools for efficient model training, from initial setup to advanced experiment tracking and model management. 
+keywords: Ultralytics, YOLOv8, Object Detection, ClearML, Model Training, MLOps, Experiment Tracking, Workflow Optimization +--- + +# Training YOLOv8 with ClearML: Streamlining Your MLOps Workflow + +MLOps bridges the gap between creating and deploying machine learning models in real-world settings. It focuses on efficient deployment, scalability, and ongoing management to ensure models perform well in practical applications. + +[Ultralytics YOLOv8](https://ultralytics.com) effortlessly integrates with ClearML, streamlining and enhancing your object detection model's training and management. This guide will walk you through the integration process, detailing how to set up ClearML, manage experiments, automate model management, and collaborate effectively. + +## ClearML + +

+ ClearML Overview +

+ +[ClearML](https://clear.ml/) is an innovative open-source MLOps platform that is skillfully designed to automate, monitor, and orchestrate machine learning workflows. Its key features include automated logging of all training and inference data for full experiment reproducibility, an intuitive web UI for easy data visualization and analysis, advanced hyperparameter optimization algorithms, and robust model management for efficient deployment across various platforms. + +## YOLOv8 Training with ClearML + +By integrating YOLOv8 with ClearML, you can improve your training process and bring automation and efficiency to your machine learning workflow. + +## Installation + +To install the required packages, run: + +!!! Tip "Installation" + + === "CLI" + + ```bash + # Install the required packages for YOLOv8 and ClearML + pip install ultralytics clearml + ``` + +For detailed instructions and best practices related to the installation process, be sure to check our [YOLOv8 Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. + +## Configuring ClearML + +Once you have installed the necessary packages, the next step is to initialize and configure your ClearML SDK. This involves setting up your ClearML account and obtaining the necessary credentials for a seamless connection between your development environment and the ClearML server. + +Begin by initializing the ClearML SDK in your environment. The `clearml-init` command starts the setup process and prompts you for the necessary credentials. + +!!! Tip "Initial SDK Setup" + + === "CLI" + + ```bash + # Initialize your ClearML SDK setup process + clearml-init + ``` + +After executing this command, visit the [ClearML Settings page](https://app.clear.ml/settings/workspace-configuration). Navigate to the top right corner and select "Settings." 
Go to the "Workspace" section and click on "Create new credentials." Use the credentials provided in the "Create Credentials" pop-up to complete the setup as instructed, depending on whether you are configuring ClearML in a Jupyter Notebook or a local Python environment. + +## Usage + +Before diving into the usage instructions, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. + +!!! Example "Usage" + + === "Python" + + ```python + from clearml import Task + from ultralytics import YOLO + + # Step 1: Creating a ClearML Task + task = Task.init( + project_name="my_project", + task_name="my_yolov8_task" + ) + + # Step 2: Selecting the YOLOv8 Model + model_variant = "yolov8n" + task.set_parameter("model_variant", model_variant) + + # Step 3: Loading the YOLOv8 Model + model = YOLO(f'{model_variant}.pt') + + # Step 4: Setting Up Training Arguments + args = dict(data="coco128.yaml", epochs=16) + task.connect(args) + + # Step 5: Initiating Model Training + results = model.train(**args) + ``` + +### Understanding the Code + +Let’s understand the steps showcased in the usage code snippet above. + +**Step 1: Creating a ClearML Task**: A new task is initialized in ClearML, specifying your project and task names. This task will track and manage your model's training. + +**Step 2: Selecting the YOLOv8 Model**: The `model_variant` variable is set to 'yolov8n', one of the YOLOv8 models. This variant is then logged in ClearML for tracking. + +**Step 3: Loading the YOLOv8 Model**: The selected YOLOv8 model is loaded using Ultralytics' YOLO class, preparing it for training. + +**Step 4: Setting Up Training Arguments**: Key training arguments like the dataset (`coco128.yaml`) and the number of epochs (`16`) are organized in a dictionary and connected to the ClearML task. This allows for tracking and potential modification via the ClearML UI. 
For a detailed understanding of the model training process and best practices, refer to our [YOLOv8 Model Training guide](../modes/train.md). + +**Step 5: Initiating Model Training**: The model training is started with the specified arguments. The results of the training process are captured in the `results` variable. + +### Understanding the Output + +Upon running the usage code snippet above, you can expect the following output: + +- A confirmation message indicating the creation of a new ClearML task, along with its unique ID. +- An informational message about the script code being stored, indicating that the code execution is being tracked by ClearML. +- A URL link to the ClearML results page where you can monitor the training progress and view detailed logs. +- Download progress for the YOLOv8 model and the specified dataset, followed by a summary of the model architecture and training configuration. +- Initialization messages for various training components like TensorBoard, Automatic Mixed Precision (AMP), and dataset preparation. +- Finally, the training process starts, with progress updates as the model trains on the specified dataset. For an in-depth understanding of the performance metrics used during training, read [our guide on performance metrics](../guides/yolo-performance-metrics.md). + +### Viewing the ClearML Results Page + +By clicking on the URL link to the ClearML results page in the output of the usage code snippet, you can access a comprehensive view of your model's training process. + +#### Key Features of the ClearML Results Page + +- **Real-Time Metrics Tracking** + - Track critical metrics like loss, accuracy, and validation scores as they occur. + - Provides immediate feedback for timely model performance adjustments. + +- **Experiment Comparison** + - Compare different training runs side-by-side. + - Essential for hyperparameter tuning and identifying the most effective models. 
+ +- **Detailed Logs and Outputs** + - Access comprehensive logs, graphical representations of metrics, and console outputs. + - Gain a deeper understanding of model behavior and issue resolution. + +- **Resource Utilization Monitoring** + - Monitor the utilization of computational resources, including CPU, GPU, and memory. + - Key to optimizing training efficiency and costs. + +- **Model Artifacts Management** + - View, download, and share model artifacts like trained models and checkpoints. + - Enhances collaboration and streamlines model deployment and sharing. + +For a visual walkthrough of what the ClearML Results Page looks like, watch the video below: + +

+
+ +
+ Watch: YOLOv8 MLOps Integration using ClearML +

+ +### Advanced Features in ClearML + +ClearML offers several advanced features to enhance your MLOps experience. + +#### Remote Execution + +ClearML's remote execution feature facilitates the reproduction and manipulation of experiments on different machines. It logs essential details like installed packages and uncommitted changes. When a task is enqueued, the ClearML Agent pulls it, recreates the environment, and runs the experiment, reporting back with detailed results. + +Deploying a ClearML Agent is straightforward and can be done on various machines using the following command: + +```bash +clearml-agent daemon --queue <queues_to_listen_to> [--docker] +``` + +This setup is applicable to cloud VMs, local GPUs, or laptops. ClearML Autoscalers help manage cloud workloads on platforms like AWS, GCP, and Azure, automating the deployment of agents and adjusting resources based on your resource budget. + +### Cloning, Editing, and Enqueuing + +ClearML's user-friendly interface allows easy cloning, editing, and enqueuing of tasks. Users can clone an existing experiment, adjust parameters or other details through the UI, and enqueue the task for execution. This streamlined process ensures that the ClearML Agent executing the task uses updated configurations, making it ideal for iterative experimentation and model fine-tuning. + +


+ Cloning, Editing, and Enqueuing with ClearML +

+ +## Summary + +This guide has led you through the process of integrating ClearML with Ultralytics' YOLOv8. Covering everything from initial setup to advanced model management, you've discovered how to leverage ClearML for efficient training, experiment tracking, and workflow optimization in your machine learning projects. + +For further details on usage, visit [ClearML's official documentation](https://clear.ml/docs/latest/docs/integrations/yolov8/). + +Additionally, explore more integrations and capabilities of Ultralytics by visiting the [Ultralytics integration guide page](../integrations/index.md), which is a treasure trove of resources and insights. diff --git a/docs/en/integrations/comet.md b/docs/en/integrations/comet.md new file mode 100644 index 0000000..ac9819c --- /dev/null +++ b/docs/en/integrations/comet.md @@ -0,0 +1,173 @@ +--- +comments: true +description: Discover how to track and enhance YOLOv8 model training with Comet ML's logging tools, from setup to monitoring key metrics and managing experiments for in-depth analysis. +keywords: Ultralytics, YOLOv8, Object Detection, Comet ML, Model Training, Model Metrics Logging, Experiment Tracking, Offline Experiment Management +--- + +# Elevating YOLOv8 Training: Simplify Your Logging Process with Comet ML + +Logging key training details such as parameters, metrics, image predictions, and model checkpoints is essential in machine learning—it keeps your project transparent, your progress measurable, and your results repeatable. + +[Ultralytics YOLOv8](https://ultralytics.com) seamlessly integrates with Comet ML, efficiently capturing and optimizing every aspect of your YOLOv8 object detection model's training process. In this guide, we'll cover the installation process, Comet ML setup, real-time insights, custom logging, and offline usage, ensuring that your YOLOv8 training is thoroughly documented and fine-tuned for outstanding results. + +## Comet ML + +

+ Comet ML Overview +

+ +[Comet ML](https://www.comet.ml/) is a platform for tracking, comparing, explaining, and optimizing machine learning models and experiments. It allows you to log metrics, parameters, media, and more during your model training and monitor your experiments through an aesthetically pleasing web interface. Comet ML helps data scientists iterate more rapidly, enhances transparency and reproducibility, and aids in the development of production models. + +## Harnessing the Power of YOLOv8 and Comet ML + +By combining Ultralytics YOLOv8 with Comet ML, you unlock a range of benefits. These include simplified experiment management, real-time insights for quick adjustments, flexible and tailored logging options, and the ability to log experiments offline when internet access is limited. This integration empowers you to make data-driven decisions, analyze performance metrics, and achieve exceptional results. + +## Installation + +To install the required packages, run: + +!!! Tip "Installation" + + === "CLI" + + ```bash + # Install the required packages for YOLOv8 and Comet ML + pip install ultralytics comet_ml torch torchvision + ``` + +## Configuring Comet ML + +After installing the required packages, you’ll need to sign up, get a [Comet API Key](https://www.comet.com/signup), and configure it. + +!!! Tip "Configuring Comet ML" + + === "CLI" + + ```bash + # Set your Comet API Key + export COMET_API_KEY=<Your API Key> + ``` + +Then, you can initialize your Comet project. Comet will automatically detect the API key and proceed with the setup. + +```python +import comet_ml +comet_ml.init(project_name="comet-example-yolov8-coco128") +``` + +*Note:* If you are using a Google Colab notebook, the code above will prompt you to enter your API key for initialization. + +## Usage + +Before diving into the usage instructions, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). 
This will help you choose the most appropriate model for your project requirements. + +!!! Example "Usage" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO("yolov8n.pt") + + # train the model + results = model.train( + data="coco128.yaml", + project="comet-example-yolov8-coco128", + batch=32, + save_period=1, + save_json=True, + epochs=3 + ) + ``` + +After running the training code, Comet ML will create an experiment in your Comet workspace to track the run automatically. You will then be provided with a link to view the detailed logging of your [YOLOv8 model's training](../modes/train.md) process. + +Comet automatically logs the following data with no additional configuration: metrics such as mAP and loss, hyperparameters, model checkpoints, interactive confusion matrix, and image bounding box predictions. + +## Understanding Your Model's Performance with Comet ML Visualizations + +Let's dive into what you'll see on the Comet ML dashboard once your YOLOv8 model begins training. The dashboard is where all the action happens, presenting a range of automatically logged information through visuals and statistics. Here’s a quick tour: + +**Experiment Panels** + +The experiment panels section of the Comet ML dashboard organizes and presents the different runs and their metrics, such as segment mask loss, class loss, precision, and mean average precision. + +

+ Comet ML Overview +

+ +**Metrics** + +In the metrics section, you have the option to examine the metrics in a tabular format as well, which is displayed in a dedicated pane as illustrated here. + +

+ Comet ML Overview +

+ +**Interactive Confusion Matrix** + +The confusion matrix, found in the Confusion Matrix tab, provides an interactive way to assess the model's classification accuracy. It details the correct and incorrect predictions, allowing you to understand the model's strengths and weaknesses. + +

+ Comet ML Overview +

+ +**System Metrics** + +Comet ML logs system metrics to help identify any bottlenecks in the training process. It includes metrics such as GPU utilization, GPU memory usage, CPU utilization, and RAM usage. These are essential for monitoring the efficiency of resource usage during model training. + +

+ Comet ML Overview +

+ +## Customizing Comet ML Logging + +Comet ML offers the flexibility to customize its logging behavior by setting environment variables. These configurations allow you to tailor Comet ML to your specific needs and preferences. Here are some helpful customization options: + +### Logging Image Predictions + +You can control the number of image predictions that Comet ML logs during your experiments. By default, Comet ML logs 100 image predictions from the validation set. However, you can change this number to better suit your requirements. For example, to log 200 image predictions, use the following code: + +```python +import os +os.environ["COMET_MAX_IMAGE_PREDICTIONS"] = "200" +``` + +### Batch Logging Interval + +Comet ML allows you to specify how often batches of image predictions are logged. The `COMET_EVAL_BATCH_LOGGING_INTERVAL` environment variable controls this frequency. The default setting is 1, which logs predictions from every validation batch. You can adjust this value to log predictions at a different interval. For instance, setting it to 4 will log predictions from every fourth batch. + +```python +import os +os.environ['COMET_EVAL_BATCH_LOGGING_INTERVAL'] = "4" +``` + +### Disabling Confusion Matrix Logging + +In some cases, you may not want to log the confusion matrix from your validation set after every epoch. You can disable this feature by setting the `COMET_EVAL_LOG_CONFUSION_MATRIX` environment variable to "false." The confusion matrix will only be logged once, after the training is completed. + +```python +import os +os.environ["COMET_EVAL_LOG_CONFUSION_MATRIX"] = "false" +``` + +### Offline Logging + +If you find yourself in a situation where internet access is limited, Comet ML provides an offline logging option. You can set the `COMET_MODE` environment variable to "offline" to enable this feature. Your experiment data will be saved locally in a directory that you can later upload to Comet ML when internet connectivity is available. 
+ +```python +import os +os.environ["COMET_MODE"] = "offline" +``` + +## Summary + +This guide has walked you through integrating Comet ML with Ultralytics' YOLOv8. From installation to customization, you've learned to streamline experiment management, gain real-time insights, and adapt logging to your project's needs. + +Explore [Comet ML's official documentation](https://www.comet.com/docs/v2/integrations/third-party-tools/yolov8/) for more insights on integrating with YOLOv8. + +Furthermore, if you're looking to dive deeper into the practical applications of YOLOv8, specifically for image segmentation tasks, this detailed guide on [fine-tuning YOLOv8 with Comet ML](https://www.comet.com/site/blog/fine-tuning-yolov8-for-image-segmentation-with-comet/) offers valuable insights and step-by-step instructions to enhance your model's performance. + +Additionally, to explore other exciting integrations with Ultralytics, check out the [integration guide page](../integrations/index.md), which offers a wealth of resources and information. diff --git a/docs/en/integrations/dvc.md b/docs/en/integrations/dvc.md new file mode 100644 index 0000000..542a91a --- /dev/null +++ b/docs/en/integrations/dvc.md @@ -0,0 +1,171 @@ +--- +comments: true +description: This guide provides a step-by-step approach to integrating DVCLive with Ultralytics YOLOv8 for advanced experiment tracking. Learn how to set up your environment, run experiments with varied configurations, and analyze results using DVCLive's powerful tracking and visualization tools. +keywords: DVCLive, Ultralytics, YOLOv8, Machine Learning, Experiment Tracking, Data Version Control, ML Workflows, Model Training, Hyperparameter Tuning +--- + +# Advanced YOLOv8 Experiment Tracking with DVCLive + +Experiment tracking in machine learning is critical to model development and evaluation. It involves recording and analyzing various parameters, metrics, and outcomes from numerous training runs. 
This process is essential for understanding model performance and making data-driven decisions to refine and optimize models. + +Integrating DVCLive with [Ultralytics YOLOv8](https://ultralytics.com) transforms the way experiments are tracked and managed. This integration offers a seamless solution for automatically logging key experiment details, comparing results across different runs, and visualizing data for in-depth analysis. In this guide, we'll understand how DVCLive can be used to streamline the process. + +## DVCLive + +

+ DVCLive Overview +

+ +[DVCLive](https://dvc.org/doc/dvclive), developed by DVC, is an innovative open-source tool for experiment tracking in machine learning. Integrating seamlessly with Git and DVC, it automates the logging of crucial experiment data like model parameters and training metrics. Designed for simplicity, DVCLive enables effortless comparison and analysis of multiple runs, enhancing the efficiency of machine learning projects with intuitive data visualization and analysis tools. + +## YOLOv8 Training with DVCLive + +YOLOv8 training sessions can be effectively monitored with DVCLive. Additionally, DVC provides integral features for visualizing these experiments, including the generation of a report that enables the comparison of metric plots across all tracked experiments, offering a comprehensive view of the training process. + +## Installation + +To install the required packages, run: + +!!! Tip "Installation" + + === "CLI" + + ```bash + # Install the required packages for YOLOv8 and DVCLive + pip install ultralytics dvclive + ``` + +For detailed instructions and best practices related to the installation process, be sure to check our [YOLOv8 Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. + +## Configuring DVCLive + +Once you have installed the necessary packages, the next step is to set up and configure your environment with the necessary credentials. This setup ensures a smooth integration of DVCLive into your existing workflow. + +Begin by initializing a Git repository, as Git plays a crucial role in version control for both your code and DVCLive configurations. + +!!! 
Tip "Initial Environment Setup" + + === "CLI" + + ```bash + # Initialize a Git repository + git init -q + + # Configure Git with your details + git config --local user.email "you@example.com" + git config --local user.name "Your Name" + + # Initialize DVCLive in your project + dvc init -q + + # Commit the DVCLive setup to your Git repository + git commit -m "DVC init" + ``` + +In these commands, be sure to replace "you@example.com" with the email address associated with your Git account, and "Your Name" with your Git account username. + +## Usage + +Before diving into the usage instructions, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. + +### Training YOLOv8 Models with DVCLive + +Start by running your YOLOv8 training sessions. You can use different model configurations and training parameters to suit your project needs. For instance: + +```bash +# Example training commands for YOLOv8 with varying configurations +yolo train model=yolov8n.pt data=coco8.yaml epochs=5 imgsz=512 +yolo train model=yolov8n.pt data=coco8.yaml epochs=5 imgsz=640 +``` + +Adjust the model, data, epochs, and imgsz parameters according to your specific requirements. For a detailed understanding of the model training process and best practices, refer to our [YOLOv8 Model Training guide](../modes/train.md). + +### Monitoring Experiments with DVCLive + +DVCLive enhances the training process by enabling the tracking and visualization of key metrics. When installed, Ultralytics YOLOv8 automatically integrates with DVCLive for experiment tracking, which you can later analyze for performance insights. For a comprehensive understanding of the specific performance metrics used during training, be sure to explore [our detailed guide on performance metrics](../guides/yolo-performance-metrics.md). 
+ +### Analyzing Results + +After your YOLOv8 training sessions are complete, you can leverage DVCLive's powerful visualization tools for in-depth analysis of the results. DVCLive's integration ensures that all training metrics are systematically logged, facilitating a comprehensive evaluation of your model's performance. + +To start the analysis, you can extract the experiment data using DVC's API and process it with Pandas for easier handling and visualization: + +```python +import dvc.api +import pandas as pd + +# Define the columns of interest +columns = ["Experiment", "epochs", "imgsz", "model", "metrics.mAP50-95(B)"] + +# Retrieve experiment data +df = pd.DataFrame(dvc.api.exp_show(), columns=columns) + +# Clean the data +df.dropna(inplace=True) +df.reset_index(drop=True, inplace=True) + +# Display the DataFrame +print(df) +``` + +The output of the code snippet above provides a clear tabular view of the different experiments conducted with YOLOv8 models. Each row represents a different training run, detailing the experiment's name, the number of epochs, image size (imgsz), the specific model used, and the mAP50-95(B) metric. This metric is crucial for evaluating the model's accuracy, with higher values indicating better performance. + +#### Visualizing Results with Plotly + +For a more interactive and visual analysis of your experiment results, you can use Plotly's parallel coordinates plot. This type of plot is particularly useful for understanding the relationships and trade-offs between different parameters and metrics. 
+ +```python +from plotly.express import parallel_coordinates + +# Create a parallel coordinates plot +fig = parallel_coordinates(df, columns, color="metrics.mAP50-95(B)") + +# Display the plot +fig.show() +``` + +The output of the code snippet above generates a plot that will visually represent the relationships between epochs, image size, model type, and their corresponding mAP50-95(B) scores, enabling you to spot trends and patterns in your experiment data. + +#### Generating Comparative Visualizations with DVC + +DVC provides a useful command to generate comparative plots for your experiments. This can be especially helpful to compare the performance of different models over various training runs. + +```bash +# Generate DVC comparative plots +dvc plots diff $(dvc exp list --names-only) +``` + +After executing this command, DVC generates plots comparing the metrics across different experiments, which are saved as HTML files. Below is an example image illustrating typical plots generated by this process. The image showcases various graphs, including those representing mAP, recall, precision, loss values, and more, providing a visual overview of key performance metrics: + +

+ DVCLive Plots +

+ +### Displaying DVC Plots + +If you are using a Jupyter Notebook and you want to display the generated DVC plots, you can use the IPython display functionality. + +```python +from IPython.display import HTML + +# Display the DVC plots as HTML +HTML(filename='./dvc_plots/index.html') +``` + +This code will render the HTML file containing the DVC plots directly in your Jupyter Notebook, providing an easy and convenient way to analyze the visualized experiment data. + +### Making Data-Driven Decisions + +Use the insights gained from these visualizations to make informed decisions about model optimizations, hyperparameter tuning, and other modifications to enhance your model's performance. + +### Iterating on Experiments + +Based on your analysis, iterate on your experiments. Adjust model configurations, training parameters, or even the data inputs, and repeat the training and analysis process. This iterative approach is key to refining your model for the best possible performance. + +## Summary + +This guide has led you through the process of integrating DVCLive with Ultralytics' YOLOv8. You have learned how to harness the power of DVCLive for detailed experiment monitoring, effective visualization, and insightful analysis in your machine learning endeavors. + +For further details on usage, visit [DVCLive’s official documentation](https://dvc.org/doc/dvclive/ml-frameworks/yolo). + +Additionally, explore more integrations and capabilities of Ultralytics by visiting the [Ultralytics integration guide page](../integrations/index.md), which is a collection of great resources and insights. diff --git a/docs/en/integrations/index.md b/docs/en/integrations/index.md new file mode 100644 index 0000000..492d282 --- /dev/null +++ b/docs/en/integrations/index.md @@ -0,0 +1,71 @@ +--- +comments: true +description: Explore Ultralytics integrations with tools for dataset management, model optimization, ML workflows automation, experiment tracking, version control, and more. 
Learn about our support for various model export formats for deployment. +keywords: Ultralytics integrations, Roboflow, Neural Magic, ClearML, Comet ML, DVC, Ultralytics HUB, MLFlow, Neptune, Ray Tune, TensorBoard, W&B, model export formats, PyTorch, TorchScript, ONNX, OpenVINO, TensorRT, CoreML, TF SavedModel, TF GraphDef, TF Lite, TF Edge TPU, TF.js, PaddlePaddle, NCNN +--- + +# Ultralytics Integrations + +Welcome to the Ultralytics Integrations page! This page provides an overview of our partnerships with various tools and platforms, designed to streamline your machine learning workflows, enhance dataset management, simplify model training, and facilitate efficient deployment. + +Ultralytics YOLO ecosystem and integrations + +## Datasets Integrations + +- [Roboflow](roboflow.md): Facilitate seamless dataset management for Ultralytics models, offering robust annotation, preprocessing, and augmentation capabilities. + +## Training Integrations + +- [Comet ML](comet.md): Enhance your model development with Ultralytics by tracking, comparing, and optimizing your machine learning experiments. + +- [ClearML](clearml.md): Automate your Ultralytics ML workflows, monitor experiments, and foster team collaboration. + +- [DVC](dvc.md): Implement version control for your Ultralytics machine learning projects, synchronizing data, code, and models effectively. + +- [Ultralytics HUB](https://hub.ultralytics.com): Access and contribute to a community of pre-trained Ultralytics models. + +- [MLFlow](mlflow.md): Streamline the entire ML lifecycle of Ultralytics models, from experimentation and reproducibility to deployment. + +- [Neptune](https://neptune.ai/): Maintain a comprehensive log of your ML experiments with Ultralytics in this metadata store designed for MLOps. + +- [Ray Tune](ray-tune.md): Optimize the hyperparameters of your Ultralytics models at any scale. 
+ +- [TensorBoard](https://tensorboard.dev/): Visualize your Ultralytics ML workflows, monitor model metrics, and foster team collaboration. + +- [Weights & Biases (W&B)](https://wandb.ai/site): Monitor experiments, visualize metrics, and foster reproducibility and collaboration on Ultralytics projects. + +## Deployment Integrations + +- [Neural Magic](https://neuralmagic.com/): Leverage Quantization Aware Training (QAT) and pruning techniques to optimize Ultralytics models for superior performance and leaner size. + +### Export Formats + +We also support a variety of model export formats for deployment in different environments. Here are the available formats: + +| Format | `format` Argument | Model | Metadata | Arguments | +|--------------------------------------------------------------------|-------------------|---------------------------|----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge 
TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [NCNN](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +Explore the links to learn more about each integration and how to get the most out of them with Ultralytics. + +## Contribute to Our Integrations + +We're always excited to see how the community integrates Ultralytics YOLO with other technologies, tools, and platforms! If you have successfully integrated YOLO with a new system or have valuable insights to share, consider contributing to our Integrations Docs. + +By writing a guide or tutorial, you can help expand our documentation and provide real-world examples that benefit the community. It's an excellent way to contribute to the growing ecosystem around Ultralytics YOLO. + +To contribute, please check out our [Contributing Guide](https://docs.ultralytics.com/help/contributing) for instructions on how to submit a Pull Request (PR) 🛠️. We eagerly await your contributions! + +Let's collaborate to make the Ultralytics YOLO ecosystem more expansive and feature-rich 🙏! diff --git a/docs/en/integrations/mlflow.md b/docs/en/integrations/mlflow.md new file mode 100644 index 0000000..58052df --- /dev/null +++ b/docs/en/integrations/mlflow.md @@ -0,0 +1,112 @@ +--- +comments: true +description: Uncover the utility of MLflow for effective experiment logging in your Ultralytics YOLO projects. 
+keywords: ultralytics docs, YOLO, MLflow, experiment logging, metrics tracking, parameter logging, artifact logging +--- + +# MLflow Integration for Ultralytics YOLO + +MLflow ecosystem + +## Introduction + +Experiment logging is a crucial aspect of machine learning workflows that enables tracking of various metrics, parameters, and artifacts. It helps to enhance model reproducibility, debug issues, and improve model performance. [Ultralytics](https://ultralytics.com) YOLO, known for its real-time object detection capabilities, now offers integration with [MLflow](https://mlflow.org/), an open-source platform for complete machine learning lifecycle management. + +This documentation page is a comprehensive guide to setting up and utilizing the MLflow logging capabilities for your Ultralytics YOLO project. + +## What is MLflow? + +[MLflow](https://mlflow.org/) is an open-source platform developed by [Databricks](https://www.databricks.com/) for managing the end-to-end machine learning lifecycle. It includes tools for tracking experiments, packaging code into reproducible runs, and sharing and deploying models. MLflow is designed to work with any machine learning library and programming language. + +## Features + +- **Metrics Logging**: Logs metrics at the end of each epoch and at the end of the training. +- **Parameter Logging**: Logs all the parameters used in the training. +- **Artifacts Logging**: Logs model artifacts, including weights and configuration files, at the end of the training. + +## Setup and Prerequisites + +Ensure MLflow is installed. If not, install it using pip: + +```bash +pip install mlflow +``` + +Make sure that MLflow logging is enabled in Ultralytics settings. Usually, this is controlled by the settings `mlflow` key. See the [settings](https://docs.ultralytics.com/quickstart/#ultralytics-settings) page for more info. + +!!! 
Example "Update Ultralytics MLflow Settings" + + === "Python" + Within the Python environment, call the `update` method on the `settings` object to change your settings: + ```python + from ultralytics import settings + + # Update a setting + settings.update({'mlflow': True}) + + # Reset settings to default values + settings.reset() + ``` + + === "CLI" + If you prefer using the command-line interface, the following commands will allow you to modify your settings: + ```bash + # Update a setting + yolo settings mlflow=True + + # Reset settings to default values + yolo settings reset + ``` + +## How to Use + +### Commands + +1. **Set a Project Name**: You can set the project name via an environment variable: + ```bash + export MLFLOW_EXPERIMENT_NAME=<your_experiment_name> + ``` + Or use the `project=<project>` argument when training a YOLO model, i.e. `yolo train project=my_project`. + +2. **Set a Run Name**: Similar to setting a project name, you can set the run name via an environment variable: + ```bash + export MLFLOW_RUN=<your_run_name> + ``` + Or use the `name=<name>` argument when training a YOLO model, i.e. `yolo train project=my_project name=my_name`. + +3. **Start Local MLflow Server**: To start tracking, use: + ```bash + mlflow server --backend-store-uri runs/mlflow + ``` + This will start a local server at http://127.0.0.1:5000 by default and save all mlflow logs to the 'runs/mlflow' directory. To specify a different URI, set the `MLFLOW_TRACKING_URI` environment variable. + +4. **Kill MLflow Server Instances**: To stop all running MLflow instances, run: + ```bash + ps aux | grep 'mlflow' | grep -v 'grep' | awk '{print $2}' | xargs kill -9 + ``` + +### Logging + +The logging is taken care of by the `on_pretrain_routine_end`, `on_fit_epoch_end`, and `on_train_end` callback functions. These functions are automatically called during the respective stages of the training process, and they handle the logging of parameters, metrics, and artifacts. + +## Examples + +1. 
**Logging Custom Metrics**: You can add custom metrics to be logged by modifying the `trainer.metrics` dictionary before `on_fit_epoch_end` is called. + +2. **View Experiment**: To view your logs, navigate to your MLflow server (usually http://127.0.0.1:5000) and select your experiment and run. + YOLO MLflow Experiment + +3. **View Run**: Runs are individual models inside an experiment. Click on a Run and see the Run details, including uploaded artifacts and model weights. + YOLO MLflow Run + +## Disabling MLflow + +To turn off MLflow logging: + +```bash +yolo settings mlflow=False +``` + +## Conclusion + +MLflow logging integration with Ultralytics YOLO offers a streamlined way to keep track of your machine learning experiments. It empowers you to monitor performance metrics and manage artifacts effectively, thus aiding in robust model development and deployment. For further details please visit the MLflow [official documentation](https://mlflow.org/docs/latest/index.html). diff --git a/docs/en/integrations/openvino.md b/docs/en/integrations/openvino.md new file mode 100644 index 0000000..6f552ff --- /dev/null +++ b/docs/en/integrations/openvino.md @@ -0,0 +1,284 @@ +--- +comments: true +description: Discover the power of deploying your Ultralytics YOLOv8 model using OpenVINO format for up to 10x speedup vs PyTorch. 
+keywords: ultralytics docs, YOLOv8, export YOLOv8, YOLOv8 model deployment, exporting YOLOv8, OpenVINO, OpenVINO format +--- + +# Intel OpenVINO Export + +OpenVINO Ecosystem + +In this guide, we cover exporting YOLOv8 models to the [OpenVINO](https://docs.openvino.ai/) format, which can provide up to 3x [CPU](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_supported_plugins_CPU.html) speedup as well as accelerating on other Intel hardware ([iGPU](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_supported_plugins_GPU.html), [dGPU](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_supported_plugins_GPU.html), [VPU](https://docs.openvino.ai/2022.3/openvino_docs_OV_UG_supported_plugins_VPU.html), etc.). + +OpenVINO, short for Open Visual Inference & Neural Network Optimization toolkit, is a comprehensive toolkit for optimizing and deploying AI inference models. Even though the name contains Visual, OpenVINO also supports various additional tasks including language, audio, time series, etc. + +

+
+ +
+ Watch: How To Export and Optimize an Ultralytics YOLOv8 Model for Inference with OpenVINO. +

+ +## Usage Examples + +Export a YOLOv8n model to OpenVINO format and run inference with the exported model. + +!!! Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a YOLOv8n PyTorch model + model = YOLO('yolov8n.pt') + + # Export the model + model.export(format='openvino') # creates 'yolov8n_openvino_model/' + + # Load the exported OpenVINO model + ov_model = YOLO('yolov8n_openvino_model/') + + # Run inference + results = ov_model('https://ultralytics.com/images/bus.jpg') + ``` + === "CLI" + + ```bash + # Export a YOLOv8n PyTorch model to OpenVINO format + yolo export model=yolov8n.pt format=openvino # creates 'yolov8n_openvino_model/' + + # Run inference with the exported model + yolo predict model=yolov8n_openvino_model source='https://ultralytics.com/images/bus.jpg' + ``` + +## Arguments + +| Key | Value | Description | +|----------|--------------|------------------------------------------------------| +| `format` | `'openvino'` | format to export to | +| `imgsz` | `640` | image size as scalar or (h, w) list, i.e. (640, 480) | +| `half` | `False` | FP16 quantization | + +## Benefits of OpenVINO + +1. **Performance**: OpenVINO delivers high-performance inference by utilizing the power of Intel CPUs, integrated and discrete GPUs, and FPGAs. +2. **Support for Heterogeneous Execution**: OpenVINO provides an API to write once and deploy on any supported Intel hardware (CPU, GPU, FPGA, VPU, etc.). +3. **Model Optimizer**: OpenVINO provides a Model Optimizer that imports, converts, and optimizes models from popular deep learning frameworks such as PyTorch, TensorFlow, TensorFlow Lite, Keras, ONNX, PaddlePaddle, and Caffe. +4. **Ease of Use**: The toolkit comes with more than [80 tutorial notebooks](https://github.com/openvinotoolkit/openvino_notebooks) (including [YOLOv8 optimization](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/230-yolov8-optimization)) teaching different aspects of the toolkit. 
+ +## OpenVINO Export Structure + +When you export a model to OpenVINO format, it results in a directory containing the following: + +1. **XML file**: Describes the network topology. +2. **BIN file**: Contains the weights and biases binary data. +3. **Mapping file**: Holds mapping of original model output tensors to OpenVINO tensor names. + +You can use these files to run inference with the OpenVINO Inference Engine. + +## Using OpenVINO Export in Deployment + +Once you have the OpenVINO files, you can use the OpenVINO Runtime to run the model. The Runtime provides a unified API to inference across all supported Intel hardware. It also provides advanced capabilities like load balancing across Intel hardware and asynchronous execution. For more information on running the inference, refer to the [Inference with OpenVINO Runtime Guide](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_OV_Runtime_User_Guide.html). + +Remember, you'll need the XML and BIN files as well as any application-specific settings like input size, scale factor for normalization, etc., to correctly set up and use the model with the Runtime. + +In your deployment application, you would typically do the following steps: + +1. Initialize OpenVINO by creating `core = Core()`. +2. Load the model using the `core.read_model()` method. +3. Compile the model using the `core.compile_model()` function. +4. Prepare the input (image, text, audio, etc.). +5. Run inference using `compiled_model(input_data)`. + +For more detailed steps and code snippets, refer to the [OpenVINO documentation](https://docs.openvino.ai/) or [API tutorial](https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/002-openvino-api/002-openvino-api.ipynb). + +## OpenVINO YOLOv8 Benchmarks + +YOLOv8 benchmarks below were run by the Ultralytics team on 4 different model formats measuring speed and accuracy: PyTorch, TorchScript, ONNX and OpenVINO. 
Benchmarks were run on Intel Flex and Arc GPUs, and on Intel Xeon CPUs at FP32 precision (with the `half=False` argument). + +!!! Note + + The benchmarking results below are for reference and might vary based on the exact hardware and software configuration of a system, as well as the current workload of the system at the time the benchmarks are run. + + All benchmarks run with `openvino` Python package version [2023.0.1](https://pypi.org/project/openvino/2023.0.1/). + +### Intel Flex GPU + +The Intel® Data Center GPU Flex Series is a versatile and robust solution designed for the intelligent visual cloud. This GPU supports a wide array of workloads including media streaming, cloud gaming, AI visual inference, and virtual desktop Infrastructure workloads. It stands out for its open architecture and built-in support for the AV1 encode, providing a standards-based software stack for high-performance, cross-architecture applications. The Flex Series GPU is optimized for density and quality, offering high reliability, availability, and scalability. + +Benchmarks below run on Intel® Data Center GPU Flex 170 at FP32 precision. + +
+Flex GPU benchmarks +
+ +| Model | Format | Status | Size (MB) | mAP50-95(B) | Inference time (ms/im) | +|---------|-------------|--------|-----------|-------------|------------------------| +| YOLOv8n | PyTorch | ✅ | 6.2 | 0.3709 | 21.79 | +| YOLOv8n | TorchScript | ✅ | 12.4 | 0.3704 | 23.24 | +| YOLOv8n | ONNX | ✅ | 12.2 | 0.3704 | 37.22 | +| YOLOv8n | OpenVINO | ✅ | 12.3 | 0.3703 | 3.29 | +| YOLOv8s | PyTorch | ✅ | 21.5 | 0.4471 | 31.89 | +| YOLOv8s | TorchScript | ✅ | 42.9 | 0.4472 | 32.71 | +| YOLOv8s | ONNX | ✅ | 42.8 | 0.4472 | 43.42 | +| YOLOv8s | OpenVINO | ✅ | 42.9 | 0.4470 | 3.92 | +| YOLOv8m | PyTorch | ✅ | 49.7 | 0.5013 | 50.75 | +| YOLOv8m | TorchScript | ✅ | 99.2 | 0.4999 | 47.90 | +| YOLOv8m | ONNX | ✅ | 99.0 | 0.4999 | 63.16 | +| YOLOv8m | OpenVINO | ✅ | 49.8 | 0.4997 | 7.11 | +| YOLOv8l | PyTorch | ✅ | 83.7 | 0.5293 | 77.45 | +| YOLOv8l | TorchScript | ✅ | 167.2 | 0.5268 | 85.71 | +| YOLOv8l | ONNX | ✅ | 166.8 | 0.5268 | 88.94 | +| YOLOv8l | OpenVINO | ✅ | 167.0 | 0.5264 | 9.37 | +| YOLOv8x | PyTorch | ✅ | 130.5 | 0.5404 | 100.09 | +| YOLOv8x | TorchScript | ✅ | 260.7 | 0.5371 | 114.64 | +| YOLOv8x | ONNX | ✅ | 260.4 | 0.5371 | 110.32 | +| YOLOv8x | OpenVINO | ✅ | 260.6 | 0.5367 | 15.02 | + +This table represents the benchmark results for five different models (YOLOv8n, YOLOv8s, YOLOv8m, YOLOv8l, YOLOv8x) across four different formats (PyTorch, TorchScript, ONNX, OpenVINO), giving us the status, size, mAP50-95(B) metric, and inference time for each combination. + +### Intel Arc GPU + +Intel® Arc™ represents Intel's foray into the dedicated GPU market. The Arc™ series, designed to compete with leading GPU manufacturers like AMD and Nvidia, caters to both the laptop and desktop markets. The series includes mobile versions for compact devices like laptops, and larger, more powerful versions for desktop computers. + +The Arc™ series is divided into three categories: Arc™ 3, Arc™ 5, and Arc™ 7, with each number indicating the performance level. 
Each category includes several models, and the 'M' in the GPU model name signifies a mobile, integrated variant. + +Early reviews have praised the Arc™ series, particularly the integrated A770M GPU, for its impressive graphics performance. The availability of the Arc™ series varies by region, and additional models are expected to be released soon. Intel® Arc™ GPUs offer high-performance solutions for a range of computing needs, from gaming to content creation. + +Benchmarks below run on Intel® Arc 770 GPU at FP32 precision. + +
+Arc GPU benchmarks +
+ +| Model | Format | Status | Size (MB) | metrics/mAP50-95(B) | Inference time (ms/im) | +|---------|-------------|--------|-----------|---------------------|------------------------| +| YOLOv8n | PyTorch | ✅ | 6.2 | 0.3709 | 88.79 | +| YOLOv8n | TorchScript | ✅ | 12.4 | 0.3704 | 102.66 | +| YOLOv8n | ONNX | ✅ | 12.2 | 0.3704 | 57.98 | +| YOLOv8n | OpenVINO | ✅ | 12.3 | 0.3703 | 8.52 | +| YOLOv8s | PyTorch | ✅ | 21.5 | 0.4471 | 189.83 | +| YOLOv8s | TorchScript | ✅ | 42.9 | 0.4472 | 227.58 | +| YOLOv8s | ONNX | ✅ | 42.7 | 0.4472 | 142.03 | +| YOLOv8s | OpenVINO | ✅ | 42.9 | 0.4469 | 9.19 | +| YOLOv8m | PyTorch | ✅ | 49.7 | 0.5013 | 411.64 | +| YOLOv8m | TorchScript | ✅ | 99.2 | 0.4999 | 517.12 | +| YOLOv8m | ONNX | ✅ | 98.9 | 0.4999 | 298.68 | +| YOLOv8m | OpenVINO | ✅ | 99.1 | 0.4996 | 12.55 | +| YOLOv8l | PyTorch | ✅ | 83.7 | 0.5293 | 725.73 | +| YOLOv8l | TorchScript | ✅ | 167.1 | 0.5268 | 892.83 | +| YOLOv8l | ONNX | ✅ | 166.8 | 0.5268 | 576.11 | +| YOLOv8l | OpenVINO | ✅ | 167.0 | 0.5262 | 17.62 | +| YOLOv8x | PyTorch | ✅ | 130.5 | 0.5404 | 988.92 | +| YOLOv8x | TorchScript | ✅ | 260.7 | 0.5371 | 1186.42 | +| YOLOv8x | ONNX | ✅ | 260.4 | 0.5371 | 768.90 | +| YOLOv8x | OpenVINO | ✅ | 260.6 | 0.5367 | 19 | + +### Intel Xeon CPU + +The Intel® Xeon® CPU is a high-performance, server-grade processor designed for complex and demanding workloads. From high-end cloud computing and virtualization to artificial intelligence and machine learning applications, Xeon® CPUs provide the power, reliability, and flexibility required for today's data centers. + +Notably, Xeon® CPUs deliver high compute density and scalability, making them ideal for both small businesses and large enterprises. By choosing Intel® Xeon® CPUs, organizations can confidently handle their most demanding computing tasks and foster innovation while maintaining cost-effectiveness and operational efficiency. + +Benchmarks below run on 4th Gen Intel® Xeon® Scalable CPU at FP32 precision. + +
+Xeon CPU benchmarks +
+ +| Model | Format | Status | Size (MB) | metrics/mAP50-95(B) | Inference time (ms/im) | +|---------|-------------|--------|-----------|---------------------|------------------------| +| YOLOv8n | PyTorch | ✅ | 6.2 | 0.3709 | 24.36 | +| YOLOv8n | TorchScript | ✅ | 12.4 | 0.3704 | 23.93 | +| YOLOv8n | ONNX | ✅ | 12.2 | 0.3704 | 39.86 | +| YOLOv8n | OpenVINO | ✅ | 12.3 | 0.3704 | 11.34 | +| YOLOv8s | PyTorch | ✅ | 21.5 | 0.4471 | 33.77 | +| YOLOv8s | TorchScript | ✅ | 42.9 | 0.4472 | 34.84 | +| YOLOv8s | ONNX | ✅ | 42.8 | 0.4472 | 43.23 | +| YOLOv8s | OpenVINO | ✅ | 42.9 | 0.4471 | 13.86 | +| YOLOv8m | PyTorch | ✅ | 49.7 | 0.5013 | 53.91 | +| YOLOv8m | TorchScript | ✅ | 99.2 | 0.4999 | 53.51 | +| YOLOv8m | ONNX | ✅ | 99.0 | 0.4999 | 64.16 | +| YOLOv8m | OpenVINO | ✅ | 99.1 | 0.4996 | 28.79 | +| YOLOv8l | PyTorch | ✅ | 83.7 | 0.5293 | 75.78 | +| YOLOv8l | TorchScript | ✅ | 167.2 | 0.5268 | 79.13 | +| YOLOv8l | ONNX | ✅ | 166.8 | 0.5268 | 88.45 | +| YOLOv8l | OpenVINO | ✅ | 167.0 | 0.5263 | 56.23 | +| YOLOv8x | PyTorch | ✅ | 130.5 | 0.5404 | 96.60 | +| YOLOv8x | TorchScript | ✅ | 260.7 | 0.5371 | 114.28 | +| YOLOv8x | ONNX | ✅ | 260.4 | 0.5371 | 111.02 | +| YOLOv8x | OpenVINO | ✅ | 260.6 | 0.5371 | 83.28 | + +### Intel Core CPU + +The Intel® Core® series is a range of high-performance processors by Intel. The lineup includes Core i3 (entry-level), Core i5 (mid-range), Core i7 (high-end), and Core i9 (extreme performance). Each series caters to different computing needs and budgets, from everyday tasks to demanding professional workloads. With each new generation, improvements are made to performance, energy efficiency, and features. + +Benchmarks below run on 13th Gen Intel® Core® i7-13700H CPU at FP32 precision. + +
+Core CPU benchmarks +
+ +| Model | Format | Status | Size (MB) | metrics/mAP50-95(B) | Inference time (ms/im) | +|---------|-------------|--------|-----------|---------------------|------------------------| +| YOLOv8n | PyTorch | ✅ | 6.2 | 0.4478 | 104.61 | +| YOLOv8n | TorchScript | ✅ | 12.4 | 0.4525 | 112.39 | +| YOLOv8n | ONNX | ✅ | 12.2 | 0.4525 | 28.02 | +| YOLOv8n | OpenVINO | ✅ | 12.3 | 0.4504 | 23.53 | +| YOLOv8s | PyTorch | ✅ | 21.5 | 0.5885 | 194.83 | +| YOLOv8s | TorchScript | ✅ | 43.0 | 0.5962 | 202.01 | +| YOLOv8s | ONNX | ✅ | 42.8 | 0.5962 | 65.74 | +| YOLOv8s | OpenVINO | ✅ | 42.9 | 0.5966 | 38.66 | +| YOLOv8m | PyTorch | ✅ | 49.7 | 0.6101 | 355.23 | +| YOLOv8m | TorchScript | ✅ | 99.2 | 0.6120 | 424.78 | +| YOLOv8m | ONNX | ✅ | 99.0 | 0.6120 | 173.39 | +| YOLOv8m | OpenVINO | ✅ | 99.1 | 0.6091 | 69.80 | +| YOLOv8l | PyTorch | ✅ | 83.7 | 0.6591 | 593.00 | +| YOLOv8l | TorchScript | ✅ | 167.2 | 0.6580 | 697.54 | +| YOLOv8l | ONNX | ✅ | 166.8 | 0.6580 | 342.15 | +| YOLOv8l | OpenVINO | ✅ | 167.0 | 0.0708 | 117.69 | +| YOLOv8x | PyTorch | ✅ | 130.5 | 0.6651 | 804.65 | +| YOLOv8x | TorchScript | ✅ | 260.8 | 0.6650 | 921.46 | +| YOLOv8x | ONNX | ✅ | 260.4 | 0.6650 | 526.66 | +| YOLOv8x | OpenVINO | ✅ | 260.6 | 0.6619 | 158.73 | + +## Reproduce Our Results + +To reproduce the Ultralytics benchmarks above on all export [formats](../modes/export.md) run this code: + +!!! 
Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a YOLOv8n PyTorch model + model = YOLO('yolov8n.pt') + + # Benchmark YOLOv8n speed and accuracy on the COCO128 dataset for all export formats + results = model.benchmark(data='coco128.yaml') + ``` + === "CLI" + + ```bash + # Benchmark YOLOv8n speed and accuracy on the COCO128 dataset for all export formats + yolo benchmark model=yolov8n.pt data=coco128.yaml + ``` + + Note that benchmarking results might vary based on the exact hardware and software configuration of a system, as well as the current workload of the system at the time the benchmarks are run. For the most reliable results use a dataset with a large number of images, i.e. `data='coco128.yaml'` (128 val images), or `data='coco.yaml'` (5000 val images). + +## Conclusion + +The benchmarking results clearly demonstrate the benefits of exporting the YOLOv8 model to the OpenVINO format. Across different models and hardware platforms, the OpenVINO format consistently outperforms other formats in terms of inference speed while maintaining comparable accuracy. + +For the Intel® Data Center GPU Flex Series, the OpenVINO format was able to deliver inference speeds almost 10 times faster than the original PyTorch format. On the Xeon CPU, the OpenVINO format was twice as fast as the PyTorch format. The accuracy of the models remained nearly identical across the different formats. + +The benchmarks underline the effectiveness of OpenVINO as a tool for deploying deep learning models. By converting models to the OpenVINO format, developers can achieve significant performance improvements, making it easier to deploy these models in real-world applications. + +For more detailed information and instructions on using OpenVINO, refer to the [official OpenVINO documentation](https://docs.openvino.ai/). 
diff --git a/docs/en/integrations/ray-tune.md b/docs/en/integrations/ray-tune.md new file mode 100644 index 0000000..3825cf7 --- /dev/null +++ b/docs/en/integrations/ray-tune.md @@ -0,0 +1,179 @@ +--- +comments: true +description: Discover how to streamline hyperparameter tuning for YOLOv8 models with Ray Tune. Learn to accelerate tuning, integrate with Weights & Biases, and analyze results. +keywords: Ultralytics, YOLOv8, Ray Tune, hyperparameter tuning, machine learning optimization, Weights & Biases integration, result analysis +--- + +# Efficient Hyperparameter Tuning with Ray Tune and YOLOv8 + +Hyperparameter tuning is vital in achieving peak model performance by discovering the optimal set of hyperparameters. This involves running trials with different hyperparameters and evaluating each trial’s performance. + +## Accelerate Tuning with Ultralytics YOLOv8 and Ray Tune + +[Ultralytics YOLOv8](https://ultralytics.com) incorporates Ray Tune for hyperparameter tuning, streamlining the optimization of YOLOv8 model hyperparameters. With Ray Tune, you can utilize advanced search strategies, parallelism, and early stopping to expedite the tuning process. + +### Ray Tune + +

+ Ray Tune Overview +

+ +[Ray Tune](https://docs.ray.io/en/latest/tune/index.html) is a hyperparameter tuning library designed for efficiency and flexibility. It supports various search strategies, parallelism, and early stopping strategies, and seamlessly integrates with popular machine learning frameworks, including Ultralytics YOLOv8. + +### Integration with Weights & Biases + +YOLOv8 also allows optional integration with [Weights & Biases](https://wandb.ai/site) for monitoring the tuning process. + +## Installation + +To install the required packages, run: + +!!! Tip "Installation" + + === "CLI" + + ```bash + # Install and update Ultralytics and Ray Tune packages + pip install -U ultralytics "ray[tune]" + + # Optionally install W&B for logging + pip install wandb + ``` + +## Usage + +!!! Example "Usage" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a YOLOv8n model + model = YOLO('yolov8n.pt') + + # Start tuning hyperparameters for YOLOv8n training on the COCO8 dataset + result_grid = model.tune(data='coco8.yaml', use_ray=True) + ``` + +## `tune()` Method Parameters + +The `tune()` method in YOLOv8 provides an easy-to-use interface for hyperparameter tuning with Ray Tune. It accepts several arguments that allow you to customize the tuning process. Below is a detailed explanation of each parameter: + +| Parameter | Type | Description | Default Value | +|-----------------|------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------| +| `data` | `str` | The dataset configuration file (in YAML format) to run the tuner on. This file should specify the training and validation data paths, as well as other dataset-specific settings. 
| | +| `space` | `dict, optional` | A dictionary defining the hyperparameter search space for Ray Tune. Each key corresponds to a hyperparameter name, and the value specifies the range of values to explore during tuning. If not provided, YOLOv8 uses a default search space with various hyperparameters. | | +| `grace_period` | `int, optional` | The grace period in epochs for the [ASHA scheduler](https://docs.ray.io/en/latest/tune/api/schedulers.html) in Ray Tune. The scheduler will not terminate any trial before this number of epochs, allowing the model to have some minimum training before making a decision on early stopping. | 10 | +| `gpu_per_trial` | `int, optional` | The number of GPUs to allocate per trial during tuning. This helps manage GPU usage, particularly in multi-GPU environments. If not provided, the tuner will use all available GPUs. | None | +| `iterations` | `int, optional` | The maximum number of trials to run during tuning. This parameter helps control the total number of hyperparameter combinations tested, ensuring the tuning process does not run indefinitely. | 10 | +| `**train_args` | `dict, optional` | Additional arguments to pass to the `train()` method during tuning. These arguments can include settings like the number of training epochs, batch size, and other training-specific configurations. | {} | + +By customizing these parameters, you can fine-tune the hyperparameter optimization process to suit your specific needs and available computational resources. + +## Default Search Space Description + +The following table lists the default search space parameters for hyperparameter tuning in YOLOv8 with Ray Tune. Each parameter has a specific value range defined by `tune.uniform()`. 
+ +| Parameter | Value Range | Description | +|-------------------|----------------------------|------------------------------------------| +| `lr0` | `tune.uniform(1e-5, 1e-1)` | Initial learning rate | +| `lrf` | `tune.uniform(0.01, 1.0)` | Final learning rate factor | +| `momentum` | `tune.uniform(0.6, 0.98)` | Momentum | +| `weight_decay` | `tune.uniform(0.0, 0.001)` | Weight decay | +| `warmup_epochs` | `tune.uniform(0.0, 5.0)` | Warmup epochs | +| `warmup_momentum` | `tune.uniform(0.0, 0.95)` | Warmup momentum | +| `box` | `tune.uniform(0.02, 0.2)` | Box loss weight | +| `cls` | `tune.uniform(0.2, 4.0)` | Class loss weight | +| `hsv_h` | `tune.uniform(0.0, 0.1)` | Hue augmentation range | +| `hsv_s` | `tune.uniform(0.0, 0.9)` | Saturation augmentation range | +| `hsv_v` | `tune.uniform(0.0, 0.9)` | Value (brightness) augmentation range | +| `degrees` | `tune.uniform(0.0, 45.0)` | Rotation augmentation range (degrees) | +| `translate` | `tune.uniform(0.0, 0.9)` | Translation augmentation range | +| `scale` | `tune.uniform(0.0, 0.9)` | Scaling augmentation range | +| `shear` | `tune.uniform(0.0, 10.0)` | Shear augmentation range (degrees) | +| `perspective` | `tune.uniform(0.0, 0.001)` | Perspective augmentation range | +| `flipud` | `tune.uniform(0.0, 1.0)` | Vertical flip augmentation probability | +| `fliplr` | `tune.uniform(0.0, 1.0)` | Horizontal flip augmentation probability | +| `mosaic` | `tune.uniform(0.0, 1.0)` | Mosaic augmentation probability | +| `mixup` | `tune.uniform(0.0, 1.0)` | Mixup augmentation probability | +| `copy_paste` | `tune.uniform(0.0, 1.0)` | Copy-paste augmentation probability | + +## Custom Search Space Example + +In this example, we demonstrate how to use a custom search space for hyperparameter tuning with Ray Tune and YOLOv8. By providing a custom search space, you can focus the tuning process on specific hyperparameters of interest. + +!!! 
Example "Usage" + + ```python + from ultralytics import YOLO + + # Define a YOLO model + model = YOLO("yolov8n.pt") + + # Run Ray Tune on the model + result_grid = model.tune(data="coco128.yaml", + space={"lr0": tune.uniform(1e-5, 1e-1)}, + epochs=50, + use_ray=True) + ``` + +In the code snippet above, we create a YOLO model with the "yolov8n.pt" pretrained weights. Then, we call the `tune()` method, specifying the dataset configuration with "coco128.yaml". We provide a custom search space for the initial learning rate `lr0` using a dictionary with the key "lr0" and the value `tune.uniform(1e-5, 1e-1)`. Finally, we pass additional training arguments, such as the number of epochs directly to the tune method as `epochs=50`. + +## Processing Ray Tune Results + +After running a hyperparameter tuning experiment with Ray Tune, you might want to perform various analyses on the obtained results. This guide will take you through common workflows for processing and analyzing these results. + +### Loading Tune Experiment Results from a Directory + +After running the tuning experiment with `tuner.fit()`, you can load the results from a directory. This is useful, especially if you're performing the analysis after the initial training script has exited. + +```python +experiment_path = f"{storage_path}/{exp_name}" +print(f"Loading results from {experiment_path}...") + +restored_tuner = tune.Tuner.restore(experiment_path, trainable=train_mnist) +result_grid = restored_tuner.get_results() +``` + +### Basic Experiment-Level Analysis + +Get an overview of how trials performed. You can quickly check if there were any errors during the trials. + +```python +if result_grid.errors: + print("One or more trials failed!") +else: + print("No errors!") +``` + +### Basic Trial-Level Analysis + +Access individual trial hyperparameter configurations and the last reported metrics. 
+ +```python +for i, result in enumerate(result_grid): + print(f"Trial #{i}: Configuration: {result.config}, Last Reported Metrics: {result.metrics}") +``` + +### Plotting the Entire History of Reported Metrics for a Trial + +You can plot the history of reported metrics for each trial to see how the metrics evolved over time. + +```python +import matplotlib.pyplot as plt + +for i, result in enumerate(result_grid): + plt.plot(result.metrics_dataframe["training_iteration"], result.metrics_dataframe["mean_accuracy"], label=f"Trial {i}") + +plt.xlabel('Training Iterations') +plt.ylabel('Mean Accuracy') +plt.legend() +plt.show() +``` + +## Summary + +In this documentation, we covered common workflows to analyze the results of experiments run with Ray Tune using Ultralytics. The key steps include loading the experiment results from a directory, performing basic experiment-level and trial-level analysis and plotting metrics. + +Explore further by looking into Ray Tune’s [Analyze Results](https://docs.ray.io/en/latest/tune/examples/tune_analyze_results.html) docs page to get the most out of your hyperparameter tuning experiments. diff --git a/docs/en/integrations/roboflow.md b/docs/en/integrations/roboflow.md new file mode 100644 index 0000000..f640918 --- /dev/null +++ b/docs/en/integrations/roboflow.md @@ -0,0 +1,239 @@ +--- +comments: true +description: Learn how to use Roboflow with Ultralytics for labeling and managing images for use in training, and for evaluating model performance. +keywords: Ultralytics, YOLOv8, Roboflow, vector analysis, confusion matrix, data management, image labeling +--- + +# Roboflow + +[Roboflow](https://roboflow.com/?ref=ultralytics) has everything you need to build and deploy computer vision models. Connect Roboflow at any step in your pipeline with APIs and SDKs, or use the end-to-end interface to automate the entire process from image to inference. 
Whether you’re in need of [data labeling](https://roboflow.com/annotate?ref=ultralytics), [model training](https://roboflow.com/train?ref=ultralytics), or [model deployment](https://roboflow.com/deploy?ref=ultralytics), Roboflow gives you building blocks to bring custom computer vision solutions to your project. + +!!! Warning + + Roboflow users can use Ultralytics under the [AGPL license](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) or procure an [Enterprise license](https://ultralytics.com/license) directly from Ultralytics. Be aware that Roboflow does **not** provide Ultralytics licenses, and it is the responsibility of the user to ensure appropriate licensing. + +In this guide, we are going to showcase how to find, label, and organize data for use in training a custom Ultralytics YOLOv8 model. Use the table of contents below to jump directly to a specific section: + +- Gather data for training a custom YOLOv8 model +- Upload, convert and label data for YOLOv8 format +- Pre-process and augment data for model robustness +- Dataset management for [YOLOv8](https://docs.ultralytics.com/models/yolov8/) +- Export data in 40+ formats for model training +- Upload custom YOLOv8 model weights for testing and deployment +- Gather Data for Training a Custom YOLOv8 Model + +Roboflow provides two services that can help you collect data for YOLOv8 models: [Universe](https://universe.roboflow.com/?ref=ultralytics) and [Collect](https://roboflow.com/collect?ref=ultralytics). + +Universe is an online repository with over 250,000 vision datasets totalling over 100 million images. + +

+Roboflow Universe +

+ +With a [free Roboflow account](https://app.roboflow.com/?ref=ultralytics), you can export any dataset available on Universe. To export a dataset, click the "Download this Dataset" button on any dataset. + + +

+Roboflow Universe dataset export +

+ +For YOLOv8, select "YOLOv8" as the export format: + +

+Roboflow Universe dataset export +

+ +Universe also has a page that aggregates all [public fine-tuned YOLOv8 models uploaded to Roboflow](https://universe.roboflow.com/search?q=model:yolov8). You can use this page to explore pre-trained models you can use for testing or [for automated data labeling](https://docs.roboflow.com/annotate/use-roboflow-annotate/model-assisted-labeling) or to prototype with [Roboflow inference](https://roboflow.com/inference?ref=ultralytics). + +If you want to gather images yourself, try [Collect](https://github.com/roboflow/roboflow-collect), an open source project that allows you to automatically gather images using a webcam on the edge. You can use text or image prompts with Collect to instruct what data should be collected, allowing you to capture only the useful data you need to build your vision model. + +## Upload, Convert and Label Data for YOLOv8 Format + +[Roboflow Annotate](https://docs.roboflow.com/annotate/use-roboflow-annotate) is an online annotation tool for use in labeling images for object detection, classification, and segmentation. + +To label data for a YOLOv8 object detection, instance segmentation, or classification model, first create a project in Roboflow. + +

+Create a Roboflow project +

+ +Next, upload your images, and any pre-existing annotations you have from other tools ([using one of the 40+ supported import formats](https://roboflow.com/formats?ref=ultralytics)), into Roboflow. + +

+Upload images to Roboflow +

+ +Select the batch of images you have uploaded on the Annotate page to which you are taken after uploading images. Then, click "Start Annotating" to label images. + +To label with bounding boxes, press the `B` key on your keyboard or click the box icon in the sidebar. Click on a point where you want to start your bounding box, then drag to create the box: + +

+Annotating an image in Roboflow +

+ +A pop-up will appear asking you to select a class for your annotation once you have created an annotation. + +To label with polygons, press the `P` key on your keyboard, or click the polygon icon in the sidebar. With the polygon annotation tool enabled, click on individual points in the image to draw a polygon. + +Roboflow offers a SAM-based label assistant with which you can label images faster than ever. SAM (Segment Anything Model) is a state-of-the-art computer vision model that can precisely label images. With SAM, you can significantly speed up the image labeling process. Annotating images with polygons becomes as simple as a few clicks, rather than the tedious process of precisely clicking points around an object. + +To use the label assistant, click the cursor icon in the sidebar. SAM will then be loaded for use in your project. + +

+Annotating an image in Roboflow with SAM-powered label assist +

+ +Hover over any object in the image and SAM will recommend an annotation. You can hover to find the right place to annotate, then click to create your annotation. To amend your annotation to be more or less specific, you can click inside or outside of the annotation SAM has created on the document. + +You can also add tags to images from the Tags panel in the sidebar. You can apply tags to data from a particular area, taken from a specific camera, and more. You can then use these tags to search through data for images matching a tag and generate versions of a dataset with images that contain a particular tag or set of tags. + +

+Adding tags to an image in Roboflow +

+ +Models hosted on Roboflow can be used with Label Assist, an automated annotation tool that uses your YOLOv8 model to recommend annotations. To use Label Assist, first upload a YOLOv8 model to Roboflow (see instructions later in the guide). Then, click the magic wand icon in the left sidebar and select your model for use in Label Assist. + +Choose a model, then click "Continue" to enable Label Assist: + +

+Enabling Label Assist +

+ +When you open new images for annotation, Label Assist will trigger and recommend annotations. + +

+Label Assist recommending an annotation

+ +## Dataset Management for YOLOv8 + +Roboflow provides a suite of tools for understanding computer vision datasets. + +First, you can use dataset search to find images that meet a semantic text description (e.g. find all images that contain people), or that meet a specified label (i.e. the image is associated with a specific tag). To use dataset search, click "Dataset" in the sidebar. Then, input a search query using the search bar and associated filters at the top of the page. + +For example, the following text query finds images that contain people in a dataset: + +

+Searching for an image +

+ +You can narrow your search to images with a particular tag using the "Tags" selector: + +

+Filter images by tag +

+ +Before you start training a model with your dataset, we recommend using Roboflow [Health Check](https://docs.roboflow.com/datasets/dataset-health-check), a web tool that provides an insight into your dataset and how you can improve the dataset prior to training a vision model. + +To use Health Check, click the "Health Check" sidebar link. A list of statistics will appear that show the average size of images in your dataset, class balance, a heatmap of where annotations are in your images, and more. + +

+Roboflow Health Check analysis +

+ +Health Check may recommend changes to help enhance dataset performance. For example, the class balance feature may show that there is an imbalance in labels that, if solved, may boost performance of your model. + +## Export Data in 40+ Formats for Model Training + +To export your data, you will need a dataset version. A version is a state of your dataset frozen-in-time. To create a version, first click "Versions" in the sidebar. Then, click the "Create New Version" button. On this page, you will be able to choose augmentations and preprocessing steps to apply to your dataset: + +

+Creating a dataset version on Roboflow +

+ +For each augmentation you select, a pop-up will appear allowing you to tune the augmentation to your needs. Here is an example of tuning a brightness augmentation within specified parameters: + +

+Applying augmentations to a dataset +

+ +When your dataset version has been generated, you can export your data into a range of formats. Click the "Export Dataset" button on your dataset version page to export your data: + +

+Exporting a dataset +

+ +You are now ready to train YOLOv8 on a custom dataset. Follow this [written guide](https://blog.roboflow.com/how-to-train-yolov8-on-a-custom-dataset/) and [YouTube video](https://www.youtube.com/watch?v=wuZtUMEiKWY) for step-by-step instructions or refer to the [Ultralytics documentation](https://docs.ultralytics.com/modes/train/). + +## Upload Custom YOLOv8 Model Weights for Testing and Deployment + +Roboflow offers an infinitely scalable API for deployed models and SDKs for use with NVIDIA Jetsons, Luxonis OAKs, Raspberry Pis, GPU-based devices, and more. + +You can deploy YOLOv8 models by uploading YOLOv8 weights to Roboflow. You can do this in a few lines of Python code. Create a new Python file and add the following code: + +```python +import roboflow # install with 'pip install roboflow' + +roboflow.login() + +rf = roboflow.Roboflow() + +project = rf.workspace(WORKSPACE_ID).project("football-players-detection-3zvbc") +dataset = project.version(VERSION).download("yolov8") + +project.version(dataset.version).deploy(model_type="yolov8", model_path=f"{HOME}/runs/detect/train/") +``` + +In this code, replace the project ID and version ID with the values for your account and project. [Learn how to retrieve your Roboflow API key](https://docs.roboflow.com/api-reference/authentication#retrieve-an-api-key). + +When you run the code above, you will be asked to authenticate. Then, your model will be uploaded and an API will be created for your project. This process can take up to 30 minutes to complete. + +To test your model and find deployment instructions for supported SDKs, go to the "Deploy" tab in the Roboflow sidebar. At the top of this page, a widget will appear with which you can test your model. You can use your webcam for live testing or upload images or videos. + +

+Running inference on an example image +

+ +You can also use your uploaded model as a [labeling assistant](https://docs.roboflow.com/annotate/use-roboflow-annotate/model-assisted-labeling). This feature uses your trained model to recommend annotations on images uploaded to Roboflow. + +## How to Evaluate YOLOv8 Models + +Roboflow provides a range of features for use in evaluating models. + +Once you have uploaded a model to Roboflow, you can access our model evaluation tool, which provides a confusion matrix showing the performance of your model as well as an interactive vector analysis plot. These features can help you find opportunities to improve your model. + +To access a confusion matrix, go to your model page on the Roboflow dashboard, then click "View Detailed Evaluation": + +

+Start a Roboflow model evaluation +

+ +A pop-up will appear showing a confusion matrix: + +

+A confusion matrix +

+ +Hover over a box on the confusion matrix to see the value associated with the box. Click on a box to see images in the respective category. Click on an image to view the model predictions and ground truth data associated with that image. + +For more insights, click Vector Analysis. This will show a scatter plot of the images in your dataset, calculated using CLIP. The closer images are in the plot, the more similar they are, semantically. Each image is represented as a dot with a color between white and red. The more red the dot, the worse the model performed. + +

+A vector analysis plot +

+ +You can use Vector Analysis to: + +- Find clusters of images; +- Identify clusters where the model performs poorly, and; +- Visualize commonalities between images on which the model performs poorly. + +## Learning Resources + +Want to learn more about using Roboflow for creating YOLOv8 models? The following resources may be helpful in your work. + +- [Train YOLOv8 on a Custom Dataset](https://github.com/roboflow/notebooks/blob/main/notebooks/train-yolov8-object-detection-on-custom-dataset.ipynb): Follow our interactive notebook that shows you how to train a YOLOv8 model on a custom dataset. +- [Autodistill](https://autodistill.github.io/autodistill/): Use large foundation vision models to label data for specific models. You can label images for use in training YOLOv8 classification, detection, and segmentation models with Autodistill. +- [Supervision](https://roboflow.github.io/supervision/): A Python package with helpful utilities for use in working with computer vision models. You can use supervision to filter detections, compute confusion matrices, and more, all in a few lines of Python code. +- [Roboflow Blog](https://blog.roboflow.com/): The Roboflow Blog features over 500 articles on computer vision, covering topics from how to train a YOLOv8 model to annotation best practices. +- [Roboflow YouTube channel](https://www.youtube.com/@Roboflow): Browse dozens of in-depth computer vision guides on our YouTube channel, covering topics from training YOLOv8 models to automated image labeling. + +## Project Showcase + +Below are a few of the many pieces of feedback we have received for using YOLOv8 and Roboflow together to create computer vision models. + +

+Showcase image +Showcase image +Showcase image +

diff --git a/docs/en/models/fast-sam.md b/docs/en/models/fast-sam.md new file mode 100644 index 0000000..d528d47 --- /dev/null +++ b/docs/en/models/fast-sam.md @@ -0,0 +1,193 @@ +--- +comments: true +description: Explore FastSAM, a CNN-based solution for real-time object segmentation in images. Enhanced user interaction, computational efficiency and adaptable across vision tasks. +keywords: FastSAM, machine learning, CNN-based solution, object segmentation, real-time solution, Ultralytics, vision tasks, image processing, industrial applications, user interaction +--- + +# Fast Segment Anything Model (FastSAM) + +The Fast Segment Anything Model (FastSAM) is a novel, real-time CNN-based solution for the Segment Anything task. This task is designed to segment any object within an image based on various possible user interaction prompts. FastSAM significantly reduces computational demands while maintaining competitive performance, making it a practical choice for a variety of vision tasks. + +![Fast Segment Anything Model (FastSAM) architecture overview](https://user-images.githubusercontent.com/26833433/248551984-d98f0f6d-7535-45d0-b380-2e1440b52ad7.jpg) + +## Overview + +FastSAM is designed to address the limitations of the [Segment Anything Model (SAM)](sam.md), a heavy Transformer model with substantial computational resource requirements. The FastSAM decouples the segment anything task into two sequential stages: all-instance segmentation and prompt-guided selection. The first stage uses [YOLOv8-seg](../tasks/segment.md) to produce the segmentation masks of all instances in the image. In the second stage, it outputs the region-of-interest corresponding to the prompt. + +## Key Features + +1. **Real-time Solution:** By leveraging the computational efficiency of CNNs, FastSAM provides a real-time solution for the segment anything task, making it valuable for industrial applications that require quick results. + +2. 
**Efficiency and Performance:** FastSAM offers a significant reduction in computational and resource demands without compromising on performance quality. It achieves comparable performance to SAM but with drastically reduced computational resources, enabling real-time application. + +3. **Prompt-guided Segmentation:** FastSAM can segment any object within an image guided by various possible user interaction prompts, providing flexibility and adaptability in different scenarios. + +4. **Based on YOLOv8-seg:** FastSAM is based on [YOLOv8-seg](../tasks/segment.md), an object detector equipped with an instance segmentation branch. This allows it to effectively produce the segmentation masks of all instances in an image. + +5. **Competitive Results on Benchmarks:** On the object proposal task on MS COCO, FastSAM achieves high scores at a significantly faster speed than [SAM](sam.md) on a single NVIDIA RTX 3090, demonstrating its efficiency and capability. + +6. **Practical Applications:** The proposed approach provides a new, practical solution for a large number of vision tasks at a really high speed, tens or hundreds of times faster than current methods. + +7. **Model Compression Feasibility:** FastSAM demonstrates the feasibility of a path that can significantly reduce the computational effort by introducing an artificial prior to the structure, thus opening new possibilities for large model architecture for general vision tasks. + +## Available Models, Supported Tasks, and Operating Modes + +This table presents the available models with their specific pre-trained weights, the tasks they support, and their compatibility with different operating modes like [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), and [Export](../modes/export.md), indicated by ✅ emojis for supported modes and ❌ emojis for unsupported modes. 
+ +| Model Type | Pre-trained Weights | Tasks Supported | Inference | Validation | Training | Export | +|------------|---------------------|----------------------------------------------|-----------|------------|----------|--------| +| FastSAM-s | `FastSAM-s.pt` | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| FastSAM-x | `FastSAM-x.pt` | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## Usage Examples + +The FastSAM models are easy to integrate into your Python applications. Ultralytics provides a user-friendly Python API and CLI commands to streamline development. + +### Predict Usage + +To perform instance segmentation on an image, use the `predict` method as shown below: + +!!! Example + + === "Python" + ```python + from ultralytics import FastSAM + from ultralytics.models.fastsam import FastSAMPrompt + + # Define an inference source + source = 'path/to/bus.jpg' + + # Create a FastSAM model + model = FastSAM('FastSAM-s.pt') # or FastSAM-x.pt + + # Run inference on an image + everything_results = model(source, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9) + + # Prepare a Prompt Process object + prompt_process = FastSAMPrompt(source, everything_results, device='cpu') + + # Everything prompt + ann = prompt_process.everything_prompt() + + # Bbox default shape [0,0,0,0] -> [x1,y1,x2,y2] + ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300]) + + # Text prompt + ann = prompt_process.text_prompt(text='a photo of a dog') + + # Point prompt + # points default [[0,0]] [[x1,y1],[x2,y2]] + # point_label default [0] [1,0] 0:background, 1:foreground + ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1]) + prompt_process.plot(annotations=ann, output='./') + ``` + + === "CLI" + ```bash + # Load a FastSAM model and segment everything with it + yolo segment predict model=FastSAM-s.pt source=path/to/bus.jpg imgsz=640 + ``` + +This snippet demonstrates the simplicity of loading a pre-trained model and running 
a prediction on an image. + +### Val Usage + +Validation of the model on a dataset can be done as follows: + +!!! Example + + === "Python" + ```python + from ultralytics import FastSAM + + # Create a FastSAM model + model = FastSAM('FastSAM-s.pt') # or FastSAM-x.pt + + # Validate the model + results = model.val(data='coco8-seg.yaml') + ``` + + === "CLI" + ```bash + # Load a FastSAM model and validate it on the COCO8-seg example dataset at image size 640 + yolo segment val model=FastSAM-s.pt data=coco8-seg.yaml imgsz=640 + ``` + +Please note that FastSAM only supports detection and segmentation of a single class of object. This means it will recognize and segment all objects as the same class. Therefore, when preparing the dataset, you need to convert all object category IDs to 0. + +## FastSAM official Usage + +FastSAM is also available directly from the [https://github.com/CASIA-IVA-Lab/FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM) repository. Here is a brief overview of the typical steps you might take to use FastSAM: + +### Installation + +1. Clone the FastSAM repository: + ```shell + git clone https://github.com/CASIA-IVA-Lab/FastSAM.git + ``` + +2. Create and activate a Conda environment with Python 3.9: + ```shell + conda create -n FastSAM python=3.9 + conda activate FastSAM + ``` + +3. Navigate to the cloned repository and install the required packages: + ```shell + cd FastSAM + pip install -r requirements.txt + ``` + +4. Install the CLIP model: + ```shell + pip install git+https://github.com/openai/CLIP.git + ``` + +### Example Usage + +1. Download a [model checkpoint](https://drive.google.com/file/d/1m1sjY4ihXBU1fZXdQ-Xdj-mDltW-2Rqv/view?usp=sharing). + +2. Use FastSAM for inference. 
Example commands: + + - Segment everything in an image: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg + ``` + + - Segment specific objects using text prompt: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --text_prompt "the yellow dog" + ``` + + - Segment objects within a bounding box (provide box coordinates in xywh format): + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --box_prompt "[570,200,230,400]" + ``` + + - Segment objects near specific points: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --point_prompt "[[520,360],[620,300]]" --point_label "[1,0]" + ``` + +Additionally, you can try FastSAM through a [Colab demo](https://colab.research.google.com/drive/1oX14f6IneGGw612WgVlAiy91UHwFAvr9?usp=sharing) or on the [HuggingFace web demo](https://huggingface.co/spaces/An-619/FastSAM) for a visual experience. + +## Citations and Acknowledgements + +We would like to acknowledge the FastSAM authors for their significant contributions in the field of real-time instance segmentation: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{zhao2023fast, + title={Fast Segment Anything}, + author={Xu Zhao and Wenchao Ding and Yongqi An and Yinglong Du and Tao Yu and Min Li and Ming Tang and Jinqiao Wang}, + year={2023}, + eprint={2306.12156}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +The original FastSAM paper can be found on [arXiv](https://arxiv.org/abs/2306.12156). The authors have made their work publicly available, and the codebase can be accessed on [GitHub](https://github.com/CASIA-IVA-Lab/FastSAM). We appreciate their efforts in advancing the field and making their work accessible to the broader community. 
diff --git a/docs/en/models/index.md b/docs/en/models/index.md new file mode 100644 index 0000000..93bbdb9 --- /dev/null +++ b/docs/en/models/index.md @@ -0,0 +1,94 @@ +--- +comments: true +description: Explore the diverse range of YOLO family, SAM, MobileSAM, FastSAM, YOLO-NAS, and RT-DETR models supported by Ultralytics. Get started with examples for both CLI and Python usage. +keywords: Ultralytics, documentation, YOLO, SAM, MobileSAM, FastSAM, YOLO-NAS, RT-DETR, models, architectures, Python, CLI +--- + +# Models Supported by Ultralytics + +Welcome to Ultralytics' model documentation! We offer support for a wide range of models, each tailored to specific tasks like [object detection](../tasks/detect.md), [instance segmentation](../tasks/segment.md), [image classification](../tasks/classify.md), [pose estimation](../tasks/pose.md), and [multi-object tracking](../modes/track.md). If you're interested in contributing your model architecture to Ultralytics, check out our [Contributing Guide](../help/contributing.md). + +## Featured Models + +Here are some of the key models supported: + +1. **[YOLOv3](yolov3.md)**: The third iteration of the YOLO model family, originally by Joseph Redmon, known for its efficient real-time object detection capabilities. +2. **[YOLOv4](yolov4.md)**: A darknet-native update to YOLOv3, released by Alexey Bochkovskiy in 2020. +3. **[YOLOv5](yolov5.md)**: An improved version of the YOLO architecture by Ultralytics, offering better performance and speed trade-offs compared to previous versions. +4. **[YOLOv6](yolov6.md)**: Released by [Meituan](https://about.meituan.com/) in 2022, and in use in many of the company's autonomous delivery robots. +5. **[YOLOv7](yolov7.md)**: Updated YOLO models released in 2022 by the authors of YOLOv4. +6. **[YOLOv8](yolov8.md) NEW 🚀**: The latest version of the YOLO family, featuring enhanced capabilities such as instance segmentation, pose/keypoints estimation, and classification. +7. 
**[Segment Anything Model (SAM)](sam.md)**: Meta's Segment Anything Model (SAM). +8. **[Mobile Segment Anything Model (MobileSAM)](mobile-sam.md)**: MobileSAM for mobile applications, by Kyung Hee University. +9. **[Fast Segment Anything Model (FastSAM)](fast-sam.md)**: FastSAM by Image & Video Analysis Group, Institute of Automation, Chinese Academy of Sciences. +10. **[YOLO-NAS](yolo-nas.md)**: YOLO Neural Architecture Search (NAS) Models. +11. **[Realtime Detection Transformers (RT-DETR)](rtdetr.md)**: Baidu's PaddlePaddle Realtime Detection Transformer (RT-DETR) models. + +

+
+ +
+ Watch: Run Ultralytics YOLO models in just a few lines of code. +

+ +## Getting Started: Usage Examples + +This example provides simple YOLO training and inference examples. For full documentation on these and other [modes](../modes/index.md) see the [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) and [Export](../modes/export.md) docs pages. + +Note the below example is for YOLOv8 [Detect](../tasks/detect.md) models for object detection. For additional supported tasks see the [Segment](../tasks/segment.md), [Classify](../tasks/classify.md) and [Pose](../tasks/pose.md) docs. + +!!! Example + + === "Python" + + PyTorch pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()`, `SAM()`, `NAS()` and `RTDETR()` classes to create a model instance in Python: + + ```python + from ultralytics import YOLO + + # Load a COCO-pretrained YOLOv8n model + model = YOLO('yolov8n.pt') + + # Display model information (optional) + model.info() + + # Train the model on the COCO8 example dataset for 100 epochs + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Run inference with the YOLOv8n model on the 'bus.jpg' image + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLI commands are available to directly run the models: + + ```bash + # Load a COCO-pretrained YOLOv8n model and train it on the COCO8 example dataset for 100 epochs + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Load a COCO-pretrained YOLOv8n model and run inference on the 'bus.jpg' image + yolo predict model=yolov8n.pt source=path/to/bus.jpg + ``` + +## Contributing New Models + +Interested in contributing your model to Ultralytics? Great! We're always open to expanding our model portfolio. + +1. **Fork the Repository**: Start by forking the [Ultralytics GitHub repository](https://github.com/ultralytics/ultralytics). + +2. **Clone Your Fork**: Clone your fork to your local machine and create a new branch to work on. + +3. 
**Implement Your Model**: Add your model following the coding standards and guidelines provided in our [Contributing Guide](../help/contributing.md). + +4. **Test Thoroughly**: Make sure to test your model rigorously, both in isolation and as part of the pipeline. + +5. **Create a Pull Request**: Once you're satisfied with your model, create a pull request to the main repository for review. + +6. **Code Review & Merging**: After review, if your model meets our criteria, it will be merged into the main repository. + +For detailed steps, consult our [Contributing Guide](../help/contributing.md). diff --git a/docs/en/models/mobile-sam.md b/docs/en/models/mobile-sam.md new file mode 100644 index 0000000..c06e351 --- /dev/null +++ b/docs/en/models/mobile-sam.md @@ -0,0 +1,117 @@ +--- +comments: true +description: Learn more about MobileSAM, its implementation, comparison with the original SAM, and how to download and test it in the Ultralytics framework. Improve your mobile applications today. +keywords: MobileSAM, Ultralytics, SAM, mobile applications, Arxiv, GPU, API, image encoder, mask decoder, model download, testing method +--- + +![MobileSAM Logo](https://github.com/ChaoningZhang/MobileSAM/blob/master/assets/logo2.png?raw=true) + +# Mobile Segment Anything (MobileSAM) + +The MobileSAM paper is now available on [arXiv](https://arxiv.org/pdf/2306.14289.pdf). + +A demonstration of MobileSAM running on a CPU can be accessed at this [demo link](https://huggingface.co/spaces/dhkim2810/MobileSAM). The performance on a Mac i5 CPU takes approximately 3 seconds. On the Hugging Face demo, the interface and lower-performance CPUs contribute to a slower response, but it continues to function effectively. + +MobileSAM is implemented in various projects including [Grounding-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything), [AnyLabeling](https://github.com/vietanhdev/anylabeling), and [Segment Anything in 3D](https://github.com/Jumpat/SegmentAnythingin3D). 
+ +MobileSAM is trained on a single GPU with a 100k dataset (1% of the original images) in less than a day. The code for this training will be made available in the future. + +## Available Models, Supported Tasks, and Operating Modes + +This table presents the available models with their specific pre-trained weights, the tasks they support, and their compatibility with different operating modes like [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), and [Export](../modes/export.md), indicated by ✅ emojis for supported modes and ❌ emojis for unsupported modes. + +| Model Type | Pre-trained Weights | Tasks Supported | Inference | Validation | Training | Export | +|------------|---------------------|----------------------------------------------|-----------|------------|----------|--------| +| MobileSAM | `mobile_sam.pt` | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## Adapting from SAM to MobileSAM + +Since MobileSAM retains the same pipeline as the original SAM, we have incorporated the original's pre-processing, post-processing, and all other interfaces. Consequently, those currently using the original SAM can transition to MobileSAM with minimal effort. + +MobileSAM performs comparably to the original SAM and retains the same pipeline except for a change in the image encoder. Specifically, we replace the original heavyweight ViT-H encoder (632M) with a smaller Tiny-ViT (5M). On a single GPU, MobileSAM operates at about 12ms per image: 8ms on the image encoder and 4ms on the mask decoder. 
+ +The following table provides a comparison of ViT-based image encoders: + +| Image Encoder | Original SAM | MobileSAM | +|---------------|--------------|-----------| +| Parameters | 611M | 5M | +| Speed | 452ms | 8ms | + +Both the original SAM and MobileSAM utilize the same prompt-guided mask decoder: + +| Mask Decoder | Original SAM | MobileSAM | +|--------------|--------------|-----------| +| Parameters | 3.876M | 3.876M | +| Speed | 4ms | 4ms | + +Here is the comparison of the whole pipeline: + +| Whole Pipeline (Enc+Dec) | Original SAM | MobileSAM | +|--------------------------|--------------|-----------| +| Parameters | 615M | 9.66M | +| Speed | 456ms | 12ms | + +The performance of MobileSAM and the original SAM are demonstrated using both a point and a box as prompts. + +![Image with Point as Prompt](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/mask_box.jpg?raw=true) + +![Image with Box as Prompt](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/mask_box.jpg?raw=true) + +With its superior performance, MobileSAM is approximately 5 times smaller and 7 times faster than the current FastSAM. More details are available at the [MobileSAM project page](https://github.com/ChaoningZhang/MobileSAM). + +## Testing MobileSAM in Ultralytics + +Just like the original SAM, we offer a straightforward testing method in Ultralytics, including modes for both Point and Box prompts. + +### Model Download + +You can download the model [here](https://github.com/ChaoningZhang/MobileSAM/blob/master/weights/mobile_sam.pt). + +### Point Prompt + +!!! Example + + === "Python" + ```python + from ultralytics import SAM + + # Load the model + model = SAM('mobile_sam.pt') + + # Predict a segment based on a point prompt + model.predict('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +### Box Prompt + +!!! 
Example + + === "Python" + ```python + from ultralytics import SAM + + # Load the model + model = SAM('mobile_sam.pt') + + # Predict a segment based on a box prompt + model.predict('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + ``` + +We have implemented `MobileSAM` and `SAM` using the same API. For more usage information, please see the [SAM page](sam.md). + +## Citations and Acknowledgements + +If you find MobileSAM useful in your research or development work, please consider citing our paper: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{mobile_sam, + title={Faster Segment Anything: Towards Lightweight SAM for Mobile Applications}, + author={Zhang, Chaoning and Han, Dongshen and Qiao, Yu and Kim, Jung Uk and Bae, Sung Ho and Lee, Seungkyu and Hong, Choong Seon}, + journal={arXiv preprint arXiv:2306.14289}, + year={2023} + } + ``` diff --git a/docs/en/models/rtdetr.md b/docs/en/models/rtdetr.md new file mode 100644 index 0000000..b7d449a --- /dev/null +++ b/docs/en/models/rtdetr.md @@ -0,0 +1,93 @@ +--- +comments: true +description: Discover the features and benefits of RT-DETR, Baidu’s efficient and adaptable real-time object detector powered by Vision Transformers, including pre-trained models. +keywords: RT-DETR, Baidu, Vision Transformers, object detection, real-time performance, CUDA, TensorRT, IoU-aware query selection, Ultralytics, Python API, PaddlePaddle +--- + +# Baidu's RT-DETR: A Vision Transformer-Based Real-Time Object Detector + +## Overview + +Real-Time Detection Transformer (RT-DETR), developed by Baidu, is a cutting-edge end-to-end object detector that provides real-time performance while maintaining high accuracy. It leverages the power of Vision Transformers (ViT) to efficiently process multiscale features by decoupling intra-scale interaction and cross-scale fusion. RT-DETR is highly adaptable, supporting flexible adjustment of inference speed using different decoder layers without retraining. 
The model excels on accelerated backends like CUDA with TensorRT, outperforming many other real-time object detectors. + +![Model example image](https://user-images.githubusercontent.com/26833433/238963168-90e8483f-90aa-4eb6-a5e1-0d408b23dd33.png) +**Overview of Baidu's RT-DETR.** The RT-DETR model architecture diagram shows the last three stages of the backbone {S3, S4, S5} as the input to the encoder. The efficient hybrid encoder transforms multiscale features into a sequence of image features through intrascale feature interaction (AIFI) and cross-scale feature-fusion module (CCFM). The IoU-aware query selection is employed to select a fixed number of image features to serve as initial object queries for the decoder. Finally, the decoder with auxiliary prediction heads iteratively optimizes object queries to generate boxes and confidence scores ([source](https://arxiv.org/pdf/2304.08069.pdf)). + +### Key Features + +- **Efficient Hybrid Encoder:** Baidu's RT-DETR uses an efficient hybrid encoder that processes multiscale features by decoupling intra-scale interaction and cross-scale fusion. This unique Vision Transformers-based design reduces computational costs and allows for real-time object detection. +- **IoU-aware Query Selection:** Baidu's RT-DETR improves object query initialization by utilizing IoU-aware query selection. This allows the model to focus on the most relevant objects in the scene, enhancing the detection accuracy. +- **Adaptable Inference Speed:** Baidu's RT-DETR supports flexible adjustments of inference speed by using different decoder layers without the need for retraining. This adaptability facilitates practical application in various real-time object detection scenarios. 
+ +## Pre-trained Models + +The Ultralytics Python API provides pre-trained PaddlePaddle RT-DETR models with different scales: + +- RT-DETR-L: 53.0% AP on COCO val2017, 114 FPS on T4 GPU +- RT-DETR-X: 54.8% AP on COCO val2017, 74 FPS on T4 GPU + +## Usage Examples + +This example provides simple RT-DETR training and inference examples. For full documentation on these and other [modes](../modes/index.md) see the [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) and [Export](../modes/export.md) docs pages. + +!!! Example + + === "Python" + + ```python + from ultralytics import RTDETR + + # Load a COCO-pretrained RT-DETR-l model + model = RTDETR('rtdetr-l.pt') + + # Display model information (optional) + model.info() + + # Train the model on the COCO8 example dataset for 100 epochs + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Run inference with the RT-DETR-l model on the 'bus.jpg' image + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + ```bash + # Load a COCO-pretrained RT-DETR-l model and train it on the COCO8 example dataset for 100 epochs + yolo train model=rtdetr-l.pt data=coco8.yaml epochs=100 imgsz=640 + + # Load a COCO-pretrained RT-DETR-l model and run inference on the 'bus.jpg' image + yolo predict model=rtdetr-l.pt source=path/to/bus.jpg + ``` + +## Supported Tasks and Modes + +This table presents the model types, the specific pre-trained weights, the tasks supported by each model, and the various modes ([Train](../modes/train.md), [Val](../modes/val.md), [Predict](../modes/predict.md), [Export](../modes/export.md)) that are supported, indicated by ✅ emojis. 
+ +| Model Type | Pre-trained Weights | Tasks Supported | Inference | Validation | Training | Export | +|---------------------|---------------------|----------------------------------------|-----------|------------|----------|--------| +| RT-DETR Large | `rtdetr-l.pt` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| RT-DETR Extra-Large | `rtdetr-x.pt` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +## Citations and Acknowledgements + +If you use Baidu's RT-DETR in your research or development work, please cite the [original paper](https://arxiv.org/abs/2304.08069): + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{lv2023detrs, + title={DETRs Beat YOLOs on Real-time Object Detection}, + author={Wenyu Lv and Shangliang Xu and Yian Zhao and Guanzhong Wang and Jinman Wei and Cheng Cui and Yuning Du and Qingqing Dang and Yi Liu}, + year={2023}, + eprint={2304.08069}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +We would like to acknowledge Baidu and the [PaddlePaddle](https://github.com/PaddlePaddle/PaddleDetection) team for creating and maintaining this valuable resource for the computer vision community. Their contribution to the field with the development of the Vision Transformers-based real-time object detector, RT-DETR, is greatly appreciated. + +*Keywords: RT-DETR, Transformer, ViT, Vision Transformers, Baidu RT-DETR, PaddlePaddle, Paddle Paddle RT-DETR, real-time object detection, Vision Transformers-based object detection, pre-trained PaddlePaddle RT-DETR models, Baidu's RT-DETR usage, Ultralytics Python API* diff --git a/docs/en/models/sam.md b/docs/en/models/sam.md new file mode 100644 index 0000000..e2ff070 --- /dev/null +++ b/docs/en/models/sam.md @@ -0,0 +1,226 @@ +--- +comments: true +description: Explore the cutting-edge Segment Anything Model (SAM) from Ultralytics that allows real-time image segmentation. Learn about its promptable segmentation, zero-shot performance, and how to use it. 
+keywords: Ultralytics, image segmentation, Segment Anything Model, SAM, SA-1B dataset, real-time performance, zero-shot transfer, object detection, image analysis, machine learning +--- + +# Segment Anything Model (SAM) + +Welcome to the frontier of image segmentation with the Segment Anything Model, or SAM. This revolutionary model has changed the game by introducing promptable image segmentation with real-time performance, setting new standards in the field. + +## Introduction to SAM: The Segment Anything Model + +The Segment Anything Model, or SAM, is a cutting-edge image segmentation model that allows for promptable segmentation, providing unparalleled versatility in image analysis tasks. SAM forms the heart of the Segment Anything initiative, a groundbreaking project that introduces a novel model, task, and dataset for image segmentation. + +SAM's advanced design allows it to adapt to new image distributions and tasks without prior knowledge, a feature known as zero-shot transfer. Trained on the expansive [SA-1B dataset](https://ai.facebook.com/datasets/segment-anything/), which contains more than 1 billion masks spread over 11 million carefully curated images, SAM has displayed impressive zero-shot performance, surpassing previous fully supervised results in many cases. + +![Dataset sample image](https://user-images.githubusercontent.com/26833433/238056229-0e8ffbeb-f81a-477e-a490-aff3d82fd8ce.jpg) +Example images with overlaid masks from our newly introduced dataset, SA-1B. SA-1B contains 11M diverse, high-resolution, licensed, and privacy protecting images and 1.1B high-quality segmentation masks. These masks were annotated fully automatically by SAM, and as verified by human ratings and numerous experiments, are of high quality and diversity. Images are grouped by number of masks per image for visualization (there are ∼100 masks per image on average). 
+ +## Key Features of the Segment Anything Model (SAM) + +- **Promptable Segmentation Task:** SAM was designed with a promptable segmentation task in mind, allowing it to generate valid segmentation masks from any given prompt, such as spatial or text clues identifying an object. +- **Advanced Architecture:** The Segment Anything Model employs a powerful image encoder, a prompt encoder, and a lightweight mask decoder. This unique architecture enables flexible prompting, real-time mask computation, and ambiguity awareness in segmentation tasks. +- **The SA-1B Dataset:** Introduced by the Segment Anything project, the SA-1B dataset features over 1 billion masks on 11 million images. As the largest segmentation dataset to date, it provides SAM with a diverse and large-scale training data source. +- **Zero-Shot Performance:** SAM displays outstanding zero-shot performance across various segmentation tasks, making it a ready-to-use tool for diverse applications with minimal need for prompt engineering. + +For an in-depth look at the Segment Anything Model and the SA-1B dataset, please visit the [Segment Anything website](https://segment-anything.com) and check out the research paper [Segment Anything](https://arxiv.org/abs/2304.02643). + +## Available Models, Supported Tasks, and Operating Modes + +This table presents the available models with their specific pre-trained weights, the tasks they support, and their compatibility with different operating modes like [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), and [Export](../modes/export.md), indicated by ✅ emojis for supported modes and ❌ emojis for unsupported modes. 
+ +| Model Type | Pre-trained Weights | Tasks Supported | Inference | Validation | Training | Export | +|------------|---------------------|----------------------------------------------|-----------|------------|----------|--------| +| SAM base | `sam_b.pt` | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| SAM large | `sam_l.pt` | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## How to Use SAM: Versatility and Power in Image Segmentation + +The Segment Anything Model can be employed for a multitude of downstream tasks that go beyond its training data. This includes edge detection, object proposal generation, instance segmentation, and preliminary text-to-mask prediction. With prompt engineering, SAM can swiftly adapt to new tasks and data distributions in a zero-shot manner, establishing it as a versatile and potent tool for all your image segmentation needs. + +### SAM prediction example + +!!! Example "Segment with prompts" + + Segment image with given prompts. + + === "Python" + + ```python + from ultralytics import SAM + + # Load a model + model = SAM('sam_b.pt') + + # Display model information (optional) + model.info() + + # Run inference with bboxes prompt + model('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + + # Run inference with points prompt + model('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +!!! Example "Segment everything" + + Segment the whole image. + + === "Python" + + ```python + from ultralytics import SAM + + # Load a model + model = SAM('sam_b.pt') + + # Display model information (optional) + model.info() + + # Run inference + model('path/to/image.jpg') + ``` + + === "CLI" + + ```bash + # Run inference with a SAM model + yolo predict model=sam_b.pt source=path/to/image.jpg + ``` + +- The logic here is to segment the whole image if you don't pass any prompts(bboxes/points/masks). + +!!! 
Example "SAMPredictor example" + + This way you can set image once and run prompts inference multiple times without running image encoder multiple times. + + === "Prompt inference" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # Create SAMPredictor + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # Set image + predictor.set_image("ultralytics/assets/zidane.jpg") # set with image file + predictor.set_image(cv2.imread("ultralytics/assets/zidane.jpg")) # set with np.ndarray + results = predictor(bboxes=[439, 437, 524, 709]) + results = predictor(points=[900, 370], labels=[1]) + + # Reset image + predictor.reset_image() + ``` + + Segment everything with additional args. + + === "Segment everything" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # Create SAMPredictor + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # Segment with additional args + results = predictor(source="ultralytics/assets/zidane.jpg", crop_n_layers=1, points_stride=64) + ``` + +- More additional args for `Segment everything` see [`Predictor/generate` Reference](../reference/models/sam/predict.md). 
+ +## SAM comparison vs YOLOv8 + +Here we compare Meta's smallest SAM model, SAM-b, with Ultralytics' smallest segmentation model, [YOLOv8n-seg](../tasks/segment.md): + +| Model | Size | Parameters | Speed (CPU) | +|------------------------------------------------|----------------------------|------------------------|----------------------------| +| Meta's SAM-b | 358 MB | 94.7 M | 51096 ms/im | +| [MobileSAM](mobile-sam.md) | 40.7 MB | 10.1 M | 46122 ms/im | +| [FastSAM-s](fast-sam.md) with YOLOv8 backbone | 23.7 MB | 11.8 M | 115 ms/im | +| Ultralytics [YOLOv8n-seg](../tasks/segment.md) | **6.7 MB** (53.4x smaller) | **3.4 M** (27.9x less) | **59 ms/im** (866x faster) | + +This comparison shows the order-of-magnitude differences in the model sizes and speeds between models. Whereas SAM presents unique capabilities for automatic segmenting, it is not a direct competitor to YOLOv8 segment models, which are smaller, faster and more efficient. + +Tests were run on a 2023 Apple M2 MacBook with 16GB of RAM. To reproduce this test: + +!!! Example + + === "Python" + ```python + from ultralytics import FastSAM, SAM, YOLO + + # Profile SAM-b + model = SAM('sam_b.pt') + model.info() + model('ultralytics/assets') + + # Profile MobileSAM + model = SAM('mobile_sam.pt') + model.info() + model('ultralytics/assets') + + # Profile FastSAM-s + model = FastSAM('FastSAM-s.pt') + model.info() + model('ultralytics/assets') + + # Profile YOLOv8n-seg + model = YOLO('yolov8n-seg.pt') + model.info() + model('ultralytics/assets') + ``` + +## Auto-Annotation: A Quick Path to Segmentation Datasets + +Auto-annotation is a key feature of SAM, allowing users to generate a [segmentation dataset](https://docs.ultralytics.com/datasets/segment) using a pre-trained detection model. This feature enables rapid and accurate annotation of a large number of images, bypassing the need for time-consuming manual labeling. 
+ +### Generate Your Segmentation Dataset Using a Detection Model + +To auto-annotate your dataset with the Ultralytics framework, use the `auto_annotate` function as shown below: + +!!! Example + + === "Python" + ```python + from ultralytics.data.annotator import auto_annotate + + auto_annotate(data="path/to/images", det_model="yolov8x.pt", sam_model='sam_b.pt') + ``` + +| Argument | Type | Description | Default | +|------------|---------------------|---------------------------------------------------------------------------------------------------------|--------------| +| data | str | Path to a folder containing images to be annotated. | | +| det_model | str, optional | Pre-trained YOLO detection model. Defaults to 'yolov8x.pt'. | 'yolov8x.pt' | +| sam_model | str, optional | Pre-trained SAM segmentation model. Defaults to 'sam_b.pt'. | 'sam_b.pt' | +| device | str, optional | Device to run the models on. Defaults to an empty string (CPU or GPU, if available). | | +| output_dir | str, None, optional | Directory to save the annotated results. Defaults to a 'labels' folder in the same directory as 'data'. | None | + +The `auto_annotate` function takes the path to your images, with optional arguments for specifying the pre-trained detection and SAM segmentation models, the device to run the models on, and the output directory for saving the annotated results. + +Auto-annotation with pre-trained models can dramatically cut down the time and effort required for creating high-quality segmentation datasets. This feature is especially beneficial for researchers and developers dealing with large image collections, as it allows them to focus on model development and evaluation rather than manual annotation. + +## Citations and Acknowledgements + +If you find SAM useful in your research or development work, please consider citing our paper: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{kirillov2023segment, + title={Segment Anything}, + author={Alexander Kirillov and Eric Mintun and Nikhila Ravi and Hanzi Mao and Chloe Rolland and Laura Gustafson and Tete Xiao and Spencer Whitehead and Alexander C. Berg and Wan-Yen Lo and Piotr Dollár and Ross Girshick}, + year={2023}, + eprint={2304.02643}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +We would like to express our gratitude to Meta AI for creating and maintaining this valuable resource for the computer vision community. + +*keywords: Segment Anything, Segment Anything Model, SAM, Meta SAM, image segmentation, promptable segmentation, zero-shot performance, SA-1B dataset, advanced architecture, auto-annotation, Ultralytics, pre-trained models, SAM base, SAM large, instance segmentation, computer vision, AI, artificial intelligence, machine learning, data annotation, segmentation masks, detection model, YOLO detection model, bibtex, Meta AI.* diff --git a/docs/en/models/yolo-nas.md b/docs/en/models/yolo-nas.md new file mode 100644 index 0000000..a8e550b --- /dev/null +++ b/docs/en/models/yolo-nas.md @@ -0,0 +1,121 @@ +--- +comments: true +description: Explore detailed documentation of YOLO-NAS, a superior object detection model. Learn about its features, pre-trained models, usage with Ultralytics Python API, and more. +keywords: YOLO-NAS, Deci AI, object detection, deep learning, neural architecture search, Ultralytics Python API, YOLO model, pre-trained models, quantization, optimization, COCO, Objects365, Roboflow 100 +--- + +# YOLO-NAS + +## Overview + +Developed by Deci AI, YOLO-NAS is a groundbreaking object detection foundational model. It is the product of advanced Neural Architecture Search technology, meticulously designed to address the limitations of previous YOLO models. With significant improvements in quantization support and accuracy-latency trade-offs, YOLO-NAS represents a major leap in object detection. 
+ +![Model example image](https://learnopencv.com/wp-content/uploads/2023/05/yolo-nas_COCO_map_metrics.png) +**Overview of YOLO-NAS.** YOLO-NAS employs quantization-aware blocks and selective quantization for optimal performance. The model, when converted to its INT8 quantized version, experiences a minimal precision drop, a significant improvement over other models. These advancements culminate in a superior architecture with unprecedented object detection capabilities and outstanding performance. + +### Key Features + +- **Quantization-Friendly Basic Block:** YOLO-NAS introduces a new basic block that is friendly to quantization, addressing one of the significant limitations of previous YOLO models. +- **Sophisticated Training and Quantization:** YOLO-NAS leverages advanced training schemes and post-training quantization to enhance performance. +- **AutoNAC Optimization and Pre-training:** YOLO-NAS utilizes AutoNAC optimization and is pre-trained on prominent datasets such as COCO, Objects365, and Roboflow 100. This pre-training makes it extremely suitable for downstream object detection tasks in production environments. + +## Pre-trained Models + +Experience the power of next-generation object detection with the pre-trained YOLO-NAS models provided by Ultralytics. These models are designed to deliver top-notch performance in terms of both speed and accuracy. Choose from a variety of options tailored to your specific needs: + +| Model | mAP | Latency (ms) | +|------------------|-------|--------------| +| YOLO-NAS S | 47.5 | 3.21 | +| YOLO-NAS M | 51.55 | 5.85 | +| YOLO-NAS L | 52.22 | 7.87 | +| YOLO-NAS S INT-8 | 47.03 | 2.36 | +| YOLO-NAS M INT-8 | 51.0 | 3.78 | +| YOLO-NAS L INT-8 | 52.1 | 4.78 | + +Each model variant is designed to offer a balance between Mean Average Precision (mAP) and latency, helping you optimize your object detection tasks for both performance and speed. 
+ +## Usage Examples + +Ultralytics has made YOLO-NAS models easy to integrate into your Python applications via our `ultralytics` python package. The package provides a user-friendly Python API to streamline the process. + +The following examples show how to use YOLO-NAS models with the `ultralytics` package for inference and validation: + +### Inference and Validation Examples + +In this example we validate YOLO-NAS-s on the COCO8 dataset. + +!!! Example + + This example provides simple inference and validation code for YOLO-NAS. For handling inference results see [Predict](../modes/predict.md) mode. For using YOLO-NAS with additional modes see [Val](../modes/val.md) and [Export](../modes/export.md). YOLO-NAS on the `ultralytics` package does not support training. + + === "Python" + + PyTorch pretrained `*.pt` models files can be passed to the `NAS()` class to create a model instance in Python: + + ```python + from ultralytics import NAS + + # Load a COCO-pretrained YOLO-NAS-s model + model = NAS('yolo_nas_s.pt') + + # Display model information (optional) + model.info() + + # Validate the model on the COCO8 example dataset + results = model.val(data='coco8.yaml') + + # Run inference with the YOLO-NAS-s model on the 'bus.jpg' image + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLI commands are available to directly run the models: + + ```bash + # Load a COCO-pretrained YOLO-NAS-s model and validate its performance on the COCO8 example dataset + yolo val model=yolo_nas_s.pt data=coco8.yaml + + # Load a COCO-pretrained YOLO-NAS-s model and run inference on the 'bus.jpg' image + yolo predict model=yolo_nas_s.pt source=path/to/bus.jpg + ``` + +## Supported Tasks and Modes + +We offer three variants of the YOLO-NAS models: Small (s), Medium (m), and Large (l). Each variant is designed to cater to different computational and performance needs: + +- **YOLO-NAS-s**: Optimized for environments where computational resources are limited but efficiency is key. 
+- **YOLO-NAS-m**: Offers a balanced approach, suitable for general-purpose object detection with higher accuracy. +- **YOLO-NAS-l**: Tailored for scenarios requiring the highest accuracy, where computational resources are less of a constraint. + +Below is a detailed overview of each model, including links to their pre-trained weights, the tasks they support, and their compatibility with different operating modes. + +| Model Type | Pre-trained Weights | Tasks Supported | Inference | Validation | Training | Export | +|------------|-----------------------------------------------------------------------------------------------|----------------------------------------|-----------|------------|----------|--------| +| YOLO-NAS-s | [yolo_nas_s.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_s.pt) | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-m | [yolo_nas_m.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_m.pt) | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-l | [yolo_nas_l.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_l.pt) | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | + +## Citations and Acknowledgements + +If you employ YOLO-NAS in your research or development work, please cite SuperGradients: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{supergradients, + doi = {10.5281/ZENODO.7789328}, + url = {https://zenodo.org/record/7789328}, + author = {Aharon, Shay and {Louis-Dupont} and {Ofri Masad} and Yurkova, Kate and {Lotem Fridman} and {Lkdci} and Khvedchenya, Eugene and Rubin, Ran and Bagrov, Natan and Tymchenko, Borys and Keren, Tomer and Zhilko, Alexander and {Eran-Deci}}, + title = {Super-Gradients}, + publisher = {GitHub}, + journal = {GitHub repository}, + year = {2021}, + } + ``` + +We express our gratitude to Deci AI's [SuperGradients](https://github.com/Deci-AI/super-gradients/) team for their efforts in creating and maintaining this valuable resource for the computer vision community. We believe YOLO-NAS, with its innovative architecture and superior object detection capabilities, will become a critical tool for developers and researchers alike. + +*Keywords: YOLO-NAS, Deci AI, object detection, deep learning, neural architecture search, Ultralytics Python API, YOLO model, SuperGradients, pre-trained models, quantization-friendly basic block, advanced training schemes, post-training quantization, AutoNAC optimization, COCO, Objects365, Roboflow 100* diff --git a/docs/en/models/yolov3.md b/docs/en/models/yolov3.md new file mode 100644 index 0000000..2e6d34b --- /dev/null +++ b/docs/en/models/yolov3.md @@ -0,0 +1,98 @@ +--- +comments: true +description: Get an overview of YOLOv3, YOLOv3-Ultralytics and YOLOv3u. Learn about their key features, usage, and supported tasks for object detection. +keywords: YOLOv3, YOLOv3-Ultralytics, YOLOv3u, Object Detection, Inference, Training, Ultralytics +--- + +# YOLOv3, YOLOv3-Ultralytics, and YOLOv3u + +## Overview + +This document presents an overview of three closely related object detection models, namely [YOLOv3](https://pjreddie.com/darknet/yolo/), [YOLOv3-Ultralytics](https://github.com/ultralytics/yolov3), and [YOLOv3u](https://github.com/ultralytics/ultralytics). + +1. 
**YOLOv3:** This is the third version of the You Only Look Once (YOLO) object detection algorithm. Originally developed by Joseph Redmon, YOLOv3 improved on its predecessors by introducing features such as multiscale predictions and three different sizes of detection kernels. + +2. **YOLOv3-Ultralytics:** This is Ultralytics' implementation of the YOLOv3 model. It reproduces the original YOLOv3 architecture and offers additional functionalities, such as support for more pre-trained models and easier customization options. + +3. **YOLOv3u:** This is an updated version of YOLOv3-Ultralytics that incorporates the anchor-free, objectness-free split head used in YOLOv8 models. YOLOv3u maintains the same backbone and neck architecture as YOLOv3 but with the updated detection head from YOLOv8. + +![Ultralytics YOLOv3](https://raw.githubusercontent.com/ultralytics/assets/main/yolov3/banner-yolov3.png) + +## Key Features + +- **YOLOv3:** Introduced the use of three different scales for detection, leveraging three different sizes of detection kernels: 13x13, 26x26, and 52x52. This significantly improved detection accuracy for objects of different sizes. Additionally, YOLOv3 added features such as multi-label predictions for each bounding box and a better feature extractor network. + +- **YOLOv3-Ultralytics:** Ultralytics' implementation of YOLOv3 provides the same performance as the original model but comes with added support for more pre-trained models, additional training methods, and easier customization options. This makes it more versatile and user-friendly for practical applications. + +- **YOLOv3u:** This updated model incorporates the anchor-free, objectness-free split head from YOLOv8. By eliminating the need for pre-defined anchor boxes and objectness scores, this detection head design can improve the model's ability to detect objects of varying sizes and shapes. This makes YOLOv3u more robust and accurate for object detection tasks. 
+ +## Supported Tasks and Modes + +The YOLOv3 series, including YOLOv3, YOLOv3-Ultralytics, and YOLOv3u, are designed specifically for object detection tasks. These models are renowned for their effectiveness in various real-world scenarios, balancing accuracy and speed. Each variant offers unique features and optimizations, making them suitable for a range of applications. + +All three models support a comprehensive set of modes, ensuring versatility in various stages of model deployment and development. These modes include [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), and [Export](../modes/export.md), providing users with a complete toolkit for effective object detection. + +| Model Type | Tasks Supported | Inference | Validation | Training | Export | +|--------------------|----------------------------------------|-----------|------------|----------|--------| +| YOLOv3 | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3-Ultralytics | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3u | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +This table provides an at-a-glance view of the capabilities of each YOLOv3 variant, highlighting their versatility and suitability for various tasks and operational modes in object detection workflows. + +## Usage Examples + +This example provides simple YOLOv3 training and inference examples. For full documentation on these and other [modes](../modes/index.md) see the [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) and [Export](../modes/export.md) docs pages. + +!!! 
Example + + === "Python" + + PyTorch pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()` class to create a model instance in python: + + ```python + from ultralytics import YOLO + + # Load a COCO-pretrained YOLOv3n model + model = YOLO('yolov3n.pt') + + # Display model information (optional) + model.info() + + # Train the model on the COCO8 example dataset for 100 epochs + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Run inference with the YOLOv3n model on the 'bus.jpg' image + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLI commands are available to directly run the models: + + ```bash + # Load a COCO-pretrained YOLOv3n model and train it on the COCO8 example dataset for 100 epochs + yolo train model=yolov3n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Load a COCO-pretrained YOLOv3n model and run inference on the 'bus.jpg' image + yolo predict model=yolov3n.pt source=path/to/bus.jpg + ``` + +## Citations and Acknowledgements + +If you use YOLOv3 in your research, please cite the original YOLO papers and the Ultralytics YOLOv3 repository: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{redmon2018yolov3, + title={YOLOv3: An Incremental Improvement}, + author={Redmon, Joseph and Farhadi, Ali}, + journal={arXiv preprint arXiv:1804.02767}, + year={2018} + } + ``` + +Thank you to Joseph Redmon and Ali Farhadi for developing the original YOLOv3. diff --git a/docs/en/models/yolov4.md b/docs/en/models/yolov4.md new file mode 100644 index 0000000..71bde6b --- /dev/null +++ b/docs/en/models/yolov4.md @@ -0,0 +1,71 @@ +--- +comments: true +description: Explore our detailed guide on YOLOv4, a state-of-the-art real-time object detector. Understand its architectural highlights, innovative features, and application examples. 
+keywords: ultralytics, YOLOv4, object detection, neural network, real-time detection, object detector, machine learning +--- + +# YOLOv4: High-Speed and Precise Object Detection + +Welcome to the Ultralytics documentation page for YOLOv4, a state-of-the-art, real-time object detector launched in 2020 by Alexey Bochkovskiy at [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet). YOLOv4 is designed to provide the optimal balance between speed and accuracy, making it an excellent choice for many applications. + +![YOLOv4 architecture diagram](https://user-images.githubusercontent.com/26833433/246185689-530b7fe8-737b-4bb0-b5dd-de10ef5aface.png) +**YOLOv4 architecture diagram**. Showcasing the intricate network design of YOLOv4, including the backbone, neck, and head components, and their interconnected layers for optimal real-time object detection. + +## Introduction + +YOLOv4 stands for You Only Look Once version 4. It is a real-time object detection model developed to address the limitations of previous YOLO versions like [YOLOv3](yolov3.md) and other object detection models. Unlike other convolutional neural network (CNN) based object detectors, YOLOv4 is not only applicable for recommendation systems but also for standalone process management and human input reduction. Its operation on conventional graphics processing units (GPUs) allows for mass usage at an affordable price, and it is designed to work in real-time on a conventional GPU while requiring only one such GPU for training. + +## Architecture + +YOLOv4 makes use of several innovative features that work together to optimize its performance. These include Weighted-Residual-Connections (WRC), Cross-Stage-Partial-connections (CSP), Cross mini-Batch Normalization (CmBN), Self-adversarial-training (SAT), Mish-activation, Mosaic data augmentation, DropBlock regularization, and CIoU loss. These features are combined to achieve state-of-the-art results. 
+ +A typical object detector is composed of several parts including the input, the backbone, the neck, and the head. The backbone of YOLOv4 is pre-trained on ImageNet and extracts the image features from which classes and bounding boxes of objects are later predicted. The backbone could be from several models including VGG, ResNet, ResNeXt, or DenseNet. The neck part of the detector is used to collect feature maps from different stages and usually includes several bottom-up paths and several top-down paths. The head part is what is used to make the final object detections and classifications. + +## Bag of Freebies + +YOLOv4 also makes use of methods known as "bag of freebies," which are techniques that improve the accuracy of the model during training without increasing the cost of inference. Data augmentation is a common bag of freebies technique used in object detection, which increases the variability of the input images to improve the robustness of the model. Some examples of data augmentation include photometric distortions (adjusting the brightness, contrast, hue, saturation, and noise of an image) and geometric distortions (adding random scaling, cropping, flipping, and rotating). These techniques help the model to generalize better to different types of images. + +## Features and Performance + +YOLOv4 is designed for optimal speed and accuracy in object detection. The architecture of YOLOv4 includes CSPDarknet53 as the backbone, PANet as the neck, and YOLOv3 as the detection head. This design allows YOLOv4 to perform object detection at an impressive speed, making it suitable for real-time applications. YOLOv4 also excels in accuracy, achieving state-of-the-art results in object detection benchmarks. + +## Usage Examples + +As of the time of writing, Ultralytics does not currently support YOLOv4 models. Therefore, any users interested in using YOLOv4 will need to refer directly to the YOLOv4 GitHub repository for installation and usage instructions. 
+ +Here is a brief overview of the typical steps you might take to use YOLOv4: + +1. Visit the YOLOv4 GitHub repository: [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet). + +2. Follow the instructions provided in the README file for installation. This typically involves cloning the repository, installing necessary dependencies, and setting up any necessary environment variables. + +3. Once installation is complete, you can train and use the model as per the usage instructions provided in the repository. This usually involves preparing your dataset, configuring the model parameters, training the model, and then using the trained model to perform object detection. + +Please note that the specific steps may vary depending on your specific use case and the current state of the YOLOv4 repository. Therefore, it is strongly recommended to refer directly to the instructions provided in the YOLOv4 GitHub repository. + +We regret any inconvenience this may cause and will strive to update this document with usage examples for Ultralytics once support for YOLOv4 is implemented. + +## Conclusion + +YOLOv4 is a powerful and efficient object detection model that strikes a balance between speed and accuracy. Its use of unique features and bag of freebies techniques during training allows it to perform excellently in real-time object detection tasks. YOLOv4 can be trained and used by anyone with a conventional GPU, making it accessible and practical for a wide range of applications. + +## Citations and Acknowledgements + +We would like to acknowledge the YOLOv4 authors for their significant contributions in the field of real-time object detection: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{bochkovskiy2020yolov4, + title={YOLOv4: Optimal Speed and Accuracy of Object Detection}, + author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, + year={2020}, + eprint={2004.10934}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +The original YOLOv4 paper can be found on [arXiv](https://arxiv.org/pdf/2004.10934.pdf). The authors have made their work publicly available, and the codebase can be accessed on [GitHub](https://github.com/AlexeyAB/darknet). We appreciate their efforts in advancing the field and making their work accessible to the broader community. diff --git a/docs/en/models/yolov5.md b/docs/en/models/yolov5.md new file mode 100644 index 0000000..ab55225 --- /dev/null +++ b/docs/en/models/yolov5.md @@ -0,0 +1,113 @@ +--- +comments: true +description: Discover YOLOv5u, a boosted version of the YOLOv5 model featuring an improved accuracy-speed tradeoff and numerous pre-trained models for various object detection tasks. +keywords: YOLOv5u, object detection, pre-trained models, Ultralytics, Inference, Validation, YOLOv5, YOLOv8, anchor-free, objectness-free, real-time applications, machine learning +--- + +# YOLOv5 + +## Overview + +YOLOv5u represents an advancement in object detection methodologies. Originating from the foundational architecture of the [YOLOv5](https://github.com/ultralytics/yolov5) model developed by Ultralytics, YOLOv5u integrates the anchor-free, objectness-free split head, a feature previously introduced in the [YOLOv8](yolov8.md) models. This adaptation refines the model's architecture, leading to an improved accuracy-speed tradeoff in object detection tasks. Given the empirical results and its derived features, YOLOv5u provides an efficient alternative for those seeking robust solutions in both research and practical applications. 
+ +![Ultralytics YOLOv5](https://raw.githubusercontent.com/ultralytics/assets/main/yolov5/v70/splash.png) + +## Key Features + +- **Anchor-free Split Ultralytics Head:** Traditional object detection models rely on predefined anchor boxes to predict object locations. However, YOLOv5u modernizes this approach. By adopting an anchor-free split Ultralytics head, it ensures a more flexible and adaptive detection mechanism, consequently enhancing the performance in diverse scenarios. + +- **Optimized Accuracy-Speed Tradeoff:** Speed and accuracy often pull in opposite directions. But YOLOv5u challenges this tradeoff. It offers a calibrated balance, ensuring real-time detections without compromising on accuracy. This feature is particularly invaluable for applications that demand swift responses, such as autonomous vehicles, robotics, and real-time video analytics. + +- **Variety of Pre-trained Models:** Understanding that different tasks require different toolsets, YOLOv5u provides a plethora of pre-trained models. Whether you're focusing on Inference, Validation, or Training, there's a tailor-made model awaiting you. This variety ensures you're not just using a one-size-fits-all solution, but a model specifically fine-tuned for your unique challenge. + +## Supported Tasks and Modes + +The YOLOv5u models, with various pre-trained weights, excel in [Object Detection](../tasks/detect.md) tasks. They support a comprehensive range of modes, making them suitable for diverse applications, from development to deployment. 
+ +| Model Type | Pre-trained Weights | Task | Inference | Validation | Training | Export | +|------------|-----------------------------------------------------------------------------------------------------------------------------|----------------------------------------|-----------|------------|----------|--------| +| YOLOv5u | `yolov5nu`, `yolov5su`, `yolov5mu`, `yolov5lu`, `yolov5xu`, `yolov5n6u`, `yolov5s6u`, `yolov5m6u`, `yolov5l6u`, `yolov5x6u` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +This table provides a detailed overview of the YOLOv5u model variants, highlighting their applicability in object detection tasks and support for various operational modes such as [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), and [Export](../modes/export.md). This comprehensive support ensures that users can fully leverage the capabilities of YOLOv5u models in a wide range of object detection scenarios. + +## Performance Metrics + +!!! Performance + + === "Detection" + + See [Detection Docs](https://docs.ultralytics.com/tasks/detect/) for usage examples with these models trained on [COCO](https://docs.ultralytics.com/datasets/detect/coco/), which include 80 pre-trained classes. + + | Model | YAML | size<br><sup>(pixels)</sup> | mAP<sup>val</sup><br>50-95 | Speed<br><sup>CPU ONNX<br>(ms)</sup> | Speed<br><sup>A100 TensorRT<br>(ms)</sup> | params<br><sup>(M)</sup> | FLOPs<br><sup>(B)</sup> | + |---------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|-----------------------|----------------------|--------------------------------|-------------------------------------|--------------------|-------------------| + | [yolov5nu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5nu.pt) | [yolov5n.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 34.3 | 73.6 | 1.06 | 2.6 | 7.7 | + | [yolov5su.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5su.pt) | [yolov5s.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 43.0 | 120.7 | 1.27 | 9.1 | 24.0 | + | [yolov5mu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5mu.pt) | [yolov5m.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 49.0 | 233.9 | 1.86 | 25.1 | 64.2 | + | [yolov5lu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5lu.pt) | [yolov5l.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 52.2 | 408.4 | 2.50 | 53.2 | 135.0 | + | [yolov5xu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5xu.pt) | [yolov5x.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 53.2 | 763.2 | 3.81 | 97.2 | 246.4 | + | | | | | | | | | + | [yolov5n6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5n6u.pt) | [yolov5n6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 42.1 | 211.0 | 1.83 | 4.3 | 7.8 | + | [yolov5s6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5s6u.pt) | 
[yolov5s6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 48.6 | 422.6 | 2.34 | 15.3 | 24.6 | + | [yolov5m6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5m6u.pt) | [yolov5m6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 53.6 | 810.9 | 4.36 | 41.2 | 65.7 | + | [yolov5l6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5l6u.pt) | [yolov5l6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 55.7 | 1470.9 | 5.47 | 86.1 | 137.4 | + | [yolov5x6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5x6u.pt) | [yolov5x6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 56.8 | 2436.5 | 8.98 | 155.4 | 250.7 | + +## Usage Examples + +This example provides simple YOLOv5 training and inference examples. For full documentation on these and other [modes](../modes/index.md) see the [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) and [Export](../modes/export.md) docs pages. + +!!! 
Example + + === "Python" + + PyTorch pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()` class to create a model instance in python: + + ```python + from ultralytics import YOLO + + # Load a COCO-pretrained YOLOv5n model + model = YOLO('yolov5n.pt') + + # Display model information (optional) + model.info() + + # Train the model on the COCO8 example dataset for 100 epochs + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Run inference with the YOLOv5n model on the 'bus.jpg' image + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLI commands are available to directly run the models: + + ```bash + # Load a COCO-pretrained YOLOv5n model and train it on the COCO8 example dataset for 100 epochs + yolo train model=yolov5n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Load a COCO-pretrained YOLOv5n model and run inference on the 'bus.jpg' image + yolo predict model=yolov5n.pt source=path/to/bus.jpg + ``` + +## Citations and Acknowledgements + +If you use YOLOv5 or YOLOv5u in your research, please cite the Ultralytics YOLOv5 repository as follows: + +!!! Quote "" + + === "BibTeX" + ```bibtex + @software{yolov5, + title = {Ultralytics YOLOv5}, + author = {Glenn Jocher}, + year = {2020}, + version = {7.0}, + license = {AGPL-3.0}, + url = {https://github.com/ultralytics/yolov5}, + doi = {10.5281/zenodo.3908559}, + orcid = {0000-0001-5950-6979} + } + ``` + +Please note that YOLOv5 models are provided under [AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) and [Enterprise](https://ultralytics.com/license) licenses. diff --git a/docs/en/models/yolov6.md b/docs/en/models/yolov6.md new file mode 100644 index 0000000..a3aaca5 --- /dev/null +++ b/docs/en/models/yolov6.md @@ -0,0 +1,107 @@ +--- +comments: true +description: Explore Meituan YOLOv6, a state-of-the-art object detection model striking a balance between speed and accuracy. 
Dive into features, pre-trained models, and Python usage. +keywords: Meituan YOLOv6, object detection, Ultralytics, YOLOv6 docs, Bi-directional Concatenation, Anchor-Aided Training, pretrained models, real-time applications +--- + +# Meituan YOLOv6 + +## Overview + +[Meituan](https://about.meituan.com/) YOLOv6 is a cutting-edge object detector that offers a remarkable balance between speed and accuracy, making it a popular choice for real-time applications. This model introduces several notable enhancements to its architecture and training scheme, including the implementation of a Bi-directional Concatenation (BiC) module, an anchor-aided training (AAT) strategy, and an improved backbone and neck design for state-of-the-art accuracy on the COCO dataset. + +![Meituan YOLOv6](https://user-images.githubusercontent.com/26833433/240750495-4da954ce-8b3b-41c4-8afd-ddb74361d3c2.png) +![Model example image](https://user-images.githubusercontent.com/26833433/240750557-3e9ec4f0-0598-49a8-83ea-f33c91eb6d68.png) +**Overview of YOLOv6.** Model architecture diagram showing the redesigned network components and training strategies that have led to significant performance improvements. (a) The neck of YOLOv6 (N and S are shown). Note for M/L, RepBlocks is replaced with CSPStackRep. (b) The structure of a BiC module. (c) A SimCSPSPPF block. ([source](https://arxiv.org/pdf/2301.05586.pdf)). + +### Key Features + +- **Bi-directional Concatenation (BiC) Module:** YOLOv6 introduces a BiC module in the neck of the detector, enhancing localization signals and delivering performance gains with negligible speed degradation. +- **Anchor-Aided Training (AAT) Strategy:** This model proposes AAT to enjoy the benefits of both anchor-based and anchor-free paradigms without compromising inference efficiency. 
+- **Enhanced Backbone and Neck Design:** By deepening YOLOv6 to include another stage in the backbone and neck, this model achieves state-of-the-art performance on the COCO dataset at high-resolution input. +- **Self-Distillation Strategy:** A new self-distillation strategy is implemented to boost the performance of smaller models of YOLOv6, enhancing the auxiliary regression branch during training and removing it at inference to avoid a marked speed decline. + +## Performance Metrics + +YOLOv6 provides various pre-trained models with different scales: + +- YOLOv6-N: 37.5% AP on COCO val2017 at 1187 FPS with NVIDIA Tesla T4 GPU. +- YOLOv6-S: 45.0% AP at 484 FPS. +- YOLOv6-M: 50.0% AP at 226 FPS. +- YOLOv6-L: 52.8% AP at 116 FPS. +- YOLOv6-L6: State-of-the-art accuracy in real-time. + +YOLOv6 also provides quantized models for different precisions and models optimized for mobile platforms. + +## Usage Examples + +This example provides simple YOLOv6 training and inference examples. For full documentation on these and other [modes](../modes/index.md) see the [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) and [Export](../modes/export.md) docs pages. + +!!! 
Example + + === "Python" + + PyTorch pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()` class to create a model instance in python: + + ```python + from ultralytics import YOLO + + # Build a YOLOv6n model from scratch + model = YOLO('yolov6n.yaml') + + # Display model information (optional) + model.info() + + # Train the model on the COCO8 example dataset for 100 epochs + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Run inference with the YOLOv6n model on the 'bus.jpg' image + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLI commands are available to directly run the models: + + ```bash + # Build a YOLOv6n model from scratch and train it on the COCO8 example dataset for 100 epochs + yolo train model=yolov6n.yaml data=coco8.yaml epochs=100 imgsz=640 + + # Build a YOLOv6n model from scratch and run inference on the 'bus.jpg' image + yolo predict model=yolov6n.yaml source=path/to/bus.jpg + ``` + +## Supported Tasks and Modes + +The YOLOv6 series offers a range of models, each optimized for high-performance [Object Detection](../tasks/detect.md). These models cater to varying computational needs and accuracy requirements, making them versatile for a wide array of applications. 
+ +| Model Type | Pre-trained Weights | Tasks Supported | Inference | Validation | Training | Export | +|------------|---------------------|----------------------------------------|-----------|------------|----------|--------| +| YOLOv6-N | `yolov6-n.pt` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-S | `yolov6-s.pt` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-M | `yolov6-m.pt` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L | `yolov6-l.pt` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L6 | `yolov6-l6.pt` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +This table provides a detailed overview of the YOLOv6 model variants, highlighting their capabilities in object detection tasks and their compatibility with various operational modes such as [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), and [Export](../modes/export.md). This comprehensive support ensures that users can fully leverage the capabilities of YOLOv6 models in a broad range of object detection scenarios. + +## Citations and Acknowledgements + +We would like to acknowledge the authors for their significant contributions in the field of real-time object detection: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{li2023yolov6, + title={YOLOv6 v3.0: A Full-Scale Reloading}, + author={Chuyi Li and Lulu Li and Yifei Geng and Hongliang Jiang and Meng Cheng and Bo Zhang and Zaidan Ke and Xiaoming Xu and Xiangxiang Chu}, + year={2023}, + eprint={2301.05586}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +The original YOLOv6 paper can be found on [arXiv](https://arxiv.org/abs/2301.05586). The authors have made their work publicly available, and the codebase can be accessed on [GitHub](https://github.com/meituan/YOLOv6). We appreciate their efforts in advancing the field and making their work accessible to the broader community. 
diff --git a/docs/en/models/yolov7.md b/docs/en/models/yolov7.md new file mode 100644 index 0000000..f3d99d5 --- /dev/null +++ b/docs/en/models/yolov7.md @@ -0,0 +1,65 @@ +--- +comments: true +description: Explore the YOLOv7, a real-time object detector. Understand its superior speed, impressive accuracy, and unique trainable bag-of-freebies optimization focus. +keywords: YOLOv7, real-time object detector, state-of-the-art, Ultralytics, MS COCO dataset, model re-parameterization, dynamic label assignment, extended scaling, compound scaling +--- + +# YOLOv7: Trainable Bag-of-Freebies + +YOLOv7 is a state-of-the-art real-time object detector that surpasses all known object detectors in both speed and accuracy in the range from 5 FPS to 160 FPS. It has the highest accuracy (56.8% AP) among all known real-time object detectors with 30 FPS or higher on GPU V100. Moreover, YOLOv7 outperforms other object detectors such as YOLOR, YOLOX, Scaled-YOLOv4, YOLOv5, and many others in speed and accuracy. The model is trained on the MS COCO dataset from scratch without using any other datasets or pre-trained weights. Source code for YOLOv7 is available on GitHub. + +![YOLOv7 comparison with SOTA object detectors](https://github.com/ultralytics/ultralytics/assets/26833433/5e1e0420-8122-4c79-b8d0-2860aa79af92) +**Comparison of state-of-the-art object detectors.** From the results in Table 2 we know that the proposed method has the best speed-accuracy trade-off comprehensively. If we compare YOLOv7-tiny-SiLU with YOLOv5-N (r6.1), our method is 127 fps faster and 10.7% more accurate on AP. In addition, YOLOv7 has 51.4% AP at frame rate of 161 fps, while PPYOLOE-L with the same AP has only 78 fps frame rate. In terms of parameter usage, YOLOv7 is 41% less than PPYOLOE-L. If we compare YOLOv7-X with 114 fps inference speed to YOLOv5-L (r6.1) with 99 fps inference speed, YOLOv7-X can improve AP by 3.9%. 
If YOLOv7-X is compared with YOLOv5-X (r6.1) of similar scale, the inference speed of YOLOv7-X is 31 fps faster. In addition, in terms of the amount of parameters and computation, YOLOv7-X reduces 22% of parameters and 8% of computation compared to YOLOv5-X (r6.1), but improves AP by 2.2% ([Source](https://arxiv.org/pdf/2207.02696.pdf)). + +## Overview + +Real-time object detection is an important component in many computer vision systems, including multi-object tracking, autonomous driving, robotics, and medical image analysis. In recent years, real-time object detection development has focused on designing efficient architectures and improving the inference speed on various CPUs, GPUs, and neural processing units (NPUs). YOLOv7 supports both mobile GPU and GPU devices, from the edge to the cloud. + +Unlike traditional real-time object detectors that focus on architecture optimization, YOLOv7 introduces a focus on the optimization of the training process. This includes modules and optimization methods designed to improve the accuracy of object detection without increasing the inference cost, a concept known as the "trainable bag-of-freebies". + +## Key Features + +YOLOv7 introduces several key features: + +1. **Model Re-parameterization**: YOLOv7 proposes a planned re-parameterized model, which is a strategy applicable to layers in different networks with the concept of gradient propagation path. + +2. **Dynamic Label Assignment**: The training of the model with multiple output layers presents a new issue: "How to assign dynamic targets for the outputs of different branches?" To solve this problem, YOLOv7 introduces a new label assignment method called coarse-to-fine lead guided label assignment. + +3. **Extended and Compound Scaling**: YOLOv7 proposes "extend" and "compound scaling" methods for the real-time object detector that can effectively utilize parameters and computation. + +4. 
**Efficiency**: The method proposed by YOLOv7 can effectively reduce about 40% of the parameters and 50% of the computation of state-of-the-art real-time object detectors, while achieving faster inference speed and higher detection accuracy. + +## Usage Examples + +As of the time of writing, Ultralytics does not currently support YOLOv7 models. Therefore, any users interested in using YOLOv7 will need to refer directly to the YOLOv7 GitHub repository for installation and usage instructions. + +Here is a brief overview of the typical steps you might take to use YOLOv7: + +1. Visit the YOLOv7 GitHub repository: [https://github.com/WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7). + +2. Follow the instructions provided in the README file for installation. This typically involves cloning the repository, installing necessary dependencies, and setting up any necessary environment variables. + +3. Once installation is complete, you can train and use the model as per the usage instructions provided in the repository. This usually involves preparing your dataset, configuring the model parameters, training the model, and then using the trained model to perform object detection. + +Please note that the specific steps may vary depending on your specific use case and the current state of the YOLOv7 repository. Therefore, it is strongly recommended to refer directly to the instructions provided in the YOLOv7 GitHub repository. + +We regret any inconvenience this may cause and will strive to update this document with usage examples for Ultralytics once support for YOLOv7 is implemented. + +## Citations and Acknowledgements + +We would like to acknowledge the YOLOv7 authors for their significant contributions in the field of real-time object detection: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @article{wang2022yolov7, + title={{YOLOv7}: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors}, + author={Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + journal={arXiv preprint arXiv:2207.02696}, + year={2022} + } + ``` + +The original YOLOv7 paper can be found on [arXiv](https://arxiv.org/pdf/2207.02696.pdf). The authors have made their work publicly available, and the codebase can be accessed on [GitHub](https://github.com/WongKinYiu/yolov7). We appreciate their efforts in advancing the field and making their work accessible to the broader community. diff --git a/docs/en/models/yolov8.md b/docs/en/models/yolov8.md new file mode 100644 index 0000000..027b399 --- /dev/null +++ b/docs/en/models/yolov8.md @@ -0,0 +1,162 @@ +--- +comments: true +description: Explore the thrilling features of YOLOv8, the latest version of our real-time object detector! Learn how advanced architectures, pre-trained models and optimal balance between accuracy & speed make YOLOv8 the perfect choice for your object detection tasks. +keywords: YOLOv8, Ultralytics, real-time object detector, pre-trained models, documentation, object detection, YOLO series, advanced architectures, accuracy, speed +--- + +# YOLOv8 + +## Overview + +YOLOv8 is the latest iteration in the YOLO series of real-time object detectors, offering cutting-edge performance in terms of accuracy and speed. Building upon the advancements of previous YOLO versions, YOLOv8 introduces new features and optimizations that make it an ideal choice for various object detection tasks in a wide range of applications. + +![Ultralytics YOLOv8](https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/yolo-comparison-plots.png) + +## Key Features + +- **Advanced Backbone and Neck Architectures:** YOLOv8 employs state-of-the-art backbone and neck architectures, resulting in improved feature extraction and object detection performance. 
+- **Anchor-free Split Ultralytics Head:** YOLOv8 adopts an anchor-free split Ultralytics head, which contributes to better accuracy and a more efficient detection process compared to anchor-based approaches. +- **Optimized Accuracy-Speed Tradeoff:** With a focus on maintaining an optimal balance between accuracy and speed, YOLOv8 is suitable for real-time object detection tasks in diverse application areas. +- **Variety of Pre-trained Models:** YOLOv8 offers a range of pre-trained models to cater to various tasks and performance requirements, making it easier to find the right model for your specific use case. + +## Supported Tasks and Modes + +The YOLOv8 series offers a diverse range of models, each specialized for specific tasks in computer vision. These models are designed to cater to various requirements, from object detection to more complex tasks like instance segmentation, pose/keypoints detection, and classification. + +Each variant of the YOLOv8 series is optimized for its respective task, ensuring high performance and accuracy. Additionally, these models are compatible with various operational modes including [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), and [Export](../modes/export.md), facilitating their use in different stages of deployment and development. 
+ +| Model | Filenames | Task | Inference | Validation | Training | Export | +|-------------|----------------------------------------------------------------------------------------------------------------|----------------------------------------------|-----------|------------|----------|--------| +| YOLOv8 | `yolov8n.pt` `yolov8s.pt` `yolov8m.pt` `yolov8l.pt` `yolov8x.pt` | [Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-seg | `yolov8n-seg.pt` `yolov8s-seg.pt` `yolov8m-seg.pt` `yolov8l-seg.pt` `yolov8x-seg.pt` | [Instance Segmentation](../tasks/segment.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-pose | `yolov8n-pose.pt` `yolov8s-pose.pt` `yolov8m-pose.pt` `yolov8l-pose.pt` `yolov8x-pose.pt` `yolov8x-pose-p6.pt` | [Pose/Keypoints](../tasks/pose.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-cls | `yolov8n-cls.pt` `yolov8s-cls.pt` `yolov8m-cls.pt` `yolov8l-cls.pt` `yolov8x-cls.pt` | [Classification](../tasks/classify.md) | ✅ | ✅ | ✅ | ✅ | + +This table provides an overview of the YOLOv8 model variants, highlighting their applicability in specific tasks and their compatibility with various operational modes such as Inference, Validation, Training, and Export. It showcases the versatility and robustness of the YOLOv8 series, making them suitable for a variety of applications in computer vision. + +## Performance Metrics + +!!! Performance + + === "Detection (COCO)" + + See [Detection Docs](https://docs.ultralytics.com/tasks/detect/) for usage examples with these models trained on [COCO](https://docs.ultralytics.com/datasets/detect/coco/), which include 80 pre-trained classes. + + | Model | size
(pixels) | mAPval
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | + | ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | + + === "Detection (Open Images V7)" + + See [Detection Docs](https://docs.ultralytics.com/tasks/detect/) for usage examples with these models trained on [Open Images V7](https://docs.ultralytics.com/datasets/detect/open-images-v7/), which include 600 pre-trained classes. + + | Model | size
(pixels) | mAPval
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | + | ----------------------------------------------------------------------------------------- | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-oiv7.pt) | 640 | 18.4 | 142.4 | 1.21 | 3.5 | 10.5 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-oiv7.pt) | 640 | 27.7 | 183.1 | 1.40 | 11.4 | 29.7 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-oiv7.pt) | 640 | 33.6 | 408.5 | 2.26 | 26.2 | 80.6 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-oiv7.pt) | 640 | 34.9 | 596.9 | 2.43 | 44.1 | 167.4 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-oiv7.pt) | 640 | 36.3 | 860.6 | 3.56 | 68.7 | 260.6 | + + === "Segmentation (COCO)" + + See [Segmentation Docs](https://docs.ultralytics.com/tasks/segment/) for usage examples with these models trained on [COCO](https://docs.ultralytics.com/datasets/segment/coco/), which include 80 pre-trained classes. + + | Model | size
(pixels) | mAPbox
50-95 | mAPmask
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | + | -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | + | [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | + | [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | + | [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | + | [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | + + === "Classification (ImageNet)" + + See [Classification Docs](https://docs.ultralytics.com/tasks/classify/) for usage examples with these models trained on [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/), which include 1000 pre-trained classes. + + | Model | size
(pixels) | acc
top1 | acc
top5 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) at 640 | + | -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ------------------------------ | ----------------------------------- | ------------------ | ------------------------ | + | [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | + | [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | + | [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | + | [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | + | [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | + + === "Pose (COCO)" + + See [Pose Estimation Docs](https://docs.ultralytics.com/tasks/pose/) for usage examples with these models trained on [COCO](https://docs.ultralytics.com/datasets/pose/coco/), which include 1 pre-trained class, 'person'. + + | Model | size
(pixels) | mAPpose
50-95 | mAPpose
50 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | + | ---------------------------------------------------------------------------------------------------- | --------------------- | --------------------- | ------------------ | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | + | [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | + | [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | + | [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | + | [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | + | [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | + +## Usage Examples + +This example provides simple YOLOv8 training and inference examples. For full documentation on these and other [modes](../modes/index.md) see the [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) and [Export](../modes/export.md) docs pages. + +Note the below example is for YOLOv8 [Detect](../tasks/detect.md) models for object detection. For additional supported tasks see the [Segment](../tasks/segment.md), [Classify](../tasks/classify.md) and [Pose](../tasks/pose.md) docs. + +!!! 
Example + + === "Python" + + PyTorch pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()` class to create a model instance in python: + + ```python + from ultralytics import YOLO + + # Load a COCO-pretrained YOLOv8n model + model = YOLO('yolov8n.pt') + + # Display model information (optional) + model.info() + + # Train the model on the COCO8 example dataset for 100 epochs + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Run inference with the YOLOv8n model on the 'bus.jpg' image + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLI commands are available to directly run the models: + + ```bash + # Load a COCO-pretrained YOLOv8n model and train it on the COCO8 example dataset for 100 epochs + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Load a COCO-pretrained YOLOv8n model and run inference on the 'bus.jpg' image + yolo predict model=yolov8n.pt source=path/to/bus.jpg + ``` + +## Citations and Acknowledgements + +If you use the YOLOv8 model or any other software from this repository in your work, please cite it using the following format: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @software{yolov8_ultralytics, + author = {Glenn Jocher and Ayush Chaurasia and Jing Qiu}, + title = {Ultralytics YOLOv8}, + version = {8.0.0}, + year = {2023}, + url = {https://github.com/ultralytics/ultralytics}, + orcid = {0000-0001-5950-6979, 0000-0002-7603-6750, 0000-0003-3783-7069}, + license = {AGPL-3.0} + } + ``` + +Please note that the DOI is pending and will be added to the citation once it is available. YOLOv8 models are provided under [AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) and [Enterprise](https://ultralytics.com/license) licenses. 
diff --git a/docs/en/modes/benchmark.md b/docs/en/modes/benchmark.md new file mode 100644 index 0000000..c5f887f --- /dev/null +++ b/docs/en/modes/benchmark.md @@ -0,0 +1,94 @@ +--- +comments: true +description: Learn how to profile speed and accuracy of YOLOv8 across various export formats; get insights on mAP50-95, accuracy_top5 metrics, and more. +keywords: Ultralytics, YOLOv8, benchmarking, speed profiling, accuracy profiling, mAP50-95, accuracy_top5, ONNX, OpenVINO, TensorRT, YOLO export formats +--- + +# Model Benchmarking with Ultralytics YOLO + +Ultralytics YOLO ecosystem and integrations + +## Introduction + +Once your model is trained and validated, the next logical step is to evaluate its performance in various real-world scenarios. Benchmark mode in Ultralytics YOLOv8 serves this purpose by providing a robust framework for assessing the speed and accuracy of your model across a range of export formats. + +## Why Is Benchmarking Crucial? + +- **Informed Decisions:** Gain insights into the trade-offs between speed and accuracy. +- **Resource Allocation:** Understand how different export formats perform on different hardware. +- **Optimization:** Learn which export format offers the best performance for your specific use case. +- **Cost Efficiency:** Make more efficient use of hardware resources based on benchmark results. + +### Key Metrics in Benchmark Mode + +- **mAP50-95:** For object detection, segmentation, and pose estimation. +- **accuracy_top5:** For image classification. +- **Inference Time:** Time taken for each image in milliseconds. + +### Supported Export Formats + +- **ONNX:** For optimal CPU performance +- **TensorRT:** For maximal GPU efficiency +- **OpenVINO:** For Intel hardware optimization +- **CoreML, TensorFlow SavedModel, and More:** For diverse deployment needs. + +!!! Tip "Tip" + + * Export to ONNX or OpenVINO for up to 3x CPU speedup. + * Export to TensorRT for up to 5x GPU speedup. 
+ +## Usage Examples + +Run YOLOv8n benchmarks on all supported export formats including ONNX, TensorRT etc. See Arguments section below for a full list of export arguments. + +!!! Example + + === "Python" + + ```python + from ultralytics.utils.benchmarks import benchmark + + # Benchmark on GPU + benchmark(model='yolov8n.pt', data='coco8.yaml', imgsz=640, half=False, device=0) + ``` + === "CLI" + + ```bash + yolo benchmark model=yolov8n.pt data='coco8.yaml' imgsz=640 half=False device=0 + ``` + +## Arguments + +Arguments such as `model`, `data`, `imgsz`, `half`, `device`, and `verbose` provide users with the flexibility to fine-tune the benchmarks to their specific needs and compare the performance of different export formats with ease. + +| Key | Value | Description | +|-----------|---------|-----------------------------------------------------------------------| +| `model` | `None` | path to model file, i.e. yolov8n.pt, yolov8n.yaml | +| `data` | `None` | path to YAML referencing the benchmarking dataset (under `val` label) | +| `imgsz` | `640` | image size as scalar or (h, w) list, i.e. (640, 480) | +| `half` | `False` | FP16 quantization | +| `int8` | `False` | INT8 quantization | +| `device` | `None` | device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu | +| `verbose` | `False` | do not continue on error (bool), or val floor threshold (float) | + +## Export Formats + +Benchmarks will attempt to run automatically on all possible export formats below. 
+ +| Format | `format` Argument | Model | Metadata | Arguments | +|--------------------------------------------------------------------|-------------------|---------------------------|----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page. 
diff --git a/docs/en/modes/export.md b/docs/en/modes/export.md new file mode 100644 index 0000000..8c14fdc --- /dev/null +++ b/docs/en/modes/export.md @@ -0,0 +1,108 @@ +--- +comments: true +description: Step-by-step guide on exporting your YOLOv8 models to various formats like ONNX, TensorRT, CoreML and more for deployment. Explore now! +keywords: YOLO, YOLOv8, Ultralytics, Model export, ONNX, TensorRT, CoreML, TensorFlow SavedModel, OpenVINO, PyTorch, export model +--- + +# Model Export with Ultralytics YOLO + +Ultralytics YOLO ecosystem and integrations + +## Introduction + +The ultimate goal of training a model is to deploy it for real-world applications. Export mode in Ultralytics YOLOv8 offers a versatile range of options for exporting your trained model to different formats, making it deployable across various platforms and devices. This comprehensive guide aims to walk you through the nuances of model exporting, showcasing how to achieve maximum compatibility and performance. + +

+
+ +
+ Watch: How To Export Custom Trained Ultralytics YOLOv8 Model and Run Live Inference on Webcam. +

+ +## Why Choose YOLOv8's Export Mode? + +- **Versatility:** Export to multiple formats including ONNX, TensorRT, CoreML, and more. +- **Performance:** Gain up to 5x GPU speedup with TensorRT and 3x CPU speedup with ONNX or OpenVINO. +- **Compatibility:** Make your model universally deployable across numerous hardware and software environments. +- **Ease of Use:** Simple CLI and Python API for quick and straightforward model exporting. + +### Key Features of Export Mode + +Here are some of the standout functionalities: + +- **One-Click Export:** Simple commands for exporting to different formats. +- **Batch Export:** Export batched-inference capable models. +- **Optimized Inference:** Exported models are optimized for quicker inference times. +- **Tutorial Videos:** In-depth guides and tutorials for a smooth exporting experience. + +!!! Tip "Tip" + + * Export to ONNX or OpenVINO for up to 3x CPU speedup. + * Export to TensorRT for up to 5x GPU speedup. + +## Usage Examples + +Export a YOLOv8n model to a different format like ONNX or TensorRT. See Arguments section below for a full list of export arguments. + +!!! Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n.pt') # load an official model + model = YOLO('path/to/best.pt') # load a custom trained model + + # Export the model + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # export official model + yolo export model=path/to/best.pt format=onnx # export custom trained model + ``` + +## Arguments + +Export settings for YOLO models refer to the various configurations and options used to save or export the model for use in other environments or platforms. These settings can affect the model's performance, size, and compatibility with different systems. Some common YOLO export settings include the format of the exported model file (e.g. 
ONNX, TensorFlow SavedModel), the device on which the model will be run (e.g. CPU, GPU), and the presence of additional features such as masks or multiple labels per box. Other factors that may affect the export process include the specific task the model is being used for and the requirements or constraints of the target environment or platform. It is important to carefully consider and configure these settings to ensure that the exported model is optimized for the intended use case and can be used effectively in the target environment. + +| Key | Value | Description | +|-------------|-----------------|------------------------------------------------------| +| `format` | `'torchscript'` | format to export to | +| `imgsz` | `640` | image size as scalar or (h, w) list, i.e. (640, 480) | +| `keras` | `False` | use Keras for TF SavedModel export | +| `optimize` | `False` | TorchScript: optimize for mobile | +| `half` | `False` | FP16 quantization | +| `int8` | `False` | INT8 quantization | +| `dynamic` | `False` | ONNX/TensorRT: dynamic axes | +| `simplify` | `False` | ONNX/TensorRT: simplify model | +| `opset` | `None` | ONNX: opset version (optional, defaults to latest) | +| `workspace` | `4` | TensorRT: workspace size (GB) | +| `nms` | `False` | CoreML: add NMS | + +## Export Formats + +Available YOLOv8 export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. 
+ +| Format | `format` Argument | Model | Metadata | Arguments | +|--------------------------------------------------------------------|-------------------|---------------------------|----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | diff --git a/docs/en/modes/index.md b/docs/en/modes/index.md new file mode 100644 index 0000000..d0a0f9e --- /dev/null +++ b/docs/en/modes/index.md @@ -0,0 +1,74 @@ +--- +comments: true +description: From training to tracking, make the most of YOLOv8 with Ultralytics. 
Get insights and examples for each supported mode including validation, export, and benchmarking. +keywords: Ultralytics, YOLOv8, Machine Learning, Object Detection, Training, Validation, Prediction, Export, Tracking, Benchmarking +--- + +# Ultralytics YOLOv8 Modes + +Ultralytics YOLO ecosystem and integrations + +## Introduction + +Ultralytics YOLOv8 is not just another object detection model; it's a versatile framework designed to cover the entire lifecycle of machine learning models—from data ingestion and model training to validation, deployment, and real-world tracking. Each mode serves a specific purpose and is engineered to offer you the flexibility and efficiency required for different tasks and use-cases. + +

+
+ +
+ Watch: Ultralytics Modes Tutorial: Train, Validate, Predict, Export & Benchmark. +

+ +### Modes at a Glance + +Understanding the different **modes** that Ultralytics YOLOv8 supports is critical to getting the most out of your models: + +- **Train** mode: Fine-tune your model on custom or preloaded datasets. +- **Val** mode: A post-training checkpoint to validate model performance. +- **Predict** mode: Unleash the predictive power of your model on real-world data. +- **Export** mode: Make your model deployment-ready in various formats. +- **Track** mode: Extend your object detection model into real-time tracking applications. +- **Benchmark** mode: Analyze the speed and accuracy of your model in diverse deployment environments. + +This comprehensive guide aims to give you an overview and practical insights into each mode, helping you harness the full potential of YOLOv8. + +## [Train](train.md) + +Train mode is used for training a YOLOv8 model on a custom dataset. In this mode, the model is trained using the specified dataset and hyperparameters. The training process involves optimizing the model's parameters so that it can accurately predict the classes and locations of objects in an image. + +[Train Examples](train.md){ .md-button } + +## [Val](val.md) + +Val mode is used for validating a YOLOv8 model after it has been trained. In this mode, the model is evaluated on a validation set to measure its accuracy and generalization performance. This mode can be used to tune the hyperparameters of the model to improve its performance. + +[Val Examples](val.md){ .md-button } + +## [Predict](predict.md) + +Predict mode is used for making predictions using a trained YOLOv8 model on new images or videos. In this mode, the model is loaded from a checkpoint file, and the user can provide images or videos to perform inference. The model predicts the classes and locations of objects in the input images or videos. 
+ +[Predict Examples](predict.md){ .md-button } + +## [Export](export.md) + +Export mode is used for exporting a YOLOv8 model to a format that can be used for deployment. In this mode, the model is converted to a format that can be used by other software applications or hardware devices. This mode is useful when deploying the model to production environments. + +[Export Examples](export.md){ .md-button } + +## [Track](track.md) + +Track mode is used for tracking objects in real-time using a YOLOv8 model. In this mode, the model is loaded from a checkpoint file, and the user can provide a live video stream to perform real-time object tracking. This mode is useful for applications such as surveillance systems or self-driving cars. + +[Track Examples](track.md){ .md-button } + +## [Benchmark](benchmark.md) + +Benchmark mode is used to profile the speed and accuracy of various export formats for YOLOv8. The benchmarks provide information on the size of the exported format, its `mAP50-95` metrics (for object detection, segmentation and pose) +or `accuracy_top5` metrics (for classification), and the inference time in milliseconds per image across various export formats like ONNX, OpenVINO, TensorRT and others. This information can help users choose the optimal export format for their specific use case based on their requirements for speed and accuracy. + +[Benchmark Examples](benchmark.md){ .md-button } diff --git a/docs/en/modes/predict.md b/docs/en/modes/predict.md new file mode 100644 index 0000000..446d36e --- /dev/null +++ b/docs/en/modes/predict.md @@ -0,0 +1,727 @@ +--- +comments: true +description: Discover how to use YOLOv8 predict mode for various tasks. Learn about different inference sources like images, videos, and data formats. 
+keywords: Ultralytics, YOLOv8, predict mode, inference sources, prediction tasks, streaming mode, image processing, video processing, machine learning, AI +--- + +# Model Prediction with Ultralytics YOLO + +Ultralytics YOLO ecosystem and integrations + +## Introduction + +In the world of machine learning and computer vision, the process of making sense out of visual data is called 'inference' or 'prediction'. Ultralytics YOLOv8 offers a powerful feature known as **predict mode** that is tailored for high-performance, real-time inference on a wide range of data sources. + +

+
+ +
+ Watch: How to Extract the Outputs from Ultralytics YOLOv8 Model for Custom Projects. +

+ +## Real-world Applications + +| Manufacturing | Sports | Safety | +|:-------------------------------------------------:|:----------------------------------------------------:|:-------------------------------------------:| +| ![Vehicle Spare Parts Detection][car spare parts] | ![Football Player Detection][football player detect] | ![People Fall Detection][human fall detect] | +| Vehicle Spare Parts Detection | Football Player Detection | People Fall Detection | + +## Why Use Ultralytics YOLO for Inference? + +Here's why you should consider YOLOv8's predict mode for your various inference needs: + +- **Versatility:** Capable of making inferences on images, videos, and even live streams. +- **Performance:** Engineered for real-time, high-speed processing without sacrificing accuracy. +- **Ease of Use:** Intuitive Python and CLI interfaces for rapid deployment and testing. +- **Highly Customizable:** Various settings and parameters to tune the model's inference behavior according to your specific requirements. + +### Key Features of Predict Mode + +YOLOv8's predict mode is designed to be robust and versatile, featuring: + +- **Multiple Data Source Compatibility:** Whether your data is in the form of individual images, a collection of images, video files, or real-time video streams, predict mode has you covered. +- **Streaming Mode:** Use the streaming feature to generate a memory-efficient generator of `Results` objects. Enable this by setting `stream=True` in the predictor's call method. +- **Batch Processing:** The ability to process multiple images or video frames in a single batch, further speeding up inference time. +- **Integration Friendly:** Easily integrate with existing data pipelines and other software components, thanks to its flexible API. + +Ultralytics YOLO models return either a Python list of `Results` objects, or a memory-efficient Python generator of `Results` objects when `stream=True` is passed to the model during inference: + +!!! 
Example "Predict" + + === "Return a list with `stream=False`" + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n.pt') # pretrained YOLOv8n model + + # Run batched inference on a list of images + results = model(['im1.jpg', 'im2.jpg']) # return a list of Results objects + + # Process results list + for result in results: + boxes = result.boxes # Boxes object for bbox outputs + masks = result.masks # Masks object for segmentation masks outputs + keypoints = result.keypoints # Keypoints object for pose outputs + probs = result.probs # Probs object for classification outputs + ``` + + === "Return a generator with `stream=True`" + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n.pt') # pretrained YOLOv8n model + + # Run batched inference on a list of images + results = model(['im1.jpg', 'im2.jpg'], stream=True) # return a generator of Results objects + + # Process results generator + for result in results: + boxes = result.boxes # Boxes object for bbox outputs + masks = result.masks # Masks object for segmentation masks outputs + keypoints = result.keypoints # Keypoints object for pose outputs + probs = result.probs # Probs object for classification outputs + ``` + +## Inference Sources + +YOLOv8 can process different types of input sources for inference, as shown in the table below. The sources include static images, video streams, and various data formats. The table also indicates whether each source can be used in streaming mode with the argument `stream=True` ✅. Streaming mode is beneficial for processing videos or live streams as it creates a generator of results instead of loading all frames into memory. + +!!! Tip "Tip" + + Use `stream=True` for processing long videos or large datasets to efficiently manage memory. When `stream=False`, the results for all frames or data points are stored in memory, which can quickly add up and cause out-of-memory errors for large inputs. 
In contrast, `stream=True` utilizes a generator, which only keeps the results of the current frame or data point in memory, significantly reducing memory consumption and preventing out-of-memory issues. + +| Source | Argument | Type | Notes | +|----------------|--------------------------------------------|-----------------|---------------------------------------------------------------------------------------------| +| image | `'image.jpg'` | `str` or `Path` | Single image file. | +| URL | `'https://ultralytics.com/images/bus.jpg'` | `str` | URL to an image. | +| screenshot | `'screen'` | `str` | Capture a screenshot. | +| PIL | `Image.open('im.jpg')` | `PIL.Image` | HWC format with RGB channels. | +| OpenCV | `cv2.imread('im.jpg')` | `np.ndarray` | HWC format with BGR channels `uint8 (0-255)`. | +| numpy | `np.zeros((640,1280,3))` | `np.ndarray` | HWC format with BGR channels `uint8 (0-255)`. | +| torch | `torch.zeros(16,3,320,640)` | `torch.Tensor` | BCHW format with RGB channels `float32 (0.0-1.0)`. | +| CSV | `'sources.csv'` | `str` or `Path` | CSV file containing paths to images, videos, or directories. | +| video ✅ | `'video.mp4'` | `str` or `Path` | Video file in formats like MP4, AVI, etc. | +| directory ✅ | `'path/'` | `str` or `Path` | Path to a directory containing images or videos. | +| glob ✅ | `'path/*.jpg'` | `str` | Glob pattern to match multiple files. Use the `*` character as a wildcard. | +| YouTube ✅ | `'https://youtu.be/LNwODJXcvt4'` | `str` | URL to a YouTube video. | +| stream ✅ | `'rtsp://example.com/media.mp4'` | `str` | URL for streaming protocols such as RTSP, RTMP, TCP, or an IP address. | +| multi-stream ✅ | `'list.streams'` | `str` or `Path` | `*.streams` text file with one stream URL per row, i.e. 8 streams will run at batch-size 8. | + +Below are code examples for using each source type: + +!!! Example "Prediction sources" + + === "image" + Run inference on an image file. 
+ ```python + from ultralytics import YOLO + + # Load a pretrained YOLOv8n model + model = YOLO('yolov8n.pt') + + # Define path to the image file + source = 'path/to/image.jpg' + + # Run inference on the source + results = model(source) # list of Results objects + ``` + + === "screenshot" + Run inference on the current screen content as a screenshot. + ```python + from ultralytics import YOLO + + # Load a pretrained YOLOv8n model + model = YOLO('yolov8n.pt') + + # Define current screenshot as source + source = 'screen' + + # Run inference on the source + results = model(source) # list of Results objects + ``` + + === "URL" + Run inference on an image or video hosted remotely via URL. + ```python + from ultralytics import YOLO + + # Load a pretrained YOLOv8n model + model = YOLO('yolov8n.pt') + + # Define remote image or video URL + source = 'https://ultralytics.com/images/bus.jpg' + + # Run inference on the source + results = model(source) # list of Results objects + ``` + + === "PIL" + Run inference on an image opened with Python Imaging Library (PIL). + ```python + from PIL import Image + from ultralytics import YOLO + + # Load a pretrained YOLOv8n model + model = YOLO('yolov8n.pt') + + # Open an image using PIL + source = Image.open('path/to/image.jpg') + + # Run inference on the source + results = model(source) # list of Results objects + ``` + + === "OpenCV" + Run inference on an image read with OpenCV. + ```python + import cv2 + from ultralytics import YOLO + + # Load a pretrained YOLOv8n model + model = YOLO('yolov8n.pt') + + # Read an image using OpenCV + source = cv2.imread('path/to/image.jpg') + + # Run inference on the source + results = model(source) # list of Results objects + ``` + + === "numpy" + Run inference on an image represented as a numpy array. 
+ ```python + import numpy as np + from ultralytics import YOLO + + # Load a pretrained YOLOv8n model + model = YOLO('yolov8n.pt') + + # Create a random numpy array of HWC shape (640, 640, 3) with values in range [0, 255] and type uint8 + source = np.random.randint(low=0, high=255, size=(640, 640, 3), dtype='uint8') + + # Run inference on the source + results = model(source) # list of Results objects + ``` + + === "torch" + Run inference on an image represented as a PyTorch tensor. + ```python + import torch + from ultralytics import YOLO + + # Load a pretrained YOLOv8n model + model = YOLO('yolov8n.pt') + + # Create a random torch tensor of BCHW shape (1, 3, 640, 640) with values in range [0, 1] and type float32 + source = torch.rand(1, 3, 640, 640, dtype=torch.float32) + + # Run inference on the source + results = model(source) # list of Results objects + ``` + + === "CSV" + Run inference on a collection of images, URLs, videos and directories listed in a CSV file. + ```python + import torch + from ultralytics import YOLO + + # Load a pretrained YOLOv8n model + model = YOLO('yolov8n.pt') + + # Define a path to a CSV file with images, URLs, videos and directories + source = 'path/to/file.csv' + + # Run inference on the source + results = model(source) # list of Results objects + ``` + + === "video" + Run inference on a video file. By using `stream=True`, you can create a generator of Results objects to reduce memory usage. + ```python + from ultralytics import YOLO + + # Load a pretrained YOLOv8n model + model = YOLO('yolov8n.pt') + + # Define path to video file + source = 'path/to/video.mp4' + + # Run inference on the source + results = model(source, stream=True) # generator of Results objects + ``` + + === "directory" + Run inference on all images and videos in a directory. To also capture images and videos in subdirectories use a glob pattern, i.e. `path/to/dir/**/*`. 
+ ```python + from ultralytics import YOLO + + # Load a pretrained YOLOv8n model + model = YOLO('yolov8n.pt') + + # Define path to directory containing images and videos for inference + source = 'path/to/dir' + + # Run inference on the source + results = model(source, stream=True) # generator of Results objects + ``` + + === "glob" + Run inference on all images and videos that match a glob expression with `*` characters. + ```python + from ultralytics import YOLO + + # Load a pretrained YOLOv8n model + model = YOLO('yolov8n.pt') + + # Define a glob search for all JPG files in a directory + source = 'path/to/dir/*.jpg' + + # OR define a recursive glob search for all JPG files including subdirectories + source = 'path/to/dir/**/*.jpg' + + # Run inference on the source + results = model(source, stream=True) # generator of Results objects + ``` + + === "YouTube" + Run inference on a YouTube video. By using `stream=True`, you can create a generator of Results objects to reduce memory usage for long videos. + ```python + from ultralytics import YOLO + + # Load a pretrained YOLOv8n model + model = YOLO('yolov8n.pt') + + # Define source as YouTube video URL + source = 'https://youtu.be/LNwODJXcvt4' + + # Run inference on the source + results = model(source, stream=True) # generator of Results objects + ``` + + === "Streams" + Run inference on remote streaming sources using RTSP, RTMP, TCP and IP address protocols. If multiple streams are provided in a `*.streams` text file then batched inference will run, i.e. 8 streams will run at batch-size 8, otherwise single streams will run at batch-size 1. + ```python + from ultralytics import YOLO + + # Load a pretrained YOLOv8n model + model = YOLO('yolov8n.pt') + + # Single stream with batch-size 1 inference + source = 'rtsp://example.com/media.mp4' # RTSP, RTMP, TCP or IP streaming address + + # Multiple streams with batched inference (i.e. 
batch-size 8 for 8 streams) + source = 'path/to/list.streams' # *.streams text file with one streaming address per row + + # Run inference on the source + results = model(source, stream=True) # generator of Results objects + ``` + +## Inference Arguments + +`model.predict()` accepts multiple arguments that can be passed at inference time to override defaults: + +!!! Example + + ```python + from ultralytics import YOLO + + # Load a pretrained YOLOv8n model + model = YOLO('yolov8n.pt') + + # Run inference on 'bus.jpg' with arguments + model.predict('bus.jpg', save=True, imgsz=320, conf=0.5) + ``` + +Inference arguments: + +| Name | Type | Default | Description | +|-----------------|----------------|------------------------|----------------------------------------------------------------------------| +| `source` | `str` | `'ultralytics/assets'` | source directory for images or videos | +| `conf` | `float` | `0.25` | object confidence threshold for detection | +| `iou` | `float` | `0.7` | intersection over union (IoU) threshold for NMS | +| `imgsz` | `int or tuple` | `640` | image size as scalar or (h, w) list, i.e. (640, 480) | +| `half` | `bool` | `False` | use half precision (FP16) | +| `device` | `None or str` | `None` | device to run on, i.e. cuda device=0/1/2/3 or device=cpu | +| `max_det` | `int` | `300` | maximum number of detections per image | +| `vid_stride` | `int` | `1` | video frame-rate stride | +| `stream_buffer` | `bool` | `False` | buffer all streaming frames (True) or return the most recent frame (False) | +| `visualize` | `bool` | `False` | visualize model features | +| `augment` | `bool` | `False` | apply image augmentation to prediction sources | +| `agnostic_nms` | `bool` | `False` | class-agnostic NMS | +| `retina_masks` | `bool` | `False` | use high-resolution segmentation masks | +| `classes` | `None or list` | `None` | filter results by class, i.e.
classes=0, or classes=[0,2,3] | + +Visualization arguments: + +| Name | Type | Default | Description | +|---------------|---------------|---------|-----------------------------------------------------------------| +| `show` | `bool` | `False` | show predicted images and videos if environment allows | +| `save` | `bool` | `False` | save predicted images and videos | +| `save_frames` | `bool` | `False` | save predicted individual video frames | +| `save_txt` | `bool` | `False` | save results as `.txt` file | +| `save_conf` | `bool` | `False` | save results with confidence scores | +| `save_crop` | `bool` | `False` | save cropped images with results | +| `show_labels` | `bool` | `True` | show prediction labels, i.e. 'person' | +| `show_conf` | `bool` | `True` | show prediction confidence, i.e. '0.99' | +| `show_boxes` | `bool` | `True` | show prediction boxes | +| `line_width` | `None or int` | `None` | line width of the bounding boxes. Scaled to image size if None. | + +## Image and Video Formats + +YOLOv8 supports various image and video formats, as specified in [data/utils.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/utils.py). See the tables below for the valid suffixes and example predict commands. + +### Images + +The below table contains valid Ultralytics image formats. 
+ +| Image Suffixes | Example Predict Command | Reference | +|----------------|----------------------------------|-------------------------------------------------------------------------------| +| .bmp | `yolo predict source=image.bmp` | [Microsoft BMP File Format](https://en.wikipedia.org/wiki/BMP_file_format) | +| .dng | `yolo predict source=image.dng` | [Adobe DNG](https://www.adobe.com/products/photoshop/extend.displayTab2.html) | +| .jpeg | `yolo predict source=image.jpeg` | [JPEG](https://en.wikipedia.org/wiki/JPEG) | +| .jpg | `yolo predict source=image.jpg` | [JPEG](https://en.wikipedia.org/wiki/JPEG) | +| .mpo | `yolo predict source=image.mpo` | [Multi Picture Object](https://fileinfo.com/extension/mpo) | +| .png | `yolo predict source=image.png` | [Portable Network Graphics](https://en.wikipedia.org/wiki/PNG) | +| .tif | `yolo predict source=image.tif` | [Tag Image File Format](https://en.wikipedia.org/wiki/TIFF) | +| .tiff | `yolo predict source=image.tiff` | [Tag Image File Format](https://en.wikipedia.org/wiki/TIFF) | +| .webp | `yolo predict source=image.webp` | [WebP](https://en.wikipedia.org/wiki/WebP) | +| .pfm | `yolo predict source=image.pfm` | [Portable FloatMap](https://en.wikipedia.org/wiki/Netpbm#File_formats) | + +### Videos + +The below table contains valid Ultralytics video formats. 
+ +| Video Suffixes | Example Predict Command | Reference | +|----------------|----------------------------------|----------------------------------------------------------------------------------| +| .asf | `yolo predict source=video.asf` | [Advanced Systems Format](https://en.wikipedia.org/wiki/Advanced_Systems_Format) | +| .avi | `yolo predict source=video.avi` | [Audio Video Interleave](https://en.wikipedia.org/wiki/Audio_Video_Interleave) | +| .gif | `yolo predict source=video.gif` | [Graphics Interchange Format](https://en.wikipedia.org/wiki/GIF) | +| .m4v | `yolo predict source=video.m4v` | [MPEG-4 Part 14](https://en.wikipedia.org/wiki/M4V) | +| .mkv | `yolo predict source=video.mkv` | [Matroska](https://en.wikipedia.org/wiki/Matroska) | +| .mov | `yolo predict source=video.mov` | [QuickTime File Format](https://en.wikipedia.org/wiki/QuickTime_File_Format) | +| .mp4 | `yolo predict source=video.mp4` | [MPEG-4 Part 14 - Wikipedia](https://en.wikipedia.org/wiki/MPEG-4_Part_14) | +| .mpeg | `yolo predict source=video.mpeg` | [MPEG-1 Part 2](https://en.wikipedia.org/wiki/MPEG-1) | +| .mpg | `yolo predict source=video.mpg` | [MPEG-1 Part 2](https://en.wikipedia.org/wiki/MPEG-1) | +| .ts | `yolo predict source=video.ts` | [MPEG Transport Stream](https://en.wikipedia.org/wiki/MPEG_transport_stream) | +| .wmv | `yolo predict source=video.wmv` | [Windows Media Video](https://en.wikipedia.org/wiki/Windows_Media_Video) | +| .webm | `yolo predict source=video.webm` | [WebM Project](https://en.wikipedia.org/wiki/WebM) | + +## Working with Results + +All Ultralytics `predict()` calls return a list of `Results` objects (or a generator of `Results` objects when `stream=True` is passed): + +!!!
Example "Results" + + ```python + from ultralytics import YOLO + + # Load a pretrained YOLOv8n model + model = YOLO('yolov8n.pt') + + # Run inference on an image + results = model('bus.jpg') # list of 1 Results object + results = model(['bus.jpg', 'zidane.jpg']) # list of 2 Results objects + ``` + +`Results` objects have the following attributes: + +| Attribute | Type | Description | +|--------------|-----------------------|------------------------------------------------------------------------------------------| +| `orig_img` | `numpy.ndarray` | The original image as a numpy array. | +| `orig_shape` | `tuple` | The original image shape in (height, width) format. | +| `boxes` | `Boxes, optional` | A Boxes object containing the detection bounding boxes. | +| `masks` | `Masks, optional` | A Masks object containing the detection masks. | +| `probs` | `Probs, optional` | A Probs object containing probabilities of each class for classification task. | +| `keypoints` | `Keypoints, optional` | A Keypoints object containing detected keypoints for each object. | +| `speed` | `dict` | A dictionary of preprocess, inference, and postprocess speeds in milliseconds per image. | +| `names` | `dict` | A dictionary of class names. | +| `path` | `str` | The path to the image file. | + +`Results` objects have the following methods: + +| Method | Return Type | Description | +|-----------------|-----------------|-------------------------------------------------------------------------------------| +| `__getitem__()` | `Results` | Return a Results object for the specified index. | +| `__len__()` | `int` | Return the number of detections in the Results object. | +| `update()` | `None` | Update the boxes, masks, and probs attributes of the Results object. | +| `cpu()` | `Results` | Return a copy of the Results object with all tensors on CPU memory. | +| `numpy()` | `Results` | Return a copy of the Results object with all tensors as numpy arrays. 
| +| `cuda()` | `Results` | Return a copy of the Results object with all tensors on GPU memory. | +| `to()` | `Results` | Return a copy of the Results object with tensors on the specified device and dtype. | +| `new()` | `Results` | Return a new Results object with the same image, path, and names. | +| `keys()` | `List[str]` | Return a list of non-empty attribute names. | +| `plot()` | `numpy.ndarray` | Plots the detection results. Returns a numpy array of the annotated image. | +| `verbose()` | `str` | Return log string for each task. | +| `save_txt()` | `None` | Save predictions into a txt file. | +| `save_crop()` | `None` | Save cropped predictions to `save_dir/cls/file_name.jpg`. | +| `tojson()` | `str` | Convert the object to a JSON-formatted string. | + +For more details see the `Results` class [documentation](../reference/engine/results.md). + +### Boxes + +`Boxes` object can be used to index, manipulate, and convert bounding boxes to different formats. + +!!! Example "Boxes" + + ```python + from ultralytics import YOLO + + # Load a pretrained YOLOv8n model + model = YOLO('yolov8n.pt') + + # Run inference on an image + results = model('bus.jpg') # results list + + # View results + for r in results: + print(r.boxes) # print the Boxes object containing the detection bounding boxes + ``` + +Here is a table for the `Boxes` class methods and properties, including their name, type, and description: + +| Name | Type | Description | +|-----------|---------------------------|--------------------------------------------------------------------| +| `cpu()` | Method | Move the object to CPU memory. | +| `numpy()` | Method | Convert the object to a numpy array. | +| `cuda()` | Method | Move the object to CUDA memory. | +| `to()` | Method | Move the object to the specified device. | +| `xyxy` | Property (`torch.Tensor`) | Return the boxes in xyxy format. | +| `conf` | Property (`torch.Tensor`) | Return the confidence values of the boxes.
| +| `cls` | Property (`torch.Tensor`) | Return the class values of the boxes. | +| `id` | Property (`torch.Tensor`) | Return the track IDs of the boxes (if available). | +| `xywh` | Property (`torch.Tensor`) | Return the boxes in xywh format. | +| `xyxyn` | Property (`torch.Tensor`) | Return the boxes in xyxy format normalized by original image size. | +| `xywhn` | Property (`torch.Tensor`) | Return the boxes in xywh format normalized by original image size. | + +For more details see the `Boxes` class [documentation](../reference/engine/results.md#ultralytics.engine.results.Boxes). + +### Masks + +`Masks` object can be used to index, manipulate, and convert masks to segments. + +!!! Example "Masks" + + ```python + from ultralytics import YOLO + + # Load a pretrained YOLOv8n-seg Segment model + model = YOLO('yolov8n-seg.pt') + + # Run inference on an image + results = model('bus.jpg') # results list + + # View results + for r in results: + print(r.masks) # print the Masks object containing the detected instance masks + ``` + +Here is a table for the `Masks` class methods and properties, including their name, type, and description: + +| Name | Type | Description | +|-----------|---------------------------|-----------------------------------------------------------------| +| `cpu()` | Method | Returns the masks tensor on CPU memory. | +| `numpy()` | Method | Returns the masks tensor as a numpy array. | +| `cuda()` | Method | Returns the masks tensor on GPU memory. | +| `to()` | Method | Returns the masks tensor with the specified device and dtype. | +| `xyn` | Property (`torch.Tensor`) | A list of normalized segments represented as tensors. | +| `xy` | Property (`torch.Tensor`) | A list of segments in pixel coordinates represented as tensors. | + +For more details see the `Masks` class [documentation](../reference/engine/results.md#ultralytics.engine.results.Masks). + +### Keypoints + +`Keypoints` object can be used to index, manipulate, and normalize coordinates. + +!!!
Example "Keypoints" + + ```python + from ultralytics import YOLO + + # Load a pretrained YOLOv8n-pose Pose model + model = YOLO('yolov8n-pose.pt') + + # Run inference on an image + results = model('bus.jpg') # results list + + # View results + for r in results: + print(r.keypoints) # print the Keypoints object containing the detected keypoints + ``` + +Here is a table for the `Keypoints` class methods and properties, including their name, type, and description: + +| Name | Type | Description | +|-----------|---------------------------|-------------------------------------------------------------------| +| `cpu()` | Method | Returns the keypoints tensor on CPU memory. | +| `numpy()` | Method | Returns the keypoints tensor as a numpy array. | +| `cuda()` | Method | Returns the keypoints tensor on GPU memory. | +| `to()` | Method | Returns the keypoints tensor with the specified device and dtype. | +| `xyn` | Property (`torch.Tensor`) | A list of normalized keypoints represented as tensors. | +| `xy` | Property (`torch.Tensor`) | A list of keypoints in pixel coordinates represented as tensors. | +| `conf` | Property (`torch.Tensor`) | Returns confidence values of keypoints if available, else None. | + +For more details see the `Keypoints` class [documentation](../reference/engine/results.md#ultralytics.engine.results.Keypoints). + +### Probs + +`Probs` object can be used to index and to get the `top1` and `top5` indices and scores of a classification. + +!!!
Example "Probs" + + ```python + from ultralytics import YOLO + + # Load a pretrained YOLOv8n-cls Classify model + model = YOLO('yolov8n-cls.pt') + + # Run inference on an image + results = model('bus.jpg') # results list + + # View results + for r in results: + print(r.probs) # print the Probs object containing the detected class probabilities + ``` + +Here's a table summarizing the methods and properties for the `Probs` class: + +| Name | Type | Description | +|------------|---------------------------|-------------------------------------------------------------------------| +| `cpu()` | Method | Returns a copy of the probs tensor on CPU memory. | +| `numpy()` | Method | Returns a copy of the probs tensor as a numpy array. | +| `cuda()` | Method | Returns a copy of the probs tensor on GPU memory. | +| `to()` | Method | Returns a copy of the probs tensor with the specified device and dtype. | +| `top1` | Property (`int`) | Index of the top 1 class. | +| `top5` | Property (`list[int]`) | Indices of the top 5 classes. | +| `top1conf` | Property (`torch.Tensor`) | Confidence of the top 1 class. | +| `top5conf` | Property (`torch.Tensor`) | Confidences of the top 5 classes. | + +For more details see the `Probs` class [documentation](../reference/engine/results.md#ultralytics.engine.results.Probs). + +## Plotting Results + +You can use the `plot()` method of a `Results` object to visualize predictions. It plots all prediction types (boxes, masks, keypoints, probabilities, etc.) contained in the `Results` object onto a numpy array that can then be shown or saved. + +!!!
Example "Plotting" + + ```python + from PIL import Image + from ultralytics import YOLO + + # Load a pretrained YOLOv8n model + model = YOLO('yolov8n.pt') + + # Run inference on 'bus.jpg' + results = model('bus.jpg') # results list + + # Show the results + for r in results: + im_array = r.plot() # plot a BGR numpy array of predictions + im = Image.fromarray(im_array[..., ::-1]) # RGB PIL image + im.show() # show image + im.save('results.jpg') # save image + ``` + + The `plot()` method supports the following arguments: + + | Argument | Type | Description | Default | + |--------------|-----------------|--------------------------------------------------------------------------------|---------------| + | `conf` | `bool` | Whether to plot the detection confidence score. | `True` | + | `line_width` | `float` | The line width of the bounding boxes. If None, it is scaled to the image size. | `None` | + | `font_size` | `float` | The font size of the text. If None, it is scaled to the image size. | `None` | + | `font` | `str` | The font to use for the text. | `'Arial.ttf'` | + | `pil` | `bool` | Whether to return the image as a PIL Image. | `False` | + | `img` | `numpy.ndarray` | Plot to another image. if not, plot to original image. | `None` | + | `im_gpu` | `torch.Tensor` | Normalized image in gpu with shape (1, 3, 640, 640), for faster mask plotting. | `None` | + | `kpt_radius` | `int` | Radius of the drawn keypoints. Default is 5. | `5` | + | `kpt_line` | `bool` | Whether to draw lines connecting keypoints. | `True` | + | `labels` | `bool` | Whether to plot the label of bounding boxes. | `True` | + | `boxes` | `bool` | Whether to plot the bounding boxes. | `True` | + | `masks` | `bool` | Whether to plot the masks. | `True` | + | `probs` | `bool` | Whether to plot classification probability | `True` | + +## Thread-Safe Inference + +Ensuring thread safety during inference is crucial when you are running multiple YOLO models in parallel across different threads. 
Thread-safe inference guarantees that each thread's predictions are isolated and do not interfere with one another, avoiding race conditions and ensuring consistent and reliable outputs. + +When using YOLO models in a multi-threaded application, it's important to instantiate separate model objects for each thread or employ thread-local storage to prevent conflicts: + +!!! Example "Thread-Safe Inference" + + Instantiate a single model inside each thread for thread-safe inference: + ```python + from ultralytics import YOLO + from threading import Thread + + def thread_safe_predict(image_path): + # Instantiate a new model inside the thread + local_model = YOLO("yolov8n.pt") + results = local_model.predict(image_path) + # Process results + + + # Starting threads that each have their own model instance + Thread(target=thread_safe_predict, args=("image1.jpg",)).start() + Thread(target=thread_safe_predict, args=("image2.jpg",)).start() + ``` + +For an in-depth look at thread-safe inference with YOLO models and step-by-step instructions, please refer to our [YOLO Thread-Safe Inference Guide](../guides/yolo-thread-safe-inference.md). This guide will provide you with all the necessary information to avoid common pitfalls and ensure that your multi-threaded inference runs smoothly. + +## Streaming Source `for`-loop + +Here's a Python script using OpenCV (`cv2`) and YOLOv8 to run inference on video frames. This script assumes you have already installed the necessary packages (`opencv-python` and `ultralytics`). + +!!! 
Example "Streaming for-loop" + + ```python + import cv2 + from ultralytics import YOLO + + # Load the YOLOv8 model + model = YOLO('yolov8n.pt') + + # Open the video file + video_path = "path/to/your/video/file.mp4" + cap = cv2.VideoCapture(video_path) + + # Loop through the video frames + while cap.isOpened(): + # Read a frame from the video + success, frame = cap.read() + + if success: + # Run YOLOv8 inference on the frame + results = model(frame) + + # Visualize the results on the frame + annotated_frame = results[0].plot() + + # Display the annotated frame + cv2.imshow("YOLOv8 Inference", annotated_frame) + + # Break the loop if 'q' is pressed + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # Break the loop if the end of the video is reached + break + + # Release the video capture object and close the display window + cap.release() + cv2.destroyAllWindows() + ``` + +This script will run predictions on each frame of the video, visualize the results, and display them in a window. The loop can be exited by pressing 'q'. + +[car spare parts]: https://github.com/RizwanMunawar/ultralytics/assets/62513924/a0f802a8-0776-44cf-8f17-93974a4a28a1 + +[football player detect]: https://github.com/RizwanMunawar/ultralytics/assets/62513924/7d320e1f-fc57-4d7f-a691-78ee579c3442 + +[human fall detect]: https://github.com/RizwanMunawar/ultralytics/assets/62513924/86437c4a-3227-4eee-90ef-9efb697bdb43 diff --git a/docs/en/modes/track.md b/docs/en/modes/track.md new file mode 100644 index 0000000..8ec5651 --- /dev/null +++ b/docs/en/modes/track.md @@ -0,0 +1,360 @@ +--- +comments: true +description: Learn how to use Ultralytics YOLO for object tracking in video streams. Guides to use different trackers and customise tracker configurations. 
+keywords: Ultralytics, YOLO, object tracking, video streams, BoT-SORT, ByteTrack, Python guide, CLI guide +--- + +# Multi-Object Tracking with Ultralytics YOLO + +Multi-object tracking examples + +Object tracking in the realm of video analytics is a critical task that not only identifies the location and class of objects within the frame but also maintains a unique ID for each detected object as the video progresses. The applications are limitless—ranging from surveillance and security to real-time sports analytics. + +## Why Choose Ultralytics YOLO for Object Tracking? + +The output from Ultralytics trackers is consistent with standard object detection but has the added value of object IDs. This makes it easy to track objects in video streams and perform subsequent analytics. Here's why you should consider using Ultralytics YOLO for your object tracking needs: + +- **Efficiency:** Process video streams in real-time without compromising accuracy. +- **Flexibility:** Supports multiple tracking algorithms and configurations. +- **Ease of Use:** Simple Python API and CLI options for quick integration and deployment. +- **Customizability:** Easy to use with custom trained YOLO models, allowing integration into domain-specific applications. + +

+
+ +
+ Watch: Object Detection and Tracking with Ultralytics YOLOv8. +

+ +## Real-world Applications + +| Transportation | Retail | Aquaculture | +|:----------------------------------:|:--------------------------------:|:----------------------------:| +| ![Vehicle Tracking][vehicle track] | ![People Tracking][people track] | ![Fish Tracking][fish track] | +| Vehicle Tracking | People Tracking | Fish Tracking | + +## Features at a Glance + +Ultralytics YOLO extends its object detection features to provide robust and versatile object tracking: + +- **Real-Time Tracking:** Seamlessly track objects in high-frame-rate videos. +- **Multiple Tracker Support:** Choose from a variety of established tracking algorithms. +- **Customizable Tracker Configurations:** Tailor the tracking algorithm to meet specific requirements by adjusting various parameters. + +## Available Trackers + +Ultralytics YOLO supports the following tracking algorithms. They can be enabled by passing the relevant YAML configuration file such as `tracker=tracker_type.yaml`: + +* [BoT-SORT](https://github.com/NirAharon/BoT-SORT) - Use `botsort.yaml` to enable this tracker. +* [ByteTrack](https://github.com/ifzhang/ByteTrack) - Use `bytetrack.yaml` to enable this tracker. + +The default tracker is BoT-SORT. + +## Tracking + +To run the tracker on video streams, use a trained Detect, Segment or Pose model such as YOLOv8n, YOLOv8n-seg and YOLOv8n-pose. + +!!! 
Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load an official or custom model + model = YOLO('yolov8n.pt') # Load an official Detect model + model = YOLO('yolov8n-seg.pt') # Load an official Segment model + model = YOLO('yolov8n-pose.pt') # Load an official Pose model + model = YOLO('path/to/best.pt') # Load a custom trained model + + # Perform tracking with the model + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True) # Tracking with default tracker + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True, tracker="bytetrack.yaml") # Tracking with ByteTrack tracker + ``` + + === "CLI" + + ```bash + # Perform tracking with various models using the command line interface + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" # Official Detect model + yolo track model=yolov8n-seg.pt source="https://youtu.be/LNwODJXcvt4" # Official Segment model + yolo track model=yolov8n-pose.pt source="https://youtu.be/LNwODJXcvt4" # Official Pose model + yolo track model=path/to/best.pt source="https://youtu.be/LNwODJXcvt4" # Custom trained model + + # Track using ByteTrack tracker + yolo track model=path/to/best.pt tracker="bytetrack.yaml" + ``` + +As can be seen in the above usage, tracking is available for all Detect, Segment and Pose models run on videos or streaming sources. + +## Configuration + +### Tracking Arguments + +Tracking configuration shares properties with Predict mode, such as `conf`, `iou`, and `show`. For further configurations, refer to the [Predict](../modes/predict.md#inference-arguments) model page. + +!!! 
Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Configure the tracking parameters and run the tracker + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True) + ``` + + === "CLI" + + ```bash + # Configure tracking parameters and run the tracker using the command line interface + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3 iou=0.5 show + ``` + +### Tracker Selection + +Ultralytics also allows you to use a modified tracker configuration file. To do this, simply make a copy of a tracker config file (for example, `custom_tracker.yaml`) from [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers) and modify any configurations (except the `tracker_type`) as per your needs. + +!!! Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load the model and run the tracker with a custom configuration file + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", tracker='custom_tracker.yaml') + ``` + + === "CLI" + + ```bash + # Load the model and run the tracker with a custom configuration file using the command line interface + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml' + ``` + +For a comprehensive list of tracking arguments, refer to the [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers) page. + +## Python Examples + +### Persisting Tracks Loop + +Here is a Python script using OpenCV (`cv2`) and YOLOv8 to run object tracking on video frames. This script assumes you have already installed the necessary packages (`opencv-python` and `ultralytics`). The `persist=True` argument tells the tracker that the current image or frame is the next in a sequence and to expect tracks from the previous image in the current image. + +!!! 
Example "Streaming for-loop with tracking" + + ```python + import cv2 + from ultralytics import YOLO + + # Load the YOLOv8 model + model = YOLO('yolov8n.pt') + + # Open the video file + video_path = "path/to/video.mp4" + cap = cv2.VideoCapture(video_path) + + # Loop through the video frames + while cap.isOpened(): + # Read a frame from the video + success, frame = cap.read() + + if success: + # Run YOLOv8 tracking on the frame, persisting tracks between frames + results = model.track(frame, persist=True) + + # Visualize the results on the frame + annotated_frame = results[0].plot() + + # Display the annotated frame + cv2.imshow("YOLOv8 Tracking", annotated_frame) + + # Break the loop if 'q' is pressed + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # Break the loop if the end of the video is reached + break + + # Release the video capture object and close the display window + cap.release() + cv2.destroyAllWindows() + ``` + +Please note the change from `model(frame)` to `model.track(frame)`, which enables object tracking instead of simple detection. This modified script will run the tracker on each frame of the video, visualize the results, and display them in a window. The loop can be exited by pressing 'q'. + +### Plotting Tracks Over Time + +Visualizing object tracks over consecutive frames can provide valuable insights into the movement patterns and behavior of detected objects within a video. With Ultralytics YOLOv8, plotting these tracks is a seamless and efficient process. + +In the following example, we demonstrate how to utilize YOLOv8's tracking capabilities to plot the movement of detected objects across multiple video frames. This script involves opening a video file, reading it frame by frame, and utilizing the YOLO model to identify and track various objects. By retaining the center points of the detected bounding boxes and connecting them, we can draw lines that represent the paths followed by the tracked objects. + +!!! 
Example "Plotting tracks over multiple video frames" + + ```python + from collections import defaultdict + + import cv2 + import numpy as np + + from ultralytics import YOLO + + # Load the YOLOv8 model + model = YOLO('yolov8n.pt') + + # Open the video file + video_path = "path/to/video.mp4" + cap = cv2.VideoCapture(video_path) + + # Store the track history + track_history = defaultdict(lambda: []) + + # Loop through the video frames + while cap.isOpened(): + # Read a frame from the video + success, frame = cap.read() + + if success: + # Run YOLOv8 tracking on the frame, persisting tracks between frames + results = model.track(frame, persist=True) + + # Get the boxes and track IDs + boxes = results[0].boxes.xywh.cpu() + track_ids = results[0].boxes.id.int().cpu().tolist() + + # Visualize the results on the frame + annotated_frame = results[0].plot() + + # Plot the tracks + for box, track_id in zip(boxes, track_ids): + x, y, w, h = box + track = track_history[track_id] + track.append((float(x), float(y))) # x, y center point + if len(track) > 30: # retain only the last 30 center points (30 frames) per track + track.pop(0) + + # Draw the tracking lines + points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2)) + cv2.polylines(annotated_frame, [points], isClosed=False, color=(230, 230, 230), thickness=10) + + # Display the annotated frame + cv2.imshow("YOLOv8 Tracking", annotated_frame) + + # Break the loop if 'q' is pressed + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # Break the loop if the end of the video is reached + break + + # Release the video capture object and close the display window + cap.release() + cv2.destroyAllWindows() + ``` + +### Multithreaded Tracking + +Multithreaded tracking provides the capability to run object tracking on multiple video streams simultaneously. This is particularly useful when handling multiple video inputs, such as from multiple surveillance cameras, where concurrent processing can greatly enhance efficiency and performance. 
+ +In the provided Python script, we make use of Python's `threading` module to run multiple instances of the tracker concurrently. Each thread is responsible for running the tracker on one video file, and all the threads run simultaneously in the background. + +To ensure that each thread receives the correct parameters (the video file, the model to use and the file index), we define a function `run_tracker_in_thread` that accepts these parameters and contains the main tracking loop. This function reads the video frame by frame, runs the tracker, and displays the results. + +Two different models are used in this example: `yolov8n.pt` and `yolov8n-seg.pt`, each tracking objects in a different video file. The video files are specified in `video_file1` and `video_file2`. + +The `daemon=True` parameter in `threading.Thread` means that these threads will be closed as soon as the main program finishes. We then start the threads with `start()` and use `join()` to make the main thread wait until both tracker threads have finished. + +Finally, after all threads have completed their task, the windows displaying the results are closed using `cv2.destroyAllWindows()`. + +!!! Example "Streaming for-loop with tracking" + + ```python + import threading + import cv2 + from ultralytics import YOLO + + + def run_tracker_in_thread(filename, model, file_index): + """ + Runs a video file or webcam stream concurrently with the YOLOv8 model using threading. + + This function captures video frames from a given file or camera source and utilizes the YOLOv8 model for object + tracking. The function runs in its own thread for concurrent processing. + + Args: + filename (str): The path to the video file or the identifier for the webcam/external camera source. + model (obj): The YOLOv8 model object. + file_index (int): An index to uniquely identify the file being processed, used for display purposes. + + Note: + Press 'q' to quit the video display window. 
+ """ + video = cv2.VideoCapture(filename) # Read the video file + + while True: + ret, frame = video.read() # Read the video frames + + # Exit the loop if no more frames in either video + if not ret: + break + + # Track objects in frames if available + results = model.track(frame, persist=True) + res_plotted = results[0].plot() + cv2.imshow(f"Tracking_Stream_{file_index}", res_plotted) + + key = cv2.waitKey(1) + if key == ord('q'): + break + + # Release video sources + video.release() + + + # Load the models + model1 = YOLO('yolov8n.pt') + model2 = YOLO('yolov8n-seg.pt') + + # Define the video files for the trackers + video_file1 = "path/to/video1.mp4" # Path to video file, 0 for webcam + video_file2 = 0 # Path to video file, 0 for webcam, 1 for external camera + + # Create the tracker threads + tracker_thread1 = threading.Thread(target=run_tracker_in_thread, args=(video_file1, model1, 1), daemon=True) + tracker_thread2 = threading.Thread(target=run_tracker_in_thread, args=(video_file2, model2, 2), daemon=True) + + # Start the tracker threads + tracker_thread1.start() + tracker_thread2.start() + + # Wait for the tracker threads to finish + tracker_thread1.join() + tracker_thread2.join() + + # Clean up and close windows + cv2.destroyAllWindows() + ``` + +This example can easily be extended to handle more video files and models by creating more threads and applying the same methodology. + +## Contribute New Trackers + +Are you proficient in multi-object tracking and have successfully implemented or adapted a tracking algorithm with Ultralytics YOLO? We invite you to contribute to our Trackers section in [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers)! Your real-world applications and solutions could be invaluable for users working on tracking tasks. 
+ +By contributing to this section, you help expand the scope of tracking solutions available within the Ultralytics YOLO framework, adding another layer of functionality and utility for the community. + +To initiate your contribution, please refer to our [Contributing Guide](https://docs.ultralytics.com/help/contributing) for comprehensive instructions on submitting a Pull Request (PR) 🛠️. We are excited to see what you bring to the table! + +Together, let's enhance the tracking capabilities of the Ultralytics YOLO ecosystem 🙏! + +[vehicle track]: https://github.com/RizwanMunawar/ultralytics/assets/62513924/ee6e6038-383b-4f21-ac29-b2a1c7d386ab + +[people track]: https://github.com/RizwanMunawar/ultralytics/assets/62513924/93bb4ee2-77a0-4e4e-8eb6-eb8f527f0527 + +[fish track]: https://github.com/RizwanMunawar/ultralytics/assets/62513924/a5146d0f-bfa8-4e0a-b7df-3c1446cd8142 diff --git a/docs/en/modes/train.md b/docs/en/modes/train.md new file mode 100644 index 0000000..7d859df --- /dev/null +++ b/docs/en/modes/train.md @@ -0,0 +1,295 @@ +--- +comments: true +description: Step-by-step guide to train YOLOv8 models with Ultralytics YOLO including examples of single-GPU and multi-GPU training +keywords: Ultralytics, YOLOv8, YOLO, object detection, train mode, custom dataset, GPU training, multi-GPU, hyperparameters, CLI examples, Python examples +--- + +# Model Training with Ultralytics YOLO + +Ultralytics YOLO ecosystem and integrations + +## Introduction + +Training a deep learning model involves feeding it data and adjusting its parameters so that it can make accurate predictions. Train mode in Ultralytics YOLOv8 is engineered for effective and efficient training of object detection models, fully utilizing modern hardware capabilities. This guide aims to cover all the details you need to get started with training your own models using YOLOv8's robust set of features. + +

+
+ +
+ Watch: How to Train a YOLOv8 model on Your Custom Dataset in Google Colab. +

+ +## Why Choose Ultralytics YOLO for Training? + +Here are some compelling reasons to opt for YOLOv8's Train mode: + +- **Efficiency:** Make the most out of your hardware, whether you're on a single-GPU setup or scaling across multiple GPUs. +- **Versatility:** Train on custom datasets in addition to readily available ones like COCO, VOC, and ImageNet. +- **User-Friendly:** Simple yet powerful CLI and Python interfaces for a straightforward training experience. +- **Hyperparameter Flexibility:** A broad range of customizable hyperparameters to fine-tune model performance. + +### Key Features of Train Mode + +The following are some notable features of YOLOv8's Train mode: + +- **Automatic Dataset Download:** Standard datasets like COCO, VOC, and ImageNet are downloaded automatically on first use. +- **Multi-GPU Support:** Scale your training efforts seamlessly across multiple GPUs to expedite the process. +- **Hyperparameter Configuration:** The option to modify hyperparameters through YAML configuration files or CLI arguments. +- **Visualization and Monitoring:** Real-time tracking of training metrics and visualization of the learning process for better insights. + +!!! Tip "Tip" + + * YOLOv8 datasets like COCO, VOC, ImageNet and many others automatically download on first use, i.e. `yolo train data=coco.yaml` + +## Usage Examples + +Train YOLOv8n on the COCO128 dataset for 100 epochs at image size 640. The training device can be specified using the `device` argument. If no argument is passed GPU `device=0` will be used if available, otherwise `device=cpu` will be used. See Arguments section below for a full list of training arguments. + +!!! Example "Single-GPU and CPU Training Example" + + Device is determined automatically. If a GPU is available then it will be used, otherwise training will start on CPU. 
+ + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n.yaml') # build a new model from YAML + model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # build from YAML and transfer weights + + # Train the model + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + # Build a new model from YAML and start training from scratch + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # Start training from a pretrained *.pt model + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # Build a new model from YAML, transfer pretrained weights to it and start training + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### Multi-GPU Training + +Multi-GPU training allows for more efficient utilization of available hardware resources by distributing the training load across multiple GPUs. This feature is available through both the Python API and the command-line interface. To enable multi-GPU training, specify the GPU device IDs you wish to use. + +!!! Example "Multi-GPU Training Example" + + To train with 2 GPUs, CUDA devices 0 and 1 use the following commands. Expand to additional GPUs as required. 
+ + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) + + # Train the model with 2 GPUs + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device=[0, 1]) + ``` + + === "CLI" + + ```bash + # Start training from a pretrained *.pt model using GPUs 0 and 1 + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=0,1 + ``` + +### Apple M1 and M2 MPS Training + +With the support for Apple M1 and M2 chips integrated in the Ultralytics YOLO models, it's now possible to train your models on devices utilizing the powerful Metal Performance Shaders (MPS) framework. The MPS offers a high-performance way of executing computation and image processing tasks on Apple's custom silicon. + +To enable training on Apple M1 and M2 chips, you should specify 'mps' as your device when initiating the training process. Below is an example of how you could do this in Python and via the command line: + +!!! Example "MPS Training Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) + + # Train the model with MPS + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device='mps') + ``` + + === "CLI" + + ```bash + # Start training from a pretrained *.pt model using MPS + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=mps + ``` + +While leveraging the computational power of the M1/M2 chips, this enables more efficient processing of the training tasks. For more detailed guidance and advanced configuration options, please refer to the [PyTorch MPS documentation](https://pytorch.org/docs/stable/notes/mps.html). + +### Resuming Interrupted Trainings + +Resuming training from a previously saved state is a crucial feature when working with deep learning models. 
This can come in handy in various scenarios, like when the training process has been unexpectedly interrupted, or when you wish to continue training a model with new data or for more epochs. + +When training is resumed, Ultralytics YOLO loads the weights from the last saved model and also restores the optimizer state, learning rate scheduler, and the epoch number. This allows you to continue the training process seamlessly from where it was left off. + +You can easily resume training in Ultralytics YOLO by setting the `resume` argument to `True` when calling the `train` method, and specifying the path to the `.pt` file containing the partially trained model weights. + +Below is an example of how to resume an interrupted training using Python and via the command line: + +!!! Example "Resume Training Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('path/to/last.pt') # load a partially trained model + + # Resume training + results = model.train(resume=True) + ``` + + === "CLI" + + ```bash + # Resume an interrupted training + yolo train resume model=path/to/last.pt + ``` + +By setting `resume=True`, the `train` function will continue training from where it left off, using the state stored in the 'path/to/last.pt' file. If the `resume` argument is omitted or set to `False`, the `train` function will start a new training session. + +Remember that checkpoints are saved at the end of every epoch by default, or at fixed interval using the `save_period` argument, so you must complete at least 1 epoch to resume a training run. + +## Arguments + +Training settings for YOLO models refer to the various hyperparameters and configurations used to train the model on a dataset. These settings can affect the model's performance, speed, and accuracy. Some common YOLO training settings include the batch size, learning rate, momentum, and weight decay. 
Other factors that may affect the training process include the choice of optimizer, the choice of loss function, and the size and composition of the training dataset. It is important to carefully tune and experiment with these settings to achieve the best possible performance for a given task. + +| Key | Value | Description | +|-------------------|----------|------------------------------------------------------------------------------------------------| +| `model` | `None` | path to model file, i.e. yolov8n.pt, yolov8n.yaml | +| `data` | `None` | path to data file, i.e. coco128.yaml | +| `epochs` | `100` | number of epochs to train for | +| `patience` | `50` | epochs to wait for no observable improvement for early stopping of training | +| `batch` | `16` | number of images per batch (-1 for AutoBatch) | +| `imgsz` | `640` | size of input images as integer | +| `save` | `True` | save train checkpoints and predict results | +| `save_period` | `-1` | Save checkpoint every x epochs (disabled if < 1) | +| `cache` | `False` | True/ram, disk or False. Use cache for data loading | +| `device` | `None` | device to run on, i.e. 
cuda device=0 or device=0,1,2,3 or device=cpu | +| `workers` | `8` | number of worker threads for data loading (per RANK if DDP) | +| `project` | `None` | project name | +| `name` | `None` | experiment name | +| `exist_ok` | `False` | whether to overwrite existing experiment | +| `pretrained` | `True` | (bool or str) whether to use a pretrained model (bool) or a model to load weights from (str) | +| `optimizer` | `'auto'` | optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto] | +| `verbose` | `False` | whether to print verbose output | +| `seed` | `0` | random seed for reproducibility | +| `deterministic` | `True` | whether to enable deterministic mode | +| `single_cls` | `False` | train multi-class data as single-class | +| `rect` | `False` | rectangular training with each batch collated for minimum padding | +| `cos_lr` | `False` | use cosine learning rate scheduler | +| `close_mosaic` | `10` | (int) disable mosaic augmentation for final epochs (0 to disable) | +| `resume` | `False` | resume training from last checkpoint | +| `amp` | `True` | Automatic Mixed Precision (AMP) training, choices=[True, False] | +| `fraction` | `1.0` | dataset fraction to train on (default is 1.0, all images in train set) | +| `profile` | `False` | profile ONNX and TensorRT speeds during training for loggers | +| `freeze` | `None` | (int or list, optional) freeze first n layers, or freeze list of layer indices during training | +| `lr0` | `0.01` | initial learning rate (i.e. 
SGD=1E-2, Adam=1E-3) | +| `lrf` | `0.01` | final learning rate (lr0 * lrf) | +| `momentum` | `0.937` | SGD momentum/Adam beta1 | +| `weight_decay` | `0.0005` | optimizer weight decay 5e-4 | +| `warmup_epochs` | `3.0` | warmup epochs (fractions ok) | +| `warmup_momentum` | `0.8` | warmup initial momentum | +| `warmup_bias_lr` | `0.1` | warmup initial bias lr | +| `box` | `7.5` | box loss gain | +| `cls` | `0.5` | cls loss gain (scale with pixels) | +| `dfl` | `1.5` | dfl loss gain | +| `pose` | `12.0` | pose loss gain (pose-only) | +| `kobj` | `2.0` | keypoint obj loss gain (pose-only) | +| `label_smoothing` | `0.0` | label smoothing (fraction) | +| `nbs` | `64` | nominal batch size | +| `overlap_mask` | `True` | masks should overlap during training (segment train only) | +| `mask_ratio` | `4` | mask downsample ratio (segment train only) | +| `dropout` | `0.0` | use dropout regularization (classify train only) | +| `val` | `True` | validate/test during training | +| `plots` | `False` | save plots and images during train/val | + +## Logging + +In training a YOLOv8 model, you might find it valuable to keep track of the model's performance over time. This is where logging comes into play. Ultralytics' YOLO provides support for three types of loggers - Comet, ClearML, and TensorBoard. + +To use a logger, select it from the dropdown menu in the code snippet above and run it. The chosen logger will be installed and initialized. + +### Comet + +[Comet](../integrations/comet.md) is a platform that allows data scientists and developers to track, compare, explain and optimize experiments and models. It provides functionalities such as real-time metrics, code diffs, and hyperparameters tracking. + +To use Comet: + +!!! Example + + === "Python" + ```python + # pip install comet_ml + import comet_ml + + comet_ml.init() + ``` + +Remember to sign in to your Comet account on their website and get your API key. 
You will need to add this to your environment variables or your script to log your experiments. + +### ClearML + +[ClearML](https://www.clear.ml/) is an open-source platform that automates tracking of experiments and helps with efficient sharing of resources. It is designed to help teams manage, execute, and reproduce their ML work more efficiently. + +To use ClearML: + +!!! Example + + === "Python" + ```python + # pip install clearml + import clearml + + clearml.browser_login() + ``` + +After running this script, you will need to sign in to your ClearML account on the browser and authenticate your session. + +### TensorBoard + +[TensorBoard](https://www.tensorflow.org/tensorboard) is a visualization toolkit for TensorFlow. It allows you to visualize your TensorFlow graph, plot quantitative metrics about the execution of your graph, and show additional data like images that pass through it. + +To use TensorBoard in [Google Colab](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb): + +!!! Example + + === "CLI" + ```bash + load_ext tensorboard + tensorboard --logdir ultralytics/runs # replace with 'runs' directory + ``` + +To use TensorBoard locally run the below command and view results at http://localhost:6006/. + +!!! Example + + === "CLI" + ```bash + tensorboard --logdir ultralytics/runs # replace with 'runs' directory + ``` + +This will load TensorBoard and direct it to the directory where your training logs are saved. + +After setting up your logger, you can then proceed with your model training. All training metrics will be automatically logged in your chosen platform, and you can access these logs to monitor your model's performance over time, compare different models, and identify areas for improvement. 
diff --git a/docs/en/modes/val.md b/docs/en/modes/val.md new file mode 100644 index 0000000..30091d4 --- /dev/null +++ b/docs/en/modes/val.md @@ -0,0 +1,85 @@ +--- +comments: true +description: Guide for Validating YOLOv8 Models. Learn how to evaluate the performance of your YOLO models using validation settings and metrics with Python and CLI examples. +keywords: Ultralytics, YOLO Docs, YOLOv8, validation, model evaluation, hyperparameters, accuracy, metrics, Python, CLI +--- + +# Model Validation with Ultralytics YOLO + +Ultralytics YOLO ecosystem and integrations + +## Introduction + +Validation is a critical step in the machine learning pipeline, allowing you to assess the quality of your trained models. Val mode in Ultralytics YOLOv8 provides a robust suite of tools and metrics for evaluating the performance of your object detection models. This guide serves as a complete resource for understanding how to effectively use the Val mode to ensure that your models are both accurate and reliable. + +## Why Validate with Ultralytics YOLO? + +Here's why using YOLOv8's Val mode is advantageous: + +- **Precision:** Get accurate metrics like mAP50, mAP75, and mAP50-95 to comprehensively evaluate your model. +- **Convenience:** Utilize built-in features that remember training settings, simplifying the validation process. +- **Flexibility:** Validate your model with the same or different datasets and image sizes. +- **Hyperparameter Tuning:** Use validation metrics to fine-tune your model for better performance. + +### Key Features of Val Mode + +These are the notable functionalities offered by YOLOv8's Val mode: + +- **Automated Settings:** Models remember their training configurations for straightforward validation. +- **Multi-Metric Support:** Evaluate your model based on a range of accuracy metrics. +- **CLI and Python API:** Choose from command-line interface or Python API based on your preference for validation. 
+- **Data Compatibility:** Works seamlessly with datasets used during the training phase as well as custom datasets. + +!!! Tip "Tip" + + * YOLOv8 models automatically remember their training settings, so you can validate a model at the same image size and on the original dataset easily with just `yolo val model=yolov8n.pt` or `YOLO('yolov8n.pt').val()` + +## Usage Examples + +Validate trained YOLOv8n model accuracy on the COCO128 dataset. No arguments need to be passed as the `model` retains its training `data` and arguments as model attributes. See Arguments section below for a full list of validation arguments. + +!!! Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n.pt') # load an official model + model = YOLO('path/to/best.pt') # load a custom model + + # Validate the model + metrics = model.val() # no arguments needed, dataset and settings remembered + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # a list containing map50-95 of each category + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # val official model + yolo detect val model=path/to/best.pt # val custom model + ``` + +## Arguments + +Validation settings for YOLO models refer to the various hyperparameters and configurations used to evaluate the model's performance on a validation dataset. These settings can affect the model's performance, speed, and accuracy. Some common YOLO validation settings include the batch size, the frequency with which validation is performed during training, and the metrics used to evaluate the model's performance. Other factors that may affect the validation process include the size and composition of the validation dataset and the specific task the model is being used for. It is important to carefully tune and experiment with these settings to ensure that the model is performing well on the validation dataset and to detect and prevent overfitting. 
+ +| Key | Value | Description | +|---------------|---------|--------------------------------------------------------------------| +| `data` | `None` | path to data file, i.e. coco128.yaml | +| `imgsz` | `640` | size of input images as integer | +| `batch` | `16` | number of images per batch (-1 for AutoBatch) | +| `save_json` | `False` | save results to JSON file | +| `save_hybrid` | `False` | save hybrid version of labels (labels + additional predictions) | +| `conf` | `0.001` | object confidence threshold for detection | +| `iou` | `0.6` | intersection over union (IoU) threshold for NMS | +| `max_det` | `300` | maximum number of detections per image | +| `half` | `True` | use half precision (FP16) | +| `device` | `None` | device to run on, i.e. cuda device=0/1/2/3 or device=cpu | +| `dnn` | `False` | use OpenCV DNN for ONNX inference | +| `plots` | `False` | save plots and images during train/val | +| `rect` | `False` | rectangular val with each batch collated for minimum padding | +| `split` | `val` | dataset split to use for validation, i.e. 'val', 'test' or 'train' | diff --git a/docs/en/quickstart.md b/docs/en/quickstart.md new file mode 100644 index 0000000..9ae4069 --- /dev/null +++ b/docs/en/quickstart.md @@ -0,0 +1,327 @@ +--- +comments: true +description: Explore various methods to install Ultralytics using pip, conda, git and Docker. Learn how to use Ultralytics with command line interface or within your Python projects. +keywords: Ultralytics installation, pip install Ultralytics, Docker install Ultralytics, Ultralytics command line interface, Ultralytics Python interface +--- + +## Install Ultralytics + +Ultralytics provides various installation methods including pip, conda, and Docker. Install YOLOv8 via the `ultralytics` pip package for the latest stable release or by cloning the [Ultralytics GitHub repository](https://github.com/ultralytics/ultralytics) for the most up-to-date version. 
Docker can be used to execute the package in an isolated container, avoiding local installation. + +!!! Example "Install" + + === "Pip install (recommended)" + Install the `ultralytics` package using pip, or update an existing installation by running `pip install -U ultralytics`. Visit the Python Package Index (PyPI) for more details on the `ultralytics` package: [https://pypi.org/project/ultralytics/](https://pypi.org/project/ultralytics/). + + [![PyPI version](https://badge.fury.io/py/ultralytics.svg)](https://badge.fury.io/py/ultralytics) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) + + ```bash + # Install the ultralytics package from PyPI + pip install ultralytics + ``` + + You can also install the `ultralytics` package directly from the GitHub [repository](https://github.com/ultralytics/ultralytics). This might be useful if you want the latest development version. Make sure to have the Git command-line tool installed on your system. The `@main` command installs the `main` branch and may be modified to another branch, i.e. `@my-branch`, or removed entirely to default to `main` branch. + + ```bash + # Install the ultralytics package from GitHub + pip install git+https://github.com/ultralytics/ultralytics.git@main + ``` + + + === "Conda install" + Conda is an alternative package manager to pip which may also be used for installation. Visit Anaconda for more details at [https://anaconda.org/conda-forge/ultralytics](https://anaconda.org/conda-forge/ultralytics). Ultralytics feedstock repository for updating the conda package is at [https://github.com/conda-forge/ultralytics-feedstock/](https://github.com/conda-forge/ultralytics-feedstock/). 
+ + + [![Conda Recipe](https://img.shields.io/badge/recipe-ultralytics-green.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda Version](https://img.shields.io/conda/vn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) + + ```bash + # Install the ultralytics package using conda + conda install -c conda-forge ultralytics + ``` + + !!! Note + + If you are installing in a CUDA environment, best practice is to install `ultralytics`, `pytorch` and `pytorch-cuda` in the same command to allow the conda package manager to resolve any conflicts, or else to install `pytorch-cuda` last to allow it to override the CPU-specific `pytorch` package if necessary. + ```bash + # Install all packages together using conda + conda install -c pytorch -c nvidia -c conda-forge pytorch torchvision pytorch-cuda=11.8 ultralytics + ``` + + ### Conda Docker Image + + Ultralytics Conda Docker images are also available from [DockerHub](https://hub.docker.com/r/ultralytics/ultralytics). These images are based on [Miniconda3](https://docs.conda.io/projects/miniconda/en/latest/) and are a simple way to start using `ultralytics` in a Conda environment. + + ```bash + # Set image name as a variable + t=ultralytics/ultralytics:latest-conda + + # Pull the latest ultralytics image from Docker Hub + sudo docker pull $t + + # Run the ultralytics image in a container with GPU support + sudo docker run -it --ipc=host --gpus all $t # all GPUs + sudo docker run -it --ipc=host --gpus '"device=2,3"' $t # specify GPUs + ``` + + === "Git clone" + Clone the `ultralytics` repository if you are interested in contributing to the development or wish to experiment with the latest source code. 
After cloning, navigate into the directory and install the package in editable mode `-e` using pip. + ```bash + # Clone the ultralytics repository + git clone https://github.com/ultralytics/ultralytics + + # Navigate to the cloned directory + cd ultralytics + + # Install the package in editable mode for development + pip install -e . + ``` + + === "Docker" + + Utilize Docker to effortlessly execute the `ultralytics` package in an isolated container, ensuring consistent and smooth performance across various environments. By choosing one of the official `ultralytics` images from [Docker Hub](https://hub.docker.com/r/ultralytics/ultralytics), you not only avoid the complexity of local installation but also benefit from access to a verified working environment. Ultralytics offers 6 main supported Docker images, each designed to provide high compatibility and efficiency for different platforms and use cases: + + Docker Pulls + + - **Dockerfile:** GPU image recommended for training. + - **Dockerfile-arm64:** Optimized for ARM64 architecture, allowing deployment on devices like Raspberry Pi and other ARM64-based platforms. + - **Dockerfile-cpu:** Ubuntu-based CPU-only version suitable for inference and environments without GPUs. + - **Dockerfile-jetson:** Tailored for NVIDIA Jetson devices, integrating GPU support optimized for these platforms. + - **Dockerfile-python:** Minimal image with just Python and necessary dependencies, ideal for lightweight applications and development. + - **Dockerfile-conda:** Based on Miniconda3 with conda installation of ultralytics package. 
+ + Below are the commands to get the latest image and execute it: + + ```bash + # Set image name as a variable + t=ultralytics/ultralytics:latest + + # Pull the latest ultralytics image from Docker Hub + sudo docker pull $t + + # Run the ultralytics image in a container with GPU support + sudo docker run -it --ipc=host --gpus all $t # all GPUs + sudo docker run -it --ipc=host --gpus '"device=2,3"' $t # specify GPUs + ``` + + The above commands initialize a Docker container with the latest `ultralytics` image. The `-it` flag assigns a pseudo-TTY and keeps stdin open, enabling you to interact with the container. The `--ipc=host` flag sets the IPC (Inter-Process Communication) namespace to the host, which is essential for sharing memory between processes. The `--gpus all` flag enables access to all available GPUs inside the container, which is crucial for tasks that require GPU computation. + + Note: To work with files on your local machine within the container, use Docker volumes for mounting a local directory into the container: + + ```bash + # Mount local directory to a directory inside the container + sudo docker run -it --ipc=host --gpus all -v /path/on/host:/path/in/container $t + ``` + + Replace `/path/on/host` with the directory path on your local machine, and `/path/in/container` with the desired path inside the Docker container for accessibility. + + For advanced Docker usage, feel free to explore the [Ultralytics Docker Guide](https://docs.ultralytics.com/guides/docker-quickstart/). + +See the `ultralytics` [requirements.txt](https://github.com/ultralytics/ultralytics/blob/main/requirements.txt) file for a list of dependencies. Note that all examples above install all required dependencies. + +

+
+ +
+ Watch: Ultralytics YOLO Quick Start Guide +

+ +!!! Tip "Tip" + + PyTorch requirements vary by operating system and CUDA requirements, so it's recommended to install PyTorch first following instructions at [https://pytorch.org/get-started/locally](https://pytorch.org/get-started/locally). + + + PyTorch Installation Instructions + + +## Use Ultralytics with CLI + +The Ultralytics command line interface (CLI) allows for simple single-line commands without the need for a Python environment. CLI requires no customization or Python code. You can simply run all tasks from the terminal with the `yolo` command. Check out the [CLI Guide](usage/cli.md) to learn more about using YOLOv8 from the command line. + +!!! Example + + === "Syntax" + + Ultralytics `yolo` commands use the following syntax: + ```bash + yolo TASK MODE ARGS + ``` + + - `TASK` (optional) is one of ([detect](tasks/detect.md), [segment](tasks/segment.md), [classify](tasks/classify.md), [pose](tasks/pose.md)) + - `MODE` (required) is one of ([train](modes/train.md), [val](modes/val.md), [predict](modes/predict.md), [export](modes/export.md), [track](modes/track.md)) + - `ARGS` (optional) are `arg=value` pairs like `imgsz=640` that override defaults. + + See all `ARGS` in the full [Configuration Guide](usage/cfg.md) or with the `yolo cfg` CLI command. 
+ + === "Train" + + Train a detection model for 10 epochs with an initial learning_rate of 0.01 + ```bash + yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01 + ``` + + === "Predict" + + Run prediction on a YouTube video using a pretrained segmentation model at image size 320: + ```bash + yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 + ``` + + === "Val" + + Validate a pretrained detection model at batch-size 1 and image size 640: + ```bash + yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640 + ``` + + === "Export" + + Export a YOLOv8n classification model to ONNX format at image size 224 by 128 (no TASK required) + ```bash + yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128 + ``` + + === "Special" + + Run special commands to see version, view settings, run checks and more: + ```bash + yolo help + yolo checks + yolo version + yolo settings + yolo copy-cfg + yolo cfg + ``` + +!!! Warning "Warning" + + Arguments must be passed as `arg=val` pairs, split by an equals `=` sign and delimited by spaces between pairs. Do not use `--` argument prefixes or commas `,` between arguments. + + - `yolo predict model=yolov8n.pt imgsz=640 conf=0.25` ✅ + - `yolo predict model yolov8n.pt imgsz 640 conf 0.25` ❌ (missing `=`) + - `yolo predict model=yolov8n.pt, imgsz=640, conf=0.25` ❌ (do not use `,`) + - `yolo predict --model yolov8n.pt --imgsz 640 --conf 0.25` ❌ (do not use `--`) + +[CLI Guide](usage/cli.md){ .md-button } + +## Use Ultralytics with Python + +YOLOv8's Python interface allows for seamless integration into your Python projects, making it easy to load, run, and process the model's output. Designed with simplicity and ease of use in mind, the Python interface enables users to quickly implement object detection, segmentation, and classification in their projects. This makes YOLOv8's Python interface an invaluable tool for anyone looking to incorporate these functionalities into their Python projects. 
+ +For example, users can load a model, train it, evaluate its performance on a validation set, and even export it to ONNX format with just a few lines of code. Check out the [Python Guide](usage/python.md) to learn more about using YOLOv8 within your Python projects. + +!!! Example + + ```python + from ultralytics import YOLO + + # Create a new YOLO model from scratch + model = YOLO('yolov8n.yaml') + + # Load a pretrained YOLO model (recommended for training) + model = YOLO('yolov8n.pt') + + # Train the model using the 'coco128.yaml' dataset for 3 epochs + results = model.train(data='coco128.yaml', epochs=3) + + # Evaluate the model's performance on the validation set + results = model.val() + + # Perform object detection on an image using the model + results = model('https://ultralytics.com/images/bus.jpg') + + # Export the model to ONNX format + success = model.export(format='onnx') + ``` + +[Python Guide](usage/python.md){.md-button .md-button--primary} + +## Ultralytics Settings + +The Ultralytics library provides a powerful settings management system to enable fine-grained control over your experiments. By making use of the `SettingsManager` housed within the `ultralytics.utils` module, users can readily access and alter their settings. These are stored in a YAML file and can be viewed or modified either directly within the Python environment or via the Command-Line Interface (CLI). + +### Inspecting Settings + +To gain insight into the current configuration of your settings, you can view them directly: + +!!! Example "View settings" + + === "Python" + You can use Python to view your settings. Start by importing the `settings` object from the `ultralytics` module. 
Print and return settings using the following commands: + ```python + from ultralytics import settings + + # View all settings + print(settings) + + # Return a specific setting + value = settings['runs_dir'] + ``` + + === "CLI" + Alternatively, the command-line interface allows you to check your settings with a simple command: + ```bash + yolo settings + ``` + +### Modifying Settings + +Ultralytics allows users to easily modify their settings. Changes can be performed in the following ways: + +!!! Example "Update settings" + + === "Python" + Within the Python environment, call the `update` method on the `settings` object to change your settings: + ```python + from ultralytics import settings + + # Update a setting + settings.update({'runs_dir': '/path/to/runs'}) + + # Update multiple settings + settings.update({'runs_dir': '/path/to/runs', 'tensorboard': False}) + + # Reset settings to default values + settings.reset() + ``` + + === "CLI" + If you prefer using the command-line interface, the following commands will allow you to modify your settings: + ```bash + # Update a setting + yolo settings runs_dir='/path/to/runs' + + # Update multiple settings + yolo settings runs_dir='/path/to/runs' tensorboard=False + + # Reset settings to default values + yolo settings reset + ``` + +### Understanding Settings + +The table below provides an overview of the settings available for adjustment within Ultralytics. Each setting is outlined along with an example value, the data type, and a brief description. 
+ +| Name | Example Value | Data Type | Description | +|--------------------|-----------------------|-----------|------------------------------------------------------------------------------------------------------------------| +| `settings_version` | `'0.0.4'` | `str` | Ultralytics _settings_ version (different from Ultralytics [pip](https://pypi.org/project/ultralytics/) version) | +| `datasets_dir` | `'/path/to/datasets'` | `str` | The directory where the datasets are stored | +| `weights_dir` | `'/path/to/weights'` | `str` | The directory where the model weights are stored | +| `runs_dir` | `'/path/to/runs'` | `str` | The directory where the experiment runs are stored | +| `uuid` | `'a1b2c3d4'` | `str` | The unique identifier for the current settings | +| `sync` | `True` | `bool` | Whether to sync analytics and crashes to HUB | +| `api_key` | `''` | `str` | Ultralytics HUB [API Key](https://hub.ultralytics.com/settings?tab=api+keys) | +| `clearml` | `True` | `bool` | Whether to use ClearML logging | +| `comet` | `True` | `bool` | Whether to use [Comet ML](https://bit.ly/yolov8-readme-comet) for experiment tracking and visualization | +| `dvc` | `True` | `bool` | Whether to use [DVC for experiment tracking](https://dvc.org/doc/dvclive/ml-frameworks/yolo) and version control | +| `hub` | `True` | `bool` | Whether to use [Ultralytics HUB](https://hub.ultralytics.com) integration | +| `mlflow` | `True` | `bool` | Whether to use MLFlow for experiment tracking | +| `neptune` | `True` | `bool` | Whether to use Neptune for experiment tracking | +| `raytune` | `True` | `bool` | Whether to use Ray Tune for hyperparameter tuning | +| `tensorboard` | `True` | `bool` | Whether to use TensorBoard for visualization | +| `wandb` | `True` | `bool` | Whether to use Weights & Biases logging | + +As you navigate through your projects or experiments, be sure to revisit these settings to ensure that they are optimally configured for your needs. 
diff --git a/docs/en/reference/cfg/__init__.md b/docs/en/reference/cfg/__init__.md new file mode 100644 index 0000000..d73a9f2 --- /dev/null +++ b/docs/en/reference/cfg/__init__.md @@ -0,0 +1,60 @@ +--- +description: Explore Ultralytics cfg functions like cfg2dict, handle_deprecation, merge_equals_args & more to handle YOLO settings and configurations efficiently. +keywords: Ultralytics, YOLO, Configuration, cfg2dict, handle_deprecation, merge_equals_args, handle_yolo_settings, copy_default_cfg, Image Detection +--- + +# Reference for `ultralytics/cfg/__init__.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/__init__.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/__init__.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/cfg/__init__.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.cfg.cfg2dict + +

+ +## ::: ultralytics.cfg.get_cfg + +

+ +## ::: ultralytics.cfg.get_save_dir + +

+ +## ::: ultralytics.cfg._handle_deprecation + +

+ +## ::: ultralytics.cfg.check_dict_alignment + +

+ +## ::: ultralytics.cfg.merge_equals_args + +

+ +## ::: ultralytics.cfg.handle_yolo_hub + +

+ +## ::: ultralytics.cfg.handle_yolo_settings + +

+ +## ::: ultralytics.cfg.parse_key_value_pair + +

+ +## ::: ultralytics.cfg.smart_value + +

+ +## ::: ultralytics.cfg.entrypoint + +

+ +## ::: ultralytics.cfg.copy_default_cfg + +

diff --git a/docs/en/reference/data/annotator.md b/docs/en/reference/data/annotator.md new file mode 100644 index 0000000..ab685b2 --- /dev/null +++ b/docs/en/reference/data/annotator.md @@ -0,0 +1,16 @@ +--- +description: Enhance your machine learning model with Ultralytics’ auto_annotate function. Simplify data annotation for improved model training. +keywords: Ultralytics, Auto-Annotate, Machine Learning, AI, Annotation, Data Processing, Model Training +--- + +# Reference for `ultralytics/data/annotator.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/annotator.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/annotator.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/annotator.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.data.annotator.auto_annotate + +

diff --git a/docs/en/reference/data/augment.md b/docs/en/reference/data/augment.md new file mode 100644 index 0000000..3b16090 --- /dev/null +++ b/docs/en/reference/data/augment.md @@ -0,0 +1,88 @@ +--- +description: Detailed exploration into Ultralytics data augmentation methods including BaseTransform, MixUp, LetterBox, ToTensor, and more for enhancing model performance. +keywords: Ultralytics, Data Augmentation, BaseTransform, MixUp, RandomHSV, LetterBox, Albumentations, classify_transforms, classify_albumentations +--- + +# Reference for `ultralytics/data/augment.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/augment.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/augment.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/augment.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.data.augment.BaseTransform + +

+ +## ::: ultralytics.data.augment.Compose + +

+ +## ::: ultralytics.data.augment.BaseMixTransform + +

+ +## ::: ultralytics.data.augment.Mosaic + +

+ +## ::: ultralytics.data.augment.MixUp + +

+ +## ::: ultralytics.data.augment.RandomPerspective + +

+ +## ::: ultralytics.data.augment.RandomHSV + +

+ +## ::: ultralytics.data.augment.RandomFlip + +

+ +## ::: ultralytics.data.augment.LetterBox + +

+ +## ::: ultralytics.data.augment.CopyPaste + +

+ +## ::: ultralytics.data.augment.Albumentations + +

+ +## ::: ultralytics.data.augment.Format + +

+ +## ::: ultralytics.data.augment.ClassifyLetterBox + +

+ +## ::: ultralytics.data.augment.CenterCrop + +

+ +## ::: ultralytics.data.augment.ToTensor + +

+ +## ::: ultralytics.data.augment.v8_transforms + +

+ +## ::: ultralytics.data.augment.classify_transforms + +

+ +## ::: ultralytics.data.augment.hsv2colorjitter + +

+ +## ::: ultralytics.data.augment.classify_albumentations + +

diff --git a/docs/en/reference/data/base.md b/docs/en/reference/data/base.md new file mode 100644 index 0000000..c98a7c3 --- /dev/null +++ b/docs/en/reference/data/base.md @@ -0,0 +1,16 @@ +--- +description: Explore BaseDataset in Ultralytics docs. Learn how this implementation simplifies dataset creation and manipulation. +keywords: Ultralytics, docs, BaseDataset, data manipulation, dataset creation +--- + +# Reference for `ultralytics/data/base.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/base.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/base.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/base.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.data.base.BaseDataset + +

diff --git a/docs/en/reference/data/build.md b/docs/en/reference/data/build.md new file mode 100644 index 0000000..811c11d --- /dev/null +++ b/docs/en/reference/data/build.md @@ -0,0 +1,40 @@ +--- +description: Explore the Ultralytics YOLO v3 data build procedures, including the InfiniteDataLoader, seed_worker, build_dataloader, and load_inference_source. +keywords: Ultralytics, YOLO v3, Data build, DataLoader, InfiniteDataLoader, seed_worker, build_dataloader, load_inference_source +--- + +# Reference for `ultralytics/data/build.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/build.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/build.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/build.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.data.build.InfiniteDataLoader + +

+ +## ::: ultralytics.data.build._RepeatSampler + +

+ +## ::: ultralytics.data.build.seed_worker + +

+ +## ::: ultralytics.data.build.build_yolo_dataset + +

+ +## ::: ultralytics.data.build.build_dataloader + +

+ +## ::: ultralytics.data.build.check_source + +

+ +## ::: ultralytics.data.build.load_inference_source + +

diff --git a/docs/en/reference/data/converter.md b/docs/en/reference/data/converter.md new file mode 100644 index 0000000..bbad9d3 --- /dev/null +++ b/docs/en/reference/data/converter.md @@ -0,0 +1,36 @@ +--- +description: Explore Ultralytics data converter functions like coco91_to_coco80_class, merge_multi_segment, rle2polygon for efficient data handling. +keywords: Ultralytics, Data Converter, coco91_to_coco80_class, merge_multi_segment, rle2polygon +--- + +# Reference for `ultralytics/data/converter.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/converter.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/converter.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/converter.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.data.converter.coco91_to_coco80_class + +

+ +## ::: ultralytics.data.converter.coco80_to_coco91_class + +

+ +## ::: ultralytics.data.converter.convert_coco + +

+ +## ::: ultralytics.data.converter.convert_dota_to_yolo_obb + +

+ +## ::: ultralytics.data.converter.min_index + +

+ +## ::: ultralytics.data.converter.merge_multi_segment + +

diff --git a/docs/en/reference/data/dataset.md b/docs/en/reference/data/dataset.md new file mode 100644 index 0000000..242a054 --- /dev/null +++ b/docs/en/reference/data/dataset.md @@ -0,0 +1,32 @@ +--- +description: Explore the YOLODataset and SemanticDataset classes in YOLO data. Learn how to efficiently handle and manipulate your data with Ultralytics. +keywords: Ultralytics, YOLO, YOLODataset, SemanticDataset, data handling, data manipulation +--- + +# Reference for `ultralytics/data/dataset.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/dataset.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/dataset.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/dataset.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.data.dataset.YOLODataset + +

+ +## ::: ultralytics.data.dataset.ClassificationDataset + +

+ +## ::: ultralytics.data.dataset.SemanticDataset + +

+ +## ::: ultralytics.data.dataset.load_dataset_cache_file + +

+ +## ::: ultralytics.data.dataset.save_dataset_cache_file + +

diff --git a/docs/en/reference/data/loaders.md b/docs/en/reference/data/loaders.md new file mode 100644 index 0000000..3ba4c16 --- /dev/null +++ b/docs/en/reference/data/loaders.md @@ -0,0 +1,44 @@ +--- +description: Find detailed guides on Ultralytics YOLO data loaders, including LoadStreams, LoadImages and LoadTensor. Learn how to get the best YouTube URLs. +keywords: Ultralytics, data loaders, LoadStreams, LoadImages, LoadTensor, YOLO, YouTube URLs +--- + +# Reference for `ultralytics/data/loaders.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/loaders.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/loaders.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/loaders.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.data.loaders.SourceTypes + +

+ +## ::: ultralytics.data.loaders.LoadStreams + +

+ +## ::: ultralytics.data.loaders.LoadScreenshots + +

+ +## ::: ultralytics.data.loaders.LoadImages + +

+ +## ::: ultralytics.data.loaders.LoadPilAndNumpy + +

+ +## ::: ultralytics.data.loaders.LoadTensor + +

+ +## ::: ultralytics.data.loaders.autocast_list + +

+ +## ::: ultralytics.data.loaders.get_best_youtube_url + +

diff --git a/docs/en/reference/data/utils.md b/docs/en/reference/data/utils.md new file mode 100644 index 0000000..af06ce8 --- /dev/null +++ b/docs/en/reference/data/utils.md @@ -0,0 +1,68 @@ +--- +description: Uncover a detailed guide to Ultralytics data utilities. Learn functions from img2label_paths to autosplit, all boosting your YOLO model’s efficiency. +keywords: Ultralytics, data utils, YOLO, img2label_paths, exif_size, polygon2mask, polygons2masks_overlap, check_cls_dataset, delete_dsstore, autosplit +--- + +# Reference for `ultralytics/data/utils.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/utils.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/utils.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/utils.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.data.utils.HUBDatasetStats + +

+ +## ::: ultralytics.data.utils.img2label_paths + +

+ +## ::: ultralytics.data.utils.get_hash + +

+ +## ::: ultralytics.data.utils.exif_size + +

+ +## ::: ultralytics.data.utils.verify_image + +

+ +## ::: ultralytics.data.utils.verify_image_label + +

+ +## ::: ultralytics.data.utils.polygon2mask + +

+ +## ::: ultralytics.data.utils.polygons2masks + +

+ +## ::: ultralytics.data.utils.polygons2masks_overlap + +

+ +## ::: ultralytics.data.utils.find_dataset_yaml + +

+ +## ::: ultralytics.data.utils.check_det_dataset + +

+ +## ::: ultralytics.data.utils.check_cls_dataset + +

+ +## ::: ultralytics.data.utils.compress_one_image + +

+ +## ::: ultralytics.data.utils.autosplit + +

diff --git a/docs/en/reference/engine/exporter.md b/docs/en/reference/engine/exporter.md new file mode 100644 index 0000000..e8daeac --- /dev/null +++ b/docs/en/reference/engine/exporter.md @@ -0,0 +1,32 @@ +--- +description: Explore the exporter functionality of Ultralytics. Learn about exporting formats, IOSDetectModel, and try exporting with examples. +keywords: Ultralytics, Exporter, IOSDetectModel, Export Formats, Try export +--- + +# Reference for `ultralytics/engine/exporter.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/exporter.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/exporter.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/engine/exporter.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.engine.exporter.Exporter + +

+ +## ::: ultralytics.engine.exporter.IOSDetectModel + +

+ +## ::: ultralytics.engine.exporter.export_formats + +

+ +## ::: ultralytics.engine.exporter.gd_outputs + +

+ +## ::: ultralytics.engine.exporter.try_export + +

diff --git a/docs/en/reference/engine/model.md b/docs/en/reference/engine/model.md new file mode 100644 index 0000000..5579d75 --- /dev/null +++ b/docs/en/reference/engine/model.md @@ -0,0 +1,16 @@ +--- +description: Explore the detailed guide on using the Ultralytics YOLO Engine Model. Learn better ways to implement, train and evaluate YOLO models. +keywords: Ultralytics, YOLO, engine model, documentation, guide, implementation, training, evaluation +--- + +# Reference for `ultralytics/engine/model.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/model.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/model.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/engine/model.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.engine.model.Model + +

diff --git a/docs/en/reference/engine/predictor.md b/docs/en/reference/engine/predictor.md new file mode 100644 index 0000000..ad50352 --- /dev/null +++ b/docs/en/reference/engine/predictor.md @@ -0,0 +1,16 @@ +--- +description: Learn about Ultralytics BasePredictor, an essential component of our engine that serves as the foundation for all prediction operations. +keywords: Ultralytics, BasePredictor, YOLO, prediction, engine +--- + +# Reference for `ultralytics/engine/predictor.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/predictor.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/predictor.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/engine/predictor.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.engine.predictor.BasePredictor + +

diff --git a/docs/en/reference/engine/results.md b/docs/en/reference/engine/results.md new file mode 100644 index 0000000..b4b709e --- /dev/null +++ b/docs/en/reference/engine/results.md @@ -0,0 +1,36 @@ +--- +description: Master Ultralytics engine results including base tensors, boxes, and keypoints with our thorough documentation. +keywords: Ultralytics, engine, results, base tensor, boxes, keypoints +--- + +# Reference for `ultralytics/engine/results.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/results.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/results.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/engine/results.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.engine.results.BaseTensor + +

+ +## ::: ultralytics.engine.results.Results + +

+ +## ::: ultralytics.engine.results.Boxes + +

+ +## ::: ultralytics.engine.results.Masks + +

+ +## ::: ultralytics.engine.results.Keypoints + +

+ +## ::: ultralytics.engine.results.Probs + +

diff --git a/docs/en/reference/engine/trainer.md b/docs/en/reference/engine/trainer.md new file mode 100644 index 0000000..7c74b04 --- /dev/null +++ b/docs/en/reference/engine/trainer.md @@ -0,0 +1,16 @@ +--- +description: Learn about the BaseTrainer class in the Ultralytics library. From training control, customization to advanced usage. +keywords: Ultralytics, BaseTrainer, Machine Learning, Training Control, Python library +--- + +# Reference for `ultralytics/engine/trainer.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/trainer.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/trainer.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/engine/trainer.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.engine.trainer.BaseTrainer + +

diff --git a/docs/en/reference/engine/tuner.md b/docs/en/reference/engine/tuner.md new file mode 100644 index 0000000..7882b8c --- /dev/null +++ b/docs/en/reference/engine/tuner.md @@ -0,0 +1,16 @@ +--- +description: Explore the Ultralytics Tuner, a powerful tool designed for hyperparameter tuning of YOLO models to optimize performance across various tasks like object detection, image classification, and more. +keywords: Ultralytics, Tuner, YOLO, hyperparameter tuning, optimization, object detection, image classification, instance segmentation, pose estimation, multi-object tracking +--- + +# Reference for `ultralytics/engine/tuner.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/tuner.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/tuner.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/engine/tuner.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.engine.tuner.Tuner + +

diff --git a/docs/en/reference/engine/validator.md b/docs/en/reference/engine/validator.md new file mode 100644 index 0000000..5c0f4cd --- /dev/null +++ b/docs/en/reference/engine/validator.md @@ -0,0 +1,16 @@ +--- +description: Learn about the Ultralytics BaseValidator module. Understand its principles, uses, and how it interacts with other components. +keywords: Ultralytics, BaseValidator, Ultralytics engine, module, components +--- + +# Reference for `ultralytics/engine/validator.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/validator.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/validator.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/engine/validator.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.engine.validator.BaseValidator + +

diff --git a/docs/en/reference/hub/__init__.md b/docs/en/reference/hub/__init__.md new file mode 100644 index 0000000..912de06 --- /dev/null +++ b/docs/en/reference/hub/__init__.md @@ -0,0 +1,40 @@ +--- +description: Explore Ultralytics hub functions for model resetting, checking datasets, model exporting and more. Easy-to-follow instructions provided. +keywords: Ultralytics, hub functions, model export, dataset check, reset model, YOLO Docs +--- + +# Reference for `ultralytics/hub/__init__.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/hub/__init__.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/hub/__init__.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/hub/__init__.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.hub.login + +

+ +## ::: ultralytics.hub.logout + +

+ +## ::: ultralytics.hub.reset_model + +

+ +## ::: ultralytics.hub.export_fmts_hub + +

+ +## ::: ultralytics.hub.export_model + +

+ +## ::: ultralytics.hub.get_export + +

+ +## ::: ultralytics.hub.check_dataset + +

diff --git a/docs/en/reference/hub/auth.md b/docs/en/reference/hub/auth.md new file mode 100644 index 0000000..964dc73 --- /dev/null +++ b/docs/en/reference/hub/auth.md @@ -0,0 +1,16 @@ +--- +description: Dive into the Ultralytics Auth API documentation & learn how to manage authentication in your AI & ML projects easily and effectively. +keywords: Ultralytics, Auth, API documentation, User Authentication, AI, Machine Learning +--- + +# Reference for `ultralytics/hub/auth.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/hub/auth.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/hub/auth.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/hub/auth.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.hub.auth.Auth + +

diff --git a/docs/en/reference/hub/session.md b/docs/en/reference/hub/session.md new file mode 100644 index 0000000..fb30715 --- /dev/null +++ b/docs/en/reference/hub/session.md @@ -0,0 +1,16 @@ +--- +description: Explore details about the HUBTrainingSession in Ultralytics framework. Learn to utilize this functionality for effective model training. +keywords: Ultralytics, HUBTrainingSession, Documentation, Model Training, AI, Machine Learning, YOLO +--- + +# Reference for `ultralytics/hub/session.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/hub/session.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/hub/session.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/hub/session.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.hub.session.HUBTrainingSession + +

diff --git a/docs/en/reference/hub/utils.md b/docs/en/reference/hub/utils.md new file mode 100644 index 0000000..e310a01 --- /dev/null +++ b/docs/en/reference/hub/utils.md @@ -0,0 +1,28 @@ +--- +description: Explore Ultralytics docs for various Events, including "request_with_credentials" and "requests_with_progress". Also, understand the use of the "smart_request". +keywords: Ultralytics, Events, request_with_credentials, smart_request, Ultralytics hub utils, requests_with_progress +--- + +# Reference for `ultralytics/hub/utils.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/hub/utils.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/hub/utils.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/hub/utils.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.hub.utils.Events + +

+ +## ::: ultralytics.hub.utils.request_with_credentials + +

+ +## ::: ultralytics.hub.utils.requests_with_progress + +

+ +## ::: ultralytics.hub.utils.smart_request + +

diff --git a/docs/en/reference/models/fastsam/model.md b/docs/en/reference/models/fastsam/model.md new file mode 100644 index 0000000..0e99a8c --- /dev/null +++ b/docs/en/reference/models/fastsam/model.md @@ -0,0 +1,16 @@ +--- +description: Learn all about Ultralytics FastSAM model. Dive into our comprehensive guide for seamless integration and efficient model training. +keywords: Ultralytics, FastSAM model, Model documentation, Efficient model training +--- + +# Reference for `ultralytics/models/fastsam/model.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/model.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/model.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/fastsam/model.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.fastsam.model.FastSAM + +

diff --git a/docs/en/reference/models/fastsam/predict.md b/docs/en/reference/models/fastsam/predict.md new file mode 100644 index 0000000..7c7196e --- /dev/null +++ b/docs/en/reference/models/fastsam/predict.md @@ -0,0 +1,16 @@ +--- +description: Get detailed insights about Ultralytics FastSAMPredictor. Learn to predict and optimize your AI models with our properly documented guidelines. +keywords: Ultralytics, FastSAMPredictor, predictive modeling, AI optimization, machine learning, deep learning, Ultralytics documentation +--- + +# Reference for `ultralytics/models/fastsam/predict.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/predict.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/predict.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/fastsam/predict.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.fastsam.predict.FastSAMPredictor + +

diff --git a/docs/en/reference/models/fastsam/prompt.md b/docs/en/reference/models/fastsam/prompt.md new file mode 100644 index 0000000..0a37023 --- /dev/null +++ b/docs/en/reference/models/fastsam/prompt.md @@ -0,0 +1,16 @@ +--- +description: Learn to effectively utilize FastSAMPrompt model from Ultralytics. Detailed guide to help you get the most out of your machine learning models. +keywords: Ultralytics, FastSAMPrompt, machine learning, model, guide, documentation +--- + +# Reference for `ultralytics/models/fastsam/prompt.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/prompt.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/prompt.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/fastsam/prompt.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.fastsam.prompt.FastSAMPrompt + +

diff --git a/docs/en/reference/models/fastsam/utils.md b/docs/en/reference/models/fastsam/utils.md new file mode 100644 index 0000000..0ba33ab --- /dev/null +++ b/docs/en/reference/models/fastsam/utils.md @@ -0,0 +1,20 @@ +--- +description: Learn how to adjust bounding boxes to image borders in Ultralytics models using the bbox_iou utility. Enhance your object detection performance. +keywords: Ultralytics, bounding boxes, Bboxes, image borders, object detection, bbox_iou, model utilities +--- + +# Reference for `ultralytics/models/fastsam/utils.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/utils.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/utils.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/fastsam/utils.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.fastsam.utils.adjust_bboxes_to_image_border + +

+ +## ::: ultralytics.models.fastsam.utils.bbox_iou + +

diff --git a/docs/en/reference/models/fastsam/val.md b/docs/en/reference/models/fastsam/val.md new file mode 100644 index 0000000..4064f05 --- /dev/null +++ b/docs/en/reference/models/fastsam/val.md @@ -0,0 +1,16 @@ +--- +description: Learn about FastSAMValidator in Ultralytics models. Comprehensive guide to enhancing AI capabilities with Ultralytics. +keywords: Ultralytics, FastSAMValidator, model, synthetic, AI, machine learning, validation +--- + +# Reference for `ultralytics/models/fastsam/val.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/val.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/val.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/fastsam/val.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.fastsam.val.FastSAMValidator + +

diff --git a/docs/en/reference/models/nas/model.md b/docs/en/reference/models/nas/model.md new file mode 100644 index 0000000..9536e54 --- /dev/null +++ b/docs/en/reference/models/nas/model.md @@ -0,0 +1,16 @@ +--- +description: Learn how our NAS model operates in Ultralytics. Comprehensive guide with detailed examples. Master the nuances of Ultralytics NAS model. +keywords: Ultralytics, NAS model, NAS guide, machine learning, model documentation +--- + +# Reference for `ultralytics/models/nas/model.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/nas/model.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/nas/model.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/nas/model.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.nas.model.NAS + +

diff --git a/docs/en/reference/models/nas/predict.md b/docs/en/reference/models/nas/predict.md new file mode 100644 index 0000000..3dcc0fd --- /dev/null +++ b/docs/en/reference/models/nas/predict.md @@ -0,0 +1,16 @@ +--- +description: Explore Ultralytics NASPredictor. Understand high-level architecture of the model for effective implementation and efficient predictions. +keywords: NASPredictor, Ultralytics, Ultralytics model, model architecture, efficient predictions +--- + +# Reference for `ultralytics/models/nas/predict.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/nas/predict.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/nas/predict.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/nas/predict.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.nas.predict.NASPredictor + +

diff --git a/docs/en/reference/models/nas/val.md b/docs/en/reference/models/nas/val.md new file mode 100644 index 0000000..5d0adf6 --- /dev/null +++ b/docs/en/reference/models/nas/val.md @@ -0,0 +1,16 @@ +--- +description: Explore the utilities and functions of the Ultralytics NASValidator. Find out how it benefits allocation and optimization in AI models. +keywords: Ultralytics, NASValidator, models.nas.val.NASValidator, AI models, allocation, optimization +--- + +# Reference for `ultralytics/models/nas/val.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/nas/val.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/nas/val.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/nas/val.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.nas.val.NASValidator + +

diff --git a/docs/en/reference/models/rtdetr/model.md b/docs/en/reference/models/rtdetr/model.md new file mode 100644 index 0000000..a4578b2 --- /dev/null +++ b/docs/en/reference/models/rtdetr/model.md @@ -0,0 +1,16 @@ +--- +description: Explore the specifics of using the RTDETR model in Ultralytics. Detailed documentation layered with explanations and examples. +keywords: Ultralytics, RTDETR model, Ultralytics models, object detection, Ultralytics documentation +--- + +# Reference for `ultralytics/models/rtdetr/model.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/rtdetr/model.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/rtdetr/model.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/rtdetr/model.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.rtdetr.model.RTDETR + +

diff --git a/docs/en/reference/models/rtdetr/predict.md b/docs/en/reference/models/rtdetr/predict.md new file mode 100644 index 0000000..4763381 --- /dev/null +++ b/docs/en/reference/models/rtdetr/predict.md @@ -0,0 +1,16 @@ +--- +description: Learn how to use the RTDETRPredictor model of the Ultralytics package. Detailed documentation, usage instructions, and advice. +keywords: Ultralytics, RTDETRPredictor, model documentation, guide, real-time object detection +--- + +# Reference for `ultralytics/models/rtdetr/predict.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/rtdetr/predict.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/rtdetr/predict.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/rtdetr/predict.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.rtdetr.predict.RTDETRPredictor + +

diff --git a/docs/en/reference/models/rtdetr/train.md b/docs/en/reference/models/rtdetr/train.md new file mode 100644 index 0000000..4f9faaa --- /dev/null +++ b/docs/en/reference/models/rtdetr/train.md @@ -0,0 +1,16 @@ +--- +description: Get insights into RTDETRTrainer, a crucial component of Ultralytics for effective model training. Explore detailed documentation at Ultralytics. +keywords: Ultralytics, RTDETRTrainer, model training, Ultralytics models, PyTorch models, neural networks, machine learning, deep learning +--- + +# Reference for `ultralytics/models/rtdetr/train.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/rtdetr/train.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/rtdetr/train.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/rtdetr/train.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.rtdetr.train.RTDETRTrainer + +

diff --git a/docs/en/reference/models/rtdetr/val.md b/docs/en/reference/models/rtdetr/val.md new file mode 100644 index 0000000..d31bc86 --- /dev/null +++ b/docs/en/reference/models/rtdetr/val.md @@ -0,0 +1,20 @@ +--- +description: Explore RTDETRDataset in Ultralytics Models. Learn about the RTDETRValidator function, understand its usage in real-time object detection. +keywords: Ultralytics, RTDETRDataset, RTDETRValidator, real-time object detection, models documentation +--- + +# Reference for `ultralytics/models/rtdetr/val.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/rtdetr/val.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/rtdetr/val.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/rtdetr/val.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.rtdetr.val.RTDETRDataset + +

+ +## ::: ultralytics.models.rtdetr.val.RTDETRValidator + +

diff --git a/docs/en/reference/models/sam/amg.md b/docs/en/reference/models/sam/amg.md new file mode 100644 index 0000000..dea35e4 --- /dev/null +++ b/docs/en/reference/models/sam/amg.md @@ -0,0 +1,56 @@ +--- +description: Explore Ultralytics methods for mask data processing, transformation and encoding. Deepen your understanding of RLE encoding, image cropping and more. +keywords: Ultralytics, Mask Data, Transformation, Encoding, RLE encoding, Image cropping, Pytorch, SAM, AMG, Ultralytics model +--- + +# Reference for `ultralytics/models/sam/amg.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/amg.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/amg.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/sam/amg.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.sam.amg.is_box_near_crop_edge + +

+ +## ::: ultralytics.models.sam.amg.batch_iterator + +

+ +## ::: ultralytics.models.sam.amg.calculate_stability_score + +

+ +## ::: ultralytics.models.sam.amg.build_point_grid + +

+ +## ::: ultralytics.models.sam.amg.build_all_layer_point_grids + +

+ +## ::: ultralytics.models.sam.amg.generate_crop_boxes + +

+ +## ::: ultralytics.models.sam.amg.uncrop_boxes_xyxy + +

+ +## ::: ultralytics.models.sam.amg.uncrop_points + +

+ +## ::: ultralytics.models.sam.amg.uncrop_masks + +

+ +## ::: ultralytics.models.sam.amg.remove_small_regions + +

+ +## ::: ultralytics.models.sam.amg.batched_mask_to_box + +

diff --git a/docs/en/reference/models/sam/build.md b/docs/en/reference/models/sam/build.md new file mode 100644 index 0000000..9adeeb4 --- /dev/null +++ b/docs/en/reference/models/sam/build.md @@ -0,0 +1,36 @@ +--- +description: Master building SAM ViT models with Ultralytics. Discover steps to leverage the power of SAM and Vision Transformer sessions. +keywords: Ultralytics, SAM, build sam, vision transformer, vits, build_sam_vit_l, build_sam_vit_b, build_sam +--- + +# Reference for `ultralytics/models/sam/build.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/build.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/build.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/sam/build.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.sam.build.build_sam_vit_h + +

+ +## ::: ultralytics.models.sam.build.build_sam_vit_l + +

+ +## ::: ultralytics.models.sam.build.build_sam_vit_b + +

+ +## ::: ultralytics.models.sam.build.build_mobile_sam + +

+ +## ::: ultralytics.models.sam.build._build_sam + +

+ +## ::: ultralytics.models.sam.build.build_sam + +

diff --git a/docs/en/reference/models/sam/model.md b/docs/en/reference/models/sam/model.md new file mode 100644 index 0000000..32444bc --- /dev/null +++ b/docs/en/reference/models/sam/model.md @@ -0,0 +1,16 @@ +--- +description: Dive into the SAM model details in the Ultralytics YOLO documentation. Understand, implement, and optimize your model use. +keywords: Ultralytics, YOLO, SAM Model, Documentation, Machine Learning, AI, Convolutional neural network +--- + +# Reference for `ultralytics/models/sam/model.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/model.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/model.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/sam/model.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.sam.model.SAM + +

diff --git a/docs/en/reference/models/sam/modules/decoders.md b/docs/en/reference/models/sam/modules/decoders.md new file mode 100644 index 0000000..215b832 --- /dev/null +++ b/docs/en/reference/models/sam/modules/decoders.md @@ -0,0 +1,20 @@ +--- +description: Explore MaskDecoder, a part of the Ultralytics models. Gain insights on how to utilize it effectively in the SAM modules decoders MLP. +keywords: Ultralytics, MaskDecoder, SAM modules, decoders, MLP, YOLO, machine learning, image recognition +--- + +# Reference for `ultralytics/models/sam/modules/decoders.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/decoders.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/decoders.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/sam/modules/decoders.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.sam.modules.decoders.MaskDecoder + +

+ +## ::: ultralytics.models.sam.modules.decoders.MLP + +

diff --git a/docs/en/reference/models/sam/modules/encoders.md b/docs/en/reference/models/sam/modules/encoders.md new file mode 100644 index 0000000..ffcbf44 --- /dev/null +++ b/docs/en/reference/models/sam/modules/encoders.md @@ -0,0 +1,52 @@ +--- +description: Discover detailed information on ImageEncoderViT, PositionEmbeddingRandom, Attention, window_partition, get_rel_pos and more in Ultralytics models encoders documentation. +keywords: Ultralytics, Encoders, Modules, Documentation, ImageEncoderViT, PositionEmbeddingRandom, Attention, window_partition, get_rel_pos +--- + +# Reference for `ultralytics/models/sam/modules/encoders.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/encoders.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/encoders.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/sam/modules/encoders.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.sam.modules.encoders.ImageEncoderViT + +

+ +## ::: ultralytics.models.sam.modules.encoders.PromptEncoder + +

+ +## ::: ultralytics.models.sam.modules.encoders.PositionEmbeddingRandom + +

+ +## ::: ultralytics.models.sam.modules.encoders.Block + +

+ +## ::: ultralytics.models.sam.modules.encoders.Attention + +

+ +## ::: ultralytics.models.sam.modules.encoders.PatchEmbed + +

+ +## ::: ultralytics.models.sam.modules.encoders.window_partition + +

+ +## ::: ultralytics.models.sam.modules.encoders.window_unpartition + +

+ +## ::: ultralytics.models.sam.modules.encoders.get_rel_pos + +

+ +## ::: ultralytics.models.sam.modules.encoders.add_decomposed_rel_pos + +

diff --git a/docs/en/reference/models/sam/modules/sam.md b/docs/en/reference/models/sam/modules/sam.md new file mode 100644 index 0000000..d309e24 --- /dev/null +++ b/docs/en/reference/models/sam/modules/sam.md @@ -0,0 +1,16 @@ +--- +description: Explore the Sam module of Ultralytics. Discover detailed methods, classes, and information for efficient deep-learning model training. +keywords: Ultralytics, Sam module, deep learning, model training, Ultralytics documentation +--- + +# Reference for `ultralytics/models/sam/modules/sam.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/sam.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/sam.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/sam/modules/sam.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.sam.modules.sam.Sam + +

diff --git a/docs/en/reference/models/sam/modules/tiny_encoder.md b/docs/en/reference/models/sam/modules/tiny_encoder.md new file mode 100644 index 0000000..1d945be --- /dev/null +++ b/docs/en/reference/models/sam/modules/tiny_encoder.md @@ -0,0 +1,56 @@ +--- +description: Get in-depth insights about Ultralytics Tiny Encoder Modules such as Conv2d_BN, MBConv, ConvLayer, Attention, BasicLayer, and TinyViT. Improve your understanding of machine learning model components. +keywords: Ultralytics, Tiny Encoder, Conv2d_BN, MBConv, ConvLayer, Attention, BasicLayer, TinyViT, Machine learning modules, Ultralytics models +--- + +# Reference for `ultralytics/models/sam/modules/tiny_encoder.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/tiny_encoder.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/tiny_encoder.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/sam/modules/tiny_encoder.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.sam.modules.tiny_encoder.Conv2d_BN + +

+ +## ::: ultralytics.models.sam.modules.tiny_encoder.PatchEmbed + +

+ +## ::: ultralytics.models.sam.modules.tiny_encoder.MBConv + +

+ +## ::: ultralytics.models.sam.modules.tiny_encoder.PatchMerging + +

+ +## ::: ultralytics.models.sam.modules.tiny_encoder.ConvLayer + +

+ +## ::: ultralytics.models.sam.modules.tiny_encoder.Mlp + +

+ +## ::: ultralytics.models.sam.modules.tiny_encoder.Attention + +

+ +## ::: ultralytics.models.sam.modules.tiny_encoder.TinyViTBlock + +

+ +## ::: ultralytics.models.sam.modules.tiny_encoder.BasicLayer + +

+ +## ::: ultralytics.models.sam.modules.tiny_encoder.LayerNorm2d + +

+ +## ::: ultralytics.models.sam.modules.tiny_encoder.TinyViT + +

diff --git a/docs/en/reference/models/sam/modules/transformer.md b/docs/en/reference/models/sam/modules/transformer.md new file mode 100644 index 0000000..d36a31e --- /dev/null +++ b/docs/en/reference/models/sam/modules/transformer.md @@ -0,0 +1,24 @@ +--- +description: Learn about TwoWayTransformer and Attention modules in Ultralytics. Leverage these tools to enhance your AI models. +keywords: Ultralytics, TwoWayTransformer, Attention, AI models, transformers +--- + +# Reference for `ultralytics/models/sam/modules/transformer.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/transformer.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/transformer.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/sam/modules/transformer.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.sam.modules.transformer.TwoWayTransformer + +

+ +## ::: ultralytics.models.sam.modules.transformer.TwoWayAttentionBlock + +

+ +## ::: ultralytics.models.sam.modules.transformer.Attention + +

diff --git a/docs/en/reference/models/sam/predict.md b/docs/en/reference/models/sam/predict.md new file mode 100644 index 0000000..ae4c788 --- /dev/null +++ b/docs/en/reference/models/sam/predict.md @@ -0,0 +1,16 @@ +--- +description: Master the ultralytics.models.sam.predict.Predictor class with our comprehensive guide. Discover techniques to enhance your model predictions. +keywords: Ultralytics, predictor, models, sam.predict.Predictor, AI, machine learning, predictive models +--- + +# Reference for `ultralytics/models/sam/predict.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/predict.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/predict.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/sam/predict.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.sam.predict.Predictor + +

diff --git a/docs/en/reference/models/utils/loss.md b/docs/en/reference/models/utils/loss.md new file mode 100644 index 0000000..567308e --- /dev/null +++ b/docs/en/reference/models/utils/loss.md @@ -0,0 +1,20 @@ +--- +description: Learn to use the DETRLoss function provided by Ultralytics YOLO. Understand how to utilize loss in RTDETR detection models to improve accuracy. +keywords: Ultralytics, YOLO, Documentation, DETRLoss, Detection Loss, Loss function, DETR, RTDETR Detection Models +--- + +# Reference for `ultralytics/models/utils/loss.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/utils/loss.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/utils/loss.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/utils/loss.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.utils.loss.DETRLoss + +

+ +## ::: ultralytics.models.utils.loss.RTDETRDetectionLoss + +

diff --git a/docs/en/reference/models/utils/ops.md b/docs/en/reference/models/utils/ops.md new file mode 100644 index 0000000..4ca7614 --- /dev/null +++ b/docs/en/reference/models/utils/ops.md @@ -0,0 +1,20 @@ +--- +description: Discover details for "HungarianMatcher" & "get_cdn_group" in Ultralytics YOLO, advanced tools supporting detection models. +keywords: Ultralytics, YOLO, HungarianMatcher, get_cdn_group, detection models, model utilities, ops +--- + +# Reference for `ultralytics/models/utils/ops.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/utils/ops.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/utils/ops.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/utils/ops.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.utils.ops.HungarianMatcher + +

+ +## ::: ultralytics.models.utils.ops.get_cdn_group + +

diff --git a/docs/en/reference/models/yolo/classify/predict.md b/docs/en/reference/models/yolo/classify/predict.md new file mode 100644 index 0000000..581107b --- /dev/null +++ b/docs/en/reference/models/yolo/classify/predict.md @@ -0,0 +1,16 @@ +--- +description: Explore the Ultralytics ClassificationPredictor guide for model prediction and visualization. Build powerful AI models with YOLO. +keywords: Ultralytics, classification predictor, predict, YOLO, AI models, model visualization +--- + +# Reference for `ultralytics/models/yolo/classify/predict.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/classify/predict.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/classify/predict.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/classify/predict.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.yolo.classify.predict.ClassificationPredictor + +

diff --git a/docs/en/reference/models/yolo/classify/train.md b/docs/en/reference/models/yolo/classify/train.md new file mode 100644 index 0000000..e4b9ea8 --- /dev/null +++ b/docs/en/reference/models/yolo/classify/train.md @@ -0,0 +1,16 @@ +--- +description: Delve into the Classification Trainer in the Ultralytics YOLO docs and optimize your model's training process with insights from the masters. +keywords: Ultralytics, YOLO, Classification Trainer, deep learning, training process, AI models, documentation +--- + +# Reference for `ultralytics/models/yolo/classify/train.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/classify/train.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/classify/train.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/classify/train.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.yolo.classify.train.ClassificationTrainer + +

diff --git a/docs/en/reference/models/yolo/classify/val.md b/docs/en/reference/models/yolo/classify/val.md new file mode 100644 index 0000000..27f64ff --- /dev/null +++ b/docs/en/reference/models/yolo/classify/val.md @@ -0,0 +1,16 @@ +--- +description: Explore YOLO ClassificationValidator, a key element of Ultralytics YOLO models. Learn how it validates and fine-tunes model outputs. +keywords: Ultralytics, YOLO, ClassificationValidator, model validation, model fine-tuning, deep learning, computer vision +--- + +# Reference for `ultralytics/models/yolo/classify/val.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/classify/val.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/classify/val.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/classify/val.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.yolo.classify.val.ClassificationValidator + +

diff --git a/docs/en/reference/models/yolo/detect/predict.md b/docs/en/reference/models/yolo/detect/predict.md new file mode 100644 index 0000000..4b6cafd --- /dev/null +++ b/docs/en/reference/models/yolo/detect/predict.md @@ -0,0 +1,16 @@ +--- +description: Explore the guide to using the DetectionPredictor in Ultralytics YOLO. Learn how to predict, detect and analyze objects accurately. +keywords: Ultralytics, YOLO, DetectionPredictor, detect, predict, object detection, analysis +--- + +# Reference for `ultralytics/models/yolo/detect/predict.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/detect/predict.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/detect/predict.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/detect/predict.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.yolo.detect.predict.DetectionPredictor + +

diff --git a/docs/en/reference/models/yolo/detect/train.md b/docs/en/reference/models/yolo/detect/train.md new file mode 100644 index 0000000..bca6faf --- /dev/null +++ b/docs/en/reference/models/yolo/detect/train.md @@ -0,0 +1,16 @@ +--- +description: Maximize your model's potential with Ultralytics YOLO Detection Trainer. Learn advanced techniques, tips, and tricks for training. +keywords: Ultralytics YOLO, YOLO, Detection Trainer, Model Training, Machine Learning, Deep Learning, Computer Vision +--- + +# Reference for `ultralytics/models/yolo/detect/train.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/detect/train.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/detect/train.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/detect/train.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.yolo.detect.train.DetectionTrainer + +

diff --git a/docs/en/reference/models/yolo/detect/val.md b/docs/en/reference/models/yolo/detect/val.md new file mode 100644 index 0000000..c5333d0 --- /dev/null +++ b/docs/en/reference/models/yolo/detect/val.md @@ -0,0 +1,16 @@ +--- +description: Discover how to validate your YOLO models with the Ultralytics Detection Validator. Enhance precision and recall rates today. +keywords: Ultralytics, YOLO, Detection Validator, model validation, precision, recall +--- + +# Reference for `ultralytics/models/yolo/detect/val.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/detect/val.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/detect/val.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/detect/val.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.yolo.detect.val.DetectionValidator + +

diff --git a/docs/en/reference/models/yolo/model.md b/docs/en/reference/models/yolo/model.md new file mode 100644 index 0000000..742685f --- /dev/null +++ b/docs/en/reference/models/yolo/model.md @@ -0,0 +1,16 @@ +--- +description: Discover the Ultralytics YOLO model class. Learn advanced techniques, tips, and tricks for training. +keywords: Ultralytics YOLO, YOLO, YOLO model, Model Training, Machine Learning, Deep Learning, Computer Vision +--- + +# Reference for `ultralytics/models/yolo/model.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/model.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/model.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/model.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.yolo.model.YOLO + +

diff --git a/docs/en/reference/models/yolo/pose/predict.md b/docs/en/reference/models/yolo/pose/predict.md new file mode 100644 index 0000000..9eb4af4 --- /dev/null +++ b/docs/en/reference/models/yolo/pose/predict.md @@ -0,0 +1,16 @@ +--- +description: Discover how to use PosePredictor in the Ultralytics YOLO model. Includes detailed guides, code examples, and explanations. +keywords: Ultralytics, YOLO, PosePredictor, machine learning, AI, predictive models +--- + +# Reference for `ultralytics/models/yolo/pose/predict.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/pose/predict.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/pose/predict.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/pose/predict.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.yolo.pose.predict.PosePredictor + +

diff --git a/docs/en/reference/models/yolo/pose/train.md b/docs/en/reference/models/yolo/pose/train.md new file mode 100644 index 0000000..1d02bf2 --- /dev/null +++ b/docs/en/reference/models/yolo/pose/train.md @@ -0,0 +1,16 @@ +--- +description: Explore Ultralytics PoseTrainer for YOLO models. Get a step-by-step guide on how to train on custom pose data for more accurate AI modeling. +keywords: Ultralytics, YOLO, PoseTrainer, pose training, AI modeling, custom data training +--- + +# Reference for `ultralytics/models/yolo/pose/train.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/pose/train.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/pose/train.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/pose/train.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.yolo.pose.train.PoseTrainer + +

diff --git a/docs/en/reference/models/yolo/pose/val.md b/docs/en/reference/models/yolo/pose/val.md new file mode 100644 index 0000000..77207be --- /dev/null +++ b/docs/en/reference/models/yolo/pose/val.md @@ -0,0 +1,16 @@ +--- +description: Explore the PoseValidator—review how Ultralytics YOLO validates poses for object detection. Improve your understanding of YOLO. +keywords: PoseValidator, Ultralytics, YOLO, Object detection, Pose validation +--- + +# Reference for `ultralytics/models/yolo/pose/val.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/pose/val.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/pose/val.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/pose/val.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.yolo.pose.val.PoseValidator + +

diff --git a/docs/en/reference/models/yolo/segment/predict.md b/docs/en/reference/models/yolo/segment/predict.md new file mode 100644 index 0000000..a92b6f6 --- /dev/null +++ b/docs/en/reference/models/yolo/segment/predict.md @@ -0,0 +1,16 @@ +--- +description: Discover how to utilize the YOLO Segmentation Predictor in Ultralytics. Enhance your object detection skills with us. +keywords: YOLO, Ultralytics, object detection, segmentation predictor +--- + +# Reference for `ultralytics/models/yolo/segment/predict.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/segment/predict.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/segment/predict.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/segment/predict.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.yolo.segment.predict.SegmentationPredictor + +

diff --git a/docs/en/reference/models/yolo/segment/train.md b/docs/en/reference/models/yolo/segment/train.md new file mode 100644 index 0000000..1b5c3d5 --- /dev/null +++ b/docs/en/reference/models/yolo/segment/train.md @@ -0,0 +1,16 @@ +--- +description: Maximize your YOLO model's performance with our SegmentationTrainer. Explore comprehensive guides and tutorials on ultralytics.com. +keywords: Ultralytics, YOLO, SegmentationTrainer, image segmentation, object detection, model training, YOLO model +--- + +# Reference for `ultralytics/models/yolo/segment/train.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/segment/train.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/segment/train.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/segment/train.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.yolo.segment.train.SegmentationTrainer + +

diff --git a/docs/en/reference/models/yolo/segment/val.md b/docs/en/reference/models/yolo/segment/val.md new file mode 100644 index 0000000..745b22c --- /dev/null +++ b/docs/en/reference/models/yolo/segment/val.md @@ -0,0 +1,16 @@ +--- +description: Get practical insights about our SegmentationValidator in YOLO Ultralytics models. Discover functionality details, methods, inputs, and outputs. +keywords: Ultralytics, YOLO, SegmentationValidator, model segmentation, image classification, object detection +--- + +# Reference for `ultralytics/models/yolo/segment/val.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/segment/val.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/segment/val.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/segment/val.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.models.yolo.segment.val.SegmentationValidator + +

diff --git a/docs/en/reference/nn/autobackend.md b/docs/en/reference/nn/autobackend.md new file mode 100644 index 0000000..462789e --- /dev/null +++ b/docs/en/reference/nn/autobackend.md @@ -0,0 +1,20 @@ +--- +description: Get to know more about Ultralytics nn.autobackend.check_class_names functionality. Optimize your YOLO models seamlessly. +keywords: Ultralytics, AutoBackend, check_class_names, YOLO, YOLO models, optimization +--- + +# Reference for `ultralytics/nn/autobackend.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/autobackend.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/autobackend.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/nn/autobackend.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.nn.autobackend.AutoBackend + +

+ +## ::: ultralytics.nn.autobackend.check_class_names + +

diff --git a/docs/en/reference/nn/modules/block.md b/docs/en/reference/nn/modules/block.md new file mode 100644 index 0000000..8aac258 --- /dev/null +++ b/docs/en/reference/nn/modules/block.md @@ -0,0 +1,80 @@ +--- +description: Explore Ultralytics YOLO neural network modules, Proto to BottleneckCSP. Detailed explanation of each module with easy-to-follow code examples. +keywords: YOLO, Ultralytics, neural network, nn.modules.block, Proto, HGBlock, SPPF, C2, C3, RepC3, C3Ghost, Bottleneck, BottleneckCSP +--- + +# Reference for `ultralytics/nn/modules/block.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/block.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/block.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/nn/modules/block.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.nn.modules.block.DFL + +

+ +## ::: ultralytics.nn.modules.block.Proto + +

+ +## ::: ultralytics.nn.modules.block.HGStem + +

+ +## ::: ultralytics.nn.modules.block.HGBlock + +

+ +## ::: ultralytics.nn.modules.block.SPP + +

+ +## ::: ultralytics.nn.modules.block.SPPF + +

+ +## ::: ultralytics.nn.modules.block.C1 + +

+ +## ::: ultralytics.nn.modules.block.C2 + +

+ +## ::: ultralytics.nn.modules.block.C2f + +

+ +## ::: ultralytics.nn.modules.block.C3 + +

+ +## ::: ultralytics.nn.modules.block.C3x + +

+ +## ::: ultralytics.nn.modules.block.RepC3 + +

+ +## ::: ultralytics.nn.modules.block.C3TR + +

+ +## ::: ultralytics.nn.modules.block.C3Ghost + +

+ +## ::: ultralytics.nn.modules.block.GhostBottleneck + +

+ +## ::: ultralytics.nn.modules.block.Bottleneck + +

+ +## ::: ultralytics.nn.modules.block.BottleneckCSP + +

diff --git a/docs/en/reference/nn/modules/conv.md b/docs/en/reference/nn/modules/conv.md new file mode 100644 index 0000000..6c9fb35 --- /dev/null +++ b/docs/en/reference/nn/modules/conv.md @@ -0,0 +1,68 @@ +--- +description: Explore various Ultralytics convolution modules including Conv2, DWConv, ConvTranspose, GhostConv, Channel Attention and more. +keywords: Ultralytics, Convolution Modules, Conv2, DWConv, ConvTranspose, GhostConv, ChannelAttention, CBAM, autopad +--- + +# Reference for `ultralytics/nn/modules/conv.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/conv.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/conv.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/nn/modules/conv.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.nn.modules.conv.Conv + +

+ +## ::: ultralytics.nn.modules.conv.Conv2 + +

+ +## ::: ultralytics.nn.modules.conv.LightConv + +

+ +## ::: ultralytics.nn.modules.conv.DWConv + +

+ +## ::: ultralytics.nn.modules.conv.DWConvTranspose2d + +

+ +## ::: ultralytics.nn.modules.conv.ConvTranspose + +

+ +## ::: ultralytics.nn.modules.conv.Focus + +

+ +## ::: ultralytics.nn.modules.conv.GhostConv + +

+ +## ::: ultralytics.nn.modules.conv.RepConv + +

+ +## ::: ultralytics.nn.modules.conv.ChannelAttention + +

+ +## ::: ultralytics.nn.modules.conv.SpatialAttention + +

+ +## ::: ultralytics.nn.modules.conv.CBAM + +

+ +## ::: ultralytics.nn.modules.conv.Concat + +

+ +## ::: ultralytics.nn.modules.conv.autopad + +

diff --git a/docs/en/reference/nn/modules/head.md b/docs/en/reference/nn/modules/head.md new file mode 100644 index 0000000..40ffb25 --- /dev/null +++ b/docs/en/reference/nn/modules/head.md @@ -0,0 +1,32 @@ +--- +description: Explore docs covering Ultralytics YOLO detection, pose & RTDETRDecoder. Comprehensive guides to help you understand Ultralytics nn modules. +keywords: Ultralytics, YOLO, Detection, Pose, RTDETRDecoder, nn modules, guides +--- + +# Reference for `ultralytics/nn/modules/head.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/head.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/head.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/nn/modules/head.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.nn.modules.head.Detect + +

+ +## ::: ultralytics.nn.modules.head.Segment + +

+ +## ::: ultralytics.nn.modules.head.Pose + +

+ +## ::: ultralytics.nn.modules.head.Classify + +

+ +## ::: ultralytics.nn.modules.head.RTDETRDecoder + +

diff --git a/docs/en/reference/nn/modules/transformer.md b/docs/en/reference/nn/modules/transformer.md new file mode 100644 index 0000000..91904be --- /dev/null +++ b/docs/en/reference/nn/modules/transformer.md @@ -0,0 +1,52 @@ +--- +description: Learn about Ultralytics transformer encoder, layer, MLP block, LayerNorm2d and the deformable transformer decoder layer. Expand your understanding of these crucial AI modules. +keywords: Ultralytics, Ultralytics documentation, TransformerEncoderLayer, TransformerLayer, MLPBlock, LayerNorm2d, DeformableTransformerDecoderLayer +--- + +# Reference for `ultralytics/nn/modules/transformer.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/transformer.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/transformer.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/nn/modules/transformer.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.nn.modules.transformer.TransformerEncoderLayer + +

+ +## ::: ultralytics.nn.modules.transformer.AIFI + +

+ +## ::: ultralytics.nn.modules.transformer.TransformerLayer + +

+ +## ::: ultralytics.nn.modules.transformer.TransformerBlock + +

+ +## ::: ultralytics.nn.modules.transformer.MLPBlock + +

+ +## ::: ultralytics.nn.modules.transformer.MLP + +

+ +## ::: ultralytics.nn.modules.transformer.LayerNorm2d + +

+ +## ::: ultralytics.nn.modules.transformer.MSDeformAttn + +

+ +## ::: ultralytics.nn.modules.transformer.DeformableTransformerDecoderLayer + +

+ +## ::: ultralytics.nn.modules.transformer.DeformableTransformerDecoder + +

diff --git a/docs/en/reference/nn/modules/utils.md b/docs/en/reference/nn/modules/utils.md new file mode 100644 index 0000000..25b0b1c --- /dev/null +++ b/docs/en/reference/nn/modules/utils.md @@ -0,0 +1,32 @@ +--- +description: Explore Ultralytics neural network utils, such as bias_init_with_prob, inverse_sigmoid and multi_scale_deformable_attn_pytorch functions. +keywords: Ultralytics, neural network, nn.modules.utils, bias_init_with_prob, inverse_sigmoid, multi_scale_deformable_attn_pytorch +--- + +# Reference for `ultralytics/nn/modules/utils.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/utils.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/utils.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/nn/modules/utils.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.nn.modules.utils._get_clones + +

+ +## ::: ultralytics.nn.modules.utils.bias_init_with_prob + +

+ +## ::: ultralytics.nn.modules.utils.linear_init_ + +

+ +## ::: ultralytics.nn.modules.utils.inverse_sigmoid + +

+ +## ::: ultralytics.nn.modules.utils.multi_scale_deformable_attn_pytorch + +

diff --git a/docs/en/reference/nn/tasks.md b/docs/en/reference/nn/tasks.md new file mode 100644 index 0000000..aa84d88 --- /dev/null +++ b/docs/en/reference/nn/tasks.md @@ -0,0 +1,72 @@ +--- +description: Dive into the intricacies of YOLO tasks.py. Learn about DetectionModel, PoseModel and more for powerful AI development. +keywords: Ultralytics, YOLO, nn tasks, DetectionModel, PoseModel, RTDETRDetectionModel, model weights, parse model, AI development +--- + +# Reference for `ultralytics/nn/tasks.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/tasks.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/tasks.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/nn/tasks.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.nn.tasks.BaseModel + +

+ +## ::: ultralytics.nn.tasks.DetectionModel + +

+ +## ::: ultralytics.nn.tasks.SegmentationModel + +

+ +## ::: ultralytics.nn.tasks.PoseModel + +

+ +## ::: ultralytics.nn.tasks.ClassificationModel + +

+ +## ::: ultralytics.nn.tasks.RTDETRDetectionModel + +

+ +## ::: ultralytics.nn.tasks.Ensemble + +

+ +## ::: ultralytics.nn.tasks.temporary_modules + +

+ +## ::: ultralytics.nn.tasks.torch_safe_load + +

+ +## ::: ultralytics.nn.tasks.attempt_load_weights + +

+ +## ::: ultralytics.nn.tasks.attempt_load_one_weight + +

+ +## ::: ultralytics.nn.tasks.parse_model + +

+ +## ::: ultralytics.nn.tasks.yaml_model_load + +

+ +## ::: ultralytics.nn.tasks.guess_model_scale + +

+ +## ::: ultralytics.nn.tasks.guess_model_task + +

diff --git a/docs/en/reference/trackers/basetrack.md b/docs/en/reference/trackers/basetrack.md new file mode 100644 index 0000000..97f5c4a --- /dev/null +++ b/docs/en/reference/trackers/basetrack.md @@ -0,0 +1,20 @@ +--- +description: Get familiar with TrackState in Ultralytics. Learn how it is used in the BaseTrack of the Ultralytics tracker for enhanced functionality. +keywords: Ultralytics, TrackState, BaseTrack, Ultralytics tracker, Ultralytics documentation +--- + +# Reference for `ultralytics/trackers/basetrack.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/basetrack.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/basetrack.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/trackers/basetrack.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.trackers.basetrack.TrackState + +

+ +## ::: ultralytics.trackers.basetrack.BaseTrack + +

diff --git a/docs/en/reference/trackers/bot_sort.md b/docs/en/reference/trackers/bot_sort.md new file mode 100644 index 0000000..85dfeee --- /dev/null +++ b/docs/en/reference/trackers/bot_sort.md @@ -0,0 +1,20 @@ +--- +description: Master the use of Ultralytics BOTrack, a key component of the powerful Ultralytics tracking system. Learn to integrate and use BOTSORT in your projects. +keywords: Ultralytics, BOTSORT, BOTrack, tracking system, official documentation, machine learning, AI tracking +--- + +# Reference for `ultralytics/trackers/bot_sort.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/bot_sort.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/bot_sort.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/trackers/bot_sort.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.trackers.bot_sort.BOTrack + +

+ +## ::: ultralytics.trackers.bot_sort.BOTSORT + +

diff --git a/docs/en/reference/trackers/byte_tracker.md b/docs/en/reference/trackers/byte_tracker.md new file mode 100644 index 0000000..e240d44 --- /dev/null +++ b/docs/en/reference/trackers/byte_tracker.md @@ -0,0 +1,20 @@ +--- +description: Explore in depth the functionality of the Ultralytics STrack and BYTETracker classes. Gain advanced feature insights to streamline your operations. +keywords: STrack, Ultralytics, BYTETracker, documentation, Ultralytics tracker, object tracking, YOLO +--- + +# Reference for `ultralytics/trackers/byte_tracker.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/byte_tracker.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/byte_tracker.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/trackers/byte_tracker.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.trackers.byte_tracker.STrack + +

+ +## ::: ultralytics.trackers.byte_tracker.BYTETracker + +

diff --git a/docs/en/reference/trackers/track.md b/docs/en/reference/trackers/track.md new file mode 100644 index 0000000..7f15130 --- /dev/null +++ b/docs/en/reference/trackers/track.md @@ -0,0 +1,24 @@ +--- +description: Explore Ultralytics documentation on prediction function starters & register trackers. Understand our code & its applications better. +keywords: Ultralytics, YOLO, on predict start, register tracker, prediction functions, documentation +--- + +# Reference for `ultralytics/trackers/track.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/track.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/track.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/trackers/track.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.trackers.track.on_predict_start + +

+ +## ::: ultralytics.trackers.track.on_predict_postprocess_end + +

+ +## ::: ultralytics.trackers.track.register_tracker + +

diff --git a/docs/en/reference/trackers/utils/gmc.md b/docs/en/reference/trackers/utils/gmc.md new file mode 100644 index 0000000..fa51e8e --- /dev/null +++ b/docs/en/reference/trackers/utils/gmc.md @@ -0,0 +1,16 @@ +--- +description: Explore the Ultralytics GMC tool in our comprehensive documentation. Learn how it works, best practices, and implementation advice. +keywords: Ultralytics, GMC utility, Ultralytics documentation, Ultralytics tracker, machine learning tools +--- + +# Reference for `ultralytics/trackers/utils/gmc.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/utils/gmc.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/utils/gmc.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/trackers/utils/gmc.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.trackers.utils.gmc.GMC + +

diff --git a/docs/en/reference/trackers/utils/kalman_filter.md b/docs/en/reference/trackers/utils/kalman_filter.md new file mode 100644 index 0000000..11063ef --- /dev/null +++ b/docs/en/reference/trackers/utils/kalman_filter.md @@ -0,0 +1,20 @@ +--- +description: Explore KalmanFilterXYAH, a key component of Ultralytics trackers. Understand its utilities and learn to leverage it in your own projects. +keywords: Ultralytics, KalmanFilterXYAH, tracker, documentation, guide +--- + +# Reference for `ultralytics/trackers/utils/kalman_filter.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/utils/kalman_filter.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/utils/kalman_filter.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/trackers/utils/kalman_filter.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.trackers.utils.kalman_filter.KalmanFilterXYAH + +

+ +## ::: ultralytics.trackers.utils.kalman_filter.KalmanFilterXYWH + +

diff --git a/docs/en/reference/trackers/utils/matching.md b/docs/en/reference/trackers/utils/matching.md new file mode 100644 index 0000000..fb7d326 --- /dev/null +++ b/docs/en/reference/trackers/utils/matching.md @@ -0,0 +1,28 @@ +--- +description: Explore in-depth guidance for using Ultralytics trackers utils matching, including merge_matches, linear_assignment, iou_distance, embedding_distance, fuse_motion, and fuse_score. +keywords: Ultralytics, Trackers Utils, Matching, merge_matches, linear_assignment, iou_distance, embedding_distance, fuse_motion, fuse_score, documentation +--- + +# Reference for `ultralytics/trackers/utils/matching.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/utils/matching.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/utils/matching.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/trackers/utils/matching.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.trackers.utils.matching.linear_assignment + +

+ +## ::: ultralytics.trackers.utils.matching.iou_distance + +

+ +## ::: ultralytics.trackers.utils.matching.embedding_distance + +

+ +## ::: ultralytics.trackers.utils.matching.fuse_score + +

diff --git a/docs/en/reference/utils/__init__.md b/docs/en/reference/utils/__init__.md new file mode 100644 index 0000000..43b245f --- /dev/null +++ b/docs/en/reference/utils/__init__.md @@ -0,0 +1,156 @@ +--- +description: Explore the Ultralytics Utils package, with handy functions like colorstr, yaml_save, set_logging & more, designed to enhance your coding experience. +keywords: Ultralytics, Utils, utilitarian functions, colorstr, yaml_save, set_logging, is_kaggle, is_docker, clean_url +--- + +# Reference for `ultralytics/utils/__init__.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/__init__.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/__init__.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/__init__.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.TQDM + +

+ +## ::: ultralytics.utils.SimpleClass + +

+ +## ::: ultralytics.utils.IterableSimpleNamespace + +

+ +## ::: ultralytics.utils.ThreadingLocked + +

+ +## ::: ultralytics.utils.TryExcept + +

+ +## ::: ultralytics.utils.SettingsManager + +

+ +## ::: ultralytics.utils.plt_settings + +

+ +## ::: ultralytics.utils.set_logging + +

+ +## ::: ultralytics.utils.emojis + +

+ +## ::: ultralytics.utils.yaml_save + +

+ +## ::: ultralytics.utils.yaml_load + +

+ +## ::: ultralytics.utils.yaml_print + +

+ +## ::: ultralytics.utils.is_ubuntu + +

+ +## ::: ultralytics.utils.is_colab + +

+ +## ::: ultralytics.utils.is_kaggle + +

+ +## ::: ultralytics.utils.is_jupyter + +

+ +## ::: ultralytics.utils.is_docker + +

+ +## ::: ultralytics.utils.is_online + +

+ +## ::: ultralytics.utils.is_pip_package + +

+ +## ::: ultralytics.utils.is_dir_writeable + +

+ +## ::: ultralytics.utils.is_pytest_running + +

+ +## ::: ultralytics.utils.is_github_action_running + +

+ +## ::: ultralytics.utils.is_git_dir + +

+ +## ::: ultralytics.utils.get_git_dir + +

+ +## ::: ultralytics.utils.get_git_origin_url + +

+ +## ::: ultralytics.utils.get_git_branch + +

+ +## ::: ultralytics.utils.get_default_args + +

+ +## ::: ultralytics.utils.get_ubuntu_version + +

+ +## ::: ultralytics.utils.get_user_config_dir + +

+ +## ::: ultralytics.utils.colorstr + +

+ +## ::: ultralytics.utils.remove_colorstr + +

+ +## ::: ultralytics.utils.threaded + +

+ +## ::: ultralytics.utils.set_sentry + +

+ +## ::: ultralytics.utils.deprecation_warn + +

+ +## ::: ultralytics.utils.clean_url + +

+ +## ::: ultralytics.utils.url2file + +

diff --git a/docs/en/reference/utils/autobatch.md b/docs/en/reference/utils/autobatch.md new file mode 100644 index 0000000..4dc132b --- /dev/null +++ b/docs/en/reference/utils/autobatch.md @@ -0,0 +1,20 @@ +--- +description: Explore Ultralytics documentation for check_train_batch_size utility in the autobatch module. Understand how it could improve your machine learning process. +keywords: Ultralytics, check_train_batch_size, autobatch, utility, machine learning, documentation +--- + +# Reference for `ultralytics/utils/autobatch.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/autobatch.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/autobatch.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/autobatch.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.autobatch.check_train_batch_size + +

+ +## ::: ultralytics.utils.autobatch.autobatch + +

diff --git a/docs/en/reference/utils/benchmarks.md b/docs/en/reference/utils/benchmarks.md new file mode 100644 index 0000000..76d4885 --- /dev/null +++ b/docs/en/reference/utils/benchmarks.md @@ -0,0 +1,20 @@ +--- +description: Discover how to profile your models using Ultralytics utilities. Enhance performance, optimize your benchmarks, and learn best practices. +keywords: Ultralytics, ProfileModels, benchmarks, model profiling, performance optimization +--- + +# Reference for `ultralytics/utils/benchmarks.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/benchmarks.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/benchmarks.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/benchmarks.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.benchmarks.ProfileModels + +

+ +## ::: ultralytics.utils.benchmarks.benchmark + +

diff --git a/docs/en/reference/utils/callbacks/base.md b/docs/en/reference/utils/callbacks/base.md new file mode 100644 index 0000000..609e34b --- /dev/null +++ b/docs/en/reference/utils/callbacks/base.md @@ -0,0 +1,120 @@ +--- +description: Explore how to use the on-train, on-validation, on-pretrain, and on-predict callbacks in Ultralytics. Learn to update params, save models, and add integration callbacks. +keywords: Ultralytics, Callbacks, On-train, On-validation, On-pretrain, On-predict, Parameters update, Model saving, Integration callbacks +--- + +# Reference for `ultralytics/utils/callbacks/base.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/base.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/base.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/base.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.callbacks.base.on_pretrain_routine_start + +

+ +## ::: ultralytics.utils.callbacks.base.on_pretrain_routine_end + +

+ +## ::: ultralytics.utils.callbacks.base.on_train_start + +

+ +## ::: ultralytics.utils.callbacks.base.on_train_epoch_start + +

+ +## ::: ultralytics.utils.callbacks.base.on_train_batch_start + +

+ +## ::: ultralytics.utils.callbacks.base.optimizer_step + +

+ +## ::: ultralytics.utils.callbacks.base.on_before_zero_grad + +

+ +## ::: ultralytics.utils.callbacks.base.on_train_batch_end + +

+ +## ::: ultralytics.utils.callbacks.base.on_train_epoch_end + +

+ +## ::: ultralytics.utils.callbacks.base.on_fit_epoch_end + +

+ +## ::: ultralytics.utils.callbacks.base.on_model_save + +

+ +## ::: ultralytics.utils.callbacks.base.on_train_end + +

+ +## ::: ultralytics.utils.callbacks.base.on_params_update + +

+ +## ::: ultralytics.utils.callbacks.base.teardown + +

+ +## ::: ultralytics.utils.callbacks.base.on_val_start + +

+ +## ::: ultralytics.utils.callbacks.base.on_val_batch_start + +

+ +## ::: ultralytics.utils.callbacks.base.on_val_batch_end + +

+ +## ::: ultralytics.utils.callbacks.base.on_val_end + +

+ +## ::: ultralytics.utils.callbacks.base.on_predict_start + +

+ +## ::: ultralytics.utils.callbacks.base.on_predict_batch_start + +

+ +## ::: ultralytics.utils.callbacks.base.on_predict_batch_end + +

+ +## ::: ultralytics.utils.callbacks.base.on_predict_postprocess_end + +

+ +## ::: ultralytics.utils.callbacks.base.on_predict_end + +

+ +## ::: ultralytics.utils.callbacks.base.on_export_start + +

+ +## ::: ultralytics.utils.callbacks.base.on_export_end + +

+ +## ::: ultralytics.utils.callbacks.base.get_default_callbacks + +

+ +## ::: ultralytics.utils.callbacks.base.add_integration_callbacks + +

diff --git a/docs/en/reference/utils/callbacks/clearml.md b/docs/en/reference/utils/callbacks/clearml.md new file mode 100644 index 0000000..a05b08c --- /dev/null +++ b/docs/en/reference/utils/callbacks/clearml.md @@ -0,0 +1,40 @@ +--- +description: Uncover the specifics of Ultralytics ClearML callbacks, from pretrain routine start to training end. Boost your ML model performance. +keywords: Ultralytics, clearML, callbacks, pretrain routine start, validation end, train epoch end, training end +--- + +# Reference for `ultralytics/utils/callbacks/clearml.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/clearml.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/clearml.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/clearml.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.callbacks.clearml._log_debug_samples + +

+ +## ::: ultralytics.utils.callbacks.clearml._log_plot + +

+ +## ::: ultralytics.utils.callbacks.clearml.on_pretrain_routine_start + +

+ +## ::: ultralytics.utils.callbacks.clearml.on_train_epoch_end + +

+ +## ::: ultralytics.utils.callbacks.clearml.on_fit_epoch_end + +

+ +## ::: ultralytics.utils.callbacks.clearml.on_val_end + +

+ +## ::: ultralytics.utils.callbacks.clearml.on_train_end + +

diff --git a/docs/en/reference/utils/callbacks/comet.md b/docs/en/reference/utils/callbacks/comet.md new file mode 100644 index 0000000..2ba62fa --- /dev/null +++ b/docs/en/reference/utils/callbacks/comet.md @@ -0,0 +1,108 @@ +--- +description: Explore comprehensive documentation for utilising Comet Callbacks in Ultralytics. Learn to optimise training, logging, and experiment workflows. +keywords: Ultralytics, Comet Callbacks, Training optimisation, Logging, Experiment Workflows +--- + +# Reference for `ultralytics/utils/callbacks/comet.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/comet.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/comet.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/comet.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.callbacks.comet._get_comet_mode + +

+ +## ::: ultralytics.utils.callbacks.comet._get_comet_model_name + +

+ +## ::: ultralytics.utils.callbacks.comet._get_eval_batch_logging_interval + +

+ +## ::: ultralytics.utils.callbacks.comet._get_max_image_predictions_to_log + +

+ +## ::: ultralytics.utils.callbacks.comet._scale_confidence_score + +

+ +## ::: ultralytics.utils.callbacks.comet._should_log_confusion_matrix + +

+ +## ::: ultralytics.utils.callbacks.comet._should_log_image_predictions + +

+ +## ::: ultralytics.utils.callbacks.comet._get_experiment_type + +

+ +## ::: ultralytics.utils.callbacks.comet._create_experiment + +

+ +## ::: ultralytics.utils.callbacks.comet._fetch_trainer_metadata + +

+ +## ::: ultralytics.utils.callbacks.comet._scale_bounding_box_to_original_image_shape + +

+ +## ::: ultralytics.utils.callbacks.comet._format_ground_truth_annotations_for_detection + +

+ +## ::: ultralytics.utils.callbacks.comet._format_prediction_annotations_for_detection + +

+ +## ::: ultralytics.utils.callbacks.comet._fetch_annotations + +

+ +## ::: ultralytics.utils.callbacks.comet._create_prediction_metadata_map + +

+ +## ::: ultralytics.utils.callbacks.comet._log_confusion_matrix + +

+ +## ::: ultralytics.utils.callbacks.comet._log_images + +

+ +## ::: ultralytics.utils.callbacks.comet._log_image_predictions + +

+ +## ::: ultralytics.utils.callbacks.comet._log_plots + +

+ +## ::: ultralytics.utils.callbacks.comet._log_model + +

+ +## ::: ultralytics.utils.callbacks.comet.on_pretrain_routine_start + +

+ +## ::: ultralytics.utils.callbacks.comet.on_train_epoch_end + +

+ +## ::: ultralytics.utils.callbacks.comet.on_fit_epoch_end + +

+ +## ::: ultralytics.utils.callbacks.comet.on_train_end + +

diff --git a/docs/en/reference/utils/callbacks/dvc.md b/docs/en/reference/utils/callbacks/dvc.md new file mode 100644 index 0000000..a325989 --- /dev/null +++ b/docs/en/reference/utils/callbacks/dvc.md @@ -0,0 +1,48 @@ +--- +description: Browse through Ultralytics YOLO docs to learn about the DVC logging and callback functions used in training and pretraining models. +keywords: Ultralytics, YOLO, DVC, callbacks, logger, training, pretraining, machine learning, models +--- + +# Reference for `ultralytics/utils/callbacks/dvc.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/dvc.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/dvc.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/dvc.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.callbacks.dvc._log_images + +

+ +## ::: ultralytics.utils.callbacks.dvc._log_plots + +

+ +## ::: ultralytics.utils.callbacks.dvc._log_confusion_matrix + +

+ +## ::: ultralytics.utils.callbacks.dvc.on_pretrain_routine_start + +

+ +## ::: ultralytics.utils.callbacks.dvc.on_pretrain_routine_end + +

+ +## ::: ultralytics.utils.callbacks.dvc.on_train_start + +

+ +## ::: ultralytics.utils.callbacks.dvc.on_train_epoch_start + +

+ +## ::: ultralytics.utils.callbacks.dvc.on_fit_epoch_end + +

+ +## ::: ultralytics.utils.callbacks.dvc.on_train_end + +

diff --git a/docs/en/reference/utils/callbacks/hub.md b/docs/en/reference/utils/callbacks/hub.md new file mode 100644 index 0000000..3735e90 --- /dev/null +++ b/docs/en/reference/utils/callbacks/hub.md @@ -0,0 +1,44 @@ +--- +description: Explore the detailed information on key Ultralytics callbacks such as on_pretrain_routine_end, on_model_save, on_train_start, and on_predict_start. +keywords: Ultralytics, callbacks, on_pretrain_routine_end, on_model_save, on_train_start, on_predict_start, hub, training +--- + +# Reference for `ultralytics/utils/callbacks/hub.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/hub.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/hub.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/hub.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.callbacks.hub.on_pretrain_routine_end + +

+ +## ::: ultralytics.utils.callbacks.hub.on_fit_epoch_end + +

+ +## ::: ultralytics.utils.callbacks.hub.on_model_save + +

+ +## ::: ultralytics.utils.callbacks.hub.on_train_end + +

+ +## ::: ultralytics.utils.callbacks.hub.on_train_start + +

+ +## ::: ultralytics.utils.callbacks.hub.on_val_start + +

+ +## ::: ultralytics.utils.callbacks.hub.on_predict_start + +

+ +## ::: ultralytics.utils.callbacks.hub.on_export_start + +

diff --git a/docs/en/reference/utils/callbacks/mlflow.md b/docs/en/reference/utils/callbacks/mlflow.md new file mode 100644 index 0000000..2abdb3d --- /dev/null +++ b/docs/en/reference/utils/callbacks/mlflow.md @@ -0,0 +1,24 @@ +--- +description: Understand routines at the end of pre-training and training in Ultralytics. Elevate your MLflow callbacks expertise. +keywords: Ultralytics, MLflow, Callbacks, on_pretrain_routine_end, on_train_end, Machine Learning, Training +--- + +# Reference for `ultralytics/utils/callbacks/mlflow.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/mlflow.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/mlflow.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/mlflow.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.callbacks.mlflow.on_pretrain_routine_end + +

+ +## ::: ultralytics.utils.callbacks.mlflow.on_fit_epoch_end + +

+ +## ::: ultralytics.utils.callbacks.mlflow.on_train_end + +

diff --git a/docs/en/reference/utils/callbacks/neptune.md b/docs/en/reference/utils/callbacks/neptune.md new file mode 100644 index 0000000..82906ac --- /dev/null +++ b/docs/en/reference/utils/callbacks/neptune.md @@ -0,0 +1,44 @@ +--- +description: Explore exhaustive details about Ultralytics callbacks in Neptune, with specifics about scalar logging, routine start, and more. +keywords: Ultralytics, Neptune callbacks, on_train_epoch_end, on_val_end, _log_plot, _log_images, on_pretrain_routine_start, on_fit_epoch_end, on_train_end +--- + +# Reference for `ultralytics/utils/callbacks/neptune.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/neptune.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/neptune.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/neptune.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.callbacks.neptune._log_scalars + +

+ +## ::: ultralytics.utils.callbacks.neptune._log_images + +

+ +## ::: ultralytics.utils.callbacks.neptune._log_plot + +

+ +## ::: ultralytics.utils.callbacks.neptune.on_pretrain_routine_start + +

+ +## ::: ultralytics.utils.callbacks.neptune.on_train_epoch_end + +

+ +## ::: ultralytics.utils.callbacks.neptune.on_fit_epoch_end + +

+ +## ::: ultralytics.utils.callbacks.neptune.on_val_end + +

+ +## ::: ultralytics.utils.callbacks.neptune.on_train_end + +

diff --git a/docs/en/reference/utils/callbacks/raytune.md b/docs/en/reference/utils/callbacks/raytune.md new file mode 100644 index 0000000..2ed6191 --- /dev/null +++ b/docs/en/reference/utils/callbacks/raytune.md @@ -0,0 +1,16 @@ +--- +description: Discover the functionality of the on_fit_epoch_end callback in the Ultralytics YOLO framework. Learn what runs at the end of each fit epoch when using the Ray Tune integration. +keywords: Ultralytics, YOLO, Ray Tune, on_fit_epoch_end, callbacks, documentation, deep learning, YOLO framework +--- + +# Reference for `ultralytics/utils/callbacks/raytune.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/raytune.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/raytune.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/raytune.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.callbacks.raytune.on_fit_epoch_end + +

diff --git a/docs/en/reference/utils/callbacks/tensorboard.md b/docs/en/reference/utils/callbacks/tensorboard.md new file mode 100644 index 0000000..075c936 --- /dev/null +++ b/docs/en/reference/utils/callbacks/tensorboard.md @@ -0,0 +1,36 @@ +--- +description: Explore Ultralytics YOLO Docs for a deep understanding of log_scalars, on_batch_end & other callback utilities embedded in the tensorboard module. +keywords: Ultralytics, YOLO, documentation, callback utilities, log_scalars, on_batch_end, tensorboard +--- + +# Reference for `ultralytics/utils/callbacks/tensorboard.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/tensorboard.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/tensorboard.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/tensorboard.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.callbacks.tensorboard._log_scalars + +

+ +## ::: ultralytics.utils.callbacks.tensorboard._log_tensorboard_graph + +

+ +## ::: ultralytics.utils.callbacks.tensorboard.on_pretrain_routine_start + +

+ +## ::: ultralytics.utils.callbacks.tensorboard.on_train_start + +

+ +## ::: ultralytics.utils.callbacks.tensorboard.on_batch_end + +

+ +## ::: ultralytics.utils.callbacks.tensorboard.on_fit_epoch_end + +

diff --git a/docs/en/reference/utils/callbacks/wb.md b/docs/en/reference/utils/callbacks/wb.md new file mode 100644 index 0000000..59a2088 --- /dev/null +++ b/docs/en/reference/utils/callbacks/wb.md @@ -0,0 +1,40 @@ +--- +description: Deep dive into Ultralytics callbacks. Learn how to use the _log_plots, on_fit_epoch_end, and on_train_end functions effectively. +keywords: Ultralytics, callbacks, _log_plots, on_fit_epoch_end, on_train_end +--- + +# Reference for `ultralytics/utils/callbacks/wb.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/wb.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/wb.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/wb.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.callbacks.wb._custom_table + +

+ +## ::: ultralytics.utils.callbacks.wb._plot_curve + +

+ +## ::: ultralytics.utils.callbacks.wb._log_plots + +

+ +## ::: ultralytics.utils.callbacks.wb.on_pretrain_routine_start + +

+ +## ::: ultralytics.utils.callbacks.wb.on_fit_epoch_end + +

+ +## ::: ultralytics.utils.callbacks.wb.on_train_epoch_end + +

+ +## ::: ultralytics.utils.callbacks.wb.on_train_end + +

diff --git a/docs/en/reference/utils/checks.md b/docs/en/reference/utils/checks.md new file mode 100644 index 0000000..f2e4ed9 --- /dev/null +++ b/docs/en/reference/utils/checks.md @@ -0,0 +1,112 @@ +--- +description: Learn about our routine checks that safeguard Ultralytics operations including ASCII, font, YOLO file, YAML, Python and torchvision checks. +keywords: Ultralytics, utility checks, ASCII, check_version, pip_update, check_python, check_torchvision, check_yaml, YOLO filename +--- + +# Reference for `ultralytics/utils/checks.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/checks.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/checks.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/checks.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.checks.parse_requirements + +

+ +## ::: ultralytics.utils.checks.parse_version + +

+ +## ::: ultralytics.utils.checks.is_ascii + +

+ +## ::: ultralytics.utils.checks.check_imgsz + +

+ +## ::: ultralytics.utils.checks.check_version + +

+ +## ::: ultralytics.utils.checks.check_latest_pypi_version + +

+ +## ::: ultralytics.utils.checks.check_pip_update_available + +

+ +## ::: ultralytics.utils.checks.check_font + +

+ +## ::: ultralytics.utils.checks.check_python + +

+ +## ::: ultralytics.utils.checks.check_requirements + +

+ +## ::: ultralytics.utils.checks.check_torchvision + +

+ +## ::: ultralytics.utils.checks.check_suffix + +

+ +## ::: ultralytics.utils.checks.check_yolov5u_filename + +

+ +## ::: ultralytics.utils.checks.check_model_file_from_stem + +

+ +## ::: ultralytics.utils.checks.check_file + +

+ +## ::: ultralytics.utils.checks.check_yaml + +

+ +## ::: ultralytics.utils.checks.check_is_path_safe + +

+ +## ::: ultralytics.utils.checks.check_imshow + +

+ +## ::: ultralytics.utils.checks.check_yolo + +

+ +## ::: ultralytics.utils.checks.collect_system_info + +

+ +## ::: ultralytics.utils.checks.check_amp + +

+ +## ::: ultralytics.utils.checks.git_describe + +

+ +## ::: ultralytics.utils.checks.print_args + +

+ +## ::: ultralytics.utils.checks.cuda_device_count + +

+ +## ::: ultralytics.utils.checks.cuda_is_available + +

diff --git a/docs/en/reference/utils/dist.md b/docs/en/reference/utils/dist.md new file mode 100644 index 0000000..1ca18e3 --- /dev/null +++ b/docs/en/reference/utils/dist.md @@ -0,0 +1,28 @@ +--- +description: Discover the role of dist.find_free_network_port & dist.generate_ddp_command in Ultralytics DDP utilities. Use our guide for efficient deployment. +keywords: Ultralytics, DDP, DDP utility functions, Distributed Data Parallel, find free network port, generate DDP command +--- + +# Reference for `ultralytics/utils/dist.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/dist.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/dist.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/dist.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.dist.find_free_network_port + +

+ +## ::: ultralytics.utils.dist.generate_ddp_file + +

+ +## ::: ultralytics.utils.dist.generate_ddp_command + +

+ +## ::: ultralytics.utils.dist.ddp_cleanup + +

diff --git a/docs/en/reference/utils/downloads.md b/docs/en/reference/utils/downloads.md new file mode 100644 index 0000000..aeef4c9 --- /dev/null +++ b/docs/en/reference/utils/downloads.md @@ -0,0 +1,52 @@ +--- +description: Learn about the download utilities in Ultralytics YOLO, featuring functions like is_url, check_disk_space, get_github_assets, and download. +keywords: Ultralytics, YOLO, download utilities, is_url, check_disk_space, get_github_assets, download, documentation +--- + +# Reference for `ultralytics/utils/downloads.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/downloads.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/downloads.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/downloads.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.downloads.is_url + +

+ +## ::: ultralytics.utils.downloads.delete_dsstore + +

+ +## ::: ultralytics.utils.downloads.zip_directory + +

+ +## ::: ultralytics.utils.downloads.unzip_file + +

+ +## ::: ultralytics.utils.downloads.check_disk_space + +

+ +## ::: ultralytics.utils.downloads.get_google_drive_file_info + +

+ +## ::: ultralytics.utils.downloads.safe_download + +

+ +## ::: ultralytics.utils.downloads.get_github_assets + +

+ +## ::: ultralytics.utils.downloads.attempt_download_asset + +

+ +## ::: ultralytics.utils.downloads.download + +

diff --git a/docs/en/reference/utils/errors.md b/docs/en/reference/utils/errors.md new file mode 100644 index 0000000..816328a --- /dev/null +++ b/docs/en/reference/utils/errors.md @@ -0,0 +1,16 @@ +--- +description: Learn about the HUBModelError in Ultralytics. Enhance your understanding, troubleshoot errors and optimize your machine learning projects. +keywords: Ultralytics, HUBModelError, Machine Learning, Error troubleshooting, Ultralytics documentation +--- + +# Reference for `ultralytics/utils/errors.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/errors.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/errors.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/errors.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.errors.HUBModelError + +

diff --git a/docs/en/reference/utils/files.md b/docs/en/reference/utils/files.md new file mode 100644 index 0000000..586373b --- /dev/null +++ b/docs/en/reference/utils/files.md @@ -0,0 +1,40 @@ +--- +description: Discover how to use Ultralytics utility functions for file-related operations including incrementing paths, finding file age, checking file size and creating directories. +keywords: Ultralytics, utility functions, file operations, working directory, file age, file size, create directories +--- + +# Reference for `ultralytics/utils/files.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/files.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/files.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/files.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.files.WorkingDirectory + +

+ +## ::: ultralytics.utils.files.spaces_in_path + +

+ +## ::: ultralytics.utils.files.increment_path + +

+ +## ::: ultralytics.utils.files.file_age + +

+ +## ::: ultralytics.utils.files.file_date + +

+ +## ::: ultralytics.utils.files.file_size + +

+ +## ::: ultralytics.utils.files.get_latest_run + +

diff --git a/docs/en/reference/utils/instance.md b/docs/en/reference/utils/instance.md new file mode 100644 index 0000000..6771cd8 --- /dev/null +++ b/docs/en/reference/utils/instance.md @@ -0,0 +1,24 @@ +--- +description: Dive into Ultralytics' detailed utility guide. Learn about Bboxes, Instances, _ntuple and more from the Ultralytics utils.instance module. +keywords: Ultralytics, Bboxes, Instances, _ntuple, utility, ultralytics utils.instance +--- + +# Reference for `ultralytics/utils/instance.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/instance.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/instance.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/instance.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.instance.Bboxes + +

+ +## ::: ultralytics.utils.instance.Instances + +

+ +## ::: ultralytics.utils.instance._ntuple + +

diff --git a/docs/en/reference/utils/loss.md b/docs/en/reference/utils/loss.md new file mode 100644 index 0000000..922ad6c --- /dev/null +++ b/docs/en/reference/utils/loss.md @@ -0,0 +1,44 @@ +--- +description: Explore Ultralytics' versatile loss functions - VarifocalLoss, BboxLoss, v8DetectionLoss, v8PoseLoss. Improve your accuracy on YOLO implementations. +keywords: Ultralytics, Loss functions, VarifocalLoss, BboxLoss, v8DetectionLoss, v8PoseLoss, YOLO, Ultralytics Documentation +--- + +# Reference for `ultralytics/utils/loss.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/loss.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/loss.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/loss.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.loss.VarifocalLoss + +

+ +## ::: ultralytics.utils.loss.FocalLoss + +

+ +## ::: ultralytics.utils.loss.BboxLoss + +

+ +## ::: ultralytics.utils.loss.KeypointLoss + +

+ +## ::: ultralytics.utils.loss.v8DetectionLoss + +

+ +## ::: ultralytics.utils.loss.v8SegmentationLoss + +

+ +## ::: ultralytics.utils.loss.v8PoseLoss + +

+ +## ::: ultralytics.utils.loss.v8ClassificationLoss + +

diff --git a/docs/en/reference/utils/metrics.md b/docs/en/reference/utils/metrics.md new file mode 100644 index 0000000..3154a73 --- /dev/null +++ b/docs/en/reference/utils/metrics.md @@ -0,0 +1,80 @@ +--- +description: Explore Ultralytics YOLO metrics tools - from confusion matrix, detection metrics, pose metrics to box IOU. Learn how to compute and plot precision-recall curves. +keywords: Ultralytics, YOLO, YOLOv3, YOLOv4, metrics, confusion matrix, detection metrics, pose metrics, box IOU, mask IOU, plot precision-recall curves, compute average precision +--- + +# Reference for `ultralytics/utils/metrics.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/metrics.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/metrics.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/metrics.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.metrics.ConfusionMatrix + +

+ +## ::: ultralytics.utils.metrics.Metric + +

+ +## ::: ultralytics.utils.metrics.DetMetrics + +

+ +## ::: ultralytics.utils.metrics.SegmentMetrics + +

+ +## ::: ultralytics.utils.metrics.PoseMetrics + +

+ +## ::: ultralytics.utils.metrics.ClassifyMetrics + +

+ +## ::: ultralytics.utils.metrics.bbox_ioa + +

+ +## ::: ultralytics.utils.metrics.box_iou + +

+ +## ::: ultralytics.utils.metrics.bbox_iou + +

+ +## ::: ultralytics.utils.metrics.mask_iou + +

+ +## ::: ultralytics.utils.metrics.kpt_iou + +

+ +## ::: ultralytics.utils.metrics.smooth_BCE + +

+ +## ::: ultralytics.utils.metrics.smooth + +

+ +## ::: ultralytics.utils.metrics.plot_pr_curve + +

+ +## ::: ultralytics.utils.metrics.plot_mc_curve + +

+ +## ::: ultralytics.utils.metrics.compute_ap + +

+ +## ::: ultralytics.utils.metrics.ap_per_class + +

diff --git a/docs/en/reference/utils/ops.md b/docs/en/reference/utils/ops.md new file mode 100644 index 0000000..c366fd9 --- /dev/null +++ b/docs/en/reference/utils/ops.md @@ -0,0 +1,128 @@ +--- +description: Explore detailed documentation for Ultralytics utility operations. Learn about methods like segment2box, make_divisible, clip_boxes, and many more. +keywords: Ultralytics YOLO, Utility Operations, segment2box, make_divisible, clip_boxes, scale_image, xywh2xyxy, xyxy2xywhn, xywh2ltwh, ltwh2xywh, segments2boxes, crop_mask, process_mask, scale_masks, masks2segments +--- + +# Reference for `ultralytics/utils/ops.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/ops.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/ops.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/ops.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.ops.Profile + +

+ +## ::: ultralytics.utils.ops.segment2box + +

+ +## ::: ultralytics.utils.ops.scale_boxes + +

+ +## ::: ultralytics.utils.ops.make_divisible + +

+ +## ::: ultralytics.utils.ops.non_max_suppression + +

+ +## ::: ultralytics.utils.ops.clip_boxes + +

+ +## ::: ultralytics.utils.ops.clip_coords + +

+ +## ::: ultralytics.utils.ops.scale_image + +

+ +## ::: ultralytics.utils.ops.xyxy2xywh + +

+ +## ::: ultralytics.utils.ops.xywh2xyxy + +

+ +## ::: ultralytics.utils.ops.xywhn2xyxy + +

+ +## ::: ultralytics.utils.ops.xyxy2xywhn + +

+ +## ::: ultralytics.utils.ops.xywh2ltwh + +

+ +## ::: ultralytics.utils.ops.xyxy2ltwh + +

+ +## ::: ultralytics.utils.ops.ltwh2xywh + +

+ +## ::: ultralytics.utils.ops.xyxyxyxy2xywhr + +

+ +## ::: ultralytics.utils.ops.xywhr2xyxyxyxy + +

+ +## ::: ultralytics.utils.ops.ltwh2xyxy + +

+ +## ::: ultralytics.utils.ops.segments2boxes + +

+ +## ::: ultralytics.utils.ops.resample_segments + +

+ +## ::: ultralytics.utils.ops.crop_mask + +

+ +## ::: ultralytics.utils.ops.process_mask_upsample + +

+ +## ::: ultralytics.utils.ops.process_mask + +

+ +## ::: ultralytics.utils.ops.process_mask_native + +

+ +## ::: ultralytics.utils.ops.scale_masks + +

+ +## ::: ultralytics.utils.ops.scale_coords + +

+ +## ::: ultralytics.utils.ops.masks2segments + +

+ +## ::: ultralytics.utils.ops.convert_torch2numpy_batch + +

+ +## ::: ultralytics.utils.ops.clean_str + +

diff --git a/docs/en/reference/utils/patches.md b/docs/en/reference/utils/patches.md new file mode 100644 index 0000000..fdcf394 --- /dev/null +++ b/docs/en/reference/utils/patches.md @@ -0,0 +1,28 @@ +--- +description: Learn about Ultralytics utils patches including imread, imshow and torch_save. Enhance your image processing skills. +keywords: Ultralytics, Utils, Patches, imread, imshow, torch_save, image processing +--- + +# Reference for `ultralytics/utils/patches.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/patches.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/patches.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/patches.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.patches.imread + +

+ +## ::: ultralytics.utils.patches.imwrite + +

+ +## ::: ultralytics.utils.patches.imshow + +

+ +## ::: ultralytics.utils.patches.torch_save + +

diff --git a/docs/en/reference/utils/plotting.md b/docs/en/reference/utils/plotting.md new file mode 100644 index 0000000..b465af4 --- /dev/null +++ b/docs/en/reference/utils/plotting.md @@ -0,0 +1,52 @@ +--- +description: Master advanced plotting utils from Ultralytics including color annotations, label and image plotting, and feature visualization. +keywords: Ultralytics, plotting, utils, color annotation, label plotting, image plotting, feature visualization +--- + +# Reference for `ultralytics/utils/plotting.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/plotting.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/plotting.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/plotting.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.plotting.Colors + +

+ +## ::: ultralytics.utils.plotting.Annotator + +

+ +## ::: ultralytics.utils.plotting.plot_labels + +

+ +## ::: ultralytics.utils.plotting.save_one_box + +

+ +## ::: ultralytics.utils.plotting.plot_images + +

+ +## ::: ultralytics.utils.plotting.plot_results + +

+ +## ::: ultralytics.utils.plotting.plt_color_scatter + +

+ +## ::: ultralytics.utils.plotting.plot_tune_results + +

+ +## ::: ultralytics.utils.plotting.output_to_target + +

+ +## ::: ultralytics.utils.plotting.feature_visualization + +

diff --git a/docs/en/reference/utils/tal.md b/docs/en/reference/utils/tal.md new file mode 100644 index 0000000..6519d35 --- /dev/null +++ b/docs/en/reference/utils/tal.md @@ -0,0 +1,36 @@ +--- +description: Explore Ultralytics utilities for optimized task assignment, bounding box creation, and distance calculation. Learn more about algorithm implementations. +keywords: Ultralytics, task aligned assigner, select highest overlaps, make anchors, dist2bbox, bbox2dist, utilities, algorithm +--- + +# Reference for `ultralytics/utils/tal.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/tal.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/tal.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/tal.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.tal.TaskAlignedAssigner + +

+ +## ::: ultralytics.utils.tal.select_candidates_in_gts + +

+ +## ::: ultralytics.utils.tal.select_highest_overlaps + +

+ +## ::: ultralytics.utils.tal.make_anchors + +

+ +## ::: ultralytics.utils.tal.dist2bbox + +

+ +## ::: ultralytics.utils.tal.bbox2dist + +

diff --git a/docs/en/reference/utils/torch_utils.md b/docs/en/reference/utils/torch_utils.md new file mode 100644 index 0000000..5c88f29 --- /dev/null +++ b/docs/en/reference/utils/torch_utils.md @@ -0,0 +1,120 @@ +--- +description: Explore Ultralytics-tailored torch utility features like Model EMA, early stopping, smart inference, image scaling, get_flops, and many more. +keywords: Ultralytics, Torch Utils, Model EMA, Early Stopping, Smart Inference, Get CPU Info, Time Sync, Fuse Deconv and bn, Get num params, Get FLOPs, Scale img, Copy attr, Intersect dicts, De_parallel, Init seeds, Profile +--- + +# Reference for `ultralytics/utils/torch_utils.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/torch_utils.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/torch_utils.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/torch_utils.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.torch_utils.ModelEMA + +

+ +## ::: ultralytics.utils.torch_utils.EarlyStopping + +

+ +## ::: ultralytics.utils.torch_utils.torch_distributed_zero_first + +

+ +## ::: ultralytics.utils.torch_utils.smart_inference_mode + +

+ +## ::: ultralytics.utils.torch_utils.get_cpu_info + +

+ +## ::: ultralytics.utils.torch_utils.select_device + +

+ +## ::: ultralytics.utils.torch_utils.time_sync + +

+ +## ::: ultralytics.utils.torch_utils.fuse_conv_and_bn + +

+ +## ::: ultralytics.utils.torch_utils.fuse_deconv_and_bn + +

+ +## ::: ultralytics.utils.torch_utils.model_info + +

+ +## ::: ultralytics.utils.torch_utils.get_num_params + +

+ +## ::: ultralytics.utils.torch_utils.get_num_gradients + +

+ +## ::: ultralytics.utils.torch_utils.model_info_for_loggers + +

+ +## ::: ultralytics.utils.torch_utils.get_flops + +

+ +## ::: ultralytics.utils.torch_utils.get_flops_with_torch_profiler + +

+ +## ::: ultralytics.utils.torch_utils.initialize_weights + +

+ +## ::: ultralytics.utils.torch_utils.scale_img + +

+ +## ::: ultralytics.utils.torch_utils.make_divisible + +

+ +## ::: ultralytics.utils.torch_utils.copy_attr + +

+ +## ::: ultralytics.utils.torch_utils.get_latest_opset + +

+ +## ::: ultralytics.utils.torch_utils.intersect_dicts + +

+ +## ::: ultralytics.utils.torch_utils.is_parallel + +

+ +## ::: ultralytics.utils.torch_utils.de_parallel + +

+ +## ::: ultralytics.utils.torch_utils.one_cycle + +

+ +## ::: ultralytics.utils.torch_utils.init_seeds + +

+ +## ::: ultralytics.utils.torch_utils.strip_optimizer + +

+ +## ::: ultralytics.utils.torch_utils.profile + +

diff --git a/docs/en/reference/utils/triton.md b/docs/en/reference/utils/triton.md new file mode 100644 index 0000000..6a86de6 --- /dev/null +++ b/docs/en/reference/utils/triton.md @@ -0,0 +1,16 @@ +--- +description: Deploy ML models effortlessly with Ultralytics TritonRemoteModel. Simplify serving with our comprehensive utils guide. +keywords: Ultralytics, YOLO, TritonRemoteModel, machine learning, model serving, deployment, utils, documentation +--- + +# Reference for `ultralytics/utils/triton.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/triton.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/triton.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/triton.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.triton.TritonRemoteModel + +

diff --git a/docs/en/reference/utils/tuner.md b/docs/en/reference/utils/tuner.md new file mode 100644 index 0000000..d05f83d --- /dev/null +++ b/docs/en/reference/utils/tuner.md @@ -0,0 +1,16 @@ +--- +description: Learn to utilize the run_ray_tune function with Ultralytics. Make your machine learning tuning process easier and more efficient. +keywords: Ultralytics, run_ray_tune, machine learning tuning, machine learning efficiency +--- + +# Reference for `ultralytics/utils/tuner.py` + +!!! Note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/tuner.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/tuner.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/tuner.py) 🛠️. Thank you 🙏! + +

+ +## ::: ultralytics.utils.tuner.run_ray_tune + +

diff --git a/docs/en/robots.txt b/docs/en/robots.txt new file mode 100644 index 0000000..6d80eae --- /dev/null +++ b/docs/en/robots.txt @@ -0,0 +1,12 @@ +User-agent: * +Sitemap: http://docs.ultralytics.com/sitemap.xml +Sitemap: http://docs.ultralytics.com/ar/sitemap.xml +Sitemap: http://docs.ultralytics.com/de/sitemap.xml +Sitemap: http://docs.ultralytics.com/es/sitemap.xml +Sitemap: http://docs.ultralytics.com/fr/sitemap.xml +Sitemap: http://docs.ultralytics.com/hi/sitemap.xml +Sitemap: http://docs.ultralytics.com/ja/sitemap.xml +Sitemap: http://docs.ultralytics.com/ko/sitemap.xml +Sitemap: http://docs.ultralytics.com/pt/sitemap.xml +Sitemap: http://docs.ultralytics.com/ru/sitemap.xml +Sitemap: http://docs.ultralytics.com/zh/sitemap.xml diff --git a/docs/en/tasks/classify.md b/docs/en/tasks/classify.md new file mode 100644 index 0000000..0e89a87 --- /dev/null +++ b/docs/en/tasks/classify.md @@ -0,0 +1,173 @@ +--- +comments: true +description: Learn about YOLOv8 Classify models for image classification. Get detailed information on List of Pretrained Models & how to Train, Validate, Predict & Export models. +keywords: Ultralytics, YOLOv8, Image Classification, Pretrained Models, YOLOv8n-cls, Training, Validation, Prediction, Model Export +--- + +# Image Classification + +Image classification examples + +Image classification is the simplest of the supported tasks and involves classifying an entire image into one of a set of predefined classes. + +The output of an image classifier is a single class label and a confidence score. Image classification is useful when you need to know only what class an image belongs to and don't need to know where objects of that class are located or what their exact shape is. + +!!! Tip "Tip" + + YOLOv8 Classify models use the `-cls` suffix, i.e. `yolov8n-cls.pt` and are pretrained on [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). 
+ +## [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +YOLOv8 pretrained Classify models are shown here. Detect, Segment and Pose models are pretrained on the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) dataset, while Classify models are pretrained on the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) dataset. + +[Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) download automatically from the latest Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use. + +| Model | size
(pixels) | acc
top1 | acc
top5 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) at 640 | +|----------------------------------------------------------------------------------------------|-----------------------|------------------|------------------|--------------------------------|-------------------------------------|--------------------|--------------------------| +| [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | +| [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | +| [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | +| [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | +| [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | + +- **acc** values are model accuracies on the [ImageNet](https://www.image-net.org/) dataset validation set. +
Reproduce by `yolo val classify data=path/to/ImageNet device=0` +- **Speed** averaged over ImageNet val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) + instance. +
Reproduce by `yolo val classify data=path/to/ImageNet batch=1 device=0|cpu` + +## Train + +Train YOLOv8n-cls on the MNIST160 dataset for 100 epochs at image size 64. For a full list of available arguments see the [Configuration](../usage/cfg.md) page. + +!!! Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-cls.yaml') # build a new model from YAML + model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) + model = YOLO('yolov8n-cls.yaml').load('yolov8n-cls.pt') # build from YAML and transfer weights + + # Train the model + results = model.train(data='mnist160', epochs=100, imgsz=64) + ``` + + === "CLI" + + ```bash + # Build a new model from YAML and start training from scratch + yolo classify train data=mnist160 model=yolov8n-cls.yaml epochs=100 imgsz=64 + + # Start training from a pretrained *.pt model + yolo classify train data=mnist160 model=yolov8n-cls.pt epochs=100 imgsz=64 + + # Build a new model from YAML, transfer pretrained weights to it and start training + yolo classify train data=mnist160 model=yolov8n-cls.yaml pretrained=yolov8n-cls.pt epochs=100 imgsz=64 + ``` + +### Dataset format + +YOLO classification dataset format can be found in detail in the [Dataset Guide](../datasets/classify/index.md). + +## Val + +Validate trained YOLOv8n-cls model accuracy on the MNIST160 dataset. No arguments need to be passed, as the `model` retains its training `data` and arguments as model attributes. + +!!! 
Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-cls.pt') # load an official model + model = YOLO('path/to/best.pt') # load a custom model + + # Validate the model + metrics = model.val() # no arguments needed, dataset and settings remembered + metrics.top1 # top1 accuracy + metrics.top5 # top5 accuracy + ``` + === "CLI" + + ```bash + yolo classify val model=yolov8n-cls.pt # val official model + yolo classify val model=path/to/best.pt # val custom model + ``` + +## Predict + +Use a trained YOLOv8n-cls model to run predictions on images. + +!!! Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-cls.pt') # load an official model + model = YOLO('path/to/best.pt') # load a custom model + + # Predict with the model + results = model('https://ultralytics.com/images/bus.jpg') # predict on an image + ``` + === "CLI" + + ```bash + yolo classify predict model=yolov8n-cls.pt source='https://ultralytics.com/images/bus.jpg' # predict with official model + yolo classify predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predict with custom model + ``` + +See full `predict` mode details in the [Predict](https://docs.ultralytics.com/modes/predict/) page. + +## Export + +Export a YOLOv8n-cls model to a different format like ONNX, CoreML, etc. + +!!! Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-cls.pt') # load an official model + model = YOLO('path/to/best.pt') # load a custom trained model + + # Export the model + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-cls.pt format=onnx # export official model + yolo export model=path/to/best.pt format=onnx # export custom trained model + ``` + +Available YOLOv8-cls export formats are in the table below. You can predict or validate directly on exported models, i.e. 
`yolo predict model=yolov8n-cls.onnx`. Usage examples are shown for your model after export completes. + +| Format | `format` Argument | Model | Metadata | Arguments | +|--------------------------------------------------------------------|-------------------|-------------------------------|----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-cls.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-cls.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-cls.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-cls_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-cls.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-cls.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-cls_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-cls.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-cls.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-cls_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-cls_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-cls_ncnn_model/` | ✅ | `imgsz`, `half` | + +See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page. 
diff --git a/docs/en/tasks/detect.md b/docs/en/tasks/detect.md new file mode 100644 index 0000000..179692d --- /dev/null +++ b/docs/en/tasks/detect.md @@ -0,0 +1,185 @@ +--- +comments: true +description: Official documentation for YOLOv8 by Ultralytics. Learn how to train, validate, predict and export models in various formats. Including detailed performance stats. +keywords: YOLOv8, Ultralytics, object detection, pretrained models, training, validation, prediction, export models, COCO, ImageNet, PyTorch, ONNX, CoreML +--- + +# Object Detection + +Object detection examples + +Object detection is a task that involves identifying the location and class of objects in an image or video stream. + +The output of an object detector is a set of bounding boxes that enclose the objects in the image, along with class labels and confidence scores for each box. Object detection is a good choice when you need to identify objects of interest in a scene, but don't need to know exactly where the object is or its exact shape. + +

+
+ +
+ Watch: Object Detection with Pre-trained Ultralytics YOLOv8 Model. +

+ +!!! Tip "Tip" + + YOLOv8 Detect models are the default YOLOv8 models, i.e. `yolov8n.pt` and are pretrained on [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml). + +## [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +YOLOv8 pretrained Detect models are shown here. Detect, Segment and Pose models are pretrained on the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) dataset, while Classify models are pretrained on the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) dataset. + +[Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) download automatically from the latest Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use. + +| Model | size
(pixels) | mAPval
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | +|--------------------------------------------------------------------------------------|-----------------------|----------------------|--------------------------------|-------------------------------------|--------------------|-------------------| +| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | +| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | +| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | +| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | +| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | + +- **mAPval** values are for single-model single-scale on [COCO val2017](http://cocodataset.org) dataset. +
Reproduce by `yolo val detect data=coco.yaml device=0` +- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) + instance. +
Reproduce by `yolo val detect data=coco128.yaml batch=1 device=0|cpu` + +## Train + +Train YOLOv8n on the COCO128 dataset for 100 epochs at image size 640. For a full list of available arguments see the [Configuration](../usage/cfg.md) page. + +!!! Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n.yaml') # build a new model from YAML + model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # build from YAML and transfer weights + + # Train the model + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # Build a new model from YAML and start training from scratch + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # Start training from a pretrained *.pt model + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # Build a new model from YAML, transfer pretrained weights to it and start training + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### Dataset format + +YOLO detection dataset format can be found in detail in the [Dataset Guide](../datasets/detect/index.md). To convert your existing dataset from other formats (like COCO etc.) to YOLO format, please use the [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) tool by Ultralytics. + +## Val + +Validate trained YOLOv8n model accuracy on the COCO128 dataset. No arguments need to be passed, as the `model` retains its training `data` and arguments as model attributes. + +!!! 
Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n.pt') # load an official model + model = YOLO('path/to/best.pt') # load a custom model + + # Validate the model + metrics = model.val() # no arguments needed, dataset and settings remembered + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # a list contains map50-95 of each category + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # val official model + yolo detect val model=path/to/best.pt # val custom model + ``` + +## Predict + +Use a trained YOLOv8n model to run predictions on images. + +!!! Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n.pt') # load an official model + model = YOLO('path/to/best.pt') # load a custom model + + # Predict with the model + results = model('https://ultralytics.com/images/bus.jpg') # predict on an image + ``` + === "CLI" + + ```bash + yolo detect predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' # predict with official model + yolo detect predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predict with custom model + ``` + +See full `predict` mode details in the [Predict](https://docs.ultralytics.com/modes/predict/) page. + +## Export + +Export a YOLOv8n model to a different format like ONNX, CoreML, etc. + +!!! Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n.pt') # load an official model + model = YOLO('path/to/best.pt') # load a custom trained model + + # Export the model + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # export official model + yolo export model=path/to/best.pt format=onnx # export custom trained model + ``` + +Available YOLOv8 export formats are in the table below. 
You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n.onnx`. Usage examples are shown for your model after export completes. + +| Format | `format` Argument | Model | Metadata | Arguments | +|--------------------------------------------------------------------|-------------------|---------------------------|----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page. 
diff --git a/docs/en/tasks/index.md b/docs/en/tasks/index.md new file mode 100644 index 0000000..a5de275 --- /dev/null +++ b/docs/en/tasks/index.md @@ -0,0 +1,51 @@ +--- +comments: true +description: Learn about the cornerstone computer vision tasks YOLOv8 can perform including detection, segmentation, classification, and pose estimation. Understand their uses in your AI projects. +keywords: Ultralytics, YOLOv8, Detection, Segmentation, Classification, Pose Estimation, AI Framework, Computer Vision Tasks +--- + +# Ultralytics YOLOv8 Tasks + +
+Ultralytics YOLO supported tasks + +YOLOv8 is an AI framework that supports multiple computer vision **tasks**. The framework can be used to perform [detection](detect.md), [segmentation](segment.md), [classification](classify.md), and [pose](pose.md) estimation. Each of these tasks has a different objective and use case. + +

+
+ +
+ Watch: Explore Ultralytics YOLO Tasks: Object Detection, Segmentation, Tracking, and Pose Estimation. +

+ +## [Detection](detect.md) + +Detection is the primary task supported by YOLOv8. It involves detecting objects in an image or video frame and drawing bounding boxes around them. The detected objects are classified into different categories based on their features. YOLOv8 can detect multiple objects in a single image or video frame with high accuracy and speed. + +[Detection Examples](detect.md){ .md-button } + +## [Segmentation](segment.md) + +Segmentation is a task that involves segmenting an image into different regions based on the content of the image. Each region is assigned a label based on its content. This task is useful in applications such as image segmentation and medical imaging. YOLOv8 uses a variant of the U-Net architecture to perform segmentation. + +[Segmentation Examples](segment.md){ .md-button } + +## [Classification](classify.md) + +Classification is a task that involves classifying an image into different categories. YOLOv8 can be used to classify images based on their content. It uses a variant of the EfficientNet architecture to perform classification. + +[Classification Examples](classify.md){ .md-button } + +## [Pose](pose.md) + +Pose/keypoint detection is a task that involves detecting specific points in an image or video frame. These points are referred to as keypoints and are used to track movement or estimate pose. YOLOv8 can detect keypoints in an image or video frame with high accuracy and speed. + +[Pose Examples](pose.md){ .md-button } + +## Conclusion + +YOLOv8 supports multiple tasks, including detection, segmentation, classification, and keypoint detection. Each of these tasks has different objectives and use cases. By understanding the differences between these tasks, you can choose the appropriate task for your computer vision application. 
diff --git a/docs/en/tasks/pose.md b/docs/en/tasks/pose.md new file mode 100644 index 0000000..c754d56 --- /dev/null +++ b/docs/en/tasks/pose.md @@ -0,0 +1,189 @@ +--- +comments: true +description: Learn how to use Ultralytics YOLOv8 for pose estimation tasks. Find pretrained models, learn how to train, validate, predict, and export your own. +keywords: Ultralytics, YOLO, YOLOv8, pose estimation, keypoints detection, object detection, pre-trained models, machine learning, artificial intelligence +--- + +# Pose Estimation + +Pose estimation examples + +Pose estimation is a task that involves identifying the location of specific points in an image, usually referred to as keypoints. The keypoints can represent various parts of the object such as joints, landmarks, or other distinctive features. The locations of the keypoints are usually represented as a set of 2D `[x, y]` or 3D `[x, y, visible]` +coordinates. + +The output of a pose estimation model is a set of points that represent the keypoints on an object in the image, usually along with the confidence scores for each point. Pose estimation is a good choice when you need to identify specific parts of an object in a scene, and their location in relation to each other. + +

+
+ +
+ Watch: Pose Estimation with Ultralytics YOLOv8. +

+ +!!! Tip "Tip" + + YOLOv8 _pose_ models use the `-pose` suffix, i.e. `yolov8n-pose.pt`. These models are trained on the [COCO keypoints](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco-pose.yaml) dataset and are suitable for a variety of pose estimation tasks. + +## [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +YOLOv8 pretrained Pose models are shown here. Detect, Segment and Pose models are pretrained on the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) dataset, while Classify models are pretrained on the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) dataset. + +[Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) download automatically from the latest Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use. + +| Model | size
(pixels) | mAPpose
50-95 | mAPpose
50 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | +|------------------------------------------------------------------------------------------------------|-----------------------|-----------------------|--------------------|--------------------------------|-------------------------------------|--------------------|-------------------| +| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | +| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | +| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | +| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | +| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | +| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | + +- **mAPval** values are for single-model single-scale on [COCO Keypoints val2017](http://cocodataset.org) + dataset. +
Reproduce by `yolo val pose data=coco-pose.yaml device=0` +- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) + instance. +
Reproduce by `yolo val pose data=coco8-pose.yaml batch=1 device=0|cpu` + +## Train + +Train a YOLOv8-pose model on the COCO8-pose dataset. + +!!! Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-pose.yaml') # build a new model from YAML + model = YOLO('yolov8n-pose.pt') # load a pretrained model (recommended for training) + model = YOLO('yolov8n-pose.yaml').load('yolov8n-pose.pt') # build from YAML and transfer weights + + # Train the model + results = model.train(data='coco8-pose.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # Build a new model from YAML and start training from scratch + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml epochs=100 imgsz=640 + + # Start training from a pretrained *.pt model + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + + # Build a new model from YAML, transfer pretrained weights to it and start training + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml pretrained=yolov8n-pose.pt epochs=100 imgsz=640 + ``` + +### Dataset format + +YOLO pose dataset format can be found in detail in the [Dataset Guide](../datasets/pose/index.md). To convert your existing dataset from other formats (like COCO etc.) to YOLO format, please use the [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) tool by Ultralytics. + +## Val + +Validate trained YOLOv8n-pose model accuracy on the COCO8-pose dataset. No arguments need to be passed, as the `model` +retains its training `data` and arguments as model attributes. + +!!! 
Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-pose.pt') # load an official model + model = YOLO('path/to/best.pt') # load a custom model + + # Validate the model + metrics = model.val() # no arguments needed, dataset and settings remembered + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # a list contains map50-95 of each category + ``` + === "CLI" + + ```bash + yolo pose val model=yolov8n-pose.pt # val official model + yolo pose val model=path/to/best.pt # val custom model + ``` + +## Predict + +Use a trained YOLOv8n-pose model to run predictions on images. + +!!! Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-pose.pt') # load an official model + model = YOLO('path/to/best.pt') # load a custom model + + # Predict with the model + results = model('https://ultralytics.com/images/bus.jpg') # predict on an image + ``` + === "CLI" + + ```bash + yolo pose predict model=yolov8n-pose.pt source='https://ultralytics.com/images/bus.jpg' # predict with official model + yolo pose predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predict with custom model + ``` + +See full `predict` mode details in the [Predict](https://docs.ultralytics.com/modes/predict/) page. + +## Export + +Export a YOLOv8n Pose model to a different format like ONNX, CoreML, etc. + +!!! Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-pose.pt') # load an official model + model = YOLO('path/to/best.pt') # load a custom trained model + + # Export the model + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-pose.pt format=onnx # export official model + yolo export model=path/to/best.pt format=onnx # export custom trained model + ``` + +Available YOLOv8-pose export formats are in the table below. 
You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n-pose.onnx`. Usage examples are shown for your model after export completes. + +| Format | `format` Argument | Model | Metadata | Arguments | +|--------------------------------------------------------------------|-------------------|--------------------------------|----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-pose.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-pose.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-pose.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-pose_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-pose.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-pose.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-pose_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-pose.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-pose.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-pose_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-pose_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-pose_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-pose_ncnn_model/` | ✅ | `imgsz`, `half` | + +See full `export` details in the 
[Export](https://docs.ultralytics.com/modes/export/) page. diff --git a/docs/en/tasks/segment.md b/docs/en/tasks/segment.md new file mode 100644 index 0000000..e48517c --- /dev/null +++ b/docs/en/tasks/segment.md @@ -0,0 +1,190 @@ +--- +comments: true +description: Learn how to use instance segmentation models with Ultralytics YOLO. Instructions on training, validation, image prediction, and model export. +keywords: yolov8, instance segmentation, Ultralytics, COCO dataset, image segmentation, object detection, model training, model validation, image prediction, model export +--- + +# Instance Segmentation + +Instance segmentation examples + +Instance segmentation goes a step further than object detection and involves identifying individual objects in an image and segmenting them from the rest of the image. + +The output of an instance segmentation model is a set of masks or contours that outline each object in the image, along with class labels and confidence scores for each object. Instance segmentation is useful when you need to know not only where objects are in an image, but also what their exact shape is. + +

+
+ +
+ Watch: Run Segmentation with Pre-Trained Ultralytics YOLOv8 Model in Python. +

+ +!!! Tip "Tip" + + YOLOv8 Segment models use the `-seg` suffix, i.e. `yolov8n-seg.pt` and are pretrained on [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml). + +## [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +YOLOv8 pretrained Segment models are shown here. Detect, Segment and Pose models are pretrained on the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) dataset, while Classify models are pretrained on the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) dataset. + +[Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) download automatically from the latest Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use. + +| Model | size
(pixels) | mAPbox
50-95 | mAPmask
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | +|----------------------------------------------------------------------------------------------|-----------------------|----------------------|-----------------------|--------------------------------|-------------------------------------|--------------------|-------------------| +| [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | +| [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | +| [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | +| [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | +| [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | + +- **mAPval** values are for single-model single-scale on [COCO val2017](http://cocodataset.org) dataset. +
Reproduce by `yolo val segment data=coco.yaml device=0` +- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) + instance. +
Reproduce by `yolo val segment data=coco128-seg.yaml batch=1 device=0|cpu` + +## Train + +Train YOLOv8n-seg on the COCO128-seg dataset for 100 epochs at image size 640. For a full list of available arguments, see the [Configuration](../usage/cfg.md) page. + +!!! Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-seg.yaml') # build a new model from YAML + model = YOLO('yolov8n-seg.pt') # load a pretrained model (recommended for training) + model = YOLO('yolov8n-seg.yaml').load('yolov8n.pt') # build from YAML and transfer weights + + # Train the model + results = model.train(data='coco128-seg.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # Build a new model from YAML and start training from scratch + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml epochs=100 imgsz=640 + + # Start training from a pretrained *.pt model + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + + # Build a new model from YAML, transfer pretrained weights to it and start training + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml pretrained=yolov8n-seg.pt epochs=100 imgsz=640 + ``` + +### Dataset format + +YOLO segmentation dataset format can be found in detail in the [Dataset Guide](../datasets/segment/index.md). To convert your existing dataset from other formats (like COCO etc.) to YOLO format, please use the [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) tool by Ultralytics. + +## Val + +Validate trained YOLOv8n-seg model accuracy on the COCO128-seg dataset. No arguments need to be passed, as the `model` +retains its training `data` and arguments as model attributes. + +!!! 
Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-seg.pt') # load an official model + model = YOLO('path/to/best.pt') # load a custom model + + # Validate the model + metrics = model.val() # no arguments needed, dataset and settings remembered + metrics.box.map # map50-95(B) + metrics.box.map50 # map50(B) + metrics.box.map75 # map75(B) + metrics.box.maps # a list contains map50-95(B) of each category + metrics.seg.map # map50-95(M) + metrics.seg.map50 # map50(M) + metrics.seg.map75 # map75(M) + metrics.seg.maps # a list contains map50-95(M) of each category + ``` + === "CLI" + + ```bash + yolo segment val model=yolov8n-seg.pt # val official model + yolo segment val model=path/to/best.pt # val custom model + ``` + +## Predict + +Use a trained YOLOv8n-seg model to run predictions on images. + +!!! Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-seg.pt') # load an official model + model = YOLO('path/to/best.pt') # load a custom model + + # Predict with the model + results = model('https://ultralytics.com/images/bus.jpg') # predict on an image + ``` + === "CLI" + + ```bash + yolo segment predict model=yolov8n-seg.pt source='https://ultralytics.com/images/bus.jpg' # predict with official model + yolo segment predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predict with custom model + ``` + +See full `predict` mode details in the [Predict](https://docs.ultralytics.com/modes/predict/) page. + +## Export + +Export a YOLOv8n-seg model to a different format like ONNX, CoreML, etc. + +!!! 
Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n-seg.pt') # load an official model + model = YOLO('path/to/best.pt') # load a custom trained model + + # Export the model + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-seg.pt format=onnx # export official model + yolo export model=path/to/best.pt format=onnx # export custom trained model + ``` + +Available YOLOv8-seg export formats are in the table below. You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n-seg.onnx`. Usage examples are shown for your model after export completes. + +| Format | `format` Argument | Model | Metadata | Arguments | +|--------------------------------------------------------------------|-------------------|-------------------------------|----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-seg.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-seg.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-seg.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-seg_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-seg.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-seg.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-seg_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-seg.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-seg.tflite` | ✅ | `imgsz`, `half`, 
`int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-seg_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-seg_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-seg_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-seg_ncnn_model/` | ✅ | `imgsz`, `half` | + +See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page. diff --git a/docs/en/usage/callbacks.md b/docs/en/usage/callbacks.md new file mode 100644 index 0000000..647b62d --- /dev/null +++ b/docs/en/usage/callbacks.md @@ -0,0 +1,86 @@ +--- +comments: true +description: Learn how to utilize callbacks in the Ultralytics framework during train, val, export, and predict modes for enhanced functionality. +keywords: Ultralytics, YOLO, callbacks guide, training callback, validation callback, export callback, prediction callback +--- + +## Callbacks + +Ultralytics framework supports callbacks as entry points in strategic stages of train, val, export, and predict modes. Each callback accepts a `Trainer`, `Validator`, or `Predictor` object depending on the operation type. All properties of these objects can be found in Reference section of the docs. + +## Examples + +### Returning additional information with Prediction + +In this example, we want to return the original frame with each result object. 
Here's how we can do that + +```python +def on_predict_batch_end(predictor): + # Retrieve the batch data + _, im0s, _, _ = predictor.batch + + # Ensure that im0s is a list + im0s = im0s if isinstance(im0s, list) else [im0s] + + # Combine the prediction results with the corresponding frames + predictor.results = zip(predictor.results, im0s) + +# Create a YOLO model instance +model = YOLO(f'yolov8n.pt') + +# Add the custom callback to the model +model.add_callback("on_predict_batch_end", on_predict_batch_end) + +# Iterate through the results and frames +for (result, frame) in model.track/predict(): + pass +``` + +## All callbacks + +Here are all supported callbacks. See callbacks [source code](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/base.py) for additional details. + +### Trainer Callbacks + +| Callback | Description | +|-----------------------------|---------------------------------------------------------| +| `on_pretrain_routine_start` | Triggered at the beginning of pre-training routine | +| `on_pretrain_routine_end` | Triggered at the end of pre-training routine | +| `on_train_start` | Triggered when the training starts | +| `on_train_epoch_start` | Triggered at the start of each training epoch | +| `on_train_batch_start` | Triggered at the start of each training batch | +| `optimizer_step` | Triggered during the optimizer step | +| `on_before_zero_grad` | Triggered before gradients are zeroed | +| `on_train_batch_end` | Triggered at the end of each training batch | +| `on_train_epoch_end` | Triggered at the end of each training epoch | +| `on_fit_epoch_end` | Triggered at the end of each fit epoch | +| `on_model_save` | Triggered when the model is saved | +| `on_train_end` | Triggered when the training process ends | +| `on_params_update` | Triggered when model parameters are updated | +| `teardown` | Triggered when the training process is being cleaned up | + +### Validator Callbacks + +| Callback | Description | 
+|----------------------|-------------------------------------------------| +| `on_val_start` | Triggered when the validation starts | +| `on_val_batch_start` | Triggered at the start of each validation batch | +| `on_val_batch_end` | Triggered at the end of each validation batch | +| `on_val_end` | Triggered when the validation ends | + +### Predictor Callbacks + +| Callback | Description | +|------------------------------|---------------------------------------------------| +| `on_predict_start` | Triggered when the prediction process starts | +| `on_predict_batch_start` | Triggered at the start of each prediction batch | +| `on_predict_postprocess_end` | Triggered at the end of prediction postprocessing | +| `on_predict_batch_end` | Triggered at the end of each prediction batch | +| `on_predict_end` | Triggered when the prediction process ends | + +### Exporter Callbacks + +| Callback | Description | +|-------------------|------------------------------------------| +| `on_export_start` | Triggered when the export process starts | +| `on_export_end` | Triggered when the export process ends | diff --git a/docs/en/usage/cfg.md b/docs/en/usage/cfg.md new file mode 100644 index 0000000..5bb0fad --- /dev/null +++ b/docs/en/usage/cfg.md @@ -0,0 +1,247 @@ +--- +comments: true +description: Master YOLOv8 settings and hyperparameters for improved model performance. Learn to use YOLO CLI commands, adjust training settings, and optimize YOLO tasks & modes. +keywords: YOLOv8, settings, hyperparameters, YOLO CLI commands, YOLO tasks, YOLO modes, Ultralytics documentation, model optimization, YOLOv8 training +--- + +YOLO settings and hyperparameters play a critical role in the model's performance, speed, and accuracy. These settings and hyperparameters can affect the model's behavior at various stages of the model development process, including training, validation, and prediction. + +Ultralytics commands use the following syntax: + +!!! 
Example + + === "CLI" + + ```bash + yolo TASK MODE ARGS + ``` + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a YOLOv8 model from a pre-trained weights file + model = YOLO('yolov8n.pt') + + # Run MODE mode using the custom arguments ARGS (guess TASK) + model.MODE(ARGS) + ``` + +Where: + +- `TASK` (optional) is one of ([detect](../tasks/detect.md), [segment](../tasks/segment.md), [classify](../tasks/classify.md), [pose](../tasks/pose.md)) +- `MODE` (required) is one of ([train](../modes/train.md), [val](../modes/val.md), [predict](../modes/predict.md), [export](../modes/export.md), [track](../modes/track.md)) +- `ARGS` (optional) are `arg=value` pairs like `imgsz=640` that override defaults. + +Default `ARG` values are defined on this page from the `cfg/default.yaml` [file](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/default.yaml). + +#### Tasks + +YOLO models can be used for a variety of tasks, including detection, segmentation, classification and pose. These tasks differ in the type of output they produce and the specific problem they are designed to solve. + +**Detect**: For identifying and localizing objects or regions of interest in an image or video. +**Segment**: For dividing an image or video into regions or pixels that correspond to different objects or classes. +**Classify**: For predicting the class label of an input image. +**Pose**: For identifying objects and estimating their keypoints in an image or video. + +| Key | Value | Description | +|--------|------------|-------------------------------------------------| +| `task` | `'detect'` | YOLO task, i.e. detect, segment, classify, pose | + +[Tasks Guide](../tasks/index.md){ .md-button } + +#### Modes + +YOLO models can be used in different modes depending on the specific problem you are trying to solve. These modes include: + +**Train**: For training a YOLOv8 model on a custom dataset. +**Val**: For validating a YOLOv8 model after it has been trained. 
+**Predict**: For making predictions using a trained YOLOv8 model on new images or videos. +**Export**: For exporting a YOLOv8 model to a format that can be used for deployment. +**Track**: For tracking objects in real-time using a YOLOv8 model. +**Benchmark**: For benchmarking YOLOv8 exports (ONNX, TensorRT, etc.) speed and accuracy. + +| Key | Value | Description | +|--------|-----------|---------------------------------------------------------------| +| `mode` | `'train'` | YOLO mode, i.e. train, val, predict, export, track, benchmark | + +[Modes Guide](../modes/index.md){ .md-button } + +## Train + +The training settings for YOLO models encompass various hyperparameters and configurations used during the training process. These settings influence the model's performance, speed, and accuracy. Key training settings include batch size, learning rate, momentum, and weight decay. Additionally, the choice of optimizer, loss function, and training dataset composition can impact the training process. Careful tuning and experimentation with these settings are crucial for optimizing performance. + +| Key | Value | Description | +|-------------------|----------|------------------------------------------------------------------------------------------------| +| `model` | `None` | path to model file, i.e. yolov8n.pt, yolov8n.yaml | +| `data` | `None` | path to data file, i.e. coco128.yaml | +| `epochs` | `100` | number of epochs to train for | +| `patience` | `50` | epochs to wait for no observable improvement for early stopping of training | +| `batch` | `16` | number of images per batch (-1 for AutoBatch) | +| `imgsz` | `640` | size of input images as integer | +| `save` | `True` | save train checkpoints and predict results | +| `save_period` | `-1` | Save checkpoint every x epochs (disabled if < 1) | +| `cache` | `False` | True/ram, disk or False. Use cache for data loading | +| `device` | `None` | device to run on, i.e. 
cuda device=0 or device=0,1,2,3 or device=cpu | +| `workers` | `8` | number of worker threads for data loading (per RANK if DDP) | +| `project` | `None` | project name | +| `name` | `None` | experiment name | +| `exist_ok` | `False` | whether to overwrite existing experiment | +| `pretrained` | `True` | (bool or str) whether to use a pretrained model (bool) or a model to load weights from (str) | +| `optimizer` | `'auto'` | optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto] | +| `verbose` | `False` | whether to print verbose output | +| `seed` | `0` | random seed for reproducibility | +| `deterministic` | `True` | whether to enable deterministic mode | +| `single_cls` | `False` | train multi-class data as single-class | +| `rect` | `False` | rectangular training with each batch collated for minimum padding | +| `cos_lr` | `False` | use cosine learning rate scheduler | +| `close_mosaic` | `10` | (int) disable mosaic augmentation for final epochs (0 to disable) | +| `resume` | `False` | resume training from last checkpoint | +| `amp` | `True` | Automatic Mixed Precision (AMP) training, choices=[True, False] | +| `fraction` | `1.0` | dataset fraction to train on (default is 1.0, all images in train set) | +| `profile` | `False` | profile ONNX and TensorRT speeds during training for loggers | +| `freeze` | `None` | (int or list, optional) freeze first n layers, or freeze list of layer indices during training | +| `lr0` | `0.01` | initial learning rate (i.e. 
SGD=1E-2, Adam=1E-3) | +| `lrf` | `0.01` | final learning rate (lr0 * lrf) | +| `momentum` | `0.937` | SGD momentum/Adam beta1 | +| `weight_decay` | `0.0005` | optimizer weight decay 5e-4 | +| `warmup_epochs` | `3.0` | warmup epochs (fractions ok) | +| `warmup_momentum` | `0.8` | warmup initial momentum | +| `warmup_bias_lr` | `0.1` | warmup initial bias lr | +| `box` | `7.5` | box loss gain | +| `cls` | `0.5` | cls loss gain (scale with pixels) | +| `dfl` | `1.5` | dfl loss gain | +| `pose` | `12.0` | pose loss gain (pose-only) | +| `kobj` | `2.0` | keypoint obj loss gain (pose-only) | +| `label_smoothing` | `0.0` | label smoothing (fraction) | +| `nbs` | `64` | nominal batch size | +| `overlap_mask` | `True` | masks should overlap during training (segment train only) | +| `mask_ratio` | `4` | mask downsample ratio (segment train only) | +| `dropout` | `0.0` | use dropout regularization (classify train only) | +| `val` | `True` | validate/test during training | +| `plots` | `False` | save plots and images during train/val | + +[Train Guide](../modes/train.md){ .md-button } + +## Predict + +The prediction settings for YOLO models encompass a range of hyperparameters and configurations that influence the model's performance, speed, and accuracy during inference on new data. Careful tuning and experimentation with these settings are essential to achieve optimal performance for a specific task. Key settings include the confidence threshold, Non-Maximum Suppression (NMS) threshold, and the number of classes considered. Additional factors affecting the prediction process are input data size and format, the presence of supplementary features such as masks or multiple labels per box, and the particular task the model is employed for. 
+ +Inference arguments: + +| Name | Type | Default | Description | +|-----------------|----------------|------------------------|----------------------------------------------------------------------------| +| `source` | `str` | `'ultralytics/assets'` | source directory for images or videos | +| `conf` | `float` | `0.25` | object confidence threshold for detection | +| `iou` | `float` | `0.7` | intersection over union (IoU) threshold for NMS | +| `imgsz` | `int or tuple` | `640` | image size as scalar or (h, w) list, i.e. (640, 480) | +| `half` | `bool` | `False` | use half precision (FP16) | +| `device` | `None or str` | `None` | device to run on, i.e. cuda device=0/1/2/3 or device=cpu | +| `max_det` | `int` | `300` | maximum number of detections per image | +| `vid_stride` | `bool` | `False` | video frame-rate stride | +| `stream_buffer` | `bool` | `False` | buffer all streaming frames (True) or return the most recent frame (False) | +| `visualize` | `bool` | `False` | visualize model features | +| `augment` | `bool` | `False` | apply image augmentation to prediction sources | +| `agnostic_nms` | `bool` | `False` | class-agnostic NMS | +| `retina_masks` | `bool` | `False` | use high-resolution segmentation masks | +| `classes` | `None or list` | `None` | filter results by class, i.e. 
classes=0, or classes=[0,2,3] | + +Visualization arguments: + +| Name | Type | Default | Description | +|---------------|---------------|---------|-----------------------------------------------------------------| +| `show` | `bool` | `False` | show predicted images and videos if environment allows | +| `save` | `bool` | `False` | save predicted images and videos | +| `save_frames` | `bool` | `False` | save predicted individual video frames | +| `save_txt` | `bool` | `False` | save results as `.txt` file | +| `save_conf` | `bool` | `False` | save results with confidence scores | +| `save_crop` | `bool` | `False` | save cropped images with results | +| `show_labels` | `bool` | `True` | show prediction labels, i.e. 'person' | +| `show_conf` | `bool` | `True` | show prediction confidence, i.e. '0.99' | +| `show_boxes` | `bool` | `True` | show prediction boxes | +| `line_width` | `None or int` | `None` | line width of the bounding boxes. Scaled to image size if None. | + +[Predict Guide](../modes/predict.md){ .md-button } + +## Val + +The val (validation) settings for YOLO models involve various hyperparameters and configurations used to evaluate the model's performance on a validation dataset. These settings influence the model's performance, speed, and accuracy. Common YOLO validation settings include batch size, validation frequency during training, and performance evaluation metrics. Other factors affecting the validation process include the validation dataset's size and composition, as well as the specific task the model is employed for. Careful tuning and experimentation with these settings are crucial to ensure optimal performance on the validation dataset and detect and prevent overfitting. + +| Key | Value | Description | +|---------------|---------|--------------------------------------------------------------------| +| `data` | `None` | path to data file, i.e. 
coco128.yaml | +| `imgsz` | `640` | size of input images as integer | +| `batch` | `16` | number of images per batch (-1 for AutoBatch) | +| `save_json` | `False` | save results to JSON file | +| `save_hybrid` | `False` | save hybrid version of labels (labels + additional predictions) | +| `conf` | `0.001` | object confidence threshold for detection | +| `iou` | `0.6` | intersection over union (IoU) threshold for NMS | +| `max_det` | `300` | maximum number of detections per image | +| `half` | `True` | use half precision (FP16) | +| `device` | `None` | device to run on, i.e. cuda device=0/1/2/3 or device=cpu | +| `dnn` | `False` | use OpenCV DNN for ONNX inference | +| `plots` | `False` | save plots and images during train/val | +| `rect` | `False` | rectangular val with each batch collated for minimum padding | +| `split` | `val` | dataset split to use for validation, i.e. 'val', 'test' or 'train' | + +[Val Guide](../modes/val.md){ .md-button } + +## Export + +Export settings for YOLO models encompass configurations and options related to saving or exporting the model for use in different environments or platforms. These settings can impact the model's performance, size, and compatibility with various systems. Key export settings include the exported model file format (e.g., ONNX, TensorFlow SavedModel), the target device (e.g., CPU, GPU), and additional features such as masks or multiple labels per box. The export process may also be affected by the model's specific task and the requirements or constraints of the destination environment or platform. It is crucial to thoughtfully configure these settings to ensure the exported model is optimized for the intended use case and functions effectively in the target environment. + +| Key | Value | Description | +|-------------|-----------------|------------------------------------------------------| +| `format` | `'torchscript'` | format to export to | +| `imgsz` | `640` | image size as scalar or (h, w) list, i.e. 
(640, 480) | +| `keras` | `False` | use Keras for TF SavedModel export | +| `optimize` | `False` | TorchScript: optimize for mobile | +| `half` | `False` | FP16 quantization | +| `int8` | `False` | INT8 quantization | +| `dynamic` | `False` | ONNX/TensorRT: dynamic axes | +| `simplify` | `False` | ONNX/TensorRT: simplify model | +| `opset` | `None` | ONNX: opset version (optional, defaults to latest) | +| `workspace` | `4` | TensorRT: workspace size (GB) | +| `nms` | `False` | CoreML: add NMS | + +[Export Guide](../modes/export.md){ .md-button } + +## Augmentation + +Augmentation settings for YOLO models refer to the various transformations and modifications applied to the training data to increase the diversity and size of the dataset. These settings can affect the model's performance, speed, and accuracy. Some common YOLO augmentation settings include the type and intensity of the transformations applied (e.g. random flips, rotations, cropping, color changes), the probability with which each transformation is applied, and the presence of additional features such as masks or multiple labels per box. Other factors that may affect the augmentation process include the size and composition of the original dataset and the specific task the model is being used for. It is important to carefully tune and experiment with these settings to ensure that the augmented dataset is diverse and representative enough to train a high-performing model. 
+ +| Key | Value | Description | +|---------------|---------|-------------------------------------------------| +| `hsv_h` | `0.015` | image HSV-Hue augmentation (fraction) | +| `hsv_s` | `0.7` | image HSV-Saturation augmentation (fraction) | +| `hsv_v` | `0.4` | image HSV-Value augmentation (fraction) | +| `degrees` | `0.0` | image rotation (+/- deg) | +| `translate` | `0.1` | image translation (+/- fraction) | +| `scale` | `0.5` | image scale (+/- gain) | +| `shear` | `0.0` | image shear (+/- deg) | +| `perspective` | `0.0` | image perspective (+/- fraction), range 0-0.001 | +| `flipud` | `0.0` | image flip up-down (probability) | +| `fliplr` | `0.5` | image flip left-right (probability) | +| `mosaic` | `1.0` | image mosaic (probability) | +| `mixup` | `0.0` | image mixup (probability) | +| `copy_paste` | `0.0` | segment copy-paste (probability) | + +## Logging, checkpoints, plotting and file management + +Logging, checkpoints, plotting, and file management are important considerations when training a YOLO model. + +- Logging: It is often helpful to log various metrics and statistics during training to track the model's progress and diagnose any issues that may arise. This can be done using a logging library such as TensorBoard or by writing log messages to a file. +- Checkpoints: It is a good practice to save checkpoints of the model at regular intervals during training. This allows you to resume training from a previous point if the training process is interrupted or if you want to experiment with different training configurations. +- Plotting: Visualizing the model's performance and training progress can be helpful for understanding how the model is behaving and identifying potential issues. This can be done using a plotting library such as matplotlib or by generating plots using a logging library such as TensorBoard. 
+- File management: Managing the various files generated during the training process, such as model checkpoints, log files, and plots, can be challenging. It is important to have a clear and organized file structure to keep track of these files and make it easy to access and analyze them as needed. + +Effective logging, checkpointing, plotting, and file management can help you keep track of the model's progress and make it easier to debug and optimize the training process. + +| Key | Value | Description | +|------------|----------|------------------------------------------------------------------------------------------------| +| `project` | `'runs'` | project name | +| `name` | `'exp'` | experiment name. `exp` gets automatically incremented if not specified, i.e, `exp`, `exp2` ... | +| `exist_ok` | `False` | whether to overwrite existing experiment | +| `plots` | `False` | save plots during train/val | +| `save` | `False` | save train checkpoints and predict results | diff --git a/docs/en/usage/cli.md b/docs/en/usage/cli.md new file mode 100644 index 0000000..7561467 --- /dev/null +++ b/docs/en/usage/cli.md @@ -0,0 +1,227 @@ +--- +comments: true +description: 'Learn how to use Ultralytics YOLO through Command Line: train models, run predictions and exports models to different formats easily using terminal commands.' +keywords: Ultralytics, YOLO, CLI, train, validation, prediction, command line interface, YOLO CLI, YOLO terminal, model training, prediction, exporting +--- + +# Command Line Interface Usage + +The YOLO command line interface (CLI) allows for simple single-line commands without the need for a Python environment. CLI requires no customization or Python code. You can simply run all tasks from the terminal with the `yolo` command. + +

+
+ +
+ Watch: Mastering Ultralytics YOLOv8: CLI & Python Usage and Live Inference +

+ +!!! Example + + === "Syntax" + + Ultralytics `yolo` commands use the following syntax: + ```bash + yolo TASK MODE ARGS + + Where TASK (optional) is one of [detect, segment, classify] + MODE (required) is one of [train, val, predict, export, track] + ARGS (optional) are any number of custom 'arg=value' pairs like 'imgsz=320' that override defaults. + ``` + See all ARGS in the full [Configuration Guide](cfg.md) or with `yolo cfg` + + === "Train" + + Train a detection model for 10 epochs with an initial learning_rate of 0.01 + ```bash + yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01 + ``` + + === "Predict" + + Predict a YouTube video using a pretrained segmentation model at image size 320: + ```bash + yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 + ``` + + === "Val" + + Val a pretrained detection model at batch-size 1 and image size 640: + ```bash + yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640 + ``` + + === "Export" + + Export a YOLOv8n classification model to ONNX format at image size 224 by 128 (no TASK required) + ```bash + yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128 + ``` + + === "Special" + + Run special commands to see version, view settings, run checks and more: + ```bash + yolo help + yolo checks + yolo version + yolo settings + yolo copy-cfg + yolo cfg + ``` + +Where: + +- `TASK` (optional) is one of `[detect, segment, classify]`. If it is not passed explicitly YOLOv8 will try to guess the `TASK` from the model type. +- `MODE` (required) is one of `[train, val, predict, export, track]` +- `ARGS` (optional) are any number of custom `arg=value` pairs like `imgsz=320` that override defaults. For a full list of available `ARGS` see the [Configuration](cfg.md) page and `default.yaml` + GitHub [source](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/default.yaml). + +!!!
Warning "Warning" + + Arguments must be passed as `arg=val` pairs, split by an equals `=` sign and delimited by spaces ` ` between pairs. Do not use `--` argument prefixes or commas `,` between arguments. + + - `yolo predict model=yolov8n.pt imgsz=640 conf=0.25`   ✅ + - `yolo predict model yolov8n.pt imgsz 640 conf 0.25`   ❌ + - `yolo predict --model yolov8n.pt --imgsz 640 --conf 0.25`   ❌ + +## Train + +Train YOLOv8n on the COCO128 dataset for 100 epochs at image size 640. For a full list of available arguments see the [Configuration](cfg.md) page. + +!!! Example "Example" + + === "Train" + + Start training YOLOv8n on COCO128 for 100 epochs at image-size 640. + ```bash + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + ``` + + === "Resume" + + Resume an interrupted training. + ```bash + yolo detect train resume model=last.pt + ``` + +## Val + +Validate trained YOLOv8n model accuracy on the COCO128 dataset. No arguments need to be passed as the `model` retains its training `data` and arguments as model attributes. + +!!! Example "Example" + + === "Official" + + Validate an official YOLOv8n model. + ```bash + yolo detect val model=yolov8n.pt + ``` + + === "Custom" + + Validate a custom-trained model. + ```bash + yolo detect val model=path/to/best.pt + ``` + +## Predict + +Use a trained YOLOv8n model to run predictions on images. + +!!! Example "Example" + + === "Official" + + Predict with an official YOLOv8n model. + ```bash + yolo detect predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' + ``` + + === "Custom" + + Predict with a custom model. + ```bash + yolo detect predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' + ``` + +## Export + +Export a YOLOv8n model to a different format like ONNX, CoreML, etc. + +!!! Example "Example" + + === "Official" + + Export an official YOLOv8n model to ONNX format.
+ ```bash + yolo export model=yolov8n.pt format=onnx + ``` + + === "Custom" + + Export a custom-trained model to ONNX format. + ```bash + yolo export model=path/to/best.pt format=onnx + ``` + +Available YOLOv8 export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. + +| Format | `format` Argument | Model | Metadata | Arguments | +|--------------------------------------------------------------------|-------------------|---------------------------|----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| 
[ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +## Overriding default arguments + +Default arguments can be overridden by simply passing them as arguments in the CLI in `arg=value` pairs. + +!!! Tip "" + + === "Train" + Train a detection model for `10 epochs` with `learning_rate` of `0.01` + ```bash + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01 + ``` + + === "Predict" + Predict a YouTube video using a pretrained segmentation model at image size 320: + ```bash + yolo segment predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 + ``` + + === "Val" + Validate a pretrained detection model at batch-size 1 and image size 640: + ```bash + yolo detect val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640 + ``` + +## Overriding default config file + +You can override the `default.yaml` config file entirely by passing a new file with the `cfg` arguments, i.e. `cfg=custom.yaml`. + +To do this first create a copy of `default.yaml` in your current working dir with the `yolo copy-cfg` command. + +This will create `default_copy.yaml`, which you can then pass as `cfg=default_copy.yaml` along with any additional args, like `imgsz=320` in this example: + +!!! Example + + === "CLI" + ```bash + yolo copy-cfg + yolo cfg=default_copy.yaml imgsz=320 + ``` diff --git a/docs/en/usage/engine.md b/docs/en/usage/engine.md new file mode 100644 index 0000000..3ff1361 --- /dev/null +++ b/docs/en/usage/engine.md @@ -0,0 +1,82 @@ +--- +comments: true +description: Discover how to customize and extend base Ultralytics YOLO Trainer engines. Support your custom model and dataloader by overriding built-in functions. 
+keywords: Ultralytics, YOLO, trainer engines, BaseTrainer, DetectionTrainer, customizing trainers, extending trainers, custom model, custom dataloader +--- + +Both the Ultralytics YOLO command-line and Python interfaces are simply a high-level abstraction on the base engine executors. Let's take a look at the Trainer engine. + +## BaseTrainer + +BaseTrainer contains the generic boilerplate training routine. It can be customized for any task by overriding the required functions or operations as long as the correct formats are followed. For example, you can support your own custom model and dataloader by just overriding these functions: + +* `get_model(cfg, weights)` - The function that builds the model to be trained +* `get_dataloader()` - The function that builds the dataloader More details and source code can be found in [`BaseTrainer` Reference](../reference/engine/trainer.md) + +## DetectionTrainer + +Here's how you can use the YOLOv8 `DetectionTrainer` and customize it. + +```python +from ultralytics.models.yolo.detect import DetectionTrainer + +trainer = DetectionTrainer(overrides={...}) +trainer.train() +trained_model = trainer.best # get best model +``` + +### Customizing the DetectionTrainer + +Let's customize the trainer **to train a custom detection model** that is not supported directly. You can do this by simply overriding the existing `get_model` functionality: + +```python +from ultralytics.models.yolo.detect import DetectionTrainer + + +class CustomTrainer(DetectionTrainer): + def get_model(self, cfg, weights): + ... + + +trainer = CustomTrainer(overrides={...}) +trainer.train() +``` + +You now realize that you need to customize the trainer further to: + +* Customize the `loss function`.
+* Add `callback` that uploads model to your Google Drive after every 10 `epochs` + Here's how you can do it: + +```python +from ultralytics.models.yolo.detect import DetectionTrainer +from ultralytics.nn.tasks import DetectionModel + + +class MyCustomModel(DetectionModel): + def init_criterion(self): + ... + + +class CustomTrainer(DetectionTrainer): + def get_model(self, cfg, weights): + return MyCustomModel(...) + + +# callback to upload model weights +def log_model(trainer): + last_weight_path = trainer.last + ... + + +trainer = CustomTrainer(overrides={...}) +trainer.add_callback("on_train_epoch_end", log_model) # Adds to existing callback +trainer.train() +``` + +To learn more about callback triggering events and entry points, check out our [Callbacks Guide](callbacks.md) + +## Other engine components + +There are other components that can be customized similarly like `Validators` and `Predictors` +See Reference section for more information on these. diff --git a/docs/en/usage/python.md b/docs/en/usage/python.md new file mode 100644 index 0000000..0c51c5b --- /dev/null +++ b/docs/en/usage/python.md @@ -0,0 +1,262 @@ +--- +comments: true +description: Boost your Python projects with object detection, segmentation and classification using YOLOv8. Explore how to load, train, validate, predict, export, track and benchmark models with ease. +keywords: YOLOv8, Ultralytics, Python, object detection, segmentation, classification, model training, validation, prediction, model export, benchmark, real-time tracking +--- + +# Python Usage + +Welcome to the YOLOv8 Python Usage documentation! This guide is designed to help you seamlessly integrate YOLOv8 into your Python projects for object detection, segmentation, and classification. Here, you'll learn how to load and use pretrained models, train new models, and perform predictions on images.
The easy-to-use Python interface is a valuable resource for anyone looking to incorporate YOLOv8 into their Python projects, allowing you to quickly implement advanced object detection capabilities. Let's get started! + +For example, users can load a model, train it, evaluate its performance on a validation set, and even export it to ONNX format with just a few lines of code. + +!!! Example "Python" + + ```python + from ultralytics import YOLO + + # Create a new YOLO model from scratch + model = YOLO('yolov8n.yaml') + + # Load a pretrained YOLO model (recommended for training) + model = YOLO('yolov8n.pt') + + # Train the model using the 'coco128.yaml' dataset for 3 epochs + results = model.train(data='coco128.yaml', epochs=3) + + # Evaluate the model's performance on the validation set + results = model.val() + + # Perform object detection on an image using the model + results = model('https://ultralytics.com/images/bus.jpg') + + # Export the model to ONNX format + success = model.export(format='onnx') + ``` + +## [Train](../modes/train.md) + +Train mode is used for training a YOLOv8 model on a custom dataset. In this mode, the model is trained using the specified dataset and hyperparameters. The training process involves optimizing the model's parameters so that it can accurately predict the classes and locations of objects in an image. + +!!! Example "Train" + + === "From pretrained(recommended)" + ```python + from ultralytics import YOLO + + model = YOLO('yolov8n.pt') # pass any model type + results = model.train(epochs=5) + ``` + + === "From scratch" + ```python + from ultralytics import YOLO + + model = YOLO('yolov8n.yaml') + results = model.train(data='coco128.yaml', epochs=5) + ``` + + === "Resume" + ```python + model = YOLO("last.pt") + results = model.train(resume=True) + ``` + +[Train Examples](../modes/train.md){ .md-button } + +## [Val](../modes/val.md) + +Val mode is used for validating a YOLOv8 model after it has been trained. 
In this mode, the model is evaluated on a validation set to measure its accuracy and generalization performance. This mode can be used to tune the hyperparameters of the model to improve its performance. + +!!! Example "Val" + + === "Val after training" + ```python + from ultralytics import YOLO + + model = YOLO('yolov8n.yaml') + model.train(data='coco128.yaml', epochs=5) + model.val() # It'll automatically evaluate the data you trained. + ``` + + === "Val independently" + ```python + from ultralytics import YOLO + + model = YOLO("model.pt") + # It'll use the data YAML file in model.pt if you don't set data. + model.val() + # or you can set the data you want to val + model.val(data='coco128.yaml') + ``` + +[Val Examples](../modes/val.md){ .md-button } + +## [Predict](../modes/predict.md) + +Predict mode is used for making predictions using a trained YOLOv8 model on new images or videos. In this mode, the model is loaded from a checkpoint file, and the user can provide images or videos to perform inference. The model predicts the classes and locations of objects in the input images or videos. + +!!! Example "Predict" + + === "From source" + ```python + from ultralytics import YOLO + from PIL import Image + import cv2 + + model = YOLO("model.pt") + # accepts all formats - image/dir/Path/URL/video/PIL/ndarray. 0 for webcam + results = model.predict(source="0") + results = model.predict(source="folder", show=True) # Display preds. 
Accepts all YOLO predict arguments + + # from PIL + im1 = Image.open("bus.jpg") + results = model.predict(source=im1, save=True) # save plotted images + + # from ndarray + im2 = cv2.imread("bus.jpg") + results = model.predict(source=im2, save=True, save_txt=True) # save predictions as labels + + # from list of PIL/ndarray + results = model.predict(source=[im1, im2]) + ``` + + === "Results usage" + ```python + # results would be a list of Results object including all the predictions by default + # but be careful as it could occupy a lot memory when there're many images, + # especially the task is segmentation. + # 1. return as a list + results = model.predict(source="folder") + + # results would be a generator which is more friendly to memory by setting stream=True + # 2. return as a generator + results = model.predict(source=0, stream=True) + + for result in results: + # Detection + result.boxes.xyxy # box with xyxy format, (N, 4) + result.boxes.xywh # box with xywh format, (N, 4) + result.boxes.xyxyn # box with xyxy format but normalized, (N, 4) + result.boxes.xywhn # box with xywh format but normalized, (N, 4) + result.boxes.conf # confidence score, (N, 1) + result.boxes.cls # cls, (N, 1) + + # Segmentation + result.masks.data # masks, (N, H, W) + result.masks.xy # x,y segments (pixels), List[segment] * N + result.masks.xyn # x,y segments (normalized), List[segment] * N + + # Classification + result.probs # cls prob, (num_class, ) + + # Each result is composed of torch.Tensor by default, + # in which you can easily use following functionality: + result = result.cuda() + result = result.cpu() + result = result.to("cpu") + result = result.numpy() + ``` + +[Predict Examples](../modes/predict.md){ .md-button } + +## [Export](../modes/export.md) + +Export mode is used for exporting a YOLOv8 model to a format that can be used for deployment. In this mode, the model is converted to a format that can be used by other software applications or hardware devices. 
This mode is useful when deploying the model to production environments. + +!!! Example "Export" + + === "Export to ONNX" + + Export an official YOLOv8n model to ONNX with dynamic batch-size and image-size. + ```python + from ultralytics import YOLO + + model = YOLO('yolov8n.pt') + model.export(format='onnx', dynamic=True) + ``` + + === "Export to TensorRT" + + Export an official YOLOv8n model to TensorRT on `device=0` for acceleration on CUDA devices. + ```python + from ultralytics import YOLO + + model = YOLO('yolov8n.pt') + model.export(format='engine', device=0) + ``` + +[Export Examples](../modes/export.md){ .md-button } + +## [Track](../modes/track.md) + +Track mode is used for tracking objects in real-time using a YOLOv8 model. In this mode, the model is loaded from a checkpoint file, and the user can provide a live video stream to perform real-time object tracking. This mode is useful for applications such as surveillance systems or self-driving cars. + +!!! Example "Track" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n.pt') # load an official detection model + model = YOLO('yolov8n-seg.pt') # load an official segmentation model + model = YOLO('path/to/best.pt') # load a custom model + + # Track with the model + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True) + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True, tracker="bytetrack.yaml") + ``` + +[Track Examples](../modes/track.md){ .md-button } + +## [Benchmark](../modes/benchmark.md) + +Benchmark mode is used to profile the speed and accuracy of various export formats for YOLOv8. The benchmarks provide information on the size of the exported format, its `mAP50-95` metrics (for object detection and segmentation) +or `accuracy_top5` metrics (for classification), and the inference time in milliseconds per image across various export formats like ONNX, OpenVINO, TensorRT and others.
This information can help users choose the optimal export format for their specific use case based on their requirements for speed and accuracy. + +!!! Example "Benchmark" + + === "Python" + + Benchmark an official YOLOv8n model across all export formats. + ```python + from ultralytics.utils.benchmarks import benchmark + + # Benchmark + benchmark(model='yolov8n.pt', data='coco8.yaml', imgsz=640, half=False, device=0) + ``` + +[Benchmark Examples](../modes/benchmark.md){ .md-button } + +## Using Trainers + +`YOLO` model class is a high-level wrapper on the Trainer classes. Each YOLO task has its own trainer that inherits from `BaseTrainer`. + +!!! Tip "Detection Trainer Example" + + ```python + from ultralytics.models.yolo import DetectionTrainer, DetectionValidator, DetectionPredictor + + # trainer + trainer = DetectionTrainer(overrides={}) + trainer.train() + trained_model = trainer.best + + # Validator + val = DetectionValidator(args=...) + val(model=trained_model) + + # predictor + pred = DetectionPredictor(overrides={}) + pred(source=SOURCE, model=trained_model) + + # resume from last weight + overrides["resume"] = trainer.last + trainer = detect.DetectionTrainer(overrides=overrides) + ``` + +You can easily customize Trainers to support custom tasks or explore R&D ideas. Learn more about Customizing `Trainers`, `Validators` and `Predictors` to suit your project needs in the Customization Section. + +[Customization tutorials](engine.md){ .md-button } diff --git a/docs/en/yolov5/environments/aws_quickstart_tutorial.md b/docs/en/yolov5/environments/aws_quickstart_tutorial.md new file mode 100644 index 0000000..52c64ec --- /dev/null +++ b/docs/en/yolov5/environments/aws_quickstart_tutorial.md @@ -0,0 +1,88 @@ +--- +comments: true +description: Step-by-step guide to run YOLOv5 on AWS Deep Learning instance. Learn how to create an instance, connect to it and train, validate and deploy models. 
+keywords: AWS, YOLOv5, instance, deep learning, Ultralytics, guide, training, deployment, object detection +--- + +# YOLOv5 🚀 on AWS Deep Learning Instance: A Comprehensive Guide + +This guide will help new users run YOLOv5 on an Amazon Web Services (AWS) Deep Learning instance. AWS offers a [Free Tier](https://aws.amazon.com/free/) and a [credit program](https://aws.amazon.com/activate/) for a quick and affordable start. + +Other quickstart options for YOLOv5 include our [Colab Notebook](https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb) Open In Colab Open In Kaggle, [GCP Deep Learning VM](https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial), and our Docker image at [Docker Hub](https://hub.docker.com/r/ultralytics/yolov5) Docker Pulls. *Updated: 21 April 2023*. + +## 1. AWS Console Sign-in + +Create an account or sign in to the AWS console at [https://aws.amazon.com/console/](https://aws.amazon.com/console/) and select the **EC2** service. + +![Console](https://user-images.githubusercontent.com/26833433/106323804-debddd00-622c-11eb-997f-b8217dc0e975.png) + +## 2. Launch Instance + +In the EC2 section of the AWS console, click the **Launch instance** button. + +![Launch](https://user-images.githubusercontent.com/26833433/106323950-204e8800-622d-11eb-915d-5c90406973ea.png) + +### Choose an Amazon Machine Image (AMI) + +Enter 'Deep Learning' in the search field and select the most recent Ubuntu Deep Learning AMI (recommended), or an alternative Deep Learning AMI. For more information on selecting an AMI, see [Choosing Your DLAMI](https://docs.aws.amazon.com/dlami/latest/devguide/options.html). + +![Choose AMI](https://user-images.githubusercontent.com/26833433/106326107-c9e34880-6230-11eb-97c9-3b5fc2f4e2ff.png) + +### Select an Instance Type + +A GPU instance is recommended for most deep learning purposes. Training new models will be faster on a GPU instance than a CPU instance. 
Multi-GPU instances or distributed training across multiple instances with GPUs can offer sub-linear scaling. To set up distributed training, see [Distributed Training](https://docs.aws.amazon.com/dlami/latest/devguide/distributed-training.html). + +**Note:** The size of your model should be a factor in selecting an instance. If your model exceeds an instance's available RAM, select a different instance type with enough memory for your application. + +Refer to [EC2 Instance Types](https://aws.amazon.com/ec2/instance-types/) and choose Accelerated Computing to see the different GPU instance options. + +![Choose Type](https://user-images.githubusercontent.com/26833433/106324624-52141e80-622e-11eb-9662-1a376d9c887d.png) + +For more information on GPU monitoring and optimization, see [GPU Monitoring and Optimization](https://docs.aws.amazon.com/dlami/latest/devguide/tutorial-gpu.html). For pricing, see [On-Demand Pricing](https://aws.amazon.com/ec2/pricing/on-demand/) and [Spot Pricing](https://aws.amazon.com/ec2/spot/pricing/). + +### Configure Instance Details + +Amazon EC2 Spot Instances let you take advantage of unused EC2 capacity in the AWS cloud. Spot Instances are available at up to a 70% discount compared to On-Demand prices. We recommend a persistent spot instance, which will save your data and restart automatically when spot instance availability returns after spot instance termination. For full-price On-Demand instances, leave these settings at their default values. + +![Spot Request](https://user-images.githubusercontent.com/26833433/106324835-ac14e400-622e-11eb-8853-df5ec9b16dfc.png) + +Complete Steps 4-7 to finalize your instance hardware and security settings, and then launch the instance. + +## 3. Connect to Instance + +Select the checkbox next to your running instance, and then click Connect. Copy and paste the SSH terminal command into a terminal of your choice to connect to your instance. 
+ +![Connect](https://user-images.githubusercontent.com/26833433/106325530-cf8c5e80-622f-11eb-9f64-5b313a9d57a1.png) + +## 4. Run YOLOv5 + +Once you have logged in to your instance, clone the repository and install the dependencies in a [**Python>=3.8.0**](https://www.python.org/) environment, including [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/). [Models](https://github.com/ultralytics/yolov5/tree/master/models) and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) download automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). + +```bash +git clone https://github.com/ultralytics/yolov5 # clone +cd yolov5 +pip install -r requirements.txt # install +``` + +Then, start training, testing, detecting, and exporting YOLOv5 models: + +```bash +python train.py # train a model +python val.py --weights yolov5s.pt # validate a model for Precision, Recall, and mAP +python detect.py --weights yolov5s.pt --source path/to/images # run inference on images and videos +python export.py --weights yolov5s.pt --include onnx coreml tflite # export models to other formats +``` + +## Optional Extras + +Add 64GB of swap memory (to `--cache` large datasets): + +```bash +sudo fallocate -l 64G /swapfile +sudo chmod 600 /swapfile +sudo mkswap /swapfile +sudo swapon /swapfile +free -h # check memory +``` + +Now you have successfully set up and run YOLOv5 on an AWS Deep Learning instance. Enjoy training, testing, and deploying your object detection models! 
diff --git a/docs/en/yolov5/environments/azureml_quickstart_tutorial.md b/docs/en/yolov5/environments/azureml_quickstart_tutorial.md new file mode 100644 index 0000000..e1e58a4 --- /dev/null +++ b/docs/en/yolov5/environments/azureml_quickstart_tutorial.md @@ -0,0 +1,95 @@ +--- +comments: true +description: Azure Machine Learning YOLOv5 quickstart +keywords: Ultralytics, YOLO, Deep Learning, Object detection, quickstart, Azure, AzureML +--- + +# YOLOv5 🚀 on AzureML + +This guide provides a quickstart to use YOLOv5 from an AzureML compute instance. + +Note that this guide is a quickstart for quick trials. If you want to unlock the full power of AzureML, you can find the documentation to: + +- [Create a data asset](https://learn.microsoft.com/azure/machine-learning/how-to-create-data-assets) +- [Create an AzureML job](https://learn.microsoft.com/azure/machine-learning/how-to-train-model) +- [Register a model](https://learn.microsoft.com/azure/machine-learning/how-to-manage-models) + +## Prerequisites + +You need an [AzureML workspace](https://learn.microsoft.com/azure/machine-learning/concept-workspace?view=azureml-api-2). + +## Create a compute instance + +From your AzureML workspace, select Compute > Compute instances > New, and then select the instance with the resources you need. + +create-compute-arrow + +## Open a Terminal + +Now from the Notebooks view, open a Terminal and select your compute. 
+ +![open-terminal-arrow](https://github.com/ouphi/ultralytics/assets/17216799/c4697143-7234-4a04-89ea-9084ed9c6312) + +## Setup and run YOLOv5 + +Now you can create a virtual environment: + +```bash +conda create --name yolov5env -y +conda activate yolov5env +conda install pip -y +``` + +Clone YOLOv5 repository with its submodules: + +```bash +git clone https://github.com/ultralytics/yolov5 +cd yolov5 +git submodule update --init --recursive # Note that you might get a message asking you to add your folder as a safe.directory; if so, just copy and run the recommended command +``` + +Install the required dependencies: + +```bash +pip install -r requirements.txt +pip install "onnx>=1.10.0" +``` + +Train the YOLOv5 model: + +```bash +python train.py +``` + +Validate the model for Precision, Recall, and mAP: + +```bash +python val.py --weights yolov5s.pt +``` + +Run inference on images and videos: + +```bash +python detect.py --weights yolov5s.pt --source path/to/images +``` + +Export models to other formats: + +```bash +python export.py --weights yolov5s.pt --include onnx coreml tflite +``` + +## Notes on using a notebook + +Note that if you want to run these commands from a Notebook, you need to [create a new Kernel](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-access-terminal?view=azureml-api-2#add-new-kernels) +and select your new Kernel on the top of your Notebook. + +If you create Python cells it will automatically use your custom environment, but if you add bash cells, you will need to run `source activate <your-env>` on each of these cells to make sure it uses your custom environment. 
+ +For example: + +```bash +%%bash +source activate newenv +python val.py --weights yolov5s.pt +``` diff --git a/docs/en/yolov5/environments/docker_image_quickstart_tutorial.md b/docs/en/yolov5/environments/docker_image_quickstart_tutorial.md new file mode 100644 index 0000000..5ff8797 --- /dev/null +++ b/docs/en/yolov5/environments/docker_image_quickstart_tutorial.md @@ -0,0 +1,64 @@ +--- +comments: true +description: Learn how to set up and run YOLOv5 in a Docker container. This tutorial includes the prerequisites and step-by-step instructions. +keywords: YOLOv5, Docker, Ultralytics, Image Detection, YOLOv5 Docker Image, Docker Container, Machine Learning, AI +--- + +# Get Started with YOLOv5 🚀 in Docker + +This tutorial will guide you through the process of setting up and running YOLOv5 in a Docker container. + +You can also explore other quickstart options for YOLOv5, such as our [Colab Notebook](https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb) Open In Colab Open In Kaggle, [GCP Deep Learning VM](https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial), and [Amazon AWS](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial). *Updated: 21 April 2023*. + +## Prerequisites + +1. **Nvidia Driver**: Version 455.23 or higher. Download from [Nvidia's website](https://www.nvidia.com/Download/index.aspx). +2. **Nvidia-Docker**: Allows Docker to interact with your local GPU. Installation instructions are available on the [Nvidia-Docker GitHub repository](https://github.com/NVIDIA/nvidia-docker). +3. **Docker Engine - CE**: Version 19.03 or higher. Download and installation instructions can be found on the [Docker website](https://docs.docker.com/install/). + +## Step 1: Pull the YOLOv5 Docker Image + +The Ultralytics YOLOv5 DockerHub repository is available at [https://hub.docker.com/r/ultralytics/yolov5](https://hub.docker.com/r/ultralytics/yolov5). 
Docker Autobuild ensures that the `ultralytics/yolov5:latest` image is always in sync with the most recent repository commit. To pull the latest image, run the following command: + +```bash +sudo docker pull ultralytics/yolov5:latest +``` + +## Step 2: Run the Docker Container + +### Basic container: + +Run an interactive instance of the YOLOv5 Docker image (called a "container") using the `-it` flag: + +```bash +sudo docker run --ipc=host -it ultralytics/yolov5:latest +``` + +### Container with local file access: + +To run a container with access to local files (e.g., COCO training data in `/datasets`), use the `-v` flag: + +```bash +sudo docker run --ipc=host -it -v "$(pwd)"/datasets:/usr/src/datasets ultralytics/yolov5:latest +``` + +### Container with GPU access: + +To run a container with GPU access, use the `--gpus all` flag: + +```bash +sudo docker run --ipc=host -it --gpus all ultralytics/yolov5:latest +``` + +## Step 3: Use YOLOv5 🚀 within the Docker Container + +Now you can train, test, detect, and export YOLOv5 models within the running Docker container: + +```bash +python train.py # train a model +python val.py --weights yolov5s.pt # validate a model for Precision, Recall, and mAP +python detect.py --weights yolov5s.pt --source path/to/images # run inference on images and videos +python export.py --weights yolov5s.pt --include onnx coreml tflite # export models to other formats +``` + +

GCP running Docker

diff --git a/docs/en/yolov5/environments/google_cloud_quickstart_tutorial.md b/docs/en/yolov5/environments/google_cloud_quickstart_tutorial.md new file mode 100644 index 0000000..38a54f1 --- /dev/null +++ b/docs/en/yolov5/environments/google_cloud_quickstart_tutorial.md @@ -0,0 +1,49 @@ +--- +comments: true +description: Step-by-step tutorial on how to set up and run YOLOv5 on Google Cloud Platform Deep Learning VM. A perfect guide for beginners and new GCP users! +keywords: YOLOv5, Google Cloud Platform, GCP, Deep Learning VM, Ultralytics +--- + +# Run YOLOv5 🚀 on Google Cloud Platform (GCP) Deep Learning Virtual Machine (VM) ⭐ + +This tutorial will guide you through the process of setting up and running YOLOv5 on a GCP Deep Learning VM. New GCP users are eligible for a [$300 free credit offer](https://cloud.google.com/free/docs/gcp-free-tier#free-trial). + +You can also explore other quickstart options for YOLOv5, such as our [Colab Notebook](https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb) Open In Colab Open In Kaggle, [Amazon AWS](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial) and our Docker image at [Docker Hub](https://hub.docker.com/r/ultralytics/yolov5) Docker Pulls. *Updated: 21 April 2023*. + +**Last Updated**: 6 May 2022 + +## Step 1: Create a Deep Learning VM + +1. Go to the [GCP marketplace](https://console.cloud.google.com/marketplace/details/click-to-deploy-images/deeplearning) and select a **Deep Learning VM**. +2. Choose an **n1-standard-8** instance (with 8 vCPUs and 30 GB memory). +3. Add a GPU of your choice. +4. Check 'Install NVIDIA GPU driver automatically on first startup?' +5. Select a 300 GB SSD Persistent Disk for sufficient I/O speed. +6. Click 'Deploy'. + +The preinstalled [Anaconda](https://docs.anaconda.com/anaconda/packages/pkg-docs/) Python environment includes all dependencies. 
+ +GCP Marketplace + +## Step 2: Set Up the VM + +Clone the YOLOv5 repository and install the [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) in a [**Python>=3.8.0**](https://www.python.org/) environment, including [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/). [Models](https://github.com/ultralytics/yolov5/tree/master/models) and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) will be downloaded automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). + +```bash +git clone https://github.com/ultralytics/yolov5 # clone +cd yolov5 +pip install -r requirements.txt # install +``` + +## Step 3: Run YOLOv5 🚀 on the VM + +You can now train, test, detect, and export YOLOv5 models on your VM: + +```bash +python train.py # train a model +python val.py --weights yolov5s.pt # validate a model for Precision, Recall, and mAP +python detect.py --weights yolov5s.pt --source path/to/images # run inference on images and videos +python export.py --weights yolov5s.pt --include onnx coreml tflite # export models to other formats +``` + +GCP terminal diff --git a/docs/en/yolov5/index.md b/docs/en/yolov5/index.md new file mode 100644 index 0000000..d9303fc --- /dev/null +++ b/docs/en/yolov5/index.md @@ -0,0 +1,83 @@ +--- +comments: true +description: Deep dive into Ultralytics' YOLOv5. Learn about object detection model - YOLOv5, how to train it on custom data, multi-GPU training and more. +keywords: Ultralytics, YOLOv5, Deep Learning, Object detection, PyTorch, Tutorial, Multi-GPU training, Custom data training +--- + +# Comprehensive Guide to Ultralytics YOLOv5 + +
+

+ + Ultralytics YOLOv5 v7.0 banner +

+ +YOLOv5 CI +YOLOv5 Citation +Docker Pulls +
+Run on Gradient +Open In Colab +Open In Kaggle +
+
+ +Welcome to the Ultralytics' YOLOv5🚀 Documentation! YOLOv5, the fifth iteration of the revolutionary "You Only Look Once" object detection model, is designed to deliver high-speed, high-accuracy results in real-time. +

+Built on PyTorch, this powerful deep learning framework has garnered immense popularity for its versatility, ease of use, and high performance. Our documentation guides you through the installation process, explains the architectural nuances of the model, showcases various use-cases, and provides a series of detailed tutorials. These resources will help you harness the full potential of YOLOv5 for your computer vision projects. Let's get started! + +
+ +## Tutorials + +Here's a compilation of comprehensive tutorials that will guide you through different aspects of YOLOv5. + +* [Train Custom Data](tutorials/train_custom_data.md) 🚀 RECOMMENDED: Learn how to train the YOLOv5 model on your custom dataset. +* [Tips for Best Training Results](tutorials/tips_for_best_training_results.md) ☘️: Uncover practical tips to optimize your model training process. +* [Multi-GPU Training](tutorials/multi_gpu_training.md): Understand how to leverage multiple GPUs to expedite your training. +* [PyTorch Hub](tutorials/pytorch_hub_model_loading.md) 🌟 NEW: Learn to load pre-trained models via PyTorch Hub. +* [TFLite, ONNX, CoreML, TensorRT Export](tutorials/model_export.md) 🚀: Understand how to export your model to different formats. +* [NVIDIA Jetson platform Deployment](tutorials/running_on_jetson_nano.md) 🌟 NEW: Learn how to deploy your YOLOv5 model on NVIDIA Jetson platform. +* [Test-Time Augmentation (TTA)](tutorials/test_time_augmentation.md): Explore how to use TTA to improve your model's prediction accuracy. +* [Model Ensembling](tutorials/model_ensembling.md): Learn the strategy of combining multiple models for improved performance. +* [Model Pruning/Sparsity](tutorials/model_pruning_and_sparsity.md): Understand pruning and sparsity concepts, and how to create a more efficient model. +* [Hyperparameter Evolution](tutorials/hyperparameter_evolution.md): Discover the process of automated hyperparameter tuning for better model performance. +* [Transfer Learning with Frozen Layers](tutorials/transfer_learning_with_frozen_layers.md): Learn how to implement transfer learning by freezing layers in YOLOv5. +* [Architecture Summary](tutorials/architecture_description.md) 🌟 Delve into the structural details of the YOLOv5 model. +* [Roboflow for Datasets](tutorials/roboflow_datasets_integration.md): Understand how to utilize Roboflow for dataset management, labeling, and active learning. 
+* [ClearML Logging](tutorials/clearml_logging_integration.md) 🌟 Learn how to integrate ClearML for efficient logging during your model training. +* [YOLOv5 with Neural Magic](tutorials/neural_magic_pruning_quantization.md) Discover how to use Neural Magic's Deepsparse to prune and quantize your YOLOv5 model. +* [Comet Logging](tutorials/comet_logging_integration.md) 🌟 NEW: Explore how to utilize Comet for improved model training logging. + +## Environments + +YOLOv5 is designed to be run in the following up-to-date, verified environments, with all dependencies (including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/)) pre-installed: + +- **Notebooks** with free GPU: Run on Gradient Open In Colab Open In Kaggle +- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](environments/google_cloud_quickstart_tutorial.md) +- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](environments/aws_quickstart_tutorial.md) +- **Azure** Azure Machine Learning. See [AzureML Quickstart Guide](environments/azureml_quickstart_tutorial.md) +- **Docker Image**. See [Docker Quickstart Guide](environments/docker_image_quickstart_tutorial.md) Docker Pulls + +## Status + +YOLOv5 CI + +This badge signifies that all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify the correct operation of YOLOv5 [training](https://github.com/ultralytics/yolov5/blob/master/train.py), [validation](https://github.com/ultralytics/yolov5/blob/master/val.py), [inference](https://github.com/ultralytics/yolov5/blob/master/detect.py), [export](https://github.com/ultralytics/yolov5/blob/master/export.py) and [benchmarks](https://github.com/ultralytics/yolov5/blob/master/benchmarks.py) on macOS, Windows, and Ubuntu every 24 hours and with every new commit. + +
+
+ Ultralytics GitHub + space + Ultralytics LinkedIn + space + Ultralytics Twitter + space + Ultralytics YouTube + space + Ultralytics TikTok + space + Ultralytics Instagram + space + Ultralytics Discord +
diff --git a/docs/en/yolov5/quickstart_tutorial.md b/docs/en/yolov5/quickstart_tutorial.md new file mode 100644 index 0000000..90167c8 --- /dev/null +++ b/docs/en/yolov5/quickstart_tutorial.md @@ -0,0 +1,74 @@ +--- +comments: true +description: Kickstart your journey with YOLOv5. Learn how to install, run inference, and train models on your own images. Dive headfirst into object detection with PyTorch. +keywords: YOLOv5, Quickstart, Installation, Inference, Training, Object detection, PyTorch, Ultralytics +--- + +# YOLOv5 Quickstart + +See below for quickstart examples. + +## Install + +Clone repo and install [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) in a +[**Python>=3.8.0**](https://www.python.org/) environment, including +[**PyTorch>=1.8**](https://pytorch.org/get-started/locally/). + +```bash +git clone https://github.com/ultralytics/yolov5 # clone +cd yolov5 +pip install -r requirements.txt # install +``` + +## Inference + +YOLOv5 [PyTorch Hub](https://docs.ultralytics.com/yolov5/tutorials/pytorch_hub_model_loading) inference. [Models](https://github.com/ultralytics/yolov5/tree/master/models) download automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). + +```python +import torch + +# Model +model = torch.hub.load("ultralytics/yolov5", "yolov5s") # or yolov5n - yolov5x6, custom + +# Images +img = "https://ultralytics.com/images/zidane.jpg" # or file, Path, PIL, OpenCV, numpy, list + +# Inference +results = model(img) + +# Results +results.print() # or .show(), .save(), .crop(), .pandas(), etc. +``` + +## Inference with detect.py + +`detect.py` runs inference on a variety of sources, downloading [models](https://github.com/ultralytics/yolov5/tree/master/models) automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases) and saving results to `runs/detect`. 
+ +```bash +python detect.py --weights yolov5s.pt --source 0 # webcam + img.jpg # image + vid.mp4 # video + screen # screenshot + path/ # directory + list.txt # list of images + list.streams # list of streams + 'path/*.jpg' # glob + 'https://youtu.be/LNwODJXcvt4' # YouTube + 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream +``` + +## Training + +The commands below reproduce YOLOv5 [COCO](https://github.com/ultralytics/yolov5/blob/master/data/scripts/get_coco.sh) +results. [Models](https://github.com/ultralytics/yolov5/tree/master/models) +and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) download automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). Training times for YOLOv5n/s/m/l/x are 1/2/4/6/8 days on a V100 GPU ([Multi-GPU](https://docs.ultralytics.com/yolov5/tutorials/multi_gpu_training) times faster). Use the largest `--batch-size` possible, or pass `--batch-size -1` for YOLOv5 [AutoBatch](https://github.com/ultralytics/yolov5/pull/5092). Batch sizes shown for V100-16GB. + +```bash +python train.py --data coco.yaml --epochs 300 --weights '' --cfg yolov5n.yaml --batch-size 128 + yolov5s 64 + yolov5m 40 + yolov5l 24 + yolov5x 16 +``` + +YOLO training curves diff --git a/docs/en/yolov5/tutorials/architecture_description.md b/docs/en/yolov5/tutorials/architecture_description.md new file mode 100644 index 0000000..3c8c661 --- /dev/null +++ b/docs/en/yolov5/tutorials/architecture_description.md @@ -0,0 +1,223 @@ +--- +comments: true +description: Explore the architecture of YOLOv5, an object detection algorithm by Ultralytics. Understand the model structure, data augmentation methods, training strategies, and loss computation techniques. 
+keywords: Ultralytics, YOLOv5, Object Detection, Architecture, Model Structure, Data Augmentation, Training Strategies, Loss Computation +--- + +# Ultralytics YOLOv5 Architecture + +YOLOv5 (v6.0/6.1) is a powerful object detection algorithm developed by Ultralytics. This article dives deep into the YOLOv5 architecture, data augmentation strategies, training methodologies, and loss computation techniques. This comprehensive understanding will help improve your practical application of object detection in various fields, including surveillance, autonomous vehicles, and image recognition. + +## 1. Model Structure + +YOLOv5's architecture consists of three main parts: + +- **Backbone**: This is the main body of the network. For YOLOv5, the backbone is designed using the `New CSP-Darknet53` structure, a modification of the Darknet architecture used in previous versions. +- **Neck**: This part connects the backbone and the head. In YOLOv5, `SPPF` and `New CSP-PAN` structures are utilized. +- **Head**: This part is responsible for generating the final output. YOLOv5 uses the `YOLOv3 Head` for this purpose. + +The structure of the model is depicted in the image below. The model structure details can be found in `yolov5l.yaml`. + +![yolov5](https://user-images.githubusercontent.com/31005897/172404576-c260dcf9-76bb-4bc8-b6a9-f2d987792583.png) + +YOLOv5 introduces some minor changes compared to its predecessors: + +1. The `Focus` structure, found in earlier versions, is replaced with a `6x6 Conv2d` structure. This change boosts efficiency [#4825](https://github.com/ultralytics/yolov5/issues/4825). +2. The `SPP` structure is replaced with `SPPF`. This alteration more than doubles the speed of processing. + +To test the speed of `SPP` and `SPPF`, the following code can be used: + +
+SPP vs SPPF speed profiling example (click to open) + +```python +import time +import torch +import torch.nn as nn + + +class SPP(nn.Module): + def __init__(self): + super().__init__() + self.maxpool1 = nn.MaxPool2d(5, 1, padding=2) + self.maxpool2 = nn.MaxPool2d(9, 1, padding=4) + self.maxpool3 = nn.MaxPool2d(13, 1, padding=6) + + def forward(self, x): + o1 = self.maxpool1(x) + o2 = self.maxpool2(x) + o3 = self.maxpool3(x) + return torch.cat([x, o1, o2, o3], dim=1) + + +class SPPF(nn.Module): + def __init__(self): + super().__init__() + self.maxpool = nn.MaxPool2d(5, 1, padding=2) + + def forward(self, x): + o1 = self.maxpool(x) + o2 = self.maxpool(o1) + o3 = self.maxpool(o2) + return torch.cat([x, o1, o2, o3], dim=1) + + +def main(): + input_tensor = torch.rand(8, 32, 16, 16) + spp = SPP() + sppf = SPPF() + output1 = spp(input_tensor) + output2 = sppf(input_tensor) + + print(torch.equal(output1, output2)) + + t_start = time.time() + for _ in range(100): + spp(input_tensor) + print(f"SPP time: {time.time() - t_start}") + + t_start = time.time() + for _ in range(100): + sppf(input_tensor) + print(f"SPPF time: {time.time() - t_start}") + + +if __name__ == '__main__': + main() +``` + +result: + +``` +True +SPP time: 0.5373051166534424 +SPPF time: 0.20780706405639648 +``` + +
+ +## 2. Data Augmentation Techniques + +YOLOv5 employs various data augmentation techniques to improve the model's ability to generalize and reduce overfitting. These techniques include: + +- **Mosaic Augmentation**: An image processing technique that combines four training images into one in ways that encourage object detection models to better handle various object scales and translations. + + ![mosaic](https://user-images.githubusercontent.com/31005897/159109235-c7aad8f2-1d4f-41f9-8d5f-b2fde6f2885e.png) + +- **Copy-Paste Augmentation**: An innovative data augmentation method that copies random patches from an image and pastes them onto another randomly chosen image, effectively generating a new training sample. + + ![copy-paste](https://user-images.githubusercontent.com/31005897/159116277-91b45033-6bec-4f82-afc4-41138866628e.png) + +- **Random Affine Transformations**: This includes random rotation, scaling, translation, and shearing of the images. + + ![random-affine](https://user-images.githubusercontent.com/31005897/159109326-45cd5acb-14fa-43e7-9235-0f21b0021c7d.png) + +- **MixUp Augmentation**: A method that creates composite images by taking a linear combination of two images and their associated labels. + + ![mixup](https://user-images.githubusercontent.com/31005897/159109361-3b24333b-f481-478b-ae00-df7838f0b5cd.png) + +- **Albumentations**: A powerful library for image augmenting that supports a wide variety of augmentation techniques. +- **HSV Augmentation**: Random changes to the Hue, Saturation, and Value of the images. + + ![hsv](https://user-images.githubusercontent.com/31005897/159109407-83d100ba-1aba-4f4b-aa03-4f048f815981.png) + +- **Random Horizontal Flip**: An augmentation method that randomly flips images horizontally. + + ![horizontal-flip](https://user-images.githubusercontent.com/31005897/159109429-0d44619a-a76a-49eb-bfc0-6709860c043e.png) + +## 3. 
Training Strategies + +YOLOv5 applies several sophisticated training strategies to enhance the model's performance. They include: + +- **Multiscale Training**: The input images are randomly rescaled within a range of 0.5 to 1.5 times their original size during the training process. +- **AutoAnchor**: This strategy optimizes the prior anchor boxes to match the statistical characteristics of the ground truth boxes in your custom data. +- **Warmup and Cosine LR Scheduler**: A method to adjust the learning rate to enhance model performance. +- **Exponential Moving Average (EMA)**: A strategy that uses the average of parameters over past steps to stabilize the training process and reduce generalization error. +- **Mixed Precision Training**: A method to perform operations in half-precision format, reducing memory usage and enhancing computational speed. +- **Hyperparameter Evolution**: A strategy to automatically tune hyperparameters to achieve optimal performance. + +## 4. Additional Features + +### 4.1 Compute Losses + +The loss in YOLOv5 is computed as a combination of three individual loss components: + +- **Classes Loss (BCE Loss)**: Binary Cross-Entropy loss, measures the error for the classification task. +- **Objectness Loss (BCE Loss)**: Another Binary Cross-Entropy loss, calculates the error in detecting whether an object is present in a particular grid cell or not. +- **Location Loss (CIoU Loss)**: Complete IoU loss, measures the error in localizing the object within the grid cell. + +The overall loss function is depicted by: + +![loss](https://latex.codecogs.com/svg.image?Loss=\lambda_1L_{cls}+\lambda_2L_{obj}+\lambda_3L_{loc}) + +### 4.2 Balance Losses + +The objectness losses of the three prediction layers (`P3`, `P4`, `P5`) are weighted differently. The balance weights are `[4.0, 1.0, 0.4]` respectively. This approach ensures that the predictions at different scales contribute appropriately to the total loss. 
+ +![obj_loss](https://latex.codecogs.com/svg.image?L_{obj}=4.0\cdot&space;L_{obj}^{small}+1.0\cdot&space;L_{obj}^{medium}+0.4\cdot&space;L_{obj}^{large}) + +### 4.3 Eliminate Grid Sensitivity + +The YOLOv5 architecture makes some important changes to the box prediction strategy compared to earlier versions of YOLO. In YOLOv2 and YOLOv3, the box coordinates were directly predicted using the activation of the last layer. + +![b_x](https://latex.codecogs.com/svg.image?b_x=\sigma(t_x)+c_x) +![b_y](https://latex.codecogs.com/svg.image?b_y=\sigma(t_y)+c_y) +![b_w](https://latex.codecogs.com/svg.image?b_w=p_w\cdot&space;e^{t_w}) +![b_h](https://latex.codecogs.com/svg.image?b_h=p_h\cdot&space;e^{t_h}) + +YOLOv5 grid computation + +However, in YOLOv5, the formula for predicting the box coordinates has been updated to reduce grid sensitivity and prevent the model from predicting unbounded box dimensions. + +The revised formulas for calculating the predicted bounding box are as follows: + +![bx](https://latex.codecogs.com/svg.image?b_x=(2\cdot\sigma(t_x)-0.5)+c_x) +![by](https://latex.codecogs.com/svg.image?b_y=(2\cdot\sigma(t_y)-0.5)+c_y) +![bw](https://latex.codecogs.com/svg.image?b_w=p_w\cdot(2\cdot\sigma(t_w))^2) +![bh](https://latex.codecogs.com/svg.image?b_h=p_h\cdot(2\cdot\sigma(t_h))^2) + +Compare the center point offset before and after scaling. The center point offset range is adjusted from (0, 1) to (-0.5, 1.5). Therefore, offset can easily get 0 or 1. + +YOLOv5 grid scaling + +Compare the height and width scaling ratio(relative to anchor) before and after adjustment. The original yolo/darknet box equations have a serious flaw. Width and Height are completely unbounded as they are simply out=exp(in), which is dangerous, as it can lead to runaway gradients, instabilities, NaN losses and ultimately a complete loss of training. 
For details, [refer to this issue](https://github.com/ultralytics/yolov5/issues/471#issuecomment-662009779). + +YOLOv5 unbounded scaling + +### 4.4 Build Targets + +The build target process in YOLOv5 is critical for training efficiency and model accuracy. It involves assigning ground truth boxes to the appropriate grid cells in the output map and matching them with the appropriate anchor boxes. + +This process follows these steps: + +- Calculate the ratio of the ground truth box dimensions and the dimensions of each anchor template. + +![rw](https://latex.codecogs.com/svg.image?r_w=w_{gt}/w_{at}) + +![rh](https://latex.codecogs.com/svg.image?r_h=h_{gt}/h_{at}) + +![rwmax](https://latex.codecogs.com/svg.image?r_w^{max}=max(r_w,1/r_w)) + +![rhmax](https://latex.codecogs.com/svg.image?r_h^{max}=max(r_h,1/r_h)) + +![rmax](https://latex.codecogs.com/svg.image?r^{max}=max(r_w^{max},r_h^{max})) + +![match](https://latex.codecogs.com/svg.image?r^{max}<{\rm&space;anchor_t}) + +YOLOv5 IoU computation + +- If the calculated ratio is within the threshold, match the ground truth box with the corresponding anchor. + +YOLOv5 grid overlap + +- Assign the matched anchor to the appropriate cells, keeping in mind that due to the revised center point offset, a ground truth box can be assigned to more than one anchor. Because the center point offset range is adjusted from (0, 1) to (-0.5, 1.5), a GT box can be assigned to more anchors. + +YOLOv5 anchor selection + +This way, the build targets process ensures that each ground truth object is properly assigned and matched during the training process, allowing YOLOv5 to learn the task of object detection more effectively. + +## Conclusion + +In conclusion, YOLOv5 represents a significant step forward in the development of real-time object detection models. By incorporating various new features, enhancements, and training strategies, it surpasses previous versions of the YOLO family in performance and efficiency. 
+ +The primary enhancements in YOLOv5 include the use of a dynamic architecture, an extensive range of data augmentation techniques, innovative training strategies, as well as important adjustments in computing losses and the process of building targets. All these innovations significantly improve the accuracy and efficiency of object detection while retaining a high degree of speed, which is the trademark of YOLO models. diff --git a/docs/en/yolov5/tutorials/clearml_logging_integration.md b/docs/en/yolov5/tutorials/clearml_logging_integration.md new file mode 100644 index 0000000..056f30c --- /dev/null +++ b/docs/en/yolov5/tutorials/clearml_logging_integration.md @@ -0,0 +1,240 @@ +--- +comments: true +description: Learn how ClearML can enhance your YOLOv5 pipeline – track your training runs, version your data, remotely monitor your models and optimize performance. +keywords: ClearML, YOLOv5, Ultralytics, AI toolbox, training data, remote training, hyperparameter optimization, YOLOv5 model +--- + +# ClearML Integration + +Clear|MLClear|ML + +## About ClearML + +[ClearML](https://cutt.ly/yolov5-tutorial-clearml) is an [open-source](https://github.com/allegroai/clearml) toolbox designed to save you time ⏱️. + +🔨 Track every YOLOv5 training run in the experiment manager + +🔧 Version and easily access your custom training data with the integrated ClearML Data Versioning Tool + +🔦 Remotely train and monitor your YOLOv5 training runs using ClearML Agent + +🔬 Get the very best mAP using ClearML Hyperparameter Optimization + +🔭 Turn your newly trained YOLOv5 model into an API with just a few commands using ClearML Serving + +
+And so much more. It's up to you how many of these tools you want to use: you can stick to the experiment manager, or chain them all together into an impressive pipeline! +
+
+ +![ClearML scalars dashboard](https://github.com/thepycoder/clearml_screenshots/raw/main/experiment_manager_with_compare.gif) + +
+
+ +## 🦾 Setting Things Up + +To keep track of your experiments and/or data, ClearML needs to communicate to a server. You have 2 options to get one: + +Either sign up for free to the [ClearML Hosted Service](https://cutt.ly/yolov5-tutorial-clearml) or you can set up your own server, see [here](https://clear.ml/docs/latest/docs/deploying_clearml/clearml_server). Even the server is open-source, so even if you're dealing with sensitive data, you should be good to go! + +1. Install the `clearml` python package: + + ```bash + pip install clearml + ``` + +2. Connect the ClearML SDK to the server by [creating credentials](https://app.clear.ml/settings/workspace-configuration) (go right top to Settings -> Workspace -> Create new credentials), then execute the command below and follow the instructions: + + ```bash + clearml-init + ``` + +That's it! You're done 😎 + +
+ +## 🚀 Training YOLOv5 With ClearML + +To enable ClearML experiment tracking, simply install the ClearML pip package. + +```bash +pip install "clearml>=1.2.0" +``` + +This will enable integration with the YOLOv5 training script. Every training run from now on will be captured and stored by the ClearML experiment manager. + +If you want to change the `project_name` or `task_name`, use the `--project` and `--name` arguments of the `train.py` script; by default the project will be called `YOLOv5` and the task `Training`. PLEASE NOTE: ClearML uses `/` as a delimiter for subprojects, so be careful when using `/` in your project name! + +```bash +python train.py --img 640 --batch 16 --epochs 3 --data coco128.yaml --weights yolov5s.pt --cache +``` + +or with custom project and task name: + +```bash +python train.py --project my_project --name my_training --img 640 --batch 16 --epochs 3 --data coco128.yaml --weights yolov5s.pt --cache +``` + +This will capture: + +- Source code + uncommitted changes +- Installed packages +- (Hyper)parameters +- Model files (use `--save-period n` to save a checkpoint every n epochs) +- Console output +- Scalars (mAP_0.5, mAP_0.5:0.95, precision, recall, losses, learning rates, ...) +- General info such as machine details, runtime, creation date etc. +- All produced plots such as label correlogram and confusion matrix +- Images with bounding boxes per epoch +- Mosaic per epoch +- Validation images per epoch +- ... + +That's a lot, right? 🤯 Now, we can visualize all of this information in the ClearML UI to get an overview of our training progress. Add custom columns to the table view (such as mAP_0.5) so you can easily sort on the best performing model. Or select multiple experiments and directly compare them! + +There is even more we can do with all of this information, like hyperparameter optimization and remote execution, so keep reading if you want to see how that works! + +
+ +## 🔗 Dataset Version Management + +Versioning your data separately from your code is generally a good idea and makes it easy to acquire the latest version too. This repository supports supplying a dataset version ID, and it will make sure to get the data if it's not there yet. Next to that, this workflow also saves the used dataset ID as part of the task parameters, so you will always know for sure which data was used in which experiment! + +![ClearML Dataset Interface](https://github.com/thepycoder/clearml_screenshots/raw/main/clearml_data.gif) + +### Prepare Your Dataset + +The YOLOv5 repository supports a number of different datasets by using YAML files containing their information. By default, datasets are downloaded to the `../datasets` folder in relation to the repository root folder. So if you downloaded the `coco128` dataset using the link in the YAML or with the scripts provided by yolov5, you get this folder structure: + +``` +.. +|_ yolov5 +|_ datasets + |_ coco128 + |_ images + |_ labels + |_ LICENSE + |_ README.txt +``` + +But this can be any dataset you wish. Feel free to use your own, as long as you keep to this folder structure. + +Next, ⚠️**copy the corresponding YAML file to the root of the dataset folder**⚠️. This YAML file contains the information ClearML will need to properly use the dataset. You can make this yourself too, of course; just follow the structure of the example YAMLs. + +Basically we need the following keys: `path`, `train`, `test`, `val`, `nc`, `names`. + +``` +.. +|_ yolov5 +|_ datasets + |_ coco128 + |_ images + |_ labels + |_ coco128.yaml # <---- HERE! + |_ LICENSE + |_ README.txt +``` + +### Upload Your Dataset + +To get this dataset into ClearML as a versioned dataset, go to the dataset root folder and run the following command: + +```bash +cd coco128 +clearml-data sync --project YOLOv5 --name coco128 --folder . +``` + +The command `clearml-data sync` is actually a shorthand command. 
You could also run these commands one after the other: + +```bash +# Optionally add --parent <parent_dataset_id> if you want to base +# this version on another dataset version, so no duplicate files are uploaded! +clearml-data create --name coco128 --project YOLOv5 +clearml-data add --files . +clearml-data close +``` + +### Run Training Using A ClearML Dataset + +Now that you have a ClearML dataset, you can very simply use it to train custom YOLOv5 🚀 models! + +```bash +python train.py --img 640 --batch 16 --epochs 3 --data clearml://<your_dataset_id> --weights yolov5s.pt --cache +``` + +
+ +## 👀 Hyperparameter Optimization + +Now that we have our experiments and data versioned, it's time to take a look at what we can build on top! + +Using the code information, installed packages and environment details, the experiment itself is now **completely reproducible**. In fact, ClearML allows you to clone an experiment and even change its parameters. We can then just rerun it with these new parameters automatically, this is basically what HPO does! + +To **run hyperparameter optimization locally**, we've included a pre-made script for you. Just make sure a training task has been run at least once, so it is in the ClearML experiment manager, we will essentially clone it and change its hyperparameters. + +You'll need to fill in the ID of this `template task` in the script found at `utils/loggers/clearml/hpo.py` and then just run it :) You can change `task.execute_locally()` to `task.execute()` to put it in a ClearML queue and have a remote agent work on it instead. + +```bash +# To use optuna, install it first, otherwise you can change the optimizer to just be RandomSearch +pip install optuna +python utils/loggers/clearml/hpo.py +``` + +![HPO](https://github.com/thepycoder/clearml_screenshots/raw/main/hpo.png) + +## 🤯 Remote Execution (advanced) + +Running HPO locally is really handy, but what if we want to run our experiments on a remote machine instead? Maybe you have access to a very powerful GPU machine on-site, or you have some budget to use cloud GPUs. This is where the ClearML Agent comes into play. Check out what the agent can do here: + +- [YouTube video](https://youtu.be/MX3BrXnaULs) +- [Documentation](https://clear.ml/docs/latest/docs/clearml_agent) + +In short: every experiment tracked by the experiment manager contains enough information to reproduce it on a different machine (installed packages, uncommitted changes etc.). 
So a ClearML agent does just that: it listens to a queue for incoming tasks and when it finds one, it recreates the environment and runs it while still reporting scalars, plots etc. to the experiment manager. + +You can turn any machine (a cloud VM, a local GPU machine, your own laptop ... ) into a ClearML agent by simply running: + +```bash +clearml-agent daemon --queue <queues_to_listen_to> [--docker] +``` + +### Cloning, Editing And Enqueuing + +With our agent running, we can give it some work. Remember from the HPO section that we can clone a task and edit the hyperparameters? We can do that from the interface too! + +🪄 Clone the experiment by right-clicking it + +🎯 Edit the hyperparameters to what you wish them to be + +⏳ Enqueue the task to any of the queues by right-clicking it + +![Enqueue a task from the UI](https://github.com/thepycoder/clearml_screenshots/raw/main/enqueue.gif) + +### Executing A Task Remotely + +Now you can clone a task like we explained above, or simply mark your current script by adding `task.execute_remotely()` and on execution it will be put into a queue, for the agent to start working on! + +To run the YOLOv5 training script remotely, all you have to do is add this line to the `train.py` script after the clearml logger has been instantiated: + +```python +# ... +# Loggers +data_dict = None +if RANK in {-1, 0}: + loggers = Loggers(save_dir, weights, opt, hyp, LOGGER) # loggers instance + if loggers.clearml: + loggers.clearml.task.execute_remotely(queue="my_queue") # <------ ADD THIS LINE + # data_dict is either None if the user did not choose a ClearML dataset, or is filled in by ClearML + data_dict = loggers.clearml.data_dict +# ... +``` + +When running the training script after this change, python will run the script up until that line, after which it will package the code and send it to the queue instead! + +### Autoscaling workers + +ClearML comes with autoscalers too! 
This tool will automatically spin up new remote machines in the cloud of your choice (AWS, GCP, Azure) and turn them into ClearML agents for you whenever there are experiments detected in the queue. Once the tasks are processed, the autoscaler will automatically shut down the remote machines, and you stop paying! + +Check out the autoscalers getting started video below. + +[![Watch the video](https://img.youtube.com/vi/j4XVMAaUt3E/0.jpg)](https://youtu.be/j4XVMAaUt3E) diff --git a/docs/en/yolov5/tutorials/comet_logging_integration.md b/docs/en/yolov5/tutorials/comet_logging_integration.md new file mode 100644 index 0000000..d66ee68 --- /dev/null +++ b/docs/en/yolov5/tutorials/comet_logging_integration.md @@ -0,0 +1,261 @@ +--- +comments: true +description: Learn how to set up and use Comet to enhance your YOLOv5 model training, metrics tracking and visualization. Includes a step by step guide to integrate Comet with YOLOv5. +keywords: YOLOv5, Comet, Machine Learning, Ultralytics, Real time metrics tracking, Hyperparameters, Model checkpoints, Model predictions, YOLOv5 training, Comet Credentials +--- + +![Comet](https://cdn.comet.ml/img/notebook_logo.png) + +# YOLOv5 with Comet + +This guide will cover how to use YOLOv5 with [Comet](https://bit.ly/yolov5-readme-comet2) + +# About Comet + +Comet builds tools that help data scientists, engineers, and team leaders accelerate and optimize machine learning and deep learning models. + +Track and visualize model metrics in real time, save your hyperparameters, datasets, and model checkpoints, and visualize your model predictions with [Comet Custom Panels](https://www.comet.com/docs/v2/guides/comet-dashboard/code-panels/about-panels/?utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=github)! +Comet makes sure you never lose track of your work and makes it easy to share results and collaborate across teams of all sizes! 
+ +# Getting Started + +## Install Comet + +```shell +pip install comet_ml +``` + +## Configure Comet Credentials + +There are two ways to configure Comet with YOLOv5. + +You can either set your credentials through environment variables + +**Environment Variables** + +```shell +export COMET_API_KEY= +export COMET_PROJECT_NAME= # This will default to 'yolov5' +``` + +Or create a `.comet.config` file in your working directory and set your credentials there. + +**Comet Configuration File** + +``` +[comet] +api_key= +project_name= # This will default to 'yolov5' +``` + +## Run the Training Script + +```shell +# Train YOLOv5s on COCO128 for 5 epochs +python train.py --img 640 --batch 16 --epochs 5 --data coco128.yaml --weights yolov5s.pt +``` + +That's it! Comet will automatically log your hyperparameters, command line arguments, training and validation metrics. You can visualize and analyze your runs in the Comet UI + +yolo-ui + +# Try out an Example! + +Check out an example of a [completed run here](https://www.comet.com/examples/comet-example-yolov5/a0e29e0e9b984e4a822db2a62d0cb357?experiment-tab=chart&showOutliers=true&smoothing=0&transformY=smoothing&xAxis=step&utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=github) + +Or better yet, try it out yourself in this Colab Notebook + +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1RG0WOQyxlDlo5Km8GogJpIEJlg_5lyYO?usp=sharing) + +# Log automatically + +By default, Comet will log the following items + +## Metrics + +- Box Loss, Object Loss, Classification Loss for the training and validation data +- mAP_0.5, mAP_0.5:0.95 metrics for the validation data. 
+- Precision and Recall for the validation data + +## Parameters + +- Model Hyperparameters +- All parameters passed through the command line options + +## Visualizations + +- Confusion Matrix of the model predictions on the validation data +- Plots for the PR and F1 curves across all classes +- Correlogram of the Class Labels + +# Configure Comet Logging + +Comet can be configured to log additional data either through command line flags passed to the training script or through environment variables. + +```shell +export COMET_MODE=online # Set whether to run Comet in 'online' or 'offline' mode. Defaults to online +export COMET_MODEL_NAME= #Set the name for the saved model. Defaults to yolov5 +export COMET_LOG_CONFUSION_MATRIX=false # Set to disable logging a Comet Confusion Matrix. Defaults to true +export COMET_MAX_IMAGE_UPLOADS= # Controls how many total image predictions to log to Comet. Defaults to 100. +export COMET_LOG_PER_CLASS_METRICS=true # Set to log evaluation metrics for each detected class at the end of training. Defaults to false +export COMET_DEFAULT_CHECKPOINT_FILENAME= # Set this if you would like to resume training from a different checkpoint. Defaults to 'last.pt' +export COMET_LOG_BATCH_LEVEL_METRICS=true # Set this if you would like to log training metrics at the batch level. Defaults to false. +export COMET_LOG_PREDICTIONS=true # Set this to false to disable logging model predictions +``` + +## Logging Checkpoints with Comet + +Logging Models to Comet is disabled by default. To enable it, pass the `save-period` argument to the training script. This will save the logged checkpoints to Comet based on the interval value provided by `save-period` + +```shell +python train.py \ +--img 640 \ +--batch 16 \ +--epochs 5 \ +--data coco128.yaml \ +--weights yolov5s.pt \ +--save-period 1 +``` + +## Logging Model Predictions + +By default, model predictions (images, ground truth labels and bounding boxes) will be logged to Comet. 
+ +You can control the frequency of logged predictions and the associated images by passing the `bbox_interval` command line argument. Predictions can be visualized using Comet's Object Detection Custom Panel. This frequency corresponds to every Nth batch of data per epoch. In the example below, we are logging every 2nd batch of data for each epoch. + +**Note:** The YOLOv5 validation dataloader will default to a batch size of 32, so you will have to set the logging frequency accordingly. + +Here is an [example project using the Panel](https://www.comet.com/examples/comet-example-yolov5?shareable=YcwMiJaZSXfcEXpGOHDD12vA1&utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=github) + +```shell +python train.py \ +--img 640 \ +--batch 16 \ +--epochs 5 \ +--data coco128.yaml \ +--weights yolov5s.pt \ +--bbox_interval 2 +``` + +### Controlling the number of Prediction Images logged to Comet + +When logging predictions from YOLOv5, Comet will log the images associated with each set of predictions. By default a maximum of 100 validation images are logged. You can increase or decrease this number using the `COMET_MAX_IMAGE_UPLOADS` environment variable. + +```shell +env COMET_MAX_IMAGE_UPLOADS=200 python train.py \ +--img 640 \ +--batch 16 \ +--epochs 5 \ +--data coco128.yaml \ +--weights yolov5s.pt \ +--bbox_interval 1 +``` + +### Logging Class Level Metrics + +Use the `COMET_LOG_PER_CLASS_METRICS` environment variable to log mAP, precision, recall, f1 for each class. 
+ +```shell +env COMET_LOG_PER_CLASS_METRICS=true python train.py \ +--img 640 \ +--batch 16 \ +--epochs 5 \ +--data coco128.yaml \ +--weights yolov5s.pt +``` + +## Uploading a Dataset to Comet Artifacts + +If you would like to store your data using [Comet Artifacts](https://www.comet.com/docs/v2/guides/data-management/using-artifacts/#learn-more?utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=github), you can do so using the `upload_dataset` flag. + +The dataset must be organized in the way described in the [YOLOv5 documentation](train_custom_data.md). The dataset config `yaml` file must follow the same format as that of the `coco128.yaml` file. + +```shell +python train.py \ +--img 640 \ +--batch 16 \ +--epochs 5 \ +--data coco128.yaml \ +--weights yolov5s.pt \ +--upload_dataset +``` + +You can find the uploaded dataset in the Artifacts tab in your Comet Workspace +artifact-1 + +You can preview the data directly in the Comet UI. +artifact-2 + +Artifacts are versioned and also support adding metadata about the dataset. Comet will automatically log the metadata from your dataset `yaml` file +artifact-3 + +### Using a saved Artifact + +If you would like to use a dataset from Comet Artifacts, set the `path` variable in your dataset `yaml` file to point to the following Artifact resource URL. + +``` +# contents of artifact.yaml file +path: "comet://<workspace name>/<artifact name>:<artifact version or alias>" +``` + +Then pass this file to your training script in the following way + +```shell +python train.py \ +--img 640 \ +--batch 16 \ +--epochs 5 \ +--data artifact.yaml \ +--weights yolov5s.pt +``` + +Artifacts also allow you to track the lineage of data as it flows through your Experimentation workflow. Here you can see a graph that shows you all the experiments that have used your uploaded dataset. +artifact-4 + +## Resuming a Training Run + +If your training run is interrupted for any reason, e.g. 
disrupted internet connection, you can resume the run using the `resume` flag and the Comet Run Path. + +The Run Path has the following format `comet://<your workspace name>/<your project name>/<experiment id>`. + +This will restore the run to its state before the interruption, which includes restoring the model from a checkpoint, restoring all hyperparameters and training arguments and downloading Comet dataset Artifacts if they were used in the original run. The resumed run will continue logging to the existing Experiment in the Comet UI + +```shell +python train.py \ +--resume "comet://<your run path>" +``` + +## Hyperparameter Search with the Comet Optimizer + +YOLOv5 is also integrated with Comet's Optimizer, making it simple to visualize hyperparameter sweeps in the Comet UI. + +### Configuring an Optimizer Sweep + +To configure the Comet Optimizer, you will have to create a JSON file with the information about the sweep. An example file has been provided in `utils/loggers/comet/optimizer_config.json` + +```shell +python utils/loggers/comet/hpo.py \ + --comet_optimizer_config "utils/loggers/comet/optimizer_config.json" +``` + +The `hpo.py` script accepts the same arguments as `train.py`. If you wish to pass additional arguments to your sweep simply add them after the script. + +```shell +python utils/loggers/comet/hpo.py \ + --comet_optimizer_config "utils/loggers/comet/optimizer_config.json" \ + --save-period 1 \ + --bbox_interval 1 +``` + +### Running a Sweep in Parallel + +```shell +comet optimizer -j <set number of workers> utils/loggers/comet/hpo.py \ + utils/loggers/comet/optimizer_config.json +``` + +### Visualizing Results + +Comet provides a number of ways to visualize the results of your sweep. 
Take a look at a [project with a completed sweep here](https://www.comet.com/examples/comet-example-yolov5/view/PrlArHGuuhDTKC1UuBmTtOSXD/panels?utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=github) + +hyperparameter-yolo diff --git a/docs/en/yolov5/tutorials/hyperparameter_evolution.md b/docs/en/yolov5/tutorials/hyperparameter_evolution.md new file mode 100644 index 0000000..6ca148a --- /dev/null +++ b/docs/en/yolov5/tutorials/hyperparameter_evolution.md @@ -0,0 +1,165 @@ +--- +comments: true +description: Learn how to optimize YOLOv5 with hyperparameter evolution using Genetic Algorithm. This guide provides steps to initialize, define, evolve and visualize hyperparameters for top performance. +keywords: Ultralytics, YOLOv5, Hyperparameter Optimization, Genetic Algorithm, Machine Learning, Deep Learning, AI, Object Detection, Image Classification, Python +--- + +📚 This guide explains **hyperparameter evolution** for YOLOv5 🚀. Hyperparameter evolution is a method of [Hyperparameter Optimization](https://en.wikipedia.org/wiki/Hyperparameter_optimization) using a [Genetic Algorithm](https://en.wikipedia.org/wiki/Genetic_algorithm) (GA) for optimization. UPDATED 25 September 2022. + +Hyperparameters in ML control various aspects of training, and finding optimal values for them can be a challenge. Traditional methods like grid searches can quickly become intractable due to 1) the high dimensional search space 2) unknown correlations among the dimensions, and 3) expensive nature of evaluating the fitness at each point, making GA a suitable candidate for hyperparameter searches. + +## Before You Start + +Clone repo and install [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) in a [**Python>=3.8.0**](https://www.python.org/) environment, including [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/). 
[Models](https://github.com/ultralytics/yolov5/tree/master/models) and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) download automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). + +```bash +git clone https://github.com/ultralytics/yolov5 # clone +cd yolov5 +pip install -r requirements.txt # install +``` + +## 1. Initialize Hyperparameters + +YOLOv5 has about 30 hyperparameters used for various training settings. These are defined in `*.yaml` files in the `/data/hyps` directory. Better initial guesses will produce better final results, so it is important to initialize these values properly before evolving. If in doubt, simply use the default values, which are optimized for YOLOv5 COCO training from scratch. + +```yaml +# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license +# Hyperparameters for low-augmentation COCO training from scratch +# python train.py --batch 64 --cfg yolov5n6.yaml --weights '' --data coco.yaml --img 640 --epochs 300 --linear +# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials + +lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) +lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf) +momentum: 0.937 # SGD momentum/Adam beta1 +weight_decay: 0.0005 # optimizer weight decay 5e-4 +warmup_epochs: 3.0 # warmup epochs (fractions ok) +warmup_momentum: 0.8 # warmup initial momentum +warmup_bias_lr: 0.1 # warmup initial bias lr +box: 0.05 # box loss gain +cls: 0.5 # cls loss gain +cls_pw: 1.0 # cls BCELoss positive_weight +obj: 1.0 # obj loss gain (scale with pixels) +obj_pw: 1.0 # obj BCELoss positive_weight +iou_t: 0.20 # IoU training threshold +anchor_t: 4.0 # anchor-multiple threshold +# anchors: 3 # anchors per output layer (0 to ignore) +fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) +hsv_h: 0.015 # image HSV-Hue augmentation (fraction) +hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) +hsv_v: 0.4 # image HSV-Value 
augmentation (fraction) +degrees: 0.0 # image rotation (+/- deg) +translate: 0.1 # image translation (+/- fraction) +scale: 0.5 # image scale (+/- gain) +shear: 0.0 # image shear (+/- deg) +perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 +flipud: 0.0 # image flip up-down (probability) +fliplr: 0.5 # image flip left-right (probability) +mosaic: 1.0 # image mosaic (probability) +mixup: 0.0 # image mixup (probability) +copy_paste: 0.0 # segment copy-paste (probability) +``` + +## 2. Define Fitness + +Fitness is the value we seek to maximize. In YOLOv5 we define a default fitness function as a weighted combination of metrics: `mAP@0.5` contributes 10% of the weight and `mAP@0.5:0.95` contributes the remaining 90%, with [Precision `P` and Recall `R`](https://en.wikipedia.org/wiki/Precision_and_recall) absent. You may adjust these as you see fit or use the default fitness definition in utils/metrics.py (recommended). + +```python +def fitness(x): + # Model fitness as a weighted combination of metrics + w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] + return (x[:, :4] * w).sum(1) +``` + +## 3. Evolve + +Evolution is performed about a base scenario which we seek to improve upon. The base scenario in this example is finetuning COCO128 for 10 epochs using pretrained YOLOv5s. The base scenario training command is: + +```bash +python train.py --epochs 10 --data coco128.yaml --weights yolov5s.pt --cache +``` + +To evolve hyperparameters **specific to this scenario**, starting from our initial values defined in **Section 1.**, and maximizing the fitness defined in **Section 2.**, append `--evolve`: + +```bash +# Single-GPU +python train.py --epochs 10 --data coco128.yaml --weights yolov5s.pt --cache --evolve + +# Multi-GPU +for i in 0 1 2 3 4 5 6 7; do + sleep $(expr 30 \* $i) && # 30-second delay (optional) + echo 'Starting GPU '$i'...' 
&& + nohup python train.py --epochs 10 --data coco128.yaml --weights yolov5s.pt --cache --device $i --evolve > evolve_gpu_$i.log & +done + +# Multi-GPU bash-while (not recommended) +for i in 0 1 2 3 4 5 6 7; do + sleep $(expr 30 \* $i) && # 30-second delay (optional) + echo 'Starting GPU '$i'...' && + "$(while true; do nohup python train.py... --device $i --evolve 1 > evolve_gpu_$i.log; done)" & +done +``` + +The default evolution settings will run the base scenario 300 times, i.e. for 300 generations. You can modify generations via the `--evolve` argument, i.e. `python train.py --evolve 1000`. + +The main genetic operators are **crossover** and **mutation**. In this work mutation is used, with an 80% probability and a 0.04 variance to create new offspring based on a combination of the best parents from all previous generations. Results are logged to `runs/evolve/exp/evolve.csv`, and the highest fitness offspring is saved every generation as `runs/evolve/hyp_evolved.yaml`: + +```yaml +# YOLOv5 Hyperparameter Evolution Results +# Best generation: 287 +# Last generation: 300 +# metrics/precision, metrics/recall, metrics/mAP_0.5, metrics/mAP_0.5:0.95, val/box_loss, val/obj_loss, val/cls_loss +# 0.54634, 0.55625, 0.58201, 0.33665, 0.056451, 0.042892, 0.013441 + +lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) +lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf) +momentum: 0.937 # SGD momentum/Adam beta1 +weight_decay: 0.0005 # optimizer weight decay 5e-4 +warmup_epochs: 3.0 # warmup epochs (fractions ok) +warmup_momentum: 0.8 # warmup initial momentum +warmup_bias_lr: 0.1 # warmup initial bias lr +box: 0.05 # box loss gain +cls: 0.5 # cls loss gain +cls_pw: 1.0 # cls BCELoss positive_weight +obj: 1.0 # obj loss gain (scale with pixels) +obj_pw: 1.0 # obj BCELoss positive_weight +iou_t: 0.20 # IoU training threshold +anchor_t: 4.0 # anchor-multiple threshold +# anchors: 3 # anchors per output layer (0 to ignore) +fl_gamma: 0.0 # focal loss gamma (efficientDet 
default gamma=1.5) +hsv_h: 0.015 # image HSV-Hue augmentation (fraction) +hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) +hsv_v: 0.4 # image HSV-Value augmentation (fraction) +degrees: 0.0 # image rotation (+/- deg) +translate: 0.1 # image translation (+/- fraction) +scale: 0.5 # image scale (+/- gain) +shear: 0.0 # image shear (+/- deg) +perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 +flipud: 0.0 # image flip up-down (probability) +fliplr: 0.5 # image flip left-right (probability) +mosaic: 1.0 # image mosaic (probability) +mixup: 0.0 # image mixup (probability) +copy_paste: 0.0 # segment copy-paste (probability) +``` + +We recommend a minimum of 300 generations of evolution for best results. Note that **evolution is generally expensive and time-consuming**, as the base scenario is trained hundreds of times, possibly requiring hundreds or thousands of GPU hours. + +## 4. Visualize + +`evolve.csv` is plotted as `evolve.png` by `utils.plots.plot_evolve()` after evolution finishes with one subplot per hyperparameter showing fitness (y-axis) vs hyperparameter values (x-axis). Yellow indicates higher concentrations. Vertical distributions indicate that a parameter has been disabled and does not mutate. This is user selectable in the `meta` dictionary in train.py, and is useful for fixing parameters and preventing them from evolving. + +![evolve](https://user-images.githubusercontent.com/26833433/89130469-f43e8e00-d4b9-11ea-9e28-f8ae3622516d.png) + +## Environments + +YOLOv5 is designed to be run in the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled): + +- **Notebooks** with free GPU: Run on Gradient Open In Colab Open In Kaggle +- **Google Cloud** Deep Learning VM. 
See [GCP Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial/) +- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/) +- **Docker Image**. See [Docker Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/docker_image_quickstart_tutorial/) Docker Pulls + +## Status + +YOLOv5 CI + +If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 [training](https://github.com/ultralytics/yolov5/blob/master/train.py), [validation](https://github.com/ultralytics/yolov5/blob/master/val.py), [inference](https://github.com/ultralytics/yolov5/blob/master/detect.py), [export](https://github.com/ultralytics/yolov5/blob/master/export.py) and [benchmarks](https://github.com/ultralytics/yolov5/blob/master/benchmarks.py) on macOS, Windows, and Ubuntu every 24 hours and on every commit. diff --git a/docs/en/yolov5/tutorials/model_ensembling.md b/docs/en/yolov5/tutorials/model_ensembling.md new file mode 100644 index 0000000..e7e1200 --- /dev/null +++ b/docs/en/yolov5/tutorials/model_ensembling.md @@ -0,0 +1,145 @@ +--- +comments: true +description: Learn how to ensemble YOLOv5 models for improved mAP and Recall! Clone the repo, install requirements, and start testing and inference. +keywords: YOLOv5, object detection, ensemble learning, mAP, Recall +--- + +📚 This guide explains how to use YOLOv5 🚀 **model ensembling** during testing and inference for improved mAP and Recall. UPDATED 25 September 2022. + +From [https://en.wikipedia.org/wiki/Ensemble_learning](https://en.wikipedia.org/wiki/Ensemble_learning): +> Ensemble modeling is a process where multiple diverse models are created to predict an outcome, either by using many different modeling algorithms or using different training data sets. 
The ensemble model then aggregates the prediction of each base model and results in one final prediction for the unseen data. The motivation for using ensemble models is to reduce the generalization error of the prediction. As long as the base models are diverse and independent, the prediction error of the model decreases when the ensemble approach is used. The approach seeks the wisdom of crowds in making a prediction. Even though the ensemble model has multiple base models within the model, it acts and performs as a single model. + +## Before You Start + +Clone repo and install [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) in a [**Python>=3.8.0**](https://www.python.org/) environment, including [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/). [Models](https://github.com/ultralytics/yolov5/tree/master/models) and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) download automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). + +```bash +git clone https://github.com/ultralytics/yolov5 # clone +cd yolov5 +pip install -r requirements.txt # install +``` + +## Test Normally + +Before ensembling we want to establish the baseline performance of a single model. This command tests YOLOv5x on COCO val2017 at image size 640 pixels. `yolov5x.pt` is the largest and most accurate model available. Other options are `yolov5s.pt`, `yolov5m.pt` and `yolov5l.pt`, or your own checkpoint from training a custom dataset `./weights/best.pt`. For details on all available models please see our README [table](https://github.com/ultralytics/yolov5#pretrained-checkpoints). 
+ +```bash +python val.py --weights yolov5x.pt --data coco.yaml --img 640 --half +``` + +Output: + +```shell +val: data=./data/coco.yaml, weights=['yolov5x.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.65, task=val, device=, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True +YOLOv5 🚀 v5.0-267-g6a3ee7c torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB) + +Fusing layers... +Model Summary: 476 layers, 87730285 parameters, 0 gradients + +val: Scanning '../datasets/coco/val2017' images and labels...4952 found, 48 missing, 0 empty, 0 corrupted: 100% 5000/5000 [00:01<00:00, 2846.03it/s] +val: New cache created: ../datasets/coco/val2017.cache + Class Images Labels P R mAP@.5 mAP@.5:.95: 100% 157/157 [02:30<00:00, 1.05it/s] + all 5000 36335 0.746 0.626 0.68 0.49 +Speed: 0.1ms pre-process, 22.4ms inference, 1.4ms NMS per image at shape (32, 3, 640, 640) # <--- baseline speed + +Evaluating pycocotools mAP... saving runs/val/exp/yolov5x_predictions.json... +... 
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.504 # <--- baseline mAP + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.688 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.546 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.351 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.551 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.644 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.382 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.628 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.681 # <--- baseline mAR + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.524 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.735 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.826 +``` + +## Ensemble Test + +Multiple pretrained models may be ensembled together at test and inference time by simply appending extra models to the `--weights` argument in any existing val.py or detect.py command. This example tests an ensemble of 2 models together: + +- YOLOv5x +- YOLOv5l6 + +```bash +python val.py --weights yolov5x.pt yolov5l6.pt --data coco.yaml --img 640 --half +``` + +Output: + +```shell +val: data=./data/coco.yaml, weights=['yolov5x.pt', 'yolov5l6.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.6, task=val, device=, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True +YOLOv5 🚀 v5.0-267-g6a3ee7c torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB) + +Fusing layers... +Model Summary: 476 layers, 87730285 parameters, 0 gradients # Model 1 +Fusing layers... 
+Model Summary: 501 layers, 77218620 parameters, 0 gradients # Model 2 +Ensemble created with ['yolov5x.pt', 'yolov5l6.pt'] # Ensemble notice + +val: Scanning '../datasets/coco/val2017.cache' images and labels... 4952 found, 48 missing, 0 empty, 0 corrupted: 100% 5000/5000 [00:00<00:00, 49695545.02it/s] + Class Images Labels P R mAP@.5 mAP@.5:.95: 100% 157/157 [03:58<00:00, 1.52s/it] + all 5000 36335 0.747 0.637 0.692 0.502 +Speed: 0.1ms pre-process, 39.5ms inference, 2.0ms NMS per image at shape (32, 3, 640, 640) # <--- ensemble speed + +Evaluating pycocotools mAP... saving runs/val/exp3/yolov5x_predictions.json... +... + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.515 # <--- ensemble mAP + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.699 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.557 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.356 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.563 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.668 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.387 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.638 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.689 # <--- ensemble mAR + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.526 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.743 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.844 +``` + +## Ensemble Inference + +Append extra models to the `--weights` argument to run ensemble inference: + +```bash +python detect.py --weights yolov5x.pt yolov5l6.pt --img 640 --source data/images +``` + +Output: + +```bash +YOLOv5 🚀 v5.0-267-g6a3ee7c torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB) + +Fusing layers... 
+Model Summary: 476 layers, 87730285 parameters, 0 gradients +Fusing layers... +Model Summary: 501 layers, 77218620 parameters, 0 gradients +Ensemble created with ['yolov5x.pt', 'yolov5l6.pt'] + +image 1/2 /content/yolov5/data/images/bus.jpg: 640x512 4 persons, 1 bus, 1 tie, Done. (0.063s) +image 2/2 /content/yolov5/data/images/zidane.jpg: 384x640 3 persons, 2 ties, Done. (0.056s) +Results saved to runs/detect/exp2 +Done. (0.223s) +``` + +YOLO inference result + +## Environments + +YOLOv5 is designed to be run in the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled): + +- **Notebooks** with free GPU: Run on Gradient Open In Colab Open In Kaggle +- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial/) +- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/) +- **Docker Image**. See [Docker Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/docker_image_quickstart_tutorial/) Docker Pulls + +## Status + +YOLOv5 CI + +If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 [training](https://github.com/ultralytics/yolov5/blob/master/train.py), [validation](https://github.com/ultralytics/yolov5/blob/master/val.py), [inference](https://github.com/ultralytics/yolov5/blob/master/detect.py), [export](https://github.com/ultralytics/yolov5/blob/master/export.py) and [benchmarks](https://github.com/ultralytics/yolov5/blob/master/benchmarks.py) on macOS, Windows, and Ubuntu every 24 hours and on every commit. 
diff --git a/docs/en/yolov5/tutorials/model_export.md b/docs/en/yolov5/tutorials/model_export.md new file mode 100644 index 0000000..05169f1 --- /dev/null +++ b/docs/en/yolov5/tutorials/model_export.md @@ -0,0 +1,244 @@ +--- +comments: true +description: Learn how to export a trained YOLOv5 model from PyTorch to different formats including TorchScript, ONNX, OpenVINO, TensorRT, and CoreML, and how to use these models. +keywords: Ultralytics, YOLOv5, model export, PyTorch, TorchScript, ONNX, OpenVINO, TensorRT, CoreML, TensorFlow +--- + +# TFLite, ONNX, CoreML, TensorRT Export + +📚 This guide explains how to export a trained YOLOv5 🚀 model from PyTorch to ONNX and TorchScript formats. UPDATED 8 December 2022. + +## Before You Start + +Clone repo and install [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) in a [**Python>=3.8.0**](https://www.python.org/) environment, including [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/). [Models](https://github.com/ultralytics/yolov5/tree/master/models) and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) download automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). + +```bash +git clone https://github.com/ultralytics/yolov5 # clone +cd yolov5 +pip install -r requirements.txt # install +``` + +For [TensorRT](https://developer.nvidia.com/tensorrt) export example (requires GPU) see our Colab [notebook](https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb#scrollTo=VTRwsvA9u7ln&line=2&uniqifier=1) appendix section. Open In Colab + +## Formats + +YOLOv5 inference is officially supported in 11 formats: + +💡 ProTip: Export to ONNX or OpenVINO for up to 3x CPU speedup. See [CPU Benchmarks](https://github.com/ultralytics/yolov5/pull/6613). 💡 ProTip: Export to TensorRT for up to 5x GPU speedup. See [GPU Benchmarks](https://github.com/ultralytics/yolov5/pull/6963). 
+ +| Format | `export.py --include` | Model | +|:---------------------------------------------------------------------------|:----------------------|:--------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov5s.pt` | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov5s.torchscript` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov5s.onnx` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov5s_openvino_model/` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov5s.engine` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov5s.mlmodel` | +| [TensorFlow SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov5s_saved_model/` | +| [TensorFlow GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov5s.pb` | +| [TensorFlow Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov5s.tflite` | +| [TensorFlow Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov5s_edgetpu.tflite` | +| [TensorFlow.js](https://www.tensorflow.org/js) | `tfjs` | `yolov5s_web_model/` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov5s_paddle_model/` | + +## Benchmarks + +Benchmarks below run on a Colab Pro with the YOLOv5 tutorial notebook Open In Colab. To reproduce: + +```bash +python benchmarks.py --weights yolov5s.pt --imgsz 640 --device 0 +``` + +### Colab Pro V100 GPU + +``` +benchmarks: weights=/content/yolov5/yolov5s.pt, imgsz=640, batch_size=1, data=/content/yolov5/data/coco128.yaml, device=0, half=False, test=False +Checking setup... 
+YOLOv5 🚀 v6.1-135-g7926afc torch 1.10.0+cu111 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB) +Setup complete ✅ (8 CPUs, 51.0 GB RAM, 46.7/166.8 GB disk) + +Benchmarks complete (458.07s) + Format mAP@0.5:0.95 Inference time (ms) +0 PyTorch 0.4623 10.19 +1 TorchScript 0.4623 6.85 +2 ONNX 0.4623 14.63 +3 OpenVINO NaN NaN +4 TensorRT 0.4617 1.89 +5 CoreML NaN NaN +6 TensorFlow SavedModel 0.4623 21.28 +7 TensorFlow GraphDef 0.4623 21.22 +8 TensorFlow Lite NaN NaN +9 TensorFlow Edge TPU NaN NaN +10 TensorFlow.js NaN NaN +``` + +### Colab Pro CPU + +``` +benchmarks: weights=/content/yolov5/yolov5s.pt, imgsz=640, batch_size=1, data=/content/yolov5/data/coco128.yaml, device=cpu, half=False, test=False +Checking setup... +YOLOv5 🚀 v6.1-135-g7926afc torch 1.10.0+cu111 CPU +Setup complete ✅ (8 CPUs, 51.0 GB RAM, 41.5/166.8 GB disk) + +Benchmarks complete (241.20s) + Format mAP@0.5:0.95 Inference time (ms) +0 PyTorch 0.4623 127.61 +1 TorchScript 0.4623 131.23 +2 ONNX 0.4623 69.34 +3 OpenVINO 0.4623 66.52 +4 TensorRT NaN NaN +5 CoreML NaN NaN +6 TensorFlow SavedModel 0.4623 123.79 +7 TensorFlow GraphDef 0.4623 121.57 +8 TensorFlow Lite 0.4623 316.61 +9 TensorFlow Edge TPU NaN NaN +10 TensorFlow.js NaN NaN +``` + +## Export a Trained YOLOv5 Model + +This command exports a pretrained YOLOv5s model to TorchScript and ONNX formats. `yolov5s.pt` is the 'small' model, the second-smallest model available. Other options are `yolov5n.pt`, `yolov5m.pt`, `yolov5l.pt` and `yolov5x.pt`, along with their P6 counterparts, e.g. `yolov5s6.pt`, or your own custom training checkpoint, e.g. `runs/exp/weights/best.pt`. For details on all available models please see our README [table](https://github.com/ultralytics/yolov5#pretrained-checkpoints). 
+ +```bash +python export.py --weights yolov5s.pt --include torchscript onnx +``` + +💡 ProTip: Add `--half` to export models at FP16 half precision for smaller file sizes + +Output: + +```bash +export: data=data/coco128.yaml, weights=['yolov5s.pt'], imgsz=[640, 640], batch_size=1, device=cpu, half=False, inplace=False, train=False, keras=False, optimize=False, int8=False, dynamic=False, simplify=False, opset=12, verbose=False, workspace=4, nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45, conf_thres=0.25, include=['torchscript', 'onnx'] +YOLOv5 🚀 v6.2-104-ge3e5122 Python-3.8.0 torch-1.12.1+cu113 CPU + +Downloading https://github.com/ultralytics/yolov5/releases/download/v6.2/yolov5s.pt to yolov5s.pt... +100% 14.1M/14.1M [00:00<00:00, 274MB/s] + +Fusing layers... +YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients + +PyTorch: starting from yolov5s.pt with output shape (1, 25200, 85) (14.1 MB) + +TorchScript: starting export with torch 1.12.1+cu113... +TorchScript: export success ✅ 1.7s, saved as yolov5s.torchscript (28.1 MB) + +ONNX: starting export with onnx 1.12.0... +ONNX: export success ✅ 2.3s, saved as yolov5s.onnx (28.0 MB) + +Export complete (5.5s) +Results saved to /content/yolov5 +Detect: python detect.py --weights yolov5s.onnx +Validate: python val.py --weights yolov5s.onnx +PyTorch Hub: model = torch.hub.load('ultralytics/yolov5', 'custom', 'yolov5s.onnx') +Visualize: https://netron.app/ +``` + +The 3 exported models will be saved alongside the original PyTorch model: +

YOLO export locations

+ +[Netron Viewer](https://github.com/lutzroeder/netron) is recommended for visualizing exported models: +

YOLO model visualization

+ +## Exported Model Usage Examples + +`detect.py` runs inference on exported models: + +```bash +python detect.py --weights yolov5s.pt # PyTorch + yolov5s.torchscript # TorchScript + yolov5s.onnx # ONNX Runtime or OpenCV DNN with dnn=True + yolov5s_openvino_model # OpenVINO + yolov5s.engine # TensorRT + yolov5s.mlmodel # CoreML (macOS only) + yolov5s_saved_model # TensorFlow SavedModel + yolov5s.pb # TensorFlow GraphDef + yolov5s.tflite # TensorFlow Lite + yolov5s_edgetpu.tflite # TensorFlow Edge TPU + yolov5s_paddle_model # PaddlePaddle +``` + +`val.py` runs validation on exported models: + +```bash +python val.py --weights yolov5s.pt # PyTorch + yolov5s.torchscript # TorchScript + yolov5s.onnx # ONNX Runtime or OpenCV DNN with dnn=True + yolov5s_openvino_model # OpenVINO + yolov5s.engine # TensorRT + yolov5s.mlmodel # CoreML (macOS Only) + yolov5s_saved_model # TensorFlow SavedModel + yolov5s.pb # TensorFlow GraphDef + yolov5s.tflite # TensorFlow Lite + yolov5s_edgetpu.tflite # TensorFlow Edge TPU + yolov5s_paddle_model # PaddlePaddle +``` + +Use PyTorch Hub with exported YOLOv5 models: + +``` python +import torch + +# Model +model = torch.hub.load('ultralytics/yolov5', 'custom', 'yolov5s.pt') + 'yolov5s.torchscript ') # TorchScript + 'yolov5s.onnx') # ONNX Runtime + 'yolov5s_openvino_model') # OpenVINO + 'yolov5s.engine') # TensorRT + 'yolov5s.mlmodel') # CoreML (macOS Only) + 'yolov5s_saved_model') # TensorFlow SavedModel + 'yolov5s.pb') # TensorFlow GraphDef + 'yolov5s.tflite') # TensorFlow Lite + 'yolov5s_edgetpu.tflite') # TensorFlow Edge TPU + 'yolov5s_paddle_model') # PaddlePaddle + +# Images +img = 'https://ultralytics.com/images/zidane.jpg' # or file, Path, PIL, OpenCV, numpy, list + +# Inference +results = model(img) + +# Results +results.print() # or .show(), .save(), .crop(), .pandas(), etc. 
+``` + +## OpenCV DNN inference + +OpenCV inference with ONNX models: + +```bash +python export.py --weights yolov5s.pt --include onnx + +python detect.py --weights yolov5s.onnx --dnn # detect +python val.py --weights yolov5s.onnx --dnn # validate +``` + +## C++ Inference + +YOLOv5 OpenCV DNN C++ inference on exported ONNX model examples: + +- [https://github.com/Hexmagic/ONNX-yolov5/blob/master/src/test.cpp](https://github.com/Hexmagic/ONNX-yolov5/blob/master/src/test.cpp) +- [https://github.com/doleron/yolov5-opencv-cpp-python](https://github.com/doleron/yolov5-opencv-cpp-python) + +YOLOv5 OpenVINO C++ inference examples: + +- [https://github.com/dacquaviva/yolov5-openvino-cpp-python](https://github.com/dacquaviva/yolov5-openvino-cpp-python) +- [https://github.com/UNeedCryDear/yolov5-seg-opencv-dnn-cpp](https://github.com/UNeedCryDear/yolov5-seg-opencv-dnn-cpp) + +## TensorFlow.js Web Browser Inference + +- [https://aukerul-shuvo.github.io/YOLOv5_TensorFlow-JS/](https://aukerul-shuvo.github.io/YOLOv5_TensorFlow-JS/) + +## Environments + +YOLOv5 is designed to be run in the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled): + +- **Notebooks** with free GPU: Run on Gradient Open In Colab Open In Kaggle +- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial/) +- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/) +- **Docker Image**. 
See [Docker Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/docker_image_quickstart_tutorial/) Docker Pulls + +## Status + +YOLOv5 CI + +If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 [training](https://github.com/ultralytics/yolov5/blob/master/train.py), [validation](https://github.com/ultralytics/yolov5/blob/master/val.py), [inference](https://github.com/ultralytics/yolov5/blob/master/detect.py), [export](https://github.com/ultralytics/yolov5/blob/master/export.py) and [benchmarks](https://github.com/ultralytics/yolov5/blob/master/benchmarks.py) on macOS, Windows, and Ubuntu every 24 hours and on every commit. diff --git a/docs/en/yolov5/tutorials/model_pruning_and_sparsity.md b/docs/en/yolov5/tutorials/model_pruning_and_sparsity.md new file mode 100644 index 0000000..44ea696 --- /dev/null +++ b/docs/en/yolov5/tutorials/model_pruning_and_sparsity.md @@ -0,0 +1,109 @@ +--- +comments: true +description: Improve YOLOv5 model efficiency by pruning with Ultralytics. Understand the process, conduct tests and view the impact on accuracy and sparsity. Test-maintained API environments. +keywords: YOLOv5, YOLO, Ultralytics, model pruning, PyTorch, machine learning, deep learning, computer vision, object detection +--- + +📚 This guide explains how to apply **pruning** to YOLOv5 🚀 models. UPDATED 25 September 2022. + +## Before You Start + +Clone repo and install [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) in a [**Python>=3.8.0**](https://www.python.org/) environment, including [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/). 
[Models](https://github.com/ultralytics/yolov5/tree/master/models) and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) download automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). + +```bash +git clone https://github.com/ultralytics/yolov5 # clone +cd yolov5 +pip install -r requirements.txt # install +``` + +## Test Normally + +Before pruning we want to establish a baseline performance to compare to. This command tests YOLOv5x on COCO val2017 at image size 640 pixels. `yolov5x.pt` is the largest and most accurate model available. Other options are `yolov5s.pt`, `yolov5m.pt` and `yolov5l.pt`, or your own checkpoint from training a custom dataset `./weights/best.pt`. For details on all available models please see our README [table](https://github.com/ultralytics/yolov5#pretrained-checkpoints). + +```bash +python val.py --weights yolov5x.pt --data coco.yaml --img 640 --half +``` + +Output: + +```shell +val: data=/content/yolov5/data/coco.yaml, weights=['yolov5x.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.65, task=val, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True, dnn=False +YOLOv5 🚀 v6.0-224-g4c40933 torch 1.10.0+cu111 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB) + +Fusing layers... +Model Summary: 444 layers, 86705005 parameters, 0 gradients +val: Scanning '/content/datasets/coco/val2017.cache' images and labels... 
4952 found, 48 missing, 0 empty, 0 corrupt: 100% 5000/5000 [00:00 + +30% pruned output: + +```bash +val: data=/content/yolov5/data/coco.yaml, weights=['yolov5x.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.65, task=val, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True, dnn=False +YOLOv5 🚀 v6.0-224-g4c40933 torch 1.10.0+cu111 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB) + +Fusing layers... +Model Summary: 444 layers, 86705005 parameters, 0 gradients +Pruning model... 0.3 global sparsity +val: Scanning '/content/datasets/coco/val2017.cache' images and labels... 4952 found, 48 missing, 0 empty, 0 corrupt: 100% 5000/5000 [00:00Run on Gradient Open In Colab Open In Kaggle +- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial/) +- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/) +- **Docker Image**. See [Docker Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/docker_image_quickstart_tutorial/) Docker Pulls + +## Status + +YOLOv5 CI + +If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 [training](https://github.com/ultralytics/yolov5/blob/master/train.py), [validation](https://github.com/ultralytics/yolov5/blob/master/val.py), [inference](https://github.com/ultralytics/yolov5/blob/master/detect.py), [export](https://github.com/ultralytics/yolov5/blob/master/export.py) and [benchmarks](https://github.com/ultralytics/yolov5/blob/master/benchmarks.py) on macOS, Windows, and Ubuntu every 24 hours and on every commit. 
diff --git a/docs/en/yolov5/tutorials/multi_gpu_training.md b/docs/en/yolov5/tutorials/multi_gpu_training.md new file mode 100644 index 0000000..6740f61 --- /dev/null +++ b/docs/en/yolov5/tutorials/multi_gpu_training.md @@ -0,0 +1,189 @@ +--- +comments: true +description: Learn how to train datasets on single or multiple GPUs using YOLOv5. Includes setup, training modes and result profiling for efficient leveraging of multiple GPUs. +keywords: YOLOv5, multi-GPU Training, YOLOv5 training, deep learning, machine learning, object detection, Ultralytics +--- + +📚 This guide explains how to properly use **multiple** GPUs to train a dataset with YOLOv5 🚀 on single or multiple machine(s). UPDATED 25 December 2022. + +## Before You Start + +Clone repo and install [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) in a [**Python>=3.8.0**](https://www.python.org/) environment, including [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/). [Models](https://github.com/ultralytics/yolov5/tree/master/models) and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) download automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). + +```bash +git clone https://github.com/ultralytics/yolov5 # clone +cd yolov5 +pip install -r requirements.txt # install +``` + +💡 ProTip! **Docker Image** is recommended for all Multi-GPU trainings. See [Docker Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/docker_image_quickstart_tutorial/) Docker Pulls + +💡 ProTip! `torch.distributed.run` replaces `torch.distributed.launch` in **PyTorch>=1.9**. See [docs](https://pytorch.org/docs/stable/distributed.html) for details. + +## Training + +Select a pretrained model to start training from. Here we select [YOLOv5s](https://github.com/ultralytics/yolov5/blob/master/models/yolov5s.yaml), the smallest and fastest model available. 
See our README [table](https://github.com/ultralytics/yolov5#pretrained-checkpoints) for a full comparison of all models. We will train this model with Multi-GPU on the [COCO](https://github.com/ultralytics/yolov5/blob/master/data/scripts/get_coco.sh) dataset. + +

YOLOv5 Models

+ +### Single GPU + +```bash +python train.py --batch 64 --data coco.yaml --weights yolov5s.pt --device 0 +``` + +### Multi-GPU [DataParallel](https://pytorch.org/docs/stable/nn.html#torch.nn.DataParallel) Mode (⚠️ not recommended) + +You can increase the `device` to use Multiple GPUs in DataParallel mode. + +```bash +python train.py --batch 64 --data coco.yaml --weights yolov5s.pt --device 0,1 +``` + +This method is slow and barely speeds up training compared to using just 1 GPU. + +### Multi-GPU [DistributedDataParallel](https://pytorch.org/docs/stable/nn.html#torch.nn.parallel.DistributedDataParallel) Mode (✅ recommended) + +You will have to pass `python -m torch.distributed.run --nproc_per_node`, followed by the usual arguments. + +```bash +python -m torch.distributed.run --nproc_per_node 2 train.py --batch 64 --data coco.yaml --weights yolov5s.pt --device 0,1 +``` + +`--nproc_per_node` specifies how many GPUs you would like to use. In the example above, it is 2. +`--batch ` is the total batch-size. It will be divided evenly to each GPU. In the example above, it is 64/2=32 per GPU. + +The code above will use GPUs `0... (N-1)`. + +
+ Use specific GPUs (click to expand) + +You can do so by simply passing `--device` followed by your specific GPUs. For example, in the code below, we will use GPUs `2,3`. + +```bash +python -m torch.distributed.run --nproc_per_node 2 train.py --batch 64 --data coco.yaml --cfg yolov5s.yaml --weights '' --device 2,3 +``` + +
+ +
+ Use SyncBatchNorm (click to expand) + +[SyncBatchNorm](https://pytorch.org/docs/master/generated/torch.nn.SyncBatchNorm.html) could increase accuracy for multi-GPU training; however, it will slow down training by a significant factor. It is **only** available for Multiple GPU DistributedDataParallel training. + +It is best used when the batch-size on **each** GPU is small (<= 8). + +To use SyncBatchNorm, simply pass `--sync-bn` to the command as shown below: + +```bash +python -m torch.distributed.run --nproc_per_node 2 train.py --batch 64 --data coco.yaml --cfg yolov5s.yaml --weights '' --sync-bn +``` + +
+ +
+ Use Multiple machines (click to expand) + +This is **only** available for Multiple GPU DistributedDataParallel training. + +Before we continue, make sure the files on all machines are the same (dataset, codebase, etc.). Afterwards, make sure the machines can communicate with each other. + +You will have to choose a master machine (the machine that the others will talk to). Note down its address (`master_addr`) and choose a port (`master_port`). I will use `master_addr = 192.168.1.1` and `master_port = 1234` for the example below. + +To use it, run the following: + +```bash +# On master machine 0 +python -m torch.distributed.run --nproc_per_node G --nnodes N --node_rank 0 --master_addr "192.168.1.1" --master_port 1234 train.py --batch 64 --data coco.yaml --cfg yolov5s.yaml --weights '' +``` + +```bash +# On machine R +python -m torch.distributed.run --nproc_per_node G --nnodes N --node_rank R --master_addr "192.168.1.1" --master_port 1234 train.py --batch 64 --data coco.yaml --cfg yolov5s.yaml --weights '' +``` + +where `G` is the number of GPUs per machine, `N` is the number of machines, and `R` is the machine number from `0...(N-1)`. Let's say I have two machines with two GPUs each, it would be `G = 2`, `N = 2`, and `R = 1` for the above. + +Training will not start until all `N` machines are connected. Output will only be shown on the master machine! + +
+ +### Notes + +- Windows support is untested, Linux is recommended. +- `--batch ` must be a multiple of the number of GPUs. +- GPU 0 will take slightly more memory than the other GPUs as it maintains EMA and is responsible for checkpointing etc. +- If you get `RuntimeError: Address already in use`, it could be because you are running multiple trainings at a time. To fix this, simply use a different port number by adding `--master_port` like below, + +```bash +python -m torch.distributed.run --master_port 1234 --nproc_per_node 2 ... +``` + +## Results + +DDP profiling results on an [AWS EC2 P4d instance](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/) with 8x A100 SXM4-40GB for YOLOv5l for 1 COCO epoch. + +
+ Profiling code + +```bash +# prepare +t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all -v "$(pwd)"/coco:/usr/src/coco $t +pip3 install torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html +cd .. && rm -rf app && git clone https://github.com/ultralytics/yolov5 -b master app && cd app +cp data/coco.yaml data/coco_profile.yaml + +# profile +python train.py --batch-size 16 --data coco_profile.yaml --weights yolov5l.pt --epochs 1 --device 0 +python -m torch.distributed.run --nproc_per_node 2 train.py --batch-size 32 --data coco_profile.yaml --weights yolov5l.pt --epochs 1 --device 0,1 +python -m torch.distributed.run --nproc_per_node 4 train.py --batch-size 64 --data coco_profile.yaml --weights yolov5l.pt --epochs 1 --device 0,1,2,3 +python -m torch.distributed.run --nproc_per_node 8 train.py --batch-size 128 --data coco_profile.yaml --weights yolov5l.pt --epochs 1 --device 0,1,2,3,4,5,6,7 +``` + +
+ +| GPUs
A100 | batch-size | CUDA_mem
device0 (G) | COCO
train | COCO
val | +|--------------|------------|------------------------------|--------------------|------------------| +| 1x | 16 | 26GB | 20:39 | 0:55 | +| 2x | 32 | 26GB | 11:43 | 0:57 | +| 4x | 64 | 26GB | 5:57 | 0:55 | +| 8x | 128 | 26GB | 3:09 | 0:57 | + +## FAQ + +If an error occurs, please read the checklist below first! (It could save you time.) + +
+ Checklist (click to expand) + +
    +
  • Have you properly read this post?
  • +
  • Have you tried to reclone the codebase? The code changes daily.
  • +
  • Have you tried to search for your error? Someone may have already encountered it in this repo or in another and have the solution.
  • +
  • Have you installed all the requirements listed on top (including the correct Python and PyTorch versions)?
  • +
  • Have you tried in other environments listed in the "Environments" section below?
  • +
  • Have you tried with another dataset like coco128 or coco2017? It will make it easier to find the root cause.
  • +
+ +If you went through all the above, feel free to raise an Issue by giving as much detail as possible following the template. + +
+ +## Environments + +YOLOv5 is designed to be run in the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled): + +- **Notebooks** with free GPU: Run on Gradient Open In Colab Open In Kaggle +- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial/) +- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/) +- **Docker Image**. See [Docker Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/docker_image_quickstart_tutorial/) Docker Pulls + +## Status + +YOLOv5 CI + +If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 [training](https://github.com/ultralytics/yolov5/blob/master/train.py), [validation](https://github.com/ultralytics/yolov5/blob/master/val.py), [inference](https://github.com/ultralytics/yolov5/blob/master/detect.py), [export](https://github.com/ultralytics/yolov5/blob/master/export.py) and [benchmarks](https://github.com/ultralytics/yolov5/blob/master/benchmarks.py) on macOS, Windows, and Ubuntu every 24 hours and on every commit. + +## Credits + +I would like to thank @MagicFrogSJTU, who did all the heavy lifting, and @glenn-jocher for guiding us along the way. 
diff --git a/docs/en/yolov5/tutorials/neural_magic_pruning_quantization.md b/docs/en/yolov5/tutorials/neural_magic_pruning_quantization.md new file mode 100644 index 0000000..08b448c --- /dev/null +++ b/docs/en/yolov5/tutorials/neural_magic_pruning_quantization.md @@ -0,0 +1,264 @@ +--- +comments: true +description: Explore how to achieve exceptional AI performance with DeepSparse's incredible inference speed. Discover how to deploy YOLOv5, and learn about model sparsification and fine-tuning with SparseML. +keywords: YOLOv5, DeepSparse, Ultralytics, Neural Magic, sparsification, inference runtime, deep learning, deployment, model fine-tuning, SparseML, AI performance, GPU-class performance +--- + + + +Welcome to software-delivered AI. + +This guide explains how to deploy YOLOv5 with Neural Magic's DeepSparse. + +DeepSparse is an inference runtime with exceptional performance on CPUs. For instance, compared to the ONNX Runtime baseline, DeepSparse offers a 5.8x speed-up for YOLOv5s, running on the same machine! + +

+ YOLOv5 speed improvement +

+ +For the first time, your deep learning workloads can meet the performance demands of production without the complexity and costs of hardware accelerators. Put simply, DeepSparse gives you the performance of GPUs and the simplicity of software: + +- **Flexible Deployments**: Run consistently across cloud, data center, and edge with any hardware provider from Intel to AMD to ARM +- **Infinite Scalability**: Scale vertically to 100s of cores, out with standard Kubernetes, or fully-abstracted with Serverless +- **Easy Integration**: Clean APIs for integrating your model into an application and monitoring it in production + +### How Does DeepSparse Achieve GPU-Class Performance? + +DeepSparse takes advantage of model sparsity to gain its performance speedup. + +Sparsification through pruning and quantization is a broadly studied technique, allowing order-of-magnitude reductions in the size and compute needed to execute a network, while maintaining high accuracy. DeepSparse is sparsity-aware, meaning it skips the zeroed out parameters, shrinking amount of compute in a forward pass. Since the sparse computation is now memory bound, DeepSparse executes the network depth-wise, breaking the problem into Tensor Columns, vertical stripes of computation that fit in cache. + +

+ YOLO model pruning +

+ +Sparse networks with compressed computation, executed depth-wise in cache, allows DeepSparse to deliver GPU-class performance on CPUs! + +### How Do I Create A Sparse Version of YOLOv5 Trained on My Data? + +Neural Magic's open-source model repository, SparseZoo, contains pre-sparsified checkpoints of each YOLOv5 model. Using SparseML, which is integrated with Ultralytics, you can fine-tune a sparse checkpoint onto your data with a single CLI command. + +[Checkout Neural Magic's YOLOv5 documentation for more details](https://docs.neuralmagic.com/use-cases/object-detection/sparsifying). + +## DeepSparse Usage + +We will walk through an example benchmarking and deploying a sparse version of YOLOv5s with DeepSparse. + +### Install DeepSparse + +Run the following to install DeepSparse. We recommend you use a virtual environment with Python. + +```bash +pip install "deepsparse[server,yolo,onnxruntime]" +``` + +### Collect an ONNX File + +DeepSparse accepts a model in the ONNX format, passed either as: + +- A SparseZoo stub which identifies an ONNX file in the SparseZoo +- A local path to an ONNX model in a filesystem + +The examples below use the standard dense and pruned-quantized YOLOv5s checkpoints, identified by the following SparseZoo stubs: + +```bash +zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/base-none +zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/pruned65_quant-none +``` + +### Deploy a Model + +DeepSparse offers convenient APIs for integrating your model into an application. + +To try the deployment examples below, pull down a sample image and save it as `basilica.jpg` with the following: + +```bash +wget -O basilica.jpg https://raw.githubusercontent.com/neuralmagic/deepsparse/main/src/deepsparse/yolo/sample_images/basilica.jpg +``` + +#### Python API + +`Pipelines` wrap pre-processing and output post-processing around the runtime, providing a clean interface for adding DeepSparse to an application. 
The DeepSparse-Ultralytics integration includes an out-of-the-box `Pipeline` that accepts raw images and outputs the bounding boxes. + +Create a `Pipeline` and run inference: + +```python +from deepsparse import Pipeline + +# list of images in local filesystem +images = ["basilica.jpg"] + +# create Pipeline +model_stub = "zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/pruned65_quant-none" +yolo_pipeline = Pipeline.create( + task="yolo", + model_path=model_stub, +) + +# run inference on images, receive bounding boxes + classes +pipeline_outputs = yolo_pipeline(images=images, iou_thres=0.6, conf_thres=0.001) +print(pipeline_outputs) +``` + +If you are running in the cloud, you may get an error that OpenCV cannot find `libGL.so.1`. Running the following on Ubuntu installs it: + +``` +apt-get install libgl1 +``` + +#### HTTP Server + +DeepSparse Server runs on top of the popular FastAPI web framework and Uvicorn web server. With just a single CLI command, you can easily set up a model service endpoint with DeepSparse. The Server supports any Pipeline from DeepSparse, including object detection with YOLOv5, enabling you to send raw images to the endpoint and receive the bounding boxes. 
+ +Spin up the Server with the pruned-quantized YOLOv5s: + +```bash +deepsparse.server \ + --task yolo \ + --model_path zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/pruned65_quant-none +``` + +An example request, using Python's `requests` package: + +```python +import requests, json + +# list of images for inference (local files on client side) +path = ['basilica.jpg'] +files = [('request', open(img, 'rb')) for img in path] + +# send request over HTTP to /predict/from_files endpoint +url = 'http://0.0.0.0:5543/predict/from_files' +resp = requests.post(url=url, files=files) + +# response is returned in JSON +annotations = json.loads(resp.text) # dictionary of annotation results +bounding_boxes = annotations["boxes"] +labels = annotations["labels"] +``` + +#### Annotate CLI + +You can also use the annotate command to have the engine save an annotated photo on disk. Try --source 0 to annotate your live webcam feed! + +```bash +deepsparse.object_detection.annotate --model_filepath zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/pruned65_quant-none --source basilica.jpg +``` + +Running the above command will create an `annotation-results` folder and save the annotated image inside. + +

+annotated +

+ +## Benchmarking Performance + +We will compare DeepSparse's throughput to ONNX Runtime's throughput on YOLOv5s, using DeepSparse's benchmarking script. + +The benchmarks were run on an AWS `c6i.8xlarge` instance (16 cores). + +### Batch 32 Performance Comparison + +#### ONNX Runtime Baseline + +At batch 32, ONNX Runtime achieves 42 images/sec with the standard dense YOLOv5s: + +```bash +deepsparse.benchmark zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/base-none -s sync -b 32 -nstreams 1 -e onnxruntime + +> Original Model Path: zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/base-none +> Batch Size: 32 +> Scenario: sync +> Throughput (items/sec): 41.9025 +``` + +#### DeepSparse Dense Performance + +While DeepSparse offers its best performance with optimized sparse models, it also performs well with the standard dense YOLOv5s. + +At batch 32, DeepSparse achieves 70 images/sec with the standard dense YOLOv5s, a **1.7x performance improvement over ORT**! + +```bash +deepsparse.benchmark zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/base-none -s sync -b 32 -nstreams 1 + +> Original Model Path: zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/base-none +> Batch Size: 32 +> Scenario: sync +> Throughput (items/sec): 69.5546 +``` + +#### DeepSparse Sparse Performance + +When sparsity is applied to the model, DeepSparse's performance gains over ONNX Runtime is even stronger. + +At batch 32, DeepSparse achieves 241 images/sec with the pruned-quantized YOLOv5s, a **5.8x performance improvement over ORT**! + +```bash +deepsparse.benchmark zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/pruned65_quant-none -s sync -b 32 -nstreams 1 + +> Original Model Path: zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/pruned65_quant-none +> Batch Size: 32 +> Scenario: sync +> Throughput (items/sec): 241.2452 +``` + +### Batch 1 Performance Comparison + +DeepSparse is also able to gain a speed-up over ONNX Runtime for the latency-sensitive, batch 1 scenario. 
+ +#### ONNX Runtime Baseline + +At batch 1, ONNX Runtime achieves 48 images/sec with the standard, dense YOLOv5s. + +```bash +deepsparse.benchmark zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/base-none -s sync -b 1 -nstreams 1 -e onnxruntime + +> Original Model Path: zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/base-none +> Batch Size: 1 +> Scenario: sync +> Throughput (items/sec): 48.0921 +``` + +#### DeepSparse Sparse Performance + +At batch 1, DeepSparse achieves 135 items/sec with a pruned-quantized YOLOv5s, **a 2.8x performance gain over ONNX Runtime!** + +```bash +deepsparse.benchmark zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/pruned65_quant-none -s sync -b 1 -nstreams 1 + +> Original Model Path: zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/pruned65_quant-none +> Batch Size: 1 +> Scenario: sync +> Throughput (items/sec): 134.9468 +``` + +Since `c6i.8xlarge` instances have VNNI instructions, DeepSparse's throughput can be pushed further if weights are pruned in blocks of 4. + +At batch 1, DeepSparse achieves 180 items/sec with a 4-block pruned-quantized YOLOv5s, a **3.7x performance gain over ONNX Runtime!** + +```bash +deepsparse.benchmark zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/pruned35_quant-none-vnni -s sync -b 1 -nstreams 1 + +> Original Model Path: zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/pruned35_quant-none-vnni +> Batch Size: 1 +> Scenario: sync +> Throughput (items/sec): 179.7375 +``` + +## Get Started With DeepSparse + +**Research or Testing?** DeepSparse Community is free for research and testing. Get started with our [Documentation](https://docs.neuralmagic.com/). 
diff --git a/docs/en/yolov5/tutorials/pytorch_hub_model_loading.md b/docs/en/yolov5/tutorials/pytorch_hub_model_loading.md new file mode 100644 index 0000000..31f0750 --- /dev/null +++ b/docs/en/yolov5/tutorials/pytorch_hub_model_loading.md @@ -0,0 +1,331 @@ +--- +comments: true +description: Detailed guide on loading YOLOv5 from PyTorch Hub. Includes examples & tips on inference settings, multi-GPU inference, training and more. +keywords: Ultralytics, YOLOv5, PyTorch, loading YOLOv5, PyTorch Hub, inference, multi-GPU inference, training +--- + +📚 This guide explains how to load YOLOv5 🚀 from PyTorch Hub at [https://pytorch.org/hub/ultralytics_yolov5](https://pytorch.org/hub/ultralytics_yolov5). UPDATED 26 March 2023. + +## Before You Start + +Install [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) in a [**Python>=3.8.0**](https://www.python.org/) environment, including [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/). [Models](https://github.com/ultralytics/yolov5/tree/master/models) and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) download automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). + +```bash +pip install -r https://raw.githubusercontent.com/ultralytics/yolov5/master/requirements.txt +``` + +💡 ProTip: Cloning [https://github.com/ultralytics/yolov5](https://github.com/ultralytics/yolov5) is **not** required 😃 + +## Load YOLOv5 with PyTorch Hub + +### Simple Example + +This example loads a pretrained YOLOv5s model from PyTorch Hub as `model` and passes an image for inference. `'yolov5s'` is the lightest and fastest YOLOv5 model. For details on all available models please see the [README](https://github.com/ultralytics/yolov5#pretrained-checkpoints). 
+ +```python +import torch + +# Model +model = torch.hub.load('ultralytics/yolov5', 'yolov5s') + +# Image +im = 'https://ultralytics.com/images/zidane.jpg' + +# Inference +results = model(im) + +results.pandas().xyxy[0] +# xmin ymin xmax ymax confidence class name +# 0 749.50 43.50 1148.0 704.5 0.874023 0 person +# 1 433.50 433.50 517.5 714.5 0.687988 27 tie +# 2 114.75 195.75 1095.0 708.0 0.624512 0 person +# 3 986.00 304.00 1028.0 420.0 0.286865 27 tie +``` + +### Detailed Example + +This example shows **batched inference** with **PIL** and **OpenCV** image sources. `results` can be **printed** to console, **saved** to `runs/hub`, **showed** to screen on supported environments, and returned as **tensors** or **pandas** dataframes. + +```python +import cv2 +import torch +from PIL import Image + +# Model +model = torch.hub.load('ultralytics/yolov5', 'yolov5s') + +# Images +for f in 'zidane.jpg', 'bus.jpg': + torch.hub.download_url_to_file('https://ultralytics.com/images/' + f, f) # download 2 images +im1 = Image.open('zidane.jpg') # PIL image +im2 = cv2.imread('bus.jpg')[..., ::-1] # OpenCV image (BGR to RGB) + +# Inference +results = model([im1, im2], size=640) # batch of images + +# Results +results.print() +results.save() # or .show() + +results.xyxy[0] # im1 predictions (tensor) +results.pandas().xyxy[0] # im1 predictions (pandas) +# xmin ymin xmax ymax confidence class name +# 0 749.50 43.50 1148.0 704.5 0.874023 0 person +# 1 433.50 433.50 517.5 714.5 0.687988 27 tie +# 2 114.75 195.75 1095.0 708.0 0.624512 0 person +# 3 986.00 304.00 1028.0 420.0 0.286865 27 tie +``` + +YOLO inference results on zidane.jpg +YOLO inference results on bus.jpg + +For all inference options see YOLOv5 `AutoShape()` forward [method](https://github.com/ultralytics/yolov5/blob/30e4c4f09297b67afedf8b2bcd851833ddc9dead/models/common.py#L243-L252). + +### Inference Settings + +YOLOv5 models contain various inference attributes such as **confidence threshold**, **IoU threshold**, etc. 
which can be set by: + +```python +model.conf = 0.25 # NMS confidence threshold + iou = 0.45 # NMS IoU threshold + agnostic = False # NMS class-agnostic + multi_label = False # NMS multiple labels per box + classes = None # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs + max_det = 1000 # maximum number of detections per image + amp = False # Automatic Mixed Precision (AMP) inference + +results = model(im, size=320) # custom inference size +``` + +### Device + +Models can be transferred to any device after creation: + +```python +model.cpu() # CPU +model.cuda() # GPU +model.to(device) # i.e. device=torch.device(0) +``` + +Models can also be created directly on any `device`: + +```python +model = torch.hub.load('ultralytics/yolov5', 'yolov5s', device='cpu') # load on CPU +``` + +💡 ProTip: Input images are automatically transferred to the correct model device before inference. + +### Silence Outputs + +Models can be loaded silently with `_verbose=False`: + +```python +model = torch.hub.load('ultralytics/yolov5', 'yolov5s', _verbose=False) # load silently +``` + +### Input Channels + +To load a pretrained YOLOv5s model with 4 input channels rather than the default 3: + +```python +model = torch.hub.load('ultralytics/yolov5', 'yolov5s', channels=4) +``` + +In this case the model will be composed of pretrained weights **except for** the very first input layer, which is no longer the same shape as the pretrained input layer. The input layer will remain initialized by random weights. + +### Number of Classes + +To load a pretrained YOLOv5s model with 10 output classes rather than the default 80: + +```python +model = torch.hub.load('ultralytics/yolov5', 'yolov5s', classes=10) +``` + +In this case the model will be composed of pretrained weights **except for** the output layers, which are no longer the same shape as the pretrained output layers. The output layers will remain initialized by random weights. 
+ +### Force Reload + +If you run into problems with the above steps, setting `force_reload=True` may help by discarding the existing cache and force a fresh download of the latest YOLOv5 version from PyTorch Hub. + +```python +model = torch.hub.load('ultralytics/yolov5', 'yolov5s', force_reload=True) # force reload +``` + +### Screenshot Inference + +To run inference on your desktop screen: + +```python +import torch +from PIL import ImageGrab + +# Model +model = torch.hub.load('ultralytics/yolov5', 'yolov5s') + +# Image +im = ImageGrab.grab() # take a screenshot + +# Inference +results = model(im) +``` + +### Multi-GPU Inference + +YOLOv5 models can be loaded to multiple GPUs in parallel with threaded inference: + +```python +import torch +import threading + +def run(model, im): + results = model(im) + results.save() + +# Models +model0 = torch.hub.load('ultralytics/yolov5', 'yolov5s', device=0) +model1 = torch.hub.load('ultralytics/yolov5', 'yolov5s', device=1) + +# Inference +threading.Thread(target=run, args=[model0, 'https://ultralytics.com/images/zidane.jpg'], daemon=True).start() +threading.Thread(target=run, args=[model1, 'https://ultralytics.com/images/bus.jpg'], daemon=True).start() +``` + +### Training + +To load a YOLOv5 model for training rather than inference, set `autoshape=False`. To load a model with randomly initialized weights (to train from scratch) use `pretrained=False`. You must provide your own training script in this case. Alternatively see our YOLOv5 [Train Custom Data Tutorial](https://docs.ultralytics.com/yolov5/tutorials/train_custom_data) for model training. + +```python +model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False) # load pretrained +model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False, pretrained=False) # load scratch +``` + +### Base64 Results + +For use with API services. 
See https://github.com/ultralytics/yolov5/pull/2291 and [Flask REST API](https://github.com/ultralytics/yolov5/tree/master/utils/flask_rest_api) example for details. + +```python +results = model(im) # inference + +results.ims # array of original images (as np array) passed to model for inference +results.render() # updates results.ims with boxes and labels +for im in results.ims: + buffered = BytesIO() + im_base64 = Image.fromarray(im) + im_base64.save(buffered, format="JPEG") + print(base64.b64encode(buffered.getvalue()).decode('utf-8')) # base64 encoded image with results +``` + +### Cropped Results + +Results can be returned and saved as detection crops: + +```python +results = model(im) # inference +crops = results.crop(save=True) # cropped detections dictionary +``` + +### Pandas Results + +Results can be returned as [Pandas DataFrames](https://pandas.pydata.org/): + +```python +results = model(im) # inference +results.pandas().xyxy[0] # Pandas DataFrame +``` + +
+ Pandas Output (click to expand) + +```python +print(results.pandas().xyxy[0]) +# xmin ymin xmax ymax confidence class name +# 0 749.50 43.50 1148.0 704.5 0.874023 0 person +# 1 433.50 433.50 517.5 714.5 0.687988 27 tie +# 2 114.75 195.75 1095.0 708.0 0.624512 0 person +# 3 986.00 304.00 1028.0 420.0 0.286865 27 tie +``` + +
+ +### Sorted Results + +Results can be sorted by column, i.e. to sort license plate digit detection left-to-right (x-axis): + +```python +results = model(im) # inference +results.pandas().xyxy[0].sort_values('xmin') # sorted left-right +``` + +### Box-Cropped Results + +Results can be returned and saved as detection crops: + +```python +results = model(im) # inference +crops = results.crop(save=True) # cropped detections dictionary +``` + +### JSON Results + +Results can be returned in JSON format once converted to `.pandas()` dataframes using the `.to_json()` method. The JSON format can be modified using the `orient` argument. See pandas `.to_json()` [documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_json.html) for details. + +```python +results = model(ims) # inference +results.pandas().xyxy[0].to_json(orient="records") # JSON img1 predictions +``` + +
+ JSON Output (click to expand) + +```json +[ +{"xmin":749.5,"ymin":43.5,"xmax":1148.0,"ymax":704.5,"confidence":0.8740234375,"class":0,"name":"person"}, +{"xmin":433.5,"ymin":433.5,"xmax":517.5,"ymax":714.5,"confidence":0.6879882812,"class":27,"name":"tie"}, +{"xmin":115.25,"ymin":195.75,"xmax":1096.0,"ymax":708.0,"confidence":0.6254882812,"class":0,"name":"person"}, +{"xmin":986.0,"ymin":304.0,"xmax":1028.0,"ymax":420.0,"confidence":0.2873535156,"class":27,"name":"tie"} +] +``` + +
+ +## Custom Models + +This example loads a custom 20-class [VOC](https://github.com/ultralytics/yolov5/blob/master/data/VOC.yaml)-trained YOLOv5s model `'best.pt'` with PyTorch Hub. + +```python +model = torch.hub.load('ultralytics/yolov5', 'custom', path='path/to/best.pt') # local model +model = torch.hub.load('path/to/yolov5', 'custom', path='path/to/best.pt', source='local') # local repo +``` + +## TensorRT, ONNX and OpenVINO Models + +PyTorch Hub supports inference on most YOLOv5 export formats, including custom trained models. See [TFLite, ONNX, CoreML, TensorRT Export tutorial](https://docs.ultralytics.com/yolov5/tutorials/model_export) for details on exporting models. + +💡 ProTip: **TensorRT** may be up to 2-5X faster than PyTorch on [**GPU benchmarks**](https://github.com/ultralytics/yolov5/pull/6963) +💡 ProTip: **ONNX** and **OpenVINO** may be up to 2-3X faster than PyTorch on [**CPU benchmarks**](https://github.com/ultralytics/yolov5/pull/6613) + +```python +model = torch.hub.load('ultralytics/yolov5', 'custom', path='yolov5s.pt') # PyTorch + 'yolov5s.torchscript') # TorchScript + 'yolov5s.onnx') # ONNX + 'yolov5s_openvino_model/') # OpenVINO + 'yolov5s.engine') # TensorRT + 'yolov5s.mlmodel') # CoreML (macOS-only) + 'yolov5s.tflite') # TFLite + 'yolov5s_paddle_model/') # PaddlePaddle +``` + +## Environments + +YOLOv5 is designed to be run in the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled): + +- **Notebooks** with free GPU: Run on Gradient Open In Colab Open In Kaggle +- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial/) +- **Amazon** Deep Learning AMI. 
See [AWS Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/) +- **Docker Image**. See [Docker Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/docker_image_quickstart_tutorial/) Docker Pulls + +## Status + +YOLOv5 CI + +If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 [training](https://github.com/ultralytics/yolov5/blob/master/train.py), [validation](https://github.com/ultralytics/yolov5/blob/master/val.py), [inference](https://github.com/ultralytics/yolov5/blob/master/detect.py), [export](https://github.com/ultralytics/yolov5/blob/master/export.py) and [benchmarks](https://github.com/ultralytics/yolov5/blob/master/benchmarks.py) on macOS, Windows, and Ubuntu every 24 hours and on every commit. diff --git a/docs/en/yolov5/tutorials/roboflow_datasets_integration.md b/docs/en/yolov5/tutorials/roboflow_datasets_integration.md new file mode 100644 index 0000000..80a2831 --- /dev/null +++ b/docs/en/yolov5/tutorials/roboflow_datasets_integration.md @@ -0,0 +1,52 @@ +--- +comments: true +description: Learn how to use Roboflow for organizing, labelling, preparing, and hosting your datasets for YOLOv5 models. Enhance your model deployments with our platform. +keywords: Ultralytics, YOLOv5, Roboflow, data organization, data labelling, data preparation, model deployment, active learning, machine learning pipeline +--- + +# Roboflow Datasets + +You can now use Roboflow to organize, label, prepare, version, and host your datasets for training YOLOv5 🚀 models. Roboflow is free to use with YOLOv5 if you make your workspace public. UPDATED 7 June 2023. + +!!! 
Warning + + Roboflow users can use Ultralytics under the [AGPL license](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) or procure an [Enterprise license](https://ultralytics.com/license) directly from Ultralytics. Be aware that Roboflow does **not** provide Ultralytics licenses, and it is the responsibility of the user to ensure appropriate licensing. + +## Upload + +You can upload your data to Roboflow via [web UI](https://docs.roboflow.com/adding-data), [REST API](https://docs.roboflow.com/adding-data/upload-api), or [Python](https://docs.roboflow.com/python). + +## Labeling + +After uploading data to Roboflow, you can label your data and review previous labels. + +[![Roboflow Annotate](https://roboflow-darknet.s3.us-east-2.amazonaws.com/roboflow-annotate.gif)](https://roboflow.com/annotate) + +## Versioning + +You can make versions of your dataset with different preprocessing and offline augmentation options. YOLOv5 does online augmentations natively, so be intentional when layering Roboflow's offline augs on top. + +![Roboflow Preprocessing](https://roboflow-darknet.s3.us-east-2.amazonaws.com/robolfow-preprocessing.png) + +## Exporting Data + +You can download your data in YOLOv5 format to quickly begin training. + +``` +from roboflow import Roboflow +rf = Roboflow(api_key="YOUR API KEY HERE") +project = rf.workspace().project("YOUR PROJECT") +dataset = project.version("YOUR VERSION").download("yolov5") +``` + +## Custom Training + +We have released a custom training tutorial demonstrating all of the above capabilities. You can access the code here: + +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/roboflow-ai/yolov5-custom-training-tutorial/blob/main/yolov5-custom-training.ipynb) + +## Active Learning + +The real world is messy and your model will invariably encounter situations your dataset didn't anticipate. 
Using [active learning](https://blog.roboflow.com/what-is-active-learning/) is an important strategy to iteratively improve your dataset and model. With the Roboflow and YOLOv5 integration, you can quickly make improvements on your model deployments by using a battle-tested machine learning pipeline. + +

Roboflow active learning

diff --git a/docs/en/yolov5/tutorials/running_on_jetson_nano.md b/docs/en/yolov5/tutorials/running_on_jetson_nano.md new file mode 100644 index 0000000..86846b9 --- /dev/null +++ b/docs/en/yolov5/tutorials/running_on_jetson_nano.md @@ -0,0 +1,319 @@ +--- +comments: true +description: Detailed guide on deploying trained models on NVIDIA Jetson using TensorRT and DeepStream SDK. Optimize the inference performance on Jetson with Ultralytics. +keywords: TensorRT, NVIDIA Jetson, DeepStream SDK, deployment, Ultralytics, YOLO, Machine Learning, AI, Deep Learning, model optimization, inference performance +--- + +# Deploy on NVIDIA Jetson using TensorRT and DeepStream SDK + +📚 This guide explains how to deploy a trained model into NVIDIA Jetson Platform and perform inference using TensorRT and DeepStream SDK. Here we use TensorRT to maximize the inference performance on the Jetson platform. UPDATED 18 November 2022. + +## Hardware Verification + +We have tested and verified this guide on the following Jetson devices + +- [Seeed reComputer J1010 built with Jetson Nano module](https://www.seeedstudio.com/Jetson-10-1-A0-p-5336.html) +- [Seeed reComputer J2021 built with Jetson Xavier NX module](https://www.seeedstudio.com/reComputer-J2021-p-5438.html) + +## Before You Start + +Make sure you have properly installed **JetPack SDK** with all the **SDK Components** and **DeepStream SDK** on the Jetson device as this includes CUDA, TensorRT and DeepStream SDK which are needed for this guide. + +JetPack SDK provides a full development environment for hardware-accelerated AI-at-the-edge development. All Jetson modules and developer kits are supported by JetPack SDK. + +There are two major installation methods including, + +1. SD Card Image Method +2. NVIDIA SDK Manager Method + +You can find a very detailed installation guide from NVIDIA [official website](https://developer.nvidia.com/jetpack-sdk-461). 
You can also find guides corresponding to the above-mentioned [reComputer J1010](https://wiki.seeedstudio.com/reComputer_J1010_J101_Flash_Jetpack) and [reComputer J2021](https://wiki.seeedstudio.com/reComputer_J2021_J202_Flash_Jetpack). + +## Install Necessary Packages + +- **Step 1.** Access the terminal of Jetson device, install pip and upgrade it + +```sh +sudo apt update +sudo apt install -y python3-pip +pip3 install --upgrade pip +``` + +- **Step 2.** Clone the following repo + +```sh +git clone https://github.com/ultralytics/yolov5 +``` + +- **Step 3.** Open **requirements.txt** + +```sh +cd yolov5 +vi requirements.txt +``` + +- **Step 4.** Comment out the following lines so that they read as shown below. Here you need to press **i** first to enter editing mode. Press **ESC**, then type **:wq** to save and quit + +```sh +# torch>=1.8.0 +# torchvision>=0.9.0 +``` + +**Note:** torch and torchvision are excluded for now because they will be installed later. + +- **Step 5.** Install the dependency below + +```sh +sudo apt install -y libfreetype6-dev +``` + +- **Step 6.** Install the necessary packages + +```sh +pip3 install -r requirements.txt +``` + +## Install PyTorch and Torchvision + +We cannot install PyTorch and Torchvision from pip because the pip packages are not compatible with the Jetson platform, which is based on **ARM aarch64 architecture**. Therefore, we need to manually install pre-built PyTorch pip wheel and compile/install Torchvision from source. + +Visit [this page](https://forums.developer.nvidia.com/t/pytorch-for-jetson) to access all the PyTorch and Torchvision links. + +Here are some of the versions supported by JetPack 4.6 and above. 
+ +**PyTorch v1.10.0** + +Supported by JetPack 4.4 (L4T R32.4.3) / JetPack 4.4.1 (L4T R32.4.4) / JetPack 4.5 (L4T R32.5.0) / JetPack 4.5.1 (L4T R32.5.1) / JetPack 4.6 (L4T R32.6.1) with Python 3.6 + +**file_name:** torch-1.10.0-cp36-cp36m-linux_aarch64.whl +**URL:** [https://nvidia.box.com/shared/static/fjtbno0vpo676a25cgvuqc1wty0fkkg6.whl](https://nvidia.box.com/shared/static/fjtbno0vpo676a25cgvuqc1wty0fkkg6.whl) + +**PyTorch v1.12.0** + +Supported by JetPack 5.0 (L4T R34.1.0) / JetPack 5.0.1 (L4T R34.1.1) / JetPack 5.0.2 (L4T R35.1.0) with Python 3.8 + +**file_name:** torch-1.12.0a0+2c916ef.nv22.3-cp38-cp38-linux_aarch64.whl +**URL:** [https://developer.download.nvidia.com/compute/redist/jp/v50/pytorch/torch-1.12.0a0+2c916ef.nv22.3-cp38-cp38-linux_aarch64.whl](https://developer.download.nvidia.com/compute/redist/jp/v50/pytorch/torch-1.12.0a0+2c916ef.nv22.3-cp38-cp38-linux_aarch64.whl) + +- **Step 1.** Install torch according to your JetPack version in the following format + +```sh +wget <URL> -O <file_name> +pip3 install <file_name> +``` + +For example, here we are running **JP4.6.1**, and therefore we choose **PyTorch v1.10.0** + +```sh +cd ~ +sudo apt-get install -y libopenblas-base libopenmpi-dev +wget https://nvidia.box.com/shared/static/fjtbno0vpo676a25cgvuqc1wty0fkkg6.whl -O torch-1.10.0-cp36-cp36m-linux_aarch64.whl +pip3 install torch-1.10.0-cp36-cp36m-linux_aarch64.whl +``` + +- **Step 2.** Install torchvision depending on the version of PyTorch that you have installed. 
For example, we chose **PyTorch v1.10.0**, which means we need to choose **Torchvision v0.11.1** + +```sh +sudo apt install -y libjpeg-dev zlib1g-dev +git clone --branch v0.11.1 https://github.com/pytorch/vision torchvision +cd torchvision +sudo python3 setup.py install +``` + +Here is a list of the torchvision versions that you need to install according to the PyTorch version: + +- PyTorch v1.10 - torchvision v0.11.1 +- PyTorch v1.12 - torchvision v0.13.0 + +## DeepStream Configuration for YOLOv5 + +- **Step 1.** Clone the following repo + +```sh +cd ~ +git clone https://github.com/marcoslucianops/DeepStream-Yolo +``` + +- **Step 2.** Copy **gen_wts_yoloV5.py** from **DeepStream-Yolo/utils** into the **yolov5** directory + +```sh +cp DeepStream-Yolo/utils/gen_wts_yoloV5.py yolov5 +``` + +- **Step 3.** Inside the yolov5 repo, download the **pt file** from YOLOv5 releases (example for YOLOv5s 6.1) + +```sh +cd yolov5 +wget https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5s.pt +``` + +- **Step 4.** Generate the **cfg** and **wts** files + +```sh +python3 gen_wts_yoloV5.py -w yolov5s.pt +``` + +**Note**: To change the inference size (default: 640) + +```sh +-s SIZE +--size SIZE +-s HEIGHT WIDTH +--size HEIGHT WIDTH + +Example for 1280: + +-s 1280 +or +-s 1280 1280 +``` + +- **Step 5.** Copy the generated **cfg** and **wts** files into the **DeepStream-Yolo** folder + +```sh +cp yolov5s.cfg ~/DeepStream-Yolo +cp yolov5s.wts ~/DeepStream-Yolo +``` + +- **Step 6.** Open the **DeepStream-Yolo** folder and compile the library + +```sh +cd ~/DeepStream-Yolo +CUDA_VER=11.4 make -C nvdsinfer_custom_impl_Yolo # for DeepStream 6.1 +CUDA_VER=10.2 make -C nvdsinfer_custom_impl_Yolo # for DeepStream 6.0.1 / 6.0 +``` + +- **Step 7.** Edit the **config_infer_primary_yoloV5.txt** file according to your model + +```sh +[property] +... +custom-network-config=yolov5s.cfg +model-file=yolov5s.wts +... 
+``` + +- **Step 8.** Edit the **deepstream_app_config** file + +```sh +... +[primary-gie] +... +config-file=config_infer_primary_yoloV5.txt +``` + +- **Step 9.** Change the video source in **deepstream_app_config** file. Here a default video file is loaded as you can see below + +```sh +... +[source0] +... +uri=file:///opt/nvidia/deepstream/deepstream/samples/streams/sample_1080p_h264.mp4 +``` + +## Run the Inference + +```sh +deepstream-app -c deepstream_app_config.txt +``` + +
YOLOv5 with deepstream FP32
+ +The above result is running on **Jetson Xavier NX** with **FP32** and **YOLOv5s 640x640**. We can see that the **FPS** is around **30**. + +## INT8 Calibration + +If you want to use INT8 precision for inference, you need to follow the steps below + +- **Step 1.** Install OpenCV + +```sh +sudo apt-get install libopencv-dev +``` + +- **Step 2.** Compile/recompile the **nvdsinfer_custom_impl_Yolo** library with OpenCV support + +```sh +cd ~/DeepStream-Yolo +CUDA_VER=11.4 OPENCV=1 make -C nvdsinfer_custom_impl_Yolo # for DeepStream 6.1 +CUDA_VER=10.2 OPENCV=1 make -C nvdsinfer_custom_impl_Yolo # for DeepStream 6.0.1 / 6.0 +``` + +- **Step 3.** For the COCO dataset, download the [val2017](https://drive.google.com/file/d/1gbvfn7mcsGDRZ_luJwtITL-ru2kK99aK/view?usp=sharing), extract, and move to the **DeepStream-Yolo** folder + +- **Step 4.** Make a new directory for calibration images + +```sh +mkdir calibration +``` + +- **Step 5.** Run the following to select 1000 random images from the COCO dataset to run calibration + +```sh +for jpg in $(ls -1 val2017/*.jpg | sort -R | head -1000); do \ + cp ${jpg} calibration/; \ +done +``` + +**Note:** NVIDIA recommends at least 500 images to get good accuracy. In this example, 1000 images are chosen to get better accuracy (more images = more accuracy). Higher INT8_CALIB_BATCH_SIZE values will result in more accuracy and faster calibration speed. Set INT8_CALIB_BATCH_SIZE according to your GPU memory. You can change the number of images via **head -1000**. For example, for 2000 images, use **head -2000**. This process can take a long time. + +- **Step 6.** Create the **calibration.txt** file with all selected images + +```sh +realpath calibration/*jpg > calibration.txt +``` + +- **Step 7.** Set environment variables + +```sh +export INT8_CALIB_IMG_PATH=calibration.txt +export INT8_CALIB_BATCH_SIZE=1 +``` + +- **Step 8.** Update the **config_infer_primary_yoloV5.txt** file + +From + +```sh +... +model-engine-file=model_b1_gpu0_fp32.engine +#int8-calib-file=calib.table +... 
+network-mode=0 +... +``` + +To + +```sh +... +model-engine-file=model_b1_gpu0_int8.engine +int8-calib-file=calib.table +... +network-mode=1 +... +``` + +- **Step 9.** Run the inference + +```sh +deepstream-app -c deepstream_app_config.txt +``` + +
YOLOv5 with deepstream INT8
+ +The above result is running on **Jetson Xavier NX** with **INT8** and **YOLOv5s 640x640**. We can see that the **FPS** is around **60**. + +## Benchmark results + +The following table summarizes how different models perform on **Jetson Xavier NX**. + +| Model Name | Precision | Inference Size | Inference Time (ms) | FPS | +|------------|-----------|----------------|---------------------|-----| +| YOLOv5s | FP32 | 320x320 | 16.66 | 60 | +| | FP32 | 640x640 | 33.33 | 30 | +| | INT8 | 640x640 | 16.66 | 60 | +| YOLOv5n | FP32 | 640x640 | 16.66 | 60 | + +### Additional + +This tutorial is written by our friends at seeed @lakshanthad and Elaine diff --git a/docs/en/yolov5/tutorials/test_time_augmentation.md b/docs/en/yolov5/tutorials/test_time_augmentation.md new file mode 100644 index 0000000..1ba33de --- /dev/null +++ b/docs/en/yolov5/tutorials/test_time_augmentation.md @@ -0,0 +1,163 @@ +--- +comments: true +description: Boost your YOLOv5 performance with our step-by-step guide on Test-Time Augmentation (TTA). Learn to enhance your model's mAP and Recall during testing and inference. +keywords: YOLOv5, Ultralytics, Test-Time Augmentation, TTA, mAP, Recall, model performance, guide +--- + +# Test-Time Augmentation (TTA) + +📚 This guide explains how to use Test Time Augmentation (TTA) during testing and inference for improved mAP and Recall with YOLOv5 🚀. UPDATED 25 September 2022. + +## Before You Start + +Clone repo and install [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) in a [**Python>=3.8.0**](https://www.python.org/) environment, including [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/). [Models](https://github.com/ultralytics/yolov5/tree/master/models) and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) download automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). 
+ +```bash +git clone https://github.com/ultralytics/yolov5 # clone +cd yolov5 +pip install -r requirements.txt # install +``` + +## Test Normally + +Before trying TTA we want to establish a baseline performance to compare to. This command tests YOLOv5x on COCO val2017 at image size 640 pixels. `yolov5x.pt` is the largest and most accurate model available. Other options are `yolov5s.pt`, `yolov5m.pt` and `yolov5l.pt`, or your own checkpoint from training a custom dataset `./weights/best.pt`. For details on all available models please see our README [table](https://github.com/ultralytics/yolov5#pretrained-checkpoints). + +```bash +python val.py --weights yolov5x.pt --data coco.yaml --img 640 --half +``` + +Output: + +```shell +val: data=./data/coco.yaml, weights=['yolov5x.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.65, task=val, device=, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True +YOLOv5 🚀 v5.0-267-g6a3ee7c torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB) + +Fusing layers... +Model Summary: 476 layers, 87730285 parameters, 0 gradients + +val: Scanning '../datasets/coco/val2017' images and labels...4952 found, 48 missing, 0 empty, 0 corrupted: 100% 5000/5000 [00:01<00:00, 2846.03it/s] +val: New cache created: ../datasets/coco/val2017.cache + Class Images Labels P R mAP@.5 mAP@.5:.95: 100% 157/157 [02:30<00:00, 1.05it/s] + all 5000 36335 0.746 0.626 0.68 0.49 +Speed: 0.1ms pre-process, 22.4ms inference, 1.4ms NMS per image at shape (32, 3, 640, 640) # <--- baseline speed + +Evaluating pycocotools mAP... saving runs/val/exp/yolov5x_predictions.json... +... 
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.504 # <--- baseline mAP + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.688 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.546 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.351 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.551 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.644 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.382 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.628 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.681 # <--- baseline mAR + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.524 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.735 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.826 +``` + +## Test with TTA + +Append `--augment` to any existing `val.py` command to enable TTA, and increase the image size by about 30% for improved results. Note that inference with TTA enabled will typically take about 2-3X the time of normal inference as the images are being left-right flipped and processed at 3 different resolutions, with the outputs merged before NMS. Part of the speed decrease is simply due to larger image sizes (832 vs 640), while part is due to the actual TTA operations. + +```bash +python val.py --weights yolov5x.pt --data coco.yaml --img 832 --augment --half +``` + +Output: + +```shell +val: data=./data/coco.yaml, weights=['yolov5x.pt'], batch_size=32, imgsz=832, conf_thres=0.001, iou_thres=0.6, task=val, device=, single_cls=False, augment=True, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=True, project=runs/val, name=exp, exist_ok=False, half=True +YOLOv5 🚀 v5.0-267-g6a3ee7c torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB) + +Fusing layers... 
+/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at /pytorch/c10/core/TensorImpl.h:1156.) + return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode) +Model Summary: 476 layers, 87730285 parameters, 0 gradients +val: Scanning '../datasets/coco/val2017' images and labels...4952 found, 48 missing, 0 empty, 0 corrupted: 100% 5000/5000 [00:01<00:00, 2885.61it/s] +val: New cache created: ../datasets/coco/val2017.cache + Class Images Labels P R mAP@.5 mAP@.5:.95: 100% 157/157 [07:29<00:00, 2.86s/it] + all 5000 36335 0.718 0.656 0.695 0.503 +Speed: 0.2ms pre-process, 80.6ms inference, 2.7ms NMS per image at shape (32, 3, 832, 832) # <--- TTA speed + +Evaluating pycocotools mAP... saving runs/val/exp2/yolov5x_predictions.json... +... + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.516 # <--- TTA mAP + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.701 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.562 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.361 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.564 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.656 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.388 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.640 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.696 # <--- TTA mAR + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.553 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.744 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.833 +``` + +## Inference with TTA + +`detect.py` TTA inference 
operates identically to `val.py` TTA: simply append `--augment` to any existing `detect.py` command: + +```bash +python detect.py --weights yolov5s.pt --img 832 --source data/images --augment +``` + +Output: + +```bash +YOLOv5 🚀 v5.0-267-g6a3ee7c torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB) + +Downloading https://github.com/ultralytics/yolov5/releases/download/v5.0/yolov5s.pt to yolov5s.pt... +100% 14.1M/14.1M [00:00<00:00, 81.9MB/s] + +Fusing layers... +Model Summary: 224 layers, 7266973 parameters, 0 gradients +image 1/2 /content/yolov5/data/images/bus.jpg: 832x640 4 persons, 1 bus, 1 fire hydrant, Done. (0.029s) +image 2/2 /content/yolov5/data/images/zidane.jpg: 480x832 3 persons, 3 ties, Done. (0.024s) +Results saved to runs/detect/exp +Done. (0.156s) +``` + +YOLOv5 test time augmentations + +### PyTorch Hub TTA + +TTA is automatically integrated into all [YOLOv5 PyTorch Hub](https://pytorch.org/hub/ultralytics_yolov5) models, and can be accessed by passing `augment=True` at inference time. + +```python +import torch + +# Model +model = torch.hub.load('ultralytics/yolov5', 'yolov5s') # or yolov5m, yolov5x, custom + +# Images +img = 'https://ultralytics.com/images/zidane.jpg' # or file, PIL, OpenCV, numpy, multiple + +# Inference +results = model(img, augment=True) # <--- TTA inference + +# Results +results.print() # or .show(), .save(), .crop(), .pandas(), etc. +``` + +### Customize + +You can customize the TTA ops applied in the YOLOv5 `forward_augment()` method [here](https://github.com/ultralytics/yolov5/blob/8c6f9e15bfc0000d18b976a95b9d7c17d407ec91/models/yolo.py#L125-L137). 
+ +## Environments + +YOLOv5 is designed to be run in the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled): + +- **Notebooks** with free GPU: Run on Gradient Open In Colab Open In Kaggle +- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial/) +- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/) +- **Docker Image**. See [Docker Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/docker_image_quickstart_tutorial/) Docker Pulls + +## Status + +YOLOv5 CI + +If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 [training](https://github.com/ultralytics/yolov5/blob/master/train.py), [validation](https://github.com/ultralytics/yolov5/blob/master/val.py), [inference](https://github.com/ultralytics/yolov5/blob/master/detect.py), [export](https://github.com/ultralytics/yolov5/blob/master/export.py) and [benchmarks](https://github.com/ultralytics/yolov5/blob/master/benchmarks.py) on macOS, Windows, and Ubuntu every 24 hours and on every commit. diff --git a/docs/en/yolov5/tutorials/tips_for_best_training_results.md b/docs/en/yolov5/tutorials/tips_for_best_training_results.md new file mode 100644 index 0000000..1153880 --- /dev/null +++ b/docs/en/yolov5/tutorials/tips_for_best_training_results.md @@ -0,0 +1,65 @@ +--- +comments: true +description: Our comprehensive guide provides insights on how to train your YOLOv5 system to get the best mAP. Master dataset preparation, model selection, training settings, and more. 
+keywords: Ultralytics, YOLOv5, Training guide, dataset preparation, model selection, training settings, mAP results, Machine Learning, Object Detection +--- + +📚 This guide explains how to produce the best mAP and training results with YOLOv5 🚀. UPDATED 25 May 2022. + +Most of the time good results can be obtained with no changes to the models or training settings, **provided your dataset is sufficiently large and well labelled**. If at first you don't get good results, there are steps you might be able to take to improve, but we always recommend users **first train with all default settings** before considering any changes. This helps establish a performance baseline and spot areas for improvement. + +If you have questions about your training results **we recommend you provide the maximum amount of information possible** if you expect a helpful response, including results plots (train losses, val losses, P, R, mAP), PR curve, confusion matrix, training mosaics, test results and dataset statistics images such as labels.png. All of these are located in your `project/name` directory, typically `yolov5/runs/train/exp`. + +We've put together a full guide for users looking to get the best results on their YOLOv5 trainings below. + +## Dataset + +- **Images per class.** ≥ 1500 images per class recommended +- **Instances per class.** ≥ 10000 instances (labeled objects) per class recommended +- **Image variety.** Must be representative of the deployed environment. For real-world use cases we recommend images from different times of day, different seasons, different weather, different lighting, different angles, different sources (scraped online, collected locally, different cameras) etc. +- **Label consistency.** All instances of all classes in all images must be labelled. Partial labelling will not work. +- **Label accuracy.** Labels must closely enclose each object. No space should exist between an object and its bounding box. No objects should be missing a label. 
+- **Label verification.** View `train_batch*.jpg` on train start to verify your labels appear correct, i.e. see [example](https://docs.ultralytics.com/yolov5/tutorials/train_custom_data#local-logging) mosaic. +- **Background images.** Background images are images with no objects that are added to a dataset to reduce False Positives (FP). We recommend about 0-10% background images to help reduce FPs (COCO has 1000 background images for reference, 1% of the total). No labels are required for background images. + +COCO Analysis + +## Model Selection + +Larger models like YOLOv5x and [YOLOv5x6](https://github.com/ultralytics/yolov5/releases/tag/v5.0) will produce better results in nearly all cases, but have more parameters, require more CUDA memory to train, and are slower to run. For **mobile** deployments we recommend YOLOv5s/m, for **cloud** deployments we recommend YOLOv5l/x. See our README [table](https://github.com/ultralytics/yolov5#pretrained-checkpoints) for a full comparison of all models. + +

YOLOv5 Models

+ +- **Start from Pretrained weights.** Recommended for small to medium-sized datasets (i.e. [VOC](https://github.com/ultralytics/yolov5/blob/master/data/VOC.yaml), [VisDrone](https://github.com/ultralytics/yolov5/blob/master/data/VisDrone.yaml), [GlobalWheat](https://github.com/ultralytics/yolov5/blob/master/data/GlobalWheat2020.yaml)). Pass the name of the model to the `--weights` argument. Models download automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases). + +```shell +python train.py --data custom.yaml --weights yolov5s.pt + yolov5m.pt + yolov5l.pt + yolov5x.pt + custom_pretrained.pt +``` + +- **Start from Scratch.** Recommended for large datasets (i.e. [COCO](https://github.com/ultralytics/yolov5/blob/master/data/coco.yaml), [Objects365](https://github.com/ultralytics/yolov5/blob/master/data/Objects365.yaml), [OIv6](https://storage.googleapis.com/openimages/web/index.html)). Pass the model architecture YAML you are interested in, along with an empty `--weights ''` argument: + +```bash +python train.py --data custom.yaml --weights '' --cfg yolov5s.yaml + yolov5m.yaml + yolov5l.yaml + yolov5x.yaml +``` + +## Training Settings + +Before modifying anything, **first train with default settings to establish a performance baseline**. A full list of train.py settings can be found in the [train.py](https://github.com/ultralytics/yolov5/blob/master/train.py) argparser. + +- **Epochs.** Start with 300 epochs. If this overfits early then you can reduce epochs. If overfitting does not occur after 300 epochs, train longer, i.e. 600, 1200 etc epochs. +- **Image size.** COCO trains at native resolution of `--img 640`, though due to the high amount of small objects in the dataset it can benefit from training at higher resolutions such as `--img 1280`. If there are many small objects then custom datasets will benefit from training at native or higher resolution. 
Best inference results are obtained at the same `--img` as the training was run at, i.e. if you train at `--img 1280` you should also test and detect at `--img 1280`. +- **Batch size.** Use the largest `--batch-size` that your hardware allows for. Small batch sizes produce poor batchnorm statistics and should be avoided. +- **Hyperparameters.** Default hyperparameters are in [hyp.scratch-low.yaml](https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.scratch-low.yaml). We recommend you train with default hyperparameters first before thinking of modifying any. In general, increasing augmentation hyperparameters will reduce and delay overfitting, allowing for longer trainings and higher final mAP. Reduction in loss component gain hyperparameters like `hyp['obj']` will help reduce overfitting in those specific loss components. For an automated method of optimizing these hyperparameters, see our [Hyperparameter Evolution Tutorial](https://docs.ultralytics.com/yolov5/tutorials/hyperparameter_evolution). + +## Further Reading + +If you'd like to know more, a good place to start is Karpathy's 'Recipe for Training Neural Networks', which has great ideas for training that apply broadly across all ML domains: [http://karpathy.github.io/2019/04/25/recipe/](http://karpathy.github.io/2019/04/25/recipe/) + +Good luck 🍀 and let us know if you have any other questions! diff --git a/docs/en/yolov5/tutorials/train_custom_data.md b/docs/en/yolov5/tutorials/train_custom_data.md new file mode 100644 index 0000000..4fd5290 --- /dev/null +++ b/docs/en/yolov5/tutorials/train_custom_data.md @@ -0,0 +1,228 @@ +--- +comments: true +description: Learn how to train your data on custom datasets using YOLOv5. Simple and updated guide on collection and organization of images, labelling, model training and deployment. 
+keywords: YOLOv5, train on custom dataset, image collection, model training, object detection, image labelling, Ultralytics, PyTorch, machine learning +--- + +📚 This guide explains how to train your own **custom dataset** with [YOLOv5](https://github.com/ultralytics/yolov5) 🚀. UPDATED 7 June 2023. + +## Before You Start + +Clone repo and install [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) in a [**Python>=3.8.0**](https://www.python.org/) environment, including [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/). [Models](https://github.com/ultralytics/yolov5/tree/master/models) and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) download automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). + +```bash +git clone https://github.com/ultralytics/yolov5 # clone +cd yolov5 +pip install -r requirements.txt # install +``` + +## Train On Custom Data + + +Ultralytics active learning +
+
+ +Creating a custom model to detect your objects is an iterative process of collecting and organizing images, labeling your objects of interest, training a model, deploying it into the wild to make predictions, and then using that deployed model to collect examples of edge cases to repeat and improve. + +### 1. Create Dataset + +YOLOv5 models must be trained on labelled data in order to learn classes of objects in that data. There are two options for creating your dataset before you start training: + +
+Use Roboflow to create your dataset in YOLO format 🌟 + +!!! Warning + + Roboflow users can use Ultralytics under the [AGPL license](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) or can request an [Enterprise license](https://ultralytics.com/license) directly from Ultralytics. Be aware that Roboflow does not provide Ultralytics licenses, and it is the responsibility of the user to ensure appropriate licensing. + +### 1.1 Collect Images + +Your model will learn by example. Training on images similar to the ones it will see in the wild is of the utmost importance. Ideally, you will collect a wide variety of images from the same configuration (camera, angle, lighting, etc.) as you will ultimately deploy your project. + +If this is not possible, you can start from [a public dataset](https://universe.roboflow.com/?ref=ultralytics) to train your initial model and then [sample images from the wild during inference](https://blog.roboflow.com/computer-vision-active-learning-tips/?ref=ultralytics) to improve your dataset and model iteratively. + +### 1.2 Create Labels + +Once you have collected images, you will need to annotate the objects of interest to create a ground truth for your model to learn from. + +

YOLOv5 accuracies

+ +[Roboflow Annotate](https://roboflow.com/annotate?ref=ultralytics) is a simple web-based tool for managing and labeling your images with your team and exporting them in [YOLOv5's annotation format](https://roboflow.com/formats/yolov5-pytorch-txt?ref=ultralytics). + +### 1.3 Prepare Dataset for YOLOv5 + +Whether you [label your images with Roboflow](https://roboflow.com/annotate?ref=ultralytics) or not, you can use it to convert your dataset into YOLO format, create a YOLOv5 YAML configuration file, and host it for importing into your training script. + +[Create a free Roboflow account](https://app.roboflow.com/?model=yolov5&ref=ultralytics) +and upload your dataset to a `Public` workspace, label any unannotated images, then generate and export a version of your dataset in `YOLOv5 Pytorch` format. + +Note: YOLOv5 does online augmentation during training, so we do not recommend applying any augmentation steps in Roboflow for training with YOLOv5. But we recommend applying the following preprocessing steps: + +

Recommended Preprocessing Steps

+ +* **Auto-Orient** - to strip EXIF orientation from your images. +* **Resize (Stretch)** - to the square input size of your model (640x640 is the YOLOv5 default). + +Generating a version will give you a point in time snapshot of your dataset so you can always go back and compare your future model training runs against it, even if you add more images or change its configuration later. + +

Export in YOLOv5 Format

+ +Export in `YOLOv5 Pytorch` format, then copy the snippet into your training script or notebook to download your dataset. + +

Roboflow dataset download snippet

+ +Now continue with `2. Select a Model`. +
+ +
+Or manually prepare your dataset + +### 1.1 Create dataset.yaml + +[COCO128](https://www.kaggle.com/ultralytics/coco128) is an example small tutorial dataset composed of the first 128 images in [COCO](http://cocodataset.org/#home) train2017. These same 128 images are used for both training and validation to verify our training pipeline is capable of overfitting. [data/coco128.yaml](https://github.com/ultralytics/yolov5/blob/master/data/coco128.yaml), shown below, is the dataset config file that defines 1) the dataset root directory `path` and relative paths to `train` / `val` / `test` image directories (or *.txt files with image paths) and 2) a class `names` dictionary: + +```yaml +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] +path: ../datasets/coco128 # dataset root dir +train: images/train2017 # train images (relative to 'path') 128 images +val: images/train2017 # val images (relative to 'path') 128 images +test: # test images (optional) + +# Classes (80 COCO classes) +names: + 0: person + 1: bicycle + 2: car + ... + 77: teddy bear + 78: hair drier + 79: toothbrush +``` + +### 1.2 Create Labels + +After using an annotation tool to label your images, export your labels to **YOLO format**, with one `*.txt` file per image (if no objects in image, no `*.txt` file is required). The `*.txt` file specifications are: + +- One row per object +- Each row is `class x_center y_center width height` format. +- Box coordinates must be in **normalized xywh** format (from 0 to 1). If your boxes are in pixels, divide `x_center` and `width` by image width, and `y_center` and `height` by image height. +- Class numbers are zero-indexed (start from 0). + +

Roboflow annotations

+ +The label file corresponding to the above image contains 2 persons (class `0`) and a tie (class `27`): + +

Roboflow dataset preprocessing

+ +### 1.3 Organize Directories + +Organize your train and val images and labels according to the example below. YOLOv5 assumes `/coco128` is inside a `/datasets` directory **next to** the `/yolov5` directory. **YOLOv5 locates labels automatically for each image** by replacing the last instance of `/images/` in each image path with `/labels/`. For example: + +```bash +../datasets/coco128/images/im0.jpg # image +../datasets/coco128/labels/im0.txt # label +``` + +

YOLOv5 dataset structure

+
+ +### 2. Select a Model + +Select a pretrained model to start training from. Here we select [YOLOv5s](https://github.com/ultralytics/yolov5/blob/master/models/yolov5s.yaml), the second-smallest and fastest model available. See our README [table](https://github.com/ultralytics/yolov5#pretrained-checkpoints) for a full comparison of all models. + +

YOLOv5 models

+ +### 3. Train + +Train a YOLOv5s model on COCO128 by specifying dataset, batch-size, image size and either pretrained `--weights yolov5s.pt` (recommended), or randomly initialized `--weights '' --cfg yolov5s.yaml` (not recommended). Pretrained weights are auto-downloaded from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases). + +```bash +python train.py --img 640 --epochs 3 --data coco128.yaml --weights yolov5s.pt +``` + +!!! Tip "Tip" + + 💡 Add `--cache ram` or `--cache disk` to speed up training (requires significant RAM/disk resources). + +!!! Tip "Tip" + + 💡 Always train from a local dataset. Mounted or network drives like Google Drive will be very slow. + +All training results are saved to `runs/train/` with incrementing run directories, i.e. `runs/train/exp2`, `runs/train/exp3` etc. For more details see the Training section of our tutorial notebook. Open In Colab Open In Kaggle + +### 4. Visualize + +#### Comet Logging and Visualization 🌟 NEW + +[Comet](https://bit.ly/yolov5-readme-comet) is now fully integrated with YOLOv5. Track and visualize model metrics in real time, save your hyperparameters, datasets, and model checkpoints, and visualize your model predictions with [Comet Custom Panels](https://bit.ly/yolov5-colab-comet-panels)! Comet makes sure you never lose track of your work and makes it easy to share results and collaborate across teams of all sizes! + +Getting started is easy: + +```shell +pip install comet_ml # 1. install +export COMET_API_KEY= # 2. paste API key +python train.py --img 640 --epochs 3 --data coco128.yaml --weights yolov5s.pt # 3. train +``` + +To learn more about all the supported Comet features for this integration, check out the [Comet Tutorial](https://docs.ultralytics.com/yolov5/tutorials/comet_logging_integration). If you'd like to learn more about Comet, head over to our [documentation](https://bit.ly/yolov5-colab-comet-docs). 
Get started by trying out the Comet Colab Notebook: +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1RG0WOQyxlDlo5Km8GogJpIEJlg_5lyYO?usp=sharing) + +YOLO UI + +#### ClearML Logging and Automation 🌟 NEW + +[ClearML](https://cutt.ly/yolov5-notebook-clearml) is completely integrated into YOLOv5 to track your experimentation, manage dataset versions and even remotely execute training runs. To enable ClearML: + +- `pip install clearml` +- run `clearml-init` to connect to a ClearML server (**deploy your own open-source server [here](https://github.com/allegroai/clearml-server)**, or use our free hosted server [here](https://cutt.ly/yolov5-notebook-clearml)) + +You'll get all the great expected features from an experiment manager: live updates, model upload, experiment comparison, etc. ClearML also tracks uncommitted changes and installed packages, for example. Thanks to that, ClearML Tasks (which is what we call experiments) are also reproducible on different machines! With only 1 extra line, we can schedule a YOLOv5 training task on a queue to be executed by any number of ClearML Agents (workers). + +You can use ClearML Data to version your dataset and then pass it to YOLOv5 simply using its unique ID. This will help you keep track of your data without adding extra hassle. Explore the [ClearML Tutorial](https://docs.ultralytics.com/yolov5/tutorials/clearml_logging_integration) for details! + + +ClearML Experiment Management UI + +#### Local Logging + +Training results are automatically logged with [TensorBoard](https://www.tensorflow.org/tensorboard) and [CSV](https://github.com/ultralytics/yolov5/pull/4148) loggers to `runs/train`, with a new experiment directory created for each new training as `runs/train/exp2`, `runs/train/exp3`, etc. 
+ +This directory contains train and val statistics, mosaics, labels, predictions and augmented mosaics, as well as metrics and charts including precision-recall (PR) curves and confusion matrices. + +Local logging results + +Results file `results.csv` is updated after each epoch, and then plotted as `results.png` (below) after training completes. You can also plot any `results.csv` file manually: + +```python +from utils.plots import plot_results + +plot_results('path/to/results.csv') # plot 'results.csv' as 'results.png' +``` + +

results.png

+ +## Next Steps + +Once your model is trained you can use your best checkpoint `best.pt` to: + +* Run [CLI](https://github.com/ultralytics/yolov5#quick-start-examples) or [Python](https://docs.ultralytics.com/yolov5/tutorials/pytorch_hub_model_loading) inference on new images and videos +* [Validate](https://github.com/ultralytics/yolov5/blob/master/val.py) accuracy on train, val and test splits +* [Export](https://docs.ultralytics.com/yolov5/tutorials/model_export) to TensorFlow, Keras, ONNX, TFlite, TF.js, CoreML and TensorRT formats +* [Evolve](https://docs.ultralytics.com/yolov5/tutorials/hyperparameter_evolution) hyperparameters to improve performance +* [Improve](https://docs.roboflow.com/adding-data/upload-api?ref=ultralytics) your model by sampling real-world images and adding them to your dataset + +## Environments + +YOLOv5 is designed to be run in the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled): + +- **Notebooks** with free GPU: Run on Gradient Open In Colab Open In Kaggle +- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial/) +- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/) +- **Docker Image**. See [Docker Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/docker_image_quickstart_tutorial/) Docker Pulls + +## Status + +YOLOv5 CI + +If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. 
CI tests verify correct operation of YOLOv5 [training](https://github.com/ultralytics/yolov5/blob/master/train.py), [validation](https://github.com/ultralytics/yolov5/blob/master/val.py), [inference](https://github.com/ultralytics/yolov5/blob/master/detect.py), [export](https://github.com/ultralytics/yolov5/blob/master/export.py) and [benchmarks](https://github.com/ultralytics/yolov5/blob/master/benchmarks.py) on macOS, Windows, and Ubuntu every 24 hours and on every commit. diff --git a/docs/en/yolov5/tutorials/transfer_learning_with_frozen_layers.md b/docs/en/yolov5/tutorials/transfer_learning_with_frozen_layers.md new file mode 100644 index 0000000..5fd3376 --- /dev/null +++ b/docs/en/yolov5/tutorials/transfer_learning_with_frozen_layers.md @@ -0,0 +1,154 @@ +--- +comments: true +description: Learn to freeze YOLOv5 layers for efficient transfer learning. Optimize your model retraining with fewer resources and faster training times. +keywords: YOLOv5, freeze layers, transfer learning, model retraining, Ultralytics +--- + +📚 This guide explains how to **freeze** YOLOv5 🚀 layers when **transfer learning**. Transfer learning is a useful way to quickly retrain a model on new data without having to retrain the entire network. Instead, some of the initial weights are frozen in place, and the rest of the weights are used to compute loss and are updated by the optimizer. This requires fewer resources than normal training and allows for faster training times, though it may also result in reductions to final trained accuracy. UPDATED 25 September 2022. + +## Before You Start + +Clone the repo and install [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) in a [**Python>=3.8.0**](https://www.python.org/) environment, including [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/). 
[Models](https://github.com/ultralytics/yolov5/tree/master/models) and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) download automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). + +```bash +git clone https://github.com/ultralytics/yolov5 # clone +cd yolov5 +pip install -r requirements.txt # install +``` + +## Freeze Backbone + +All layers that match the `freeze` list in train.py will be frozen by setting `requires_grad = False` on their parameters before training starts. + +```python + # Freeze + freeze = [f'model.{x}.' for x in range(freeze)] # layers to freeze + for k, v in model.named_parameters(): + v.requires_grad = True # train all layers + if any(x in k for x in freeze): + print(f'freezing {k}') + v.requires_grad = False +``` + +To see a list of module names: + +```python +for k, v in model.named_parameters(): + print(k) + +# Output +model.0.conv.conv.weight +model.0.conv.bn.weight +model.0.conv.bn.bias +model.1.conv.weight +model.1.bn.weight +model.1.bn.bias +model.2.cv1.conv.weight +model.2.cv1.bn.weight +... 
+model.23.m.0.cv2.bn.weight +model.23.m.0.cv2.bn.bias +model.24.m.0.weight +model.24.m.0.bias +model.24.m.1.weight +model.24.m.1.bias +model.24.m.2.weight +model.24.m.2.bias +``` + +Looking at the model architecture we can see that the model backbone is layers 0-9: + +```yaml +# YOLOv5 backbone + backbone: + # [from, number, module, args] + [[-1, 1, Focus, [64, 3]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, BottleneckCSP, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 9, BottleneckCSP, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, BottleneckCSP, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 1, SPP, [1024, [5, 9, 13]]], + [-1, 3, BottleneckCSP, [1024, False]], # 9 + ] + + # YOLOv5 head + head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, BottleneckCSP, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + ] +``` + +so we can define the freeze list to contain all modules with 'model.0.' - 'model.9.' in their names: + +```bash +python train.py --freeze 10 +``` + +## Freeze All Layers + +To freeze the full model except for the final output convolution layers in Detect(), we set freeze list to contain all modules with 'model.0.' - 'model.23.' 
in their names: + +```bash +python train.py --freeze 24 +``` + +## Results + +We train YOLOv5m on VOC in both of the above scenarios, along with a default model (no freezing), starting from the official COCO pretrained `--weights yolov5m.pt`: + +```bash +python train.py --batch 48 --weights yolov5m.pt --data voc.yaml --epochs 50 --cache --img 512 --hyp hyp.finetune.yaml +``` + +### Accuracy Comparison + +The results show that freezing speeds up training, but reduces final accuracy slightly. + +![Freezing training mAP50 results](https://user-images.githubusercontent.com/26833433/98394454-11579f80-205b-11eb-8e57-d8318e1cc2f8.png) + +![Freezing training mAP50-95 results](https://user-images.githubusercontent.com/26833433/98394459-13216300-205b-11eb-871b-49e20691a423.png) + +Table results + +### GPU Utilization Comparison + +Interestingly, the more modules are frozen, the less GPU memory is required to train and the lower the GPU utilization. This indicates that larger models, or models trained at a larger `--img` size, may benefit from freezing in order to train faster. + +![Training GPU memory allocated percent](https://user-images.githubusercontent.com/26833433/98394920-c2f6d080-205b-11eb-9611-fd68522b4e0e.png) + +![Training GPU memory utilization percent](https://user-images.githubusercontent.com/26833433/98394918-bf634980-205b-11eb-948d-311036ef9325.png) + +## Environments + +YOLOv5 is designed to be run in the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled): + +- **Notebooks** with free GPU: Run on Gradient Open In Colab Open In Kaggle +- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial/) +- **Amazon** Deep Learning AMI. 
See [AWS Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/) +- **Docker Image**. See [Docker Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/docker_image_quickstart_tutorial/) Docker Pulls + +## Status + +YOLOv5 CI + +If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 [training](https://github.com/ultralytics/yolov5/blob/master/train.py), [validation](https://github.com/ultralytics/yolov5/blob/master/val.py), [inference](https://github.com/ultralytics/yolov5/blob/master/detect.py), [export](https://github.com/ultralytics/yolov5/blob/master/export.py) and [benchmarks](https://github.com/ultralytics/yolov5/blob/master/benchmarks.py) on macOS, Windows, and Ubuntu every 24 hours and on every commit. diff --git a/docs/es/index.md b/docs/es/index.md new file mode 100644 index 0000000..163f4c7 --- /dev/null +++ b/docs/es/index.md @@ -0,0 +1,82 @@ +--- +comments: true +description: Explore una guía completa sobre Ultralytics YOLOv8, un modelo de alta velocidad y precisión para detección de objetos y segmentación de imágenes. Tutoriales de instalación, predicción, entrenamiento y más. +keywords: Ultralytics, YOLOv8, detección de objetos, segmentación de imágenes, aprendizaje automático, aprendizaje profundo, visión por computadora, instalación YOLOv8, predicción YOLOv8, entrenamiento YOLOv8, historia de YOLO, licencias YOLO +--- + +
+

+ + Banner de Ultralytics YOLO +

+ GitHub de Ultralytics + space + LinkedIn de Ultralytics + space + Twitter de Ultralytics + space + YouTube de Ultralytics + space + TikTok de Ultralytics + space + Instagram de Ultralytics + space + Discord de Ultralytics +
+
+ Integración continua de Ultralytics + Cobertura de código de Ultralytics + Cita de YOLOv8 + Descargas de Docker +
+ Ejecutar en Gradient + Abrir en Colab + Abrir en Kaggle +
+ +Presentamos [Ultralytics](https://ultralytics.com) [YOLOv8](https://github.com/ultralytics/ultralytics), la última versión del aclamado modelo para detección de objetos y segmentación de imágenes en tiempo real. YOLOv8 está construido sobre avances de vanguardia en aprendizaje profundo y visión por computadora, ofreciendo un rendimiento sin paralelo en términos de velocidad y precisión. Su diseño simplificado lo hace adecuado para varias aplicaciones y fácilmente adaptable a diferentes plataformas de hardware, desde dispositivos de borde hasta API en la nube. + +Explore los documentos de YOLOv8, un recurso integral diseñado para ayudarle a comprender y utilizar sus características y capacidades. Independientemente de que sea un practicante experimentado en aprendizaje automático o nuevo en el campo, este centro tiene como objetivo maximizar el potencial de YOLOv8 en sus proyectos. + +!!! Note "Nota" + + 🚧 Nuestra documentación en varios idiomas está actualmente en construcción y estamos trabajando duro para mejorarla. ¡Gracias por su paciencia! 🙏 + +## Dónde empezar + +- **Instalar** `ultralytics` con pip y comenzar a funcionar en minutos   [:material-clock-fast: Comenzar](quickstart.md){ .md-button } +- **Predecir** nuevas imágenes y videos con YOLOv8   [:octicons-image-16: Predecir en Imágenes](modes/predict.md){ .md-button } +- **Entrenar** un nuevo modelo YOLOv8 en su propio conjunto de datos personalizado   [:fontawesome-solid-brain: Entrenar un Modelo](modes/train.md){ .md-button } +- **Explorar** tareas de YOLOv8 como segmentar, clasificar, posar y seguir   [:material-magnify-expand: Explorar Tareas](tasks/index.md){ .md-button } + +

+
+ +
+ Ver: Cómo entrenar un modelo YOLOv8 en Su Conjunto de Datos Personalizado en Google Colab. +

+ +## YOLO: Una Breve Historia + +[YOLO](https://arxiv.org/abs/1506.02640) (You Only Look Once), un modelo popular de detección de objetos y segmentación de imágenes, fue desarrollado por Joseph Redmon y Ali Farhadi en la Universidad de Washington. Lanzado en 2015, YOLO rápidamente ganó popularidad por su alta velocidad y precisión. + +- [YOLOv2](https://arxiv.org/abs/1612.08242), lanzado en 2016, mejoró el modelo original incorporando normalización por lotes, cajas ancla y clústeres de dimensiones. +- [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf), lanzado en 2018, mejoró aún más el rendimiento del modelo usando una red dorsal más eficiente, múltiples anclas y agrupación piramidal espacial. +- [YOLOv4](https://arxiv.org/abs/2004.10934) fue lanzado en 2020, introduciendo innovaciones como la ampliación de datos del mosaico, un nuevo cabezal de detección sin ancla y una nueva función de pérdida. +- [YOLOv5](https://github.com/ultralytics/yolov5) mejoró aún más el rendimiento del modelo y agregó nuevas características como la optimización de hiperparámetros, seguimiento de experimentos integrados y exportación automática a formatos de exportación populares. +- [YOLOv6](https://github.com/meituan/YOLOv6) fue publicado en código abierto por [Meituan](https://about.meituan.com/) en 2022 y se utiliza en muchos de los robots de entrega autónomos de la empresa. +- [YOLOv7](https://github.com/WongKinYiu/yolov7) añadió tareas adicionales como la estimación de posturas en el conjunto de datos COCO keypoints. +- [YOLOv8](https://github.com/ultralytics/ultralytics) es la última versión de YOLO de Ultralytics. Como un modelo de vanguardia y del estado del arte (SOTA), YOLOv8 se basa en el éxito de las versiones anteriores, introduciendo nuevas características y mejoras para obtener un rendimiento mejorado, flexibilidad y eficiencia. 
YOLOv8 soporta una gama completa de tareas de IA de visión, incluyendo [detección](tasks/detect.md), [segmentación](tasks/segment.md), [estimación de pose](tasks/pose.md), [seguimiento](modes/track.md) y [clasificación](tasks/classify.md). Esta versatilidad permite a los usuarios aprovechar las capacidades de YOLOv8 en una amplia gama de aplicaciones y dominios. + +## Licencias de YOLO: ¿Cómo están licenciados los YOLO de Ultralytics? + +Ultralytics ofrece dos opciones de licencia para acomodar casos de uso diversos: + +- **Licencia AGPL-3.0**: Esta licencia de código abierto aprobada por [OSI](https://opensource.org/licenses/) es ideal para estudiantes y entusiastas, promoviendo la colaboración abierta y el intercambio de conocimiento. Consulte el archivo [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) para obtener más detalles. +- **Licencia Empresarial**: Diseñada para uso comercial, esta licencia permite la integración sin problemas de software de Ultralytics y modelos de IA en bienes y servicios comerciales, eludiendo los requisitos de código abierto de AGPL-3.0. Si su escenario implica la incorporación de nuestras soluciones en una oferta comercial, póngase en contacto a través de [Licencias de Ultralytics](https://ultralytics.com/license). + +Nuestra estrategia de licenciamiento está diseñada para asegurar que cualquier mejora a nuestros proyectos de código abierto se devuelva a la comunidad. Mantenemos los principios del código abierto cerca de nuestros corazones ❤️, y nuestra misión es garantizar que nuestras contribuciones puedan ser utilizadas y ampliadas de formas que sean beneficiosas para todos. diff --git a/docs/es/models/fast-sam.md b/docs/es/models/fast-sam.md new file mode 100644 index 0000000..dfaee11 --- /dev/null +++ b/docs/es/models/fast-sam.md @@ -0,0 +1,193 @@ +--- +comments: true +description: Explora FastSAM, una solución basada en CNN para la segmentación en tiempo real de objetos en imágenes. 
Ofrece una interacción mejorada del usuario, eficiencia computacional y es adaptable a diversas tareas de visión. +keywords: FastSAM, aprendizaje automático, solución basada en CNN, segmentación de objetos, solución en tiempo real, Ultralytics, tareas de visión, procesamiento de imágenes, aplicaciones industriales, interacción del usuario +--- + +# Modelo para Segmentar Cualquier Cosa Rápidamente (FastSAM) + +El Modelo para Segmentar Cualquier Cosa Rápidamente (FastSAM) es una solución novedosa basada en CNN que funciona en tiempo real para la tarea de Segmentar Cualquier Cosa. Esta tarea está diseñada para segmentar cualquier objeto dentro de una imagen basándose en diversas indicaciones posibles de interacción del usuario. FastSAM reduce significativamente las demandas computacionales a la vez que mantiene un rendimiento competitivo, lo que lo convierte en una opción práctica para una variedad de tareas de visión. + +![Descripción general de la arquitectura del Modelo para Segmentar Cualquier Cosa Rápidamente (FastSAM)](https://user-images.githubusercontent.com/26833433/248551984-d98f0f6d-7535-45d0-b380-2e1440b52ad7.jpg) + +## Descripción general + +FastSAM está diseñado para abordar las limitaciones del [Modelo para Segmentar Cualquier Cosa (SAM)](sam.md), un modelo Transformer pesado con requerimientos sustanciales de recursos computacionales. FastSAM divide la tarea de segmentar cualquier cosa en dos etapas secuenciales: segmentación de todas las instancias y selección basada en indicaciones. La primera etapa utiliza [YOLOv8-seg](../tasks/segment.md) para producir las máscaras de segmentación de todas las instancias en la imagen. En la segunda etapa, produce la región de interés correspondiente a la indicación. + +## Características principales + +1. 
**Solución en tiempo real:** Al aprovechar la eficiencia computacional de las CNN, FastSAM proporciona una solución en tiempo real para la tarea de segmentar cualquier cosa, lo que lo hace valioso para aplicaciones industriales que requieren resultados rápidos. + +2. **Eficiencia y rendimiento:** FastSAM ofrece una reducción significativa en las demandas computacionales y de recursos sin comprometer la calidad del rendimiento. Alcanza un rendimiento comparable al de SAM, pero con recursos computacionales drásticamente reducidos, lo que permite su aplicación en tiempo real. + +3. **Segmentación guiada por indicaciones:** FastSAM puede segmentar cualquier objeto dentro de una imagen guiado por diversas indicaciones posibles de interacción del usuario, lo que proporciona flexibilidad y adaptabilidad en diferentes escenarios. + +4. **Basado en YOLOv8-seg:** FastSAM se basa en [YOLOv8-seg](../tasks/segment.md), un detector de objetos equipado con una rama de segmentación de instancias. Esto le permite producir de manera efectiva las máscaras de segmentación de todas las instancias en una imagen. + +5. **Resultados competitivos en pruebas de referencia:** En la tarea de propuesta de objetos de MS COCO, FastSAM alcanza puntuaciones altas a una velocidad significativamente más rápida que [SAM](sam.md) en una sola tarjeta NVIDIA RTX 3090, lo que demuestra su eficiencia y capacidad. + +6. **Aplicaciones prácticas:** El enfoque propuesto proporciona una solución nueva y práctica para un gran número de tareas de visión a una velocidad muy alta, varias veces más rápida que los métodos actuales. + +7. **Factibilidad de compresión del modelo:** FastSAM demuestra la factibilidad de un camino que puede reducir significativamente el esfuerzo computacional al introducir un conocimiento previo (prior) artificial en la estructura, abriendo así nuevas posibilidades para la arquitectura de modelos grandes en tareas generales de visión. 
+ +## Modelos disponibles, tareas admitidas y modos de funcionamiento + +Esta tabla presenta los modelos disponibles con sus pesos pre-entrenados específicos, las tareas que admiten y su compatibilidad con diferentes modos de funcionamiento, como [Inference](../modes/predict.md) (inferencia), [Validation](../modes/val.md) (validación), [Training](../modes/train.md) (entrenamiento) y [Export](../modes/export.md) (exportación), indicados mediante emojis ✅ para los modos admitidos y emojis ❌ para los modos no admitidos. + +| Tipo de modelo | Pesos pre-entrenados | Tareas admitidas | Inferencia | Validación | Entrenamiento | Exportación | +|----------------|----------------------|---------------------------------------------------|------------|------------|---------------|-------------| +| FastSAM-s | `FastSAM-s.pt` | [Segmentación de Instancias](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| FastSAM-x | `FastSAM-x.pt` | [Segmentación de Instancias](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## Ejemplos de uso + +Los modelos FastSAM son fáciles de integrar en tus aplicaciones Python. Ultralytics proporciona una API y comandos de línea de comandos (CLI) fáciles de usar para agilizar el desarrollo. + +### Uso de predicción + +Para realizar la detección de objetos en una imagen, utiliza el método `predict` de la siguiente manera: + +!!! 
Example "Ejemplo" + + === "Python" + ```python + from ultralytics import FastSAM + from ultralytics.models.fastsam import FastSAMPrompt + + # Define una fuente de inferencia + source = 'ruta/hacia/bus.jpg' + + # Crea un modelo FastSAM + model = FastSAM('FastSAM-s.pt') # o FastSAM-x.pt + + # Ejecuta la inferencia en una imagen + everything_results = model(source, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9) + + # Prepara un objeto de procesamiento de indicaciones + prompt_process = FastSAMPrompt(source, everything_results, device='cpu') + + # Indicación Everything + ann = prompt_process.everything_prompt() + + # Caja predeterminada [0,0,0,0] -> [x1,y1,x2,y2] + ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300]) + + # Indicación de texto + ann = prompt_process.text_prompt(text='una foto de un perro') + + # Indicación de punto + # puntos predeterminados [[0,0]] [[x1,y1],[x2,y2]] + # etiqueta_predeterminada [0] [1,0] 0:fondo, 1:primer plano + ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1]) + prompt_process.plot(annotations=ann, output='./') + ``` + + === "CLI" + ```bash + # Carga un modelo FastSAM y segmenta todo con él + yolo segment predict model=FastSAM-s.pt source=ruta/hacia/bus.jpg imgsz=640 + ``` + +Este fragmento de código demuestra la simplicidad de cargar un modelo pre-entrenado y realizar una predicción en una imagen. + +### Uso de validación + +La validación del modelo en un conjunto de datos se puede realizar de la siguiente manera: + +!!! 
Example "Ejemplo" + + === "Python" + ```python + from ultralytics import FastSAM + + # Crea un modelo FastSAM + model = FastSAM('FastSAM-s.pt') # o FastSAM-x.pt + + # Valida el modelo + results = model.val(data='coco8-seg.yaml') + ``` + + === "CLI" + ```bash + # Carga un modelo FastSAM y valida en el conjunto de datos de ejemplo COCO8-seg con un tamaño de imagen de 640 + yolo segment val model=FastSAM-s.pt data=coco8-seg.yaml imgsz=640 + ``` + +Ten en cuenta que FastSAM solo admite la detección y segmentación de una sola clase de objeto. Esto significa que reconocerá y segmentará todos los objetos como si fueran de la misma clase. Por lo tanto, al preparar el conjunto de datos, debes convertir todos los IDs de categoría de objetos a 0. + +## Uso oficial de FastSAM + +FastSAM también está disponible directamente en el repositorio [https://github.com/CASIA-IVA-Lab/FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM). Aquí hay una descripción general breve de los pasos típicos que podrías seguir para usar FastSAM: + +### Instalación + +1. Clona el repositorio de FastSAM: + ```shell + git clone https://github.com/CASIA-IVA-Lab/FastSAM.git + ``` + +2. Crea y activa un entorno Conda con Python 3.9: + ```shell + conda create -n FastSAM python=3.9 + conda activate FastSAM + ``` + +3. Navega hasta el repositorio clonado e instala los paquetes requeridos: + ```shell + cd FastSAM + pip install -r requirements.txt + ``` + +4. Instala el modelo CLIP: + ```shell + pip install git+https://github.com/openai/CLIP.git + ``` + +### Ejemplo de uso + +1. Descarga un [punto de control del modelo](https://drive.google.com/file/d/1m1sjY4ihXBU1fZXdQ-Xdj-mDltW-2Rqv/view?usp=sharing). + +2. Utiliza FastSAM para inferencia. 
Ejemplos de comandos: + + - Segmentar todo en una imagen: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg + ``` + + - Segmentar objetos específicos utilizando una indicación de texto: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --text_prompt "el perro amarillo" + ``` + + - Segmentar objetos dentro de una caja delimitadora (proporciona las coordenadas de la caja en formato xywh): + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --box_prompt "[570,200,230,400]" + ``` + + - Segmentar objetos cerca de puntos específicos: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --point_prompt "[[520,360],[620,300]]" --point_label "[1,0]" + ``` + +Además, puedes probar FastSAM a través de una [demostración en Colab](https://colab.research.google.com/drive/1oX14f6IneGGw612WgVlAiy91UHwFAvr9?usp=sharing) o en la [demostración web de HuggingFace](https://huggingface.co/spaces/An-619/FastSAM) para tener una experiencia visual. + +## Citas y agradecimientos + +Nos gustaría agradecer a los autores de FastSAM por sus importantes contribuciones en el campo de la segmentación de instancias en tiempo real: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{zhao2023fast, + title={Fast Segment Anything}, + author={Xu Zhao and Wenchao Ding and Yongqi An and Yinglong Du and Tao Yu and Min Li and Ming Tang and Jinqiao Wang}, + year={2023}, + eprint={2306.12156}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +El artículo original de FastSAM se puede encontrar en [arXiv](https://arxiv.org/abs/2306.12156). Los autores han puesto su trabajo a disposición del público, y el código base se puede acceder en [GitHub](https://github.com/CASIA-IVA-Lab/FastSAM). Agradecemos sus esfuerzos para avanzar en el campo y hacer que su trabajo sea accesible a la comunidad en general. 
diff --git a/docs/es/models/index.md b/docs/es/models/index.md new file mode 100644 index 0000000..a7137f9 --- /dev/null +++ b/docs/es/models/index.md @@ -0,0 +1,98 @@ +--- +comments: true +description: Explora la amplia gama de modelos de la familia YOLO, SAM, MobileSAM, FastSAM, YOLO-NAS y RT-DETR soportados por Ultralytics. Comienza con ejemplos para el uso tanto de CLI como de Python. +keywords: Ultralytics, documentación, YOLO, SAM, MobileSAM, FastSAM, YOLO-NAS, RT-DETR, modelos, arquitecturas, Python, CLI +--- + +# Modelos soportados por Ultralytics + +¡Bienvenido a la documentación de modelos de Ultralytics! Ofrecemos soporte para una amplia gama de modelos, cada uno adaptado a tareas específicas como [detección de objetos](../tasks/detect.md), [segmentación de instancias](../tasks/segment.md), [clasificación de imágenes](../tasks/classify.md), [estimación de posturas](../tasks/pose.md), y [seguimiento de múltiples objetos](../modes/track.md). Si estás interesado en contribuir con tu arquitectura de modelo a Ultralytics, consulta nuestra [Guía de Contribución](../../help/contributing.md). + +!!! Note "Nota" + + 🚧 Estamos trabajando arduamente para mejorar nuestra documentación en varios idiomas actualmente en construcción. ¡Gracias por tu paciencia! 🙏 + +## Modelos destacados + +Aquí están algunos de los modelos clave soportados: + +1. **[YOLOv3](yolov3.md)**: La tercera iteración de la familia de modelos YOLO, original de Joseph Redmon, conocida por su eficiente capacidad de detección de objetos en tiempo real. +2. **[YOLOv4](yolov4.md)**: Una actualización nativa de darknet para YOLOv3, lanzada por Alexey Bochkovskiy en 2020. +3. **[YOLOv5](yolov5.md)**: Una versión mejorada de la arquitectura YOLO por Ultralytics, que ofrece un mejor equilibrio entre rendimiento y velocidad en comparación con versiones anteriores. +4. 
**[YOLOv6](yolov6.md)**: Lanzado por [Meituan](https://about.meituan.com/) en 2022, y utilizado en muchos de los robots de entrega autónomos de la compañía. +5. **[YOLOv7](yolov7.md)**: Modelos YOLO actualizados lanzados en 2022 por los autores de YOLOv4. +6. **[YOLOv8](yolov8.md) NUEVO 🚀**: La última versión de la familia YOLO, con capacidades mejoradas como segmentación de instancias, estimación de posturas/puntos clave y clasificación. +7. **[Modelo Segment Anything (SAM)](sam.md)**: Modelo Segment Anything (SAM) de Meta. +8. **[Mobile Segment Anything Model (MobileSAM)](mobile-sam.md)**: MobileSAM para aplicaciones móviles, por la Universidad de Kyung Hee. +9. **[Fast Segment Anything Model (FastSAM)](fast-sam.md)**: FastSAM por el Grupo de Análisis de Imagen y Video, Instituto de Automatización, Academia China de Ciencias. +10. **[YOLO-NAS](yolo-nas.md)**: Modelos YOLO de Búsqueda de Arquitectura Neural (NAS). +11. **[Transformadores de Detección en Tiempo Real (RT-DETR)](rtdetr.md)**: Modelos de Transformador de Detección en Tiempo Real (RT-DETR) de PaddlePaddle de Baidu. + +

+
+ +
+ Mira: Ejecuta modelos YOLO de Ultralytics en solo unas pocas líneas de código. +

+ +## Empezando: Ejemplos de Uso + +Esta sección proporciona ejemplos simples de entrenamiento e inferencia con YOLO. Para la documentación completa de estos y otros [modos](../modes/index.md), consulta las páginas de documentación de [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) y [Export](../modes/export.md). + +Ten en cuenta que el siguiente ejemplo es para los modelos YOLOv8 [Detect](../tasks/detect.md) de detección de objetos. Para tareas adicionales soportadas, consulta la documentación de [Segment](../tasks/segment.md), [Classify](../tasks/classify.md) y [Pose](../tasks/pose.md). + +!!! Example "Ejemplo" + + === "Python" + + Los modelos pre-entrenados `*.pt` de PyTorch así como los archivos de configuración `*.yaml` se pueden pasar a las clases `YOLO()`, `SAM()`, `NAS()` y `RTDETR()` para crear una instancia de modelo en Python: + + ```python + from ultralytics import YOLO + + # Cargar un modelo YOLOv8n preentrenado en COCO + model = YOLO('yolov8n.pt') + + # Mostrar información del modelo (opcional) + model.info() + + # Entrenar el modelo en el conjunto de datos de ejemplo COCO8 durante 100 épocas + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Ejecutar inferencia con el modelo YOLOv8n en la imagen 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + Los comandos CLI están disponibles para ejecutar directamente los modelos: + + ```bash + # Cargar un modelo YOLOv8n preentrenado en COCO y entrenarlo en el conjunto de datos de ejemplo COCO8 durante 100 épocas + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Cargar un modelo YOLOv8n preentrenado en COCO y ejecutar inferencia en la imagen 'bus.jpg' + yolo predict model=yolov8n.pt source=path/to/bus.jpg + ``` + +## Contribuir con Nuevos Modelos + +¿Interesado en contribuir con tu modelo a Ultralytics? ¡Genial! Siempre estamos abiertos a expandir nuestro portafolio de modelos. + +1. 
**Haz un Fork del Repositorio**: Comienza haciendo un fork del [repositorio de GitHub de Ultralytics](https://github.com/ultralytics/ultralytics). + +2. **Clona tu Fork**: Clona tu fork a tu máquina local y crea una nueva rama para trabajar. + +3. **Implementa tu Modelo**: Añade tu modelo siguiendo los estándares de codificación y directrices proporcionadas en nuestra [Guía de Contribución](../../help/contributing.md). + +4. **Prueba Rigurosamente**: Asegúrate de probar tu modelo rigurosamente, tanto de forma aislada como parte del proceso. + +5. **Crea un Pull Request**: Una vez que estés satisfecho con tu modelo, crea un pull request al repositorio principal para revisión. + +6. **Revisión de Código y Fusión**: Después de la revisión, si tu modelo cumple con nuestros criterios, será fusionado al repositorio principal. + +Para pasos detallados, consulta nuestra [Guía de Contribución](../../help/contributing.md). diff --git a/docs/es/models/mobile-sam.md b/docs/es/models/mobile-sam.md new file mode 100644 index 0000000..bf68ab7 --- /dev/null +++ b/docs/es/models/mobile-sam.md @@ -0,0 +1,116 @@ +--- +comments: true +description: Obtén más información sobre MobileSAM, su implementación, comparación con SAM original y cómo descargarlo y probarlo en el framework de Ultralytics. ¡Mejora tus aplicaciones móviles hoy mismo! +keywords: MobileSAM, Ultralytics, SAM, aplicaciones móviles, Arxiv, GPU, API, codificador de imágenes, decodificador de máscaras, descarga de modelos, método de prueba +--- + +![Logotipo de MobileSAM](https://github.com/ChaoningZhang/MobileSAM/blob/master/assets/logo2.png?raw=true) + +# Segmentación Móvil de Cualquier Cosa (MobileSAM) + +El artículo de MobileSAM ahora está disponible en [arXiv](https://arxiv.org/pdf/2306.14289.pdf). + +Una demostración de MobileSAM funcionando en una CPU se puede acceder en este [enlace de demostración](https://huggingface.co/spaces/dhkim2810/MobileSAM). 
El rendimiento en una CPU Mac i5 tarda aproximadamente 3 segundos. En la demostración de Hugging Face, la interfaz y las CPUs de menor rendimiento contribuyen a una respuesta más lenta, pero sigue funcionando de manera efectiva. + +MobileSAM se implementa en varios proyectos, incluyendo [Grounding-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything), [AnyLabeling](https://github.com/vietanhdev/anylabeling) y [Segment Anything in 3D](https://github.com/Jumpat/SegmentAnythingin3D). + +MobileSAM se entrena en una sola GPU con un conjunto de datos de 100k (1% de las imágenes originales) en menos de un día. El código para este entrenamiento estará disponible en el futuro. + +## Modelos Disponibles, Tareas Admitidas y Modos de Operación + +Esta tabla presenta los modelos disponibles con sus pesos pre-entrenados específicos, las tareas que admiten y su compatibilidad con diferentes modos de operación como [Inference (Inferencia)](../modes/predict.md), [Validation (Validación)](../modes/val.md), [Training (Entrenamiento)](../modes/train.md) y [Export (Exportación)](../modes/export.md), indicados por emojis ✅ para los modos admitidos y emojis ❌ para los modos no admitidos. + +| Tipo de Modelo | Pesos Pre-entrenados | Tareas Admitidas | Inferencia | Validación | Entrenamiento | Exportación | +|----------------|----------------------|---------------------------------------------------|------------|------------|---------------|-------------| +| MobileSAM | `mobile_sam.pt` | [Segmentación de Instancias](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## Adaptación de SAM a MobileSAM + +Dado que MobileSAM mantiene el mismo pipeline que SAM original, hemos incorporado el pre-procesamiento, post-procesamiento y todas las demás interfaces del original. En consecuencia, aquellos que actualmente utilizan SAM original pueden hacer la transición a MobileSAM con un esfuerzo mínimo. 
+ +MobileSAM tiene un rendimiento comparable a SAM original y mantiene el mismo pipeline excepto por un cambio en el codificador de imágenes. Específicamente, reemplazamos el codificador de imágenes original ViT-H pesado (632M) por uno más pequeño, Tiny-ViT (5M). En una sola GPU, MobileSAM funciona a aproximadamente 12ms por imagen: 8ms en el codificador de imágenes y 4ms en el decodificador de máscaras. + +La siguiente tabla proporciona una comparación de los codificadores de imágenes basados en ViT: + +| Codificador de Imágenes | SAM Original | MobileSAM | +|-------------------------|--------------|-----------| +| Parámetros | 611M | 5M | +| Velocidad | 452ms | 8ms | + +Tanto SAM original como MobileSAM utilizan el mismo decodificador de máscaras guiado por instrucciones: + +| Decodificador de Máscaras | SAM Original | MobileSAM | +|---------------------------|--------------|-----------| +| Parámetros | 3.876M | 3.876M | +| Velocidad | 4ms | 4ms | + +Aquí está la comparación de todo el pipeline: + +| Pipeline Completo (Enc+Dec) | SAM Original | MobileSAM | +|-----------------------------|--------------|-----------| +| Parámetros | 615M | 9.66M | +| Velocidad | 456ms | 12ms | + +El rendimiento de MobileSAM y SAM original se demuestra utilizando tanto un punto como una caja como instrucciones. + +![Imagen con Punto como Instrucción](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/mask_box.jpg?raw=true) + +![Imagen con Caja como Instrucción](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/mask_box.jpg?raw=true) + +Con su rendimiento superior, MobileSAM es aproximadamente 5 veces más pequeño y 7 veces más rápido que el actual FastSAM. Más detalles están disponibles en la [página del proyecto de MobileSAM](https://github.com/ChaoningZhang/MobileSAM). 
+ +## Probando MobileSAM en Ultralytics + +Al igual que SAM original, ofrecemos un método sencillo de prueba en Ultralytics, que incluye modos tanto para instrucciones de Punto como para Caja. + +### Descarga del Modelo + +Puedes descargar el modelo [aquí](https://github.com/ChaoningZhang/MobileSAM/blob/master/weights/mobile_sam.pt). + +### Instrucción de Punto + +!!! Example "Ejemplo" + + === "Python" + ```python + from ultralytics import SAM + + # Carga el modelo + model = SAM('mobile_sam.pt') + + # Predice un segmento basado en una instrucción de punto + model.predict('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +### Instrucción de Caja + +!!! Example "Ejemplo" + + === "Python" + ```python + from ultralytics import SAM + + # Carga el modelo + model = SAM('mobile_sam.pt') + + # Predice un segmento basado en una instrucción de caja + model.predict('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + ``` + +Hemos implementado `MobileSAM` y `SAM` utilizando la misma API. Para obtener más información sobre cómo usarlo, consulta la [página de SAM](sam.md). + +## Citaciones y Reconocimientos + +Si encuentras útil MobileSAM en tu investigación o trabajo de desarrollo, considera citar nuestro artículo: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @article{mobile_sam, + title={Faster Segment Anything: Towards Lightweight SAM for Mobile Applications}, + author={Zhang, Chaoning and Han, Dongshen and Qiao, Yu and Kim, Jung Uk and Bae, Sung Ho and Lee, Seungkyu and Hong, Choong Seon}, + journal={arXiv preprint arXiv:2306.14289}, + year={2023} + } diff --git a/docs/es/models/rtdetr.md b/docs/es/models/rtdetr.md new file mode 100644 index 0000000..fada0ae --- /dev/null +++ b/docs/es/models/rtdetr.md @@ -0,0 +1,93 @@ +--- +comments: true +description: Descubre las características y beneficios de RT-DETR, un eficiente y adaptable detector de objetos en tiempo real desarrollado por Baidu y potenciado por Vision Transformers, que incluye modelos pre-entrenados. +keywords: RT-DETR, Baidu, Vision Transformers, detección de objetos, rendimiento en tiempo real, CUDA, TensorRT, selección de consultas IoU, Ultralytics, API de Python, PaddlePaddle +--- + +# RT-DETR de Baidu: Un Detector de Objetos en Tiempo Real Basado en Vision Transformers + +## Resumen + +Real-Time Detection Transformer (RT-DETR), desarrollado por Baidu, es un avanzado detector de objetos de extremo a extremo que proporciona un rendimiento en tiempo real manteniendo una alta precisión. Utiliza la potencia de Vision Transformers (ViT) para procesar de manera eficiente características de múltiples escalas mediante la descomposición de la interacción intra-escala y la fusión inter-escala. RT-DETR es altamente adaptable y permite ajustar de manera flexible la velocidad de inferencia utilizando diferentes capas de decodificador sin necesidad de volver a entrenar el modelo. El modelo se destaca en plataformas aceleradas como CUDA con TensorRT, superando a muchos otros detectores de objetos en tiempo real. 
+ +![Ejemplo de imagen del modelo](https://user-images.githubusercontent.com/26833433/238963168-90e8483f-90aa-4eb6-a5e1-0d408b23dd33.png) +**Resumen de RT-DETR de Baidu.** El diagrama de la arquitectura del modelo RT-DETR muestra las últimas tres etapas del canal (S3, S4, S5) como entrada al codificador. El eficiente codificador híbrido transforma características de múltiples escalas en una secuencia de características de imagen a través del módulo de interacción de características intra-escala (AIFI) y el módulo de fusión de características inter-escala (CCFM). Se utiliza la selección de consultas IoU-aware para seleccionar un número fijo de características de imagen que servirán como consultas iniciales de objetos para el decodificador. Finalmente, el decodificador con cabeceras de predicción auxiliares optimiza iterativamente las consultas de objetos para generar cajas y puntuaciones de confianza ([fuente](https://arxiv.org/pdf/2304.08069.pdf)). + +### Características Clave + +- **Codificador Híbrido Eficiente:** RT-DETR de Baidu utiliza un codificador híbrido eficiente que procesa características de múltiples escalas mediante la descomposición de la interacción intra-escala y la fusión inter-escala. Este diseño único basado en Vision Transformers reduce los costos computacionales y permite la detección de objetos en tiempo real. +- **Selección de Consultas IoU-aware:** RT-DETR de Baidu mejora la inicialización de las consultas de objetos utilizando la selección de consultas IoU-aware. Esto permite que el modelo se enfoque en los objetos más relevantes de la escena, mejorando la precisión en la detección. +- **Velocidad de Inferencia Adaptable:** RT-DETR de Baidu admite ajustes flexibles de la velocidad de inferencia utilizando diferentes capas de decodificador sin necesidad de volver a entrenar el modelo. Esta adaptabilidad facilita la aplicación práctica en diversos escenarios de detección de objetos en tiempo real. 
+ +## Modelos Pre-entrenados + +La API de Python de Ultralytics proporciona modelos pre-entrenados de RT-DETR de PaddlePaddle en diferentes escalas: + +- RT-DETR-L: 53.0% AP en COCO val2017, 114 FPS en GPU T4 +- RT-DETR-X: 54.8% AP en COCO val2017, 74 FPS en GPU T4 + +## Ejemplos de Uso + +Este ejemplo proporciona ejemplos sencillos de entrenamiento e inferencia de RT-DETR. Para obtener una documentación completa sobre estos y otros [modos](../modes/index.md), consulta las páginas de documentación de [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) y [Export](../modes/export.md). + +!!! Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import RTDETR + + # Cargar un modelo RT-DETR-l pre-entrenado en COCO + model = RTDETR('rtdetr-l.pt') + + # Mostrar información del modelo (opcional) + model.info() + + # Entrenar el modelo en el conjunto de datos de ejemplo COCO8 durante 100 épocas + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Realizar inferencia con el modelo RT-DETR-l en la imagen 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + ```bash + # Cargar un modelo RT-DETR-l pre-entrenado en COCO y entrenarlo en el conjunto de datos de ejemplo COCO8 durante 100 épocas + yolo train model=rtdetr-l.pt data=coco8.yaml epochs=100 imgsz=640 + + # Cargar un modelo RT-DETR-l pre-entrenado en COCO y realizar inferencia en la imagen 'bus.jpg' + yolo predict model=rtdetr-l.pt source=path/to/bus.jpg + ``` + +## Tareas y Modos Admitidos + +Esta tabla presenta los tipos de modelos, los pesos pre-entrenados específicos, las tareas admitidas por cada modelo y los diversos modos ([Train](../modes/train.md), [Val](../modes/val.md), [Predict](../modes/predict.md), [Export](../modes/export.md)) admitidos, indicados por los emojis ✅.
+ +| Tipo de Modelo | Pesos Pre-entrenados | Tareas Admitidas | Inferencia | Validación | Entrenamiento | Exportación | +|---------------------|----------------------|--------------------------------------------|------------|------------|---------------|-------------| +| RT-DETR Large | `rtdetr-l.pt` | [Detección de Objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| RT-DETR Extra-Large | `rtdetr-x.pt` | [Detección de Objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +## Citaciones y Agradecimientos + +Si utilizas RT-DETR de Baidu en tu investigación o trabajo de desarrollo, por favor cita el [artículo original](https://arxiv.org/abs/2304.08069): + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{lv2023detrs, + title={DETRs Beat YOLOs on Real-time Object Detection}, + author={Wenyu Lv and Shangliang Xu and Yian Zhao and Guanzhong Wang and Jinman Wei and Cheng Cui and Yuning Du and Qingqing Dang and Yi Liu}, + year={2023}, + eprint={2304.08069}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +Nos gustaría agradecer a Baidu y al equipo de [PaddlePaddle](https://github.com/PaddlePaddle/PaddleDetection) por crear y mantener este valioso recurso para la comunidad de visión por computadora. Apreciamos enormemente su contribución al campo con el desarrollo del detector de objetos en tiempo real basado en Vision Transformers, RT-DETR. + +*keywords: RT-DETR, Transformer, ViT, Vision Transformers, Baidu RT-DETR, PaddlePaddle, Paddle Paddle RT-DETR, detección de objetos en tiempo real, detección de objetos basada en Vision Transformers, modelos pre-entrenados PaddlePaddle RT-DETR, uso de RT-DETR de Baidu, API de Python de Ultralytics* diff --git a/docs/es/models/sam.md b/docs/es/models/sam.md new file mode 100644 index 0000000..966c5be --- /dev/null +++ b/docs/es/models/sam.md @@ -0,0 +1,226 @@ +--- +comments: true +description: Explora el revolucionario Segment Anything Model (SAM) de Ultralytics que permite la segmentación de imágenes en tiempo real. 
Aprende sobre su segmentación por indicación, rendimiento en la transferencia sin entrenamiento y cómo usarlo. +keywords: Ultralytics, segmentación de imágenes, Segment Anything Model, SAM, SA-1B dataset, rendimiento en tiempo real, transferencia sin entrenamiento, detección de objetos, análisis de imágenes, aprendizaje automático +--- + +# Segment Anything Model (SAM) + +Bienvenido a la frontera de la segmentación de imágenes con el Segment Anything Model, o SAM. Este modelo revolucionario ha cambiado el juego al introducir la segmentación de imágenes por indicación con rendimiento en tiempo real, estableciendo nuevos estándares en el campo. + +## Introducción a SAM: Segment Anything Model + +El Segment Anything Model, o SAM, es un modelo de segmentación de imágenes de vanguardia que permite la segmentación por indicación, ofreciendo una versatilidad sin igual en las tareas de análisis de imágenes. SAM forma el corazón de la iniciativa Segment Anything, un proyecto innovador que presenta un modelo, una tarea y un conjunto de datos nuevos para la segmentación de imágenes. + +El diseño avanzado de SAM le permite adaptarse a nuevas distribuciones y tareas de imágenes sin conocimientos previos, una característica conocida como transferencia sin entrenamiento. Entrenado en el extenso [conjunto de datos SA-1B](https://ai.facebook.com/datasets/segment-anything/), que contiene más de mil millones de máscaras distribuidas en once millones de imágenes seleccionadas cuidadosamente, SAM ha demostrado un impresionante rendimiento en la transferencia sin entrenamiento, superando en muchos casos los resultados de supervisión completa anteriores. + +![Ejemplo de imagen del conjunto de datos](https://user-images.githubusercontent.com/26833433/238056229-0e8ffbeb-f81a-477e-a490-aff3d82fd8ce.jpg) +Imágenes de ejemplo con máscaras superpuestas de nuestro nuevo conjunto de datos, SA-1B.
SA-1B contiene 11 millones de imágenes diversas de alta resolución, con licencia y protección de la privacidad, y 1.1 mil millones de máscaras de segmentación de alta calidad. Estas máscaras fueron anotadas completamente automáticamente por SAM y, según las calificaciones humanas y numerosos experimentos, tienen una alta calidad y diversidad. Las imágenes se agrupan por número de máscaras por imagen para su visualización (hay aproximadamente 100 máscaras por imagen en promedio). + +## Características clave del Segment Anything Model (SAM) + +- **Tarea de segmentación por indicación**: SAM fue diseñado teniendo en cuenta una tarea de segmentación por indicación, lo que le permite generar máscaras de segmentación válidas a partir de cualquier indicación dada, como pistas espaciales o de texto que identifican un objeto. +- **Arquitectura avanzada**: El Segment Anything Model utiliza un potente codificador de imágenes, un codificador de indicaciones y un decodificador de máscaras ligero. Esta arquitectura única permite la indicación flexible, el cálculo de máscaras en tiempo real y la conciencia de ambigüedades en las tareas de segmentación. +- **El conjunto de datos SA-1B**: Introducido por el proyecto Segment Anything, el conjunto de datos SA-1B cuenta con más de mil millones de máscaras en once millones de imágenes. Como el conjunto de datos de segmentación más grande hasta la fecha, proporciona a SAM una fuente de datos de entrenamiento diversa y a gran escala. +- **Rendimiento en la transferencia sin entrenamiento**: SAM muestra un destacado rendimiento en la transferencia sin entrenamiento en diversas tareas de segmentación, lo que lo convierte en una herramienta lista para usar en diversas aplicaciones con una necesidad mínima de ingeniería de indicación. 
+ +Para obtener una visión más detallada del Segment Anything Model y el conjunto de datos SA-1B, visita el [sitio web de Segment Anything](https://segment-anything.com) y consulta el artículo de investigación [Segment Anything](https://arxiv.org/abs/2304.02643). + +## Modelos disponibles, tareas admitidas y modos de funcionamiento + +Esta tabla muestra los modelos disponibles con sus pesos pre-entrenados específicos, las tareas que admiten y su compatibilidad con diferentes modos de funcionamiento como [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md) y [Export](../modes/export.md), indicados con emojis ✅ para los modos admitidos y emojis ❌ para los modos no admitidos. + +| Tipo de modelo | Pesos pre-entrenados | Tareas admitidas | Inference | Validation | Training | Export | +|----------------|----------------------|---------------------------------------------------|-----------|------------|----------|--------| +| SAM base | `sam_b.pt` | [Segmentación de instancias](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| SAM large | `sam_l.pt` | [Segmentación de instancias](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## Cómo usar SAM: Versatilidad y potencia en la segmentación de imágenes + +El Segment Anything Model se puede utilizar para una multitud de tareas posteriores que van más allá de sus datos de entrenamiento. Esto incluye detección de bordes, generación de propuestas de objetos, segmentación de instancias y predicción preliminar de texto a máscara. Con la ingeniería de indicación, SAM puede adaptarse rápidamente a nuevas tareas y distribuciones de datos sin entrenamiento adicional, estableciéndolo como una herramienta versátil y potente para todas tus necesidades de segmentación de imágenes. + +### Ejemplo de predicción con SAM + +!!! Example "Segmentar con indicaciones" + + Segmenta la imagen con las indicaciones proporcionadas.
+ + === "Python" + + ```python + from ultralytics import SAM + + # Cargar un modelo + modelo = SAM('sam_b.pt') + + # Mostrar información del modelo (opcional) + modelo.info() + + # Ejecutar inferencia con indicaciones de bboxes + modelo('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + + # Ejecutar inferencia con indicaciones de puntos + modelo('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +!!! Example "Segmentar todo" + + Segmenta toda la imagen. + + === "Python" + + ```python + from ultralytics import SAM + + # Cargar un modelo + modelo = SAM('sam_b.pt') + + # Mostrar información del modelo (opcional) + modelo.info() + + # Ejecutar inferencia + modelo('ruta/hacia/imagen.jpg') + ``` + + === "CLI" + + ```bash + # Ejecutar inferencia con un modelo SAM + yolo predict model=sam_b.pt source=ruta/hacia/imagen.jpg + ``` + +- La lógica aquí es segmentar toda la imagen si no se proporcionan indicaciones (bboxes/puntos/máscaras). + +!!! Example "Ejemplo de SAMPredictor" + + De esta manera, puedes configurar una imagen una vez y ejecutar inferencia con indicaciones múltiples sin ejecutar el codificador de imágenes múltiples veces. + + === "Inferencia con indicaciones" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # Crear SAMPredictor + opciones = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=opciones) + + # Establecer imagen + predictor.set_image("ultralytics/assets/zidane.jpg") # establecer con archivo de imagen + predictor.set_image(cv2.imread("ultralytics/assets/zidane.jpg")) # establecer con np.ndarray + resultados = predictor(bboxes=[439, 437, 524, 709]) + resultados = predictor(points=[900, 370], labels=[1]) + + # Restablecer imagen + predictor.reset_image() + ``` + + Segmentar todo con argumentos adicionales.
+ + === "Segmentar todo" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # Crear SAMPredictor + opciones = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=opciones) + + # Segmentar con argumentos adicionales + resultados = predictor(source="ultralytics/assets/zidane.jpg", crop_n_layers=1, points_stride=64) + ``` + +- Más argumentos adicionales para `Segmentar todo` en [`Referencia de Predictor/generate`](../../../reference/models/sam/predict.md). + +## SAM comparado con YOLOv8 + +Aquí comparamos el modelo SAM más pequeño de Meta, SAM-b, con el modelo de segmentación más pequeño de Ultralytics, [YOLOv8n-seg](../tasks/segment.md): + +| Modelo | Tamaño | Parámetros | Velocidad (CPU) | +|-------------------------------------------------|-------------------------------------|------------------------------|-------------------------------------| +| SAM-b de Meta | 358 MB | 94.7 M | 51096 ms/im | +| [MobileSAM](mobile-sam.md) | 40.7 MB | 10.1 M | 46122 ms/im | +| [FastSAM-s](fast-sam.md) con respaldo de YOLOv8 | 23.7 MB | 11.8 M | 115 ms/im | +| YOLOv8n-seg de Ultralytics | **6.7 MB** (53.4 veces más pequeño) | **3.4 M** (27.9 veces menos) | **59 ms/im** (866 veces más rápido) | + +Esta comparación muestra las diferencias de órdenes de magnitud en los tamaños y velocidades de los modelos. Si bien SAM presenta capacidades únicas para la segmentación automática, no es un competidor directo de los modelos de segmentación YOLOv8, que son más pequeños, más rápidos y más eficientes. + +Las pruebas se realizaron en una MacBook Apple M2 de 2023 con 16 GB de RAM. Para reproducir esta prueba: + +!!!
Example "Ejemplo" + + === "Python" + ```python + from ultralytics import FastSAM, SAM, YOLO + + # Perfil del modelo SAM-b + modelo = SAM('sam_b.pt') + modelo.info() + modelo('ultralytics/assets') + + # Perfil de MobileSAM + modelo = SAM('mobile_sam.pt') + modelo.info() + modelo('ultralytics/assets') + + # Perfil de FastSAM-s + modelo = FastSAM('FastSAM-s.pt') + modelo.info() + modelo('ultralytics/assets') + + # Perfil de YOLOv8n-seg + modelo = YOLO('yolov8n-seg.pt') + modelo.info() + modelo('ultralytics/assets') + ``` + +## Auto-anotación: un camino rápido hacia conjuntos de datos de segmentación + +La auto-anotación es una característica clave de SAM que permite a los usuarios generar un [conjunto de datos de segmentación](https://docs.ultralytics.com/datasets/segment) utilizando un modelo de detección pre-entrenado. Esta función permite una anotación rápida y precisa de un gran número de imágenes, evitando la necesidad de una etiquetación manual que consume mucho tiempo. + +### Generar tu conjunto de datos de segmentación utilizando un modelo de detección + +Para auto-anotar tu conjunto de datos con el marco de trabajo de Ultralytics, utiliza la función `auto_annotate` como se muestra a continuación: + +!!! Example "Ejemplo" + + === "Python" + ```python + from ultralytics.data.annotator import auto_annotate + + auto_annotate(data="ruta/a/las/imagenes", det_model="yolov8x.pt", sam_model='sam_b.pt') + ``` + +| Argumento | Tipo | Descripción | Predeterminado | +|------------|---------------------|-----------------------------------------------------------------------------------------------------------------------|----------------| +| data | str | Ruta a una carpeta que contiene las imágenes a anotar. | | +| det_model | str, opcional | Modelo de detección YOLO pre-entrenado. Por defecto, 'yolov8x.pt'. | 'yolov8x.pt' | +| sam_model | str, opcional | Modelo de segmentación SAM pre-entrenado. Por defecto, 'sam_b.pt'. 
| 'sam_b.pt' | +| device | str, opcional | Dispositivo en el que ejecutar los modelos. Por defecto, una cadena vacía (CPU o GPU, si está disponible). | | +| output_dir | str, None, opcional | Directorio para guardar los resultados anotados. Por defecto, una carpeta 'labels' en el mismo directorio que 'data'. | None | + +La función `auto_annotate` toma la ruta de tus imágenes, con argumentos opcionales para especificar los modelos de detección y segmentación SAM pre-entrenados, el dispositivo en el que ejecutar los modelos, y el directorio de salida para guardar los resultados anotados. + +La auto-anotación con modelos pre-entrenados puede reducir drásticamente el tiempo y el esfuerzo requeridos para crear conjuntos de datos de segmentación de alta calidad. Esta característica es especialmente beneficiosa para investigadores y desarrolladores que trabajan con grandes colecciones de imágenes, ya que les permite centrarse en el desarrollo y la evaluación de modelos en lugar de en la anotación manual. + +## Citas y agradecimientos + +Si encuentras útil SAM en tu trabajo de investigación o desarrollo, considera citar nuestro artículo: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{kirillov2023segment, + title={Segment Anything}, + author={Alexander Kirillov and Eric Mintun and Nikhila Ravi and Hanzi Mao and Chloe Rolland and Laura Gustafson and Tete Xiao and Spencer Whitehead and Alexander C. Berg and Wan-Yen Lo and Piotr Dollár and Ross Girshick}, + year={2023}, + eprint={2304.02643}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +Nos gustaría expresar nuestro agradecimiento a Meta AI por crear y mantener este valioso recurso para la comunidad de visión por computadora. 
+ +*keywords: Segment Anything, Segment Anything Model, SAM, Meta SAM, segmentación de imágenes, segmentación por indicación, rendimiento en la transferencia sin entrenamiento, conjunto de datos SA-1B, arquitectura avanzada, auto-anotación, Ultralytics, modelos pre-entrenados, SAM base, SAM large, segmentación de instancias, visión por computadora, IA, inteligencia artificial, aprendizaje automático, anotación de datos, máscaras de segmentación, modelo de detección, modelo de detección YOLO, bibtex, Meta AI.* diff --git a/docs/es/models/yolo-nas.md b/docs/es/models/yolo-nas.md new file mode 100644 index 0000000..3b25a60 --- /dev/null +++ b/docs/es/models/yolo-nas.md @@ -0,0 +1,121 @@ +--- +comments: true +description: Explora la documentación detallada de YOLO-NAS, un modelo de detección de objetos superior. Aprende sobre sus características, modelos pre-entrenados, uso con la API de Ultralytics Python, y más. +keywords: YOLO-NAS, Deci AI, detección de objetos, aprendizaje profundo, búsqueda de arquitectura neural, API de Ultralytics Python, modelo YOLO, modelos pre-entrenados, cuantización, optimización, COCO, Objects365, Roboflow 100 +--- + +# YOLO-NAS + +## Visión general + +Desarrollado por Deci AI, YOLO-NAS es un modelo revolucionario de detección de objetos. Es el producto de una tecnología avanzada de Búsqueda de Arquitectura Neural, meticulosamente diseñada para abordar las limitaciones de los modelos YOLO anteriores. Con mejoras significativas en el soporte de cuantización y el equilibrio entre precisión y latencia, YOLO-NAS representa un gran avance en la detección de objetos. + +![Ejemplo de imagen del modelo](https://learnopencv.com/wp-content/uploads/2023/05/yolo-nas_COCO_map_metrics.png) +**Visión general de YOLO-NAS.** YOLO-NAS utiliza bloques conscientes de cuantización y cuantización selectiva para un rendimiento óptimo. 
El modelo, cuando se convierte en su versión cuantizada INT8, experimenta una caída mínima de precisión, una mejora significativa en comparación con otros modelos. Estos avances culminan en una arquitectura superior con capacidades de detección de objetos sin precedentes y un rendimiento sobresaliente. + +### Características clave + +- **Bloque básico compatible con cuantización:** YOLO-NAS introduce un nuevo bloque básico que es compatible con la cuantización, abordando una de las limitaciones significativas de los modelos YOLO anteriores. +- **Entrenamiento sofisticado y cuantización:** YOLO-NAS utiliza esquemas avanzados de entrenamiento y cuantización posterior para mejorar el rendimiento. +- **Optimización AutoNAC y pre-entrenamiento:** YOLO-NAS utiliza la optimización AutoNAC y se pre-entrena en conjuntos de datos prominentes como COCO, Objects365 y Roboflow 100. Este pre-entrenamiento lo hace extremadamente adecuado para tareas de detección de objetos en entornos de producción. + +## Modelos pre-entrenados + +Experimenta el poder de la detección de objetos de próxima generación con los modelos pre-entrenados de YOLO-NAS proporcionados por Ultralytics. Estos modelos están diseñados para ofrecer un rendimiento de primera clase tanto en velocidad como en precisión. Elige entre una variedad de opciones adaptadas a tus necesidades específicas: + +| Modelo | mAP | Latencia (ms) | +|------------------|-------|---------------| +| YOLO-NAS S | 47.5 | 3.21 | +| YOLO-NAS M | 51.55 | 5.85 | +| YOLO-NAS L | 52.22 | 7.87 | +| YOLO-NAS S INT-8 | 47.03 | 2.36 | +| YOLO-NAS M INT-8 | 51.0 | 3.78 | +| YOLO-NAS L INT-8 | 52.1 | 4.78 | + +Cada variante del modelo está diseñada para ofrecer un equilibrio entre la Precisión Media Promedio (mAP, por sus siglas en inglés) y la latencia, ayudándote a optimizar tus tareas de detección de objetos en términos de rendimiento y velocidad.
+ +## Ejemplos de uso + +Ultralytics ha facilitado la integración de los modelos YOLO-NAS en tus aplicaciones de Python a través de nuestro paquete `ultralytics`. El paquete proporciona una API de Python fácil de usar para agilizar el proceso. + +Los siguientes ejemplos muestran cómo usar los modelos YOLO-NAS con el paquete `ultralytics` para inferencia y validación: + +### Ejemplos de inferencia y validación + +En este ejemplo validamos YOLO-NAS-s en el conjunto de datos COCO8. + +!!! Example "Ejemplo" + + Este ejemplo proporciona un código simple de inferencia y validación para YOLO-NAS. Para manejar los resultados de la inferencia, consulta el modo [Predict](../modes/predict.md). Para usar YOLO-NAS con modos adicionales, consulta [Val](../modes/val.md) y [Export](../modes/export.md). El paquete `ultralytics` para YOLO-NAS no admite entrenamiento. + + === "Python" + + Los archivos de modelos pre-entrenados `*.pt` de PyTorch se pueden pasar a la clase `NAS()` para crear una instancia del modelo en Python: + + ```python + from ultralytics import NAS + + # Carga un modelo YOLO-NAS-s pre-entrenado en COCO + modelo = NAS('yolo_nas_s.pt') + + # Muestra información del modelo (opcional) + modelo.info() + + # Valida el modelo en el conjunto de datos de ejemplo COCO8 + resultados = modelo.val(data='coco8.yaml') + + # Ejecuta inferencia con el modelo YOLO-NAS-s en la imagen 'bus.jpg' + resultados = modelo('path/to/bus.jpg') + ``` + + === "CLI" + + Los comandos CLI están disponibles para ejecutar directamente los modelos: + + ```bash + # Carga un modelo YOLO-NAS-s pre-entrenado en COCO y valida su rendimiento en el conjunto de datos de ejemplo COCO8 + yolo val model=yolo_nas_s.pt data=coco8.yaml + + # Carga un modelo YOLO-NAS-s pre-entrenado en COCO y ejecuta inferencia en la imagen 'bus.jpg' + yolo predict model=yolo_nas_s.pt source=path/to/bus.jpg + ``` + +## Tareas y modos compatibles + +Ofrecemos tres variantes de los modelos YOLO-NAS: Small (s), Medium (m) y Large (l). 
Cada variante está diseñada para satisfacer diferentes necesidades computacionales y de rendimiento: + +- **YOLO-NAS-s**: Optimizado para entornos donde los recursos computacionales son limitados pero la eficiencia es clave. +- **YOLO-NAS-m**: Ofrece un enfoque equilibrado, adecuado para la detección de objetos de propósito general con mayor precisión. +- **YOLO-NAS-l**: Adaptados para escenarios que requieren la mayor precisión, donde los recursos computacionales son menos restrictivos. + +A continuación se muestra una descripción detallada de cada modelo, incluyendo enlaces a sus pesos pre-entrenados, las tareas que admiten y su compatibilidad con diferentes modos de funcionamiento. + +| Tipo de modelo | Pesos pre-entrenados | Tareas admitidas | Inferencia | Validación | Entrenamiento | Exportación | +|----------------|-----------------------------------------------------------------------------------------------|--------------------------------------------|------------|------------|---------------|-------------| +| YOLO-NAS-s | [yolo_nas_s.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_s.pt) | [Detección de objetos](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-m | [yolo_nas_m.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_m.pt) | [Detección de objetos](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-l | [yolo_nas_l.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_l.pt) | [Detección de objetos](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | + +## Citaciones y agradecimientos + +Si utilizas YOLO-NAS en tu investigación o trabajo de desarrollo, por favor cita SuperGradients: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{supergradients, + doi = {10.5281/ZENODO.7789328}, + url = {https://zenodo.org/record/7789328}, + author = {Aharon, Shay and {Louis-Dupont} and {Ofri Masad} and Yurkova, Kate and {Lotem Fridman} and {Lkdci} and Khvedchenya, Eugene and Rubin, Ran and Bagrov, Natan and Tymchenko, Borys and Keren, Tomer and Zhilko, Alexander and {Eran-Deci}}, + title = {Super-Gradients}, + publisher = {GitHub}, + journal = {GitHub repository}, + year = {2021}, + } + ``` + +Agradecemos al equipo de [SuperGradients](https://github.com/Deci-AI/super-gradients/) de Deci AI por sus esfuerzos en la creación y mantenimiento de este valioso recurso para la comunidad de visión por computadora. Creemos que YOLO-NAS, con su arquitectura innovadora y sus capacidades de detección de objetos superiores, se convertirá en una herramienta fundamental tanto para desarrolladores como para investigadores. + +*keywords: YOLO-NAS, Deci AI, detección de objetos, aprendizaje profundo, búsqueda de arquitectura neural, API de Ultralytics Python, modelo YOLO, SuperGradients, modelos pre-entrenados, bloque básico compatible con cuantización, esquemas avanzados de entrenamiento, cuantización posterior, optimización AutoNAC, COCO, Objects365, Roboflow 100* diff --git a/docs/es/models/yolov3.md b/docs/es/models/yolov3.md new file mode 100644 index 0000000..1990e5b --- /dev/null +++ b/docs/es/models/yolov3.md @@ -0,0 +1,98 @@ +--- +comments: true +description: Obtén una descripción general de YOLOv3, YOLOv3-Ultralytics y YOLOv3u. Aprende sobre sus características clave, uso y tareas admitidas para la detección de objetos. 
+keywords: YOLOv3, YOLOv3-Ultralytics, YOLOv3u, Detección de objetos, Inferencia, Entrenamiento, Ultralytics +--- + +# YOLOv3, YOLOv3-Ultralytics y YOLOv3u + +## Descripción general + +Este documento presenta una descripción general de tres modelos de detección de objetos estrechamente relacionados, conocidos como [YOLOv3](https://pjreddie.com/darknet/yolo/), [YOLOv3-Ultralytics](https://github.com/ultralytics/yolov3) y [YOLOv3u](https://github.com/ultralytics/ultralytics). + +1. **YOLOv3:** Esta es la tercera versión del algoritmo de detección de objetos You Only Look Once (YOLO). Originalmente desarrollado por Joseph Redmon, YOLOv3 mejoró a sus predecesores al introducir características como predicciones multiescala y tres tamaños diferentes de núcleos de detección. + +2. **YOLOv3-Ultralytics:** Esta es la implementación de YOLOv3 realizada por Ultralytics. Reproduce la arquitectura original de YOLOv3 y ofrece funcionalidades adicionales, como soporte para más modelos pre-entrenados y opciones de personalización más fáciles. + +3. **YOLOv3u:** Esta es una versión actualizada de YOLOv3-Ultralytics que incorpora la cabeza dividida sin anclaje y sin puntuación de objeto (objectness) utilizada en los modelos YOLOv8. YOLOv3u mantiene la misma arquitectura de columna vertebral y cuello que YOLOv3, pero con la cabeza de detección actualizada de YOLOv8. + +![Ultralytics YOLOv3](https://raw.githubusercontent.com/ultralytics/assets/main/yolov3/banner-yolov3.png) + +## Características clave + +- **YOLOv3:** Introdujo el uso de tres escalas diferentes para la detección, aprovechando tres tamaños diferentes de núcleos de detección: 13x13, 26x26 y 52x52. Esto mejoró significativamente la precisión de detección para objetos de diferentes tamaños. Además, YOLOv3 añadió características como predicciones con múltiples etiquetas para cada cuadro delimitador y una mejor red extractora de características. 
+ +- **YOLOv3-Ultralytics:** La implementación de Ultralytics de YOLOv3 proporciona el mismo rendimiento que el modelo original, pero cuenta con soporte adicional para más modelos pre-entrenados, métodos de entrenamiento adicionales y opciones de personalización más fáciles. Esto lo hace más versátil y fácil de usar para aplicaciones prácticas. + +- **YOLOv3u:** Este modelo actualizado incorpora la cabeza dividida sin anclaje y sin puntuación de objeto (objectness) de YOLOv8. Al eliminar la necesidad de cajas de anclaje predefinidas y puntuaciones de objeto, este diseño de cabeza de detección puede mejorar la capacidad del modelo para detectar objetos de diferentes tamaños y formas. Esto hace que YOLOv3u sea más robusto y preciso para tareas de detección de objetos. + +## Tareas y modos admitidos + +La serie YOLOv3, que incluye YOLOv3, YOLOv3-Ultralytics y YOLOv3u, está diseñada específicamente para tareas de detección de objetos. Estos modelos son reconocidos por su eficacia en diversos escenarios del mundo real, equilibrando precisión y velocidad. Cada variante ofrece características y optimizaciones únicas, lo que los hace adecuados para una variedad de aplicaciones. + +Los tres modelos admiten un conjunto completo de modos, asegurando versatilidad en diversas etapas del despliegue y desarrollo del modelo. Estos modos incluyen [Inferencia](../modes/predict.md), [Validación](../modes/val.md), [Entrenamiento](../modes/train.md) y [Exportación](../modes/export.md), proporcionando a los usuarios un conjunto completo de herramientas para una detección de objetos efectiva. 
+ +| Tipo de modelo | Tareas admitidas | Inferencia | Validación | Entrenamiento | Exportación | +|--------------------|--------------------------------------------|------------|------------|---------------|-------------| +| YOLOv3 | [Detección de objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3-Ultralytics | [Detección de objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3u | [Detección de objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +Esta tabla proporciona una visión rápida de las capacidades de cada variante de YOLOv3, destacando su versatilidad y aptitud para diversas tareas y modos operativos en flujos de trabajo de detección de objetos. + +## Ejemplos de uso + +Este ejemplo proporciona ejemplos sencillos de entrenamiento e inferencia de YOLOv3. Para obtener documentación completa sobre estos y otros [modos](../modes/index.md), consulta las páginas de documentación de [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) y [Export](../modes/export.md). + +!!! 
Example "Ejemplo" + + === "Python" + + Los modelos pre-entrenados de PyTorch en archivos `*.pt`, así como los archivos de configuración `*.yaml`, se pueden pasar a la clase `YOLO()` para crear una instancia del modelo en Python: + + ```python + from ultralytics import YOLO + + # Cargar un modelo YOLOv3n pre-entrenado en COCO + model = YOLO('yolov3n.pt') + + # Mostrar información del modelo (opcional) + model.info() + + # Entrenar el modelo en el conjunto de datos de ejemplo COCO8 durante 100 épocas + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Ejecutar inferencia con el modelo YOLOv3n en la imagen 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + Hay comandos de CLI disponibles para ejecutar directamente los modelos: + + ```bash + # Cargar un modelo YOLOv3n pre-entrenado en COCO y entrenarlo en el conjunto de datos de ejemplo COCO8 durante 100 épocas + yolo train model=yolov3n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Cargar un modelo YOLOv3n pre-entrenado en COCO y ejecutar inferencia en la imagen 'bus.jpg' + yolo predict model=yolov3n.pt source=path/to/bus.jpg + ``` + +## Citaciones y agradecimientos + +Si utilizas YOLOv3 en tu investigación, por favor, cita los artículos originales de YOLO y el repositorio de YOLOv3 de Ultralytics: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{redmon2018yolov3, + title={YOLOv3: An Incremental Improvement}, + author={Redmon, Joseph and Farhadi, Ali}, + journal={arXiv preprint arXiv:1804.02767}, + year={2018} + } + ``` + +Gracias a Joseph Redmon y Ali Farhadi por desarrollar YOLOv3 original. diff --git a/docs/es/models/yolov4.md b/docs/es/models/yolov4.md new file mode 100644 index 0000000..04223b7 --- /dev/null +++ b/docs/es/models/yolov4.md @@ -0,0 +1,71 @@ +--- +comments: true +description: Explora nuestra detallada guía sobre YOLOv4, un detector de objetos en tiempo real de vanguardia. 
Comprende sus aspectos arquitectónicos destacados, características innovadoras y ejemplos de aplicación. +keywords: ultralytics, YOLOv4, detección de objetos, red neuronal, detección en tiempo real, detector de objetos, aprendizaje automático +--- + +# YOLOv4: Detección de objetos rápida y precisa + +Bienvenido a la página de documentación de Ultralytics para YOLOv4, un detector de objetos en tiempo real de vanguardia lanzado en 2020 por Alexey Bochkovskiy en [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet). YOLOv4 está diseñado para ofrecer un equilibrio óptimo entre velocidad y precisión, lo que lo convierte en una excelente opción para muchas aplicaciones. + +![Diagrama de arquitectura de YOLOv4](https://user-images.githubusercontent.com/26833433/246185689-530b7fe8-737b-4bb0-b5dd-de10ef5aface.png) +**Diagrama de arquitectura de YOLOv4**. Muestra el intrincado diseño de red de YOLOv4, incluyendo los componentes backbone, neck y head, y sus capas interconectadas para una detección de objetos en tiempo real óptima. + +## Introducción + +YOLOv4 significa You Only Look Once versión 4. Es un modelo de detección de objetos en tiempo real desarrollado para abordar las limitaciones de versiones anteriores de YOLO como [YOLOv3](yolov3.md) y otros modelos de detección de objetos. A diferencia de otros detectores de objetos basados en redes neuronales convolucionales (CNN), YOLOv4 no solo es aplicable para sistemas de recomendación, sino también para la gestión de procesos independientes y la reducción de la entrada humana. Su funcionamiento en unidades de procesamiento de gráficos (GPU) convencionales permite su uso masivo a un precio asequible, y está diseñado para funcionar en tiempo real en una GPU convencional, siendo necesario solo una GPU para el entrenamiento. + +## Arquitectura + +YOLOv4 utiliza varias características innovadoras que trabajan juntas para optimizar su rendimiento. 
Estas incluyen Conexiones Residuales Ponderadas (WRC), Conexiones Parciales Cruzadas en Etapas (CSP), Normalización Cruzada de Mini-Batch (CmBN), Entrenamiento Autoadversarial (SAT), Activación Mish, Aumento de Datos Mosaico, Regularización DropBlock y Pérdida CIoU. Estas características se combinan para lograr resultados de vanguardia. + +Un detector de objetos típico está compuesto por varias partes, incluyendo la entrada, el backbone (espinazo), el neck (cuello) y el head (cabeza). El backbone de YOLOv4 está pre-entrenado en ImageNet y se utiliza para predecir las clases y las cajas delimitadoras de los objetos. El backbone puede ser de varios modelos, incluyendo VGG, ResNet, ResNeXt o DenseNet. La parte del neck del detector se utiliza para recolectar mapas de características de diferentes etapas y generalmente incluye varias rutas de abajo hacia arriba y varias rutas de arriba hacia abajo. La parte de la cabeza es la que se utiliza para realizar las detecciones y clasificaciones finales de objetos. + +## Bolsa de regalos + +YOLOv4 también utiliza métodos conocidos como "bolsa de regalos" (bag of freebies), que son técnicas que mejoran la precisión del modelo durante el entrenamiento sin aumentar el costo de la inferencia. La ampliación de datos es una técnica común de la bolsa de regalos utilizada en la detección de objetos, que aumenta la variabilidad de las imágenes de entrada para mejorar la robustez del modelo. Algunos ejemplos de ampliación de datos incluyen distorsiones fotométricas (ajuste del brillo, contraste, matiz, saturación y ruido de una imagen) y distorsiones geométricas (agregar escalado, recorte, volteo y rotación aleatorios). Estas técnicas ayudan al modelo a generalizar mejor para diferentes tipos de imágenes. + +## Características y rendimiento + +YOLOv4 está diseñado para obtener una velocidad y precisión óptimas en la detección de objetos. 
La arquitectura de YOLOv4 incluye CSPDarknet53 como backbone, PANet como neck y YOLOv3 como cabeza de detección. Este diseño permite que YOLOv4 realice la detección de objetos a una velocidad impresionante, lo que lo hace adecuado para aplicaciones en tiempo real. YOLOv4 también sobresale en precisión, logrando resultados de vanguardia en los benchmarks de detección de objetos. + +## Ejemplos de uso + +En el momento de escribir este documento, Ultralytics no admite modelos YOLOv4. Por lo tanto, cualquier usuario interesado en usar YOLOv4 deberá consultar directamente el repositorio de YOLOv4 en GitHub para obtener instrucciones de instalación y uso. + +Aquí hay un resumen breve de los pasos típicos que podrías seguir para usar YOLOv4: + +1. Visita el repositorio de YOLOv4 en GitHub: [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet). + +2. Sigue las instrucciones proporcionadas en el archivo README para la instalación. Esto generalmente implica clonar el repositorio, instalar las dependencias necesarias y configurar las variables de entorno necesarias. + +3. Una vez que la instalación esté completa, puedes entrenar y usar el modelo según las instrucciones de uso proporcionadas en el repositorio. Esto normalmente implica preparar tu conjunto de datos, configurar los parámetros del modelo, entrenar el modelo y luego usar el modelo entrenado para realizar la detección de objetos. + +Ten en cuenta que los pasos específicos pueden variar dependiendo de tu caso de uso específico y del estado actual del repositorio de YOLOv4. Por lo tanto, se recomienda encarecidamente consultar directamente las instrucciones proporcionadas en el repositorio de YOLOv4 en GitHub. + +Lamentamos cualquier inconveniente que esto pueda causar y nos esforzaremos por actualizar este documento con ejemplos de uso para Ultralytics una vez que se implemente el soporte para YOLOv4. 
+ +## Conclusión + +YOLOv4 es un modelo de detección de objetos potente y eficiente que logra un equilibrio entre velocidad y precisión. Su uso de características únicas y técnicas de bolsa de regalos durante el entrenamiento le permite lograr un excelente desempeño en tareas de detección de objetos en tiempo real. YOLOv4 puede ser entrenado y utilizado por cualquier persona con una GPU convencional, lo que lo hace accesible y práctico para una amplia gama de aplicaciones. + +## Citaciones y agradecimientos + +Nos gustaría reconocer a los autores de YOLOv4 por sus importantes contribuciones en el campo de la detección de objetos en tiempo real: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{bochkovskiy2020yolov4, + title={YOLOv4: Optimal Speed and Accuracy of Object Detection}, + author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, + year={2020}, + eprint={2004.10934}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +El artículo original de YOLOv4 se puede encontrar en [arXiv](https://arxiv.org/pdf/2004.10934.pdf). Los autores han puesto su trabajo a disposición del público, y el código se puede acceder en [GitHub](https://github.com/AlexeyAB/darknet). Apreciamos sus esfuerzos en el avance del campo y en hacer que su trabajo sea accesible para la comunidad en general. diff --git a/docs/es/models/yolov5.md b/docs/es/models/yolov5.md new file mode 100644 index 0000000..66adf22 --- /dev/null +++ b/docs/es/models/yolov5.md @@ -0,0 +1,113 @@ +--- +comments: true +description: Descubra YOLOv5u, una versión mejorada del modelo YOLOv5 con un mejor equilibrio entre precisión y velocidad, y numerosos modelos pre-entrenados para diversas tareas de detección de objetos. 
+keywords: YOLOv5u, detección de objetos, modelos pre-entrenados, Ultralytics, Inferencia, Validación, YOLOv5, YOLOv8, sin anclas, sin atención al objeto, aplicaciones en tiempo real, aprendizaje automático +--- + +# YOLOv5 + +## Resumen + +YOLOv5u representa un avance en las metodologías de detección de objetos. Originado a partir de la arquitectura fundamental del modelo [YOLOv5](https://github.com/ultralytics/yolov5) desarrollado por Ultralytics, YOLOv5u integra la cabeza dividida de Ultralytics sin anclas y sin puntuación de objeto (objectness), una característica introducida previamente en los modelos [YOLOv8](yolov8.md). Esta adaptación perfecciona la arquitectura del modelo, resultando en un mejor equilibrio entre precisión y velocidad en tareas de detección de objetos. Con base en los resultados empíricos y sus características derivadas, YOLOv5u proporciona una alternativa eficiente para aquellos que buscan soluciones robustas tanto en investigación como en aplicaciones prácticas. + +![Ultralytics YOLOv5](https://raw.githubusercontent.com/ultralytics/assets/main/yolov5/v70/splash.png) + +## Características clave + +- **Cabeza dividida Ultralytics sin anclas:** Los modelos tradicionales de detección de objetos dependen de cajas de anclaje predefinidas para predecir la ubicación de los objetos. Sin embargo, YOLOv5u moderniza este enfoque. Al adoptar una cabeza Ultralytics dividida sin anclas, se garantiza un mecanismo de detección más flexible y adaptable, lo que en consecuencia mejora el rendimiento en diversos escenarios. + +- **Equilibrio óptimo entre precisión y velocidad:** La velocidad y la precisión suelen ser contrapuestas. Pero YOLOv5u desafía este equilibrio. Ofrece un balance calibrado, garantizando detecciones en tiempo real sin comprometer la precisión. Esta característica es especialmente valiosa para aplicaciones que requieren respuestas rápidas, como vehículos autónomos, robótica y análisis de video en tiempo real. 
+ +- **Variedad de modelos pre-entrenados:** Entendiendo que diferentes tareas requieren diferentes herramientas, YOLOv5u proporciona una gran cantidad de modelos pre-entrenados. Ya sea que te enfoques en Inferencia, Validación o Entrenamiento, hay un modelo a la medida esperándote. Esta variedad asegura que no estés utilizando una solución genérica, sino un modelo específicamente ajustado para tu desafío único. + +## Tareas y Modos Soportados + +Los modelos YOLOv5u, con diferentes pesos pre-entrenados, sobresalen en las tareas de [Detección de Objetos](../tasks/detect.md). Soportan una amplia gama de modos que los hacen adecuados para diversas aplicaciones, desde el desarrollo hasta la implementación. + +| Tipo de Modelo | Pesos Pre-entrenados | Tarea | Inferencia | Validación | Entrenamiento | Exportación | +|----------------|-----------------------------------------------------------------------------------------------------------------------------|--------------------------------------------|------------|------------|---------------|-------------| +| YOLOv5u | `yolov5nu`, `yolov5su`, `yolov5mu`, `yolov5lu`, `yolov5xu`, `yolov5n6u`, `yolov5s6u`, `yolov5m6u`, `yolov5l6u`, `yolov5x6u` | [Detección de Objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +Esta tabla proporciona una descripción detallada de las variantes de modelos YOLOv5u, destacando su aplicabilidad en tareas de detección de objetos y el soporte para varios modos operativos como [Inferencia](../modes/predict.md), [Validación](../modes/val.md), [Entrenamiento](../modes/train.md) y [Exportación](../modes/export.md). Este soporte integral asegura que los usuarios puedan aprovechar al máximo las capacidades de los modelos YOLOv5u en una amplia gama de escenarios de detección de objetos. + +## Métricas de Rendimiento + +!!! 
Rendimiento + + === "Detección" + + Consulta la [Documentación de Detección](https://docs.ultralytics.com/tasks/detect/) para obtener ejemplos de uso con estos modelos entrenados en [COCO](https://docs.ultralytics.com/datasets/detect/coco/), los cuales incluyen 80 clases pre-entrenadas. + + | Modelo | YAML | tamaño<br><sup>(píxeles)</sup> | mAP<sup>val</sup><br>50-95 | Velocidad<br><sup>CPU ONNX<br>(ms)</sup> | Velocidad<br><sup>A100 TensorRT<br>(ms)</sup> | parámetros<br><sup>(M)</sup> | FLOPs<br><sup>(B)</sup> | + |---------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|-----------------------|----------------------|--------------------------------|-------------------------------------|--------------------|-------------------| + | [yolov5nu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5nu.pt) | [yolov5n.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 34.3 | 73.6 | 1.06 | 2.6 | 7.7 | + | [yolov5su.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5su.pt) | [yolov5s.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 43.0 | 120.7 | 1.27 | 9.1 | 24.0 | + | [yolov5mu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5mu.pt) | [yolov5m.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 49.0 | 233.9 | 1.86 | 25.1 | 64.2 | + | [yolov5lu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5lu.pt) | [yolov5l.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 52.2 | 408.4 | 2.50 | 53.2 | 135.0 | + | [yolov5xu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5xu.pt) | [yolov5x.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 53.2 | 763.2 | 3.81 | 97.2 | 246.4 | + | | | | | | | | | + | [yolov5n6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5n6u.pt) | [yolov5n6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 42.1 | 211.0 | 1.83 | 4.3 | 7.8 | + | [yolov5s6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5s6u.pt) | 
[yolov5s6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 48.6 | 422.6 | 2.34 | 15.3 | 24.6 | + | [yolov5m6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5m6u.pt) | [yolov5m6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 53.6 | 810.9 | 4.36 | 41.2 | 65.7 | + | [yolov5l6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5l6u.pt) | [yolov5l6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 55.7 | 1470.9 | 5.47 | 86.1 | 137.4 | + | [yolov5x6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5x6u.pt) | [yolov5x6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 56.8 | 2436.5 | 8.98 | 155.4 | 250.7 | + +## Ejemplos de Uso + +Este ejemplo proporciona ejemplos sencillos de entrenamiento e inferencia de YOLOv5. Para obtener documentación completa sobre estos y otros [modos](../modes/index.md), consulta las páginas de documentación de [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) y [Export](../modes/export.md). + +!!! 
Example "Ejemplo" + + === "Python" + + Los modelos pre-entrenados `*.pt` de PyTorch, así como los archivos de configuración `*.yaml`, se pueden pasar a la clase `YOLO()` para crear una instancia de modelo en Python: + + ```python + from ultralytics import YOLO + + # Cargar un modelo YOLOv5n pre-entrenado en COCO + modelo = YOLO('yolov5n.pt') + + # Mostrar información del modelo (opcional) + modelo.info() + + # Entrenar el modelo con el conjunto de datos de ejemplo COCO8 durante 100 épocas + resultados = modelo.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Ejecutar inferencia con el modelo YOLOv5n en la imagen 'bus.jpg' + resultados = modelo('path/to/bus.jpg') + ``` + + === "CLI" + + Hay comandos de CLI disponibles para ejecutar directamente los modelos: + + ```bash + # Cargar un modelo YOLOv5n pre-entrenado en COCO y entrenarlo con el conjunto de datos de ejemplo COCO8 durante 100 épocas + yolo train model=yolov5n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Cargar un modelo YOLOv5n pre-entrenado en COCO y ejecutar inferencia en la imagen 'bus.jpg' + yolo predict model=yolov5n.pt source=path/to/bus.jpg + ``` + +## Citaciones y Reconocimientos + +Si utilizas YOLOv5 o YOLOv5u en tu investigación, por favor cita el repositorio de Ultralytics YOLOv5 de la siguiente manera: + +!!! Quote "" + + === "BibTeX" + ```bibtex + @software{yolov5, + title = {Ultralytics YOLOv5}, + author = {Glenn Jocher}, + year = {2020}, + version = {7.0}, + license = {AGPL-3.0}, + url = {https://github.com/ultralytics/yolov5}, + doi = {10.5281/zenodo.3908559}, + orcid = {0000-0001-5950-6979} + } + ``` + +Ten en cuenta que los modelos YOLOv5 se proporcionan bajo las licencias [AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) y [Enterprise](https://ultralytics.com/license). 
diff --git a/docs/es/models/yolov6.md b/docs/es/models/yolov6.md new file mode 100644 index 0000000..f65f37f --- /dev/null +++ b/docs/es/models/yolov6.md @@ -0,0 +1,107 @@ +--- +comments: true +description: Explora Meituan YOLOv6, un modelo de detección de objetos de última generación que logra un equilibrio entre velocidad y precisión. Sumérgete en características, modelos pre-entrenados y el uso de Python. +keywords: Meituan YOLOv6, detección de objetos, Ultralytics, documentación de YOLOv6, Concatenación Bidireccional, Entrenamiento con Anclas, modelos pre-entrenados, aplicaciones en tiempo real +--- + +# Meituan YOLOv6 + +## Visión general + +[Meituan](https://about.meituan.com/) YOLOv6 es un detector de objetos de última generación que ofrece un notable equilibrio entre velocidad y precisión, lo que lo convierte en una opción popular para aplicaciones en tiempo real. Este modelo presenta varias mejoras notables en su arquitectura y esquema de entrenamiento, que incluyen la implementación de un módulo de Concatenación Bidireccional (BiC), una estrategia de entrenamiento con anclas (AAT) y un diseño de columna vertebral y cuello mejorado para lograr una precisión de última generación en el conjunto de datos COCO. + +![Meituan YOLOv6](https://user-images.githubusercontent.com/26833433/240750495-4da954ce-8b3b-41c4-8afd-ddb74361d3c2.png) +![Ejemplo de imagen del modelo](https://user-images.githubusercontent.com/26833433/240750557-3e9ec4f0-0598-49a8-83ea-f33c91eb6d68.png) +**Visión general de YOLOv6.** Diagrama de la arquitectura del modelo que muestra los componentes de la red rediseñados y las estrategias de entrenamiento que han llevado a mejoras significativas en el rendimiento. (a) El cuello de YOLOv6 (N y S se muestran). Nótese que, en M/L, RepBlocks es reemplazado por CSPStackRep. (b) La estructura de un módulo BiC. (c) Un bloque SimCSPSPPF. ([fuente](https://arxiv.org/pdf/2301.05586.pdf)). 
+ +### Características clave + +- **Módulo de Concatenación Bidireccional (BiC):** YOLOv6 introduce un módulo de BiC en el cuello del detector, mejorando las señales de localización y ofreciendo mejoras en el rendimiento con una degradación de velocidad despreciable. +- **Estrategia de Entrenamiento con Anclas (AAT):** Este modelo propone AAT para disfrutar de los beneficios de los paradigmas basados en anclas y sin anclas sin comprometer la eficiencia de inferencia. +- **Diseño de Columna Vertebral y Cuello Mejorado:** Al profundizar en YOLOv6 para incluir otra etapa en la columna vertebral y el cuello, este modelo logra un rendimiento de última generación en el conjunto de datos COCO con una entrada de alta resolución. +- **Estrategia de Auto-Destilación:** Se implementa una nueva estrategia de auto-destilación para mejorar el rendimiento de los modelos más pequeños de YOLOv6, mejorando la rama de regresión auxiliar durante el entrenamiento y eliminándola durante la inferencia para evitar una marcada disminución de velocidad. + +## Métricas de rendimiento + +YOLOv6 proporciona varios modelos pre-entrenados con diferentes escalas: + +- YOLOv6-N: 37.5% de precisión promedio (AP) en COCO val2017 a 1187 FPS con la GPU NVIDIA Tesla T4. +- YOLOv6-S: 45.0% de AP a 484 FPS. +- YOLOv6-M: 50.0% de AP a 226 FPS. +- YOLOv6-L: 52.8% de AP a 116 FPS. +- YOLOv6-L6: Precisión de última generación en tiempo real. + +YOLOv6 también proporciona modelos cuantizados para diferentes precisiones y modelos optimizados para plataformas móviles. + +## Ejemplos de uso + +Este ejemplo proporciona ejemplos sencillos de entrenamiento e inferencia con YOLOv6. Para obtener documentación completa sobre estos y otros [modos](../modes/index.md), consulta las páginas de documentación de [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) y [Export](../modes/export.md). + +!!! 
Example "Ejemplo" + + === "Python" + + Los modelos pre-entrenados en `*.pt` de PyTorch, así como los archivos de configuración `*.yaml`, se pueden pasar a la clase `YOLO()` para crear una instancia del modelo en Python: + + ```python + from ultralytics import YOLO + + # Construir un modelo YOLOv6n desde cero + modelo = YOLO('yolov6n.yaml') + + # Mostrar información del modelo (opcional) + modelo.info() + + # Entrenar el modelo en el conjunto de datos de ejemplo COCO8 durante 100 epochs + resultados = modelo.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Ejecutar inferencia con el modelo YOLOv6n en la imagen 'bus.jpg' + resultados = modelo('path/to/bus.jpg') + ``` + + === "CLI" + + Se dispone de comandos de línea de comandos (CLI) para ejecutar directamente los modelos: + + ```bash + # Construir un modelo YOLOv6n desde cero y entrenarlo en el conjunto de datos de ejemplo COCO8 durante 100 epochs + yolo train model=yolov6n.yaml data=coco8.yaml epochs=100 imgsz=640 + + # Construir un modelo YOLOv6n desde cero y ejecutar inferencia en la imagen 'bus.jpg' + yolo predict model=yolov6n.yaml source=path/to/bus.jpg + ``` + +## Tareas y Modos Soportados + +La serie YOLOv6 ofrece una variedad de modelos, cada uno optimizado para [Detección de Objetos](../tasks/detect.md) de alto rendimiento. Estos modelos se adaptan a distintas necesidades computacionales y requisitos de precisión, lo que los hace versátiles para una amplia gama de aplicaciones. 
+ +| Tipo de Modelo | Pesos Pre-entrenados | Tareas Soportadas | Inferencia | Validación | Entrenamiento | Exportación | +|----------------|----------------------|--------------------------------------------|------------|------------|---------------|-------------| +| YOLOv6-N | `yolov6-n.pt` | [Detección de Objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-S | `yolov6-s.pt` | [Detección de Objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-M | `yolov6-m.pt` | [Detección de Objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L | `yolov6-l.pt` | [Detección de Objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L6 | `yolov6-l6.pt` | [Detección de Objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +Esta tabla proporciona una descripción detallada de las variantes del modelo YOLOv6, destacando sus capacidades en tareas de detección de objetos y su compatibilidad con varios modos operativos como [Inferencia](../modes/predict.md), [Validación](../modes/val.md), [Entrenamiento](../modes/train.md) y [Exportación](../modes/export.md). Este soporte integral garantiza que los usuarios puedan aprovechar al máximo las capacidades de los modelos YOLOv6 en una amplia gama de escenarios de detección de objetos. + +## Citaciones y Agradecimientos + +Nos gustaría agradecer a los autores por sus importantes contribuciones en el campo de la detección de objetos en tiempo real: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{li2023yolov6, + title={YOLOv6 v3.0: A Full-Scale Reloading}, + author={Chuyi Li and Lulu Li and Yifei Geng and Hongliang Jiang and Meng Cheng and Bo Zhang and Zaidan Ke and Xiaoming Xu and Xiangxiang Chu}, + year={2023}, + eprint={2301.05586}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + + Se puede encontrar el artículo original de YOLOv6 en [arXiv](https://arxiv.org/abs/2301.05586). 
Los autores han puesto su trabajo a disposición del público y el código fuente se puede acceder en [GitHub](https://github.com/meituan/YOLOv6). Agradecemos sus esfuerzos en avanzar en el campo y hacer que su trabajo sea accesible para la comunidad en general. diff --git a/docs/es/models/yolov7.md b/docs/es/models/yolov7.md new file mode 100644 index 0000000..6452043 --- /dev/null +++ b/docs/es/models/yolov7.md @@ -0,0 +1,66 @@ +--- +comments: true +description: Explora el YOLOv7, un detector de objetos en tiempo real. Comprende su velocidad superior, precisión impresionante y enfoque único en la optimización de entrenamiento de bolsas de características entrenables. +keywords: YOLOv7, detector de objetos en tiempo real, estado del arte, Ultralytics, conjunto de datos MS COCO, re-parametrización del modelo, asignación dinámica de etiquetas, escalado extendido, escalado compuesto +--- + +# YOLOv7: Bolsa de Características Entrenable + +YOLOv7 es un detector de objetos en tiempo real de última generación que supera a todos los detectores de objetos conocidos tanto en velocidad como en precisión en el rango de 5 FPS a 160 FPS. Tiene la mayor precisión (56.8% AP) entre todos los detectores de objetos en tiempo real conocidos con una velocidad de 30 FPS o superior en la GPU V100. Además, YOLOv7 supera a otros detectores de objetos como YOLOR, YOLOX, Scaled-YOLOv4, YOLOv5 y muchos otros en cuanto a velocidad y precisión. El modelo se entrena desde cero utilizando el conjunto de datos MS COCO sin utilizar ningún otro conjunto de datos o pesos pre-entrenados. El código fuente de YOLOv7 está disponible en GitHub. + +![Comparación de YOLOv7 con detectores de objetos SOTA](https://github.com/ultralytics/ultralytics/assets/26833433/5e1e0420-8122-4c79-b8d0-2860aa79af92) +**Comparación de los detectores de objetos de estado del arte. +** Según los resultados en la Tabla 2, sabemos que el método propuesto tiene el mejor equilibrio entre velocidad y precisión de manera integral. 
Si comparamos YOLOv7-tiny-SiLU con YOLOv5-N (r6.1), nuestro método es 127 fps más rápido y un 10.7% más preciso en AP. Además, YOLOv7 tiene un AP del 51.4% a una velocidad de cuadro de 161 fps, mientras que PPYOLOE-L con el mismo AP tiene solo una velocidad de cuadro de 78 fps. En términos de uso de parámetros, YOLOv7 utiliza un 41% menos que PPYOLOE-L. Si comparamos YOLOv7-X con una velocidad de inferencia de 114 fps con YOLOv5-L (r6.1) con una velocidad de inferencia de 99 fps, YOLOv7-X puede mejorar el AP en un 3.9%. Si se compara YOLOv7-X con YOLOv5-X (r6.1) de una escala similar, la velocidad de inferencia de YOLOv7-X es 31 fps más rápida. Además, en términos de cantidad de parámetros y cálculos, YOLOv7-X reduce un 22% de los parámetros y un 8% de los cálculos en comparación con YOLOv5-X (r6.1), pero mejora el AP en un 2.2% ([Fuente](https://arxiv.org/pdf/2207.02696.pdf)). + +## Descripción general + +La detección de objetos en tiempo real es un componente importante en muchos sistemas de visión por computadora, incluyendo el seguimiento de múltiples objetos, conducción autónoma, robótica y análisis de imágenes médicas. En los últimos años, el desarrollo de la detección de objetos en tiempo real se ha centrado en el diseño de arquitecturas eficientes y en la mejora de la velocidad de inferencia de diversas CPUs, GPUs y unidades de procesamiento neural (NPUs). YOLOv7 es compatible tanto con GPU para dispositivos móviles como con GPU para dispositivos de escritorio, desde el borde hasta la nube. + +A diferencia de los detectores de objetos en tiempo real tradicionales que se centran en la optimización de la arquitectura, YOLOv7 introduce un enfoque en la optimización del proceso de entrenamiento. Esto incluye módulos y métodos de optimización diseñados para mejorar la precisión de la detección de objetos sin aumentar el costo de inferencia, un concepto conocido como "bolsas de características entrenables". 
+ +## Características clave + +YOLOv7 introduce varias características clave: + +1. **Re-parametrización del modelo**: YOLOv7 propone un modelo re-parametrizado planificado, que es una estrategia aplicable a capas en diferentes redes con el concepto de propagación del gradiente. + +2. **Asignación dinámica de etiquetas**: El entrenamiento del modelo con múltiples capas de salida presenta un nuevo problema: "¿Cómo asignar objetivos dinámicos para las salidas de diferentes ramas?" Para resolver este problema, YOLOv7 introduce un nuevo método de asignación de etiquetas llamado asignación de etiquetas guiadas de manera gruesa a fina. + +3. **Escalado extendido y compuesto**: YOLOv7 propone métodos de "escalado extendido" y "escalado compuesto" para el detector de objetos en tiempo real que pueden utilizar eficazmente los parámetros y cálculos. + +4. **Eficiencia**: El método propuesto por YOLOv7 puede reducir eficazmente aproximadamente el 40% de los parámetros y el 50% de los cálculos del detector de objetos en tiempo real de última generación y tiene una velocidad de inferencia más rápida y una mayor precisión de detección. + +## Ejemplos de uso + +Hasta la fecha de redacción de este documento, Ultralytics no admite actualmente modelos YOLOv7. Por lo tanto, los usuarios interesados en utilizar YOLOv7 deberán consultar directamente el repositorio de GitHub de YOLOv7 para obtener instrucciones de instalación y uso. + +Aquí hay un resumen breve de los pasos típicos que podrías seguir para usar YOLOv7: + +1. Visita el repositorio de GitHub de YOLOv7: [https://github.com/WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7). + +2. Sigue las instrucciones proporcionadas en el archivo README para la instalación. Esto generalmente implica clonar el repositorio, instalar las dependencias necesarias y configurar las variables de entorno necesarias. + +3. 
Una vez que la instalación esté completa, puedes entrenar y utilizar el modelo según las instrucciones de uso proporcionadas en el repositorio. Esto generalmente implica preparar tu conjunto de datos, configurar los parámetros del modelo, entrenar el modelo y luego utilizar el modelo entrenado para realizar la detección de objetos. + +Ten en cuenta que los pasos específicos pueden variar según tu caso de uso específico y el estado actual del repositorio YOLOv7. Por lo tanto, se recomienda encarecidamente consultar directamente las instrucciones proporcionadas en el repositorio de GitHub de YOLOv7. + +Lamentamos cualquier inconveniente que esto pueda causar y nos esforzaremos por actualizar este documento con ejemplos de uso para Ultralytics una vez que se implemente el soporte para YOLOv7. + +## Citaciones y Agradecimientos + +Nos gustaría agradecer a los autores de YOLOv7 por sus importantes contribuciones en el campo de la detección de objetos en tiempo real: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{wang2022yolov7, + title={{YOLOv7}: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors}, + author={Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + journal={arXiv preprint arXiv:2207.02696}, + year={2022} + } + ``` + +El artículo original de YOLOv7 se puede encontrar en [arXiv](https://arxiv.org/pdf/2207.02696.pdf). Los autores han hecho su trabajo públicamente disponible y el código se puede acceder en [GitHub](https://github.com/WongKinYiu/yolov7). Agradecemos sus esfuerzos en el avance del campo y en hacer su trabajo accesible a la comunidad en general. diff --git a/docs/es/models/yolov8.md b/docs/es/models/yolov8.md new file mode 100644 index 0000000..7617460 --- /dev/null +++ b/docs/es/models/yolov8.md @@ -0,0 +1,162 @@ +--- +comments: true +description: ¡Explora las emocionantes características de YOLOv8, la última versión de nuestro detector de objetos en tiempo real! 
Aprende cómo las arquitecturas avanzadas, los modelos preentrenados y el equilibrio óptimo entre precisión y velocidad hacen de YOLOv8 la elección perfecta para tus tareas de detección de objetos. +keywords: YOLOv8, Ultralytics, detector de objetos en tiempo real, modelos preentrenados, documentación, detección de objetos, serie YOLO, arquitecturas avanzadas, precisión, velocidad +--- + +# YOLOv8 + +## Descripción general + +YOLOv8 es la última versión de la serie YOLO de detectores de objetos en tiempo real, ofreciendo un rendimiento de vanguardia en términos de precisión y velocidad. Basándose en los avances de las versiones anteriores de YOLO, YOLOv8 presenta nuevas características y optimizaciones que lo convierten en una opción ideal para diversas tareas de detección de objetos en una amplia gama de aplicaciones. + +![Ultralytics YOLOv8](https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/yolo-comparison-plots.png) + +## Características principales + +- **Arquitecturas avanzadas de columna vertebral y cuello:** YOLOv8 utiliza arquitecturas de columna vertebral y cuello de última generación, lo que resulta en una mejor extracción de características y rendimiento de detección de objetos. +- **Cabeza Ultralytics dividida sin anclaje:** YOLOv8 adopta una cabeza Ultralytics dividida sin anclaje, lo que contribuye a una mejor precisión y a un proceso de detección más eficiente en comparación con los enfoques basados en anclaje. +- **Equilibrio optimizado entre precisión y velocidad:** Con un enfoque en mantener un equilibrio óptimo entre precisión y velocidad, YOLOv8 es adecuado para tareas de detección de objetos en tiempo real en diversas áreas de aplicación. +- **Variedad de modelos preentrenados:** YOLOv8 ofrece una variedad de modelos preentrenados para adaptarse a diversas tareas y requisitos de rendimiento, lo que facilita encontrar el modelo adecuado para tu caso de uso específico. 
+ +## Tareas y modos compatibles + +La serie YOLOv8 ofrece una amplia gama de modelos, cada uno especializado en tareas específicas en visión por computadora. Estos modelos están diseñados para adaptarse a diversos requisitos, desde la detección de objetos hasta tareas más complejas como la segmentación de instancias, la detección de poses/puntos clave y la clasificación. + +Cada variante de la serie YOLOv8 está optimizada para su respectiva tarea, garantizando un alto rendimiento y precisión. Además, estos modelos son compatibles con varios modos operativos, incluyendo [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md) y [Export](../modes/export.md), lo que facilita su uso en diferentes etapas de implementación y desarrollo. + +| Modelo | Nombres de archivo | Tarea | Inferencia | Validación | Entrenamiento | Exportación | +|-------------|----------------------------------------------------------------------------------------------------------------|---------------------------------------------------|------------|------------|---------------|-------------| +| YOLOv8 | `yolov8n.pt` `yolov8s.pt` `yolov8m.pt` `yolov8l.pt` `yolov8x.pt` | [Detección](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-seg | `yolov8n-seg.pt` `yolov8s-seg.pt` `yolov8m-seg.pt` `yolov8l-seg.pt` `yolov8x-seg.pt` | [Segmentación de instancias](../tasks/segment.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-pose | `yolov8n-pose.pt` `yolov8s-pose.pt` `yolov8m-pose.pt` `yolov8l-pose.pt` `yolov8x-pose.pt` `yolov8x-pose-p6.pt` | [Pose/Puntos clave](../tasks/pose.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-cls | `yolov8n-cls.pt` `yolov8s-cls.pt` `yolov8m-cls.pt` `yolov8l-cls.pt` `yolov8x-cls.pt` | [Clasificación](../tasks/classify.md) | ✅ | ✅ | ✅ | ✅ | + +Esta tabla proporciona una descripción general de las variantes de modelos YOLOv8, resaltando su aplicabilidad en tareas específicas y su compatibilidad con varios modos operativos como Inferencia, Validación, Entrenamiento y 
Exportación. Muestra la versatilidad y robustez de la serie YOLOv8, lo que hace que estos modelos sean adecuados para una variedad de aplicaciones en visión por computadora. + +## Métricas de rendimiento + +!!! Rendimiento + + === "Detección (COCO)" + + Consulta la [documentación de Detección](https://docs.ultralytics.com/tasks/detect/) para ejemplos de uso con estos modelos entrenados en [COCO](https://docs.ultralytics.com/datasets/detect/coco/), que incluyen 80 clases preentrenadas. + + | Modelo | tamaño
(píxeles) | mAPval
50-95 | Velocidad
CPU ONNX
(ms) | Velocidad
A100 TensorRT
(ms) | parámetros
(M) | FLOPs
(B) | + | ------------------------------------------------------------------------------------ | ----------------------- | --------------------- | ------------------------------ | --------------------------------------- | ---------------------- | ----------------- | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | + + === "Detección (Open Images V7)" + + Consulta la [documentación de Detección](https://docs.ultralytics.com/tasks/detect/) para ejemplos de uso con estos modelos entrenados en [Open Images V7](https://docs.ultralytics.com/datasets/detect/open-images-v7/), que incluyen 600 clases preentrenadas. + + | Modelo | tamaño
(píxeles) | mAPval
50-95 | Velocidad
CPU ONNX
(ms) | Velocidad
A100 TensorRT
(ms) | parámetros
(M) | FLOPs
(B) | + | ----------------------------------------------------------------------------------------- | ----------------------- | --------------------- | -------------------------------- | --------------------------------------- | ---------------------- | ----------------- | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-oiv7.pt) | 640 | 18.4 | 142.4 | 1.21 | 3.5 | 10.5 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-oiv7.pt) | 640 | 27.7 | 183.1 | 1.40 | 11.4 | 29.7 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-oiv7.pt) | 640 | 33.6 | 408.5 | 2.26 | 26.2 | 80.6 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-oiv7.pt) | 640 | 34.9 | 596.9 | 2.43 | 44.1 | 167.4 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-oiv7.pt) | 640 | 36.3 | 860.6 | 3.56 | 68.7 | 260.6 | + + === "Segmentación (COCO)" + + Consulta la [documentación de Segmentación](https://docs.ultralytics.com/tasks/segment/) para ejemplos de uso con estos modelos entrenados en [COCO](https://docs.ultralytics.com/datasets/segment/coco/), que incluyen 80 clases preentrenadas. + + | Modelo | tamaño
(píxeles) | mAPcaja
50-95 | mAPmáscara
50-95 | Velocidad
CPU ONNX
(ms) | Velocidad
A100 TensorRT
(ms) | parámetros
(M) | FLOPs
(B) | + | -------------------------------------------------------------------------------------------- | ----------------------- | ---------------------- | ----------------------- | -------------------------------- | --------------------------------------- | ---------------------- | ----------------- | + | [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | + | [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | + | [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | + | [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | + | [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | + + === "Clasificación (ImageNet)" + + Consulta la [documentación de Clasificación](https://docs.ultralytics.com/tasks/classify/) para ejemplos de uso con estos modelos entrenados en [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/), que incluyen 1000 clases preentrenadas. + + | Modelo | tamaño
(píxeles) | acc
top1 | acc
top5 | Velocidad
CPU ONNX
(ms) | Velocidad
A100 TensorRT
(ms) | parámetros
(M) | FLOPs
(B) a 640 | + | -------------------------------------------------------------------------------------------- | ----------------------- | ---------------- | ---------------- | -------------------------------- | --------------------------------------- | ---------------------- | ------------------------ | + | [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | + | [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | + | [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | + | [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | + | [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | + + === "Pose (COCO)" + + Consulta la [documentación de Estimación de Poses](https://docs.ultralytics.com/tasks/pose/) para ejemplos de uso con estos modelos entrenados en [COCO](https://docs.ultralytics.com/datasets/pose/coco/), que incluyen 1 clase preentrenada, 'person'. + + | Modelo | tamaño
(píxeles) | mAPpose
50-95 | mAPpose
50 | Velocidad
CPU ONNX
(ms) | Velocidad
A100 TensorRT
(ms) | parámetros
(M) | FLOPs
(B) | + | ---------------------------------------------------------------------------------------------------- | ----------------------- | --------------------- | ------------------ | -------------------------------- | --------------------------------------- | ---------------------- | ----------------- | + | [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | + | [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | + | [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | + | [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | + | [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | + | [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | + +## Ejemplos de uso + +Este ejemplo proporciona ejemplos sencillos de entrenamiento e inferencia con YOLOv8. Para obtener documentación completa sobre estos y otros [modos](../modes/index.md), consulta las páginas de documentación de [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) y [Export](../modes/export.md). + +Ten en cuenta que el siguiente ejemplo es para modelos de detección YOLOv8. Para ver las tareas adicionales compatibles, consulta la documentación de [Segment](../tasks/segment.md), [Classify](../tasks/classify.md) y [Pose](../tasks/pose.md). + +!!! 
Example "Ejemplo" + + === "Python" + + Los modelos preentrenados en PyTorch `*.pt`, así como los archivos de configuración `*.yaml`, se pueden pasar a la clase `YOLO()` para crear una instancia del modelo en Python: + + ```python + from ultralytics import YOLO + + # Carga un modelo YOLOv8n preentrenado en COCO + model = YOLO('yolov8n.pt') + + # Muestra información del modelo (opcional) + model.info() + + # Entrena el modelo en el conjunto de datos de ejemplo COCO8 durante 100 épocas + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Realiza inferencia con el modelo YOLOv8n en la imagen 'bus.jpg' + results = model('ruta/a/bus.jpg') + ``` + + === "CLI" + + Hay comandos de CLI disponibles para ejecutar directamente los modelos: + + ```bash + # Carga un modelo YOLOv8n preentrenado en COCO y entrénalo en el conjunto de datos de ejemplo COCO8 durante 100 épocas + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Carga un modelo YOLOv8n preentrenado en COCO y realiza inferencia en la imagen 'bus.jpg' + yolo predict model=yolov8n.pt source=ruta/a/bus.jpg + ``` + +## Citas y reconocimientos + +Si utilizas el modelo YOLOv8 u otro software de este repositorio en tu trabajo, por favor cítalo utilizando el siguiente formato: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @software{yolov8_ultralytics, + author = {Glenn Jocher and Ayush Chaurasia and Jing Qiu}, + title = {Ultralytics YOLOv8}, + version = {8.0.0}, + year = {2023}, + url = {https://github.com/ultralytics/ultralytics}, + orcid = {0000-0001-5950-6979, 0000-0002-7603-6750, 0000-0003-3783-7069}, + license = {AGPL-3.0} + } + ``` + + Ten en cuenta que el DOI está pendiente y se agregará a la cita una vez que esté disponible. Los modelos de YOLOv8 se proporcionan bajo las licencias [AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) y [Enterprise](https://ultralytics.com/license). 
diff --git a/docs/es/modes/benchmark.md b/docs/es/modes/benchmark.md new file mode 100644 index 0000000..3a165da --- /dev/null +++ b/docs/es/modes/benchmark.md @@ -0,0 +1,94 @@ +--- +comments: true +description: Aprenda cómo perfilar la velocidad y exactitud de YOLOv8 en varios formatos de exportación; obtenga perspectivas sobre las métricas mAP50-95, accuracy_top5 y más. +keywords: Ultralytics, YOLOv8, benchmarking, perfilado de velocidad, perfilado de exactitud, mAP50-95, accuracy_top5, ONNX, OpenVINO, TensorRT, formatos de exportación YOLO +--- + +# Model Benchmarking con Ultralytics YOLO + +Ecosistema e integraciones de Ultralytics YOLO + +## Introducción + +Una vez que su modelo está entrenado y validado, el siguiente paso lógico es evaluar su rendimiento en varios escenarios del mundo real. El modo benchmark en Ultralytics YOLOv8 cumple con este propósito proporcionando un marco sólido para valorar la velocidad y exactitud de su modelo a través de una gama de formatos de exportación. + +## ¿Por Qué Es Crucial el Benchmarking? + +- **Decisiones Informadas:** Obtenga perspectivas sobre el equilibrio entre velocidad y precisión. +- **Asignación de Recursos:** Entienda cómo diferentes formatos de exportación se desempeñan en diferentes hardware. +- **Optimización:** Aprenda cuál formato de exportación ofrece el mejor rendimiento para su caso de uso específico. +- **Eficiencia de Costo:** Haga un uso más eficiente de los recursos de hardware basado en los resultados del benchmark. + +### Métricas Clave en el Modo Benchmark + +- **mAP50-95:** Para detección de objetos, segmentación y estimación de pose. +- **accuracy_top5:** Para clasificación de imágenes. +- **Tiempo de Inferencia:** Tiempo tomado para cada imagen en milisegundos. 
+ +### Formatos de Exportación Soportados + +- **ONNX:** Para un rendimiento óptimo de CPU +- **TensorRT:** Para la máxima eficiencia de GPU +- **OpenVINO:** Para la optimización en hardware de Intel +- **CoreML, TensorFlow SavedModel y Más:** Para necesidades de despliegue diversas. + +!!! Tip "Consejo" + + * Exporte a ONNX u OpenVINO para acelerar la velocidad de CPU hasta 3 veces. + * Exporte a TensorRT para acelerar la velocidad de GPU hasta 5 veces. + +## Ejemplos de Uso + +Ejecute benchmarks de YOLOv8n en todos los formatos de exportación soportados incluyendo ONNX, TensorRT, etc. Vea la sección de Argumentos a continuación para una lista completa de argumentos de exportación. + +!!! Example "Ejemplo" + + === "Python" + + ```python + from ultralytics.utils.benchmarks import benchmark + + # Benchmark en GPU + benchmark(model='yolov8n.pt', data='coco8.yaml', imgsz=640, half=False, device=0) + ``` + === "CLI" + + ```bash + yolo benchmark model=yolov8n.pt data='coco8.yaml' imgsz=640 half=False device=0 + ``` + +## Argumentos + +Argumentos como `model`, `data`, `imgsz`, `half`, `device`, y `verbose` proporcionan a los usuarios la flexibilidad de ajustar los benchmarks a sus necesidades específicas y comparar el rendimiento de diferentes formatos de exportación con facilidad. 
+ +| Clave | Valor | Descripción | +|-----------|---------|----------------------------------------------------------------------------------------------------------| +| `model` | `None` | ruta al archivo del modelo, es decir, yolov8n.pt, yolov8n.yaml | +| `data` | `None` | ruta a YAML que referencia el conjunto de datos de benchmarking (bajo la etiqueta `val`) | +| `imgsz` | `640` | tamaño de imagen como escalar o lista (h, w), es decir, (640, 480) | +| `half` | `False` | cuantificación FP16 | +| `int8` | `False` | cuantificación INT8 | +| `device` | `None` | dispositivo en el que se ejecutará, es decir, dispositivo cuda=0 o dispositivo=0,1,2,3 o dispositivo=cpu | +| `verbose` | `False` | no continuar en caso de error (bool), o umbral de piso de valor (float) | + +## Formatos de Exportación + +Los benchmarks intentarán ejecutarse automáticamente en todos los posibles formatos de exportación a continuación. + +| Formato | Argumento `format` | Modelo | Metadatos | Argumentos | +|--------------------------------------------------------------------|--------------------|---------------------------|-----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | 
+| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +Vea los detalles completos de `export` en la página [Export](https://docs.ultralytics.com/modes/export/). diff --git a/docs/es/modes/export.md b/docs/es/modes/export.md new file mode 100644 index 0000000..e701ccb --- /dev/null +++ b/docs/es/modes/export.md @@ -0,0 +1,108 @@ +--- +comments: true +description: Guía paso a paso sobre cómo exportar sus modelos YOLOv8 a varios formatos como ONNX, TensorRT, CoreML y más para su despliegue. ¡Explora ahora!. +keywords: YOLO, YOLOv8, Ultralytics, Exportación de modelos, ONNX, TensorRT, CoreML, TensorFlow SavedModel, OpenVINO, PyTorch, exportar modelo +--- + +# Exportación de Modelos con Ultralytics YOLO + +Ecosistema de Ultralytics YOLO e integraciones + +## Introducción + +El objetivo final de entrenar un modelo es desplegarlo para aplicaciones en el mundo real. El modo exportación en Ultralytics YOLOv8 ofrece una gama versátil de opciones para exportar tu modelo entrenado a diferentes formatos, haciéndolo desplegable en varias plataformas y dispositivos. Esta guía integral pretende guiarte a través de los matices de la exportación de modelos, mostrando cómo lograr la máxima compatibilidad y rendimiento. + +

+
+ +
+ Ver: Cómo Exportar un Modelo Entrenado Personalizado de Ultralytics YOLOv8 y Ejecutar Inferencia en Vivo en la Webcam. +

+ +## ¿Por Qué Elegir el Modo Exportación de YOLOv8? + +- **Versatilidad:** Exporta a múltiples formatos incluyendo ONNX, TensorRT, CoreML y más. +- **Rendimiento:** Acelera hasta 5 veces la velocidad en GPU con TensorRT y 3 veces en CPU con ONNX o OpenVINO. +- **Compatibilidad:** Hacer que tu modelo sea universalmente desplegable en numerosos entornos de hardware y software. +- **Facilidad de Uso:** Interfaz de línea de comandos simple y API de Python para una exportación de modelos rápida y sencilla. + +### Características Clave del Modo de Exportación + +Aquí tienes algunas de las funcionalidades destacadas: + +- **Exportación con Un Solo Clic:** Comandos simples para exportar a diferentes formatos. +- **Exportación por Lotes:** Exporta modelos capaces de inferencia por lotes. +- **Inferencia Optimizada:** Los modelos exportados están optimizados para tiempos de inferencia más rápidos. +- **Vídeos Tutoriales:** Guías y tutoriales en profundidad para una experiencia de exportación fluida. + +!!! Tip "Consejo" + + * Exporta a ONNX u OpenVINO para acelerar la CPU hasta 3 veces. + * Exporta a TensorRT para acelerar la GPU hasta 5 veces. + +## Ejemplos de Uso + +Exporta un modelo YOLOv8n a un formato diferente como ONNX o TensorRT. Consulta la sección Argumentos más abajo para una lista completa de argumentos de exportación. + +!!! 
Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carga un modelo + model = YOLO('yolov8n.pt') # carga un modelo oficial + model = YOLO('path/to/best.pt') # carga un modelo entrenado personalizado + + # Exporta el modelo + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # exporta modelo oficial + yolo export model=path/to/best.pt format=onnx # exporta modelo entrenado personalizado + ``` + +## Argumentos + +Los ajustes de exportación para modelos YOLO se refieren a las diversas configuraciones y opciones utilizadas para guardar o exportar el modelo para su uso en otros entornos o plataformas. Estos ajustes pueden afectar el rendimiento del modelo, su tamaño y su compatibilidad con diferentes sistemas. Algunos ajustes comunes de exportación de YOLO incluyen el formato del archivo del modelo exportado (p. ej., ONNX, TensorFlow SavedModel), el dispositivo en el que se ejecutará el modelo (p. ej., CPU, GPU) y la presencia de características adicionales como máscaras o múltiples etiquetas por caja. Otros factores que pueden afectar el proceso de exportación incluyen la tarea específica para la que se está utilizando el modelo y los requisitos o limitaciones del entorno o plataforma objetivo. Es importante considerar y configurar cuidadosamente estos ajustes para asegurar que el modelo exportado está optimizado para el caso de uso previsto y se pueda utilizar eficazmente en el entorno objetivo. + +| Llave | Valor | Descripción | +|-------------|-----------------|-----------------------------------------------------------------| +| `format` | `'torchscript'` | formato al que exportar | +| `imgsz` | `640` | tamaño de imagen como escalar o lista (h, w), p. ej. 
(640, 480) | +| `keras` | `False` | usar Keras para la exportación de TF SavedModel | +| `optimize` | `False` | TorchScript: optimizar para móvil | +| `half` | `False` | cuantificación FP16 | +| `int8` | `False` | cuantificación INT8 | +| `dynamic` | `False` | ONNX/TensorRT: ejes dinámicos | +| `simplify` | `False` | ONNX/TensorRT: simplificar modelo | +| `opset` | `None` | ONNX: versión de opset (opcional, por defecto la más reciente) | +| `workspace` | `4` | TensorRT: tamaño del espacio de trabajo (GB) | +| `nms` | `False` | CoreML: añadir NMS | + +## Formatos de Exportación + +Los formatos de exportación disponibles de YOLOv8 están en la tabla a continuación. Puedes exportar a cualquier formato usando el argumento `format`, por ejemplo, `format='onnx'` o `format='engine'`. + +| Formato | Argumento `format` | Modelo | Metadatos | Argumentos | +|--------------------------------------------------------------------|--------------------|---------------------------|-----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF 
Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | diff --git a/docs/es/modes/index.md b/docs/es/modes/index.md new file mode 100644 index 0000000..1c9b408 --- /dev/null +++ b/docs/es/modes/index.md @@ -0,0 +1,73 @@ +--- +comments: true +description: Desde el entrenamiento hasta el seguimiento, aprovecha al máximo YOLOv8 con Ultralytics. Obtén información y ejemplos para cada modo compatible incluyendo validación, exportación y evaluación comparativa. +keywords: Ultralytics, YOLOv8, Aprendizaje Automático, Detección de Objetos, Entrenamiento, Validación, Predicción, Exportación, Seguimiento, Benchmarking +--- + +# Modos de Ultralytics YOLOv8 + +Ecosistema Ultralytics YOLO e integraciones + +## Introducción + +Ultralytics YOLOv8 no es solo otro modelo de detección de objetos; es un marco de trabajo versátil diseñado para cubrir todo el ciclo de vida de los modelos de aprendizaje automático, desde la ingesta de datos y el entrenamiento del modelo hasta la validación, implementación y seguimiento en el mundo real. Cada modo sirve para un propósito específico y está diseñado para ofrecerte la flexibilidad y eficiencia necesarias para diferentes tareas y casos de uso. + +

+
+ +
+ Mira: Tutorial de Modos Ultralytics: Entrenar, Validar, Predecir, Exportar y Hacer Benchmarking. +

+ +### Modos a Primera Vista + +Comprender los diferentes **modos** que soporta Ultralytics YOLOv8 es crítico para sacar el máximo provecho a tus modelos: + +- **Modo Entrenar (Train)**: Afina tu modelo en conjuntos de datos personalizados o pre-cargados. +- **Modo Validar (Val)**: Un punto de control post-entrenamiento para validar el rendimiento del modelo. +- **Modo Predecir (Predict)**: Libera el poder predictivo de tu modelo en datos del mundo real. +- **Modo Exportar (Export)**: Prepara tu modelo para la implementación en varios formatos. +- **Modo Seguir (Track)**: Extiende tu modelo de detección de objetos a aplicaciones de seguimiento en tiempo real. +- **Modo Benchmark (Benchmark)**: Analiza la velocidad y precisión de tu modelo en diversos entornos de implementación. + +Esta guía completa tiene como objetivo proporcionarte una visión general y conocimientos prácticos de cada modo, ayudándote a aprovechar todo el potencial de YOLOv8. + +## [Entrenar (Train)](train.md) + +El modo Entrenar se utiliza para entrenar un modelo YOLOv8 en un conjunto de datos personalizado. En este modo, el modelo se entrena utilizando el conjunto de datos y los hiperparámetros especificados. El proceso de entrenamiento implica optimizar los parámetros del modelo para que pueda predecir con precisión las clases y ubicaciones de los objetos en una imagen. + +[Ejemplos de Entrenamiento](train.md){ .md-button } + +## [Validar (Val)](val.md) + +El modo Validar se usa para validar un modelo YOLOv8 después de haber sido entrenado. En este modo, el modelo se evalúa en un conjunto de validación para medir su precisión y rendimiento de generalización. Este modo se puede usar para ajustar los hiperparámetros del modelo y mejorar su rendimiento. + +[Ejemplos de Validación](val.md){ .md-button } + +## [Predecir (Predict)](predict.md) + +El modo Predecir se utiliza para realizar predicciones usando un modelo YOLOv8 entrenado en imágenes o videos nuevos. 
En este modo, el modelo se carga desde un archivo de punto de control, y el usuario puede proporcionar imágenes o videos para realizar inferencias. El modelo predice las clases y ubicaciones de los objetos en las imágenes o videos de entrada. + +[Ejemplos de Predicción](predict.md){ .md-button } + +## [Exportar (Export)](export.md) + +El modo Exportar se utiliza para exportar un modelo YOLOv8 a un formato que se pueda usar para la implementación. En este modo, el modelo se convierte a un formato que puede ser utilizado por otras aplicaciones de software o dispositivos de hardware. Este modo es útil al implementar el modelo en entornos de producción. + +[Ejemplos de Exportación](export.md){ .md-button } + +## [Seguir (Track)](track.md) + +El modo Seguir se usa para rastrear objetos en tiempo real utilizando un modelo YOLOv8. En este modo, el modelo se carga desde un archivo de punto de control, y el usuario puede proporcionar un flujo de video en vivo para realizar seguimiento de objetos en tiempo real. Este modo es útil para aplicaciones como sistemas de vigilancia o coches autónomos. + +[Ejemplos de Seguimiento](track.md){ .md-button } + +## [Benchmark (Benchmark)](benchmark.md) + +El modo Benchmark se utiliza para perfilar la velocidad y precisión de varios formatos de exportación de YOLOv8. Los benchmarks proporcionan información sobre el tamaño del formato de exportación, sus métricas de `mAP50-95` (para detección de objetos, segmentación y pose) o métricas de `accuracy_top5` (para clasificación), y el tiempo de inferencia en milisegundos por imagen a través de varios formatos de exportación como ONNX, OpenVINO, TensorRT y otros. Esta información puede ayudar a los usuarios a elegir el formato de exportación óptimo para su caso de uso específico, basado en sus requerimientos de velocidad y precisión. 
+ +[Ejemplos de Benchmarking](benchmark.md){ .md-button } diff --git a/docs/es/modes/predict.md b/docs/es/modes/predict.md new file mode 100644 index 0000000..0c1751a --- /dev/null +++ b/docs/es/modes/predict.md @@ -0,0 +1,226 @@ +--- +comments: true +description: Descubra cómo utilizar el modo predictivo de YOLOv8 para diversas tareas. Aprenda acerca de diferentes fuentes de inferencia como imágenes, videos y formatos de datos. +keywords: Ultralytics, YOLOv8, modo predictivo, fuentes de inferencia, tareas de predicción, modo de transmisión, procesamiento de imágenes, procesamiento de videos, aprendizaje automático, IA +--- + +# Predicción del Modelo con YOLO de Ultralytics + +Ecosistema de YOLO de Ultralytics e integraciones + +## Introducción + +En el mundo del aprendizaje automático y la visión por computadora, el proceso de dar sentido a los datos visuales se denomina 'inferencia' o 'predicción'. YOLOv8 de Ultralytics ofrece una característica poderosa conocida como **modo predictivo** que está diseñada para inferencias de alto rendimiento y en tiempo real en una amplia gama de fuentes de datos. + +

+
+ +
+ Ver: Cómo Extraer las Salidas del Modelo YOLOv8 de Ultralytics para Proyectos Personalizados. +

+ +## Aplicaciones en el Mundo Real + +| Manufactura | Deportes | Seguridad | +|:-----------------------------------------------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------:| +| ![Detección de Repuestos de Vehículos](https://github.com/RizwanMunawar/ultralytics/assets/62513924/a0f802a8-0776-44cf-8f17-93974a4a28a1) | ![Detección de Jugadores de Fútbol](https://github.com/RizwanMunawar/ultralytics/assets/62513924/7d320e1f-fc57-4d7f-a691-78ee579c3442) | ![Detección de Caídas de Personas](https://github.com/RizwanMunawar/ultralytics/assets/62513924/86437c4a-3227-4eee-90ef-9efb697bdb43) | +| Detección de Repuestos de Vehículos | Detección de Jugadores de Fútbol | Detección de Caídas de Personas | + +## ¿Por Qué Utilizar YOLO de Ultralytics para la Inferencia? + +Estas son algunas razones para considerar el modo predictivo de YOLOv8 para sus necesidades de inferencia: + +- **Versatilidad:** Capaz de realizar inferencias en imágenes, videos e incluso transmisiones en vivo. +- **Rendimiento:** Diseñado para procesamiento en tiempo real y de alta velocidad sin sacrificar precisión. +- **Facilidad de Uso:** Interfaces de Python y CLI intuitivas para una rápida implementación y pruebas. +- **Alta Personalización:** Diversos ajustes y parámetros para afinar el comportamiento de inferencia del modelo según sus requisitos específicos. 
+ +### Características Principales del Modo Predictivo + +El modo predictivo de YOLOv8 está diseñado para ser robusto y versátil, y cuenta con: + +- **Compatibilidad con Múltiples Fuentes de Datos:** Ya sea que sus datos estén en forma de imágenes individuales, una colección de imágenes, archivos de video o transmisiones de video en tiempo real, el modo predictivo le tiene cubierto. +- **Modo de Transmisión:** Utilice la función de transmisión para generar un generador eficiente de memoria de objetos `Results`. Active esto configurando `stream=True` en el método de llamada del predictor. +- **Procesamiento por Lotes:** La capacidad de procesar múltiples imágenes o fotogramas de video en un solo lote, acelerando aún más el tiempo de inferencia. +- **Amigable para la Integración:** Se integra fácilmente con pipelines de datos existentes y otros componentes de software, gracias a su API flexible. + +Los modelos YOLO de Ultralytics devuelven ya sea una lista de objetos `Results` de Python, o un generador de objetos `Results` de Python eficiente en memoria cuando se pasa `stream=True` al modelo durante la inferencia: + +!!! 
Example "Predict" + + === "Devolver una lista con `stream=False`" + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n.pt') # modelo YOLOv8n preentrenado + + # Ejecutar inferencia por lotes en una lista de imágenes + results = model(['im1.jpg', 'im2.jpg']) # devuelve una lista de objetos Results + + # Procesar lista de resultados + for result in results: + boxes = result.boxes # Objeto Boxes para salidas de bbox + masks = result.masks # Objeto Masks para salidas de máscaras de segmentación + keypoints = result.keypoints # Objeto Keypoints para salidas de postura + probs = result.probs # Objeto Probs para salidas de clasificación + ``` + + === "Devolver un generador con `stream=True`" + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n.pt') # modelo YOLOv8n preentrenado + + # Ejecutar inferencia por lotes en una lista de imágenes + results = model(['im1.jpg', 'im2.jpg'], stream=True) # devuelve un generador de objetos Results + + # Procesar generador de resultados + for result in results: + boxes = result.boxes # Objeto Boxes para salidas de bbox + masks = result.masks # Objeto Masks para salidas de máscaras de segmentación + keypoints = result.keypoints # Objeto Keypoints para salidas de postura + probs = result.probs # Objeto Probs para salidas de clasificación + ``` + +## Fuentes de Inferencia + +YOLOv8 puede procesar diferentes tipos de fuentes de entrada para la inferencia, como se muestra en la tabla a continuación. Las fuentes incluyen imágenes estáticas, transmisiones de video y varios formatos de datos. La tabla también indica si cada fuente se puede utilizar en modo de transmisión con el argumento `stream=True` ✅. El modo de transmisión es beneficioso para procesar videos o transmisiones en vivo ya que crea un generador de resultados en lugar de cargar todos los fotogramas en la memoria. + +!!! 
Tip "Consejo" + + Utilice `stream=True` para procesar videos largos o conjuntos de datos grandes para gestionar eficientemente la memoria. Cuando `stream=False`, los resultados de todos los fotogramas o puntos de datos se almacenan en la memoria, lo que puede aumentar rápidamente y causar errores de memoria insuficiente para entradas grandes. En contraste, `stream=True` utiliza un generador, que solo mantiene los resultados del fotograma o punto de datos actual en la memoria, reduciendo significativamente el consumo de memoria y previniendo problemas de falta de memoria. + +| Fuente | Argumento | Tipo | Notas | +|---------------------|--------------------------------------------|----------------|---------------------------------------------------------------------------------------------------------------------------------| +| imagen | `'image.jpg'` | `str` o `Path` | Archivo único de imagen. | +| URL | `'https://ultralytics.com/images/bus.jpg'` | `str` | URL a una imagen. | +| captura de pantalla | `'screen'` | `str` | Captura una captura de pantalla. | +| PIL | `Image.open('im.jpg')` | `PIL.Image` | Formato HWC con canales RGB. | +| OpenCV | `cv2.imread('im.jpg')` | `np.ndarray` | Formato HWC con canales BGR `uint8 (0-255)`. | +| numpy | `np.zeros((640,1280,3))` | `np.ndarray` | Formato HWC con canales BGR `uint8 (0-255)`. | +| torch | `torch.zeros(16,3,320,640)` | `torch.Tensor` | Formato BCHW con canales RGB `float32 (0.0-1.0)`. | +| CSV | `'sources.csv'` | `str` o `Path` | Archivo CSV que contiene rutas a imágenes, videos o directorios. | +| video ✅ | `'video.mp4'` | `str` o `Path` | Archivo de video en formatos como MP4, AVI, etc. | +| directorio ✅ | `'path/'` | `str` o `Path` | Ruta a un directorio que contiene imágenes o videos. | +| glob ✅ | `'path/*.jpg'` | `str` | Patrón glob para coincidir con múltiples archivos. Utilice el carácter `*` como comodín. | +| YouTube ✅ | `'https://youtu.be/LNwODJXcvt4'` | `str` | URL a un video de YouTube. 
| +| transmisión ✅ | `'rtsp://example.com/media.mp4'` | `str` | URL para protocolos de transmisión como RTSP, RTMP, TCP o una dirección IP. | +| multi-transmisión ✅ | `'list.streams'` | `str` o `Path` | Archivo de texto `*.streams` con una URL de transmisión por fila, es decir, 8 transmisiones se ejecutarán con tamaño de lote 8. | + +A continuación se muestran ejemplos de código para usar cada tipo de fuente: + +!!! Example "Fuentes de predicción" + + === "imagen" + Ejecute inferencia en un archivo de imagen. + ```python + from ultralytics import YOLO + + # Cargar el modelo YOLOv8n preentrenado + model = YOLO('yolov8n.pt') + + # Definir la ruta al archivo de imagen + source = 'ruta/a/imagen.jpg' + + # Ejecutar inferencia en la fuente + results = model(source) # lista de objetos Results + ``` + + === "captura de pantalla" + Ejecute inferencia en el contenido actual de la pantalla como captura de pantalla. + ```python + from ultralytics import YOLO + + # Cargar el modelo YOLOv8n preentrenado + model = YOLO('yolov8n.pt') + + # Definir captura de pantalla actual como fuente + source = 'screen' + + # Ejecutar inferencia en la fuente + results = model(source) # lista de objetos Results + ``` + + === "URL" + Ejecute inferencia en una imagen o video alojados remotamente a través de URL. + ```python + from ultralytics import YOLO + + # Cargar el modelo YOLOv8n preentrenado + model = YOLO('yolov8n.pt') + + # Definir URL remota de imagen o video + source = 'https://ultralytics.com/images/bus.jpg' + + # Ejecutar inferencia en la fuente + results = model(source) # lista de objetos Results + ``` + + === "PIL" + Ejecute inferencia en una imagen abierta con la Biblioteca de Imágenes de Python (PIL). 
+ ```python + from PIL import Image + from ultralytics import YOLO + + # Cargar el modelo YOLOv8n preentrenado + model = YOLO('yolov8n.pt') + + # Abrir una imagen usando PIL + source = Image.open('ruta/a/imagen.jpg') + + # Ejecutar inferencia en la fuente + results = model(source) # lista de objetos Results + ``` + + === "OpenCV" + Ejecute inferencia en una imagen leída con OpenCV. + ```python + import cv2 + from ultralytics import YOLO + + # Cargar el modelo YOLOv8n preentrenado + model = YOLO('yolov8n.pt') + + # Leer una imagen usando OpenCV + source = cv2.imread('ruta/a/imagen.jpg') + + # Ejecutar inferencia en la fuente + results = model(source) # lista de objetos Results + ``` + + === "numpy" + Ejecute inferencia en una imagen representada como un array de numpy. + ```python + import numpy as np + from ultralytics import YOLO + + # Cargar el modelo YOLOv8n preentrenado + model = YOLO('yolov8n.pt') + + # Crear un array aleatorio de numpy con forma HWC (640, 640, 3) con valores en rango [0, 255] y tipo uint8 + source = np.random.randint(low=0, high=255, size=(640, 640, 3), dtype='uint8') + + # Ejecutar inferencia en la fuente + results = model(source) # lista de objetos Results + ``` + + === "torch" + Ejecute inferencia en una imagen representada como un tensor de PyTorch. + ```python + import torch + from ultralytics import YOLO + + # Cargar el modelo YOLOv8n preentrenado + model = YOLO('yolov8n.pt') + + # Crear un tensor aleatorio de torch con forma BCHW (1, 3, 640, 640) con valores en rango [0, 1] y tipo float32 + source = torch.rand(1, 3, 640, 640, dtype=torch.float32) + + # Ejecutar inferencia en la fuente + results = model(source) # lista de objetos Results diff --git a/docs/es/modes/track.md b/docs/es/modes/track.md new file mode 100644 index 0000000..e7b514d --- /dev/null +++ b/docs/es/modes/track.md @@ -0,0 +1,200 @@ +--- +comments: true +description: Aprende a utilizar Ultralytics YOLO para el seguimiento de objetos en flujos de video. 
Guías para usar diferentes rastreadores y personalizar la configuración del rastreador. +keywords: Ultralytics, YOLO, seguimiento de objetos, flujos de video, BoT-SORT, ByteTrack, guía de Python, guía de CLI +--- + +# Seguimiento de Múltiples Objetos con Ultralytics YOLO + +Ejemplos de seguimiento de múltiples objetos + +El seguimiento de objetos en el ámbito del análisis de video es una tarea crítica que no solo identifica la ubicación y clase de objetos dentro del cuadro, sino que también mantiene una ID única para cada objeto detectado a medida que avanza el video. Las aplicaciones son ilimitadas, desde vigilancia y seguridad hasta análisis deportivos en tiempo real. + +## ¿Por Qué Elegir Ultralytics YOLO para el Seguimiento de Objetos? + +La salida de los rastreadores de Ultralytics es consistente con la detección de objetos estándar, pero con el valor añadido de las IDs de objetos. Esto facilita el seguimiento de objetos en flujos de video y la realización de análisis posteriores. Aquí tienes algunas razones por las que deberías considerar usar Ultralytics YOLO para tus necesidades de seguimiento de objetos: + +- **Eficiencia:** Procesa flujos de video en tiempo real sin comprometer la precisión. +- **Flexibilidad:** Soporta múltiples algoritmos de seguimiento y configuraciones. +- **Facilidad de Uso:** API simple de Python y opciones CLI para una rápida integración y despliegue. +- **Personalización:** Fácil de usar con modelos YOLO entrenados a medida, permitiendo la integración en aplicaciones específicas del dominio. + +

+
+ +
+ Ver: Detección de Objetos y Seguimiento con Ultralytics YOLOv8. +

+ +## Aplicaciones en el Mundo Real + +| Transporte | Venta al por Menor | Acuicultura | +|:------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------:| +| ![Seguimiento de Vehículos](https://github.com/RizwanMunawar/ultralytics/assets/62513924/ee6e6038-383b-4f21-ac29-b2a1c7d386ab) | ![Seguimiento de Personas](https://github.com/RizwanMunawar/ultralytics/assets/62513924/93bb4ee2-77a0-4e4e-8eb6-eb8f527f0527) | ![Seguimiento de Peces](https://github.com/RizwanMunawar/ultralytics/assets/62513924/a5146d0f-bfa8-4e0a-b7df-3c1446cd8142) | +| Seguimiento de Vehículos | Seguimiento de Personas | Seguimiento de Peces | + +## Características a Simple Vista + +Ultralytics YOLO extiende sus características de detección de objetos para proporcionar un seguimiento de objetos robusto y versátil: + +- **Seguimiento en Tiempo Real:** Rastrea sin problemas los objetos en videos de alta frecuencia de cuadros. +- **Soporte de Múltiples Rastreadores:** Elige entre una variedad de algoritmos de seguimiento establecidos. +- **Configuraciones de Rastreador Personalizables:** Adapta el algoritmo de seguimiento para satisfacer requisitos específicos ajustando diversos parámetros. + +## Rastreadores Disponibles + +Ultralytics YOLO soporta los siguientes algoritmos de seguimiento. Pueden ser habilitados pasando el archivo de configuración YAML relevante como `tracker=tracker_type.yaml`: + +* [BoT-SORT](https://github.com/NirAharon/BoT-SORT) - Usa `botsort.yaml` para habilitar este rastreador. +* [ByteTrack](https://github.com/ifzhang/ByteTrack) - Usa `bytetrack.yaml` para habilitar este rastreador. + +El rastreador predeterminado es BoT-SORT. 
+ +## Seguimiento + +Para ejecutar el rastreador en flujos de video, usa un modelo Detect, Segment o Pose entrenado tales como YOLOv8n, YOLOv8n-seg y YOLOv8n-pose. + +!!! Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo oficial o personalizado + model = YOLO('yolov8n.pt') # Cargar un modelo oficial Detect + model = YOLO('yolov8n-seg.pt') # Cargar un modelo oficial Segment + model = YOLO('yolov8n-pose.pt') # Cargar un modelo oficial Pose + model = YOLO('path/to/best.pt') # Cargar un modelo entrenado a medida + + # Realizar el seguimiento con el modelo + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True) # Seguimiento con el rastreador predeterminado + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True, tracker="bytetrack.yaml") # Seguimiento con el rastreador ByteTrack + ``` + + === "CLI" + + ```bash + # Realizar seguimiento con varios modelos usando la interfaz de línea de comandos + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" # Modelo oficial Detect + yolo track model=yolov8n-seg.pt source="https://youtu.be/LNwODJXcvt4" # Modelo oficial Segment + yolo track model=yolov8n-pose.pt source="https://youtu.be/LNwODJXcvt4" # Modelo oficial Pose + yolo track model=path/to/best.pt source="https://youtu.be/LNwODJXcvt4" # Modelo entrenado a medida + + # Realizar seguimiento usando el rastreador ByteTrack + yolo track model=path/to/best.pt tracker="bytetrack.yaml" + ``` + +Como se puede ver en el uso anterior, el seguimiento está disponible para todos los modelos Detect, Segment y Pose ejecutados en videos o fuentes de transmisión. + +## Configuración + +### Argumentos de Seguimiento + +La configuración de seguimiento comparte propiedades con el modo Predict, como `conf`, `iou` y `show`. Para configuraciones adicionales, consulta la página del modelo [Predict](https://docs.ultralytics.com/modes/predict/). + +!!! 
Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Configurar los parámetros de seguimiento y ejecutar el rastreador + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True) + ``` + + === "CLI" + + ```bash + # Configurar parámetros de seguimiento y ejecutar el rastreador usando la interfaz de línea de comandos + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3 iou=0.5 show + ``` + +### Selección de Rastreador + +Ultralytics también te permite usar un archivo de configuración de rastreador modificado. Para hacerlo, simplemente haz una copia de un archivo de configuración de rastreador (por ejemplo, `custom_tracker.yaml`) de [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers) y modifica cualquier configuración (excepto el `tracker_type`) según tus necesidades. + +!!! Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar el modelo y ejecutar el rastreador con un archivo de configuración personalizado + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", tracker='custom_tracker.yaml') + ``` + + === "CLI" + + ```bash + # Cargar el modelo y ejecutar el rastreador con un archivo de configuración personalizado usando la interfaz de línea de comandos + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml' + ``` + +Para obtener una lista completa de los argumentos de seguimiento, consulta la página [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers). + +## Ejemplos en Python + +### Bucle de Seguimiento Persistente + +Aquí hay un script en Python que utiliza OpenCV (`cv2`) y YOLOv8 para ejecutar el seguimiento de objetos en fotogramas de video.
Este script aún asume que ya has instalado los paquetes necesarios (`opencv-python` y `ultralytics`). El argumento `persist=True` le indica al rastreador que la imagen o fotograma actual es el siguiente en una secuencia y que espera rastros de la imagen anterior en la imagen actual. + +!!! Example "Bucle de transmisión en vivo con seguimiento" + + ```python + import cv2 + from ultralytics import YOLO + + # Cargar el modelo YOLOv8 + model = YOLO('yolov8n.pt') + + # Abrir el archivo de video + video_path = "path/to/video.mp4" + cap = cv2.VideoCapture(video_path) + + # Bucle a través de los fotogramas del video + while cap.isOpened(): + # Leer un fotograma del video + success, frame = cap.read() + + if success: + # Ejecutar seguimiento YOLOv8 en el fotograma, persistiendo los rastreos entre fotogramas + results = model.track(frame, persist=True) + + # Visualizar los resultados en el fotograma + annotated_frame = results[0].plot() + + # Mostrar el fotograma anotado + cv2.imshow("Seguimiento YOLOv8", annotated_frame) + + # Romper el bucle si se presiona 'q' + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # Romper el bucle si se alcanza el final del video + break + + # Liberar el objeto de captura de video y cerrar la ventana de visualización + cap.release() + cv2.destroyAllWindows() + ``` + +Toma en cuenta el cambio de `model(frame)` a `model.track(frame)`, que habilita el seguimiento de objetos en lugar de simplemente la detección. Este script modificado ejecutará el rastreador en cada fotograma del video, visualizará los resultados y los mostrará en una ventana. El bucle puede ser terminado presionando 'q'. + +## Contribuir con Nuevos Rastreadores + +¿Eres experto en seguimiento de múltiples objetos y has implementado o adaptado exitosamente un algoritmo de seguimiento con Ultralytics YOLO? Te invitamos a contribuir en nuestra sección de Rastreadores en [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers)! 
Tus aplicaciones en el mundo real y soluciones podrían ser invaluables para los usuarios que trabajan en tareas de seguimiento. + +Al contribuir en esta sección, ayudarás a ampliar el alcance de las soluciones de seguimiento disponibles dentro del marco de trabajo de Ultralytics YOLO, añadiendo otra capa de funcionalidad y utilidad para la comunidad. + +Para iniciar tu contribución, por favor consulta nuestra [Guía de Contribución](https://docs.ultralytics.com/help/contributing) para obtener instrucciones completas sobre cómo enviar una Solicitud de Extracción (PR) 🛠️. ¡Estamos emocionados de ver lo que traes a la mesa! + +Juntos, vamos a mejorar las capacidades de seguimiento del ecosistema Ultralytics YOLO 🙏! diff --git a/docs/es/modes/train.md b/docs/es/modes/train.md new file mode 100644 index 0000000..6cb798a --- /dev/null +++ b/docs/es/modes/train.md @@ -0,0 +1,206 @@ +--- +comments: true +description: Guía paso a paso para entrenar modelos YOLOv8 con Ultralytics YOLO incluyendo ejemplos de entrenamiento con una sola GPU y múltiples GPUs +keywords: Ultralytics, YOLOv8, YOLO, detección de objetos, modo de entrenamiento, conjunto de datos personalizado, entrenamiento GPU, multi-GPU, hiperparámetros, ejemplos CLI, ejemplos Python +--- + +# Entrenamiento de Modelos con Ultralytics YOLO + +Ecosistema e integraciones de Ultralytics YOLO + +## Introducción + +Entrenar un modelo de aprendizaje profundo implica alimentarlo con datos y ajustar sus parámetros para que pueda hacer predicciones precisas. El modo de entrenamiento en Ultralytics YOLOv8 está diseñado para un entrenamiento efectivo y eficiente de modelos de detección de objetos, aprovechando al máximo las capacidades del hardware moderno. Esta guía tiene como objetivo cubrir todos los detalles que necesita para comenzar a entrenar sus propios modelos utilizando el robusto conjunto de características de YOLOv8. + +

+
+ +
+ Ver: Cómo Entrenar un modelo YOLOv8 en Tu Conjunto de Datos Personalizado en Google Colab. +

+ +## ¿Por Qué Elegir Ultralytics YOLO para Entrenamiento? + +Aquí hay algunas razones convincentes para optar por el modo Entrenamiento de YOLOv8: + +- **Eficiencia:** Aprovecha al máximo tu hardware, ya sea en una configuración de una sola GPU o escalando entre múltiples GPUs. +- **Versatilidad:** Entrena con conjuntos de datos personalizados además de los ya disponibles como COCO, VOC e ImageNet. +- **Amigable al Usuario:** Interfaces CLI y Python simples pero potentes para una experiencia de entrenamiento sencilla. +- **Flexibilidad de Hiperparámetros:** Una amplia gama de hiperparámetros personalizables para ajustar el rendimiento del modelo. + +### Características Clave del Modo Entrenamiento + +Las siguientes son algunas características notables del modo Entrenamiento de YOLOv8: + +- **Descarga Automática de Conjuntos de Datos:** Conjuntos de datos estándar como COCO, VOC e ImageNet se descargan automáticamente en el primer uso. +- **Soporte Multi-GPU:** Escala tus esfuerzos de entrenamiento sin problemas en múltiples GPUs para acelerar el proceso. +- **Configuración de Hiperparámetros:** La opción de modificar hiperparámetros a través de archivos de configuración YAML o argumentos CLI. +- **Visualización y Monitoreo:** Seguimiento en tiempo real de métricas de entrenamiento y visualización del proceso de aprendizaje para una mejor comprensión. + +!!! Tip "Consejo" + + * Los conjuntos de datos de YOLOv8 como COCO, VOC, ImageNet y muchos otros se descargan automáticamente en el primer uso, es decir, `yolo train data=coco.yaml` + +## Ejemplos de Uso + +Entrena YOLOv8n en el conjunto de datos COCO128 durante 100 épocas con un tamaño de imagen de 640. El dispositivo de entrenamiento se puede especificar usando el argumento `device`. Si no se pasa ningún argumento, se usará la GPU `device=0` si está disponible; de lo contrario, se usará `device=cpu`. Consulta la sección de Argumentos a continuación para una lista completa de argumentos de entrenamiento. + +!!! 
Example "Ejemplo de Entrenamiento con una sola GPU y CPU" + + El dispositivo se determina automáticamente. Si hay una GPU disponible, se usará; de lo contrario, el entrenamiento comenzará en la CPU. + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n.yaml') # construir un modelo nuevo desde YAML + model = YOLO('yolov8n.pt') # cargar un modelo preentrenado (recomendado para entrenamiento) + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # construir desde YAML y transferir pesos + + # Entrenar el modelo + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + # Construir un modelo nuevo desde YAML y comenzar el entrenamiento desde cero + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # Comenzar el entrenamiento desde un modelo preentrenado *.pt + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # Construir un modelo nuevo desde YAML, transferir pesos preentrenados a él y comenzar el entrenamiento + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### Entrenamiento Multi-GPU + +El entrenamiento Multi-GPU permite una utilización más eficiente de los recursos de hardware disponibles, distribuyendo la carga de entrenamiento en varias GPUs. Esta característica está disponible tanto a través de la API de Python como de la interfaz de línea de comandos. Para habilitar el entrenamiento Multi-GPU, especifica los IDs de los dispositivos GPU que deseas usar. + +!!! Example "Ejemplo de Entrenamiento Multi-GPU" + + Para entrenar con 2 GPUs, dispositivos CUDA 0 y 1, usa los siguientes comandos. Amplía a GPUs adicionales según sea necesario. 
+ + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n.pt') # cargar un modelo preentrenado (recomendado para entrenamiento) + + # Entrenar el modelo con 2 GPUs + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device=[0, 1]) + ``` + + === "CLI" + + ```bash + # Comenzar el entrenamiento desde un modelo preentrenado *.pt usando las GPUs 0 y 1 + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=0,1 + ``` + +### Entrenamiento con Apple M1 y M2 MPS + +Con el soporte para los chips Apple M1 y M2 integrado en los modelos Ultralytics YOLO, ahora es posible entrenar tus modelos en dispositivos que utilizan el potente marco de Metal Performance Shaders (MPS). El MPS ofrece una forma de alto rendimiento para ejecutar tareas de cálculo y procesamiento de imágenes en el silicio personalizado de Apple. + +Para habilitar el entrenamiento en chips Apple M1 y M2, debes especificar 'mps' como tu dispositivo al iniciar el proceso de entrenamiento. A continuación se muestra un ejemplo de cómo podrías hacer esto en Python y a través de la línea de comandos: + +!!! Example "Ejemplo de Entrenamiento MPS" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n.pt') # cargar un modelo preentrenado (recomendado para entrenamiento) + + # Entrenar el modelo con MPS + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device='mps') + ``` + + === "CLI" + + ```bash + # Comenzar el entrenamiento desde un modelo preentrenado *.pt usando MPS + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=mps + ``` + +Al aprovechar el poder computacional de los chips M1/M2, esto permite un procesamiento más eficiente de las tareas de entrenamiento. 
Para obtener una guía más detallada y opciones de configuración avanzadas, consulta la [documentación de PyTorch MPS](https://pytorch.org/docs/stable/notes/mps.html). + +## Registros (Logging) + +Al entrenar un modelo YOLOv8, puedes encontrar valioso llevar un registro del rendimiento del modelo con el tiempo. Aquí es donde entra en juego el registro. Ultralytics' YOLO ofrece soporte para tres tipos de registradores: Comet, ClearML y TensorBoard. + +Para usar un registrador, selecciónalo en el menú desplegable en el fragmento de código anterior y ejecútalo. El registrador elegido se instalará e inicializará. + +### Comet + +[Comet](https://www.comet.ml/site/) es una plataforma que permite a los científicos de datos y desarrolladores rastrear, comparar, explicar y optimizar experimentos y modelos. Ofrece funcionalidades como métricas en tiempo real, diferencias de código y seguimiento de hiperparámetros. + +Para usar Comet: + +!!! Example "Ejemplo" + + === "Python" + ```python + # pip install comet_ml + import comet_ml + + comet_ml.init() + ``` + +Recuerda iniciar sesión en tu cuenta de Comet en su sitio web y obtener tu clave API. Necesitarás agregar esto a tus variables de entorno o tu script para registrar tus experimentos. + +### ClearML + +[ClearML](https://www.clear.ml/) es una plataforma de código abierto que automatiza el seguimiento de experimentos y ayuda con la compartición eficiente de recursos. Está diseñado para ayudar a los equipos a gestionar, ejecutar y reproducir su trabajo de ML de manera más eficiente. + +Para usar ClearML: + +!!! Example "Ejemplo" + + === "Python" + ```python + # pip install clearml + import clearml + + clearml.browser_login() + ``` + +Después de ejecutar este script, necesitarás iniciar sesión en tu cuenta de ClearML en el navegador y autenticar tu sesión. + +### TensorBoard + +[TensorBoard](https://www.tensorflow.org/tensorboard) es una herramienta de visualización para TensorFlow. 
Te permite visualizar tu grafo TensorFlow, trazar métricas cuantitativas sobre la ejecución de tu grafo y mostrar datos adicionales como imágenes que lo atraviesan. + +Para usar TensorBoard en [Google Colab](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb): + +!!! Example "Ejemplo" + + === "CLI" + ```bash + load_ext tensorboard + tensorboard --logdir ultralytics/runs # reemplazar con el directorio 'runs' + ``` + +Para usar TensorBoard localmente, ejecuta el siguiente comando y visualiza los resultados en http://localhost:6006/. + +!!! Example "Ejemplo" + + === "CLI" + ```bash + tensorboard --logdir ultralytics/runs # reemplazar con el directorio 'runs' + ``` + +Esto cargará TensorBoard y lo dirigirá al directorio donde se guardan tus registros de entrenamiento. + +Después de configurar tu registrador, puedes proceder con tu entrenamiento de modelo. Todas las métricas de entrenamiento se registrarán automáticamente en la plataforma elegida y podrás acceder a estos registros para monitorear el rendimiento de tu modelo con el tiempo, comparar diferentes modelos e identificar áreas de mejora. diff --git a/docs/es/modes/val.md b/docs/es/modes/val.md new file mode 100644 index 0000000..ee81404 --- /dev/null +++ b/docs/es/modes/val.md @@ -0,0 +1,86 @@ +--- +comments: true +description: Guía para validar modelos YOLOv8. Aprenda a evaluar el rendimiento de sus modelos YOLO utilizando configuraciones y métricas de validación con ejemplos en Python y CLI. +keywords: Ultralytics, Documentación YOLO, YOLOv8, validación, evaluación de modelos, hiperparámetros, precisión, métricas, Python, CLI +--- + +# Validación de modelos con Ultralytics YOLO + +Ecosistema e integraciones de Ultralytics YOLO + +## Introducción + +La validación es un paso crítico en el flujo de trabajo de aprendizaje automático, permitiéndole evaluar la calidad de sus modelos entrenados. 
El modo Val en Ultralytics YOLOv8 proporciona un robusto conjunto de herramientas y métricas para evaluar el rendimiento de sus modelos de detección de objetos. Esta guía sirve como un recurso completo para comprender cómo utilizar efectivamente el modo Val para asegurar que sus modelos sean precisos y confiables. + +## ¿Por qué validar con Ultralytics YOLO? + +Estas son las ventajas de usar el modo Val de YOLOv8: + +- **Precisión:** Obtenga métricas precisas como mAP50, mAP75 y mAP50-95 para evaluar de manera integral su modelo. +- **Comodidad:** Utilice funciones integradas que recuerdan los ajustes de entrenamiento, simplificando el proceso de validación. +- **Flexibilidad:** Valide su modelo con el mismo conjunto de datos o diferentes conjuntos de datos y tamaños de imagen. +- **Ajuste de Hiperparámetros:** Use las métricas de validación para ajustar su modelo y mejorar el rendimiento. + +### Características principales del modo Val + +Estas son las funcionalidades notables ofrecidas por el modo Val de YOLOv8: + +- **Configuraciones Automatizadas:** Los modelos recuerdan sus configuraciones de entrenamiento para una validación sencilla. +- **Soporte de Múltiples Métricas:** Evalúe su modelo basado en una gama de métricas de precisión. +- **CLI y API de Python:** Elija entre la interfaz de línea de comandos o API de Python basada en su preferencia para validación. +- **Compatibilidad de Datos:** Funciona sin problemas con conjuntos de datos utilizados durante la fase de entrenamiento así como con conjuntos de datos personalizados. + +!!! Tip "Consejo" + + * Los modelos YOLOv8 recuerdan automáticamente sus ajustes de entrenamiento, así que puede validar un modelo en el mismo tamaño de imagen y en el conjunto de datos original fácilmente con solo `yolo val model=yolov8n.pt` o `model('yolov8n.pt').val()` + +## Ejemplos de Uso + +Valide la precisión del modelo YOLOv8n entrenado en el conjunto de datos COCO128. 
No es necesario pasar ningún argumento ya que el `modelo` retiene sus `datos` de entrenamiento y argumentos como atributos del modelo. Vea la sección de Argumentos a continuación para una lista completa de argumentos de validación. + +!!! Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n.pt') # cargar un modelo oficial + model = YOLO('ruta/a/best.pt') # cargar un modelo personalizado + + # Validar el modelo + metrics = model.val() # no se necesitan argumentos, el conjunto de datos y ajustes se recuerdan + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # una lista que contiene map50-95 de cada categoría + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # validar modelo oficial + yolo detect val model=ruta/a/best.pt # validar modelo personalizado + ``` + +## Argumentos + +Los ajustes de validación para modelos YOLO se refieren a los diversos hiperparámetros y configuraciones utilizados para evaluar el rendimiento del modelo en un conjunto de datos de validación. Estos ajustes pueden afectar el rendimiento, la velocidad y la precisión del modelo. Algunos ajustes comunes de validación YOLO incluyen el tamaño del lote, la frecuencia con la que se realiza la validación durante el entrenamiento y las métricas utilizadas para evaluar el rendimiento del modelo. Otros factores que pueden afectar el proceso de validación incluyen el tamaño y la composición del conjunto de datos de validación y la tarea específica para la que se utiliza el modelo. Es importante ajustar y experimentar cuidadosamente con estos ajustes para asegurarse de que el modelo esté funcionando bien en el conjunto de datos de validación y para detectar y prevenir el sobreajuste. 
+ +| Clave | Valor | Descripción | +|---------------|---------|---------------------------------------------------------------------------------------------------| +| `data` | `None` | ruta al archivo de datos, por ejemplo coco128.yaml | +| `imgsz` | `640` | tamaño de las imágenes de entrada como entero | +| `batch` | `16` | número de imágenes por lote (-1 para AutoBatch) | +| `save_json` | `False` | guardar resultados en archivo JSON | +| `save_hybrid` | `False` | guardar versión híbrida de las etiquetas (etiquetas + predicciones adicionales) | +| `conf` | `0.001` | umbral de confianza del objeto para detección | +| `iou` | `0.6` | umbral de Intersección sobre Unión (IoU) para NMS | +| `max_det` | `300` | número máximo de detecciones por imagen | +| `half` | `True` | usar media precisión de punto flotante (FP16) | +| `device` | `None` | dispositivo en el que se ejecuta, por ejemplo cuda device=0/1/2/3 o device=cpu | +| `dnn` | `False` | utilizar OpenCV DNN para inferencia ONNX | +| `plots` | `False` | mostrar gráficos durante el entrenamiento | +| `rect` | `False` | validación rectangular con cada lote agrupado para el mínimo relleno | +| `split` | `val` | división del conjunto de datos a utilizar para la validación, por ejemplo 'val', 'test' o 'train' | +| diff --git a/docs/es/quickstart.md b/docs/es/quickstart.md new file mode 100644 index 0000000..bc7b5e3 --- /dev/null +++ b/docs/es/quickstart.md @@ -0,0 +1,198 @@ +--- +comments: true +description: Explore diversos métodos para instalar Ultralytics usando pip, conda, git y Docker. Aprende cómo usar Ultralytics con la interfaz de línea de comandos o dentro de tus proyectos de Python. +keywords: instalación de Ultralytics, pip install Ultralytics, instalación de Docker Ultralytics, interfaz de línea de comandos de Ultralytics, interfaz de Python de Ultralytics +--- + +## Instalar Ultralytics + +Ultralytics ofrece varios métodos de instalación incluyendo pip, conda y Docker. 
Instala YOLOv8 a través del paquete `ultralytics` de pip para la última versión estable o clonando el [repositorio de GitHub de Ultralytics](https://github.com/ultralytics/ultralytics) para obtener la versión más actualizada. Docker se puede utilizar para ejecutar el paquete en un contenedor aislado, evitando la instalación local. + +!!! Example "Instalar" + + === "Instalación con Pip (recomendado)" + Instala el paquete `ultralytics` usando pip o actualiza una instalación existente ejecutando `pip install -U ultralytics`. Visita el Índice de Paquetes de Python (PyPI) para más detalles sobre el paquete `ultralytics`: [https://pypi.org/project/ultralytics/](https://pypi.org/project/ultralytics/). + + [![Versión en PyPI](https://badge.fury.io/py/ultralytics.svg)](https://badge.fury.io/py/ultralytics) [![Descargas](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) + + ```bash + # Instalar el paquete ultralytics desde PyPI + pip install ultralytics + ``` + + También puedes instalar el paquete `ultralytics` directamente del [repositorio](https://github.com/ultralytics/ultralytics) en GitHub. Esto puede ser útil si quieres la última versión de desarrollo. Asegúrate de tener la herramienta de línea de comandos Git instalada en tu sistema. El comando `@main` instala la rama `main` y puede modificarse a otra rama, es decir, `@my-branch`, o eliminarse por completo para volver por defecto a la rama `main`. + + ```bash + # Instalar el paquete ultralytics desde GitHub + pip install git+https://github.com/ultralytics/ultralytics.git@main + ``` + + + === "Instalación con Conda" + Conda es un gestor de paquetes alternativo a pip que también puede utilizarse para la instalación. Visita Anaconda para más detalles en [https://anaconda.org/conda-forge/ultralytics](https://anaconda.org/conda-forge/ultralytics). 
El repositorio feedstock de Ultralytics para actualizar el paquete de conda está en [https://github.com/conda-forge/ultralytics-feedstock/](https://github.com/conda-forge/ultralytics-feedstock/). + + + [![Receta de Conda](https://img.shields.io/badge/recipe-ultralytics-green.svg)](https://anaconda.org/conda-forge/ultralytics) [![Descargas de Conda](https://img.shields.io/conda/dn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Versión de Conda](https://img.shields.io/conda/vn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Plataformas de Conda](https://img.shields.io/conda/pn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) + + ```bash + # Instalar el paquete ultralytics usando conda + conda install -c conda-forge ultralytics + ``` + + !!! Note "Nota" + + Si estás instalando en un entorno CUDA, la mejor práctica es instalar `ultralytics`, `pytorch` y `pytorch-cuda` en el mismo comando para permitir que el gestor de paquetes de conda resuelva cualquier conflicto, o en su defecto instalar `pytorch-cuda` al final para permitir que sobrescriba el paquete específico de CPU `pytorch` si es necesario. + ```bash + # Instalar todos los paquetes juntos usando conda + conda install -c pytorch -c nvidia -c conda-forge pytorch torchvision pytorch-cuda=11.8 ultralytics + ``` + + ### Imagen Docker de Conda + + Las imágenes Docker de Conda de Ultralytics también están disponibles en [DockerHub](https://hub.docker.com/r/ultralytics/ultralytics). Estas imágenes están basadas en [Miniconda3](https://docs.conda.io/projects/miniconda/en/latest/) y son una manera simple de comenzar a usar `ultralytics` en un entorno Conda. 
+ + ```bash + # Establecer el nombre de la imagen como una variable + t=ultralytics/ultralytics:latest-conda + + # Descargar la última imagen de ultralytics de Docker Hub + sudo docker pull $t + + # Ejecutar la imagen de ultralytics en un contenedor con soporte para GPU + sudo docker run -it --ipc=host --gpus all $t # todas las GPUs + sudo docker run -it --ipc=host --gpus '"device=2,3"' $t # especificar GPUs + ``` + + === "Clonar con Git" + Clona el repositorio `ultralytics` si estás interesado en contribuir al desarrollo o deseas experimentar con el código fuente más reciente. Después de clonar, navega al directorio e instala el paquete en modo editable `-e` usando pip. + ```bash + # Clonar el repositorio ultralytics + git clone https://github.com/ultralytics/ultralytics + + # Navegar al directorio clonado + cd ultralytics + + # Instalar el paquete en modo editable para desarrollo + pip install -e . + ``` + +Consulta el archivo [requirements.txt](https://github.com/ultralytics/ultralytics/blob/main/requirements.txt) de `ultralytics` para ver una lista de dependencias. Ten en cuenta que todos los ejemplos anteriores instalan todas las dependencias requeridas. + +

+
+ +
+ Ver: Guía de Inicio Rápido de Ultralytics YOLO +

+ +!!! Tip "Consejo" + + Los requisitos de PyTorch varían según el sistema operativo y los requisitos de CUDA, por lo que se recomienda instalar primero PyTorch siguiendo las instrucciones en [https://pytorch.org/get-started/locally](https://pytorch.org/get-started/locally). + + + Instrucciones de Instalación de PyTorch + + +## Usar Ultralytics con CLI + +La interfaz de línea de comandos (CLI) de Ultralytics permite el uso de comandos simples de una sola línea sin la necesidad de un entorno de Python. La CLI no requiere personalización ni código Python. Puedes simplemente ejecutar todas las tareas desde el terminal con el comando `yolo`. Consulta la [Guía de CLI](/../usage/cli.md) para aprender más sobre el uso de YOLOv8 desde la línea de comandos. + +!!! Example "Ejemplo" + + === "Sintaxis" + + Los comandos `yolo` de Ultralytics usan la siguiente sintaxis: + ```bash + yolo TAREA MODO ARGUMENTOS + + Donde TAREA (opcional) es uno de [detect, segment, classify] + MODO (requerido) es uno de [train, val, predict, export, track] + ARGUMENTOS (opcionales) son cualquier número de pares personalizados 'arg=valor' como 'imgsz=320' que sobrescriben los valores por defecto. 
+ ``` + Ver todos los ARGUMENTOS en la guía completa [Configuration Guide](/../usage/cfg.md) o con `yolo cfg` + + === "Entrenar" + + Entrenar un modelo de detección durante 10 épocas con una tasa de aprendizaje inicial de 0.01 + ```bash + yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01 + ``` + + === "Predecir" + + Predecir un video de YouTube usando un modelo de segmentación preentrenado con un tamaño de imagen de 320: + ```bash + yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 + ``` + + === "Validar" + + Validar un modelo de detección preentrenado con un tamaño de lote de 1 y un tamaño de imagen de 640: + ```bash + yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640 + ``` + + === "Exportar" + + Exportar un modelo de clasificación YOLOv8n a formato ONNX con un tamaño de imagen de 224 por 128 (no se requiere TAREA) + ```bash + yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128 + ``` + + === "Especial" + + Ejecutar comandos especiales para ver la versión, ver configuraciones, ejecutar chequeos y más: + ```bash + yolo help + yolo checks + yolo version + yolo settings + yolo copy-cfg + yolo cfg + ``` + +!!! Warning "Advertencia" + + Los argumentos deben pasarse como pares `arg=valor`, separados por un signo igual `=` y delimitados por espacios ` ` entre pares. No utilices prefijos de argumentos `--` ni comas `,` entre los argumentos. + + - `yolo predict model=yolov8n.pt imgsz=640 conf=0.25`   ✅ + - `yolo predict model yolov8n.pt imgsz 640 conf 0.25`   ❌ + - `yolo predict --model yolov8n.pt --imgsz 640 --conf 0.25`   ❌ + +[Guía de CLI](/../usage/cli.md){.md-button .md-button--primary} + +## Usar Ultralytics con Python + +La interfaz de Python de YOLOv8 permite una integración perfecta en tus proyectos de Python, facilitando la carga, ejecución y procesamiento de la salida del modelo. 
Diseñada con sencillez y facilidad de uso en mente, la interfaz de Python permite a los usuarios implementar rápidamente la detección de objetos, segmentación y clasificación en sus proyectos. Esto hace que la interfaz de Python de YOLOv8 sea una herramienta invaluable para cualquier persona que busque incorporar estas funcionalidades en sus proyectos de Python. + +Por ejemplo, los usuarios pueden cargar un modelo, entrenarlo, evaluar su rendimiento en un conjunto de validación e incluso exportarlo al formato ONNX con solo unas pocas líneas de código. Consulta la [Guía de Python](/../usage/python.md) para aprender más sobre el uso de YOLOv8 dentro de tus proyectos de Python. + +!!! Example "Ejemplo" + + ```python + from ultralytics import YOLO + + # Crear un nuevo modelo YOLO desde cero + model = YOLO('yolov8n.yaml') + + # Cargar un modelo YOLO preentrenado (recomendado para entrenamiento) + model = YOLO('yolov8n.pt') + + # Entrenar el modelo usando el conjunto de datos 'coco128.yaml' durante 3 épocas + results = model.train(data='coco128.yaml', epochs=3) + + # Evaluar el rendimiento del modelo en el conjunto de validación + results = model.val() + + # Realizar detección de objetos en una imagen usando el modelo + results = model('https://ultralytics.com/images/bus.jpg') + + # Exportar el modelo al formato ONNX + success = model.export(format='onnx') + ``` + +[Guía de Python](/../usage/python.md){.md-button .md-button--primary} diff --git a/docs/es/tasks/classify.md b/docs/es/tasks/classify.md new file mode 100644 index 0000000..b9b45ff --- /dev/null +++ b/docs/es/tasks/classify.md @@ -0,0 +1,172 @@ +--- +comments: true +description: Aprenda sobre los modelos de clasificación de imágenes YOLOv8 Classify. Obtenga información detallada sobre la Lista de Modelos Preentrenados y cómo Entrenar, Validar, Predecir y Exportar modelos. 
+keywords: Ultralytics, YOLOv8, Clasificación de imágenes, Modelos preentrenados, YOLOv8n-cls, Entrenamiento, Validación, Predicción, Exportación de modelos +--- + +# Clasificación de Imágenes + +Ejemplos de clasificación de imágenes + +La clasificación de imágenes es la tarea más sencilla de las tres y consiste en clasificar una imagen completa en una de un conjunto de clases predefinidas. + +La salida de un clasificador de imágenes es una única etiqueta de clase y una puntuación de confianza. La clasificación de imágenes es útil cuando solo necesita saber a qué clase pertenece una imagen y no necesita conocer dónde están ubicados los objetos de esa clase o cuál es su forma exacta. + +!!! Tip "Consejo" + + Los modelos YOLOv8 Classify utilizan el sufijo `-cls`, por ejemplo, `yolov8n-cls.pt` y están preentrenados en [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +## [Modelos](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Los modelos Classify preentrenados YOLOv8 se muestran aquí. Los modelos Detect, Segment y Pose están preentrenados en el conjunto de datos [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml), mientras que los modelos Classify están preentrenados en el conjunto de datos [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +Los [modelos](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) se descargan automáticamente desde el último [lanzamiento](https://github.com/ultralytics/assets/releases) de Ultralytics en el primer uso. + +| Modelo | Tamaño
(píxeles) | Exactitud
top1 | Exactitud
top5 | Velocidad
CPU ONNX
(ms) | Velocidad
A100 TensorRT
(ms) | Parámetros
(M) | FLOPs
(B) en 640 | +|----------------------------------------------------------------------------------------------|--------------------------|------------------------|------------------------|------------------------------------|-----------------------------------------|------------------------|--------------------------| +| [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | +| [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | +| [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | +| [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | +| [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | + +- Los valores de **Exactitud** son las precisiones de los modelos en el conjunto de datos de validación de [ImageNet](https://www.image-net.org/). +
Para reproducir usar `yolo val classify data=path/to/ImageNet device=0` +- **Velocidad** promediada sobre imágenes de validación de ImageNet usando una instancia de [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) +
Para reproducir usar `yolo val classify data=path/to/ImageNet batch=1 device=0|cpu` + +## Entrenamiento + +Entrena el modelo YOLOv8n-cls en el conjunto de datos MNIST160 durante 100 épocas con un tamaño de imagen de 64. Para obtener una lista completa de argumentos disponibles, consulte la página de [Configuración](/../usage/cfg.md). + +!!! Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n-cls.yaml') # construir un nuevo modelo desde YAML + model = YOLO('yolov8n-cls.pt') # cargar un modelo preentrenado (recomendado para entrenamiento) + model = YOLO('yolov8n-cls.yaml').load('yolov8n-cls.pt') # construir desde YAML y transferir pesos + + # Entrenar el modelo + results = model.train(data='mnist160', epochs=100, imgsz=64) + ``` + + === "CLI" + + ```bash + # Construir un nuevo modelo desde YAML y empezar entrenamiento desde cero + yolo classify train data=mnist160 model=yolov8n-cls.yaml epochs=100 imgsz=64 + + # Empezar entrenamiento desde un modelo *.pt preentrenado + yolo classify train data=mnist160 model=yolov8n-cls.pt epochs=100 imgsz=64 + + # Construir un nuevo modelo desde YAML, transferir pesos preentrenados e iniciar entrenamiento + yolo classify train data=mnist160 model=yolov8n-cls.yaml pretrained=yolov8n-cls.pt epochs=100 imgsz=64 + ``` + +### Formato del conjunto de datos + +El formato del conjunto de datos de clasificación YOLO puede encontrarse en detalle en la [Guía de Conjuntos de Datos](../../../datasets/classify/index.md). + +## Validación + +Validar la exactitud del modelo YOLOv8n-cls entrenado en el conjunto de datos MNIST160. No es necesario pasar ningún argumento ya que el `modelo` retiene su `data` y argumentos como atributos del modelo. + +!!! 
Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n-cls.pt') # cargar un modelo oficial + model = YOLO('path/to/best.pt') # cargar un modelo personalizado + + # Validar el modelo + metrics = model.val() # no se necesitan argumentos, el conjunto de datos y configuraciones se recuerdan + metrics.top1 # precisión top1 + metrics.top5 # precisión top5 + ``` + === "CLI" + + ```bash + yolo classify val model=yolov8n-cls.pt # validar modelo oficial + yolo classify val model=path/to/best.pt # validar modelo personalizado + ``` + +## Predicción + +Usar un modelo YOLOv8n-cls entrenado para realizar predicciones en imágenes. + +!!! Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n-cls.pt') # cargar un modelo oficial + model = YOLO('path/to/best.pt') # cargar un modelo personalizado + + # Predecir con el modelo + results = model('https://ultralytics.com/images/bus.jpg') # predecir en una imagen + ``` + === "CLI" + + ```bash + yolo classify predict model=yolov8n-cls.pt source='https://ultralytics.com/images/bus.jpg' # predecir con modelo oficial + yolo classify predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predecir con modelo personalizado + ``` + +Ver detalles completos del modo `predict` en la página de [Predicción](https://docs.ultralytics.com/modes/predict/). + +## Exportación + +Exportar un modelo YOLOv8n-cls a un formato diferente como ONNX, CoreML, etc. + +!!! 
Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n-cls.pt') # cargar un modelo oficial + model = YOLO('path/to/best.pt') # cargar un modelo entrenado personalizado + + # Exportar el modelo + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-cls.pt format=onnx # exportar modelo oficial + yolo export model=path/to/best.pt format=onnx # exportar modelo entrenado personalizado + ``` + +Los formatos de exportación disponibles para YOLOv8-cls se encuentran en la tabla a continuación. Puede predecir o validar directamente en modelos exportados, por ejemplo, `yolo predict model=yolov8n-cls.onnx`. Ejemplos de uso se muestran para su modelo después de que se completa la exportación. + +| Formato | Argumento `format` | Modelo | Metadatos | Argumentos | +|--------------------------------------------------------------------|--------------------|-------------------------------|-----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-cls.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-cls.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-cls.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-cls_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-cls.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-cls.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-cls_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-cls.pb` 
| ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-cls.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-cls_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-cls_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-cls_ncnn_model/` | ✅ | `imgsz`, `half` | + +Vea detalles completos de `exportación` en la página de [Exportación](https://docs.ultralytics.com/modes/export/). diff --git a/docs/es/tasks/detect.md b/docs/es/tasks/detect.md new file mode 100644 index 0000000..7779c78 --- /dev/null +++ b/docs/es/tasks/detect.md @@ -0,0 +1,184 @@ +--- +comments: true +description: Documentación oficial de YOLOv8 de Ultralytics. Aprende a entrenar, validar, predecir y exportar modelos en varios formatos. Incluyendo estadísticas detalladas de rendimiento. +keywords: YOLOv8, Ultralytics, detección de objetos, modelos preentrenados, entrenamiento, validación, predicción, exportación de modelos, COCO, ImageNet, PyTorch, ONNX, CoreML +--- + +# Detección de Objetos + +Ejemplos de detección de objetos + +La detección de objetos es una tarea que implica identificar la ubicación y clase de objetos en una imagen o flujo de video. + +La salida de un detector de objetos es un conjunto de cajas delimitadoras que encierran a los objetos en la imagen, junto con etiquetas de clase y puntajes de confianza para cada caja. La detección de objetos es una buena opción cuando necesitas identificar objetos de interés en una escena, pero no necesitas saber exactamente dónde se encuentra el objeto o su forma exacta. + +

+
+ +
+ Ver: Detección de Objetos con Modelo Preentrenado YOLOv8 de Ultralytics. +

+ +!!! Tip "Consejo" + + Los modelos YOLOv8 Detect son los modelos predeterminados de YOLOv8, es decir, `yolov8n.pt` y están preentrenados en [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml). + +## [Modelos](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Los modelos preentrenados de YOLOv8 Detect se muestran aquí. Los modelos de Detect, Segment y Pose están preentrenados en el conjunto de datos [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml), mientras que los modelos de Classify están preentrenados en el conjunto de datos [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +Los [modelos](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) se descargan automáticamente desde el último [lanzamiento](https://github.com/ultralytics/assets/releases) de Ultralytics en el primer uso. + +| Modelo | tamaño
(píxeles) | mAPval
50-95 | Velocidad
CPU ONNX
(ms) | Velocidad
A100 TensorRT
(ms) | parámetros
(M) | FLOPs
(B) | +|--------------------------------------------------------------------------------------|--------------------------|----------------------|------------------------------------|-----------------------------------------|------------------------|-------------------| +| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | +| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | +| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | +| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | +| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | + +- Los valores de **mAPval** son para un solo modelo a una sola escala en el conjunto de datos [COCO val2017](http://cocodataset.org). +
Reproduce utilizando `yolo val detect data=coco.yaml device=0` +- La **Velocidad** es el promedio sobre las imágenes de COCO val utilizando una instancia [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/). +
Reproduce utilizando `yolo val detect data=coco128.yaml batch=1 device=0|cpu` + +## Entrenamiento + +Entrena a YOLOv8n en el conjunto de datos COCO128 durante 100 épocas a tamaño de imagen 640. Para una lista completa de argumentos disponibles, consulta la página [Configuración](/../usage/cfg.md). + +!!! Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n.yaml') # construye un nuevo modelo desde YAML + model = YOLO('yolov8n.pt') # carga un modelo preentrenado (recomendado para entrenamiento) + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # construye desde YAML y transfiere los pesos + + # Entrenar el modelo + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # Construir un nuevo modelo desde YAML y comenzar entrenamiento desde cero + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # Comenzar entrenamiento desde un modelo *.pt preentrenado + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # Construir un nuevo modelo desde YAML, transferir pesos preentrenados y comenzar entrenamiento + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### Formato del conjunto de datos + +El formato del conjunto de datos de detección de YOLO se puede encontrar en detalle en la [Guía de Conjuntos de Datos](../../../datasets/detect/index.md). Para convertir tu conjunto de datos existente desde otros formatos (como COCO, etc.) al formato YOLO, por favor usa la herramienta [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) de Ultralytics. + +## Validación + +Valida la precisión del modelo YOLOv8n entrenado en el conjunto de datos COCO128. No es necesario pasar ningún argumento, ya que el `modelo` retiene sus `datos` de entrenamiento y argumentos como atributos del modelo. + +!!! 
Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n.pt') # cargar un modelo oficial + model = YOLO('ruta/a/mejor.pt') # cargar un modelo personalizado + + # Validar el modelo + metrics = model.val() # sin argumentos necesarios, el conjunto de datos y configuraciones se recuerdan + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # una lista contiene map50-95 de cada categoría + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # validar modelo oficial + yolo detect val model=ruta/a/mejor.pt # validar modelo personalizado + ``` + +## Predicción + +Utiliza un modelo YOLOv8n entrenado para realizar predicciones en imágenes. + +!!! Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n.pt') # cargar un modelo oficial + model = YOLO('ruta/a/mejor.pt') # cargar un modelo personalizado + + # Predecir con el modelo + results = model('https://ultralytics.com/images/bus.jpg') # predecir en una imagen + ``` + === "CLI" + + ```bash + yolo detect predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' # predecir con modelo oficial + yolo detect predict model=ruta/a/mejor.pt source='https://ultralytics.com/images/bus.jpg' # predecir con modelo personalizado + ``` + +Consulta los detalles completos del modo `predict` en la página [Predicción](https://docs.ultralytics.com/modes/predict/). + +## Exportación + +Exporta un modelo YOLOv8n a un formato diferente como ONNX, CoreML, etc. + +!!! 
Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n.pt') # cargar un modelo oficial + model = YOLO('ruta/a/mejor.pt') # cargar un modelo entrenado personalizado + + # Exportar el modelo + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # exportar modelo oficial + yolo export model=ruta/a/mejor.pt format=onnx # exportar modelo entrenado personalizado + ``` + +Los formatos de exportación de YOLOv8 disponibles se encuentran en la tabla a continuación. Puedes predecir o validar directamente en modelos exportados, es decir, `yolo predict model=yolov8n.onnx`. Ejemplos de uso se muestran para tu modelo después de que la exportación se completa. + +| Formato | Argumento `format` | Modelo | Metadata | Argumentos | +|--------------------------------------------------------------------|--------------------|----------------------------|----------|------------------------------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | 
`yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +Consulta los detalles completos del modo `export` en la página [Exportar](https://docs.ultralytics.com/modes/export/). diff --git a/docs/es/tasks/index.md b/docs/es/tasks/index.md new file mode 100644 index 0000000..04d1b5a --- /dev/null +++ b/docs/es/tasks/index.md @@ -0,0 +1,55 @@ +--- +comments: true +description: Aprenda sobre las tareas fundamentales de visión por computadora que YOLOv8 puede realizar, incluyendo detección, segmentación, clasificación y estimación de pose. Comprenda sus usos en sus proyectos de IA. +keywords: Ultralytics, YOLOv8, Detección, Segmentación, Clasificación, Estimación de Pose, Marco de IA, Tareas de Visión por Computadora +--- + +# Tareas de Ultralytics YOLOv8 + +
+Tareas soportadas por Ultralytics YOLO + +YOLOv8 es un marco de trabajo de IA que soporta múltiples **tareas** de visión por computadora. El marco puede usarse para realizar [detección](detect.md), [segmentación](segment.md), [clasificación](classify.md) y estimación de [pose](pose.md). Cada una de estas tareas tiene un objetivo y caso de uso diferente. + +!!! Note "Nota" + + 🚧 Nuestra documentación multilenguaje está actualmente en construcción y estamos trabajando arduamente para mejorarla. ¡Gracias por su paciencia! 🙏 + +

+
+ +
+ Mire: Explore las Tareas de Ultralytics YOLO: Detección de Objetos, Segmentación, Seguimiento y Estimación de Pose. +

+ +## [Detección](detect.md) + +La detección es la tarea principal soportada por YOLOv8. Implica detectar objetos en una imagen o cuadro de video y dibujar cuadros delimitadores alrededor de ellos. Los objetos detectados se clasifican en diferentes categorías basadas en sus características. YOLOv8 puede detectar múltiples objetos en una sola imagen o cuadro de video con alta precisión y velocidad. + +[Ejemplos de Detección](detect.md){ .md-button } + +## [Segmentación](segment.md) + +La segmentación es una tarea que implica segmentar una imagen en diferentes regiones basadas en el contenido de la imagen. A cada región se le asigna una etiqueta basada en su contenido. Esta tarea es útil en aplicaciones tales como segmentación de imágenes e imágenes médicas. YOLOv8 utiliza una variante de la arquitectura U-Net para realizar la segmentación. + +[Ejemplos de Segmentación](segment.md){ .md-button } + +## [Clasificación](classify.md) + +La clasificación es una tarea que implica clasificar una imagen en diferentes categorías. YOLOv8 puede usarse para clasificar imágenes basadas en su contenido. Utiliza una variante de la arquitectura EfficientNet para realizar la clasificación. + +[Ejemplos de Clasificación](classify.md){ .md-button } + +## [Pose](pose.md) + +La detección de pose/puntos clave es una tarea que implica detectar puntos específicos en una imagen o cuadro de video. Estos puntos se conocen como puntos clave y se utilizan para rastrear el movimiento o la estimación de la pose. YOLOv8 puede detectar puntos clave en una imagen o cuadro de video con alta precisión y velocidad. + +[Ejemplos de Pose](pose.md){ .md-button } + +## Conclusión + +YOLOv8 soporta múltiples tareas, incluyendo detección, segmentación, clasificación y detección de puntos clave. Cada una de estas tareas tiene diferentes objetivos y casos de uso. Al entender las diferencias entre estas tareas, puede elegir la tarea adecuada para su aplicación de visión por computadora. 
diff --git a/docs/es/tasks/pose.md b/docs/es/tasks/pose.md new file mode 100644 index 0000000..5374f5f --- /dev/null +++ b/docs/es/tasks/pose.md @@ -0,0 +1,185 @@ +--- +comments: true +description: Aprende a utilizar Ultralytics YOLOv8 para tareas de estimación de pose. Encuentra modelos preentrenados, aprende a entrenar, validar, predecir y exportar tus propios modelos. +keywords: Ultralytics, YOLO, YOLOv8, estimación de pose, detección de puntos clave, detección de objetos, modelos preentrenados, aprendizaje automático, inteligencia artificial +--- + +# Estimación de Pose + +Ejemplos de estimación de pose + +La estimación de pose es una tarea que implica identificar la ubicación de puntos específicos en una imagen, comúnmente referidos como puntos clave. Estos puntos clave pueden representar varias partes del objeto, como articulaciones, puntos de referencia u otras características distintivas. La ubicación de los puntos clave generalmente se representa como un conjunto de coordenadas 2D `[x, y]` o 3D `[x, y, visible]`. + +La salida de un modelo de estimación de pose es un conjunto de puntos que representan los puntos clave en un objeto de la imagen, generalmente junto con las puntuaciones de confianza para cada punto. La estimación de pose es una buena opción cuando se necesita identificar partes específicas de un objeto en una escena y su ubicación relativa entre ellas. + +

+
+ +
+ Ver: Estimación de Pose con Ultralytics YOLOv8. +

+ +!!! Tip "Consejo" + + Los modelos _pose_ YOLOv8 utilizan el sufijo `-pose`, por ejemplo, `yolov8n-pose.pt`. Estos modelos están entrenados en el conjunto de datos [COCO keypoints](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco-pose.yaml) y son adecuados para una variedad de tareas de estimación de pose. + +## [Modelos](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Aquí se muestran los modelos preentrenados de YOLOv8 Pose. Los modelos Detect, Segment y Pose están preentrenados en el conjunto de datos [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml), mientras que los modelos Classify están preentrenados en el conjunto de datos [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +Los [modelos](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) se descargan automáticamente desde el último lanzamiento de Ultralytics [release](https://github.com/ultralytics/assets/releases) en el primer uso. + +| Modelo | tamaño
(píxeles) | mAPpose
50-95 | mAPpose
50 | Velocidad
CPU ONNX
(ms) | Velocidad
A100 TensorRT
(ms) | parámetros
(M) | FLOPs
(B) | +|------------------------------------------------------------------------------------------------------|--------------------------|-----------------------|--------------------|------------------------------------|-----------------------------------------|------------------------|-------------------| +| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | +| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | +| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | +| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | +| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | +| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | + +- Los valores de **mAPval** son para un solo modelo a una sola escala en el conjunto de datos [COCO Keypoints val2017](http://cocodataset.org). +
Reproducir con `yolo val pose data=coco-pose.yaml device=0` +- **Velocidad** promediada sobre imágenes COCO val usando una instancia [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/). +
Reproducir con `yolo val pose data=coco8-pose.yaml batch=1 device=0|cpu` + +## Entrenar + +Entrena un modelo YOLOv8-pose en el conjunto de datos COCO128-pose. + +!!! Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n-pose.yaml') # construir un nuevo modelo desde YAML + model = YOLO('yolov8n-pose.pt') # cargar un modelo preentrenado (recomendado para entrenar) + model = YOLO('yolov8n-pose.yaml').load('yolov8n-pose.pt') # construir desde YAML y transferir los pesos + + # Entrenar el modelo + results = model.train(data='coco8-pose.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # Construir un nuevo modelo desde YAML y comenzar entrenamiento desde cero + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml epochs=100 imgsz=640 + + # Empezar entrenamiento desde un modelo *.pt preentrenado + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + + # Construir un nuevo modelo desde YAML, transferir pesos preentrenados y comenzar entrenamiento + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml pretrained=yolov8n-pose.pt epochs=100 imgsz=640 + ``` + +### Formato del conjunto de datos + +El formato del conjunto de datos de pose de YOLO se puede encontrar en detalle en la [Guía de Conjuntos de Datos](../../../datasets/pose/index.md). Para convertir tu conjunto de datos existente de otros formatos (como COCO, etc.) al formato de YOLO, usa la herramienta [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) de Ultralytics. + +## Validar + +Valida la precisión del modelo YOLOv8n-pose entrenado en el conjunto de datos COCO128-pose. No es necesario pasar ningún argumento ya que el `modelo` mantiene sus `datos` de entrenamiento y argumentos como atributos del modelo. + +!!! 
Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n-pose.pt') # cargar un modelo oficial + model = YOLO('path/to/best.pt') # cargar un modelo personalizado + + # Validar el modelo + metrics = model.val() # no se necesitan argumentos, el conjunto de datos y configuraciones se recuerdan + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # una lista contiene map50-95 de cada categoría + ``` + === "CLI" + + ```bash + yolo pose val model=yolov8n-pose.pt # validar modelo oficial + yolo pose val model=path/to/best.pt # validar modelo personalizado + ``` + +## Predecir + +Usa un modelo YOLOv8n-pose entrenado para realizar predicciones en imágenes. + +!!! Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n-pose.pt') # cargar un modelo oficial + model = YOLO('path/to/best.pt') # cargar un modelo personalizado + + # Predecir con el modelo + results = model('https://ultralytics.com/images/bus.jpg') # predecir en una imagen + ``` + === "CLI" + + ```bash + yolo pose predict model=yolov8n-pose.pt source='https://ultralytics.com/images/bus.jpg' # predecir con modelo oficial + yolo pose predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predecir con modelo personalizado + ``` + +Consulta los detalles completos del modo `predict` en la página de [Predicción](https://docs.ultralytics.com/modes/predict/). + +## Exportar + +Exporta un modelo YOLOv8n Pose a un formato diferente como ONNX, CoreML, etc. + +!!! 
Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n-pose.pt') # cargar un modelo oficial + model = YOLO('path/to/best.pt') # cargar un modelo entrenado personalizado + + # Exportar el modelo + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-pose.pt format=onnx # exportar modelo oficial + yolo export model=path/to/best.pt format=onnx # exportar modelo entrenado personalizado + ``` + +Los formatos de exportación de YOLOv8-pose disponibles se muestran en la tabla a continuación. Puedes predecir o validar directamente en modelos exportados, por ejemplo, `yolo predict model=yolov8n-pose.onnx`. Los ejemplos de uso se muestran para tu modelo después de que la exportación se completa. + +| Formato | Argumento `format` | Modelo | Metadatos | Argumentos | +|--------------------------------------------------------------------|--------------------|--------------------------------|-----------|---------------------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-pose.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-pose.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-pose.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-pose_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-pose.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-pose.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-pose_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF 
GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-pose.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-pose.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-pose_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-pose_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-pose_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-pose_ncnn_model/` | ✅ | `imgsz`, `half` | + +Consulta los detalles completos del modo `export` en la página de [Exportación](https://docs.ultralytics.com/modes/export/). diff --git a/docs/es/tasks/segment.md b/docs/es/tasks/segment.md new file mode 100644 index 0000000..24de163 --- /dev/null +++ b/docs/es/tasks/segment.md @@ -0,0 +1,188 @@ +--- +comments: true +description: Aprende a utilizar modelos de segmentación de instancias con Ultralytics YOLO. Instrucciones sobre entrenamiento, validación, predicción de imágenes y exportación de modelos. +keywords: yolov8, segmentación de instancias, Ultralytics, conjunto de datos COCO, segmentación de imágenes, detección de objetos, entrenamiento de modelos, validación de modelos, predicción de imágenes, exportación de modelos. +--- + +# Segmentación de Instancias + +Ejemplos de segmentación de instancias + +La segmentación de instancias va un paso más allá de la detección de objetos e implica identificar objetos individuales en una imagen y segmentarlos del resto de la imagen. + +La salida de un modelo de segmentación de instancias es un conjunto de máscaras o contornos que delimitan cada objeto en la imagen, junto con etiquetas de clase y puntajes de confianza para cada objeto. La segmentación de instancias es útil cuando necesitas saber no solo dónde están los objetos en una imagen, sino también cuál es su forma exacta. + +

+
+ +
+ Mira: Ejecuta la Segmentación con el Modelo Ultralytics YOLOv8 Preentrenado en Python. +

+ +!!! Tip "Consejo" + + Los modelos YOLOv8 Segment utilizan el sufijo `-seg`, es decir, `yolov8n-seg.pt` y están preentrenados en el [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml). + +## [Modelos](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Aquí se muestran los modelos Segment preentrenados YOLOv8. Los modelos Detect, Segment y Pose están preentrenados en el conjunto de datos [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml), mientras que los modelos Classify están preentrenados en el conjunto de datos [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +Los [Modelos](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) se descargan automáticamente desde el último lanzamiento de Ultralytics [release](https://github.com/ultralytics/assets/releases) en su primer uso. + +| Modelo | Tamaño
(píxeles) | mAPcaja
50-95 | mAPmáscara
50-95 | Velocidad
CPU ONNX
(ms) | Velocidad
A100 TensorRT
(ms) | Parámetros
(M) | FLOPs
(B) | +|----------------------------------------------------------------------------------------------|--------------------------|-----------------------|--------------------------|------------------------------------|-----------------------------------------|------------------------|-------------------| +| [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | +| [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | +| [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | +| [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | +| [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | + +- Los valores **mAPval** son para un único modelo a una única escala en el conjunto de datos [COCO val2017](http://cocodataset.org). +
Reproducir utilizando `yolo val segment data=coco.yaml device=0` +- La **Velocidad** promediada sobre imágenes de COCO val utilizando una instancia de [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/). +
Reproducir utilizando `yolo val segment data=coco128-seg.yaml batch=1 device=0|cpu` + +## Entrenamiento + +Entrena el modelo YOLOv8n-seg en el conjunto de datos COCO128-seg durante 100 épocas con tamaño de imagen de 640. Para una lista completa de argumentos disponibles, consulta la página de [Configuración](/../usage/cfg.md). + +!!! Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n-seg.yaml') # construir un nuevo modelo desde YAML + model = YOLO('yolov8n-seg.pt') # cargar un modelo preentrenado (recomendado para entrenamiento) + model = YOLO('yolov8n-seg.yaml').load('yolov8n.pt') # construir desde YAML y transferir pesos + + # Entrenar el modelo + results = model.train(data='coco128-seg.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # Construir un nuevo modelo desde YAML y comenzar a entrenar desde cero + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml epochs=100 imgsz=640 + + # Comenzar a entrenar desde un modelo *.pt preentrenado + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + + # Construir un nuevo modelo desde YAML, transferir pesos preentrenados y comenzar a entrenar + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml pretrained=yolov8n-seg.pt epochs=100 imgsz=640 + ``` + +### Formato del conjunto de datos + +El formato del conjunto de datos de segmentación YOLO puede encontrarse detallado en la [Guía de Conjuntos de Datos](../../../datasets/segment/index.md). Para convertir tu conjunto de datos existente de otros formatos (como COCO, etc.) al formato YOLO, utiliza la herramienta [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) de Ultralytics. + +## Validación + +Valida la precisión del modelo YOLOv8n-seg entrenado en el conjunto de datos COCO128-seg. No es necesario pasar ningún argumento ya que el `modelo` retiene sus `datos` de entrenamiento y argumentos como atributos del modelo. + +!!! 
Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n-seg.pt') # cargar un modelo oficial + model = YOLO('ruta/a/mejor.pt') # cargar un modelo personalizado + + # Validar el modelo + metrics = model.val() # no se necesitan argumentos, el conjunto de datos y configuraciones se recuerdan + metrics.box.map # map50-95(B) + metrics.box.map50 # map50(B) + metrics.box.map75 # map75(B) + metrics.box.maps # una lista contiene map50-95(B) de cada categoría + metrics.seg.map # map50-95(M) + metrics.seg.map50 # map50(M) + metrics.seg.map75 # map75(M) + metrics.seg.maps # una lista contiene map50-95(M) de cada categoría + ``` + === "CLI" + + ```bash + yolo segment val model=yolov8n-seg.pt # validar el modelo oficial + yolo segment val model=ruta/a/mejor.pt # validar el modelo personalizado + ``` + +## Predicción + +Usa un modelo YOLOv8n-seg entrenado para realizar predicciones en imágenes. + +!!! Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n-seg.pt') # cargar un modelo oficial + model = YOLO('ruta/a/mejor.pt') # cargar un modelo personalizado + + # Predecir con el modelo + results = model('https://ultralytics.com/images/bus.jpg') # predecir en una imagen + ``` + === "CLI" + + ```bash + yolo segment predict model=yolov8n-seg.pt source='https://ultralytics.com/images/bus.jpg' # predecir con el modelo oficial + yolo segment predict model=ruta/a/mejor.pt source='https://ultralytics.com/images/bus.jpg' # predecir con el modelo personalizado + ``` + +Consulta todos los detalles del modo `predict` en la página de [Predicción](https://docs.ultralytics.com/modes/predict/). + +## Exportación + +Exporta un modelo YOLOv8n-seg a un formato diferente como ONNX, CoreML, etc. + +!!! 
Example "Ejemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Cargar un modelo + model = YOLO('yolov8n-seg.pt') # cargar un modelo oficial + model = YOLO('ruta/a/mejor.pt') # cargar un modelo entrenado personalizado + + # Exportar el modelo + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-seg.pt format=onnx # exportar el modelo oficial + yolo export model=ruta/a/mejor.pt format=onnx # exportar el modelo entrenado personalizado + ``` + +Los formatos disponibles para exportar YOLOv8-seg se muestran en la tabla a continuación. Puedes predecir o validar directamente en modelos exportados, es decir, `yolo predict model=yolov8n-seg.onnx`. Se muestran ejemplos de uso para tu modelo después de que se completa la exportación. + +| Formato | Argumento `format` | Modelo | Metadatos | Argumentos | +|--------------------------------------------------------------------|--------------------|-------------------------------|-----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-seg.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-seg.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-seg.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-seg_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-seg.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-seg.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-seg_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-seg.pb` | ❌ 
| `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-seg.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-seg_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-seg_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-seg_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-seg_ncnn_model/` | ✅ | `imgsz`, `half` | + +Consulta todos los detalles del modo `export` en la página de [Exportación](https://docs.ultralytics.com/modes/export/). diff --git a/docs/fr/index.md b/docs/fr/index.md new file mode 100644 index 0000000..be3e947 --- /dev/null +++ b/docs/fr/index.md @@ -0,0 +1,82 @@ +--- +comments: true +description: Découvrez un guide complet du modèle Ultralytics YOLOv8, un modèle de détection d'objets et de segmentation d'images à haute vitesse et haute précision. Tutoriels d'installation, de prédiction, d'entraînement et plus encore. +keywords: Ultralytics, YOLOv8, détection d'objets, segmentation d'images, apprentissage automatique, apprentissage profond, vision par ordinateur, installation de YOLOv8, prédiction avec YOLOv8, entraînement de YOLOv8, histoire de YOLO, licences de YOLO +--- + +
+

+ + Bannière Ultralytics YOLO +

+ GitHub Ultralytics + space + LinkedIn Ultralytics + space + Twitter Ultralytics + space + YouTube Ultralytics + space + TikTok Ultralytics + space + Instagram Ultralytics + space + Discord Ultralytics +
+
+ Intégration continue Ultralytics + Couverture de code Ultralytics + Citation YOLOv8 + Téléchargements Docker +
+ Exécuter sur Gradient + Ouvrir dans Colab + Ouvrir dans Kaggle +
+ +Présentation d'[Ultralytics](https://ultralytics.com) [YOLOv8](https://github.com/ultralytics/ultralytics), la dernière version du modèle réputé de détection d'objets en temps réel et de segmentation d'images. YOLOv8 est construit sur des avancées de pointe en apprentissage profond et vision par ordinateur, offrant des performances inégalées en termes de vitesse et de précision. Sa conception épurée le rend adapté à diverses applications et facilement adaptable à différentes plateformes matérielles, des appareils de bord aux API cloud. + +Explorez les Docs YOLOv8, une ressource complète conçue pour vous aider à comprendre et à utiliser ses fonctionnalités et capacités. Que vous soyez un praticien chevronné de l'apprentissage automatique ou nouveau dans le domaine, ce hub vise à maximiser le potentiel de YOLOv8 dans vos projets. + +!!! Note "Note" + + 🚧 Notre documentation multilingue est actuellement en construction et nous travaillons dur pour l'améliorer. Merci de votre patience ! 🙏 + +## Par où commencer + +- **Installer** `ultralytics` avec pip et démarrer en quelques minutes   [:material-clock-fast: Commencer](quickstart.md){ .md-button } +- **Prédire** de nouvelles images et vidéos avec YOLOv8   [:octicons-image-16: Prédire sur Images](modes/predict.md){ .md-button } +- **Entraîner** un nouveau modèle YOLOv8 sur votre propre ensemble de données customisé   [:fontawesome-solid-brain: Entraîner un modèle](modes/train.md){ .md-button } +- **Explorer** les tâches YOLOv8 comme la segmentation, la classification, l'estimation de pose et le suivi   [:material-magnify-expand: Explorer les tâches](tasks/index.md){ .md-button } + +

+
+ +
+ Regarder : Comment entraîner un modèle YOLOv8 sur votre ensemble de données customisé dans Google Colab. +

+ +## YOLO : Un bref historique + +[YOLO](https://arxiv.org/abs/1506.02640) (You Only Look Once), un modèle populaire de détection d'objets et de segmentation d'images, a été développé par Joseph Redmon et Ali Farhadi à l'Université de Washington. Lancé en 2015, YOLO a rapidement gagné en popularité pour sa vitesse et sa précision élevées. + +- [YOLOv2](https://arxiv.org/abs/1612.08242), publié en 2016, a amélioré le modèle original en intégrant la normalisation par lots, les boîtes d'ancrage et les clusters de dimensions. +- [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf), lancé en 2018, a davantage amélioré la performance du modèle en utilisant un réseau dorsal plus efficace, des ancres multiples et un pool pyramidal spatial. +- [YOLOv4](https://arxiv.org/abs/2004.10934) a été publié en 2020, introduisant des innovations telles que l'augmentation de données Mosaic, une nouvelle tête de détection sans ancre et une nouvelle fonction de perte. +- [YOLOv5](https://github.com/ultralytics/yolov5) a encore amélioré la performance du modèle et a ajouté des fonctionnalités nouvelles telles que l'optimisation des hyperparamètres, le suivi intégré des expériences et l'export automatique vers des formats d'exportation populaires. +- [YOLOv6](https://github.com/meituan/YOLOv6) a été rendu open-source par [Meituan](https://about.meituan.com/) en 2022 et est utilisé dans de nombreux robots de livraison autonomes de l'entreprise. +- [YOLOv7](https://github.com/WongKinYiu/yolov7) a ajouté des tâches supplémentaires telles que l'estimation de pose sur le jeu de données de points clés COCO. +- [YOLOv8](https://github.com/ultralytics/ultralytics) est la dernière version de YOLO par Ultralytics. En tant que modèle de pointe et dernier cri (state-of-the-art, SOTA), YOLOv8 s'appuie sur le succès des versions précédentes, introduisant de nouvelles fonctionnalités et améliorations pour des performances, une flexibilité et une efficacité renforcées. 
YOLOv8 prend en charge une gamme complète de tâches d'intelligence artificielle visuelle, y compris la [détection](tasks/detect.md), la [segmentation](tasks/segment.md), l'[estimation de pose](tasks/pose.md), le [suivi](modes/track.md) et la [classification](tasks/classify.md). Cette polyvalence permet aux utilisateurs de tirer parti des capacités de YOLOv8 dans diverses applications et domaines. + +## Licences YOLO : Comment est licencié Ultralytics YOLO ? + +Ultralytics offre deux options de licence pour répondre aux différents cas d'utilisation : + +- **Licence AGPL-3.0** : Cette licence open source [approuvée par OSI](https://opensource.org/licenses/) est idéale pour les étudiants et les passionnés, favorisant la collaboration ouverte et le partage des connaissances. Voir le fichier [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) pour plus de détails. +- **Licence Enterprise** : Conçue pour un usage commercial, cette licence permet l'intégration transparente des logiciels et modèles d'IA Ultralytics dans des biens et services commerciaux, en contournant les exigences open source de l'AGPL-3.0. Si votre scénario implique l'incorporation de nos solutions dans une offre commerciale, n'hésitez pas à contacter [Ultralytics Licensing](https://ultralytics.com/license). + +Notre stratégie de licence est conçue pour garantir que toute amélioration de nos projets open source soit restituée à la communauté. Nous tenons les principes de l'open source à cœur ❤️, et notre mission est de garantir que nos contributions puissent être utilisées et développées de manière bénéfique pour tous. diff --git a/docs/fr/models/fast-sam.md b/docs/fr/models/fast-sam.md new file mode 100644 index 0000000..f741b83 --- /dev/null +++ b/docs/fr/models/fast-sam.md @@ -0,0 +1,193 @@ +--- +comments: true +description: Découvrez FastSAM, une solution basée sur les réseaux de neurones à convolution (CNN) pour la segmentation d'objets en temps réel dans les images. 
Interaction utilisateur améliorée, efficacité computationnelle et adaptabilité à différentes tâches de vision. +keywords: FastSAM, apprentissage automatique, solution basée sur les CNN, segmentation d'objets, solution en temps réel, Ultralytics, tâches de vision, traitement d'images, applications industrielles, interaction utilisateur +--- + +# Fast Segment Anything Model (FastSAM) + +Le Fast Segment Anything Model (FastSAM) est une solution basée sur les réseaux de neurones à convolution (CNN) en temps réel pour la tâche Segment Anything. Cette tâche est conçue pour segmenter n'importe quel objet dans une image en fonction de différentes interactions utilisateur possibles. FastSAM réduit considérablement les demandes computationnelles tout en maintenant des performances compétitives, ce qui en fait un choix pratique pour diverses tâches de vision. + +![Vue d'ensemble de l'architecture du Fast Segment Anything Model (FastSAM)](https://user-images.githubusercontent.com/26833433/248551984-d98f0f6d-7535-45d0-b380-2e1440b52ad7.jpg) + +## Vue d'ensemble + +FastSAM est conçu pour remédier aux limitations du [Segment Anything Model (SAM)](sam.md), un modèle Transformer lourd nécessitant des ressources computationnelles importantes. FastSAM découpe la tâche de segmentation en deux étapes séquentielles : la segmentation de toutes les instances et la sélection guidée par une invitation. La première étape utilise [YOLOv8-seg](../tasks/segment.md) pour produire les masques de segmentation de toutes les instances de l'image. Dans la deuxième étape, il génère la région d'intérêt correspondant à l'invitation. + +## Fonctionnalités clés + +1. **Solution en temps réel :** En exploitant l'efficacité computationnelle des CNN, FastSAM fournit une solution en temps réel pour la tâche Segment Anything, ce qui en fait une solution précieuse pour les applications industrielles nécessitant des résultats rapides. + +2. 
**Efficacité et performances :** FastSAM offre une réduction significative des demandes computationnelles et des ressources sans compromettre la qualité des performances. Il atteint des performances comparables à SAM, mais avec une réduction drastique des ressources computationnelles, ce qui permet une application en temps réel. + +3. **Segmentation guidée par une invitation :** FastSAM peut segmenter n'importe quel objet dans une image, guidé par différentes invitations d'interaction utilisateur possibles, offrant ainsi flexibilité et adaptabilité dans différents scénarios. + +4. **Basé sur YOLOv8-seg :** FastSAM est basé sur [YOLOv8-seg](../tasks/segment.md), un détecteur d'objets équipé d'une branche de segmentation d'instances. Cela lui permet de produire efficacement les masques de segmentation de toutes les instances dans une image. + +5. **Résultats concurrentiels sur les bancs d'essai :** Dans la tâche de proposition d'objets sur MS COCO, FastSAM obtient des scores élevés à une vitesse significativement plus rapide que [SAM](sam.md) sur une seule NVIDIA RTX 3090, démontrant ainsi son efficacité et sa capacité. + +6. **Applications pratiques :** Cette approche propose une nouvelle solution pratique pour un grand nombre de tâches de vision à une vitesse très élevée, des dizaines ou des centaines de fois plus rapide que les méthodes actuelles. + +7. **Faisabilité de la compression du modèle :** FastSAM démontre la faisabilité d'une voie qui peut réduire considérablement l'effort computationnel en introduisant une contrainte artificielle dans la structure, ouvrant ainsi de nouvelles possibilités pour l'architecture de modèles de grande taille pour les tâches de vision générales. 
+ +## Modèles disponibles, tâches prises en charge et modes d'exploitation + +Ce tableau présente les modèles disponibles avec leurs poids pré-entraînés spécifiques, les tâches qu'ils prennent en charge et leur compatibilité avec différents modes d'exploitation tels que [Inférence](../modes/predict.md), [Validation](../modes/val.md), [Entraînement](../modes/train.md) et [Exportation](../modes/export.md), indiqués par des emojis ✅ pour les modes pris en charge et des emojis ❌ pour les modes non pris en charge. + +| Type de modèle | Poids pré-entraînés | Tâches prises en charge | Inférence | Validation | Entraînement | Exportation | +|----------------|---------------------|-------------------------------------------------|-----------|------------|--------------|-------------| +| FastSAM-s | `FastSAM-s.pt` | [Segmentation d'instances](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| FastSAM-x | `FastSAM-x.pt` | [Segmentation d'instances](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## Exemples d'utilisation + +Les modèles FastSAM sont faciles à intégrer dans vos applications Python. Ultralytics propose une API Python conviviale et des commandes CLI pour simplifier le développement. + +### Utilisation de la prédiction + +Pour effectuer une détection d'objets sur une image, utilisez la méthode `predict` comme indiqué ci-dessous : + +!!!
Example "Exemple" + + === "Python" + ```python + from ultralytics import FastSAM + from ultralytics.models.fastsam import FastSAMPrompt + + # Définir une source d'inférence + source = 'chemin/vers/bus.jpg' + + # Créer un modèle FastSAM + model = FastSAM('FastSAM-s.pt') # ou FastSAM-x.pt + + # Effectuer une inférence sur une image + everything_results = model(source, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9) + + # Préparer un objet Processus Invitation + prompt_process = FastSAMPrompt(source, everything_results, device='cpu') + + # Invitation Everything + ann = prompt_process.everything_prompt() + + # Bbox shape par défaut [0,0,0,0] -> [x1,y1,x2,y2] + ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300]) + + # Invitation Text + ann = prompt_process.text_prompt(text='une photo d\'un chien') + + # Invitation Point + # points par défaut [[0,0]] [[x1,y1],[x2,y2]] + # point_label par défaut [0] [1,0] 0:fond, 1:premier plan + ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1]) + prompt_process.plot(annotations=ann, output='./') + ``` + + === "CLI" + ```bash + # Charger un modèle FastSAM et segmenter tout avec + yolo segment predict model=FastSAM-s.pt source=chemin/vers/bus.jpg imgsz=640 + ``` + +Cet exemple démontre la simplicité du chargement d'un modèle pré-entraîné et de l'exécution d'une prédiction sur une image. + +### Utilisation de la validation + +La validation du modèle sur un ensemble de données peut être effectuée de la manière suivante : + +!!! 
Example "Exemple" + + === "Python" + ```python + from ultralytics import FastSAM + + # Créer un modèle FastSAM + model = FastSAM('FastSAM-s.pt') # ou FastSAM-x.pt + + # Valider le modèle + results = model.val(data='coco8-seg.yaml') + ``` + + === "CLI" + ```bash + # Charger un modèle FastSAM et le valider sur l'ensemble de données d'exemple COCO8 avec une taille d'image de 640 pixels + yolo segment val model=FastSAM-s.pt data=coco8.yaml imgsz=640 + ``` + +Veuillez noter que FastSAM ne prend en charge que la détection et la segmentation d'une seule classe d'objet. Cela signifie qu'il reconnaîtra et segmentera tous les objets comme étant de la même classe. Par conséquent, lors de la préparation de l'ensemble de données, vous devez convertir tous les identifiants de catégorie d'objet en 0. + +## Utilisation officielle de FastSAM + +FastSAM est également disponible directement à partir du dépôt [https://github.com/CASIA-IVA-Lab/FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM). Voici un bref aperçu des étapes typiques que vous pourriez suivre pour utiliser FastSAM : + +### Installation + +1. Clonez le dépôt FastSAM : + ```shell + git clone https://github.com/CASIA-IVA-Lab/FastSAM.git + ``` + +2. Créez et activez un environnement Conda avec Python 3.9 : + ```shell + conda create -n FastSAM python=3.9 + conda activate FastSAM + ``` + +3. Accédez au dépôt cloné et installez les packages requis : + ```shell + cd FastSAM + pip install -r requirements.txt + ``` + +4. Installez le modèle CLIP : + ```shell + pip install git+https://github.com/openai/CLIP.git + ``` + +### Exemple d'utilisation + +1. Téléchargez un [point de contrôle de modèle](https://drive.google.com/file/d/1m1sjY4ihXBU1fZXdQ-Xdj-mDltW-2Rqv/view?usp=sharing). + +2. Utilisez FastSAM pour l'inférence. 
Exemples de commandes : + + - Segmentez tout dans une image : + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg + ``` + + - Segmentez des objets spécifiques à l'aide de l'invitation de texte : + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --text_prompt "le chien jaune" + ``` + + - Segmentez des objets dans un rectangle englobant (fournir les coordonnées du rectangle au format xywh) : + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --box_prompt "[570,200,230,400]" + ``` + + - Segmentez des objets à proximité de points spécifiques : + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --point_prompt "[[520,360],[620,300]]" --point_label "[1,0]" + ``` + +De plus, vous pouvez essayer FastSAM via une [démonstration Colab](https://colab.research.google.com/drive/1oX14f6IneGGw612WgVlAiy91UHwFAvr9?usp=sharing) ou sur la [démonstration Web HuggingFace](https://huggingface.co/spaces/An-619/FastSAM) pour une expérience visuelle. + +## Citations et remerciements + +Nous tenons à remercier les auteurs de FastSAM pour leurs contributions importantes dans le domaine de la segmentation d'instances en temps réel : + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{zhao2023fast, + title={Fast Segment Anything}, + author={Xu Zhao and Wenchao Ding and Yongqi An and Yinglong Du and Tao Yu and Min Li and Ming Tang and Jinqiao Wang}, + year={2023}, + eprint={2306.12156}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +Le document original FastSAM peut être consulté sur [arXiv](https://arxiv.org/abs/2306.12156). Les auteurs ont rendu leur travail accessible au public, et le code source peut être consulté sur [GitHub](https://github.com/CASIA-IVA-Lab/FastSAM). Nous apprécions leurs efforts pour faire avancer le domaine et rendre leur travail accessible à la communauté dans son ensemble. 
diff --git a/docs/fr/models/index.md b/docs/fr/models/index.md new file mode 100644 index 0000000..abf329d --- /dev/null +++ b/docs/fr/models/index.md @@ -0,0 +1,98 @@ +--- +comments: true +description: Explorez la gamme diversifiée de modèles de la famille YOLO, SAM, MobileSAM, FastSAM, YOLO-NAS et RT-DETR pris en charge par Ultralytics. Commencez avec des exemples pour l'utilisation CLI et Python. +keywords: Ultralytics, documentation, YOLO, SAM, MobileSAM, FastSAM, YOLO-NAS, RT-DETR, modèles, architectures, Python, CLI +--- + +# Modèles pris en charge par Ultralytics + +Bienvenue dans la documentation des modèles d'Ultralytics ! Nous offrons un soutien pour une large gamme de modèles, chacun étant adapté à des tâches spécifiques comme [la détection d'objets](../tasks/detect.md), [la segmentation d'instance](../tasks/segment.md), [la classification d'images](../tasks/classify.md), [l'estimation de pose](../tasks/pose.md), et [le suivi multi-objets](../modes/track.md). Si vous êtes intéressé à contribuer avec votre architecture de modèle à Ultralytics, consultez notre [Guide de Contribution](../../help/contributing.md). + +!!! Note "Remarque" + + 🚧 Notre documentation dans différentes langues est actuellement en construction, et nous travaillons dur pour l'améliorer. Merci de votre patience ! 🙏 + +## Modèles en vedette + +Voici quelques-uns des modèles clés pris en charge : + +1. **[YOLOv3](yolov3.md)** : La troisième itération de la famille de modèles YOLO, initialement par Joseph Redmon, connue pour ses capacités de détection d'objets en temps réel efficaces. +2. **[YOLOv4](yolov4.md)** : Une mise à jour native darknet de YOLOv3, publiée par Alexey Bochkovskiy en 2020. +3. **[YOLOv5](yolov5.md)** : Une version améliorée de l'architecture YOLO par Ultralytics, offrant de meilleures performances et compromis de vitesse par rapport aux versions précédentes. +4. 
**[YOLOv6](yolov6.md)** : Publié par [Meituan](https://about.meituan.com/) en 2022, et utilisé dans beaucoup de ses robots de livraison autonomes. +5. **[YOLOv7](yolov7.md)** : Modèles YOLO mis à jour publiés en 2022 par les auteurs de YOLOv4. +6. **[YOLOv8](yolov8.md) NOUVEAU 🚀**: La dernière version de la famille YOLO, présentant des capacités améliorées telles que la segmentation d'instance, l'estimation de pose/points clés et la classification. +7. **[Segment Anything Model (SAM)](sam.md)** : Le modèle Segment Anything Model (SAM) de Meta. +8. **[Mobile Segment Anything Model (MobileSAM)](mobile-sam.md)** : MobileSAM pour applications mobiles, développé par l'Université de Kyung Hee. +9. **[Fast Segment Anything Model (FastSAM)](fast-sam.md)** : FastSAM par le Image & Video Analysis Group, Institute of Automation, Chinese Academy of Sciences. +10. **[YOLO-NAS](yolo-nas.md)** : Modèles de Recherche d'Architecture Neuronale YOLO (NAS). +11. **[Realtime Detection Transformers (RT-DETR)](rtdetr.md)** : Modèles du Transformateur de Détection en Temps Réel (RT-DETR) de PaddlePaddle de Baidu. + +

+
+ +
+ Regardez : Exécutez les modèles YOLO d'Ultralytics en seulement quelques lignes de code. +

+ +## Pour Commencer : Exemples d'Utilisation + +Cet exemple fournit des exemples simples d'entraînement et d'inférence YOLO. Pour une documentation complète sur ces [modes](../modes/index.md) et d'autres, consultez les pages de documentation [Prédire](../modes/predict.md), [Entraîner](../modes/train.md), [Val](../modes/val.md) et [Exporter](../modes/export.md). + +Notez que l'exemple ci-dessous concerne les modèles [Detect](../tasks/detect.md) YOLOv8 pour la détection d'objets. Pour des tâches supplémentaires prises en charge, voir les documentations [Segmenter](../tasks/segment.md), [Classifier](../tasks/classify.md) et [Poser](../tasks/pose.md). + +!!! Example "Exemple" + + === "Python" + + Des modèles pré-entraînés PyTorch `*.pt` ainsi que des fichiers de configuration `*.yaml` peuvent être passés aux classes `YOLO()`, `SAM()`, `NAS()` et `RTDETR()` pour créer une instance de modèle en Python : + + ```python + from ultralytics import YOLO + + # Charger un modèle YOLOv8n pré-entraîné sur COCO + model = YOLO('yolov8n.pt') + + # Afficher les informations du modèle (optionnel) + model.info() + + # Entraîner le modèle sur le jeu de données exemple COCO8 pendant 100 époques + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Exécuter l'inférence avec le modèle YOLOv8n sur l'image 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + Des commandes CLI sont disponibles pour exécuter directement les modèles : + + ```bash + # Charger un modèle YOLOv8n pré-entraîné sur COCO et l'entraîner sur le jeu de données exemple COCO8 pendant 100 époques + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Charger un modèle YOLOv8n pré-entraîné sur COCO et exécuter l'inférence sur l'image 'bus.jpg' + yolo predict model=yolov8n.pt source=path/to/bus.jpg + ``` + +## Contribution de Nouveaux Modèles + +Vous êtes intéressé à contribuer votre modèle à Ultralytics ? Génial ! 
Nous sommes toujours ouverts à l'expansion de notre portefeuille de modèles. + +1. **Forkez le Référentiel** : Commencez par forker le [référentiel GitHub d'Ultralytics](https://github.com/ultralytics/ultralytics). + +2. **Clonez Votre Fork** : Clonez votre fork sur votre machine locale et créez une nouvelle branche pour travailler dessus. + +3. **Implémentez Votre Modèle** : Ajoutez votre modèle en suivant les normes et directives de codage fournies dans notre [Guide de Contribution](../../help/contributing.md). + +4. **Testez Rigoureusement** : Assurez-vous de tester votre modèle de manière rigoureuse, à la fois isolément et comme partie du pipeline. + +5. **Créez une Pull Request** : Une fois que vous êtes satisfait de votre modèle, créez une pull request vers le référentiel principal pour examen. + +6. **Revue de Code & Fusion** : Après examen, si votre modèle répond à nos critères, il sera fusionné dans le référentiel principal. + +Pour des étapes détaillées, consultez notre [Guide de Contribution](../../help/contributing.md). diff --git a/docs/fr/models/mobile-sam.md b/docs/fr/models/mobile-sam.md new file mode 100644 index 0000000..d011b03 --- /dev/null +++ b/docs/fr/models/mobile-sam.md @@ -0,0 +1,116 @@ +--- +comments: true +description: En savoir plus sur MobileSAM, son implémentation, la comparaison avec SAM d'origine, et comment le télécharger et le tester dans le cadre de l'environnement Ultralytics. Améliorez vos applications mobiles dès aujourd'hui. +keywords: MobileSAM, Ultralytics, SAM, applications mobiles, Arxiv, GPU, API, encodeur d'image, décodeur de masque, téléchargement de modèle, méthode de test +--- + +![Logo MobileSAM](https://github.com/ChaoningZhang/MobileSAM/blob/master/assets/logo2.png?raw=true) + +# Segmenter N'importe Quoi sur Mobile (MobileSAM) + +Le document MobileSAM est maintenant disponible sur [arXiv](https://arxiv.org/pdf/2306.14289.pdf).
+ +Une démonstration de MobileSAM exécutée sur CPU est accessible via ce [lien de démonstration](https://huggingface.co/spaces/dhkim2810/MobileSAM). Sur un CPU Mac i5, l'exécution prend environ 3 secondes. Sur la démo de Hugging Face, l'interface ainsi que les CPU moins performants contribuent à une réponse plus lente, mais cela continue de fonctionner efficacement. + +MobileSAM est implémenté dans divers projets, notamment [Grounding-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything), [AnyLabeling](https://github.com/vietanhdev/anylabeling), et [Segment Anything en 3D](https://github.com/Jumpat/SegmentAnythingin3D). + +MobileSAM est entraîné sur un seul GPU avec un ensemble de données de 100 000 images (1% des images originales) en moins d'une journée. Le code de cet entraînement sera disponible à l'avenir. + +## Modèles Disponibles, Tâches Prises en Charge et Modes d'Utilisation + +Ce tableau présente les modèles disponibles avec leurs poids pré-entraînés spécifiques, les tâches qu'ils prennent en charge, et leur compatibilité avec les différents modes d'utilisation tels que [Inférence](../modes/predict.md), [Validation](../modes/val.md), [Entraînement](../modes/train.md) et [Export](../modes/export.md), indiqués par les emojis ✅ pour les modes pris en charge et ❌ pour les modes non pris en charge. + +| Type de Modèle | Poids Pré-entraînés | Tâches Prises en Charge | Inférence | Validation | Entraînement | Export | +|----------------|---------------------|-------------------------------------------------|-----------|------------|--------------|--------| +| MobileSAM | `mobile_sam.pt` | [Segmentation d'Instances](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## Passage de SAM à MobileSAM + +Étant donné que MobileSAM conserve le même pipeline que SAM d'origine, nous avons incorporé le pré-traitement, le post-traitement et toutes les autres interfaces de l'original.
Par conséquent, ceux qui utilisent actuellement SAM d'origine peuvent passer à MobileSAM avec un effort minimal. + +MobileSAM a des performances comparables à celles de SAM d'origine et conserve le même pipeline à l'exception d'un changement dans l'encodeur d'image. Plus précisément, nous remplaçons l'encodeur d'image lourd original ViT-H (632M) par un encodeur Tiny-ViT plus petit (5M). Sur un seul GPU, MobileSAM fonctionne à environ 12 ms par image : 8 ms sur l'encodeur d'image et 4 ms sur le décodeur de masque. + +Le tableau suivant présente une comparaison des encodeurs d'image basés sur ViT : + +| Encodeur d'Image | SAM d'Origine | MobileSAM | +|------------------|---------------|-----------| +| Paramètres | 611M | 5M | +| Vitesse | 452 ms | 8 ms | + +SAM d'origine et MobileSAM utilisent tous deux le même décodeur de masque basé sur une instruction : + +| Décodeur de Masque | SAM d'Origine | MobileSAM | +|--------------------|---------------|-----------| +| Paramètres | 3.876M | 3.876M | +| Vitesse | 4 ms | 4 ms | + +Voici une comparaison du pipeline complet : + +| Pipeline Complet (Enc+Dec) | SAM d'Origine | MobileSAM | +|----------------------------|---------------|-----------| +| Paramètres | 615M | 9.66M | +| Vitesse | 456 ms | 12 ms | + +Les performances de MobileSAM et de SAM d'origine sont démontrées en utilisant à la fois un point et une boîte comme instructions. + +![Image avec un Point comme Instruction](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/mask_box.jpg?raw=true) + +![Image avec une Boîte comme Instruction](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/mask_box.jpg?raw=true) + +Avec ses performances supérieures, MobileSAM est environ 5 fois plus petit et 7 fois plus rapide que FastSAM actuel. Plus de détails sont disponibles sur la [page du projet MobileSAM](https://github.com/ChaoningZhang/MobileSAM). 
+ +## Test de MobileSAM dans Ultralytics + +Tout comme SAM d'origine, nous proposons une méthode de test simple dans Ultralytics, comprenant des modes pour les instructions Point et Boîte. + +### Téléchargement du modèle + +Vous pouvez télécharger le modèle [ici](https://github.com/ChaoningZhang/MobileSAM/blob/master/weights/mobile_sam.pt). + +### Instruction Point + +!!! Example "Exemple" + + === "Python" + ```python + from ultralytics import SAM + + # Chargement du modèle + model = SAM('mobile_sam.pt') + + # Prédiction d'un segment à partir d'une instruction Point + model.predict('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +### Instruction Boîte + +!!! Example "Exemple" + + === "Python" + ```python + from ultralytics import SAM + + # Chargement du modèle + model = SAM('mobile_sam.pt') + + # Prédiction d'un segment à partir d'une instruction Boîte + model.predict('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + ``` + +Nous avons mis en œuvre `MobileSAM` et `SAM` en utilisant la même API. Pour plus d'informations sur l'utilisation, veuillez consulter la [page SAM](sam.md). + +## Citations et Remerciements + +Si vous trouvez MobileSAM utile dans vos travaux de recherche ou de développement, veuillez envisager de citer notre document : + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @article{mobile_sam, + title={Faster Segment Anything: Towards Lightweight SAM for Mobile Applications}, + author={Zhang, Chaoning and Han, Dongshen and Qiao, Yu and Kim, Jung Uk and Bae, Sung Ho and Lee, Seungkyu and Hong, Choong Seon}, + journal={arXiv preprint arXiv:2306.14289}, + year={2023} + } diff --git a/docs/fr/models/rtdetr.md b/docs/fr/models/rtdetr.md new file mode 100644 index 0000000..13439be --- /dev/null +++ b/docs/fr/models/rtdetr.md @@ -0,0 +1,93 @@ +--- +comments: true +description: Découvrez les fonctionnalités et les avantages de RT-DETR de Baidu, un détecteur d'objets en temps réel efficace et adaptable grâce aux Vision Transformers, incluant des modèles pré-entraînés. +keywords: RT-DETR, Baidu, Vision Transformers, détection d'objets, performance en temps réel, CUDA, TensorRT, sélection de requêtes informée par IoU, Ultralytics, API Python, PaddlePaddle +--- + +# RT-DETR de Baidu : un détecteur d'objets en temps réel basé sur les Vision Transformers + +## Présentation + +Le Real-Time Detection Transformer (RT-DETR), développé par Baidu, est un détecteur d'objets de pointe de bout en bout qui offre des performances en temps réel tout en maintenant une grande précision. Il exploite la puissance des Vision Transformers (ViT) pour traiter efficacement les caractéristiques multiscalaires en dissociant l'interaction intra-échelle et la fusion inter-échelles. RT-DETR est hautement adaptable, permettant un ajustement flexible de la vitesse d'inférence en utilisant différentes couches de décodeur sans nécessiter de réentraînement. Le modèle est performant sur des infrastructures accélérées telles que CUDA avec TensorRT, surpassant de nombreux autres détecteurs d'objets en temps réel.
+ +![Exemple d'image du modèle](https://user-images.githubusercontent.com/26833433/238963168-90e8483f-90aa-4eb6-a5e1-0d408b23dd33.png) +**Vue d'ensemble du RT-DETR de Baidu.** Le diagramme d'architecture du modèle RT-DETR montre les trois dernières étapes du réseau {S3, S4, S5} comme entrée de l'encodeur. L'encodeur hybride efficace transforme les caractéristiques multiscalaires en une séquence de caractéristiques d'image grâce à l'interaction à l'intérieur de l'échelle (AIFI - *Attention-based Intra-scale Feature Interaction*) et au module de fusion inter-échelles (CCFM - *CNN-based Cross-scale Feature-fusion Module*). La sélection de requêtes informée par IoU est utilisée pour sélectionner un nombre fixe de caractéristiques d'image pour servir de requêtes d'objets initiales pour le décodeur. Enfin, le décodeur avec des têtes de prédictions auxiliaires optimise de manière itérative les requêtes d'objets pour générer des boîtes et des scores de confiance ([source](https://arxiv.org/pdf/2304.08069.pdf)). + +### Fonctionnalités principales + +- **Encodeur hybride efficace :** RT-DETR de Baidu utilise un encodeur hybride efficace qui traite les caractéristiques multiscalaires en dissociant l'interaction intra-échelle et la fusion inter-échelles. Cette conception unique basée sur les Vision Transformers réduit les coûts de calcul et permet une détection d'objets en temps réel. +- **Sélection de requêtes informée par IoU :** RT-DETR de Baidu améliore l'initialisation des requêtes d'objets en utilisant une sélection de requêtes informée par IoU. Cela permet au modèle de se concentrer sur les objets les plus pertinents de la scène, améliorant ainsi la précision de la détection. +- **Vitesse d'inférence adaptable :** RT-DETR de Baidu prend en charge des ajustements flexibles de la vitesse d'inférence en utilisant différentes couches de décodeur sans nécessiter de nouvelle formation.
Cette adaptabilité facilite l'application pratique dans différents scénarios de détection d'objets en temps réel. + +## Modèles pré-entraînés + +L'API Python Ultralytics fournit des modèles pré-entraînés RT-DETR de PaddlePaddle avec différentes échelles : + +- RT-DETR-L : 53,0 % de précision moyenne (AP) sur COCO val2017, 114 images par seconde (FPS) sur GPU T4 +- RT-DETR-X : 54,8 % de précision moyenne (AP) sur COCO val2017, 74 images par seconde (FPS) sur GPU T4 + +## Exemples d'utilisation + +Cette section présente des exemples simples d'entraînement et d'inférence avec RT-DETR. Pour une documentation complète sur ceux-ci et d'autres [modes](../modes/index.md), consultez les pages de documentation [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) et [Export](../modes/export.md). + +!!! Example "Exemple" + + === "Python" + + ```python + from ultralytics import RTDETR + + # Charger un modèle RT-DETR-l pré-entraîné sur COCO + model = RTDETR('rtdetr-l.pt') + + # Afficher des informations sur le modèle (facultatif) + model.info() + + # Entraîner le modèle sur l'ensemble de données d'exemple COCO8 pendant 100 époques + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Effectuer une inférence avec le modèle RT-DETR-l sur l'image 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + ```bash + # Charger un modèle RT-DETR-l pré-entraîné sur COCO et l'entraîner sur l'ensemble de données d'exemple COCO8 pendant 100 époques + yolo train model=rtdetr-l.pt data=coco8.yaml epochs=100 imgsz=640 + + # Charger un modèle RT-DETR-l pré-entraîné sur COCO et effectuer une inférence sur l'image 'bus.jpg' + yolo predict model=rtdetr-l.pt source=path/to/bus.jpg + ``` + +## Tâches et modes pris en charge + +Ce tableau présente les types de modèles, les poids pré-entraînés spécifiques, les tâches prises en charge par chaque modèle et les différents modes ([Train](../modes/train.md), [Val](../modes/val.md),
[Predict](../modes/predict.md), [Export](../modes/export.md)) pris en charge, indiqués par des emojis ✅. + +| Type de modèle | Poids pré-entraînés | Tâches prises en charge | Inférence | Validation | Entraînement | Export | +|---------------------|---------------------|------------------------------------------|-----------|------------|--------------|--------| +| RT-DETR Large | `rtdetr-l.pt` | [Détection d'objets](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| RT-DETR Extra-Large | `rtdetr-x.pt` | [Détection d'objets](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +## Citations et Remerciements + +Si vous utilisez RT-DETR de Baidu dans votre travail de recherche ou de développement, veuillez citer l'[article original](https://arxiv.org/abs/2304.08069) : + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{lv2023detrs, + title={DETRs Beat YOLOs on Real-time Object Detection}, + author={Wenyu Lv and Shangliang Xu and Yian Zhao and Guanzhong Wang and Jinman Wei and Cheng Cui and Yuning Du and Qingqing Dang and Yi Liu}, + year={2023}, + eprint={2304.08069}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +Nous tenons à remercier Baidu et l'équipe [PaddlePaddle](https://github.com/PaddlePaddle/PaddleDetection) pour la création et la maintenance de cette précieuse ressource pour la communauté de la vision par ordinateur. Leur contribution au domaine avec le développement du détecteur d'objets en temps réel basé sur les Vision Transformers, RT-DETR, est grandement appréciée. 
+ +*keywords: RT-DETR, Transformer, ViT, Vision Transformers, RT-DETR de Baidu, PaddlePaddle, Modèles PaddlePaddle RT-DETR pré-entraînés, utilisation de RT-DETR de Baidu, API Python Ultralytics, détection d'objets en temps réel* diff --git a/docs/fr/models/sam.md b/docs/fr/models/sam.md new file mode 100644 index 0000000..9dfed8a --- /dev/null +++ b/docs/fr/models/sam.md @@ -0,0 +1,226 @@ +--- +comments: true +description: Découvrez le modèle Segment Anything (SAM) de pointe d'Ultralytics permettant la segmentation d'images en temps réel. Apprenez-en davantage sur sa segmentation promptable, ses performances hors échantillon et comment l'utiliser. +keywords: Ultralytics, segmentation d'image, Segment Anything Model, SAM, SA-1B dataset, performances en temps réel, transfert hors échantillon, détection d'objets, analyse d'images, apprentissage automatique +--- + +# Segment Anything Model (SAM) + +Bienvenue à la pointe de la segmentation d'image avec le modèle Segment Anything, ou SAM. Ce modèle révolutionnaire a changé la donne en introduisant la segmentation d'image promptable avec des performances en temps réel, établissant de nouvelles normes dans le domaine. + +## Introduction à SAM : Le modèle Segment Anything + +Le modèle Segment Anything, ou SAM, est un modèle de segmentation d'image de pointe qui permet une segmentation promptable, offrant une polyvalence inégalée dans les tâches d'analyse d'image. SAM forme le cœur de l'initiative Segment Anything, un projet innovant qui introduit un modèle, une tâche et un jeu de données novateurs pour la segmentation d'images. + +La conception avancée de SAM lui permet de s'adapter à de nouvelles distributions et tâches d'images sans connaissance préalable, une fonctionnalité connue sous le nom de transfert hors échantillon. 
Entraîné sur le vaste ensemble de données [SA-1B](https://ai.facebook.com/datasets/segment-anything/), qui contient plus d'un milliard de masques répartis sur 11 millions d'images soigneusement sélectionnées, SAM a affiché des performances hors échantillon impressionnantes, dépassant les résultats entièrement supervisés précédents dans de nombreux cas. + +![Image d'échantillon de jeu de données](https://user-images.githubusercontent.com/26833433/238056229-0e8ffbeb-f81a-477e-a490-aff3d82fd8ce.jpg) +Exemple d'images avec des masques superposés provenant de notre nouveau jeu de données, SA-1B. SA-1B contient 11 millions d'images diverses, haute résolution, autorisées et protégeant la vie privée, ainsi que 1,1 milliard de masques de segmentation de haute qualité. Ces masques ont été annotés entièrement automatiquement par SAM, et comme le confirment des évaluations humaines et de nombreux tests, leur qualité et leur diversité sont élevées. Les images sont regroupées par nombre de masques par image pour la visualisation (il y a environ 100 masques par image en moyenne). + +## Caractéristiques clés du modèle Segment Anything (SAM) + +- **Tâche de segmentation promptable :** SAM a été conçu en gardant à l'esprit une tâche de segmentation promptable, ce qui lui permet de générer des masques de segmentation valides à partir de n'importe quelle indication donnée, telle que des indices spatiaux ou des indices textuels identifiant un objet. +- **Architecture avancée :** Le modèle Segment Anything utilise un puissant encodeur d'images, un encodeur de prompt et un décodeur de masques léger. Cette architecture unique permet une utilisation flexible des prompts, un calcul de masques en temps réel et une prise en compte de l'ambiguïté dans les tâches de segmentation. +- **Le jeu de données SA-1B :** Introduit par le projet Segment Anything, le jeu de données SA-1B comprend plus d'un milliard de masques sur 11 millions d'images.
En tant que plus grand jeu de données de segmentation à ce jour, il offre à SAM une source de données d'entraînement diversifiée et à grande échelle. +- **Performances hors échantillon :** SAM affiche des performances hors échantillon exceptionnelles dans diverses tâches de segmentation, ce qui en fait un outil prêt à l'emploi pour des applications diverses nécessitant un minimum d'ingénierie de prompt. + +Pour une analyse approfondie du modèle Segment Anything et du jeu de données SA-1B, veuillez visiter le [site web Segment Anything](https://segment-anything.com) et consulter l'article de recherche [Segment Anything](https://arxiv.org/abs/2304.02643). + +## Modèles disponibles, tâches prises en charge et modes d'exploitation + +Ce tableau présente les modèles disponibles avec leurs poids pré-entraînés spécifiques, les tâches qu'ils prennent en charge et leur compatibilité avec différents modes d'exploitation tels que [Inférence](../modes/predict.md), [Validation](../modes/val.md), [Entraînement](../modes/train.md) et [Exportation](../modes/export.md), indiqués par des emojis ✅ pour les modes pris en charge et des emojis ❌ pour les modes non pris en charge. + +| Type de modèle | Poids pré-entraînés | Tâches prises en charge | Inférence | Validation | Entraînement | Exportation | +|----------------|---------------------|------------------------------------------------|-----------|------------|--------------|-------------| +| SAM de base | `sam_b.pt` | [Segmentation d'instance](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| SAM large | `sam_l.pt` | [Segmentation d'instance](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## Comment utiliser SAM : Polyvalence et puissance dans la segmentation d'images + +Le modèle Segment Anything peut être utilisé pour une multitude de tâches secondaires qui vont au-delà de ses données d'entraînement. 
Cela comprend la détection des contours, la génération de propositions d'objets, la segmentation d'instances et la prédiction préliminaire texte-à-masque. Grâce à l'ingénierie de prompts, SAM peut s'adapter rapidement à de nouvelles tâches et distributions de données sans apprentissage préalable (zero-shot), ce qui en fait un outil polyvalent et puissant pour tous vos besoins en matière de segmentation d'images. + +### Exemple de prédiction SAM + +!!! Example "Segmentation avec des prompts" + + Segmenter l'image avec des prompts donnés. + + === "Python" + + ```python + from ultralytics import SAM + + # Charger un modèle + model = SAM('sam_b.pt') + + # Afficher les informations sur le modèle (facultatif) + model.info() + + # Exécuter l'inférence avec un prompt de zones de délimitation + model('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + + # Exécuter l'inférence avec un prompt de points + model('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +!!! Example "Segmenter tout" + + Segmenter toute l'image. + + === "Python" + + ```python + from ultralytics import SAM + + # Charger un modèle + model = SAM('sam_b.pt') + + # Afficher les informations sur le modèle (facultatif) + model.info() + + # Exécuter l'inférence + model('path/to/image.jpg') + ``` + + === "CLI" + + ```bash + # Exécuter l'inférence avec un modèle SAM + yolo predict model=sam_b.pt source=path/to/image.jpg + ``` + +- La logique ici est de segmenter toute l'image si vous ne passez aucun prompt (bboxes/points/masks). + +!!! Example "Exemple SAMPredictor" + + De cette manière, vous pouvez définir l'image une fois et exécuter l'inférence des prompts plusieurs fois sans exécuter l'encodeur d'image plusieurs fois.
+ + === "Inférence avec des prompts" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # Créer un SAMPredictor + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # Définir l'image + predictor.set_image("ultralytics/assets/zidane.jpg") # définir avec un fichier image + predictor.set_image(cv2.imread("ultralytics/assets/zidane.jpg")) # définir avec np.ndarray + results = predictor(bboxes=[439, 437, 524, 709]) + results = predictor(points=[900, 370], labels=[1]) + + # Réinitialiser l'image + predictor.reset_image() + ``` + + Segmenter toute l'image avec des arguments supplémentaires. + + === "Segmenter tout" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # Créer un SAMPredictor + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # Segmenter avec des arguments supplémentaires + results = predictor(source="ultralytics/assets/zidane.jpg", crop_n_layers=1, points_stride=64) + ``` + +- Pour plus d'arguments concernant `Segmenter tout`, voir la référence [`Predictor/generate`](../../../reference/models/sam/predict.md).
+ +## Comparaison de SAM avec YOLOv8 + +Nous comparons ici le plus petit modèle SAM de Meta, SAM-b, avec le plus petit modèle de segmentation d'Ultralytics, [YOLOv8n-seg](../tasks/segment.md) : + +| Modèle | Taille | Paramètres | Vitesse (CPU) | +|--------------------------------------------------------------|-----------------------------------|-----------------------------|-------------------------------------| +| SAM-b de Meta | 358 Mo | 94,7 M | 51096 ms/im | +| [MobileSAM](mobile-sam.md) | 40,7 Mo | 10,1 M | 46122 ms/im | +| [FastSAM-s](fast-sam.md) avec backbone YOLOv8 | 23,7 Mo | 11,8 M | 115 ms/im | +| YOLOv8n-seg - Ultralytics [YOLOv8n-seg](../tasks/segment.md) | **6,7 Mo** (53,4 fois plus petit) | **3,4 M** (27,9 fois moins) | **59 ms/im** (866 fois plus rapide) | + +Cette comparaison montre les différences d'ordre de grandeur dans les tailles et les vitesses des modèles. Alors que SAM présente des fonctionnalités uniques pour la segmentation automatique, il ne rivalise pas directement avec les modèles de segmentation YOLOv8, qui sont plus petits, plus rapides et plus efficaces. + +Tests effectués sur un MacBook Apple M2 de 2023 avec 16 Go de RAM. Pour reproduire ce test : + +!!!
Example "Exemple" + + === "Python" + ```python + from ultralytics import FastSAM, SAM, YOLO + + # Profiler SAM-b + modèle = SAM('sam_b.pt') + modèle.info() + modèle('ultralytics/assets') + + # Profiler MobileSAM + modèle = SAM('mobile_sam.pt') + modèle.info() + modèle('ultralytics/assets') + + # Profiler FastSAM-s + modèle = FastSAM('FastSAM-s.pt') + modèle.info() + modèle('ultralytics/assets') + + # Profiler YOLOv8n-seg + modèle = YOLO('yolov8n-seg.pt') + modèle.info() + modèle('ultralytics/assets') + ``` + +## Annotation automatique : Un moyen rapide d'obtenir des jeux de données de segmentation + +L'annotation automatique est une fonctionnalité clé de SAM, permettant aux utilisateurs de générer un [jeu de données de segmentation](https://docs.ultralytics.com/datasets/segment) à l'aide d'un modèle de détection pré-entraîné. Cette fonctionnalité permet une annotation rapide et précise d'un grand nombre d'images, en contournant la nécessité d'une annotation manuelle chronophage. + +### Générez votre jeu de données de segmentation à l'aide d'un modèle de détection + +Pour annoter automatiquement votre jeu de données avec le framework Ultralytics, utilisez la fonction `auto_annotate` comme indiqué ci-dessous : + +!!! Example "Exemple" + + === "Python" + ```python + from ultralytics.data.annotator import auto_annotate + + auto_annotate(data="path/to/images", det_model="yolov8x.pt", sam_model='sam_b.pt') + ``` + +| Argument | Type | Description | Default | +|------------|----------------------|------------------------------------------------------------------------------------------------------------------------|--------------| +| data | str | Chemin d'accès à un dossier contenant les images à annoter. | | +| det_model | str, optionnel | Modèle de détection pré-entraîné YOLO. Par défaut, 'yolov8x.pt'. | 'yolov8x.pt' | +| sam_model | str, optionnel | Modèle de segmentation pré-entraîné SAM. Par défaut, 'sam_b.pt'. 
| 'sam_b.pt' | +| device | str, optionnel | Appareil sur lequel exécuter les modèles. Par défaut, une chaîne vide (CPU ou GPU, si disponible). | | +| output_dir | str, None, optionnel | Répertoire pour enregistrer les résultats annotés. Par défaut, un dossier 'labels' dans le même répertoire que 'data'. | None | + +La fonction `auto_annotate` prend en compte le chemin de vos images, avec des arguments optionnels pour spécifier les modèles de détection et de segmentation SAM pré-entraînés, l'appareil sur lequel exécuter les modèles et le répertoire de sortie pour enregistrer les résultats annotés. + +L'annotation automatique avec des modèles pré-entraînés peut réduire considérablement le temps et les efforts nécessaires pour créer des jeux de données de segmentation de haute qualité. Cette fonctionnalité est particulièrement bénéfique pour les chercheurs et les développeurs travaillant avec de grandes collections d'images, car elle leur permet de se concentrer sur le développement et l'évaluation des modèles plutôt que sur l'annotation manuelle. + +## Citations et remerciements + +Si vous trouvez SAM utile dans vos travaux de recherche ou de développement, veuillez envisager de citer notre article : + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{kirillov2023segment, + title={Segment Anything}, + author={Alexander Kirillov and Eric Mintun and Nikhila Ravi and Hanzi Mao and Chloe Rolland and Laura Gustafson and Tete Xiao and Spencer Whitehead and Alexander C. Berg and Wan-Yen Lo and Piotr Dollár and Ross Girshick}, + year={2023}, + eprint={2304.02643}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +Nous tenons à exprimer notre gratitude à Meta AI pour la création et la maintenance de cette ressource précieuse pour la communauté de la vision par ordinateur. 
+ +*keywords: Segment Anything, Segment Anything Model, SAM, Meta SAM, segmentation d'image, segmentation promptable, performances hors échantillon, jeu de données SA-1B, architecture avancée, annotation automatique, Ultralytics, modèles pré-entraînés, SAM de base, SAM large, segmentation d'instance, vision par ordinateur, IA, intelligence artificielle, apprentissage automatique, annotation de données, masques de segmentation, modèle de détection, modèle de détection YOLO, bibtex, Meta AI.* diff --git a/docs/fr/models/yolo-nas.md b/docs/fr/models/yolo-nas.md new file mode 100644 index 0000000..e1165ec --- /dev/null +++ b/docs/fr/models/yolo-nas.md @@ -0,0 +1,121 @@ +--- +comments: true +description: Découvrez une documentation détaillée sur YOLO-NAS, un modèle de détection d'objets supérieur. Apprenez-en davantage sur ses fonctionnalités, les modèles pré-entraînés, son utilisation avec l'API Python d'Ultralytics, et bien plus encore. +keywords: YOLO-NAS, Deci AI, détection d'objets, apprentissage profond, recherche architecturale neuronale, API Python d'Ultralytics, modèle YOLO, modèles pré-entraînés, quantification, optimisation, COCO, Objects365, Roboflow 100 +--- + +# YOLO-NAS + +## Aperçu + +Développé par Deci AI, YOLO-NAS est un modèle de détection d'objets révolutionnaire. Il est le fruit d'une technologie avancée de recherche architecturale neuronale, minutieusement conçu pour pallier les limitations des précédents modèles YOLO. Avec des améliorations significatives en matière de prise en charge de la quantification et de compromis entre précision et latence, YOLO-NAS représente une avancée majeure en matière de détection d'objets. + +![Exemple de modèle](https://learnopencv.com/wp-content/uploads/2023/05/yolo-nas_COCO_map_metrics.png) +**Aperçu de YOLO-NAS**. YOLO-NAS utilise des blocs adaptés à la quantification et une quantification sélective pour des performances optimales. 
Le modèle, une fois converti en version quantifiée INT8, présente une baisse de précision minimale, ce qui constitue une amélioration significative par rapport aux autres modèles. Ces avancées aboutissent à une architecture supérieure offrant des capacités de détection d'objets inégalées et des performances exceptionnelles. + +### Fonctionnalités clés + +- **Bloc de base compatible avec la quantification:** YOLO-NAS introduit un nouveau bloc de base adapté à la quantification, ce qui permet de pallier l'une des principales limitations des précédents modèles YOLO. +- **Entraînement sophistiqué et quantification:** YOLO-NAS utilise des schémas d'entraînement avancés et une quantification après l'entraînement pour améliorer les performances. +- **Optimisation AutoNAC et pré-entraînement:** YOLO-NAS utilise l'optimisation AutoNAC et est pré-entraîné sur des ensembles de données renommés tels que COCO, Objects365 et Roboflow 100. Ce pré-entraînement le rend extrêmement adapté aux tâches de détection d'objets ultérieures dans des environnements de production. + +## Modèles pré-entraînés + +Découvrez la puissance de la détection d'objets de nouvelle génération avec les modèles YOLO-NAS pré-entraînés fournis par Ultralytics. Ces modèles sont conçus pour offrir des performances exceptionnelles en termes de vitesse et de précision. Choisissez parmi une variété d'options adaptées à vos besoins spécifiques : + +| Modèle | mAP | Latence (ms) | +|------------------|-------|--------------| +| YOLO-NAS S | 47.5 | 3.21 | +| YOLO-NAS M | 51.55 | 5.85 | +| YOLO-NAS L | 52.22 | 7.87 | +| YOLO-NAS S INT-8 | 47.03 | 2.36 | +| YOLO-NAS M INT-8 | 51.0 | 3.78 | +| YOLO-NAS L INT-8 | 52.1 | 4.78 | + +Chaque variante de modèle est conçue pour offrir un équilibre entre la précision moyenne (mAP) et la latence, vous permettant ainsi d'optimiser vos tâches de détection d'objets en termes de performance et de vitesse. 
+ +## Exemples d'utilisation + +Ultralytics a rendu les modèles YOLO-NAS faciles à intégrer dans vos applications Python grâce à notre package Python `ultralytics`. Le package fournit une interface conviviale pour simplifier le processus. + +Les exemples suivants montrent comment utiliser les modèles YOLO-NAS avec le package `ultralytics` pour l'inférence et la validation : + +### Exemples d'inférence et de validation + +Dans cet exemple, nous validons YOLO-NAS-s sur l'ensemble de données COCO8. + +!!! Example "Exemple" + + Cet exemple fournit un code simple pour l'inférence et la validation de YOLO-NAS. Pour gérer les résultats de l'inférence, consultez le mode [Predict](../modes/predict.md). Pour utiliser YOLO-NAS avec des modes supplémentaires, consultez [Val](../modes/val.md) et [Export](../modes/export.md). L'entraînement n'est pas pris en charge pour YOLO-NAS avec le package `ultralytics`. + + === "Python" + + Il est possible de passer des modèles pré-entraînés `*.pt` de PyTorch à la classe `NAS()` pour créer une instance de modèle en Python : + + ```python + from ultralytics import NAS + + # Charger un modèle YOLO-NAS-s pré-entraîné sur COCO + model = NAS('yolo_nas_s.pt') + + # Afficher les informations sur le modèle (facultatif) + model.info() + + # Valider le modèle sur l'ensemble de données COCO8 + results = model.val(data='coco8.yaml') + + # Effectuer une inférence avec le modèle YOLO-NAS-s sur l'image 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + Des commandes CLI sont disponibles pour exécuter directement les modèles : + + ```bash + # Charger un modèle YOLO-NAS-s pré-entraîné sur COCO et valider ses performances sur l'ensemble de données COCO8 + yolo val model=yolo_nas_s.pt data=coco8.yaml + + # Charger un modèle YOLO-NAS-s pré-entraîné sur COCO et effectuer une inférence sur l'image 'bus.jpg' + yolo predict model=yolo_nas_s.pt source=path/to/bus.jpg + ``` + +## Tâches et modes pris en charge + +Nous proposons trois variantes 
des modèles YOLO-NAS : Small (s), Medium (m) et Large (l). Chaque variante est conçue pour répondre à des besoins computationnels et de performances différents : + +- **YOLO-NAS-s** : Optimisé pour les environnements où les ressources computationnelles sont limitées mais l'efficacité est primordiale. +- **YOLO-NAS-m** : Offre une approche équilibrée, adaptée à la détection d'objets polyvalente avec une précision accrue. +- **YOLO-NAS-l** : Adapté aux scénarios nécessitant la plus haute précision, où les ressources computationnelles sont moins contraignantes. + +Voici un aperçu détaillé de chaque modèle, comprenant des liens vers leurs poids pré-entraînés, les tâches qu'ils prennent en charge et leur compatibilité avec différents modes opérationnels. + +| Type de modèle | Poids pré-entraînés | Tâches prises en charge | Inférence | Validation | Entraînement | Export | +|----------------|-----------------------------------------------------------------------------------------------|------------------------------------------|-----------|------------|--------------|--------| +| YOLO-NAS-s | [yolo_nas_s.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_s.pt) | [Détection d'objets](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-m | [yolo_nas_m.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_m.pt) | [Détection d'objets](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-l | [yolo_nas_l.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_l.pt) | [Détection d'objets](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | + +## Citations et remerciements + +Si vous utilisez YOLO-NAS dans vos travaux de recherche ou de développement, veuillez citer SuperGradients : + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{supergradients, + doi = {10.5281/ZENODO.7789328}, + url = {https://zenodo.org/record/7789328}, + author = {Aharon, Shay and {Louis-Dupont} and {Ofri Masad} and Yurkova, Kate and {Lotem Fridman} and {Lkdci} and Khvedchenya, Eugene and Rubin, Ran and Bagrov, Natan and Tymchenko, Borys and Keren, Tomer and Zhilko, Alexander and {Eran-Deci}}, + title = {Super-Gradients}, + publisher = {GitHub}, + journal = {GitHub repository}, + year = {2021}, + } + ``` + +Nous exprimons notre gratitude à l'équipe [Super-Gradients](https://github.com/Deci-AI/super-gradients/) de Deci AI pour ses efforts dans la création et la maintenance de cette précieuse ressource pour la communauté de la vision par ordinateur. Nous sommes convaincus que YOLO-NAS, avec son architecture innovante et ses capacités de détection d'objets supérieures, deviendra un outil essentiel pour les développeurs et les chercheurs. + +*keywords: YOLO-NAS, Deci AI, détection d'objets, apprentissage profond, recherche architecturale neuronale, API Python d'Ultralytics, modèle YOLO, SuperGradients, modèles pré-entraînés, bloc de base compatible avec la quantification, schémas d'entraînement avancés, quantification après l'entraînement, optimisation AutoNAC, COCO, Objects365, Roboflow 100* diff --git a/docs/fr/models/yolov3.md b/docs/fr/models/yolov3.md new file mode 100644 index 0000000..ca17b1d --- /dev/null +++ b/docs/fr/models/yolov3.md @@ -0,0 +1,98 @@ +--- +comments: true +description: Obtenez un aperçu des modèles YOLOv3, YOLOv3-Ultralytics et YOLOv3u. Apprenez-en davantage sur leurs fonctionnalités clés, leur utilisation et les tâches prises en charge pour la détection d'objets. 
+keywords: YOLOv3, YOLOv3-Ultralytics, YOLOv3u, Détection d'objets, Inférence, Entraînement, Ultralytics +--- + +# YOLOv3, YOLOv3-Ultralytics et YOLOv3u + +## Aperçu + +Ce document présente un aperçu de trois modèles de détection d'objets étroitement liés, à savoir [YOLOv3](https://pjreddie.com/darknet/yolo/), [YOLOv3-Ultralytics](https://github.com/ultralytics/yolov3) et [YOLOv3u](https://github.com/ultralytics/ultralytics). + +1. **YOLOv3**: Il s'agit de la troisième version de l'algorithme de détection d'objets You Only Look Once (YOLO). Initiée par Joseph Redmon, YOLOv3 a amélioré ses prédécesseurs en introduisant des fonctionnalités telles que des prédictions à plusieurs échelles et trois tailles différentes de noyaux de détection. + +2. **YOLOv3-Ultralytics**: Il s'agit de l'implémentation par Ultralytics du modèle YOLOv3. Il reproduit l'architecture d'origine de YOLOv3 et offre des fonctionnalités supplémentaires, telles que la prise en charge de plusieurs modèles pré-entraînés et des options de personnalisation plus faciles. + +3. **YOLOv3u**: Il s'agit d'une version mise à jour de YOLOv3-Ultralytics qui intègre la nouvelle tête de détection sans ancrage et sans score d'objectivité (objectness) utilisée dans les modèles YOLOv8. YOLOv3u conserve la même architecture de backbone et de neck que YOLOv3, mais avec la nouvelle tête de détection de YOLOv8. + +![Ultralytics YOLOv3](https://raw.githubusercontent.com/ultralytics/assets/main/yolov3/banner-yolov3.png) + +## Caractéristiques clés + +- **YOLOv3**: A introduit l'utilisation de trois échelles différentes pour la détection, en tirant parti de trois tailles différentes de noyaux de détection : 13x13, 26x26 et 52x52. Cela a considérablement amélioré la précision de la détection pour les objets de différentes tailles. De plus, YOLOv3 a ajouté des fonctionnalités telles que des prédictions multi-étiquettes pour chaque boîte englobante et un meilleur réseau d'extraction de caractéristiques.
+ +- **YOLOv3-Ultralytics**: L'implémentation d'Ultralytics de YOLOv3 offre les mêmes performances que le modèle d'origine, mais propose également un support supplémentaire pour plus de modèles pré-entraînés, des méthodes d'entraînement supplémentaires et des options de personnalisation plus faciles. Cela le rend plus polyvalent et convivial pour les applications pratiques. + +- **YOLOv3u**: Ce modèle mis à jour intègre la nouvelle tête de détection sans ancrage et sans objectivité de YOLOv8. En éliminant le besoin de boîtes d'ancrage prédéfinies et de scores d'objectivité, cette conception de tête de détection peut améliorer la capacité du modèle à détecter des objets de différentes tailles et formes. Cela rend YOLOv3u plus robuste et précis pour les tâches de détection d'objets. + +## Tâches et modes pris en charge + +Les modèles de la série YOLOv3, notamment YOLOv3, YOLOv3-Ultralytics et YOLOv3u, sont spécialement conçus pour les tâches de détection d'objets. Ces modèles sont réputés pour leur efficacité dans divers scénarios réels, alliant précision et rapidité. Chaque variante propose des fonctionnalités et des optimisations uniques, les rendant adaptés à une gamme d'applications. + +Les trois modèles prennent en charge un ensemble complet de modes, garantissant ainsi leur polyvalence à différentes étapes du déploiement et du développement du modèle. Ces modes comprennent [Inférence](../modes/predict.md), [Validation](../modes/val.md), [Entraînement](../modes/train.md) et [Export](../modes/export.md), offrant aux utilisateurs un ensemble complet d'outils pour une détection d'objets efficace. 
+ +| Type de modèle | Tâches prises en charge | Inférence | Validation | Entraînement | Export | +|--------------------|------------------------------------------|-----------|------------|--------------|--------| +| YOLOv3 | [Détection d'objets](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3-Ultralytics | [Détection d'objets](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3u | [Détection d'objets](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +Ce tableau offre un aperçu rapide des capacités de chaque variante de YOLOv3, mettant en évidence leur polyvalence et leur pertinence pour diverses tâches et modes opérationnels dans les flux de travail de détection d'objets. + +## Exemples d'utilisation + +Cet exemple présente des exemples simples d'entraînement et d'inférence de YOLOv3. Pour une documentation complète sur ces exemples et d'autres [modes](../modes/index.md), consultez les pages de documentation sur [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) et [Export](../modes/export.md). + +!!! 
Example "Exemple" + + === "Python" + + Les modèles pré-entraînés PyTorch `*.pt`, ainsi que les fichiers de configuration `*.yaml`, peuvent être transmis à la classe `YOLO()` pour créer une instance de modèle en Python : + + ```python + from ultralytics import YOLO + + # Charger un modèle YOLOv3n pré-entraîné avec COCO + model = YOLO('yolov3n.pt') + + # Afficher les informations sur le modèle (facultatif) + model.info() + + # Entraîner le modèle sur l'ensemble de données d'exemple COCO8 pendant 100 époques + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Exécuter l'inférence avec le modèle YOLOv3n sur l'image 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + Des commandes CLI sont disponibles pour exécuter directement les modèles : + + ```bash + # Charger un modèle YOLOv3n pré-entraîné avec COCO et l'entraîner sur l'ensemble de données d'exemple COCO8 pendant 100 époques + yolo train model=yolov3n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Charger un modèle YOLOv3n pré-entraîné avec COCO et exécuter l'inférence sur l'image 'bus.jpg' + yolo predict model=yolov3n.pt source=path/to/bus.jpg + ``` + +## Citations et remerciements + +Si vous utilisez YOLOv3 dans le cadre de vos recherches, veuillez citer les articles originaux sur YOLO et le référentiel YOLOv3 d'Ultralytics : + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{redmon2018yolov3, + title={YOLOv3: An Incremental Improvement}, + author={Redmon, Joseph and Farhadi, Ali}, + journal={arXiv preprint arXiv:1804.02767}, + year={2018} + } + ``` + +Merci à Joseph Redmon et Ali Farhadi pour le développement du YOLOv3 original. diff --git a/docs/fr/models/yolov4.md b/docs/fr/models/yolov4.md new file mode 100644 index 0000000..1c44964 --- /dev/null +++ b/docs/fr/models/yolov4.md @@ -0,0 +1,71 @@ +--- +comments: true +description: Découvrez notre guide détaillé sur YOLOv4, un détecteur d'objets en temps réel de pointe. 
Comprenez ses points forts architecturaux, ses fonctionnalités innovantes et des exemples d'application. +keywords: ultralytics, YOLOv4, détection d'objets, réseau neuronal, détection en temps réel, détecteur d'objets, apprentissage automatique +--- + +# YOLOv4: Détection d'Objets Rapide et Précise + +Bienvenue sur la page de documentation d'Ultralytics pour YOLOv4, un détecteur d'objets en temps réel de pointe lancé en 2020 par Alexey Bochkovskiy sur [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet). YOLOv4 est conçu pour offrir un équilibre optimal entre vitesse et précision, en en faisant un excellent choix pour de nombreuses applications. + +![Schéma d'architecture de YOLOv4](https://user-images.githubusercontent.com/26833433/246185689-530b7fe8-737b-4bb0-b5dd-de10ef5aface.png) +**Schéma d'architecture de YOLOv4**. Présentant la conception détaillée du réseau de YOLOv4, comprenant les composants backbone, neck et head, ainsi que leurs couches interconnectées pour une détection d'objets en temps réel optimale. + +## Introduction + +YOLOv4 signifie You Only Look Once version 4. Il s'agit d'un modèle de détection d'objets en temps réel développé pour remédier aux limitations des versions précédentes de YOLO comme [YOLOv3](yolov3.md) et d'autres modèles de détection d'objets. Contrairement à d'autres détecteurs d'objets basés sur des réseaux neuronaux convolutifs (CNN), YOLOv4 n'est pas seulement applicable aux systèmes de recommandation, mais aussi à la gestion de processus autonomes et à la réduction de l'entrée humaine. Son utilisation sur des unités de traitement graphique (GPU) conventionnelles permet une utilisation massive à un prix abordable, et il est conçu pour fonctionner en temps réel sur un GPU conventionnel tout en ne nécessitant qu'un seul de ces GPU pour l'entraînement. + +## Architecture + +YOLOv4 utilise plusieurs fonctionnalités innovantes qui travaillent ensemble pour optimiser ses performances. 
Celles-ci incluent les connexions résiduelles pondérées (WRC), les connexions partielles à travers les étapes (CSP), la normalisation mini-batch traversée (CmBN), l'entraînement auto-antagoniste (SAT), l'activation Mish, l'augmentation des données en mosaïque, la régularisation DropBlock et la perte CIoU. Ces fonctionnalités sont combinées pour obtenir des résultats de pointe. + +Un détecteur d'objets typique est composé de plusieurs parties, notamment l'entrée, le backbone, le neck et le head. Le backbone de YOLOv4 est pré-entraîné sur ImageNet et est utilisé pour prédire les classes et les boîtes englobantes des objets. Le backbone peut provenir de plusieurs modèles, notamment VGG, ResNet, ResNeXt ou DenseNet. La partie "neck" du détecteur est utilisée pour collecter des cartes de caractéristiques à partir de différentes étapes et comprend généralement plusieurs chemins "bottom-up" et plusieurs chemins "top-down". La partie "head" est ce qui est utilisé pour faire les détections et classifications finales des objets. + +## Ensemble de Bonus + +YOLOv4 utilise également des méthodes appelées "ensemble de bonus", qui sont des techniques permettant d'améliorer la précision du modèle lors de l'entraînement sans augmenter le coût de l'inférence. L'augmentation de données est une technique commune de l'ensemble de bonus utilisée dans la détection d'objets, qui augmente la variabilité des images d'entrée pour améliorer la robustesse du modèle. Quelques exemples d'augmentation de données incluent les distorsions photométriques (ajustement de la luminosité, du contraste, de la teinte, de la saturation et du bruit d'une image) et les distorsions géométriques (ajout d'échelle aléatoire, de recadrage, de retournement et de rotation). Ces techniques aident le modèle à mieux généraliser à différents types d'images. + +## Fonctionnalités et Performances + +YOLOv4 est conçu pour une vitesse et une précision optimales dans la détection d'objets. 
L'architecture de YOLOv4 comprend CSPDarknet53 en tant que backbone, PANet en tant que neck et YOLOv3 en tant que detection head. Ce design permet à YOLOv4 de réaliser une détection d'objets à une vitesse impressionnante, ce qui le rend adapté aux applications en temps réel. YOLOv4 excelle également en précision, atteignant des résultats de pointe dans les benchmarks de détection d'objets. + +## Exemples d'Utilisation + +Au moment de la rédaction de ce document, Ultralytics ne prend pas en charge les modèles YOLOv4. Par conséquent, les utilisateurs intéressés par l'utilisation de YOLOv4 devront consulter directement le référentiel GitHub de YOLOv4 pour les instructions d'installation et d'utilisation. + +Voici un bref aperçu des étapes typiques que vous pourriez suivre pour utiliser YOLOv4 : + +1. Rendez-vous sur le référentiel GitHub de YOLOv4 : [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet). + +2. Suivez les instructions fournies dans le fichier README pour l'installation. Cela implique généralement de cloner le référentiel, d'installer les dépendances nécessaires et de configurer les variables d'environnement nécessaires. + +3. Une fois l'installation terminée, vous pouvez entraîner et utiliser le modèle selon les instructions d'utilisation fournies dans le référentiel. Cela implique généralement la préparation de votre ensemble de données, la configuration des paramètres du modèle, l'entraînement du modèle, puis l'utilisation du modèle entraîné pour effectuer la détection d'objets. + +Veuillez noter que les étapes spécifiques peuvent varier en fonction de votre cas d'utilisation spécifique et de l'état actuel du référentiel YOLOv4. Il est donc fortement recommandé de se référer directement aux instructions fournies dans le référentiel GitHub de YOLOv4. 
+ +Nous regrettons tout inconvénient que cela pourrait causer et nous nous efforcerons de mettre à jour ce document avec des exemples d'utilisation pour Ultralytics une fois que le support de YOLOv4 sera implémenté. + +## Conclusion + +YOLOv4 est un modèle de détection d'objets puissant et efficace qui concilie vitesse et précision. Son utilisation de fonctionnalités uniques et de techniques "ensemble de bonus" lors de l'entraînement lui permet de réaliser d'excellentes performances dans les tâches de détection d'objets en temps réel. YOLOv4 peut être entraîné et utilisé par n'importe qui disposant d'un GPU conventionnel, le rendant accessible et pratique pour un large éventail d'applications. + +## Citations et Remerciements + +Nous tenons à remercier les auteurs de YOLOv4 pour leurs contributions importantes dans le domaine de la détection d'objets en temps réel : + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{bochkovskiy2020yolov4, + title={YOLOv4: Optimal Speed and Accuracy of Object Detection}, + author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, + year={2020}, + eprint={2004.10934}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +L'article original de YOLOv4 peut être consulté sur [arXiv](https://arxiv.org/pdf/2004.10934.pdf). Les auteurs ont rendu leur travail accessible au public, et le code source peut être consulté sur [GitHub](https://github.com/AlexeyAB/darknet). Nous apprécions leurs efforts pour faire progresser le domaine et rendre leur travail accessible à la communauté élargie. diff --git a/docs/fr/models/yolov5.md b/docs/fr/models/yolov5.md new file mode 100644 index 0000000..885c70e --- /dev/null +++ b/docs/fr/models/yolov5.md @@ -0,0 +1,113 @@ +--- +comments: true +description: Découvrez YOLOv5u, une version améliorée du modèle YOLOv5 offrant un meilleur compromis entre précision et vitesse, ainsi que de nombreux modèles pré-entraînés pour diverses tâches de détection d'objets. 
+keywords: YOLOv5u, détection d'objets, modèles pré-entraînés, Ultralytics, inférence, validation, YOLOv5, YOLOv8, sans ancre, sans objectivité, applications temps réel, apprentissage automatique +--- + +# YOLOv5 + +## Présentation + +YOLOv5u représente une avancée dans les méthodologies de détection d'objets. Originaire de l'architecture fondamentale du modèle [YOLOv5](https://github.com/ultralytics/yolov5) développé par Ultralytics, YOLOv5u intègre la division sans ancre et sans objectivité, une fonctionnalité précédemment introduite dans les modèles [YOLOv8](yolov8.md). Cette adaptation affine l'architecture du modèle, ce qui conduit à un meilleur compromis entre précision et vitesse dans les tâches de détection d'objets. Compte tenu des résultats empiriques et des fonctionnalités dérivées, YOLOv5u offre une alternative efficace pour ceux qui recherchent des solutions robustes à la fois pour la recherche et les applications pratiques. + +![YOLOv5 Ultralytics](https://raw.githubusercontent.com/ultralytics/assets/main/yolov5/v70/splash.png) + +## Principales fonctionnalités + +- **Division sans ancre Ultralytics :** Les modèles de détection d'objets traditionnels reposent sur des boîtes d'ancrage prédéfinies pour prédire les emplacements des objets. Cependant, YOLOv5u modernise cette approche. En adoptant une division sans ancre Ultralytics, il garantit un mécanisme de détection plus flexible et adaptatif, ce qui améliore les performances dans divers scénarios. + +- **Bon compromis entre précision et vitesse optimisée :** La vitesse et la précision sont souvent opposées. Mais YOLOv5u remet en question ce compromis. Il offre un équilibre calibré, garantissant des détections en temps réel sans compromettre la précision. Cette fonctionnalité est particulièrement précieuse pour les applications qui demandent des réponses rapides, comme les véhicules autonomes, la robotique et l'analyse vidéo en temps réel. 
+ +- **Variété de modèles pré-entraînés :** Comprendre que différentes tâches nécessitent différents ensembles d'outils, YOLOv5u propose une pléthore de modèles pré-entraînés. Que vous vous concentriez sur l'inférence, la validation ou l'entraînement, un modèle sur mesure vous attend. Cette variété garantit que vous n'utilisez pas une solution universelle, mais un modèle spécifiquement ajusté à votre défi unique. + +## Tâches et modes pris en charge + +Les modèles YOLOv5u, avec divers poids pré-entraînés, excellent dans les tâches de [détection d'objets](../tasks/detect.md). Ils prennent en charge une gamme complète de modes, ce qui les rend adaptés à diverses applications, du développement au déploiement. + +| Type de modèle | Poids pré-entraînés | Tâche | Inférence | Validation | Entraînement | Export | +|----------------|-----------------------------------------------------------------------------------------------------------------------------|------------------------------------------|-----------|------------|--------------|--------| +| YOLOv5u | `yolov5nu`, `yolov5su`, `yolov5mu`, `yolov5lu`, `yolov5xu`, `yolov5n6u`, `yolov5s6u`, `yolov5m6u`, `yolov5l6u`, `yolov5x6u` | [Détection d'objets](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +Ce tableau fournit un aperçu détaillé des variantes de modèles YOLOv5u, mettant en évidence leur applicabilité dans les tâches de détection d'objets et leur prise en charge de divers modes opérationnels tels que [Inférence](../modes/predict.md), [Validation](../modes/val.md), [Entraînement](../modes/train.md) et [Exportation](../modes/export.md). Cette prise en charge complète garantit que les utilisateurs peuvent exploiter pleinement les capacités des modèles YOLOv5u dans un large éventail de scénarios de détection d'objets. + +## Métriques de performance + +!!! 
Performance + + === "Détection" + + Consultez la [documentation sur la détection](https://docs.ultralytics.com/tasks/detect/) pour des exemples d'utilisation avec ces modèles entraînés sur [COCO](https://docs.ultralytics.com/datasets/detect/coco/), qui comprennent 80 classes pré-entraînées. + + | Modèle | YAML | taille
(pixels) | mAPval
50-95 | Vitesse
CPU ONNX
(ms) | Vitesse
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | + |-------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|-----------------------|----------------------|--------------------------------|-------------------------------------|--------------------|-------------------| + | [yolov5nu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5nu.pt) | [yolov5n.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 34,3 | 73,6 | 1,06 | 2,6 | 7,7 | + | [yolov5su.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5su.pt) | [yolov5s.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 43,0 | 120,7 | 1,27 | 9,1 | 24,0 | + | [yolov5mu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5mu.pt) | [yolov5m.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 49,0 | 233,9 | 1,86 | 25,1 | 64,2 | + | [yolov5lu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5lu.pt) | [yolov5l.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 52,2 | 408,4 | 2,50 | 53,2 | 135,0 | + | [yolov5xu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5xu.pt) | [yolov5x.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 53,2 | 763,2 | 3,81 | 97,2 | 246,4 | + | | | | | | | | | + | [yolov5n6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5n6u.pt) | [yolov5n6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 42,1 | 211,0 | 1,83 | 4,3 | 7,8 | + | [yolov5s6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5s6u.pt) | 
[yolov5s6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 48,6 | 422,6 | 2,34 | 15,3 | 24,6 | + | [yolov5m6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5m6u.pt) | [yolov5m6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 53,6 | 810,9 | 4,36 | 41,2 | 65,7 | + | [yolov5l6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5l6u.pt) | [yolov5l6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 55,7 | 1470,9 | 5,47 | 86,1 | 137,4 | + | [yolov5x6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5x6u.pt) | [yolov5x6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 56,8 | 2436,5 | 8,98 | 155,4 | 250,7 | + +## Exemples d'utilisation + +Cet exemple présente des exemples simples d'entraînement et d'inférence YOLOv5. Pour une documentation complète sur ces exemples et d'autres [modes](../modes/index.md), consultez les pages de documentation [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) et [Export](../modes/export.md). + +!!! 
Example "Exemple" + + === "Python" + + Les modèles PyTorch pré-entraînés `*.pt` ainsi que les fichiers de configuration `*.yaml` peuvent être passés à la classe `YOLO()` pour créer une instance de modèle en Python : + + ```python + from ultralytics import YOLO + + # Charger un modèle YOLOv5n pré-entraîné sur COCO + model = YOLO('yolov5n.pt') + + # Afficher les informations sur le modèle (facultatif) + model.info() + + # Entraîner le modèle sur l'ensemble de données d'exemple COCO8 pendant 100 époques + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Exécuter l'inférence avec le modèle YOLOv5n sur l'image 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + Des commandes CLI sont disponibles pour exécuter directement les modèles : + + ```bash + # Charger un modèle YOLOv5n pré-entraîné sur COCO et l'entraîner sur l'ensemble de données d'exemple COCO8 pendant 100 époques + yolo train model=yolov5n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Charger un modèle YOLOv5n pré-entraîné sur COCO et exécuter l'inférence sur l'image 'bus.jpg' + yolo predict model=yolov5n.pt source=path/to/bus.jpg + ``` + +## Citations et remerciements + +Si vous utilisez YOLOv5 ou YOLOv5u dans vos recherches, veuillez citer le référentiel Ultralytics YOLOv5 comme suit : + +!!! Quote "" + + === "BibTeX" + ```bibtex + @software{yolov5, + title = {Ultralytics YOLOv5}, + author = {Glenn Jocher}, + year = {2020}, + version = {7.0}, + license = {AGPL-3.0}, + url = {https://github.com/ultralytics/yolov5}, + doi = {10.5281/zenodo.3908559}, + orcid = {0000-0001-5950-6979} + } + ``` + +Veuillez noter que les modèles YOLOv5 sont fournis sous les licences [AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) et [Enterprise](https://ultralytics.com/license).
diff --git a/docs/fr/models/yolov6.md b/docs/fr/models/yolov6.md new file mode 100644 index 0000000..3d4cc36 --- /dev/null +++ b/docs/fr/models/yolov6.md @@ -0,0 +1,107 @@ +--- +comments: true +description: Explorez Meituan YOLOv6, un modèle de détection d'objets à la pointe de la technologie offrant un équilibre entre vitesse et précision. Plongez-vous dans les fonctionnalités, les modèles pré-entraînés et l'utilisation de Python. +keywords: Meituan YOLOv6, détection d'objets, Ultralytics, YOLOv6 docs, Bi-directional Concatenation, Anchor-Aided Training, modèles pré-entraînés, applications en temps réel +--- + +# Meituan YOLOv6 + +## Vue d'ensemble + +[Meituan](https://about.meituan.com/) YOLOv6 est un détecteur d'objets de pointe qui offre un équilibre remarquable entre vitesse et précision, ce qui en fait un choix populaire pour les applications en temps réel. Ce modèle introduit plusieurs améliorations remarquables sur son architecture et son schéma d'entraînement, notamment la mise en œuvre d'un module de concaténation bidirectionnelle (BiC), d'une stratégie d'entraînement assistée par ancrage (AAT) et d'une conception améliorée de l'épine dorsale et du cou pour une précision de pointe sur l'ensemble de données COCO. + +![Meituan YOLOv6](https://user-images.githubusercontent.com/26833433/240750495-4da954ce-8b3b-41c4-8afd-ddb74361d3c2.png) +![Exemple d'image du modèle](https://user-images.githubusercontent.com/26833433/240750557-3e9ec4f0-0598-49a8-83ea-f33c91eb6d68.png) +**Aperçu de YOLOv6.** Diagramme de l'architecture du modèle montrant les composants du réseau redessinés et les stratégies d'entraînement qui ont conduit à d'importantes améliorations des performances. (a) L'épine dorsale de YOLOv6 (N et S sont indiqués). Notez que pour M/L, RepBlocks est remplacé par CSPStackRep. (b) La structure d'un module BiC. (c) Un bloc SimCSPSPPF. ([source](https://arxiv.org/pdf/2301.05586.pdf)). 
+ +### Caractéristiques principales + +- **Module de concaténation bidirectionnelle (BiC) :** YOLOv6 introduit un module BiC dans le cou du détecteur, améliorant les signaux de localisation et offrant des gains de performance avec une dégradation de vitesse négligeable. +- **Stratégie d'entraînement assistée par ancrage (AAT) :** Ce modèle propose AAT pour profiter des avantages des paradigmes basés sur ancrage et sans ancrage sans compromettre l'efficacité de l'inférence. +- **Conception améliorée de l'épine dorsale et du cou :** En approfondissant YOLOv6 pour inclure une autre étape dans l'épine dorsale et le cou, ce modèle atteint des performances de pointe sur l'ensemble de données COCO avec une entrée haute résolution. +- **Stratégie d'autodistillation :** Une nouvelle stratégie d'autodistillation est mise en œuvre pour améliorer les performances des modèles plus petits de YOLOv6, en améliorant la branche de régression auxiliaire pendant l'entraînement et en la supprimant lors de l'inférence afin d'éviter une baisse notable de la vitesse. + +## Métriques de performance + +YOLOv6 propose différents modèles pré-entraînés avec différentes échelles : + +- YOLOv6-N : 37,5 % de précision sur COCO val2017 à 1187 FPS avec le GPU NVIDIA Tesla T4. +- YOLOv6-S : 45,0 % de précision à 484 FPS. +- YOLOv6-M : 50,0 % de précision à 226 FPS. +- YOLOv6-L : 52,8 % de précision à 116 FPS. +- YOLOv6-L6 : Précision de pointe en temps réel. + +YOLOv6 propose également des modèles quantifiés pour différentes précisions et des modèles optimisés pour les plates-formes mobiles. + +## Exemples d'utilisation + +Cet exemple fournit des exemples simples d'entraînement et d'inférence de YOLOv6. Pour une documentation complète sur ces exemples et d'autres [modes](../modes/index.md), consultez les pages de documentation [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) et [Export](../modes/export.md). + +!!! 
Example "Exemple" + + === "Python" + + Les modèles pré-entraînés PyTorch `*.pt`, ainsi que les fichiers de configuration `*.yaml`, peuvent être utilisés pour créer une instance de modèle en python en utilisant la classe `YOLO()` : + + ```python + from ultralytics import YOLO + + # Créer un modèle YOLOv6n à partir de zéro + model = YOLO('yolov6n.yaml') + + # Afficher les informations sur le modèle (facultatif) + model.info() + + # Entraîner le modèle sur l'ensemble de données d'exemple COCO8 pendant 100 epochs + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Effectuer une inférence avec le modèle YOLOv6n sur l'image 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + Des commandes CLI sont disponibles pour exécuter directement les modèles : + + ```bash + # Créer un modèle YOLOv6n à partir de zéro et l'entraîner sur l'ensemble de données d'exemple COCO8 pendant 100 epochs + yolo train model=yolov6n.yaml data=coco8.yaml epochs=100 imgsz=640 + + # Créer un modèle YOLOv6n à partir de zéro et effectuer une inférence sur l'image 'bus.jpg' + yolo predict model=yolov6n.yaml source=path/to/bus.jpg + ``` + +## Tâches et modes pris en charge + +La série YOLOv6 propose une gamme de modèles, chacun optimisé pour la [détection d'objets](../tasks/detect.md) haute performance. Ces modèles répondent à des besoins computationnels et des exigences de précision variables, ce qui les rend polyvalents pour une large gamme d'applications. 
+ +| Type de modèle | Modèles pré-entraînés | Tâches prises en charge | Inférence | Validation | Entraînement | Export | +|----------------|-----------------------|------------------------------------------|-----------|------------|--------------|--------| +| YOLOv6-N | `yolov6-n.pt` | [Détection d'objets](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-S | `yolov6-s.pt` | [Détection d'objets](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-M | `yolov6-m.pt` | [Détection d'objets](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L | `yolov6-l.pt` | [Détection d'objets](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L6 | `yolov6-l6.pt` | [Détection d'objets](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +Ce tableau fournit un aperçu détaillé des variantes du modèle YOLOv6, mettant en évidence leurs capacités dans les tâches de détection d'objets et leur compatibilité avec différents modes opérationnels tels que l'[Inférence](../modes/predict.md), la [Validation](../modes/val.md), l'[Entraînement](../modes/train.md) et l'[Export](../modes/export.md). Cette prise en charge complète permet aux utilisateurs de tirer pleinement parti des capacités des modèles YOLOv6 dans un large éventail de scénarios de détection d'objets. + +## Citations et remerciements + +Nous tenons à remercier les auteurs pour leur contribution importante dans le domaine de la détection d'objets en temps réel : + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{li2023yolov6, + title={YOLOv6 v3.0: A Full-Scale Reloading}, + author={Chuyi Li and Lulu Li and Yifei Geng and Hongliang Jiang and Meng Cheng and Bo Zhang and Zaidan Ke and Xiaoming Xu and Xiangxiang Chu}, + year={2023}, + eprint={2301.05586}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + + Le document original de YOLOv6 peut être consulté sur [arXiv](https://arxiv.org/abs/2301.05586). Les auteurs ont rendu leur travail accessible au public, et le code source peut être consulté sur [GitHub](https://github.com/meituan/YOLOv6). 
Nous apprécions leurs efforts pour faire avancer le domaine et rendre leur travail accessible à la communauté plus large. diff --git a/docs/fr/models/yolov7.md b/docs/fr/models/yolov7.md new file mode 100644 index 0000000..b2ec749 --- /dev/null +++ b/docs/fr/models/yolov7.md @@ -0,0 +1,66 @@ +--- +comments: true +description: Découvrez le YOLOv7, un détecteur d'objets en temps réel. Comprenez sa vitesse supérieure, son impressionnante précision et son accent unique sur l'optimisation bag-of-freebies entraînable. +keywords: YOLOv7, détecteur d'objets en temps réel, état de l'art, Ultralytics, jeu de données MS COCO, ré-paramétrisation du modèle, affectation des étiquettes dynamiques, mise à l'échelle étendue, mise à l'échelle composée +--- + +# YOLOv7 : Bag-of-Freebies Entraînable + +YOLOv7 est un détecteur d'objets en temps réel à la pointe de la technologie qui surpasse tous les détecteurs d'objets connus en termes de vitesse et de précision, dans une plage de 5 FPS à 160 FPS. Il présente la précision la plus élevée (56,8% AP) parmi tous les détecteurs d'objets en temps réel connus avec un FPS de 30 ou plus sur GPU V100. De plus, YOLOv7 surpasse les autres détecteurs d'objets tels que YOLOR, YOLOX, Scaled-YOLOv4, YOLOv5 et bien d'autres en termes de vitesse et de précision. Le modèle est entraîné à partir de zéro sur le jeu de données MS COCO, sans utiliser d'autres jeux de données ou de poids pré-entraînés. Le code source de YOLOv7 est disponible sur GitHub. + +![Comparaison de YOLOv7 avec les détecteurs d'objets SOTA](https://github.com/ultralytics/ultralytics/assets/26833433/5e1e0420-8122-4c79-b8d0-2860aa79af92) +**Comparaison des détecteurs d'objets de pointe.** +À partir des résultats du Tableau 2, nous savons que la méthode proposée présente le meilleur compromis vitesse-précision dans l'ensemble. Si nous comparons YOLOv7-tiny-SiLU avec YOLOv5-N (r6.1), notre méthode est 127 FPS plus rapide et plus précise de 10,7% en AP.
De plus, YOLOv7 atteint 51,4% d'AP à une fréquence d'images de 161 FPS, tandis que PPYOLOE-L avec la même AP atteint seulement 78 FPS. En termes d'utilisation des paramètres, YOLOv7 consomme 41% de moins que PPYOLOE-L. Si nous comparons YOLOv7-X avec une vitesse d'inférence de 114 FPS à YOLOv5-L (r6.1) avec une vitesse d'inférence de 99 FPS, YOLOv7-X peut améliorer l'AP de 3,9%. Si YOLOv7-X est comparé à YOLOv5-X (r6.1) de taille similaire, la vitesse d'inférence de YOLOv7-X est de 31 FPS plus rapide. De plus, en termes de nombre de paramètres et de calculs, YOLOv7-X réduit de 22% les paramètres et de 8% les calculs par rapport à YOLOv5-X (r6.1), mais améliore l'AP de 2,2% ([Source](https://arxiv.org/pdf/2207.02696.pdf)). + +## Aperçu + +La détection d'objets en temps réel est un composant important de nombreux systèmes de vision par ordinateur, notamment le suivi multi-objets, la conduite autonome, la robotique et l'analyse d'images médicales. Ces dernières années, le développement de la détection d'objets en temps réel s'est concentré sur la conception d'architectures efficaces et l'amélioration de la vitesse d'inférence des CPU, des GPU et des unités de traitement neuronal (NPU) dans différentes configurations. YOLOv7 prend en charge les GPU mobiles et les appareils GPU, de l'edge au cloud. + +Contrairement aux détecteurs d'objets en temps réel traditionnels qui se concentrent sur l'optimisation de l'architecture, YOLOv7 introduit une approche axée sur l'optimisation du processus d'entraînement. Cela comprend des modules et des méthodes d'optimisation conçus pour améliorer la précision de la détection d'objets sans augmenter le coût de l'inférence, un concept connu sous le nom de "bag-of-freebies entraînable". + +## Fonctionnalités Principales + +YOLOv7 propose plusieurs fonctionnalités principales : + +1. 
**Ré-paramétrisation du Modèle** : YOLOv7 propose un modèle re-paramétré planifié, qui est une stratégie applicable aux couches de différents réseaux avec le concept de propagation des gradients. + +2. **Affectation Dynamique des Étiquettes** : L'entraînement d'un modèle avec des couches de sortie multiples présente un nouveau problème : "Comment attribuer des cibles dynamiques aux sorties des différentes branches ?" Pour résoudre ce problème, YOLOv7 introduit une nouvelle méthode d'affectation des étiquettes, appelée affectation des étiquettes guidée, du grossier au fin (« coarse-to-fine lead guided label assignment »). + +3. **Mise à l'Échelle Étendue et Composée** : YOLOv7 propose des méthodes de "mise à l'échelle étendue" et de "mise à l'échelle composée" pour le détecteur d'objets en temps réel, qui permettent d'utiliser efficacement les paramètres et les calculs. + +4. **Efficacité** : La méthode proposée par YOLOv7 permet de réduire efficacement environ 40% des paramètres et 50% des calculs du détecteur d'objets en temps réel de pointe, tout en offrant une vitesse d'inférence plus rapide et une plus grande précision de détection. + +## Exemples d'Utilisation + +Au moment de la rédaction de cet article, Ultralytics ne prend pas en charge les modèles YOLOv7. Par conséquent, tout utilisateur intéressé par l'utilisation de YOLOv7 devra se référer directement au dépôt GitHub de YOLOv7 pour obtenir les instructions d'installation et d'utilisation. + +Voici un bref aperçu des étapes typiques que vous pourriez suivre pour utiliser YOLOv7 : + +1. Rendez-vous sur le dépôt GitHub de YOLOv7 : [https://github.com/WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7). + +2. Suivez les instructions fournies dans le fichier README pour l'installation. Cela implique généralement de cloner le dépôt, d'installer les dépendances nécessaires et de configurer les variables d'environnement nécessaires. + +3.
Une fois l'installation terminée, vous pouvez entraîner et utiliser le modèle selon les instructions d'utilisation fournies dans le dépôt. Cela implique généralement la préparation de votre ensemble de données, la configuration des paramètres du modèle, l'entraînement du modèle, puis l'utilisation du modèle entraîné pour effectuer la détection d'objets. + +Veuillez noter que les étapes spécifiques peuvent varier en fonction de votre cas d'utilisation spécifique et de l'état actuel du dépôt YOLOv7. Par conséquent, il est fortement recommandé de vous reporter directement aux instructions fournies dans le dépôt GitHub de YOLOv7. + +Nous nous excusons pour tout inconvénient que cela pourrait causer et nous nous efforcerons de mettre à jour ce document avec des exemples d'utilisation pour Ultralytics une fois la prise en charge de YOLOv7 mise en place. + +## Citations et Remerciements + +Nous tenons à remercier les auteurs de YOLOv7 pour leurs contributions significatives dans le domaine de la détection d'objets en temps réel : + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{wang2022yolov7, + title={{YOLOv7}: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors}, + author={Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + journal={arXiv preprint arXiv:2207.02696}, + year={2022} + } + ``` + +Le document original de YOLOv7 peut être consulté sur [arXiv](https://arxiv.org/pdf/2207.02696.pdf). Les auteurs ont rendu leur travail accessible au public, et le code source peut être consulté sur [GitHub](https://github.com/WongKinYiu/yolov7). Nous apprécions leurs efforts pour faire avancer le domaine et rendre leur travail accessible à la communauté élargie. 
diff --git a/docs/fr/models/yolov8.md b/docs/fr/models/yolov8.md new file mode 100644 index 0000000..f07c40a --- /dev/null +++ b/docs/fr/models/yolov8.md @@ -0,0 +1,162 @@ +--- +comments: true +description: Explorez les fonctionnalités passionnantes de YOLOv8, la dernière version de notre détecteur d'objets en temps réel ! Découvrez comment les architectures avancées, les modèles pré-entraînés et un équilibre optimal entre précision et vitesse font de YOLOv8 le choix parfait pour vos tâches de détection d'objets. +keywords: YOLOv8, Ultralytics, détecteur d'objets en temps réel, modèles pré-entraînés, documentation, détection d'objets, série YOLO, architectures avancées, précision, vitesse +--- + +# YOLOv8 + +## Aperçu + +YOLOv8 est la dernière itération de la série YOLO de détecteurs d'objets en temps réel, offrant des performances de pointe en termes de précision et de vitesse. S'appuyant sur les avancées des versions précédentes de YOLO, YOLOv8 introduit de nouvelles fonctionnalités et optimisations qui en font un choix idéal pour diverses tâches de détection d'objets dans une large gamme d'applications. + +![Ultralytics YOLOv8](https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/yolo-comparison-plots.png) + +## Principales fonctionnalités + +- **Architectures avancées pour le tronc et le cou:** YOLOv8 utilise des architectures de tronc et de cou de pointe, ce qui permet une meilleure extraction des caractéristiques et des performances de détection d'objets améliorées. +- **Tête Ultralytics sans ancre:** YOLOv8 adopte une tête Ultralytics sans ancre, ce qui contribue à une meilleure précision et à un processus de détection plus efficace par rapport aux approches basées sur les ancres. +- **Équilibre optimisé entre précision et vitesse:** En mettant l'accent sur le maintien d'un équilibre optimal entre précision et vitesse, YOLOv8 convient aux tâches de détection d'objets en temps réel dans divers domaines d'application.
+- **Variété de modèles pré-entraînés:** YOLOv8 propose une gamme de modèles pré-entraînés pour répondre à différentes tâches et exigences de performance, ce qui facilite la recherche du modèle adapté à votre cas d'utilisation spécifique. + +## Tâches et modes pris en charge + +La série YOLOv8 propose une gamme diversifiée de modèles, chacun spécialisé dans des tâches spécifiques en vision par ordinateur. Ces modèles sont conçus pour répondre à diverses exigences, de la détection d'objets à des tâches plus complexes telles que la segmentation d'instance, la détection de pose/points clés et la classification. + +Chaque variante de la série YOLOv8 est optimisée pour sa tâche respective, garantissant des performances et une précision élevées. De plus, ces modèles sont compatibles avec divers modes opérationnels, notamment l'[Inférence](../modes/predict.md), la [Validation](../modes/val.md), l'[Entraînement](../modes/train.md) et l'[Exportation](../modes/export.md), ce qui facilite leur utilisation à différentes étapes du déploiement et du développement. 
+ +| Modèle | Noms de fichiers | Tâche | Inférence | Validation | Entraînement | Exportation | +|-------------|----------------------------------------------------------------------------------------------------------------|------------------------------------------------|-----------|------------|--------------|-------------| +| YOLOv8 | `yolov8n.pt` `yolov8s.pt` `yolov8m.pt` `yolov8l.pt` `yolov8x.pt` | [Détection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-seg | `yolov8n-seg.pt` `yolov8s-seg.pt` `yolov8m-seg.pt` `yolov8l-seg.pt` `yolov8x-seg.pt` | [Segmentation d'instance](../tasks/segment.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-pose | `yolov8n-pose.pt` `yolov8s-pose.pt` `yolov8m-pose.pt` `yolov8l-pose.pt` `yolov8x-pose.pt` `yolov8x-pose-p6.pt` | [Pose/Points clés](../tasks/pose.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-cls | `yolov8n-cls.pt` `yolov8s-cls.pt` `yolov8m-cls.pt` `yolov8l-cls.pt` `yolov8x-cls.pt` | [Classification](../tasks/classify.md) | ✅ | ✅ | ✅ | ✅ | + +Ce tableau donne un aperçu des variantes des modèles YOLOv8, mettant en évidence leur applicabilité dans des tâches spécifiques et leur compatibilité avec différents modes opérationnels tels que l'inférence, la validation, l'entraînement et l'exportation. Il met en avant la polyvalence et la robustesse de la série YOLOv8, ce qui les rend adaptés à une variété d'applications en vision par ordinateur. + +## Métriques de performance + +!!! Performance + + === "Détection (COCO)" + + Consultez la [doc de détection](https://docs.ultralytics.com/tasks/detect/) pour des exemples d'utilisation avec ces modèles entraînés sur [COCO](https://docs.ultralytics.com/datasets/detect/coco/), qui comprennent 80 classes pré-entrainées. + + | Modèle | taille
(pixels) | mAPval
50-95 | Vitesse
CPU ONNX
(ms) | Vitesse
A100 TensorRT
(ms) | paramètres
(M) | FLOPs
(B) | + | ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------- | ------------------------------------ | ------------------ | ----------------- | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37,3 | 80,4 | 0,99 | 3,2 | 8,7 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44,9 | 128,4 | 1,20 | 11,2 | 28,6 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50,2 | 234,7 | 1,83 | 25,9 | 78,9 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52,9 | 375,2 | 2,39 | 43,7 | 165,2 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53,9 | 479,1 | 3,53 | 68,2 | 257,8 | + + === "Détection (Open Images V7)" + + Consultez la [doc de détection](https://docs.ultralytics.com/tasks/detect/) pour des exemples d'utilisation avec ces modèles entraînés sur [Open Image V7](https://docs.ultralytics.com/datasets/detect/open-images-v7/), qui comprennent 600 classes pré-entrainées. + + | Modèle | taille
(pixels) | mAPval
50-95 | Vitesse
CPU ONNX
(ms) | Vitesse
A100 TensorRT
(ms) | paramètres
(M) | FLOPs
(B) | + | ----------------------------------------------------------------------------------------- | --------------------- | -------------------- | ------------------------------- | ------------------------------------ | ------------------ | ----------------- | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-oiv7.pt) | 640 | 18,4 | 142,4 | 1,21 | 3,5 | 10,5 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-oiv7.pt) | 640 | 27,7 | 183,1 | 1,40 | 11,4 | 29,7 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-oiv7.pt) | 640 | 33,6 | 408,5 | 2,26 | 26,2 | 80,6 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-oiv7.pt) | 640 | 34,9 | 596,9 | 2,43 | 44,1 | 167,4 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-oiv7.pt) | 640 | 36,3 | 860,6 | 3,56 | 68,7 | 260,6 | + + === "Segmentation (COCO)" + + Consultez la [doc de segmentation](https://docs.ultralytics.com/tasks/segment/) pour des exemples d'utilisation avec ces modèles entraînés sur [COCO](https://docs.ultralytics.com/datasets/segment/coco/), qui comprennent 80 classes pré-entrainées. + + | Modèle | taille
(pixels) | mAPbox
50-95 | mAPmask
50-95 | Vitesse
CPU ONNX
(ms) | Vitesse
A100 TensorRT
(ms) | paramètres
(M) | FLOPs
(B) | + | -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------- | ------------------------------------ | ------------------ | ----------------- | + | [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36,7 | 30,5 | 96,1 | 1,21 | 3,4 | 12,6 | + | [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44,6 | 36,8 | 155,7 | 1,47 | 11,8 | 42,6 | + | [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49,9 | 40,8 | 317,0 | 2,18 | 27,3 | 110,2 | + | [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52,3 | 42,6 | 572,4 | 2,79 | 46,0 | 220,5 | + | [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53,4 | 43,4 | 712,1 | 4,02 | 71,8 | 344,1 | + + === "Classification (ImageNet)" + + Consultez la [doc de classification](https://docs.ultralytics.com/tasks/classify/) pour des exemples d'utilisation avec ces modèles entraînés sur [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/), qui comprennent 1000 classes pré-entrainées. + + | Modèle | taille
(pixels) | acc
top1 | acc
top5 | Vitesse
CPU ONNX
(ms) | Vitesse
A100 TensorRT
(ms) | paramètres
(M) | FLOPs
(B) à 640 | + | -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ------------------------------- | ------------------------------------ | ------------------ | ------------------------ | + | [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66,6 | 87,0 | 12,9 | 0,31 | 2,7 | 4,3 | + | [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72,3 | 91,1 | 23,4 | 0,35 | 6,4 | 13,5 | + | [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76,4 | 93,2 | 85,4 | 0,62 | 17,0 | 42,7 | + | [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78,0 | 94,1 | 163,0 | 0,87 | 37,5 | 99,7 | + | [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78,4 | 94,3 | 232,0 | 1,01 | 57,4 | 154,8 | + + === "Pose (COCO)" + + Consultez la [doc d'estimation de pose](https://docs.ultralytics.com/tasks/pose/) pour des exemples d'utilisation avec ces modèles entraînés sur [COCO](https://docs.ultralytics.com/datasets/pose/coco/), qui comprennent 1 classe pré-entraînée, 'person'. + + | Modèle | taille
(pixels) | mAPpose
50-95 | mAPpose
50 | Vitesse
CPU ONNX
(ms) | Vitesse
A100 TensorRT
(ms) | paramètres
(M) | FLOPs
(B) | + | ---------------------------------------------------------------------------------------------------- | --------------------- | --------------------- | ------------------ | ------------------------------- | ------------------------------------ | ------------------ | ----------------- | + | [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50,4 | 80,1 | 131,8 | 1,18 | 3,3 | 9,2 | + | [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60,0 | 86,2 | 233,2 | 1,42 | 11,6 | 30,2 | + | [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65,0 | 88,8 | 456,3 | 2,00 | 26,4 | 81,0 | + | [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67,6 | 90,0 | 784,5 | 2,59 | 44,4 | 168,6 | + | [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69,2 | 90,2 | 1607,1 | 3,73 | 69,4 | 263,2 | + | [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71,6 | 91,2 | 4088,7 | 10,04 | 99,1 | 1066,4 | + +## Exemples d'utilisation + +Cet exemple fournit des exemples simples d'entraînement et d'inférence avec YOLOv8. Pour une documentation complète sur ces exemples et d'autres [modes](../modes/index.md), consultez les pages de documentation [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) et [Export](../modes/export.md). + +Veuillez noter que l'exemple ci-dessous concerne les modèles de détection YOLOv8. Pour d'autres tâches prises en charge, consultez la documentation de [Segmentation](../tasks/segment.md), [Classification](../tasks/classify.md) et [Pose/Points clés](../tasks/pose.md). + +!!! 
Example "Exemple" + + === "Python" + + Les modèles pré-entraînés PyTorch `*.pt` ainsi que les fichiers de configuration `*.yaml` peuvent être utilisés pour créer une instance de modèle en python en passant aux classes `YOLO()` : + + ```python + from ultralytics import YOLO + + # Charger un modèle YOLOv8n pré-entraîné sur COCO + model = YOLO('yolov8n.pt') + + # Afficher les informations du modèle (facultatif) + model.info() + + # Entraîner le modèle sur l'exemple de jeu de données COCO8 pendant 100 époques + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Effectuer une inférence avec le modèle YOLOv8n sur l'image 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + Des commandes CLI sont disponibles pour exécuter directement les modèles : + + ```bash + # Charger un modèle YOLOv8n pré-entraîné sur COCO et l'entraîner sur l'exemple de jeu de données COCO8 pendant 100 époques + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Charger un modèle YOLOv8n pré-entraîné sur COCO et effectuer une inférence sur l'image 'bus.jpg' + yolo predict model=yolov8n.pt source=path/to/bus.jpg + ``` + +## Citations et remerciements + +Si vous utilisez le modèle YOLOv8 ou tout autre logiciel de ce référentiel dans votre travail, veuillez le citer selon le format suivant : + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @software{yolov8_ultralytics, + author = {Glenn Jocher and Ayush Chaurasia and Jing Qiu}, + title = {Ultralytics YOLOv8}, + version = {8.0.0}, + year = {2023}, + url = {https://github.com/ultralytics/ultralytics}, + orcid = {0000-0001-5950-6979, 0000-0002-7603-6750, 0000-0003-3783-7069}, + license = {AGPL-3.0} + } + ``` + +Veuillez noter que le DOI est en attente et sera ajouté à la citation dès qu'il sera disponible. Les modèles YOLOv8 sont fournis sous licence [AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) et [Enterprise](https://ultralytics.com/license). 
diff --git a/docs/fr/modes/benchmark.md b/docs/fr/modes/benchmark.md new file mode 100644 index 0000000..5708aa4 --- /dev/null +++ b/docs/fr/modes/benchmark.md @@ -0,0 +1,94 @@ +--- +comments: true +description: Apprenez comment profiler la vitesse et l'exactitude de YOLOv8 à travers divers formats d'exportation ; obtenez des insights sur les métriques mAP50-95, accuracy_top5 et plus. +keywords: Ultralytics, YOLOv8, benchmarking, profilage de vitesse, profilage de précision, mAP50-95, accuracy_top5, ONNX, OpenVINO, TensorRT, formats d'exportation YOLO +--- + +# Benchmarking de Modèles avec Ultralytics YOLO + +Écosystème Ultralytics YOLO et intégrations + +## Introduction + +Une fois votre modèle entraîné et validé, l'étape logique suivante est d'évaluer ses performances dans divers scénarios du monde réel. Le mode benchmark dans Ultralytics YOLOv8 répond à cet objectif en fournissant un cadre robuste pour évaluer la vitesse et l'exactitude de votre modèle sur une gamme de formats d'exportation. + +## Pourquoi le Benchmarking est-il Crucial ? + +- **Décisions Éclairées :** Obtenez des insights sur les arbitrages entre la vitesse et l'exactitude. +- **Allocation des Ressources :** Comprenez comment les différents formats d'exportation se comportent sur différents matériels. +- **Optimisation :** Découvrez quel format d'exportation offre la meilleure performance pour votre cas d'utilisation spécifique. +- **Efficacité des Coûts :** Utilisez les ressources matérielles plus efficacement en vous basant sur les résultats des benchmarks. + +### Mesures Clés en Mode Benchmark + +- **mAP50-95 :** Pour la détection d'objets, la segmentation et l'estimation de pose. +- **accuracy_top5 :** Pour la classification d'images. +- **Temps d'Inférence :** Temps pris pour chaque image en millisecondes. + +### Formats d'Exportation Supportés + +- **ONNX :** Pour une performance optimale sur CPU. +- **TensorRT :** Pour une efficacité maximale sur GPU. 
+- **OpenVINO :** Pour l'optimisation du matériel Intel. +- **CoreML, TensorFlow SavedModel, et Plus :** Pour des besoins variés de déploiement. + +!!! tip "Conseil" + + * Exportez vers ONNX ou OpenVINO pour un gain de vitesse CPU jusqu'à 3x. + * Exportez vers TensorRT pour un gain de vitesse GPU jusqu'à 5x. + +## Exemples d'Utilisation + +Exécutez les benchmarks YOLOv8n sur tous les formats d'exportation supportés, y compris ONNX, TensorRT, etc. Consultez la section Arguments ci-dessous pour une liste complète des arguments d'exportation. + +!!! Example "Exemple" + + === "Python" + + ```python + from ultralytics.utils.benchmarks import benchmark + + # Benchmark sur GPU + benchmark(model='yolov8n.pt', data='coco8.yaml', imgsz=640, half=False, device=0) + ``` + === "CLI" + + ```bash + yolo benchmark model=yolov8n.pt data='coco8.yaml' imgsz=640 half=False device=0 + ``` + +## Arguments + +Des arguments tels que `model`, `data`, `imgsz`, `half`, `device` et `verbose` offrent aux utilisateurs la flexibilité d'ajuster précisément les benchmarks à leurs besoins spécifiques et de comparer facilement les performances de différents formats d'exportation. + +| Clé | Valeur | Description | +|-----------|---------|---------------------------------------------------------------------------------------| +| `model` | `None` | chemin vers le fichier modèle, par ex. yolov8n.pt, yolov8n.yaml | +| `data` | `None` | chemin vers le YAML référençant le dataset de benchmarking (sous l'étiquette `val`) | +| `imgsz` | `640` | taille de l'image comme scalaire ou liste (h, w), par ex. (640, 480) | +| `half` | `False` | quantification FP16 | +| `int8` | `False` | quantification INT8 | +| `device` | `None` | appareil sur lequel exécuter, par ex. 
cuda device=0 ou device=0,1,2,3 ou device=cpu | +| `verbose` | `False` | ne pas continuer en cas d'erreur (bool), ou seuil plancher de validation (float) | + +## Formats d'Exportation + +Les benchmarks tenteront de s'exécuter automatiquement sur tous les formats d'exportation possibles ci-dessous. + +| Format | Argument `format` | Modèle | Métadonnées | Arguments | +|--------------------------------------------------------------------|-------------------|---------------------------|-------------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + 
+Consultez les détails complets sur `export` dans la page [Export](https://docs.ultralytics.com/modes/export/). diff --git a/docs/fr/modes/export.md b/docs/fr/modes/export.md new file mode 100644 index 0000000..e37d6e9 --- /dev/null +++ b/docs/fr/modes/export.md @@ -0,0 +1,108 @@ +--- +comments: true +description: Guide étape par étape sur l'exportation de vos modèles YOLOv8 vers divers formats tels que ONNX, TensorRT, CoreML et plus encore pour le déploiement. Explorez maintenant ! +keywords: YOLO, YOLOv8, Ultralytics, Exportation de modèle, ONNX, TensorRT, CoreML, TensorFlow SavedModel, OpenVINO, PyTorch, exporter un modèle +--- + +# Exportation de modèle avec Ultralytics YOLO + +Écosystème et intégrations Ultralytics YOLO + +## Introduction + +L'objectif ultime de l'entraînement d'un modèle est de le déployer pour des applications dans le monde réel. Le mode d'exportation d'Ultralytics YOLOv8 offre une large gamme d'options pour exporter votre modèle entraîné dans différents formats, le rendant déployable sur diverses plateformes et appareils. Ce guide complet vise à vous guider à travers les nuances de l'exportation de modèles, en montrant comment atteindre une compatibilité et des performances maximales. + +

+
+ +
+ Regardez : Comment exporter un modèle Ultralytics YOLOv8 entraîné personnalisé et effectuer une inférence en direct sur webcam. +

+ +## Pourquoi choisir le mode d'exportation YOLOv8 ? + +- **Polyvalence :** Exportation vers plusieurs formats, y compris ONNX, TensorRT, CoreML et plus encore. +- **Performance :** Gagnez jusqu'à 5 fois en vitesse sur GPU avec TensorRT et jusqu'à 3 fois sur CPU avec ONNX ou OpenVINO. +- **Compatibilité :** Rendez votre modèle universellement déployable sur de nombreux environnements matériels et logiciels. +- **Facilité d'utilisation :** Interface en ligne de commande (CLI) et API Python simples pour une exportation rapide et directe du modèle. + +### Caractéristiques clés du mode d'exportation + +Voici quelques-unes des fonctionnalités remarquables : + +- **Exportation en un clic :** Commandes simples pour exporter vers différents formats. +- **Exportation groupée :** Exportez des modèles capables d'inférence par lot. +- **Inférence optimisée :** Les modèles exportés sont optimisés pour des temps d'inférence plus rapides. +- **Vidéos tutorielles :** Guides détaillés et tutoriels pour une expérience d'exportation fluide. + +!!! tip "Conseil" + + * Exportez vers ONNX ou OpenVINO pour une accélération CPU jusqu'à 3 fois. + * Exportez vers TensorRT pour une accélération GPU jusqu'à 5 fois. + +## Exemples d'utilisation + +Exportez un modèle YOLOv8n vers un format différent tel que ONNX ou TensorRT. Voir la section Arguments ci-dessous pour une liste complète des arguments d'exportation. + +!!! 
Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n.pt') # chargez un modèle officiel + model = YOLO('path/to/best.pt') # chargez un modèle entraîné personnalisé + + # Exporter le modèle + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # exporter modèle officiel + yolo export model=path/to/best.pt format=onnx # exporter modèle entraîné personnalisé + ``` + +## Arguments + +Les paramètres d'exportation pour les modèles YOLO se réfèrent aux diverses configurations et options utilisées pour sauvegarder ou exporter le modèle pour utilisation dans d'autres environnements ou plateformes. Ces paramètres peuvent affecter la performance, la taille et la compatibilité du modèle avec différents systèmes. Certains paramètres d'exportation YOLO courants incluent le format du fichier modèle exporté (par exemple, ONNX, TensorFlow SavedModel), le dispositif sur lequel le modèle sera exécuté (par exemple, CPU, GPU), et la présence de fonctionnalités supplémentaires telles que des masques ou des étiquettes multiples par boîte. D'autres facteurs qui peuvent affecter le processus d'exportation incluent la tâche spécifique pour laquelle le modèle est utilisé et les exigences ou contraintes de l'environnement ou de la plateforme cible. Il est important de considérer et de configurer ces paramètres avec soin pour s'assurer que le modèle exporté est optimisé pour le cas d'utilisation visé et peut être utilisé efficacement dans l'environnement cible. + +| Clé | Valeur | Description | +|-------------|-----------------|----------------------------------------------------------------------------------| +| `format` | `'torchscript'` | format vers lequel exporter | +| `imgsz` | `640` | taille d'image sous forme scalaire ou liste (h, w), par ex. 
(640, 480) | +| `keras` | `False` | utilisez Keras pour l'exportation TensorFlow SavedModel | +| `optimize` | `False` | TorchScript : optimisation pour mobile | +| `half` | `False` | quantification FP16 | +| `int8` | `False` | quantification INT8 | +| `dynamic` | `False` | ONNX/TensorRT : axes dynamiques | +| `simplify` | `False` | ONNX/TensorRT : simplifier le modèle | +| `opset` | `None` | ONNX : version de l'ensemble d'opérations (facultatif, par défaut à la dernière) | +| `workspace` | `4` | TensorRT : taille de l'espace de travail (GB) | +| `nms` | `False` | CoreML : ajout de la NMS | + +## Formats d'exportation + +Les formats d'exportation disponibles pour YOLOv8 sont dans le tableau ci-dessous. Vous pouvez exporter vers n'importe quel format en utilisant l'argument `format`, par ex. `format='onnx'` ou `format='engine'`. + +| Format | Argument `format` | Modèle | Métadonnées | Arguments | +|--------------------------------------------------------------------|-------------------|---------------------------|-------------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | 
`yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | diff --git a/docs/fr/modes/index.md b/docs/fr/modes/index.md new file mode 100644 index 0000000..9975e04 --- /dev/null +++ b/docs/fr/modes/index.md @@ -0,0 +1,73 @@ +--- +comments: true +description: De l'entraînement au suivi, exploitez au mieux YOLOv8 d'Ultralytics. Obtenez des aperçus et des exemples pour chaque mode pris en charge, y compris la validation, l'exportation et le benchmarking. +keywords: Ultralytics, YOLOv8, Machine Learning, Détection d'objets, Entraînement, Validation, Prédiction, Exportation, Suivi, Benchmarking +--- + +# Modes Ultralytics YOLOv8 + +Écosystème Ultralytics YOLO et intégrations + +## Introduction + +Ultralytics YOLOv8 n'est pas simplement un autre modèle de détection d'objets ; c'est un cadre polyvalent conçu pour couvrir l'intégralité du cycle de vie des modèles d'apprentissage automatique — de l'ingestion de données et l'entraînement des modèles à la validation, le déploiement et le suivi en conditions réelles. Chaque mode remplit un objectif spécifique et est conçu pour vous offrir la flexibilité et l'efficacité nécessaires pour différentes tâches et cas d'utilisation. + +

+
+ +
+ Regardez : Tutoriel sur les modes Ultralytics : Entraînement, Validation, Prédiction, Exportation & Benchmark. +

+ +### Aperçu des Modes + +Comprendre les différents **modes** pris en charge par Ultralytics YOLOv8 est crucial pour tirer le maximum de vos modèles : + +- **Mode d'entraînement (Train)** : Affinez votre modèle sur des jeux de données personnalisés ou préchargés. +- **Mode de validation (Val)** : Un contrôle post-entraînement pour évaluer la performance du modèle. +- **Mode de prédiction (Predict)** : Déployez la puissance prédictive de votre modèle sur des données du monde réel. +- **Mode d'exportation (Export)** : Préparez votre modèle au déploiement dans différents formats. +- **Mode de suivi (Track)** : Étendez votre modèle de détection d'objets à des applications de suivi en temps réel. +- **Mode benchmark (Benchmark)** : Analysez la vitesse et la précision de votre modèle dans divers environnements de déploiement. + +Ce guide complet vise à vous donner un aperçu et des informations pratiques sur chaque mode, en vous aidant à exploiter tout le potentiel de YOLOv8. + +## [Entraînement (Train)](train.md) + +Le mode d'entraînement est utilisé pour entraîner un modèle YOLOv8 sur un jeu de données personnalisé. Dans ce mode, le modèle est entraîné en utilisant le jeu de données et les hyperparamètres spécifiés. Le processus d'entraînement implique l'optimisation des paramètres du modèle afin qu'il puisse prédire avec précision les classes et les emplacements des objets dans une image. + +[Exemples d'entraînement](train.md){ .md-button } + +## [Validation (Val)](val.md) + +Le mode de validation est utilisé pour valider un modèle YOLOv8 après qu'il ait été entraîné. Dans ce mode, le modèle est évalué sur un ensemble de validation pour mesurer sa précision et sa capacité de généralisation. Ce mode peut être utilisé pour ajuster les hyperparamètres du modèle afin d'améliorer ses performances. 
+ +[Exemples de validation](val.md){ .md-button } + +## [Prédiction (Predict)](predict.md) + +Le mode de prédiction est utilisé pour faire des prédictions à l'aide d'un modèle YOLOv8 entraîné sur de nouvelles images ou vidéos. Dans ce mode, le modèle est chargé à partir d'un fichier de checkpoint, et l'utilisateur peut fournir des images ou vidéos pour effectuer l'inférence. Le modèle prédit les classes et les emplacements des objets dans les images ou vidéos fournies. + +[Exemples de prédiction](predict.md){ .md-button } + +## [Exportation (Export)](export.md) + +Le mode d'exportation est utilisé pour exporter un modèle YOLOv8 dans un format pouvant être utilisé pour le déploiement. Dans ce mode, le modèle est converti dans un format pouvant être utilisé par d'autres applications logicielles ou dispositifs matériels. Ce mode est pratique pour déployer le modèle dans des environnements de production. + +[Exemples d'exportation](export.md){ .md-button } + +## [Suivi (Track)](track.md) + +Le mode de suivi est utilisé pour suivre des objets en temps réel à l'aide d'un modèle YOLOv8. Dans ce mode, le modèle est chargé à partir d'un fichier de checkpoint, et l'utilisateur peut fournir un flux vidéo en direct pour effectuer le suivi d'objets en temps réel. Ce mode est utile pour des applications telles que les systèmes de surveillance ou les voitures autonomes. + +[Exemples de suivi](track.md){ .md-button } + +## [Benchmark (Benchmark)](benchmark.md) + +Le mode benchmark est utilisé pour profiler la vitesse et la précision de divers formats d'exportation pour YOLOv8. Les benchmarks fournissent des informations sur la taille du format exporté, ses métriques `mAP50-95` (pour la détection d'objets, la segmentation et la pose) ou `accuracy_top5` (pour la classification), et le temps d'inférence en millisecondes par image pour différents formats d'exportation comme ONNX, OpenVINO, TensorRT et autres. 
Ces informations peuvent aider les utilisateurs à choisir le format d'export optimal pour leur cas d'utilisation spécifique en fonction de leurs exigences de vitesse et de précision. + +[Exemples de benchmark](benchmark.md){ .md-button } diff --git a/docs/fr/modes/predict.md b/docs/fr/modes/predict.md new file mode 100644 index 0000000..4279dff --- /dev/null +++ b/docs/fr/modes/predict.md @@ -0,0 +1,226 @@ +--- +comments: true +description: Découvrez comment utiliser le mode de prédiction YOLOv8 pour diverses tâches. Apprenez sur différentes sources d'inférence comme des images, vidéos et formats de données. +keywords: Ultralytics, YOLOv8, mode de prédiction, sources d'inférence, tâches de prédiction, mode streaming, traitement d'images, traitement vidéo, apprentissage automatique, IA +--- + +# Prédiction de Modèle avec Ultralytics YOLO + +Écosystème et intégrations Ultralytics YOLO + +## Introduction + +Dans l'univers de l'apprentissage automatique et de la vision par ordinateur, le processus de donner du sens aux données visuelles est appelé 'inférence' ou 'prédiction'. Ultralytics YOLOv8 propose une fonctionnalité puissante connue sous le nom de **mode de prédiction** adapté pour l'inférence en temps réel et haute performance sur une large gamme de sources de données. + +

+
+ +
+ Regardez : Comment Extraire les Sorties du Modèle Ultralytics YOLOv8 pour des Projets Personnalisés. +

+ +## Applications Réelles + +| Fabrication | Sports | Sécurité | +|:---------------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------------------:| +| ![Détection des Pièces de Véhicules](https://github.com/RizwanMunawar/ultralytics/assets/62513924/a0f802a8-0776-44cf-8f17-93974a4a28a1) | ![Détection des Joueurs de Football](https://github.com/RizwanMunawar/ultralytics/assets/62513924/7d320e1f-fc57-4d7f-a691-78ee579c3442) | ![Détection de Chutes de Personnes](https://github.com/RizwanMunawar/ultralytics/assets/62513924/86437c4a-3227-4eee-90ef-9efb697bdb43) | +| Détection des Pièces de Véhicules | Détection des Joueurs de Football | Détection de Chutes de Personnes | + +## Pourquoi Utiliser Ultralytics YOLO pour l'Inférence ? + +Voici pourquoi vous devriez considérer le mode de prédiction YOLOv8 pour vos besoins variés en inférence : + +- **Polyvalence :** Capable de faire des inférences sur des images, des vidéos et même des flux en direct. +- **Performance :** Conçu pour le traitement en temps réel à grande vitesse sans sacrifier la précision. +- **Facilité d'Utilisation :** Interfaces Python et CLI intuitives pour un déploiement et des tests rapides. +- **Très Personnalisable :** Divers paramètres et réglages pour ajuster le comportement d'inférence du modèle selon vos besoins spécifiques. 
+ +### Caractéristiques Clés du Mode de Prédiction + +Le mode de prédiction YOLOv8 est conçu pour être robuste et polyvalent, avec des fonctionnalités telles que : + +- **Compatibilité avec Plusieurs Sources de Données :** Que vos données soient sous forme d'images individuelles, d'une collection d'images, de fichiers vidéo ou de flux vidéo en temps réel, le mode de prédiction répond à vos besoins. +- **Mode Streaming :** Utilisez la fonctionnalité de streaming pour générer un générateur efficace en termes de mémoire d'objets `Results`. Activez-le en réglant `stream=True` dans la méthode d'appel du prédicteur. +- **Traitement par Lots :** La capacité de traiter plusieurs images ou trames vidéo dans un seul lot, accélérant ainsi le temps d'inférence. +- **Facile à Intégrer :** S'intègre facilement dans les pipelines de données existants et autres composants logiciels, grâce à son API souple. + +Les modèles YOLO d'Ultralytics renvoient soit une liste d'objets `Results` Python, soit un générateur Python efficace en termes de mémoire d'objets `Results` lorsque `stream=True` est passé au modèle pendant l'inférence : + +!!! 
Example "Prédire" + + === "Renvoie une liste avec `stream=False`" + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n.pt') # modèle YOLOv8n pré-entraîné + + # Exécuter une inférence par lots sur une liste d'images + results = model(['im1.jpg', 'im2.jpg']) # renvoie une liste d'objets Results + + # Traiter la liste des résultats + for result in results: + boxes = result.boxes # Objet Boxes pour les sorties bbox + masks = result.masks # Objet Masks pour les masques de segmentation + keypoints = result.keypoints # Objet Keypoints pour les sorties de pose + probs = result.probs # Objet Probs pour les sorties de classification + ``` + + === "Renvoie un générateur avec `stream=True`" + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n.pt') # modèle YOLOv8n pré-entraîné + + # Exécuter une inférence par lots sur une liste d'images + results = model(['im1.jpg', 'im2.jpg'], stream=True) # renvoie un générateur d'objets Results + + # Traiter le générateur de résultats + for result in results: + boxes = result.boxes # Objet Boxes pour les sorties bbox + masks = result.masks # Objet Masks pour les masques de segmentation + keypoints = result.keypoints # Objet Keypoints pour les sorties de pose + probs = result.probs # Objet Probs pour les sorties de classification + ``` + +## Sources d'Inférence + +YOLOv8 peut traiter différents types de sources d'entrée pour l'inférence, comme illustré dans le tableau ci-dessous. Les sources incluent des images statiques, des flux vidéos et divers formats de données. Le tableau indique également si chaque source peut être utilisée en mode streaming avec l'argument `stream=True` ✅. Le mode streaming est bénéfique pour traiter des vidéos ou des flux en direct car il crée un générateur de résultats au lieu de charger tous les cadres en mémoire. + +!!! 
Tip "Astuce" + + Utilisez `stream=True` pour traiter des vidéos longues ou des jeux de données volumineux afin de gérer efficacement la mémoire. Quand `stream=False`, les résultats pour toutes les trames ou tous les points de données sont stockés en mémoire, ce qui peut rapidement s'accumuler et provoquer des erreurs de mémoire insuffisante pour de grandes entrées. En revanche, `stream=True` utilise un générateur, qui ne garde que les résultats de la trame ou du point de données actuel en mémoire, réduisant considérablement la consommation de mémoire et prévenant les problèmes de mémoire insuffisante. + +| Source | Argument | Type | Notes | +|-----------------|--------------------------------------------|-----------------|------------------------------------------------------------------------------------------------------------------------------| +| image | `'image.jpg'` | `str` ou `Path` | Fichier image unique. | +| URL | `'https://ultralytics.com/images/bus.jpg'` | `str` | URL vers une image. | +| capture d'écran | `'screen'` | `str` | Prendre une capture d'écran. | +| PIL | `Image.open('im.jpg')` | `PIL.Image` | Format HWC avec canaux RGB. | +| OpenCV | `cv2.imread('im.jpg')` | `np.ndarray` | Format HWC avec canaux BGR `uint8 (0-255)`. | +| numpy | `np.zeros((640,1280,3))` | `np.ndarray` | Format HWC avec canaux BGR `uint8 (0-255)`. | +| torch | `torch.zeros(16,3,320,640)` | `torch.Tensor` | Format BCHW avec canaux RGB `float32 (0.0-1.0)`. | +| CSV | `'sources.csv'` | `str` ou `Path` | Fichier CSV contenant des chemins vers des images, vidéos ou répertoires. | +| vidéo ✅ | `'video.mp4'` | `str` ou `Path` | Fichier vidéo dans des formats comme MP4, AVI, etc. | +| répertoire ✅ | `'chemin/'` | `str` ou `Path` | Chemin vers un répertoire contenant des images ou des vidéos. | +| motif glob ✅ | `'chemin/*.jpg'` | `str` | Motif glob pour faire correspondre plusieurs fichiers. Utilisez le caractère `*` comme joker. 
| +| YouTube ✅ | `'https://youtu.be/LNwODJXcvt4'` | `str` | URL vers une vidéo YouTube. | +| flux ✅ | `'rtsp://exemple.com/media.mp4'` | `str` | URL pour des protocoles de streaming comme RTSP, RTMP, TCP, ou une adresse IP. | +| multi-flux ✅ | `'liste.streams'` | `str` ou `Path` | Fichier texte `*.streams` avec une URL de flux par ligne, c'est-à-dire que 8 flux s'exécuteront avec une taille de lot de 8. | + +Ci-dessous des exemples de code pour utiliser chaque type de source : + +!!! Example "Sources de prédiction" + + === "image" + Exécutez une inférence sur un fichier image. + ```python + from ultralytics import YOLO + + # Charger un modèle YOLOv8n pré-entraîné + model = YOLO('yolov8n.pt') + + # Définir le chemin vers le fichier image + source = 'chemin/vers/image.jpg' + + # Exécuter une inférence sur la source + results = model(source) # liste d'objets Results + ``` + + === "capture d'écran" + Exécutez une inférence sur le contenu actuel de l'écran sous forme de capture d'écran. + ```python + from ultralytics import YOLO + + # Charger un modèle YOLOv8n pré-entraîné + model = YOLO('yolov8n.pt') + + # Définir la capture d'écran actuelle comme source + source = 'screen' + + # Exécuter une inférence sur la source + results = model(source) # liste d'objets Results + ``` + + === "URL" + Exécutez une inférence sur une image ou vidéo hébergée à distance via URL. + ```python + from ultralytics import YOLO + + # Charger un modèle YOLOv8n pré-entraîné + model = YOLO('yolov8n.pt') + + # Définir l'URL d'une image ou vidéo distante + source = 'https://ultralytics.com/images/bus.jpg' + + # Exécuter une inférence sur la source + results = model(source) # liste d'objets Results + ``` + + === "PIL" + Exécutez une inférence sur une image ouverte avec la bibliothèque Python Imaging Library (PIL). 
+ ```python + from PIL import Image + from ultralytics import YOLO + + # Charger un modèle YOLOv8n pré-entraîné + model = YOLO('yolov8n.pt') + + # Ouvrir une image avec PIL + source = Image.open('chemin/vers/image.jpg') + + # Exécuter une inférence sur la source + results = model(source) # liste d'objets Results + ``` + + === "OpenCV" + Exécutez une inférence sur une image lue avec OpenCV. + ```python + import cv2 + from ultralytics import YOLO + + # Charger un modèle YOLOv8n pré-entraîné + model = YOLO('yolov8n.pt') + + # Lire une image avec OpenCV + source = cv2.imread('chemin/vers/image.jpg') + + # Exécuter une inférence sur la source + results = model(source) # liste d'objets Results + ``` + + === "numpy" + Exécutez une inférence sur une image représentée sous forme de tableau numpy. + ```python + import numpy as np + from ultralytics import YOLO + + # Charger un modèle YOLOv8n pré-entraîné + model = YOLO('yolov8n.pt') + + # Créer un tableau numpy aléatoire de forme HWC (640, 640, 3) avec des valeurs dans l'intervalle [0, 255] et de type uint8 + source = np.random.randint(low=0, high=255, size=(640, 640, 3), dtype='uint8') + + # Exécuter une inférence sur la source + results = model(source) # liste d'objets Results + ``` + + === "torch" + Exécutez une inférence sur une image représentée sous forme de tenseur PyTorch. 
+ ```python + import torch + from ultralytics import YOLO + + # Charger un modèle YOLOv8n pré-entraîné + model = YOLO('yolov8n.pt') + + # Créer un tenseur aléatoire torch de forme BCHW (1, 3, 640, 640) avec des valeurs dans l'intervalle [0, 1] et de type float32 + source = torch.rand(1, 3, 640, 640, dtype=torch.float32) + + # Exécuter une inférence sur la source + results = model(source) # liste d'objets Results diff --git a/docs/fr/modes/track.md b/docs/fr/modes/track.md new file mode 100644 index 0000000..5da691c --- /dev/null +++ b/docs/fr/modes/track.md @@ -0,0 +1,200 @@ +--- +comments: true +description: Apprenez à utiliser Ultralytics YOLO pour le suivi d'objets dans les flux vidéo. Guides pour utiliser différents traceurs et personnaliser les configurations de traceurs. +keywords: Ultralytics, YOLO, suivi d'objets, flux vidéo, BoT-SORT, ByteTrack, guide Python, guide CLI +--- + +# Suivi Multi-Objets avec Ultralytics YOLO + +Exemples de suivi multi-objets + +Le suivi d'objets dans le domaine de l'analyse vidéo est une tâche essentielle qui non seulement identifie l'emplacement et la classe des objets à l'intérieur de l'image, mais maintient également un identifiant unique pour chaque objet détecté au fur et à mesure que la vidéo progresse. Les applications sont illimitées, allant de la surveillance et de la sécurité à l'analytique sportive en temps réel. + +## Pourquoi Choisir Ultralytics YOLO pour le Suivi d'Objet ? + +La sortie des traceurs Ultralytics est cohérente avec la détection standard d'objets mais apporte la valeur ajoutée des identifiants d'objets. Cela facilite le suivi des objets dans les flux vidéo et effectue des analyses subséquentes. Voici pourquoi vous devriez envisager d'utiliser Ultralytics YOLO pour vos besoins de suivi d'objet : + +- **Efficacité :** Traitez les flux vidéo en temps réel sans compromettre la précision. +- **Flexibilité :** Prend en charge de multiples algorithmes de suivi et configurations. 
+- **Facilité d'Utilisation :** API Python simple et options CLI pour une intégration et un déploiement rapides. +- **Personnalisabilité :** Facile à utiliser avec des modèles YOLO entraînés sur mesure, permettant une intégration dans des applications spécifiques au domaine. + +

+
+ +
+ Regardez : Détection et suivi d'objets avec Ultralytics YOLOv8. +

+ +## Applications dans le Monde Réel + +| Transport | Distribution | Aquaculture | +|:------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------:| +| ![Suivi de véhicules](https://github.com/RizwanMunawar/ultralytics/assets/62513924/ee6e6038-383b-4f21-ac29-b2a1c7d386ab) | ![Suivi de personnes](https://github.com/RizwanMunawar/ultralytics/assets/62513924/93bb4ee2-77a0-4e4e-8eb6-eb8f527f0527) | ![Suivi de poissons](https://github.com/RizwanMunawar/ultralytics/assets/62513924/a5146d0f-bfa8-4e0a-b7df-3c1446cd8142) | +| Suivi de Véhicules | Suivi de Personnes | Suivi de Poissons | + +## Caractéristiques en Bref + +Ultralytics YOLO étend ses fonctionnalités de détection d'objets pour fournir un suivi d'objets robuste et polyvalent : + +- **Suivi en Temps Réel :** Suivi fluide d'objets dans des vidéos à fréquence d'images élevée. +- **Prise en Charge de Multiples Traceurs :** Choisissez parmi une variété d'algorithmes de suivi éprouvés. +- **Configurations de Traceurs Personnalisables :** Adaptez l'algorithme de suivi pour répondre à des exigences spécifiques en réglant divers paramètres. + +## Traceurs Disponibles + +Ultralytics YOLO prend en charge les algorithmes de suivi suivants. Ils peuvent être activés en passant le fichier de configuration YAML correspondant tel que `tracker=tracker_type.yaml` : + +* [BoT-SORT](https://github.com/NirAharon/BoT-SORT) - Utilisez `botsort.yaml` pour activer ce traceur. +* [ByteTrack](https://github.com/ifzhang/ByteTrack) - Utilisez `bytetrack.yaml` pour activer ce traceur. + +Le traceur par défaut est BoT-SORT. 
+ +## Suivi + +Pour exécuter le traceur sur des flux vidéo, utilisez un modèle Detect, Segment ou Pose formé tel que YOLOv8n, YOLOv8n-seg et YOLOv8n-pose. + +!!! Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle officiel ou personnalisé + model = YOLO('yolov8n.pt') # Charger un modèle Detect officiel + model = YOLO('yolov8n-seg.pt') # Charger un modèle Segment officiel + model = YOLO('yolov8n-pose.pt') # Charger un modèle Pose officiel + model = YOLO('chemin/vers/best.pt') # Charger un modèle entraîné personnalisé + + # Effectuer le suivi avec le modèle + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True) # Suivi avec le traceur par défaut + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True, tracker="bytetrack.yaml") # Suivi avec le traceur ByteTrack + ``` + + === "CLI" + + ```bash + # Effectuer le suivi avec divers modèles en utilisant l'interface en ligne de commande + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" # Modèle Detect officiel + yolo track model=yolov8n-seg.pt source="https://youtu.be/LNwODJXcvt4" # Modèle Segment officiel + yolo track model=yolov8n-pose.pt source="https://youtu.be/LNwODJXcvt4" # Modèle Pose officiel + yolo track model=chemin/vers/best.pt source="https://youtu.be/LNwODJXcvt4" # Modèle entraîné personnalisé + + # Suivi en utilisant le traceur ByteTrack + yolo track model=chemin/vers/best.pt tracker="bytetrack.yaml" + ``` + +Comme on peut le voir dans l'utilisation ci-dessus, le suivi est disponible pour tous les modèles Detect, Segment et Pose exécutés sur des vidéos ou des sources de diffusion. + +## Configuration + +### Arguments de Suivi + +La configuration du suivi partage des propriétés avec le mode Prédiction, telles que `conf`, `iou`, et `show`. Pour des configurations supplémentaires, référez-vous à la page [Predict](https://docs.ultralytics.com/modes/predict/) du modèle. + +!!! 
Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Configurer les paramètres de suivi et exécuter le traceur + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True) + ``` + + === "CLI" + + ```bash + # Configurer les paramètres de suivi et exécuter le traceur en utilisant l'interface en ligne de commande + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3 iou=0.5 show + ``` + +### Sélection du Traceur + +Ultralytics vous permet également d'utiliser un fichier de configuration de traceur modifié. Pour cela, faites simplement une copie d'un fichier de configuration de traceur (par exemple, `custom_tracker.yaml`) à partir de [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers) et modifiez toute configuration (à l'exception du `tracker_type`) selon vos besoins. + +!!! Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger le modèle et exécuter le traceur avec un fichier de configuration personnalisé + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", tracker='custom_tracker.yaml') + ``` + + === "CLI" + + ```bash + # Charger le modèle et exécuter le traceur avec un fichier de configuration personnalisé en utilisant l'interface en ligne de commande + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml' + ``` + +Pour une liste complète des arguments de suivi, référez-vous à la page [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers). + +## Exemples Python + +### Boucle de Persistance des Pistes + +Voici un script Python utilisant OpenCV (`cv2`) et YOLOv8 pour exécuter le suivi d'objet sur des images vidéo. Ce script suppose toujours que vous avez déjà installé les packages nécessaires (`opencv-python` et `ultralytics`). 
L'argument `persist=True` indique au traceur que l'image ou la trame actuelle est la suivante dans une séquence et s'attend à ce que les pistes de l'image précédente soient présentes dans l'image actuelle. + +!!! Example "Boucle for streaming avec suivi" + + ```python + import cv2 + from ultralytics import YOLO + + # Charger le modèle YOLOv8 + model = YOLO('yolov8n.pt') + + # Ouvrir le fichier vidéo + video_path = "chemin/vers/video.mp4" + cap = cv2.VideoCapture(video_path) + + # Parcourir les images vidéo + while cap.isOpened(): + # Lire une image de la vidéo + success, frame = cap.read() + + if success: + # Exécuter le suivi YOLOv8 sur l'image, en persistant les pistes entre les images + results = model.track(frame, persist=True) + + # Visualiser les résultats sur l'image + annotated_frame = results[0].plot() + + # Afficher l'image annotée + cv2.imshow("Suivi YOLOv8", annotated_frame) + + # Interrompre la boucle si 'q' est pressée + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # Interrompre la boucle si la fin de la vidéo est atteinte + break + + # Relâcher l'objet de capture vidéo et fermer la fenêtre d'affichage + cap.release() + cv2.destroyAllWindows() + ``` + +Veuillez noter le changement de `model(frame)` à `model.track(frame)`, qui active le suivi d'objet à la place de la simple détection. Ce script modifié exécutera le traceur sur chaque image de la vidéo, visualisera les résultats et les affichera dans une fenêtre. La boucle peut être quittée en appuyant sur 'q'. + +## Contribuer de Nouveaux Traceurs + +Êtes-vous compétent en suivi multi-objets et avez-vous réussi à implémenter ou adapter un algorithme de suivi avec Ultralytics YOLO ? Nous vous invitons à contribuer à notre section Traceurs sur [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers) ! Vos applications et solutions dans le monde réel pourraient être inestimables pour les utilisateurs travaillant sur des tâches de suivi. 
+ +En contribuant à cette section, vous aidez à élargir l'éventail des solutions de suivi disponibles au sein du cadre Ultralytics YOLO, ajoutant une autre couche de fonctionnalité et d'utilité pour la communauté. + +Pour initier votre contribution, veuillez vous référer à notre [Guide de Contribution](https://docs.ultralytics.com/help/contributing) pour des instructions complètes sur la soumission d'une Pull Request (PR) 🛠️. Nous sommes impatients de voir ce que vous apportez à la table ! + +Ensemble, améliorons les capacités de suivi de l'écosystème Ultralytics YOLO 🙏 ! diff --git a/docs/fr/modes/train.md b/docs/fr/modes/train.md new file mode 100644 index 0000000..652a918 --- /dev/null +++ b/docs/fr/modes/train.md @@ -0,0 +1,206 @@ +--- +comments: true +description: Guide étape par étape pour entraîner des modèles YOLOv8 avec Ultralytics YOLO incluant des exemples d'entraînement mono-GPU et multi-GPU +keywords: Ultralytics, YOLOv8, YOLO, détection d'objet, mode entraînement, jeu de données personnalisé, entraînement GPU, multi-GPU, hyperparamètres, exemples CLI, exemples Python +--- + +# Entraînement de modèles avec Ultralytics YOLO + +Ultralytics YOLO écosystème et intégrations + +## Introduction + +L'entraînement d'un modèle d'apprentissage profond implique de lui fournir des données et d'ajuster ses paramètres afin qu'il puisse faire des prédictions précises. Le mode Entraînement de Ultralytics YOLOv8 est conçu pour un entraînement efficace et performant de modèles de détection d'objets, en utilisant pleinement les capacités du matériel moderne. Ce guide vise à couvrir tous les détails nécessaires pour commencer à entraîner vos propres modèles en utilisant l'ensemble robuste de fonctionnalités de YOLOv8. + +

+
+ +
+ Regardez : Comment entraîner un modèle YOLOv8 sur votre jeu de données personnalisé dans Google Colab. +

+ +## Pourquoi choisir Ultralytics YOLO pour l'entraînement ? + +Voici quelques raisons convaincantes de choisir le mode Entraînement de YOLOv8 : + +- **Efficacité :** Optimisez l'utilisation de votre matériel, que vous soyez sur une configuration mono-GPU ou que vous échelonniez sur plusieurs GPUs. +- **Polyvalence :** Entraînez sur des jeux de données personnalisés en plus de ceux déjà disponibles comme COCO, VOC et ImageNet. +- **Convivialité :** Interfaces CLI et Python simples mais puissantes pour une expérience d'entraînement directe. +- **Flexibilité des hyperparamètres :** Un large éventail d'hyperparamètres personnalisables pour peaufiner les performances du modèle. + +### Principales caractéristiques du mode Entraînement + +Voici quelques caractéristiques remarquables du mode Entraînement de YOLOv8 : + +- **Téléchargement automatique de jeux de données :** Les jeux de données standards comme COCO, VOC et ImageNet sont téléchargés automatiquement lors de la première utilisation. +- **Support multi-GPU :** Échelonnez vos efforts d'entraînement de manière fluide sur plusieurs GPUs pour accélérer le processus. +- **Configuration des hyperparamètres :** La possibilité de modifier les hyperparamètres via des fichiers de configuration YAML ou des arguments CLI. +- **Visualisation et suivi :** Suivi en temps réel des métriques d'entraînement et visualisation du processus d'apprentissage pour de meilleures perspectives. + +!!! Tip "Astuce" + + * Les jeux de données YOLOv8 comme COCO, VOC, ImageNet et bien d'autres se téléchargent automatiquement lors de la première utilisation, par exemple `yolo train data=coco.yaml` + +## Exemples d'utilisation + +Entraînez YOLOv8n sur le jeu de données COCO128 pendant 100 époques avec une taille d'image de 640. Le dispositif d'entraînement peut être spécifié à l'aide de l'argument `device`. Si aucun argument n'est passé, le GPU `device=0` sera utilisé s'il est disponible, sinon `device=cpu` sera utilisé. 
Consultez la section Arguments ci-dessous pour obtenir une liste complète des arguments d'entraînement. + +!!! Example "Exemple d'entraînement mono-GPU et CPU" + + Le dispositif est déterminé automatiquement. Si un GPU est disponible, il sera utilisé, sinon l'entraînement commencera sur CPU. + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n.yaml') # construire un nouveau modèle à partir de YAML + model = YOLO('yolov8n.pt') # charger un modèle préentraîné (recommandé pour l'entraînement) + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # construire à partir de YAML et transférer les poids + + # Entraîner le modèle + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + # Construire un nouveau modèle à partir de YAML et commencer l'entraînement à partir de zéro + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # Commencer l'entraînement à partir d'un modèle préentraîné *.pt + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # Construire un nouveau modèle à partir de YAML, transférer les poids préentraînés et commencer l'entraînement + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### Entraînement multi-GPU + +L'entraînement multi-GPU permet une utilisation plus efficace des ressources matérielles disponibles en répartissant la charge d'entraînement sur plusieurs GPUs. Cette fonctionnalité est disponible via l'API Python et l'interface de ligne de commande. Pour activer l'entraînement multi-GPU, spécifiez les ID des dispositifs GPU que vous souhaitez utiliser. + +!!! Example "Exemple d'entraînement multi-GPU" + + Pour s'entraîner avec 2 GPUs, les dispositifs CUDA 0 et 1, utilisez les commandes suivantes. Développez à des GPUs supplémentaires selon le besoin. 
+ + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n.pt') # charger un modèle préentraîné (recommandé pour l'entraînement) + + # Entraîner le modèle avec 2 GPUs + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device=[0, 1]) + ``` + + === "CLI" + + ```bash + # Commencer l'entraînement à partir d'un modèle préentraîné *.pt en utilisant les GPUs 0 et 1 + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=0,1 + ``` + +### Entraînement MPS avec Apple M1 et M2 + +Avec le support pour les puces Apple M1 et M2 intégré dans les modèles Ultralytics YOLO, il est maintenant possible d'entraîner vos modèles sur des dispositifs utilisant le puissant framework Metal Performance Shaders (MPS). Le MPS offre un moyen performant d'exécuter des tâches de calcul et de traitement d'image sur le silicium personnalisé d'Apple. + +Pour activer l'entraînement sur les puces Apple M1 et M2, vous devez spécifier 'mps' comme votre dispositif lors du lancement du processus d'entraînement. Voici un exemple de la manière dont vous pourriez le faire en Python et via la ligne de commande : + +!!! Example "Exemple d'entraînement MPS" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n.pt') # charger un modèle préentraîné (recommandé pour l'entraînement) + + # Entraîner le modèle avec MPS + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device='mps') + ``` + + === "CLI" + + ```bash + # Commencer l'entraînement à partir d'un modèle préentraîné *.pt avec MPS + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=mps + ``` + +Tout en exploitant la puissance de calcul des puces M1/M2, cela permet un traitement plus efficace des tâches d'entraînement. 
Pour des conseils plus détaillés et des options de configuration avancée, veuillez consulter la [documentation MPS de PyTorch](https://pytorch.org/docs/stable/notes/mps.html). + +## Journalisation + +Lors de l'entraînement d'un modèle YOLOv8, il peut être précieux de suivre la performance du modèle au fil du temps. C'est là que la journalisation entre en jeu. YOLO d'Ultralytics prend en charge trois types de journaux - Comet, ClearML et TensorBoard. + +Pour utiliser un journal, sélectionnez-le dans le menu déroulant ci-dessus et exécutez-le. Le journal choisi sera installé et initialisé. + +### Comet + +[Comet](https://www.comet.ml/site/) est une plateforme qui permet aux scientifiques de données et aux développeurs de suivre, comparer, expliquer et optimiser les expériences et les modèles. Elle offre des fonctionnalités telles que le suivi en temps réel des mesures, les différences de code et le suivi des hyperparamètres. + +Pour utiliser Comet : + +!!! Example "Exemple" + + === "Python" + ```python + # pip install comet_ml + import comet_ml + + comet_ml.init() + ``` + +N'oubliez pas de vous connecter à votre compte Comet sur leur site web et d'obtenir votre clé API. Vous devrez ajouter cela à vos variables d'environnement ou à votre script pour enregistrer vos expériences. + +### ClearML + +[ClearML](https://www.clear.ml/) est une plateforme open source qui automatise le suivi des expériences et aide à partager efficacement les ressources. Elle est conçue pour aider les équipes à gérer, exécuter et reproduire leur travail en ML plus efficacement. + +Pour utiliser ClearML : + +!!! Example "Exemple" + + === "Python" + ```python + # pip install clearml + import clearml + + clearml.browser_login() + ``` + +Après avoir exécuté ce script, vous devrez vous connecter à votre compte ClearML sur le navigateur et authentifier votre session. + +### TensorBoard + +[TensorBoard](https://www.tensorflow.org/tensorboard) est un ensemble d'outils de visualisation pour TensorFlow. 
Il vous permet de visualiser votre graphique TensorFlow, de tracer des mesures quantitatives sur l'exécution de votre graphique et de montrer des données supplémentaires comme des images qui le traversent. + +Pour utiliser TensorBoard dans [Google Colab](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb) : + +!!! Example "Exemple" + + === "CLI" + ```bash + load_ext tensorboard + tensorboard --logdir ultralytics/runs # remplacer par le répertoire 'runs' + ``` + +Pour utiliser TensorBoard localement, exécutez la commande ci-dessous et consultez les résultats à l'adresse http://localhost:6006/. + +!!! Example "Exemple" + + === "CLI" + ```bash + tensorboard --logdir ultralytics/runs # remplacer par le répertoire 'runs' + ``` + +Cela chargera TensorBoard et le dirigera vers le répertoire où vos journaux d'entraînement sont sauvegardés. + +Après avoir configuré votre journal, vous pouvez ensuite poursuivre l'entraînement de votre modèle. Toutes les métriques d'entraînement seront automatiquement enregistrées sur votre plateforme choisie, et vous pourrez accéder à ces journaux pour surveiller les performances de votre modèle au fil du temps, comparer différents modèles et identifier les domaines d'amélioration. diff --git a/docs/fr/modes/val.md b/docs/fr/modes/val.md new file mode 100644 index 0000000..f6f51bf --- /dev/null +++ b/docs/fr/modes/val.md @@ -0,0 +1,86 @@ +--- +comments: true +description: Guide de validation des modèles YOLOv8. Apprenez à évaluer la performance de vos modèles YOLO en utilisant les paramètres de validation et les métriques avec des exemples en Python et en CLI. 
+keywords: Ultralytics, YOLO Docs, YOLOv8, validation, évaluation de modèle, hyperparamètres, précision, métriques, Python, CLI +--- + +# Validation des modèles avec Ultralytics YOLO + +Écosystème Ultralytics YOLO et intégrations + +## Introduction + +La validation est une étape cruciale dans le pipeline d'apprentissage automatique, vous permettant d'évaluer la qualité de vos modèles entraînés. Le mode Val dans Ultralytics YOLOv8 offre une gamme robuste d'outils et de métriques pour évaluer la performance de vos modèles de détection d'objets. Ce guide sert de ressource complète pour comprendre comment utiliser efficacement le mode Val pour vous assurer que vos modèles sont à la fois précis et fiables. + +## Pourquoi valider avec Ultralytics YOLO ? + +Voici pourquoi l'utilisation du mode Val de YOLOv8 est avantageuse : + +- **Précision :** Obtenez des métriques précises telles que mAP50, mAP75 et mAP50-95 pour évaluer de manière exhaustive votre modèle. +- **Commodité :** Utilisez des fonctionnalités intégrées qui se souviennent des paramètres d'entraînement, simplifiant ainsi le processus de validation. +- **Flexibilité :** Validez votre modèle avec les mêmes jeux de données ou des jeux différents et des tailles d'image variées. +- **Réglage des hyperparamètres :** Utilisez les métriques de validation pour peaufiner votre modèle pour de meilleures performances. + +### Caractéristiques clés du mode Val + +Voici les fonctionnalités notables offertes par le mode Val de YOLOv8 : + +- **Paramètres Automatisés :** Les modèles se souviennent de leurs configurations d'entraînement pour une validation simple. +- **Support Multi-métrique :** Évaluez votre modèle en fonction d'une gamme de métriques de précision. +- **CLI et API Python :** Choisissez entre l'interface en ligne de commande ou l'API Python en fonction de vos préférences pour la validation. 
+- **Compatibilité des Données :** Fonctionne de manière transparente avec les jeux de données utilisés pendant la phase d'entraînement ainsi qu'avec les jeux personnalisés. + +!!! Tip "Conseil" + + * Les modèles YOLOv8 se souviennent automatiquement de leurs paramètres d'entraînement, vous pouvez donc facilement valider un modèle à la même taille d'image et sur le jeu de données original avec juste `yolo val model=yolov8n.pt` ou `YOLO('yolov8n.pt').val()` + +## Exemples d'utilisation + +Validez la précision du modèle YOLOv8n entraîné sur le jeu de données COCO128. Aucun argument n'a besoin d'être passé car l'objet `model` conserve ses `data` d'entraînement et ses arguments comme attributs du modèle. Consultez la section des arguments ci-dessous pour une liste complète des arguments de validation. + +!!! Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n.pt') # charger un modèle officiel + model = YOLO('chemin/vers/meilleur.pt') # charger un modèle personnalisé + + # Valider le modèle + metrics = model.val() # pas besoin d'arguments, jeu de données et paramètres mémorisés + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # une liste contenant map50-95 de chaque catégorie + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # val modèle officiel + yolo detect val model=chemin/vers/meilleur.pt # val modèle personnalisé + ``` + +## Arguments + +Les paramètres de validation pour les modèles YOLO font référence aux divers hyperparamètres et configurations utilisés pour évaluer la performance du modèle sur un jeu de données de validation. Ces paramètres peuvent affecter la performance, la vitesse et la précision du modèle. Certains paramètres de validation YOLO courants incluent la taille du lot, la fréquence à laquelle la validation est effectuée pendant l'entraînement et les métriques utilisées pour évaluer la performance du modèle. 
D'autres facteurs pouvant affecter le processus de validation incluent la taille et la composition du jeu de données de validation et la tâche spécifique pour laquelle le modèle est utilisé. Il est important de régler et d'expérimenter soigneusement ces paramètres pour s'assurer que le modèle fonctionne bien sur le jeu de données de validation et pour détecter et prévenir le surajustement. + +| Clé | Valeur | Description | +|---------------|---------|------------------------------------------------------------------------------------------------| +| `data` | `None` | chemin vers le fichier de données, par exemple coco128.yaml | +| `imgsz` | `640` | taille des images d'entrée en tant qu'entier | +| `batch` | `16` | nombre d'images par lot (-1 pour AutoBatch) | +| `save_json` | `False` | sauvegarder les résultats dans un fichier JSON | +| `save_hybrid` | `False` | sauvegarder la version hybride des étiquettes (étiquettes + prédictions supplémentaires) | +| `conf` | `0.001` | seuil de confiance de l'objet pour la détection | +| `iou` | `0.6` | seuil d'intersection sur union (IoU) pour la NMS | +| `max_det` | `300` | nombre maximum de détections par image | +| `half` | `True` | utiliser la précision moitié (FP16) | +| `device` | `None` | appareil sur lequel exécuter, par exemple cuda device=0/1/2/3 ou device=cpu | +| `dnn` | `False` | utiliser OpenCV DNN pour l'inférence ONNX | +| `plots` | `False` | afficher les graphiques lors de l'entraînement | +| `rect` | `False` | val rectangulaire avec chaque lot regroupé pour un minimum de rembourrage | +| `split` | `val` | fraction du jeu de données à utiliser pour la validation, par exemple 'val', 'test' ou 'train' | +| diff --git a/docs/fr/quickstart.md b/docs/fr/quickstart.md new file mode 100644 index 0000000..025ae5d --- /dev/null +++ b/docs/fr/quickstart.md @@ -0,0 +1,198 @@ +--- +comments: true +description: Explorez diverses méthodes pour installer Ultralytics en utilisant pip, conda, git et Docker. 
Apprenez comment utiliser Ultralytics avec l'interface en ligne de commande ou au sein de vos projets Python. +keywords: installation d'Ultralytics, pip install Ultralytics, Docker install Ultralytics, interface en ligne de commande Ultralytics, interface Python Ultralytics +--- + +## Installer Ultralytics + +Ultralytics propose diverses méthodes d'installation, y compris pip, conda et Docker. Installez YOLOv8 via le package `ultralytics` avec pip pour obtenir la dernière version stable ou en clonant le [répertoire GitHub d'Ultralytics](https://github.com/ultralytics/ultralytics) pour la version la plus récente. Docker peut être utilisé pour exécuter le package dans un conteneur isolé, évitant l'installation locale. + +!!! Example "Installer" + + === "Installation avec Pip (recommandé)" + Installez le package `ultralytics` en utilisant pip, ou mettez à jour une installation existante en exécutant `pip install -U ultralytics`. Visitez l'Index des Packages Python (PyPI) pour plus de détails sur le package `ultralytics` : [https://pypi.org/project/ultralytics/](https://pypi.org/project/ultralytics/). + + [![Version PyPI](https://badge.fury.io/py/ultralytics.svg)](https://badge.fury.io/py/ultralytics) [![Téléchargements](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) + + ```bash + # Installer le package ultralytics depuis PyPI + pip install ultralytics + ``` + + Vous pouvez également installer le package `ultralytics` directement depuis le [répertoire GitHub](https://github.com/ultralytics/ultralytics). Cela peut être utile si vous voulez la version de développement la plus récente. Assurez-vous d'avoir l'outil en ligne de commande Git installé sur votre système. La commande `@main` installe la branche `main` et peut être modifiée pour une autre branche, p. ex. `@my-branch`, ou supprimée entièrement pour revenir par défaut à la branche `main`. 
+ + ```bash + # Installer le package ultralytics depuis GitHub + pip install git+https://github.com/ultralytics/ultralytics.git@main + ``` + + + === "Installation avec Conda" + Conda est un gestionnaire de packages alternatif à pip qui peut également être utilisé pour l'installation. Visitez Anaconda pour plus de détails à [https://anaconda.org/conda-forge/ultralytics](https://anaconda.org/conda-forge/ultralytics). Le répertoire feedstock d'Ultralytics pour la mise à jour du package conda est sur [https://github.com/conda-forge/ultralytics-feedstock/](https://github.com/conda-forge/ultralytics-feedstock/). + + + [![Recette Conda](https://img.shields.io/badge/recipe-ultralytics-green.svg)](https://anaconda.org/conda-forge/ultralytics) [![Téléchargements Conda](https://img.shields.io/conda/dn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Version Conda](https://img.shields.io/conda/vn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Plateformes Conda](https://img.shields.io/conda/pn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) + + ```bash + # Installer le package ultralytics en utilisant conda + conda install -c conda-forge ultralytics + ``` + + !!! Note "Note" + + Si vous installez dans un environnement CUDA, la meilleure pratique est d'installer `ultralytics`, `pytorch` et `pytorch-cuda` dans la même commande pour permettre au gestionnaire de package conda de résoudre les conflits, ou bien d'installer `pytorch-cuda` en dernier pour lui permettre de remplacer le package `pytorch` spécifique aux CPU si nécessaire. + ```bash + # Installer tous les packages ensemble en utilisant conda + conda install -c pytorch -c nvidia -c conda-forge pytorch torchvision pytorch-cuda=11.8 ultralytics + ``` + + ### Image Docker Conda + + Les images Docker Conda d'Ultralytics sont également disponibles sur [DockerHub](https://hub.docker.com/r/ultralytics/ultralytics). 
Ces images sont basées sur [Miniconda3](https://docs.conda.io/projects/miniconda/en/latest/) et constituent un moyen simple de commencer à utiliser `ultralytics` dans un environnement Conda. + + ```bash + # Définir le nom de l'image comme variable + t=ultralytics/ultralytics:latest-conda + + # Télécharger la dernière image ultralytics de Docker Hub + sudo docker pull $t + + # Exécuter l'image ultralytics dans un conteneur avec support GPU + sudo docker run -it --ipc=host --gpus all $t # tous les GPUs + sudo docker run -it --ipc=host --gpus '"device=2,3"' $t # spécifier les GPUs + ``` + + === "Clone Git" + Clonez le répertoire `ultralytics` si vous êtes intéressé par la contribution au développement ou si vous souhaitez expérimenter avec le dernier code source. Après le clonage, naviguez dans le répertoire et installez le package en mode éditable `-e` en utilisant pip. + ```bash + # Cloner le répertoire ultralytics + git clone https://github.com/ultralytics/ultralytics + + # Naviguer vers le répertoire cloné + cd ultralytics + + # Installer le package en mode éditable pour le développement + pip install -e . + ``` + +Voir le fichier [requirements.txt](https://github.com/ultralytics/ultralytics/blob/main/requirements.txt) d'`ultralytics` pour une liste des dépendances. Notez que tous les exemples ci-dessus installent toutes les dépendances requises. + +

+
+ +
+ Regardez : Guide de démarrage rapide Ultralytics YOLO

+ +!!! Tip "Conseil" + + Les prérequis de PyTorch varient selon le système d'exploitation et les exigences CUDA, donc il est recommandé d'installer PyTorch en premier en suivant les instructions sur [https://pytorch.org/get-started/locally](https://pytorch.org/get-started/locally). + + + Instructions d'installation de PyTorch + + +## Utiliser Ultralytics avec CLI + +L'interface en ligne de commande (CLI) d'Ultralytics permet l'utilisation de commandes simples en une seule ligne sans nécessiter d'environnement Python. La CLI ne requiert pas de personnalisation ou de code Python. Vous pouvez simplement exécuter toutes les tâches depuis le terminal avec la commande `yolo`. Consultez le [Guide CLI](/../usage/cli.md) pour en savoir plus sur l'utilisation de YOLOv8 depuis la ligne de commande. + +!!! Example "Exemple" + + === "Syntaxe" + + Les commandes `yolo` d'Ultralytics utilisent la syntaxe suivante : + ```bash + yolo TÂCHE MODE ARGS + + Où TÂCHE (facultatif) est l'une de [detect, segment, classify] + MODE (obligatoire) est l'un de [train, val, predict, export, track] + ARGS (facultatif) sont n'importe quel nombre de paires personnalisées 'arg=valeur' comme 'imgsz=320' qui remplacent les valeurs par défaut. 
+ ``` + Voyez tous les ARGS dans le [Guide de Configuration](/../usage/cfg.md) complet ou avec `yolo cfg` + + === "Entraînement" + + Entraînez un modèle de détection pour 10 epochs avec un learning_rate initial de 0.01 + ```bash + yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01 + ``` + + === "Prédiction" + + Prédisez une vidéo YouTube en utilisant un modèle de segmentation pré-entraîné à une taille d'image de 320 : + ```bash + yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 + ``` + + === "Validation" + + Validez un modèle de détection pré-entraîné avec un batch-size de 1 et une taille d'image de 640 : + ```bash + yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640 + ``` + + === "Exportation" + + Exportez un modèle de classification YOLOv8n au format ONNX à une taille d'image de 224 par 128 (pas de TÂCHE requise) + ```bash + yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128 + ``` + + === "Spécial" + + Exécutez des commandes spéciales pour voir la version, afficher les paramètres, effectuer des vérifications et plus encore : + ```bash + yolo help + yolo checks + yolo version + yolo settings + yolo copy-cfg + yolo cfg + ``` + +!!! Warning "Avertissement" + + Les arguments doivent être passés sous forme de paires `arg=val`, séparés par un signe égal `=` et délimités par des espaces ` ` entre les paires. N'utilisez pas de préfixes d'arguments `--` ou de virgules `,` entre les arguments. + + - `yolo predict model=yolov8n.pt imgsz=640 conf=0.25`   ✅ + - `yolo predict model yolov8n.pt imgsz 640 conf 0.25`   ❌ + - `yolo predict --model yolov8n.pt --imgsz 640 --conf 0.25`   ❌ + +[Guide CLI](/../usage/cli.md){ .md-button } + +## Utiliser Ultralytics avec Python + +L'interface Python de YOLOv8 permet une intégration transparente dans vos projets Python, facilitant le chargement, l'exécution et le traitement de la sortie du modèle. 
Conçue avec simplicité et facilité d'utilisation à l'esprit, l'interface Python permet aux utilisateurs de mettre en œuvre rapidement la détection d'objets, la segmentation et la classification dans leurs projets. Cela fait de l'interface Python de YOLOv8 un outil inestimable pour quiconque cherche à intégrer ces fonctionnalités dans ses projets Python. + +Par exemple, les utilisateurs peuvent charger un modèle, l'entraîner, évaluer ses performances sur un set de validation, et même l'exporter au format ONNX avec seulement quelques lignes de code. Consultez le [Guide Python](/../usage/python.md) pour en savoir plus sur l'utilisation de YOLOv8 au sein de vos projets Python. + +!!! Example "Exemple" + + ```python + from ultralytics import YOLO + + # Créer un nouveau modèle YOLO à partir de zéro + model = YOLO('yolov8n.yaml') + + # Charger un modèle YOLO pré-entraîné (recommandé pour l'entraînement) + model = YOLO('yolov8n.pt') + + # Entraîner le modèle en utilisant le jeu de données 'coco128.yaml' pour 3 epochs + résultats = model.train(data='coco128.yaml', epochs=3) + + # Évaluer la performance du modèle sur le set de validation + résultats = model.val() + + # Effectuer la détection d'objets sur une image en utilisant le modèle + résultats = model('https://ultralytics.com/images/bus.jpg') + + # Exporter le modèle au format ONNX + succès = model.export(format='onnx') + ``` + +[Guide Python](/../usage/python.md){.md-button .md-button--primary} diff --git a/docs/fr/tasks/classify.md b/docs/fr/tasks/classify.md new file mode 100644 index 0000000..c9d8155 --- /dev/null +++ b/docs/fr/tasks/classify.md @@ -0,0 +1,172 @@ +--- +comments: true +description: Apprenez-en davantage sur les modèles de classification d'images YOLOv8 Classify. Obtenez des informations détaillées sur la liste des modèles pré-entraînés et comment entraîner, valider, prédire et exporter des modèles. 
+keywords: Ultralytics, YOLOv8, Classification d'images, Modèles pré-entraînés, YOLOv8n-cls, Entraînement, Validation, Prédiction, Exportation de modèles +--- + +# Classification d'images + +Exemples de classification d'images + +La classification d'images est la tâche la plus simple des trois et consiste à classer une image entière dans l'une d'un ensemble de classes prédéfinies. + +Le résultat d'un classificateur d'images est une étiquette de classe unique et un score de confiance. La classification d'images est utile lorsque vous avez besoin de savoir seulement à quelle classe appartient une image et que vous n'avez pas besoin de connaître l'emplacement des objets de cette classe ou leur forme exacte. + +!!! Tip "Astuce" + + Les modèles YOLOv8 Classify utilisent le suffixe `-cls`, par exemple `yolov8n-cls.pt` et sont pré-entraînés sur [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +## [Modèles](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Les modèles Classify pré-entraînés YOLOv8 sont présentés ici. Les modèles Detect, Segment et Pose sont pré-entraînés sur le dataset [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml), tandis que les modèles Classify sont pré-entraînés sur le dataset [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +Les [modèles](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) se téléchargent automatiquement depuis la dernière version Ultralytics [release](https://github.com/ultralytics/assets/releases) lors de la première utilisation. + +| Modèle | taille
(pixels) | acc
top1 | acc
top5 | Vitesse
CPU ONNX
(ms) | Vitesse
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) à 640 | +|----------------------------------------------------------------------------------------------|-------------------------|------------------|------------------|----------------------------------|---------------------------------------|--------------------|-------------------------| +| [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | +| [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | +| [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | +| [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | +| [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | + +- Les valeurs **acc** sont les précisions des modèles sur le jeu de données de validation d'[ImageNet](https://www.image-net.org/). +
Pour reproduire : `yolo val classify data=path/to/ImageNet device=0` +- Les **vitesses** sont calculées sur les images de validation d'ImageNet à l'aide d'une instance [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/). +
Pour reproduire : `yolo val classify data=path/to/ImageNet batch=1 device=0|cpu` + +## Entraînement + +Entraînez le modèle YOLOv8n-cls sur le dataset MNIST160 pendant 100 époques avec une taille d'image de 64. Pour une liste complète des arguments disponibles, consultez la page [Configuration](/../usage/cfg.md). + +!!! Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n-cls.yaml') # construire un nouveau modèle à partir du YAML + model = YOLO('yolov8n-cls.pt') # charger un modèle pré-entraîné (recommandé pour l'entraînement) + model = YOLO('yolov8n-cls.yaml').load('yolov8n-cls.pt') # construire à partir du YAML et transférer les poids + + # Entraîner le modèle + results = model.train(data='mnist160', epochs=100, imgsz=64) + ``` + + === "CLI" + + ```bash + # Construire un nouveau modèle à partir du YAML et commencer l'entraînement à partir de zéro + yolo classify train data=mnist160 model=yolov8n-cls.yaml epochs=100 imgsz=64 + + # Commencer l'entraînement à partir d'un modèle *.pt pré-entraîné + yolo classify train data=mnist160 model=yolov8n-cls.pt epochs=100 imgsz=64 + + # Construire un nouveau modèle à partir du YAML, transférer les poids pré-entraînés et commencer l'entraînement + yolo classify train data=mnist160 model=yolov8n-cls.yaml pretrained=yolov8n-cls.pt epochs=100 imgsz=64 + ``` + +### Format du dataset + +Le format du dataset de classification YOLO peut être trouvé en détails dans le [Guide des Datasets](../../../datasets/classify/index.md). + +## Validation + +Validez la précision du modèle YOLOv8n-cls entraîné sur le dataset MNIST160. Aucun argument n'est nécessaire car le `modèle` conserve ses données d'entraînement et arguments en tant qu'attributs du modèle. + +!!! 
Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n-cls.pt') # charger un modèle officiel + model = YOLO('path/to/best.pt') # charger un modèle personnalisé + + # Valider le modèle + metrics = model.val() # aucun argument nécessaire, les données et les paramètres sont mémorisés + metrics.top1 # précision top 1 + metrics.top5 # précision top 5 + ``` + === "CLI" + + ```bash + yolo classify val model=yolov8n-cls.pt # valider le modèle officiel + yolo classify val model=path/to/best.pt # valider le modèle personnalisé + ``` + +## Prédiction + +Utilisez un modèle YOLOv8n-cls entraîné pour exécuter des prédictions sur des images. + +!!! Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n-cls.pt') # charger un modèle officiel + model = YOLO('path/to/best.pt') # charger un modèle personnalisé + + # Prédire avec le modèle + results = model('https://ultralytics.com/images/bus.jpg') # prédire sur une image + ``` + === "CLI" + + ```bash + yolo classify predict model=yolov8n-cls.pt source='https://ultralytics.com/images/bus.jpg' # prédiction avec le modèle officiel + yolo classify predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # prédiction avec le modèle personnalisé + ``` + +Voir les détails complets du mode `predict` sur la page [Prédire](https://docs.ultralytics.com/modes/predict/). + +## Exportation + +Exportez un modèle YOLOv8n-cls dans un format différent comme ONNX, CoreML, etc. + +!!! 
Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n-cls.pt') # charger un modèle officiel + model = YOLO('path/to/best.pt') # charger un modèle entraîné personnalisé + + # Exporter le modèle + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-cls.pt format=onnx # exporter le modèle officiel + yolo export model=path/to/best.pt format=onnx # exporter le modèle entraîné personnalisé + ``` + +Les formats d'exportation disponibles pour YOLOv8-cls sont présentés dans le tableau ci-dessous. Vous pouvez prédire ou valider directement sur les modèles exportés, par exemple `yolo predict model=yolov8n-cls.onnx`. Des exemples d'utilisation sont présentés pour votre modèle une fois l'exportation terminée. + +| Format | Argument `format` | Modèle | Métadonnées | Arguments | +|--------------------------------------------------------------------|-------------------|-------------------------------|-------------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-cls.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-cls.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-cls.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-cls_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-cls.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-cls.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-cls_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` 
| `yolov8n-cls.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-cls.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-cls_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-cls_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-cls_ncnn_model/` | ✅ | `imgsz`, `half` | + +Voir les détails complets de l'`exportation` sur la page [Export](https://docs.ultralytics.com/modes/export/). diff --git a/docs/fr/tasks/detect.md b/docs/fr/tasks/detect.md new file mode 100644 index 0000000..21ad108 --- /dev/null +++ b/docs/fr/tasks/detect.md @@ -0,0 +1,184 @@ +--- +comments: true +description: Documentation officielle pour YOLOv8 par Ultralytics. Apprenez comment entraîner, valider, prédire et exporter des modèles dans différents formats. Incluant des statistiques de performances détaillées. +keywords: YOLOv8, Ultralytics, détection d'objets, modèles pré-entraînés, entraînement, validation, prédiction, exportation de modèles, COCO, ImageNet, PyTorch, ONNX, CoreML +--- + +# Détection d'Objets + +Exemples de détection d'objets + +La détection d'objets est une tâche qui implique l'identification de l'emplacement et de la classe des objets dans une image ou un flux vidéo. + +La sortie d'un détecteur d'objets est un ensemble de boîtes englobantes qui entourent les objets de l'image, accompagnées de libellés de classe et de scores de confiance pour chaque boîte. La détection d'objets est un bon choix lorsque vous avez besoin d'identifier des objets d'intérêt dans une scène, mais que vous n'avez pas besoin de connaître exactement où se trouve l'objet ou sa forme exacte. + +

+
+ +
+ Regardez : Détection d'Objets avec le Modèle Pré-entraîné Ultralytics YOLOv8. +

+ +!!! Tip "Conseil" + + Les modèles Detect YOLOv8 sont les modèles YOLOv8 par défaut, c.-à-d. `yolov8n.pt` et sont pré-entraînés sur le jeu de données [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml). + +## [Modèles](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Les modèles pré-entraînés Detect YOLOv8 sont présentés ici. Les modèles Detect, Segment, et Pose sont pré-entraînés sur le jeu de données [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml), tandis que les modèles Classify sont pré-entraînés sur le jeu de données [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +[Les modèles](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) se téléchargent automatiquement à partir de la dernière [version](https://github.com/ultralytics/assets/releases) d'Ultralytics lors de la première utilisation. + +| Modèle | Taille
(pixels) | mAPval
50-95 | Vitesse
CPU ONNX
(ms) | Vitesse
A100 TensorRT
(ms) | Paramètres
(M) | FLOPs
(B) | +|--------------------------------------------------------------------------------------|-------------------------|----------------------|----------------------------------|---------------------------------------|------------------------|-------------------| +| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | +| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | +| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | +| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | +| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | + +- Les valeurs de **mAPval** sont pour un seul modèle à une seule échelle sur le jeu de données [COCO val2017](http://cocodataset.org). +
Reproductible avec `yolo val detect data=coco.yaml device=0` +- La **Vitesse** est moyennée sur les images COCO val en utilisant une instance [Amazon EC2 P4d](https://aws.amazon.com/fr/ec2/instance-types/p4/). +
Reproductible avec `yolo val detect data=coco128.yaml batch=1 device=0|cpu` + +## Entraînement + +Entraînez le modèle YOLOv8n sur le jeu de données COCO128 pendant 100 époques à la taille d'image de 640. Pour une liste complète des arguments disponibles, consultez la page [Configuration](/../usage/cfg.md). + +!!! Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n.yaml') # construire un nouveau modèle à partir de YAML + model = YOLO('yolov8n.pt') # charger un modèle pré-entraîné (recommandé pour l'entraînement) + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # construire à partir de YAML et transférer les poids + + # Entraîner le modèle + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # Construire un nouveau modèle à partir de YAML et commencer l'entraînement à partir de zéro + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # Commencer l'entraînement à partir d'un modèle *.pt pré-entraîné + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # Construire un nouveau modèle à partir de YAML, transférer les poids pré-entraînés et commencer l'entraînement + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### Format des données + +Le format des jeux de données de détection YOLO est détaillé dans le [Guide des Jeux de Données](../../../datasets/detect/index.md). Pour convertir votre jeu de données existant depuis d'autres formats (comme COCO, etc.) vers le format YOLO, veuillez utiliser l'outil [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) par Ultralytics. + +## Validation + +Validez la précision du modèle YOLOv8n entraîné sur le jeu de données COCO128. Aucun argument n'est nécessaire puisque le `modèle` conserve ses `données` d'entraînement et arguments en tant qu'attributs du modèle. + +!!! 
Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n.pt') # charger un modèle officiel + model = YOLO('chemin/vers/best.pt') # charger un modèle personnalisé + + # Valider le modèle + metrics = model.val() # pas d'arguments nécessaires, jeu de données et paramètres enregistrés + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # une liste contenant map50-95 de chaque catégorie + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # valider le modèle officiel + yolo detect val model=chemin/vers/best.pt # valider le modèle personnalisé + ``` + +## Prédiction + +Utilisez un modèle YOLOv8n entraîné pour exécuter des prédictions sur des images. + +!!! Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n.pt') # charger un modèle officiel + model = YOLO('chemin/vers/best.pt') # charger un modèle personnalisé + + # Prédire avec le modèle + results = model('https://ultralytics.com/images/bus.jpg') # prédire sur une image + ``` + === "CLI" + + ```bash + yolo detect predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' # prédire avec le modèle officiel + yolo detect predict model=chemin/vers/best.pt source='https://ultralytics.com/images/bus.jpg' # prédire avec le modèle personnalisé + ``` + +Consultez les détails complets du mode `predict` sur la page [Prédire](https://docs.ultralytics.com/modes/predict/). + +## Exportation + +Exportez un modèle YOLOv8n dans un format différent tel que ONNX, CoreML, etc. + +!!! 
Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n.pt') # charger un modèle officiel + model = YOLO('chemin/vers/best.pt') # charger un modèle entraîné personnalisé + + # Exporter le modèle + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # exporter le modèle officiel + yolo export model=chemin/vers/best.pt format=onnx # exporter le modèle entraîné personnalisé + ``` + +Les formats d'exportation YOLOv8 disponibles sont présentés dans le tableau ci-dessous. Vous pouvez directement prédire ou valider sur des modèles exportés, c'est-à-dire `yolo predict model=yolov8n.onnx`. Des exemples d'utilisation sont présentés pour votre modèle après l'exportation complète. + +| Format | Argument `format` | Modèle | Métadonnées | Arguments | +|----------------------------------------------------------------------|-------------------|---------------------------|-------------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [Modèle TF Enregistré](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [GraphDef TF](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF 
Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TPU Edge TF](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +Consultez tous les détails `export` sur la page [Exporter](https://docs.ultralytics.com/modes/export/). diff --git a/docs/fr/tasks/index.md b/docs/fr/tasks/index.md new file mode 100644 index 0000000..7c0260a --- /dev/null +++ b/docs/fr/tasks/index.md @@ -0,0 +1,55 @@ +--- +comments: true +description: Découvrez les tâches fondamentales de vision par ordinateur que YOLOv8 peut effectuer, y compris la détection, la segmentation, la classification et l'estimation de la pose. Comprenez leur utilité dans vos projets d'IA. +keywords: Ultralytics, YOLOv8, Détection, Segmentation, Classification, Estimation de la Pose, Cadre IA, Tâches de Vision par Ordinateur +--- + +# Tâches d'Ultralytics YOLOv8 + +
+Tâches prises en charge par Ultralytics YOLO + +YOLOv8 est un cadre d'intelligence artificielle qui prend en charge de multiples tâches de **vision par ordinateur**. Le cadre peut être utilisé pour effectuer de la [détection](detect.md), de la [segmentation](segment.md), de la [classification](classify.md) et de l'estimation de la [pose](pose.md). Chacune de ces tâches a un objectif et un cas d'utilisation différents. + +!!! Note "Note" + + 🚧 Notre documentation multilingue est actuellement en construction et nous travaillons dur pour l'améliorer. Merci de votre patience ! 🙏 + +

+
+ +
+ Regardez : Explorez les Tâches YOLO Ultralytics : Détection d'Objets, Segmentation, Suivi et Estimation de la Pose. +

+ +## [Détection](detect.md) + +La détection est la tâche principale prise en charge par YOLOv8. Elle implique de détecter des objets dans une image ou une trame vidéo et de dessiner des boîtes englobantes autour d'eux. Les objets détectés sont classés dans différentes catégories en fonction de leurs caractéristiques. YOLOv8 peut détecter plusieurs objets dans une seule image ou trame vidéo avec une grande précision et rapidité. + +[Exemples de Détection](detect.md){ .md-button } + +## [Segmentation](segment.md) + +La segmentation est une tâche qui implique de segmenter une image en différentes régions en fonction du contenu de l'image. Chaque région se voit attribuer une étiquette en fonction de son contenu. Cette tâche est utile dans des applications telles que la segmentation d'image et l'imagerie médicale. YOLOv8 utilise une variante de l'architecture U-Net pour effectuer la segmentation. + +[Exemples de Segmentation](segment.md){ .md-button } + +## [Classification](classify.md) + +La classification est une tâche qui implique de classer une image dans différentes catégories. YOLOv8 peut être utilisé pour classifier des images en fonction de leur contenu. Il utilise une variante de l'architecture EfficientNet pour effectuer la classification. + +[Exemples de Classification](classify.md){ .md-button } + +## [Pose](pose.md) + +La détection de pose/points clés est une tâche qui implique de détecter des points spécifiques dans une image ou une trame vidéo. Ces points sont appelés points clés et sont utilisés pour suivre le mouvement ou pour l'estimation de la pose. YOLOv8 peut détecter des points clés dans une image ou une trame vidéo avec une grande précision et rapidité. + +[Exemples de Pose](pose.md){ .md-button } + +## Conclusion + +YOLOv8 prend en charge de multiples tâches, y compris la détection, la segmentation, la classification et la détection de points clés. Chacune de ces tâches a des objectifs et des cas d'utilisation différents. 
En comprenant les différences entre ces tâches, vous pouvez choisir la tâche appropriée pour votre application de vision par ordinateur. diff --git a/docs/fr/tasks/pose.md b/docs/fr/tasks/pose.md new file mode 100644 index 0000000..7a972de --- /dev/null +++ b/docs/fr/tasks/pose.md @@ -0,0 +1,176 @@ +--- +comments: true +description: Apprenez à utiliser Ultralytics YOLOv8 pour des tâches d'estimation de pose. Trouvez des modèles pré-entraînés, apprenez à entraîner, valider, prédire et exporter vos propres modèles. +keywords: Ultralytics, YOLO, YOLOv8, estimation de pose, détection de points clés, détection d'objet, modèles pré-entraînés, apprentissage automatique, intelligence artificielle +--- + +# Estimation de Pose + +![Estimation de pose exemples](https://user-images.githubusercontent.com/26833433/243418616-9811ac0b-a4a7-452a-8aba-484ba32bb4a8.png) + +L'estimation de pose est une tâche qui consiste à identifier l'emplacement de points spécifiques dans une image, souvent appelés points clés. Ces points clés peuvent représenter différentes parties de l'objet telles que les articulations, les repères ou d'autres caractéristiques distinctives. L'emplacement des points clés est généralement représenté par un ensemble de coordonnées 2D `[x, y]` ou 3D `[x, y, visible]`. + +La sortie d'un modèle d'estimation de pose est un ensemble de points représentant les points clés sur un objet dans l'image, généralement accompagnés des scores de confiance pour chaque point. L'estimation de pose est un bon choix lorsque vous avez besoin d'identifier des parties spécifiques d'un objet dans une scène, et leur emplacement les uns par rapport aux autres. + +![Regardez : Estimation de Pose avec Ultralytics YOLOv8](https://www.youtube.com/embed/Y28xXQmju64?si=pCY4ZwejZFu6Z4kZ) + +!!! astuce "Conseil" + + Les modèles YOLOv8 _pose_ utilisent le suffixe `-pose`, c'est-à-dire `yolov8n-pose.pt`. 
Ces modèles sont entraînés sur le jeu de données [COCO keypoints](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco-pose.yaml) et conviennent à une variété de tâches d'estimation de pose. + +## [Modèles](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Les modèles Pose pré-entraînés YOLOv8 sont montrés ici. Les modèles Detect, Segment et Pose sont pré-entraînés sur le jeu de données [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml), tandis que les modèles Classify sont pré-entraînés sur le jeu de données [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +Les [Modèles](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) se téléchargent automatiquement à partir de la dernière version d'Ultralytics [release](https://github.com/ultralytics/assets/releases) lors de la première utilisation. + +| Modèle | taille
(pixels) | mAPpose
50-95 | mAPpose
50 | Vitesse
CPU ONNX
(ms) | Vitesse
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | +|------------------------------------------------------------------------------------------------------|-------------------------|-----------------------|--------------------|----------------------------------|---------------------------------------|--------------------|-------------------| +| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | +| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | +| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | +| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | +| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | +| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | + +- Les valeurs de **mAPval** sont pour un seul modèle à une seule échelle sur le jeu de données [COCO Keypoints val2017](http://cocodataset.org). +
Reproduire avec `yolo val pose data=coco-pose.yaml device=0` +- La **vitesse** est moyennée sur les images de validation COCO en utilisant une instance [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/). +
Reproduire avec `yolo val pose data=coco8-pose.yaml batch=1 device=0|cpu` + +## Entraînement + +Entraînez un modèle YOLOv8-pose sur le jeu de données COCO8-pose. + +!!! Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n-pose.yaml') # construire un nouveau modèle à partir du YAML + model = YOLO('yolov8n-pose.pt') # charger un modèle pré-entraîné (recommandé pour l'entraînement) + model = YOLO('yolov8n-pose.yaml').load('yolov8n-pose.pt') # construire à partir du YAML et transférer les poids + + # Entraîner le modèle + résultats = model.train(data='coco8-pose.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # Construire un nouveau modèle à partir du YAML et commencer l'entraînement à partir de zéro + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml epochs=100 imgsz=640 + + # Commencer l'entraînement à partir d'un modèle *.pt pré-entraîné + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + + # Construire un nouveau modèle à partir du YAML, transférer les poids pré-entraînés et commencer l'entraînement + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml pretrained=yolov8n-pose.pt epochs=100 imgsz=640 + ``` + +### Format du jeu de données + +Le format du jeu de données YOLO pose peut être trouvé en détail dans le [Guide des jeux de données](../../../datasets/pose/index.md). Pour convertir votre jeu de données existant à partir d'autres formats (comme COCO, etc.) vers le format YOLO, veuillez utiliser l'outil [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) d'Ultralytics. + +## Val + +Validez la précision du modèle YOLOv8n-pose entraîné sur le jeu de données COCO8-pose. Aucun argument n'est nécessaire car le `model` conserve ses données d'entraînement et arguments en tant qu'attributs du modèle. + +!!!
Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n-pose.pt') # charger un modèle officiel + model = YOLO('chemin/vers/best.pt') # charger un modèle personnalisé + + # Valider le modèle + métriques = model.val() # aucun argument nécessaire, jeu de données et paramètres mémorisés + métriques.box.map # map50-95 + métriques.box.map50 # map50 + métriques.box.map75 # map75 + métriques.box.maps # une liste contenant map50-95 de chaque catégorie + ``` + === "CLI" + + ```bash + yolo pose val model=yolov8n-pose.pt # val modèle officiel + yolo pose val model=chemin/vers/best.pt # val modèle personnalisé + ``` + +## Prédiction + +Utilisez un modèle YOLOv8n-pose entraîné pour exécuter des prédictions sur des images. + +!!! Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n-pose.pt') # charger un modèle officiel + model = YOLO('chemin/vers/best.pt') # charger un modèle personnalisé + + # Prédire avec le modèle + résultats = model('https://ultralytics.com/images/bus.jpg') # prédire sur une image + ``` + === "CLI" + + ```bash + yolo pose predict model=yolov8n-pose.pt source='https://ultralytics.com/images/bus.jpg' # prédire avec modèle officiel + yolo pose predict model=chemin/vers/best.pt source='https://ultralytics.com/images/bus.jpg' # prédire avec modèle personnalisé + ``` + +Consultez les détails complets du mode `predict` sur la page [Prédire](https://docs.ultralytics.com/modes/predict/). + +## Exportation + +Exportez un modèle YOLOv8n Pose dans un autre format tel que ONNX, CoreML, etc. + +!!! 
Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n-pose.pt') # charger un modèle officiel + model = YOLO('chemin/vers/best.pt') # charger un modèle personnalisé entraîné + + # Exporter le modèle + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-pose.pt format=onnx # exporter modèle officiel + yolo export model=chemin/vers/best.pt format=onnx # exporter modèle personnalisé entraîné + ``` + +Les formats d'exportation YOLOv8-pose disponibles sont dans le tableau ci-dessous. Vous pouvez prédire ou valider directement sur des modèles exportés, par exemple `yolo predict model=yolov8n-pose.onnx`. Des exemples d'utilisation sont montrés pour votre modèle après la fin de l'exportation. + +| Format | Argument `format` | Modèle | Métadonnées | Arguments | +|--------------------------------------------------------------------|-------------------|--------------------------------|-------------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-pose.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-pose.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-pose.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-pose_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-pose.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-pose.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-pose_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | 
`yolov8n-pose.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-pose.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-pose_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-pose_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-pose_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-pose_ncnn_model/` | ✅ | `imgsz`, `half` | + +Consultez les détails complets de `export` sur la page [Exporter](https://docs.ultralytics.com/modes/export/). diff --git a/docs/fr/tasks/segment.md b/docs/fr/tasks/segment.md new file mode 100644 index 0000000..a49d64b --- /dev/null +++ b/docs/fr/tasks/segment.md @@ -0,0 +1,189 @@ +--- +comments: true +description: Apprenez à utiliser les modèles de segmentation d'instance avec Ultralytics YOLO. Instructions pour la formation, la validation, la prédiction d'image et l'exportation de modèle. +keywords: yolov8, segmentation d'instance, Ultralytics, jeu de données COCO, segmentation d'image, détection d'objet, formation de modèle, validation de modèle, prédiction d'image, exportation de modèle +--- + +# Segmentation d'Instance + +Exemples de segmentation d'instance + +La segmentation d'instance va plus loin que la détection d'objet et implique d'identifier des objets individuels dans une image et de les segmenter du reste de l'image. + +Le résultat d'un modèle de segmentation d'instance est un ensemble de masques ou de contours qui délimitent chaque objet dans l'image, accompagnés d'étiquettes de classe et de scores de confiance pour chaque objet. La segmentation d'instance est utile lorsque vous avez besoin de savoir non seulement où se trouvent les objets dans une image, mais aussi quelle est leur forme exacte. + +

+
+ +
+ Regardez : Exécutez la Segmentation avec le Modèle Ultralytics YOLOv8 Pré-Entraîné en Python. +

+ +!!! Tip "Astuce" + + Les modèles YOLOv8 Segment utilisent le suffixe `-seg`, par exemple `yolov8n-seg.pt` et sont pré-entraînés sur [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml). + +## [Modèles](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Les modèles Segment pré-entraînés YOLOv8 sont indiqués ici. Les modèles Detect, Segment et Pose sont pré-entraînés sur le jeu de données [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml), tandis que les modèles Classify sont pré-entraînés sur le jeu de données [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +Les [modèles](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) se téléchargent automatiquement depuis la dernière [version](https://github.com/ultralytics/assets/releases) Ultralytics lors de la première utilisation. + +| Modèle | Taille
(pixels) | mAPboîte
50-95 | mAPmasque
50-95 | Vitesse
CPU ONNX
(ms) | Vitesse
A100 TensorRT
(ms) | Paramètres
(M) | FLOPs
(B) | +|----------------------------------------------------------------------------------------------|-------------------------|------------------------|-------------------------|----------------------------------|---------------------------------------|------------------------|-------------------| +| [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | +| [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | +| [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | +| [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | +| [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | + +- Les valeurs **mAPval** sont pour un seul modèle à une seule échelle sur le jeu de données [COCO val2017](http://cocodataset.org). +
Pour reproduire, utilisez `yolo val segment data=coco.yaml device=0` +- **Vitesse** moyennée sur les images COCO val en utilisant une instance [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/). +
Pour reproduire, utilisez `yolo val segment data=coco128-seg.yaml batch=1 device=0|cpu` + +## Formation + +Entraînez YOLOv8n-seg sur le jeu de données COCO128-seg pendant 100 époques à la taille d'image 640. Pour une liste complète des arguments disponibles, consultez la page [Configuration](/../usage/cfg.md). + +!!! Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n-seg.yaml') # construire un nouveau modèle à partir du YAML + model = YOLO('yolov8n-seg.pt') # charger un modèle pré-entraîné (recommandé pour la formation) + model = YOLO('yolov8n-seg.yaml').load('yolov8n.pt') # construire à partir du YAML et transférer les poids + + # Entraîner le modèle + résultats = model.train(data='coco128-seg.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # Construire un nouveau modèle à partir du YAML et commencer la formation à partir de zéro + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml epochs=100 imgsz=640 + + # Commencer la formation à partir d'un modèle *.pt pré-entraîné + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + + # Construire un nouveau modèle à partir du YAML, transférer les poids pré-entraînés et commencer la formation + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml pretrained=yolov8n-seg.pt epochs=100 imgsz=640 + ``` + +### Format des données + +Le format des données de segmentation YOLO peut être trouvé en détail dans le [Guide du Jeu de Données](../../../datasets/segment/index.md). Pour convertir votre jeu de données existant à partir d'autres formats (comme COCO, etc.) au format YOLO, veuillez utiliser l'outil [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) par Ultralytics. + +## Validation + +Validez la précision du modèle YOLOv8n-seg entraîné sur le jeu de données COCO128-seg. 
Aucun argument n'est nécessaire car le `modèle` +conserve ses données de formation et ses arguments comme attributs du modèle. + +!!! Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n-seg.pt') # charger un modèle officiel + model = YOLO('chemin/vers/le/meilleur.pt') # charger un modèle personnalisé + + # Valider le modèle + métriques = model.val() # aucun argument nécessaire, jeu de données et paramètres mémorisés + métriques.box.map # map50-95(B) + métriques.box.map50 # map50(B) + métriques.box.map75 # map75(B) + métriques.box.maps # une liste contient map50-95(B) de chaque catégorie + métriques.seg.map # map50-95(M) + métriques.seg.map50 # map50(M) + métriques.seg.map75 # map75(M) + métriques.seg.maps # une liste contient map50-95(M) de chaque catégorie + ``` + === "CLI" + + ```bash + yolo segment val model=yolov8n-seg.pt # valider le modèle officiel + yolo segment val model=chemin/vers/le/meilleur.pt # valider le modèle personnalisé + ``` + +## Prédiction + +Utilisez un modèle YOLOv8n-seg entraîné pour effectuer des prédictions sur des images. + +!!! Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n-seg.pt') # charger un modèle officiel + model = YOLO('chemin/vers/le/meilleur.pt') # charger un modèle personnalisé + + # Prédire avec le modèle + résultats = model('https://ultralytics.com/images/bus.jpg') # prédire sur une image + ``` + === "CLI" + + ```bash + yolo segment predict model=yolov8n-seg.pt source='https://ultralytics.com/images/bus.jpg' # prédire avec le modèle officiel + yolo segment predict model=chemin/vers/le/meilleur.pt source='https://ultralytics.com/images/bus.jpg' # prédire avec le modèle personnalisé + ``` + +Voir les détails complets du mode `predict` sur la page [Predict](https://docs.ultralytics.com/modes/predict/). 
+ +## Exportation + +Exportez un modèle YOLOv8n-seg vers un format différent comme ONNX, CoreML, etc. + +!!! Example "Exemple" + + === "Python" + + ```python + from ultralytics import YOLO + + # Charger un modèle + model = YOLO('yolov8n-seg.pt') # charger un modèle officiel + model = YOLO('chemin/vers/le/meilleur.pt') # charger un modèle entraîné personnalisé + + # Exporter le modèle + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-seg.pt format=onnx # exporter le modèle officiel + yolo export model=chemin/vers/le/meilleur.pt format=onnx # exporter le modèle entraîné personnalisé + ``` + +Les formats d'exportation YOLOv8-seg disponibles sont dans le tableau ci-dessous. Vous pouvez prédire ou valider directement sur les modèles exportés, par exemple `yolo predict model=yolov8n-seg.onnx`. Des exemples d'utilisation sont présentés pour votre modèle après l'exportation. + +| Format | Argument `format` | Modèle | Métadonnées | Arguments | +|--------------------------------------------------------------------|-------------------|-------------------------------|-------------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-seg.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-seg.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-seg.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-seg_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-seg.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-seg.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | 
`yolov8n-seg_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-seg.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-seg.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-seg_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-seg_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-seg_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-seg_ncnn_model/` | ✅ | `imgsz`, `half` | + +Voir les détails complets d'`export` sur la page [Export](https://docs.ultralytics.com/modes/export/). diff --git a/docs/hi/index.md b/docs/hi/index.md new file mode 100644 index 0000000..f06b6b8 --- /dev/null +++ b/docs/hi/index.md @@ -0,0 +1,83 @@ +--- +comments: true +description: Ultralytics YOLOv8 के पूर्ण गाइड को जानें, एक उच्च गति, उच्च योग्यता वाले वस्तु का पता लगाने और छवि विभाजन मॉडल। स्थापना, भविष्यवाणी, प्रशिक्षण ट्यूटोरियल और बहुत कुछ। +keywords: Ultralytics, YOLOv8, वस्तु पता लगाना, छवि विभाजन, मशीन लर्निंग, गहरी लर्निंग, कंप्यूटर विज़न, YOLOv8 स्थापना, YOLOv8 भविष्यवाणी, YOLOv8 प्रशिक्षण, YOLO इतिहास, YOLO लाइसेंसेस +--- + +
+

+ + Ultralytics YOLO banner +

+ Ultralytics GitHub + space + Ultralytics LinkedIn + space + Ultralytics Twitter + space + Ultralytics YouTube + space + Ultralytics TikTok + space + Ultralytics Instagram + space + Ultralytics Discord +
+
+ Ultralytics CI + Ultralytics Code Coverage + YOLOv8 Citation + Docker Pulls +
+ Run on Gradient + Open In Colab + Open In Kaggle +
+ + +पेश करते हैं [Ultralytics](https://ultralytics.com) [YOLOv8](https://github.com/ultralytics/ultralytics), प्रसिद्ध वास्तविक समय वस्तु पता लगाने और छवि विभाजन मॉडल का नवीनतम संस्करण। YOLOv8 गहरी लर्निंग और कंप्यूटर विज़न में कटिंग-एज उन्नति पर आधारित है, इसलिए गति और योग्यता के मामले में इसका प्रदर्शन अद्वितीय है। इसका संक्षेपित डिज़ाइन इसे विभिन्न अनुप्रयोगों के लिए उपयुक्त बनाता है और विभिन्न हार्डवेयर प्लेटफ़ॉर्म्स पर आसानी से अनुकूलनीय बनाता है, एज डिवाइसों से लेकर क्लाउड API तक। + +YOLOv8 डॉक्स का अन्वेषण करें, यह एक व्यापक स्रोत है जो आपको इसकी सुविधाओं और क्षमताओं को समझने और उपयोग करने में मदद करने के लिए विकसित किया गया है। चाहे आप एक अनुभवी मशीन लर्निंग प्रैक्टीशनर हो या क्षेत्र में नये हों, इस हब का उद्देश्य आपकी परियोजनाओं में YOLOv8 की क्षमताओं को अधिकतम करना है। + +!!! Note "नोट" + + 🚧 हमारा बहुभाषी दस्तावेज़ीकरण वर्तमान में निर्माणाधीन है, और हम इसे सुधारने के लिए कड़ी मेहनत कर रहे हैं। आपके धैर्य के लिए धन्यवाद! 🙏 + +## शुरुआत कहाँ से करें + +- **Install** `pip` के साथ `ultralytics` स्थापित करें और कुछ मिनट में चलता हुआ पाएं   [:material-clock-fast: शुरू हो जाओ](quickstart.md){ .md-button } +- **Predict** यूनिक images और videos को YOLOv8 के साथ   [:octicons-image-16: छवियों पर भविष्यवाणी करें](modes/predict.md){ .md-button } +- **Train** अपने खुद के custom डेटासेट पर एक नया YOLOv8 मॉडल   [:fontawesome-solid-brain: मॉडल प्रशिक्षित करें](modes/train.md){ .md-button } +- **अन्वेषण** करें YOLOv8 tasks जैसे कि विभाजित, वर्गीकृत, स्थिति और ट्रैक करें   [:material-magnify-expand: टास्क्स अन्वेषण करें](tasks/index.md){ .md-button } + +

+
+ +
+ देखें: Google Colab में अपने कस्टम डेटासेट पर YOLOv8 मॉडल को कैसे ट्रेन करें। +

+ +## YOLO: एक संक्षिप्त इतिहास + +[YOLO](https://arxiv.org/abs/1506.02640) (You Only Look Once), एक लोकप्रिय वस्तु पता लगाने और छवि विभाजन मॉडल, यूनिवर्सिटी ऑफ वाशिंगटन में Joseph Redmon और Ali Farhadi द्वारा विकसित किया गया था। YOLO की उच्च गति और योग्यता के कारण, यह 2015 में तेजी से प्रसिद्ध हुआ। + +- [YOLOv2](https://arxiv.org/abs/1612.08242), 2016 में जारी किया गया, मूल मॉडल में batch normalization, anchor boxes और dimension clusters शामिल करके मॉडल में सुधार किया। +- [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf), 2018 में लॉन्च किया गया, एक अधिक प्रभावी बैकबोन नेटवर्क, एंकर बॉक्सेस और स्थानिक पिरामिड पूलिंग के उपयोग से मॉडल की प्रदर्शन को और बढ़ाया। +- [YOLOv4](https://arxiv.org/abs/2004.10934) 2020 में जारी किया गया, Mosaic डेटा वृद्धि, एक नया anchor-free डिटेक्शन हेड और एक नया लॉस फ़ंक्शन के जैसे नवाचार द्वारा मॉडल को बेहतर बनाया गया। +- [YOLOv5](https://github.com/ultralytics/yolov5) मॉडल की प्रदर्शन को और बेहतर बनाने के साथ, हाइपरपैरामीटर ऑप्टिमाइज़ेशन, एकीकृत प्रयोग ट्रैकिंग और लोकप्रिय export formats में स्वचालित निर्यात जैसे नए सुविधाएं जोड़ी गईं। +- [YOLOv6](https://github.com/meituan/YOLOv6) 2022 में [मेटुआन](https://about.meituan.com/) द्वारा ओपन-सोस्ड किया गया था और कई कम्पनी के स्वतंत्र वितरण रोबोट में उपयोग में है। +- [YOLOv7](https://github.com/WongKinYiu/yolov7) ने COCO keypoints डेटासेट पर पोज अनुमान जैसे अतिरिक्त टास्क जोड़ दिया। +- [YOLOv8](https://github.com/ultralytics/ultralytics) Ultralytics द्वारा YOLO का नवीनतम संस्करण है। एक तलवार की काट, आपातता मॉडल के सफलता पर निर्मितकर्ताओं की मेहनत की चटानों पर निर्माण करके YOLOv8 ने पिछले संस्करणों की सफलता पर आधारित, नई सुविधाएं और सुधार अद्यतित प्रदर्शन, लचीलापन और प्रदार्थता के लिए प्रस्तुत किए हैं। YOLOv8 विजन AI tasks, जैसे [पता लगाना](tasks/detect.md), [विभाजन](tasks/segment.md), [पोज अनुमान](tasks/pose.md), [ट्रैकिंग](modes/track.md), और [वर्गीकरण](tasks/classify.md) का पूरा समर्थन करता है। यह विविध अनुप्रयोग और क्षेत्रों में योलोवी8 की क्षमताओं का उपयोग करने की अनुमति देता 
है। + +YOLO लाइसेंसेस: Ultralytics YOLO का प्रयोग कैसे होता है? + +Ultralytics विभिन्न उपयोग मामलों को समर्थित करने के लिए दो लाइसेंसिंग विकल्प प्रदान करता है: + +- **AGPL-3.0 लाइसेंस**: यह [OSI स्वीकृत](https://opensource.org/licenses/) ओपन-सोर्स लाइसेंस छात्रों और उत्साहीयों के लिए उपयुक्त है, गहन सहयोग और ज्ञान साझा करने के लिए प्रोत्साहित करता है। अधिक जानकारी के लिए [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) फ़ाइल देखें। +- **व्यवसायिक लाइसेंस**: व्यावसायिक उपयोग के लिए डिज़ाइन किया गया, यह लाइसेंस Ultralytics सॉफ़्टवेयर और AI मॉडल को वाणिज्यिक माल और सेवाओं में सरलतापूर्वक सम्मिलित करने की अनुमति देता है, AGPL-3.0 की ओपन-सोर्स आवश्यकताओं को छोड़ता है। यदि आपके परिदृश्य में हमारे समाधानों को एक वाणिज्यिक प्रस्ताव में एम्बेड करना शामिल है, [Ultralytics Licensing](https://ultralytics.com/license) के माध्यम से संपर्क करें। + +हमारी लाइसेंसिंग रणनीति इस सुनिश्चित करने के लिए है कि हमारे ओपन-सोर्स परियोजनाओं में किए गए कोई भी सुधार समुदाय को लौटाए जाएं। हम ओपन सोर्स के सिद्धांतों को अपने दिल के पास रखते हैं ❤️, और हमारा मिशन यह सुनिश्चित करना है कि हमारे योगदानों का उपयोग और विस्तार किए जाने के तरीकों में क्रियान्वयन किए जाएं जो सभी के लिए लाभदायक हों। diff --git a/docs/hi/models/fast-sam.md b/docs/hi/models/fast-sam.md new file mode 100644 index 0000000..4dc714f --- /dev/null +++ b/docs/hi/models/fast-sam.md @@ -0,0 +1,193 @@ +--- +comments: true +description: FastSAM एक सीएनएन पर आधारित समाधान है जो छवियों में वास्तविक समय ऑब्जेक्ट सेगमेंटेशन के लिए बनाया गया है। यह यूजर इंटरेक्शन, संगणनीय प्रभावशीलता और विजन कार्यों को अनुकूलित करने में सक्षम है। +keywords: FastSAM, machine learning, CNN-based solution, object segmentation, वास्तविक समय का समाधान, Ultralytics, विजन कार्य, छवि प्रोसेसिंग, उद्योगिक अनुप्रयोग, यूजर इंटरैक्शन +--- + +# Fast Segment Anything Model (FastSAM) + +फास्ट सेगमेंट एनीथिंग मॉडल (फास्टएसएएम) एक नवीन, वास्तविक समय में कार्यरत सीएनएन पर आधारित समाधान है जो एनीथिंग टास्क को सेगमेंट करने के लिए बनाया गया है। इस टास्क 
का उद्देश्य विभिन्न संभावित उपयोक्ता इंटरेक्शन प्रोम्प्ट्स पर आधारित छवियों में किसी भी ऑब्जेक्ट को सेगमेंट करना है। फास्टएसएएम ने संगणनात्मक मांग को कम करते हुए मुकाबले क्षमता को बरकरार रखते हुए संगणकीय मांगों को काफी कम किया है, जिसके कारण यह विभिन्न विजन कार्यों के लिए एक व्यावहारिक विकल्प बनाता है। + +![फास्ट सेगमेंट एनीथिंग मॉडल (फास्टएसएएम) आर्किटेक्चर इंट्रो](https://user-images.githubusercontent.com/26833433/248551984-d98f0f6d-7535-45d0-b380-2e1440b52ad7.jpg) + +## अवलोकन + +FastSAM, [सेगमेंट एनीथिंग मॉडल (एसएएम)](sam.md) की सीमाओं का सामना करने के लिए बनाया गया है, जो एक भारी ट्रांसफॉर्मर मॉडल है और उचित संसाधन आवश्यकताओं को है। फास्टएसएएम ने सार्वभौमिक चरणों में सेगमेंट एनीथिंग टास्क को अलग-अलग दो साँप्रदायिक चरणों में अलग किया है: सभी इंस्टेंस सेगमेंटेशन और प्रॉम्प्ट-गाइडेड चयन। पहले चरण में, यह [व्योलोवी8-सेग](../tasks/segment.md) का उपयोग करके छवि में सभी इंस्टेंस की सेगमेंटेशन मास्क उत्पन्न करता है। दूसरे चरण में, यह प्रोम्प्ट के बराबर क्षेत्र-ऑफ-इंटरेस्ट को आउटपुट करता है। + +## प्रमुख सुविधाएं + +1. **वास्तविक समय का समाधान:** सीएनएन की संगणनात्मक प्रभावशीलता का उपयोग करके, फास्टएसएएम एनीथिंग टास्क के लिए वास्तविक समय समाधान प्रदान करता है, जिससे यह उद्योगिक अनुप्रयोगों के लिए महत्वपूर्ण परिणामों की आवश्यकता होती है। + +2. **प्रदार्थी और प्रदर्शन क्षमता:** फास्टएसएएम संगणनात्मक और संसाधन मांग में काफी कमी प्रदान करता है बिना प्रदर्शन गुणवत्ता पर कुछ बदले के। यह संसाधनों की बहुत अधिक कमी के साथ एसएएम के लगभग तुलनात्मक प्रदर्शन प्राप्त करता है, जिससे वास्तविक समय अनुप्रयोग संभव होता है। + +3. **प्रोम्प्ट-गाइडेड सेगमेंटेशन:** फास्टएसएएम कई संभावित उपयोक्ता इंटरेक्शन प्रोम्प्ट्स द्वारा निर्दिष्ट छवि में किसी भी ऑब्जेक्ट को सेगमेंट कर सकता है, जो विभिन्न परिस्थितियों में लचीलापूर्णता और अनुकूलन प्रदान करता है। + +4. 
**YOLOv8-seg पर आधारित:** फास्टएसएएम [YOLOv8-seg](../tasks/segment.md) पर आधारित है, जो एक ऑब्जेक्ट डिटेक्टर है जिसमें एक इंस्टेंस सेगमेंटेशन शाखा है। इससे यह संभव होता है कि यह छवि में सभी इंस्टेंस की सेगमेंटेशन मास्क प्रभावी ढंग से उत्पन्न करे। + +5. **बेंचमार्क पर प्रतिस्पर्धी परिणाम:** MS COCO पर ऑब्जेक्ट प्रपोज़ल टास्क पर, फास्टएसएएम एकल NVIDIA RTX 3090 पर [एसएएम](sam.md) की तुलना में काफी तेज गति में उच्च अंक प्राप्त करता है, जो इसकी प्रभावशीलता और क्षमता को दिखाता है। + +6. **वास्तविक अनुप्रयोग:** प्रस्तावित दृष्टिकोण बड़ी संख्या में विजन कार्यों के लिए एक नया, व्यावहारिक समाधान प्रदान करता है, जो मौजूदा विधियों से दसियों या सैकड़ों गुना तेज है। + +7. **मॉडल संक्षिप्ति क्षमता:** फास्टएसएएम ने प्रवेशी अभिकरण के लिए एक कृत्रिम प्राथमिकता को संरचना में परिचय कराने के द्वारा कंप्यूटेशनल प्रयास को काफी कम करने की संभावना दिखाई है, इस प्रकार सामान्य विजन कार्यों के लिए बड़े मॉडल आर्किटेक्चर के लिए नए संभावनाओं को खोलती है। + +## उपलब्ध मॉडल, समर्थित कार्य और ऑपरेटिंग मोड + +इस सारणी में उपलब्ध मॉडल, उनके विशिष्ट पूर्व-प्रशिक्षित वेट और उनके समर्थित कार्यों को पेश किया गया है, साथ ही विभिन्न ऑपरेटिंग मोड के साथ उनकी संगतता दर्शाई गई है: समर्थित मोड के लिए ✅ इमोजी और असमर्थित मोड के लिए ❌ इमोजी लगाए गए हैं। + +| मॉडल प्रकार | पूर्व-प्रशिक्षित वेट्स | समर्थित कार्य | इंफरेंस | मान्यीकरण | प्रशिक्षण | निर्यात | +|-----------------|------------------------|--------------------------------------------|-------|-----------|-----------|---------| +| फास्टएसएएम-एस | `FastSAM-s.pt` | [इंस्टेंस सेगमेंटेशन](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| फास्टएसएएम-एक्स | `FastSAM-x.pt` | [इंस्टेंस सेगमेंटेशन](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## उपयोग के उदाहरण + +फास्टएसएएम मॉडल को अपने पायथन ऐप्लिकेशन में एकीकृत करना आसान है। Ultralytics उपयोगकर्ता-मित्रपूर्ण पायथन API और CLI कमांड्स प्रदान करता है ताकि विकास को सरल बनाया जा सके। + +### पूर्वानुमान उपयोग + +एक छवि पर ऑब्जेक्ट डिटेक्शन करने के लिए, नीचे दिए गए उदाहरण का उपयोग 
करें: + +!!! Example "उदाहरण" + + === "पायथन" + ```python + from ultralytics import FastSAM + from ultralytics.models.fastsam import FastSAMPrompt + + # एक inference source निर्धारित करें + source = 'path/to/bus.jpg' + + # एक फास्टएसएएम मॉडल बनाएं + model = FastSAM('FastSAM-s.pt') # या FastSAM-x.pt + + # छवि पर inference चलाएं + everything_results = model(source, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9) + + # प्रोम्प्ट प्रक्रिया वस्तु को तैयार करें + prompt_process = FastSAMPrompt(source, everything_results, device='cpu') + + # सब कुछ प्रोम्प्ट + ann = prompt_process.everything_prompt() + + # बॉक्स डिफ़ॉल्टवत आकार [0,0,0,0] -> [x1,y1,x2,y2] + ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300]) + + # पाठ प्रोम्प्ट + ann = prompt_process.text_prompt(text='a photo of a dog') + + # पॉइंट प्रोम्प्ट + # डिफ़ॉल्ट point [[0,0]] [[x1,y1],[x2,y2]] + # डिफ़ॉल्ट point_label [0] [1,0] 0:background, 1:foreground + ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1]) + prompt_process.plot(annotations=ann, output='./') + ``` + + === "CLI" + ```bash + # शीघ्रदर्शन मॉडल को लोड करें और उसे कुछ वस्तुओं के साथ सेगमेंट करें + yolo segment predict model=FastSAM-s.pt source=path/to/bus.jpg imgsz=640 + ``` + +यह स्निपेट प्रशिक्षित मॉडल को लोड करने और एक इमेज पर प्रभासिती का निर्धारण करने की सरलता का दिखावा करता है। + +### वैल उपयोग + +एक डेटासेट पर मॉडल की मान्यीकरण करने के लिए निम्नलिखित तरीके का उपयोग किया जा सकता है: + +!!! 
Example "उदाहरण" + + === "पायथन" + ```python + from ultralytics import FastSAM + + # एक फास्टएसएएम मॉडल बनाएं + model = FastSAM('FastSAM-s.pt') # या FastSAM-x.pt + + # मॉडल को मान्यित करें + results = model.val(data='coco8-seg.yaml') + ``` + + === "CLI" + ```bash + # FastSAM मॉडल को लोड करें और इसे साइज़ 640 पर COCO8-seg उदाहरण डेटासेट पर मान्यित करें + yolo segment val model=FastSAM-s.pt data=coco8-seg.yaml imgsz=640 + ``` + +कृपया ध्यान दें कि फास्टएसएएम केवल एक ही वर्ग की वस्तुओं की डिटेक्शन और सेगमेंटेशन का समर्थन करता है। इसका मतलब है कि यह सभी ऑब्जेक्टों को एक ही वर्ग के रूप में मान्यता देगा और सभी ऑब्जेक्ट्स को एक ही वर्ग के रूप में सेगमेंट करेगा। इसलिए, डेटासेट को तैयार करते समय, आपको सभी ऑब्जेक्ट श्रेणी आईडी को 0 में रूपांतरित करने की आवश्यकता होगी। + +## फास्टएसएएम आधिकारिक उपयोग + +फास्टएसएएम को [https://github.com/CASIA-IVA-Lab/FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM) रिपॉजिटरी से सीधे भी उपयोग किया जा सकता है। यहां आपको फास्टएसएएम का उपयोग करने के लिए आमतौर पर लिए जाने वाले कदमों का संक्षेपिक अवलोकन है: + +### स्थापना + +1. फास्टएसएएम रिपॉजिटरी क्लोन करें: + ```shell + git clone https://github.com/CASIA-IVA-Lab/FastSAM.git + ``` + +2. पायथन 3.9 के साथ एक Conda वातावरण बनाएं और सक्रिय करें: + ```shell + conda create -n FastSAM python=3.9 + conda activate FastSAM + ``` + +3. क्लोन किए गए रिपॉजिटरी में जाएं और आवश्यक पैकेजों को स्थापित करें: + ```shell + cd FastSAM + pip install -r requirements.txt + ``` + +4. CLIP मॉडल स्थापित करें: + ```shell + pip install git+https://github.com/openai/CLIP.git + ``` + +### उदाहरण उपयोग + +1. [मॉडल चेकपॉइंट](https://drive.google.com/file/d/1m1sjY4ihXBU1fZXdQ-Xdj-mDltW-2Rqv/view?usp=sharing) डाउनलोड करें। + +2. 
FastSAM का उपयोग करके इंफरेंस करें। उदाहरण कमांड: + + - छवि में सब कुछ सेगमेंट करें: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg + ``` + + - पाठ प्रोम्प्ट का उपयोग करके विशेष ऑब्जेक्ट सेगमेंट करें: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --text_prompt "the yellow dog" + ``` + + - एक बाउंडिंग बॉक्स के भीतर के ऑब्जेक्ट को सेगमेंट करें (बॉक्स के निर्देशांक xywh स्वरूप में दें): + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --box_prompt "[570,200,230,400]" + ``` + + - विशेष बिंदुओं के पास ऑब्जेक्ट को सेगमेंट करें: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --point_prompt "[[520,360],[620,300]]" --point_label "[1,0]" + ``` + +इसके अलावा, आप फास्टएसएएम का उपयोग करने के लिए एक [कोलैब डेमो](https://colab.research.google.com/drive/1oX14f6IneGGw612WgVlAiy91UHwFAvr9?usp=sharing) या एक [हगिंगफेस वेब डेमो](https://huggingface.co/spaces/An-619/FastSAM) पर भी प्रयास कर सकते हैं। + +## उद्धृति और प्रशंसापत्र + +हम वास्तविक समय इंस्टेंस सेगमेंटेशन के क्षेत्र में महत्वपूर्ण योगदान देने के लिए फास्टएसएएम लेखकों का आभार व्यक्त करते हैं: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{zhao2023fast, + title={Fast Segment Anything}, + author={Xu Zhao and Wenchao Ding and Yongqi An and Yinglong Du and Tao Yu and Min Li and Ming Tang and Jinqiao Wang}, + year={2023}, + eprint={2306.12156}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +[FastSAM](https://arxiv.org/abs/2306.12156) पेपर arXiv पर उपलब्ध है। लेखकों ने अपना काम सार्वजनिक रूप से उपलब्ध कराया है, और कोडबेस [गिटहब](https://github.com/CASIA-IVA-Lab/FastSAM) पर उपलब्ध है। हम इन दोनों के प्रयासों की कीमत करते हैं और उनके प्रयास के लिए धन्यवाद देते हैं जो क्षेत्र को आगे बढ़ाने और अपने काम को व्यापक समुदाय के लिए उपलब्ध कराने में समर्थ हैं। diff --git a/docs/hi/models/index.md b/docs/hi/models/index.md new file mode 100644 index 0000000..3ac9539 --- /dev/null +++ b/docs/hi/models/index.md @@ -0,0 +1,98 @@ +--- +comments: true +description: Ultralytics द्वारा समर्थित YOLO परिवार की विविध रेंज, SAM, MobileSAM, FastSAM, YOLO-NAS, और RT-DETR मॉडल्स का पता लगाएं। CLI और Python उपयोग के लिए उदाहरणों के साथ प्रारंभ करें। +keywords: Ultralytics, दस्तावेज़ीकरण, YOLO, SAM, MobileSAM, FastSAM, YOLO-NAS, RT-DETR, मॉडल्स, आर्किटेक्चर्स, Python, CLI +--- + +# Ultralytics द्वारा समर्थित मॉडल + +Ultralytics के मॉडल दस्तावेज़ीकरण में आपका स्वागत है! हम [ऑब्जेक्ट डिटेक्शन](../tasks/detect.md), [इंस्टेंस सेगमेंटेशन](../tasks/segment.md), [इमेज क्लासिफिकेशन](../tasks/classify.md), [पोज़ एस्टिमेशन](../tasks/pose.md), और [मल्टी-ऑब्जेक्ट ट्रैकिंग](../modes/track.md) जैसे विशिष्ट कामों के लिए डिज़ाइन किए गए मॉडलों की एक विस्तृत रेंज का समर्थन प्रदान करते हैं। यदि आप Ultralytics में अपने मॉडल आर्किटेक्चर को योगदान देने में रुचि रखते हैं, तो हमारा [Contributing Guide](../../help/contributing.md) देखें। + +!!! Note "ध्यान दें" + + 🚧 हमारी अलग-अलग भाषाओं में दस्तावेज़ीकरण वर्तमान में निर्माणाधीन है, और हम इसे सुधारने के लिए कठिन परिश्रम कर रहे हैं। धैर्य रखने के लिए धन्यवाद! 🙏 + +## प्रमुख मॉडल + +यहां कुछ मुख्य मॉडल दिए गए हैं: + +1. 
**[YOLOv3](yolov3.md)**: YOLO मॉडल परिवार का तीसरा संस्करण, जिसे जोसेफ रेडमोन द्वारा बनाया गया है, जो इसकी कुशल रियल-टाइम ऑब्जेक्ट डिटेक्शन क्षमताओं के लिए जाना जाता है। +2. **[YOLOv4](yolov4.md)**: YOLOv3 को अपडेट करने वाला एक डार्कनेट-नेटिव, जिसे 2020 में एलेक्सी बोचकोवस्की द्वारा जारी किया गया। +3. **[YOLOv5](yolov5.md)**: उल्ट्रालाइटिक्स द्वारा बेहतर YOLO आर्किटेक्चर का एक सुधारित संस्करण, जो पिछले संस्करणों की तुलना में बेहतर प्रदर्शन और गति की समझौता की पेशकश करता है। +4. **[YOLOv6](yolov6.md)**: 2022 में [Meituan](https://about.meituan.com/) द्वारा जारी किया गया, और कंपनी के कई स्वायत्त डिलीवरी रोबोट्स में उपयोग में। +5. **[YOLOv7](yolov7.md)**: 2022 में YOLOv4 के लेखकों द्वारा जारी किया गया अपडेटेड YOLO मॉडल। +6. **[YOLOv8](yolov8.md) नया 🚀**: YOLO परिवार का नवीनतम संस्करण, जिसमें इंस्टेंस सेगमेंटेशन, पोज/कीपॉइंट्स अनुमान, और क्लासिफिकेशन जैसी उन्नत क्षमताएं शामिल हैं। +7. **[Segment Anything Model (SAM)](sam.md)**: मेटा के Segment Anything Model (SAM)। +8. **[Mobile Segment Anything Model (MobileSAM)](mobile-sam.md)**: मोबाइल एप्लिकेशनों के लिए MobileSAM, क्युंग ही यूनिवर्सिटी द्वारा। +9. **[Fast Segment Anything Model (FastSAM)](fast-sam.md)**: चीनी विज्ञान अकादमी, ऑटोमेशन संस्थान के इमेज & वीडियो एनालिसिस ग्रुप द्वारा FastSAM। +10. **[YOLO-NAS](yolo-nas.md)**: YOLO न्यूरल आर्किटेक्चर सर्च (NAS) मॉडल्स। +11. **[Realtime Detection Transformers (RT-DETR)](rtdetr.md)**: बैदु के पडलपैडल Realtime Detection Transformer (RT-DETR) मॉडल। + +

+
+ +
+ देखें: कुछ लाइनों के कोड में Ultralytics YOLO मॉडल्स को चलाएं। +

+ +## प्रारंभ करना: उपयोग उदाहरण + +यह उदाहरण योलो प्रशिक्षण और अनुमान के सरल उदाहरण प्रदान करता है। इन और अन्य [modes](../modes/index.md) के पूर्ण दस्तावेज़ीकरण के लिए [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) और [Export](../modes/export.md) दस्तावेज़ों के पन्नों को देखें। + +नीचे दिया गया उदाहरण YOLOv8 [Detect](../tasks/detect.md) मॉडल्स के लिए है, जो ऑब्जेक्ट डिटेक्शन के लिए हैं। अतिरिक्त समर्थित कार्यों के लिए [Segment](../tasks/segment.md), [Classify](../tasks/classify.md) और [Pose](../tasks/pose.md) दस्तावेज़ों को देखें। + +!!! Example "उदाहरण" + + === "Python" + + पायथन में मॉडल बनाने के लिए PyTorch प्रीट्रेन्ड '*.pt' मॉडल्स के साथ-साथ कॉन्फ़िगरेशन '*.yaml' फ़ाइलों को `YOLO()`, `SAM()`, `NAS()` और `RTDETR()` क्लासेज़ में पास किया जा सकता है: + + ```python + from ultralytics import YOLO + + # COCO-प्रीट्रेन्ड YOLOv8n मॉडल लोड करें + model = YOLO('yolov8n.pt') + + # मॉडल की जानकारी दिखाएँ (वैकल्पिक) + model.info() + + # COCO8 उदाहरण डेटासेट पर 100 एपोक्स के लिए मॉडल प्रशिक्षित करें + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # 'bus.jpg' इमेज पर YOLOv8n मॉडल के साथ अनुमान चलाएँ + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLI कमांड्स उपलब्ध हैं जो सीधे मॉडल्स को चलाने के लिए हैं: + + ```bash + # COCO-प्रीट्रेन्ड YOLOv8n मॉडल को लोड करें और COCO8 उदाहरण डेटासेट पर 100 एपोक्स के लिए प्रशिक्षित करें + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # COCO-प्रीट्रेन्ड YOLOv8n मॉडल को लोड करें और 'bus.jpg' इमेज पर अनुमान चलाएँ + yolo predict model=yolov8n.pt source=path/to/bus.jpg + ``` + +## नए मॉडल्स का योगदान + +आप Ultralytics में अपने मॉडल का योगदान देने के इच्छुक हैं? बहुत बढ़िया! हम हमेशा अपने मॉडल पोर्टफोलियो का विस्तार करने के लिए खुले हैं। + +1. **रिपॉजिटरी फोर्क करें**: [Ultralytics GitHub रिपॉजिटरी](https://github.com/ultralytics/ultralytics) को फोर्क करके शुरू करें। + +2. 
**अपने फोर्क को क्लोन करें**: अपने फोर्क को अपनी लोकल मशीन पर क्लोन करें और काम करने के लिए एक नई ब्रांच बनाएं। + +3. **अपना मॉडल लागू करें**: हमारे [Contributing Guide](../../help/contributing.md) में दिए गए कोडिंग स्टैंडर्ड्स और दिशानिर्देशों का अनुसरण करते हुए अपने मॉडल को जोड़ें। + +4. **गहराई से परीक्षण करें**: अपने मॉडल का परीक्षण अलग से और पाइपलाइन के हिस्से के रूप में किया जा सकता है। + +5. **पुल रिक्वेस्ट बनाएं**: एक बार जब आप अपने मॉडल से संतुष्ट हो जाएं, तो समीक्षा के लिए मुख्य रिपॉजिटरी को एक पुल रिक्वेस्ट बनाएं। + +6. **कोड समीक्षा और मिलान**: समीक्षा के बाद, यदि आपका मॉडल हमारे मानदंडों को पूरा करता है, तो इसे मुख्य रिपॉजिटरी में मिला दिया जाएगा। + +विस्तृत चरणों के लिए हमारा [Contributing Guide](../../help/contributing.md) देखें। diff --git a/docs/hi/models/mobile-sam.md b/docs/hi/models/mobile-sam.md new file mode 100644 index 0000000..ea9da4a --- /dev/null +++ b/docs/hi/models/mobile-sam.md @@ -0,0 +1,115 @@ +--- +comments: true +description: Ultralytics मार्गदर्शिका में MobileSAM के बारे में और उसके प्रायोगशाला तुलनात्मक विवेचन, मूल SAM के साथ तुलना और इसे Ultralytics ढांचे में डाउनलोड और परीक्षण कैसे करें। अपने मोबाइल ऐप्लिकेशन को बेहतर बनाएं। +keywords: MobileSAM, Ultralytics, SAM, मोबाइल ऐप्लिकेशन, Arxiv, GPU, API, छवि एनकोडर, मास्क डिकोडर, मॉडल डाउनलोड, परीक्षण पद्धति +--- + +![MobileSAM लोगो](https://github.com/ChaoningZhang/MobileSAM/blob/master/assets/logo2.png?raw=true) + +# मोबाइल सेगमेंट कुछ भी (MobileSAM) + +मोबाइलSAM पेपर [arXiv](https://arxiv.org/pdf/2306.14289.pdf) पर अब उपलब्ध है। + +MobileSAM के संचालन का एक प्रदर्शन कम्प्यूटर पर पहुंचा जा सकता है उस [डेमो लिंक](https://huggingface.co/spaces/dhkim2810/MobileSAM) के माध्यम से। Mac i5 CPU पर प्रदर्शन करने में लगभग 3 सेकंड का समय लगता है। हगिंग फेस डेमो परिवेश और कम प्रदर्शन वाले सीपियू ने प्रतिक्रिया को धीमी किया है, लेकिन यह अभी भी प्रभावी ढंग से काम करता है। + +मोबाइलSAM [Grounding-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything), 
[AnyLabeling](https://github.com/vietanhdev/anylabeling), और [Segment Anything in 3D](https://github.com/Jumpat/SegmentAnythingin3D) सहित विभिन्न परियोजनाओं में लागू है। + +मोबाइलSAM एक एकल GPU पर 100k डेटासेट (मूल छवि का 1%) के साथ प्रशिक्षित होता है और इसमें एक दिन से कम समय लगता है। इस प्रशिक्षण के लिए कोड भविष्य में उपलब्ध कराया जाएगा। + +## उपलब्ध मॉडल, समर्थित कार्य और ऑपरेटिंग मोड + +इस तालिका में उपलब्ध मॉडल, उनके विशिष्ट पूर्व-प्रशिक्षित वजन, वे कार्य जिन्हें वे समर्थन करते हैं, और उनका अभिन्नतम संगतता के साथ विभिन्न ऑपरेटिंग मोड (इंफरेंस, वैधानिकी, प्रशिक्षण, और निर्यात) प्रदर्शित किए गए हैं, जिन्हें समर्थित मोड के लिए ✅ emoji और असमर्थित मोड के लिए ❌ emoji से दर्शाया गया है। + +| मॉडल प्रकार | पूर्व-प्रशिक्षित वजन | समर्थित कार्य | इंफरेंस | वैधानिकी | प्रशिक्षण | निर्यात | +|-------------|----------------------|--------------------------------------------|---------|----------|-----------|---------| +| MobileSAM | `mobile_sam.pt` | [इंस्टेंस सेगमेंटेशन](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## SAM से MobileSAM में अनुकूलन + +MobileSAM मूल SAM की तरफ से समान पाइपलाइन बरकरार रखता है, हमने मूल की प्री-प्रोसेसिंग, पोस्ट-प्रोसेसिंग और सभी अन्य इंटरफेसों को सम्मिलित कर दिया है। इसलिए, वर्तमान में मूल SAM का उपयोग करने वाले लोग मिनिमल प्रयास के साथ MobileSAM में ट्रांसिशन कर सकते हैं। + +MobileSAM मूल SAM के समान पाइपलाइन में उत्तम प्रदर्शन करता है और केवल छवि एन्कोडर में परिवर्तन होता है। विशेष रूप से, हम मूल भारीवज्ञानिक ViT-H एन्कोडर (632M) को एक छोटे Tiny-ViT (5M) से बदलते हैं। एकल GPU पर MobileSAM लगभग 12ms प्रति छवि पर ऑपरेट करता है: 8ms छवि एन्कोडर पर और 4ms मास्क डिकोडर पर। + +विट-आधारित इमेज एन्कोडरों की तुलना नीचे दी गई तालिका प्रदान करती है: + +| छवि एन्कोडर | मूल SAM | MobileSAM | +|-------------|---------|-----------| +| पैरामीटर्स | 611M | 5M | +| स्पीड | 452ms | 8ms | + +मूल SAM और MobileSAM दोनों में समान प्रॉम्प्ट गाइडेड मास्क डिकोडर का उपयोग किया गया है: + +| मास्क डिकोडर | मूल SAM | MobileSAM | +|--------------|---------|-----------| +| 
पैरामीटर्स | 3.876M | 3.876M | +| स्पीड | 4ms | 4ms | + +यहां पाइपलाइन की तुलना है: + +| पूरा पाइपलाइन (एन्कोडर+डिकोडर) | मूल SAM | MobileSAM | +|--------------------------------|---------|-----------| +| पैरामीटर्स | 615M | 9.66M | +| स्पीड | 456ms | 12ms | + +MobileSAM और मूल SAM के प्रदर्शन को एक बिन्दु और बॉक्स के रूप में प्रदर्शित किया जाता है। + +![बिन्दु के रूप में छवि](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/mask_box.jpg?raw=true) + +![बॉक्स के रूप में छवि](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/mask_box.jpg?raw=true) + +बेहतर प्रदर्शन से MobileSAM मौजूदा FastSAM की तुलना में लगभग 5 गुना छोटा और 7 गुना तेज है। अधिक विवरण [MobileSAM प्रोजेक्ट पेज](https://github.com/ChaoningZhang/MobileSAM) पर उपलब्ध हैं। + +## Ultralytics में MobileSAM का परीक्षण + +मूल SAM की तरह ही, हम Ultralytics में एक सीधा परीक्षण विधि प्रदान करते हैं, जिसमें बिंदु और बॉक्स प्रॉम्प्ट्स दोनों के लिए मोड शामिल हैं। + +### मॉडल डाउनलोड + +आप यहां से मॉडल डाउनलोड कर सकते हैं [here](https://github.com/ChaoningZhang/MobileSAM/blob/master/weights/mobile_sam.pt)। + +### बिंदु प्रॉम्प्ट + +!!! Example "उदाहरण" + + === "Python" + ```python + from ultralytics import SAM + + # मॉडल लोड करें + model = SAM('mobile_sam.pt') + + # बिंदु प्रॉम्प्ट पर आधारित एक सेगमेंट पूर्वानुमान करें + model.predict('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +### बॉक्स प्रॉम्प्ट + +!!! 
Example "उदाहरण" + + === "Python" + ```python + from ultralytics import SAM + + # मॉडल लोड करें + model = SAM('mobile_sam.pt') + + # बॉक्स प्रॉम्प्ट पर आधारित एक सेगमेंट पूर्वानुमान करें + model.predict('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + ``` + +हमने `MobileSAM` और `SAM` दोनों को एक ही API का उपयोग करके इम्प्लिमेंट किया है। अधिक उपयोग जानकारी के लिए, कृपया [SAM पेज](sam.md) देखें। + +## संदर्भ और आभार + +अगर आप अपने अनुसंधान या विकास कार्य में MobileSAM को उपयोगी पाते हैं, तो कृपया हमारे पेपर को साइट करने का विचार करें: + +!!! Quote "" +=== "BibTeX" + + ```bibtex + @article{mobile_sam, + title={Faster Segment Anything: Towards Lightweight SAM for Mobile Applications}, + author={Zhang, Chaoning and Han, Dongshen and Qiao, Yu and Kim, Jung Uk and Bae, Sung Ho and Lee, Seungkyu and Hong, Choong Seon}, + journal={arXiv preprint arXiv:2306.14289}, + year={2023} + } diff --git a/docs/hi/models/rtdetr.md b/docs/hi/models/rtdetr.md new file mode 100644 index 0000000..383d780 --- /dev/null +++ b/docs/hi/models/rtdetr.md @@ -0,0 +1,94 @@ +--- +comments: true +description: + Baidu के RT-DETR का अवलोकन करें: विज़न ट्रांसफॉर्मर के द्वारा संचालित, उन्नत और अनुकूलनयोग्य वास्तविक समय ऑब्जेक्ट डिटेक्टर, जिसमें तैयार मॉडल शामिल हैं। +keywords: RT-DETR, Baidu, विज़न ट्रांसफॉर्मर्स, ऑब्जेक्ट डिटेक्शन, वास्तविक समय प्रदर्शन, CUDA, TensorRT, IoU-जागरूक क्वेरी चयन, Ultralytics, पायथन एपीआई, PaddlePaddle +--- + +# Baidu के RT-DETR: एक विज़न ट्रांसफॉर्मर के आधार पर वास्तविक समय ऑब्जेक्ट डिटेक्टर + +## अवलोकन + +Baidu द्वारा विकसित रियल-टाइम डिटेक्शन ट्रांसफॉर्मर (RT-DETR) एक उन्नत, end-to-end ऑब्जेक्ट डिटेक्टर है जो उच्च सटीकता बनाए रखते हुए वास्तविक समय प्रदर्शन प्रदान करता है। यह Vision ट्रांसफॉर्मर्स (ViT) की शक्ति का उपयोग करके बहुस्तरीय सुविधाओं की कुशलतापूर्वक प्रोसेसिंग करता है और इंट्रा-स्केल इंटरेक्शन और क्रॉस-स्केल फ्यूजन को अलग करके विभिन्न मापों की सुविधा प्रदान करता है। RT-DETR अत्यधिक अनुकूलनयोग्य है, जिसमें ब्यावसायिक लिंग के विभिन्न 
डिकोडर लेयर का उपयोग करके उपन्यासात्मक गति को समायोजित करने की समर्थन की गई है बिना पुनर्प्रशिक्षण के। इस मॉडल का परिणामस्वरूप, जो कि CUDA और TensorRT के समर्थनयुक्त बैकेंड पर अच्छा क्रियाशील ऑब्जैक्ट डिटेक्टरों से अधिक प्रदर्शन करता है। + +![आदर्श छवि](https://user-images.githubusercontent.com/26833433/238963168-90e8483f-90aa-4eb6-a5e1-0d408b23dd33.png) +**Baidu के RT-DETR का अवलोकन।** RT-DETR मॉडल आर्किटेक्चर आवचेदक जैसे आखिरी तीन स्टेज {S3, S4, S5} को इंपुट टू एन्कोडर के रूप में दर्शाता है। कटिहरण योग्य हाइब्रिड एन्कोडर अंत: धारण विशिष्टताओं को Ekत्रित और इंत्राक्ष स्थलीय लक्षण (AIFI) और क्रॉस-स्केल लक्षण-संघटन मॉड्यूल (CCFM) द्वारा चित्र विशेषण में परिवर्तित करता है। IoU-जागरूक क्वेरी चयन का उपयोग आदर्श छवि चयन के लिए निर्धारित संख्या के चित्र विशेषताओं को कवल वस्त्रण स्वरूप में चयनित करने के लिए किया जाता है। अंत में, डिकोडर सहायक पूर्वानुमान मुद्दा किसी विशेष छवि को उपयुक्त ऑब्जेक्ट क्वेरी के रूप में आरम्भ करने के लिए संशोधित किया जाता है जिसके अंत में बॉक्स और विश्वास स्कोर पैदा किया जाता है ([स्रोत](https://arxiv.org/pdf/2304.08069.pdf))। + +### मुख्य विशेषताएँ + +- **कटिहरण योग्य हाइब्रिड एन्कोडर:** Baidu के RT-DETR ने इंत्रा-स्केल इंटरेक्शन और क्रॉस-स्केल फ्यूजन को अलग करके बहुस्तरीय विशेषताओं को प्रोसेस करने के लिए एक कटिहरण योग्य हाइब्रिड एन्कोडर का उपयोग किया है। यह अद्वितीय विज़न ट्रांसफॉर्मर से आधिक हिमांशीय लागत को कम करता है और वास्तविक समय ऑब्जेक्ट डिटेक्शन की अनुमति देता है। +- **IoU-जागरूक क्वेरी चयन:** Baidu के RT-DETR ने IoU-जागरूक क्वेरी चयन का उपयोग करके आदर्श छवि चयन को सुधारा है। इससे मॉडल द्वारा सीन में सबसे प्रासंगिक ऑब्जेक्ट पर केन्द्रित किया जा सकता है, जिससे डिटेक्शन की निशाना स्पष्टता में सुधार होती है। +- **अनुकूलनयोग्य अनुमान प्रदान:** Baidu के RT-DETR ने पुनर्प्रशिक्षण के आवश्यकता के बिना भिन्न डिकोडर लेयर का उपयोग करके अनुमान की गति की मान्यता में सुविधाएं समर्थित की हैं। यह अनुकूलनयोग्यता वास्तविक समय ऑब्जेक्ट डिटेक्शन स्थितियों में व्यावहारिक लागू करने की सहायता करती है। + +## प्री-तारकित मॉडल + +Ultralytics Python API विभिन्न 
स्केलों के साथ प्री-तारकित PaddlePaddle RT-DETR मॉडल प्रदान करता है: + +- RT-DETR-L: COCO val2017 पर 53.0% AP, T4 GPU पर 114 FPS +- RT-DETR-X: COCO val2017 पर 54.8% AP, T4 GPU पर 74 FPS + +## उपयोग उदाहरण + +यह उदाहरण सरल RT-DETR प्रशिक्षण और अनुमान उदाहरण प्रदान करता है। पूरे दस्तावेज़ीकरण के लिए इन और अन्य [modes](../modes/index.md) पर देखें: [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) और [Export](../modes/export.md) दस्तावेज़ पृष्ठों। + +!!! Example "उदाहरण" + + === "पायथन" + + ```python + from ultralytics import RTDETR + + # COCO-प्री-तारकित RT-DETR-l मॉडल लोड करें + model = RTDETR('rtdetr-l.pt') + + # मॉडल जानकारी प्रदर्शित करें (वैकल्पिक) + model.info() + + # COCO8 उदाहरण डेटासेट पर मॉडल को 100 एपोक्स के लिए प्रशिक्षित करें + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # RT-DETR-l मॉडल के साथ 'bus.jpg' छवि पर अनुमान चलाएं + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + ```bash + # COCO-प्री-तारकित RT-DETR-l मॉडल को लोड करें और उसे COCO8 उदाहरण डेटासेट पर 100 एपोक्स के लिए प्रशिक्षित करें + yolo train model=rtdetr-l.pt data=coco8.yaml epochs=100 imgsz=640 + + # COCO-प्री-तारकित RT-DETR-l मॉडल को लोड करें और 'bus.jpg' छवि पर अनुमान चलाएं + yolo predict model=rtdetr-l.pt source=path/to/bus.jpg + ``` + +## समर्थित कार्य और मोड + +इस तालिका में मॉडल प्रकार, विशेष प्री-तारकित भार, हर मॉडल द्वारा समर्थित कार्य, और [Train](../modes/train.md) , [Val](../modes/val.md), [Predict](../modes/predict.md), [Export](../modes/export.md) इत्यादि जैसे विभिन्न मोड्स विभाजित की गई हैं, ✅ इमोजियों द्वारा संकेतित हैं + +| मॉडल प्रकार | प्री-तारकित भार | समर्थित कार्य | अनुमान | मान्यता | प्रशिक्षण | निर्यात | +|---------------------|-----------------|-----------------------------------------|--------|---------|-----------|---------| +| RT-DETR Large | `rtdetr-l.pt` | [ऑब्जेक्ट डिटेक्शन](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| RT-DETR Extra-Large | `rtdetr-x.pt` | [ऑब्जेक्ट डिटेक्शन](../tasks/detect.md) | ✅ 
| ✅ | ✅ | ✅ | + +## प्रशंसापत्र और आभार + +यदि आप अपने शोध या विकास कार्य में Baidu के RT-DETR का उपयोग करते हैं, कृपया [मूल पेपर](https://arxiv.org/abs/2304.08069) को उद्धृत करें: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{lv2023detrs, + title={DETRs Beat YOLOs on Real-time Object Detection}, + author={Wenyu Lv and Shangliang Xu and Yian Zhao and Guanzhong Wang and Jinman Wei and Cheng Cui and Yuning Du and Qingqing Dang and Yi Liu}, + year={2023}, + eprint={2304.08069}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +हम Baidu और [PaddlePaddle](https://github.com/PaddlePaddle/PaddleDetection) टीम को आभार प्रकट करना चाहेंगे जिन्होंने बिल्कुल लोक-दृष्टि समुदाय के लिए इस योग्य संसाधन को बनाने और संभालने का कार्य किया है। विज़न ट्रांसफॉर्मर्स-आधारित वास्तविक समय ऑब्जेक्ट डिटेक्टर RT-DETR के विकास के साथ उनके योगदान को बड़ा रूप से स्वीकार किया जाता है। + +*Keywords: RT-DETR, ट्रांसफॉर्मर, ViT, विज़न ट्रांसफॉर्मर्स, Baidu RT-DETR, PaddlePaddle, PaddlePaddle RT-DETR, वास्तविक समय ऑब्जेक्ट डिटेक्शन, विज़न ट्रांसफॉर्मर्स आधारित ऑब्जेक्ट डिटेक्शन, प्री-तारकित PaddlePaddle RT-DETR मॉडल, Baidu के RT-DETR का उपयोग, Ultralytics Python API* diff --git a/docs/hi/models/sam.md b/docs/hi/models/sam.md new file mode 100644 index 0000000..8057178 --- /dev/null +++ b/docs/hi/models/sam.md @@ -0,0 +1,226 @@ +--- +comments: true +description: Ultralytics के Segment Anything Model (SAM) की अभिनव सेगमेंटेशन मॉडल का पता लगाएं जो रीयल-टाइम छवि सेगमेंटेशन की अनुमति देता है। समझें कि इसमें promptable सेगमेंटेशन, zero-shot प्रदर्शन शामिल है और यह कैसे उपयोग किया जाता है। +keywords: Ultralytics, Image segmentation, Segment Anything Model, SAM, SA-1B डेटासेट, रीयल-टाइम प्रदर्शन, zero-shot ट्रांसफर, वस्तु पहचानन, छवि विश्लेषण, मशीन लर्निंग +--- + +# Segment Anything Model (SAM) + +Segment Anything Model (SAM), यानी वस्तु को के सिलसीलेबंद छवि सेगमेंटेशन के पहले खंड "SAM"में आपका स्वागत है। यह क्रांतिकारी मॉडल समयबद्ध प्रदर्शन के साथ promptable छवि सेगमेंटेशन द्वारा खेल को 
बदल चुका है और क्षेत्र में नई मानकों को स्थापित किए हैं। + +## SAM: Segment Anything Model का परिचय + +Segment Anything Model (SAM), यानी SAM, एक अभिनव छवि सेगमेंटेशन मॉडल है जो promptable सेगमेंटेशन की अनुमति देता है, जिससे छवि विश्लेषण कार्यों में अविश्वसनीय विविधता प्राप्त होती है। SAM, Segment Anything पहल के दिल में आता है, जो छवि सेगमेंटेशन के लिए नई मॉडल, कार्य और डेटासेट का परिचय कराता है। + +SAM का उन्नत डिजाइन इसे नई छवि वितरणों और कार्यों के लिए पूर्वज्ञान के बिना सामायिक होने देता है, जिसे जीरो-शॉट ट्रांसफर के नाम से जाना जाता है। विस्तारशील [SA-1B डेटासेट](https://ai.facebook.com/datasets/segment-anything/) पर (जिसमें 1 अरब से अधिक मास्क हैं जो 11 मिलियन सावधानीपूर्वक इच्छीत छवियों पर बिखेरे गए हैं), SAM ने वास्तव में दिखाया है कि यह बेहद श्रेष्ठ जीरो-शॉट प्रदर्शन का anomaly-detection करने वाले पहले परिणामों को हर हाल में छू सकता है। + +![सैंपल डेटासेट छवि](https://user-images.githubusercontent.com/26833433/238056229-0e8ffbeb-f81a-477e-a490-aff3d82fd8ce.jpg) +हमारे नई पेशकश डेटासेट, SA-1B से दिए गए ओवरले मास्क वाली उदाहरण छवियाँ। SA-1B में 11M विविध, उच्च-रिजोल्यूशन, लाइसेंस और प्राइवेसी संरक्षण योग्य छवियाँ और 1.1B उच्च-गुणवत्ता वाले सेगमेंटेशन मास्क मौजूद हैं। इन मास्क को SAM द्वारा पूर्णतः स्वचालित रूप से टिप्पणी की गई हैं, और मानवीय रेटिंग और अनेक अभ्यासों से सत्यापित हुए अनुसार, इनकी गुणवत्ता और विविधता प्रमुख हैं। छवियों की संख्या के आधार पर छवियाँ समूहित की गई हैं (औसतन प्रति छवि पर लगभग 100 मास्क होती हैं)। + +## Segment Anything Model (SAM) की मुख्य विशेषताएं + +- **Promptable Segmentation Task:** SAM का निर्माण promptable सेगमेंटेशन कार्य के साथ किया गया है, जिसके बाद वह कोई भी प्रोम्प्ट देकर मान्य सेगमेंटेशन मास्क उत्पन्न कर सकता है, जैसे कि वस्तु का निर्देश करने वाले स्थानिक या पाठ संकेत। +- **उन्नत वास्तविकता:** Segment Anything Model में एक शक्तिशाली छवि इनकोडर, प्रोम्प्ट इनकोडर, और हल्की वजन के मास्क डीकोडर की बाईं लगाम लगातार बदलाव और अस्पष्टता की जागरूकता पर आधारित होती है। यह विशेष आर्किटेक्चर प्रारंभिक जोखिम ज्ञान के बिना नए 
क्षेत्रों और छवि वितरणों के लिए सुविधाजनक होने की अनुमति देता है। +- **SA-1B डेटासेट:** SAM पहल के द्वारा प्रस्तुत किए गए SA-1B डेटासेट में 11 मिलियन छवियों पर 1 अरब से अधिक मास्क होती हैं। इसे अब तक का सबसे बड़ा सेगमेंटेशन डेटासेट माना जाता है, जिससे SAM को विविधतापूर्ण और बड़े पैमाने पर भड़ास ट्रेनिंग डेटा स्रोत प्राप्त होता है। +- **जीरो-शॉट प्रदर्शन:** SAM विभिन्न सेगमेंटेशन कार्यों में उत्कृष्ट जीरो-शॉट प्रदर्शन प्रदर्शित करता है, जिससे यह विविध अनुप्रयोगों के लिए तत्परता के आवश्यकता के साथ तत्परता के यन्त्रियों के लिए तत्परता के यंत्रों के लिए तत्पर करने योग्य एक तत्परता यंत्र बन गया है। + +Segment Anything Model और SA-1B डेटासेट की गहन जानकारी के लिए, कृपया [Segment Anything वेबसाइट](https://segment-anything.com) पर जाएं और शोध पेपर [Segment Anything](https://arxiv.org/abs/2304.02643) की जाँच करें। + +## उपलब्ध मॉडल, समर्थित कार्य और संचालनिक विधियाँ + +यह तालिका उपलब्ध मॉडल, उनकी विशेष पूर्व-प्रशिक्षित वेट, उनके समर्थित कार्य और इंफरेंस, मान्यीकरण, प्रशिक्षण और निर्यात जैसे विभिन्न संचालित विधियों के साथ उनकी संबद्धता का प्रस्ताव प्रस्तुत करती है. + +| मॉडल प्रकार | पूर्व-प्रशिक्षित वेट | समर्थित कार्य | Inference | Validation | Training | Export | +|-------------|----------------------|---------------------|-----------|------------|----------|--------| +| SAM बेस | `sam_b.pt` | इंस्टेंस सेगमेंटेशन | ✅ | ❌ | ❌ | ✅ | +| SAM लार्ज | `sam_l.pt` | इंस्टेंस सेगमेंटेशन | ✅ | ❌ | ❌ | ✅ | + +## SAM का उपयोग कैसे करें: छवि सेगमेंटेशन में यथार्थता और शक्ति + +Segment Anything Model का उपयोग उपस्थित डेटा से आगे के कार्यों के लिए किया जा सकता है। इसमें एज डिटेक्शन, निवेदन प्रस्ताव उत्पादन, इंस्टेंस सेगमेंटेशन, और प्राथमिक पाठ-तो-मास्क पूर्वानुमान शामिल हैं। प्रोम्प्ट इंजीनियरिंग के साथ, SAM नए कार्यों और डेटा वितरणों के लिए जीरो-शॉट तरीके में शीघ्र रूप से अनुकूलित हो सकता है, जिससे यह आपकी सभी छवि सेगमेंटेशन आवश्यकताओं के लिए एक सुगम और प्रभावी उपकरण बन जाता है। + +### SAM पूर्वानुमान उदाहरण + +!!! 
Example "प्रॉम्प्ट के साथ सेगमेंट करें" + + दिए गए प्रॉम्प्ट के साथ छवि को सेगमेंट करें। + + === "Python" + + ```python + from ultralytics import SAM + + # एक मॉडल लोड करें + model = SAM('sam_b.pt') + + # मॉडल सूचना प्रदर्शित करें (वैकल्पिक) + model.info() + + # बॉक्स प्रॉम्प्ट के साथ इनफ़रेंस चलाएं + model('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + + # बिंदु प्रॉम्प्ट के साथ इनफ़रेंस चलाएं + model('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +!!! Example "सब कुछ सेगमेंट करें" + + पूरी छवि को सेगमेंट करें। + + === "Python" + + ```python + from ultralytics import SAM + + # एक मॉडल लोड करें + model = SAM('sam_b.pt') + + # मॉडल सूचना प्रदर्शित करें (वैकल्पिक) + model.info() + + # इनफ़रेंस चलाएं + model('path/to/image.jpg') + ``` + + === "CLI" + + ```bash + # SAM मॉडल के साथ इनफ़रेंस चलाएं + yolo predict model=sam_b.pt source=path/to/image.jpg + ``` + +- यहां यह तर्क है कि आप प्रोम्प्ट (बॉक्स / पॉइंट / मास्क) पास नहीं करते हैं तो पूरी छवि को सेगमेंट करें। + +!!! Example "SAMPredictor उदाहरण" + + इस तरह से आप एक बार छवि सेट कर सकते हैं और बार-बार प्रोम्प्ट इन्फ़रेंस चला सकते हैं। छवि को बार बार इन्कोडर के मध्य में नहीं चलाने के लिए। + + === "प्रोम्प्ट भविष्यवाणी करना" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # SAMPredictor बनाएं + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # चित्र सेट करें + predictor.set_image("ultralytics/assets/zidane.jpg") # चित्र फ़ाइल के साथ सेट करें + predictor.set_image(cv2.imread("ultralytics/assets/zidane.jpg")) # एनपी. एस. एन. 
द्वारा सेट करें + results = predictor(bboxes=[439, 437, 524, 709]) + results = predictor(points=[900, 370], labels=[1]) + + # चित्र रीसेट करें + predictor.reset_image() + ``` + + अतिरिक्त तत्वों के साथ सब कुछ को टुकड़ों में विभाजित करें। + + === "सब कुछ का सेगमेंट" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # SAMPredictor बनाएं + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # अतिरिक्त तत्वों के साथ सेगमेंट + results = predictor(source="ultralytics/assets/zidane.jpg", crop_n_layers=1, points_stride=64) + ``` + +- `सब कुछ का सेगमेंट` के लिए अतिरिक्त तत्वों के लिए अधिक देखें [`Predictor/generate` Reference](../../../reference/models/sam/predict.md). + +## SAM की तुलना YOLOv8 के बनाम + +यहां हम SAM के सबसे छोटे मॉडल, SAM-b, की तुलना Ultralytics के सबसे छोटे सेगमेंट मॉडल, [YOLOv8n-seg](../tasks/segment.md), के साथ करते हैं: + +| मॉडल | आकार | पैरामीटर | गति (सीपीयू) | +|--------------------------------------------------------------------|-----------------------------|----------------------|-------------------------| +| SAM का सबसे छोटा, SAM-b | 358 MB | 94.7 M | 51096 ms/im | +| [मोबाइल SAM](mobile-sam.md) | 40.7 MB | 10.1 M | 46122 ms/im | +| [अग्री सेगमेंटेशन वाली FastSAM-s, YOLOv8 बैकबोन सहित](fast-sam.md) | 23.7 MB | 11.8 M | 115 ms/im | +| Ultralytics [YOLOv8n-seg](yolov8.md) | **6.7 MB** (53.4 गुना छोटा) | **3.4 M** (27.9x कम) | **59 ms/im** (866x तेज) | + +यह तुलना मॉडल के आकार और गति में कई गुना अंतर दिखाती है। जहां SAM स्वचालित सेगमेंटेशन के लिए अद्वितीय क्षमताओं को प्रस्तुत करता है, वहीं Ultralytics विद्यमान सेगमेंटेशन मानदंडों के तुलनात्मक आकार, गति और संचालन क्षमता में समर्थन प्रदान करती है। + +एक 2023 Apple M2 Macbook (16GB रैम के साथ) पर चलाई गई परीक्षा। इस परीक्षण को दोहराने के लिए: + +!!! 
Example "उदाहरण" + + === "Python" + ```python + from ultralytics import FastSAM, SAM, YOLO + + # SAM-b प्रोफाइल करें + model = SAM('sam_b.pt') + model.info() + model('ultralytics/assets') + + # मोबाइलSAM प्रोफाइल करें + model = SAM('mobile_sam.pt') + model.info() + model('ultralytics/assets') + + # FastSAM-s प्रोफाइल करें + model = FastSAM('FastSAM-s.pt') + model.info() + model('ultralytics/assets') + + # YOLOv8n-seg प्रोफाइल करें + model = YOLO('yolov8n-seg.pt') + model.info() + model('ultralytics/assets') + ``` + +## स्वत: टिपण्णीकरण: सेगमेंटेशन डेटासेट के लिए एक त्वरित मार्ग + +स्वत: टिपण्णीकरण SAM की एक मुख्य सुविधा है जो उपयोगकर्ताओं को एक पूर्व-प्रशिक्षित डिटेक्शन मॉडल का उपयोग करके [सेगमेंटेशन डेटासेट](https://docs.ultralytics.com/datasets/segment) उत्पन्न करने की अनुमति देती है। यह सुविधा बड़ी संख्या में छवियों को क्वालिटी कंट्रोल से आनोत करने की जरूरत के दौरान समयरोकी मूल्य अंकन की जरूरत छोड़ती है। + +### अपने सेगमेंटेशन डेटासेट के उपयोग के लिए गतिशीलता निर्मित करें + +Ultralytics फ़्रेमवर्क के साथ स्‍वस्‍थ दिखाई देने वाले वर्गानुसार [सेगमेंटेशन डेटासेट](https://docs.ultralytics.com/datasets/segment) बनाएं। एपनी में दिए गए आदेश का उपयोग करके। + +!!! 
Example "उदाहरण" + + === "Python" + ```python + from ultralytics.data.annotator import auto_annotate + + auto_annotate(data="path/to/images", det_model="yolov8x.pt", sam_model='sam_b.pt') + ``` + +| तत्व | प्रकार | विवरण | डिफ़ॉल्ट | +|------------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------|--------------| +| data | str | उस फ़ोल्डर का पथ जिसमें टिपण्णी (एनोटेट) की जाने वाली छवियाँ हैं। | | +| det_model | str, वैकल्पिक | पूर्व-प्रशिक्षित YOLO डिटेक्शन मॉडल। 'yolov8x.pt' डिफ़ॉल्ट रूप से होता है। | 'yolov8x.pt' | +| sam_model | str, वैकल्पिक | पूर्व-प्रशिक्षित SAM सेगमेंटेशन मॉडल। डिफ़ॉल्ट रूप से 'sam_b.pt' है। | 'sam_b.pt' | +| device | str, वैकल्पिक | मॉडल को चलाने के लिए डिवाइस। डिफ़ॉल्ट रूप से एक खाली स्ट्रिंग होता है (सीपीयू या जीपीयू, यदि उपलब्ध है)। | | +| output_dir | str, None, वैकल्पिक | टिपण्णीत परिणाम सहेजने के लिए निर्देशिका। डिफ़ॉल्ट रूप से 'data' वाली डिरेक्टरी में 'labels' फ़ोल्डर का उपयोग होता है। | None | + +`auto_annotate` फ़ंक्शन आपकी छवियों के लिए यातायात का कार्यक्रम देखती है और सेगमेंटेशन सेट को आपके छवि के पथ सेट करती है, वैकल्पिक तत्वों के लिए पूर्व-प्रशिक्षित डिटेक्शन और SAM सेगमेंटेशन मॉडल, मॉडल को चलाने के लिए डिवाइस और टिपण्णीत परिणाम सहेजने के लिए निर्देशिका जैसे, इन कार्यक्रम तत्वों का उपयोग करते हुए। + +पूर्व-प्रशिक्षित मॉडल के साथ स्वत: टिपण्णीकरण छवि एनोटेशन एक बड़े पैमाने पर सेगमेंटेशन डेटासेट बनाने के लिए प्रायोगिक और ऊर्जावान उपकारण है। यह सुविधा विशेष रूप से उस समय सुरेखितह रेखांकीकरण के लिए समय-शीघ्रता द्वारा तय कार्यों के साथ, उच्च गुणवत्ता वाले सेगमेंटेशन डेटासेट बनाने के लिए अधिक जरूरी होता है। इस सुविधा का उद्देश्यसिद्ध उपयोगकर्ताओं और डेवलपर्स के लिए विद्यमान छवि संग्रहों के साथ जटिल सूक्ष्म मॉडल विकास और मान्यानुयोग का महत्वपूर्ण विकल्प प्रदान करती है। + +## प्रशंसा और आभार + +यदि आप अपने शोध या विकास के लिए SAM का उपयोग करते हैं, तो कृपया हमारे पेपर को उद्धृत करने पर विचार करें: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{kirillov2023segment, + title={Segment Anything}, + author={Alexander Kirillov and Eric Mintun and Nikhila Ravi and Hanzi Mao and Chloe Rolland and Laura Gustafson and Tete Xiao and Spencer Whitehead and Alexander C. Berg and Wan-Yen Lo and Piotr Dollár and Ross Girshick}, + year={2023}, + eprint={2304.02643}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +हम कंप्यूटर विज्ञान समुदाय के लिए इस मूल्यवान संसाधन को निर्मित और बनाए रखने के लिए तारीफ करना चाहेंगे। + +*संकेत: Segment Anything, Segment Anything Model, समुद्री, Meta SAM, छवि सेगमेंटेशन, promptable सेगमेंटेशन, zero-shot प्रदर्शन, SA-1B डेटा सेट, उन्नत आर्किटेक्चर, स्वत: टिपण्णीकरण, Ultralytics, पूर्व-प्रशिक्षित मॉडलें, SAM बेस, SAM-लार्ज, इंस्टेंस सेगमेंटेशन, कंप्यूटर विज्ञान, AI, artificial intelligence, machine learning, data annotation, segmentation masks, डिटेक्शन मॉडल, YOLO डिटेक्शन मॉडल, bibtex, Meta AI. diff --git a/docs/hi/models/yolo-nas.md b/docs/hi/models/yolo-nas.md new file mode 100644 index 0000000..54c44a5 --- /dev/null +++ b/docs/hi/models/yolo-nas.md @@ -0,0 +1,119 @@ +--- +comments: true +description: YOLO-NAS एक बेहतर वस्तु पहचान मॉडल है। इसकी विशेषताएँ, प्री-प्रशित मॉडल, Ultralytics Python API के साथ उपयोग और अधिक के बारे में विस्तृत दस्तावेज़ीकरण जानें। +keywords: YOLO-NAS, Deci AI, object detection, deep learning, neural architecture search, Ultralytics Python API, YOLO model, pre-trained models, quantization, optimization, COCO, Objects365, Roboflow 100 +--- + +# YOLO-NAS + +## अवलोकन + +डेसी एआई द्वारा विकसित YOLO-NAS एक महत्वपूर्ण वस्तु पहचान मूलभूत मॉडल है। यह पिछले YOLO मॉडलों की सीमाओं का समाधान करने के लिए मानकर्मशास्त्रीय संरचना खोज प्रौद्योगिकी का उत्पाद है। क्वांटाइज़ेशन समर्थन और सटीकता-लेटेंसी विनिमय में काफी सुधार के साथ, YOLO-NAS वस्तु पहचान में एक महत्वपूर्ण आगे की लड़ाई प्रस्तुत करता है। + +![मॉडल उदाहरण छवि](https://learnopencv.com/wp-content/uploads/2023/05/yolo-nas_COCO_map_metrics.png) +**YOLO-NAS का 
अवलोकन।** YOLO-NAS आपेक्षिकता के लिए ब्लॉक्स और वैकल्पिक क्वांटाइज़ेशन के साथ क्वांटाइज़ेशन योग्य ब्लॉक का उपयोग करता है। मॉडल को INT8 क्वांटाइज़ेशन में रूपांतरित किया जाने पर, अन्य मॉडलों की तुलना में केवल थोड़ी सी सटीकता नुकसान होती है। ये उन्नति सभी विकसित विशेषताओं द्वारा योग्यतापूर्वक वस्तु पहचान क्षमताओं और अद्वितीय प्रदर्शन में समाप्त होती है। + +### मुख्य विशेषताएँ + +- **क्वांटाइज़ेशन-मित्रशील मूल ब्लॉक:** YOLO-NAS पिछले YOLO मॉडलों की एक महत्वपूर्ण सीमा को समाप्त करने के लिए एक नया मूल ब्लॉक पेश करता है। +- **सुरुचिपूर्वक प्रशिक्षण और क्वांटाइज़ेशन:** YOLO-NAS उन्नत प्रशिक्षण योजनाएं और पोस्ट-प्रशिक्षण क्वांटाइज़ेशन का उपयोग करता है ताकि प्रदर्शन में सुधार हो सके। +- **ऑटोएनएसी अनुकूलन और प्री-प्रशित:** YOLO-NAS ऑटोएनएसी अनुकूलन का उपयोग करता है और COCO, Objects365 और Roboflow 100 जैसे प्रमुख डेटासेटों पर प्री-प्रशित होता है। यह प्री-प्रशित इसे उत्पादन वातावरण में नीचे पड़ने वाले वस्तु पहचान कार्यों के लिए अत्यंत उपयुक्त बनाता है। + +## प्री-प्रशित मॉडल + +Ultralytics द्वारा प्रदान की गई प्री-प्रशित YOLO-NAS मॉडल के साथ आगामी पीढ़ी की वस्तु पहचान की शक्ति का अनुभव करें। ये मॉडल स्पीड और सटीकता दोनों में उत्कृष्ट प्रदर्शन करने के लिए डिज़ाइन किए गए हैं। अपनी विशिष्ट आवश्यकताओं के लिए विभिन्न विकल्पों में से चुनें: + +| मॉडल | mAP | लेटेंसी (मिलीसेकंड) | +|------------------|-------|---------------------| +| YOLO-NAS S | 47.5 | 3.21 | +| YOLO-NAS M | 51.55 | 5.85 | +| YOLO-NAS L | 52.22 | 7.87 | +| YOLO-NAS S INT-8 | 47.03 | 2.36 | +| YOLO-NAS M INT-8 | 51.0 | 3.78 | +| YOLO-NAS L INT-8 | 52.1 | 4.78 | + +प्रत्येक मॉडल वेरिएंट mAP और लेटेंसी के बीच संतुलन का प्रस्ताव करने के लिए डिज़ाइन किए गए हैं, जो आपके वस्तु पहचान कार्यों को प्रदर्शन और गति दोनों के लिए आपकी आवश्यकताओं के लिए अनुकूलित करने में मदद करेगा। + +## उपयोग उदाहरण + +Ultralytics ने YOLO-NAS मॉडलों को आपके Python ऐप्लिकेशन में आसानी से एकीकृत करने के लिए हमारे `ultralytics` पाइथन पैकेज के माध्यम से सुविधाजनक पायथन API प्रदान किया है। पैकेज प्रक्रिया को सुगठित करने के लिए एक उपयोगकर्ता-मित्रपूर्ण 
पाइथन API प्रदान करता है। + +निम्न उदाहरण दिखाते हैं कि `ultralytics` पैकेज के साथ YOLO-NAS मॉडलों का उपयोग कैसे करें: + +### पहचान और मान्यता उदाहरण + +इस उदाहरण में हम COCO8 डेटासेट पर YOLO-NAS-s की मान्यता करते हैं। + +!!! Example "उदाहरण" + + इस उदाहरण में हम YOLO-NAS के लिए सरल पहचान और मान्यता कोड प्रदान करते हैं। पहचान परिणामों का हैंडलिंग करने के लिए देखें [भविष्यवाणी](../modes/predict.md) मोड को। अतिरिक्त मोड के साथ YOLO-NAS का उपयोग करने के लिए [मान्यता](../modes/val.md) और [निर्यात](../modes/export.md) को देखें। `ultralytics` पैकेज YOLO-NAS के प्रशिक्षण का समर्थन नहीं करता है। + + === "Python" + + PyTorch प्री-प्रशित `*.pt` मॉडल फ़ाइलों को `NAS()` क्लास को पास किया जा सकता है ताकि पायथन में एक मॉडल इंस्टेंस बनाया जा सके: + + ```python + from ultralytics import NAS + + # COCO-pretrained YOLO-NAS-s मॉडल लोड करें + model = NAS('yolo_nas_s.pt') + + # मॉडल की जानकारी दिखाएँ (वैकल्पिक) + model.info() + + # COCO8 उदाहरण डेटासेट पर मॉडल की मान्यता करें + results = model.val(data='coco8.yaml') + + # 'bus.jpg' छवि पर YOLO-NAS-s मॉडल के साथ पहचान चलाएं + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + मॉडलों को सीधे चलाने के लिए CLI कमांड उपलब्ध हैं: + + ```bash + # COCO-pretrained YOLO-NAS-s मॉडल लोड करें और उसकी प्रदर्शन मान्यता करें COCO8 उदाहरण डेटासेट पर + yolo val model=yolo_nas_s.pt data=coco8.yaml + + # COCO-pretrained YOLO-NAS-s मॉडल लोड करें और 'bus.jpg' छवि पर पहचान चलाएं + yolo predict model=yolo_nas_s.pt source=path/to/bus.jpg + ``` + +## समर्थित कार्य और मोड + +हम तीन YOLO-NAS मॉडल वेरिएंट प्रदान करते हैं: छोटा (s), मध्यम (m) और बड़ा (l)। प्रत्येक वेरिएंट विभिन्न कंप्यूटेशनल और प्रदर्शन आवश्यकताओं के लिए डिज़ाइन किए गए हैं: + +- **YOLO-NAS-s**: कम्प्यूटेशन संसाधन सीमित होते हैं लेकिन दक्षता महत्वपूर्ण हैं, ऐसे वातावरणों के लिए अनुकूलित। +- **YOLO-NAS-m**: एक संतुलित दृष्टिकोण प्रदान करता है, जिससे आपके पास उच्च सटीकता वाले सामान्य-उद्देश्य वस्तु पहचान के लिए उपयुक्तता होती है। +- **YOLO-NAS-l**: 
सबसे अधिक सटीकता की आवश्यकता होने वाले स्थितियों के लिए विशेष रूप से तैयार किया जाता है, जहां गणना संसाधनों की कम बाधा होती है। + +नीचे प्रत्येक मॉडल के बारे में एक विस्तृत अवलोकन दिया गया है, जिसमें उनकी प्री-प्रशित वेट, ये कार्य समर्थित करते हैं, और इनकी आपरेटिंग मोड के साथ उनका संगतता शामिल है। + +| मॉडल प्रकार | प्री-प्रशित वेट | समर्थित कार्य | पहचान | मान्यता | प्रशिक्षण | निर्यात | +|-------------|-----------------------------------------------------------------------------------------------|-----------------------------------|-------|---------|-----------|---------| +| YOLO-NAS-s | [yolo_nas_s.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_s.pt) | [वस्तु पहचान](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-m | [yolo_nas_m.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_m.pt) | [वस्तु पहचान](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-l | [yolo_nas_l.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_l.pt) | [वस्तु पहचान](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | + +## उद्धरण और प्रशंसापत्र + +यदि आप अपने शोध या विकास कार्य में YOLO-NAS का उपयोग करते हैं, कृपया SuperGradients को उद्धरण दें: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{supergradients, + doi = {10.5281/ZENODO.7789328}, + url = {https://zenodo.org/record/7789328}, + author = {Aharon, Shay and {Louis-Dupont} and {Ofri Masad} and Yurkova, Kate and {Lotem Fridman} and {Lkdci} and Khvedchenya, Eugene and Rubin, Ran and Bagrov, Natan and Tymchenko, Borys and Keren, Tomer and Zhilko, Alexander and {Eran-Deci}}, + title = {Super-Gradients}, + publisher = {GitHub}, + journal = {GitHub repository}, + year = {2021}, + } + ``` + +हम Deci AI के [SuperGradients](https://github.com/Deci-AI/super-gradients/) टीम के प्रयासों के लिए आभार प्रकट करते हैं जिन्होंने इस महत्वपूर्ण संसाधन को बनाने और बनाए रखने के लिए। हम मानते हैं कि YOLO-NAS, अपने नवाचारी श्रेणीबद्ध संरचना और उत्कृष्ट वस्तु पहचान क्षमताओं के साथ एक महत्वपूर्ण उपकरण बनेगा जिसे उद्यमियों और शोधकर्ताओं के लिए आवश्यक साधन बना रखा जाएगा। diff --git a/docs/hi/models/yolov3.md b/docs/hi/models/yolov3.md new file mode 100644 index 0000000..4fc14a8 --- /dev/null +++ b/docs/hi/models/yolov3.md @@ -0,0 +1,98 @@ +--- +comments: true +description: YOLOv3, YOLOv3-Ultralytics और YOLOv3u के बारे में जानें। इनकी प्रमुख विशेषताएँ, उपयोग और वस्तु डिटेक्शन के लिए समर्थित कार्यों की जानकारी प्राप्त करें। +keywords: YOLOv3, YOLOv3-Ultralytics, YOLOv3u, वस्तु डिटेक्शन, इनफेरेन्स, प्रशिक्षण, Ultralytics +--- + +# YOLOv3, YOLOv3-Ultralytics, और YOLOv3u + +## सवाल + +ये प्रदर्शनी में तीन प्रमुख वस्तु डिटेक्शन मॉडलों, यानी [YOLOv3](https://pjreddie.com/darknet/yolo/), [YOLOv3-Ultralytics](https://github.com/ultralytics/yolov3) और [YOLOv3u](https://github.com/ultralytics/ultralytics), की एक अवलोकन प्रस्तुत करती है। + +1. **YOLOv3:** यह You Only Look Once (YOLO) वस्तु डिटेक्शन एल्गोरिदम का तीसरा संस्करण है। प्रारंभिक रूप में Joseph Redmon ने विकसित किया, YOLOv3 ने योग्यताओं का एक श्रेणी, जैसे कि मल्टीस्केल पूर्वानुमान और तीन भिन्न आकार के डिटेक्शन कर्नल्स, को शामिल करके अपने पूर्वजों पर सुधार किया। + +2. 
**YOLOv3-Ultralytics:** यह YOLOv3 मॉडल का Ultralytics अनुपालन है। इसने मूल YOLOv3 की वास्तविकता को पुनर्जीवित किया है और अतिरिक्त कार्यक्षमताओं, जैसे कि अधिक पूर्व-प्रशिक्षित मॉडलों का समर्थन और और सुगठनितकरण विकल्पों की सुविधा भी प्रदान करता है। + +3. **YOLOv3u:** यह YOLOv3-Ultralytics का एक अद्यतित संस्करण है जो YOLOv8 मॉडलों में उपयोग में लाया गया एंकर-मुक्त, टैग-न्यूमनेस स्प्लिट हेड को शामिल करता है। YOLOv3u में योग्यता योजना (backbone) और गर्दन (neck) की वास्तविकता तो बनाए रखती है, लेकिन डिटेक्शन हेड को YOLOv8 से अद्यतित कर देती है। + +![Ultralytics YOLOv3](https://raw.githubusercontent.com/ultralytics/assets/main/yolov3/banner-yolov3.png) + +## प्रमुख विशेषताएं + +- **YOLOv3:** डिटेक्शन के लिए तीन भिन्न पैमाने का उपयोग करने की एकता वाला यह मॉडल, 13x13, 26x26 और 52x52 के तीन भिन्न आकाृ के डिटेक्शन कर्नल्स के उपयोग से जल्दी जाने जाने वाले वस्तुओं के लिए डिटेक्शन की योग्यता में सुधार करती है। इसके अतिरिक्त, YOLOv3 ने प्रति बाउंडिग बॉक्स के लिए मल्टी-लेबल पूर्वानुमान और एक बेहतर फ़ीचर एक्सट्रैक्टर नेटवर्क जैसी विशेषताएँ भी जोड़ी हैं। + +- **YOLOv3-Ultralytics:** Ultralytics के YOLOv3 के अनुपालन में यह मॉडल मूल मॉडल की प्रदर्शन की समता प्रदान करता है, लेकिन इसके पास अतिरिक्त पूर्व-प्रशिक्षित मॉडलों, अधिक प्रशिक्षण विधियों और सुविधाजनक सुविधाएँ जैसे अतिरिक्त समर्थन होता है। इससे इसका विभिन्न अनुप्रयोगों में उपयोग करना संभव होता है। + +- **YOLOv3u:** यह अद्यतित मॉडल YOLOv8 से योग्यता योजना का उपयोग करता है। योज्ञता के पूर्व-निर्धारित एंकर बॉक्स और पदार्थता स्कोर की आवश्यकता को मिटा कर, यह डिटेक्शन हेड का डिजाइन मॉडल की योग्यता को बेहतरीन आकार और आकृति के वस्तुओं की पहचान करने की क्षमता में सुधार कर सकता है। इससे वस्तु डिटेक्शन के कार्यों के लिए YOLOv3u मॉडलों में बढ़िया सुरक्षा और सटीकता प्राप्त हो सकती है। + +## समर्थित कार्य और मोड + +YOLOv3 श्रृंखला, इनमें YOLOv3, YOLOv3-Ultralytics और YOLOv3u शामिल हैं, विशेष रूप से वस्तु डिटेक्शन कार्यों के लिए विभिन्न प्रतिष्ठित हालत में डिज़ाइन किए गए हैं। ये मॉडल सटीकता और गति में संतुलन स्थापित करने के लिए प्रसिद्ध हैं। 
प्रत्येक वैरिएंट अद्वितीय सुविधाएँ और अनुकूलन प्रदान करती हैं, इसलिए इनका उपयोग विभिन्न अनुप्रयोगों के लिए संभव है। + +तीनों मॉडलों को पूर्ण उपयोगता में उपयोग करने के लिए विभिन्न मोड हैं, इनमें [इनफेरेंस](../modes/predict.md), [मान्यकरण](../modes/val.md), [प्रशिक्षण](../modes/train.md) और [निर्यात](../modes/export.md) शामिल हैं, विभाजनित किये गए हैं। ये मोडल उपयोगकर्ताओं को वस्तू डिटेक्शन के प्रभावी आवागमन और विकास के विभाजन के विभाजन के लिए एक पूरी उपकरण पर्याप्तता प्रदान करते हैं। + +| मॉडल प्रकार | समर्थित कार्य | इनफेरेंस | मान्यकरण | प्रशिक्षण | निर्यात | +|--------------------|--------------------------------------|----------|----------|-----------|---------| +| YOLOv3 | [वस्तु डिटेक्शन](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3-Ultralytics | [वस्तु डिटेक्शन](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3u | [वस्तु डिटेक्शन](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +यह तालिका प्रत्येक YOLOv3 वेरिएंट की क्षमताओं की क्षणभंगुर झलक प्रदान करती है, इसमें विभिन्न कार्यों और ऑपरेशनल मोड के लिए उपयुक्तता और विभाजन मानकों को प्रदर्शित किया गया है। + +## उपयोग की उदाहरण + +यह उदाहरण YOLOv3 प्रशिक्षण और इनफेरेंस के आसान उदाहरण प्रदान करता है। इन और अन्य [मोड](../modes/index.md) के बारे में विस्तारपूर्वक दस्तावेज़ीकरण के लिए इसके साथी [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) और [Export](../modes/export.md) दस्तावेज़ पेजेज़ की जांच करें। + +!!! 
Example "उदाहरण" + + === "Python" + + `*.pt` प्रीट्रेन किए गए PyTorch मॉडल और कॉन्फ़िगरेशन *.yaml फ़ाइल Python में YOLO() क्लास कों यूज़ करके एक मॉडल इंस्टेंस तैयार करने के लिए पास कर सकते हैं: + + ```python + from ultralytics import YOLO + + # COCO-pretrained YOLOv3n मॉडल लोड करें + model = YOLO('yolov3n.pt') + + # मॉडल की जानकारी प्रदर्शित करें (वैकल्पिक) + model.info() + + # COCO8 उदाहरण डेटासेट पर मॉडल 100 epochs के लिए प्रशिक्षण दें + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # YOLOv3n मॉडल के साथ 'bus.jpg' छवि पर इनफेरेंस चलाएं + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + मॉडल पर चलाने के लिए CLI आदेश उपलब्ध हैं: + + ```bash + # COCO-pretrained YOLOv3n मॉडल लोड करें और COCO8 उदाहरण डेटासेट पर इसे 100 epochs के लिए प्रशिक्षित करें + yolo train model=yolov3n.pt data=coco8.yaml epochs=100 imgsz=640 + + # COCO-pretrained YOLOv3n मॉडल लोड करें और 'bus.jpg' छवि पर इनफेरेंस चलाएं + yolo predict model=yolov3n.pt source=path/to/bus.jpg + ``` + +## उद्धरण और प्रशंसापत्र + +अगर आप अपने शोध में YOLOv3 का उपयोग करते हैं, तो कृपया मूल YOLO पेपर्स और Ultralytics YOLOv3 रिपॉज़िटरी को उद्धृत करें। + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @article{redmon2018yolov3, + title={YOLOv3: An Incremental Improvement}, + author={Redmon, Joseph and Farhadi, Ali}, + journal={arXiv preprint arXiv:1804.02767}, + year={2018} + } + ``` + +Joseph Redmon और Ali Farhadi को मूल YOLOv3 विकसकर्ताओं के लिए धन्यवाद। diff --git a/docs/hi/models/yolov4.md b/docs/hi/models/yolov4.md new file mode 100644 index 0000000..e00284d --- /dev/null +++ b/docs/hi/models/yolov4.md @@ -0,0 +1,69 @@ +--- +comments: true +description: YOLOv4 के बारे में हमारे विस्तृत गाइड पर आपका स्वागत है, जो एक नवीनतम समय मेंकिए गए वास्तविक समय वस्तुओं का पता लगाने वाला उत्कृष्ट ऑब्जेक्ट डिटेक्टर है। इसे योग्यता से उपयोग करने, आर्किटेक्चर के प्रमुख बिंदुओं को समझने और उपयोग की दृष्टि से कुछ उदाहरणों को देखने के लिए पढ़ें। +keywords: ultralytics, YOLOv4, ऑब्जेक्ट डिटेक्शन, न्यूरल नेटवर्क, वास्तविक समय में पता लगाने वाला, ऑब्जेक्ट डिटेक्टर, मशीन लर्निंग +--- + +# YOLOv4: उच्च गति और सटीक ऑब्जेक्ट डिटेक्शन + +YOLOv4, जो 2020 में अलेक्सी बोचकोवस्की (Alexey Bochkovskiy) द्वारा [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet) पर लॉन्च हुआ एक उत्कृष्ट बाज़ार में समयविशेषी-मेंट करने वाले ऑब्जेक्ट डिटेक्टर है, के बारे में Ultralytics दस्तावेज़ीकरण पेज में आपका स्वागत है। YOLOv4 गति और सटीकता के बीच आदर्श संतुलन प्रदान करने के लिए डिज़ाइन किया गया है, जो इसे बहुत सारे एप्लिकेशन के लिए एक उत्कृष्ट विकल्प बनाता है। + +![YOLOv4 आर्किटेक्चर आरेख](https://user-images.githubusercontent.com/26833433/246185689-530b7fe8-737b-4bb0-b5dd-de10ef5aface.png) +**YOLOv4 आर्किटेक्चर आरेख**। YOLOv4 की जटिल नेटवर्क डिज़ाइन का प्रदर्शन, जिसमें बैकबोन, नेक और हेड घटक और उनके जोड़े गए स्तरों की दिखावटी तस्वीर है, ताकि वास्तविक समय ऑब्जेक्ट डिटेक्शन के लिए उनका उपयोग किया जा सके। + +## परिचय + +YOLOv4 का मतलब होता है 'You Only Look Once' संस्करण 4। यह एक वास्तविक समय ऑब्जेक्ट डिटेक्शन मॉडल है जो पिछले YOLO संस्करणों जैसे YOLOv3 और अन्य ऑब्जेक्ट डिटेक्शन मॉडलों की सीमाओं का सामना करने के लिए विकसित किया गया है। अन्य 
कन्वोल्यूशनल न्यूरल नेटवर्क (CNN) आधारित ऑब्जेक्ट डिटेक्टरों के विपरीत, YOLOv4 का उपयोग सिर्फ अनुशंसा प्रणालियों के लिए ही नहीं, बल्कि स्वतंत्र प्रक्रिया प्रबंधन और मानव इनपुट को कम करने के लिए भी किया जा सकता है। यह एक साधारण ग्राफ़िक्स प्रोसेसिंग इकाइयां (जीपीयू) पर अपना कार्य संपादित करने की अनुमति देता है और केवल एक ऐसे जीपीयू के लिए प्रशिक्षण के लिए एक ही जीपीयू की आवश्यकता होती है। + +## आर्किटेक्चर + +YOLOv4 अपने प्रदर्शन को अनुकूलित करने के लिए कई नवाचारी विशेषताओं का उपयोग करता है। इनमें वेटेड-रेजिड्यूल-कनेक्शंस (WRC), क्रॉस-स्टेज-पार्शल-कनेक्शंस (CSP), क्रॉस मिनी-बैच नियमन (CmBN), सेल्फ-प्रतिहिंसा-प्रशिक्षण (SAT), मिश ऐक्टिवेशन, मोज़ेक डेटा वृद्धि, ड्रॉपब्लॉक नियमन और सीआईओयू हानि शामिल हैं। ये विशेषताएँ संयुक्त रूप से बेहतरीन परिणाम प्राप्त करने के लिए संयोजित की जाती हैं। + +आमतौर पर, एक ऑब्जेक्ट डिटेक्टर में कई हिस्से होते हैं जिनमें इनपुट, बैकबोन, नेक और हेड शामिल होते हैं। YOLOv4 का बैकबोन ImageNet पर पूरी तरह से प्रशिक्षित होता है और यह वस्तुओं के वर्गों और बाउंडिंग बॉक्स का अनुमान लगाने के लिए उपयोग किया जाता है। बैकबोन विभिन्न मॉडलों में से हो सकता है, जैसे VGG, ResNet, ResNeXt या DenseNet। डिटेक्शन हेड भाग ताकि अंतिम ऑब्जेक्ट डिटेक्शन और श्रेणीकरण किए जा सकें। + +## फ्रीबीज़ सूचना + +YOLOv4 उन विधियों का उपयोग करता है जिन्हें "फ्रीबीज़ का संग्रह" कहा जाता है, जो मॉडल की उन्नतता को प्रशिक्षण के दौरान बढ़ाने और संविधान के लागू होने की कीमत बढ़ाते हैं। डाटा वृद्धि ऑब्जेक्ट डिटेक्शन में उपयोग होने वाली एक सामान्य फ्रीबीज़ टिकनिक है, जो मॉडल की संवेगीता बढ़ाने के लिए इनपुट छवियों की विविधता बढ़ाती है। डाटा वृद्धि के कुछ उदाहरण हैं जिनमें फोटोमेट्रिक दिसंगत (चित्र के एक छवि के तेजी, बंदिश, सोने, उष्णता और आवाज़ को समायोजित करना) और ज्यामितिक दिसंगत (एक छवि के रैंडम स्केलिंग, क्रॉपिंग, फ्लिपिंग और घुमाने के जोड़ने) शामिल होते हैं। ये टिकनिक मॉडल को अलग-अलग प्रकार की छवियों में बेहतर सामान्यीकरण करने में सहायता करते हैं। + +## विशेषताएँ और प्रदर्शन + +YOLOv4 को वास्तविक समय ऑब्जेक्ट डिटेक्शन में उच्च गति और सटीकता के लिए डिज़ाइन किया गया है। YOLOv4 की 
आर्किटेक्चर में CSPDarknet53 बैकबोन के रूप में, नेक के रूप में PANet, और डिटेक्शन हेड के रूप में YOLOv3 शामिल हैं। यह डिज़ाइन YOLOv4 को एक प्रभावशाली गति पर ऑब्जेक्ट डिटेक्शन करने की अनुमति देता है, जिससे यह वास्तविक समय में उपयुक्त होता है। YOLOv4 ऑब्जेक्ट डिटेक्शन मानकों में उच्च गुणवत्ता हासिल करने में भी उत्कृष्टता करता है। + +## उपयोग की उदाहरण + +मामले से आप योग्यता संबंधित विस्तारित चरणों का उपयोग करने के लिए YOLOv4 का उपयोग करना चाहेंगे: + +1. YOLOv4 GitHub रिपॉज़िटरी पर जाएं: [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet)। + +2. स्थापना के लिए readme फ़ाइल में दिए गए निर्देशों का पालन करें। इसमें सामान्यतया रिपॉज़िटरी क्लोन करना, आवश्यक डिपेंडेंसियों को स्थापित करना और किसी आवश्यक पर्यावरण चर पर सेटअप करना शामिल होता है। + +3. स्थापना पूरी होने पर, आप मॉडल को प्रशिक्षित कर सकते हैं और उसे ऑब्जेक्ट डिटेक्शन करने के लिए उपयोग कर सकते हैं हमॉडन/रिपॉज़िटरी में दिए गए उपयोग के निर्देशों के अनुसार। यह आमतौर पर अपने डेटासेट को तैयार करना, मॉडल पैरामीटर्स को कॉन्फ़िगर करना, मॉडल को प्रशिक्षित करना और फिर प्रशिक्षित मॉडल का उपयोग ऑब्जेक्ट डिटेक्शन करने के लिए करता है। + +कृपया ध्यान दें कि विशिष्ट चरण आपके विशिष्ट उपयोग मामले और YOLOv4 रेपोज़िटरी की स्थिति पर निर्भर कर सकते हैं। इसलिए, YOLOv4 गिद्धशाली रिपॉज़िटरी में दिए गए निर्देशों का उपयोग करने की कड़ी सलाह दी जाती है। + +हम इस बात की असुविधा के लिए माफी चाहते हैं और YOLOv4 के समर्थन को Ultralytics के लिए लागू किया जाने पर उपयोग की उदाहरणों के साथ इस दस्तावेज़ को अद्यतित करने की कोशिश करेंगे। + +## निष्कर्ष + +YOLOv4 एक शक्तिशाली और कुशल ऑब्जेक्ट डिटेक्शन मॉडल है जो गति और सटीकता के बीच संतुलन स्थापित करता है। प्रशिक्षण के दौरान उनिक विशेषताओं और फ्रीबीज़ तकनीकों का उपयोग करके, यह वास्तविक समय ऑब्जेक्ट डिटेक्शन कार्यों में उत्कृष्ट प्रदर्शन करता है। किसी आम जीपीयू के साथ किसी भी व्यक्ति द्वारा प्रशिक्षित और उपयोग किया जा सकता है, जो इसे विभिन्न एप्लिकेशन के लिए पहुंच योग्य और व्यावहारिक बनाता है। + +## संदर्भ और प्रशंसा + +हम YOLOv4 लेखकों को इस फ़ील्ड में उनके महत्वपूर्ण योगदानों के 
लिए आभार व्यक्त करना चाहेंगे: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{bochkovskiy2020yolov4, + title={YOLOv4: Optimal Speed and Accuracy of Object Detection}, + author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, + year={2020}, + eprint={2004.10934}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +मूल YOLOv4 पेपर [arXiv](https://arxiv.org/pdf/2004.10934.pdf) पर मिल सकता है। लेखकों ने अपने कार्य को पब्लिकली उपलब्ध कराया है, और कोडबेस [GitHub](https://github.com/AlexeyAB/darknet) पर एक्सेस किया जा सकता है। हम उनके कोशिशों की सराहना करते हैं जो क्षेत्र को आगे बढ़ाने और उनके काम को व्यापक समुदाय के लिए उपलब्ध कराने में करते हैं। diff --git a/docs/hi/models/yolov5.md b/docs/hi/models/yolov5.md new file mode 100644 index 0000000..4a9172e --- /dev/null +++ b/docs/hi/models/yolov5.md @@ -0,0 +1,113 @@ +--- +comments: true +description: योलोवी5यू की खोज करें, योलोवी5 मॉडल का एक बढ़ाया हुआ संस्करण जिसमें एक निश्चित रफ़्तार के बदलाव और विभिन्न वस्तु ज्ञापन कार्यों के लिए कई पूर्व प्रशिक्षित मॉडल शामिल हैं। +keywords: YOLOv5u, वस्तु ज्ञापन, पूर्व प्रशिक्षित मॉडल, Ultralytics, Inference, Validation, YOLOv5, YOLOv8, एंचर-मुक्त, वस्तुनिपाति रहित, वास्तविक समय अनुप्रयोग, मशीन लर्निंग +--- + +# योलोवी5 + +## समीक्षा + +YOLOv5u वस्तु ज्ञापन के तरीकों में एक पटल बढ़ोतरी को प्रतिष्ठानित करता है। योग्यता ग्रहण और समय की मूल्य-माप बदलती शैली के आधार पर आधारित योलोवी5 मॉडल की स्थापना से परिचय में सुधार लाती है। तात्कालिक परिणामों और इसकी प्राप्त विशेषताओं के मद्देनजर, YOLOv5u एक ऐसा कुशल स्थानांतरण प्रदान करता है जो नवीन रंगेंगर में शोध और व्यावसायिक अनुप्रयोगों में सठिक समाधानों की तलाश कर रहे हैं। + +![Ultralytics YOLOv5](https://raw.githubusercontent.com/ultralytics/assets/main/yolov5/v70/splash.png) + +## मुख्य विशेषताएं + +- **एंचर-मुक्त हिस्सा उल्ट्रालिटिक्स हेड:** पारंपरिक वस्तु ज्ञापन मॉडल निश्चित प्रमुख बॉक्सों पर आधारित होते हैं। हालांकि, YOLOv5u इस दृष्टिकोण को आधुनिक बनाता है। एक एंचर-मुक्त हिस्सा उल्ट्रालिटिक्स हेड की अपनाने से 
यह सुनिश्चित करता है कि एक और उचित और अनुरूप ज्ञापन मेकेनिज़म निर्धारित करें, जिससे विभिन्न परिदृश्यों में प्रदर्शन में सुधार होता है। + +- **में सुधार गया गुणांक गति वस्तु:** गति और सुधार का anomaly रहता हैं। लेकिन YOLOv5u इस विरोधाभासी को चुनौती देता है। इस रंगेंगर व पुष्टि दृढ़ कर सुनिश्चित करता है वास्तविक समयगत ज्ञापन में स्थैतिकता नुकसान के बिना। यह विशेषता वाहन स्वतंत्र, रोबोटिक्स, और वास्तविक समयगत वीडियो विश्लेषण जैसे तत्वों को चाहती अनुप्रयोगों के लिए विशिष्ट सबक की अनमोलता होती है। + +- **प्रशिक्षित मॉडल के विभिन्न वस्तुधापर्यावथाएं:** यह समझने कि लिए कि विभिन्न कार्यों के लिए विभिन्न उपकरण की जरूरत होती है, YOLOv5u एक कई पूर्व प्रशिक्षित मॉडल प्रदान करता है। चाहे आप ज्ञापन, मान्यता, या प्रशिक्षण पर ध्यान केंद्रित कर रहे हैं, आपकी अद्वितीय चुनौती के लिए एक टेलरमेड मॉडल है। यह विविधता यह सुनिश्चित करती है कि आप एक वन-साइज-फिट ऑल समाधान ही नहीं उपयोग कर रहे हैं, बल्कि अपनी अद्यापित अद्वितीय चुनौती के लिए एक मॉडल का उपयोग कर रहे हैं। + +## समर्थित कार्य तथा मोड + +योलोवी5u मॉडल, विभिन्न पूर्व प्रशिक्षित वेट वाली, [वस्तु ज्ञापन](../tasks/detect.md) कार्यों में उत्कृष्ट हैं। इन्हें विभिन्न ऑपरेशन मोड्स का समर्थन है, इसलिए इन्हें विकास से लेकर अंतर्गत उन्नतिशील अनुप्रयोगों के लिए उपयुक्त ठहराया जा सकता है। + +| मॉडल प्रकार | पूर्व प्रशिक्षित वेट | कार्य | ज्ञापन | मान्यता | प्रशिक्षण | निर्यात | +|-------------|-----------------------------------------------------------------------------------------------------------------------------|------------------------------------|--------|---------|-----------|---------| +| YOLOv5u | `yolov5nu`, `yolov5su`, `yolov5mu`, `yolov5lu`, `yolov5xu`, `yolov5n6u`, `yolov5s6u`, `yolov5m6u`, `yolov5l6u`, `yolov5x6u` | [वस्तु ज्ञापन](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +यह तालिका योलोवी5u मॉडल के विभिन्न जैविक वेशभूषा प्रस्तुत करती है, इनके वस्तु ज्ञापन कार्यों में लागूहोने और [ज्ञापन](../modes/predict.md), [मान्यता](../modes/val.md), [प्रशिक्षण](../modes/train.md), और [निर्यात](../modes/export.md) की समर्थनता को उज्ज्वल बनाती 
है। इस समर्थन की पूर्णता सुनिश्चित करती है कि उपयोगकर्ता योलोवी5u मॉडल्स की संपूर्ण क्षमताओं का खास लाभ उठा सकते हैं विभिन्न ऑब्जेक्ट ज्ञापन स्थितियों में। + +## प्रदर्शन पैमाने + +!!! Performance + + === "ज्ञापन" + + [देखें ज्ञापन डॉकस](https://docs.ultralytics.com/tasks/detect/) को [COCO](https://docs.ultralytics.com/datasets/detect/coco/) पर प्रशिक्षित इन मॉडल्स के उपयोग के साथ उपयोग उदाहरण जैसे विविध पूर्व-प्रशिक्षित वर्गों को शामिल करता है। + + | मॉडल | YAML | साइज़
(पिक्सेल) | mAPवैल
50-95 | गति
CPU ONNX
(मि.से.) | गति
A100 TensorRT
(मि.से.) | params
(M) | FLOPs
(B) | + |---------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|---------------------------|------------------------|--------------------------------|-----------------------------------------|--------------------|-------------------| + | [yolov5nu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5nu.pt) | [yolov5n.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 34.3 | 73.6 | 1.06 | 2.6 | 7.7 | + | [yolov5su.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5su.pt) | [yolov5s.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 43.0 | 120.7 | 1.27 | 9.1 | 24.0 | + | [yolov5mu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5mu.pt) | [yolov5m.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 49.0 | 233.9 | 1.86 | 25.1 | 64.2 | + | [yolov5lu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5lu.pt) | [yolov5l.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 52.2 | 408.4 | 2.50 | 53.2 | 135.0 | + | [yolov5xu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5xu.pt) | [yolov5x.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 53.2 | 763.2 | 3.81 | 97.2 | 246.4 | + | | | | | | | | | + | [yolov5n6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5n6u.pt) | [yolov5n6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 42.1 | 211.0 | 1.83 | 4.3 | 7.8 | + | [yolov5s6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5s6u.pt) | 
[yolov5s6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 48.6 | 422.6 | 2.34 | 15.3 | 24.6 | + | [yolov5m6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5m6u.pt) | [yolov5m6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 53.6 | 810.9 | 4.36 | 41.2 | 65.7 | + | [yolov5l6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5l6u.pt) | [yolov5l6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 55.7 | 1470.9 | 5.47 | 86.1 | 137.4 | + | [yolov5x6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5x6u.pt) | [yolov5x6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 56.8 | 2436.5 | 8.98 | 155.4 | 250.7 | + +## उपयोग उदाहरण + +इस उदाहरण में सरल YOLOv5 चालन और ज्ञापन उदाहरण प्रदान किए गए हैं। इन और अन्य [modes](../modes/index.md) के लिए पूर्ण संदर्भ सामग्री के लिए दस्तावेज़ीकरण पृष्ठों में जाएं। + +!!! Example "उदाहरण" + + === "पायथन" + + पायथन में एक मॉडल उदाहरण के लिए योलोवी5 आईएमजेड हालत में `*.pt` मॉडल्स के साथ मॉडल निर्माण के लिए `YOLO()` श्रेणी को पारित किया जा सकता है: + + ```python + from ultralytics import YOLO + + # COCO-pretrained YOLOv5n मॉडल लोड करें + model = YOLO('yolov5n.pt') + + # मॉडल जानकारी प्रदर्शित करें (वैकल्पिक) + model.info() + + # COCO8 प्रायोगिक उदाहरण डेटासेट पर 100 एपॉक के लिए मॉडल + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # YOLOv5n मॉडल के साथ 'bus.jpg' छविमें ज्ञापन चलाएं + results = model('path/to/bus.jpg') + ``` + + === "सी.एल.आई." 
+ + मालिशी आदेशों का उपयोग सीधे मॉडलों को चलाने के लिए उपलब्ध हैं: + + ```bash + # COCO-प्रशिक्षित YOLOv5n मॉडल खोलें और 100 एपॉक के लिए इसे COCO8 प्रायोगिक उदाहरण डेटासेट पर प्रशिक्षित करें + yolo train model=yolov5n.pt data=coco8.yaml epochs=100 imgsz=640 + + # COCO-प्रशिक्षित YOLOv5n मॉडल खोलें और 'bus.jpg' छवि में ज्ञापन चलाएं + yolo predict model=yolov5n.pt source=path/to/bus.jpg + ``` + +## उद्धरण और मान्यता + +यदि आप अपने शोध में YOLOv5 या YOLOv5u का उपयोग करते हैं, तो कृपया Ultralytics YOLOv5 दस्तावेज़ीकरण में मुख्य रूप से उल्लेख करें: + +!!! Quote "" + + === "BibTeX" + ```bibtex + @software{yolov5, + title = {Ultralytics YOLOv5}, + author = {Glenn Jocher}, + year = {2020}, + version = {7.0}, + license = {AGPL-3.0}, + url = {https://github.com/ultralytics/yolov5}, + doi = {10.5281/zenodo.3908559}, + orcid = {0000-0001-5950-6979} + } + ``` + +कृपया ध्यान दें कि YOLOv5 मॉडलें [AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) और [एंटरप्राइज](https://ultralytics.com/license) लाइसेंस में उपलब्ध हैं। diff --git a/docs/hi/models/yolov6.md b/docs/hi/models/yolov6.md new file mode 100644 index 0000000..06b946a --- /dev/null +++ b/docs/hi/models/yolov6.md @@ -0,0 +1,90 @@ +--- +comments: true +description: एक उत्कृष्ट वस्तु पहचानने मॉडल में स्पीड और सटीकता के बीच एक सामंजस्य स्थापित करने वाला, रीयल-टाइम एप्लिकेशन्स के लिए लोकप्रिय भारतीय ब्रांड योलोवी6 का अध्ययन करें। उल्ट्रालिटिक्स के लिए मीथुन योलोवी6, फ़ीचर्स, पूर्व-प्रशिक्षित मॉडल्स और पायथन उपयोग पर डाइव करें। +keywords: मिथुन योलोवी6, वस्तु पहचान, Ultralytics, योलोवी6 दस्तावेज़, प्रतिस्थापन में द्विदिशीय जोड़, एंकर-सहायित प्रशिक्षण, पूर्व-प्रशिक्षित मॉडल, वास्तविक समय एप्लिकेशन्स +--- + +# मिथुन योलोवी6 + +## परियोजना + +[मिथुन](https://about.meituan.com/) योलोवी6 एक नवीनतम वस्तु पहचानकर्ता है जो स्पीड और सटीकता के बीच अद्वितीय संतुलन प्रदान करता है, जिसके कारण यह वास्तविक समय एप्लिकेशन्स के लिए एक लोकप्रिय विकल्प है। इस मॉडल ने अपने आर्किटेक्चर और प्रशिक्षण योजना पर कई आदर्श नवीनतमान 
वृद्धियों को पेश किया है, जिसमें एक जोड़ने-द्विदिशीकरण (BiC) मॉड्यूल, एंकर-सहायित प्रशिक्षण (AAT) स्ट्रेटेजी, और COCO डेटासेट पर अद्वितीय सटीकता के लिए सुधारित बैकबोन और गर्दन डिज़ाइन का क्रियान्वयन शामिल है। + +![मिथुन योलोवी6](https://user-images.githubusercontent.com/26833433/240750495-4da954ce-8b3b-41c4-8afd-ddb74361d3c2.png) +![माॅडेल उदाहरण छवि](https://user-images.githubusercontent.com/26833433/240750557-3e9ec4f0-0598-49a8-83ea-f33c91eb6d68.png) +**योलोवी6 का अवलोकन।** मॉडल आर्किटेक्चर आरेख आपको बड़ी प्रदर्शन वृद्धि करने वाले संकरणों और प्रशिक्षण रणनीतियों का आभास कराता है। (a) योलोवी6 का गर्दन (N और S दिखाए गए हैं)। M/L के लिए, रिपब्लॉक को सीएसपीस्टैकरेप से बदल दिया गया है। (b) बीसी मॉड्यूल का संरचना। (c) एक सिमकुस्पस्पफ ब्लॉक। ([स्रोत](https://arxiv.org/pdf/2301.05586.pdf))। + +### मुख्य विशेषताएं + +- **द्विदिशीय जोड़ने (BiC) मॉड्यूल:** योलोवी6 डिटेक्टर के गर्दन में BiC मॉड्यूल प्रस्तुत करता है, जिससे स्थानांतरण सिग्नल में सुधार होती है और ज्ञानसंक्षेप में गतिविधि के साथ प्रदर्शन सुधार होता है। +- **एंकर-सहायित प्रशिक्षण (AAT) स्ट्रेटेजी:** यह मॉडल AAT प्रस्तावित करता है ताकि यह एंकर-आधारित और एंकर-मुक्त दोनों परंपराओं के लाभ प्राप्त कर सके और अंतर्निहित क्षमता पर खराब प्रभाव न हो। +- **सुधारित बैकबोन और गर्दन डिज़ाइन:** YOLOv6 को बैकबोन और गर्दन में एक और स्टेज शामिल करके, इस मॉडल ने कोको डेटासेट पर अद्वितीय प्रदर्शन को उच्च-संकल्पन इनपुट पर प्राप्त किया है। +- **स्व-स्त्रावबद्धि (Self-Distillation) स्ट्रेटेजी:** छोटे YOLOv6 मॉडलों के प्रदर्शन को बढ़ाने के लिए, यहां नई स्व-स्त्रावबद्धि स्ट्रेटेजी का अमल किया गया है, जिससे प्रशिक्षण के दौरान मददगार संश्लेषण शाखा को सुधारा जाता है और प्रशिक्षण में इसे हटा दिया जाता है ताकि मार्क की गति में प्रमुख गिरावट न हो। + +## प्रदर्शन माप + +YOLOv6 विभिन्न पूर्व-प्रशिक्षित मॉडलों के साथ प्रदान करता है जिनमें अलग-अलग स्केल होती हैं: + +- YOLOv6-N: NVIDIA Tesla T4 GPU पर 1187 फ्रेम प्रति सेकंड पर COCO val2017 में 37.5% एपी। +- YOLOv6-S: 484 फ्रेम प्रति सेकंड पर 45.0% एपी। +- YOLOv6-M: 226 फ्रेम प्रति सेकंड पर 50.0% 
एपी। +- YOLOv6-L: 116 फ्रेम प्रति सेकंड पर 52.8% एपी। +- YOLOv6-L6: वास्तविक समय में पराकाष्ठा की सटीकता। + +YOLOv6 ऐसे पालटने वाले मॉडल भी प्रदान करता है जिनमें विभिन्न परिशुद्धियां होती हैं और मोबाइल प्लेटफ़ॉर्म के लिए अनुकूलित मॉडल्स होती हैं। + +## उपयोग उदाहरण + +यह उदाहरण आसान YOLOv6 प्रशिक्षण और संदर्भ उदाहरण प्रदान करता है। इन और अन्य [modes](../modes/index.md) के लिए पूर्ण दस्तावेज़ीकरण के लिए [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) और [Export](../modes/export.md) डॉक्स पेज देखें। + +!!! Example "उदाहरण" + + === "पायथन" + + PyTorch से पूर्व-प्रशिक्षित `*.pt` मॉडल और कॉन्फ़िगरेशन `*.yaml` फ़ाइलें पास करके `YOLO()` कक्षा में एक मॉडल उदाहरण बनाया जा सकता है: + + ```python + from ultralytics import YOLO + + # खाली स्थान से एक YOLOv6n मॉडल बनाएँ + model = YOLO('yolov6n.yaml') + + # मॉडल की जानकारी प्रदर्शित करें (वैकल्पिक) + model.info() + + # COCO8 उदाहरण डेटासेट पर मॉडल को 100 एपोक के लिए प्रशिक्षित करें + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # 'bus.jpg' छवि पर YOLOv6n मॉडल के साथ निर्धारण चलाएँ + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + मॉडल को निर्धारित करने के लिए सीएलआई कमांड उपलब्ध हैं: + + ```bash + # शुरू से एक YOLOv6n मॉडल बनाएँ और इसे COCO8 उदाहरण डेटासेट पर 100 एपोक के लिए प्रशिक्षित करें + yolo train model=yolov6n.yaml data=coco8.yaml epochs=100 imgsz=640 + + # शुरू से एक YOLOv6n मॉडल बनाएँ और 'bus.jpg' छवि पर निधारण चलाएँ + yolo predict model=yolov6n.yaml source=path/to/bus.jpg + ``` + +## समर्थित कार्य और मोड + +YOLOv6 श्रृंखला उच्च प्रदर्शन [वस्तु पहचान](../tasks/detect.md) के लिए विभिन्न मॉडल प्रदान करती है। इन मॉडल्स में विभिन्न गणना और सटीकता की आवश्यकताओं के लिए आदर्श होते हैं, जिससे इन्हें विभिन्न अनुप्रयोगों के लिए विचारशील होता है। + +| मॉडल का प्रकार | पूर्व-प्रशिक्षित भार | समर्थित कार्य | निर्धारण | मान्यीकरण | प्रशिक्षण | निर्यात | 
+|----------------|----------------------|-----------------------------------|----------|-----------|-----------|---------| +| YOLOv6-N | `yolov6-n.pt` | [वस्तु पहचान](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-S | `yolov6-s.pt` | [वस्तु पहचान](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-M | `yolov6-m.pt` | [वस्तु पहचान](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L | `yolov6-l.pt` | [वस्तु पहचान](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L6 | `yolov6-l6.pt` | [वस्तु पहचान](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +यह तालिका योलोवी6 मॉडल वेरिएंट्स का विस्तृत अवलोकन प्रदान करती है, जो वस्तु पहचान कार्यों में उनकी क्षमताओं और विभिन्न संचालन मोडों के साथ [निर्धारण](../modes/predict.md), [मान्यीकरण](../modes/val.md), [प्रशिक्षण](../modes/train.md), और [निर्यात](../modes/export.md) के संगतता को हाइलाइट करते हैं। इस व्यापक समर्थन से उपयोगकर्ताओं को योलोवी6 मॉडलों की क्षमताओं का पूरा उपयोग करने की सुविधा होती है एक व्यापक वस्तु पहचान स्थिति में। + +## सन्दर्भ और पुन:ज्ञानजनक + +हम मूल योलोवी6 कागज पर [arXiv](https://arxiv.org/abs/2301.05586) में उपलब्ध हैं काम के लिए संघ द्वारा स्विकृति दी जाती है। लेखकों ने अपने काम को सार्वजनिक रूप से उपलब्ध कराया है, और कोडबेस [GitHub](https://github.com/meituan/YOLOv6) पर पहुंचने के लिए है। हम उनके प्रयासों की प्रशंसा करते हैं क्योंकि वे क्षेत्र को आगे बढ़ाने और अपने काम को आपातकालीन रूप से ब्रॉडर समुदाय को सुलभ बनाने के लिए उनके प्रयासों को पहुंचने में लगे हैं। diff --git a/docs/hi/models/yolov7.md b/docs/hi/models/yolov7.md new file mode 100644 index 0000000..173b177 --- /dev/null +++ b/docs/hi/models/yolov7.md @@ -0,0 +1,65 @@ +--- +comments: true +description: YOLOv7, एक रियल-टाइम ऑब्जेक्ट डिटेक्टर है। इसकी अद्वितीय स्पीड, प्रभावशाली सटीकता, और अद्यतनीय "बैग-ऑफ-फ्रीबीज" अनुकूलन ध्यानदेने योग्यता को समझें। +keywords: YOLOv7, रियल-टाइम ऑब्जेक्ट डिटेक्टर, तकनीक-से-अलावा कुछ औफ़ान, Ultralytics, MS COCO डेटासेट, मॉडल फिर-पैरामीटरीकरण, डायनामिक लेबल असाइनमेंट, विस्तारित मापन, कंपाउंड मापन +--- + +# YOLOv7: ट्रेनबल 
बैग-ऑफ-फ्रीबीज़ + +YOLOv7 एक अद्वितीय रियल-टाइम ऑब्जेक्ट डिटेक्टर है जो 5 FPS से 160 FPS तक की रेंज में सभी ज्ञात ऑब्जेक्ट डिटेक्टर्स को तेज़ी और सटीकता के मामले में पीछे छोड़ देता है। यह 30 FPS या उससे अधिक पर GPU V100 पर सभी ज्ञात रियल-टाइम ऑब्जेक्ट डिटेक्टर्स में सबसे अधिक अक्यूरेट (56.8% AP) है। इसके अलावा, YOLOv7 तेजी और सटीकता के मामले में YOLOR, YOLOX, Scaled-YOLOv4, YOLOv5, और कई अन्य ऑब्जेक्ट डिटेक्टर्स से बेहतर परफॉर्म करता है। यह मॉडल MS COCO डेटासेट पर पहले से ही ट्रेनिंग है और किसी अन्य डेटासेट या पूर्व-ट्रेन हुई वेट्स का प्रयोग नहीं करता है। YOLOv7 के स्रोत कोड [GitHub](https://github.com/WongKinYiu/yolov7) पर उपलब्ध है। + +![SOTA ऑब्जेक्ट डिटेक्टर्स के तुलनात्मक मुक़ाबला](https://github.com/ultralytics/ultralytics/assets/26833433/5e1e0420-8122-4c79-b8d0-2860aa79af92) +**वैश्विक-से-अलावा ऑब्जेक्ट डिटेक्टर्स का तुलनात्मक मुक़ाबला।** तालिका 2 में प्रदर्शित परिणामों से हमें यह पता चलता है कि प्रस्तावित विधि संपूर्णता से सबसे अच्छा तेजी-अक्यूरेटता संघर्ष लाती है। अगर हम YOLOv7-tiny-SiLU को YOLOv5-N (r6.1) के साथ तुलना करें, तो हमारी विधि 127 fps तेज़ और AP में 10.7% अधिक सटीक है। साथ ही, YOLOv7 के साथ AP 51.4% है जबकि PPYOLOE-L के साथ एक ही AP के साथ 78 fps फ्रेम रेट है। पैरामीटर उपयोग के मामले में, YOLOv7 PPYOLOE-L की तुलना में 41% कम है। YOLOv7-X को 114 fps इन्फरेंस स्पीड के साथ YOLOv5-L (r6.1) के साथ तुलना करें तो, YOLOv7-X AP को 3.9% बढ़ा सकता है। YOLOv7-X को बराबर स्केल के YOLOv5-X (r6.1) के साथ तुलना करें तो, YOLOv7-X की इन्फेरेंस स्पीड 31 fps तेज़ है। साथ ही, पैरामीटर और परिकलन की मात्रा के मामले में, YOLOv7-X पहले योलोव5-X (r6.1) की तुलना में 22% पैरामीटर कम करता है और 8% परिकलन कम करता है, लेकिन AP को 2.2% बढ़ाता है। ([स्रोत](https://arxiv.org/pdf/2207.02696.pdf))। + +## अवलोकन + +रियल-टाइम ऑब्जेक्ट डिटेक्शन कंप्यूटर विजन के कई सिस्टमों में एक महत्वपूर्ण घटक है, जिसमें मल्टी-ऑब्जेक्ट ट्रैकिंग, स्वतंत्र चालन, रोबोटिक्स, और मेडिकल इमेज विश्लेषण शामिल हैं। योग्यता के विकास के लिए हाल के वर्षों में रियल-टाइम ऑब्जेक्ट डिटेक्शन विकास ने विभिन्न सीपीयू, 
जीपीयू, और न्यूरल प्रोसेसिंग यूनिटों (एनपीयू) के कार्यान्वयन की तेजी बढ़ाने और अधिक अधिक ज्ञानसरचकों के लिए। YOLOv7 ने मोबाइल जीपीयू और जीपीयू डिवाइस, इज़्ज टु द क्लाउड, को समर्थन दिया है। + +पारंपरिक रियल-टाइम ऑब्जेक्ट डिटेक्टर्स की तुलना में जो कि शारीरिक अभिव्यक्ति अनुकूलन पर ध्यान केंद्रित होते हैं, YOLOv7 ट्रेनिंग प्रक्रिया को अनुकरण करने पर ध्यान केंद्रित करने का सुझाव देता है। इसमें मॉड्यूल्स और ऑप्टिमाइज़ेशन मेथड्स शामिल हैं जिनका उपयोग करके ऑब्जेक्ट डिटेक्शन की अक्यूरेसी में सुधार किया जाता है बिना इन्फेरेंस की लागत बढ़ाए, जिसे "ट्रेनबल बैग-ऑफ-फ्रीबीज़" की एक नई कांसेप्ट के आधार पर कहा जाता है। + +## मुख्य विशेषताएँ + +YOLOv7 साथ में कई मुख्य विशेषताएँ लाता है: + +1. **मॉडल फिर-पैरामीटरीकरण**: YOLOv7 एक योजना बनाकर फिर-पैरामीटराइज़्ड मॉडल प्रस्तावित करता है, जो एक कण्टिनुअस संकर्णन पथ की संकल्पना के साथ विभिन्न नेटवर्कों के लिए लागू करने योग्य है। + +2. **डायनामिक लेबल असाइनमेंट**: एकाधिक आउटपुट लेयर्स के साथ मॉडल की ट्रेनिंग करने से एक नया मुद्दा पेश किया जाता है: "अलग-अलग शाखाओं के आउटपुट के लिए डायनामिक निर्धारित लक्ष्य कैसे दें?" इस समस्या को हल करने के लिए, YOLOv7 ने "कॉर्स-टू-फ़ाइन लीड गाइडेड लेबल असाइनमेंट" नामक एक नई लेबल असाइनमेंट विधि पेश की है। + +3. **विस्तारित और कंपाउंड मापन**: YOLOv7 वास्तविक समय ऑब्जेक्ट डिटेक्टर के लिए "विस्तारित" और "कंपाउंड मापन" विधियों को प्रस्तावित करता है जो पैरामीटर और परिकलन का सकारात्मक रूप से उपयोग कर सकती हैं। + +4. **दक्षता**: YOLOv7 द्वारा प्रस्तावित विधि राष्ट्रीय स्तर के वास्तविक समय ऑब्जेक्ट डिटेक्टर के लगभग 40% पैरामीटर और 50% परिकलन को कम कर सकती है, और बेहतर इन्फेरेंस गति और अधिक डिटेक्शन अक्यूरेसी है। + +## उपयोग की उदाहरण + +लेख की अवस्था के समय, Ultralytics वर्तमान में YOLOv7 मॉडेल का समर्थन नहीं करता है। इसलिए, YOLOv7 का उपयोग करना चाहने वाले किसी भी उपयोगकर्ता के लिए योग्यता के लिए कृपया सीधे YOLOv7 GitHub रिपॉज़िटरी के निर्देशिका का संदर्भ लें। + +यहां YOLOv7 का उपयोग करने के लिए आप निम्नलिखित आम कदमों का सारांश देख सकते हैं: + +1. 
YOLOv7 GitHub रिपॉज़िटरी पर जाएं: [https://github.com/WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7)। + +2. स्थापना के लिए README फ़ाइल में दिए हुए निर्देशों का पालन करें। इसमें आमतौर पर रिपॉज़िटरी क्लोनिंग, आवश्यक डिपेंडेंसियों की स्थापना, और आवश्यक पर्यावरण चरों का सेटअप शामिल होता है। + +3. स्थापना पूर्ण होने के बाद, आप रिपॉज़िटरी में दिए गए उपयोग निर्देशों के अनुसार मॉडल को ट्रेन कर सकते हैं और उसका उपयोग करके ऑब्जेक्ट डिटेक्शन कर सकते हैं। इसमें आमतौर पर अपने डेटासेट को तैयार करना, मॉडल पैरामीटर कॉन्फ़िगर करना, मॉडल को ट्रेन करना, और फिर ट्रेन किए गए मॉडल का उपयोग करके ऑब्जेक्ट डिटेक्शन करना शामिल होता है। + +कृपया ध्यान दें कि निर्दिष्ट कदम आपके विशिष्ट उपयोग मामले और YOLOv7 रिपॉज़िटरी की वर्तमान स्थिति के अनुसार बदल सकते हैं। इसलिए, सीधे YOLOv7 GitHub रिपॉज़िटरी में दिए गए निर्देशों का संदर्भ लेने की दृढ़ता से अनुशंसा की जाती है। + +हम इससे होने वाली किसी भी असुविधा के लिए खेद प्रकट करते हैं और Ultralytics में YOLOv7 का समर्थन लागू होने पर उदाहरणों के साथ इस दस्तावेज़ को अद्यतित करने का प्रयास करेंगे। + +## संदर्भ और आभार + +हम रियल-टाइम ऑब्जेक्ट डिटेक्शन के क्षेत्र में महत्वपूर्ण योगदान के लिए YOLOv7 के लेखकों का आभार व्यक्त करना चाहेंगे: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @article{wang2022yolov7, + title={{YOLOv7}: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors}, + author={Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + journal={arXiv preprint arXiv:2207.02696}, + year={2022} + } + ``` + +YOLOv7 के मूल लेख को [arXiv](https://arxiv.org/pdf/2207.02696.pdf) पर पाया जा सकता है। लेखकों ने अपना काम सार्वजनिक रूप से उपलब्ध कराया है और कोडबेस [GitHub](https://github.com/WongKinYiu/yolov7) पर एक्सेस किया जा सकता है। हम क्षेत्र को आगे बढ़ाने और अपने काम को व्यापक समुदाय के लिए सुलभ बनाने के उनके प्रयासों की सराहना करते हैं। diff --git a/docs/hi/models/yolov8.md b/docs/hi/models/yolov8.md new file mode 100644 index 0000000..44f4628 --- /dev/null +++ b/docs/hi/models/yolov8.md @@ -0,0 +1,162 @@ +--- +comments: true +description: YOLOv8 की रोमांचक विशेषताओं का अन्वेषण करें, जो हमारे वास्तविक समय वस्तु निर्धारक का नवीनतम संस्करण है। जानें कि कैसे उन्नत आर्किटेक्चर, पूर्व-प्रशिक्षित मॉडल और सटीकता व गति के बीच सर्वोत्तम संतुलन YOLOv8 को आपके वस्तुनिर्धारण कार्यों के लिए सही चुनाव बनाते हैं। +keywords: YOLOv8, Ultralytics, वास्तविक समय वस्तुनिर्धारक, पूर्व-प्रशिक्षित मॉडल, दस्तावेज़ीकरण, वस्तुनिर्धारण, YOLO श्रृंखला, उन्नत आर्किटेक्चर, सटीकता, गति +--- + +# YOLOv8 + +## अवलोकन + +YOLOv8, YOLO श्रृंखला का नवीनतम संस्करण है, जो सटीकता और गति के मामले में अत्याधुनिक प्रदर्शन प्रदान करता है। पिछले YOLO संस्करणों की प्रगति को आगे बढ़ाते हुए, YOLOv8 नई सुविधाएँ और अनुकूलन प्रस्तुत करता है, जो इसे विभिन्न अनुप्रयोगों में विविध वस्तुनिर्धारण कार्यों के लिए एक आदर्श चुनाव बनाते हैं। + +![Ultralytics YOLOv8](https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/yolo-comparison-plots.png) + +## मुख्य विशेषताएं + +- **उन्नत बैकबोन और नेक आर्किटेक्चर:** YOLOv8 उन्नत बैकबोन और नेक आर्किटेक्चर का प्रयोग करता है, जिससे विशेषता निष्कर्षण और वस्तु निर्धारण क्षमता में सुधार होता है। +- 
**एंकर-मुक्त स्प्लिट Ultralytics हेड:** YOLOv8 एंकर-आधारित दृष्टिकोणों की तुलना में अधिक सटीकता और अधिक कुशल निर्धारण प्रक्रिया के लिए एक एंकर-मुक्त स्प्लिट Ultralytics हेड अपनाता है। +- **सुधारित सटीकता-गति का संतुलन:** सटीकता और गति के बीच उचित संतुलन बनाए रखने पर ध्यान देने के कारण, YOLOv8 विभिन्न अनुप्रयोग क्षेत्रों में वास्तविक समय वस्तुनिर्धारण कार्यों के लिए उपयुक्त है। +- **विभिन्न पूर्व-प्रशिक्षित मॉडल:** YOLOv8 विभिन्न कार्यों और प्रदर्शन आवश्यकताओं के लिए पूर्व-प्रशिक्षित मॉडलों की एक विस्तृत रेंज प्रदान करता है, जिससे अपने विशिष्ट उपयोग मामले के लिए सही मॉडल खोजना आसान हो जाता है। + +## समर्थित कार्य और मोड + +YOLOv8 श्रृंखला कई प्रकार के मॉडल प्रदान करती है, जिनमें से प्रत्येक कंप्यूटर दृष्टि के किसी विशिष्ट कार्य के लिए विशेषीकृत है। ये मॉडल विभिन्न आवश्यकताओं को पूरा करने के लिए डिजाइन किए गए हैं, वस्तुनिर्धारण से लेकर इंस्टेंस सेगमेंटेशन, पोज/किंतुमांक निर्धारण और श्रेणीकरण जैसे जटिल कार्यों तक। + +YOLOv8 श्रृंखला का प्रत्येक संस्करण अपने विशिष्ट कार्य के लिए अनुकूलित है, जिससे उच्च प्रदर्शन और सटीकता सुनिश्चित होती है। इसके अलावा, ये मॉडल विभिन्न संचालन मोड के साथ संगत हैं, जैसे [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), और [Export](../modes/export.md), जिससे परिनियोजन और विकास के विभिन्न चरणों में उनका उपयोग आसान हो जाता है। + +| मॉडल | फ़ाइलनेम | कार्य | Inference | Validation | Training | Export | +|-------------|----------------------------------------------------------------------------------------------------------------|--------------------------------------------|-----------|------------|----------|--------| +| YOLOv8 | `yolov8n.pt` `yolov8s.pt` `yolov8m.pt` `yolov8l.pt` `yolov8x.pt` | [वस्तुनिर्धारण](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-seg | `yolov8n-seg.pt` `yolov8s-seg.pt` `yolov8m-seg.pt` `yolov8l-seg.pt` `yolov8x-seg.pt` | [इंस्टेंस सेगमेंटेशन](../tasks/segment.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-pose | `yolov8n-pose.pt` `yolov8s-pose.pt` `yolov8m-pose.pt` `yolov8l-pose.pt` `yolov8x-pose.pt` 
`yolov8x-pose-p6.pt` | [पोज/किंतुमांक](../tasks/pose.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-cls | `yolov8n-cls.pt` `yolov8s-cls.pt` `yolov8m-cls.pt` `yolov8l-cls.pt` `yolov8x-cls.pt` | [श्रेणीबद्दीकरण](../tasks/classify.md) | ✅ | ✅ | ✅ | ✅ | + +इस सारणी में YOLOv8 मॉडल विभिन्न कार्यों के लिए उपयुक्तता और विभिन्न संचालन मोड के साथ मॉडल के विभिन्न रूपों का अवलोकन प्रदान करती है। यह YOLOv8 श्रृंखला की व्याप्ति और मजबूती का प्रदर्शन करती है, जो कंप्यूटर दृष्टि में विभिन्न अनुप्रयोगों के लिए उपयुक्त बनाती है। + +## प्रदर्शन की मापदंड + +!!! Note "प्रदर्शन" + + === "वस्तुनिर्धारण (COCO)" + + [वस्तुनिर्धारण दस्तावेज़ीकरण](https://docs.ultralytics.com/tasks/detect/) पर उपयोग उदाहरण देखें जहां COCO ट्रेन किए गए [80 पूर्व-प्रशिक्षित वर्गों](https://docs.ultralytics.com/datasets/detect/coco/) के साथ ये मॉडल दिए गए हैं। + + | मॉडल | आकार
(पिक्स) | mAPवैल
50-95 | गति
CPU ONNX
(ms) | गति
A100 TensorRT
(ms) | params
(एम) | FLOPs
(बी) | + | ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | + + === "वस्तुनिर्धारण (Open Images V7)" + + [वस्तुनिर्धारण दस्तावेज़ीकरण](https://docs.ultralytics.com/tasks/detect/) पर उपयोग उदाहरण देखें जहां इन मॉडलों को [Open Image V7](https://docs.ultralytics.com/datasets/detect/open-images-v7/) पर ट्रेन किया गया है, जिसमें 600 पूर्व-प्रशिक्षित वर्ग हैं। + + | मॉडल | आकार
(पिक्स) | mAPवैल
50-95 | गति
CPU ONNX
(ms) | गति
A100 TensorRT
(ms) | params
(एम) | FLOPs
(बी) | + | ----------------------------------------------------------------------------------------- | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-oiv7.pt) | 640 | 18.4 | 142.4 | 1.21 | 3.5 | 10.5 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-oiv7.pt) | 640 | 27.7 | 183.1 | 1.40 | 11.4 | 29.7 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-oiv7.pt) | 640 | 33.6 | 408.5 | 2.26 | 26.2 | 80.6 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-oiv7.pt) | 640 | 34.9 | 596.9 | 2.43 | 44.1 | 167.4 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-oiv7.pt) | 640 | 36.3 | 860.6 | 3.56 | 68.7 | 260.6 | + + === "सेगमेंटेशन (COCO)" + + [सेगमेंटेशन दस्तावेज़ीकरण](https://docs.ultralytics.com/tasks/segment/) पर उपयोग उदाहरण देखें जहां इन मॉडलों को [COCO](https://docs.ultralytics.com/datasets/segment/coco/) पर ट्रेन किया गया है, जिसमें 80 पूर्व-प्रशिक्षित वर्ग हैं। + + | मॉडल | आकार
(पिक्स) | mAPबॉक्स
50-95 | mAPमास्क
50-95 | गति
CPU ONNX
(ms) | गति
A100 TensorRT
(ms) | params
(एम) | FLOPs
(बी) | + | -------------------------------------------------------------------------------------------- | --------------------- | --------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | + | [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | + | [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | + | [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | + | [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | + + === "श्रेणीकरण (ImageNet)" + + [श्रेणीकरण दस्तावेज़ीकरण](https://docs.ultralytics.com/tasks/classify/) पर उपयोग उदाहरण देखें जहां इन मॉडलों को [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/) पर ट्रेन किया गया है, जिसमें 1000 पूर्व-प्रशिक्षित वर्ग हैं। + + | मॉडल | आकार
(पिक्स) | शीर्ष1 विजयी
योग्यता | शीर्ष5 विजयी
योग्यता | गति
CPU ONNX
(ms) | गति
A100 TensorRT
(ms) | params
(एम) | FLOPs
(बी) at 640 | + | ------------------------------------------------------------------------------------------ | --------------------- | ------------------------ | ------------------------ | ------------------------------ | ----------------------------------- | ------------------ | ------------------------ | + | [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | + | [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | + | [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | + | [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | + | [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | + + === "पोज (COCO)" + + [पोज निर्धारण दस्तावेज़ीकरण](https://docs.ultralytics.com/tasks/pose/) पर उपयोग उदाहरण देखें जहां इन मॉडलों को [COCO](https://docs.ultralytics.com/datasets/pose/coco/) पर ट्रेन किया गया है, जिसमें 1 पूर्व-प्रशिक्षित वर्ग, 'person' शामिल है। + + | मॉडल | आकार
(पिक्स) | mAPशामिती
50-95 | mAPशामिती
50 | गति
CPU ONNX
(ms) | गति
A100 TensorRT
(ms) | params
(एम) | FLOPs
(बी) | + | ----------------------------------------------------------------------------------------------------- | --------------------- | ------------------------ | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | + | [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | + | [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | + | [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | + | [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | + | [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | + +## उपयोग की उदाहरण + +यह उदाहरण सरल YOLOv8 प्रशिक्षण और निर्धारण उदाहरण प्रदान करता है। इन और अन्य [मोड](../modes/index.md) की पूरी दस्तावेज़ीकरण के लिए दस्तावेज़ पृष्ठों [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) और [Export](../modes/export.md) का उपयोग करें। + +इसे ध्यान दें कि नीचे दिए गए उदाहरण योलोवी [वस्तुनिर्धारण](../tasks/detect.md) मॉडल के लिए हैं। अतिरिक्त समर्थित कार्यों के लिए [Segment](../tasks/segment.md), [Classify](../tasks/classify.md) और [Pose](../tasks/pose.md) दस्तावेज़ीकरण देखें। + +!!! 
Example "उदाहरण" + + === "पायथन" + + PyTorch के पूर्व-प्रशिक्षित `*.pt` मॉडल और कॉन्फ़िगरेशन `*.yaml` फ़ाइलें पायथन में एक मॉडल इंस्टेंस बनाने के लिए `YOLO()` कक्षा को पास की जा सकती हैं: + + ```python + from ultralytics import YOLO + + # COCO-pretrained YOLOv8n मॉडल को लोड करें + model = YOLO('yolov8n.pt') + + # मॉडल जानकारी दिखाएँ (वैकल्पिक) + model.info() + + # COCO8 उदाहरण डेटासेट पर 100 एपोक के लिए मॉडल को प्रशिक्षित करें + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # 'bus.jpg' छवि पर YOLOv8n मॉडल के साथ निर्धारण चलाएँ + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + मॉडलों को सीधे चलाने के लिए CLI कमांड उपलब्ध हैं: + + ```bash + # COCO-pretrained YOLOv8n मॉडल को लोड करें और उसे COCO8 उदाहरण डेटासेट पर 100 एपोक के लिए प्रशिक्षित करें + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # COCO-pretrained YOLOv8n मॉडल को लोड करें और 'bus.jpg' छवि पर निर्धारण चलाएँ + yolo predict model=yolov8n.pt source=path/to/bus.jpg + ``` + +## उद्धरण और आभार + +यदि आप अपने काम में YOLOv8 मॉडल या इस रिपॉजिटरी के किसी अन्य सॉफ़्टवेयर का उपयोग करते हैं, तो कृपया इसे इस प्रकार उद्धृत करें: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @software{yolov8_ultralytics, + author = {Glenn Jocher and Ayush Chaurasia and Jing Qiu}, + title = {Ultralytics YOLOv8}, + version = {8.0.0}, + year = {2023}, + url = {https://github.com/ultralytics/ultralytics}, + orcid = {0000-0001-5950-6979, 0000-0002-7603-6750, 0000-0003-3783-7069}, + license = {AGPL-3.0} + } + ``` + +कृपया ध्यान दें कि DOI लंबित है और जब यह उपलब्ध हो जाएगा तो उद्धरण में इसे शामिल किया जाएगा। YOLOv8 मॉडल [AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) और [एंटरप्राइज](https://ultralytics.com/license) लाइसेंस के तहत उपलब्ध हैं। diff --git a/docs/hi/modes/benchmark.md b/docs/hi/modes/benchmark.md new file mode 100644 index 0000000..9187029 --- /dev/null +++ b/docs/hi/modes/benchmark.md @@ -0,0 +1,94 @@ +--- +comments: true +description: Ultralytics YOLO के विभिन्न निर्यात प्रारूपों में YOLOv8 की गति और सटीकता की जांच करना सीखें; mAP50-95, accuracy_top5 माप, और अन्य मापों पर जानकारी प्राप्त करें। +keywords: Ultralytics, YOLOv8, बेंचमार्किंग, गति प्रोफाइलिंग, सटीकता प्रोफाइलिंग, mAP50-95, accuracy_top5, ONNX, OpenVINO, TensorRT, YOLO निर्यात प्रारूप +--- + +# Ultralytics YOLO के साथ मॉडल बेंचमार्किंग + +Ultralytics YOLO पारिस्थितिकी और समावेश + +## परिचय + +जब आपका मॉडल प्रशिक्षित और सत्यापित हो जाता है, तो अगला तार्किक चरण विभिन्न वास्तविक-दुनिया की स्थितियों में इसके प्रदर्शन का मूल्यांकन करना होता है। Ultralytics YOLOv8 में बेंचमार्क मोड इसी उद्देश्य को पूरा करता है और विभिन्न निर्यात प्रारूपों में आपके मॉडल की गति और सटीकता का मूल्यांकन करने के लिए एक मजबूत ढांचा प्रदान करता है। + +## बेंचमार्किंग क्यों महत्वपूर्ण है? 
+ +- **जागरूक निर्णय:** गति और सटीकता के बीच ट्रेड-ऑफ के बारे में जानकारी प्राप्त करें। +- **संसाधन आवंटन:** समझें कि अलग-अलग निर्यात प्रारूप विभिन्न हार्डवेयर पर कैसा प्रदर्शन करते हैं। +- **अनुकूलन:** अपने विशिष्ट उपयोग मामले में सर्वोत्तम प्रदर्शन प्रदान करने वाला निर्यात प्रारूप कौन सा है, इसकी जानकारी प्राप्त करें। +- **लागत बचत:** बेंचमार्क परिणामों के आधार पर हार्डवेयर संसाधनों का अधिक कुशल उपयोग करें। + +### बेंचमार्क मोड में मुख्य माप + +- **mAP50-95:** वस्तु का पता लगाने, विभाजन करने और पोज़ अनुमान के लिए। +- **accuracy_top5:** छवि वर्गीकरण के लिए। +- **अनुमान (inference) समय:** प्रति छवि लिया गया समय, मिलीसेकंड में। + +### समर्थित निर्यात प्रारूप + +- **ONNX:** CPU प्रदर्शन के लिए आदर्श +- **TensorRT:** अधिकतम GPU क्षमता के लिए +- **OpenVINO:** Intel हार्डवेयर अनुकूलन के लिए +- **CoreML, TensorFlow SavedModel, और अधिक:** विविध डिप्लॉयमेंट आवश्यकताओं के लिए। + +!!! Tip "युक्ति" + + * 3x तक CPU स्पीडअप के लिए ONNX या OpenVINO में निर्यात करें। + * 5x तक GPU स्पीडअप के लिए TensorRT में निर्यात करें। + +## उपयोग उदाहरण + +ONNX, TensorRT आदि सहित सभी समर्थित निर्यात प्रारूपों पर YOLOv8n बेंचमार्क चलाएं। निर्यात तर्कों की पूरी सूची के लिए नीचे Arguments अनुभाग देखें। + +!!! 
Example "उदाहरण" + + === "Python" + + ```python + from ultralytics.utils.benchmarks import benchmark + + # GPU पर बेंचमार्क + benchmark(model='yolov8n.pt', data='coco8.yaml', imgsz=640, half=False, device=0) + ``` + === "CLI" + + ```bash + yolo benchmark model=yolov8n.pt data='coco8.yaml' imgsz=640 half=False device=0 + ``` + +## Arguments + +`model`, `data`, `imgsz`, `half`, `device`, और `verbose` जैसे तर्क उपयोगकर्ताओं को बेंचमार्क को अपनी विशेष आवश्यकताओं के अनुसार समायोजित करने और विभिन्न निर्यात प्रारूपों के प्रदर्शन की आसानी से तुलना करने की सुविधा प्रदान करते हैं। + +| कुंजी | मान | विवरण | +|-----------|------------|---------------------------------------------------------------------------------| +| `model` | `None` | मॉडल फ़ाइल का पथ, यानी yolov8n.pt, yolov8n.yaml | +| `data` | `None` | बेंचमार्किंग डेटासेट को संदर्भित करने वाले YAML फ़ाइल का पथ (val लेबल के तहत) | +| `imgsz` | `640` | छवि का आकार स्कैलर या (h, w) सूची, अर्थात (640, 480) | +| `half` | `False` | FP16 क्वांटाइज़ेशन | +| `int8` | `False` | INT8 क्वांटाइज़ेशन | +| `device` | `None` | चलाने के लिए उपकरण, अर्थात cuda device=0 या device=0,1,2,3 या device=cpu | +| `verbose` | `False` | त्रुटि होने पर जारी न रखें (बूल), या val न्यूनतम सीमा (फ्लोट) | + +## निर्यात प्रारूप + +बेंचमार्क नीचे दिए गए सभी संभावित निर्यात प्रारूपों पर स्वचालित रूप से चलने का प्रयास करेगा। + +| प्रारूप | `format` तर्क | मॉडल | मेटाडेटा | तर्क | +|--------------------------------------------------------------------|----------------|---------------------------|----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | 
`openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +पूर्ण निर्यात विवरण देखें निर्यात पृष्ठ में [Export](https://docs.ultralytics.com/modes/export/)। diff --git a/docs/hi/modes/export.md b/docs/hi/modes/export.md new file mode 100644 index 0000000..646b439 --- /dev/null +++ b/docs/hi/modes/export.md @@ -0,0 +1,108 @@ +--- +comments: true +description: सभी प्रकार के निर्यात स्तर पर YOLOv8 मॉडल्स को निर्यात करने के लिए आपके लिए चरण-दर-चरण मार्गदर्शिका। अब निर्यात की जांच करें! 
+keywords: YOLO, YOLOv8, Ultralytics, मॉडल निर्यात, ONNX, TensorRT, CoreML, TensorFlow SavedModel, OpenVINO, PyTorch, निर्यात मॉडल +--- + +# Ultralytics YOLO के साथ मॉडल निर्यात + +Ultralytics YOLO ecosystem and integrations + +## परिचय + +किसी मॉडल को प्रशिक्षित करने का अंतिम लक्ष्य उसे वास्तविक दुनिया के अनुप्रयोगों में तैनात करना होता है। Ultralytics YOLOv8 का निर्यात मोड आपके प्रशिक्षित मॉडल को विभिन्न स्वरूपों में निर्यात करने के लिए कई तरह के विकल्प प्रदान करता है, जिससे उसे विभिन्न प्लेटफॉर्मों और उपकरणों पर तैनात किया जा सके। यह व्यापक मार्गदर्शिका मॉडल निर्यात की बारीकियों को समझाने और अधिकतम संगतता व प्रदर्शन प्राप्त करने के तरीके दिखाने का लक्ष्य रखती है। + +

+
+ +
+ देखें: कस्टम प्रशिक्षित Ultralytics YOLOv8 मॉडल को निर्यात करने और वेबकैम पर लाइव अनुमान चलाने का तरीका। +

+ +## YOLOv8 के निर्यात मोड को क्यों चुनें? + +- **विविधता:** ONNX, TensorRT, CoreML और अन्य सहित कई फॉर्मेट में निर्यात करें। +- **प्रदर्शन:** TensorRT में 5x जीपीयू स्पीडअप और ONNX या OpenVINO में 3x सीपीयू स्पीडअप प्राप्त करें। +- **संगतता:** अपने मॉडल को कई हार्डवेयर और सॉफ़्टवेयर पर संगठित करें। +- **उपयोग की सुविधा:** त्वरित और सीधी मॉडल निर्यात के लिए सरल CLI और Python API। + +### निर्यात मोड की प्रमुख विशेषताएं + +यहाँ कुछ मुख्य विशेषताएँ हैं: + +- **एक-क्लिक निर्यात:** अलग-अलग फॉर्मेट में निर्यात करने के लिए सरल कमांड। +- **बैच निर्यात:** बैच-इन्फरेंस क्षमता वाले मॉडलों को निर्यात करें। +- **सुधारित अनुमान:** निर्यात किए गए मॉडल अनुमान समय के लिए अनुकूलन किए जाते हैं। +- **ट्यूटोरियल वीडियो:** सुविधाएं और ट्यूटोरियल सुनिश्चित करने के लिए गहन मार्गदर्शिकाओं का उपयोग करें। + +!!! Tip "सुझाव" + + * 3x सीपीयू स्पीडअप के लिए ONNX या OpenVINO में निर्यात करें। + * 5x जीपीयू स्पीडअप के लिए TensorRT में निर्यात करें। + +## उपयोग उदाहरण + +YOLOv8n मॉडल को ONNX या TensorRT जैसे अलग फॉर्मेट में निर्यात करें। पूरी सूची निर्यात तर्कों के लिए नीचे दिए गए Arguments खंड को देखें। + +!!! 
Example "उदाहरण" + + === "Python" + + ```python + from ultralytics import YOLO + + # एक मॉडल लोड करें + model = YOLO('yolov8n.pt') # एक आधिकारिक मॉडल लोड करें + model = YOLO('path/to/best.pt') # एक कस्टम प्रशिक्षित मॉडल लोड करें + + # मॉडल निर्यात करें + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # आधिकारिक मॉडल का निर्यात करें + yolo export model=path/to/best.pt format=onnx # कस्टम प्रशिक्षित मॉडल का निर्यात करें + ``` + +## Arguments + +YOLO मॉडलों के निर्यात सेटिंग्स निर्यात के विभिन्न विन्यास और विकल्पों के बारे में होते हैं, जिन्हें यूज़ करके मॉडल को अन्य पर्यावरण या प्लेटफ़ॉर्म में सहेजने या निर्यात करने के लिए उपयोग किया जा सकता है। इन सेटिंग्स से मॉडल के प्रदर्शन, आकार और विभिन्न सिस्टम के साथ संगतता प्रभावित हो सकती हैं। कुछ सामान्य YOLO निर्यात सेटिंग्स में निर्यात की गई मॉडल फ़ाइल का स्वरूप (जैसे ONNX, TensorFlow SavedModel), मॉडल कोरी सहवास में चलाने वाली उपकरण (जैसे CPU, GPU) और मास्क या प्रत्येक बॉक्स पर कई लेबलों की उपस्थिति जैसे अतिरिक्त विशेषताएँ शामिल हो सकते हैं। निर्यात प्रक्रिया प्रभावित करने वाले अन्य कारकों में मॉडल द्वारा उपयोग के लिए एक विशेष कार्य और लक्षित पर्यावरण या प्लेटफ़ॉर्म की आवश्यकताओं या सीमाओं का ध्यान देना महत्वपूर्ण है। लक्ष्य प्रयोजन और लक्ष्यित वातावरण में प्रभावी ढंग से उपयोग होने के लिए इन सेटिंग्स को ध्यान से विचार करना महत्वपूर्ण है। + +| कुंजी | मान | विवरण | +|-------------|-----------------|------------------------------------------------------------------------| +| `format` | `'torchscript'` | योग्यता के लिए निर्यात करने के लिए स्वरूप | +| `imgsz` | `640` | एकल रूप में छवि का आकार या (h, w) सूची, जैसे (640, 480) | +| `keras` | `False` | TF SavedModel निर्यात के लिए केरस का प्रयोग करें | +| `optimize` | `False` | TorchScript: मोबाइल के लिए ऑप्टिमाइज़ करें | +| `half` | `False` | FP16 संगणना | +| `int8` | `False` | INT8 संगणना | +| `dynamic` | `False` | ONNX/TensorRT: गतिशील ध्यान दिलाने वाले ध्यान | +| `simplify` | `False` | ONNX/TensorRT: मॉडल को सरल बनाएं | 
+| `opset` | `None` | ONNX: ऑपसेट संस्करण (वैकल्पिक, डिफ़ॉल्ट्स को नवीनतम के रूप में छोड़ें) | +| `workspace` | `4` | TensorRT: कार्यक्षेत्र आकार (GB) | +| `nms` | `False` | CoreML: NMS जोड़ें | + +## निर्यात स्वरूप + +नीचे दिए गए तालिका में YOLOv8 निर्यात स्वरूप दिए गए हैं। आप किसी भी स्वरूप में निर्यात कर सकते हैं, जैसे `format='onnx'` या `format='engine'`। + +| स्वरूप | `format` तर्क | मॉडल | मेटाडाटा | तर्क | +|--------------------------------------------------------------------|---------------|---------------------------|----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| 
[ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | diff --git a/docs/hi/modes/index.md b/docs/hi/modes/index.md new file mode 100644 index 0000000..9e49886 --- /dev/null +++ b/docs/hi/modes/index.md @@ -0,0 +1,78 @@ +--- +comments: true +description: प्रशिक्षण से ट्रैकिंग तक, Ultralytics के साथ YOLOv8 का अधिकतम लाभ उठाएं। मान्यता प्राप्त मोड, जैसे पुष्टीकरण, निर्यात और बेंचमार्किंग, के लिए अवधारणाओं और उदाहरण प्राप्त करें। +keywords: Ultralytics, YOLOv8, मशीन लर्निंग, ऑब्जेक्ट डिटेक्शन, प्रशिक्षण, पुष्टीकरण, पूर्वावलोकन, निर्यात, ट्रैकिंग, बेंचमार्किंग +--- + +# Ultralytics YOLOv8 मोड + +Ultralytics YOLO ecosystem and integrations + +## परिचय + +Ultralytics YOLOv8 सिर्फ एक ओब्जेक्ट डिटेक्शन मॉडल नहीं है; यह मशीन लर्निंग मॉडलों के पूर्ण जीवन चक्र के लिए एक विकशील फ्रेमवर्क है—डेटा संग्रह और मॉडल प्रशिक्षण से पुष्टीकरण, डिप्लॉयमेंट और वास्तविक दुनिया के ट्रैकिंग तक। प्रत्येक मोड का एक विशेष उद्देश्य होता है और आपको विभिन्न कार्यों और यूज-केस के लिए आवश्यक लचीलापन और कार्यक्षमता प्रदान करने के लिए बनाया जाता है। + +!!! Note "नोट" + + 🚧 हमारी बहुभाषीय दस्तावेज़ीकरण वर्तमान में निर्माणाधीन है, और हम इसे सुधारने के लिए कड़ी मेहनत कर रहे हैं। आपकी सहनशीलता के लिए धन्यवाद! 🙏 + +

+
+ +
+ देखें: Ultralytics मोड ट्यूटोरियल: प्रशिक्षण, पुष्टीकरण, पूर्वावलोकन, निर्यात और बेंचमार्किंग। +

+ +### एक पल मोडें + +Ultralytics YOLOv8 के समर्थित **मोड** को समझना आपके मॉडल का अधिकतम उपयोग करने के लिए महत्वपूर्ण है: + +- **प्रशिक्षण** मोड: अपने मॉडल को कस्टम या पूर्व-भर्ती डेटासेट में संशोधित करें। +- **पुष्टीकरण** मोड: मॉडल प्रदर्शन को मान्यता प्राप्त करने के लिए प्रशिक्षण के बाद के चेकप्वाइंट का उपयोग करें। +- **पूर्वावलोकन** मोड: नए छवियों या वीडियो का उपयोग करके प्रशिक्षित YOLOv8 मॉडल के माध्यम से पूर्वानुमान करें। +- **निर्यात** मोड: डिप्लॉयमेंट के लिए मॉडल को विभिन्न प्रारूपों में तैयार करें। +- **ट्रैक** मोड: रीयल-टाइम ट्रैकिंग अनुप्रयोगों में योजित आइटम डिटेक्शन मॉडल का विस्तार करें। +- **बेंचमार्क** मोड: विविध डिप्लॉयमेंट वातावरणों में मॉडल की गति और सटीकता का विश्लेषण करें। + +यह सामग्री आपको प्रत्येक मोड का अवलोकन और व्यावहारिक अंदाज़ देने का उद्देश्य रखती है, जिससे आप YOLOv8 की पूरी क्षमता का उपयोग कर सकें। + +## [प्रशिक्षण](train.md) + +प्रशिक्षण मोड का उपयोग कस्टम डेटासेट पर YOLOv8 मॉडल के प्रशिक्षण के लिए किया जाता है। इस मोड में, मॉडल को निर्दिष्ट डेटासेट और हाइपरपैरामीटर का उपयोग करके प्रशिक्षित किया जाता है। प्रशिक्षण प्रक्रिया में, मॉडल के पैरामीटरों को अनुकूलित किया जाता है ताकि यह छवियों में ऑब्जेक्टों की कक्षाओं और स्थानों का सटीक पूर्वानुमान कर सके। + +[प्रशिक्षण उदाहरण](train.md){ .md-button } + +## [पुष्टीकरण](val.md) + +पुष्टीकरण मोड का उपयोग YOLOv8 मॉडल के प्रशिक्षण के बाद मॉडल की मान्यता और सामान्यीकरण प्रदर्शन को मापने के लिए किया जाता है। इस मोड में, मॉडल को एक प्रमाणीकरण सेट पर मूल्यांकन किया जाता है ताकि उसकी सटीकता और सामान्यीकरण प्रदर्शन को मापा जा सके। इस मोड का उपयोग मॉडल के प्रदर्शन को सुधारने के लिए मॉडल के हाइपरपैरामीटरों को ट्यून करने के लिए किया जा सकता है। + +[पुष्टीकरण उदाहरण](val.md){ .md-button } + +## [पूर्वानुमान](predict.md) + +पूर्वानुमान मोड का उपयोग नई छवियों या वीडियो पर प्रशिक्षित YOLOv8 मॉडल का उपयोग करके पूर्वानुमान बनाने के लिए किया जाता है। इस मोड में, मॉडल एक चेकप्वाइंट फ़ाइल से लोड किया जाता है, और उपयोगकर्ता छवियों या वीडियों को उपयोग करके इन्फेरेंस कर सकता है। मॉडल उपयोगकर्ता को इनपुट छवियों या 
वीडियों में ऑब्जेक्टों की कक्षाओं और स्थानों का पूर्वानुमान करता है। + +[पूर्वानुमान उदाहरण](predict.md){ .md-button } + +## [निर्यात](export.md) + +निर्यात मोड का उपयोग एक YOLOv8 मॉडल को इस्तेमाल करने के लिए एक प्रारूप में करने के लिए किया जाता है जो कि अन्य सॉफ़्टवेयर अनुप्रयोगों या हार्डवेयर उपकरणों द्वारा इस्तेमाल किया जा सकता है। यह मोडल को उत्पादन उद्योगों में डिप्लॉय करने के लिए उपयोगी होता है। + +[निर्यात उदाहरण](export.md){ .md-button } + +## [ट्रैक](track.md) + +ट्रैक मोड का उपयोग एक YOLOv8 मॉडल का उपयोग करके वास्तविक समय में वस्तुओं का ट्रैकिंग करने के लिए किया जाता है। इस मोड में, मॉडल एक चेकप्वाइंट फ़ाइल से लोड किया जाता है, और उपयोगकर्ता एक लाइव वीडियो स्ट्रीम प्रदान कर सकता है ताकि वास्तविक समय में वस्तुओं का ट्रैकिंग किया जा सके। यह मोड सतर्कता प्रणालियों या स्वयं चालित कार जैसे अनुप्रयोगों के लिए उपयोगी होता है। + +[ट्रैक उदाहरण](track.md){ .md-button } + +## [बेंचमार्क](benchmark.md) + +बेंचमार्क मोड का उपयोग YOLOv8 के विभिन्न निर्यात प्रारूपों की गति और सटीकता का प्रोफ़ाइल बनाने के लिए किया जाता है। बेंचमार्क से प्राप्त जानकारी निर्यात प्रारूप के आकार, उसकी `mAP50-95` metric (ऑब्जेक्ट डिटेक्शन, सेगमेंटेशन और पोज़ के लिए) +या `accuracy_top5` metric (वर्गीकरण के लिए), और चित्र माध्यमिक समय के मिलीसेकंड प्रति इमेज के अलग-अलग निर्यात प्रारूपों की जानकारी प्रदान करता है। यह जानकारी उपयोगकर्ताओं को उनकी विशेष उपयोग के मामले में उनकी खासियतों के लिए मिति और सटीकता के लिए सर्वोत्तम निर्यात प्रारूप का चयन करने में मदद कर सकती है। + +[बेंचमार्क उदाहरण](benchmark.md){ .md-button } diff --git a/docs/hi/modes/predict.md b/docs/hi/modes/predict.md new file mode 100644 index 0000000..86c3a04 --- /dev/null +++ b/docs/hi/modes/predict.md @@ -0,0 +1,226 @@ +--- +comments: true +description: योलोवी 8 के अल्ट्रालायटिक्स पूर्वानुमान मोड का उपयोग करना सीखें और विभिन्न कार्यों के लिए विभिन्न पूर्वानुमान स्रोतों के बारे में जानें। इमेजेस, वीडियोज़ और डेटा प्रारूपों जैसे पूर्वानुमान स्रोतों के बारे में जानें। +keywords: Ultralytics, YOLOv8, पूर्वानुमान मोड, पूर्वानुमान 
स्रोत, पूर्वानुमान कार्य, धारणा योजना, छवि प्रसंस्करण, वीडियो प्रसंस्करण, मशीन लर्निंग, एआई +--- + +# अल्ट्रालायटिक्स YOLO मॉडल पूर्वानुमान + +अल्ट्रालायटिक्स YOLO संघटना और एकीकरण + +## परिचय + +मशीन लर्निंग और कंप्यूटर विजन की दुनिया में दृश्यांश से समझने की प्रक्रिया को 'पूर्वानुमान' या 'पूर्वानुमान' कहा जाता है। अल्ट्रालायटिक्स YOLOv8 एक शक्तिशाली विशेषता प्रदान करता है जिसे **पूर्वानुमान मोड** कहा जाता है, जो व्यापक डेटा स्रोतों पर उच्च प्रदर्शन, वास्तुकालिक पूर्वानुमान के लिए विशेष रूप से तैयार किया गया है। + +

+
+ +
+ देखें: कस्टम परियोजनाओं के लिए अल्ट्रालायटिक्स YOLOv8 मॉडल से आउटपुट निकालने का तरीका। +

+ +## वास्तविक जगत में अनुप्रयोग + +| विनिर्माण | खेल संघ | सुरक्षा | +|:-------------------------------------------:|:--------------------------------------------------:|:---------------------------------------------:| +| ![वाहन के पुर्जे डिटेक्शन][car spare parts] | ![फुटबॉल खिलाड़ी डिटेक्शन][football player detect] | ![लोगों का गिरना डिटेक्शन][human fall detect] | +| वाहन के पुर्जे डिटेक्शन | फुटबॉल खिलाड़ी डिटेक्शन | लोगों का गिरना | + +## पूर्वानुमान के लिए अल्ट्रालायटिक्स YOLO का उपयोग क्यों करें? + +यहां आपको योलोवी 8 के पूर्वानुमान मोड का उपयोग अपने विभिन्न पूर्वानुमान की आवश्यकताओं के लिए करना चाहिए का कारण है: + +- **बहुमुखीपन:** छवियों, वीडियोज और यह तक कि लाइव स्ट्रीम की पूर्वानुमान पर योग्य हैं। +- **प्रदर्शन:** मुख्यतः बिना सटीकता पर बलवर्धित, रियल-टाइम, उच्च गति प्रसंस्करण के लिए engineering किए गए हैं। +- **उपयोग सहज:** खद्य पाइथन और यथार्थता (CLI) इंटरफ़ेसों को जल्दी विपणन और परीक्षण के लिए। +- **ऊच्चतम अनुकूलनयोग्यता:** अपनी विशिष्ट आवश्यकताओं के अनुसार मॉडल के पूर्वानुमान कृति को निर्धारित करने के लिए विभिन्न सेटिंग और पैरामीटर। + +### पूर्वानुमान मोड की प्रमुख सुविधाएँ + +YOLOv8 का पूर्वानुमान मोड मजबूत और विशेषता प्राप्त करने के लिए डिज़ाइन किया गया है, जिसमें शामिल हैं: + +- **यदि आपके डेटा के कई स्रोतों के पंजीकरण:** चाहे आपका डेटा व्यक्तिगत छवियों, छोटू माला छवियों, वीडियो फ़ाइलों या वास्तविक समय वीडियो स्ट्रीमों की रूप में हो, पूर्वानुमान मोड आपके लिए उपयुक्त है। +- **स्ट्रीमिंग मोड:** `स्ट्रीमिंग` सुविधाका उपयोग करें और `पूर्वानुमान की कॉल विधि` में `स्ट्रीम = ट्रू` सेट करके `रिजल्ट्स` ऑब्जेक्ट के एक मेमोरी-पर्याप्त जेनरेटर का उत्पादन करें। +- **बैच प्रोसेसिंग:** एक ही बैच में कई छवियों या वीडियो फ़्रेम्स की प्रोसेसिंग करने की क्षमता, पूर्वानुमान समय को और तेज़ करती है। +- **इंटीग्रेशन फ्रेंडली:** लचीली API के कारण मौजूदा डेटा पाईपलाइन और अन्य सॉफ़्टवेयर घटकों के साथ आसानी से इंटीग्रेट करें। + +जब पूर्वानुमान के दौरान मॉडल को `गेनरेटर की `रूप में लोड किया जाता है, तो अल्ट्रालायटिक्स YOLO मॉडल निम्नलिखित मेथड से `रिजल्ट` ऑब्जेक्ट के एक 
पायथन सूची या यादृच्छिक संख्यकारी जनरेटर लौटाते हैं: + +!!! Example "पूर्वानुमान" + + === "`स्ट्रीम = फाल्स` के साथ सूची यादृच्छिक" + ```python + from ultralytics import YOLO + + # एक मॉडल लोड करें + model = YOLO('yolov8n.pt') # पूर्व-प्रशिक्षित YOLOv8n मॉडल + + # सूची के लिए बैच्ड पूर्वानुमान चलाएं + results = model(['im1.jpg', 'im2.jpg']) # रिजल्ट्स ऑब्जेक्ट की सूची लौटाएँ + + # परिणाम सूची को प्रोसेस करें + for result in results: + boxes = result.boxes # बॉक्स के लिए बॉक्स ऑब्जेक्ट + masks = result.masks # सेगमेंटेशन मोड के लिए मास्क्स ऑब्जेक्ट + keypoints = result.keypoints # पोज़ के लिए कीपॉइंट्स ऑब्जेक्ट + probs = result.probs # वर्गीकरण के लिए प्रोब्स ऑब्जेक्ट + ``` + + === "`स्ट्रीम = ट्रू के साथ जेनरेटर` की प्राथमिकता" + ```python + from ultralytics import YOLO + + # एक मॉडल लोड करें + model = YOLO('yolov8n.pt') # पूर्व-प्रशिक्षित YOLOv8n मॉडल + + # सूची के लिए बैच्ड पूर्वानुमान चलाएं + results = model(['im1.jpg', 'im2.jpg'], stream=True) # रिजल्ट्स ऑब्जेक्ट का जनरेटर लौटाएँ + + # रिजल्ट्स जनरेटर को प्रोसेस करें + for result in results: + boxes = result.boxes # बॉक्स के लिए बॉक्स ऑब्जेक्ट + masks = result.masks # सेगमेंटेशन मास्क्स के लिए मास्क्स ऑब्जेक्ट + keypoints = result.keypoints # पोज़ के लिए कीपॉइंट्स ऑब्जेक्ट + probs = result.probs # वर्गीकरण के लिए प्रोब्स ऑब्जेक्ट + ``` + +## पूर्वानुमान स्रोत + +YOLOv8 पूर्वानुमान के लिए विभिन्न प्रकार के इनपुट स्रोतों को process कर सकता है, जैसा कि नीचे दिए गए तालिका में दिखाया गया है। स्रोतों में स्थिर छवियाँ, वीडियो स्ट्रीम्स, और विभिन्न डेटा प्रारूपों को विकास के साथ उपयोग किया जा सकता है। यह तालिका भी इंगित करती है कि क्या प्रत्येक स्रोत को स्ट्रीमिंग मोड में `द्वारा इस्तेमाल किया जा सकता है।' यहां स्ट्रीमिंग मोड का उपयोग वीडियो या लाइव स्ट्रीम को प्रोसेस करने के लिए उपयोगी है क्योंकि इसमें सभी फ्रेम्स को मेमोरी में लोड किए बिना एक रिजल्ट की generator बनाई जाती है। + +!!! 
Tip "सुझाव" + + `स्ट्रीम = ट्रू` का उपयोग बड़ी वीडियोज़ या विशाल डेटासेट को संचालित करने के लिए करें ताकि मेमोरी का दक्षिणा प्रबंधित किया जा सके। `स्ट्रीम = फाल्स` के खंड के खंड में सभी फ्रेम्स या डेटा बिंदुओं के लिए परिणाम स्तोर किए जाते हैं, जो अधिकांशता में मेमोरी में लोड हो सकते हैं और बड़े इनपुट के लिए आउट-ऑफ-मेमोरी त्रुटियां उत्पन्न कर सकते हैं। इसके बराबर उपयोग करके `स्ट्रीम= True` एक जेनरेटर का उपयोग करता है, जिसके संचित होने वाले + +केवल ब्रह्मण्ड के परिणामों को सीमित संग्रह किया जाता है, बहुत कम मेमोरी खपत करता है और बड़े इनपुट के लिए आउट ऑफमेमोरीनुमान syllabus नुकसान होने से बचाता है। + +| स्रोत | तर्क | प्रकार | टिप्पणियाँ | +|-----------------|-------------------------------------------|---------------|-------------------------------------------------------------------------------------------------------------| +| छवि | `'छवि.जेपीजी'` | `श. या पथ` | एकल छवि फ़ाइल। | +| यूआरएल | `'https://ultralytics.com/छवि/बस.जेपीजी'` | `शः` | छवि होस्टेड रिमोटली उन्नत करने के लिए यूआरएल । | +| स्क्रीनशॉट | `'स्क्रीन'` | `शः` | स्क्रीन की वर्तमान सामग्री के रूप में कैप्चर । | +| आदर्श | `इमेज.ओपन('चित्र.जेपीजी')` | `पीआईएल.इमेज` | HWC format with RGB channels। | +| ओपनसीवी | `ओपेंसीवी.इमरेड('चित्र.जेपीजी')` | `एनपी.न्डआरे` | HWC format with BGR channels `uint8 (0-255)`। | +| नम्पी | `नपाई.जीरोस((640,1280,३))` | `एनपी.नडअरे` | HWC format with BGR channels `uint8 (0-255)`। | +| टॉर्च | `टॉर्च.जीरोस(16,3,320,640)` | `टॉर्च.टेंसर` | BCHW format with RGB channels `float32 (0.0-1.0)`। | +| सीएसवी | `'स्रोत.सीएसवी'` | `शः` or `पथ` | छवियों, वीडियोज़, या निर्देशिकाओं की पथों को समेटने वाली CSV फ़ाइल। | +| वीडियो ✅ | `'वीडियो.म्प४'` | `पथ` or `पथ` | MP4, AVI, आदि जैसे वीडियो फ़ाइल में वीडियो। | +| निर्देशिका ✅ | `'पथ/'` | `शः` or `पथ` | छवियों या वीडियोज़ को समेटने वाली एक निर्देशिका का पथ। | +| ग्लॉब ✅ | `'पथ/ *.जेपीजी'` | `शः` | एकाधिक फ़ाइलों के मिलते-जुलते गोलियाँ। वाइल्डकार्ड के रूप में `*` चरित्र का उपयोग करें। | +| यूट्यूब ✅ | `'https://youtu.be/LNwODJXcvt4'` | `शः` | 
एक यूट्यूब वीडियो के लिए यूआरएल। | +| स्ट्रीम ✅ | `'rtsp://example.com/media.mp4'` | `str` | RTSP, RTMP, TCP या IP पते जैसे स्ट्रीमिंग प्रोटोकॉल्स के लिए पता। | +| मल्टी-स्ट्रीम ✅ | `'list.streams'` | `str` or `Path` | प्रति पंक्ति एक स्ट्रीम URL के साथ `*.streams` पाठ फ़ाइल, उदाहरण के लिए 8 स्ट्रीम 8 बैच-आकार के साथ चलेंगे। | + +नीचे प्रत्येक स्रोत प्रकार के उपयोग के लिए कोड उदाहरण दिए गए हैं: + +!!! Example "पूर्वानुमान स्रोत" + + === "छवि" + एक छवि फ़ाइल पर पूर्वानुमान चलाएं। + ```python + from ultralytics import YOLO + + # पूर्व-प्रशिक्षित YOLOv8n मॉडल लोड करें + model = YOLO('yolov8n.pt') + + # छवि फ़ाइल के लिए पथ निर्धारित करें + source = 'path/to/image.jpg' + + # छवि पर पूर्वानुमान चलाएं + results = model(source) # रिजल्ट्स ऑब्जेक्ट की सूची + + # परिणाम सूची को प्रोसेस करें + for result in results: + boxes = result.boxes # बॉक्स आउटपुट्स के लिए बॉक्स ऑब्जेक्ट + masks = result.masks # सेगमेंटेशन मास्क्स आउटपुट्स के लिए मास्क्स ऑब्जेक्ट + keypoints = result.keypoints # पोज के लिए कीपॉइंट्स ऑब्जेक्ट + probs = result.probs # वर्गीकरण आउटपुट्स के लिए प्रोब्स ऑब्जेक्ट + ``` + + === "स्क्रीनशॉट" + वर्तमान स्क्रीन सामग्री पर पूर्वानुमान चलाएं। + ```python + from ultralytics import YOLO + + # पूर्व-प्रशिक्षित YOLOv8n मॉडल लोड करें + model = YOLO('yolov8n.pt') + + # वर्तमान स्क्रीन सामग्री को स्रोत रूप में परिभाषित करें + source = 'screen' + + # वर्तमान सामग्री पर पूर्वानुमान चलाएं + results = model(source) # रिजल्ट्स ऑब्जेक्ट की सूची + ``` + + === "यूआरएल" + दूरस्थ छवि या वीडियो पर पूर्वानुमान चलाएं। + ```python + from ultralytics import YOLO + + # पूर्व-प्रशिक्षित YOLOv8n मॉडल लोड करें + model = YOLO('yolov8n.pt') + + # दूरस्थ छवि या वीडियो का यूआरएल निर्धारित करें + source = 'https://ultralytics.com/images/bus.jpg' + + # यूआरएल पर पूर्वानुमान चलाएं + results = model(source) # रिजल्ट्स ऑब्जेक्ट की सूची + ``` + + === "PIL" + Python Imaging Library (PIL) के साथ खोली गई छवि पर पूर्वानुमान चलाएं। + ```python + from PIL import Image + from ultralytics import YOLO + + # 
पूर्व-प्रशिक्षित YOLOv8n मॉडल लोड करें + model = YOLO('yolov8n.pt') + + # Python Imaging Library (PIL) के साथ खोली गई छवि + स्रोत = Image.open('छवि.जेपीजी') + + # आदर्श पर पूर्वानुमान चलाएं + परिणाम = model(स्रोत) # रिजल्ट्स ऑब्जेक्ट की सूची + ``` + + === "ओपेंसीवी" + OpenCV के साथ पढ़ी गई छवि पर पूर्वानुमान चलाएं। + ```python + import cv2 + from ultralytics import YOLO + + # पूर्व-प्रशिक्षित YOLOv8n मॉडल लोड करें + model = YOLO('yolov8n.pt') + + # OpenCV के साथ पढ़ी गई छवि + स्रोत = cv2.imread('छवि.जेपीजी') + + # ओपेंसीवी पर पूर्वानुमान चलाएं + परिणाम = model(स्रोत) # रिजल्ट्स ऑब्जेक्ट की सूची + ``` + + === "नम्पी" + numpy array के रूप में प्रस्तुत छवि पर पूर्वानुमान चलाएं। + ```python + import numpy as np + from ultralytics import YOLO + + # पूर्व-प्रशिक्षित YOLOv8n मॉडल लोड करें + model = YOLO('yolov8n.pt') + + # छवि रूप में एक हिंदी छवि को बनाएँ + स्रोत = np.zeros((640, 640, 3)) + + # नम्पी पर पूर्वानुमान चलाएं + परिणाम = model(स्रोत) # रिजल्ट्स ऑब्जेक्ट की सूची + ``` + +[वाहन के पुर्जे डिटेक्शन]: https://github.com/RizwanMunawar/ultralytics/assets/62513924/a0f802a8-0776-44cf-8f17-93974a4a28a1 + +[फुटबॉल खिलाड़ी डिटेक्शन]: https://github.com/RizwanMunawar/ultralytics/assets/62513924/7d320e1f-fc57-4d7f-a691-78ee579c3442 + +[लोगों का गिरना डिटेक्शन]: https://github.com/RizwanMunawar/ultralytics/assets/62513924/86437c4a-3227-4eee-90ef-9efb697bdb43 diff --git a/docs/hi/modes/track.md b/docs/hi/modes/track.md new file mode 100644 index 0000000..7f5773f --- /dev/null +++ b/docs/hi/modes/track.md @@ -0,0 +1,358 @@ +--- +comments: true +description: वीडियो स्ट्रीम में आवक ट्रेक करने के लिए Ultralytics YOLO का उपयोग कैसे करें। ट्रैकर्स का उपयोग करने और ट्रैकर कॉन्फ़िगरेशन को अनुकूलित करने के लिए गाइड। +keywords: Ultralytics, YOLO, आवक ट्रैकिंग, वीडियो स्ट्रीम, BoT-SORT, ByteTrack, पायथन गाइड, CLI गाइड +--- + +# Ultralytics YOLO के साथ मल्टी-ऑब्जेक्ट ट्रैकिंग + +Multi-object tracking examples + +वीडियो एनालिटिक्स के क्षेत्र में, ऑब्जेक्ट ट्रैकिंग एक महत्वपूर्ण कार्य है 
जो केवल फ्रेम में वस्तुओं के स्थान और वर्ग की पहचान करने के अलावा वीडियो के प्रगति के साथ-साथ प्रत्येक खोजी गई वस्तु के लिए एक अद्वितीय आईडी बनाए रखता है। इसके अनुप्रयोग हैं असीमित—निगरानी और सुरक्षा से लेकर रियल-टाइम स्पोर्ट्स एनालिटिक्स तक। + +## ऑब्जेक्ट ट्रैकिंग के लिए Ultralytics YOLO क्यों चुनें? + +Ultralytics ट्रैकरों से उत्पन्न परिणाम मानक ऑब्जेक्ट डिटेक्शन के साथ मेल खाते हैं, लेकिन वीडियो स्ट्रीम में ऑब्जेक्टों को ट्रैक करने और उपयोगी गणना करने में आसान हो जाता है। यहाँ आपको Ultralytics YOLO का उपयोग अपनी ऑब्जेक्ट ट्रैकिंग की जरूरतों के लिए करने की सलाह दी जा रही है: + +- **प्रदर्शनशीलता:** सटीकता के मामले में समय-सत्य ही होने के साथ वीडियो स्ट्रीम को प्रक्रिया करें। +- **लचीलापन:** विभिन्न ट्रैकिंग ऍल्गोरिदम और विन्यास पर समर्थन करें। +- **उपयोग करने में आसानी:** झटपट एकीकरण और डिप्लॉय करने के लिए सरल पायथन API और CLI विकल्प। +- **कस्टमाइज़ेबिलिटी:** कस्टम ट्रेन किए गए YOLO मॉडल के साथ उपयोग में आसान, जिससे डोमेन-विशिष्ट एप्लिकेशन में समावेश करना संभव होता है। + +

+
+ +
+ देखें: Ultralytics YOLOv8 के साथ ऑब्जेक्ट डिटेक्शन और ट्रैकिंग। +

+ +## वास्तविक दुनिया के अनुप्रयोग + +| परिवहन | खुदराबाज़ार | जलजीवाणुजनित उत्पादन | +|:-------------------------------:|:-----------------------------:|:----------------------------:| +| ![वाहन ट्रैकिंग][vehicle track] | ![लोग ट्रैकिंग][people track] | ![मछली ट्रैकिंग][fish track] | +| वाहन ट्रैकिंग | लोग ट्रैकिंग | मछली ट्रैकिंग | + +## विशेषताएँ एक झलक में + +Ultralytics YOLO अपनी ऑब्जेक्ट डिटेक्शन विशेषताओं को बढ़ाकर मज़बूत और बहुमुखी ऑब्जेक्ट ट्रैकिंग प्रदान करता है: + +- **रीयल-टाइम ट्रैकिंग:** उच्च फ्रेम दर वाले वीडियो में समयबद्ध रूप से ऑब्जेक्ट्स को ट्रैक करें। +- **एकाधिक ट्रैकर समर्थन:** इस्थापित ट्रैकिंग ऍल्गोरिदमों में से एक चुनें। +- **कस्टमाइज़ेबल ट्रैकर कॉन्फ़िगरेशन:** विभिन्न पैरामीटर्स को समायोजित करके विशेष आवश्यकताओं को पूरा करने के लिए ट्रैकिंग ऍल्गोरिदम को अनुकूलित करें। + +## उपलब्ध ट्रैकर्स + +Ultralytics YOLO निम्नलिखित ट्रैकिंग ऍल्गोरिदमों का समर्थन करता है। आप इन्हें योग्य YAML कॉन्फ़िगरेशन फ़ाइल (`tracker=tracker_type.yaml`) पारित करके सक्षम कर सकते हैं: + +* [BoT-SORT](https://github.com/NirAharon/BoT-SORT) - इस ट्रैकर को सक्षम करने के लिए `botsort.yaml` का उपयोग करें। +* [ByteTrack](https://github.com/ifzhang/ByteTrack) - इस ट्रैकर को सक्षम करने के लिए `bytetrack.yaml` का उपयोग करें। + +डिफ़ॉल्ट ट्रैकर BoT-SORT है। + +## ट्रैकिंग + +वीडियो स्ट्रीम्स पर ट्रैकर चलाने के लिए, YOLOv8n, YOLOv8n-seg और YOLOv8n-pose जैसे प्रशिक्षित Detect, Segment या Pose मॉडल का उपयोग करें। + +!!! 
Example "उदाहरण" + + === "पायथन" + + ```python + from ultralytics import YOLO + + # एक आधिकारिक या कस्टम मॉडल लोड करें + model = YOLO('yolov8n.pt') # एक आधिकारिक Detect मॉडल लोड करें + model = YOLO('yolov8n-seg.pt') # एक आधिकारिक Segment मॉडल लोड करें + model = YOLO('yolov8n-pose.pt') # एक आधिकारिक Pose मॉडल लोड करें + model = YOLO('path/to/best.pt') # एक कस्टम प्रशिक्षित मॉडल लोड करें + + # मॉडल के साथ ट्रैकिंग करें + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True) # डिफ़ॉल्ट ट्रैकर के साथ ट्रैकिंग करें + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True, tracker="bytetrack.yaml") # ByteTrack ट्रैकर के साथ ट्रैकिंग करें + ``` + + === "CLI" + + ```bash + # CLI के साथ विभिन्न मॉडल के साथ ट्रैकिंग करें + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" # आधिकारिक डिटेक्ट मॉडल + yolo track model=yolov8n-seg.pt source="https://youtu.be/LNwODJXcvt4" # आधिकारिक सेगमेंट मॉडल + yolo track model=yolov8n-pose.pt source="https://youtu.be/LNwODJXcvt4" # आधिकारिक पोज मॉडल + yolo track model=path/to/best.pt source="https://youtu.be/LNwODJXcvt4" # कस्टम प्रशिक्षित मॉडल + + # ByteTrack ट्रैकर का उपयोग करें + yolo track model=path/to/best.pt tracker="bytetrack.yaml" + ``` + +जैसा कि ऊपर के उपयोग में देखा जा सकता है, ट्रैकिंग उन सभी Detect, Segment और Pose मॉडलों के लिए उपलब्ध है जो वीडियो या स्ट्रीमिंग स्रोतों पर चलाए जाते हैं। + +## कॉन्फ़िगरेशन + +### ट्रैकिंग आर्ग्युमेंट्स + +ट्रैकिंग कॉन्फ़िगरेशन `conf`, `iou` और `show` जैसे गुण प्रेडिक्शन मोड के साथ साझा करता है। अन्य विन्यासों के लिए, कृपया [प्रेडिक्शन](../modes/predict.md#inference-arguments) मॉडल पृष्ठ देखें। + +!!! 
Example "उदाहरण" + + === "पायथन" + + ```python + from ultralytics import YOLO + + # ट्रैकिंग पैरामीटर कॉन्फ़िगर करें और ट्रैकर चलाएं + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True) + ``` + + === "CLI" + + ```bash + # कमांड लाइन इंटरफेस का उपयोग करके ट्रैकिंग पैरामीटर कॉन्फ़िगर करें और ट्रैकर चलाएं + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3 iou=0.5 show + ``` + +### ट्रैकर चयन + +Ultralytics आपको एक संशोधित ट्रैकर कॉन्फ़िगरेशन फ़ाइल का उपयोग करने की भी अनुमति देता है। ऐसा करने के लिए, बस [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers) से एक ट्रैकर कॉन्फ़िगरेशन फ़ाइल (जैसे `custom_tracker.yaml`) की एक प्रतिलिपि बनाएँ और किसी भी विन्यास को संशोधित करें ( `tracker_type` को छोड़कर) अपनी जरूरतों के अनुसार। + +!!! Example "उदाहरण" + + === "पायथन" + + ```python + from ultralytics import YOLO + + # मॉडल लोड करें और एक कस्टम कॉन्फ़िगरेशन फ़ाइल के साथ ट्रैकर चलाएं + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", tracker='custom_tracker.yaml') + ``` + + === "CLI" + + ```bash + # ट्रैकर के साथ एक कस्टम कॉन्फ़िगरेशन फ़ाइल का उपयोग करके मॉडल लोड करें और ट्रैकर चलाएं + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml' + ``` + +ट्रैकिंग आर्ग्युमेंट्स की एक व्यापक सूची के लिए, [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers) पेज देखें। + +## पायथन उदाहरण + +### ट्रैक पर्सिस्ट करना + +यहाँ एक Python स्क्रिप्ट है जो OpenCV (`cv2`) और YOLOv8 का उपयोग करके वीडियो फ़्रेम पर ऑब्जेक्ट ट्रैकिंग चलाने के लिए है। इस स्क्रिप्ट में यह मान लिया गया है कि आपने पहले ही आवश्यक पैकेज (`opencv-python` और `ultralytics`) इंस्टॉल कर लिए हैं। `persist=True` आर्ग्युमेंट ट्रैकर को बताता है कि मौजूदा इमेज या फ़्रेम एक अनुक्रम में अगला है और पिछले इमेज के ट्रैक मौजूदा इमेज में अपेक्षित हैं। + +!!! 
Example "ट्रैकिंग के लिए स्ट्रीमिंग फ़ोर-लूप" + + ```python + import cv2 + from ultralytics import YOLO + + # YOLOv8 मॉडल लोड करें + model = YOLO('yolov8n.pt') + + # वीडियो फ़ाइल खोलें + video_path = "path/to/video.mp4" + cap = cv2.VideoCapture(video_path) + + # वीडियो फ़्रेम्स पर लूप चलाएं + while cap.isOpened(): + # वीडियो से एक फ्रेम पढ़ें + success, frame = cap.read() + + if success: + # फ्रेम पर YOLOv8 ट्रैकिंग चलाएं, फ़्रेम के बीच ट्रैक पर्सिस्ट करता है + results = model.track(frame, persist=True) + + # परिणामों को फ़्रेम पर दिखाएं + annotated_frame = results[0].plot() + + # ट्रैक करें फ़्रेम को प्रदर्शित करें + cv2.imshow("YOLOv8 ट्रैकिंग", annotated_frame) + + # 'q' दबाएं तो फ़्रेम से बाहर निकलें + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # वीडियो के अंत तक पहुँचने पर भी फ़्रेम से बाहर निकलें + break + + # वीडियो कैप्चर ऑब्जेक्ट छोड़ें और प्रदर्शन विंडो बंद करें + cap.release() + cv2.destroyAllWindows() + ``` + +मैने फ़्रेम से ट्रैकिंग के लिए 'model(frame)' से 'model.track(frame)' में बदलाव किया है, जो साधारण डिटेक्शन की बजाय ऑब्जेक्ट ट्रैकिंग को सक्षम करता है। यह संशोधित स्क्रिप्ट प्रति फ़्रेम वाली वीडियो पर ट्रैकर चलाएगा, परिणामों को दिखाएगा और एक विंडो में दिखाएगा। 'q' दबाने पर फ़्रेम से बाहर निकला जा सकता है। + +### समय के साथ ट्रैक चित्रित करना + +संबंधित वीडियो फ्रेम पर ऑब्जेक्ट ट्रैक्स को प्लॉट करके समान्तर स्थानीय मार्गों को प्रदर्शित करने से हमें चित्रित पथ के माध्यम से पहले के अंतरालों और पतों की आपूर्ति में मूल्यवान प्रेरणा मिल सकती है। Ultralytics YOLOv8 के साथ समय के साथ ट्रैक्स को प्लॉट करना एक चुस्त और कुशल प्रक्रिया है। + +निम्न उदाहरण में, हम दिखाए गए वीडियो फ्रेम्स पर YOLO मॉडल का उपयोग करके विभिन्न ऑब्जेक्ट की गति को चित्रित करने के लिए कैसे करेंगे। यह स्क्रिप्ट एक वीडियो फ़ाइल को खोलता है, फ्रेम दर फ्रेम यह पढ़ता है, और YOLO मॉडल का उपयोग करके विभिन्न ऑब्जेक्ट की पहचान और ट्रैक करता है। पहचान वाले बॉक्स के केंद्रीय प्रांक्तियों को संवेदी करके उन्हें जोड़ते हैं, हम ट्रैक किए गए वस्तुओं द्वारा फ़ालतू की जगहों को चूंकियों का 
संग्रहित करने के लिए लाइनें खींच सकते हैं। + +!!! Example "कई वीडियो फ़्रेम्स पर पथ चित्रित करना" + + ```python + from collections import defaultdict + + import cv2 + import numpy as np + + from ultralytics import YOLO + + # YOLOv8 मॉडल लोड करें + model = YOLO('yolov8n.pt') + + # वीडियो फ़ाइल खोलें + video_path = "path/to/video.mp4" + cap = cv2.VideoCapture(video_path) + + # ट्रैक इतिहास को संग्रहीत करें + track_history = defaultdict(lambda: []) + + # वीडियो फ्रेम्स पर लूप चलाएं + while cap.isOpened(): + # वीडियो से एक फ्रेम पढ़ें + success, frame = cap.read() + + if success: + # फ्रेम पर YOLOv8 ट्रैकिंग चलाएं, फ़्रेम के बीच ट्रैक पर्सिस्ट करता है + results = model.track(frame, persist=True) + + # बॉक्स और ट्रैक आईडी प्राप्त करें + boxes = results[0].boxes.xywh.cpu() + track_ids = results[0].boxes.id.int().cpu().tolist() + + # रिज़ल्ट पर विजुअलाइज़ करें + annotated_frame = results[0].plot() + + # पथ चित्रित करें + for box, track_id in zip(boxes, track_ids): + x, y, w, h = box + track = track_history[track_id] + track.append((float(x), float(y))) # x, y centre point + if len(track) > 30: # 90 फ़्रेम्स के लिए 90 ट्रैक्स को जमा करें + track.pop(0) + + # ट्रैकिंग लाइनें खींचें + points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2)) + cv2.polylines(annotated_frame, [points], isClosed=False, color=(230, 230, 230), thickness=10) + + # पथ को प्रदर्शित करें + cv2.imshow("YOLOv8 ट्रैकिंग", annotated_frame) + + # 'q' दबायें तो फ़्रेम से बाहर निकलें + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # वीडियो के अंत तक पहुँचने पर भी फ़्रेम से बाहर निकलें + break + + # वीडियो कैप्चर ऑब्जेक्ट छोड़ें और प्रदर्शन विंडो बंद करें + cap.release() + cv2.destroyAllWindows() + ``` + +### मल्टीथ्रेड ट्रैकिंग + +मल्टीथ्रेड ट्रैकिंग एक साथ कई वीडियो स्ट्रीमों पर ऑब्जेक्ट ट्रैकिंग चलाने की क्षमता प्रदान करता है। यह खासकर उपयोगी होता है जब हम कई निगरानी कैमरों से जैसे कि वहां से मौजूद वीडियो इनपुट को संभालने के लिए परस्पर प्रोसेसिंग करने की क्षमता बढ़ा सकते हैं। + +प्रदान किए 
गए पायथन स्क्रिप्ट में हम Python के `threading` मॉड्यूल का उपयोग करके यह संभव करते हैं कि कई इंस्टेंसेज को एक साथ ट्रैकर चलाया जा सके। यह हर थ्रेड के लिए एक ट्रैकर चलाने की जिम्मेदारी होती है, और सभी थ्रेड संघ थ्रेड बैकग्राउंड में एक साथ चलते हैं। + +हर थ्रेड को सही पैरामीटर्स (वीडियो फ़ाइल, उपयोग करने के लिए मॉडल और फ़ाइल इंडेक्स) प्राप्त करने के लिए, हम `run_tracker_in_thread` नामक एक फ़ंक्शन को परिभाषित करते हैं जो इन पैरामीटर्स को स्वीकार करता है और मुख्य ट्रैकिंग लूप को संबंधित करता है। यह फ़ंक्शन वीडियो फ्रेम्स को फ्रेम द्वारा पढकर, ट्रैकर चलाने और परिणामों को प्रदर्शित कर रही है। + +इस उदाहरण में दो अलग मॉडल इस्तेमाल होते हैं: `yolov8n.pt` और `yolov8n-seg.pt`, जो हर एक अलग वीडियो फ़ाइल में ऑब्जेक्ट को ट्रैक करते हैं। वीडियो फाइल `video_file1` और `video_file2` में निर्दिष्ट किए गए हैं। `threading.Thread` में `daemon=True` विधिमति का उपयोग संकेत करता है कि यह सुनिश्चित करता है कि जब प्रमुख कार्यक्रम समाप्त हो जाए, तो ये सभी थ्रेड बंद हो जाएंगे। हम `start()` का उपयोग करके थ्रेडों को शुरू करते हैं और `join()` का उपयोग करके मुख्य थ्रेड को प्रतीक्षा करने के लिए बनाते हैं जब तक कि ट्रैकर थ्रेड खत्म नहीं हो जाते। + +चूंकि सभी थ्रेडों ने अपना कार्य पूरा कर लिया है, इसलिए `cv2.destroyAllWindows()` का उपयोग करके परिणामों को दिखाने वाली विंडो को बंद करते हैं। + +!!! 
Example "ट्रैकिंग के लिए स्ट्रीमिंग फ़ोर-लूप" + + ```python + import threading + import cv2 + from ultralytics import YOLO + + + def run_tracker_in_thread(filename, model, file_index): + """ + थ्रेडिंग के साथ YOLOv8 मॉडल का उपयोग करके एक वीडियो फ़ाइल या webcam स्रोत पर समवर्ती रूप से ट्रैकर चलाता है। + + यह फ़ंक्शन एक निर्दिष्ट वीडियो फ़ाइल या कैमरा स्रोत से वीडियो फ़्रेमों को पकड़ता है और ऑब्जेक्ट ट्रैकिंग के लिए YOLOv8 मॉडल का उपयोग करता है। यह फ़ंक्शन अपनी थ्रेड में चलता है ताकि प्रसंस्करण समवर्ती रूप से हो सके। + + Args: + filename (str): वीडियो फ़ाइल के पथ या कैमरे / बाहरी कैमरे स्रोत का पहचानकर्ता। + model (obj): YOLOv8 मॉडल ऑब्जेक्ट। + file_index (int): फ़ाइल को पहचानने के लिए अद्वितीय सूचकांक। + + ध्यान दें: + वीडियो डिस्प्ले विंडो बंद करने के लिए 'q' दबाएं। + """ + वीडियो = cv2.VideoCapture(filename) # वीडियो फ़ाइल पढ़ें + + while True: + सफलता, फ़्रेम = वीडियो.read() # वीडियो फ़्रेम पढ़ें + + # कोई भी फ़्रेम न बचा हो, तो लूप से बाहर निकलें + if not सफलता: + break + + + # ऑब्जेक्ट्स को ट्रैक करें यदि उपलब्ध हों + results = model.track(फ़्रेम, persist=True) + res_plotted = results[0].plot() + cv2.imshow(f"स्रोत_{file_index} पर ट्रैकिंग", res_plotted) + + कुंजी = cv2.waitKey(1) + if कुंजी == ord('q'): + break + + # वीडियो स्रोतों को छोड़ें + वीडियो.release() + + + # मॉडल लोड करें + model1 = YOLO('yolov8n.pt') + model2 = YOLO('yolov8n-seg.pt') + + # ट्रैकर के लिए वीडियो फ़ाइलें परिभाषित करें + video_file1 = "path/to/video1.mp4" # वीडियो फ़ाइल का पथ, वेबकैम के लिए 0 + video_file2 = 0 # वीडियो फ़ाइल का पथ, वेबकैम के लिए 0, बाहरी कैमरा के लिए 1 + + # ट्रैकर थ्रेड बनाएं + tracker_thread1 = threading.Thread(target=run_tracker_in_thread, args=(video_file1, model1, 1), daemon=True) + tracker_thread2 = threading.Thread(target=run_tracker_in_thread, args=(video_file2, model2, 2), daemon=True) + + # ट्रैकर थ्रेड प्रारंभ करें + tracker_thread1.start() + tracker_thread2.start() + + # ट्रैकर थ्रेड की प्रतीक्षा करें + tracker_thread1.join() + tracker_thread2.join() + + # 
सभी ट्रैकर थ्रेडों के निपटाए जाने के बाद, परिणामों को प्रदर्शन विंडोज बंद करें + cv2.destroyAllWindows() + ``` + +यह उदाहरण स्क्रिप्ट जोड़कर और इसी मार्गदर्शन का उपयोग करके और अधिक वीडियो फ़ाइल और मॉडल के लिए बाहरी थ्रेड बना कर इसे कार्यान्वित करने के लिए आसानी से विस्तारित किया जा सकता है। + +## नए ट्रैकरों में सहयोग दें + +क्या आप बहु-ऑब्जेक्ट ट्रैकिंग में माहिर हैं और उल्ट्रालिटिक्स YOLO के साथ एक ट्रैकिंग ऍल्गोरिदम को सफलतापूर्वक अमल में लाया है? हम आपको [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers) में हमारे ट्रैकर खंड के लिए योगदान देने के लिए आमंत्रित करते हैं! आपका वास्तविक दुनिया के अनुप्रयोग और समाधान आपके समुदाय के लिए अमूल्य हो सकते हैं। + +इस खंड में योगदान देकर, आप उल्ट्रालिटिक्स YOLO फ्रेमवर्क के भीतर उपलब्ध ट्रैकिंग समाधानों की विस्तारवादी सूची बढ़ा सकते हैं, जो उल्ट्रालिटिक्स YOLO माध्यम से काम कर रहे उपयोगकर्ताओं के लिए अत्यधिक समर्पणशीलता और उपयोगीता जोड़ते हैं। + +अपनी योगदान की शुरुआत करने के लिए, कृपया हमारे [योगदान गाइड](https://docs.ultralytics.com/help/contributing) का संदर्भ लें जहां परामर्शिका प्रस्तुत करने के सचेत निर्देश दिए गए हैं। हम इंतजार कर रहे हैं देखें आप क्या लाते हैं! + +साथ में, चलिए Ultralytics YOLO पारिस्थितिकी की गतिशीलता को मजबूत करें 🙏! 
+ +[वाहन ट्रैकिंग]: https://github.com/RizwanMunawar/ultralytics/assets/62513924/ee6e6038-383b-4f21-ac29-b2a1c7d386ab + +[लोग ट्रैकिंग]: https://github.com/RizwanMunawar/ultralytics/assets/62513924/93bb4ee2-77a0-4e4e-8eb6-eb8f527f0527 + +[मछली ट्रैकिंग]: https://github.com/RizwanMunawar/ultralytics/assets/62513924/a5146d0f-bfa8-4e0a-b7df-3c1446cd8142 diff --git a/docs/hi/modes/train.md b/docs/hi/modes/train.md new file mode 100644 index 0000000..5447a84 --- /dev/null +++ b/docs/hi/modes/train.md @@ -0,0 +1,293 @@ +--- +comments: true +description: Ultralytics YOLO के साथ YOLOv8 मॉडल ट्रेन करने के लिए चरणबद्ध मार्गदर्शिका, एकल-GPU और बहु-GPU ट्रेनिंग के उदाहरणों के साथ। +keywords: Ultralytics, YOLOv8, YOLO, ऑब्जेक्ट डिटेक्शन, ट्रेन मोड, कस्टम डेटासेट, GPU ट्रेनिंग, बहु-GPU, हाइपरपैरामीटर, CLI उदाहरण, Python उदाहरण +--- + +# Ultralytics YOLO के साथ मॉडल ट्रेनिंग + +Ultralytics YOLO इकोसिस्टम और इंटीग्रेशन + +## परिचय + +एक गहरी यान्त्रिकी मॉडल को ट्रेनिंग देना उसे डेटा खिलाते हुए और इसके पैरामीटर्स को समायोजित करके सही पूर्वानुमान करने की सामर्थ्य को शामिल करता है। YOLOv8 मॉडल में Ultralytics YOLO के ट्रेन मोड ने ऑब्जेक्ट डिटेक्शन मॉडल्स को प्रभावी और दक्ष ट्रेनिंग के लिए इंजीनियरिंग किया गया है, जिससे आधुनिक हार्डवेयर क्षमताओं का पूरी तरह से उपयोग किया जा सके। यह मार्गदर्शिका उन सभी विवरणों को कवर करने का उद्देश्य रखती है जो आपको YOLOv8 के मजबूत सेट ऑफ़ सुविधाओं का उपयोग करके अपने खुद के मॉडल्स को ट्रेनिंग शुरू करने के लिए चाहिए। + +

+
+ +
+ देखें: Google Colab में अपने कस्टम डेटासेट पर एक YOLOv8 मॉडल को ट्रेन करने का तरीका। +

+ +## प्रशिक्षण के लिए Ultralytics YOLO का चयन क्यों करें? + +यहां YOLOv8 के ट्रेन मोड को चुनने के कुछ प्रमुख कारण हैं: + +- **दक्षता:** अपने हार्डवेयर से सबसे अधिक लाभ उठाएं, चाहे आप सिंगल-GPU सेटअप पर हों या कई GPU पर स्केल कर रहें हों। +- **प्राक्तिशिल्ता:** COCO, VOC और ImageNet जैसे तत्परता उपलब्ध डेटासेटों के अलावा कस्टम डेटासेट पर ट्रेन करें। +- **उपयोगकर्ता मित्रपूर्णता:** सीधे और शक्तिशाली CLI और Python इंटरफ़ेस का उपयोग एक सीधी ट्रेनिंग अनुभव के लिए। +- **हाइपरपैरामीटर लचीलापन:** मॉडल प्रदर्शन को सुधारने के लिए वैश्विक स्तर पर अनुकूलन योग्य हाइपरपैरामीटरों की एक व्यापक श्रृंखला। + +### ट्रेन मोड की प्रमुख सुविधाएं + +निम्नलिखित YOLOv8 के ट्रेन मोड की कुछ महत्वपूर्ण सुविधाएं हैं: + +- **स्वत: डेटासेट डाउनलोड:** COCO, VOC और ImageNet जैसे मानक डेटासेट्स को पहली बार के उपयोग पर स्वत: डाउनलोड किया जाता है। +- **बहु-GPU समर्थन:** प्रक्रिया की गति को तेज करने के लिए अनुप्रयोग में कई जीपीयू का उपयोग करें। +- **हाइपरपैरामीटर कॉन्फ़िगरेशन:** हाइपरपैरामीटर को यामल कॉन्फ़िगरेशन फ़ाइल या CLI तर्कों के माध्यम से संशोधित करने का विकल्प। +- **दृश्यीकरण और मॉनिटरिंग:** प्रशिक्षण मैट्रिक्स के वास्तविक समय ट्रैकिंग और सीखने की प्रक्रिया के दृश्यीकरण के लिए बेहतर अवधारणा के लिए। + +!!! Tip "टिप" + + * COCO, VOC, ImageNet और कई अन्य जैसे YOLOv8 डेटासेट पहले से आपूर्ति हो जाते हैं, उपयोग पर स्वत: डाउनलोड होते हैं, जैसे `yolo train data=coco.yaml` + +## उपयोग उदाहरण + +सौंधांग्रही कोड को नजरअंदाज किए बिना कोई उत्तर देने के लिए, कोको128 डेटासेट के लिए YOLOv8n पर ट्रेनिंग करें। ट्रेनिंग उपकरण `device` तर्क का उपयोग करके निर्दिष्ट किया जा सकता है। आगर कोई तर्क निर्दिष्ट नहीं किया जाता है, तो प्रशिक्षण `device=0` लगाने के लिए उपयुक्त GPU `device=0` का उपयोग करेगा, अन्यथा `device=cpu` का उपयोग किया जाएगा। पूरी प्रशिक्षण तर्कों की पूरी सूची के लिए नीचे देखें। + +!!! 
Example "सिंगल-जीपीयू और सीपीयू प्रशिक्षण उदाहरण" + + उपकरण स्वत: निर्धारित किया जाता है। यदि साझा-GPU उपलब्ध हो तो उसका उपयोग किया जाएगा, अन्यथा प्रशिक्षण सीपीयू पर शुरू होगा। + + === "Python" + + ```python + from ultralytics import YOLO + + # एक मॉडल लोड करें + model = YOLO('yolov8n.yaml') # YAML से एक नया मॉडल बनाएं + model = YOLO('yolov8n.pt') # प्रशिक्षण के लिए सिफारिश की जाती है, एक पूर्व-प्रशिक्षित मॉडल लोड करें + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # YAML से बनाएं और वजन मारे ट्रांसफर करें + + # मॉडल प्रशिक्षण + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash बैश + # YAML से एक नया मॉडल बनाएं और शुरू से प्रशिक्षण शुरू करें + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # पूर्व-प्रशिक्षित *.pt मॉडल से प्रशिक्षण शुरू करें + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # YAML से एक नया मॉडल बनाएं, पूर्व-प्रशिक्षित वजनों को इसमें स्थानांतरित करें और प्रशिक्षण शुरू करें + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### बहु-जीपीयू प्रशिक्षण + +बहु-जीपीयू प्रशिक्षण एकाधिक जीपीयू के उपयोग से उपलब्ध होता है और उपकरण माध्यम से भी Python API के माध्यम से उपलब्ध है। बहु-जीपीयू प्रशिक्षण को सक्षम करने के लिए, आप उपयोग करना चाहते हैं उन जीपीयू उपकरण आईडीजी को निर्दिष्ट करें। + +!!! 
Example "बहु-जीपीयू प्रशिक्षण का उदाहरण" + + 2 जीपीयू के साथ प्रशिक्षित करें, CUDA उपकरण 0 और 1 का उपयोग करें। अतिरिक्त जीपीयू के लिए विस्तार करें जितना आवश्यक हो। + + === "Python" + + ```python + from ultralytics import YOLO + + # एक मॉडल लोड करें + model = YOLO('yolov8n.pt') # प्रशिक्षण के लिए सिफारिश की जाती है, एक पूर्व-प्रशिक्षित मॉडल लोड करें + + # दो जीपीयू के साथ मॉडल प्रशिक्षण + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device=[0, 1]) + ``` + + === "CLI" + + ```bash + # पूर्व-प्रशिक्षित *.pt मॉडल से जीपीयू 0 और 1 का उपयोग करके प्रशिक्षण शुरू करें + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=0,1 + ``` + +### ऐपल M1 और M2 MPS प्रशिक्षण + +ऐपल M1 और M2 चिप्स के समर्थन के साथ Ultralytics YOLO मॉडल पर ट्रेनिंग करना अब ऐसे उपकरणों पर संभव होता है जहां शक्तिशाली मेटल परफार्मेंस शेडर (MPS) फ़्रेमवर्क का उपयोग किया जाता है। MPS कंप्यूटेशन और छवि प्रसंस्करण कार्यों को आईयूपी स्लिकॉन पर निष्पादित करने का एक उच्च कार्यक्षमता तरीका प्रदान करता है। + +ऐपल M1 और M2 चिप्स पर प्रशिक्षण को सक्षम करने के लिए, आपको प्रशिक्षण प्रक्रिया शुरू करते समय "mps" को अपने उपकरण के रूप में निर्दिष्ट करना चाहिए। नीचे Python और कमांड लाइन में इसे कैसे कर सकते हैं उसका एक उदाहरण दिया गया है: + +!!! 
Example "MPS प्रशिक्षण का उदाहरण" + + === "Python" + + ```python + from ultralytics import YOLO + + # एक मॉडल लोड करें + model = YOLO('yolov8n.pt') # प्रशिक्षण के लिए सिफारिश की जाती है, एक पूर्व-प्रशिक्षित मॉडल लोड करें + + # MPS के साथ मॉडल प्रशिक्षण + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device='mps') + ``` + + === "CLI" + + ```bash + # पूर्व-प्रशिक्षित *.pt मॉडल से MPS का उपयोग करके प्रशिक्षण शुरू करें + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=mps + ``` + +M1/M2 चिप्स के गणितात्मक शक्ति का लाभ लेते हुए, इससे प्रशिक्षण कार्यों की कार्यक्षमता को और बढ़ाया जाता है। अधिक विस्तृत मार्गदर्शन और उन्नत रूपरेखा विकल्पों के लिए, कृपया [PyTorch MPS दस्तावेज़ीकरण](https://pytorch.org/docs/stable/notes/mps.html) का संदर्भ देखें। + +### बाधित प्रशिक्षण को बहाल करना + +पहले से सहेजी गई अवस्था से प्रशिक्षण बहाल करना, डीप लर्निंग मॉडल के साथ काम करते समय एक महत्वपूर्ण सुविधा है। यह विविध परिदृश्यों में उपयोगी है, जैसे जब अप्रत्याशित रूप से प्रशिक्षण प्रक्रिया रुक गई हो, या जब आप नए डेटा के साथ या अधिक इपॉक्स के लिए एक मॉडल को प्रशिक्षण जारी रखना चाहते हैं। + +प्रशिक्षण बहाल करने पर, Ultralytics YOLO अंतिम सहेजे गए मॉडल से वजनों को लोड करता है और ऑप्टिमाइज़र की स्थिति, लर्निंग रेट शेड्यूलर और इपॉक संख्या को भी पुनर्स्थापित करता है। इससे आप प्रशिक्षण प्रक्रिया को ठीक वहीं से सहजता से जारी रख सकते हैं जहां वह रुकी थी। + +आप Ultralytics YOLO में प्रशिक्षण को आसानी से बहाल कर सकते हैं: `train` विधि को बुलाते समय `resume` तर्क को `True` पर सेट करें और आंशिक रूप से प्रशिक्षित मॉडल के वजनों वाली `.pt` फ़ाइल का पथ निर्दिष्ट करें; इसके बाद `train` फ़ंक्शन प्रशिक्षण को वहीं से जारी रखेगा जहां वह रुका था। + +नीचे एक उदाहरण दिया गया है कि पायथन और कमांड लाइन में एक बाधित प्रशिक्षण को कैसे बहाल करें: + +!!! 
Example "प्रशिक्षण बहाल करने का उदाहरण" + + === "Python" + + ```python + from ultralytics import YOLO + + # एक मॉडल लोड करें + model = YOLO('path/to/last.pt') # एक आंशिक-प्रशिक्षित मॉडल लोड करें + + # प्रशिक्षण बहाल करें + results = model.train(resume=True) + ``` + + === "CLI" + ```bash शैल + # एक अविरल प्रशिक्षण बहाल करें + yolo train resume model=path/to/last.pt + ``` + +`resume=True` सेट करके, `train` फ़ंक्शन पहले से बचे हुए मॉडल के स्थान में बचे हुए अवस्था में से प्रशिक्षण जारी रखेगा। यदि `resume` तर्क छोड़ दिया जाता है या `False` के रूप में निर्दिष्ट किया जाता है, तो `train` फ़ंक्शन एक नया प्रशिक्षण सत्र शुरू करेगा। + +याद रखें कि डिफ़ॉल्ट रूप स्थिति पर दशा-अतीत प्रति के अंत में बचावात्मक संग्रहण होते हैं, या `save_period` तर्क का उपयोग करके निश्चित अंतराल पर, इसलिए आपको एक प्रशिक्षण दौड़ को बहाल करने के लिए कम से कम 1 इपॉक्स पूर्ण करना होगा। + +## तर्क + +YOLO मॉडलों के लिए प्रशिक्षण सेटिंग विभिन्न हाइपरपैरामीटर और कॉन्फ़िगरेशन का उपयोग करते हैं जो मॉडल को एक डेटासेट पर प्रशिक्षित करने के लिए उपयोग होता है। इन सेटिंग्स में मॉडल के प्रदर्शन, गति और नियमितता पर प्रभाव पड़ सकता है। कुछ सामान्य YOLO प्रशिक्षण सेटिंग्स में बैच का आकार, सीखने दर, मोमेंटम और वेट डिके जैसी मानक अद्यतन वाली चीजें शामिल हैं। प्रशिक्षण प्रक्रिया को प्रभावी ढंग से स्थापित करने के लिए इन सेटिंग्स को सावधानीपूर्वक संयोजित करना महत्वपूर्ण है और एक दिए गए कार्य के लिए श्रेणी में सबसे अच्छे परिणाम प्राप्त करने के लिए इन सेटिंग्स के साथ संगतन करने की आवश्यकता होती है। + +| कुंजी | मान | विवरण | +|-------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `model` | `None` | मॉडल फ़ाइल का पथ, चाहे yolov8n.pt, yolov8n.yaml | +| `data` | `None` | डेटा फ़ाइल का पथ, चाहे coco128.yaml | +| `epochs` | `100` | प्रशिक्षण के लिए बार की संख्या | +| `patience` | `50` | प्रशिक्षण के आरंभ में कोई देखने के योग्य सुधार के लिए इपॉक्स इंतजार करें | +| `batch` | `16` | 
प्रति बैच छवि की संख्या (-1 के लिए AutoBatch) | +| `imgsz` | `640` | प्रारंभिक छवियों का आकार मानदंड | +| `save` | `True` | प्रशिक्षण नियंत्रितक और पूर्वानुमान परिणाम सहेजें | +| `save_period` | `-1` | प्रत्येक x ईपॉक्स पर निर्वाचित चेकप्वाइंट (1 से कम द्वारा अक्षम) | +| `cache` | `False` | [सही/रैम](https://github.com/rwightman/pytorch-image-models/blob/master/timm/data/constants.py) या खोलने के लिए ब्राउज़र के लिए ब्राउज़र डेटा लोड करने के लिए उपयोग करें | +| `device` | `None` | चलाने के लिए उपकरण, उदाहरण के लिए cuda उपकरण का उपयोग करें device=0 या device=0,1 या device=cpu | +| `workers` | `8` | वर्कर सूत्रों की संख्या | +| `project` | `None` | प्रोजेक्ट का नाम | +| `name` | `None` | प्रयोग का नाम | +| `exist_ok` | `False` | मौजूदा प्रयोग को अधिलेखित करने के लिए या नहीं | +| `pretrained` | `True` | (बूल या स्ट्रिंग) आज्ञानुसार एक पूर्व-प्रशिक्षित मॉडल का उपयोग करें (बूल) या वजनों को लोड करने के लिए मॉडल से (स्ट्रिंग) | +| `optimizer` | `'auto'` | चयन के लिए बराबरी=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto] | +| `verbose` | `False` | वर्बोज़ आउटपुट प्रिंट करें | +| `seed` | `0` | नियंत्रित (प्रशिक्षणीय) बीज के लिए | +| `deterministic` | `True` | नियंत्रित माध्यम को सक्षम करें | +| `single_cls` | `False` | हिल विशेषज्ञता डेटा सिंगल-कक्षा के रूप में | +| `rect` | `False` | न्यूनतम पैडिंग के लिए प्रति बैच रो टैब्री के साथ आयतात्मक प्रशिक्षण | +| `cos_lr` | `False` | साइन के साइन शिक्षण दर नियोजक का उपयोग करें | +| `close_mosaic` | `10` | अंतिम अवधि के लिए मॉज़ेक त断श्रावक में माध्यम वृक्षों की सक्षमता (0 को अक्षम करें) | +| `resume` | `False` | आखिरी निर्वाचित चेकप्वाइंट से प्रशिक्षण बहाल करें | +| `amp` | `True` | ऑटोमेटिक मिक्स्ड प्रेसिजन (AMP) प्रशिक्षण, चयन=[True, False] | +| `fraction` | `1.0` | प्रशिक्षित करने के लिए डेटासेट आंशिक (डिफ़ॉल्ट 1.0, प्रशिक्षण सेट में सभी छवियां) | +| `profile` | `False` | लॉगर्स के लिए प्रशिक्षण के दौरान ONNX और TensorRT की स्पीड प्रोफ़ाइल | +| `freeze` | `None` | श्रोणि की पहले n परतें, या श्रोणि सूची लेयर सूची को 
प्रशिक्षण के दौरान लॉक करें | +| `lr0` | `0.01` | प्रारंभिक सीखने दर (उदा. SGD=1E-2, Adam=1E-3) | +| `lrf` | `0.01` | परिणामकारी सीखने दर (lr0 * lrf) | +| `momentum` | `0.937` | SGD मोमेंटम/Adam बीटा1 | +| `weight_decay` | `0.0005` | शव्य वजन दण्ड 5e-4 | +| `warmup_epochs` | `3.0` | प्रारंभिक अवधि (अंशों में ठंडा) | +| `warmup_momentum` | `0.8` | प्रारंभिक अवधि मे प्रारम्भिक अवधि | +| `warmup_bias_lr` | `0.1` | प्रारंभिक जुकान एलआर | +| `box` | `7.5` | बॉक्स हानि प्राप्ति | +| `cls` | `0.5` | वर्ग हानि प्राप्ति (पिक्सेल के साथ स्थापना करें) | +| `dfl` | `1.5` | खींची हानि प्राप्ति | +| `pose` | `12.0` | माथाप्रविष्टि हानि प्राप्ति (केवल ठंडा) | +| `kobj` | `2.0` | कीपॉइंट obj हानि प्राप्ति (केवल ठंडा) | +| `label_smoothing` | `0.0` | लेबल स्मूदिंग (अंश) | +| `nbs` | `64` | नामोज़यल बैच का आकार | +| `overlap_mask` | `True` | प्रशिक्षण के दौरान मास्क ओवरलैप होने चाहिए (सेगमेंट ट्रेन केवल) | +| `mask_ratio` | `4` | स्थानकटू औरता (सेगमेंट ट्रेन केवल) | +| `dropout` | `0.0` | निर्द्यमता का उपयोग करें (वर्गीकरण केवल प्रशिक्षण) | +| `val` | `True` | प्रशिक्षण के दौरान जाँच/परीक्षण | + +## लॉगिंग + +YOLO मॉडल के प्रशिक्षण में आपको समय-समय पर मॉडल के प्रदर्शन का पता रखना महत्वपूर्ण हो सकता है। यहां लॉगिंग की एक वैरांगणिकता, यानी कीमेट, क्लियरएमएल और टेंसरबोर्ड का समर्थन है। + +लॉगर का उपयोग करने के लिए, ऊपरी कोड स्निपेट के ठोकवाला मेनू से इसे चयन करें और इसे चलाएं। चयनित लॉगर स्थापित किया जाएगा और इनिशलाइज़ किया जाएगा। + +### कीमेट + +[कीमेट](../../../integrations/comet.md) एक प्लेटफ़ॉर्म है जो डेटा वैज्ञानिकों और डेवलपरों को प्रयोग और मॉडलों की प्रशिक्षण में तुलनात्मक, व्याख्यान करने और अग्रिम निर्धारण करने में मदद करता है। इसकी सुविधाएं वास्तविक समय मापक, कोड अंतर और हाइपरपैरामीटर ट्रैकिंग जैसी विभिन्नताएं प्रदान करती हैं। + +कीमेट का उपयोग करने के लिए: + +!!! 
Example "उदाहरण" + + === "Python" + ```python + # pip install comet_ml + import comet_ml + + comet_ml.init() + ``` + +कृपया कीमेट वेबसाइट पर अपने कीमेट खाते में साइन इन करें और अपनी एपीआई कुंजी प्राप्त करें। आपको इसे अपने एनवायरनमेंट वेरिएबल्स में या अपनी स्क्रिप्ट में जोड़ना होगा ताकि आप अपने प्रयोगों को लॉग कर सकें। + +### क्लियरएमएल + +[क्लियरएमएल](https://www.clear.ml/) एक ओपन-सोर्स प्लेटफ़ॉर्म है जो प्रयोगों की ट्रैकिंग को स्वचालित करता है और संसाधनों को प्रभावी ढंग से साझा करने में मदद करता है। इसे टीमों को उनके एमएल कार्य को अधिक कुशलता से प्रबंधित करने, निष्पादित करने और पुनरुत्पादित करने में सहायता देने के लिए डिज़ाइन किया गया है। + +क्लियरएमएल का उपयोग करने के लिए: + +!!! Example "उदाहरण" + + === "Python" + ```python + # pip install clearml + import clearml + + clearml.browser_login() + ``` + +इस स्क्रिप्ट को चलाने के बाद, कृपया क्लियरएमएल वेबसाइट पर अपने क्लियरएमएल खाते में साइन इन करें और अपने ब्राउज़र सत्र की प्रमाणिकता स्वीकार करें। + +### टेंसरबोर्ड + +[टेंसरबोर्ड](https://www.tensorflow.org/tensorboard) एक टेन्सरफ़्लो वीज़ुअलाइज़ेशन टूलकिट है। यह आपको अपने टेन्सरफ़्लो ग्राफ को विज़ुअलाइज़ करने, ग्राफ के निष्पादन से जुड़े मात्रात्मक मेट्रिक्स को प्लॉट करने और उससे गुज़रने वाली छवियों जैसे अतिरिक्त डेटा दिखाने की अनुमति देता है। + +[Google Colab में](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb) टेंसरबोर्ड का उपयोग करने के लिए: + +!!! Example "उदाहरण" + + === "CLI" + ```bash + load_ext tensorboard + tensorboard --logdir ultralytics/runs # इसे अपनी 'runs' निर्देशिका से बदलें + ``` + +स्थानीय टेंसरबोर्ड का उपयोग करने के लिए नीचे दिए गए कमांड को चलाएं और परिणामों को http://localhost:6006/ पर देखें। + +!!! 
Example "उदाहरण" + + === "CLI" + ```bash + tensorboard --logdir ultralytics/runs # ध्यान दें कि 'धावक' निर्देशिका के साथ बदलें + ``` + +इससे टेंसरबोर्ड लोड होगा और यह आपके प्रशिक्षण लॉगों की सहेजी हुई निर्देशिका की ओर दिशानिर्देश करेगा। + +लॉगर स्थापित करने के बाद, आप अपने चयनित प्लेटफ़ॉर्म में स्वचालित रूप से रूपांतरण मात्राओं को अद्यतन करने के लिए प्रशिक्षणीय कोड जारी रख सकते हैं, और आपको इन लॉगों का उपयोग करके अपने मॉडल के प्रदर्शन का मूल्यांकन कर सकते हैं चाहे यह मॉडलों के प्रदर्शन के समय, विभिन्न मॉडलों का तुलनात्मक मूल्यांकन, और सुधार करने का पहचान करने के लिए। diff --git a/docs/hi/modes/val.md b/docs/hi/modes/val.md new file mode 100644 index 0000000..8880f82 --- /dev/null +++ b/docs/hi/modes/val.md @@ -0,0 +1,86 @@ +--- +comments: true +description: YOLOv8 मॉडलों की मान्यता सत्यापन के लिए गाइड। यहाँ जानें कि कैसे पायथन और CLI उदाहरणों के साथ परीक्षण सेटिंग्स और मापों का उपयोग करके अपने YOLO मॉडलों के प्रदर्शन का मूल्यांकन करें। +keywords: Ultralytics, YOLO दस्तावेज़, YOLOv8, मान्यता, मॉडल मूल्यांकन, हाइपरपैरामीटर, सटीकता, माप, पायथन, सीएलआई +--- + +# Ultralytics YOLO के साथ मॉडल मान्यता + +Ultralytics YOLO पारिस्थितिकी और एकीकरण + +## परिचय + +मान्यता मशीन लर्निंग पाइपलाइन में एक महत्वपूर्ण चरण है, जो आपको अपने प्रशिक्षित मॉडलों की गुणवत्ता का मूल्यांकन करने की अनुमति देता है। Ultralytics YOLOv8 में Val मोड बहुत सारे टूल्स और मापों का प्रयोग करके आपके ऑब्जेक्ट डिटेक्शन मॉडलों के प्रदर्शन का मूल्यांकन करने के लिए है। यह गाइड योग्यता और विश्वसनीयता दोनों सुनिश्चित करने के लिए Val मोड का सविस्तर संसाधन के रूप में काम आता है। + +## Ultralytics YOLO के साथ मान्यता करने के फायदे + +यहाँ योलोवी8 के Val मोड का उपयोग करने के फायदे हैं: + +- **सटीकता:** अपने मॉडल को पूरी तरह से मूल्यांकित करने के लिए mAP50, mAP75, और mAP50-95 जैसे टिकाऊ मापों को प्राप्त करें। +- **सुविधा:** मूल्यांकन प्रक्रिया को सरल बनाने के लिए ट्रेनिंग सेटिंग्स को याद करने वाली इनबिल्ट सुविधा का उपयोग करें। +- **लचीलापन:** अपने मॉडल को एक ही या अलग डेटासेट और छवि आकार के साथ मान्यता दें। +- 
**हाइपरपैरामीटर ट्यूनिंग:** मूल्यांकन मापों का उपयोग करके अपने मॉडल को बेहतर प्रदर्शन के लिए समायोजित करें। + +### Val मोड की मुख्य विशेषताएं + +ये हैं YOLOv8 के Val मोड द्वारा प्रदान की जाने वाली महत्वपूर्ण कार्यक्षमताएं: + +- **स्वत: सेटिंग्स:** मॉडल योग्यता के लिए अपने प्रशिक्षण समायोजनों को स्वतः याद रखते हैं। +- **बहुमान्यता समर्थन:** विभिन्न सटीकता मापों के आधार पर अपने मॉडल की मूल्यांकन करें। +- **CLI और पायथन एपीआई:** मान्यता के लिए CLI या पायथन एपीआई में से एक का चयन करें। +- **डेटा सम्पर्कता:** कोकोविवक प्रशिक्षण चरण में उपयोग की जाने वाली डेटासेट के साथ सहजता से काम करता है। + +!!! Tip "टिप" + + * YOLOv8 मॉडल अपने प्रशिक्षण सेटिंग्स को स्वतः याद रखते हैं, इसलिए आप केवल `yolo val model=yolov8n.pt` या `model('yolov8n.pt').val()` द्वारा सरलतापूर्वक एक मॉडल को समान छवि आकार के साथ और मूल डेटासेट पर मान्यता दे सकते हैं। + +## उपयोग के उदाहरण + +COCO128 डेटासेट पर प्रशिक्षित YOLOv8n मॉडल की सटीकता मान्यांकन करें। `model` को विद्यमान ट्रेनिंग `data` और तर्क बने रहते हैं, इसलिए कोई तर्क पास कराने की आवश्यकता नहीं है। पूरी सूची निर्यात तर्कों के लिए नीचे देखें। + +!!! 
Example "उदाहरण" + + === "पायथन" + + ```python + from ultralytics import YOLO + + # मॉडल लोड करें + model = YOLO('yolov8n.pt') # एक आधिकारिक मॉडल लोड करें + model = YOLO('path/to/best.pt') # एक कस्टम मॉडल लोड करें + + # मॉडल को मान्यांकन करें + metrics = model.val() # कोई तर्क आवश्यक नहीं होते हैं, डेटासेट और सेटिंग्स याद रखे जाते हैं + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # हर श्रेणी के map50-95 से बना एक सूची + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # आधिकारिक मॉडल को मान्यांकन करें + yolo detect val model=path/to/best.pt # कस्टम मॉडल को मान्यांकन करें + ``` + +## तर्क + +YOLO मॉडल के लिए मान्यांकन सेटिंग्स निम्नलिखित होते हैं: हाइपरपैरामीटर और विन्यास जैसे, जो मॉडल की मान्यता को मूल्यांकित करने के लिए उपयोग होते हैं। ये सेटिंग्स मॉडल के प्रदर्शन, गति, और सटीकता पर प्रभाव डाल सकती हैं। कुछ आम YOLO मान्यांकन सेटिंग्स में दाल-दालत, ट्रेनिंग के दौरान मान्यांकन कब किया जाता है और मान्यांकन के लिए उपयोग किए जाने वाले माप शामिल हैं। मान्यांकन प्रक्रिया को प्रभावित कर सकने वाले अन्य कारकों में मान्यांकन डेटासेट का आकार और संरचना और मॉडल का विशेष कार्य शामिल हैं। ओवरफिटिंग का पता लगाने और रोकने के लिए इन सेटिंग्स को सावधानीपूर्वक समायोजित और प्रयोग करना महत्वपूर्ण है। + +| कुंजी | मान | विवरण | +|---------------|---------|------------------------------------------------------------------------------------| +| `data` | `None` | डेटा फ़ाइल का पथ, जैसे की coco128.yaml | +| `imgsz` | `640` | प्रारूपिक छवि का आकार एक पूर्णांक के रूप में | +| `batch` | `16` | प्रति बैच छवि की संख्या (-1 for AutoBatch) | +| `save_json` | `False` | परिणाम JSON फ़ाइल में सहेजें | +| `save_hybrid` | `False` | प्रकारों के हाइब्रिड संस्करण को सहेजें (लेबल + अतिरिक्त पूर्वानुमान) | +| `conf` | `0.001` | डिटेक्शन के लिए वस्तु का विश्वसनीयता थ्रेशहोल्ड | +| `iou` | `0.6` | संयोग/संधि (IoU) के लिए थ्रेशहोल्ड डाकघर | +| `max_det` | `300` | प्रति छवि के लिए अधिकतम निकासी संख्या | +| `half` | `True` | अर्धसरलता (FP16) 
का उपयोग करें | +| `device` | `None` | चलाएं के लिए युक्ति, उदाहरण के लिए cuda device=0/1/2/3 या device=cpu | +| `dnn` | `False` | ओएनएनएक्स संज्ञानात्मक के लिए ओपेंसीवी डीएनएन का उपयोग करें | +| `plots` | `False` | प्रशिक्षण के दौरान चित्रितियाँ दिखाएं | +| `rect` | `False` | न्यूनतम पैडिंग के लिए हर बैच को संकलित आयताकारक विमान करें | +| `split` | `val` | मान्यांकन के लिए उपयोग की जाने वाली डेटासेट स्प्लिट, जैसे 'val', 'test' या 'train' | +| diff --git a/docs/hi/quickstart.md b/docs/hi/quickstart.md new file mode 100644 index 0000000..1db79eb --- /dev/null +++ b/docs/hi/quickstart.md @@ -0,0 +1,327 @@ +--- +comments: true +description: Ultralytics को स्थापित करने के विभिन्न तरीकों के बारे में जानें। Ultralytics को pip, conda, git और Docker का उपयोग करके स्थापित करें। Ultralytics का उपयोग कमांड लाइन इंटरफेस या अपनी Python परियोजनाओं के भीतर करना सीखें। +keywords: Ultralytics स्थापना, pip install Ultralytics, Docker install Ultralytics, Ultralytics कमांड लाइन इंटरफेस, Ultralytics Python इंटरफेस +--- + +## Ultralytics स्थापित करें + +Ultralytics ने pip, conda और Docker सहित कई स्थापना विधियाँ प्रदान की हैं। नवीनतम स्थिर संस्करण के लिए `ultralytics` pip पैकेज का उपयोग करके YOLOv8 स्थापित करें या सबसे अद्यतित संस्करण के लिए [Ultralytics GitHub repository](https://github.com/ultralytics/ultralytics) क्लोन करें। Docker का उपयोग करके, स्थानीय स्थापना से बच कर, एक छोटे जगह में पैकेज के नए संस्करण का निष्पादन किया जा सकता है। + +!!! Note "नोट" + + 🚧 हमारे बहुभाषीय दस्तावेज़ीकरण की वर्तमान में निर्माणाधीन है और हम उसे सुधारने के लिए कठिनताओं पर काम कर रहे हैं। आपके धैर्य के लिए धन्यवाद! 🙏 + +!!! 
Example "स्थापित करें" + + === "Pip स्थापित करें (अनुशंसित)" + यदि आपके पास पिछले संस्करण का स्थापना है, तो पिप का उपयोग करके `ultralytics` पैकेज को स्थापित करने के लिए `pip install -U ultralytics` कमांड चलाएं। `ultralytics` पैकेज के बारे में अधिक विवरण के लिए [Python Package Index (PyPI)](https://pypi.org/project/ultralytics/) पर जाएं। + + [![PyPI version](https://badge.fury.io/py/ultralytics.svg)](https://badge.fury.io/py/ultralytics) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) + + ```bash + # PyPI से ultralytics पैकेज का स्थापना करें + pip install ultralytics + ``` + + आप इसे सीधे [GitHub repository](https://github.com/ultralytics/ultralytics) से भी स्थापित कर सकते हैं। यह अद्यतन संस्करण प्राप्त करना चाहते हैं तो यह सर्वोत्तम हो सकता है। इसके लिए अपने सिस्टम पर गिट कमांड-लाइन टूल स्थापित होना चाहिए। `@main` अपदेश की `main` शाखा को स्थापित करता है और इसे दूसरी शाखा, उदा. `@my-branch`, में संशोधित किया जा सकता है, या पूर्णतः हटा दिया जा सकता है, ताकि यह डिफ़ॉल्ट रूप से `main` शाखा को ले जाए। + + ```bash + # GitHub से ultralytics पैकेज का स्थापना करें + pip install git+https://github.com/ultralytics/ultralytics.git@main + ``` + + + === "Conda स्थापित करें" + स्थापना के लिए pip के बदले Conda एक वैकल्पिक पैकेज प्रबंधक है जिसे आप स्थापना के लिए उपयोग कर सकते हैं। किसी भी जानकारी के लिए [Anaconda की मुख्य साइट](https://anaconda.org/conda-forge/ultralytics) पर जाएं। कंडा पैकेज की अद्यतन और संसाधन रिपो के लिए [यहां](https://github.com/conda-forge/ultralytics-feedstock/) देखें। + + + [![Conda Recipe](https://img.shields.io/badge/recipe-ultralytics-green.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda Version](https://img.shields.io/conda/vn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda 
Platforms](https://img.shields.io/conda/pn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) + + ```bash + # conda का उपयोग करके ultralytics पैकेज का स्थापना करें + conda install -c conda-forge ultralytics + ``` + + !!! Note "नोट" + + यदि आप CUDA परिवेश में स्थापित कर रहे हैं तो सर्वोत्तम अनुशंसा है कि आप कमांड-लाइन पर `pytorch` और `pytorch-cuda` स्थापित करने के लिए कमांड एक साथ इंस्टॉल करें ताकि कोण्डा पैकेज प्रबंधक को कोई भी टकराव सुलझाने के लिए अनुमति मिले, या फिर जरूरत पड़ने पर CPU-विशिष्ट `pytorch` पैकेज को CPU-विशिष्ट होने वाले `pytorch-cuda` पैकेज को अधिरोहित करने की अनुमति दें। + ```bash + # conda का उपयोग करके सभी पैकेजों को एक साथ स्थापित करें + conda install -c pytorch -c nvidia -c conda-forge pytorch torchvision pytorch-cuda=11.8 ultralytics + ``` + + ### Conda Docker इमेज + + Ultralytics Conda Docker इमेज [DockerHub](https://hub.docker.com/r/ultralytics/ultralytics) से उपलब्ध हैं। ये इमेजेज [Miniconda3](https://docs.conda.io/projects/miniconda/en/latest/) पर आधारित हैं और `ultralytics` का उपयोग Conda पर्यावरण में करने के लिए एक सरल तरीका है। + + ```bash + # रूपरेखा नाम को एक चर के रूप में सेट करें + t=ultralytics/ultralytics:latest-conda + + # Docker Hub से नवीनतम ultralytics इमेज को पुल करें + sudo docker pull $t + + # जीपीयू समर्थन वाले कंटेनर में ultralytics इमेज चलाएं + sudo docker run -it --ipc=host --gpus all $t # सभी जीपीयू + sudo docker run -it --ipc=host --gpus '"device=2,3"' $t # जीपीयू द्वारा निर्दिष्ट करें + ``` + + === "Git क्लोन" + यदि आप विकास में योगदान करने में रुचि रखते हैं या नवीनतम स्रोत कोड के साथ प्रयोग करने की इच्छा रखते हैं, तो `ultralytics` रिपॉजिटरी क्लोन करें। क्लोनिंग के बाद, उस निर्दिष्टित संदर्भ में नेविगेट करें और पैकेज को पहचानने के लिए pip का उपयोग करते हुए संगठनात्मक मोड `-e` के साथ पैकेज स्थापित करें। + ```bash + # ultralytics रिपॉजिटरी क्लोन करें + git clone https://github.com/ultralytics/ultralytics + + # क्लोन की गई निर्देशिका में नेविगेट करें + cd ultralytics + + # विकास के लिए संगठनात्मक 
मोड में पैकेज स्थापित करें + pip install -e . + ``` + + === "Docker" + + Docker का उपयोग करके `ultralytics` पैकेज का आसानी से निष्पादन करें और इसे रखरखाव में बेहद सुगम बनाएं, इस पैकेज का उपयोग करें, विभिन्न पर्यावरणों पर सतत और सुगम प्रदर्शन सुनिश्चित करने के लिए। [Docker Hub](https://hub.docker.com/r/ultralytics/ultralytics) से सत्यापित कार्यकारी वातावरण तक पहुंच के लिए Ultralytics 5 मुख्य समर्थित Docker इमेज उपलब्ध हैं, जो विभिन्न प्लेटफ़ॉर्म और उपयोग मामलों के लिए उच्च संगतता और प्रदार्थशीलता प्रदान करने के लिए डिज़ाइन किए गए हैं: + + डॉकर पुल्ल्स + + - **Dockerfile:** प्रशिक्षण के लिए अद्यतन संस्करण के लिए अनुशंसित GPU चित्र। + - **Dockerfile-arm64:** ARM64 वाणिज्यिकरण के लिए अनुकूलित, Raspberry Pi और अन्य ARM64 आधारित प्लेटफ़ॉर्म पर यातायात की अनुमति देता है। + - **Dockerfile-cpu:** GPU रहित पतला मॉडल, उबंटू आधारित योग्यता तक पुनर्निर्माण के लिए उपयुक्त है। + - **Dockerfile-jetson:** NVIDIA Jetson उपकरणों के लिए आदर्शों के आधार पर गीयू समर्थन मिलान, इन प्लेटफ़ॉर्मों के लिए अनुकूल यूपीयू समर्थन समेकित करता है। + - **Dockerfile-python:** केवल Python और आवश्यकता प्रतिस्थापित करने वाले न्यूनतम छवि, हल्के ऐप्स और विकास के लिए आदर्श छवि। + - **Dockerfile-conda:** Miniconda3 पर आधारित, Ultralytics पैकेज के कोण्डा स्थापना के साथ। + + निम्नलिखित कमांडों का उपयोग करके नवीनतम छवि लाएँ और उसे निष्पादित करें: + + ```bash + # छवि नाम को एक चर के रूप में सेट करें + t=ultralytics/ultralytics:latest + + # Docker Hub से नवीनतम ultralytics छवि पुल करें + sudo docker pull $t + + # जीपीयू समर्थन वाले कंटेनर में ultralytics छवि चलाएं + sudo docker run -it --ipc=host --gpus all $t # सभी जीपीयू + sudo docker run -it --ipc=host --gpus '"device=2,3"' $t # जीपीयू द्वारा निर्दिष्ट करें + ``` + + उपरोक्त कमांड ने एक Docker कंटेनर को एक्सेस करने के लिए उत्थान किया है। `-it` झंझटी एक प्रतीक TTY को निर्धारित करती है और stdin खुली रखती है, जिससे आप कंटेनर के साथ इंटरैक्ट कर सकते हैं। `--ipc=host` झंझटी IPC (Inter-Process Communication) नेमस्पेस को होस्ट पर सेट करता है, जो प्रक्रियाओं के बीच 
मेमोरी साझा करने के लिए आवश्यक होता है। `--gpus all` निर्दिष्ट जीपीयू कंटेनर के बीतर सभी उपलब्ध जीपीयू के लिए पहुंच सक्षम करता है, जो जीपीयू हस्तक्षेप आवश्यकता वाले कार्यों के लिए महत्वपूर्ण है। + + ध्यान दें: कंटेनर में स्थिति में अपनी स्थानीय मशीन पर फ़ाइलों के साथ काम करने के लिए Docker वॉल्यूम का उपयोग करें: + + ```bash + # स्थानीय निर्देशिका को कंटेनर में निर्देशिका में माउंट करें + sudo docker run -it --ipc=host --gpus all -v /path/on/host:/path/in/container $t + ``` + + `/path/on/host` को अपनी स्थानीय मशीन पर निर्देशिका पथ के साथ बदलें और `/path/in/container` को कंटेनर में योग्यता तक पथ बदलें जिससे पहुंच मिल सके। + + पूर्ण Docker उपयोग के लिए, आप [Ultralytics Docker मार्गदर्शिका](https://docs.ultralytics.com/guides/docker-quickstart/) के अन्वेषण कर सकते हैं। + +`ultralytics` के लिए सभी आवश्यकताओं की सूची के लिए `ultralytics` [requirements.txt](https://github.com/ultralytics/ultralytics/blob/main/requirements.txt) फ़ाइल देखें। ध्यान दें कि उपरोक्त सभी उदाहरणों में सभी आवश्यकताएं स्थापित होती हैं। + +

+
+ +
+ देखें: Ultralytics YOLO Quick Start Guide +

+ +!!! Tip "सुझाव" + + ऑपरेटिंग सिस्टम और CUDA आवश्यकताओं के अनुसार PyTorch आवश्यकताएं अलग-अलग हो सकती हैं, इसलिए अनुशंसा की जाती है कि पहले PyTorch स्थापित करने के लिए इंस्ट्रक्शंस पर जाएं। [https://pytorch.org/get-started/locally](https://pytorch.org/get-started/locally) पर उपयोग के बारे में अधिक जानकारी के लिए। + + + PyTorch Installation Instructions + + +## CLI के साथ Ultralytics का उपयोग करें + +Ultralytics कमांड लाइन इंटरफ़ेस (CLI) पायथन पर्यावरण की ज़रूरत के बिना आसान एकल-पंक्ति कमांड चलाने की सुविधा देता है। CLI के लिए किसी अनुकूलन या पायथन कोड की आवश्यकता नहीं होती है। आप केवल `yolo` कमांड के साथ टर्मिनल से सभी कार्यों को चला सकते हैं। CLI से YOLOv8 का उपयोग करने के बारे में और अधिक जानने के लिए [CLI Guide](/../usage/cli.md) देखें। + +!!! Example "उदाहरण" + + === "संयोजन" + Ultralytics `yolo` कमांड निम्नलिखित प्रारूप का उपयोग करता है: + ```bash + yolo TASK MODE ARGS + ``` + + - `TASK` (वैकल्पिक) इनमें से एक है ([detect](tasks/detect.md), [segment](tasks/segment.md), [classify](tasks/classify.md), [pose](tasks/pose.md)) + - `MODE` (आवश्यक) इनमें से एक है ([train](modes/train.md), [val](modes/val.md), [predict](modes/predict.md), [export](modes/export.md), [track](modes/track.md)) + - `ARGS` (वैकल्पिक) `imgsz=640` जैसे `arg=value` जोड़े जो डिफ़ॉल्ट को ओवरराइड करते हैं। + + सभी `ARGS` को पूर्ण [Configuration Guide](/../usage/cfg.md) या `yolo cfg` CLI कमांड के साथ देखें। + + === "प्रशिक्षण" + 10 एपोक्स के लिए एक डिटेक्शन मॉडल प्रशिक्षित करें, जहां + प्रारंभिक सीखने की दर (learning rate) 0.01 है + ```bash + yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01 + ``` + + === "पूर्वानुमान" + पूर्व-प्रशिक्षित सेगमेंटेशन मॉडल का उपयोग करके YouTube वीडियो की भविष्यवाणी करें + छवि आकार 320: + ```bash + yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 + ``` + + === "मान्य करो" + एक पूर्व-प्रशिक्षित डिटेक्शन मॉडल की मान्यता वाली प्रमाणित करें और इमेज का आकार 640 के बैच-आकार 1 के साथ 
देखें: + ```bash + yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640 + ``` + + === "निर्यात करें" + एक YOLOv8n वर्गीकरण मॉडल को ONNX प्रारूप में निर्यात करें, 224x128 के छवि आकार पर (कोई टास्क आवश्यक नहीं है) + ```bash + yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128 + ``` + + === "विशेष" + संस्करण देखने, सेटिंग्स देखने, चेक चलाने आदि के लिए विशेष कमांड चलाएं: + ```bash + yolo help + yolo checks + yolo version + yolo settings + yolo copy-cfg + yolo cfg + ``` + +!!! Warning "चेतावनी" + + तर्कों (arguments) को `arg=val` जोड़ों के रूप में पास करना होगा, जिनमें नाम और मान `=` चिह्न से अलग किए जाते हैं और जोड़ों के बीच रिक्त स्थान (स्पेस) होता है। तर्कों के आगे `--` उपसर्ग या तर्कों के बीच `,` अल्पविराम का उपयोग न करें। + + - `yolo predict model=yolov8n.pt imgsz=640 conf=0.25` ✅ + - `yolo predict model yolov8n.pt imgsz 640 conf 0.25` ❌ (`=` अनुपस्थित) + - `yolo predict model=yolov8n.pt, imgsz=640, conf=0.25` ❌ (`,` उपयोग न करें) + - `yolo predict --model yolov8n.pt --imgsz 640 --conf 0.25` ❌ (`--` उपयोग न करें) + +सभी उपलब्ध तर्क [Configuration Guide](/../usage/cfg.md) या `yolo cfg` CLI कमांड के साथ देखें। + +## Python के साथ Ultralytics का उपयोग करें + +YOLOv8 का Python इंटरफ़ेस आपकी Python परियोजनाओं में सहज एकीकरण की सुविधा देता है, जिससे मॉडल को लोड करना, चलाना और उसके आउटपुट को प्रोसेस करना आसान हो जाता है। सरलता और उपयोग में आसानी को ध्यान में रखकर बनाया गया यह इंटरफ़ेस उपयोगकर्ताओं को अपनी परियोजनाओं में ऑब्जेक्ट डिटेक्शन, सेगमेंटेशन और वर्गीकरण को शीघ्रता से लागू करने देता है। इस कारण यह उन सभी उपयोगकर्ताओं के लिए एक अमूल्य साधन है जो अपनी Python परियोजनाओं में इन क्षमताओं को शामिल करना चाहते हैं। + +उदाहरण के लिए, उपयोगकर्ता कोड की केवल कुछ पंक्तियों में मॉडल को लोड करके उसे प्रशिक्षित कर सकते हैं, मान्यता सेट पर उसके प्रदर्शन का मूल्यांकन कर सकते हैं और यदि आवश्यक हो, उसे ONNX प्रारूप में निर्यात कर सकते हैं। अपनी Python परियोजनाओं में YOLOv8 का उपयोग करने के बारे में और अधिक जानने के लिए [Python Guide](/../usage/python.md) 
देखें। + +!!! Example "उदाहरण" + + ```python + from ultralytics import YOLO + + # पूरी नई YOLO मॉडल बनाएँ + model = YOLO('yolov8n.yaml') + + # प्रशिक्षित YOLO मॉडल लोड करें (प्रशिक्षण के लिए अनुशंसित है) + model = YOLO('yolov8n.pt') + + # 3 एपोक्स के लिए "coco128.yaml" डेटासेट का उपयोग करके मॉडल को प्रशिक्षित करें + results = model.train(data='coco128.yaml', epochs=3) + + # मॉडल के द्वारा मान्यता वाले सेट पर प्रदर्शन करें + results = model.val() + + # मॉडल को उपयोग करके छवि पर डिटेक्शन करें + results = model('https://ultralytics.com/images/bus.jpg') + + # मॉडल को ONNX प्रारूप में निर्यात करें + success = model.export(format='onnx') + ``` + +[Python Guide](/../usage/python.md){.md-button .md-button--primary} + +## Ultralytics सेटिंग्स + +Ultralytics लाइब्रेरी सेटिंग्स प्रबंधन प्रणाली प्रदान करती है ताकि आप अपने प्रयोगों पर फाइन-ग्रेन्ड नियंत्रण बनाए रख सकें। `ultralytics.utils` में स्थित `SettingsManager` का उपयोग करके उपयोगकर्ता अपनी सेटिंग्स तक पहुंच करके उन्हें पढ़ और बदल सकते हैं। इन्हें पायथन पर्यावरण के भीतर सीधे देखने और संशोधित करने के लिए, या CLI (कमांड लाइन इंटरफ़ेस) के माध्यम से किया जा सकता है। + +### सेटिंग्स का गणना + +अपनी सेटिंग्स के मौजूदा विन्यास की ओरदारी करने के लिए आप उन्हें सीधे देख सकते हैं: + +!!! Example "सेटिंग्स देखें" + + === "पायथन" + आप PyTorch से `ultralytics` मॉड्यूल में `सेटिंग्स` ऑब्जेक्ट को आयात करके अपनी सेटिंग्स देख सकते हैं। `settings` ऑब्जेक्ट पर प्रिंट और रिटर्न सेटिंग्स के लिए निम्नलिखित कमांडों का उपयोग करें: + ```python + from ultralytics import settings + + # सभी सेटिंग्स देखें + print(settings) + + # एक विशेष सेटिंग प्राप्त करें + value = settings['runs_dir'] + ``` + + === "CLI" + यदि आप प्राथमिकताएँ लेते हैं CLI का उपयोग करना पसंद करते हैं, तो निम्नलिखित कमांड के माध्यम से अपनी सेटिंग्स की जांच कर सकते हैं: + ```bash + yolo settings + ``` + +### सेटिंग्स संशोधित करना + +Ultralytics के सेटिंग्स को संशोधित करना आसान है। बदलावों को निम्न तरीकों से किया जा सकता है: + +!!! 
Example "सेटिंग्स अपडेट करें" + + === "पायथन" + पायथन पर्यावरण के भीतर, अपनी सेटिंग्स पर `अपडेट` विधि को बुलाकर अपनी सेटिंग्स को बदल सकते हैं: + ```python + from ultralytics import settings + + # एक सेटिंग अपडेट करें + settings.update({'runs_dir': '/path/to/runs'}) + + # एकाधिक सेटिंग अपडेट करें + settings.update({'runs_dir': '/path/to/runs', 'tensorboard': False}) + + # डिफ़ॉल्ट मान में सेटिंग रीसेट करें + settings.reset() + ``` + + === "CLI" + यदि आप कमांड लाइन इंटरफ़ेस पर ध्यान देते हैं, तो निम्नलिखित कमांड के माध्यम से अपनी सेटिंग्स को संशोधित कर सकते हैं: + ```bash + # एक सेटिंग अपडेट करें + yolo settings runs_dir='/path/to/runs' + + # एकाधिक सेटिंग अपडेट करें + yolo settings runs_dir='/path/to/runs' tensorboard=False + + # डिफ़ॉल्ट मान में सेटिंग्स को बराबरी में रीसेट करें + yolo settings reset + ``` + +### सेटिंग्स को समझना + +निम्नलिखित टेबल सेटिंग्स का अवलोकन प्रदान करता है, जबकि प्रति सेटिंग्स के लिए उदाहरण मान, डेटा प्रकार और संक्षेप में विवरण दिया गया है। + +| नाम | उदाहरण मान | डेटा प्रकार | विवरण | +|--------------------|-----------------------|-------------|----------------------------------------------------------------------------------------------------------------------------| +| `settings_version` | `'0.0.4'` | `str` | Ultralytics _settings_ संस्करण (Ultralytics [pip](https://pypi.org/project/ultralytics/) संस्करण से अलग होता है) | +| `datasets_dir` | `'/path/to/datasets'` | `str` | डेटासेट को संग्रहीत करने वाली निर्देशिका | | +| `weights_dir` | `'/path/to/weights'` | `str` | मॉडल वेट को संग्रहीत करने वाली निर्देशिका | +| `runs_dir` | `'/path/to/runs'` | `str` | प्रयोग दौड़ को संग्रहीत करने वाली निर्देशिका | +| `uuid` | `'a1b2c3d4'` | `str` | मौजूदा सेटिंग्स के लिए अद्वितीय पहचानकर्ता | +| `sync` | `True` | `bool` | Ultralytics और दुविधा को HUB में समकालीन रखें | +| `api_key` | `''` | `str` | Ultralytics HUB [API Key](https://hub.ultralytics.com/settings?tab=api+keys) | +| `clearml` | `True` | `bool` | ClearML लॉगिंग का उपयोग करें | +| `comet` | 
`True` | `bool` | यदि [Comet ML](https://bit.ly/yolov8-readme-comet) प्रयोग करें या नहीं experiment ट्रैकिंग और visualization | +| `dvc` | `True` | `bool` | शोध और संस्करण नियंत्रण के लिए [DVC for experiment tracking](https://dvc.org/doc/dvclive/ml-frameworks/yolo) का उपयोग करें | +| `hub` | `True` | `bool` | [Ultralytics HUB](https://hub.ultralytics.com) एकीकरण का उपयोग करें | +| `mlflow` | `True` | `bool` | एक्सपेरिमेंट ट्रैकिंग के लिए MLFlow का उपयोग करें | +| `neptune` | `True` | `bool` | एक्सपेरिमेंट ट्रैकिंग के लिए Neptune का उपयोग करें | +| `raytune` | `True` | `bool` | hyperparameter tuning के लिए Ray Tune का उपयोग करें | +| `tensorboard` | `True` | `bool` | विज़ुअलाइज़ेशन के लिए TensorBoard का उपयोग करें | +| `wandb` | `True` | `bool` | Weights & Biases logging का उपयोग करें | + +जब आप अपने परियोजनाओं या अनुभागों के माध्यम से चलते होने के द्वारा यात्रा करते हैं, तो इन सेटिंग्स पर सुधार करने के लिए लौटें। diff --git a/docs/hi/tasks/classify.md b/docs/hi/tasks/classify.md new file mode 100644 index 0000000..951ec42 --- /dev/null +++ b/docs/hi/tasks/classify.md @@ -0,0 +1,172 @@ +--- +comments: true +description: YOLOv8 Classify मॉडल्स के बारे में जानें इमेज क्लासिफिकेशन के लिए। प्रीट्रेन्ड माॅडेल्स की सूची और ट्रेन, वेलिडेट, प्रेडिक्ट और एक्सपोर्ट माॅडेल्स के बारे में विस्तृत जानकारी प्राप्त करें। +keywords: Ultralytics, YOLOv8, इमेज क्लासिफिकेशन, प्रीट्रेन्ड माॅडेल्स, YOLOv8n-cls, ट्रेन, वेलिडेट, प्रेडिक्ट, माॅडेल एक्सपोर्ट +--- + +# इमेज क्लासिफिकेशन + +इमेज क्लासिफिकेशन उदाहरण + +इमेज क्लासिफिकेशन तीन कार्यों में से सबसे सरल है और पूरी तस्वीर को एक पूर्वनिर्धारित कक्षा में वर्गीकृत करना शामिल होता है। + +इमेज क्लासिफायर का आउटपुट एक एकल क्लास लेबल और एक विश्वास प्रामाणिकता स्कोर होता है। इमेज क्लासिफिकेशन उपयोगी होता है जब आपको केवल इसे जानने की जरूरत होती है कि एक इमेज किस कक्षा में सम्मिलित है और आपको नहीं पता होना चाहिए कि उस कक्षा के वस्त्राणु किस स्थान पर स्थित हैं या उनकी सटीक आकृति क्या है। + +!!! 
Tip "टिप" + + YOLOv8 Classify मॉडेल्स में `-cls` संकेतक प्रयोग किया जाता है, जैसे `yolov8n-cls.pt` और इन्हें पूर्व प्रशिक्षित किया जाता है [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) पर। + +## [मॉडेल](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +यहां YOLOv8 पूर्व प्रशिक्षित Classify मॉडेल दिखाए गए हैं। Detect, Segment, और Pose मॉडेल्स [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) डेटासेट पर पूर्व प्रशिक्षित होते हैं, जबकि Classify मॉडेल [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) डेटासेट पर पूर्व प्रशिक्षित होते हैं। + +[मॉडेल](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) डाउनलोड पहली बार उपयोग पर ताजगी Ultralytics [प्रकाशन](https://github.com/ultralytics/assets/releases) से स्वतः होता है। + +| मॉडेल | आकार
(पिक्सेल) | सटीकता
शीर्ष 1 | सटीकता
शीर्ष 5 | स्पीड
सीपीयू ONNX
(मि. सेकंड) | स्पीड
A100 TensorRT
(मि. सेकंड) | पैरामीटर
(M) | FLOPs
(B) at 640 | +|----------------------------------------------------------------------------------------------|------------------------|------------------------|------------------------|------------------------------------------|--------------------------------------------|----------------------|--------------------------| +| [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | +| [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | +| [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | +| [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | +| [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | + +- **सटीकता** मान ImageNet डेटासेट के मान्यीकरण सेट पर मॉडेलों की सटीकता हैं। +
`yolo val classify data=path/to/ImageNet device=0` द्वारा पुनः उत्पन्न करें +- **स्पीड** एक [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) इंस्टेंस का उपयोग करके ImageNet की val छवियों पर औसत निकाली गई है। +
`yolo val classify data=path/to/ImageNet batch=1 device=0|cpu` द्वारा पुनः उत्पन्न करें + +## ट्रेन + +100 एपॉक्स के लिए MNIST160 डेटासेट पर YOLOv8n-cls को 64 इमेज आकार पर रिक्तियों के साथ ट्रेन करें। उपलब्ध विकल्पों की पूरी सूची के लिए [Configuration](/../usage/cfg.md) पेज देखें। + +!!! Example "उदाहरण" + + === "Python" + + ```python + from ultralytics import YOLO + + # एक मॉडेल लोड करें + model = YOLO('yolov8n-cls.yaml') # YAML से एक नया मॉडेल बनाएं + model = YOLO('yolov8n-cls.pt') # पूर्व प्रशिक्षित मॉडेल लोड करें (ट्रेनिंग के लिए सिफारिश की जाती है) + model = YOLO('yolov8n-cls.yaml').load('yolov8n-cls.pt') # YAML से बनाएँ और भार ट्रांसफर करें + + # मॉडेल ट्रेन करें + results = model.train(data='mnist160', epochs=100, imgsz=64) + ``` + + === "CLI" + + ```bash + # YAML से नया मॉडेल बनाएं और अच्छे से प्रशिक्षण शुरू करें + yolo classify train data=mnist160 model=yolov8n-cls.yaml epochs=100 imgsz=64 + + # पूर्व प्रशिक्षित *.pt मॉडेल से प्रशिक्षण शुरू करें + yolo classify train data=mnist160 model=yolov8n-cls.pt epochs=100 imgsz=64 + + # YAML से नया मॉडेल बनाएँ, उसमें पूर्व प्रशिक्षित भार भी स्थानांतरित करें और प्रशिक्षण शुरू करें + yolo classify train data=mnist160 model=yolov8n-cls.yaml pretrained=yolov8n-cls.pt epochs=100 imgsz=64 + ``` + +### डेटासेट प्रारूप + +YOLO क्लासिफिकेशन डेटासेट प्रारूप [Dataset Guide](../../../datasets/classify/index.md) में विस्तृत रूप में दिया गया है। + +## वेलिडेट + +MNIST160 डेटासेट पर प्रशिक्षित YOLOv8n-cls मॉडेल की सटीकता का मूल्यांकन करें। कोई आर्गुमेंट चक्रवात नहीं करना चाहिए क्योंकि `मॉडेल` अपने प्रशिक्षण यथार्थ डेटा और आर्गुमेंट्स को स्मरण रखता है। + +!!! 
Example "उदाहरण" + + === "Python" + + ```python + from ultralytics import YOLO + + # एक मॉडेल लोड करें + model = YOLO('yolov8n-cls.pt') # एक आधिकारिक मॉडेल लोड करें + model = YOLO('path/to/best.pt') # एक स्वचालित मॉडेल लोड करें + + # मॉडेल का मूल्यांकन करें + metrics = model.val() # कोई आर्गुमेंट आवश्यक नहीं हैं, डेटासेट और सेटिंग्स याद रखे जाते हैं + metrics.top1 # शीर्ष1 सटीकता + metrics.top5 # शीर्ष5 सटीकता + ``` + === "CLI" + + ```bash + yolo classify val model=yolov8n-cls.pt # आधिकारिक मॉडेल का मूल्यांकन करें + yolo classify val model=path/to/best.pt # कस्टम मॉडेल का मूल्यांकन करें + ``` + +## प्रेडिक्ट + +प्रशिक्षित YOLOv8n-cls मॉडेल का उपयोग तस्वीरों पर पूर्वानुमान चलाने के लिए करें। + +!!! Example "उदाहरण" + + === "Python" + + ```python + from ultralytics import YOLO + + # मॉडेल लोड करें + model = YOLO('yolov8n-cls.pt') # एक आधिकारिक मॉडेल लोड करें + model = YOLO('path/to/best.pt') # एक स्वचालित मॉडेल लोड करें + + # मॉडेल के साथ पूर्वानुमान करें + results = model('https://ultralytics.com/images/bus.jpg') # एक इमेज पर पूर्वानुमान करें + ``` + === "CLI" + + ```bash + yolo classify predict model=yolov8n-cls.pt source='https://ultralytics.com/images/bus.jpg' # आधिकारिक मॉडेल के साथ पूर्वानुमान करें + yolo classify predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # कस्टम मॉडेल के साथ पूर्वानुमान करें + ``` + +पूर्वानुमान पूरा होने के बाद निर्यात को सीधे पूर्वानुमानित मॉडेल पर लागू कर सकते हैं, जैसे `yolo predict model=yolov8n-cls.onnx`। एक्सपोर्ट पूर्ण होने के बाद, अपने मॉडेल के उपयोग के लिए आपको उपयोग उदाहरण दिखाए गए हैं। + +## एक्सपोर्ट + +YOLOv8n-cls मॉडल को ONNX, CoreML आदि जैसे विभिन्न प्रारूपों में निर्यात करें। + +!!! 
Example "उदाहरण" + + === "Python" + + ```python + from ultralytics import YOLO + + # एक मॉडेल लोड करें + model = YOLO('yolov8n-cls.pt') # load an official model + model = YOLO('path/to/best.pt') # load a custom trained model + + # मॉडेल को निर्यात करें + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-cls.pt format=onnx # export official model + yolo export model=path/to/best.pt format=onnx # export custom trained model + ``` + +टेबल में उपलब्ध YOLOv8-cls निर्यात प्रारूप निम्नानुसार हैं। निर्यात पूरा होने के बाद आप सीधे निर्यात किए गए मॉडेल पर पूर्व-आश्रिताओं की तरह पूर्वानुमान या मूल्यांकन कर सकते हैं, जैसे `yolo predict model=yolov8n-cls.onnx`। उपयोग की उदाहरण आपके मॉडेल के लिए निर्यात पूरा होने के बाद दिखाए गए हैं। + +| प्रारूप | `format` आर्गुमेंट | मॉडेल | मेटाडेटा | आर्गुमेंट्स | +|--------------------------------------------------------------------|--------------------|-------------------------------|----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-cls.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-cls.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-cls.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-cls_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-cls.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-cls.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-cls_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-cls.pb` | ❌ | `imgsz` | +| [TF 
Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-cls.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-cls_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-cls_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-cls_ncnn_model/` | ✅ | `imgsz`, `half` | + +[Export](https://docs.ultralytics.com/modes/export/) पेज में `export` का पूरा विवरण देखें। diff --git a/docs/hi/tasks/detect.md b/docs/hi/tasks/detect.md new file mode 100644 index 0000000..6a55e73 --- /dev/null +++ b/docs/hi/tasks/detect.md @@ -0,0 +1,186 @@ +--- +comments: true +description: Ultralytics द्वारा YOLOv8 का आधिकारिक दस्तावेज़ीकरण। विभिन्न प्रारूपों में मॉडल को प्रशिक्षित करना, मान्य करना, पूर्वानुमान लगाना और निर्यात करना सीखें, विस्तृत प्रदर्शन आँकड़ों समेत। +keywords: YOLOv8, Ultralytics, वस्तु पहचान, पूर्वप्रशिक्षित मॉडल, प्रशिक्षण, मान्यता, भविष्यवाणी, मॉडल निर्यात, COCO, ImageNet, PyTorch, ONNX, CoreML +--- + +# वस्तु पहचान + +वस्तु पहचान उदाहरण + +वस्तु पहचान एक कार्य है जिसमें चित्र या वीडियो स्ट्रीम में वस्तुओं के स्थान और वर्ग की पहचान करना शामिल होता है। + +वस्तु डिटेक्टर का आउटपुट बाउंडिंग बॉक्स का एक सेट होता है जो चित्र में वस्तुओं को घेरते हैं, साथ ही प्रत्येक बॉक्स के लिए वर्ग लेबल और विश्वसनीयता स्कोर शामिल होते हैं। वस्तु पहचान तब एक अच्छा विकल्प है जब आपको किसी दृश्य में रुचि की वस्तुओं की पहचान करनी हो, परंतु यह जानने की आवश्यकता न हो कि वस्तु ठीक कहाँ है या उसकी सटीक आकृति क्या है। + +

+
+ +
+ देखें: पूर्व प्रशिक्षित Ultralytics YOLOv8 मॉडल के साथ वस्तु पहचान। +

+ + +!!! Tip "टिप" + +YOLOv8 Detect मॉडल डिफ़ॉल्ट YOLOv8 मॉडल हैं, यानी `yolov8n.pt` और [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) पर प्रशिक्षित हैं। + +## [मॉडल](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +YOLOv8 पूर्व प्रशिक्षित Detect मॉडल यहाँ दिखाए गए हैं। Detect, Segment और Pose मॉडल [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) डेटासेट पर पूर्वप्रशिक्षित होते हैं, जबकि Classify मॉडल [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) डेटासेट पर पूर्वप्रशिक्षित होते हैं। + +[मॉडल](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) पहली बार इस्तेमाल पर Ultralytics के नवीनतम [प्रकाशन](https://github.com/ultralytics/assets/releases) से स्वचालित रूप से डाउनलोड होते हैं। + +| मॉडल | साइज़
(pixels) | mAPval
50-95 | स्पीडCPU ONNX
(ms) | स्पीडA100 TensorRT
(ms) | पैराम्स
(M) | FLOPs
(B) | +|--------------------------------------------------------------------------------------|------------------------|----------------------|----------------------------|---------------------------------|---------------------|-------------------| +| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | +| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | +| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | +| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | +| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | + +- **mAPval** मान को [COCO val2017](http://cocodataset.org) डेटासेट पर सिंगल-मॉडेल सिंगल-स्केल के लिए है। +
`yolo val data=coco.yaml device=0` द्वारा पुनः उत्पन्न करें +- **Speed** [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) + इंस्टेंस का उपयोग करके COCO val छवियों पर औसत लिया जाता है। +
`yolo val data=coco128.yaml batch=1 device=0|cpu` के द्वारा पुनः उत्पन्न करें + +## प्रशिक्षण + +COCO128 डेटासेट पर YOLOv8n को 100 एपॉक्स के लिए 640 इमेज आकार पर प्रशिक्षित करें। उपलब्ध तर्कों की पूरी सूची के लिए [कॉन्फ़िगरेशन](/../usage/cfg.md) पृष्ठ देखें। + +!!! Example "उदाहरण" + + === "Python" + + ```python + from ultralytics import YOLO + + # मॉडल लोड करें + model = YOLO('yolov8n.yaml') # YAML से नया मॉडल बनाएँ + model = YOLO('yolov8n.pt') # पूर्वप्रशिक्षित मॉडल लोड करें (प्रशिक्षण के लिए सिफारिश की जाती है) + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # YAML से बनाएं और भार ट्रांसफर करें + + # मॉडल को प्रशिक्षित करें + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # YAML से एक नया मॉडल बनाएँ और शुरू से प्रशिक्षण शुरू करें + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # पूर्व प्रशिक्षित *.pt मॉडल से प्रशिक्षण शुरू करें + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # YAML से नया मॉडल बनाएँ, उसमें पूर्व प्रशिक्षित भार ट्रांसफर करें और प्रशिक्षण शुरू करें + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### डेटासेट प्रारूप + +YOLO डिटेक्शन डेटासेट प्रारूप को [डेटासेट गाइड](../../../datasets/detect/index.md) में विस्तार से देखा जा सकता है। कृपया अपने मौजूदा डेटासेट को अन्य प्रारूपों (जैसे COCO आदि) से YOLO प्रारूप में बदलने के लिए [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) उपकरण का उपयोग करें। + +## मान्यता + +COCO128 डेटासेट पर प्रशिक्षित YOLOv8n मॉडल की सटीकता की जाँच करें। कोई तर्क पास करने की आवश्यकता नहीं है, क्योंकि `model` अपने प्रशिक्षण `data` और तर्कों को मॉडल विशेषताओं के रूप में याद रखता है। + +!!! 
Example "उदाहरण" + + === "Python" + + ```python + from ultralytics import YOLO + + # मॉडल लोड करें + model = YOLO('yolov8n.pt') # आधिकारिक मॉडल लोड करें + model = YOLO('path/to/best.pt') # कस्टम मॉडल लोड करें + + # मॉडल की मान्यता जांचें + metrics = model.val() # तुलना करने के लिए कोई विधि की आवश्यकता नहीं है, डेटासेट और सेटिंग्स याद रखे जाते हैं + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # हर श्रेणी के map50-95 से संबंधित सूची + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # आधिकारिक मॉडल की मान्यता + yolo detect val model=path/to/best.pt # कस्टम मॉडल की मान्यता + ``` + +## भविष्यवाणी + +प्रशिक्षित YOLOv8n मॉडल का उपयोग चित्रों पर भविष्यवाणी करने के लिए करें। + +!!! Example "उदाहरण" + + === "Python" + + ```python + from ultralytics import YOLO + + # मॉडल लोड करें + model = YOLO('yolov8n.pt') # आधिकारिक मॉडल लोड करें + model = YOLO('path/to/best.pt') # कस्टम मॉडल लोड करें + + # मॉडल के साथ भविष्यवाणी करें + results = model('https://ultralytics.com/images/bus.jpg') # एक छवि पर भविष्यवाणी करें + ``` + === "CLI" + + ```bash + yolo detect predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' # आधिकारिक मॉडल के साथ भविष्यवाणी + yolo detect predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # कस्टम मॉडल के साथ भविष्यवाणी + ``` + +पूर्ण `predict` मोड़ विवरण को [भविष्यवाणी](https://docs.ultralytics.com/modes/predict/) पृष्ठ में देखें। + +## निर्यात + +YOLOv8n मॉडल को अन्य प्रारूप (जैसे ONNX, CoreML आदि) में निर्यात करें। + +!!! 
Example "उदाहरण" + + === "Python" + + ```python + from ultralytics import YOLO + + # मॉडल लोड करें + model = YOLO('yolov8n.pt') # आधिकारिक मॉडल लोड करें + model = YOLO('path/to/best.pt') # कस्टम प्रशिक्षित मॉडल लोड करें + + # मॉडल को निर्यात करें + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # आधिकारिक मॉडल को निर्यात करें + yolo export model=path/to/best.pt format=onnx # कस्टम प्रशिक्षित मॉडल को निर्यात करें + ``` + +उपलब्ध YOLOv8 निर्यात प्रारूप नीचे की सारणी में हैं। आप निर्यातित मॉडल पर सीधे भविष्यवाणी या मान्यता कर सकते हैं, जैसे 'yolo predict model=yolov8n.onnx' आदि। निर्यात पूर्ण होने के बाद आपके मॉडल के उपयोग के उदाहरण दिखाए जाते हैं। + +| प्रारूप | `format` तर्क | मॉडल | मेटाडाटा | तर्क | +|--------------------------------------------------------------------|---------------|---------------------------|----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| 
[TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +पूर्ण `export` विवरण को [निर्यात](https://docs.ultralytics.com/modes/export/) पृष्ठ में देखें। diff --git a/docs/hi/tasks/index.md b/docs/hi/tasks/index.md new file mode 100644 index 0000000..7aaf33b --- /dev/null +++ b/docs/hi/tasks/index.md @@ -0,0 +1,55 @@ +--- +comments: true +description: जानें YOLOv8 जो कि विभिन्न कंप्यूटर विजन कार्यों जैसे डिटेक्शन, सेग्मेंटेशन, क्लासिफिकेशन और पोज़ एस्टिमेशन को कर सकता है| अपनें AI प्रोजेक्ट्स म इन टास्क का उपयोग के बारें म मर्यादित हो जाएं +keywords: Ultralytics, YOLOv8, डिटेक्शन, सेग्मेंटेशन, क्लासिफिकेशन, पोज़ एस्टिमेशन, AI Framework, कंप्यूटर विजन कार्य +--- + +# Ultralytics YOLOv8 तास्क + +
+Ultralytics YOLO Supported टास्क्स + +YOLOv8 एक AI फ्रेमवर्क है जो मल्टीपल कंप्यूटर विजन **तास्क्स** को सपोर्ट करता है। इस फ्रेमवर्क का उपयोग [डिटेक्शन](detect.md), [सेग्मेंटेशन](segment.md), [क्लासिफिकेशन](classify.md), और [पोज़](pose.md) एस्टिमेशन को करने के लिए किया जा सकता हैं। हर टास्क का एक अलग उद्देश्य और यूज केस होता हैं। + +!!! Note "नोट" + + 🚧 हमारा मल्टी-भाषा डॉक्युमेंटेशन वर्तमान में निर्माणाधीन हैं, और हम उसे सुधारने के लिए मेहनत कर रहें हैं। आपकी सहानुभूति के लिए धन्यवाद! 🙏 + +

+
+ +
+ देखें: जांचें Ultralytics YOLO टास्क्स: वस्तु डिटेक्शन, सेग्मेंटेशन, ट्रैकिंग और पोज़ एस्टिमेशन। +

+ +## [डिटेक्शन](detect.md) + +डिटेक्शन YOLOv8 द्वारा सपोर्ट किया जाने वाला प्राथमिक टास्क हैं। इसका मतलब होता हैं कि एक छवि या वीडियो फ्रेम में वस्तुओं को खोजें और उनके चारों ओर ग्रेडीयेशन बॉक्स बनाएँ। पायी गयी वस्तुओं को उनके फीचर्स के आधार पर विभिन्न श्रेणियों में वर्गीकृत किया जाता हैं। YOLOv8 एक ही छवि या वीडियो फ्रेम में कई वस्तुएं पहचान सकती हैं और उसे उच्च सटीकता और गति से कर सकती हैं। + +[डिटेक्शन उदाहरण](detect.md){ .md-button } + +## [सेग्मेंटेशन](segment.md) + +सेग्मेंटेशन एक टास्क हैं जिसमे एक छवि को उसकी सामग्री के आधार पर विभिन्न क्षेत्रों में विभाजित किया जाता हैं। प्रत्येक क्षेत्र को उसकी सामग्री के आधार पर एक लेबल दिया जाता हैं। यह टास्क छवि सेग्मेंटेशन और मेडिकल इमेजिंग जैसे एप्लिकेशन्स में उपयोगी होती हैं। YOLOv8 सेग्मेंटेशन करने के लिए U-Net आर्किटेक्चर का इस्तेमाल करता हैं। + +[सेग्मेंटेशन उदाहरण](segment.md){ .md-button } + +## [क्लासिफिकेशन](classify.md) + +क्लासिफिकेशन एक टास्क हैं जिसमे एक छवि को विभिन्न श्रेणियों में वर्गीकृत किया जाता हैं। YOLOv8 का उपयोग छवियों को उनकी सामग्री के आधार पर क्लासिफाई करने के लिए किया जा सकता हैं। यह क्लासिफिकेशन करने के लिए EfficientNet आर्किटेक्चर का उपयोग करता हैं। + +[क्लासिफिकेशन उदाहरण](classify.md){ .md-button } + +## [पोज़](pose.md) + +पोज़/कीपॉइंट डिटेक्शन एक टास्क हैं जिसमे एक छवि या वीडियो फ्रेम में विशेष बिंदुओं को खोजें। इन बिंदुओं को कीपॉइंट कहा जाता हैं और इनका उपयोग गति या पोज़ एस्टिमेशन करने के लिए किया जाता हैं। YOLOv8 एक छवि या वीडियो फ्रेम में उच्च सटीकता और गति से कीपॉइंट डिटेक्ट कर सकता हैं। + +[पोज़ उदाहरण](pose.md){ .md-button } + +## निष्कर्ष + +YOLOv8 डिटेक्शन, सेग्मेंटेशन, क्लासिफिकेशन और कीपॉइंट डिटेक्शन जैसे मल्टीपल टास्क्स को सपोर्ट करता हैं। हर एक टास्क का अलग उद्देश्य और यूज केस होता हैं। इन टास्क्स के बीच अंतर को समझकर, आप अपने कंप्यूटर विजन एप्लिकेशन के लिए उचित टास्क का चुनाव कर सकते हैं। diff --git a/docs/hi/tasks/pose.md b/docs/hi/tasks/pose.md new file mode 100644 index 0000000..1a7a7a9 --- /dev/null +++ b/docs/hi/tasks/pose.md @@ -0,0 +1,183 @@ +--- +comments: true 
+description: Ultralytics YOLOv8 का उपयोग पोज निर्धारण कार्यों के लिए कैसे किया जाता है यह जानें। प्री-शिक्षित मॉडल ढूंढें, प्रशिक्षण, मान्यता प्राप्त करें, पूर्वानुमान लगाएं, और अपना खुद का निर्यात करें। +keywords: Ultralytics, YOLO, YOLOv8, pose estimation, keypoints detection, object detection, pre-trained models, machine learning, artificial intelligence +--- + +# पोज निर्धारण + +पोज निर्धारण उदाहरण + +पोज निर्धारण एक कार्य है जिसमें एक छवि में विशेष बिंदुओं के स्थान की पहचान करना शामिल होता है, जिन्हें आमतौर पर कीपॉइंट्स कहा जाता है। कीपॉइंट्स वस्तु के विभिन्न हिस्सों को दर्शा सकते हैं, जैसे जोड़, लैंडमार्क या अन्य विशिष्ट विशेषताएँ। कीपॉइंट्स के स्थान आमतौर पर 2D `[x, y]` या 3D `[x, y, visible]` कोआर्डिनेट के सेट के रूप में प्रदर्शित होते हैं। + +पोज निर्धारण मॉडल का आउटपुट एक छवि में वस्तु के कीपॉइंट्स को दर्शाने वाले बिंदुओं का सेट होता है, आमतौर पर हर बिंदु के लिए विश्वसनीयता स्कोर के साथ। पोज निर्धारण तब उचित विकल्प है जब आपको किसी दृश्य में एक वस्तु के विशेष हिस्सों की और एक-दूसरे के सापेक्ष उनके स्थान की पहचान करनी होती है। + +

+
+ +
+ देखें: Ultralytics YOLOv8 के साथ पोज निर्धारण। +

+ +!!! Tip "युक्ति" + + YOLOv8 _pose_ मॉडल में `-pose` सफिक्स का उपयोग किया जाता है, जैसे `yolov8n-pose.pt`। ये मॉडल [COCO कीपॉइंट](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco-pose.yaml) डेटासेट पर प्रशिक्षित होते हैं और विभिन्न पोज निर्धारण कार्यों के लिए उपयुक्त होते हैं। + +## [मॉडल्स](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +YOLOv8 पूर्वानुमानित पोज मॉडलस यहाँ दिखाए जाते हैं। पहचानें, अंश और पोज मॉडल मुख्यतः [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) डेटासेट पर प्रशिक्षित हैं, जबकि क्लासिफाई मॉडल्स को [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) डेटासेट पर प्रशिक्षित किया जाता है। + +पूर्वानुमानित मॉडल `Models` को Ultralytics के नवीनतम [रिलीज़](https://github.com/ultralytics/assets/releases) से स्वचालित रूप से डाउनलोड करेंगे। + +| मॉडल | आकार
(पिक्सेल) | mAPपोज
50-95 | mAPपोज
50 | स्पीड
CPU ONNX
(ms) | स्पीड
A100 TensorRT
(ms) | पैराम्स
(M) | FLOPs
(B) | +|------------------------------------------------------------------------------------------------------|---------------------|----------------------|-------------------|--------------------------------|-------------------------------------|---------------------|-------------------| +| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | +| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | +| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | +| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | +| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | +| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | + +- **mAPval** मान एकल मॉडल एकल स्केल पर [COCO कीपॉइंट val2017](http://cocodataset.org) डेटासेट पर है। +
`yolo val pose data=coco-pose.yaml device=0` के द्वारा पुनरुत्पादित करें +- **Speed** [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) इन्स्टेंस का उपयोग करते हुए COCO val छवियों पर औसत निकाली गई गति। +
`yolo val pose data=coco8-pose.yaml batch=1 device=0|cpu` के द्वारा पुनरार्चन करें + +## ट्रेन + +COCO128-pose डेटासेट पर YOLOv8-pose मॉडल को प्रशिक्षित करें। + +!!! Example "उदाहरण" + + === "Python" + + ```python + from ultralytics import YOLO + + # एक मॉडल लोड करें + model = YOLO('yolov8n-pose.yaml') # YAML से एक नया मॉडल बनाएँ + model = YOLO('yolov8n-pose.pt') # पूर्वानुमानित मॉडल लोड करें (प्रशिक्षण के लिए सिफारिश किया जाता है) + model = YOLO('yolov8n-pose.yaml').load('yolov8n-pose.pt') # YAML से बनाएँ और वजन स्थानांतरित करें + + # मॉडल को प्रशिक्षित करें + results = model.train(data='coco8-pose.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # YAML से नया मॉडल बनाएँ और पूर्वानुमानित वजन स्थानांतरित करना शुरू करें + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml epochs=100 imgsz=640 + + # पूर्वानुमानित *.pt मॉडल से प्रशिक्षण शुरू करें + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + + # YAML से नया मॉडल बनाएँ, पूर्वानुमानित वजनों को स्थानांतरित करें और प्रशिक्षण शुरू करें + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml pretrained=yolov8n-pose.pt epochs=100 imgsz=640 + ``` + +### डेटासेट प्रारूप + +YOLO पोज डेटासेट प्रारूप को विस्तार से [डेटासेट गाइड](../../../datasets/pose/index.md) में दिया गया है। अपनी मौजूदा डेटासेट को अन्य प्रारूपों (जैसे कि COCO आदि) से YOLO प्रारूप में रूपांतरित करने के लिए कृपया [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) उपकरण का उपयोग करें। + +## मान्यता प्राप्त करें + +COCO128-pose डेटासेट पर प्रशिक्षित YOLOv8n-pose मॉडल की सटीकता को मान्यता प्राप्त करें। `model` के रूप में कोई आर्ग्युमेंट पारित करने की आवश्यकता नहीं है प्रशिक्षण `data` और सेटिंग्स को मॉडल खिताबों के रूप में रखता है। + +!!! 
Example "उदाहरण" + + === "Python" + + ```python + from ultralytics import YOLO + + # एक मॉडल लोड करें + model = YOLO('yolov8n-pose.pt') # रिपोर्टेड मॉडल लोड करें + model = YOLO('path/to/best.pt') # एक कस्टम मॉडल लोड करें + + # मॉडल की सटीकता मान्यता प्राप्त करें + metrics = model.val() # कोई आर्ग्युमेंट आवश्यक नहीं है, डेटासेट और सेटिंग्स याद रखा जाता है + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # प्रत्येक श्रेणी के map50-95 सूची में है + ``` + === "CLI" + + ```bash + yolo pose val model=yolov8n-pose.pt # आधिकारिक मॉडल मान्यांकन करें + yolo pose val model=path/to/best.pt # कस्टम मॉडल को मान्यता प्राप्त करें + ``` + +## पूर्वानुमान लगाएं + +प्रशिक्षित YOLOv8n-pose मॉडल के साथ छवियों पर पूर्वानुमान चलाएं। + +!!! Example "उदाहरण" + + === "Python" + + ```python + from ultralytics import YOLO + + # एक मॉडल लोड करें + model = YOLO('yolov8n-pose.pt') # रिपोर्टेड मॉडल लोड करें + model = YOLO('path/to/best.pt') # एक कस्टम मॉडल लोड करें + + # मॉडल के साथ पूर्वानुमान करें + results = model('https://ultralytics.com/images/bus.jpg') # एक छवि पर पूर्वानुमान करें + ``` + === "CLI" + + ```bash + yolo pose predict model=yolov8n-pose.pt source='https://ultralytics.com/images/bus.jpg' # आधिकारिक मॉडल के साथ पूर्वानुमान लगाएं + yolo pose predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # कस्टम मॉडल के साथ पूर्वानुमान लगाएं + ``` + +## एक्सपोर्ट + +YOLOv8n पोज मॉडल को ONNX, CoreML जैसे अन्य प्रारूप में निर्यात करें। + +!!! 
Example "उदाहरण" + + === "Python" + + ```python + from ultralytics import YOLO + + # एक मॉडल लोड करें + model = YOLO('yolov8n-pose.pt') # रिपोर्टेड मॉडल लोड करें + model = YOLO('path/to/best.pt') # एक कस्टम प्रशिक्षित मॉडल लोड करें + + # मॉडल को निर्यात करें + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-pose.pt format=onnx # आधिकारिक मॉडल को निर्यात करें + yolo export model=path/to/best.pt format=onnx # कस्टम प्रशिक्षित मॉडल को निर्यात करें + ``` + +निर्यात के लिए उपलब्ध YOLOv8-pose निर्यात प्रारूप नीचे करें दिए गए हैं। आप निर्यात किए गए मॉडल पर सीधा पूर्वानुमान या मान्यता कर सकते हैं, उदाहरण के लिए `yolo predict model=yolov8n-pose.onnx`। निर्यात पूरा होने के बाद अपने मॉडल के उपयोग के उदाहरण दिखाए गए हैं। + +| प्रारूप | `format` आर्ग्युमेंट | मॉडल | मेटाडेटा | आर्ग्युमेंट। | +|--------------------------------------------------------------------|----------------------|--------------------------------|----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-pose.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-pose.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-pose.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-pose_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-pose.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-pose.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-pose_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-pose.pb` | ❌ | `imgsz` | +| [TF 
Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-pose.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-pose_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-pose_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-pose_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-pose_ncnn_model/` | ✅ | `imgsz`, `half` | + +निर्यात विवरण के लिए [निर्यात](https://docs.ultralytics.com/modes/export/) पृष्ठ देखें। diff --git a/docs/hi/tasks/segment.md b/docs/hi/tasks/segment.md new file mode 100644 index 0000000..cd111b2 --- /dev/null +++ b/docs/hi/tasks/segment.md @@ -0,0 +1,187 @@ +--- +comments: true +description: Ultralytics YOLO के साथ उदाहरण देखें कि कैसे instance segmentation मॉडल का उपयोग करें। प्रशिक्षण, मान्यता, छवि की भविष्यवाणी और मॉडल निर्यात पर निर्देश। +keywords: yolov8, instance segmentation, Ultralytics, COCO dataset, image segmentation, object detection, model training, model validation, image prediction, model export +--- + +# Instance Segmentation + +इंस्टेंस सेगमेंटेशन ऑब्जेक्ट डिटेक्शन से एक कदम आगे जाता है और छवि में व्यक्ति ऑब्जेक्ट की पहचान करता है और उन्हें छवि के बाकी हिस्से से विभाजित करता है। + +इंस्टेंस सेगमेंटेशन मॉडल का आउटपुट एक सेट मास्क या कंटोर होता है जो छवि में प्रत्येक ऑब्जेक्ट का संकेत देता है, साथ ही प्रत्येक ऑब्जेक्ट के लिए वर्ग लेबल और आत्मविश्वास स्कोर होता है। इंस्टेंस सेगमेंटेशन उपयोगी होता है जब आपको न केवल पता चलेगा कि छवि में ऑब्जेक्ट कहाँ हैं, बल्कि वास्तव में उनका वास्तविक आकार क्या है। + +

+
+ +
+ देखें: पायथन में पूर्व-प्रशिक्षित Ultralytics YOLOv8 मॉडल के साथ Segmentation चलाएं। +

+ +!!! Tip "टिप" + + YOLOv8 Segment मॉडल `-seg` प्रत्यय का उपयोग करते हैं, जैसे `yolov8n-seg.pt`, और [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) पर पूर्व-प्रशिक्षित होते हैं। + +## [मॉडल](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +YOLOv8 पूर्व प्रशिक्षित Segment मॉडल यहां दिखाए गए हैं। Detect, Segment और Pose मॉडल [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) डेटासेट पर पूर्व प्रशिक्षित हैं, जबकि Classify मॉडल [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) डेटासेट पर पूर्व प्रशिक्षित हैं। + +[मॉडल](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) पहली बार उपयोग करने पर नवीनतम Ultralytics [रिलीज़](https://github.com/ultralytics/assets/releases) से स्वचालित रूप से डाउनलोड होते हैं। + +| मॉडल | आकार
(पिक्सेल) | mAPबॉक्स
50-95 | mAPमास्क
50-95 | स्पीड
CPU ONNX
(मि.सेकंड) | स्पीड
A100 TensorRT
(मि.सेकंड) | पैराम्स
(M) | FLOPs
(B) | +|----------------------------------------------------------------------------------------------|------------------------|------------------------|------------------------|--------------------------------------|-------------------------------------------|---------------------|-------------------| +| [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | +| [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | +| [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | +| [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | +| [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | + +- **mAPval** मान एकल मॉडल एकल स्केल के लिए [COCO val2017](http://cocodataset.org) डेटासेट पर होते हैं। +
`yolo val segment data=coco.yaml device=0` के द्वारा पुनरुत्पादित करें। +- **स्पीड** एक [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) इंस्टेंस का उपयोग करते हुए COCO val छवियों पर औसत निकाली गई है। +
`yolo val segment data=coco128-seg.yaml batch=1 device=0|cpu` के द्वारा पुनर्जीवित किए जा सकते हैं। + +## प्रशिक्षण + +100 एपॉक्स पर 640 छवि के आकार के COCO128-seg डेटासेट पर YOLOv8n-seg को प्रशिक्षित करें। उपलब्ध तार्किक तर्क की पूरी सूची के लिए [Configuration](/../usage/cfg.md) पृष्ठ देखें। + +!!! Example "उदाहरण" + + === "पायथन" + + ```python + from ultralytics import YOLO + + # मॉडल लोड करें + model = YOLO('yolov8n-seg.yaml') # YAML से नया मॉडल बनाएं + model = YOLO('yolov8n-seg.pt') # पूर्व-प्रशिक्षित मॉडल लोड करें (प्रशिक्षण के लिए सिफारिश की जाती है) + model = YOLO('yolov8n-seg.yaml').load('yolov8n.pt') # YAML से नए मॉडल बनाएं और धारित करें + + # मॉडल प्रशिक्षित करें + results = model.train(data='coco128-seg.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # YAML से नया मॉडल बनाएं और शून्य से प्रशिक्षण शुरू करें + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml epochs=100 imgsz=640 + + # पूर्व-प्रशिक्षित *.pt मॉडल से प्रशिक्षण शुरू करें + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + + # YAML से नया मॉडल बनाएं, पूर्व-प्रशिक्षित वजनों को इसे ट्रांसफर करें और प्रशिक्षण शुरू करें + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml pretrained=yolov8n-seg.pt epochs=100 imgsz=640 + ``` + +### डेटासेट प्रारूप + +YOLO सेगमेंटेशन डेटासेट प्रारूप [डेटासेट गाइड](../../../datasets/segment/index.md) में विस्तार से देखा जा सकता है। कृपया अपने मौजूदा डेटासेट को अन्य प्रारूपों (जैसे कि COCO आदि) से YOLO प्रारूप में परिवर्तित करने के लिए [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) उपकरण का उपयोग करें। + +## मान्यता + +COCO128-seg डेटासेट पर प्रशिक्षित YOLOv8n-seg मॉडल की सत्यापन करें। `मॉडल` पास करने के लिए कोई तर्क आवश्यक नहीं होता है क्योंकि `मॉडल` +प्रशिक्षण के `डेटा` और तर्कों का ध्यान रखता है। + +!!! 
Example "उदाहरण" + + === "Python" + + ```python + from ultralytics import YOLO + + # मॉडल लोड करें + model = YOLO('yolov8n-seg.pt') # आधिकारिक मॉडल लोड करें + model = YOLO('path/to/best.pt') # कस्टम मॉडल लोड करें + + # मॉडल की सत्यापना करें + metrics = model.val() # कोई तर्क आवश्यक नहीं है, डेटा और सेटिंग्स याद रखे जाते हैं + metrics.box.map # map50-95(B) + metrics.box.map50 # map50(B) + metrics.box.map75 # map75(B) + metrics.box.maps # एक सूची है जिसमें प्रत्येक श्रेणी का map50-95(B) होता है + metrics.seg.map # map50-95(M) + metrics.seg.map50 # map50(M) + metrics.seg.map75 # map75(M) + metrics.seg.maps # एक सूची है जिसमें प्रत्येक श्रेणी का map50-95(M) होता है + ``` + === "CLI" + + ```bash + yolo segment val model=yolov8n-seg.pt # आधिकारिक मॉडल की मान्यता + yolo segment val model=path/to/best.pt # कस्टम मॉडल की मान्यता + ``` + +## भविष्यवाणी + +प्रशिक्षित YOLOv8n-seg मॉडल का उपयोग छवियों पर भविष्यवाणी करने के लिए करें। + +!!! Example "उदाहरण" + + === "Python" + + ```python + from ultralytics import YOLO + + # मॉडल लोड करें + model = YOLO('yolov8n-seg.pt') # आधिकारिक मॉडल लोड करें + model = YOLO('path/to/best.pt') # कस्टम मॉडल लोड करें + + # मॉडल के साथ भविष्यवाणी करें + results = model('https://ultralytics.com/images/bus.jpg') # एक छवि पर भविष्यवाणी करें + ``` + === "CLI" + + ```bash + yolo segment predict model=yolov8n-seg.pt source='https://ultralytics.com/images/bus.jpg' # आधिकारिक मॉडल के साथ भविष्यवाणी करें + yolo segment predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # कस्टम मॉडल के साथ भविष्यवाणी करें + ``` + +भविष्यवाणी मोड के पूर्ण विवरण को [Predict](https://docs.ultralytics.com/modes/predict/) पृष्ठ में देखें। + +## निर्यात + +YOLOv8n-seg मॉडल को ONNX, CoreML आदि जैसे अन्य प्रारूप में निर्यात करें। + +!!! 
Example "उदाहरण" + + === "Python" + + ```python + from ultralytics import YOLO + + # मॉडल लोड करें + model = YOLO('yolov8n-seg.pt') # आधिकारिक मॉडल लोड करें + model = YOLO('path/to/best.pt') # कस्टम प्रशिक्षित मॉडल लोड करें + + # मॉडल निर्यात करें + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-seg.pt format=onnx # आधिकारिक मॉडल को निर्यात करें + yolo export model=path/to/best.pt format=onnx # कस्टम प्रशिक्षित मॉडल को निर्यात करें + ``` + +YOLOv8-seg निर्यात प्रारूप निम्नलिखित तालिका में बताए गए हैं। आप निर्यात किए गए मॉडल पर सीधे भविष्यवाणी या मान्यता कर सकते हैं, अर्थात `yolo predict model=yolov8n-seg.onnx`। निर्यात होने के बाद अपने मॉडल के लिए उपयोग के उदाहरण देखें। + +| प्रारूप | `format` Argument | मॉडल | मेटाडेटा | तर्क | +|--------------------------------------------------------------------|-------------------|-------------------------------|----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-seg.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-seg.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-seg.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-seg_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-seg.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-seg.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-seg_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-seg.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | 
`yolov8n-seg.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-seg_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-seg_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-seg_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-seg_ncnn_model/` | ✅ | `imgsz`, `half` | + +[Export](https://docs.ultralytics.com/modes/export/) पृष्ठ में पूर्ण `निर्यात` विवरण देखें। diff --git a/docs/ja/index.md b/docs/ja/index.md new file mode 100644 index 0000000..97f5ec6 --- /dev/null +++ b/docs/ja/index.md @@ -0,0 +1,82 @@ +--- +comments: true +description: Ultralytics YOLOv8に関する完全ガイド。高速で高精度なオブジェクト検出・画像セグメンテーションモデル。インストール、予測、トレーニングチュートリアルなど。 +keywords: Ultralytics, YOLOv8, オブジェクト検出, 画像セグメンテーション, 機械学習, ディープラーニング, コンピュータビジョン, YOLOv8 インストール, YOLOv8 予測, YOLOv8 トレーニング, YOLO 歴史, YOLO ライセンス +--- + +
+

+ + Ultralytics YOLOバナー +

+ Ultralytics GitHub + space + Ultralytics LinkedIn + space + Ultralytics Twitter + space + Ultralytics YouTube + space + Ultralytics TikTok + space + Ultralytics Instagram + space + Ultralytics Discord +
+
+ Ultralytics CI + Ultralytics コードカバレッジ + YOLOv8 引用情報 + Docker プル +
+ Gradient上で実行 + Colabで開く + Kaggleで開く +
+ +全く新しい[Ultralytics](https://ultralytics.com)の[YOLOv8](https://github.com/ultralytics/ultralytics)を紹介します。これは、実時間で動作するオブジェクト検出および画像セグメンテーションモデルの最新バージョンです。YOLOv8は、ディープラーニングとコンピュータビジョンの最先端の進歩に基づいており、速度と精度の面で比類のない性能を提供します。その合理化された設計により、エッジデバイスからクラウドAPIまで、さまざまなアプリケーションやハードウェアプラットフォームへの適応が容易です。 + +YOLOv8ドキュメントを探索し、その特徴と能力を理解し、活用するための包括的なリソースを提供します。機械学習の経験者であれ、分野の新入りであれ、このハブはあなたのプロジェクトでYOLOv8のポテンシャルを最大限に引き出すことを目指しています。 + +!!! Note "ノート" + + 🚧 多言語ドキュメントは現在作成中であり、改善に努めております。お待ちいただき、ありがとうございます! 🙏 + +## はじめに + +- pipで`ultralytics`を**インストール**し、数分で稼働   [:material-clock-fast: はじめに](quickstart.md){ .md-button } +- YOLOv8で新しい画像やビデオに**予測**   [:octicons-image-16: 画像で予測](modes/predict.md){ .md-button } +- 独自のカスタムデータセットで新しいYOLOv8モデルを**トレーニング**   [:fontawesome-solid-brain: モデルをトレーニング](modes/train.md){ .md-button } +- セグメント、クラス分け、ポーズ、トラッキングなどのYOLOv8タスクを**探求**   [:material-magnify-expand: タスクを探求](tasks/index.md){ .md-button } + +

+
+ +
+ 視聴: Google ColabでカスタムデータセットにYOLOv8モデルをトレーニングする方法。 +

+ +## YOLO: 簡単な歴史 + +[YOLO](https://arxiv.org/abs/1506.02640)(You Only Look Once、一度だけ見る)は、ワシントン大学のJoseph RedmonとAli Farhadiによって開発された、流行のオブジェクト検出および画像セグメンテーションモデルです。2015年に発売されたYOLOは、その高速かつ正確さからすぐに人気を博しました。 + +- [YOLOv2](https://arxiv.org/abs/1612.08242)は、2016年にリリースされ、バッチ正規化、アンカーボックス、次元クラスタリングを導入し、オリジナルモデルを改善しました。 +- [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf)は、2018年により効率的なバックボーンネットワーク、複数のアンカー、空間ピラミッドプーリングを使用して、モデルの性能を一段と向上させました。 +- [YOLOv4](https://arxiv.org/abs/2004.10934)は2020年にリリースされ、モザイクデータオーギュメンテーション、新しいアンカーフリー検出ヘッド、新しい損失関数などの革新を導入しました。 +- [YOLOv5](https://github.com/ultralytics/yolov5)は、モデルの性能をさらに向上させ、ハイパーパラメータ最適化、統合実験トラッキング、一般的なエクスポート形式への自動エクスポートなどの新機能を追加しました。 +- [YOLOv6](https://github.com/meituan/YOLOv6)は、2022年に[Meituan](https://about.meituan.com/)によってオープンソース化され、同社の多くの自動配送ロボットで使用されています。 +- [YOLOv7](https://github.com/WongKinYiu/yolov7)は、COCOキーポイントデータセット上のポーズ推定などの追加タスクを追加しました。 +- [YOLOv8](https://github.com/ultralytics/ultralytics)は、UltralyticsによるYOLOの最新版です。最先端の最新モデルとして、YOLOv8は前バージョンの成功に基づき、性能、柔軟性、効率を向上させる新機能や改善を導入しています。YOLOv8は、[検出](tasks/detect.md)、[セグメンテーション](tasks/segment.md)、[ポーズ推定](tasks/pose.md)、[トラッキング](modes/track.md)、[分類](tasks/classify.md)など、視覚AIタスクの全範囲をサポートしています。この多才性により、ユーザーは多様なアプリケーションとドメインでYOLOv8の機能を活用することができます。 + +## YOLO ライセンス: UltralyticsのYOLOはどのようにライセンスされていますか? 
+ +Ultralyticsは、さまざまなユースケースに対応するために2種類のライセンスオプションを提供しています: + +- **AGPL-3.0 ライセンス**: この[OSI認定](https://opensource.org/licenses/)のオープンソースライセンスは、学生や愛好家に理想的であり、オープンなコラボレーションと知識共有を奨励しています。詳細は[ライセンス](https://github.com/ultralytics/ultralytics/blob/main/LICENSE)ファイルをご覧ください。 +- **エンタープライズ ライセンス**: 商業用途に設計されたこのライセンスは、UltralyticsのソフトウェアおよびAIモデルを商業商品やサービスにシームレスに統合することを許可し、AGPL-3.0のオープンソース要件をバイパスできます。商業的なオファリングへの組み込みを含むシナリオであれば、[Ultralytics ライセンス](https://ultralytics.com/license)を通じてお問い合わせください。 + +私たちのライセンス戦略は、オープンソースプロジェクトに対するあらゆる改善がコミュニティに還元されることを確実にするために設計されています。私たちはオープンソースの原則を大切にしており、私たちの貢献が全ての人にとって有益な方法で利用可能であり、さらに拡張されることを保証することを使命としています。❤️ diff --git a/docs/ja/models/fast-sam.md b/docs/ja/models/fast-sam.md new file mode 100644 index 0000000..0f148b2 --- /dev/null +++ b/docs/ja/models/fast-sam.md @@ -0,0 +1,193 @@ +--- +comments: true +description: FastSAMは、画像内のオブジェクトをリアルタイムでセグメンテーションするためのCNNベースのソリューションです。利用者の対話、計算効率の向上、様々なビジョンタスクに対応可能です。 +keywords: FastSAM, 機械学習, CNNベースのソリューション, オブジェクトセグメンテーション, リアルタイムソリューション, Ultralytics, ビジョンタスク, 画像処理, 工業用途, ユーザー対話 +--- + +# Fast Segment Anything Model (FastSAM) + +Fast Segment Anything Model(FastSAM)は、セグメントエニシングタスクのための新しいリアルタイムのCNNベースのソリューションです。このタスクは、さまざまなユーザー対話のプロンプトに基づいて画像内の任意のオブジェクトをセグメント化することを目的としています。FastSAMは、優れた性能を維持しながら、計算要件を大幅に削減し、様々なビジョンタスクに実用的な選択肢となります。 + +![Fast Segment Anything Model (FastSAM) architecture overview](https://user-images.githubusercontent.com/26833433/248551984-d98f0f6d-7535-45d0-b380-2e1440b52ad7.jpg) + +## 概要 + +FastSAMは、[Segment Anything Model (SAM)](sam.md)の制約事項に対処するために設計されました。SAMは、大規模な計算リソースを要する重いTransformerモデルです。FastSAMは、セグメントエニシングタスクを2つの連続するステージに分割し、すべてのインスタンスセグメンテーションとプロンプトガイドの選択を行います。最初のステージでは、[YOLOv8-seg](../tasks/segment.md)を使用して、画像内のすべてのインスタンスのセグメンテーションマスクを生成します。2番目のステージでは、プロンプトに対応する領域を出力します。 + +## 主な特徴 + +1. **リアルタイムソリューション:** CNNの計算効率を活用することで、FastSAMはセグメントエニシングタスクのためのリアルタイムソリューションを提供し、迅速な結果を必要とする工業用途に価値をもたらします。 + +2. 
**効率と性能:** FastSAMは、計算およびリソースの要求を大幅に削減しながら、パフォーマンスの品質を損なうことなく、SAMと同等のパフォーマンスを達成します。これにより、リアルタイムアプリケーションが可能となります。 + +3. **プロンプトガイドのセグメンテーション:** FastSAMは、さまざまなユーザー対話のプロンプトに基づいて画像内の任意のオブジェクトをセグメント化することができます。これにより、様々なシナリオでの柔軟性と適応性が提供されます。 + +4. **YOLOv8-segに基づく:** FastSAMは、インスタンスセグメンテーションブランチを備えたオブジェクト検出器である[YOLOv8-seg](../tasks/segment.md)に基づいています。これにより、画像内のすべてのインスタンスのセグメンテーションマスクを効果的に生成することができます。 + +5. **ベンチマークでの競合力のある結果:** MS COCOのオブジェクトプロポーザルタスクにおいて、FastSAMは単一のNVIDIA RTX 3090上でのSAMよりもはるかに高速に高得点を獲得し、その効率性と能力を示しています。 + +6. **実用的な応用:** 提案されたアプローチは、現在の方法よりも数十倍または数百倍も高速な速度で、非常に高速なvisionタスクの新しい実用的なソリューションを提供します。 + +7. **モデルの圧縮の可能性:** FastSAMは、構造への人工的な事前条件を導入することにより、計算負荷を大幅に削減する可能な経路を示し、一般的なビジョンタスクの大規模モデルアーキテクチャの新たな可能性を開くことを示しています。 + +## 利用可能なモデル、サポートされるタスク、および動作モード + +この表は、利用可能なモデルとそれぞれの特定の事前学習済みウェイト、サポートされるタスク、およびInference、Validation、Training、Exportなどの異なる操作モードとの互換性を示しています。サポートされているモードは✅、サポートされていないモードは❌の絵文字で示されます。 + +| モデルの種類 | 事前学習済みウェイト | サポートされるタスク | Inference | Validation | Training | Export | +|-----------|----------------|----------------------------------------|-----------|------------|----------|--------| +| FastSAM-s | `FastSAM-s.pt` | [インスタンスセグメンテーション](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| FastSAM-x | `FastSAM-x.pt` | [インスタンスセグメンテーション](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## 使用例 + +FastSAMモデルは、Pythonアプリケーションに簡単に統合できます。Ultralyticsは、開発を効率化するためのユーザーフレンドリーなPython APIおよびCLIコマンドを提供しています。 + +### 予測の使用方法 + +画像のオブジェクト検出を実行するには、以下のように`predict`メソッドを使用します: + +!!! 
Example "例" + + === "Python" + ```python + from ultralytics import FastSAM + from ultralytics.models.fastsam import FastSAMPrompt + + # 推論元のソースを定義する + source = 'path/to/bus.jpg' + + # FastSAMモデルを作成する + model = FastSAM('FastSAM-s.pt') # または FastSAM-x.pt + + # 画像への推論を実行する + everything_results = model(source, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9) + + # Prompt Processオブジェクトを準備する + prompt_process = FastSAMPrompt(source, everything_results, device='cpu') + + # Everything prompt + ann = prompt_process.everything_prompt() + + # バウンディングボックスのデフォルトの形状は [0,0,0,0] -> [x1,y1,x2,y2] + ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300]) + + # テキストプロンプト + ann = prompt_process.text_prompt(text='a photo of a dog') + + # ポイントプロンプト + # pointsのデフォルトは [[0,0]] [[x1,y1],[x2,y2]] + # point_labelのデフォルトは [0] [1,0] 0:background, 1:foreground + ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1]) + prompt_process.plot(annotations=ann, output='./') + ``` + + === "CLI" + ```bash + # FastSAMモデルをロードし、それによってeverythingをセグメント化する + yolo segment predict model=FastSAM-s.pt source=path/to/bus.jpg imgsz=640 + ``` + +このスニペットは、事前学習済みモデルをロードし、イメージに対する予測を実行するシンプルさを示しています。 + +### 検証の使用方法 + +データセット上でモデルの検証を行うには、以下のようにします: + +!!! Example "例" + + === "Python" + ```python + from ultralytics import FastSAM + + # FastSAMモデルを作成する + model = FastSAM('FastSAM-s.pt') # または FastSAM-x.pt + + # モデルを検証する + results = model.val(data='coco8-seg.yaml') + ``` + + === "CLI" + ```bash + # FastSAMモデルをロードし、COCO8の例のデータセットで検証する(イメージサイズ:640) + yolo segment val model=FastSAM-s.pt data=coco8.yaml imgsz=640 + ``` + +FastSAMは、オブジェクトの検出とセグメンテーションを1つのクラスのオブジェクトに対してのみサポートしています。これは、すべてのオブジェクトを同じクラスとして認識し、セグメント化することを意味します。そのため、データセットを準備する際には、すべてのオブジェクトのカテゴリIDを0に変換する必要があります。 + +## FastSAM公式の使用方法 + +FastSAMは、[https://github.com/CASIA-IVA-Lab/FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM)リポジトリから直接利用することもできます。以下は、FastSAMを使用するための一般的な手順の概要です。 + +### インストール + +1. 
FastSAMリポジトリをクローンする: + ```shell + git clone https://github.com/CASIA-IVA-Lab/FastSAM.git + ``` + +2. Python 3.9を使用したConda環境を作成してアクティベートする: + ```shell + conda create -n FastSAM python=3.9 + conda activate FastSAM + ``` + +3. クローンされたリポジトリに移動し、必要なパッケージをインストールする: + ```shell + cd FastSAM + pip install -r requirements.txt + ``` + +4. CLIPモデルをインストールする: + ```shell + pip install git+https://github.com/openai/CLIP.git + ``` + +### 使用例 + +1. [モデルのチェックポイント](https://drive.google.com/file/d/1m1sjY4ihXBU1fZXdQ-Xdj-mDltW-2Rqv/view?usp=sharing)をダウンロードします。 + +2. FastSAMを推論に使用します。以下は実行例です: + + - 画像内のすべてをセグメント化する: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg + ``` + + - テキストプロンプトを使用して特定のオブジェクトをセグメント化する: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --text_prompt "the yellow dog" + ``` + + - バウンディングボックス内のオブジェクトをセグメント化する(xywh形式でボックス座標を指定します): + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --box_prompt "[570,200,230,400]" + ``` + + - 特定のポイントの近くにあるオブジェクトをセグメント化する: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --point_prompt "[[520,360],[620,300]]" --point_label "[1,0]" + ``` + +さらに、FastSAMを[Colabデモ](https://colab.research.google.com/drive/1oX14f6IneGGw612WgVlAiy91UHwFAvr9?usp=sharing)や[HuggingFaceウェブデモ](https://huggingface.co/spaces/An-619/FastSAM)で試すこともできます。 + +## 引用と謝辞 + +FastSAMの著者には、リアルタイムインスタンスセグメンテーションの分野での重要な貢献を称えたいと思います。 + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{zhao2023fast, + title={Fast Segment Anything}, + author={Xu Zhao and Wenchao Ding and Yongqi An and Yinglong Du and Tao Yu and Min Li and Ming Tang and Jinqiao Wang}, + year={2023}, + eprint={2306.12156}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +FastSAMのオリジナルの論文は、[arXiv](https://arxiv.org/abs/2306.12156)で入手できます。著者は彼らの作品を広く公開し、コードベースは[GitHub](https://github.com/CASIA-IVA-Lab/FastSAM)でアクセスできるようにしています。私たちは、彼らがフィールドを進歩させ、その成果を広いコミュニティにアクセス可能にしてくれた彼らの努力に感謝しています。 diff --git a/docs/ja/models/index.md b/docs/ja/models/index.md new file mode 100644 index 0000000..490ac76 --- /dev/null +++ b/docs/ja/models/index.md @@ -0,0 +1,98 @@ +--- +comments: true +description: UltralyticsがサポートするYOLOファミリー、SAM、MobileSAM、FastSAM、YOLO-NAS、RT-DETRモデルの多様な範囲を探索し、CLIおよびPythonの使用例で始めましょう。 +keywords: Ultralytics, ドキュメント, YOLO, SAM, MobileSAM, FastSAM, YOLO-NAS, RT-DETR, モデル, アーキテクチャ, Python, CLI +--- + +# Ultralyticsがサポートするモデル + +Ultralyticsのモデルドキュメントへようこそ!我々は、[オブジェクト検出](../tasks/detect.md)、[インスタンスセグメンテーション](../tasks/segment.md)、[画像分類](../tasks/classify.md)、[ポーズ推定](../tasks/pose.md)、[多対象トラッキング](../modes/track.md)などの特定のタスクに特化した幅広いモデルのサポートを提供しています。Ultralyticsにあなたのモデルアーキテクチャを貢献したい場合は、[貢献ガイド](../../help/contributing.md)を確認してください。 + +!!! Note "注意" + + 🚧 現在、さまざまな言語でのドキュメントを構築中であり、改善に努めています。ご理解ありがとうございます!🙏 + +## 特集モデル + +ここではサポートされている主要なモデルをいくつか紹介します: + +1. **[YOLOv3](yolov3.md)**:Joseph RedmonによるYOLOモデルファミリーの第三世代で、効率的なリアルタイムオブジェクト検出能力で知られています。 +2. **[YOLOv4](yolov4.md)**:2020年にAlexey BochkovskiyによってリリースされたYOLOv3のdarknetネイティブアップデートです。 +3. **[YOLOv5](yolov5.md)**:UltralyticsによるYOLOアーキテクチャの改良版で、以前のバージョンと比較してパフォーマンスと速度のトレードオフが向上しています。 +4. **[YOLOv6](yolov6.md)**:2022年に[美団](https://about.meituan.com/)によってリリースされ、同社の多数の自動配送ロボットで使用されています。 +5. **[YOLOv7](yolov7.md)**:YOLOv4の著者によって2022年にリリースされたYOLOモデルのアップデートです。 +6. **[YOLOv8](yolov8.md) 新機能 🚀**:YOLOファミリーの最新バージョンで、例えばインスタンスセグメンテーション、ポーズ/キーポイント推定、分類などの機能が強化されています。 +7. 
**[Segment Anything Model (SAM)](sam.md)**:MetaのSegment Anything Model (SAM)です。 +8. **[Mobile Segment Anything Model (MobileSAM)](mobile-sam.md)**:慶熙大学校(Kyung Hee University)によるモバイルアプリケーションのためのMobileSAMです。 +9. **[Fast Segment Anything Model (FastSAM)](fast-sam.md)**:中国科学院自動化研究所、画像及びビデオ解析グループのFastSAMです。 +10. **[YOLO-NAS](yolo-nas.md)**:YOLO Neural Architecture Search (NAS)モデルです。 +11. **[Realtime Detection Transformers (RT-DETR)](rtdetr.md)**:百度のPaddlePaddle Realtime Detection Transformer (RT-DETR)モデルです。 + +

+
+ +
+ 視聴: Ultralytics YOLOモデルをわずか数行のコードで実行します。 +

+ +## Getting Started: 使用例 + +この例は、YOLOのトレーニングと推論の簡単な例を提供します。これらおよびその他の[モード](../modes/index.md)についての完全なドキュメントについては、[Predict](../modes/predict.md)、[Train](../modes/train.md)、[Val](../modes/val.md)、[Export](../modes/export.md)のドキュメントページを参照してください。 + +以下の例は、オブジェクト検出のためのYOLOv8 [Detect](../tasks/detect.md) モデルについてです。追加のサポートされるタスクについては、[Segment](../tasks/segment.md)、[Classify](../tasks/classify.md)、[Pose](../tasks/pose.md)のドキュメントを参照してください。 + +!!! Example "例" + + === "Python" + + PyTorchの事前訓練済み`*.pt`モデルや構成`*.yaml`ファイルは、`YOLO()`、`SAM()`、`NAS()`、`RTDETR()`クラスに渡して、Pythonでモデルインスタンスを作成することができます: + + ```python + from ultralytics import YOLO + + # COCOで事前訓練されたYOLOv8nモデルをロードする + model = YOLO('yolov8n.pt') + + # モデル情報を表示する(任意) + model.info() + + # モデルをCOCO8の例示データセットで100エポックトレーニングする + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # 'bus.jpg'画像でYOLOv8nモデルを用いた推論を実行する + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + モデルを直接実行するためのCLIコマンドが利用可能です: + + ```bash + # COCOで事前訓練されたYOLOv8nモデルをロードし、COCO8の例示データセットで100エポックトレーニングする + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # COCOで事前訓練されたYOLOv8nモデルをロードし、'bus.jpg'画像で推論を実行する + yolo predict model=yolov8n.pt source=path/to/bus.jpg + ``` + +## 新しいモデルの貢献 + +Ultralyticsにあなたのモデルを貢献することに興味がありますか?素晴らしいです!我々は常にモデルのポートフォリオを拡張することに興味があります。 + +1. **リポジトリをフォークする**:[Ultralytics GitHubリポジトリ](https://github.com/ultralytics/ultralytics)をフォークすることから始めます。 + +2. **あなたのフォークをクローンする**:あなたのフォークをローカルマシンにクローンし、作業を行う新しいブランチを作成します。 + +3. **あなたのモデルを実装する**:[貢献ガイド](../../help/contributing.md)に示されているコーディング規格および指針に従ってモデルを追加します。 + +4. **徹底的にテストする**:パイプラインの一部としてだけでなく、単独でモデルを厳密にテストすることを確認してください。 + +5. **プルリクエストを作成する**:モデルに満足したら、レビューのために本リポジトリにプルリクエストを作成します。 + +6. 
**コードレビュー&マージ**:レビュー後、モデルが我々の基準を満たしている場合、本リポジトリにマージされます。 + +詳細な手順については、[貢献ガイド](../../help/contributing.md)を参照してください。 diff --git a/docs/ja/models/mobile-sam.md b/docs/ja/models/mobile-sam.md new file mode 100644 index 0000000..53414f5 --- /dev/null +++ b/docs/ja/models/mobile-sam.md @@ -0,0 +1,116 @@ +--- +comments: true +description: Ultralyticsフレームワーク内でMobileSAMをダウンロードしてテストする方法、MobileSAMの実装、オリジナルのSAMとの比較について詳しく知ることができます。今日からモバイルアプリケーションを改善しましょう。 +keywords: MobileSAM, Ultralytics, SAM, モバイルアプリケーション, Arxiv, GPU, API, 画像エンコーダ, マスクデコーダ, モデルのダウンロード, テスト方法 +--- + +![MobileSAM ロゴ](https://github.com/ChaoningZhang/MobileSAM/blob/master/assets/logo2.png?raw=true) + +# Mobile Segment Anything(MobileSAM) + +MobileSAM論文が[arXiv](https://arxiv.org/pdf/2306.14289.pdf)で利用可能になりました。 + +CPU上で動作するMobileSAMのデモは、[こちらのデモリンク](https://huggingface.co/spaces/dhkim2810/MobileSAM)からアクセスできます。Mac i5 CPU上では、約3秒かかります。Hugging Faceのデモでは、インターフェースと低性能なCPUが遅い応答に寄与していますが、効果的に動作し続けます。 + +MobileSAMは、[Grounding-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything)、[AnyLabeling](https://github.com/vietanhdev/anylabeling)、および[Segment Anything in 3D](https://github.com/Jumpat/SegmentAnythingin3D)など、さまざまなプロジェクトで実装されています。 + +MobileSAMは、100kのデータセット(元の画像の1%)を単一のGPUで学習し、1日未満で訓練が完了します。このトレーニングのコードは将来公開される予定です。 + +## 利用可能なモデル、サポートされているタスク、および動作モード + +この表は、利用可能なモデルとそれぞれの固有の事前学習重み、サポートされているタスク、および[予測](../modes/predict.md)、[検証](../modes/val.md)、[訓練](../modes/train.md)、および[エクスポート](../modes/export.md)のようなさまざまな動作モードに対する互換性を示しています。`✅`は対応しているモード、`❌`は対応していないモードを示しています。 + +| モデルタイプ | 事前学習重み | サポートされているタスク | 予測 | 検証 | 訓練 | エクスポート | +|-----------|-----------------|----------------------------------------|----|----|----|--------| +| MobileSAM | `mobile_sam.pt` | [インスタンスセグメンテーション](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## SAMからMobileSAMへの移行 + +MobileSAMは、オリジナルのSAMと同じパイプラインを維持しているため、オリジナルの前処理、後処理、およびその他のインタフェースを組み込んでいます。そのため、現在オリジナルのSAMを使用している場合でも、MobileSAMへの移行は最小限の労力で行うことができます。 + 
+MobileSAMは、オリジナルのSAMと同等のパフォーマンスを発揮し、イメージエンコーダを変更することで同じパイプラインを保持しています。具体的には、元の重いViT-Hエンコーダ(632M)をより小さいTiny-ViT(5M)に置き換えています。単一のGPU上でMobileSAMは、おおよそ画像あたり12msで動作します:イメージエンコーダで8ms、マスクデコーダで4msです。 + +次の表は、ViTベースのイメージエンコーダの比較です: + +| イメージエンコーダ | オリジナルのSAM | MobileSAM | +|-----------|-----------|-----------| +| パラメーター | 611M | 5M | +| 速度 | 452ms | 8ms | + +オリジナルのSAMとMobileSAMは、同じプロンプト誘導型マスクデコーダを使用しています: + +| マスクデコーダ | オリジナルのSAM | MobileSAM | +|---------|-----------|-----------| +| パラメーター | 3.876M | 3.876M | +| 速度 | 4ms | 4ms | + +以下は、全体のパイプラインの比較です: + +| パイプライン全体(エンコーダ+デコーダ) | オリジナルのSAM | MobileSAM | +|----------------------|-----------|-----------| +| パラメーター | 615M | 9.66M | +| 速度 | 456ms | 12ms | + +MobileSAMとオリジナルのSAMのパフォーマンスは、ポイントとボックスをプロンプトとして使用した場合に示されます。 + +![ポイントをプロンプトにした画像](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/mask_box.jpg?raw=true) + +![ボックスをプロンプトにした画像](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/mask_box.jpg?raw=true) + +MobileSAMは、現在のFastSAMよりも約5倍小さく、約7倍高速です。詳細は[MobileSAMプロジェクトページ](https://github.com/ChaoningZhang/MobileSAM)でご覧いただけます。 + +## UltralyticsでのMobileSAMのテスト + +オリジナルのSAMと同様に、ポイントとボックスのプロンプトの両方に対応したUltralyticsでの簡単なテスト方法を提供しています。 + +### モデルのダウンロード + +モデルは[こちらからダウンロード](https://github.com/ChaoningZhang/MobileSAM/blob/master/weights/mobile_sam.pt)できます。 + +### ポイントプロンプト + +!!! Example "例" + + === "Python" + ```python + from ultralytics import SAM + + # モデルをロード + model = SAM('mobile_sam.pt') + + # ポイントプロンプトに基づいてセグメントを予測 + model.predict('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +### ボックスプロンプト + +!!! 
Example "例" + + === "Python" + ```python + from ultralytics import SAM + + # モデルをロード + model = SAM('mobile_sam.pt') + + # ボックスプロンプトに基づいてセグメントを予測 + model.predict('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + ``` + +`MobileSAM`と`SAM`は、同じAPIを使用して実装されています。詳細な使用方法については、[SAMページ](sam.md)をご覧ください。 + +## 引用と謝辞 + +MobileSAMが研究や開発のお役に立つ場合は、次の論文を引用していただけると幸いです: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{mobile_sam, + title={Faster Segment Anything: Towards Lightweight SAM for Mobile Applications}, + author={Zhang, Chaoning and Han, Dongshen and Qiao, Yu and Kim, Jung Uk and Bae, Sung Ho and Lee, Seungkyu and Hong, Choong Seon}, + journal={arXiv preprint arXiv:2306.14289}, + year={2023} + } diff --git a/docs/ja/models/rtdetr.md b/docs/ja/models/rtdetr.md new file mode 100644 index 0000000..11b73b7 --- /dev/null +++ b/docs/ja/models/rtdetr.md @@ -0,0 +1,93 @@ +--- +comments: true +description: RT-DETRは、Baiduによって開発された、高速かつ高精度なリアルタイムオブジェクト検出器です。Vision Transformers(ViT)の力を借りて、マルチスケールの特徴を効率的に処理します。RT-DETRは非常に適応性があり、再学習せずに異なるデコーダーレイヤーを使用して推論速度を柔軟に調整できます。このモデルは、TensorRTを使用したCUDAなどの高速バックエンドで優れた性能を発揮し、多くの他のリアルタイムオブジェクト検出器を凌駕します。 +keywords: RT-DETR, Baidu, Vision Transformers, object detection, real-time performance, CUDA, TensorRT, IoU-aware query selection, Ultralytics, Python API, PaddlePaddle +--- + +# BaiduのRT-DETR: Vision Transformerベースのリアルタイムオブジェクト検出器 + +## 概要 + +Baiduが開発したリアルタイム検出Transformer(RT-DETR)は、高い精度を維持しながらリアルタイム性能を提供する最先端のエンドツーエンドのオブジェクト検出器です。Vision Transformers(ViT)の力を借りて、マルチスケールの特徴を効率的に処理することにより、RT-DETRは高い適応性を持ちます。再学習せずに異なるデコーダーレイヤーを使用して推論速度を柔軟に調整できるため、このモデルはTensorRTを使用したCUDAなどの高速バックエンドで多くの他のリアルタイムオブジェクト検出器を凌駕します。 + +![モデルの例](https://user-images.githubusercontent.com/26833433/238963168-90e8483f-90aa-4eb6-a5e1-0d408b23dd33.png) +**BaiduのRT-DETRの概要。** 
RT-DETRのモデルアーキテクチャダイアグラムでは、バックボーンの最後の3つのステージ{S3、S4、S5}がエンコーダーへの入力として表示されます。効率的なハイブリッドエンコーダーは、マルチスケールの特徴をイントラスケール特徴の相互作用(AIFI)とクロススケール特徴融合モジュール(CCFM)を介して画像特徴のシーケンスに変換します。IoU-awareクエリ選択は、デコーダーの初期オブジェクトクエリとして固定数の画像特徴を選択するために使用されます。最後に、デコーダーは補助予測ヘッドとともに、オブジェクトクエリを反復最適化してボックスと信頼スコアを生成します([出典](https://arxiv.org/pdf/2304.08069.pdf))。 + +### 主な特徴 + +- **効率的なハイブリッドエンコーダー:** BaiduのRT-DETRは、マルチスケールの特徴をイントラスケールの相互作用とクロススケールの融合を分離することで処理する効率的なハイブリッドエンコーダーを使用しています。このユニークなVision Transformersベースの設計により、計算コストを削減し、リアルタイムオブジェクト検出を実現しています。 +- **IoU-awareクエリ選択:** BaiduのRT-DETRは、IoU-awareクエリ選択を活用してオブジェクトクエリの初期化を改善します。これにより、モデルはシーン内の関連性の高いオブジェクトに焦点を当てて検出の精度を向上させることができます。 +- **適応可能な推論速度:** BaiduのRT-DETRは、再学習せずに異なるデコーダーレイヤーを使用して推論速度を柔軟に調整することができます。この適応性により、さまざまなリアルタイムオブジェクト検出シナリオでの実用的な応用が容易になります。 + +## 事前学習済みモデル + +Ultralytics Python APIは、異なるスケールの事前学習済みPaddlePaddle RT-DETRモデルを提供しています。 + +- RT-DETR-L:COCO val2017で53.0%のAP、T4 GPUで114 FPS +- RT-DETR-X:COCO val2017で54.8%のAP、T4 GPUで74 FPS + +## 使用例 + +この例では、RT-DETRの訓練と推論の簡単な例を提供します。これらと他の[モード](../modes/index.md)の詳しいドキュメントについては、[Predict](../modes/predict.md)、[Train](../modes/train.md)、[Val](../modes/val.md)、および[Export](../modes/export.md)ドキュメントページを参照してください。 + +!!! 
Example "例" + + === "Python" + + ```python + from ultralytics import RTDETR + + # COCOで事前学習済みのRT-DETR-lモデルをロードします + model = RTDETR('rtdetr-l.pt') + + # モデル情報を表示します(オプション) + model.info() + + # COCO8の例のデータセットでモデルを100エポックトレーニングします + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # 'bus.jpg'画像でRT-DETR-lモデルで推論を実行します + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + ```bash + # COCOで事前学習済みのRT-DETR-lモデルをロードし、COCO8の例のデータセットで100エポックトレーニングします + yolo train model=rtdetr-l.pt data=coco8.yaml epochs=100 imgsz=640 + + # COCOで事前学習済みのRT-DETR-lモデルをロードし、'bus.jpg'画像で推論を実行します + yolo predict model=rtdetr-l.pt source=path/to/bus.jpg + ``` + +## サポートされているタスクとモード + +この表には、各モデルがサポートするタスク、特定の事前学習済み重み、およびサポートされるさまざまなモード([Train](../modes/train.md)、[Val](../modes/val.md)、[Predict](../modes/predict.md)、[Export](../modes/export.md))が✅絵文字で示されている情報が示されています。 + +| モデルの種類 | 事前学習済み重み | サポートされるタスク | 推論 | 検証 | 訓練 | エクスポート | +|---------------------|---------------|--------------------------------|----|----|----|--------| +| RT-DETR Large | `rtdetr-l.pt` | [オブジェクト検出](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| RT-DETR Extra-Large | `rtdetr-x.pt` | [オブジェクト検出](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +## 引用と謝辞 + +研究や開発の中でBaiduのRT-DETRを使用する場合は、[元の論文](https://arxiv.org/abs/2304.08069)を引用してください。 + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{lv2023detrs, + title={DETRs Beat YOLOs on Real-time Object Detection}, + author={Wenyu Lv and Shangliang Xu and Yian Zhao and Guanzhong Wang and Jinman Wei and Cheng Cui and Yuning Du and Qingqing Dang and Yi Liu}, + year={2023}, + eprint={2304.08069}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +私たちは、Baiduと[PaddlePaddle](https://github.com/PaddlePaddle/PaddleDetection)チームに、コンピュータビジョンコミュニティ向けのこの貴重なリソースを作成しメンテナンスしていただいたことに感謝いたします。Vision Transformersベースのリアルタイムオブジェクト検出器であるRT-DETRの開発による、彼らのフィールドへの貢献は非常に評価されています。 + +*Keywords: RT-DETR, Transformer, ViT, Vision Transformers, Baidu RT-DETR, PaddlePaddle, Paddle Paddle RT-DETR, real-time object detection, Vision Transformers-based object detection, pre-trained PaddlePaddle RT-DETR models, Baidu's RT-DETR usage, Ultralytics Python API* diff --git a/docs/ja/models/sam.md b/docs/ja/models/sam.md new file mode 100644 index 0000000..ae63eb1 --- /dev/null +++ b/docs/ja/models/sam.md @@ -0,0 +1,226 @@ +--- +comments: true +description: リアルタイムの画像セグメンテーションを可能にするウルトラリティクスの最先端Segment Anything Model (SAM)を紹介します。SAMのプロンプト可能なセグメンテーション、ゼロショットパフォーマンス、使用方法について学びましょう。 +keywords: Ultralytics, 画像セグメンテーション, Segment Anything Model, SAM, SA-1B データセット, リアルタイムパフォーマンス, ゼロショット転送, 物体検出, 画像解析, 機械学習 +--- + +# Segment Anything Model (SAM) + +ウルトラリティクスのSegment Anything Model(SAM)へようこそ。この革新的なモデルは、プロンプト可能な画像セグメンテーションを実現し、リアルタイムのパフォーマンスで画期的な成果を上げ、この分野で新たな基準を設定しました。 + +## SAMの紹介: Segment Anything Model + +Segment Anything Model(SAM)は、画像解析タスクにおける柔軟なセグメンテーションを可能にする最先端の画像セグメンテーションモデルです。SAMは、画像セグメンテーションのための新しいモデル、タスク、データセットを導入した画期的なプロジェクト「Segment Anything」の中核をなしています。 + +SAMの高度な設計により、新しい画像分布やタスクに事前の知識なしで適応するゼロショット転送の機能を持っています。豊富な[SA-1B データセット](https://ai.facebook.com/datasets/segment-anything/)でトレーニングされたSAMは、10億以上のマスクを含む1,100万枚以上の厳選された画像に広がる自動的にアノテーションされたセグメンテーションマスクを備えており、多くの場合、従来の完全教師あり学習の結果を上回る卓越したゼロショットパフォーマンスを発揮しています。 + 
+![データセットサンプルイメージ](https://user-images.githubusercontent.com/26833433/238056229-0e8ffbeb-f81a-477e-a490-aff3d82fd8ce.jpg) +新たに導入されたSA-1Bデータセットからガイドマスクを重畳した例の画像です。SA-1Bには、多様な高解像度のライセンス画像と11億件以上の高品質のセグメンテーションマスクが含まれています。これらのマスクは、SAMによって完全自動的に注釈付けされ、人間の評価と数多くの実験で高品質と多様性が確認されています。画像は可視化のために画像あたりのマスクの数でグループ化されています(平均でおおよそ100個のマスクがあります)。 + +## Segment Anything Model (SAM)の主な特徴 + +- **プロンプト可能なセグメンテーションタスク:** SAMは、プロンプト(オブジェクトを特定する空間的なまたはテキスト的な手がかり)から有効なセグメンテーションマスクを生成するように設計されています。 +- **高度なアーキテクチャ:** Segment Anything Modelは、強力な画像エンコーダ、プロンプトエンコーダ、軽量のマスクデコーダを採用しています。このユニークなアーキテクチャにより、柔軟なプロンプティング、リアルタイムのマスク計算、セグメンテーションタスクの曖昧さの認識が可能です。 +- **SA-1Bデータセット:** Segment Anythingプロジェクトによって導入されたSA-1Bデータセットは、1,100万枚以上の画像に1,000,000,000件以上のマスクを提供します。これまでで最も大規模なセグメンテーションデータセットであり、SAMに多様で大規模なトレーニングデータソースを提供します。 +- **ゼロショットパフォーマンス:** SAMは、さまざまなセグメンテーションタスクで優れたゼロショットパフォーマンスを発揮し、プロンプトエンジニアリングの最小限の必要性で多様なアプリケーションに即座に使用できるツールとなります。 + +Segment Anything ModelおよびSA-1Bデータセットの詳細については、[Segment Anything website](https://segment-anything.com)をご覧いただくか、研究論文[Segment Anything](https://arxiv.org/abs/2304.02643)をご覧ください。 + +## 使用可能なモデル、サポートされるタスク、および動作モード + +このテーブルでは、使用可能なモデルとその特定の事前トレーニング済み重み、サポートされているタスク、およびInference、Validation、Training、Exportなどのさまざまな操作モードに対する互換性を示しています。サポートされるモードは✅の絵文字で表示され、サポートされていないモードは❌の絵文字で表示されます。 + +| モデルの種類 | 事前トレーニング済みの重み | サポートされているタスク | Inference | Validation | Training | Export | +|-----------|---------------|---------------------------------------------------------------|-----------|------------|----------|--------| +| SAM base | `sam_b.pt` | [Instance Segmentation(インスタンスセグメンテーション)](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| SAM large | `sam_l.pt` | [Instance Segmentation(インスタンスセグメンテーション)](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## SAMの使用方法: 画像セグメンテーションにおける柔軟性とパワー + +Segment Anything 
Modelは、トレーニングデータを超えた多くのダウンストリームタスクに使用することができます。これにはエッジ検出、オブジェクトの提案生成、インスタンスセグメンテーション、および予備的なテキストからマスクへの予測などが含まれます。プロンプトエンジニアリングを使用することで、SAMはゼロショットの方法で新しいタスクとデータ分布にすばやく適応することができ、あらゆる画像セグメンテーションに関する柔軟で強力なツールとなります。 + +### SAMの予測の例 + +!!! Example "プロンプトでセグメントする" + + 与えられたプロンプトで画像をセグメンテーションします。 + + === "Python" + + ```python + from ultralytics import SAM + + # モデルをロード + model = SAM('sam_b.pt') + + # モデル情報を表示(オプション) + model.info() + + # バウンディングボックスのプロンプトで予測を実行 + model('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + + # ポイントのプロンプトで予測を実行 + model('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +!!! Example "すべてをセグメントする" + + 画像全体をセグメンテーションします。 + + === "Python" + + ```python + from ultralytics import SAM + + # モデルをロード + model = SAM('sam_b.pt') + + # モデル情報を表示(オプション) + model.info() + + # 予測を実行 + model('path/to/image.jpg') + ``` + + === "CLI" + + ```bash + # SAMモデルで予測を実行 + yolo predict model=sam_b.pt source=path/to/image.jpg + ``` + +- ここでは、プロンプト(バウンディングボックス/ポイント/マスク)を指定しない場合は、画像全体がセグメンテーションされるロジックです。 + +!!! 
Example "SAMPredictorの例" + + 画像を一度設定し、イメージエンコーダを複数回実行することなく複数回プロンプト推論を実行できます。 + + === "プロンプト推論" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # SAMPredictorを作成 + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # イメージを設定する + predictor.set_image("ultralytics/assets/zidane.jpg") # 画像ファイルで設定する + predictor.set_image(cv2.imread("ultralytics/assets/zidane.jpg")) # np.ndarrayで設定する + results = predictor(bboxes=[439, 437, 524, 709]) + results = predictor(points=[900, 370], labels=[1]) + + # イメージをリセットする + predictor.reset_image() + ``` + + 追加の引数を指定してすべてのセグメントを設定します。 + + === "すべてをセグメントする" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # SAMPredictorを作成 + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # 追加の引数でセグメント + results = predictor(source="ultralytics/assets/zidane.jpg", crop_n_layers=1, points_stride=64) + ``` + +- `すべてをセグメントする` のための追加の引数の詳細は、[`Predictor/generate` リファレンス](../../../reference/models/sam/predict.md)を参照してください。 + +## YOLOv8とのSAM比較 + +ここでは、Metaの最小のSAMモデルであるSAM-bと、Ultralyticsの最小のセグメンテーションモデルである[YOLOv8n-seg](../tasks/segment.md)とを比較します。 + +| モデル | サイズ | パラメータ数 | スピード(CPU) | +|------------------------------------------------|-----------------------|----------------------|-----------------------| +| MetaのSAM-b | 358 MB | 94.7 M | 51096 ms/im | +| [MobileSAM](mobile-sam.md) | 40.7 MB | 10.1 M | 46122 ms/im | +| [FastSAM-s](fast-sam.md) with YOLOv8 backbone | 23.7 MB | 11.8 M | 115 ms/im | +| Ultralytics [YOLOv8n-seg](../tasks/segment.md) | **6.7 MB** (53.4倍小さい) | **3.4 M** (27.9倍少ない) | **59 ms/im** (866倍速い) | + +この比較では、モデルのサイズとスピードの桁違いの違いが示されています。SAMは自動セグメンテーションのユニークな機能を提供しますが、より小さい、より速く、より効率的なYOLOv8セグメントモデルとは競合しません。 + +テストは、2023年製のApple M2 Macbook、16GBのRAMで実行されました。このテストを再現するには: + +!!! 
Example "例" + + === "Python" + ```python + from ultralytics import FastSAM, SAM, YOLO + + # SAM-bのプロファイリング + model = SAM('sam_b.pt') + model.info() + model('ultralytics/assets') + + # MobileSAMのプロファイリング + model = SAM('mobile_sam.pt') + model.info() + model('ultralytics/assets') + + # FastSAM-sのプロファイリング + model = FastSAM('FastSAM-s.pt') + model.info() + model('ultralytics/assets') + + # YOLOv8n-segのプロファイリング + model = YOLO('yolov8n-seg.pt') + model.info() + model('ultralytics/assets') + ``` + +## オートアノテーション: セグメンテーションデータセットの迅速な作成方法 + +オートアノテーションは、SAMの主要な機能の一つであり、事前トレーニング済みの検出モデルを使用して[セグメンテーションデータセット](https://docs.ultralytics.com/datasets/segment)を生成することができます。この機能により、時間のかかる手作業のラベリング作業を回避し、大量の画像の迅速かつ正確な注釈付けが可能になります。 + +### ディテクションモデルを使用したセグメンテーションデータセットの生成 + +Ultralyticsフレームワークを使用してデータセットをオートアノテーションするには、以下のように`auto_annotate`関数を使用します: + +!!! Example "例" + + === "Python" + ```python + from ultralytics.data.annotator import auto_annotate + + auto_annotate(data="path/to/images", det_model="yolov8x.pt", sam_model='sam_b.pt') + ``` + +| 引数 | タイプ | 説明 | デフォルト | +|------------|------------------|--------------------------------------------------------------|--------------| +| data | str | 注釈を付ける画像が含まれるフォルダへのパス。 | | +| det_model | str, オプション | 事前トレーニング済みのYOLO検出モデル。デフォルトは'yolov8x.pt'。 | 'yolov8x.pt' | +| sam_model | str, オプション | 事前トレーニング済みのSAMセグメンテーションモデル。デフォルトは'sam_b.pt'。 | 'sam_b.pt' | +| device | str, オプション | モデルを実行するデバイス。デフォルトは空の文字列(CPUまたはGPUが利用可能な場合)。 | | +| output_dir | str, None, オプション | 注釈付け結果を保存するディレクトリ。デフォルトは、'data'と同じディレクトリ内の 'labels' フォルダーです。 | None | + +`auto_annotate`関数は、画像へのパス、任意の事前トレーニング済みの検出およびSAMセグメンテーションモデル、モデルを実行するデバイス、および注釈付け結果を保存する出力ディレクトリを指定するためのオプション引数を取ります。 + +事前トレーニング済みモデルを使用したオートアノテーションにより、高品質なセグメンテーションデータセットを作成するための時間と労力を大幅に節約することができます。この機能は、大量の画像コレクションに取り組んでいる研究者や開発者にとって特に有益であり、モデルの開発と評価に集中することができます。 + +## 引用と謝辞 + +SAMが研究や開発の場で役立つ場合は、引用にご協力いただけると幸いです。 + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{kirillov2023segment, + title={Segment Anything}, + author={Alexander Kirillov and Eric Mintun and Nikhila Ravi and Hanzi Mao and Chloe Rolland and Laura Gustafson and Tete Xiao and Spencer Whitehead and Alexander C. Berg and Wan-Yen Lo and Piotr Dollár and Ross Girshick}, + year={2023}, + eprint={2304.02643}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +この貴重なコンピュータビジョンコミュニティ向けのリソースを作成および維持してくれたMeta AIに感謝の意を表します。 + +*keywords: Segment Anything, Segment Anything Model, SAM, Meta SAM, 画像セグメンテーション, プロンプト可能なセグメンテーション, ゼロショットパフォーマンス, SA-1B データセット, 先進のアーキテクチャ, オートアノテーション, Ultralytics, 事前トレーニング済みモデル, SAM base, SAM large, インスタンスセグメンテーション, コンピュータビジョン, AI, 人工知能, 機械学習, データアノテーション, セグメンテーションマスク, ディテクションモデル, YOLOディテクションモデル, bibtex, Meta AI.* diff --git a/docs/ja/models/yolo-nas.md b/docs/ja/models/yolo-nas.md new file mode 100644 index 0000000..a7d1863 --- /dev/null +++ b/docs/ja/models/yolo-nas.md @@ -0,0 +1,121 @@ +--- +comments: true +description: YOLO-NASは、優れた物体検出モデルです。その機能、事前学習モデル、Ultralytics Python APIの使用法などについて詳しく説明します。 +keywords: YOLO-NAS, Deci AI, 物体検出, 深層学習, ニューラルアーキテクチャ検索, Ultralytics Python API, YOLOモデル, 事前学習モデル, 量子化, 最適化, COCO, Objects365, Roboflow 100 +--- + +# YOLO-NAS + +## 概要 + +Deci AIによって開発されたYOLO-NASは、画期的な物体検出ベースモデルです。従来のYOLOモデルの制約に対処するための高度なニューラルアーキテクチャ検索技術によって生み出されています。量子化のサポートと精度とレイテンシのトレードオフの改善により、YOLO-NASは物体検出において大きな進歩を遂げています。 + +![モデルの例の画像](https://learnopencv.com/wp-content/uploads/2023/05/yolo-nas_COCO_map_metrics.png) +**YOLO-NASの概要。** YOLO-NASは、量子化対応のブロックと選択的量子化を使用して最適なパフォーマンスを実現します。モデルをINT8で量子化すると、他のモデルよりも精度がほとんど低下せずに最適なパフォーマンスが得られます。これらの進歩により、前例のない物体検出能力と優れたパフォーマンスを備えた優れたアーキテクチャが実現されます。 + +### 主な特徴 + +- **量子化フレンドリーな基本ブロック:** YOLO-NASは、従来のYOLOモデルの制約の1つである量子化に対応した新しい基本ブロックを導入しています。 +- **洗練されたトレーニングと量子化:** YOLO-NASは、高度なトレーニングスキームとポストトレーニング量子化を活用してパフォーマンスを向上させています。 +- **AutoNAC最適化と事前学習:** YOLO-NASはAutoNAC最適化を利用し、COCO、Objects365、Roboflow 
100などの注目されるデータセットで事前学習されています。この事前学習により、製品環境での下流物体検出タスクに非常に適しています。 + +## 事前学習モデル + +Ultralyticsが提供する事前学習済みのYOLO-NASモデルを使用して、次世代の物体検出のパワーを体験してください。これらのモデルは、速度と精度の両方の面で優れたパフォーマンスを提供するように設計されています。特定のニーズに合わせてさまざまなオプションから選択できます。 + +| モデル | mAP | レイテンシ (ms) | +|------------------|-------|------------| +| YOLO-NAS S | 47.5 | 3.21 | +| YOLO-NAS M | 51.55 | 5.85 | +| YOLO-NAS L | 52.22 | 7.87 | +| YOLO-NAS S INT-8 | 47.03 | 2.36 | +| YOLO-NAS M INT-8 | 51.0 | 3.78 | +| YOLO-NAS L INT-8 | 52.1 | 4.78 | + +各モデルのバリエーションは、Mean Average Precision(mAP)とレイテンシのバランスを取り、パフォーマンスとスピードの両方に最適化されています。 + +## 使用例 + +Ultralyticsの`ultralytics` Pythonパッケージを使用して、YOLO-NASモデルをPythonアプリケーションに簡単に統合できるようにしました。このパッケージは、プロセスをスムーズにするユーザーフレンドリーなPython APIを提供します。 + +次の例では、推論と検証のために`ultralytics`パッケージを使用してYOLO-NASモデルをどのように使用するかを示しています。 + +### 推論と検証の例 + +この例では、COCO8データセットでYOLO-NAS-sを検証します。 + +!!! Example "例" + + この例では、YOLO-NASの推論と検証のためのシンプルなコードを提供しています。推論結果の処理については、[Predict](../modes/predict.md)モードを参照してください。他のモードでYOLO-NASを使用する方法については、[Val](../modes/val.md)および[Export](../modes/export.md)を参照してください。`ultralytics`パッケージのYOLO-NASはトレーニングをサポートしていません。 + + === "Python" + + Pythonで、PyTorchの事前学習済みの`*.pt`モデルファイルを`NAS()`クラスに渡すことで、モデルのインスタンスを作成できます: + + ```python + from ultralytics import NAS + + # COCO事前学習済みのYOLO-NAS-sモデルをロード + model = NAS('yolo_nas_s.pt') + + # モデル情報の表示(オプション) + model.info() + + # COCO8の例データセットでモデルを検証 + results = model.val(data='coco8.yaml') + + # 'bus.jpg'画像上でYOLO-NAS-sモデルを使用した推論 + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + モデルを直接実行するためのCLIコマンドもあります: + + ```bash + # COCO事前学習済みのYOLO-NAS-sモデルをロードし、COCO8の例データセットでパフォーマンスを検証 + yolo val model=yolo_nas_s.pt data=coco8.yaml + + # COCO事前学習済みのYOLO-NAS-sモデルをロードし、'bus.jpg'画像上で推論を実行 + yolo predict model=yolo_nas_s.pt source=path/to/bus.jpg + ``` + +## サポートされているタスクとモード + +YOLO-NASモデルは、Small(s)、Medium(m)、Large(l)の3つのバリエーションを提供しています。各バリエーションは、異なる計算リソースとパフォーマンスのニーズに対応するように設計されています: + +- **YOLO-NAS-s:** 計算リソースが限られている環境で効率が重要な場合に最適化されています。 +- **YOLO-NAS-m:** 
幅広い一般的な物体検出のニーズに適したバランスの取れたアプローチです。 +- **YOLO-NAS-l:** 計算リソースの制約が少ない最高の精度が求められるシナリオに対応しています。 + +以下は、各モデルの詳細な概要であり、それらの事前学習済み重みへのリンク、サポートされるタスク、さまざまな動作モードとの互換性が示されています。 + +| モデルの種類 | 事前学習済みの重み | サポートされるタスク | 推論 | 検証 | トレーニング | エクスポート | +|------------|-----------------------------------------------------------------------------------------------|----------------------------|----|----|--------|--------| +| YOLO-NAS-s | [yolo_nas_s.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_s.pt) | [物体検出](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-m | [yolo_nas_m.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_m.pt) | [物体検出](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-l | [yolo_nas_l.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_l.pt) | [物体検出](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | + +## 引用と謝辞 + +研究や開発の中でYOLO-NASを使用する場合は、SuperGradientsを引用してください: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{supergradients, + doi = {10.5281/ZENODO.7789328}, + url = {https://zenodo.org/record/7789328}, + author = {Aharon, Shay and {Louis-Dupont} and {Ofri Masad} and Yurkova, Kate and {Lotem Fridman} and {Lkdci} and Khvedchenya, Eugene and Rubin, Ran and Bagrov, Natan and Tymchenko, Borys and Keren, Tomer and Zhilko, Alexander and {Eran-Deci}}, + title = {Super-Gradients}, + publisher = {GitHub}, + journal = {GitHub repository}, + year = {2021}, + } + ``` + +このコンピュータビジョンコミュニティ向けの貴重なリソースを作成および維持するために、Deci AIの[SuperGradients](https://github.com/Deci-AI/super-gradients/)チームに感謝の意を表します。革新的なアーキテクチャと優れた物体検出能力を持つYOLO-NASが、開発者や研究者の重要なツールになると信じています。 + +*keywords: YOLO-NAS, Deci AI, 物体検出, 深層学習, ニューラルアーキテクチャ検索, Ultralytics Python API, YOLOモデル, SuperGradients, 事前学習モデル, 量子化フレンドリーな基本ブロック, 高度なトレーニングスキーム, ポストトレーニング量子化, AutoNAC最適化, COCO, Objects365, Roboflow 100* diff --git a/docs/ja/models/yolov3.md b/docs/ja/models/yolov3.md new file mode 100644 index 0000000..19f62af --- /dev/null +++ 
b/docs/ja/models/yolov3.md @@ -0,0 +1,98 @@ +--- +comments: true +description: YOLOv3、YOLOv3-Ultralytics、およびYOLOv3uの概要を把握してください。オブジェクト検出に対するこれらのモデルの主な特徴、使用方法、およびサポートされるタスクについて学びます。 +keywords: YOLOv3, YOLOv3-Ultralytics, YOLOv3u, オブジェクト検出, 推論, トレーニング, Ultralytics +--- + +# YOLOv3、YOLOv3-Ultralytics、およびYOLOv3u + +## 概要 + +このドキュメントでは、[YOLOv3](https://pjreddie.com/darknet/yolo/)、[YOLOv3-Ultralytics](https://github.com/ultralytics/yolov3)、および[YOLOv3u](https://github.com/ultralytics/ultralytics)という3つの関連するオブジェクト検出モデルについて概説します。 + +1. **YOLOv3:** これはYou Only Look Once (YOLO) オブジェクト検出アルゴリズムの3番目のバージョンです。Joseph Redmonによって最初に開発されたYOLOv3は、マルチスケール予測や3つの異なるサイズの検出カーネルなど、さまざまな機能を導入し、前バージョンよりも性能を向上させました。 + +2. **YOLOv3-Ultralytics:** これはUltralyticsによるYOLOv3モデルの実装です。オリジナルのYOLOv3アーキテクチャを再現し、より多くの事前学習済みモデルのサポートや簡単なカスタマイズオプションなど、追加の機能を提供します。 + +3. **YOLOv3u:** これはYOLOv8モデルで使用されるアンカーフリーでオブジェクトネスフリーなスプリットヘッドを組み込んだYOLOv3-Ultralyticsの更新版です。YOLOv3uは、YOLOv3と同じバックボーンとネックアーキテクチャを保持しながら、YOLOv8の更新された検出ヘッドを備えています。 + +![Ultralytics YOLOv3](https://raw.githubusercontent.com/ultralytics/assets/main/yolov3/banner-yolov3.png) + +## 主な特徴 + +- **YOLOv3:** 3つの異なる検出スケールを使用し、13x13、26x26、および52x52の3つの異なるサイズの検出カーネルを活用しました。これにより、さまざまなサイズのオブジェクトの検出精度が大幅に向上しました。さらに、YOLOv3では各バウンディングボックスの複数のラベル予測や、より良い特徴抽出ネットワークなどの機能が追加されました。 + +- **YOLOv3-Ultralytics:** UltralyticsのYOLOv3の実装は、オリジナルのモデルと同じ性能を提供しながら、より多くの事前学習済みモデルのサポート、追加のトレーニング方法、および簡単なカスタマイズオプションを提供します。これにより、実用的なアプリケーションにおいてより柔軟で使いやすくなります。 + +- **YOLOv3u:** この更新されたモデルは、YOLOv8から使用されているアンカーフリーでオブジェクトネスフリーなスプリットヘッドを組み込んでいます。事前定義されたアンカーボックスとオブジェクトネススコアの必要性を排除することで、この検出ヘッドの設計は、さまざまなサイズや形状のオブジェクトの検出能力を向上させることができます。これにより、YOLOv3uはオブジェクト検出タスクにおいてより堅牢で正確なモデルとなります。 + +## サポートされるタスクとモード + +YOLOv3シリーズ、YOLOv3、YOLOv3-Ultralytics、およびYOLOv3uは、オブジェクト検出タスクに特化して設計されています。これらのモデルは、精度と速度のバランスを取りながらさまざまな実世界のシナリオでの効果が高いことで知られています。各バリアントはユニークな機能と最適化を提供し、さまざまなアプリケーションに適しています。 + 
+3つのモデルは[推論](../modes/predict.md)、[検証](../modes/val.md)、[トレーニング](../modes/train.md)、および[エクスポート](../modes/export.md)など、幅広いモードをサポートしており、効果的なオブジェクト検出のための完全なツールキットを提供します。 + +| モデルの種類 | サポートされるタスク | 推論 | 検証 | トレーニング | エクスポート | +|--------------------|--------------------------------|----|----|--------|--------| +| YOLOv3 | [オブジェクト検出](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3-Ultralytics | [オブジェクト検出](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3u | [オブジェクト検出](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +この表は、各YOLOv3バリアントの機能を一目で把握するためのもので、オブジェクト検出ワークフローのさまざまなタスクと操作モードに対する多様性と適用性を強調しています。 + +## 使用例 + +この例では、YOLOv3の簡単なトレーニングおよび推論の例を提供します。これらおよびその他の[モード](../modes/index.md)の完全なドキュメンテーションについては、[Predict](../modes/predict.md)、[Train](../modes/train.md)、[Val](../modes/val.md)、および[Export](../modes/export.md)のドキュメントページを参照してください。 + +!!! Example "例" + + === "Python" + + PyTorchの事前学習済み `*.pt` モデルと設定 `*.yaml` ファイルは、`YOLO()` クラスに渡してモデルインスタンスを作成できます。 + + ```python + from ultralytics import YOLO + + # COCOで学習済みのYOLOv3nモデルをロード + model = YOLO('yolov3n.pt') + + # モデル情報の表示(任意) + model.info() + + # COCO8のサンプルデータセットでモデルを100エポックトレーニング + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # YOLOv3nモデルで'bus.jpg'画像に対して推論実行 + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLIコマンドを使用して直接モデルを実行できます。 + + ```bash + # COCOで学習済みのYOLOv3nモデルをロードし、COCO8のサンプルデータセットで100エポックトレーニング + yolo train model=yolov3n.pt data=coco8.yaml epochs=100 imgsz=640 + + # COCOで学習済みのYOLOv3nモデルをロードし、'bus.jpg'画像に対して推論実行 + yolo predict model=yolov3n.pt source=path/to/bus.jpg + ``` + +## 引用と謝辞 + +研究でYOLOv3を使用する場合は、元のYOLO論文とUltralyticsのYOLOv3リポジトリを引用してください。 + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @article{redmon2018yolov3, + title={YOLOv3: An Incremental Improvement}, + author={Redmon, Joseph and Farhadi, Ali}, + journal={arXiv preprint arXiv:1804.02767}, + year={2018} + } + ``` + +Joseph RedmonとAli Farhadiには、オリジナルのYOLOv3を開発していただいたことに感謝します。 diff --git a/docs/ja/models/yolov4.md b/docs/ja/models/yolov4.md new file mode 100644 index 0000000..19ec65a --- /dev/null +++ b/docs/ja/models/yolov4.md @@ -0,0 +1,71 @@ +--- +comments: true +description: YOLOv4は、2020年にAlexey Bochkovskiyによってリリースされた最先端のリアルタイムオブジェクト検出器に関する詳細なガイドをご覧ください。そのアーキテクチャのハイライト、革新的な機能、および応用例を理解してください。 +keywords: ultralytics、YOLOv4、オブジェクト検出、ニューラルネットワーク、リアルタイム検出、オブジェクト検出器、機械学習 +--- + +# YOLOv4: 高速で正確なオブジェクト検出 + +YOLOv4のUltralyticsドキュメンテーションページへようこそ。YOLOv4は、2020年にAlexey Bochkovskiyによってリリースされた最先端のリアルタイムオブジェクト検出器です。速度と精度の最適なバランスを提供するよう設計されており、さまざまなアプリケーションに優れた選択肢です。 + +![YOLOv4アーキテクチャ図](https://user-images.githubusercontent.com/26833433/246185689-530b7fe8-737b-4bb0-b5dd-de10ef5aface.png) +**YOLOv4アーキテクチャ図**。YOLOv4の複雑なネットワーク設計を示しており、最適なリアルタイムオブジェクト検出のためのバックボーン、ネック、およびヘッドコンポーネント、およびそれらの相互接続されたレイヤーが表示されています。 + +## 導入 + +YOLOv4は、You Only Look Once version 4の略で、[YOLOv3](yolov3.md)や他のオブジェクト検出モデルなど、以前のYOLOバージョンの制約に対処するために開発されたリアルタイムオブジェクト検出モデルです。他の畳み込みニューラルネットワーク(CNN)ベースのオブジェクト検出器とは異なり、YOLOv4は推薦システムだけでなく、スタンドアロンのプロセス管理や人間の入力削減にも適用可能です。従来のグラフィックスプロセッシングユニット(GPU)上での動作は、手ごろな価格で大量の使用が可能であり、トレーニングには1つのGPUのみが必要です。 + +## アーキテクチャ + +YOLOv4は、パフォーマンスの最適化に一緒に機能するいくつかの革新的な機能を利用しています。これには、Weighted-Residual-Connections(WRC)、Cross-Stage-Partial-connections(CSP)、Cross mini-Batch Normalization(CmBN)、Self-adversarial-training(SAT)、Mish-activation、Mosaic data augmentation、DropBlock regularization、およびCIoU lossが含まれます。これらの機能は組み合わせて、最先端の結果を達成するために使用されます。 + 
+典型的なオブジェクト検出器は、入力、バックボーン、ネック、そしてヘッドの複数の部分で構成されています。YOLOv4のバックボーンはImageNetで事前にトレーニングされ、オブジェクトのクラスと境界ボックスを予測するために使用されます。バックボーンはVGG、ResNet、ResNeXt、またはDenseNetなどの複数のモデルから選択できます。ディテクターのネック部分は、異なるステージからの特徴マップを収集するために使用され、通常、複数のボトムアップパスと複数のトップダウンパスが含まれます。ヘッド部分は、最終的なオブジェクトの検出と分類に使用されます。 + +## フリービーのバッグ + +YOLOv4は、「フリービーのバッグ」として知られる手法も使用しており、これは推論のコストを増やさずにモデルの精度を向上させる技術です。データ拡張は、オブジェクト検出で使用される一般的なフリービーのバッグの技術の一つであり、入力画像の変動性を増加させることでモデルの堅牢性を向上させます。データ拡張の例には、光度の歪み(画像の明るさ、コントラスト、色調、彩度、ノイズの調整)や幾何学的歪み(ランダムなスケーリング、クロッピング、反転、回転の追加)があります。これらの技術は、モデルが異なる種類の画像に対してより良い一般化をするのに役立ちます。 + +## 機能と性能 + +YOLOv4は、オブジェクト検出の最適な速度と精度を目指して設計されています。YOLOv4のアーキテクチャには、バックボーンとしてCSPDarknet53、ネックとしてPANet、検出ヘッドとしてYOLOv3が含まれています。この設計により、YOLOv4は印象的な速度でオブジェクト検出を実行できるため、リアルタイムアプリケーションに適しています。YOLOv4は精度でも優れており、オブジェクト検出のベンチマークで最先端の結果を達成しています。 + +## 使用例 + +現時点では、UltralyticsはYOLOv4モデルをサポートしていません。そのため、YOLOv4を使用したいユーザーは、インストールおよび使用方法に関する情報は直接YOLOv4のGitHubリポジトリを参照する必要があります。 + +ここでは、YOLOv4を使用するための一般的な手順の概要を示します。 + +1. YOLOv4のGitHubリポジトリにアクセスします: [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet). + +2. インストールのためにREADMEファイルに記載されている指示に従います。一般的には、リポジトリのクローン、必要な依存関係のインストール、および必要な環境変数のセットアップが含まれます。 + +3. インストールが完了したら、データセットの準備、モデルパラメータの設定、モデルのトレーニング、トレーニングされたモデルを使用してオブジェクト検出を実行するなど、リポジトリで提供されている使用方法に従ってモデルをトレーニングおよび使用できます。 + +特定の手順は、特定のユースケースとYOLOv4リポジトリの現在の状態によって変わる場合があります。したがって、YOLOv4 GitHubリポジトリで提供されている指示に直接参照することを強くお勧めします。 + +ご不便をおかけして申し訳ありません。YOLOv4のサポートが実装され次第、Ultralyticsでの使用例を追加してこのドキュメントを更新するよう努めます。 + +## 結論 + +YOLOv4は、速度と精度のバランスを取った強力で効率的なオブジェクト検出モデルです。トレーニング中にユニークな機能とフリービーのバッグの技術を使用することで、リアルタイムのオブジェクト検出タスクで優れたパフォーマンスを発揮します。通常のGPUを持つ誰でもがトレーニングと使用を行うことができるため、幅広いアプリケーションにアクセス可能かつ実用的です。 + +## 引用と謝辞 + +リアルタイムオブジェクト検出の分野での重要な貢献に対して、YOLOv4の著者に謝意を表します: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{bochkovskiy2020yolov4, + title={YOLOv4: Optimal Speed and Accuracy of Object Detection}, + author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, + year={2020}, + eprint={2004.10934}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +オリジナルのYOLOv4の論文は[arXiv](https://arxiv.org/pdf/2004.10934.pdf)で見つけることができます。著者は彼らの仕事を一般に公開しており、コードベースは[GitHub](https://github.com/AlexeyAB/darknet)でアクセスできます。私たちは、彼らの努力に感謝し、彼らの仕事を広いコミュニティにアクセス可能にしてくれたことに感謝しています。 diff --git a/docs/ja/models/yolov5.md b/docs/ja/models/yolov5.md new file mode 100644 index 0000000..b5e4e86 --- /dev/null +++ b/docs/ja/models/yolov5.md @@ -0,0 +1,113 @@ +--- +comments: true +description: YOLOv5uは、改良された精度と速度のトレードオフと、さまざまな物体検出タスク向けの多数の事前トレーニングモデルを備えたYOLOv5モデルの進化バージョンです。 +keywords: YOLOv5u, 物体検出, 事前トレーニングモデル, Ultralytics, Inference, Validation, YOLOv5, YOLOv8, アンカーフリー, オブジェクトフリー, リアルタイムアプリケーション, 機械学習 +--- + +# YOLOv5 + +## 概要 + +YOLOv5uは、物体検出方法論の進歩を表しています。Ultralyticsが開発した[YOLOv5](https://github.com/ultralytics/yolov5)モデルの基本アーキテクチャを起源とするYOLOv5uは、アンカーフリーでオブジェクトフリーの分割ヘッドを採用しており、以前の[YOLOv8](yolov8.md)モデルで導入された特徴です。この適応により、モデルのアーキテクチャが洗練され、物体検出タスクにおける改善された精度と速度のトレードオフが実現されます。経験的な結果と派生した特徴から明らかなとおり、YOLOv5uは、研究と実用の両方で堅牢なソリューションを求める人々にとって効率的な選択肢です。 + +![Ultralytics YOLOv5](https://raw.githubusercontent.com/ultralytics/assets/main/yolov5/v70/splash.png) + +## 主な特徴 + +- **アンカーフリーな分割Ultralyticsヘッド:** 伝統的な物体検出モデルは、事前に定義されたアンカーボックスを使用してオブジェクトの位置を予測します。しかし、YOLOv5uはこのアプローチを近代化しています。アンカーフリーな分割Ultralyticsヘッドを採用することで、より柔軟かつ適応性のある検出メカニズムが確保され、さまざまなシナリオでのパフォーマンスが向上します。 + +- **最適化された精度と速度のトレードオフ:** 速度と精度はしばしば反対の方向に引っ張られます。しかし、YOLOv5uはこのトレードオフに挑戦しています。リアルタイムの検出を確保しながら、精度を損なうことなく、キャリブレーションされたバランスを提供します。この機能は、自動車、ロボット工学、リアルタイムビデオ解析など、迅速な応答を必要とするアプリケーションに特に有用です。 + +- **さまざまな事前トレーニングモデル:** 
異なるタスクには異なるツールセットが必要であることを理解して、YOLOv5uはさまざまな事前トレーニングモデルを提供しています。Inference、Validation、Trainingに焦点を当てていても、ユーザーには待ち受けている特別に調整されたモデルがあります。この多様性により、ワンサイズがすべての解決策ではなく、一意の課題に特化したモデルを使用することができます。 + +## サポートされるタスクとモード + +各種の事前トレーニング済みのYOLOv5uモデルは、[物体検出](../tasks/detect.md)タスクで優れたパフォーマンスを発揮します。[Inference](../modes/predict.md)、[Validation](../modes/val.md)、[Training](../modes/train.md)、および[Export](../modes/export.md)などのさまざまなモードをサポートしているため、開発から展開まで幅広いアプリケーションに適しています。 + +| モデルの種類 | 事前トレーニング済みの重み | タスク | 推論 | 検証 | トレーニング | エクスポート | +|---------|-----------------------------------------------------------------------------------------------------------------------------|----------------------------|----|----|--------|--------| +| YOLOv5u | `yolov5nu`, `yolov5su`, `yolov5mu`, `yolov5lu`, `yolov5xu`, `yolov5n6u`, `yolov5s6u`, `yolov5m6u`, `yolov5l6u`, `yolov5x6u` | [物体検出](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +この表では、YOLOv5uモデルのバリアントについて詳細な概要を提供し、物体検出タスクでの適用可能性と、[Inference](../modes/predict.md)、[Validation](../modes/val.md)、[Training](../modes/train.md)、[Export](../modes/export.md)などのさまざまな操作モードのサポートを強調しています。この包括的なサポートにより、ユーザーは広範な物体検出シナリオでYOLOv5uモデルの機能を十分に活用することができます。 + +## パフォーマンスメトリクス + +!!! パフォーマンス + + === "検出" + + [COCO](https://docs.ultralytics.com/datasets/detect/coco/)でトレーニングされたこれらのモデルを使用した使用例については、[検出ドキュメント](https://docs.ultralytics.com/tasks/detect/)を参照してください。これらのモデルには80の事前トレーニングクラスが含まれています。 + + | モデル | YAML | サイズ
(pixels) | mAPval
50-95 | 速度
CPU ONNX
(ms) | 速度
A100 TensorRT
(ms) | パラメータ
(M) | FLOPS
(B) | + |-------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|-----------------------|----------------------|----------------------------|-------------------------------------|----------------------|-------------------| + | [yolov5nu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5nu.pt) | [yolov5n.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 34.3 | 73.6 | 1.06 | 2.6 | 7.7 | + | [yolov5su.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5su.pt) | [yolov5s.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 43.0 | 120.7 | 1.27 | 9.1 | 24.0 | + | [yolov5mu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5mu.pt) | [yolov5m.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 49.0 | 233.9 | 1.86 | 25.1 | 64.2 | + | [yolov5lu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5lu.pt) | [yolov5l.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 52.2 | 408.4 | 2.50 | 53.2 | 135.0 | + | [yolov5xu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5xu.pt) | [yolov5x.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 53.2 | 763.2 | 3.81 | 97.2 | 246.4 | + | | | | | | | | | + | [yolov5n6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5n6u.pt) | [yolov5n6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 42.1 | 211.0 | 1.83 | 4.3 | 7.8 | + | [yolov5s6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5s6u.pt) | 
[yolov5s6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 48.6 | 422.6 | 2.34 | 15.3 | 24.6 | + | [yolov5m6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5m6u.pt) | [yolov5m6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 53.6 | 810.9 | 4.36 | 41.2 | 65.7 | + | [yolov5l6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5l6u.pt) | [yolov5l6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 55.7 | 1470.9 | 5.47 | 86.1 | 137.4 | + | [yolov5x6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5x6u.pt) | [yolov5x6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 56.8 | 2436.5 | 8.98 | 155.4 | 250.7 | + +## 使用例 + +この例では、単純なYOLOv5のトレーニングと推論の使用例を提供します。これらと他の[モード](../modes/index.md)の完全なドキュメントについては、[Predict](../modes/predict.md)、[Train](../modes/train.md)、[Val](../modes/val.md)、[Export](../modes/export.md)のドキュメントページを参照してください。 + +!!! 
Example "例" + + === "Python" + + Pythonでモデルインスタンスを作成するには、PyTorchの事前トレーニング済みの`*.pt`モデルおよび構成`*.yaml`ファイルを`YOLO()`クラスに渡すことができます。 + + ```python + from ultralytics import YOLO + + # COCOで事前トレーニング済みのYOLOv5nモデルをロードする + model = YOLO('yolov5n.pt') + + # モデル情報を表示する(任意) + model.info() + + # COCO8の例のデータセットでモデルを100エポックトレーニングする + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # YOLOv5nモデルを使用して'bus.jpg'画像で推論を実行する + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLIコマンドを使用してモデルを直接実行することもできます。 + + ```bash + # COCOで事前トレーニング済みのYOLOv5nモデルをロードし、COCO8の例のデータセットで100エポックトレーニングする + yolo train model=yolov5n.pt data=coco8.yaml epochs=100 imgsz=640 + + # COCOで事前トレーニング済みのYOLOv5nモデルをロードし、'bus.jpg'画像で推論を実行する + yolo predict model=yolov5n.pt source=path/to/bus.jpg + ``` + +## 引用および謝辞 + +研究でYOLOv5またはYOLOv5uを使用する場合は、以下のようにUltralytics YOLOv5リポジトリを引用してください: + +!!! Quote "" + + === "BibTeX" + ```bibtex + @software{yolov5, + title = {Ultralytics YOLOv5}, + author = {Glenn Jocher}, + year = {2020}, + version = {7.0}, + license = {AGPL-3.0}, + url = {https://github.com/ultralytics/yolov5}, + doi = {10.5281/zenodo.3908559}, + orcid = {0000-0001-5950-6979} + } + ``` + +なお、YOLOv5モデルは[AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE)および[Enterprise](https://ultralytics.com/license)ライセンスの下で提供されています。 diff --git a/docs/ja/models/yolov6.md b/docs/ja/models/yolov6.md new file mode 100644 index 0000000..139b0a7 --- /dev/null +++ b/docs/ja/models/yolov6.md @@ -0,0 +1,107 @@ +--- +comments: true +description: 最先端の速度と精度のバランスを実現する、Meituan YOLOv6というオブジェクト検出モデルを紹介します。機能、事前学習済みモデル、Pythonの使用方法について深く掘り下げます。 +keywords: Meituan YOLOv6、オブジェクト検出、Ultralytics、YOLOv6ドキュメント、Bi-directional Concatenation、Anchor-Aided Training、事前学習済みモデル、リアルタイムアプリケーション +--- + +# Meituan YOLOv6 + +## 概要 + +[Meituan](https://about.meituan.com/) YOLOv6は、速度と精度のバランスに優れた最先端のオブジェクト検出器であり、リアルタイムアプリケーションにおいては人気のある選択肢となっています。このモデルは、Bi-directional 
Concatenation(BiC)モジュール、アンカー支援トレーニング(AAT)戦略の実装、およびCOCOデータセットにおける最先端の精度を実現するための改良されたバックボーンとネックの設計など、アーキテクチャとトレーニング方法にいくつかの注目すべき技術的改善をもたらしました。 + +![Meituan YOLOv6](https://user-images.githubusercontent.com/26833433/240750495-4da954ce-8b3b-41c4-8afd-ddb74361d3c2.png) +![モデルの例](https://user-images.githubusercontent.com/26833433/240750557-3e9ec4f0-0598-49a8-83ea-f33c91eb6d68.png) +**YOLOv6の概要。** モデルのアーキテクチャ図は、重要な改善点として再設計されたネットワークコンポーネントとトレーニング戦略を示しており、著しいパフォーマンス向上につながっています。 (a) YOLOv6のネック(NおよびSが表示されています)。M/Lの場合、RepBlocksはCSPStackRepで置き換えられます。 (b) BiCモジュールの構造。 (c) SimCSPSPPFブロック。 ([出典](https://arxiv.org/pdf/2301.05586.pdf))。 + +### 主な特徴 + +- **Bi-directional Concatenation(BiC)モジュール:** YOLOv6は、検出器のネックにBiCモジュールを導入し、ローカリゼーション信号を強化して性能を向上させ、速度の低下をほとんど無視できる優れた結果を実現します。 +- **アンカー支援トレーニング(AAT)戦略:** このモデルでは、AATを提案して、アンカーベースとアンカーフリーのパラダイムの両方の利点を享受することができます。これにより、推論効率を損なうことなく性能を向上させることができます。 +- **改良されたバックボーンとネックの設計:** YOLOv6のバックボーンとネックに別のステージを追加することで、このモデルはCOCOデータセットでの最先端の性能を高解像度入力で実現しています。 +- **自己蒸留戦略:** YOLOv6の小型モデルの性能を向上させるために、新しい自己蒸留戦略が実装されており、トレーニング中に補助回帰ブランチを強化し、推論時にはそれを除去して顕著な速度低下を回避します。 + +## パフォーマンスメトリクス + +YOLOv6にはさまざまなスケールの事前学習済みモデルが提供されています。 + +- YOLOv6-N: NVIDIA Tesla T4 GPUで、COCO val2017において37.5%のAPを1187 FPSで達成。 +- YOLOv6-S: 484 FPSで45.0%のAP。 +- YOLOv6-M: 226 FPSで50.0%のAP。 +- YOLOv6-L: 116 FPSで52.8%のAP。 +- YOLOv6-L6: リアルタイムでの最先端の精度。 + +YOLOv6には、異なる精度に最適化された量子化済みのモデルや、モバイルプラットフォーム向けに最適化されたモデルも提供されています。 + +## 使用例 + +この例では、YOLOv6のトレーニングおよび推論の簡単な使用例を示します。これらおよび他の[モード](../modes/index.md)の完全なドキュメントについては、[Predict](../modes/predict.md)、[Train](../modes/train.md)、[Val](../modes/val.md)、[Export](../modes/export.md)のドキュメントページを参照してください。 + +!!! 
Example "例" + + === "Python" + + PyTorchで事前学習済みの`*.pt`モデルと`*.yaml`設定ファイルを`YOLO()`クラスに渡すことで、Pythonでモデルインスタンスを作成することができます。 + + ```python + from ultralytics import YOLO + + # YOLOv6nモデルをゼロから構築する + model = YOLO('yolov6n.yaml') + + # モデルの情報を表示する(オプション) + model.info() + + # COCO8の例題データセットでモデルを100エポックトレーニングする + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # YOLOv6nモデルで'bus.jpg'画像に対して推論を実行する + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + モデルを直接実行するためのCLIコマンドも利用できます。 + + ```bash + # ゼロからYOLOv6nモデルを構築し、COCO8の例題データセットで100エポックトレーニングする + yolo train model=yolov6n.yaml data=coco8.yaml epochs=100 imgsz=640 + + # ゼロからYOLOv6nモデルを構築し、'bus.jpg'画像に対して推論を実行する + yolo predict model=yolov6n.yaml source=path/to/bus.jpg + ``` + +## サポートされるタスクとモード + +YOLOv6シリーズは、高性能の[オブジェクト検出](../tasks/detect.md)に最適化されたモデルを提供しています。これらのモデルは、さまざまな計算ニーズと精度要件に対応しており、幅広いアプリケーションに適応することができます。 + +| モデルタイプ | 事前学習済みの重み | サポートされるタスク | 推論 | 検証 | トレーニング | エクスポート | +|-----------|----------------|--------------------------------|----|----|--------|--------| +| YOLOv6-N | `yolov6-n.pt` | [オブジェクト検出](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-S | `yolov6-s.pt` | [オブジェクト検出](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-M | `yolov6-m.pt` | [オブジェクト検出](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L | `yolov6-l.pt` | [オブジェクト検出](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L6 | `yolov6-l6.pt` | [オブジェクト検出](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +この表は、YOLOv6モデルのバリアントについての詳細な概要を提供し、オブジェクト検出のタスクにおける機能と、[推論](../modes/predict.md)、[検証](../modes/val.md)、[トレーニング](../modes/train.md)、[エクスポート](../modes/export.md)などのさまざまな操作モードとの互換性を強調しています。この包括的なサポートにより、ユーザーはさまざまなオブジェクト検出シナリオでYOLOv6モデルの機能を十分に活用することができます。 + +## 引用と謝辞 + +リアルタイムオブジェクト検出の分野における重要な貢献をした著者に謝意を表します。 + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{li2023yolov6, + title={YOLOv6 v3.0: A Full-Scale Reloading}, + author={Chuyi Li and Lulu Li and Yifei Geng and Hongliang Jiang and Meng Cheng and Bo Zhang and Zaidan Ke and Xiaoming Xu and Xiangxiang Chu}, + year={2023}, + eprint={2301.05586}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +YOLOv6のオリジナル論文は[arXiv](https://arxiv.org/abs/2301.05586)で入手できます。著者は自身の研究を広く共有しており、コードベースは[GitHub](https://github.com/meituan/YOLOv6)でアクセスできます。私たちは彼らがこの分野の進歩に貢献し、その研究を広く公開していることに感謝しています。 diff --git a/docs/ja/models/yolov7.md b/docs/ja/models/yolov7.md new file mode 100644 index 0000000..3981150 --- /dev/null +++ b/docs/ja/models/yolov7.md @@ -0,0 +1,65 @@ +--- +comments: true +description: YOLOv7は高速性と精度の両方で既知の物体検出器を凌駕する最先端のリアルタイム物体検出器です。この技術では、モデル再パラメータ化、動的ラベル割り当て、拡張スケーリング、複合スケーリングなど、学習可能なBag-of-Freebies最適化に焦点を当てています。 +keywords: YOLOv7, リアルタイム物体検出器, 最先端, Ultralytics, MS COCO データセット, モデル再パラメータ化, 動的ラベル割り当て, 拡張スケーリング, 複合スケーリング +--- + +# YOLOv7: 学習可能なBag-of-Freebies + +YOLOv7は、5 FPSから160 FPSの範囲で、既知のすべての物体検出器を速度と精度の両方で凌駕する最先端のリアルタイム物体検出器です。GPU V100上で30 FPS以上の性能を持つリアルタイム物体検出器の中で、最高の精度(56.8% AP)を持っています。さらに、YOLOv7はYOLOR、YOLOX、Scaled-YOLOv4、YOLOv5などの他の物体検出器を速度と精度の面で上回っています。このモデルは、他のデータセットや事前学習重みを使用せずに、MS COCOデータセットでトレーニングされています。YOLOv7のソースコードはGitHubで入手できます。 + +![SOTA物体検出器との比較](https://github.com/ultralytics/ultralytics/assets/26833433/5e1e0420-8122-4c79-b8d0-2860aa79af92) +**最先端物体検出器との比較。** 表2の結果からわかるように、提案手法は速度と精度のトレードオフにおいて最も優れています。例えば、YOLOv7-tiny-SiLUとYOLOv5-N(r6.1)を比較すると、我々の手法は127 fps速く、APにおいて10.7%精度が向上しています。また、YOLOv7はフレームレート161 fpsで51.4%のAPを達成していますが、同じAPを持つPPYOLOE-Lのフレームレートは78 fpsのみです。パラメータ使用量に関しては、YOLOv7はPPYOLOE-Lよりも41%少ないです。さらに、114 fpsの推論速度を持つYOLOv7-Xを99 fpsの推論速度を持つYOLOv5-L(r6.1)と比較すると、YOLOv7-XはAPを3.9%向上させることができます。YOLOv7-Xをスケールの近いYOLOv5-X(r6.1)と比較すると、YOLOv7-Xの推論速度は31 fps速いです。また、パラメータ量と計算量の観点から、YOLOv7-XはYOLOv5-X(r6.1)に比べてパラメータを22%、計算量を8%削減していますが、APは2.2%向上しています([出典](https://arxiv.org/pdf/2207.02696.pdf))。 + +## 概要 + 
+リアルタイム物体検出は、マルチオブジェクトトラッキング、自動運転、ロボティクス、医療画像解析など、多くのコンピュータビジョンシステムの重要なコンポーネントです。近年、リアルタイム物体検出の開発は、さまざまなCPU、GPU、ニューラルプロセッシングユニット(NPU)の推論速度の効率的なアーキテクチャの設計と向上に焦点を当てています。YOLOv7は、エッジからクラウドまで、モバイルGPUとGPUデバイスの両方をサポートしています。 + +従来のリアルタイム物体検出器がアーキテクチャの最適化に焦点を当てるのに対し、YOLOv7では学習プロセスの最適化に注力しています。これには、推論コストを増やさずに物体検出の精度を向上させるためのモジュールや最適化手法が含まれます。これは、「学習可能なBag-of-Freebies」というコンセプトです。 + +## 主な特徴 + +YOLOv7は、いくつかの主な特徴を導入しています。 + +1. **モデル再パラメータ化**: YOLOv7は、グラデーション伝播経路の概念を持つ、さまざまなネットワークのレイヤーに適用可能な計画された再パラメータ化モデルを提案しています。 + +2. **動的ラベル割り当て**: 複数の出力層を持つモデルのトレーニングでは、異なるブランチの出力に動的なターゲットを割り当てる方法が新たな課題となります。この問題を解決するために、YOLOv7はコーストゥーファインリードガイド付きラベル割り当てと呼ばれる新しいラベル割り当て手法を導入しています。 + +3. **拡張スケーリングと複合スケーリング**: YOLOv7は、「拡張」および「複合スケーリング」の方法を提案し、効果的にパラメータと計算を利用できるリアルタイム物体検出器になります。 + +4. **効率性**: YOLOv7による方法は、最先端のリアルタイム物体検出器のパラメータ量を約40%、計算量を約50%効率的に削減し、より高速な推論速度と高い検出精度を実現します。 + +## 使用例 + +執筆時点では、Ultralyticsは現在、YOLOv7モデルをサポートしていません。そのため、YOLOv7を使用したい場合は、YOLOv7のGitHubリポジトリを直接参照する必要があります。 + +以下は、YOLOv7を使用するための典型的な手順の概要です。 + +1. YOLOv7のGitHubリポジトリにアクセスします: [https://github.com/WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7)。 + +2. READMEファイルに記載されている手順に従ってインストールします。通常、リポジトリをクローンし、必要な依存関係をインストールし、必要な環境変数を設定する必要があります。 + +3. インストールが完了したら、データセットの準備、モデルパラメータの設定、モデルのトレーニング、トレーニングされたモデルを使用して物体検出を実行するなど、リポジトリで提供されている使用方法に従って、モデルをトレーニングおよび使用することができます。 + +具体的な手順は、具体的なユースケースとYOLOv7リポジトリの現在の状態によって異なる場合があります。そのため、YOLOv7のGitHubリポジトリで提供されている手順を直接参照することを強くお勧めします。 + +YOLOv7のサポートが実装されるまで、このドキュメントを更新して、Ultralyticsの使用例を追加するための努力を続けます。 + +## 引用と謝辞 + +リアルタイム物体検出の分野での重要な貢献に対して、YOLOv7の著者に感謝いたします。 + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @article{wang2022yolov7, + title={{YOLOv7}: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors}, + author={Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + journal={arXiv preprint arXiv:2207.02696}, + year={2022} + } + ``` + +YOLOv7のオリジナル論文は[arXiv](https://arxiv.org/pdf/2207.02696.pdf)で見つけることができます。著者は自分たちの研究を広く公開しており、コードベースは[GitHub](https://github.com/WongKinYiu/yolov7)でアクセスできます。彼らの研究がこの分野を進め、他の研究者にもアクセス可能にする努力に感謝します。 diff --git a/docs/ja/models/yolov8.md b/docs/ja/models/yolov8.md new file mode 100644 index 0000000..a404036 --- /dev/null +++ b/docs/ja/models/yolov8.md @@ -0,0 +1,162 @@ +--- +comments: true +description: YOLOv8の魅力的な機能を探索しましょう。これは当社のリアルタイムオブジェクト検出器の最新バージョンです!高度なアーキテクチャ、事前学習済みモデル、そして精度と速度の最適なバランスがYOLOv8を完璧なオブジェクト検出タスクの選択肢とします。 +keywords: YOLOv8, Ultralytics, リアルタイムオブジェクト検出器, 事前学習済みモデル, ドキュメント, オブジェクト検出, YOLOシリーズ, 高度なアーキテクチャ, 精度, 速度 +--- + +# YOLOv8 + +## 概要 + +YOLOv8は、リアルタイムオブジェクト検出器のYOLOシリーズの最新版であり、精度と速度において最先端の性能を提供します。以前のYOLOバージョンの進化を基にして、YOLOv8は新機能と最適化を導入し、様々なオブジェクト検出タスクにおいて広範なアプリケーションに適した選択肢となります。 + +![Ultralytics YOLOv8](https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/yolo-comparison-plots.png) + +## 主な特徴 + +- **高度なバックボーンおよびネックアーキテクチャ:** YOLOv8は最先端のバックボーンとネックアーキテクチャを採用しており、特徴抽出とオブジェクト検出のパフォーマンスが向上しています。 +- **アンカーフリーな分割Ultralyticsヘッド:** YOLOv8はアンカーベースの方法と比べて、アンカーフリーな分割Ultralyticsヘッドを採用し、より高い精度と効率的な検出プロセスに貢献しています。 +- **最適な精度と速度のトレードオフの最適化:** 精度と速度の最適なバランスを保ちながら、YOLOv8は多岐にわたるリアルタイムオブジェクト検出タスクに適しています。 +- **さまざまな事前学習済みモデル:** YOLOv8はさまざまなタスクやパフォーマンス要件に対応する事前学習済みモデルを提供しており、特定のユースケースに適したモデルを簡単に見つけることができます。 + +## サポートされるタスクとモード + +YOLOv8シリーズは、コンピュータビジョンのさまざまなタスクに特化したさまざまなモデルを提供しています。これらのモデルは、オブジェクト検出からインスタンスセグメンテーション、ポーズ/キーポイントの検出、分類などのより複雑なタスクまで、さまざまな要件に対応するように設計されています。 + +YOLOv8シリーズの各バリアントは、それぞれのタスクに最適化されており、高いパフォーマンスと精度を提供します。また、これらのモデルは、推論、検証、トレーニング、エクスポートといったさまざまな運用モードと互換性があり、展開と開発のさまざまな段階で使用することができます。 + +| モデル | ファイル名 | タスク | 推論 | 検証 | 
トレーニング | エクスポート | +|-------------|----------------------------------------------------------------------------------------------------------------|----------------------------------------|----|----|--------|--------| +| YOLOv8 | `yolov8n.pt` `yolov8s.pt` `yolov8m.pt` `yolov8l.pt` `yolov8x.pt` | [検出](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-seg | `yolov8n-seg.pt` `yolov8s-seg.pt` `yolov8m-seg.pt` `yolov8l-seg.pt` `yolov8x-seg.pt` | [インスタンスセグメンテーション](../tasks/segment.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-pose | `yolov8n-pose.pt` `yolov8s-pose.pt` `yolov8m-pose.pt` `yolov8l-pose.pt` `yolov8x-pose.pt` `yolov8x-pose-p6.pt` | [ポーズ/キーポイント](../tasks/pose.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-cls | `yolov8n-cls.pt` `yolov8s-cls.pt` `yolov8m-cls.pt` `yolov8l-cls.pt` `yolov8x-cls.pt` | [分類](../tasks/classify.md) | ✅ | ✅ | ✅ | ✅ | + +この表は、YOLOv8モデルのバリアントの概要を提供し、特定のタスクでの適用性と推論、検証、トレーニング、エクスポートなどのさまざまな運用モードとの互換性を示しています。これにより、YOLOv8シリーズの多様性と堅牢性が示されており、コンピュータビジョンのさまざまなアプリケーションに適しています。 + +## 性能メトリクス + +!!! パフォーマンス + + === "検出(COCO)" + + [COCO](https://docs.ultralytics.com/datasets/detect/coco/)でトレーニングされたこれらのモデルを使用した使用例については、[検出ドキュメント](https://docs.ultralytics.com/tasks/detect/)を参照してください。これらのモデルには80の事前学習済みクラスが含まれています。 + + | モデル | サイズ(ピクセル) | mAPval
50-95 | スピード(CPU ONNX
(ミリ秒)) | スピード(A100 TensorRT
(ミリ秒)) | パラメータ数(百万) | FLOPs(10億) | + | ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | + + === "検出(Open Images V7)" + + [Open Image V7](https://docs.ultralytics.com/datasets/detect/open-images-v7/)でトレーニングされたこれらのモデルを使用した使用例については、[検出ドキュメント](https://docs.ultralytics.com/tasks/detect/)を参照してください。これらのモデルには600の事前訓練クラスが含まれています。 + + | モデル | サイズ(ピクセル) | mAPval
50-95 | スピード(CPU ONNX
(ミリ秒)) | スピード(A100 TensorRT
(ミリ秒)) | パラメータ数(百万) | FLOPs(10億) | + | ----------------------------------------------------------------------------------------- | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-oiv7.pt) | 640 | 18.4 | 142.4 | 1.21 | 3.5 | 10.5 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-oiv7.pt) | 640 | 27.7 | 183.1 | 1.40 | 11.4 | 29.7 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-oiv7.pt) | 640 | 33.6 | 408.5 | 2.26 | 26.2 | 80.6 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-oiv7.pt) | 640 | 34.9 | 596.9 | 2.43 | 44.1 | 167.4 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-oiv7.pt) | 640 | 36.3 | 860.6 | 3.56 | 68.7 | 260.6 | + + === "セグメンテーション(COCO)" + + [COCO](https://docs.ultralytics.com/datasets/segment/coco/)でトレーニングされたこれらのモデルを使用した使用例については、[セグメンテーションドキュメント](https://docs.ultralytics.com/tasks/segment/)を参照してください。これらのモデルには80の事前学習済みクラスが含まれています。 + + | モデル | サイズ(ピクセル) | mAPbox
50-95 | mAPmask
50-95 | スピード(CPU ONNX
(ミリ秒)) | スピード(A100 TensorRT
(ミリ秒)) | パラメータ数(百万) | FLOPs(10億) | + | -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | + | [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | + | [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | + | [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | + | [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | + + === "分類(ImageNet)" + + [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/)でトレーニングされたこれらのモデルを使用した使用例については、[分類ドキュメント](https://docs.ultralytics.com/tasks/classify/)を参照してください。これらのモデルには1000の事前学習済みクラスが含まれています。 + + | モデル | サイズ(ピクセル) | top1精度 | top5精度 | スピード(CPU ONNX
(ミリ秒)) | スピード(A100 TensorRT
(ミリ秒)) | パラメータ数(百万) | FLOPs(640ピクセル時の10億) | + | -------------------------------------------------------------------------------------------- | --------------------- | --------- | --------- | ------------------------------ | ----------------------------------- | ------------------ | ------------------------ | + | [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | + | [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | + | [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | + | [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | + | [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | + + === "ポーズ(COCO)" + + ['person']クラスの1つの事前学習済みクラスを含む、[COCO](https://docs.ultralytics.com/datasets/pose/coco/)でトレーニングされたこれらのモデルを使用した使用例については、[ポーズ推定ドキュメント](https://docs.ultralytics.com/tasks/pose/)を参照してください。 + + | モデル | サイズ(ピクセル) | mAPpose
50-95 | mAPpose
50 | スピード(CPU ONNX
(ミリ秒)) | スピード(A100 TensorRT
(ミリ秒)) | パラメータ数(百万) | FLOPs(10億) | + | ---------------------------------------------------------------------------------------------------- | --------------------- | --------------------- | ------------------ | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | + | [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | + | [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | + | [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | + | [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | + | [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | + +## 使用例 + +この例では、YOLOv8のシンプルなトレーニングと推論の使用例を提供しています。これらとその他のモードに関する完全なドキュメントについては、[Predict](../modes/predict.md)、[Train](../modes/train.md)、[Val](../modes/val.md)、および[Export](../modes/export.md)のドキュメントページを参照してください。 + +以下の例は、オブジェクト検出のためのYOLOv8 [Detect](../tasks/detect.md)モデル用のものです。他のサポートされるタスクについては、[Segment](../tasks/segment.md)、[Classify](../tasks/classify.md)、および[Pose](../tasks/pose.md)のドキュメントを参照してください。 + +!!! 
Example "例" + + === "Python" + + Pythonでの、`.pt`形式の事前学習済みPyTorchモデルおよび設定ファイル`*.yaml`を使用して、`YOLO()`クラスを介してモデルインスタンスを作成する方法の例です: + + ```python + from ultralytics import YOLO + + # COCOで事前学習済みのYOLOv8nモデルをロード + model = YOLO('yolov8n.pt') + + # モデルの情報を表示(オプション) + model.info() + + # COCO8の例データセットで100エポックトレーニングを実行 + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # 'bus.jpg'画像に対してYOLOv8nモデルで推論を実行 + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + 直接モデルを実行するためのCLIコマンドも使用できます: + + ```bash + # COCOで事前学習済みのYOLOv8nモデルをロードし、COCO8の例データセットで100エポックトレーニングを実行 + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # COCOで事前学習済みのYOLOv8nモデルをロードし、'bus.jpg'画像に対して推論を実行 + yolo predict model=yolov8n.pt source=path/to/bus.jpg + ``` + +## 引用および謝辞 + +このリポジトリからYOLOv8モデルまたは他のソフトウェアを使用する場合は、次の形式で引用してください: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @software{yolov8_ultralytics, + author = {Glenn Jocher and Ayush Chaurasia and Jing Qiu}, + title = {Ultralytics YOLOv8}, + version = {8.0.0}, + year = {2023}, + url = {https://github.com/ultralytics/ultralytics}, + orcid = {0000-0001-5950-6979, 0000-0002-7603-6750, 0000-0003-3783-7069}, + license = {AGPL-3.0} + } + ``` + +DOIは準備中であり、利用可能になり次第、引用に追加されます。YOLOv8モデルは[AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE)および[Enterprise](https://ultralytics.com/license)ライセンスで提供されています。 diff --git a/docs/ja/modes/benchmark.md b/docs/ja/modes/benchmark.md new file mode 100644 index 0000000..91572f3 --- /dev/null +++ b/docs/ja/modes/benchmark.md @@ -0,0 +1,94 @@ +--- +comments: true +description: YOLOv8を様々なエクスポート形式でスピードと精度をプロファイリングする方法を学び、mAP50-95、accuracy_top5のメトリクスなどの洞察を得る。 +keywords: Ultralytics, YOLOv8, ベンチマーク, スピードプロファイリング, 精度プロファイリング, mAP50-95, accuracy_top5, ONNX, OpenVINO, TensorRT, YOLOエクスポート形式 +--- + +# Ultralytics YOLO でのモデルベンチマーク + +Ultralytics YOLOエコシステムと統合 + +## はじめに + +モデルがトレーニングされ、検証された後、次の論理的なステップは、さまざまな現実世界のシナリオでのパフォーマンスを評価することです。Ultralytics YOLOv8 
のベンチマークモードは、さまざまなエクスポート形式でモデルのスピードと精度を評価するための頑健なフレームワークを提供します。 + +## ベンチマークが重要な理由は? + +- **情報に基づいた決定:** スピードと精度のトレードオフについての洞察を得る。 +- **リソース割り当て:** 異なるハードウェアで異なるエクスポート形式がどのように動作するかを理解する。 +- **最適化:** 特定のユースケースで最高のパフォーマンスを提供するエクスポート形式を学ぶ。 +- **コスト効率:** ベンチマーク結果に基づいてハードウェアリソースをより効率的に使用する。 + +### ベンチマークモードでの主要なメトリクス + +- **mAP50-95:** 物体検出、セグメンテーション、ポーズ推定に使用。 +- **accuracy_top5:** 画像分類に使用。 +- **推論時間:** 各画像に要する時間(ミリ秒)。 + +### サポートされるエクスポート形式 + +- **ONNX:** 最適なCPUパフォーマンスのために +- **TensorRT:** 最大限のGPU効率のために +- **OpenVINO:** Intelハードウェアの最適化のために +- **CoreML, TensorFlow SavedModel など:** 多様なデプロイメントニーズに。 + +!!! Tip "ヒント" + + * CPUスピードアップのためにONNXまたはOpenVINOにエクスポートする。 + * GPUスピードアップのためにTensorRTにエクスポートする。 + +## 使用例 + +ONNX、TensorRTなど、すべてのサポートされるエクスポート形式でYOLOv8nベンチマークを実行します。完全なエクスポート引数のリストについては、以下の「引数」セクションを参照してください。 + +!!! Example "例" + + === "Python" + + ```python + from ultralytics.utils.benchmarks import benchmark + + # GPUでベンチマーク + benchmark(model='yolov8n.pt', data='coco8.yaml', imgsz=640, half=False, device=0) + ``` + === "CLI" + + ```bash + yolo benchmark model=yolov8n.pt data='coco8.yaml' imgsz=640 half=False device=0 + ``` + +## 引数 + +`model`、`data`、`imgsz`、`half`、`device`、`verbose` などの引数は、特定のニーズに合わせてベンチマークを微調整し、さまざまなエクスポート形式のパフォーマンスを容易に比較するためにユーザーに柔軟性を提供します。 + +| キー | 値 | 説明 | +|-----------|---------|-----------------------------------------------------------| +| `model` | `None` | モデルファイルへのパス、例: yolov8n.pt, yolov8n.yaml | +| `data` | `None` | ベンチマークデータセットを参照するYAMLへのパス(`val`ラベルの下) | +| `imgsz` | `640` | 画像サイズをスカラーまたは(h, w)リストで、例: (640, 480) | +| `half` | `False` | FP16量子化 | +| `int8` | `False` | INT8量子化 | +| `device` | `None` | 実行デバイス、例: cuda device=0 または device=0,1,2,3 または device=cpu | +| `verbose` | `False` | エラー時に続行しない(bool)、またはvalの下限しきい値(float) | + +## エクスポート形式 + +以下の可能なすべてのエクスポート形式で自動的にベンチマークを試みます。 + +| 形式 | `format` 引数 | モデル | メタデータ | 引数 | 
+|--------------------------------------------------------------------|---------------|---------------------------|-------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +[エクスポート](https://docs.ultralytics.com/modes/export/)ページでさらに詳しい`export`の詳細をご覧ください。 diff --git a/docs/ja/modes/export.md b/docs/ja/modes/export.md new file mode 100644 index 0000000..fbd30c3 --- /dev/null +++ b/docs/ja/modes/export.md @@ -0,0 +1,76 @@ +--- +comments: true +description: YOLOv8モデルをONNX, TensorRT, CoreMLなどの様々なフォーマットへエキスポートする手順についてのガイドです。今すぐ展開を探求してください! 
+keywords: YOLO, YOLOv8, Ultralytics, モデルエキスポート, ONNX, TensorRT, CoreML, TensorFlow SavedModel, OpenVINO, PyTorch, モデルをエキスポート +--- + +# Ultralytics YOLO でのモデルエキスポート + +Ultralytics YOLO エコシステムと統合 + +## はじめに + +モデルのトレーニング終了後の最終目標は、実世界のアプリケーションに導入することです。Ultralytics YOLOv8のエキスポートモードは、トレーニング済みモデルを異なるフォーマットにエキスポートして、様々なプラットフォームやデバイスで展開可能にするための多様なオプションを提供します。この包括的なガイドは、モデルエキスポートのニュアンスをわかりやすく解説し、最大の互換性とパフォーマンスを達成する方法をご紹介します。 + +

+
+ +
+ 視聴: カスタムトレーニングしたUltralytics YOLOv8モデルをエキスポートして、ウェブカムでリアルタイム推論を実行する方法。 +

+ +## YOLOv8のエキスポートモードを選ぶ理由は? + +- **汎用性:** ONNX, TensorRT, CoreMLなど複数のフォーマットへエキスポート。 +- **パフォーマンス:** TensorRTで最大5倍のGPU高速化、ONNXまたはOpenVINOで3倍のCPU高速化を実現。 +- **互換性:** 様々なハードウェアおよびソフトウェア環境でユニバーサルにモデルを展開。 +- **使いやすさ:** シンプルなCLIおよびPython APIで簡単かつ迅速なモデルエキスポート。 + +### エキスポートモードの主要機能 + +いくつかの注目すべき機能は以下の通りです: + +- **ワンクリックエキスポート:** 異なるフォーマットへのシンプルなコマンド。 +- **バッチエキスポート:** バッチ推論対応モデルをエキスポート。 +- **最適化推論:** より高速な推論のために最適化されたエキスポートモデル。 +- **チュートリアル動画:** スムーズなエキスポート体験のための詳細なガイドとチュートリアル。 + +!!! Tip "ヒント" + + * ONNXまたはOpenVINOへのエキスポートで最大3倍のCPU速度アップ。 + * TensorRTへのエキスポートで最大5倍のGPU速度アップ。 + +## 使用例 + +YOLOv8nモデルをONNXやTensorRTなどの異なるフォーマットにエキスポートします。エキスポート引数のフルリストについては、以下のArgumentsセクションをご覧ください。 + +!!! Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n.pt') # 公式モデルを読み込む + model = YOLO('path/to/best.pt') # カスタムトレーニングモデルを読み込む + + # モデルをエキスポート + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # 公式モデルをエキスポート + yolo export model=path/to/best.pt format=onnx # カスタムトレーニングモデルをエキスポート + ``` + +## 引数 + +YOLOモデルのエキスポート設定 + +[...content truncated for length...] diff --git a/docs/ja/modes/index.md b/docs/ja/modes/index.md new file mode 100644 index 0000000..2e9abc1 --- /dev/null +++ b/docs/ja/modes/index.md @@ -0,0 +1,53 @@ +--- +comments: true +description: トレーニングからトラッキングまで、UltralyticsのYOLOv8を活用して最大限に活用します。検証、エクスポート、ベンチマーキングなど、各サポートモードの洞察と例を得る。 +keywords: Ultralytics, YOLOv8, 機械学習, オブジェクト検出, トレーニング, 検証, 予測, エクスポート, トラッキング, ベンチマーキング +--- + +# Ultralytics YOLOv8 モード + +Ultralytics YOLOエコシステムと統合 + +## はじめに + +Ultralytics YOLOv8は、単なるオブジェクト検出モデルではありません; これは、データ収集、モデルトレーニングから検証、デプロイメント、実際のトラッキングに至るまでの機械学習モデルのライフサイクル全体をカバーするように設計された多用途フレームワークです。各モードは特定の目的に役立ち、異なるタスクやユースケースに必要な柔軟性と効率を提供するように工夫されています。 + +

+
+ +
+ 視聴する: Ultralytics モードチュートリアル: トレーニング、検証、予測、エクスポート & ベンチマーキング。 +

+ +### モードの概要 + +Ultralytics YOLOv8がサポートする異なる**モード**を理解することは、モデルを最大限に活用するためには欠かせません: + +- **トレーニング**モード:カスタムまたは事前読み込みされたデータセットでのモデルの微調整を行います。 +- **バリデーション**モード:トレーニング後のチェックポイントとして、モデルの性能を検証します。 +- **予測**モード:実世界のデータでモデルの予測能力を発揮します。 +- **エクスポート**モード:様々な形式でデプロイメント準備ができたモデルを作成します。 +- **トラッキング**モード:オブジェクト検出モデルをリアルタイム追跡アプリケーションに拡張します。 +- **ベンチマーキング**モード:様々なデプロイメント環境でモデルの速度と精度を分析します。 + +この包括的なガイドは、YOLOv8の全潜在能力を引き出すための概要と実践的な洞察を提供することを目指しています。 + +## [トレーニング](train.md) + +トレーニングモードは、カスタムデータセットでYOLOv8モデルのトレーニングを行うために使用されます。このモードでは、指定されたデータセットとハイパーパラメータを使用してモデルがトレーニングされます。トレーニングプロセスには、モデルのパラメータを最適化して、写真内のオブジェクトのクラスと位置を正確に予測できるようにする作業が含まれます。 + +[トレーニング例](train.md){ .md-button } + +## [バリデーション](val.md) + +バリデーションモードは、トレーニングされたYOLOv8モデルを検証するために使用されます。このモードでは、モデルがバリデーションセットで評価され、その精度と一般化性能を測定します。このモードは、モデルのハイパーパラメータを調整し、そのパフォーマンスを向上させるために利用できます。 + +[バリデーション例](val.md){ .md-button } + +## [予測](predict.md) + +予測モードは、新しい画像やビデオでトレーニングされたYOLOv8モデルを使用して予測を行うために使用されます。このモードでは、モデルがチェックポイントファイルから読み込まれ、ユーザーが推論を行うために画像やビデオを提供できます。モデルは、入力した画像やビデオ内のオブジェクトのクラスと位置を予測します。 diff --git a/docs/ja/modes/predict.md b/docs/ja/modes/predict.md new file mode 100644 index 0000000..6708187 --- /dev/null +++ b/docs/ja/modes/predict.md @@ -0,0 +1,211 @@ +--- +comments: true +description: YOLOv8予測モードの使用方法について学び、画像、動画、データフォーマットなどさまざまな推論ソースについて解説します。 +keywords: Ultralytics, YOLOv8, 予測モード, 推論ソース, 予測タスク, ストリーミングモード, 画像処理, 動画処理, 機械学習, AI +--- + +# Ultralytics YOLOによるモデル予測 + +Ultralytics YOLOエコシステムと統合 + +## イントロダクション + +機械学習やコンピュータビジョンの世界では、視覚データから意味を引き出すプロセスは「推論」または「予測」と呼ばれています。UltralyticsのYOLOv8は、幅広いデータソースに対して高性能でリアルタイムな推論を行うために特化した、「予測モード」と呼ばれる強力な機能を提供しています。 + +

+
+ +
+ 視聴: Ultralytics YOLOv8モデルの出力をカスタムプロジェクトに取り込む方法を学ぶ。 +

+ +## 実際の応用例 + +| 製造業 | スポーツ | 安全 | +|:-----------------------------------------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------------------------:| +| ![Vehicle Spare Parts Detection](https://github.com/RizwanMunawar/ultralytics/assets/62513924/a0f802a8-0776-44cf-8f17-93974a4a28a1) | ![Football Player Detection](https://github.com/RizwanMunawar/ultralytics/assets/62513924/7d320e1f-fc57-4d7f-a691-78ee579c3442) | ![People Fall Detection](https://github.com/RizwanMunawar/ultralytics/assets/62513924/86437c4a-3227-4eee-90ef-9efb697bdb43) | +| 車両のスペアパーツ検出 | フットボール選手検出 | 人の転倒検出 | + +## 予測にUltralytics YOLOを使う理由 + +様々な推論ニーズにYOLOv8の予測モードを検討すべき理由です: + +- **柔軟性:** 画像、動画、さらにはライブストリームにおいて推論を行う能力があります。 +- **パフォーマンス:** 正確さを犠牲にすることなく、リアルタイムで高速な処理が行えるように設計されています。 +- **使いやすさ:** 迅速な展開とテストのための直感的なPythonおよびCLIインターフェース。 +- **高いカスタマイズ性:** 特定の要件に応じてモデルの推論動作を調整するためのさまざまな設定とパラメーター。 + +### 予測モードの主な特徴 + +YOLOv8の予測モードは、頑健で多様性があり、次の特徴を備えています: + +- **複数のデータソースとの互換性:** データが個々の画像、画像の集合、動画ファイル、またはリアルタイムの動画ストリームのいずれの形式であっても、予測モードが対応しています。 +- **ストリーミングモード:** `Results`オブジェクトのメモリ効率の良いジェネレータを生成するためにストリーミング機能を使用します。`stream=True`を予測器の呼び出しメソッドに設定することにより有効になります。 +- **バッチ処理:** 単一のバッチで複数の画像や動画フレームを処理する能力は、さらに推論時間を短縮します。 +- **統合が容易:** 柔軟なAPIのおかげで、既存のデータパイプラインや他のソフトウェアコンポーネントに簡単に統合できます。 + +UltralyticsのYOLOモデルは、`stream=True`が推論中にモデルに渡されると、Pythonの`Results`オブジェクトのリストまたは`Results`オブジェクトのメモリ効率の良いPythonジェネレータのいずれかを返します: + +!!! 
Example "予測" + + === "`stream=False`でリストを返す" + ```python + from ultralytics import YOLO + + # モデルをロード + model = YOLO('yolov8n.pt') # 事前にトレーニングされたYOLOv8nモデル + + # 画像のリストに対してバッチ推論を実行 + results = model(['im1.jpg', 'im2.jpg']) # Resultsオブジェクトのリストを返す + + # 結果リストを処理 + for result in results: + boxes = result.boxes # バウンディングボックス出力用のBoxesオブジェクト + masks = result.masks # セグメンテーションマスク出力用のMasksオブジェクト + keypoints = result.keypoints # 姿勢出力用のKeypointsオブジェクト + probs = result.probs # 分類出力用のProbsオブジェクト + ``` + + === "`stream=True`でジェネレータを返す" + ```python + from ultralytics import YOLO + + # モデルをロード + model = YOLO('yolov8n.pt') # 事前にトレーニングされたYOLOv8nモデル + + # 画像のリストに対してバッチ推論を実行 + results = model(['im1.jpg', 'im2.jpg'], stream=True) # Resultsオブジェクトのジェネレータを返す + + # 結果ジェネレータを処理 + for result in results: + boxes = result.boxes # バウンディングボックス出力用のBoxesオブジェクト + masks = result.masks # セグメンテーションマスク出力用のMasksオブジェクト + keypoints = result.keypoints # 姿勢出力用のKeypointsオブジェクト + probs = result.probs # 分類出力用のProbsオブジェクト + ``` + +## 推論ソース + +YOLOv8は、以下の表に示されるように、異なるタイプの入力ソースを推論に処理できます。ソースには静止画像、動画ストリーム、およびさまざまなデータフォーマットが含まれます。表には、各ソースがストリーミングモードで使用できるかどうかも示されており、引数`stream=True`で✅が表示されています。ストリーミングモードは、動画やライブストリームを処理する場合に有利であり、すべてのフレームをメモリにロードする代わりに結果のジェネレータを作成します。 + +!!! 
Tip "ヒント" + + 長い動画や大きなデータセットを効率的にメモリ管理するために`stream=True`を使用します。`stream=False`では、すべてのフレームまたはデータポイントの結果がメモリに格納されますが、大きな入力で迅速にメモリが積み上がり、メモリ不足のエラーを引き起こす可能性があります。対照的に、`stream=True`はジェネレータを利用し、現在のフレームまたはデータポイントの結果のみをメモリに保持し、メモリ消費を大幅に削減し、メモリ不足の問題を防ぎます。 + +| ソース | 引数 | タイプ | 備考 | +|------------|--------------------------------------------|------------------|------------------------------------------------------------------| +| 画像 | `'image.jpg'` | `str` または `Path` | 単一の画像ファイル。 | +| URL | `'https://ultralytics.com/images/bus.jpg'` | `str` | 画像へのURL。 | +| スクリーンショット | `'screen'` | `str` | スクリーンショットをキャプチャ。 | +| PIL | `Image.open('im.jpg')` | `PIL.Image` | HWCフォーマットでRGBチャンネル。 | +| OpenCV | `cv2.imread('im.jpg')` | `np.ndarray` | HWCフォーマットでBGRチャンネル `uint8 (0-255)`。 | +| numpy | `np.zeros((640,1280,3))` | `np.ndarray` | HWCフォーマットでBGRチャンネル `uint8 (0-255)`。 | +| torch | `torch.zeros(16,3,320,640)` | `torch.Tensor` | BCHWフォーマットでRGBチャンネル `float32 (0.0-1.0)`。 | +| CSV | `'sources.csv'` | `str` または `Path` | 画像、動画、ディレクトリへのパスを含むCSVファイル。 | +| 動画 ✅ | `'video.mp4'` | `str` または `Path` | MP4、AVIなどの形式の動画ファイル。 | +| ディレクトリ ✅ | `'path/'` | `str` または `Path` | 画像または動画を含むディレクトリへのパス。 | +| グロブ ✅ | `'path/*.jpg'` | `str` | 複数のファイルに一致するグロブパターン。ワイルドカードとして`*`文字を使用します。 | +| YouTube ✅ | `'https://youtu.be/LNwODJXcvt4'` | `str` | YouTube動画のURL。 | +| ストリーム ✅ | `'rtsp://example.com/media.mp4'` | `str` | RTSP、RTMP、TCP、IPアドレスなどのストリーミングプロトコルのためのURL。 | +| マルチストリーム ✅ | `'list.streams'` | `str` または `Path` | ストリームURLを行ごとに1つ含む`*.streams`テキストファイル。つまり、8つのストリームをバッチサイズ8で実行します。 | + +以下は、それぞれのソースタイプを使用するためのコード例です: + +!!! 
Example "予測ソース" + + === "画像" + 画像ファイルに推論を実行します。 + ```python + from ultralytics import YOLO + + # 事前にトレーニングされたYOLOv8nモデルをロード + model = YOLO('yolov8n.pt') + + # 画像ファイルへのパスを定義 + source = 'path/to/image.jpg' + + # ソースに推論を実行 + results = model(source) # Resultsオブジェクトのリスト + ``` + + === "スクリーンショット" + 現在の画面内容のスクリーンショットに推論を実行します。 + ```python + from ultralytics import YOLO + + # 事前にトレーニングされたYOLOv8nモデルをロード + model = YOLO('yolov8n.pt') + + # 現在のスクリーンショットをソースとして定義 + source = 'screen' + + # ソースに推論を実行 + results = model(source) # Resultsオブジェクトのリスト + ``` + + === "URL" + リモートのURL経由でホストされている画像や動画に推論を実行します。 + ```python + from ultralytics import YOLO + + # 事前にトレーニングされたYOLOv8nモデルをロード + model = YOLO('yolov8n.pt') + + # リモート画像や動画のURLを定義 + source = 'https://ultralytics.com/images/bus.jpg' + + # ソースに推論を実行 + results = model(source) # Resultsオブジェクトのリスト + ``` + + === "PIL" + Python Imaging Library (PIL)を使用して開いた画像に推論を実行します。 + ```python + from PIL import Image + from ultralytics import YOLO + + # 事前にトレーニングされたYOLOv8nモデルをロード + model = YOLO('yolov8n.pt') + + # PILを使用して画像を開く + source = Image.open('path/to/image.jpg') + + # ソースに推論を実行 + results = model(source) # Resultsオブジェクトのリスト + ``` + + === "OpenCV" + OpenCVを使用して読み込んだ画像に推論を実行します。 + ```python + import cv2 + from ultralytics import YOLO + + # 事前にトレーニングされたYOLOv8nモデルをロード + model = YOLO('yolov8n.pt') + + # OpenCVを使用して画像を読み込む + source = cv2.imread('path/to/image.jpg') + + # ソースに推論を実行 + results = model(source) # Resultsオブジェクトのリスト + ``` + + === "numpy" + numpy配列として表される画像に推論を実行します。 + ```python + import numpy as np + from ultralytics import YOLO + + # 事前にトレーニングされたYOLOv8nモデルをロード + model = YOLO('yolov8n.pt') + + # HWC形状(640, 640, 3)、範囲[0, 255]、型`uint8`のランダムなnumpy配列を作成 + source = np.random.randint(low=0, high=255, size=(640,640,3), dtype='uint8') + + # ソースに推論を実行 + results = model(source) # Resultsオブジェクトのリスト + ``` diff --git a/docs/ja/modes/track.md b/docs/ja/modes/track.md new file mode 100644 index 0000000..56453f9 --- /dev/null +++ b/docs/ja/modes/track.md @@ 
-0,0 +1,200 @@ +--- +comments: true +description: Ultralytics YOLOを使用したビデオストリームでのオブジェクトトラッキングの使用方法を学びます。異なるトラッカーの使用ガイドとトラッカー構成のカスタマイズについて。 +keywords: Ultralytics, YOLO, オブジェクトトラッキング, ビデオストリーム, BoT-SORT, ByteTrack, Pythonガイド, CLIガイド +--- + +# Ultralytics YOLOによる複数オブジェクトのトラッキング + +複数オブジェクトのトラッキング例 + +ビデオ分析の領域でのオブジェクトトラッキングは、フレーム内のオブジェクトの位置とクラスを特定するだけでなく、ビデオが進行するにつれてそれぞれの検出されたオブジェクトにユニークなIDを維持する重要なタスクです。その応用範囲は無限で、監視やセキュリティからリアルタイムスポーツ分析まで及びます。 + +## オブジェクトトラッキングにUltralytics YOLOを選ぶ理由は? + +Ultralyticsのトラッカーからの出力は標準のオブジェクト検出と一致しており、さらにオブジェクトIDの付加価値があります。これにより、ビデオストリーム内のオブジェクトを追跡し、後続の分析を行うことが容易になります。Ultralytics YOLOをオブジェクトトラッキングのニーズに利用を検討する理由は以下の通りです: + +- **効率性:** 精度を損なうことなくリアルタイムでビデオストリームを処理します。 +- **柔軟性:** 複数のトラッキングアルゴリズムと構成をサポートしています。 +- **使いやすさ:** 簡単なPython APIとCLIオプションで迅速な統合と展開が可能です。 +- **カスタマイズ性:** カスタムトレーニング済みのYOLOモデルとの容易な使用により、ドメイン特有のアプリケーションへの統合が可能です。 + +

+
+ +
+ 視聴: Ultralytics YOLOv8によるオブジェクト検出とトラッキング。 +

+ +## 実世界での応用例 + +| 交通 | 小売 | 水産業 | +|:---------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------:| +| ![車両のトラッキング](https://github.com/RizwanMunawar/ultralytics/assets/62513924/ee6e6038-383b-4f21-ac29-b2a1c7d386ab) | ![人々のトラッキング](https://github.com/RizwanMunawar/ultralytics/assets/62513924/93bb4ee2-77a0-4e4e-8eb6-eb8f527f0527) | ![魚のトラッキング](https://github.com/RizwanMunawar/ultralytics/assets/62513924/a5146d0f-bfa8-4e0a-b7df-3c1446cd8142) | +| 車両トラッキング | 人々のトラッキング | 魚のトラッキング | + +## 一目でわかる機能 + +Ultralytics YOLOは、オブジェクト検出機能を拡張して、堅牢で多機能なオブジェクトトラッキングを提供します: + +- **リアルタイムトラッキング:** 高フレームレートのビデオでオブジェクトをシームレスに追跡します。 +- **複数トラッカーサポート:** 確立されたトラッキングアルゴリズムから選択できます。 +- **カスタマイズ可能なトラッカー構成:** 様々なパラメーターを調整することで特定の要件に合わせてトラッキングアルゴリズムを調整します。 + +## 利用可能なトラッカー + +Ultralytics YOLOは、次のトラッキングアルゴリズムをサポートしています。それらは、関連するYAML構成ファイル(たとえば`tracker=tracker_type.yaml`)を渡すことで有効にすることができます: + +* [BoT-SORT](https://github.com/NirAharon/BoT-SORT) - このトラッカーを有効にするには`botsort.yaml`を使用します。 +* [ByteTrack](https://github.com/ifzhang/ByteTrack) - このトラッカーを有効にするには`bytetrack.yaml`を使用します。 + +デフォルトのトラッカーはBoT-SORTです。 + +## トラッキング + +ビデオストリームでトラッカーを実行するには、YOLOv8n、YOLOv8n-seg、YOLOv8n-poseなどのトレーニング済みのDetect、Segment、またはPoseモデルを使用します。 + +!!! 
Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 公式またはカスタムモデルをロード + model = YOLO('yolov8n.pt') # 公式のDetectモデルをロード + model = YOLO('yolov8n-seg.pt') # 公式のSegmentモデルをロード + model = YOLO('yolov8n-pose.pt') # 公式のPoseモデルをロード + model = YOLO('path/to/best.pt') # カスタムトレーニング済みモデルをロード + + # モデルでトラッキングを実行 + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True) # デフォルトトラッカーでトラッキング + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True, tracker="bytetrack.yaml") # ByteTrackトラッカーでトラッキング + ``` + + === "CLI" + + ```bash + # コマンドラインインターフェースを使用して、様々なモデルでトラッキングを実行 + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" # 公式のDetectモデル + yolo track model=yolov8n-seg.pt source="https://youtu.be/LNwODJXcvt4" # 公式のSegmentモデル + yolo track model=yolov8n-pose.pt source="https://youtu.be/LNwODJXcvt4" # 公式のPoseモデル + yolo track model=path/to/best.pt source="https://youtu.be/LNwODJXcvt4" # カスタムトレーニング済みモデル + + # ByteTrackトラッカーを使用してトラッキング + yolo track model=path/to/best.pt tracker="bytetrack.yaml" + ``` + +上記の使用法に示されているように、トラッキングはビデオやストリーミングソースで実行されるすべてのDetect、Segment、およびPoseモデルで利用可能です。 + +## 構成 + +### トラッキング引数 + +トラッキング構成は、`conf`、`iou`、および`show`などのPredictモードと同じプロパティを共有します。さらなる構成については、[Predict](https://docs.ultralytics.com/modes/predict/)モデルページを参照してください。 + +!!! Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # トラッキングパラメーターを構成し、トラッカーを実行 + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True) + ``` + + === "CLI" + + ```bash + # コマンドラインインターフェースを使用してトラッキングパラメータを構成し、トラッカーを実行 + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3 iou=0.5 show + ``` + +### トラッカーの選択 + +Ultralyticsは、変更されたトラッカー構成ファイルの使用も可能にします。これを行うには、[ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers)からトラッカー構成ファイル(たとえば`custom_tracker.yaml`)のコピーを作成し、必要に応じて任意の構成(`tracker_type`を除く)を変更します。 + +!!! 
Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロードし、カスタム構成ファイルでトラッカーを実行 + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", tracker='custom_tracker.yaml') + ``` + + === "CLI" + + ```bash + # コマンドラインインターフェースを使用して、カスタム構成ファイルでモデルをロードし、トラッカーを実行 + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml' + ``` + +トラッキング引数の包括的なリストについては、[ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers)ページを参照してください。 + +## Pythonの例 + +### トラックループの永続化 + +次は、OpenCV (`cv2`)とYOLOv8を使用してビデオフレームでオブジェクトトラッキングを実行するPythonスクリプトです。このスクリプトでは、必要なパッケージ(`opencv-python`および`ultralytics`)が既にインストールされていることが前提です。`persist=True`引数は、トラッカーに現在の画像またはフレームがシーケンスの次のものであり、現在の画像に前の画像からのトラックを期待することを伝えます。 + +!!! Example "トラッキングを伴うストリーミングforループ" + + ```python + import cv2 + from ultralytics import YOLO + + # YOLOv8モデルをロード + model = YOLO('yolov8n.pt') + + # ビデオファイルを開く + video_path = "path/to/video.mp4" + cap = cv2.VideoCapture(video_path) + + # ビデオフレームをループする + while cap.isOpened(): + # ビデオからフレームを読み込む + success, frame = cap.read() + + if success: + # フレームでYOLOv8トラッキングを実行し、フレーム間でトラックを永続化 + results = model.track(frame, persist=True) + + # フレームに結果を可視化 + annotated_frame = results[0].plot() + + # 注釈付きのフレームを表示 + cv2.imshow("YOLOv8トラッキング", annotated_frame) + + # 'q'が押されたらループから抜ける + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # ビデオの終わりに到達したらループから抜ける + break + + # ビデオキャプチャオブジェクトを解放し、表示ウィンドウを閉じる + cap.release() + cv2.destroyAllWindows() + ``` + +ここでの変更は、単純な検出ではなくオブジェクトトラッキングを有効にする`model(frame)`から`model.track(frame)`への変更です。この変更されたスクリプトは、ビデオの各フレームでトラッカーを実行し、結果を視覚化し、ウィンドウに表示します。ループは'q'を押すことで終了できます。 + +## 新しいトラッカーの貢献 + +あなたはマルチオブジェクトトラッキングに精通しており、Ultralytics 
YOLOでトラッキングアルゴリズムをうまく実装または適応させたことがありますか?[ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers)セクションへの貢献を私たちは歓迎します!あなたの実世界での応用例とソリューションは、トラッキングタスクに取り組むユーザーにとって非常に有益かもしれません。 + +このセクションへの貢献により、Ultralytics YOLOフレームワーク内で利用可能なトラッキングソリューションの範囲が広がり、コミュニティにとっての機能性とユーティリティーに新たな層が加わります。 + +ご自身の貢献を開始するには、プルリクエスト(PR)の送信に関する総合的な指示について我々の[貢献ガイド](https://docs.ultralytics.com/help/contributing)をご参照ください 🛠️。あなたが何をもたらすか私たちは期待しています! + +一緒に、Ultralytics YOLOエコシステムのトラッキング機能を高めましょう 🙏! diff --git a/docs/ja/modes/train.md b/docs/ja/modes/train.md new file mode 100644 index 0000000..c8d3642 --- /dev/null +++ b/docs/ja/modes/train.md @@ -0,0 +1,206 @@ +--- +comments: true +description: YOLOv8モデルをUltralytics YOLOを使用してトレーニングする手順についてのガイドで、シングルGPUとマルチGPUトレーニングの例を含む +keywords: Ultralytics, YOLOv8, YOLO, 物体検出, トレーニングモード, カスタムデータセット, GPUトレーニング, マルチGPU, ハイパーパラメータ, CLI例, Python例 +--- + +# Ultralytics YOLOを使ったモデルトレーニング + +Ultralytics YOLOエコシステムと統合 + +## はじめに + +ディープラーニングモデルのトレーニングは、データを与えてパラメーターを調整し、正確な予測を行えるようにするプロセスを含みます。UltralyticsのYOLOv8のトレーニングモードは、現代のハードウェアの能力をフルに活用して物体検出モデルを効果的かつ効率的にトレーニングするために設計されています。このガイドは、YOLOv8 の豊富な機能セットを使用して自身のモデルをトレーニングするために必要なすべての詳細をカバーすることを目的としています。 + +

+
+ +
+ 視聴: Google Colab でカスタムデータセットにYOLOv8モデルをトレーニングする方法。 +

+ +## トレーニングにUltralyticsのYOLOを選ぶ理由 + +YOLOv8のトレーニングモードを選択するいくつかの魅力的な理由を以下に示します: + +- **効率性:** シングルGPUセットアップであろうと複数のGPUにスケールする場合であろうと、あなたのハードウェアを最大限に活用します。 +- **汎用性:** COCO、VOC、ImageNetのような既存のデータセットに加え、カスタムデータセットでのトレーニングが可能です。 +- **ユーザーフレンドリー:** 直感的でありながら強力なCLIとPythonインターフェースを備え、簡単なトレーニング体験を提供します。 +- **ハイパーパラメータの柔軟性:** モデルのパフォーマンスを微調整するための幅広いカスタマイズ可能なハイパーパラメータ。 + +### トレーニングモードの主な特徴 + +以下に、YOLOv8のトレーニングモードのいくつかの注目すべき特徴を挙げます: + +- **自動データセットダウンロード:** COCO、VOC、ImageNetのような標準データセットは最初の使用時に自動的にダウンロードされます。 +- **マルチGPUサポート:** 複数のGPUにわたってトレーニングをスケールし、プロセスを迅速に行います。 +- **ハイパーパラメータの設定:** YAML設定ファイルやCLI引数を通じてハイパーパラメータを変更するオプション。 +- **可視化とモニタリング:** トレーニング指標のリアルタイム追跡と学習プロセスの可視化により、より良い洞察を得ます。 + +!!! Tip "ヒント" + + * YOLOv8のデータセット、例えばCOCO、VOC、ImageNetなどは、最初の使用時に自動的にダウンロードされます。例:`yolo train data=coco.yaml` + +## 使用例 + +COCO128データセットでYOLOv8nを100エポック、画像サイズ640でトレーニングする。トレーニングデバイスは、`device`引数を使って指定できます。引数が渡されない場合、利用可能であればGPU `device=0`が、そうでなければ`device=cpu`が利用されます。全てのトレーニング引数のリストは以下の引数セクションを参照してください。 + +!!! Example "シングルGPUとCPUトレーニング例" + + デバイスは自動的に決定されます。GPUが利用可能であればそれが使用され、そうでなければCPUでトレーニングが開始されます。 + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロード + model = YOLO('yolov8n.yaml') # YAMLから新しいモデルを作成 + model = YOLO('yolov8n.pt') # トレーニングにはおすすめの事前学習済みモデルをロード + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # YAMLからモデルを作成し、重みを転送 + + # モデルをトレーニング + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + # YAMLから新しいモデルを作成し、最初からトレーニングを開始 + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # 事前学習済み*.ptモデルからトレーニングを開始 + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # YAMLから新しいモデルを作成し、事前学習済みの重みを転送してトレーニングを開始 + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### マルチGPUトレーニング + +マルチGPUトレーニングは、利用可能なハードウェアリソースをより効率的に活用するために、トレーニングの負荷を複数のGPUに分散させることを可能にします。この機能はPython 
APIとコマンドラインインターフェースの両方を通じて利用できます。マルチGPUトレーニングを有効にするには、使用したいGPUデバイスIDを指定します。 + +!!! Example "マルチGPUトレーニング例" + + 2つのGPUを使ってトレーニングするには、CUDAデバイス0と1を使い以下のコマンドを使用します。必要に応じて追加のGPUに拡張します。 + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロード + model = YOLO('yolov8n.pt') # トレーニングにはおすすめの事前学習済みモデルをロード + + # 2つのGPUでモデルをトレーニング + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device=[0, 1]) + ``` + + === "CLI" + + ```bash + # 事前学習済み*.ptモデルからGPU 0と1を使ってトレーニングを開始 + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=0,1 + ``` + +### Apple M1 および M2 MPSトレーニング + +AppleのM1およびM2チップに対するサポートがUltralyticsのYOLOモデルに統合されたことで、Appleの強力なMetal Performance Shaders(MPS)フレームワークを使用してデバイスでモデルをトレーニングすることが可能になりました。 MPSは、Appleのカスタムシリコン上での計算や画像処理タスクの高性能な実行方法を提供します。 + +AppleのM1およびM2チップでのトレーニングを有効にするには、トレーニングプロセスを開始する際に`mps`をデバイスとして指定する必要があります。以下はPythonおよびコマンドラインでこれを行う例です: + +!!! Example "MPSトレーニング例" + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロード + model = YOLO('yolov8n.pt') # トレーニングにはおすすめの事前学習済みモデルをロード + + # MPSを使ってモデルをトレーニング + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device='mps') + ``` + + === "CLI" + + ```bash + # MPSを使って、事前学習済み*.ptモデルからトレーニングを開始 + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=mps + ``` + +M1/M2チップの計算能力を利用しながら、これによりトレーニングタスクのより効率的な処理が可能になります。より詳細なガイダンスや高度な設定オプションについては、[PyTorch MPSのドキュメント](https://pytorch.org/docs/stable/notes/mps.html)を参照してください。 + +## ロギング + +YOLOv8モデルをトレーニングする際、モデルのパフォーマンスを時間とともに追跡することが価値あることであると考えられます。これがロギングの役割になります。UltralyticsのYOLOは、Comet、ClearML、TensorBoardの3種類のロガーをサポートしています。 + +ロガーを使用するには、上記のコードスニペットからドロップダウンメニューを選択し、実行します。選択したロガーがインストールされ、初期化されます。 + +### Comet + +[Comet](https://www.comet.ml/site/)は、データサイエンティストや開発者が実験やモデルを追跡、比較、説明、最適化するためのプラットフォームです。リアルタイムメトリクスやコード差分、ハイパーパラメータの追跡などの機能を提供しています。 + +Cometを使用するには: + +!!! 
Example "例" + + === "Python" + ```python + # pip install comet_ml + import comet_ml + + comet_ml.init() + ``` + +Cometアカウントにサインインし、APIキーを取得してください。このキーを環境変数またはスクリプトに追加して、実験をログに記録する必要があります。 + +### ClearML + +[ClearML](https://www.clear.ml/)は、実験の追跡を自動化し、資源の効率的な共有を支援するオープンソースプラットフォームです。チームがML作業をより効率的に管理、実行、再現するのに役立ちます。 + +ClearMLを使用するには: + +!!! Example "例" + + === "Python" + ```python + # pip install clearml + import clearml + + clearml.browser_login() + ``` + +このスクリプトを実行した後、ブラウザでClearMLアカウントにサインインし、セッションを認証する必要があります。 + +### TensorBoard + +[TensorBoard](https://www.tensorflow.org/tensorboard)は、TensorFlowの視覚化ツールキットです。TensorFlowグラフを可視化し、グラフの実行に関する定量的メトリックをプロットし、それを通過する画像などの追加データを表示することができます。 + +[Google Colab](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb)でTensorBoardを使用するには: + +!!! Example "例" + + === "CLI" + ```bash + load_ext tensorboard + tensorboard --logdir ultralytics/runs # 'runs'ディレクトリと置き換えてください + ``` + +TensorBoardをローカルで使用する場合は、http://localhost:6006/ で結果を確認できます。 + +!!! 
Example "例" + + === "CLI" + ```bash + tensorboard --logdir ultralytics/runs # 'runs'ディレクトリと置き換えてください + ``` + +これでTensorBoardがロードされ、トレーニングログが保存されているディレクトリを指定します。 + +ログを設定した後、モデルのトレーニングを進めてください。すべてのトレーニングメトリクスが選択したプラットフォームに自動的に記録され、これらのログにアクセスして、時間とともにモデルのパフォーマンスを監視したり、さまざまなモデルを比較したり、改善の余地を特定したりすることができます。 diff --git a/docs/ja/modes/val.md b/docs/ja/modes/val.md new file mode 100644 index 0000000..354ca37 --- /dev/null +++ b/docs/ja/modes/val.md @@ -0,0 +1,86 @@ +--- +comments: true +description: YOLOv8モデルのバリデーションガイド。バリデーション設定とメトリクスを使用してYOLOモデルのパフォーマンスを評価する方法をPythonとCLIの例で学びましょう。 +keywords: Ultralytics, YOLO ドキュメント, YOLOv8, バリデーション, モデル評価, ハイパーパラメータ, 正確性, メトリクス, Python, CLI +--- + +# Ultralytics YOLOによるモデルバリデーション + +Ultralytics YOLOのエコシステムと統合機能 + +## はじめに + +バリデーションは、訓練されたモデルの品質を評価するために機械学習パイプラインで重要なステップです。Ultralytics YOLOv8のValモードは、モデルのパフォーマンスを評価するための堅牢なツールとメトリクスを提供します。このガイドは、Valモードを効果的に使用して、モデルが正確で信頼できることを確認するための完全なリソースとなっています。 + +## Ultralytics YOLOでバリデーションを行う利点 + +YOLOv8のValモードを使用することの利点は以下の通りです: + +- **Precision(精度):** mAP50、mAP75、mAP50-95といった正確なメトリクスを取得し、モデルを総合的に評価します。 +- **Convenience(便利さ):** 訓練設定を記憶する組み込みの機能を利用して、バリデーションプロセスを簡素化します。 +- **Flexibility(柔軟性):** 同じデータセットや異なるデータセット、画像サイズでモデルをバリデーションできます。 +- **Hyperparameter Tuning(ハイパーパラメータチューニング):** バリデーションメトリクスを利用してモデルのパフォーマンスを向上させるためのチューニングを行います。 + +### Valモードの主要機能 + +YOLOv8のValモードにより提供される注目すべき機能は以下の通りです: + +- **Automated Settings(自動設定):** 訓練時の設定をモデルが記憶しているため、バリデーションが直感的に行えます。 +- **Multi-Metric Support(複数メトリックのサポート):** 精度メトリックの範囲に基づいてモデルを評価します。 +- **CLI and Python API:** バリデーションにコマンドラインインターフェイスもしくはPython APIのどちらかを選択できます。 +- **Data Compatibility(データ互換性):** 訓練段階で使われたデータセットはもちろん、カスタムデータセットともシームレスに動作します。 + +!!! Tip "Tip" + + * YOLOv8モデルは訓練設定を自動的に記憶しているので、`yolo val model=yolov8n.pt`や`YOLO('yolov8n.pt').val()`だけで、元のデータセットと同じ画像サイズで簡単にバリデーション可能です。 + +## 使用例 + +COCO128データセット上で訓練済みのYOLOv8nモデルの精度を検証します。`model`はその訓練時の`data`及び引数をモデル属性として保持しているため、引数を渡す必要はありません。全てのバリデーション引数のリストについては、以下の引数セクションをご覧ください。 + +!!! 
Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロードする + model = YOLO('yolov8n.pt') # 公式モデルをロード + model = YOLO('path/to/best.pt') # カスタムモデルをロード + + # モデルをバリデーションする + metrics = model.val() # 引数は必要なし、データセットと設定は記憶持ち + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # 各カテゴリのmap50-95が含まれたリスト + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # 公式モデルをバリデーション + yolo detect val model=path/to/best.pt # カスタムモデルをバリデーション + ``` + +## 引数 + +YOLOモデルに対するバリデーション設定は、バリデーションデータセット上でのモデルのパフォーマンスを評価するために使用されるさまざまなハイパーパラメータと設定を指します。これらの設定は、モデルのパフォーマンス、スピード、そして精度に影響を与える可能性があります。一般的なYOLOのバリデーション設定には、バッチサイズや訓練中のバリデーション頻度、モデルのパフォーマンスを評価するために使用されるメトリックが含まれます。バリデーションプロセスに影響を与えるかもしれない他の要素には、バリデーションデータセットのサイズと構成、およびモデルが使用されている具体的なタスクなどがあります。モデルがバリデーションデータセット上でうまく動作していることを確認し、過学習を検出して防ぐために、これらの設定を慎重にチューニングして実験することが重要です。 + +| キー | 値 | 説明 | +|---------------|---------|--------------------------------------------------| +| `data` | `None` | データファイルへのパス、例: coco128.yaml | +| `imgsz` | `640` | 入力画像のサイズを整数で | +| `batch` | `16` | バッチごとの画像数(AutoBatchの場合は-1) | +| `save_json` | `False` | JSONファイルに結果を保存 | +| `save_hybrid` | `False` | ハイブリッドバージョンのラベルを保存(ラベル+追加の予測) | +| `conf` | `0.001` | 検出のためのオブジェクト信頼度閾値 | +| `iou` | `0.6` | NMS用の交差オーバーユニオン(IoU)閾値 | +| `max_det` | `300` | 画像あたりの最大検出数 | +| `half` | `True` | 半精度(FP16)を使用する | +| `device` | `None` | 実行デバイス、例: cuda device=0/1/2/3やdevice=cpu | +| `dnn` | `False` | ONNX推論用のOpenCV DNNを使用 | +| `plots` | `False` | 訓練中にプロットを表示 | +| `rect` | `False` | 各バッチが最小限のパディングで整理された矩形バリデーション | +| `split` | `val` | バリデーションに使用するデータセットのスプリット、例: 'val'、'test'、'train' | +| | | | diff --git a/docs/ja/quickstart.md b/docs/ja/quickstart.md new file mode 100644 index 0000000..35c737a --- /dev/null +++ b/docs/ja/quickstart.md @@ -0,0 +1,198 @@ +--- +comments: true +description: Ultralyticsのpip、conda、git、Dockerを使用した様々なインストール方法を探索し、コマンドラインインターフェースまたはPythonプロジェクト内でのUltralyticsの使用方法を学びます。 
+keywords: Ultralyticsインストール, pipインストールUltralytics, DockerインストールUltralytics, Ultralyticsコマンドラインインターフェース, Ultralytics Pythonインターフェース +--- + +## Ultralyticsのインストール + +Ultralyticsはpip、conda、Dockerを含むさまざまなインストール方法を提供しています。最新の安定版リリースである`ultralytics` pipパッケージを通じてYOLOv8をインストールするか、最新バージョンを取得するために[Ultralytics GitHubリポジトリ](https://github.com/ultralytics/ultralytics)をクローンします。Dockerは、ローカルインストールを回避し、孤立したコンテナ内でパッケージを実行するために使用できます。 + +!!! Example "インストール" + + === "Pipでのインストール(推奨)" + pipを使用して`ultralytics`パッケージをインストールするか、`pip install -U ultralytics`を実行して既存のインストールをアップデートします。`ultralytics`パッケージの詳細については、Python Package Index(PyPI)を参照してください: [https://pypi.org/project/ultralytics/](https://pypi.org/project/ultralytics/)。 + + [![PyPI version](https://badge.fury.io/py/ultralytics.svg)](https://badge.fury.io/py/ultralytics) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) + + ```bash + # PyPIからultralyticsパッケージをインストール + pip install ultralytics + ``` + + GitHubの[リポジトリ](https://github.com/ultralytics/ultralytics)から直接`ultralytics`パッケージをインストールすることもできます。これは、最新の開発版が必要な場合に便利かもしれません。システムにGitコマンドラインツールがインストールされている必要があります。`@main`コマンドは`main`ブランチをインストールし、別のブランチ、例えば`@my-branch`に変更したり、`main`ブランチにデフォルトするために完全に削除することができます。 + + ```bash + # GitHubからultralyticsパッケージをインストール + pip install git+https://github.com/ultralytics/ultralytics.git@main + ``` + + + === "Condaでのインストール" + Condaはpipの代わりのパッケージマネージャーで、インストールにも使用できます。より詳細はAnacondaを参照してください [https://anaconda.org/conda-forge/ultralytics](https://anaconda.org/conda-forge/ultralytics)。Condaパッケージを更新するためのUltralyticsフィードストックリポジトリはこちらです [https://github.com/conda-forge/ultralytics-feedstock/](https://github.com/conda-forge/ultralytics-feedstock/)。 + + + [![Conda Recipe](https://img.shields.io/badge/recipe-ultralytics-green.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda 
Version](https://img.shields.io/conda/vn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) + + ```bash + # Condaを使用してultralyticsパッケージをインストール + conda install -c conda-forge ultralytics + ``` + + !!! Note "ノート" + + CUDA環境でインストールする場合、パッケージマネージャーが競合を解決できるようにするため、`ultralytics`、`pytorch`、`pytorch-cuda`を同じコマンドで一緒にインストールするのがベストプラクティスです。または、CPU専用の`pytorch`パッケージに必要な場合は上書きするように`pytorch-cuda`を最後にインストールします。 + ```bash + # Condaを使用して一緒にすべてのパッケージをインストール + conda install -c pytorch -c nvidia -c conda-forge pytorch torchvision pytorch-cuda=11.8 ultralytics + ``` + + ### Conda Dockerイメージ + + UltralyticsのConda Dockerイメージも[DockerHub](https://hub.docker.com/r/ultralytics/ultralytics)から利用可能です。これらのイメージは[Miniconda3](https://docs.conda.io/projects/miniconda/en/latest/)に基づいており、Conda環境で`ultralytics`を使用する簡単な方法です。 + + ```bash + # イメージ名を変数として設定 + t=ultralytics/ultralytics:latest-conda + + # Docker Hubから最新のultralyticsイメージをプル + sudo docker pull $t + + # すべてのGPUを持つコンテナでultralyticsイメージを実行 + sudo docker run -it --ipc=host --gpus all $t # すべてのGPU + sudo docker run -it --ipc=host --gpus '"device=2,3"' $t # GPUを指定 + ``` + + === "Gitクローン" + 開発への貢献に興味がある場合や、最新のソースコードで実験したい場合は、`ultralytics`リポジトリをクローンしてください。クローンした後、ディレクトリに移動し、pipを使って編集可能モード`-e`でパッケージをインストールします。 + ```bash + # ultralyticsリポジトリをクローン + git clone https://github.com/ultralytics/ultralytics + + # クローンしたディレクトリに移動 + cd ultralytics + + # 開発用に編集可能モードでパッケージをインストール + pip install -e . + ``` + +必要な依存関係のリストについては、`ultralytics`の[requirements.txt](https://github.com/ultralytics/ultralytics/blob/main/requirements.txt)ファイルを参照してください。上記の全ての例では、必要な依存関係を全てインストールします。 + +

+
+ +
+ 視聴: Ultralytics YOLO クイックスタートガイド +

+ +!!! Tip "ヒント" + + PyTorchの要件はオペレーティングシステムとCUDAの要件によって異なるため、[https://pytorch.org/get-started/locally](https://pytorch.org/get-started/locally)に従って最初にPyTorchをインストールすることをお勧めします。 + + + PyTorch Installation Instructions + + +## CLIでUltralyticsを使用 + +Ultralyticsコマンドラインインターフェース(CLI)を使用すると、Python環境がなくても単一の行のコマンドを簡単に実行できます。CLIはカスタマイズもPythonコードも必要ありません。単純にすべてのタスクを`yolo`コマンドでターミナルから実行することができます。コマンドラインからYOLOv8を使用する方法について詳しくは、[CLIガイド](/../usage/cli.md)を参照してください。 + +!!! Example "例" + + === "構文" + + Ultralyticsの`yolo`コマンドは以下の構文を使用します: + ```bash + yolo TASK MODE ARGS + + ここで TASK(オプション)は[detect, segment, classify]のうちの1つ + MODE(必須)は[train, val, predict, export, track]のうちの1つ + ARGS(オプション)はデフォルトを上書きする任意の数のカスタム'arg=value'ペアです。 + ``` + full [Configuration Guide](/../usage/cfg.md)または`yolo cfg`で全てのARGSを確認してください + + === "トレーニング" + + 10エポックにわたって初期学習率0.01で検出モデルをトレーニング + ```bash + yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01 + ``` + + === "予測" + + 画像サイズ320で事前トレーニングされたセグメンテーションモデルを使用してYouTubeビデオを予測: + ```bash + yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 + ``` + + === "検証" + + バッチサイズ1および画像サイズ640で事前トレーニングされた検出モデルを検証する: + ```bash + yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640 + ``` + + === "エクスポート" + + 画像サイズ224 x 128でYOLOv8n分類モデルをONNX形式にエクスポート(TASKは不要) + ```bash + yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128 + ``` + + === "特殊" + + バージョンを確認したり、設定を表示したり、チェックを行ったりするための特別なコマンドを実行します: + ```bash + yolo help + yolo checks + yolo version + yolo settings + yolo copy-cfg + yolo cfg + ``` + +!!! 
Warning "警告" + + 引数は`arg=val`ペアとして渡され、`=`記号で分割され、ペア間にスペース` `が必要です。引数のプレフィックスに`--`や引数間にカンマ`,`を使用しないでください。 + + - `yolo predict model=yolov8n.pt imgsz=640 conf=0.25`   ✅ + - `yolo predict model yolov8n.pt imgsz 640 conf 0.25`   ❌ + - `yolo predict --model yolov8n.pt --imgsz 640 --conf 0.25`   ❌ + +[CLIガイド](/../usage/cli.md){ .md-button } + +## PythonでUltralyticsを使用 + +YOLOv8のPythonインターフェースを使用すると、Pythonプロジェクトにシームレスに統合し、モデルをロード、実行、出力を処理することが可能です。簡単さと使いやすさを念頭に設計されたPythonインターフェースにより、ユーザーは素早くプロジェクトに物体検出、セグメンテーション、分類を実装することができます。このように、YOLOv8のPythonインターフェースは、これらの機能をPythonプロジェクトに取り入れたいと考えている方にとって貴重なツールです。 + +たとえば、ユーザーはモデルをロードして、トレーニングし、検証セットでのパフォーマンスを評価し、ONNX形式にエクスポートするまでの一連の処理を数行のコードで行うことができます。YOLOv8をPythonプロジェクトで使用する方法について詳しくは、[Pythonガイド](/../usage/python.md)を参照してください。 + +!!! Example "例" + + ```python + from ultralytics import YOLO + + # スクラッチから新しいYOLOモデルを作成 + model = YOLO('yolov8n.yaml') + + # 事前トレーニングされたYOLOモデルをロード(トレーニングに推奨) + model = YOLO('yolov8n.pt') + + # 'coco128.yaml'データセットを使用して3エポックでモデルをトレーニング + results = model.train(data='coco128.yaml', epochs=3) + + # モデルのパフォーマンスを検証セットで評価 + results = model.val() + + # モデルを使用して画像で物体検出を実行 + results = model('https://ultralytics.com/images/bus.jpg') + + # モデルをONNX形式にエクスポート + success = model.export(format='onnx') + ``` + +[Pythonガイド](/../usage/python.md){.md-button .md-button--primary} diff --git a/docs/ja/tasks/classify.md b/docs/ja/tasks/classify.md new file mode 100644 index 0000000..def346f --- /dev/null +++ b/docs/ja/tasks/classify.md @@ -0,0 +1,172 @@ +--- +comments: true +description: YOLOv8 分類モデルについての画像分類。事前トレーニングされたモデルのリストとモデルのトレーニング、検証、予測、エクスポート方法の詳細情報を学びます。 +keywords: Ultralytics, YOLOv8, 画像分類, 事前トレーニングされたモデル, YOLOv8n-cls, トレーニング, 検証, 予測, モデルエクスポート +--- + +# 画像分類 + +画像分類の例 + +画像分類は3つのタスクの中で最も単純で、1枚の画像をあらかじめ定義されたクラスのセットに分類します。 + +画像分類器の出力は単一のクラスラベルと信頼度スコアです。画像がどのクラスに属しているかのみを知る必要があり、クラスのオブジェクトがどこにあるか、その正確な形状は必要としない場合に画像分類が役立ちます。 + +!!! 
Tip "ヒント" + + YOLOv8 分類モデルは `-cls` 接尾辞を使用します。例: `yolov8n-cls.pt` これらは [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) で事前にトレーニングされています。 + +## [モデル](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +ここに事前トレーニングされた YOLOv8 分類モデルが表示されています。検出、セグメンテーション、ポーズモデルは [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) データセットで事前にトレーニングされていますが、分類モデルは [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) で事前にトレーニングされています。 + +[モデル](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) は初回使用時に Ultralytics の最新 [リリース](https://github.com/ultralytics/assets/releases) から自動的にダウンロードされます。 + +| モデル | サイズ
(ピクセル) | 正確性
トップ1 | 正確性
トップ5 | スピード
CPU ONNX
(ms) | スピード
A100 TensorRT
(ms) | パラメータ
(M) | FLOPs
(B) at 640 | +|----------------------------------------------------------------------------------------------|--------------------|------------------|------------------|-------------------------------|------------------------------------|-------------------|--------------------------| +| [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | +| [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | +| [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | +| [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | +| [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | + +- **正確性** の値は [ImageNet](https://www.image-net.org/) データセットの検証セットでのモデルの正確性です。 +
再現するには `yolo val classify data=path/to/ImageNet device=0` +- **スピード** は [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) インスタンスを使用して ImageNet 検証画像を平均化したものです。 +
再現するには `yolo val classify data=path/to/ImageNet batch=1 device=0|cpu` + +## トレーニング + +画像サイズ64で100エポックにわたってMNIST160データセットにYOLOv8n-clsをトレーニングします。利用可能な引数の完全なリストについては、[設定](/../usage/cfg.md) ページを参照してください。 + +!!! Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロードする + model = YOLO('yolov8n-cls.yaml') # YAMLから新しいモデルをビルド + model = YOLO('yolov8n-cls.pt') # 事前にトレーニングされたモデルをロード(トレーニングに推奨) + model = YOLO('yolov8n-cls.yaml').load('yolov8n-cls.pt') # YAMLからビルドしてウェイトを転送 + + # モデルをトレーニングする + results = model.train(data='mnist160', epochs=100, imgsz=64) + ``` + + === "CLI" + + ```bash + # YAMLから新しいモデルをビルドし、ゼロからトレーニングを開始 + yolo classify train data=mnist160 model=yolov8n-cls.yaml epochs=100 imgsz=64 + + # 事前にトレーニングされた *.pt モデルからトレーニングを開始 + yolo classify train data=mnist160 model=yolov8n-cls.pt epochs=100 imgsz=64 + + # YAMLから新しいモデルをビルドし、事前トレーニングされたウェイトを転送してトレーニングを開始 + yolo classify train data=mnist160 model=yolov8n-cls.yaml pretrained=yolov8n-cls.pt epochs=100 imgsz=64 + ``` + +### データセットフォーマット + +YOLO分類データセットのフォーマットの詳細は [データセットガイド](../../../datasets/classify/index.md) にあります。 + +## 検証 + +MNIST160データセットでトレーニング済みのYOLOv8n-clsモデルの正確性を検証します。引数は必要ありません。`model` はトレーニング時の `data` および引数をモデル属性として保持しています。 + +!!! Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロードする + model = YOLO('yolov8n-cls.pt') # 公式モデルをロード + model = YOLO('path/to/best.pt') # カスタムモデルをロード + + # モデルを検証する + metrics = model.val() # 引数不要、データセットと設定は記憶されている + metrics.top1 # トップ1の正確性 + metrics.top5 # トップ5の正確性 + ``` + === "CLI" + + ```bash + yolo classify val model=yolov8n-cls.pt # 公式モデルを検証 + yolo classify val model=path/to/best.pt # カスタムモデルを検証 + ``` + +## 予測 + +トレーニング済みのYOLOv8n-clsモデルを使用して、画像に対する予測を実行します。 + +!!! 
Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロードする + model = YOLO('yolov8n-cls.pt') # 公式モデルをロード + model = YOLO('path/to/best.pt') # カスタムモデルをロード + + # モデルで予測する + results = model('https://ultralytics.com/images/bus.jpg') # 画像で予測 + ``` + === "CLI" + + ```bash + yolo classify predict model=yolov8n-cls.pt source='https://ultralytics.com/images/bus.jpg' # 公式モデルで予測 + yolo classify predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # カスタムモデルで予測 + ``` + +`predict` モードの完全な詳細は [予測](https://docs.ultralytics.com/modes/predict/) ページを参照してください。 + +## エクスポート + +YOLOv8n-clsモデルをONNX、CoreMLなどの異なる形式にエクスポートします。 + +!!! Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロードする + model = YOLO('yolov8n-cls.pt') # 公式モデルをロード + model = YOLO('path/to/best.pt') # カスタムトレーニングされたモデルをロード + + # モデルをエクスポートする + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-cls.pt format=onnx # 公式モデルをエクスポート + yolo export model=path/to/best.pt format=onnx # カスタムトレーニングされたモデルをエクスポート + ``` + +利用可能な YOLOv8-cls エクスポート形式は以下の表にあります。エクスポートされたモデルで直接予測または検証が可能です、例: `yolo predict model=yolov8n-cls.onnx`。エクスポート完了後、モデルの使用例が表示されます。 + +| 形式 | `format` 引数 | モデル | メタデータ | 引数 | +|--------------------------------------------------------------------|---------------|-------------------------------|-------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-cls.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-cls.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-cls.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-cls_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-cls.engine` | ✅ | `imgsz`, `half`, 
`dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-cls.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-cls_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-cls.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-cls.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-cls_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-cls_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-cls_ncnn_model/` | ✅ | `imgsz`, `half` | + +`export` の詳細は [エクスポート](https://docs.ultralytics.com/modes/export/) ページを参照してください。 diff --git a/docs/ja/tasks/detect.md b/docs/ja/tasks/detect.md new file mode 100644 index 0000000..3bc24c1 --- /dev/null +++ b/docs/ja/tasks/detect.md @@ -0,0 +1,184 @@ +--- +comments: true +description: Ultralyticsの公式ドキュメント YOLOv8。モデルのトレーニング、検証、予測、そして様々なフォーマットでのモデルエクスポート方法を学ぶ。詳細なパフォーマンス統計も含む。 +keywords: YOLOv8, Ultralytics, 物体検出, 事前訓練済みモデル, トレーニング, 検証, 予測, モデルエクスポート, COCO, ImageNet, PyTorch, ONNX, CoreML +--- + +# 物体検出 + +物体検出の例 + +物体検出とは、画像やビデオストリーム内の物体の位置とクラスを特定するタスクです。 + +物体検出器の出力は、画像内の物体を囲む一連のバウンディングボックスであり、各ボックスにはクラスラベルと信頼度スコアが付けられます。シーン内の関心対象を識別する必要があるが、その物体の正確な位置や形状までは必要ない場合に、物体検出が適しています。 + +

+
+ +
+ 視聴する: Ultralyticsの事前訓練済みYOLOv8モデルによる物体検出。 +

+ +!!! Tip "ヒント" + + YOLOv8 Detectモデルは、デフォルトのYOLOv8モデル、つまり`yolov8n.pt`であり、[COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml)で事前訓練されています。 + +## [モデル](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +事前訓練されたYOLOv8 Detectモデルがこちらに示されます。Detect, Segment, Poseモデルは[COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml)データセットで、Classifyモデルは[ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml)データセットで事前訓練されています。 + +[モデル](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models)は、最初の使用時にUltralyticsの最新の[リリース](https://github.com/ultralytics/assets/releases)から自動的にダウンロードされます。 + +| モデル | サイズ
(ピクセル) | mAPval
50-95 | 速度
CPU ONNX
(ms) | 速度
A100 TensorRT
(ms) | パラメータ数
(M) | FLOPs
(B) | +|--------------------------------------------------------------------------------------|--------------------|----------------------|-----------------------------|----------------------------------|--------------------|-------------------| +| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | +| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | +| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | +| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | +| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | + +- **mAPval** の値は[COCO val2017](http://cocodataset.org)データセットにおいて、単一モデル単一スケールでのものです。 +
再現方法: `yolo val detect data=coco.yaml device=0` +- **速度** は[Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/)インスタンスを使用してCOCO val画像に対して平均化されたものです。 +
再現方法: `yolo val detect data=coco128.yaml batch=1 device=0|cpu` + +## トレーニング + +YOLOv8nを画像サイズ640でCOCO128データセットに対して100エポックでトレーニングします。使用可能な引数の完全なリストについては、[設定](/../usage/cfg.md)ページをご覧ください。 + +!!! Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロードする + model = YOLO('yolov8n.yaml') # YAMLから新しいモデルを構築 + model = YOLO('yolov8n.pt') # 事前訓練済みモデルをロード(トレーニングに推奨) + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # YAMLから構築し、重みを転送 + + # モデルをトレーニングする + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # YAMLから新しいモデルを作成し、ゼロからトレーニングを開始 + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # 事前訓練済みの*.ptモデルからトレーニングを開始 + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # YAMLから新しいモデルを作成し、事前訓練済みの重みを転送してトレーニングを開始 + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### データセットの形式 + +YOLO検出データセットの形式の詳細は、[データセットガイド](../../../datasets/detect/index.md)に記載されています。他の形式(COCO等)からYOLO形式に既存のデータセットを変換するには、Ultralyticsの[JSON2YOLO](https://github.com/ultralytics/JSON2YOLO)ツールをご利用ください。 + +## 検証 + +トレーニングされたYOLOv8nモデルの精度をCOCO128データセットで検証します。引数は不要で、モデルはトレーニングの`data`と引数をモデル属性として保持しています。 + +!!! Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロードする + model = YOLO('yolov8n.pt') # 公式モデルをロード + model = YOLO('パス/ベスト.pt') # カスタムモデルをロード + + # モデルを検証する + metrics = model.val() # 引数不要、データセットと設定は記憶されている + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # 各カテゴリのmap50-95を含むリスト + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # 公式モデルを検証 + yolo detect val model=パス/ベスト.pt # カスタムモデルを検証 + ``` + +## 予測 + +トレーニングされたYOLOv8nモデルを使用して画像の予測を実行します。 + +!!! 
Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロードする + model = YOLO('yolov8n.pt') # 公式モデルをロード + model = YOLO('パス/ベスト.pt') # カスタムモデルをロード + + # モデルで予測 + results = model('https://ultralytics.com/images/bus.jpg') # 画像の予測実行 + ``` + === "CLI" + + ```bash + yolo detect predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' # 公式モデルで予測 + yolo detect predict model=パス/ベスト.pt source='https://ultralytics.com/images/bus.jpg' # カスタムモデルで予測 + ``` + +`predict`モードの詳細は、[Predict](https://docs.ultralytics.com/modes/predict/)ページで全て見ることができます。 + +## エクスポート + +YOLOv8nモデルをONNX、CoreMLなどの異なるフォーマットにエクスポートします。 + +!!! Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロード + model = YOLO('yolov8n.pt') # 公式モデルをロード + model = YOLO('パス/ベスト.pt') # カスタムトレーニングモデルをロード + + # モデルをエクスポート + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # 公式モデルをエクスポート + yolo export model=パス/ベスト.pt format=onnx # カスタムトレーニングモデルをエクスポート + ``` + +YOLOv8エクスポート可能なフォーマットのテーブルは以下です。エクスポート完了後に、エクスポートされたモデルで直接予測または検証が可能です。つまり、`yolo predict model=yolov8n.onnx` です。使用例はエクスポート完了後にモデルに表示されます。 + +| フォーマット | `format`引数 | モデル | メタデータ | 引数 | +|--------------------------------------------------------------------|---------------|---------------------------|-------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| 
[CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +`export`の詳細は、[Export](https://docs.ultralytics.com/modes/export/)ページで全て見ることができます。 diff --git a/docs/ja/tasks/index.md b/docs/ja/tasks/index.md new file mode 100644 index 0000000..bc5d3b7 --- /dev/null +++ b/docs/ja/tasks/index.md @@ -0,0 +1,55 @@ +--- +comments: true +description: YOLOv8が実行できる基本的なコンピュータービジョンタスクについて学び、検出、セグメンテーション、分類、ポーズ認識がAIプロジェクトでどのように使用されるかを理解します。 +keywords: Ultralytics, YOLOv8, 検出, セグメンテーション, 分類, ポーズ推定, AIフレームワーク, コンピュータービジョンタスク +--- + +# Ultralytics YOLOv8タスク + +
+Ultralytics YOLOがサポートするタスク + +YOLOv8は、複数のコンピュータービジョン**タスク**をサポートするAIフレームワークです。このフレームワークは、[検出](detect.md)、[セグメンテーション](segment.md)、[分類](classify.md)、及び[ポーズ](pose.md)推定を実行するために使用できます。これらのタスクはそれぞれ異なる目的と用途を持っています。 + +!!! Note "ノート" + + 🚧 当社の多言語ドキュメントは現在作成中であり、改善に向けて鋭意取り組んでいます。ご理解いただきありがとうございます!🙏 + +

+
+ +
+ 視聴する: Ultralytics YOLOタスクの探索:オブジェクト検出、セグメンテーション、トラッキング、ポーズ推定。 +

+ +## [検出](detect.md) + +検出はYOLOv8がサポートする基本的なタスクです。それは画像やビデオフレーム内のオブジェクトを検出し、周囲に境界ボックスを描くことを含みます。検出されたオブジェクトはその特徴に基づいて異なるカテゴリーに分類されます。YOLOv8は一枚の画像やビデオフレームに複数のオブジェクトを高い精度と速度で検出することができます。 + +[検出例](detect.md){ .md-button } + +## [セグメンテーション](segment.md) + +セグメンテーションは、画像の内容に基づいて画像を異なる領域に分割するタスクです。各領域はその内容に基づいてラベルが割り当てられます。このタスクは、画像分割や医療画像処理などのアプリケーションにおいて有用です。YOLOv8はU-Netアーキテクチャのバリエーションを使用してセグメンテーションを実行します。 + +[セグメンテーション例](segment.md){ .md-button } + +## [分類](classify.md) + +分類は、画像を異なるカテゴリーに分類するタスクです。YOLOv8は画像の内容に基づいて画像を分類するために使用できます。それはEfficientNetアーキテクチャのバリエーションを使用して分類を実行します。 + +[分類例](classify.md){ .md-button } + +## [ポーズ](pose.md) + +ポーズ/キーポイント検出は、画像やビデオフレーム内の特定の点を検出するタスクです。これらの点はキーポイントと呼ばれ、動きやポーズ推定を追跡するために使用されます。YOLOv8は高い精度と速度で画像やビデオフレーム内のキーポイントを検出することができます。 + +[ポーズ例](pose.md){ .md-button } + +## 結論 + +YOLOv8は、検出、セグメンテーション、分類、キーポイント検出を含む複数のタスクをサポートしています。これらのタスクはそれぞれ異なる目的と用途を持っています。これらのタスクの違いを理解することにより、コンピュータービジョンアプリケーションに適切なタスクを選択することができます。 diff --git a/docs/ja/tasks/pose.md b/docs/ja/tasks/pose.md new file mode 100644 index 0000000..d4acc94 --- /dev/null +++ b/docs/ja/tasks/pose.md @@ -0,0 +1,185 @@ +--- +comments: true +description: Ultralytics YOLOv8を使用してポーズ推定タスクを行う方法を学びます。事前トレーニング済みのモデルを見つけ、トレーニング、検証、予測、独自のエクスポートを行います。 +keywords: Ultralytics, YOLO, YOLOv8, ポーズ推定, キーポイント検出, 物体検出, 事前トレーニング済みモデル, 機械学習, 人工知能 +--- + +# ポーズ推定 + +ポーズ推定例 + +ポーズ推定は、通常キーポイントとして参照される画像内の特定の点の位置を識別するタスクです。キーポイントは、関節、ランドマーク、またはその他の特徴的な特徴など、対象物のさまざまな部分を表すことができます。キーポイントの位置は、通常2Dの `[x, y]` または3D `[x, y, visible]` 座標のセットとして表されます。 + +ポーズ推定モデルの出力は、画像内のオブジェクト上のキーポイントを表す一連の点であり、通常は各点の信頼スコアを伴います。ポーズ推定は、シーン内のオブジェクトの特定の部分と、それらが互いに対して位置する場所を特定する必要がある場合に適しています。 + +

+
+ +
+ 視聴: Ultralytics YOLOv8によるポーズ推定。 +

+ +!!! Tip "ヒント" + + YOLOv8 _pose_ モデルは `-pose` サフィックスを使用します。例:`yolov8n-pose.pt`。これらのモデルは [COCOキーポイント](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco-pose.yaml) データセットでトレーニングされ、多様なポーズ推定タスクに適しています。 + +## [モデル](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +YOLOv8事前トレーニング済みポーズモデルはこちらです。Detect, Segment, Poseモデルは [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) データセットで、Classifyモデルは [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) データセットで事前トレーニングされています。 + +[モデル](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models)は最新のUltralytics [リリース](https://github.com/ultralytics/assets/releases)から最初の使用時に自動的にダウンロードされます。 + +| モデル | サイズ
(ピクセル) | mAPポーズ
50-95 | mAPポーズ
50 | 速度
CPU ONNX
(ミリ秒) | 速度
A100 TensorRT
(ミリ秒) | パラメータ
(M) | FLOPs
(B) | +|------------------------------------------------------------------------------------------------------|--------------------|----------------------|-------------------|------------------------------|-----------------------------------|-------------------|-------------------| +| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | +| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | +| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | +| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | +| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | +| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | + +- **mAPval** の値は、[COCO Keypoints val2017](http://cocodataset.org)データセットでの単一モデル単一スケールに対するものです。 +
再現方法 `yolo val pose data=coco-pose.yaml device=0` +- **速度** は [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/)インスタンスを使用したCOCO val画像の平均です。 +
再現方法 `yolo val pose data=coco8-pose.yaml batch=1 device=0|cpu` + +## トレーニング + +COCO8-poseデータセットでYOLOv8-poseモデルをトレーニングします。 + +!!! Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロード + model = YOLO('yolov8n-pose.yaml') # 新しいモデルをYAMLからビルド + model = YOLO('yolov8n-pose.pt') # 事前トレーニング済みのモデルをロード(トレーニング用に推奨) + model = YOLO('yolov8n-pose.yaml').load('yolov8n-pose.pt') # YAMLからビルドして重みを転送 + + # モデルのトレーニング + results = model.train(data='coco8-pose.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # YAMLから新しいモデルをビルドし、最初からトレーニングを開始 + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml epochs=100 imgsz=640 + + # 事前トレーニング済みの*.ptモデルからトレーニングを開始 + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + + # YAMLから新しいモデルをビルド、事前トレーニング済みの重みを転送してトレーニングを開始 + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml pretrained=yolov8n-pose.pt epochs=100 imgsz=640 + ``` + +### データセットフォーマット + +YOLOポーズデータセットフォーマットの詳細は、[データセットガイド](../../../datasets/pose/index.md)に記載されています。既存のデータセットを他のフォーマット(COCOなど)からYOLOフォーマットに変換するには、Ultralyticsの[JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) ツールをご使用ください。 + +## Val + +COCO8-poseデータセットでトレーニングされたYOLOv8n-poseモデルの精度を検証します。引数は必要なく、`model`にはトレーニング`data`と引数がモデル属性として保持されます。 + +!!! Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロード + model = YOLO('yolov8n-pose.pt') # 公式モデルをロード + model = YOLO('path/to/best.pt') # カスタムモデルをロード + + # モデルを検証 + metrics = model.val() # データセットや設定は記録されているため引数は不要 + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # 各カテゴリのmap50-95が含まれるリスト + ``` + === "CLI" + + ```bash + yolo pose val model=yolov8n-pose.pt # 公式モデルを検証 + yolo pose val model=path/to/best.pt # カスタムモデルを検証 + ``` + +## Predict + +トレーニング済みのYOLOv8n-poseモデルを使用して画像を予測します。 + +!!! 
Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロード + model = YOLO('yolov8n-pose.pt') # 公式モデルをロード + model = YOLO('path/to/best.pt') # カスタムモデルをロード + + # モデルで予測 + results = model('https://ultralytics.com/images/bus.jpg') # 画像に予測を実行 + ``` + === "CLI" + + ```bash + yolo pose predict model=yolov8n-pose.pt source='https://ultralytics.com/images/bus.jpg' # 公式モデルで予測 + yolo pose predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # カスタムモデルで予測 + ``` + +`predict`モードの詳細を[Predict](https://docs.ultralytics.com/modes/predict/)ページでご覧いただけます。 + +## Export + +YOLOv8n PoseモデルをONNX、CoreMLなどの異なるフォーマットにエクスポートします。 + +!!! Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロード + model = YOLO('yolov8n-pose.pt') # 公式モデルをロード + model = YOLO('path/to/best.pt') # カスタムトレーニング済みモデルをロード + + # モデルをエクスポート + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-pose.pt format=onnx # 公式モデルをエクスポート + yolo export model=path/to/best.pt format=onnx # カスタムトレーニング済みモデルをエクスポート + ``` + +利用可能なYOLOv8-poseエクスポートフォーマットは以下の表に示されており、エクスポート完了後にお使いのモデルに関する使用例が示されます。 + +| フォーマット | `format`引数 | モデル | メタデータ | 引数 | +|--------------------------------------------------------------------|---------------|--------------------------------|-------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-pose.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-pose.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-pose.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-pose_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-pose.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| 
[CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-pose.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-pose_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-pose.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-pose.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-pose_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-pose_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-pose_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-pose_ncnn_model/` | ✅ | `imgsz`, `half` | + +`export`の詳細は[Export](https://docs.ultralytics.com/modes/export/)ページでご覧いただけます。 diff --git a/docs/ja/tasks/segment.md b/docs/ja/tasks/segment.md new file mode 100644 index 0000000..bc6eae3 --- /dev/null +++ b/docs/ja/tasks/segment.md @@ -0,0 +1,186 @@ +--- +comments: true +description: Ultralytics YOLOを使用してインスタンスセグメンテーションモデルを使いこなす方法を学びましょう。トレーニング、バリデーション、画像予測、モデルエクスポートに関する指示が含まれています。 +keywords: yolov8, インスタンスセグメンテーション, Ultralytics, COCOデータセット, 画像セグメンテーション, オブジェクト検出, モデルトレーニング, モデルバリデーション, 画像予測, モデルエクスポート +--- + +# インスタンスセグメンテーション + +インスタンスセグメンテーションの例 + +インスタンスセグメンテーションはオブジェクト検出を一歩進めており、画像内の個々のオブジェクトを識別し、それらを画像の残りの部分からセグメント化します。 + +インスタンスセグメンテーションモデルの出力は、画像内の各オブジェクトを概説するマスクまたは輪郭のセットであり、各オブジェクトにはクラスラベルと信頼スコアが含まれています。オブジェクトの位置だけでなく、その正確な形状を知る必要がある場合に、インスタンスセグメンテーションが役立ちます。 + +

+
+ +
+ 視聴: Pythonで事前トレーニング済みのUltralytics YOLOv8モデルでセグメンテーションを実行する。 +

+ +!!! Tip "ヒント" + + YOLOv8セグメントモデルは`-seg`サフィックスを使用し、つまり`yolov8n-seg.pt`などは[COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml)で事前トレーニングされています。 + +## [モデル](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +ここでは、事前トレーニングされたYOLOv8セグメントモデルが示されています。Detect、Segment、Poseモデルは[COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml)データセットで事前トレーニングされている一方、Classifyモデルは[ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml)データセットで事前トレーニングされています。 + +[モデル](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models)は初回使用時に最新のUltralytics[リリース](https://github.com/ultralytics/assets/releases)から自動的にダウンロードされます。 + +| モデル | サイズ
(ピクセル) | mAPbox
50-95 | mAPmask
50-95 | スピード
CPU ONNX
(ms) | スピード
A100 TensorRT
(ms) | パラメータ
(M) | FLOPs
(B) | +|----------------------------------------------------------------------------------------------|--------------------|----------------------|-----------------------|-------------------------------|------------------------------------|-------------------|-------------------| +| [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | +| [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | +| [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | +| [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | +| [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | + +- **mAPval**の値は[COCO val2017](http://cocodataset.org)データセットでの単一モデル単一スケールの値です。 +
再現するには `yolo val segment data=coco.yaml device=0` +- **スピード**は[Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/)インスタンスを使用してCOCO val画像で平均化されます。 +
再現するには `yolo val segment data=coco128-seg.yaml batch=1 device=0|cpu` + +## トレーニング + +COCO128-segデータセットで、画像サイズ640でYOLOv8n-segを100エポックトレーニングします。利用可能な全ての引数については、[コンフィギュレーション](/../usage/cfg.md)ページを参照してください。 + +!!! Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロード + model = YOLO('yolov8n-seg.yaml') # YAMLから新しいモデルをビルド + model = YOLO('yolov8n-seg.pt') # 事前トレーニングされたモデルをロード(トレーニングに推奨) + model = YOLO('yolov8n-seg.yaml').load('yolov8n.pt') # YAMLからビルドしウェイトを移行 + + # モデルをトレーニング + results = model.train(data='coco128-seg.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # YAMLから新しいモデルをビルドしゼロからトレーニングを開始 + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml epochs=100 imgsz=640 + + # 事前トレーニング済みの*.ptモデルからトレーニングを開始 + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + + # YAMLから新しいモデルをビルドし、事前トレーニング済みウェイトを移行してトレーニングを開始 + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml pretrained=yolov8n-seg.pt epochs=100 imgsz=640 + ``` + +### データセットフォーマット + +YOLOセグメンテーションデータセットのフォーマットの詳細は、[データセットガイド](../../../datasets/segment/index.md)で見つけることができます。既存のデータセットを他のフォーマット(例えばCOCOなど)からYOLOフォーマットに変換するには、Ultralyticsの[JSON2YOLO](https://github.com/ultralytics/JSON2YOLO)ツールを使用してください。 + +## 評価 + +訓練されたYOLOv8n-segモデルの精度をCOCO128-segデータセットで検証します。引数は必要ありません、なぜなら`model`はモデル属性としてトレーニング`data`と引数を保持しているからです。 + +!!! 
Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロード + model = YOLO('yolov8n-seg.pt') # 公式モデルをロード + model = YOLO('path/to/best.pt') # カスタムモデルをロード + + # モデルを評価 + metrics = model.val() # 引数は必要なし、データセットと設定は記憶している + metrics.box.map # map50-95(B) + metrics.box.map50 # map50(B) + metrics.box.map75 # map75(B) + metrics.box.maps # 各カテゴリのmap50-95(B)のリスト + metrics.seg.map # map50-95(M) + metrics.seg.map50 # map50(M) + metrics.seg.map75 # map75(M) + metrics.seg.maps # 各カテゴリのmap50-95(M)のリスト + ``` + === "CLI" + + ```bash + yolo segment val model=yolov8n-seg.pt # 公式モデルを評価 + yolo segment val model=path/to/best.pt # カスタムモデルを評価 + ``` + +## 予測 + +訓練されたYOLOv8n-segモデルを使用して画像の予測を実行します。 + +!!! Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロード + model = YOLO('yolov8n-seg.pt') # 公式モデルをロード + model = YOLO('path/to/best.pt') # カスタムモデルをロード + + # モデルで予測 + results = model('https://ultralytics.com/images/bus.jpg') # 画像で予測 + ``` + === "CLI" + + ```bash + yolo segment predict model=yolov8n-seg.pt source='https://ultralytics.com/images/bus.jpg' # 公式モデルで予測 + yolo segment predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # カスタムモデルで予測 + ``` + +`predict`モードの完全な詳細は、[予測](https://docs.ultralytics.com/modes/predict/)ページにて確認できます。 + +## エクスポート + +YOLOv8n-segモデルをONNX、CoreMLなどの別の形式にエクスポートします。 + +!!! 
Example "例" + + === "Python" + + ```python + from ultralytics import YOLO + + # モデルをロード + model = YOLO('yolov8n-seg.pt') # 公式モデルをロード + model = YOLO('path/to/best.pt') # カスタムトレーニングされたモデルをロード + + # モデルをエクスポート + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-seg.pt format=onnx # 公式モデルをエクスポート + yolo export model=path/to/best.pt format=onnx # カスタムトレーニングされたモデルをエクスポート + ``` + +ご利用可能なYOLOv8-segエクスポート形式は以下の表に示されています。エクスポートされたモデルに直接予測または評価が可能です、つまり `yolo predict model=yolov8n-seg.onnx`。エクスポートが完了した後に、モデルの使用例が表示されます。 + +| 形式 | `format`引数 | モデル | メタデータ | 引数 | +|--------------------------------------------------------------------|---------------|-------------------------------|-------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-seg.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-seg.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-seg.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-seg_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-seg.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-seg.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-seg_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-seg.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-seg.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-seg_edgetpu.tflite` | ✅ | `imgsz` | +| 
[TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-seg_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-seg_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-seg_ncnn_model/` | ✅ | `imgsz`, `half` | diff --git a/docs/ko/index.md b/docs/ko/index.md new file mode 100644 index 0000000..cf6acbe --- /dev/null +++ b/docs/ko/index.md @@ -0,0 +1,82 @@ +--- +comments: true +description: Ultralytics YOLOv8을 완벽하게 탐구하는 가이드로, 고속 및 정확성이 특징인 객체 탐지 및 이미지 분할 모델입니다. 설치, 예측, 훈련 튜토리얼 등이 포함되어 있습니다. +keywords: Ultralytics, YOLOv8, 객체 탐지, 이미지 분할, 기계 학습, 딥러닝, 컴퓨터 비전, YOLOv8 설치, YOLOv8 예측, YOLOv8 훈련, YOLO 역사, YOLO 라이센스 +--- + +
+

+ + Ultralytics YOLO 배너 +

+ Ultralytics GitHub + space + Ultralytics LinkedIn + space + Ultralytics Twitter + space + Ultralytics YouTube + space + Ultralytics TikTok + space + Ultralytics Instagram + space + Ultralytics Discord +
+
+ Ultralytics CI + Ultralytics 코드 커버리지 + YOLOv8 인용 + Docker 당기기 +
+ Run on Gradient + Open In Colab + Open In Kaggle +
+ +Ultralytics의 최신 버전인 [YOLOv8](https://github.com/ultralytics/ultralytics)을 소개합니다. 이 모델은 딥러닝과 컴퓨터 비전의 최신 발전을 바탕으로 구축되었으며, 속도와 정확성 면에서 뛰어난 성능을 제공합니다. 간결한 설계로 인해 다양한 애플리케이션에 적합하며, 엣지 디바이스에서부터 클라우드 API에 이르기까지 다양한 하드웨어 플랫폼에 쉽게 적응 가능합니다. + +YOLOv8 문서를 탐구하여, 그 기능과 능력을 이해하고 활용할 수 있도록 돕는 종합적인 자원입니다. 기계 학습 분야에서 경험이 많건, 새롭게 시작하는 이들이건, 이 허브는 YOLOv8의 잠재력을 극대화하기 위해 설계되었습니다. + +!!! Note "노트" + + 🚧 다국어 문서는 현재 제작 중이며, 이를 개선하기 위해 노력하고 있습니다. 인내해 주셔서 감사합니다! 🙏 + +## 시작하기 + +- **설치** `ultralytics`를 pip으로 설치하고 몇 분 만에 시작하세요   [:material-clock-fast: 시작하기](quickstart.md){ .md-button } +- **예측** YOLOv8로 새로운 이미지와 비디오를 감지하세요   [:octicons-image-16: 이미지에서 예측하기](modes/predict.md){ .md-button } +- **훈련** 새로운 YOLOv8 모델을 사용자의 맞춤 데이터셋으로 훈련하세요   [:fontawesome-solid-brain: 모델 훈련하기](modes/train.md){ .md-button } +- **탐험** 세분화, 분류, 자세 인식, 추적과 같은 YOLOv8 작업   [:material-magnify-expand: 작업 탐험하기](tasks/index.md){ .md-button } + +

+
+ +
+ 시청하기: 사용자의 맞춤 데이터셋으로 YOLOv8 모델을 훈련하는 방법을 Google Colab에서 알아보세요. +

+ +## YOLO: 간단한 역사 + +[YOLO](https://arxiv.org/abs/1506.02640) (You Only Look Once, 단 한 번의 검사)는 워싱턴 대학교의 Joseph Redmon과 Ali Farhadi가 개발한 인기 있는 객체 탐지 및 이미지 분할 모델입니다. 2015년에 출시된 YOLO는 그 빠른 속도와 정확성으로 인해 빠르게 인기를 얻었습니다. + +- [YOLOv2](https://arxiv.org/abs/1612.08242)는 2016년에 공개되었으며 배치 정규화, 앵커 박스, 차원 클러스터를 통합하여 원본 모델을 개선했습니다. +- [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf)는 2018년에 출시되어 더 효율적인 백본 네트워크, 복수 앵커 및 공간 피라미드 풀링을 사용하여 모델의 성능을 더욱 향상시켰습니다. +- [YOLOv4](https://arxiv.org/abs/2004.10934)는 2020년에 나와서 모자이크 데이터 증강, 새로운 앵커-프리 탐지 헤드, 새로운 손실 함수와 같은 혁신을 도입했습니다. +- [YOLOv5](https://github.com/ultralytics/yolov5)는 모델의 성능을 더욱 향상시키고 하이퍼파라미터 최적화, 통합 실험 추적 및 널리 쓰이는 내보내기 형식으로의 자동 내보내기와 같은 새로운 기능을 추가했습니다. +- [YOLOv6](https://github.com/meituan/YOLOv6)는 2022년에 [Meituan](https://about.meituan.com/)에 의해 오픈 소스화되었으며, 이 회사의 자율 배달 로봇에서 사용되고 있습니다. +- [YOLOv7](https://github.com/WongKinYiu/yolov7)는 COCO 키포인트 데이터셋에서의 자세 추정과 같은 추가 작업을 추가했습니다. +- [YOLOv8](https://github.com/ultralytics/ultralytics)은 Ultralytics에서 출시한 YOLO의 최신 버전입니다. 최첨단(SOTA) 모델로서, YOLOv8은 이전 버전들의 성공을 기반으로 새로운 기능과 개선 사항을 도입하여 성능, 유연성 및 효율성을 향상시켰습니다. YOLOv8은 [탐지](tasks/detect.md), [분할](tasks/segment.md), [자세 추정](tasks/pose.md), [추적](modes/track.md), [분류](tasks/classify.md)를 포함하여 다양한 비전 AI 작업을 지원합니다. 이러한 다재다능함은 사용자들이 다양한 애플리케이션과 도메인 전반에 걸쳐 YOLOv8의 능력을 활용할 수 있도록 합니다. + +## YOLO 라이센스: Ultralytics YOLO는 어떻게 라이센스가 부여되나요? + +Ultralytics는 다양한 사용 사례에 맞춰 두 가지 라이선스 옵션을 제공합니다: + +- **AGPL-3.0 라이선스**: 이 [OSI 승인](https://opensource.org/licenses/) 오픈 소스 라이선스는 학생 및 애호가에게 이상적입니다. 오픈 협력과 지식 공유를 촉진합니다. 자세한 내용은 [라이선스](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) 파일을 참조하세요. +- **기업 라이선스**: 상업적 사용을 위해 설계된 이 라이선스는 Ultralytics 소프트웨어 및 AI 모델을 상업적 제품 및 서비스에 원활하게 통합할 수 있게 하여 AGPL-3.0의 오픈 소스 요건을 우회할 수 있습니다. 상업적 제공물에 솔루션을 내장하는 시나리오에 관여하는 경우 [Ultralytics 라이선싱](https://ultralytics.com/license)을 통해 문의하시기 바랍니다. + +우리의 라이선스 전략은 오픈 소스 프로젝트에 대한 개선 사항이 커뮤니티에 되돌아가도록 보장하려는 것입니다. 
우리는 오픈 소스의 원칙을 가슴 깊이 새기고 있으며, 우리의 기여가 모두에게 유용한 방식으로 활용되고 확장될 수 있도록 보장하는 것이 우리의 사명입니다.❤️ diff --git a/docs/ko/models/fast-sam.md b/docs/ko/models/fast-sam.md new file mode 100644 index 0000000..4480421 --- /dev/null +++ b/docs/ko/models/fast-sam.md @@ -0,0 +1,193 @@ +--- +comments: true +description: FastSAM은 이미지에서 실시간 객체 분할을 위한 CNN 기반 솔루션으로, 향상된 사용자 상호작용, 계산 효율성, 다양한 비전 작업에 대응할 수 있는 특징을 갖고 있습니다. +keywords: FastSAM, 머신러닝, CNN 기반 솔루션, 객체 분할, 실시간 솔루션, Ultralytics, 비전 작업, 이미지 처리, 산업 응용, 사용자 상호작용 +--- + +# Fast Segment Anything Model (FastSAM) + +Fast Segment Anything Model (FastSAM)은 Segment Anything 작업을 위한 새로운 실시간 CNN 기반 솔루션입니다. 이 작업은 다양한 사용자 상호작용 프롬프트에 따라 이미지 내의 모든 객체를 분할하는 것을 목표로 합니다. FastSAM은 계산 요구 사항을 크게 줄이면서 경쟁력 있는 성능을 유지하기 때문에 다양한 비전 작업에 실용적인 선택지가 될 수 있습니다. + +![Fast Segment Anything Model (FastSAM) 아키텍처 개요](https://user-images.githubusercontent.com/26833433/248551984-d98f0f6d-7535-45d0-b380-2e1440b52ad7.jpg) + +## 개요 + +FastSAM은 계산 리소스 요구 사항이 큰 Transformer 모델인 Segment Anything Model (SAM)의 한계를 해결하기 위해 설계되었습니다. FastSAM은 Segment Anything 작업을 두 단계로 분리한 방식을 채택합니다: 모든 인스턴스 분할과 프롬프트로 인한 영역 선택. 첫 번째 단계에서는 [YOLOv8-seg](../tasks/segment.md)를 사용하여 이미지의 모든 인스턴스의 분할 마스크를 생성합니다. 두 번째 단계에서는 프롬프트에 해당하는 관심 영역을 출력합니다. + +## 주요 특징 + +1. **실시간 솔루션**: CNN의 계산 효율성을 활용하여 FastSAM은 Segment Anything 작업에 대한 실시간 솔루션을 제공하며, 빠른 결과가 필요한 산업 응용에 가치가 있습니다. + +2. **효율성과 성능**: FastSAM은 성능 품질을 희생하지 않고 계산과 리소스 요구 사항을 크게 줄입니다. SAM과 비교해 유사한 성능을 달성하면서 계산 리소스를 크게 줄여 실시간 응용이 가능해집니다. + +3. **프롬프트 안내 분할**: FastSAM은 다양한 사용자 상호작용 프롬프트에 따라 이미지 내의 모든 객체를 분할할 수 있으므로 다양한 시나리오에서 유연성과 적응성을 제공합니다. + +4. **YOLOv8-seg 기반**: FastSAM은 [YOLOv8-seg](../tasks/segment.md)를 기반으로 한 것으로, 인스턴스 분할 브랜치가 장착된 객체 감지기입니다. 이를 통해 이미지의 모든 인스턴스의 분할 마스크를 효과적으로 생성할 수 있습니다. + +5. **벤치마크에서 경쟁 결과**: MS COCO에서의 객체 제안 작업에서 FastSAM은 [SAM](sam.md)에 비해 단일 NVIDIA RTX 3090에서 훨씬 더 빠른 속도로 높은 점수를 달성하여 효율성과 능력을 입증했습니다. + +6. **실용적인 응용**: FastSAM은 현재 방법보다 수십 배 또는 수백 배 더 빠른 속도로 여러 비전 작업의 신속한 솔루션을 제공하여 실질적인 적용 가능성을 제시합니다. + +7. 
**모델 압축 가능성**: FastSAM은 구조에 인위적인 사전 지식(prior)을 도입하여 계산 비용을 크게 줄일 수 있는 경로를 보여주어, 범용 비전 작업을 위한 대형 모델 아키텍처에 새로운 가능성을 열어줍니다. + +## 사용 가능한 모델, 지원되는 작업 및 운영 모드 + +이 표는 사용 가능한 모델과 해당하는 사전 훈련 가중치, 지원하는 작업 및 [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), [Export](../modes/export.md)와 같은 다른 운영 모드에 대한 호환성을 나타내며, 지원되는 모드는 ✅ 이모지로, 지원되지 않는 모드는 ❌ 이모지로 표시됩니다. + +| 모델 유형 | 사전 훈련 가중치 | 지원되는 작업 | Inference | Validation | Training | Export | +|-----------|----------------|--------------------------------|-----------|------------|----------|--------| +| FastSAM-s | `FastSAM-s.pt` | [인스턴스 분할](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| FastSAM-x | `FastSAM-x.pt` | [인스턴스 분할](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## 사용 예시 + +FastSAM 모델을 Python 애플리케이션에 쉽게 통합할 수 있습니다. Ultralytics는 개발을 간소화하기 위해 사용자 친화적인 Python API 및 CLI 명령을 제공합니다. + +### 예측 사용법 + +이미지에서 객체 검출을 수행하려면 다음과 같이 `predict` 메서드를 사용합니다: + +!!! Example "예제" + + === "Python" + ```python + from ultralytics import FastSAM + from ultralytics.models.fastsam import FastSAMPrompt + + # 추론 소스 정의 + source = 'path/to/bus.jpg' + + # FastSAM 모델 생성 + model = FastSAM('FastSAM-s.pt') # 또는 FastSAM-x.pt + + # 이미지에 대한 추론 실행 + everything_results = model(source, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9) + + # Prompt Process 객체 준비 + prompt_process = FastSAMPrompt(source, everything_results, device='cpu') + + # 모든 프롬프트 + ann = prompt_process.everything_prompt() + + # 바운딩 박스 프롬프트: 기본값 [0,0,0,0], 형식 [x1,y1,x2,y2] + ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300]) + + # 텍스트 프롬프트 + ann = prompt_process.text_prompt(text='a photo of a dog') + + # 포인트 프롬프트 + # 기본 포인트 [[0,0]] [[x1,y1],[x2,y2]] + # 기본 포인트 레이블 [0] [1,0] 0:배경, 1:전경 + ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1]) + prompt_process.plot(annotations=ann, output='./') + ``` + + === "CLI" + ```bash + # FastSAM 모델 로드 및 모든 것을 세분화하여 추출 + yolo segment predict model=FastSAM-s.pt source=path/to/bus.jpg imgsz=640 + 
``` + +이 코드 조각은 사전 훈련된 모델을 로드하고 이미지에 대한 예측을 실행하는 간편함을 보여줍니다. + +### 검증 사용법 + +데이터셋에서 모델을 검증하는 방법은 다음과 같습니다: + +!!! Example "예제" + + === "Python" + ```python + from ultralytics import FastSAM + + # FastSAM 모델 생성 + model = FastSAM('FastSAM-s.pt') # 또는 FastSAM-x.pt + + # 모델 검증 + results = model.val(data='coco8-seg.yaml') + ``` + + === "CLI" + ```bash + # FastSAM 모델 로드 및 이미지 크기 640에서 COCO8 예제 데이터셋에 대해 유효성 검사 + yolo segment val model=FastSAM-s.pt data=coco8.yaml imgsz=640 + ``` + +FastSAM은 단일 클래스 객체의 감지와 분할만 지원합니다. 이는 모든 객체를 동일한 클래스로 인식하고 분할한다는 의미입니다. 따라서 데이터셋을 준비할 때 모든 객체 카테고리 ID를 0으로 변환해야 합니다. + +## FastSAM 공식 사용법 + +FastSAM은 [https://github.com/CASIA-IVA-Lab/FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM) 저장소에서 직접 사용할 수도 있습니다. FastSAM을 사용하기 위해 수행할 일반적인 단계를 간단히 소개합니다: + +### 설치 + +1. FastSAM 저장소를 복제합니다: + ```shell + git clone https://github.com/CASIA-IVA-Lab/FastSAM.git + ``` + +2. Python 3.9로 Conda 환경을 생성하고 활성화합니다: + ```shell + conda create -n FastSAM python=3.9 + conda activate FastSAM + ``` + +3. 복제한 저장소로 이동하여 필요한 패키지를 설치합니다: + ```shell + cd FastSAM + pip install -r requirements.txt + ``` + +4. CLIP 모델을 설치합니다: + ```shell + pip install git+https://github.com/openai/CLIP.git + ``` + +### 예시 사용법 + +1. [모델 체크포인트](https://drive.google.com/file/d/1m1sjY4ihXBU1fZXdQ-Xdj-mDltW-2Rqv/view?usp=sharing)를 다운로드합니다. + +2. FastSAM을 추론하기 위해 다음과 같이 사용합니다. 
예시 명령어: + + - 이미지에서 모든 것을 세분화: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg + ``` + + - 텍스트 프롬프트를 사용하여 특정 객체를 세분화: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --text_prompt "the yellow dog" + ``` + + - 바운딩 박스 내의 객체를 세분화 (xywh 형식으로 상자 좌표 제공): + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --box_prompt "[570,200,230,400]" + ``` + + - 특정 지점 근처의 객체를 세분화: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --point_prompt "[[520,360],[620,300]]" --point_label "[1,0]" + ``` + +또한, FastSAM을 [Colab 데모](https://colab.research.google.com/drive/1oX14f6IneGGw612WgVlAiy91UHwFAvr9?usp=sharing) 또는 [HuggingFace 웹 데모](https://huggingface.co/spaces/An-619/FastSAM)에서 시각적인 경험으로 시도해 볼 수 있습니다. + +## 인용 및 감사의 말씀 + +FastSAM의 실시간 인스턴스 분할 분야에 대한 혁신적인 기여를 위해 FastSAM 저자들에게 감사의 말씀을 전합니다: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{zhao2023fast, + title={Fast Segment Anything}, + author={Xu Zhao and Wenchao Ding and Yongqi An and Yinglong Du and Tao Yu and Min Li and Ming Tang and Jinqiao Wang}, + year={2023}, + eprint={2306.12156}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +FastSAM 원본 논문은 [arXiv](https://arxiv.org/abs/2306.12156)에서 찾을 수 있습니다. 저자들은 자신들의 작업을 공개적으로 제공하였으며, 코드베이스는 [GitHub](https://github.com/CASIA-IVA-Lab/FastSAM)에서 이용할 수 있습니다. 저자들의 노력에 감사드리며 저작물을 더 폭넓은 커뮤니티에 알리기 위한 기여를 기대합니다. diff --git a/docs/ko/models/index.md b/docs/ko/models/index.md new file mode 100644 index 0000000..78f43ac --- /dev/null +++ b/docs/ko/models/index.md @@ -0,0 +1,98 @@ +--- +comments: true +description: Ultralytics가 지원하는 다양한 YOLO 계열 모델, SAM, MobileSAM, FastSAM, YOLO-NAS, RT-DETR에 대해 알아보고 CLI와 Python 사용 예제를 통해 시작하세요. +keywords: Ultralytics, 문서화, YOLO, SAM, MobileSAM, FastSAM, YOLO-NAS, RT-DETR, 모델, 아키텍처, Python, CLI +--- + +# Ultralytics가 지원하는 모델들 + +Ultralytics 모델 문서에 오신 것을 환영합니다! 
우리는 [객체 감지](../tasks/detect.md), [인스턴스 분할](../tasks/segment.md), [이미지 분류](../tasks/classify.md), [자세 추정](../tasks/pose.md), [다중 객체 추적](../modes/track.md)과 같은 특정 작업에 맞춰진 다양한 범위의 모델을 지원합니다. Ultralytics에 모델 아키텍처를 기여하고 싶다면, [기여 가이드](../../help/contributing.md)를 확인해 보세요. + +!!! Note "주의사항" + + 🚧 현재 다양한 언어로 된 문서 작업이 진행 중이며, 이를 개선하기 위해 열심히 노력하고 있습니다. 인내해 주셔서 감사합니다! 🙏 + +## 주요 모델들 + +다음은 지원되는 핵심 모델 목록입니다: + +1. **[YOLOv3](yolov3.md)**: Joseph Redmon에 의해 최초로 만들어진 YOLO 모델 패밀리의 세 번째 버전으로, 효율적인 실시간 객체 감지 능력으로 알려져 있습니다. +2. **[YOLOv4](yolov4.md)**: 2020년 Alexey Bochkovskiy가 발표한 YOLOv3의 다크넷 기반 업데이트 버전입니다. +3. **[YOLOv5](yolov5.md)**: Ultralytics에 의해 향상된 YOLO 아키텍처로, 이전 버전들에 비해 더 나은 성능과 속도 트레이드오프를 제공합니다. +4. **[YOLOv6](yolov6.md)**: [미투안](https://about.meituan.com/)에서 2022년에 발표하여, 회사의 자율 주행 배달 로봇에서 많이 사용되고 있습니다. +5. **[YOLOv7](yolov7.md)**: YOLOv4의 저자들에 의해 2022년에 업데이트된 YOLO 모델들입니다. +6. **[YOLOv8](yolov8.md) 새로운 🚀**: YOLO 패밀리의 최신 버전으로, 인스턴스 분할, 자세/키포인트 추정, 분류 등 향상된 기능을 제공합니다. +7. **[Segment Anything Model (SAM)](sam.md)**: 메타의 Segment Anything Model (SAM)입니다. +8. **[Mobile Segment Anything Model (MobileSAM)](mobile-sam.md)**: 경희대학교에서 모바일 어플리케이션을 위해 개발한 MobileSAM입니다. +9. **[Fast Segment Anything Model (FastSAM)](fast-sam.md)**: 중국 과학원 자동화 연구소의 이미지 및 비디오 분석 그룹에 의해 개발된 FastSAM입니다. +10. **[YOLO-NAS](yolo-nas.md)**: YOLO Neural Architecture Search (NAS) 모델들입니다. +11. **[Realtime Detection Transformers (RT-DETR)](rtdetr.md)**: 바이두의 PaddlePaddle Realtime Detection Transformer (RT-DETR) 모델들입니다. + +

+
+ +
+ 시청하기: 몇 줄의 코드로 Ultralytics YOLO 모델을 실행하세요. +

+ +## 시작하기: 사용 예제 + +이 예제는 YOLO 학습과 추론에 대한 간단한 예제를 제공합니다. 이에 대한 전체 문서는 [예측](../modes/predict.md), [학습](../modes/train.md), [검증](../modes/val.md), [내보내기](../modes/export.md) 문서 페이지에서 확인할 수 있습니다. + +아래 예제는 객체 감지를 위한 YOLOv8 [감지](../tasks/detect.md) 모델에 대한 것입니다. 추가적으로 지원되는 작업들은 [분할](../tasks/segment.md), [분류](../tasks/classify.md), [자세](../tasks/pose.md) 문서를 참조하세요. + +!!! Example "예제" + + === "Python" + + PyTorch로 사전 학습된 `*.pt` 모델들과 구성 `*.yaml` 파일들은 `YOLO()`, `SAM()`, `NAS()`, `RTDETR()` 클래스에 전달하여 파이썬에서 모델 인스턴스를 생성할 수 있습니다: + + ```python + from ultralytics import YOLO + + # COCO로 사전 학습된 YOLOv8n 모델 불러오기 + model = YOLO('yolov8n.pt') + + # 모델 정보 표시 (선택사항) + model.info() + + # COCO8 예제 데이터셋에 대해 100 에포크 동안 모델 학습 + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # 'bus.jpg' 이미지에 대한 YOLOv8n 모델 추론 실행 + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + 모델을 직접 실행하기 위한 CLI 명령어가 제공됩니다: + + ```bash + # COCO로 사전 학습된 YOLOv8n 모델을 불러와 COCO8 예제 데이터셋에서 100 에포크 동안 학습 + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # COCO로 사전 학습된 YOLOv8n 모델을 불러와 'bus.jpg' 이미지에 대한 추론 실행 + yolo predict model=yolov8n.pt source=path/to/bus.jpg + ``` + +## 새로운 모델 기여하기 + +Ultralytics에 여러분의 모델을 기여하고 싶으신가요? 훌륭합니다! 우리는 항상 모델 포트폴리오를 확장하는 것에 열려 있습니다. + +1. **저장소 포크하기**: [Ultralytics GitHub 저장소](https://github.com/ultralytics/ultralytics)를 포크하여 시작합니다. + +2. **포크 복제하기**: 포크한 저장소를 로컬 기계에 복제하고 새로운 브랜치를 생성하여 작업합니다. + +3. **모델 구현하기**: 우리의 [기여 가이드](../../help/contributing.md)에 제공된 코딩 표준 및 가이드라인을 따라 모델을 추가합니다. + +4. **철저히 테스트하기**: 독립적으로뿐만 아니라 파이프라인의 일부로도 모델을 철저히 테스트해야 합니다. + +5. **풀 리퀘스트 생성하기**: 모델에 만족하게 되면, 리뷰를 위해 메인 저장소에 풀 리퀘스트를 생성합니다. + +6. **코드 리뷰 & 병합**: 리뷰 후, 여러분의 모델이 우리 기준에 부합한다면 메인 저장소에 병합됩니다. + +자세한 단계는 [기여 가이드](../../help/contributing.md)를 참조해주십시오. 
diff --git a/docs/ko/models/mobile-sam.md b/docs/ko/models/mobile-sam.md new file mode 100644 index 0000000..4e9169b --- /dev/null +++ b/docs/ko/models/mobile-sam.md @@ -0,0 +1,116 @@ +--- +comments: true +description: Ultralytics 프레임워크에서 MobileSAM을 다운로드하고 테스트하는 방법, MobileSAM의 구현 방식, 원본 SAM과의 비교, 모바일 애플리케이션 향상 등에 대해 자세히 알아보세요. 오늘부터 모바일 애플리케이션을 개선하세요. +keywords: MobileSAM, Ultralytics, SAM, 모바일 애플리케이션, Arxiv, GPU, API, 이미지 인코더, 마스크 디코더, 모델 다운로드, 테스트 방법 +--- + +![MobileSAM 로고](https://github.com/ChaoningZhang/MobileSAM/blob/master/assets/logo2.png?raw=true) + +# Mobile Segment Anything (MobileSAM) + +MobileSAM 논문은 이제 [arXiv](https://arxiv.org/pdf/2306.14289.pdf)에서 사용할 수 있습니다. + +MobileSAM을 CPU에서 실행하는 데모는 이 [데모 링크](https://huggingface.co/spaces/dhkim2810/MobileSAM)에서 확인할 수 있습니다. Mac i5 CPU에서의 성능은 약 3초입니다. Hugging Face 데모에서는 인터페이스와 낮은 성능의 CPU가 느린 응답으로 이어지지만, 여전히 효과적으로 작동합니다. + +MobileSAM은 [Grounding-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything), [AnyLabeling](https://github.com/vietanhdev/anylabeling) 및 [Segment Anything in 3D](https://github.com/Jumpat/SegmentAnythingin3D)를 비롯한 여러 프로젝트에서 구현되었습니다. + +MobileSAM은 1%의 원본 이미지로 구성된 100k 데이터셋에서 하루 이내에 단일 GPU로 학습됩니다. 이 학습을 위한 코드는 나중에 공개될 예정입니다. + +## 사용 가능한 모델, 지원되는 작업 및 작동 모드 + +이 표에서는 사용 가능한 모델과 해당 모델에 대한 사전 훈련 가중치, 지원되는 작업, [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md) 및 [Export](../modes/export.md)와 같은 다양한 작동 모드의 호환성을 나타냅니다. 지원되는 모드는 ✅ 이모지로 표시되고, 지원되지 않는 모드는 ❌ 이모지로 표시됩니다. + +| 모델 유형 | 사전 훈련 가중치 | 지원되는 작업 | Inference | Validation | Training | Export | +|-----------|-----------------|------------------------------------|-----------|------------|----------|--------| +| MobileSAM | `mobile_sam.pt` | [인스턴스 세그멘테이션](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## SAM에서 MobileSAM으로의 적응 + +MobileSAM은 원본 SAM과 동일한 파이프라인을 유지하므로, 원본의 전처리, 후처리 및 모든 다른 인터페이스를 통합했습니다. 따라서 현재 원본 SAM을 사용 중인 경우, MobileSAM으로 전환하는 데 최소한의 노력이 필요합니다. 
+ +MobileSAM은 원본 SAM과 비교 가능한 성능을 발휘하며, 이미지 인코더만 변경되었습니다. 구체적으로, 원본의 무거운 ViT-H 인코더 (632M)를 더 작은 Tiny-ViT (5M)로 대체했습니다. 단일 GPU에서 MobileSAM은 이미지 당 약 12ms의 작업 시간이 소요됩니다. 이미지 인코더에는 8ms가 소요되고, 마스크 디코더에는 4ms가 소요됩니다. + +다음 표는 ViT 기반 이미지 인코더를 비교합니다: + +| 이미지 인코더 | 원본 SAM | MobileSAM | +|---------|--------|-----------| +| 매개변수 | 611M | 5M | +| 속도 | 452ms | 8ms | + +원본 SAM과 MobileSAM은 동일한 프롬프트 가이드 마스크 디코더를 사용합니다: + +| 마스크 디코더 | 원본 SAM | MobileSAM | +|---------|--------|-----------| +| 매개변수 | 3.876M | 3.876M | +| 속도 | 4ms | 4ms | + +전체 파이프라인의 비교는 다음과 같습니다: + +| 전체 파이프라인 (인코더+디코더) | 원본 SAM | MobileSAM | +|--------------------|--------|-----------| +| 매개변수 | 615M | 9.66M | +| 속도 | 456ms | 12ms | + +MobileSAM과 원본 SAM의 성능은 포인트 및 박스를 사용한 프롬프트를 통해 확인할 수 있습니다. + +![포인트 프롬프트가 있는 이미지](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/mask_box.jpg?raw=true) + +![박스 프롬프트가 있는 이미지](https://github.com/ChaoningZhang/MobileSAM/blob/master/assets/logo2.png?raw=true) + +MobileSAM은 우수한 성능을 자랑하며, 현재의 FastSAM보다 약 5배 작고 7배 빠릅니다. 자세한 내용은 [MobileSAM 프로젝트 페이지](https://github.com/ChaoningZhang/MobileSAM)에서 확인할 수 있습니다. + +## Ultralytics에서 MobileSAM 테스트 + +원본 SAM과 마찬가지로, 포인트 및 박스 프롬프트 모드를 포함한 Ultralytics에서 간단한 테스트 방법을 제공합니다. + +### 모델 다운로드 + +모델을 [여기](https://github.com/ChaoningZhang/MobileSAM/blob/master/weights/mobile_sam.pt)에서 다운로드할 수 있습니다. + +### 포인트 프롬프트 + +!!! Example "예제" + + === "Python" + ```python + from ultralytics import SAM + + # 모델 불러오기 + model = SAM('mobile_sam.pt') + + # 포인트 프롬프트를 기반으로 세그먼트 예측 + model.predict('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +### 박스 프롬프트 + +!!! Example "예제" + + === "Python" + ```python + from ultralytics import SAM + + # 모델 불러오기 + model = SAM('mobile_sam.pt') + + # 박스 프롬프트를 기반으로 세그먼트 예측 + model.predict('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + ``` + +`MobileSAM`과 `SAM`은 동일한 API를 사용하여 구현되었습니다. 더 많은 사용법에 대해서는 [SAM 페이지](sam.md)를 참조하세요. 
+ +## 인용 및 감사의 글 + +MobileSAM이 연구 또는 개발에 유용하게 사용된 경우, 다음의 논문을 인용해 주시기 바랍니다: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{mobile_sam, + title={Faster Segment Anything: Towards Lightweight SAM for Mobile Applications}, + author={Zhang, Chaoning and Han, Dongshen and Qiao, Yu and Kim, Jung Uk and Bae, Sung Ho and Lee, Seungkyu and Hong, Choong Seon}, + journal={arXiv preprint arXiv:2306.14289}, + year={2023} + } diff --git a/docs/ko/models/rtdetr.md b/docs/ko/models/rtdetr.md new file mode 100644 index 0000000..904fc73 --- /dev/null +++ b/docs/ko/models/rtdetr.md @@ -0,0 +1,93 @@ +--- +comments: true +description: 바이두(Baidu)가 개발한 RT-DETR은 비전 트랜스포머(Vision Transformers)를 기반으로 한 실시간 객체 검출기로, 사전 훈련된 모델을 사용하여 시간지연이 없는 고성능을 제공합니다. +keywords: RT-DETR, 바이두, 비전 트랜스포머, 객체 검출, 실시간 성능, CUDA, TensorRT, IoU-aware query selection, Ultralytics, 파이썬 API, PaddlePaddle +--- + +# 바이두의 RT-DETR: 비전 트랜스포머 기반 실시간 객체 검출기 + +## 개요 + +바이두(Baidu)가 개발한 Real-Time Detection Transformer(RT-DETR)은 고정밀도를 유지하면서 실시간 성능을 제공하는 첨단 엔드 투 엔드 객체 검출기입니다. 비전 트랜스포머(Vision Transformers, ViT)의 성능을 활용하여, 다중 스케일 특징을 효율적으로 처리할 수 있도록 인트라 스케일 상호 작용과 크로스 스케일 퓨전을 분리합니다. RT-DETR은 다양한 디코더 레이어를 사용하여 추론 속도를 유연하게 조정할 수 있으므로 재훈련 없이 실시간 객체 검출에 적용하기에 매우 적합합니다. 이 모델은 CUDA와 TensorRT와 같은 가속화된 백엔드에서 많은 다른 실시간 객체 검출기보다 뛰어난 성능을 발휘합니다. + +![모델 예시 이미지](https://user-images.githubusercontent.com/26833433/238963168-90e8483f-90aa-4eb6-a5e1-0d408b23dd33.png) +**바이두의 RT-DETR 개요** 바이두의 RT-DETR 모델 구조 다이어그램은 백본 네트워크의 마지막 세 단계 {S3, S4, S5}를 인코더의 입력으로 보여줍니다. 효율적인 하이브리드 인코더는 인트라스케일 특징 상호 작용(AIFI, intrascale feature interaction)과 크로스 스케일 특징 퓨전 모듈(CCFM, cross-scale feature-fusion module)을 통해 다중 스케일 특징을 이미지 특징의 시퀀스로 변환합니다. IoU-aware query selection은 디코더에 대한 초기 객체 쿼리로 작동하기 위해 일정한 수의 이미지 특징을 선택하는 데 사용됩니다. 마지막으로, 보조 예측 헤드와 함께 디코더는 객체 쿼리를 반복하여 박스와 신뢰도 점수를 최적화합니다. ([원문](https://arxiv.org/pdf/2304.08069.pdf) 참조). + +### 주요 기능 + +- **효율적인 하이브리드 인코더:** 바이두의 RT-DETR은 다중 스케일 특징을 인트라 스케일 상호 작용과 크로스 스케일 퓨전을 분리하여 처리하는 효율적인 하이브리드 인코더를 사용합니다. 
이 독특한 비전 트랜스포머 기반 디자인은 계산 비용을 줄이고 실시간 객체 검출이 가능하도록 합니다. +- **IoU-aware 쿼리 선택:** 바이두의 RT-DETR은 IoU-aware 쿼리 선택을 사용하여 객체 쿼리 초기화를 개선합니다. 이를 통해 모델은 장면에서 가장 관련성 있는 객체에 집중하며 검출 정확도를 향상시킵니다. +- **융통성 있는 추론 속도 조정:** 바이두의 RT-DETR은 재훈련 없이 다른 디코더 레이어를 사용하여 추론 속도를 유연하게 조정할 수 있습니다. 이러한 적응성은 다양한 실시간 객체 검출 시나리오에서 실용적인 응용을 용이하게 합니다. + +## 사전 훈련된 모델 + +Ultralytics의 파이썬 API는 다양한 스케일의 사전 훈련된 PaddlePaddle RT-DETR 모델을 제공합니다: + +- RT-DETR-L: COCO val2017에서 53.0% AP, T4 GPU에서 114 FPS +- RT-DETR-X: COCO val2017에서 54.8% AP, T4 GPU에서 74 FPS + +## 사용 예시 + +이 예시는 간단한 RT-DETR 훈련 및 추론 예시를 제공합니다. 이러한 모드에 대한 자세한 문서는 [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md), [Export](../modes/export.md) 문서 페이지를 참조하십시오. + +!!! Example "예제" + + === "파이썬" + + ```python + from ultralytics import RTDETR + + # COCO 사전 훈련된 RT-DETR-l 모델 로드 + model = RTDETR('rtdetr-l.pt') + + # 모델 정보 표시 (선택 사항) + model.info() + + # COCO8 예제 데이터셋에 대해 100 epoch 동안 모델 훈련 + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # 'bus.jpg' 이미지에서 RT-DETR-l 모델로 추론 실행 + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + ```bash + # COCO 사전 훈련된 RT-DETR-l 모델 로드하고 COCO8 예제 데이터셋에 대해 100 epoch 동안 훈련 + yolo train model=rtdetr-l.pt data=coco8.yaml epochs=100 imgsz=640 + + # COCO 사전 훈련된 RT-DETR-l 모델 로드하고 'bus.jpg' 이미지에서 추론 실행 + yolo predict model=rtdetr-l.pt source=path/to/bus.jpg + ``` + +## 지원되는 작업 및 모드 + +이 표는 모델 유형, 해당 사전 훈련 가중치, 각 모델이 지원하는 작업, 그리고 [Train](../modes/train.md), [Val](../modes/val.md), [Predict](../modes/predict.md), [Export](../modes/export.md)와 같이 지원되는 다양한 모드를 ✅ 이모지로 나타냅니다. 
+ +| 모델 유형 | 사전 훈련 가중치 | 지원되는 작업 | 추론 | 검증 | 훈련 | 출력 | +|---------------------|---------------|-----------------------------|----|----|----|----| +| RT-DETR Large | `rtdetr-l.pt` | [객체 검출](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| RT-DETR Extra-Large | `rtdetr-x.pt` | [객체 검출](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +## 인용 및 감사의 말 + +만약 연구나 개발 작업에서 바이두(Baidu)의 RT-DETR을 사용한다면, [원본 논문](https://arxiv.org/abs/2304.08069)을 인용해주시기 바랍니다: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{lv2023detrs, + title={DETRs Beat YOLOs on Real-time Object Detection}, + author={Wenyu Lv and Shangliang Xu and Yian Zhao and Guanzhong Wang and Jinman Wei and Cheng Cui and Yuning Du and Qingqing Dang and Yi Liu}, + year={2023}, + eprint={2304.08069}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +컴퓨터 비전 커뮤니티에게 귀중한 자료인 비전 트랜스포머 기반 실시간 객체 검출기인 바이두(Baidu)의 RT-DETR을 만들고 유지 관리해 준 바이두와 [PaddlePaddle](https://github.com/PaddlePaddle/PaddleDetection) 팀에게 감사의 인사를 전합니다. + +*Keywords: RT-DETR, Transformer, ViT, 비전 트랜스포머, 바이두 RT-DETR, PaddlePaddle, Paddle Paddle RT-DETR, 실시간 객체 검출, 비전 트랜스포머 기반 객체 검출, 사전 훈련된 PaddlePaddle RT-DETR 모델, 바이두 RT-DETR 사용법, Ultralytics 파이썬 API* diff --git a/docs/ko/models/sam.md b/docs/ko/models/sam.md new file mode 100644 index 0000000..55bfe93 --- /dev/null +++ b/docs/ko/models/sam.md @@ -0,0 +1,226 @@ +--- +comments: true +description: Ultralytics의 최첨단 이미지 세분화 모델인 Segment Anything Model(SAM)에 대해 알아보세요. 해당 모델은 실시간 이미지 세분화를 가능하게 하며, 프롬프트를 이용한 세분화, 제로샷 성능 및 사용법에 대해 알아봅니다. +keywords: Ultralytics, 이미지 세분화, Segment Anything Model, SAM, SA-1B 데이터셋, 실시간 성능, 제로샷 전이, 객체 감지, 이미지 분석, 머신 러닝 +--- + +# Segment Anything Model (SAM) + +Segment Anything Model(SAM)에 오신 것을 환영합니다. 이 혁신적인 모델은 프롬프트 기반의 실시간 세분화를 통해 세분화 분야에서 새로운 기준을 세웠습니다. + +## SAM 소개: Segment Anything Model의 소개 + +Segment Anything Model(SAM)은 프롬프트 기반의 세분화를 가능하게 하는 뛰어난 이미지 세분화 모델입니다. SAM은 이미지 분석 작업에서 독창성을 보여주는 Segment Anything 이니셔티브의 핵심을 형성하고 있으며, 이미지 세분화를 위한 새로운 모델, 작업 및 데이터셋을 소개하는 혁신적인 프로젝트입니다. 
+ +SAM의 고급설계는 모델이 기존 지식 없이도 새로운 이미지 분포 및 작업에 대응할 수 있는 기능인 제로샷 전이를 보여줍니다. 1,100만 개의 정교하게 선별된 이미지에 분포된 10억 개 이상의 마스크를 포함한 SA-1B 데이터셋으로 학습된 SAM은 많은 경우에 전적으로 감독된 학습 결과를 능가하는 인상적인 제로샷 성능을 보여줍니다. + +![데이터셋 샘플 이미지](https://user-images.githubusercontent.com/26833433/238056229-0e8ffbeb-f81a-477e-a490-aff3d82fd8ce.jpg) +새롭게 도입된 SA-1B 데이터셋에서 오버레이된 마스크를 포함한 예시 이미지입니다. SA-1B는 다양한 고해상도의 이미지를 라이선스 보호하며 사생활을 보호하고 있으며, 1,100만 개의 고품질 세분화 마스크를 가지고 있습니다. 이러한 마스크는 SAM에 의해 자동으로 주석이 달렸으며, 인간 평가 및 다양한 실험을 통해 높은 품질과 다양성을 갖추었음이 검증되었습니다. 시각화를 위해 이미지는 이미지 당 평균 100개의 마스크로 그룹화되었습니다. + +## Segment Anything Model (SAM)의 주요 기능 + +- **프롬프트 기반 세분화 작업:** SAM은 프롬프트 기반의 세분화 작업을 위해 설계되어, 공간 또는 텍스트 단서를 이용하여 개체를 식별합니다. +- **고급설계:** Segment Anything Model은 강력한 이미지 인코더, 프롬프트 인코더 및 가벼운 마스크 디코더를 사용합니다. 이 독특한 아키텍처는 유연한 프롬프팅, 실시간 마스크 계산 및 세분화 작업에서의 모호성 인식을 가능케 합니다. +- **SA-1B 데이터셋:** Segment Anything 프로젝트에서 소개된 SA-1B 데이터셋은 1,100만 개의 이미지에 10억 개 이상의 세분화 마스크를 가지고 있습니다. 이는 지금까지 가장 큰 세분화 데이터셋으로, SAM에게 다양하고 대규모의 학습 데이터를 제공합니다. +- **제로샷 성능:** SAM은 다양한 세분화 작업에서 뛰어난 제로샷 성능을 보여주므로, 프롬프트 엔지니어링의 필요성을 최소화하고 다양한 응용 프로그램에 즉시 사용할 수 있는 도구입니다. + +Segment Anything Model 및 SA-1B 데이터셋에 대한 자세한 내용은 [Segment Anything 웹사이트](https://segment-anything.com)와 연구 논문 [Segment Anything](https://arxiv.org/abs/2304.02643)을 참조해 주세요. + +## 사용 가능한 모델, 지원하는 작업 및 운영 모드 + +아래 표는 사용 가능한 모델과 해당 모델의 사전 훈련 가중치, 지원하는 작업 및 [추론](../modes/predict.md), [검증](../modes/val.md), [훈련](../modes/train.md) 및 [내보내기](../modes/export.md)와 같은 다른 운영 모드와의 호환성을 나타냅니다. 지원되는 모드는 ✅ 이모지로, 지원되지 않는 모드는 ❌ 이모지로 표시되었습니다. + +| 모델 유형 | 사전 훈련 가중치 | 지원 작업 | 추론 | 검증 | 훈련 | 내보내기 | +|-----------|------------|---------------------------------|----|----|----|------| +| SAM base | `sam_b.pt` | [인스턴스 세분화](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| SAM large | `sam_l.pt` | [인스턴스 세분화](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## SAM 사용 방법: 이미지 세분화에서의 다재다능함과 강력함 + +Segment Anything Model은 훈련 데이터를 초월하는 다양한 하위 작업에 대해서도 사용될 수 있습니다. 이에는 가장자리 검출, 객체 제안 생성, 인스턴스 세분장 및 초기 텍스트-마스크 예측 등이 포함됩니다. 
SAM은 프롬프팅 엔지니어링을 통해 새로운 작업 및 데이터 분포에 빠르게 적응할 수 있으므로, 이미지 세분화에 대한 다재다능하고 강력한 도구로 사용될 수 있습니다. + +### SAM 예측 예제 + +!!! Example "프롬프트를 이용한 세분화" + + 주어진 프롬프트로 이미지 세분화를 실행합니다. + + === "파이썬" + + ```python + from ultralytics import SAM + + # 모델 로드 + model = SAM('sam_b.pt') + + # 모델 정보 표시 (선택 사항) + model.info() + + # bboxes 프롬프트로 추론 실행 + model('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + + # points 프롬프트로 추론 실행 + model('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +!!! Example "전체 이미지 세분화" + + 전체 이미지 세분화를 실행합니다. + + === "파이썬" + + ```python + from ultralytics import SAM + + # 모델 로드 + model = SAM('sam_b.pt') + + # 모델 정보 표시 (선택 사항) + model.info() + + # 추론 실행 + model('path/to/image.jpg') + ``` + + === "CLI" + + ```bash + # SAM 모델로 추론 실행 + yolo predict model=sam_b.pt source=path/to/image.jpg + ``` + +- 여기서 전체 이미지 세분화는 프롬프트(bboxes/points/masks)를 전달하지 않으면 실행됩니다. + +!!! Example "SAMPredictor 예제" + + 이미지를 설정하고 이미지 인코더를 여러번 실행하지 않고 여러번 프롬프트 추론을 실행할 수 있습니다. + + === "프롬프트 추론" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # SAMPredictor 생성 + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # 이미지 설정 + predictor.set_image("ultralytics/assets/zidane.jpg") # 이미지 파일로 설정 + predictor.set_image(cv2.imread("ultralytics/assets/zidane.jpg")) # np.ndarray로 설정 + results = predictor(bboxes=[439, 437, 524, 709]) + results = predictor(points=[900, 370], labels=[1]) + + # 이미지 리셋 + predictor.reset_image() + ``` + + 추가 인수로 전체 이미지를 세분화합니다. 
+ + === "전체 이미지 세분화" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # SAMPredictor 생성 + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # 추가 인수로 이미지 세분화 + results = predictor(source="ultralytics/assets/zidane.jpg", crop_n_layers=1, points_stride=64) + ``` + +- `전체 이미지 세분화`에 대한 자세한 추가 인수는 [`Predictor/generate` 참조](../../../reference/models/sam/predict.md)를 참조하세요. + +## YOLOv8과의 SAM 비교 + +여기서는 Meta의 가장 작은 SAM 모델인 SAM-b를 얼트라리얼리틱스의 가장 작은 세분화 모델, [YOLOv8n-seg](../tasks/segment.md),과 비교합니다: + +| 모델 | 크기 | 파라미터 | 속도 (CPU) | +|------------------------------------------------|-----------------------|----------------------|------------------------| +| Meta's SAM-b | 358 MB | 94.7 M | 51096 ms/im | +| [MobileSAM](mobile-sam.md) | 40.7 MB | 10.1 M | 46122 ms/im | +| [FastSAM-s](fast-sam.md) with YOLOv8 backbone | 23.7 MB | 11.8 M | 115 ms/im | +| Ultralytics [YOLOv8n-seg](../tasks/segment.md) | **6.7 MB** (53.4배 작음) | **3.4 M** (27.9배 적음) | **59 ms/im** (866배 빠름) | + +이 비교는 모델 크기 및 속도에 대한 상당한 차이를 보여줍니다. SAM은 자동으로 세분화하는 독특한 기능을 제공하지만, 작은 크기와 높은 처리 속도로 인해 YOLOv8 세분화 모델과 직접 경쟁하지는 않습니다. + +이 테스트는 2023년 애플 M2 맥북(16GB RAM)에서 수행되었습니다. 이 테스트를 재현하려면: + +!!! Example "예제" + + === "파이썬" + ```python + from ultralytics import FastSAM, SAM, YOLO + + # SAM-b 프로파일링 + model = SAM('sam_b.pt') + model.info() + model('ultralytics/assets') + + # MobileSAM 프로파일링 + model = SAM('mobile_sam.pt') + model.info() + model('ultralytics/assets') + + # FastSAM-s 프로파일링 + model = FastSAM('FastSAM-s.pt') + model.info() + model('ultralytics/assets') + + # YOLOv8n-seg 프로파일링 + model = YOLO('yolov8n-seg.pt') + model.info() + model('ultralytics/assets') + ``` + +## 자동 주석: 세분화 데이터셋을 위한 신속한 경로 + +자동 주석은 SAM의 핵심 기능으로, 미리 훈련된 탐지 모델을 사용하여 [세분화 데이터셋](https://docs.ultralytics.com/datasets/segment)을 생성할 수 있습니다. 이 기능을 사용하면 번거롭고 시간이 오래 걸리는 수작업 주석 작업을 건너뛰고 대량의 이미지를 신속하게 정확하게 주석을 달 수 있습니다. 
+ +### 탐지 모델을 사용하여 세분화 데이터셋 생성하기 + +Ultralytics 프레임워크를 사용하여 미리 훈련된 탐지 및 SAM 세분화 모델과 함께 데이터셋을 자동으로 주석할 수 있습니다. 아래와 같이 `auto_annotate` 함수를 사용하세요: + +!!! Example "예제" + + === "파이썬" + ```python + from ultralytics.data.annotator import auto_annotate + + auto_annotate(data="path/to/images", det_model="yolov8x.pt", sam_model='sam_b.pt') + ``` + +| 인수 | 유형 | 설명 | 기본값 | +|------------|-----------------|-------------------------------------------------------------------|--------------| +| data | 문자열 | 주석을 달 이미지가 포함된 폴더 경로. | | +| det_model | 문자열, 선택사항 | 미리 훈련된 YOLO 탐지 모델. 기본값은 'yolov8x.pt'. | 'yolov8x.pt' | +| sam_model | 문자열, 선택사항 | 미리 훈련된 SAM 세분화 모델. 기본값은 'sam_b.pt'. | 'sam_b.pt' | +| device | 문자열, 선택사항 | 모델을 실행할 디바이스. 기본값은 빈 문자열 (CPU 또는 사용 가능한 GPU 사용). | | +| output_dir | 문자열, None, 선택사항 | 주석이 포함된 결과를 저장할 디렉토리 경로. 기본값은 'data'와 같은 디렉토리 내부의 'labels' 폴더입니다. | None | + +`auto_annotate` 함수는 이미지 경로를 입력으로 받아, 입력한 미리 훈련된 탐지와 SAM 세분화 모델, 이 함수를 실행할 디바이스 및 주석이 포함된 결과를 저장할 디렉토리 경로를 선택적으로 지정할 수 있는 기능을 제공합니다. + +미리 훈련된 모델을 사용한 자동 주석 기능을 활용하면 높은 품질의 세분화 데이터셋을 생성하는 데 소요되는 시간과 노력을 크게 줄일 수 있습니다. 이 기능은 특히 대량의 이미지 컬렉션을 다루는 연구원과 개발자에게 유용하며, 수작업 주석 대신 모델 개발과 평가에 집중할 수 있습니다. + +## 인용 및 감사의 말 + +귀하의 연구 또는 개발 작업에 SAM이 유용하게 사용된 경우, 저희 논문을 인용해 주시기 바랍니다: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{kirillov2023segment, + title={Segment Anything}, + author={Alexander Kirillov and Eric Mintun and Nikhila Ravi and Hanzi Mao and Chloe Rolland and Laura Gustafson and Tete Xiao and Spencer Whitehead and Alexander C. Berg and Wan-Yen Lo and Piotr Dollár and Ross Girshick}, + year={2023}, + eprint={2304.02643}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +모델 개발과 알고리즘 개발을 위한 귀중한 리소스를 만들고 유지 관리하는 Meta AI에게 감사의 말씀을 드립니다. 
+ +*keywords: Segment Anything, Segment Anything Model, SAM, Meta SAM, 이미지 세분화, 프롬프트 기반 세분화, 제로샷 성능, SA-1B 데이터셋, 고급설계, 자동 주석, 얼트라리얼리틱스, 사전 훈련 모델, SAM base, SAM large, 인스턴스 세분화, 컴퓨터 비전, 인공 지능, 머신 러닝, 데이터 주석, 세분화 마스크, 탐지 모델, YOLO 탐지 모델, bibtex, Meta AI.* diff --git a/docs/ko/models/yolo-nas.md b/docs/ko/models/yolo-nas.md new file mode 100644 index 0000000..04d8765 --- /dev/null +++ b/docs/ko/models/yolo-nas.md @@ -0,0 +1,119 @@ +--- +comments: true +description: YOLO-NAS는 우수한 물체 감지 모델로서 자세한 설명서를 탐색해보세요. Ultralytics Python API를 사용한 기능, 사전 훈련된 모델, 사용법 등을 자세히 알아보세요. +keywords: YOLO-NAS, Deci AI, 물체 감지, 딥러닝, 신경 아키텍처 검색, Ultralytics Python API, YOLO 모델, 사전 훈련된 모델, 양자화, 최적화, COCO, Objects365, Roboflow 100 +--- + +# YOLO-NAS + +## 개요 + +Deci AI에서 개발한 YOLO-NAS는 원래의 YOLO 모델의 한계를 해결하기 위해 고도의 신경 아키텍처 검색(Neural Architecture Search) 기술로 만들어진 혁신적인 물체 감지 기반 모델입니다. 양자화 지원과 정확성-지연 트레이드오프의 중요한 개선을 통해 YOLO-NAS는 물체 감지 분야에서 주목할 만한 성능 향상을 제공합니다. + +![모델 예시 이미지](https://learnopencv.com/wp-content/uploads/2023/05/yolo-nas_COCO_map_metrics.png) +**YOLO-NAS 개요.** YOLO-NAS는 양자화 관련 블록과 선택적 양자화를 적용하여 최적의 성능을 달성합니다. 모델은 INT8 양자화 버전으로 변환될 때 최소한의 정확도 감소를 경험하므로 다른 모델들과 비교했을 때 상당한 개선을 이끌어냅니다. 이러한 혁신은 예측할 수 없는 물체 감지 능력과 높은 성능을 가진 우수한 아키텍처로 이어집니다. + +### 주요 기능 + +- **양자화 친화적인 기본 블록**: YOLO-NAS는 이전 YOLO 모델의 한계 중 하나인 양자화에 적합한 새로운 기본 블록을 도입합니다. +- **정교한 훈련과 양자화**: YOLO-NAS는 고급 훈련 방식과 훈련 후 양자화를 활용하여 성능을 향상시킵니다. +- **AutoNAC 최적화와 사전 훈련**: YOLO-NAS는 AutoNAC 최적화를 활용하며 COCO, Objects365, Roboflow 100과 같은 유명한 데이터셋에서 사전 훈련됩니다. 이를 통해 YOLO-NAS는 본격적인 프로덕션 환경에서의 물체 감지 작업에 매우 적합합니다. + +## 사전 훈련된 모델 + +Ultralytics가 제공하는 사전 훈련된 YOLO-NAS 모델로 다음 세대의 물체 감지 기술의 힘을 체험해 보세요. 이러한 모델은 속도와 정확성 측면에서 최고의 성능을 제공하기 위해 설계되었습니다. 
특정 요구에 맞게 다양한 옵션 중 선택하세요: + +| 모델 | mAP | 지연 시간 (밀리초) | +|------------------|-------|-------------| +| YOLO-NAS S | 47.5 | 3.21 | +| YOLO-NAS M | 51.55 | 5.85 | +| YOLO-NAS L | 52.22 | 7.87 | +| YOLO-NAS S INT-8 | 47.03 | 2.36 | +| YOLO-NAS M INT-8 | 51.0 | 3.78 | +| YOLO-NAS L INT-8 | 52.1 | 4.78 | + +각 모델 변형은 평균 평균 정밀도(mAP)와 지연 시간 간의 균형을 제공하여 물체 감지 작업을 성능과 속도 모두 최적화할 수 있도록 합니다. + +## 사용 예시 + +Ultralytics는 YOLO-NAS 모델을 `ultralytics` Python 패키지를 통해 Python 애플리케이션에 쉽게 통합할 수 있도록 지원합니다. 이 패키지는 프로세스를 간소화하기 위한 사용자 친화적인 Python API를 제공합니다. + +다음 예시에서는 추론과 유효성 검사를 위해 `ultralytics` 패키지와 함께 YOLO-NAS 모델을 사용하는 방법을 보여줍니다: + +### 추론과 유효성 검사 예시 + +이 예시에서는 COCO8 데이터셋에서 YOLO-NAS-s 모델을 유효성 검사합니다. + +!!! Example "예제" + + 이 예시에서는 YOLO-NAS를 위한 간단한 추론 및 유효성 검사 코드를 제공합니다. 추론 결과를 처리하기 위한 방법은 [예측](../modes/predict.md) 모드를 참조하세요. 추가 모드에서 YOLO-NAS를 사용하는 방법은 [Val](../modes/val.md) 및 [Export](../modes/export.md)를 참조하세요. `ultralytics` 패키지에서 YOLO-NAS의 훈련은 지원하지 않습니다. + + === "Python" + + PyTorch 사전 훈련된 `*.pt` 모델 파일을 `NAS()` 클래스에 전달하여 Python에서 모델 인스턴스를 생성할 수 있습니다: + + ```python + from ultralytics import NAS + + # COCO 사전 훈련된 YOLO-NAS-s 모델 로드 + model = NAS('yolo_nas_s.pt') + + # 모델 정보 표시 (선택 사항) + model.info() + + # COCO8 예제 데이터셋에서 모델 유효성 검사 + results = model.val(data='coco8.yaml') + + # YOLO-NAS-s 모델로 'bus.jpg' 이미지에 추론 실행 + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLI 명령을 사용하여 모델을 직접 실행할 수 있습니다: + + ```bash + # COCO 사전 훈련된 YOLO-NAS-s 모델로 COCO8 예제 데이터셋의 성능 유효성 검사 + yolo val model=yolo_nas_s.pt data=coco8.yaml + + # COCO 사전 훈련된 YOLO-NAS-s 모델로 'bus.jpg' 이미지에 추론 실행 + yolo predict model=yolo_nas_s.pt source=path/to/bus.jpg + ``` + +## 지원되는 작업 및 모드 + +YOLO-NAS 모델은 Small (s), Medium (m) 및 Large (l) 세 가지 변형이 있습니다. 각 변형은 다른 계산 및 성능 요구 사항을 충족시키기 위해 설계되었습니다: + +- **YOLO-NAS-s**: 계산 자원이 제한되고 효율성이 중요한 환경에 최적화되었습니다. +- **YOLO-NAS-m**: 더 높은 정확성을 가지는 일반적인 물체 감지 작업에 적합한 균형잡힌 모델입니다. +- **YOLO-NAS-l**: 계산 자원이 제한되지 않는 환경에서 가장 높은 정확성이 필요한 시나리오에 맞게 설계되었습니다. 
+ +아래는 각 모델에 대한 자세한 개요로, 사전 훈련된 가중치, 지원하는 작업, 다양한 작동 모드와의 호환성에 대한 링크가 제공됩니다. + +| 모델 유형 | 사전 훈련된 가중치 | 지원되는 작업 | 추론 | 유효성 검사 | 훈련 | 내보내기 | +|------------|-----------------------------------------------------------------------------------------------|-----------------------------|----|--------|----|------| +| YOLO-NAS-s | [yolo_nas_s.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_s.pt) | [물체 감지](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-m | [yolo_nas_m.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_m.pt) | [물체 감지](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-l | [yolo_nas_l.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_l.pt) | [물체 감지](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | + +## 인용 및 감사의 말씀 + +YOLO-NAS를 연구 또는 개발 작업에 활용한 경우 SuperGradients를 인용해 주세요. + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{supergradients, + doi = {10.5281/ZENODO.7789328}, + url = {https://zenodo.org/record/7789328}, + author = {Aharon, Shay and {Louis-Dupont} and {Ofri Masad} and Yurkova, Kate and {Lotem Fridman} and {Lkdci} and Khvedchenya, Eugene and Rubin, Ran and Bagrov, Natan and Tymchenko, Borys and Keren, Tomer and Zhilko, Alexander and {Eran-Deci}}, + title = {Super-Gradients}, + publisher = {GitHub}, + journal = {GitHub repository}, + year = {2021}, + } + ``` + +Deci AI의 [SuperGradients](https://github.com/Deci-AI/super-gradients/) 팀에게 컴퓨터 비전 커뮤니티를 위해 이 가치 있는 자료를 만들고 유지 관리한 데 대해 감사의 말씀을 전합니다. 혁신적인 아키텍처와 우수한 물체 감지 능력을 갖춘 YOLO-NAS가 개발자와 연구자에게 중요한 도구가 될 것으로 기대합니다. diff --git a/docs/ko/models/yolov3.md b/docs/ko/models/yolov3.md new file mode 100644 index 0000000..103896f --- /dev/null +++ b/docs/ko/models/yolov3.md @@ -0,0 +1,98 @@ +--- +comments: true +description: YOLOv3, YOLOv3-Ultralytics 및 YOLOv3u에 대한 개요를 얻으세요. 물체 탐지를 위한 주요 기능, 사용법 및 지원 작업에 대해 알아보세요. 
+keywords: YOLOv3, YOLOv3-Ultralytics, YOLOv3u, 물체 탐지, 추론, 훈련, Ultralytics +--- + +# YOLOv3, YOLOv3-Ultralytics 및 YOLOv3u + +## 개요 + +이 문서는 세 가지 밀접하게 관련된 물체 탐지 모델인 [YOLOv3](https://pjreddie.com/darknet/yolo/), [YOLOv3-Ultralytics](https://github.com/ultralytics/yolov3) 및 [YOLOv3u](https://github.com/ultralytics/ultralytics)에 대한 개요를 제공합니다. + +1. **YOLOv3:** 이것은 You Only Look Once (YOLO) 물체 탐지 알고리즘의 세 번째 버전입니다. Joseph Redmon이 처음 개발한 YOLOv3는 다중 스케일 예측 및 세 가지 다른 크기의 탐지 커널과 같은 기능을 도입하여 이전 모델보다 향상됐습니다. + +2. **YOLOv3-Ultralytics:** 이것은 Ultralytics의 YOLOv3 모델 구현입니다. 이 모델은 원본 YOLOv3 아키텍처를 복제하며 더 많은 사전 훈련 모델 및 쉬운 사용자 정의 옵션과 같은 추가 기능을 제공합니다. + +3. **YOLOv3u:** 이것은 YOLOv8 모델에서 사용되는 앵커 없이 물체 없음 분리 헤드를 통합한 YOLOv3-Ultralytics의 업데이트된 버전입니다. YOLOv3u는 YOLOv3와 동일한 백본 및 네크 아키텍처를 유지하지만 YOLOv8에서 업데이트된 탐지 헤드를 사용합니다. + +![Ultralytics YOLOv3](https://raw.githubusercontent.com/ultralytics/assets/main/yolov3/banner-yolov3.png) + +## 주요 기능 + +- **YOLOv3:** 이 모델은 탐지를 위해 13x13, 26x26 및 52x52의 세 가지 다른 크기의 탐지 커널을 활용하는 세 가지 다른 스케일을 도입했습니다. 이는 다양한 크기의 객체에 대한 탐지 정확도를 크게 향상시켰습니다. 또한 YOLOv3은 각 경계 상자에 대한 다중 레이블 예측과 더 나은 특징 추출기 네트워크와 같은 기능을 추가했습니다. + +- **YOLOv3-Ultralytics:** Ultralytics의 YOLOv3 구현은 원본 모델과 동일한 성능을 제공하지만 더 많은 사전 훈련 모델, 추가적인 훈련 방법 및 쉬운 사용자 정의 옵션을 제공합니다. 이로써 실제 응용 분야에 대해 더 다양하고 사용자 친화적인 모델이 됩니다. + +- **YOLOv3u:** 이 업데이트된 모델은 YOLOv8의 앵커 없음, 물체 없는 분리 헤드를 통합합니다. 미리 정의된 앵커 박스 및 물체 점수가 필요 없어진 이 탐지 헤드 설계는 다양한 크기와 모양의 객체를 탐지하는 능력을 향상시킬 수 있습니다. 이로써 YOLOv3u는 물체 탐지 작업에 대해 더 견고하고 정확한 모델이 됩니다. + +## 지원되는 작업 및 모드 + +YOLOv3, YOLOv3-Ultralytics 및 YOLOv3u 시리즈는 물체 탐지 작업을 위해 특별히 설계되었습니다. 이러한 모델은 정확성과 속도를 균형있게 유지하여 다양한 실제 시나리오에서 효과적으로 사용될 수 있습니다. 각 버전은 독특한 기능과 최적화를 제공하여 다양한 응용 분야에 적합합니다. + +세 가지 모델은 [추론](../modes/predict.md), [유효성 검사](../modes/val.md), [훈련](../modes/train.md) 및 [내보내기](../modes/export.md)와 같은 포괄적인 모드를 지원하여 효과적인 물체 탐지를 위한 완벽한 도구 세트를 제공합니다. 
+ +| 모델 유형 | 지원되는 작업 | 추론 | 유효성 검사 | 훈련 | 내보내기 | +|--------------------|-----------------------------|----|--------|----|------| +| YOLOv3 | [물체 탐지](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3-Ultralytics | [물체 탐지](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3u | [물체 탐지](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +이 표는 각 YOLOv3 버전의 기능을 한 눈에 보여주며, 물체 탐지 워크플로우의 다양한 작업 및 운영 모드에 대해 다양성과 적합성을 강조합니다. + +## 사용 예제 + +다음 예제는 간단한 YOLOv3 훈련 및 추론 예제를 제공합니다. 이와 다른 [모드](../modes/index.md)의 전체 설명은 [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) 및 [Export](../modes/export.md) 문서 페이지를 참조하세요. + +!!! Example "예제" + + === "Python" + + Python에서 PyTorch 사전 훈련된 `*.pt` 모델 및 설정 `*.yaml` 파일을 YOLO() 클래스에 전달하여 모델 인스턴스를 만들 수 있습니다. + + ```python + from ultralytics import YOLO + + # COCO 사전 훈련된 YOLOv3n 모델 로드 + model = YOLO('yolov3n.pt') + + # 모델 정보 표시 (선택 사항) + model.info() + + # COCO8 예제 데이터셋에서 100 epoch 동안 모델 훈련 + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # YOLOv3n 모델로 'bus.jpg' 이미지에 추론 실행 + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLI 명령어를 사용하여 모델을 직접 실행할 수 있습니다. + + ```bash + # COCO 사전 훈련된 YOLOv3n 모델 로드하고 COCO8 예제 데이터셋에서 100 epoch 동안 훈련 + yolo train model=yolov3n.pt data=coco8.yaml epochs=100 imgsz=640 + + # COCO 사전 훈련된 YOLOv3n 모델 로드하고 'bus.jpg' 이미지에 추론 실행 + yolo predict model=yolov3n.pt source=path/to/bus.jpg + ``` + +## 인용 및 감사의 글 + +본인의 연구에서 YOLOv3를 사용한다면, 원본 YOLO 논문과 Ultralytics YOLOv3 저장소를 인용해 주십시오. + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{redmon2018yolov3, + title={YOLOv3: An Incremental Improvement}, + author={Redmon, Joseph and Farhadi, Ali}, + journal={arXiv preprint arXiv:1804.02767}, + year={2018} + } + ``` + +Joseph Redmon과 Ali Farhadi에게 원본 YOLOv3 개발에 대한 감사의 글을 전합니다. 
diff --git a/docs/ko/models/yolov4.md b/docs/ko/models/yolov4.md new file mode 100644 index 0000000..d8e1c88 --- /dev/null +++ b/docs/ko/models/yolov4.md @@ -0,0 +1,71 @@ +--- +comments: true +description: YOLOv4에 대한 상세 가이드를 살펴보세요. 최신 실시간 객체 감지기의 아키텍처 하이라이트, 혁신적인 기능 및 응용 예제를 이해하세요. +keywords: ultralytics, YOLOv4, 객체 감지, 신경망, 실시간 감지, 객체 감지기, 기계 학습 +--- + +# YOLOv4: 높은 속도와 정밀도를 갖는 객체 감지 + +Ultralytics YOLOv4 문서 페이지에 오신 것을 환영합니다. YOLOv4는 아키텍처 및 알고리즘 개선으로 실시간 객체 감지의 최적 속도와 정확도를 제공하는 최신 객체 감지기입니다. 2020년에 Alexey Bochkovskiy가 [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet)에 공개했습니다. YOLOv4는 많은 응용 분야에서 우수한 선택입니다. + +![YOLOv4 아키텍처 다이어그램](https://user-images.githubusercontent.com/26833433/246185689-530b7fe8-737b-4bb0-b5dd-de10ef5aface.png) +**YOLOv4 아키텍처 다이어그램**. YOLOv4의 복잡한 네트워크 설계를 보여줍니다. 최적의 실시간 객체 감지를 위해 백본, 넥 및 헤드 구성 요소와 이들의 상호 연결된 레이어가 포함되어 있습니다. + +## 소개 + +YOLOv4는 You Only Look Once의 4번째 버전을 의미합니다. 이전 YOLO 버전인 [YOLOv3](yolov3.md) 및 기타 객체 감지 모델의 한계를 극복하기 위해 개발된 실시간 객체 감지 모델입니다. 다른 합성곱 신경망(Convolutional Neural Network, CNN) 기반 객체 감지기와는 달리 YOLOv4는 추천 시스템뿐만 아니라 독립적인 프로세스 관리 및 사람의 입력 감소에도 적용할 수 있습니다. 이는 일반적인 그래픽 처리 장치(Graphics Processing Unit, GPU)에서 작동함으로써 저렴한 가격에 대량 사용을 가능하게 합니다. 또한, 훈련을 위해 하나의 GPU만 필요합니다. + +## 아키텍처 + +YOLOv4는 성능을 최적화하기 위해 여러 혁신적인 기능을 사용합니다. 이에는 Weighted-Residual-Connections (WRC), Cross-Stage-Partial-connections (CSP), Cross mini-Batch Normalization (CmBN), Self-adversarial-training (SAT), Mish-activation, Mosaic data augmentation, DropBlock regularization 및 CIoU loss가 포함됩니다. 이러한 기능들은 최첨단 결과를 달성하기 위해 결합되었습니다. + +일반적인 객체 감지기는 입력, 백본, 넥 및 헤드와 같은 여러 부분으로 구성됩니다. YOLOv4의 백본은 ImageNet에서 사전 훈련되며, 객체의 클래스 및 경계 상자를 예측하는 데 사용됩니다. 백본은 VGG, ResNet, ResNeXt 또는 DenseNet과 같은 여러 모델에서 가져올 수 있습니다. 객체 감지기의 넥 부분은 다양한 단계에서 피처 맵을 수집하는 데 사용되며, 일반적으로 여러 하향 경로 및 여러 상향 경로를 포함합니다. 헤드 부분은 최종 객체 감지 및 분류에 사용됩니다. + +## 백 오브 프리비즈(Bag of Freebies) + +YOLOv4는 학습 중 모델의 정확성을 향상시키는 기법인 "백 오브 프리비즈(Bag of Freebies)"를 사용하기도 합니다. 
데이터 증강은 객체 감지에서 주로 사용되는 백 오브 프리비즈 기법으로, 입력 이미지의 다양성을 높여 모델의 견고성을 향상시킵니다. 데이터 증강의 몇 가지 예는 화질 왜곡(이미지의 밝기, 대조도, 색상, 채도 및 노이즈 조정) 및 기하학적 왜곡(임의의 스케일링, 크롭, 뒤집기, 회전 추가)입니다. 이러한 기술은 모델이 다양한 유형의 이미지에 대해 더 잘 일반화되도록 돕습니다. + +## 기능 및 성능 + +YOLOv4는 객체 감지의 최적 속도와 정확도를 위해 설계되었습니다. YOLOv4의 아키텍처에는 백본으로 CSPDarknet53, 넥으로 PANet, 감지 헤드로 YOLOv3가 포함되어 있습니다. 이 설계를 통해 YOLOv4는 뛰어난 속도로 객체 감지를 수행하며, 실시간 응용 프로그램에 적합합니다. YOLOv4는 객체 감지 벤치마크에서 최첨단 결과를 달성하고 정확도 면에서도 뛰어난 성능을 보입니다. + +## 사용 예제 + +작성 시점 기준으로 Ultralytics는 현재 YOLOv4 모델을 지원하지 않습니다. 따라서 YOLOv4를 사용하려는 사용자는 YOLOv4 GitHub 저장소의 설치 및 사용 지침을 직접 참조해야 합니다. + +다음은 YOLOv4를 사용하는 일반적인 단계에 대한 간략한 개요입니다: + +1. YOLOv4 GitHub 저장소를 방문하세요: [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet). + +2. 설치에 대한 README 파일에 제공된 지침을 따르세요. 일반적으로 저장소를 클론하고 필요한 종속성을 설치하고 필요한 환경 변수를 설정하는 과정을 포함합니다. + +3. 설치가 완료되면, 저장소에서 제공하는 사용 지침에 따라 모델을 훈련하고 사용할 수 있습니다. 이는 일반적으로 데이터셋을 준비하고 모델 매개변수를 설정하고 모델을 훈련한 다음 훈련된 모델을 사용하여 객체 감지를 수행하는 것을 포함합니다. + +특정 단계는 사용 사례와 YOLOv4 저장소의 현재 상태에 따라 다를 수 있습니다. 따라서 YOLOv4 GitHub 저장소에서 제공되는 지침을 직접 참조하는 것이 강력히 권장됩니다. + +YOLOv4의 지원이 구현되면 Ultralytics를 위한 사용 예제로 이 문서를 업데이트하기 위해 노력하겠습니다. + +## 결론 + +YOLOv4는 속도와 정확도의 균형을 이루는 강력하고 효율적인 객체 감지 모델입니다. 학습 중 특정 기법 및 백 오브 프리비즈 기법의 사용으로 실시간 객체 감지 작업에서 탁월한 성능을 발휘합니다. 일반적인 GPU를 가진 사용자 누구나 사용하고 훈련할 수 있어 다양한 응용 분야에 접근 가능하고 실용적입니다. + +## 인용 및 감사의 글 + +실시간 객체 감지 분야에서 중요한 기여를 한 YOLOv4 저자들에게 감사드립니다: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{bochkovskiy2020yolov4, + title={YOLOv4: Optimal Speed and Accuracy of Object Detection}, + author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, + year={2020}, + eprint={2004.10934}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +원본 YOLOv4 논문은 [arXiv](https://arxiv.org/pdf/2004.10934.pdf)에서 확인할 수 있습니다. 저자들은 자신들의 작업을 일반에 공개하고 코드베이스는 [GitHub](https://github.com/AlexeyAB/darknet)에서 액세스할 수 있도록 했습니다. 해당 분야의 발전에 기여하고 자신들의 작업을 더 넓은 커뮤니티가 이용할 수 있도록 해 준 저자들의 노력에 감사드립니다. 
diff --git a/docs/ko/models/yolov5.md b/docs/ko/models/yolov5.md new file mode 100644 index 0000000..451c4ed --- /dev/null +++ b/docs/ko/models/yolov5.md @@ -0,0 +1,113 @@ +--- +comments: true +description: YOLOv5u는 YOLOv5 모델의 개선된 정확도-속도 절충 모델로, 다양한 객체 감지 작업에 대한 사전 훈련된 모델을 제공합니다. +keywords: YOLOv5u, 객체 감지, 사전 훈련된 모델, Ultralytics, 추론, 검증, YOLOv5, YOLOv8, 앵커 없음, 객체 여부 없음, 실시간 응용, 머신 러닝 +--- + +# YOLOv5 + +## 개요 + +YOLOv5u는 객체 감지 기법에서의 진보를 나타냅니다. Ultralytics에서 개발한 [YOLOv5](https://github.com/ultralytics/yolov5) 모델의 기본 아키텍처를 기반으로 한 YOLOv5u는 [YOLOv8](yolov8.md) 모델에서 도입된 앵커 없음, 객체 여부 없음 분리 헤드(head) 기능을 통합합니다. 이러한 적응으로 인해 모델의 아키텍처가 개선되어, 객체 감지 작업의 정확도와 속도 절충을 더욱 향상시킵니다. 경험적 결과와 해당 기능을 고려할 때, YOLOv5u는 연구 및 실제 응용 모두에서 견고한 솔루션을 찾고 있는 사용자들에게 효율적인 대안을 제공합니다. + +![Ultralytics YOLOv5](https://raw.githubusercontent.com/ultralytics/assets/main/yolov5/v70/splash.png) + +## 주요 기능 + +- **앵커 없는 분리 Ultralytics 헤드:** 기존의 객체 감지 모델은 사전 정의된 앵커 박스를 사용하여 객체의 위치를 예측합니다. 그러나 YOLOv5u는 이 방식을 현대화합니다. 앵커 없는 분리 Ultralytics 헤드를 도입함으로써 더욱 유연하고 적응적인 감지 메커니즘을 보장하여 다양한 시나리오에서 성능을 향상시킵니다. + +- **정확도-속도 절충의 최적화:** 속도와 정확도는 종종 상충하는 관계에 있습니다. 그러나 YOLOv5u는 이러한 절충에 도전합니다. 실시간 탐지를 보장하면서도 정확도를 희생하지 않는 균형을 제시합니다. 이 기능은 자율주행 차량, 로봇 공학, 실시간 비디오 분석 등 신속한 응답을 요구하는 응용 프로그램에서 특히 중요합니다. + +- **다양한 사전 훈련된 모델:** 다른 작업에는 다른 도구 세트가 필요하다는 것을 이해하는 YOLOv5u는 다양한 사전 훈련된 모델을 제공합니다. 추론, 검증 또는 훈련에 집중하고 있는지 여부에 관계없이 맞춤형 모델이 기다리고 있습니다. 이 다양성은 일반적인 솔루션이 아닌 독특한 도전 과제에 대해 특별히 세밀하게 조정된 모델을 사용하고 있다는 것을 보장합니다. + +## 지원되는 작업 및 모드 + +YOLOv5u 모델은 다양한 사전 훈련된 가중치로 [객체 감지](../tasks/detect.md) 작업에서 뛰어난 성능을 발휘합니다. 이들은 개발부터 배포까지 다양한 응용 프로그램에 적합한 다양한 모드를 지원합니다. 
+ +| 모델 유형 | 사전 훈련된 가중치 | 작업 | 추론 | 검증 | 훈련 | 내보내기 | +|---------|-----------------------------------------------------------------------------------------------------------------------------|-----------------------------|----|----|----|------| +| YOLOv5u | `yolov5nu`, `yolov5su`, `yolov5mu`, `yolov5lu`, `yolov5xu`, `yolov5n6u`, `yolov5s6u`, `yolov5m6u`, `yolov5l6u`, `yolov5x6u` | [객체 감지](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +이 표는 YOLOv5u 모델의 다양한 변형을 상세히 보여주며, 객체 감지 작업에서의 적용 가능성과 [추론](../modes/predict.md), [검증](../modes/val.md), [훈련](../modes/train.md), [내보내기](../modes/export.md)와 같은 다양한 작업 모드의 지원을 강조합니다. 이러한 포괄적인 지원을 통해 사용자는 다양한 객체 감지 시나리오에서 YOLOv5u 모델의 기능을 완전히 활용할 수 있습니다. + +## 성능 지표 + +!!! 성능 + + === "감지" + + [COCO](https://docs.ultralytics.com/datasets/detect/coco/)에서 학습된 이러한 모델을 사용한 사용 예제는 [감지 문서](https://docs.ultralytics.com/tasks/detect/)를 참조하세요. 이 문서에는 80개의 사전 훈련된 클래스를 포함합니다. + + | 모델 | YAML | 크기
(픽셀) | mAPval
50-95 | 속도
CPU ONNX
(ms) | 속도
A100 TensorRT
(ms) | 매개변수
(M) | FLOPs
(B) | + |---------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|-----------------------|----------------------|--------------------------------|-------------------------------------|--------------------|-------------------| + | [yolov5nu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5nu.pt) | [yolov5n.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 34.3 | 73.6 | 1.06 | 2.6 | 7.7 | + | [yolov5su.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5su.pt) | [yolov5s.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 43.0 | 120.7 | 1.27 | 9.1 | 24.0 | + | [yolov5mu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5mu.pt) | [yolov5m.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 49.0 | 233.9 | 1.86 | 25.1 | 64.2 | + | [yolov5lu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5lu.pt) | [yolov5l.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 52.2 | 408.4 | 2.50 | 53.2 | 135.0 | + | [yolov5xu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5xu.pt) | [yolov5x.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 53.2 | 763.2 | 3.81 | 97.2 | 246.4 | + | | | | | | | | | + | [yolov5n6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5n6u.pt) | [yolov5n6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 42.1 | 211.0 | 1.83 | 4.3 | 7.8 | + | [yolov5s6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5s6u.pt) | 
[yolov5s6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 48.6 | 422.6 | 2.34 | 15.3 | 24.6 | + | [yolov5m6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5m6u.pt) | [yolov5m6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 53.6 | 810.9 | 4.36 | 41.2 | 65.7 | + | [yolov5l6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5l6u.pt) | [yolov5l6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 55.7 | 1470.9 | 5.47 | 86.1 | 137.4 | + | [yolov5x6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5x6u.pt) | [yolov5x6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 56.8 | 2436.5 | 8.98 | 155.4 | 250.7 | + +## 사용 예제 + +이 예제는 간단한 YOLOv5 훈련 및 추론 예제를 제공합니다. 이와 기타 [모드](../modes/index.md)의 자세한 설명은 [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) 및 [Export](../modes/export.md) 문서 페이지를 참조하세요. + +!!! Example "예제" + + === "Python" + + Python에서 `YOLO()` 클래스로 `*.pt` 사전 훈련된 모델과 구성 `*.yaml` 파일을 전달하여 모델 인스턴스를 만들 수 있습니다. + + ```python + from ultralytics import YOLO + + # COCO 사전 훈련된 YOLOv5n 모델 로드 + model = YOLO('yolov5n.pt') + + # 모델 정보 표시 (선택 사항) + model.info() + + # COCO8 예제 데이터셋을 사용하여 모델을 100번 에포크로 훈련 + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # 'bus.jpg' 이미지에 대해 YOLOv5n 모델로 추론 실행 + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLI 명령을 사용하여 모델을 직접 실행할 수 있습니다. 
+ + ```bash + # COCO 사전 훈련된 YOLOv5n 모델 로드 및 COCO8 예제 데이터셋을 사용하여 모델을 100번 에포크로 훈련 + yolo train model=yolov5n.pt data=coco8.yaml epochs=100 imgsz=640 + + # COCO 사전 훈련된 YOLOv5n 모델 로드 및 'bus.jpg' 이미지에서 추론 실행 + yolo predict model=yolov5n.pt source=path/to/bus.jpg + ``` + +## 인용 및 감사의 글 + +연구에서 YOLOv5 또는 YOLOv5u를 사용하는 경우 Ultralytics YOLOv5 리포지토리를 다음과 같이 인용하세요. + +!!! Quote "" + + === "BibTeX" + ```bibtex + @software{yolov5, + title = {Ultralytics YOLOv5}, + author = {Glenn Jocher}, + year = {2020}, + version = {7.0}, + license = {AGPL-3.0}, + url = {https://github.com/ultralytics/yolov5}, + doi = {10.5281/zenodo.3908559}, + orcid = {0000-0001-5950-6979} + } + ``` + +YOLOv5 모델은 [AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) 및 [Enterprise](https://ultralytics.com/license) 라이선스로 제공됩니다. diff --git a/docs/ko/models/yolov6.md b/docs/ko/models/yolov6.md new file mode 100644 index 0000000..a28ee86 --- /dev/null +++ b/docs/ko/models/yolov6.md @@ -0,0 +1,107 @@ +--- +comments: true +description: 최첨단 물체 감지(오브젝트 디텍션) 모델인 'Meituan YOLOv6'을 알아보세요. 속도와 정확도 사이의 균형을 유지하는 이 모델은 실시간 애플리케이션에 인기 있는 선택입니다. 이 모델은 BiC(Bi-directional Concatenation) 모듈, AAT(Anchor-Aided Training) 전략, COCO 데이터셋에서 최첨단 정확도를 실현하기 위한 개선된 백본(backbone) 및 네크(neck) 설계 등에 대한 여러 주목할만한 향상 사항을 도입하고 있습니다. +keywords: Meituan YOLOv6, 오브젝트 디텍션, Ultralytics, YOLOv6 문서, Bi-directional Concatenation, Anchor-Aided Training, 사전 훈련 모델, 실시간 애플리케이션 +--- + +# Meituan YOLOv6 + +## 개요 + +[Meituan](https://about.meituan.com/) YOLOv6은 속도와 정확도 사이에서 현저한 균형을 제공하는 최첨단 물체 감지기입니다. 이 모델은 Bi-directional Concatenation(BiC) 모듈, Anchor-Aided Training(AAT) 전략, 그리고 COCO 데이터셋에서 최첨단 정확도를 실현하기 위한 개선된 백본(backbone) 및 네크(neck) 디자인 등, 아키텍처와 훈련 방식에 대한 여러 주목할만한 향상 사항을 제공합니다. + +![Meituan YOLOv6](https://user-images.githubusercontent.com/26833433/240750495-4da954ce-8b3b-41c4-8afd-ddb74361d3c2.png) +![모델 예시 이미지](https://user-images.githubusercontent.com/26833433/240750557-3e9ec4f0-0598-49a8-83ea-f33c91eb6d68.png) +**YOLOv6 개요**. 
아키텍처 다이어그램으로, 다시 설계된 네트워크 구성 요소와 훈련 전략이 중요한 성능 개선을 이끈 모습을 보여줍니다. (a) YOLOv6의 네크(neck) (N과 S 표시)입니다. M/L의 경우, RepBlocks은 CSPStackRep으로 대체됩니다. (b) BiC 모듈의 구조입니다. (c) SimCSPSPPF 블록입니다. ([출처](https://arxiv.org/pdf/2301.05586.pdf)). + +### 주요 특징 + +- **Bi-directional Concatenation (BiC) 모듈**: YOLOv6은 감지기의 네크(neck)에 BiC 모듈을 도입하여 위치 신호(localization signals)를 강화하고, 속도 저하가 거의 없이 성능을 향상시킵니다. +- **Anchor-Aided Training (AAT) 전략**: 이 모델은 추론 효율을 저하시키지 않고 앵커 기반(anchor-based)과 앵커 없음(anchor-free) 패러다임의 이점을 모두 누릴 수 있도록 AAT를 제안합니다. +- **개선된 백본 및 네크 디자인**: 백본과 네크에 추가적인 단계를 포함하도록 YOLOv6을 더 깊게 만들어 COCO 데이터셋에서 최첨단 성능을 달성합니다. +- **셀프 디스틸레이션 전략**: YOLOv6의 작은 모델 성능을 강화하기 위해 새로운 셀프 디스틸레이션 전략이 도입되었습니다. 이는 훈련 중 보조 회귀 브랜치를 강화하고 추론 중에는 이를 제거하여 속도 저하를 방지합니다. + +## 성능 메트릭 + +YOLOv6은 다양한 스케일의 사전 훈련 모델을 제공합니다: + +- YOLOv6-N: NVIDIA Tesla T4 GPU에서 1187 FPS로 COCO val2017에서 37.5% AP. +- YOLOv6-S: 484 FPS로 45.0% AP. +- YOLOv6-M: 226 FPS로 50.0% AP. +- YOLOv6-L: 116 FPS로 52.8% AP. +- YOLOv6-L6: 실시간에서 최첨단 정확성. + +또한, YOLOv6은 다양한 정밀도에 대한 양자화 모델과 모바일 플랫폼에 최적화된 모델도 제공합니다. + +## 사용 예시 + +다음은 간단한 YOLOv6 훈련 및 추론 예시입니다. 이 외에도 [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md), [Export](../modes/export.md) 문서 페이지에서 자세한 내용을 확인할 수 있습니다. + +!!! 
Example "예제" + + === "Python" + + `*.pt` 사전 훈련된 PyTorch 모델과 구성 `*.yaml` 파일을 `YOLO()` 클래스에 전달하여 파이썬에서 모델 인스턴스를 만들 수 있습니다: + + ```python + from ultralytics import YOLO + + # YOLOv6n 모델을 처음부터 만듭니다 + model = YOLO('yolov6n.yaml') + + # 모델 정보를 표시합니다 (선택 사항) + model.info() + + # COCO8 예시 데이터셋으로 모델을 100 에폭 동안 훈련합니다 + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # YOLOv6n 모델로 'bus.jpg' 이미지에서 추론을 실행합니다 + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLI 명령을 사용하여 모델을 직접 실행할 수 있습니다: + + ```bash + # 처음부터 YOLOv6n 모델을 만들고 COCO8 예시 데이터셋으로 100 에폭 동안 훈련합니다 + yolo train model=yolov6n.yaml data=coco8.yaml epochs=100 imgsz=640 + + # 처음부터 YOLOv6n 모델을 만들고 'bus.jpg' 이미지에서 추론을 실행합니다 + yolo predict model=yolov6n.yaml source=path/to/bus.jpg + ``` + +## 지원되는 작업 및 모드 + +YOLOv6 시리즈는 높은 성능의 [오브젝트 디텍션](../tasks/detect.md)을 위해 최적화된 다양한 모델을 제공합니다. 이 모델들은 다양한 계산 요구 사항과 정확도 요구 사항에 맞추어 다용도로 사용할 수 있습니다. + +| 모델 유형 | 사전 훈련 가중치 | 지원되는 작업 | 추론 | 검증 | 훈련 | 익스포트 | +|-----------|----------------|--------------------------------|----|----|----|------| +| YOLOv6-N | `yolov6-n.pt` | [오브젝트 디텍션](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-S | `yolov6-s.pt` | [오브젝트 디텍션](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-M | `yolov6-m.pt` | [오브젝트 디텍션](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L | `yolov6-l.pt` | [오브젝트 디텍션](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L6 | `yolov6-l6.pt` | [오브젝트 디텍션](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +이 표는 YOLOv6 모델의 다양한 변형에 대한 자세한 개요를 제공하며, 오브젝트 디텍션 작업과 [추론](../modes/predict.md), [검증](../modes/val.md), [훈련](../modes/train.md), [익스포트](../modes/export.md)와 같은 다양한 운영 모드와의 호환성을 강조합니다. 이러한 포괄적인 지원을 통해 사용자들은 다양한 오브젝트 디텍션 시나리오에서 YOLOv6 모델의 기능을 최대한 활용할 수 있습니다. + +## 인용 및 감사의 글 + +실시간 물체 감지 분야에서의 중요한 기여에 대해 작성자들에게 감사의 말씀을 전합니다: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{li2023yolov6, + title={YOLOv6 v3.0: A Full-Scale Reloading}, + author={Chuyi Li and Lulu Li and Yifei Geng and Hongliang Jiang and Meng Cheng and Bo Zhang and Zaidan Ke and Xiaoming Xu and Xiangxiang Chu}, + year={2023}, + eprint={2301.05586}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + + YOLOv6 원본 논문은 [arXiv](https://arxiv.org/abs/2301.05586)에서 찾을 수 있습니다. 저자들은 자신들의 작업을 공개했으며, 코드는 [GitHub](https://github.com/meituan/YOLOv6)에서 액세스할 수 있습니다. 해당 분야의 발전에 기여하고 작업을 더 넓은 커뮤니티가 이용할 수 있도록 해 준 저자들의 노력에 감사의 말씀을 전합니다. diff --git a/docs/ko/models/yolov7.md b/docs/ko/models/yolov7.md new file mode 100644 index 0000000..2071682 --- /dev/null +++ b/docs/ko/models/yolov7.md @@ -0,0 +1,65 @@ +--- +comments: true +description: YOLOv7은 실시간 객체 검출기로, 뛰어난 속도, 강력한 정확성, 독특한 trainable bag-of-freebies 최적화에 대해 알아봅니다. +keywords: YOLOv7, 실시간 객체 검출기, 최첨단, Ultralytics, MS COCO 데이터셋, 모델 재파라미터화, 동적 라벨 할당, 확장 스케일, 복합 스케일 +--- + +# YOLOv7: Trainable Bag-of-Freebies + +YOLOv7은 5 FPS에서 160 FPS까지의 범위에서 알려진 모든 객체 검출기를 속도와 정확성에서 능가하는 최첨단 실시간 객체 검출기입니다. 이 모델은 GPU V100에서 30 FPS 이상을 달성하여, 알려진 실시간 객체 검출기 중 가장 높은 정확도(56.8% AP)를 보여줍니다. 게다가, YOLOv7은 다른 객체 검출기인 YOLOR, YOLOX, Scaled-YOLOv4, YOLOv5 등에 비해 속도와 정확성 면에서 더 뛰어납니다. 이 모델은 다른 데이터셋이나 사전 학습된 가중치를 사용하지 않고 MS COCO 데이터셋에서 처음부터 훈련되었습니다. YOLOv7의 소스 코드는 GitHub에서 확인할 수 있습니다. + +![YOLOv7와 최첨단 객체 검출기 비교](https://github.com/ultralytics/ultralytics/assets/26833433/5e1e0420-8122-4c79-b8d0-2860aa79af92) +**최첨단 객체 검출기 비교**. 표 2의 결과에서 볼 수 있듯이, 제안된 방법은 최상의 속도-정확성 균형을 가지고 있습니다. YOLOv7-tiny-SiLU를 YOLOv5-N(r6.1)과 비교해보면, 저희 방법은 약 127 fps가 더 빠르고 AP에서 10.7% 정확도가 향상됩니다. 게다가, YOLOv7은 161 fps 프레임 속도에서 51.4% AP를 달성하는 반면, PPYOLOE-L은 동일한 AP에서 78 fps 프레임 속도만 갖습니다. 매개 변수 사용량 측면에서 YOLOv7은 PPYOLOE-L보다 41% 적습니다. YOLOv7-X를 114 fps의 추론 속도로 YOLOv5-L(r6.1)의 99 fps 추론 속도와 비교하면, YOLOv7-X는 AP를 3.9% 향상시킵니다. YOLOv7-X를 유사한 스케일의 YOLOv5-X(r6.1)와 비교하면, YOLOv7-X의 추론 속도가 31 fps 더 빨라집니다. 
또한, 매개 변수 및 계산의 양 측면에서 YOLOv7-X는 YOLOv5-X(r6.1)과 비교하여 매개 변수 22%와 계산 8%를 줄이고 AP를 2.2% 향상시킵니다 ([출처](https://arxiv.org/pdf/2207.02696.pdf)). + +## 개요 + +실시간 객체 검출은 다중 객체 추적, 자율 주행, 로봇 공학 및 의료 이미지 분석을 비롯한 많은 컴퓨터 비전 시스템의 중요한 구성 요소입니다. 최근 몇 년간 실시간 객체 검출 개발은 효율적인 구조 설계와 다양한 CPU, GPU 및 신경 처리 장치(NPU)의 추론 속도 향상에 초점을 맞추고 있습니다. YOLOv7은 모바일 GPU와 GPU 장치를 모두 지원하여 엣지부터 클라우드까지 다양한 환경에서 사용할 수 있습니다. + +기존의 실시간 객체 검출기가 아키텍처 최적화에 중점을 둔 것과는 달리, YOLOv7은 훈련 과정 최적화에 초점을 두고 있습니다. 이는 추론 비용을 증가시키지 않고 객체 검출의 정확도를 향상시키는 모듈과 최적화 방법을 포함한 "trainable bag-of-freebies" 개념을 도입합니다. + +## 주요 기능 + +YOLOv7은 다음과 같은 주요 기능을 도입합니다: + +1. **모델 재파라미터화**: YOLOv7은 그래디언트 전파 경로 개념을 이용한 다른 네트워크의 레이어에 적용 가능한 전략인 계획된 재파라미터화 모델을 제안합니다. + +2. **동적 라벨 할당**: 다중 출력 레이어 모델의 훈련에서는 "다른 브랜치의 출력에 대해 동적 타깃을 어떻게 할당할 것인가?"라는 새로운 문제가 발생합니다. 이를 해결하기 위해 YOLOv7은 coarse-to-fine 리드 가이드 라벨 할당이라는 새로운 라벨 할당 방법을 도입합니다. + +3. **확장 및 복합 스케일링**: YOLOv7은 매개 변수와 계산을 효과적으로 활용할 수 있는 실시간 객체 검출기를 위한 "확장" 및 "복합 스케일링" 방법을 제안합니다. + +4. **효율성**: YOLOv7이 제안한 방법은 최첨단 실시간 객체 검출기의 매개 변수 약 40%, 계산 약 50%를 효과적으로 줄일 수 있으며, 더 빠른 추론 속도와 더 높은 검출 정확도를 달성할 수 있습니다. + +## 사용 예시 + +작성 시점 기준으로 Ultralytics는 현재 YOLOv7 모델을 지원하지 않습니다. 따라서 YOLOv7을 사용하려는 사용자는 YOLOv7 GitHub 저장소의 설치 및 사용 지침을 직접 참조해야 합니다. + +YOLOv7을 사용하는 일반적인 단계에 대해 간략히 설명해 드리겠습니다: + +1. YOLOv7 GitHub 저장소를 방문합니다: [https://github.com/WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7). + +2. 설치에 대한 README 파일에서 제공하는 지침을 따릅니다. 일반적으로 저장소를 복제하고 필요한 종속성을 설치하고 필요한 환경 변수를 설정하는 것이 포함됩니다. + +3. 설치가 완료되면 저장소에서 제공하는 사용 지침에 따라 모델을 훈련하고 사용할 수 있습니다. 이는 데이터셋을 준비하고 모델 매개 변수를 구성하고 모델을 훈련한 다음 훈련된 모델을 사용하여 객체 검출을 수행하는 것을 일반적으로 포함합니다. + +특정 단계는 사용 사례와 YOLOv7 저장소의 현재 상태에 따라 달라질 수 있습니다. 따라서 YOLOv7 GitHub 저장소에서 제공하는 지침을 직접 참조하는 것이 권장됩니다. + +YOLOv7을 지원하게 되면, Ultralytics의 사용 예시를 포함하여 이 문서를 업데이트하기 위해 최선을 다하겠습니다. + +## 인용 및 감사의 글 + +실시간 객체 검출 분야에서의 중요한 기여로 인해 YOLOv7의 저자들에게 감사의 말씀을 전하고자 합니다: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @article{wang2022yolov7, + title={{YOLOv7}: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors}, + author={Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + journal={arXiv preprint arXiv:2207.02696}, + year={2022} + } + ``` + +원본 YOLOv7 논문은 [arXiv](https://arxiv.org/pdf/2207.02696.pdf)에서 찾을 수 있습니다. 저자들은 작업을 공개적으로 사용 가능하게 하였고, 코드베이스는 [GitHub](https://github.com/WongKinYiu/yolov7)에서 확인할 수 있습니다. 저희는 이들이 해당 분야의 발전에 기여하고 작업을 폭넓은 커뮤니티에게 공개 가능하게 한 노력에 감사드립니다. diff --git a/docs/ko/models/yolov8.md b/docs/ko/models/yolov8.md new file mode 100644 index 0000000..15084fa --- /dev/null +++ b/docs/ko/models/yolov8.md @@ -0,0 +1,162 @@ +--- +comments: true +description: YOLOv8는 실시간 객체 탐지기인 YOLO 시리즈의 최신 버전으로, 최신 아키텍처, 사전 훈련된 모델 및 정확도와 속도의 최적 균형을 제공함으로써 다양한 객체 탐지 작업에 적합한 선택지입니다. +keywords: YOLOv8, Ultralytics, 실시간 객체 탐지기, 사전 훈련된 모델, 문서, 객체 탐지, YOLO 시리즈, 고급 아키텍처, 정확도, 속도 +--- + +# YOLOv8 + +## 개요 + +YOLOv8는 실시간 객체 탐지기인 YOLO 시리즈의 최신 버전으로, 정확도와 속도면에서 최신 기술을 제공합니다. 이전 YOLO 버전의 기술적 발전을 바탕으로, YOLOv8은 새로운 기능과 최적화를 도입하여 다양한 응용 분야에서 다양한 객체 탐지 작업에 이상적인 선택지가 됩니다. + +![Ultralytics YOLOv8](https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/yolo-comparison-plots.png) + +## 주요 기능 + +- **고급 백본 및 넥 아키텍처:** YOLOv8은 최첨단 백본 및 넥 아키텍처를 사용하여 향상된 특징 추출과 객체 탐지 성능을 제공합니다. +- **앵커 없는 분리된 Ultralytics 헤드:** YOLOv8은 앵커 기반 접근법에 비해 더 나은 정확도와 효율적인 탐지 과정을 위한 앵커 없는 분리된 Ultralytics 헤드를 채택합니다. +- **정확도와 속도의 최적 균형화:** 정확도와 속도의 최적 균형을 유지하는 것에 초점을 맞춘 YOLOv8은 다양한 응용 분야에서 실시간 객체 탐지 작업에 적합합니다. +- **다양한 사전 훈련된 모델:** YOLOv8은 다양한 작업과 성능 요건에 맞는 사전 훈련된 모델을 제공하여 특정 사용 사례에 적합한 모델을 쉽게 찾을 수 있도록 합니다. + +## 지원하는 작업 및 모드 + +YOLOv8 시리즈는 컴퓨터 비전의 특정 작업을 위해 특화된 다양한 모델을 제공합니다. 이러한 모델은 객체 탐지부터 인스턴스 세그멘테이션, 포즈/키포인트 탐지, 분류와 같은 복잡한 작업까지 다양한 요구 사항을 충족시킬 수 있도록 설계되었습니다. + +각 YOLOv8 시리즈의 변형은 해당 작업에 최적화되어 고성능과 정확도를 제공합니다. 
또한 이러한 모델은 [추론](../modes/predict.md), [검증](../modes/val.md), [훈련](../modes/train.md) 및 [내보내기](../modes/export.md)와 같은 다양한 운영 모드와 호환되어 배포 및 개발의 다양한 단계에서 사용할 수 있도록 지원합니다. + +| 모델 | 파일 이름 | 작업 | 추론 | 검증 | 훈련 | 내보내기 | +|-------------|----------------------------------------------------------------------------------------------------------------|------------------------------------|----|----|----|------| +| YOLOv8 | `yolov8n.pt` `yolov8s.pt` `yolov8m.pt` `yolov8l.pt` `yolov8x.pt` | [객체 탐지](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-seg | `yolov8n-seg.pt` `yolov8s-seg.pt` `yolov8m-seg.pt` `yolov8l-seg.pt` `yolov8x-seg.pt` | [인스턴스 세그멘테이션](../tasks/segment.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-pose | `yolov8n-pose.pt` `yolov8s-pose.pt` `yolov8m-pose.pt` `yolov8l-pose.pt` `yolov8x-pose.pt` `yolov8x-pose-p6.pt` | [포즈/키포인트](../tasks/pose.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-cls | `yolov8n-cls.pt` `yolov8s-cls.pt` `yolov8m-cls.pt` `yolov8l-cls.pt` `yolov8x-cls.pt` | [분류](../tasks/classify.md) | ✅ | ✅ | ✅ | ✅ | + +이 표는 YOLOv8 모델의 변형을 개요로 제공하며, 특정 작업에서의 적용 가능성 및 추론, 검증, 훈련 및 내보내기와 같은 다양한 운영 모드와의 호환성을 강조합니다. 이는 컴퓨터 비전의 다양한 응용 분야에 적합한 YOLOv8 시리즈의 다재다능함과 견고성을 보여줍니다. + +## 성능 지표 + +!!! 성능 + + === "객체 탐지 (COCO)" + + [COCO](https://docs.ultralytics.com/datasets/detect/coco/)에서 사전 훈련된 이러한 모델을 사용한 사용 예제에 대한 정보는 [객체 탐지 문서](https://docs.ultralytics.com/tasks/detect/)를 참조하십시오. 이는 80개의 사전 훈련된 클래스를 포함합니다. + + | 모델 | 크기
(픽셀) | mAP
val 50-95 | 속도
CPU ONNX
(밀리초) | 속도
A100 TensorRT
(밀리초) | 매개변수
(백만) | FLOPs
(십억) | + | ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------- | ----------------- | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | + + === "객체 탐지 (Open Images V7)" + + [Open Images V7](https://docs.ultralytics.com/datasets/detect/open-images-v7/)에서 사전 훈련된 이러한 모델을 사용한 사용 예제에 대한 정보는 [객체 탐지 문서](https://docs.ultralytics.com/tasks/detect/)를 참조하십시오. 이는 600개의 사전 훈련된 클래스를 포함합니다. + + | 모델 | 크기
(픽셀) | mAP
val 50-95 | 속도
CPU ONNX
(밀리초) | 속도
A100 TensorRT
(밀리초) | 매개변수
(백만) | FLOPs
(십억) | + | ----------------------------------------------------------------------------------------- | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------- | ----------------- | + | [YOLOv8n-oiv7](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-oiv7.pt) | 640 | 18.4 | 142.4 | 1.21 | 3.5 | 10.5 | + | [YOLOv8s-oiv7](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-oiv7.pt) | 640 | 27.7 | 183.1 | 1.40 | 11.4 | 29.7 | + | [YOLOv8m-oiv7](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-oiv7.pt) | 640 | 33.6 | 408.5 | 2.26 | 26.2 | 80.6 | + | [YOLOv8l-oiv7](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-oiv7.pt) | 640 | 34.9 | 596.9 | 2.43 | 44.1 | 167.4 | + | [YOLOv8x-oiv7](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-oiv7.pt) | 640 | 36.3 | 860.6 | 3.56 | 68.7 | 260.6 | + + === "세그멘테이션 (COCO)" + + [COCO](https://docs.ultralytics.com/datasets/segment/coco/)에서 사전 훈련된 이러한 모델을 사용한 사용 예제에 대한 정보는 [세그멘테이션 문서](https://docs.ultralytics.com/tasks/segment/)를 참조하십시오. 이는 80개의 사전 훈련된 클래스를 포함합니다. + + | 모델 | 크기
(픽셀) | mAP
box 50-95 | mAP
mask 50-95 | 속도
CPU ONNX
(밀리초) | 속도
A100 TensorRT
(밀리초) | 매개변수
(백만) | FLOPs
(십억) | + | -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | ------------------ | ------------------------------ | ----------------------------------- | ------------------- | ----------------- | + | [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | + | [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | + | [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | + | [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | + | [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | + + === "분류 (ImageNet)" + + [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/)에서 사전 훈련된 이러한 모델을 사용한 사용 예제에 대한 정보는 [분류 문서](https://docs.ultralytics.com/tasks/classify/)를 참조하십시오. 이는 1000개의 사전 훈련된 클래스를 포함합니다. + + | 모델 | 크기
(픽셀) | top1 정확도 | top5 정확도 | 속도
CPU ONNX
(밀리초) | 속도
A100 TensorRT
(밀리초) | 매개변수
(백만) | FLOPs
(백만) at 640 | + | -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | ----------------- | ------------------------------ | ----------------------------------- | ------------------- | ------------------------ | + | [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | + | [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | + | [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | + | [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | + | [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | + + === "포즈 (COCO)" + + [COCO](https://docs.ultralytics.com/datasets/pose/coco/)에서 사전 훈련된 이러한 모델을 사용한 사용 예제에 대한 정보는 [포즈 문서](https://docs.ultralytics.com/tasks/pose/)를 참조하십시오. 이는 'person' 하나의 사전 훈련된 클래스를 포함합니다. + + | 모델 | 크기
(픽셀) | mAP
pose 50-95 | mAP
pose 50 | 속도
CPU ONNX
(밀리초) | 속도
A100 TensorRT
(밀리초) | 매개변수
(백만) | FLOPs
(십억) | + | ---------------------------------------------------------------------------------------------------- | --------------------- | -------------------- | ----------------- | ------------------------------ | ----------------------------------- | ------------------- | ----------------- | + | [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | + | [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | + | [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | + | [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | + | [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | + | [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | + +## 사용 예제 + +이 예제는 간단한 YOLOv8 훈련 및 추론 예제를 제공합니다. 이와 다른 [모드](../modes/index.md)의 전체 문서는 [예측](../modes/predict.md), [훈련](../modes/train.md), [검증](../modes/val.md) 및 [내보내기](../modes/export.md) 문서를 참조하십시오. + +아래 예제는 객체 탐지를 위한 YOLOv8 [Detect](../tasks/detect.md) 모델을 대상으로 합니다. 추가 지원되는 작업에 대해서는 [Segment](../tasks/segment.md), [Classify](../tasks/classify.md) 및 [Pose](../tasks/pose.md) 문서를 참조하십시오. + +!!! 예시 + + === "Python" + + python에서 PyTorch 사전 훈련된 `*.pt` 모델 및 구성 `*.yaml` 파일을 `YOLO()` 클래스에 전달하여 모델 인스턴스를 만들 수 있습니다. 
+ + ```python + from ultralytics import YOLO + + # COCO 사전 훈련된 YOLOv8n 모델 로드 + model = YOLO('yolov8n.pt') + + # 모델 정보 표시 (옵션) + model.info() + + # COCO8 예제 데이터셋에서 모델을 100회 에포크로 훈련 + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # 'bus.jpg' 이미지에서 YOLOv8n 모델로 추론 실행 + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLI 명령을 사용하면 직접 모델을 실행할 수 있습니다. + + ```bash + # COCO 사전 훈련된 YOLOv8n 모델을 로드하고 COCO8 예제 데이터셋에서 100회 에포크로 훈련 + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # COCO 사전 훈련된 YOLOv8n 모델을 로드하고 'bus.jpg' 이미지에서 추론 실행 + yolo predict model=yolov8n.pt source=path/to/bus.jpg + ``` + +## 인용 및 감사의 글 + +이 저장소에서 YOLOv8 모델이나 다른 소프트웨어를 사용한 작업에 대해서는 다음 형식으로 인용해주시기 바랍니다. + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @software{yolov8_ultralytics, + author = {Glenn Jocher and Ayush Chaurasia and Jing Qiu}, + title = {Ultralytics YOLOv8}, + version = {8.0.0}, + year = {2023}, + url = {https://github.com/ultralytics/ultralytics}, + orcid = {0000-0001-5950-6979, 0000-0002-7603-6750, 0000-0003-3783-7069}, + license = {AGPL-3.0} + } + ``` + +DOI는 준비 중이며 이용 가능해지면 인용문에 추가될 예정입니다. YOLOv8 모델은 [AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) 및 [Enterprise](https://ultralytics.com/license) 라이센스에 따라 제공됩니다. diff --git a/docs/ko/modes/benchmark.md b/docs/ko/modes/benchmark.md new file mode 100644 index 0000000..147da2f --- /dev/null +++ b/docs/ko/modes/benchmark.md @@ -0,0 +1,94 @@ +--- +comments: true +description: YOLOv8의 다양한 내보내기 형식에 걸쳐 속도 및 정확성을 프로파일링하는 방법을 알아보고, mAP50-95, accuracy_top5 메트릭 및 기타에 대한 통찰을 얻으십시오. +keywords: Ultralytics, YOLOv8, 벤치마킹, 속도 프로파일링, 정확도 프로파일링, mAP50-95, accuracy_top5, ONNX, OpenVINO, TensorRT, YOLO 내보내기 형식 +--- + +# Ultralytics YOLO를 사용한 모델 벤치마킹 + +Ultralytics YOLO 생태계 및 통합 + +## 소개 + +모델을 학습하고 검증한 후, 다음으로 논리적인 단계는 다양한 실제 상황에서의 성능을 평가하는 것입니다. Ultralytics YOLOv8의 벤치마크 모드는 다양한 내보내기 형식에서 모델의 속도와 정확도를 평가하는 강력한 프레임워크를 제공하여 이와 같은 목적을 수행하는 역할을 합니다. + +## 벤치마킹이 왜 중요한가요? 
+ +- **정보에 기반한 결정:** 속도와 정확도 사이의 타협점에 대한 통찰력을 얻을 수 있습니다. +- **자원 배분:** 다양한 하드웨어에서 각기 다른 내보내기 형식의 성능을 이해합니다. +- **최적화:** 특정 사용 사례에 가장 적합한 내보내기 형식을 알아냅니다. +- **비용 효율성:** 벤치마크 결과에 기반하여 하드웨어 자원을 보다 효율적으로 사용합니다. + +### 벤치마크 모드의 주요 메트릭 + +- **mAP50-95:** 객체 인식, 세분화, 자세 추정에 사용됩니다. +- **accuracy_top5:** 이미지 분류에 사용됩니다. +- **추론 시간:** 각 이미지 당 밀리초로 측정된 시간입니다. + +### 지원되는 내보내기 형식 + +- **ONNX:** CPU 성능 최적화를 위함 +- **TensorRT:** GPU 효율성을 극대화하기 위함 +- **OpenVINO:** 인텔 하드웨어 최적화를 위함 +- **CoreML, TensorFlow SavedModel, 그 외:** 다양한 배포 요구 사항을 위함. + +!!! Tip "팁" + + * CPU 속도 향상을 위해 ONNX 또는 OpenVINO로 내보내기. + * GPU 속도 향상을 위해 TensorRT로 내보내기. + +## 사용 예제 + +YOLOv8n 벤치마킹을 ONNX, TensorRT 등 모든 지원되는 내보내기 형식에 대해 실행합니다. 완벽한 내보내기 인수 목록을 보려면 아래의 인수 섹션을 참조하세요. + +!!! Example "예제" + + === "파이썬" + + ```python + from ultralytics.utils.benchmarks import benchmark + + # GPU에서 벤치마킹 + benchmark(model='yolov8n.pt', data='coco8.yaml', imgsz=640, half=False, device=0) + ``` + === "CLI" + + ```bash + yolo benchmark model=yolov8n.pt data='coco8.yaml' imgsz=640 half=False device=0 + ``` + +## 인수 + +`model`, `data`, `imgsz`, `half`, `device`, `verbose`와 같은 인수들은 사용자들이 벤치마킹을 특정 필요에 맞게 조정하고 쉽게 다른 내보내기 형식의 성능을 비교할 수 있도록 유연성을 제공합니다. + +| 키 | 값 | 설명 | +|-----------|---------|----------------------------------------------------------| +| `model` | `None` | 모델 파일 경로, 예: yolov8n.pt, yolov8n.yaml | +| `data` | `None` | 벤치마킹 데이터 세트를 참조하는 YAML 경로 ('val' 레이블 아래) | +| `imgsz` | `640` | 스칼라 또는 (h, w) 리스트 형태의 이미지 크기, 예: (640, 480) | +| `half` | `False` | FP16 양자화 | +| `int8` | `False` | INT8 양자화 | +| `device` | `None` | 실행할 기기, 예: CUDA device=0 혹은 device=0,1,2,3 또는 device=cpu | +| `verbose` | `False` | 오류 시 계속하지 않음 (bool), 또는 val 하한 임계값 (float) | + +## 내보내기 형식 + +벤치마크는 아래에 나와있는 가능한 모든 내보내기 형식에서 자동으로 실행을 시도합니다. 
+ +| 형식 | `format` 인자 | 모델 | 메타데이터 | 인수 | +|--------------------------------------------------------------------|---------------|---------------------------|-------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +전체 `export` 세부 정보는 [Export](https://docs.ultralytics.com/modes/export/) 페이지에서 확인할 수 있습니다. 
diff --git a/docs/ko/modes/export.md b/docs/ko/modes/export.md new file mode 100644 index 0000000..98a9f10 --- /dev/null +++ b/docs/ko/modes/export.md @@ -0,0 +1,108 @@ +--- +comments: true +description: YOLOv8 모델을 ONNX, TensorRT, CoreML 등의 다양한 형식으로 내보내는 단계별 가이드를 확인해보세요. 이제 배포를 진행해보세요! +keywords: YOLO, YOLOv8, Ultralytics, 모델 내보내기, ONNX, TensorRT, CoreML, TensorFlow SavedModel, OpenVINO, PyTorch, 모델 내보내기 +--- + +# Ultralytics YOLO를 사용한 모델 내보내기 + +Ultralytics YOLO 생태계 및 통합 + +## 소개 + +모델을 훈련하는 최종 목적은 실제 환경에서 배포하기 위함입니다. Ultralytics YOLOv8의 내보내기 모드는 훈련된 모델을 다양한 형식으로 내보내어 여러 플랫폼과 디바이스에서 배포할 수 있는 범용적인 옵션을 제공합니다. 이 포괄적인 가이드는 모델 내보내기의 미묘한 점들을 설명하고 최대의 호환성과 성능을 달성하는 방법을 안내하는 것을 목표로 합니다. + +

+
+ +
+ 시청하기: 사용자 지정 훈련된 Ultralytics YOLOv8 모델을 내보내고 웹캠에서 실시간 추론을 실행하는 방법. +

+ +## YOLOv8의 내보내기 모드를 선택해야 하는 이유는 무엇인가요? + +- **범용성:** ONNX, TensorRT, CoreML 등 다양한 형식으로 내보낼 수 있습니다. +- **성능:** TensorRT를 사용할 경우 최대 5배 빠른 GPU 속도 향상 및 ONNX 또는 OpenVINO를 사용하여 최대 3배 빠른 CPU 속도 향상을 얻을 수 있습니다. +- **호환성:** 모델을 다양한 하드웨어 및 소프트웨어 환경에서 배포할 수 있도록 만들어줍니다. +- **사용의 용이성:** 모델 내보내기를 위한 간단한 CLI 및 Python API 제공. + +### 내보내기 모드의 주요 기능 + +다음은 몇 가지 주요 기능들입니다: + +- **원클릭 내보내기:** 다양한 형식으로 내보내기 위한 간단한 명령어. +- **배치 내보내기:** 배치 추론이 가능한 모델들을 내보낼 수 있습니다. +- **최적화된 추론:** 내보낸 모델들은 더 빠른 추론 시간을 위해 최적화되어 있습니다. +- **튜토리얼 비디오:** 원활한 내보내기 경험을 위한 심도 있는 가이드 및 튜토리얼. + +!!! Tip "팁" + + * CPU 속도 향상을 위해 ONNX 또는 OpenVINO로 내보내세요. + * GPU 속도 향상을 위해 TensorRT로 내보내세요. + +## 사용 예시 + +YOLOv8n 모델을 ONNX나 TensorRT와 같은 다른 형식으로 내보냅니다. 내보내기 인수에 대한 전체 목록은 아래 '인수' 섹션을 참조하세요. + +!!! 예시 "" + + === "Python" + + ```python + from ultralytics import YOLO + + # 모델을 불러오기 + model = YOLO('yolov8n.pt') # 공식 모델을 불러오기 + model = YOLO('path/to/best.pt') # 사용자 지정 훈련 모델을 불러오기 + + # 모델을 내보내기 + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # 공식 모델을 내보내기 + yolo export model=path/to/best.pt format=onnx # 사용자 지정 훈련 모델을 내보내기 + ``` + +## 인수 + +YOLO 모델의 내보내기 설정은 다른 환경이나 플랫폼에서 모델을 사용하기 위해 저장 또는 내보내기할 때 사용하는 다양한 구성 및 옵션을 의미합니다. 이러한 설정은 모델의 성능, 크기 및 다양한 시스템과의 호환성에 영향을 미칠 수 있습니다. 일반적인 YOLO 내보내기 설정에는 내보낸 모델 파일의 형식(예: ONNX, TensorFlow SavedModel), 모델이 실행될 장치(예: CPU, GPU) 및 마스크 또는 상자당 여러 라벨과 같은 추가 기능의 포함 여부 등이 있습니다. 모델이 사용되는 특정 작업과 대상 환경 또는 플랫폼의 요구 사항이나 제약 사항에 따라 내보내기 과정에 영향을 미치는 다른 요소들도 있을 수 있습니다. 내보낸 모델이 의도한 용도로 최적화되어 있고 대상 환경에서 효과적으로 사용할 수 있도록 이러한 설정을 세심하게 고려하고 구성하는 것이 중요합니다. 
+ +| 키 | 값 | 설명 | +|-------------|-----------------|---------------------------------------------| +| `format` | `'torchscript'` | 내보낼 형식 | +| `imgsz` | `640` | 스칼라 또는 (h, w) 리스트 형식의 이미지 크기, 예: (640, 480) | +| `keras` | `False` | TF SavedModel 내보내기에 Keras 사용 | +| `optimize` | `False` | TorchScript: 모바일 최적화 | +| `half` | `False` | FP16 양자화 | +| `int8` | `False` | INT8 양자화 | +| `dynamic` | `False` | ONNX/TensorRT: 동적 축 | +| `simplify` | `False` | ONNX/TensorRT: 모델 단순화 | +| `opset` | `None` | ONNX: opset 버전 (선택적, 기본값은 최신) | +| `workspace` | `4` | TensorRT: 작업공간 크기 (GB) | +| `nms` | `False` | CoreML: NMS 추가 | + +## 내보내기 형식 + +아래 표에는 사용 가능한 YOLOv8 내보내기 형식이 나와 있습니다. `format` 인수를 사용하여 어떤 형식으로든 내보낼 수 있습니다. 예: `format='onnx'` 또는 `format='engine'`. + +| 형식 | `format` 인수 | 모델 | 메타데이터 | 인수 | +|--------------------------------------------------------------------|---------------|---------------------------|-------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, 
`int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | diff --git a/docs/ko/modes/index.md b/docs/ko/modes/index.md new file mode 100644 index 0000000..1d406be --- /dev/null +++ b/docs/ko/modes/index.md @@ -0,0 +1,73 @@ +--- +comments: true +description: YOLOv8를 통해 트레이닝부터 추적까지, Ultralytics에 최적화된 모든 기능을 활용하세요. 지원되는 각 모드에 대한 통찰력과 예시를 포함하여 검증, 내보내기, 벤치마킹까지 이해하실 수 있습니다. +keywords: Ultralytics, YOLOv8, 머신러닝, 객체탐지, 트레이닝, 검증, 예측, 내보내기, 추적, 벤치마킹 +--- + +# Ultralytics YOLOv8 모드 + +Ultralytics YOLO 생태계 및 통합 + +## 서론 + +Ultralytics YOLOv8는 단순한 객체 탐지 모델이 아닙니다; 데이터 수집에서 모델 트레이닝, 검증, 배포, 실세계 추적에 이르기까지 머신러닝 모델의 전체 생애주기를 커버하기 위해 설계된 다재다능한 프레임워크입니다. 각각의 모드는 특정 목적을 위해 섬세하게 구성되며, 다양한 작업 및 사용 사례에 필요한 유연성과 효율성을 제공합니다. + +

+
+ +
+ 시청하기: Ultralytics 모드 튜토리얼: 트레이닝, 검증, 예측, 내보내기 및 벤치마킹. +

+ +### 모드 요약 + +YOLOv8이 지원하는 **모드**를 이해하는 것은 모델을 최대한 활용하기 위해 필수적입니다: + +- **Train** 모드: 사용자 맞춤 또는 사전 로드된 데이터셋 위에서 모델을 튜닝합니다. +- **Val** 모드: 트레이닝 후 모델 성능을 검증하기 위한 체크포인트. +- **Predict** 모드: 실세계 데이터에서 모델의 예측력을 발휘합니다. +- **Export** 모드: 다양한 포맷으로 모델을 배포 준비 상태로 만듭니다. +- **Track** 모드: 객체 탐지 모델을 실시간 추적 애플리케이션으로 확장합니다. +- **Benchmark** 모드: 다양한 배포 환경에서 모델의 속도와 정확도를 분석합니다. + +이 포괄적인 가이드는 각 모드에 대한 개요와 실제 인사이트를 제공하여 YOLOv8의 전체 잠재력을 활용할 수 있도록 도와줍니다. + +## [Train](train.md) + +Train 모드는 사용자 맞춤 데이터셋 위에서 YOLOv8 모델을 트레이닝하기 위해 사용됩니다. 이 모드에서는 지정된 데이터셋과 하이퍼파라미터를 사용하여 모델을 트레이닝합니다. 트레이닝 과정에서 모델의 파라미터를 최적화하여 이미지 내 객체의 클래스와 위치를 정확히 예측할 수 있도록 합니다. + +[Train 예시](train.md){ .md-button } + +## [Val](val.md) + +Val 모드는 트레이닝된 YOLOv8 모델을 검증하기 위해 사용됩니다. 이 모드에서는 모델을 검증 세트에서 평가하여 정확도 및 일반화 성능을 측정합니다. 이 모드는 모델의 하이퍼파라미터를 조정하고 성능을 개선하는데 사용할 수 있습니다. + +[Val 예시](val.md){ .md-button } + +## [Predict](predict.md) + +Predict 모드는 트레이닝된 YOLOv8 모델을 사용하여 새 이미지 또는 비디오에서 예측을 수행하기 위해 사용됩니다. 이 모드에서는 체크포인트 파일에서 모델을 로드하고, 사용자가 이미지나 비디오를 제공하여 추론을 수행합니다. 모델은 입력 이미지 또는 비디오에서 객체의 클래스와 위치를 예측합니다. + +[Predict 예시](predict.md){ .md-button } + +## [Export](export.md) + +Export 모드는 배포를 위해 YOLOv8 모델을 내보낼 수 있는 포맷으로 변환하기 위해 사용됩니다. 이 모드에서는 모델을 다른 소프트웨어 어플리케이션 또는 하드웨어 기기에서 사용할 수 있는 포맷으로 변환합니다. 이 모드는 모델을 생산 환경으로 배포하는데 유용합니다. + +[Export 예시](export.md){ .md-button } + +## [Track](track.md) + +Track 모드는 실시간으로 YOLOv8 모델을 사용하여 객체를 추적하기 위해 사용됩니다. 이 모드에서는 체크포인트 파일에서 모델을 로드하고, 사용자가 실시간 비디오 스트림을 제공하여 실시간 객체 추적을 수행합니다. 이 모드는 감시 시스템이나 자율 주행 차량 같은 애플리케이션에 유용합니다. + +[Track 예시](track.md){ .md-button } + +## [Benchmark](benchmark.md) + +Benchmark 모드는 YOLOv8의 다양한 내보내기 포맷에 대한 속도와 정확도를 프로파일링하기 위해 사용됩니다. 벤치마크는 내보낸 포맷의 크기, 그리고 객체 탐지, 세분화 및 포즈에 대한 `mAP50-95` 메트릭 또는 분류에 대한 `accuracy_top5` 메트릭, 그리고 ONNX, OpenVINO, TensorRT 등 다양한 내보내기 포맷에서의 이미지당 추론 시간을 밀리초로 제공합니다. 이 정보는 속도와 정확도에 대한 특정 사용 사례 요구 사항에 기반하여 최적의 내보내기 포맷을 선택하는 데 도움이 될 수 있습니다. 
+ +[Benchmark 예시](benchmark.md){ .md-button } diff --git a/docs/ko/modes/predict.md b/docs/ko/modes/predict.md new file mode 100644 index 0000000..ce0f6ea --- /dev/null +++ b/docs/ko/modes/predict.md @@ -0,0 +1,227 @@ +--- +comments: true +description: YOLOv8 예측 모드를 사용하여 다양한 작업을 수행하는 방법을 알아보십시오. 이미지, 비디오 및 데이터 형식과 같은 다양한 추론 소스에 대해 자세히 알아봅니다. +keywords: Ultralytics, YOLOv8, 예측 모드, 추론 소스, 예측 작업, 스트리밍 모드, 이미지 처리, 비디오 처리, 머신 러닝, AI +--- + +# Ultralytics YOLO로 모델 예측 + +Ultralytics YOLO 생태계와 통합 + +## 소개 + +머신 러닝 및 컴퓨터 비전의 세계에서 시각적 데이터를 해석하는 과정을 '추론' 또는 '예측'이라고 합니다. Ultralytics YOLOv8는 다양한 데이터 소스에서의 고성능, 실시간 추론을 위해 맞춤화된 강력한 기능인 **예측 모드**를 제공합니다. + +

+
+ +
+ 시청: Ultralytics YOLOv8 모델에서 출력을 추출하여 맞춤 프로젝트에 사용하는 방법. +

+ +## 실제 응용 분야 + +| 제조업 | 스포츠 | 안전 | +|:-----------------------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------------:| +| ![차량 예비 부품 탐지](https://github.com/RizwanMunawar/ultralytics/assets/62513924/a0f802a8-0776-44cf-8f17-93974a4a28a1) | ![축구 선수 탐지](https://github.com/RizwanMunawar/ultralytics/assets/62513924/7d320e1f-fc57-4d7f-a691-78ee579c3442) | ![사람 넘어짐 탐지](https://github.com/RizwanMunawar/ultralytics/assets/62513924/86437c4a-3227-4eee-90ef-9efb697bdb43) | +| 차량 예비 부품 탐지 | 축구 선수 탐지 | 사람 넘어짐 탐지 | + +## 예측 인퍼런스를 위해 Ultralytics YOLO 사용하기 + +다음은 YOLOv8의 예측 모드를 다양한 추론 요구 사항에 사용해야 하는 이유입니다: + +- **다양성:** 이미지, 비디오, 심지어 실시간 스트림에 대한 추론을 수행할 수 있습니다. +- **성능:** 정확성을 희생하지 않고 실시간, 고속 처리를 위해 설계되었습니다. +- **사용 편의성:** 빠른 배포 및 테스트를 위한 직관적인 Python 및 CLI 인터페이스를 제공합니다. +- **고도의 사용자 정의:** 특정 요구 사항에 맞게 모델의 추론 행동을 조율하기 위한 다양한 설정 및 매개변수를 제공합니다. + +### 예측 모드의 주요 기능 + +YOLOv8의 예측 모드는 강력하고 다재다능하게 설계되었으며, 다음과 같은 특징을 갖고 있습니다: + +- **다중 데이터 소스 호환성:** 데이터가 개별 이미지, 이미지 컬렉션, 비디오 파일 또는 실시간 비디오 스트림의 형태로 존재하는지 여부에 관계없이 예측 모드가 지원합니다. +- **스트리밍 모드:** `Results` 객체의 메모리 효율적인 생성자로 스트리밍 기능을 사용합니다. 예측기의 호출 메서드에서 `stream=True`로 설정하여 활성화합니다. +- **배치 처리:** 단일 배치에서 여러 이미지 또는 비디오 프레임을 처리하는 기능을 통해 추론 시간을 더욱 단축합니다. +- **통합 친화적:** 유연한 API 덕분에 기존 데이터 파이프라인 및 기타 소프트웨어 구성 요소와 쉽게 통합할 수 있습니다. + +Ultralytics YOLO 모델은 Python `Results` 객체의 리스트를 반환하거나, 추론 중 `stream=True`가 모델에 전달될 때 `Results` 객체의 메모리 효율적인 Python 생성자를 반환합니다: + +!!! 
예시 "예측" + + === "`stream=False`로 리스트 반환" + ```python + from ultralytics import YOLO + + # 모델 로드 + model = YOLO('yolov8n.pt') # 사전 훈련된 YOLOv8n 모델 + + # 이미지 리스트에 대한 배치 추론 실행 + results = model(['im1.jpg', 'im2.jpg']) # Results 객체의 리스트 반환 + + # 결과 리스트 처리 + for result in results: + boxes = result.boxes # bbox 출력을 위한 Boxes 객체 + masks = result.masks # 세그멘테이션 마스크 출력을 위한 Masks 객체 + keypoints = result.keypoints # 자세 출력을 위한 Keypoints 객체 + probs = result.probs # 분류 출력을 위한 Probs 객체 + ``` + + === "`stream=True`로 생성자 반환" + ```python + from ultralytics import YOLO + + # 모델 로드 + model = YOLO('yolov8n.pt') # 사전 훈련된 YOLOv8n 모델 + + # 이미지 리스트에 대한 배치 추론 실행 + results = model(['im1.jpg', 'im2.jpg'], stream=True) # Results 객체의 생성자 반환 + + # 결과 생성자 처리 + for result in results: + boxes = result.boxes # bbox 출력을 위한 Boxes 객체 + masks = result.masks # 세그멘테이션 마스크 출력을 위한 Masks 객체 + keypoints = result.keypoints # 자세 출력을 위한 Keypoints 객체 + probs = result.probs # 분류 출력을 위한 Probs 객체 + ``` + +## 추론 소스 + +YOLOv8은 아래 표에 표시된 바와 같이 추론을 위한 다양한 유형의 입력 소스를 처리할 수 있습니다. 소스에는 정적 이미지, 비디오 스트림, 다양한 데이터 형식이 포함됩니다. 표는 또한 각 소스를 'stream=True' ✅와 함께 스트리밍 모드에서 사용할 수 있는지 여부를 나타냅니다. 스트리밍 모드는 비디오나 라이브 스트림을 처리할 때 결과를 메모리에 모두 로드하는 대신 결과의 생성자를 만들어 유용하게 사용됩니다. + +!!! Tip "팁" + + 긴 비디오나 큰 데이터 세트를 처리할 때 'stream=True'를 사용하여 효율적으로 메모리를 관리합니다. 'stream=False'일 때는 모든 프레임 또는 데이터 포인트에 대한 결과가 메모리에 저장되어, 입력이 크면 메모리 부족 오류를 빠르게 유발할 수 있습니다. 반면에, 'stream=True'는 생성자를 사용하여 현재 프레임 또는 데이터 포인트의 결과만 메모리에 유지하여 메모리 소비를 크게 줄이고 메모리 부족 문제를 방지합니다. + +| 소스 | 인수 | 유형 | 비고 | +|-----------|--------------------------------------------|-----------------|--------------------------------------------------------------------------| +| 이미지 | `'image.jpg'` | `str` 또는 `Path` | 단일 이미지 파일. | +| URL | `'https://ultralytics.com/images/bus.jpg'` | `str` | 이미지 URL. | +| 스크린샷 | `'screen'` | `str` | 스크린샷을 캡처합니다. | +| PIL | `Image.open('im.jpg')` | `PIL.Image` | HWC 형식으로 RGB 채널이 있습니다. | +| OpenCV | `cv2.imread('im.jpg')` | `np.ndarray` | HWC 형식으로 BGR 채널이 있고 `uint8 (0-255)` 입니다. 
| +| numpy | `np.zeros((640,1280,3))` | `np.ndarray` | HWC 형식으로 BGR 채널이 있고 `uint8 (0-255)` 입니다. | +| torch | `torch.zeros(16,3,320,640)` | `torch.Tensor` | BCHW 형식으로 RGB 채널이 있고 `float32 (0.0-1.0)` 입니다. | +| CSV | `'sources.csv'` | `str` 또는 `Path` | 이미지, 비디오 또는 디렉토리 경로가 있는 CSV 파일. | +| 비디오 ✅ | `'video.mp4'` | `str` 또는 `Path` | MP4, AVI 등과 같은 형식의 비디오 파일입니다. | +| 디렉토리 ✅ | `'path/'` | `str` 또는 `Path` | 이미지나 비디오가 있는 디렉토리 경로입니다. | +| 글로브 ✅ | `'path/*.jpg'` | `str` | 여러 파일에 일치하는 글로브 패턴입니다. '*' 문자를 와일드카드로 사용하세요. | +| YouTube ✅ | `'https://youtu.be/LNwODJXcvt4'` | `str` | YouTube 비디오의 URL입니다. | +| 스트림 ✅ | `'rtsp://example.com/media.mp4'` | `str` | RTSP, RTMP, TCP 또는 IP 주소와 같은 스트리밍 프로토콜의 URL입니다. | +| 멀티-스트림 ✅ | `'list.streams'` | `str` 또는 `Path` | 스트림 URL이 행당 하나씩 있는 `*.streams` 텍스트 파일이며, 예를 들어 8개의 스트림은 배치 크기 8에서 실행됩니다. | + +아래는 각 유형의 소스를 사용하는 코드 예제입니다: + +!!! 예시 "예측 소스" + + === "이미지" + 이미지 파일에서 추론을 실행합니다. + ```python + from ultralytics import YOLO + + # 사전 훈련된 YOLOv8n 모델 로드 + model = YOLO('yolov8n.pt') + + # 이미지 파일 경로 정의 + source = 'path/to/image.jpg' + + # 소스에서 추론 실행 + results = model(source) # Results 객체의 리스트 + ``` + + === "스크린샷" + 현재 스크린 콘텐츠를 스크린샷으로 추론을 실행합니다. + ```python + from ultralytics import YOLO + + # 사전 훈련된 YOLOv8n 모델 로드 + model = YOLO('yolov8n.pt') + + # 현재 스크린샷을 소스로 정의 + source = 'screen' + + # 소스에서 추론 실행 + results = model(source) # Results 객체의 리스트 + ``` + + === "URL" + URL을 통해 원격으로 호스팅되는 이미지나 비디오에서 추론을 실행합니다. + ```python + from ultralytics import YOLO + + # 사전 훈련된 YOLOv8n 모델 로드 + model = YOLO('yolov8n.pt') + + # 원격 이미지나 동영상 URL 정의 + source = 'https://ultralytics.com/images/bus.jpg' + + # 소스에서 추론 실행 + results = model(source) # Results 객체의 리스트 + ``` + + === "PIL" + Python Imaging Library (PIL)로 열린 이미지에서 추론을 실행합니다. 
+ ```python + from PIL import Image + from ultralytics import YOLO + + # 사전 훈련된 YOLOv8n 모델 로드 + model = YOLO('yolov8n.pt') + + # PIL을 사용하여 이미지 열기 + source = Image.open('path/to/image.jpg') + + # 소스에서 추론 실행 + results = model(source) # Results 객체의 리스트 + ``` + + === "OpenCV" + OpenCV로 읽은 이미지에서 추론을 실행합니다. + ```python + import cv2 + from ultralytics import YOLO + + # 사전 훈련된 YOLOv8n 모델 로드 + model = YOLO('yolov8n.pt') + + # OpenCV를 사용하여 이미지 읽기 + source = cv2.imread('path/to/image.jpg') + + # 소스에서 추론 실행 + results = model(source) # Results 객체의 리스트 + ``` + + === "numpy" + numpy 배열로 표현된 이미지에서 추론을 실행합니다. + ```python + import numpy as np + from ultralytics import YOLO + + # 사전 훈련된 YOLOv8n 모델 로드 + model = YOLO('yolov8n.pt') + + # 무작위 numpy 배열 생성, HWC 형태 (640, 640, 3), 값 범위 [0, 255], 타입 uint8 + source = np.random.randint(low=0, high=255, size=(640, 640, 3), dtype='uint8') + + # 소스에서 추론 실행 + results = model(source) # Results 객체의 리스트 + ``` + + === "torch" + PyTorch 텐서로 표현된 이미지에서 추론을 실행합니다. + ```python + import torch + from ultralytics import YOLO + + # 사전 훈련된 YOLOv8n 모델 로드 + model = YOLO('yolov8n.pt') + + # 무작위 torch 텐서 생성, BCHW 형태 (1, 3, 640, 640), 값 범위 [0, 1], 타입 float32 + source = torch.rand(1, 3, 640, 640, dtype=torch.float32) + + # 소스에서 추론 실행 + results = model(source) # Results 객체의 리스트 + ``` diff --git a/docs/ko/modes/track.md b/docs/ko/modes/track.md new file mode 100644 index 0000000..73f2cca --- /dev/null +++ b/docs/ko/modes/track.md @@ -0,0 +1,256 @@ +--- +comments: true +description: Ultralytics YOLO를 사용하여 비디오 스트림에서 객체 추적을 사용하는 방법을 알아보세요. 다양한 추적기를 사용하는 안내와 추적기 구성을 맞춤 설정하는 방법에 대한 가이드가 있습니다. +keywords: Ultralytics, YOLO, 객체 추적, 비디오 스트림, BoT-SORT, ByteTrack, 파이썬 가이드, CLI 가이드 +--- + +# Ultralytics YOLO를 이용한 다중 객체 추적 + +다중 객체 추적 예시 + +비디오 분석의 영역에서 객체 추적은 프레임 내에서 객체의 위치와 클래스를 식별할 뿐만 아니라 비디오가 진행됨에 따라 각각의 검출된 객체에 대해 고유 ID를 유지하는 중요한 작업입니다. 응용 프로그램은 감시 및 보안에서 실시간 스포츠 분석에 이르기까지 무한합니다. + +## 객체 추적을 위해 Ultralytics YOLO를 선택해야 하는 이유는? 
+ +Ultralytics 추적기의 출력은 표준 객체 검출과 일관되지만 객체 ID가 추가된 가치가 있습니다. 이를 통해 비디오 스트림에서 객체를 추적하고 이후 분석을 수행하기가 쉽습니다. 여기에 어떤 이유로 Ultralytics YOLO를 사용해야 하는지에 대해 설명합니다: + +- **효율성:** 정확성을 저하시키지 않으면서 실시간으로 비디오 스트림을 처리합니다. +- **유연성:** 다양한 추적 알고리즘과 구성을 지원합니다. +- **사용하기 쉬움:** 간단한 파이썬 API 및 CLI 옵션으로 빠른 통합 및 배치가 가능합니다. +- **맞춤 설정:** 맞춤 학습된 YOLO 모델과 함께 사용하기 쉬워 특정 도메인 응용 프로그램에 통합할 수 있습니다. + +

+
+ +
+ 시청하기: Ultralytics YOLOv8로 객체 감지 및 추적하기. +

+ +## 실제 세계 응용 프로그램 + +| 교통수단 | 소매업 | 수산업 | +|:-----------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------:| +| ![차량 추적](https://github.com/RizwanMunawar/ultralytics/assets/62513924/ee6e6038-383b-4f21-ac29-b2a1c7d386ab) | ![사람 추적](https://github.com/RizwanMunawar/ultralytics/assets/62513924/93bb4ee2-77a0-4e4e-8eb6-eb8f527f0527) | ![물고기 추적](https://github.com/RizwanMunawar/ultralytics/assets/62513924/a5146d0f-bfa8-4e0a-b7df-3c1446cd8142) | +| 차량 추적 | 사람 추적 | 물고기 추적 | + +## 한눈에 보기 + +Ultralytics YOLO는 객체 감지 기능을 확장하여 견고하고 다재다능한 객체 추적을 제공합니다: + +- **실시간 추적:** 고화면률의 비디오에서 매끄럽게 객체 추적합니다. +- **다중 추적기 지원:** 다양한 검증된 추적 알고리즘 중에서 선택 가능합니다. +- **맞춤형 추적기 구성:** 다양한 매개변수를 조정하여 특정 요구사항에 맞게 추적 알고리즘을 맞춤화할 수 있습니다. + +## 사용 가능한 추적기 + +Ultralytics YOLO는 다음과 같은 추적 알고리즘을 지원합니다. 관련 YAML 구성 파일(예: `tracker=tracker_type.yaml`)을 전달하여 사용할 수 있습니다: + +* [BoT-SORT](https://github.com/NirAharon/BoT-SORT) - 이 추적기를 활성화하려면 `botsort.yaml`을 사용합니다. +* [ByteTrack](https://github.com/ifzhang/ByteTrack) - 이 추적기를 활성화하려면 `bytetrack.yaml`을 사용합니다. + +기본 추적기는 BoT-SORT입니다. + +## 추적 + +비디오 스트림에서 추적기를 실행하려면 YOLOv8n, YOLOv8n-seg 및 YOLOv8n-pose와 같은 훈련된 Detect, Segment 또는 Pose 모델을 사용하십시오. + +!!! 
예시 "" + + === "파이썬" + + ```python + from ultralytics import YOLO + + # 공식 모델 또는 맞춤 모델을 불러오기 + model = YOLO('yolov8n.pt') # 공식 Detect 모델 불러오기 + model = YOLO('yolov8n-seg.pt') # 공식 Segment 모델 불러오기 + model = YOLO('yolov8n-pose.pt') # 공식 Pose 모델 불러오기 + model = YOLO('path/to/best.pt') # 맞춤 학습된 모델 불러오기 + + # 모델을 사용하여 추적 수행 + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True) # 기본 추적기로 추적하기 + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True, tracker="bytetrack.yaml") # ByteTrack 추적기로 추적하기 + ``` + + === "CLI" + + ```bash + # 명령 행 인터페이스를 사용하여 다양한 모델로 추적 수행 + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" # 공식 Detect 모델 + yolo track model=yolov8n-seg.pt source="https://youtu.be/LNwODJXcvt4" # 공식 Segment 모델 + yolo track model=yolov8n-pose.pt source="https://youtu.be/LNwODJXcvt4" # 공식 Pose 모델 + yolo track model=path/to/best.pt source="https://youtu.be/LNwODJXcvt4" # 맞춤 학습된 모델 + + # ByteTrack 추적기를 사용하여 추적하기 + yolo track model=path/to/best.pt tracker="bytetrack.yaml" + ``` + +위의 사용법에서 볼 수 있듯이 모든 Detect, Segment 및 Pose 모델은 비디오나 스트리밍 출처에서 추적이 가능합니다. + +## 구성 + +### 추적 인수 + +추적 구성은 `conf`, `iou`, `show`와 같이 예측 모드와 동일한 속성을 공유합니다. 추가 구성에 대해서는 [Predict](https://docs.ultralytics.com/modes/predict/) 모드 페이지를 참조하십시오. + +!!! 예시 "" + + === "파이썬" + + ```python + from ultralytics import YOLO + + # 추적 매개변수를 구성하고 추적기를 실행합니다 + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True) + ``` + + === "CLI" + + ```bash + # 추적 매개변수를 구성하고 명령 행 인터페이스를 사용하여 추적기를 실행합니다 + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3 iou=0.5 show + ``` + +### 추적기 선택 + +Ultralytics에서는 수정된 추적기 구성 파일도 사용할 수 있습니다. 이를 위해 [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers)에서 추적기 구성 파일의 복사본(예: `custom_tracker.yaml`)을 만들고 필요한 대로 구성을 수정하면 됩니다(단, `tracker_type` 제외). + +!!!
예시 "" + + === "파이썬" + + ```python + from ultralytics import YOLO + + # 모델을 불러오고 맞춤 구성 파일로 추적기를 실행합니다 + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", tracker='custom_tracker.yaml') + ``` + + === "CLI" + + ```bash + # 명령 행 인터페이스를 사용하여 맞춤 구성 파일로 모델을 불러오고 추적기를 실행합니다 + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml' + ``` + +추적 인수에 대한 종합적인 목록은 [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers) 페이지를 참조하세요. + +## 파이썬 예시 + +### 보존하는 추적 루프 + +다음은 OpenCV(`cv2`)와 YOLOv8를 사용하여 비디오 프레임에서 객체 추적을 실행하는 파이썬 스크립트입니다. 이 스크립트에서는 필요한 패키지(`opencv-python` 및 `ultralytics`)를 이미 설치했다고 가정합니다. `persist=True` 인수는 추적기에 현재 이미지 또는 프레임이 시퀀스에서 다음 것이며 현재 이미지에서 이전 이미지의 추적을 예상한다고 알립니다. + +!!! 예시 "추적이 포함된 스트리밍 for-loop" + + ```python + import cv2 + from ultralytics import YOLO + + # YOLOv8 모델을 불러옵니다 + model = YOLO('yolov8n.pt') + + # 비디오 파일을 엽니다 + video_path = "path/to/video.mp4" + cap = cv2.VideoCapture(video_path) + + # 비디오 프레임을 반복합니다 + while cap.isOpened(): + # 비디오에서 프레임을 읽습니다 + success, frame = cap.read() + + if success: + # 프레임에 YOLOv8 추적을 실행하여 추적을 유지합니다 + results = model.track(frame, persist=True) + + # 결과를 프레임에 시각화합니다 + annotated_frame = results[0].plot() + + # 어노테이션된 프레임을 표시합니다 + cv2.imshow("YOLOv8 추적", annotated_frame) + + # 'q'가 눌리면 루프를 중단합니다 + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # 비디오의 끝에 도달하면 루프를 중단합니다 + break + + # 비디오 캡처 객체를 해제하고 표시 창을 닫습니다 + cap.release() + cv2.destroyAllWindows() + ``` + +여기서 `model(frame)`을 `model.track(frame)`으로 변경하면 단순 감지가 아닌 객체 추적이 가능해집니다. 이 수정된 스크립트는 비디오의 각 프레임에 추적기를 실행하고 결과를 시각화한 후 창에 표시합니다. 'q'를 누르면 루프가 종료됩니다. + +### 시간에 따른 추적 그리기 + +연속 프레임에서 객체 추적을 시각화하면 비디오 내에서 검출된 객체의 이동 패턴과 행동에 대한 소중한 통찰력을 제공할 수 있습니다. Ultralytics YOLOv8을 사용하면 이러한 추적을 원활하고 효율적으로 플로팅할 수 있습니다. + +다음 예시에서, 여러 비디오 프레임에 걸친 검출된 객체의 움직임을 플로팅하기 위해 YOLOv8의 추적 기능을 활용하는 방법을 보여줍니다. 
이 스크립트는 비디오 파일을 열어 프레임별로 읽고, YOLO 모델을 사용하여 다양한 객체를 식별하고 추적합니다. 검출된 경계 상자의 중심점을 보존하고 연결하여 추적된 객체의 경로를 나타내는 선을 그립니다. + +!!! 예시 "비디오 프레임에 걸쳐 추적 그리기" + + ```python + from collections import defaultdict + + import cv2 + import numpy as np + + from ultralytics import YOLO + + # YOLOv8 모델을 불러옵니다 + model = YOLO('yolov8n.pt') + + # 비디오 파일을 엽니다 + video_path = "path/to/video.mp4" + cap = cv2.VideoCapture(video_path) + + # 추적 내역을 저장합니다 + track_history = defaultdict(lambda: []) + + # 비디오 프레임을 반복합니다 + while cap.isOpened(): + # 비디오에서 프레임을 읽습니다 + success, frame = cap.read() + + if success: + # 프레임에 YOLOv8 추적을 실행하여 추적을 유지합니다 + results = model.track(frame, persist=True) + + # 상자 및 추적 ID를 가져옵니다 + boxes = results[0].boxes.xywh.cpu() + track_ids = results[0].boxes.id.int().cpu().tolist() + + # 결과를 프레임에 시각화합니다 + annotated_frame = results[0].plot() + + # 추적을 플롯합니다 + for box, track_id in zip(boxes, track_ids): + x, y, w, h = box + track = track_history[track_id] + track.append((float(x), float(y))) # x, y의 중심점 + if len(track) > 30: # 30프레임에 대해 30개의 추적을 유지 + track.pop(0) + + # 추적 라인을 그립니다 + points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2)) + cv2.polylines(annotated_frame, [points], isClosed=False, color=(230, 230, 230), thickness=10) + + # 어노테이션된 프레임을 표시합니다 + cv2.imshow("YOLOv8 추적", annotated_frame) + + # 'q'가 눌리면 루프를 중단합니다 + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # 비디오의 끝에 도달하면 루프를 중단합니다 + break + + ``` diff --git a/docs/ko/modes/train.md b/docs/ko/modes/train.md new file mode 100644 index 0000000..2caafd3 --- /dev/null +++ b/docs/ko/modes/train.md @@ -0,0 +1,145 @@ +--- +comments: true +description: YOLOv8 모델을 Ultralytics YOLO를 사용하여 훈련하는 단계별 가이드로, 단일 GPU 및 다중 GPU 훈련의 예제 포함 +keywords: Ultralytics, YOLOv8, YOLO, 객체 감지, 훈련 모드, 사용자 정의 데이터셋, GPU 훈련, 다중 GPU, 하이퍼파라미터, CLI 예제, Python 예제 +--- + +# Ultralytics YOLO와 함께 하는 모델 훈련 + +Ultralytics YOLO 생태계 및 통합 + +## 소개 + +딥러닝 모델을 훈련한다는 것은 모델에 데이터를 공급하고 그것이 정확한 예측을 할 수 있도록 매개변수를 조정하는 과정을 말합니다.
Ultralytics YOLOv8의 훈련 모드는 현대 하드웨어 기능을 완전히 활용하여 객체 감지 모델의 효과적이고 효율적인 훈련을 위해 설계되었습니다. 이 가이드는 YOLOv8의 강력한 기능 세트를 사용하여 자체 모델을 훈련하는 데 필요한 모든 세부 정보를 다루는 것을 목표로 합니다. + +

+
+ +
+ 시청하기: Google Colab에서 여러분의 사용자 정의 데이터셋으로 YOLOv8 모델을 훈련하는 방법. +

+ +## Ultralytics YOLO로 훈련하는 이유? + +YOLOv8의 훈련 모드를 선택하는 데는 몇 가지 설득력 있는 이유가 있습니다: + +- **효율성:** 단일 GPU 설정이든 여러 GPU로 확장하든, 하드웨어를 최대한 활용하세요. +- **다양성:** COCO, VOC, ImageNet과 같은 기존의 데이터셋뿐만 아니라 사용자 정의 데이터셋으로도 훈련 가능. +- **사용자 친화적:** 간단하면서도 강력한 CLI 및 Python 인터페이스를 통한 직관적인 훈련 경험 제공. +- **하이퍼파라미터 유연성:** 모델의 성능을 미세 조정할 수 있는 다양하게 구성 가능한 하이퍼파라미터. + +### 훈련 모드의 주요 기능 + +다음은 YOLOv8의 훈련 모드의 주요 기능 중 일부입니다: + +- **자동 데이터셋 다운로드:** COCO, VOC, ImageNet과 같은 표준 데이터셋들은 첫 사용시 자동으로 다운로드됩니다. +- **다중 GPU 지원:** 여러 GPU에 걸쳐 훈련 노력을 빠르게 확대하기 위한 규모있는 훈련 지원. +- **하이퍼파라미터 구성:** YAML 구성 파일이나 CLI 인수를 통해 하이퍼파라미터 수정 가능. +- **시각화 및 모니터링:** 훈련 지표의 실시간 추적 및 학습 과정의 시각화로 더 나은 인사이트 제공. + +!!! Tip "팁" + + * YOLOv8 데이터셋들은 첫 사용시 자동으로 다운로드됩니다, 예: `yolo train data=coco.yaml` + +## 사용 예제 + +COCO128 데이터셋에서 YOLOv8n을 이미지 크기 640으로 100 에포크 동안 훈련합니다. 훈련 장치는 `device` 인수를 사용하여 지정할 수 있습니다. 인수를 전달하지 않으면 사용 가능한 경우 GPU `device=0`이, 아니면 `device=cpu`가 사용됩니다. 전체 훈련 인수 목록은 아래 Arguments 섹션을 참조하세요. + +!!! Example "단일 GPU 및 CPU 훈련 예제" + + 장치는 자동으로 결정됩니다. GPU가 사용 가능하면 사용되며, 그렇지 않으면 CPU에서 훈련이 시작됩니다. + + === "Python" + + ```python + from ultralytics import YOLO + + # 모델을 로드하세요. + model = YOLO('yolov8n.yaml') # YAML에서 새 모델 구축 + model = YOLO('yolov8n.pt') # 사전 훈련된 모델 로드 (훈련을 위해 권장됨) + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # YAML에서 구축 및 가중치 전달 + + # 모델을 훈련합니다. + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + # YAML에서 새 모델을 구축하고 처음부터 훈련을 시작하세요. + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # 사전 훈련된 *.pt 모델에서 훈련을 시작하세요. + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # YAML에서 새 모델을 구축하고, 사전 훈련된 가중치를 전달하고 훈련을 시작하세요. + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### 다중 GPU 훈련 + +다중 GPU 훈련을 통해 사용 가능한 하드웨어 리소스를 더 효율적으로 활용할 수 있습니다. 이 기능은 Python API와 명령행 인터페이스 모두를 통해 사용할 수 있습니다. 다중 GPU 훈련을 활성화하려면 사용하려는 GPU 장치 ID를 지정하세요. + +!!! 
Example "다중 GPU 훈련 예제" + + 2개의 GPU, CUDA 장치 0과 1로 훈련하려면 다음 명령을 사용하세요. 필요에 따라 추가 GPU로 확장하세요. + + === "Python" + + ```python + from ultralytics import YOLO + + # 모델을 로드하세요. + model = YOLO('yolov8n.pt') # 사전 훈련된 모델 로드 (훈련 추천됨) + + # 2개의 GPU로 모델을 훈련합니다. + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device=[0, 1]) + ``` + + === "CLI" + + ```bash + # 사전 훈련된 *.pt 모델로부터 시작하여 GPU 0과 1을 사용하여 훈련합니다. + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=0,1 + ``` + +### Apple M1 및 M2 MPS 훈련 + +Ultralytics YOLO 모델에 통합된 Apple M1 및 M2 칩들에 대한 지원을 통해 Apple의 강력한 Metal Performance Shaders (MPS) 프레임워크를 활용하여 장치에서 모델을 훈련할 수 있습니다. MPS는 Apple의 커스텀 실리콘에서 연산 및 이미지 처리 작업을 실행하는 고성능 방법을 제공합니다. + +Apple M1 및 M2 칩에서 훈련을 활성화하려면, 훈련 과정을 시작할 때 장치로 'mps'를 지정해야 합니다. 아래는 Python 및 명령행 인터페이스를 통해 이를 수행할 수 있는 예제입니다: + +!!! Example "MPS 훈련 예제" + + === "Python" + + ```python + from ultralytics import YOLO + + # 모델을 로드하세요. + model = YOLO('yolov8n.pt') # 사전 훈련된 모델 로드 (훈련 추천됨) + + # MPS 장치로 모델을 훈련합니다. + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device='mps') + ``` + + === "CLI" + + ```bash + # 사전 훈련된 *.pt 모델을 사용하여 mps 장치에서 훈련을 시작합니다. + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=mps + ``` + +M1/M2 칩의 연산력을 활용하면서 훈련 작업을 더 효율적으로 처리할 수 있습니다. 자세한 지침과 고급 설정 옵션을 원하신다면 [PyTorch MPS 문서](https://pytorch.org/docs/stable/notes/mps.html)를 참조하세요. + +### 중단된 훈련 이어나가기 + +이전에 저장된 상태에서 훈련을 이어나가는 기능은 딥러닝 모델을 다룰 때 중요한 기능입니다. 이 기능은 훈련 과정이 예기치 않게 중단되었거나 새로운 데이터로 모델을 계속 훈련하거나 더 많은 에포크 동안 훈련을 진행하고 싶을 때 유용합니다. + +훈련을 재개할 때, Ultralytics YOLO는 마지막으로 저장된 모델에서 가중치를 로드하고 옵티마이저 상태, 학습률 스케줄러, 에포크 번호도 복원합니다. 이를 통해 훈련 과정을 중단된 지점부터 이어갈 수 있습니다. + +Ultralytics YOLO에서 `train` 메서드 호출 시 `resume` 인수를 `True`로 설정하고 부분적으로 훈련된 모델 가중치가 포함된 `.pt` 파일의 경로를 지정하면 훈련을 이어나갈 수 있습니다.
diff --git a/docs/ko/modes/val.md b/docs/ko/modes/val.md new file mode 100644 index 0000000..2ebaf77 --- /dev/null +++ b/docs/ko/modes/val.md @@ -0,0 +1,86 @@ +--- +comments: true +description: YOLOv8 모델 검증 가이드. 검증 설정 및 측정 항목을 사용하여 YOLO 모델의 성능을 평가하는 방법에 대해 알아보세요. Python 및 CLI 예제가 포함되어 있습니다. +keywords: Ultralytics, YOLO 문서, YOLOv8, 검증, 모델 평가, 하이퍼파라미터, 정확도, 측정 항목, Python, CLI +--- + +# Ultralytics YOLO로 모델 검증하기 + +Ultralytics YOLO 생태계 및 통합 + +## 도입 + +검증은 훈련된 모델의 품질을 평가할 수 있게 해주는 기계학습 파이프라인에서 중요한 단계입니다. Ultralytics YOLOv8의 Val 모드는 모델의 객체 탐지 성능을 평가하기 위한 강력한 도구 및 측정 항목 모음을 제공합니다. 이 가이드는 Val 모드를 효과적으로 사용하여 모델의 정확성과 신뢰성을 보장하는 방법에 대한 완벽한 리소스 역할을 합니다. + +## 왜 Ultralytics YOLO로 검증을 해야 할까요? + +YOLOv8의 Val 모드를 사용하는 이점은 다음과 같습니다: + +- **정밀도:** mAP50, mAP75, mAP50-95와 같은 정확한 측정 항목으로 모델을 종합적으로 평가합니다. +- **편의성:** 훈련 설정을 기억하는 내장 기능을 활용하여 검증 절차를 단순화합니다. +- **유연성:** 같거나 다른 데이터셋과 이미지 크기로 모델을 검증할 수 있습니다. +- **하이퍼파라미터 튜닝:** 검증 측정 항목을 사용하여 모델의 성능을 더 잘 조율합니다. + +### Val 모드의 주요 기능 + +YOLOv8의 Val 모드가 제공하는 주목할 만한 기능들은 다음과 같습니다: + +- **자동화된 설정:** 모델은 훈련 구성을 기억하여 간단하게 검증이 가능합니다. +- **멀티-메트릭 지원:** 다양한 정확도 측정 항목을 기반으로 모델을 평가합니다. +- **CLI 및 Python API:** 검증을 위해 명령 줄 인터페이스 또는 Python API 중에서 선택할 수 있습니다. +- **데이터 호환성:** 훈련 단계에서 사용된 데이터셋과 사용자 정의 데이터셋 모두와 원활하게 작동합니다. + +!!! Tip "팁" + + * YOLOv8 모델은 훈련 설정을 자동으로 기억하므로 `yolo val model=yolov8n.pt`나 `model('yolov8n.pt').val()`만으로 같은 이미지 크기와 원본 데이터셋에서 쉽게 검증할 수 있습니다. + +## 사용 예제 + +COCO128 데이터셋에서 훈련된 YOLOv8n 모델의 정확도를 검증합니다. `model`은 훈련 `data`와 인자를 모델 속성으로 유지하므로 인자를 전달할 필요가 없습니다. 전체 검증 인자 목록은 아래의 인자 섹션을 참고하세요. + +!!!
Example "예제" + + === "Python" + + ```python + from ultralytics import YOLO + + # 모델 로드 + model = YOLO('yolov8n.pt') # 공식 모델을 로드합니다 + model = YOLO('path/to/best.pt') # 사용자 정의 모델을 로드합니다 + + # 모델 검증 + metrics = model.val() # 인자가 필요 없음, 데이터셋과 설정이 기억됩니다 + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # 각 카테고리의 map50-95가 포함된 목록 + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # 공식 모델 검증 + yolo detect val model=path/to/best.pt # 사용자 정의 모델 검증 + ``` + +## 인자 + +YOLO 모델의 검증 설정은 모델의 성능을 검증 데이터셋에서 평가하기 위한 다양한 하이퍼파라미터 및 구성을 의미합니다. 이러한 설정은 모델의 성능, 속도, 정확성에 영향을 미칠 수 있습니다. 일반적인 YOLO 검증 설정에는 배치 크기, 훈련 중 검증이 수행되는 빈도 및 모델 성능을 평가하기 위해 사용되는 측정 항목이 포함됩니다. 검증 과정에 영향을 줄 수 있는 다른 요소로는 검증 데이터셋의 크기와 구성 및 모델이 사용되는 구체적인 작업이 있습니다. 모델이 검증 데이터셋에서 잘 수행되고 있고 과적합을 감지하고 방지하기 위해서는 이러한 설정을 신중하게 조정하고 실험하는 것이 중요합니다. + +| Key | Value | Description | +|---------------|---------|---------------------------------------------------| +| `data` | `None` | 데이터 파일 경로 예: coco128.yaml | +| `imgsz` | `640` | 입력 이미지의 크기를 정수로 지정 | +| `batch` | `16` | 배치 당 이미지 수 (-1은 AutoBatch에 해당) | +| `save_json` | `False` | 결과를 JSON 파일로 저장 | +| `save_hybrid` | `False` | 라벨의 하이브리드 버전(라벨 + 추가 예측)을 저장 | +| `conf` | `0.001` | 탐지를 위한 객체 신뢰도 임계값 | +| `iou` | `0.6` | NMS 용 교차 영역과 합친 영역(IoU)의 임계값 | +| `max_det` | `300` | 이미지 당 최대 탐지 개수 | +| `half` | `True` | 반정밀도(FP16) 사용 | +| `device` | `None` | 사용할 장치 예: cuda의 device=0/1/2/3이나 device=cpu | +| `dnn` | `False` | ONNX 추론에 OpenCV DNN 사용 | +| `plots` | `False` | 훈련 중 플롯 표시 | +| `rect` | `False` | 최소한의 패딩을 위해 각 배치가 직사각형 val로 조정됨 | +| `split` | `val` | 검증을 위해 사용되는 데이터셋 분할, 예: 'val', 'test', 혹은 'train' | +| diff --git a/docs/ko/quickstart.md b/docs/ko/quickstart.md new file mode 100644 index 0000000..ca52392 --- /dev/null +++ b/docs/ko/quickstart.md @@ -0,0 +1,207 @@ +--- +comments: true +description: pip, conda, git 및 Docker를 사용하여 Ultralytics을 설치하는 다양한 방법을 탐색해 보세요. Ultralytics을 명령줄 인터페이스 또는 Python 프로젝트 내에서 사용하는 방법을 알아보세요. 
+keywords: Ultralytics 설치, pip를 이용한 Ultralytics 설치, Docker를 이용한 Ultralytics 설치, Ultralytics 명령줄 인터페이스, Ultralytics Python 인터페이스 +--- + +## Ultralytics 설치하기 + +Ultralytics는 pip, conda, Docker를 포함한 다양한 설치 방법을 제공합니다. `ultralytics` pip 패키지를 이용해 가장 안정적인 최신 버전의 YOLOv8을 설치하거나 [Ultralytics GitHub 저장소](https://github.com/ultralytics/ultralytics)를 복제하여 가장 최신 버전을 받아볼 수 있습니다. Docker를 이용하면 패키지를 로컬에 설치하지 않고 격리된 컨테이너에서 실행할 수 있습니다. + +!!! Example "설치하기" + + === "Pip 설치하기 (권장)" + pip을 사용하여 `ultralytics` 패키지를 설치하거나, `pip install -U ultralytics`를 실행하여 기존 설치를 업데이트하세요. Python Package Index(PyPI)에서 `ultralytics` 패키지에 대한 자세한 내용을 확인하세요: [https://pypi.org/project/ultralytics/](https://pypi.org/project/ultralytics/). + + [![PyPI 버전](https://badge.fury.io/py/ultralytics.svg)](https://badge.fury.io/py/ultralytics) [![다운로드](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) + + ```bash + # PyPI에서 ultralytics 패키지 설치하기 + pip install ultralytics + ``` + + GitHub [저장소](https://github.com/ultralytics/ultralytics)에서 직접 `ultralytics` 패키지를 설치할 수도 있습니다. 최신 개발 버전이 필요한 경우 유용할 수 있습니다. 시스템에 Git 명령줄 도구가 설치되어 있는지 확인하세요. `@main` 명령어는 `main` 브랜치를 설치하며, `@my-branch`로 변경하거나 `main` 브랜치를 기본으로 사용하려면 아예 제거하면 됩니다. + + ```bash + # GitHub에서 ultralytics 패키지 설치하기 + pip install git+https://github.com/ultralytics/ultralytics.git@main + ``` + + + === "Conda 설치하기" + pip의 대안으로 사용할 수 있는 또 다른 패키지 관리자인 Conda를 통해서도 설치할 수 있습니다. [https://anaconda.org/conda-forge/ultralytics](https://anaconda.org/conda-forge/ultralytics)에서 Anaconda에 대한 자세한 정보를 확인하세요. Conda 패키지를 업데이트하는 Ultralytics feedstock 저장소는 [https://github.com/conda-forge/ultralytics-feedstock/](https://github.com/conda-forge/ultralytics-feedstock/)에 있습니다. 
+ + + [![Conda 레시피](https://img.shields.io/badge/recipe-ultralytics-green.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda 다운로드](https://img.shields.io/conda/dn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda 버전](https://img.shields.io/conda/vn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda 플랫폼](https://img.shields.io/conda/pn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) + + ```bash + # conda를 사용하여 ultralytics 패키지 설치하기 + conda install -c conda-forge ultralytics + ``` + + !!! Note "노트" + + CUDA 환경에서 설치하는 경우 일반적으로 `ultralytics`, `pytorch` 및 `pytorch-cuda`를 동일한 명령어로 설치하여 Conda 패키지 관리자가 충돌을 해결하도록 하거나, 필요한 경우 CPU 전용 `pytorch` 패키지를 덮어쓸 수 있도록 `pytorch-cuda`를 마지막에 설치하는 것이 좋습니다. + ```bash + # Conda를 사용하여 모든 패키지 함께 설치하기 + conda install -c pytorch -c nvidia -c conda-forge pytorch torchvision pytorch-cuda=11.8 ultralytics + ``` + + ### Conda Docker 이미지 + + Ultralytics Conda Docker 이미지들도 [DockerHub](https://hub.docker.com/r/ultralytics/ultralytics)에서 사용할 수 있습니다. 이 이미지들은 [Miniconda3](https://docs.conda.io/projects/miniconda/en/latest/)를 기반으로 하며, Conda 환경에서 `ultralytics`를 사용하기 위한 간단한 방법입니다. + + ```bash + # 이미지 이름을 변수로 설정하기 + t=ultralytics/ultralytics:latest-conda + + # Docker Hub에서 최신 ultralytics 이미지 가져오기 + sudo docker pull $t + + # GPU 지원으로 ultralytics 이미지를 컨테이너에서 실행하기 + sudo docker run -it --ipc=host --gpus all $t # 모든 GPU 사용 + sudo docker run -it --ipc=host --gpus '"device=2,3"' $t # 특정 GPU 지정 + ``` + + + === "Git 복제하기" + 개발에 기여하거나 최신 소스 코드를 실험해 보고 싶다면 `ultralytics` 저장소를 복제하세요. 복제한 후 해당 디렉토리로 이동하여 pip을 이용해 편집 가능 모드 `-e`로 패키지를 설치합니다. + ```bash + # ultralytics 저장소 복제하기 + git clone https://github.com/ultralytics/ultralytics + + # 복제한 디렉토리로 이동하기 + cd ultralytics + + # 개발을 위한 편집 가능 모드로 패키지 설치하기 + pip install -e . + ``` + + === "Docker 사용하기" + + Docker를 사용하면 `ultralytics` 패키지를 격리된 컨테이너에서 원활하게 실행할 수 있으며, 다양한 환경에서 일관된 성능을 보장합니다. 
[Docker Hub](https://hub.docker.com/r/ultralytics/ultralytics)의 공식 `ultralytics` 이미지 중 하나를 선택함으로써 로컬 설치의 복잡함을 피하고 검증된 작업 환경에 접근할 수 있습니다. Ultralytics은 서로 다른 플랫폼과 사용 사례에 대해 높은 호환성과 효율성을 제공하기 위해 6가지 주요 Docker 이미지를 제공합니다: + + Docker Pulls + + - **Dockerfile:** 트레이닝에 추천되는 GPU 이미지입니다. + - **Dockerfile-arm64:** Raspberry Pi와 같은 ARM64 기반 플랫폼에 배포하기에 최적화된 ARM64 아키텍처용입니다. + - **Dockerfile-cpu:** GPU가 없는 환경에서 인퍼런스에 적합한 Ubuntu 기반 CPU 전용 버전입니다. + - **Dockerfile-jetson:** NVIDIA Jetson 장치에 최적화된 GPU 지원을 통합한 버전입니다. + - **Dockerfile-python:** 가벼운 애플리케이션에 적합하도록 Python과 필수 종속성만 포함한 최소한의 이미지입니다. + - **Dockerfile-conda:** Miniconda3를 기반으로 하며 ultralytics 패키지의 conda 설치를 포함하고 있습니다. + + 아래의 명령어로 최신 이미지를 받고 실행할 수 있습니다: + + ```bash + # 이미지 이름을 변수로 설정하기 + t=ultralytics/ultralytics:latest + + # Docker Hub에서 최신 ultralytics 이미지 가져오기 + sudo docker pull $t + + # GPU 지원으로 ultralytics 이미지를 컨테이너에서 실행하기 + sudo docker run -it --ipc=host --gpus all $t # 모든 GPU 사용 + sudo docker run -it --ipc=host --gpus '"device=2,3"' $t # 특정 GPU 지정 + ``` + + 위 명령어는 최신 `ultralytics` 이미지로 Docker 컨테이너를 초기화합니다. `-it` 플래그는 pseudo-TTY를 할당하고 표준 입력을 유지하여 컨테이너와 상호 작용할 수 있게 해줍니다. `--ipc=host` 플래그는 프로세스 간 메모리 공유에 필요한 IPC(Inter-Process Communication) 네임스페이스를 호스트로 설정합니다. `--gpus all` 플래그는 컨테이너 내에서 사용 가능한 모든 GPU에 대한 접근을 활성화하는데, GPU 계산이 필요한 작업에 중요합니다. + + 참고: 로컬 기계의 파일을 컨테이너 내에서 작업하기 위해서는 로컬 디렉토리를 컨테이너에 마운트하는 데 Docker 볼륨을 사용하세요: + + ```bash + # 로컬 디렉토리를 컨테이너 내부 디렉토리에 마운트하기 + sudo docker run -it --ipc=host --gpus all -v /path/on/host:/path/in/container $t + ``` + + `/path/on/host`를 로컬 기계의 디렉토리 경로로, `/path/in/container`를 컨테이너 내부에서 원하는 경로로 변경하여 접근할 수 있게 하세요. + + Docker 사용에 대한 고급 기능은 [Ultralytics Docker 가이드](https://docs.ultralytics.com/guides/docker-quickstart/)에서 더 탐구해보세요. + +`ultralytics`의 종속성 목록은 [requirements.txt](https://github.com/ultralytics/ultralytics/blob/main/requirements.txt) 파일에서 확인할 수 있습니다. 위 예제에서는 모든 필요한 종속성을 설치합니다. + +

+
+ +
+ 시청하기: Ultralytics YOLO 빠른 시작 가이드 +

+ +!!! Tip "팁" + + PyTorch 설치 요구사항은 운영 체제와 CUDA 요구사항에 따라 다르므로 [https://pytorch.org/get-started/locally](https://pytorch.org/get-started/locally)의 지침에 따라 PyTorch를 먼저 설치하는 것이 권장됩니다. + + + PyTorch 설치 지침 + + +## 명령줄 인터페이스(CLI)로 Ultralytics 사용하기 + +Ultralytics 명령줄 인터페이스(CLI)는 Python 환경이 필요 없이 단일 라인 명령어를 통해 작업을 쉽게 실행할 수 있도록 합니다. CLI는 커스터마이징이나 Python 코드가 필요 없습니다. `yolo` 명령어를 이용해 터미널에서 모든 작업을 실행할 수 있습니다. 명령줄에서 YOLOv8을 사용하는 방법에 대해 더 알아보려면 [CLI 가이드](/../usage/cli.md)를 참고하세요. + +!!! Example "예제" + + === "문법" + + Ultralytics `yolo` 명령어는 다음과 같은 문법을 사용합니다: + ```bash + yolo TASK MODE ARGS + + 여기서 TASK (선택적)은 [detect, segment, classify] 중 하나 + MODE (필수)는 [train, val, predict, export, track] 중 하나 + ARGS (선택적)은 'imgsz=320'과 같이 기본값을 재정의하는 'arg=value' 쌍을 아무 개수나 지정할 수 있습니다. + ``` + 모든 ARGS는 전체 [구성 가이드](/../usage/cfg.md)에서 또는 `yolo cfg`로 확인할 수 있습니다 + + === "Train" + + 10 에포크 동안 초기 학습률 0.01로 감지 모델을 훈련합니다. + ```bash + yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01 + ``` + + === "Predict" + + 이전 훈련된 세분화 모델을 사용하여 이미지 크기 320으로 YouTube 동영상을 예측합니다: + ```bash + yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 + ``` + + === "Val" + + 배치 크기 1와 이미지 크기 640으로 이전 훈련된 감지 모델을 검증합니다: + ```bash + yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640 + ``` + + === "Export" + + YOLOv8n 분류 모델을 ONNX 형식으로 내보냅니다. 이미지 크기는 224x128입니다 (TASK 필요 없음). + ```bash + yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128 + ``` + + === "특별" + + 버전 확인, 설정 보기, 검사 실행 등을 위한 특별 명령어를 실행하세요: + ```bash + yolo help + yolo checks + yolo version + yolo settings + yolo copy-cfg + yolo cfg + ``` + +!!! Warning "주의" + + 모든 인수는 `arg=val`쌍으로 전달되어야 하며, 각 쌍 사이에는 공백으로 구분해야 합니다. 인수 접두사로 `--`를 사용하거나 인수 사이에 쉼표 `,`를 사용해서는 안 됩니다. 
+ + - `yolo predict model=yolov8n.pt imgsz=640 conf=0.25`   ✅ + - `yolo predict model yolov8n.pt imgsz 640 conf 0.25`   ❌ + - `yolo predict --model yolov8n.pt --imgsz 640 --conf 0.25`   ❌ + +[CLI 가이드](/../usage/cli.md){ .md-button } diff --git a/docs/ko/tasks/classify.md b/docs/ko/tasks/classify.md new file mode 100644 index 0000000..74e43bd --- /dev/null +++ b/docs/ko/tasks/classify.md @@ -0,0 +1,172 @@ +--- +comments: true +description: YOLOv8 분류 모델에 대한 이미지 분류 정보를 알아보세요. 사전 훈련된 모델 목록과 모델 학습, 검증, 예측, 내보내기 방법에 대한 자세한 정보를 확인하실 수 있습니다. +keywords: Ultralytics, YOLOv8, 이미지 분류, 사전 훈련된 모델, YOLOv8n-cls, 학습, 검증, 예측, 모델 내보내기 +--- + +# 이미지 분류 + +Image classification examples + +이미지 분류는 가장 단순한 세 가지 작업 중 하나로, 전체 이미지를 미리 정의된 클래스 집합 중 하나로 분류하는 작업입니다. + +이미지 분류기의 출력은 단일 클래스 라벨과 신뢰도 점수입니다. 이미지 분류는 클래스의 이미지만 알고 싶고 해당 클래스의 객체가 어디에 위치하고 있는지 또는 그 정확한 형태가 무엇인지 알 필요가 없을 때 유용합니다. + +!!! Tip "팁" + + YOLOv8 분류 모델은 `-cls` 접미사를 사용합니다. 예: `yolov8n-cls.pt`이며, [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml)에서 사전 훈련되었습니다. + +## [모델](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +여기에는 사전 훈련된 YOLOv8 분류 모델이 표시됩니다. Detect, Segment 및 Pose 모델은 [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) 데이터셋에서 사전 훈련되고, 분류 모델은 [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) 데이터셋에서 사전 훈련됩니다. + +[모델](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models)은 첫 사용 시 최신 Ultralytics [릴리스](https://github.com/ultralytics/assets/releases)에서 자동으로 다운로드됩니다. + +| 모델 | 크기
(픽셀) | 정확도
top1 | 정확도
top5 | 속도
CPU ONNX
(ms) | 속도
A100 TensorRT
(ms) | 매개변수
(M) | FLOPs
(B) at 640 | +|----------------------------------------------------------------------------------------------|-----------------|------------------|------------------|-----------------------------|----------------------------------|------------------|--------------------------| +| [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | +| [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | +| [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | +| [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | +| [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | + +- **정확도** 값은 [ImageNet](https://www.image-net.org/) 데이터셋 검증 세트에서의 모델 정확도입니다. +
[ImageNet](https://www.image-net.org/)에서 재현 가능합니다: `yolo val classify data=path/to/ImageNet device=0` +- **속도**는 [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) 인스턴스에서 ImageNet 검증 이미지들에 대해 측정한 평균 속도입니다. +
[ImageNet](https://www.image-net.org/)에서 재현 가능합니다: `yolo val classify data=path/to/ImageNet batch=1 device=0|cpu` + +## 학습 + +YOLOv8n-cls 모델을 MNIST160 데이터셋에서 100 에포크 동안 학습시키고 이미지 크기는 64로 설정합니다. 가능한 모든 인자는 [설정](/../usage/cfg.md) 페이지에서 확인할 수 있습니다. + +!!! Example "예제" + + === "Python" + + ```python + from ultralytics import YOLO + + # 모델 불러오기 + model = YOLO('yolov8n-cls.yaml') # YAML에서 새 모델 구축 + model = YOLO('yolov8n-cls.pt') # 사전 훈련된 모델 불러오기 (학습용 추천) + model = YOLO('yolov8n-cls.yaml').load('yolov8n-cls.pt') # YAML로 구축하고 가중치 전송 + + # 모델 학습 + result = model.train(data='mnist160', epochs=100, imgsz=64) + ``` + + === "CLI" + + ```bash + # YAML에서 새 모델을 구축하고 처음부터 학습 시작 + yolo classify train data=mnist160 model=yolov8n-cls.yaml epochs=100 imgsz=64 + + # 사전 훈련된 *.pt 모델에서 학습 시작 + yolo classify train data=mnist160 model=yolov8n-cls.pt epochs=100 imgsz=64 + + # YAML에서 새 모델을 구축하고 사전 훈련된 가중치를 전송한 뒤 학습 시작 + yolo classify train data=mnist160 model=yolov8n-cls.yaml pretrained=yolov8n-cls.pt epochs=100 imgsz=64 + ``` + +### 데이터셋 형식 + +YOLO 분류 데이터셋 형식은 [데이터셋 가이드](../../../datasets/classify/index.md)에서 자세히 확인할 수 있습니다. + +## 검증 + +학습된 YOLOv8n-cls 모델의 정확도를 MNIST160 데이터셋에서 검증합니다. `model`은 모델 속성으로 훈련 시 `data` 및 인자를 유지하므로 추가 인자를 전달할 필요가 없습니다. + +!!! Example "예제" + + === "Python" + + ```python + from ultralytics import YOLO + + # 모델 불러오기 + model = YOLO('yolov8n-cls.pt') # 공식 모델 불러오기 + model = YOLO('path/to/best.pt') # 사용자 모델 불러오기 + + # 모델 검증 + metrics = model.val() # 추가 인자 불필요, 데이터셋 및 설정 기억함 + metrics.top1 # top1 정확도 + metrics.top5 # top5 정확도 + ``` + === "CLI" + + ```bash + yolo classify val model=yolov8n-cls.pt # 공식 모델 검증 + yolo classify val model=path/to/best.pt # 사용자 모델 검증 + ``` + +## 예측 + +학습된 YOLOv8n-cls 모델을 사용하여 이미지에 대한 예측을 실행합니다. + +!!! 
Example "예제" + + === "Python" + + ```python + from ultralytics import YOLO + + # 모델 불러오기 + model = YOLO('yolov8n-cls.pt') # 공식 모델 불러오기 + model = YOLO('path/to/best.pt') # 사용자 모델 불러오기 + + # 예측 실행 + results = model('https://ultralytics.com/images/bus.jpg') # 이미지에 대한 예측 실행 + ``` + === "CLI" + + ```bash + yolo classify predict model=yolov8n-cls.pt source='https://ultralytics.com/images/bus.jpg' # 공식 모델로 예측 실행 + yolo classify predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # 사용자 모델로 예측 실행 + ``` + +자세한 `predict` 모드 정보는 [예측](https://docs.ultralytics.com/modes/predict/) 페이지에서 확인하세요. + +## 내보내기 + +YOLOv8n-cls 모델을 ONNX, CoreML 등과 같은 다른 형식으로 내보냅니다. + +!!! Example "예제" + + === "Python" + + ```python + from ultralytics import YOLO + + # 모델 불러오기 + model = YOLO('yolov8n-cls.pt') # 공식 모델 불러오기 + model = YOLO('path/to/best.pt') # 사용자 훈련 모델 불러오기 + + # 모델 내보내기 + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-cls.pt format=onnx # 공식 모델 내보내기 + yolo export model=path/to/best.pt format=onnx # 사용자 훈련 모델 내보내기 + ``` + +아래 표에 사용 가능한 YOLOv8-cls 내보내기 형식이 나와 있습니다. 내보낸 모델에서 바로 예측하거나 검증할 수 있습니다. 즉, `yolo predict model=yolov8n-cls.onnx`를 사용할 수 있습니다. 내보내기가 완료된 후 모델에 대한 사용 예제들이 표시됩니다. 
+ +| 형식 | `format` 인자 | 모델 | 메타데이터 | 인자 | +|--------------------------------------------------------------------|---------------|-------------------------------|-------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-cls.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-cls.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-cls.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-cls_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-cls.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-cls.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-cls_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-cls.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-cls.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-cls_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-cls_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-cls_ncnn_model/` | ✅ | `imgsz`, `half` | + +자세한 `export` 정보는 [내보내기](https://docs.ultralytics.com/modes/export/) 페이지에서 확인하세요. 
diff --git a/docs/ko/tasks/detect.md b/docs/ko/tasks/detect.md new file mode 100644 index 0000000..ceee1eb --- /dev/null +++ b/docs/ko/tasks/detect.md @@ -0,0 +1,184 @@ +--- +comments: true +description: Ultralytics 공식 YOLOv8 문서입니다. 모델 훈련, 검증, 예측 및 다양한 형식으로 모델 내보내기 방법을 배우십시오. 세부적인 성능 통계를 포함합니다. +keywords: YOLOv8, Ultralytics, 객체 감지, 사전 훈련된 모델, 훈련, 검증, 예측, 모델 내보내기, COCO, ImageNet, PyTorch, ONNX, CoreML +--- + +# 객체 감지 + +객체 감지 예제 + +객체 감지는 이미지 또는 비디오 스트림 내의 객체의 위치와 클래스를 식별하는 작업입니다. + +객체 감지기의 출력은 이미지 속 객체를 내포하는 경계 상자(bounding box) 세트와 각 상자에 대한 클래스 레이블과 신뢰도 점수를 포함합니다. 장면 내 관심 객체를 식별해야 하지만 객체의 정확한 위치나 정확한 모양을 알 필요가 없을 때 객체 감지가 좋은 선택입니다. + +

+
+ +
+ 시청하기: 사전 훈련된 Ultralytics YOLOv8 모델로 객체 감지하기. +

+ +!!! Tip "팁" + + YOLOv8 Detect 모델들은 기본 YOLOv8 모델이며 예를 들어 `yolov8n.pt` 이 [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) 데이터셋에서 사전 훈련되었습니다. + +## [모델](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +여기서는 YOLOv8 사전 훈련된 Detect 모델을 나타냅니다. Detect, Segment, 및 Pose 모델은 [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) 데이터셋에서, Classify 모델은 [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) 데이터셋에서 사전 훈련되었습니다. + +[모델](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models)은 첫 사용 시 Ultralytics의 최신 [릴리즈](https://github.com/ultralytics/assets/releases)에서 자동으로 다운로드됩니다. + +| 모델 | 크기
(픽셀) | mAPval
50-95 | 속도
CPU ONNX
(ms) | 속도
A100 TensorRT
(ms) | 파라미터
(M) | FLOPs
(B) | +|--------------------------------------------------------------------------------------|-----------------|----------------------|-----------------------------|----------------------------------|------------------|-------------------| +| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | +| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | +| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | +| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | +| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | + +- **mAPval** 값은 [COCO val2017](http://cocodataset.org) 데이터셋에서 단일 모델 단일 스케일을 사용한 값입니다. +
[COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) 데이터와 `yolo val detect data=coco.yaml device=0` 명령으로 재현할 수 있습니다. +- **속도**는 [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) 인스턴스에서 COCO val 이미지들에 대해 측정한 평균 속도입니다. +
[COCO128](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco128.yaml) 데이터와 `yolo val detect data=coco128.yaml batch=1 device=0|cpu` 명령으로 재현할 수 있습니다. + +## 훈련 + +COCO128 데이터셋에서 이미지 크기 640으로 YOLOv8n 모델을 100 에포크 동안 훈련합니다. 가능한 모든 인수에 대한 목록은 [설정](/../usage/cfg.md) 페이지에서 확인할 수 있습니다. + +!!! Example "예제" + + === "Python" + + ```python + from ultralytics import YOLO + + # 모델 로드하기 + model = YOLO('yolov8n.yaml') # YAML에서 새 모델을 빌드합니다. + model = YOLO('yolov8n.pt') # 사전 훈련된 모델을 로드합니다(훈련을 위해 권장됩니다). + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # YAML에서 빌드하고 가중치를 전달합니다. + + # 모델 훈련하기 + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # YAML에서 새 모델을 빌드하고 처음부터 훈련을 시작합니다. + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # 사전 훈련된 *.pt 모델로부터 훈련을 시작합니다. + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # YAML에서 새 모델을 빌드하고, 사전 훈련된 가중치를 전달한 후 훈련을 시작합니다. + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### 데이터셋 형식 + +YOLO 감지 데이터셋 형식은 [데이터셋 가이드](../../../datasets/detect/index.md)에서 자세히 볼 수 있습니다. 다른 형식(예: COCO 등)의 기존 데이터셋을 YOLO 형식으로 변환하려면 Ultralytics의 [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) 도구를 사용하십시오. + +## 검증 + +COCO128 데이터셋에서 훈련된 YOLOv8n 모델의 정확도를 검증합니다. `model`은 훈련 시의 `data`와 인수를 모델 속성으로 보존하기 때문에 인수를 전달할 필요가 없습니다. + +!!! Example "예제" + + === "Python" + + ```python + from ultralytics import YOLO + + # 모델 로드하기 + model = YOLO('yolov8n.pt') # 공식 모델을 로드합니다. + model = YOLO('path/to/best.pt') # 사용자 정의 모델을 로드합니다. + + # 모델 검증하기 + metrics = model.val() # 데이터셋과 설정을 기억하니 인수는 필요 없습니다. + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # 각 카테고리의 map50-95가 포함된 리스트입니다. 
+ ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # 공식 모델 검증하기 + yolo detect val model=path/to/best.pt # 사용자 정의 모델 검증하기 + ``` + +## 예측 + +훈련된 YOLOv8n 모델을 사용하여 이미지에 대한 예측을 수행합니다. + +!!! Example "예제" + + === "Python" + + ```python + from ultralytics import YOLO + + # 모델 로드하기 + model = YOLO('yolov8n.pt') # 공식 모델을 로드합니다. + model = YOLO('path/to/best.pt') # 사용자 정의 모델을 로드합니다. + + # 모델로 예측하기 + results = model('https://ultralytics.com/images/bus.jpg') # 이미지에 대해 예측합니다. + ``` + === "CLI" + + ```bash + yolo detect predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' # 공식 모델로 예측하기 + yolo detect predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # 사용자 정의 모델로 예측하기 + ``` + +전체 'predict' 모드 세부 사항은 [Predict](https://docs.ultralytics.com/modes/predict/) 페이지에서 확인하세요. + +## 내보내기 + +YOLOv8n 모델을 ONNX, CoreML 등과 같은 다른 형식으로 내보냅니다. + +!!! Example "예제" + + === "Python" + + ```python + from ultralytics import YOLO + + # 모델 로드하기 + model = YOLO('yolov8n.pt') # 공식 모델을 로드합니다. + model = YOLO('path/to/best.pt') # 사용자 정의 모델을 로드합니다. + + # 모델 내보내기 + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # 공식 모델 내보내기 + yolo export model=path/to/best.pt format=onnx # 사용자 정의 모델 내보내기 + ``` + +사용 가능한 YOLOv8 내보내기 형식은 아래 표에 나와 있습니다. 내보내기 완료 후 사용 예시는 모델에 대해 보여줍니다. 
+ +| 형식 | `format` 인수 | 모델 | 메타데이터 | 인수 | +|--------------------------------------------------------------------|---------------|---------------------------|-------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +전체 'export' 세부 사항은 [Export](https://docs.ultralytics.com/modes/export/) 페이지에서 확인하세요. 
diff --git a/docs/ko/tasks/index.md b/docs/ko/tasks/index.md new file mode 100644 index 0000000..5864a0f --- /dev/null +++ b/docs/ko/tasks/index.md @@ -0,0 +1,55 @@ +--- +comments: true +description: YOLOv8을 사용하여 수행할 수 있는 컴퓨터 비전 작업의 기초인 탐지, 세분화, 분류 및 자세 추정에 대해 알아보세요. AI 프로젝트에서의 그 용도를 이해하세요. +keywords: Ultralytics, YOLOv8, 탐지, 세분화, 분류, 자세 추정, AI 프레임워크, 컴퓨터 비전 작업 +--- + +# Ultralytics YOLOv8 작업 + +
+Ultralytics YOLO 지원 작업 + +YOLOv8는 여러 컴퓨터 비전 **작업**을 지원하는 AI 프레임워크입니다. 이 프레임워크는 [탐지](detect.md), [세분화](segment.md), [분류](classify.md), 그리고 [자세](pose.md) 추정을 수행하는 데 사용될 수 있습니다. 각각의 작업은 서로 다른 목적과 사용 사례를 가지고 있습니다. + +!!! Note "노트" + + 🚧 다국어 문서화 작업이 진행 중에 있으며, 더 나은 문서를 제공하기 위해 노력하고 있습니다. 인내해 주셔서 감사합니다! 🙏 + +

+
+ +
+ 보기: Ultralytics YOLO 작업 탐색: 객체 탐지, 세분화, 추적, 자세 추정. +

+ +## [탐지](detect.md) + +탐지는 YOLOv8이 지원하는 기본 작업입니다. 이미지 또는 비디오 프레임에서 객체를 탐지하고 주변에 경계 상자를 그리는 것을 포함합니다. 탐지된 객체들은 그 특징에 따라 다른 카테고리로 분류됩니다. YOLOv8은 단일 이미지나 비디오 프레임에서 여러 객체를 정확하고 빠르게 탐지할 수 있습니다. + +[탐지 예시](detect.md){ .md-button } + +## [세분화](segment.md) + +세분화는 이미지를 내용에 기반하여 다른 영역으로 나누는 작업입니다. 각 영역은 내용에 따라 레이블이 지정됩니다. 이 작업은 이미지 세분화와 의료 영상과 같은 응용 분야에 유용합니다. YOLOv8는 U-Net 아키텍처의 변형을 사용하여 세분화를 수행합니다. + +[세분화 예시](segment.md){ .md-button } + +## [분류](classify.md) + +분류는 이미지를 다른 카테고리로 분류하는 작업입니다. YOLOv8는 이미지의 내용을 바탕으로 이미지 분류에 사용될 수 있습니다. 이는 EfficientNet 아키텍처의 변형을 사용하여 분류 작업을 수행합니다. + +[분류 예시](classify.md){ .md-button } + +## [자세](pose.md) + +자세/키포인트 탐지는 이미지나 비디오 프레임에서 특정 점들을 탐지하는 작업입니다. 이들 점은 키포인트로 불리며, 움직임 추적이나 자세 추정에 사용됩니다. YOLOv8은 이미지나 비디오 프레임의 키포인트를 정확하고 빠르게 탐지할 수 있습니다. + +[자세 예시](pose.md){ .md-button } + +## 결론 + +YOLOv8은 탐지, 세분화, 분류, 키포인트 탐지 등 다양한 작업을 지원합니다. 각각의 작업은 다른 목적과 사용 사례를 가지고 있습니다. 이러한 작업의 차이점을 이해함으로써, 컴퓨터 비전 응용 프로그램에 적합한 작업을 선택할 수 있습니다. diff --git a/docs/ko/tasks/pose.md b/docs/ko/tasks/pose.md new file mode 100644 index 0000000..ee4944f --- /dev/null +++ b/docs/ko/tasks/pose.md @@ -0,0 +1,185 @@ +--- +comments: true +description: Ultralytics YOLOv8을 사용하여 포즈 추정 작업을 수행하는 방법을 알아보세요. 미리 학습된 모델을 찾고, 학습, 검증, 예측, 내보내기 등을 진행하는 방법을 배울 수 있습니다. +keywords: Ultralytics, YOLO, YOLOv8, 포즈 추정, 키포인트 검출, 객체 검출, 미리 학습된 모델, 기계 학습, 인공 지능 +--- + +# 포즈 추정 + +포즈 추정 예시 + +포즈 추정은 이미지 내 특정 점들의 위치를 식별하는 작업입니다. 이러한 점들은 보통 관절, 표식, 또는 기타 구별 가능한 특징으로 나타나는 키포인트입니다. 키포인트의 위치는 대개 2D `[x, y]` 또는 3D `[x, y, visible]` 좌표의 집합으로 표현됩니다. + +포즈 추정 모델의 출력은 이미지 속 객체 상의 키포인트를 나타내는 점들의 집합과 각 점의 신뢰도 점수를 포함합니다. 포즈 추정은 장면 속 객체의 구체적인 부분을 식별하고, 서로 관련된 위치를 파악해야 할 때 좋은 선택입니다. + +

+
+ +
+ 시청하기: Ultralytics YOLOv8을 이용한 포즈 추정. +

+ +!!! Tip "팁" + + YOLOv8 _pose_ 모델은 `-pose` 접미사가 붙습니다. 예: `yolov8n-pose.pt`. 이 모델들은 [COCO keypoints](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco-pose.yaml) 데이터셋으로 학습되었으며 포즈 추정 작업에 적합합니다. + +## [모델](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +여기에 표시된 YOLOv8 미리 학습된 포즈 모델을 확인하세요. Detect, Segment 및 Pose 모델은 [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) 데이터셋으로 미리 학습되며, Classify 모델은 [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) 데이터셋으로 미리 학습됩니다. + +[모델](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models)은 첫 사용 시 Ultralytics [릴리스](https://github.com/ultralytics/assets/releases)에서 자동으로 다운로드됩니다. + +| 모델 | 크기
(픽셀) | mAP포즈
50-95 | mAP포즈
50 | 속도
CPU ONNX
(ms) | 속도
A100 TensorRT
(ms) | 파라미터
(M) | FLOPs
(B) | +|------------------------------------------------------------------------------------------------------|-----------------|---------------------|------------------|-----------------------------|----------------------------------|------------------|-------------------| +| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | +| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | +| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | +| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | +| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | +| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | + +- **mAPval** 값은 [COCO Keypoints val2017](http://cocodataset.org) 데이터셋에서 단일 모델 단일 규모를 기준으로 합니다. +
재현하려면 `yolo val pose data=coco-pose.yaml device=0`을 사용하세요. +- **속도**는 [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) 인스턴스에서 COCO val 이미지들에 대해 측정한 평균 속도입니다. +
재현하려면 `yolo val pose data=coco8-pose.yaml batch=1 device=0|cpu`를 사용하세요. + +## 학습 + +COCO128-pose 데이터셋에서 YOLOv8-pose 모델 학습하기. + +!!! Example "예제" + + === "Python" + + ```python + from ultralytics import YOLO + + # 모델 불러오기 + model = YOLO('yolov8n-pose.yaml') # YAML에서 새로운 모델 구축 + model = YOLO('yolov8n-pose.pt') # 사전 학습된 모델 불러오기 (학습에 추천) + model = YOLO('yolov8n-pose.yaml').load('yolov8n-pose.pt') # YAML에서 구축하고 가중치 전달 + + # 모델 학습 + results = model.train(data='coco8-pose.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # YAML에서 새로운 모델 구축하고 처음부터 학습 시작 + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml epochs=100 imgsz=640 + + # 사전 학습된 *.pt 모델로부터 학습 시작 + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + + # YAML에서 새로운 모델 구축하고 사전 학습된 가중치를 전달하여 학습 시작 + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml pretrained=yolov8n-pose.pt epochs=100 imgsz=640 + ``` + +### 데이터셋 형식 + +YOLO 포즈 데이터셋 형식에 대한 자세한 내용은 [데이터셋 가이드](../../../datasets/pose/index.md)에서 찾아볼 수 있습니다. 기존 데이터셋을 다른 형식(예: COCO 등)에서 YOLO 형식으로 변환하려면 Ultralytics의 [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) 도구를 사용하세요. + +## 검증 + +학습된 YOLOv8n-pose 모델의 정확도를 COCO128-pose 데이터셋에서 검증하기. 모델은 학습 `data` 및 인수를 모델 속성으로 유지하기 때문에 인수를 전달할 필요가 없습니다. + +!!! Example "예제" + + === "Python" + + ```python + from ultralytics import YOLO + + # 모델 불러오기 + model = YOLO('yolov8n-pose.pt') # 공식 모델 불러오기 + model = YOLO('path/to/best.pt') # 사용자 모델 불러오기 + + # 모델 검증 + metrics = model.val() # 데이터셋 및 설정을 기억하므로 인수 필요 없음 + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # 각 범주의 map50-95를 포함하는 리스트 + ``` + === "CLI" + + ```bash + yolo pose val model=yolov8n-pose.pt # 공식 모델 검증 + yolo pose val model=path/to/best.pt # 사용자 모델 검증 + ``` + +## 예측 + +학습된 YOLOv8n-pose 모델을 사용하여 이미지에 대한 예측 수행하기. + +!!! 
Example "예제" + + === "Python" + + ```python + from ultralytics import YOLO + + # 모델 불러오기 + model = YOLO('yolov8n-pose.pt') # 공식 모델 불러오기 + model = YOLO('path/to/best.pt') # 사용자 모델 불러오기 + + # 모델로 예측하기 + results = model('https://ultralytics.com/images/bus.jpg') # 이미지에서 예측 + ``` + === "CLI" + + ```bash + yolo pose predict model=yolov8n-pose.pt source='https://ultralytics.com/images/bus.jpg' # 공식 모델로 예측 + yolo pose predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # 사용자 모델로 예측 + ``` + +`predict` 모드의 전체 세부 정보는 [예측](https://docs.ultralytics.com/modes/predict/) 페이지에서 확인하세요. + +## 내보내기 + +YOLOv8n 포즈 모델을 ONNX, CoreML 등 다른 형식으로 내보내기. + +!!! Example "예제" + + === "Python" + + ```python + from ultralytics import YOLO + + # 모델 불러오기 + model = YOLO('yolov8n-pose.pt') # 공식 모델 불러오기 + model = YOLO('path/to/best.pt') # 사용자 학습 모델 불러오기 + + # 모델 내보내기 + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-pose.pt format=onnx # 공식 모델 내보내기 + yolo export model=path/to/best.pt format=onnx # 사용자 학습 모델 내보내기 + ``` + +YOLOv8-pose 내보내기 가능한 형식은 아래 표에 나열되어 있습니다. 내보낸 모델에서 직접 예측 또는 검증이 가능합니다, 예: `yolo predict model=yolov8n-pose.onnx`. 내보내기가 완료된 후 모델 사용 예제가 표시됩니다. 
+ +| 형식 | `format` 인수 | 모델 | 메타데이터 | 인수 | +|--------------------------------------------------------------------|---------------|--------------------------------|-------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-pose.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-pose.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-pose.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-pose_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-pose.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-pose.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-pose_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-pose.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-pose.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-pose_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-pose_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-pose_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-pose_ncnn_model/` | ✅ | `imgsz`, `half` | + +`export`의 전체 세부 정보는 [내보내기](https://docs.ultralytics.com/modes/export/) 페이지에서 확인하세요. 
diff --git a/docs/ko/tasks/segment.md b/docs/ko/tasks/segment.md new file mode 100644 index 0000000..82c36b0 --- /dev/null +++ b/docs/ko/tasks/segment.md @@ -0,0 +1,188 @@ +--- +comments: true +description: Ultralytics YOLO를 이용한 인스턴스 세그멘테이션 모델 사용법 배우기. 훈련, 검증, 이미지 예측 및 모델 내보내기에 대한 지침. +keywords: yolov8, 인스턴스 세그멘테이션, Ultralytics, COCO 데이터셋, 이미지 세그멘테이션, 객체 탐지, 모델 훈련, 모델 검증, 이미지 예측, 모델 내보내기 +--- + +# 인스턴스 세그멘테이션 + +인스턴스 세그멘테이션 예시 + +인스턴스 세그멘테이션은 객체 탐지를 한 단계 더 발전시켜 이미지에서 각각의 개별 객체를 식별하고 이미지의 나머지 부분에서 분리하는 기술입니다. + +인스턴스 세그멘테이션 모델의 출력은 이미지의 각 객체의 윤곽을 나타내는 마스크나 윤곽선뿐만 아니라 각 객체에 대한 클래스 레이블과 신뢰도 점수로 구성됩니다. 객체들이 이미지 안에서 어디에 있는지뿐만 아니라 그들의 정확한 형태가 무엇인지 알아야 할 때 인스턴스 세그멘테이션이 유용합니다. + +

+
+ +
+ 시청하기: Python에서 사전 훈련된 Ultralytics YOLOv8 모델로 세그멘테이션 실행. +

+ +!!! Tip "팁" + + YOLOv8 Segment 모델은 '-seg' 접미사를 사용하며(예: `yolov8n-seg.pt`), [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) 데이터셋에 사전 훈련되어 있습니다. + +## [모델](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +여기에는 YOLOv8 사전 훈련 세그먼트 모델들이 나열되어 있습니다. Detect, Segment, Pose 모델들은 [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) 데이터셋에 사전 훈련되어 있으며, Classify 모델들은 [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) 데이터셋에 사전 훈련되어 있습니다. + +[모델](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models)은 첫 사용 시 Ultralytics의 최신 [릴리스](https://github.com/ultralytics/assets/releases)에서 자동으로 다운로드됩니다. + +| 모델 | 크기
(픽셀) | mAP박스
50-95 | mAP마스크
50-95 | 속도
CPU ONNX
(밀리초) | 속도
A100 TensorRT
(밀리초) | 매개변수
(M) | FLOPs
(B) | +|----------------------------------------------------------------------------------------------|-----------------|---------------------|----------------------|------------------------------|-----------------------------------|------------------|-------------------| +| [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | +| [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | +| [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | +| [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | +| [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | + +- **mAPval** 값들은 [COCO val2017](http://cocodataset.org) 데이터셋에서 단일 모델 단일 스케일로 얻은 값입니다. +
재현은 `yolo val segment data=coco.yaml device=0` 명령어로 실행할 수 있습니다. +- **속도**는 [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) 인스턴스를 이용하여 COCO 검증 이미지로 평균 내었습니다. +
재현은 `yolo val segment data=coco128-seg.yaml batch=1 device=0|cpu` 명령어로 실행할 수 있습니다. + +## 훈련 + +COCO128-seg 데이터셋에서 이미지 크기 640으로 YOLOv8n-seg을 100 에포크 동안 훈련합니다. 가능한 모든 인자 목록은 [설정](/../usage/cfg.md) 페이지에서 확인할 수 있습니다. + +!!! Example "예제" + + === "파이썬" + + ```python + from ultralytics import YOLO + + # 모델을 불러옵니다 + model = YOLO('yolov8n-seg.yaml') # YAML에서 새로운 모델을 구성 + model = YOLO('yolov8n-seg.pt') # 사전 훈련된 모델을 불러옴 (훈련에 추천) + model = YOLO('yolov8n-seg.yaml').load('yolov8n.pt') # YAML에서 구성하고 가중치를 전달 + + # 모델을 훈련시킵니다 + results = model.train(data='coco128-seg.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # YAML에서 새로운 모델을 구성하고 처음부터 훈련을 시작합니다 + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml epochs=100 imgsz=640 + + # 사전 훈련된 *.pt 모델로부터 훈련을 시작합니다 + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + + # YAML에서 새로운 모델을 구성하고 사전 훈련된 가중치를 전달한 뒤 훈련을 시작합니다 + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml pretrained=yolov8n-seg.pt epochs=100 imgsz=640 + ``` + +### 데이터셋 형식 + +YOLO 세그멘테이션 데이터셋 형식은 [데이터셋 가이드](../../../datasets/segment/index.md)에서 자세히 확인할 수 있습니다. 기존 데이터셋 (COCO 등)을 YOLO 형식으로 변환하려면 Ultralytics의 [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) 도구를 이용하세요. + +## 검증 + +COCO128-seg 데이터셋에서 훈련된 YOLOv8n-seg 모델의 정확도를 검증합니다. 모델은 훈련할 때의 `data`와 인자를 모델 속성으로 기억하기 때문에 별도의 인자를 전달할 필요가 없습니다. + +!!! 
Example "예제" + + === "파이썬" + + ```python + from ultralytics import YOLO + + # 모델을 불러옵니다 + model = YOLO('yolov8n-seg.pt') # 공식 모델을 불러옴 + model = YOLO('path/to/best.pt') # 커스텀 모델을 불러옴 + + # 모델을 검증합니다 + metrics = model.val() # 데이터셋과 설정이 기억되어 있어 인자가 필요 없습니다 + metrics.box.map # map50-95(B) + metrics.box.map50 # map50(B) + metrics.box.map75 # map75(B) + metrics.box.maps # 각 카테고리별 map50-95(B) 리스트 + metrics.seg.map # map50-95(M) + metrics.seg.map50 # map50(M) + metrics.seg.map75 # map75(M) + metrics.seg.maps # 각 카테고리별 map50-95(M) 리스트 + ``` + === "CLI" + + ```bash + yolo segment val model=yolov8n-seg.pt # 공식 모델로 검증 + yolo segment val model=path/to/best.pt # 커스텀 모델로 검증 + ``` + +## 예측 + +훈련된 YOLOv8n-seg 모델을 사용하여 이미지에 대한 예측을 실행합니다. + +!!! Example "예제" + + === "파이썬" + + ```python + from ultralytics import YOLO + + # 모델을 불러옵니다 + model = YOLO('yolov8n-seg.pt') # 공식 모델을 불러옴 + model = YOLO('path/to/best.pt') # 커스텀 모델을 불러옴 + + # 모델로 예측을 진행합니다 + results = model('https://ultralytics.com/images/bus.jpg') # 이미지에 대한 예측 + ``` + === "CLI" + + ```bash + yolo segment predict model=yolov8n-seg.pt source='https://ultralytics.com/images/bus.jpg' # 공식 모델로 예측 실행 + yolo segment predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # 커스텀 모델로 예측 실행 + ``` + +`predict` 모드의 전체 세부 사항은 [예측](https://docs.ultralytics.com/modes/predict/) 페이지에서 확인할 수 있습니다. + +## 내보내기 + +ONNX, CoreML 등과 같은 다른 형식으로 YOLOv8n-seg 모델을 내보냅니다. + +!!! Example "예제" + + === "파이썬" + + ```python + from ultralytics import YOLO + + # 모델을 불러옵니다 + model = YOLO('yolov8n-seg.pt') # 공식 모델을 불러옴 + model = YOLO('path/to/best.pt') # 커스텀 훈련 모델을 불러옴 + + # 모델을 내보냅니다 + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-seg.pt format=onnx # 공식 모델을 내보냅니다 + yolo export model=path/to/best.pt format=onnx # 커스텀 훈련 모델을 내보냅니다 + ``` + +사용 가능한 YOLOv8-seg 내보내기 형식은 아래 표에 나열되어 있습니다. 내보낸 모델에서 직접 예측 또는 검증을 실행할 수 있습니다(예: `yolo predict model=yolov8n-seg.onnx`). 내보내기가 완료되면 해당 모델의 사용 예제가 표시됩니다. 
+ +| 형식 | `format` 인자 | 모델 | 메타데이터 | 인자 | +|--------------------------------------------------------------------|---------------|-------------------------------|-------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-seg.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-seg.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-seg.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-seg_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-seg.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-seg.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-seg_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-seg.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-seg.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-seg_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-seg_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-seg_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-seg_ncnn_model/` | ✅ | `imgsz`, `half` | + +`export`의 전체 세부 사항은 [내보내기](https://docs.ultralytics.com/modes/export/) 페이지에서 확인할 수 있습니다. 
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml new file mode 100644 index 0000000..765b9d7 --- /dev/null +++ b/docs/mkdocs.yml @@ -0,0 +1,566 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +site_name: Ultralytics YOLOv8 Docs +site_description: Explore Ultralytics YOLOv8, a cutting-edge real-time object detection and image segmentation model for various applications and hardware platforms. +site_url: https://docs.ultralytics.com +site_author: Ultralytics +repo_url: https://github.com/ultralytics/ultralytics +edit_uri: https://github.com/ultralytics/ultralytics/tree/main/docs/en/ +repo_name: ultralytics/ultralytics +remote_name: https://github.com/ultralytics/docs +docs_dir: 'en/' # where to find the markdown files +site_dir: '../site/' # where to publish to + +theme: + name: material + language: en + custom_dir: overrides/ + logo: https://github.com/ultralytics/assets/raw/main/logo/Ultralytics_Logotype_Reverse.svg + favicon: assets/favicon.ico + icon: + repo: fontawesome/brands/github + # font: # disabled for faster page load times + # text: Helvetica + # code: Roboto Mono + palette: + - media: "(prefers-color-scheme)" + toggle: + icon: material/brightness-auto + name: Switch to light mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: black + accent: indigo + toggle: + icon: material/brightness-4 + name: Switch to system preference + - media: "(prefers-color-scheme: light)" + scheme: default + primary: indigo + accent: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + features: + - announce.dismiss + - content.action.edit + - content.code.annotate + - content.code.copy + - content.tooltips + - search.highlight + - search.share + - search.suggest + - toc.follow + - navigation.top + - navigation.tabs + - navigation.tabs.sticky + - navigation.prune + - navigation.footer + - navigation.tracking + - navigation.instant + - navigation.instant.progress + - navigation.indexes + - navigation.sections + - content.tabs.link # all 
code tabs change simultaneously + +# Customization +copyright: © 2023 Ultralytics Inc. All rights reserved. +extra: + # version: + # provider: mike # version drop-down menu + robots: robots.txt + analytics: + provider: google + property: G-2M5EHKC0BH + alternate: # language drop-down + - name: 🇬🇧 English + link: / + lang: en + - name: 🇨🇳 简体中文 + link: /zh/ + lang: zh + - name: 🇰🇷 한국어 + link: /ko/ + lang: ko + - name: 🇯🇵 日本語 + link: /ja/ + lang: ja + - name: 🇷🇺 Русский + link: /ru/ + lang: ru + - name: 🇩🇪 Deutsch + link: /de/ + lang: de + - name: 🇫🇷 Français + link: /fr/ + lang: fr + - name: 🇪🇸 Español + link: /es/ + lang: es + - name: 🇵🇹 Português + link: /pt/ + lang: pt + - name: 🇮🇳 हिन्दी + link: /hi/ + lang: hi + - name: 🇸🇦 العربية + link: /ar/ + lang: ar + social: + - icon: fontawesome/brands/github + link: https://github.com/ultralytics + - icon: fontawesome/brands/linkedin + link: https://www.linkedin.com/company/ultralytics/ + - icon: fontawesome/brands/twitter + link: https://twitter.com/ultralytics + - icon: fontawesome/brands/youtube + link: https://www.youtube.com/ultralytics + - icon: fontawesome/brands/docker + link: https://hub.docker.com/r/ultralytics/ultralytics/ + - icon: fontawesome/brands/python + link: https://pypi.org/project/ultralytics/ + - icon: fontawesome/brands/discord + link: https://ultralytics.com/discord + +extra_css: + - stylesheets/style.css +extra_javascript: + - javascript/extra.js + +markdown_extensions: + - admonition + - md_in_html + - tables + - attr_list + - def_list + - pymdownx.critic + - pymdownx.caret + - pymdownx.keys + - pymdownx.mark + - pymdownx.tilde + - pymdownx.details + - pymdownx.superfences + - pymdownx.inlinehilite + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.snippets: + base_path: ./ + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.tabbed: + alternate_style: true + +# Primary 
navigation --------------------------------------------------------------------------------------------------- +nav: + - Home: + - Home: index.md + - Quickstart: quickstart.md + - Modes: + - modes/index.md + - Train: modes/train.md + - Val: modes/val.md + - Predict: modes/predict.md + - Export: modes/export.md + - Track: modes/track.md + - Benchmark: modes/benchmark.md + - Tasks: + - tasks/index.md + - Detect: tasks/detect.md + - Segment: tasks/segment.md + - Classify: tasks/classify.md + - Pose: tasks/pose.md + - Languages: + - 🇬🇧  English: https://docs.ultralytics.com/ + - 🇨🇳  简体中文: https://docs.ultralytics.com/zh/ + - 🇰🇷  한국어: https://docs.ultralytics.com/ko/ + - 🇯🇵  日本語: https://docs.ultralytics.com/ja/ + - 🇷🇺  Русский: https://docs.ultralytics.com/ru/ + - 🇩🇪  Deutsch: https://docs.ultralytics.com/de/ + - 🇫🇷  Français: https://docs.ultralytics.com/fr/ + - 🇪🇸  Español: https://docs.ultralytics.com/es/ + - 🇵🇹  Português: https://docs.ultralytics.com/pt/ + - 🇮🇳  हिन्दी: https://docs.ultralytics.com/hi/ + - 🇸🇦  العربية: https://docs.ultralytics.com/ar/ + - Quickstart: quickstart.md + - Modes: + - modes/index.md + - Train: modes/train.md + - Val: modes/val.md + - Predict: modes/predict.md + - Export: modes/export.md + - Track: modes/track.md + - Benchmark: modes/benchmark.md + - Tasks: + - tasks/index.md + - Detect: tasks/detect.md + - Segment: tasks/segment.md + - Classify: tasks/classify.md + - Pose: tasks/pose.md + - Models: + - models/index.md + - YOLOv3: models/yolov3.md + - YOLOv4: models/yolov4.md + - YOLOv5: models/yolov5.md + - YOLOv6: models/yolov6.md + - YOLOv7: models/yolov7.md + - YOLOv8: models/yolov8.md + - SAM (Segment Anything Model): models/sam.md + - MobileSAM (Mobile Segment Anything Model): models/mobile-sam.md + - FastSAM (Fast Segment Anything Model): models/fast-sam.md + - YOLO-NAS (Neural Architecture Search): models/yolo-nas.md + - RT-DETR (Realtime Detection Transformer): models/rtdetr.md + - Datasets: + - datasets/index.md + - Detection: 
+ - datasets/detect/index.md + - Argoverse: datasets/detect/argoverse.md + - COCO: datasets/detect/coco.md + - COCO8: datasets/detect/coco8.md + - GlobalWheat2020: datasets/detect/globalwheat2020.md + - Objects365: datasets/detect/objects365.md + - OpenImagesV7: datasets/detect/open-images-v7.md + - SKU-110K: datasets/detect/sku-110k.md + - VisDrone: datasets/detect/visdrone.md + - VOC: datasets/detect/voc.md + - xView: datasets/detect/xview.md + - Segmentation: + - datasets/segment/index.md + - COCO: datasets/segment/coco.md + - COCO8-seg: datasets/segment/coco8-seg.md + - Pose: + - datasets/pose/index.md + - COCO: datasets/pose/coco.md + - COCO8-pose: datasets/pose/coco8-pose.md + - Tiger-pose: datasets/pose/tiger-pose.md + - Classification: + - datasets/classify/index.md + - Caltech 101: datasets/classify/caltech101.md + - Caltech 256: datasets/classify/caltech256.md + - CIFAR-10: datasets/classify/cifar10.md + - CIFAR-100: datasets/classify/cifar100.md + - Fashion-MNIST: datasets/classify/fashion-mnist.md + - ImageNet: datasets/classify/imagenet.md + - ImageNet-10: datasets/classify/imagenet10.md + - Imagenette: datasets/classify/imagenette.md + - Imagewoof: datasets/classify/imagewoof.md + - MNIST: datasets/classify/mnist.md + - Oriented Bounding Boxes (OBB): + - datasets/obb/index.md + - DOTAv2: datasets/obb/dota-v2.md + - Multi-Object Tracking: + - datasets/track/index.md + - Guides: + - guides/index.md + - YOLO Common Issues: guides/yolo-common-issues.md + - YOLO Performance Metrics: guides/yolo-performance-metrics.md + - YOLO Thread-Safe Inference: guides/yolo-thread-safe-inference.md + - Model Deployment Options: guides/model-deployment-options.md + - K-Fold Cross Validation: guides/kfold-cross-validation.md + - Hyperparameter Tuning: guides/hyperparameter-tuning.md + - SAHI Tiled Inference: guides/sahi-tiled-inference.md + - AzureML Quickstart: guides/azureml-quickstart.md + - Conda Quickstart: guides/conda-quickstart.md + - Docker Quickstart: 
guides/docker-quickstart.md + - Raspberry Pi: guides/raspberry-pi.md + - Triton Inference Server: guides/triton-inference-server.md + - Isolating Segmentation Objects: guides/isolating-segmentation-objects.md + - Integrations: + - integrations/index.md + - Comet ML: integrations/comet.md + - OpenVINO: integrations/openvino.md + - Ray Tune: integrations/ray-tune.md + - Roboflow: integrations/roboflow.md + - MLflow: integrations/mlflow.md + - ClearML: integrations/clearml.md + - DVC: integrations/dvc.md + - Usage: + - CLI: usage/cli.md + - Python: usage/python.md + - Callbacks: usage/callbacks.md + - Configuration: usage/cfg.md + - Advanced Customization: usage/engine.md + - YOLOv5: + - yolov5/index.md + - Quickstart: yolov5/quickstart_tutorial.md + - Environments: + - Amazon Web Services (AWS): yolov5/environments/aws_quickstart_tutorial.md + - Google Cloud (GCP): yolov5/environments/google_cloud_quickstart_tutorial.md + - AzureML: yolov5/environments/azureml_quickstart_tutorial.md + - Docker Image: yolov5/environments/docker_image_quickstart_tutorial.md + - Tutorials: + - Train Custom Data: yolov5/tutorials/train_custom_data.md + - Tips for Best Training Results: yolov5/tutorials/tips_for_best_training_results.md + - Multi-GPU Training: yolov5/tutorials/multi_gpu_training.md + - PyTorch Hub: yolov5/tutorials/pytorch_hub_model_loading.md + - TFLite, ONNX, CoreML, TensorRT Export: yolov5/tutorials/model_export.md + - NVIDIA Jetson Nano Deployment: yolov5/tutorials/running_on_jetson_nano.md + - Test-Time Augmentation (TTA): yolov5/tutorials/test_time_augmentation.md + - Model Ensembling: yolov5/tutorials/model_ensembling.md + - Pruning/Sparsity Tutorial: yolov5/tutorials/model_pruning_and_sparsity.md + - Hyperparameter evolution: yolov5/tutorials/hyperparameter_evolution.md + - Transfer learning with frozen layers: yolov5/tutorials/transfer_learning_with_frozen_layers.md + - Architecture Summary: yolov5/tutorials/architecture_description.md + - Roboflow Datasets: 
yolov5/tutorials/roboflow_datasets_integration.md + - Neural Magic's DeepSparse: yolov5/tutorials/neural_magic_pruning_quantization.md + - Comet Logging: yolov5/tutorials/comet_logging_integration.md + - Clearml Logging: yolov5/tutorials/clearml_logging_integration.md + - HUB: + - hub/index.md + - Quickstart: hub/quickstart.md + - Datasets: hub/datasets.md + - Projects: hub/projects.md + - Models: hub/models.md + - Integrations: hub/integrations.md + - Ultralytics HUB App: + - hub/app/index.md + - 'iOS': hub/app/ios.md + - 'Android': hub/app/android.md + - Inference API: hub/inference_api.md + - Reference: + - cfg: + - __init__: reference/cfg/__init__.md + - data: + - annotator: reference/data/annotator.md + - augment: reference/data/augment.md + - base: reference/data/base.md + - build: reference/data/build.md + - converter: reference/data/converter.md + - dataset: reference/data/dataset.md + - loaders: reference/data/loaders.md + - utils: reference/data/utils.md + - engine: + - exporter: reference/engine/exporter.md + - model: reference/engine/model.md + - predictor: reference/engine/predictor.md + - results: reference/engine/results.md + - trainer: reference/engine/trainer.md + - tuner: reference/engine/tuner.md + - validator: reference/engine/validator.md + - hub: + - __init__: reference/hub/__init__.md + - auth: reference/hub/auth.md + - session: reference/hub/session.md + - utils: reference/hub/utils.md + - models: + - fastsam: + - model: reference/models/fastsam/model.md + - predict: reference/models/fastsam/predict.md + - prompt: reference/models/fastsam/prompt.md + - utils: reference/models/fastsam/utils.md + - val: reference/models/fastsam/val.md + - nas: + - model: reference/models/nas/model.md + - predict: reference/models/nas/predict.md + - val: reference/models/nas/val.md + - rtdetr: + - model: reference/models/rtdetr/model.md + - predict: reference/models/rtdetr/predict.md + - train: reference/models/rtdetr/train.md + - val: 
reference/models/rtdetr/val.md + - sam: + - amg: reference/models/sam/amg.md + - build: reference/models/sam/build.md + - model: reference/models/sam/model.md + - modules: + - decoders: reference/models/sam/modules/decoders.md + - encoders: reference/models/sam/modules/encoders.md + - sam: reference/models/sam/modules/sam.md + - tiny_encoder: reference/models/sam/modules/tiny_encoder.md + - transformer: reference/models/sam/modules/transformer.md + - predict: reference/models/sam/predict.md + - utils: + - loss: reference/models/utils/loss.md + - ops: reference/models/utils/ops.md + - yolo: + - classify: + - predict: reference/models/yolo/classify/predict.md + - train: reference/models/yolo/classify/train.md + - val: reference/models/yolo/classify/val.md + - detect: + - predict: reference/models/yolo/detect/predict.md + - train: reference/models/yolo/detect/train.md + - val: reference/models/yolo/detect/val.md + - model: reference/models/yolo/model.md + - pose: + - predict: reference/models/yolo/pose/predict.md + - train: reference/models/yolo/pose/train.md + - val: reference/models/yolo/pose/val.md + - segment: + - predict: reference/models/yolo/segment/predict.md + - train: reference/models/yolo/segment/train.md + - val: reference/models/yolo/segment/val.md + - nn: + - autobackend: reference/nn/autobackend.md + - modules: + - block: reference/nn/modules/block.md + - conv: reference/nn/modules/conv.md + - head: reference/nn/modules/head.md + - transformer: reference/nn/modules/transformer.md + - utils: reference/nn/modules/utils.md + - tasks: reference/nn/tasks.md + - trackers: + - basetrack: reference/trackers/basetrack.md + - bot_sort: reference/trackers/bot_sort.md + - byte_tracker: reference/trackers/byte_tracker.md + - track: reference/trackers/track.md + - utils: + - gmc: reference/trackers/utils/gmc.md + - kalman_filter: reference/trackers/utils/kalman_filter.md + - matching: reference/trackers/utils/matching.md + - utils: + - __init__: 
reference/utils/__init__.md + - autobatch: reference/utils/autobatch.md + - benchmarks: reference/utils/benchmarks.md + - callbacks: + - base: reference/utils/callbacks/base.md + - clearml: reference/utils/callbacks/clearml.md + - comet: reference/utils/callbacks/comet.md + - dvc: reference/utils/callbacks/dvc.md + - hub: reference/utils/callbacks/hub.md + - mlflow: reference/utils/callbacks/mlflow.md + - neptune: reference/utils/callbacks/neptune.md + - raytune: reference/utils/callbacks/raytune.md + - tensorboard: reference/utils/callbacks/tensorboard.md + - wb: reference/utils/callbacks/wb.md + - checks: reference/utils/checks.md + - dist: reference/utils/dist.md + - downloads: reference/utils/downloads.md + - errors: reference/utils/errors.md + - files: reference/utils/files.md + - instance: reference/utils/instance.md + - loss: reference/utils/loss.md + - metrics: reference/utils/metrics.md + - ops: reference/utils/ops.md + - patches: reference/utils/patches.md + - plotting: reference/utils/plotting.md + - tal: reference/utils/tal.md + - torch_utils: reference/utils/torch_utils.md + - triton: reference/utils/triton.md + - tuner: reference/utils/tuner.md + + - Help: + - Help: help/index.md + - Frequently Asked Questions (FAQ): help/FAQ.md + - Contributing Guide: help/contributing.md + - Continuous Integration (CI) Guide: help/CI.md + - Contributor License Agreement (CLA): help/CLA.md + - Minimum Reproducible Example (MRE) Guide: help/minimum_reproducible_example.md + - Code of Conduct: help/code_of_conduct.md + - Environmental, Health and Safety (EHS) Policy: help/environmental-health-safety.md + - Security Policy: help/security.md + - Privacy Policy: help/privacy.md + +# Plugins including 301 redirects navigation --------------------------------------------------------------------------- +plugins: + - search: + lang: en + - mkdocstrings: + enabled: true + default_handler: python + handlers: + python: + options: + docstring_style: google + show_root_heading: 
true + show_source: true + - ultralytics: + add_desc: False + add_image: True + add_share_buttons: True + default_image: https://github.com/ultralytics/ultralytics/assets/26833433/6d09221c-c52a-4234-9a5d-b862e93c6529 + - redirects: + redirect_maps: + callbacks.md: usage/callbacks.md + cfg.md: usage/cfg.md + cli.md: usage/cli.md + config.md: usage/cfg.md + engine.md: usage/engine.md + environments/AWS-Quickstart.md: yolov5/environments/aws_quickstart_tutorial.md + environments/Docker-Quickstart.md: yolov5/environments/docker_image_quickstart_tutorial.md + environments/GCP-Quickstart.md: yolov5/environments/google_cloud_quickstart_tutorial.md + FAQ/augmentation.md: yolov5/tutorials/tips_for_best_training_results.md + package-framework.md: index.md + package-framework/mock_detector.md: index.md + predict.md: modes/predict.md + python.md: usage/python.md + quick-start.md: quickstart.md + app.md: hub/app/index.md + sdk.md: index.md + usage/hyperparameter_tuning.md: integrations/ray-tune.md + reference/base_pred.md: reference/engine/predictor.md + reference/base_trainer.md: reference/engine/trainer.md + reference/exporter.md: reference/engine/exporter.md + reference/model.md: reference/engine/model.md + reference/nn.md: reference/nn/modules/head.md + reference/ops.md: reference/utils/ops.md + reference/results.md: reference/engine/results.md + reference/base_val.md: index.md + tasks/classification.md: tasks/classify.md + tasks/detection.md: tasks/detect.md + tasks/segmentation.md: tasks/segment.md + tasks/keypoints.md: tasks/pose.md + tasks/tracking.md: modes/track.md + SECURITY.md: help/security.md + tutorials/architecture-summary.md: yolov5/tutorials/architecture_description.md + tutorials/clearml-logging.md: yolov5/tutorials/clearml_logging_integration.md + tutorials/comet-logging.md: yolov5/tutorials/comet_logging_integration.md + tutorials/hyperparameter-evolution.md: yolov5/tutorials/hyperparameter_evolution.md + tutorials/model-ensembling.md: 
yolov5/tutorials/model_ensembling.md + tutorials/multi-gpu-training.md: yolov5/tutorials/multi_gpu_training.md + tutorials/nvidia-jetson.md: yolov5/tutorials/running_on_jetson_nano.md + tutorials/pruning-sparsity.md: yolov5/tutorials/model_pruning_and_sparsity.md + tutorials/pytorch-hub.md: yolov5/tutorials/pytorch_hub_model_loading.md + tutorials/roboflow.md: yolov5/tutorials/roboflow_datasets_integration.md + tutorials/test-time-augmentation.md: yolov5/tutorials/test_time_augmentation.md + tutorials/torchscript-onnx-coreml-export.md: yolov5/tutorials/model_export.md + tutorials/train-custom-datasets.md: yolov5/tutorials/train_custom_data.md + tutorials/training-tips-best-results.md: yolov5/tutorials/tips_for_best_training_results.md + tutorials/transfer-learning-froze-layers.md: yolov5/tutorials/transfer_learning_with_frozen_layers.md + tutorials/weights-and-biasis-logging.md: yolov5/tutorials/comet_logging_integration.md + yolov5/pytorch_hub.md: yolov5/tutorials/pytorch_hub_model_loading.md + yolov5/hyp_evolution.md: yolov5/tutorials/hyperparameter_evolution.md + yolov5/pruning_sparsity.md: yolov5/tutorials/model_pruning_and_sparsity.md + yolov5/roboflow.md: yolov5/tutorials/roboflow_datasets_integration.md + yolov5/comet.md: yolov5/tutorials/comet_logging_integration.md + yolov5/clearml.md: yolov5/tutorials/clearml_logging_integration.md + yolov5/tta.md: yolov5/tutorials/test_time_augmentation.md + yolov5/multi_gpu_training.md: yolov5/tutorials/multi_gpu_training.md + yolov5/ensemble.md: yolov5/tutorials/model_ensembling.md + yolov5/jetson_nano.md: yolov5/tutorials/running_on_jetson_nano.md + yolov5/transfer_learn_frozen.md: yolov5/tutorials/transfer_learning_with_frozen_layers.md + yolov5/neural_magic.md: yolov5/tutorials/neural_magic_pruning_quantization.md + yolov5/train_custom_data.md: yolov5/tutorials/train_custom_data.md + yolov5/architecture.md: yolov5/tutorials/architecture_description.md + yolov5/export.md: yolov5/tutorials/model_export.md + 
yolov5/yolov5_quickstart_tutorial.md: yolov5/quickstart_tutorial.md + yolov5/tips_for_best_training_results.md: yolov5/tutorials/tips_for_best_training_results.md + yolov5/tutorials/yolov5_neural_magic_tutorial.md: yolov5/tutorials/neural_magic_pruning_quantization.md + yolov5/tutorials/model_ensembling_tutorial.md: yolov5/tutorials/model_ensembling.md + yolov5/tutorials/pytorch_hub_tutorial.md: yolov5/tutorials/pytorch_hub_model_loading.md + yolov5/tutorials/yolov5_architecture_tutorial.md: yolov5/tutorials/architecture_description.md + yolov5/tutorials/multi_gpu_training_tutorial.md: yolov5/tutorials/multi_gpu_training.md + yolov5/tutorials/yolov5_pytorch_hub_tutorial.md: yolov5/tutorials/pytorch_hub_model_loading.md + yolov5/tutorials/model_export_tutorial.md: yolov5/tutorials/model_export.md + yolov5/tutorials/jetson_nano_tutorial.md: yolov5/tutorials/running_on_jetson_nano.md + yolov5/tutorials/yolov5_model_ensembling_tutorial.md: yolov5/tutorials/model_ensembling.md + yolov5/tutorials/roboflow_integration.md: yolov5/tutorials/roboflow_datasets_integration.md + yolov5/tutorials/pruning_and_sparsity_tutorial.md: yolov5/tutorials/model_pruning_and_sparsity.md + yolov5/tutorials/yolov5_transfer_learning_with_frozen_layers_tutorial.md: yolov5/tutorials/transfer_learning_with_frozen_layers.md + yolov5/tutorials/transfer_learning_with_frozen_layers_tutorial.md: yolov5/tutorials/transfer_learning_with_frozen_layers.md + yolov5/tutorials/yolov5_model_export_tutorial.md: yolov5/tutorials/model_export.md + yolov5/tutorials/neural_magic_tutorial.md: yolov5/tutorials/neural_magic_pruning_quantization.md + yolov5/tutorials/yolov5_clearml_integration_tutorial.md: yolov5/tutorials/clearml_logging_integration.md + yolov5/tutorials/yolov5_train_custom_data.md: yolov5/tutorials/train_custom_data.md + yolov5/tutorials/comet_integration_tutorial.md: yolov5/tutorials/comet_logging_integration.md + yolov5/tutorials/yolov5_pruning_and_sparsity_tutorial.md: 
yolov5/tutorials/model_pruning_and_sparsity.md + yolov5/tutorials/yolov5_jetson_nano_tutorial.md: yolov5/tutorials/running_on_jetson_nano.md + yolov5/tutorials/yolov5_roboflow_integration.md: yolov5/tutorials/roboflow_datasets_integration.md + yolov5/tutorials/hyperparameter_evolution_tutorial.md: yolov5/tutorials/hyperparameter_evolution.md + yolov5/tutorials/yolov5_hyperparameter_evolution_tutorial.md: yolov5/tutorials/hyperparameter_evolution.md + yolov5/tutorials/clearml_integration_tutorial.md: yolov5/tutorials/clearml_logging_integration.md + yolov5/tutorials/test_time_augmentation_tutorial.md: yolov5/tutorials/test_time_augmentation.md + yolov5/tutorials/yolov5_test_time_augmentation_tutorial.md: yolov5/tutorials/test_time_augmentation.md + yolov5/environments/yolov5_amazon_web_services_quickstart_tutorial.md: yolov5/environments/aws_quickstart_tutorial.md + yolov5/environments/yolov5_google_cloud_platform_quickstart_tutorial.md: yolov5/environments/google_cloud_quickstart_tutorial.md + yolov5/environments/yolov5_docker_image_quickstart_tutorial.md: yolov5/environments/docker_image_quickstart_tutorial.md diff --git a/docs/mkdocs_ar.yml b/docs/mkdocs_ar.yml new file mode 100644 index 0000000..966e196 --- /dev/null +++ b/docs/mkdocs_ar.yml @@ -0,0 +1,213 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +site_name: وثائق Ultralytics YOLOv8 +site_description: استكشف Ultralytics YOLOv8، نموذج الكشف عن الأجسام وتقطيع الصورة في الوقت الحقيقي المتطور لمختلف التطبيقات والمنصات الأجهزة. 
+site_url: https://docs.ultralytics.com/ar/ +site_author: Ultralytics +repo_url: https://github.com/ultralytics/ultralytics +edit_uri: https://github.com/ultralytics/ultralytics/tree/main/docs/ar/ +repo_name: ultralytics/ultralytics +remote_name: https://github.com/ultralytics/docs +docs_dir: 'ar/' # where to find the markdown files +site_dir: '../site/ar' # where to publish to + +theme: + name: material + language: ar + custom_dir: overrides/ + logo: https://github.com/ultralytics/assets/raw/main/logo/Ultralytics_Logotype_Reverse.svg + favicon: assets/favicon.ico + icon: + repo: fontawesome/brands/github + # font: # disabled for faster page load times + # text: Helvetica + # code: Roboto Mono + palette: + - media: "(prefers-color-scheme)" + toggle: + icon: material/brightness-auto + name: Switch to light mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: black + accent: indigo + toggle: + icon: material/brightness-4 + name: Switch to system preference + - media: "(prefers-color-scheme: light)" + scheme: default + primary: indigo + accent: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + features: + - announce.dismiss + - content.action.edit + - content.code.annotate + - content.code.copy + - content.tooltips + - search.highlight + - search.share + - search.suggest + - toc.follow + - navigation.top + - navigation.tabs + - navigation.tabs.sticky + - navigation.prune + - navigation.footer + - navigation.tracking + - navigation.instant + - navigation.instant.progress + - navigation.indexes + - navigation.sections + - content.tabs.link # all code tabs change simultaneously + +# Customization +copyright: © 2023 Ultralytics Inc. All rights reserved. 
+extra: + # version: + # provider: mike # version drop-down menu + robots: robots.txt + analytics: + provider: google + property: G-2M5EHKC0BH + alternate: # language drop-down + - name: 🇬🇧 English + link: / + lang: en + - name: 🇨🇳 简体中文 + link: /zh/ + lang: zh + - name: 🇰🇷 한국어 + link: /ko/ + lang: ko + - name: 🇯🇵 日本語 + link: /ja/ + lang: ja + - name: 🇷🇺 Русский + link: /ru/ + lang: ru + - name: 🇩🇪 Deutsch + link: /de/ + lang: de + - name: 🇫🇷 Français + link: /fr/ + lang: fr + - name: 🇪🇸 Español + link: /es/ + lang: es + - name: 🇵🇹 Português + link: /pt/ + lang: pt + - name: 🇮🇳 हिन्दी + link: /hi/ + lang: hi + - name: 🇸🇦 العربية + link: /ar/ + lang: ar + social: + - icon: fontawesome/brands/github + link: https://github.com/ultralytics + - icon: fontawesome/brands/linkedin + link: https://www.linkedin.com/company/ultralytics/ + - icon: fontawesome/brands/twitter + link: https://twitter.com/ultralytics + - icon: fontawesome/brands/youtube + link: https://www.youtube.com/ultralytics + - icon: fontawesome/brands/docker + link: https://hub.docker.com/r/ultralytics/ultralytics/ + - icon: fontawesome/brands/python + link: https://pypi.org/project/ultralytics/ + - icon: fontawesome/brands/discord + link: https://ultralytics.com/discord + +extra_css: + - stylesheets/style.css +extra_javascript: + - javascript/extra.js + +markdown_extensions: + - admonition + - md_in_html + - tables + - attr_list + - def_list + - pymdownx.critic + - pymdownx.caret + - pymdownx.keys + - pymdownx.mark + - pymdownx.tilde + - pymdownx.details + - pymdownx.superfences + - pymdownx.inlinehilite + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.snippets: + base_path: ./ + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.tabbed: + alternate_style: true + + +# Primary navigation --------------------------------------------------------------------------------------------------- 
+nav: + - الصفحة الرئيسية: + - الصفحة الرئيسية: index.md + - البدء السريع: quickstart.md + - الأوضاع: + - modes/index.md + - التدريب: modes/train.md + - التحقق: modes/val.md + - التنبؤ: modes/predict.md + - التصدير: modes/export.md + - التتبع: modes/track.md + - المعايير: modes/benchmark.md + - المهام: + - tasks/index.md + - الكشف: tasks/detect.md + - التجزئة: tasks/segment.md + - التصنيف: tasks/classify.md + - الوضعية: tasks/pose.md + - البدء السريع: quickstart.md + - الأوضاع: + - modes/index.md + - التدريب: modes/train.md + - التحقق: modes/val.md + - التنبؤ: modes/predict.md + - التصدير: modes/export.md + - التتبع: modes/track.md + - المعايير: modes/benchmark.md + - المهام: + - tasks/index.md + - الكشف: tasks/detect.md + - التجزئة: tasks/segment.md + - التصنيف: tasks/classify.md + - الوضعية: tasks/pose.md + - النماذج: + - models/index.md + - YOLOv3: models/yolov3.md + - YOLOv4: models/yolov4.md + - YOLOv5: models/yolov5.md + - YOLOv6: models/yolov6.md + - YOLOv7: models/yolov7.md + - YOLOv8: models/yolov8.md + - SAM (Segment Anything Model): models/sam.md + - MobileSAM (Mobile Segment Anything Model): models/mobile-sam.md + - FastSAM (Fast Segment Anything Model): models/fast-sam.md + - YOLO-NAS (Neural Architecture Search): models/yolo-nas.md + - RT-DETR (Realtime Detection Transformer): models/rtdetr.md + - المجموعات البيانية: + - datasets/index.md + +# Plugins including 301 redirects navigation --------------------------------------------------------------------------- +plugins: + - search: + lang: ar + - ultralytics: + add_desc: False + add_image: True + add_share_buttons: True + default_image: https://github.com/ultralytics/ultralytics/assets/26833433/6d09221c-c52a-4234-9a5d-b862e93c6529 diff --git a/docs/mkdocs_de.yml b/docs/mkdocs_de.yml new file mode 100644 index 0000000..c63c47d --- /dev/null +++ b/docs/mkdocs_de.yml @@ -0,0 +1,213 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +site_name: Ultralytics YOLOv8 Dokumentation +site_description: Entdecken Sie 
Ultralytics YOLOv8, ein hochmodernes Echtzeit-Objekterkennungs- und Bildsegmentierungsmodell für verschiedene Anwendungen und Hardware-Plattformen. +site_url: https://docs.ultralytics.com/de/ +site_author: Ultralytics +repo_url: https://github.com/ultralytics/ultralytics +edit_uri: https://github.com/ultralytics/ultralytics/tree/main/docs/de/ +repo_name: ultralytics/ultralytics +remote_name: https://github.com/ultralytics/docs +docs_dir: 'de/' # where to find the markdown files +site_dir: '../site/de' # where to publish to + +theme: + name: material + language: de + custom_dir: overrides/ + logo: https://github.com/ultralytics/assets/raw/main/logo/Ultralytics_Logotype_Reverse.svg + favicon: assets/favicon.ico + icon: + repo: fontawesome/brands/github + # font: # disabled for faster page load times + # text: Helvetica + # code: Roboto Mono + palette: + - media: "(prefers-color-scheme)" + toggle: + icon: material/brightness-auto + name: Switch to light mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: black + accent: indigo + toggle: + icon: material/brightness-4 + name: Switch to system preference + - media: "(prefers-color-scheme: light)" + scheme: default + primary: indigo + accent: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + features: + - announce.dismiss + - content.action.edit + - content.code.annotate + - content.code.copy + - content.tooltips + - search.highlight + - search.share + - search.suggest + - toc.follow + - navigation.top + - navigation.tabs + - navigation.tabs.sticky + - navigation.prune + - navigation.footer + - navigation.tracking + - navigation.instant + - navigation.instant.progress + - navigation.indexes + - navigation.sections + - content.tabs.link # all code tabs change simultaneously + +# Customization +copyright: © 2023 Ultralytics Inc. All rights reserved. 
+extra: + # version: + # provider: mike # version drop-down menu + robots: robots.txt + analytics: + provider: google + property: G-2M5EHKC0BH + alternate: # language drop-down + - name: 🇬🇧 English + link: / + lang: en + - name: 🇨🇳 简体中文 + link: /zh/ + lang: zh + - name: 🇰🇷 한국어 + link: /ko/ + lang: ko + - name: 🇯🇵 日本語 + link: /ja/ + lang: ja + - name: 🇷🇺 Русский + link: /ru/ + lang: ru + - name: 🇩🇪 Deutsch + link: /de/ + lang: de + - name: 🇫🇷 Français + link: /fr/ + lang: fr + - name: 🇪🇸 Español + link: /es/ + lang: es + - name: 🇵🇹 Português + link: /pt/ + lang: pt + - name: 🇮🇳 हिन्दी + link: /hi/ + lang: hi + - name: 🇸🇦 العربية + link: /ar/ + lang: ar + social: + - icon: fontawesome/brands/github + link: https://github.com/ultralytics + - icon: fontawesome/brands/linkedin + link: https://www.linkedin.com/company/ultralytics/ + - icon: fontawesome/brands/twitter + link: https://twitter.com/ultralytics + - icon: fontawesome/brands/youtube + link: https://www.youtube.com/ultralytics + - icon: fontawesome/brands/docker + link: https://hub.docker.com/r/ultralytics/ultralytics/ + - icon: fontawesome/brands/python + link: https://pypi.org/project/ultralytics/ + - icon: fontawesome/brands/discord + link: https://ultralytics.com/discord + +extra_css: + - stylesheets/style.css +extra_javascript: + - javascript/extra.js + +markdown_extensions: + - admonition + - md_in_html + - tables + - attr_list + - def_list + - pymdownx.critic + - pymdownx.caret + - pymdownx.keys + - pymdownx.mark + - pymdownx.tilde + - pymdownx.details + - pymdownx.superfences + - pymdownx.inlinehilite + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.snippets: + base_path: ./ + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.tabbed: + alternate_style: true + + +# Primary navigation --------------------------------------------------------------------------------------------------- 
+nav: + - Startseite: + - Startseite: index.md + - Schnellstart: quickstart.md + - Modi: + - modes/index.md + - Training: modes/train.md + - Validierung: modes/val.md + - Vorhersage: modes/predict.md + - Exportieren: modes/export.md + - Verfolgen: modes/track.md + - Benchmarking: modes/benchmark.md + - Aufgaben: + - tasks/index.md + - Erkennung: tasks/detect.md + - Segmentierung: tasks/segment.md + - Klassifizierung: tasks/classify.md + - Pose: tasks/pose.md + - Schnellstart: quickstart.md + - Modi: + - modes/index.md + - Training: modes/train.md + - Validierung: modes/val.md + - Vorhersage: modes/predict.md + - Exportieren: modes/export.md + - Verfolgen: modes/track.md + - Benchmarking: modes/benchmark.md + - Aufgaben: + - tasks/index.md + - Erkennung: tasks/detect.md + - Segmentierung: tasks/segment.md + - Klassifizierung: tasks/classify.md + - Pose: tasks/pose.md + - Modelle: + - models/index.md + - YOLOv3: models/yolov3.md + - YOLOv4: models/yolov4.md + - YOLOv5: models/yolov5.md + - YOLOv6: models/yolov6.md + - YOLOv7: models/yolov7.md + - YOLOv8: models/yolov8.md + - SAM (Segment Anything Model): models/sam.md + - MobileSAM (Mobile Segment Anything Model): models/mobile-sam.md + - FastSAM (Fast Segment Anything Model): models/fast-sam.md + - YOLO-NAS (Neural Architecture Search): models/yolo-nas.md + - RT-DETR (Realtime Detection Transformer): models/rtdetr.md + - Datensätze: + - datasets/index.md + +# Plugins including 301 redirects navigation --------------------------------------------------------------------------- +plugins: + - search: + lang: de + - ultralytics: + add_desc: False + add_image: True + add_share_buttons: True + default_image: https://github.com/ultralytics/ultralytics/assets/26833433/6d09221c-c52a-4234-9a5d-b862e93c6529 diff --git a/docs/mkdocs_es.yml b/docs/mkdocs_es.yml new file mode 100644 index 0000000..ec29a50 --- /dev/null +++ b/docs/mkdocs_es.yml @@ -0,0 +1,213 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +site_name: Documentación 
Ultralytics YOLOv8 +site_description: Explore Ultralytics YOLOv8, un modelo avanzado de detección de objetos y segmentación de imágenes en tiempo real para diversas aplicaciones y plataformas de hardware. +site_url: https://docs.ultralytics.com/es/ +site_author: Ultralytics +repo_url: https://github.com/ultralytics/ultralytics +edit_uri: https://github.com/ultralytics/ultralytics/tree/main/docs/es/ +repo_name: ultralytics/ultralytics +remote_name: https://github.com/ultralytics/docs +docs_dir: 'es/' # where to find the markdown files +site_dir: '../site/es' # where to publish to + +theme: + name: material + language: es + custom_dir: overrides/ + logo: https://github.com/ultralytics/assets/raw/main/logo/Ultralytics_Logotype_Reverse.svg + favicon: assets/favicon.ico + icon: + repo: fontawesome/brands/github + # font: # disabled for faster page load times + # text: Helvetica + # code: Roboto Mono + palette: + - media: "(prefers-color-scheme)" + toggle: + icon: material/brightness-auto + name: Switch to light mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: black + accent: indigo + toggle: + icon: material/brightness-4 + name: Switch to system preference + - media: "(prefers-color-scheme: light)" + scheme: default + primary: indigo + accent: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + features: + - announce.dismiss + - content.action.edit + - content.code.annotate + - content.code.copy + - content.tooltips + - search.highlight + - search.share + - search.suggest + - toc.follow + - navigation.top + - navigation.tabs + - navigation.tabs.sticky + - navigation.prune + - navigation.footer + - navigation.tracking + - navigation.instant + - navigation.instant.progress + - navigation.indexes + - navigation.sections + - content.tabs.link # all code tabs change simultaneously + +# Customization +copyright: © 2023 Ultralytics Inc. All rights reserved. 
+extra: + # version: + # provider: mike # version drop-down menu + robots: robots.txt + analytics: + provider: google + property: G-2M5EHKC0BH + alternate: # language drop-down + - name: 🇬🇧 English + link: / + lang: en + - name: 🇨🇳 简体中文 + link: /zh/ + lang: zh + - name: 🇰🇷 한국어 + link: /ko/ + lang: ko + - name: 🇯🇵 日本語 + link: /ja/ + lang: ja + - name: 🇷🇺 Русский + link: /ru/ + lang: ru + - name: 🇩🇪 Deutsch + link: /de/ + lang: de + - name: 🇫🇷 Français + link: /fr/ + lang: fr + - name: 🇪🇸 Español + link: /es/ + lang: es + - name: 🇵🇹 Português + link: /pt/ + lang: pt + - name: 🇮🇳 हिन्दी + link: /hi/ + lang: hi + - name: 🇸🇦 العربية + link: /ar/ + lang: ar + social: + - icon: fontawesome/brands/github + link: https://github.com/ultralytics + - icon: fontawesome/brands/linkedin + link: https://www.linkedin.com/company/ultralytics/ + - icon: fontawesome/brands/twitter + link: https://twitter.com/ultralytics + - icon: fontawesome/brands/youtube + link: https://www.youtube.com/ultralytics + - icon: fontawesome/brands/docker + link: https://hub.docker.com/r/ultralytics/ultralytics/ + - icon: fontawesome/brands/python + link: https://pypi.org/project/ultralytics/ + - icon: fontawesome/brands/discord + link: https://ultralytics.com/discord + +extra_css: + - stylesheets/style.css +extra_javascript: + - javascript/extra.js + +markdown_extensions: + - admonition + - md_in_html + - tables + - attr_list + - def_list + - pymdownx.critic + - pymdownx.caret + - pymdownx.keys + - pymdownx.mark + - pymdownx.tilde + - pymdownx.details + - pymdownx.superfences + - pymdownx.inlinehilite + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.snippets: + base_path: ./ + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.tabbed: + alternate_style: true + + +# Primary navigation --------------------------------------------------------------------------------------------------- 
+nav: + - Inicio: + - Inicio: index.md + - Inicio rápido: quickstart.md + - Modos: + - modes/index.md + - Entrenamiento: modes/train.md + - Validación: modes/val.md + - Predicción: modes/predict.md + - Exportar: modes/export.md + - Seguimiento: modes/track.md + - Benchmarking: modes/benchmark.md + - Tareas: + - tasks/index.md + - Detección: tasks/detect.md + - Segmentación: tasks/segment.md + - Clasificación: tasks/classify.md + - Pose: tasks/pose.md + - Inicio rápido: quickstart.md + - Modos: + - modes/index.md + - Entrenamiento: modes/train.md + - Validación: modes/val.md + - Predicción: modes/predict.md + - Exportar: modes/export.md + - Seguimiento: modes/track.md + - Benchmarking: modes/benchmark.md + - Tareas: + - tasks/index.md + - Detección: tasks/detect.md + - Segmentación: tasks/segment.md + - Clasificación: tasks/classify.md + - Pose: tasks/pose.md + - Modelos: + - models/index.md + - YOLOv3: models/yolov3.md + - YOLOv4: models/yolov4.md + - YOLOv5: models/yolov5.md + - YOLOv6: models/yolov6.md + - YOLOv7: models/yolov7.md + - YOLOv8: models/yolov8.md + - SAM (Segment Anything Model): models/sam.md + - MobileSAM (Mobile Segment Anything Model): models/mobile-sam.md + - FastSAM (Fast Segment Anything Model): models/fast-sam.md + - YOLO-NAS (Neural Architecture Search): models/yolo-nas.md + - RT-DETR (Realtime Detection Transformer): models/rtdetr.md + - Conjuntos de datos: + - datasets/index.md + +# Plugins including 301 redirects navigation --------------------------------------------------------------------------- +plugins: + - search: + lang: es + - ultralytics: + add_desc: False + add_image: True + add_share_buttons: True + default_image: https://github.com/ultralytics/ultralytics/assets/26833433/6d09221c-c52a-4234-9a5d-b862e93c6529 diff --git a/docs/mkdocs_fr.yml b/docs/mkdocs_fr.yml new file mode 100644 index 0000000..37f99d8 --- /dev/null +++ b/docs/mkdocs_fr.yml @@ -0,0 +1,213 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +site_name: Documentation 
Ultralytics YOLOv8 +site_description: Explorez Ultralytics YOLOv8, un modèle de pointe pour la détection d'objets et la segmentation d'image en temps réel, adapté à diverses applications et plateformes matérielles. +site_url: https://docs.ultralytics.com/fr/ +site_author: Ultralytics +repo_url: https://github.com/ultralytics/ultralytics +edit_uri: https://github.com/ultralytics/ultralytics/tree/main/docs/fr/ +repo_name: ultralytics/ultralytics +remote_name: https://github.com/ultralytics/docs +docs_dir: 'fr/' # where to find the markdown files +site_dir: '../site/fr' # where to publish to + +theme: + name: material + language: fr + custom_dir: overrides/ + logo: https://github.com/ultralytics/assets/raw/main/logo/Ultralytics_Logotype_Reverse.svg + favicon: assets/favicon.ico + icon: + repo: fontawesome/brands/github + # font: # disabled for faster page load times + # text: Helvetica + # code: Roboto Mono + palette: + - media: "(prefers-color-scheme)" + toggle: + icon: material/brightness-auto + name: Switch to light mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: black + accent: indigo + toggle: + icon: material/brightness-4 + name: Switch to system preference + - media: "(prefers-color-scheme: light)" + scheme: default + primary: indigo + accent: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + features: + - announce.dismiss + - content.action.edit + - content.code.annotate + - content.code.copy + - content.tooltips + - search.highlight + - search.share + - search.suggest + - toc.follow + - navigation.top + - navigation.tabs + - navigation.tabs.sticky + - navigation.prune + - navigation.footer + - navigation.tracking + - navigation.instant + - navigation.instant.progress + - navigation.indexes + - navigation.sections + - content.tabs.link # all code tabs change simultaneously + +# Customization +copyright: © 2023 Ultralytics Inc. All rights reserved. 
+extra: + # version: + # provider: mike # version drop-down menu + robots: robots.txt + analytics: + provider: google + property: G-2M5EHKC0BH + alternate: # language drop-down + - name: 🇬🇧 English + link: / + lang: en + - name: 🇨🇳 简体中文 + link: /zh/ + lang: zh + - name: 🇰🇷 한국어 + link: /ko/ + lang: ko + - name: 🇯🇵 日本語 + link: /ja/ + lang: ja + - name: 🇷🇺 Русский + link: /ru/ + lang: ru + - name: 🇩🇪 Deutsch + link: /de/ + lang: de + - name: 🇫🇷 Français + link: /fr/ + lang: fr + - name: 🇪🇸 Español + link: /es/ + lang: es + - name: 🇵🇹 Português + link: /pt/ + lang: pt + - name: 🇮🇳 हिन्दी + link: /hi/ + lang: hi + - name: 🇸🇦 العربية + link: /ar/ + lang: ar + social: + - icon: fontawesome/brands/github + link: https://github.com/ultralytics + - icon: fontawesome/brands/linkedin + link: https://www.linkedin.com/company/ultralytics/ + - icon: fontawesome/brands/twitter + link: https://twitter.com/ultralytics + - icon: fontawesome/brands/youtube + link: https://www.youtube.com/ultralytics + - icon: fontawesome/brands/docker + link: https://hub.docker.com/r/ultralytics/ultralytics/ + - icon: fontawesome/brands/python + link: https://pypi.org/project/ultralytics/ + - icon: fontawesome/brands/discord + link: https://ultralytics.com/discord + +extra_css: + - stylesheets/style.css +extra_javascript: + - javascript/extra.js + +markdown_extensions: + - admonition + - md_in_html + - tables + - attr_list + - def_list + - pymdownx.critic + - pymdownx.caret + - pymdownx.keys + - pymdownx.mark + - pymdownx.tilde + - pymdownx.details + - pymdownx.superfences + - pymdownx.inlinehilite + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.snippets: + base_path: ./ + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.tabbed: + alternate_style: true + + +# Primary navigation --------------------------------------------------------------------------------------------------- 
+nav: + - Accueil: + - Accueil: index.md + - Démarrage rapide: quickstart.md + - Modes: + - modes/index.md + - Entraînement: modes/train.md + - Validation: modes/val.md + - Prédiction: modes/predict.md + - Exportation: modes/export.md + - Suivi: modes/track.md + - Benchmarking: modes/benchmark.md + - Tâches: + - tasks/index.md + - Détection: tasks/detect.md + - Segmentation: tasks/segment.md + - Classification: tasks/classify.md + - Pose: tasks/pose.md + - Démarrage rapide: quickstart.md + - Modes: + - modes/index.md + - Entraînement: modes/train.md + - Validation: modes/val.md + - Prédiction: modes/predict.md + - Exportation: modes/export.md + - Suivi: modes/track.md + - Benchmarking: modes/benchmark.md + - Tâches: + - tasks/index.md + - Détection: tasks/detect.md + - Segmentation: tasks/segment.md + - Classification: tasks/classify.md + - Pose: tasks/pose.md + - Modèles: + - models/index.md + - YOLOv3: models/yolov3.md + - YOLOv4: models/yolov4.md + - YOLOv5: models/yolov5.md + - YOLOv6: models/yolov6.md + - YOLOv7: models/yolov7.md + - YOLOv8: models/yolov8.md + - SAM (Segment Anything Model): models/sam.md + - MobileSAM (Mobile Segment Anything Model): models/mobile-sam.md + - FastSAM (Fast Segment Anything Model): models/fast-sam.md + - YOLO-NAS (Neural Architecture Search): models/yolo-nas.md + - RT-DETR (Realtime Detection Transformer): models/rtdetr.md + - Jeux de données: + - datasets/index.md + +# Plugins including 301 redirects navigation --------------------------------------------------------------------------- +plugins: + - search: + lang: fr + - ultralytics: + add_desc: False + add_image: True + add_share_buttons: True + default_image: https://github.com/ultralytics/ultralytics/assets/26833433/6d09221c-c52a-4234-9a5d-b862e93c6529 diff --git a/docs/mkdocs_hi.yml b/docs/mkdocs_hi.yml new file mode 100644 index 0000000..45314bb --- /dev/null +++ b/docs/mkdocs_hi.yml @@ -0,0 +1,213 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +site_name: Ultralytics 
YOLOv8 दस्तावेज़ +site_description: विभिन्न एप्लिकेशन्स और हार्डवेयर प्लेटफॉर्म्स के लिए Ultralytics YOLOv8 की खोज करें, एक अत्याधुनिक वास्तविक समय वस्तु पहचान और छवि विभाजन मॉडल। +site_url: https://docs.ultralytics.com/hi/ +site_author: Ultralytics +repo_url: https://github.com/ultralytics/ultralytics +edit_uri: https://github.com/ultralytics/ultralytics/tree/main/docs/hi/ +repo_name: ultralytics/ultralytics +remote_name: https://github.com/ultralytics/docs +docs_dir: 'hi/' # where to find the markdown files +site_dir: '../site/hi' # where to publish to + +theme: + name: material + language: hi + custom_dir: overrides/ + logo: https://github.com/ultralytics/assets/raw/main/logo/Ultralytics_Logotype_Reverse.svg + favicon: assets/favicon.ico + icon: + repo: fontawesome/brands/github + # font: # disabled for faster page load times + # text: Helvetica + # code: Roboto Mono + palette: + - media: "(prefers-color-scheme)" + toggle: + icon: material/brightness-auto + name: Switch to light mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: black + accent: indigo + toggle: + icon: material/brightness-4 + name: Switch to system preference + - media: "(prefers-color-scheme: light)" + scheme: default + primary: indigo + accent: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + features: + - announce.dismiss + - content.action.edit + - content.code.annotate + - content.code.copy + - content.tooltips + - search.highlight + - search.share + - search.suggest + - toc.follow + - navigation.top + - navigation.tabs + - navigation.tabs.sticky + - navigation.prune + - navigation.footer + - navigation.tracking + - navigation.instant + - navigation.instant.progress + - navigation.indexes + - navigation.sections + - content.tabs.link # all code tabs change simultaneously + +# Customization +copyright: © 2023 Ultralytics Inc. All rights reserved. 
+extra: + # version: + # provider: mike # version drop-down menu + robots: robots.txt + analytics: + provider: google + property: G-2M5EHKC0BH + alternate: # language drop-down + - name: 🇬🇧 English + link: / + lang: en + - name: 🇨🇳 简体中文 + link: /zh/ + lang: zh + - name: 🇰🇷 한국어 + link: /ko/ + lang: ko + - name: 🇯🇵 日本語 + link: /ja/ + lang: ja + - name: 🇷🇺 Русский + link: /ru/ + lang: ru + - name: 🇩🇪 Deutsch + link: /de/ + lang: de + - name: 🇫🇷 Français + link: /fr/ + lang: fr + - name: 🇪🇸 Español + link: /es/ + lang: es + - name: 🇵🇹 Português + link: /pt/ + lang: pt + - name: 🇮🇳 हिन्दी + link: /hi/ + lang: hi + - name: 🇸🇦 العربية + link: /ar/ + lang: ar + social: + - icon: fontawesome/brands/github + link: https://github.com/ultralytics + - icon: fontawesome/brands/linkedin + link: https://www.linkedin.com/company/ultralytics/ + - icon: fontawesome/brands/twitter + link: https://twitter.com/ultralytics + - icon: fontawesome/brands/youtube + link: https://www.youtube.com/ultralytics + - icon: fontawesome/brands/docker + link: https://hub.docker.com/r/ultralytics/ultralytics/ + - icon: fontawesome/brands/python + link: https://pypi.org/project/ultralytics/ + - icon: fontawesome/brands/discord + link: https://ultralytics.com/discord + +extra_css: + - stylesheets/style.css +extra_javascript: + - javascript/extra.js + +markdown_extensions: + - admonition + - md_in_html + - tables + - attr_list + - def_list + - pymdownx.critic + - pymdownx.caret + - pymdownx.keys + - pymdownx.mark + - pymdownx.tilde + - pymdownx.details + - pymdownx.superfences + - pymdownx.inlinehilite + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.snippets: + base_path: ./ + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.tabbed: + alternate_style: true + + +# Primary navigation --------------------------------------------------------------------------------------------------- 
+nav: + - होमपेज: + - होमपेज: index.md + - त्वरित प्रारंभ: quickstart.md + - मोड: + - modes/index.md + - प्रशिक्षण: modes/train.md + - मान्यता: modes/val.md + - भविष्यवाणी: modes/predict.md + - निर्यात: modes/export.md + - ट्रैकिंग: modes/track.md + - बेंचमार्किंग: modes/benchmark.md + - कार्य: + - tasks/index.md + - पहचान: tasks/detect.md + - खंडन: tasks/segment.md + - वर्गीकरण: tasks/classify.md + - मुद्रा: tasks/pose.md + - त्वरित प्रारंभ: quickstart.md + - मोड: + - modes/index.md + - प्रशिक्षण: modes/train.md + - मान्यता: modes/val.md + - भविष्यवाणी: modes/predict.md + - निर्यात: modes/export.md + - ट्रैकिंग: modes/track.md + - बेंचमार्किंग: modes/benchmark.md + - कार्य: + - tasks/index.md + - पहचान: tasks/detect.md + - खंडन: tasks/segment.md + - वर्गीकरण: tasks/classify.md + - मुद्रा: tasks/pose.md + - मॉडल: + - models/index.md + - YOLOv3: models/yolov3.md + - YOLOv4: models/yolov4.md + - YOLOv5: models/yolov5.md + - YOLOv6: models/yolov6.md + - YOLOv7: models/yolov7.md + - YOLOv8: models/yolov8.md + - SAM (Segment Anything Model): models/sam.md + - MobileSAM (Mobile Segment Anything Model): models/mobile-sam.md + - FastSAM (Fast Segment Anything Model): models/fast-sam.md + - YOLO-NAS (Neural Architecture Search): models/yolo-nas.md + - RT-DETR (Realtime Detection Transformer): models/rtdetr.md + - डेटासेट्स: + - datasets/index.md + +# Plugins including 301 redirects navigation --------------------------------------------------------------------------- +plugins: + - search: + lang: hi + - ultralytics: + add_desc: False + add_image: True + add_share_buttons: True + default_image: https://github.com/ultralytics/ultralytics/assets/26833433/6d09221c-c52a-4234-9a5d-b862e93c6529 diff --git a/docs/mkdocs_ja.yml b/docs/mkdocs_ja.yml new file mode 100644 index 0000000..53bd906 --- /dev/null +++ b/docs/mkdocs_ja.yml @@ -0,0 +1,213 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +site_name: Ultralytics YOLOv8 ドキュメント +site_description: Ultralytics 
YOLOv8を探求してください。これは、さまざまなアプリケーションおよびハードウェアプラットフォームに適した最先端のリアルタイム物体検出および画像分割モデルです。 +site_url: https://docs.ultralytics.com/ja/ +site_author: Ultralytics +repo_url: https://github.com/ultralytics/ultralytics +edit_uri: https://github.com/ultralytics/ultralytics/tree/main/docs/ja/ +repo_name: ultralytics/ultralytics +remote_name: https://github.com/ultralytics/docs +docs_dir: 'ja/' # where to find the markdown files +site_dir: '../site/ja' # where to publish to + +theme: + name: material + language: ja + custom_dir: overrides/ + logo: https://github.com/ultralytics/assets/raw/main/logo/Ultralytics_Logotype_Reverse.svg + favicon: assets/favicon.ico + icon: + repo: fontawesome/brands/github + # font: # disabled for faster page load times + # text: Helvetica + # code: Roboto Mono + palette: + - media: "(prefers-color-scheme)" + toggle: + icon: material/brightness-auto + name: Switch to light mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: black + accent: indigo + toggle: + icon: material/brightness-4 + name: Switch to system preference + - media: "(prefers-color-scheme: light)" + scheme: default + primary: indigo + accent: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + features: + - announce.dismiss + - content.action.edit + - content.code.annotate + - content.code.copy + - content.tooltips + - search.highlight + - search.share + - search.suggest + - toc.follow + - navigation.top + - navigation.tabs + - navigation.tabs.sticky + - navigation.prune + - navigation.footer + - navigation.tracking + - navigation.instant + - navigation.instant.progress + - navigation.indexes + - navigation.sections + - content.tabs.link # all code tabs change simultaneously + +# Customization +copyright: © 2023 Ultralytics Inc. All rights reserved. 
+extra: + # version: + # provider: mike # version drop-down menu + robots: robots.txt + analytics: + provider: google + property: G-2M5EHKC0BH + alternate: # language drop-down + - name: 🇬🇧 English + link: / + lang: en + - name: 🇨🇳 简体中文 + link: /zh/ + lang: zh + - name: 🇰🇷 한국어 + link: /ko/ + lang: ko + - name: 🇯🇵 日本語 + link: /ja/ + lang: ja + - name: 🇷🇺 Русский + link: /ru/ + lang: ru + - name: 🇩🇪 Deutsch + link: /de/ + lang: de + - name: 🇫🇷 Français + link: /fr/ + lang: fr + - name: 🇪🇸 Español + link: /es/ + lang: es + - name: 🇵🇹 Português + link: /pt/ + lang: pt + - name: 🇮🇳 हिन्दी + link: /hi/ + lang: hi + - name: 🇸🇦 العربية + link: /ar/ + lang: ar + social: + - icon: fontawesome/brands/github + link: https://github.com/ultralytics + - icon: fontawesome/brands/linkedin + link: https://www.linkedin.com/company/ultralytics/ + - icon: fontawesome/brands/twitter + link: https://twitter.com/ultralytics + - icon: fontawesome/brands/youtube + link: https://www.youtube.com/ultralytics + - icon: fontawesome/brands/docker + link: https://hub.docker.com/r/ultralytics/ultralytics/ + - icon: fontawesome/brands/python + link: https://pypi.org/project/ultralytics/ + - icon: fontawesome/brands/discord + link: https://ultralytics.com/discord + +extra_css: + - stylesheets/style.css +extra_javascript: + - javascript/extra.js + +markdown_extensions: + - admonition + - md_in_html + - tables + - attr_list + - def_list + - pymdownx.critic + - pymdownx.caret + - pymdownx.keys + - pymdownx.mark + - pymdownx.tilde + - pymdownx.details + - pymdownx.superfences + - pymdownx.inlinehilite + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.snippets: + base_path: ./ + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.tabbed: + alternate_style: true + + +# Primary navigation --------------------------------------------------------------------------------------------------- 
+nav: + - ホーム: + - ホーム: index.md + - クイックスタート: quickstart.md + - モード: + - modes/index.md + - トレーニング: modes/train.md + - 検証: modes/val.md + - 予測: modes/predict.md + - エクスポート: modes/export.md + - トラッキング: modes/track.md + - ベンチマーク: modes/benchmark.md + - タスク: + - tasks/index.md + - 検出: tasks/detect.md + - セグメンテーション: tasks/segment.md + - 分類: tasks/classify.md + - ポーズ: tasks/pose.md + - クイックスタート: quickstart.md + - モード: + - modes/index.md + - トレーニング: modes/train.md + - 検証: modes/val.md + - 予測: modes/predict.md + - エクスポート: modes/export.md + - トラッキング: modes/track.md + - ベンチマーク: modes/benchmark.md + - タスク: + - tasks/index.md + - 検出: tasks/detect.md + - セグメンテーション: tasks/segment.md + - 分類: tasks/classify.md + - ポーズ: tasks/pose.md + - モデル: + - models/index.md + - YOLOv3: models/yolov3.md + - YOLOv4: models/yolov4.md + - YOLOv5: models/yolov5.md + - YOLOv6: models/yolov6.md + - YOLOv7: models/yolov7.md + - YOLOv8: models/yolov8.md + - SAM (Segment Anything Model): models/sam.md + - MobileSAM (Mobile Segment Anything Model): models/mobile-sam.md + - FastSAM (Fast Segment Anything Model): models/fast-sam.md + - YOLO-NAS (Neural Architecture Search): models/yolo-nas.md + - RT-DETR (Realtime Detection Transformer): models/rtdetr.md + - データセット: + - datasets/index.md + +# Plugins including 301 redirects navigation --------------------------------------------------------------------------- +plugins: + - search: + lang: ja + - ultralytics: + add_desc: False + add_image: True + add_share_buttons: True + default_image: https://github.com/ultralytics/ultralytics/assets/26833433/6d09221c-c52a-4234-9a5d-b862e93c6529 diff --git a/docs/mkdocs_ko.yml b/docs/mkdocs_ko.yml new file mode 100644 index 0000000..649bbc8 --- /dev/null +++ b/docs/mkdocs_ko.yml @@ -0,0 +1,213 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +site_name: Ultralytics YOLOv8 문서 +site_description: Ultralytics YOLOv8을 탐색하세요. 이는 다양한 애플리케이션 및 하드웨어 플랫폼을 위한 최첨단 실시간 객체 감지 및 이미지 분할 모델입니다. 
+site_url: https://docs.ultralytics.com/ko/ +site_author: Ultralytics +repo_url: https://github.com/ultralytics/ultralytics +edit_uri: https://github.com/ultralytics/ultralytics/tree/main/docs/ko/ +repo_name: ultralytics/ultralytics +remote_name: https://github.com/ultralytics/docs +docs_dir: 'ko/' # where to find the markdown files +site_dir: '../site/ko' # where to publish to + +theme: + name: material + language: ko + custom_dir: overrides/ + logo: https://github.com/ultralytics/assets/raw/main/logo/Ultralytics_Logotype_Reverse.svg + favicon: assets/favicon.ico + icon: + repo: fontawesome/brands/github + # font: # disabled for faster page load times + # text: Helvetica + # code: Roboto Mono + palette: + - media: "(prefers-color-scheme)" + toggle: + icon: material/brightness-auto + name: Switch to light mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: black + accent: indigo + toggle: + icon: material/brightness-4 + name: Switch to system preference + - media: "(prefers-color-scheme: light)" + scheme: default + primary: indigo + accent: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + features: + - announce.dismiss + - content.action.edit + - content.code.annotate + - content.code.copy + - content.tooltips + - search.highlight + - search.share + - search.suggest + - toc.follow + - navigation.top + - navigation.tabs + - navigation.tabs.sticky + - navigation.prune + - navigation.footer + - navigation.tracking + - navigation.instant + - navigation.instant.progress + - navigation.indexes + - navigation.sections + - content.tabs.link # all code tabs change simultaneously + +# Customization +copyright: © 2023 Ultralytics Inc. All rights reserved. 
+extra: + # version: + # provider: mike # version drop-down menu + robots: robots.txt + analytics: + provider: google + property: G-2M5EHKC0BH + alternate: # language drop-down + - name: 🇬🇧 English + link: / + lang: en + - name: 🇨🇳 简体中文 + link: /zh/ + lang: zh + - name: 🇰🇷 한국어 + link: /ko/ + lang: ko + - name: 🇯🇵 日本語 + link: /ja/ + lang: ja + - name: 🇷🇺 Русский + link: /ru/ + lang: ru + - name: 🇩🇪 Deutsch + link: /de/ + lang: de + - name: 🇫🇷 Français + link: /fr/ + lang: fr + - name: 🇪🇸 Español + link: /es/ + lang: es + - name: 🇵🇹 Português + link: /pt/ + lang: pt + - name: 🇮🇳 हिन्दी + link: /hi/ + lang: hi + - name: 🇸🇦 العربية + link: /ar/ + lang: ar + social: + - icon: fontawesome/brands/github + link: https://github.com/ultralytics + - icon: fontawesome/brands/linkedin + link: https://www.linkedin.com/company/ultralytics/ + - icon: fontawesome/brands/twitter + link: https://twitter.com/ultralytics + - icon: fontawesome/brands/youtube + link: https://www.youtube.com/ultralytics + - icon: fontawesome/brands/docker + link: https://hub.docker.com/r/ultralytics/ultralytics/ + - icon: fontawesome/brands/python + link: https://pypi.org/project/ultralytics/ + - icon: fontawesome/brands/discord + link: https://ultralytics.com/discord + +extra_css: + - stylesheets/style.css +extra_javascript: + - javascript/extra.js + +markdown_extensions: + - admonition + - md_in_html + - tables + - attr_list + - def_list + - pymdownx.critic + - pymdownx.caret + - pymdownx.keys + - pymdownx.mark + - pymdownx.tilde + - pymdownx.details + - pymdownx.superfences + - pymdownx.inlinehilite + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.snippets: + base_path: ./ + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.tabbed: + alternate_style: true + + +# Primary navigation --------------------------------------------------------------------------------------------------- 
+nav: + - 홈: + - 홈: index.md + - 빠른 시작: quickstart.md + - 모드: + - modes/index.md + - 훈련: modes/train.md + - 검증: modes/val.md + - 예측: modes/predict.md + - 내보내기: modes/export.md + - 추적: modes/track.md + - 벤치마크: modes/benchmark.md + - 작업: + - tasks/index.md + - 감지: tasks/detect.md + - 분할: tasks/segment.md + - 분류: tasks/classify.md + - 포즈: tasks/pose.md + - 빠른 시작: quickstart.md + - 모드: + - modes/index.md + - 훈련: modes/train.md + - 검증: modes/val.md + - 예측: modes/predict.md + - 내보내기: modes/export.md + - 추적: modes/track.md + - 벤치마크: modes/benchmark.md + - 작업: + - tasks/index.md + - 감지: tasks/detect.md + - 분할: tasks/segment.md + - 분류: tasks/classify.md + - 포즈: tasks/pose.md + - 모델: + - models/index.md + - YOLOv3: models/yolov3.md + - YOLOv4: models/yolov4.md + - YOLOv5: models/yolov5.md + - YOLOv6: models/yolov6.md + - YOLOv7: models/yolov7.md + - YOLOv8: models/yolov8.md + - SAM (Segment Anything Model): models/sam.md + - MobileSAM (Mobile Segment Anything Model): models/mobile-sam.md + - FastSAM (Fast Segment Anything Model): models/fast-sam.md + - YOLO-NAS (Neural Architecture Search): models/yolo-nas.md + - RT-DETR (Realtime Detection Transformer): models/rtdetr.md + - 데이터셋: + - datasets/index.md + +# Plugins including 301 redirects navigation --------------------------------------------------------------------------- +plugins: + - search: + lang: ko + - ultralytics: + add_desc: False + add_image: True + add_share_buttons: True + default_image: https://github.com/ultralytics/ultralytics/assets/26833433/6d09221c-c52a-4234-9a5d-b862e93c6529 diff --git a/docs/mkdocs_pt.yml b/docs/mkdocs_pt.yml new file mode 100644 index 0000000..724afab --- /dev/null +++ b/docs/mkdocs_pt.yml @@ -0,0 +1,213 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +site_name: Documentação Ultralytics YOLOv8 +site_description: Explore o Ultralytics YOLOv8, um modelo avançado de detecção de objetos e segmentação de imagem em tempo real, adequado para várias aplicações e plataformas de hardware. 
+site_url: https://docs.ultralytics.com/pt/ +site_author: Ultralytics +repo_url: https://github.com/ultralytics/ultralytics +edit_uri: https://github.com/ultralytics/ultralytics/tree/main/docs/pt/ +repo_name: ultralytics/ultralytics +remote_name: https://github.com/ultralytics/docs +docs_dir: 'pt/' # where to find the markdown files +site_dir: '../site/pt' # where to publish to + +theme: + name: material + language: pt + custom_dir: overrides/ + logo: https://github.com/ultralytics/assets/raw/main/logo/Ultralytics_Logotype_Reverse.svg + favicon: assets/favicon.ico + icon: + repo: fontawesome/brands/github + # font: # disabled for faster page load times + # text: Helvetica + # code: Roboto Mono + palette: + - media: "(prefers-color-scheme)" + toggle: + icon: material/brightness-auto + name: Switch to light mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: black + accent: indigo + toggle: + icon: material/brightness-4 + name: Switch to system preference + - media: "(prefers-color-scheme: light)" + scheme: default + primary: indigo + accent: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + features: + - announce.dismiss + - content.action.edit + - content.code.annotate + - content.code.copy + - content.tooltips + - search.highlight + - search.share + - search.suggest + - toc.follow + - navigation.top + - navigation.tabs + - navigation.tabs.sticky + - navigation.prune + - navigation.footer + - navigation.tracking + - navigation.instant + - navigation.instant.progress + - navigation.indexes + - navigation.sections + - content.tabs.link # all code tabs change simultaneously + +# Customization +copyright: © 2023 Ultralytics Inc. All rights reserved. 
+extra: + # version: + # provider: mike # version drop-down menu + robots: robots.txt + analytics: + provider: google + property: G-2M5EHKC0BH + alternate: # language drop-down + - name: 🇬🇧 English + link: / + lang: en + - name: 🇨🇳 简体中文 + link: /zh/ + lang: zh + - name: 🇰🇷 한국어 + link: /ko/ + lang: ko + - name: 🇯🇵 日本語 + link: /ja/ + lang: ja + - name: 🇷🇺 Русский + link: /ru/ + lang: ru + - name: 🇩🇪 Deutsch + link: /de/ + lang: de + - name: 🇫🇷 Français + link: /fr/ + lang: fr + - name: 🇪🇸 Español + link: /es/ + lang: es + - name: 🇵🇹 Português + link: /pt/ + lang: pt + - name: 🇮🇳 हिन्दी + link: /hi/ + lang: hi + - name: 🇸🇦 العربية + link: /ar/ + lang: ar + social: + - icon: fontawesome/brands/github + link: https://github.com/ultralytics + - icon: fontawesome/brands/linkedin + link: https://www.linkedin.com/company/ultralytics/ + - icon: fontawesome/brands/twitter + link: https://twitter.com/ultralytics + - icon: fontawesome/brands/youtube + link: https://www.youtube.com/ultralytics + - icon: fontawesome/brands/docker + link: https://hub.docker.com/r/ultralytics/ultralytics/ + - icon: fontawesome/brands/python + link: https://pypi.org/project/ultralytics/ + - icon: fontawesome/brands/discord + link: https://ultralytics.com/discord + +extra_css: + - stylesheets/style.css +extra_javascript: + - javascript/extra.js + +markdown_extensions: + - admonition + - md_in_html + - tables + - attr_list + - def_list + - pymdownx.critic + - pymdownx.caret + - pymdownx.keys + - pymdownx.mark + - pymdownx.tilde + - pymdownx.details + - pymdownx.superfences + - pymdownx.inlinehilite + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.snippets: + base_path: ./ + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.tabbed: + alternate_style: true + + +# Primary navigation --------------------------------------------------------------------------------------------------- 
+nav: + - Página Inicial: + - Página Inicial: index.md + - Início Rápido: quickstart.md + - Modos: + - modes/index.md + - Treinamento: modes/train.md + - Validação: modes/val.md + - Previsão: modes/predict.md + - Exportação: modes/export.md + - Rastreamento: modes/track.md + - Benchmarking: modes/benchmark.md + - Tarefas: + - tasks/index.md + - Detecção: tasks/detect.md + - Segmentação: tasks/segment.md + - Classificação: tasks/classify.md + - Pose: tasks/pose.md + - Início Rápido: quickstart.md + - Modos: + - modes/index.md + - Treinamento: modes/train.md + - Validação: modes/val.md + - Previsão: modes/predict.md + - Exportação: modes/export.md + - Rastreamento: modes/track.md + - Benchmarking: modes/benchmark.md + - Tarefas: + - tasks/index.md + - Detecção: tasks/detect.md + - Segmentação: tasks/segment.md + - Classificação: tasks/classify.md + - Pose: tasks/pose.md + - Modelos: + - models/index.md + - YOLOv3: models/yolov3.md + - YOLOv4: models/yolov4.md + - YOLOv5: models/yolov5.md + - YOLOv6: models/yolov6.md + - YOLOv7: models/yolov7.md + - YOLOv8: models/yolov8.md + - SAM (Segment Anything Model): models/sam.md + - MobileSAM (Mobile Segment Anything Model): models/mobile-sam.md + - FastSAM (Fast Segment Anything Model): models/fast-sam.md + - YOLO-NAS (Neural Architecture Search): models/yolo-nas.md + - RT-DETR (Realtime Detection Transformer): models/rtdetr.md + - Conjuntos de Dados: + - datasets/index.md + +# Plugins including 301 redirects navigation --------------------------------------------------------------------------- +plugins: + - search: + lang: pt + - ultralytics: + add_desc: False + add_image: True + add_share_buttons: True + default_image: https://github.com/ultralytics/ultralytics/assets/26833433/6d09221c-c52a-4234-9a5d-b862e93c6529 diff --git a/docs/mkdocs_ru.yml b/docs/mkdocs_ru.yml new file mode 100644 index 0000000..b097c13 --- /dev/null +++ b/docs/mkdocs_ru.yml @@ -0,0 +1,213 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +site_name: 
Документация Ultralytics YOLOv8 +site_description: Изучите Ultralytics YOLOv8, передовую модель для обнаружения объектов и сегментации изображений в реальном времени, подходящую для различных приложений и аппаратных платформ. +site_url: https://docs.ultralytics.com/ru/ +site_author: Ultralytics +repo_url: https://github.com/ultralytics/ultralytics +edit_uri: https://github.com/ultralytics/ultralytics/tree/main/docs/ru/ +repo_name: ultralytics/ultralytics +remote_name: https://github.com/ultralytics/docs +docs_dir: 'ru/' # where to find the markdown files +site_dir: '../site/ru' # where to publish to + +theme: + name: material + language: ru + custom_dir: overrides/ + logo: https://github.com/ultralytics/assets/raw/main/logo/Ultralytics_Logotype_Reverse.svg + favicon: assets/favicon.ico + icon: + repo: fontawesome/brands/github + # font: # disabled for faster page load times + # text: Helvetica + # code: Roboto Mono + palette: + - media: "(prefers-color-scheme)" + toggle: + icon: material/brightness-auto + name: Switch to light mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: black + accent: indigo + toggle: + icon: material/brightness-4 + name: Switch to system preference + - media: "(prefers-color-scheme: light)" + scheme: default + primary: indigo + accent: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + features: + - announce.dismiss + - content.action.edit + - content.code.annotate + - content.code.copy + - content.tooltips + - search.highlight + - search.share + - search.suggest + - toc.follow + - navigation.top + - navigation.tabs + - navigation.tabs.sticky + - navigation.prune + - navigation.footer + - navigation.tracking + - navigation.instant + - navigation.instant.progress + - navigation.indexes + - navigation.sections + - content.tabs.link # all code tabs change simultaneously + +# Customization +copyright: © 2023 Ultralytics Inc. All rights reserved. 
+extra: + # version: + # provider: mike # version drop-down menu + robots: robots.txt + analytics: + provider: google + property: G-2M5EHKC0BH + alternate: # language drop-down + - name: 🇬🇧 English + link: / + lang: en + - name: 🇨🇳 简体中文 + link: /zh/ + lang: zh + - name: 🇰🇷 한국어 + link: /ko/ + lang: ko + - name: 🇯🇵 日本語 + link: /ja/ + lang: ja + - name: 🇷🇺 Русский + link: /ru/ + lang: ru + - name: 🇩🇪 Deutsch + link: /de/ + lang: de + - name: 🇫🇷 Français + link: /fr/ + lang: fr + - name: 🇪🇸 Español + link: /es/ + lang: es + - name: 🇵🇹 Português + link: /pt/ + lang: pt + - name: 🇮🇳 हिन्दी + link: /hi/ + lang: hi + - name: 🇸🇦 العربية + link: /ar/ + lang: ar + social: + - icon: fontawesome/brands/github + link: https://github.com/ultralytics + - icon: fontawesome/brands/linkedin + link: https://www.linkedin.com/company/ultralytics/ + - icon: fontawesome/brands/twitter + link: https://twitter.com/ultralytics + - icon: fontawesome/brands/youtube + link: https://www.youtube.com/ultralytics + - icon: fontawesome/brands/docker + link: https://hub.docker.com/r/ultralytics/ultralytics/ + - icon: fontawesome/brands/python + link: https://pypi.org/project/ultralytics/ + - icon: fontawesome/brands/discord + link: https://ultralytics.com/discord + +extra_css: + - stylesheets/style.css +extra_javascript: + - javascript/extra.js + +markdown_extensions: + - admonition + - md_in_html + - tables + - attr_list + - def_list + - pymdownx.critic + - pymdownx.caret + - pymdownx.keys + - pymdownx.mark + - pymdownx.tilde + - pymdownx.details + - pymdownx.superfences + - pymdownx.inlinehilite + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.snippets: + base_path: ./ + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.tabbed: + alternate_style: true + + +# Primary navigation --------------------------------------------------------------------------------------------------- 
+nav: + - Главная: + - Главная: index.md + - Быстрый старт: quickstart.md + - Режимы: + - modes/index.md + - Обучение: modes/train.md + - Валидация: modes/val.md + - Прогнозирование: modes/predict.md + - Экспорт: modes/export.md + - Отслеживание: modes/track.md + - Бенчмаркинг: modes/benchmark.md + - Задачи: + - tasks/index.md + - Обнаружение: tasks/detect.md + - Сегментация: tasks/segment.md + - Классификация: tasks/classify.md + - Поза: tasks/pose.md + - Быстрый старт: quickstart.md + - Режимы: + - modes/index.md + - Обучение: modes/train.md + - Валидация: modes/val.md + - Прогнозирование: modes/predict.md + - Экспорт: modes/export.md + - Отслеживание: modes/track.md + - Бенчмаркинг: modes/benchmark.md + - Задачи: + - tasks/index.md + - Обнаружение: tasks/detect.md + - Сегментация: tasks/segment.md + - Классификация: tasks/classify.md + - Поза: tasks/pose.md + - Модели: + - models/index.md + - YOLOv3: models/yolov3.md + - YOLOv4: models/yolov4.md + - YOLOv5: models/yolov5.md + - YOLOv6: models/yolov6.md + - YOLOv7: models/yolov7.md + - YOLOv8: models/yolov8.md + - SAM (Segment Anything Model): models/sam.md + - MobileSAM (Mobile Segment Anything Model): models/mobile-sam.md + - FastSAM (Fast Segment Anything Model): models/fast-sam.md + - YOLO-NAS (Neural Architecture Search): models/yolo-nas.md + - RT-DETR (Realtime Detection Transformer): models/rtdetr.md + - Данные: + - datasets/index.md + +# Plugins including 301 redirects navigation --------------------------------------------------------------------------- +plugins: + - search: + lang: ru + - ultralytics: + add_desc: False + add_image: True + add_share_buttons: True + default_image: https://github.com/ultralytics/ultralytics/assets/26833433/6d09221c-c52a-4234-9a5d-b862e93c6529 diff --git a/docs/mkdocs_zh.yml b/docs/mkdocs_zh.yml new file mode 100644 index 0000000..393d749 --- /dev/null +++ b/docs/mkdocs_zh.yml @@ -0,0 +1,213 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +site_name: Ultralytics YOLOv8 文档 
+site_description: 探索Ultralytics YOLOv8,这是一款尖端的实时对象检测和图像分割模型,适用于各种应用和硬件平台。 +site_url: https://docs.ultralytics.com/zh/ +site_author: Ultralytics +repo_url: https://github.com/ultralytics/ultralytics +edit_uri: https://github.com/ultralytics/ultralytics/tree/main/docs/zh/ +repo_name: ultralytics/ultralytics +remote_name: https://github.com/ultralytics/docs +docs_dir: 'zh/' # where to find the markdown files +site_dir: '../site/zh' # where to publish to + +theme: + name: material + language: zh + custom_dir: overrides/ + logo: https://github.com/ultralytics/assets/raw/main/logo/Ultralytics_Logotype_Reverse.svg + favicon: assets/favicon.ico + icon: + repo: fontawesome/brands/github + # font: # disabled for faster page load times + # text: Helvetica + # code: Roboto Mono + palette: + - media: "(prefers-color-scheme)" + toggle: + icon: material/brightness-auto + name: Switch to light mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: black + accent: indigo + toggle: + icon: material/brightness-4 + name: Switch to system preference + - media: "(prefers-color-scheme: light)" + scheme: default + primary: indigo + accent: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + features: + - announce.dismiss + - content.action.edit + - content.code.annotate + - content.code.copy + - content.tooltips + - search.highlight + - search.share + - search.suggest + - toc.follow + - navigation.top + - navigation.tabs + - navigation.tabs.sticky + - navigation.prune + - navigation.footer + - navigation.tracking + - navigation.instant + - navigation.instant.progress + - navigation.indexes + - navigation.sections + - content.tabs.link # all code tabs change simultaneously + +# Customization +copyright: © 2023 Ultralytics Inc. All rights reserved. 
+extra: + # version: + # provider: mike # version drop-down menu + robots: robots.txt + analytics: + provider: google + property: G-2M5EHKC0BH + alternate: # language drop-down + - name: 🇬🇧 English + link: / + lang: en + - name: 🇨🇳 简体中文 + link: /zh/ + lang: zh + - name: 🇰🇷 한국어 + link: /ko/ + lang: ko + - name: 🇯🇵 日本語 + link: /ja/ + lang: ja + - name: 🇷🇺 Русский + link: /ru/ + lang: ru + - name: 🇩🇪 Deutsch + link: /de/ + lang: de + - name: 🇫🇷 Français + link: /fr/ + lang: fr + - name: 🇪🇸 Español + link: /es/ + lang: es + - name: 🇵🇹 Português + link: /pt/ + lang: pt + - name: 🇮🇳 हिन्दी + link: /hi/ + lang: hi + - name: 🇸🇦 العربية + link: /ar/ + lang: ar + social: + - icon: fontawesome/brands/github + link: https://github.com/ultralytics + - icon: fontawesome/brands/linkedin + link: https://www.linkedin.com/company/ultralytics/ + - icon: fontawesome/brands/twitter + link: https://twitter.com/ultralytics + - icon: fontawesome/brands/youtube + link: https://www.youtube.com/ultralytics + - icon: fontawesome/brands/docker + link: https://hub.docker.com/r/ultralytics/ultralytics/ + - icon: fontawesome/brands/python + link: https://pypi.org/project/ultralytics/ + - icon: fontawesome/brands/discord + link: https://ultralytics.com/discord + +extra_css: + - stylesheets/style.css +extra_javascript: + - javascript/extra.js + +markdown_extensions: + - admonition + - md_in_html + - tables + - attr_list + - def_list + - pymdownx.critic + - pymdownx.caret + - pymdownx.keys + - pymdownx.mark + - pymdownx.tilde + - pymdownx.details + - pymdownx.superfences + - pymdownx.inlinehilite + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.snippets: + base_path: ./ + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.tabbed: + alternate_style: true + + +# Primary navigation --------------------------------------------------------------------------------------------------- 
+nav: + - 主页: + - 主页: index.md + - 快速开始: quickstart.md + - 模式: + - modes/index.md + - 训练: modes/train.md + - 验证: modes/val.md + - 预测: modes/predict.md + - 导出: modes/export.md + - 跟踪: modes/track.md + - 基准测试: modes/benchmark.md + - 任务: + - tasks/index.md + - 检测: tasks/detect.md + - 分割: tasks/segment.md + - 分类: tasks/classify.md + - 姿态: tasks/pose.md + - 快速开始: quickstart.md + - 模式: + - modes/index.md + - 训练: modes/train.md + - 验证: modes/val.md + - 预测: modes/predict.md + - 导出: modes/export.md + - 跟踪: modes/track.md + - 基准测试: modes/benchmark.md + - 任务: + - tasks/index.md + - 检测: tasks/detect.md + - 分割: tasks/segment.md + - 分类: tasks/classify.md + - 姿态: tasks/pose.md + - 模型: + - models/index.md + - YOLOv3: models/yolov3.md + - YOLOv4: models/yolov4.md + - YOLOv5: models/yolov5.md + - YOLOv6: models/yolov6.md + - YOLOv7: models/yolov7.md + - YOLOv8: models/yolov8.md + - SAM (Segment Anything Model): models/sam.md + - MobileSAM (Mobile Segment Anything Model): models/mobile-sam.md + - FastSAM (Fast Segment Anything Model): models/fast-sam.md + - YOLO-NAS (Neural Architecture Search): models/yolo-nas.md + - RT-DETR (Realtime Detection Transformer): models/rtdetr.md + - 数据集: + - datasets/index.md + +# Plugins including 301 redirects navigation --------------------------------------------------------------------------- +plugins: + - search: + lang: zh + - ultralytics: + add_desc: False + add_image: True + add_share_buttons: True + default_image: https://github.com/ultralytics/ultralytics/assets/26833433/6d09221c-c52a-4234-9a5d-b862e93c6529 diff --git a/docs/overrides/assets/favicon.ico b/docs/overrides/assets/favicon.ico new file mode 100644 index 0000000..7aa5066 Binary files /dev/null and b/docs/overrides/assets/favicon.ico differ diff --git a/docs/overrides/javascript/extra.js b/docs/overrides/javascript/extra.js new file mode 100644 index 0000000..d67fece --- /dev/null +++ b/docs/overrides/javascript/extra.js @@ -0,0 +1,69 @@ +// Function that applies light/dark theme 
based on the user's preference +const applyAutoTheme = () => { + // Determine the user's preferred color scheme + const prefersLight = window.matchMedia("(prefers-color-scheme: light)").matches; + const prefersDark = window.matchMedia("(prefers-color-scheme: dark)").matches; + + // Apply the appropriate attributes based on the user's preference + if (prefersLight) { + document.body.setAttribute("data-md-color-scheme", "default"); + document.body.setAttribute("data-md-color-primary", "indigo"); + } else if (prefersDark) { + document.body.setAttribute("data-md-color-scheme", "slate"); + document.body.setAttribute("data-md-color-primary", "black"); + } +}; + +// Function that checks and applies light/dark theme based on the user's preference (if auto theme is enabled) +function checkAutoTheme() { + // Array of supported language codes -> each language has its own palette (stored in local storage) + const supportedLangCodes = ["en", "zh", "ko", "ja", "ru", "de", "fr", "es", "pt"]; + // Get the URL path + const path = window.location.pathname; + // Extract the language code from the URL (assuming it's in the format /xx/...) + const langCode = path.split("/")[1]; + // Check if the extracted language code is in the supported languages + const isValidLangCode = supportedLangCodes.includes(langCode); + // Construct the local storage key based on the language code if valid, otherwise default to the root key + const localStorageKey = isValidLangCode ? 
`/${langCode}/.__palette` : "/.__palette"; + // Retrieve the palette from local storage using the constructed key + const palette = localStorage.getItem(localStorageKey); + if (palette) { + // Check if the palette's index is 0 (auto theme) + const paletteObj = JSON.parse(palette); + if (paletteObj && paletteObj.index === 0) { + applyAutoTheme(); + } + } +} + +// Run function when the script loads +checkAutoTheme(); + +// Re-run the function when the user's preference changes (when the user changes their system theme) +window.matchMedia("(prefers-color-scheme: light)").addEventListener("change", checkAutoTheme); +window.matchMedia("(prefers-color-scheme: dark)").addEventListener("change", checkAutoTheme); + +// Re-run the function when the palette changes (e.g. user switched from dark theme to auto theme) +// ! We can't use window.addEventListener("storage", checkAutoTheme) because it will NOT be triggered on the current tab +// ! So we have to use the following workaround: +// Get the palette input for auto theme +var autoThemeInput = document.getElementById("__palette_1"); +if (autoThemeInput) { + // Add a click event listener to the input + autoThemeInput.addEventListener("click", function () { + // Check if the auto theme is selected + if (autoThemeInput.checked) { + // Re-run the function after a short delay (to ensure that the palette has been updated) + setTimeout(applyAutoTheme); + } + }); +} + +// Add iframe navigation +window.onhashchange = function() { + window.parent.postMessage({ + type: 'navigation', + hash: window.location.pathname + window.location.search + window.location.hash + }, '*'); +}; diff --git a/docs/overrides/partials/comments.html b/docs/overrides/partials/comments.html new file mode 100644 index 0000000..57050a1 --- /dev/null +++ b/docs/overrides/partials/comments.html @@ -0,0 +1,50 @@ +{% if page.meta.comments %} +

{{ lang.t("meta.comments") }}

+ + + + + + +{% endif %} diff --git a/docs/overrides/partials/source-file.html b/docs/overrides/partials/source-file.html new file mode 100644 index 0000000..84e2ab1 --- /dev/null +++ b/docs/overrides/partials/source-file.html @@ -0,0 +1,26 @@ +{% import "partials/language.html" as lang with context %} + + + +
+
+ + + + {% if page.meta.git_revision_date_localized %} + 📅 {{ lang.t("source.file.date.updated") }}: + {{ page.meta.git_revision_date_localized }} + {% if page.meta.git_creation_date_localized %} +
+ 🎂 {{ lang.t("source.file.date.created") }}: + {{ page.meta.git_creation_date_localized }} + {% endif %} + + + {% elif page.meta.revision_date %} + 📅 {{ lang.t("source.file.date.updated") }}: + {{ page.meta.revision_date }} + {% endif %} +
+
diff --git a/docs/overrides/stylesheets/style.css b/docs/overrides/stylesheets/style.css new file mode 100644 index 0000000..2fa6c54 --- /dev/null +++ b/docs/overrides/stylesheets/style.css @@ -0,0 +1,52 @@ +/* Table format like GitHub ----------------------------------------------------------------------------------------- */ +th, +td { + border: 1px solid var(--md-typeset-table-color); + border-spacing: 0; + border-bottom: none; + border-left: none; + border-top: none; +} + +.md-typeset__table { + line-height: 1; +} + +.md-typeset__table table:not([class]) { + font-size: 0.74rem; + border-right: none; +} + +.md-typeset__table table:not([class]) td, +.md-typeset__table table:not([class]) th { + padding: 9px; +} + +/* light mode alternating table bg colors */ +.md-typeset__table tr:nth-child(2n) { + background-color: #f6f8fa; +} + +/* dark mode alternating table bg colors */ +[data-md-color-scheme="slate"] .md-typeset__table tr:nth-child(2n) { + background-color: #161b22; +} +/* Table format like GitHub ----------------------------------------------------------------------------------------- */ + +/* Code block vertical scroll */ +div.highlight { + max-height: 20rem; + overflow-y: auto; /* for adding a scrollbar when needed */ +} + +/* Set content width */ +.md-grid { + max-width: 1440px; +} + +/* Language dropdown selector (default is shorter 200px in length) */ +/* Not working as activates on hover under icon area +body .md-select__inner { + max-height: 500px !important; +} +*/ diff --git a/docs/pt/index.md b/docs/pt/index.md new file mode 100644 index 0000000..cc87e6e --- /dev/null +++ b/docs/pt/index.md @@ -0,0 +1,82 @@ +--- +comments: true +description: Explore um guia completo do Ultralytics YOLOv8, um modelo de detecção de objetos e segmentação de imagens de alta velocidade e precisão. Tutoriais de instalação, previsão, treinamento e muito mais. 
+keywords: Ultralytics, YOLOv8, detecção de objetos, segmentação de imagens, aprendizado de máquina, aprendizado profundo, visão computacional, instalação do YOLOv8, previsão do YOLOv8, treinamento do YOLOv8, história do YOLO, licenças do YOLO +--- + +
+

+ + Banner Ultralytics YOLO +

+ GitHub da Ultralytics + space + LinkedIn da Ultralytics + space + Twitter da Ultralytics + space + YouTube da Ultralytics + space + TikTok da Ultralytics + space + Instagram da Ultralytics + space + Discord da Ultralytics +
+
+ Integração Contínua da Ultralytics + Cobertura de Código da Ultralytics + Citação do YOLOv8 + Contagem de Pulls no Docker +
+ Executar no Gradient + Abrir no Colab + Abrir no Kaggle +
+ +Apresentamos o [Ultralytics](https://ultralytics.com) [YOLOv8](https://github.com/ultralytics/ultralytics), a mais recente versão do aclamado modelo de detecção de objetos em tempo real e segmentação de imagens. O YOLOv8 é baseado nos mais recentes avanços do aprendizado profundo e visão computacional, oferecendo um desempenho sem paralelo em termos de velocidade e precisão. Seu design simplificado o torna adequado para várias aplicações e facilmente adaptável a diferentes plataformas de hardware, desde dispositivos de borda até APIs na nuvem. + +Explore os Documentos do YOLOv8, um recurso abrangente projetado para ajudá-lo a entender e utilizar suas características e capacidades. Seja você um praticante experiente de aprendizado de máquina ou novo no campo, este hub tem como objetivo maximizar o potencial do YOLOv8 em seus projetos. + +!!! Note "Nota" + + 🚧 Nossa documentação em vários idiomas está atualmente em construção e estamos trabalhando arduamente para aprimorá-la. Agradecemos sua paciência! 🙏 + +## Por Onde Começar + +- **Instalar** `ultralytics` com pip e começar a funcionar em minutos   [:material-clock-fast: Começar](quickstart.md){ .md-button } +- **Prever** novas imagens e vídeos com o YOLOv8   [:octicons-image-16: Prever em Imagens](modes/predict.md){ .md-button } +- **Treinar** um novo modelo YOLOv8 em seu próprio conjunto de dados personalizado   [:fontawesome-solid-brain: Treinar um Modelo](modes/train.md){ .md-button } +- **Explorar** tarefas do YOLOv8 como segmentar, classificar, estimar pose e rastrear   [:material-magnify-expand: Explorar Tarefas](tasks/index.md){ .md-button } + +

+
+ +
+ Assistir: Como Treinar um Modelo YOLOv8 em Seu Conjunto de Dados Personalizado no Google Colab. +

+ +## YOLO: Uma Breve História + +[YOLO](https://arxiv.org/abs/1506.02640) (You Only Look Once), um popular modelo de detecção de objetos e segmentação de imagens, foi desenvolvido por Joseph Redmon e Ali Farhadi na Universidade de Washington. Lançado em 2015, o YOLO rapidamente ganhou popularidade por sua alta velocidade e precisão. + +- [YOLOv2](https://arxiv.org/abs/1612.08242), lançado em 2016, aprimorou o modelo original incorporando normalização em lote, caixas âncora e aglomerados dimensionais. +- [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf), lançado em 2018, melhorou ainda mais o desempenho do modelo usando uma rede dorsal mais eficiente, múltiplas âncoras e pooling piramidal espacial. +- [YOLOv4](https://arxiv.org/abs/2004.10934) foi lançado em 2020, introduzindo inovações como a ampliação de dados Mosaic, uma nova cabeça de detecção sem âncoras e uma nova função de perda. +- [YOLOv5](https://github.com/ultralytics/yolov5) melhorou ainda mais o desempenho do modelo e adicionou novos recursos, como otimização de hiperparâmetros, rastreamento integrado de experimentos e exportação automática para formatos de exportação populares. +- [YOLOv6](https://github.com/meituan/YOLOv6) foi disponibilizado em código aberto por [Meituan](https://about.meituan.com/) em 2022 e está em uso em muitos dos robôs autônomos de entrega da empresa. +- [YOLOv7](https://github.com/WongKinYiu/yolov7) adicionou tarefas adicionais, como estimativa de pose no conjunto de dados de keypoints COCO. +- [YOLOv8](https://github.com/ultralytics/ultralytics), a mais recente versão do YOLO pela Ultralytics. Como um modelo de última geração, o YOLOv8 baseia-se no sucesso das versões anteriores, introduzindo novos recursos e melhorias para desempenho, flexibilidade e eficiência aprimorados. 
O YOLOv8 suporta uma gama completa de tarefas de IA de visão, incluindo [detecção](tasks/detect.md), [segmentação](tasks/segment.md), [estimativa de pose](tasks/pose.md), [rastreamento](modes/track.md) e [classificação](tasks/classify.md). Essa versatilidade permite que os usuários aproveitem as capacidades do YOLOv8 em diversas aplicações e domínios. + +## Licenças YOLO: Como o YOLO da Ultralytics é licenciado? + +A Ultralytics oferece duas opções de licença para acomodar casos de uso diversos: + +- **Licença AGPL-3.0**: Essa licença de código aberto [aprovada pela OSI](https://opensource.org/licenses/) é ideal para estudantes e entusiastas, promovendo colaboração aberta e compartilhamento de conhecimento. Veja o arquivo [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) para mais detalhes. +- **Licença Empresarial**: Projetada para uso comercial, esta licença permite a integração perfeita do software Ultralytics e modelos de IA em bens e serviços comerciais, contornando os requisitos de código aberto da AGPL-3.0. Se o seu cenário envolver a incorporação de nossas soluções em uma oferta comercial, entre em contato através do [Licenciamento da Ultralytics](https://ultralytics.com/license). + +Nossa estratégia de licenciamento é projetada para garantir que qualquer melhoria em nossos projetos de código aberto retorne à comunidade. Mantemos os princípios de código aberto próximos ao nosso coração ❤️, e nossa missão é garantir que nossas contribuições possam ser utilizadas e expandidas de formas que beneficiem todos. diff --git a/docs/pt/models/fast-sam.md b/docs/pt/models/fast-sam.md new file mode 100644 index 0000000..54543ec --- /dev/null +++ b/docs/pt/models/fast-sam.md @@ -0,0 +1,193 @@ +--- +comments: true +description: Explore o FastSAM, uma solução baseada em CNN para segmentação em tempo real de objetos em imagens. Melhor interação do usuário, eficiência computacional e adaptabilidade em tarefas de visão. 
+keywords: FastSAM, aprendizado de máquina, solução baseada em CNN, segmentação de objetos, solução em tempo real, Ultralytics, tarefas de visão, processamento de imagens, aplicações industriais, interação do usuário +--- + +# Fast Segment Anything Model (FastSAM) + +O Fast Segment Anything Model (FastSAM) é uma solução inovadora baseada em CNN em tempo real para a tarefa de Segmentar Qualquer Coisa. Essa tarefa foi projetada para segmentar qualquer objeto dentro de uma imagem com base em várias possíveis instruções de interação do usuário. O FastSAM reduz significativamente as demandas computacionais, mantendo um desempenho competitivo, tornando-o uma escolha prática para uma variedade de tarefas de visão. + +![Visão geral da arquitetura do Fast Segment Anything Model (FastSAM)](https://user-images.githubusercontent.com/26833433/248551984-d98f0f6d-7535-45d0-b380-2e1440b52ad7.jpg) + +## Visão Geral + +O FastSAM é projetado para abordar as limitações do [Segment Anything Model (SAM)](sam.md), um modelo Transformer pesado com requisitos substanciais de recursos computacionais. O FastSAM divide a tarefa de segmentar qualquer coisa em duas etapas sequenciais: segmentação de todas as instâncias e seleção guiada por instruções. A primeira etapa usa o [YOLOv8-seg](../tasks/segment.md) para produzir as máscaras de segmentação de todas as instâncias na imagem. Na segunda etapa, ele gera a região de interesse correspondente à instrução. + +## Recursos Principais + +1. **Solução em Tempo Real:** Aproveitando a eficiência computacional das CNNs, o FastSAM fornece uma solução em tempo real para a tarefa de segmentar qualquer coisa, tornando-o valioso para aplicações industriais que exigem resultados rápidos. + +2. **Eficiência e Desempenho:** O FastSAM oferece uma redução significativa nas demandas computacionais e de recursos sem comprometer a qualidade do desempenho. 
Ele alcança um desempenho comparável ao SAM, mas com recursos computacionais drasticamente reduzidos, permitindo aplicações em tempo real. + +3. **Segmentação Guiada por Instruções:** O FastSAM pode segmentar qualquer objeto dentro de uma imagem com base em várias possíveis instruções de interação do usuário, proporcionando flexibilidade e adaptabilidade em diferentes cenários. + +4. **Baseado em YOLOv8-seg:** O FastSAM é baseado no [YOLOv8-seg](../tasks/segment.md), um detector de objetos equipado com um ramo de segmentação de instâncias. Isso permite que ele produza efetivamente as máscaras de segmentação de todas as instâncias em uma imagem. + +5. **Resultados Competitivos em Bancos de Dados de Referência:** Na tarefa de proposta de objetos no MS COCO, o FastSAM alcança pontuações altas em uma velocidade significativamente mais rápida do que o [SAM](sam.md) em uma única NVIDIA RTX 3090, demonstrando sua eficiência e capacidade. + +6. **Aplicações Práticas:** A abordagem proposta fornece uma nova solução prática para um grande número de tarefas de visão em alta velocidade, dezenas ou centenas de vezes mais rápido do que os métodos atuais. + +7. **Viabilidade de Compressão do Modelo:** O FastSAM demonstra a viabilidade de um caminho que pode reduzir significativamente o esforço computacional, introduzindo um conhecimento prévio (prior) artificial na estrutura, abrindo assim novas possibilidades para arquiteturas de modelos grandes para tarefas gerais de visão. + +## Modelos Disponíveis, Tarefas Suportadas e Modos de Operação + +Esta tabela apresenta os modelos disponíveis com seus pesos pré-treinados específicos, as tarefas que eles suportam e sua compatibilidade com diferentes modos de operação, como [Inferência](../modes/predict.md), [Validação](../modes/val.md), [Treinamento](../modes/train.md) e [Exportação](../modes/export.md), indicados por emojis ✅ para modos suportados e emojis ❌ para modos não suportados. 
+ +| Tipo de Modelo | Pesos Pré-treinados | Tarefas Suportadas | Inferência | Validação | Treinamento | Exportação | +|----------------|---------------------|--------------------------------------------------|------------|-----------|-------------|------------| +| FastSAM-s | `FastSAM-s.pt` | [Segmentação de Instâncias](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| FastSAM-x | `FastSAM-x.pt` | [Segmentação de Instâncias](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## Exemplos de Uso + +Os modelos FastSAM são fáceis de integrar em suas aplicações Python. A Ultralytics fornece uma API Python amigável ao usuário e comandos de linha de comando (CLI) para facilitar o desenvolvimento. + +### Uso de Predição + +Para realizar detecção de objetos em uma imagem, use o método `predict` conforme mostrado abaixo: + +!!! Example "Exemplo" + + === "Python" + ```python + from ultralytics import FastSAM + from ultralytics.models.fastsam import FastSAMPrompt + + # Definir uma fonte de inferência + source = 'caminho/para/onibus.jpg' + + # Criar um modelo FastSAM + model = FastSAM('FastSAM-s.pt') # ou FastSAM-x.pt + + # Executar inferência em uma imagem + everything_results = model(source, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9) + + # Preparar um objeto de Processo de Instruções + prompt_process = FastSAMPrompt(source, everything_results, device='cpu') + + # Instrução: tudo + ann = prompt_process.everything_prompt() + + # Forma padrão (bbox) [0,0,0,0] -> [x1,y1,x2,y2] + ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300]) + + # Instrução: texto + ann = prompt_process.text_prompt(text='uma foto de um cachorro') + + # Instrução: ponto + # pontos padrão [[0,0]] [[x1,y1],[x2,y2]] + # ponto_label padrão [0] [1,0] 0:fundo, 1:frente + ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1]) + prompt_process.plot(annotations=ann, output='./') + ``` + + === "CLI" + ```bash + # Carregar um modelo FastSAM e segmentar tudo com ele + yolo segment predict 
model=FastSAM-s.pt source=caminho/para/onibus.jpg imgsz=640 + ``` + +Este trecho de código demonstra a simplicidade de carregar um modelo pré-treinado e executar uma predição em uma imagem. + +### Uso de Validação + +A validação do modelo em um conjunto de dados pode ser feita da seguinte forma: + +!!! Example "Exemplo" + + === "Python" + ```python + from ultralytics import FastSAM + + # Criar um modelo FastSAM + model = FastSAM('FastSAM-s.pt') # ou FastSAM-x.pt + + # Validar o modelo + results = model.val(data='coco8-seg.yaml') + ``` + + === "CLI" + ```bash + # Carregar um modelo FastSAM e validá-lo no conjunto de dados de exemplo COCO8 com tamanho de imagem 640 + yolo segment val model=FastSAM-s.pt data=coco8.yaml imgsz=640 + ``` + +Observe que o FastSAM suporta apenas detecção e segmentação de uma única classe de objeto. Isso significa que ele reconhecerá e segmentará todos os objetos como a mesma classe. Portanto, ao preparar o conjunto de dados, você precisará converter todos os IDs de categoria de objeto para 0. + +## Uso Oficial do FastSAM + +O FastSAM também está disponível diretamente no repositório [https://github.com/CASIA-IVA-Lab/FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM). Aqui está uma visão geral breve das etapas típicas que você pode seguir para usar o FastSAM: + +### Instalação + +1. Clone o repositório do FastSAM: + ```shell + git clone https://github.com/CASIA-IVA-Lab/FastSAM.git + ``` + +2. Crie e ative um ambiente Conda com Python 3.9: + ```shell + conda create -n FastSAM python=3.9 + conda activate FastSAM + ``` + +3. Navegue até o repositório clonado e instale os pacotes necessários: + ```shell + cd FastSAM + pip install -r requirements.txt + ``` + +4. Instale o modelo CLIP: + ```shell + pip install git+https://github.com/openai/CLIP.git + ``` + +### Exemplo de Uso + +1. Baixe um [checkpoint do modelo](https://drive.google.com/file/d/1m1sjY4ihXBU1fZXdQ-Xdj-mDltW-2Rqv/view?usp=sharing). + +2. Use o FastSAM para inferência. 
Exemplos de comandos: + + - Segmentar tudo em uma imagem: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg + ``` + + - Segmentar objetos específicos usando uma instrução de texto: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --text_prompt "o cachorro amarelo" + ``` + + - Segmentar objetos dentro de uma caixa delimitadora (fornecer coordenadas da caixa no formato xywh): + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --box_prompt "[570,200,230,400]" + ``` + + - Segmentar objetos próximos a pontos específicos: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --point_prompt "[[520,360],[620,300]]" --point_label "[1,0]" + ``` + +Além disso, você pode experimentar o FastSAM através de um [demo no Colab](https://colab.research.google.com/drive/1oX14f6IneGGw612WgVlAiy91UHwFAvr9?usp=sharing) ou no [demo web do HuggingFace](https://huggingface.co/spaces/An-619/FastSAM) para ter uma experiência visual. + +## Citações e Reconhecimentos + +Gostaríamos de reconhecer os autores do FastSAM por suas contribuições significativas no campo da segmentação de instâncias em tempo real: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{zhao2023fast, + title={Fast Segment Anything}, + author={Xu Zhao and Wenchao Ding and Yongqi An and Yinglong Du and Tao Yu and Min Li and Ming Tang and Jinqiao Wang}, + year={2023}, + eprint={2306.12156}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +O artigo original do FastSAM pode ser encontrado no [arXiv](https://arxiv.org/abs/2306.12156). Os autores disponibilizaram seu trabalho publicamente, e o código pode ser acessado no [GitHub](https://github.com/CASIA-IVA-Lab/FastSAM). Agradecemos seus esforços em avançar o campo e tornar seu trabalho acessível à comunidade em geral. 
diff --git a/docs/pt/models/index.md b/docs/pt/models/index.md new file mode 100644 index 0000000..c7958f3 --- /dev/null +++ b/docs/pt/models/index.md @@ -0,0 +1,98 @@ +--- +comments: true +description: Explore a ampla gama de modelos da família YOLO, SAM, MobileSAM, FastSAM, YOLO-NAS e RT-DETR suportados pela Ultralytics. Comece com exemplos para uso tanto em CLI quanto em Python. +keywords: Ultralytics, documentação, YOLO, SAM, MobileSAM, FastSAM, YOLO-NAS, RT-DETR, modelos, arquiteturas, Python, CLI +--- + +# Modelos Suportados pela Ultralytics + +Bem-vindo à documentação de modelos da Ultralytics! Oferecemos suporte para uma ampla variedade de modelos, cada um adaptado para tarefas específicas como [detecção de objetos](../tasks/detect.md), [segmentação de instâncias](../tasks/segment.md), [classificação de imagens](../tasks/classify.md), [estimativa de pose](../tasks/pose.md), e [rastreamento de múltiplos objetos](../modes/track.md). Se você tem interesse em contribuir com sua arquitetura de modelo para a Ultralytics, confira nosso [Guia de Contribuição](../../help/contributing.md). + +!!! Note "Nota" + + 🚧 Nossa documentação em vários idiomas está atualmente em construção, e estamos trabalhando arduamente para melhorá-la. Agradecemos sua paciência! 🙏 + +## Modelos em Destaque + +Aqui estão alguns dos principais modelos suportados: + +1. **[YOLOv3](yolov3.md)**: A terceira iteração da família de modelos YOLO, originalmente por Joseph Redmon, conhecida por suas capacidades eficientes de detecção de objetos em tempo real. +2. **[YOLOv4](yolov4.md)**: Uma atualização nativa para o darknet do YOLOv3, lançada por Alexey Bochkovskiy em 2020. +3. **[YOLOv5](yolov5.md)**: Uma versão aprimorada da arquitetura YOLO pela Ultralytics, oferecendo melhores compromissos entre desempenho e velocidade em comparação com as versões anteriores. +4. 
**[YOLOv6](yolov6.md)**: Lançado pela [Meituan](https://about.meituan.com/) em 2022, e em uso em muitos dos robôs autônomos de entregas da empresa. +5. **[YOLOv7](yolov7.md)**: Modelos YOLO atualizados lançados em 2022 pelos autores do YOLOv4. +6. **[YOLOv8](yolov8.md) NOVO 🚀**: A versão mais recente da família YOLO, apresentando capacidades aprimoradas, como segmentação de instâncias, estimativa de pose/pontos-chave e classificação. +7. **[Segment Anything Model (SAM)](sam.md)**: Modelo Segment Anything (SAM) da Meta. +8. **[Mobile Segment Anything Model (MobileSAM)](mobile-sam.md)**: MobileSAM para aplicações móveis, pela Universidade Kyung Hee. +9. **[Fast Segment Anything Model (FastSAM)](fast-sam.md)**: FastSAM pelo Grupo de Análise de Imagem e Vídeo, Instituto de Automação, Academia Chinesa de Ciências. +10. **[YOLO-NAS](yolo-nas.md)**: Modelos de Pesquisa de Arquitetura Neural YOLO (NAS). +11. **[Realtime Detection Transformers (RT-DETR)](rtdetr.md)**: Modelos de Transformador de Detecção em Tempo Real (RT-DETR) do PaddlePaddle da Baidu. + +

+
+ +
+ Assista: Execute modelos YOLO da Ultralytics em apenas algumas linhas de código. +

+ +## Introdução: Exemplos de Uso + +Este exemplo oferece exemplos simples de treinamento e inferência com YOLO. Para uma documentação completa sobre estes e outros [modos](../modes/index.md), veja as páginas de documentação de [Previsão](../modes/predict.md), [Treinamento](../modes/train.md), [Validação](../modes/val.md) e [Exportação](../modes/export.md). + +Note que o exemplo abaixo é para modelos YOLOv8 [Detect](../tasks/detect.md) para detecção de objetos. Para tarefas suportadas adicionais, veja as documentações de [Segmentação](../tasks/segment.md), [Classificação](../tasks/classify.md) e [Pose](../tasks/pose.md). + +!!! Example "Exemplo" + + === "Python" + + Modelos `*.pt` pré-treinados em PyTorch, bem como arquivos de configuração `*.yaml`, podem ser passados para as classes `YOLO()`, `SAM()`, `NAS()` e `RTDETR()` para criar uma instância de modelo em Python: + + ```python + from ultralytics import YOLO + + # Carregar um modelo YOLOv8n pré-treinado no COCO + modelo = YOLO('yolov8n.pt') + + # Exibir informações do modelo (opcional) + modelo.info() + + # Treinar o modelo no conjunto de dados de exemplo COCO8 por 100 épocas + resultados = modelo.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Executar inferência com o modelo YOLOv8n na imagem 'bus.jpg' + resultados = modelo('path/to/bus.jpg') + ``` + + === "CLI" + + Comandos CLI estão disponíveis para executar diretamente os modelos: + + ```bash + # Carregar um modelo YOLOv8n pré-treinado no COCO e treiná-lo no conjunto de dados de exemplo COCO8 por 100 épocas + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Carregar um modelo YOLOv8n pré-treinado no COCO e executar inferência na imagem 'bus.jpg' + yolo predict model=yolov8n.pt source=path/to/bus.jpg + ``` + +## Contribuindo com Novos Modelos + +Interessado em contribuir com seu modelo para a Ultralytics? Ótimo! Estamos sempre abertos a expandir nosso portfólio de modelos. + +1. 
**Fork do Repositório**: Comece fazendo um fork do [repositório no GitHub da Ultralytics](https://github.com/ultralytics/ultralytics). + +2. **Clone Seu Fork**: Clone seu fork para a sua máquina local e crie uma nova branch para trabalhar. + +3. **Implemente Seu Modelo**: Adicione seu modelo seguindo as normas e diretrizes de codificação fornecidas no nosso [Guia de Contribuição](../../help/contributing.md). + +4. **Teste Cuidadosamente**: Assegure-se de testar seu modelo rigorosamente, tanto isoladamente quanto como parte do pipeline. + +5. **Crie um Pull Request**: Uma vez que estiver satisfeito com seu modelo, crie um pull request para o repositório principal para revisão. + +6. **Revisão de Código & Mesclagem**: Após a revisão, se seu modelo atender aos nossos critérios, ele será integrado ao repositório principal. + +Para etapas detalhadas, consulte nosso [Guia de Contribuição](../../help/contributing.md). diff --git a/docs/pt/models/mobile-sam.md b/docs/pt/models/mobile-sam.md new file mode 100644 index 0000000..e4ffdc5 --- /dev/null +++ b/docs/pt/models/mobile-sam.md @@ -0,0 +1,116 @@ +--- +comments: true +description: Saiba mais sobre o MobileSAM, sua implementação, comparação com o SAM original e como baixá-lo e testá-lo no framework Ultralytics. Melhore suas aplicações móveis hoje. +keywords: MobileSAM, Ultralytics, SAM, aplicações móveis, Arxiv, GPU, API, codificador de imagens, decodificador de máscaras, download do modelo, método de teste +--- + +![Logotipo do MobileSAM](https://github.com/ChaoningZhang/MobileSAM/blob/master/assets/logo2.png?raw=true) + +# Segmentação Móvel de Qualquer Coisa (MobileSAM) + +O artigo do MobileSAM agora está disponível no [arXiv](https://arxiv.org/pdf/2306.14289.pdf). + +Uma demonstração do MobileSAM executando em uma CPU pode ser acessada neste [link de demonstração](https://huggingface.co/spaces/dhkim2810/MobileSAM). Em uma CPU Mac i5, a execução leva aproximadamente 3 segundos. 
Na demonstração do Hugging Face, a interface e CPUs de menor desempenho contribuem para uma resposta mais lenta, mas ela continua funcionando efetivamente. + +O MobileSAM é implementado em vários projetos, incluindo [Grounding-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything), [AnyLabeling](https://github.com/vietanhdev/anylabeling) e [Segment Anything in 3D](https://github.com/Jumpat/SegmentAnythingin3D). + +O MobileSAM é treinado em uma única GPU com um conjunto de dados de 100 mil imagens (1% das imagens originais) em menos de um dia. O código para esse treinamento será disponibilizado no futuro. + +## Modelos Disponíveis, Tarefas Suportadas e Modos de Operação + +Esta tabela apresenta os modelos disponíveis com seus pesos pré-treinados específicos, as tarefas que eles suportam e sua compatibilidade com diferentes modos de operação, como [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md) e [Export](../modes/export.md), indicados pelos emojis ✅ para os modos suportados e ❌ para os modos não suportados. + +| Tipo de Modelo | Pesos Pré-treinados | Tarefas Suportadas | Inference | Validation | Training | Export | +|----------------|---------------------|--------------------------------------------------|-----------|------------|----------|--------| +| MobileSAM | `mobile_sam.pt` | [Segmentação de Instâncias](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## Adaptação de SAM para MobileSAM + +Como o MobileSAM mantém o mesmo pipeline do SAM original, incorporamos o pré-processamento original, pós-processamento e todas as outras interfaces. Consequentemente, aqueles que estão atualmente usando o SAM original podem fazer a transição para o MobileSAM com um esforço mínimo. + +O MobileSAM tem um desempenho comparável ao SAM original e mantém o mesmo pipeline, exceto por uma mudança no codificador de imagens. Especificamente, substituímos o codificador de imagens ViT-H original (632M) por um ViT menor (5M). 
Em uma única GPU, o MobileSAM opera em cerca de 12 ms por imagem: 8 ms no codificador de imagens e 4 ms no decodificador de máscaras. + +A tabela a seguir fornece uma comparação dos codificadores de imagens baseados em ViT: + +| Codificador de Imagens | SAM Original | MobileSAM | +|------------------------|--------------|-----------| +| Parâmetros | 611M | 5M | +| Velocidade | 452ms | 8ms | + +Tanto o SAM original quanto o MobileSAM utilizam o mesmo decodificador de máscaras baseado em prompt: + +| Decodificador de Máscaras | SAM Original | MobileSAM | +|---------------------------|--------------|-----------| +| Parâmetros | 3,876M | 3,876M | +| Velocidade | 4ms | 4ms | + +Aqui está a comparação de todo o pipeline: + +| Pipeline Completo (Enc+Dec) | SAM Original | MobileSAM | +|-----------------------------|--------------|-----------| +| Parâmetros | 615M | 9,66M | +| Velocidade | 456ms | 12ms | + +O desempenho do MobileSAM e do SAM original é demonstrado usando tanto um ponto quanto uma caixa como prompts. + +![Imagem com Ponto como Prompt](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/mask_box.jpg?raw=true) + +![Imagem com Caixa como Prompt](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/mask_box.jpg?raw=true) + +Com seu desempenho superior, o MobileSAM é aproximadamente 5 vezes menor e 7 vezes mais rápido que o FastSAM atual. Mais detalhes estão disponíveis na [página do projeto MobileSAM](https://github.com/ChaoningZhang/MobileSAM). + +## Testando o MobileSAM no Ultralytics + +Assim como o SAM original, oferecemos um método de teste simples no Ultralytics, incluindo modos para prompts de Ponto e Caixa. + +### Download do Modelo + +Você pode baixar o modelo [aqui](https://github.com/ChaoningZhang/MobileSAM/blob/master/weights/mobile_sam.pt). + +### Prompt de Ponto + +!!! 
Example "Exemplo" + + === "Python" + ```python + from ultralytics import SAM + + # Carregar o modelo + model = SAM('mobile_sam.pt') + + # Prever um segmento com base em um prompt de ponto + model.predict('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +### Prompt de Caixa + +!!! Example "Exemplo" + + === "Python" + ```python + from ultralytics import SAM + + # Carregar o modelo + model = SAM('mobile_sam.pt') + + # Prever um segmento com base em um prompt de caixa + model.predict('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + ``` + +Implementamos `MobileSAM` e `SAM` usando a mesma API. Para obter mais informações sobre o uso, consulte a [página do SAM](sam.md). + +## Citações e Agradecimentos + +Se você achar o MobileSAM útil em sua pesquisa ou trabalho de desenvolvimento, considere citar nosso artigo: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{mobile_sam, + title={Faster Segment Anything: Towards Lightweight SAM for Mobile Applications}, + author={Zhang, Chaoning and Han, Dongshen and Qiao, Yu and Kim, Jung Uk and Bae, Sung Ho and Lee, Seungkyu and Hong, Choong Seon}, + journal={arXiv preprint arXiv:2306.14289}, + year={2023} + } diff --git a/docs/pt/models/rtdetr.md b/docs/pt/models/rtdetr.md new file mode 100644 index 0000000..1d8a252 --- /dev/null +++ b/docs/pt/models/rtdetr.md @@ -0,0 +1,93 @@ +--- +comments: true +description: Descubra as características e benefícios do RT-DETR da Baidu, um detector de objetos em tempo real eficiente e adaptável baseado em Vision Transformers, incluindo modelos pré-treinados. 
+keywords: RT-DETR, Baidu, Vision Transformers, detecção de objetos, desempenho em tempo real, CUDA, TensorRT, seleção de consulta IoU, Ultralytics, API Python, PaddlePaddle +--- + +# RT-DETR da Baidu: Um Detector de Objetos em Tempo Real Baseado em Vision Transformers + +## Visão Geral + +O Real-Time Detection Transformer (RT-DETR), desenvolvido pela Baidu, é um detector de objetos de última geração que proporciona desempenho em tempo real mantendo alta precisão. Ele utiliza a potência dos Vision Transformers (ViT) para processar eficientemente recursos multiescala, separando a interação intra-escala e a fusão entre escalas. O RT-DETR é altamente adaptável, com suporte para ajuste flexível da velocidade de inferência usando diferentes camadas de decodificador sem a necessidade de retreinamento. O modelo se destaca em backends acelerados como o CUDA com o TensorRT, superando muitos outros detectores de objetos em tempo real. + +![Exemplo de imagem do modelo](https://user-images.githubusercontent.com/26833433/238963168-90e8483f-90aa-4eb6-a5e1-0d408b23dd33.png) +**Visão geral do RT-DETR da Baidu.** O diagrama da arquitetura do modelo RT-DETR mostra as últimas três etapas da espinha dorsal {S3, S4, S5} como entrada para o codificador. O codificador híbrido eficiente transforma recursos multiescala em uma sequência de recursos de imagem por meio da interação de recursos intra-escala (AIFI) e do módulo de fusão de recursos entre escalas (CCFM). A seleção de consulta, consciente da IoU, é utilizada para selecionar um número fixo de recursos de imagem para servir como consultas de objeto iniciais para o decodificador. Por fim, o decodificador com cabeçotes de previsão auxiliares otimiza iterativamente as consultas de objeto para gerar caixas e pontuações de confiança ([fonte](https://arxiv.org/pdf/2304.08069.pdf)). 
+ +### Características Principais + +- **Codificador Híbrido Eficiente:** O RT-DETR da Baidu utiliza um codificador híbrido eficiente para processar recursos multiescala por meio da separação da interação intra-escala e da fusão entre escalas. Esse design exclusivo baseado em Vision Transformers reduz os custos computacionais e permite a detecção de objetos em tempo real. +- **Seleção de Consulta Consciente de IoU:** O RT-DETR da Baidu melhora a inicialização das consultas de objeto ao utilizar seleção de consulta consciente de IoU. Isso permite que o modelo foque nos objetos mais relevantes na cena, aprimorando a precisão da detecção. +- **Velocidade de Inferência Adaptável:** O RT-DETR da Baidu suporta ajustes flexíveis da velocidade de inferência ao utilizar diferentes camadas de decodificador sem a necessidade de retreinamento. Essa adaptabilidade facilita a aplicação prática em diversos cenários de detecção de objetos em tempo real. + +## Modelos Pré-Treinados + +A API Python do Ultralytics fornece modelos pré-treinados do RT-DETR do PaddlePaddle com diferentes escalas: + +- RT-DETR-L: 53,0% de AP em COCO val2017, 114 FPS em GPU T4 +- RT-DETR-X: 54,8% de AP em COCO val2017, 74 FPS em GPU T4 + +## Exemplos de Uso + +Este exemplo fornece exemplos simples de treinamento e inferência com o RT-DETR. Para obter documentação completa sobre esses e outros [modos](../modes/index.md), consulte as páginas de documentação [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) e [Export](../modes/export.md). + +!!! 
Example "Exemplo" + + === "Python" + + ```python + from ultralytics import RTDETR + + # Carregue um modelo RT-DETR-l pré-treinado no COCO + model = RTDETR('rtdetr-l.pt') + + # Exiba informações do modelo (opcional) + model.info() + + # Treine o modelo com o conjunto de dados de exemplo COCO8 por 100 épocas + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Execute a inferência com o modelo RT-DETR-l na imagem 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + ```bash + # Carregue um modelo RT-DETR-l pré-treinado no COCO e treine-o com o conjunto de dados de exemplo COCO8 por 100 épocas + yolo train model=rtdetr-l.pt data=coco8.yaml epochs=100 imgsz=640 + + # Carregue um modelo RT-DETR-l pré-treinado no COCO e execute a inferência na imagem 'bus.jpg' + yolo predict model=rtdetr-l.pt source=path/to/bus.jpg + ``` + +## Tarefas e Modos Suportados + +Esta tabela apresenta os tipos de modelo, os pesos pré-treinados específicos, as tarefas suportadas por cada modelo e os vários modos ([Train](../modes/train.md), [Val](../modes/val.md), [Predict](../modes/predict.md), [Export](../modes/export.md)) que são suportados, indicados por emojis ✅. + +| Tipo de Modelo | Pesos Pré-treinados | Tarefas Suportadas | Inferência | Validação | Treinamento | Exportação | +|----------------------|---------------------|-------------------------------------------|------------|-----------|-------------|------------| +| RT-DETR Grande | `rtdetr-l.pt` | [Detecção de Objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| RT-DETR Extra-Grande | `rtdetr-x.pt` | [Detecção de Objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +## Citações e Reconhecimentos + +Se você utilizar o RT-DETR da Baidu em seu trabalho de pesquisa ou desenvolvimento, por favor cite o [artigo original](https://arxiv.org/abs/2304.08069): + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{lv2023detrs, + title={DETRs Beat YOLOs on Real-time Object Detection}, + author={Wenyu Lv and Shangliang Xu and Yian Zhao and Guanzhong Wang and Jinman Wei and Cheng Cui and Yuning Du and Qingqing Dang and Yi Liu}, + year={2023}, + eprint={2304.08069}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +Gostaríamos de agradecer à Baidu e à equipe do [PaddlePaddle](https://github.com/PaddlePaddle/PaddleDetection) por criar e manter esse recurso valioso para a comunidade de visão computacional. Sua contribuição para o campo com o desenvolvimento do detector de objetos em tempo real baseado em Vision Transformers, RT-DETR, é muito apreciada. + +*keywords: RT-DETR, Transformer, ViT, Vision Transformers, RT-DETR da Baidu, PaddlePaddle, modelos pré-treinados PaddlePaddle RT-DETR, uso do RT-DETR da Baidu, API Python do Ultralytics* diff --git a/docs/pt/models/sam.md b/docs/pt/models/sam.md new file mode 100644 index 0000000..408af95 --- /dev/null +++ b/docs/pt/models/sam.md @@ -0,0 +1,226 @@ +--- +comments: true +description: Explore o Modelo de Segmentação de Qualquer Coisa (SAM) de última geração da Ultralytics que permite a segmentação de imagens em tempo real. Aprenda sobre a segmentação baseada em prompts, o desempenho de transferência zero e como utilizá-lo. +keywords: Ultralytics, segmentação de imagem, Modelo de Segmentação de Qualquer Coisa, SAM, conjunto de dados SA-1B, desempenho em tempo real, transferência zero, detecção de objetos, análise de imagens, aprendizado de máquina +--- + +# Modelo de Segmentação de Qualquer Coisa (SAM) + +Bem-vindo à fronteira da segmentação de imagem com o Modelo de Segmentação de Qualquer Coisa, ou SAM. Este modelo revolucionário mudou o jogo ao introduzir a segmentação de imagem baseada em prompts com desempenho em tempo real, estabelecendo novos padrões no campo. 
+ +## Introdução ao SAM: O Modelo de Segmentação de Qualquer Coisa + +O Modelo de Segmentação de Qualquer Coisa, ou SAM, é um modelo de segmentação de imagem de ponta que permite a segmentação baseada em prompts, proporcionando uma versatilidade incomparável em tarefas de análise de imagem. O SAM é o cerne da iniciativa Segment Anything, um projeto inovador que introduz um modelo, tarefa e conjunto de dados novos para a segmentação de imagem. + +O design avançado do SAM permite que ele se adapte a novas distribuições de imagem e tarefas sem conhecimento prévio, um recurso conhecido como transferência zero. Treinado no abrangente [conjunto de dados SA-1B](https://ai.facebook.com/datasets/segment-anything/), que contém mais de 1 bilhão de máscaras espalhadas por 11 milhões de imagens cuidadosamente selecionadas, o SAM tem demonstrado um impressionante desempenho de transferência zero, superando os resultados totalmente supervisionados anteriores em muitos casos. + +![Exemplo de imagem do conjunto de dados](https://user-images.githubusercontent.com/26833433/238056229-0e8ffbeb-f81a-477e-a490-aff3d82fd8ce.jpg) +Imagens de exemplo com máscaras sobrepostas do nosso conjunto de dados recém-introduzido, SA-1B. O SA-1B contém 11 milhões de imagens diversas, de alta resolução, licenciadas e com proteção de privacidade, e 1,1 bilhão de máscaras de segmentação de alta qualidade. Essas máscaras foram anotadas totalmente automaticamente pelo SAM, e, como verificado por classificações humanas e inúmeros experimentos, são de alta qualidade e diversidade. As imagens são agrupadas pelo número de máscaras por imagem para visualização (em média, há ∼100 máscaras por imagem). 
+ +## Recursos Principais do Modelo de Segmentação de Qualquer Coisa (SAM) + +- **Tarefa de Segmentação Baseada em Prompts:** O SAM foi projetado com uma tarefa de segmentação baseada em prompts em mente, permitindo que ele gere máscaras de segmentação válidas a partir de qualquer prompt fornecido, como dicas espaciais ou textuais que identifiquem um objeto. +- **Arquitetura Avançada:** O Modelo de Segmentação de Qualquer Coisa utiliza um poderoso codificador de imagens, um codificador de prompts e um decodificador de máscaras leve. Essa arquitetura única possibilita o uso flexível de prompts, cálculo de máscaras em tempo real e consciência de ambiguidade em tarefas de segmentação. +- **O Conjunto de Dados SA-1B:** Introduzido pelo projeto Segment Anything, o conjunto de dados SA-1B apresenta mais de 1 bilhão de máscaras em 11 milhões de imagens. Como o maior conjunto de dados de segmentação até o momento, ele fornece ao SAM uma fonte diversificada e em grande escala de dados de treinamento. +- **Desempenho de Transferência Zero:** O SAM apresenta um desempenho de transferência zero excepcional em diversas tarefas de segmentação, tornando-se uma ferramenta pronta para uso em aplicações diversas com necessidade mínima de engenharia de prompts. + +Para obter uma visão mais aprofundada do Modelo de Segmentação de Qualquer Coisa e do conjunto de dados SA-1B, visite o [site do Segment Anything](https://segment-anything.com) e consulte o artigo de pesquisa [Segment Anything](https://arxiv.org/abs/2304.02643). + +## Modelos Disponíveis, Tarefas Suportadas e Modos de Operação + +Esta tabela apresenta os modelos disponíveis com seus pesos pré-treinados específicos, as tarefas suportadas por eles e sua compatibilidade com diferentes modos de operação, como [Inferência](../modes/predict.md), [Validação](../modes/val.md), [Treinamento](../modes/train.md) e [Exportação](../modes/export.md), indicados pelos emojis ✅ para modos suportados e ❌ para modos não suportados. 
+ +| Tipo de Modelo | Pesos Pré-Treinados | Tarefas Suportadas | Inferência | Validação | Treinamento | Exportação | +|----------------|---------------------|--------------------------------------------------|------------|-----------|-------------|------------| +| SAM base | `sam_b.pt` | [Segmentação de Instâncias](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| SAM large | `sam_l.pt` | [Segmentação de Instâncias](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## Como Usar o SAM: Versatilidade e Poder na Segmentação de Imagens + +O Modelo de Segmentação de Qualquer Coisa pode ser utilizado para uma variedade de tarefas secundárias que vão além dos dados de treinamento. Isso inclui detecção de bordas, geração de propostas de objeto, segmentação de instâncias e predição preliminar de texto para máscara. Com a engenharia de prompts, o SAM pode se adaptar rapidamente a novas tarefas e distribuições de dados de maneira inovadora, estabelecendo-se como uma ferramenta versátil e poderosa para todas as suas necessidades de segmentação de imagem. + +### Exemplo de predição do SAM + +!!! Example "Segmentar com prompts" + + Segmenta a imagem com prompts fornecidos. + + === "Python" + + ```python + from ultralytics import SAM + + # Carregar o modelo + modelo = SAM('sam_b.pt') + + # Exibir informações do modelo (opcional) + modelo.info() + + # Executar inferência com prompt de bboxes + modelo('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + + # Executar inferência com prompt de pontos + modelo('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +!!! Example "Segmentar tudo" + + Segmenta toda a imagem. 
+ + === "Python" + + ```python + from ultralytics import SAM + + # Carregar o modelo + modelo = SAM('sam_b.pt') + + # Exibir informações do modelo (opcional) + modelo.info() + + # Executar inferência + modelo('caminho/para/imagem.jpg') + ``` + + === "CLI" + + ```bash + # Executar inferência com um modelo SAM + yolo predict model=sam_b.pt source=caminho/para/imagem.jpg + ``` + +- A lógica aqui é segmentar toda a imagem se nenhum prompt (bboxes/pontos/máscaras) for especificado. + +!!! Example "Exemplo do SAMPredictor" + + Desta forma, você pode definir a imagem uma vez e executar inferência de prompts várias vezes sem executar o codificador de imagem várias vezes. + + === "Inferência com prompt" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # Criar o SAMPredictor + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # Definir imagem + predictor.set_image("ultralytics/assets/zidane.jpg") # definir com arquivo de imagem + predictor.set_image(cv2.imread("ultralytics/assets/zidane.jpg")) # definir com np.ndarray + results = predictor(bboxes=[439, 437, 524, 709]) + results = predictor(points=[900, 370], labels=[1]) + + # Redefinir imagem + predictor.reset_image() + ``` + + Segmentar tudo com argumentos adicionais. + + === "Segmentar tudo" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # Criar o SAMPredictor + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # Segmentar com argumentos adicionais + results = predictor(source="ultralytics/assets/zidane.jpg", crop_n_layers=1, points_stride=64) + ``` + +- Para mais argumentos adicionais de `Segmentar tudo`, consulte a [Referência do `Predictor/generate`](../../../reference/models/sam/predict.md). + +## Comparação SAM vs.
YOLOv8 + +Aqui, comparamos o menor modelo SAM-b da Meta com o menor modelo de segmentação da Ultralytics, [YOLOv8n-seg](../tasks/segment.md): + +| Modelo | Tamanho | Parâmetros | Velocidade (CPU) | +|-----------------------------------------------|-------------------------------|--------------------------------|--------------------------------------| +| SAM-b da Meta | 358 MB | 94,7 M | 51096 ms/im | +| [MobileSAM](mobile-sam.md) | 40,7 MB | 10,1 M | 46122 ms/im | +| [FastSAM-s](fast-sam.md) com YOLOv8 como base | 23,7 MB | 11,8 M | 115 ms/im | +| YOLOv8n-seg da Ultralytics | **6,7 MB** (53,4 vezes menor) | **3,4 M** (27,9 vezes a menos) | **59 ms/im** (866 vezes mais rápido) | + +Essa comparação mostra as diferenças de ordem de magnitude nos tamanhos e velocidades dos modelos. Enquanto o SAM apresenta capacidades exclusivas para segmentação automática, ele não é um concorrente direto dos modelos de segmentação YOLOv8, que são menores, mais rápidos e mais eficientes. + +Os testes foram executados em um MacBook Apple M2 de 2023 com 16GB de RAM. Para reproduzir este teste: + +!!! Example "Exemplo" + + === "Python" + ```python + from ultralytics import FastSAM, SAM, YOLO + + # Perfil do SAM-b + modelo = SAM('sam_b.pt') + modelo.info() + modelo('ultralytics/assets') + + # Perfil do MobileSAM + modelo = SAM('mobile_sam.pt') + modelo.info() + modelo('ultralytics/assets') + + # Perfil do FastSAM-s + modelo = FastSAM('FastSAM-s.pt') + modelo.info() + modelo('ultralytics/assets') + + # Perfil do YOLOv8n-seg + modelo = YOLO('yolov8n-seg.pt') + modelo.info() + modelo('ultralytics/assets') + ``` + +## Autoanotação: Um Caminho Rápido para Conjuntos de Dados de Segmentação + +A autoanotação é um recurso-chave do SAM que permite aos usuários gerar um [conjunto de dados de segmentação](https://docs.ultralytics.com/datasets/segment) usando um modelo de detecção pré-treinado. 
Esse recurso permite a anotação rápida e precisa de um grande número de imagens, contornando a necessidade de anotação manual demorada. + +### Gere seu Conjunto de Dados de Segmentação Usando um Modelo de Detecção + +Para fazer a autoanotação do seu conjunto de dados com o framework Ultralytics, use a função `auto_annotate` conforme mostrado abaixo: + +!!! Example "Exemplo" + + === "Python" + ```python + from ultralytics.data.annotator import auto_annotate + + auto_annotate(data="caminho/para/imagens", det_model="yolov8x.pt", sam_model='sam_b.pt') + ``` + +| Argumento | Tipo | Descrição | Padrão | +|------------|---------------------|-----------------------------------------------------------------------------------------------------------|--------------| +| data | str | Caminho para uma pasta que contém as imagens a serem anotadas. | | +| det_model | str, opcional | Modelo de detecção YOLO pré-treinado. O padrão é 'yolov8x.pt'. | 'yolov8x.pt' | +| sam_model | str, opcional | Modelo de segmentação SAM pré-treinado. O padrão é 'sam_b.pt'. | 'sam_b.pt' | +| device | str, opcional | Dispositivo no qual executar os modelos. O padrão é uma string vazia (CPU ou GPU, se disponível). | | +| output_dir | str, None, opcional | Diretório para salvar os resultados anotados. O padrão é uma pasta 'labels' no mesmo diretório de 'data'. | None | + +A função `auto_annotate` recebe o caminho para suas imagens, com argumentos opcionais para especificar os modelos de detecção pré-treinados e de segmentação SAM, o dispositivo onde executar os modelos e o diretório de saída para salvar os resultados anotados. + +A autoanotação com modelos pré-treinados pode reduzir drasticamente o tempo e o esforço necessários para criar conjuntos de dados de segmentação de alta qualidade. 
Esse recurso é especialmente benéfico para pesquisadores e desenvolvedores que lidam com grandes coleções de imagens, pois permite que eles se concentrem no desenvolvimento e na avaliação do modelo, em vez de na anotação manual. + +## Citações e Reconhecimentos + +Se você encontrar o SAM útil em seu trabalho de pesquisa ou desenvolvimento, considere citar nosso artigo: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{kirillov2023segment, + title={Segment Anything}, + author={Alexander Kirillov and Eric Mintun and Nikhila Ravi and Hanzi Mao and Chloe Rolland and Laura Gustafson and Tete Xiao and Spencer Whitehead and Alexander C. Berg and Wan-Yen Lo and Piotr Dollár and Ross Girshick}, + year={2023}, + eprint={2304.02643}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +Gostaríamos de expressar nossa gratidão à Meta AI por criar e manter esse recurso valioso para a comunidade de visão computacional. + +*keywords: Segment Anything, Modelo de Segmentação de Qualquer Coisa, SAM, SAM da Meta, segmentação de imagem, segmentação baseada em prompts, desempenho de transferência zero, conjunto de dados SA-1B, arquitetura avançada, autoanotação, Ultralytics, modelos pré-treinados, SAM base, SAM large, segmentação de instâncias, visão computacional, IA, inteligência artificial, aprendizado de máquina, anotação de dados, máscaras de segmentação, modelo de detecção, modelo de detecção YOLO, bibtex, Meta AI.* diff --git a/docs/pt/models/yolo-nas.md b/docs/pt/models/yolo-nas.md new file mode 100644 index 0000000..2a10705 --- /dev/null +++ b/docs/pt/models/yolo-nas.md @@ -0,0 +1,121 @@ +--- +comments: true +description: Explore a documentação detalhada do YOLO-NAS, um modelo superior de detecção de objetos. Saiba mais sobre suas funcionalidades, modelos pré-treinados, uso com a API do Ultralytics Python e muito mais. 
+keywords: YOLO-NAS, Deci AI, detecção de objetos, aprendizado profundo, busca de arquitetura neural, API do Ultralytics Python, modelo YOLO, modelos pré-treinados, quantização, otimização, COCO, Objects365, Roboflow 100 +--- + +# YOLO-NAS + +## Visão Geral + +Desenvolvido pela Deci AI, o YOLO-NAS é um modelo de detecção de objetos inovador. É o produto da tecnologia avançada de Busca de Arquitetura Neural, meticulosamente projetado para superar as limitações dos modelos YOLO anteriores. Com melhorias significativas no suporte à quantização e compromisso entre precisão e latência, o YOLO-NAS representa um grande avanço na detecção de objetos. + +![Exemplo de imagem do modelo](https://learnopencv.com/wp-content/uploads/2023/05/yolo-nas_COCO_map_metrics.png) +**Visão geral do YOLO-NAS.** O YOLO-NAS utiliza blocos que suportam quantização e quantização seletiva para obter um desempenho ideal. O modelo, quando convertido para sua versão quantizada INT8, apresenta uma queda mínima na precisão, uma melhoria significativa em relação a outros modelos. Esses avanços culminam em uma arquitetura superior com capacidades de detecção de objetos sem precedentes e desempenho excepcional. + +### Principais Características + +- **Bloco Básico Amigável para Quantização:** O YOLO-NAS introduz um novo bloco básico que é amigo da quantização, abordando uma das limitações significativas dos modelos YOLO anteriores. +- **Treinamento e Quantização Sofisticados:** O YOLO-NAS utiliza esquemas avançados de treinamento e quantização pós-treinamento para melhorar o desempenho. +- **Otimização AutoNAC e Pré-Treinamento:** O YOLO-NAS utiliza a otimização AutoNAC e é pré-treinado em conjuntos de dados proeminentes, como COCO, Objects365 e Roboflow 100. Esse pré-treinamento torna o modelo extremamente adequado para tarefas de detecção de objetos em ambientes de produção. 
+ +## Modelos Pré-Treinados + +Experimente o poder da detecção de objetos de última geração com os modelos pré-treinados do YOLO-NAS fornecidos pela Ultralytics. Esses modelos foram projetados para oferecer um desempenho excelente em termos de velocidade e precisão. Escolha entre várias opções adaptadas às suas necessidades específicas: + +| Modelo | mAP | Latência (ms) | +|------------------|-------|---------------| +| YOLO-NAS S | 47.5 | 3.21 | +| YOLO-NAS M | 51.55 | 5.85 | +| YOLO-NAS L | 52.22 | 7.87 | +| YOLO-NAS S INT-8 | 47.03 | 2.36 | +| YOLO-NAS M INT-8 | 51.0 | 3.78 | +| YOLO-NAS L INT-8 | 52.1 | 4.78 | + +Cada variante do modelo foi projetada para oferecer um equilíbrio entre a média da Precisão Média (mAP) e latência, ajudando você a otimizar suas tarefas de detecção de objetos em termos de desempenho e velocidade. + +## Exemplos de Uso + +A Ultralytics tornou os modelos YOLO-NAS fáceis de serem integrados em suas aplicações Python por meio de nosso pacote `ultralytics`. O pacote fornece uma API Python de fácil utilização para simplificar o processo. + +Os seguintes exemplos mostram como usar os modelos YOLO-NAS com o pacote `ultralytics` para inferência e validação: + +### Exemplos de Inferência e Validação + +Neste exemplo, validamos o YOLO-NAS-s no conjunto de dados COCO8. + +!!! Example "Exemplo" + + Este exemplo fornece um código simples de inferência e validação para o YOLO-NAS. Para lidar com os resultados da inferência, consulte o modo [Predict](../modes/predict.md). Para usar o YOLO-NAS com modos adicionais, consulte [Val](../modes/val.md) e [Export](../modes/export.md). O YOLO-NAS no pacote `ultralytics` não suporta treinamento.
+ + === "Python" + + Arquivos de modelos pré-treinados `*.pt` do PyTorch podem ser passados para a classe `NAS()` para criar uma instância do modelo em Python: + + ```python + from ultralytics import NAS + + # Carrega um modelo YOLO-NAS-s pré-treinado no COCO + model = NAS('yolo_nas_s.pt') + + # Exibe informações do modelo (opcional) + model.info() + + # Valida o modelo no conjunto de dados de exemplo COCO8 + results = model.val(data='coco8.yaml') + + # Executa inferência com o modelo YOLO-NAS-s na imagem 'bus.jpg' + results = model('caminho/para/bus.jpg') + ``` + + === "CLI" + + Comandos de CLI estão disponíveis para executar diretamente os modelos: + + ```bash + # Carrega um modelo YOLO-NAS-s pré-treinado no COCO e valida seu desempenho no conjunto de dados de exemplo COCO8 + yolo val model=yolo_nas_s.pt data=coco8.yaml + + # Carrega um modelo YOLO-NAS-s pré-treinado no COCO e executa inferência na imagem 'bus.jpg' + yolo predict model=yolo_nas_s.pt source=caminho/para/bus.jpg + ``` + +## Tarefas e Modos Compatíveis + +Oferecemos três variantes dos modelos YOLO-NAS: Pequeno (s), Médio (m) e Grande (l). Cada variante foi projetada para atender a diferentes necessidades computacionais e de desempenho: + +- **YOLO-NAS-s**: Otimizado para ambientes em que os recursos computacionais são limitados, mas a eficiência é fundamental. +- **YOLO-NAS-m**: Oferece uma abordagem equilibrada, adequada para detecção de objetos em geral com maior precisão. +- **YOLO-NAS-l**: Adaptado para cenários que requerem a maior precisão, onde os recursos computacionais são menos restritos. + +Abaixo está uma visão geral detalhada de cada modelo, incluindo links para seus pesos pré-treinados, as tarefas que eles suportam e sua compatibilidade com diferentes modos de operação.
+ +| Tipo de Modelo | Pesos Pré-Treinados | Tarefas Suportadas | Inferência | Validação | Treinamento | Exportação | +|----------------|-----------------------------------------------------------------------------------------------|-------------------------------------------|------------|-----------|-------------|------------| +| YOLO-NAS-s | [yolo_nas_s.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_s.pt) | [Detecção de Objetos](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-m | [yolo_nas_m.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_m.pt) | [Detecção de Objetos](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-l | [yolo_nas_l.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_l.pt) | [Detecção de Objetos](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | + +## Citações e Agradecimentos + +Se você utilizar o YOLO-NAS em seus estudos ou trabalho de desenvolvimento, por favor, cite o SuperGradients: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{supergradients, + doi = {10.5281/ZENODO.7789328}, + url = {https://zenodo.org/record/7789328}, + author = {Aharon, Shay and {Louis-Dupont} and {Ofri Masad} and Yurkova, Kate and {Lotem Fridman} and {Lkdci} and Khvedchenya, Eugene and Rubin, Ran and Bagrov, Natan and Tymchenko, Borys and Keren, Tomer and Zhilko, Alexander and {Eran-Deci}}, + title = {Super-Gradients}, + publisher = {GitHub}, + journal = {GitHub repository}, + year = {2021}, + } + ``` + +Expressamos nossa gratidão à equipe [SuperGradients](https://github.com/Deci-AI/super-gradients/) da Deci AI por seus esforços na criação e manutenção deste recurso valioso para a comunidade de visão computacional. Acreditamos que o YOLO-NAS, com sua arquitetura inovadora e capacidades superiores de detecção de objetos, se tornará uma ferramenta fundamental para desenvolvedores e pesquisadores. 
+ +*keywords: YOLO-NAS, Deci AI, detecção de objetos, aprendizado profundo, busca de arquitetura neural, API do Ultralytics Python, modelo YOLO, SuperGradients, modelos pré-treinados, bloco básico amigável para quantização, esquemas avançados de treinamento, quantização pós-treinamento, otimização AutoNAC, COCO, Objects365, Roboflow 100* diff --git a/docs/pt/models/yolov3.md b/docs/pt/models/yolov3.md new file mode 100644 index 0000000..f706d99 --- /dev/null +++ b/docs/pt/models/yolov3.md @@ -0,0 +1,98 @@ +--- +comments: true +description: Obtenha uma visão geral do YOLOv3, YOLOv3-Ultralytics e YOLOv3u. Saiba mais sobre suas principais características, uso e tarefas suportadas para detecção de objetos. +keywords: YOLOv3, YOLOv3-Ultralytics, YOLOv3u, Detecção de Objetos, Inferência, Treinamento, Ultralytics +--- + +# YOLOv3, YOLOv3-Ultralytics, e YOLOv3u + +## Visão Geral + +Este documento apresenta uma visão geral de três modelos de detecção de objetos intimamente relacionados, nomeadamente o [YOLOv3](https://pjreddie.com/darknet/yolo/), [YOLOv3-Ultralytics](https://github.com/ultralytics/yolov3) e [YOLOv3u](https://github.com/ultralytics/ultralytics). + +1. **YOLOv3:** Esta é a terceira versão do algoritmo de detecção de objetos You Only Look Once (YOLO). Originalmente desenvolvido por Joseph Redmon, o YOLOv3 melhorou seus predecessores ao introduzir recursos como previsões em várias escalas e três tamanhos diferentes de kernels de detecção. + +2. **YOLOv3-Ultralytics:** Esta é a implementação do YOLOv3 pela Ultralytics. Ela reproduz a arquitetura original do YOLOv3 e oferece funcionalidades adicionais, como suporte para mais modelos pré-treinados e opções de personalização mais fáceis. + +3. **YOLOv3u:** Esta é uma versão atualizada do YOLOv3-Ultralytics que incorpora o cabeçalho dividido livre de âncoras e sem "objectness" usado nos modelos YOLOv8. 
O YOLOv3u mantém a mesma arquitetura de "backbone" e "neck" do YOLOv3, mas com o cabeçalho de detecção atualizado do YOLOv8. + +![Ultralytics YOLOv3](https://raw.githubusercontent.com/ultralytics/assets/main/yolov3/banner-yolov3.png) + +## Principais Características + +- **YOLOv3:** Introduziu o uso de três escalas diferentes para detecção, aproveitando três tamanhos diferentes de kernels de detecção: 13x13, 26x26 e 52x52. Isso melhorou significativamente a precisão da detecção para objetos de diferentes tamanhos. Além disso, o YOLOv3 adicionou recursos como previsões multi-rótulos para cada caixa delimitadora e uma rede de extração de características melhor. + +- **YOLOv3-Ultralytics:** A implementação do YOLOv3 pela Ultralytics oferece o mesmo desempenho do modelo original, porém possui suporte adicional para mais modelos pré-treinados, métodos de treinamento adicionais e opções de personalização mais fáceis. Isso torna o modelo mais versátil e fácil de usar para aplicações práticas. + +- **YOLOv3u:** Este modelo atualizado incorpora o cabeçalho dividido livre de âncoras e "objectness" do YOLOv8. Ao eliminar a necessidade de caixas de âncoras pré-definidas e pontuações de "objectness", esse design de cabeçalho de detecção pode melhorar a capacidade do modelo de detectar objetos de tamanhos e formatos variados. Isso torna o YOLOv3u mais robusto e preciso para tarefas de detecção de objetos. + +## Tarefas e Modos Suportados + +A série YOLOv3, incluindo YOLOv3, YOLOv3-Ultralytics e YOLOv3u, foi projetada especificamente para tarefas de detecção de objetos. Esses modelos são conhecidos por sua eficácia em vários cenários do mundo real, equilibrando precisão e velocidade. Cada variante oferece recursos e otimizações únicos, tornando-os adequados para uma variedade de aplicações. + +Os três modelos suportam um conjunto abrangente de modos, garantindo versatilidade em várias etapas do desenvolvimento e implantação de modelos. 
Esses modos incluem [Inferência](../modes/predict.md), [Validação](../modes/val.md), [Treinamento](../modes/train.md) e [Exportação](../modes/export.md), fornecendo aos usuários um conjunto completo de ferramentas para detecção eficaz de objetos. + +| Tipo de Modelo | Tarefas Suportadas | Inferência | Validação | Treinamento | Exportação | +|--------------------|-------------------------------------------|------------|-----------|-------------|------------| +| YOLOv3 | [Detecção de Objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3-Ultralytics | [Detecção de Objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3u | [Detecção de Objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +Esta tabela fornece uma visão rápida das capacidades de cada variante do YOLOv3, destacando sua versatilidade e adequação para várias tarefas e modos operacionais em fluxos de trabalho de detecção de objetos. + +## Exemplos de Uso + +Esta seção apresenta exemplos simples de treinamento e inferência do YOLOv3. Para obter documentação completa sobre esses e outros [modos](../modes/index.md), consulte as páginas de documentação do [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) e [Export](../modes/export.md). + +!!!
Example "Exemplo" + + === "Python" + + Modelos pré-treinados do PyTorch `*.pt`, bem como arquivos de configuração `*.yaml`, podem ser passados para a classe `YOLO()` para criar uma instância do modelo em Python: + + ```python + from ultralytics import YOLO + + # Carregue um modelo YOLOv3n pré-treinado na COCO + model = YOLO('yolov3n.pt') + + # Exiba informações sobre o modelo (opcional) + model.info() + + # Treine o modelo no conjunto de dados de exemplo COCO8 por 100 épocas + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Execute inferência com o modelo YOLOv3n na imagem 'bus.jpg' + results = model('caminho/para/bus.jpg') + ``` + + === "CLI" + + Comandos CLI estão disponíveis para executar diretamente os modelos: + + ```bash + # Carregue um modelo YOLOv3n pré-treinado na COCO e treine-o no conjunto de dados de exemplo COCO8 por 100 épocas + yolo train model=yolov3n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Carregue um modelo YOLOv3n pré-treinado na COCO e execute inferência na imagem 'bus.jpg' + yolo predict model=yolov3n.pt source=caminho/para/bus.jpg + ``` + +## Citações e Reconhecimentos + +Se você utilizar o YOLOv3 em sua pesquisa, por favor, cite os artigos originais do YOLO e o repositório Ultralytics YOLOv3: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{redmon2018yolov3, + title={YOLOv3: An Incremental Improvement}, + author={Redmon, Joseph and Farhadi, Ali}, + journal={arXiv preprint arXiv:1804.02767}, + year={2018} + } + ``` + +Agradecemos a Joseph Redmon e Ali Farhadi por desenvolverem o YOLOv3 original. diff --git a/docs/pt/models/yolov4.md b/docs/pt/models/yolov4.md new file mode 100644 index 0000000..c05c196 --- /dev/null +++ b/docs/pt/models/yolov4.md @@ -0,0 +1,71 @@ +--- +comments: true +description: Explore nosso guia detalhado sobre o YOLOv4, um detector de objetos em tempo real de última geração. Entenda seus destaques arquiteturais, recursos inovadores e exemplos de aplicação. 
+keywords: ultralytics, YOLOv4, detecção de objetos, rede neural, detecção em tempo real, detector de objetos, aprendizado de máquina +--- + +# YOLOv4: Detecção de Objetos Rápida e Precisa + +Bem-vindo à página de documentação do Ultralytics para o YOLOv4, um detector de objetos em tempo real de última geração lançado em 2020 por Alexey Bochkovskiy em [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet). O YOLOv4 foi projetado para fornecer o equilíbrio ideal entre velocidade e precisão, tornando-o uma excelente escolha para muitas aplicações. + +![Diagrama da arquitetura do YOLOv4](https://user-images.githubusercontent.com/26833433/246185689-530b7fe8-737b-4bb0-b5dd-de10ef5aface.png) +**Diagrama da arquitetura do YOLOv4**. Mostra o design intricado da rede do YOLOv4, incluindo os componentes backbone, neck e head, bem como suas camadas interconectadas para uma detecção de objetos em tempo real otimizada. + +## Introdução + +YOLOv4 significa You Only Look Once versão 4. É um modelo de detecção de objetos em tempo real desenvolvido para superar as limitações de versões anteriores do YOLO, como [YOLOv3](yolov3.md) e outros modelos de detecção de objetos. Ao contrário de outros detectores de objetos baseados em redes neurais convolucionais (CNN), o YOLOv4 é aplicável não apenas a sistemas de recomendação, mas também ao gerenciamento de processos independentes e à redução da entrada humana. Sua operação em unidades de processamento gráfico (GPUs) convencionais permite o uso em massa a um preço acessível, e foi projetado para funcionar em tempo real em uma GPU convencional, exigindo apenas uma GPU para treinamento. + +## Arquitetura + +O YOLOv4 faz uso de várias características inovadoras que trabalham juntas para otimizar seu desempenho. 
Estas incluem Conexões Residuais Ponderadas (WRC), Conexões Parciais Cruzadas de Estágio (CSP), Normalização Cruzada em Mini Lote (CmBN), Treinamento Autoadversário (SAT), Ativação Mish, Aumento de Dados Mosaic, Regularização DropBlock e Perda CIoU. Essas características são combinadas para obter resultados de última geração. + +Um detector de objetos típico é composto por várias partes, incluindo a entrada, o backbone, o neck e o head. O backbone do YOLOv4 é pré-treinado no ImageNet e é usado para prever as classes e caixas delimitadoras dos objetos. O backbone pode ser de vários modelos, incluindo VGG, ResNet, ResNeXt ou DenseNet. A parte neck do detector é usada para coletar mapas de características de diferentes estágios e geralmente inclui vários caminhos bottom-up e vários caminhos top-down. A parte head é responsável por fazer as detecções e classificações finais dos objetos. + +## Bag of Freebies + +O YOLOv4 também faz uso de métodos conhecidos como "bag of freebies" (saco de brindes), que são técnicas que melhoram a precisão do modelo durante o treinamento sem aumentar o custo da inferência. O aumento de dados é uma técnica comum de "bag of freebies" usada na detecção de objetos, que aumenta a variabilidade das imagens de entrada para melhorar a robustez do modelo. Alguns exemplos de aumento de dados incluem distorções fotométricas (ajustando o brilho, contraste, matiz, saturação e ruído de uma imagem) e distorções geométricas (adicionando dimensionamento aleatório, recorte, espelhamento e rotação). Essas técnicas ajudam o modelo a generalizar melhor para diferentes tipos de imagens. + +## Recursos e Desempenho + +O YOLOv4 foi projetado para oferecer velocidade e precisão ideais na detecção de objetos. A arquitetura do YOLOv4 inclui o CSPDarknet53 como o backbone, o PANet como o neck e o YOLOv3 como a cabeça de detecção.
Esse design permite que o YOLOv4 realize detecção de objetos em uma velocidade impressionante, tornando-o adequado para aplicações em tempo real. O YOLOv4 também se destaca em termos de precisão, alcançando resultados de última geração em benchmarks de detecção de objetos. + +## Exemplos de Uso + +No momento da escrita, o Ultralytics não oferece suporte a modelos YOLOv4. Portanto, os usuários interessados em usar o YOLOv4 deverão consultar diretamente o repositório YOLOv4 no GitHub para instruções de instalação e uso. + +Aqui está uma breve visão geral das etapas típicas que você pode seguir para usar o YOLOv4: + +1. Visite o repositório YOLOv4 no GitHub: [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet). + +2. Siga as instruções fornecidas no arquivo README para a instalação. Isso geralmente envolve clonar o repositório, instalar as dependências necessárias e configurar as variáveis de ambiente necessárias. + +3. Uma vez que a instalação esteja completa, você pode treinar e usar o modelo de acordo com as instruções de uso fornecidas no repositório. Isso geralmente envolve a preparação do seu conjunto de dados, a configuração dos parâmetros do modelo, o treinamento do modelo e, em seguida, o uso do modelo treinado para realizar a detecção de objetos. + +Observe que as etapas específicas podem variar dependendo do seu caso de uso específico e do estado atual do repositório YOLOv4. Portanto, é altamente recomendável consultar diretamente as instruções fornecidas no repositório YOLOv4 do GitHub. + +Lamentamos qualquer inconveniente que isso possa causar e nos esforçaremos para atualizar este documento com exemplos de uso para o Ultralytics assim que o suporte para o YOLOv4 for implementado. + +## Conclusão + +O YOLOv4 é um modelo poderoso e eficiente de detecção de objetos que oferece um equilíbrio entre velocidade e precisão. 
O uso de recursos exclusivos e técnicas "Bag of Freebies" durante o treinamento permite que ele tenha um excelente desempenho em tarefas de detecção de objetos em tempo real. O YOLOv4 pode ser treinado e usado por qualquer pessoa com uma GPU convencional, tornando-o acessível e prático para uma ampla variedade de aplicações. + +## Referências e Agradecimentos + +Gostaríamos de agradecer aos autores do YOLOv4 por suas contribuições significativas no campo da detecção de objetos em tempo real: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{bochkovskiy2020yolov4, + title={YOLOv4: Optimal Speed and Accuracy of Object Detection}, + author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, + year={2020}, + eprint={2004.10934}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +O artigo original do YOLOv4 pode ser encontrado no [arXiv](https://arxiv.org/pdf/2004.10934.pdf). Os autores disponibilizaram seu trabalho publicamente, e o código pode ser acessado no [GitHub](https://github.com/AlexeyAB/darknet). Agradecemos seus esforços em avançar o campo e tornar seu trabalho acessível à comunidade em geral. diff --git a/docs/pt/models/yolov5.md b/docs/pt/models/yolov5.md new file mode 100644 index 0000000..942b1d4 --- /dev/null +++ b/docs/pt/models/yolov5.md @@ -0,0 +1,113 @@ +--- +comments: true +description: Descubra o YOLOv5u, uma versão aprimorada do modelo YOLOv5 com uma relação aprimorada entre precisão e velocidade e vários modelos pré-treinados para várias tarefas de detecção de objetos. +keywords: YOLOv5u, detecção de objetos, modelos pré-treinados, Ultralytics, Inferência, Validação, YOLOv5, YOLOv8, sem âncora, sem certeza de objectness, aplicativos em tempo real, machine learning +--- + +# YOLOv5 + +## Visão Geral + +O YOLOv5u representa um avanço nas metodologias de detecção de objetos. 
Originário da arquitetura fundamental do modelo [YOLOv5](https://github.com/ultralytics/yolov5) desenvolvido pela Ultralytics, o YOLOv5u integra a cabeça dividida (split head) da Ultralytics, sem âncoras e sem objectness, um recurso introduzido anteriormente nos modelos [YOLOv8](yolov8.md). Essa adaptação aprimora a arquitetura do modelo, resultando em uma relação aprimorada entre precisão e velocidade em tarefas de detecção de objetos. Com base nos resultados empíricos e em suas características derivadas, o YOLOv5u oferece uma alternativa eficiente para aqueles que procuram soluções robustas tanto na pesquisa quanto em aplicações práticas. + +![Ultralytics YOLOv5](https://raw.githubusercontent.com/ultralytics/assets/main/yolov5/v70/splash.png) + +## Principais Recursos + +- **Cabeça do Ultralytics sem Âncora:** Modelos tradicionais de detecção de objetos dependem de caixas âncora predefinidas para prever as localizações dos objetos. No entanto, o YOLOv5u moderniza essa abordagem. Ao adotar uma cabeça do Ultralytics sem âncora, ele garante um mecanismo de detecção mais flexível e adaptável, melhorando consequentemente o desempenho em cenários diversos. + +- **Equilíbrio otimizado entre precisão e velocidade:** Velocidade e precisão muitas vezes puxam em direções opostas. Mas o YOLOv5u desafia esse compromisso (trade-off). Ele oferece um equilíbrio calibrado, garantindo detecções em tempo real sem comprometer a precisão. Esse recurso é particularmente valioso para aplicativos que exigem respostas rápidas, como veículos autônomos, robótica e análise de vídeo em tempo real. + +- **Variedade de Modelos Pré-Treinados:** Entendendo que diferentes tarefas exigem conjuntos de ferramentas diferentes, o YOLOv5u oferece uma variedade de modelos pré-treinados. Se você está focado em Inferência, Validação ou Treinamento, há um modelo personalizado esperando por você.
Essa variedade garante que você não esteja apenas usando uma solução genérica, mas sim um modelo ajustado especificamente para o seu desafio único. + +## Tarefas e Modos Suportados + +Os modelos YOLOv5u, com vários pesos pré-treinados, se destacam nas tarefas de [Detecção de Objetos](../tasks/detect.md). Eles suportam uma ampla gama de modos, tornando-os adequados para aplicações diversas, desde o desenvolvimento até a implantação. + +| Tipo de Modelo | Pesos Pré-Treinados | Tarefa | Inferência | Validação | Treinamento | Exportação | +|----------------|-----------------------------------------------------------------------------------------------------------------------------|-------------------------------------------|------------|-----------|-------------|------------| +| YOLOv5u | `yolov5nu`, `yolov5su`, `yolov5mu`, `yolov5lu`, `yolov5xu`, `yolov5n6u`, `yolov5s6u`, `yolov5m6u`, `yolov5l6u`, `yolov5x6u` | [Detecção de Objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +Essa tabela oferece uma visão detalhada das variantes do modelo YOLOv5u, destacando sua aplicabilidade em tarefas de detecção de objetos e suporte a diversos modos operacionais, como [Inferência](../modes/predict.md), [Validação](../modes/val.md), [Treinamento](../modes/train.md) e [Exportação](../modes/export.md). Esse suporte abrangente garante que os usuários possam aproveitar totalmente as capacidades dos modelos YOLOv5u em uma ampla gama de cenários de detecção de objetos. + +## Métricas de Desempenho + +!!! Desempenho + + === "Detecção" + + Consulte a [Documentação de Detecção](https://docs.ultralytics.com/tasks/detect/) para exemplos de uso com esses modelos treinados no conjunto de dados [COCO](https://docs.ultralytics.com/datasets/detect/coco/), que incluem 80 classes pré-treinadas. + + | Modelo | YAML | tamanho
(pixels) | mAPval
50-95 | Velocidade
CPU ONNX
(ms) | Velocidade
A100 TensorRT
(ms) | parâmetros
(M) | FLOPs
(B) | + | --------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------| -------------------------| ----------------------| -------------------------------------| -------------------------------------- | ---------------------- | ----------------- | + | [yolov5nu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5nu.pt) | [yolov5n.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 34.3 | 73.6 | 1.06 | 2.6 | 7.7 | + | [yolov5su.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5su.pt) | [yolov5s.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 43.0 | 120.7 | 1.27 | 9.1 | 24.0 | + | [yolov5mu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5mu.pt) | [yolov5m.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 49.0 | 233.9 | 1.86 | 25.1 | 64.2 | + | [yolov5lu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5lu.pt) | [yolov5l.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 52.2 | 408.4 | 2.50 | 53.2 | 135.0 | + | [yolov5xu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5xu.pt) | [yolov5x.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 53.2 | 763.2 | 3.81 | 97.2 | 246.4 | + | | | | | | | | | + | [yolov5n6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5n6u.pt) | [yolov5n6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 42.1 | 211.0 | 1.83 | 4.3 | 7.8 | + | [yolov5s6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5s6u.pt) | 
[yolov5s6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 48.6 | 422.6 | 2.34 | 15.3 | 24.6 | + | [yolov5m6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5m6u.pt) | [yolov5m6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 53.6 | 810.9 | 4.36 | 41.2 | 65.7 | + | [yolov5l6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5l6u.pt) | [yolov5l6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 55.7 | 1470.9 | 5.47 | 86.1 | 137.4 | + | [yolov5x6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5x6u.pt) | [yolov5x6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 56.8 | 2436.5 | 8.98 | 155.4 | 250.7 | + +## Exemplos de Uso + +Este exemplo fornece exemplos simples de treinamento e inferência do YOLOv5. Para documentação completa sobre esses e outros [modos](../modes/index.md), consulte as páginas de documentação [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) e [Export](../modes/export.md). + +!!! 
Example "Exemplo" + + === "Python" + + Modelos pré-treinados `*.pt` do PyTorch, assim como os arquivos de configuração `*.yaml`, podem ser passados para a classe `YOLO()` para criar uma instância do modelo em Python: + + ```python + from ultralytics import YOLO + + # Carrega um modelo YOLOv5n pré-treinado no COCO + modelo = YOLO('yolov5n.pt') + + # Mostra informações do modelo (opcional) + modelo.info() + + # Treina o modelo no conjunto de dados de exemplo COCO8 por 100 épocas + resultados = modelo.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Executa a inferência com o modelo YOLOv5n na imagem 'bus.jpg' + resultados = modelo('path/to/bus.jpg') + ``` + + === "CLI" + + Comandos CLI estão disponíveis para executar diretamente os modelos: + + ```bash + # Carrega um modelo YOLOv5n pré-treinado no COCO e o treina no conjunto de dados de exemplo COCO8 por 100 épocas + yolo train model=yolov5n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Carrega um modelo YOLOv5n pré-treinado no COCO e executa a inferência na imagem 'bus.jpg' + yolo predict model=yolov5n.pt source=path/to/bus.jpg + ``` + +## Citações e Agradecimentos + +Se você usar o YOLOv5 ou YOLOv5u em sua pesquisa, por favor, cite o repositório YOLOv5 da Ultralytics da seguinte forma: + +!!! Quote "" + + === "BibTeX" + ```bibtex + @software{yolov5, + title = {Ultralytics YOLOv5}, + author = {Glenn Jocher}, + year = {2020}, + version = {7.0}, + license = {AGPL-3.0}, + url = {https://github.com/ultralytics/yolov5}, + doi = {10.5281/zenodo.3908559}, + orcid = {0000-0001-5950-6979} + } + ``` + +Observe que os modelos YOLOv5 são fornecidos sob licenças [AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) e [Enterprise](https://ultralytics.com/license). 
diff --git a/docs/pt/models/yolov6.md b/docs/pt/models/yolov6.md new file mode 100644 index 0000000..4db8e46 --- /dev/null +++ b/docs/pt/models/yolov6.md @@ -0,0 +1,107 @@ +--- +comments: true +description: Explore Meituan YOLOv6, um modelo avançado de detecção de objetos que alcança um equilíbrio entre velocidade e precisão. Saiba mais sobre suas características, modelos pré-treinados e uso em Python. +keywords: Meituan YOLOv6, detecção de objetos, Ultralytics, documentação YOLOv6, Concatenação Bidirecional, Treinamento Assistido por Âncora, modelos pré-treinados, aplicações em tempo real +--- + +# Meituan YOLOv6 + +## Visão Geral + +O Meituan YOLOv6 é um detector de objetos de ponta que oferece um equilíbrio notável entre velocidade e precisão, tornando-se uma escolha popular para aplicações em tempo real. Este modelo apresenta várias melhorias em sua arquitetura e esquema de treinamento, incluindo a implementação de um módulo de Concatenação Bidirecional (BiC), uma estratégia de treinamento assistido por âncora (AAT) e um design aprimorado de espinha dorsal e pescoço para obter precisão de última geração no conjunto de dados COCO. + +![Meituan YOLOv6](https://user-images.githubusercontent.com/26833433/240750495-4da954ce-8b3b-41c4-8afd-ddb74361d3c2.png) +![Modelo exemplo de imagem](https://user-images.githubusercontent.com/26833433/240750557-3e9ec4f0-0598-49a8-83ea-f33c91eb6d68.png) +**Visão geral do YOLOv6.** Diagrama da arquitetura do modelo mostrando os componentes de rede redesenhados e as estratégias de treinamento que levaram a melhorias significativas no desempenho. (a) O pescoço do YOLOv6 (N e S são mostrados). Os RepBlocks são substituídos por CSPStackRep nos modelos M/L. (b) A estrutura de um módulo BiC. (c) Um bloco SimCSPSPPF. ([fonte](https://arxiv.org/pdf/2301.05586.pdf)).
+ +### Principais Características + +- **Módulo de Concatenação Bidirecional (BiC):** O YOLOv6 introduz um módulo BiC no pescoço do detector, aprimorando os sinais de localização e oferecendo ganhos de desempenho com uma degradação de velocidade insignificante. +- **Estratégia de Treinamento Assistido por Âncora (AAT):** Este modelo propõe AAT para aproveitar os benefícios dos paradigmas baseados em âncoras e sem âncoras sem comprometer a eficiência da inferência. +- **Design de Espinha Dorsal e Pescoço Aprimorado:** Ao aprofundar o YOLOv6 para incluir mais uma etapa na espinha dorsal e no pescoço, este modelo alcança desempenho de última geração no conjunto de dados COCO com entrada de alta resolução. +- **Estratégia de Auto-Destilação:** Uma nova estratégia de auto-destilação é implementada para aumentar o desempenho de modelos menores do YOLOv6, aprimorando o ramo auxiliar de regressão durante o treinamento e removendo-o durante a inferência para evitar uma queda significativa na velocidade. + +## Métricas de Desempenho + +O YOLOv6 fornece vários modelos pré-treinados com diferentes escalas: + +- YOLOv6-N: 37,5% AP na val2017 do COCO a 1187 FPS com GPU NVIDIA Tesla T4. +- YOLOv6-S: 45,0% de AP a 484 FPS. +- YOLOv6-M: 50,0% de AP a 226 FPS. +- YOLOv6-L: 52,8% de AP a 116 FPS. +- YOLOv6-L6: Precisão de última geração em tempo real. + +O YOLOv6 também fornece modelos quantizados para diferentes precisões e modelos otimizados para plataformas móveis. + +## Exemplos de Uso + +Este exemplo fornece exemplos simples de treinamento e inferência do YOLOv6. Para documentação completa sobre esses e outros [modos](../modes/index.md), consulte as páginas de documentação [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) e [Export](../modes/export.md). + +!!! 
Example "Exemplo" + + === "Python" + + Modelos pré-treinados `*.pt` do PyTorch, assim como arquivos de configuração `*.yaml`, podem ser passados à classe `YOLO()` para criar uma instância do modelo em Python: + + ```python + from ultralytics import YOLO + + # Constrói um modelo YOLOv6n do zero + model = YOLO('yolov6n.yaml') + + # Exibe informações do modelo (opcional) + model.info() + + # Treina o modelo no conjunto de dados de exemplo COCO8 por 100 épocas + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Executa inferência com o modelo YOLOv6n na imagem 'bus.jpg' + results = model('caminho/para/bus.jpg') + ``` + + === "CLI" + + Comandos da CLI estão disponíveis para executar diretamente os modelos: + + ```bash + # Constrói um modelo YOLOv6n do zero e o treina no conjunto de dados de exemplo COCO8 por 100 épocas + yolo train model=yolov6n.yaml data=coco8.yaml epochs=100 imgsz=640 + + # Constrói um modelo YOLOv6n do zero e executa inferência na imagem 'bus.jpg' + yolo predict model=yolov6n.yaml source=caminho/para/bus.jpg + ``` + +## Tarefas e Modos Suportados + +A série YOLOv6 oferece uma variedade de modelos, cada um otimizado para [Detecção de Objetos](../tasks/detect.md) de alta performance. Esses modelos atendem a diferentes necessidades computacionais e requisitos de precisão, tornando-os versáteis para uma ampla variedade de aplicações.
+ +| Tipo de Modelo | Pesos Pré-treinados | Tarefas Suportadas | Inferência | Validação | Treinamento | Exportação | +|----------------|---------------------|-------------------------------------------|------------|-----------|-------------|------------| +| YOLOv6-N | `yolov6-n.pt` | [Detecção de Objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-S | `yolov6-s.pt` | [Detecção de Objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-M | `yolov6-m.pt` | [Detecção de Objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L | `yolov6-l.pt` | [Detecção de Objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L6 | `yolov6-l6.pt` | [Detecção de Objetos](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +Esta tabela fornece uma visão geral detalhada das variantes do modelo YOLOv6, destacando suas capacidades em tarefas de detecção de objetos e sua compatibilidade com vários modos operacionais, como [inferência](../modes/predict.md), [validação](../modes/val.md), [treinamento](../modes/train.md) e [exportação](../modes/export.md). Esse suporte abrangente garante que os usuários possam aproveitar totalmente as capacidades dos modelos YOLOv6 em uma ampla gama de cenários de detecção de objetos. + +## Citações e Agradecimentos + +Gostaríamos de agradecer aos autores por suas contribuições significativas no campo da detecção de objetos em tempo real: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{li2023yolov6, + title={YOLOv6 v3.0: A Full-Scale Reloading}, + author={Chuyi Li and Lulu Li and Yifei Geng and Hongliang Jiang and Meng Cheng and Bo Zhang and Zaidan Ke and Xiaoming Xu and Xiangxiang Chu}, + year={2023}, + eprint={2301.05586}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + + O artigo original do YOLOv6 pode ser encontrado no [arXiv](https://arxiv.org/abs/2301.05586). Os autores disponibilizaram publicamente seu trabalho, e o código pode ser acessado no [GitHub](https://github.com/meituan/YOLOv6). 
Agradecemos seus esforços em avançar no campo e disponibilizar seu trabalho para a comunidade em geral. diff --git a/docs/pt/models/yolov7.md b/docs/pt/models/yolov7.md new file mode 100644 index 0000000..69896b9 --- /dev/null +++ b/docs/pt/models/yolov7.md @@ -0,0 +1,66 @@ +--- +comments: true +description: Explore o YOLOv7, um detector de objetos em tempo real. Entenda sua velocidade superior, impressionante precisão e foco exclusivo em otimização treinável de recursos gratuitos. +keywords: YOLOv7, detector de objetos em tempo real, state-of-the-art, Ultralytics, conjunto de dados MS COCO, reparametrização de modelo, atribuição dinâmica de rótulo, escalonamento estendido, escalonamento composto +--- + +# YOLOv7: Treinável Bag-of-Freebies + +O YOLOv7 é um detector de objetos em tempo real state-of-the-art que supera todos os detectores de objetos conhecidos em termos de velocidade e precisão na faixa de 5 FPS a 160 FPS. Ele possui a maior precisão (56,8% de AP) entre todos os detectores de objetos em tempo real conhecidos com 30 FPS ou mais na GPU V100. Além disso, o YOLOv7 supera outros detectores de objetos, como YOLOR, YOLOX, Scaled-YOLOv4, YOLOv5 e muitos outros em velocidade e precisão. O modelo é treinado no conjunto de dados MS COCO do zero, sem usar outros conjuntos de dados ou pesos pré-treinados. O código-fonte para o YOLOv7 está disponível no GitHub. + +![Comparação YOLOv7 com outros detectores de objetos](https://github.com/ultralytics/ultralytics/assets/26833433/5e1e0420-8122-4c79-b8d0-2860aa79af92) +**Comparação de detectores de objetos state-of-the-art.** +A partir dos resultados na Tabela 2, sabemos que o método proposto tem a melhor relação velocidade-precisão de forma abrangente. Se compararmos o YOLOv7-tiny-SiLU com o YOLOv5-N (r6.1), nosso método é 127 FPS mais rápido e 10,7% mais preciso em AP. Além disso, o YOLOv7 tem 51,4% de AP em uma taxa de quadros de 161 FPS, enquanto o PPYOLOE-L com o mesmo AP tem apenas uma taxa de quadros de 78 FPS.
Em termos de uso de parâmetros, o YOLOv7 é 41% menor do que o PPYOLOE-L. Se compararmos o YOLOv7-X com uma velocidade de inferência de 114 FPS com o YOLOv5-L (r6.1) com uma velocidade de inferência de 99 FPS, o YOLOv7-X pode melhorar o AP em 3,9%. Se o YOLOv7-X for comparado com o YOLOv5-X (r6.1) de escala similar, a velocidade de inferência do YOLOv7-X é 31 FPS mais rápida. Além disso, em termos da quantidade de parâmetros e cálculos, o YOLOv7-X reduz 22% dos parâmetros e 8% dos cálculos em comparação com o YOLOv5-X (r6.1), mas melhora o AP em 2,2% ([Fonte](https://arxiv.org/pdf/2207.02696.pdf)). + +## Visão Geral + +A detecção de objetos em tempo real é um componente importante em muitos sistemas de visão computacional, incluindo rastreamento de múltiplos objetos, direção autônoma, robótica e análise de imagens médicas. Nos últimos anos, o desenvolvimento de detecção de objetos em tempo real tem se concentrado em projetar arquiteturas eficientes e melhorar a velocidade de inferência de várias CPUs, GPUs e unidades de processamento neural (NPUs). O YOLOv7 suporta tanto GPUs móveis quanto dispositivos GPU, desde a borda até a nuvem. + +Ao contrário dos detectores de objetos em tempo real tradicionais que se concentram na otimização de arquitetura, o YOLOv7 introduz um foco na otimização do processo de treinamento. Isso inclui módulos e métodos de otimização projetados para melhorar a precisão da detecção de objetos sem aumentar o custo de inferência, um conceito conhecido como "treinável bag-of-freebies". + +## Recursos Principais + +O YOLOv7 apresenta vários recursos principais: + +1. **Reparametrização do Modelo**: O YOLOv7 propõe um modelo reparametrizado planejado, que é uma estratégia aplicável a camadas em diferentes redes com o conceito de caminho de propagação de gradiente. + +2. 
**Atribuição Dinâmica de Rótulo**: O treinamento do modelo com várias camadas de saída apresenta um novo problema: "Como atribuir alvos dinâmicos para as saídas de diferentes ramificações?" Para resolver esse problema, o YOLOv7 introduz um novo método de atribuição de rótulo chamado atribuição de rótulo guiada pelo líder, de grossa a fina (coarse-to-fine lead guided label assignment). + +3. **Escalonamento Estendido e Composto**: O YOLOv7 propõe métodos de "escalonamento estendido" e "escalonamento composto" para o detector de objetos em tempo real que podem utilizar efetivamente parâmetros e cálculos. + +4. **Eficiência**: O método proposto pelo YOLOv7 pode reduzir efetivamente cerca de 40% dos parâmetros e 50% dos cálculos do detector de objetos em tempo real state-of-the-art, além de apresentar uma velocidade de inferência mais rápida e maior precisão de detecção. + +## Exemplos de Uso + +No momento em que este texto foi escrito, a Ultralytics ainda não oferece suporte aos modelos YOLOv7. Portanto, qualquer usuário interessado em usar o YOLOv7 precisará se referir diretamente ao repositório do YOLOv7 no GitHub para obter instruções de instalação e uso. + +Aqui está uma breve visão geral das etapas típicas que você pode seguir para usar o YOLOv7: + +1. Acesse o repositório do YOLOv7 no GitHub: [https://github.com/WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7). + +2. Siga as instruções fornecidas no arquivo README para a instalação. Isso normalmente envolve clonar o repositório, instalar as dependências necessárias e configurar quaisquer variáveis de ambiente necessárias. + +3. Após a conclusão da instalação, você pode treinar e usar o modelo conforme as instruções de uso fornecidas no repositório. Isso geralmente envolve a preparação do conjunto de dados, a configuração dos parâmetros do modelo, o treinamento do modelo e, em seguida, o uso do modelo treinado para realizar a detecção de objetos.
+ +Observe que as etapas específicas podem variar dependendo do caso de uso específico e do estado atual do repositório do YOLOv7. Portanto, é altamente recomendável consultar diretamente as instruções fornecidas no repositório do YOLOv7 no GitHub. + +Lamentamos qualquer inconveniente que isso possa causar e nos esforçaremos para atualizar este documento com exemplos de uso para a Ultralytics assim que o suporte para o YOLOv7 for implementado. + +## Citações e Agradecimentos + +Gostaríamos de agradecer aos autores do YOLOv7 por suas contribuições significativas no campo da detecção de objetos em tempo real: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{wang2022yolov7, + title={{YOLOv7}: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors}, + author={Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + journal={arXiv preprint arXiv:2207.02696}, + year={2022} + } + ``` + +O artigo original do YOLOv7 pode ser encontrado no [arXiv](https://arxiv.org/pdf/2207.02696.pdf). Os autores disponibilizaram publicamente seu trabalho, e o código pode ser acessado no [GitHub](https://github.com/WongKinYiu/yolov7). Agradecemos seus esforços em avançar o campo e tornar seu trabalho acessível à comunidade em geral. diff --git a/docs/pt/models/yolov8.md b/docs/pt/models/yolov8.md new file mode 100644 index 0000000..0240a43 --- /dev/null +++ b/docs/pt/models/yolov8.md @@ -0,0 +1,162 @@ +--- +comments: true +description: Explore as emocionantes características do YOLOv8, a versão mais recente do nosso detector de objetos em tempo real! Saiba como as arquiteturas avançadas, modelos pré-treinados e o equilíbrio ideal entre precisão e velocidade tornam o YOLOv8 a escolha perfeita para as suas tarefas de detecção de objetos. 
+keywords: YOLOv8, Ultralytics, detector de objetos em tempo real, modelos pré-treinados, documentação, detecção de objetos, série YOLO, arquiteturas avançadas, precisão, velocidade +--- + +# YOLOv8 + +## Visão Geral + +O YOLOv8 é a versão mais recente da série YOLO de detectores de objetos em tempo real, oferecendo um desempenho de ponta em termos de precisão e velocidade. Construindo sobre as inovações das versões anteriores do YOLO, o YOLOv8 introduz novas características e otimizações que o tornam uma escolha ideal para diversas tarefas de detecção de objetos em uma ampla variedade de aplicações. + +![YOLOv8 da Ultralytics](https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/yolo-comparison-plots.png) + +## Principais Características + +- **Arquiteturas Avançadas de Backbone e Neck:** O YOLOv8 utiliza arquiteturas avançadas de backbone e neck, resultando em uma melhor extração de características e desempenho na detecção de objetos. +- **Anchor-free Split Ultralytics Head:** O YOLOv8 adota um head Ultralytics dividido sem ancoragem, o que contribui para uma melhor precisão e um processo de detecção mais eficiente em comparação com abordagens baseadas em âncoras. +- **Equilíbrio Otimizado entre Precisão e Velocidade:** Com foco em manter um equilíbrio ideal entre precisão e velocidade, o YOLOv8 é adequado para tarefas de detecção de objetos em tempo real em diversas áreas de aplicação. +- **Variedade de Modelos Pré-treinados:** O YOLOv8 oferece uma variedade de modelos pré-treinados para atender a diversas tarefas e requisitos de desempenho, tornando mais fácil encontrar o modelo adequado para o seu caso de uso específico. + +## Tarefas e Modos Suportados + +A série YOLOv8 oferece uma variedade de modelos, cada um especializado em tarefas específicas de visão computacional. 
Esses modelos são projetados para atender a diversos requisitos, desde a detecção de objetos até tarefas mais complexas, como segmentação de instâncias, detecção de poses/pontos-chave e classificação. + +Cada variante da série YOLOv8 é otimizada para a respectiva tarefa, garantindo alto desempenho e precisão. Além disso, esses modelos são compatíveis com diversos modos operacionais, incluindo [Inferência](../modes/predict.md), [Validação](../modes/val.md), [Treinamento](../modes/train.md) e [Exportação](../modes/export.md), facilitando o uso em diferentes estágios de implantação e desenvolvimento. + +| Modelo | Nomes de Arquivo | Tarefa | Inferência | Validação | Treinamento | Exportação | +|-------------|----------------------------------------------------------------------------------------------------------------|--------------------------------------------------|------------|-----------|-------------|------------| +| YOLOv8 | `yolov8n.pt` `yolov8s.pt` `yolov8m.pt` `yolov8l.pt` `yolov8x.pt` | [Detecção](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-seg | `yolov8n-seg.pt` `yolov8s-seg.pt` `yolov8m-seg.pt` `yolov8l-seg.pt` `yolov8x-seg.pt` | [Segmentação de Instâncias](../tasks/segment.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-pose | `yolov8n-pose.pt` `yolov8s-pose.pt` `yolov8m-pose.pt` `yolov8l-pose.pt` `yolov8x-pose.pt` `yolov8x-pose-p6.pt` | [Pose/Pontos-chave](../tasks/pose.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-cls | `yolov8n-cls.pt` `yolov8s-cls.pt` `yolov8m-cls.pt` `yolov8l-cls.pt` `yolov8x-cls.pt` | [Classificação](../tasks/classify.md) | ✅ | ✅ | ✅ | ✅ | + +Esta tabela fornece uma visão geral das variantes de modelos YOLOv8, destacando suas aplicações em tarefas específicas e sua compatibilidade com diversos modos operacionais, como inferência, validação, treinamento e exportação. Ela demonstra a versatilidade e robustez da série YOLOv8, tornando-os adequados para diversas aplicações em visão computacional. + +## Métricas de Desempenho + +!!! 
Desempenho + + === "Detecção (COCO)" + + Consulte a [Documentação de Detecção](https://docs.ultralytics.com/tasks/detect/) para exemplos de uso com esses modelos treinados no conjunto de dados [COCO](https://docs.ultralytics.com/datasets/detect/coco/), que inclui 80 classes pré-treinadas. + + | Modelo | tamanho
(pixels) | mAPval
50-95 | Velocidade
CPU ONNX
(ms) | Velocidade
A100 TensorRT
(ms) | parâmetros
(M) | FLOPs
(B) | + | --------------------------------------------------------------------------------------- | ----------------------- | -------------------- | ----------------------------------- | --------------------------------------- | ---------------------- | ------------------ | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37,3 | 80,4 | 0,99 | 3,2 | 8,7 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44,9 | 128,4 | 1,20 | 11,2 | 28,6 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50,2 | 234,7 | 1,83 | 25,9 | 78,9 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52,9 | 375,2 | 2,39 | 43,7 | 165,2 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53,9 | 479,1 | 3,53 | 68,2 | 257,8 | + + === "Detecção (Open Images V7)" + + Consulte a [Documentação de Detecção](https://docs.ultralytics.com/tasks/detect/) para exemplos de uso com esses modelos treinados no conjunto de dados [Open Images V7](https://docs.ultralytics.com/datasets/detect/open-images-v7/), que inclui 600 classes pré-treinadas. + + | Modelo | tamanho
(pixels) | mAPval
50-95 | Velocidade
CPU ONNX
(ms) | Velocidade
A100 TensorRT
(ms) | parâmetros
(M) | FLOPs
(B) | + | ----------------------------------------------------------------------------------------- | ----------------------- | -------------------- | ----------------------------------- | --------------------------------------- | ---------------------- | ------------------ | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-oiv7.pt) | 640 | 18,4 | 142,4 | 1,21 | 3,5 | 10,5 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-oiv7.pt) | 640 | 27,7 | 183,1 | 1,40 | 11,4 | 29,7 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-oiv7.pt) | 640 | 33,6 | 408,5 | 2,26 | 26,2 | 80,6 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-oiv7.pt) | 640 | 34,9 | 596,9 | 2,43 | 44,1 | 167,4 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-oiv7.pt) | 640 | 36,3 | 860,6 | 3,56 | 68,7 | 260,6 | + + === "Segmentação (COCO)" + + Consulte a [Documentação de Segmentação](https://docs.ultralytics.com/tasks/segment/) para exemplos de uso com esses modelos treinados no conjunto de dados [COCO](https://docs.ultralytics.com/datasets/segment/coco/), que inclui 80 classes pré-treinadas. + + | Modelo | tamanho
(pixels) | mAPbox
50-95 | mAPmáscara
50-95 | Velocidade
CPU ONNX
(ms) | Velocidade
A100 TensorRT
(ms) | parâmetros
(M) | FLOPs
(B) | + | ---------------------------------------------------------------------------------------------- | ----------------------- | -------------------- | ------------------------ | ----------------------------------- | --------------------------------------- | ---------------------- | ------------------ | + | [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36,7 | 30,5 | 96,1 | 1,21 | 3,4 | 12,6 | + | [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44,6 | 36,8 | 155,7 | 1,47 | 11,8 | 42,6 | + | [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49,9 | 40,8 | 317,0 | 2,18 | 27,3 | 110,2 | + | [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52,3 | 42,6 | 572,4 | 2,79 | 46,0 | 220,5 | + | [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53,4 | 43,4 | 712,1 | 4,02 | 71,8 | 344,1 | + + === "Classificação (ImageNet)" + + Consulte a [Documentação de Classificação](https://docs.ultralytics.com/tasks/classify/) para exemplos de uso com esses modelos treinados no conjunto de dados [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/), que inclui 1000 classes pré-treinadas. + + | Modelo | tamanho
(pixels) | acurácia
top1 | acurácia
top5 | Velocidade
CPU ONNX
(ms) | Velocidade
A100 TensorRT
(ms) | parâmetros
(M) | FLOPs
(B) a 640 | + | ------------------------------------------------------------------------------------------------ | ----------------------- | --------------------- | --------------------- | ----------------------------------- | --------------------------------------- | ---------------------- | ------------------------ | + | [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66,6 | 87,0 | 12,9 | 0,31 | 2,7 | 4,3 | + | [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72,3 | 91,1 | 23,4 | 0,35 | 6,4 | 13,5 | + | [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76,4 | 93,2 | 85,4 | 0,62 | 17,0 | 42,7 | + | [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78,0 | 94,1 | 163,0 | 0,87 | 37,5 | 99,7 | + | [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78,4 | 94,3 | 232,0 | 1,01 | 57,4 | 154,8 | + + === "Pose (COCO)" + + Consulte a [Documentação de Estimativa de Pose](https://docs.ultralytics.com/tasks/pose/) para exemplos de uso com esses modelos treinados no conjunto de dados [COCO](https://docs.ultralytics.com/datasets/pose/coco/), que inclui 1 classe pré-treinada, 'person'. + + | Modelo | tamanho
(pixels) | mAPpose
50-95 | mAPpose
50 | Velocidade
CPU ONNX
(ms) | Velocidade
A100 TensorRT
(ms) | parâmetros
(M) | FLOPs
(B) | + | ---------------------------------------------------------------------------------------------- | ----------------------- | --------------------- | ------------------ | ----------------------------------- | --------------------------------------- | ---------------------- | ------------------ | + | [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50,4 | 80,1 | 131,8 | 1,18 | 3,3 | 9,2 | + | [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60,0 | 86,2 | 233,2 | 1,42 | 11,6 | 30,2 | + | [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65,0 | 88,8 | 456,3 | 2,00 | 26,4 | 81,0 | + | [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67,6 | 90,0 | 784,5 | 2,59 | 44,4 | 168,6 | + | [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69,2 | 90,2 | 1607,1 | 3,73 | 69,4 | 263,2 | + | [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71,6 | 91,2 | 4088,7 | 10,04 | 99,1 | 1066,4 | + +## Exemplos de Uso + +Este exemplo fornece exemplos simples de treinamento e inferência do YOLOv8. Para a documentação completa desses e outros [modos](../modes/index.md), consulte as páginas de documentação [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) e [Export](../modes/export.md). + +Observe que o exemplo abaixo é para modelos YOLOv8 de [Detecção](../tasks/detect.md) para detecção de objetos. Para outras tarefas suportadas, consulte a documentação de [Segmentação](../tasks/segment.md), [Classificação](../tasks/classify.md) e [Pose](../tasks/pose.md). + +!!! 
Example "Exemplo" + + === "Python" + + Modelos pré-treinados `*.pt` PyTorch, bem como arquivos de configuração `*.yaml`, podem ser passados para a classe `YOLO()` para criar uma instância do modelo em Python: + + ```python + from ultralytics import YOLO + + # Carregar um modelo YOLOv8n pré-treinado para COCO + model = YOLO('yolov8n.pt') + + # Exibir informações do modelo (opcional) + model.info() + + # Treinar o modelo no exemplo de conjunto de dados COCO8 por 100 épocas + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Executar inferência com o modelo YOLOv8n na imagem 'bus.jpg' + results = model('caminho/para/bus.jpg') + ``` + + === "CLI" + + Comandos da CLI estão disponíveis para executar os modelos diretamente: + + ```bash + # Carregar um modelo YOLOv8n pré-treinado para COCO e treiná-lo no exemplo de conjunto de dados COCO8 por 100 épocas + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Carregar um modelo YOLOv8n pré-treinado para COCO e executar inferência na imagem 'bus.jpg' + yolo predict model=yolov8n.pt source=caminho/para/bus.jpg + ``` + +## Citações e Reconhecimentos + +Se você utilizar o modelo YOLOv8 ou qualquer outro software deste repositório em seu trabalho, por favor cite-o utilizando o formato abaixo: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @software{yolov8_ultralytics, + author = {Glenn Jocher and Ayush Chaurasia and Jing Qiu}, + title = {Ultralytics YOLOv8}, + version = {8.0.0}, + year = {2023}, + url = {https://github.com/ultralytics/ultralytics}, + orcid = {0000-0001-5950-6979, 0000-0002-7603-6750, 0000-0003-3783-7069}, + license = {AGPL-3.0} + } + ``` + +Observe que o DOI está pendente e será adicionado à citação assim que estiver disponível. Os modelos YOLOv8 são disponibilizados sob as licenças [AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) e [Enterprise](https://ultralytics.com/license). 
diff --git a/docs/pt/modes/benchmark.md b/docs/pt/modes/benchmark.md new file mode 100644 index 0000000..f311481 --- /dev/null +++ b/docs/pt/modes/benchmark.md @@ -0,0 +1,94 @@ +--- +comments: true +description: Aprenda a avaliar a velocidade e a precisão do YOLOv8 em diversos formatos de exportação; obtenha informações sobre métricas mAP50-95, accuracy_top5 e mais. +keywords: Ultralytics, YOLOv8, benchmarking, perfilagem de velocidade, perfilagem de precisão, mAP50-95, accuracy_top5, ONNX, OpenVINO, TensorRT, formatos de exportação YOLO +--- + +# Benchmarking de Modelos com o Ultralytics YOLO + +Ecossistema Ultralytics YOLO e integrações + +## Introdução + +Uma vez que seu modelo esteja treinado e validado, o próximo passo lógico é avaliar seu desempenho em diversos cenários do mundo real. O modo de benchmark no Ultralytics YOLOv8 serve a esse propósito, oferecendo uma estrutura robusta para avaliar a velocidade e a precisão do seu modelo em uma gama de formatos de exportação. + +## Por Que o Benchmarking é Crucial? + +- **Decisões Informadas:** Obtenha insights sobre o equilíbrio entre velocidade e precisão. +- **Alocação de Recursos:** Entenda como diferentes formatos de exportação se comportam em diferentes hardwares. +- **Otimização:** Aprenda qual formato de exportação oferece o melhor desempenho para o seu caso específico. +- **Eficiência de Custos:** Faça uso mais eficiente dos recursos de hardware com base nos resultados do benchmark. + +### Métricas Chave no Modo de Benchmark + +- **mAP50-95:** Para detecção de objetos, segmentação e estimativa de pose. +- **accuracy_top5:** Para classificação de imagens. +- **Tempo de Inferência:** Tempo levado para cada imagem em milissegundos. + +### Formatos de Exportação Suportados + +- **ONNX:** Para desempenho ótimo em CPU +- **TensorRT:** Para eficiência máxima em GPU +- **OpenVINO:** Para otimização em hardware Intel +- **CoreML, TensorFlow SavedModel e Mais:** Para uma variedade de necessidades de implantação. 
+ +!!! Tip "Dica" + + * Exporte para ONNX ou OpenVINO para acelerar até 3x a velocidade em CPU. + * Exporte para TensorRT para acelerar até 5x em GPU. + +## Exemplos de Uso + +Execute benchmarks do YOLOv8n em todos os formatos de exportação suportados incluindo ONNX, TensorRT etc. Consulte a seção Argumentos abaixo para ver uma lista completa de argumentos de exportação. + +!!! Example "Exemplo" + + === "Python" + + ```python + from ultralytics.utils.benchmarks import benchmark + + # Benchmark na GPU + benchmark(model='yolov8n.pt', data='coco8.yaml', imgsz=640, half=False, device=0) + ``` + === "CLI" + + ```bash + yolo benchmark model=yolov8n.pt data='coco8.yaml' imgsz=640 half=False device=0 + ``` + +## Argumentos + +Argumentos como `model`, `data`, `imgsz`, `half`, `device` e `verbose` proporcionam aos usuários flexibilidade para ajustar os benchmarks às suas necessidades específicas e comparar o desempenho de diferentes formatos de exportação com facilidade. + +| Chave | Valor | Descrição | +|-----------|---------|----------------------------------------------------------------------------------------| +| `model` | `None` | caminho para o arquivo do modelo, ou seja, yolov8n.pt, yolov8n.yaml | +| `data` | `None` | caminho para o YAML com dataset de benchmarking (sob o rótulo `val`) | +| `imgsz` | `640` | tamanho da imagem como um escalar ou lista (h, w), ou seja, (640, 480) | +| `half` | `False` | quantização FP16 | +| `int8` | `False` | quantização INT8 | +| `device` | `None` | dispositivo para execução, ou seja, dispositivo cuda=0 ou device=0,1,2,3 ou device=cpu | +| `verbose` | `False` | não continuar em erro (bool), ou limiar mínimo para val (float) | + +## Formatos de Exportação + +Os benchmarks tentarão executar automaticamente em todos os possíveis formatos de exportação listados abaixo. 
+ +| Formato | Argumento `format` | Modelo | Metadados | Argumentos | +|-----------------------------------------------------------------------|--------------------|---------------------------|-----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [Modelo Salvo do TF](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [GraphDef do TF](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +Veja os detalhes completos de `exportação` na página [Export](https://docs.ultralytics.com/modes/export/). 
diff --git a/docs/pt/modes/export.md b/docs/pt/modes/export.md new file mode 100644 index 0000000..a6cb313 --- /dev/null +++ b/docs/pt/modes/export.md @@ -0,0 +1,108 @@ +--- +comments: true +description: Guia passo a passo sobre como exportar seus modelos YOLOv8 para vários formatos como ONNX, TensorRT, CoreML e mais para implantação. Explore agora! +keywords: YOLO, YOLOv8, Ultralytics, Exportação de modelo, ONNX, TensorRT, CoreML, TensorFlow SavedModel, OpenVINO, PyTorch, exportar modelo +--- + +# Exportação de Modelo com Ultralytics YOLO + +Ecossistema Ultralytics YOLO e integrações + +## Introdução + +O objetivo final de treinar um modelo é implantá-lo para aplicações no mundo real. O modo de exportação no Ultralytics YOLOv8 oferece uma ampla gama de opções para exportar seu modelo treinado para diferentes formatos, tornando-o implantável em várias plataformas e dispositivos. Este guia abrangente visa orientá-lo através das nuances da exportação de modelos, mostrando como alcançar a máxima compatibilidade e performance. + +

+
+ +
+ Assista: Como Exportar Modelo Treinado Customizado do Ultralytics YOLOv8 e Executar Inferência ao Vivo na Webcam. +

+ +## Por Que Escolher o Modo de Exportação do YOLOv8? + +- **Versatilidade:** Exporte para múltiplos formatos incluindo ONNX, TensorRT, CoreML e mais. +- **Performance:** Ganhe até 5x aceleração em GPU com TensorRT e 3x aceleração em CPU com ONNX ou OpenVINO. +- **Compatibilidade:** Torne seu modelo universalmente implantável em numerosos ambientes de hardware e software. +- **Facilidade de Uso:** Interface de linha de comando simples e API Python para exportação rápida e direta de modelos. + +### Principais Recursos do Modo de Exportação + +Aqui estão algumas das funcionalidades de destaque: + +- **Exportação com Um Clique:** Comandos simples para exportação em diferentes formatos. +- **Exportação em Lote:** Exporte modelos capazes de inferência em lote. +- **Inferência Otimizada:** Modelos exportados são otimizados para tempos de inferência mais rápidos. +- **Vídeos Tutoriais:** Guias e tutoriais detalhados para uma experiência de exportação tranquila. + +!!! Tip "Dica" + + * Exporte para ONNX ou OpenVINO para até 3x aceleração em CPU. + * Exporte para TensorRT para até 5x aceleração em GPU. + +## Exemplos de Uso + +Exporte um modelo YOLOv8n para um formato diferente como ONNX ou TensorRT. Veja a seção de Argumentos abaixo para uma lista completa dos argumentos de exportação. + +!!! Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n.pt') # carrega um modelo oficial + model = YOLO('caminho/para/best.pt') # carrega um modelo treinado personalizado + + # Exportar o modelo + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # exporta modelo oficial + yolo export model=caminho/para/best.pt format=onnx # exporta modelo treinado personalizado + ``` + +## Argumentos + +Configurações de exportação para modelos YOLO referem-se às várias configurações e opções usadas para salvar ou exportar o modelo para uso em outros ambientes ou plataformas. 
Essas configurações podem afetar a performance, tamanho e compatibilidade do modelo com diferentes sistemas. Algumas configurações comuns de exportação de YOLO incluem o formato do arquivo de modelo exportado (por exemplo, ONNX, TensorFlow SavedModel), o dispositivo em que o modelo será executado (por exemplo, CPU, GPU) e a presença de recursos adicionais como máscaras ou múltiplos rótulos por caixa. Outros fatores que podem afetar o processo de exportação incluem a tarefa específica para a qual o modelo está sendo usado e os requisitos ou restrições do ambiente ou plataforma alvo. É importante considerar e configurar cuidadosamente essas configurações para garantir que o modelo exportado seja otimizado para o caso de uso pretendido e possa ser usado eficazmente no ambiente alvo. + +| Chave | Valor | Descrição | +|-------------|-----------------|---------------------------------------------------------------------| +| `format` | `'torchscript'` | formato para exportação | +| `imgsz` | `640` | tamanho da imagem como escalar ou lista (h, w), ou seja, (640, 480) | +| `keras` | `False` | usar Keras para exportação TF SavedModel | +| `optimize` | `False` | TorchScript: otimizar para mobile | +| `half` | `False` | quantização FP16 | +| `int8` | `False` | quantização INT8 | +| `dynamic` | `False` | ONNX/TensorRT: eixos dinâmicos | +| `simplify` | `False` | ONNX/TensorRT: simplificar modelo | +| `opset` | `None` | ONNX: versão do opset (opcional, padrão para a mais recente) | +| `workspace` | `4` | TensorRT: tamanho do espaço de trabalho (GB) | +| `nms` | `False` | CoreML: adicionar NMS | + +## Formatos de Exportação + +Os formatos de exportação disponíveis para YOLOv8 estão na tabela abaixo. Você pode exportar para qualquer formato usando o argumento `format`, ou seja, `format='onnx'` ou `format='engine'`. 
+ +| Formato | Argumento `format` | Modelo | Metadados | Argumentos | +|--------------------------------------------------------------------|--------------------|---------------------------|-----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | diff --git a/docs/pt/modes/index.md b/docs/pt/modes/index.md new file mode 100644 index 0000000..332489b --- /dev/null +++ b/docs/pt/modes/index.md @@ -0,0 +1,73 @@ +--- +comments: true +description: Do treinamento ao rastreamento, aproveite ao máximo o YOLOv8 da Ultralytics. 
Obtenha insights e exemplos para cada modo suportado, incluindo validação, exportação e benchmarking. +keywords: Ultralytics, YOLOv8, Aprendizado de Máquina, Detecção de Objetos, Treinamento, Validação, Predição, Exportação, Rastreamento, Benchmarking +--- + +# Modos Ultralytics YOLOv8 + +Ecossistema e integrações do Ultralytics YOLO + +## Introdução + +O Ultralytics YOLOv8 não é apenas mais um modelo de detecção de objetos; é um framework versátil projetado para cobrir todo o ciclo de vida dos modelos de aprendizado de máquina — desde a ingestão de dados e treinamento do modelo até a validação, implantação e rastreamento no mundo real. Cada modo serve a um propósito específico e é projetado para oferecer a flexibilidade e eficiência necessárias para diferentes tarefas e casos de uso. + +

+
+ +
+ Assista: Tutorial dos Modos Ultralytics: Treinar, Validar, Prever, Exportar e Benchmark. +

+ +### Visão Geral dos Modos + +Entender os diferentes **modos** que o Ultralytics YOLOv8 suporta é crítico para tirar o máximo proveito de seus modelos: + +- **Modo Treino**: Ajuste fino do seu modelo em conjuntos de dados personalizados ou pré-carregados. +- **Modo Validação (Val)**: Um checkpoint pós-treinamento para validar o desempenho do modelo. +- **Modo Predição (Predict)**: Libere o poder preditivo do seu modelo em dados do mundo real. +- **Modo Exportação (Export)**: Prepare seu modelo para implantação em vários formatos. +- **Modo Rastreamento (Track)**: Estenda seu modelo de detecção de objetos para aplicações de rastreamento em tempo real. +- **Modo Benchmarking**: Analise a velocidade e precisão do seu modelo em diversos ambientes de implantação. + +Este guia abrangente visa fornecer uma visão geral e insights práticos para cada modo, ajudando você a aproveitar o potencial total do YOLOv8. + +## [Treinar](train.md) + +O modo Treinar é utilizado para treinar um modelo YOLOv8 em um conjunto de dados personalizado. Neste modo, o modelo é treinado usando o conjunto de dados especificado e os hiperparâmetros escolhidos. O processo de treinamento envolve otimizar os parâmetros do modelo para que ele possa prever com precisão as classes e localizações de objetos em uma imagem. + +[Exemplos de Treino](train.md){ .md-button } + +## [Validar](val.md) + +O modo Validar é utilizado para validar um modelo YOLOv8 após ter sido treinado. Neste modo, o modelo é avaliado em um conjunto de validação para medir sua precisão e desempenho de generalização. Este modo pode ser usado para ajustar os hiperparâmetros do modelo para melhorar seu desempenho. + +[Exemplos de Validação](val.md){ .md-button } + +## [Prever](predict.md) + +O modo Prever é utilizado para fazer previsões usando um modelo YOLOv8 treinado em novas imagens ou vídeos. Neste modo, o modelo é carregado de um arquivo de checkpoint, e o usuário pode fornecer imagens ou vídeos para realizar a inferência. 
O modelo prevê as classes e localizações dos objetos nas imagens ou vídeos fornecidos. + +[Exemplos de Predição](predict.md){ .md-button } + +## [Exportar](export.md) + +O modo Exportar é utilizado para exportar um modelo YOLOv8 para um formato que possa ser utilizado para implantação. Neste modo, o modelo é convertido para um formato que possa ser utilizado por outras aplicações de software ou dispositivos de hardware. Este modo é útil ao implantar o modelo em ambientes de produção. + +[Exemplos de Exportação](export.md){ .md-button } + +## [Rastrear](track.md) + +O modo Rastrear é utilizado para rastrear objetos em tempo real usando um modelo YOLOv8. Neste modo, o modelo é carregado de um arquivo de checkpoint, e o usuário pode fornecer um fluxo de vídeo ao vivo para realizar o rastreamento de objetos em tempo real. Este modo é útil para aplicações como sistemas de vigilância ou carros autônomos. + +[Exemplos de Rastreamento](track.md){ .md-button } + +## [Benchmark](benchmark.md) + +O modo Benchmark é utilizado para fazer um perfil da velocidade e precisão de vários formatos de exportação para o YOLOv8. Os benchmarks fornecem informações sobre o tamanho do formato exportado, suas métricas `mAP50-95` (para detecção de objetos, segmentação e pose) ou `accuracy_top5` (para classificação), e o tempo de inferência em milissegundos por imagem em diversos formatos de exportação, como ONNX, OpenVINO, TensorRT e outros. Essas informações podem ajudar os usuários a escolher o formato de exportação ótimo para seu caso de uso específico, com base em seus requisitos de velocidade e precisão. + +[Exemplos de Benchmark](benchmark.md){ .md-button } diff --git a/docs/pt/modes/predict.md b/docs/pt/modes/predict.md new file mode 100644 index 0000000..319f1d2 --- /dev/null +++ b/docs/pt/modes/predict.md @@ -0,0 +1,226 @@ +--- +comments: true +description: Descubra como usar o modo predict do YOLOv8 para diversas tarefas. 
Aprenda sobre diferentes fontes de inferência, como imagens, vídeos e formatos de dados. +keywords: Ultralytics, YOLOv8, modo predict, fontes de inferência, tarefas de previsão, modo de streaming, processamento de imagens, processamento de vídeo, aprendizado de máquina, IA +--- + +# Predição de Modelo com Ultralytics YOLO + +Ecossistema e integrações do Ultralytics YOLO + +## Introdução + +No mundo do aprendizado de máquina e visão computacional, o processo de extrair significado de dados visuais é chamado de 'inferência' ou 'predição'. O Ultralytics YOLOv8 oferece um recurso poderoso conhecido como **modo predict**, que é projetado sob medida para inferência em tempo real de alto desempenho em uma ampla gama de fontes de dados. + +

+
+ +
+ Assista: Como Extrair as Saídas do Modelo Ultralytics YOLOv8 para Projetos Personalizados. +

+ +## Aplicações no Mundo Real + +| Manufatura | Esportes | Segurança | +|:-----------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------:| +| ![Detecção de Peças de Reposição de Veículo](https://github.com/RizwanMunawar/ultralytics/assets/62513924/a0f802a8-0776-44cf-8f17-93974a4a28a1) | ![Detecção de Jogador de Futebol](https://github.com/RizwanMunawar/ultralytics/assets/62513924/7d320e1f-fc57-4d7f-a691-78ee579c3442) | ![Detecção de Queda de Pessoas](https://github.com/RizwanMunawar/ultralytics/assets/62513924/86437c4a-3227-4eee-90ef-9efb697bdb43) | +| Detecção de Peças de Reposição de Veículo | Detecção de Jogador de Futebol | Detecção de Queda de Pessoas | + +## Por Que Usar o Ultralytics YOLO para Inferência? + +Aqui está o porquê de você considerar o modo predict do YOLOv8 para suas diversas necessidades de inferência: + +- **Versatilidade:** Capaz de fazer inferências em imagens, vídeos e até transmissões ao vivo. +- **Desempenho:** Projetado para processamento em tempo real e de alta velocidade sem sacrificar a precisão. +- **Facilidade de Uso:** Interfaces Python e CLI intuitivas para implantação e testes rápidos. +- **Altamente Customizável:** Várias configurações e parâmetros para ajustar o comportamento de inferência do modelo de acordo com suas necessidades específicas. 
+ +### Recursos Chave do Modo Predict + +O modo predict do YOLOv8 é projetado para ser robusto e versátil, apresentando: + +- **Compatibilidade com Múltiplas Fontes de Dados:** Se seus dados estão na forma de imagens individuais, uma coleção de imagens, arquivos de vídeo ou transmissões de vídeo em tempo real, o modo predict atende a todas as necessidades. +- **Modo de Streaming:** Use o recurso de streaming para criar um gerador de objetos `Results` eficiente em memória. Ative isso definindo `stream=True` no método de chamada do preditor. +- **Processamento em Lote:** A capacidade de processar várias imagens ou quadros de vídeo em um único lote, acelerando ainda mais o tempo de inferência. +- **Integração Amigável:** Integração fácil com pipelines de dados existentes e outros componentes de software, graças à sua API flexível. + +Os modelos Ultralytics YOLO retornam ou uma lista Python de objetos `Results`, ou um gerador Python de objetos `Results` eficiente em memória quando `stream=True` é passado para o modelo durante a inferência: + +!!! 
Example "Predict" + + === "Retorna uma lista com `stream=False`" + ```python + from ultralytics import YOLO + + # Carrega um modelo + model = YOLO('yolov8n.pt') # modelo YOLOv8n pré-treinado + + # Executa a inferência em lote em uma lista de imagens + results = model(['im1.jpg', 'im2.jpg']) # retorna uma lista de objetos Results + + # Processa a lista de resultados + for result in results: + boxes = result.boxes # Objeto Boxes para saídas de bbox + masks = result.masks # Objeto Masks para saídas de máscaras de segmentação + keypoints = result.keypoints # Objeto Keypoints para saídas de pose + probs = result.probs # Objeto Probs para saídas de classificação + ``` + + === "Retorna um gerador com `stream=True`" + ```python + from ultralytics import YOLO + + # Carrega um modelo + model = YOLO('yolov8n.pt') # modelo YOLOv8n pré-treinado + + # Executa a inferência em lote em uma lista de imagens + results = model(['im1.jpg', 'im2.jpg'], stream=True) # retorna um gerador de objetos Results + + # Processa o gerador de resultados + for result in results: + boxes = result.boxes # Objeto Boxes para saídas de bbox + masks = result.masks # Objeto Masks para saídas de máscaras de segmentação + keypoints = result.keypoints # Objeto Keypoints para saídas de pose + probs = result.probs # Objeto Probs para saídas de classificação + ``` + +## Fontes de Inferência + +O YOLOv8 pode processar diferentes tipos de fontes de entrada para inferência, conforme mostrado na tabela abaixo. As fontes incluem imagens estáticas, transmissões de vídeo e vários formatos de dados. A tabela também indica se cada fonte pode ser usada no modo de streaming com o argumento `stream=True` ✅. O modo de streaming é benéfico para processar vídeos ou transmissões ao vivo, pois cria um gerador de resultados em vez de carregar todos os quadros na memória. + +!!! Tip "Dica" + + Use `stream=True` para processar vídeos longos ou grandes conjuntos de dados para gerenciar a memória de forma eficiente. 
Quando `stream=False`, os resultados de todos os quadros ou pontos de dados são armazenados na memória, o que pode aumentar rapidamente e causar erros de falta de memória para grandes entradas. Em contraste, `stream=True` utiliza um gerador, que mantém apenas os resultados do quadro atual ou ponto de dados na memória, reduzindo significativamente o consumo de memória e prevenindo problemas de falta dela. + +| Fonte | Argumento | Tipo | Notas | +|-----------------|--------------------------------------------|-----------------|-------------------------------------------------------------------------------------------------------------------------| +| imagem | `'image.jpg'` | `str` ou `Path` | Arquivo de imagem único. | +| URL | `'https://ultralytics.com/images/bus.jpg'` | `str` | URL para uma imagem. | +| captura de tela | `'screen'` | `str` | Captura uma captura de tela. | +| PIL | `Image.open('im.jpg')` | `PIL.Image` | Formato HWC com canais RGB. | +| OpenCV | `cv2.imread('im.jpg')` | `np.ndarray` | Formato HWC com canais BGR `uint8 (0-255)`. | +| numpy | `np.zeros((640,1280,3))` | `np.ndarray` | Formato HWC com canais BGR `uint8 (0-255)`. | +| torch | `torch.zeros(16,3,320,640)` | `torch.Tensor` | Formato BCHW com canais RGB `float32 (0.0-1.0)`. | +| CSV | `'sources.csv'` | `str` ou `Path` | Arquivo CSV contendo caminhos para imagens, vídeos ou diretórios. | +| vídeo ✅ | `'video.mp4'` | `str` ou `Path` | Arquivo de vídeo em formatos como MP4, AVI, etc. | +| diretório ✅ | `'path/'` | `str` ou `Path` | Caminho para um diretório contendo imagens ou vídeos. | +| glob ✅ | `'path/*.jpg'` | `str` | Padrão glob para combinar vários arquivos. Use o caractere `*` como curinga. | +| YouTube ✅ | `'https://youtu.be/LNwODJXcvt4'` | `str` | URL para um vídeo do YouTube. | +| stream ✅ | `'rtsp://example.com/media.mp4'` | `str` | URL para protocolos de streaming como RTSP, RTMP, TCP ou um endereço IP. 
| +| multi-stream ✅ | `'list.streams'` | `str` ou `Path` | Arquivo de texto `*.streams` com uma URL de stream por linha, ou seja, 8 streams serão executados em lote de tamanho 8. | + +Abaixo estão exemplos de código para usar cada tipo de fonte: + +!!! Example "Fontes de previsão" + + === "imagem" + Executa a inferência em um arquivo de imagem. + ```python + from ultralytics import YOLO + + # Carrega um modelo YOLOv8n pré-treinado + model = YOLO('yolov8n.pt') + + # Define o caminho para o arquivo de imagem + source = 'caminho/para/imagem.jpg' + + # Executa a inferência na fonte + results = model(source) # lista de objetos Results + ``` + + === "captura de tela" + Executa a inferência no conteúdo atual da tela como uma captura de tela. + ```python + from ultralytics import YOLO + + # Carrega um modelo YOLOv8n pré-treinado + model = YOLO('yolov8n.pt') + + # Define a captura de tela atual como fonte + source = 'screen' + + # Executa a inferência na fonte + results = model(source) # lista de objetos Results + ``` + + === "URL" + Executa a inferência em uma imagem ou vídeo hospedado remotamente via URL. + ```python + from ultralytics import YOLO + + # Carrega um modelo YOLOv8n pré-treinado + model = YOLO('yolov8n.pt') + + # Define a URL remota da imagem ou vídeo + source = 'https://ultralytics.com/images/bus.jpg' + + # Executa a inferência na fonte + results = model(source) # lista de objetos Results + ``` + + === "PIL" + Executa a inferência em uma imagem aberta com a Biblioteca de Imagens do Python (PIL). + ```python + from PIL import Image + from ultralytics import YOLO + + # Carrega um modelo YOLOv8n pré-treinado + model = YOLO('yolov8n.pt') + + # Abre uma imagem usando PIL + source = Image.open('caminho/para/imagem.jpg') + + # Executa a inferência na fonte + results = model(source) # lista de objetos Results + ``` + + === "OpenCV" + Executa a inferência em uma imagem lida com OpenCV. 
+ ```python + import cv2 + from ultralytics import YOLO + + # Carrega um modelo YOLOv8n pré-treinado + model = YOLO('yolov8n.pt') + + # Lê uma imagem usando OpenCV + source = cv2.imread('caminho/para/imagem.jpg') + + # Executa a inferência na fonte + results = model(source) # lista de objetos Results + ``` + + === "numpy" + Executa a inferência em uma imagem representada como um array numpy. + ```python + import numpy as np + from ultralytics import YOLO + + # Carrega um modelo YOLOv8n pré-treinado + model = YOLO('yolov8n.pt') + + # Cria um array random de numpy com forma HWC (640, 640, 3) com valores no intervalo [0, 255] e tipo uint8 + source = np.random.randint(low=0, high=255, size=(640, 640, 3), dtype='uint8') + + # Executa a inferência na fonte + results = model(source) # lista de objetos Results + ``` + + === "torch" + Executa a inferência em uma imagem representada como um tensor PyTorch. + ```python + import torch + from ultralytics import YOLO + + # Carrega um modelo YOLOv8n pré-treinado + model = YOLO('yolov8n.pt') + + # Cria um tensor random de torch com forma BCHW (1, 3, 640, 640) com valores no intervalo [0, 1] e tipo float32 + source = torch.rand(1, 3, 640, 640, dtype=torch.float32) + + # Executa a inferência na fonte + results = model(source) # lista de objetos Results diff --git a/docs/pt/modes/track.md b/docs/pt/modes/track.md new file mode 100644 index 0000000..b2148f8 --- /dev/null +++ b/docs/pt/modes/track.md @@ -0,0 +1,200 @@ +--- +comments: true +description: Aprenda a usar o Ultralytics YOLO para rastreamento de objetos em fluxos de vídeo. Guias para usar diferentes rastreadores e personalizar configurações de rastreador. 
+keywords: Ultralytics, YOLO, rastreamento de objetos, fluxos de vídeo, BoT-SORT, ByteTrack, guia em Python, guia CLI +--- + +# Rastreamento de Múltiplos Objetos com Ultralytics YOLO + +Exemplos de rastreamento de múltiplos objetos + +Rastreamento de objetos no âmbito da análise de vídeo é uma tarefa crucial que não apenas identifica a localização e classe dos objetos dentro do quadro, mas também mantém um ID único para cada objeto detectado à medida que o vídeo avança. As aplicações são ilimitadas — variando desde vigilância e segurança até análises esportivas em tempo real. + +## Por Que Escolher Ultralytics YOLO para Rastreamento de Objetos? + +A saída dos rastreadores da Ultralytics é consistente com a detecção de objetos padrão, mas com o valor agregado dos IDs dos objetos. Isso facilita o rastreamento de objetos em fluxos de vídeo e a realização de análises subsequentes. Aqui está o porquê de considerar usar Ultralytics YOLO para suas necessidades de rastreamento de objetos: + +- **Eficiência:** Processa fluxos de vídeo em tempo real sem comprometer a precisão. +- **Flexibilidade:** Suporta múltiplos algoritmos de rastreamento e configurações. +- **Facilidade de Uso:** Simples API em Python e opções CLI para rápida integração e implantação. +- **Personalização:** Fácil de usar com modelos YOLO treinados personalizados, permitindo integração em aplicações específicas de domínio. + +

+
+ +
+ Assista: Detecção e Rastreamento de Objetos com Ultralytics YOLOv8. +

+ +## Aplicações no Mundo Real + +| Transporte | Varejo | Aquicultura | +|:------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------:| +| ![Rastreamento de Veículos](https://github.com/RizwanMunawar/ultralytics/assets/62513924/ee6e6038-383b-4f21-ac29-b2a1c7d386ab) | ![Rastreamento de Pessoas](https://github.com/RizwanMunawar/ultralytics/assets/62513924/93bb4ee2-77a0-4e4e-8eb6-eb8f527f0527) | ![Rastreamento de Peixes](https://github.com/RizwanMunawar/ultralytics/assets/62513924/a5146d0f-bfa8-4e0a-b7df-3c1446cd8142) | +| Rastreamento de Veículos | Rastreamento de Pessoas | Rastreamento de Peixes | + +## Características em Destaque + +Ultralytics YOLO estende suas funcionalidades de detecção de objetos para fornecer rastreamento de objetos robusto e versátil: + +- **Rastreamento em Tempo Real:** Acompanha objetos de forma contínua em vídeos de alta taxa de quadros. +- **Suporte a Múltiplos Rastreadores:** Escolha dentre uma variedade de algoritmos de rastreamento estabelecidos. +- **Configurações de Rastreador Personalizáveis:** Adapte o algoritmo de rastreamento para atender requisitos específicos ajustando vários parâmetros. + +## Rastreadores Disponíveis + +Ultralytics YOLO suporta os seguintes algoritmos de rastreamento. Eles podem ser ativados passando o respectivo arquivo de configuração YAML, como `tracker=tracker_type.yaml`: + +* [BoT-SORT](https://github.com/NirAharon/BoT-SORT) - Use `botsort.yaml` para ativar este rastreador. +* [ByteTrack](https://github.com/ifzhang/ByteTrack) - Use `bytetrack.yaml` para ativar este rastreador. + +O rastreador padrão é o BoT-SORT. 
+ +## Rastreamento + +Para executar o rastreador em fluxos de vídeo, use um modelo Detect, Segment ou Pose treinado, como YOLOv8n, YOLOv8n-seg e YOLOv8n-pose. + +!!! Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo oficial ou personalizado + model = YOLO('yolov8n.pt') # Carregar um modelo Detect oficial + model = YOLO('yolov8n-seg.pt') # Carregar um modelo Segment oficial + model = YOLO('yolov8n-pose.pt') # Carregar um modelo Pose oficial + model = YOLO('caminho/para/melhor.pt') # Carregar um modelo treinado personalizado + + # Realizar rastreamento com o modelo + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True) # Rastreamento com rastreador padrão + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True, tracker="bytetrack.yaml") # Rastreamento com o rastreador ByteTrack + ``` + + === "CLI" + + ```bash + # Realizar rastreamento com vários modelos usando a interface de linha de comando + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" # Modelo Detect oficial + yolo track model=yolov8n-seg.pt source="https://youtu.be/LNwODJXcvt4" # Modelo Segment oficial + yolo track model=yolov8n-pose.pt source="https://youtu.be/LNwODJXcvt4" # Modelo Pose oficial + yolo track model=caminho/para/melhor.pt source="https://youtu.be/LNwODJXcvt4" # Modelo treinado personalizado + + # Rastrear usando o rastreador ByteTrack + yolo track model=caminho/para/melhor.pt tracker="bytetrack.yaml" + ``` + +Como pode ser visto no uso acima, o rastreamento está disponível para todos os modelos Detect, Segment e Pose executados em vídeos ou fontes de streaming. + +## Configuração + +### Argumentos de Rastreamento + +A configuração de rastreamento compartilha propriedades com o modo Predict, como `conf`, `iou`, e `show`. Para mais configurações, consulte a página do modo [Predict](https://docs.ultralytics.com/modes/predict/). + +!!! 
Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Configurar os parâmetros de rastreamento e executar o rastreador + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True) + ``` + + === "CLI" + + ```bash + # Configurar parâmetros de rastreamento e executar o rastreador usando a interface de linha de comando + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3 iou=0.5 show + ``` + +### Seleção de Rastreador + +A Ultralytics também permite que você use um arquivo de configuração de rastreador modificado. Para fazer isso, simplesmente faça uma cópia de um arquivo de configuração de rastreador (por exemplo, `custom_tracker.yaml`) de [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers) e modifique quaisquer configurações (exceto `tracker_type`) conforme suas necessidades. + +!!! Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar o modelo e executar o rastreador com um arquivo de configuração personalizado + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", tracker='custom_tracker.yaml') + ``` + + === "CLI" + + ```bash + # Carregar o modelo e executar o rastreador com um arquivo de configuração personalizado usando a interface de linha de comando + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml' + ``` + +Para uma lista completa de argumentos de rastreamento, consulte a página [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers). + +## Exemplos em Python + +### Loop de Persistência de Rastreamentos + +Aqui está um script em Python usando OpenCV (`cv2`) e YOLOv8 para executar rastreamento de objetos em quadros de vídeo. 
Este script ainda pressupõe que você já instalou os pacotes necessários (`opencv-python` e `ultralytics`). O argumento `persist=True` indica ao rastreador que a imagem ou quadro atual é o próximo de uma sequência e que espera rastreamentos da imagem anterior na imagem atual. + +!!! Example "Loop de fluxo com rastreamento" + + ```python + import cv2 + from ultralytics import YOLO + + # Carregar o modelo YOLOv8 + model = YOLO('yolov8n.pt') + + # Abrir o arquivo de vídeo + video_path = "caminho/para/video.mp4" + cap = cv2.VideoCapture(video_path) + + # Repetir através dos quadros de vídeo + while cap.isOpened(): + # Ler um quadro do vídeo + success, frame = cap.read() + + if success: + # Executar rastreamento YOLOv8 no quadro, persistindo rastreamentos entre quadros + results = model.track(frame, persist=True) + + # Visualizar os resultados no quadro + annotated_frame = results[0].plot() + + # Exibir o quadro anotado + cv2.imshow("Rastreamento YOLOv8", annotated_frame) + + # Interromper o loop se 'q' for pressionado + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # Interromper o loop se o fim do vídeo for atingido + break + + # Liberar o objeto de captura de vídeo e fechar a janela de exibição + cap.release() + cv2.destroyAllWindows() + ``` + +Note a mudança de `model(frame)` para `model.track(frame)`, que habilita o rastreamento de objetos ao invés de detecção simples. Este script modificado irá executar o rastreador em cada quadro do vídeo, visualizar os resultados e exibi-los em uma janela. O loop pode ser encerrado pressionando 'q'. + +## Contribuir com Novos Rastreadores + +Você é proficiente em rastreamento de múltiplos objetos e implementou ou adaptou com sucesso um algoritmo de rastreamento com Ultralytics YOLO? Convidamos você a contribuir para nossa seção de Rastreadores em [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers)! 
Suas aplicações do mundo real e soluções podem ser inestimáveis para usuários trabalhando em tarefas de rastreamento. + +Ao contribuir para esta seção, você ajuda a expandir o escopo de soluções de rastreamento disponíveis dentro do framework Ultralytics YOLO, adicionando outra camada de funcionalidade e utilidade para a comunidade. + +Para iniciar sua contribuição, por favor, consulte nosso [Guia de Contribuição](https://docs.ultralytics.com/help/contributing) para instruções completas sobre como enviar um Pedido de Pull (PR) 🛠️. Estamos ansiosos para ver o que você traz para a mesa! + +Juntos, vamos aprimorar as capacidades de rastreamento do ecossistema Ultralytics YOLO 🙏! diff --git a/docs/pt/modes/train.md b/docs/pt/modes/train.md new file mode 100644 index 0000000..b677b4b --- /dev/null +++ b/docs/pt/modes/train.md @@ -0,0 +1,206 @@ +--- +comments: true +description: Guia passo a passo para treinar modelos YOLOv8 com a YOLO da Ultralytics, incluindo exemplos de treinamento com uma única GPU e múltiplas GPUs +keywords: Ultralytics, YOLOv8, YOLO, detecção de objetos, modo de treino, conjunto de dados personalizado, treinamento com GPU, multi-GPU, hiperparâmetros, exemplos de CLI, exemplos em Python +--- + +# Treinamento de Modelos com a YOLO da Ultralytics + +Ecossistema e integrações da YOLO da Ultralytics + +## Introdução + +O treinamento de um modelo de aprendizado profundo envolve fornecer dados e ajustar seus parâmetros para que ele possa fazer previsões precisas. O modo de treino na YOLOv8 da Ultralytics é projetado para um treinamento eficaz e eficiente de modelos de detecção de objetos, aproveitando totalmente as capacidades do hardware moderno. Este guia visa cobrir todos os detalhes que você precisa para começar a treinar seus próprios modelos usando o robusto conjunto de recursos da YOLOv8. + +

+
+ +
+ Assista: Como Treinar um modelo YOLOv8 no Seu Conjunto de Dados Personalizado no Google Colab. +

+ +## Por Que Escolher a YOLO da Ultralytics para Treinamento? + +Aqui estão algumas razões convincentes para optar pelo modo de Treino da YOLOv8: + +- **Eficiência:** Aproveite ao máximo seu hardware, seja em um setup com uma única GPU ou expandindo para múltiplas GPUs. +- **Versatilidade:** Treine em conjuntos de dados personalizados, além dos já disponíveis, como COCO, VOC e ImageNet. +- **Facilidade de Uso:** Interfaces de linha de comando (CLI) e em Python simples, porém poderosas, para uma experiência de treinamento direta. +- **Flexibilidade de Hiperparâmetros:** Uma ampla gama de hiperparâmetros personalizáveis para ajustar o desempenho do modelo. + +### Principais Recursos do Modo de Treino + +Os seguintes são alguns recursos notáveis ​​do modo de Treino da YOLOv8: + +- **Download Automático de Datasets:** Datasets padrões como COCO, VOC e ImageNet são baixados automaticamente na primeira utilização. +- **Suporte a Multi-GPU:** Escalone seus esforços de treinamento de maneira uniforme entre várias GPUs para acelerar o processo. +- **Configuração de Hiperparâmetros:** Opção de modificar hiperparâmetros através de arquivos de configuração YAML ou argumentos de CLI. +- **Visualização e Monitoramento:** Acompanhamento em tempo real das métricas de treinamento e visualização do processo de aprendizagem para obter melhores insights. + +!!! Tip "Dica" + + * Conjuntos de dados YOLOv8 como COCO, VOC, ImageNet e muitos outros são baixados automaticamente na primeira utilização, ou seja, `yolo train data=coco.yaml` + +## Exemplos de Uso + +Treine o YOLOv8n no conjunto de dados COCO128 por 100 épocas com tamanho de imagem de 640. O dispositivo de treinamento pode ser especificado usando o argumento `device`. Se nenhum argumento for passado, a GPU `device=0` será usado se disponível, caso contrário, `device=cpu` será usado. Veja a seção Argumentos abaixo para uma lista completa dos argumentos de treinamento. + +!!! 
Example "Exemplo de Treinamento em Uma Única GPU e CPU" + + O dispositivo é determinado automaticamente. Se uma GPU estiver disponível, ela será usada, caso contrário, o treinamento começará na CPU. + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n.yaml') # construir um novo modelo a partir do YAML + model = YOLO('yolov8n.pt') # carregar um modelo pré-treinado (recomendado para treinamento) + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # construir a partir do YAML e transferir pesos + + # Treinar o modelo + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + # Construir um novo modelo a partir do YAML e começar o treinamento do zero + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # Começar o treinamento a partir de um modelo *.pt pré-treinado + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # Construir um novo modelo a partir do YAML, transferir pesos pré-treinados para ele e começar o treinamento + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### Treinamento com Multi-GPU + +O treinamento com múltiplas GPUs permite uma utilização mais eficiente dos recursos de hardware disponíveis, distribuindo a carga de treinamento entre várias GPUs. Esse recurso está disponível por meio da API do Python e da interface de linha de comando. Para habilitar o treinamento com várias GPUs, especifique os IDs dos dispositivos de GPU que deseja usar. + +!!! Example "Exemplo de Treinamento com Multi-GPU" + + Para treinar com 2 GPUs, dispositivos CUDA 0 e 1 use os seguintes comandos. Expanda para GPUs adicionais conforme necessário. 
+ + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n.pt') # carregar um modelo pré-treinado (recomendado para treinamento) + + # Treinar o modelo com 2 GPUs + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device=[0, 1]) + ``` + + === "CLI" + + ```bash + # Começar o treinamento a partir de um modelo *.pt pré-treinado usando as GPUs 0 e 1 + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=0,1 + ``` + +### Treinamento com Apple M1 e M2 MPS + +Com a integração do suporte para os chips Apple M1 e M2 nos modelos YOLO da Ultralytics, agora é possível treinar seus modelos em dispositivos que utilizam o poderoso framework Metal Performance Shaders (MPS). O MPS oferece uma forma de alto desempenho de executar tarefas de computação e processamento de imagens no silício personalizado da Apple. + +Para habilitar o treinamento nos chips Apple M1 e M2, você deve especificar 'mps' como seu dispositivo ao iniciar o processo de treinamento. Abaixo está um exemplo de como você pode fazer isso em Python e via linha de comando: + +!!! Example "Exemplo de Treinamento com MPS" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n.pt') # carregar um modelo pré-treinado (recomendado para treinamento) + + # Treinar o modelo com MPS + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device='mps') + ``` + + === "CLI" + + ```bash + # Começar o treinamento a partir de um modelo *.pt pré-treinado usando MPS + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=mps + ``` + +Ao aproveitar o poder computacional dos chips M1/M2, isso possibilita o processamento mais eficiente das tarefas de treinamento. Para orientações mais detalhadas e opções avançadas de configuração, consulte a [documentação do PyTorch MPS](https://pytorch.org/docs/stable/notes/mps.html). 
+ +## Registro de Logs + +Ao treinar um modelo YOLOv8, você pode achar valioso acompanhar o desempenho do modelo ao longo do tempo. É aqui que o registro de logs se torna útil. O YOLO da Ultralytics oferece suporte para três tipos de loggers - Comet, ClearML e TensorBoard. + +Para usar um logger, selecione-o no menu suspenso no trecho de código acima e execute-o. O logger escolhido será instalado e inicializado. + +### Comet + +[Comet](https://www.comet.ml/site/) é uma plataforma que permite a cientistas de dados e desenvolvedores rastrear, comparar, explicar e otimizar experimentos e modelos. Oferece funcionalidades como métricas em tempo real, diffs de código e acompanhamento de hiperparâmetros. + +Para usar o Comet: + +!!! Example "Exemplo" + + === "Python" + ```python + # pip install comet_ml + import comet_ml + + comet_ml.init() + ``` + +Lembre-se de fazer login na sua conta Comet no site deles e obter sua chave de API. Você precisará adicionar isso às suas variáveis de ambiente ou ao seu script para registrar seus experimentos. + +### ClearML + +[ClearML](https://www.clear.ml/) é uma plataforma de código aberto que automatiza o rastreamento de experimentos e ajuda com o compartilhamento eficiente de recursos. É projetada para ajudar as equipes a gerenciar, executar e reproduzir seus trabalhos de ML de maneira mais eficiente. + +Para usar o ClearML: + +!!! Example "Exemplo" + + === "Python" + ```python + # pip install clearml + import clearml + + clearml.browser_login() + ``` + +Após executar este script, você precisará fazer login na sua conta ClearML no navegador e autenticar sua sessão. + +### TensorBoard + +[TensorBoard](https://www.tensorflow.org/tensorboard) é um kit de ferramentas de visualização para TensorFlow. Permite visualizar o seu gráfico TensorFlow, plotar métricas quantitativas sobre a execução do seu gráfico e mostrar dados adicionais como imagens que passam por ele. 
+ +Para usar o TensorBoard em [Google Colab](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb): + +!!! Example "Exemplo" + + === "CLI" + ```bash + load_ext tensorboard + tensorboard --logdir ultralytics/runs # substitua pelo diretório 'runs' + ``` + +Para usar o TensorBoard localmente, execute o comando abaixo e veja os resultados em http://localhost:6006/: + +!!! Example "Exemplo" + + === "CLI" + ```bash + tensorboard --logdir ultralytics/runs # substitua pelo diretório 'runs' + ``` + +Isso irá carregar o TensorBoard e direcioná-lo para o diretório onde seus logs de treinamento estão salvos. + +Depois de configurar o seu logger, você pode então prosseguir com o treinamento do seu modelo. Todas as métricas de treinamento serão registradas automaticamente na sua plataforma escolhida, e você pode acessar esses logs para monitorar o desempenho do seu modelo ao longo do tempo, comparar diferentes modelos e identificar áreas para melhoria. diff --git a/docs/pt/modes/val.md b/docs/pt/modes/val.md new file mode 100644 index 0000000..a482f14 --- /dev/null +++ b/docs/pt/modes/val.md @@ -0,0 +1,86 @@ +--- +comments: true +description: Guia para Validação de Modelos YOLOv8. Aprenda como avaliar o desempenho dos seus modelos YOLO utilizando configurações e métricas de validação com exemplos em Python e CLI. +keywords: Ultralytics, Documentação YOLO, YOLOv8, validação, avaliação de modelo, hiperparâmetros, precisão, métricas, Python, CLI +--- + +# Validação de Modelos com Ultralytics YOLO + +Ecossistema e integrações do Ultralytics YOLO + +## Introdução + +A validação é um passo crítico no pipeline de aprendizado de máquina, permitindo que você avalie a qualidade dos seus modelos treinados. O modo Val no Ultralytics YOLOv8 fornece um robusto conjunto de ferramentas e métricas para avaliar o desempenho dos seus modelos de detecção de objetos. 
Este guia serve como um recurso completo para entender como usar efetivamente o modo Val para garantir que seus modelos sejam precisos e confiáveis. + +## Por Que Validar com o Ultralytics YOLO? + +Aqui estão as vantagens de usar o modo Val no YOLOv8: + +- **Precisão:** Obtenha métricas precisas como mAP50, mAP75 e mAP50-95 para avaliar seu modelo de forma abrangente. +- **Conveniência:** Utilize recursos integrados que lembram as configurações de treinamento, simplificando o processo de validação. +- **Flexibilidade:** Valide seu modelo com os mesmos ou diferentes conjuntos de dados e tamanhos de imagem. +- **Ajuste de Hiperparâmetros:** Utilize as métricas de validação para refinar seu modelo e obter um desempenho melhor. + +### Principais Recursos do Modo Val + +Estas são as funcionalidades notáveis oferecidas pelo modo Val do YOLOv8: + +- **Configurações Automatizadas:** Os modelos lembram suas configurações de treinamento para validação direta. +- **Suporte Multi-Métrico:** Avalie seu modelo com base em uma variedade de métricas de precisão. +- **API em Python e CLI:** Escolha entre a interface de linha de comando ou API em Python com base na sua preferência de validação. +- **Compatibilidade de Dados:** Funciona perfeitamente com conjuntos de dados usados durante a fase de treinamento, bem como conjuntos de dados personalizados. + +!!! Tip "Dica" + + * Os modelos YOLOv8 lembram automaticamente suas configurações de treinamento, então você pode validar um modelo no mesmo tamanho de imagem e no conjunto de dados original facilmente com apenas `yolo val model=yolov8n.pt` ou `model('yolov8n.pt').val()` + +## Exemplos de Uso + +Validar a precisão do modelo YOLOv8n treinado no conjunto de dados COCO128. Nenhum argumento precisa ser passado, pois o `model` retém os dados de treinamento e argumentos como atributos do modelo. Veja a seção de Argumentos abaixo para uma lista completa dos argumentos de validação. + +!!! 
Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n.pt') # carregar um modelo oficial + model = YOLO('path/to/best.pt') # carregar um modelo personalizado + + # Validar o modelo + metrics = model.val() # nenhum argumento necessário, conjunto de dados e configurações lembrados + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # uma lista contém map50-95 de cada categoria + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # validar modelo oficial + yolo detect val model=path/to/best.pt # validar modelo personalizado + ``` + +## Argumentos + +As configurações de validação para os modelos YOLO referem-se aos vários hiperparâmetros e configurações usados para avaliar o desempenho do modelo em um conjunto de dados de validação. Essas configurações podem afetar o desempenho, velocidade e precisão do modelo. Algumas configurações comuns de validação do YOLO incluem o tamanho do lote, a frequência com que a validação é realizada durante o treinamento e as métricas usadas para avaliar o desempenho do modelo. Outros fatores que podem afetar o processo de validação incluem o tamanho e a composição do conjunto de dados de validação e a tarefa específica para a qual o modelo está sendo usado. É importante ajustar e experimentar cuidadosamente essas configurações para garantir que o modelo apresente um bom desempenho no conjunto de dados de validação e para detectar e prevenir o sobreajuste. + +| Chave | Valor | Descrição | +|---------------|---------|-----------------------------------------------------------------------------------| +| `data` | `None` | caminho para o arquivo de dados, ex. 
coco128.yaml | +| `imgsz` | `640` | tamanho das imagens de entrada como inteiro | +| `batch` | `16` | número de imagens por lote (-1 para AutoBatch) | +| `save_json` | `False` | salvar resultados em arquivo JSON | +| `save_hybrid` | `False` | salvar versão híbrida das etiquetas (etiquetas + previsões adicionais) | +| `conf` | `0.001` | limite de confiança do objeto para detecção | +| `iou` | `0.6` | limiar de interseção sobre união (IoU) para NMS | +| `max_det` | `300` | número máximo de detecções por imagem | +| `half` | `True` | usar precisão meia (FP16) | +| `device` | `None` | dispositivo para execução, ex. dispositivo cuda=0/1/2/3 ou device=cpu | +| `dnn` | `False` | usar OpenCV DNN para inferência ONNX | +| `plots` | `False` | mostrar gráficos durante o treinamento | +| `rect` | `False` | val retangular com cada lote colado para minimizar o preenchimento | +| `split` | `val` | divisão do conjunto de dados para usar na validação, ex. 'val', 'test' ou 'train' | +| diff --git a/docs/pt/quickstart.md b/docs/pt/quickstart.md new file mode 100644 index 0000000..242193a --- /dev/null +++ b/docs/pt/quickstart.md @@ -0,0 +1,198 @@ +--- +comments: true +description: Explore os diversos métodos para instalar o Ultralytics usando pip, conda, git e Docker. Aprenda a usar o Ultralytics com a interface de linha de comando ou dentro dos seus projetos Python. +keywords: Instalação do Ultralytics, pip install Ultralytics, Docker install Ultralytics, interface de linha de comando do Ultralytics, interface Python do Ultralytics +--- + +## Instalação do Ultralytics + +O Ultralytics oferece diversos métodos de instalação, incluindo pip, conda e Docker. Instale o YOLOv8 através do pacote `ultralytics` pip para a versão estável mais recente ou clonando o [repositório GitHub do Ultralytics](https://github.com/ultralytics/ultralytics) para obter a versão mais atualizada. O Docker pode ser usado para executar o pacote em um contêiner isolado, evitando a instalação local. + +!!! 
Example "Instalar" + + === "Pip install (recomendado)" + Instale o pacote `ultralytics` usando pip, ou atualize uma instalação existente executando `pip install -U ultralytics`. Visite o Índice de Pacotes Python (PyPI) para mais detalhes sobre o pacote `ultralytics`: [https://pypi.org/project/ultralytics/](https://pypi.org/project/ultralytics/). + + [![PyPI version](https://badge.fury.io/py/ultralytics.svg)](https://badge.fury.io/py/ultralytics) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) + + ```bash + # Instalar o pacote ultralytics do PyPI + pip install ultralytics + ``` + + Você também pode instalar o pacote `ultralytics` diretamente do [repositório](https://github.com/ultralytics/ultralytics) GitHub. Isso pode ser útil se você desejar a versão de desenvolvimento mais recente. Certifique-se de ter a ferramenta de linha de comando Git instalada no seu sistema. O comando `@main` instala a branch `main` e pode ser modificado para outra branch, ou seja, `@my-branch`, ou removido completamente para padrão na branch `main`. + + ```bash + # Instalar o pacote ultralytics do GitHub + pip install git+https://github.com/ultralytics/ultralytics.git@main + ``` + + + === "Conda install" + Conda é um gerenciador de pacotes alternativo ao pip que também pode ser usado para instalação. Visite Anaconda para mais detalhes em [https://anaconda.org/conda-forge/ultralytics](https://anaconda.org/conda-forge/ultralytics). O repositório de feedstock do Ultralytics para atualizar o pacote conda está em [https://github.com/conda-forge/ultralytics-feedstock/](https://github.com/conda-forge/ultralytics-feedstock/). 
+ + + [![Conda Recipe](https://img.shields.io/badge/recipe-ultralytics-green.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda Version](https://img.shields.io/conda/vn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) + + ```bash + # Instalar o pacote ultralytics usando conda + conda install -c conda-forge ultralytics + ``` + + !!! Note "Nota" + + Se você está instalando em um ambiente CUDA a prática recomendada é instalar `ultralytics`, `pytorch` e `pytorch-cuda` no mesmo comando para permitir que o gerenciador de pacotes conda resolva quaisquer conflitos, ou instalar `pytorch-cuda` por último para permitir que ele substitua o pacote específico para CPU `pytorch`, se necessário. + ```bash + # Instalar todos os pacotes juntos usando conda + conda install -c pytorch -c nvidia -c conda-forge pytorch torchvision pytorch-cuda=11.8 ultralytics + ``` + + ### Imagem Docker Conda + + As imagens Docker Conda do Ultralytics também estão disponíveis em [DockerHub](https://hub.docker.com/r/ultralytics/ultralytics). Estas imagens são baseadas em [Miniconda3](https://docs.conda.io/projects/miniconda/en/latest/) e são um modo simples de começar a usar `ultralytics` em um ambiente Conda. 
+ + ```bash + # Definir o nome da imagem como uma variável + t=ultralytics/ultralytics:latest-conda + + # Puxar a imagem mais recente do ultralytics do Docker Hub + sudo docker pull $t + + # Executar a imagem ultralytics em um contêiner com suporte a GPU + sudo docker run -it --ipc=host --gpus all $t # todas as GPUs + sudo docker run -it --ipc=host --gpus '"device=2,3"' $t # especificar GPUs + ``` + + === "Git clone" + Clone o repositório `ultralytics` se você está interessado em contribuir para o desenvolvimento ou deseja experimentar com o código-fonte mais recente. Após clonar, navegue até o diretório e instale o pacote em modo editável `-e` usando pip. + ```bash + # Clonar o repositório ultralytics + git clone https://github.com/ultralytics/ultralytics + + # Navegar para o diretório clonado + cd ultralytics + + # Instalar o pacote em modo editável para desenvolvimento + pip install -e . + ``` + +Veja o arquivo [requirements.txt](https://github.com/ultralytics/ultralytics/blob/main/requirements.txt) do `ultralytics` para uma lista de dependências. Note que todos os exemplos acima instalam todas as dependências necessárias. + +

+
+ +
+ Assista: Guia de Início Rápido do Ultralytics YOLO +

+ +!!! Tip "Dica" + + Os requisitos do PyTorch variam pelo sistema operacional e pelos requisitos de CUDA, então é recomendado instalar o PyTorch primeiro seguindo as instruções em [https://pytorch.org/get-started/locally](https://pytorch.org/get-started/locally). + + + Instruções de Instalação do PyTorch + + +## Use o Ultralytics com CLI + +A interface de linha de comando (CLI) do Ultralytics permite comandos simples de uma única linha sem a necessidade de um ambiente Python. O CLI não requer personalização ou código Python. Você pode simplesmente rodar todas as tarefas do terminal com o comando `yolo`. Confira o [Guia CLI](/../usage/cli.md) para aprender mais sobre o uso do YOLOv8 pela linha de comando. + +!!! Example "Exemplo" + + === "Sintaxe" + + Os comandos `yolo` do Ultralytics usam a seguinte sintaxe: + ```bash + yolo TAREFA MODO ARGUMENTOS + + Onde TAREFA (opcional) é um entre [detect, segment, classify] + MODO (obrigatório) é um entre [train, val, predict, export, track] + ARGUMENTOS (opcional) são qualquer número de pares personalizados 'arg=valor' como 'imgsz=320' que substituem os padrões. 
+ ``` + Veja todos os ARGUMENTOS no guia completo de [Configuração](/../usage/cfg.md) ou com `yolo cfg` + + === "Train" + + Treinar um modelo de detecção por 10 épocas com uma taxa de aprendizado inicial de 0.01 + ```bash + yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01 + ``` + + === "Predict" + + Prever um vídeo do YouTube usando um modelo de segmentação pré-treinado com tamanho de imagem 320: + ```bash + yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 + ``` + + === "Val" + + Validar um modelo de detecção pré-treinado com tamanho de lote 1 e tamanho de imagem 640: + ```bash + yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640 + ``` + + === "Export" + + Exportar um modelo de classificação YOLOv8n para formato ONNX com tamanho de imagem 224 por 128 (nenhuma TAREFA necessária) + ```bash + yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128 + ``` + + === "Special" + + Executar comandos especiais para ver versão, visualizar configurações, rodar verificações e mais: + ```bash + yolo help + yolo checks + yolo version + yolo settings + yolo copy-cfg + yolo cfg + ``` + +!!! Warning "Aviso" + + Argumentos devem ser passados como pares `arg=valor`, separados por um sinal de igual `=` e delimitados por espaços ` ` entre pares. Não use prefixos de argumentos `--` ou vírgulas `,` entre os argumentos. + + - `yolo predict model=yolov8n.pt imgsz=640 conf=0.25`   ✅ + - `yolo predict model yolov8n.pt imgsz 640 conf 0.25`   ❌ + - `yolo predict --model yolov8n.pt --imgsz 640 --conf 0.25`   ❌ + +[Guia CLI](/../usage/cli.md){ .md-button } + +## Use o Ultralytics com Python + +A interface Python do YOLOv8 permite uma integração tranquila em seus projetos Python, tornando fácil carregar, executar e processar a saída do modelo. Projetada com simplicidade e facilidade de uso em mente, a interface Python permite que os usuários implementem rapidamente detecção de objetos, segmentação e classificação em seus projetos. 
Isto torna a interface Python do YOLOv8 uma ferramenta inestimável para qualquer pessoa buscando incorporar essas funcionalidades em seus projetos Python. + +Por exemplo, os usuários podem carregar um modelo, treiná-lo, avaliar o seu desempenho em um conjunto de validação e até exportá-lo para o formato ONNX com apenas algumas linhas de código. Confira o [Guia Python](/../usage/python.md) para aprender mais sobre o uso do YOLOv8 dentro dos seus projetos Python. + +!!! Example "Exemplo" + + ```python + from ultralytics import YOLO + + # Criar um novo modelo YOLO do zero + model = YOLO('yolov8n.yaml') + + # Carregar um modelo YOLO pré-treinado (recomendado para treinamento) + model = YOLO('yolov8n.pt') + + # Treinar o modelo usando o conjunto de dados 'coco128.yaml' por 3 épocas + results = model.train(data='coco128.yaml', epochs=3) + + # Avaliar o desempenho do modelo no conjunto de validação + results = model.val() + + # Realizar detecção de objetos em uma imagem usando o modelo + results = model('https://ultralytics.com/images/bus.jpg') + + # Exportar o modelo para formato ONNX + success = model.export(format='onnx') + ``` + +[Guia Python](/../usage/python.md){.md-button .md-button--primary} diff --git a/docs/pt/tasks/classify.md b/docs/pt/tasks/classify.md new file mode 100644 index 0000000..0e04bec --- /dev/null +++ b/docs/pt/tasks/classify.md @@ -0,0 +1,172 @@ +--- +comments: true +description: Aprenda sobre modelos YOLOv8 Classify para classificação de imagens. Obtenha informações detalhadas sobre Lista de Modelos Pré-treinados e como Treinar, Validar, Prever e Exportar modelos. +keywords: Ultralytics, YOLOv8, Classificação de Imagem, Modelos Pré-treinados, YOLOv8n-cls, Treinamento, Validação, Previsão, Exportação de Modelo +--- + +# Classificação de Imagens + +Exemplos de classificação de imagens + +A classificação de imagens é a tarefa mais simples das três e envolve classificar uma imagem inteira em uma de um conjunto de classes pré-definidas. 
+ +A saída de um classificador de imagem é um único rótulo de classe e uma pontuação de confiança. A classificação de imagem é útil quando você precisa saber apenas a qual classe uma imagem pertence e não precisa conhecer a localização dos objetos dessa classe ou o formato exato deles. + +!!! Tip "Dica" + + Os modelos YOLOv8 Classify usam o sufixo `-cls`, ou seja, `yolov8n-cls.pt` e são pré-treinados na [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +## [Modelos](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Aqui são mostrados os modelos pré-treinados YOLOv8 Classify. Modelos de Detecção, Segmentação e Pose são pré-treinados no dataset [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml), enquanto que os modelos de Classificação são pré-treinados no dataset [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +[Modelos](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) são baixados automaticamente do último lançamento da Ultralytics [release](https://github.com/ultralytics/assets/releases) no primeiro uso. + +| Modelo | Tamanho
(pixels) | acurácia
top1 | acurácia
top5 | Velocidade
CPU ONNX
(ms) | Velocidade
A100 TensorRT
(ms) | parâmetros
(M) | FLOPs
(B) a 640 | +|----------------------------------------------------------------------------------------------|--------------------------|-----------------------|-----------------------|-------------------------------------|------------------------------------------|------------------------|-------------------------| +| [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | +| [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | +| [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | +| [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | +| [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | + +- Os valores de **acc** são as acurácias dos modelos no conjunto de validação do dataset [ImageNet](https://www.image-net.org/). +
Reproduza com `yolo val classify data=path/to/ImageNet device=0` +- **Velocidade** média observada sobre imagens de validação da ImageNet usando uma instância [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/). +
Reproduza com `yolo val classify data=path/to/ImageNet batch=1 device=0|cpu` + +## Treino + +Treine o modelo YOLOv8n-cls no dataset MNIST160 por 100 épocas com tamanho de imagem 64. Para uma lista completa de argumentos disponíveis, veja a página de [Configuração](/../usage/cfg.md). + +!!! Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n-cls.yaml') # construir um novo modelo a partir do YAML + model = YOLO('yolov8n-cls.pt') # carregar um modelo pré-treinado (recomendado para treino) + model = YOLO('yolov8n-cls.yaml').load('yolov8n-cls.pt') # construir a partir do YAML e transferir pesos + + # Treinar o modelo + results = model.train(data='mnist160', epochs=100, imgsz=64) + ``` + + === "CLI" + + ```bash + # Construir um novo modelo a partir do YAML e começar treino do zero + yolo classify train data=mnist160 model=yolov8n-cls.yaml epochs=100 imgsz=64 + + # Começar treino de um modelo pré-treinado *.pt + yolo classify train data=mnist160 model=yolov8n-cls.pt epochs=100 imgsz=64 + + # Construir um novo modelo do YAML, transferir pesos pré-treinados e começar treino + yolo classify train data=mnist160 model=yolov8n-cls.yaml pretrained=yolov8n-cls.pt epochs=100 imgsz=64 + ``` + +### Formato do dataset + +O formato do dataset de classificação YOLO pode ser encontrado em detalhes no [Guia de Datasets](../../../datasets/classify/index.md). + +## Val + +Valide a acurácia do modelo YOLOv8n-cls treinado no dataset MNIST160. Não é necessário passar argumento, pois o `modelo` retém seus dados de treinamento e argumentos como atributos do modelo. + +!!! 
Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n-cls.pt') # carregar um modelo oficial + model = YOLO('path/to/best.pt') # carregar um modelo personalizado + + # Validar o modelo + metrics = model.val() # sem argumentos necessários, dataset e configurações lembrados + metrics.top1 # acurácia top1 + metrics.top5 # acurácia top5 + ``` + === "CLI" + + ```bash + yolo classify val model=yolov8n-cls.pt # validar modelo oficial + yolo classify val model=path/to/best.pt # validar modelo personalizado + ``` + +## Previsão + +Use um modelo YOLOv8n-cls treinado para realizar previsões em imagens. + +!!! Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n-cls.pt') # carregar um modelo oficial + model = YOLO('path/to/best.pt') # carregar um modelo personalizado + + # Prever com o modelo + results = model('https://ultralytics.com/images/bus.jpg') # prever em uma imagem + ``` + === "CLI" + + ```bash + yolo classify predict model=yolov8n-cls.pt source='https://ultralytics.com/images/bus.jpg' # prever com modelo oficial + yolo classify predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # prever com modelo personalizado + ``` + +Veja detalhes completos do modo de `previsão` na página [Predict](https://docs.ultralytics.com/modes/predict/). + +## Exportar + +Exporte um modelo YOLOv8n-cls para um formato diferente, como ONNX, CoreML, etc. + +!!! 
Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n-cls.pt') # carregar um modelo oficial + model = YOLO('path/to/best.pt') # carregar um modelo treinado personalizado + + # Exportar o modelo + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-cls.pt format=onnx # exportar modelo oficial + yolo export model=path/to/best.pt format=onnx # exportar modelo treinado personalizado + ``` + +Os formatos de exportação YOLOv8-cls disponíveis estão na tabela abaixo. Você pode prever ou validar diretamente nos modelos exportados, ou seja, `yolo predict model=yolov8n-cls.onnx`. Exemplos de uso são mostrados para seu modelo após a conclusão da exportação. + +| Formato | Argumento `format` | Modelo | Metadata | Argumentos | +|--------------------------------------------------------------------|--------------------|-------------------------------|----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-cls.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-cls.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-cls.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-cls_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-cls.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-cls.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-cls_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-cls.pb` | ❌ | `imgsz` | +| [TF 
Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-cls.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-cls_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-cls_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-cls_ncnn_model/` | ✅ | `imgsz`, `half` | + +Veja detalhes completos da `exportação` na página [Export](https://docs.ultralytics.com/modes/export/). diff --git a/docs/pt/tasks/detect.md b/docs/pt/tasks/detect.md new file mode 100644 index 0000000..d3f84c0 --- /dev/null +++ b/docs/pt/tasks/detect.md @@ -0,0 +1,185 @@ +--- +comments: true +description: Documentação oficial do YOLOv8 por Ultralytics. Aprenda como treinar, validar, predizer e exportar modelos em vários formatos. Incluindo estatísticas detalhadas de desempenho. +keywords: YOLOv8, Ultralytics, detecção de objetos, modelos pré-treinados, treinamento, validação, predição, exportação de modelos, COCO, ImageNet, PyTorch, ONNX, CoreML +--- + +# Detecção de Objetos + +Exemplos de detecção de objetos + +Detecção de objetos é uma tarefa que envolve identificar a localização e a classe de objetos em uma imagem ou fluxo de vídeo. + +A saída de um detector de objetos é um conjunto de caixas delimitadoras que cercam os objetos na imagem, junto com rótulos de classe e pontuações de confiança para cada caixa. A detecção de objetos é uma boa escolha quando você precisa identificar objetos de interesse em uma cena, mas não precisa saber exatamente onde o objeto está ou seu formato exato. + +

+
+ +
+ Assista: Detecção de Objetos com Modelo Pré-treinado Ultralytics YOLOv8. +

+ +!!! Tip "Dica" + + Os modelos YOLOv8 Detect são os modelos padrão do YOLOv8, ou seja, `yolov8n.pt` e são pré-treinados no [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml). + +## [Modelos](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Os modelos pré-treinados YOLOv8 Detect são mostrados aqui. Os modelos Detect, Segment e Pose são pré-treinados no dataset [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml), enquanto os modelos Classify são pré-treinados no dataset [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +Os [Modelos](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) são baixados automaticamente a partir do último lançamento da Ultralytics [release](https://github.com/ultralytics/assets/releases) no primeiro uso. + +| Modelo | Tamanho
(pixels) | mAPval
50-95 | Velocidade
CPU ONNX
(ms) | Velocidade
A100 TensorRT
(ms) | Parâmetros
(M) | FLOPs
(B) | +|--------------------------------------------------------------------------------------|--------------------------|----------------------|-------------------------------------|------------------------------------------|------------------------|-------------------| +| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | +| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | +| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | +| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | +| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | + +- Os valores de **mAPval** são para um único modelo e uma única escala no dataset [COCO val2017](http://cocodataset.org). +
Reproduza usando `yolo val detect data=coco.yaml device=0` +- A **Velocidade** é a média medida sobre as imagens do COCO val em uma instância + [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/). +
Reproduza usando `yolo val detect data=coco128.yaml batch=1 device=0|cpu` + +## Treinar + +Treine o YOLOv8n no dataset COCO128 por 100 épocas com tamanho de imagem 640. Para uma lista completa de argumentos disponíveis, veja a página [Configuração](/../usage/cfg.md). + +!!! Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n.yaml') # construir um novo modelo pelo YAML + model = YOLO('yolov8n.pt') # carregar um modelo pré-treinado (recomendado para treinamento) + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # construir pelo YAML e transferir pesos + + # Treinar o modelo + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # Construir um novo modelo pelo YAML e começar o treinamento do zero + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # Começar o treinamento a partir de um modelo pré-treinado *.pt + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # Construir um novo modelo pelo YAML, transferir pesos pré-treinados e começar o treinamento + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### Formato do Dataset + +O formato do dataset de detecção do YOLO pode ser encontrado em detalhes no [Guia de Datasets](../../../datasets/detect/index.md). Para converter seu dataset existente de outros formatos (como COCO, etc.) para o formato YOLO, por favor utilize a ferramenta [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) da Ultralytics. + +## Validar + +Valide a precisão do modelo YOLOv8n treinado no dataset COCO128. Não é necessário passar nenhum argumento, pois o `modelo` mantém seus `dados` de treino e argumentos como atributos do modelo. + +!!! 
Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n.pt') # carregar um modelo oficial + model = YOLO('caminho/para/best.pt') # carregar um modelo personalizado + + # Validar o modelo + metrics = model.val() # sem a necessidade de argumentos, dataset e configurações lembradas + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # uma lista contém map50-95 de cada categoria + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # validação do modelo oficial + yolo detect val model=caminho/para/best.pt # validação do modelo personalizado + ``` + +## Predizer + +Use um modelo YOLOv8n treinado para fazer predições em imagens. + +!!! Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n.pt') # carregar um modelo oficial + model = YOLO('caminho/para/best.pt') # carregar um modelo personalizado + + # Predizer com o modelo + results = model('https://ultralytics.com/images/bus.jpg') # predizer em uma imagem + ``` + === "CLI" + + ```bash + yolo detect predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' # predizer com modelo oficial + yolo detect predict model=caminho/para/best.pt source='https://ultralytics.com/images/bus.jpg' # predizer com modelo personalizado + ``` + +Veja os detalhes completos do modo `predict` na página [Predição](https://docs.ultralytics.com/modes/predict/). + +## Exportar + +Exporte um modelo YOLOv8n para um formato diferente, como ONNX, CoreML, etc. + +!!! 
Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n.pt') # carregar um modelo oficial + model = YOLO('caminho/para/best.pt') # carregar um modelo treinado personalizado + + # Exportar o modelo + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # exportar modelo oficial + yolo export model=caminho/para/best.pt format=onnx # exportar modelo treinado personalizado + ``` + +Os formatos de exportação YOLOv8 disponíveis estão na tabela abaixo. Você pode fazer predições ou validar diretamente em modelos exportados, ou seja, `yolo predict model=yolov8n.onnx`. Exemplos de uso são mostrados para o seu modelo após a exportação ser concluída. + +| Formato | Argumento `format` | Modelo | Metadados | Argumentos | +|--------------------------------------------------------------------|--------------------|---------------------------|-----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) 
| `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +Veja os detalhes completos de `exportar` na página [Exportação](https://docs.ultralytics.com/modes/export/). diff --git a/docs/pt/tasks/index.md b/docs/pt/tasks/index.md new file mode 100644 index 0000000..209cac7 --- /dev/null +++ b/docs/pt/tasks/index.md @@ -0,0 +1,55 @@ +--- +comments: true +description: Aprenda sobre as principais tarefas de visão computacional que o YOLOv8 pode realizar, incluindo detecção, segmentação, classificação e estimativa de pose. Entenda seus usos em seus projetos de IA. +keywords: Ultralytics, YOLOv8, Detecção, Segmentação, Classificação, Estimativa de Pose, Framework de IA, Tarefas de Visão Computacional +--- + +# Tarefas do Ultralytics YOLOv8 + +
+Tarefas suportadas pelo Ultralytics YOLO + +YOLOv8 é um framework de IA que suporta múltiplas tarefas de **visão computacional**. O framework pode ser usado para realizar [detecção](detect.md), [segmentação](segment.md), [classificação](classify.md) e estimativa de [pose](pose.md). Cada uma dessas tarefas tem um objetivo e caso de uso diferente. + +!!! Note "Nota" + + 🚧 Nossa documentação multilíngue está atualmente em construção e estamos trabalhando para aprimorá-la. Agradecemos sua paciência! 🙏 + +

+
+ +
+ Assista: Explore as Tarefas do Ultralytics YOLO: Detecção de Objetos, Segmentação, Rastreamento e Estimativa de Pose. +

+ +## [Detecção](detect.md) + +A detecção é a principal tarefa suportada pelo YOLOv8. Envolve detectar objetos em uma imagem ou quadro de vídeo e desenhar caixas delimitadoras ao redor deles. Os objetos detectados são classificados em diferentes categorias com base em suas características. YOLOv8 pode detectar múltiplos objetos em uma única imagem ou quadro de vídeo com alta precisão e velocidade. + +[Exemplos de Detecção](detect.md){ .md-button } + +## [Segmentação](segment.md) + +Segmentação é uma tarefa que envolve segmentar uma imagem em diferentes regiões com base no conteúdo da imagem. Cada região recebe um rótulo com base em seu conteúdo. Essa tarefa é útil em aplicações como segmentação de imagens e imagiologia médica. YOLOv8 usa uma variante da arquitetura U-Net para realizar a segmentação. + +[Exemplos de Segmentação](segment.md){ .md-button } + +## [Classificação](classify.md) + +Classificação é uma tarefa que envolve classificar uma imagem em diferentes categorias. YOLOv8 pode ser usado para classificar imagens com base em seu conteúdo. Utiliza uma variante da arquitetura EfficientNet para realizar a classificação. + +[Exemplos de Classificação](classify.md){ .md-button } + +## [Pose](pose.md) + +A detecção de pose/pontos-chave é uma tarefa que envolve detectar pontos específicos em uma imagem ou quadro de vídeo. Esses pontos são chamados de keypoints e são usados para rastrear movimento ou estimar poses. YOLOv8 pode detectar keypoints em uma imagem ou quadro de vídeo com alta precisão e velocidade. + +[Exemplos de Pose](pose.md){ .md-button } + +## Conclusão + +YOLOv8 suporta múltiplas tarefas, incluindo detecção, segmentação, classificação e detecção de keypoints. Cada uma dessas tarefas tem objetivos e casos de uso diferentes. Ao entender as diferenças entre essas tarefas, você pode escolher a tarefa apropriada para sua aplicação de visão computacional. 
diff --git a/docs/pt/tasks/pose.md b/docs/pt/tasks/pose.md new file mode 100644 index 0000000..5bb4505 --- /dev/null +++ b/docs/pt/tasks/pose.md @@ -0,0 +1,188 @@ +--- +comments: true +description: Aprenda a usar o Ultralytics YOLOv8 para tarefas de estimativa de pose. Encontre modelos pré-treinados, aprenda a treinar, validar, prever e exportar seu próprio modelo. +keywords: Ultralytics, YOLO, YOLOv8, estimativa de pose, detecção de pontos-chave, detecção de objetos, modelos pré-treinados, aprendizado de máquina, inteligência artificial +--- + +# Estimativa de Pose + +Exemplos de estimativa de pose + +A estimativa de pose é uma tarefa que envolve identificar a localização de pontos específicos em uma imagem, geralmente referidos como pontos-chave. Os pontos-chave podem representar várias partes do objeto como articulações, pontos de referência ou outras características distintas. As localizações dos pontos-chave são geralmente representadas como um conjunto de coordenadas 2D `[x, y]` ou 3D `[x, y, visível]`. + +A saída de um modelo de estimativa de pose é um conjunto de pontos que representam os pontos-chave em um objeto na imagem, geralmente junto com os escores de confiança para cada ponto. A estimativa de pose é uma boa escolha quando você precisa identificar partes específicas de um objeto em uma cena, e sua localização relativa entre si. + +

+
+ +
+ Assista: Estimativa de Pose com Ultralytics YOLOv8. +

+ +!!! Tip "Dica" + + Modelos YOLOv8 _pose_ usam o sufixo `-pose`, isto é `yolov8n-pose.pt`. Esses modelos são treinados no conjunto de dados [COCO keypoints](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco-pose.yaml) e são adequados para uma variedade de tarefas de estimativa de pose. + +## [Modelos](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Os modelos YOLOv8 Pose pré-treinados são mostrados aqui. Os modelos Detect, Segment e Pose são pré-treinados no conjunto de dados [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml), enquanto os modelos Classify são pré-treinados no conjunto de dados [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +[Modelos](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) são baixados automaticamente do último lançamento da Ultralytics [release](https://github.com/ultralytics/assets/releases) no primeiro uso. + +| Modelo | tamanho
(pixels) | mAPpose
50-95 | mAPpose
50 | Velocidade
CPU ONNX
(ms) | Velocidade
A100 TensorRT
(ms) | parâmetros
(M) | FLOPs
(B) | +|------------------------------------------------------------------------------------------------------|--------------------------|-----------------------|--------------------|-------------------------------------|------------------------------------------|------------------------|-------------------| +| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | +| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | +| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | +| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | +| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | +| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | + +- **mAPval** valores são para um único modelo em escala única no conjunto de dados [COCO Keypoints val2017](http://cocodataset.org) + . +
Reproduza com `yolo val pose data=coco-pose.yaml device=0` +- **Velocidade** média em imagens COCO val usando uma instância [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) + . +
Reproduza com `yolo val pose data=coco8-pose.yaml batch=1 device=0|cpu` + +## Treinar + +Treine um modelo YOLOv8-pose no conjunto de dados COCO8-pose. + +!!! Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n-pose.yaml') # construir um novo modelo a partir do YAML + model = YOLO('yolov8n-pose.pt') # carregar um modelo pré-treinado (recomendado para treinamento) + model = YOLO('yolov8n-pose.yaml').load('yolov8n-pose.pt') # construir a partir do YAML e transferir pesos + + # Treinar o modelo + results = model.train(data='coco8-pose.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # Construir um novo modelo a partir do YAML e começar o treinamento do zero + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml epochs=100 imgsz=640 + + # Começar treinamento de um modelo *.pt pré-treinado + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + + # Construir um novo modelo a partir do YAML, transferir pesos pré-treinados para ele e começar o treinamento + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml pretrained=yolov8n-pose.pt epochs=100 imgsz=640 + ``` + +### Formato do conjunto de dados + +O formato do conjunto de dados de pose YOLO pode ser encontrado em detalhes no [Guia de Conjuntos de Dados](../../../datasets/pose/index.md). Para converter seu conjunto de dados existente de outros formatos (como COCO etc.) para o formato YOLO, por favor, use a ferramenta [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) da Ultralytics. + +## Validar + +Valide a acurácia do modelo YOLOv8n-pose treinado no conjunto de dados COCO8-pose. Não é necessário passar nenhum argumento, pois o `model` +retém seus `data` de treinamento e argumentos como atributos do modelo. + +!!! 
Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n-pose.pt') # carregar um modelo oficial + model = YOLO('caminho/para/melhor.pt') # carregar um modelo personalizado + + # Validar o modelo + metrics = model.val() # nenhum argumento necessário, conjunto de dados e configurações lembradas + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # uma lista contém map50-95 de cada categoria + ``` + === "CLI" + + ```bash + yolo pose val model=yolov8n-pose.pt # validar modelo oficial + yolo pose val model=caminho/para/melhor.pt # validar modelo personalizado + ``` + +## Prever + +Use um modelo YOLOv8n-pose treinado para executar previsões em imagens. + +!!! Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n-pose.pt') # carregar um modelo oficial + model = YOLO('caminho/para/melhor.pt') # carregar um modelo personalizado + + # Prever com o modelo + results = model('https://ultralytics.com/images/bus.jpg') # prever em uma imagem + ``` + === "CLI" + + ```bash + yolo pose predict model=yolov8n-pose.pt source='https://ultralytics.com/images/bus.jpg' # prever com modelo oficial + yolo pose predict model=caminho/para/melhor.pt source='https://ultralytics.com/images/bus.jpg' # prever com modelo personalizado + ``` + +Veja detalhes completos do modo `predict` na página [Prever](https://docs.ultralytics.com/modes/predict/). + +## Exportar + +Exporte um modelo YOLOv8n Pose para um formato diferente como ONNX, CoreML, etc. + +!!! 
Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n-pose.pt') # carregar um modelo oficial + model = YOLO('caminho/para/melhor.pt') # carregar um modelo treinado personalizado + + # Exportar o modelo + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-pose.pt format=onnx # exportar modelo oficial + yolo export model=caminho/para/melhor.pt format=onnx # exportar modelo treinado personalizado + ``` + +Os formatos de exportação YOLOv8-pose disponíveis estão na tabela abaixo. Você pode prever ou validar diretamente em modelos exportados, ou seja, `yolo predict model=yolov8n-pose.onnx`. Exemplos de uso são mostrados para o seu modelo após a conclusão da exportação. + +| Formato | Argumento `format` | Modelo | Metadados | Argumentos | +|--------------------------------------------------------------------|--------------------|--------------------------------|-----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-pose.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-pose.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-pose.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-pose_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-pose.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-pose.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-pose_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-pose.pb` | ❌ 
| `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-pose.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-pose_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-pose_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-pose_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-pose_ncnn_model/` | ✅ | `imgsz`, `half` | + +Veja detalhes completos da `exportação` na página [Exportar](https://docs.ultralytics.com/modes/export/). diff --git a/docs/pt/tasks/segment.md b/docs/pt/tasks/segment.md new file mode 100644 index 0000000..958b4b7 --- /dev/null +++ b/docs/pt/tasks/segment.md @@ -0,0 +1,188 @@ +--- +comments: true +description: Aprenda a usar modelos de segmentação de instâncias com o Ultralytics YOLO. Instruções sobre treinamento, validação, previsão de imagem e exportação de modelo. +keywords: yolov8, segmentação de instâncias, Ultralytics, conjunto de dados COCO, segmentação de imagem, detecção de objeto, treinamento de modelo, validação de modelo, previsão de imagem, exportação de modelo +--- + +# Segmentação de Instâncias + +Exemplos de segmentação de instâncias + +A segmentação de instâncias vai além da detecção de objetos e envolve a identificação de objetos individuais em uma imagem e a sua segmentação do resto da imagem. + +A saída de um modelo de segmentação de instâncias é um conjunto de máscaras ou contornos que delineiam cada objeto na imagem, juntamente com rótulos de classe e pontuações de confiança para cada objeto. A segmentação de instâncias é útil quando você precisa saber não apenas onde os objetos estão em uma imagem, mas também qual é a forma exata deles. + +

+
+ +
+ Assista: Executar Segmentação com o Modelo Treinado Ultralytics YOLOv8 em Python. +

+ +!!! Tip "Dica" + + Modelos YOLOv8 Segment usam o sufixo `-seg`, ou seja, `yolov8n-seg.pt` e são pré-treinados no [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml). + +## [Modelos](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Os modelos Segment pré-treinados do YOLOv8 estão mostrados aqui. Os modelos Detect, Segment e Pose são pré-treinados no conjunto de dados [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml), enquanto os modelos Classify são pré-treinados no conjunto de dados [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +[Modelos](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) são baixados automaticamente do último lançamento da Ultralytics [release](https://github.com/ultralytics/assets/releases) na primeira utilização. + +| Modelo | Tamanho
(pixels) | mAPbox
50-95 | mAPmáscara
50-95 | Velocidade
CPU ONNX
(ms) | Velocidade
A100 TensorRT
(ms) | Parâmetros
(M) | FLOPs
(B) | +|----------------------------------------------------------------------------------------------|--------------------------|----------------------|--------------------------|-------------------------------------|------------------------------------------|------------------------|-------------------| +| [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | +| [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | +| [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | +| [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | +| [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | + +- Os valores de **mAPval** são para um único modelo em uma única escala no conjunto de dados [COCO val2017](http://cocodataset.org). +
Reproduza por meio de `yolo val segment data=coco.yaml device=0` +- **Velocidade** média em imagens COCO val usando uma instância [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/). +
Reproduza por meio de `yolo val segment data=coco128-seg.yaml batch=1 device=0|cpu` + +## Treinar + +Treine o modelo YOLOv8n-seg no conjunto de dados COCO128-seg por 100 épocas com tamanho de imagem 640. Para uma lista completa de argumentos disponíveis, consulte a página [Configuração](/../usage/cfg.md). + +!!! Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n-seg.yaml') # construir um novo modelo a partir do YAML + model = YOLO('yolov8n-seg.pt') # carregar um modelo pré-treinado (recomendado para treinamento) + model = YOLO('yolov8n-seg.yaml').load('yolov8n.pt') # construir a partir do YAML e transferir os pesos + + # Treinar o modelo + results = model.train(data='coco128-seg.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # Construir um novo modelo a partir do YAML e começar o treinamento do zero + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml epochs=100 imgsz=640 + + # Começar o treinamento a partir de um modelo *.pt pré-treinado + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + + # Construir um novo modelo a partir do YAML, transferir pesos pré-treinados para ele e começar o treinamento + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml pretrained=yolov8n-seg.pt epochs=100 imgsz=640 + ``` + +### Formato do conjunto de dados + +O formato do conjunto de dados de segmentação YOLO pode ser encontrado em detalhes no [Guia de Conjuntos de Dados](../../../datasets/segment/index.md). Para converter seu conjunto de dados existente de outros formatos (como COCO etc.) para o formato YOLO, utilize a ferramenta [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) da Ultralytics. + +## Val + +Valide a acurácia do modelo YOLOv8n-seg treinado no conjunto de dados COCO128-seg. Não é necessário passar nenhum argumento, pois o `modelo` retém seus `dados` de treino e argumentos como atributos do modelo. + +!!! 
Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n-seg.pt') # carregar um modelo oficial + model = YOLO('path/to/best.pt') # carregar um modelo personalizado + + # Validar o modelo + metrics = model.val() # sem necessidade de argumentos, conjunto de dados e configurações são lembrados + metrics.box.map # map50-95(B) + metrics.box.map50 # map50(B) + metrics.box.map75 # map75(B) + metrics.box.maps # uma lista contendo map50-95(B) de cada categoria + metrics.seg.map # map50-95(M) + metrics.seg.map50 # map50(M) + metrics.seg.map75 # map75(M) + metrics.seg.maps # uma lista contendo map50-95(M) de cada categoria + ``` + === "CLI" + + ```bash + yolo segment val model=yolov8n-seg.pt # val modelo oficial + yolo segment val model=path/to/best.pt # val modelo personalizado + ``` + +## Prever + +Use um modelo YOLOv8n-seg treinado para realizar previsões em imagens. + +!!! Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n-seg.pt') # carregar um modelo oficial + model = YOLO('path/to/best.pt') # carregar um modelo personalizado + + # Realizar previsão com o modelo + results = model('https://ultralytics.com/images/bus.jpg') # prever em uma imagem + ``` + === "CLI" + + ```bash + yolo segment predict model=yolov8n-seg.pt source='https://ultralytics.com/images/bus.jpg' # previsão com modelo oficial + yolo segment predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # previsão com modelo personalizado + ``` + +Veja detalhes completos do modo `predict` na página [Prever](https://docs.ultralytics.com/modes/predict/). + +## Exportar + +Exporte um modelo YOLOv8n-seg para um formato diferente como ONNX, CoreML, etc. + +!!! 
Example "Exemplo" + + === "Python" + + ```python + from ultralytics import YOLO + + # Carregar um modelo + model = YOLO('yolov8n-seg.pt') # carregar um modelo oficial + model = YOLO('path/to/best.pt') # carregar um modelo treinado personalizado + + # Exportar o modelo + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-seg.pt format=onnx # exportar modelo oficial + yolo export model=path/to/best.pt format=onnx # exportar modelo treinado personalizado + ``` + +Os formatos de exportação disponíveis para YOLOv8-seg estão na tabela abaixo. Você pode prever ou validar diretamente em modelos exportados, ou seja, `yolo predict model=yolov8n-seg.onnx`. Exemplos de uso são mostrados para o seu modelo após a conclusão da exportação. + +| Formato | Argumento `format` | Modelo | Metadados | Argumentos | +|--------------------------------------------------------------------|--------------------|-------------------------------|-----------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-seg.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-seg.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-seg.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-seg_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-seg.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-seg.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-seg_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-seg.pb` | ❌ | `imgsz` | +| [TF 
Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-seg.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-seg_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-seg_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-seg_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-seg_ncnn_model/` | ✅ | `imgsz`, `half` | + +Veja detalhes completos da `exportação` na página [Exportar](https://docs.ultralytics.com/modes/export/). diff --git a/docs/ru/index.md b/docs/ru/index.md new file mode 100644 index 0000000..aac4406 --- /dev/null +++ b/docs/ru/index.md @@ -0,0 +1,82 @@ +--- +comments: true +description: Изучите полное руководство по Ultralytics YOLOv8, модели обнаружения объектов и сегментации изображений с высокой скоростью и точностью. Учебники по установке, предсказаниям, тренировке и многое другое. +keywords: Ultralytics, YOLOv8, обнаружение объектов, сегментация изображений, машинное обучение, глубокое обучение, компьютерное зрение, установка YOLOv8, предсказание YOLOv8, тренировка YOLOv8, история YOLO, лицензии YOLO +--- + +
+

+ + Ultralytics YOLO banner +

+ GitHub Ultralytics + space + LinkedIn Ultralytics + space + Twitter Ultralytics + space + YouTube Ultralytics + space + TikTok Ultralytics + space + Instagram Ultralytics + space + Discord Ultralytics +
+
+ CI Ultralytics + Покрытие кода Ultralytics + Цитирование YOLOv8 + Загрузки Docker +
+ Запустить на Gradient + Открыть в Colab + Открыть в Kaggle +
+ +Представляем [Ultralytics](https://ultralytics.com) [YOLOv8](https://github.com/ultralytics/ultralytics) — последнюю версию знаменитой модели для обнаружения объектов в реальном времени и сегментации изображений. YOLOv8 основан на передовом прогрессе в области глубокого обучения и компьютерного зрения и предлагает непревзойденную производительность в отношении скорости и точности. Его оптимизированная конструкция делает его подходящим для различных приложений и легко адаптируемым к различным аппаратным платформам, от устройств на краю сети до облачных API. + +Исследуйте документацию YOLOv8 — обширный ресурс, разработанный, чтобы помочь вам понять и использовать его функции и возможности. Независимо от того, являетесь ли вы опытным практиком машинного обучения или новичком в этой области, этот центр ресурсов нацелен на максимальное раскрытие потенциала YOLOv8 в ваших проектах. + +!!! Note "Заметка" + + 🚧 Наша многоязычная документация в настоящее время находится в стадии разработки, и мы усердно работаем над ее улучшением. Спасибо за ваше терпение! 🙏 + +## С чего начать + +- **Установите** `ultralytics` с помощью pip и приступайте к работе за несколько минут   [:material-clock-fast: Начать работу](quickstart.md){ .md-button } +- **Предскажите** новые изображения и видео с помощью YOLOv8   [:octicons-image-16: Предсказания для изображений](modes/predict.md){ .md-button } +- **Тренируйте** новую модель YOLOv8 на своих собственных наборах данных   [:fontawesome-solid-brain: Тренировать модель](modes/train.md){ .md-button } +- **Исследуйте** задачи YOLOv8, такие как сегментация, классификация, оценка позы и отслеживание   [:material-magnify-expand: Исследовать задачи](tasks/index.md){ .md-button } + +

+
+ +
+ Смотрите: Как тренировать модель YOLOv8 на своем пользовательском наборе данных в Google Colab. +

+ +## YOLO: Краткая история + +[YOLO](https://arxiv.org/abs/1506.02640) (You Only Look Once — «Смотришь только один раз»), популярная модель для обнаружения объектов и сегментации изображений, была разработана Джозефом Редмоном и Али Фархади из Вашингтонского университета. Запущенная в 2015 году, YOLO быстро обрела популярность благодаря своей высокой скорости и точности. + +- [YOLOv2](https://arxiv.org/abs/1612.08242), выпущенная в 2016 году, улучшила оригинальную модель, включив в себя пакетную нормализацию, якорные окна и кластеры размеров. +- [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf), запущенная в 2018 году, дополнительно улучшила производительность модели, используя более эффективную основную сеть, множество якорей и пирамидальный пространственный пулинг. +- [YOLOv4](https://arxiv.org/abs/2004.10934) была выпущена в 2020 году, представив такие инновации, как аугментация данных Mosaic, новую безъякорную голову детектирования и новую функцию потерь. +- [YOLOv5](https://github.com/ultralytics/yolov5) дальше улучшила производительность модели и добавила новые функции, такие как оптимизация гиперпараметров, интегрированное отслеживание экспериментов и автоматический экспорт в популярные форматы. +- [YOLOv6](https://github.com/meituan/YOLOv6) была опубликована в открытом доступе компанией [Meituan](https://about.meituan.com/) в 2022 году и используется во многих автономных роботах доставки компании. +- [YOLOv7](https://github.com/WongKinYiu/yolov7) добавила дополнительные задачи, такие как оценка позы по набору данных ключевых точек COCO. +- [YOLOv8](https://github.com/ultralytics/ultralytics) — последняя версия YOLO от Ultralytics. Будучи передовой, современной моделью, YOLOv8 основана на успехе предыдущих версий, внедряя новые функции и усовершенствования для повышения производительности, гибкости и эффективности.
YOLOv8 поддерживает полный спектр задач в области компьютерного зрения, включая [обнаружение](tasks/detect.md), [сегментацию](tasks/segment.md), [оценку позы](tasks/pose.md), [отслеживание](modes/track.md) и [классификацию](tasks/classify.md). Эта многосторонность позволяет пользователям использовать возможности YOLOv8 в самых разнообразных приложениях и сферах деятельности. + +## Лицензии YOLO: Как лицензируется YOLO от Ultralytics? + +Ultralytics предлагает два варианта лицензирования для удовлетворения разнообразных сценариев использования: + +- **Лицензия AGPL-3.0**: Эта одобренная [OSI](https://opensource.org/licenses/) лицензия с открытым исходным кодом идеально подходит для студентов и энтузиастов, способствуя открытому сотрудничеству и обмену знаниями. Подробную информацию смотрите в файле [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE). +- **Корпоративная лицензия**: Разработанная для коммерческого использования, эта лицензия позволяет беспрепятственно интегрировать программное обеспечение и AI-модели Ultralytics в товары и услуги коммерческого назначения, обходя требования открытого исходного кода AGPL-3.0. Если ваш сценарий включает внедрение наших решений в коммерческое предложение, обратитесь через [Лицензирование Ultralytics](https://ultralytics.com/license). + +Наша стратегия лицензирования разработана, чтобы обеспечить возврат усовершенствований наших проектов с открытым исходным кодом обществу. Мы твердо привержены принципам открытого исходного кода ❤️ и наша миссия заключается в гарантировании того, что наши вклады могут быть использованы и расширены таким образом, который будет полезен для всех. diff --git a/docs/ru/models/fast-sam.md b/docs/ru/models/fast-sam.md new file mode 100644 index 0000000..4756718 --- /dev/null +++ b/docs/ru/models/fast-sam.md @@ -0,0 +1,193 @@ +--- +comments: true +description: Исследуйте FastSAM, базирующуюся на CNN сегментацию объектов в реальном времени на изображениях.
Улучшенное взаимодействие с пользователем, высокая вычислительная эффективность и применимость к различным задачам компьютерного зрения. +keywords: FastSAM, машинное обучение, решение на базе CNN, сегментация объектов, решение в реальном времени, Ultralytics, задачи компьютерного зрения, обработка изображений, применение в промышленности, пользовательское взаимодействие +--- + +# Модель Fast Segment Anything (FastSAM) + +Модель Fast Segment Anything (FastSAM) - это новаторское решение на базе CNN, предназначенное для решения задачи сегментации объектов в реальном времени. Эта задача разработана для сегментации любого объекта на изображении на основе различных возможных запросов пользователя. FastSAM значительно снижает вычислительные требования, при этом сохраняя конкурентоспособность работы, что делает ее практически подходящей для различных задач компьютерного зрения. + +![Обзор архитектуры модели Fast Segment Anything (FastSAM)](https://user-images.githubusercontent.com/26833433/248551984-d98f0f6d-7535-45d0-b380-2e1440b52ad7.jpg) + +## Обзор + +FastSAM разработана для преодоления ограничений [модели Segment Anything (SAM)](sam.md), тяжелой модели Transformer, требующей значительных вычислительных ресурсов. FastSAM разделяет задачу сегментации объектов на два последовательных этапа: сегментация всех экземпляров и выбор, основанный на запросах пользователя. На первом этапе используется [YOLOv8-seg](../tasks/segment.md) для создания сегментационных масок всех экземпляров на изображении. На втором этапе модель выводит область интереса, соответствующую запросу. + +## Основные особенности + +1. **Решение в реальном времени:** Благодаря эффективности вычислений на базе CNN, FastSAM обеспечивает решение задачи сегментации объектов в реальном времени, что делает ее ценной для применения в промышленных приложениях, требующих быстрых результатов. + +2. 
**Эффективность и производительность:** FastSAM обеспечивает значительное снижение вычислительных и ресурсных требований, не ухудшая качество работы. Она достигает сопоставимой производительности с моделью SAM, но требует значительно меньше вычислительных ресурсов, что позволяет использовать ее в реальном времени. + +3. **Сегментация на основе запросов пользователя:** FastSAM может выполнять сегментацию любого объекта на изображении, основываясь на различных возможных запросах пользователя, что обеспечивает гибкость и приспособляемость к различным сценариям. + +4. **Основана на YOLOv8-seg:** FastSAM основана на модели [YOLOv8-seg](../tasks/segment.md), которая является детектором объектов с ветвью сегментации экземпляров. Это позволяет ей эффективно создавать сегментационные маски всех экземпляров на изображении. + +5. **Высокие результаты на показателях:** При выполнении задачи предложения объектов на наборе данных MS COCO FastSAM достигает высоких показателей производительности при значительно большей скорости работы, чем [SAM](sam.md) на одном графическом процессоре NVIDIA RTX 3090, что свидетельствует о ее эффективности и способности. + +6. **Практическое применение:** Предложенный подход предоставляет новое практическое решение для большого количества задач компьютерного зрения с очень высокой скоростью, в десятки или сотни раз превышающей скорость существующих методов. + +7. **Возможность сжатия модели:** FastSAM демонстрирует возможность существенно снизить вычислительные затраты, введя в структуру модели искусственное априорное знание (prior), открывая новые возможности для создания крупномасштабных архитектур моделей для общих задач компьютерного зрения.
+ +## Доступные модели, поддерживаемые задачи и режимы работы + +В следующей таблице представлены доступные модели с их конкретными заранее обученными весами, поддерживаемые задачи и совместимость с различными режимами работы, такими как [Вывод](../modes/predict.md), [Валидация](../modes/val.md), [Обучение](../modes/train.md) и [Экспорт](../modes/export.md), обозначенные значками ✅ для поддерживаемых режимов и значками ❌ для неподдерживаемых режимов. + +| Тип модели | Заранее обученные веса | Поддерживаемые задачи | Вывод | Валидация | Обучение | Экспорт | +|------------|------------------------|------------------------------------------------|-------|-----------|----------|---------| +| FastSAM-s | `FastSAM-s.pt` | [Сегментация экземпляров](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| FastSAM-x | `FastSAM-x.pt` | [Сегментация экземпляров](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## Примеры использования + +Модели FastSAM легко интегрировать в ваши приложения на Python. Ultralytics предоставляет удобный пользовательский интерфейс API и команды CLI для упрощения разработки. + +### Использование для предсказаний + +Для выполнения обнаружения объектов на изображении используйте метод `predict`, как показано ниже: + +!!! 
Example "Пример" + + === "Python" + ```python + from ultralytics import FastSAM + from ultralytics.models.fastsam import FastSAMPrompt + + # Определение источника для вывода + source = 'путь/к/фото_автобуса.jpg' + + # Создание модели FastSAM + model = FastSAM('FastSAM-s.pt') # или FastSAM-x.pt + + # Выполнение вывода на изображении + результаты = model(source, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9) + + # Создание объекта Prompt Process + prompt_process = FastSAMPrompt(source, результаты, device='cpu') + + # Вывод всего + ann = prompt_process.everything_prompt() + + # Прямоугольная область по умолчанию [0,0,0,0] -> [x1,y1,x2,y2] + ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300]) + + # Текстовый запрос + ann = prompt_process.text_prompt(text='фотография собаки') + + # Запрос точки + # Точки по умолчанию [[0,0]] [[x1,y1],[x2,y2]] + # метка точки по умолчанию [0] [1,0] 0:фон, 1:передний план + ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1]) + prompt_process.plot(annotations=ann, output='./') + ``` + + === "CLI" + ```bash + # Загрузка модели FastSAM и сегментация с ее помощью всего на изображении + yolo segment predict model=FastSAM-s.pt source=путь/к/фото_автобуса.jpg imgsz=640 + ``` + +В этом фрагменте кода демонстрируется простота загрузки предобученной модели и выполнения предсказаний на изображении. + +### Использование для валидации + +Валидацию модели на наборе данных можно выполнить следующим образом: + +!!!
Example "Пример" + + === "Python" + ```python + from ultralytics import FastSAM + + # Создание модели FastSAM + model = FastSAM('FastSAM-s.pt') # или FastSAM-x.pt + + # Валидация модели + результаты = model.val(data='coco8-seg.yaml') + ``` + + === "CLI" + ```bash + # Загрузка модели FastSAM и ее валидация на примере набора данных COCO8 при размере изображения 640 + yolo segment val model=FastSAM-s.pt data=coco8.yaml imgsz=640 + ``` + +Пожалуйста, обратите внимание, что FastSAM поддерживает только обнаружение и сегментацию единственного класса объектов. Это означает, что модель будет распознавать и сегментировать все объекты как один и тот же класс. Поэтому при подготовке набора данных вам нужно преобразовать все идентификаторы категорий объектов в 0. + +## Официальное использование FastSAM + +FastSAM также доступна непосредственно из репозитория [https://github.com/CASIA-IVA-Lab/FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM). Вот краткий обзор типичных шагов для использования FastSAM: + +### Установка + +1. Клонируйте репозиторий FastSAM: + ```shell + git clone https://github.com/CASIA-IVA-Lab/FastSAM.git + ``` + +2. Создайте и активируйте виртуальное окружение Conda с Python 3.9: + ```shell + conda create -n FastSAM python=3.9 + conda activate FastSAM + ``` + +3. Перейдите в каталог склонированного репозитория и установите требуемые пакеты: + ```shell + cd FastSAM + pip install -r requirements.txt + ``` + +4. Установите модель CLIP: + ```shell + pip install git+https://github.com/openai/CLIP.git + ``` + +### Пример использования + +1. Скачайте [файл контрольной точки модели](https://drive.google.com/file/d/1m1sjY4ihXBU1fZXdQ-Xdj-mDltW-2Rqv/view?usp=sharing). + +2. Используйте модель FastSAM для выполнения вывода. 
Примеры команд: + + - Сегментация всего объекта на изображении: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg + ``` + + - Сегментация определенных объектов с использованием текстового запроса: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --text_prompt "желтая собака" + ``` + + - Сегментация объектов в пределах ограничивающей рамки (укажите координаты рамки в формате xywh): + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --box_prompt "[570,200,230,400]" + ``` + + - Сегментация объектов, находящихся близко к определенным точкам: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --point_prompt "[[520,360],[620,300]]" --point_label "[1,0]" + ``` + +Кроме того, вы можете опробовать FastSAM с помощью [демонстрационного ноутбука Colab](https://colab.research.google.com/drive/1oX14f6IneGGw612WgVlAiy91UHwFAvr9?usp=sharing) или на [веб-демо HuggingFace](https://huggingface.co/spaces/An-619/FastSAM) для визуального опыта. + +## Цитирование и благодарности + +Мы хотели бы выразить благодарность авторам FastSAM за их значительный вклад в области сегментации объектов в реальном времени: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{zhao2023fast, + title={Fast Segment Anything}, + author={Xu Zhao and Wenchao Ding and Yongqi An and Yinglong Du and Tao Yu and Min Li and Ming Tang and Jinqiao Wang}, + year={2023}, + eprint={2306.12156}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +Оригинальная статья FastSAM доступна на [arXiv](https://arxiv.org/abs/2306.12156). Авторы сделали свою работу общедоступной, и код можно получить на [GitHub](https://github.com/CASIA-IVA-Lab/FastSAM). Мы ценим их усилия по развитию отрасли и доступу к их работе для широкого круга пользователей. 
diff --git a/docs/ru/models/index.md b/docs/ru/models/index.md new file mode 100644 index 0000000..43fd65b --- /dev/null +++ b/docs/ru/models/index.md @@ -0,0 +1,98 @@ +--- +comments: true +description: Исследуйте разнообразный спектр поддерживаемых Ultralytics моделей семейства YOLO, SAM, MobileSAM, FastSAM, YOLO-NAS и RT-DETR. Начните работу с примерами использования как для CLI, так и для Python. +keywords: Ultralytics, документация, YOLO, SAM, MobileSAM, FastSAM, YOLO-NAS, RT-DETR, модели, архитектуры, Python, CLI +--- + +# Поддерживаемые модели Ultralytics + +Добро пожаловать в документацию по моделям Ultralytics! Мы поддерживаем широкий спектр моделей, каждая из которых адаптирована для конкретных задач, таких как [обнаружение объектов](../tasks/detect.md), [сегментация на уровне инстанций](../tasks/segment.md), [классификация изображений](../tasks/classify.md), [оценка позы](../tasks/pose.md) и [слежение за несколькими объектами](../modes/track.md). Если вы заинтересованы в добавлении архитектуры своей модели в Ultralytics, ознакомьтесь с нашим [Руководством для участников](../../help/contributing.md). + +!!! Note "Примечание" + + 🚧 Наша документация на разных языках находится в стадии разработки, и мы усердно работаем над ее улучшением. Спасибо за ваше терпение! 🙏 + +## Особенные модели + +Вот некоторые ключевые поддерживаемые модели: + +1. **[YOLOv3](yolov3.md)**: Третье поколение семейства моделей YOLO, авторства Джозефа Редмона, известное своей эффективностью в реальном времени для обнаружения объектов. +2. **[YOLOv4](yolov4.md)**: Нативное для darknet обновление YOLOv3, выпущенное Алексеем Бочковским в 2020 году. +3. **[YOLOv5](yolov5.md)**: Улучшенная версия архитектуры YOLO от Ultralytics, предлагающая лучшие компромиссы производительности и скорости по сравнению с предыдущими версиями. +4. **[YOLOv6](yolov6.md)**: Выпущенная в 2022 году компанией [Meituan](https://about.meituan.com/) и используемая во многих роботах автономной доставки компании. +5. 
**[YOLOv7](yolov7.md)**: Обновленные модели YOLO, выпущенные в 2022 году авторами YOLOv4. +6. **[YOLOv8](yolov8.md) НОВИНКА 🚀**: Последняя версия семейства YOLO, обладающая расширенными возможностями, такими как сегментация на уровне инстанций, оценка позы/ключевых точек и классификация. +7. **[Segment Anything Model (SAM)](sam.md)**: Модель сегментации всего и вся (SAM) от Meta. +8. **[Mobile Segment Anything Model (MobileSAM)](mobile-sam.md)**: MobileSAM для мобильных приложений от университета Kyung Hee. +9. **[Fast Segment Anything Model (FastSAM)](fast-sam.md)**: FastSAM от Группы анализа изображений и видео, Института автоматики, Китайской академии наук. +10. **[YOLO-NAS](yolo-nas.md)**: Модели YOLO с поиском нейронной архитектуры (NAS). +11. **[Realtime Detection Transformers (RT-DETR)](rtdetr.md)**: Модели трансформеров реального времени для обнаружения объектов (RT-DETR) от Baidu PaddlePaddle. + +

+
+ +
+ Смотрите: Запуск моделей YOLO от Ultralytics всего в несколько строк кода. +

+ +## Начало работы: Примеры использования + +Этот пример предоставляет простые примеры обучения и вывода для YOLO. Полная документация по этим и другим [режимам](../modes/index.md) представлена на страницах документации [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) и [Export](../modes/export.md). + +Обратите внимание, что ниже приведен пример для моделей [Detect](../tasks/detect.md) YOLOv8 для обнаружения объектов. Для дополнительных поддерживаемых задач смотрите документацию по [Segment](../tasks/segment.md), [Classify](../tasks/classify.md) и [Pose](../tasks/pose.md). + +!!! Example "Пример" + + === "Python" + + Предобученные модели PyTorch `*.pt`, а также конфигурационные файлы `*.yaml` могут быть переданы в классы `YOLO()`, `SAM()`, `NAS()` и `RTDETR()`, чтобы создать экземпляр модели на Python: + + ```python + from ultralytics import YOLO + + # Загрузить предварительно обученную модель YOLOv8n для COCO + model = YOLO('yolov8n.pt') + + # Отобразить информацию о модели (необязательно) + model.info() + + # Обучить модель на примерном наборе данных COCO8 в течение 100 эпох + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Запустить вывод с помощью модели YOLOv8n на изображении 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + Команды CLI доступны для непосредственного запуска моделей: + + ```bash + # Загрузить предварительно обученную модель YOLOv8n для COCO и обучить её на примерном наборе данных COCO8 в течение 100 эпох + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Загрузить предварительно обученную модель YOLOv8n для COCO и запустить вывод на изображении 'bus.jpg' + yolo predict model=yolov8n.pt source=path/to/bus.jpg + ``` + +## Вклад в новые модели + +Заинтересованы в том, чтобы внести свою модель в Ultralytics? Отлично! Мы всегда открыты для расширения нашего портфолио моделей. + +1. 
**Сделайте Fork Репозитория**: Начните с создания форка [репозитория Ultralytics на GitHub](https://github.com/ultralytics/ultralytics). + +2. **Склонируйте свой Fork**: Склонируйте ваш форк на локальную машину и создайте новую ветку для работы. + +3. **Реализуйте свою Модель**: Добавьте вашу модель, следуя стандартам программирования и руководящим принципам, указанным в нашем [Руководстве для участников](../../help/contributing.md). + +4. **Тщательно протестируйте**: Убедитесь, что вы тщательно протестировали свою модель, как изолированно, так и как часть пайплайна. + +5. **Создайте Pull Request**: Как только вы будете удовлетворены своей моделью, создайте pull request в основной репозиторий для рассмотрения. + +6. **Код-ревью и Слияние**: После рассмотрения, если ваша модель соответствует нашим критериям, она будет объединена с основным репозиторием. + +Для подробных инструкций см. наше [Руководство для участников](../../help/contributing.md). diff --git a/docs/ru/models/mobile-sam.md b/docs/ru/models/mobile-sam.md new file mode 100644 index 0000000..906ec88 --- /dev/null +++ b/docs/ru/models/mobile-sam.md @@ -0,0 +1,116 @@ +--- +comments: true +description: Узнайте больше о MobileSAM, его реализации, сравнении с оригинальным SAM и о том, как его загрузить и протестировать в фреймворке Ultralytics. Улучшите свои мобильные приложения уже сегодня. +keywords: MobileSAM, Ultralytics, SAM, мобильные приложения, Arxiv, GPU, API, кодировщик изображений, декодер масок, загрузка модели, метод тестирования +--- + +![Логотип MobileSAM](https://github.com/ChaoningZhang/MobileSAM/blob/master/assets/logo2.png?raw=true) + +# Mobile Segment Anything (MobileSAM) + +Теперь доступна статья MobileSAM в [архиве arXiv](https://arxiv.org/pdf/2306.14289.pdf). + +Демонстрацию работы MobileSAM на CPU можно найти по этой [ссылке](https://huggingface.co/spaces/dhkim2810/MobileSAM). Время выполнения на Mac i5 CPU составляет примерно 3 секунды. 
В демонстрационной версии на Hugging Face интерфейс и менее производительные процессоры (CPU) могут вызывать замедление работы, но она продолжает работать эффективно. + +MobileSAM реализован в различных проектах, включая [Grounding-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything), [AnyLabeling](https://github.com/vietanhdev/anylabeling) и [Segment Anything in 3D](https://github.com/Jumpat/SegmentAnythingin3D). + +MobileSAM обучается на одном графическом процессоре (GPU) с использованием набора из 100 тысяч изображений (1% от исходного набора изображений) менее чем за день. Код для обучения будет доступен в будущем. + +## Доступные модели, поддерживаемые задачи и режимы работы + +В таблице представлены доступные модели с соответствующими предварительно обученными весами, поддерживаемыми задачами и их совместимостью с различными режимами работы, такими как [вывод](../modes/predict.md), [валидация](../modes/val.md), [тренировка](../modes/train.md) и [экспорт](../modes/export.md), обозначенной с помощью эмодзи ✅ для поддерживаемых режимов и эмодзи ❌ для неподдерживаемых. + +| Тип модели | Предварительно обученные веса | Поддерживаемые задачи | Вывод | Валидация | Тренировка | Экспорт | +|------------|-------------------------------|------------------------------------------------|-------|-----------|------------|---------| +| MobileSAM | `mobile_sam.pt` | [Сегментация экземпляров](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## Переход от SAM к MobileSAM + +Поскольку MobileSAM сохраняет ту же самую последовательность операций, что и оригинальный SAM, в него интегрированы все интерфейсы для предобработки, постобработки и прочие интерфейсы. В результате пользователи, уже использующие оригинальный SAM, могут легко перейти на MobileSAM. + +MobileSAM по качеству работы сопоставим с оригинальным SAM и имеет ту же самую последовательность операций, за исключением изменения кодировщика изображений.
В частности, мы заменяем оригинальный "тяжёлый" кодировщик изображений ViT-H (632M) на более компактный Tiny-ViT (5M). На одном графическом процессоре MobileSAM работает примерно за 12 мс на изображение: 8 мс на кодировщик изображений и 4 мс на декодер масок. + +В таблице представлено сравнение кодировщиков изображений на базе ViT: + +| Кодировщик изображений | Оригинальный SAM | MobileSAM | +|------------------------|------------------|-----------| +| Параметры | 611M | 5M | +| Скорость | 452 мс | 8 мс | + +Как оригинальный SAM, так и MobileSAM используют один и тот же декодер масок, управляемый подсказками: + +| Декодер масок | Оригинальный SAM | MobileSAM | +|---------------|------------------|-----------| +| Параметры | 3.876M | 3.876M | +| Скорость | 4 мс | 4 мс | + +Ниже приведено сравнение всей последовательности операций: + +| Полная последовательность операций (Enc+Dec) | Оригинальный SAM | MobileSAM | +|----------------------------------------------|------------------|-----------| +| Параметры | 615M | 9.66M | +| Скорость | 456 мс | 12 мс | + +Производительность MobileSAM и оригинального SAM демонстрируется с использованием подсказок в форме точки и прямоугольника. + +![Изображение с подсказкой в виде точки](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/mask_box.jpg?raw=true) + +![Изображение с подсказкой в виде прямоугольника](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/mask_box.jpg?raw=true) + +MobileSAM обеспечивает примерно в 5 раз меньший размер и в 7 раз большую скорость работы по сравнению с текущим FastSAM. Более подробная информация доступна на [странице проекта MobileSAM](https://github.com/ChaoningZhang/MobileSAM). + +## Тестирование MobileSAM в Ultralytics + +Как и оригинальный SAM, мы предлагаем простой метод тестирования в Ultralytics, включая режимы тестирования с использованием подсказок в форме точки и прямоугольника. 
+ +### Загрузка модели + +Вы можете загрузить модель [здесь](https://github.com/ChaoningZhang/MobileSAM/blob/master/weights/mobile_sam.pt). + +### Подсказка в форме точки + +!!! Example "Пример" + + === "Python" + ```python + from ultralytics import SAM + + # Загрузка модели + model = SAM('mobile_sam.pt') + + # Предсказание сегмента на основе подсказки в форме точки + model.predict('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +### Подсказка в форме прямоугольника + +!!! Example "Пример" + + === "Python" + ```python + from ultralytics import SAM + + # Загрузка модели + model = SAM('mobile_sam.pt') + + # Предсказание сегмента на основе подсказки в форме прямоугольника + model.predict('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + ``` + +Мы реализовали `MobileSAM` и `SAM` с использованием одного и того же API. Для получения дополнительной информации о использовании, пожалуйста, см. [страницу SAM](sam.md). + +## Цитирование и благодарности + +Если вы считаете MobileSAM полезным в своей научно-исследовательской или разработочной работе, пожалуйста, рассмотрите возможность цитирования нашей статьи: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{mobile_sam, + title={Faster Segment Anything: Towards Lightweight SAM for Mobile Applications}, + author={Zhang, Chaoning and Han, Dongshen and Qiao, Yu and Kim, Jung Uk and Bae, Sung Ho and Lee, Seungkyu and Hong, Choong Seon}, + journal={arXiv preprint arXiv:2306.14289}, + year={2023} + } diff --git a/docs/ru/models/rtdetr.md b/docs/ru/models/rtdetr.md new file mode 100644 index 0000000..7b8b374 --- /dev/null +++ b/docs/ru/models/rtdetr.md @@ -0,0 +1,93 @@ +--- +comments: true +description: Узнайте о возможностях и преимуществах RT-DETR от Baidu - эффективного и гибкого детектора объектов в реальном времени, основанного на Vision Transformers. Включает предобученные модели. 
+keywords: RT-DETR, Baidu, Vision Transformers, object detection, real-time performance, CUDA, TensorRT, IoU-aware query selection, Ultralytics, Python API, PaddlePaddle +--- + +# RT-DETR от Baidu: детектор объектов в реальном времени на основе Vision Transformers + +## Обзор + +Real-Time Detection Transformer (RT-DETR), разработанный компанией Baidu, является передовым сквозным (end-to-end) детектором объектов, который обеспечивает высокую точность при работе в реальном времени. Он использует преимущества Vision Transformers (ViT) для эффективной обработки мультимасштабных признаков путем разделения взаимодействия внутри масштаба и слияния между масштабами. RT-DETR легко адаптируется для поддержки гибкой настройки скорости вывода с использованием разных слоев декодера без необходимости повторного обучения. Модель показывает высокую производительность на ускоренных вычислительных платформах, таких как CUDA с TensorRT, превосходя многие другие детекторы объектов в реальном времени. + +![Пример модели](https://user-images.githubusercontent.com/26833433/238963168-90e8483f-90aa-4eb6-a5e1-0d408b23dd33.png) +**Обзор RT-DETR от Baidu.** Схема архитектуры модели RT-DETR показывает последние три стадии основной сети {S3, S4, S5} в качестве входных данных для энкодера. Эффективный гибридный энкодер преобразует мультимасштабные признаки в последовательность признаков изображения с помощью внутримасштабного взаимодействия признаков (AIFI) и модуля кросс-масштабного слияния признаков (CCFM). Для начальной инициализации объектных запросов декодера используется выбор запросов с учетом IoU, то есть пересечения над объединением (IoU-aware query selection). Наконец, декодер со вспомогательными головами предсказания итеративно оптимизирует объектные запросы для генерации рамок и вероятностей ([источник](https://arxiv.org/pdf/2304.08069.pdf)).
+ +### Основные особенности + +- **Эффективный гибридный энкодер:** RT-DETR от Baidu использует эффективный гибридный энкодер, который обрабатывает мультимасштабные признаки путем разделения взаимодействия внутри масштаба и слияния между масштабами. Это уникальное решение на основе Vision Transformers снижает вычислительные затраты и позволяет осуществлять детекцию объектов в реальном времени. +- **Выбор запросов с учетом IoU (IoU-aware query selection):** RT-DETR от Baidu улучшает инициализацию запросов объектов за счет выбора запросов с учетом IoU, то есть пересечения над объединением (IoU-aware query selection). Это позволяет модели фокусироваться на наиболее значимых объектах на сцене и повышает точность детектирования. +- **Гибкая скорость вывода:** RT-DETR от Baidu поддерживает гибкую настройку скорости вывода с использованием различных слоев декодера без необходимости повторного обучения. Гибкость данного подхода упрощает его применение в различных сценариях детекции объектов в реальном времени. + +## Предобученные модели + +Python API Ultralytics предоставляет предобученные модели RT-DETR от PaddlePaddle с различными масштабами: + +- RT-DETR-L: 53.0% AP на COCO val2017, 114 FPS на GPU T4 +- RT-DETR-X: 54.8% AP на COCO val2017, 74 FPS на GPU T4 + +## Примеры использования + +В этом разделе представлены простые примеры обучения и вывода модели RT-DETR. Для полной документации по этим и другим [режимам](../modes/index.md) смотрите страницы документации [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) и [Export](../modes/export.md). + +!!!
Example "Пример" + + === "Python" + + ```python + from ultralytics import RTDETR + + # Загрузка предобученной модели RT-DETR-l на COCO + model = RTDETR('rtdetr-l.pt') + + # Отображение информации о модели (по желанию) + model.info() + + # Обучение модели на примере набора данных COCO8 в течение 100 эпох + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Осуществление вывода модели RT-DETR-l на изображении 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + ```bash + # Загрузка предобученной модели RT-DETR-l на COCO и ее обучение на примере набора данных COCO8 в течение 100 эпох + yolo train model=rtdetr-l.pt data=coco8.yaml epochs=100 imgsz=640 + + # Загрузка предобученной модели RT-DETR-l на COCO и вывод ее на изображении 'bus.jpg' + yolo predict model=rtdetr-l.pt source=path/to/bus.jpg + ``` + +## Поддерживаемые задачи и режимы + +В таблице представлены типы моделей, конкретные предобученные веса, задачи, поддерживаемые каждой моделью, а также различные режимы ([Train](../modes/train.md) , [Val](../modes/val.md), [Predict](../modes/predict.md), [Export](../modes/export.md)), поддерживаемые каждой моделью, что обозначено символом ✅. + +| Тип модели | Предобученные веса | Поддерживаемые задачи | Вывод | Валидация | Обучение | Экспорт | +|---------------------|--------------------|--------------------------------------------|-------|-----------|----------|---------| +| RT-DETR Large | `rtdetr-l.pt` | [Обнаружение объектов](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| RT-DETR Extra-Large | `rtdetr-x.pt` | [Обнаружение объектов](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +## Цитирование и благодарности + +Если вы используете Baidu RT-DETR в своих исследованиях или разработке, пожалуйста, процитируйте [оригинальную статью](https://arxiv.org/abs/2304.08069): + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{lv2023detrs, + title={DETRs Beat YOLOs on Real-time Object Detection}, + author={Wenyu Lv and Shangliang Xu and Yian Zhao and Guanzhong Wang and Jinman Wei and Cheng Cui and Yuning Du and Qingqing Dang and Yi Liu}, + year={2023}, + eprint={2304.08069}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +Мы хотели бы выразить свою благодарность компании Baidu и команде [PaddlePaddle](https://github.com/PaddlePaddle/PaddleDetection) за создание и поддержку этого ценного ресурса для сообщества компьютерного зрения. Мы очень ценим их вклад в области разработки детекторов объектов в реальном времени на основе Vision Transformers, RT-DETR. + +*Keywords: RT-DETR, Transformer, ViT, Vision Transformers, Baidu RT-DETR, PaddlePaddle, Paddle Paddle RT-DETR, real-time object detection, Vision Transformers-based object detection, pre-trained PaddlePaddle RT-DETR models, Baidu's RT-DETR usage, Ultralytics Python API* diff --git a/docs/ru/models/sam.md b/docs/ru/models/sam.md new file mode 100644 index 0000000..11479c3 --- /dev/null +++ b/docs/ru/models/sam.md @@ -0,0 +1,226 @@ +--- +comments: true +description: Изучите передовую модель сегментации изображений "Segment Anything Model" (SAM) от компании Ultralytics, которая позволяет выполнять сегментацию изображений в режиме реального времени. Узнайте о возможности дать подсказки модели для выполнения сегментации, о ее возможностях нулевой настройки и о том, как ее использовать. +keywords: Ultralytics, сегментация изображений, "Segment Anything Model", SAM, набор данных SA-1B, работа в режиме реального времени, нулевая настройка, обнаружение объектов, анализ изображений, машинное обучение +--- + +# Segment Anything Model (SAM) + +Добро пожаловать в мир передовой модели сегментации изображений "Segment Anything Model" (SAM). 
Эта революционная модель установила новые стандарты в области сегментации изображений, предоставляя возможность ввода подсказок для выполнения сегментации в реальном времени. + +## Введение в Segment Anything Model (SAM) + +Segment Anything Model (SAM) - это передовая модель сегментации изображений, позволяющая осуществлять сегментацию с возможностью задавать подсказки, что обеспечивает уникальную гибкость в задачах анализа изображений. SAM является ключевым элементом инициативы "Segment Anything", которая вводит новую модель, задачу и набор данных для сегментации изображений. + +Благодаря своему передовому дизайну, SAM может адаптироваться к новым распределениям изображений и задачам без предварительных знаний, что называется возможностью нулевой настройки (zero-shot transfer). Обученная на обширном наборе данных [SA-1B](https://ai.facebook.com/datasets/segment-anything/), который содержит более 1 миллиарда масок, распределенных по 11 миллионам тщательно отобранных изображений, модель SAM проявила впечатляющую производительность в задачах нулевой настройки, во многих случаях превосходя прежние результаты, полученные при полностью контролируемом обучении. + +![Пример изображения из набора данных](https://user-images.githubusercontent.com/26833433/238056229-0e8ffbeb-f81a-477e-a490-aff3d82fd8ce.jpg) +Изображения с наложенными масками из нашего нового набора данных SA-1B. SA-1B содержит 11 млн разнообразных лицензированных изображений высокого разрешения и 1,1 млрд высококачественных масок сегментации. Эти маски были аннотированы полностью автоматически SAM, и, как показали человеческие оценки и множество экспериментов, они являются высококачественными и разнообразными. Изображения сгруппированы по количеству масок на изображение для наглядности (в среднем на изображение приходится около 100 масок).
+ +## Основные особенности модели Segment Anything (SAM) + +- **Задача предоставления подсказок для сегментации:** SAM была разработана с учетом задачи предоставления подсказок для сегментации, что позволяет ей генерировать корректные маски сегментации на основе любых подсказок, таких как пространственные или текстовые подсказки, идентифицирующие объект. +- **Расширенная архитектура:** Модель Segment Anything использует мощный кодировщик изображений, кодировщик подсказок и легкий декодер масок. Эта уникальная архитектура обеспечивает гибкое использование подсказок, вычисление масок в реальном времени и учет неопределенности в задачах сегментации. +- **Набор данных SA-1B:** Набор данных SA-1B, предложенный проектом Segment Anything, содержит более 1 миллиарда масок на 11 миллионах изображений. Как самый большой набор данных для сегментации на сегодняшний день, он обеспечивает модели SAM разнообразный и масштабный источник данных для обучения. +- **Производительность при нулевой настройке:** Модель SAM проявляет выдающуюся производительность при выполнении задач сегментации в режиме нулевой настройки, что делает ее готовым к использованию инструментом для различных приложений с минимальной необходимостью настраивать подсказки. + +Для более подробного рассмотрения модели Segment Anything и набора данных SA-1B, пожалуйста, посетите [веб-сайт Segment Anything](https://segment-anything.com) и ознакомьтесь с исследовательской статьей [Segment Anything](https://arxiv.org/abs/2304.02643). + +## Доступные модели, поддерживаемые задачи и режимы работы + +В таблице представлены доступные модели с их специфическими заранее обученными весами, поддерживаемыми задачами и их совместимость с различными режимами работы, такими как [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md) и [Export](../modes/export.md), обозначенная символами ✅ для поддерживаемых режимов и символами ❌ для неподдерживаемых режимов. 
+ +| Тип модели | Заранее обученные веса | Поддерживаемые задачи | Inference | Validation | Training | Export | +|------------------------------------|------------------------|---------------------------------------------|-----------|------------|----------|--------| +| Базовая версия SAM (SAM base) | `sam_b.pt` | [Сегментация объектов](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| Расширенная версия SAM (SAM large) | `sam_l.pt` | [Сегментация объектов](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## Как использовать модель SAM: гибкость и мощность в сегментации изображений + +Модель Segment Anything может использоваться для множества задач, выходящих за рамки ее тренировочных данных. Это включает обнаружение границ, генерацию предложений объектов, сегментацию экземпляров и предварительное преобразование текста в маску. С использованием подсказок, SAM может быстро адаптироваться к новым задачам и распределениям данных в режиме нулевой настройки, делая его гибким и мощным инструментом для всех ваших потребностей в сегментации изображений. + +### Пример прогнозирования с использованием SAM + +!!! Example "Сегментация с указанием подсказки" + + Выполнение сегментации изображения с указанными подсказками. + + === "Python" + + ```python + from ultralytics import SAM + + # Загрузка модели + model = SAM('sam_b.pt') + + # Вывод информации о модели (по желанию) + model.info() + + # Выполнение вывода с указанием границы объекта (bboxes prompt) + model('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + + # Выполнение вывода с указанием точки (points prompt) + model('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +!!! Example "Сегментация всего изображения" + + Сегментация всего изображения. 
+ + === "Python" + + ```python + from ultralytics import SAM + + # Загрузка модели + model = SAM('sam_b.pt') + + # Вывод информации о модели (по желанию) + model.info() + + # Выполнение вывода + model('путь/к/изображению.jpg') + ``` + + === "CLI" + + ```bash + # Выполнение вывода с помощью модели SAM + yolo predict model=sam_b.pt source=путь/к/изображению.jpg + ``` + +- Логика здесь состоит в том, чтобы выполнить сегментацию всего изображения, если вы не передаете никаких подсказок (bboxes/points/masks). + +!!! Example "Пример SAMPredictor" + + В этом примере вы можете установить изображение один раз и выполнить множество прогнозирований с использованием подсказок, не запуская кодировщик изображения несколько раз. + + === "Прогнозирование с подсказками" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # Создание SAMPredictor + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # Установка изображения + predictor.set_image("ultralytics/assets/zidane.jpg") # установить с помощью файла изображения + predictor.set_image(cv2.imread("ultralytics/assets/zidane.jpg")) # установить с помощью np.ndarray + results = predictor(bboxes=[439, 437, 524, 709]) + results = predictor(points=[900, 370], labels=[1]) + + # Сброс изображения + predictor.reset_image() + ``` + + Сегментация всего изображения с дополнительными аргументами. + + === "Сегментация всего изображения" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # Создание SAMPredictor + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # Сегментация с дополнительными аргументами + results = predictor(source="ultralytics/assets/zidane.jpg", crop_n_layers=1, points_stride=64) + ``` + +- Больше дополнительных аргументов для `Сегментации всего изображения` см. 
в [справочнике по `Predictor/generate`](../../../reference/models/sam/predict.md). + +## Сравнение SAM и YOLOv8 + +Здесь мы сравниваем самую маленькую модель SAM, SAM-b, с самой маленькой моделью сегментации Ultralytics, [YOLOv8n-seg](../tasks/segment.md): + +| Модель | Размер | Параметры | Скорость (CPU) | +|------------------------------------------------|--------------------------------|----------------------------------|-------------------------------------------| +| SAM-b | 358 МБ | 94.7 млн. | 51096 мс/изображение | +| [MobileSAM](mobile-sam.md) | 40.7 МБ | 10.1 млн. | 46122 мс/изображение | +| [FastSAM-s](fast-sam.md) с основой YOLOv8 | 23.7 МБ | 11.8 млн. | 115 мс/изображение | +| Ultralytics [YOLOv8n-seg](../tasks/segment.md) | **6.7 МБ** (в 53.4 раза меньше) | **3.4 млн.** (в 27.9 раза меньше) | **59 мс/изображение** (в 866 раз быстрее) | + +Это сравнение показывает, что модели различаются по размеру и скорости на порядки. В то время как SAM предлагает уникальные возможности автоматической сегментации, он не является прямым конкурентом моделям сегментации YOLOv8, которые меньше, быстрее и эффективнее. + +Тесты проводились на ноутбуке Apple M2 2023 года с 16 ГБ оперативной памяти. Чтобы воспроизвести этот тест: + +!!!
Example "Пример" + + === "Python" + ```python + from ultralytics import FastSAM, SAM, YOLO + + # Анализ SAM-b + model = SAM('sam_b.pt') + model.info() + model('ultralytics/assets') + + # Анализ MobileSAM + model = SAM('mobile_sam.pt') + model.info() + model('ultralytics/assets') + + # Анализ FastSAM-s + model = FastSAM('FastSAM-s.pt') + model.info() + model('ultralytics/assets') + + # Анализ YOLOv8n-seg + model = YOLO('yolov8n-seg.pt') + model.info() + model('ultralytics/assets') + ``` + +## Авто-аннотация: быстрый путь к наборам данных для сегментации + +Авто-аннотация - это ключевая функция SAM, позволяющая пользователям генерировать [наборы данных для сегментации](https://docs.ultralytics.com/datasets/segment) с использованием предварительно обученной модели обнаружения. Эта функция позволяет быстро и точно аннотировать большое количество изображений, обходя необходимость трудоемкой ручной разметки. + +### Создание набора данных для сегментации с использованием модели обнаружения + +Для авто-аннотации набора данных с использованием фреймворка Ultralytics используйте функцию `auto_annotate`, как показано ниже: + +!!! Example "Пример" + + === "Python" + ```python + from ultralytics.data.annotator import auto_annotate + + auto_annotate(data="path/to/images", det_model="yolov8x.pt", sam_model='sam_b.pt') + ``` + +| Аргумент | Тип | Описание | Значение по умолчанию | +|------------|------------------------|--------------------------------------------------------------------------------------------------------------|-----------------------| +| data | str | Путь к папке с изображениями, которые должны быть аннотированы. | | +| det_model | str, опционально | Предварительно обученная модель обнаружения YOLO. По умолчанию 'yolov8x.pt'. | 'yolov8x.pt' | +| sam_model | str, опционально | Предварительно обученная модель сегментации SAM. По умолчанию 'sam_b.pt'. | 'sam_b.pt' | +| device | str, опционально | Устройство для запуска моделей. 
По умолчанию пустая строка (ЦП или ГП, если доступно). | | +| output_dir | str, None, опционально | Каталог для сохранения результатов аннотации. По умолчанию - папка "labels" в том же каталоге, что и "data". | None | + +Функция `auto_annotate` принимает путь к вашим изображениям, а также необязательные аргументы для указания предварительно обученных моделей обнаружения и сегментации SAM, устройства для запуска моделей и каталога вывода для сохранения аннотированных результатов. + +Авто-аннотация с помощью предварительно обученных моделей может существенно сократить время и усилия, затрачиваемые на создание высококачественных наборов данных для сегментации. Эта функция особенно полезна для исследователей и разработчиков, работающих с большими коллекциями изображений, поскольку она позволяет им сконцентрироваться на разработке и оценке моделей, а не на ручной разметке. + +## Цитирование и благодарности + +Если модель SAM пригодилась вам в ваших исследованиях или разработках, пожалуйста, рассмотрите возможность цитирования нашей статьи: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{kirillov2023segment, + title={Segment Anything}, + author={Alexander Kirillov and Eric Mintun and Nikhila Ravi and Hanzi Mao and Chloe Rolland and Laura Gustafson and Tete Xiao and Spencer Whitehead and Alexander C. Berg and Wan-Yen Lo and Piotr Dollár and Ross Girshick}, + year={2023}, + eprint={2304.02643}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +Мы хотели бы выразить свою благодарность компании Meta AI за создание и поддержку этого ценного ресурса для сообщества компьютерного зрения.
+ +*keywords: Segment Anything, Segment Anything Model, SAM, Meta SAM, сегментация изображений, возможность давать подсказки для выполнения сегментации, производительность с нулевой настройкой, набор данных SA-1B, передовая архитектура, авто-аннотация, Ultralytics, предварительно обученные модели, базовая версия SAM, расширенная версия SAM, сегментация экземпляров, визуальный анализ, искусственный интеллект, машинное обучение, аннотация данных, маски сегментации, модель обнаружения, модель обнаружения YOLOv8, библиографическая ссылка, Meta AI.* diff --git a/docs/ru/models/yolo-nas.md b/docs/ru/models/yolo-nas.md new file mode 100644 index 0000000..cb44c48 --- /dev/null +++ b/docs/ru/models/yolo-nas.md @@ -0,0 +1,121 @@ +--- +comments: true +description: Изучите подробную документацию YOLO-NAS, превосходной модели обнаружения объектов. Узнайте о ее функциях, предварительно обученных моделях, использовании с помощью Ultralytics Python API и многом другом. +keywords: YOLO-NAS, Deci AI, обнаружение объектов, глубокое обучение, поиск нейроархитектур, Ultralytics Python API, модель YOLO, предварительно обученные модели, квантизация, оптимизация, COCO, Objects365, Roboflow 100 +--- + +# YOLO-NAS + +## Обзор + +Разработанный компанией Deci AI, YOLO-NAS является революционной фундаментальной моделью обнаружения объектов. Она является продуктом продвинутой технологии поиска нейроархитектур и специально разработана для преодоления ограничений предыдущих моделей YOLO. Благодаря существенному улучшению поддержки квантования и компромисса между точностью и задержкой, YOLO-NAS представляет собой значительный прорыв в области обнаружения объектов. + +![Иллюстрация модели](https://learnopencv.com/wp-content/uploads/2023/05/yolo-nas_COCO_map_metrics.png) +**Обзор YOLO-NAS.** YOLO-NAS использует блоки, поддерживающие квантование, и селективное квантование для достижения оптимальной производительности. 
При преобразовании в квантованную версию INT8 модель демонстрирует минимальное падение точности, что является значительным улучшением по сравнению с другими моделями. Эти достижения приводят к превосходной архитектуре с беспрецедентными возможностями обнаружения объектов и выдающейся производительностью. + +### Основные функции + +- **Базовый блок, поддерживающий квантование:** YOLO-NAS предлагает новый базовый блок, который хорошо работает с квантованием, что позволяет преодолеть одно из значительных ограничений предыдущих моделей YOLO. +- **Совершенствование тренировки и квантования:** YOLO-NAS использует продвинутые схемы тренировки и пост-тренировочное квантование для улучшения производительности. +- **Оптимизация AutoNAC и предварительное обучение:** YOLO-NAS использует оптимизацию AutoNAC и предварительное обучение на известных наборах данных, таких как COCO, Objects365 и Roboflow 100. Это предварительное обучение делает модель идеально подходящей для решений по обнаружению объектов в производственных средах. + +## Предварительно обученные модели + +Ощутите мощь обнаружения объектов нового поколения с предварительно обученными моделями YOLO-NAS, предоставленными компанией Ultralytics. Эти модели разработаны для обеспечения высокой производительности как по скорости, так и по точности. Выберите один из вариантов, настроенных под ваши конкретные потребности: + +| Модель | mAP | Задержка (мс) | +|------------------|-------|---------------| +| YOLO-NAS S | 47.5 | 3.21 | +| YOLO-NAS M | 51.55 | 5.85 | +| YOLO-NAS L | 52.22 | 7.87 | +| YOLO-NAS S INT-8 | 47.03 | 2.36 | +| YOLO-NAS M INT-8 | 51.0 | 3.78 | +| YOLO-NAS L INT-8 | 52.1 | 4.78 | + +Каждый вариант модели разработан для достижения баланса между средней точностью обнаружения (mAP) и задержкой, помогая вам оптимизировать задачи по обнаружению объектов с точки зрения производительности и скорости.
+ +## Примеры использования + +Компания Ultralytics сделала интеграцию моделей YOLO-NAS в ваши приложения на Python максимально простой с помощью нашего пакета `ultralytics`. Пакет предоставляет удобный API на Python, чтобы упростить весь процесс. + +Ниже приведены примеры использования моделей YOLO-NAS с пакетом `ultralytics` для вывода результатов и их проверки: + +### Примеры вывода результатов и проверки + +В этом примере мы проверяем модель YOLO-NAS-s на наборе данных COCO8. + +!!! Example "Пример" + + В этом примере представлен простой код вывода результатов и проверки для YOLO-NAS. Для обработки результатов вывода см. режим [Predict](../modes/predict.md). Для использования YOLO-NAS с другими режимами см. [Val](../modes/val.md) и [Export](../modes/export.md). Пакет `ultralytics` для YOLO-NAS не поддерживает обучение. + + === "Python" + + Файлы предварительно обученных моделей PyTorch `*.pt` могут быть переданы в класс `NAS()` для создания экземпляра модели на Python: + + ```python + from ultralytics import NAS + + # Загрузка предварительно обученной модели YOLO-NAS-s на наборе данных COCO + model = NAS('yolo_nas_s.pt') + + # Вывод информации о модели (опционально) + model.info() + + # Проверка модели на примере набора данных COCO8 + results = model.val(data='coco8.yaml') + + # Вывод результатов работы модели YOLO-NAS-s на изображении 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + Для прямого запуска моделей доступны следующие команды: + + ```bash + # Загрузка предварительно обученной модели YOLO-NAS-s на наборе данных COCO и проверка ее производительности на примере набора данных COCO8 + yolo val model=yolo_nas_s.pt data=coco8.yaml + + # Загрузка предварительно обученной модели YOLO-NAS-s на наборе данных COCO и запуск вывода результатов на изображении 'bus.jpg' + yolo predict model=yolo_nas_s.pt source=path/to/bus.jpg + ``` + +## Поддерживаемые задачи и режимы + +Мы предлагаем три варианта моделей YOLO-NAS: Small (s), Medium (m) и 
Large (l). Каждый вариант разработан для различных вычислительных и производственных потребностей: + +- **YOLO-NAS-s**: Оптимизирована для сред с ограниченными вычислительными ресурсами, где эффективность является ключевым фактором. +- **YOLO-NAS-m**: Предлагает сбалансированный подход, подходит для общих задач обнаружения объектов с более высокой точностью. +- **YOLO-NAS-l**: Адаптирована для сценариев, требующих максимальной точности, где вычислительные ресурсы менее ограничены. + +Ниже приведен подробный обзор каждой модели, включая ссылки на их предварительно обученные веса, задачи, которые они поддерживают, и их совместимость с различными режимами работы. + +| Тип модели | Предварительно обученные веса | Поддерживаемые задачи | Вывод результатов | Проверка | Обучение | Экспорт | +|------------|-----------------------------------------------------------------------------------------------|--------------------------------------------|-------------------|----------|----------|---------| +| YOLO-NAS-s | [yolo_nas_s.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_s.pt) | [Обнаружение объектов](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-m | [yolo_nas_m.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_m.pt) | [Обнаружение объектов](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-l | [yolo_nas_l.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_l.pt) | [Обнаружение объектов](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | + +## Цитирование и благодарности + +Если вы используете YOLO-NAS в своей научно-исследовательской или разработочной работе, пожалуйста, ссылайтесь на SuperGradients: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{supergradients, + doi = {10.5281/ZENODO.7789328}, + url = {https://zenodo.org/record/7789328}, + author = {Aharon, Shay and {Louis-Dupont} and {Ofri Masad} and Yurkova, Kate and {Lotem Fridman} and {Lkdci} and Khvedchenya, Eugene and Rubin, Ran and Bagrov, Natan and Tymchenko, Borys and Keren, Tomer and Zhilko, Alexander and {Eran-Deci}}, + title = {Super-Gradients}, + publisher = {GitHub}, + journal = {GitHub repository}, + year = {2021}, + } + ``` + +Мы выражаем благодарность команде [SuperGradients](https://github.com/Deci-AI/super-gradients/) компании Deci AI за их усилия в создании и поддержке этого ценного ресурса для компьютерного зрения. Мы считаем, что YOLO-NAS со своей инновационной архитектурой и улучшенными возможностями обнаружения объектов станет важным инструментом как для разработчиков, так и для исследователей. + +*keywords: YOLO-NAS, Deci AI, обнаружение объектов, глубокое обучение, поиск нейроархитектур, Ultralytics Python API, модель YOLO, SuperGradients, предварительно обученные модели, базовый блок, дружественный квантованию, продвинутые схемы тренировки, пост-тренировочное квантование, оптимизация AutoNAC, COCO, Objects365, Roboflow 100* diff --git a/docs/ru/models/yolov3.md b/docs/ru/models/yolov3.md new file mode 100644 index 0000000..53d2fc0 --- /dev/null +++ b/docs/ru/models/yolov3.md @@ -0,0 +1,98 @@ +--- +comments: true +description: Получите обзор моделей YOLOv3, YOLOv3-Ultralytics и YOLOv3u. Узнайте о их основных функциях, использовании и поддерживаемых задачах для обнаружения объектов. +keywords: YOLOv3, YOLOv3-Ultralytics, YOLOv3u, Обнаружение объектов, Вывод, Обучение, Ultralytics +--- + +# YOLOv3, YOLOv3-Ultralytics и YOLOv3u + +## Обзор + +Этот документ представляет обзор трех тесно связанных моделей обнаружения объектов: [YOLOv3](https://pjreddie.com/darknet/yolo/), [YOLOv3-Ultralytics](https://github.com/ultralytics/yolov3) и [YOLOv3u](https://github.com/ultralytics/ultralytics). 
+ +1. **YOLOv3:** Это третья версия алгоритма обнаружения объектов You Only Look Once (YOLO). Изначально разработанная Джозефом Редмоном (Joseph Redmon), YOLOv3 улучшила своих предшественников, внедрив такие функции, как многомасштабные предсказания и три различных размера ядер детекции. + +2. **YOLOv3-Ultralytics:** Это имплементация модели YOLOv3 от Ultralytics. Она воспроизводит оригинальную архитектуру YOLOv3 и предлагает дополнительные возможности, такие как поддержка большего числа предварительно обученных моделей и более простые варианты настройки. + +3. **YOLOv3u:** Это обновленная версия YOLOv3-Ultralytics, которая включает в себя раздельную голову без якорей и без оценки объектности (anchor-free, objectness-free split head), используемую в моделях YOLOv8. YOLOv3u имеет такую же архитектуру основного модуля и модуля "шеи", как YOLOv3, но с обновленной головой детекции из YOLOv8. + +![Ultralytics YOLOv3](https://raw.githubusercontent.com/ultralytics/assets/main/yolov3/banner-yolov3.png) + +## Основные характеристики + +- **YOLOv3:** Внедрение трех масштабов детекции позволило использовать три разных размера ядер детекции: 13x13, 26x26 и 52x52. Это значительно улучшило точность обнаружения объектов различных размеров. Кроме того, YOLOv3 добавила такие функции, как множественные предсказания для каждого ограничивающего прямоугольника и более высококачественную сеть экстрактора признаков. + +- **YOLOv3-Ultralytics:** Имплементация YOLOv3 от Ultralytics обеспечивает такую же производительность, как у оригинальной модели, но дополнительно поддерживает больше предварительно обученных моделей, дополнительные методы обучения и более простые варианты настройки. Это делает ее более гибкой и удобной для практического применения. + +- **YOLOv3u:** В этой обновленной модели используется раздельная голова без якорей и без оценки объектности из YOLOv8. 
Путем устранения необходимости в предопределенных координатах привязки и оценках объектности эта архитектура головы детекции способна улучшить способность модели обнаруживать объекты различных размеров и форм. Это делает YOLOv3u более устойчивой и точной для задач обнаружения объектов. + +## Поддерживаемые задачи и режимы + +Серия YOLOv3, включая YOLOv3, YOLOv3-Ultralytics и YOLOv3u, специально разработана для задач обнаружения объектов. Эти модели заслужили признание своей эффективности в различных сценариях реального мира, обеспечивая баланс между точностью и скоростью. Каждый вариант предлагает уникальные функции и оптимизации, что делает их подходящими для широкого спектра приложений. + +Все три модели поддерживают полный набор режимов, обеспечивая гибкость на разных этапах развертывания и разработки моделей. Эти режимы включают [Вывод](../modes/predict.md), [Проверку](../modes/val.md), [Обучение](../modes/train.md) и [Экспорт](../modes/export.md), что позволяет пользователям полноценно использовать эти модели для эффективного обнаружения объектов. + +| Тип модели | Поддерживаемые задачи | Вывод | Проверка | Обучение | Экспорт | +|--------------------|--------------------------------------------|-------|----------|----------|---------| +| YOLOv3 | [Обнаружение объектов](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3-Ultralytics | [Обнаружение объектов](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3u | [Обнаружение объектов](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +Эта таблица предоставляет краткий обзор возможностей каждого варианта модели YOLOv3, подчеркивая их гибкость и пригодность для выполнения различных задач и операционных режимов в потоке обнаружения объектов. + +## Примеры использования + +Этот пример предоставляет простые примеры обучения и вывода с использованием YOLOv3. Полную документацию об этих и других [режимах](../modes/index.md) см. 
на страницах документации по [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) и [Export](../modes/export.md). + +!!! Example "Пример" + + === "Python" + + В Python можно передавать предварительно обученные модели `*.pt` PyTorch и конфигурационные файлы `*.yaml` в класс `YOLO()`, чтобы создать экземпляр модели: + + ```python + from ultralytics import YOLO + + # Загрузка предварительно обученной модели YOLOv3n на наборе данных COCO + model = YOLO('yolov3n.pt') + + # Отображение информации о модели (необязательно) + model.info() + + # Обучение модели на примере набора данных COCO8 в течение 100 эпох + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Выполнение вывода модели YOLOv3n на изображении 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + Имеются команды интерфейса командной строки для прямого запуска моделей: + + ```bash + # Загрузить предварительно обученную модель YOLOv3n на наборе данных COCO и обучить ее на примере набора данных COCO8 в течение 100 эпох + yolo train model=yolov3n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Загрузить предварительно обученную модель YOLOv3n на наборе данных COCO и выполнить вывод на изображении 'bus.jpg' + yolo predict model=yolov3n.pt source=path/to/bus.jpg + ``` + +## Цитаты и благодарности + +Если вы используете YOLOv3 в своем исследовании, пожалуйста, цитируйте оригинальные статьи о YOLO и репозиторий Ultralytics YOLOv3: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{redmon2018yolov3, + title={YOLOv3: An Incremental Improvement}, + author={Redmon, Joseph and Farhadi, Ali}, + journal={arXiv preprint arXiv:1804.02767}, + year={2018} + } + ``` + +Благодарим Джозефа Редмона и Али Фархади за разработку оригинальной модели YOLOv3. 
diff --git a/docs/ru/models/yolov4.md b/docs/ru/models/yolov4.md new file mode 100644 index 0000000..aec5f38 --- /dev/null +++ b/docs/ru/models/yolov4.md @@ -0,0 +1,71 @@ +--- +comments: true +description: Изучите наше подробное руководство по YOLOv4 - передовому обнаружителю объектов в режиме реального времени. Узнайте о его архитектурных особенностях, инновационных функциях и примерах применения. +keywords: ultralytics, YOLOv4, обнаружение объектов, нейронные сети, обнаружение в реальном времени, детектор объектов, машинное обучение +--- + +# YOLOv4: Быстрое и точное обнаружение объектов + +Добро пожаловать на страницу документации Ultralytics по YOLOv4 - передовому обнаружителю объектов в режиме реального времени, созданному в 2020 году Алексеем Бочковским на [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet). YOLOv4 разработан для обеспечения оптимального баланса между скоростью и точностью, что делает его отличным выбором для многих приложений. + +![Диаграмма архитектуры YOLOv4](https://user-images.githubusercontent.com/26833433/246185689-530b7fe8-737b-4bb0-b5dd-de10ef5aface.png) +**Диаграмма архитектуры YOLOv4**. Показана сложная сетевая архитектура YOLOv4, включая компоненты основной части, шеи и головы, а также их взаимосвязанные слои для оптимального обнаружения объектов в режиме реального времени. + +## Введение + +YOLOv4 означает You Only Look Once, версия 4. Эта модель обнаружения объектов в реальном времени была разработана для преодоления ограничений предыдущих версий YOLO, таких как [YOLOv3](yolov3.md) и других моделей обнаружения объектов. В отличие от других детекторов объектов на основе сверточных нейронных сетей (CNN), YOLOv4 применим как для систем рекомендаций, так и для автономного управления процессами и уменьшения человеческого ввода. 
Благодаря работе на обычных графических процессорах (GPU), YOLOv4 может использоваться массово по доступной цене и разработан для работы в реальном времени на обычном GPU, требуя для обучения лишь одного такого GPU. + +## Архитектура + +YOLOv4 использует несколько инновационных функций, которые работают вместе для оптимизации его производительности. Среди них взвешенные остаточные связи (WRC), частичные соединения между этапами (CSP), перекрестная нормализация по мини-пакетам (CmBN), самосостязательное обучение (SAT), активация Mish, аугментация данных Mosaic, регуляризация DropBlock и функция потерь CIoU. Эти функции объединены для достижения передовых результатов. + +Типичный детектор объектов состоит из нескольких частей, включая входные данные, основную часть, шейку и голову. Основа YOLOv4 предварительно обучена на наборе данных ImageNet и используется для предсказания классов и ограничивающих рамок объектов. Основа может быть представлена несколькими моделями, включая VGG, ResNet, ResNeXt или DenseNet. Часть шейки детектора используется для сбора карт признаков с разных этапов и обычно включает несколько путей снизу вверх и несколько путей сверху вниз. Головная часть используется для окончательного обнаружения и классификации объектов. + +## Bag of Freebies + +YOLOv4 также использует методы, известные как «bag of freebies», которые улучшают точность модели во время обучения без увеличения стоимости вывода. Аугментация данных - это общая техника "bag of freebies", используемая в обнаружении объектов, которая увеличивает изменчивость входных изображений для повышения устойчивости модели. Некоторые примеры аугментации данных включают фотометрические и геометрические искажения. Эти техники помогают модели лучше обобщаться для различных типов изображений. + +## Особенности и производительность + +YOLOv4 разработан для оптимальной скорости и точности обнаружения объектов. 
Архитектура YOLOv4 включает в себя CSPDarknet53 в качестве основы, PANet в качестве шейки и YOLOv3 в качестве головы. Это позволяет YOLOv4 выполнять обнаружение объектов с впечатляющей скоростью, что делает его подходящим для приложений в реальном времени. YOLOv4 также отличается точностью и достигает передовых результатов в тестовых бенчмарках для обнаружения объектов. + +## Примеры использования + +На момент написания данного документа Ultralytics пока не поддерживает модели YOLOv4. Поэтому все пользователи, заинтересованные в использовании YOLOv4, должны обращаться напрямую к репозиторию YOLOv4 на GitHub для инструкций по установке и использованию. + +Вот краткий обзор типичных шагов, которые вы можете предпринять для использования YOLOv4: + +1. Посетите репозиторий YOLOv4 на GitHub: [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet). + +2. Следуйте инструкциям, предоставленным в файле README для установки. Это обычно включает клонирование репозитория, установку необходимых зависимостей и настройку необходимых переменных среды. + +3. После завершения установки вы можете обучать и использовать модель в соответствии с инструкциями использования, предоставленными в репозитории. Обычно это включает подготовку ваших данных, настройку параметров модели, обучение модели и затем использование обученной модели для выполнения обнаружения объектов. + +Обратите внимание, что конкретные шаги могут отличаться в зависимости от вашего конкретного случая использования и текущего состояния репозитория YOLOv4. Поэтому настоятельно рекомендуется обращаться непосредственно к инструкциям, предоставленным в репозитории YOLOv4 на GitHub. + +Приносим извинения за возможные неудобства, и мы постараемся обновить этот документ с примерами использования для Ultralytics, как только поддержка YOLOv4 будет реализована. + +## Заключение + +YOLOv4 - это мощная и эффективная модель обнаружения объектов, которая находит баланс между скоростью и точностью. 
Благодаря уникальным функциям и методам "bag of freebies" во время обучения, она превосходно справляется с задачами обнаружения объектов в реальном времени. YOLOv4 может быть обучена и использована всеми, у кого есть обычный GPU, что делает ее доступной и пригодной для широкого круга применений. + +## Цитирование и благодарности + +Мы хотели бы выразить благодарность авторам YOLOv4 за их значительный вклад в область обнаружения объектов в режиме реального времени: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{bochkovskiy2020yolov4, + title={YOLOv4: Optimal Speed and Accuracy of Object Detection}, + author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, + year={2020}, + eprint={2004.10934}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +Оригинальная статья YOLOv4 доступна на [arXiv](https://arxiv.org/pdf/2004.10934.pdf). Авторы сделали свою работу общедоступной, и код доступен на [GitHub](https://github.com/AlexeyAB/darknet). Мы ценим их усилия в развитии области и доступности их работы для широкого сообщества. diff --git a/docs/ru/models/yolov5.md b/docs/ru/models/yolov5.md new file mode 100644 index 0000000..84b1b8c --- /dev/null +++ b/docs/ru/models/yolov5.md @@ -0,0 +1,113 @@ +--- +comments: true +description: Познакомьтесь с YOLOv5u, улучшенной версией модели YOLOv5 с улучшенным компромиссом между точностью и скоростью и множеством готовых моделей для различных задач обнаружения объектов. +keywords: YOLOv5u, обнаружение объектов, готовые модели, Ultralytics, Вывод, Проверка, YOLOv5, YOLOv8, без якорей, без учета объектности, реальное время работы, машинное обучение +--- + +# YOLOv5 + +## Обзор + +YOLOv5u представляет собой прогресс в методологиях обнаружения объектов. Исходя из основной архитектуры модели [YOLOv5](https://github.com/ultralytics/yolov5), разработанной компанией Ultralytics, YOLOv5u интегрирует разделение головы без якорей и объектности, функциональность, ранее представленную в моделях [YOLOv8](yolov8.md). 
Эта адаптация улучшает архитектуру модели, что приводит к улучшенному компромиссу между точностью и скоростью в задачах обнаружения объектов. Учитывая эмпирические результаты и полученные характеристики, YOLOv5u предлагает эффективную альтернативу для тех, кто ищет надежные решения как в научных исследованиях, так и в практических приложениях. + +![Ultralytics YOLOv5](https://raw.githubusercontent.com/ultralytics/assets/main/yolov5/v70/splash.png) + +## Основные возможности + +- **Разделение головы без якорей**: Традиционные модели обнаружения объектов полагаются на заранее определенные привязочные рамки для предсказания расположения объектов. Однако YOLOv5u модернизирует этот подход. Используя безъякорную голову, она обеспечивает более гибкий и адаптивный механизм обнаружения, что в итоге повышает производительность в различных сценариях. + +- **Оптимизированный компромисс между точностью и скоростью**: Скорость и точность часто движутся в противоположных направлениях. Но YOLOv5u бросает вызов этому компромиссу. Она предлагает настроенный баланс, обеспечивая обнаружение в режиме реального времени без ущерба для точности. Эта функция особенно ценна для приложений, которым требуются быстрые ответы, таких как автономные транспортные средства, робототехника и аналитика видеозаписей в режиме реального времени. + +- **Разнообразие готовых моделей**: Понимая, что различные задачи требуют разного инструментария, YOLOv5u предлагает множество готовых моделей. Независимо от того, занимаетесь ли вы выводом, проверкой или обучением, вас ожидает модель, разработанная специально под вашу уникальную задачу. Это разнообразие гарантирует, что вы используете не универсальное решение, а модель, специально настроенную для вашей уникальной задачи. + +## Поддерживаемые задачи и режимы + +Модели YOLOv5u с различными предварительно обученными весами превосходно справляются с задачами [обнаружения объектов](../tasks/detect.md). 
Они поддерживают широкий спектр режимов работы, что делает их подходящими для разных приложений, от разработки до развертывания. + +| Тип модели | Предварительно обученные веса | Задача | Вывод | Проверка | Обучение | Экспорт | +|------------|-----------------------------------------------------------------------------------------------------------------------------|--------------------------------------------|-------|----------|----------|---------| +| YOLOv5u | `yolov5nu`, `yolov5su`, `yolov5mu`, `yolov5lu`, `yolov5xu`, `yolov5n6u`, `yolov5s6u`, `yolov5m6u`, `yolov5l6u`, `yolov5x6u` | [Обнаружение объектов](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +В этой таблице предоставлена подробная информация о вариантах моделей YOLOv5u, основных задачах обнаружения объектов и поддержке различных операционных режимов, таких как [Вывод](../modes/predict.md), [Проверка](../modes/val.md), [Обучение](../modes/train.md) и [Экспорт](../modes/export.md). Эта всесторонняя поддержка позволяет пользователям полностью использовать возможности моделей YOLOv5u в широком спектре задач обнаружения объектов. + +## Показатели производительности + +!!! Производительность + + === "Обнаружение" + + См. [Документацию по обнаружению](https://docs.ultralytics.com/tasks/detect/) для примеров использования этих моделей, обученных на [COCO](https://docs.ultralytics.com/datasets/detect/coco/), которая включает 80 предварительно обученных классов. + + | Модель | YAML | размер
(пиксели) | mAPval
50-95 | Скорость
CPU ONNX
(мс) | Скорость
A100 TensorRT
(мс) | параметры
(М) | FLOPs
(Б) | + |-------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|-------------------------|----------------------|--------------------------------|-------------------------------------|----------------------|-------------------| + | [yolov5nu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5nu.pt) | [yolov5n.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 34.3 | 73.6 | 1.06 | 2.6 | 7.7 | + | [yolov5su.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5su.pt) | [yolov5s.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 43.0 | 120.7 | 1.27 | 9.1 | 24.0 | + | [yolov5mu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5mu.pt) | [yolov5m.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 49.0 | 233.9 | 1.86 | 25.1 | 64.2 | + | [yolov5lu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5lu.pt) | [yolov5l.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 52.2 | 408.4 | 2.50 | 53.2 | 135.0 | + | [yolov5xu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5xu.pt) | [yolov5x.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 53.2 | 763.2 | 3.81 | 97.2 | 246.4 | + | | | | | | | | | + | [yolov5n6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5n6u.pt) | [yolov5n6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 42.1 | 211.0 | 1.83 | 4.3 | 7.8 | + | [yolov5s6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5s6u.pt) | 
[yolov5s6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 48.6 | 422.6 | 2.34 | 15.3 | 24.6 | + | [yolov5m6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5m6u.pt) | [yolov5m6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 53.6 | 810.9 | 4.36 | 41.2 | 65.7 | + | [yolov5l6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5l6u.pt) | [yolov5l6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 55.7 | 1470.9 | 5.47 | 86.1 | 137.4 | + | [yolov5x6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5x6u.pt) | [yolov5x6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 56.8 | 2436.5 | 8.98 | 155.4 | 250.7 | + +## Примеры использования + +В этом примере приведены простые примеры обучения и вывода моделей YOLOv5. Для получения полной документации по этим и другим [режимам работы](../modes/index.md) см. страницы документации по [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) и [Export](../modes/export.md). + +!!! 
Example "Пример" + + === "Python" + + Предварительно обученные модели PyTorch `*.pt` и файлы конфигурации `*.yaml` можно передать классу `YOLO()` для создания экземпляра модели на Python: + + ```python + from ultralytics import YOLO + + # Загрузите предварительно обученную модель YOLOv5n на COCO + model = YOLO('yolov5n.pt') + + # Отобразить информацию о модели (опционально) + model.info() + + # Обучение модели на примере набора данных на основе COCO8 в течение 100 эпох + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Выполнение вывода с моделью YOLOv5n на изображении 'bus.jpg' + results = model('путь/к/файлу/bus.jpg') + ``` + + === "CLI" + + Доступны команды CLI для непосредственного выполнения моделей: + + ```bash + # Загрузка предварительно обученной модели YOLOv5n на COCO и обучение на примере набора данных на основе COCO8 в течение 100 эпох + yolo train model=yolov5n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Загрузка предварительно обученной модели YOLOv5n на COCO и выполнение вывода на изображении 'bus.jpg' + yolo predict model=yolov5n.pt source=путь/к/файлу/bus.jpg + ``` + +## Цитирование и благодарности + +Если вы используете YOLOv5 или YOLOv5u в своих исследованиях, пожалуйста, ссылайтесь на репозиторий Ultralytics YOLOv5 следующим образом: + +!!! Quote "" + + === "BibTeX" + ```bibtex + @software{yolov5, + title = {Ultralytics YOLOv5}, + author = {Glenn Jocher}, + year = {2020}, + version = {7.0}, + license = {AGPL-3.0}, + url = {https://github.com/ultralytics/yolov5}, + doi = {10.5281/zenodo.3908559}, + orcid = {0000-0001-5950-6979} + } + ``` + +Пожалуйста, обратите внимание, что модели YOLOv5 предоставляются под [AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) и [Enterprise](https://ultralytics.com/license) лицензиями. 
diff --git a/docs/ru/models/yolov6.md b/docs/ru/models/yolov6.md new file mode 100644 index 0000000..34ac079 --- /dev/null +++ b/docs/ru/models/yolov6.md @@ -0,0 +1,107 @@ +--- +comments: true +description: Изучите Meituan YOLOv6 - современную модель обнаружения объектов, отличающуюся балансом между скоростью и точностью. Подробнее о функциях, предварительно обученных моделях и использовании Python. +keywords: Meituan YOLOv6, обнаружение объектов, Ultralytics, документация YOLOv6, двустороннее объединение, тренировка с использованием якорей, предварительно обученные модели, приложения в реальном времени +--- + +# Meituan YOLOv6 + +## Обзор + +[Meituan](https://about.meituan.com/) YOLOv6 - это передовая модель обнаружения объектов, которая отлично сочетает в себе скорость и точность, что делает ее популярным выбором для приложений в реальном времени. Эта модель включает несколько значимых улучшений в своей архитектуре и схеме обучения, включая внедрение модуля двустороннего объединения (BiC), стратегию тренировки с использованием якорей (AAT) и улучшенный дизайн базовой и верхней частей для достижения передовой точности на наборе данных COCO. + +![Meituan YOLOv6](https://user-images.githubusercontent.com/26833433/240750495-4da954ce-8b3b-41c4-8afd-ddb74361d3c2.png) +![Изображение примера модели](https://user-images.githubusercontent.com/26833433/240750557-3e9ec4f0-0598-49a8-83ea-f33c91eb6d68.png) +**Обзор YOLOv6.** Диаграмма архитектуры модели, показывающая переработанные компоненты сети и стратегии тренировки, которые приводят к значительному улучшению производительности. (a) Верхняя часть YOLOv6 (показаны N и S). Обратите внимание, что для M/L блок RepBlocks заменен на CSPStackRep. (b) Структура модуля BiC. (c) Блок SimCSPSPPF block. ([исходник](https://arxiv.org/pdf/2301.05586.pdf)). 
+ +### Основные функции + +- **Модуль двустороннего объединения (BiC):** YOLOv6 включает модуль BiC в верхнюю часть детектора, улучшая сигналы локализации и обеспечивая прирост производительности при минимальном снижении скорости. +- **Стратегия тренировки с использованием якорей (AAT):** Эта модель предлагает AAT, чтобы воспользоваться преимуществами как якорных, так и безъякорных парадигм без ущерба для эффективности вывода. +- **Улучшенный дизайн базовой и верхней частей:** Путем расширения YOLOv6 за счет добавления еще одной стадии в базовую и верхнюю часть модели достигается передовая производительность на наборе данных COCO при высоком разрешении входных изображений. +- **Стратегия самодистилляции:** Внедряется новая стратегия самодистилляции для повышения производительности меньших моделей YOLOv6, улучшая вспомогательную регрессионную ветвь во время тренировки и удаляя ее во время вывода, чтобы избежать заметного снижения скорости. + +## Метрики производительности + +YOLOv6 предоставляет несколько предварительно обученных моделей различных масштабов: + +- YOLOv6-N: 37.5% AP на наборе данных COCO val2017 при 1187 кадрах в секунду (FPS) с использованием графического процессора NVIDIA Tesla T4. +- YOLOv6-S: 45.0% AP при 484 FPS. +- YOLOv6-M: 50.0% AP при 226 FPS. +- YOLOv6-L: 52.8% AP при 116 FPS. +- YOLOv6-L6: Передовая точность в реальном времени. + +YOLOv6 также предоставляет квантованные модели для разных точностей и модели, оптимизированные для мобильных платформ. + +## Примеры использования + +В этом примере приведены простые примеры тренировки и вывода с использованием YOLOv6. Полная документация по этим и другим [режимам](../modes/index.md) доступна на страницах документации [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) и [Export](../modes/export.md). + +!!! 
Example "Пример" + + === "Python" + + Модели PyTorch, предварительно обученные с помощью файлов `*.pt`, а также файлы конфигурации `*.yaml` могут быть переданы в класс `YOLO()` для создания экземпляра модели на Python: + + ```python + from ultralytics import YOLO + + # Построение модели YOLOv6n с нуля + model = YOLO('yolov6n.yaml') + + # Отображение информации о модели (по желанию) + model.info() + + # Тренировка модели на примере набора данных COCO8 в течение 100 эпох + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Вывод результатов с использованием модели YOLOv6n на изображении 'bus.jpg' + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + Доступны команды интерфейса командной строки для непосредственного запуска моделей: + + ```bash + # Построение модели YOLOv6n с нуля и тренировка на примере набора данных COCO8 в течение 100 эпох + yolo train model=yolov6n.yaml data=coco8.yaml epochs=100 imgsz=640 + + # Построение модели YOLOv6n с нуля и вывод результатов на изображении 'bus.jpg' + yolo predict model=yolov6n.yaml source=path/to/bus.jpg + ``` + +## Поддерживаемые задачи и режимы + +Серия моделей YOLOv6 предлагает широкий выбор моделей с оптимизацией для [обнаружения объектов](../tasks/detect.md) высокой производительности. Они удовлетворяют различным вычислительным потребностям и требованиям точности, что делает их универсальными для широкого спектра приложений. 
+ +| Тип модели | Предварительно обученные веса | Поддерживаемые задачи | Вывод результатов | Валидация | Тренировка | Экспорт | +|------------|-------------------------------|--------------------------------------------|-------------------|-----------|------------|---------| +| YOLOv6-N | `yolov6-n.pt` | [Обнаружение объектов](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-S | `yolov6-s.pt` | [Обнаружение объектов](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-M | `yolov6-m.pt` | [Обнаружение объектов](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L | `yolov6-l.pt` | [Обнаружение объектов](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L6 | `yolov6-l6.pt` | [Обнаружение объектов](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +Эта таблица предоставляет подробный обзор вариантов моделей YOLOv6, подчеркивая их возможности в задачах обнаружения объектов и совместимость с различными операционными режимами, такими как [Вывод результатов](../modes/predict.md), [Валидация](../modes/val.md), [Тренировка](../modes/train.md) и [Экспорт](../modes/export.md). Это обширная поддержка позволяет пользователям полностью использовать возможности моделей YOLOv6 в широком спектре сценариев обнаружения объектов. + +## Цитирования и благодарности + +Мы хотели бы выразить благодарность авторам исследования за их значительный вклад в области обнаружения объектов в реальном времени: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{li2023yolov6, + title={YOLOv6 v3.0: A Full-Scale Reloading}, + author={Chuyi Li and Lulu Li and Yifei Geng and Hongliang Jiang and Meng Cheng and Bo Zhang and Zaidan Ke and Xiaoming Xu and Xiangxiang Chu}, + year={2023}, + eprint={2301.05586}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + + Исходную статью об YOLOv6 можно найти на [arXiv](https://arxiv.org/abs/2301.05586). Авторы сделали свою работу общедоступной, и код доступен на [GitHub](https://github.com/meituan/YOLOv6). 
Мы ценим их усилия в развитии этой области и доступности их работы для широкого сообщества. diff --git a/docs/ru/models/yolov7.md b/docs/ru/models/yolov7.md new file mode 100644 index 0000000..2d03172 --- /dev/null +++ b/docs/ru/models/yolov7.md @@ -0,0 +1,65 @@ +--- +comments: true +description: Исследуйте YOLOv7, детектор объектов в реальном времени. Познакомьтесь с его высокой скоростью, впечатляющей точностью и уникальным фокусом на тренируемой оптимизации с использованием "мешка бесплатных улучшений". +keywords: YOLOv7, детектор объектов в реальном времени, новейший, Ultralytics, набор данных MS COCO, репараметризация модели, динамическое присвоение меток, расширенное масштабирование, компаунд-масштабирование +--- + +# YOLOv7: Тренируемый "мешок бесплатных улучшений" + +YOLOv7 - это новейший детектор объектов в реальном времени, который превосходит все известные детекторы объектов по скорости и точности в диапазоне от 5 к/с до 160 к/с. У него самая высокая точность (56,8% AP) среди всех известных детекторов объектов в режиме реального времени со скоростью 30 к/с и выше на GPU V100. Кроме того, YOLOv7 превосходит другие детекторы объектов, такие как YOLOR, YOLOX, Scaled-YOLOv4, YOLOv5 и многие другие, в скорости и точности. Модель обучается с нуля на наборе данных MS COCO без использования других наборов данных или предварительно обученных весов. Исходный код для YOLOv7 доступен на GitHub. + +![Сравнение YOLOv7 с новейшими детекторами объектов](https://github.com/ultralytics/ultralytics/assets/26833433/5e1e0420-8122-4c79-b8d0-2860aa79af92) +**Сравнение новейших детекторов объектов.** Из результатов в Таблице 2 мы знаем, что предложенный метод обеспечивает лучшее соотношение скорость-точность в целом. Если сравнивать YOLOv7-tiny-SiLU с YOLOv5-N (r6.1), наш метод быстрее на 127 к/с и точнее на 10,7% по AP. 
Кроме того, YOLOv7 имеет точность AP 51,4% при скорости кадров 161 к/с, в то время как PPYOLOE-L с такой же точностью имеет только скорость кадров 78 к/с. Что касается использования параметров, YOLOv7 сокращает их на 41% по сравнению с PPYOLOE-L. Если сравнить YOLOv7-X с 114 к/с скоростью вывода с YOLOv5-L (r6.1) с 99 к/с скоростью вывода, YOLOv7-X может повысить AP на 3,9%. Если сравнить YOLOv7-X с YOLOv5-X (r6.1) с похожими масштабами, скорость вывода YOLOv7-X на 31 к/с выше. Кроме того, по количеству параметров и вычислений YOLOv7-X сокращает параметры на 22% и вычисления на 8% по сравнению с YOLOv5-X (r6.1), но повышает AP на 2,2% ([Источник](https://arxiv.org/pdf/2207.02696.pdf)). + +## Обзор + +Детектирование объектов в реальном времени - это важный компонент многих систем компьютерного зрения, включая многотаргетное отслеживание, автономное вождение, робототехнику и анализ медицинских изображений. В последние годы разработка детекторов объектов в реальном времени сосредоточена на проектировании эффективных архитектур и повышении скорости вывода на различных ЦП, ГПУ и нейропроцессорах (NPUs). YOLOv7 поддерживает как мобильные графические процессоры (GPU), так и устройства GPU, от периферии до облачных вычислений. + +В отличие от традиционных детекторов объектов в реальном времени, которые сосредоточены на оптимизации архитектуры, YOLOv7 вводит концепцию оптимизации процесса обучения. Это включает модули и методы оптимизации, разработанные для повышения точности детектирования объектов без увеличения стоимости вывода, известного как "тренируемый мешок бесплатных улучшений". + +## Основные функции + +YOLOv7 предлагает несколько ключевых функций: + +1. **Репараметризация модели**: YOLOv7 предлагает запланированную репараметризацию модели, которая является стратегией, применимой к слоям в разных сетях с концепцией пути градиентного распространения. + +2. 
**Динамическое присвоение меток**: Обучение модели с несколькими выходными слоями представляет новую проблему: "Как назначить динамические цели для выходов разных ветвей?" Для решения этой проблемы YOLOv7 предлагает новый метод присвоения меток, называемый грубо-тонким присвоением меток с управлением ведущей ветви. + +3. **Расширенное и компаунд-масштабирование**: YOLOv7 предлагает методы "расширения" и "компаунд-масштабирования" для детектора объектов в реальном времени, которые эффективно используют параметры и вычисления. + +4. **Эффективность**: Метод, предложенный YOLOv7, может эффективно сокращать примерно на 40% количество параметров и на 50% вычислений известного детектора объектов в реальном времени, обеспечивая более быструю скорость вывода и более высокую точность детектирования. + +## Примеры использования + +На момент написания данного документа Ultralytics в настоящее время не поддерживает модели YOLOv7. Поэтому все пользователи, заинтересованные в использовании YOLOv7, должны обратиться непосредственно к репозиторию YOLOv7 на GitHub для инструкций по установке и использованию. + +Вот краткий обзор типичных шагов, которые могут потребоваться для использования YOLOv7: + +1. Посетите репозиторий YOLOv7 на GitHub: [https://github.com/WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7). + +2. Следуйте инструкциям, предоставленным в файле README по установке. Обычно это включает клонирование репозитория, установку необходимых зависимостей и настройку необходимых переменных среды. + +3. После завершения установки вы можете обучать и использовать модель в соответствии с инструкциями по использованию, предоставленными в репозитории. Обычно это включает подготовку набора данных, настройку параметров модели, обучение модели, а затем использование обученной модели для выполнения детектирования объектов. 
+ +Обратите внимание, что конкретные шаги могут варьироваться в зависимости от вашего конкретного случая использования и текущего состояния репозитория YOLOv7. Поэтому настоятельно рекомендуется обратиться непосредственно к инструкциям, предоставленным в репозитории YOLOv7 на GitHub. + +Мы сожалеем обо всех неудобствах, которые это может вызвать, и будем стараться обновлять этот документ с примерами использования для Ultralytics, как только будет реализована поддержка YOLOv7. + +## Цитирования и благодарности + +Мы хотели бы выразить признательность авторам YOLOv7 за их значительный вклад в области детектирования объектов в реальном времени: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{wang2022yolov7, + title={{YOLOv7}: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors}, + author={Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + journal={arXiv preprint arXiv:2207.02696}, + year={2022} + } + ``` + +Исходную статью YOLOv7 можно найти на [arXiv](https://arxiv.org/pdf/2207.02696.pdf). Авторы опубликовали свою работу публично, и код доступен на [GitHub](https://github.com/WongKinYiu/yolov7). Мы ценим их усилия в совершенствовании этой области и доступности своей работы для широкой общественности. diff --git a/docs/ru/models/yolov8.md b/docs/ru/models/yolov8.md new file mode 100644 index 0000000..da8ce41 --- /dev/null +++ b/docs/ru/models/yolov8.md @@ -0,0 +1,162 @@ +--- +comments: true +description: Изучите захватывающие возможности YOLOv8, последней версии нашего детектора объектов в реальном времени! Узнайте, как передовая архитектура, предварительно обученные модели и оптимальное сочетание точности и скорости делают YOLOv8 идеальным выбором для ваших задач по обнаружению объектов. 
+keywords: YOLOv8, Ultralytics, детектор объектов в реальном времени, предварительно обученные модели, документация, обнаружение объектов, серия YOLO, передовая архитектура, точность, скорость +--- + +# YOLOv8 + +## Обзор + +YOLOv8 - это последняя версия в серии детекторов объектов в реальном времени YOLO, обеспечивающая передовую производительность в терминах точности и скорости. Основываясь на достижениях предыдущих версий YOLO, YOLOv8 вводит новые возможности и оптимизации, делая его идеальным выбором для различных задач по обнаружению объектов в широком спектре приложений. + +![Ultralytics YOLOv8](https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/yolo-comparison-plots.png) + +## Основные возможности + +- **Передовые архитектуры основы и шеи:** YOLOv8 использует передовые архитектуры основы и шеи, что приводит к улучшенному извлечению признаков и производительности обнаружения объектов. +- **Ключевая голова Ultralytics без якорей:** YOLOv8 применяет ключевую голову Ultralytics без якорей, что способствует более точному обнаружению и более эффективному процессу обнаружения по сравнению с якорными подходами. +- **Оптимальное сочетание точности и скорости:** С основным акцентом на поддержании оптимального баланса между точностью и скоростью, YOLOv8 подходит для задач обнаружения объектов в режиме реального времени в различных областях применения. +- **Разнообразие предварительно обученных моделей:** YOLOv8 предлагает ряд предварительно обученных моделей для различных задач и требований к производительности, что упрощает выбор подходящей модели для конкретного случая использования. + +## Поддерживаемые задачи и режимы работы + +Серия YOLOv8 предлагает разнообразные модели, каждая из которых специализирована для конкретных задач в компьютерном зрении. Эти модели разработаны для удовлетворения различных требований, от обнаружения объектов до более сложных задач, таких как сегментация экземпляров, определение позы/ключевых точек и классификация. 
+ +Каждая вариация серии YOLOv8 оптимизирована для своей соответствующей задачи, обеспечивая высокую производительность и точность. Кроме того, эти модели совместимы со множеством режимов работы, включая [Вывод](../modes/predict.md), [Проверку](../modes/val.md), [Обучение](../modes/train.md) и [Экспорт](../modes/export.md), что облегчает их использование на различных этапах развертывания и разработки. + +| Модель | Названия файлов | Задача | Вывод | Проверка | Обучение | Экспорт | +|-------------|----------------------------------------------------------------------------------------------------------------|------------------------------------------------|-------|----------|----------|---------| +| YOLOv8 | `yolov8n.pt` `yolov8s.pt` `yolov8m.pt` `yolov8l.pt` `yolov8x.pt` | [Обнаружение](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-seg | `yolov8n-seg.pt` `yolov8s-seg.pt` `yolov8m-seg.pt` `yolov8l-seg.pt` `yolov8x-seg.pt` | [Сегментация экземпляров](../tasks/segment.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-pose | `yolov8n-pose.pt` `yolov8s-pose.pt` `yolov8m-pose.pt` `yolov8l-pose.pt` `yolov8x-pose.pt` `yolov8x-pose-p6.pt` | [Поза/ключевые точки](../tasks/pose.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-cls | `yolov8n-cls.pt` `yolov8s-cls.pt` `yolov8m-cls.pt` `yolov8l-cls.pt` `yolov8x-cls.pt` | [Классификация](../tasks/classify.md) | ✅ | ✅ | ✅ | ✅ | + +Данная таблица предоставляет обзор вариантов моделей YOLOv8, подчеркивая их применимость к конкретным задачам и их совместимость с различными режимами работы, такими как Вывод, Проверка, Обучение и Экспорт. Это демонстрирует гибкость и надежность серии YOLOv8, что делает их подходящими для широкого спектра приложений в компьютерном зрении. + +## Показатели производительности + +!!! Производительность + + === "Обнаружение (COCO)" + + См. 
[Документацию по обнаружению](https://docs.ultralytics.com/tasks/detect/) для примеров использования этих моделей, обученных на [COCO](https://docs.ultralytics.com/datasets/detect/coco/), включающих 80 предварительно обученных классов. + + | Модель | размер
(пиксели) | mAPval
50-95 | Скорость
CPU ONNX
(мс) | Скорость
A100 TensorRT
(мс) | параметры
(М) | FLOPs
(Б) | + | ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | + + === "Обнаружение (Open Images V7)" + + См. [Документацию по обнаружению](https://docs.ultralytics.com/tasks/detect/) для примеров использования этих моделей, обученных на [Open Images V7](https://docs.ultralytics.com/datasets/detect/open-images-v7/), включающих 600 предварительно обученных классов. + + | Модель | размер
(пиксели) | mAPval
50-95 | Скорость
CPU ONNX
(мс) | Скорость
A100 TensorRT
(мс) | параметры
(М) | FLOPs
(Б) | + | ----------------------------------------------------------------------------------------- | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-oiv7.pt) | 640 | 18.4 | 142.4 | 1.21 | 3.5 | 10.5 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-oiv7.pt) | 640 | 27.7 | 183.1 | 1.40 | 11.4 | 29.7 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-oiv7.pt) | 640 | 33.6 | 408.5 | 2.26 | 26.2 | 80.6 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-oiv7.pt) | 640 | 34.9 | 596.9 | 2.43 | 44.1 | 167.4 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-oiv7.pt) | 640 | 36.3 | 860.6 | 3.56 | 68.7 | 260.6 | + + === "Сегментация (COCO)" + + См. [Документацию по сегментации](https://docs.ultralytics.com/tasks/segment/) для примеров использования этих моделей, обученных на [COCO](https://docs.ultralytics.com/datasets/segment/coco/), включающих 80 предварительно обученных классов. + + | Модель | размер
(пиксели) | mAPbox
50-95 | mAPmask
50-95 | Скорость
CPU ONNX
(мс) | Скорость
A100 TensorRT
(мс) | параметры
(M) | FLOPs
(Б) | + | -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | + | [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | + | [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | + | [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | + | [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | + + === "Классификация (ImageNet)" + + См. [Документацию по классификации](https://docs.ultralytics.com/tasks/classify/) для примеров использования этих моделей, обученных на [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/), включающих 1000 предварительно обученных классов. + + | Модель | размер
(пиксели) | acc
top1 | acc
top5 | Скорость
CPU ONNX
(мс) | Скорость
A100 TensorRT
(мс) | параметры
(M) | FLOPs
(Б) при 640 | + | -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ------------------------------ | ----------------------------------- | ------------------ | ------------------------ | + | [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | + | [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | + | [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | + | [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | + | [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | + + === "Поза (COCO)" + + См. [Документацию по оценке позы](https://docs.ultralytics.com/tasks/pose/) для примеров использования этих моделей, обученных на [COCO](https://docs.ultralytics.com/datasets/pose/coco/), включающих 1 предварительно обученный класс - 'person'. + + | Модель | размер
(пиксели) | mAPpose
50-95 | mAPpose
50 | Скорость
CPU ONNX
(мс) | Скорость
A100 TensorRT
(мс) | параметры
(M) | FLOPs
(Б) | + | ---------------------------------------------------------------------------------------------------- | --------------------- | --------------------- | ------------------ | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | + | [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | + | [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | + | [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | + | [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | + | [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | + | [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | + +## Примеры использования + +В этом примере представлены простые примеры обучения и вывода с использованием YOLOv8. Для полной документации об этих и других [режимах](../modes/index.md) см. страницы документации по [Предсказанию](../modes/predict.md), [Обучению](../modes/train.md), [Проверке](../modes/val.md) и [Экспорту](../modes/export.md). + +Обратите внимание, что приведенный ниже пример относится к моделям YOLOv8 для [Детекции](../tasks/detect.md) объектов. Для дополнительных поддерживаемых задач см. документацию по [Сегментации](../tasks/segment.md), [Классификации](../tasks/classify.md) и [Позе](../tasks/pose.md). + +!!! 
Example "Пример" + + === "Python" + + Предварительно обученные модели PyTorch `*.pt`, а также файлы конфигурации `*.yaml` могут быть переданы классу `YOLO()` для создания экземпляра модели на Python: + + ```python + from ultralytics import YOLO + + # Загрузите предварительно обученную модель YOLOv8n для COCO + model = YOLO('yolov8n.pt') + + # Отобразить информацию о модели (по желанию) + model.info() + + # Обучите модель на примере набора данных COCO8 в течение 100 эпох + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # Выполните вывод с использованием модели YOLOv8n на изображении 'bus.jpg' + results = model('путь/к/изображению/bus.jpg') + ``` + + === "CLI" + + Доступны команды CLI для прямого запуска моделей: + + ```bash + # Загрузите предварительно обученную модель YOLOv8n для COCO и обучите ее на примере набора данных COCO8 в течение 100 эпох + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # Загрузить предварительно обученную модель YOLOv8n для COCO и выполнить вывод на изображении 'bus.jpg' + yolo predict model=yolov8n.pt source=path/to/bus.jpg + ``` + +## Цитирование и благодарности + +Если вы используете модель YOLOv8 или любое другое программное обеспечение из этого репозитория в своей работе, пожалуйста, процитируйте его в следующем формате: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @software{yolov8_ultralytics, + author = {Glenn Jocher and Ayush Chaurasia and Jing Qiu}, + title = {Ultralytics YOLOv8}, + version = {8.0.0}, + year = {2023}, + url = {https://github.com/ultralytics/ultralytics}, + orcid = {0000-0001-5950-6979, 0000-0002-7603-6750, 0000-0003-3783-7069}, + license = {AGPL-3.0} + } + ``` + +Обратите внимание, что идентификатор цифрового объекта (DOI) находится на стадии получения и будет добавлен в цитирование, как только он станет доступным. 
Модели YOLOv8 предоставляются под лицензией [AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) и лицензией [Enterprise](https://ultralytics.com/license). diff --git a/docs/ru/modes/benchmark.md b/docs/ru/modes/benchmark.md new file mode 100644 index 0000000..564edb1 --- /dev/null +++ b/docs/ru/modes/benchmark.md @@ -0,0 +1,94 @@ +--- +comments: true +description: Узнайте, как профилировать скорость и точность YOLOv8 в различных форматах экспорта; получите информацию о метриках mAP50-95, accuracy_top5 и др. +keywords: Ultralytics, YOLOv8, бенчмаркинг, профилирование скорости, профилирование точности, mAP50-95, accuracy_top5, ONNX, OpenVINO, TensorRT, форматы экспорта YOLO +--- + +# Бенчмаркинг моделей с Ultralytics YOLO + +Экосистема и интеграции Ultralytics YOLO + +## Введение + +После того, как ваша модель обучена и валидирована, следующим логическим шагом является оценка ее производительности в различных реальных сценариях. Режим бенчмаркинга в Ultralytics YOLOv8 служит этой цели, предоставляя надежный инструментарий для оценки скорости и точности вашей модели в ряде форматов экспорта. + +## Почему бенчмаркинг критичен? + +- **Обоснованные решения:** Получение представления о компромиссе между скоростью и точностью. +- **Распределение ресурсов:** Понимание производительности различных форматов экспорта на разном оборудовании. +- **Оптимизация:** Выяснение, какой формат экспорта предлагает лучшую производительность для вашего конкретного случая. +- **Эффективность затрат:** Сделайте использование аппаратных ресурсов более эффективным на основе результатов бенчмаркинга. + +### Ключевые метрики в режиме бенчмаркинга + +- **mAP50-95:** Для детектирования объектов, сегментации и оценки поз. +- **accuracy_top5:** Для классификации изображений. +- **Время инференса:** Время, затрачиваемое на каждое изображение в миллисекундах. 
+ +### Поддерживаемые форматы экспорта + +- **ONNX:** Для оптимальной производительности ЦП +- **TensorRT:** Для максимальной эффективности GPU +- **OpenVINO:** Для оптимизации под аппаратное обеспечение Intel +- **CoreML, TensorFlow SavedModel и другие:** Для разнообразных потребностей развертывания. + +!!! Tip "Совет" + + * Экспортируйте в ONNX или OpenVINO для ускорения процессора до 3 раз. + * Экспортируйте в TensorRT для ускорения GPU до 5 раз. + +## Примеры использования + +Запустите бенчмарк YOLOv8n на всех поддерживаемых форматах экспорта, включая ONNX, TensorRT и т. д. Смотрите раздел Аргументы ниже для полного списка параметров экспорта. + +!!! Example "Пример" + + === "Python" + + ```python + from ultralytics.utils.benchmarks import benchmark + + # Бенчмарк на GPU + benchmark(model='yolov8n.pt', data='coco8.yaml', imgsz=640, half=False, device=0) + ``` + === "CLI" + + ```bash + yolo benchmark model=yolov8n.pt data='coco8.yaml' imgsz=640 half=False device=0 + ``` + +## Аргументы + +Аргументы, такие как `model`, `data`, `imgsz`, `half`, `device` и `verbose`, предоставляют пользователям гибкость для тонкой настройки бенчмарков под их конкретные потребности и сравнения производительности различных форматов экспорта с легкостью. 
+ +| Ключ | Значение | Описание | +|-----------|----------|----------------------------------------------------------------------------------| +| `model` | `None` | путь к файлу модели, например yolov8n.pt, yolov8n.yaml | +| `data` | `None` | путь к YAML, ссылающемуся на набор данных для бенчмаркинга (под меткой `val`) | +| `imgsz` | `640` | размер изображения как скаляр или список (h, w), например (640, 480) | +| `half` | `False` | квантование FP16 | +| `int8` | `False` | квантование INT8 | +| `device` | `None` | устройство для запуска, например cuda device=0 или device=0,1,2,3 или device=cpu | +| `verbose` | `False` | не продолжать при ошибке (bool), или пороговое значение для `val` (float) | + +## Форматы экспорта + +Бенчмарки попытаются автоматически запустить для всех возможных форматов экспорта ниже. + +| Формат | Аргумент `format` | Модель | Метаданные | Аргументы | +|--------------------------------------------------------------------|-------------------|---------------------------|------------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` 
| +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +Смотрите полную информацию о `export` на странице [Экспорт](https://docs.ultralytics.com/modes/export/). diff --git a/docs/ru/modes/export.md b/docs/ru/modes/export.md new file mode 100644 index 0000000..6351f21 --- /dev/null +++ b/docs/ru/modes/export.md @@ -0,0 +1,108 @@ +--- +comments: true +description: Пошаговое руководство по экспорту ваших моделей YOLOv8 в различные форматы, такие как ONNX, TensorRT, CoreML и другие, для развертывания. Изучите сейчас!. +keywords: YOLO, YOLOv8, Ultralytics, Экспорт модели, ONNX, TensorRT, CoreML, TensorFlow SavedModel, OpenVINO, PyTorch, экспорт модели +--- + +# Экспорт модели с Ultralytics YOLO + +Экосистема и интеграции Ultralytics YOLO + +## Введение + +Основная цель тренировки модели — её развертывание для реальных приложений. Режим экспорта в Ultralytics YOLOv8 предлагает множество вариантов для экспорта обученной модели в различные форматы, обеспечивая возможность развертывания на разных платформах и устройствах. Это исчерпывающее руководство направлено на то, чтобы провести вас через тонкости экспорта моделей, демонстрируя, как достичь максимальной совместимости и производительности. + +

+
+ +
+ Смотрите: Как экспортировать обученную пользовательскую модель Ultralytics YOLOv8 и запустить инференс в реальном времени на веб-камере. +

+ +## Почему стоит выбрать режим экспорта YOLOv8? + +- **Универсальность:** Экспорт в несколько форматов, включая ONNX, TensorRT, CoreML и другие. +- **Производительность:** Увеличение скорости на GPU до 5 раз с TensorRT и ускорение на CPU до 3 раз с ONNX или OpenVINO. +- **Совместимость:** Сделайте вашу модель универсально развертываемой в различных аппаратных и программных средах. +- **Простота использования:** Простой интерфейс командной строки и Python API для быстрого и простого экспорта моделей. + +### Ключевые особенности режима экспорта + +Вот некоторые из ключевых функций: + +- **Экспорт одним кликом:** Простые команды для экспорта в разные форматы. +- **Пакетный экспорт:** Экспорт моделей, способных к пакетной обработке. +- **Оптимизированное предсказание:** Экспортированные модели оптимизированы для более быстрого предсказания. +- **Учебные видео:** Глубокие руководства и обучающие видео для гладкого опыта экспорта. + +!!! Tip "Совет" + + * Экспортируйте в ONNX или OpenVINO для ускорения CPU до 3 раз. + * Экспортируйте в TensorRT для увеличения скорости на GPU до 5 раз. + +## Примеры использования + +Экспорт модели YOLOv8n в другой формат, например ONNX или TensorRT. Смотрите раздел Аргументы ниже для полного списка аргументов экспорта. + +!!! Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузите модель + model = YOLO('yolov8n.pt') # загрузка официальной модели + model = YOLO('path/to/best.pt') # загрузка обученной пользовательской модели + + # Экспорт модели + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # экспорт официальной модели + yolo export model=path/to/best.pt format=onnx # экспорт обученной пользовательской модели + ``` + +## Аргументы + +Настройки экспорта моделей YOLO относятся к различным конфигурациям и опциям, используемым для сохранения или экспорта модели для использования в других средах или платформах. 
Эти настройки могут влиять на производительность модели, размер и совместимость с разными системами. Некоторые общие настройки экспорта YOLO включают формат экспортируемого файла модели (например, ONNX, TensorFlow SavedModel), устройство, на котором будет запущена модель (например, CPU, GPU), а также наличие дополнительных функций, таких как маски или несколько меток на коробку. Другие факторы, которые могут повлиять на процесс экспорта, включают конкретное задание, для которого используется модель, и требования или ограничения целевой среды или платформы. Важно тщательно рассмотреть и настроить эти параметры, чтобы убедиться, что экспортированная модель оптимизирована для предполагаемого использования и может быть эффективно использована в целевой среде. + +| Ключ | Значение | Описание | +|-------------|-----------------|---------------------------------------------------------------------------| +| `format` | `'torchscript'` | формат для экспорта | +| `imgsz` | `640` | размер изображения в виде скаляра или списка (h, w), например, (640, 480) | +| `keras` | `False` | использовать Keras для экспорта TF SavedModel | +| `optimize` | `False` | TorchScript: оптимизация для мобильных устройств | +| `half` | `False` | квантование FP16 | +| `int8` | `False` | квантование INT8 | +| `dynamic` | `False` | ONNX/TensorRT: динамические оси | +| `simplify` | `False` | ONNX/TensorRT: упрощение модели | +| `opset` | `None` | ONNX: версия набора операций (необязательный, по умолчанию последний) | +| `workspace` | `4` | TensorRT: размер рабочей области (ГБ) | +| `nms` | `False` | CoreML: добавление NMS | + +## Форматы экспорта + +Доступные форматы экспорта YOLOv8 указаны в таблице ниже. Вы можете экспортировать в любой формат, используя аргумент `format`, например, `format='onnx'` или `format='engine'`. 
+ +| Формат | Аргумент `format` | Модель | Метаданные | Аргументы | +|--------------------------------------------------------------------|-------------------|---------------------------|------------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | diff --git a/docs/ru/modes/index.md b/docs/ru/modes/index.md new file mode 100644 index 0000000..fe4987e --- /dev/null +++ b/docs/ru/modes/index.md @@ -0,0 +1,73 @@ +--- +comments: true +description: От обучения до отслеживания - используйте все возможности YOLOv8 от Ultralytics. 
Получите информацию и примеры для каждого поддерживаемого режима, включая проверку, экспорт и бенчмаркинг. +keywords: Ultralytics, YOLOv8, Машинное обучение, Обнаружение объектов, Обучение, Проверка, Предсказание, Экспорт, Отслеживание, Бенчмаркинг +--- + +# Режимы Ultralytics YOLOv8 + +Экосистема Ultralytics YOLO и интеграции + +## Введение + +Ultralytics YOLOv8 - это не просто еще одна модель обнаружения объектов; это многофункциональная платформа, предназначенная для охвата всего жизненного цикла моделей машинного обучения - от ввода данных и обучения модели до валидации, развертывания и отслеживания в реальном мире. Каждый режим служит определенной цели и разработан, чтобы предложить вам гибкость и эффективность, необходимые для различных задач и сценариев использования. + +

+
+ +
+ Смотрите: Руководство по режимам Ultralytics: Обучение, Проверка, Предсказание, Экспорт и Бенчмаркинг. +

+ +### Обзор режимов + +Понимание различных **режимов**, которые поддерживает Ultralytics YOLOv8, критически важно для эффективного использования ваших моделей: + +- **Режим обучения (Train mode)**: Настройте вашу модель на пользовательские или предзагруженные наборы данных. +- **Режим проверки (Val mode)**: Контрольная точка после обучения для валидации производительности модели. +- **Режим предсказания (Predict mode)**: Раскройте предсказательную мощь вашей модели на данных из реального мира. +- **Режим экспорта (Export mode)**: Подготовьте вашу модель к развертыванию в различных форматах. +- **Режим отслеживания (Track mode)**: Расширьте вашу модель обнаружения объектов до приложений отслеживания в реальном времени. +- **Режим бенчмаркинга (Benchmark mode)**: Проанализируйте скорость и точность вашей модели в разнообразных средах развертывания. + +Это исчерпывающее руководство направлено на то, чтобы дать вам обзор и практические сведения о каждом режиме, помогая вам использовать полный потенциал YOLOv8. + +## [Обучение (Train)](train.md) + +Режим обучения используется для обучения модели YOLOv8 на пользовательском наборе данных. В этом режиме модель обучается с использованием указанного набора данных и гиперпараметров. Процесс обучения включает в себя оптимизацию параметров модели, чтобы она могла точно предсказывать классы и местоположения объектов на изображении. + +[Примеры обучения](train.md){ .md-button } + +## [Проверка (Val)](val.md) + +Режим проверки используется для валидации модели YOLOv8 после ее обучения. В этом режиме модель оценивается на наборе данных для валидации, чтобы измерить ее точность и способность к обобщению. Этот режим может быть использован для настройки гиперпараметров модели с целью улучшения ее производительности. 
+ +[Примеры проверки](val.md){ .md-button } + +## [Предсказание (Predict)](predict.md) + +Режим предсказания используется для выполнения предсказаний с использованием обученной модели YOLOv8 на новых изображениях или видео. В этом режиме модель загружается из файла контрольной точки, и пользователь может предоставить изображения или видео для выполнения вывода. Модель предсказывает классы и местоположения объектов во входных изображениях или видео. + +[Примеры предсказания](predict.md){ .md-button } + +## [Экспорт (Export)](export.md) + +Режим экспорта используется для экспортирования модели YOLOv8 в формат, который может быть использован для развертывания. В этом режиме модель преобразуется в формат, который может быть использован другими программными приложениями или аппаратными устройствами. Этот режим полезен при развертывании модели в производственной среде. + +[Примеры экспорта](export.md){ .md-button } + +## [Отслеживание (Track)](track.md) + +Режим отслеживания используется для отслеживания объектов в реальном времени с использованием модели YOLOv8. В этом режиме модель загружается из файла контрольной точки, и пользователь может предоставить прямую видеотрансляцию для выполнения отслеживания объектов в реальном времени. Этот режим полезен для приложений, таких как системы видеонаблюдения или беспилотные автомобили. + +[Примеры отслеживания](track.md){ .md-button } + +## [Бенчмаркинг (Benchmark)](benchmark.md) + +Режим бенчмаркинга используется для профилирования скорости и точности различных форматов экспорта для YOLOv8. Бенчмарки предоставляют информацию о размере экспортируемого формата, его метриках `mAP50-95` (для обнаружения объектов, сегментации и позы) или метриках `accuracy_top5` (для классификации), а также время вывода в миллисекундах на изображение для различных форматов экспорта, таких как ONNX, OpenVINO, TensorRT и других. 
Эта информация может помочь пользователям выбрать оптимальный формат экспорта для их конкретного сценария использования на основе их требований к скорости и точности. + +[Примеры бенчмаркинга](benchmark.md){ .md-button } diff --git a/docs/ru/modes/predict.md b/docs/ru/modes/predict.md new file mode 100644 index 0000000..3e849a4 --- /dev/null +++ b/docs/ru/modes/predict.md @@ -0,0 +1,226 @@ +--- +comments: true +description: Узнайте, как использовать режим предсказаний YOLOv8 для различных задач. Изучите различные источники вывода, такие как изображения, видео и форматы данных. +keywords: Ultralytics, YOLOv8, режим предсказаний, источники вывода, задачи предсказания, режим потоковой передачи, обработка изображений, обработка видео, машинное обучение, искусственный интеллект +--- + +# Прогнозирование моделью Ultralytics YOLO + +Экосистема и интеграции Ultralytics YOLO + +## Введение + +В мире машинного обучения и компьютерного зрения процесс извлечения информации из визуальных данных называется 'выводом' или 'предсказанием'. Ultralytics YOLOv8 предлагает мощную функцию, известную как **режим предсказаний**, который предназначен для высокопроизводительного вывода в реальном времени на широком спектре источников данных. + +

+
+ +
+ Смотреть: Как извлечь результаты из модели Ultralytics YOLOv8 для пользовательских проектов. +

+ +## Прикладные области + +| Производство | Спорт | Безопасность | +|:-------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------:| +| ![Обнаружение автозапчастей](https://github.com/RizwanMunawar/ultralytics/assets/62513924/a0f802a8-0776-44cf-8f17-93974a4a28a1) | ![Обнаружение футболистов](https://github.com/RizwanMunawar/ultralytics/assets/62513924/7d320e1f-fc57-4d7f-a691-78ee579c3442) | ![Обнаружение падения людей](https://github.com/RizwanMunawar/ultralytics/assets/62513924/86437c4a-3227-4eee-90ef-9efb697bdb43) | +| Обнаружение автозапчастей | Обнаружение футболистов | Обнаружение падения людей | + +## Почему стоит использовать Ultralytics YOLO для вывода? + +Вот почему вам следует рассмотреть режим предсказаний YOLOv8 для ваших различных потребностей в выводе: + +- **Универсальность:** Возможность делать выводы по изображениям, видео и даже потоковым трансляциям. +- **Производительность:** Разработан для обработки данных в реальном времени без потери точности. +- **Простота использования:** Интуитивно понятные интерфейсы Python и CLI для быстрого развертывания и тестирования. +- **Высокая настраиваемость:** Различные настройки и параметры для настройки поведения модели вывода в соответствии с вашими конкретными требованиями. + +### Ключевые особенности режима предсказаний + +Режим предсказаний YOLOv8 разработан, чтобы быть надежным и универсальным, он включает в себя: + +- **Совместимость с несколькими источниками данных:** независимо от того, представлены ли ваши данные в виде отдельных изображений, коллекции изображений, видеофайлов или потокового видео, режим предсказаний покрывает все это. 
+- **Режим потоковой передачи:** Используйте функцию потоковой передачи для создания генератора объектов `Results`, экономящего память. Активируйте это, установив `stream=True` в методе вызова предиктора. +- **Пакетная обработка:** Возможность обрабатывать несколько изображений или видеокадров за один пакет, что дополнительно ускоряет время вывода. +- **Дружественная интеграция:** Легко интегрируется с существующими данными и другими программными компонентами благодаря гибкому API. + +Модели Ultralytics YOLO возвращают либо список объектов `Results`, либо генератор объектов `Results` в Python, экономящий память, когда `stream=True` передается в модель во время вывода: + +!!! Example "Предсказание" + + === "Вернуть список с `stream=False`" + ```python + from ultralytics import YOLO + + # Загрузка модели + model = YOLO('yolov8n.pt') # предварительно обученная модель YOLOv8n + + # Пакетный вывод на список изображений + results = model(['im1.jpg', 'im2.jpg']) # вернуть список объектов Results + + # Обработка списка результатов + for result in results: + boxes = result.boxes # Объект Boxes для вывода bbox + masks = result.masks # Объект Masks для вывода масок сегментации + keypoints = result.keypoints # Объект Keypoints для вывода поз + probs = result.probs # Объект Probs для вывода вероятностей классификации + ``` + + === "Вернуть генератор с `stream=True`" + ```python + from ultralytics import YOLO + + # Загрузка модели + model = YOLO('yolov8n.pt') # предварительно обученная модель YOLOv8n + + # Пакетный вывод на список изображений + results = model(['im1.jpg', 'im2.jpg'], stream=True) # вернуть генератор объектов Results + + # Обработка генератора результатов + for result in results: + boxes = result.boxes # Объект Boxes для вывода bbox + masks = result.masks # Объект Masks для вывода масок сегментации + keypoints = result.keypoints # Объект Keypoints для вывода поз + probs = result.probs # Объект Probs для вывода вероятностей классификации + ``` + +## Источники 
вывода + +YOLOv8 может обрабатывать различные типы входных источников для вывода, как показано в таблице ниже. Источники включают статические изображения, видеопотоки и различные форматы данных. В таблице также указано, можно ли использовать каждый источник в режиме потоковой передачи с аргументом `stream=True` ✅. Режим потоковой передачи полезен для обработки видео или живых трансляций, так как создает генератор результатов вместо загрузки всех кадров в память. + +!!! Tip "Совет" + + Используйте `stream=True` для обработки длинных видеороликов или больших наборов данных для эффективного управления памятью. Когда `stream=False`, результаты для всех кадров или точек данных хранятся в памяти, что может быстро накопиться и вызвать ошибки переполнения памяти для больших входов. В отличие от этого, `stream=True` использует генератор, который хранит в памяти результаты только текущего кадра или точки данных, значительно сокращая потребление памяти и предотвращая проблемы с переполнением памяти. + +| Источник | Аргумент | Тип | Заметки | +|-----------------|--------------------------------------------|------------------|---------------------------------------------------------------------------------------------------------------| +| изображение | `'image.jpg'` | `str` или `Path` | Одиночный файл изображения. | +| URL | `'https://ultralytics.com/images/bus.jpg'` | `str` | URL на изображение. | +| скриншот | `'screen'` | `str` | Снять скриншот. | +| PIL | `Image.open('im.jpg')` | `PIL.Image` | Формат HWC с RGB каналами. | +| OpenCV | `cv2.imread('im.jpg')` | `np.ndarray` | Формат HWC с BGR каналами `uint8 (0-255)`. | +| numpy | `np.zeros((640,1280,3))` | `np.ndarray` | Формат HWC с BGR каналами `uint8 (0-255)`. | +| torch | `torch.zeros(16,3,320,640)` | `torch.Tensor` | Формат BCHW с RGB каналами `float32 (0.0-1.0)`. | +| CSV | `'sources.csv'` | `str` или `Path` | CSV-файл, содержащий пути к изображениям, видео или каталогам. 
| +| видео ✅ | `'video.mp4'` | `str` или `Path` | Видеофайл в форматах, вроде MP4, AVI и т.д. | +| каталог ✅ | `'path/'` | `str` или `Path` | Путь к каталогу, содержащему изображения или видео. | +| глоб ✅ | `'path/*.jpg'` | `str` | Шаблон глоба для сопоставления нескольких файлов. Используйте символ `*` как подстановочный. | +| YouTube ✅ | `'https://youtu.be/LNwODJXcvt4'` | `str` | URL на видео YouTube. | +| поток ✅ | `'rtsp://example.com/media.mp4'` | `str` | URL для потоковых протоколов, таких как RTSP, RTMP, TCP, или IP-адрес. | +| много-потоков ✅ | `'list.streams'` | `str` или `Path` | Текстовый файл `*.streams` с одним URL потока на строку, например, 8 потоков запустятся с пакетом размером 8. | + +Ниже приведены примеры кода для использования каждого типа источника: + +!!! Example "Источники предсказаний" + + === "изображение" + Выполнить вывод на файл изображения. + ```python + from ultralytics import YOLO + + # Загрузка предварительно обученной модели YOLOv8n + model = YOLO('yolov8n.pt') + + # Определите путь к файлу изображения + source = 'path/to/image.jpg' + + # Выполнить вывод на источник + results = model(source) # список объектов Results + ``` + + === "скриншот" + Выполнить вывод на текущее содержимое экрана в виде скриншота. + ```python + from ultralytics import YOLO + + # Загрузка предварительно обученной модели YOLOv8n + model = YOLO('yolov8n.pt') + + # Определение текущего скриншота как источника + source = 'screen' + + # Выполнить вывод на источник + results = model(source) # список объектов Results + ``` + + === "URL" + Выполнить вывод на изображение или видео, размещенные удаленно по URL. 
+ ```python + from ultralytics import YOLO + + # Загрузка предварительно обученной модели YOLOv8n + model = YOLO('yolov8n.pt') + + # Определение URL удаленного изображения или видео + source = 'https://ultralytics.com/images/bus.jpg' + + # Выполнить вывод на источник + results = model(source) # список объектов Results + ``` + + === "PIL" + Выполнение вывода на изображение, открытое с помощью Python Imaging Library (PIL). + ```python + from PIL import Image + from ultralytics import YOLO + + # Загрузка предварительно обученной модели YOLOv8n + model = YOLO('yolov8n.pt') + + # Открытие изображения с помощью PIL + source = Image.open('path/to/image.jpg') + + # Выполнение вывода на источник + results = model(source) # список объектов Results + ``` + + === "OpenCV" + Выполнение вывода на изображение, прочитанное с помощью OpenCV. + ```python + import cv2 + from ultralytics import YOLO + + # Загрузка предварительно обученной модели YOLOv8n + model = YOLO('yolov8n.pt') + + # Чтение изображения с помощью OpenCV + source = cv2.imread('path/to/image.jpg') + + # Выполнение вывода на источник + results = model(source) # список объектов Results + ``` + + === "numpy" + Выполнение вывода на изображение, представленное в виде массива numpy. + ```python + import numpy as np + from ultralytics import YOLO + + # Загрузка предварительно обученной модели YOLOv8n + model = YOLO('yolov8n.pt') + + # Создание случайного массива numpy с формой HWC (640, 640, 3) со значениями в диапазоне [0, 255] и типом uint8 + source = np.random.randint(low=0, high=255, size=(640, 640, 3), dtype='uint8') + + # Выполнение вывода на источник + results = model(source) # список объектов Results + ``` + + === "torch" + Выполнение вывода на изображение, представленное в виде тензора PyTorch. 
+ ```python + import torch + from ultralytics import YOLO + + # Загрузка предварительно обученной модели YOLOv8n + model = YOLO('yolov8n.pt') + + # Создание случайного тензора torch с формой BCHW (1, 3, 640, 640) со значениями в диапазоне [0, 1] и типом float32 + source = torch.rand(1, 3, 640, 640, dtype=torch.float32) + + # Выполнение вывода на источник + results = model(source) # список объектов Results diff --git a/docs/ru/modes/track.md b/docs/ru/modes/track.md new file mode 100644 index 0000000..97bd877 --- /dev/null +++ b/docs/ru/modes/track.md @@ -0,0 +1,200 @@ +--- +comments: true +description: Узнайте, как использовать Ultralytics YOLO для отслеживания объектов в видеопотоках. Руководства по использованию различных трекеров и настройке конфигурации трекера. +keywords: Ultralytics, YOLO, отслеживание объектов, видеопотоки, BoT-SORT, ByteTrack, руководство на Python, руководство CLI +--- + +# Множественное отслеживание объектов с помощью Ultralytics YOLO + +Примеры множественного отслеживания объектов + +Отслеживание объектов в сфере видеоаналитики является ключевой задачей, которая определяет не только местоположение и класс объектов в кадре, но также поддерживает уникальный ID для каждого обнаруженного объекта по мере развития видео. Приложения безграничны — от наблюдения и безопасности до аналитики реального времени в спорте. + +## Почему стоит выбрать Ultralytics YOLO для отслеживания объектов? + +Вывод с трекеров Ultralytics согласуется со стандартным обнаружением объектов, но имеет добавленные ID объектов. Это упрощает отслеживание объектов в видеопотоках и выполнение последующей аналитики. Вот почему вы должны рассмотреть использование Ultralytics YOLO для ваших потребностей в отслеживании объектов: + +- **Эффективность:** Обработка видеопотоков в режиме реального времени без потери точности. +- **Гибкость:** Поддержка множества алгоритмов отслеживания и конфигураций. 
+- **Простота использования:** Простой Python API и CLI-опции для быстрой интеграции и развертывания. +- **Настраиваемость:** Легкость использования с пользовательскими обученными моделями YOLO, позволяющая интеграцию в специфические для домена приложения. + +

+
+ +
+ Смотрите: Обнаружение объектов и отслеживание с Ultralytics YOLOv8. +

+ +## Прикладные применения + +| Транспорт | Ритейл | Аквакультура | +|:---------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------:| +| ![Отслеживание транспортных средств](https://github.com/RizwanMunawar/ultralytics/assets/62513924/ee6e6038-383b-4f21-ac29-b2a1c7d386ab) | ![Отслеживание людей](https://github.com/RizwanMunawar/ultralytics/assets/62513924/93bb4ee2-77a0-4e4e-8eb6-eb8f527f0527) | ![Отслеживание рыб](https://github.com/RizwanMunawar/ultralytics/assets/62513924/a5146d0f-bfa8-4e0a-b7df-3c1446cd8142) | +| Отслеживание транспортных средств | Отслеживание людей | Отслеживание рыб | + +## Ключевые особенности + +Ultralytics YOLO расширяет свои возможности обнаружения объектов для обеспечения надежного и универсального отслеживания объектов: + +- **Отслеживание в реальном времени:** Беспрерывное отслеживание объектов в видео с высокой частотой кадров. +- **Поддержка множества трекеров:** Выбор из разнообразия установленных алгоритмов отслеживания. +- **Настраиваемые конфигурации трекеров:** Настройка алгоритма отслеживания для конкретных требований путем регулировки различных параметров. + +## Доступные трекеры + +Ultralytics YOLO поддерживает следующие алгоритмы отслеживания. Их можно включить, передав соответствующий YAML файл конфигурации, например `tracker=tracker_type.yaml`: + +* [BoT-SORT](https://github.com/NirAharon/BoT-SORT) - Используйте `botsort.yaml`, чтобы активировать этот трекер. +* [ByteTrack](https://github.com/ifzhang/ByteTrack) - Используйте `bytetrack.yaml`, чтобы активировать этот трекер. + +Трекер по умолчанию - BoT-SORT. 
+ +## Отслеживание + +Для запуска трекера на видеопотоках используйте обученные модели Detect, Segment или Pose, такие как YOLOv8n, YOLOv8n-seg и YOLOv8n-pose. + +!!! Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузите официальную или пользовательскую модель + model = YOLO('yolov8n.pt') # Загрузить официальную модель Detect + model = YOLO('yolov8n-seg.pt') # Загрузить официальную модель Segment + model = YOLO('yolov8n-pose.pt') # Загрузить официальную модель Pose + model = YOLO('path/to/best.pt') # Загрузить пользовательскую обученную модель + + # Выполнить отслеживание с помощью модели + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True) # Отслеживание с трекером по умолчанию + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True, tracker="bytetrack.yaml") # Отслеживание с трекером ByteTrack + ``` + + === "CLI" + + ```bash + # Выполнить отслеживание с различными моделями используя командный интерфейс + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" # Официальная модель Detect + yolo track model=yolov8n-seg.pt source="https://youtu.be/LNwODJXcvt4" # Официальная модель Segment + yolo track model=yolov8n-pose.pt source="https://youtu.be/LNwODJXcvt4" # Официальная модель Pose + yolo track model=path/to/best.pt source="https://youtu.be/LNwODJXcvt4" # Пользовательская обученная модель + + # Отслеживание с использованием трекера ByteTrack + yolo track model=path/to/best.pt tracker="bytetrack.yaml" + ``` + +Как видно из вышеуказанного использования, отслеживание доступно для всех моделей Detect, Segment и Pose, работающих с видео или потоковыми источниками. + +## Конфигурация + +### Аргументы для отслеживания + +Конфигурация отслеживания имеет общие свойства с режимом Predict, такие как `conf`, `iou` и `show`. Для дальнейшей настройки обратитесь к странице модели [Predict](https://docs.ultralytics.com/modes/predict/). + +!!! 
Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Настройте параметры отслеживания и запустите трекер + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True) + ``` + + === "CLI" + + ```bash + # Настройте параметры отслеживания и запустите трекер, используя командный интерфейс + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3 iou=0.5 show + ``` + +### Выбор трекера + +Ultralytics также позволяет использовать измененный файл конфигурации трекера. Для этого просто сделайте копию файла конфигурации трекера (например, `custom_tracker.yaml`) из [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers) и измените любые настройки (кроме `tracker_type`) в соответствии с вашими потребностями. + +!!! Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузите модель и запустите трекер с пользовательским файлом конфигурации + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", tracker='custom_tracker.yaml') + ``` + + === "CLI" + + ```bash + # Загрузите модель и запустите трекер с пользовательским файлом конфигурации, используя командный интерфейс + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml' + ``` + +Для полного списка аргументов отслеживания обратитесь к странице [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers). + +## Примеры на Python + +### Цикл сохранения следов + +Вот пример скрипта Python, использующий OpenCV (`cv2`) и YOLOv8 для выполнения отслеживания объектов на кадрах видео. В этом сценарии предполагается, что вы уже установили необходимые пакеты (`opencv-python` и `ultralytics`). 
Аргумент `persist=True` указывает трекеру, что текущее изображение или кадр является следующим в последовательности и ожидает, что следы с предыдущего изображения будут присутствовать в текущем изображении. + +!!! Example "Цикл с потоковым отслеживанием for-loop" + + ```python + import cv2 + from ultralytics import YOLO + + # Загрузите модель YOLOv8 + model = YOLO('yolov8n.pt') + + # Откройте видеофайл + video_path = "path/to/video.mp4" + cap = cv2.VideoCapture(video_path) + + # Цикл по кадрам видео + while cap.isOpened(): + # Чтение кадра из видео + success, frame = cap.read() + + if success: + # Выполните отслеживание YOLOv8 для кадра, сохраняя следы между кадрами + results = model.track(frame, persist=True) + + # Визуализируйте результаты на кадре + annotated_frame = results[0].plot() + + # Покажите аннотированный кадр + cv2.imshow("Отслеживание YOLOv8", annotated_frame) + + # Прервать цикл, если нажата клавиша 'q' + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # Прервать цикл, если достигнут конец видео + break + + # Освободите объект захвата видео и закройте окно отображения + cap.release() + cv2.destroyAllWindows() + ``` + +Обратите внимание на изменение с `model(frame)` на `model.track(frame)`, которое позволяет включить отслеживание объектов вместо простого обнаружения. Этот измененный скрипт будет выполнять трекер на каждом кадре видео, визуализировать результаты и отображать их в окне. Цикл можно завершить нажатием 'q'. + +## Содействие в новых трекерах + +Вы являетесь профессионалом в множественном отслеживании объектов и успешно реализовали или адаптировали алгоритм отслеживания с Ultralytics YOLO? Мы приглашаем вас внести свой вклад в наш раздел Trackers на [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers)! Ваши реальные приложения и решения могут быть бесценными для пользователей, работающих над задачами отслеживания. 
+ +Внося свой вклад в этот раздел, вы помогаете расширить спектр доступных решений для отслеживания в рамках фреймворка Ultralytics YOLO, добавляя еще один уровень функциональности и полезности для сообщества. + +Чтобы начать свой вклад, пожалуйста, ознакомьтесь с нашим [Руководством для участников](https://docs.ultralytics.com/help/contributing) для получения полной инструкции по отправке Pull Request (PR) 🛠️. Нам не терпится увидеть ваш вклад! + +Вместе давайте улучшим возможности отслеживания экосистемы Ultralytics YOLO 🙏! diff --git a/docs/ru/modes/train.md b/docs/ru/modes/train.md new file mode 100644 index 0000000..38642f3 --- /dev/null +++ b/docs/ru/modes/train.md @@ -0,0 +1,206 @@ +--- +comments: true +description: Пошаговое руководство по обучению моделей YOLOv8 с использованием Ultralytics YOLO, включая примеры обучения на одном и нескольких GPU +keywords: Ultralytics, YOLOv8, YOLO, обнаружение объектов, режим обучения, настраиваемый набор данных, обучение на GPU, много-GPU, гиперпараметры, примеры CLI, примеры Python +--- + +# Обучение моделей с помощью Ultralytics YOLO + +Экосистема и интеграции Ultralytics YOLO + +## Введение + +Обучение модели глубокого обучения включает в себя подачу данных и настройку её параметров, чтобы она могла делать точные прогнозы. Режим обучения в Ultralytics YOLOv8 предназначен для эффективного и результативного обучения моделей обнаружения объектов с полным использованием возможностей современной аппаратуры. Это руководство нацелено на описание всех деталей, необходимых для начала обучения ваших моделей с использованием богатого набора функций YOLOv8. + +

+
+ +
+ Смотреть: Как обучить модель YOLOv8 на вашем настраиваемом наборе данных в Google Colab. +

+ +## Почему стоит выбрать Ultralytics YOLO для обучения? + +Вот несколько убедительных причин использовать режим Train YOLOv8: + +- **Эффективность:** Используйте максимум возможностей вашего оборудования, будь то настройка с одним GPU или распределение нагрузки на несколько GPU. +- **Универсальность:** Обучайте на настраиваемых наборах данных, помимо уже доступных, таких как COCO, VOC и ImageNet. +- **Дружелюбный интерфейс:** Простой, но мощный интерфейс командной строки (CLI) и Python для прямолинейного опыта обучения. +- **Гибкость гиперпараметров:** Широкий спектр настраиваемых гиперпараметров для тонкой настройки производительности модели. + +### Ключевые особенности режима Train + +Вот некоторые заметные особенности режима Train YOLOv8: + +- **Автоматическая загрузка набора данных:** Стандартные наборы данных, такие как COCO, VOC и ImageNet, загружаются автоматически при первом использовании. +- **Поддержка многих GPU:** Масштабируйте усилия по обучению без проблем на нескольких GPU, чтобы ускорить процесс. +- **Настройка гиперпараметров:** Возможность изменения гиперпараметров через файлы конфигурации YAML или аргументы CLI. +- **Визуализация и мониторинг:** Отслеживание метрик обучения в реальном времени и визуализация процесса обучения для лучшего понимания. + +!!! Tip "Совет" + + * Наборы данных YOLOv8, такие как COCO, VOC, ImageNet и многие другие, автоматически загружаются при первом использовании, например, `yolo train data=coco.yaml` + +## Примеры использования + +Обучение YOLOv8n на наборе данных COCO128 в течение 100 эпох с размером изображения 640. Устройство для обучения может быть указано с помощью аргумента `device`. Если аргумент не передан, будет использоваться GPU `device=0`, если доступен, в противном случае будет использоваться `device=cpu`. Смотрите раздел Аргументы ниже для полного списка аргументов обучения. + +!!! Example "Пример обучения на одном GPU и CPU" + + Устройство определяется автоматически. 
Если доступен GPU, то он будет использован, иначе обучение начнется на CPU. + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузить модель + model = YOLO('yolov8n.yaml') # создать новую модель из YAML + model = YOLO('yolov8n.pt') # загрузить предобученную модель (рекомендуется для обучения) + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # создать из YAML и перенести веса + + # Обучить модель + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + # Создать новую модель из YAML и начать обучение с нуля + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # Начать обучение с предобученной модели *.pt + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # Создать новую модель из YAML, перенести предобученные веса и начать обучение + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### Обучение на нескольких GPU + +Обучение на нескольких GPU позволяет более эффективно использовать доступные аппаратные ресурсы, распределяя нагрузку по обучению на несколько GPU. Эта функция доступна как через Python API, так и через командный интерфейс. Чтобы включить обучение на нескольких GPU, укажите идентификаторы устройств GPU, которые вы хотите использовать. + +!!! Example "Пример обучения на нескольких GPU" + + Чтобы обучить с использованием 2 GPU, устройств CUDA 0 и 1 используйте следующие команды. Расширьте до дополнительных GPU по мере необходимости. 
+ + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузить модель + model = YOLO('yolov8n.pt') # загрузить предобученную модель (рекомендуется для обучения) + + # Обучить модель с использованием 2 GPU + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device=[0, 1]) + ``` + + === "CLI" + + ```bash + # Начать обучение с предобученной модели *.pt используя GPU 0 и 1 + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=0,1 + ``` + +### Обучение на Apple M1 и M2 с использованием MPS + +С интеграцией поддержки чипов Apple M1 и M2 в модели Ultralytics YOLO теперь можно обучать ваши модели на устройствах, использующих мощную платформу Metal Performance Shaders (MPS). MPS предлагает производительный способ выполнения вычислений и задач обработки изображений на пользовательских кремниевых чипах Apple. + +Чтобы запустить обучение на чипах Apple M1 и M2, вы должны указать 'mps' в качестве вашего устройства при запуске процесса обучения. Ниже приведены примеры использования Python и командной строки: + +!!! Example "Пример обучения с MPS" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузить модель + model = YOLO('yolov8n.pt') # загрузить предобученную модель (рекомендуется для обучения) + + # Обучить модель с использованием MPS + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device='mps') + ``` + + === "CLI" + + ```bash + # Начать обучение с предобученной модели *.pt используя MPS + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=mps + ``` + +Используя вычислительные возможности чипов M1/M2, это позволяет более эффективно обрабатывать задачи обучения. Для более подробного руководства и расширенных параметров конфигурации, пожалуйста, обратитесь к [документации PyTorch MPS](https://pytorch.org/docs/stable/notes/mps.html). 
+ +## Логирование + +В процессе обучения модели YOLOv8 вы можете найти ценным отслеживание производительности модели со временем. Здесь на помощь приходит логирование. YOLO от Ultralytics поддерживает три типа логгеров - Comet, ClearML и TensorBoard. + +Чтобы использовать логгер, выберите его из выпадающего меню в приведенном выше примере кода и запустите его. Выбранный логгер будет установлен и инициализирован. + +### Comet + +[Comet](https://www.comet.ml/site/) - это платформа, которая позволяет ученым и разработчикам отслеживать, сравнивать, объяснять и оптимизировать эксперименты и модели. Она предоставляет такие функции, как метрики в реальном времени, сравнение кода и отслеживание гиперпараметров. + +Чтобы использовать Comet: + +!!! Example "Пример" + + === "Python" + ```python + # pip install comet_ml + import comet_ml + + comet_ml.init() + ``` + +Не забудьте войти в свою учетную запись Comet на их сайте и получить свой API-ключ. Вам нужно будет добавить его в переменные среды или в свой скрипт, чтобы вести журнал своих экспериментов. + +### ClearML + +[ClearML](https://www.clear.ml/) - это открытая платформа, которая автоматизирует отслеживание экспериментов и помогает в эффективном обмене ресурсами. Она предназначена для помощи командам в управлении, выполнении и воспроизведении их работы в области ML более эффективно. + +Чтобы использовать ClearML: + +!!! Example "Пример" + + === "Python" + ```python + # pip install clearml + import clearml + + clearml.browser_login() + ``` + +После запуска этого скрипта вам нужно будет войти в вашу учетную запись ClearML в браузере и аутентифицировать вашу сессию. + +### TensorBoard + +[TensorBoard](https://www.tensorflow.org/tensorboard) - это инструмент визуализации для TensorFlow. Он позволяет вам визуализировать граф TensorFlow, выводить количественные метрики о выполнении вашего графа и показывать дополнительные данные, такие как изображения, проходящие через него. 
+ +Чтобы использовать TensorBoard в [Google Colab](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb): + +!!! Example "Пример" + + === "CLI" + ```bash + load_ext tensorboard + tensorboard --logdir ultralytics/runs # заменить на директорию 'runs' + ``` + +Чтобы использовать TensorBoard локально, запустите приведенную ниже команду и просмотрите результаты по адресу http://localhost:6006/. + +!!! Example "Пример" + + === "CLI" + ```bash + tensorboard --logdir ultralytics/runs # заменить на директорию 'runs' + ``` + +Это загрузит TensorBoard и направит его к каталогу, где сохраняются ваши журналы обучения. + +После настройки вашего логгера вы можете продолжать обучение модели. Все метрики обучения будут автоматически записаны на выбранной вами платформе, и вы сможете получить доступ к этим журналам, чтобы отслеживать производительность вашей модели со временем, сравнивать различные модели и определять области для улучшения. diff --git a/docs/ru/modes/val.md b/docs/ru/modes/val.md new file mode 100644 index 0000000..1d7fb5d --- /dev/null +++ b/docs/ru/modes/val.md @@ -0,0 +1,86 @@ +--- +comments: true +description: Руководство по проверке моделей YOLOv8. Узнайте, как оценить производительность ваших моделей YOLO, используя параметры проверки и метрики с примерами на Python и CLI. +keywords: Ultralytics, YOLO Документация, YOLOv8, проверка, оценка модели, гиперпараметры, точность, метрики, Python, CLI +--- + +# Валидация моделей с Ultralytics YOLO + +Ultralytics YOLO экосистема и интеграции + +## Введение + +Валидация является критически важным этапом в процессе машинного обучения, позволяющим оценить качество ваших обученных моделей. Режим Val в Ultralytics YOLOv8 обеспечивает набор инструментов и метрик для оценки производительности ваших моделей по обнаружению объектов. Это руководство служит полным ресурсом для понимания того, как эффективно использовать режим Val, чтобы обеспечить точность и надежность ваших моделей. 
+ +## Зачем проверять с Ultralytics YOLO? + +Вот почему использование режима Val YOLOv8 выгодно: + +- **Точность:** Получите точные метрики, такие как mAP50, mAP75 и mAP50-95, для всесторонней оценки вашей модели. +- **Удобство:** Используйте встроенные функции, которые запоминают настройки обучения, упрощая процесс валидации. +- **Гибкость:** Проверяйте вашу модель с использованием тех же или разных наборов данных и размеров изображений. +- **Настройка гиперпараметров:** Используйте метрики проверки для дополнительной настройки вашей модели для лучшей производительности. + +### Основные функции режима Val + +Вот некоторые заметные функции, предлагаемые режимом Val YOLOv8: + +- **Автоматизированные настройки:** Модели запоминают свои конфигурации обучения для простой валидации. +- **Поддержка множества метрик:** Оцените вашу модель, основываясь на ряде метрик точности. +- **CLI и Python API:** Выберите интерфейс командной строки или Python API в зависимости от вашего предпочтения для проверки. +- **Совместимость данных:** Бесперебойно работает с наборами данных, используемыми во время фазы обучения, а также с пользовательскими наборами данных. + +!!! Tip "Совет" + + * Модели YOLOv8 автоматически запоминают свои настройки обучения, так что вы можете легко проверить модель с тем же размером изображения и на оригинальном наборе данных, просто используя `yolo val model=yolov8n.pt` или `YOLO('yolov8n.pt').val()` + +## Примеры использования + +Проверьте точность обученной модели YOLOv8n на наборе данных COCO128. Аргументы передавать не требуется, так как `model` сохраняет `data` и аргументы в качестве атрибутов модели. См. раздел Аргументы ниже для полного списка аргументов валидации. + +!!! 
Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузка модели + model = YOLO('yolov8n.pt') # загрузить официальную модель + model = YOLO('path/to/best.pt') # загрузить пользовательскую модель + + # Проверка модели + metrics = model.val() # аргументы не нужны, набор данных и настройки запомнены + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # список содержит map50-95 каждой категории + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # проверить официальную модель + yolo detect val model=path/to/best.pt # проверить пользовательскую модель + ``` + +## Аргументы + +Настройки проверки для моделей YOLO относятся к различным гиперпараметрам и конфигурациям, используемым для оценки производительности модели на наборе данных для проверки. Эти настройки могут влиять на производительность, скорость и точность модели. Некоторые общие параметры проверки YOLO включают размер пакета, частоту проведения проверки во время обучения и метрики, используемые для оценки производительности модели. Другие факторы, которые могут влиять на процесс проверки, включают размер и состав набора данных для проверки и конкретную задачу, для которой используется модель. Важно тщательно настроить и провести эксперименты с этими параметрами, чтобы убедиться, что модель хорошо работает на наборе данных для проверки и для обнаружения и предотвращения переобучения. 
+ +| Ключ | Значение | Описание | +|---------------|----------|-----------------------------------------------------------------------------------------| +| `data` | `None` | путь к файлу данных, например, coco128.yaml | +| `imgsz` | `640` | размер входных изображений как целое число | +| `batch` | `16` | количество изображений в пакете (-1 для AutoBatch) | +| `save_json` | `False` | сохранить результаты в файл JSON | +| `save_hybrid` | `False` | сохранить гибридную версию меток (метки + дополнительные предсказания) | +| `conf` | `0.001` | порог уверенности объекта для обнаружения | +| `iou` | `0.6` | порог пересечения по объединению (IoU) для NMS (подавление немаксимумов) | +| `max_det` | `300` | максимальное количество обнаружений на изображение | +| `half` | `True` | использовать половинную точность (FP16) | +| `device` | `None` | устройство для выполнения, например, cuda device=0/1/2/3 или device=cpu | +| `dnn` | `False` | использовать OpenCV DNN для инференса ONNX | +| `plots` | `False` | показывать графики во время обучения | +| `rect` | `False` | прямоугольная валидация с коллекцией каждого пакета для минимального паддинга | +| `split` | `val` | раздел набора данных для использования в валидации, например, 'val', 'test' или 'train' | +| diff --git a/docs/ru/quickstart.md b/docs/ru/quickstart.md new file mode 100644 index 0000000..e47dd6d --- /dev/null +++ b/docs/ru/quickstart.md @@ -0,0 +1,198 @@ +--- +comments: true +description: Изучение различных методов установки Ultralytics с использованием pip, conda, git и Docker. Освоение работы с Ultralytics через интерфейс командной строки или в рамках ваших проектов на Python. +keywords: установка Ultralytics, установка pip Ultralytics, установка Docker Ultralytics, интерфейс командной строки Ultralytics, Python интерфейс Ultralytics +--- + +## Установка Ultralytics + +Ultralytics предлагает различные методы установки, включая pip, conda и Docker. 
Установите YOLOv8 через пакет `ultralytics` pip для последнего стабильного выпуска или путем клонирования [репозитория Ultralytics на GitHub](https://github.com/ultralytics/ultralytics) для получения самой актуальной версии. Docker можно использовать для выполнения пакета в изолированном контейнере, избегая локальной установки. + +!!! Example "Установка" + + === "Установка через Pip (рекомендуется)" + Установите пакет `ultralytics` с помощью pip или обновите существующую установку, запустив `pip install -U ultralytics`. Посетите индекс пакетов Python (PyPI) для получения дополнительной информации о пакете `ultralytics`: [https://pypi.org/project/ultralytics/](https://pypi.org/project/ultralytics/). + + [![Версия PyPI](https://badge.fury.io/py/ultralytics.svg)](https://badge.fury.io/py/ultralytics) [![Загрузки](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) + + ```bash + # Установка пакета ultralytics из PyPI + pip install ultralytics + ``` + + Вы также можете установить пакет `ultralytics` напрямую из [репозитория на GitHub](https://github.com/ultralytics/ultralytics). Это может быть полезно, если вы хотите получить последнюю версию для разработки. Убедитесь, что в вашей системе установлен инструмент командной строки Git. Команда `@main` устанавливает ветку `main`, которую можно изменить на другую, к примеру, `@my-branch`, или удалить полностью, чтобы по умолчанию использовалась ветка `main`. + + ```bash + # Установка пакета ultralytics из GitHub + pip install git+https://github.com/ultralytics/ultralytics.git@main + ``` + + === "Установка через Conda" + Conda - это альтернативный менеджер пакетов для pip, который также может быть использован для установки. Посетите Anaconda для получения дополнительной информации: [https://anaconda.org/conda-forge/ultralytics](https://anaconda.org/conda-forge/ultralytics). 
Репозиторий для обновления conda пакета Ultralytics находится здесь: [https://github.com/conda-forge/ultralytics-feedstock/](https://github.com/conda-forge/ultralytics-feedstock/). + + [![Conda Recipe](https://img.shields.io/badge/recipe-ultralytics-green.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda Загрузки](https://img.shields.io/conda/dn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda Версия](https://img.shields.io/conda/vn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda Платформы](https://img.shields.io/conda/pn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) + + ```bash + # Установка пакета ultralytics с помощью conda + conda install -c conda-forge ultralytics + ``` + + !!! Note "Заметка" + + Если вы устанавливаете пакет в среде CUDA, лучшей практикой будет установка `ultralytics`, `pytorch` и `pytorch-cuda` одной командой, чтобы менеджер пакетов conda мог разрешить любые конфликты или установить `pytorch-cuda` последним, чтобы при необходимости он мог заменить пакет `pytorch`, предназначенный для ЦП. + + ```bash + # Установка всех пакетов вместе с помощью conda + conda install -c pytorch -c nvidia -c conda-forge pytorch torchvision pytorch-cuda=11.8 ultralytics + ``` + + ### Образ Conda для Docker + + Образы Conda Ultralytics также доступны на [DockerHub](https://hub.docker.com/r/ultralytics/ultralytics). Эти образы основаны на [Miniconda3](https://docs.conda.io/projects/miniconda/en/latest/) и являются простым способом начать использовать `ultralytics` в среде Conda. 
+ + ```bash + # Установка имени образа в переменную + t=ultralytics/ultralytics:latest-conda + + # Скачивание последнего образа ultralytics с Docker Hub + sudo docker pull $t + + # Запуск образа ultralytics в контейнере с поддержкой GPU + sudo docker run -it --ipc=host --gpus all $t # все GPU + sudo docker run -it --ipc=host --gpus '"device=2,3"' $t # выбор GPU + ``` + + === "Клонирование Git" + Клонируйте репозиторий `ultralytics`, если вы заинтересованы в участии в разработке или хотите экспериментировать с последним исходным кодом. После клонирования перейдите в каталог и установите пакет в режиме редактирования `-e` с помощью pip. + + ```bash + # Клонирование репозитория ultralytics + git clone https://github.com/ultralytics/ultralytics + + # Переход в клонированный каталог + cd ultralytics + + # Установка пакета в режиме редактирования для разработки + pip install -e . + ``` + +Смотрите файл [requirements.txt](https://github.com/ultralytics/ultralytics/blob/main/requirements.txt) `ultralytics` для списка зависимостей. Обратите внимание, что все приведенные выше примеры устанавливают все необходимые зависимости. + +

+
+ +
+ Watch: Ultralytics YOLO Quick Start Guide +

+ +!!! Tip "Совет" + + Требования PyTorch зависят от операционной системы и требований CUDA, поэтому рекомендуется сначала установить PyTorch, следуя инструкциям на [https://pytorch.org/get-started/locally](https://pytorch.org/get-started/locally). + + + Инструкции по установке PyTorch + + +## Использование Ultralytics с CLI + +Интерфейс командной строки (CLI) Ultralytics позволяет выполнять простые команды одной строкой без необходимости настройки Python среды. CLI не требует настройки или кода на Python. Все задачи можно легко выполнить из терминала с помощью команды `yolo`. Прочтите [Руководство по CLI](/../usage/cli.md), чтобы узнать больше об использовании YOLOv8 из командной строки. + +!!! Example "Пример" + + === "Синтаксис" + + Команды Ultralytics `yolo` используют следующий синтаксис: + ```bash + yolo ЗАДАЧА РЕЖИМ АРГУМЕНТЫ + + Где ЗАДАЧА (необязательно) одна из [detect, segment, classify] + РЕЖИМ (обязательно) один из [train, val, predict, export, track] + АРГУМЕНТЫ (необязательно) любое количество пар 'arg=value', которые переопределяют настройки по умолчанию. 
+ ``` + Смотрите все АРГУМЕНТЫ в полном [Руководстве по конфигурации](/../usage/cfg.md) или с помощью `yolo cfg` + + === "Train" + + Обучение модели для детекции на 10 эпохах с начальной скоростью обучения 0.01 + ```bash + yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01 + ``` + + === "Predict" + + Прогнозирование видео с YouTube с использованием предварительно обученной модели сегментации при размере изображения 320: + ```bash + yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 + ``` + + === "Val" + + Валидация предварительно обученной модели детекции с размером партии 1 и размером изображения 640: + ```bash + yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640 + ``` + + === "Export" + + Экспорт модели классификации YOLOv8n в формат ONNX с размером изображения 224 на 128 (TASK не требуется) + ```bash + yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128 + ``` + + === "Special" + + Выполнение специальных команд для просмотра версии, настроек, запуска проверок и другого: + ```bash + yolo help + yolo checks + yolo version + yolo settings + yolo copy-cfg + yolo cfg + ``` + +!!! Warning "Предупреждение" + + Аргументы должны передаваться в виде пар `arg=val`, разделенных знаком равенства `=`, и разделены пробелами ` ` между парами. Не используйте префиксы аргументов `--` или запятые `,` между аргументами. + + - `yolo predict model=yolov8n.pt imgsz=640 conf=0.25`   ✅ + - `yolo predict model yolov8n.pt imgsz 640 conf 0.25`   ❌ + - `yolo predict --model yolov8n.pt --imgsz 640 --conf 0.25`   ❌ + +[Руководство по CLI](/../usage/cli.md){ .md-button } + +## Использование Ultralytics с Python + +Python интерфейс YOLOv8 позволяет легко интегрировать его в ваши Python проекты, упрощая загрузку, выполнение и обработку результатов работы модели. 
Интерфейс Python разработан с акцентом на простоту и удобство использования, позволяя пользователям быстро внедрять функции обнаружения объектов, сегментации и классификации в их проектах. Это делает интерфейс Python YOLOv8 незаменимым инструментом для тех, кто хочет включить эти функции в свои Python проекты. + +Например, пользователи могут загрузить модель, обучить ее, оценить ее производительность на валидационном наборе, и даже экспортировать ее в формат ONNX всего за несколько строк кода. Подробнее о том, как использовать YOLOv8 в ваших Python проектах, читайте в [Руководстве по Python](/../usage/python.md). + +!!! Example "Пример" + + ```python + from ultralytics import YOLO + + # Создание новой YOLO модели с нуля + model = YOLO('yolov8n.yaml') + + # Загрузка предварительно обученной YOLO модели (рекомендуется для обучения) + model = YOLO('yolov8n.pt') + + # Обучение модели с использованием набора данных 'coco128.yaml' на 3 эпохи + results = model.train(data='coco128.yaml', epochs=3) + + # Оценка производительности модели на валидационном наборе + results = model.val() + + # Выполнение обнаружения объектов на изображении с помощью модели + results = model('https://ultralytics.com/images/bus.jpg') + + # Экспорт модели в формат ONNX + success = model.export(format='onnx') + ``` + +[Руководство по Python](/../usage/python.md){.md-button .md-button--primary} diff --git a/docs/ru/tasks/classify.md b/docs/ru/tasks/classify.md new file mode 100644 index 0000000..f255949 --- /dev/null +++ b/docs/ru/tasks/classify.md @@ -0,0 +1,172 @@ +--- +comments: true +description: Узнайте о моделях классификации изображений YOLOv8 Classify. Получите подробную информацию о списке предварительно обученных моделей и как провести Обучение, Валидацию, Предсказание и Экспорт моделей. 
+keywords: Ultralytics, YOLOv8, классификация изображений, предварительно обученные модели, YOLOv8n-cls, обучение, валидация, предсказание, экспорт модели +--- + +# Классификация изображений + +Примеры классификации изображений + +Классификация изображений - это самая простая из трех задач и заключается в классификации всего изображения по одному из предварительно определенных классов. + +Выход классификатора изображений - это один классовый ярлык и уровень доверия. Классификация изображений полезна, когда вам нужно знать только к какому классу относится изображение, и не нужно знать, где находятся объекты данного класса или какова их точная форма. + +!!! Tip "Совет" + + Модели YOLOv8 Classify используют суффикс `-cls`, например `yolov8n-cls.pt`, и предварительно обучены на [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +## [Модели](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Здесь показаны предварительно обученные модели классификации YOLOv8. Модели для обнаружения, сегментации и позы обучаются на наборе данных [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml), в то время как модели классификации обучаются на наборе данных [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +[Модели](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) автоматически загружаются из последнего релиза Ultralytics [release](https://github.com/ultralytics/assets/releases) при первом использовании. + +| Модель | Размер
(пиксели) | Точность
top1 | Точность
top5 | Скорость
CPU ONNX
(мс) | Скорость
A100 TensorRT
(мс) | Параметры
(М) | FLOPs
(Б) на 640 | +|----------------------------------------------------------------------------------------------|--------------------------|-----------------------|-----------------------|-----------------------------------|----------------------------------------|-----------------------|--------------------------| +| [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | +| [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | +| [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | +| [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | +| [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | + +- Значения **точность** указывают на точность модели на валидационном наборе данных [ImageNet](https://www.image-net.org/). +
Повторить результаты можно с помощью `yolo val classify data=path/to/ImageNet device=0`. +- **Скорость** усреднена по изображениям для валидации ImageNet, используя инстанс [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/). +
Повторить результаты можно с помощью `yolo val classify data=path/to/ImageNet batch=1 device=0|cpu`. + +## Обучение + +Обучите модель YOLOv8n-cls на наборе данных MNIST160 на протяжении 100 эпох с размером изображения 64. Полный список доступных аргументов приведен на странице [Конфигурация](/../usage/cfg.md). + +!!! Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузите модель + model = YOLO('yolov8n-cls.yaml') # создайте новую модель из YAML + model = YOLO('yolov8n-cls.pt') # загрузите предварительно обученную модель (рекомендуется для обучения) + model = YOLO('yolov8n-cls.yaml').load('yolov8n-cls.pt') # создайте из YAML и перенесите веса + + # Обучите модель + results = model.train(data='mnist160', epochs=100, imgsz=64) + ``` + + === "CLI" + + ```bash + # Создайте новую модель из YAML и начните обучение с нуля + yolo classify train data=mnist160 model=yolov8n-cls.yaml epochs=100 imgsz=64 + + # Начните обучение с предварительно обученной *.pt модели + yolo classify train data=mnist160 model=yolov8n-cls.pt epochs=100 imgsz=64 + + # Создайте новую модель из YAML, перенесите предварительно обученные веса и начните обучение + yolo classify train data=mnist160 model=yolov8n-cls.yaml pretrained=yolov8n-cls.pt epochs=100 imgsz=64 + ``` + +### Формат набора данных + +Формат набора данных для классификации YOLO можно подробно изучить в [Руководстве по наборам данных](../../../datasets/classify/index.md). + +## Валидация + +Проверьте точность обученной модели YOLOv8n-cls на наборе данных MNIST160. Не нужно передавать какие-либо аргументы, так как `model` сохраняет свои `data` и аргументы в качестве атрибутов модели. + +!!! 
Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузите модель + model = YOLO('yolov8n-cls.pt') # загрузите официальную модель + model = YOLO('path/to/best.pt') # загрузите собственную модель + + # Проведите валидацию модели + metrics = model.val() # аргументы не нужны, набор данных и настройки запомнены + metrics.top1 # точность top1 + metrics.top5 # точность top5 + ``` + === "CLI" + + ```bash + yolo classify val model=yolov8n-cls.pt # валидация официальной модели + yolo classify val model=path/to/best.pt # валидация собственной модели + ``` + +## Предсказание + +Используйте обученную модель YOLOv8n-cls для выполнения предсказаний на изображениях. + +!!! Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузите модель + model = YOLO('yolov8n-cls.pt') # загрузите официальную модель + model = YOLO('path/to/best.pt') # загрузите собственную модель + + # Сделайте предсказание с помощью модели + results = model('https://ultralytics.com/images/bus.jpg') # сделайте предсказание на изображении + ``` + === "CLI" + + ```bash + yolo classify predict model=yolov8n-cls.pt source='https://ultralytics.com/images/bus.jpg' # предсказание с официальной моделью + yolo classify predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # предсказание с собственной моделью + ``` + +Подробная информация о режиме `predict` приведена на странице [Предсказание](https://docs.ultralytics.com/modes/predict/). + +## Экспорт + +Экспортируйте модель YOLOv8n-cls в другой формат, например, ONNX, CoreML и т. д. + +!!! 
Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузите модель + model = YOLO('yolov8n-cls.pt') # загрузите официальную модель + model = YOLO('path/to/best.pt') # загрузите собственную обученную модель + + # Экспортируйте модель + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-cls.pt format=onnx # экспорт официальной модели + yolo export model=path/to/best.pt format=onnx # экспорт собственной обученной модели + ``` + +Доступные форматы экспорта YOLOv8-cls представлены в таблице ниже. Вы можете выполнять предсказания или валидацию прямо на экспортированных моделях, например, `yolo predict model=yolov8n-cls.onnx`. Примеры использования показаны для вашей модели после завершения экспорта. + +| Формат | Аргумент `format` | Модель | Метаданные | Аргументы | +|--------------------------------------------------------------------|-------------------|-------------------------------|------------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-cls.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-cls.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-cls.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-cls_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-cls.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-cls.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-cls_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-cls.pb` | ❌ | `imgsz` | +| [TF 
Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-cls.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-cls_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-cls_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-cls_ncnn_model/` | ✅ | `imgsz`, `half` | + +Подробная информация об экспорте приведена на странице [Экспорт](https://docs.ultralytics.com/modes/export/). diff --git a/docs/ru/tasks/detect.md b/docs/ru/tasks/detect.md new file mode 100644 index 0000000..8110dd5 --- /dev/null +++ b/docs/ru/tasks/detect.md @@ -0,0 +1,184 @@ +--- +comments: true +description: Официальная документация YOLOv8 от Ultralytics. Узнайте, как проводить обучение, проверку, предсказание и экспорт моделей в различных форматах. Включая подробные статистические данные о производительности. +keywords: YOLOv8, Ultralytics, обнаружение объектов, предобученные модели, обучение, валидация, предсказание, экспорт моделей, COCO, ImageNet, PyTorch, ONNX, CoreML +--- + +# Обнаружение объектов + +Примеры обнаружения объектов + +Обнаружение объектов – это задача, которая включает идентификацию местоположения и класса объектов на изображении или видео. + +Результат работы детектора объектов – это набор ограничивающих рамок, которые заключают в себе объекты на изображении, вместе с метками классов и уровнями достоверности для каждой рамки. Обнаружение объектов является хорошим выбором, когда необходимо определить объекты интереса в сцене, но не нужно точно знать, где находится объект или его точную форму. + +

+
+ +
+ Смотрите: Обнаружение объектов с предобученной моделью Ultralytics YOLOv8. +

+ +!!! Tip "Совет" + + YOLOv8 Detect модели являются стандартными моделями YOLOv8, то есть `yolov8n.pt`, и предобучены на [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml). + +## [Модели](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Здесь показаны предобученные модели YOLOv8 Detect. Модели Detect, Segment и Pose предобучены на датасете [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml), в то время как модели Classify предобучены на датасете [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +[Модели](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) автоматически загружаются с последнего релиза Ultralytics [release](https://github.com/ultralytics/assets/releases) при первом использовании. + +| Модель | размер
(пиксели) | mAPval
50-95 | Скорость
CPU ONNX
(мс) | Скорость
A100 TensorRT
(мс) | параметры
(М) | FLOPs
(Б) | +|--------------------------------------------------------------------------------------|--------------------------|----------------------|-----------------------------------|----------------------------------------|-----------------------|-------------------| +| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | +| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | +| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | +| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | +| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | + +- **mAPval** значения для одиночной модели одиночного масштаба на датасете [COCO val2017](http://cocodataset.org). +
Для воспроизведения используйте `yolo val detect data=coco.yaml device=0` +- **Скорость** усреднена по изображениям COCO val на экземпляре [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/). +
Для воспроизведения используйте `yolo val detect data=coco128.yaml batch=1 device=0|cpu` + +## Обучение + +Обучите модель YOLOv8n на датасете COCO128 в течение 100 эпох с размером изображения 640. Полный список доступных аргументов см. на странице [Конфигурация](/../usage/cfg.md). + +!!! Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузите модель + model = YOLO('yolov8n.yaml') # создать новую модель из YAML + model = YOLO('yolov8n.pt') # загрузить предобученную модель (рекомендуется для обучения) + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # создать из YAML и перенести веса + + # Обучите модель + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # Создать новую модель из YAML и начать обучение с нуля + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # Начать обучение с предобученной модели *.pt + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # Создать новую модель из YAML, перенести в нее предобученные веса и начать обучение + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### Формат датасета + +Формат датасета для обнаружения YOLO можно найти более подробно в [Руководстве по датасетам](../../../datasets/detect/index.md). Чтобы конвертировать ваш существующий датасет из других форматов (например, COCO и т.д.) в формат YOLO, пожалуйста, используйте инструмент [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) от Ultralytics. + +## Валидация + +Проверьте точность обученной модели YOLOv8n на датасете COCO128. Аргументы передавать не требуется, поскольку `model` сохраняет свои `data` и аргументы обучения как атрибуты модели. + +!!! 
Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузите модель + model = YOLO('yolov8n.pt') # загрузить официальную модель + model = YOLO('path/to/best.pt') # загрузить собственную модель + + # Проверьте модель + metrics = model.val() # аргументы не нужны, набор данных и настройки запоминаются + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # список содержит map50-95 для каждой категории + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # val официальная модель + yolo detect val model=path/to/best.pt # val собственная модель + ``` + +## Предсказание + +Используйте обученную модель YOLOv8n для выполнения предсказаний на изображениях. + +!!! Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузите модель + model = YOLO('yolov8n.pt') # загрузить официальную модель + model = YOLO('path/to/best.pt') # загрузить собственную модель + + # Сделайте предсказание с помощью модели + results = model('https://ultralytics.com/images/bus.jpg') # сделать предсказание на изображении + ``` + === "CLI" + + ```bash + yolo detect predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' # предсказание с официальной моделью + yolo detect predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # предсказание с собственной моделью + ``` + +Полные детали режима `predict` смотрите на странице [Предсказание](https://docs.ultralytics.com/modes/predict/). + +## Экспорт + +Экспортируйте модель YOLOv8n в другой формат, такой как ONNX, CoreML и др. + +!!! 
Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузите модель + model = YOLO('yolov8n.pt') # загрузить официальную модель + model = YOLO('path/to/best.pt') # загрузить собственную модель после обучения + + # Экспортируйте модель + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # экспорт официальной модели + yolo export model=path/to/best.pt format=onnx # экспорт собственной модели после обучения + ``` + +Доступные форматы экспорта YOLOv8 приведены в таблице ниже. Вы можете выполнять предсказания или проверку непосредственно на экспортированных моделях, например `yolo predict model=yolov8n.onnx`. Примеры использования для вашей модели показаны после завершения экспорта. + +| Формат | Аргумент `format` | Модель | Метаданные | Аргументы | +|--------------------------------------------------------------------|-------------------|---------------------------|------------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF 
Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +Полные детали режима `export` смотрите на странице [Экспорт](https://docs.ultralytics.com/modes/export/). diff --git a/docs/ru/tasks/index.md b/docs/ru/tasks/index.md new file mode 100644 index 0000000..cc88209 --- /dev/null +++ b/docs/ru/tasks/index.md @@ -0,0 +1,55 @@ +--- +comments: true +description: Узнайте о ключевых задачах компьютерного зрения, которые может выполнять YOLOv8, включая обнаружение, сегментацию, классификацию и оценку позы. Поймите, как они могут быть использованы в ваших AI проектах. +keywords: Ultralytics, YOLOv8, Обнаружение, Сегментация, Классификация, Оценка Позы, AI Фреймворк, Задачи Компьютерного Зрения +--- + +# Задачи Ultralytics YOLOv8 + +
+Поддерживаемые задачи Ultralytics YOLO + +YOLOv8 — это AI фреймворк, поддерживающий множество **задач** компьютерного зрения. Фреймворк может быть использован для выполнения [обнаружения](detect.md), [сегментации](segment.md), [классификации](classify.md) и оценки [позы](pose.md). Каждая из этих задач имеет различные цели и области применения. + +!!! Note "Заметка" + + 🚧 Наша многоязычная документация в настоящее время находится в стадии разработки, и мы усердно работаем над ее улучшением. Спасибо за ваше терпение! 🙏 + +

+
+ +
+ Смотрите: Изучите задачи Ultralytics YOLO: Обнаружение объектов, Сегментация, Отслеживание и Оценка позы. +

+ +## [Обнаружение](detect.md) + +Обнаружение — это основная задача, поддерживаемая YOLOv8. Она заключается в обнаружении объектов на изображении или кадре видео и рисовании вокруг них ограничивающих рамок. Обнаруженные объекты классифицируются на разные категории на основе их характеристик. YOLOv8 может обнаруживать несколько объектов на одном изображении или видеокадре с высокой точностью и скоростью. + +[Примеры Обнаружения](detect.md){ .md-button } + +## [Сегментация](segment.md) + +Сегментация — это задача, которая включает разбиение изображения на разные регионы на основе содержимого изображения. Каждому региону присваивается метка на основе его содержимого. Эта задача полезна в таких приложениях, как сегментация изображений и медицинская визуализация. YOLOv8 использует вариацию архитектуры U-Net для выполнения сегментации. + +[Примеры Сегментации](segment.md){ .md-button } + +## [Классификация](classify.md) + +Классификация — это задача, включающая классификацию изображения на разные категории. YOLOv8 может быть использован для классификации изображений на основе их содержимого. Для выполнения классификации используется вариация архитектуры EfficientNet. + +[Примеры Классификации](classify.md){ .md-button } + +## [Поза](pose.md) + +Обнаружение точек позы или ключевых точек — это задача, которая включает обнаружение конкретных точек на изображении или видеокадре. Эти точки называются ключевыми и используются для отслеживания движения или оценки позы. YOLOv8 может обнаруживать ключевые точки на изображении или видеокадре с высокой точностью и скоростью. + +[Примеры Поз](pose.md){ .md-button } + +## Заключение + +YOLOv8 поддерживает множество задач, включая обнаружение, сегментацию, классификацию и обнаружение ключевых точек. Каждая из этих задач имеет разные цели и области применения. Понимая различия между этими задачами, вы можете выбрать подходящую задачу для вашего приложения компьютерного зрения. 
diff --git a/docs/ru/tasks/pose.md b/docs/ru/tasks/pose.md new file mode 100644 index 0000000..03d0d8d --- /dev/null +++ b/docs/ru/tasks/pose.md @@ -0,0 +1,176 @@ +--- +comments: true +description: Узнайте, как использовать Ultralytics YOLOv8 для задач оценки позы. Найдите предварительно обученные модели, узнайте, как обучать, проверять, предсказывать и экспортировать свои собственные. +--- + +# Оценка позы + +![Примеры оценки позы](https://user-images.githubusercontent.com/26833433/243418616-9811ac0b-a4a7-452a-8aba-484ba32bb4a8.png) + +Оценка позы — это задача, заключающаяся в определении местоположения определённых точек на изображении, обычно называемых контрольными точками. Контрольные точки могут представлять различные части объекта, такие как суставы, ориентиры или другие характерные особенности. Расположение контрольных точек обычно представлено в виде набора 2D `[x, y]` или 3D `[x, y, visible]` координат. + +Результат работы модели оценки позы — это набор точек, представляющих контрольные точки на объекте в изображении, обычно вместе с оценками уверенности для каждой точки. Оценка позы является хорошим выбором, когда вам нужно идентифицировать конкретные части объекта в сцене и их расположение относительно друг друга. + +[Смотрите: Оценка позы с Ultralytics YOLOv8.](https://www.youtube.com/embed/Y28xXQmju64?si=pCY4ZwejZFu6Z4kZ) + +!!! Tip "Совет" + + Модели _pose_ YOLOv8 используют суффикс `-pose`, т.е. `yolov8n-pose.pt`. Эти модели обучены на наборе данных [COCO keypoints](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco-pose.yaml) и подходят для различных задач оценки позы. + +## [Модели](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Здесь представлены предварительно обученные модели YOLOv8 Pose. 
Модели Detect, Segment и Pose предварительно обучены на наборе данных [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml), а модели Classify — на наборе данных [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +[Модели](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) скачиваются автоматически из последнего [релиза](https://github.com/ultralytics/assets/releases) Ultralytics при первом использовании. + +| Модель | размер
(пиксели) | mAPpose
50-95 | mAPpose
50 | Скорость
CPU ONNX
(мс) | Скорость
A100 TensorRT
(мс) | параметры
(М) | FLOPs
(Б) | +|------------------------------------------------------------------------------------------------------|--------------------------|-----------------------|--------------------|-----------------------------------|----------------------------------------|-----------------------|-------------------| +| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | +| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | +| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | +| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | +| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | +| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | + +- **mAPval** значения для одной модели одиночного масштаба на наборе данных [COCO Keypoints val2017](http://cocodataset.org). +
Воспроизводится с помощью: `yolo val pose data=coco-pose.yaml device=0` +- **Скорость** усреднена по изображениям COCO val на [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) инстансе. +
Воспроизводится с помощью: `yolo val pose data=coco8-pose.yaml batch=1 device=0|cpu` + +## Обучение + +Обучите модель YOLOv8-pose на наборе данных COCO8-pose. + +!!! Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузить модель + model = YOLO('yolov8n-pose.yaml') # создать новую модель из YAML + model = YOLO('yolov8n-pose.pt') # загрузить предварительно обученную модель (рекомендуется для обучения) + model = YOLO('yolov8n-pose.yaml').load('yolov8n-pose.pt') # создать из YAML и перенести веса + + # Обучить модель + results = model.train(data='coco8-pose.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # Создать новую модель из YAML и начать обучение с нуля + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml epochs=100 imgsz=640 + + # Начать обучение с предварительно обученной модели *.pt + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + + # Создать новую модель из YAML, перенести предварительно обученные веса и начать обучение + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml pretrained=yolov8n-pose.pt epochs=100 imgsz=640 + ``` + +### Формат набора данных + +Формат набора данных YOLO pose подробно описан в [Руководстве по наборам данных](../../../datasets/pose/index.md). Для преобразования существующего набора данных из других форматов (например, COCO и т.д.) в формат YOLO, пожалуйста, используйте инструмент [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) от Ultralytics. + +## Проверка + +Проверьте точность обученной модели YOLOv8n-pose на наборе данных COCO8-pose. Аргументы не нужны, так как `model` +запоминает свои `data` и аргументы как атрибуты модели. + +!!! 
Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузить модель + model = YOLO('yolov8n-pose.pt') # загрузить официальную модель + model = YOLO('path/to/best.pt') # загрузить свою модель + + # Проверить модель + metrics = model.val() # аргументы не нужны, набор данных и настройки запомнены + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # список содержит map50-95 для каждой категории + ``` + === "CLI" + + ```bash + yolo pose val model=yolov8n-pose.pt # проверить официальную модель + yolo pose val model=path/to/best.pt # проверить свою модель + ``` + +## Предсказание + +Используйте обученную модель YOLOv8n-pose для выполнения предсказаний на изображениях. + +!!! Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузить модель + model = YOLO('yolov8n-pose.pt') # загрузить официальную модель + model = YOLO('path/to/best.pt') # загрузить свою модель + + # Сделать предсказание моделью + results = model('https://ultralytics.com/images/bus.jpg') # предсказать по изображению + ``` + === "CLI" + + ```bash + yolo pose predict model=yolov8n-pose.pt source='https://ultralytics.com/images/bus.jpg' # предсказать официальной моделью + yolo pose predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # предсказать своей моделью + ``` + +Полные детали работы в режиме `predict` смотрите на странице [Predict](https://docs.ultralytics.com/modes/predict/). + +## Экспорт + +Экспортируйте модель YOLOv8n Pose в другой формат, такой как ONNX, CoreML и т.д. + +!!! 
Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузить модель + model = YOLO('yolov8n-pose.pt') # загрузить официальную модель + model = YOLO('path/to/best.pt') # загрузить свою обученную модель + + # Экспортировать модель + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-pose.pt format=onnx # экспортировать официальную модель + yolo export model=path/to/best.pt format=onnx # экспортировать свою обученную модель + ``` + +Доступные форматы экспорта модели YOLOv8-pose приведены в таблице ниже. Вы можете делать предсказания или проверки непосредственно с экспортированных моделей, например, `yolo predict model=yolov8n-pose.onnx`. Примеры использования показаны для вашей модели после завершения экспорта. + +| Формат | Аргумент `format` | Модель | Метаданные | Аргументы | +|--------------------------------------------------------------------|-------------------|--------------------------------|------------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-pose.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-pose.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-pose.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-pose_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-pose.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-pose.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-pose_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-pose.pb` 
| ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-pose.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-pose_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-pose_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-pose_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-pose_ncnn_model/` | ✅ | `imgsz`, `half` | + +Полные детали экспорта смотрите на странице [Export](https://docs.ultralytics.com/modes/export/). diff --git a/docs/ru/tasks/segment.md b/docs/ru/tasks/segment.md new file mode 100644 index 0000000..94d67b3 --- /dev/null +++ b/docs/ru/tasks/segment.md @@ -0,0 +1,189 @@ +--- +comments: true +description: Научитесь использовать модели сегментации объектов с помощью Ultralytics YOLO. Инструкции по обучению, валидации, предсказанию изображений и экспорту моделей. +keywords: yolov8, сегментация объектов, Ultralytics, набор данных COCO, сегментация изображений, обнаружение объектов, обучение моделей, валидация моделей, предсказания изображений, экспорт моделей +--- + +# Сегментация экземпляров + +Примеры сегментации экземпляров + +Сегментация экземпляров идёт на шаг дальше по сравнению с обнаружением объектов и включает идентификацию отдельных объектов на изображении и их сегментацию от остальной части изображения. + +Результатом модели сегментации экземпляров является набор масок или контуров, очерчивающих каждый объект на изображении, вместе с классовыми метками и коэффициентами уверенности для каждого объекта. Сегментация экземпляров полезна, когда вам нужно знать не только, где находятся объекты на изображении, но и их точную форму. + +

+
+ +
+ Смотрите: Запуск сегментации с предварительно обученной моделью Ultralytics YOLOv8 на Python. +

+ +!!! Tip "Совет" + + Модели YOLOv8 Segment используют суффикс `-seg`, например `yolov8n-seg.pt` и предварительно обучены на [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml). + +## [Модели](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +Здесь показаны предварительно обученные модели Segment YOLOv8. Модели Detect, Segment и Pose предварительно обучены на наборе данных [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml), в то время как модели Classify предварительно обучены на наборе данных [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + +[Модели](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) автоматически загружаются из последнего [релиза](https://github.com/ultralytics/assets/releases) Ultralytics при первом использовании. + +| Модель | размер
(пиксели) | mAPbox
50-95 | mAPmask
50-95 | Скорость
CPU ONNX
(мс) | Скорость
A100 TensorRT
(мс) | параметры
(М) | FLOPs
(B) | +|----------------------------------------------------------------------------------------------|--------------------------|----------------------|-----------------------|-----------------------------------|----------------------------------------|-----------------------|-------------------| +| [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | +| [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | +| [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | +| [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | +| [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | + +- Значения **mAPval** для одиночной модели одиночного масштаба на наборе данных [COCO val2017](http://cocodataset.org). +
Воспроизведите с помощью `yolo val segment data=coco.yaml device=0` +- **Скорость** усреднена для изображений COCO val на [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) + инстансе. +
Воспроизведите с помощью `yolo val segment data=coco128-seg.yaml batch=1 device=0|cpu` + +## Обучение + +Обучите модель YOLOv8n-seg на наборе данных COCO128-seg в течение 100 эпох при размере изображения 640. Полный список доступных аргументов см. на странице [Конфигурация](/../usage/cfg.md). + +!!! Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузить модель + model = YOLO('yolov8n-seg.yaml') # создать новую модель из YAML + model = YOLO('yolov8n-seg.pt') # загрузить предварительно обученную модель (рекомендуется для обучения) + model = YOLO('yolov8n-seg.yaml').load('yolov8n.pt') # создать из YAML и перенести веса + + # Обучить модель + results = model.train(data='coco128-seg.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # Создать новую модель из YAML и начать обучение с нуля + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml epochs=100 imgsz=640 + + # Начать обучение с предварительно обученной модели *.pt + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + + # Создать новую модель из YAML, перенести предварительно обученные веса и начать обучение + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml pretrained=yolov8n-seg.pt epochs=100 imgsz=640 + ``` + +### Формат набора данных + +Формат набора данных для сегментации YOLO можно найти детально в [Руководстве по наборам данных](../../../datasets/segment/index.md). Чтобы конвертировать свой существующий набор данных из других форматов (например, COCO и т.д.) в формат YOLO, пожалуйста, используйте инструмент [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) от Ultralytics. + +## Валидация + +Проверьте точность обученной модели YOLOv8n-seg на наборе данных COCO128-seg. Аргументы передавать не нужно, так как `model` сохраняет `data` и аргументы обучения в качестве атрибутов модели. + +!!! 
Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузить модель + model = YOLO('yolov8n-seg.pt') # загрузить официальную модель + model = YOLO('path/to/best.pt') # загрузить пользовательскую модель + + # Провалидировать модель + metrics = model.val() # аргументы не нужны, набор данных и настройки запомнены + metrics.box.map # map50-95(B) + metrics.box.map50 # map50(B) + metrics.box.map75 # map75(B) + metrics.box.maps # список содержит map50-95(B) каждой категории + metrics.seg.map # map50-95(M) + metrics.seg.map50 # map50(M) + metrics.seg.map75 # map75(M) + metrics.seg.maps # список содержит map50-95(M) каждой категории + ``` + === "CLI" + + ```bash + yolo segment val model=yolov8n-seg.pt # валидация официальной модели + yolo segment val model=path/to/best.pt # валидация пользовательской модели + ``` + +## Предсказание + +Используйте обученную модель YOLOv8n-seg для выполнения предсказаний на изображениях. + +!!! Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузить модель + model = YOLO('yolov8n-seg.pt') # загрузить официальную модель + model = YOLO('path/to/best.pt') # загрузить пользовательскую модель + + # Сделать предсказание с помощью модели + results = model('https://ultralytics.com/images/bus.jpg') # предсказать по изображению + ``` + === "CLI" + + ```bash + yolo segment predict model=yolov8n-seg.pt source='https://ultralytics.com/images/bus.jpg' # предсказать с официальной моделью + yolo segment predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # предсказать с пользовательской моделью + ``` + +Полная информация о режиме `predict` на странице [Predict](https://docs.ultralytics.com/modes/predict/). + +## Экспорт + +Экспортируйте модель YOLOv8n-seg в другой формат, например ONNX, CoreML и т.д. + +!!! 
Example "Пример" + + === "Python" + + ```python + from ultralytics import YOLO + + # Загрузить модель + model = YOLO('yolov8n-seg.pt') # загрузить официальную модель + model = YOLO('path/to/best.pt') # загрузить пользовательскую обученную модель + + # Экспортировать модель + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-seg.pt format=onnx # экспортировать официальную модель + yolo export model=path/to/best.pt format=onnx # экспортировать пользовательскую обученную модель + ``` + +Доступные форматы экспорта YOLOv8-seg приведены в таблице ниже. После завершения экспорта для вашей модели показаны примеры использования, включая прямое предсказание или валидацию на экспортированных моделях, например `yolo predict model=yolov8n-seg.onnx`. + +| Формат | Аргумент `format` | Модель | Метаданные | Аргументы | +|--------------------------------------------------------------------|-------------------|-------------------------------|------------|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-seg.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-seg.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-seg.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-seg_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-seg.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-seg.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-seg_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-seg.pb` | ❌ | `imgsz` 
| +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-seg.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-seg_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-seg_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-seg_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-seg_ncnn_model/` | ✅ | `imgsz`, `half` | + +Подробности о режиме `export` смотрите на странице [Export](https://docs.ultralytics.com/modes/export/). diff --git a/docs/update_translations.py b/docs/update_translations.py new file mode 100644 index 0000000..9c27c70 --- /dev/null +++ b/docs/update_translations.py @@ -0,0 +1,180 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +""" +Script to fix broken Markdown links and front matter in language-specific directories zh, ko, ja, ru, de, fr, es, pt. + +This script processes markdown files in language-specific directories (like /zh/). It finds Markdown links and checks +their existence. If a link is broken and does not exist in the language-specific directory but exists in the /en/ +directory, the script updates the link to point to the corresponding file in the /en/ directory. + +It also ensures that front matter keywords like 'comments:', 'description:', and 'keywords:' are not translated and +remain in English. 
import re
from pathlib import Path


class MarkdownLinkFixer:
    """Class to fix Markdown links and front matter in language-specific directories.

    The fixer walks every two-letter language directory below ``base_dir`` and,
    for each ``*.md`` file, optionally:

    * redirects links whose target is missing in the translation to the
      matching page of the ``en`` tree (``update_links``), and
    * normalises translated front-matter keys, admonition types, iframe
      ``allow`` attributes and image ``alt`` text (``update_text``).

    Files are rewritten in place.
    """

    def __init__(self, base_dir, update_links=True, update_text=True):
        """Initialize the MarkdownLinkFixer with the base directory.

        Args:
            base_dir: Path (str or Path) of the docs root that contains the language directories.
            update_links: If True, redirect links to pages missing from a translation.
            update_text: If True, normalise front matter, admonitions, iframes and HTML tags.
        """
        # Resolved so that it can be compared with the resolved link targets built in link_replacer().
        self.base_dir = Path(base_dir).resolve()
        self.update_links = update_links
        self.update_text = update_text
        # Matches '[text](relative/path.md)'; ':' is excluded from the path so URLs are never touched.
        self.md_link_regex = re.compile(r'\[([^]]+)]\(([^:)]+)\.md\)')

    @staticmethod
    def replace_front_matter(content, lang_dir):
        """Ensure front matter keywords remain in English.

        Args:
            content: Markdown text of one file.
            lang_dir: Path of the language directory; its stem (e.g. 'zh') selects the translation table.

        Returns:
            The text with translated 'comments', 'description' and 'keywords' keys replaced by the English
            keys. The value of 'comments' is always rewritten to 'true'. Unknown languages are returned
            unchanged.

        Note:
            Matching is not restricted to the front-matter block: a translated key followed by a colon is
            replaced wherever it occurs in the text.
        """
        english = ['comments', 'description', 'keywords']
        translations = {
            'zh': ['评论', '描述', '关键词'],  # Mandarin Chinese (Simplified) warning, sometimes translates as 关键字
            'es': ['comentarios', 'descripción', 'palabras clave'],  # Spanish
            'ru': ['комментарии', 'описание', 'ключевые слова'],  # Russian
            'pt': ['comentários', 'descrição', 'palavras-chave'],  # Portuguese
            'fr': ['commentaires', 'description', 'mots-clés'],  # French
            'de': ['kommentare', 'beschreibung', 'schlüsselwörter'],  # German
            'ja': ['コメント', '説明', 'キーワード'],  # Japanese
            'ko': ['댓글', '설명', '키워드'],  # Korean
            'hi': ['टिप्पणियाँ', 'विवरण', 'कीवर्ड'],  # Hindi
            'ar': ['التعليقات', 'الوصف', 'الكلمات الرئيسية']  # Arabic
        }  # front matter translations for comments, description, keyword

        for term, eng_key in zip(translations.get(lang_dir.stem, []), english):
            key = re.escape(term)  # terms are literal text, not regex fragments
            # Accept both the ASCII colon and the fullwidth colon (U+FF1A) that CJK translations emit.
            # The fullwidth colon is spelled as an escape so text normalisation cannot fold it into ':'.
            if eng_key == 'comments':
                content = re.sub(rf'{key} *[:\uFF1A].*', f'{eng_key}: true', content, flags=re.IGNORECASE)
            else:
                content = re.sub(rf'{key} *[:\uFF1A] *', f'{eng_key}: ', content, flags=re.IGNORECASE)
        return content

    @staticmethod
    def replace_admonitions(content, lang_dir):
        """Ensure admonition types remain in English while keeping a translated title.

        Args:
            content: Markdown text of one file.
            lang_dir: Path of the language directory; its stem (e.g. 'zh') selects the translation table.

        Returns:
            The text in which '!!! <type>' lines use the English type. For non-English languages a bare
            '!!! <type>' line additionally receives the translated word as its quoted title, e.g.
            '!!! 示例' becomes '!!! Example "示例"'. Unknown languages are returned unchanged.
        """
        english = [
            'Note', 'Summary', 'Tip', 'Info', 'Success', 'Question', 'Warning', 'Failure', 'Danger', 'Bug', 'Example',
            'Quote', 'Abstract', 'Seealso', 'Admonition']
        translations = {
            'en':
            english,
            'zh': ['笔记', '摘要', '提示', '信息', '成功', '问题', '警告', '失败', '危险', '故障', '示例', '引用', '摘要', '另见', '警告'],
            'es': [
                'Nota', 'Resumen', 'Consejo', 'Información', 'Éxito', 'Pregunta', 'Advertencia', 'Fracaso', 'Peligro',
                'Error', 'Ejemplo', 'Cita', 'Abstracto', 'Véase También', 'Amonestación'],
            'ru': [
                'Заметка', 'Сводка', 'Совет', 'Информация', 'Успех', 'Вопрос', 'Предупреждение', 'Неудача', 'Опасность',
                'Ошибка', 'Пример', 'Цитата', 'Абстракт', 'См. Также', 'Предостережение'],
            'pt': [
                'Nota', 'Resumo', 'Dica', 'Informação', 'Sucesso', 'Questão', 'Aviso', 'Falha', 'Perigo', 'Bug',
                'Exemplo', 'Citação', 'Abstrato', 'Veja Também', 'Advertência'],
            'fr': [
                'Note', 'Résumé', 'Conseil', 'Info', 'Succès', 'Question', 'Avertissement', 'Échec', 'Danger', 'Bug',
                'Exemple', 'Citation', 'Abstrait', 'Voir Aussi', 'Admonestation'],
            'de': [
                'Hinweis', 'Zusammenfassung', 'Tipp', 'Info', 'Erfolg', 'Frage', 'Warnung', 'Ausfall', 'Gefahr',
                'Fehler', 'Beispiel', 'Zitat', 'Abstrakt', 'Siehe Auch', 'Ermahnung'],
            'ja': ['ノート', '要約', 'ヒント', '情報', '成功', '質問', '警告', '失敗', '危険', 'バグ', '例', '引用', '抄録', '参照', '訓告'],
            'ko': ['노트', '요약', '팁', '정보', '성공', '질문', '경고', '실패', '위험', '버그', '예제', '인용', '추상', '참조', '경고'],
            'hi': [
                'नोट', 'सारांश', 'सुझाव', 'जानकारी', 'सफलता', 'प्रश्न', 'चेतावनी', 'विफलता', 'खतरा', 'बग', 'उदाहरण',
                'उद्धरण', 'सार', 'देखें भी', 'आगाही'],
            'ar': [
                'ملاحظة', 'ملخص', 'نصيحة', 'معلومات', 'نجاح', 'سؤال', 'تحذير', 'فشل', 'خطر', 'عطل', 'مثال', 'اقتباس',
                'ملخص', 'انظر أيضاً', 'تحذير']}

        terms = translations.get(lang_dir.stem, [])
        translated = lang_dir.stem != 'en'
        for term, eng_key in zip(terms, english):
            term_re = re.escape(term)  # e.g. 'См. Также' contains a literal dot
            if translated:
                # A bare '!!! Type' or '!!! <translated type>' line gets the translated word as its title.
                content = re.sub(rf'!!! *{eng_key} *\n', f'!!! {eng_key} "{term}"\n', content, flags=re.IGNORECASE)
                content = re.sub(rf'!!! *{term_re} *\n', f'!!! {eng_key} "{term}"\n', content, flags=re.IGNORECASE)
            # NOTE(review): the indentation of the original was lost; this normalisation and the
            # untyped-admonition default below are assumed to run for every supported language,
            # 'en' included (for 'en' it only fixes case/spacing) - confirm against the upstream file.
            content = re.sub(rf'!!! *{term_re}', f'!!! {eng_key}', content, flags=re.IGNORECASE)

        if terms:
            # An admonition that has a title but no type ('!!! "title"') defaults to Example.
            content = re.sub(r'!!! *"', '!!! Example "', content, flags=re.IGNORECASE)

        return content

    @staticmethod
    def update_iframe(content):
        """Update the 'allow' attribute of iframe if it does not contain the specific English permissions.

        Args:
            content: Markdown/HTML text of one file.

        Returns:
            The text in which every allow="..." value that does not start with the English permission list
            (translated, partial or empty values) is replaced by that list.
        """
        english = 'accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share'
        # [^"]* instead of .+? so that an empty allow="" is replaced without swallowing the next attribute.
        pattern = re.compile(f'allow="(?!{re.escape(english)})[^"]*"')
        return pattern.sub(f'allow="{english}"', content)

    def link_replacer(self, match, parent_dir, lang_dir, use_abs_link=False):
        """Replace broken links with corresponding links in the /en/ directory.

        Args:
            match: re.Match produced by ``self.md_link_regex`` (group 1 = link text, group 2 = path without '.md').
            parent_dir: Directory of the Markdown file that contains the link.
            lang_dir: Language directory the file belongs to (a direct child of ``self.base_dir``).
            use_abs_link: Build a docs-root based link instead of a relative one (known to be buggy, see below).

        Returns:
            The replacement link text, or the original link when its target exists or no English page is found.
        """
        text, path = match.groups()
        # Resolve everything so the paths below are comparable with self.base_dir and with each other.
        parent_dir = Path(parent_dir).resolve()
        lang_dir = Path(lang_dir).resolve()
        linked_path = (parent_dir / path).resolve().with_suffix('.md')

        if not linked_path.exists():
            try:
                en_linked_path = lang_dir.parent / 'en' / linked_path.relative_to(lang_dir)
            except ValueError:
                # The link points outside the language directory, so there is no English twin to fall back to.
                en_linked_path = linked_path

            if en_linked_path.exists():
                if use_abs_link:
                    # Use absolute links WARNING: BUGS, DO NOT USE
                    docs_root_relative_path = en_linked_path.relative_to(lang_dir.parent)
                    updated_path = str(docs_root_relative_path).replace('en/', '/../')
                else:
                    # Use relative links: climb from the file's directory to the docs root, then drop the
                    # 'en' component from the path of the English page.
                    steps_up = len(parent_dir.relative_to(self.base_dir).parts)
                    updated_path = Path('../' * steps_up) / en_linked_path.relative_to(self.base_dir)
                    # as_posix() keeps the '/en/' match (and the emitted link) independent of the OS separator.
                    updated_path = updated_path.as_posix().replace('/en/', '/')

                print(f"Redirecting link '[{text}]({path})' from {parent_dir} to {updated_path}")
                return f'[{text}]({updated_path})'
            else:
                print(f"Warning: Broken link '[{text}]({path})' found in {parent_dir} does not exist in /docs/en/.")

        return match.group(0)

    @staticmethod
    def update_html_tags(content):
        """Updates HTML tags in docs.

        Args:
            content: Markdown/HTML text of one file.

        Returns:
            The text with the closing slash removed from self-closing tags and a placeholder alt text added
            to Markdown images with an empty alt and to HTML <img> tags without an alt attribute.
        """
        alt_tag = 'MISSING'

        # Remove closing slashes from self-closing HTML tags
        content = re.sub(r'<([^>]+?)\s*/>', r'<\1>', content)

        # Find all images without alt tags and add placeholder alt text
        content = re.sub(r'!\[(.*?)\]\((.*?)\)', lambda m: f'![{m.group(1) or alt_tag}]({m.group(2)})', content)

        # Add missing alt tags to HTML images. Only <img> tags that carry a src and no alt attribute are
        # matched, so other tags with a src (e.g. <iframe>) and images that already have alt stay untouched.
        img_without_alt = re.compile(r'<img\b(?![^>]*\balt\s*=)[^>]*\bsrc=["\'][^"\']*["\'][^>]*>')
        # Every match ends with '>', so the attribute is inserted right before that final character.
        content = img_without_alt.sub(lambda m: f'{m.group(0)[:-1]} alt="{alt_tag}">', content)

        return content

    def process_markdown_file(self, md_file_path, lang_dir):
        """Process each markdown file in the language directory.

        Args:
            md_file_path: Path of the Markdown file; it is read and rewritten in place as UTF-8.
            lang_dir: Language directory the file belongs to.
        """
        print(f'Processing file: {md_file_path}')
        with open(md_file_path, encoding='utf-8') as file:
            content = file.read()

        if self.update_links:
            content = self.md_link_regex.sub(lambda m: self.link_replacer(m, md_file_path.parent, lang_dir), content)

        if self.update_text:
            content = self.replace_front_matter(content, lang_dir)
            content = self.replace_admonitions(content, lang_dir)
            content = self.update_iframe(content)
            content = self.update_html_tags(content)

        with open(md_file_path, 'w', encoding='utf-8') as file:
            file.write(content)

    def process_language_directory(self, lang_dir):
        """Process each language-specific directory.

        Args:
            lang_dir: Language directory whose '*.md' files are processed recursively.
        """
        print(f'Processing language directory: {lang_dir}')
        # Sorted so that the processing order (and therefore the log output) is reproducible.
        for md_file in sorted(Path(lang_dir).rglob('*.md')):
            self.process_markdown_file(md_file, lang_dir)

    def run(self):
        """Run the link fixing and front matter updating process for each language-specific directory."""
        for subdir in sorted(self.base_dir.iterdir()):
            # Language directories are identified by a two-character name such as 'en' or 'zh'.
            if subdir.is_dir() and re.fullmatch(r'\w\w', subdir.name):
                self.process_language_directory(subdir)


if __name__ == '__main__':
    # Set the path to your MkDocs 'docs' directory here
    docs_dir = str(Path(__file__).parent.resolve())
    fixer = MarkdownLinkFixer(docs_dir, update_links=True, update_text=True)
    fixer.run()
计算机视觉, YOLOv8安装, YOLOv8预测, YOLOv8训练, YOLO历史, YOLO许可 +--- + +# Ultralytics 中文文档 + +
+

+ + Ultralytics YOLO banner +

+ Ultralytics GitHub + space + Ultralytics LinkedIn + space + Ultralytics Twitter + space + Ultralytics YouTube + space + Ultralytics TikTok + space + Ultralytics Instagram + space + Ultralytics Discord +
+
+ Ultralytics CI + Ultralytics Code Coverage + YOLOv8 Citation + Docker Pulls +
+ Run on Gradient + Open In Colab + Open In Kaggle +
+ +介绍 [Ultralytics](https://ultralytics.com) [YOLOv8](https://github.com/ultralytics/ultralytics),这是备受好评的实时目标检测和图像分割模型的最新版本。YOLOv8基于深度学习和计算机视觉的前沿进展,提供了无与伦比的速度和准确性表现。它的精简设计使其适用于各种应用,并且可以轻松适应不同的硬件平台,从边缘设备到云API。 + +探索YOLOv8文档,这是一个全面的资源,旨在帮助您理解并利用其功能和能力。无论您是经验丰富的机器学习从业者还是新入行者,该中心旨在最大化YOLOv8在您的项目中的潜力。 + +## 从哪里开始 + +- **安装** `ultralytics` 并通过 pip 在几分钟内开始运行   [:material-clock-fast: 开始使用](quickstart.md){ .md-button } +- **预测** 使用YOLOv8预测新的图像和视频   [:octicons-image-16: 在图像上预测](modes/predict.md){ .md-button } +- **训练** 在您自己的自定义数据集上训练新的YOLOv8模型   [:fontawesome-solid-brain: 训练模型](modes/train.md){ .md-button } +- **探索** YOLOv8的任务,如分割、分类、姿态和跟踪   [:material-magnify-expand: 探索任务](tasks/index.md){ .md-button } + +

+
+ +
+ 观看:如何在Google Colab中使用您的自定义数据集训练YOLOv8模型。 +

+ +## YOLO:简史 + +[YOLO](https://arxiv.org/abs/1506.02640) (You Only Look Once),由华盛顿大学的Joseph Redmon和Ali Farhadi开发的流行目标检测和图像分割模型,于2015年推出,由于其高速和准确性而迅速流行。 + +- [YOLOv2](https://arxiv.org/abs/1612.08242) 在2016年发布,通过引入批量归一化、锚框和维度聚类来改进了原始模型。 +- [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf) 在2018年推出,进一步增强了模型的性能,使用了更高效的主干网络、多个锚点和空间金字塔池化。 +- [YOLOv4](https://arxiv.org/abs/2004.10934) 在2020年发布,引入了Mosaic数据增强、新的无锚检测头和新的损失函数等创新功能。 +- [YOLOv5](https://github.com/ultralytics/yolov5) 进一步改进了模型的性能,并增加了新功能,如超参数优化、集成实验跟踪和自动导出到常用的导出格式。 +- [YOLOv6](https://github.com/meituan/YOLOv6) 在2022年由[美团](https://about.meituan.com/)开源,现在正在该公司的许多自动送货机器人中使用。 +- [YOLOv7](https://github.com/WongKinYiu/yolov7) 在COCO关键点数据集上添加了额外的任务,如姿态估计。 +- [YOLOv8](https://github.com/ultralytics/ultralytics) 是Ultralytics的YOLO的最新版本。作为一种前沿、最先进(SOTA)的模型,YOLOv8在之前版本的成功基础上引入了新功能和改进,以提高性能、灵活性和效率。YOLOv8支持全范围的视觉AI任务,包括[检测](https://docs.ultralytics.com/tasks/detect/), [分割](https://docs.ultralytics.com/tasks/segment/), [姿态估计](https://docs.ultralytics.com/tasks/pose/), [跟踪](https://docs.ultralytics.com/modes/track/), 和[分类](https://docs.ultralytics.com/tasks/classify/)。这种多功能性使用户能够利用YOLOv8的功能应对多种应用和领域的需求。 + +## YOLO许可证:Ultralytics YOLO是如何授权的? 
+ +Ultralytics提供两种许可选项以适应不同的使用场景: + +- **AGPL-3.0许可证**:这种[OSI-approved](https://opensource.org/licenses/)开源许可证非常适合学生和爱好者,促进了开放的合作和知识共享。更多详细信息请参阅[LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE)文件。 +- **企业许可证**:这种许可证设计用于商业用途,允许将Ultralytics软件和AI模型无缝集成到商业商品和服务中,绕过AGPL-3.0的开源要求。如果您的场景涉及将我们的解决方案嵌入到商业产品中,请通过[Ultralytics Licensing](https://ultralytics.com/license)联系我们。 + +我们的授权策略旨在确保我们的开源项目的任何改进都能回馈到社区。我们十分珍视开源原则❤️,我们的使命是确保我们的贡献能够以对所有人有益的方式被利用和拓展。 + +--- + +**注意**:我们正在努力为我们的文档页面提供中文文档,并希望在接下来的几个月内发布。请密切关注我们的更新,并感谢您的耐心等待🙏。 diff --git a/docs/zh/models/fast-sam.md b/docs/zh/models/fast-sam.md new file mode 100644 index 0000000..ab7f269 --- /dev/null +++ b/docs/zh/models/fast-sam.md @@ -0,0 +1,193 @@ +--- +comments: true +description: FastSAM是一种基于卷积神经网络的实时图像对象分割解决方案。它提供了卓越的用户交互功能、计算效率以及适用于多种视觉任务的特性。 +keywords: FastSAM, 机器学习, 基于卷积神经网络的解决方案, 图像对象分割, 实时解决方案, Ultralytics, 视觉任务, 图像处理, 工业应用, 用户交互 +--- + +# Fast Segment Anything Model(FastSAM) + +Fast Segment Anything Model(FastSAM)是一种创新的实时卷积神经网络(CNN)模型,用于图像中的任意对象分割任务。该任务旨在根据各种可能的用户交互提示,对图像中的任意对象进行分割。FastSAM在保持具备竞争性能的同时,显著降低了计算需求,使其成为各种视觉任务的实用选择。 + +![Fast Segment Anything Model(FastSAM)架构概述](https://user-images.githubusercontent.com/26833433/248551984-d98f0f6d-7535-45d0-b380-2e1440b52ad7.jpg) + +## 概述 + +FastSAM旨在解决[Segment Anything Model(SAM)](sam.md)的局限性,SAM是一种计算资源需求很高的Transformer模型。FastSAM将任意对象分割任务拆分为两个顺序阶段:所有实例分割和提示引导选择。第一阶段使用[YOLOv8-seg](../tasks/segment.md)生成图像中所有实例的分割掩码。在第二阶段,输出与提示对应的感兴趣区域。 + +## 主要特点 + +1. **实时解决方案:** FastSAM利用CNN的计算效率提供了图像中任意对象分割任务的实时解决方案,适用于需要快速结果的工业应用。 + +2. **高效和高性能:** FastSAM在显著降低计算和资源需求的同时,不会降低性能质量。它与SAM具有相当的性能,但计算资源大幅减少,能够实现实时应用。 + +3. **提示引导分割:** FastSAM可以通过各种可能的用户交互提示来分割图像中的任意对象,提供了不同场景下的灵活性和适应性。 + +4. **基于YOLOv8-seg:** FastSAM基于[YOLOv8-seg](../tasks/segment.md),是一种配备实例分割分支的目标检测器。这使得它能够有效地生成图像中所有实例的分割掩码。 + +5. **基准测试中具有竞争力的结果:** 在MS COCO的对象提议任务中,FastSAM在单个NVIDIA RTX 3090上以显著更快的速度获得高分,与[SAM](sam.md)相比,显示出其效率和能力。 + +6. 
**实际应用:** 提出的方法以非常高的速度为大量视觉任务提供了一种新的实用解决方案,比当前方法快十几倍乃至数百倍。 + +7. **模型压缩的可行性:** FastSAM通过引入人工先验到结构中,展示了通过路径显著减少计算工作量的可行性,为通用视觉任务的大型模型架构开辟了新的可能性。 + +## 可用模型、支持的任务和操作模式 + +该表格列出了可用的模型及其特定的预训练权重,它们支持的任务以及它们与不同操作模式(如[推断](../modes/predict.md)、[验证](../modes/val.md)、[训练](../modes/train.md)和[导出](../modes/export.md))的兼容性,由支持的模式用✅表示,不支持的模式用❌表示。 + +| 模型类型 | 预训练权重 | 支持的任务 | 推断 | 验证 | 训练 | 导出 | +|-----------|----------------|-----------------------------|----|----|----|----| +| FastSAM-s | `FastSAM-s.pt` | [实例分割](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| FastSAM-x | `FastSAM-x.pt` | [实例分割](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## 用法示例 + +FastSAM模型很容易集成到Python应用程序中。Ultralytics提供了用户友好的Python API和CLI命令以简化开发。 + +### 预测用法 + +要对图像进行对象检测,可以使用下面的`predict`方法: + +!!! Example "示例" + + === "Python" + ```python + from ultralytics import FastSAM + from ultralytics.models.fastsam import FastSAMPrompt + + # 定义推断源 + source = 'path/to/bus.jpg' + + # 创建FastSAM模型 + model = FastSAM('FastSAM-s.pt') # 或 FastSAM-x.pt + + # 在图像上运行推断 + everything_results = model(source, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9) + + # 准备Prompt Process对象 + prompt_process = FastSAMPrompt(source, everything_results, device='cpu') + + # Everything提示 + ann = prompt_process.everything_prompt() + + # Bbox默认形状[0,0,0,0] -> [x1,y1,x2,y2] + ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300]) + + # 文本提示 + ann = prompt_process.text_prompt(text='a photo of a dog') + + # 点提示 + # 默认点[[0,0]] [[x1,y1],[x2,y2]] + # 默认point_label [0] [1,0] 0:背景,1:前景 + ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1]) + prompt_process.plot(annotations=ann, output='./') + ``` + + === "CLI" + ```bash + # 加载FastSAM模型并使用该模型分割图像中的所有对象 + yolo segment predict model=FastSAM-s.pt source=path/to/bus.jpg imgsz=640 + ``` + +此片段演示了加载预训练模型并在图像上进行预测的简单性。 + +### 验证用法 + +可以采用以下方式对数据集上的模型进行验证: + +!!! 
Example "示例" + + === "Python" + ```python + from ultralytics import FastSAM + + # 创建FastSAM模型 + model = FastSAM('FastSAM-s.pt') # 或 FastSAM-x.pt + + # 验证模型 + results = model.val(data='coco8-seg.yaml') + ``` + + === "CLI" + ```bash + # 加载FastSAM模型,并在COCO8示例数据集上进行验证,图像大小为640 + yolo segment val model=FastSAM-s.pt data=coco8.yaml imgsz=640 + ``` + +请注意,FastSAM仅支持检测和分割单个类别的对象。这意味着它将识别和分割所有对象为相同的类别。因此,在准备数据集时,需要将所有对象的类别ID转换为0。 + +## FastSAM官方用法 + +FastSAM也可以直接从[https://github.com/CASIA-IVA-Lab/FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM)存储库中获取。以下是您可能采取的使用FastSAM的典型步骤的简要概述: + +### 安装 + +1. 克隆FastSAM存储库: + ```shell + git clone https://github.com/CASIA-IVA-Lab/FastSAM.git + ``` + +2. 创建并激活一个带有Python 3.9的Conda环境: + ```shell + conda create -n FastSAM python=3.9 + conda activate FastSAM + ``` + +3. 进入克隆的存储库并安装所需的软件包: + ```shell + cd FastSAM + pip install -r requirements.txt + ``` + +4. 安装CLIP模型: + ```shell + pip install git+https://github.com/openai/CLIP.git + ``` + +### 示例用法 + +1. 下载[模型检查点](https://drive.google.com/file/d/1m1sjY4ihXBU1fZXdQ-Xdj-mDltW-2Rqv/view?usp=sharing)。 + +2. 使用FastSAM进行推断。示例命令: + + - 在图像中分割所有内容: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg + ``` + + - 使用文本提示分割特定对象: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --text_prompt "the yellow dog" + ``` + + - 在边界框中分割对象(以xywh格式提供边界框坐标): + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --box_prompt "[570,200,230,400]" + ``` + + - 在特定点附近分割对象: + ```shell + python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --point_prompt "[[520,360],[620,300]]" --point_label "[1,0]" + ``` + +此外,您可以在[Colab演示](https://colab.research.google.com/drive/1oX14f6IneGGw612WgVlAiy91UHwFAvr9?usp=sharing)上尝试FastSAM,或在[HuggingFace Web演示](https://huggingface.co/spaces/An-619/FastSAM)上进行可视化体验。 + +## 引用和致谢 + +我们要感谢FastSAM作者在实时实例分割领域作出的重要贡献: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{zhao2023fast, + title={Fast Segment Anything}, + author={Xu Zhao and Wenchao Ding and Yongqi An and Yinglong Du and Tao Yu and Min Li and Ming Tang and Jinqiao Wang}, + year={2023}, + eprint={2306.12156}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +可在[arXiv](https://arxiv.org/abs/2306.12156)上找到原始的FastSAM论文。作者已经公开了他们的工作,代码库可以在[GitHub](https://github.com/CASIA-IVA-Lab/FastSAM)上获取。我们感谢他们在推动该领域以及使他们的工作对更广泛的社区可访问方面所做的努力。 diff --git a/docs/zh/models/index.md b/docs/zh/models/index.md new file mode 100644 index 0000000..a9a29db --- /dev/null +++ b/docs/zh/models/index.md @@ -0,0 +1,98 @@ +--- +comments: true +description: 探索 Ultralytics 支持的多样化 YOLO 系列、SAM、MobileSAM、FastSAM、YOLO-NAS 和 RT-DETR 模型。开启您的 CLI 和 Python 使用示例之旅。 +keywords: Ultralytics, 文档, YOLO, SAM, MobileSAM, FastSAM, YOLO-NAS, RT-DETR, 模型, 架构, Python, CLI +--- + +# Ultralytics 支持的模型 + +欢迎来到 Ultralytics 的模型文档!我们提供多种模型的支持,每种模型都针对特定任务量身定做,如[对象检测](../tasks/detect.md)、[实例分割](../tasks/segment.md)、[图像分类](../tasks/classify.md)、[姿态估计](../tasks/pose.md)以及[多对象跟踪](../modes/track.md)。如果您有兴趣将您的模型架构贡献给 Ultralytics,请查看我们的[贡献指南](../../help/contributing.md)。 + +!!! Note "注意" + + 🚧 我们的多语言文档目前正在建设中,我们正在努力进行完善。感谢您的耐心等待!🙏 + +## 特色模型 + +以下是一些关键模型的介绍: + +1. **[YOLOv3](yolov3.md)**:由 Joseph Redmon 最初开发的 YOLO 模型家族的第三版,以其高效的实时对象检测能力而闻名。 +2. **[YOLOv4](yolov4.md)**:由 Alexey Bochkovskiy 在 2020 年发布的 YOLOv3 的 darknet 原生更新版本。 +3. **[YOLOv5](yolov5.md)**:Ultralytics 改进的 YOLO 架构版本,与先前版本相比,提供了更好的性能和速度权衡。 +4. **[YOLOv6](yolov6.md)**:由[美团](https://about.meituan.com/)在 2022 年发布,用于公司多个自主送货机器人中。 +5. **[YOLOv7](yolov7.md)**:YOLOv4 作者在 2022 年发布的更新版 YOLO 模型。 +6. **[YOLOv8](yolov8.md) NEW 🚀**:YOLO 家族的最新版本,具备实例分割、姿态/关键点估计和分类等增强能力。 +7. **[Segment Anything Model (SAM)](sam.md)**:Meta 的 Segment Anything Model (SAM)。 +8. **[Mobile Segment Anything Model (MobileSAM)](mobile-sam.md)**:由庆熙大学开发的移动应用 MobileSAM。 +9. 
**[Fast Segment Anything Model (FastSAM)](fast-sam.md)**:中国科学院自动化研究所图像与视频分析组开发的 FastSAM。 +10. **[YOLO-NAS](yolo-nas.md)**:YOLO 神经网络结构搜索 (NAS) 模型。 +11. **[Realtime Detection Transformers (RT-DETR)](rtdetr.md)**:百度 PaddlePaddle 实时检测变换器 (RT-DETR) 模型。 + +

+
+ +
+ 观看: 只需几行代码即可运行 Ultralytics YOLO 模型。 +

+ +## 入门:使用示例 + +此示例提供了简单的 YOLO 训练和推理示例。有关这些和其他[模式](../modes/index.md)的完整文档,请查看[Predict](../modes/predict.md)、[Train](../modes/train.md)、[Val](../modes/val.md) 和 [Export](../modes/export.md) 文档页面。 + +请注意,以下示例适用于对象检测的 YOLOv8 [Detect](../tasks/detect.md) 模型。有关其他支持任务的详细信息,请查看[Segment](../tasks/segment.md)、[Classify](../tasks/classify.md) 和 [Pose](../tasks/pose.md) 文档。 + +!!! Example "示例" + + === "Python" + + 可将 PyTorch 预训练的 `*.pt` 模型以及配置文件 `*.yaml` 传入 `YOLO()`、`SAM()`、`NAS()` 和 `RTDETR()` 类,以在 Python 中创建模型实例: + + ```python + from ultralytics import YOLO + + # 加载 COCO 预训练的 YOLOv8n 模型 + model = YOLO('yolov8n.pt') + + # 显示模型信息(可选) + model.info() + + # 在 COCO8 示例数据集上训练模型 100 个周期 + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # 使用 YOLOv8n 模型对 'bus.jpg' 图像进行推理 + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + CLI 命令可直接运行模型: + + ```bash + # 加载 COCO 预训练的 YOLOv8n 模型,并在 COCO8 示例数据集上训练 100 个周期 + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # 加载 COCO 预训练的 YOLOv8n 模型,并对 'bus.jpg' 图像进行推理 + yolo predict model=yolov8n.pt source=path/to/bus.jpg + ``` + +## 贡献新模型 + +有兴趣将您的模型贡献给 Ultralytics 吗?太好了!我们始终欢迎扩展我们的模型投资组合。 + +1. **Fork 仓库**:从 Fork [Ultralytics GitHub 仓库](https://github.com/ultralytics/ultralytics) 开始。 + +2. **克隆您的 Fork**:将您的 Fork 克隆到您的本地机器,并创建一个新的分支进行工作。 + +3. **实现您的模型**:按照我们在[贡献指南](../../help/contributing.md)中提供的编码标准和指南添加您的模型。 + +4. **彻底测试**:确保彻底测试您的模型,无论是独立测试还是作为流水线的一部分。 + +5. **创建拉取请求**:一旦您对您的模型满意,就创建一个拉取请求以供主仓库审查。 + +6. 
**代码审查与合并**:经过审查,如果您的模型符合我们的标准,它将被合并到主仓库中。 + +有关详细步骤,请参阅我们的[贡献指南](../../help/contributing.md)。 diff --git a/docs/zh/models/mobile-sam.md b/docs/zh/models/mobile-sam.md new file mode 100644 index 0000000..98b191d --- /dev/null +++ b/docs/zh/models/mobile-sam.md @@ -0,0 +1,116 @@ +--- +comments: true +description: 了解有关MobileSAM的更多信息,包括其实现、与原始SAM的比较,以及在Ultralytics框架中如何下载和测试它。立即改进您的移动应用程序。 +keywords: MobileSAM, Ultralytics, SAM, 移动应用, Arxiv, GPU, API, 图像编码器, 蒙版解码器, 模型下载, 测试方法 +--- + +![MobileSAM Logo](https://github.com/ChaoningZhang/MobileSAM/blob/master/assets/logo2.png?raw=true) + +# 移动端细分模型(MobileSAM) + +MobileSAM 论文现在可以在 [arXiv](https://arxiv.org/pdf/2306.14289.pdf) 上找到。 + +可以通过此 [演示链接](https://huggingface.co/spaces/dhkim2810/MobileSAM) 访问在 CPU 上运行的 MobileSAM 演示。在 Mac i5 CPU 上,性能大约需要 3 秒。在 Hugging Face 的演示中,界面和性能较低的 CPU 导致响应较慢,但它仍然能有效地工作。 + +MobileSAM 已在 Grounding-SAM、AnyLabeling 和 Segment Anything in 3D 等多个项目中实施。您可以在 [Grounding-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything)、[AnyLabeling](https://github.com/vietanhdev/anylabeling) 和 [Segment Anything in 3D](https://github.com/Jumpat/SegmentAnythingin3D) 上找到这些项目。 + +MobileSAM 使用单个 GPU 在不到一天的时间内对 10 万个数据集(原始图像的 1%)进行训练。关于此训练的代码将在将来提供。 + +## 可用模型、支持的任务和操作模式 + +以下表格显示了可用模型及其具体的预训练权重,它们支持的任务以及与不同操作模式([预测](../modes/predict.md)、[验证](../modes/val.md)、[训练](../modes/train.md) 和 [导出](../modes/export.md))的兼容性,其中支持的模式用 ✅ 表示,不支持的模式用 ❌ 表示。 + +| 模型类型 | 预训练权重 | 支持的任务 | 预测 | 验证 | 训练 | 导出 | +|-----------|-----------------|-----------------------------|----|----|----|----| +| MobileSAM | `mobile_sam.pt` | [实例分割](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## 从 SAM 迁移到 MobileSAM + +由于 MobileSAM 保留了与原始 SAM 相同的流程,我们已将原始 SAM 的预处理、后处理和所有其他接口整合到 MobileSAM 中。因此,目前使用原始 SAM 的用户可以以最小的努力迁移到 MobileSAM。 + +MobileSAM 在性能上与原始 SAM 相当,并保留了相同的流程,只是更改了图像编码器。具体而言,我们用较小的 Tiny-ViT(5M)替换了原始的笨重的 ViT-H 编码器(632M)。在单个 GPU 上,MobileSAM 每张图片的运行时间约为 12 毫秒:图像编码器约 8 毫秒,蒙版解码器约 4 毫秒。 + +以下表格比较了基于 ViT 的图像编码器: + +| 图像编码器 | 原始 SAM | MobileSAM | 
+|-------|--------|-----------| +| 参数 | 611M | 5M | +| 速度 | 452ms | 8ms | + +原始 SAM 和 MobileSAM 均使用相同的提示引导蒙版解码器: + +| 蒙版解码器 | 原始 SAM | MobileSAM | +|-------|--------|-----------| +| 参数 | 3.876M | 3.876M | +| 速度 | 4ms | 4ms | + +以下是整个流程的比较: + +| 整个流程(编码器+解码器) | 原始 SAM | MobileSAM | +|---------------|--------|-----------| +| 参数 | 615M | 9.66M | +| 速度 | 456ms | 12ms | + +MobileSAM 和原始 SAM 的性能通过使用点和框作为提示进行演示。 + +![点作为提示的图像](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/mask_box.jpg?raw=true) + +![框作为提示的图像](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/mask_box.jpg?raw=true) + +MobileSAM 的性能优于当前的 FastSAM,尺寸大约减小了 5 倍,速度快了约 7 倍。有关更多详细信息,请访问 [MobileSAM 项目页面](https://github.com/ChaoningZhang/MobileSAM)。 + +## 在 Ultralytics 中测试 MobileSAM + +与原始 SAM 一样,我们在 Ultralytics 中提供了一种简单的测试方法,包括点提示和框提示的模式。 + +### 模型下载 + +您可以在 [这里](https://github.com/ChaoningZhang/MobileSAM/blob/master/weights/mobile_sam.pt) 下载模型。 + +### 点提示 + +!!! Example "示例" + + === "Python" + ```python + from ultralytics import SAM + + # 载入模型 + model = SAM('mobile_sam.pt') + + # 基于点提示预测一个分段 + model.predict('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +### 框提示 + +!!! Example "示例" + + === "Python" + ```python + from ultralytics import SAM + + # 载入模型 + model = SAM('mobile_sam.pt') + + # 基于框提示预测一个分段 + model.predict('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + ``` + +我们使用相同的 API 实现了 `MobileSAM` 和 `SAM`。有关更多用法信息,请参阅 [SAM 页面](sam.md)。 + +## 引用和鸣谢 + +如果您在研究或开发工作中发现 MobileSAM 对您有用,请考虑引用我们的论文: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @article{mobile_sam, + title={Faster Segment Anything: Towards Lightweight SAM for Mobile Applications}, + author={Zhang, Chaoning and Han, Dongshen and Qiao, Yu and Kim, Jung Uk and Bae, Sung Ho and Lee, Seungkyu and Hong, Choong Seon}, + journal={arXiv preprint arXiv:2306.14289}, + year={2023} + } diff --git a/docs/zh/models/rtdetr.md b/docs/zh/models/rtdetr.md new file mode 100644 index 0000000..e6940d1 --- /dev/null +++ b/docs/zh/models/rtdetr.md @@ -0,0 +1,93 @@ +--- +comments: true +description: 了解百度的RT-DETR,一种基于Vision Transformers的高效灵活的实时目标检测器,包括预训练模型的特性和优势。 +keywords: RT-DETR、Baidu、Vision Transformers、目标检测、实时表现、CUDA、TensorRT、IoU感知的查询选择、Ultralytics、Python API、PaddlePaddle +--- + +# 百度的RT-DETR:基于Vision Transformers的实时目标检测器 + +## 概览 + +百度开发的实时检测变换器(RT-DETR)是一种尖端的端到端目标检测器,具有实时性能和高准确性。它利用Vision Transformers (ViT) 的强大功能,通过解耦内部尺度交互和跨尺度融合,高效处理多尺度特征。RT-DETR非常灵活适应各种推断速度的调整,支持使用不同的解码器层而无需重新训练。该模型在CUDA和TensorRT等加速后端上表现出色,超越了许多其他实时目标检测器。 + +![模型示例图像](https://user-images.githubusercontent.com/26833433/238963168-90e8483f-90aa-4eb6-a5e1-0d408b23dd33.png) +**百度的RT-DETR概览** 百度的RT-DETR模型架构图显示了骨干网的最后三个阶段{S3, S4, S5}作为编码器输入。高效的混合编码器通过内部尺度特征交互(AIFI)和跨尺度特征融合模块(CCFM)将多尺度特征转换为图像特征序列。采用IoU感知的查询选择来选择一定数量的图像特征作为解码器的初始对象查询。最后,解码器通过辅助预测头迭代优化对象查询,生成框和置信度得分。([文章来源](https://arxiv.org/pdf/2304.08069.pdf)) + +### 主要特点 + +- **高效的混合编码器:** 百度的RT-DETR使用高效的混合编码器,通过解耦内部尺度交互和跨尺度融合来处理多尺度特征。这种独特的Vision Transformers架构降低了计算成本,实现实时目标检测。 +- **IoU感知的查询选择:** 百度的RT-DETR利用IoU感知的查询选择改进了对象查询的初始化。这使得模型能够聚焦于场景中最相关的对象,提高了检测准确性。 +- **灵活的推断速度:** 百度的RT-DETR支持使用不同的解码器层灵活调整推断速度,无需重新训练。这种适应性有助于在各种实时目标检测场景中实际应用。 + +## 预训练模型 + +Ultralytics Python API提供了不同尺度的预训练PaddlePaddle RT-DETR模型: + +- RT-DETR-L: 在COCO val2017上达到53.0%的AP,在T4 GPU上达到114 FPS +- RT-DETR-X: 在COCO val2017上达到54.8%的AP,在T4 GPU上达到74 FPS + +## 使用示例 + +此示例提供了简单的RT-DETR训练和推断示例。有关这些和其他[模式](../modes/index.md)的完整文档,请参阅[预测](../modes/predict.md)、[训练](../modes/train.md)、[验证](../modes/val.md)和[导出](../modes/export.md)文档页面。 + +!!! 
Example "示例" + + === "Python" + + ```python + from ultralytics import RTDETR + + # 加载预训练的COCO RT-DETR-l模型 + model = RTDETR('rtdetr-l.pt') + + # 显示模型信息(可选) + model.info() + + # 使用COCO8示例数据集对模型进行100个epoch的训练 + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # 使用RT-DETR-l模型在'bus.jpg'图像上运行推断 + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + ```bash + # 加载预训练的COCO RT-DETR-l模型,并在COCO8示例数据集上进行100个epoch的训练 + yolo train model=rtdetr-l.pt data=coco8.yaml epochs=100 imgsz=640 + + # 加载预训练的COCO RT-DETR-l模型,并在'bus.jpg'图像上运行推断 + yolo predict model=rtdetr-l.pt source=path/to/bus.jpg + ``` + +## 支持的任务和模式 + +该表格提供了各个模型类型、具体的预训练权重、各个模型支持的任务以及支持的各种模式([训练](../modes/train.md)、[验证](../modes/val.md)、[预测](../modes/predict.md)、[导出](../modes/export.md)),其中✅表示支持。 + +| 模型类型 | 预训练权重 | 支持的任务 | 推断 | 验证 | 训练 | 导出 | +|---------------------|---------------|----------------------------|----|----|----|----| +| RT-DETR-Large | `rtdetr-l.pt` | [目标检测](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| RT-DETR-Extra-Large | `rtdetr-x.pt` | [目标检测](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +## 引用和致谢 + +如果你在研究或开发中使用了百度的RT-DETR,请引用[原始论文](https://arxiv.org/abs/2304.08069): + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{lv2023detrs, + title={DETRs Beat YOLOs on Real-time Object Detection}, + author={Wenyu Lv and Shangliang Xu and Yian Zhao and Guanzhong Wang and Jinman Wei and Cheng Cui and Yuning Du and Qingqing Dang and Yi Liu}, + year={2023}, + eprint={2304.08069}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +我们要感谢百度和[PaddlePaddle](https://github.com/PaddlePaddle/PaddleDetection)团队为计算机视觉社区创建和维护了这个宝贵的资源。非常感谢他们使用基于Vision Transformers的实时目标检测器RT-DETR在该领域做出的贡献。 + +*keywords: RT-DETR、Transformer、ViT、Vision Transformers、Baidu RT-DETR、PaddlePaddle、Paddle Paddle RT-DETR,实时目标检测、基于Vision Transformers的目标检测、预训练的PaddlePaddle RT-DETR模型、百度RT-DETR的使用、Ultralytics Python API* diff --git a/docs/zh/models/sam.md b/docs/zh/models/sam.md new file mode 100644 index 0000000..dc4b3f8 --- /dev/null +++ b/docs/zh/models/sam.md @@ -0,0 +1,226 @@ +--- +comments: true +description: 探索来自Ultralytics的最前沿的Segment Anything Model (SAM),它可以进行实时图像分割。了解其可提示分割、零样本性能以及如何使用它。 +keywords: Ultralytics,图像分割,Segment Anything Model,SAM,SA-1B数据集,实时性能,零样本转移,目标检测,图像分析,机器学习 +--- + +# Segment Anything Model (SAM) + +欢迎来到使用Segment Anything Model (SAM) 进行图像分割的前沿。这个革命性的模型通过引入可以提示的实时图像分割,树立了领域新的标准。 + +## SAM的介绍:Segment Anything Model + +Segment Anything Model (SAM) 是一种先进的图像分割模型,可以进行可提示的分割,为图像分析任务提供了前所未有的灵活性。SAM 是Segment Anything 项目的核心,该项目引入了一种新颖的模型、任务和图像分割数据集。 + +SAM 先进的设计允许它在没有先验知识的情况下适应新的图像分布和任务,这个特点被称为零样本转移。SAM 在包含11亿个掩模的SA-1B数据集上进行训练,该数据集包含超过1100万张精心策划的图像,SAM 在零样本任务中表现出色,许多情况下超过了之前的完全监督结果。 + +![数据集示例图像](https://user-images.githubusercontent.com/26833433/238056229-0e8ffbeb-f81a-477e-a490-aff3d82fd8ce.jpg) +从我们新引入的SA-1B数据集中选择的示例图像,显示了覆盖的掩模。SA-1B包含了1100万个多样化、高分辨率、许可的图像和11亿个高质量的分割掩模。这些掩模由SAM完全自动地进行了注释,经过人工评级和大量实验的验证,它们具有高质量和多样性。图像按每个图像的掩模数量进行分组以进行可视化(平均每个图像有∼100个掩模)。 + +## Segment Anything Model (SAM) 的主要特点 + +- **可提示的分割任务**:SAM 的设计考虑了可提示的分割任务,它可以从给定的提示中生成有效的分割掩模,例如指示对象的空间或文本线索。 +- **先进的架构**:Segment Anything Model 
使用强大的图像编码器、提示编码器和轻量的掩模解码器。这种独特的架构使得分段任务中的提示灵活、实时掩模计算和模糊感知成为可能。 +- **SA-1B 数据集**:由Segment Anything 项目引入的 SA-1B 数据集包含超过11亿个掩模的1100万张图像。作为迄今为止最大的分割数据集,它为 SAM 提供了一个多样化的大规模训练数据源。 +- **零样本性能**:SAM 在各种分割任务中展现出出色的零样本性能,使得它成为一个可以立即使用的工具,对于各种应用来说,对提示工程的需求很小。 + +如果您想了解更多关于Segment Anything Model 和 SA-1B 数据集的详细信息,请访问[Segment Anything 网站](https://segment-anything.com)并查看研究论文[Segment Anything](https://arxiv.org/abs/2304.02643)。 + +## 可用模型、支持的任务和操作模式 + +这个表格展示了可用模型及其特定的预训练权重,它们支持的任务,以及它们与不同操作模式([Inference](../modes/predict.md)、[Validation](../modes/val.md)、[Training](../modes/train.md) 和 [Export](../modes/export.md))的兼容性,用 ✅ 表示支持的模式,用 ❌ 表示不支持的模式。 + +| 模型类型 | 预训练权重 | 支持的任务 | 推断 | 验证 | 训练 | 导出 | +|-----------|------------|-----------------------------|----|----|----|----| +| SAM base | `sam_b.pt` | [实例分割](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | +| SAM large | `sam_l.pt` | [实例分割](../tasks/segment.md) | ✅ | ❌ | ❌ | ✅ | + +## 如何使用 SAM: 图像分割的多功能和强大 + +Segment Anything Model 可以用于多种下游任务,超越训练数据的范围。这包括边缘检测,目标提案生成,实例分割和初步的文本到掩模预测。通过 prompt 工程,SAM 可以快速适应新的任务和数据分布,以零样本的方式,确立其作为图像分割需求的多功能和强大工具。 + +### SAM 预测示例 + +!!! Example "使用提示进行分割" + + 使用给定的提示对图像进行分割。 + + === "Python" + + ```python + from ultralytics import SAM + + # 加载模型 + model = SAM('sam_b.pt') + + # 显示模型信息(可选) + model.info() + + # 使用边界框提示进行推断 + model('ultralytics/assets/zidane.jpg', bboxes=[439, 437, 524, 709]) + + # 使用点提示进行推断 + model('ultralytics/assets/zidane.jpg', points=[900, 370], labels=[1]) + ``` + +!!! Example "分割整个图像" + + 分割整个图像。 + + === "Python" + + ```python + from ultralytics import SAM + + # 加载模型 + model = SAM('sam_b.pt') + + # 显示模型信息(可选) + model.info() + + # 进行推断 + model('path/to/image.jpg') + ``` + + === "CLI" + + ```bash + # 使用 SAM 模型进行推断 + yolo predict model=sam_b.pt source=path/to/image.jpg + ``` + +- 这里的逻辑是,如果您没有传入任何提示(边界框/点/掩模),则对整个图像进行分割。 + +!!! 
Example "SAMPredictor 示例" + + 这种方法可以设置图像一次,然后多次运行提示推断,而无需多次运行图像编码器。 + + === "提示推断" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # 创建 SAMPredictor + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # 设置图像 + predictor.set_image("ultralytics/assets/zidane.jpg") # 使用图像文件设置 + predictor.set_image(cv2.imread("ultralytics/assets/zidane.jpg")) # 使用 np.ndarray 设置 + results = predictor(bboxes=[439, 437, 524, 709]) + results = predictor(points=[900, 370], labels=[1]) + + # 重置图像 + predictor.reset_image() + ``` + + 通过附加参数对整个图像分割。 + + === "分割整个图像" + + ```python + from ultralytics.models.sam import Predictor as SAMPredictor + + # 创建 SAMPredictor + overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model="mobile_sam.pt") + predictor = SAMPredictor(overrides=overrides) + + # 使用附加参数进行分割整个图像 + results = predictor(source="ultralytics/assets/zidane.jpg", crop_n_layers=1, points_stride=64) + ``` + +- 更多关于`分割整个图像`的附加参数,请查看[`Predictor/generate` 参考](../../../reference/models/sam/predict.md)。 + +## SAM 与 YOLOv8 的对比 + +在这里,我们将 Meta 最小的 SAM 模型 SAM-b 与 Ultralytics 的最小分割模型 [YOLOv8n-seg](../tasks/segment.md) 进行对比: + +| 模型 | 大小 | 参数 | 速度 (CPU) | +|------------------------------------------------|-----------------------|----------------------|------------------------| +| Meta's SAM-b | 358 MB | 94.7 M | 51096 ms/im | +| [MobileSAM](mobile-sam.md) | 40.7 MB | 10.1 M | 46122 ms/im | +| [FastSAM-s](fast-sam.md) with YOLOv8 backbone | 23.7 MB | 11.8 M | 115 ms/im | +| Ultralytics [YOLOv8n-seg](../tasks/segment.md) | **6.7 MB** (缩小了53.4倍) | **3.4 M** (缩小了27.9倍) | **59 ms/im** (加速了866倍) | + +这个对比显示了不同模型之间的模型大小和速度上数量级的差异。虽然 SAM 提供了自动分割的独特能力,但它不是与 YOLOv8 分割模型直接竞争的产品,后者体积更小、速度更快、效率更高。 + +在配备有16GB RAM的2023年 Apple M2 MacBook 上进行了测试。要重现这个测试: + +!!! 
Example "示例" + + === "Python" + ```python + from ultralytics import FastSAM, SAM, YOLO + + # 分析 SAM-b + model = SAM('sam_b.pt') + model.info() + model('ultralytics/assets') + + # 分析 MobileSAM + model = SAM('mobile_sam.pt') + model.info() + model('ultralytics/assets') + + # 分析 FastSAM-s + model = FastSAM('FastSAM-s.pt') + model.info() + model('ultralytics/assets') + + # 分析 YOLOv8n-seg + model = YOLO('yolov8n-seg.pt') + model.info() + model('ultralytics/assets') + ``` + +## 自动注释:创建分割数据集的快速路径 + +自动注释是 SAM 的一个关键功能,它允许用户使用预训练的检测模型生成一个[分割数据集](https://docs.ultralytics.com/datasets/segment)。这个功能可以通过自动生成大量图像的准确注释,绕过耗时的手动标注过程,从而快速获得高质量的分割数据集。 + +### 使用检测模型生成分割数据集 + +要使用Ultralytics框架对数据集进行自动注释,可以使用如下所示的 `auto_annotate` 函数: + +!!! Example "示例" + + === "Python" + ```python + from ultralytics.data.annotator import auto_annotate + + auto_annotate(data="path/to/images", det_model="yolov8x.pt", sam_model='sam_b.pt') + ``` + +| 参数 | 类型 | 描述 | 默认值 | +|------------|---------------|------------------------------------------|--------------| +| data | str | 包含要进行注释的图像的文件夹的路径。 | | +| det_model | str, 可选 | 预训练的 YOLO 检测模型,默认为 'yolov8x.pt'。 | 'yolov8x.pt' | +| sam_model | str, 可选 | 预训练的 SAM 分割模型,默认为 'sam_b.pt'。 | 'sam_b.pt' | +| device | str, 可选 | 在其上运行模型的设备,默认为空字符串(如果可用,则为 CPU 或 GPU)。 | | +| output_dir | str, None, 可选 | 保存注释结果的目录。默认为与 'data' 目录同级的 'labels' 目录。 | None | + +`auto_annotate` 函数接受您图像的路径,并提供了可选的参数用于指定预训练的检测和 SAM 分割模型、运行模型的设备,以及保存注释结果的输出目录。 + +使用预训练模型进行自动注释可以大大减少创建高质量分割数据集所需的时间和工作量。这个功能特别对于处理大量图像集合的研究人员和开发人员非常有益,因为它允许他们专注于模型的开发和评估,而不是手动注释。 + +## 引用和鸣谢 + +如果您在研究或开发中发现 SAM 对您有用,请考虑引用我们的论文: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @misc{kirillov2023segment, + title={Segment Anything}, + author={Alexander Kirillov and Eric Mintun and Nikhila Ravi and Hanzi Mao and Chloe Rolland and Laura Gustafson and Tete Xiao and Spencer Whitehead and Alexander C. 
Berg and Wan-Yen Lo and Piotr Dollár and Ross Girshick}, + year={2023}, + eprint={2304.02643}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +我们要向 Meta AI 表示感谢,感谢他们为计算机视觉社区创建和维护了这个宝贵的资源。 + +*keywords: Segment Anything,Segment Anything Model,SAM,Meta SAM,图像分割,可提示分割,零样本性能,SA-1B数据集,先进架构,自动注释,Ultralytics,预训练模型,SAM base,SAM large,实例分割,计算机视觉,AI,人工智能,机器学习,数据注释,分割掩模,检测模型,YOLO检测模型,bibtex,Meta AI。* diff --git a/docs/zh/models/yolo-nas.md b/docs/zh/models/yolo-nas.md new file mode 100644 index 0000000..fb2c248 --- /dev/null +++ b/docs/zh/models/yolo-nas.md @@ -0,0 +1,121 @@ +--- +comments: true +description: 探索详细的YOLO-NAS文档,这是一个更高级的物体检测模型。了解其特点、预训练模型、与Ultralytics Python API的使用等内容。 +keywords: YOLO-NAS, Deci AI, 物体检测, 深度学习, 神经架构搜索, Ultralytics Python API, YOLO模型, 预训练模型, 量化, 优化, COCO, Objects365, Roboflow 100 +--- + +# YOLO-NAS + +## 概述 + +由Deci AI开发,YOLO-NAS是一种开创性的物体检测基础模型。它是先进的神经架构搜索技术的产物,经过精心设计以解决之前YOLO模型的局限性。YOLO-NAS在量化支持和准确性-延迟权衡方面取得了重大改进,代表了物体检测领域的重大飞跃。 + +![模型示例图像](https://learnopencv.com/wp-content/uploads/2023/05/yolo-nas_COCO_map_metrics.png) +**YOLO-NAS概览。** YOLO-NAS采用量化感知块和选择性量化实现最佳性能。当将模型转换为INT8量化版本时,模型会经历较小的精度损失,比其他模型有显著改进。这些先进技术使得YOLO-NAS成为具有前所未有的物体检测能力和出色性能的卓越架构。 + +### 主要特点 + +- **量化友好基本块:** YOLO-NAS引入了一种新的基本块,对量化友好,解决了之前YOLO模型的一个重要局限性。 +- **高级训练和量化:** YOLO-NAS利用先进的训练方案和训练后量化以提高性能。 +- **AutoNAC优化和预训练:** YOLO-NAS利用AutoNAC优化,并在著名数据集(如COCO、Objects365和Roboflow 100)上进行了预训练。这种预训练使其非常适合生产环境中的下游物体检测任务。 + +## 预训练模型 + +通过Ultralytics提供的预训练YOLO-NAS模型,体验下一代物体检测的强大功能。这些模型旨在在速度和准确性方面提供出色的性能。根据您的需求,可以选择各种选项: + +| 模型 | mAP | 延迟(ms) | +|------------------|-------|--------| +| YOLO-NAS S | 47.5 | 3.21 | +| YOLO-NAS M | 51.55 | 5.85 | +| YOLO-NAS L | 52.22 | 7.87 | +| YOLO-NAS S INT-8 | 47.03 | 2.36 | +| YOLO-NAS M INT-8 | 51.0 | 3.78 | +| YOLO-NAS L INT-8 | 52.1 | 4.78 | + +每个模型变体均旨在在均衡平均精度(mAP)和延迟之间提供平衡,帮助您为性能和速度都进行优化的物体检测任务。 + +## 用法示例 + +通过我们的`ultralytics` python包,Ultralytics使得将YOLO-NAS模型集成到您的Python应用程序中变得容易。该包提供了一个用户友好的Python API,以简化流程。 + 
+以下示例展示了如何使用`ultralytics`包与YOLO-NAS模型进行推理和验证: + +### 推理和验证示例 + +这个示例中,我们在COCO8数据集上验证YOLO-NAS-s。 + +!!! Example "示例" + + 以下示例为YOLO-NAS提供了简单的推理和验证代码。有关处理推理结果的方法,请参见[Predict](../modes/predict.md)模式。有关使用其他模式的YOLO-NAS的方法,请参见[Val](../modes/val.md)和[Export](../modes/export.md)。`ultralytics`包中的YOLO-NAS不支持训练。 + + === "Python" + + 可以将预训练的PyTorch `*.pt`模型文件传递给`NAS()`类以在python中创建一个模型实例: + + ```python + from ultralytics import NAS + + # 加载一个在COCO上预训练的YOLO-NAS-s模型 + model = NAS('yolo_nas_s.pt') + + # 显示模型信息(可选) + model.info() + + # 在COCO8示例数据集上验证模型 + results = model.val(data='coco8.yaml') + + # 使用YOLO-NAS-s模型对'bus.jpg'图像进行推理 + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + 可以使用CLI命令直接运行模型: + + ```bash + # 加载一个在COCO上预训练的YOLO-NAS-s模型,并验证其在COCO8示例数据集上的性能 + yolo val model=yolo_nas_s.pt data=coco8.yaml + + # 加载一个在COCO上预训练的YOLO-NAS-s模型,并对'bus.jpg'图像进行推理 + yolo predict model=yolo_nas_s.pt source=path/to/bus.jpg + ``` + +## 支持的任务和模式 + +我们提供了三种类型的YOLO-NAS模型:Small (s)、Medium (m)和Large (l)。每种类型都旨在满足不同的计算和性能需求: + +- **YOLO-NAS-s:** 针对计算资源有限但效率至关重要的环境进行了优化。 +- **YOLO-NAS-m:** 提供平衡的方法,适用于具有更高准确性的通用物体检测。 +- **YOLO-NAS-l:** 面向需要最高准确性的场景,计算资源不是限制因素。 + +下面是每个模型的详细信息,包括它们的预训练权重链接、支持的任务以及与不同操作模式的兼容性。 + +| 模型类型 | 预训练权重链接 | 支持的任务 | 推理 | 验证 | 训练 | 导出 | +|------------|-----------------------------------------------------------------------------------------------|----------------------------|----|----|----|----| +| YOLO-NAS-s | [yolo_nas_s.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_s.pt) | [物体检测](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-m | [yolo_nas_m.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_m.pt) | [物体检测](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | +| YOLO-NAS-l | [yolo_nas_l.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolo_nas_l.pt) | [物体检测](../tasks/detect.md) | ✅ | ✅ | ❌ | ✅ | + +## 引用和致谢 + +如果您在研究或开发工作中使用了YOLO-NAS,请引用SuperGradients: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{supergradients, + doi = {10.5281/ZENODO.7789328}, + url = {https://zenodo.org/record/7789328}, + author = {Aharon, Shay and {Louis-Dupont} and {Ofri Masad} and Yurkova, Kate and {Lotem Fridman} and {Lkdci} and Khvedchenya, Eugene and Rubin, Ran and Bagrov, Natan and Tymchenko, Borys and Keren, Tomer and Zhilko, Alexander and {Eran-Deci}}, + title = {Super-Gradients}, + publisher = {GitHub}, + journal = {GitHub repository}, + year = {2021}, + } + ``` + +我们向Deci AI的[SuperGradients](https://github.com/Deci-AI/super-gradients/)团队表示感谢,他们致力于创建和维护这个对计算机视觉社区非常有价值的资源。我们相信YOLO-NAS凭借其创新的架构和卓越的物体检测能力,将成为开发者和研究人员的重要工具。 + +*keywords: YOLO-NAS, Deci AI, 物体检测, 深度学习, 神经架构搜索, Ultralytics Python API, YOLO模型, SuperGradients, 预训练模型, 量化友好基本块, 高级训练方案, 训练后量化, AutoNAC优化, COCO, Objects365, Roboflow 100* diff --git a/docs/zh/models/yolov3.md b/docs/zh/models/yolov3.md new file mode 100644 index 0000000..320c4a2 --- /dev/null +++ b/docs/zh/models/yolov3.md @@ -0,0 +1,98 @@ +--- +comments: true +description: 了解YOLOv3、YOLOv3-Ultralytics和YOLOv3u的概述。了解它们的关键功能、用途和支持的目标检测任务。 +keywords: YOLOv3、YOLOv3-Ultralytics、YOLOv3u、目标检测、推理、训练、Ultralytics +--- + +# YOLOv3、YOLOv3-Ultralytics和YOLOv3u + +## 概述 + +本文介绍了三个紧密相关的目标检测模型,分别是[YOLOv3](https://pjreddie.com/darknet/yolo/)、[YOLOv3-Ultralytics](https://github.com/ultralytics/yolov3)和[YOLOv3u](https://github.com/ultralytics/ultralytics)。 + +1. **YOLOv3:** 这是第三版 You Only Look Once (YOLO) 目标检测算法。YOLOv3 在前作的基础上进行了改进,引入了多尺度预测和三种不同尺寸的检测核,提高了检测准确性。 + +2. **YOLOv3-Ultralytics:** 这是 Ultralytics 对 YOLOv3 模型的实现。它在复现了原始 YOLOv3 架构的基础上,提供了额外的功能,如对更多预训练模型的支持和更简单的定制选项。 + +3. 
**YOLOv3u:** 这是 YOLOv3-Ultralytics 的更新版本,它引入了 YOLOv8 模型中使用的无锚框(anchor-free)、无目标置信度(objectness-free)的分离式检测头。YOLOv3u 保留了 YOLOv3 的主干和颈部架构,但使用了来自 YOLOv8 的更新检测头。 + +![Ultralytics YOLOv3](https://raw.githubusercontent.com/ultralytics/assets/main/yolov3/banner-yolov3.png) + +## 关键功能 + +- **YOLOv3:** 引入了三种不同尺度的检测,采用了三种不同尺寸的检测核:13x13、26x26 和 52x52。这显著提高了对不同大小对象的检测准确性。此外,YOLOv3 还为每个边界框添加了多标签预测和更好的特征提取网络。 + +- **YOLOv3-Ultralytics:** Ultralytics 对 YOLOv3 的实现具有与原始模型相同的性能,但增加了对更多预训练模型、额外训练方法和更简单的定制选项的支持。这使得它在实际应用中更加通用和易用。 + +- **YOLOv3u:** 这个更新的模型采用了来自 YOLOv8 的无锚框(anchor-free)、无目标置信度(objectness-free)的分离式检测头。通过消除对预定义锚框和目标置信度(objectness)分数的需求,这种检测头设计可以提高模型对不同大小和形状的对象的检测能力。这使得 YOLOv3u 在目标检测任务中更加强大和准确。 + +## 支持的任务和模式 + +YOLOv3 系列,包括 YOLOv3、YOLOv3-Ultralytics 和 YOLOv3u,专门用于目标检测任务。这些模型以在各种实际场景中平衡准确性和速度而闻名。每个变体都提供了独特的功能和优化,使其适用于各种应用场景。 + +这三个模型都支持一套全面的模式,确保在模型部署和开发的各个阶段具备多种功能。这些模式包括[推理](../modes/predict.md)、[验证](../modes/val.md)、[训练](../modes/train.md)和[导出](../modes/export.md),为用户提供了有效的目标检测完整工具。 + +| 模型类型 | 支持的任务 | 推理 | 验证 | 训练 | 导出 | +|--------------------|----------------------------|----|----|----|----| +| YOLOv3 | [目标检测](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3-Ultralytics | [目标检测](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3u | [目标检测](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +该表格提供了每个 YOLOv3 变体的能力一览,突显了它们的多功能性和适用性,以用于目标检测工作流程中的各种任务和操作模式。 + +## 用法示例 + +以下示例提供了简单的 YOLOv3 训练和推理示例。有关这些和其他模式的完整文档,请参阅 [Predict](../modes/predict.md)、[Train](../modes/train.md)、[Val](../modes/val.md) 和 [Export](../modes/export.md) 文档页面。 + +!!! 
Example "示例" + + === "Python" + + 可以将预先训练的 PyTorch `*.pt` 模型以及配置 `*.yaml` 文件传递给 `YOLO()` 类,以在 Python 中创建模型实例: + + ```python + from ultralytics import YOLO + + # 加载一个经过 COCO 预训练的 YOLOv3n 模型 + model = YOLO('yolov3n.pt') + + # 显示模型信息(可选) + model.info() + + # 在 COCO8 示例数据集上训练模型100个epoch + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # 使用 YOLOv3n 模型对 'bus.jpg' 图像进行推理 + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + 可以直接使用命令行界面 (CLI) 来运行模型: + + ```bash + # 加载一个经过 COCO 预训练的 YOLOv3n 模型,并在 COCO8 示例数据集上训练100个epoch + yolo train model=yolov3n.pt data=coco8.yaml epochs=100 imgsz=640 + + # 加载一个经过 COCO 预训练的 YOLOv3n 模型,并对 'bus.jpg' 图像进行推理 + yolo predict model=yolov3n.pt source=path/to/bus.jpg + ``` + +## 引用和致谢 + +如果您在研究中使用 YOLOv3,请引用原始的 YOLO 论文和 Ultralytics 的 YOLOv3 仓库: + +!!! Quote "" + + === "BibTeX" + + ```bibtex + @article{redmon2018yolov3, + title={YOLOv3: An Incremental Improvement}, + author={Redmon, Joseph and Farhadi, Ali}, + journal={arXiv preprint arXiv:1804.02767}, + year={2018} + } + ``` + +感谢 Joseph Redmon 和 Ali Farhadi 开发了原始的 YOLOv3 模型。 diff --git a/docs/zh/models/yolov4.md b/docs/zh/models/yolov4.md new file mode 100644 index 0000000..50edafc --- /dev/null +++ b/docs/zh/models/yolov4.md @@ -0,0 +1,71 @@ +--- +comments: true +description: 通过我们详细的YOLOv4指南,探索最先进的实时目标检测器。了解其架构亮点,创新功能和应用示例。 +keywords: ultralytics, YOLOv4, 目标检测, 神经网络, 实时检测, 目标检测器, 机器学习 +--- + +# YOLOv4:高速和精确的目标检测 + +欢迎来到Ultralytics关于YOLOv4的文档页面,YOLOv4是由Alexey Bochkovskiy于2020年在 [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet) 发布的最先进的实时目标检测器。YOLOv4旨在提供速度和准确性的最佳平衡,使其成为许多应用的优秀选择。 + +![YOLOv4架构图](https://user-images.githubusercontent.com/26833433/246185689-530b7fe8-737b-4bb0-b5dd-de10ef5aface.png) +**YOLOv4架构图**。展示了YOLOv4的复杂网络设计,包括主干,颈部和头部组件以及它们相互连接的层,以实现最佳的实时目标检测。 + +## 简介 + +YOLOv4代表You Only Look 
Once版本4。它是为解决之前YOLO版本(如[YOLOv3](yolov3.md))和其他目标检测模型的局限性而开发的实时目标检测模型。与其他基于卷积神经网络(CNN)的目标检测器不同,YOLOv4不仅适用于推荐系统,还可用于独立的进程管理和减少人工输入。它在传统图形处理单元(GPU)上的操作可以以经济实惠的价格进行大规模使用,并且设计为在常规GPU上实时工作,仅需要一个这样的GPU进行训练。 + +## 架构 + +YOLOv4利用了几个创新功能,这些功能共同优化其性能。这些功能包括加权残差连接(WRC),跨阶段部分连接(CSP),交叉mini-Batch归一化(CmBN),自对抗训练(SAT),Mish激活函数,Mosaic数据增强,DropBlock正则化和CIoU损失。这些功能的组合可以实现最先进的结果。 + +典型的目标检测器由几个部分组成,包括输入、主干、颈部和头部。YOLOv4的主干是在ImageNet上预训练的,用于预测对象的类别和边界框。主干可以来自多个模型,包括VGG、ResNet、ResNeXt或DenseNet。检测器的颈部部分用于从不同阶段收集特征图,通常包括几条自底向上的路径和几条自顶向下的路径。头部部分用于进行最终的目标检测和分类。 + +## 免费赠品 + +YOLOv4还使用了称为“免费赠品”的方法,这些方法在训练过程中提高模型的准确性,而不增加推理成本。数据增强是目标检测中常用的一种免费赠品技术,它增加了输入图像的变异性,以提高模型的鲁棒性。一些数据增强的例子包括光度失真(调整图像的亮度、对比度、色调、饱和度和噪音)和几何失真(添加随机缩放、裁剪、翻转和旋转)。这些技术帮助模型更好地应对不同类型的图像。 + +## 特点和性能 + +YOLOv4被设计为在目标检测中具有最佳速度和准确性。YOLOv4的架构包括CSPDarknet53作为主干,PANet作为颈部,以及YOLOv3作为检测头。这种设计使得YOLOv4能够以令人印象深刻的速度进行目标检测,适用于实时应用。YOLOv4在准确性方面也表现出色,在目标检测基准测试中取得了最先进的结果。 + +## 使用示例 + +截至撰写本文时,Ultralytics当前不支持YOLOv4模型。因此,任何有兴趣使用YOLOv4的用户需要直接参考YOLOv4 GitHub存储库中的安装和使用说明。 + +以下是使用YOLOv4的典型步骤的简要概述: + +1. 访问YOLOv4 GitHub存储库:[https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet)。 + +2. 按照README文件中提供的说明进行安装。这通常涉及克隆存储库,安装必要的依赖项,并设置任何必要的环境变量。 + +3. 安装完成后,您可以根据存储库提供的使用说明训练和使用模型。这通常涉及准备您的数据集、配置模型参数、训练模型,然后使用训练好的模型进行目标检测。 + +请注意,具体的步骤可能因您的特定用例和YOLOv4存储库的当前状态而有所不同。因此,强烈建议直接参考YOLOv4 GitHub存储库中提供的说明。 + +对于Ultralytics不支持YOLOv4的情况,我们感到非常抱歉,我们将努力更新本文档,以包括使用Ultralytics支持的YOLOv4的示例。 + +## 结论 + +YOLOv4是一种强大而高效的目标检测模型,它在速度和准确性之间取得了平衡。它在训练过程中使用独特的功能和免费赠品技术,使其在实时目标检测任务中表现出色。任何具备常规GPU的人都可以进行YOLOv4的训练和使用,使其对于各种应用具有可访问性和实用性。 + +## 引文和致谢 + +我们要感谢YOLOv4的作者对实时目标检测领域的重要贡献: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @misc{bochkovskiy2020yolov4, + title={YOLOv4: Optimal Speed and Accuracy of Object Detection}, + author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, + year={2020}, + eprint={2004.10934}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + +YOLOv4的原始论文可以在[arXiv](https://arxiv.org/pdf/2004.10934.pdf)上找到。作者已经公开了他们的工作,代码库可以在[GitHub](https://github.com/AlexeyAB/darknet)上获取。我们赞赏他们在推动该领域方面的努力,并使他们的工作对广大社区产生影响。 diff --git a/docs/zh/models/yolov5.md b/docs/zh/models/yolov5.md new file mode 100644 index 0000000..10c5339 --- /dev/null +++ b/docs/zh/models/yolov5.md @@ -0,0 +1,113 @@ +--- +comments: true +description: 发现YOLOv5u,它是YOLOv5模型的改进版本,具有更好的准确性和速度之间的平衡,并为各种目标检测任务提供了许多预训练模型。 +keywords: YOLOv5u、目标检测、预训练模型、Ultralytics、推断、验证、YOLOv5、YOLOv8、无锚点、无物体检测、实时应用、机器学习 +--- + +# YOLOv5 + +## 概述 + +YOLOv5u是目标检测方法的一种进步。YOLOv5u源于Ultralytics开发的[YOLOv5](https://github.com/ultralytics/yolov5)模型的基础架构,它集成了无锚点、无物体检测分离头的新特性,这一特性在[YOLOv8](yolov8.md)模型中首次引入。通过采用这种适应性更强的检测机制,YOLOv5u改进了模型的架构,从而在目标检测任务中实现了更好的准确性和速度的平衡。根据实证结果和其衍生特性,YOLOv5u为那些在研究和实际应用中寻求强大解决方案的人提供了一种高效的选择。 + +![Ultralytics YOLOv5](https://raw.githubusercontent.com/ultralytics/assets/main/yolov5/v70/splash.png) + +## 主要特性 + +- **无锚点分离Ultralytics头部**: 传统的目标检测模型依靠预定义的锚点框来预测目标位置,而YOLOv5u改变了这种方法。采用无锚点分离Ultralytics头部的方式,它确保了更灵活、适应性更强的检测机制,从而在各种场景中提高了性能。 + +- **优化的准确性和速度之间的平衡**: 速度和准确性通常是相互制约的。但是YOLOv5u挑战了这种平衡。它提供了一个校准平衡,确保在保持准确性的同时实现实时检测。这一特性对于需要快速响应的应用非常重要,比如自动驾驶车辆、机器人和实时视频分析。 + +- **丰富的预训练模型**: YOLOv5u提供了多种预训练模型。无论你专注于推断、验证还是训练,都有一个量身定制的模型等待着你。这种多样性确保你不仅仅使用“一刀切”的解决方案,而是使用一个专门为你的独特挑战进行了精细调整的模型。 + +## 支持的任务和模式 + +具有各种预训练权重的YOLOv5u模型在[目标检测](../tasks/detect.md)任务中表现出色。它们支持全面的模式,适用于从开发到部署的各种应用场景。 + +| 模型类型 | 预训练权重 | 任务 | 推断 | 验证 | 训练 | 导出 | +|---------|-----------------------------------------------------------------------------------------------------------------------------|----------------------------|----|----|----|----| +| YOLOv5u | `yolov5nu`, `yolov5su`, `yolov5mu`, 
`yolov5lu`, `yolov5xu`, `yolov5n6u`, `yolov5s6u`, `yolov5m6u`, `yolov5l6u`, `yolov5x6u` | [目标检测](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +该表详细介绍了YOLOv5u模型的变体,突出了它们在目标检测任务和各种操作模式(如[推断](../modes/predict.md)、[验证](../modes/val.md)、[训练](../modes/train.md)和[导出](../modes/export.md))方面的适用性。这种全面的支持确保用户可以充分发挥YOLOv5u模型在各种目标检测场景中的能力。 + +## 性能指标 + +!!! Performance + + === "检测" + + 请参阅[检测文档](https://docs.ultralytics.com/tasks/detect/),以了解在[COCO](https://docs.ultralytics.com/datasets/detect/coco/)上训练的这些模型的用法示例,其中包括80个预训练类别。 + + | 模型 | YAML | 大小
(像素) | mAPval
50-95 | 速度
CPU ONNX
(毫秒) | 速度
A100 TensorRT
(毫秒) | 参数数
(百万) | FLOPs
(十亿) | + |---------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|-----------------------|----------------------|--------------------------------|-------------------------------------|--------------------|-------------------| + | [yolov5nu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5nu.pt) | [yolov5n.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 34.3 | 73.6 | 1.06 | 2.6 | 7.7 | + | [yolov5su.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5su.pt) | [yolov5s.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 43.0 | 120.7 | 1.27 | 9.1 | 24.0 | + | [yolov5mu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5mu.pt) | [yolov5m.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 49.0 | 233.9 | 1.86 | 25.1 | 64.2 | + | [yolov5lu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5lu.pt) | [yolov5l.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 52.2 | 408.4 | 2.50 | 53.2 | 135.0 | + | [yolov5xu.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5xu.pt) | [yolov5x.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5.yaml) | 640 | 53.2 | 763.2 | 3.81 | 97.2 | 246.4 | + | | | | | | | | | + | [yolov5n6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5n6u.pt) | [yolov5n6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 42.1 | 211.0 | 1.83 | 4.3 | 7.8 | + | [yolov5s6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5s6u.pt) | 
[yolov5s6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 48.6 | 422.6 | 2.34 | 15.3 | 24.6 | + | [yolov5m6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5m6u.pt) | [yolov5m6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 53.6 | 810.9 | 4.36 | 41.2 | 65.7 | + | [yolov5l6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5l6u.pt) | [yolov5l6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 55.7 | 1470.9 | 5.47 | 86.1 | 137.4 | + | [yolov5x6u.pt](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5x6u.pt) | [yolov5x6.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/models/v5/yolov5-p6.yaml) | 1280 | 56.8 | 2436.5 | 8.98 | 155.4 | 250.7 | + +## 使用示例 + +这个示例提供了YOLOv5训练和推断的简单示例。有关这些和其他[模式](../modes/index.md)的完整文档,请参阅[预测](../modes/predict.md)、[训练](../modes/train.md)、[验证](../modes/val.md)和[导出](../modes/export.md)的文档页面。 + +!!! Example "示例" + + === "Python" + + PyTorch预训练的`*.pt`模型,以及配置`*.yaml`文件可以传递给`YOLO()`类,以在python中创建一个模型实例: + + ```python + from ultralytics import YOLO + + # 加载一个在COCO数据集上预训练的YOLOv5n模型 + model = YOLO('yolov5n.pt') + + # 显示模型信息(可选) + model.info() + + # 使用COCO8示例数据集对模型进行100个时期的训练 + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # 使用YOLOv5n模型对'bus.jpg'图像进行推断 + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + 可以使用CLI命令直接运行模型: + + ```bash + # 加载一个在COCO数据集上预训练的YOLOv5n模型,并在COCO8示例数据集上进行100个时期的训练 + yolo train model=yolov5n.pt data=coco8.yaml epochs=100 imgsz=640 + + # 加载一个在COCO数据集上预训练的YOLOv5n模型,并在'bus.jpg'图像上进行推断 + yolo predict model=yolov5n.pt source=path/to/bus.jpg + ``` + +## 引用和致谢 + +如果您在您的研究中使用了YOLOv5或YOLOv5u,请引用Ultralytics的YOLOv5存储库,引用方式如下: + +!!! 
Quote "" + + === "BibTeX" + ```bibtex + @software{yolov5, + title = {Ultralytics YOLOv5}, + author = {Glenn Jocher}, + year = {2020}, + version = {7.0}, + license = {AGPL-3.0}, + url = {https://github.com/ultralytics/yolov5}, + doi = {10.5281/zenodo.3908559}, + orcid = {0000-0001-5950-6979} + } + ``` + +请注意,YOLOv5模型提供[AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE)和[企业](https://ultralytics.com/license)许可证。 diff --git a/docs/zh/models/yolov6.md b/docs/zh/models/yolov6.md new file mode 100644 index 0000000..d0ac2e3 --- /dev/null +++ b/docs/zh/models/yolov6.md @@ -0,0 +1,107 @@ +--- +comments: true +description: 探索美团YOLOv6,一种在速度和准确性之间取得平衡的最先进的物体检测模型。深入了解功能、预训练模型和Python使用方法。 +keywords: 美团YOLOv6、物体检测、Ultralytics、YOLOv6文档、双向连接、锚辅助训练、预训练模型、实时应用 +--- + +# 美团YOLOv6 + +## 概述 + +[美团](https://about.meituan.com/) YOLOv6是一种最先进的物体检测器,速度和准确性兼具,成为实时应用的热门选择。该模型在架构和训练方案上引入了几项重要改进,包括双向连接模块(BiC)、锚辅助训练(AAT)策略以及改进了的主干和颈部设计,使其在COCO数据集上达到了最先进的准确性。 + +![美团YOLOv6](https://user-images.githubusercontent.com/26833433/240750495-4da954ce-8b3b-41c4-8afd-ddb74361d3c2.png) +![模型示例图片](https://user-images.githubusercontent.com/26833433/240750557-3e9ec4f0-0598-49a8-83ea-f33c91eb6d68.png) +**YOLOv6概述。** 模型架构图显示了经过重新设计的网络组件和训练策略,这些策略导致了显著的性能提升。(a)YOLOv6的颈部(N和S)。(b)BiC模块的结构。(c)SimCSPSPPF块。([来源](https://arxiv.org/pdf/2301.05586.pdf)). 
+ +### 主要功能 + +- **双向连接(BiC)模块:** YOLOv6在检测器的颈部引入了双向连接(BiC)模块,增强了定位信号,提供了性能增益,并且几乎没有降低速度。 +- **锚辅助训练(AAT)策略:** 该模型提出了锚辅助训练(AAT)以享受基于锚点和无锚点范例的双重优势,同时不影响推理效率。 +- **增强的主干和颈部设计:** 通过在主干和颈部中增加一个阶段,该模型在高分辨率输入下在COCO数据集上实现了最先进的性能。 +- **自我蒸馏策略:** 实施了一种新的自我蒸馏策略,以提升YOLOv6的较小模型的性能,在训练过程中增强辅助回归分支,并在推理过程中将其删除,以避免明显的速度下降。 + +## 性能指标 + +YOLOv6提供了具有不同尺度的各种预训练模型: + +- YOLOv6-N:在NVIDIA Tesla T4 GPU上,COCO val2017上的AP为37.5%,帧率为1187 FPS。 +- YOLOv6-S:AP为45.0%,帧率为484 FPS。 +- YOLOv6-M:AP为50.0%,帧率为226 FPS。 +- YOLOv6-L:AP为52.8%,帧率为116 FPS。 +- YOLOv6-L6:实时场景中的最先进准确性。 + +YOLOv6还提供了适用于不同精度和移动平台的量化模型。 + +## 使用示例 + +以下示例提供了简单的YOLOv6训练和推理示例。有关这些示例和其他[模式](../modes/index.md)的完整文档,请参阅[Predict](../modes/predict.md)、[Train](../modes/train.md)、[Val](../modes/val.md)和[Export](../modes/export.md)的文档页面。 + +!!! 例子 + + === "Python" + + 在Python中,可以将PyTorch预训练的`*.pt`模型以及配置文件`*.yaml`传递给`YOLO()`类,以创建一个模型实例: + + ```python + from ultralytics import YOLO + + # 从头开始构建一个YOLOv6n模型 + model = YOLO('yolov6n.yaml') + + # 显示模型信息(可选) + model.info() + + # 使用COCO8示例数据集对模型进行100个epoch的训练 + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # 使用YOLOv6n模型对'bus.jpg'图像进行推理 + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + 也可以使用CLI命令直接运行模型: + + ```bash + # 从头开始构建YOLOv6n模型,并在COCO8示例数据集上进行100个epoch的训练 + yolo train model=yolov6n.yaml data=coco8.yaml epochs=100 imgsz=640 + + # 从头开始构建YOLOv6n模型,并对'bus.jpg'图像进行推理 + yolo predict model=yolov6n.yaml source=path/to/bus.jpg + ``` + +## 支持的任务和模式 + +YOLOv6系列提供了一系列模型,每个模型都针对高性能[物体检测](../tasks/detect.md)进行了优化。这些模型适用于各种计算需求和准确性要求,使其在广泛的应用中具备多样性。 + +| 模型类型 | 预训练权重 | 支持的任务 | 推理 | 验证 | 训练 | 导出 | +|-----------|----------------|----------------------------|----|----|----|----| +| YOLOv6-N | `yolov6-n.pt` | [物体检测](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-S | `yolov6-s.pt` | [物体检测](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-M | `yolov6-m.pt` | [物体检测](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L | `yolov6-l.pt` | [物体检测](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| 
YOLOv6-L6 | `yolov6-l6.pt` | [物体检测](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | + +这个表格详细介绍了YOLOv6模型的各个变体,突出了它们在物体检测任务中的能力以及它们与各种操作模式(如[推理](../modes/predict.md)、[验证](../modes/val.md)、[训练](../modes/train.md)和[导出](../modes/export.md))的兼容性。这种全面的支持确保用户可以在各种物体检测场景中充分利用YOLOv6模型的能力。 + +## 引用和致谢 + +我们要感谢这些作者在实时物体检测领域的重要贡献: + +!!! 引文 "" + + === "BibTeX" + + ```bibtex + @misc{li2023yolov6, + title={YOLOv6 v3.0: A Full-Scale Reloading}, + author={Chuyi Li and Lulu Li and Yifei Geng and Hongliang Jiang and Meng Cheng and Bo Zhang and Zaidan Ke and Xiaoming Xu and Xiangxiang Chu}, + year={2023}, + eprint={2301.05586}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` + + 原始的YOLOv6论文可以在[arXiv](https://arxiv.org/abs/2301.05586)上找到。作者已经将他们的作品公开,并且代码可以在[GitHub](https://github.com/meituan/YOLOv6)上访问。我们对他们在推动该领域的努力以及使他们的工作为更广泛的社区所接触到的努力表示感谢。 diff --git a/docs/zh/models/yolov7.md b/docs/zh/models/yolov7.md new file mode 100644 index 0000000..46b30b1 --- /dev/null +++ b/docs/zh/models/yolov7.md @@ -0,0 +1,65 @@ +--- +comments: true +description: 探索YOLOv7,一个实时物体检测器。了解其卓越的速度,令人印象深刻的精确度和独特的可训练无需付费优化聚焦点。 +keywords: YOLOv7,实时物体检测器,State-of-the-art,Ultralytics,MS COCO数据集,模型重新参数化,动态标签分配,扩展缩放,复合缩放 +--- + +# YOLOv7:可训练无需付费 + +YOLOv7是一种实时物体检测器的最新技术,其速度和准确度超过了目前已知的所有物体检测器,速度范围在5 FPS到160 FPS之间。在GPU V100上,它在所有已知实时物体检测器中具有最高的准确度(56.8%AP),且帧率达到30 FPS或更高。此外,YOLOv7在速度和准确度方面也优于其他物体检测器,如YOLOR,YOLOX,缩放后的YOLOv4,YOLOv5等等。该模型是从头开始使用MS COCO数据集进行训练的,而没有使用其他数据集或预训练权重。YOLOv7的源代码可在GitHub上获得。 + +![YOLOv7与SOTA物体检测器的比较](https://github.com/ultralytics/ultralytics/assets/26833433/5e1e0420-8122-4c79-b8d0-2860aa79af92) +**最先进物体检测器的比较**。从表2的结果可以看出,所提出的方法在速度和准确度的均衡上最佳。将YOLOv7-tiny-SiLU与YOLOv5-N(r6.1)进行比较,我们的方法在AP上快了127 FPS,准确度提高了10.7%。此外,YOLOv7在161 FPS的帧率下具有51.4%的AP,而具有相同AP的PPYOLOE-L仅具有78 FPS的帧率。在参数使用方面,YOLOv7比PPYOLOE-L少了41%。将YOLOv7-X与114 FPS的推理速度与YOLOv5-L(r6.1)的99 FPS的推理速度进行比较,YOLOv7-X可以提高3.9%的AP。如果将YOLOv7-X与类似规模的YOLOv5-X(r6.1)进行比较,YOLOv7-X的推理速度比YOLOv5-X快31 
FPS。此外,就参数和计算量而言,与YOLOv5-X(r6.1)相比,YOLOv7-X减少了22%的参数和8%的计算量,但AP提高了2.2%([来源](https://arxiv.org/pdf/2207.02696.pdf))。 + +## 概述 + +实时物体检测是许多计算机视觉系统的重要组件,包括多目标跟踪,自动驾驶,机器人技术和医学图像分析等。近年来,实时物体检测的发展一直致力于设计高效的架构,并提高各种CPU,GPU和神经处理单元(NPU)的推理速度。YOLOv7支持移动GPU和GPU设备,从边缘到云端。 + +与传统的实时物体检测器侧重于架构优化不同,YOLOv7引入了对训练过程优化的关注。这包括模块和优化方法,旨在提高目标检测的准确性而不增加推理成本,这个概念被称为“可训练无需付费”。 + +## 主要特性 + +YOLOv7引入了几个关键特性: + +1. **模型重新参数化**:YOLOv7提出了一种计划好的重新参数化模型,它是一种适用于不同网络中的层的策略,具有梯度传播路径的概念。 + +2. **动态标签分配**:对多个输出层的模型进行训练会遇到一个新问题:“如何为不同分支的输出分配动态目标?”为了解决这个问题,YOLOv7引入了一种新的标签分配方法,称为粗到细的引导式标签分配。 + +3. **扩展和复合缩放**:YOLOv7提出了适用于实时物体检测器的“扩展”和“复合缩放”方法,可以有效利用参数和计算。 + +4. **效率**:YOLOv7提出的方法可以有效地减少最先进实时物体检测器的约40%的参数和50%的计算量,并具有更快的推理速度和更高的检测准确度。 + +## 使用示例 + +截至撰写本文时,Ultralytics当前不支持YOLOv7模型。因此,任何希望使用YOLOv7的用户都需要直接参考YOLOv7 GitHub存储库中的安装和使用说明。 + +这是您可能采取的使用YOLOv7的典型步骤的简要概述: + +1. 访问YOLOv7 GitHub存储库:[https://github.com/WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7)。 + +2. 按照README文件中提供的说明进行安装。这通常涉及克隆存储库,安装必要的依赖项,并设置任何必要的环境变量。 + +3. 安装完成后,您可以根据存储库中提供的使用说明训练和使用模型。这通常涉及准备数据集,配置模型参数,训练模型,然后使用训练好的模型执行物体检测。 + +请注意,具体的步骤可能因您的特定用例和YOLOv7存储库的当前状态而有所不同。因此,强烈建议直接参考YOLOv7 GitHub存储库中提供的说明。 + +我们对这可能造成的任何不便表示歉意,并将努力更新此文档以提供针对Ultralytics的YOLOv7支持的使用示例。 + +## 引用和致谢 + +我们要感谢YOLOv7的作者在实时物体检测领域做出的重大贡献: + +!!! 
Quote "" + + === "BibTeX" + + ```bibtex + @article{wang2022yolov7, + title={{YOLOv7}: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors}, + author={Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + journal={arXiv preprint arXiv:2207.02696}, + year={2022} + } + ``` + +YOLOv7的原始论文可以在[arXiv](https://arxiv.org/pdf/2207.02696.pdf)上找到。作者已将其工作公开,并且代码库可在[GitHub](https://github.com/WongKinYiu/yolov7)中访问。我们感谢他们在推动该领域发展并使其工作对广大社区可访问的努力。 diff --git a/docs/zh/models/yolov8.md b/docs/zh/models/yolov8.md new file mode 100644 index 0000000..a9844e1 --- /dev/null +++ b/docs/zh/models/yolov8.md @@ -0,0 +1,162 @@ +--- +comments: true +description: 探索YOLOv8的激动人心功能,这是我们实时目标检测器的最新版本!了解高级架构、预训练模型和精确度与速度的最佳平衡如何使YOLOv8成为您进行目标检测任务的理想选择。 +keywords: YOLOv8,Ultralytics,实时目标检测器,预训练模型,文档,目标检测,YOLO系列,高级架构,精确度,速度 +--- + +# YOLOv8 + +## 概述 + +YOLOv8是YOLO系列实时目标检测器的最新版本,以其在准确度和速度方面的卓越性能而闻名。在构建在之前YOLO版本的基础上,YOLOv8引入了新功能和优化,使其成为各种应用领域中各种目标检测任务的理想选择。 + +![Ultralytics YOLOv8](https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/yolo-comparison-plots.png) + +## 主要功能 + +- **先进的主干和中间架构:** YOLOv8采用最先进的主干和中间架构,提供了更好的特征提取和目标检测性能。 +- **无锚分割Ultralytics头:** YOLOv8采用无锚分割的Ultralytics头,相比于基于锚点的方法,可以提供更高的准确性和更高效的检测过程。 +- **优化的准确度和速度平衡:** YOLOv8专注于在准确度和速度之间维持最佳平衡,适用于各种实时目标检测任务。 +- **多种预训练模型:** YOLOv8提供了一系列预训练模型,以满足各种任务和性能要求,更容易找到适合特定用例的模型。 + +## 支持的任务和模式 + +YOLOv8系列提供了多种模型,每个模型专门用于计算机视觉中的特定任务。这些模型旨在满足各种要求,从目标检测到更复杂的任务,如实例分割、姿态/关键点检测和分类。 + +YOLOv8系列的每个变体都针对其相应的任务进行了优化,确保高性能和准确性。此外,这些模型与各种操作模式兼容,包括[推理](../modes/predict.md)、[验证](../modes/val.md)、[训练](../modes/train.md)和[导出](../modes/export.md),便于在部署和开发的不同阶段使用。 + +| 模型 | 文件名 | 任务 | 推理 | 验证 | 训练 | 导出 | +|-------------|----------------------------------------------------------------------------------------------------------------|-----------------------------|----|----|----|----| +| YOLOv8 | `yolov8n.pt` `yolov8s.pt` `yolov8m.pt` `yolov8l.pt` `yolov8x.pt` | [检测](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| 
YOLOv8-seg | `yolov8n-seg.pt` `yolov8s-seg.pt` `yolov8m-seg.pt` `yolov8l-seg.pt` `yolov8x-seg.pt` | [实例分割](../tasks/segment.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-pose | `yolov8n-pose.pt` `yolov8s-pose.pt` `yolov8m-pose.pt` `yolov8l-pose.pt` `yolov8x-pose.pt` `yolov8x-pose-p6.pt` | [姿态/关键点](../tasks/pose.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-cls | `yolov8n-cls.pt` `yolov8s-cls.pt` `yolov8m-cls.pt` `yolov8l-cls.pt` `yolov8x-cls.pt` | [分类](../tasks/classify.md) | ✅ | ✅ | ✅ | ✅ | + +这个表格提供了YOLOv8模型变种的概览,突出了它们在特定任务中的适用性,以及它们与各种操作模式(如推理、验证、训练和导出)的兼容性。它展示了YOLOv8系列的多功能性和鲁棒性,使它们适用于计算机视觉中各种应用。 + +## 性能指标 + +!!! Performance + + === "检测(COCO)" + + 有关在[COCO](https://docs.ultralytics.com/datasets/detect/coco/)上训练的这些模型的用法示例,请参见[Detection Docs](https://docs.ultralytics.com/tasks/detect/),其中包括80个预训练的类别。 + + | 模型 | 大小
(pixels) | mAPval
50-95 | 速度
CPU ONNX
(ms) | 速度
A100 TensorRT
(ms) | 参数
(M) | FLOPs
(B) | + | ---------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------------------- | ---------------------------------- | ------------------ | ----------------- | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | + + === "检测(Open Images V7)" + + 有关在[Open Image V7](https://docs.ultralytics.com/datasets/detect/open-images-v7/)上训练的这些模型的用法示例,请参见[Detection Docs](https://docs.ultralytics.com/tasks/detect/),其中包括600个预训练的类别。 + + | 模型 | 大小
(pixels) | mAPval
50-95 | 速度
CPU ONNX
(ms) | 速度
A100 TensorRT
(ms) | 参数
(M) | FLOPs
(B) | + | ------------------------------------------------------------------------------------------- | --------------------- | -------------------- | ------------------------------ | ---------------------------------- | ------------------ | ----------------- | + | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-oiv7.pt) | 640 | 18.4 | 142.4 | 1.21 | 3.5 | 10.5 | + | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-oiv7.pt) | 640 | 27.7 | 183.1 | 1.40 | 11.4 | 29.7 | + | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-oiv7.pt) | 640 | 33.6 | 408.5 | 2.26 | 26.2 | 80.6 | + | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-oiv7.pt) | 640 | 34.9 | 596.9 | 2.43 | 44.1 | 167.4 | + | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-oiv7.pt) | 640 | 36.3 | 860.6 | 3.56 | 68.7 | 260.6 | + + === "分割(COCO)" + + 有关在[COCO](https://docs.ultralytics.com/datasets/segment/coco/)上训练的这些模型的用法示例,请参见[Segmentation Docs](https://docs.ultralytics.com/tasks/segment/),其中包括80个预训练的类别。 + + | 模型 | 大小
(pixels) | mAPbox
50-95 | mAPmask
50-95 | 速度
CPU ONNX
(ms) | 速度
A100 TensorRT
(ms) | 参数
(M) | FLOPs
(B) | + | -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ---------------------------------- | ------------------ | ----------------- | + | [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | + | [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | + | [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | + | [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | + | [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | + + === "分类(ImageNet)" + + 有关在[ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/)上训练的这些模型的用法示例,请参见[Classification Docs](https://docs.ultralytics.com/tasks/classify/),其中包括1000个预训练的类别。 + + | 模型 | 大小
(pixels) | 准确率
top1 | 准确率
top5 | 速度
CPU ONNX
(ms) | 速度
A100 TensorRT
(ms) | 参数
(M) | FLOPs
(B) at 640 | + | ---------------------------------------------------------------------------------------------- | --------------------- | ------------------ | ------------------ | ------------------------------ | ---------------------------------- | ------------------ | ------------------------ | + | [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | + | [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | + | [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | + | [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | + | [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | + + === "姿态(COCO)" + + 有关在[COCO](https://docs.ultralytics.com/datasets/pose/coco/)上训练的这些模型的用法示例,请参见[Pose Estimation Docs](https://docs.ultralytics.com/tasks/pose/),其中包括1个预训练的类别,'person'。 + + | 模型 | 大小
(pixels) | mAPpose
50-95 | mAPpose
50 | 速度
CPU ONNX
(ms) | 速度
A100 TensorRT
(ms) | 参数
(M) | FLOPs
(B) | + | ---------------------------------------------------------------------------------------------------- | --------------------- | --------------------- | ------------------ | ------------------------------ | ---------------------------------- | ------------------ | ----------------- | + | [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | + | [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | + | [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | + | [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | + | [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | + | [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | + +## 用法示例 + +这个示例提供了关于YOLOv8训练和推理的简单示例。有关这些和其他[模式](../modes/index.md)的完整文档,请参见[Predict](../modes/predict.md),[Train](../modes/train.md),[Val](../modes/val.md)和[Export](../modes/export.md)文档页面。 + +请注意,以下示例是针对用于目标检测的YOLOv8 [Detect](../tasks/detect.md)模型。有关其他支持的任务,请参见[Segment](../tasks/segment.md)、[Classify](../tasks/classify.md)和[Pose](../tasks/pose.md)文档。 + +!!! 
Example "示例" + + === "Python" + + 可以将PyTorch预训练的`*.pt`模型和配置`*.yaml`文件传递给`YOLO()`类,在python中创建一个模型实例: + + ```python + from ultralytics import YOLO + + # 加载一个在COCO预训练的YOLOv8n模型 + model = YOLO('yolov8n.pt') + + # 显示模型信息(可选) + model.info() + + # 使用COCO8示例数据集训练模型100个epoch + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) + + # 使用YOLOv8n模型在'bus.jpg'图片上运行推理 + results = model('path/to/bus.jpg') + ``` + + === "CLI" + + 可以使用CLI命令直接运行模型: + + ```bash + # 加载一个在COCO预训练的YOLOv8n模型,并在COCO8示例数据集上训练100个epoch + yolo train model=yolov8n.pt data=coco8.yaml epochs=100 imgsz=640 + + # 加载一个在COCO预训练的YOLOv8n模型,并在'bus.jpg'图片上运行推理 + yolo predict model=yolov8n.pt source=path/to/bus.jpg + ``` + +## 引用和致谢 + +如果您在工作中使用YOLOv8模型或此存储库中的其他软件,请使用以下格式进行引用: + +!!! Quote "引用" + + === "BibTeX" + + ```bibtex + @software{yolov8_ultralytics, + author = {Glenn Jocher and Ayush Chaurasia and Jing Qiu}, + title = {Ultralytics YOLOv8}, + version = {8.0.0}, + year = {2023}, + url = {https://github.com/ultralytics/ultralytics}, + orcid = {0000-0001-5950-6979, 0000-0002-7603-6750, 0000-0003-3783-7069}, + license = {AGPL-3.0} + } + ``` + +请注意,DOI正在等待中,DOI将在可用时添加到引用中。YOLOv8模型根据[AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE)和[企业许可证](https://ultralytics.com/license)提供。 diff --git a/docs/zh/modes/benchmark.md b/docs/zh/modes/benchmark.md new file mode 100644 index 0000000..53a14e5 --- /dev/null +++ b/docs/zh/modes/benchmark.md @@ -0,0 +1,94 @@ +--- +comments: true +description: 了解如何评估YOLOv8在各种导出格式下的速度和准确性,获取mAP50-95、accuracy_top5等指标的洞察。 +keywords: Ultralytics, YOLOv8, 基准测试, 速度分析, 准确性分析, mAP50-95, accuracy_top5, ONNX, OpenVINO, TensorRT, YOLO导出格式 +--- + +# 使用Ultralytics YOLO进行模型基准测试 + +Ultralytics YOLO生态系统和集成 + +## 介绍 + +一旦您的模型经过训练和验证,下一个合乎逻辑的步骤是评估它在各种实际场景中的性能。Ultralytics YOLOv8的基准模式通过提供一个健壮的框架来评估模型在一系列导出格式中的速度和准确性,为此目的服务。 + +## 为什么基准测试至关重要? 
+ +- **明智的决策:** 洞察速度和准确性之间的权衡。 +- **资源分配:** 理解不同的导出格式在不同硬件上的性能表现。 +- **优化:** 了解哪种导出格式为您的特定用例提供最佳性能。 +- **成本效益:** 根据基准测试结果,更有效地利用硬件资源。 + +### 基准模式的关键指标 + +- **mAP50-95:** 用于物体检测、分割和姿态估计。 +- **accuracy_top5:** 用于图像分类。 +- **推断时间:** 处理每张图片的时间(毫秒)。 + +### 支持的导出格式 + +- **ONNX:** 为了最佳的CPU性能 +- **TensorRT:** 为了最大化的GPU效率 +- **OpenVINO:** 针对Intel硬件的优化 +- **CoreML、TensorFlow SavedModel 等:** 满足多样化部署需求。 + +!!! Tip "提示" + + * 导出到ONNX或OpenVINO可实现高达3倍CPU速度提升。 + * 导出到TensorRT可实现高达5倍GPU速度提升。 + +## 使用示例 + +在所有支持的导出格式上运行YOLOv8n基准测试,包括ONNX、TensorRT等。更多导出参数的完整列表请见下方的参数部分。 + +!!! Example "示例" + + === "Python" + + ```python + from ultralytics.utils.benchmarks import benchmark + + # 在GPU上进行基准测试 + benchmark(model='yolov8n.pt', data='coco8.yaml', imgsz=640, half=False, device=0) + ``` + === "CLI" + + ```bash + yolo benchmark model=yolov8n.pt data='coco8.yaml' imgsz=640 half=False device=0 + ``` + +## 参数 + +参数如 `model`、`data`、`imgsz`、`half`、`device` 和 `verbose` 等,为用户提供了灵活性,以便根据具体需求微调基准测试,并轻松比较不同导出格式的性能。 + +| 键 | 值 | 描述 | +|-----------|---------|----------------------------------------------------| +| `model` | `None` | 模型文件路径,如 yolov8n.pt, yolov8n.yaml | +| `data` | `None` | 引用基准测试数据集的YAML路径(标记为 `val`) | +| `imgsz` | `640` | 图像大小作为标量或(h, w)列表,如 (640, 480) | +| `half` | `False` | FP16量化 | +| `int8` | `False` | INT8量化 | +| `device` | `None` | 运行设备,如 cuda device=0 或 device=0,1,2,3 或 device=cpu | +| `verbose` | `False` | 错误时不继续(布尔值),或验证阈值下限(浮点数) | + +## 导出格式 + +基准测试将尝试在下方列出的所有可能的导出格式上自动运行。 + +| 格式 | `format` 参数 | 模型 | 元数据 | 参数 | +|--------------------------------------------------------------------|---------------|---------------------------|-----|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | 
+| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | + +在[导出](https://docs.ultralytics.com/modes/export/)页面查看完整的 `export` 详情。 diff --git a/docs/zh/modes/export.md b/docs/zh/modes/export.md new file mode 100644 index 0000000..dadb1f1 --- /dev/null +++ b/docs/zh/modes/export.md @@ -0,0 +1,108 @@ +--- +comments: true +description: 如何逐步指导您将 YOLOv8 模型导出到各种格式,如 ONNX、TensorRT、CoreML 等以进行部署。现在就探索! +keywords: YOLO, YOLOv8, Ultralytics, 模型导出, ONNX, TensorRT, CoreML, TensorFlow SavedModel, OpenVINO, PyTorch, 导出模型 +--- + +# Ultralytics YOLO 的模型导出 + +Ultralytics YOLO 生态系统和集成 + +## 引言 + +训练模型的最终目标是将其部署到现实世界的应用中。Ultralytics YOLOv8 的导出模式提供了多种选项,用于将您训练好的模型导出到不同的格式,从而可以在各种平台和设备上部署。本综合指南旨在带您逐步了解模型导出的细节,展示如何实现最大的兼容性和性能。 + +

+
+ +
+ 观看:如何导出自定义训练的 Ultralytics YOLOv8 模型并在网络摄像头上实时推理。 +

+ +## 为什么选择 YOLOv8 的导出模式? + +- **多功能性:** 支持导出到多种格式,包括 ONNX、TensorRT、CoreML 等。 +- **性能:** 使用 TensorRT 可实现高达 5 倍 GPU 加速,使用 ONNX 或 OpenVINO 可实现高达 3 倍 CPU 加速。 +- **兼容性:** 使您的模型可以在众多硬件和软件环境中广泛部署。 +- **易用性:** 简单的 CLI 和 Python API,快速直接地进行模型导出。 + +### 导出模式的关键特性 + +以下是一些突出的功能: + +- **一键导出:** 用于导出到不同格式的简单命令。 +- **批量导出:** 支持批推理能力的模型导出。 +- **优化推理:** 导出的模型针对更快的推理时间进行优化。 +- **教学视频:** 提供深入指导和教学,确保流畅的导出体验。 + +!!! Tip "提示" + + * 导出到 ONNX 或 OpenVINO,以实现高达 3 倍的 CPU 加速。 + * 导出到 TensorRT,以实现高达 5 倍的 GPU 加速。 + +## 使用示例 + +将 YOLOv8n 模型导出为 ONNX 或 TensorRT 等不同格式。查看下面的参数部分,了解完整的导出参数列表。 + +!!! Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载模型 + model = YOLO('yolov8n.pt') # 加载官方模型 + model = YOLO('path/to/best.pt') # 加载自定义训练的模型 + + # 导出模型 + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # 导出官方模型 + yolo export model=path/to/best.pt format=onnx # 导出自定义训练的模型 + ``` + +## 参数 + +YOLO 模型的导出设置是指用于在其他环境或平台中使用模型时保存或导出模型的各种配置和选项。这些设置会影响模型的性能、大小和与不同系统的兼容性。一些常见的 YOLO 导出设置包括导出的模型文件格式(例如 ONNX、TensorFlow SavedModel)、模型将在哪个设备上运行(例如 CPU、GPU)以及是否包含附加功能,如遮罩或每个框多个标签。其他可能影响导出过程的因素包括模型用途的具体细节以及目标环境或平台的要求或限制。重要的是要仔细考虑和配置这些设置,以确保导出的模型针对预期用例经过优化,并且可以在目标环境中有效使用。 + +| 键 | 值 | 描述 | +|-------------|-----------------|-------------------------------------| +| `format` | `'torchscript'` | 导出的格式 | +| `imgsz` | `640` | 图像尺寸,可以是标量或 (h, w) 列表,比如 (640, 480) | +| `keras` | `False` | 使用 Keras 导出 TF SavedModel | +| `optimize` | `False` | TorchScript:为移动设备优化 | +| `half` | `False` | FP16 量化 | +| `int8` | `False` | INT8 量化 | +| `dynamic` | `False` | ONNX/TensorRT:动态轴 | +| `simplify` | `False` | ONNX/TensorRT:简化模型 | +| `opset` | `None` | ONNX:opset 版本(可选,默认为最新版本) | +| `workspace` | `4` | TensorRT:工作区大小(GB) | +| `nms` | `False` | CoreML:添加 NMS | + +## 导出格式 + +下表中提供了可用的 YOLOv8 导出格式。您可以使用 `format` 参数导出任何格式的模型,比如 `format='onnx'` 或 `format='engine'`。 + +| 格式 | `format` 参数 | 模型 | 元数据 | 参数 | 
+|--------------------------------------------------------------------|---------------|---------------------------|-----|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half` | diff --git a/docs/zh/modes/index.md b/docs/zh/modes/index.md new file mode 100644 index 0000000..b82c43c --- /dev/null +++ b/docs/zh/modes/index.md @@ -0,0 +1,73 @@ +--- +comments: true +description: 从训练到跟踪,充分利用Ultralytics的YOLOv8。获取支持的每种模式的见解和示例,包括验证、导出和基准测试。 +keywords: Ultralytics, YOLOv8, 机器学习, 目标检测, 训练, 验证, 预测, 导出, 跟踪, 基准测试 +--- + +# Ultralytics YOLOv8 模式 + +Ultralytics 
YOLO生态系统及整合 + +## 简介 + +Ultralytics YOLOv8不仅仅是另一个目标检测模型;它是一个多功能框架,旨在涵盖机器学习模型的整个生命周期——从数据摄取和模型训练到验证、部署和实际跟踪。每种模式都服务于一个特定的目的,并设计为提供您在不同任务和用例中所需的灵活性和效率。 + +

+
+ +
+ 观看: Ultralytics模式教程:训练、验证、预测、导出和基准测试。 +

+ +### 模式概览 + +理解Ultralytics YOLOv8所支持的不同**模式**对于充分利用您的模型至关重要: + +- **训练(Train)**模式:在自定义或预加载的数据集上微调您的模型。 +- **验证(Val)**模式:训练后进行校验,以验证模型性能。 +- **预测(Predict)**模式:在真实世界数据上释放模型的预测能力。 +- **导出(Export)**模式:以各种格式使模型准备就绪,部署至生产环境。 +- **跟踪(Track)**模式:将您的目标检测模型扩展到实时跟踪应用中。 +- **基准(Benchmark)**模式:在不同部署环境中分析模型的速度和准确性。 + +本综合指南旨在为您提供每种模式的概览和实用见解,帮助您充分发挥YOLOv8的全部潜力。 + +## [训练](train.md) + +训练模式用于在自定义数据集上训练YOLOv8模型。在此模式下,模型将使用指定的数据集和超参数进行训练。训练过程包括优化模型的参数,使其能够准确预测图像中对象的类别和位置。 + +[训练示例](train.md){ .md-button } + +## [验证](val.md) + +验证模式用于训练YOLOv8模型后进行验证。在此模式下,模型在验证集上进行评估,以衡量其准确性和泛化能力。此模式可以用来调整模型的超参数,以改善其性能。 + +[验证示例](val.md){ .md-button } + +## [预测](predict.md) + +预测模式用于使用训练好的YOLOv8模型在新图像或视频上进行预测。在此模式下,模型从检查点文件加载,用户可以提供图像或视频以执行推理。模型预测输入图像或视频中对象的类别和位置。 + +[预测示例](predict.md){ .md-button } + +## [导出](export.md) + +导出模式用于将YOLOv8模型导出为可用于部署的格式。在此模式下,模型被转换为其他软件应用或硬件设备可以使用的格式。当模型部署到生产环境时,此模式十分有用。 + +[导出示例](export.md){ .md-button } + +## [跟踪](track.md) + +跟踪模式用于使用YOLOv8模型实时跟踪对象。在此模式下,模型从检查点文件加载,用户可以提供实时视频流以执行实时对象跟踪。此模式适用于监控系统或自动驾驶汽车等应用。 + +[跟踪示例](track.md){ .md-button } + +## [基准](benchmark.md) + +基准模式用于对YOLOv8的各种导出格式的速度和准确性进行评估。基准提供了有关导出格式大小、其针对目标检测、分割和姿态的`mAP50-95`指标,或针对分类的`accuracy_top5`指标,以及每张图像跨各种导出格式(如ONNX、OpenVINO、TensorRT等)的推理时间(以毫秒为单位)的信息。此信息可以帮助用户根据对速度和准确性的具体需求,选择最佳的导出格式。 + +[基准示例](benchmark.md){ .md-button } diff --git a/docs/zh/modes/predict.md b/docs/zh/modes/predict.md new file mode 100644 index 0000000..9defcd5 --- /dev/null +++ b/docs/zh/modes/predict.md @@ -0,0 +1,714 @@ +--- +comments: true +description: 了解如何使用 YOLOv8 预测模式进行各种任务。学习关于不同推理源如图像,视频和数据格式的内容。 +keywords: Ultralytics, YOLOv8, 预测模式, 推理源, 预测任务, 流式模式, 图像处理, 视频处理, 机器学习, 人工智能 +--- + +# 使用 Ultralytics YOLO 进行模型预测 + +Ultralytics YOLO 生态系统和集成 + +## 引言 + +在机器学习和计算机视觉领域,将视觉数据转化为有用信息的过程被称为'推理'或'预测'。Ultralytics YOLOv8 提供了一个强大的功能,称为 **预测模式**,它专为各种数据来源的高性能实时推理而设计。 + +

+
+ +
+ 观看: 如何从 Ultralytics YOLOv8 模型中提取输出,用于自定义项目。 +

+ +## 实际应用领域 + +| 制造业 | 体育 | 安全 | +|:-------------------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------:| +| ![车辆零部件检测](https://github.com/RizwanMunawar/ultralytics/assets/62513924/a0f802a8-0776-44cf-8f17-93974a4a28a1) | ![足球运动员检测](https://github.com/RizwanMunawar/ultralytics/assets/62513924/7d320e1f-fc57-4d7f-a691-78ee579c3442) | ![人员摔倒检测](https://github.com/RizwanMunawar/ultralytics/assets/62513924/86437c4a-3227-4eee-90ef-9efb697bdb43) | +| 车辆零部件检测 | 足球运动员检测 | 人员摔倒检测 | + +## 为何使用 Ultralytics YOLO 进行推理? + +以下是考虑使用 YOLOv8 的预测模式满足您的各种推理需求的几个原因: + +- **多功能性:** 能够对图像、视频乃至实时流进行推理。 +- **性能:** 工程化为实时、高速处理而设计,不牺牲准确性。 +- **易用性:** 直观的 Python 和 CLI 接口,便于快速部署和测试。 +- **高度可定制性:** 多种设置和参数可调,依据您的具体需求调整模型的推理行为。 + +### 预测模式的关键特性 + +YOLOv8 的预测模式被设计为强大且多功能,包括以下特性: + +- **兼容多个数据来源:** 无论您的数据是单独图片,图片集合,视频文件,还是实时视频流,预测模式都能胜任。 +- **流式模式:** 使用流式功能生成一个内存高效的 `Results` 对象生成器。在调用预测器时,通过设置 `stream=True` 来启用此功能。 +- **批处理:** 能够在单个批次中处理多个图片或视频帧,进一步加快推理时间。 +- **易于集成:** 由于其灵活的 API,易于与现有数据管道和其他软件组件集成。 + +Ultralytics YOLO 模型在进行推理时返回一个 Python `Results` 对象列表,或者当传入 `stream=True` 时,返回一个内存高效的 Python `Results` 对象生成器: + +!!! 
Example "预测" + + === "使用 `stream=False` 返回列表" + ```python + from ultralytics import YOLO + + # 加载模型 + model = YOLO('yolov8n.pt') # 预训练的 YOLOv8n 模型 + + # 在图片列表上运行批量推理 + results = model(['im1.jpg', 'im2.jpg']) # 返回 Results 对象列表 + + # 处理结果列表 + for result in results: + boxes = result.boxes # 边界框输出的 Boxes 对象 + masks = result.masks # 分割掩码输出的 Masks 对象 + keypoints = result.keypoints # 姿态输出的 Keypoints 对象 + probs = result.probs # 分类输出的 Probs 对象 + ``` + + === "使用 `stream=True` 返回生成器" + ```python + from ultralytics import YOLO + + # 加载模型 + model = YOLO('yolov8n.pt') # 预训练的 YOLOv8n 模型 + + # 在图片列表上运行批量推理 + results = model(['im1.jpg', 'im2.jpg'], stream=True) # 返回 Results 对象生成器 + + # 处理结果生成器 + for result in results: + boxes = result.boxes # 边界框输出的 Boxes 对象 + masks = result.masks # 分割掩码输出的 Masks 对象 + keypoints = result.keypoints # 姿态输出的 Keypoints 对象 + probs = result.probs # 分类输出的 Probs 对象 + ``` + +## 推理来源 + +YOLOv8 可以处理推理输入的不同类型,如下表所示。来源包括静态图像、视频流和各种数据格式。表格还表示了每种来源是否可以在流式模式下使用,使用参数 `stream=True` ✅。流式模式对于处理视频或实时流非常有利,因为它创建了结果的生成器,而不是将所有帧加载到内存。 + +!!! 
Tip "提示" + + 使用 `stream=True` 处理长视频或大型数据集来高效地管理内存。当 `stream=False` 时,所有帧或数据点的结果都将存储在内存中,这可能很快导致内存不足错误。相对地,`stream=True` 使用生成器,只保留当前帧或数据点的结果在内存中,显著减少了内存消耗,防止内存不足问题。 + +| 来源 | 参数 | 类型 | 备注 | +|-----------|--------------------------------------------|----------------|----------------------------------------------------| +| 图像 | `'image.jpg'` | `str` 或 `Path` | 单个图像文件。 | +| URL | `'https://ultralytics.com/images/bus.jpg'` | `str` | 图像的 URL 地址。 | +| 截屏 | `'screen'` | `str` | 截取屏幕图像。 | +| PIL | `Image.open('im.jpg')` | `PIL.Image` | RGB 通道的 HWC 格式图像。 | +| OpenCV | `cv2.imread('im.jpg')` | `np.ndarray` | BGR 通道的 HWC 格式图像 `uint8 (0-255)`。 | +| numpy | `np.zeros((640,1280,3))` | `np.ndarray` | BGR 通道的 HWC 格式图像 `uint8 (0-255)`。 | +| torch | `torch.zeros(16,3,320,640)` | `torch.Tensor` | RGB 通道的 BCHW 格式图像 `float32 (0.0-1.0)`。 | +| CSV | `'sources.csv'` | `str` 或 `Path` | 包含图像、视频或目录路径的 CSV 文件。 | +| 视频 ✅ | `'video.mp4'` | `str` 或 `Path` | 如 MP4, AVI 等格式的视频文件。 | +| 目录 ✅ | `'path/'` | `str` 或 `Path` | 包含图像或视频文件的目录路径。 | +| 通配符 ✅ | `'path/*.jpg'` | `str` | 匹配多个文件的通配符模式。使用 `*` 字符作为通配符。 | +| YouTube ✅ | `'https://youtu.be/LNwODJXcvt4'` | `str` | YouTube 视频的 URL 地址。 | +| 流媒体 ✅ | `'rtsp://example.com/media.mp4'` | `str` | RTSP, RTMP, TCP 或 IP 地址等流协议的 URL 地址。 | +| 多流媒体 ✅ | `'list.streams'` | `str` 或 `Path` | 一个流 URL 每行的 `*.streams` 文本文件,例如 8 个流将以 8 的批处理大小运行。 | + +下面为每种来源类型使用代码的示例: + +!!! 
Example "预测来源" + + === "图像" + 对图像文件进行推理。 + ```python + from ultralytics import YOLO + + # 加载预训练的 YOLOv8n 模型 + model = YOLO('yolov8n.pt') + + # 定义图像文件的路径 + source = 'path/to/image.jpg' + + # 对来源进行推理 + results = model(source) # Results 对象列表 + ``` + + === "截屏" + 对当前屏幕内容作为截屏进行推理。 + ```python + from ultralytics import YOLO + + # 加载预训练的 YOLOv8n 模型 + model = YOLO('yolov8n.pt') + + # 定义当前截屏为来源 + source = 'screen' + + # 对来源进行推理 + results = model(source) # Results 对象列表 + ``` + + === "URL" + 对通过 URL 远程托管的图像或视频进行推理。 + ```python + from ultralytics import YOLO + + # 加载预训练的 YOLOv8n 模型 + model = YOLO('yolov8n.pt') + + # 定义远程图像或视频 URL + source = 'https://ultralytics.com/images/bus.jpg' + + # 对来源进行推理 + results = model(source) # Results 对象列表 + ``` + + === "PIL" + 对使用 Python Imaging Library (PIL) 打开的图像进行推理。 + ```python + from PIL import Image + from ultralytics import YOLO + + # 加载预训练的 YOLOv8n 模型 + model = YOLO('yolov8n.pt') + + # 使用 PIL 打开图像 + source = Image.open('path/to/image.jpg') + + # 对来源进行推理 + results = model(source) # Results 对象列表 + ``` + + === "OpenCV" + 对使用 OpenCV 读取的图像进行推理。 + ```python + import cv2 + from ultralytics import YOLO + + # 加载预训练的 YOLOv8n 模型 + model = YOLO('yolov8n.pt') + + # 使用 OpenCV 读取图像 + source = cv2.imread('path/to/image.jpg') + + # 对来源进行推理 + results = model(source) # Results 对象列表 + ``` + + === "numpy" + 对表示为 numpy 数组的图像进行推理。 + ```python + import numpy as np + from ultralytics import YOLO + + # 加载预训练的 YOLOv8n 模型 + model = YOLO('yolov8n.pt') + + # 创建一个 HWC 形状 (640, 640, 3) 的随机 numpy 数组,数值范围 [0, 255] 类型为 uint8 + source = np.random.randint(low=0, high=255, size=(640, 640, 3), dtype='uint8') + + # 对来源进行推理 + results = model(source) # Results 对象列表 + ``` + + === "torch" + 对表示为 PyTorch 张量的图像进行推理。 + ```python + import torch + from ultralytics import YOLO + + # 加载预训练的 YOLOv8n 模型 + model = YOLO('yolov8n.pt') + + # 创建一个 BCHW 形状 (1, 3, 640, 640) 的随机 torch 张量,数值范围 [0, 1] 类型为 float32 + source = torch.rand(1, 3, 640, 640, dtype=torch.float32) + + # 对来源进行推理 + results = 
model(source) # Results 对象列表 + ``` + + === "CSV" + 对 CSV 文件中列出的图像、URLs、视频和目录进行推理。 + ```python + import torch + from ultralytics import YOLO + + # 加载预训练的 YOLOv8n 模型 + model = YOLO('yolov8n.pt') + + # 定义一个包含图像、URLs、视频和目录路径的 CSV 文件路径 + source = 'path/to/file.csv' + + # 对来源进行推理 + results = model(source) # Results 对象列表 + ``` + + === "视频" + 对视频文件进行推理。使用 `stream=True` 时,可以创建一个 Results 对象的生成器,减少内存使用。 + ```python + from ultralytics import YOLO + + # 加载预训练的 YOLOv8n 模型 + model = YOLO('yolov8n.pt') + + # 定义视频文件路径 + source = 'path/to/video.mp4' + + # 对来源进行推理 + results = model(source, stream=True) # Results 对象的生成器 + ``` + + === "目录" + 对目录中的所有图像和视频进行推理。要包含子目录中的图像和视频,使用通配符模式,例如 `path/to/dir/**/*`。 + ```python + from ultralytics import YOLO + + # 加载预训练的 YOLOv8n 模型 + model = YOLO('yolov8n.pt') + + # 定义包含图像和视频文件用于推理的目录路径 + source = 'path/to/dir' + + # 对来源进行推理 + results = model(source, stream=True) # Results 对象的生成器 + ``` + + === "通配符" + 对与 `*` 字符匹配的所有图像和视频进行推理。 + ```python + from ultralytics import YOLO + + # 加载预训练的 YOLOv8n 模型 + model = YOLO('yolov8n.pt') + + # 定义一个目录下所有 JPG 文件的通配符搜索 + source = 'path/to/dir/*.jpg' + + # 或定义一个包括子目录的所有 JPG 文件的递归通配符搜索 + source = 'path/to/dir/**/*.jpg' + + # 对来源进行推理 + results = model(source, stream=True) # Results 对象的生成器 + ``` + + === "YouTube" + 在YouTube视频上运行推理。通过使用`stream=True`,您可以创建一个Results对象的生成器,以减少长视频的内存使用。 + ```python + from ultralytics import YOLO + + # 加载预训练的YOLOv8n模型 + model = YOLO('yolov8n.pt') + + # 定义源为YouTube视频URL + source = 'https://youtu.be/LNwODJXcvt4' + + # 在源上运行推理 + results = model(source, stream=True) # Results对象的生成器 + ``` + + === "Streams" + 使用RTSP、RTMP、TCP和IP地址协议在远程流媒体源上运行推理。如果在`*.streams`文本文件中提供了多个流,则将运行批量推理,例如,8个流将以批大小8运行,否则单个流将以批大小1运行。 + ```python + from ultralytics import YOLO + + # 加载预训练的YOLOv8n模型 + model = YOLO('yolov8n.pt') + + # 单流媒体源批大小1推理 + source = 'rtsp://example.com/media.mp4' # RTSP、RTMP、TCP或IP流媒体地址 + + # 多个流媒体源的批量推理(例如,8个流的批大小为8) + source = 'path/to/list.streams' # *.streams文本文件,每行一个流媒体地址 + + # 在源上运行推理 + results = 
model(source, stream=True) # Results对象的生成器 + ``` + +## 推理参数 + +`model.predict()` 在推理时接受多个参数,可以用来覆盖默认值: + +!!! Example "示例" + + ```python + from ultralytics import YOLO + + # 加载预训练的YOLOv8n模型 + model = YOLO('yolov8n.pt') + + # 在'bus.jpg'上运行推理,并附加参数 + model.predict('bus.jpg', save=True, imgsz=320, conf=0.5) + ``` + +支持的所有参数: + +| 名称 | 类型 | 默认值 | 描述 | +|-----------------|----------------|------------------------|------------------------------------------| +| `source` | `str` | `'ultralytics/assets'` | 图像或视频的源目录 | +| `conf` | `float` | `0.25` | 检测对象的置信度阈值 | +| `iou` | `float` | `0.7` | 用于NMS的交并比(IoU)阈值 | +| `imgsz` | `int or tuple` | `640` | 图像大小,可以是标量或(h, w)列表,例如(640, 480) | +| `half` | `bool` | `False` | 使用半精度(FP16) | +| `device` | `None or str` | `None` | 运行设备,例如 cuda device=0/1/2/3 或 device=cpu | +| `show` | `bool` | `False` | 如果可能,显示结果 | +| `save` | `bool` | `False` | 保存带有结果的图像 | +| `save_txt` | `bool` | `False` | 将结果保存为.txt文件 | +| `save_conf` | `bool` | `False` | 保存带有置信度分数的结果 | +| `save_crop` | `bool` | `False` | 保存带有结果的裁剪图像 | +| `show_labels` | `bool` | `True` | 隐藏标签 | +| `show_conf` | `bool` | `True` | 隐藏置信度分数 | +| `max_det` | `int` | `300` | 每张图像的最大检测数量 | +| `vid_stride` | `bool` | `False` | 视频帧速率跳跃 | +| `stream_buffer` | `bool` | `False` | 缓冲所有流媒体帧(True)或返回最新帧(False) | +| `line_width` | `None or int` | `None` | 边框线宽度。如果为None,则按图像大小缩放。 | +| `visualize` | `bool` | `False` | 可视化模型特征 | +| `augment` | `bool` | `False` | 应用图像增强到预测源 | +| `agnostic_nms` | `bool` | `False` | 类别不敏感的NMS | +| `retina_masks` | `bool` | `False` | 使用高分辨率分割掩码 | +| `classes` | `None or list` | `None` | 按类别过滤结果,例如 classes=0,或 classes=[0,2,3] | +| `boxes` | `bool` | `True` | 在分割预测中显示框 | + +## 图像和视频格式 + +YOLOv8支持多种图像和视频格式,如[data/utils.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/utils.py)所指定。请参阅下表了解有效的后缀名和示例预测命令。 + +### 图像 + +下表包含了Ultralytics支持的有效图像格式。 + +| 图像后缀 | 示例预测命令 | 参考链接 | 
+|-------|----------------------------------|-------------------------------------------------------------------------------| +| .bmp | `yolo predict source=image.bmp` | [Microsoft BMP文件格式](https://en.wikipedia.org/wiki/BMP_file_format) | +| .dng | `yolo predict source=image.dng` | [Adobe DNG](https://www.adobe.com/products/photoshop/extend.displayTab2.html) | +| .jpeg | `yolo predict source=image.jpeg` | [JPEG](https://en.wikipedia.org/wiki/JPEG) | +| .jpg | `yolo predict source=image.jpg` | [JPEG](https://en.wikipedia.org/wiki/JPEG) | +| .mpo | `yolo predict source=image.mpo` | [多图像对象](https://fileinfo.com/extension/mpo) | +| .png | `yolo predict source=image.png` | [便携式网络图形](https://en.wikipedia.org/wiki/PNG) | +| .tif | `yolo predict source=image.tif` | [标签图像文件格式](https://en.wikipedia.org/wiki/TIFF) | +| .tiff | `yolo predict source=image.tiff` | [标签图像文件格式](https://en.wikipedia.org/wiki/TIFF) | +| .webp | `yolo predict source=image.webp` | [WebP](https://en.wikipedia.org/wiki/WebP) | +| .pfm | `yolo predict source=image.pfm` | [便携式浮点映射](https://en.wikipedia.org/wiki/Netpbm#File_formats) | + +### 视频 + +以下表格包含有效的Ultralytics视频格式。 + +| 视频后缀名 | 示例预测命令 | 参考链接 | +|-------|----------------------------------|----------------------------------------------------------------------| +| .asf | `yolo predict source=video.asf` | [高级系统格式](https://en.wikipedia.org/wiki/Advanced_Systems_Format) | +| .avi | `yolo predict source=video.avi` | [音视频交错](https://en.wikipedia.org/wiki/Audio_Video_Interleave) | +| .gif | `yolo predict source=video.gif` | [图形交换格式](https://en.wikipedia.org/wiki/GIF) | +| .m4v | `yolo predict source=video.m4v` | [MPEG-4第14部分](https://en.wikipedia.org/wiki/M4V) | +| .mkv | `yolo predict source=video.mkv` | [Matroska](https://en.wikipedia.org/wiki/Matroska) | +| .mov | `yolo predict source=video.mov` | [QuickTime文件格式](https://en.wikipedia.org/wiki/QuickTime_File_Format) | +| .mp4 | `yolo predict source=video.mp4` | 
[MPEG-4第14部分](https://en.wikipedia.org/wiki/MPEG-4_Part_14) | +| .mpeg | `yolo predict source=video.mpeg` | [MPEG-1第2部分](https://en.wikipedia.org/wiki/MPEG-1) | +| .mpg | `yolo predict source=video.mpg` | [MPEG-1第2部分](https://en.wikipedia.org/wiki/MPEG-1) | +| .ts | `yolo predict source=video.ts` | [MPEG传输流](https://en.wikipedia.org/wiki/MPEG_transport_stream) | +| .wmv | `yolo predict source=video.wmv` | [Windows媒体视频](https://en.wikipedia.org/wiki/Windows_Media_Video) | +| .webm | `yolo predict source=video.webm` | [WebM项目](https://en.wikipedia.org/wiki/WebM) | + +## 处理结果 + +所有Ultralytics的`predict()`调用都将返回一个`Results`对象列表: + +!!! Example "结果" + + ```python + from ultralytics import YOLO + + # 加载预训练的YOLOv8n模型 + model = YOLO('yolov8n.pt') + + # 在图片上运行推理 + results = model('bus.jpg') # 1个Results对象的列表 + results = model(['bus.jpg', 'zidane.jpg']) # 2个Results对象的列表 + ``` + +`Results`对象具有以下属性: + +| 属性 | 类型 | 描述 | +|--------------|-----------------|------------------------------| +| `orig_img` | `numpy.ndarray` | 原始图像的numpy数组。 | +| `orig_shape` | `tuple` | 原始图像的形状,格式为(高度,宽度)。 | +| `boxes` | `Boxes, 可选` | 包含检测边界框的Boxes对象。 | +| `masks` | `Masks, 可选` | 包含检测掩码的Masks对象。 | +| `probs` | `Probs, 可选` | 包含每个类别的概率的Probs对象,用于分类任务。 | +| `keypoints` | `Keypoints, 可选` | 包含每个对象检测到的关键点的Keypoints对象。 | +| `speed` | `dict` | 以毫秒为单位的每张图片的预处理、推理和后处理速度的字典。 | +| `names` | `dict` | 类别名称的字典。 | +| `path` | `str` | 图像文件的路径。 | + +`Results`对象具有以下方法: + +| 方法 | 返回类型 | 描述 | +|-----------------|-----------------|----------------------------------------| +| `__getitem__()` | `Results` | 返回指定索引的Results对象。 | +| `__len__()` | `int` | 返回Results对象中的检测数量。 | +| `update()` | `None` | 更新Results对象的boxes, masks和probs属性。 | +| `cpu()` | `Results` | 将所有张量移动到CPU内存上的Results对象的副本。 | +| `numpy()` | `Results` | 将所有张量转换为numpy数组的Results对象的副本。 | +| `cuda()` | `Results` | 将所有张量移动到GPU内存上的Results对象的副本。 | +| `to()` | `Results` | 返回将张量移动到指定设备和dtype的Results对象的副本。 | +| `new()` | `Results` | 返回一个带有相同图像、路径和名称的新Results对象。 | +| `keys()` | 
`List[str]` | 返回非空属性名称的列表。 | +| `plot()` | `numpy.ndarray` | 绘制检测结果。返回带有注释的图像的numpy数组。 | +| `verbose()` | `str` | 返回每个任务的日志字符串。 | +| `save_txt()` | `None` | 将预测保存到txt文件中。 | +| `save_crop()` | `None` | 将裁剪的预测保存到`save_dir/cls/file_name.jpg`。 | +| `tojson()` | `None` | 将对象转换为JSON格式。 | + +有关更多详细信息,请参阅`Results`类的[文档](/../reference/engine/results.md)。 + +### 边界框(Boxes) + +`Boxes`对象可用于索引、操作和转换边界框到不同格式。 + +!!! Example "边界框(Boxes)" + + ```python + from ultralytics import YOLO + + # 加载预训练的YOLOv8n模型 + model = YOLO('yolov8n.pt') + + # 在图片上运行推理 + results = model('bus.jpg') + + # 查看结果 + for r in results: + print(r.boxes) # 打印包含检测边界框的Boxes对象 + ``` + +以下是`Boxes`类方法和属性的表格,包括它们的名称、类型和描述: + +| 名称 | 类型 | 描述 | +|-----------|---------------------|-------------------------| +| `cpu()` | 方法 | 将对象移动到CPU内存。 | +| `numpy()` | 方法 | 将对象转换为numpy数组。 | +| `cuda()` | 方法 | 将对象移动到CUDA内存。 | +| `to()` | 方法 | 将对象移动到指定的设备。 | +| `xyxy` | 属性 (`torch.Tensor`) | 以xyxy格式返回边界框。 | +| `conf` | 属性 (`torch.Tensor`) | 返回边界框的置信度值。 | +| `cls` | 属性 (`torch.Tensor`) | 返回边界框的类别值。 | +| `id` | 属性 (`torch.Tensor`) | 返回边界框的跟踪ID(如果可用)。 | +| `xywh` | 属性 (`torch.Tensor`) | 以xywh格式返回边界框。 | +| `xyxyn` | 属性 (`torch.Tensor`) | 以原始图像大小归一化的xyxy格式返回边界框。 | +| `xywhn` | 属性 (`torch.Tensor`) | 以原始图像大小归一化的xywh格式返回边界框。 | + +有关更多详细信息,请参阅`Boxes`类的[文档](/../reference/engine/results.md)。 + +### 掩码(Masks) + +`Masks`对象可用于索引、操作和将掩码转换为分段。 + +!!! 
Example "掩码(Masks)" + + ```python + from ultralytics import YOLO + + # 加载预训练的YOLOv8n-seg分割模型 + model = YOLO('yolov8n-seg.pt') + + # 在图片上运行推理 + results = model('bus.jpg') # results列表 + + # 查看结果 + for r in results: + print(r.masks) # 打印包含检测到的实例掩码的Masks对象 + ``` + +以下是`Masks`类方法和属性的表格,包括它们的名称、类型和描述: + +| 名称 | 类型 | 描述 | +|-----------|---------------------|----------------------| +| `cpu()` | 方法 | 将掩码张量返回到CPU内存。 | +| `numpy()` | 方法 | 将掩码张量转换为numpy数组。 | +| `cuda()` | 方法 | 将掩码张量返回到GPU内存。 | +| `to()` | 方法 | 将掩码张量带有指定设备和dtype返回。 | +| `xyn` | 属性 (`torch.Tensor`) | 以张量表示的归一化分段的列表。 | +| `xy` | 属性 (`torch.Tensor`) | 以像素坐标表示的分段的张量列表。 | + +有关更多详细信息,请参阅`Masks`类的[文档](/../reference/engine/results.md)。 + +### 关键点 (Keypoints) + +`Keypoints` 对象可以用于索引、操作和规范化坐标。 + +!!! Example "关键点" + + ```python + from ultralytics import YOLO + + # 加载预训练的YOLOv8n-pose 姿态模型 + model = YOLO('yolov8n-pose.pt') + + # 在图像上运行推理 + results = model('bus.jpg') # 结果列表 + + # 查看结果 + for r in results: + print(r.keypoints) # 打印包含检测到的关键点的Keypoints对象 + ``` + +以下是`Keypoints`类方法和属性的表格,包括它们的名称、类型和描述: + +| 名称 | 类型 | 描述 | +|-----------|--------------------|---------------------------| +| `cpu()` | 方法 | 返回CPU内存上的关键点张量。 | +| `numpy()` | 方法 | 返回作为numpy数组的关键点张量。 | +| `cuda()` | 方法 | 返回GPU内存上的关键点张量。 | +| `to()` | 方法 | 返回指定设备和dtype的关键点张量。 | +| `xyn` | 属性(`torch.Tensor`) | 规范化关键点的列表,表示为张量。 | +| `xy` | 属性(`torch.Tensor`) | 以像素坐标表示的关键点列表,表示为张量。 | +| `conf` | 属性(`torch.Tensor`) | 返回关键点的置信度值(如果有),否则返回None。 | + +有关更多详细信息,请参阅`Keypoints`类[文档](/../reference/engine/results.md)。 + +### 概率 (Probs) + +`Probs` 对象可以用于索引,获取分类的 `top1` 和 `top5` 索引和分数。 + +!!! 
Example "概率" + + ```python + from ultralytics import YOLO + + # 加载预训练的YOLOv8n-cls 分类模型 + model = YOLO('yolov8n-cls.pt') + + # 在图像上运行推理 + results = model('bus.jpg') # 结果列表 + + # 查看结果 + for r in results: + print(r.probs) # 打印包含检测到的类别概率的Probs对象 + ``` + +以下是`Probs`类的方法和属性的表格总结: + +| 名称 | 类型 | 描述 | +|------------|--------------------|-------------------------| +| `cpu()` | 方法 | 返回CPU内存上的概率张量的副本。 | +| `numpy()` | 方法 | 返回概率张量的副本作为numpy数组。 | +| `cuda()` | 方法 | 返回GPU内存上的概率张量的副本。 | +| `to()` | 方法 | 返回带有指定设备和dtype的概率张量的副本。 | +| `top1` | 属性(`int`) | 置信度最高的类别的索引。 | +| `top5` | 属性(`list[int]`) | 置信度最高的前5个类别的索引。 | +| `top1conf` | 属性(`torch.Tensor`) | 置信度最高的类别的置信度。 | +| `top5conf` | 属性(`torch.Tensor`) | 置信度最高的前5个类别的置信度。 | + +有关更多详细信息,请参阅`Probs`类[文档](/../reference/engine/results.md)。 + +## 绘制结果 + +您可以使用`Results`对象的`plot()`方法来可视化预测结果。它会将`Results`对象中包含的所有预测类型(框、掩码、关键点、概率等)绘制到一个numpy数组上,然后可以显示或保存。 + +!!! Example "绘制" + + ```python + from PIL import Image + from ultralytics import YOLO + + # 加载预训练的YOLOv8n模型 + model = YOLO('yolov8n.pt') + + # 在'bus.jpg'上运行推理 + results = model('bus.jpg') # 结果列表 + + # 展示结果 + for r in results: + im_array = r.plot() # 绘制包含预测结果的BGR numpy数组 + im = Image.fromarray(im_array[..., ::-1]) # RGB PIL图像 + im.show() # 显示图像 + im.save('results.jpg') # 保存图像 + ``` + + `plot()`方法支持以下参数: + + | 参数 | 类型 | 描述 | 默认值 | + |---------------|-----------------|------------------------------------------------------------------------|---------------| + | `conf` | `bool` | 是否绘制检测置信度分数。 | `True` | + | `line_width` | `float` | 边框线宽度。如果为None,则按图像大小缩放。 | `None` | + | `font_size` | `float` | 文本字体大小。如果为None,则按图像大小缩放。 | `None` | + | `font` | `str` | 文本字体。 | `'Arial.ttf'` | + | `pil` | `bool` | 是否将图像返回为PIL图像。 | `False` | + | `img` | `numpy.ndarray` | 绘制到另一个图像上。如果没有,则绘制到原始图像上。 | `None` | + | `im_gpu` | `torch.Tensor` | 形状为(1, 3, 640, 640)的规范化GPU图像,用于更快地绘制掩码。 | `None` | + | `kpt_radius` | `int` | 绘制关键点的半径。默认为5。 | `5` | + | `kpt_line` | `bool` | 是否绘制连接关键点的线条。 | `True` | + | `labels` | `bool` | 是否绘制边框标签。 | `True` | + | 
`boxes` | `bool` | 是否绘制边框。 | `True` | + | `masks` | `bool` | 是否绘制掩码。 | `True` | + | `probs` | `bool` | 是否绘制分类概率 | `True` | + +## 线程安全推理 + +在多线程中并行运行多个YOLO模型时,确保推理过程的线程安全性至关重要。线程安全的推理保证了每个线程的预测结果是隔离的,不会相互干扰,避免竞态条件,确保输出的一致性和可靠性。 + +在多线程应用中使用YOLO模型时,重要的是为每个线程实例化单独的模型对象,或使用线程本地存储来防止冲突: + +!!! Example "线程安全推理" + + 在每个线程内实例化单个模型以实现线程安全的推理: + ```python + from ultralytics import YOLO + from threading import Thread + + def thread_safe_predict(image_path): + # 在线程内实例化新模型 + local_model = YOLO("yolov8n.pt") + results = local_model.predict(image_path) + # 处理结果 + + # 启动拥有各自模型实例的线程 + Thread(target=thread_safe_predict, args=("image1.jpg",)).start() + Thread(target=thread_safe_predict, args=("image2.jpg",)).start() + ``` + +有关YOLO模型线程安全推理的深入讨论和逐步指导,请参阅我们的[YOLO线程安全推理指南](/../guides/yolo-thread-safe-inference.md)。该指南将为您提供避免常见陷阱并确保多线程推理顺利进行所需的所有必要信息。 + +## 流媒体源`for`循环 + +以下是使用OpenCV(`cv2`)和YOLOv8在视频帧上运行推理的Python脚本。此脚本假设您已经安装了必要的包(`opencv-python`和`ultralytics`)。 + +!!! Example "流媒体for循环" + + ```python + import cv2 + from ultralytics import YOLO + + # 加载YOLOv8模型 + model = YOLO('yolov8n.pt') + + # 打开视频文件 + video_path = "path/to/your/video/file.mp4" + cap = cv2.VideoCapture(video_path) + + # 遍历视频帧 + while cap.isOpened(): + # 从视频中读取一帧 + success, frame = cap.read() + + if success: + # 在该帧上运行YOLOv8推理 + results = model(frame) + + # 在帧上可视化结果 + annotated_frame = results[0].plot() + + # 显示带注释的帧 + cv2.imshow("YOLOv8推理", annotated_frame) + + # 如果按下'q'则中断循环 + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # 如果视频结束则中断循环 + break + + # 释放视频捕获对象并关闭显示窗口 + cap.release() + cv2.destroyAllWindows() + ``` + +此脚本将对视频的每一帧进行预测,可视化结果,并在窗口中显示。按下'q'键可以退出循环。 diff --git a/docs/zh/modes/track.md b/docs/zh/modes/track.md new file mode 100644 index 0000000..95e6745 --- /dev/null +++ b/docs/zh/modes/track.md @@ -0,0 +1,273 @@ +--- +comments: true +description: 学习如何使用Ultralytics YOLO进行视频流中的物体追踪。指南包括使用不同的追踪器和自定义追踪器配置。 +keywords: Ultralytics, YOLO, 物体追踪, 视频流, BoT-SORT, ByteTrack, Python 指南, CLI 指南 +--- + +# 
使用Ultralytics YOLO进行多物体追踪 + +多物体追踪示例 + +视频分析领域的物体追踪是一项关键任务,它不仅能标识出帧内物体的位置和类别,还能在视频进行过程中为每个检测到的物体保持一个唯一的ID。应用场景无限广阔——从监控与安全到实时体育分析。 + +## 为什么选择Ultralytics YOLO进行物体追踪? + +Ultralytics 追踪器的输出与标准的物体检测结果一致,但增加了物体ID的附加值。这使其易于追踪视频流中的物体并进行后续分析。以下是您应考虑使用Ultralytics YOLO来满足您物体追踪需求的原因: + +- **效率:** 实时处理视频流,同时保持准确性。 +- **灵活性:** 支持多种追踪算法和配置。 +- **易用性:** 简单的Python API和CLI选项,便于快速集成和部署。 +- **可定制性:** 易于使用自定义训练的YOLO模型,允许集成到特定领域的应用中。 + +

+
+ +
+ 观看:使用Ultralytics YOLOv8的物体检测与追踪。 +

+ +## 实际应用场景 + +| 交通运输 | 零售 | 水产养殖 | +|:----------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------:| +| ![车辆追踪](https://github.com/RizwanMunawar/ultralytics/assets/62513924/ee6e6038-383b-4f21-ac29-b2a1c7d386ab) | ![人员追踪](https://github.com/RizwanMunawar/ultralytics/assets/62513924/93bb4ee2-77a0-4e4e-8eb6-eb8f527f0527) | ![鱼类追踪](https://github.com/RizwanMunawar/ultralytics/assets/62513924/a5146d0f-bfa8-4e0a-b7df-3c1446cd8142) | +| 车辆追踪 | 人员追踪 | 鱼类追踪 | + +## 一瞥特点 + +Ultralytics YOLO扩展了其物体检测功能,以提供强大且多功能的物体追踪: + +- **实时追踪:** 在高帧率视频中无缝追踪物体。 +- **支持多个追踪器:** 从多种成熟的追踪算法中选择。 +- **自定义追踪器配置:** 通过调整各种参数来定制追踪算法,以满足特定需求。 + +## 可用的追踪器 + +Ultralytics YOLO支持以下追踪算法。可以通过传递相关的YAML配置文件如`tracker=tracker_type.yaml`来启用: + +* [BoT-SORT](https://github.com/NirAharon/BoT-SORT) - 使用 `botsort.yaml` 启用此追踪器。 +* [ByteTrack](https://github.com/ifzhang/ByteTrack) - 使用 `bytetrack.yaml` 启用此追踪器。 + +默认追踪器是BoT-SORT。 + +## 追踪 + +要在视频流中运行追踪器,请使用已训练的检测、分割或姿态模型,例如YOLOv8n、YOLOv8n-seg和YOLOv8n-pose。 + +!!! 
Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载官方或自定义模型 + model = YOLO('yolov8n.pt') # 加载一个官方的检测模型 + model = YOLO('yolov8n-seg.pt') # 加载一个官方的分割模型 + model = YOLO('yolov8n-pose.pt') # 加载一个官方的姿态模型 + model = YOLO('path/to/best.pt') # 加载一个自定义训练的模型 + + # 使用模型进行追踪 + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True) # 使用默认追踪器进行追踪 + results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True, tracker="bytetrack.yaml") # 使用ByteTrack追踪器进行追踪 + ``` + + === "CLI" + + ```bash + # 使用命令行界面进行各种模型的追踪 + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" # 官方检测模型 + yolo track model=yolov8n-seg.pt source="https://youtu.be/LNwODJXcvt4" # 官方分割模型 + yolo track model=yolov8n-pose.pt source="https://youtu.be/LNwODJXcvt4" # 官方姿态模型 + yolo track model=path/to/best.pt source="https://youtu.be/LNwODJXcvt4" # 自定义训练模型 + + # 使用ByteTrack追踪器进行追踪 + yolo track model=path/to/best.pt tracker="bytetrack.yaml" + ``` + +如上所述,Detect、Segment和Pose模型在视频或流媒体源上运行时均可进行追踪。 + +## 配置 + +### 追踪参数 + +追踪配置与预测模式共享一些属性,如`conf`、`iou`和`show`。有关进一步配置,请参见[预测](https://docs.ultralytics.com/modes/predict/)模型页面。 + +!!! Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 配置追踪参数并运行追踪器 + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True) + ``` + + === "CLI" + + ```bash + # 使用命令行界面配置追踪参数并运行追踪器 + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3 iou=0.5 show + ``` + +### 选择追踪器 + +Ultralytics还允许您使用修改后的追踪器配置文件。要执行此操作,只需从[ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers)中复制一个追踪器配置文件(例如,`custom_tracker.yaml`)并根据您的需求修改任何配置(除了`tracker_type`)。 + +!!! 
Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载模型并使用自定义配置文件运行追踪器 + model = YOLO('yolov8n.pt') + results = model.track(source="https://youtu.be/LNwODJXcvt4", tracker='custom_tracker.yaml') + ``` + + === "CLI" + + ```bash + # 使用命令行界面加载模型并使用自定义配置文件运行追踪器 + yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml' + ``` + +有关追踪参数的全面列表,请参考[ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers)页面。 + +## Python示例 + +### 持续追踪循环 + +这是一个使用OpenCV(`cv2`)和YOLOv8在视频帧上运行物体追踪的Python脚本。此脚本假设您已经安装了必要的包(`opencv-python`和`ultralytics`)。参数`persist=True`告诉追踪器当前的图像或帧是序列中的下一个,并且期望在当前图像中从上一个图像中获得追踪路径。 + +!!! Example "带追踪功能的流循环" + + ```python + import cv2 + from ultralytics import YOLO + + # 加载YOLOv8模型 + model = YOLO('yolov8n.pt') + + # 打开视频文件 + video_path = "path/to/video.mp4" + cap = cv2.VideoCapture(video_path) + + # 循环遍历视频帧 + while cap.isOpened(): + # 从视频读取一帧 + success, frame = cap.read() + + if success: + # 在帧上运行YOLOv8追踪,持续追踪帧间的物体 + results = model.track(frame, persist=True) + + # 在帧上展示结果 + annotated_frame = results[0].plot() + + # 展示带注释的帧 + cv2.imshow("YOLOv8 Tracking", annotated_frame) + + # 如果按下'q'则退出循环 + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # 如果视频结束则退出循环 + break + + # 释放视频捕获对象并关闭显示窗口 + cap.release() + cv2.destroyAllWindows() + ``` + +请注意从`model(frame)`更改为`model.track(frame)`的变化,这使能够启用物体追踪而不只是简单的检测。这个修改的脚本将在视频的每一帧上运行追踪器,可视化结果,并在窗口中显示它们。通过按'q'可以退出循环。 + +### 随时间绘制追踪路径 + +在连续帧上可视化物体追踪路径可以提供有关视频中检测到的物体的运动模式和行为的有价值的洞见。使用Ultralytics YOLOv8,绘制这些路径是一个无缝且高效的过程。 + +在以下示例中,我们演示了如何利用YOLOv8的追踪功能在多个视频帧上绘制检测物体的移动。这个脚本涉及打开视频文件、逐帧读取,并使用YOLO模型识别并追踪各种物体。通过保留检测到的边界框的中心点并连接它们,我们可以绘制表示跟踪物体路径的线条。 + +!!! 
Example "在多个视频帧上绘制追踪路径" + + ```python + from collections import defaultdict + + import cv2 + import numpy as np + + from ultralytics import YOLO + + # 加载YOLOv8模型 + model = YOLO('yolov8n.pt') + + # 打开视频文件 + video_path = "path/to/video.mp4" + cap = cv2.VideoCapture(video_path) + + # 存储追踪历史 + track_history = defaultdict(lambda: []) + + # 循环遍历视频帧 + while cap.isOpened(): + # 从视频读取一帧 + success, frame = cap.read() + + if success: + # 在帧上运行YOLOv8追踪,持续追踪帧间的物体 + results = model.track(frame, persist=True) + + # 获取框和追踪ID + boxes = results[0].boxes.xywh.cpu() + track_ids = results[0].boxes.id.int().cpu().tolist() + + # 在帧上展示结果 + annotated_frame = results[0].plot() + + # 绘制追踪路径 + for box, track_id in zip(boxes, track_ids): + x, y, w, h = box + track = track_history[track_id] + track.append((float(x), float(y))) # x, y中心点 + if len(track) > 30: # 仅保留最近30帧的追踪点 + track.pop(0) + + # 绘制追踪线 + points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2)) + cv2.polylines(annotated_frame, [points], isClosed=False, color=(230, 230, 230), thickness=10) + + # 展示带注释的帧 + cv2.imshow("YOLOv8 Tracking", annotated_frame) + + # 如果按下'q'则退出循环 + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # 如果视频结束则退出循环 + break + + # 释放视频捕获对象并关闭显示窗口 + cap.release() + cv2.destroyAllWindows() + ``` + +### 多线程追踪 + +多线程追踪提供了同时在多个视频流上运行物体追踪的能力。当处理多个视频输入,例如来自多个监控摄像头时,这一功能特别有用,其中并发处理可以大大提高效率和性能。 + +在提供的Python脚本中,我们利用Python的`threading`模块来同时运行多个追踪器实例。每个线程负责在一个视频文件上运行追踪器,所有线程在后台同时运行。 + +为了确保每个线程接收到正确的参数(视频文件、要使用的模型和文件索引),我们定义了一个函数`run_tracker_in_thread`,它接受这些参数并包含主追踪循环。此函数逐帧读取视频,运行追踪器,并显示结果。 + +在这个例子中,两个不同的模型被使用:`yolov8n.pt`和`yolov8n-seg.pt`,每个模型都在不同的视频文件中追踪物体。视频文件分别指定在`video_file1`和`video_file2`中。 + +在`threading.Thread`中参数`daemon=True`表示,这些线程会在主程序结束时关闭。然后我们用`start()`来开始线程,并使用`join()`来使主线程等待,直到两个追踪线程都结束。 + +最后,在所有线程完成任务后,使用`cv2.destroyAllWindows()`关闭显示结果的窗口。 diff --git a/docs/zh/modes/train.md b/docs/zh/modes/train.md new file mode 100644 index 0000000..f9c8c3c --- /dev/null +++ b/docs/zh/modes/train.md @@ -0,0 
+1,294 @@ +--- +comments: true +description: 使用Ultralytics YOLO训练YOLOv8模型的逐步指南,包括单GPU和多GPU训练示例 +keywords: Ultralytics, YOLOv8, YOLO, 目标检测, 训练模式, 自定义数据集, GPU训练, 多GPU, 超参数, CLI示例, Python示例 +--- + +# 使用Ultralytics YOLO进行模型训练 + +Ultralytics YOLO生态系统与集成 + +## 引言 + +训练深度学习模型涉及向其输入数据并调整参数,以便准确预测。Ultralytics YOLOv8的训练模式旨在有效高效地训练目标检测模型,充分利用现代硬件功能。本指南旨在涵盖使用YOLOv8的强大功能集训练自己模型的所有细节。 + +

+
+ +
+ 观看: 如何在Google Colab中用你的自定义数据集训练一个YOLOv8模型。 +

+ +## 为什么选择Ultralytics YOLO进行训练? + +以下是选择YOLOv8训练模式的一些有力理由: + +- **效率:** 充分利用您的硬件资源,无论您是使用单GPU设置还是跨多个GPU扩展。 +- **多功能:** 除了可随时获取的数据集(如COCO、VOC和ImageNet)之外,还可以对自定义数据集进行训练。 +- **用户友好:** 简单而强大的CLI和Python接口,为您提供直接的训练体验。 +- **超参数灵活性:** 可定制的广泛超参数范围,以微调模型性能。 + +### 训练模式的关键特性 + +以下是YOLOv8训练模式的一些显著特点: + +- **自动数据集下载:** 标准数据集如COCO、VOC和ImageNet将在首次使用时自动下载。 +- **多GPU支持:** 无缝地跨多个GPU扩展您的训练工作,以加快过程。 +- **超参数配置:** 通过YAML配置文件或CLI参数修改超参数的选项。 +- **可视化和监控:** 实时跟踪训练指标并可视化学习过程,以获得更好的洞察力。 + +!!! Tip "提示" + + * 如COCO、VOC、ImageNet等YOLOv8数据集在首次使用时会自动下载,即 `yolo train data=coco.yaml` + +## 使用示例 + +在COCO128数据集上训练YOLOv8n模型100个时期,图像大小为640。可以使用`device`参数指定训练设备。如果没有传递参数,并且有可用的GPU,则将使用GPU `device=0`,否则将使用`device=cpu`。有关完整列表的训练参数,请参见下面的参数部分。 + +!!! Example "单GPU和CPU训练示例" + + 设备将自动确定。如果有可用的GPU,那么将使用它,否则将在CPU上开始训练。 + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载一个模型 + model = YOLO('yolov8n.yaml') # 从YAML建立一个新模型 + model = YOLO('yolov8n.pt') # 加载预训练模型(推荐用于训练) + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # 从YAML建立并转移权重 + + # 训练模型 + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + # 从YAML构建新模型,从头开始训练 + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # 从预训练*.pt模型开始训练 + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # 从YAML构建一个新模型,转移预训练权重,然后开始训练 + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### 多GPU训练 + +多GPU训练通过在多个GPU上分布训练负载,实现对可用硬件资源的更有效利用。无论是通过Python API还是命令行界面,都可以使用此功能。 若要启用多GPU训练,请指定您希望使用的GPU设备ID。 + +!!! 
Example "多GPU训练示例" + + 要使用2个GPU进行训练,请使用CUDA设备0和1,使用以下命令。根据需要扩展到更多GPU。 + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载模型 + model = YOLO('yolov8n.pt') # 加载预训练模型(推荐用于训练) + + # 使用2个GPU训练模型 + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device=[0, 1]) + ``` + + === "CLI" + + ```bash + # 使用GPU 0和1从预训练*.pt模型开始训练 + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=0,1 + ``` + +### 苹果M1和M2 MPS训练 + +通过Ultralytics YOLO模型集成对Apple M1和M2芯片的支持,现在可以在使用强大的Metal性能着色器(MPS)框架的设备上训练模型。MPS为在Apple的定制硅上执行计算和图像处理任务提供了一种高性能的方法。 + +要在Apple M1和M2芯片上启用训练,您应该在启动训练过程时将设备指定为'mps'。以下是Python和命令行中如何做到这点的示例: + +!!! Example "MPS训练示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载模型 + model = YOLO('yolov8n.pt') # 加载预训练模型(推荐用于训练) + + # 使用MPS训练模型 + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device='mps') + ``` + + === "CLI" + + ```bash + # 使用MPS从预训练*.pt模型开始训练 + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 device=mps + ``` + +利用M1/M2芯片的计算能力,这使得训练任务的处理更加高效。有关更详细的指南和高级配置选项,请参阅[PyTorch MPS文档](https://pytorch.org/docs/stable/notes/mps.html)。 + +### 恢复中断的训练 + +在处理深度学习模型时,从之前保存的状态恢复训练是一个关键特性。在各种情况下,这可能很方便,比如当训练过程意外中断,或者当您希望用新数据或更多时期继续训练模型时。 + +恢复训练时,Ultralytics YOLO将加载最后保存的模型的权重,并恢复优化器状态、学习率调度器和时期编号。这允许您无缝地从离开的地方继续训练过程。 + +在Ultralytics YOLO中,您可以通过在调用`train`方法时将`resume`参数设置为`True`并指定包含部分训练模型权重的`.pt`文件路径来轻松恢复训练。 + +下面是使用Python和命令行恢复中断训练的示例: + +!!! 
Example "恢复训练示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载模型 + model = YOLO('path/to/last.pt') # 加载部分训练的模型 + + # 恢复训练 + results = model.train(resume=True) + ``` + + === "CLI" + + ```bash + # 恢复中断的训练 + yolo train resume model=path/to/last.pt + ``` + +通过设置`resume=True`,`train`函数将从'path/to/last.pt'文件中存储的状态继续训练。如果省略`resume`参数或将其设置为`False`,`train`函数将启动新的训练会话。 + +请记住,默认情况下,检查点会在每个时期结束时保存,或者使用`save_period`参数以固定间隔保存,因此您必须至少完成1个时期才能恢复训练运行。 + +## 参数 + +YOLO模型的训练设置是指用于对数据集进行模型训练的各种超参数和配置。这些设置会影响模型的性能、速度和准确性。一些常见的YOLO训练设置包括批大小、学习率、动量和权重衰减。其他可能影响训练过程的因素包括优化器的选择、损失函数的选择以及训练数据集的大小和组成。仔细调整和实验这些设置以实现给定任务的最佳性能是非常重要的。 + +| 键 | 值 | 描述 | +|-------------------|----------|---------------------------------------------------------------------| +| `model` | `None` | 模型文件路径,例如 yolov8n.pt, yolov8n.yaml | +| `data` | `None` | 数据文件路径,例如 coco128.yaml | +| `epochs` | `100` | 训练的轮次数量 | +| `patience` | `50` | 早停训练的等待轮次 | +| `batch` | `16` | 每批图像数量(-1为自动批大小) | +| `imgsz` | `640` | 输入图像的大小,以整数表示 | +| `save` | `True` | 保存训练检查点和预测结果 | +| `save_period` | `-1` | 每x轮次保存检查点(如果<1则禁用) | +| `cache` | `False` | True/ram, disk 或 False。使用缓存加载数据 | +| `device` | `None` | 运行设备,例如 cuda device=0 或 device=0,1,2,3 或 device=cpu | +| `workers` | `8` | 数据加载的工作线程数(如果DDP则为每个RANK) | +| `project` | `None` | 项目名称 | +| `name` | `None` | 实验名称 | +| `exist_ok` | `False` | 是否覆盖现有实验 | +| `pretrained` | `True` | (bool 或 str) 是否使用预训练模型(bool)或从中加载权重的模型(str) | +| `optimizer` | `'auto'` | 使用的优化器,选择范围=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto] | +| `verbose` | `False` | 是否打印详细输出 | +| `seed` | `0` | 随机种子,用于可重复性 | +| `deterministic` | `True` | 是否启用确定性模式 | +| `single_cls` | `False` | 将多类数据作为单类训练 | +| `rect` | `False` | 矩形训练,每批为最小填充整合 | +| `cos_lr` | `False` | 使用余弦学习率调度器 | +| `close_mosaic` | `10` | (int) 最后轮次禁用马赛克增强(0为禁用) | +| `resume` | `False` | 从最后检查点恢复训练 | +| `amp` | `True` | 自动混合精度(AMP)训练,选择范围=[True, False] | +| `fraction` | `1.0` | 训练的数据集比例(默认为1.0,即训练集中的所有图像) | +| `profile` | `False` | 
在训练期间为记录器分析ONNX和TensorRT速度 | +| `freeze` | `None` | (int 或 list, 可选) 在训练期间冻结前n层,或冻结层索引列表 | +| `lr0` | `0.01` | 初始学习率(例如 SGD=1E-2, Adam=1E-3) | +| `lrf` | `0.01` | 最终学习率 (lr0 * lrf) | +| `momentum` | `0.937` | SGD动量/Adam beta1 | +| `weight_decay` | `0.0005` | 优化器权重衰减5e-4 | +| `warmup_epochs` | `3.0` | 热身轮次(小数ok) | +| `warmup_momentum` | `0.8` | 热身初始动量 | +| `warmup_bias_lr` | `0.1` | 热身初始偏差lr | +| `box` | `7.5` | 框损失增益 | +| `cls` | `0.5` | cls损失增益(根据像素缩放) | +| `dfl` | `1.5` | dfl损失增益 | +| `pose` | `12.0` | 姿态损失增益(仅限姿态) | +| `kobj` | `2.0` | 关键点obj损失增益(仅限姿态) | +| `label_smoothing` | `0.0` | 标签平滑(小数) | +| `nbs` | `64` | 标称批大小 | +| `overlap_mask` | `True` | 训练期间掩码应重叠(仅限分割训练) | +| `mask_ratio` | `4` | 掩码降采样比率(仅限分割训练) | +| `dropout` | `0.0` | 使用dropout正则化(仅限分类训练) | +| `val` | `True` | 训练期间验证/测试 | + +## 记录 + +在训练YOLOv8模型时,跟踪模型随时间的性能变化可能非常有价值。这就是记录发挥作用的地方。Ultralytics的YOLO提供对三种类型记录器的支持 - Comet、ClearML和TensorBoard。 + +要使用记录器,请在上面的代码片段中的下拉菜单中选择它并运行。所选的记录器将被安装和初始化。 + +### Comet + +[Comet](https://www.comet.ml/site/)是一个平台,允许数据科学家和开发人员跟踪、比较、解释和优化实验和模型。它提供了实时指标、代码差异和超参数跟踪等功能。 + +使用Comet: + +!!! Example "示例" + + === "Python" + ```python + # pip install comet_ml + import comet_ml + + comet_ml.init() + ``` + +记得在他们的网站上登录您的Comet账户并获取您的API密钥。您需要将此添加到您的环境变量或脚本中,以记录您的实验。 + +### ClearML + +[ClearML](https://www.clear.ml/) 是一个开源平台,自动跟踪实验并帮助有效共享资源。它旨在帮助团队更有效地管理、执行和复现他们的ML工作。 + +使用ClearML: + +!!! Example "示例" + + === "Python" + ```python + # pip install clearml + import clearml + + clearml.browser_login() + ``` + +运行此脚本后,您需要在浏览器中登录您的ClearML账户并认证您的会话。 + +### TensorBoard + +[TensorBoard](https://www.tensorflow.org/tensorboard) 是TensorFlow的可视化工具包。它允许您可视化TensorFlow图表,绘制有关图表执行的定量指标,并展示通过它的附加数据,如图像。 + +在[Google Colab](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb)中使用TensorBoard: + +!!! 
Example "示例" + + === "CLI" + ```bash + load_ext tensorboard + tensorboard --logdir ultralytics/runs # 替换为'runs'目录 + ``` + +在本地使用TensorBoard,运行下面的命令并在 http://localhost:6006/ 查看结果。 + +!!! Example "示例" + + === "CLI" + ```bash + tensorboard --logdir ultralytics/runs # 替换为'runs'目录 + ``` + +这将加载TensorBoard并将其定向到保存训练日志的目录。 + +在设置好日志记录器后,您可以继续进行模型训练。所有训练指标将自动记录在您选择的平台中,您可以访问这些日志以监控模型随时间的表现,比较不同模型,并识别改进的领域。 diff --git a/docs/zh/modes/val.md b/docs/zh/modes/val.md new file mode 100644 index 0000000..21129fb --- /dev/null +++ b/docs/zh/modes/val.md @@ -0,0 +1,86 @@ +--- +comments: true +description: 指南 - 验证 YOLOv8 模型。了解如何使用验证设置和指标评估您的 YOLO 模型的性能,包括 Python 和 CLI 示例。 +keywords: Ultralytics, YOLO 文档, YOLOv8, 验证, 模型评估, 超参数, 准确率, 指标, Python, CLI +--- + +# 使用 Ultralytics YOLO 进行模型验证 + +Ultralytics YOLO 生态系统和集成 + +## 引言 + +在机器学习流程中,验证是一个关键步骤,让您能够评估训练模型的质量。Ultralytics YOLOv8 的 Val 模式提供了一整套强大的工具和指标,用于评估您的目标检测模型的性能。本指南作为一个完整资源,用于理解如何有效使用 Val 模式来确保您的模型既准确又可靠。 + +## 为什么要使用 Ultralytics YOLO 进行验证? + +以下是使用 YOLOv8 的 Val 模式的好处: + +- **精确性:** 获取准确的指标,如 mAP50、mAP75 和 mAP50-95,全面评估您的模型。 +- **便利性:** 利用内置功能记住训练设置,简化验证过程。 +- **灵活性:** 使用相同或不同的数据集和图像尺寸验证您的模型。 +- **超参数调优:** 使用验证指标来调整您的模型以获得更好的性能。 + +### Val 模式的主要特点 + +以下是 YOLOv8 的 Val 模式提供的显著功能: + +- **自动化设置:** 模型记住其训练配置,以便直接进行验证。 +- **多指标支持:** 根据一系列准确度指标评估您的模型。 +- **CLI 和 Python API:** 根据您的验证偏好选择命令行界面或 Python API。 +- **数据兼容性:** 与训练阶段使用的数据集以及自定义数据集无缝协作。 + +!!! Tip "提示" + + * YOLOv8 模型会自动记住其训练设置,因此您可以很容易地仅使用 `yolo val model=yolov8n.pt` 或 `model('yolov8n.pt').val()` 在原始数据集上并以相同图像大小验证模型。 + +## 使用示例 + +在 COCO128 数据集上验证训练过的 YOLOv8n 模型的准确性。由于 `model` 保留了其训练的 `data` 和参数作为模型属性,因此无需传递任何参数。有关完整的验证参数列表,请参阅下面的参数部分。 + +!!! 
Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载模型 + model = YOLO('yolov8n.pt') # 加载官方模型 + model = YOLO('path/to/best.pt') # 加载自定义模型 + + # 验证模型 + metrics = model.val() # 无需参数,数据集和设置记忆 + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # 包含每个类别的map50-95列表 + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # 验证官方模型 + yolo detect val model=path/to/best.pt # 验证自定义模型 + ``` + +## 参数 + +YOLO 模型的验证设置是指用于评估模型在验证数据集上性能的各种超参数和配置。这些设置会影响模型的性能、速度和准确性。一些常见的 YOLO 验证设置包括批处理大小、在训练期间验证频率以及用于评估模型性能的指标。其他可能影响验证过程的因素包括验证数据集的大小和组成以及模型用于特定任务的特性。仔细调整和实验这些设置很重要,以确保模型在验证数据集上表现良好并且检测和预防过拟合。 + +| 键 | 值 | 描述 | +|---------------|---------|---------------------------------------------| +| `data` | `None` | 数据文件的路径,例如 coco128.yaml | +| `imgsz` | `640` | 输入图像的大小,以整数表示 | +| `batch` | `16` | 每批图像的数量(AutoBatch 为 -1) | +| `save_json` | `False` | 将结果保存至 JSON 文件 | +| `save_hybrid` | `False` | 保存混合版本的标签(标签 + 额外预测) | +| `conf` | `0.001` | 用于检测的对象置信度阈值 | +| `iou` | `0.6` | NMS(非极大抑制)用的交并比(IoU)阈值 | +| `max_det` | `300` | 每张图像的最大检测数量 | +| `half` | `True` | 使用半精度(FP16) | +| `device` | `None` | 运行所用的设备,例如 cuda device=0/1/2/3 或 device=cpu | +| `dnn` | `False` | 使用 OpenCV DNN 进行 ONNX 推理 | +| `plots` | `False` | 在训练期间显示图表 | +| `rect` | `False` | 矩形验证,每批图像为了最小填充整齐排列 | +| `split` | `val` | 用于验证的数据集分割,例如 'val'、'test' 或 'train' | +| diff --git a/docs/zh/quickstart.md b/docs/zh/quickstart.md new file mode 100644 index 0000000..b3438a8 --- /dev/null +++ b/docs/zh/quickstart.md @@ -0,0 +1,325 @@ +--- +comments: true +description: 探索使用pip、conda、git和Docker安装Ultralytics的各种方法。了解如何在命令行界面或Python项目中使用Ultralytics。 +keywords: Ultralytics安装,pip安装Ultralytics,Docker安装Ultralytics,Ultralytics命令行界面,Ultralytics Python接口 +--- + +## 安装Ultralytics + +Ultralytics提供了多种安装方法,包括pip、conda和Docker。通过`ultralytics`pip包安装最新稳定版的YOLOv8,或者克隆[Ultralytics GitHub仓库](https://github.com/ultralytics/ultralytics)以获取最新版本。Docker可用于在隔离容器中执行包,避免本地安装。 + +!!! 
Example "安装" + + === "Pip安装(推荐)" + 使用pip安装`ultralytics`包,或通过运行`pip install -U ultralytics`更新现有安装。访问Python包索引(PyPI)了解更多关于`ultralytics`包的详细信息:[https://pypi.org/project/ultralytics/](https://pypi.org/project/ultralytics/)。 + + [![PyPI版本](https://badge.fury.io/py/ultralytics.svg)](https://badge.fury.io/py/ultralytics) [![下载](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) + + ```bash + # 从PyPI安装ultralytics包 + pip install ultralytics + ``` + + 你也可以直接从GitHub[仓库](https://github.com/ultralytics/ultralytics)安装`ultralytics`包。如果你想要最新的开发版本,这可能会很有用。确保你的系统上安装了Git命令行工具。`@main`指令安装`main`分支,可修改为其他分支,如`@my-branch`,或完全删除,默认为`main`分支。 + + ```bash + # 从GitHub安装ultralytics包 + pip install git+https://github.com/ultralytics/ultralytics.git@main + ``` + + + === "Conda安装" + Conda是pip的一个替代包管理器,也可用于安装。访问Anaconda了解更多详情,网址为[https://anaconda.org/conda-forge/ultralytics](https://anaconda.org/conda-forge/ultralytics)。用于更新conda包的Ultralytics feedstock仓库位于[https://github.com/conda-forge/ultralytics-feedstock/](https://github.com/conda-forge/ultralytics-feedstock/)。 + + + [![Conda配方](https://img.shields.io/badge/recipe-ultralytics-green.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda下载](https://img.shields.io/conda/dn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda版本](https://img.shields.io/conda/vn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) [![Conda平台](https://img.shields.io/conda/pn/conda-forge/ultralytics.svg)](https://anaconda.org/conda-forge/ultralytics) + + ```bash + # 使用conda安装ultralytics包 + conda install -c conda-forge ultralytics + ``` + + !!! 
注意 + + 如果你在CUDA环境中安装,最佳实践是同时安装`ultralytics`、`pytorch`和`pytorch-cuda`,以便conda包管理器解决任何冲突,或者最后安装`pytorch-cuda`,让它必要时覆盖特定于CPU的`pytorch`包。 + ```bash + # 使用conda一起安装所有包 + conda install -c pytorch -c nvidia -c conda-forge pytorch torchvision pytorch-cuda=11.8 ultralytics + ``` + + ### Conda Docker映像 + + Ultralytics Conda Docker映像也可从[DockerHub](https://hub.docker.com/r/ultralytics/ultralytics)获得。这些映像基于[Miniconda3](https://docs.conda.io/projects/miniconda/en/latest/),是开始在Conda环境中使用`ultralytics`的简单方式。 + + ```bash + # 将映像名称设置为变量 + t=ultralytics/ultralytics:latest-conda + + # 从Docker Hub拉取最新的ultralytics映像 + sudo docker pull $t + + # 使用GPU支持运行ultralytics映像的容器 + sudo docker run -it --ipc=host --gpus all $t # 所有GPU + sudo docker run -it --ipc=host --gpus '"device=2,3"' $t # 指定GPU + ``` + + === "Git克隆" + 如果您对参与开发感兴趣或希望尝试最新源代码,请克隆`ultralytics`仓库。克隆后,导航到目录并使用pip以可编辑模式`-e`安装包。 + ```bash + # 克隆ultralytics仓库 + git clone https://github.com/ultralytics/ultralytics + + # 导航到克隆的目录 + cd ultralytics + + # 为开发安装可编辑模式下的包 + pip install -e . 
+ ``` + + === "Docker" + + 利用Docker轻松地在隔离的容器中执行`ultralytics`包,确保跨不同环境的一致性和流畅性能。通过选择一款官方`ultralytics`映像,从[Docker Hub](https://hub.docker.com/r/ultralytics/ultralytics)中不仅避免了本地安装的复杂性,还获得了对验证工作环境的访问。Ultralytics提供6种主要支持的Docker映像,每一种都为不同的平台和使用案例设计,以提供高兼容性和效率: + + Docker拉取次数 + + - **Dockerfile:** 推荐用于训练的GPU映像。 + - **Dockerfile-arm64:** 为ARM64架构优化,允许在树莓派和其他基于ARM64的平台上部署。 + - **Dockerfile-cpu:** 基于Ubuntu的CPU版,适合无GPU环境下的推理。 + - **Dockerfile-jetson:** 为NVIDIA Jetson设备量身定制,整合了针对这些平台优化的GPU支持。 + - **Dockerfile-python:** 最小化映像,只包含Python及必要依赖,理想于轻量级应用和开发。 + - **Dockerfile-conda:** 基于Miniconda3,包含conda安装的ultralytics包。 + + 以下是获取最新映像并执行它的命令: + + ```bash + # 将映像名称设置为变量 + t=ultralytics/ultralytics:latest + + # 从Docker Hub拉取最新的ultralytics映像 + sudo docker pull $t + + # 使用GPU支持运行ultralytics映像的容器 + sudo docker run -it --ipc=host --gpus all $t # 所有GPU + sudo docker run -it --ipc=host --gpus '"device=2,3"' $t # 指定GPU + ``` + + 上述命令初始化了一个带有最新`ultralytics`映像的Docker容器。`-it`标志分配了一个伪TTY,并保持stdin打开,使您可以与容器交互。`--ipc=host`标志将IPC(进程间通信)命名空间设置为宿主,这对于进程之间的内存共享至关重要。`--gpus all`标志使容器内可以访问所有可用的GPU,这对于需要GPU计算的任务至关重要。 + + 注意:要在容器中使用本地机器上的文件,请使用Docker卷将本地目录挂载到容器中: + + ```bash + # 将本地目录挂载到容器内的目录 + sudo docker run -it --ipc=host --gpus all -v /path/on/host:/path/in/container $t + ``` + + 将`/path/on/host`更改为您本地机器上的目录路径,将`/path/in/container`更改为Docker容器内希望访问的路径。 + + 欲了解进阶Docker使用方法,请探索[Ultralytics Docker指南](https://docs.ultralytics.com/guides/docker-quickstart/)。 + +有关依赖项列表,请参见`ultralytics`的[requirements.txt](https://github.com/ultralytics/ultralytics/blob/main/requirements.txt)文件。请注意,上述所有示例均安装了所有必需的依赖项。 + +

+
+ +
+ 观看:Ultralytics YOLO快速入门指南 +

+ +!!! Tip "提示" + + PyTorch的要求因操作系统和CUDA需要而异,因此建议首先根据[https://pytorch.org/get-started/locally](https://pytorch.org/get-started/locally)上的指南安装PyTorch。 + + + PyTorch安装指南 + + +## 通过CLI使用Ultralytics + +Ultralytics命令行界面(CLI)允许您通过简单的单行命令使用,无需Python环境。CLI不需要自定义或Python代码。您可以直接从终端使用`yolo`命令运行所有任务。查看[CLI指南](/../usage/cli.md),了解更多关于从命令行使用YOLOv8的信息。 + +!!! Example "示例" + + === "语法" + + Ultralytics `yolo`命令使用以下语法: + ```bash + yolo 任务 模式 参数 + + 其中 任务(可选)是[detect, segment, classify]中的一个 + 模式(必需)是[train, val, predict, export, track]中的一个 + 参数(可选)是任意数量的自定义“arg=value”对,如“imgsz=320”,可覆盖默认值。 + ``` + 在完整的[配置指南](/../usage/cfg.md)中查看所有参数,或者用`yolo cfg`查看 + + === "训练" + + 用初始学习率0.01训练检测模型10个周期 + ```bash + yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01 + ``` + + === "预测" + + 使用预训练的分割模型以320的图像大小预测YouTube视频: + ```bash + yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 + ``` + + === "验证" + + 以批量大小1和640的图像大小验证预训练的检测模型: + ```bash + yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640 + ``` + + === "导出" + + 以224x128的图像大小将YOLOv8n分类模型导出到ONNX格式(无需任务) + ```bash + yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128 + ``` + + === "特殊" + + 运行特殊命令以查看版本、查看设置、运行检查等: + ```bash + yolo help + yolo checks + yolo version + yolo settings + yolo copy-cfg + yolo cfg + ``` + +!!! Warning "警告" + + 参数必须以`arg=val`对的形式传递,用等号`=`分隔,并用空格` `分隔对。不要使用`--`参数前缀或逗号`,`分隔参数。 + + - `yolo predict model=yolov8n.pt imgsz=640 conf=0.25`   ✅ + - `yolo predict model yolov8n.pt imgsz 640 conf 0.25`   ❌ + - `yolo predict --model yolov8n.pt --imgsz 640 --conf 0.25`   ❌ + +[CLI指南](/../usage/cli.md){ .md-button } + +## 通过Python使用Ultralytics + +YOLOv8的Python接口允许无缝集成进您的Python项目,轻松加载、运行模型及处理输出。Python接口设计简洁易用,使用户能快速实现他们项目中的目标检测、分割和分类功能。这使YOLOv8的Python接口成为任何希望在其Python项目中纳入这些功能的人的宝贵工具。 + +例如,用户可以加载一个模型,训练它,在验证集上评估性能,甚至只需几行代码就可以将其导出到ONNX格式。查看[Python指南](/../usage/python.md),了解更多关于在Python项目中使用YOLOv8的信息。 + +!!! 
Example "示例" + + ```python + from ultralytics import YOLO + + # 从头开始创建一个新的YOLO模型 + model = YOLO('yolov8n.yaml') + + # 加载预训练的YOLO模型(推荐用于训练) + model = YOLO('yolov8n.pt') + + # 使用“coco128.yaml”数据集训练模型3个周期 + results = model.train(data='coco128.yaml', epochs=3) + + # 评估模型在验证集上的性能 + results = model.val() + + # 使用模型对图片进行目标检测 + results = model('https://ultralytics.com/images/bus.jpg') + + # 将模型导出为ONNX格式 + success = model.export(format='onnx') + ``` + +[Python指南](/../usage/python.md){.md-button .md-button--primary} + +## Ultralytics设置 + +Ultralytics库提供了一个强大的设置管理系统,允许您精细控制实验。通过利用`ultralytics.utils`模块中的`SettingsManager`,用户可以轻松访问和修改设置。这些设置存储在YAML文件中,可以直接在Python环境中查看或修改,或者通过命令行界面(CLI)修改。 + +### 检查设置 + +若要了解当前设置的配置情况,您可以直接查看: + +!!! Example "查看设置" + + === "Python" + 您可以使用Python查看设置。首先从`ultralytics`模块导入`settings`对象。使用以下命令打印和返回设置: + ```python + from ultralytics import settings + + # 查看所有设置 + print(settings) + + # 返回特定设置 + value = settings['runs_dir'] + ``` + + === "CLI" + 或者,命令行界面允许您用一个简单的命令检查您的设置: + ```bash + yolo settings + ``` + +### 修改设置 + +Ultralytics允许用户轻松修改他们的设置。更改可以通过以下方式执行: + +!!! 
Example "更新设置" + + === "Python" + 在Python环境中,调用`settings`对象上的`update`方法来更改您的设置: + ```python + from ultralytics import settings + + # 更新一个设置 + settings.update({'runs_dir': '/path/to/runs'}) + + # 更新多个设置 + settings.update({'runs_dir': '/path/to/runs', 'tensorboard': False}) + + # 重置设置为默认值 + settings.reset() + ``` + + === "CLI" + 如果您更喜欢使用命令行界面,以下命令将允许您修改设置: + ```bash + # 更新一个设置 + yolo settings runs_dir='/path/to/runs' + + # 更新多个设置 + yolo settings runs_dir='/path/to/runs' tensorboard=False + + # 重置设置为默认值 + yolo settings reset + ``` + +### 理解设置 + +下表提供了Ultralytics中可调整设置的概览。每个设置都概述了一个示例值、数据类型和简短描述。 + +| 名称 | 示例值 | 数据类型 | 描述 | +|--------------------|-----------------------|--------|------------------------------------------------------------------------------------------| +| `settings_version` | `'0.0.4'` | `str` | Ultralytics _settings_ 版本(不同于Ultralytics [pip](https://pypi.org/project/ultralytics/)版本) | +| `datasets_dir` | `'/path/to/datasets'` | `str` | 存储数据集的目录 | +| `weights_dir` | `'/path/to/weights'` | `str` | 存储模型权重的目录 | +| `runs_dir` | `'/path/to/runs'` | `str` | 存储实验运行的目录 | +| `uuid` | `'a1b2c3d4'` | `str` | 当前设置的唯一标识符 | +| `sync` | `True` | `bool` | 是否将分析和崩溃同步到HUB | +| `api_key` | `''` | `str` | Ultralytics HUB [API Key](https://hub.ultralytics.com/settings?tab=api+keys) | +| `clearml` | `True` | `bool` | 是否使用ClearML记录 | +| `comet` | `True` | `bool` | 是否使用[Comet ML](https://bit.ly/yolov8-readme-comet)进行实验跟踪和可视化 | +| `dvc` | `True` | `bool` | 是否使用[DVC进行实验跟踪](https://dvc.org/doc/dvclive/ml-frameworks/yolo)和版本控制 | +| `hub` | `True` | `bool` | 是否使用[Ultralytics HUB](https://hub.ultralytics.com)集成 | +| `mlflow` | `True` | `bool` | 是否使用MLFlow进行实验跟踪 | +| `neptune` | `True` | `bool` | 是否使用Neptune进行实验跟踪 | +| `raytune` | `True` | `bool` | 是否使用Ray Tune进行超参数调整 | +| `tensorboard` | `True` | `bool` | 是否使用TensorBoard进行可视化 | +| `wandb` | `True` | `bool` | 是否使用Weights & Biases记录 | + +在您浏览项目或实验时,请务必重新访问这些设置,以确保它们为您的需求提供最佳配置。 diff --git a/docs/zh/tasks/classify.md 
b/docs/zh/tasks/classify.md new file mode 100644 index 0000000..f440ad1 --- /dev/null +++ b/docs/zh/tasks/classify.md @@ -0,0 +1,172 @@ +--- +comments: true +description: 学习YOLOv8分类模型进行图像分类。获取关于预训练模型列表及如何训练、验证、预测、导出模型的详细信息。 +keywords: Ultralytics, YOLOv8, 图像分类, 预训练模型, YOLOv8n-cls, 训练, 验证, 预测, 模型导出 +--- + +# 图像分类 + +图像分类示例 + +图像分类是三项任务中最简单的,它涉及将整个图像分类为一组预定义类别中的一个。 + +图像分类器的输出是单个类别标签和一个置信度分数。当您只需要知道一幅图像属于哪个类别、而不需要知道该类别对象的位置或它们的确切形状时,图像分类非常有用。 + +!!! Tip "提示" + + YOLOv8分类模型使用`-cls`后缀,即`yolov8n-cls.pt`,并预先训练在[ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml)上。 + +## [模型](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +这里展示了预训练的YOLOv8分类模型。Detect、Segment和Pose模型是在[COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml)数据集上预训练的,而分类模型则是在[ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml)数据集上预训练的。 + +[模型](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models)会在首次使用时自动从Ultralytics的最新[发布版本](https://github.com/ultralytics/assets/releases)中下载。 + +| 模型 | 尺寸
(像素) | 准确率
top1 | 准确率
top5 | 速度
CPU ONNX
(ms) | 速度
A100 TensorRT
(ms) | 参数
(M) | FLOPs
(B) at 640 | +|----------------------------------------------------------------------------------------------|-----------------|------------------|------------------|-----------------------------|----------------------------------|----------------|--------------------------| +| [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | +| [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | +| [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | +| [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | +| [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | + +- **准确率** 是模型在[ImageNet](https://www.image-net.org/)数据集验证集上的准确度。 +
通过`yolo val classify data=path/to/ImageNet device=0`复现结果。 +- **速度** 是在使用[Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/)实例时,ImageNet验证图像的平均处理速度。 +
通过`yolo val classify data=path/to/ImageNet batch=1 device=0|cpu`复现结果。 + +## 训练 + +在MNIST160数据集上训练YOLOv8n-cls模型100个时期,图像尺寸为64。有关可用参数的完整列表,请参见[配置](/../usage/cfg.md)页面。 + +!!! Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载模型 + model = YOLO('yolov8n-cls.yaml') # 从YAML构建新模型 + model = YOLO('yolov8n-cls.pt') # 加载预训练模型(推荐用于训练) + model = YOLO('yolov8n-cls.yaml').load('yolov8n-cls.pt') # 从YAML构建并转移权重 + + # 训练模型 + results = model.train(data='mnist160', epochs=100, imgsz=64) + ``` + + === "CLI" + + ```bash + # 从YAML构建新模型并从头开始训练 + yolo classify train data=mnist160 model=yolov8n-cls.yaml epochs=100 imgsz=64 + + # 从预训练的*.pt模型开始训练 + yolo classify train data=mnist160 model=yolov8n-cls.pt epochs=100 imgsz=64 + + # 从YAML构建新模型,转移预训练权重并开始训练 + yolo classify train data=mnist160 model=yolov8n-cls.yaml pretrained=yolov8n-cls.pt epochs=100 imgsz=64 + ``` + +### 数据集格式 + +YOLO分类数据集的格式详情请参见[数据集指南](/../datasets/classify/index.md)。 + +## 验证 + +在MNIST160数据集上验证训练好的YOLOv8n-cls模型准确性。不需要传递任何参数,因为`model`保留了它的训练`data`和参数作为模型属性。 + +!!! Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载模型 + model = YOLO('yolov8n-cls.pt') # 加载官方模型 + model = YOLO('path/to/best.pt') # 加载自定义模型 + + # 验证模型 + metrics = model.val() # 无需参数,数据集和设置已记忆 + metrics.top1 # top1准确率 + metrics.top5 # top5准确率 + ``` + === "CLI" + + ```bash + yolo classify val model=yolov8n-cls.pt # 验证官方模型 + yolo classify val model=path/to/best.pt # 验证自定义模型 + ``` + +## 预测 + +使用训练过的YOLOv8n-cls模型对图像进行预测。 + +!!! 
Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载模型 + model = YOLO('yolov8n-cls.pt') # 加载官方模型 + model = YOLO('path/to/best.pt') # 加载自定义模型 + + # 使用模型进行预测 + results = model('https://ultralytics.com/images/bus.jpg') # 对图像进行预测 + ``` + === "CLI" + + ```bash + yolo classify predict model=yolov8n-cls.pt source='https://ultralytics.com/images/bus.jpg' # 使用官方模型进行预测 + yolo classify predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # 使用自定义模型进行预测 + ``` + +有关`predict`模式的完整详细信息,请参见[预测](https://docs.ultralytics.com/modes/predict/)页面。 + +## 导出 + +将YOLOv8n-cls模型导出为其他格式,如ONNX、CoreML等。 + +!!! Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载模型 + model = YOLO('yolov8n-cls.pt') # 加载官方模型 + model = YOLO('path/to/best.pt') # 加载自定义训练模型 + + # 导出模型 + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-cls.pt format=onnx # 导出官方模型 + yolo export model=path/to/best.pt format=onnx # 导出自定义训练模型 + ``` + +下表中提供了YOLOv8-cls模型可导出的格式。您可以直接在导出的模型上进行预测或验证,即`yolo predict model=yolov8n-cls.onnx`。导出完成后,示例用法会显示您的模型。 + +| 格式 | `format` 参数 | 模型 | 元数据 | 参数 | +|--------------------------------------------------------------------|---------------|-------------------------------|-----|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-cls.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-cls.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-cls.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-cls_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-cls.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | 
`yolov8n-cls.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-cls_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-cls.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-cls.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-cls_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-cls_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-cls_ncnn_model/` | ✅ | `imgsz`, `half` | + +有关`export`的完整详细信息,请参见[导出](https://docs.ultralytics.com/modes/export/)页面。 diff --git a/docs/zh/tasks/detect.md b/docs/zh/tasks/detect.md new file mode 100644 index 0000000..52c2308 --- /dev/null +++ b/docs/zh/tasks/detect.md @@ -0,0 +1,184 @@ +--- +comments: true +description: Ultralytics 官方YOLOv8文档。学习如何训练、验证、预测并以各种格式导出模型。包括详尽的性能统计。 +keywords: YOLOv8, Ultralytics, 目标检测, 预训练模型, 训练, 验证, 预测, 导出模型, COCO, ImageNet, PyTorch, ONNX, CoreML +--- + +# 目标检测 + +目标检测示例 + +目标检测是一项任务,涉及辨识图像或视频流中物体的位置和类别。 + +目标检测器的输出是一组围绕图像中物体的边界框,以及每个框的类别标签和置信度得分。当您需要识别场景中的感兴趣对象,但不需要准确了解物体的位置或其确切形状时,目标检测是一个很好的选择。 + +

+
+ +
+ 观看:使用预训练的Ultralytics YOLOv8模型进行目标检测。 +

+ +!!! Tip "提示" + + YOLOv8 Detect 模型是默认的 YOLOv8 模型,即 `yolov8n.pt` ,并在 [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) 数据集上进行了预训练。 + +## [模型](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +此处展示了预训练的YOLOv8 Detect模型。Detect、Segment和Pose模型在 [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) 数据集上预训练,而Classify模型在 [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) 数据集上预训练。 + +[模型](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) 会在首次使用时自动从Ultralytics的最新 [发布](https://github.com/ultralytics/assets/releases) 中下载。 + +| 模型 | 尺寸
(像素) | mAPval
50-95 | 速度
CPU ONNX
(毫秒) | 速度
A100 TensorRT
(毫秒) | 参数
(M) | FLOPs
(B) | +|--------------------------------------------------------------------------------------|-----------------|----------------------|-----------------------------|----------------------------------|----------------|-------------------| +| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | +| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | +| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | +| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | +| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | + +- **mAPval** 值适用于 [COCO val2017](http://cocodataset.org) 数据集上的单模型单尺度。 +
通过 `yolo val detect data=coco.yaml device=0` 复现。 +- **速度** 是在 [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) 云实例上对COCO val图像取平均得到的结果。 +
通过 `yolo val detect data=coco128.yaml batch=1 device=0|cpu` 复现。 + +## 训练 + +在COCO128数据集上使用图像尺寸640将YOLOv8n训练100个epochs。要查看可用参数的完整列表,请参阅 [配置](/../usage/cfg.md) 页面。 + +!!! Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载模型 + model = YOLO('yolov8n.yaml') # 从YAML构建新模型 + model = YOLO('yolov8n.pt') # 加载预训练模型(推荐用于训练) + model = YOLO('yolov8n.yaml').load('yolov8n.pt') # 从YAML构建并转移权重 + + # 训练模型 + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # 从YAML构建新模型并从头开始训练 + yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640 + + # 从预训练的*.pt模型开始训练 + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640 + + # 从YAML构建新模型,传递预训练权重并开始训练 + yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + ``` + +### 数据集格式 + +YOLO检测数据集格式可以在 [数据集指南](/../datasets/detect/index.md) 中详细找到。要将您现有的数据集从其他格式(如COCO等)转换为YOLO格式,请使用Ultralytics的 [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) 工具。 + +## 验证 + +在COCO128数据集上验证训练好的YOLOv8n模型准确性。无需传递参数,`model` 作为模型属性保留其训练的 `data` 和参数。 + +!!! Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载模型 + model = YOLO('yolov8n.pt') # 加载官方模型 + model = YOLO('path/to/best.pt') # 加载自定义模型 + + # 验证模型 + metrics = model.val() # 无需参数,数据集和设置通过模型属性记住 + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # 包含每个类别map50-95的列表 + ``` + === "CLI" + + ```bash + yolo detect val model=yolov8n.pt # 验证官方模型 + yolo detect val model=path/to/best.pt # 验证自定义模型 + ``` + +## 预测 + +使用训练好的YOLOv8n模型在图像上进行预测。 + +!!! 
Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载模型 + model = YOLO('yolov8n.pt') # 加载官方模型 + model = YOLO('path/to/best.pt') # 加载自定义模型 + + # 使用模型进行预测 + results = model('https://ultralytics.com/images/bus.jpg') # 对图像进行预测 + ``` + === "CLI" + + ```bash + yolo detect predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' # 使用官方模型进行预测 + yolo detect predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # 使用自定义模型进行预测 + ``` + +完整的 `predict` 模式细节请见 [预测](https://docs.ultralytics.com/modes/predict/) 页面。 + +## 导出 + +将YOLOv8n模型导出为ONNX、CoreML等不同格式。 + +!!! Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载模型 + model = YOLO('yolov8n.pt') # 加载官方模型 + model = YOLO('path/to/best.pt') # 加载自定义训练模型 + + # 导出模型 + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n.pt format=onnx # 导出官方模型 + yolo export model=path/to/best.pt format=onnx # 导出自定义训练模型 + ``` + +下表中提供了可用的YOLOv8导出格式。您可以直接在导出的模型上进行预测或验证,即 `yolo predict model=yolov8n.onnx`。导出完成后,会为您的模型显示使用示例。 + +| 格式 | `format` 参数 | 模型 | 元数据 | 参数 | +|--------------------------------------------------------------------|---------------|---------------------------|-----|-------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`,`optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`,`half`,`dynamic`,`simplify`,`opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`,`half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ | `imgsz`,`half`,`dynamic`,`simplify`,`workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`,`half`,`int8`,`nms` | +| [TF 
SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`,`keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`,`half`,`int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`,`half` | + +完整的 `export` 详情请见 [导出](https://docs.ultralytics.com/modes/export/) 页面。 diff --git a/docs/zh/tasks/index.md b/docs/zh/tasks/index.md new file mode 100644 index 0000000..4a48c73 --- /dev/null +++ b/docs/zh/tasks/index.md @@ -0,0 +1,51 @@ +--- +comments: true +description: 了解 YOLOv8 能够执行的基础计算机视觉任务,包括检测、分割、分类和姿态估计。理解它们在你的 AI 项目中的应用。 +keywords: Ultralytics, YOLOv8, 检测, 分割, 分类, 姿态估计, AI 框架, 计算机视觉任务 +--- + +# Ultralytics YOLOv8 任务 + +
+Ultralytics YOLO 支持的任务 + +YOLOv8 是一个支持多种计算机视觉**任务**的 AI 框架。该框架可用于执行[检测](detect.md)、[分割](segment.md)、[分类](classify.md)和[姿态](pose.md)估计。每项任务都有不同的目标和用例。 + +

+
+ +
+ 观看:探索 Ultralytics YOLO 任务:对象检测、分割、追踪和姿态估计。 +

+ +## [检测](detect.md) + +检测是 YOLOv8 支持的主要任务。它涉及在图像或视频帧中检测对象并围绕它们绘制边界框。检测到的对象根据其特征被归类到不同的类别。YOLOv8 能够在单个图像或视频帧中检测多个对象,具有高准确性和速度。 + +[检测示例](detect.md){ .md-button } + +## [分割](segment.md) + +分割是一项涉及将图像分割成基于图像内容的不同区域的任务。每个区域根据其内容被分配一个标签。该任务在图像分割和医学成像等应用中非常有用。YOLOv8 使用 U-Net 架构的变体来执行分割。 + +[分割示例](segment.md){ .md-button } + +## [分类](classify.md) + +分类是一项涉及将图像归类为不同类别的任务。YOLOv8 可用于根据图像内容对图像进行分类。它使用 EfficientNet 架构的变体来执行分类。 + +[分类示例](classify.md){ .md-button } + +## [姿态](pose.md) + +姿态/关键点检测是一项涉及在图像或视频帧中检测特定点的任务。这些点被称为关键点,用于跟踪移动或姿态估计。YOLOv8 能够在图像或视频帧中准确迅速地检测关键点。 + +[姿态示例](pose.md){ .md-button } + +## 结论 + +YOLOv8 支持多个任务,包括检测、分割、分类和关键点检测。这些任务都具有不同的目标和用例。理解这些任务之间的差异,有助于您为自己的计算机视觉应用选择合适的任务。 diff --git a/docs/zh/tasks/pose.md b/docs/zh/tasks/pose.md new file mode 100644 index 0000000..6ad6890 --- /dev/null +++ b/docs/zh/tasks/pose.md @@ -0,0 +1,185 @@ +--- +comments: true +description: 学习如何使用Ultralytics YOLOv8进行姿态估计任务。找到预训练模型,学习如何训练、验证、预测以及导出你自己的模型。 +keywords: Ultralytics, YOLO, YOLOv8, 姿态估计, 关键点检测, 物体检测, 预训练模型, 机器学习, 人工智能 +--- + +# 姿态估计 + +姿态估计示例 + +姿态估计是一项任务,其涉及识别图像中特定点的位置,这些点通常被称为关键点。这些关键点可以代表物体的各种部位,如关节、标志点或其他显著特征。关键点的位置通常表示为一组2D `[x, y]` 或3D `[x, y, visible]` 坐标。 + +姿态估计模型的输出是一组点集,这些点代表图像中物体上的关键点,通常还包括每个点的置信度得分。当你需要在场景中识别物体的特定部位及其相互之间的位置时,姿态估计是一个不错的选择。 + +

+
+ +
+ 观看:使用Ultralytics YOLOv8进行姿态估计。 +

+ +!!! Tip "提示" + + YOLOv8 _姿态_ 模型使用 `-pose` 后缀,例如 `yolov8n-pose.pt`。这些模型在 [COCO关键点](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco-pose.yaml) 数据集上进行了训练,并且适用于各种姿态估计任务。 + +## [模型](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +这里展示了YOLOv8预训练的姿态模型。检测、分割和姿态模型在 [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) 数据集上进行预训练,而分类模型则在 [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) 数据集上进行预训练。 + +[模型](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) 在首次使用时将自动从最新的Ultralytics [发布版本](https://github.com/ultralytics/assets/releases)中下载。 + +| 模型 | 尺寸
(像素) | mAP姿态
50-95 | mAP姿态
50 | 速度
CPU ONNX
(毫秒) | 速度
A100 TensorRT
(毫秒) | 参数
(M) | 浮点数运算
(B) | +|----------------------------------------------------------------------------------------------------|-----------------|---------------------|------------------|-----------------------------|----------------------------------|----------------|-------------------| +| [YOLOv8n-姿态](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | +| [YOLOv8s-姿态](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | +| [YOLOv8m-姿态](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | +| [YOLOv8l-姿态](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | +| [YOLOv8x-姿态](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | +| [YOLOv8x-姿态-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | + +- **mAPval** 值适用于[COCO 关键点 val2017](http://cocodataset.org)数据集上的单模型单尺度。 +
通过执行 `yolo val pose data=coco-pose.yaml device=0` 来复现。 +- **速度** 是在 [亚马逊EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/)实例上使用COCO val图像的平均值。 +
通过执行 `yolo val pose data=coco8-pose.yaml batch=1 device=0|cpu` 来复现。 + +## 训练 + +在COCO8-pose数据集上训练一个YOLOv8姿态模型。 + +!!! Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载模型 + model = YOLO('yolov8n-pose.yaml') # 从YAML构建一个新模型 + model = YOLO('yolov8n-pose.pt') # 加载一个预训练模型(推荐用于训练) + model = YOLO('yolov8n-pose.yaml').load('yolov8n-pose.pt') # 从YAML构建并传输权重 + + # 训练模型 + results = model.train(data='coco8-pose.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # 从YAML构建一个新模型并从头开始训练 + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml epochs=100 imgsz=640 + + # 从一个预训练的*.pt模型开始训练 + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + + # 从YAML构建一个新模型,传输预训练权重并开始训练 + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml pretrained=yolov8n-pose.pt epochs=100 imgsz=640 + ``` + +### 数据集格式 + +YOLO姿态数据集格式的详细说明请参见[数据集指南](/../datasets/pose/index.md)。若要将您现有的数据集从其他格式(如COCO等)转换为YOLO格式,请使用Ultralytics的 [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) 工具。 + +## 验证 + +在COCO8-pose数据集上验证训练好的YOLOv8n姿态模型的准确性。无需传递参数,因为`model`会将其训练时使用的`data`和参数保留为模型属性。 + +!!! Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载模型 + model = YOLO('yolov8n-pose.pt') # 加载官方模型 + model = YOLO('path/to/best.pt') # 加载自定义模型 + + # 验证模型 + metrics = model.val() # 无需参数,数据集和设置都记住了 + metrics.box.map # map50-95 + metrics.box.map50 # map50 + metrics.box.map75 # map75 + metrics.box.maps # 包含每个类别map50-95的列表 + ``` + === "CLI" + + ```bash + yolo pose val model=yolov8n-pose.pt # 验证官方模型 + yolo pose val model=path/to/best.pt # 验证自定义模型 + ``` + +## 预测 + +使用训练好的YOLOv8n姿态模型对图像进行预测。 + +!!! 
Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载模型 + model = YOLO('yolov8n-pose.pt') # 加载官方模型 + model = YOLO('path/to/best.pt') # 加载自定义模型 + + # 用模型进行预测 + results = model('https://ultralytics.com/images/bus.jpg') # 在一张图片上预测 + ``` + === "CLI" + + ```bash + yolo pose predict model=yolov8n-pose.pt source='https://ultralytics.com/images/bus.jpg' # 用官方模型预测 + yolo pose predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # 用自定义模型预测 + ``` + +在[预测](https://docs.ultralytics.com/modes/predict/)页面中查看完整的`预测`模式细节。 + +## 导出 + +将YOLOv8n姿态模型导出为ONNX、CoreML等不同格式。 + +!!! Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 加载模型 + model = YOLO('yolov8n-pose.pt') # 加载官方模型 + model = YOLO('path/to/best.pt') # 加载自定义训练好的模型 + + # 导出模型 + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-pose.pt format=onnx # 导出官方模型 + yolo export model=path/to/best.pt format=onnx # 导出自定义训练好的模型 + ``` + +以下表格中有可用的YOLOv8姿态导出格式。您可以直接在导出的模型上进行预测或验证,例如 `yolo predict model=yolov8n-pose.onnx`。导出完成后,为您的模型显示用法示例。 + +| 格式 | `format` 参数 | 模型 | 元数据 | 参数 | +|--------------------------------------------------------------------|---------------|--------------------------------|-----|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-pose.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-pose.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-pose.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-pose_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-pose.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-pose.mlpackage` | 
✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-pose_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-pose.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-pose.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-pose_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-pose_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-pose_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-pose_ncnn_model/` | ✅ | `imgsz`, `half` | + +在[导出](https://docs.ultralytics.com/modes/export/) 页面中查看完整的`导出`细节。 diff --git a/docs/zh/tasks/segment.md b/docs/zh/tasks/segment.md new file mode 100644 index 0000000..246ef58 --- /dev/null +++ b/docs/zh/tasks/segment.md @@ -0,0 +1,188 @@ +--- +comments: true +description: 学习如何使用Ultralytics YOLO进行实例分割模型。包括训练、验证、图像预测和模型导出的说明。 +keywords: yolov8, 实例分割, Ultralytics, COCO数据集, 图像分割, 物体检测, 模型训练, 模型验证, 图像预测, 模型导出 +--- + +# 实例分割 + +实例分割示例 + +实例分割比物体检测有所深入,它涉及到识别图像中的个别物体并将它们从图像的其余部分中分割出来。 + +实例分割模型的输出是一组蒙版或轮廓,用于勾画图像中每个物体,以及每个物体的类别标签和置信度分数。实例分割在您需要不仅知道图像中的物体位置,还需要知道它们确切形状时非常有用。 + +

+
+ +
+ 观看: 在Python中使用预训练的Ultralytics YOLOv8模型运行分割。 +

+ +!!! Tip "提示" + + YOLOv8分割模型使用`-seg`后缀,即`yolov8n-seg.pt`,并在[COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml)上进行预训练。 + +## [模型](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) + +这里展示了预训练的YOLOv8分割模型。Detect、Segment和Pose模型都是在[COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml)数据集上进行预训练的,而Classify模型则是在[ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml)数据集上进行预训练的。 + +[模型](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models)会在首次使用时自动从Ultralytics的最新[版本](https://github.com/ultralytics/assets/releases)下载。 + +| 模型 | 尺寸
(像素) | mAPbox
50-95 | mAPmask
50-95 | 速度
CPU ONNX
(ms) | 速度
A100 TensorRT
(ms) | 参数
(M) | FLOPs
(B) | +|----------------------------------------------------------------------------------------------|-----------------|----------------------|-----------------------|-----------------------------|----------------------------------|----------------|-------------------| +| [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | +| [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | +| [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | +| [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | +| [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | + +- **mAPval** 值针对[COCO val2017](http://cocodataset.org)数据集的单模型单尺度。 +
通过`yolo val segment data=coco.yaml device=0`复现。 +- **速度** 基于在[Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/)实例上运行的COCO val图像的平均值。 +
通过`yolo val segment data=coco128-seg.yaml batch=1 device=0|cpu`复现。 + +## 训练 + +在COCO128-seg数据集上以640的图像尺寸训练YOLOv8n-seg模型共100个周期。想了解更多可用的参数,请查阅[配置](/../usage/cfg.md)页面。 + +!!! Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 载入一个模型 + model = YOLO('yolov8n-seg.yaml') # 从YAML构建一个新模型 + model = YOLO('yolov8n-seg.pt') # 载入预训练模型(推荐用于训练) + model = YOLO('yolov8n-seg.yaml').load('yolov8n.pt') # 从YAML构建并传递权重 + + # 训练模型 + results = model.train(data='coco128-seg.yaml', epochs=100, imgsz=640) + ``` + === "CLI" + + ```bash + # 从YAML构建新模型并从头开始训练 + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml epochs=100 imgsz=640 + + # 从预训练*.pt模型开始训练 + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + + # 从YAML构建新模型,传递预训练权重,开始训练 + yolo segment train data=coco128-seg.yaml model=yolov8n-seg.yaml pretrained=yolov8n-seg.pt epochs=100 imgsz=640 + ``` + +### 数据集格式 + +可以在[数据集指南](/../datasets/segment/index.md)中详细了解YOLO分割数据集格式。要将现有数据集从其他格式(如COCO等)转换为YOLO格式,请使用Ultralytics的[JSON2YOLO](https://github.com/ultralytics/JSON2YOLO)工具。 + +## 验证 + +在COCO128-seg数据集上验证已训练的YOLOv8n-seg模型的准确性。不需要传递任何参数,因为`model`保留了其训练的`data`和作为模型属性的设置。 + +!!! Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 载入一个模型 + model = YOLO('yolov8n-seg.pt') # 载入官方模型 + model = YOLO('path/to/best.pt') # 载入自定义模型 + + # 验证模型 + metrics = model.val() # 不需要参数,数据集和设置被记住了 + metrics.box.map # map50-95(B) + metrics.box.map50 # map50(B) + metrics.box.map75 # map75(B) + metrics.box.maps # 各类别map50-95(B)列表 + metrics.seg.map # map50-95(M) + metrics.seg.map50 # map50(M) + metrics.seg.map75 # map75(M) + metrics.seg.maps # 各类别map50-95(M)列表 + ``` + === "CLI" + + ```bash + yolo segment val model=yolov8n-seg.pt # 验证官方模型 + yolo segment val model=path/to/best.pt # 验证自定义模型 + ``` + +## 预测 + +使用已训练的YOLOv8n-seg模型在图像上进行预测。 + +!!! 
Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 载入一个模型 + model = YOLO('yolov8n-seg.pt') # 载入官方模型 + model = YOLO('path/to/best.pt') # 载入自定义模型 + + # 使用模型进行预测 + results = model('https://ultralytics.com/images/bus.jpg') # 对一张图像进行预测 + ``` + === "CLI" + + ```bash + yolo segment predict model=yolov8n-seg.pt source='https://ultralytics.com/images/bus.jpg' # 使用官方模型进行预测 + yolo segment predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # 使用自定义模型进行预测 + ``` + +预测模式的完整详情请参见[Predict](https://docs.ultralytics.com/modes/predict/)页面。 + +## 导出 + +将YOLOv8n-seg模型导出为ONNX、CoreML等不同格式。 + +!!! Example "示例" + + === "Python" + + ```python + from ultralytics import YOLO + + # 载入一个模型 + model = YOLO('yolov8n-seg.pt') # 载入官方模型 + model = YOLO('path/to/best.pt') # 载入自定义训练模型 + + # 导出模型 + model.export(format='onnx') + ``` + === "CLI" + + ```bash + yolo export model=yolov8n-seg.pt format=onnx # 导出官方模型 + yolo export model=path/to/best.pt format=onnx # 导出自定义训练模型 + ``` + +YOLOv8-seg导出格式的可用表格如下所示。您可以直接在导出的模型上进行预测或验证,例如`yolo predict model=yolov8n-seg.onnx`。导出完成后,示例用法将显示您的模型。 + +| 格式 | `format` 参数 | 模型 | 元数据 | 参数 | +|--------------------------------------------------------------------|---------------|-------------------------------|-----|-----------------------------------------------------| +| [PyTorch](https://pytorch.org/) | - | `yolov8n-seg.pt` | ✅ | - | +| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-seg.torchscript` | ✅ | `imgsz`, `optimize` | +| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-seg.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset` | +| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-seg_openvino_model/` | ✅ | `imgsz`, `half` | +| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-seg.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` | +| [CoreML](https://github.com/apple/coremltools) | `coreml` | 
`yolov8n-seg.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms` | +| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-seg_saved_model/` | ✅ | `imgsz`, `keras` | +| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-seg.pb` | ❌ | `imgsz` | +| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-seg.tflite` | ✅ | `imgsz`, `half`, `int8` | +| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-seg_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-seg_web_model/` | ✅ | `imgsz` | +| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-seg_paddle_model/` | ✅ | `imgsz` | +| [ncnn](https://github.com/Tencent/ncnn) | `ncnn` | `yolov8n-seg_ncnn_model/` | ✅ | `imgsz`, `half` | + +导出模式的完整详情请参见[Export](https://docs.ultralytics.com/modes/export/)页面。 diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..90d1415 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,34 @@ +## Ultralytics YOLOv8 Example Applications + +This repository features a collection of real-world applications and walkthroughs, provided as either Python files or notebooks. Explore the examples below to see how YOLOv8 can be integrated into various applications. 
+ +### Ultralytics YOLO Example Applications + +| Title | Format | Contributor | +| ----------------------------------------------------------------------------------------------------------------------------------------- | ------------------ | ----------------------------------------------------------------------------------------- | +| [YOLO ONNX Detection Inference with C++](./YOLOv8-CPP-Inference) | C++/ONNX | [Justas Bartnykas](https://github.com/JustasBart) | +| [YOLO OpenCV ONNX Detection Python](./YOLOv8-OpenCV-ONNX-Python) | OpenCV/Python/ONNX | [Farid Inawan](https://github.com/frdteknikelektro) | +| [YOLOv8 .NET ONNX ImageSharp](https://github.com/dme-compunet/YOLOv8) | C#/ONNX/ImageSharp | [Compunet](https://github.com/dme-compunet) | +| [YOLO .Net ONNX Detection C#](https://www.nuget.org/packages/Yolov8.Net) | C# .Net | [Samuel Stainback](https://github.com/sstainba) | +| [YOLOv8 on NVIDIA Jetson(TensorRT and DeepStream)](https://wiki.seeedstudio.com/YOLOv8-DeepStream-TRT-Jetson/) | Python | [Lakshantha](https://github.com/lakshanthad) | +| [YOLOv8 ONNXRuntime Python](./YOLOv8-ONNXRuntime) | Python/ONNXRuntime | [Semih Demirel](https://github.com/semihhdemirel) | +| [YOLOv8 ONNXRuntime CPP](./YOLOv8-ONNXRuntime-CPP) | C++/ONNXRuntime | [DennisJcy](https://github.com/DennisJcy), [Onuralp Sezer](https://github.com/onuralpszr) | +| [RTDETR ONNXRuntime C#](https://github.com/Kayzwer/yolo-cs/blob/master/RTDETR.cs) | C#/ONNX | [Kayzwer](https://github.com/Kayzwer) | +| [YOLOv8 SAHI Video Inference](https://github.com/RizwanMunawar/ultralytics/blob/main/examples/YOLOv8-SAHI-Inference-Video/yolov8_sahi.py) | Python | [Muhammad Rizwan Munawar](https://github.com/RizwanMunawar) | +| [YOLOv8 Region Counter](https://github.com/RizwanMunawar/ultralytics/blob/main/examples/YOLOv8-Region-Counter/yolov8_region_counter.py) | Python | [Muhammad Rizwan Munawar](https://github.com/RizwanMunawar) | +| [YOLOv8 Segmentation ONNXRuntime 
Python](./YOLOv8-Segmentation-ONNXRuntime-Python) | Python/ONNXRuntime | [jamjamjon](https://github.com/jamjamjon) | + +### How to Contribute + +We greatly appreciate contributions from the community, including examples, applications, and guides. If you'd like to contribute, please follow these guidelines: + +1. Create a pull request (PR) with the title prefix `[Example]`, adding your new example folder to the `examples/` directory within the repository. +1. Make sure your project adheres to the following standards: + - Makes use of the `ultralytics` package. + - Includes a `README.md` with clear instructions for setting up and running the example. + - Refrains from adding large files or dependencies unless they are absolutely necessary for the example. + - Contributors should be willing to provide support for their examples and address related issues. + +For more detailed information and guidance on contributing, please visit our [contribution documentation](https://docs.ultralytics.com/help/contributing). + +If you encounter any questions or concerns regarding these guidelines, feel free to open a PR or an issue in the repository, and we will assist you in the contribution process. 
diff --git a/examples/YOLOv8-CPP-Inference/CMakeLists.txt b/examples/YOLOv8-CPP-Inference/CMakeLists.txt new file mode 100644 index 0000000..bc2f33f --- /dev/null +++ b/examples/YOLOv8-CPP-Inference/CMakeLists.txt @@ -0,0 +1,28 @@ +cmake_minimum_required(VERSION 3.5) + +project(Yolov8CPPInference VERSION 0.1) + +set(CMAKE_INCLUDE_CURRENT_DIR ON) + +# CUDA +set(CUDA_TOOLKIT_ROOT_DIR "/usr/local/cuda") +find_package(CUDA 11 REQUIRED) + +set(CMAKE_CUDA_STANDARD 11) +set(CMAKE_CUDA_STANDARD_REQUIRED ON) +# !CUDA + +# OpenCV +find_package(OpenCV REQUIRED) +include_directories(${OpenCV_INCLUDE_DIRS}) +# !OpenCV + +set(PROJECT_SOURCES + main.cpp + + inference.h + inference.cpp +) + +add_executable(Yolov8CPPInference ${PROJECT_SOURCES}) +target_link_libraries(Yolov8CPPInference ${OpenCV_LIBS}) diff --git a/examples/YOLOv8-CPP-Inference/README.md b/examples/YOLOv8-CPP-Inference/README.md new file mode 100644 index 0000000..601c1d0 --- /dev/null +++ b/examples/YOLOv8-CPP-Inference/README.md @@ -0,0 +1,50 @@ +# YOLOv8/YOLOv5 Inference C++ + +This example demonstrates how to perform inference using YOLOv8 and YOLOv5 models in C++ with OpenCV's DNN API. + +## Usage + +```bash +git clone ultralytics +cd ultralytics +pip install . +cd examples/YOLOv8-CPP-Inference + +# Add a **yolov8\_.onnx** and/or **yolov5\_.onnx** model(s) to the ultralytics folder. +# Edit the **main.cpp** to change the **projectBasePath** to match your user. + +# Note that by default the CMake file will try and import the CUDA library to be used with the OpenCVs dnn (cuDNN) GPU Inference. +# If your OpenCV build does not use CUDA/cuDNN you can remove that import call and run the example on CPU. + +mkdir build +cd build +cmake .. 
+make +./Yolov8CPPInference +``` + +## Exporting YOLOv8 and YOLOv5 Models + +To export YOLOv8 models: + +```commandline +yolo export model=yolov8s.pt imgsz=480,640 format=onnx opset=12 +``` + +To export YOLOv5 models: + +```commandline +python3 export.py --weights yolov5s.pt --img 480 640 --include onnx --opset 12 +``` + +yolov8s.onnx: + +![image](https://user-images.githubusercontent.com/40023722/217356132-a4cecf2e-2729-4acb-b80a-6559022d7707.png) + +yolov5s.onnx: + +![image](https://user-images.githubusercontent.com/40023722/217357005-07464492-d1da-42e3-98a7-fc753f87d5e6.png) + +This repository utilizes OpenCV's DNN API to run ONNX exported models of YOLOv5 and YOLOv8. In theory, it should work for YOLOv6 and YOLOv7 as well, but they have not been tested. Note that the example networks are exported with rectangular (640x480) resolutions, but any exported resolution will work. You may want to use the letterbox approach for square images, depending on your use case. + +The **main** branch version uses Qt as a GUI wrapper. The primary focus here is the **Inference** class file, which demonstrates how to transpose YOLOv8 models to work as YOLOv5 models. 
diff --git a/examples/YOLOv8-CPP-Inference/inference.cpp b/examples/YOLOv8-CPP-Inference/inference.cpp
new file mode 100644
index 0000000..12c2607
--- /dev/null
+++ b/examples/YOLOv8-CPP-Inference/inference.cpp
@@ -0,0 +1,185 @@
+#include "inference.h"
+
+// Stores the model configuration and immediately loads the ONNX network.
+// classesTxtFile is remembered but not read: the class names are hard-coded in inference.h.
+Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda)
+{
+    modelPath = onnxModelPath;
+    modelShape = modelInputShape;
+    classesPath = classesTxtFile;
+    cudaEnabled = runWithCuda;
+
+    loadOnnxNetwork();
+    // loadClassesFromFile(); The classes are hard-coded for this example
+}
+
+// Runs the network on `input` and returns the detections that survive score filtering and NMS.
+// Box coordinates are scaled from model-input space back to the (optionally squared) input image.
+std::vector<Detection> Inference::runInference(const cv::Mat &input)
+{
+    cv::Mat modelInput = input;
+    if (letterBoxForSquare && modelShape.width == modelShape.height)
+        modelInput = formatToSquare(modelInput);
+
+    cv::Mat blob;
+    cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
+    net.setInput(blob);
+
+    std::vector<cv::Mat> outputs;
+    net.forward(outputs, net.getUnconnectedOutLayersNames());
+
+    int rows = outputs[0].size[1];
+    int dimensions = outputs[0].size[2];
+
+    bool yolov8 = false;
+    // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
+    // yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
+    if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8)
+    {
+        yolov8 = true;
+        rows = outputs[0].size[2];
+        dimensions = outputs[0].size[1];
+
+        // Transpose so that every candidate occupies one contiguous row, as in the yolov5 layout.
+        outputs[0] = outputs[0].reshape(1, dimensions);
+        cv::transpose(outputs[0], outputs[0]);
+    }
+    float *data = (float *)outputs[0].data;
+
+    float x_factor = modelInput.cols / modelShape.width;
+    float y_factor = modelInput.rows / modelShape.height;
+
+    std::vector<int> class_ids;
+    std::vector<float> confidences;
+    std::vector<cv::Rect> boxes;
+
+    // Converts the centre-format box at the start of `row` to a top-left cv::Rect in image pixels.
+    auto pushBox = [&](const float *row)
+    {
+        float x = row[0];
+        float y = row[1];
+        float w = row[2];
+        float h = row[3];
+
+        int left = int((x - 0.5 * w) * x_factor);
+        int top = int((y - 0.5 * h) * y_factor);
+
+        int width = int(w * x_factor);
+        int height = int(h * y_factor);
+
+        boxes.push_back(cv::Rect(left, top, width, height));
+    };
+
+    for (int i = 0; i < rows; ++i)
+    {
+        if (yolov8)
+        {
+            // Class scores follow the four box values directly.
+            float *classes_scores = data+4;
+
+            cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
+            cv::Point class_id;
+            double maxClassScore;
+
+            minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
+
+            if (maxClassScore > modelScoreThreshold)
+            {
+                confidences.push_back(maxClassScore);
+                class_ids.push_back(class_id.x);
+                pushBox(data);
+            }
+        }
+        else // yolov5
+        {
+            // Index 4 is the objectness confidence; class scores start at index 5.
+            float confidence = data[4];
+
+            if (confidence >= modelConfidenceThreshold)
+            {
+                float *classes_scores = data+5;
+
+                cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
+                cv::Point class_id;
+                double max_class_score;
+
+                minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
+
+                if (max_class_score > modelScoreThreshold)
+                {
+                    confidences.push_back(confidence);
+                    class_ids.push_back(class_id.x);
+                    pushBox(data);
+                }
+            }
+        }
+
+        data += dimensions;
+    }
+
+    std::vector<int> nms_result;
+    cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);
+
+    // One generator for the whole call; it was previously re-created for every detection.
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_int_distribution<int> dis(100, 255);
+
+    std::vector<Detection> detections{};
+    for (unsigned long i = 0; i < nms_result.size(); ++i)
+    {
+        int idx = nms_result[i];
+
+        Detection result;
+        result.class_id = class_ids[idx];
+        result.confidence = confidences[idx];
+
+        result.color = cv::Scalar(dis(gen),
+                                  dis(gen),
+                                  dis(gen));
+
+        result.className = classes[result.class_id];
+        result.box = boxes[idx];
+
+        detections.push_back(result);
+    }
+
+    return detections;
+}
+
+// Reads one class name per line from classesPath.
+// The file's names replace the built-in list; appending would make `classes` longer than
+// the number of scores per candidate. A missing or empty file leaves the list untouched.
+void Inference::loadClassesFromFile()
+{
+    std::ifstream inputFile(classesPath);
+    if (!inputFile.is_open())
+        return;
+
+    std::vector<std::string> loaded;
+    std::string classLine;
+    while (std::getline(inputFile, classLine))
+        loaded.push_back(classLine);
+
+    if (!loaded.empty())
+        classes = loaded;
+}
+
+// Loads the ONNX model from modelPath and selects the CUDA or CPU backend/target.
+void Inference::loadOnnxNetwork()
+{
+    net = cv::dnn::readNetFromONNX(modelPath);
+    if (cudaEnabled)
+    {
+        std::cout << "\nRunning on CUDA" << std::endl;
+        net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
+        net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
+    }
+    else
+    {
+        std::cout << "\nRunning on CPU" << std::endl;
+        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
+        net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
+    }
+}
+
+// Copies `source` into the top-left corner of a black square whose side is max(cols, rows).
+// NOTE(review): the canvas is always CV_8UC3, so a 3-channel 8-bit source appears to be assumed.
+cv::Mat Inference::formatToSquare(const cv::Mat &source)
+{
+    int col = source.cols;
+    int row = source.rows;
+    int _max = MAX(col, row);
+    cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
+    source.copyTo(result(cv::Rect(0, 0, col, row)));
+    return result;
+}
diff --git a/examples/YOLOv8-CPP-Inference/inference.h b/examples/YOLOv8-CPP-Inference/inference.h
new file mode 100644
index 0000000..dc6149f
--- /dev/null
+++ b/examples/YOLOv8-CPP-Inference/inference.h
@@ -0,0 +1,52 @@
+#ifndef INFERENCE_H
+#define INFERENCE_H
+
+// Cpp native
+#include <fstream>
+#include <vector>
+#include <string>
+#include <random>
+
+// OpenCV / DNN / Inference
+#include <opencv2/imgproc.hpp>
+#include <opencv2/opencv.hpp>
+#include <opencv2/dnn.hpp>
+
+// One detection returned by Inference::runInference.
+struct Detection
+{
+    int class_id{0};
+    std::string className{};
+    float confidence{0.0};
+    cv::Scalar color{};
+    cv::Rect box{};
+};
+
+// Thin wrapper around cv::dnn::Net for YOLOv5/YOLOv8 ONNX detection models.
+class Inference
+{
+public:
+    Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape = {640, 640}, const std::string &classesTxtFile = "", const bool &runWithCuda = true);
+    std::vector<Detection> runInference(const cv::Mat &input);
+
+private:
+    void loadClassesFromFile();
+    void loadOnnxNetwork();
+    cv::Mat formatToSquare(const cv::Mat &source);
+
+    std::string modelPath{};
+    std::string classesPath{};
+    bool cudaEnabled{};
+
+    std::vector<std::string> classes{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};
+
+    cv::Size2f modelShape{};
+
+    float modelConfidenceThreshold {0.25}; // yolov5 objectness cut-off
+    float modelScoreThreshold {0.45};      // per-class score cut-off, also passed to NMS
+    float modelNMSThreshold {0.50};
+
+    bool letterBoxForSquare = true;
+
+    cv::dnn::Net net;
+};
+
+#endif // INFERENCE_H
diff --git a/examples/YOLOv8-CPP-Inference/main.cpp b/examples/YOLOv8-CPP-Inference/main.cpp
new file mode 100644
index 0000000..6d1ba98
--- /dev/null
+++ b/examples/YOLOv8-CPP-Inference/main.cpp
@@ -0,0 +1,70 @@
+#include <iostream>
+#include <vector>
+#include <getopt.h>
+
+#include <opencv2/opencv.hpp>
+
+#include "inference.h"
+
+using namespace std;
+using namespace cv;
+
+// Runs detection on two sample images and shows each annotated result in a window.
+int main(int argc, char **argv)
+{
+    std::string projectBasePath = "/home/user/ultralytics"; // Set your ultralytics base path
+
+    bool runOnGPU = true;
+
+    //
+    // Pass in either:
+    //
+    // "yolov8s.onnx" or "yolov5s.onnx"
+    //
+    // To run Inference with yolov8/yolov5 (ONNX)
+    //
+
+    // Note that in this example the classes are hard-coded and 'classes.txt' is a place holder.
+    Inference inf(projectBasePath + "/yolov8s.onnx", cv::Size(640, 480), "classes.txt", runOnGPU);
+
+    std::vector<std::string> imageNames;
+    imageNames.push_back(projectBasePath + "/ultralytics/assets/bus.jpg");
+    imageNames.push_back(projectBasePath + "/ultralytics/assets/zidane.jpg");
+
+    for (size_t i = 0; i < imageNames.size(); ++i)
+    {
+        cv::Mat frame = cv::imread(imageNames[i]);
+        // imread returns an empty Mat on failure; skip instead of feeding it to the network.
+        if (frame.empty())
+        {
+            std::cerr << "Could not read image: " << imageNames[i] << std::endl;
+            continue;
+        }
+
+        // Inference starts here...
+        std::vector<Detection> output = inf.runInference(frame);
+
+        int detections = output.size();
+        std::cout << "Number of detections:" << detections << std::endl;
+
+        // Separate index name: the original inner loop shadowed the outer `i`.
+        for (int j = 0; j < detections; ++j)
+        {
+            Detection detection = output[j];
+
+            cv::Rect box = detection.box;
+            cv::Scalar color = detection.color;
+
+            // Detection box
+            cv::rectangle(frame, box, color, 2);
+
+            // Detection box text
+            std::string classString = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
+            cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
+            cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20);
+
+            cv::rectangle(frame, textBox, color, cv::FILLED);
+            cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);
+        }
+        // Inference ends here...
+
+        // This is only for preview purposes
+        float scale = 0.8;
+        cv::resize(frame, frame, cv::Size(frame.cols*scale, frame.rows*scale));
+        cv::imshow("Inference", frame);
+
+        cv::waitKey(-1);
+    }
+}
diff --git a/examples/YOLOv8-ONNXRuntime-CPP/CMakeLists.txt b/examples/YOLOv8-ONNXRuntime-CPP/CMakeLists.txt
new file mode 100644
index 0000000..86232cc
--- /dev/null
+++ b/examples/YOLOv8-ONNXRuntime-CPP/CMakeLists.txt
@@ -0,0 +1,96 @@
+cmake_minimum_required(VERSION 3.5)
+
+set(PROJECT_NAME Yolov8OnnxRuntimeCPPInference)
+project(${PROJECT_NAME} VERSION 0.0.1 LANGUAGES CXX)
+
+
+# -------------- Support C++17 for using filesystem ------------------#
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS ON)
+set(CMAKE_INCLUDE_CURRENT_DIR ON)
+
+
+# -------------- OpenCV ------------------#
+find_package(OpenCV REQUIRED)
+include_directories(${OpenCV_INCLUDE_DIRS})
+
+
+# -------------- Compile CUDA for FP16 inference if needed ------------------#
+option(USE_CUDA "Enable CUDA support" ON)
+if (NOT APPLE AND USE_CUDA)
+    find_package(CUDA REQUIRED)
+    include_directories(${CUDA_INCLUDE_DIRS})
+    add_definitions(-DUSE_CUDA)
+else ()
+    set(USE_CUDA OFF)
+endif ()
+
+# -------------- ONNXRUNTIME ------------------#
+
+# Set ONNXRUNTIME_VERSION
+set(ONNXRUNTIME_VERSION 1.15.1)
+
+# The LINUX variable only exists in CMake >= 3.25; "UNIX AND NOT APPLE" also works with
+# the 3.5 minimum declared above.
+if (WIN32)
+    if (USE_CUDA)
+        set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-win-x64-gpu-${ONNXRUNTIME_VERSION}")
+    else ()
+        set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-win-x64-${ONNXRUNTIME_VERSION}")
+    endif ()
+elseif (UNIX AND NOT APPLE)
+    if (USE_CUDA)
+        set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-linux-x64-gpu-${ONNXRUNTIME_VERSION}")
+    else ()
+        set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-linux-x64-${ONNXRUNTIME_VERSION}")
+    endif ()
+elseif (APPLE)
+    set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-osx-arm64-${ONNXRUNTIME_VERSION}")
+    # Apple X64 binary
+    # set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-osx-x64-${ONNXRUNTIME_VERSION}")
+    # Apple Universal binary
+    # set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-osx-universal2-${ONNXRUNTIME_VERSION}")
+endif ()
+
+include_directories(${PROJECT_NAME} ${ONNXRUNTIME_ROOT}/include)
+
+set(PROJECT_SOURCES
+        main.cpp
+        inference.h
+        inference.cpp
+)
+
+add_executable(${PROJECT_NAME} ${PROJECT_SOURCES})
+
+if (WIN32)
+    target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${ONNXRUNTIME_ROOT}/lib/onnxruntime.lib)
+    if (USE_CUDA)
+        target_link_libraries(${PROJECT_NAME} ${CUDA_LIBRARIES})
+    endif ()
+elseif (UNIX AND NOT APPLE)
+    target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so)
+    if (USE_CUDA)
+        target_link_libraries(${PROJECT_NAME} ${CUDA_LIBRARIES})
+    endif ()
+elseif (APPLE)
+    target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.dylib)
+endif ()
+
+# For windows system, copy onnxruntime.dll to the same folder of the executable file
+if (WIN32)
+    add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different
+            "${ONNXRUNTIME_ROOT}/lib/onnxruntime.dll"
+            $<TARGET_FILE_DIR:${PROJECT_NAME}>)
+endif ()
+
+# Download https://raw.githubusercontent.com/ultralytics/ultralytics/main/ultralytics/cfg/datasets/coco.yaml
+# and put it in the same folder of the executable file
+configure_file(coco.yaml ${CMAKE_CURRENT_BINARY_DIR}/coco.yaml COPYONLY)
+
+# Copy yolov8n.onnx file to the same folder of the executable file
+configure_file(yolov8n.onnx ${CMAKE_CURRENT_BINARY_DIR}/yolov8n.onnx COPYONLY)
+
+# Create folder name images in the same folder of the executable file
+add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/images
+)
diff --git a/examples/YOLOv8-ONNXRuntime-CPP/README.md b/examples/YOLOv8-ONNXRuntime-CPP/README.md
new file mode 100644
index 0000000..f70127f
--- /dev/null
+++ b/examples/YOLOv8-ONNXRuntime-CPP/README.md
@@ -0,0 +1,94 @@ +

YOLOv8 OnnxRuntime C++

+ +

+ C++ + Onnx-runtime +

+ +This example demonstrates how to perform inference using YOLOv8 in C++ with ONNX Runtime and OpenCV's API. + +## Benefits ✨ + +- Friendly for deployment in the industrial sector. +- Faster than OpenCV's DNN inference on both CPU and GPU. +- Supports FP32 and FP16 CUDA acceleration. + +## Exporting YOLOv8 Models 📦 + +To export YOLOv8 models, use the following Python script: + +```python +from ultralytics import YOLO + +# Load a YOLOv8 model +model = YOLO("yolov8n.pt") + +# Export the model +model.export(format="onnx", opset=12, simplify=True, dynamic=False, imgsz=640) +``` + +Alternatively, you can export the model from the terminal with the following command: + +```bash +yolo export model=yolov8n.pt opset=12 simplify=True dynamic=False format=onnx imgsz=640,640 +``` + +## Download COCO.yaml file 📂 + +To run the example, you also need to download coco.yaml. You can download the file manually from [here](https://raw.githubusercontent.com/ultralytics/ultralytics/main/ultralytics/cfg/datasets/coco.yaml). + +## Dependencies ⚙️ + +| Dependency | Version | +| -------------------------------- | -------------- | +| Onnxruntime(linux,windows,macos) | >=1.14.1 | +| OpenCV | >=4.0.0 | +| C++ Standard | >=17 | +| Cmake | >=3.5 | +| Cuda (Optional) | >=11.4 \<12.0 | +| cuDNN (Cuda required) | =8 | + +Note: The dependency on C++17 is due to the usage of the C++17 filesystem feature. + +Note (2): Due to ONNX Runtime, we need to use CUDA 11 and cuDNN 8. Keep in mind that this requirement might change in the future. + +## Build 🛠️ + +1. Clone the repository to your local machine. +2. Navigate to the root directory of the repository. +3. Create a build directory and navigate to it: + +```console +mkdir build && cd build +``` + +4. Run CMake to generate the build files: + +```console +cmake .. +``` + +5. Build the project: + +```console +make +``` + +6. The built executable should now be located in the `build` directory.
+
+## Usage 🚀
+
+```c++
+// CPU inference
+DCSP_INIT_PARAM params{ model_path, YOLO_ORIGIN_V8, {imgsz_w, imgsz_h}, 0.1, 0.5, false};
+// GPU inference
+DCSP_INIT_PARAM params{ model_path, YOLO_ORIGIN_V8, {imgsz_w, imgsz_h}, 0.1, 0.5, true};
+// Load your image
+cv::Mat img = cv::imread(img_path);
+// Init Inference Session
+char* ret = yoloDetector->CreateSession(params);
+
+ret = yoloDetector->RunSession(img, res);
+```
+
+This repository should also work for YOLOv5, which needs a permute operator for the output of the YOLOv5 model, but this has not been implemented yet.
diff --git a/examples/YOLOv8-ONNXRuntime-CPP/inference.cpp b/examples/YOLOv8-ONNXRuntime-CPP/inference.cpp
new file mode 100644
index 0000000..a2de772
--- /dev/null
+++ b/examples/YOLOv8-ONNXRuntime-CPP/inference.cpp
@@ -0,0 +1,306 @@
+#include "inference.h"
+#include <cstring>
+#include <regex>
+
+#define benchmark
+
+DCSP_CORE::DCSP_CORE() {
+
+}
+
+
+// Releases the session and the node-name buffers allocated with new[] in CreateSession.
+DCSP_CORE::~DCSP_CORE() {
+    delete session;
+    for (const char *name : inputNodeNames) {
+        delete[] name;
+    }
+    for (const char *name : outputNodeNames) {
+        delete[] name;
+    }
+}
+
+#ifdef USE_CUDA
+namespace Ort
+{
+    // Lets Ort::Value::CreateTensor accept CUDA `half` buffers as FP16 tensors.
+    template<>
+    struct TypeToTensorType<half> { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; };
+}
+#endif
+
+
+// Writes the 8-bit image into iBlob in planar (channel, row, column) order, scaled by 1/255.
+// iBlob must already hold channels * rows * cols elements.
+template<typename T>
+char *BlobFromImage(cv::Mat &iImg, T &iBlob) {
+    int channels = iImg.channels();
+    int imgHeight = iImg.rows;
+    int imgWidth = iImg.cols;
+
+    for (int c = 0; c < channels; c++) {
+        for (int h = 0; h < imgHeight; h++) {
+            for (int w = 0; w < imgWidth; w++) {
+                iBlob[c * imgWidth * imgHeight + h * imgWidth + w] = typename std::remove_pointer<T>::type(
+                        (iImg.at<cv::Vec3b>(h, w)[c]) / 255.0f);
+            }
+        }
+    }
+    return RET_OK;
+}
+
+
+// Converts iImg to RGB, resizes it so the longer side fits the target, and pastes it into the
+// top-left corner of a zero-filled canvas (written to oImg). The scale factor is stored in
+// resizeScales so TensorProcess can map boxes back to the source image.
+// Defined on DCSP_CORE to match the declaration in inference.h (it was qualified as DL_CORE).
+char* DCSP_CORE::PreProcess(cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oImg)
+{
+    if (iImg.channels() == 3)
+    {
+        oImg = iImg.clone();
+        cv::cvtColor(oImg, oImg, cv::COLOR_BGR2RGB);
+    }
+    else
+    {
+        cv::cvtColor(iImg, oImg, cv::COLOR_GRAY2RGB);
+    }
+
+    if (iImg.cols >= iImg.rows)
+    {
+        resizeScales = iImg.cols / (float)iImgSize.at(0);
+        cv::resize(oImg, oImg, cv::Size(iImgSize.at(0), int(iImg.rows / resizeScales)));
+    }
+    else
+    {
+        // NOTE(review): the scale uses iImgSize.at(0) while the target height is iImgSize.at(1);
+        // the two only agree for square input sizes - confirm the intended convention.
+        resizeScales = iImg.rows / (float)iImgSize.at(0);
+        cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1)));
+    }
+    cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3);
+    oImg.copyTo(tempImg(cv::Rect(0, 0, oImg.cols, oImg.rows)));
+    oImg = tempImg;
+    return RET_OK;
+}
+
+
+// Copies the parameters, creates the ONNX Runtime session, records the input/output node names
+// and runs one warm-up inference. Returns RET_OK on success, otherwise an error message.
+char *DCSP_CORE::CreateSession(DCSP_INIT_PARAM &iParams) {
+    char *Ret = RET_OK;
+    std::regex pattern("[\u4e00-\u9fa5]");
+    bool result = std::regex_search(iParams.ModelPath, pattern);
+    if (result) {
+        // The interface returns char *, so the literal needs an explicit cast.
+        Ret = const_cast<char *>("[DCSP_ONNX]:Model path error.Change your model path without chinese characters.");
+        std::cout << Ret << std::endl;
+        return Ret;
+    }
+    try {
+        rectConfidenceThreshold = iParams.RectConfidenceThreshold;
+        iouThreshold = iParams.iouThreshold;
+        imgSize = iParams.imgSize;
+        modelType = iParams.ModelType;
+        // Always assign: the flag is read later even when CUDA is disabled.
+        cudaEnable = iParams.CudaEnable;
+        env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Yolo");
+        Ort::SessionOptions sessionOption;
+        if (iParams.CudaEnable) {
+            OrtCUDAProviderOptions cudaOption;
+            cudaOption.device_id = 0;
+            sessionOption.AppendExecutionProvider_CUDA(cudaOption);
+        }
+        sessionOption.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
+        sessionOption.SetIntraOpNumThreads(iParams.IntraOpNumThreads);
+        sessionOption.SetLogSeverityLevel(iParams.LogSeverityLevel);
+
+#ifdef _WIN32
+        // std::wstring owns the converted path, so nothing leaks if session creation throws.
+        int ModelPathSize = MultiByteToWideChar(CP_UTF8, 0, iParams.ModelPath.c_str(), static_cast<int>(iParams.ModelPath.length()), nullptr, 0);
+        std::wstring wideModelPath(ModelPathSize, L'\0');
+        MultiByteToWideChar(CP_UTF8, 0, iParams.ModelPath.c_str(), static_cast<int>(iParams.ModelPath.length()), &wideModelPath[0], ModelPathSize);
+        const wchar_t* modelPath = wideModelPath.c_str();
+#else
+        const char *modelPath = iParams.ModelPath.c_str();
+#endif // _WIN32
+
+        session = new Ort::Session(env, modelPath, sessionOption);
+        Ort::AllocatorWithDefaultOptions allocator;
+        size_t inputNodesNum = session->GetInputCount();
+        for (size_t i = 0; i < inputNodesNum; i++) {
+            Ort::AllocatedStringPtr input_node_name = session->GetInputNameAllocated(i, allocator);
+            // Size the copy from the actual name; the fixed 50-byte buffer could overflow.
+            char *temp_buf = new char[std::strlen(input_node_name.get()) + 1];
+            std::strcpy(temp_buf, input_node_name.get());
+            inputNodeNames.push_back(temp_buf);
+        }
+        size_t OutputNodesNum = session->GetOutputCount();
+        for (size_t i = 0; i < OutputNodesNum; i++) {
+            Ort::AllocatedStringPtr output_node_name = session->GetOutputNameAllocated(i, allocator);
+            // Same here: the fixed 10-byte buffer overflowed for names longer than 9 characters.
+            char *temp_buf = new char[std::strlen(output_node_name.get()) + 1];
+            std::strcpy(temp_buf, output_node_name.get());
+            outputNodeNames.push_back(temp_buf);
+        }
+        options = Ort::RunOptions{nullptr};
+        WarmUpSession();
+        return RET_OK;
+    }
+    catch (const std::exception &e) {
+        std::cout << "[DCSP_ONNX]:" << e.what() << std::endl;
+        return const_cast<char *>("[DCSP_ONNX]:Create session failed.");
+    }
+
+}
+
+
+// Pre-processes iImg, builds the input blob and appends the detections to oResult.
+// The blob is handed to TensorProcess, which frees it.
+char *DCSP_CORE::RunSession(cv::Mat &iImg, std::vector<DCSP_RESULT> &oResult) {
+#ifdef benchmark
+    clock_t starttime_1 = clock();
+#endif // benchmark
+
+    char *Ret = RET_OK;
+    cv::Mat processedImg;
+    PreProcess(iImg, imgSize, processedImg);
+    if (modelType < 4) {
+        float *blob = new float[processedImg.total() * 3];
+        BlobFromImage(processedImg, blob);
+        std::vector<int64_t> inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)};
+        TensorProcess(starttime_1, iImg, blob, inputNodeDims, oResult);
+    } else {
+        // Half-precision models are only handled when built with USE_CUDA.
+#ifdef USE_CUDA
+        half* blob = new half[processedImg.total() * 3];
+        BlobFromImage(processedImg, blob);
+        std::vector<int64_t> inputNodeDims = { 1,3,imgSize.at(0),imgSize.at(1) };
+        TensorProcess(starttime_1, iImg, blob, inputNodeDims, oResult);
+#endif
+    }
+
+    return Ret;
+}
+
+
+// Runs the session on `blob`, decodes the YOLOv8 detection output, applies NMS and appends
+// the surviving boxes to oResult. Takes ownership of `blob` and frees it.
+template<typename N>
+char *DCSP_CORE::TensorProcess(clock_t &starttime_1, cv::Mat &iImg, N &blob, std::vector<int64_t> &inputNodeDims,
+                               std::vector<DCSP_RESULT> &oResult) {
+    Ort::Value inputTensor = Ort::Value::CreateTensor<typename std::remove_pointer<N>::type>(
+            Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1),
+            inputNodeDims.data(), inputNodeDims.size());
+#ifdef benchmark
+    clock_t starttime_2 = clock();
+#endif // benchmark
+    auto outputTensor = session->Run(options, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(),
+                                     outputNodeNames.size());
+#ifdef benchmark
+    clock_t starttime_3 = clock();
+#endif // benchmark
+
+    Ort::TypeInfo typeInfo = outputTensor.front().GetTypeInfo();
+    auto tensor_info = typeInfo.GetTensorTypeAndShapeInfo();
+    std::vector<int64_t> outputNodeDims = tensor_info.GetShape();
+    auto output = outputTensor.front().GetTensorMutableData<typename std::remove_pointer<N>::type>();
+    // The blob comes from new[] in RunSession, so it must be released with delete[].
+    delete[] blob;
+    switch (modelType) {
+        case 1://V8_ORIGIN_FP32
+        case 4://V8_ORIGIN_FP16
+        {
+            int strideNum = outputNodeDims[2];
+            int signalResultNum = outputNodeDims[1];
+            std::vector<int> class_ids;
+            std::vector<float> confidences;
+            std::vector<cv::Rect> boxes;
+
+            cv::Mat rawData;
+            if (modelType == 1) {
+                // FP32
+                rawData = cv::Mat(signalResultNum, strideNum, CV_32F, output);
+            } else {
+                // FP16
+                rawData = cv::Mat(signalResultNum, strideNum, CV_16F, output);
+                rawData.convertTo(rawData, CV_32F);
+            }
+            // Transpose so each candidate is one row: four box values followed by class scores.
+            rawData = rawData.t();
+            float *data = (float *) rawData.data;
+
+            for (int i = 0; i < strideNum; ++i) {
+                float *classesScores = data + 4;
+                cv::Mat scores(1, this->classes.size(), CV_32FC1, classesScores);
+                cv::Point class_id;
+                double maxClassScore;
+                cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
+                if (maxClassScore > rectConfidenceThreshold) {
+                    confidences.push_back(maxClassScore);
+                    class_ids.push_back(class_id.x);
+
+                    float x = data[0];
+                    float y = data[1];
+                    float w = data[2];
+                    float h = data[3];
+
+                    // Centre format to top-left corner, scaled back by the letterbox factor.
+                    int left = int((x - 0.5 * w) * resizeScales);
+                    int top = int((y - 0.5 * h) * resizeScales);
+
+                    int width = int(w * resizeScales);
+                    int height = int(h * resizeScales);
+
+                    boxes.emplace_back(left, top, width, height);
+                }
+                data += signalResultNum;
+            }
+
+            std::vector<int> nmsResult;
+            cv::dnn::NMSBoxes(boxes, confidences, rectConfidenceThreshold, iouThreshold, nmsResult);
+
+            for (int idx : nmsResult) {
+                DCSP_RESULT result;
+                result.classId = class_ids[idx];
+                result.confidence = confidences[idx];
+                result.box = boxes[idx];
+                oResult.push_back(result);
+            }
+
+
+#ifdef benchmark
+            clock_t starttime_4 = clock();
+            double pre_process_time = (double) (starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000;
+            double process_time = (double) (starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000;
+            double post_process_time = (double) (starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000;
+            if (cudaEnable) {
+                std::cout << "[DCSP_ONNX(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time
+                          << "ms inference, " << post_process_time << "ms post-process." << std::endl;
+            } else {
+                std::cout << "[DCSP_ONNX(CPU)]: " << pre_process_time << "ms pre-process, " << process_time
+                          << "ms inference, " << post_process_time << "ms post-process." << std::endl;
+            }
+#endif // benchmark
+
+            break;
+        }
+    }
+    return RET_OK;
+}
+
+
+// Runs one inference on a blank image of the configured size and, when CUDA is enabled,
+// prints how long it took.
+char *DCSP_CORE::WarmUpSession() {
+    clock_t starttime_1 = clock();
+    cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3);
+    cv::Mat processedImg;
+    PreProcess(iImg, imgSize, processedImg);
+    if (modelType < 4) {
+        float *blob = new float[iImg.total() * 3];
+        BlobFromImage(processedImg, blob);
+        std::vector<int64_t> YOLO_input_node_dims = {1, 3, imgSize.at(0), imgSize.at(1)};
+        Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
+                Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1),
+                YOLO_input_node_dims.data(), YOLO_input_node_dims.size());
+        auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(),
+                                           outputNodeNames.size());
+        delete[] blob;
+        clock_t starttime_4 = clock();
+        double post_process_time = (double) (starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000;
+        if (cudaEnable) {
+            std::cout << "[DCSP_ONNX(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl;
+        }
+    } else {
+#ifdef USE_CUDA
+        half* blob = new half[iImg.total() * 3];
+        BlobFromImage(processedImg, blob);
+        std::vector<int64_t> YOLO_input_node_dims = { 1,3,imgSize.at(0),imgSize.at(1) };
+        Ort::Value input_tensor = Ort::Value::CreateTensor<half>(Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), YOLO_input_node_dims.data(), YOLO_input_node_dims.size());
+        auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(), outputNodeNames.size());
+        delete[] blob;
+        clock_t starttime_4 = clock();
+        double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000;
+        if (cudaEnable)
+        {
+            std::cout << "[DCSP_ONNX(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl;
+        }
+#endif
+    }
+    return RET_OK;
+}
diff --git a/examples/YOLOv8-ONNXRuntime-CPP/inference.h b/examples/YOLOv8-ONNXRuntime-CPP/inference.h
new file mode 100644
index 0000000..bd85e78
--- /dev/null
+++ b/examples/YOLOv8-ONNXRuntime-CPP/inference.h
@@ -0,0 +1,87 @@
+#pragma once
+
+#define RET_OK nullptr
+
+#ifdef _WIN32
+#include <Windows.h>
+#include <direct.h>
+#include <io.h>
+#endif
+
+#include <string>
+#include <vector>
+#include <cstdio>
+#include <opencv2/opencv.hpp>
+#include "onnxruntime_cxx_api.h"
+
+#ifdef USE_CUDA
+#include <cuda_fp16.h>
+#endif
+
+
+// Values below 4 take the FP32 path in RunSession/WarmUpSession; 4 and above take the FP16 path.
+enum MODEL_TYPE {
+    //FLOAT32 MODEL
+    YOLO_ORIGIN_V5 = 0,
+    YOLO_ORIGIN_V8 = 1,//only support v8 detector currently
+    YOLO_POSE_V8 = 2,
+    YOLO_CLS_V8 = 3,
+    YOLO_ORIGIN_V8_HALF = 4,
+    YOLO_POSE_V8_HALF = 5,
+    YOLO_CLS_V8_HALF = 6
+};
+
+
+// Parameters consumed by DCSP_CORE::CreateSession.
+typedef struct _DCSP_INIT_PARAM {
+    std::string ModelPath;
+    MODEL_TYPE ModelType = YOLO_ORIGIN_V8;
+    std::vector<int> imgSize = {640, 640};
+    float RectConfidenceThreshold = 0.6;
+    float iouThreshold = 0.5;
+    bool CudaEnable = false;
+    int LogSeverityLevel = 3;
+    int IntraOpNumThreads = 1;
+} DCSP_INIT_PARAM;
+
+
+// One detection produced by DCSP_CORE::RunSession.
+typedef struct _DCSP_RESULT {
+    int classId;
+    float confidence;
+    cv::Rect box;
+} DCSP_RESULT;
+
+
+class DCSP_CORE {
+public:
+    DCSP_CORE();
+
+    ~DCSP_CORE();
+
+public:
+    char *CreateSession(DCSP_INIT_PARAM &iParams);
+
+    char *RunSession(cv::Mat &iImg, std::vector<DCSP_RESULT> &oResult);
+
+    char *WarmUpSession();
+
+    template<typename N>
+    char *TensorProcess(clock_t &starttime_1, cv::Mat &iImg, N &blob, std::vector<int64_t> &inputNodeDims,
+                        std::vector<DCSP_RESULT> &oResult);
+
+    char* PreProcess(cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oImg);
+
+    std::vector<std::string> classes{};
+
+private:
+    Ort::Env env;
+    // Initialised so the destructor's delete is well defined when CreateSession was never called.
+    Ort::Session *session = nullptr;
+    bool cudaEnable = false;
+    Ort::RunOptions options;
+    std::vector<const char *> inputNodeNames;
+    std::vector<const char *> outputNodeNames;
+
+    MODEL_TYPE modelType;
+    std::vector<int> imgSize;
+    float rectConfidenceThreshold;
+    float iouThreshold;
+    float resizeScales;//letterbox scale
+};
diff --git a/examples/YOLOv8-ONNXRuntime-CPP/main.cpp b/examples/YOLOv8-ONNXRuntime-CPP/main.cpp new file
mode 100644
index 0000000..00abec8
--- /dev/null
+++ b/examples/YOLOv8-ONNXRuntime-CPP/main.cpp
@@ -0,0 +1,113 @@
+#include <iostream>
+#include <iomanip>
+#include "inference.h"
+#include <filesystem>
+#include <fstream>
+
+// Runs detection on every .jpg/.png/.jpeg file in ./images and shows each annotated image
+// until a key is pressed.
+void file_iterator(DCSP_CORE *&p) {
+    std::filesystem::path current_path = std::filesystem::current_path();
+    std::filesystem::path imgs_path = current_path / "images";
+    for (auto &i: std::filesystem::directory_iterator(imgs_path)) {
+        if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg") {
+            std::string img_path = i.path().string();
+            cv::Mat img = cv::imread(img_path);
+            // imread returns an empty Mat on failure; skip the file instead of running inference on it.
+            if (img.empty()) {
+                std::cerr << "Failed to read image: " << img_path << std::endl;
+                continue;
+            }
+            std::vector<DCSP_RESULT> res;
+            p->RunSession(img, res);
+
+            for (auto &re: res) {
+                cv::RNG rng(cv::getTickCount());
+                cv::Scalar color(rng.uniform(0, 256), rng.uniform(0, 256), rng.uniform(0, 256));
+
+                cv::rectangle(img, re.box, color, 3);
+
+                // Truncate the confidence to two decimals for the label.
+                float confidence = floor(100 * re.confidence) / 100;
+                std::cout << std::fixed << std::setprecision(2);
+                std::string label = p->classes[re.classId] + " " +
+                                    std::to_string(confidence).substr(0, std::to_string(confidence).size() - 4);
+
+                cv::rectangle(
+                        img,
+                        cv::Point(re.box.x, re.box.y - 25),
+                        cv::Point(re.box.x + label.length() * 15, re.box.y),
+                        color,
+                        cv::FILLED
+                );
+
+                cv::putText(
+                        img,
+                        label,
+                        cv::Point(re.box.x, re.box.y - 5),
+                        cv::FONT_HERSHEY_SIMPLEX,
+                        0.75,
+                        cv::Scalar(0, 0, 0),
+                        2
+                );
+
+
+            }
+            std::cout << "Press any key to exit" << std::endl;
+            cv::imshow("Result of Detection", img);
+            cv::waitKey(0);
+            cv::destroyAllWindows();
+        }
+    }
+}
+
+// Loads the class names from the `names:` section of ./coco.yaml into p->classes.
+// Returns 0 on success and 1 when the file cannot be opened.
+int read_coco_yaml(DCSP_CORE *&p) {
+    // Open the YAML file
+    std::ifstream file("coco.yaml");
+    if (!file.is_open()) {
+        std::cerr << "Failed to open file" << std::endl;
+        return 1;
+    }
+
+    // Read the file line by line
+    std::string line;
+    std::vector<std::string> lines;
+    while (std::getline(file, line)) {
+        lines.push_back(line);
+    }
+
+    // Find the start and end of the names section
+    std::size_t start = 0;
+    std::size_t end = 0;
+    for (std::size_t i = 0; i < lines.size(); i++) {
+        if (lines[i].find("names:") != std::string::npos) {
+            start = i + 1;
+            // Default to end of file so a names section that is last in the file is not dropped.
+            end = lines.size();
+        } else if (start > 0 && lines[i].find(':') == std::string::npos) {
+            end = i;
+            break;
+        }
+    }
+
+    // Extract the names
+    std::vector<std::string> names;
+    for (std::size_t i = start; i < end; i++) {
+        std::stringstream ss(lines[i]);
+        std::string name;
+        std::getline(ss, name, ':'); // Extract the number before the delimiter
+        std::getline(ss, name); // Extract the string after the delimiter
+        // Drop the whitespace that follows the ':' so labels do not start with a space.
+        name.erase(0, name.find_first_not_of(" \t"));
+        names.push_back(name);
+    }
+
+    p->classes = names;
+    return 0;
+}
+
+
+// Loads the class names, creates the inference session and processes ./images.
+int main() {
+    DCSP_CORE *yoloDetector = new DCSP_CORE;
+    std::string model_path = "yolov8n.onnx";
+    // Without class names, file_iterator would index an empty vector when building labels.
+    if (read_coco_yaml(yoloDetector) != 0) {
+        return 1;
+    }
+#ifdef USE_CUDA
+    // GPU FP32 inference
+    DCSP_INIT_PARAM params{ model_path, YOLO_ORIGIN_V8, {640, 640}, 0.1, 0.5, true };
+    // GPU FP16 inference
+    // DCSP_INIT_PARAM params{ model_path, YOLO_ORIGIN_V8_HALF, {640, 640}, 0.1, 0.5, true };
+#else
+    // CPU inference
+    DCSP_INIT_PARAM params{model_path, YOLO_ORIGIN_V8, {640, 640}, 0.1, 0.5, false};
+#endif
+    // CreateSession returns RET_OK (nullptr) on success and an error message otherwise.
+    if (char *err = yoloDetector->CreateSession(params)) {
+        std::cerr << err << std::endl;
+        return 1;
+    }
+    file_iterator(yoloDetector);
+    // Only freed after a successful CreateSession, when the detector's session pointer is valid.
+    delete yoloDetector;
+    return 0;
+}
diff --git a/examples/YOLOv8-ONNXRuntime-Rust/Cargo.toml b/examples/YOLOv8-ONNXRuntime-Rust/Cargo.toml
new file mode 100644
index 0000000..101f72e
--- /dev/null
+++ b/examples/YOLOv8-ONNXRuntime-Rust/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name = "yolov8-rs"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+clap = { version = "4.2.4", features = ["derive"] }
+image = { version = "0.24.7", default-features = false, features = ["jpeg", "png", "webp-encoder"] }
+imageproc = { version = "0.23.0", default-features = false }
+ndarray = { version = "0.15.6" }
+ort = {version = "1.16.3", default-features = false, features = ["load-dynamic", "copy-dylibs", "half"]}
+rusttype = { version = "0.9", default-features = false }
+anyhow = { version = "1.0.75"}
+regex = { version = "1.5.4" }
+rand = {
version ="0.8.5" } +chrono = { version = "0.4.30" } +half = { version = "2.3.1" } +dirs = { version = "5.0.1" } +ureq = { version = "2.9.1" } diff --git a/examples/YOLOv8-ONNXRuntime-Rust/README.md b/examples/YOLOv8-ONNXRuntime-Rust/README.md new file mode 100644 index 0000000..6876c15 --- /dev/null +++ b/examples/YOLOv8-ONNXRuntime-Rust/README.md @@ -0,0 +1,222 @@ +# YOLOv8-ONNXRuntime-Rust for All the Key YOLO Tasks + +This repository provides a Rust demo for performing YOLOv8 tasks like `Classification`, `Segmentation`, `Detection` and `Pose Detection` using ONNXRuntime. + +## Features + +- Support `Classification`, `Segmentation`, `Detection`, `Pose(Keypoints)-Detection` tasks. +- Support `FP16` & `FP32` ONNX models. +- Support `CPU`, `CUDA` and `TensorRT` execution providers to accelerate computation. +- Support dynamic input shapes (`batch`, `width`, `height`). + +## Installation + +### 1. Install Rust + +Please follow the official Rust installation guide. (https://www.rust-lang.org/tools/install) + +### 2. Install ONNXRuntime + +This repository uses the `ort` crate, which is an ONNXRuntime wrapper for Rust. (https://docs.rs/ort/latest/ort/) + +You can follow the instructions in the `ort` docs or simply do this: + +- step1: Download ONNXRuntime (https://github.com/microsoft/onnxruntime/releases) +- step2: Set environment variable `PATH` for linking. + +On Ubuntu, you can do it like this: + +``` +vim ~/.bashrc + +# Add the path of ONNXRuntime lib +export LD_LIBRARY_PATH=/home/qweasd/Documents/onnxruntime-linux-x64-gpu-1.16.3/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} + +source ~/.bashrc +``` + +### 3. \[Optional\] Install CUDA & CuDNN & TensorRT + +- CUDA execution provider requires CUDA v11.6+. +- TensorRT execution provider requires CUDA v11.4+ and TensorRT v8.4+. + +## Get Started + +### 1.
Export the YOLOv8 ONNX Models + +```bash +pip install -U ultralytics + +# export onnx model with dynamic shapes +yolo export model=yolov8m.pt format=onnx simplify dynamic +yolo export model=yolov8m-cls.pt format=onnx simplify dynamic +yolo export model=yolov8m-pose.pt format=onnx simplify dynamic +yolo export model=yolov8m-seg.pt format=onnx simplify dynamic + + +# export onnx model with constant shapes +yolo export model=yolov8m.pt format=onnx simplify +yolo export model=yolov8m-cls.pt format=onnx simplify +yolo export model=yolov8m-pose.pt format=onnx simplify +yolo export model=yolov8m-seg.pt format=onnx simplify +``` + +### 2. Run Inference + +It will perform inference with the ONNX model on the source image. + +``` +cargo run --release -- --model <MODEL> --source <SOURCE> +``` + +Set `--cuda` to use the CUDA execution provider to speed up inference. + +``` +cargo run --release -- --cuda --model <MODEL> --source <SOURCE> +``` + +Set `--trt` to use the TensorRT execution provider; you can set `--fp16` at the same time to use the TensorRT FP16 engine. + +``` +cargo run --release -- --trt --fp16 --model <MODEL> --source <SOURCE> +``` + +Set `--device-id` to select which device to run on. If you have only one GPU and set `device_id` to 1, the program will not panic; `ort` automatically falls back to the `CPU` EP. + +``` +cargo run --release -- --cuda --device-id 0 --model <MODEL> --source <SOURCE> +``` + +Set `--batch` to do multi-batch-size inference. + +If you're using `--trt`, you can also set `--batch-min` and `--batch-max` to explicitly specify min/max/opt batch for dynamic batch input (https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#explicit-shape-range-for-dynamic-shape-input). (Note that the ONNX model should be exported with dynamic shapes.) + +``` +cargo run --release -- --cuda --batch 2 --model <MODEL> --source <SOURCE> +``` + +Set `--height` and `--width` to do dynamic image size inference.
(Note that the ONNX model should be exported with dynamic shapes.) + +``` +cargo run --release -- --cuda --width 480 --height 640 --model <MODEL> --source <SOURCE> +``` + +Set `--profile` to check the time consumed in each stage. (Note that the model usually needs 1~3 dry runs to warm up. Make sure to run enough times to evaluate the result.) + +``` +cargo run --release -- --trt --fp16 --profile --model <MODEL> --source <SOURCE> +``` + +Results: (yolov8m.onnx, batch=1, 3 times, trt, fp16, RTX 3060Ti) + +``` +==> 0 +[Model Preprocess]: 12.75788ms +[ORT H2D]: 237.118µs +[ORT Inference]: 507.895469ms +[ORT D2H]: 191.655µs +[Model Inference]: 508.34589ms +[Model Postprocess]: 1.061122ms +==> 1 +[Model Preprocess]: 13.658655ms +[ORT H2D]: 209.975µs +[ORT Inference]: 5.12372ms +[ORT D2H]: 182.389µs +[Model Inference]: 5.530022ms +[Model Postprocess]: 1.04851ms +==> 2 +[Model Preprocess]: 12.475332ms +[ORT H2D]: 246.127µs +[ORT Inference]: 5.048432ms +[ORT D2H]: 187.117µs +[Model Inference]: 5.493119ms +[Model Postprocess]: 1.040906ms +``` + +And also: + +`--conf`: confidence threshold \[default: 0.3\] + +`--iou`: iou threshold in NMS \[default: 0.45\] + +`--kconf`: confidence threshold of keypoint \[default: 0.55\] + +`--plot`: plot inference result with random RGB color and save + +You can check out all CLI arguments by: + +``` +git clone https://github.com/ultralytics/ultralytics +cd ultralytics/examples/YOLOv8-ONNXRuntime-Rust +cargo run --release -- --help +``` + +## Examples + +### Classification + +Running dynamic shape ONNX model on `CPU` with image size `--height 224 --width 224`. +Saving plotted image in `runs` directory.
+ +``` +cargo run --release -- --model ../assets/weights/yolov8m-cls-dyn.onnx --source ../assets/images/dog.jpg --height 224 --width 224 --plot --profile +``` + +You will see a result like: + +``` +Summary: +> Task: Classify (Ultralytics 8.0.217) +> EP: Cpu +> Dtype: Float32 +> Batch: 1 (Dynamic), Height: 224 (Dynamic), Width: 224 (Dynamic) +> nc: 1000 nk: 0, nm: 0, conf: 0.3, kconf: 0.55, iou: 0.45 + +[Model Preprocess]: 16.363477ms +[ORT H2D]: 50.722µs +[ORT Inference]: 16.295808ms +[ORT D2H]: 8.37µs +[Model Inference]: 16.367046ms +[Model Postprocess]: 3.527µs +[ + YOLOResult { + Probs(top5): Some([(208, 0.6950566), (209, 0.13823675), (178, 0.04849795), (215, 0.019029364), (212, 0.016506357)]), + Bboxes: None, + Keypoints: None, + Masks: None, + }, +] + +``` + +![2023-11-25-22-02-02-156623351](https://github.com/jamjamjon/ultralytics/assets/51357717/ef75c2ae-c5ab-44cc-9d9e-e60b51e39662) + +### Object Detection + +Using `CUDA` EP and dynamic image size `--height 640 --width 480` + +``` +cargo run --release -- --cuda --model ../assets/weights/yolov8m-dynamic.onnx --source ../assets/images/bus.jpg --plot --height 640 --width 480 +``` + +![det](https://github.com/jamjamjon/ultralytics/assets/51357717/5d89a19d-0c96-4a59-875c-defab6887a2c) + +### Pose Detection + +Using `TensorRT` EP + +``` +cargo run --release -- --trt --model ../assets/weights/yolov8m-pose.onnx --source ../assets/images/bus.jpg --plot +``` + +![2023-11-25-22-31-45-127054025](https://github.com/jamjamjon/ultralytics/assets/51357717/157b5ba7-bfcf-47cf-bee7-68b62e0de1c4) + +### Instance Segmentation + +Using `TensorRT` EP with FP16 enabled via `--fp16` + +``` +cargo run --release -- --trt --fp16 --model ../assets/weights/yolov8m-seg.onnx --source ../assets/images/0172.jpg --plot +``` + +![seg](https://github.com/jamjamjon/ultralytics/assets/51357717/cf046f4f-9533-478a-adc7-4de22443a641) diff --git a/examples/YOLOv8-ONNXRuntime-Rust/src/cli.rs b/examples/YOLOv8-ONNXRuntime-Rust/src/cli.rs new file mode 100644 
index 0000000..2ba0dd4 --- /dev/null +++ b/examples/YOLOv8-ONNXRuntime-Rust/src/cli.rs @@ -0,0 +1,87 @@ +use clap::Parser; + +use crate::YOLOTask; + +#[derive(Parser, Clone)] +#[command(author, version, about, long_about = None)] +pub struct Args { + /// ONNX model path + #[arg(long, required = true)] + pub model: String, + + /// input path + #[arg(long, required = true)] + pub source: String, + + /// device id + #[arg(long, default_value_t = 0)] + pub device_id: u32, + + /// using TensorRT EP + #[arg(long)] + pub trt: bool, + + /// using CUDA EP + #[arg(long)] + pub cuda: bool, + + /// input batch size + #[arg(long, default_value_t = 1)] + pub batch: u32, + + /// trt input min_batch size + #[arg(long, default_value_t = 1)] + pub batch_min: u32, + + /// trt input max_batch size + #[arg(long, default_value_t = 32)] + pub batch_max: u32, + + /// using TensorRT --fp16 + #[arg(long)] + pub fp16: bool, + + /// specify YOLO task + #[arg(long, value_enum)] + pub task: Option, + + /// num_classes + #[arg(long)] + pub nc: Option, + + /// num_keypoints + #[arg(long)] + pub nk: Option, + + /// num_masks + #[arg(long)] + pub nm: Option, + + /// input image width + #[arg(long)] + pub width: Option, + + /// input image height + #[arg(long)] + pub height: Option, + + /// confidence threshold + #[arg(long, required = false, default_value_t = 0.3)] + pub conf: f32, + + /// iou threshold in NMS + #[arg(long, required = false, default_value_t = 0.45)] + pub iou: f32, + + /// confidence threshold of keypoint + #[arg(long, required = false, default_value_t = 0.55)] + pub kconf: f32, + + /// plot inference result and save + #[arg(long)] + pub plot: bool, + + /// check time consumed in each stage + #[arg(long)] + pub profile: bool, +} diff --git a/examples/YOLOv8-ONNXRuntime-Rust/src/lib.rs b/examples/YOLOv8-ONNXRuntime-Rust/src/lib.rs new file mode 100644 index 0000000..1af7f7c --- /dev/null +++ b/examples/YOLOv8-ONNXRuntime-Rust/src/lib.rs @@ -0,0 +1,119 @@ 
+#![allow(clippy::type_complexity)] + +use std::io::{Read, Write}; + +pub mod cli; +pub mod model; +pub mod ort_backend; +pub mod yolo_result; +pub use crate::cli::Args; +pub use crate::model::YOLOv8; +pub use crate::ort_backend::{Batch, OrtBackend, OrtConfig, OrtEP, YOLOTask}; +pub use crate::yolo_result::{Bbox, Embedding, Point2, YOLOResult}; + +pub fn non_max_suppression( + xs: &mut Vec<(Bbox, Option>, Option>)>, + iou_threshold: f32, +) { + xs.sort_by(|b1, b2| b2.0.confidence().partial_cmp(&b1.0.confidence()).unwrap()); + + let mut current_index = 0; + for index in 0..xs.len() { + let mut drop = false; + for prev_index in 0..current_index { + let iou = xs[prev_index].0.iou(&xs[index].0); + if iou > iou_threshold { + drop = true; + break; + } + } + if !drop { + xs.swap(current_index, index); + current_index += 1; + } + } + xs.truncate(current_index); +} + +pub fn gen_time_string(delimiter: &str) -> String { + let offset = chrono::FixedOffset::east_opt(8 * 60 * 60).unwrap(); // Beijing + let t_now = chrono::Utc::now().with_timezone(&offset); + let fmt = format!( + "%Y{}%m{}%d{}%H{}%M{}%S{}%f", + delimiter, delimiter, delimiter, delimiter, delimiter, delimiter + ); + t_now.format(&fmt).to_string() +} + +pub const SKELETON: [(usize, usize); 16] = [ + (0, 1), + (0, 2), + (1, 3), + (2, 4), + (5, 6), + (5, 11), + (6, 12), + (11, 12), + (5, 7), + (6, 8), + (7, 9), + (8, 10), + (11, 13), + (12, 14), + (13, 15), + (14, 16), +]; + +pub fn check_font(font: &str) -> rusttype::Font<'static> { + // check then load font + + // ultralytics font path + let font_path_config = match dirs::config_dir() { + Some(mut d) => { + d.push("Ultralytics"); + d.push(font); + d + } + None => panic!("Unsupported operating system. 
Now support Linux, MacOS, Windows."), + }; + + // current font path + let font_path_current = std::path::PathBuf::from(font); + + // check font + let font_path = if font_path_config.exists() { + font_path_config + } else if font_path_current.exists() { + font_path_current + } else { + println!("Downloading font..."); + let source_url = "https://ultralytics.com/assets/Arial.ttf"; + let resp = ureq::get(source_url) + .timeout(std::time::Duration::from_secs(500)) + .call() + .unwrap_or_else(|err| panic!("> Failed to download font: {source_url}: {err:?}")); + + // read to buffer + let mut buffer = vec![]; + let total_size = resp + .header("Content-Length") + .and_then(|s| s.parse::().ok()) + .unwrap(); + let _reader = resp + .into_reader() + .take(total_size) + .read_to_end(&mut buffer) + .unwrap(); + + // save + let _path = std::fs::File::create(font).unwrap(); + let mut writer = std::io::BufWriter::new(_path); + writer.write_all(&buffer).unwrap(); + println!("Font saved at: {:?}", font_path_current.display()); + font_path_current + }; + + // load font + let buffer = std::fs::read(font_path).unwrap(); + rusttype::Font::try_from_vec(buffer).unwrap() +} diff --git a/examples/YOLOv8-ONNXRuntime-Rust/src/main.rs b/examples/YOLOv8-ONNXRuntime-Rust/src/main.rs new file mode 100644 index 0000000..8dd1567 --- /dev/null +++ b/examples/YOLOv8-ONNXRuntime-Rust/src/main.rs @@ -0,0 +1,28 @@ +use clap::Parser; + +use yolov8_rs::{Args, YOLOv8}; + +fn main() -> Result<(), Box> { + let args = Args::parse(); + + // 1. load image + let x = image::io::Reader::open(&args.source)? + .with_guessed_format()? + .decode()?; + + // 2. model support dynamic batch inference, so input should be a Vec + let xs = vec![x]; + + // You can test `--batch 2` with this + // let xs = vec![x.clone(), x]; + + // 3. build yolov8 model + let mut model = YOLOv8::new(args)?; + model.summary(); // model info + + // 4. 
run + let ys = model.run(&xs)?; + println!("{:?}", ys); + + Ok(()) +} diff --git a/examples/YOLOv8-ONNXRuntime-Rust/src/model.rs b/examples/YOLOv8-ONNXRuntime-Rust/src/model.rs new file mode 100644 index 0000000..1c0e5e4 --- /dev/null +++ b/examples/YOLOv8-ONNXRuntime-Rust/src/model.rs @@ -0,0 +1,642 @@ +#![allow(clippy::type_complexity)] + +use anyhow::Result; +use image::{DynamicImage, GenericImageView, ImageBuffer}; +use ndarray::{s, Array, Axis, IxDyn}; +use rand::{thread_rng, Rng}; +use std::path::PathBuf; + +use crate::{ + check_font, gen_time_string, non_max_suppression, Args, Batch, Bbox, Embedding, OrtBackend, + OrtConfig, OrtEP, Point2, YOLOResult, YOLOTask, SKELETON, +}; + +pub struct YOLOv8 { + // YOLOv8 model for all yolo-tasks + engine: OrtBackend, + nc: u32, + nk: u32, + nm: u32, + height: u32, + width: u32, + batch: u32, + task: YOLOTask, + conf: f32, + kconf: f32, + iou: f32, + names: Vec, + color_palette: Vec<(u8, u8, u8)>, + profile: bool, + plot: bool, +} + +impl YOLOv8 { + pub fn new(config: Args) -> Result { + // execution provider + let ep = if config.trt { + OrtEP::Trt(config.device_id) + } else if config.cuda { + OrtEP::Cuda(config.device_id) + } else { + OrtEP::Cpu + }; + + // batch + let batch = Batch { + opt: config.batch, + min: config.batch_min, + max: config.batch_max, + }; + + // build ort engine + let ort_args = OrtConfig { + ep, + batch, + f: config.model, + task: config.task, + trt_fp16: config.fp16, + image_size: (config.height, config.width), + }; + let engine = OrtBackend::build(ort_args)?; + + // get batch, height, width, tasks, nc, nk, nm + let (batch, height, width, task) = ( + engine.batch(), + engine.height(), + engine.width(), + engine.task(), + ); + let nc = engine.nc().or(config.nc).unwrap_or_else(|| { + panic!("Failed to get num_classes, make it explicit with `--nc`"); + }); + let (nk, nm) = match task { + YOLOTask::Pose => { + let nk = engine.nk().or(config.nk).unwrap_or_else(|| { + panic!("Failed to get 
num_keypoints, make it explicit with `--nk`"); + }); + (nk, 0) + } + YOLOTask::Segment => { + let nm = engine.nm().or(config.nm).unwrap_or_else(|| { + panic!("Failed to get num_masks, make it explicit with `--nm`"); + }); + (0, nm) + } + _ => (0, 0), + }; + + // class names + let names = engine.names().unwrap_or(vec!["Unknown".to_string()]); + + // color palette + let mut rng = thread_rng(); + let color_palette: Vec<_> = names + .iter() + .map(|_| { + ( + rng.gen_range(0..=255), + rng.gen_range(0..=255), + rng.gen_range(0..=255), + ) + }) + .collect(); + + Ok(Self { + engine, + names, + conf: config.conf, + kconf: config.kconf, + iou: config.iou, + color_palette, + profile: config.profile, + plot: config.plot, + nc, + nk, + nm, + height, + width, + batch, + task, + }) + } + + pub fn scale_wh(&self, w0: f32, h0: f32, w1: f32, h1: f32) -> (f32, f32, f32) { + let r = (w1 / w0).min(h1 / h0); + (r, (w0 * r).round(), (h0 * r).round()) + } + + pub fn preprocess(&mut self, xs: &Vec) -> Result> { + let mut ys = + Array::ones((xs.len(), 3, self.height() as usize, self.width() as usize)).into_dyn(); + ys.fill(144.0 / 255.0); + for (idx, x) in xs.iter().enumerate() { + let img = match self.task() { + YOLOTask::Classify => x.resize_exact( + self.width(), + self.height(), + image::imageops::FilterType::Triangle, + ), + _ => { + let (w0, h0) = x.dimensions(); + let w0 = w0 as f32; + let h0 = h0 as f32; + let (_, w_new, h_new) = + self.scale_wh(w0, h0, self.width() as f32, self.height() as f32); // f32 round + x.resize_exact( + w_new as u32, + h_new as u32, + if let YOLOTask::Segment = self.task() { + image::imageops::FilterType::CatmullRom + } else { + image::imageops::FilterType::Triangle + }, + ) + } + }; + + for (x, y, rgb) in img.pixels() { + let x = x as usize; + let y = y as usize; + let [r, g, b, _] = rgb.0; + ys[[idx, 0, y, x]] = (r as f32) / 255.0; + ys[[idx, 1, y, x]] = (g as f32) / 255.0; + ys[[idx, 2, y, x]] = (b as f32) / 255.0; + } + } + + Ok(ys) + } + + pub fn 
run(&mut self, xs: &Vec) -> Result> { + // pre-process + let t_pre = std::time::Instant::now(); + let xs_ = self.preprocess(xs)?; + if self.profile { + println!("[Model Preprocess]: {:?}", t_pre.elapsed()); + } + + // run + let t_run = std::time::Instant::now(); + let ys = self.engine.run(xs_, self.profile)?; + if self.profile { + println!("[Model Inference]: {:?}", t_run.elapsed()); + } + + // post-process + let t_post = std::time::Instant::now(); + let ys = self.postprocess(ys, xs)?; + if self.profile { + println!("[Model Postprocess]: {:?}", t_post.elapsed()); + } + + // plot and save + if self.plot { + self.plot_and_save(&ys, xs, Some(&SKELETON)); + } + Ok(ys) + } + + pub fn postprocess( + &self, + xs: Vec>, + xs0: &[DynamicImage], + ) -> Result> { + if let YOLOTask::Classify = self.task() { + let mut ys = Vec::new(); + let preds = &xs[0]; + for batch in preds.axis_iter(Axis(0)) { + ys.push(YOLOResult::new( + Some(Embedding::new(batch.into_owned())), + None, + None, + None, + )); + } + Ok(ys) + } else { + const CXYWH_OFFSET: usize = 4; // cxcywh + const KPT_STEP: usize = 3; // xyconf + let preds = &xs[0]; + let protos = { + if xs.len() > 1 { + Some(&xs[1]) + } else { + None + } + }; + let mut ys = Vec::new(); + for (idx, anchor) in preds.axis_iter(Axis(0)).enumerate() { + // [bs, 4 + nc + nm, anchors] + // input image + let width_original = xs0[idx].width() as f32; + let height_original = xs0[idx].height() as f32; + let ratio = (self.width() as f32 / width_original) + .min(self.height() as f32 / height_original); + + // save each result + let mut data: Vec<(Bbox, Option>, Option>)> = Vec::new(); + for pred in anchor.axis_iter(Axis(1)) { + // split preds for different tasks + let bbox = pred.slice(s![0..CXYWH_OFFSET]); + let clss = pred.slice(s![CXYWH_OFFSET..CXYWH_OFFSET + self.nc() as usize]); + let kpts = { + if let YOLOTask::Pose = self.task() { + Some(pred.slice(s![pred.len() - KPT_STEP * self.nk() as usize..])) + } else { + None + } + }; + let coefs = { + 
if let YOLOTask::Segment = self.task() { + Some(pred.slice(s![pred.len() - self.nm() as usize..]).to_vec()) + } else { + None + } + }; + + // confidence and id + let (id, &confidence) = clss + .into_iter() + .enumerate() + .reduce(|max, x| if x.1 > max.1 { x } else { max }) + .unwrap(); // definitely will not panic! + + // confidence filter + if confidence < self.conf { + continue; + } + + // bbox re-scale + let cx = bbox[0] / ratio; + let cy = bbox[1] / ratio; + let w = bbox[2] / ratio; + let h = bbox[3] / ratio; + let x = cx - w / 2.; + let y = cy - h / 2.; + let y_bbox = Bbox::new( + x.max(0.0f32).min(width_original), + y.max(0.0f32).min(height_original), + w, + h, + id, + confidence, + ); + + // kpts + let y_kpts = { + if let Some(kpts) = kpts { + let mut kpts_ = Vec::new(); + // rescale + for i in 0..self.nk() as usize { + let kx = kpts[KPT_STEP * i] / ratio; + let ky = kpts[KPT_STEP * i + 1] / ratio; + let kconf = kpts[KPT_STEP * i + 2]; + if kconf < self.kconf { + kpts_.push(Point2::default()); + } else { + kpts_.push(Point2::new_with_conf( + kx.max(0.0f32).min(width_original), + ky.max(0.0f32).min(height_original), + kconf, + )); + } + } + Some(kpts_) + } else { + None + } + }; + + // data merged + data.push((y_bbox, y_kpts, coefs)); + } + + // nms + non_max_suppression(&mut data, self.iou); + + // decode + let mut y_bboxes: Vec = Vec::new(); + let mut y_kpts: Vec> = Vec::new(); + let mut y_masks: Vec> = Vec::new(); + for elem in data.into_iter() { + if let Some(kpts) = elem.1 { + y_kpts.push(kpts) + } + + // decode masks + if let Some(coefs) = elem.2 { + let proto = protos.unwrap().slice(s![idx, .., .., ..]); + let (nm, nh, nw) = proto.dim(); + + // coefs * proto -> mask + let coefs = Array::from_shape_vec((1, nm), coefs)?; // (n, nm) + let proto = proto.to_owned().into_shape((nm, nh * nw))?; // (nm, nh*nw) + let mask = coefs.dot(&proto).into_shape((nh, nw, 1))?; // (nh, nw, n) + + // build image from ndarray + let mask_im: ImageBuffer, Vec> = + match 
ImageBuffer::from_raw(nw as u32, nh as u32, mask.into_raw_vec()) { + Some(image) => image, + None => panic!("can not create image from ndarray"), + }; + let mut mask_im = image::DynamicImage::from(mask_im); // -> dyn + + // rescale masks + let (_, w_mask, h_mask) = + self.scale_wh(width_original, height_original, nw as f32, nh as f32); + let mask_cropped = mask_im.crop(0, 0, w_mask as u32, h_mask as u32); + let mask_original = mask_cropped.resize_exact( + // resize_to_fill + width_original as u32, + height_original as u32, + match self.task() { + YOLOTask::Segment => image::imageops::FilterType::CatmullRom, + _ => image::imageops::FilterType::Triangle, + }, + ); + + // crop-mask with bbox + let mut mask_original_cropped = mask_original.into_luma8(); + for y in 0..height_original as usize { + for x in 0..width_original as usize { + if x < elem.0.xmin() as usize + || x > elem.0.xmax() as usize + || y < elem.0.ymin() as usize + || y > elem.0.ymax() as usize + { + mask_original_cropped.put_pixel( + x as u32, + y as u32, + image::Luma([0u8]), + ); + } + } + } + y_masks.push(mask_original_cropped.into_raw()); + } + y_bboxes.push(elem.0); + } + + // save each result + let y = YOLOResult { + probs: None, + bboxes: if !y_bboxes.is_empty() { + Some(y_bboxes) + } else { + None + }, + keypoints: if !y_kpts.is_empty() { + Some(y_kpts) + } else { + None + }, + masks: if !y_masks.is_empty() { + Some(y_masks) + } else { + None + }, + }; + ys.push(y); + } + + Ok(ys) + } + } + + pub fn plot_and_save( + &self, + ys: &[YOLOResult], + xs0: &[DynamicImage], + skeletons: Option<&[(usize, usize)]>, + ) { + // check font then load + let font = check_font("Arial.ttf"); + for (_idb, (img0, y)) in xs0.iter().zip(ys.iter()).enumerate() { + let mut img = img0.to_rgb8(); + + // draw for classifier + if let Some(probs) = y.probs() { + for (i, k) in probs.topk(5).iter().enumerate() { + let legend = format!("{} {:.2}%", self.names[k.0], k.1); + let scale = 32; + let legend_size = 
img.width().max(img.height()) / scale; + let x = img.width() / 20; + let y = img.height() / 20 + i as u32 * legend_size; + imageproc::drawing::draw_text_mut( + &mut img, + image::Rgb([0, 255, 0]), + x as i32, + y as i32, + rusttype::Scale::uniform(legend_size as f32 - 1.), + &font, + &legend, + ); + } + } + + // draw bboxes & keypoints + if let Some(bboxes) = y.bboxes() { + for (_idx, bbox) in bboxes.iter().enumerate() { + // rect + imageproc::drawing::draw_hollow_rect_mut( + &mut img, + imageproc::rect::Rect::at(bbox.xmin() as i32, bbox.ymin() as i32) + .of_size(bbox.width() as u32, bbox.height() as u32), + image::Rgb(self.color_palette[bbox.id()].into()), + ); + + // text + let legend = format!("{} {:.2}%", self.names[bbox.id()], bbox.confidence()); + let scale = 40; + let legend_size = img.width().max(img.height()) / scale; + imageproc::drawing::draw_text_mut( + &mut img, + image::Rgb(self.color_palette[bbox.id()].into()), + bbox.xmin() as i32, + (bbox.ymin() - legend_size as f32) as i32, + rusttype::Scale::uniform(legend_size as f32 - 1.), + &font, + &legend, + ); + } + } + + // draw kpts + if let Some(keypoints) = y.keypoints() { + for kpts in keypoints.iter() { + for kpt in kpts.iter() { + // filter + if kpt.confidence() < self.kconf { + continue; + } + + // draw point + imageproc::drawing::draw_filled_circle_mut( + &mut img, + (kpt.x() as i32, kpt.y() as i32), + 2, + image::Rgb([0, 255, 0]), + ); + } + + // draw skeleton if has + if let Some(skeletons) = skeletons { + for &(idx1, idx2) in skeletons.iter() { + let kpt1 = &kpts[idx1]; + let kpt2 = &kpts[idx2]; + if kpt1.confidence() < self.kconf || kpt2.confidence() < self.kconf { + continue; + } + imageproc::drawing::draw_line_segment_mut( + &mut img, + (kpt1.x(), kpt1.y()), + (kpt2.x(), kpt2.y()), + image::Rgb([233, 14, 57]), + ); + } + } + } + } + + // draw mask + if let Some(masks) = y.masks() { + for (mask, _bbox) in masks.iter().zip(y.bboxes().unwrap().iter()) { + let mask_nd: ImageBuffer, Vec> = + match 
ImageBuffer::from_vec(img.width(), img.height(), mask.to_vec()) { + Some(image) => image, + None => panic!("can not crate image from ndarray"), + }; + + for _x in 0..img.width() { + for _y in 0..img.height() { + let mask_p = imageproc::drawing::Canvas::get_pixel(&mask_nd, _x, _y); + if mask_p.0[0] > 0 { + let mut img_p = imageproc::drawing::Canvas::get_pixel(&img, _x, _y); + // img_p.0[2] = self.color_palette[bbox.id()].2 / 2; + // img_p.0[1] = self.color_palette[bbox.id()].1 / 2; + // img_p.0[0] = self.color_palette[bbox.id()].0 / 2; + img_p.0[2] /= 2; + img_p.0[1] = 255 - (255 - img_p.0[2]) / 2; + img_p.0[0] /= 2; + imageproc::drawing::Canvas::draw_pixel(&mut img, _x, _y, img_p) + } + } + } + } + } + + // mkdir and save + let mut runs = PathBuf::from("runs"); + if !runs.exists() { + std::fs::create_dir_all(&runs).unwrap(); + } + runs.push(gen_time_string("-")); + let saveout = format!("{}.jpg", runs.to_str().unwrap()); + let _ = img.save(saveout); + } + } + + pub fn summary(&self) { + println!( + "\nSummary:\n\ + > Task: {:?}{}\n\ + > EP: {:?} {}\n\ + > Dtype: {:?}\n\ + > Batch: {} ({}), Height: {} ({}), Width: {} ({})\n\ + > nc: {} nk: {}, nm: {}, conf: {}, kconf: {}, iou: {}\n\ + ", + self.task(), + match self.engine.author().zip(self.engine.version()) { + Some((author, ver)) => format!(" ({} {})", author, ver), + None => String::from(""), + }, + self.engine.ep(), + if let OrtEP::Cpu = self.engine.ep() { + "" + } else { + "(May still fall back to CPU)" + }, + self.engine.dtype(), + self.batch(), + if self.engine.is_batch_dynamic() { + "Dynamic" + } else { + "Const" + }, + self.height(), + if self.engine.is_height_dynamic() { + "Dynamic" + } else { + "Const" + }, + self.width(), + if self.engine.is_width_dynamic() { + "Dynamic" + } else { + "Const" + }, + self.nc(), + self.nk(), + self.nm(), + self.conf, + self.kconf, + self.iou, + ); + } + + pub fn engine(&self) -> &OrtBackend { + &self.engine + } + + pub fn conf(&self) -> f32 { + self.conf + } + + pub fn 
set_conf(&mut self, val: f32) { + self.conf = val; + } + + pub fn conf_mut(&mut self) -> &mut f32 { + &mut self.conf + } + + pub fn kconf(&self) -> f32 { + self.kconf + } + + pub fn iou(&self) -> f32 { + self.iou + } + + pub fn task(&self) -> &YOLOTask { + &self.task + } + + pub fn batch(&self) -> u32 { + self.batch + } + + pub fn width(&self) -> u32 { + self.width + } + + pub fn height(&self) -> u32 { + self.height + } + + pub fn nc(&self) -> u32 { + self.nc + } + + pub fn nk(&self) -> u32 { + self.nk + } + + pub fn nm(&self) -> u32 { + self.nm + } + + pub fn names(&self) -> &Vec { + &self.names + } +} diff --git a/examples/YOLOv8-ONNXRuntime-Rust/src/ort_backend.rs b/examples/YOLOv8-ONNXRuntime-Rust/src/ort_backend.rs new file mode 100644 index 0000000..5be93bd --- /dev/null +++ b/examples/YOLOv8-ONNXRuntime-Rust/src/ort_backend.rs @@ -0,0 +1,534 @@ +use anyhow::Result; +use clap::ValueEnum; +use half::f16; +use ndarray::{Array, CowArray, IxDyn}; +use ort::execution_providers::{CUDAExecutionProviderOptions, TensorRTExecutionProviderOptions}; +use ort::tensor::TensorElementDataType; +use ort::{Environment, ExecutionProvider, Session, SessionBuilder, Value}; +use regex::Regex; + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] +pub enum YOLOTask { + // YOLO tasks + Classify, + Detect, + Pose, + Segment, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum OrtEP { + // ONNXRuntime execution provider + Cpu, + Cuda(u32), + Trt(u32), +} + +#[derive(Debug)] +pub struct Batch { + pub opt: u32, + pub min: u32, + pub max: u32, +} + +impl Default for Batch { + fn default() -> Self { + Self { + opt: 1, + min: 1, + max: 1, + } + } +} + +#[derive(Debug, Default)] +pub struct OrtInputs { + // ONNX model inputs attrs + pub shapes: Vec>, + pub dtypes: Vec, + pub names: Vec, + pub sizes: Vec>, +} + +impl OrtInputs { + pub fn new(session: &Session) -> Self { + let mut shapes = Vec::new(); + let mut dtypes = Vec::new(); + let mut names = 
Vec::new(); + for i in session.inputs.iter() { + let shape: Vec = i + .dimensions() + .map(|x| if let Some(x) = x { x as i32 } else { -1i32 }) + .collect(); + shapes.push(shape); + dtypes.push(i.input_type); + names.push(i.name.clone()); + } + Self { + shapes, + dtypes, + names, + ..Default::default() + } + } +} + +#[derive(Debug)] +pub struct OrtConfig { + // ORT config + pub f: String, + pub task: Option, + pub ep: OrtEP, + pub trt_fp16: bool, + pub batch: Batch, + pub image_size: (Option, Option), +} + +#[derive(Debug)] +pub struct OrtBackend { + // ORT engine + session: Session, + task: YOLOTask, + ep: OrtEP, + batch: Batch, + inputs: OrtInputs, +} + +impl OrtBackend { + pub fn build(args: OrtConfig) -> Result { + // build env & session + let env = Environment::builder() + .with_name("YOLOv8") + .with_log_level(ort::LoggingLevel::Verbose) + .build()? + .into_arc(); + let session = SessionBuilder::new(&env)?.with_model_from_file(&args.f)?; + + // get inputs + let mut inputs = OrtInputs::new(&session); + + // batch size + let mut batch = args.batch; + let batch = if inputs.shapes[0][0] == -1 { + batch + } else { + assert_eq!( + inputs.shapes[0][0] as u32, batch.opt, + "Expected batch size: {}, got {}. Try using `--batch {}`.", + inputs.shapes[0][0] as u32, batch.opt, inputs.shapes[0][0] as u32 + ); + batch.opt = inputs.shapes[0][0] as u32; + batch + }; + + // input size: height and width + let height = if inputs.shapes[0][2] == -1 { + match args.image_size.0 { + Some(height) => height, + None => panic!("Failed to get model height. Make it explicit with `--height`"), + } + } else { + inputs.shapes[0][2] as u32 + }; + let width = if inputs.shapes[0][3] == -1 { + match args.image_size.1 { + Some(width) => width, + None => panic!("Failed to get model width. 
Make it explicit with `--width`"), + } + } else { + inputs.shapes[0][3] as u32 + }; + inputs.sizes.push(vec![height, width]); + + // build provider + let (ep, provider) = match args.ep { + OrtEP::Cuda(device_id) => Self::set_ep_cuda(device_id), + OrtEP::Trt(device_id) => Self::set_ep_trt(device_id, args.trt_fp16, &batch, &inputs), + _ => (OrtEP::Cpu, ExecutionProvider::CPU(Default::default())), + }; + + // build session again with the new provider + let session = SessionBuilder::new(&env)? + // .with_optimization_level(ort::GraphOptimizationLevel::Level3)? + .with_execution_providers([provider])? + .with_model_from_file(args.f)?; + + // task: using given one or guessing + let task = match args.task { + Some(task) => task, + None => match session.metadata() { + Err(_) => panic!("No metadata found. Try making it explicit by `--task`"), + Ok(metadata) => match metadata.custom("task") { + Err(_) => panic!("Can not get custom value. Try making it explicit by `--task`"), + Ok(value) => match value { + None => panic!("No correspoing value of `task` found in metadata. 
Make it explicit by `--task`"), + Some(task) => match task.as_str() { + "classify" => YOLOTask::Classify, + "detect" => YOLOTask::Detect, + "pose" => YOLOTask::Pose, + "segment" => YOLOTask::Segment, + x => todo!("{:?} is not supported for now!", x), + }, + }, + }, + }, + }; + + Ok(Self { + session, + task, + ep, + batch, + inputs, + }) + } + + pub fn fetch_inputs_from_session( + session: &Session, + ) -> (Vec>, Vec, Vec) { + // get inputs attrs from ONNX model + let mut shapes = Vec::new(); + let mut dtypes = Vec::new(); + let mut names = Vec::new(); + for i in session.inputs.iter() { + let shape: Vec = i + .dimensions() + .map(|x| if let Some(x) = x { x as i32 } else { -1i32 }) + .collect(); + shapes.push(shape); + dtypes.push(i.input_type); + names.push(i.name.clone()); + } + (shapes, dtypes, names) + } + + pub fn set_ep_cuda(device_id: u32) -> (OrtEP, ExecutionProvider) { + // set CUDA + if ExecutionProvider::CUDA(Default::default()).is_available() { + ( + OrtEP::Cuda(device_id), + ExecutionProvider::CUDA(CUDAExecutionProviderOptions { + device_id, + ..Default::default() + }), + ) + } else { + println!("> CUDA is not available! Using CPU."); + (OrtEP::Cpu, ExecutionProvider::CPU(Default::default())) + } + } + + pub fn set_ep_trt( + device_id: u32, + fp16: bool, + batch: &Batch, + inputs: &OrtInputs, + ) -> (OrtEP, ExecutionProvider) { + // set TensorRT + if ExecutionProvider::TensorRT(Default::default()).is_available() { + let (height, width) = (inputs.sizes[0][0], inputs.sizes[0][1]); + + // dtype match checking + if inputs.dtypes[0] == TensorElementDataType::Float16 && !fp16 { + panic!( + "Dtype mismatch! Expected: Float32, got: {:?}. You should use `--fp16`", + inputs.dtypes[0] + ); + } + + // dynamic shape: input_tensor_1:dim_1xdim_2x...,input_tensor_2:dim_3xdim_4x...,... 
+ let mut opt_string = String::new(); + let mut min_string = String::new(); + let mut max_string = String::new(); + for name in inputs.names.iter() { + let s_opt = format!("{}:{}x3x{}x{},", name, batch.opt, height, width); + let s_min = format!("{}:{}x3x{}x{},", name, batch.min, height, width); + let s_max = format!("{}:{}x3x{}x{},", name, batch.max, height, width); + opt_string.push_str(s_opt.as_str()); + min_string.push_str(s_min.as_str()); + max_string.push_str(s_max.as_str()); + } + let _ = opt_string.pop(); + let _ = min_string.pop(); + let _ = max_string.pop(); + ( + OrtEP::Trt(device_id), + ExecutionProvider::TensorRT(TensorRTExecutionProviderOptions { + device_id, + fp16_enable: fp16, + timing_cache_enable: true, + profile_min_shapes: min_string, + profile_max_shapes: max_string, + profile_opt_shapes: opt_string, + ..Default::default() + }), + ) + } else { + println!("> TensorRT is not available! Try using CUDA..."); + Self::set_ep_cuda(device_id) + } + } + + pub fn fetch_from_metadata(&self, key: &str) -> Option { + // fetch value from onnx model file by key + match self.session.metadata() { + Err(_) => None, + Ok(metadata) => match metadata.custom(key) { + Err(_) => None, + Ok(value) => value, + }, + } + } + + pub fn run(&self, xs: Array, profile: bool) -> Result>> { + // ORT inference + match self.dtype() { + TensorElementDataType::Float16 => self.run_fp16(xs, profile), + TensorElementDataType::Float32 => self.run_fp32(xs, profile), + _ => todo!(), + } + } + + pub fn run_fp16(&self, xs: Array, profile: bool) -> Result>> { + // f32->f16 + let t = std::time::Instant::now(); + let xs = xs.mapv(f16::from_f32); + if profile { + println!("[ORT f32->f16]: {:?}", t.elapsed()); + } + + // h2d + let t = std::time::Instant::now(); + let xs = CowArray::from(xs); + let xs = vec![Value::from_array(self.session.allocator(), &xs)?]; + if profile { + println!("[ORT H2D]: {:?}", t.elapsed()); + } + + // run + let t = std::time::Instant::now(); + let ys = 
self.session.run(xs)?; + if profile { + println!("[ORT Inference]: {:?}", t.elapsed()); + } + + // d2h + Ok(ys + .iter() + .map(|x| { + // d2h + let t = std::time::Instant::now(); + let x = x.try_extract::<_>().unwrap().view().clone().into_owned(); + if profile { + println!("[ORT D2H]: {:?}", t.elapsed()); + } + + // f16->f32 + let t_ = std::time::Instant::now(); + let x = x.mapv(f16::to_f32); + if profile { + println!("[ORT f16->f32]: {:?}", t_.elapsed()); + } + x + }) + .collect::>>()) + } + + pub fn run_fp32(&self, xs: Array, profile: bool) -> Result>> { + // h2d + let t = std::time::Instant::now(); + let xs = CowArray::from(xs); + let xs = vec![Value::from_array(self.session.allocator(), &xs)?]; + if profile { + println!("[ORT H2D]: {:?}", t.elapsed()); + } + + // run + let t = std::time::Instant::now(); + let ys = self.session.run(xs)?; + if profile { + println!("[ORT Inference]: {:?}", t.elapsed()); + } + + // d2h + Ok(ys + .iter() + .map(|x| { + let t = std::time::Instant::now(); + let x = x.try_extract::<_>().unwrap().view().clone().into_owned(); + if profile { + println!("[ORT D2H]: {:?}", t.elapsed()); + } + x + }) + .collect::>>()) + } + + pub fn output_shapes(&self) -> Vec> { + let mut shapes = Vec::new(); + for o in &self.session.outputs { + let shape: Vec<_> = o + .dimensions() + .map(|x| if let Some(x) = x { x as i32 } else { -1i32 }) + .collect(); + shapes.push(shape); + } + shapes + } + + pub fn output_dtypes(&self) -> Vec { + let mut dtypes = Vec::new(); + self.session + .outputs + .iter() + .for_each(|x| dtypes.push(x.output_type)); + dtypes + } + + pub fn input_shapes(&self) -> &Vec> { + &self.inputs.shapes + } + + pub fn input_names(&self) -> &Vec { + &self.inputs.names + } + + pub fn input_dtypes(&self) -> &Vec { + &self.inputs.dtypes + } + + pub fn dtype(&self) -> TensorElementDataType { + self.input_dtypes()[0] + } + + pub fn height(&self) -> u32 { + self.inputs.sizes[0][0] + } + + pub fn width(&self) -> u32 { + self.inputs.sizes[0][1] + } + 
+ pub fn is_height_dynamic(&self) -> bool { + self.input_shapes()[0][2] == -1 + } + + pub fn is_width_dynamic(&self) -> bool { + self.input_shapes()[0][3] == -1 + } + + pub fn batch(&self) -> u32 { + self.batch.opt + } + + pub fn is_batch_dynamic(&self) -> bool { + self.input_shapes()[0][0] == -1 + } + + pub fn ep(&self) -> &OrtEP { + &self.ep + } + + pub fn task(&self) -> YOLOTask { + self.task.clone() + } + + pub fn names(&self) -> Option> { + // class names, metadata parsing + // String format: `{0: 'person', 1: 'bicycle', 2: 'sports ball', ..., 27: "yellow_lady's_slipper"}` + match self.fetch_from_metadata("names") { + Some(names) => { + let re = Regex::new(r#"(['"])([-()\w '"]+)(['"])"#).unwrap(); + let mut names_ = vec![]; + for (_, [_, name, _]) in re.captures_iter(&names).map(|x| x.extract()) { + names_.push(name.to_string()); + } + Some(names_) + } + None => None, + } + } + + pub fn nk(&self) -> Option { + // num_keypoints, metadata parsing: String `nk` in onnx model: `[17, 3]` + match self.fetch_from_metadata("kpt_shape") { + None => None, + Some(kpt_string) => { + let re = Regex::new(r"([0-9]+), ([0-9]+)").unwrap(); + let caps = re.captures(&kpt_string).unwrap(); + Some(caps.get(1).unwrap().as_str().parse::().unwrap()) + } + } + } + + pub fn nc(&self) -> Option { + // num_classes + match self.names() { + // by names + Some(names) => Some(names.len() as u32), + None => match self.task() { + // by task calculation + YOLOTask::Classify => Some(self.output_shapes()[0][1] as u32), + YOLOTask::Detect => { + if self.output_shapes()[0][1] == -1 { + None + } else { + // cxywhclss + Some(self.output_shapes()[0][1] as u32 - 4) + } + } + YOLOTask::Pose => { + match self.nk() { + None => None, + Some(nk) => { + if self.output_shapes()[0][1] == -1 { + None + } else { + // cxywhclss3*kpt + Some(self.output_shapes()[0][1] as u32 - 4 - 3 * nk) + } + } + } + } + YOLOTask::Segment => { + if self.output_shapes()[0][1] == -1 { + None + } else { + // cxywhclssnm + 
Some((self.output_shapes()[0][1] - self.output_shapes()[1][1]) as u32 - 4) + } + } + }, + } + } + + pub fn nm(&self) -> Option { + // num_masks + match self.task() { + YOLOTask::Segment => Some(self.output_shapes()[1][1] as u32), + _ => None, + } + } + + pub fn na(&self) -> Option { + // num_anchors + match self.task() { + YOLOTask::Segment | YOLOTask::Detect | YOLOTask::Pose => { + if self.output_shapes()[0][2] == -1 { + None + } else { + Some(self.output_shapes()[0][2] as u32) + } + } + _ => None, + } + } + + pub fn author(&self) -> Option { + self.fetch_from_metadata("author") + } + + pub fn version(&self) -> Option { + self.fetch_from_metadata("version") + } +} diff --git a/examples/YOLOv8-ONNXRuntime-Rust/src/yolo_result.rs b/examples/YOLOv8-ONNXRuntime-Rust/src/yolo_result.rs new file mode 100644 index 0000000..2fcc6d8 --- /dev/null +++ b/examples/YOLOv8-ONNXRuntime-Rust/src/yolo_result.rs @@ -0,0 +1,235 @@ +use ndarray::{Array, Axis, IxDyn}; + +#[derive(Clone, PartialEq, Default)] +pub struct YOLOResult { + // YOLO tasks results of an image + pub probs: Option, + pub bboxes: Option>, + pub keypoints: Option>>, + pub masks: Option>>, +} + +impl std::fmt::Debug for YOLOResult { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("YOLOResult") + .field( + "Probs(top5)", + &format_args!("{:?}", self.probs().map(|probs| probs.topk(5))), + ) + .field("Bboxes", &self.bboxes) + .field("Keypoints", &self.keypoints) + .field( + "Masks", + &format_args!("{:?}", self.masks().map(|masks| masks.len())), + ) + .finish() + } +} + +impl YOLOResult { + pub fn new( + probs: Option, + bboxes: Option>, + keypoints: Option>>, + masks: Option>>, + ) -> Self { + Self { + probs, + bboxes, + keypoints, + masks, + } + } + + pub fn probs(&self) -> Option<&Embedding> { + self.probs.as_ref() + } + + pub fn keypoints(&self) -> Option<&Vec>> { + self.keypoints.as_ref() + } + + pub fn masks(&self) -> Option<&Vec>> { + self.masks.as_ref() + } + + pub fn 
bboxes(&self) -> Option<&Vec> { + self.bboxes.as_ref() + } + + pub fn bboxes_mut(&mut self) -> Option<&mut Vec> { + self.bboxes.as_mut() + } +} + +#[derive(Debug, PartialEq, Clone, Default)] +pub struct Point2 { + // A point2d with x, y, conf + x: f32, + y: f32, + confidence: f32, +} + +impl Point2 { + pub fn new_with_conf(x: f32, y: f32, confidence: f32) -> Self { + Self { x, y, confidence } + } + + pub fn new(x: f32, y: f32) -> Self { + Self { + x, + y, + ..Default::default() + } + } + + pub fn x(&self) -> f32 { + self.x + } + + pub fn y(&self) -> f32 { + self.y + } + + pub fn confidence(&self) -> f32 { + self.confidence + } +} + +#[derive(Debug, Clone, PartialEq, Default)] +pub struct Embedding { + // An float32 n-dims tensor + data: Array, +} + +impl Embedding { + pub fn new(data: Array) -> Self { + Self { data } + } + + pub fn data(&self) -> &Array { + &self.data + } + + pub fn topk(&self, k: usize) -> Vec<(usize, f32)> { + let mut probs = self + .data + .iter() + .enumerate() + .map(|(a, b)| (a, *b)) + .collect::>(); + probs.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap()); + let mut topk = Vec::new(); + for &(id, confidence) in probs.iter().take(k) { + topk.push((id, confidence)); + } + topk + } + + pub fn norm(&self) -> Array { + let std_ = self.data.mapv(|x| x * x).sum_axis(Axis(0)).mapv(f32::sqrt); + self.data.clone() / std_ + } + + pub fn top1(&self) -> (usize, f32) { + self.topk(1)[0] + } +} + +#[derive(Debug, Clone, PartialEq, Default)] +pub struct Bbox { + // a bounding box around an object + xmin: f32, + ymin: f32, + width: f32, + height: f32, + id: usize, + confidence: f32, +} + +impl Bbox { + pub fn new_from_xywh(xmin: f32, ymin: f32, width: f32, height: f32) -> Self { + Self { + xmin, + ymin, + width, + height, + ..Default::default() + } + } + + pub fn new(xmin: f32, ymin: f32, width: f32, height: f32, id: usize, confidence: f32) -> Self { + Self { + xmin, + ymin, + width, + height, + id, + confidence, + } + } + + pub fn width(&self) -> f32 { + 
self.width + } + + pub fn height(&self) -> f32 { + self.height + } + + pub fn xmin(&self) -> f32 { + self.xmin + } + + pub fn ymin(&self) -> f32 { + self.ymin + } + + pub fn xmax(&self) -> f32 { + self.xmin + self.width + } + + pub fn ymax(&self) -> f32 { + self.ymin + self.height + } + + pub fn tl(&self) -> Point2 { + Point2::new(self.xmin, self.ymin) + } + + pub fn br(&self) -> Point2 { + Point2::new(self.xmax(), self.ymax()) + } + + pub fn cxcy(&self) -> Point2 { + Point2::new(self.xmin + self.width / 2., self.ymin + self.height / 2.) + } + + pub fn id(&self) -> usize { + self.id + } + + pub fn confidence(&self) -> f32 { + self.confidence + } + + pub fn area(&self) -> f32 { + self.width * self.height + } + + pub fn intersection_area(&self, another: &Bbox) -> f32 { + let l = self.xmin.max(another.xmin); + let r = (self.xmin + self.width).min(another.xmin + another.width); + let t = self.ymin.max(another.ymin); + let b = (self.ymin + self.height).min(another.ymin + another.height); + (r - l).max(0.) * (b - t).max(0.) + } + + pub fn union(&self, another: &Bbox) -> f32 { + self.area() + another.area() - self.intersection_area(another) + } + + pub fn iou(&self, another: &Bbox) -> f32 { + self.intersection_area(another) / self.union(another) + } +} diff --git a/examples/YOLOv8-ONNXRuntime/README.md b/examples/YOLOv8-ONNXRuntime/README.md new file mode 100644 index 0000000..b206b2e --- /dev/null +++ b/examples/YOLOv8-ONNXRuntime/README.md @@ -0,0 +1,43 @@ +# YOLOv8 - ONNX Runtime + +This project implements YOLOv8 using ONNX Runtime. + +## Installation + +To run this project, you need to install the required dependencies. The following instructions will guide you through the installation process. 
+ +### Installing Required Dependencies + +You can install the required dependencies by running the following command: + +```bash +pip install -r requirements.txt +``` + +### Installing `onnxruntime-gpu` + +If you have an NVIDIA GPU and want to leverage GPU acceleration, you can install the onnxruntime-gpu package using the following command: + +```bash +pip install onnxruntime-gpu +``` + +Note: Make sure you have the appropriate GPU drivers installed on your system. + +### Installing `onnxruntime` (CPU version) + +If you don't have an NVIDIA GPU or prefer to use the CPU version of onnxruntime, you can install the onnxruntime package using the following command: + +```bash +pip install onnxruntime +``` + +### Usage + +After successfully installing the required packages, you can run the YOLOv8 implementation using the following command: + +```bash +python main.py --model yolov8n.onnx --img image.jpg --conf-thres 0.5 --iou-thres 0.5 +``` + +Make sure to replace yolov8n.onnx with the path to your YOLOv8 ONNX model file, image.jpg with the path to your input image, and adjust the confidence threshold (conf-thres) and IoU threshold (iou-thres) values as needed. diff --git a/examples/YOLOv8-ONNXRuntime/main.py b/examples/YOLOv8-ONNXRuntime/main.py new file mode 100644 index 0000000..ec76871 --- /dev/null +++ b/examples/YOLOv8-ONNXRuntime/main.py @@ -0,0 +1,228 @@ +import argparse + +import cv2 +import numpy as np +import onnxruntime as ort +import torch + +from ultralytics.utils import ASSETS, yaml_load +from ultralytics.utils.checks import check_requirements, check_yaml + + +class YOLOv8: + """YOLOv8 object detection model class for handling inference and visualization.""" + + def __init__(self, onnx_model, input_image, confidence_thres, iou_thres): + """ + Initializes an instance of the YOLOv8 class. + + Args: + onnx_model: Path to the ONNX model. + input_image: Path to the input image. + confidence_thres: Confidence threshold for filtering detections. 
+ iou_thres: IoU (Intersection over Union) threshold for non-maximum suppression. + """ + self.onnx_model = onnx_model + self.input_image = input_image + self.confidence_thres = confidence_thres + self.iou_thres = iou_thres + + # Load the class names from the COCO dataset + self.classes = yaml_load(check_yaml('coco128.yaml'))['names'] + + # Generate a color palette for the classes + self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) + + def draw_detections(self, img, box, score, class_id): + """ + Draws bounding boxes and labels on the input image based on the detected objects. + + Args: + img: The input image to draw detections on. + box: Detected bounding box. + score: Corresponding detection score. + class_id: Class ID for the detected object. + + Returns: + None + """ + + # Extract the coordinates of the bounding box + x1, y1, w, h = box + + # Retrieve the color for the class ID + color = self.color_palette[class_id] + + # Draw the bounding box on the image + cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2) + + # Create the label text with class name and score + label = f'{self.classes[class_id]}: {score:.2f}' + + # Calculate the dimensions of the label text + (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) + + # Calculate the position of the label text + label_x = x1 + label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10 + + # Draw a filled rectangle as the background for the label text + cv2.rectangle(img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color, + cv2.FILLED) + + # Draw the label text on the image + cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA) + + def preprocess(self): + """ + Preprocesses the input image before performing inference. + + Returns: + image_data: Preprocessed image data ready for inference. 
+ """ + # Read the input image using OpenCV + self.img = cv2.imread(self.input_image) + + # Get the height and width of the input image + self.img_height, self.img_width = self.img.shape[:2] + + # Convert the image color space from BGR to RGB + img = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB) + + # Resize the image to match the input shape + img = cv2.resize(img, (self.input_width, self.input_height)) + + # Normalize the image data by dividing it by 255.0 + image_data = np.array(img) / 255.0 + + # Transpose the image to have the channel dimension as the first dimension + image_data = np.transpose(image_data, (2, 0, 1)) # Channel first + + # Expand the dimensions of the image data to match the expected input shape + image_data = np.expand_dims(image_data, axis=0).astype(np.float32) + + # Return the preprocessed image data + return image_data + + def postprocess(self, input_image, output): + """ + Performs post-processing on the model's output to extract bounding boxes, scores, and class IDs. + + Args: + input_image (numpy.ndarray): The input image. + output (numpy.ndarray): The output of the model. + + Returns: + numpy.ndarray: The input image with detections drawn on it. 
+ """ + + # Transpose and squeeze the output to match the expected shape + outputs = np.transpose(np.squeeze(output[0])) + + # Get the number of rows in the outputs array + rows = outputs.shape[0] + + # Lists to store the bounding boxes, scores, and class IDs of the detections + boxes = [] + scores = [] + class_ids = [] + + # Calculate the scaling factors for the bounding box coordinates + x_factor = self.img_width / self.input_width + y_factor = self.img_height / self.input_height + + # Iterate over each row in the outputs array + for i in range(rows): + # Extract the class scores from the current row + classes_scores = outputs[i][4:] + + # Find the maximum score among the class scores + max_score = np.amax(classes_scores) + + # If the maximum score is above the confidence threshold + if max_score >= self.confidence_thres: + # Get the class ID with the highest score + class_id = np.argmax(classes_scores) + + # Extract the bounding box coordinates from the current row + x, y, w, h = outputs[i][0], outputs[i][1], outputs[i][2], outputs[i][3] + + # Calculate the scaled coordinates of the bounding box + left = int((x - w / 2) * x_factor) + top = int((y - h / 2) * y_factor) + width = int(w * x_factor) + height = int(h * y_factor) + + # Add the class ID, score, and box coordinates to the respective lists + class_ids.append(class_id) + scores.append(max_score) + boxes.append([left, top, width, height]) + + # Apply non-maximum suppression to filter out overlapping bounding boxes + indices = cv2.dnn.NMSBoxes(boxes, scores, self.confidence_thres, self.iou_thres) + + # Iterate over the selected indices after non-maximum suppression + for i in indices: + # Get the box, score, and class ID corresponding to the index + box = boxes[i] + score = scores[i] + class_id = class_ids[i] + + # Draw the detection on the input image + self.draw_detections(input_image, box, score, class_id) + + # Return the modified input image + return input_image + + def main(self): + """ + Performs 
inference using an ONNX model and returns the output image with drawn detections. + + Returns: + output_img: The output image with drawn detections. + """ + # Create an inference session using the ONNX model and specify execution providers + session = ort.InferenceSession(self.onnx_model, providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) + + # Get the model inputs + model_inputs = session.get_inputs() + + # Store the input size for later use (the ONNX input is NCHW: [batch, channels, height, width]) + input_shape = model_inputs[0].shape + self.input_width = input_shape[3] + self.input_height = input_shape[2] + + # Preprocess the image data + img_data = self.preprocess() + + # Run inference using the preprocessed image data + outputs = session.run(None, {model_inputs[0].name: img_data}) + + # Perform post-processing on the outputs to obtain output image. + return self.postprocess(self.img, outputs) # output image + + +if __name__ == '__main__': + # Create an argument parser to handle command-line arguments + parser = argparse.ArgumentParser() + parser.add_argument('--model', type=str, default='yolov8n.onnx', help='Input your ONNX model.') + parser.add_argument('--img', type=str, default=str(ASSETS / 'bus.jpg'), help='Path to input image.') + parser.add_argument('--conf-thres', type=float, default=0.5, help='Confidence threshold') + parser.add_argument('--iou-thres', type=float, default=0.5, help='NMS IoU threshold') + args = parser.parse_args() + + # Check the requirements and select the appropriate backend (CPU or GPU) + check_requirements('onnxruntime-gpu' if torch.cuda.is_available() else 'onnxruntime') + + # Create an instance of the YOLOv8 class with the specified arguments + detection = YOLOv8(args.model, args.img, args.conf_thres, args.iou_thres) + + # Perform object detection and obtain the output image + output_image = detection.main() + + # Display the output image in a window + cv2.namedWindow('Output', cv2.WINDOW_NORMAL) + cv2.imshow('Output', output_image) + + # Wait for a key press to exit + 
cv2.waitKey(0) diff --git a/examples/YOLOv8-OpenCV-ONNX-Python/README.md b/examples/YOLOv8-OpenCV-ONNX-Python/README.md new file mode 100644 index 0000000..c9076fa --- /dev/null +++ b/examples/YOLOv8-OpenCV-ONNX-Python/README.md @@ -0,0 +1,19 @@ +# YOLOv8 - OpenCV + +Implementation YOLOv8 on OpenCV using ONNX Format. + +Just simply clone and run + +```bash +pip install -r requirements.txt +python main.py --model yolov8n.onnx --img image.jpg +``` + +If you start from scratch: + +```bash +pip install ultralytics +yolo export model=yolov8n.pt imgsz=640 format=onnx opset=12 +``` + +_\*Make sure to include "opset=12"_ diff --git a/examples/YOLOv8-OpenCV-ONNX-Python/main.py b/examples/YOLOv8-OpenCV-ONNX-Python/main.py new file mode 100644 index 0000000..78b0b08 --- /dev/null +++ b/examples/YOLOv8-OpenCV-ONNX-Python/main.py @@ -0,0 +1,117 @@ +import argparse + +import cv2.dnn +import numpy as np + +from ultralytics.utils import ASSETS, yaml_load +from ultralytics.utils.checks import check_yaml + +CLASSES = yaml_load(check_yaml('coco128.yaml'))['names'] +colors = np.random.uniform(0, 255, size=(len(CLASSES), 3)) + + +def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h): + """ + Draws bounding boxes on the input image based on the provided arguments. + + Args: + img (numpy.ndarray): The input image to draw the bounding box on. + class_id (int): Class ID of the detected object. + confidence (float): Confidence score of the detected object. + x (int): X-coordinate of the top-left corner of the bounding box. + y (int): Y-coordinate of the top-left corner of the bounding box. + x_plus_w (int): X-coordinate of the bottom-right corner of the bounding box. + y_plus_h (int): Y-coordinate of the bottom-right corner of the bounding box. 
+ """ + label = f'{CLASSES[class_id]} ({confidence:.2f})' + color = colors[class_id] + cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2) + cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) + + +def main(onnx_model, input_image): + """ + Main function to load ONNX model, perform inference, draw bounding boxes, and display the output image. + + Args: + onnx_model (str): Path to the ONNX model. + input_image (str): Path to the input image. + + Returns: + list: List of dictionaries containing detection information such as class_id, class_name, confidence, etc. + """ + # Load the ONNX model + model: cv2.dnn.Net = cv2.dnn.readNetFromONNX(onnx_model) + + # Read the input image + original_image: np.ndarray = cv2.imread(input_image) + [height, width, _] = original_image.shape + + # Prepare a square image for inference + length = max((height, width)) + image = np.zeros((length, length, 3), np.uint8) + image[0:height, 0:width] = original_image + + # Calculate scale factor + scale = length / 640 + + # Preprocess the image and prepare blob for model + blob = cv2.dnn.blobFromImage(image, scalefactor=1 / 255, size=(640, 640), swapRB=True) + model.setInput(blob) + + # Perform inference + outputs = model.forward() + + # Prepare output array + outputs = np.array([cv2.transpose(outputs[0])]) + rows = outputs.shape[1] + + boxes = [] + scores = [] + class_ids = [] + + # Iterate through output to collect bounding boxes, confidence scores, and class IDs + for i in range(rows): + classes_scores = outputs[0][i][4:] + (minScore, maxScore, minClassLoc, (x, maxClassIndex)) = cv2.minMaxLoc(classes_scores) + if maxScore >= 0.25: + box = [ + outputs[0][i][0] - (0.5 * outputs[0][i][2]), outputs[0][i][1] - (0.5 * outputs[0][i][3]), + outputs[0][i][2], outputs[0][i][3]] + boxes.append(box) + scores.append(maxScore) + class_ids.append(maxClassIndex) + + # Apply NMS (Non-maximum suppression) + result_boxes = cv2.dnn.NMSBoxes(boxes, scores, 0.25, 0.45, 0.5) + 
+ detections = [] + + # Iterate through NMS results to draw bounding boxes and labels + for i in range(len(result_boxes)): + index = result_boxes[i] + box = boxes[index] + detection = { + 'class_id': class_ids[index], + 'class_name': CLASSES[class_ids[index]], + 'confidence': scores[index], + 'box': box, + 'scale': scale} + detections.append(detection) + draw_bounding_box(original_image, class_ids[index], scores[index], round(box[0] * scale), round(box[1] * scale), + round((box[0] + box[2]) * scale), round((box[1] + box[3]) * scale)) + + # Display the image with bounding boxes + cv2.imshow('image', original_image) + cv2.waitKey(0) + cv2.destroyAllWindows() + + return detections + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--model', default='yolov8n.onnx', help='Input your ONNX model.') + parser.add_argument('--img', default=str(ASSETS / 'bus.jpg'), help='Path to input image.') + args = parser.parse_args() + main(args.model, args.img) diff --git a/examples/YOLOv8-Region-Counter/readme.md b/examples/YOLOv8-Region-Counter/readme.md new file mode 100644 index 0000000..2acf0a5 --- /dev/null +++ b/examples/YOLOv8-Region-Counter/readme.md @@ -0,0 +1,123 @@ +# Regions Counting Using YOLOv8 (Inference on Video) + +- Region counting is a method employed to tally the objects within a specified area, allowing for more sophisticated analyses when multiple regions are considered. These regions can be adjusted interactively using a Left Mouse Click, and the counting process occurs in real time. +- Regions can be adjusted to suit the user's preferences and requirements. + +
+

+ YOLOv8 region counting visual 1 + YOLOv8 region counting visual 2 +

+
+ +## Table of Contents + +- [Step 1: Install the Required Libraries](#step-1-install-the-required-libraries) +- [Step 2: Run the Region Counting Using Ultralytics YOLOv8](#step-2-run-the-region-counting-using-ultralytics-yolov8) +- [Usage Options](#usage-options) +- [FAQ](#faq) + +## Step 1: Install the Required Libraries + +Clone the repository, install dependencies and `cd` to this local directory for commands in Step 2. + +```bash +# Clone ultralytics repo +git clone https://github.com/ultralytics/ultralytics + +# cd to local directory +cd ultralytics/examples/YOLOv8-Region-Counter +``` + +## Step 2: Run the Region Counting Using Ultralytics YOLOv8 + +Here are the basic commands for running the inference: + +### Note + +After the video begins playing, you can freely move the region anywhere within the video by simply clicking and dragging using the left mouse button. + +```bash +# If you want to save results +python yolov8_region_counter.py --source "path/to/video.mp4" --save-img --view-img + +# If you want to run model on CPU +python yolov8_region_counter.py --source "path/to/video.mp4" --save-img --view-img --device cpu + +# If you want to change model file +python yolov8_region_counter.py --source "path/to/video.mp4" --save-img --weights "path/to/model.pt" + +# If you want to detect specific class (first class and third class) +python yolov8_region_counter.py --source "path/to/video.mp4" --classes 0 2 --weights "path/to/model.pt" + +# If you dont want to save results +python yolov8_region_counter.py --source "path/to/video.mp4" --view-img +``` + +## Usage Options + +- `--source`: Specifies the path to the video file you want to run inference on. +- `--device`: Specifies the device `cpu` or `0` +- `--save-img`: Flag to save the detection results as images. +- `--weights`: Specifies a different YOLOv8 model file (e.g., `yolov8n.pt`, `yolov8s.pt`, `yolov8m.pt`, `yolov8l.pt`, `yolov8x.pt`). 
+- `--classes`: Specifies the class to be detected +- `--line-thickness`: Specifies the bounding box thickness +- `--region-thickness`: Specifies the region boxes thickness +- `--track-thickness`: Specifies the track line thickness + +## FAQ + +**1. What Does Region Counting Involve?** + +Region counting is a computational method utilized to ascertain the quantity of objects within a specific area in recorded video or real-time streams. This technique finds frequent application in image processing, computer vision, and pattern recognition, facilitating the analysis and segmentation of objects or features based on their spatial relationships. + +**2. Is Friendly Region Plotting Supported by the Region Counter?** + +The Region Counter offers the capability to create regions in various formats, such as polygons and rectangles. You have the flexibility to modify region attributes, including coordinates, colors, and other details, as demonstrated in the following code: + +```python +from shapely.geometry import Polygon + +counting_regions = [ + { + "name": "YOLOv8 Polygon Region", + "polygon": Polygon( + [(50, 80), (250, 20), (450, 80), (400, 350), (100, 350)] + ), # Polygon with five points (Pentagon) + "counts": 0, + "dragging": False, + "region_color": (255, 42, 4), # BGR Value + "text_color": (255, 255, 255), # Region Text Color + }, + { + "name": "YOLOv8 Rectangle Region", + "polygon": Polygon( + [(200, 250), (440, 250), (440, 550), (200, 550)] + ), # Rectangle with four points + "counts": 0, + "dragging": False, + "region_color": (37, 255, 225), # BGR Value + "text_color": (0, 0, 0), # Region Text Color + }, +] +``` + +**3. Why Combine Region Counting with YOLOv8?** + +YOLOv8 specializes in the detection and tracking of objects in video streams. Region counting complements this by enabling object counting within designated areas, making it a valuable application of YOLOv8. + +**4. 
How Can I Troubleshoot Issues?** + +To gain more insights during inference, you can include the `--debug` flag in your command: + +```bash +python yolov8_region_counter.py --source "path to video file" --debug +``` + +**5. Can I Employ Other YOLO Versions?** + +Certainly, you have the flexibility to specify different YOLO model weights using the `--weights` option. + +**6. Where Can I Access Additional Information?** + +For a comprehensive guide on using YOLOv8 with Object Tracking, please refer to [Multi-Object Tracking with Ultralytics YOLO](https://docs.ultralytics.com/modes/track/). diff --git a/examples/YOLOv8-Region-Counter/yolov8_region_counter.py b/examples/YOLOv8-Region-Counter/yolov8_region_counter.py new file mode 100644 index 0000000..5379fd3 --- /dev/null +++ b/examples/YOLOv8-Region-Counter/yolov8_region_counter.py @@ -0,0 +1,218 @@ +import argparse +from collections import defaultdict +from pathlib import Path + +import cv2 +import numpy as np +from shapely.geometry import Polygon +from shapely.geometry.point import Point + +from ultralytics import YOLO +from ultralytics.utils.files import increment_path +from ultralytics.utils.plotting import Annotator, colors + +track_history = defaultdict(list) + +current_region = None +counting_regions = [ + { + 'name': 'YOLOv8 Polygon Region', + 'polygon': Polygon([(50, 80), (250, 20), (450, 80), (400, 350), (100, 350)]), # Polygon points + 'counts': 0, + 'dragging': False, + 'region_color': (255, 42, 4), # BGR Value + 'text_color': (255, 255, 255) # Region Text Color + }, + { + 'name': 'YOLOv8 Rectangle Region', + 'polygon': Polygon([(200, 250), (440, 250), (440, 550), (200, 550)]), # Polygon points + 'counts': 0, + 'dragging': False, + 'region_color': (37, 255, 225), # BGR Value + 'text_color': (0, 0, 0), # Region Text Color + }, ] + + +def mouse_callback(event, x, y, flags, param): + """Mouse call back event.""" + global current_region + + # Mouse left button down event + if event == cv2.EVENT_LBUTTONDOWN: + 
for region in counting_regions: + if region['polygon'].contains(Point((x, y))): + current_region = region + current_region['dragging'] = True + current_region['offset_x'] = x + current_region['offset_y'] = y + + # Mouse move event + elif event == cv2.EVENT_MOUSEMOVE: + if current_region is not None and current_region['dragging']: + dx = x - current_region['offset_x'] + dy = y - current_region['offset_y'] + current_region['polygon'] = Polygon([ + (p[0] + dx, p[1] + dy) for p in current_region['polygon'].exterior.coords]) + current_region['offset_x'] = x + current_region['offset_y'] = y + + # Mouse left button up event + elif event == cv2.EVENT_LBUTTONUP: + if current_region is not None and current_region['dragging']: + current_region['dragging'] = False + + +def run( + weights='yolov8n.pt', + source=None, + device='cpu', + view_img=False, + save_img=False, + exist_ok=False, + classes=None, + line_thickness=2, + track_thickness=2, + region_thickness=2, +): + """ + Run Region counting on a video using YOLOv8 and ByteTrack. + + Supports movable region for real time counting inside specific area. + Supports multiple regions counting. + Regions can be Polygons or rectangle in shape + + Args: + weights (str): Model weights path. + source (str): Video file path. + device (str): processing device cpu, 0, 1 + view_img (bool): Show results. + save_img (bool): Save results. + exist_ok (bool): Overwrite existing files. + classes (list): classes to detect and track + line_thickness (int): Bounding box thickness. + track_thickness (int): Tracking line thickness + region_thickness (int): Region thickness. 
+ """ + vid_frame_count = 0 + + # Check source path + if not Path(source).exists(): + raise FileNotFoundError(f"Source path '{source}' does not exist.") + + # Setup Model + model = YOLO(f'{weights}') + model.to('cuda') if device == '0' else model.to('cpu') + + # Extract classes names + names = model.model.names + + # Video setup + videocapture = cv2.VideoCapture(source) + frame_width, frame_height = int(videocapture.get(3)), int(videocapture.get(4)) + fps, fourcc = int(videocapture.get(5)), cv2.VideoWriter_fourcc(*'mp4v') + + # Output setup + save_dir = increment_path(Path('ultralytics_rc_output') / 'exp', exist_ok) + save_dir.mkdir(parents=True, exist_ok=True) + video_writer = cv2.VideoWriter(str(save_dir / f'{Path(source).stem}.mp4'), fourcc, fps, (frame_width, frame_height)) + + # Iterate over video frames + while videocapture.isOpened(): + success, frame = videocapture.read() + if not success: + break + vid_frame_count += 1 + + # Extract the results + results = model.track(frame, persist=True, classes=classes) + + if results[0].boxes.id is not None: + boxes = results[0].boxes.xyxy.cpu() + track_ids = results[0].boxes.id.int().cpu().tolist() + clss = results[0].boxes.cls.cpu().tolist() + + annotator = Annotator(frame, line_width=line_thickness, example=str(names)) + + for box, track_id, cls in zip(boxes, track_ids, clss): + annotator.box_label(box, str(names[cls]), color=colors(cls, True)) + bbox_center = (box[0] + box[2]) / 2, (box[1] + box[3]) / 2 # Bbox center + + track = track_history[track_id] # Tracking Lines plot + track.append((float(bbox_center[0]), float(bbox_center[1]))) + if len(track) > 30: + track.pop(0) + points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2)) + cv2.polylines(frame, [points], isClosed=False, color=colors(cls, True), thickness=track_thickness) + + # Check if detection inside region + for region in counting_regions: + if region['polygon'].contains(Point((bbox_center[0], bbox_center[1]))): + region['counts'] += 1 + + # Draw 
def parse_opt(argv=None):
    """
    Parse command line arguments for the region-counting example.

    Args:
        argv (list[str] | None): Argument strings to parse. When None (the default) argparse reads
            ``sys.argv[1:]``, which is what the script entry point relies on.

    Returns:
        (argparse.Namespace): Parsed options. ``main`` forwards every attribute to ``run`` as a keyword
            argument, so option names must stay in sync with the parameters of ``run``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='yolov8n.pt', help='initial weights path')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--source', type=str, required=True, help='video file path')
    parser.add_argument('--view-img', action='store_true', help='show results')
    parser.add_argument('--save-img', action='store_true', help='save results')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--line-thickness', type=int, default=2, help='bounding box thickness')
    parser.add_argument('--track-thickness', type=int, default=2, help='Tracking line thickness')
    parser.add_argument('--region-thickness', type=int, default=4, help='Region thickness')

    return parser.parse_args(argv)


def main(opt):
    """
    Run the region counter with parsed options.

    Args:
        opt (argparse.Namespace): Options produced by ``parse_opt``; unpacked into ``run`` as keyword arguments.
    """
    run(**vars(opt))


if __name__ == '__main__':
    opt = parse_opt()
    main(opt)
+ +## Table of Contents + +- [Step 1: Install the Required Libraries](#step-1-install-the-required-libraries) +- [Step 2: Run the Inference with SAHI using Ultralytics YOLOv8](#step-2-run-the-inference-with-sahi-using-ultralytics-yolov8) +- [Usage Options](#usage-options) +- [FAQ](#faq) + +## Step 1: Install the Required Libraries + +Clone the repository, install dependencies and `cd` to this local directory for commands in Step 2. + +```bash +# Clone ultralytics repo +git clone https://github.com/ultralytics/ultralytics + +# Install dependencies +pip install sahi ultralytics + +# cd to local directory +cd ultralytics/examples/YOLOv8-SAHI-Inference-Video +``` + +## Step 2: Run the Inference with SAHI using Ultralytics YOLOv8 + +Here are the basic commands for running the inference: + +```bash +#if you want to save results +python yolov8_sahi.py --source "path/to/video.mp4" --save-img + +#if you want to change model file +python yolov8_sahi.py --source "path/to/video.mp4" --save-img --weights "yolov8n.pt" +``` + +## Usage Options + +- `--source`: Specifies the path to the video file you want to run inference on. +- `--save-img`: Flag to save the annotated detection results as a video file. +- `--weights`: Specifies a different YOLOv8 model file (e.g., `yolov8n.pt`, `yolov8s.pt`, `yolov8m.pt`, `yolov8l.pt`, `yolov8x.pt`). + +## FAQ + +**1. What is SAHI?** + +SAHI stands for Slicing Aided Hyper Inference. It is a library designed to optimize object detection algorithms for large-scale and high-resolution images. The library source code is available on [GitHub](https://github.com/obss/sahi). + +**2. Why use SAHI with YOLOv8?** + +SAHI can handle large-scale images by slicing them into smaller, more manageable sizes without compromising the detection quality. This makes it a great companion to YOLOv8, especially when working with high-resolution videos. + +**3. 
def run(weights='yolov8n.pt', source='test.mp4', view_img=False, save_img=False, exist_ok=False):
    """
    Run object detection on a video using YOLOv8 and SAHI.

    Every frame is passed through SAHI sliced prediction (512x512 slices, 20% overlap), the detections are
    drawn on the frame, and the frame is optionally displayed and/or appended to an output video.

    Args:
        weights (str): Model weights file name; the model is looked up under ``models/<weights>``.
        source (str): Video file path.
        view_img (bool): Show annotated frames in an OpenCV window.
        save_img (bool): Write annotated frames to ``ultralytics_results_with_sahi/exp*/<source stem>.mp4``.
        exist_ok (bool): Reuse an existing ``exp`` directory instead of incrementing its name.

    Raises:
        FileNotFoundError: If ``source`` does not exist.
    """

    # Check source path
    if not Path(source).exists():
        raise FileNotFoundError(f"Source path '{source}' does not exist.")

    yolov8_model_path = f'models/{weights}'
    # NOTE(review): the helper is named for the yolov8s checkpoint; confirm it honours other `weights` names.
    download_yolov8s_model(yolov8_model_path)
    detection_model = AutoDetectionModel.from_pretrained(model_type='yolov8',
                                                         model_path=yolov8_model_path,
                                                         confidence_threshold=0.3,
                                                         device='cpu')

    box_color = (56, 56, 255)
    videocapture = cv2.VideoCapture(source)
    video_writer = None
    try:
        # Output setup: only touch the filesystem when the caller actually asked for a saved video,
        # otherwise every run would leave behind a new directory holding an empty .mp4.
        if save_img:
            frame_width = int(videocapture.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(videocapture.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = int(videocapture.get(cv2.CAP_PROP_FPS))
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            save_dir = increment_path(Path('ultralytics_results_with_sahi') / 'exp', exist_ok)
            save_dir.mkdir(parents=True, exist_ok=True)
            video_writer = cv2.VideoWriter(str(save_dir / f'{Path(source).stem}.mp4'), fourcc, fps,
                                           (frame_width, frame_height))

        while videocapture.isOpened():
            success, frame = videocapture.read()
            if not success:
                break

            results = get_sliced_prediction(frame,
                                            detection_model,
                                            slice_height=512,
                                            slice_width=512,
                                            overlap_height_ratio=0.2,
                                            overlap_width_ratio=0.2)

            for prediction in results.object_prediction_list:
                bbox = prediction.bbox
                x1, y1, x2, y2 = int(bbox.minx), int(bbox.miny), int(bbox.maxx), int(bbox.maxy)
                label = str(prediction.category.name)

                cv2.rectangle(frame, (x1, y1), (x2, y2), box_color, 2)
                # Filled background sized to the label text, sitting on top of the box's upper-left corner
                t_size = cv2.getTextSize(label, 0, fontScale=0.6, thickness=1)[0]
                cv2.rectangle(frame, (x1, y1 - t_size[1] - 3), (x1 + t_size[0], y1 + 3), box_color, -1)
                cv2.putText(frame,
                            label, (x1, y1 - 2),
                            0,
                            0.6, [255, 255, 255],
                            thickness=1,
                            lineType=cv2.LINE_AA)

            if view_img:
                cv2.imshow(Path(source).stem, frame)
            if video_writer is not None:
                video_writer.write(frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release handles even if prediction or drawing raises, so the output file is finalized.
        if video_writer is not None:
            video_writer.release()
        videocapture.release()
        cv2.destroyAllWindows()


def parse_opt(argv=None):
    """
    Parse command line arguments.

    Args:
        argv (list[str] | None): Argument strings to parse. When None (the default) argparse reads
            ``sys.argv[1:]``.

    Returns:
        (argparse.Namespace): Parsed options whose attribute names match the keyword arguments of ``run``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='yolov8n.pt', help='initial weights path')
    parser.add_argument('--source', type=str, required=True, help='video file path')
    parser.add_argument('--view-img', action='store_true', help='show results')
    parser.add_argument('--save-img', action='store_true', help='save results')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    return parser.parse_args(argv)


def main(opt):
    """
    Run SAHI video inference with parsed options.

    Args:
        opt (argparse.Namespace): Options produced by ``parse_opt``; unpacked into ``run`` as keyword arguments.
    """
    run(**vars(opt))


if __name__ == '__main__':
    opt = parse_opt()
    main(opt)
You will need `ultralytics` for exporting the YOLOv8-seg ONNX model and using some utility functions, `onnxruntime-gpu` for GPU-accelerated inference, and `opencv-python` for image processing. + +```bash +pip install ultralytics +pip install onnxruntime-gpu # For GPU support +# pip install onnxruntime # Use this instead if you don't have an NVIDIA GPU +pip install numpy +pip install opencv-python +``` + +## Getting Started + +### 1. Export the YOLOv8 ONNX Model + +Export the YOLOv8 segmentation model to ONNX format using the provided `ultralytics` package. + +```bash +yolo export model=yolov8s-seg.pt imgsz=640 format=onnx opset=12 simplify +``` + +### 2. Run Inference + +Perform inference with the exported ONNX model on your images. + +```bash +python main.py --model <MODEL_PATH> --source <IMAGE_PATH> +``` + +### Example Output + +After running the command, you should see segmentation results similar to this: + +Segmentation Demo + +## Advanced Usage + +For more advanced usage, including real-time video processing, please refer to the `main.py` script's command-line arguments. + +## Contributing + +We welcome contributions to improve this demo! Please submit issues and pull requests for bug reports, feature requests, or new algorithm enhancements. + +## License + +This project is licensed under the AGPL-3.0 License - see the [LICENSE](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) file for details. + +## Acknowledgments + +- The YOLOv8-Segmentation-ONNXRuntime-Python demo is contributed by GitHub user [jamjamjon](https://github.com/jamjamjon). +- Thanks to the ONNX Runtime community for providing a robust and efficient inference engine. 
class YOLOv8Seg:
    """YOLOv8 segmentation model executed with ONNX Runtime (pre-process, inference, post-process, drawing)."""

    def __init__(self, onnx_model):
        """
        Initialization.

        Args:
            onnx_model (str): Path to the ONNX model.
        """

        # Build Ort session; fall back to CPU when onnxruntime does not report a GPU device
        self.session = ort.InferenceSession(onnx_model,
                                            providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
                                            if ort.get_device() == 'GPU' else ['CPUExecutionProvider'])

        # Numpy dtype: support both FP32 and FP16 onnx model
        self.ndtype = np.half if self.session.get_inputs()[0].type == 'tensor(float16)' else np.single

        # Get model width and height (YOLOv8-seg only has one input)
        self.model_height, self.model_width = [x.shape for x in self.session.get_inputs()][0][-2:]

        # Load COCO class names
        self.classes = yaml_load(check_yaml('coco128.yaml'))['names']

        # Create color palette
        self.color_palette = Colors()

    def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45, nm=32):
        """
        The whole pipeline: pre-process -> inference -> post-process.

        Args:
            im0 (Numpy.ndarray): original input image.
            conf_threshold (float): confidence threshold for filtering predictions.
            iou_threshold (float): iou threshold for NMS.
            nm (int): the number of masks.

        Returns:
            boxes (np.ndarray | List): [N, 6] rows of (x1, y1, x2, y2, conf, cls), or an empty list.
            segments (List): list of segments.
            masks (np.ndarray | List): [N, H, W] output masks, or an empty list.
        """

        # Pre-process
        im, ratio, (pad_w, pad_h) = self.preprocess(im0)

        # Ort inference
        preds = self.session.run(None, {self.session.get_inputs()[0].name: im})

        # Post-process
        boxes, segments, masks = self.postprocess(preds,
                                                  im0=im0,
                                                  ratio=ratio,
                                                  pad_w=pad_w,
                                                  pad_h=pad_h,
                                                  conf_threshold=conf_threshold,
                                                  iou_threshold=iou_threshold,
                                                  nm=nm)
        return boxes, segments, masks

    def preprocess(self, img):
        """
        Pre-processes the input image.

        Args:
            img (Numpy.ndarray): image about to be processed.

        Returns:
            img_process (Numpy.ndarray): image preprocessed for inference.
            ratio (tuple): width, height ratios in letterbox.
            pad (tuple): (pad_w, pad_h), the width and height padding applied on each side in letterbox.
        """

        # Resize and pad input image using letterbox() (Borrowed from Ultralytics)
        shape = img.shape[:2]  # original image shape
        new_shape = (self.model_height, self.model_width)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        ratio = r, r
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2  # wh padding
        if shape[::-1] != new_unpad:  # resize
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
        # The -0.1/+0.1 offsets put the extra pixel on the bottom/right when the padding is odd
        top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1))
        left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1))
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114))

        # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional)
        img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0
        img_process = img[None] if len(img.shape) == 3 else img
        return img_process, ratio, (pad_w, pad_h)

    def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold, nm=32):
        """
        Post-process the prediction.

        Args:
            preds (Numpy.ndarray): predictions come from ort.session.run().
            im0 (Numpy.ndarray): [h, w, c] original input image.
            ratio (tuple): width, height ratios in letterbox.
            pad_w (float): width padding in letterbox.
            pad_h (float): height padding in letterbox.
            conf_threshold (float): conf threshold.
            iou_threshold (float): iou threshold.
            nm (int): the number of masks.

        Returns:
            boxes (np.ndarray | List): [N, 6] rows of (x1, y1, x2, y2, conf, cls), or an empty list.
            segments (List): list of segments.
            masks (np.ndarray | List): [N, H, W] output masks, or an empty list.
        """
        x, protos = preds[0], preds[1]  # Two outputs: predictions and protos

        # Transpose the first output: (Batch_size, xywh_conf_cls_nm, Num_anchors) -> (Batch_size, Num_anchors, xywh_conf_cls_nm)
        x = np.einsum('bcn->bnc', x)

        # Predictions filtering by conf-threshold
        x = x[np.amax(x[..., 4:-nm], axis=-1) > conf_threshold]
        if len(x) == 0:  # nothing survived the confidence filter, skip NMS entirely
            return [], [], []

        # Create a new matrix which merge these(box, score, cls, nm) into one
        # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html
        x = np.c_[x[..., :4], np.amax(x[..., 4:-nm], axis=-1), np.argmax(x[..., 4:-nm], axis=-1), x[..., -nm:]]

        # NMS filtering. NMSBoxes takes (left, top, width, height) rectangles, while the model emits
        # (cx, cy, w, h), so shift the centre to the top-left corner before computing overlaps.
        nms_boxes = x[:, :4].copy()
        nms_boxes[:, :2] -= nms_boxes[:, 2:] / 2
        keep = cv2.dnn.NMSBoxes(nms_boxes, x[:, 4], conf_threshold, iou_threshold)
        # Flatten so that an empty result or a nested (N, 1) index layout indexes rows the same way
        keep = np.asarray(keep, dtype=np.int64).reshape(-1)
        x = x[keep]

        # Decode and return
        if len(x) > 0:

            # Bounding boxes format change: cxcywh -> xyxy
            x[..., [0, 1]] -= x[..., [2, 3]] / 2
            x[..., [2, 3]] += x[..., [0, 1]]

            # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image
            x[..., :4] -= [pad_w, pad_h, pad_w, pad_h]
            x[..., :4] /= min(ratio)

            # Bounding boxes boundary clamp
            x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1])
            x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0])

            # Process masks
            masks = self.process_mask(protos[0], x[:, 6:], x[:, :4], im0.shape)

            # Masks -> Segments(contours)
            segments = self.masks2segments(masks)
            return x[..., :6], segments, masks  # boxes, segments, masks
        else:
            return [], [], []

    @staticmethod
    def masks2segments(masks):
        """
        It takes a list of masks(n,h,w) and returns a list of segments(n,xy) (Borrowed from
        https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L750)

        Args:
            masks (numpy.ndarray): [n, h, w] masks; each one is cast to uint8 before contour extraction.

        Returns:
            segments (List): one float32 [k, 2] array of contour points per mask (the contour with the most
                points), or an empty [0, 2] array when a mask has no contour.
        """
        segments = []
        for mask in masks.astype('uint8'):
            contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[0]  # CHAIN_APPROX_SIMPLE
            if contours:
                longest = np.array([len(contour) for contour in contours]).argmax()
                points = np.array(contours[longest]).reshape(-1, 2)
            else:
                points = np.zeros((0, 2))  # no segments found
            segments.append(points.astype('float32'))
        return segments

    @staticmethod
    def crop_mask(masks, boxes):
        """
        It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box. (Borrowed from
        https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L599)

        Args:
            masks (Numpy.ndarray): [n, h, w] tensor of masks.
            boxes (Numpy.ndarray): [n, 4] tensor of (x1, y1, x2, y2) boxes in the pixel coordinates of `masks`.

        Returns:
            (Numpy.ndarray): [n, h, w] masks with every value outside its box (x1 <= x < x2, y1 <= y < y2) zeroed.
        """
        n, h, w = masks.shape
        x1, y1, x2, y2 = np.split(boxes[:, :, None], 4, 1)  # each of shape [n, 1, 1]
        r = np.arange(w, dtype=x1.dtype)[None, None, :]  # column (x) index
        c = np.arange(h, dtype=x1.dtype)[None, :, None]  # row (y) index
        return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))

    def process_mask(self, protos, masks_in, bboxes, im0_shape):
        """
        Takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher quality
        but is slower. (Borrowed from https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L618)

        Args:
            protos (numpy.ndarray): [mask_dim, mask_h, mask_w].
            masks_in (numpy.ndarray): [n, mask_dim], n is number of masks after nms.
            bboxes (numpy.ndarray): bboxes re-scaled to original image shape.
            im0_shape (tuple): the size of the input image (h,w,c).

        Returns:
            (numpy.ndarray): [n, h, w] boolean masks at the original image size.
        """
        c, mh, mw = protos.shape
        masks = np.matmul(masks_in, protos.reshape((c, -1))).reshape((-1, mh, mw)).transpose(1, 2, 0)  # HWN
        masks = np.ascontiguousarray(masks)
        masks = self.scale_mask(masks, im0_shape)  # re-scale mask from P3 shape to original input image shape
        masks = np.einsum('HWN -> NHW', masks)  # HWN -> NHW
        masks = self.crop_mask(masks, bboxes)
        return np.greater(masks, 0.5)

    @staticmethod
    def scale_mask(masks, im0_shape, ratio_pad=None):
        """
        Takes a mask, and resizes it to the original image size. (Borrowed from
        https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L305)

        Args:
            masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3].
            im0_shape (tuple): the original image shape.
            ratio_pad (tuple): optional (ratio, pad) pair; only the pad element (pad_w, pad_h) is used.

        Returns:
            masks (np.ndarray): [h0, w0, num] masks with the letterbox padding removed and resized to im0_shape.

        Raises:
            ValueError: If `masks` has fewer than two dimensions.
        """
        # Validate rank before indexing shape[1] below, otherwise a 1-D input fails with an IndexError instead
        if len(masks.shape) < 2:
            raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')

        im1_shape = masks.shape[:2]
        if ratio_pad is None:  # calculate from im0_shape
            gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])  # gain  = old / new
            pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2  # wh padding
        else:
            pad = ratio_pad[1]

        # Calculate tlbr of mask
        top, left = int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1))  # y, x
        bottom, right = int(round(im1_shape[0] - pad[1] + 0.1)), int(round(im1_shape[1] - pad[0] + 0.1))
        masks = masks[top:bottom, left:right]
        masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]),
                           interpolation=cv2.INTER_LINEAR)  # INTER_CUBIC would be better
        if len(masks.shape) == 2:  # cv2.resize drops a trailing singleton channel, restore it
            masks = masks[:, :, None]
        return masks

    def draw_and_visualize(self, im, bboxes, segments, vis=False, save=True):
        """
        Draw and visualize results.

        Args:
            im (np.ndarray): original image, shape [h, w, c]. Outlines, boxes and labels are drawn on it in place.
            bboxes (numpy.ndarray): [n, 6] rows of (x1, y1, x2, y2, conf, cls), n is number of bboxes.
            segments (List): list of segment masks.
            vis (bool): imshow using OpenCV.
            save (bool): save image annotated (written to 'demo.jpg').

        Returns:
            None
        """

        # Draw rectangles and polygons
        im_canvas = im.copy()
        for (*box, conf, cls_), segment in zip(bboxes, segments):
            cls_id = int(cls_)  # class ids arrive as floats inside the box array
            color = self.color_palette(cls_id, bgr=True)

            # draw contour and fill mask (a mask without any contour yields an empty segment, nothing to draw)
            if len(segment):
                cv2.polylines(im, np.int32([segment]), True, (255, 255, 255), 2)  # white borderline
                cv2.fillPoly(im_canvas, np.int32([segment]), color)

            # draw bbox rectangle
            cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), color, 1, cv2.LINE_AA)
            cv2.putText(im, f'{self.classes[cls_id]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2, cv2.LINE_AA)

        # Mix image
        im = cv2.addWeighted(im_canvas, 0.3, im, 0.7, 0)

        # Show image
        if vis:
            cv2.imshow('demo', im)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

        # Save image
        if save:
            cv2.imwrite('demo.jpg', im)


if __name__ == '__main__':
    # Create an argument parser to handle command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, required=True, help='Path to ONNX model')
    parser.add_argument('--source', type=str, default=str(ASSETS / 'bus.jpg'), help='Path to input image')
    parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold')
    parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold')
    args = parser.parse_args()

    # Build model
    model = YOLOv8Seg(args.model)

    # Read image by OpenCV (imread signals an unreadable or missing file by returning None)
    img = cv2.imread(args.source)
    if img is None:
        raise FileNotFoundError(f"Could not read image '{args.source}'")

    # Inference
    boxes, segments, _ = model(img, conf_threshold=args.conf, iou_threshold=args.iou)

    # Draw bboxes and polygons
    if len(boxes) > 0:
        model.draw_and_visualize(img, boxes, segments, vis=False, save=True)
"Ultralytics HUB", + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FIzICjaph_Wy" + }, + "source": [ + "\n", + "\n", + "\n", + "
\n", + "\n", + "[中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [हिन्दी](https://docs.ultralytics.com/hi/) | [العربية](https://docs.ultralytics.com/ar/)\n", + "\n", + " \n", + " \"CI\n", + " \n", + " \"Open\n", + "\n", + "Welcome to the [Ultralytics](https://ultralytics.com/) HUB notebook!\n", + "\n", + "This notebook allows you to train [YOLOv5](https://github.com/ultralytics/yolov5) and [YOLOv8](https://github.com/ultralytics/ultralytics) 🚀 models using [HUB](https://hub.ultralytics.com/). Please browse the HUB Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eRQ2ow94MiOv" + }, + "source": [ + "# Setup\n", + "\n", + "Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/requirements.txt) and check software and hardware." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "FyDnXd-n4c7Y", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "01e34b44-a26f-4dbc-a5a1-6e29bca01a1b" + }, + "source": [ + "%pip install ultralytics # install\n", + "from ultralytics import YOLO, checks, hub\n", + "checks() # checks" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Ultralytics YOLOv8.0.210 🚀 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)\n", + "Setup complete ✅ (2 CPUs, 12.7 GB RAM, 24.4/78.2 GB disk)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cQ9BwaAqxAm4" + }, + "source": [ + "# Start\n", + "\n", + "Login with your [API key](https://hub.ultralytics.com/settings?tab=api+keys), select your YOLO 🚀 model and start training!" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "XSlZaJ9Iw_iZ" + }, + "source": [ + "hub.login('API_KEY') # use your API key\n", + "\n", + "model = YOLO('https://hub.ultralytics.com/MODEL_ID') # use your model URL\n", + "results = model.train() # train model" + ], + "execution_count": null, + "outputs": [] + } + ] +} diff --git a/examples/tutorial.ipynb b/examples/tutorial.ipynb new file mode 100644 index 0000000..d3bbafe --- /dev/null +++ b/examples/tutorial.ipynb @@ -0,0 +1,616 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "YOLOv8 Tutorial", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "t6MPjfT5NrKQ" + }, + "source": [ + "
\n", + "\n", + " \n", + " \n", + "\n", + " [中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [हिन्दी](https://docs.ultralytics.com/hi/) | [العربية](https://docs.ultralytics.com/ar/)\n", + "\n", + " \"Run\n", + " \"Open\n", + " \"Open\n", + "\n", + "Welcome to the Ultralytics YOLOv8 🚀 notebook! YOLOv8 is the latest version of the YOLO (You Only Look Once) AI models developed by Ultralytics. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLOv8 and understand its features and capabilities.\n", + "\n", + "YOLOv8 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n", + "\n", + "We hope that the resources in this notebook will help you get the most out of YOLOv8. Please browse the YOLOv8 Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n", + "\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7mGmQbAO5pQb" + }, + "source": [ + "# Setup\n", + "\n", + "Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/requirements.txt) and check software and hardware." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "wbvMlHd_QwMG", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "27ca383c-0a97-4679-f1c5-ba843f033de7" + }, + "source": [ + "%pip install ultralytics\n", + "import ultralytics\n", + "ultralytics.checks()" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Ultralytics YOLOv8.0.145 🚀 Python-3.10.6 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)\n", + "Setup complete ✅ (2 CPUs, 12.7 GB RAM, 24.2/78.2 GB disk)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4JnkELT0cIJg" + }, + "source": [ + "# 1. Predict\n", + "\n", + "YOLOv8 may be used directly in the Command Line Interface (CLI) with a `yolo` command for a variety of tasks and modes and accepts additional arguments, i.e. `imgsz=640`. 
See a full list of available `yolo` [arguments](https://docs.ultralytics.com/usage/cfg/) and other details in the [YOLOv8 Predict Docs](https://docs.ultralytics.com/modes/train/).\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "zR9ZbuQCH7FX", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "64489d1f-e71a-44b5-92f6-2088781ca096" + }, + "source": [ + "# Run inference on an image with YOLOv8n\n", + "!yolo predict model=yolov8n.pt source='https://ultralytics.com/images/zidane.jpg'" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt to 'yolov8n.pt'...\n", + "100% 6.23M/6.23M [00:00<00:00, 77.2MB/s]\n", + "Ultralytics YOLOv8.0.145 🚀 Python-3.10.6 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)\n", + "YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients\n", + "\n", + "Downloading https://ultralytics.com/images/zidane.jpg to 'zidane.jpg'...\n", + "100% 165k/165k [00:00<00:00, 7.46MB/s]\n", + "image 1/1 /content/zidane.jpg: 384x640 2 persons, 1 tie, 365.8ms\n", + "Speed: 13.7ms preprocess, 365.8ms inference, 431.7ms postprocess per image at shape (1, 3, 384, 640)\n", + "Results saved to \u001b[1mruns/detect/predict\u001b[0m\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hkAzDWJ7cWTr" + }, + "source": [ + "        \n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0eq1SMWl6Sfn" + }, + "source": [ + "# 2. Val\n", + "Validate a model's accuracy on the [COCO](https://docs.ultralytics.com/datasets/detect/coco/) dataset's `val` or `test` splits. The latest YOLOv8 [models](https://github.com/ultralytics/ultralytics#models) are downloaded automatically the first time they are used. See [YOLOv8 Val Docs](https://docs.ultralytics.com/modes/val/) for more information." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "WQPtK1QYVaD_" + }, + "source": [ + "# Download COCO val\n", + "import torch\n", + "torch.hub.download_url_to_file('https://ultralytics.com/assets/coco2017val.zip', 'tmp.zip') # download (780M - 5000 images)\n", + "!unzip -q tmp.zip -d datasets && rm tmp.zip # unzip" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "X58w8JLpMnjH", + "outputId": "e3aacd98-ceca-49b7-e112-a0c25979ad6c", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "# Validate YOLOv8n on COCO8 val\n", + "!yolo val model=yolov8n.pt data=coco8.yaml" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Ultralytics YOLOv8.0.145 🚀 Python-3.10.6 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)\n", + "YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients\n", + "\n", + "Dataset 'coco8.yaml' images not found ⚠️, missing path '/content/datasets/coco8/images/val'\n", + "Downloading https://ultralytics.com/assets/coco8.zip to '/content/datasets/coco8.zip'...\n", + "100% 433k/433k [00:00<00:00, 12.4MB/s]\n", + "Unzipping /content/datasets/coco8.zip to /content/datasets...\n", + "Dataset download success ✅ (0.7s), saved to \u001b[1m/content/datasets\u001b[0m\n", + "\n", + "Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf'...\n", + "100% 755k/755k [00:00<00:00, 17.5MB/s]\n", + "\u001b[34m\u001b[1mval: \u001b[0mScanning /content/datasets/coco8/labels/val... 
4 images, 0 backgrounds, 0 corrupt: 100% 4/4 [00:00<00:00, 276.04it/s]\n", + "\u001b[34m\u001b[1mval: \u001b[0mNew cache created: /content/datasets/coco8/labels/val.cache\n", + " Class Images Instances Box(P R mAP50 mAP50-95): 100% 1/1 [00:03<00:00, 3.84s/it]\n", + " all 4 17 0.621 0.833 0.888 0.63\n", + " person 4 10 0.721 0.5 0.519 0.269\n", + " dog 4 1 0.37 1 0.995 0.597\n", + " horse 4 2 0.751 1 0.995 0.631\n", + " elephant 4 2 0.505 0.5 0.828 0.394\n", + " umbrella 4 1 0.564 1 0.995 0.995\n", + " potted plant 4 1 0.814 1 0.995 0.895\n", + "Speed: 0.3ms preprocess, 78.7ms inference, 0.0ms loss, 65.4ms postprocess per image\n", + "Results saved to \u001b[1mruns/detect/val\u001b[0m\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZY2VXXXu74w5" + }, + "source": [ + "# 3. Train\n", + "\n", + "

\n", + "\n", + "Train YOLOv8 on [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/), [Classify](https://docs.ultralytics.com/tasks/classify/) and [Pose](https://docs.ultralytics.com/tasks/pose/) datasets. See [YOLOv8 Train Docs](https://docs.ultralytics.com/modes/train/) for more information." + ] + }, + { + "cell_type": "code", + "source": [ + "#@title Select YOLOv8 🚀 logger {run: 'auto'}\n", + "logger = 'Comet' #@param ['Comet', 'TensorBoard']\n", + "\n", + "if logger == 'Comet':\n", + " %pip install -q comet_ml\n", + " import comet_ml; comet_ml.init()\n", + "elif logger == 'TensorBoard':\n", + " %load_ext tensorboard\n", + " %tensorboard --logdir ." + ], + "metadata": { + "id": "ktegpM42AooT" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "1NcFxRcFdJ_O", + "outputId": "b750f2fe-c4d9-4764-b8d5-ed7bd920697b", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "# Train YOLOv8n on COCO8 for 3 epochs\n", + "!yolo train model=yolov8n.pt data=coco8.yaml epochs=3 imgsz=640" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Ultralytics YOLOv8.0.145 🚀 Python-3.10.6 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)\n", + "\u001b[34m\u001b[1mengine/trainer: \u001b[0mtask=detect, mode=train, model=yolov8n.pt, data=coco8.yaml, epochs=3, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, 
save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False, simplify=False, opset=None, workspace=4, nms=False, lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=7.5, cls=0.5, dfl=1.5, pose=12.0, kobj=1.0, label_smoothing=0.0, nbs=64, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0, copy_paste=0.0, cfg=None, tracker=botsort.yaml, save_dir=runs/detect/train\n", + "\n", + " from n params module arguments \n", + " 0 -1 1 464 ultralytics.nn.modules.conv.Conv [3, 16, 3, 2] \n", + " 1 -1 1 4672 ultralytics.nn.modules.conv.Conv [16, 32, 3, 2] \n", + " 2 -1 1 7360 ultralytics.nn.modules.block.C2f [32, 32, 1, True] \n", + " 3 -1 1 18560 ultralytics.nn.modules.conv.Conv [32, 64, 3, 2] \n", + " 4 -1 2 49664 ultralytics.nn.modules.block.C2f [64, 64, 2, True] \n", + " 5 -1 1 73984 ultralytics.nn.modules.conv.Conv [64, 128, 3, 2] \n", + " 6 -1 2 197632 ultralytics.nn.modules.block.C2f [128, 128, 2, True] \n", + " 7 -1 1 295424 ultralytics.nn.modules.conv.Conv [128, 256, 3, 2] \n", + " 8 -1 1 460288 ultralytics.nn.modules.block.C2f [256, 256, 1, True] \n", + " 9 -1 1 164608 ultralytics.nn.modules.block.SPPF [256, 256, 5] \n", + " 10 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n", + " 11 [-1, 6] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", + " 12 -1 1 148224 ultralytics.nn.modules.block.C2f [384, 128, 1] \n", + " 13 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n", + " 14 [-1, 4] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", + " 15 -1 1 37248 ultralytics.nn.modules.block.C2f [192, 64, 1] \n", + " 16 -1 1 36992 ultralytics.nn.modules.conv.Conv [64, 64, 3, 2] 
\n", + " 17 [-1, 12] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", + " 18 -1 1 123648 ultralytics.nn.modules.block.C2f [192, 128, 1] \n", + " 19 -1 1 147712 ultralytics.nn.modules.conv.Conv [128, 128, 3, 2] \n", + " 20 [-1, 9] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", + " 21 -1 1 493056 ultralytics.nn.modules.block.C2f [384, 256, 1] \n", + " 22 [15, 18, 21] 1 897664 ultralytics.nn.modules.head.Detect [80, [64, 128, 256]] \n", + "Model summary: 225 layers, 3157200 parameters, 3157184 gradients\n", + "\n", + "Transferred 355/355 items from pretrained weights\n", + "\u001b[34m\u001b[1mTensorBoard: \u001b[0mStart with 'tensorboard --logdir runs/detect/train', view at http://localhost:6006/\n", + "\u001b[34m\u001b[1mAMP: \u001b[0mrunning Automatic Mixed Precision (AMP) checks with YOLOv8n...\n", + "\u001b[34m\u001b[1mAMP: \u001b[0mchecks passed ✅\n", + "\u001b[34m\u001b[1mtrain: \u001b[0mScanning /content/datasets/coco8/labels/train... 4 images, 0 backgrounds, 0 corrupt: 100% 4/4 [00:00<00:00, 860.11it/s]\n", + "\u001b[34m\u001b[1mtrain: \u001b[0mNew cache created: /content/datasets/coco8/labels/train.cache\n", + "\u001b[34m\u001b[1malbumentations: \u001b[0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))\n", + "\u001b[34m\u001b[1mval: \u001b[0mScanning /content/datasets/coco8/labels/val.cache... 4 images, 0 backgrounds, 0 corrupt: 100% 4/4 [00:00\n" + ], + "metadata": { + "id": "Phm9ccmOKye5" + } + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Detection\n", + "\n", + "YOLOv8 _detection_ models have no suffix and are the default YOLOv8 models, i.e. `yolov8n.pt` and are pretrained on COCO. 
See [Detection Docs](https://docs.ultralytics.com/tasks/detect/) for full details.\n" + ], + "metadata": { + "id": "yq26lwpYK1lq" + } + }, + { + "cell_type": "code", + "source": [ + "# Load YOLOv8n, train it on COCO128 for 3 epochs and predict an image with it\n", + "from ultralytics import YOLO\n", + "\n", + "model = YOLO('yolov8n.pt') # load a pretrained YOLOv8n detection model\n", + "model.train(data='coco128.yaml', epochs=3) # train the model\n", + "model('https://ultralytics.com/images/bus.jpg') # predict on an image" + ], + "metadata": { + "id": "8Go5qqS9LbC5" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Segmentation\n", + "\n", + "YOLOv8 _segmentation_ models use the `-seg` suffix, i.e. `yolov8n-seg.pt` and are pretrained on COCO. See [Segmentation Docs](https://docs.ultralytics.com/tasks/segment/) for full details.\n" + ], + "metadata": { + "id": "7ZW58jUzK66B" + } + }, + { + "cell_type": "code", + "source": [ + "# Load YOLOv8n-seg, train it on COCO128-seg for 3 epochs and predict an image with it\n", + "from ultralytics import YOLO\n", + "\n", + "model = YOLO('yolov8n-seg.pt') # load a pretrained YOLOv8n segmentation model\n", + "model.train(data='coco128-seg.yaml', epochs=3) # train the model\n", + "model('https://ultralytics.com/images/bus.jpg') # predict on an image" + ], + "metadata": { + "id": "WFPJIQl_L5HT" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Classification\n", + "\n", + "YOLOv8 _classification_ models use the `-cls` suffix, i.e. `yolov8n-cls.pt` and are pretrained on ImageNet. 
See [Classification Docs](https://docs.ultralytics.com/tasks/classify/) for full details.\n" + ], + "metadata": { + "id": "ax3p94VNK9zR" + } + }, + { + "cell_type": "code", + "source": [ + "# Load YOLOv8n-cls, train it on mnist160 for 3 epochs and predict an image with it\n", + "from ultralytics import YOLO\n", + "\n", + "model = YOLO('yolov8n-cls.pt') # load a pretrained YOLOv8n classification model\n", + "model.train(data='mnist160', epochs=3) # train the model\n", + "model('https://ultralytics.com/images/bus.jpg') # predict on an image" + ], + "metadata": { + "id": "5q9Zu6zlL5rS" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## 4. Pose\n", + "\n", + "YOLOv8 _pose_ models use the `-pose` suffix, i.e. `yolov8n-pose.pt` and are pretrained on COCO Keypoints. See [Pose Docs](https://docs.ultralytics.com/tasks/pose/) for full details." + ], + "metadata": { + "id": "SpIaFLiO11TG" + } + }, + { + "cell_type": "code", + "source": [ + "# Load YOLOv8n-pose, train it on COCO8-pose for 3 epochs and predict an image with it\n", + "from ultralytics import YOLO\n", + "\n", + "model = YOLO('yolov8n-pose.pt') # load a pretrained YOLOv8n classification model\n", + "model.train(data='coco8-pose.yaml', epochs=3) # train the model\n", + "model('https://ultralytics.com/images/bus.jpg') # predict on an image" + ], + "metadata": { + "id": "si4aKFNg19vX" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IEijrePND_2I" + }, + "source": [ + "# Appendix\n", + "\n", + "Additional content below." 
+ ] + }, + { + "cell_type": "code", + "source": [ + "# Pip install from source\n", + "!pip install git+https://github.com/ultralytics/ultralytics@main" + ], + "metadata": { + "id": "pIdE6i8C3LYp" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Git clone and run tests on updates branch\n", + "!git clone https://github.com/ultralytics/ultralytics -b main\n", + "%pip install -qe ultralytics" + ], + "metadata": { + "id": "uRKlwxSJdhd1" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Run tests (Git clone only)\n", + "!pytest ultralytics/tests" + ], + "metadata": { + "id": "GtPlh7mcCGZX" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Validate multiple models\n", + "for x in 'nsmlx':\n", + " !yolo val model=yolov8{x}.pt data=coco.yaml" + ], + "metadata": { + "id": "Wdc6t_bfzDDk" + }, + "execution_count": null, + "outputs": [] + } + ] +} diff --git a/fps01.py b/fps01.py new file mode 100644 index 0000000..59c9963 --- /dev/null +++ b/fps01.py @@ -0,0 +1,144 @@ +#解决重叠框问题,并调试人头框,可显示多人头,但是人头框闪烁 + +import os +import cv2 +import numpy as np +from collections import deque +from ultralytics import YOLO + +# YOLOv8模型路径 +model_path = r'detect\train\weights\best.pt' +model = YOLO(model_path) + +# 动态检测参数 +tracking_window_size = 250 # 10秒对应的帧数 +center_history = deque(maxlen=tracking_window_size) +tracker = None # 跟踪器变量 +tracking_initialized = False + +# 定义头部中心点的容忍范围(像素) +tolerance_radius = 20 # 你可以根据实际需要调整 +detection_interval = 10 # 目标检测的帧间隔 + + +def infer_and_draw_video(video_path, output_folder): + # 打开RTSP视频流 + cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + print("错误:无法打开视频流。") + return + + # 获取视频属性 + fps = int(cap.get(cv2.CAP_PROP_FPS)) + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + + # 创建视频写入对象 + output_video_path = os.path.join(output_folder, 
'output_video.mp4') + fourcc = cv2.VideoWriter_fourcc(*'mp4v') # 使用mp4v编解码器 + out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height)) + + global tracking_initialized + global center_history + global tracker + + frame_counter = 0 # 帧计数器 + + while True: + ret, frame = cap.read() + if not ret: + print("视频流读取结束或出错。") + break + + frame_counter += 1 + + filtered_boxes = [] # 确保在进入目标检测逻辑之前初始化 + + if frame_counter % detection_interval == 0: # 每一定帧数进行目标检测 + # 使用YOLOv8进行目标检测 + results = model(frame) + detected_boxes = [] + if results: + for result in results: + if result.boxes is not None and len(result.boxes.xyxy) > 0: + boxes = result.boxes.xyxy.cpu().numpy() + confidences = result.boxes.conf.cpu().numpy() + + for i, box in enumerate(boxes): + x1, y1, x2, y2 = map(int, box[:4]) + conf = confidences[i] if len(confidences) > i else 0.0 + detected_boxes.append((x1, y1, x2, y2, conf)) + + filtered_boxes = filter_and_merge_boxes(detected_boxes) + + if not tracking_initialized and filtered_boxes: + # 选择置信度最高的目标进行跟踪 + filtered_boxes = sorted(filtered_boxes, key=lambda b: b[4], reverse=True) + x1, y1, x2, y2, _ = filtered_boxes[0] + tracking_bbox = (x1, y1, x2 - x1, y2 - y1) + tracker = cv2.TrackerCSRT_create() + tracker.init(frame, tracking_bbox) + tracking_initialized = True + elif tracking_initialized: + # 更新跟踪器 + success, bbox = tracker.update(frame) + if success: + x, y, w, h = map(int, bbox) + center = (x + w // 2, y + h // 2) + center_history.append(center) + + # 检查中心点是否稳定在容忍范围内 + if len(center_history) == tracking_window_size: + initial_center = center_history[0] + stable = all( + np.linalg.norm(np.array(center) - np.array(initial_center)) <= tolerance_radius for + center in center_history) + if stable: + cv2.putText(frame, "SLEEP", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + else: + tracking_initialized = False # 重新初始化跟踪器 + + # 使用image检测框的替代逻辑 + for box in filtered_boxes: + x1, y1, x2, y2, _ = box + cv2.rectangle(frame, (x1, y1), (x2, y2), 
(0, 255, 0), 2) + + # 写入处理后的帧 + out.write(frame) + + cap.release() + out.release() + cv2.destroyAllWindows() + print(f"已保存带注释的视频到: {output_video_path}") + + +def filter_and_merge_boxes(boxes): + filtered_boxes = [] + threshold = 0.5 # IOU阈值 + + def iou(box1, box2): + x1, y1, x2, y2 = box1 + x1_, y1_, x2_, y2_ = box2 + ix1, iy1 = max(x1, x1_), max(y1, y1_) + ix2, iy2 = min(x2, x2_), min(y2, y2_) + iw = max(ix2 - ix1 + 1, 0) + ih = max(iy2 - iy1 + 1, 0) + inter = iw * ih + ua = (x2 - x1 + 1) * (y2 - y1 + 1) + (x2_ - x1_ + 1) * (y2_ - y1_) - inter + return inter / ua + + for i, box1 in enumerate(boxes): + keep = True + for j, box2 in enumerate(filtered_boxes): + if iou(box1[:4], box2[:4]) > threshold: + keep = False + break + if keep: + filtered_boxes.append(box1) + + return filtered_boxes + + +# 使用实际视频路径进行推理,并指定输出文件夹 +infer_and_draw_video(r'视频路径', + r'输出路径') diff --git a/fps02.py b/fps02.py new file mode 100644 index 0000000..507ca8e --- /dev/null +++ b/fps02.py @@ -0,0 +1,150 @@ +#增加中心点并解决人头框闪烁问题,但目前依旧只能检索一人,下一步需要解决多目标问题 + +import os +import cv2 +import numpy as np +from collections import deque +from ultralytics import YOLO + +# YOLOv8模型路径 +model_path = r'detect\train\weights\best.pt' +model = YOLO(model_path) + +# 动态检测参数 +tracking_window_size = 250 # 10秒对应的帧数 +center_history = deque(maxlen=tracking_window_size) +tracker = None # 跟踪器变量 +tracking_initialized = False + +# 定义头部中心点的容忍范围(像素) +tolerance_radius = 20 # 你可以根据实际需要调整 +detection_interval = 10 # 目标检测的帧间隔 + +def infer_and_draw_video(video_path, output_folder): + # 打开RTSP视频流 + cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + print("错误:无法打开视频流。") + return + + # 获取视频属性 + fps = int(cap.get(cv2.CAP_PROP_FPS)) + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + + # 创建视频写入对象 + output_video_path = os.path.join(output_folder, 'output_video.mp4') + fourcc = cv2.VideoWriter_fourcc(*'mp4v') # 使用mp4v编解码器 + out = cv2.VideoWriter(output_video_path, fourcc, fps, 
(width, height)) + + global tracking_initialized + global center_history + global tracker + + frame_counter = 0 # 帧计数器 + last_detection_boxes = [] # 上一帧检测到的框 + + while True: + ret, frame = cap.read() + if not ret: + print("视频流读取结束或出错。") + break + + frame_counter += 1 + + filtered_boxes = [] # 确保在进入目标检测逻辑之前初始化 + + if frame_counter % detection_interval == 0: # 每一定帧数进行目标检测 + # 使用YOLOv8进行目标检测 + results = model(frame) + detected_boxes = [] + if results: + for result in results: + if result.boxes is not None and len(result.boxes.xyxy) > 0: + boxes = result.boxes.xyxy.cpu().numpy() + confidences = result.boxes.conf.cpu().numpy() + + for i, box in enumerate(boxes): + x1, y1, x2, y2 = map(int, box[:4]) + conf = confidences[i] if len(confidences) > i else 0.0 + detected_boxes.append((x1, y1, x2, y2, conf)) + + filtered_boxes = filter_and_merge_boxes(detected_boxes) + + if filtered_boxes: + # 选择置信度最高的目标进行跟踪 + filtered_boxes = sorted(filtered_boxes, key=lambda b: b[4], reverse=True) + x1, y1, x2, y2, _ = filtered_boxes[0] + tracking_bbox = (x1, y1, x2 - x1, y2 - y1) + tracker = cv2.TrackerCSRT_create() + tracker.init(frame, tracking_bbox) + tracking_initialized = True + last_detection_boxes = filtered_boxes + else: + # 如果没有检测到框,则重置跟踪器 + tracking_initialized = False + elif tracking_initialized: + # 更新跟踪器 + success, bbox = tracker.update(frame) + if success: + x, y, w, h = map(int, bbox) + center = (x + w // 2, y + h // 2) + center_history.append(center) + cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2) + cv2.circle(frame, center, 5, (255, 0, 0), -1) + + # 检查中心点是否稳定在容忍范围内 + if len(center_history) == tracking_window_size: + initial_center = center_history[0] + stable = all(np.linalg.norm(np.array(center) - np.array(initial_center)) <= tolerance_radius for center in center_history) + if stable: + cv2.putText(frame, "SLEEP", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + else: + # 跟踪失败,重置跟踪器 + tracking_initialized = False + + # 如果跟踪失败但有检测到的目标框,使用检测到的目标框 
+ if not tracking_initialized and last_detection_boxes: + for box in last_detection_boxes: + x1, y1, x2, y2, _ = box + cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) + center = (x1 + (x2 - x1) // 2, y1 + (y2 - y1) // 2) + cv2.circle(frame, center, 5, (255, 0, 0), -1) + + # 写入处理后的帧 + out.write(frame) + + cap.release() + out.release() + cv2.destroyAllWindows() + print(f"已保存带注释的视频到: {output_video_path}") + +def filter_and_merge_boxes(boxes): + filtered_boxes = [] + threshold = 0.5 # IOU阈值 + + def iou(box1, box2): + x1, y1, x2, y2 = box1 + x1_, y1_, x2_, y2_ = box2 + ix1, iy1 = max(x1, x1_), max(y1, y1_) + ix2, iy2 = min(x2, x2_), min(y2, y2_) + iw = max(ix2 - ix1 + 1, 0) + ih = max(iy2 - iy1 + 1, 0) + inter = iw * ih + ua = (x2 - x1 + 1) * (y2 - y1 + 1) + (x2_ - x1_ + 1) * (y2_ - y1_) - inter + return inter / ua + + for i, box1 in enumerate(boxes): + keep = True + for j, box2 in enumerate(filtered_boxes): + if iou(box1[:4], box2[:4]) > threshold: + keep = False + break + if keep: + filtered_boxes.append(box1) + + return filtered_boxes + +# 使用实际视频路径进行推理,并指定输出文件夹 +infer_and_draw_video(r'视频路径', r'输出路径') +#infer_and_draw_video(r'摄像头网络串流', r'输出路径') \ No newline at end of file diff --git a/fps03.py b/fps03.py new file mode 100644 index 0000000..17474d0 --- /dev/null +++ b/fps03.py @@ -0,0 +1,151 @@ +#解决多人头问题,但是cpu处理速度跟不上,只能运行一小段时间 + + +import os +import cv2 +import numpy as np +from collections import deque +from ultralytics import YOLO + +# YOLOv8模型路径 +model_path = r'detect\train\weights\best.pt' +model = YOLO(model_path) + +# 动态检测参数 +tracking_window_size = 250 # 10秒对应的帧数 +center_history = deque(maxlen=tracking_window_size) +tracking_initialized = False +tracker_list = [] # 跟踪器列表 + +# 定义头部中心点的容忍范围(像素) +tolerance_radius = 20 # 你可以根据实际需要调整 +detection_interval = 10 # 目标检测的帧间隔 + +def infer_and_draw_video(video_path, output_folder): + global tracking_initialized + global center_history + global tracker_list + + # 打开RTSP视频流 + cap = cv2.VideoCapture(video_path) + if not 
cap.isOpened(): + print("错误:无法打开视频流。") + return + + # 获取视频属性 + fps = int(cap.get(cv2.CAP_PROP_FPS)) + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + + # 创建视频写入对象 + output_video_path = os.path.join(output_folder, 'output_video.mp4') + fourcc = cv2.VideoWriter_fourcc(*'mp4v') # 使用mp4v编解码器 + out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height)) + + frame_counter = 0 # 帧计数器 + last_detection_boxes = [] # 上一帧检测到的框 + + while True: + ret, frame = cap.read() + if not ret: + print("视频流读取结束或出错。") + break + + frame_counter += 1 + + if frame_counter % detection_interval == 0: # 每一定帧数进行目标检测 + # 使用YOLOv8进行目标检测 + results = model(frame) + detected_boxes = [] + if results: + for result in results: + if result.boxes is not None and len(result.boxes.xyxy) > 0: + boxes = result.boxes.xyxy.cpu().numpy() + confidences = result.boxes.conf.cpu().numpy() + + for i, box in enumerate(boxes): + x1, y1, x2, y2 = map(int, box[:4]) + conf = confidences[i] if len(confidences) > i else 0.0 + detected_boxes.append((x1, y1, x2, y2, conf)) + + filtered_boxes = filter_and_merge_boxes(detected_boxes) + + if filtered_boxes: + # 对每个检测到的目标初始化一个跟踪器 + tracker_list = [cv2.TrackerCSRT_create() for _ in filtered_boxes] + for i, box in enumerate(filtered_boxes): + x1, y1, x2, y2, _ = box + tracking_bbox = (x1, y1, x2 - x1, y2 - y1) + tracker_list[i].init(frame, tracking_bbox) + last_detection_boxes = filtered_boxes + tracking_initialized = True + else: + # 如果没有检测到框,则重置跟踪器 + tracking_initialized = False + elif tracking_initialized: + # 更新所有跟踪器 + for tracker in tracker_list: + success, bbox = tracker.update(frame) + if success: + x, y, w, h = map(int, bbox) + center = (x + w // 2, y + h // 2) + center_history.append(center) + cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2) + cv2.circle(frame, center, 5, (255, 0, 0), -1) + + # 检查中心点是否稳定在容忍范围内 + if len(center_history) == tracking_window_size: + initial_center = center_history[0] + 
stable = all(np.linalg.norm(np.array(center) - np.array(initial_center)) <= tolerance_radius for center in center_history) + if stable: + cv2.putText(frame, "SLEEP", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + else: + # 跟踪失败,重置跟踪器 + tracking_initialized = False + tracker_list = [] + + # 如果跟踪失败但有检测到的目标框,显示检测框 + if not tracking_initialized and last_detection_boxes: + for box in last_detection_boxes: + x1, y1, x2, y2, _ = box + cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) + center = (x1 + (x2 - x1) // 2, y1 + (y2 - y1) // 2) + cv2.circle(frame, center, 5, (255, 0, 0), -1) + + # 写入处理后的帧 + out.write(frame) + + cap.release() + out.release() + cv2.destroyAllWindows() + print(f"已保存带注释的视频到: {output_video_path}") + +def filter_and_merge_boxes(boxes): + filtered_boxes = [] + threshold = 0.5 # IOU阈值 + + def iou(box1, box2): + x1, y1, x2, y2 = box1 + x1_, y1_, x2_, y2_ = box2 + ix1, iy1 = max(x1, x1_), max(y1, y1_) + ix2, iy2 = min(x2, x2_), min(y2, y2_) + iw = max(ix2 - ix1 + 1, 0) + ih = max(iy2 - iy1 + 1, 0) + inter = iw * ih + ua = (x2 - x1 + 1) * (y2 - y1 + 1) + (x2_ - x1_ + 1) * (y2_ - y1_) - inter + return inter / ua + + for i, box1 in enumerate(boxes): + keep = True + for j, box2 in enumerate(filtered_boxes): + if iou(box1[:4], box2[:4]) > threshold: + keep = False + break + if keep: + filtered_boxes.append(box1) + + return filtered_boxes + +# 使用实际视频路径进行推理,并指定输出文件夹 +infer_and_draw_video(r'视频路径', r'输出路径') +#infer_and_draw_video(r'摄像头网络串流', r'输出路径') diff --git a/fps04.py b/fps04.py new file mode 100644 index 0000000..f43e9e8 --- /dev/null +++ b/fps04.py @@ -0,0 +1,125 @@ +#优化了多人头代码,目前可以持续运作了 + +import os +import cv2 +import numpy as np +from collections import deque + +from fps02 import filter_and_merge_boxes +from fps03 import tolerance_radius +from ultralytics import YOLO +from concurrent.futures import ThreadPoolExecutor + +# YOLOv8模型路径 +model_path = r'etect\train\weights\best.pt' +model = YOLO(model_path) + +# 动态检测参数 +tracking_window_size 
= 250 +center_history = deque(maxlen=tracking_window_size) +tracking_initialized = False +tracker_list = [] +detection_interval = 10 +new_width = 640 + +def infer_and_draw_video(video_path, output_folder): + global tracking_initialized + global center_history + global tracker_list + + cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + print("错误:无法打开视频流。") + return + + fps = int(cap.get(cv2.CAP_PROP_FPS)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + + # 设置新的分辨率 + cap.set(cv2.CAP_PROP_FRAME_WIDTH, new_width) + cap.set(cv2.CAP_PROP_FRAME_HEIGHT, int((new_width / cap.get(cv2.CAP_PROP_FRAME_WIDTH)) * height)) + + output_video_path = os.path.join(output_folder, 'output_video.mp4') + fourcc = cv2.VideoWriter_fourcc(*'mp4v') + out = cv2.VideoWriter(output_video_path, fourcc, fps, (new_width, int((new_width / cap.get(cv2.CAP_PROP_FRAME_WIDTH)) * height))) + + frame_counter = 0 + last_detection_boxes = [] + + def process_frame(frame, tracking_initialized=None): + nonlocal frame_counter + frame_counter += 1 + + if frame_counter % detection_interval == 0: + results = model(frame) + detected_boxes = [] + if results: + for result in results: + if result.boxes is not None and len(result.boxes.xyxy) > 0: + boxes = result.boxes.xyxy.cpu().numpy() + confidences = result.boxes.conf.cpu().numpy() + for i, box in enumerate(boxes): + x1, y1, x2, y2 = map(int, box[:4]) + conf = confidences[i] if len(confidences) > i else 0.0 + detected_boxes.append((x1, y1, x2, y2, conf)) + + filtered_boxes = filter_and_merge_boxes(detected_boxes) + print(f"检测到的框:{filtered_boxes}") + + if filtered_boxes: + tracker_list[:] = [cv2.TrackerKCF_create() for _ in filtered_boxes] + for i, box in enumerate(filtered_boxes): + x1, y1, x2, y2, _ = box + tracking_bbox = (x1, y1, x2 - x1, y2 - y1) + tracker_list[i].init(frame, tracking_bbox) + last_detection_boxes[:] = filtered_boxes + tracking_initialized = True + else: + tracking_initialized = False + elif tracking_initialized: + for tracker in 
tracker_list: + success, bbox = tracker.update(frame) + if success: + x, y, w, h = map(int, bbox) + center = (x + w // 2, y + h // 2) + center_history.append(center) + cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2) + cv2.circle(frame, center, 5, (255, 0, 0), -1) + + if len(center_history) == tracking_window_size: + initial_center = center_history[0] + stable = all(np.linalg.norm(np.array(center) - np.array(initial_center)) <= tolerance_radius for center in center_history) + if stable: + cv2.putText(frame, "SLEEP", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + else: + tracking_initialized = False + tracker_list.clear() + + if not tracking_initialized and last_detection_boxes: + for box in last_detection_boxes: + x1, y1, x2, y2, _ = box + cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) + center = (x1 + (x2 - x1) // 2, y1 + (y2 - y1) // 2) + cv2.circle(frame, center, 5, (255, 0, 0), -1) + + return frame + + # 使用多线程处理视频流 + with ThreadPoolExecutor(max_workers=2) as executor: + while True: + ret, frame = cap.read() + if not ret: + print("视频流读取结束或出错。") + break + + processed_frame = executor.submit(process_frame, frame).result() + out.write(processed_frame) + + cap.release() + out.release() + cv2.destroyAllWindows() + print(f"已保存带注释的视频到: {output_video_path}") + +# 调用函数 +#infer_and_draw_video(r'视频路径', r'输出路径') +infer_and_draw_video(r'网络串流', r'输出路径') diff --git a/huafen.py b/huafen.py new file mode 100644 index 0000000..43f4fc6 --- /dev/null +++ b/huafen.py @@ -0,0 +1,65 @@ +import os +import random +import shutil + +# 设置数据集路径 +dataset_path = r"替换成自己数据集的路径地址" +images_path = os.path.join(dataset_path, "images") +labels_path = os.path.join(dataset_path, "labels") + +# 创建训练集和验证集目录 +train_path = os.path.join(dataset_path, "train") +val_path = os.path.join(dataset_path, "val") +os.makedirs(train_path, exist_ok=True) +os.makedirs(val_path, exist_ok=True) + +# 获取所有图像和标签文件的路径 +images = os.listdir(images_path) +labels = os.listdir(labels_path) + 
+# 确保图像和标签文件数量一致 +assert len(images) == len(labels) + +# 将文件名列表随机排序 +random.shuffle(images) + +# 计算训练集和验证集的数量 +train_size = int(0.8 * len(images)) +val_size = len(images) - train_size + +# 创建训练集和验证集目录 +train_path = os.path.join(dataset_path, "train") +train_images_path = os.path.join(train_path, "images") +train_labels_path = os.path.join(train_path, "labels") +os.makedirs(train_images_path, exist_ok=True) +os.makedirs(train_labels_path, exist_ok=True) + +val_path = os.path.join(dataset_path, "val") +val_images_path = os.path.join(val_path, "images") +val_labels_path = os.path.join(val_path, "labels") +os.makedirs(val_images_path, exist_ok=True) +os.makedirs(val_labels_path, exist_ok=True) + +# 复制训练集图像和标签文件到train目录 +for i in range(train_size): + image_name = images[i] + label_name = image_name.replace(".jpg", ".txt") + src_image = os.path.join(images_path, image_name) + src_label = os.path.join(labels_path, label_name) + dst_image = os.path.join(train_images_path, image_name) + dst_label = os.path.join(train_labels_path, label_name) + shutil.copyfile(src_image, dst_image) + shutil.copyfile(src_label, dst_label) + +# 复制验证集图像和标签文件到val目录 +for i in range(train_size, len(images)): + image_name = images[i] + label_name = image_name.replace(".jpg", ".txt") + src_image = os.path.join(images_path, image_name) + src_label = os.path.join(labels_path, label_name) + dst_image = os.path.join(val_images_path, image_name) + dst_label = os.path.join(val_labels_path, label_name) + shutil.copyfile(src_image, dst_image) + shutil.copyfile(src_label, dst_label) + +print("数据集已成功划分为训练集和验证集!") diff --git a/phototest.py b/phototest.py new file mode 100644 index 0000000..730f1c5 --- /dev/null +++ b/phototest.py @@ -0,0 +1,106 @@ +#静态检测,图片检测 + +from ultralytics import YOLO # ultralytics是yolo模型的实现库 +import cv2 # cv2是opencv库,用于图像处理 +import os # os用于文件和目录操作 + +# 替换为实际的模型路径 +model_path = r'\detect\train\weights\best.pt' # 加载yolo模型 +model = YOLO(model_path) # model是加载后的yolo模型实例 + +def 
infer_and_draw(image_path, output_folder): + # 检查 image_path 是否是文件夹路径 + if os.path.isdir(image_path): + # 遍历文件夹中的所有图像文件 + for filename in os.listdir(image_path): + file_path = os.path.join(image_path, filename) + if file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp')): + process_image(file_path, output_folder) + else: + # 处理单张图像 + process_image(image_path, output_folder) + +def process_image(image_path, output_folder): + # 使用模型进行推理 + results = model(image_path) + original_img = cv2.imread(image_path) # 读取原始图像 + + if results: + for result in results: + if result.boxes is not None and len(result.boxes.xyxy) > 0: + boxes = result.boxes.xyxy.cpu().numpy() # 获取检测框 + classes = result.boxes.cls.cpu().numpy() # 获取类别 + confidences = result.boxes.conf.cpu().numpy() # 获取置信度 + + # 确保输出文件夹存在 + if not os.path.exists(output_folder): + os.makedirs(output_folder) + + # 存储所有检测框的标签和位置 + detected_boxes = [] + + for i, box in enumerate(boxes): + x1, y1, x2, y2 = map(int, box[:4]) + class_id = int(classes[i]) if len(classes) > i else 0 + # 标签映射(假设0表示人头类别) + label = "head" if class_id == 0 else "unknown" + conf = confidences[i] if len(confidences) > i else 0.0 + + detected_boxes.append((x1, y1, x2, y2, label, conf)) + + # 避免重叠框的问题,通过过滤和合并框 + filtered_boxes = filter_and_merge_boxes(detected_boxes) + + # 生成最终的标注图像 + img_annotated = original_img.copy() + for (x1, y1, x2, y2, label, conf) in filtered_boxes: + cv2.rectangle(img_annotated, (x1, y1), (x2, y2), (0, 255, 0), 2) + cv2.putText(img_annotated, f'{label} {conf:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) + + # 保存标注后的图像 + output_image_path = os.path.join(output_folder, os.path.basename(image_path)) + cv2.imwrite(output_image_path, img_annotated) + print(f"Saved annotated image to: {output_image_path}") + + # 显示图像(可选) + cv2.imshow('Detected Image', img_annotated) + cv2.waitKey(0) + cv2.destroyAllWindows() + else: + print("No detections found.") + # 保存原始图像 + output_image_path = os.path.join(output_folder, 
os.path.basename(image_path)) + cv2.imwrite(output_image_path, original_img) + print(f"Saved original image to: {output_image_path}") + +def filter_and_merge_boxes(boxes): + """ + 简单地合并重叠框的逻辑。可以根据需要调整合并算法。 + """ + filtered_boxes = [] + threshold = 0.5 # 重叠阈值 + + def iou(box1, box2): + x1, y1, x2, y2 = box1 + x1_, y1_, x2_, y2_ = box2 + ix1, iy1 = max(x1, x1_), max(y1, y1_) + ix2, iy2 = min(x2, x2_), min(y2, y2_) + iw = max(ix2 - ix1 + 1, 0) + ih = max(iy2 - iy1 + 1, 0) + inter = iw * ih + ua = (x2 - x1 + 1) * (y2 - y1 + 1) + (x2_ - x1_ + 1) * (y2_ - y1_) - inter + return inter / ua + + for i, box1 in enumerate(boxes): + keep = True + for j, box2 in enumerate(filtered_boxes): + if iou(box1[:4], box2[:4]) > threshold: + keep = False + break + if keep: + filtered_boxes.append(box1) + + return filtered_boxes + +# 使用实际图像路径或文件夹路径进行推理,并指定输出文件夹 +infer_and_draw(r'\assets\xx.jpg(图片路径)', r'ultralytics\output(输出路径)') diff --git a/ptToonnx.py b/ptToonnx.py new file mode 100644 index 0000000..e36d65f --- /dev/null +++ b/ptToonnx.py @@ -0,0 +1,6 @@ +#更多功能参数介绍:https://docs.ultralytics.com/modes/export/#key-features-of-export-mode +from ultralytics import YOLO +if __name__ == '__main__': + model = YOLO('best.pt') # load a custom trained model + # Export the model + model.export(format='onnx') \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..8632dd1 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,46 @@ +# Ultralytics requirements +# Example: pip install -r requirements.txt + +# Base ---------------------------------------- +matplotlib>=3.3.0 +numpy>=1.22.2 # pinned by Snyk to avoid a vulnerability +opencv-python>=4.6.0 +pillow>=7.1.2 +pyyaml>=5.3.1 +requests>=2.23.0 +scipy>=1.4.1 +torch>=1.8.0 +torchvision>=0.9.0 +tqdm>=4.64.0 + +# Logging ------------------------------------- +# tensorboard>=2.13.0 +# dvclive>=2.12.0 +# clearml +# comet + +# Plotting ------------------------------------ +pandas>=1.1.4 +seaborn>=0.11.0 
+ +# Export -------------------------------------- +#coremltools>=7.0 # CoreML export +#onnx>=1.12.0 # ONNX export +#onnxsim>=0.4.1 # ONNX simplifier +#nvidia-pyindex # TensorRT export +#nvidia-tensorrt # TensorRT export +#scikit-learn==0.19.2 # CoreML quantization +#tensorflow>=2.4.1,<=2.13.1 # TF exports (-cpu, -aarch64, -macos) +#tflite-support +#tensorflowjs>=3.9.0 # TF.js export +#openvino-dev>=2023.0 # OpenVINO export + +# Extras -------------------------------------- +psutil # system utilization +py-cpuinfo # display CPU info +thop>=0.1.1 # FLOPs computation +# ipython # interactive notebook +# albumentations>=1.0.3 # training augmentations +# pycocotools>=2.0.6 # COCO mAP +# roboflow +chardet \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..af1ed3b --- /dev/null +++ b/setup.cfg @@ -0,0 +1,71 @@ +# Project-wide configuration file, can be used for package metadata and other toll configurations +# Example usage: global configuration for PEP8 (via flake8) setting or default pytest arguments +# Local usage: pip install pre-commit, pre-commit run --all-files + +[metadata] +license_files = LICENSE +description_file = README.md + +[tool:pytest] +norecursedirs = + .git + dist + build +addopts = + --doctest-modules + --durations=30 + --color=yes + +[coverage:run] +source = ultralytics/ +data_file = tests/.coverage +omit = + ultralytics/utils/callbacks/* + +[flake8] +max-line-length = 120 +exclude = .tox,*.egg,build,temp +select = E,W,F +doctests = True +verbose = 2 +# https://pep8.readthedocs.io/en/latest/intro.html#error-codes +format = pylint +# see: https://www.flake8rules.com/ +ignore = E731,F405,E402,W504,E501 + # E731: Do not assign a lambda expression, use a def + # F405: name may be undefined, or defined from star imports: module + # E402: module level import not at top of file + # W504: line break after binary operator + # E501: line too long + # removed: + # F401: module imported but unused + # E231: missing 
def get_version():
    """
    Retrieve the version number from the 'ultralytics/__init__.py' file.

    Returns:
        (str): The version string captured from the ``__version__ = '...'`` assignment.
    """
    file = PARENT / 'ultralytics/__init__.py'
    return re.search(r'^__version__ = [\'"]([^\'"]*)[\'"]', file.read_text(encoding='utf-8'), re.M)[1]


def parse_requirements(file_path: Path):
    """
    Parse a requirements.txt file, ignoring comment lines and any text after '#'.

    Args:
        file_path (str | Path): Path to the requirements.txt file.

    Returns:
        (List[str]): Requirement specifiers in file order, with blank lines, full-line
            comments and inline comments removed.
    """
    # Read as UTF-8 explicitly, like the other reads in this file, so parsing does not
    # depend on the platform's locale encoding.
    text = Path(file_path).read_text(encoding='utf-8')
    stripped = (raw.strip() for raw in text.splitlines())
    return [line.split('#')[0].strip() for line in stripped if line and not line.startswith('#')]
+ 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Topic :: Software Development', + 'Topic :: Scientific/Engineering', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + 'Topic :: Scientific/Engineering :: Image Recognition', + 'Operating System :: POSIX :: Linux', + 'Operating System :: MacOS', + 'Operating System :: Microsoft :: Windows', ], + keywords='machine-learning, deep-learning, vision, ML, DL, AI, YOLO, YOLOv3, YOLOv5, YOLOv8, HUB, Ultralytics', + entry_points={'console_scripts': ['yolo = ultralytics.cfg:entrypoint', 'ultralytics = ultralytics.cfg:entrypoint']}) diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py new file mode 100644 index 0000000..2fb0250 --- /dev/null +++ b/ultralytics/__init__.py @@ -0,0 +1,12 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +__version__ = '8.0.221' + +from ultralytics.models import RTDETR, SAM, YOLO +from ultralytics.models.fastsam import FastSAM +from ultralytics.models.nas import NAS +from ultralytics.utils import SETTINGS as settings +from ultralytics.utils.checks import check_yolo as checks +from ultralytics.utils.downloads import download + +__all__ = '__version__', 'YOLO', 'NAS', 'SAM', 'FastSAM', 'RTDETR', 'checks', 'download', 'settings' diff --git a/ultralytics/cfg/__init__.py b/ultralytics/cfg/__init__.py new file mode 100644 index 0000000..652e8fc --- /dev/null +++ b/ultralytics/cfg/__init__.py @@ -0,0 +1,465 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +import contextlib +import shutil +import sys +from pathlib import Path +from types import SimpleNamespace +from typing import Dict, List, Union + +from ultralytics.utils import (ASSETS, DEFAULT_CFG, DEFAULT_CFG_DICT, DEFAULT_CFG_PATH, LOGGER, RANK, ROOT, RUNS_DIR, + SETTINGS, SETTINGS_YAML, TESTS_RUNNING, IterableSimpleNamespace, __version__, checks, + colorstr, 
deprecation_warn, yaml_load, yaml_print) + +# Define valid tasks and modes +MODES = 'train', 'val', 'predict', 'export', 'track', 'benchmark' +TASKS = 'detect', 'segment', 'classify', 'pose' +TASK2DATA = {'detect': 'coco8.yaml', 'segment': 'coco8-seg.yaml', 'classify': 'imagenet10', 'pose': 'coco8-pose.yaml'} +TASK2MODEL = { + 'detect': 'yolov8n.pt', + 'segment': 'yolov8n-seg.pt', + 'classify': 'yolov8n-cls.pt', + 'pose': 'yolov8n-pose.pt'} +TASK2METRIC = { + 'detect': 'metrics/mAP50-95(B)', + 'segment': 'metrics/mAP50-95(M)', + 'classify': 'metrics/accuracy_top1', + 'pose': 'metrics/mAP50-95(P)'} + +CLI_HELP_MSG = \ + f""" + Arguments received: {str(['yolo'] + sys.argv[1:])}. Ultralytics 'yolo' commands use the following syntax: + + yolo TASK MODE ARGS + + Where TASK (optional) is one of {TASKS} + MODE (required) is one of {MODES} + ARGS (optional) are any number of custom 'arg=value' pairs like 'imgsz=320' that override defaults. + See all ARGS at https://docs.ultralytics.com/usage/cfg or with 'yolo cfg' + + 1. Train a detection model for 10 epochs with an initial learning_rate of 0.01 + yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01 + + 2. Predict a YouTube video using a pretrained segmentation model at image size 320: + yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 + + 3. Val a pretrained detection model at batch-size 1 and image size 640: + yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640 + + 4. Export a YOLOv8n classification model to ONNX format at image size 224 by 128 (no TASK required) + yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128 + + 5. 
def cfg2dict(cfg):
    """
    Normalise a configuration object to a dictionary.

    Args:
        cfg (str | Path | dict | SimpleNamespace): Configuration to convert.

    Returns:
        cfg (dict): ``vars(cfg)`` for a SimpleNamespace, the result of ``yaml_load(cfg)`` for a
            str or Path, and the object itself for anything else (e.g. an existing dict).
    """
    if isinstance(cfg, SimpleNamespace):
        return vars(cfg)  # namespace attributes as a dict
    if isinstance(cfg, (str, Path)):
        return yaml_load(cfg)  # treat as a YAML file path
    return cfg
+ overrides (str | Dict | optional): Overrides in the form of a file name or a dictionary. Default is None. + + Returns: + (SimpleNamespace): Training arguments namespace. + """ + cfg = cfg2dict(cfg) + + # Merge overrides + if overrides: + overrides = cfg2dict(overrides) + if 'save_dir' not in cfg: + overrides.pop('save_dir', None) # special override keys to ignore + check_dict_alignment(cfg, overrides) + cfg = {**cfg, **overrides} # merge cfg and overrides dicts (prefer overrides) + + # Special handling for numeric project/name + for k in 'project', 'name': + if k in cfg and isinstance(cfg[k], (int, float)): + cfg[k] = str(cfg[k]) + if cfg.get('name') == 'model': # assign model to 'name' arg + cfg['name'] = cfg.get('model', '').split('.')[0] + LOGGER.warning(f"WARNING ⚠️ 'name=model' automatically updated to 'name={cfg['name']}'.") + + # Type and Value checks + for k, v in cfg.items(): + if v is not None: # None values may be from optional args + if k in CFG_FLOAT_KEYS and not isinstance(v, (int, float)): + raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. " + f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')") + elif k in CFG_FRACTION_KEYS: + if not isinstance(v, (int, float)): + raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. " + f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')") + if not (0.0 <= v <= 1.0): + raise ValueError(f"'{k}={v}' is an invalid value. " + f"Valid '{k}' values are between 0.0 and 1.0.") + elif k in CFG_INT_KEYS and not isinstance(v, int): + raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. " + f"'{k}' must be an int (i.e. '{k}=8')") + elif k in CFG_BOOL_KEYS and not isinstance(v, bool): + raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. " + f"'{k}' must be a bool (i.e. 
def get_save_dir(args, name=None):
    """
    Return the save directory derived from train/val/predict arguments.

    If ``args.save_dir`` is set it is used as-is. Otherwise the directory is
    ``<project>/<name>`` passed through ``increment_path``, where project falls back to the
    tests or default runs directory joined with ``args.task``, and name falls back to
    ``args.name`` and then ``args.mode``.

    Args:
        args: Namespace-like object providing project, task, name, mode, exist_ok and
            optionally save_dir.
        name (str, optional): Run name that takes precedence over ``args.name``.

    Returns:
        (Path): The resolved save directory.
    """

    if getattr(args, 'save_dir', None):
        save_dir = args.save_dir
    else:
        from ultralytics.utils.files import increment_path

        project = args.project or (ROOT.parent / 'tests/tmp/runs' if TESTS_RUNNING else RUNS_DIR) / args.task
        name = name or args.name or f'{args.mode}'
        # Only RANK -1/0 honours the user's exist_ok; other ranks always pass exist_ok=True.
        save_dir = increment_path(Path(project) / name, exist_ok=args.exist_ok if RANK in (-1, 0) else True)

    return Path(save_dir)


def _handle_deprecation(custom):
    """
    Rename deprecated config keys in ``custom`` to their current names.

    The dict is updated in place and also returned. ``boxes`` and ``line_thickness`` are
    renamed directly; ``hide_labels`` / ``hide_conf`` are inverted into ``show_labels`` /
    ``show_conf``.

    Args:
        custom (dict): User-supplied configuration overrides.

    Returns:
        (dict): The same dict with deprecated keys replaced.
    """

    def _is_false(value):
        """Return True for the boolean False or a 'false' string in any letter case."""
        # Values may be real booleans or raw strings. The previous comparison against the
        # literal 'False' never matched a boolean, so hide_labels=False hid the labels.
        if isinstance(value, str):
            return value.strip().lower() == 'false'
        return value is False

    for key in list(custom):  # snapshot the keys because the dict is modified below
        if key == 'boxes':
            deprecation_warn(key, 'show_boxes')
            custom['show_boxes'] = custom.pop('boxes')
        if key == 'hide_labels':
            deprecation_warn(key, 'show_labels')
            custom['show_labels'] = _is_false(custom.pop('hide_labels'))
        if key == 'hide_conf':
            deprecation_warn(key, 'show_conf')
            custom['show_conf'] = _is_false(custom.pop('hide_conf'))
        if key == 'line_thickness':
            deprecation_warn(key, 'line_width')
            custom['line_width'] = custom.pop('line_thickness')

    return custom


def check_dict_alignment(base: Dict, custom: Dict, e=None):
    """
    Check that every key in ``custom`` exists in ``base``.

    Deprecated keys in ``custom`` are renamed first. If unknown keys remain, a SyntaxError is
    raised whose message lists each offending key with close matches from ``base``, followed
    by the CLI help text.

    Args:
        base (dict): Dictionary of valid configuration options.
        custom (dict): Dictionary of user-supplied options; deprecated keys are renamed in place.
        e (Exception, optional): Original error to chain as the cause of the SyntaxError.

    Raises:
        SyntaxError: If ``custom`` contains a key that is not in ``base``.
    """
    custom = _handle_deprecation(custom)
    base_keys, custom_keys = (set(x.keys()) for x in (base, custom))
    mismatched = [k for k in custom_keys if k not in base_keys]
    if mismatched:
        from difflib import get_close_matches

        string = ''
        for x in mismatched:
            matches = get_close_matches(x, base_keys)  # key list
            # Show 'key=default' when the base option has a non-None default, else the bare key.
            matches = [f'{k}={base[k]}' if base.get(k) is not None else k for k in matches]
            match_str = f'Similar arguments are i.e. {matches}.' if matches else ''
            string += f"'{colorstr('red', 'bold', x)}' is not a valid YOLO argument. {match_str}\n"
        raise SyntaxError(string + CLI_HELP_MSG) from e


def merge_equals_args(args: List[str]) -> List[str]:
    """
    Merge arguments that were split around an '=' sign.

    Handles ``['arg', '=', 'val']``, ``['arg=', 'val']`` and ``['arg', '=val']``, each of which
    becomes ``'arg=val'``. The input list is not modified.

    Args:
        args (List[str]): Command-line arguments.

    Returns:
        List[str]: A new list in which arguments around isolated '=' signs are merged.
    """
    merged = []
    idx, total = 0, len(args)
    # Walk with an explicit index so a consumed value can be skipped without deleting from
    # the caller's list (the old loop deleted from ``args`` while iterating it).
    while idx < total:
        arg = args[idx]
        has_next = idx + 1 < total
        if arg == '=' and merged and has_next:  # merge ['arg', '=', 'val']
            merged[-1] += f'={args[idx + 1]}'
            idx += 1  # the value has been consumed
        elif arg.endswith('=') and has_next and '=' not in args[idx + 1]:  # merge ['arg=', 'val']
            merged.append(f'{arg}{args[idx + 1]}')
            idx += 1  # the value has been consumed
        elif arg.startswith('=') and merged:  # merge ['arg', '=val']
            merged[-1] += arg
        else:
            merged.append(arg)
        idx += 1
    return merged
def handle_yolo_settings(args: List[str]) -> None:
    """
    Handle YOLO settings command-line interface (CLI) commands.

    With no arguments the current settings are printed. ``reset`` deletes the settings file
    and recreates the settings; any other arguments are parsed as ``key=value`` pairs,
    validated against the existing settings keys and applied. Any exception is logged as a
    warning and not propagated.

    Args:
        args (List[str]): A list of command line arguments for YOLO settings management.

    Example:
        ```bash
        python my_script.py yolo settings reset
        ```
    """
    url = 'https://docs.ultralytics.com/quickstart/#ultralytics-settings'  # help URL
    try:
        if any(args):
            if args[0] == 'reset':
                SETTINGS_YAML.unlink()  # delete the settings file
                SETTINGS.reset()  # create new settings
                LOGGER.info('Settings reset successfully')  # inform the user that settings have been reset
            else:  # save a new setting
                new = dict(parse_key_value_pair(a) for a in args)
                check_dict_alignment(SETTINGS, new)
                SETTINGS.update(new)

        LOGGER.info(f'💡 Learn about settings at {url}')
        yaml_print(SETTINGS_YAML)  # print the current settings
    except Exception as e:
        # Deliberately best-effort: a settings problem is reported, never raised.
        LOGGER.warning(f"WARNING ⚠️ settings error: '{e}'. Please see {url} for help.")


def parse_key_value_pair(pair):
    """
    Parse one 'key=value' pair and return the key and the converted value.

    Args:
        pair (str): Text of the form ``key=value``; only the first '=' separates the two.

    Returns:
        (tuple): ``(key, value)`` with surrounding spaces removed and the value converted by
            ``smart_value``.

    Raises:
        ValueError: If ``pair`` contains no '=' sign (from tuple unpacking).
        AssertionError: If the value part is empty.
    """
    k, v = pair.split('=', 1)  # split on first '=' sign
    k, v = k.strip(), v.strip()  # remove spaces
    if not v:
        # Raised explicitly so the check survives 'python -O', which strips assert statements.
        # The type stays AssertionError because callers catch that type.
        raise AssertionError(f"missing '{k}' value")
    return k, smart_value(v)


def smart_value(v):
    """
    Convert a string to an underlying Python type such as int, float, bool, list, etc.

    'none', 'true' and 'false' (any letter case) map to None, True and False. Anything else
    is evaluated as a Python expression; if evaluation fails the original string is returned.

    Args:
        v (str): Raw value text.

    Returns:
        The converted value, or ``v`` unchanged when it cannot be evaluated.
    """
    keywords = {'none': None, 'true': True, 'false': False}
    v_lower = v.lower()
    if v_lower in keywords:
        return keywords[v_lower]
    # NOTE(security): eval() runs arbitrary expressions taken from command-line text and also
    # resolves bare names visible in this module (e.g. a value of 'max' becomes the builtin).
    # Kept for backward compatibility with expression-style values; consider
    # ast.literal_eval if only literals need to be supported.
    with contextlib.suppress(Exception):
        return eval(v)
    return v
-h, -help, --help + special.update({k[0]: v for k, v in special.items()}) # singular + special.update({k[:-1]: v for k, v in special.items() if len(k) > 1 and k.endswith('s')}) # singular + special = {**special, **{f'-{k}': v for k, v in special.items()}, **{f'--{k}': v for k, v in special.items()}} + + overrides = {} # basic overrides, i.e. imgsz=320 + for a in merge_equals_args(args): # merge spaces around '=' sign + if a.startswith('--'): + LOGGER.warning(f"WARNING ⚠️ '{a}' does not require leading dashes '--', updating to '{a[2:]}'.") + a = a[2:] + if a.endswith(','): + LOGGER.warning(f"WARNING ⚠️ '{a}' does not require trailing comma ',', updating to '{a[:-1]}'.") + a = a[:-1] + if '=' in a: + try: + k, v = parse_key_value_pair(a) + if k == 'cfg' and v is not None: # custom.yaml passed + LOGGER.info(f'Overriding {DEFAULT_CFG_PATH} with {v}') + overrides = {k: val for k, val in yaml_load(checks.check_yaml(v)).items() if k != 'cfg'} + else: + overrides[k] = v + except (NameError, SyntaxError, ValueError, AssertionError) as e: + check_dict_alignment(full_args_dict, {a: ''}, e) + + elif a in TASKS: + overrides['task'] = a + elif a in MODES: + overrides['mode'] = a + elif a.lower() in special: + special[a.lower()]() + return + elif a in DEFAULT_CFG_DICT and isinstance(DEFAULT_CFG_DICT[a], bool): + overrides[a] = True # auto-True for default bool args, i.e. 'yolo show' sets show=True + elif a in DEFAULT_CFG_DICT: + raise SyntaxError(f"'{colorstr('red', 'bold', a)}' is a valid YOLO argument but is missing an '=' sign " + f"to set its value, i.e. try '{a}={DEFAULT_CFG_DICT[a]}'\n{CLI_HELP_MSG}") + else: + check_dict_alignment(full_args_dict, {a: ''}) + + # Check keys + check_dict_alignment(full_args_dict, overrides) + + # Mode + mode = overrides.get('mode') + if mode is None: + mode = DEFAULT_CFG.mode or 'predict' + LOGGER.warning(f"WARNING ⚠️ 'mode' is missing. Valid modes are {MODES}. 
Using default 'mode={mode}'.") + elif mode not in MODES: + raise ValueError(f"Invalid 'mode={mode}'. Valid modes are {MODES}.\n{CLI_HELP_MSG}") + + # Task + task = overrides.pop('task', None) + if task: + if task not in TASKS: + raise ValueError(f"Invalid 'task={task}'. Valid tasks are {TASKS}.\n{CLI_HELP_MSG}") + if 'model' not in overrides: + overrides['model'] = TASK2MODEL[task] + + # Model + model = overrides.pop('model', DEFAULT_CFG.model) + if model is None: + model = 'yolov8n.pt' + LOGGER.warning(f"WARNING ⚠️ 'model' is missing. Using default 'model={model}'.") + overrides['model'] = model + stem = Path(model).stem.lower() + if 'rtdetr' in stem: # guess architecture + from ultralytics import RTDETR + model = RTDETR(model) # no task argument + elif 'fastsam' in stem: + from ultralytics import FastSAM + model = FastSAM(model) + elif 'sam' in stem: + from ultralytics import SAM + model = SAM(model) + else: + from ultralytics import YOLO + model = YOLO(model, task=task) + if isinstance(overrides.get('pretrained'), str): + model.load(overrides['pretrained']) + + # Task Update + if task != model.task: + if task: + LOGGER.warning(f"WARNING ⚠️ conflicting 'task={task}' passed with 'task={model.task}' model. " + f"Ignoring 'task={task}' and updating to 'task={model.task}' to match model.") + task = model.task + + # Mode + if mode in ('predict', 'track') and 'source' not in overrides: + overrides['source'] = DEFAULT_CFG.source or ASSETS + LOGGER.warning(f"WARNING ⚠️ 'source' is missing. Using default 'source={overrides['source']}'.") + elif mode in ('train', 'val'): + if 'data' not in overrides and 'resume' not in overrides: + overrides['data'] = TASK2DATA.get(task or DEFAULT_CFG.task, DEFAULT_CFG.data) + LOGGER.warning(f"WARNING ⚠️ 'data' is missing. Using default 'data={overrides['data']}'.") + elif mode == 'export': + if 'format' not in overrides: + overrides['format'] = DEFAULT_CFG.format or 'torchscript' + LOGGER.warning(f"WARNING ⚠️ 'format' is missing. 
Using default 'format={overrides['format']}'.") + + # Run command in python + getattr(model, mode)(**overrides) # default args from model + + # Show help + LOGGER.info(f'💡 Learn more at https://docs.ultralytics.com/modes/{mode}') + + +# Special modes -------------------------------------------------------------------------------------------------------- +def copy_default_cfg(): + """Copy and create a new default configuration file with '_copy' appended to its name.""" + new_file = Path.cwd() / DEFAULT_CFG_PATH.name.replace('.yaml', '_copy.yaml') + shutil.copy2(DEFAULT_CFG_PATH, new_file) + LOGGER.info(f'{DEFAULT_CFG_PATH} copied to {new_file}\n' + f"Example YOLO command with this new custom cfg:\n yolo cfg='{new_file}' imgsz=320 batch=8") + + +if __name__ == '__main__': + # Example: entrypoint(debug='yolo predict model=yolov8n.pt') + entrypoint(debug='') diff --git a/ultralytics/cfg/default.yaml b/ultralytics/cfg/default.yaml new file mode 100644 index 0000000..3da9fc8 --- /dev/null +++ b/ultralytics/cfg/default.yaml @@ -0,0 +1,118 @@ +task: detect # (str) YOLO任务,即detect(检测),segment(分割),classify(分类),pose(姿态) +mode: train # (str) YOLO模式,即train(训练),val(验证),predict(预测),export(导出),track(跟踪),benchmark(基准测试) + +# 训练设置 ------------------------------------------------------------------------------------------------------- +model: # (str, optional) 模型文件路径,例如yolov8n.pt,yolov8n.yaml +data: # (str, optional) 数据文件路径,例如coco128.yaml +epochs: 100 # (int) 训练的轮数 +patience: 50 # (int) 早停机制,等待观察不到改善的轮数以提前停止训练 +batch: 8 # (int) 每批处理的图像数(-1表示自动批处理) +imgsz: 640 # (int | list) 输入图像的大小,对于训练和验证模式为int,对于预测和导出模式为list[w,h] +save: True # (bool) 保存训练检查点和预测结果 +save_period: -1 # (int) 每x轮保存一次检查点(如果<1则禁用) +cache: False # (bool) True/ram,disk或False。使用缓存进行数据加载 +device: # (int | str | list, optional) 运行设备,例如cuda device=0或device=0,1,2,3或device=cpu +workers: 8 # (int) 数据加载的工作线程数(每个RANK如果DDP) +project: # (str, optional) 项目名称 +name: # (str, optional) 实验名称,结果保存在'project/name'目录中 +exist_ok: False # (bool) 
是否覆盖现有实验 +pretrained: True # (bool | str) 是否使用预训练模型(bool)或要加载权重的模型(str) +optimizer: auto # (str) 要使用的优化器,choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto] +verbose: True # (bool) 是否打印详细输出 +seed: 0 # (int) 用于可重现性的随机种子 +deterministic: True # (bool) 是否启用确定性模式 +single_cls: False # (bool) 将多类数据训练为单类 +rect: False # (bool) 如果mode='train'则进行矩形训练,如果mode='val'则进行矩形验证 +cos_lr: False # (bool) 使用余弦学习率调度程序 +close_mosaic: 10 # (int) 在最后几轮禁用马赛克增强(0表示禁用) +resume: False # (bool) 从上次检查点恢复训练 +amp: True # (bool) 自动混合精度(AMP)训练,choices=[True, False],True运行AMP检查 +fraction: 1.0 # (float) 要训练的数据集分数(默认为1.0,训练集中的所有图像) +profile: False # (bool) 在训练期间为记录器启用ONNX和TensorRT速度 +freeze: None # (int | list, optional) 冻结前n层,或在训练期间冻结层索引的列表 + +# 分割 +overlap_mask: True # (bool) 训练期间掩码应重叠(仅适用于分割训练) +mask_ratio: 4 # (int) 掩码下采样比率(仅适用于分割训练) +# 分类 +dropout: 0.0 # (float) 使用dropout正则化(仅适用于分类训练) + +# 验证/测试设置 ---------------------------------------------------------------------------------------------------- +val: True # (bool) 在训练期间进行验证/测试 +split: val # (str) 用于验证的数据集拆分,例如'val','test'或'train' +save_json: False # (bool) 将结果保存到JSON文件 +save_hybrid: False # (bool) 保存标签的混合版本(标签+额外预测) +conf: # (float, optional) 用于检测的对象置信度阈值(默认为0.25预测,0.001验证) +iou: 0.7 # (float) 非最大抑制(NMS)的交并比(IoU)阈值 +max_det: 300 # (int) 每张图像的最大检测数 +half: False # (bool) 使用半精度(FP16) +dnn: False # (bool) 使用OpenCV DNN进行ONNX推断 +plots: True # (bool) 在训练/验证期间保存绘图和图像 + +# 预测设置 ----------------------------------------------------------------------------------------------------- +source: # (str, optional) 图像或视频的源目录 +vid_stride: 1 # (int) 视频帧速率跨度 +stream_buffer: False # (bool) 缓冲所有流式帧(True)或返回最近的帧(False) +visualize: False # (bool) 可视化模型特征 +augment: False # (bool) 对预测源应用图像增强 +agnostic_nms: False # (bool) 类别不可知的NMS +classes: # (int | list[int], optional) 按类别过滤结果,例如classes=0,或classes=[0,2,3] +retina_masks: False # (bool) 使用高分辨率分割掩码 + +# 可视化设置 --------------------------------------------------------------------------------------------------- +show: 
True # (bool) 如果环境允许,显示预测的图像和视频 +save_frames: False # (bool) 保存预测的单个视频帧 +save_txt: False # (bool) 将结果保存为.txt文件 +save_conf: False # (bool) 保存带有置信度得分的结果 +save_crop: False # (bool) 保存带有结果的裁剪图像 +show_labels: True # (bool) 显示预测标签,例如'person' +show_conf: True # (bool) 显示预测置信度,例如'0.99' +show_boxes: True # (bool) 显示预测框 +line_width: # (int, optional) 边界框的线宽。如果为None,则缩放到图像大小 + + +# 导出设置 ------------------------------------------------------------------------------------------------------ +format: torchscript # (str) 导出格式,选项请参见https://docs.ultralytics.com/modes/export/#export-formats +keras: False # (bool) 使用Keras +optimize: False # (bool) TorchScript: 优化为移动设备 +int8: False # (bool) CoreML/TF INT8 量化 +dynamic: False # (bool) ONNX/TF/TensorRT: 动态轴 +simplify: False # (bool) ONNX: 简化模型 +opset: # (int, optional) ONNX: opset 版本 +workspace: 4 # (int) TensorRT: 工作空间大小(GB) +nms: False # (bool) CoreML: 添加NMS + +# 超参数 ------------------------------------------------------------------------------------------------------ +lr0: 0.01 # (float) 初始学习率(例如SGD=1E-2,Adam=1E-3) +lrf: 0.01 # (float) 最终学习率(lr0 * lrf) +momentum: 0.937 # (float) SGD动量/Adam beta1 +weight_decay: 0.0005 # (float) 优化器权重衰减5e-4 +warmup_epochs: 3.0 # (float) 预热轮数(可以是小数) +warmup_momentum: 0.8 # (float) 预热初始动量 +warmup_bias_lr: 0.1 # (float) 预热初始偏置学习率 +box: 7.5 # (float) 目标框损失增益 +cls: 0.5 # (float) 类别损失增益(与像素成比例) +dfl: 1.5 # (float) dfl损失增益 +pose: 12.0 # (float) 姿态损失增益 +kobj: 1.0 # (float) 关键点obj损失增益 +label_smoothing: 0.0 # (float) 标签平滑(分数) +nbs: 64 # (int) 名义批次大小 +hsv_h: 0.015 # (float) 图像HSV-Hue增强(分数) +hsv_s: 0.7 # (float) 图像HSV-Saturation增强(分数) +hsv_v: 0.4 # (float) 图像HSV-Value增强(分数) +degrees: 0.0 # (float) 图像旋转(+/- deg) +translate: 0.1 # (float) 图像平移(+/- 分数) +scale: 0.5 # (float) 图像缩放(+/- gain) +shear: 0.0 # (float) 图像剪切(+/- deg) +perspective: 0.0 # (float) 图像透视(+/- 分数),范围0-0.001 +flipud: 0.0 # (float) 图像上下翻转(概率) +fliplr: 0.5 # (float) 图像左右翻转(概率) +mosaic: 1.0 # (float) 图像马赛克(概率) +mixup: 0.0 # (float) 图像混合(概率) +copy_paste: 
0.0 # (float) 分割复制-粘贴(概率) + +# 自定义config.yaml --------------------------------------------------------------------------------------------------- +cfg: # (str, optional) 用于覆盖defaults.yaml的配置 + +# 追踪器设置 ------------------------------------------------------------------------------------------------------ +tracker: botsort.yaml # (str) 追踪器类型,选项=[botsort.yaml, bytetrack.yaml] diff --git a/ultralytics/cfg/models/README.md b/ultralytics/cfg/models/README.md new file mode 100644 index 0000000..4749441 --- /dev/null +++ b/ultralytics/cfg/models/README.md @@ -0,0 +1,41 @@ +## Models + +Welcome to the Ultralytics Models directory! Here you will find a wide variety of pre-configured model configuration files (`*.yaml`s) that can be used to create custom YOLO models. The models in this directory have been expertly crafted and fine-tuned by the Ultralytics team to provide the best performance for a wide range of object detection and image segmentation tasks. + +These model configurations cover a wide range of scenarios, from simple object detection to more complex tasks like instance segmentation and object tracking. They are also designed to run efficiently on a variety of hardware platforms, from CPUs to GPUs. Whether you are a seasoned machine learning practitioner or just getting started with YOLO, this directory provides a great starting point for your custom model development needs. + +To get started, simply browse through the models in this directory and find one that best suits your needs. Once you've selected a model, you can use the provided `*.yaml` file to train and deploy your custom YOLO model with ease. See full details at the Ultralytics [Docs](https://docs.ultralytics.com/models), and if you need help or have any questions, feel free to reach out to the Ultralytics team for support. So, don't wait, start creating your custom YOLO model now! 
+ +### Usage + +Model `*.yaml` files may be used directly in the Command Line Interface (CLI) with a `yolo` command: + +```bash +yolo task=detect mode=train model=yolov8n.yaml data=coco128.yaml epochs=100 +``` + +They may also be used directly in a Python environment, and accept the same +[arguments](https://docs.ultralytics.com/usage/cfg/) as in the CLI example above: + +```python +from ultralytics import YOLO + +model = YOLO("model.yaml") # build a YOLOv8n model from scratch +# YOLO("model.pt") use pre-trained model if available +model.info() # display model information +model.train(data="coco128.yaml", epochs=100) # train the model +``` + +## Pre-trained Model Architectures + +Ultralytics supports many model architectures. Visit https://docs.ultralytics.com/models to view detailed information and usage. Any of these models can be used by loading their configs or pretrained checkpoints if available. + +## Contribute New Models + +Have you trained a new YOLO variant or achieved state-of-the-art performance with specific tuning? We'd love to showcase your work in our Models section! Contributions from the community in the form of new models, architectures, or optimizations are highly valued and can significantly enrich our repository. + +By contributing to this section, you're helping us offer a wider array of model choices and configurations to the community. It's a fantastic way to share your knowledge and expertise while making the Ultralytics YOLO ecosystem even more versatile. + +To get started, please consult our [Contributing Guide](https://docs.ultralytics.com/help/contributing) for step-by-step instructions on how to submit a Pull Request (PR) 🛠️. Your contributions are eagerly awaited! + +Let's join hands to extend the range and capabilities of the Ultralytics YOLO models 🙏! 
diff --git a/ultralytics/cfg/models/rt-detr/rtdetr-l.yaml b/ultralytics/cfg/models/rt-detr/rtdetr-l.yaml new file mode 100644 index 0000000..bd20da1 --- /dev/null +++ b/ultralytics/cfg/models/rt-detr/rtdetr-l.yaml @@ -0,0 +1,50 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# RT-DETR-l object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr + +# Parameters +nc: 80 # number of classes +scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n' + # [depth, width, max_channels] + l: [1.00, 1.00, 1024] + +backbone: + # [from, repeats, module, args] + - [-1, 1, HGStem, [32, 48]] # 0-P2/4 + - [-1, 6, HGBlock, [48, 128, 3]] # stage 1 + + - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8 + - [-1, 6, HGBlock, [96, 512, 3]] # stage 2 + + - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 4-P4/16 + - [-1, 6, HGBlock, [192, 1024, 5, True, False]] # cm, c2, k, light, shortcut + - [-1, 6, HGBlock, [192, 1024, 5, True, True]] + - [-1, 6, HGBlock, [192, 1024, 5, True, True]] # stage 3 + + - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 8-P5/32 + - [-1, 6, HGBlock, [384, 2048, 5, True, False]] # stage 4 + +head: + - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 10 input_proj.2 + - [-1, 1, AIFI, [1024, 8]] + - [-1, 1, Conv, [256, 1, 1]] # 12, Y5, lateral_convs.0 + + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [7, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14 input_proj.1 + - [[-2, -1], 1, Concat, [1]] + - [-1, 3, RepC3, [256]] # 16, fpn_blocks.0 + - [-1, 1, Conv, [256, 1, 1]] # 17, Y4, lateral_convs.1 + + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 19 input_proj.0 + - [[-2, -1], 1, Concat, [1]] # cat backbone P3 + - [-1, 3, RepC3, [256]] # X3 (21), fpn_blocks.1 + + - [-1, 1, Conv, [256, 3, 2]] # 22, downsample_convs.0 + - [[-1, 17], 1, Concat, [1]] # cat Y4 + - [-1, 3, RepC3, [256]] # F4 (24), pan_blocks.0 + + - [-1, 1, Conv, [256, 3, 2]] 
# 25, downsample_convs.1 + - [[-1, 12], 1, Concat, [1]] # cat Y5 + - [-1, 3, RepC3, [256]] # F5 (27), pan_blocks.1 + + - [[21, 24, 27], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5) diff --git a/ultralytics/cfg/models/rt-detr/rtdetr-x.yaml b/ultralytics/cfg/models/rt-detr/rtdetr-x.yaml new file mode 100644 index 0000000..848cb52 --- /dev/null +++ b/ultralytics/cfg/models/rt-detr/rtdetr-x.yaml @@ -0,0 +1,54 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# RT-DETR-x object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr + +# Parameters +nc: 80 # number of classes +scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n' + # [depth, width, max_channels] + x: [1.00, 1.00, 2048] + +backbone: + # [from, repeats, module, args] + - [-1, 1, HGStem, [32, 64]] # 0-P2/4 + - [-1, 6, HGBlock, [64, 128, 3]] # stage 1 + + - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8 + - [-1, 6, HGBlock, [128, 512, 3]] + - [-1, 6, HGBlock, [128, 512, 3, False, True]] # 4-stage 2 + + - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 5-P4/16 + - [-1, 6, HGBlock, [256, 1024, 5, True, False]] # cm, c2, k, light, shortcut + - [-1, 6, HGBlock, [256, 1024, 5, True, True]] + - [-1, 6, HGBlock, [256, 1024, 5, True, True]] + - [-1, 6, HGBlock, [256, 1024, 5, True, True]] + - [-1, 6, HGBlock, [256, 1024, 5, True, True]] # 10-stage 3 + + - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 11-P5/32 + - [-1, 6, HGBlock, [512, 2048, 5, True, False]] + - [-1, 6, HGBlock, [512, 2048, 5, True, True]] # 13-stage 4 + +head: + - [-1, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 14 input_proj.2 + - [-1, 1, AIFI, [2048, 8]] + - [-1, 1, Conv, [384, 1, 1]] # 16, Y5, lateral_convs.0 + + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [10, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 18 input_proj.1 + - [[-2, -1], 1, Concat, [1]] + - [-1, 3, RepC3, [384]] # 20, fpn_blocks.0 + - [-1, 1, Conv, [384, 1, 1]] # 21, Y4, lateral_convs.1 + + - 
[-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [4, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 23 input_proj.0 + - [[-2, -1], 1, Concat, [1]] # cat backbone P3 + - [-1, 3, RepC3, [384]] # X3 (25), fpn_blocks.1 + + - [-1, 1, Conv, [384, 3, 2]] # 26, downsample_convs.0 + - [[-1, 21], 1, Concat, [1]] # cat Y4 + - [-1, 3, RepC3, [384]] # F4 (28), pan_blocks.0 + + - [-1, 1, Conv, [384, 3, 2]] # 29, downsample_convs.1 + - [[-1, 16], 1, Concat, [1]] # cat Y5 + - [-1, 3, RepC3, [384]] # F5 (31), pan_blocks.1 + + - [[25, 28, 31], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5) diff --git a/ultralytics/cfg/models/v3/yolov3-spp.yaml b/ultralytics/cfg/models/v3/yolov3-spp.yaml new file mode 100644 index 0000000..406e019 --- /dev/null +++ b/ultralytics/cfg/models/v3/yolov3-spp.yaml @@ -0,0 +1,48 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# YOLOv3-SPP object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3 + +# Parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple + +# darknet53 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [32, 3, 1]], # 0 + [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 + [-1, 1, Bottleneck, [64]], + [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 + [-1, 2, Bottleneck, [128]], + [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 + [-1, 8, Bottleneck, [256]], + [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 + [-1, 8, Bottleneck, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 + [-1, 4, Bottleneck, [1024]], # 10 + ] + +# YOLOv3-SPP head +head: + [[-1, 1, Bottleneck, [1024, False]], + [-1, 1, SPP, [512, [5, 9, 13]]], + [-1, 1, Conv, [1024, 3, 1]], + [-1, 1, Conv, [512, 1, 1]], + [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) + + [-2, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 8], 1, Concat, [1]], # cat backbone P4 + [-1, 1, Bottleneck, [512, False]], + [-1, 1, Bottleneck, [512, False]], + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, 
[512, 3, 1]], # 22 (P4/16-medium) + + [-2, 1, Conv, [128, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P3 + [-1, 1, Bottleneck, [256, False]], + [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) + + [[27, 22, 15], 1, Detect, [nc]], # Detect(P3, P4, P5) + ] diff --git a/ultralytics/cfg/models/v3/yolov3-tiny.yaml b/ultralytics/cfg/models/v3/yolov3-tiny.yaml new file mode 100644 index 0000000..69d8e42 --- /dev/null +++ b/ultralytics/cfg/models/v3/yolov3-tiny.yaml @@ -0,0 +1,39 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# YOLOv3-tiny object detection model with P4-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3 + +# Parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple + +# YOLOv3-tiny backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [16, 3, 1]], # 0 + [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2 + [-1, 1, Conv, [32, 3, 1]], + [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4 + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8 + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16 + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32 + [-1, 1, Conv, [512, 3, 1]], + [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11 + [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12 + ] + +# YOLOv3-tiny head +head: + [[-1, 1, Conv, [1024, 3, 1]], + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large) + + [-2, 1, Conv, [128, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 8], 1, Concat, [1]], # cat backbone P4 + [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium) + + [[19, 15], 1, Detect, [nc]], # Detect(P4, P5) + ] diff --git a/ultralytics/cfg/models/v3/yolov3.yaml b/ultralytics/cfg/models/v3/yolov3.yaml new file mode 100644 index 0000000..7cc0afa --- /dev/null +++ b/ultralytics/cfg/models/v3/yolov3.yaml @@ -0,0 +1,48 @@ +# Ultralytics YOLO 
🚀, AGPL-3.0 license +# YOLOv3 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3 + +# Parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple + +# darknet53 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [32, 3, 1]], # 0 + [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 + [-1, 1, Bottleneck, [64]], + [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 + [-1, 2, Bottleneck, [128]], + [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 + [-1, 8, Bottleneck, [256]], + [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 + [-1, 8, Bottleneck, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 + [-1, 4, Bottleneck, [1024]], # 10 + ] + +# YOLOv3 head +head: + [[-1, 1, Bottleneck, [1024, False]], + [-1, 1, Conv, [512, 1, 1]], + [-1, 1, Conv, [1024, 3, 1]], + [-1, 1, Conv, [512, 1, 1]], + [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) + + [-2, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 8], 1, Concat, [1]], # cat backbone P4 + [-1, 1, Bottleneck, [512, False]], + [-1, 1, Bottleneck, [512, False]], + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) + + [-2, 1, Conv, [128, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P3 + [-1, 1, Bottleneck, [256, False]], + [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) + + [[27, 22, 15], 1, Detect, [nc]], # Detect(P3, P4, P5) + ] diff --git a/ultralytics/cfg/models/v5/yolov5-p6.yaml b/ultralytics/cfg/models/v5/yolov5-p6.yaml new file mode 100644 index 0000000..d468377 --- /dev/null +++ b/ultralytics/cfg/models/v5/yolov5-p6.yaml @@ -0,0 +1,61 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# YOLOv5 object detection model with P3-P6 outputs. For details see https://docs.ultralytics.com/models/yolov5 + +# Parameters +nc: 80 # number of classes +scales: # model compound scaling constants, i.e. 
'model=yolov5n-p6.yaml' will call yolov5-p6.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.33, 0.25, 1024] + s: [0.33, 0.50, 1024] + m: [0.67, 0.75, 1024] + l: [1.00, 1.00, 1024] + x: [1.33, 1.25, 1024] + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 + [-1, 3, C3, [768]], + [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 11 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [768, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 8], 1, Concat, [1]], # cat backbone P5 + [-1, 3, C3, [768, False]], # 15 + + [-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 19 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 23 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 20], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 26 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 16], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [768, False]], # 29 (P5/32-large) + + [-1, 1, Conv, [768, 3, 2]], + [[-1, 12], 1, Concat, [1]], # cat head P6 + [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) + + [[23, 26, 29, 32], 1, Detect, [nc]], # Detect(P3, P4, P5, P6) + ] diff --git a/ultralytics/cfg/models/v5/yolov5.yaml b/ultralytics/cfg/models/v5/yolov5.yaml new file mode 100644 index 0000000..4a3fced --- /dev/null +++ b/ultralytics/cfg/models/v5/yolov5.yaml @@ -0,0 +1,50 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# YOLOv5 object detection model with P3-P5 outputs. 
For details see https://docs.ultralytics.com/models/yolov5 + +# Parameters +nc: 80 # number of classes +scales: # model compound scaling constants, i.e. 'model=yolov5n.yaml' will call yolov5.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.33, 0.25, 1024] + s: [0.33, 0.50, 1024] + m: [0.67, 0.75, 1024] + l: [1.00, 1.00, 1024] + x: [1.33, 1.25, 1024] + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, Detect, [nc]], # Detect(P3, P4, P5) + ] diff --git a/ultralytics/cfg/models/v6/yolov6.yaml b/ultralytics/cfg/models/v6/yolov6.yaml new file mode 100644 index 0000000..cb5e32a --- /dev/null +++ b/ultralytics/cfg/models/v6/yolov6.yaml @@ -0,0 +1,53 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# YOLOv6 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/models/yolov6 + +# Parameters +nc: 80 # number of classes +activation: nn.ReLU() # (optional) model default activation function +scales: # model compound scaling constants, i.e. 
'model=yolov6n.yaml' will call yolov6.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.33, 0.25, 1024] + s: [0.33, 0.50, 1024] + m: [0.67, 0.75, 768] + l: [1.00, 1.00, 512] + x: [1.00, 1.25, 512] + +# YOLOv6-3.0s backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 6, Conv, [128, 3, 1]] + - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 + - [-1, 12, Conv, [256, 3, 1]] + - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 + - [-1, 18, Conv, [512, 3, 1]] + - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 + - [-1, 6, Conv, [1024, 3, 1]] + - [-1, 1, SPPF, [1024, 5]] # 9 + +# YOLOv6-3.0s head +head: + - [-1, 1, Conv, [256, 1, 1]] + - [-1, 1, nn.ConvTranspose2d, [256, 2, 2, 0]] + - [[-1, 6], 1, Concat, [1]] # cat backbone P4 + - [-1, 1, Conv, [256, 3, 1]] + - [-1, 9, Conv, [256, 3, 1]] # 14 + + - [-1, 1, Conv, [128, 1, 1]] + - [-1, 1, nn.ConvTranspose2d, [128, 2, 2, 0]] + - [[-1, 4], 1, Concat, [1]] # cat backbone P3 + - [-1, 1, Conv, [128, 3, 1]] + - [-1, 9, Conv, [128, 3, 1]] # 19 + + - [-1, 1, Conv, [128, 3, 2]] + - [[-1, 15], 1, Concat, [1]] # cat head P4 + - [-1, 1, Conv, [256, 3, 1]] + - [-1, 9, Conv, [256, 3, 1]] # 23 + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 10], 1, Concat, [1]] # cat head P5 + - [-1, 1, Conv, [512, 3, 1]] + - [-1, 9, Conv, [512, 3, 1]] # 27 + + - [[19, 23, 27], 1, Detect, [nc]] # Detect(P3, P4, P5) diff --git a/ultralytics/cfg/models/v8/yolov8-cls.yaml b/ultralytics/cfg/models/v8/yolov8-cls.yaml new file mode 100644 index 0000000..5332f1d --- /dev/null +++ b/ultralytics/cfg/models/v8/yolov8-cls.yaml @@ -0,0 +1,29 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify + +# Parameters +nc: 1000 # number of classes +scales: # model compound scaling constants, i.e. 
'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.33, 0.25, 1024] + s: [0.33, 0.50, 1024] + m: [0.67, 0.75, 1024] + l: [1.00, 1.00, 1024] + x: [1.00, 1.25, 1024] + +# YOLOv8.0n backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 3, C2f, [128, True]] + - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 + - [-1, 6, C2f, [256, True]] + - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 + - [-1, 6, C2f, [512, True]] + - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 + - [-1, 3, C2f, [1024, True]] + +# YOLOv8.0n head +head: + - [-1, 1, Classify, [nc]] # Classify diff --git a/ultralytics/cfg/models/v8/yolov8-p2.yaml b/ultralytics/cfg/models/v8/yolov8-p2.yaml new file mode 100644 index 0000000..3e286aa --- /dev/null +++ b/ultralytics/cfg/models/v8/yolov8-p2.yaml @@ -0,0 +1,54 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect + +# Parameters +nc: 80 # number of classes +scales: # model compound scaling constants, i.e. 
'model=yolov8n-p2.yaml' will call yolov8-p2.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.33, 0.25, 1024] + s: [0.33, 0.50, 1024] + m: [0.67, 0.75, 768] + l: [1.00, 1.00, 512] + x: [1.00, 1.25, 512] + +# YOLOv8.0 backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 3, C2f, [128, True]] + - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 + - [-1, 6, C2f, [256, True]] + - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 + - [-1, 6, C2f, [512, True]] + - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 + - [-1, 3, C2f, [1024, True]] + - [-1, 1, SPPF, [1024, 5]] # 9 + +# YOLOv8.0-p2 head +head: + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 6], 1, Concat, [1]] # cat backbone P4 + - [-1, 3, C2f, [512]] # 12 + + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 4], 1, Concat, [1]] # cat backbone P3 + - [-1, 3, C2f, [256]] # 15 (P3/8-small) + + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 2], 1, Concat, [1]] # cat backbone P2 + - [-1, 3, C2f, [128]] # 18 (P2/4-xsmall) + + - [-1, 1, Conv, [128, 3, 2]] + - [[-1, 15], 1, Concat, [1]] # cat head P3 + - [-1, 3, C2f, [256]] # 21 (P3/8-small) + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 12], 1, Concat, [1]] # cat head P4 + - [-1, 3, C2f, [512]] # 24 (P4/16-medium) + + - [-1, 1, Conv, [512, 3, 2]] + - [[-1, 9], 1, Concat, [1]] # cat head P5 + - [-1, 3, C2f, [1024]] # 27 (P5/32-large) + + - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5) diff --git a/ultralytics/cfg/models/v8/yolov8-p6.yaml b/ultralytics/cfg/models/v8/yolov8-p6.yaml new file mode 100644 index 0000000..3635ed9 --- /dev/null +++ b/ultralytics/cfg/models/v8/yolov8-p6.yaml @@ -0,0 +1,56 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect + +# Parameters +nc: 80 # number of classes +scales: # model compound scaling constants, i.e. 
'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.33, 0.25, 1024] + s: [0.33, 0.50, 1024] + m: [0.67, 0.75, 768] + l: [1.00, 1.00, 512] + x: [1.00, 1.25, 512] + +# YOLOv8.0x6 backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 3, C2f, [128, True]] + - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 + - [-1, 6, C2f, [256, True]] + - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 + - [-1, 6, C2f, [512, True]] + - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32 + - [-1, 3, C2f, [768, True]] + - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64 + - [-1, 3, C2f, [1024, True]] + - [-1, 1, SPPF, [1024, 5]] # 11 + +# YOLOv8.0x6 head +head: + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 8], 1, Concat, [1]] # cat backbone P5 + - [-1, 3, C2, [768, False]] # 14 + + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 6], 1, Concat, [1]] # cat backbone P4 + - [-1, 3, C2, [512, False]] # 17 + + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 4], 1, Concat, [1]] # cat backbone P3 + - [-1, 3, C2, [256, False]] # 20 (P3/8-small) + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 17], 1, Concat, [1]] # cat head P4 + - [-1, 3, C2, [512, False]] # 23 (P4/16-medium) + + - [-1, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] # cat head P5 + - [-1, 3, C2, [768, False]] # 26 (P5/32-large) + + - [-1, 1, Conv, [768, 3, 2]] + - [[-1, 11], 1, Concat, [1]] # cat head P6 + - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge) + + - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6) diff --git a/ultralytics/cfg/models/v8/yolov8-pose-p6.yaml b/ultralytics/cfg/models/v8/yolov8-pose-p6.yaml new file mode 100644 index 0000000..abf0cfc --- /dev/null +++ b/ultralytics/cfg/models/v8/yolov8-pose-p6.yaml @@ -0,0 +1,57 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# YOLOv8-pose-p6 keypoints/pose estimation model. 
For Usage examples see https://docs.ultralytics.com/tasks/pose + +# Parameters +nc: 1 # number of classes +kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) +scales: # model compound scaling constants, i.e. 'model=yolov8n-pose-p6.yaml' will call yolov8-pose-p6.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.33, 0.25, 1024] + s: [0.33, 0.50, 1024] + m: [0.67, 0.75, 768] + l: [1.00, 1.00, 512] + x: [1.00, 1.25, 512] + +# YOLOv8.0x6 backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 3, C2f, [128, True]] + - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 + - [-1, 6, C2f, [256, True]] + - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 + - [-1, 6, C2f, [512, True]] + - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32 + - [-1, 3, C2f, [768, True]] + - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64 + - [-1, 3, C2f, [1024, True]] + - [-1, 1, SPPF, [1024, 5]] # 11 + +# YOLOv8.0x6 head +head: + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 8], 1, Concat, [1]] # cat backbone P5 + - [-1, 3, C2, [768, False]] # 14 + + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 6], 1, Concat, [1]] # cat backbone P4 + - [-1, 3, C2, [512, False]] # 17 + + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 4], 1, Concat, [1]] # cat backbone P3 + - [-1, 3, C2, [256, False]] # 20 (P3/8-small) + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 17], 1, Concat, [1]] # cat head P4 + - [-1, 3, C2, [512, False]] # 23 (P4/16-medium) + + - [-1, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] # cat head P5 + - [-1, 3, C2, [768, False]] # 26 (P5/32-large) + + - [-1, 1, Conv, [768, 3, 2]] + - [[-1, 11], 1, Concat, [1]] # cat head P6 + - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge) + + - [[20, 23, 26, 29], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5, P6) diff --git a/ultralytics/cfg/models/v8/yolov8-pose.yaml b/ultralytics/cfg/models/v8/yolov8-pose.yaml new file mode 100644 index 0000000..9f48e1e 
--- /dev/null +++ b/ultralytics/cfg/models/v8/yolov8-pose.yaml @@ -0,0 +1,47 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose + +# Parameters +nc: 1 # number of classes +kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) +scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will call yolov8-pose.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.33, 0.25, 1024] + s: [0.33, 0.50, 1024] + m: [0.67, 0.75, 768] + l: [1.00, 1.00, 512] + x: [1.00, 1.25, 512] + +# YOLOv8.0n backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 3, C2f, [128, True]] + - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 + - [-1, 6, C2f, [256, True]] + - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 + - [-1, 6, C2f, [512, True]] + - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 + - [-1, 3, C2f, [1024, True]] + - [-1, 1, SPPF, [1024, 5]] # 9 + +# YOLOv8.0n head +head: + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 6], 1, Concat, [1]] # cat backbone P4 + - [-1, 3, C2f, [512]] # 12 + + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 4], 1, Concat, [1]] # cat backbone P3 + - [-1, 3, C2f, [256]] # 15 (P3/8-small) + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 12], 1, Concat, [1]] # cat head P4 + - [-1, 3, C2f, [512]] # 18 (P4/16-medium) + + - [-1, 1, Conv, [512, 3, 2]] + - [[-1, 9], 1, Concat, [1]] # cat head P5 + - [-1, 3, C2f, [1024]] # 21 (P5/32-large) + + - [[15, 18, 21], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5) diff --git a/ultralytics/cfg/models/v8/yolov8-rtdetr.yaml b/ultralytics/cfg/models/v8/yolov8-rtdetr.yaml new file mode 100644 index 0000000..a058106 --- /dev/null +++ b/ultralytics/cfg/models/v8/yolov8-rtdetr.yaml @@ -0,0 +1,46 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# YOLOv8 object detection model with P3-P5 outputs. 
For Usage examples see https://docs.ultralytics.com/tasks/detect + +# Parameters +nc: 80 # number of classes +scales: # model compound scaling constants, i.e. 'model=yolov8n-rtdetr.yaml' will call yolov8-rtdetr.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs + s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs + m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs + l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs + x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs + +# YOLOv8.0n backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 3, C2f, [128, True]] + - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 + - [-1, 6, C2f, [256, True]] + - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 + - [-1, 6, C2f, [512, True]] + - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 + - [-1, 3, C2f, [1024, True]] + - [-1, 1, SPPF, [1024, 5]] # 9 + +# YOLOv8.0n head +head: + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 6], 1, Concat, [1]] # cat backbone P4 + - [-1, 3, C2f, [512]] # 12 + + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 4], 1, Concat, [1]] # cat backbone P3 + - [-1, 3, C2f, [256]] # 15 (P3/8-small) + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 12], 1, Concat, [1]] # cat head P4 + - [-1, 3, C2f, [512]] # 18 (P4/16-medium) + + - [-1, 1, Conv, [512, 3, 2]] + - [[-1, 9], 1, Concat, [1]] # cat head P5 + - [-1, 3, C2f, [1024]] # 21 (P5/32-large) + + - [[15, 18, 21], 1, RTDETRDecoder, [nc]] # RTDETRDecoder(P3, P4, P5) diff --git a/ultralytics/cfg/models/v8/yolov8-seg-p6.yaml b/ultralytics/cfg/models/v8/yolov8-seg-p6.yaml new file mode 100644 index 0000000..5ac0936 --- /dev/null +++ 
b/ultralytics/cfg/models/v8/yolov8-seg-p6.yaml @@ -0,0 +1,56 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# YOLOv8-seg-p6 instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment + +# Parameters +nc: 80 # number of classes +scales: # model compound scaling constants, i.e. 'model=yolov8n-seg-p6.yaml' will call yolov8-seg-p6.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.33, 0.25, 1024] + s: [0.33, 0.50, 1024] + m: [0.67, 0.75, 768] + l: [1.00, 1.00, 512] + x: [1.00, 1.25, 512] + +# YOLOv8.0x6 backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 3, C2f, [128, True]] + - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 + - [-1, 6, C2f, [256, True]] + - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 + - [-1, 6, C2f, [512, True]] + - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32 + - [-1, 3, C2f, [768, True]] + - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64 + - [-1, 3, C2f, [1024, True]] + - [-1, 1, SPPF, [1024, 5]] # 11 + +# YOLOv8.0x6 head +head: + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 8], 1, Concat, [1]] # cat backbone P5 + - [-1, 3, C2, [768, False]] # 14 + + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 6], 1, Concat, [1]] # cat backbone P4 + - [-1, 3, C2, [512, False]] # 17 + + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 4], 1, Concat, [1]] # cat backbone P3 + - [-1, 3, C2, [256, False]] # 20 (P3/8-small) + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 17], 1, Concat, [1]] # cat head P4 + - [-1, 3, C2, [512, False]] # 23 (P4/16-medium) + + - [-1, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] # cat head P5 + - [-1, 3, C2, [768, False]] # 26 (P5/32-large) + + - [-1, 1, Conv, [768, 3, 2]] + - [[-1, 11], 1, Concat, [1]] # cat head P6 + - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge) + + - [[20, 23, 26, 29], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5, P6) diff --git a/ultralytics/cfg/models/v8/yolov8-seg.yaml 
b/ultralytics/cfg/models/v8/yolov8-seg.yaml new file mode 100644 index 0000000..fbb08fc --- /dev/null +++ b/ultralytics/cfg/models/v8/yolov8-seg.yaml @@ -0,0 +1,46 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# YOLOv8-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment + +# Parameters +nc: 80 # number of classes +scales: # model compound scaling constants, i.e. 'model=yolov8n-seg.yaml' will call yolov8-seg.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.33, 0.25, 1024] + s: [0.33, 0.50, 1024] + m: [0.67, 0.75, 768] + l: [1.00, 1.00, 512] + x: [1.00, 1.25, 512] + +# YOLOv8.0n backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 3, C2f, [128, True]] + - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 + - [-1, 6, C2f, [256, True]] + - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 + - [-1, 6, C2f, [512, True]] + - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 + - [-1, 3, C2f, [1024, True]] + - [-1, 1, SPPF, [1024, 5]] # 9 + +# YOLOv8.0n head +head: + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 6], 1, Concat, [1]] # cat backbone P4 + - [-1, 3, C2f, [512]] # 12 + + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 4], 1, Concat, [1]] # cat backbone P3 + - [-1, 3, C2f, [256]] # 15 (P3/8-small) + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 12], 1, Concat, [1]] # cat head P4 + - [-1, 3, C2f, [512]] # 18 (P4/16-medium) + + - [-1, 1, Conv, [512, 3, 2]] + - [[-1, 9], 1, Concat, [1]] # cat head P5 + - [-1, 3, C2f, [1024]] # 21 (P5/32-large) + + - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5) diff --git a/ultralytics/cfg/models/v8/yolov8.yaml b/ultralytics/cfg/models/v8/yolov8.yaml new file mode 100644 index 0000000..2255450 --- /dev/null +++ b/ultralytics/cfg/models/v8/yolov8.yaml @@ -0,0 +1,46 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# YOLOv8 object detection model with P3-P5 outputs. 
For Usage examples see https://docs.ultralytics.com/tasks/detect + +# Parameters +nc: 80 # number of classes +scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs + s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs + m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs + l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs + x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs + +# YOLOv8.0n backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 3, C2f, [128, True]] + - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 + - [-1, 6, C2f, [256, True]] + - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 + - [-1, 6, C2f, [512, True]] + - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 + - [-1, 3, C2f, [1024, True]] + - [-1, 1, SPPF, [1024, 5]] # 9 + +# YOLOv8.0n head +head: + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 6], 1, Concat, [1]] # cat backbone P4 + - [-1, 3, C2f, [512]] # 12 + + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 4], 1, Concat, [1]] # cat backbone P3 + - [-1, 3, C2f, [256]] # 15 (P3/8-small) + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 12], 1, Concat, [1]] # cat head P4 + - [-1, 3, C2f, [512]] # 18 (P4/16-medium) + + - [-1, 1, Conv, [512, 3, 2]] + - [[-1, 9], 1, Concat, [1]] # cat head P5 + - [-1, 3, C2f, [1024]] # 21 (P5/32-large) + + - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5) diff --git a/ultralytics/cfg/trackers/botsort.yaml b/ultralytics/cfg/trackers/botsort.yaml new file mode 100644 index 0000000..cbbf348 --- /dev/null +++ b/ultralytics/cfg/trackers/botsort.yaml @@ 
-0,0 +1,18 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# Default YOLO tracker settings for BoT-SORT tracker https://github.com/NirAharon/BoT-SORT + +tracker_type: botsort # tracker type, ['botsort', 'bytetrack'] +track_high_thresh: 0.5 # threshold for the first association +track_low_thresh: 0.1 # threshold for the second association +new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks +track_buffer: 30 # buffer to calculate the time when to remove tracks +match_thresh: 0.8 # threshold for matching tracks +# min_box_area: 10 # threshold for min box areas(for tracker evaluation, not used for now) +# mot20: False # for tracker evaluation(not used for now) + +# BoT-SORT settings +gmc_method: sparseOptFlow # method of global motion compensation +# ReID model related thresh (not supported yet) +proximity_thresh: 0.5 +appearance_thresh: 0.25 +with_reid: False diff --git a/ultralytics/cfg/trackers/bytetrack.yaml b/ultralytics/cfg/trackers/bytetrack.yaml new file mode 100644 index 0000000..5060f92 --- /dev/null +++ b/ultralytics/cfg/trackers/bytetrack.yaml @@ -0,0 +1,11 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/ByteTrack + +tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack'] +track_high_thresh: 0.5 # threshold for the first association +track_low_thresh: 0.1 # threshold for the second association +new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks +track_buffer: 30 # buffer to calculate the time when to remove tracks +match_thresh: 0.8 # threshold for matching tracks +# min_box_area: 10 # threshold for min box areas(for tracker evaluation, not used for now) +# mot20: False # for tracker evaluation(not used for now) diff --git a/ultralytics/data/__init__.py b/ultralytics/data/__init__.py new file mode 100644 index 0000000..6fa7e84 --- /dev/null +++ b/ultralytics/data/__init__.py @@ -0,0 +1,8 
def auto_annotate(data, det_model='yolov8x.pt', sam_model='sam_b.pt', device='', output_dir=None):
    """
    Write segmentation label files for a folder of images by chaining a YOLO detector with a SAM segmenter.

    For every image in which the detector finds at least one object, the detected boxes are passed to SAM as
    prompts and one `<image stem>.txt` file is written. Each line of that file is
    `<class id> <x1> <y1> <x2> <y2> ...`, built from the flattened `masks.xyn` polygon of one detection.
    Images with no detections produce no file; detections whose polygon is empty are skipped.

    Args:
        data (str): Path to a folder containing images to be annotated.
        det_model (str, optional): Pre-trained YOLO detection model. Defaults to 'yolov8x.pt'.
        sam_model (str, optional): Pre-trained SAM segmentation model. Defaults to 'sam_b.pt'.
        device (str, optional): Device string forwarded to both models. Defaults to an empty string.
        output_dir (str | None, optional): Directory for the label files. When falsy, a sibling folder of
            `data` named `<data.stem>_auto_annotate_labels` is used. The directory is created if missing.

    Example:
        ```python
        from ultralytics.data.annotator import auto_annotate

        auto_annotate(data='ultralytics/assets', det_model='yolov8n.pt', sam_model='mobile_sam.pt')
        ```
    """
    detector = YOLO(det_model)
    segmenter = SAM(sam_model)

    data = Path(data)
    if not output_dir:
        output_dir = data.parent / f'{data.stem}_auto_annotate_labels'
    Path(output_dir).mkdir(exist_ok=True, parents=True)

    for result in detector(data, stream=True, device=device):
        class_ids = result.boxes.cls.int().tolist()  # noqa
        if not len(class_ids):
            continue  # nothing detected in this image, so no label file is written

        # Detected boxes (xyxy) act as the prompts for SAM
        sam_results = segmenter(result.orig_img, bboxes=result.boxes.xyxy, verbose=False, save=False, device=device)
        segments = sam_results[0].masks.xyn  # noqa

        label_file = f'{str(Path(output_dir) / Path(result.path).stem)}.txt'
        with open(label_file, 'w') as f:
            for i, polygon in enumerate(segments):
                if len(polygon) == 0:
                    continue
                coords = map(str, polygon.reshape(-1).tolist())
                f.write(f'{class_ids[i]} ' + ' '.join(coords) + '\n')
class Compose:
    """Chains several transforms so that they run one after another on the same data."""

    def __init__(self, transforms):
        """Keeps a reference to the given list of transforms (the list is not copied)."""
        self.transforms = transforms

    def __call__(self, data):
        """Passes `data` through every transform in order and returns the final result."""
        result = data
        for transform in self.transforms:
            result = transform(result)
        return result

    def append(self, transform):
        """Adds one more transform to the end of the chain."""
        self.transforms.append(transform)

    def tolist(self):
        """Returns the underlying list of transforms (the same object, not a copy)."""
        return self.transforms

    def __repr__(self):
        """Returns `ClassName(t1, t2, ...)` using the string form of each transform."""
        described = [f'{t}' for t in self.transforms]
        joined = ', '.join(described)
        return f'{self.__class__.__name__}({joined})'
class Mosaic(BaseMixTransform):
    """
    Mosaic augmentation.

    This class performs mosaic augmentation by combining multiple (4 or 9) images into a single mosaic image.
    The augmentation is applied to a dataset with a given probability.

    Attributes:
        dataset: The dataset on which the mosaic augmentation is applied.
        imgsz (int, optional): Image size (height and width) after mosaic pipeline of a single image. Default to 640.
        p (float, optional): Probability of applying the mosaic augmentation. Must be in the range 0-1. Default to 1.0.
        n (int, optional): The grid size, either 4 (for 2x2) or 9 (for 3x3).
        border (tuple): `(-imgsz // 2, -imgsz // 2)`, derived from `imgsz`; stored in the output labels as
            'mosaic_border' and used to crop the 3x3 canvas.
    """

    def __init__(self, dataset, imgsz=640, p=1.0, n=4):
        """Initializes the object with a dataset, image size, probability, and grid size (4 or 9)."""
        assert 0 <= p <= 1.0, f'The probability should be in range [0, 1], but got {p}.'
        assert n in (4, 9), 'grid must be equal to 4 or 9.'
        super().__init__(dataset=dataset, p=p)
        self.dataset = dataset
        self.imgsz = imgsz
        # Negative half-size border; it is not a constructor argument
        self.border = (-imgsz // 2, -imgsz // 2)  # width, height
        self.n = n

    def get_indexes(self, buffer=True):
        """
        Return `n - 1` random indexes of the extra images needed to fill the mosaic.

        With `buffer=True` the indexes are drawn (with replacement) from `dataset.buffer`; otherwise they are drawn
        uniformly from the whole dataset.
        """
        if buffer:  # select images from buffer
            return random.choices(list(self.dataset.buffer), k=self.n - 1)
        else:  # select any images
            return [random.randint(0, len(self.dataset) - 1) for _ in range(self.n - 1)]

    def _mix_transform(self, labels):
        """Build a 2x2 or 3x3 mosaic (depending on `n`) from `labels` and its 'mix_labels' entries."""
        assert labels.get('rect_shape', None) is None, 'rect and mosaic are mutually exclusive.'
        assert len(labels.get('mix_labels', [])), 'There are no other images for mosaic augment.'
        return self._mosaic4(labels) if self.n == 4 else self._mosaic9(labels)

    def _mosaic4(self, labels):
        """Create a 2x2 image mosaic on a (2 * imgsz, 2 * imgsz) canvas filled with 114."""
        mosaic_labels = []
        s = self.imgsz
        # Mosaic center, unpacked as (yc, xc); each coordinate is drawn from [-border, 2 * s + border]
        yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.border)  # mosaic center x, y
        for i in range(4):
            # Tile 0 is the current sample, tiles 1-3 come from 'mix_labels'
            labels_patch = labels if i == 0 else labels['mix_labels'][i - 1]
            # Load image
            img = labels_patch['img']
            h, w = labels_patch.pop('resized_shape')

            # Place img in img4
            if i == 0:  # top left
                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
            elif i == 1:  # top right
                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
            elif i == 2:  # bottom left
                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
            elif i == 3:  # bottom right
                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
            # Offset of the tile's origin inside the canvas, used to shift its labels
            padw = x1a - x1b
            padh = y1a - y1b

            labels_patch = self._update_labels(labels_patch, padw, padh)
            mosaic_labels.append(labels_patch)
        final_labels = self._cat_labels(mosaic_labels)
        final_labels['img'] = img4
        return final_labels

    def _mosaic9(self, labels):
        """Create a 3x3 image mosaic on a (3 * imgsz, 3 * imgsz) canvas, then crop it using `border`."""
        mosaic_labels = []
        s = self.imgsz
        hp, wp = -1, -1  # height, width previous
        for i in range(9):
            # Tile 0 is the current sample, tiles 1-8 come from 'mix_labels'
            labels_patch = labels if i == 0 else labels['mix_labels'][i - 1]
            # Load image
            img = labels_patch['img']
            h, w = labels_patch.pop('resized_shape')

            # Place img in img9
            if i == 0:  # center
                img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 9 tiles
                h0, w0 = h, w
                c = s, s, s + w, s + h  # xmin, ymin, xmax, ymax (base) coordinates
            elif i == 1:  # top
                c = s, s - h, s + w, s
            elif i == 2:  # top right
                c = s + wp, s - h, s + wp + w, s
            elif i == 3:  # right
                c = s + w0, s, s + w0 + w, s + h
            elif i == 4:  # bottom right
                c = s + w0, s + hp, s + w0 + w, s + hp + h
            elif i == 5:  # bottom
                c = s + w0 - w, s + h0, s + w0, s + h0 + h
            elif i == 6:  # bottom left
                c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
            elif i == 7:  # left
                c = s - w, s + h0 - h, s, s + h0
            elif i == 8:  # top left
                c = s - w, s + h0 - hp - h, s, s + h0 - hp

            padw, padh = c[:2]
            x1, y1, x2, y2 = (max(x, 0) for x in c)  # allocate coords

            # Image
            img9[y1:y2, x1:x2] = img[y1 - padh:, x1 - padw:]  # img9[ymin:ymax, xmin:xmax]
            hp, wp = h, w  # height, width previous for next iteration

            # Labels assuming imgsz*2 mosaic size
            labels_patch = self._update_labels(labels_patch, padw + self.border[0], padh + self.border[1])
            mosaic_labels.append(labels_patch)
        final_labels = self._cat_labels(mosaic_labels)

        # `border` is negative, so this slice trims |border| pixels from every side of the 3x3 canvas
        final_labels['img'] = img9[-self.border[0]:self.border[0], -self.border[1]:self.border[1]]
        return final_labels

    @staticmethod
    def _update_labels(labels, padw, padh):
        """Convert a tile's instances to pixel xyxy boxes and shift them by the tile offset (padw, padh)."""
        nh, nw = labels['img'].shape[:2]
        labels['instances'].convert_bbox(format='xyxy')
        labels['instances'].denormalize(nw, nh)
        labels['instances'].add_padding(padw, padh)
        return labels

    def _cat_labels(self, mosaic_labels):
        """
        Return labels with mosaic border instances clipped.

        Classes and instances of all tiles are concatenated, instances are clipped to a (2 * imgsz, 2 * imgsz)
        frame, and zero-area boxes are removed together with their class entries. 'im_file' and 'ori_shape' are
        taken from the first tile. An empty input yields an empty dict.
        """
        if len(mosaic_labels) == 0:
            return {}
        cls = []
        instances = []
        imgsz = self.imgsz * 2  # mosaic imgsz
        for labels in mosaic_labels:
            cls.append(labels['cls'])
            instances.append(labels['instances'])
        final_labels = {
            'im_file': mosaic_labels[0]['im_file'],
            'ori_shape': mosaic_labels[0]['ori_shape'],
            'resized_shape': (imgsz, imgsz),
            'cls': np.concatenate(cls, 0),
            'instances': Instances.concatenate(instances, axis=0),
            'mosaic_border': self.border}  # final_labels
        final_labels['instances'].clip(imgsz, imgsz)
        good = final_labels['instances'].remove_zero_area_boxes()  # keep-mask for the surviving boxes
        final_labels['cls'] = final_labels['cls'][good]
        return final_labels
+ pre_transform (callable): A function/transform to apply to the image before starting the random transformation. + + Methods: + affine_transform(img, border): Applies a series of affine transformations to the image. + apply_bboxes(bboxes, M): Transforms bounding boxes using the calculated affine matrix. + apply_segments(segments, M): Transforms segments and generates new bounding boxes. + apply_keypoints(keypoints, M): Transforms keypoints. + __call__(labels): Main method to apply transformations to both images and their corresponding annotations. + box_candidates(box1, box2): Filters out bounding boxes that don't meet certain criteria post-transformation. + """ + + def __init__(self, + degrees=0.0, + translate=0.1, + scale=0.5, + shear=0.0, + perspective=0.0, + border=(0, 0), + pre_transform=None): + """Initializes RandomPerspective object with transformation parameters.""" + + self.degrees = degrees + self.translate = translate + self.scale = scale + self.shear = shear + self.perspective = perspective + self.border = border # mosaic border + self.pre_transform = pre_transform + + def affine_transform(self, img, border): + """ + Applies a sequence of affine transformations centered around the image center. + + Args: + img (ndarray): Input image. + border (tuple): Border dimensions. + + Returns: + img (ndarray): Transformed image. + M (ndarray): Transformation matrix. + s (float): Scale factor. 
+ """ + + # Center + C = np.eye(3, dtype=np.float32) + + C[0, 2] = -img.shape[1] / 2 # x translation (pixels) + C[1, 2] = -img.shape[0] / 2 # y translation (pixels) + + # Perspective + P = np.eye(3, dtype=np.float32) + P[2, 0] = random.uniform(-self.perspective, self.perspective) # x perspective (about y) + P[2, 1] = random.uniform(-self.perspective, self.perspective) # y perspective (about x) + + # Rotation and Scale + R = np.eye(3, dtype=np.float32) + a = random.uniform(-self.degrees, self.degrees) + # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations + s = random.uniform(1 - self.scale, 1 + self.scale) + # s = 2 ** random.uniform(-scale, scale) + R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) + + # Shear + S = np.eye(3, dtype=np.float32) + S[0, 1] = math.tan(random.uniform(-self.shear, self.shear) * math.pi / 180) # x shear (deg) + S[1, 0] = math.tan(random.uniform(-self.shear, self.shear) * math.pi / 180) # y shear (deg) + + # Translation + T = np.eye(3, dtype=np.float32) + T[0, 2] = random.uniform(0.5 - self.translate, 0.5 + self.translate) * self.size[0] # x translation (pixels) + T[1, 2] = random.uniform(0.5 - self.translate, 0.5 + self.translate) * self.size[1] # y translation (pixels) + + # Combined rotation matrix + M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT + # Affine image + if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed + if self.perspective: + img = cv2.warpPerspective(img, M, dsize=self.size, borderValue=(114, 114, 114)) + else: # affine + img = cv2.warpAffine(img, M[:2], dsize=self.size, borderValue=(114, 114, 114)) + return img, M, s + + def apply_bboxes(self, bboxes, M): + """ + Apply affine to bboxes only. + + Args: + bboxes (ndarray): list of bboxes, xyxy format, with shape (num_bboxes, 4). + M (ndarray): affine matrix. + + Returns: + new_bboxes (ndarray): bboxes after affine, [num_bboxes, 4]. 
+ """ + n = len(bboxes) + if n == 0: + return bboxes + + xy = np.ones((n * 4, 3), dtype=bboxes.dtype) + xy[:, :2] = bboxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 + xy = xy @ M.T # transform + xy = (xy[:, :2] / xy[:, 2:3] if self.perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine + + # Create new boxes + x = xy[:, [0, 2, 4, 6]] + y = xy[:, [1, 3, 5, 7]] + return np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1)), dtype=bboxes.dtype).reshape(4, n).T + + def apply_segments(self, segments, M): + """ + Apply affine to segments and generate new bboxes from segments. + + Args: + segments (ndarray): list of segments, [num_samples, 500, 2]. + M (ndarray): affine matrix. + + Returns: + new_segments (ndarray): list of segments after affine, [num_samples, 500, 2]. + new_bboxes (ndarray): bboxes after affine, [N, 4]. + """ + n, num = segments.shape[:2] + if n == 0: + return [], segments + + xy = np.ones((n * num, 3), dtype=segments.dtype) + segments = segments.reshape(-1, 2) + xy[:, :2] = segments + xy = xy @ M.T # transform + xy = xy[:, :2] / xy[:, 2:3] + segments = xy.reshape(n, -1, 2) + bboxes = np.stack([segment2box(xy, self.size[0], self.size[1]) for xy in segments], 0) + return bboxes, segments + + def apply_keypoints(self, keypoints, M): + """ + Apply affine to keypoints. + + Args: + keypoints (ndarray): keypoints, [N, 17, 3]. + M (ndarray): affine matrix. + + Returns: + new_keypoints (ndarray): keypoints after affine, [N, 17, 3]. 
+ """ + n, nkpt = keypoints.shape[:2] + if n == 0: + return keypoints + xy = np.ones((n * nkpt, 3), dtype=keypoints.dtype) + visible = keypoints[..., 2].reshape(n * nkpt, 1) + xy[:, :2] = keypoints[..., :2].reshape(n * nkpt, 2) + xy = xy @ M.T # transform + xy = xy[:, :2] / xy[:, 2:3] # perspective rescale or affine + out_mask = (xy[:, 0] < 0) | (xy[:, 1] < 0) | (xy[:, 0] > self.size[0]) | (xy[:, 1] > self.size[1]) + visible[out_mask] = 0 + return np.concatenate([xy, visible], axis=-1).reshape(n, nkpt, 3) + + def __call__(self, labels): + """ + Affine images and targets. + + Args: + labels (dict): a dict of `bboxes`, `segments`, `keypoints`. + """ + if self.pre_transform and 'mosaic_border' not in labels: + labels = self.pre_transform(labels) + labels.pop('ratio_pad', None) # do not need ratio pad + + img = labels['img'] + cls = labels['cls'] + instances = labels.pop('instances') + # Make sure the coord formats are right + instances.convert_bbox(format='xyxy') + instances.denormalize(*img.shape[:2][::-1]) + + border = labels.pop('mosaic_border', self.border) + self.size = img.shape[1] + border[1] * 2, img.shape[0] + border[0] * 2 # w, h + # M is affine matrix + # Scale for func:`box_candidates` + img, M, scale = self.affine_transform(img, border) + + bboxes = self.apply_bboxes(instances.bboxes, M) + + segments = instances.segments + keypoints = instances.keypoints + # Update bboxes if there are segments. 
+ if len(segments): + bboxes, segments = self.apply_segments(segments, M) + + if keypoints is not None: + keypoints = self.apply_keypoints(keypoints, M) + new_instances = Instances(bboxes, segments, keypoints, bbox_format='xyxy', normalized=False) + # Clip + new_instances.clip(*self.size) + + # Filter instances + instances.scale(scale_w=scale, scale_h=scale, bbox_only=True) + # Make the bboxes have the same scale with new_bboxes + i = self.box_candidates(box1=instances.bboxes.T, + box2=new_instances.bboxes.T, + area_thr=0.01 if len(segments) else 0.10) + labels['instances'] = new_instances[i] + labels['cls'] = cls[i] + labels['img'] = img + labels['resized_shape'] = img.shape[:2] + return labels + + def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): + """ + Compute box candidates based on a set of thresholds. This method compares the characteristics of the boxes + before and after augmentation to decide whether a box is a candidate for further processing. + + Args: + box1 (numpy.ndarray): The 4,n bounding box before augmentation, represented as [x1, y1, x2, y2]. + box2 (numpy.ndarray): The 4,n bounding box after augmentation, represented as [x1, y1, x2, y2]. + wh_thr (float, optional): The width and height threshold in pixels. Default is 2. + ar_thr (float, optional): The aspect ratio threshold. Default is 100. + area_thr (float, optional): The area ratio threshold. Default is 0.1. + eps (float, optional): A small epsilon value to prevent division by zero. Default is 1e-16. + + Returns: + (numpy.ndarray): A boolean array indicating which boxes are candidates based on the given thresholds. 
class RandomHSV:
    """
    Randomly rescales the Hue, Saturation and Value channels of an image.

    Each channel gets its own random gain drawn from `1 ± gain`, where the per-channel limits are `hgain`, `sgain`
    and `vgain`.
    """

    def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None:
        """
        Store the maximum relative variation allowed for each HSV channel.

        Args:
            hgain (float, optional): Maximum variation for hue. Default is 0.5.
            sgain (float, optional): Maximum variation for saturation. Default is 0.5.
            vgain (float, optional): Maximum variation for value. Default is 0.5.
        """
        self.hgain, self.sgain, self.vgain = hgain, sgain, vgain

    def __call__(self, labels):
        """
        Apply the random HSV jitter to `labels['img']` in place and return `labels`.

        When all three gains are falsy the image is left untouched.
        """
        image = labels['img']
        if not (self.hgain or self.sgain or self.vgain):
            return labels

        gains = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1  # random gains
        hue, sat, val = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2HSV))
        out_dtype = image.dtype  # uint8

        # One 256-entry lookup table per channel; hue wraps at 180, the other two are clamped to [0, 255]
        levels = np.arange(0, 256, dtype=gains.dtype)
        hue_table = ((levels * gains[0]) % 180).astype(out_dtype)
        sat_table = np.clip(levels * gains[1], 0, 255).astype(out_dtype)
        val_table = np.clip(levels * gains[2], 0, 255).astype(out_dtype)

        jittered = cv2.merge((cv2.LUT(hue, hue_table), cv2.LUT(sat, sat_table), cv2.LUT(val, val_table)))
        cv2.cvtColor(jittered, cv2.COLOR_HSV2BGR, dst=image)  # no return needed
        return labels
+ """ + + def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None: + """ + Initializes the RandomFlip class with probability and direction. + + Args: + p (float, optional): The probability of applying the flip. Must be between 0 and 1. Default is 0.5. + direction (str, optional): The direction to apply the flip. Must be 'horizontal' or 'vertical'. + Default is 'horizontal'. + flip_idx (array-like, optional): Index mapping for flipping keypoints, if any. + """ + assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}' + assert 0 <= p <= 1.0 + + self.p = p + self.direction = direction + self.flip_idx = flip_idx + + def __call__(self, labels): + """ + Applies random flip to an image and updates any instances like bounding boxes or keypoints accordingly. + + Args: + labels (dict): A dictionary containing the keys 'img' and 'instances'. 'img' is the image to be flipped. + 'instances' is an object containing bounding boxes and optionally keypoints. + + Returns: + (dict): The same dict with the flipped image and updated instances under the 'img' and 'instances' keys. 
class LetterBox:
    """Resizes an image to a target shape and pads the remainder with a constant gray (114) border."""

    def __init__(self, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, center=True, stride=32):
        """Store the target shape and the resize/padding options."""
        self.new_shape = new_shape
        self.auto = auto
        self.scaleFill = scaleFill
        self.scaleup = scaleup
        self.center = center  # Put the image in the middle or top-left
        self.stride = stride

    def __call__(self, labels=None, image=None):
        """
        Letterbox an image and, when a non-empty label dict is given, its instances.

        Returns the updated `labels` dict (with 'img' and 'resized_shape' set) when `labels` is non-empty after
        'rect_shape' has been popped, otherwise just the padded image.
        """
        labels = {} if labels is None else labels
        img = image if image is not None else labels.get('img')
        shape = img.shape[:2]  # current shape [height, width]
        new_shape = labels.pop('rect_shape', self.new_shape)
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)

        # Scale ratio (new / old)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        if not self.scaleup:  # only scale down, do not scale up (for better val mAP)
            r = min(r, 1.0)

        # Compute padding
        ratio = (r, r)  # width, height ratios
        new_unpad = (int(round(shape[1] * r)), int(round(shape[0] * r)))
        dw = new_shape[1] - new_unpad[0]  # width padding
        dh = new_shape[0] - new_unpad[1]  # height padding
        if self.auto:  # minimum rectangle
            dw, dh = np.mod(dw, self.stride), np.mod(dh, self.stride)
        elif self.scaleFill:  # stretch
            dw, dh = 0.0, 0.0
            new_unpad = (new_shape[1], new_shape[0])
            ratio = (new_shape[1] / shape[1], new_shape[0] / shape[0])  # width, height ratios

        if self.center:  # divide padding into 2 sides
            dw, dh = dw / 2, dh / 2

        if shape[::-1] != new_unpad:  # resize
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)

        # With center=False all padding goes to the bottom/right
        top = int(round(dh - 0.1)) if self.center else 0
        bottom = int(round(dh + 0.1))
        left = int(round(dw - 0.1)) if self.center else 0
        right = int(round(dw + 0.1))
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT,
                                 value=(114, 114, 114))  # add border
        if labels.get('ratio_pad'):
            labels['ratio_pad'] = (labels['ratio_pad'], (left, top))  # for evaluation

        if not len(labels):
            return img
        labels = self._update_labels(labels, ratio, dw, dh)
        labels['img'] = img
        labels['resized_shape'] = new_shape
        return labels

    def _update_labels(self, labels, ratio, padw, padh):
        """Convert instances to pixel xyxy boxes, scale them by `ratio` and shift them by the padding."""
        instances = labels['instances']
        instances.convert_bbox(format='xyxy')
        # Uses the shape of labels['img'] as stored on entry, i.e. before it is replaced by the padded image
        instances.denormalize(*labels['img'].shape[:2][::-1])
        instances.scale(*ratio)
        instances.add_padding(padw, padh)
        return labels
+ - 'cls': Class labels associated with the instances. + - 'instances': Object containing bounding boxes, and optionally, keypoints and segments. + + Returns: + (dict): Dict with augmented image and updated instances under the 'img', 'cls', and 'instances' keys. + + Notes: + 1. Instances are expected to have 'segments' as one of their attributes for this augmentation to work. + 2. This method modifies the input dictionary 'labels' in place. + """ + im = labels['img'] + cls = labels['cls'] + h, w = im.shape[:2] + instances = labels.pop('instances') + instances.convert_bbox(format='xyxy') + instances.denormalize(w, h) + if self.p and len(instances.segments): + n = len(instances) + _, w, _ = im.shape # height, width, channels + im_new = np.zeros(im.shape, np.uint8) + + # Calculate ioa first then select indexes randomly + ins_flip = deepcopy(instances) + ins_flip.fliplr(w) + + ioa = bbox_ioa(ins_flip.bboxes, instances.bboxes) # intersection over area, (N, M) + indexes = np.nonzero((ioa < 0.30).all(1))[0] # (N, ) + n = len(indexes) + for j in random.sample(list(indexes), k=round(self.p * n)): + cls = np.concatenate((cls, cls[[j]]), axis=0) + instances = Instances.concatenate((instances, ins_flip[[j]]), axis=0) + cv2.drawContours(im_new, instances.segments[[j]].astype(np.int32), -1, (1, 1, 1), cv2.FILLED) + + result = cv2.flip(im, 1) # augment segments (flip left-right) + i = cv2.flip(im_new, 1).astype(bool) + im[i] = result[i] + + labels['img'] = im + labels['cls'] = cls + labels['instances'] = instances + return labels + + +class Albumentations: + """ + Albumentations transformations. + + Optional, uninstall package to disable. Applies Blur, Median Blur, convert to grayscale, Contrast Limited Adaptive + Histogram Equalization, random change of brightness and contrast, RandomGamma and lowering of image quality by + compression. 
+ """ + + def __init__(self, p=1.0): + """Initialize the transform object for YOLO bbox formatted params.""" + self.p = p + self.transform = None + prefix = colorstr('albumentations: ') + try: + import albumentations as A + + check_version(A.__version__, '1.0.3', hard=True) # version requirement + + T = [ + A.Blur(p=0.01), + A.MedianBlur(p=0.01), + A.ToGray(p=0.01), + A.CLAHE(p=0.01), + A.RandomBrightnessContrast(p=0.0), + A.RandomGamma(p=0.0), + A.ImageCompression(quality_lower=75, p=0.0)] # transforms + self.transform = A.Compose(T, bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels'])) + + LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p)) + except ImportError: # package not installed, skip + pass + except Exception as e: + LOGGER.info(f'{prefix}{e}') + + def __call__(self, labels): + """Generates object detections and returns a dictionary with detection results.""" + im = labels['img'] + cls = labels['cls'] + if len(cls): + labels['instances'].convert_bbox('xywh') + labels['instances'].normalize(*im.shape[:2][::-1]) + bboxes = labels['instances'].bboxes + # TODO: add supports of segments and keypoints + if self.transform and random.random() < self.p: + new = self.transform(image=im, bboxes=bboxes, class_labels=cls) # transformed + if len(new['class_labels']) > 0: # skip update if no bbox in new im + labels['img'] = new['image'] + labels['cls'] = np.array(new['class_labels']) + bboxes = np.array(new['bboxes'], dtype=np.float32) + labels['instances'].update(bboxes=bboxes) + return labels + + +# TODO: technically this is not an augmentation, maybe we should put this to another files +class Format: + """ + Formats image annotations for object detection, instance segmentation, and pose estimation tasks. The class + standardizes the image and instance annotations to be used by the `collate_fn` in PyTorch DataLoader. + + Attributes: + bbox_format (str): Format for bounding boxes. Default is 'xywh'. 
+ normalize (bool): Whether to normalize bounding boxes. Default is True. + return_mask (bool): Return instance masks for segmentation. Default is False. + return_keypoint (bool): Return keypoints for pose estimation. Default is False. + mask_ratio (int): Downsample ratio for masks. Default is 4. + mask_overlap (bool): Whether to overlap masks. Default is True. + batch_idx (bool): Keep batch indexes. Default is True. + """ + + def __init__(self, + bbox_format='xywh', + normalize=True, + return_mask=False, + return_keypoint=False, + mask_ratio=4, + mask_overlap=True, + batch_idx=True): + """Initializes the Format class with given parameters.""" + self.bbox_format = bbox_format + self.normalize = normalize + self.return_mask = return_mask # set False when training detection only + self.return_keypoint = return_keypoint + self.mask_ratio = mask_ratio + self.mask_overlap = mask_overlap + self.batch_idx = batch_idx # keep the batch indexes + + def __call__(self, labels): + """Return formatted image, classes, bounding boxes & keypoints to be used by 'collate_fn'.""" + img = labels.pop('img') + h, w = img.shape[:2] + cls = labels.pop('cls') + instances = labels.pop('instances') + instances.convert_bbox(format=self.bbox_format) + instances.denormalize(w, h) + nl = len(instances) + + if self.return_mask: + if nl: + masks, instances, cls = self._format_segments(instances, cls, w, h) + masks = torch.from_numpy(masks) + else: + masks = torch.zeros(1 if self.mask_overlap else nl, img.shape[0] // self.mask_ratio, + img.shape[1] // self.mask_ratio) + labels['masks'] = masks + if self.normalize: + instances.normalize(w, h) + labels['img'] = self._format_img(img) + labels['cls'] = torch.from_numpy(cls) if nl else torch.zeros(nl) + labels['bboxes'] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4)) + if self.return_keypoint: + labels['keypoints'] = torch.from_numpy(instances.keypoints) + # Then we can use collate_fn + if self.batch_idx: + labels['batch_idx'] = 
torch.zeros(nl) + return labels + + def _format_img(self, img): + """Format the image for YOLO from Numpy array to PyTorch tensor.""" + if len(img.shape) < 3: + img = np.expand_dims(img, -1) + img = np.ascontiguousarray(img.transpose(2, 0, 1)[::-1]) + img = torch.from_numpy(img) + return img + + def _format_segments(self, instances, cls, w, h): + """Convert polygon points to bitmap.""" + segments = instances.segments + if self.mask_overlap: + masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=self.mask_ratio) + masks = masks[None] # (640, 640) -> (1, 640, 640) + instances = instances[sorted_idx] + cls = cls[sorted_idx] + else: + masks = polygons2masks((h, w), segments, color=1, downsample_ratio=self.mask_ratio) + + return masks, instances, cls + + +def v8_transforms(dataset, imgsz, hyp, stretch=False): + """Convert images to a size suitable for YOLOv8 training.""" + pre_transform = Compose([ + Mosaic(dataset, imgsz=imgsz, p=hyp.mosaic), + CopyPaste(p=hyp.copy_paste), + RandomPerspective( + degrees=hyp.degrees, + translate=hyp.translate, + scale=hyp.scale, + shear=hyp.shear, + perspective=hyp.perspective, + pre_transform=None if stretch else LetterBox(new_shape=(imgsz, imgsz)), + )]) + flip_idx = dataset.data.get('flip_idx', []) # for keypoints augmentation + if dataset.use_keypoints: + kpt_shape = dataset.data.get('kpt_shape', None) + if len(flip_idx) == 0 and hyp.fliplr > 0.0: + hyp.fliplr = 0.0 + LOGGER.warning("WARNING ⚠️ No 'flip_idx' array defined in data.yaml, setting augmentation 'fliplr=0.0'") + elif flip_idx and (len(flip_idx) != kpt_shape[0]): + raise ValueError(f'data.yaml flip_idx={flip_idx} length must be equal to kpt_shape[0]={kpt_shape[0]}') + + return Compose([ + pre_transform, + MixUp(dataset, pre_transform=pre_transform, p=hyp.mixup), + Albumentations(p=1.0), + RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v), + RandomFlip(direction='vertical', p=hyp.flipud), + RandomFlip(direction='horizontal', p=hyp.fliplr, 
flip_idx=flip_idx)]) # transforms + + +# Classification augmentations ----------------------------------------------------------------------------------------- +def classify_transforms(size=224, rect=False, mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0)): # IMAGENET_MEAN, IMAGENET_STD + """Transforms to apply if albumentations not installed.""" + if not isinstance(size, int): + raise TypeError(f'classify_transforms() size {size} must be integer, not (list, tuple)') + transforms = [ClassifyLetterBox(size, auto=True) if rect else CenterCrop(size), ToTensor()] + if any(mean) or any(std): + transforms.append(T.Normalize(mean, std, inplace=True)) + return T.Compose(transforms) + + +def hsv2colorjitter(h, s, v): + """Map HSV (hue, saturation, value) jitter into ColorJitter values (brightness, contrast, saturation, hue)""" + return v, v, s, h + + +def classify_albumentations( + augment=True, + size=224, + scale=(0.08, 1.0), + hflip=0.5, + vflip=0.0, + hsv_h=0.015, # image HSV-Hue augmentation (fraction) + hsv_s=0.7, # image HSV-Saturation augmentation (fraction) + hsv_v=0.4, # image HSV-Value augmentation (fraction) + mean=(0.0, 0.0, 0.0), # IMAGENET_MEAN + std=(1.0, 1.0, 1.0), # IMAGENET_STD + auto_aug=False, +): + """YOLOv8 classification Albumentations (optional, only used if package is installed).""" + prefix = colorstr('albumentations: ') + try: + import albumentations as A + from albumentations.pytorch import ToTensorV2 + + check_version(A.__version__, '1.0.3', hard=True) # version requirement + if augment: # Resize and crop + T = [A.RandomResizedCrop(height=size, width=size, scale=scale)] + if auto_aug: + # TODO: implement AugMix, AutoAug & RandAug in albumentations + LOGGER.info(f'{prefix}auto augmentations are currently not supported') + else: + if hflip > 0: + T += [A.HorizontalFlip(p=hflip)] + if vflip > 0: + T += [A.VerticalFlip(p=vflip)] + if any((hsv_h, hsv_s, hsv_v)): + T += [A.ColorJitter(*hsv2colorjitter(hsv_h, hsv_s, hsv_v))] # brightness, contrast, 
saturation, hue + else: # Use fixed crop for eval set (reproducibility) + T = [A.SmallestMaxSize(max_size=size), A.CenterCrop(height=size, width=size)] + T += [A.Normalize(mean=mean, std=std), ToTensorV2()] # Normalize and convert to Tensor + LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p)) + return A.Compose(T) + + except ImportError: # package not installed, skip + pass + except Exception as e: + LOGGER.info(f'{prefix}{e}') + + +class ClassifyLetterBox: + """ + YOLOv8 LetterBox class for image preprocessing, designed to be part of a transformation pipeline, e.g., + T.Compose([LetterBox(size), ToTensor()]). + + Attributes: + h (int): Target height of the image. + w (int): Target width of the image. + auto (bool): If True, automatically solves for short side using stride. + stride (int): The stride value, used when 'auto' is True. + """ + + def __init__(self, size=(640, 640), auto=False, stride=32): + """ + Initializes the ClassifyLetterBox class with a target size, auto-flag, and stride. + + Args: + size (Union[int, Tuple[int, int]]): The target dimensions (height, width) for the letterbox. + auto (bool): If True, automatically calculates the short side based on stride. + stride (int): The stride value, used when 'auto' is True. + """ + super().__init__() + self.h, self.w = (size, size) if isinstance(size, int) else size + self.auto = auto # pass max size integer, automatically solve for short side using stride + self.stride = stride # used with auto + + def __call__(self, im): + """ + Resizes the image and pads it with a letterbox method. + + Args: + im (numpy.ndarray): The input image as a numpy array of shape HWC. + + Returns: + (numpy.ndarray): The letterboxed and resized image as a numpy array. 
+ """ + imh, imw = im.shape[:2] + r = min(self.h / imh, self.w / imw) # ratio of new/old dimensions + h, w = round(imh * r), round(imw * r) # resized image dimensions + + # Calculate padding dimensions + hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else (self.h, self.w) + top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1) + + # Create padded image + im_out = np.full((hs, ws, 3), 114, dtype=im.dtype) + im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR) + return im_out + + +class CenterCrop: + """YOLOv8 CenterCrop class for image preprocessing, designed to be part of a transformation pipeline, e.g., + T.Compose([CenterCrop(size), ToTensor()]). + """ + + def __init__(self, size=640): + """Converts an image from numpy array to PyTorch tensor.""" + super().__init__() + self.h, self.w = (size, size) if isinstance(size, int) else size + + def __call__(self, im): + """ + Resizes and crops the center of the image using a letterbox method. + + Args: + im (numpy.ndarray): The input image as a numpy array of shape HWC. + + Returns: + (numpy.ndarray): The center-cropped and resized image as a numpy array. + """ + imh, imw = im.shape[:2] + m = min(imh, imw) # min dimension + top, left = (imh - m) // 2, (imw - m) // 2 + return cv2.resize(im[top:top + m, left:left + m], (self.w, self.h), interpolation=cv2.INTER_LINEAR) + + +class ToTensor: + """YOLOv8 ToTensor class for image preprocessing, i.e., T.Compose([LetterBox(size), ToTensor()]).""" + + def __init__(self, half=False): + """Initialize YOLOv8 ToTensor object with optional half-precision support.""" + super().__init__() + self.half = half + + def __call__(self, im): + """ + Transforms an image from a numpy array to a PyTorch tensor, applying optional half-precision and normalization. + + Args: + im (numpy.ndarray): Input image as a numpy array with shape (H, W, C) in BGR order. 
+ + Returns: + (torch.Tensor): The transformed image as a PyTorch tensor in float32 or float16, normalized to [0, 1]. + """ + im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1]) # HWC to CHW -> BGR to RGB -> contiguous + im = torch.from_numpy(im) # to torch + im = im.half() if self.half else im.float() # uint8 to fp16/32 + im /= 255.0 # 0-255 to 0.0-1.0 + return im diff --git a/ultralytics/data/base.py b/ultralytics/data/base.py new file mode 100644 index 0000000..1df546b --- /dev/null +++ b/ultralytics/data/base.py @@ -0,0 +1,304 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +import glob +import math +import os +import random +from copy import deepcopy +from multiprocessing.pool import ThreadPool +from pathlib import Path +from typing import Optional + +import cv2 +import numpy as np +import psutil +from torch.utils.data import Dataset + +from ultralytics.utils import DEFAULT_CFG, LOCAL_RANK, LOGGER, NUM_THREADS, TQDM + +from .utils import HELP_URL, IMG_FORMATS + + +class BaseDataset(Dataset): + """ + Base dataset class for loading and processing image data. + + Args: + img_path (str): Path to the folder containing images. + imgsz (int, optional): Image size. Defaults to 640. + cache (bool, optional): Cache images to RAM or disk during training. Defaults to False. + augment (bool, optional): If True, data augmentation is applied. Defaults to True. + hyp (dict, optional): Hyperparameters to apply data augmentation. Defaults to None. + prefix (str, optional): Prefix to print in log messages. Defaults to ''. + rect (bool, optional): If True, rectangular training is used. Defaults to False. + batch_size (int, optional): Size of batches. Defaults to None. + stride (int, optional): Stride. Defaults to 32. + pad (float, optional): Padding. Defaults to 0.0. + single_cls (bool, optional): If True, single class training is used. Defaults to False. + classes (list): List of included classes. Default is None. + fraction (float): Fraction of dataset to utilize. 
Default is 1.0 (use all data). + + Attributes: + im_files (list): List of image file paths. + labels (list): List of label data dictionaries. + ni (int): Number of images in the dataset. + ims (list): List of loaded images. + npy_files (list): List of numpy file paths. + transforms (callable): Image transformation function. + """ + + def __init__(self, + img_path, + imgsz=640, + cache=False, + augment=True, + hyp=DEFAULT_CFG, + prefix='', + rect=False, + batch_size=16, + stride=32, + pad=0.5, + single_cls=False, + classes=None, + fraction=1.0): + """Initialize BaseDataset with given configuration and options.""" + super().__init__() + self.img_path = img_path + self.imgsz = imgsz + self.augment = augment + self.single_cls = single_cls + self.prefix = prefix + self.fraction = fraction + self.im_files = self.get_img_files(self.img_path) + self.labels = self.get_labels() + self.update_labels(include_class=classes) # single_cls and include_class + self.ni = len(self.labels) # number of images + self.rect = rect + self.batch_size = batch_size + self.stride = stride + self.pad = pad + if self.rect: + assert self.batch_size is not None + self.set_rectangle() + + # Buffer thread for mosaic images + self.buffer = [] # buffer size = batch size + self.max_buffer_length = min((self.ni, self.batch_size * 8, 1000)) if self.augment else 0 + + # Cache images + if cache == 'ram' and not self.check_cache_ram(): + cache = False + self.ims, self.im_hw0, self.im_hw = [None] * self.ni, [None] * self.ni, [None] * self.ni + self.npy_files = [Path(f).with_suffix('.npy') for f in self.im_files] + if cache: + self.cache_images(cache) + + # Transforms + self.transforms = self.build_transforms(hyp=hyp) + + def get_img_files(self, img_path): + """Read image files.""" + try: + f = [] # image files + for p in img_path if isinstance(img_path, list) else [img_path]: + p = Path(p) # os-agnostic + if p.is_dir(): # dir + f += glob.glob(str(p / '**' / '*.*'), recursive=True) + # F = 
list(p.rglob('*.*')) # pathlib + elif p.is_file(): # file + with open(p) as t: + t = t.read().strip().splitlines() + parent = str(p.parent) + os.sep + f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path + # F += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib) + else: + raise FileNotFoundError(f'{self.prefix}{p} does not exist') + im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS) + # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib + assert im_files, f'{self.prefix}No images found in {img_path}' + except Exception as e: + raise FileNotFoundError(f'{self.prefix}Error loading data from {img_path}\n{HELP_URL}') from e + if self.fraction < 1: + im_files = im_files[:round(len(im_files) * self.fraction)] + return im_files + + def update_labels(self, include_class: Optional[list]): + """Update labels to include only these classes (optional).""" + include_class_array = np.array(include_class).reshape(1, -1) + for i in range(len(self.labels)): + if include_class is not None: + cls = self.labels[i]['cls'] + bboxes = self.labels[i]['bboxes'] + segments = self.labels[i]['segments'] + keypoints = self.labels[i]['keypoints'] + j = (cls == include_class_array).any(1) + self.labels[i]['cls'] = cls[j] + self.labels[i]['bboxes'] = bboxes[j] + if segments: + self.labels[i]['segments'] = [segments[si] for si, idx in enumerate(j) if idx] + if keypoints is not None: + self.labels[i]['keypoints'] = keypoints[j] + if self.single_cls: + self.labels[i]['cls'][:, 0] = 0 + + def load_image(self, i, rect_mode=True): + """Loads 1 image from dataset index 'i', returns (im, resized hw).""" + im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i] + if im is None: # not cached in RAM + if fn.exists(): # load npy + try: + im = np.load(fn) + except Exception as e: + LOGGER.warning(f'{self.prefix}WARNING ⚠️ Removing corrupt *.npy image file {fn} 
due to: {e}') + Path(fn).unlink(missing_ok=True) + im = cv2.imread(f) # BGR + else: # read image + im = cv2.imread(f) # BGR + if im is None: + raise FileNotFoundError(f'Image Not Found {f}') + + h0, w0 = im.shape[:2] # orig hw + if rect_mode: # resize long side to imgsz while maintaining aspect ratio + r = self.imgsz / max(h0, w0) # ratio + if r != 1: # if sizes are not equal + w, h = (min(math.ceil(w0 * r), self.imgsz), min(math.ceil(h0 * r), self.imgsz)) + im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR) + elif not (h0 == w0 == self.imgsz): # resize by stretching image to square imgsz + im = cv2.resize(im, (self.imgsz, self.imgsz), interpolation=cv2.INTER_LINEAR) + + # Add to buffer if training with augmentations + if self.augment: + self.ims[i], self.im_hw0[i], self.im_hw[i] = im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized + self.buffer.append(i) + if len(self.buffer) >= self.max_buffer_length: + j = self.buffer.pop(0) + self.ims[j], self.im_hw0[j], self.im_hw[j] = None, None, None + + return im, (h0, w0), im.shape[:2] + + return self.ims[i], self.im_hw0[i], self.im_hw[i] + + def cache_images(self, cache): + """Cache images to memory or disk.""" + b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes + fcn = self.cache_images_to_disk if cache == 'disk' else self.load_image + with ThreadPool(NUM_THREADS) as pool: + results = pool.imap(fcn, range(self.ni)) + pbar = TQDM(enumerate(results), total=self.ni, disable=LOCAL_RANK > 0) + for i, x in pbar: + if cache == 'disk': + b += self.npy_files[i].stat().st_size + else: # 'ram' + self.ims[i], self.im_hw0[i], self.im_hw[i] = x # im, hw_orig, hw_resized = load_image(self, i) + b += self.ims[i].nbytes + pbar.desc = f'{self.prefix}Caching images ({b / gb:.1f}GB {cache})' + pbar.close() + + def cache_images_to_disk(self, i): + """Saves an image as an *.npy file for faster loading.""" + f = self.npy_files[i] + if not f.exists(): + np.save(f.as_posix(), cv2.imread(self.im_files[i]), 
allow_pickle=False) + + def check_cache_ram(self, safety_margin=0.5): + """Check image caching requirements vs available memory.""" + b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes + n = min(self.ni, 30) # extrapolate from 30 random images + for _ in range(n): + im = cv2.imread(random.choice(self.im_files)) # sample image + ratio = self.imgsz / max(im.shape[0], im.shape[1]) # max(h, w) # ratio + b += im.nbytes * ratio ** 2 + mem_required = b * self.ni / n * (1 + safety_margin) # GB required to cache dataset into RAM + mem = psutil.virtual_memory() + cache = mem_required < mem.available # to cache or not to cache, that is the question + if not cache: + LOGGER.info(f'{self.prefix}{mem_required / gb:.1f}GB RAM required to cache images ' + f'with {int(safety_margin * 100)}% safety margin but only ' + f'{mem.available / gb:.1f}/{mem.total / gb:.1f}GB available, ' + f"{'caching images ✅' if cache else 'not caching images ⚠️'}") + return cache + + def set_rectangle(self): + """Sets the shape of bounding boxes for YOLO detections as rectangles.""" + bi = np.floor(np.arange(self.ni) / self.batch_size).astype(int) # batch index + nb = bi[-1] + 1 # number of batches + + s = np.array([x.pop('shape') for x in self.labels]) # hw + ar = s[:, 0] / s[:, 1] # aspect ratio + irect = ar.argsort() + self.im_files = [self.im_files[i] for i in irect] + self.labels = [self.labels[i] for i in irect] + ar = ar[irect] + + # Set training image shapes + shapes = [[1, 1]] * nb + for i in range(nb): + ari = ar[bi == i] + mini, maxi = ari.min(), ari.max() + if maxi < 1: + shapes[i] = [maxi, 1] + elif mini > 1: + shapes[i] = [1, 1 / mini] + + self.batch_shapes = np.ceil(np.array(shapes) * self.imgsz / self.stride + self.pad).astype(int) * self.stride + self.batch = bi # batch index of image + + def __getitem__(self, index): + """Returns transformed label information for given index.""" + return self.transforms(self.get_image_and_label(index)) + + def get_image_and_label(self, 
index): + """Get and return label information from the dataset.""" + label = deepcopy(self.labels[index]) # requires deepcopy() https://github.com/ultralytics/ultralytics/pull/1948 + label.pop('shape', None) # shape is for rect, remove it + label['img'], label['ori_shape'], label['resized_shape'] = self.load_image(index) + label['ratio_pad'] = (label['resized_shape'][0] / label['ori_shape'][0], + label['resized_shape'][1] / label['ori_shape'][1]) # for evaluation + if self.rect: + label['rect_shape'] = self.batch_shapes[self.batch[index]] + return self.update_labels_info(label) + + def __len__(self): + """Returns the length of the labels list for the dataset.""" + return len(self.labels) + + def update_labels_info(self, label): + """Custom your label format here.""" + return label + + def build_transforms(self, hyp=None): + """ + Users can customize augmentations here. + + Example: + ```python + if self.augment: + # Training transforms + return Compose([]) + else: + # Val transforms + return Compose([]) + ``` + """ + raise NotImplementedError + + def get_labels(self): + """ + Users can customize their own format here. 
+ + Note: + Ensure output is a dictionary with the following keys: + ```python + dict( + im_file=im_file, + shape=shape, # format: (height, width) + cls=cls, + bboxes=bboxes, # xywh + segments=segments, # xy + keypoints=keypoints, # xy + normalized=True, # or False + bbox_format="xyxy", # or xywh, ltwh + ) + ``` + """ + raise NotImplementedError diff --git a/ultralytics/data/build.py b/ultralytics/data/build.py new file mode 100644 index 0000000..07de91c --- /dev/null +++ b/ultralytics/data/build.py @@ -0,0 +1,177 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +import os +import random +from pathlib import Path + +import numpy as np +import torch +from PIL import Image +from torch.utils.data import dataloader, distributed + +from ultralytics.data.loaders import (LOADERS, LoadImages, LoadPilAndNumpy, LoadScreenshots, LoadStreams, LoadTensor, + SourceTypes, autocast_list) +from ultralytics.data.utils import IMG_FORMATS, VID_FORMATS +from ultralytics.utils import RANK, colorstr +from ultralytics.utils.checks import check_file + +from .dataset import YOLODataset +from .utils import PIN_MEMORY + + +class InfiniteDataLoader(dataloader.DataLoader): + """ + Dataloader that reuses workers. + + Uses same syntax as vanilla DataLoader. + """ + + def __init__(self, *args, **kwargs): + """Dataloader that infinitely recycles workers, inherits from DataLoader.""" + super().__init__(*args, **kwargs) + object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler)) + self.iterator = super().__iter__() + + def __len__(self): + """Returns the length of the batch sampler's sampler.""" + return len(self.batch_sampler.sampler) + + def __iter__(self): + """Creates a sampler that repeats indefinitely.""" + for _ in range(len(self)): + yield next(self.iterator) + + def reset(self): + """ + Reset iterator. + + This is useful when we want to modify settings of dataset while training. 
+ """ + self.iterator = self._get_iterator() + + +class _RepeatSampler: + """ + Sampler that repeats forever. + + Args: + sampler (Dataset.sampler): The sampler to repeat. + """ + + def __init__(self, sampler): + """Initializes an object that repeats a given sampler indefinitely.""" + self.sampler = sampler + + def __iter__(self): + """Iterates over the 'sampler' and yields its contents.""" + while True: + yield from iter(self.sampler) + + +def seed_worker(worker_id): # noqa + """Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader.""" + worker_seed = torch.initial_seed() % 2 ** 32 + np.random.seed(worker_seed) + random.seed(worker_seed) + + +def build_yolo_dataset(cfg, img_path, batch, data, mode='train', rect=False, stride=32): + """Build YOLO Dataset.""" + return YOLODataset( + img_path=img_path, + imgsz=cfg.imgsz, + batch_size=batch, + augment=mode == 'train', # augmentation + hyp=cfg, # TODO: probably add a get_hyps_from_cfg function + rect=cfg.rect or rect, # rectangular batches + cache=cfg.cache or None, + single_cls=cfg.single_cls or False, + stride=int(stride), + pad=0.0 if mode == 'train' else 0.5, + prefix=colorstr(f'{mode}: '), + use_segments=cfg.task == 'segment', + use_keypoints=cfg.task == 'pose', + classes=cfg.classes, + data=data, + fraction=cfg.fraction if mode == 'train' else 1.0) + + +def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1): + """Return an InfiniteDataLoader or DataLoader for training or validation set.""" + batch = min(batch, len(dataset)) + nd = torch.cuda.device_count() # number of CUDA devices + nw = min([os.cpu_count() // max(nd, 1), batch if batch > 1 else 0, workers]) # number of workers + sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) + generator = torch.Generator() + generator.manual_seed(6148914691236517205 + RANK) + return InfiniteDataLoader(dataset=dataset, + batch_size=batch, + shuffle=shuffle and sampler is None, + 
num_workers=nw, + sampler=sampler, + pin_memory=PIN_MEMORY, + collate_fn=getattr(dataset, 'collate_fn', None), + worker_init_fn=seed_worker, + generator=generator) + + +def check_source(source): + """Check source type and return corresponding flag values.""" + webcam, screenshot, from_img, in_memory, tensor = False, False, False, False, False + if isinstance(source, (str, int, Path)): # int for local usb camera + source = str(source) + is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS) + is_url = source.lower().startswith(('https://', 'http://', 'rtsp://', 'rtmp://', 'tcp://')) + webcam = source.isnumeric() or source.endswith('.streams') or (is_url and not is_file) + screenshot = source.lower() == 'screen' + if is_url and is_file: + source = check_file(source) # download + elif isinstance(source, LOADERS): + in_memory = True + elif isinstance(source, (list, tuple)): + source = autocast_list(source) # convert all list elements to PIL or np arrays + from_img = True + elif isinstance(source, (Image.Image, np.ndarray)): + from_img = True + elif isinstance(source, torch.Tensor): + tensor = True + else: + raise TypeError('Unsupported image type. For supported types see https://docs.ultralytics.com/modes/predict') + + return source, webcam, screenshot, from_img, in_memory, tensor + + +def load_inference_source(source=None, imgsz=640, vid_stride=1, buffer=False): + """ + Loads an inference source for object detection and applies necessary transformations. + + Args: + source (str, Path, Tensor, PIL.Image, np.ndarray): The input source for inference. + imgsz (int, optional): The size of the image for inference. Default is 640. + vid_stride (int, optional): The frame interval for video sources. Default is 1. + buffer (bool, optional): Determined whether stream frames will be buffered. Default is False. + + Returns: + dataset (Dataset): A dataset object for the specified input source. 
+ """ + source, webcam, screenshot, from_img, in_memory, tensor = check_source(source) + source_type = source.source_type if in_memory else SourceTypes(webcam, screenshot, from_img, tensor) + + # Dataloader + if tensor: + dataset = LoadTensor(source) + elif in_memory: + dataset = source + elif webcam: + dataset = LoadStreams(source, imgsz=imgsz, vid_stride=vid_stride, buffer=buffer) + elif screenshot: + dataset = LoadScreenshots(source, imgsz=imgsz) + elif from_img: + dataset = LoadPilAndNumpy(source, imgsz=imgsz) + else: + dataset = LoadImages(source, imgsz=imgsz, vid_stride=vid_stride) + + # Attach source types to the dataset + setattr(dataset, 'source_type', source_type) + + return dataset diff --git a/ultralytics/data/converter.py b/ultralytics/data/converter.py new file mode 100644 index 0000000..5714320 --- /dev/null +++ b/ultralytics/data/converter.py @@ -0,0 +1,304 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +import json +from collections import defaultdict +from pathlib import Path + +import cv2 +import numpy as np + +from ultralytics.utils import LOGGER, TQDM +from ultralytics.utils.files import increment_path + + +def coco91_to_coco80_class(): + """ + Converts 91-index COCO class IDs to 80-index COCO class IDs. + + Returns: + (list): A list of 91 class IDs where the index represents the 80-index class ID and the value is the + corresponding 91-index class ID. + """ + return [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, None, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, None, 24, 25, None, + None, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, None, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, None, 60, None, None, 61, None, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, + None, 73, 74, 75, 76, 77, 78, 79, None] + + +def coco80_to_coco91_class(): + """ + Converts 80-index (val2014) to 91-index (paper). + For details see https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/. 
+ + Example: + ```python + import numpy as np + + a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n') + b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n') + x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco + x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet + ``` + """ + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + + +def convert_coco(labels_dir='../coco/annotations/', + save_dir='coco_converted/', + use_segments=False, + use_keypoints=False, + cls91to80=True): + """ + Converts COCO dataset annotations to a YOLO annotation format suitable for training YOLO models. + + Args: + labels_dir (str, optional): Path to directory containing COCO dataset annotation files. + save_dir (str, optional): Path to directory to save results to. + use_segments (bool, optional): Whether to include segmentation masks in the output. + use_keypoints (bool, optional): Whether to include keypoint annotations in the output. + cls91to80 (bool, optional): Whether to map 91 COCO class IDs to the corresponding 80 COCO class IDs. + + Example: + ```python + from ultralytics.data.converter import convert_coco + + convert_coco('../datasets/coco/annotations/', use_segments=True, use_keypoints=False, cls91to80=True) + ``` + + Output: + Generates output files in the specified output directory. 
+ """ + + # Create dataset directory + save_dir = increment_path(save_dir) # increment if save directory already exists + for p in save_dir / 'labels', save_dir / 'images': + p.mkdir(parents=True, exist_ok=True) # make dir + + # Convert classes + coco80 = coco91_to_coco80_class() + + # Import json + for json_file in sorted(Path(labels_dir).resolve().glob('*.json')): + fn = Path(save_dir) / 'labels' / json_file.stem.replace('instances_', '') # folder name + fn.mkdir(parents=True, exist_ok=True) + with open(json_file) as f: + data = json.load(f) + + # Create image dict + images = {f'{x["id"]:d}': x for x in data['images']} + # Create image-annotations dict + imgToAnns = defaultdict(list) + for ann in data['annotations']: + imgToAnns[ann['image_id']].append(ann) + + # Write labels file + for img_id, anns in TQDM(imgToAnns.items(), desc=f'Annotations {json_file}'): + img = images[f'{img_id:d}'] + h, w, f = img['height'], img['width'], img['file_name'] + + bboxes = [] + segments = [] + keypoints = [] + for ann in anns: + if ann['iscrowd']: + continue + # The COCO box format is [top left x, top left y, width, height] + box = np.array(ann['bbox'], dtype=np.float64) + box[:2] += box[2:] / 2 # xy top-left corner to center + box[[0, 2]] /= w # normalize x + box[[1, 3]] /= h # normalize y + if box[2] <= 0 or box[3] <= 0: # if w <= 0 and h <= 0 + continue + + cls = coco80[ann['category_id'] - 1] if cls91to80 else ann['category_id'] - 1 # class + box = [cls] + box.tolist() + if box not in bboxes: + bboxes.append(box) + if use_segments and ann.get('segmentation') is not None: + if len(ann['segmentation']) == 0: + segments.append([]) + continue + elif len(ann['segmentation']) > 1: + s = merge_multi_segment(ann['segmentation']) + s = (np.concatenate(s, axis=0) / np.array([w, h])).reshape(-1).tolist() + else: + s = [j for i in ann['segmentation'] for j in i] # all segments concatenated + s = (np.array(s).reshape(-1, 2) / np.array([w, h])).reshape(-1).tolist() + s = [cls] + s + 
segments.append(s) + if use_keypoints and ann.get('keypoints') is not None: + keypoints.append(box + (np.array(ann['keypoints']).reshape(-1, 3) / + np.array([w, h, 1])).reshape(-1).tolist()) + + # Write + with open((fn / f).with_suffix('.txt'), 'a') as file: + for i in range(len(bboxes)): + if use_keypoints: + line = *(keypoints[i]), # cls, box, keypoints + else: + line = *(segments[i] + if use_segments and len(segments[i]) > 0 else bboxes[i]), # cls, box or segments + file.write(('%g ' * len(line)).rstrip() % line + '\n') + + LOGGER.info(f'COCO data converted successfully.\nResults saved to {save_dir.resolve()}') + + +def convert_dota_to_yolo_obb(dota_root_path: str): + """ + Converts DOTA dataset annotations to YOLO OBB (Oriented Bounding Box) format. + + The function processes images in the 'train' and 'val' folders of the DOTA dataset. For each image, it reads the + associated label from the original labels directory and writes new labels in YOLO OBB format to a new directory. + + Args: + dota_root_path (str): The root directory path of the DOTA dataset. 
+ + Example: + ```python + from ultralytics.data.converter import convert_dota_to_yolo_obb + + convert_dota_to_yolo_obb('path/to/DOTA') + ``` + + Notes: + The directory structure assumed for the DOTA dataset: + - DOTA + ├─ images + │ ├─ train + │ └─ val + └─ labels + ├─ train_original + └─ val_original + + After execution, the function will organize the labels into: + - DOTA + └─ labels + ├─ train + └─ val + """ + dota_root_path = Path(dota_root_path) + + # Class names to indices mapping + class_mapping = { + 'plane': 0, + 'ship': 1, + 'storage-tank': 2, + 'baseball-diamond': 3, + 'tennis-court': 4, + 'basketball-court': 5, + 'ground-track-field': 6, + 'harbor': 7, + 'bridge': 8, + 'large-vehicle': 9, + 'small-vehicle': 10, + 'helicopter': 11, + 'roundabout': 12, + 'soccer-ball-field': 13, + 'swimming-pool': 14, + 'container-crane': 15, + 'airport': 16, + 'helipad': 17} + + def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir): + """Converts a single image's DOTA annotation to YOLO OBB format and saves it to a specified directory.""" + orig_label_path = orig_label_dir / f'{image_name}.txt' + save_path = save_dir / f'{image_name}.txt' + + with orig_label_path.open('r') as f, save_path.open('w') as g: + lines = f.readlines() + for line in lines: + parts = line.strip().split() + if len(parts) < 9: + continue + class_name = parts[8] + class_idx = class_mapping[class_name] + coords = [float(p) for p in parts[:8]] + normalized_coords = [ + coords[i] / image_width if i % 2 == 0 else coords[i] / image_height for i in range(8)] + formatted_coords = ['{:.6g}'.format(coord) for coord in normalized_coords] + g.write(f"{class_idx} {' '.join(formatted_coords)}\n") + + for phase in ['train', 'val']: + image_dir = dota_root_path / 'images' / phase + orig_label_dir = dota_root_path / 'labels' / f'{phase}_original' + save_dir = dota_root_path / 'labels' / phase + + save_dir.mkdir(parents=True, exist_ok=True) + + image_paths = list(image_dir.iterdir()) + 
for image_path in TQDM(image_paths, desc=f'Processing {phase} images'): + if image_path.suffix != '.png': + continue + image_name_without_ext = image_path.stem + img = cv2.imread(str(image_path)) + h, w = img.shape[:2] + convert_label(image_name_without_ext, w, h, orig_label_dir, save_dir) + + +def min_index(arr1, arr2): + """ + Find a pair of indexes with the shortest distance between two arrays of 2D points. + + Args: + arr1 (np.array): A NumPy array of shape (N, 2) representing N 2D points. + arr2 (np.array): A NumPy array of shape (M, 2) representing M 2D points. + + Returns: + (tuple): A tuple containing the indexes of the points with the shortest distance in arr1 and arr2 respectively. + """ + dis = ((arr1[:, None, :] - arr2[None, :, :]) ** 2).sum(-1) + return np.unravel_index(np.argmin(dis, axis=None), dis.shape) + + +def merge_multi_segment(segments): + """ + Merge multiple segments into one list by connecting the coordinates with the minimum distance between each segment. + This function connects these coordinates with a thin line to merge all segments into one. + + Args: + segments (List[List]): Original segmentations in COCO's JSON file. + Each element is a list of coordinates, like [segmentation1, segmentation2,...]. + + Returns: + s (List[np.ndarray]): A list of connected segments represented as NumPy arrays. 
+ """ + s = [] + segments = [np.array(i).reshape(-1, 2) for i in segments] + idx_list = [[] for _ in range(len(segments))] + + # Record the indexes with min distance between each segment + for i in range(1, len(segments)): + idx1, idx2 = min_index(segments[i - 1], segments[i]) + idx_list[i - 1].append(idx1) + idx_list[i].append(idx2) + + # Use two round to connect all the segments + for k in range(2): + # Forward connection + if k == 0: + for i, idx in enumerate(idx_list): + # Middle segments have two indexes, reverse the index of middle segments + if len(idx) == 2 and idx[0] > idx[1]: + idx = idx[::-1] + segments[i] = segments[i][::-1, :] + + segments[i] = np.roll(segments[i], -idx[0], axis=0) + segments[i] = np.concatenate([segments[i], segments[i][:1]]) + # Deal with the first segment and the last one + if i in [0, len(idx_list) - 1]: + s.append(segments[i]) + else: + idx = [0, idx[1] - idx[0]] + s.append(segments[i][idx[0]:idx[1] + 1]) + + else: + for i in range(len(idx_list) - 1, -1, -1): + if i not in [0, len(idx_list) - 1]: + idx = idx_list[i] + nidx = abs(idx[1] - idx[0]) + s.append(segments[i][nidx:]) + return s diff --git a/ultralytics/data/dataset.py b/ultralytics/data/dataset.py new file mode 100644 index 0000000..068311e --- /dev/null +++ b/ultralytics/data/dataset.py @@ -0,0 +1,340 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +import contextlib +from itertools import repeat +from multiprocessing.pool import ThreadPool +from pathlib import Path + +import cv2 +import numpy as np +import torch +import torchvision + +from ultralytics.utils import LOCAL_RANK, NUM_THREADS, TQDM, colorstr, is_dir_writeable + +from .augment import Compose, Format, Instances, LetterBox, classify_albumentations, classify_transforms, v8_transforms +from .base import BaseDataset +from .utils import HELP_URL, LOGGER, get_hash, img2label_paths, verify_image, verify_image_label + +# Ultralytics dataset *.cache version, >= 1.0.0 for YOLOv8 +DATASET_CACHE_VERSION = '1.0.3' + + +class 
YOLODataset(BaseDataset): + """ + Dataset class for loading object detection and/or segmentation labels in YOLO format. + + Args: + data (dict, optional): A dataset YAML dictionary. Defaults to None. + use_segments (bool, optional): If True, segmentation masks are used as labels. Defaults to False. + use_keypoints (bool, optional): If True, keypoints are used as labels. Defaults to False. + + Returns: + (torch.utils.data.Dataset): A PyTorch dataset object that can be used for training an object detection model. + """ + + def __init__(self, *args, data=None, use_segments=False, use_keypoints=False, **kwargs): + """Initializes the YOLODataset with optional configurations for segments and keypoints.""" + self.use_segments = use_segments + self.use_keypoints = use_keypoints + self.data = data + assert not (self.use_segments and self.use_keypoints), 'Can not use both segments and keypoints.' + super().__init__(*args, **kwargs) + + def cache_labels(self, path=Path('./labels.cache')): + """ + Cache dataset labels, check images and read shapes. + + Args: + path (Path): path where to save the cache file (default: Path('./labels.cache')). + Returns: + (dict): labels. + """ + x = {'labels': []} + nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages + desc = f'{self.prefix}Scanning {path.parent / path.stem}...' + total = len(self.im_files) + nkpt, ndim = self.data.get('kpt_shape', (0, 0)) + if self.use_keypoints and (nkpt <= 0 or ndim not in (2, 3)): + raise ValueError("'kpt_shape' in data.yaml missing or incorrect. Should be a list with [number of " + "keypoints, number of dims (2 for x,y or 3 for x,y,visible)], i.e. 
'kpt_shape: [17, 3]'") + with ThreadPool(NUM_THREADS) as pool: + results = pool.imap(func=verify_image_label, + iterable=zip(self.im_files, self.label_files, repeat(self.prefix), + repeat(self.use_keypoints), repeat(len(self.data['names'])), repeat(nkpt), + repeat(ndim))) + pbar = TQDM(results, desc=desc, total=total) + for im_file, lb, shape, segments, keypoint, nm_f, nf_f, ne_f, nc_f, msg in pbar: + nm += nm_f + nf += nf_f + ne += ne_f + nc += nc_f + if im_file: + x['labels'].append( + dict( + im_file=im_file, + shape=shape, + cls=lb[:, 0:1], # n, 1 + bboxes=lb[:, 1:], # n, 4 + segments=segments, + keypoints=keypoint, + normalized=True, + bbox_format='xywh')) + if msg: + msgs.append(msg) + pbar.desc = f'{desc} {nf} images, {nm + ne} backgrounds, {nc} corrupt' + pbar.close() + + if msgs: + LOGGER.info('\n'.join(msgs)) + if nf == 0: + LOGGER.warning(f'{self.prefix}WARNING ⚠️ No labels found in {path}. {HELP_URL}') + x['hash'] = get_hash(self.label_files + self.im_files) + x['results'] = nf, nm, ne, nc, len(self.im_files) + x['msgs'] = msgs # warnings + save_dataset_cache_file(self.prefix, path, x) + return x + + def get_labels(self): + """Returns dictionary of labels for YOLO training.""" + self.label_files = img2label_paths(self.im_files) + cache_path = Path(self.label_files[0]).parent.with_suffix('.cache') + try: + cache, exists = load_dataset_cache_file(cache_path), True # attempt to load a *.cache file + assert cache['version'] == DATASET_CACHE_VERSION # matches current version + assert cache['hash'] == get_hash(self.label_files + self.im_files) # identical hash + except (FileNotFoundError, AssertionError, AttributeError): + cache, exists = self.cache_labels(cache_path), False # run cache ops + + # Display cache + nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupt, total + if exists and LOCAL_RANK in (-1, 0): + d = f'Scanning {cache_path}... 
{nf} images, {nm + ne} backgrounds, {nc} corrupt' + TQDM(None, desc=self.prefix + d, total=n, initial=n) # display results + if cache['msgs']: + LOGGER.info('\n'.join(cache['msgs'])) # display warnings + + # Read cache + [cache.pop(k) for k in ('hash', 'version', 'msgs')] # remove items + labels = cache['labels'] + if not labels: + LOGGER.warning(f'WARNING ⚠️ No images found in {cache_path}, training may not work correctly. {HELP_URL}') + self.im_files = [lb['im_file'] for lb in labels] # update im_files + + # Check if the dataset is all boxes or all segments + lengths = ((len(lb['cls']), len(lb['bboxes']), len(lb['segments'])) for lb in labels) + len_cls, len_boxes, len_segments = (sum(x) for x in zip(*lengths)) + if len_segments and len_boxes != len_segments: + LOGGER.warning( + f'WARNING ⚠️ Box and segment counts should be equal, but got len(segments) = {len_segments}, ' + f'len(boxes) = {len_boxes}. To resolve this only boxes will be used and all segments will be removed. ' + 'To avoid this please supply either a detect or segment dataset, not a detect-segment mixed dataset.') + for lb in labels: + lb['segments'] = [] + if len_cls == 0: + LOGGER.warning(f'WARNING ⚠️ No labels found in {cache_path}, training may not work correctly. 
{HELP_URL}') + return labels + + def build_transforms(self, hyp=None): + """Builds and appends transforms to the list.""" + if self.augment: + hyp.mosaic = hyp.mosaic if self.augment and not self.rect else 0.0 + hyp.mixup = hyp.mixup if self.augment and not self.rect else 0.0 + transforms = v8_transforms(self, self.imgsz, hyp) + else: + transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), scaleup=False)]) + transforms.append( + Format(bbox_format='xywh', + normalize=True, + return_mask=self.use_segments, + return_keypoint=self.use_keypoints, + batch_idx=True, + mask_ratio=hyp.mask_ratio, + mask_overlap=hyp.overlap_mask)) + return transforms + + def close_mosaic(self, hyp): + """Sets mosaic, copy_paste and mixup options to 0.0 and builds transformations.""" + hyp.mosaic = 0.0 # set mosaic ratio=0.0 + hyp.copy_paste = 0.0 # keep the same behavior as previous v8 close-mosaic + hyp.mixup = 0.0 # keep the same behavior as previous v8 close-mosaic + self.transforms = self.build_transforms(hyp) + + def update_labels_info(self, label): + """Custom your label format here.""" + # NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label + # We can make it also support classification and semantic segmentation by add or remove some dict keys there. 
+ bboxes = label.pop('bboxes') + segments = label.pop('segments') + keypoints = label.pop('keypoints', None) + bbox_format = label.pop('bbox_format') + normalized = label.pop('normalized') + label['instances'] = Instances(bboxes, segments, keypoints, bbox_format=bbox_format, normalized=normalized) + return label + + @staticmethod + def collate_fn(batch): + """Collates data samples into batches.""" + new_batch = {} + keys = batch[0].keys() + values = list(zip(*[list(b.values()) for b in batch])) + for i, k in enumerate(keys): + value = values[i] + if k == 'img': + value = torch.stack(value, 0) + if k in ['masks', 'keypoints', 'bboxes', 'cls']: + value = torch.cat(value, 0) + new_batch[k] = value + new_batch['batch_idx'] = list(new_batch['batch_idx']) + for i in range(len(new_batch['batch_idx'])): + new_batch['batch_idx'][i] += i # add target image index for build_targets() + new_batch['batch_idx'] = torch.cat(new_batch['batch_idx'], 0) + return new_batch + + +# Classification dataloaders ------------------------------------------------------------------------------------------- +class ClassificationDataset(torchvision.datasets.ImageFolder): + """ + YOLO Classification Dataset. + + Args: + root (str): Dataset path. + + Attributes: + cache_ram (bool): True if images should be cached in RAM, False otherwise. + cache_disk (bool): True if images should be cached on disk, False otherwise. + samples (list): List of samples containing file, index, npy, and im. + torch_transforms (callable): torchvision transforms applied to the dataset. + album_transforms (callable, optional): Albumentations transforms applied to the dataset if augment is True. + """ + + def __init__(self, root, args, augment=False, cache=False, prefix=''): + """ + Initialize YOLO object with root, image size, augmentations, and cache settings. + + Args: + root (str): Dataset path. + args (Namespace): Argument parser containing dataset related settings. 
+ augment (bool, optional): True if dataset should be augmented, False otherwise. Defaults to False. + cache (bool | str | optional): Cache setting, can be True, False, 'ram' or 'disk'. Defaults to False. + """ + super().__init__(root=root) + if augment and args.fraction < 1.0: # reduce training fraction + self.samples = self.samples[:round(len(self.samples) * args.fraction)] + self.prefix = colorstr(f'{prefix}: ') if prefix else '' + self.cache_ram = cache is True or cache == 'ram' + self.cache_disk = cache == 'disk' + self.samples = self.verify_images() # filter out bad images + self.samples = [list(x) + [Path(x[0]).with_suffix('.npy'), None] for x in self.samples] # file, index, npy, im + self.torch_transforms = classify_transforms(args.imgsz, rect=args.rect) + self.album_transforms = classify_albumentations( + augment=augment, + size=args.imgsz, + scale=(1.0 - args.scale, 1.0), # (0.08, 1.0) + hflip=args.fliplr, + vflip=args.flipud, + hsv_h=args.hsv_h, # HSV-Hue augmentation (fraction) + hsv_s=args.hsv_s, # HSV-Saturation augmentation (fraction) + hsv_v=args.hsv_v, # HSV-Value augmentation (fraction) + mean=(0.0, 0.0, 0.0), # IMAGENET_MEAN + std=(1.0, 1.0, 1.0), # IMAGENET_STD + auto_aug=False) if augment else None + + def __getitem__(self, i): + """Returns subset of data and targets corresponding to given indices.""" + f, j, fn, im = self.samples[i] # filename, index, filename.with_suffix('.npy'), image + if self.cache_ram and im is None: + im = self.samples[i][3] = cv2.imread(f) + elif self.cache_disk: + if not fn.exists(): # load npy + np.save(fn.as_posix(), cv2.imread(f), allow_pickle=False) + im = np.load(fn) + else: # read image + im = cv2.imread(f) # BGR + if self.album_transforms: + sample = self.album_transforms(image=cv2.cvtColor(im, cv2.COLOR_BGR2RGB))['image'] + else: + sample = self.torch_transforms(im) + return {'img': sample, 'cls': j} + + def __len__(self) -> int: + """Return the total number of samples in the dataset.""" + return 
len(self.samples) + + def verify_images(self): + """Verify all images in dataset.""" + desc = f'{self.prefix}Scanning {self.root}...' + path = Path(self.root).with_suffix('.cache') # *.cache file path + + with contextlib.suppress(FileNotFoundError, AssertionError, AttributeError): + cache = load_dataset_cache_file(path) # attempt to load a *.cache file + assert cache['version'] == DATASET_CACHE_VERSION # matches current version + assert cache['hash'] == get_hash([x[0] for x in self.samples]) # identical hash + nf, nc, n, samples = cache.pop('results') # found, missing, empty, corrupt, total + if LOCAL_RANK in (-1, 0): + d = f'{desc} {nf} images, {nc} corrupt' + TQDM(None, desc=d, total=n, initial=n) + if cache['msgs']: + LOGGER.info('\n'.join(cache['msgs'])) # display warnings + return samples + + # Run scan if *.cache retrieval failed + nf, nc, msgs, samples, x = 0, 0, [], [], {} + with ThreadPool(NUM_THREADS) as pool: + results = pool.imap(func=verify_image, iterable=zip(self.samples, repeat(self.prefix))) + pbar = TQDM(results, desc=desc, total=len(self.samples)) + for sample, nf_f, nc_f, msg in pbar: + if nf_f: + samples.append(sample) + if msg: + msgs.append(msg) + nf += nf_f + nc += nc_f + pbar.desc = f'{desc} {nf} images, {nc} corrupt' + pbar.close() + if msgs: + LOGGER.info('\n'.join(msgs)) + x['hash'] = get_hash([x[0] for x in self.samples]) + x['results'] = nf, nc, len(samples), samples + x['msgs'] = msgs # warnings + save_dataset_cache_file(self.prefix, path, x) + return samples + + +def load_dataset_cache_file(path): + """Load an Ultralytics *.cache dictionary from path.""" + import gc + gc.disable() # reduce pickle load time https://github.com/ultralytics/ultralytics/pull/1585 + cache = np.load(str(path), allow_pickle=True).item() # load dict + gc.enable() + return cache + + +def save_dataset_cache_file(prefix, path, x): + """Save an Ultralytics dataset *.cache dictionary x to path.""" + x['version'] = DATASET_CACHE_VERSION # add cache version + if 
is_dir_writeable(path.parent): + if path.exists(): + path.unlink() # remove *.cache file if exists + np.save(str(path), x) # save cache for next time + path.with_suffix('.cache.npy').rename(path) # remove .npy suffix + LOGGER.info(f'{prefix}New cache created: {path}') + else: + LOGGER.warning(f'{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable, cache not saved.') + + +# TODO: support semantic segmentation +class SemanticDataset(BaseDataset): + """ + Semantic Segmentation Dataset. + + This class is responsible for handling datasets used for semantic segmentation tasks. It inherits functionalities + from the BaseDataset class. + + Note: + This class is currently a placeholder and needs to be populated with methods and attributes for supporting + semantic segmentation tasks. + """ + + def __init__(self): + """Initialize a SemanticDataset object.""" + super().__init__() diff --git a/ultralytics/data/loaders.py b/ultralytics/data/loaders.py new file mode 100644 index 0000000..2545e9a --- /dev/null +++ b/ultralytics/data/loaders.py @@ -0,0 +1,528 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +import glob +import math +import os +import time +from dataclasses import dataclass +from pathlib import Path +from threading import Thread +from urllib.parse import urlparse + +import cv2 +import numpy as np +import requests +import torch +from PIL import Image + +from ultralytics.data.utils import IMG_FORMATS, VID_FORMATS +from ultralytics.utils import LOGGER, is_colab, is_kaggle, ops +from ultralytics.utils.checks import check_requirements + + +@dataclass +class SourceTypes: + """Class to represent various types of input sources for predictions.""" + webcam: bool = False + screenshot: bool = False + from_img: bool = False + tensor: bool = False + + +class LoadStreams: + """ + Stream Loader for various types of video streams. + + Suitable for use with `yolo predict source='rtsp://example.com/media.mp4'`, supports RTSP, RTMP, HTTP, and TCP streams. 
+ + Attributes: + sources (str): The source input paths or URLs for the video streams. + imgsz (int): The image size for processing, defaults to 640. + vid_stride (int): Video frame-rate stride, defaults to 1. + buffer (bool): Whether to buffer input streams, defaults to False. + running (bool): Flag to indicate if the streaming thread is running. + mode (str): Set to 'stream' indicating real-time capture. + imgs (list): List of image frames for each stream. + fps (list): List of FPS for each stream. + frames (list): List of total frames for each stream. + threads (list): List of threads for each stream. + shape (list): List of shapes for each stream. + caps (list): List of cv2.VideoCapture objects for each stream. + bs (int): Batch size for processing. + + Methods: + __init__: Initialize the stream loader. + update: Read stream frames in daemon thread. + close: Close stream loader and release resources. + __iter__: Returns an iterator object for the class. + __next__: Returns source paths, transformed, and original images for processing. + __len__: Return the length of the sources object. 
+ """ + + def __init__(self, sources='file.streams', imgsz=640, vid_stride=1, buffer=False): + """Initialize instance variables and check for consistent input stream shapes.""" + torch.backends.cudnn.benchmark = True # faster for fixed-size inference + self.buffer = buffer # buffer input streams + self.running = True # running flag for Thread + self.mode = 'stream' + self.imgsz = imgsz + self.vid_stride = vid_stride # video frame-rate stride + + sources = Path(sources).read_text().rsplit() if os.path.isfile(sources) else [sources] + n = len(sources) + self.fps = [0] * n # frames per second + self.frames = [0] * n + self.threads = [None] * n + self.caps = [None] * n # video capture objects + self.imgs = [[] for _ in range(n)] # images + self.shape = [[] for _ in range(n)] # image shapes + self.sources = [ops.clean_str(x) for x in sources] # clean source names for later + for i, s in enumerate(sources): # index, source + # Start thread to read frames from video stream + st = f'{i + 1}/{n}: {s}... ' + if urlparse(s).hostname in ('www.youtube.com', 'youtube.com', 'youtu.be'): # if source is YouTube video + # YouTube format i.e. 'https://www.youtube.com/watch?v=Zgi9g1ksQHc' or 'https://youtu.be/LNwODJXcvt4' + s = get_best_youtube_url(s) + s = eval(s) if s.isnumeric() else s # i.e. s = '0' local webcam + if s == 0 and (is_colab() or is_kaggle()): + raise NotImplementedError("'source=0' webcam not supported in Colab and Kaggle notebooks. 
" + "Try running 'source=0' in a local environment.") + self.caps[i] = cv2.VideoCapture(s) # store video capture object + if not self.caps[i].isOpened(): + raise ConnectionError(f'{st}Failed to open {s}') + w = int(self.caps[i].get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(self.caps[i].get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = self.caps[i].get(cv2.CAP_PROP_FPS) # warning: may return 0 or nan + self.frames[i] = max(int(self.caps[i].get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float( + 'inf') # infinite stream fallback + self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30 # 30 FPS fallback + + success, im = self.caps[i].read() # guarantee first frame + if not success or im is None: + raise ConnectionError(f'{st}Failed to read images from {s}') + self.imgs[i].append(im) + self.shape[i] = im.shape + self.threads[i] = Thread(target=self.update, args=([i, self.caps[i], s]), daemon=True) + LOGGER.info(f'{st}Success ✅ ({self.frames[i]} frames of shape {w}x{h} at {self.fps[i]:.2f} FPS)') + self.threads[i].start() + LOGGER.info('') # newline + + # Check for common shapes + self.bs = self.__len__() + + def update(self, i, cap, stream): + """Read stream `i` frames in daemon thread.""" + n, f = 0, self.frames[i] # frame number, frame array + while self.running and cap.isOpened() and n < (f - 1): + if len(self.imgs[i]) < 30: # keep a <=30-image buffer + n += 1 + cap.grab() # .read() = .grab() followed by .retrieve() + if n % self.vid_stride == 0: + success, im = cap.retrieve() + if not success: + im = np.zeros(self.shape[i], dtype=np.uint8) + LOGGER.warning('WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.') + cap.open(stream) # re-open stream if signal was lost + if self.buffer: + self.imgs[i].append(im) + else: + self.imgs[i] = [im] + else: + time.sleep(0.01) # wait until the buffer is empty + + def close(self): + """Close stream loader and release resources.""" + self.running = False # stop flag for Thread + for thread in self.threads: + if 
thread.is_alive(): + thread.join(timeout=5) # Add timeout + for cap in self.caps: # Iterate through the stored VideoCapture objects + try: + cap.release() # release video capture + except Exception as e: + LOGGER.warning(f'WARNING ⚠️ Could not release VideoCapture object: {e}') + cv2.destroyAllWindows() + + def __iter__(self): + """Iterates through YOLO image feed and re-opens unresponsive streams.""" + self.count = -1 + return self + + def __next__(self): + """Returns source paths, transformed and original images for processing.""" + self.count += 1 + + images = [] + for i, x in enumerate(self.imgs): + + # Wait until a frame is available in each buffer + while not x: + if not self.threads[i].is_alive() or cv2.waitKey(1) == ord('q'): # q to quit + self.close() + raise StopIteration + time.sleep(1 / min(self.fps)) + x = self.imgs[i] + if not x: + LOGGER.warning(f'WARNING ⚠️ Waiting for stream {i}') + + # Get and remove the first frame from imgs buffer + if self.buffer: + images.append(x.pop(0)) + + # Get the last frame, and clear the rest from the imgs buffer + else: + images.append(x.pop(-1) if x else np.zeros(self.shape[i], dtype=np.uint8)) + x.clear() + + return self.sources, images, None, '' + + def __len__(self): + """Return the length of the sources object.""" + return len(self.sources) # 1E12 frames = 32 streams at 30 FPS for 30 years + + +class LoadScreenshots: + """ + YOLOv8 screenshot dataloader. + + This class manages the loading of screenshot images for processing with YOLOv8. + Suitable for use with `yolo predict source=screen`. + + Attributes: + source (str): The source input indicating which screen to capture. + imgsz (int): The image size for processing, defaults to 640. + screen (int): The screen number to capture. + left (int): The left coordinate for screen capture area. + top (int): The top coordinate for screen capture area. + width (int): The width of the screen capture area. + height (int): The height of the screen capture area. 
+ mode (str): Set to 'stream' indicating real-time capture. + frame (int): Counter for captured frames. + sct (mss.mss): Screen capture object from `mss` library. + bs (int): Batch size, set to 1. + monitor (dict): Monitor configuration details. + + Methods: + __iter__: Returns an iterator object. + __next__: Captures the next screenshot and returns it. + """ + + def __init__(self, source, imgsz=640): + """Source = [screen_number left top width height] (pixels).""" + check_requirements('mss') + import mss # noqa + + source, *params = source.split() + self.screen, left, top, width, height = 0, None, None, None, None # default to full screen 0 + if len(params) == 1: + self.screen = int(params[0]) + elif len(params) == 4: + left, top, width, height = (int(x) for x in params) + elif len(params) == 5: + self.screen, left, top, width, height = (int(x) for x in params) + self.imgsz = imgsz + self.mode = 'stream' + self.frame = 0 + self.sct = mss.mss() + self.bs = 1 + + # Parse monitor shape + monitor = self.sct.monitors[self.screen] + self.top = monitor['top'] if top is None else (monitor['top'] + top) + self.left = monitor['left'] if left is None else (monitor['left'] + left) + self.width = width or monitor['width'] + self.height = height or monitor['height'] + self.monitor = {'left': self.left, 'top': self.top, 'width': self.width, 'height': self.height} + + def __iter__(self): + """Returns an iterator of the object.""" + return self + + def __next__(self): + """mss screen capture: get raw pixels from the screen as np array.""" + im0 = np.asarray(self.sct.grab(self.monitor))[:, :, :3] # BGRA to BGR + s = f'screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: ' + + self.frame += 1 + return [str(self.screen)], [im0], None, s # screen, img, vid_cap, string + + +class LoadImages: + """ + YOLOv8 image/video dataloader. + + This class manages the loading and pre-processing of image and video data for YOLOv8. 
It supports loading from + various formats, including single image files, video files, and lists of image and video paths. + + Attributes: + imgsz (int): Image size, defaults to 640. + files (list): List of image and video file paths. + nf (int): Total number of files (images and videos). + video_flag (list): Flags indicating whether a file is a video (True) or an image (False). + mode (str): Current mode, 'image' or 'video'. + vid_stride (int): Stride for video frame-rate, defaults to 1. + bs (int): Batch size, set to 1 for this class. + cap (cv2.VideoCapture): Video capture object for OpenCV. + frame (int): Frame counter for video. + frames (int): Total number of frames in the video. + count (int): Counter for iteration, initialized at 0 during `__iter__()`. + + Methods: + _new_video(path): Create a new cv2.VideoCapture object for a given video path. + """ + + def __init__(self, path, imgsz=640, vid_stride=1): + """Initialize the Dataloader and raise FileNotFoundError if file not found.""" + parent = None + if isinstance(path, str) and Path(path).suffix == '.txt': # *.txt file with img/vid/dir on each line + parent = Path(path).parent + path = Path(path).read_text().splitlines() # list of sources + files = [] + for p in sorted(path) if isinstance(path, (list, tuple)) else [path]: + a = str(Path(p).absolute()) # do not use .resolve() https://github.com/ultralytics/ultralytics/issues/2912 + if '*' in a: + files.extend(sorted(glob.glob(a, recursive=True))) # glob + elif os.path.isdir(a): + files.extend(sorted(glob.glob(os.path.join(a, '*.*')))) # dir + elif os.path.isfile(a): + files.append(a) # files (absolute or relative to CWD) + elif parent and (parent / p).is_file(): + files.append(str((parent / p).absolute())) # files (relative to *.txt file parent) + else: + raise FileNotFoundError(f'{p} does not exist') + + images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS] + videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS] + ni, nv 
= len(images), len(videos) + + self.imgsz = imgsz + self.files = images + videos + self.nf = ni + nv # number of files + self.video_flag = [False] * ni + [True] * nv + self.mode = 'image' + self.vid_stride = vid_stride # video frame-rate stride + self.bs = 1 + if any(videos): + self._new_video(videos[0]) # new video + else: + self.cap = None + if self.nf == 0: + raise FileNotFoundError(f'No images or videos found in {p}. ' + f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}') + + def __iter__(self): + """Returns an iterator object for VideoStream or ImageFolder.""" + self.count = 0 + return self + + def __next__(self): + """Return next image, path and metadata from dataset.""" + if self.count == self.nf: + raise StopIteration + path = self.files[self.count] + + if self.video_flag[self.count]: + # Read video + self.mode = 'video' + for _ in range(self.vid_stride): + self.cap.grab() + success, im0 = self.cap.retrieve() + while not success: + self.count += 1 + self.cap.release() + if self.count == self.nf: # last video + raise StopIteration + path = self.files[self.count] + self._new_video(path) + success, im0 = self.cap.read() + + self.frame += 1 + # im0 = self._cv2_rotate(im0) # for use if cv2 autorotation is False + s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: ' + + else: + # Read image + self.count += 1 + im0 = cv2.imread(path) # BGR + if im0 is None: + raise FileNotFoundError(f'Image Not Found {path}') + s = f'image {self.count}/{self.nf} {path}: ' + + return [path], [im0], self.cap, s + + def _new_video(self, path): + """Create a new video capture object.""" + self.frame = 0 + self.cap = cv2.VideoCapture(path) + self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride) + + def __len__(self): + """Returns the number of files in the object.""" + return self.nf # number of files + + +class LoadPilAndNumpy: + """ + Load images from PIL and Numpy arrays for batch processing. 
+ + This class is designed to manage loading and pre-processing of image data from both PIL and Numpy formats. + It performs basic validation and format conversion to ensure that the images are in the required format for + downstream processing. + + Attributes: + paths (list): List of image paths or autogenerated filenames. + im0 (list): List of images stored as Numpy arrays. + imgsz (int): Image size, defaults to 640. + mode (str): Type of data being processed, defaults to 'image'. + bs (int): Batch size, equivalent to the length of `im0`. + count (int): Counter for iteration, initialized at 0 during `__iter__()`. + + Methods: + _single_check(im): Validate and format a single image to a Numpy array. + """ + + def __init__(self, im0, imgsz=640): + """Initialize PIL and Numpy Dataloader.""" + if not isinstance(im0, list): + im0 = [im0] + self.paths = [getattr(im, 'filename', f'image{i}.jpg') for i, im in enumerate(im0)] + self.im0 = [self._single_check(im) for im in im0] + self.imgsz = imgsz + self.mode = 'image' + # Generate fake paths + self.bs = len(self.im0) + + @staticmethod + def _single_check(im): + """Validate and format an image to numpy array.""" + assert isinstance(im, (Image.Image, np.ndarray)), f'Expected PIL/np.ndarray image type, but got {type(im)}' + if isinstance(im, Image.Image): + if im.mode != 'RGB': + im = im.convert('RGB') + im = np.asarray(im)[:, :, ::-1] + im = np.ascontiguousarray(im) # contiguous + return im + + def __len__(self): + """Returns the length of the 'im0' attribute.""" + return len(self.im0) + + def __next__(self): + """Returns batch paths, images, processed images, None, ''.""" + if self.count == 1: # loop only once as it's batch inference + raise StopIteration + self.count += 1 + return self.paths, self.im0, None, '' + + def __iter__(self): + """Enables iteration for class LoadPilAndNumpy.""" + self.count = 0 + return self + + +class LoadTensor: + """ + Load images from torch.Tensor data. 
+ + This class manages the loading and pre-processing of image data from PyTorch tensors for further processing. + + Attributes: + im0 (torch.Tensor): The input tensor containing the image(s). + bs (int): Batch size, inferred from the shape of `im0`. + mode (str): Current mode, set to 'image'. + paths (list): List of image paths or filenames. + count (int): Counter for iteration, initialized at 0 during `__iter__()`. + + Methods: + _single_check(im, stride): Validate and possibly modify the input tensor. + """ + + def __init__(self, im0) -> None: + """Initialize Tensor Dataloader.""" + self.im0 = self._single_check(im0) + self.bs = self.im0.shape[0] + self.mode = 'image' + self.paths = [getattr(im, 'filename', f'image{i}.jpg') for i, im in enumerate(im0)] + + @staticmethod + def _single_check(im, stride=32): + """Validate and format an image to torch.Tensor.""" + s = f'WARNING ⚠️ torch.Tensor inputs should be BCHW i.e. shape(1, 3, 640, 640) ' \ + f'divisible by stride {stride}. Input shape{tuple(im.shape)} is incompatible.' + if len(im.shape) != 4: + if len(im.shape) != 3: + raise ValueError(s) + LOGGER.warning(s) + im = im.unsqueeze(0) + if im.shape[2] % stride or im.shape[3] % stride: + raise ValueError(s) + if im.max() > 1.0 + torch.finfo(im.dtype).eps: # torch.float32 eps is 1.2e-07 + LOGGER.warning(f'WARNING ⚠️ torch.Tensor inputs should be normalized 0.0-1.0 but max value is {im.max()}. 
' + f'Dividing input by 255.') + im = im.float() / 255.0 + + return im + + def __iter__(self): + """Returns an iterator object.""" + self.count = 0 + return self + + def __next__(self): + """Return next item in the iterator.""" + if self.count == 1: + raise StopIteration + self.count += 1 + return self.paths, self.im0, None, '' + + def __len__(self): + """Returns the batch size.""" + return self.bs + + +def autocast_list(source): + """Merges a list of source of different types into a list of numpy arrays or PIL images.""" + files = [] + for im in source: + if isinstance(im, (str, Path)): # filename or uri + files.append(Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im)) + elif isinstance(im, (Image.Image, np.ndarray)): # PIL or np Image + files.append(im) + else: + raise TypeError(f'type {type(im).__name__} is not a supported Ultralytics prediction source type. \n' + f'See https://docs.ultralytics.com/modes/predict for supported source types.') + + return files + + +LOADERS = LoadStreams, LoadPilAndNumpy, LoadImages, LoadScreenshots # tuple + + +def get_best_youtube_url(url, use_pafy=True): + """ + Retrieves the URL of the best quality MP4 video stream from a given YouTube video. + + This function uses the pafy or yt_dlp library to extract the video info from YouTube. It then finds the highest + quality MP4 format that has video codec but no audio codec, and returns the URL of this video stream. + + Args: + url (str): The URL of the YouTube video. + use_pafy (bool): Use the pafy package, default=True, otherwise use yt_dlp package. + + Returns: + (str): The URL of the best quality MP4 video stream, or None if no suitable stream is found. 
+ """ + if use_pafy: + check_requirements(('pafy', 'youtube_dl==2020.12.2')) + import pafy # noqa + return pafy.new(url).getbestvideo(preftype='mp4').url + else: + check_requirements('yt-dlp') + import yt_dlp + with yt_dlp.YoutubeDL({'quiet': True}) as ydl: + info_dict = ydl.extract_info(url, download=False) # extract info + for f in reversed(info_dict.get('formats', [])): # reversed because best is usually last + # Find a format with video codec, no audio, *.mp4 extension at least 1920x1080 size + good_size = (f.get('width') or 0) >= 1920 or (f.get('height') or 0) >= 1080 + if good_size and f['vcodec'] != 'none' and f['acodec'] == 'none' and f['ext'] == 'mp4': + return f.get('url') diff --git a/ultralytics/data/scripts/download_weights.sh b/ultralytics/data/scripts/download_weights.sh new file mode 100644 index 0000000..87db31f --- /dev/null +++ b/ultralytics/data/scripts/download_weights.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Ultralytics YOLO 🚀, AGPL-3.0 license +# Download latest models from https://github.com/ultralytics/assets/releases +# Example usage: bash ultralytics/data/scripts/download_weights.sh +# parent +# └── weights +# ├── yolov8n.pt ← downloads here +# ├── yolov8s.pt +# └── ... 
+ +python - < 9) & (shape[1] > 9), f'image size {shape} <10 pixels' + assert im.format.lower() in IMG_FORMATS, f'invalid image format {im.format}' + if im.format.lower() in ('jpg', 'jpeg'): + with open(im_file, 'rb') as f: + f.seek(-2, 2) + if f.read() != b'\xff\xd9': # corrupt JPEG + ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100) + msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved' + nf = 1 + except Exception as e: + nc = 1 + msg = f'{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}' + return (im_file, cls), nf, nc, msg + + +def verify_image_label(args): + """Verify one image-label pair.""" + im_file, lb_file, prefix, keypoint, num_cls, nkpt, ndim = args + # Number (missing, found, empty, corrupt), message, segments, keypoints + nm, nf, ne, nc, msg, segments, keypoints = 0, 0, 0, 0, '', [], None + try: + # Verify images + im = Image.open(im_file) + im.verify() # PIL verify + shape = exif_size(im) # image size + shape = (shape[1], shape[0]) # hw + assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels' + assert im.format.lower() in IMG_FORMATS, f'invalid image format {im.format}' + if im.format.lower() in ('jpg', 'jpeg'): + with open(im_file, 'rb') as f: + f.seek(-2, 2) + if f.read() != b'\xff\xd9': # corrupt JPEG + ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100) + msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved' + + # Verify labels + if os.path.isfile(lb_file): + nf = 1 # label found + with open(lb_file) as f: + lb = [x.split() for x in f.read().strip().splitlines() if len(x)] + if any(len(x) > 6 for x in lb) and (not keypoint): # is segment + classes = np.array([x[0] for x in lb], dtype=np.float32) + segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb] # (cls, xy1...) 
+ lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh) + lb = np.array(lb, dtype=np.float32) + nl = len(lb) + if nl: + if keypoint: + assert lb.shape[1] == (5 + nkpt * ndim), f'labels require {(5 + nkpt * ndim)} columns each' + points = lb[:, 5:].reshape(-1, ndim)[:, :2] + else: + assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected' + points = lb[:, 1:] + assert points.max() <= 1, f'non-normalized or out of bounds coordinates {points[points > 1]}' + assert lb.min() >= 0, f'negative label values {lb[lb < 0]}' + + # All labels + max_cls = lb[:, 0].max() # max label count + assert max_cls <= num_cls, \ + f'Label class {int(max_cls)} exceeds dataset class count {num_cls}. ' \ + f'Possible class labels are 0-{num_cls - 1}' + _, i = np.unique(lb, axis=0, return_index=True) + if len(i) < nl: # duplicate row check + lb = lb[i] # remove duplicates + if segments: + segments = [segments[x] for x in i] + msg = f'{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed' + else: + ne = 1 # label empty + lb = np.zeros((0, (5 + nkpt * ndim) if keypoint else 5), dtype=np.float32) + else: + nm = 1 # label missing + lb = np.zeros((0, (5 + nkpt * ndim) if keypoints else 5), dtype=np.float32) + if keypoint: + keypoints = lb[:, 5:].reshape(-1, nkpt, ndim) + if ndim == 2: + kpt_mask = np.where((keypoints[..., 0] < 0) | (keypoints[..., 1] < 0), 0.0, 1.0).astype(np.float32) + keypoints = np.concatenate([keypoints, kpt_mask[..., None]], axis=-1) # (nl, nkpt, 3) + lb = lb[:, :5] + return im_file, lb, shape, segments, keypoints, nm, nf, ne, nc, msg + except Exception as e: + nc = 1 + msg = f'{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}' + return [None, None, None, None, None, nm, nf, ne, nc, msg] + + +def polygon2mask(imgsz, polygons, color=1, downsample_ratio=1): + """ + Convert a list of polygons to a binary mask of the specified image size. 
+ + Args: + imgsz (tuple): The size of the image as (height, width). + polygons (list[np.ndarray]): A list of polygons. Each polygon is an array with shape [N, M], where + N is the number of polygons, and M is the number of points such that M % 2 = 0. + color (int, optional): The color value to fill in the polygons on the mask. Defaults to 1. + downsample_ratio (int, optional): Factor by which to downsample the mask. Defaults to 1. + + Returns: + (np.ndarray): A binary mask of the specified image size with the polygons filled in. + """ + mask = np.zeros(imgsz, dtype=np.uint8) + polygons = np.asarray(polygons, dtype=np.int32) + polygons = polygons.reshape((polygons.shape[0], -1, 2)) + cv2.fillPoly(mask, polygons, color=color) + nh, nw = (imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio) + # Note: fillPoly first then resize is trying to keep the same loss calculation method when mask-ratio=1 + return cv2.resize(mask, (nw, nh)) + + +def polygons2masks(imgsz, polygons, color, downsample_ratio=1): + """ + Convert a list of polygons to a set of binary masks of the specified image size. + + Args: + imgsz (tuple): The size of the image as (height, width). + polygons (list[np.ndarray]): A list of polygons. Each polygon is an array with shape [N, M], where + N is the number of polygons, and M is the number of points such that M % 2 = 0. + color (int): The color value to fill in the polygons on the masks. + downsample_ratio (int, optional): Factor by which to downsample each mask. Defaults to 1. + + Returns: + (np.ndarray): A set of binary masks of the specified image size with the polygons filled in. 
+ """ + return np.array([polygon2mask(imgsz, [x.reshape(-1)], color, downsample_ratio) for x in polygons]) + + +def polygons2masks_overlap(imgsz, segments, downsample_ratio=1): + """Return a (640, 640) overlap mask.""" + masks = np.zeros((imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio), + dtype=np.int32 if len(segments) > 255 else np.uint8) + areas = [] + ms = [] + for si in range(len(segments)): + mask = polygon2mask(imgsz, [segments[si].reshape(-1)], downsample_ratio=downsample_ratio, color=1) + ms.append(mask) + areas.append(mask.sum()) + areas = np.asarray(areas) + index = np.argsort(-areas) + ms = np.array(ms)[index] + for i in range(len(segments)): + mask = ms[i] * (i + 1) + masks = masks + mask + masks = np.clip(masks, a_min=0, a_max=i + 1) + return masks, index + + +def find_dataset_yaml(path: Path) -> Path: + """ + Find and return the YAML file associated with a Detect, Segment or Pose dataset. + + This function searches for a YAML file at the root level of the provided directory first, and if not found, it + performs a recursive search. It prefers YAML files that have the same stem as the provided path. An AssertionError + is raised if no YAML file is found or if multiple YAML files are found. + + Args: + path (Path): The directory path to search for the YAML file. + + Returns: + (Path): The path of the found YAML file. + """ + files = list(path.glob('*.yaml')) or list(path.rglob('*.yaml')) # try root level first and then recursive + assert files, f"No YAML file found in '{path.resolve()}'" + if len(files) > 1: + files = [f for f in files if f.stem == path.stem] # prefer *.yaml files that match + assert len(files) == 1, f"Expected 1 YAML file in '{path.resolve()}', but found {len(files)}.\n{files}" + return files[0] + + +def check_det_dataset(dataset, autodownload=True): + """ + Download, verify, and/or unzip a dataset if not found locally. 
+ + This function checks the availability of a specified dataset, and if not found, it has the option to download and + unzip the dataset. It then reads and parses the accompanying YAML data, ensuring key requirements are met and also + resolves paths related to the dataset. + + Args: + dataset (str): Path to the dataset or dataset descriptor (like a YAML file). + autodownload (bool, optional): Whether to automatically download the dataset if not found. Defaults to True. + + Returns: + (dict): Parsed dataset information and paths. + """ + + file = check_file(dataset) + + # Download (optional) + extract_dir = '' + if zipfile.is_zipfile(file) or is_tarfile(file): + new_dir = safe_download(file, dir=DATASETS_DIR, unzip=True, delete=False) + file = find_dataset_yaml(DATASETS_DIR / new_dir) + extract_dir, autodownload = file.parent, False + + # Read YAML + data = yaml_load(file, append_filename=True) # dictionary + + # Checks + for k in 'train', 'val': + if k not in data: + if k != 'val' or 'validation' not in data: + raise SyntaxError( + emojis(f"{dataset} '{k}:' key missing ❌.\n'train' and 'val' are required in all data YAMLs.")) + LOGGER.info("WARNING ⚠️ renaming data YAML 'validation' key to 'val' to match YOLO format.") + data['val'] = data.pop('validation') # replace 'validation' key with 'val' key + if 'names' not in data and 'nc' not in data: + raise SyntaxError(emojis(f"{dataset} key missing ❌.\n either 'names' or 'nc' are required in all data YAMLs.")) + if 'names' in data and 'nc' in data and len(data['names']) != data['nc']: + raise SyntaxError(emojis(f"{dataset} 'names' length {len(data['names'])} and 'nc: {data['nc']}' must match.")) + if 'names' not in data: + data['names'] = [f'class_{i}' for i in range(data['nc'])] + else: + data['nc'] = len(data['names']) + + data['names'] = check_class_names(data['names']) + + # Resolve paths + path = Path(extract_dir or data.get('path') or Path(data.get('yaml_file', '')).parent) # dataset root + if not 
path.is_absolute(): + path = (DATASETS_DIR / path).resolve() + + # Set paths + data['path'] = path # download scripts + for k in 'train', 'val', 'test': + if data.get(k): # prepend path + if isinstance(data[k], str): + x = (path / data[k]).resolve() + if not x.exists() and data[k].startswith('../'): + x = (path / data[k][3:]).resolve() + data[k] = str(x) + else: + data[k] = [str((path / x).resolve()) for x in data[k]] + + # Parse YAML + val, s = (data.get(x) for x in ('val', 'download')) + if val: + val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path + if not all(x.exists() for x in val): + name = clean_url(dataset) # dataset name with URL auth stripped + m = f"\nDataset '{name}' images not found ⚠️, missing path '{[x for x in val if not x.exists()][0]}'" + if s and autodownload: + LOGGER.warning(m) + else: + m += f"\nNote dataset download directory is '{DATASETS_DIR}'. You can update this in '{SETTINGS_YAML}'" + raise FileNotFoundError(m) + t = time.time() + r = None # success + if s.startswith('http') and s.endswith('.zip'): # URL + safe_download(url=s, dir=DATASETS_DIR, delete=True) + elif s.startswith('bash '): # bash script + LOGGER.info(f'Running {s} ...') + r = os.system(s) + else: # python script + exec(s, {'yaml': data}) + dt = f'({round(time.time() - t, 1)}s)' + s = f"success ✅ {dt}, saved to {colorstr('bold', DATASETS_DIR)}" if r in (0, None) else f'failure {dt} ❌' + LOGGER.info(f'Dataset download {s}\n') + check_font('Arial.ttf' if is_ascii(data['names']) else 'Arial.Unicode.ttf') # download fonts + + return data # dictionary + + +def check_cls_dataset(dataset, split=''): + """ + Checks a classification dataset such as Imagenet. + + This function accepts a `dataset` name and attempts to retrieve the corresponding dataset information. + If the dataset is not found locally, it attempts to download the dataset from the internet and save it locally. + + Args: + dataset (str | Path): The name of the dataset. 
+ split (str, optional): The split of the dataset. Either 'val', 'test', or ''. Defaults to ''. + + Returns: + (dict): A dictionary containing the following keys: + - 'train' (Path): The directory path containing the training set of the dataset. + - 'val' (Path): The directory path containing the validation set of the dataset. + - 'test' (Path): The directory path containing the test set of the dataset. + - 'nc' (int): The number of classes in the dataset. + - 'names' (dict): A dictionary of class names in the dataset. + """ + + # Download (optional if dataset=https://file.zip is passed directly) + if str(dataset).startswith(('http:/', 'https:/')): + dataset = safe_download(dataset, dir=DATASETS_DIR, unzip=True, delete=False) + + dataset = Path(dataset) + data_dir = (dataset if dataset.is_dir() else (DATASETS_DIR / dataset)).resolve() + if not data_dir.is_dir(): + LOGGER.warning(f'\nDataset not found ⚠️, missing path {data_dir}, attempting download...') + t = time.time() + if str(dataset) == 'imagenet': + subprocess.run(f"bash {ROOT / 'data/scripts/get_imagenet.sh'}", shell=True, check=True) + else: + url = f'https://github.com/ultralytics/yolov5/releases/download/v1.0/{dataset}.zip' + download(url, dir=data_dir.parent) + s = f"Dataset download success ✅ ({time.time() - t:.1f}s), saved to {colorstr('bold', data_dir)}\n" + LOGGER.info(s) + train_set = data_dir / 'train' + val_set = data_dir / 'val' if (data_dir / 'val').exists() else data_dir / 'validation' if \ + (data_dir / 'validation').exists() else None # data/test or data/val + test_set = data_dir / 'test' if (data_dir / 'test').exists() else None # data/val or data/test + if split == 'val' and not val_set: + LOGGER.warning("WARNING ⚠️ Dataset 'split=val' not found, using 'split=test' instead.") + elif split == 'test' and not test_set: + LOGGER.warning("WARNING ⚠️ Dataset 'split=test' not found, using 'split=val' instead.") + + nc = len([x for x in (data_dir / 'train').glob('*') if x.is_dir()]) # number of 
classes + names = [x.name for x in (data_dir / 'train').iterdir() if x.is_dir()] # class names list + names = dict(enumerate(sorted(names))) + + # Print to console + for k, v in {'train': train_set, 'val': val_set, 'test': test_set}.items(): + prefix = f'{colorstr(f"{k}:")} {v}...' + if v is None: + LOGGER.info(prefix) + else: + files = [path for path in v.rglob('*.*') if path.suffix[1:].lower() in IMG_FORMATS] + nf = len(files) # number of files + nd = len({file.parent for file in files}) # number of directories + if nf == 0: + if k == 'train': + raise FileNotFoundError(emojis(f"{dataset} '{k}:' no training images found ❌ ")) + else: + LOGGER.warning(f'{prefix} found {nf} images in {nd} classes: WARNING ⚠️ no images found') + elif nd != nc: + LOGGER.warning(f'{prefix} found {nf} images in {nd} classes: ERROR ❌️ requires {nc} classes, not {nd}') + else: + LOGGER.info(f'{prefix} found {nf} images in {nd} classes ✅ ') + + return {'train': train_set, 'val': val_set, 'test': test_set, 'nc': nc, 'names': names} + + +class HUBDatasetStats: + """ + A class for generating HUB dataset JSON and `-hub` dataset directory. + + Args: + path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco8.yaml'. + task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'. Default is 'detect'. + autodownload (bool): Attempt to download dataset if not found locally. Default is False. + + Example: + Download *.zip files from https://github.com/ultralytics/hub/tree/main/example_datasets + i.e. https://github.com/ultralytics/hub/raw/main/example_datasets/coco8.zip for coco8.zip. 
+ ```python + from ultralytics.data.utils import HUBDatasetStats + + stats = HUBDatasetStats('path/to/coco8.zip', task='detect') # detect dataset + stats = HUBDatasetStats('path/to/coco8-seg.zip', task='segment') # segment dataset + stats = HUBDatasetStats('path/to/coco8-pose.zip', task='pose') # pose dataset + stats = HUBDatasetStats('path/to/imagenet10.zip', task='classify') # classification dataset + + stats.get_json(save=True) + stats.process_images() + ``` + """ + + def __init__(self, path='coco8.yaml', task='detect', autodownload=False): + """Initialize class.""" + path = Path(path).resolve() + LOGGER.info(f'Starting HUB dataset checks for {path}....') + + self.task = task # detect, segment, pose, classify + if self.task == 'classify': + unzip_dir = unzip_file(path) + data = check_cls_dataset(unzip_dir) + data['path'] = unzip_dir + else: # detect, segment, pose + zipped, data_dir, yaml_path = self._unzip(Path(path)) + try: + # Load YAML with checks + data = yaml_load(yaml_path) + data['path'] = '' # strip path since YAML should be in dataset root for all HUB datasets + yaml_save(yaml_path, data) + data = check_det_dataset(yaml_path, autodownload) # dict + data['path'] = data_dir # YAML path should be set to '' (relative) or parent (absolute) + except Exception as e: + raise Exception('error/HUB/dataset_stats/init') from e + + self.hub_dir = Path(f'{data["path"]}-hub') + self.im_dir = self.hub_dir / 'images' + self.im_dir.mkdir(parents=True, exist_ok=True) # makes /images + self.stats = {'nc': len(data['names']), 'names': list(data['names'].values())} # statistics dictionary + self.data = data + + @staticmethod + def _unzip(path): + """Unzip data.zip.""" + if not str(path).endswith('.zip'): # path is data.yaml + return False, None, path + unzip_dir = unzip_file(path, path=path.parent) + assert unzip_dir.is_dir(), f'Error unzipping {path}, {unzip_dir} not found. 
' \ + f'path/to/abc.zip MUST unzip to path/to/abc/' + return True, str(unzip_dir), find_dataset_yaml(unzip_dir) # zipped, data_dir, yaml_path + + def _hub_ops(self, f): + """Saves a compressed image for HUB previews.""" + compress_one_image(f, self.im_dir / Path(f).name) # save to dataset-hub + + def get_json(self, save=False, verbose=False): + """Return dataset JSON for Ultralytics HUB.""" + + def _round(labels): + """Update labels to integer class and 4 decimal place floats.""" + if self.task == 'detect': + coordinates = labels['bboxes'] + elif self.task == 'segment': + coordinates = [x.flatten() for x in labels['segments']] + elif self.task == 'pose': + n = labels['keypoints'].shape[0] + coordinates = np.concatenate((labels['bboxes'], labels['keypoints'].reshape(n, -1)), 1) + else: + raise ValueError('Undefined dataset task.') + zipped = zip(labels['cls'], coordinates) + return [[int(c[0]), *(round(float(x), 4) for x in points)] for c, points in zipped] + + for split in 'train', 'val', 'test': + self.stats[split] = None # predefine + path = self.data.get(split) + + # Check split + if path is None: # no split + continue + files = [f for f in Path(path).rglob('*.*') if f.suffix[1:].lower() in IMG_FORMATS] # image files in split + if not files: # no images + continue + + # Get dataset statistics + if self.task == 'classify': + from torchvision.datasets import ImageFolder + + dataset = ImageFolder(self.data[split]) + + x = np.zeros(len(dataset.classes)).astype(int) + for im in dataset.imgs: + x[im[1]] += 1 + + self.stats[split] = { + 'instance_stats': { + 'total': len(dataset), + 'per_class': x.tolist()}, + 'image_stats': { + 'total': len(dataset), + 'unlabelled': 0, + 'per_class': x.tolist()}, + 'labels': [{ + Path(k).name: v} for k, v in dataset.imgs]} + else: + from ultralytics.data import YOLODataset + + dataset = YOLODataset(img_path=self.data[split], + data=self.data, + use_segments=self.task == 'segment', + use_keypoints=self.task == 'pose') + x = np.array([ 
+ np.bincount(label['cls'].astype(int).flatten(), minlength=self.data['nc']) + for label in TQDM(dataset.labels, total=len(dataset), desc='Statistics')]) # shape(128x80) + self.stats[split] = { + 'instance_stats': { + 'total': int(x.sum()), + 'per_class': x.sum(0).tolist()}, + 'image_stats': { + 'total': len(dataset), + 'unlabelled': int(np.all(x == 0, 1).sum()), + 'per_class': (x > 0).sum(0).tolist()}, + 'labels': [{ + Path(k).name: _round(v)} for k, v in zip(dataset.im_files, dataset.labels)]} + + # Save, print and return + if save: + stats_path = self.hub_dir / 'stats.json' + LOGGER.info(f'Saving {stats_path.resolve()}...') + with open(stats_path, 'w') as f: + json.dump(self.stats, f) # save stats.json + if verbose: + LOGGER.info(json.dumps(self.stats, indent=2, sort_keys=False)) + return self.stats + + def process_images(self): + """Compress images for Ultralytics HUB.""" + from ultralytics.data import YOLODataset # ClassificationDataset + + for split in 'train', 'val', 'test': + if self.data.get(split) is None: + continue + dataset = YOLODataset(img_path=self.data[split], data=self.data) + with ThreadPool(NUM_THREADS) as pool: + for _ in TQDM(pool.imap(self._hub_ops, dataset.im_files), total=len(dataset), desc=f'{split} images'): + pass + LOGGER.info(f'Done. All images saved to {self.im_dir}') + return self.im_dir + + +def compress_one_image(f, f_new=None, max_dim=1920, quality=50): + """ + Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the Python + Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will not be + resized. + + Args: + f (str): The path to the input image file. + f_new (str, optional): The path to the output image file. If not specified, the input file will be overwritten. + max_dim (int, optional): The maximum dimension (width or height) of the output image. Default is 1920 pixels. 
+ quality (int, optional): The image compression quality as a percentage. Default is 50%. + + Example: + ```python + from pathlib import Path + from ultralytics.data.utils import compress_one_image + + for f in Path('path/to/dataset').rglob('*.jpg'): + compress_one_image(f) + ``` + """ + + try: # use PIL + im = Image.open(f) + r = max_dim / max(im.height, im.width) # ratio + if r < 1.0: # image too large + im = im.resize((int(im.width * r), int(im.height * r))) + im.save(f_new or f, 'JPEG', quality=quality, optimize=True) # save + except Exception as e: # use OpenCV + LOGGER.info(f'WARNING ⚠️ HUB ops PIL failure {f}: {e}') + im = cv2.imread(f) + im_height, im_width = im.shape[:2] + r = max_dim / max(im_height, im_width) # ratio + if r < 1.0: # image too large + im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA) + cv2.imwrite(str(f_new or f), im) + + +def autosplit(path=DATASETS_DIR / 'coco8/images', weights=(0.9, 0.1, 0.0), annotated_only=False): + """ + Automatically split a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files. + + Args: + path (Path, optional): Path to images directory. Defaults to DATASETS_DIR / 'coco8/images'. + weights (list | tuple, optional): Train, validation, and test split fractions. Defaults to (0.9, 0.1, 0.0). + annotated_only (bool, optional): If True, only images with an associated txt file are used. Defaults to False. 
+ + Example: + ```python + from ultralytics.data.utils import autosplit + + autosplit() + ``` + """ + + path = Path(path) # images dir + files = sorted(x for x in path.rglob('*.*') if x.suffix[1:].lower() in IMG_FORMATS) # image files only + n = len(files) # number of files + random.seed(0) # for reproducibility + indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split + + txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt'] # 3 txt files + for x in txt: + if (path.parent / x).exists(): + (path.parent / x).unlink() # remove existing + + LOGGER.info(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only) + for i, img in TQDM(zip(indices, files), total=n): + if not annotated_only or Path(img2label_paths([str(img)])[0]).exists(): # check label + with open(path.parent / txt[i], 'a') as f: + f.write(f'./{img.relative_to(path.parent).as_posix()}' + '\n') # add image to txt file diff --git a/ultralytics/engine/__init__.py b/ultralytics/engine/__init__.py new file mode 100644 index 0000000..9e68dc1 --- /dev/null +++ b/ultralytics/engine/__init__.py @@ -0,0 +1 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py new file mode 100644 index 0000000..feaaf98 --- /dev/null +++ b/ultralytics/engine/exporter.py @@ -0,0 +1,1022 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +""" +Export a YOLOv8 PyTorch model to other formats. 
TensorFlow exports authored by https://github.com/zldrobit + +Format | `format=argument` | Model +--- | --- | --- +PyTorch | - | yolov8n.pt +TorchScript | `torchscript` | yolov8n.torchscript +ONNX | `onnx` | yolov8n.onnx +OpenVINO | `openvino` | yolov8n_openvino_model/ +TensorRT | `engine` | yolov8n.engine +CoreML | `coreml` | yolov8n.mlpackage +TensorFlow SavedModel | `saved_model` | yolov8n_saved_model/ +TensorFlow GraphDef | `pb` | yolov8n.pb +TensorFlow Lite | `tflite` | yolov8n.tflite +TensorFlow Edge TPU | `edgetpu` | yolov8n_edgetpu.tflite +TensorFlow.js | `tfjs` | yolov8n_web_model/ +PaddlePaddle | `paddle` | yolov8n_paddle_model/ +ncnn | `ncnn` | yolov8n_ncnn_model/ + +Requirements: + $ pip install "ultralytics[export]" + +Python: + from ultralytics import YOLO + model = YOLO('yolov8n.pt') + results = model.export(format='onnx') + +CLI: + $ yolo mode=export model=yolov8n.pt format=onnx + +Inference: + $ yolo predict model=yolov8n.pt # PyTorch + yolov8n.torchscript # TorchScript + yolov8n.onnx # ONNX Runtime or OpenCV DNN with dnn=True + yolov8n_openvino_model # OpenVINO + yolov8n.engine # TensorRT + yolov8n.mlpackage # CoreML (macOS-only) + yolov8n_saved_model # TensorFlow SavedModel + yolov8n.pb # TensorFlow GraphDef + yolov8n.tflite # TensorFlow Lite + yolov8n_edgetpu.tflite # TensorFlow Edge TPU + yolov8n_paddle_model # PaddlePaddle + +TensorFlow.js: + $ cd .. 
&& git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example + $ npm install + $ ln -s ../../yolov5/yolov8n_web_model public/yolov8n_web_model + $ npm start +""" +import json +import os +import shutil +import subprocess +import time +import warnings +from copy import deepcopy +from datetime import datetime +from pathlib import Path + +import numpy as np +import torch + +from ultralytics.cfg import get_cfg +from ultralytics.data.dataset import YOLODataset +from ultralytics.data.utils import check_det_dataset +from ultralytics.nn.autobackend import check_class_names +from ultralytics.nn.modules import C2f, Detect, RTDETRDecoder +from ultralytics.nn.tasks import DetectionModel, SegmentationModel +from ultralytics.utils import (ARM64, DEFAULT_CFG, LINUX, LOGGER, MACOS, ROOT, WINDOWS, __version__, callbacks, + colorstr, get_default_args, yaml_save) +from ultralytics.utils.checks import check_imgsz, check_is_path_safe, check_requirements, check_version +from ultralytics.utils.downloads import attempt_download_asset, get_github_assets +from ultralytics.utils.files import file_size, spaces_in_path +from ultralytics.utils.ops import Profile +from ultralytics.utils.torch_utils import get_latest_opset, select_device, smart_inference_mode + + +def export_formats(): + """YOLOv8 export formats.""" + import pandas + x = [ + ['PyTorch', '-', '.pt', True, True], + ['TorchScript', 'torchscript', '.torchscript', True, True], + ['ONNX', 'onnx', '.onnx', True, True], + ['OpenVINO', 'openvino', '_openvino_model', True, False], + ['TensorRT', 'engine', '.engine', False, True], + ['CoreML', 'coreml', '.mlpackage', True, False], + ['TensorFlow SavedModel', 'saved_model', '_saved_model', True, True], + ['TensorFlow GraphDef', 'pb', '.pb', True, True], + ['TensorFlow Lite', 'tflite', '.tflite', True, False], + ['TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite', True, False], + ['TensorFlow.js', 'tfjs', '_web_model', True, False], + ['PaddlePaddle', 'paddle', 
def try_export(inner_func):
    """
    YOLOv8 export decorator, i.e. @try_export.

    Wraps an export function so that its run is timed with `Profile`, a success or failure message is logged using the
    wrapped function's default `prefix` argument, and any exception is re-raised to the caller after being logged.

    Args:
        inner_func (Callable): Export function returning a `(file, model)` pair and declaring a `prefix` keyword
            argument with a default value.

    Returns:
        (Callable): Wrapped function with the same call signature and return value as `inner_func`.
    """
    from functools import wraps  # local import so the decorator does not depend on a module-level import

    inner_args = get_default_args(inner_func)

    @wraps(inner_func)  # keep the wrapped export's __name__ and __doc__ for logs, debugging and introspection
    def outer_func(*args, **kwargs):
        """Export a model."""
        prefix = inner_args['prefix']
        try:
            with Profile() as dt:
                f, model = inner_func(*args, **kwargs)
            LOGGER.info(f"{prefix} export success ✅ {dt.t:.1f}s, saved as '{f}' ({file_size(f):.1f} MB)")
            return f, model
        except Exception as e:
            LOGGER.info(f'{prefix} export failure ❌ {dt.t:.1f}s: {e}')
            raise  # bare raise preserves the original traceback unchanged

    return outer_func
+ """ + self.args = get_cfg(cfg, overrides) + if self.args.format.lower() in ('coreml', 'mlmodel'): # fix attempt for protobuf<3.20.x errors + os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python' # must run before TensorBoard callback + + self.callbacks = _callbacks or callbacks.get_default_callbacks() + callbacks.add_integration_callbacks(self) + + @smart_inference_mode() + def __call__(self, model=None): + """Returns list of exported files/dirs after running callbacks.""" + self.run_callbacks('on_export_start') + t = time.time() + fmt = self.args.format.lower() # to lowercase + if fmt in ('tensorrt', 'trt'): # 'engine' aliases + fmt = 'engine' + if fmt in ('mlmodel', 'mlpackage', 'mlprogram', 'apple', 'ios', 'coreml'): # 'coreml' aliases + fmt = 'coreml' + fmts = tuple(export_formats()['Argument'][1:]) # available export formats + flags = [x == fmt for x in fmts] + if sum(flags) != 1: + raise ValueError(f"Invalid export format='{fmt}'. Valid formats are {fmts}") + jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, ncnn = flags # export booleans + + # Device + if fmt == 'engine' and self.args.device is None: + LOGGER.warning('WARNING ⚠️ TensorRT requires GPU export, automatically assigning device=0') + self.args.device = '0' + self.device = select_device('cpu' if self.args.device is None else self.args.device) + + # Checks + model.names = check_class_names(model.names) + if self.args.half and onnx and self.device.type == 'cpu': + LOGGER.warning('WARNING ⚠️ half=True only compatible with GPU export, i.e. use device=0') + self.args.half = False + assert not self.args.dynamic, 'half=True not compatible with dynamic=True, i.e. use only one.' + self.imgsz = check_imgsz(self.args.imgsz, stride=model.stride, min_dim=2) # check image size + if self.args.optimize: + assert not ncnn, "optimize=True not compatible with format='ncnn', i.e. 
use optimize=False" + assert self.device.type == 'cpu', "optimize=True not compatible with cuda devices, i.e. use device='cpu'" + if edgetpu and not LINUX: + raise SystemError('Edge TPU export only supported on Linux. See https://coral.ai/docs/edgetpu/compiler/') + + # Input + im = torch.zeros(self.args.batch, 3, *self.imgsz).to(self.device) + file = Path( + getattr(model, 'pt_path', None) or getattr(model, 'yaml_file', None) or model.yaml.get('yaml_file', '')) + if file.suffix in {'.yaml', '.yml'}: + file = Path(file.name) + + # Update model + model = deepcopy(model).to(self.device) + for p in model.parameters(): + p.requires_grad = False + model.eval() + model.float() + model = model.fuse() + for m in model.modules(): + if isinstance(m, (Detect, RTDETRDecoder)): # Segment and Pose use Detect base class + m.dynamic = self.args.dynamic + m.export = True + m.format = self.args.format + elif isinstance(m, C2f) and not any((saved_model, pb, tflite, edgetpu, tfjs)): + # EdgeTPU does not support FlexSplitV while split provides cleaner ONNX graph + m.forward = m.forward_split + + y = None + for _ in range(2): + y = model(im) # dry runs + if self.args.half and (engine or onnx) and self.device.type != 'cpu': + im, model = im.half(), model.half() # to FP16 + + # Filter warnings + warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) # suppress TracerWarning + warnings.filterwarnings('ignore', category=UserWarning) # suppress shape prim::Constant missing ONNX warning + warnings.filterwarnings('ignore', category=DeprecationWarning) # suppress CoreML np.bool deprecation warning + + # Assign + self.im = im + self.model = model + self.file = file + self.output_shape = tuple(y.shape) if isinstance(y, torch.Tensor) else tuple( + tuple(x.shape if isinstance(x, torch.Tensor) else []) for x in y) + self.pretty_name = Path(self.model.yaml.get('yaml_file', self.file)).stem.replace('yolo', 'YOLO') + data = model.args['data'] if hasattr(model, 'args') and 
isinstance(model.args, dict) else '' + description = f'Ultralytics {self.pretty_name} model {f"trained on {data}" if data else ""}' + self.metadata = { + 'description': description, + 'author': 'Ultralytics', + 'license': 'AGPL-3.0 https://ultralytics.com/license', + 'date': datetime.now().isoformat(), + 'version': __version__, + 'stride': int(max(model.stride)), + 'task': model.task, + 'batch': self.args.batch, + 'imgsz': self.imgsz, + 'names': model.names} # model metadata + if model.task == 'pose': + self.metadata['kpt_shape'] = model.model[-1].kpt_shape + + LOGGER.info(f"\n{colorstr('PyTorch:')} starting from '{file}' with input shape {tuple(im.shape)} BCHW and " + f'output shape(s) {self.output_shape} ({file_size(file):.1f} MB)') + + # Exports + f = [''] * len(fmts) # exported filenames + if jit or ncnn: # TorchScript + f[0], _ = self.export_torchscript() + if engine: # TensorRT required before ONNX + f[1], _ = self.export_engine() + if onnx or xml: # OpenVINO requires ONNX + f[2], _ = self.export_onnx() + if xml: # OpenVINO + f[3], _ = self.export_openvino() + if coreml: # CoreML + f[4], _ = self.export_coreml() + if any((saved_model, pb, tflite, edgetpu, tfjs)): # TensorFlow formats + self.args.int8 |= edgetpu + f[5], keras_model = self.export_saved_model() + if pb or tfjs: # pb prerequisite to tfjs + f[6], _ = self.export_pb(keras_model=keras_model) + if tflite: + f[7], _ = self.export_tflite(keras_model=keras_model, nms=False, agnostic_nms=self.args.agnostic_nms) + if edgetpu: + f[8], _ = self.export_edgetpu(tflite_model=Path(f[5]) / f'{self.file.stem}_full_integer_quant.tflite') + if tfjs: + f[9], _ = self.export_tfjs() + if paddle: # PaddlePaddle + f[10], _ = self.export_paddle() + if ncnn: # ncnn + f[11], _ = self.export_ncnn() + + # Finish + f = [str(x) for x in f if x] # filter out '' and None + if any(f): + f = str(Path(f[-1])) + square = self.imgsz[0] == self.imgsz[1] + s = '' if square else f"WARNING ⚠️ non-PyTorch val requires square images, 
'imgsz={self.imgsz}' will not " \ + f"work. Use export 'imgsz={max(self.imgsz)}' if val is required." + imgsz = self.imgsz[0] if square else str(self.imgsz)[1:-1].replace(' ', '') + predict_data = f'data={data}' if model.task == 'segment' and fmt == 'pb' else '' + q = 'int8' if self.args.int8 else 'half' if self.args.half else '' # quantization + LOGGER.info(f'\nExport complete ({time.time() - t:.1f}s)' + f"\nResults saved to {colorstr('bold', file.parent.resolve())}" + f'\nPredict: yolo predict task={model.task} model={f} imgsz={imgsz} {q} {predict_data}' + f'\nValidate: yolo val task={model.task} model={f} imgsz={imgsz} data={data} {q} {s}' + f'\nVisualize: https://netron.app') + + self.run_callbacks('on_export_end') + return f # return list of exported files/dirs + + @try_export + def export_torchscript(self, prefix=colorstr('TorchScript:')): + """YOLOv8 TorchScript model export.""" + LOGGER.info(f'\n{prefix} starting export with torch {torch.__version__}...') + f = self.file.with_suffix('.torchscript') + + ts = torch.jit.trace(self.model, self.im, strict=False) + extra_files = {'config.txt': json.dumps(self.metadata)} # torch._C.ExtraFilesMap() + if self.args.optimize: # https://pytorch.org/tutorials/recipes/mobile_interpreter.html + LOGGER.info(f'{prefix} optimizing for mobile...') + from torch.utils.mobile_optimizer import optimize_for_mobile + optimize_for_mobile(ts)._save_for_lite_interpreter(str(f), _extra_files=extra_files) + else: + ts.save(str(f), _extra_files=extra_files) + return f, None + + @try_export + def export_onnx(self, prefix=colorstr('ONNX:')): + """YOLOv8 ONNX export.""" + requirements = ['onnx>=1.12.0'] + if self.args.simplify: + requirements += ['onnxsim>=0.4.33', 'onnxruntime-gpu' if torch.cuda.is_available() else 'onnxruntime'] + check_requirements(requirements) + import onnx # noqa + + opset_version = self.args.opset or get_latest_opset() + LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__} opset {opset_version}...') 
+ f = str(self.file.with_suffix('.onnx')) + + output_names = ['output0', 'output1'] if isinstance(self.model, SegmentationModel) else ['output0'] + dynamic = self.args.dynamic + if dynamic: + dynamic = {'images': {0: 'batch', 2: 'height', 3: 'width'}} # shape(1,3,640,640) + if isinstance(self.model, SegmentationModel): + dynamic['output0'] = {0: 'batch', 2: 'anchors'} # shape(1, 116, 8400) + dynamic['output1'] = {0: 'batch', 2: 'mask_height', 3: 'mask_width'} # shape(1,32,160,160) + elif isinstance(self.model, DetectionModel): + dynamic['output0'] = {0: 'batch', 2: 'anchors'} # shape(1, 84, 8400) + + torch.onnx.export( + self.model.cpu() if dynamic else self.model, # dynamic=True only compatible with cpu + self.im.cpu() if dynamic else self.im, + f, + verbose=False, + opset_version=opset_version, + do_constant_folding=True, # WARNING: DNN inference with torch>=1.12 may require do_constant_folding=False + input_names=['images'], + output_names=output_names, + dynamic_axes=dynamic or None) + + # Checks + model_onnx = onnx.load(f) # load onnx model + # onnx.checker.check_model(model_onnx) # check onnx model + + # Simplify + if self.args.simplify: + try: + import onnxsim + + LOGGER.info(f'{prefix} simplifying with onnxsim {onnxsim.__version__}...') + # subprocess.run(f'onnxsim "{f}" "{f}"', shell=True) + model_onnx, check = onnxsim.simplify(model_onnx) + assert check, 'Simplified ONNX model could not be validated' + except Exception as e: + LOGGER.info(f'{prefix} simplifier failure: {e}') + + # Metadata + for k, v in self.metadata.items(): + meta = model_onnx.metadata_props.add() + meta.key, meta.value = k, str(v) + + onnx.save(model_onnx, f) + return f, model_onnx + + @try_export + def export_openvino(self, prefix=colorstr('OpenVINO:')): + """YOLOv8 OpenVINO export.""" + check_requirements('openvino-dev>=2023.0') # requires openvino-dev: https://pypi.org/project/openvino-dev/ + import openvino.runtime as ov # noqa + from openvino.tools import mo # noqa + + 
LOGGER.info(f'\n{prefix} starting export with openvino {ov.__version__}...') + f = str(self.file).replace(self.file.suffix, f'_openvino_model{os.sep}') + fq = str(self.file).replace(self.file.suffix, f'_int8_openvino_model{os.sep}') + f_onnx = self.file.with_suffix('.onnx') + f_ov = str(Path(f) / self.file.with_suffix('.xml').name) + fq_ov = str(Path(fq) / self.file.with_suffix('.xml').name) + + def serialize(ov_model, file): + """Set RT info, serialize and save metadata YAML.""" + ov_model.set_rt_info('YOLOv8', ['model_info', 'model_type']) + ov_model.set_rt_info(True, ['model_info', 'reverse_input_channels']) + ov_model.set_rt_info(114, ['model_info', 'pad_value']) + ov_model.set_rt_info([255.0], ['model_info', 'scale_values']) + ov_model.set_rt_info(self.args.iou, ['model_info', 'iou_threshold']) + ov_model.set_rt_info([v.replace(' ', '_') for v in self.model.names.values()], ['model_info', 'labels']) + if self.model.task != 'classify': + ov_model.set_rt_info('fit_to_window_letterbox', ['model_info', 'resize_type']) + + ov.serialize(ov_model, file) # save + yaml_save(Path(file).parent / 'metadata.yaml', self.metadata) # add metadata.yaml + + ov_model = mo.convert_model(f_onnx, + model_name=self.pretty_name, + framework='onnx', + compress_to_fp16=self.args.half) # export + + if self.args.int8: + assert self.args.data, "INT8 export requires a data argument for calibration, i.e. 
'data=coco8.yaml'" + check_requirements('nncf>=2.5.0') + import nncf + + def transform_fn(data_item): + """Quantization transform function.""" + im = data_item['img'].numpy().astype(np.float32) / 255.0 # uint8 to fp16/32 and 0 - 255 to 0.0 - 1.0 + return np.expand_dims(im, 0) if im.ndim == 3 else im + + # Generate calibration data for integer quantization + LOGGER.info(f"{prefix} collecting INT8 calibration images from 'data={self.args.data}'") + data = check_det_dataset(self.args.data) + dataset = YOLODataset(data['val'], data=data, imgsz=self.imgsz[0], augment=False) + quantization_dataset = nncf.Dataset(dataset, transform_fn) + ignored_scope = nncf.IgnoredScope(types=['Multiply', 'Subtract', 'Sigmoid']) # ignore operation + quantized_ov_model = nncf.quantize(ov_model, + quantization_dataset, + preset=nncf.QuantizationPreset.MIXED, + ignored_scope=ignored_scope) + serialize(quantized_ov_model, fq_ov) + return fq, None + + serialize(ov_model, f_ov) + return f, None + + @try_export + def export_paddle(self, prefix=colorstr('PaddlePaddle:')): + """YOLOv8 Paddle export.""" + check_requirements(('paddlepaddle', 'x2paddle')) + import x2paddle # noqa + from x2paddle.convert import pytorch2paddle # noqa + + LOGGER.info(f'\n{prefix} starting export with X2Paddle {x2paddle.__version__}...') + f = str(self.file).replace(self.file.suffix, f'_paddle_model{os.sep}') + + pytorch2paddle(module=self.model, save_dir=f, jit_type='trace', input_examples=[self.im]) # export + yaml_save(Path(f) / 'metadata.yaml', self.metadata) # add metadata.yaml + return f, None + + @try_export + def export_ncnn(self, prefix=colorstr('ncnn:')): + """ + YOLOv8 ncnn export using PNNX https://github.com/pnnx/pnnx. 
+ """ + check_requirements('git+https://github.com/Tencent/ncnn.git' if ARM64 else 'ncnn') # requires ncnn + import ncnn # noqa + + LOGGER.info(f'\n{prefix} starting export with ncnn {ncnn.__version__}...') + f = Path(str(self.file).replace(self.file.suffix, f'_ncnn_model{os.sep}')) + f_ts = self.file.with_suffix('.torchscript') + + name = Path('pnnx.exe' if WINDOWS else 'pnnx') # PNNX filename + pnnx = name if name.is_file() else ROOT / name + if not pnnx.is_file(): + LOGGER.warning( + f'{prefix} WARNING ⚠️ PNNX not found. Attempting to download binary file from ' + 'https://github.com/pnnx/pnnx/.\nNote PNNX Binary file must be placed in current working directory ' + f'or in {ROOT}. See PNNX repo for full installation instructions.') + _, assets = get_github_assets(repo='pnnx/pnnx', retry=True) + system = 'macos' if MACOS else 'ubuntu' if LINUX else 'windows' # operating system + asset = [x for x in assets if system in x][0] if assets else \ + f'https://github.com/pnnx/pnnx/releases/download/20230816/pnnx-20230816-{system}.zip' # fallback + asset = attempt_download_asset(asset, repo='pnnx/pnnx', release='latest') + if check_is_path_safe(Path.cwd(), asset): # avoid path traversal security vulnerability + unzip_dir = Path(asset).with_suffix('') + (unzip_dir / name).rename(pnnx) # move binary to ROOT + shutil.rmtree(unzip_dir) # delete unzip dir + Path(asset).unlink() # delete zip + pnnx.chmod(0o777) # set read, write, and execute permissions for everyone + + ncnn_args = [ + f'ncnnparam={f / "model.ncnn.param"}', + f'ncnnbin={f / "model.ncnn.bin"}', + f'ncnnpy={f / "model_ncnn.py"}', ] + + pnnx_args = [ + f'pnnxparam={f / "model.pnnx.param"}', + f'pnnxbin={f / "model.pnnx.bin"}', + f'pnnxpy={f / "model_pnnx.py"}', + f'pnnxonnx={f / "model.pnnx.onnx"}', ] + + cmd = [ + str(pnnx), + str(f_ts), + *ncnn_args, + *pnnx_args, + f'fp16={int(self.args.half)}', + f'device={self.device.type}', + f'inputshape="{[self.args.batch, 3, *self.imgsz]}"', ] + f.mkdir(exist_ok=True) # 
make ncnn_model directory + LOGGER.info(f"{prefix} running '{' '.join(cmd)}'") + subprocess.run(cmd, check=True) + + # Remove debug files + pnnx_files = [x.split('=')[-1] for x in pnnx_args] + for f_debug in ('debug.bin', 'debug.param', 'debug2.bin', 'debug2.param', *pnnx_files): + Path(f_debug).unlink(missing_ok=True) + + yaml_save(f / 'metadata.yaml', self.metadata) # add metadata.yaml + return str(f), None + + @try_export + def export_coreml(self, prefix=colorstr('CoreML:')): + """YOLOv8 CoreML export.""" + mlmodel = self.args.format.lower() == 'mlmodel' # legacy *.mlmodel export format requested + check_requirements('coremltools>=6.0,<=6.2' if mlmodel else 'coremltools>=7.0') + import coremltools as ct # noqa + + LOGGER.info(f'\n{prefix} starting export with coremltools {ct.__version__}...') + f = self.file.with_suffix('.mlmodel' if mlmodel else '.mlpackage') + if f.is_dir(): + shutil.rmtree(f) + + bias = [0.0, 0.0, 0.0] + scale = 1 / 255 + classifier_config = None + if self.model.task == 'classify': + classifier_config = ct.ClassifierConfig(list(self.model.names.values())) if self.args.nms else None + model = self.model + elif self.model.task == 'detect': + model = IOSDetectModel(self.model, self.im) if self.args.nms else self.model + else: + if self.args.nms: + LOGGER.warning(f"{prefix} WARNING ⚠️ 'nms=True' is only available for Detect models like 'yolov8n.pt'.") + # TODO CoreML Segment and Pose model pipelining + model = self.model + + ts = torch.jit.trace(model.eval(), self.im, strict=False) # TorchScript model + ct_model = ct.convert(ts, + inputs=[ct.ImageType('image', shape=self.im.shape, scale=scale, bias=bias)], + classifier_config=classifier_config, + convert_to='neuralnetwork' if mlmodel else 'mlprogram') + bits, mode = (8, 'kmeans') if self.args.int8 else (16, 'linear') if self.args.half else (32, None) + if bits < 32: + if 'kmeans' in mode: + check_requirements('scikit-learn') # scikit-learn package required for k-means quantization + if mlmodel: + 
ct_model = ct.models.neural_network.quantization_utils.quantize_weights(ct_model, bits, mode) + elif bits == 8: # mlprogram already quantized to FP16 + import coremltools.optimize.coreml as cto + op_config = cto.OpPalettizerConfig(mode='kmeans', nbits=bits, weight_threshold=512) + config = cto.OptimizationConfig(global_config=op_config) + ct_model = cto.palettize_weights(ct_model, config=config) + if self.args.nms and self.model.task == 'detect': + if mlmodel: + import platform + + # coremltools<=6.2 NMS export requires Python<3.11 + check_version(platform.python_version(), '<3.11', name='Python ', hard=True) + weights_dir = None + else: + ct_model.save(str(f)) # save otherwise weights_dir does not exist + weights_dir = str(f / 'Data/com.apple.CoreML/weights') + ct_model = self._pipeline_coreml(ct_model, weights_dir=weights_dir) + + m = self.metadata # metadata dict + ct_model.short_description = m.pop('description') + ct_model.author = m.pop('author') + ct_model.license = m.pop('license') + ct_model.version = m.pop('version') + ct_model.user_defined_metadata.update({k: str(v) for k, v in m.items()}) + try: + ct_model.save(str(f)) # save *.mlpackage + except Exception as e: + LOGGER.warning( + f'{prefix} WARNING ⚠️ CoreML export to *.mlpackage failed ({e}), reverting to *.mlmodel export. ' + f'Known coremltools Python 3.11 and Windows bugs https://github.com/apple/coremltools/issues/1928.') + f = f.with_suffix('.mlmodel') + ct_model.save(str(f)) + return f, ct_model + + @try_export + def export_engine(self, prefix=colorstr('TensorRT:')): + """YOLOv8 TensorRT export https://developer.nvidia.com/tensorrt.""" + assert self.im.device.type != 'cpu', "export running on CPU but must be on GPU, i.e. 
use 'device=0'" + try: + import tensorrt as trt # noqa + except ImportError: + if LINUX: + check_requirements('nvidia-tensorrt', cmds='-U --index-url https://pypi.ngc.nvidia.com') + import tensorrt as trt # noqa + + check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0 + self.args.simplify = True + f_onnx, _ = self.export_onnx() + + LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...') + assert Path(f_onnx).exists(), f'failed to export ONNX file: {f_onnx}' + f = self.file.with_suffix('.engine') # TensorRT engine file + logger = trt.Logger(trt.Logger.INFO) + if self.args.verbose: + logger.min_severity = trt.Logger.Severity.VERBOSE + + builder = trt.Builder(logger) + config = builder.create_builder_config() + config.max_workspace_size = self.args.workspace * 1 << 30 + # config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30) # fix TRT 8.4 deprecation notice + + flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) + network = builder.create_network(flag) + parser = trt.OnnxParser(network, logger) + if not parser.parse_from_file(f_onnx): + raise RuntimeError(f'failed to load ONNX file: {f_onnx}') + + inputs = [network.get_input(i) for i in range(network.num_inputs)] + outputs = [network.get_output(i) for i in range(network.num_outputs)] + for inp in inputs: + LOGGER.info(f'{prefix} input "{inp.name}" with shape{inp.shape} {inp.dtype}') + for out in outputs: + LOGGER.info(f'{prefix} output "{out.name}" with shape{out.shape} {out.dtype}') + + if self.args.dynamic: + shape = self.im.shape + if shape[0] <= 1: + LOGGER.warning(f"{prefix} WARNING ⚠️ 'dynamic=True' model requires max batch size, i.e. 
@try_export
def export_saved_model(self, prefix=colorstr('TensorFlow SavedModel:')):
    """
    YOLOv8 TensorFlow SavedModel export.

    Exports the model to ONNX first, then runs the 'onnx2tf' command line tool to produce a '*_saved_model'
    directory next to the source weights. With 'int8=True' and a dataset, up to 100 validation images are
    written to a temporary *.npy file and passed to onnx2tf as calibration data.

    Args:
        prefix (str): Prefix prepended to every log message.

    Returns:
        (tuple): (path, model) where path is the SavedModel directory as a string and model is the object
            returned by tf.saved_model.load() for that directory.
    """
    cuda = torch.cuda.is_available()
    try:
        import tensorflow as tf  # noqa
    except ImportError:
        check_requirements(f"tensorflow{'-macos' if MACOS else '-aarch64' if ARM64 else '' if cuda else '-cpu'}")
        import tensorflow as tf  # noqa
    check_requirements(
        ('onnx', 'onnx2tf>=1.15.4,<=1.17.5', 'sng4onnx>=1.0.1', 'onnxsim>=0.4.33', 'onnx_graphsurgeon>=0.3.26',
         'tflite_support', 'onnxruntime-gpu' if cuda else 'onnxruntime'),
        cmds='--extra-index-url https://pypi.ngc.nvidia.com')  # onnx_graphsurgeon only on NVIDIA

    LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
    check_version(tf.__version__,
                  '<=2.13.1',
                  name='tensorflow',
                  verbose=True,
                  msg='https://github.com/ultralytics/ultralytics/issues/5161')
    f = Path(str(self.file).replace(self.file.suffix, '_saved_model'))
    if f.is_dir():
        import shutil
        shutil.rmtree(f)  # delete output folder

    # Pre-download calibration file to fix https://github.com/PINTO0309/onnx2tf/issues/545
    onnx2tf_file = Path('calibration_image_sample_data_20x128x128x3_float32.npy')
    if not onnx2tf_file.exists():
        attempt_download_asset(f'{onnx2tf_file}.zip', unzip=True, delete=True)

    # Export to ONNX
    self.args.simplify = True
    f_onnx, _ = self.export_onnx()

    # Export to TF
    tmp_file = f / 'tmp_tflite_int8_calibration_images.npy'  # int8 calibration images file
    if self.args.int8:
        verbosity = '--verbosity info'
        if self.args.data:
            # Generate calibration data for integer quantization
            LOGGER.info(f"{prefix} collecting INT8 calibration images from 'data={self.args.data}'")
            data = check_det_dataset(self.args.data)
            dataset = YOLODataset(data['val'], data=data, imgsz=self.imgsz[0], augment=False)
            images = []
            for i, batch in enumerate(dataset):
                if i >= 100:  # maximum number of calibration images
                    break
                im = batch['img'].permute(1, 2, 0)[None]  # list to nparray, CHW to BHWC
                images.append(im)
            f.mkdir()
            images = torch.cat(images, 0).float()
            # mean = images.view(-1, 3).mean(0)  # imagenet mean [123.675, 116.28, 103.53]
            # std = images.view(-1, 3).std(0)  # imagenet std [58.395, 57.12, 57.375]
            np.save(str(tmp_file), images.numpy())  # BHWC
            int8 = f'-oiqt -qt per-tensor -cind images "{tmp_file}" "[[[[0, 0, 0]]]]" "[[[[255, 255, 255]]]]"'
        else:
            int8 = '-oiqt -qt per-tensor'
    else:
        verbosity = '--non_verbose'
        int8 = ''

    cmd = f'onnx2tf -i "{f_onnx}" -o "{f}" -nuo {verbosity} {int8}'.strip()
    LOGGER.info(f"{prefix} running '{cmd}'")
    subprocess.run(cmd, shell=True)
    yaml_save(f / 'metadata.yaml', self.metadata)  # add metadata.yaml

    # Remove/rename TFLite models
    if self.args.int8:
        tmp_file.unlink(missing_ok=True)
        for file in f.rglob('*_dynamic_range_quant.tflite'):
            file.rename(file.with_name(file.stem.replace('_dynamic_range_quant', '_int8') + file.suffix))
        for file in f.rglob('*_integer_quant_with_int16_act.tflite'):
            file.unlink()  # delete extra fp16 activation TFLite files

    # Add TFLite metadata. The check must look at each *.tflite 'file', not at the output directory 'f';
    # the listing is materialized first because files may be deleted while walking it.
    for file in list(f.rglob('*.tflite')):
        if 'quant_with_int16_act.tflite' in str(file):
            file.unlink()
        else:
            self._add_tflite_metadata(file)

    return str(f), tf.saved_model.load(f, tags=None, options=None)  # load saved_model as Keras model
See {help_url}' + if subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, shell=True).returncode != 0: + LOGGER.info(f'\n{prefix} export requires Edge TPU compiler. Attempting install from {help_url}') + sudo = subprocess.run('sudo --version >/dev/null', shell=True).returncode == 0 # sudo installed on system + for c in ('curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -', + 'echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | ' + 'sudo tee /etc/apt/sources.list.d/coral-edgetpu.list', 'sudo apt-get update', + 'sudo apt-get install edgetpu-compiler'): + subprocess.run(c if sudo else c.replace('sudo ', ''), shell=True, check=True) + ver = subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1] + + LOGGER.info(f'\n{prefix} starting export with Edge TPU compiler {ver}...') + f = str(tflite_model).replace('.tflite', '_edgetpu.tflite') # Edge TPU model + + cmd = f'edgetpu_compiler -s -d -k 10 --out_dir "{Path(f).parent}" "{tflite_model}"' + LOGGER.info(f"{prefix} running '{cmd}'") + subprocess.run(cmd, shell=True) + self._add_tflite_metadata(f) + return f, None + + @try_export + def export_tfjs(self, prefix=colorstr('TensorFlow.js:')): + """YOLOv8 TensorFlow.js export.""" + check_requirements('tensorflowjs') + import tensorflow as tf + import tensorflowjs as tfjs # noqa + + LOGGER.info(f'\n{prefix} starting export with tensorflowjs {tfjs.__version__}...') + f = str(self.file).replace(self.file.suffix, '_web_model') # js dir + f_pb = str(self.file.with_suffix('.pb')) # *.pb path + + gd = tf.Graph().as_graph_def() # TF GraphDef + with open(f_pb, 'rb') as file: + gd.ParseFromString(file.read()) + outputs = ','.join(gd_outputs(gd)) + LOGGER.info(f'\n{prefix} output node names: {outputs}') + + with spaces_in_path(f_pb) as fpb_, spaces_in_path(f) as f_: # exporter can not handle spaces in path + cmd = f'tensorflowjs_converter --input_format=tf_frozen_model 
def _add_tflite_metadata(self, file):
    """
    Add metadata to *.tflite models per https://www.tensorflow.org/lite/models/convert/metadata.

    The exporter metadata dict is written as text to a temporary 'temp_meta.txt' next to the model, attached to
    the output tensor(s) as an associated file, and packed into the model together with name/version/author/
    license fields. The temporary file is removed afterwards, including when populating fails.

    Args:
        file (str | Path): Path of the *.tflite model to update in place.
    """
    from tflite_support import flatbuffers  # noqa
    from tflite_support import metadata as _metadata  # noqa
    from tflite_support import metadata_schema_py_generated as _metadata_fb  # noqa

    # Create model info
    model_meta = _metadata_fb.ModelMetadataT()
    model_meta.name = self.metadata['description']
    model_meta.version = self.metadata['version']
    model_meta.author = self.metadata['author']
    model_meta.license = self.metadata['license']

    # Create input info
    input_meta = _metadata_fb.TensorMetadataT()
    input_meta.name = 'image'
    input_meta.description = 'Input image to be detected.'
    input_meta.content = _metadata_fb.ContentT()
    input_meta.content.contentProperties = _metadata_fb.ImagePropertiesT()
    input_meta.content.contentProperties.colorSpace = _metadata_fb.ColorSpaceType.RGB
    input_meta.content.contentPropertiesType = _metadata_fb.ContentProperties.ImageProperties

    tmp_file = Path(file).parent / 'temp_meta.txt'
    try:
        # Label file
        with open(tmp_file, 'w') as meta_txt:
            meta_txt.write(str(self.metadata))

        label_file = _metadata_fb.AssociatedFileT()
        label_file.name = tmp_file.name
        label_file.type = _metadata_fb.AssociatedFileType.TENSOR_AXIS_LABELS

        # Create output info
        output1 = _metadata_fb.TensorMetadataT()
        output1.name = 'output'
        output1.description = 'Coordinates of detected objects, class labels, and confidence score'
        output1.associatedFiles = [label_file]
        output_metas = [output1]
        if self.model.task == 'segment':  # segmentation models get a second output entry
            output2 = _metadata_fb.TensorMetadataT()
            output2.name = 'output'
            output2.description = 'Mask protos'
            output2.associatedFiles = [label_file]
            output_metas.append(output2)

        # Create subgraph info
        subgraph = _metadata_fb.SubGraphMetadataT()
        subgraph.inputTensorMetadata = [input_meta]
        subgraph.outputTensorMetadata = output_metas
        model_meta.subgraphMetadata = [subgraph]

        b = flatbuffers.Builder(0)
        b.Finish(model_meta.Pack(b), _metadata.MetadataPopulator.METADATA_FILE_IDENTIFIER)
        metadata_buf = b.Output()

        populator = _metadata.MetadataPopulator.with_model_file(str(file))
        populator.load_metadata_buffer(metadata_buf)
        populator.load_associated_files([str(tmp_file)])
        populator.populate()
    finally:
        # Never leave the scratch file beside the exported model, even on failure
        tmp_file.unlink(missing_ok=True)
PyTorch model output y + out0_shape = self.output_shape[2], self.output_shape[1] - 4 # (3780, 80) + out1_shape = self.output_shape[2], 4 # (3780, 4) + + # Checks + names = self.metadata['names'] + nx, ny = spec.description.input[0].type.imageType.width, spec.description.input[0].type.imageType.height + _, nc = out0_shape # number of anchors, number of classes + # _, nc = out0.type.multiArrayType.shape + assert len(names) == nc, f'{len(names)} names found for nc={nc}' # check + + # Define output shapes (missing) + out0.type.multiArrayType.shape[:] = out0_shape # (3780, 80) + out1.type.multiArrayType.shape[:] = out1_shape # (3780, 4) + # spec.neuralNetwork.preprocessing[0].featureName = '0' + + # Flexible input shapes + # from coremltools.models.neural_network import flexible_shape_utils + # s = [] # shapes + # s.append(flexible_shape_utils.NeuralNetworkImageSize(320, 192)) + # s.append(flexible_shape_utils.NeuralNetworkImageSize(640, 384)) # (height, width) + # flexible_shape_utils.add_enumerated_image_sizes(spec, feature_name='image', sizes=s) + # r = flexible_shape_utils.NeuralNetworkImageSizeRange() # shape ranges + # r.add_height_range((192, 640)) + # r.add_width_range((192, 640)) + # flexible_shape_utils.update_image_size_range(spec, feature_name='image', size_range=r) + + # Print + # print(spec.description) + + # Model from spec + model = ct.models.MLModel(spec, weights_dir=weights_dir) + + # 3. 
Create NMS protobuf + nms_spec = ct.proto.Model_pb2.Model() + nms_spec.specificationVersion = 5 + for i in range(2): + decoder_output = model._spec.description.output[i].SerializeToString() + nms_spec.description.input.add() + nms_spec.description.input[i].ParseFromString(decoder_output) + nms_spec.description.output.add() + nms_spec.description.output[i].ParseFromString(decoder_output) + + nms_spec.description.output[0].name = 'confidence' + nms_spec.description.output[1].name = 'coordinates' + + output_sizes = [nc, 4] + for i in range(2): + ma_type = nms_spec.description.output[i].type.multiArrayType + ma_type.shapeRange.sizeRanges.add() + ma_type.shapeRange.sizeRanges[0].lowerBound = 0 + ma_type.shapeRange.sizeRanges[0].upperBound = -1 + ma_type.shapeRange.sizeRanges.add() + ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i] + ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i] + del ma_type.shape[:] + + nms = nms_spec.nonMaximumSuppression + nms.confidenceInputFeatureName = out0.name # 1x507x80 + nms.coordinatesInputFeatureName = out1.name # 1x507x4 + nms.confidenceOutputFeatureName = 'confidence' + nms.coordinatesOutputFeatureName = 'coordinates' + nms.iouThresholdInputFeatureName = 'iouThreshold' + nms.confidenceThresholdInputFeatureName = 'confidenceThreshold' + nms.iouThreshold = 0.45 + nms.confidenceThreshold = 0.25 + nms.pickTop.perClass = True + nms.stringClassLabels.vector.extend(names.values()) + nms_model = ct.models.MLModel(nms_spec) + + # 4. 
class IOSDetectModel(torch.nn.Module):
    """Wrapper around an Ultralytics YOLO detection model used for Apple iOS CoreML export."""

    def __init__(self, model, im):
        """
        Store the wrapped model and derive the coordinate normalization from the example image.

        Args:
            model: Wrapped model; must expose 'names' and be callable on an image batch.
            im: Example input whose shape unpacks as (batch, channel, height, width).
        """
        super().__init__()
        batch, channels, height, width = im.shape
        self.model = model
        self.nc = len(model.names)  # number of classes
        # Square inputs need a single scalar; otherwise scale x/w by width and y/h by height (broadcast)
        self.normalize = (1.0 / width if width == height else torch.tensor(
            [1.0 / width, 1.0 / height, 1.0 / width, 1.0 / height]))

    def forward(self, x):
        """Run the wrapped model and return (class scores, box coordinates scaled by the normalization)."""
        prediction = self.model(x)[0]
        boxes, scores = prediction.transpose(0, 1).split((4, self.nc), 1)
        return scores, boxes * self.normalize  # confidence (3780, 80), coordinates (3780, 4)
+ + Methods: + __call__(source=None, stream=False, **kwargs): + Alias for the predict method. + _new(cfg:str, verbose:bool=True) -> None: + Initializes a new model and infers the task type from the model definitions. + _load(weights:str, task:str='') -> None: + Initializes a new model and infers the task type from the model head. + _check_is_pytorch_model() -> None: + Raises TypeError if the model is not a PyTorch model. + reset() -> None: + Resets the model modules. + info(verbose:bool=False) -> None: + Logs the model info. + fuse() -> None: + Fuses the model for faster inference. + predict(source=None, stream=False, **kwargs) -> List[ultralytics.engine.results.Results]: + Performs prediction using the YOLO model. + + Returns: + list(ultralytics.engine.results.Results): The prediction results. + """ + + def __init__(self, model: Union[str, Path] = 'yolov8n.pt', task=None) -> None: + """ + Initializes the YOLO model. + + Args: + model (Union[str, Path], optional): Path or name of the model to load or create. Defaults to 'yolov8n.pt'. + task (Any, optional): Task type for the YOLO model. Defaults to None. 
+ """ + super().__init__() + self.callbacks = callbacks.get_default_callbacks() + self.predictor = None # reuse predictor + self.model = None # model object + self.trainer = None # trainer object + self.ckpt = None # if loaded from *.pt + self.cfg = None # if loaded from *.yaml + self.ckpt_path = None + self.overrides = {} # overrides for trainer object + self.metrics = None # validation/training metrics + self.session = None # HUB session + self.task = task # task type + model = str(model).strip() # strip spaces + + # Check if Ultralytics HUB model from https://hub.ultralytics.com + if self.is_hub_model(model): + from ultralytics.hub.session import HUBTrainingSession + self.session = HUBTrainingSession(model) + model = self.session.model_file + + # Check if Triton Server model + elif self.is_triton_model(model): + self.model = model + self.task = task + return + + # Load or create new YOLO model + model = checks.check_model_file_from_stem(model) # add suffix, i.e. yolov8n -> yolov8n.pt + if Path(model).suffix in ('.yaml', '.yml'): + self._new(model, task) + else: + self._load(model, task) + + def __call__(self, source=None, stream=False, **kwargs): + """Calls the 'predict' function with given arguments to perform object detection.""" + return self.predict(source, stream, **kwargs) + + @staticmethod + def is_triton_model(model): + """Is model a Triton Server URL string, i.e. :////""" + from urllib.parse import urlsplit + url = urlsplit(model) + return url.netloc and url.path and url.scheme in {'http', 'grpc'} + + @staticmethod + def is_hub_model(model): + """Check if the provided model is a HUB model.""" + return any(( + model.startswith(f'{HUB_WEB_ROOT}/models/'), # i.e. 
def _new(self, cfg: str, task=None, model=None, verbose=True):
    """
    Build a fresh model from a configuration file, inferring the task when it is not given.

    Args:
        cfg (str): Model configuration file.
        task (str | None): Model task; guessed from the parsed configuration when falsy.
        model (BaseModel): Customized model class; the task's default model class is used when falsy.
        verbose (bool): Display model info on load (only honoured when RANK == -1).
    """
    parsed_cfg = yaml_model_load(cfg)
    self.cfg = cfg
    self.task = task or guess_model_task(parsed_cfg)

    # self.task must be set before the lookup below, which resolves the class for the current task
    model_cls = model or self._smart_load('model')
    self.model = model_cls(parsed_cfg, verbose=verbose and RANK == -1)  # build model
    self.overrides.update(model=self.cfg, task=self.task)

    # Below added to allow export from YAMLs
    combined_args = {**DEFAULT_CFG_DICT, **self.overrides}  # combine default and model args (prefer model args)
    self.model.args = combined_args
    self.model.task = self.task
def _check_is_pytorch_model(self):
    """
    Raise TypeError if the model is not a PyTorch model.

    self.model is accepted when it is an nn.Module instance, or a str/Path whose suffix is '.pt'.

    Raises:
        TypeError: If self.model is neither of the above.
    """
    # A str/Path only qualifies when it points at a *.pt checkpoint
    pt_str = isinstance(self.model, (str, Path)) and Path(self.model).suffix == '.pt'
    # An already-instantiated module always qualifies
    pt_module = isinstance(self.model, nn.Module)
    if not (pt_module or pt_str):
        raise TypeError(
            f"model='{self.model}' should be a *.pt PyTorch model to run this method, but is a different format. "
            f"PyTorch models can train, val, predict and export, i.e. 'model.train(data=...)', but exported "
            f"formats like ONNX, TensorRT etc. only support 'predict' and 'val' modes, "
            f"i.e. 'yolo predict model=yolov8n.onnx'.\nTo run CUDA or MPS inference please pass the device "
            f"argument directly in your inference command, i.e. 'model.predict(source=..., device=0)'")
def track(self, source=None, stream=False, persist=False, **kwargs):
    """
    Run object tracking on the input source by delegating to predict() in 'track' mode.

    Args:
        source (str, optional): The input source for object tracking. Can be a file path or a video stream.
        stream (bool, optional): Whether the input source is a video stream. Defaults to False.
        persist (bool, optional): Whether to persist the trackers if they already exist. Defaults to False.
        **kwargs (optional): Additional keyword arguments for the tracking process.

    Returns:
        (List[ultralytics.engine.results.Results]): The tracking results.
    """
    trackers_ready = hasattr(self.predictor, 'trackers')
    if not trackers_ready:
        from ultralytics.trackers import register_tracker
        register_tracker(self, persist)

    # ByteTrack-based method needs low confidence predictions as input, so a missing/falsy conf becomes 0.1
    conf = kwargs.get('conf')
    if not conf:
        conf = 0.1
    kwargs.update(conf=conf, mode='track')
    return self.predict(source=source, stream=stream, **kwargs)
+ """ + self._check_is_pytorch_model() + from .exporter import Exporter + + custom = {'imgsz': self.model.args['imgsz'], 'batch': 1, 'data': None, 'verbose': False} # method defaults + args = {**self.overrides, **custom, **kwargs, 'mode': 'export'} # highest priority args on the right + return Exporter(overrides=args, _callbacks=self.callbacks)(model=self.model) + + def train(self, trainer=None, **kwargs): + """ + Trains the model on a given dataset. + + Args: + trainer (BaseTrainer, optional): Customized trainer. + **kwargs (Any): Any number of arguments representing the training configuration. + """ + self._check_is_pytorch_model() + if self.session: # Ultralytics HUB session + if any(kwargs): + LOGGER.warning('WARNING ⚠️ using HUB training arguments, ignoring local training arguments.') + kwargs = self.session.train_args + checks.check_pip_update_available() + + overrides = yaml_load(checks.check_yaml(kwargs['cfg'])) if kwargs.get('cfg') else self.overrides + custom = {'data': TASK2DATA[self.task]} # method defaults + args = {**overrides, **custom, **kwargs, 'mode': 'train'} # highest priority args on the right + if args.get('resume'): + args['resume'] = self.ckpt_path + + self.trainer = (trainer or self._smart_load('trainer'))(overrides=args, _callbacks=self.callbacks) + if not args.get('resume'): # manually set model only if not resuming + self.trainer.model = self.trainer.get_model(weights=self.model if self.ckpt else None, cfg=self.model.yaml) + self.model = self.trainer.model + self.trainer.hub_session = self.session # attach optional HUB session + self.trainer.train() + # Update model and cfg after training + if RANK in (-1, 0): + ckpt = self.trainer.best if self.trainer.best.exists() else self.trainer.last + self.model, _ = attempt_load_one_weight(ckpt) + self.overrides = self.model.args + self.metrics = getattr(self.trainer.validator, 'metrics', None) # TODO: no metrics returned by DDP + return self.metrics + + def tune(self, use_ray=False, 
iterations=10, *args, **kwargs): + """ + Runs hyperparameter tuning, optionally using Ray Tune. See ultralytics.utils.tuner.run_ray_tune for Args. + + Returns: + (dict): A dictionary containing the results of the hyperparameter search. + """ + self._check_is_pytorch_model() + if use_ray: + from ultralytics.utils.tuner import run_ray_tune + return run_ray_tune(self, max_samples=iterations, *args, **kwargs) + else: + from .tuner import Tuner + + custom = {} # method defaults + args = {**self.overrides, **custom, **kwargs, 'mode': 'train'} # highest priority args on the right + return Tuner(args=args, _callbacks=self.callbacks)(model=self, iterations=iterations) + + def _apply(self, fn): + """Apply to(), cpu(), cuda(), half(), float() to model tensors that are not parameters or registered buffers.""" + self._check_is_pytorch_model() + self = super()._apply(fn) # noqa + self.predictor = None # reset predictor as device may have changed + self.overrides['device'] = self.device # was str(self.device) i.e. 
def _smart_load(self, key):
    """
    Fetch the model/trainer/validator/predictor entry registered for the current task.

    Args:
        key (str): Entry to read from `self.task_map[self.task]`.

    Returns:
        The object stored under `key` for `self.task`.

    Raises:
        NotImplementedError: If the lookup fails for any reason. The original exception is chained as the
            cause and the message names the calling method as the unsupported mode.
    """
    try:
        component = self.task_map[self.task][key]
    except Exception as err:
        cls_name = self.__class__.__name__
        caller = inspect.stack()[1].function  # name of the method that requested the component
        message = f"WARNING ⚠️ '{cls_name}' model does not support '{caller}' mode for '{self.task}' task yet."
        raise NotImplementedError(emojis(message)) from err
    return component
+ + Returns: + task_map (dict): The map of model task to mode classes. + """ + raise NotImplementedError('Please provide task map for your model!') diff --git a/ultralytics/engine/predictor.py b/ultralytics/engine/predictor.py new file mode 100644 index 0000000..3df7bdb --- /dev/null +++ b/ultralytics/engine/predictor.py @@ -0,0 +1,376 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +""" +Run prediction on images, videos, directories, globs, YouTube, webcam, streams, etc. + +Usage - sources: + $ yolo mode=predict model=yolov8n.pt source=0 # webcam + img.jpg # image + vid.mp4 # video + screen # screenshot + path/ # directory + list.txt # list of images + list.streams # list of streams + 'path/*.jpg' # glob + 'https://youtu.be/LNwODJXcvt4' # YouTube + 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP, TCP stream + +Usage - formats: + $ yolo mode=predict model=yolov8n.pt # PyTorch + yolov8n.torchscript # TorchScript + yolov8n.onnx # ONNX Runtime or OpenCV DNN with dnn=True + yolov8n_openvino_model # OpenVINO + yolov8n.engine # TensorRT + yolov8n.mlpackage # CoreML (macOS-only) + yolov8n_saved_model # TensorFlow SavedModel + yolov8n.pb # TensorFlow GraphDef + yolov8n.tflite # TensorFlow Lite + yolov8n_edgetpu.tflite # TensorFlow Edge TPU + yolov8n_paddle_model # PaddlePaddle +""" +import platform +import threading +from pathlib import Path + +import cv2 +import numpy as np +import torch + +from ultralytics.cfg import get_cfg, get_save_dir +from ultralytics.data import load_inference_source +from ultralytics.data.augment import LetterBox, classify_transforms +from ultralytics.nn.autobackend import AutoBackend +from ultralytics.utils import DEFAULT_CFG, LOGGER, MACOS, WINDOWS, callbacks, colorstr, ops +from ultralytics.utils.checks import check_imgsz, check_imshow +from ultralytics.utils.files import increment_path +from ultralytics.utils.torch_utils import select_device, smart_inference_mode + +STREAM_WARNING = """ +WARNING ⚠️ inference results will accumulate in RAM unless 
`stream=True` is passed, causing potential out-of-memory +errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help. + +Example: + results = model(source=..., stream=True) # generator of Results objects + for r in results: + boxes = r.boxes # Boxes object for bbox outputs + masks = r.masks # Masks object for segment masks outputs + probs = r.probs # Class probabilities for classification outputs +""" + + +class BasePredictor: + """ + BasePredictor. + + A base class for creating predictors. + + Attributes: + args (SimpleNamespace): Configuration for the predictor. + save_dir (Path): Directory to save results. + done_warmup (bool): Whether the predictor has finished setup. + model (nn.Module): Model used for prediction. + data (dict): Data configuration. + device (torch.device): Device used for prediction. + dataset (Dataset): Dataset used for prediction. + vid_path (str): Path to video file. + vid_writer (cv2.VideoWriter): Video writer for saving video output. + data_path (str): Path to data. + """ + + def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): + """ + Initializes the BasePredictor class. + + Args: + cfg (str, optional): Path to a configuration file. Defaults to DEFAULT_CFG. + overrides (dict, optional): Configuration overrides. Defaults to None. 
def preprocess(self, im):
    """
    Convert the raw input into a tensor on `self.device` with the dtype the model expects.

    Args:
        im (torch.Tensor | List(np.ndarray)): BCHW for tensor, [(HWC) x B] for list.

    Returns:
        (torch.Tensor): Half precision when `self.model.fp16` is truthy, otherwise float. Only inputs that
            were not already tensors are divided by 255.
    """
    from_arrays = not isinstance(im, torch.Tensor)
    if from_arrays:
        stacked = np.stack(self.pre_transform(im))
        # Reverse the channel axis (BGR to RGB) and move it forward: BHWC to BCHW, (n, 3, h, w)
        rgb_bchw = stacked[..., ::-1].transpose((0, 3, 1, 2))
        im = torch.from_numpy(np.ascontiguousarray(rgb_bchw))  # from_numpy needs a contiguous array

    im = im.to(self.device)
    if self.model.fp16:
        im = im.half()  # uint8 to fp16
    else:
        im = im.float()  # uint8 to fp32
    if from_arrays:
        im /= 255  # 0 - 255 to 0.0 - 1.0
    return im
def write_results(self, idx, results, batch):
    """
    Build the log line for one image and trigger the optional plot/txt/crop outputs.

    Args:
        idx (int): Index of the image inside the batch; used to pick `results[idx]` and `im[idx]`.
        results (list): Results for the batch; each entry must offer `verbose()`, `plot()`, `save_txt()`
            and `save_crop()`.
        batch (tuple): `(path, im, im0)`; only the path and the preprocessed image `im` are read here.

    Returns:
        (str): Log string made of an optional 'idx: ' prefix, the image size and the result summary.
    """
    p, im, _ = batch
    if len(im.shape) == 3:
        im = im[None]  # expand for batch dim

    src = self.source_type
    parts = []
    if src.webcam or src.from_img or src.tensor:  # batch_size >= 1
        parts.append(f'{idx}: ')
        frame = self.dataset.count
    else:
        frame = getattr(self.dataset, 'frame', 0)

    # Non-image datasets get the frame number appended to every output name
    stem_suffix = '' if self.dataset.mode == 'image' else f'_{frame}'
    self.data_path = p
    self.txt_path = str(self.save_dir / 'labels' / p.stem) + stem_suffix
    parts.append('%gx%g ' % im.shape[2:])  # print string
    result = results[idx]
    parts.append(result.verbose())

    args = self.args
    if args.save or args.show:  # Add bbox to image
        plot_args = dict(line_width=args.line_width,
                         boxes=args.show_boxes,
                         conf=args.show_conf,
                         labels=args.show_labels)
        if not args.retina_masks:
            plot_args['im_gpu'] = im[idx]
        self.plotted_img = result.plot(**plot_args)
    # Write
    if args.save_txt:
        result.save_txt(f'{self.txt_path}.txt', save_conf=args.save_conf)
    if args.save_crop:
        result.save_crop(save_dir=self.save_dir / 'crops', file_name=self.data_path.stem + stem_suffix)

    return ''.join(parts)
@smart_inference_mode()
def stream_inference(self, source=None, model=None, *args, **kwargs):
    """
    Run inference over a source lazily, yielding the results of each batch as soon as they are ready.

    The model is set up on first use. Source setup, warmup and the batch loop all run while holding
    `self._lock`. The callbacks 'on_predict_start', 'on_predict_batch_start', 'on_predict_postprocess_end',
    'on_predict_batch_end' and 'on_predict_end' are run at the matching points.

    Args:
        source: Input passed to `self.setup_source`; `self.args.source` is used when it is None.
        model: Model passed to `self.setup_model` when no model has been set up yet.
        *args: Extra positional arguments forwarded to `self.inference`.
        **kwargs: Extra keyword arguments forwarded to `self.inference`.

    Yields:
        The entries of `self.results`, one batch at a time.
    """
    if self.args.verbose:
        LOGGER.info('')

    # Setup model
    if not self.model:
        self.setup_model(model)

    with self._lock:  # for thread-safe inference
        # Setup source every time predict is called
        self.setup_source(source if source is not None else self.args.source)

        # Check if save_dir/ label file exists
        if self.args.save or self.args.save_txt:
            (self.save_dir / 'labels' if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True)

        # Warmup model
        if not self.done_warmup:
            self.model.warmup(imgsz=(1 if self.model.pt or self.model.triton else self.dataset.bs, 3, *self.imgsz))
            self.done_warmup = True

        self.seen, self.windows, self.batch, profilers = 0, [], None, (ops.Profile(), ops.Profile(), ops.Profile())
        self.run_callbacks('on_predict_start')

        for batch in self.dataset:
            self.run_callbacks('on_predict_batch_start')
            self.batch = batch
            path, im0s, vid_cap, s = batch

            # Preprocess
            with profilers[0]:
                im = self.preprocess(im0s)

            # Inference
            with profilers[1]:
                preds = self.inference(im, *args, **kwargs)

            # Postprocess
            with profilers[2]:
                self.results = self.postprocess(preds, im, im0s)

            self.run_callbacks('on_predict_postprocess_end')
            # Visualize, save, write results
            n = len(im0s)
            for i in range(n):
                self.seen += 1
                # Batch timings are split evenly across the images of the batch
                self.results[i].speed = {
                    'preprocess': profilers[0].dt * 1E3 / n,
                    'inference': profilers[1].dt * 1E3 / n,
                    'postprocess': profilers[2].dt * 1E3 / n}
                p, im0 = path[i], None if self.source_type.tensor else im0s[i].copy()
                p = Path(p)

                if self.args.verbose or self.args.save or self.args.save_txt or self.args.show:
                    s += self.write_results(i, self.results, (p, im, im0))
                if self.args.save or self.args.save_txt:
                    self.results[i].save_dir = str(self.save_dir)
                if self.args.show and self.plotted_img is not None:
                    self.show(p)
                if self.args.save and self.plotted_img is not None:
                    self.save_preds(vid_cap, i, str(self.save_dir / p.name))

            self.run_callbacks('on_predict_batch_end')
            yield from self.results

            # Print time (inference-only)
            if self.args.verbose:
                LOGGER.info(f'{s}{profilers[1].dt * 1E3:.1f}ms')

    # Release assets: setup_source creates one writer slot per batch element, so close every writer that was
    # opened instead of only the last one (which also avoids indexing an empty list)
    for writer in self.vid_writer:
        if isinstance(writer, cv2.VideoWriter):
            writer.release()

    # Print results
    if self.args.verbose and self.seen:
        t = tuple(x.t / self.seen * 1E3 for x in profilers)  # speeds per image
        LOGGER.info(f'Speed: %.1fms preprocess, %.1fms inference, %.1fms postprocess per image at shape '
                    f'{(1, 3, *im.shape[2:])}' % t)
    if self.args.save or self.args.save_txt or self.args.save_crop:
        nl = len(list(self.save_dir.glob('labels/*.txt')))  # number of labels
        s = f"\n{nl} label{'s' * (nl > 1)} saved to {self.save_dir / 'labels'}" if self.args.save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', self.save_dir)}{s}")

    self.run_callbacks('on_predict_end')
def run_callbacks(self, event: str):
    """
    Invoke every callback registered for `event`, in registration order, passing this object to each.

    Args:
        event (str): Key into `self.callbacks`. An event without an entry runs nothing.
    """
    handlers = self.callbacks.get(event, [])
    for handler in handlers:
        handler(self)
+ """ + assert isinstance(data, (torch.Tensor, np.ndarray)) + self.data = data + self.orig_shape = orig_shape + + @property + def shape(self): + """Return the shape of the data tensor.""" + return self.data.shape + + def cpu(self): + """Return a copy of the tensor on CPU memory.""" + return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.cpu(), self.orig_shape) + + def numpy(self): + """Return a copy of the tensor as a numpy array.""" + return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.numpy(), self.orig_shape) + + def cuda(self): + """Return a copy of the tensor on GPU memory.""" + return self.__class__(torch.as_tensor(self.data).cuda(), self.orig_shape) + + def to(self, *args, **kwargs): + """Return a copy of the tensor with the specified device and dtype.""" + return self.__class__(torch.as_tensor(self.data).to(*args, **kwargs), self.orig_shape) + + def __len__(self): # override len(results) + """Return the length of the data tensor.""" + return len(self.data) + + def __getitem__(self, idx): + """Return a BaseTensor with the specified index of the data tensor.""" + return self.__class__(self.data[idx], self.orig_shape) + + +class Results(SimpleClass): + """ + A class for storing and manipulating inference results. + + Args: + orig_img (numpy.ndarray): The original image as a numpy array. + path (str): The path to the image file. + names (dict): A dictionary of class names. + boxes (torch.tensor, optional): A 2D tensor of bounding box coordinates for each detection. + masks (torch.tensor, optional): A 3D tensor of detection masks, where each mask is a binary image. + probs (torch.tensor, optional): A 1D tensor of probabilities of each class for classification task. + keypoints (List[List[float]], optional): A list of detected keypoints for each object. + + Attributes: + orig_img (numpy.ndarray): The original image as a numpy array. + orig_shape (tuple): The original image shape in (height, width) format. 
def __len__(self):
    """
    Return the number of detections in the Results object.

    The length is taken from the first attribute listed in `self._keys` that is not None.

    Returns:
        (int): Length of that attribute, or 0 when every attribute in `self._keys` is None. Previously the
            method returned None in that case, which made `len(results)` raise a TypeError.
    """
    for k in self._keys:
        v = getattr(self, k)
        if v is not None:
            return len(v)
    return 0  # nothing stored: report an empty result rather than returning None
def plot(
    self,
    conf=True,
    line_width=None,
    font_size=None,
    font='Arial.ttf',
    pil=False,
    img=None,
    im_gpu=None,
    kpt_radius=5,
    kpt_line=True,
    labels=True,
    boxes=True,
    masks=True,
    probs=True,
):
    """
    Plots the detection results on an input RGB image. Accepts a numpy array (cv2) or a PIL Image.

    Args:
        conf (bool): Whether to plot the detection confidence score.
        line_width (float, optional): The line width of the bounding boxes. If None, it is scaled to the image size.
        font_size (float, optional): The font size of the text. If None, it is scaled to the image size.
        font (str): The font to use for the text.
        pil (bool): Whether to return the image as a PIL Image.
        img (numpy.ndarray): Plot to another image. If not given, plot to the original image.
        im_gpu (torch.Tensor): Normalized image in gpu with shape (1, 3, 640, 640), for faster mask plotting.
        kpt_radius (int, optional): Radius of the drawn keypoints. Default is 5.
        kpt_line (bool): Whether to draw lines connecting keypoints.
        labels (bool): Whether to plot the label of bounding boxes.
        boxes (bool): Whether to plot the bounding boxes.
        masks (bool): Whether to plot the masks.
        probs (bool): Whether to plot classification probability.

    Returns:
        (numpy.ndarray): A numpy array of the annotated image.

    Example:
        ```python
        from PIL import Image
        from ultralytics import YOLO

        model = YOLO('yolov8n.pt')
        results = model('bus.jpg')  # results list
        for r in results:
            im_array = r.plot()  # plot a BGR numpy array of predictions
            im = Image.fromarray(im_array[..., ::-1])  # RGB PIL image
            im.show()  # show image
            im.save('results.jpg')  # save image
        ```
    """
    if img is None and isinstance(self.orig_img, torch.Tensor):
        img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).to(torch.uint8).cpu().numpy()

    names = self.names
    pred_boxes, show_boxes = self.boxes, boxes
    pred_masks, show_masks = self.masks, masks
    pred_probs, show_probs = self.probs, probs
    annotator = Annotator(
        deepcopy(self.orig_img if img is None else img),
        line_width,
        font_size,
        font,
        pil or (pred_probs is not None and show_probs),  # Classify tasks default to pil=True
        example=names)

    # Plot Segment results
    if pred_masks and show_masks:
        if im_gpu is None:
            img = LetterBox(pred_masks.shape[1:])(image=annotator.result())
            im_gpu = torch.as_tensor(img, dtype=torch.float16, device=pred_masks.data.device).permute(
                2, 0, 1).flip(0).contiguous() / 255
        idx = pred_boxes.cls if pred_boxes else range(len(pred_masks))
        annotator.masks(pred_masks.data, colors=[colors(x, True) for x in idx], im_gpu=im_gpu)

    # Plot Detect results
    if pred_boxes and show_boxes:
        for d in reversed(pred_boxes):
            # Per-box values get their own names: the loop used to rebind the `conf` flag to the box score, so
            # one box scoring 0.0 silently disabled scores for every later box
            cls_id = int(d.cls)
            score = float(d.conf) if conf else None
            track_id = None if d.id is None else int(d.id.item())
            name = ('' if track_id is None else f'id:{track_id} ') + names[cls_id]
            label = (name if score is None else f'{name} {score:.2f}') if labels else None
            annotator.box_label(d.xyxy.squeeze(), label, color=colors(cls_id, True))

    # Plot Classify results
    if pred_probs is not None and show_probs:
        text = ',\n'.join(f'{names[j] if names else j} {pred_probs.data[j]:.2f}' for j in pred_probs.top5)
        x = round(self.orig_shape[0] * 0.03)  # text offset: 3% of the image height
        annotator.text([x, x], text, txt_color=(255, 255, 255))  # TODO: allow setting colors

    # Plot Pose results
    if self.keypoints is not None:
        for k in reversed(self.keypoints.data):
            annotator.kpts(k, self.orig_shape, radius=kpt_radius, kpt_line=kpt_line)

    return annotator.result()
+ """ + boxes = self.boxes + masks = self.masks + probs = self.probs + kpts = self.keypoints + texts = [] + if probs is not None: + # Classify + [texts.append(f'{probs.data[j]:.2f} {self.names[j]}') for j in probs.top5] + elif boxes: + # Detect/segment/pose + for j, d in enumerate(boxes): + c, conf, id = int(d.cls), float(d.conf), None if d.id is None else int(d.id.item()) + line = (c, *d.xywhn.view(-1)) + if masks: + seg = masks[j].xyn[0].copy().reshape(-1) # reversed mask.xyn, (n,2) to (n*2) + line = (c, *seg) + if kpts is not None: + kpt = torch.cat((kpts[j].xyn, kpts[j].conf[..., None]), 2) if kpts[j].has_visible else kpts[j].xyn + line += (*kpt.reshape(-1).tolist(), ) + line += (conf, ) * save_conf + (() if id is None else (id, )) + texts.append(('%g ' * len(line)).rstrip() % line) + + if texts: + Path(txt_file).parent.mkdir(parents=True, exist_ok=True) # make directory + with open(txt_file, 'a') as f: + f.writelines(text + '\n' for text in texts) + + def save_crop(self, save_dir, file_name=Path('im.jpg')): + """ + Save cropped predictions to `save_dir/cls/file_name.jpg`. + + Args: + save_dir (str | pathlib.Path): Save path. + file_name (str | pathlib.Path): File name. 
def tojson(self, normalize=False):
    """
    Convert the detections to a JSON string.

    Each detection becomes a dict with 'name', 'class', 'confidence' and 'box' (x1, y1, x2, y2), plus
    'track_id' when the boxes carry track IDs, 'segments' when masks are present and 'keypoints' when
    keypoints are present.

    Args:
        normalize (bool): Divide x values by the image width and y values by the image height taken from
            `self.orig_shape`.

    Returns:
        (str | None): JSON text (indent=2) of the detection list; an empty list when no boxes are stored.
            None, after a warning, when the result holds classification probabilities.
    """
    if self.probs is not None:
        LOGGER.warning('Warning: Classify task do not support `tojson` yet.')
        return

    import json

    # Create list of detection dictionaries
    results = []
    # `boxes` is None when the Results object was built without boxes; treat that as "no detections"
    data = [] if self.boxes is None else self.boxes.data.cpu().tolist()
    h, w = self.orig_shape if normalize else (1, 1)
    for i, row in enumerate(data):  # xyxy, track_id if tracking, conf, class_id
        box = {'x1': row[0] / w, 'y1': row[1] / h, 'x2': row[2] / w, 'y2': row[3] / h}
        conf = row[-2]
        class_id = int(row[-1])
        name = self.names[class_id]
        result = {'name': name, 'class': class_id, 'confidence': conf, 'box': box}
        if self.boxes.is_track:
            result['track_id'] = int(row[-3])  # track ID
        if self.masks:
            x, y = self.masks.xy[i][:, 0], self.masks.xy[i][:, 1]  # numpy array
            result['segments'] = {'x': (x / w).tolist(), 'y': (y / h).tolist()}
        if self.keypoints is not None:
            x, y, visible = self.keypoints[i].data[0].cpu().unbind(dim=1)  # torch Tensor
            result['keypoints'] = {'x': (x / w).tolist(), 'y': (y / h).tolist(), 'visible': visible.tolist()}
        results.append(result)

    # Convert detections to JSON
    return json.dumps(results, indent=2)
def __init__(self, boxes, orig_shape) -> None:
    """
    Store detection boxes together with the original image shape.

    Args:
        boxes (torch.Tensor | numpy.ndarray): Box data whose last dimension has 6 or 7 entries
            (xyxy, track_id, conf, cls); a 1-D input is treated as a single box.
        orig_shape (tuple): Original image size, in the format (height, width).
    """
    boxes = boxes[None, :] if boxes.ndim == 1 else boxes  # promote a single box to a batch of one
    n = boxes.shape[-1]
    assert n in (6, 7), f'expected `n` in [6, 7], but got {n}'  # xyxy, track_id, conf, cls
    super().__init__(boxes, orig_shape)
    self.is_track = (n == 7)  # a seventh column means track IDs are present
    self.orig_shape = orig_shape
class Masks(BaseTensor):
    """
    Container for detection masks that exposes them as polygon segments.

    Attributes:
        xy (list): Segments scaled to `orig_shape`, in pixel coordinates.
        xyn (list): Segments scaled to `orig_shape` and normalized.

    Device/dtype helpers (cpu, numpy, cuda, to) come from the BaseTensor parent class.
    """

    def __init__(self, masks, orig_shape) -> None:
        """Store the masks, adding a leading axis when a single 2-D mask is given."""
        stacked = masks[None, :] if masks.ndim == 2 else masks
        super().__init__(stacked, orig_shape)

    def _scaled_segments(self, normalize):
        """Convert every mask to a segment and rescale it from the mask size to the original image size."""
        return [
            ops.scale_coords(self.data.shape[1:], segment, self.orig_shape, normalize=normalize)
            for segment in ops.masks2segments(self.data)]

    @property
    @lru_cache(maxsize=1)
    def xyn(self):
        """Return normalized segments."""
        return self._scaled_segments(True)

    @property
    @lru_cache(maxsize=1)
    def xy(self):
        """Return segments in pixel coordinates."""
        return self._scaled_segments(False)
class Probs(BaseTensor):
    """
    Container for classification scores with convenience accessors for the best classes.

    Attributes:
        top1 (int): Index of the highest-scoring class.
        top5 (list[int]): Indices of the (up to) five highest-scoring classes, best first.
        top1conf: The entry of `data` at `top1`.
        top5conf: The entries of `data` at `top5`.

    Device/dtype helpers (cpu, numpy, cuda, to) come from the BaseTensor parent class.
    """

    def __init__(self, probs, orig_shape=None) -> None:
        """Store the classification probabilities and, optionally, the original image shape."""
        super().__init__(probs, orig_shape)

    @property
    @lru_cache(maxsize=1)
    def top1(self):
        """Return the index of top 1."""
        best = self.data.argmax()
        return int(best)

    @property
    @lru_cache(maxsize=1)
    def top5(self):
        """Return the indices of top 5."""
        # Sorting the negated scores ascending gives a descending order for both torch and numpy data
        descending = (-self.data).argsort(0)
        return descending[:5].tolist()

    @property
    @lru_cache(maxsize=1)
    def top1conf(self):
        """Return the confidence of top 1."""
        index = self.top1
        return self.data[index]

    @property
    @lru_cache(maxsize=1)
    def top5conf(self):
        """Return the confidences of top 5."""
        indices = self.top5
        return self.data[indices]
+ +Usage: + $ yolo mode=train model=yolov8n.pt data=coco128.yaml imgsz=640 epochs=100 batch=16 +""" + +import math +import os +import subprocess +import time +import warnings +from copy import deepcopy +from datetime import datetime, timedelta +from pathlib import Path + +import numpy as np +import torch +from torch import distributed as dist +from torch import nn, optim + +from ultralytics.cfg import get_cfg, get_save_dir +from ultralytics.data.utils import check_cls_dataset, check_det_dataset +from ultralytics.nn.tasks import attempt_load_one_weight, attempt_load_weights +from ultralytics.utils import (DEFAULT_CFG, LOGGER, RANK, TQDM, __version__, callbacks, clean_url, colorstr, emojis, + yaml_save) +from ultralytics.utils.autobatch import check_train_batch_size +from ultralytics.utils.checks import check_amp, check_file, check_imgsz, check_model_file_from_stem, print_args +from ultralytics.utils.dist import ddp_cleanup, generate_ddp_command +from ultralytics.utils.files import get_latest_run +from ultralytics.utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel, init_seeds, one_cycle, select_device, + strip_optimizer) + + +class BaseTrainer: + """ + BaseTrainer. + + A base class for creating trainers. + + Attributes: + args (SimpleNamespace): Configuration for the trainer. + validator (BaseValidator): Validator instance. + model (nn.Module): Model instance. + callbacks (defaultdict): Dictionary of callbacks. + save_dir (Path): Directory to save results. + wdir (Path): Directory to save weights. + last (Path): Path to the last checkpoint. + best (Path): Path to the best checkpoint. + save_period (int): Save checkpoint every x epochs (disabled if < 1). + batch_size (int): Batch size for training. + epochs (int): Number of epochs to train for. + start_epoch (int): Starting epoch for training. + device (torch.device): Device to use for training. + amp (bool): Flag to enable AMP (Automatic Mixed Precision). 
+ scaler (amp.GradScaler): Gradient scaler for AMP. + data (str): Path to data. + trainset (torch.utils.data.Dataset): Training dataset. + testset (torch.utils.data.Dataset): Testing dataset. + ema (nn.Module): EMA (Exponential Moving Average) of the model. + resume (bool): Resume training from a checkpoint. + lf (nn.Module): Loss function. + scheduler (torch.optim.lr_scheduler._LRScheduler): Learning rate scheduler. + best_fitness (float): The best fitness value achieved. + fitness (float): Current fitness value. + loss (float): Current loss value. + tloss (float): Total loss value. + loss_names (list): List of loss names. + csv (Path): Path to results CSV file. + """ + + def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): + """ + Initializes the BaseTrainer class. + + Args: + cfg (str, optional): Path to a configuration file. Defaults to DEFAULT_CFG. + overrides (dict, optional): Configuration overrides. Defaults to None. + """ + self.args = get_cfg(cfg, overrides) + self.check_resume(overrides) + self.device = select_device(self.args.device, self.args.batch) + self.validator = None + self.metrics = None + self.plots = {} + init_seeds(self.args.seed + 1 + RANK, deterministic=self.args.deterministic) + + # Dirs + self.save_dir = get_save_dir(self.args) + self.args.name = self.save_dir.name # update name for loggers + self.wdir = self.save_dir / 'weights' # weights dir + if RANK in (-1, 0): + self.wdir.mkdir(parents=True, exist_ok=True) # make dir + self.args.save_dir = str(self.save_dir) + yaml_save(self.save_dir / 'args.yaml', vars(self.args)) # save run args + self.last, self.best = self.wdir / 'last.pt', self.wdir / 'best.pt' # checkpoint paths + self.save_period = self.args.save_period + + self.batch_size = self.args.batch + self.epochs = self.args.epochs + self.start_epoch = 0 + if RANK == -1: + print_args(vars(self.args)) + + # Device + if self.device.type in ('cpu', 'mps'): + self.args.workers = 0 # faster CPU training as time dominated by 
def train(self):
    """
    Start training, launching a DDP subprocess when more than one device is requested.

    The world size is derived from `self.args.device`:
        - non-empty string: the number of comma-separated entries ('0' -> 1, '0,1,2,3' -> 4, 'cpu' -> 1)
        - tuple or list: its length
        - anything else (None, '', a number): 1 if CUDA is available, otherwise 0

    With a world size above 1 and no 'LOCAL_RANK' in the environment, the DDP command is generated and run as a
    subprocess; otherwise training runs in this process via `self._do_train(world_size)`.

    Raises:
        subprocess.CalledProcessError: If the DDP subprocess exits with a non-zero status.
    """
    device = self.args.device
    if isinstance(device, str) and len(device):  # i.e. device='0' or device='0,1,2,3' (also 'cpu', 'mps')
        world_size = len(device.split(','))
    elif isinstance(device, (tuple, list)):  # i.e. device=[0, 1, 2, 3] (multi-GPU from CLI is list)
        world_size = len(device)
    elif torch.cuda.is_available():  # i.e. device=None or device='' or device=number
        world_size = 1  # default to device 0
    else:  # no CUDA and no explicit device string/list
        world_size = 0

    # Run subprocess if DDP training, else train normally
    if world_size > 1 and 'LOCAL_RANK' not in os.environ:
        # Argument checks
        if self.args.rect:
            LOGGER.warning("WARNING ⚠️ 'rect=True' is incompatible with Multi-GPU training, setting 'rect=False'")
            self.args.rect = False
        if self.args.batch == -1:
            LOGGER.warning("WARNING ⚠️ 'batch=-1' for AutoBatch is incompatible with Multi-GPU training, setting "
                           "default 'batch=16'")
            self.args.batch = 16

        # Command
        cmd, file = generate_ddp_command(world_size, self)
        try:
            LOGGER.info(f'{colorstr("DDP:")} debug command {" ".join(cmd)}')
            subprocess.run(cmd, check=True)
        finally:
            # Always clean up, whether the subprocess succeeded or raised
            ddp_cleanup(self, str(file))

    else:
        self._do_train(world_size)
always freeze these layers + freeze_layer_names = [f'model.{x}.' for x in freeze_list] + always_freeze_names + for k, v in self.model.named_parameters(): + # v.register_hook(lambda x: torch.nan_to_num(x)) # NaN to 0 (commented for erratic training results) + if any(x in k for x in freeze_layer_names): + LOGGER.info(f"Freezing layer '{k}'") + v.requires_grad = False + elif not v.requires_grad: + LOGGER.info(f"WARNING ⚠️ setting 'requires_grad=True' for frozen layer '{k}'. " + 'See ultralytics.engine.trainer for customization of frozen layers.') + v.requires_grad = True + + # Check AMP + self.amp = torch.tensor(self.args.amp).to(self.device) # True or False + if self.amp and RANK in (-1, 0): # Single-GPU and DDP + callbacks_backup = callbacks.default_callbacks.copy() # backup callbacks as check_amp() resets them + self.amp = torch.tensor(check_amp(self.model), device=self.device) + callbacks.default_callbacks = callbacks_backup # restore callbacks + if RANK > -1 and world_size > 1: # DDP + dist.broadcast(self.amp, src=0) # broadcast the tensor from rank 0 to all other ranks (returns None) + self.amp = bool(self.amp) # as boolean + self.scaler = torch.cuda.amp.GradScaler(enabled=self.amp) + if world_size > 1: + self.model = nn.parallel.DistributedDataParallel(self.model, device_ids=[RANK]) + + # Check imgsz + gs = max(int(self.model.stride.max() if hasattr(self.model, 'stride') else 32), 32) # grid size (max stride) + self.args.imgsz = check_imgsz(self.args.imgsz, stride=gs, floor=gs, max_dim=1) + + # Batch size + if self.batch_size == -1 and RANK == -1: # single-GPU only, estimate best batch size + self.args.batch = self.batch_size = check_train_batch_size(self.model, self.args.imgsz, self.amp) + + # Dataloaders + batch_size = self.batch_size // max(world_size, 1) + self.train_loader = self.get_dataloader(self.trainset, batch_size=batch_size, rank=RANK, mode='train') + if RANK in (-1, 0): + self.test_loader = self.get_dataloader(self.testset, batch_size=batch_size * 
2, rank=-1, mode='val') + self.validator = self.get_validator() + metric_keys = self.validator.metrics.keys + self.label_loss_items(prefix='val') + self.metrics = dict(zip(metric_keys, [0] * len(metric_keys))) + self.ema = ModelEMA(self.model) + if self.args.plots: + self.plot_training_labels() + + # Optimizer + self.accumulate = max(round(self.args.nbs / self.batch_size), 1) # accumulate loss before optimizing + weight_decay = self.args.weight_decay * self.batch_size * self.accumulate / self.args.nbs # scale weight_decay + iterations = math.ceil(len(self.train_loader.dataset) / max(self.batch_size, self.args.nbs)) * self.epochs + self.optimizer = self.build_optimizer(model=self.model, + name=self.args.optimizer, + lr=self.args.lr0, + momentum=self.args.momentum, + decay=weight_decay, + iterations=iterations) + # Scheduler + if self.args.cos_lr: + self.lf = one_cycle(1, self.args.lrf, self.epochs) # cosine 1->hyp['lrf'] + else: + self.lf = lambda x: (1 - x / self.epochs) * (1.0 - self.args.lrf) + self.args.lrf # linear + self.scheduler = optim.lr_scheduler.LambdaLR(self.optimizer, lr_lambda=self.lf) + self.stopper, self.stop = EarlyStopping(patience=self.args.patience), False + self.resume_training(ckpt) + self.scheduler.last_epoch = self.start_epoch - 1 # do not move + self.run_callbacks('on_pretrain_routine_end') + + def _do_train(self, world_size=1): + """Train completed, evaluate and plot if specified by arguments.""" + if world_size > 1: + self._setup_ddp(world_size) + self._setup_train(world_size) + + self.epoch_time = None + self.epoch_time_start = time.time() + self.train_time_start = time.time() + nb = len(self.train_loader) # number of batches + nw = max(round(self.args.warmup_epochs * nb), 100) if self.args.warmup_epochs > 0 else -1 # warmup iterations + last_opt_step = -1 + self.run_callbacks('on_train_start') + LOGGER.info(f'Image sizes {self.args.imgsz} train, {self.args.imgsz} val\n' + f'Using {self.train_loader.num_workers * (world_size or 1)} 
dataloader workers\n' + f"Logging results to {colorstr('bold', self.save_dir)}\n" + f'Starting training for {self.epochs} epochs...') + if self.args.close_mosaic: + base_idx = (self.epochs - self.args.close_mosaic) * nb + self.plot_idx.extend([base_idx, base_idx + 1, base_idx + 2]) + epoch = self.epochs # predefine for resume fully trained model edge cases + for epoch in range(self.start_epoch, self.epochs): + self.epoch = epoch + self.run_callbacks('on_train_epoch_start') + self.model.train() + if RANK != -1: + self.train_loader.sampler.set_epoch(epoch) + pbar = enumerate(self.train_loader) + # Update dataloader attributes (optional) + if epoch == (self.epochs - self.args.close_mosaic): + self._close_dataloader_mosaic() + self.train_loader.reset() + + if RANK in (-1, 0): + LOGGER.info(self.progress_string()) + pbar = TQDM(enumerate(self.train_loader), total=nb) + self.tloss = None + self.optimizer.zero_grad() + for i, batch in pbar: + self.run_callbacks('on_train_batch_start') + # Warmup + ni = i + nb * epoch + if ni <= nw: + xi = [0, nw] # x interp + self.accumulate = max(1, np.interp(ni, xi, [1, self.args.nbs / self.batch_size]).round()) + for j, x in enumerate(self.optimizer.param_groups): + # Bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 + x['lr'] = np.interp( + ni, xi, [self.args.warmup_bias_lr if j == 0 else 0.0, x['initial_lr'] * self.lf(epoch)]) + if 'momentum' in x: + x['momentum'] = np.interp(ni, xi, [self.args.warmup_momentum, self.args.momentum]) + + # Forward + with torch.cuda.amp.autocast(self.amp): + batch = self.preprocess_batch(batch) + self.loss, self.loss_items = self.model(batch) + if RANK != -1: + self.loss *= world_size + self.tloss = (self.tloss * i + self.loss_items) / (i + 1) if self.tloss is not None \ + else self.loss_items + + # Backward + self.scaler.scale(self.loss).backward() + + # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html + if ni - last_opt_step >= self.accumulate: + self.optimizer_step() 
+ last_opt_step = ni + + # Log + mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) + loss_len = self.tloss.shape[0] if len(self.tloss.size()) else 1 + losses = self.tloss if loss_len > 1 else torch.unsqueeze(self.tloss, 0) + if RANK in (-1, 0): + pbar.set_description( + ('%11s' * 2 + '%11.4g' * (2 + loss_len)) % + (f'{epoch + 1}/{self.epochs}', mem, *losses, batch['cls'].shape[0], batch['img'].shape[-1])) + self.run_callbacks('on_batch_end') + if self.args.plots and ni in self.plot_idx: + self.plot_training_samples(batch, ni) + + self.run_callbacks('on_train_batch_end') + + self.lr = {f'lr/pg{ir}': x['lr'] for ir, x in enumerate(self.optimizer.param_groups)} # for loggers + + with warnings.catch_warnings(): + warnings.simplefilter('ignore') # suppress 'Detected lr_scheduler.step() before optimizer.step()' + self.scheduler.step() + self.run_callbacks('on_train_epoch_end') + + if RANK in (-1, 0): + + # Validation + self.ema.update_attr(self.model, include=['yaml', 'nc', 'args', 'names', 'stride', 'class_weights']) + final_epoch = (epoch + 1 == self.epochs) or self.stopper.possible_stop + + if self.args.val or final_epoch: + self.metrics, self.fitness = self.validate() + self.save_metrics(metrics={**self.label_loss_items(self.tloss), **self.metrics, **self.lr}) + self.stop = self.stopper(epoch + 1, self.fitness) + + # Save model + if self.args.save or (epoch + 1 == self.epochs): + self.save_model() + self.run_callbacks('on_model_save') + + tnow = time.time() + self.epoch_time = tnow - self.epoch_time_start + self.epoch_time_start = tnow + self.run_callbacks('on_fit_epoch_end') + torch.cuda.empty_cache() # clear GPU memory at end of epoch, may help reduce CUDA out of memory errors + + # Early Stopping + if RANK != -1: # if DDP training + broadcast_list = [self.stop if RANK == 0 else None] + dist.broadcast_object_list(broadcast_list, 0) # broadcast 'stop' to all ranks + if RANK != 0: + self.stop = broadcast_list[0] + if 
self.stop: + break # must break all DDP ranks + + if RANK in (-1, 0): + # Do final val with best.pt + LOGGER.info(f'\n{epoch - self.start_epoch + 1} epochs completed in ' + f'{(time.time() - self.train_time_start) / 3600:.3f} hours.') + self.final_eval() + if self.args.plots: + self.plot_metrics() + self.run_callbacks('on_train_end') + torch.cuda.empty_cache() + self.run_callbacks('teardown') + + def save_model(self): + """Save model training checkpoints with additional metadata.""" + import pandas as pd # scope for faster startup + metrics = {**self.metrics, **{'fitness': self.fitness}} + results = {k.strip(): v for k, v in pd.read_csv(self.csv).to_dict(orient='list').items()} + ckpt = { + 'epoch': self.epoch, + 'best_fitness': self.best_fitness, + 'model': deepcopy(de_parallel(self.model)).half(), + 'ema': deepcopy(self.ema.ema).half(), + 'updates': self.ema.updates, + 'optimizer': self.optimizer.state_dict(), + 'train_args': vars(self.args), # save as dict + 'train_metrics': metrics, + 'train_results': results, + 'date': datetime.now().isoformat(), + 'version': __version__} + + # Save last and best + torch.save(ckpt, self.last) + if self.best_fitness == self.fitness: + torch.save(ckpt, self.best) + if (self.save_period > 0) and (self.epoch > 0) and (self.epoch % self.save_period == 0): + torch.save(ckpt, self.wdir / f'epoch{self.epoch}.pt') + + @staticmethod + def get_dataset(data): + """ + Get train, val path from data dict if it exists. + + Returns None if data format is not recognized. + """ + return data['train'], data.get('val') or data.get('test') + + def setup_model(self): + """Load/create/download model for any task.""" + if isinstance(self.model, torch.nn.Module): # if model is loaded beforehand. 
def optimizer_step(self):
    """
    Apply one optimizer update.

    Gradients are unscaled, clipped to a total norm of 10.0, and applied through the gradient scaler; the
    gradients are then cleared and, when an EMA object is set, it is updated with the current model.
    """
    scaler, optimizer = self.scaler, self.optimizer

    scaler.unscale_(optimizer)  # unscale gradients so clipping sees their true magnitude
    torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=10.0)  # clip gradients
    scaler.step(optimizer)
    scaler.update()
    optimizer.zero_grad()

    if not self.ema:
        return
    self.ema.update(self.model)
def save_metrics(self, metrics):
    """
    Append one row of metrics to the CSV file at `self.csv`.

    A header row ('epoch' followed by the metric names) is written first when the file does not exist yet.
    Every cell is right-aligned to 23 characters; values use the '%23.5g' format and the epoch is stored
    1-based (`self.epoch + 1`).

    Args:
        metrics (dict): Mapping of column name to numeric value.
    """
    columns = ['epoch', *metrics.keys()]
    header = ''
    if not self.csv.exists():
        header = ','.join('%23s' % (column,) for column in columns).rstrip(',') + '\n'
    with open(self.csv, 'a') as f:
        row = [self.epoch + 1, *metrics.values()]
        f.write(header + ','.join('%23.5g' % (value,) for value in row).rstrip(',') + '\n')
def build_optimizer(self, model, name='auto', lr=0.001, momentum=0.9, decay=1e-5, iterations=1e5):
    """
    Construct an optimizer for the given model with separate parameter groups for biases and weights.

    Parameters are split into three groups: biases (no decay), normalization-layer weights (no decay) and all
    other weights (decayed by `decay`). The bias group is passed to the optimizer constructor and is therefore
    `param_groups[0]`; the decayed weights and the normalization weights are added as groups 1 and 2.

    Args:
        model (torch.nn.Module): The model for which to build an optimizer.
        name (str, optional): One of 'Adam', 'Adamax', 'AdamW', 'NAdam', 'RAdam', 'RMSProp', 'SGD' or 'auto'.
            With 'auto', `lr` and `momentum` are ignored: SGD (lr 0.01) is used when `iterations` > 10000,
            otherwise AdamW with a learning rate derived from the model's class count, and
            `self.args.warmup_bias_lr` is set to 0.0. Default: 'auto'.
        lr (float, optional): The learning rate for the optimizer. Default: 0.001.
        momentum (float, optional): Momentum (used as beta1 for the Adam family). Default: 0.9.
        decay (float, optional): Weight decay applied to the decayed weight group. Default: 1e-5.
        iterations (float, optional): The number of iterations, which determines the optimizer if
            name is 'auto'. Default: 1e5.

    Returns:
        (torch.optim.Optimizer): The constructed optimizer.

    Raises:
        NotImplementedError: If `name` is not one of the supported optimizers.
    """

    g = [], [], []  # optimizer parameter groups: weights with decay, norm weights, biases
    # Normalization layer classes, i.e. BatchNorm2d; keep only real classes so isinstance() cannot fail
    bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k and isinstance(v, type))
    if name == 'auto':
        LOGGER.info(f"{colorstr('optimizer:')} 'optimizer=auto' found, "
                    f"ignoring 'lr0={self.args.lr0}' and 'momentum={self.args.momentum}' and "
                    f"determining best 'optimizer', 'lr0' and 'momentum' automatically... ")
        nc = getattr(model, 'nc', 10)  # number of classes
        lr_fit = round(0.002 * 5 / (4 + nc), 6)  # lr0 fit equation to 6 decimal places
        name, lr, momentum = ('SGD', 0.01, 0.9) if iterations > 10000 else ('AdamW', lr_fit, 0.9)
        self.args.warmup_bias_lr = 0.0  # no higher than 0.01 for Adam

    for module_name, module in model.named_modules():
        for param_name, param in module.named_parameters(recurse=False):
            fullname = f'{module_name}.{param_name}' if module_name else param_name
            if 'bias' in fullname:  # bias (no decay)
                g[2].append(param)
            elif isinstance(module, bn):  # weight (no decay)
                g[1].append(param)
            else:  # weight (with decay)
                g[0].append(param)

    if name in ('Adam', 'Adamax', 'AdamW', 'NAdam', 'RAdam'):
        optimizer = getattr(optim, name, optim.Adam)(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0)
    elif name == 'RMSProp':
        optimizer = optim.RMSprop(g[2], lr=lr, momentum=momentum)
    elif name == 'SGD':
        optimizer = optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True)
    else:
        raise NotImplementedError(
            f"Optimizer '{name}' not found in list of available optimizers "
            '[Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, SGD, auto]. '
            'To request support for additional optimizers please visit https://github.com/ultralytics/ultralytics.')

    optimizer.add_param_group({'params': g[0], 'weight_decay': decay})  # add g0 with weight_decay
    optimizer.add_param_group({'params': g[1], 'weight_decay': 0.0})  # add g1 (BatchNorm2d weights)
    LOGGER.info(
        f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}, momentum={momentum}) with parameter groups "
        f'{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias(decay=0.0)')
    return optimizer
class Tuner:
    """
    Class responsible for hyperparameter tuning of YOLO models.

    The class evolves YOLO model hyperparameters over a given number of iterations by mutating them according to the
    search space and retraining the model to evaluate their performance.

    Attributes:
        args: Configuration object returned by `get_cfg(overrides=args)`.
        space (dict): Hyperparameter search space, `name -> (min, max)` or `name -> (min, max, gain)`.
        tune_dir (Path): Directory where evolution logs and results are saved.
        tune_csv (Path): `tune_dir / 'tune_results.csv'`; one row per iteration, fitness in the first column.
        callbacks: Callbacks passed in, or the defaults from `callbacks.get_default_callbacks()`.
        prefix (str): Prefix prepended to every log message emitted by the tuner.

    Methods:
        _mutate(parent, n, mutation, sigma) -> dict:
            Produces a new hyperparameter set within the bounds specified in `self.space`.

        __call__(model, iterations, cleanup):
            Executes the hyperparameter evolution across multiple iterations.

    Example:
        Tune hyperparameters for YOLOv8n on COCO8 with epochs=10 for 300 tuning iterations.
        ```python
        from ultralytics import YOLO

        model = YOLO('yolov8n.pt')
        model.tune(data='coco8.yaml', epochs=10, iterations=300, optimizer='AdamW', plots=False, save=False, val=False)
        ```
    """

    def __init__(self, args=DEFAULT_CFG, _callbacks=None):
        """
        Initialize the Tuner with configurations.

        Args:
            args (dict, optional): Configuration overrides for hyperparameter evolution.
            _callbacks (optional): Callbacks to use instead of the default set.
        """
        self.args = get_cfg(overrides=args)
        self.space = {  # key: (min, max, gain(optional))
            # 'optimizer': tune.choice(['SGD', 'Adam', 'AdamW', 'NAdam', 'RAdam', 'RMSProp']),
            'lr0': (1e-5, 1e-1),
            'lrf': (0.0001, 0.1),  # final OneCycleLR learning rate (lr0 * lrf)
            'momentum': (0.7, 0.98, 0.3),  # SGD momentum/Adam beta1
            'weight_decay': (0.0, 0.001),  # optimizer weight decay 5e-4
            'warmup_epochs': (0.0, 5.0),  # warmup epochs (fractions ok)
            'warmup_momentum': (0.0, 0.95),  # warmup initial momentum
            'box': (1.0, 20.0),  # box loss gain
            'cls': (0.2, 4.0),  # cls loss gain (scale with pixels)
            'dfl': (0.4, 6.0),  # dfl loss gain
            'hsv_h': (0.0, 0.1),  # image HSV-Hue augmentation (fraction)
            'hsv_s': (0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
            'hsv_v': (0.0, 0.9),  # image HSV-Value augmentation (fraction)
            'degrees': (0.0, 45.0),  # image rotation (+/- deg)
            'translate': (0.0, 0.9),  # image translation (+/- fraction)
            'scale': (0.0, 0.95),  # image scale (+/- gain)
            'shear': (0.0, 10.0),  # image shear (+/- deg)
            'perspective': (0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
            'flipud': (0.0, 1.0),  # image flip up-down (probability)
            'fliplr': (0.0, 1.0),  # image flip left-right (probability)
            'mosaic': (0.0, 1.0),  # image mosaic (probability)
            'mixup': (0.0, 1.0),  # image mixup (probability)
            'copy_paste': (0.0, 1.0)}  # segment copy-paste (probability)
        self.tune_dir = get_save_dir(self.args, name='tune')
        self.tune_csv = self.tune_dir / 'tune_results.csv'
        self.callbacks = _callbacks or callbacks.get_default_callbacks()
        self.prefix = colorstr('Tuner: ')
        callbacks.add_integration_callbacks(self)
        LOGGER.info(f"{self.prefix}Initialized Tuner instance with 'tune_dir={self.tune_dir}'\n"
                    f'{self.prefix}💡 Learn about tuning at https://docs.ultralytics.com/guides/hyperparameter-tuning')

    def _mutate(self, parent='single', n=5, mutation=0.8, sigma=0.2):
        """
        Mutates the hyperparameters based on bounds and scaling factors specified in `self.space`.

        When `self.tune_csv` exists, a parent row is derived from the `n` fittest logged rows and every gene is
        multiplied by a random factor. Otherwise the current values in `self.args` are used unmutated. In both
        cases the result is clamped to the `(min, max)` bounds and rounded to 5 decimals.

        Args:
            parent (str): Parent selection method: 'single' (fitness-weighted random pick of one row) or
                'weighted' (fitness-weighted average of the top rows).
            n (int): Number of parents to consider.
            mutation (float): Probability of a parameter mutation in any given iteration.
            sigma (float): Standard deviation for Gaussian random number generator.

        Returns:
            (dict): A dictionary containing mutated hyperparameters.
        """
        if self.tune_csv.exists():  # if CSV file exists: select best hyps and mutate
            # Select parent(s)
            x = np.loadtxt(self.tune_csv, ndmin=2, delimiter=',', skiprows=1)
            fitness = x[:, 0]  # first column
            n = min(n, len(x))  # number of previous results to consider
            x = x[np.argsort(-fitness)][:n]  # top n mutations
            w = x[:, 0] - x[:, 0].min() + 1E-6  # weights (sum > 0)
            if parent == 'single' or len(x) == 1:
                x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
            elif parent == 'weighted':
                x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination

            # Mutate
            r = np.random  # method
            r.seed(int(time.time()))  # NOTE: reseeds NumPy's global RNG on every call
            g = np.array([v[2] if len(v) == 3 else 1.0 for v in self.space.values()])  # gains 0-1
            ng = len(self.space)
            v = np.ones(ng)
            while all(v == 1):  # mutate until a change occurs (prevent duplicates)
                v = (g * (r.random(ng) < mutation) * r.randn(ng) * r.random() * sigma + 1).clip(0.3, 3.0)
            # Column 0 of the parent row is the fitness, so gene i lives in column i + 1
            hyp = {k: float(x[i + 1] * v[i]) for i, k in enumerate(self.space.keys())}
        else:
            hyp = {k: getattr(self.args, k) for k in self.space.keys()}

        # Constrain to limits
        for k, v in self.space.items():
            hyp[k] = max(hyp[k], v[0])  # lower limit
            hyp[k] = min(hyp[k], v[1])  # upper limit
            hyp[k] = round(hyp[k], 5)  # significant digits

        return hyp

    def __call__(self, model=None, iterations=10, cleanup=True):
        """
        Executes the hyperparameter evolution process when the Tuner instance is called.

        This method iterates through the number of iterations, performing the following steps in each iteration:
        1. Mutate the hyperparameters using the `_mutate` method.
        2. Train a YOLO model with the mutated hyperparameters in a `yolo train` subprocess.
        3. Log the fitness score and mutated hyperparameters to a CSV file.
        4. Keep the weights of the best iteration, plot the results and rewrite `best_hyperparameters.yaml`.

        A failed training run is logged as a warning and recorded with fitness 0.0; it does not stop the loop.

        Args:
            model (Model): Accepted for interface compatibility; not used by this method.
            iterations (int): The number of generations to run the evolution for.
            cleanup (bool): Whether to delete iteration weights to reduce storage space used during tuning.

        Note:
            The method utilizes the `self.tune_csv` Path object to read and log hyperparameters and fitness scores.
            Ensure this path is set correctly in the Tuner instance.
        """

        t0 = time.time()
        best_save_dir, best_metrics = None, None
        (self.tune_dir / 'weights').mkdir(parents=True, exist_ok=True)
        for i in range(iterations):
            # Mutate hyperparameters
            mutated_hyp = self._mutate()
            LOGGER.info(f'{self.prefix}Starting iteration {i + 1}/{iterations} with hyperparameters: {mutated_hyp}')

            metrics = {}
            train_args = {**vars(self.args), **mutated_hyp}
            save_dir = get_save_dir(get_cfg(train_args))
            weights_dir = save_dir / 'weights'
            try:
                # Train YOLO model with mutated hyperparameters (run in subprocess to avoid dataloader hang)
                cmd = ['yolo', 'train', *(f'{k}={v}' for k, v in train_args.items())]
                subprocess.run(cmd, check=True)  # check=True raises CalledProcessError on a non-zero exit code
                # Pick the checkpoint only after training, once the weights actually exist on disk
                ckpt_file = weights_dir / ('best.pt' if (weights_dir / 'best.pt').exists() else 'last.pt')
                metrics = torch.load(ckpt_file)['train_metrics']

            except Exception as e:
                LOGGER.warning(f'WARNING ❌️ training failure for hyperparameter tuning iteration {i + 1}\n{e}')

            # Save results and mutated_hyp to CSV
            fitness = metrics.get('fitness', 0.0)
            log_row = [round(fitness, 5)] + [mutated_hyp[k] for k in self.space.keys()]
            headers = '' if self.tune_csv.exists() else (','.join(['fitness'] + list(self.space.keys())) + '\n')
            with open(self.tune_csv, 'a') as f:
                f.write(headers + ','.join(map(str, log_row)) + '\n')

            # Get best results
            x = np.loadtxt(self.tune_csv, ndmin=2, delimiter=',', skiprows=1)
            fitness = x[:, 0]  # first column
            best_idx = fitness.argmax()
            # NOTE(review): comparing a CSV row index with the loop index assumes the CSV held no rows when this
            # call started - confirm against how `tune_dir` is created.
            best_is_current = best_idx == i
            if best_is_current:
                best_save_dir = save_dir
                best_metrics = {k: round(v, 5) for k, v in metrics.items()}
                for ckpt in weights_dir.glob('*.pt'):
                    shutil.copy2(ckpt, self.tune_dir / 'weights')
            elif cleanup:
                # The directory may be missing if training failed early, so never let cleanup abort the tuning loop
                shutil.rmtree(weights_dir, ignore_errors=True)

            # Plot tune results
            plot_tune_results(self.tune_csv)

            # Save and print tune results
            header = (f'{self.prefix}{i + 1}/{iterations} iterations complete ✅ ({time.time() - t0:.2f}s)\n'
                      f'{self.prefix}Results saved to {colorstr("bold", self.tune_dir)}\n'
                      f'{self.prefix}Best fitness={fitness[best_idx]} observed at iteration {best_idx + 1}\n'
                      f'{self.prefix}Best fitness metrics are {best_metrics}\n'
                      f'{self.prefix}Best fitness model is {best_save_dir}\n'
                      f'{self.prefix}Best fitness hyperparameters are printed below.\n')
            LOGGER.info('\n' + header)
            data = {k: float(x[best_idx, col + 1]) for col, k in enumerate(self.space.keys())}
            yaml_save(self.tune_dir / 'best_hyperparameters.yaml',
                      data=data,
                      header=remove_colorstr(header.replace(self.prefix, '# ')) + '\n')
            yaml_print(self.tune_dir / 'best_hyperparameters.yaml')
class BaseValidator:
    """
    BaseValidator.

    A base class for creating validators. Subclasses override the hook methods (`get_dataloader`, `preprocess`,
    `postprocess`, `init_metrics`, `update_metrics`, `get_stats`, ...) while `__call__` drives the validation loop.

    Attributes:
        args (SimpleNamespace): Configuration for the validator.
        dataloader (DataLoader): Dataloader to use for validation.
        pbar (tqdm): Progress bar to update during validation.
        stride: Model stride, set in `__call__` for standalone validation and used when building the dataloader.
        data (dict): Data dictionary.
        device (torch.device): Device to use for validation.
        batch_i (int): Current batch index.
        training (bool): True when validating on behalf of a trainer.
        names (dict): Class names.
        seen: Records the number of images seen so far during validation.
        stats: Placeholder for statistics during validation.
        confusion_matrix: Placeholder for a confusion matrix.
        nc: Number of classes.
        iouv (torch.Tensor): IoU thresholds read by `match_predictions`; initialised to None here.
        jdict (list): Entries dumped to `predictions.json`; reset to an empty list at the start of every run.
        speed (dict): Dictionary with keys 'preprocess', 'inference', 'loss', 'postprocess' and their respective
            processing times in milliseconds per image.
        save_dir (Path): Directory to save results.
        plots (dict): Dictionary to store plots for visualization.
        callbacks (dict): Dictionary to store various callback functions.
    """

    def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
        """
        Initializes a BaseValidator instance.

        Args:
            dataloader (torch.utils.data.DataLoader): Dataloader to be used for validation.
            save_dir (Path, optional): Directory to save results.
            pbar (tqdm.tqdm): Progress bar for displaying progress.
            args (SimpleNamespace): Configuration for the validator.
            _callbacks (dict): Dictionary to store various callback functions.
        """
        self.args = get_cfg(overrides=args)
        self.dataloader = dataloader
        self.pbar = pbar
        self.stride = None
        self.data = None
        self.device = None
        self.batch_i = None
        self.training = True
        self.names = None
        self.seen = None
        self.stats = None
        self.confusion_matrix = None
        self.nc = None
        self.iouv = None
        self.jdict = None
        self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}

        self.save_dir = save_dir or get_save_dir(self.args)
        (self.save_dir / 'labels' if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True)
        if self.args.conf is None:
            self.args.conf = 0.001  # default conf=0.001
        self.args.imgsz = check_imgsz(self.args.imgsz, max_dim=1)

        self.plots = {}
        self.callbacks = _callbacks or callbacks.get_default_callbacks()

    @smart_inference_mode()
    def __call__(self, trainer=None, model=None):
        """
        Supports validation of a pre-trained model if passed or a model being trained if trainer is passed (trainer
        gets priority).

        Args:
            trainer (optional): Trainer whose EMA (or raw) model, device and data are validated.
            model (optional): Model or weights handed to `AutoBackend` when no trainer is given.

        Returns:
            (dict): During training, stats merged with the labelled validation losses, rounded to 5 decimals.
                Otherwise the stats from `get_stats()`, passed through `eval_json()` when JSON results were saved.

        Raises:
            FileNotFoundError: In standalone mode, if `args.data` is not a YAML path and the task is not 'classify'.
        """
        self.training = trainer is not None
        augment = self.args.augment and (not self.training)
        if self.training:
            self.device = trainer.device
            self.data = trainer.data
            self.args.half = self.device.type != 'cpu'  # force FP16 val during training
            model = trainer.ema.ema or trainer.model
            model = model.half() if self.args.half else model.float()
            self.loss = torch.zeros_like(trainer.loss_items, device=trainer.device)
            self.args.plots &= trainer.stopper.possible_stop or (trainer.epoch == trainer.epochs - 1)
            model.eval()
        else:
            callbacks.add_integration_callbacks(self)
            model = AutoBackend(model or self.args.model,
                                device=select_device(self.args.device, self.args.batch),
                                dnn=self.args.dnn,
                                data=self.args.data,
                                fp16=self.args.half)
            self.device = model.device  # update device
            self.args.half = model.fp16  # update half
            stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
            imgsz = check_imgsz(self.args.imgsz, stride=stride)
            if engine:
                self.args.batch = model.batch_size
            elif not pt and not jit:
                self.args.batch = 1  # export.py models default to batch-size 1
                LOGGER.info(f'Forcing batch=1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models')

            if isinstance(self.args.data, str) and self.args.data.split('.')[-1] in ('yaml', 'yml'):
                self.data = check_det_dataset(self.args.data)
            elif self.args.task == 'classify':
                self.data = check_cls_dataset(self.args.data, split=self.args.split)
            else:
                raise FileNotFoundError(emojis(f"Dataset '{self.args.data}' for task={self.args.task} not found ❌"))

            if self.device.type in ('cpu', 'mps'):
                self.args.workers = 0  # faster CPU val as time dominated by inference, not dataloading
            if not pt:
                self.args.rect = False
            self.stride = model.stride  # used in get_dataloader() for padding
            self.dataloader = self.dataloader or self.get_dataloader(self.data.get(self.args.split), self.args.batch)

            model.eval()
            model.warmup(imgsz=(1 if pt else self.args.batch, 3, imgsz, imgsz))  # warmup

        self.run_callbacks('on_val_start')
        dt = Profile(), Profile(), Profile(), Profile()  # preprocess, inference, loss, postprocess timers
        bar = TQDM(self.dataloader, desc=self.get_desc(), total=len(self.dataloader))
        self.init_metrics(de_parallel(model))
        self.jdict = []  # empty before each val
        for batch_i, batch in enumerate(bar):
            self.run_callbacks('on_val_batch_start')
            self.batch_i = batch_i
            # Preprocess
            with dt[0]:
                batch = self.preprocess(batch)

            # Inference
            with dt[1]:
                preds = model(batch['img'], augment=augment)

            # Loss
            with dt[2]:
                if self.training:
                    self.loss += model.loss(batch, preds)[1]

            # Postprocess
            with dt[3]:
                preds = self.postprocess(preds)

            self.update_metrics(preds, batch)
            if self.args.plots and batch_i < 3:
                self.plot_val_samples(batch, batch_i)
                self.plot_predictions(batch, preds, batch_i)

            self.run_callbacks('on_val_batch_end')
        stats = self.get_stats()
        self.check_stats(stats)
        self.speed = dict(zip(self.speed.keys(), (x.t / len(self.dataloader.dataset) * 1E3 for x in dt)))
        self.finalize_metrics()
        self.print_results()
        self.run_callbacks('on_val_end')
        if self.training:
            model.float()
            results = {**stats, **trainer.label_loss_items(self.loss.cpu() / len(self.dataloader), prefix='val')}
            return {k: round(float(v), 5) for k, v in results.items()}  # return results as 5 decimal place floats
        else:
            LOGGER.info('Speed: %.1fms preprocess, %.1fms inference, %.1fms loss, %.1fms postprocess per image' %
                        tuple(self.speed.values()))
            if self.args.save_json and self.jdict:
                with open(str(self.save_dir / 'predictions.json'), 'w') as f:
                    LOGGER.info(f'Saving {f.name}...')
                    json.dump(self.jdict, f)  # flatten and save
                stats = self.eval_json(stats)  # update stats
            if self.args.plots or self.args.save_json:
                LOGGER.info(f"Results saved to {colorstr('bold', self.save_dir)}")
            return stats

    def match_predictions(self, pred_classes, true_classes, iou, use_scipy=False):
        """
        Matches predictions to ground truth objects (pred_classes, true_classes) using IoU.

        Args:
            pred_classes (torch.Tensor): Predicted class indices of shape(N,).
            true_classes (torch.Tensor): Target class indices of shape(M,).
            iou (torch.Tensor): Pairwise IoU values laid out as labels x detections, i.e. shape(M,N); it is
                multiplied element-wise with the (M,N) class-agreement mask.
            use_scipy (bool): Whether to use scipy for matching (more precise).

        Returns:
            (torch.Tensor): Boolean tensor of shape(N, len(self.iouv)); entry [d, t] is True when detection d is
                matched to a label at IoU threshold t.
        """
        # DxT matrix, where D - detections, T - IoU thresholds
        correct = np.zeros((pred_classes.shape[0], self.iouv.shape[0])).astype(bool)
        # LxD matrix where L - labels (rows), D - detections (columns)
        correct_class = true_classes[:, None] == pred_classes
        iou = iou * correct_class  # zero out the wrong classes
        iou = iou.cpu().numpy()
        if use_scipy:
            # WARNING: known issue that reduces mAP in https://github.com/ultralytics/ultralytics/pull/4708
            # Import the submodule explicitly; a bare `import scipy` does not guarantee `scipy.optimize` is loaded
            from scipy.optimize import linear_sum_assignment  # scope import to avoid importing for all commands
        for i, threshold in enumerate(self.iouv.cpu().tolist()):
            if use_scipy:
                cost_matrix = iou * (iou >= threshold)
                if cost_matrix.any():
                    labels_idx, detections_idx = linear_sum_assignment(cost_matrix, maximize=True)
                    valid = cost_matrix[labels_idx, detections_idx] > 0
                    if valid.any():
                        correct[detections_idx[valid], i] = True
            else:
                matches = np.nonzero(iou >= threshold)  # IoU >= threshold and classes match
                matches = np.array(matches).T  # rows of (label index, detection index)
                if matches.shape[0]:
                    if matches.shape[0] > 1:
                        # Sort by descending IoU, then keep one match per detection and one per label
                        matches = matches[iou[matches[:, 0], matches[:, 1]].argsort()[::-1]]
                        matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                        matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
                    correct[matches[:, 1].astype(int), i] = True
        return torch.tensor(correct, dtype=torch.bool, device=pred_classes.device)

    def add_callback(self, event: str, callback):
        """Appends the given callback."""
        self.callbacks[event].append(callback)

    def run_callbacks(self, event: str):
        """Runs all callbacks associated with a specified event."""
        for callback in self.callbacks.get(event, []):
            callback(self)

    def get_dataloader(self, dataset_path, batch_size):
        """Get data loader from dataset path and batch size."""
        raise NotImplementedError('get_dataloader function not implemented for this validator')

    def build_dataset(self, img_path):
        """Build dataset."""
        raise NotImplementedError('build_dataset function not implemented in validator')

    def preprocess(self, batch):
        """Preprocesses an input batch; the base implementation returns it unchanged."""
        return batch

    def postprocess(self, preds):
        """Postprocesses predictions; the base implementation returns them unchanged."""
        return preds

    def init_metrics(self, model):
        """Initialize performance metrics for the YOLO model."""
        pass

    def update_metrics(self, preds, batch):
        """Updates metrics based on predictions and batch."""
        pass

    def finalize_metrics(self, *args, **kwargs):
        """Finalizes and returns all metrics."""
        pass

    def get_stats(self):
        """Returns statistics about the model's performance."""
        return {}

    def check_stats(self, stats):
        """Checks statistics."""
        pass

    def print_results(self):
        """Prints the results of the model's predictions."""
        pass

    def get_desc(self):
        """Get description of the YOLO model."""
        pass

    @property
    def metric_keys(self):
        """Returns the metric keys used in YOLO training/validation."""
        return []

    def on_plot(self, name, data=None):
        """Registers plots (e.g. to be consumed in callbacks)"""
        self.plots[Path(name)] = {'data': data, 'timestamp': time.time()}

    # TODO: may need to put these following functions into callback
    def plot_val_samples(self, batch, ni):
        """Plots validation samples during training."""
        pass

    def plot_predictions(self, batch, preds, ni):
        """Plots YOLO model predictions on batch images."""
        pass

    def pred_to_json(self, preds, batch):
        """Convert predictions to JSON format."""
        pass

    def eval_json(self, stats):
        """Evaluate saved JSON predictions; the base implementation returns `stats` unchanged."""
        # Returning None here would make __call__ discard the computed stats whenever JSON results are saved
        return stats
def reset_model(model_id=''):
    """
    Reset a trained model to an untrained state.

    Posts to the HUB `model-reset` endpoint and logs the outcome; a failure is logged as a warning, not raised.

    Args:
        model_id (str, optional): Identifier of the HUB model to reset.
    """
    r = requests.post(f'{HUB_API_ROOT}/model-reset', json={'apiKey': Auth().api_key, 'modelId': model_id})
    if r.status_code == 200:
        LOGGER.info(f'{PREFIX}Model reset successfully')
        return
    LOGGER.warning(f'{PREFIX}Model reset failure {r.status_code} {r.reason}')


def export_fmts_hub():
    """Returns a list of HUB-supported export formats."""
    from ultralytics.engine.exporter import export_formats
    # All exporter 'Argument' entries except the first, plus the two HUB-only formats
    return list(export_formats()['Argument'][1:]) + ['ultralytics_tflite', 'ultralytics_coreml']


def export_model(model_id='', format='torchscript'):
    """
    Request an export of a HUB model to a single format.

    Args:
        model_id (str, optional): Identifier of the HUB model to export.
        format (str, optional): Target format; must be one of `export_fmts_hub()`.

    Raises:
        AssertionError: If `format` is unsupported or the HUB does not answer with HTTP 200.
    """
    # Explicit raises instead of `assert` so the checks still run under `python -O`;
    # AssertionError is kept so existing callers' handlers keep working.
    supported = export_fmts_hub()
    if format not in supported:
        raise AssertionError(f"Unsupported export format '{format}', valid formats are {supported}")
    r = requests.post(f'{HUB_API_ROOT}/v1/models/{model_id}/export',
                      json={'format': format},
                      headers={'x-api-key': Auth().api_key})
    if r.status_code != 200:
        raise AssertionError(f'{PREFIX}{format} export failure {r.status_code} {r.reason}')
    LOGGER.info(f'{PREFIX}{format} export started ✅')


def get_export(model_id='', format='torchscript'):
    """
    Get an exported model dictionary with download URL.

    Args:
        model_id (str, optional): Identifier of the HUB model.
        format (str, optional): Export format; must be one of `export_fmts_hub()`.

    Returns:
        The decoded JSON body of the HUB response.

    Raises:
        AssertionError: If `format` is unsupported or the HUB does not answer with HTTP 200.
    """
    # Explicit raises instead of `assert` so the checks still run under `python -O`
    supported = export_fmts_hub()
    if format not in supported:
        raise AssertionError(f"Unsupported export format '{format}', valid formats are {supported}")
    r = requests.post(f'{HUB_API_ROOT}/get-export',
                      json={
                          'apiKey': Auth().api_key,
                          'modelId': model_id,
                          'format': format})
    if r.status_code != 200:
        raise AssertionError(f'{PREFIX}{format} get_export failure {r.status_code} {r.reason}')
    return r.json()
class Auth:
    """
    Manages authentication processes including API key handling, cookie-based authentication, and header generation.

    The class supports different methods of authentication:
    1. Directly using an API key.
    2. Authenticating using browser cookies (specifically in Google Colab).
    3. Prompting the user to enter an API key.

    Attributes:
        id_token (str or bool): Token used for identity verification, initialized as False.
        api_key (str or bool): API key for authentication, initialized as False.
        model_key (bool): Placeholder for model key, initialized as False.
    """
    id_token = api_key = model_key = False

    def __init__(self, api_key='', verbose=False):
        """
        Initialize the Auth class with an optional API key.

        Args:
            api_key (str, optional): May be an API key or a combination API key and model ID, i.e. key_id
            verbose (bool, optional): Whether to log the outcome of the authentication attempt.
        """
        # Split the input API key in case it contains a combined key_model and keep only the API key part
        api_key = api_key.split('_')[0]

        # Set API key attribute as value passed or SETTINGS API key if none passed
        self.api_key = api_key or SETTINGS.get('api_key', '')

        # If an API key is provided
        if self.api_key:
            # If the provided API key matches the API key in the SETTINGS
            if self.api_key == SETTINGS.get('api_key'):
                # Log that the user is already logged in
                if verbose:
                    LOGGER.info(f'{PREFIX}Authenticated ✅')
                return
            else:
                # Attempt to authenticate with the provided API key
                success = self.authenticate()
        # If the API key is not provided and the environment is a Google Colab notebook
        elif is_colab():
            # Attempt to authenticate using browser cookies
            success = self.auth_with_cookies()
        else:
            # Request an API key
            success = self.request_api_key()

        # Update SETTINGS with the new API key after successful authentication
        if success:
            SETTINGS.update({'api_key': self.api_key})
            # Log that the new login was successful
            if verbose:
                LOGGER.info(f'{PREFIX}New authentication successful ✅')
        elif verbose:
            LOGGER.info(f'{PREFIX}Retrieve API key from {API_KEY_URL}')

    def request_api_key(self, max_attempts=3):
        """
        Prompt the user to input their API key, retrying up to `max_attempts` times.

        Args:
            max_attempts (int, optional): Maximum number of prompts before giving up.

        Returns:
            bool: True as soon as an entered key authenticates.

        Raises:
            ConnectionError: If no entered key authenticates within `max_attempts` prompts.
        """
        import getpass
        for attempts in range(max_attempts):
            LOGGER.info(f'{PREFIX}Login. Attempt {attempts + 1} of {max_attempts}')
            input_key = getpass.getpass(f'Enter API key from {API_KEY_URL} ')
            self.api_key = input_key.split('_')[0]  # remove model id if present
            if self.authenticate():
                return True
        raise ConnectionError(emojis(f'{PREFIX}Failed to authenticate ❌'))

    def authenticate(self) -> bool:
        """
        Attempt to authenticate with the server using either id_token or API key.

        On failure both `id_token` and `api_key` are reset to False and a warning is logged.

        Returns:
            bool: True if authentication is successful, False otherwise.
        """
        try:
            if header := self.get_auth_header():
                r = requests.post(f'{HUB_API_ROOT}/v1/auth', headers=header)
                if not r.json().get('success', False):
                    raise ConnectionError('Unable to authenticate.')
                return True
            raise ConnectionError('User has not authenticated locally.')
        except ConnectionError:
            self.id_token = self.api_key = False  # reset invalid
            LOGGER.warning(f'{PREFIX}Invalid API key ⚠️')
            return False

    def auth_with_cookies(self) -> bool:
        """
        Attempt to fetch authentication via cookies and set id_token. User must be logged in to HUB and running in a
        supported browser.

        Returns:
            bool: True if the fetched token is accepted by `authenticate()`, False otherwise.
        """
        if not is_colab():
            return False  # Currently only works with Colab
        try:
            authn = request_with_credentials(f'{HUB_API_ROOT}/v1/auth/auto')
            if authn.get('success', False):
                self.id_token = authn.get('data', {}).get('idToken', None)
                # Report the real verification result: authenticate() wipes the credentials when it fails, so
                # returning True unconditionally would make the caller persist an invalid key.
                return self.authenticate()
            raise ConnectionError('Unable to fetch browser authentication details.')
        except ConnectionError:
            self.id_token = False  # reset invalid
            return False

    def get_auth_header(self):
        """
        Get the authentication header for making API requests.

        The id_token takes precedence over the API key when both are set.

        Returns:
            (dict): The authentication header if id_token or API key is set, None otherwise.
        """
        if self.id_token:
            return {'authorization': f'Bearer {self.id_token}'}
        elif self.api_key:
            return {'x-api-key': self.api_key}
        return None
+ + Raises: + ValueError: If the provided model identifier is invalid. + ConnectionError: If connecting with global API key is not supported. + """ + + from ultralytics.hub.auth import Auth + + # Parse input + if url.startswith(f'{HUB_WEB_ROOT}/models/'): + url = url.split(f'{HUB_WEB_ROOT}/models/')[-1] + if [len(x) for x in url.split('_')] == [42, 20]: + key, model_id = url.split('_') + elif len(url) == 20: + key, model_id = '', url + else: + raise HUBModelError(f"model='{url}' not found. Check format is correct, i.e. " + f"model='{HUB_WEB_ROOT}/models/MODEL_ID' and try again.") + + # Authorize + auth = Auth(key) + self.agent_id = None # identifies which instance is communicating with server + self.model_id = model_id + self.model_url = f'{HUB_WEB_ROOT}/models/{model_id}' + self.api_url = f'{HUB_API_ROOT}/v1/models/{model_id}' + self.auth_header = auth.get_auth_header() + self.rate_limits = {'metrics': 3.0, 'ckpt': 900.0, 'heartbeat': 300.0} # rate limits (seconds) + self.timers = {} # rate limit timers (seconds) + self.metrics_queue = {} # metrics queue + self.model = self._get_model() + self.alive = True + self._start_heartbeat() # start heartbeats + self._register_signal_handlers() + LOGGER.info(f'{PREFIX}View model at {self.model_url} 🚀') + + def _register_signal_handlers(self): + """Register signal handlers for SIGTERM and SIGINT signals to gracefully handle termination.""" + signal.signal(signal.SIGTERM, self._handle_signal) + signal.signal(signal.SIGINT, self._handle_signal) + + def _handle_signal(self, signum, frame): + """ + Handle kill signals and prevent heartbeats from being sent on Colab after termination. + + This method does not use frame, it is included as it is passed by signal. + """ + if self.alive is True: + LOGGER.info(f'{PREFIX}Kill signal received! 
❌') + self._stop_heartbeat() + sys.exit(signum) + + def _stop_heartbeat(self): + """Terminate the heartbeat loop.""" + self.alive = False + + def upload_metrics(self): + """Upload model metrics to Ultralytics HUB.""" + payload = {'metrics': self.metrics_queue.copy(), 'type': 'metrics'} + smart_request('post', self.api_url, json=payload, headers=self.auth_header, code=2) + + def _get_model(self): + """Fetch and return model data from Ultralytics HUB.""" + api_url = f'{HUB_API_ROOT}/v1/models/{self.model_id}' + + try: + response = smart_request('get', api_url, headers=self.auth_header, thread=False, code=0) + data = response.json().get('data', None) + + if data.get('status', None) == 'trained': + raise ValueError(emojis(f'Model is already trained and uploaded to {self.model_url} 🚀')) + + if not data.get('data', None): + raise ValueError('Dataset may still be processing. Please wait a minute and try again.') # RF fix + self.model_id = data['id'] + + if data['status'] == 'new': # new model to start training + self.train_args = { + 'batch': data['batch_size'], # note HUB argument is slightly different + 'epochs': data['epochs'], + 'imgsz': data['imgsz'], + 'patience': data['patience'], + 'device': data['device'], + 'cache': data['cache'], + 'data': data['data']} + self.model_file = data.get('cfg') or data.get('weights') # cfg for pretrained=False + self.model_file = checks.check_yolov5u_filename(self.model_file, verbose=False) # YOLOv5->YOLOv5u + elif data['status'] == 'training': # existing model to resume training + self.train_args = {'data': data['data'], 'resume': True} + self.model_file = data['resume'] + + return data + except requests.exceptions.ConnectionError as e: + raise ConnectionRefusedError('ERROR: The HUB server is not online. Please try again later.') from e + except Exception: + raise + + def upload_model(self, epoch, weights, is_best=False, map=0.0, final=False): + """ + Upload a model checkpoint to Ultralytics HUB. 
+ + Args: + epoch (int): The current training epoch. + weights (str): Path to the model weights file. + is_best (bool): Indicates if the current model is the best one so far. + map (float): Mean average precision of the model. + final (bool): Indicates if the model is the final model after training. + """ + if Path(weights).is_file(): + with open(weights, 'rb') as f: + file = f.read() + else: + LOGGER.warning(f'{PREFIX}WARNING ⚠️ Model upload issue. Missing model {weights}.') + file = None + url = f'{self.api_url}/upload' + # url = 'http://httpbin.org/post' # for debug + data = {'epoch': epoch} + if final: + data.update({'type': 'final', 'map': map}) + filesize = Path(weights).stat().st_size + smart_request('post', + url, + data=data, + files={'best.pt': file}, + headers=self.auth_header, + retry=10, + timeout=3600, + thread=False, + progress=filesize, + code=4) + else: + data.update({'type': 'epoch', 'isBest': bool(is_best)}) + smart_request('post', url, data=data, files={'last.pt': file}, headers=self.auth_header, code=3) + + @threaded + def _start_heartbeat(self): + """Begin a threaded heartbeat loop to report the agent's status to Ultralytics HUB.""" + while self.alive: + r = smart_request('post', + f'{HUB_API_ROOT}/v1/agent/heartbeat/models/{self.model_id}', + json={ + 'agent': AGENT_NAME, + 'agentId': self.agent_id}, + headers=self.auth_header, + retry=0, + code=5, + thread=False) # already in a thread + self.agent_id = r.json().get('data', {}).get('agentId', None) + sleep(self.rate_limits['heartbeat']) diff --git a/ultralytics/hub/utils.py b/ultralytics/hub/utils.py new file mode 100644 index 0000000..f2621d7 --- /dev/null +++ b/ultralytics/hub/utils.py @@ -0,0 +1,221 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +import os +import platform +import random +import sys +import threading +import time +from pathlib import Path + +import requests + +from ultralytics.utils import (ENVIRONMENT, LOGGER, ONLINE, RANK, SETTINGS, TESTS_RUNNING, TQDM, TryExcept, 
def request_with_credentials(url: str) -> any:
    """
    Make an AJAX request with cookies attached in a Google Colab environment.

    The request is issued from the notebook's browser side: a JavaScript snippet POSTs to `url` with
    `credentials: 'include'` and stores the resulting promise on `window._hub_tmp`, which is then evaluated
    from Python.

    Args:
        url (str): The URL to make the request to.

    Returns:
        (any): The response data from the AJAX request.

    Raises:
        OSError: If the function is not run in a Google Colab environment.
    """
    # NOTE(review): the return annotation `any` is the builtin function, not `typing.Any`
    if not is_colab():
        raise OSError('request_with_credentials() must run in a Colab environment')
    # Imported lazily because these packages are only needed (and checked for) on the Colab path
    from google.colab import output  # noqa
    from IPython import display  # noqa
    # The promise rejects after 5000 ms unless the fetch chain clears the timeout first
    display.display(
        display.Javascript("""
            window._hub_tmp = new Promise((resolve, reject) => {
                const timeout = setTimeout(() => reject("Failed authenticating existing browser session"), 5000)
                fetch("%s", {
                    method: 'POST',
                    credentials: 'include'
                })
                    .then((response) => resolve(response.json()))
                    .then((json) => {
                        clearTimeout(timeout);
                    }).catch((err) => {
                        clearTimeout(timeout);
                        reject(err);
                    });
            });
            """ % url))
    return output.eval_js('_hub_tmp')
def smart_request(method, url, retry=3, timeout=30, thread=True, code=-1, verbose=True, progress=False, **kwargs):
    """
    Makes an HTTP request using the 'requests' library, with exponential backoff retries up to a specified timeout.

    Args:
        method (str): The HTTP method to use for the request. Choices are 'post' and 'get'.
        url (str): The URL to make the request to.
        retry (int, optional): Number of retries to attempt before giving up. Default is 3.
        timeout (int, optional): Timeout in seconds after which the function will give up retrying. Default is 30.
        thread (bool, optional): Whether to execute the request in a separate daemon thread. Default is True.
        code (int, optional): An identifier for the request, used for logging purposes. Default is -1.
        verbose (bool, optional): A flag to determine whether to print out to console or not. Default is True.
        progress (bool, optional): Whether to show a progress bar during the request. Default is False.
        **kwargs (dict): Keyword arguments to be passed to the requests function specified in method.

    Returns:
        (requests.Response): The HTTP response object. If the request is executed in a separate thread, returns None.
    """
    retry_codes = (408, 500)  # retry only these codes

    @TryExcept(verbose=verbose)
    def func(func_method, func_url, **func_kwargs):
        """Make HTTP requests with retries and timeouts, with optional progress tracking."""
        r = None  # response
        t0 = time.time()  # initial time for timer
        for i in range(retry + 1):
            if (time.time() - t0) > timeout:
                break
            r = requests_with_progress(func_method, func_url, **func_kwargs)  # i.e. get(url, data, json, files)
            if r.status_code < 300:  # return codes in the 2xx range are generally considered "good" or "successful"
                break
            try:
                m = r.json().get('message', 'No JSON message.')
            except (AttributeError, ValueError):
                # ValueError: body is not JSON at all; AttributeError: JSON is not an object (e.g. a list)
                m = 'Unable to read JSON.'
            if i == 0:  # only report the failure once, on the first attempt
                if r.status_code in retry_codes:
                    m += f' Retrying {retry}x for {timeout}s.' if retry else ''
                elif r.status_code == 429:  # rate limit
                    h = r.headers  # response headers
                    # Rate-limit headers are optional, so read them defensively
                    m = f"Rate limit reached ({h.get('X-RateLimit-Remaining', '?')}/" \
                        f"{h.get('X-RateLimit-Limit', '?')}). " \
                        f"Please retry after {h.get('Retry-After', '?')}s."
                if verbose:
                    LOGGER.warning(f'{PREFIX}{m} {HELP_MSG} ({r.status_code} #{code})')
            if r.status_code not in retry_codes:
                return r  # non-retryable failure, retrying cannot help
            if i < retry:  # no point in sleeping after the final attempt
                time.sleep(2 ** i)  # exponential standoff
        return r

    args = method, url
    kwargs['progress'] = progress
    if thread:
        threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True).start()
    else:
        return func(*args, **kwargs)
+ """ + + url = 'https://www.google-analytics.com/mp/collect?measurement_id=G-X8NCJYTQXM&api_secret=QLQrATrNSwGRFRLE-cbHJw' + + def __init__(self): + """Initializes the Events object with default values for events, rate_limit, and metadata.""" + self.events = [] # events list + self.rate_limit = 60.0 # rate limit (seconds) + self.t = 0.0 # rate limit timer (seconds) + self.metadata = { + 'cli': Path(sys.argv[0]).name == 'yolo', + 'install': 'git' if is_git_dir() else 'pip' if is_pip_package() else 'other', + 'python': '.'.join(platform.python_version_tuple()[:2]), # i.e. 3.10 + 'version': __version__, + 'env': ENVIRONMENT, + 'session_id': round(random.random() * 1E15), + 'engagement_time_msec': 1000} + self.enabled = \ + SETTINGS['sync'] and \ + RANK in (-1, 0) and \ + not TESTS_RUNNING and \ + ONLINE and \ + (is_pip_package() or get_git_origin_url() == 'https://github.com/ultralytics/ultralytics.git') + + def __call__(self, cfg): + """ + Attempts to add a new event to the events list and send events if the rate limit is reached. + + Args: + cfg (IterableSimpleNamespace): The configuration object containing mode and task information. 
+ """ + if not self.enabled: + # Events disabled, do nothing + return + + # Attempt to add to events + if len(self.events) < 25: # Events list limited to 25 events (drop any events past this) + params = { + **self.metadata, 'task': cfg.task, + 'model': cfg.model if cfg.model in GITHUB_ASSETS_NAMES else 'custom'} + if cfg.mode == 'export': + params['format'] = cfg.format + self.events.append({'name': cfg.mode, 'params': params}) + + # Check rate limit + t = time.time() + if (t - self.t) < self.rate_limit: + # Time is under rate limiter, wait to send + return + + # Time is over rate limiter, send now + data = {'client_id': SETTINGS['uuid'], 'events': self.events} # SHA-256 anonymized UUID hash and events list + + # POST equivalent to requests.post(self.url, json=data) + smart_request('post', self.url, json=data, retry=0, verbose=False) + + # Reset events and rate limit timer + self.events = [] + self.t = t + + +# Run below code on hub/utils init ------------------------------------------------------------------------------------- +events = Events() diff --git a/ultralytics/models/__init__.py b/ultralytics/models/__init__.py new file mode 100644 index 0000000..e96f893 --- /dev/null +++ b/ultralytics/models/__init__.py @@ -0,0 +1,7 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from .rtdetr import RTDETR +from .sam import SAM +from .yolo import YOLO + +__all__ = 'YOLO', 'RTDETR', 'SAM' # allow simpler import diff --git a/ultralytics/models/fastsam/__init__.py b/ultralytics/models/fastsam/__init__.py new file mode 100644 index 0000000..8f47772 --- /dev/null +++ b/ultralytics/models/fastsam/__init__.py @@ -0,0 +1,8 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from .model import FastSAM +from .predict import FastSAMPredictor +from .prompt import FastSAMPrompt +from .val import FastSAMValidator + +__all__ = 'FastSAMPredictor', 'FastSAM', 'FastSAMPrompt', 'FastSAMValidator' diff --git a/ultralytics/models/fastsam/model.py b/ultralytics/models/fastsam/model.py new file mode 
class FastSAM(Model):
    """
    FastSAM model interface.

    Example:
        ```python
        from ultralytics import FastSAM

        model = FastSAM('last.pt')
        results = model.predict('ultralytics/assets/bus.jpg')
        ```
    """

    def __init__(self, model='FastSAM-x.pt'):
        """Initialize the parent Model for the 'segment' task, mapping the 'FastSAM.pt' alias to 'FastSAM-x.pt'."""
        weights = 'FastSAM-x.pt' if str(model) == 'FastSAM.pt' else model
        suffix = Path(weights).suffix
        # YAML configs would mean building an untrained model, which FastSAM does not support
        assert suffix not in ('.yaml', '.yml'), 'FastSAM models only support pre-trained models.'
        super().__init__(model=weights, task='segment')

    @property
    def task_map(self):
        """Return the predictor and validator classes registered for the 'segment' task."""
        segment_classes = {'predictor': FastSAMPredictor, 'validator': FastSAMValidator}
        return {'segment': segment_classes}
    def postprocess(self, preds, img, orig_imgs):
        """
        Perform post-processing steps on predictions, including non-max suppression and scaling boxes to original image
        size, and returns the final results.

        Args:
            preds (list): The raw output predictions from the model.
            img (torch.Tensor): The processed image tensor.
            orig_imgs (list | torch.Tensor): The original image or list of images.

        Returns:
            (list): A list of Results objects, each containing processed boxes, masks, and other metadata.
        """
        p = ops.non_max_suppression(
            preds[0],
            self.args.conf,
            self.args.iou,
            agnostic=self.args.agnostic_nms,
            max_det=self.args.max_det,
            nc=1,  # set to 1 class since SAM has no class predictions
            classes=self.args.classes)
        # Build one synthetic detection row covering the whole input: x1 = y1 = 0, x2 = img.shape[3],
        # y2 = img.shape[2], column 4 and columns 6+ set to 1.0 (assumes img is laid out as N, C, H, W - TODO confirm)
        full_box = torch.zeros(p[0].shape[1], device=p[0].device)
        full_box[2], full_box[3], full_box[4], full_box[6:] = img.shape[3], img.shape[2], 1.0, 1.0
        full_box = full_box.view(1, -1)
        # Indices of detections in the FIRST image only (p[0]) whose IoU with the full-image box exceeds 0.9
        critical_iou_index = bbox_iou(full_box[0][:4], p[0][:, :4], iou_thres=0.9, image_shape=img.shape[2:])
        if critical_iou_index.numel() != 0:
            # Carry over column 4 and columns 6+ of the matched detection, then overwrite that detection
            # with the full-image box
            # NOTE(review): these assignments look like they expect exactly one matching index - confirm
            full_box[0][4] = p[0][critical_iou_index][:, 4]
            full_box[0][6:] = p[0][critical_iou_index][:, 6:]
            p[0][critical_iou_index] = full_box

        if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
            orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

        results = []
        proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
        for i, pred in enumerate(p):
            orig_img = orig_imgs[i]
            img_path = self.batch[0][i]
            if not len(pred):  # save empty boxes
                masks = None
            elif self.args.retina_masks:
                # Boxes are rescaled to the original image BEFORE the masks are computed at original resolution
                pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
                masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2])  # HWC
            else:
                # Masks are computed at network-input resolution first, then the boxes are rescaled
                masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True)  # HWC
                pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
            results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks))
        return results
class FastSAMPrompt:
    """
    Fast Segment Anything Model class for image annotation and visualization.

    Attributes:
        device (str): Computing device ('cuda' or 'cpu').
        results: Object detection or segmentation results.
        source: Source image or image path.
        clip: CLIP model for linear assignment.
    """

    def __init__(self, source, results, device='cuda') -> None:
        """Initializes FastSAMPrompt with given source, results and device, and assigns clip for linear assignment."""
        self.device = device
        self.results = results
        self.source = source

        # Import and assign clip
        try:
            import clip  # for linear_assignment
        except ImportError:
            from ultralytics.utils.checks import check_requirements
            check_requirements('git+https://github.com/openai/CLIP.git')
            import clip
        self.clip = clip

    @staticmethod
    def _segment_image(image, bbox):
        """Return a copy of `image` in which everything outside `bbox` (x1, y1, x2, y2) is painted white."""
        image_array = np.array(image)
        segmented_image_array = np.zeros_like(image_array)
        x1, y1, x2, y2 = bbox
        segmented_image_array[y1:y2, x1:x2] = image_array[y1:y2, x1:x2]
        segmented_image = Image.fromarray(segmented_image_array)
        black_image = Image.new('RGB', image.size, (255, 255, 255))  # despite the name, the canvas is white
        transparency_mask = np.zeros((image_array.shape[0], image_array.shape[1]), dtype=np.uint8)
        transparency_mask[y1:y2, x1:x2] = 255  # paste only the pixels inside the box
        transparency_mask_image = Image.fromarray(transparency_mask, mode='L')
        black_image.paste(segmented_image, mask=transparency_mask_image)
        return black_image

    @staticmethod
    def _format_results(result, filter=0):
        """
        Formats detection results into list of annotations each containing ID, segmentation, bounding box, score and
        area.

        Masks whose pixel count is below `filter` are skipped.
        """
        annotations = []
        n = len(result.masks.data) if result.masks is not None else 0
        for i in range(n):
            mask = result.masks.data[i] == 1.0
            if torch.sum(mask) >= filter:
                annotation = {
                    'id': i,
                    'segmentation': mask.cpu().numpy(),
                    'bbox': result.boxes.data[i],
                    'score': result.boxes.conf[i]}
                annotation['area'] = annotation['segmentation'].sum()
                annotations.append(annotation)
        return annotations

    @staticmethod
    def _get_bbox_from_mask(mask):
        """Return the [x1, y1, x2, y2] box that encloses every external contour found in a binary mask."""
        mask = mask.astype(np.uint8)
        contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        x1, y1, w, h = cv2.boundingRect(contours[0])
        x2, y2 = x1 + w, y1 + h
        if len(contours) > 1:
            # Grow the box until it covers all disconnected parts of the mask
            for b in contours:
                x_t, y_t, w_t, h_t = cv2.boundingRect(b)
                x1 = min(x1, x_t)
                y1 = min(y1, y_t)
                x2 = max(x2, x_t + w_t)
                y2 = max(y2, y_t + h_t)
        return [x1, y1, x2, y2]

    @staticmethod
    def _original_index(crop_index, filter_id, total):
        """
        Map an index into the list of kept (cropped) annotations back to its index in the full annotation list.

        Args:
            crop_index (int): Position within the kept annotations.
            filter_id (list): Indices of the annotations that were skipped.
            total (int): Number of annotations before filtering.

        Returns:
            (int): Index of the same annotation in the unfiltered list.
        """
        skipped = set(filter_id)
        kept = [i for i in range(total) if i not in skipped]
        return kept[crop_index]

    def plot(self,
             annotations,
             output,
             bbox=None,
             points=None,
             point_label=None,
             mask_random_color=True,
             better_quality=True,
             retina=False,
             with_contours=True):
        """
        Plots annotations, bounding boxes, and points on images and saves the output.

        Args:
            annotations (list): Annotations to be plotted.
            output (str or Path): Output directory for saving the plots.
            bbox (list, optional): Bounding box coordinates [x1, y1, x2, y2]. Defaults to None.
            points (list, optional): Points to be plotted. Defaults to None.
            point_label (list, optional): Labels for the points. Defaults to None.
            mask_random_color (bool, optional): Whether to use random color for masks. Defaults to True.
            better_quality (bool, optional): Whether to apply morphological transformations for better mask quality. Defaults to True.
            retina (bool, optional): Whether to use retina mask. Defaults to False.
            with_contours (bool, optional): Whether to plot contours. Defaults to True.
        """
        pbar = TQDM(annotations, total=len(annotations))
        for ann in pbar:
            result_name = os.path.basename(ann.path)
            image = ann.orig_img[..., ::-1]  # BGR to RGB
            original_h, original_w = ann.orig_shape
            # For macOS only
            # plt.switch_backend('TkAgg')
            plt.figure(figsize=(original_w / 100, original_h / 100))
            # Add subplot with no margin.
            plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
            plt.margins(0, 0)
            plt.gca().xaxis.set_major_locator(plt.NullLocator())
            plt.gca().yaxis.set_major_locator(plt.NullLocator())
            plt.imshow(image)

            if ann.masks is not None:
                masks = ann.masks.data
                if better_quality:
                    if isinstance(masks[0], torch.Tensor):
                        masks = np.array(masks.cpu())
                    for i, mask in enumerate(masks):
                        # Close small holes first, then open to remove small specks
                        mask = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8))
                        masks[i] = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_OPEN, np.ones((8, 8), np.uint8))

                self.fast_show_mask(masks,
                                    plt.gca(),
                                    random_color=mask_random_color,
                                    bbox=bbox,
                                    points=points,
                                    pointlabel=point_label,
                                    retinamask=retina,
                                    target_height=original_h,
                                    target_width=original_w)

                if with_contours:
                    contour_all = []
                    temp = np.zeros((original_h, original_w, 1))
                    for i, mask in enumerate(masks):
                        mask = mask.astype(np.uint8)
                        if not retina:
                            mask = cv2.resize(mask, (original_w, original_h), interpolation=cv2.INTER_NEAREST)
                        contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
                        contour_all.extend(iter(contours))
                    cv2.drawContours(temp, contour_all, -1, (255, 255, 255), 2)
                    color = np.array([0 / 255, 0 / 255, 1.0, 0.8])
                    contour_mask = temp / 255 * color.reshape(1, 1, -1)
                    plt.imshow(contour_mask)

            # Save the figure
            save_path = Path(output) / result_name
            save_path.parent.mkdir(exist_ok=True, parents=True)
            plt.axis('off')
            plt.savefig(save_path, bbox_inches='tight', pad_inches=0, transparent=True)
            plt.close()
            pbar.set_description(f'Saving {result_name} to {save_path}')

    @staticmethod
    def fast_show_mask(
        annotation,
        ax,
        random_color=False,
        bbox=None,
        points=None,
        pointlabel=None,
        retinamask=True,
        target_height=960,
        target_width=960,
    ):
        """
        Quickly shows the mask annotations on the given matplotlib axis.

        Args:
            annotation (array-like): Mask annotation.
            ax (matplotlib.axes.Axes): Matplotlib axis.
            random_color (bool, optional): Whether to use random color for masks. Defaults to False.
            bbox (list, optional): Bounding box coordinates [x1, y1, x2, y2]. Defaults to None.
            points (list, optional): Points to be plotted. Defaults to None.
            pointlabel (list, optional): Labels for the points. Defaults to None.
            retinamask (bool, optional): Whether to use retina mask. Defaults to True.
            target_height (int, optional): Target height for resizing. Defaults to 960.
            target_width (int, optional): Target width for resizing. Defaults to 960.
        """
        n, h, w = annotation.shape  # batch, height, width

        # Sort masks by area (ascending) so that, per pixel, the first non-zero mask is the smallest one
        areas = np.sum(annotation, axis=(1, 2))
        annotation = annotation[np.argsort(areas)]

        index = (annotation != 0).argmax(axis=0)
        if random_color:
            color = np.random.random((n, 1, 1, 3))
        else:
            color = np.ones((n, 1, 1, 3)) * np.array([30 / 255, 144 / 255, 1.0])
        transparency = np.ones((n, 1, 1, 1)) * 0.6
        visual = np.concatenate([color, transparency], axis=-1)
        mask_image = np.expand_dims(annotation, -1) * visual

        show = np.zeros((h, w, 4))
        h_indices, w_indices = np.meshgrid(np.arange(h), np.arange(w), indexing='ij')
        indices = (index[h_indices, w_indices], h_indices, w_indices, slice(None))

        show[h_indices, w_indices, :] = mask_image[indices]
        if bbox is not None:
            x1, y1, x2, y2 = bbox
            ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor='b', linewidth=1))
        # Draw point
        if points is not None:
            plt.scatter(
                [point[0] for i, point in enumerate(points) if pointlabel[i] == 1],
                [point[1] for i, point in enumerate(points) if pointlabel[i] == 1],
                s=20,
                c='y',
            )
            plt.scatter(
                [point[0] for i, point in enumerate(points) if pointlabel[i] == 0],
                [point[1] for i, point in enumerate(points) if pointlabel[i] == 0],
                s=20,
                c='m',
            )

        if not retinamask:
            show = cv2.resize(show, (target_width, target_height), interpolation=cv2.INTER_NEAREST)
        ax.imshow(show)

    @torch.no_grad()
    def retrieve(self, model, preprocess, elements, search_text: str, device) -> torch.Tensor:
        """Scores each image in `elements` against `search_text` and returns the softmax over those scores."""
        preprocessed_images = [preprocess(image).to(device) for image in elements]
        tokenized_text = self.clip.tokenize([search_text]).to(device)
        stacked_images = torch.stack(preprocessed_images)
        image_features = model.encode_image(stacked_images)
        text_features = model.encode_text(tokenized_text)
        image_features /= image_features.norm(dim=-1, keepdim=True)
        text_features /= text_features.norm(dim=-1, keepdim=True)
        probs = 100.0 * image_features @ text_features.T
        return probs[:, 0].softmax(dim=0)

    def _crop_image(self, format_results):
        """
        Crops an image based on provided annotation format and returns cropped images and related data.

        Returns:
            (tuple): (cropped_boxes, cropped_images, not_crop, filter_id, annotations). Note the naming:
                `cropped_boxes` holds the cropped PIL images, `cropped_images` holds their bounding boxes,
                `not_crop` is always empty and `filter_id` lists the indices of annotations that were skipped.
        """
        if os.path.isdir(self.source):
            raise ValueError(f"'{self.source}' is a directory, not a valid source for this function.")
        image = Image.fromarray(cv2.cvtColor(self.results[0].orig_img, cv2.COLOR_BGR2RGB))
        ori_w, ori_h = image.size
        annotations = format_results
        mask_h, mask_w = annotations[0]['segmentation'].shape
        if ori_w != mask_w or ori_h != mask_h:
            image = image.resize((mask_w, mask_h))
        cropped_boxes = []
        cropped_images = []
        not_crop = []
        filter_id = []
        for i, mask in enumerate(annotations):
            if np.sum(mask['segmentation']) <= 100:  # too small to crop meaningfully
                filter_id.append(i)
                continue
            bbox = self._get_bbox_from_mask(mask['segmentation'])  # bbox of the mask
            cropped_boxes.append(self._segment_image(image, bbox))  # save the cropped image
            cropped_images.append(bbox)  # save the bbox of the cropped image

        return cropped_boxes, cropped_images, not_crop, filter_id, annotations

    def box_prompt(self, bbox):
        """Modifies the bounding box properties and calculates IoU between masks and bounding box."""
        if self.results[0].masks is not None:
            assert (bbox[2] != 0 and bbox[3] != 0)
            if os.path.isdir(self.source):
                raise ValueError(f"'{self.source}' is a directory, not a valid source for this function.")
            masks = self.results[0].masks.data
            target_height, target_width = self.results[0].orig_shape
            h = masks.shape[1]
            w = masks.shape[2]
            if h != target_height or w != target_width:
                # Rescale the box from original-image coordinates to mask coordinates
                bbox = [
                    int(bbox[0] * w / target_width),
                    int(bbox[1] * h / target_height),
                    int(bbox[2] * w / target_width),
                    int(bbox[3] * h / target_height), ]
            bbox[0] = max(round(bbox[0]), 0)
            bbox[1] = max(round(bbox[1]), 0)
            bbox[2] = min(round(bbox[2]), w)
            bbox[3] = min(round(bbox[3]), h)

            bbox_area = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0])

            masks_area = torch.sum(masks[:, bbox[1]:bbox[3], bbox[0]:bbox[2]], dim=(1, 2))
            orig_masks_area = torch.sum(masks, dim=(1, 2))

            union = bbox_area + orig_masks_area - masks_area
            iou = masks_area / union
            max_iou_index = torch.argmax(iou)

            self.results[0].masks.data = torch.tensor(np.array([masks[max_iou_index].cpu().numpy()]))
        return self.results

    def point_prompt(self, points, pointlabel):  # numpy
        """Adjusts points on detected masks based on user input and returns the modified results."""
        if self.results[0].masks is not None:
            if os.path.isdir(self.source):
                raise ValueError(f"'{self.source}' is a directory, not a valid source for this function.")
            masks = self._format_results(self.results[0], 0)
            target_height, target_width = self.results[0].orig_shape
            h = masks[0]['segmentation'].shape[0]
            w = masks[0]['segmentation'].shape[1]
            if h != target_height or w != target_width:
                points = [[int(point[0] * w / target_width), int(point[1] * h / target_height)] for point in points]
            onemask = np.zeros((h, w))
            for annotation in masks:
                mask = annotation['segmentation'] if isinstance(annotation, dict) else annotation
                for i, point in enumerate(points):
                    # Label 1 adds the mask under the point, label 0 subtracts it
                    if mask[point[1], point[0]] == 1 and pointlabel[i] == 1:
                        onemask += mask
                    if mask[point[1], point[0]] == 1 and pointlabel[i] == 0:
                        onemask -= mask
            onemask = onemask >= 1
            self.results[0].masks.data = torch.tensor(np.array([onemask]))
        return self.results

    def text_prompt(self, text):
        """Processes a text prompt, applies it to existing results and returns the updated results."""
        if self.results[0].masks is not None:
            format_results = self._format_results(self.results[0], 0)
            cropped_boxes, cropped_images, not_crop, filter_id, annotations = self._crop_image(format_results)
            if not cropped_boxes:  # every mask was filtered out, so there is nothing to rank
                return self.results
            clip_model, preprocess = self.clip.load('ViT-B/32', device=self.device)
            scores = self.retrieve(clip_model, preprocess, cropped_boxes, text, device=self.device)
            best_crop = int(scores.argsort()[-1])
            # Scores are indexed over the kept crops only; translate back to the unfiltered annotation index
            max_idx = self._original_index(best_crop, filter_id, len(annotations))
            self.results[0].masks.data = torch.tensor(np.array([annotations[max_idx]['segmentation']]))
        return self.results

    def everything_prompt(self):
        """Returns the processed results from the previous methods in the class."""
        return self.results
def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False):
    """
    Compute the IoU between one reference box and a set of candidate boxes.

    The candidate boxes are first passed through `adjust_bboxes_to_image_border` before any IoU is computed.

    Args:
        box1 (torch.Tensor): (4, )
        boxes (torch.Tensor): (n, 4)
        iou_thres (float): IoU threshold
        image_shape (tuple): (height, width)
        raw_output (bool): If True, return the raw IoU values instead of the indices

    Returns:
        (torch.Tensor | int): Indices of boxes with IoU > iou_thres, or, when raw_output is True, the raw IoU
            values (0 if there are no boxes).
    """
    boxes = adjust_bboxes_to_image_border(boxes, image_shape)

    # Corners of the overlap rectangle
    left = torch.max(box1[0], boxes[:, 0])
    top = torch.max(box1[1], boxes[:, 1])
    right = torch.min(box1[2], boxes[:, 2])
    bottom = torch.min(box1[3], boxes[:, 3])

    # Negative extents mean no overlap and are clamped to zero
    overlap = (right - left).clamp(0) * (bottom - top).clamp(0)

    # Areas of the reference box and of every candidate
    reference_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    candidate_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    iou = overlap / (reference_area + candidate_area - overlap)  # Should be shape (n, )

    if not raw_output:
        # return indices of boxes with IoU > thres
        return torch.nonzero(iou > iou_thres).flatten()
    return iou if iou.numel() else 0
+1,40 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from ultralytics.models.yolo.segment import SegmentationValidator +from ultralytics.utils.metrics import SegmentMetrics + + +class FastSAMValidator(SegmentationValidator): + """ + Custom validation class for fast SAM (Segment Anything Model) segmentation in Ultralytics YOLO framework. + + Extends the SegmentationValidator class, customizing the validation process specifically for fast SAM. This class + sets the task to 'segment' and uses the SegmentMetrics for evaluation. Additionally, plotting features are disabled + to avoid errors during validation. + + Attributes: + dataloader: The data loader object used for validation. + save_dir (str): The directory where validation results will be saved. + pbar: A progress bar object. + args: Additional arguments for customization. + _callbacks: List of callback functions to be invoked during validation. + """ + + def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None): + """ + Initialize the FastSAMValidator class, setting the task to 'segment' and metrics to SegmentMetrics. + + Args: + dataloader (torch.utils.data.DataLoader): Dataloader to be used for validation. + save_dir (Path, optional): Directory to save results. + pbar (tqdm.tqdm): Progress bar for displaying progress. + args (SimpleNamespace): Configuration for the validator. + _callbacks (dict): Dictionary to store various callback functions. + + Notes: + Plots for ConfusionMatrix and other related metrics are disabled in this class to avoid errors. 
+ """ + super().__init__(dataloader, save_dir, pbar, args, _callbacks) + self.args.task = 'segment' + self.args.plots = False # disable ConfusionMatrix and other plots to avoid errors + self.metrics = SegmentMetrics(save_dir=self.save_dir, on_plot=self.on_plot) diff --git a/ultralytics/models/nas/__init__.py b/ultralytics/models/nas/__init__.py new file mode 100644 index 0000000..eec3837 --- /dev/null +++ b/ultralytics/models/nas/__init__.py @@ -0,0 +1,7 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from .model import NAS +from .predict import NASPredictor +from .val import NASValidator + +__all__ = 'NASPredictor', 'NASValidator', 'NAS' diff --git a/ultralytics/models/nas/model.py b/ultralytics/models/nas/model.py new file mode 100644 index 0000000..00d0b6e --- /dev/null +++ b/ultralytics/models/nas/model.py @@ -0,0 +1,83 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +""" +YOLO-NAS model interface. + +Example: + ```python + from ultralytics import NAS + + model = NAS('yolo_nas_s') + results = model.predict('ultralytics/assets/bus.jpg') + ``` +""" + +from pathlib import Path + +import torch + +from ultralytics.engine.model import Model +from ultralytics.utils.torch_utils import model_info, smart_inference_mode + +from .predict import NASPredictor +from .val import NASValidator + + +class NAS(Model): + """ + YOLO NAS model for object detection. + + This class provides an interface for the YOLO-NAS models and extends the `Model` class from Ultralytics engine. + It is designed to facilitate the task of object detection using pre-trained or custom-trained YOLO-NAS models. + + Example: + ```python + from ultralytics import NAS + + model = NAS('yolo_nas_s') + results = model.predict('ultralytics/assets/bus.jpg') + ``` + + Attributes: + model (str): Path to the pre-trained model or model name. Defaults to 'yolo_nas_s.pt'. + + Note: + YOLO-NAS models only support pre-trained models. Do not provide YAML configuration files. 
@smart_inference_mode()
def _load(self, weights: str, task: str):
    """
    Load NAS weights from a '.pt' file, or fetch a COCO-pretrained model by name when no suffix is given.

    Args:
        weights (str): Path to a '.pt' checkpoint, or a bare model name (no file suffix).
        task (str): Unused here; the model is always configured for 'detect'.

    Raises:
        NotImplementedError: If `weights` has a suffix other than '.pt' or ''.
    """
    import super_gradients

    suffix = Path(weights).suffix
    if suffix == '.pt':
        # NOTE(security): torch.load unpickles the file, so only load checkpoints from trusted sources.
        self.model = torch.load(weights)
    elif suffix == '':
        self.model = super_gradients.training.models.get(weights, pretrained_weights='coco')
    else:
        # Previously this fell through and failed later on the first attribute write below.
        raise NotImplementedError(
            f"YOLO-NAS cannot load '{weights}': expected a '.pt' file or a model name without suffix.")

    # Standardize model so the rest of the engine can treat it like other models
    self.model.fuse = lambda verbose=True: self.model
    self.model.stride = torch.tensor([32])
    self.model.names = dict(enumerate(self.model._class_names))
    self.model.is_fused = lambda: False  # for info()
    self.model.yaml = {}  # for info()
    self.model.pt_path = weights  # for export()
    self.model.task = 'detect'  # for export()
+ """ + return model_info(self.model, detailed=detailed, verbose=verbose, imgsz=640) + + @property + def task_map(self): + """Returns a dictionary mapping tasks to respective predictor and validator classes.""" + return {'detect': {'predictor': NASPredictor, 'validator': NASValidator}} diff --git a/ultralytics/models/nas/predict.py b/ultralytics/models/nas/predict.py new file mode 100644 index 0000000..0118527 --- /dev/null +++ b/ultralytics/models/nas/predict.py @@ -0,0 +1,58 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +import torch + +from ultralytics.engine.predictor import BasePredictor +from ultralytics.engine.results import Results +from ultralytics.utils import ops + + +class NASPredictor(BasePredictor): + """ + Ultralytics YOLO NAS Predictor for object detection. + + This class extends the `BasePredictor` from Ultralytics engine and is responsible for post-processing the + raw predictions generated by the YOLO NAS models. It applies operations like non-maximum suppression and + scaling the bounding boxes to fit the original image dimensions. + + Attributes: + args (Namespace): Namespace containing various configurations for post-processing. + + Example: + ```python + from ultralytics import NAS + + model = NAS('yolo_nas_s') + predictor = model.predictor + # Assumes that raw_preds, img, orig_imgs are available + results = predictor.postprocess(raw_preds, img, orig_imgs) + ``` + + Note: + Typically, this class is not instantiated directly. It is used internally within the `NAS` class. 
def postprocess(self, preds_in, img, orig_imgs):
    """
    Turn raw NAS outputs into a list of Results objects, one per image.

    Args:
        preds_in: Raw model output; `preds_in[0][0]` is used as xyxy boxes and `preds_in[0][1]` as class scores.
        img (torch.Tensor): Preprocessed input batch; its spatial shape is the source scale for the boxes.
        orig_imgs (list | torch.Tensor): Original images.

    Returns:
        (list[Results]): One Results object per image.
    """
    raw_boxes, class_scores = preds_in[0][0], preds_in[0][1]

    # Concatenate xywh boxes with class scores, then move the prediction axis last for NMS
    stacked = torch.cat((ops.xyxy2xywh(raw_boxes), class_scores), -1).permute(0, 2, 1)
    detections = ops.non_max_suppression(stacked,
                                         self.args.conf,
                                         self.args.iou,
                                         agnostic=self.args.agnostic_nms,
                                         max_det=self.args.max_det,
                                         classes=self.args.classes)

    # Input images given as a torch.Tensor are converted to a numpy batch first
    if not isinstance(orig_imgs, list):
        orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

    results = []
    for index, det in enumerate(detections):
        original = orig_imgs[index]
        # Rescale boxes from the network input size to the original image size
        det[:, :4] = ops.scale_boxes(img.shape[2:], det[:, :4], original.shape)
        source_path = self.batch[0][index]
        results.append(Results(original, path=source_path, names=self.model.names, boxes=det))
    return results
def postprocess(self, preds_in):
    """
    Apply non-maximum suppression to raw NAS prediction outputs.

    Args:
        preds_in: Raw model output; `preds_in[0][0]` is used as xyxy boxes and `preds_in[0][1]` as class scores.

    Returns:
        The value returned by `ops.non_max_suppression` for the batch.
    """
    raw_boxes, class_scores = preds_in[0][0], preds_in[0][1]
    xywh = ops.xyxy2xywh(raw_boxes)
    # Concatenate boxes with scores, then move the prediction axis last for NMS
    stacked = torch.cat((xywh, class_scores), -1).permute(0, 2, 1)
    nms_kwargs = dict(labels=self.lb,
                      multi_label=False,
                      agnostic=self.args.single_cls,
                      max_det=self.args.max_det,
                      max_time_img=0.5)
    return ops.non_max_suppression(stacked, self.args.conf, self.args.iou, **nms_kwargs)
def __init__(self, model='rtdetr-l.pt') -> None:
    """
    Initializes the RT-DETR model with the given pre-trained model file. Supports .pt and .yaml formats.

    Args:
        model (str | Path): Path to the pre-trained model or config. Defaults to 'rtdetr-l.pt'.

    Raises:
        NotImplementedError: If the model file extension is not 'pt', 'yaml', or 'yml'.
    """
    # str() keeps the extension check usable for pathlib.Path inputs, which have no .split().
    if model and str(model).rsplit('.', 1)[-1] not in ('pt', 'yaml', 'yml'):
        raise NotImplementedError('RT-DETR only supports creating from *.pt, *.yaml, or *.yml files.')
    super().__init__(model=model, task='detect')
def postprocess(self, preds, img, orig_imgs):
    """
    Convert raw RT-DETR predictions into Results objects, filtering by confidence and (optionally) class.

    Args:
        preds (torch.Tensor): Raw predictions from the model; `preds[0]` holds 4 box values followed by class scores.
        img (torch.Tensor): Processed input images.
        orig_imgs (list or torch.Tensor): Original, unprocessed images.

    Returns:
        (list[Results]): A list of Results objects containing the post-processed bounding boxes, confidence scores,
            and class labels.
    """
    raw = preds[0]
    n_values = raw.shape[-1]
    all_boxes, all_scores = raw.split((4, n_values - 4), dim=-1)

    # Input images given as a torch.Tensor are converted to a numpy batch first
    if not isinstance(orig_imgs, list):
        orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

    results = []
    for index, boxes_xywh in enumerate(all_boxes):
        boxes_xyxy = ops.xywh2xyxy(boxes_xywh)
        best_score, best_cls = all_scores[index].max(-1, keepdim=True)
        keep = best_score.squeeze(-1) > self.args.conf
        if self.args.classes is not None:
            wanted = torch.tensor(self.args.classes, device=best_cls.device)
            keep = (best_cls == wanted).any(1) & keep
        detections = torch.cat([boxes_xyxy, best_score, best_cls], dim=-1)[keep]

        original = orig_imgs[index]
        orig_h, orig_w = original.shape[:2]
        # Multiply x by original width and y by original height
        detections[..., [0, 2]] *= orig_w
        detections[..., [1, 3]] *= orig_h

        source_path = self.batch[0][index]
        results.append(Results(original, path=source_path, names=self.model.names, boxes=detections))
    return results
+ """ + letterbox = LetterBox(self.imgsz, auto=False, scaleFill=True) + return [letterbox(image=x) for x in im] diff --git a/ultralytics/models/rtdetr/train.py b/ultralytics/models/rtdetr/train.py new file mode 100644 index 0000000..26b7ea6 --- /dev/null +++ b/ultralytics/models/rtdetr/train.py @@ -0,0 +1,100 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from copy import copy + +import torch + +from ultralytics.models.yolo.detect import DetectionTrainer +from ultralytics.nn.tasks import RTDETRDetectionModel +from ultralytics.utils import RANK, colorstr + +from .val import RTDETRDataset, RTDETRValidator + + +class RTDETRTrainer(DetectionTrainer): + """ + Trainer class for the RT-DETR model developed by Baidu for real-time object detection. Extends the DetectionTrainer + class for YOLO to adapt to the specific features and architecture of RT-DETR. This model leverages Vision + Transformers and has capabilities like IoU-aware query selection and adaptable inference speed. + + Notes: + - F.grid_sample used in RT-DETR does not support the `deterministic=True` argument. + - AMP training can lead to NaN outputs and may produce errors during bipartite graph matching. + + Example: + ```python + from ultralytics.models.rtdetr.train import RTDETRTrainer + + args = dict(model='rtdetr-l.yaml', data='coco8.yaml', imgsz=640, epochs=3) + trainer = RTDETRTrainer(overrides=args) + trainer.train() + ``` + """ + + def get_model(self, cfg=None, weights=None, verbose=True): + """ + Initialize and return an RT-DETR model for object detection tasks. + + Args: + cfg (dict, optional): Model configuration. Defaults to None. + weights (str, optional): Path to pre-trained model weights. Defaults to None. + verbose (bool): Verbose logging if True. Defaults to True. + + Returns: + (RTDETRDetectionModel): Initialized model. 
def preprocess_batch(self, batch):
    """
    Preprocess a batch by delegating to the parent trainer's `preprocess_batch`.

    The previous implementation additionally built per-image `gt_bbox` / `gt_class` lists that were never
    used or returned; that dead work has been removed.

    Args:
        batch (dict): Dictionary containing a batch of images, bboxes, and labels.

    Returns:
        (dict): Preprocessed batch.
    """
    return super().preprocess_batch(batch)
def build_transforms(self, hyp=None):
    """
    Build the transform pipeline for this dataset (marked temporary upstream, only for evaluation).

    Args:
        hyp: Hyperparameter object; `mosaic`, `mixup`, `mask_ratio` and `overlap_mask` are read from it.

    Returns:
        The composed transforms with a final `Format` step appended.
    """
    if not self.augment:
        # transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), auto=False, scaleFill=True)])
        pipeline = Compose([])
    else:
        # Mosaic and mixup are zeroed when rect mode is active
        keep_mixing = self.augment and not self.rect
        hyp.mosaic = hyp.mosaic if keep_mixing else 0.0
        hyp.mixup = hyp.mixup if keep_mixing else 0.0
        pipeline = v8_transforms(self, self.imgsz, hyp, stretch=True)

    formatter = Format(bbox_format='xywh',
                       normalize=True,
                       return_mask=self.use_segments,
                       return_keypoint=self.use_keypoints,
                       batch_idx=True,
                       mask_ratio=hyp.mask_ratio,
                       mask_overlap=hyp.overlap_mask)
    pipeline.append(formatter)
    return pipeline
def postprocess(self, preds):
    """
    Convert raw RT-DETR outputs into per-image prediction tensors sorted by confidence.

    No confidence threshold is applied here; every box of each image is kept.

    Args:
        preds: Raw model output; `preds[0]` is a 3-D tensor holding 4 normalized xywh box values followed by
            class scores along its last dimension.

    Returns:
        (list[torch.Tensor]): One tensor per image with columns (x1, y1, x2, y2, score, class), sorted by
            descending score.
    """
    bs, _, nd = preds[0].shape  # also enforces a 3-D prediction tensor
    bboxes, scores = preds[0].split((4, nd - 4), dim=-1)
    # Scale out of place: `split` returns views, so `*=` would silently rescale the caller's `preds[0]`
    # and double-scale the boxes if the same predictions were post-processed again.
    bboxes = bboxes * self.args.imgsz

    outputs = []
    for bbox, image_scores in zip(bboxes, scores):  # one entry per image
        bbox = ops.xywh2xyxy(bbox)
        score, cls = image_scores.max(-1)
        pred = torch.cat([bbox, score[..., None], cls[..., None]], dim=-1)
        # Sort by confidence to correctly get internal metrics
        outputs.append(pred[score.argsort(descending=True)])
    return outputs
native-space pred + predn[..., [1, 3]] *= shape[0] / self.args.imgsz # native-space pred + + # Evaluate + if nl: + tbox = ops.xywh2xyxy(bbox) # target boxes + tbox[..., [0, 2]] *= shape[1] # native-space pred + tbox[..., [1, 3]] *= shape[0] # native-space pred + labelsn = torch.cat((cls, tbox), 1) # native-space labels + # NOTE: To get correct metrics, the inputs of `_process_batch` should always be float32 type. + correct_bboxes = self._process_batch(predn.float(), labelsn) + # TODO: maybe remove these `self.` arguments as they already are member variable + if self.args.plots: + self.confusion_matrix.process_batch(predn, labelsn) + self.stats.append((correct_bboxes, pred[:, 4], pred[:, 5], cls.squeeze(-1))) # (conf, pcls, tcls) + + # Save + if self.args.save_json: + self.pred_to_json(predn, batch['im_file'][si]) + if self.args.save_txt: + file = self.save_dir / 'labels' / f'{Path(batch["im_file"][si]).stem}.txt' + self.save_one_txt(predn, self.args.save_conf, shape, file) diff --git a/ultralytics/models/sam/__init__.py b/ultralytics/models/sam/__init__.py new file mode 100644 index 0000000..abf2eef --- /dev/null +++ b/ultralytics/models/sam/__init__.py @@ -0,0 +1,6 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from .model import SAM +from .predict import Predictor + +__all__ = 'SAM', 'Predictor' # tuple or list diff --git a/ultralytics/models/sam/amg.py b/ultralytics/models/sam/amg.py new file mode 100644 index 0000000..d7751d6 --- /dev/null +++ b/ultralytics/models/sam/amg.py @@ -0,0 +1,186 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +import math +from itertools import product +from typing import Any, Generator, List, Tuple + +import numpy as np +import torch + + +def is_box_near_crop_edge(boxes: torch.Tensor, + crop_box: List[int], + orig_box: List[int], + atol: float = 20.0) -> torch.Tensor: + """Return a boolean tensor indicating if boxes are near the crop edge.""" + crop_box_torch = torch.as_tensor(crop_box, dtype=torch.float, device=boxes.device) + 
def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None, None]:
    """
    Yield consecutive batches sliced in lockstep from every input sequence.

    Each yielded item is a list holding one slice of at most `batch_size` elements per input; the final
    batch may be shorter. All inputs must have the same length and at least one input is required.
    """
    same_length = args and all(len(seq) == len(args[0]) for seq in args)
    assert same_length, 'Batched iteration must have same-size inputs.'

    # Ceiling division: one extra batch when the length is not a multiple of batch_size
    full_batches, leftover = divmod(len(args[0]), batch_size)
    total_batches = full_batches + (1 if leftover else 0)
    for index in range(total_batches):
        start = index * batch_size
        yield [seq[start:start + batch_size] for seq in args]
def generate_crop_boxes(im_size: Tuple[int, ...], n_layers: int,
                        overlap_ratio: float) -> Tuple[List[List[int]], List[int]]:
    """
    Generates a list of crop boxes of different sizes.

    The first box is the full image (layer 0). Layer i (1-based) adds (2**i)**2 overlapping crops.
    Every box is [x0, y0, x1, y1], clipped to the image size.

    Args:
        im_size: (height, width) of the image.
        n_layers: Number of crop layers to add on top of the full-image box.
        overlap_ratio: Fraction of the short image side used to derive the overlap between crops.

    Returns:
        A tuple (crop_boxes, layer_idxs) of equal length, where layer_idxs[k] is the layer of crop_boxes[k].
    """
    im_h, im_w = im_size
    short_side = min(im_h, im_w)

    # Layer 0: the original image
    crop_boxes = [[0, 0, im_w, im_h]]
    layer_idxs = [0]

    def crop_len(orig_len, n_crops, overlap):
        """Return the crop side length so that n_crops crops with the given overlap span orig_len."""
        return int(math.ceil((overlap * (n_crops - 1) + orig_len) / n_crops))

    for layer in range(1, n_layers + 1):
        n_crops_per_side = 2 ** layer
        overlap = int(overlap_ratio * short_side * (2 / n_crops_per_side))

        crop_w = crop_len(im_w, n_crops_per_side, overlap)
        crop_h = crop_len(im_h, n_crops_per_side, overlap)

        stride_x, stride_y = crop_w - overlap, crop_h - overlap
        x_starts = [int(stride_x * k) for k in range(n_crops_per_side)]
        y_starts = [int(stride_y * k) for k in range(n_crops_per_side)]

        # Crops as [x0, y0, x1, y1], clipped at the right and bottom image edges
        for x0, y0 in product(x_starts, y_starts):
            crop_boxes.append([x0, y0, min(x0 + crop_w, im_w), min(y0 + crop_h, im_h)])
            layer_idxs.append(layer)

    return crop_boxes, layer_idxs
def batched_mask_to_box(masks: torch.Tensor) -> torch.Tensor:
    """
    Calculates boxes in XYXY format around masks.

    An empty mask yields [0, 0, 0, 0]. For input shape C1xC2x...xHxW the output shape is C1xC2x...x4;
    a plain HxW input yields a tensor of shape (4,).
    """
    # torch.max raises on empty inputs, so short-circuit that case
    if masks.numel() == 0:
        return torch.zeros(*masks.shape[:-2], 4, device=masks.device)

    orig_shape = masks.shape
    height, width = orig_shape[-2:]
    has_leading_dims = len(orig_shape) > 2
    # Normalize shape to CxHxW
    flat = masks.flatten(0, -3) if has_leading_dims else masks.unsqueeze(0)

    def _extent(occupied, size):
        """Return (lowest, highest) occupied index per mask along an axis of length `size`."""
        coords = occupied * torch.arange(size, device=occupied.device)[None, :]
        highest = coords.max(dim=-1).values
        # Unoccupied positions are pushed to `size` so they never win the minimum
        lowest = (coords + size * (~occupied)).min(dim=-1).values
        return lowest, highest

    top_edges, bottom_edges = _extent(flat.max(dim=-1).values, height)
    left_edges, right_edges = _extent(flat.max(dim=-2).values, width)

    # For an empty mask the right edge ends up left of the left edge; zero those boxes out
    is_empty = (right_edges < left_edges) | (bottom_edges < top_edges)
    boxes = torch.stack([left_edges, top_edges, right_edges, bottom_edges], dim=-1)
    boxes = boxes * (~is_empty).unsqueeze(-1)

    # Return to original shape
    return boxes.reshape(*orig_shape[:-2], 4) if has_leading_dims else boxes[0]
def _build_sam(encoder_embed_dim,
               encoder_depth,
               encoder_num_heads,
               encoder_global_attn_indexes,
               checkpoint=None,
               mobile_sam=False):
    """
    Builds the selected SAM model architecture and optionally loads its weights.

    Args:
        encoder_embed_dim (int | List[int]): Embedding dimension of the image encoder; one value per stage when
            `mobile_sam` is True.
        encoder_depth (int | List[int]): Number of encoder blocks; one value per stage when `mobile_sam` is True.
        encoder_num_heads (int | List[int]): Attention heads; one value per stage when `mobile_sam` is True.
        encoder_global_attn_indexes (List[int] | None): Indexes of the ViT blocks that use global attention. Not used
            when `mobile_sam` is True.
        checkpoint (str, optional): Weights file to load. It is resolved through `attempt_download_asset` first.
        mobile_sam (bool, optional): Build the TinyViT image encoder instead of the ViT one. Defaults to False.

    Returns:
        (Sam): The assembled model, switched to eval mode.
    """
    prompt_embed_dim = 256
    image_size = 1024
    vit_patch_size = 16
    image_embedding_size = image_size // vit_patch_size
    image_encoder = (TinyViT(
        img_size=image_size,
        in_chans=3,
        num_classes=1000,
        embed_dims=encoder_embed_dim,
        depths=encoder_depth,
        num_heads=encoder_num_heads,
        window_sizes=[7, 7, 14, 7],
        mlp_ratio=4.0,
        drop_rate=0.0,
        drop_path_rate=0.0,
        use_checkpoint=False,
        mbconv_expand_ratio=4.0,
        local_conv_size=3,
        layer_lr_decay=0.8,
    ) if mobile_sam else ImageEncoderViT(
        depth=encoder_depth,
        embed_dim=encoder_embed_dim,
        img_size=image_size,
        mlp_ratio=4,
        norm_layer=partial(torch.nn.LayerNorm, eps=1e-6),
        num_heads=encoder_num_heads,
        patch_size=vit_patch_size,
        qkv_bias=True,
        use_rel_pos=True,
        global_attn_indexes=encoder_global_attn_indexes,
        window_size=14,
        out_chans=prompt_embed_dim,
    ))
    sam = Sam(
        image_encoder=image_encoder,
        prompt_encoder=PromptEncoder(
            embed_dim=prompt_embed_dim,
            image_embedding_size=(image_embedding_size, image_embedding_size),
            input_image_size=(image_size, image_size),
            mask_in_chans=16,
        ),
        mask_decoder=MaskDecoder(
            num_multimask_outputs=3,
            transformer=TwoWayTransformer(
                depth=2,
                embedding_dim=prompt_embed_dim,
                mlp_dim=2048,
                num_heads=8,
            ),
            transformer_dim=prompt_embed_dim,
            iou_head_depth=3,
            iou_head_hidden_dim=256,
        ),
        pixel_mean=[123.675, 116.28, 103.53],
        pixel_std=[58.395, 57.12, 57.375],
    )
    if checkpoint is not None:
        checkpoint = attempt_download_asset(checkpoint)
        # NOTE: torch.load unpickles the file, so only checkpoints from a trusted source should be passed here.
        # map_location='cpu' lets weights that were saved from a CUDA device load on CPU-only hosts; the values are
        # copied into the freshly built (CPU) parameters by load_state_dict either way.
        with open(checkpoint, 'rb') as f:
            state_dict = torch.load(f, map_location='cpu')
        sam.load_state_dict(state_dict)
    sam.eval()
    return sam
class SAM(Model):
    """
    Interface class for the Segment Anything Model (SAM).

    Wraps a SAM network behind the generic `Model` API so it can be prompted with bounding boxes, points and labels
    for segmentation.
    """

    def __init__(self, model='sam_b.pt') -> None:
        """
        Set up the SAM wrapper from a pre-trained weights file.

        Args:
            model (str): Path to the pre-trained SAM weights. The file must use a .pt or .pth extension.

        Raises:
            NotImplementedError: If the weights file has any other extension.
        """
        if model:
            extension = Path(model).suffix
            if extension not in ('.pt', '.pth'):
                raise NotImplementedError('SAM prediction requires pre-trained *.pt or *.pth model.')
        super().__init__(model=model, task='segment')

    def _load(self, weights: str, task=None):
        """
        Build the SAM network for the given weights and store it on the instance.

        Args:
            weights (str): Path to the weights file.
            task (str, optional): Task name; not used by this implementation. Defaults to None.
        """
        self.model = build_sam(weights)

    def predict(self, source, stream=False, bboxes=None, points=None, labels=None, **kwargs):
        """
        Run prompted segmentation on an image or video source.

        The keys `conf`, `task`, `mode` and `imgsz` are always forced to SAM's fixed settings, replacing any values
        supplied through `kwargs`.

        Args:
            source (str): Path to the image or video file, or a PIL.Image object, or a numpy.ndarray object.
            stream (bool, optional): If True, enables real-time streaming. Defaults to False.
            bboxes (list, optional): Bounding box coordinates used as prompts. Defaults to None.
            points (list, optional): Points used as prompts. Defaults to None.
            labels (list, optional): Labels used as prompts. Defaults to None.

        Returns:
            (list): The model predictions.
        """
        kwargs.update(conf=0.25, task='segment', mode='predict', imgsz=1024)
        prompts = {'bboxes': bboxes, 'points': points, 'labels': labels}
        return super().predict(source, stream, prompts=prompts, **kwargs)

    def __call__(self, source=None, stream=False, bboxes=None, points=None, labels=None, **kwargs):
        """
        Make the instance callable by forwarding every argument to `predict`.

        Args:
            source (str): Path to the image or video file, or a PIL.Image object, or a numpy.ndarray object.
            stream (bool, optional): If True, enables real-time streaming. Defaults to False.
            bboxes (list, optional): Bounding box coordinates used as prompts. Defaults to None.
            points (list, optional): Points used as prompts. Defaults to None.
            labels (list, optional): Labels used as prompts. Defaults to None.

        Returns:
            (list): The model predictions.
        """
        return self.predict(source, stream, bboxes, points, labels, **kwargs)

    def info(self, detailed=False, verbose=True):
        """
        Report information about the wrapped SAM network via `model_info`.

        Args:
            detailed (bool, optional): If True, displays detailed information about the model. Defaults to False.
            verbose (bool, optional): If True, displays information on the console. Defaults to True.

        Returns:
            (tuple): A tuple containing the model's information.
        """
        return model_info(self.model, detailed=detailed, verbose=verbose)

    @property
    def task_map(self):
        """
        Map the 'segment' task to the predictor class that serves it.

        Returns:
            (dict): A dictionary mapping the 'segment' task to its corresponding 'Predictor'.
        """
        segment_entry = {'predictor': Predictor}
        return {'segment': segment_entry}
def __init__(
    self,
    *,
    transformer_dim: int,
    transformer: nn.Module,
    num_multimask_outputs: int = 3,
    activation: Type[nn.Module] = nn.GELU,
    iou_head_depth: int = 3,
    iou_head_hidden_dim: int = 256,
) -> None:
    """
    Build the tokens, upscaling path and prediction heads used to decode masks.

    All arguments are keyword-only.

    Args:
        transformer_dim (int): The channel dimension of the transformer module.
        transformer (nn.Module): The transformer used to predict masks.
        num_multimask_outputs (int): The number of masks to predict when disambiguating masks.
        activation (Type[nn.Module]): The activation class instantiated inside the upscaling path.
        iou_head_depth (int): The depth of the MLP used to predict mask quality.
        iou_head_hidden_dim (int): The hidden dimension of the MLP used to predict mask quality.
    """
    super().__init__()
    self.transformer_dim = transformer_dim
    self.transformer = transformer

    self.num_multimask_outputs = num_multimask_outputs

    # One learned token for the quality (IoU) prediction
    self.iou_token = nn.Embedding(1, transformer_dim)
    # One mask token more than the number of multimask outputs
    self.num_mask_tokens = num_multimask_outputs + 1
    self.mask_tokens = nn.Embedding(self.num_mask_tokens, transformer_dim)

    # Two stride-2 transposed convolutions: 4x spatial upscaling, channels reduced to transformer_dim // 8
    self.output_upscaling = nn.Sequential(
        nn.ConvTranspose2d(transformer_dim, transformer_dim // 4, kernel_size=2, stride=2),
        LayerNorm2d(transformer_dim // 4),
        activation(),
        nn.ConvTranspose2d(transformer_dim // 4, transformer_dim // 8, kernel_size=2, stride=2),
        activation(),
    )
    # One 3-layer MLP per mask token; its output width matches the channel count of the upscaled embedding
    self.output_hypernetworks_mlps = nn.ModuleList([
        MLP(transformer_dim, transformer_dim, transformer_dim // 8, 3) for _ in range(self.num_mask_tokens)])

    # Emits one quality score per mask token
    self.iou_prediction_head = MLP(transformer_dim, iou_head_hidden_dim, self.num_mask_tokens, iou_head_depth)
class MLP(nn.Module):
    """
    Stack of linear layers with ReLU between them, lightly adapted from
    https://github.com/facebookresearch/MaskFormer/blob/main/mask_former/modeling/transformer/transformer_predictor.py
    """

    def __init__(
        self,
        input_dim: int,
        hidden_dim: int,
        output_dim: int,
        num_layers: int,
        sigmoid_output: bool = False,
    ) -> None:
        """
        Create the chain of linear layers.

        Args:
            input_dim (int): The dimensionality of the input features.
            hidden_dim (int): The dimensionality of every intermediate layer.
            output_dim (int): The dimensionality of the output layer.
            num_layers (int): The total number of linear layers, output layer included.
            sigmoid_output (bool, optional): Apply a sigmoid activation to the output layer. Defaults to False.
        """
        super().__init__()
        self.num_layers = num_layers
        # Layer widths from input to output; consecutive pairs define each linear layer
        widths = [input_dim] + [hidden_dim] * (num_layers - 1) + [output_dim]
        self.layers = nn.ModuleList(nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(widths[:-1], widths[1:]))
        self.sigmoid_output = sigmoid_output

    def forward(self, x):
        """Run the input through every layer, applying ReLU after all but the last, then the optional sigmoid."""
        last_index = self.num_layers - 1
        for index, layer in enumerate(self.layers):
            x = layer(x)
            if index < last_index:
                x = F.relu(x)
        return torch.sigmoid(x) if self.sigmoid_output else x
def __init__(
    self,
    img_size: int = 1024,
    patch_size: int = 16,
    in_chans: int = 3,
    embed_dim: int = 768,
    depth: int = 12,
    num_heads: int = 12,
    mlp_ratio: float = 4.0,
    out_chans: int = 256,
    qkv_bias: bool = True,
    norm_layer: Type[nn.Module] = nn.LayerNorm,
    act_layer: Type[nn.Module] = nn.GELU,
    use_abs_pos: bool = True,
    use_rel_pos: bool = False,
    rel_pos_zero_init: bool = True,
    window_size: int = 0,
    global_attn_indexes: Tuple[int, ...] = (),
) -> None:
    """
    Build the patch embedding, the transformer blocks and the convolutional neck.

    Args:
        img_size (int): Input image size.
        patch_size (int): Patch size.
        in_chans (int): Number of input image channels.
        embed_dim (int): Patch embedding dimension.
        depth (int): Depth of ViT.
        num_heads (int): Number of attention heads in each ViT block.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
        out_chans (int): Number of channels produced by the neck.
        qkv_bias (bool): If True, add a learnable bias to query, key, value.
        norm_layer (nn.Module): Normalization layer.
        act_layer (nn.Module): Activation layer.
        use_abs_pos (bool): If True, use absolute positional embeddings.
        use_rel_pos (bool): If True, add relative positional embeddings to the attention map.
        rel_pos_zero_init (bool): If True, zero initialize relative positional parameters.
        window_size (int): Window size for window attention blocks.
        global_attn_indexes (list): Indexes for blocks using global attention.
    """
    super().__init__()
    self.img_size = img_size

    # Non-overlapping patches: kernel and stride both equal the patch size
    self.patch_embed = PatchEmbed(
        kernel_size=(patch_size, patch_size),
        stride=(patch_size, patch_size),
        in_chans=in_chans,
        embed_dim=embed_dim,
    )

    self.pos_embed: Optional[nn.Parameter] = None
    if use_abs_pos:
        # Initialize absolute positional embedding with pretrain image size.
        self.pos_embed = nn.Parameter(torch.zeros(1, img_size // patch_size, img_size // patch_size, embed_dim))

    self.blocks = nn.ModuleList()
    for i in range(depth):
        block = Block(
            dim=embed_dim,
            num_heads=num_heads,
            mlp_ratio=mlp_ratio,
            qkv_bias=qkv_bias,
            norm_layer=norm_layer,
            act_layer=act_layer,
            use_rel_pos=use_rel_pos,
            rel_pos_zero_init=rel_pos_zero_init,
            # Blocks listed in global_attn_indexes get window_size 0; every other block uses windowed attention
            window_size=window_size if i not in global_attn_indexes else 0,
            input_size=(img_size // patch_size, img_size // patch_size),
        )
        self.blocks.append(block)

    # Neck: 1x1 conv to out_chans, then a 3x3 conv, each followed by LayerNorm2d
    self.neck = nn.Sequential(
        nn.Conv2d(
            embed_dim,
            out_chans,
            kernel_size=1,
            bias=False,
        ),
        LayerNorm2d(out_chans),
        nn.Conv2d(
            out_chans,
            out_chans,
            kernel_size=3,
            padding=1,
            bias=False,
        ),
        LayerNorm2d(out_chans),
    )
def __init__(
    self,
    embed_dim: int,
    image_embedding_size: Tuple[int, int],
    input_image_size: Tuple[int, int],
    mask_in_chans: int,
    activation: Type[nn.Module] = nn.GELU,
) -> None:
    """
    Encodes prompts for input to SAM's mask decoder.

    Args:
        embed_dim (int): The prompts' embedding dimension.
        image_embedding_size (Tuple[int, int]): The spatial size of the image embedding, as (H, W).
        input_image_size (Tuple[int, int]): The padded size of the image as input to the image encoder, as (H, W).
        mask_in_chans (int): The number of hidden channels used for encoding input masks.
        activation (Type[nn.Module]): The activation class instantiated inside the mask downscaling path.
    """
    super().__init__()
    self.embed_dim = embed_dim
    self.input_image_size = input_image_size
    self.image_embedding_size = image_embedding_size
    # Half the embedding width is requested from the positional layer
    self.pe_layer = PositionEmbeddingRandom(embed_dim // 2)

    self.num_point_embeddings: int = 4  # pos/neg point + 2 box corners
    point_embeddings = [nn.Embedding(1, embed_dim) for _ in range(self.num_point_embeddings)]
    self.point_embeddings = nn.ModuleList(point_embeddings)
    self.not_a_point_embed = nn.Embedding(1, embed_dim)

    # Mask inputs are 4x the embedding resolution; the two stride-2 convolutions below bring them back down
    self.mask_input_size = (4 * image_embedding_size[0], 4 * image_embedding_size[1])
    self.mask_downscaling = nn.Sequential(
        nn.Conv2d(1, mask_in_chans // 4, kernel_size=2, stride=2),
        LayerNorm2d(mask_in_chans // 4),
        activation(),
        nn.Conv2d(mask_in_chans // 4, mask_in_chans, kernel_size=2, stride=2),
        LayerNorm2d(mask_in_chans),
        activation(),
        nn.Conv2d(mask_in_chans, embed_dim, kernel_size=1),
    )
    # Learned embedding kept for the case where no mask prompt is supplied
    self.no_mask_embed = nn.Embedding(1, embed_dim)
def _get_batch_size(
    self,
    points: Optional[Tuple[torch.Tensor, torch.Tensor]],
    boxes: Optional[torch.Tensor],
    masks: Optional[torch.Tensor],
) -> int:
    """
    Derive the output batch size from whichever prompt is present.

    Points take precedence over boxes, and boxes over masks; with no prompt at all the batch size is 1.
    """
    if points is not None:
        # points is a (coords, labels) pair; the coordinates carry the batch dimension
        return points[0].shape[0]
    for prompt in (boxes, masks):
        if prompt is not None:
            return prompt.shape[0]
    return 1
class PositionEmbeddingRandom(nn.Module):
    """
    Positional encoding using random spatial frequencies.

    2D coordinates are projected through a fixed random Gaussian matrix and encoded as the concatenation of the sine
    and cosine of the projection, so the encoding has `2 * num_pos_feats` channels.
    """

    def __init__(self, num_pos_feats: int = 64, scale: Optional[float] = None) -> None:
        """
        Initializes a position embedding using random spatial frequencies.

        Args:
            num_pos_feats (int): Number of random frequencies; the encoding has twice as many channels.
            scale (float, optional): Multiplier for the random matrix. None or a non-positive value means 1.0.
        """
        super().__init__()
        if scale is None or scale <= 0.0:
            scale = 1.0
        self.register_buffer('positional_encoding_gaussian_matrix', scale * torch.randn((2, num_pos_feats)))

        # Set non-deterministic for forward() error 'cumsum_cuda_kernel does not have a deterministic implementation'
        # NOTE: both settings are process-wide, not local to this module.
        torch.use_deterministic_algorithms(False)
        torch.backends.cudnn.deterministic = False

    def _pe_encoding(self, coords: torch.Tensor) -> torch.Tensor:
        """Positionally encode points that are normalized to [0,1]."""
        # Assuming coords are in [0, 1]^2 square and have d_1 x ... x d_n x 2 shape
        coords = 2 * coords - 1
        coords = coords @ self.positional_encoding_gaussian_matrix
        coords = 2 * np.pi * coords
        # Outputs d_1 x ... x d_n x C shape
        return torch.cat([torch.sin(coords), torch.cos(coords)], dim=-1)

    def forward(self, size: Tuple[int, int]) -> torch.Tensor:
        """
        Generate positional encoding for a grid of the specified size.

        Args:
            size (Tuple[int, int]): Grid size as (H, W).

        Returns:
            (torch.Tensor): Encoding of shape C x H x W, evaluated at the cell centers of the grid.
        """
        h, w = size
        device: Any = self.positional_encoding_gaussian_matrix.device
        grid = torch.ones((h, w), device=device, dtype=torch.float32)
        # Cumulative sums give 1..h / 1..w; subtracting 0.5 moves to cell centers before normalizing to [0, 1]
        y_embed = grid.cumsum(dim=0) - 0.5
        x_embed = grid.cumsum(dim=1) - 0.5
        y_embed = y_embed / h
        x_embed = x_embed / w

        pe = self._pe_encoding(torch.stack([x_embed, y_embed], dim=-1))
        return pe.permute(2, 0, 1)  # C x H x W

    def forward_with_coords(self, coords_input: torch.Tensor, image_size: Tuple[int, int]) -> torch.Tensor:
        """
        Positionally encode points that are not normalized to [0,1].

        Args:
            coords_input (torch.Tensor): Pixel coordinates of shape B x N x 2, ordered (x, y). The tensor is not
                modified.
            image_size (Tuple[int, int]): Image size as (H, W) used for normalization.

        Returns:
            (torch.Tensor): Encoding of shape B x N x C.
        """
        if coords_input.is_floating_point():
            coords = coords_input.clone()
        else:
            # Writing the normalized (fractional) values back into an integer tensor would truncate them to 0 or 1,
            # so integer coordinates are converted to float before the in-place normalization.
            coords = coords_input.to(torch.float)
        coords[:, :, 0] = coords[:, :, 0] / image_size[1]
        coords[:, :, 1] = coords[:, :, 1] / image_size[0]
        return self._pe_encoding(coords.to(torch.float))  # B x N x C
class Attention(nn.Module):
    """Multi-head self-attention over a 2D token grid, with optional decomposed relative position embeddings."""

    def __init__(
        self,
        dim: int,
        num_heads: int = 8,
        qkv_bias: bool = True,
        use_rel_pos: bool = False,
        rel_pos_zero_init: bool = True,
        input_size: Optional[Tuple[int, int]] = None,
    ) -> None:
        """
        Create the projections and, when requested, the relative position tables.

        Args:
            dim (int): Number of input channels.
            num_heads (int): Number of attention heads.
            qkv_bias (bool): If True, add a learnable bias to query, key, value.
            use_rel_pos (bool): If True, add relative positional embeddings to the attention map.
            rel_pos_zero_init (bool): If True, zero initialize relative positional parameters.
            input_size (tuple(int, int), None): Input resolution for calculating the relative
                positional parameter size. Required when `use_rel_pos` is True.
        """
        super().__init__()
        self.num_heads = num_heads
        channels_per_head = dim // num_heads
        self.scale = channels_per_head ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.proj = nn.Linear(dim, dim)

        self.use_rel_pos = use_rel_pos
        if self.use_rel_pos:
            assert (input_size is not None), 'Input size must be provided if using relative positional encoding.'
            # One table per axis, covering every relative offset in [-(size - 1), size - 1]
            grid_h, grid_w = input_size[0], input_size[1]
            self.rel_pos_h = nn.Parameter(torch.zeros(2 * grid_h - 1, channels_per_head))
            self.rel_pos_w = nn.Parameter(torch.zeros(2 * grid_w - 1, channels_per_head))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Attend over all H * W positions of a (B, H, W, C) input and return a tensor of the same layout."""
        batch, height, width, _ = x.shape
        tokens = height * width
        # (3, B, nHead, H * W, C)
        qkv = self.qkv(x).reshape(batch, tokens, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
        # Each of q, k, v: (B * nHead, H * W, C)
        q, k, v = qkv.reshape(3, batch * self.num_heads, tokens, -1).unbind(0)

        scores = (q * self.scale) @ k.transpose(-2, -1)
        if self.use_rel_pos:
            grid = (height, width)
            scores = add_decomposed_rel_pos(scores, q, self.rel_pos_h, self.rel_pos_w, grid, grid)

        weights = scores.softmax(dim=-1)
        # Split the heads back out and fold them into the channel dimension
        merged = (weights @ v).view(batch, self.num_heads, height, width, -1)
        merged = merged.permute(0, 2, 3, 1, 4).reshape(batch, height, width, -1)
        return self.proj(merged)
+ Args: + x (tensor): input tokens with [B, H, W, C]. + window_size (int): window size. + + Returns: + windows: windows after partition with [B * num_windows, window_size, window_size, C]. + (Hp, Wp): padded height and width before partition + """ + B, H, W, C = x.shape + + pad_h = (window_size - H % window_size) % window_size + pad_w = (window_size - W % window_size) % window_size + if pad_h > 0 or pad_w > 0: + x = F.pad(x, (0, 0, 0, pad_w, 0, pad_h)) + Hp, Wp = H + pad_h, W + pad_w + + x = x.view(B, Hp // window_size, window_size, Wp // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows, (Hp, Wp) + + +def window_unpartition(windows: torch.Tensor, window_size: int, pad_hw: Tuple[int, int], + hw: Tuple[int, int]) -> torch.Tensor: + """ + Window unpartition into original sequences and removing padding. + + Args: + windows (tensor): input tokens with [B * num_windows, window_size, window_size, C]. + window_size (int): window size. + pad_hw (Tuple): padded height and width (Hp, Wp). + hw (Tuple): original height and width (H, W) before padding. + + Returns: + x: unpartitioned sequences with [B, H, W, C]. + """ + Hp, Wp = pad_hw + H, W = hw + B = windows.shape[0] // (Hp * Wp // window_size // window_size) + x = windows.view(B, Hp // window_size, Wp // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, Hp, Wp, -1) + + if Hp > H or Wp > W: + x = x[:, :H, :W, :].contiguous() + return x + + +def get_rel_pos(q_size: int, k_size: int, rel_pos: torch.Tensor) -> torch.Tensor: + """ + Get relative positional embeddings according to the relative positions of query and key sizes. + + Args: + q_size (int): size of query q. + k_size (int): size of key k. + rel_pos (Tensor): relative position embeddings (L, C). + + Returns: + Extracted positional embeddings according to relative positions. 
+ """ + max_rel_dist = int(2 * max(q_size, k_size) - 1) + # Interpolate rel pos if needed. + if rel_pos.shape[0] != max_rel_dist: + # Interpolate rel pos. + rel_pos_resized = F.interpolate( + rel_pos.reshape(1, rel_pos.shape[0], -1).permute(0, 2, 1), + size=max_rel_dist, + mode='linear', + ) + rel_pos_resized = rel_pos_resized.reshape(-1, max_rel_dist).permute(1, 0) + else: + rel_pos_resized = rel_pos + + # Scale the coords with short length if shapes for q and k are different. + q_coords = torch.arange(q_size)[:, None] * max(k_size / q_size, 1.0) + k_coords = torch.arange(k_size)[None, :] * max(q_size / k_size, 1.0) + relative_coords = (q_coords - k_coords) + (k_size - 1) * max(q_size / k_size, 1.0) + + return rel_pos_resized[relative_coords.long()] + + +def add_decomposed_rel_pos( + attn: torch.Tensor, + q: torch.Tensor, + rel_pos_h: torch.Tensor, + rel_pos_w: torch.Tensor, + q_size: Tuple[int, int], + k_size: Tuple[int, int], +) -> torch.Tensor: + """ + Calculate decomposed Relative Positional Embeddings from mvitv2 paper at + https://github.com/facebookresearch/mvit/blob/main/mvit/models/attention.py. + + Args: + attn (Tensor): attention map. + q (Tensor): query q in the attention layer with shape (B, q_h * q_w, C). + rel_pos_h (Tensor): relative position embeddings (Lh, C) for height axis. + rel_pos_w (Tensor): relative position embeddings (Lw, C) for width axis. + q_size (Tuple): spatial sequence size of query q with (q_h, q_w). + k_size (Tuple): spatial sequence size of key k with (k_h, k_w). + + Returns: + attn (Tensor): attention map with added relative positional embeddings. 
+ """ + q_h, q_w = q_size + k_h, k_w = k_size + Rh = get_rel_pos(q_h, k_h, rel_pos_h) + Rw = get_rel_pos(q_w, k_w, rel_pos_w) + + B, _, dim = q.shape + r_q = q.reshape(B, q_h, q_w, dim) + rel_h = torch.einsum('bhwc,hkc->bhwk', r_q, Rh) + rel_w = torch.einsum('bhwc,wkc->bhwk', r_q, Rw) + + attn = (attn.view(B, q_h, q_w, k_h, k_w) + rel_h[:, :, :, :, None] + rel_w[:, :, :, None, :]).view( + B, q_h * q_w, k_h * k_w) + + return attn + + +class PatchEmbed(nn.Module): + """Image to Patch Embedding.""" + + def __init__( + self, + kernel_size: Tuple[int, int] = (16, 16), + stride: Tuple[int, int] = (16, 16), + padding: Tuple[int, int] = (0, 0), + in_chans: int = 3, + embed_dim: int = 768, + ) -> None: + """ + Initialize PatchEmbed module. + + Args: + kernel_size (Tuple): kernel size of the projection layer. + stride (Tuple): stride of the projection layer. + padding (Tuple): padding size of the projection layer. + in_chans (int): Number of input image channels. + embed_dim (int): Patch embedding dimension. + """ + super().__init__() + + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=kernel_size, stride=stride, padding=padding) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Computes patch embedding by applying convolution and transposing resulting tensor.""" + return self.proj(x).permute(0, 2, 3, 1) # B C H W -> B H W C diff --git a/ultralytics/models/sam/modules/sam.py b/ultralytics/models/sam/modules/sam.py new file mode 100644 index 0000000..4097a22 --- /dev/null +++ b/ultralytics/models/sam/modules/sam.py @@ -0,0 +1,64 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
+ +from typing import List + +import torch +from torch import nn + +from .decoders import MaskDecoder +from .encoders import ImageEncoderViT, PromptEncoder + + +class Sam(nn.Module): + """ + Sam (Segment Anything Model) is designed for object segmentation tasks. It uses image encoders to generate image + embeddings, and prompt encoders to encode various types of input prompts. These embeddings are then used by the mask + decoder to predict object masks. + + Attributes: + mask_threshold (float): Threshold value for mask prediction. + image_format (str): Format of the input image, default is 'RGB'. + image_encoder (ImageEncoderViT): The backbone used to encode the image into embeddings. + prompt_encoder (PromptEncoder): Encodes various types of input prompts. + mask_decoder (MaskDecoder): Predicts object masks from the image and prompt embeddings. + pixel_mean (List[float]): Mean pixel values for image normalization. + pixel_std (List[float]): Standard deviation values for image normalization. + """ + mask_threshold: float = 0.0 + image_format: str = 'RGB' + + def __init__( + self, + image_encoder: ImageEncoderViT, + prompt_encoder: PromptEncoder, + mask_decoder: MaskDecoder, + pixel_mean: List[float] = (123.675, 116.28, 103.53), + pixel_std: List[float] = (58.395, 57.12, 57.375) + ) -> None: + """ + Initialize the Sam class to predict object masks from an image and input prompts. + + Note: + All forward() operations moved to SAMPredictor. + + Args: + image_encoder (ImageEncoderViT): The backbone used to encode the image into image embeddings. + prompt_encoder (PromptEncoder): Encodes various types of input prompts. + mask_decoder (MaskDecoder): Predicts masks from the image embeddings and encoded prompts. + pixel_mean (List[float], optional): Mean values for normalizing pixels in the input image. Defaults to + (123.675, 116.28, 103.53). + pixel_std (List[float], optional): Std values for normalizing pixels in the input image. Defaults to + (58.395, 57.12, 57.375). 
class Conv2d_BN(torch.nn.Sequential):
    """A sequential container that performs 2D convolution followed by batch normalization."""

    def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1, groups=1, bn_weight_init=1):
        """
        Build a bias-free convolution ('c') followed by batch normalization ('bn').

        Args:
            a (int): Number of input channels.
            b (int): Number of output channels.
            ks (int): Convolution kernel size.
            stride (int): Convolution stride.
            pad (int): Convolution padding.
            dilation (int): Convolution dilation.
            groups (int): Number of convolution groups.
            bn_weight_init (float): Constant used to initialize the batch-norm weight; its bias starts at 0.
        """
        super().__init__()
        conv = torch.nn.Conv2d(a, b, ks, stride, pad, dilation, groups, bias=False)
        norm = torch.nn.BatchNorm2d(b)
        torch.nn.init.constant_(norm.weight, bn_weight_init)
        torch.nn.init.constant_(norm.bias, 0)
        self.add_module('c', conv)
        self.add_module('bn', norm)


class PatchEmbed(nn.Module):
    """Embeds images into patches and projects them into a specified embedding dimension."""

    def __init__(self, in_chans, embed_dim, resolution, activation):
        """
        Build the two stride-2 convolution stem that reduces each spatial size by a factor of 4.

        Args:
            in_chans (int): Number of input image channels.
            embed_dim (int): Number of output channels of the stem.
            resolution (int | Tuple[int, int]): Input image size, expanded to a pair with `to_2tuple`.
            activation (Callable): Zero-argument factory for the activation placed between the two convolutions.
        """
        super().__init__()
        img_size: Tuple[int, int] = to_2tuple(resolution)
        patches_h, patches_w = img_size[0] // 4, img_size[1] // 4
        self.patches_resolution = (patches_h, patches_w)
        self.num_patches = patches_h * patches_w
        self.in_chans = in_chans
        self.embed_dim = embed_dim
        half_dim = embed_dim // 2
        self.seq = nn.Sequential(
            Conv2d_BN(in_chans, half_dim, 3, 2, 1),
            activation(),
            Conv2d_BN(half_dim, embed_dim, 3, 2, 1),
        )

    def forward(self, x):
        """Run the input through the convolutional stem."""
        return self.seq(x)


class MBConv(nn.Module):
    """Mobile Inverted Bottleneck Conv (MBConv) layer, part of the EfficientNet architecture."""

    def __init__(self, in_chans, out_chans, expand_ratio, activation, drop_path):
        """
        Build the expand (1x1) -> depthwise (3x3) -> project (1x1) stack.

        Args:
            in_chans (int): Number of input channels.
            out_chans (int): Number of output channels.
            expand_ratio (float): Multiplier applied to `in_chans` to get the hidden width.
            activation (Callable): Zero-argument factory for the activation modules.
            drop_path (float): Stochastic depth rate. Currently unused: the drop-path slot is always an identity.
        """
        super().__init__()
        hidden = int(in_chans * expand_ratio)
        self.in_chans = in_chans
        self.hidden_chans = hidden
        self.out_chans = out_chans

        self.conv1 = Conv2d_BN(in_chans, hidden, ks=1)
        self.act1 = activation()

        # groups == channels makes this 3x3 convolution depthwise.
        self.conv2 = Conv2d_BN(hidden, hidden, ks=3, stride=1, pad=1, groups=hidden)
        self.act2 = activation()

        # The last batch-norm weight starts at zero, so the residual branch initially outputs zeros.
        self.conv3 = Conv2d_BN(hidden, out_chans, ks=1, bn_weight_init=0.0)
        self.act3 = activation()

        # NOTE: `DropPath` is needed only for training.
        self.drop_path = nn.Identity()

    def forward(self, x):
        """Apply the bottleneck, add the input as a residual, then apply the final activation."""
        out = self.act1(self.conv1(x))
        out = self.act2(self.conv2(out))
        out = self.drop_path(self.conv3(out))
        out += x
        return self.act3(out)


class PatchMerging(nn.Module):
    """Merges neighboring patches in the feature map and projects to a new dimension."""

    def __init__(self, input_resolution, dim, out_dim, activation):
        """
        Build the 1x1 -> depthwise 3x3 -> 1x1 convolution stack used for merging.

        Args:
            input_resolution (Tuple[int, int]): (H, W) used to unflatten token-sequence inputs.
            dim (int): Number of input channels.
            out_dim (int): Number of output channels.
            activation (Callable): Zero-argument factory for the shared activation module.
        """
        super().__init__()

        self.input_resolution = input_resolution
        self.dim = dim
        self.out_dim = out_dim
        self.act = activation()
        self.conv1 = Conv2d_BN(dim, out_dim, 1, 1, 0)
        # These three output widths keep the spatial size (stride 1); every other width halves it.
        if out_dim in [320, 448, 576]:
            stride_c = 1
        else:
            stride_c = 2
        self.conv2 = Conv2d_BN(out_dim, out_dim, 3, stride_c, 1, groups=out_dim)
        self.conv3 = Conv2d_BN(out_dim, out_dim, 1, 1, 0)

    def forward(self, x):
        """Merge patches of a (B, L, C) or (B, C, H, W) input and return tokens shaped (B, L_out, out_dim)."""
        if x.ndim == 3:
            height, width = self.input_resolution
            # (B, L, C) -> (B, C, H, W)
            x = x.view(len(x), height, width, -1).permute(0, 3, 1, 2)

        x = self.act(self.conv1(x))
        x = self.act(self.conv2(x))
        return self.conv3(x).flatten(2).transpose(1, 2)
+ + Args: + dim (int): The dimensionality of the input and output. + input_resolution (Tuple[int, int]): The resolution of the input image. + depth (int): The number of MBConv layers in the block. + activation (Callable): Activation function applied after each convolution. + drop_path (Union[float, List[float]]): Drop path rate. Single float or a list of floats for each MBConv. + downsample (Optional[Callable]): Function for downsampling the output. None to skip downsampling. + use_checkpoint (bool): Whether to use gradient checkpointing to save memory. + out_dim (Optional[int]): The dimensionality of the output. None means it will be the same as `dim`. + conv_expand_ratio (float): Expansion ratio for the MBConv layers. + """ + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.depth = depth + self.use_checkpoint = use_checkpoint + + # Build blocks + self.blocks = nn.ModuleList([ + MBConv( + dim, + dim, + conv_expand_ratio, + activation, + drop_path[i] if isinstance(drop_path, list) else drop_path, + ) for i in range(depth)]) + + # Patch merging layer + self.downsample = None if downsample is None else downsample( + input_resolution, dim=dim, out_dim=out_dim, activation=activation) + + def forward(self, x): + """Processes the input through a series of convolutional layers and returns the activated output.""" + for blk in self.blocks: + x = checkpoint.checkpoint(blk, x) if self.use_checkpoint else blk(x) + return x if self.downsample is None else self.downsample(x) + + +class Mlp(nn.Module): + """ + Multi-layer Perceptron (MLP) for transformer architectures. + + This layer takes an input with in_features, applies layer normalization and two fully-connected layers. 
class Attention(torch.nn.Module):
    """
    Multi-head attention module with support for spatial awareness, applying attention biases based on spatial
    resolution. Implements trainable attention biases for each unique offset between spatial positions in the resolution
    grid.

    Attributes:
        ab (Tensor, optional): Cached attention biases for inference, deleted during training.
    """

    def __init__(
        self,
        dim,
        key_dim,
        num_heads=8,
        attn_ratio=4,
        resolution=(14, 14),
    ):
        """
        Initializes the Attention module.

        Args:
            dim (int): The dimensionality of the input and output.
            key_dim (int): The dimensionality of the keys and queries.
            num_heads (int, optional): Number of attention heads. Default is 8.
            attn_ratio (float, optional): Attention ratio, affecting the dimensions of the value vectors. Default is 4.
            resolution (Tuple[int, int], optional): Spatial resolution of the input feature map. Default is (14, 14).

        Raises:
            AssertionError: If `resolution` is not a tuple of length 2.
        """
        super().__init__()

        assert isinstance(resolution, tuple) and len(resolution) == 2
        self.num_heads = num_heads
        self.scale = key_dim ** -0.5
        self.key_dim = key_dim
        self.nh_kd = nh_kd = key_dim * num_heads
        self.d = int(attn_ratio * key_dim)
        self.dh = int(attn_ratio * key_dim) * num_heads
        self.attn_ratio = attn_ratio
        h = self.dh + nh_kd * 2

        self.norm = nn.LayerNorm(dim)
        self.qkv = nn.Linear(dim, h)
        self.proj = nn.Linear(self.dh, dim)

        # Every distinct absolute (dy, dx) offset between two grid positions shares one learnable bias per head.
        points = list(itertools.product(range(resolution[0]), range(resolution[1])))
        N = len(points)
        attention_offsets = {}
        idxs = []
        for p1 in points:
            for p2 in points:
                offset = (abs(p1[0] - p2[0]), abs(p1[1] - p2[1]))
                # New offsets get the next free index; known offsets reuse theirs.
                idxs.append(attention_offsets.setdefault(offset, len(attention_offsets)))
        self.attention_biases = torch.nn.Parameter(torch.zeros(num_heads, len(attention_offsets)))
        self.register_buffer('attention_bias_idxs', torch.tensor(idxs, dtype=torch.long).view(N, N), persistent=False)

    @torch.no_grad()
    def train(self, mode=True):
        """
        Set training/evaluation mode and keep the cached bias table 'ab' in sync with it.

        The cache is dropped when entering training mode (the biases are being optimized again) and rebuilt when
        entering evaluation mode.

        Returns:
            (Attention): self, as `nn.Module.train` does, so that `module.eval()` can be chained.
        """
        super().train(mode)
        if mode:
            if hasattr(self, 'ab'):
                del self.ab
        else:
            self.ab = self.attention_biases[:, self.attention_bias_idxs]
        return self

    def _attention_bias(self):
        """Return the (num_heads, N, N) bias table: live parameters when training, the cached copy otherwise."""
        if self.training:
            return self.attention_biases[:, self.attention_bias_idxs]
        cached = getattr(self, 'ab', None)
        if cached is None:
            # Evaluation mode was entered without going through train()/eval(); build the cache on demand.
            with torch.no_grad():
                cached = self.attention_biases[:, self.attention_bias_idxs]
        # The module may have been moved to another device after the cache was built.
        self.ab = cached.to(self.attention_biases.device)
        return self.ab

    def forward(self, x):  # x
        """Apply layer normalization and biased multi-head self-attention to a (B, N, C) token tensor."""
        B, N, _ = x.shape  # B, N, C

        # Normalization
        x = self.norm(x)

        qkv = self.qkv(x)
        # (B, N, num_heads, d)
        q, k, v = qkv.view(B, N, self.num_heads, -1).split([self.key_dim, self.key_dim, self.d], dim=3)
        # (B, num_heads, N, d)
        q = q.permute(0, 2, 1, 3)
        k = k.permute(0, 2, 1, 3)
        v = v.permute(0, 2, 1, 3)

        attn = (q @ k.transpose(-2, -1)) * self.scale + self._attention_bias()
        attn = attn.softmax(dim=-1)
        x = (attn @ v).transpose(1, 2).reshape(B, N, self.dh)
        return self.proj(x)
else nn.Identity() + self.drop_path = nn.Identity() + + assert dim % num_heads == 0, 'dim must be divisible by num_heads' + head_dim = dim // num_heads + + window_resolution = (window_size, window_size) + self.attn = Attention(dim, head_dim, num_heads, attn_ratio=1, resolution=window_resolution) + + mlp_hidden_dim = int(dim * mlp_ratio) + mlp_activation = activation + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=mlp_activation, drop=drop) + + pad = local_conv_size // 2 + self.local_conv = Conv2d_BN(dim, dim, ks=local_conv_size, stride=1, pad=pad, groups=dim) + + def forward(self, x): + """Applies attention-based transformation or padding to input 'x' before passing it through a local + convolution. + """ + H, W = self.input_resolution + B, L, C = x.shape + assert L == H * W, 'input feature has wrong size' + res_x = x + if H == self.window_size and W == self.window_size: + x = self.attn(x) + else: + x = x.view(B, H, W, C) + pad_b = (self.window_size - H % self.window_size) % self.window_size + pad_r = (self.window_size - W % self.window_size) % self.window_size + padding = pad_b > 0 or pad_r > 0 + + if padding: + x = F.pad(x, (0, 0, 0, pad_r, 0, pad_b)) + + pH, pW = H + pad_b, W + pad_r + nH = pH // self.window_size + nW = pW // self.window_size + # Window partition + x = x.view(B, nH, self.window_size, nW, self.window_size, + C).transpose(2, 3).reshape(B * nH * nW, self.window_size * self.window_size, C) + x = self.attn(x) + # Window reverse + x = x.view(B, nH, nW, self.window_size, self.window_size, C).transpose(2, 3).reshape(B, pH, pW, C) + + if padding: + x = x[:, :H, :W].contiguous() + + x = x.view(B, L, C) + + x = res_x + self.drop_path(x) + + x = x.transpose(1, 2).reshape(B, C, H, W) + x = self.local_conv(x) + x = x.view(B, C, L).transpose(1, 2) + + return x + self.drop_path(self.mlp(x)) + + def extra_repr(self) -> str: + """Returns a formatted string representing the TinyViTBlock's parameters: dimension, input resolution, number 
of + attentions heads, window size, and MLP ratio. + """ + return f'dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, ' \ + f'window_size={self.window_size}, mlp_ratio={self.mlp_ratio}' + + +class BasicLayer(nn.Module): + """A basic TinyViT layer for one stage in a TinyViT architecture.""" + + def __init__( + self, + dim, + input_resolution, + depth, + num_heads, + window_size, + mlp_ratio=4., + drop=0., + drop_path=0., + downsample=None, + use_checkpoint=False, + local_conv_size=3, + activation=nn.GELU, + out_dim=None, + ): + """ + Initializes the BasicLayer. + + Args: + dim (int): The dimensionality of the input and output. + input_resolution (Tuple[int, int]): Spatial resolution of the input feature map. + depth (int): Number of TinyViT blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float, optional): Ratio of mlp hidden dim to embedding dim. Default is 4. + drop (float, optional): Dropout rate. Default is 0. + drop_path (float | tuple[float], optional): Stochastic depth rate. Default is 0. + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default is None. + use_checkpoint (bool, optional): Whether to use checkpointing to save memory. Default is False. + local_conv_size (int, optional): Kernel size of the local convolution. Default is 3. + activation (torch.nn, optional): Activation function for MLP. Default is nn.GELU. + out_dim (int | None, optional): The output dimension of the layer. Default is None. + + Raises: + ValueError: If `drop_path` is a list of float but its length doesn't match `depth`. 
class LayerNorm2d(nn.Module):
    """A PyTorch implementation of Layer Normalization in 2D."""

    def __init__(self, num_channels: int, eps: float = 1e-6) -> None:
        """Initialize LayerNorm2d with the number of channels and an optional epsilon."""
        super().__init__()
        self.weight = nn.Parameter(torch.ones(num_channels))
        self.bias = nn.Parameter(torch.zeros(num_channels))
        self.eps = eps

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Normalize a (B, C, H, W) tensor over its channel axis, then apply the per-channel scale and shift."""
        u = x.mean(1, keepdim=True)
        s = (x - u).pow(2).mean(1, keepdim=True)
        x = (x - u) / torch.sqrt(s + self.eps)
        return self.weight[:, None, None] * x + self.bias[:, None, None]


class TinyViT(nn.Module):
    """
    The TinyViT architecture for vision tasks.

    Attributes:
        img_size (int): Input image size.
        num_classes (int): Number of classification classes.
        depths (Sequence[int]): Number of blocks in each layer.
        num_layers (int): Number of layers, i.e. `len(depths)`.
        mlp_ratio (float): Ratio of MLP hidden dimension to embedding dimension.
        patches_resolution (Tuple[int, int]): Resolution of the patch-embedding output.
        layers (nn.ModuleList): One ConvLayer followed by BasicLayer stages.
        neck (nn.Sequential): Convolutional neck that maps the final features to 256 channels.

    Note:
        This implementation is generalized to accept a list of depths, attention heads,
        embedding dimensions and window sizes, which allows you to create a
        "stack" of TinyViT models of varying configurations.
    """

    def __init__(
        self,
        img_size=224,
        in_chans=3,
        num_classes=1000,
        embed_dims=(96, 192, 384, 768),
        depths=(2, 2, 6, 2),
        num_heads=(3, 6, 12, 24),
        window_sizes=(7, 7, 14, 7),
        mlp_ratio=4.,
        drop_rate=0.,
        drop_path_rate=0.1,
        use_checkpoint=False,
        mbconv_expand_ratio=4.0,
        local_conv_size=3,
        layer_lr_decay=1.0,
    ):
        """
        Initializes the TinyViT model.

        Args:
            img_size (int, optional): The input image size. Defaults to 224.
            in_chans (int, optional): Number of input channels. Defaults to 3.
            num_classes (int, optional): Number of classification classes. Defaults to 1000.
            embed_dims (Sequence[int], optional): Embedding dimensions for each layer. Defaults to (96, 192, 384, 768).
            depths (Sequence[int], optional): Depths for each layer. Defaults to (2, 2, 6, 2).
            num_heads (Sequence[int], optional): Number of attention heads for each layer. Defaults to (3, 6, 12, 24).
            window_sizes (Sequence[int], optional): Window sizes for each layer. Defaults to (7, 7, 14, 7).
            mlp_ratio (float, optional): Ratio of MLP hidden dimension to embedding dimension. Defaults to 4.
            drop_rate (float, optional): Dropout rate. Defaults to 0.
            drop_path_rate (float, optional): Drop path rate for stochastic depth. Defaults to 0.1.
            use_checkpoint (bool, optional): Whether to use checkpointing for efficient memory usage. Defaults to False.
            mbconv_expand_ratio (float, optional): Expansion ratio for MBConv layer. Defaults to 4.0.
            local_conv_size (int, optional): Local convolution kernel size. Defaults to 3.
            layer_lr_decay (float, optional): Layer-wise learning rate decay. Defaults to 1.0.
        """
        super().__init__()
        self.img_size = img_size
        self.num_classes = num_classes
        self.depths = depths
        self.num_layers = len(depths)
        self.mlp_ratio = mlp_ratio

        activation = nn.GELU

        self.patch_embed = PatchEmbed(in_chans=in_chans,
                                      embed_dim=embed_dims[0],
                                      resolution=img_size,
                                      activation=activation)

        patches_resolution = self.patch_embed.patches_resolution
        self.patches_resolution = patches_resolution

        # Stochastic depth
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule

        # Build layers
        self.layers = nn.ModuleList()
        for i_layer in range(self.num_layers):
            # The layer with index 3 reuses the resolution of the layer before it instead of halving again.
            reduction = 2 ** (i_layer - 1 if i_layer == 3 else i_layer)
            kwargs = dict(
                dim=embed_dims[i_layer],
                input_resolution=(patches_resolution[0] // reduction, patches_resolution[1] // reduction),
                depth=depths[i_layer],
                drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],
                downsample=PatchMerging if (i_layer < self.num_layers - 1) else None,
                use_checkpoint=use_checkpoint,
                out_dim=embed_dims[min(i_layer + 1, len(embed_dims) - 1)],
                activation=activation,
            )
            if i_layer == 0:
                layer = ConvLayer(conv_expand_ratio=mbconv_expand_ratio, **kwargs)
            else:
                layer = BasicLayer(num_heads=num_heads[i_layer],
                                   window_size=window_sizes[i_layer],
                                   mlp_ratio=self.mlp_ratio,
                                   drop=drop_rate,
                                   local_conv_size=local_conv_size,
                                   **kwargs)
            self.layers.append(layer)

        # Classifier head
        self.norm_head = nn.LayerNorm(embed_dims[-1])
        self.head = nn.Linear(embed_dims[-1], num_classes) if num_classes > 0 else torch.nn.Identity()

        # Init weights
        self.apply(self._init_weights)
        self.set_layer_lr_decay(layer_lr_decay)
        # The neck is created after set_layer_lr_decay, so its parameters carry no `lr_scale` attribute.
        self.neck = nn.Sequential(
            nn.Conv2d(
                embed_dims[-1],
                256,
                kernel_size=1,
                bias=False,
            ),
            LayerNorm2d(256),
            nn.Conv2d(
                256,
                256,
                kernel_size=3,
                padding=1,
                bias=False,
            ),
            LayerNorm2d(256),
        )

    def set_layer_lr_decay(self, layer_lr_decay):
        """
        Attach `lr_scale` and `param_name` attributes to every parameter that exists when this is called.

        Block `i` (counted over all layers) gets `layer_lr_decay ** (depth - i - 1)`; the patch embedding shares
        the first block's scale, each downsample shares the scale of the last block before it, and the head
        modules use the last scale.
        """
        decay_rate = layer_lr_decay

        # Layers -> blocks (depth)
        depth = sum(self.depths)
        lr_scales = [decay_rate ** (depth - i - 1) for i in range(depth)]

        def _set_lr_scale(m, scale):
            """Tag every parameter below module `m` (recursively) with the given learning-rate scale."""
            for p in m.parameters():
                p.lr_scale = scale

        _set_lr_scale(self.patch_embed, lr_scales[0])
        i = 0
        for layer in self.layers:
            for block in layer.blocks:
                _set_lr_scale(block, lr_scales[i])
                i += 1
            if layer.downsample is not None:
                _set_lr_scale(layer.downsample, lr_scales[i - 1])
        assert i == depth
        for m in (self.norm_head, self.head):
            _set_lr_scale(m, lr_scales[-1])

        for k, p in self.named_parameters():
            p.param_name = k

        # Every parameter must have received a scale above.
        for p in self.parameters():
            assert hasattr(p, 'lr_scale'), p.param_name

    def _init_weights(self, m):
        """Zero the bias of linear layers and reset LayerNorm to weight 1 / bias 0."""
        if isinstance(m, nn.Linear):
            # NOTE: This initialization is needed only for training.
            # trunc_normal_(m.weight, std=.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    @torch.jit.ignore
    def no_weight_decay_keywords(self):
        """Returns the set of parameter-name keywords for which weight decay should not be applied."""
        return {'attention_biases'}

    def forward_features(self, x):
        """
        Run the patch embedding and all layers, then reshape the tokens to a feature map and apply the neck.

        The token sequence produced by the last layer is unflattened with that layer's own `input_resolution`,
        so the spatial size follows the configured image size.
        """
        x = self.patch_embed(x)  # x input is (N, C, H, W)

        for layer in self.layers:
            x = layer(x)
        B, _, C = x.size()
        # The last layer has no downsample, so its output grid equals its input resolution.
        H, W = self.layers[-1].input_resolution
        x = x.view(B, H, W, C)
        x = x.permute(0, 3, 1, 2)
        return self.neck(x)

    def forward(self, x):
        """Executes a forward pass on the input tensor through the constructed model layers."""
        return self.forward_features(x)
def forward(
    self,
    image_embedding: Tensor,
    image_pe: Tensor,
    point_embedding: Tensor,
) -> Tuple[Tensor, Tensor]:
    """
    Run the two-way attention stack followed by a final token-to-image attention.

    Args:
        image_embedding (torch.Tensor): Image to attend to, shape B x embedding_dim x h x w for any h and w.
        image_pe (torch.Tensor): Positional encoding added to the image; same shape as image_embedding.
        point_embedding (torch.Tensor): Query embeddings, shape B x N_points x embedding_dim for any N_points.
            They are used both as the initial queries and as the query positional encoding of every layer.

    Returns:
        (torch.Tensor): The processed point_embedding.
        (torch.Tensor): The processed image_embedding, flattened to B x (h * w) x embedding_dim.
    """
    # Unpacking enforces a 4-D (B, C, H, W) input; the individual sizes are not needed afterwards.
    _, _, _, _ = image_embedding.shape

    # BxCxHxW -> BxHWxC == B x N_image_tokens x C
    keys = image_embedding.flatten(2).transpose(1, 2)
    key_pe = image_pe.flatten(2).transpose(1, 2)

    queries = point_embedding
    for block in self.layers:
        queries, keys = block(queries=queries, keys=keys, query_pe=point_embedding, key_pe=key_pe)

    # Final attention from the points to the image, added residually and then normalized
    attended = self.final_attn_token_to_image(q=queries + point_embedding, k=keys + key_pe, v=keys)
    return self.norm_final_attn(queries + attended), keys
def __init__(
    self,
    embedding_dim: int,
    num_heads: int,
    mlp_dim: int = 2048,
    activation: Type[nn.Module] = nn.ReLU,
    attention_downsample_rate: int = 2,
    skip_first_layer_pe: bool = False,
) -> None:
    """
    A transformer block with four layers: (1) self-attention of sparse inputs, (2) cross attention of sparse
    inputs to dense inputs, (3) mlp block on sparse inputs, and (4) cross attention of dense inputs to sparse
    inputs.

    Args:
        embedding_dim (int): the channel dimension of the embeddings
        num_heads (int): the number of heads in the attention layers
        mlp_dim (int): the hidden dimension of the mlp block
        activation (nn.Module): the activation of the mlp block
        attention_downsample_rate (int): downsample rate passed to the two cross-attention layers; the
            self-attention layer is built with Attention's default rate
        skip_first_layer_pe (bool): skip the PE on the first layer
    """
    super().__init__()
    # NOTE: submodules are created in a fixed order; changing it changes parameter registration order.
    # Self-attention over the queries (no downsample rate passed).
    self.self_attn = Attention(embedding_dim, num_heads)
    self.norm1 = nn.LayerNorm(embedding_dim)

    # Queries attending to the keys.
    self.cross_attn_token_to_image = Attention(embedding_dim, num_heads, downsample_rate=attention_downsample_rate)
    self.norm2 = nn.LayerNorm(embedding_dim)

    # MLP applied to the queries.
    self.mlp = MLPBlock(embedding_dim, mlp_dim, activation)
    self.norm3 = nn.LayerNorm(embedding_dim)

    # Keys attending to the queries.
    self.norm4 = nn.LayerNorm(embedding_dim)
    self.cross_attn_image_to_token = Attention(embedding_dim, num_heads, downsample_rate=attention_downsample_rate)

    # Stored flag; read by forward() to decide whether the self-attention step adds the query PE.
    self.skip_first_layer_pe = skip_first_layer_pe
class Attention(nn.Module):
    """Multi-head attention whose internal width may be smaller than the embedding width.

    Queries, keys and values are projected from `embedding_dim` down to `embedding_dim // downsample_rate`,
    attended per head, and projected back to `embedding_dim`.
    """

    def __init__(
        self,
        embedding_dim: int,
        num_heads: int,
        downsample_rate: int = 1,
    ) -> None:
        """
        Build the four projection layers of the attention module.

        Args:
            embedding_dim (int): The dimensionality of the input embeddings.
            num_heads (int): The number of attention heads.
            downsample_rate (int, optional): Factor by which the internal dimension is reduced. Defaults to 1.

        Raises:
            AssertionError: If 'num_heads' does not evenly divide the internal dimension
                (embedding_dim // downsample_rate).
        """
        super().__init__()
        internal_dim = embedding_dim // downsample_rate
        self.embedding_dim = embedding_dim
        self.internal_dim = internal_dim
        self.num_heads = num_heads
        assert internal_dim % num_heads == 0, 'num_heads must divide embedding_dim.'

        # Registration order (q, k, v, out) is kept so state_dict keys and init order stay the same.
        self.q_proj = nn.Linear(embedding_dim, internal_dim)
        self.k_proj = nn.Linear(embedding_dim, internal_dim)
        self.v_proj = nn.Linear(embedding_dim, internal_dim)
        self.out_proj = nn.Linear(internal_dim, embedding_dim)

    @staticmethod
    def _separate_heads(x: Tensor, num_heads: int) -> Tensor:
        """Split the channel axis of a B x N_tokens x C tensor into heads: B x N_heads x N_tokens x C_per_head."""
        batch, tokens, channels = x.shape
        per_head = channels // num_heads
        return x.reshape(batch, tokens, num_heads, per_head).transpose(1, 2)

    @staticmethod
    def _recombine_heads(x: Tensor) -> Tensor:
        """Merge a B x N_heads x N_tokens x C_per_head tensor back into B x N_tokens x C."""
        batch, heads, tokens, per_head = x.shape
        return x.transpose(1, 2).reshape(batch, tokens, heads * per_head)

    def forward(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor:
        """Project q, k and v, apply scaled dot-product attention per head and project the result back."""
        heads = self.num_heads
        q_h = self._separate_heads(self.q_proj(q), heads)
        k_h = self._separate_heads(self.k_proj(k), heads)
        v_h = self._separate_heads(self.v_proj(v), heads)

        # B x N_heads x N_q_tokens x N_k_tokens, scaled by sqrt of the per-head width
        head_dim = q_h.shape[-1]
        scores = (q_h @ k_h.transpose(-2, -1)) / math.sqrt(head_dim)
        weights = torch.softmax(scores, dim=-1)

        merged = self._recombine_heads(weights @ v_h)
        return self.out_proj(merged)
+This module contains the implementation of the prediction logic and auxiliary utilities required to perform segmentation +using SAM. It forms an integral part of the Ultralytics framework and is designed for high-performance, real-time image +segmentation tasks. +""" + +import numpy as np +import torch +import torch.nn.functional as F +import torchvision + +from ultralytics.data.augment import LetterBox +from ultralytics.engine.predictor import BasePredictor +from ultralytics.engine.results import Results +from ultralytics.utils import DEFAULT_CFG, ops +from ultralytics.utils.torch_utils import select_device + +from .amg import (batch_iterator, batched_mask_to_box, build_all_layer_point_grids, calculate_stability_score, + generate_crop_boxes, is_box_near_crop_edge, remove_small_regions, uncrop_boxes_xyxy, uncrop_masks) +from .build import build_sam + + +class Predictor(BasePredictor): + """ + Predictor class for the Segment Anything Model (SAM), extending BasePredictor. + + The class provides an interface for model inference tailored to image segmentation tasks. + With advanced architecture and promptable segmentation capabilities, it facilitates flexible and real-time + mask generation. The class is capable of working with various types of prompts such as bounding boxes, + points, and low-resolution masks. + + Attributes: + cfg (dict): Configuration dictionary specifying model and task-related parameters. + overrides (dict): Dictionary containing values that override the default configuration. + _callbacks (dict): Dictionary of user-defined callback functions to augment behavior. + args (namespace): Namespace to hold command-line arguments or other operational variables. + im (torch.Tensor): Preprocessed input image tensor. + features (torch.Tensor): Extracted image features used for inference. + prompts (dict): Collection of various prompt types, such as bounding boxes and points. 
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
    """
    Initialize the Predictor with configuration, overrides, and callbacks.

    The SAM-specific settings `task='segment'`, `mode='predict'` and `imgsz=1024` always take precedence over
    the values supplied in `overrides`, and `retina_masks` is forced to True after the base class is set up.
    The caller's `overrides` dictionary is copied, never modified.

    Args:
        cfg (dict): Configuration dictionary.
        overrides (dict, optional): Dictionary of values to override default configuration.
        _callbacks (dict, optional): Dictionary of callback functions to customize behavior.
    """
    # Copy first: updating the argument in place would leak the SAM settings into the caller's dict.
    overrides = {} if overrides is None else dict(overrides)
    overrides.update(dict(task='segment', mode='predict', imgsz=1024))
    super().__init__(cfg, overrides, _callbacks)
    self.args.retina_masks = True
    self.im = None  # cached preprocessed image
    self.features = None  # cached image encoder output
    self.prompts = {}  # prompts stored ahead of inference
    self.segment_all = False
def inference(self, im, bboxes=None, points=None, labels=None, masks=None, multimask_output=False, *args, **kwargs):
    """
    Perform image segmentation inference based on the given input cues, using the currently loaded image.

    Prompts stored in `self.prompts` take precedence over the corresponding arguments and are consumed
    (removed from `self.prompts`) by this call. When no box, point or mask prompt is available the whole image
    is segmented via `self.generate`; otherwise the prompts are forwarded to `self.prompt_inference`.

    Args:
        im (torch.Tensor): The preprocessed input image in tensor format, with shape (N, C, H, W).
        bboxes (np.ndarray | List, optional): Bounding boxes with shape (N, 4), in XYXY format.
        points (np.ndarray | List, optional): Points indicating object locations with shape (N, 2), in pixel coordinates.
        labels (np.ndarray | List, optional): Labels for point prompts, shape (N, ). 1 for foreground and 0 for background.
        masks (np.ndarray, optional): Low-resolution masks from previous predictions. Shape should be (N, H, W). For SAM, H=W=256.
        multimask_output (bool, optional): Flag to return multiple masks. Helpful for ambiguous prompts. Defaults to False.
        *args: Extra positional arguments forwarded to `self.generate` when no prompt is given.
        **kwargs: Extra keyword arguments forwarded to `self.generate` when no prompt is given.

    Returns:
        (tuple): Whatever `self.generate` or `self.prompt_inference` returns for the selected mode.
    """
    # Override prompts if any stored in self.prompts
    bboxes = self.prompts.pop('bboxes', bboxes)
    points = self.prompts.pop('points', points)
    masks = self.prompts.pop('masks', masks)
    # Stored point labels must travel with stored points; previously they were left behind and ignored.
    labels = self.prompts.pop('labels', labels)

    if bboxes is None and points is None and masks is None:
        return self.generate(im, *args, **kwargs)

    return self.prompt_inference(im, bboxes, points, labels, masks, multimask_output)
+ """ + features = self.model.image_encoder(im) if self.features is None else self.features + + src_shape, dst_shape = self.batch[1][0].shape[:2], im.shape[2:] + r = 1.0 if self.segment_all else min(dst_shape[0] / src_shape[0], dst_shape[1] / src_shape[1]) + # Transform input prompts + if points is not None: + points = torch.as_tensor(points, dtype=torch.float32, device=self.device) + points = points[None] if points.ndim == 1 else points + # Assuming labels are all positive if users don't pass labels. + if labels is None: + labels = np.ones(points.shape[0]) + labels = torch.as_tensor(labels, dtype=torch.int32, device=self.device) + points *= r + # (N, 2) --> (N, 1, 2), (N, ) --> (N, 1) + points, labels = points[:, None, :], labels[:, None] + if bboxes is not None: + bboxes = torch.as_tensor(bboxes, dtype=torch.float32, device=self.device) + bboxes = bboxes[None] if bboxes.ndim == 1 else bboxes + bboxes *= r + if masks is not None: + masks = torch.as_tensor(masks, dtype=torch.float32, device=self.device).unsqueeze(1) + + points = (points, labels) if points is not None else None + # Embed prompts + sparse_embeddings, dense_embeddings = self.model.prompt_encoder(points=points, boxes=bboxes, masks=masks) + + # Predict masks + pred_masks, pred_scores = self.model.mask_decoder( + image_embeddings=features, + image_pe=self.model.prompt_encoder.get_dense_pe(), + sparse_prompt_embeddings=sparse_embeddings, + dense_prompt_embeddings=dense_embeddings, + multimask_output=multimask_output, + ) + + # (N, d, H, W) --> (N*d, H, W), (N, d) --> (N*d, ) + # `d` could be 1 or 3 depends on `multimask_output`. 
def generate(self,
             im,
             crop_n_layers=0,
             crop_overlap_ratio=512 / 1500,
             crop_downscale_factor=1,
             point_grids=None,
             points_stride=32,
             points_batch_size=64,
             conf_thres=0.88,
             stability_score_thresh=0.95,
             stability_score_offset=0.95,
             crop_nms_thresh=0.7):
    """
    Perform image segmentation using the Segment Anything Model (SAM).

    This function segments an entire image by prompting the model with a grid of points, optionally on
    additional image crops. It switches the predictor into segment-all mode (`self.segment_all = True`).

    Args:
        im (torch.Tensor): Input tensor representing the preprocessed image with dimensions (N, C, H, W).
        crop_n_layers (int): Specifies the number of layers for additional mask predictions on image crops.
            Each layer produces 2**i_layer number of image crops.
        crop_overlap_ratio (float): Determines the extent of overlap between crops. Scaled down in subsequent layers.
        crop_downscale_factor (int): Scaling factor for the number of sampled points-per-side in each layer.
        point_grids (list[np.ndarray], optional): Custom grids for point sampling normalized to [0,1].
            Used in the nth crop layer.
        points_stride (int, optional): Number of points to sample along each side of the image.
            Exclusive with 'point_grids'.
        points_batch_size (int): Batch size for the number of points processed simultaneously.
        conf_thres (float): Confidence threshold [0,1] for filtering based on the model's mask quality prediction.
        stability_score_thresh (float): Stability threshold [0,1] for mask filtering based on mask stability.
        stability_score_offset (float): Offset value for calculating stability score.
        crop_nms_thresh (float): IoU cutoff for Non-Maximum Suppression (NMS) to remove duplicate masks between
            crops. NMS inside a single crop uses `self.args.iou` instead.

    Returns:
        (tuple): `(pred_masks, pred_scores, pred_bboxes)`, i.e. segmented masks, confidence scores and
            bounding boxes, in that order.
    """
    self.segment_all = True
    ih, iw = im.shape[2:]
    crop_regions, layer_idxs = generate_crop_boxes((ih, iw), crop_n_layers, crop_overlap_ratio)
    if point_grids is None:
        point_grids = build_all_layer_point_grids(points_stride, crop_n_layers, crop_downscale_factor)
    pred_masks, pred_scores, pred_bboxes, region_areas = [], [], [], []
    for crop_region, layer_idx in zip(crop_regions, layer_idxs):
        x1, y1, x2, y2 = crop_region
        w, h = x2 - x1, y2 - y1
        # Crop area, later used to rank masks from different crops against each other
        area = torch.tensor(w * h, device=im.device)
        points_scale = np.array([[w, h]])  # w, h
        # Crop image and interpolate to input size
        crop_im = F.interpolate(im[..., y1:y2, x1:x2], (ih, iw), mode='bilinear', align_corners=False)
        # (num_points, 2)
        points_for_image = point_grids[layer_idx] * points_scale
        crop_masks, crop_scores, crop_bboxes = [], [], []
        for (points, ) in batch_iterator(points_batch_size, points_for_image):
            pred_mask, pred_score = self.prompt_inference(crop_im, points=points, multimask_output=True)
            # Interpolate predicted masks to input size
            pred_mask = F.interpolate(pred_mask[None], (h, w), mode='bilinear', align_corners=False)[0]
            # First filter: predicted mask quality
            idx = pred_score > conf_thres
            pred_mask, pred_score = pred_mask[idx], pred_score[idx]

            # Second filter: stability score
            stability_score = calculate_stability_score(pred_mask, self.model.mask_threshold,
                                                        stability_score_offset)
            idx = stability_score > stability_score_thresh
            pred_mask, pred_score = pred_mask[idx], pred_score[idx]
            # Bool type is much more memory-efficient.
            pred_mask = pred_mask > self.model.mask_threshold
            # (N, 4)
            pred_bbox = batched_mask_to_box(pred_mask).float()
            # Third filter: drop boxes flagged by is_box_near_crop_edge
            keep_mask = ~is_box_near_crop_edge(pred_bbox, crop_region, [0, 0, iw, ih])
            if not torch.all(keep_mask):
                pred_bbox, pred_mask, pred_score = pred_bbox[keep_mask], pred_mask[keep_mask], pred_score[keep_mask]

            crop_masks.append(pred_mask)
            crop_bboxes.append(pred_bbox)
            crop_scores.append(pred_score)

        # Do nms within this crop
        crop_masks = torch.cat(crop_masks)
        crop_bboxes = torch.cat(crop_bboxes)
        crop_scores = torch.cat(crop_scores)
        keep = torchvision.ops.nms(crop_bboxes, crop_scores, self.args.iou)  # NMS
        crop_bboxes = uncrop_boxes_xyxy(crop_bboxes[keep], crop_region)
        crop_masks = uncrop_masks(crop_masks[keep], crop_region, ih, iw)
        crop_scores = crop_scores[keep]

        pred_masks.append(crop_masks)
        pred_bboxes.append(crop_bboxes)
        pred_scores.append(crop_scores)
        region_areas.append(area.expand(len(crop_masks)))

    pred_masks = torch.cat(pred_masks)
    pred_bboxes = torch.cat(pred_bboxes)
    pred_scores = torch.cat(pred_scores)
    region_areas = torch.cat(region_areas)

    # Remove duplicate masks between crops
    if len(crop_regions) > 1:
        # Score by inverse crop area so that masks coming from smaller crops win the NMS
        scores = 1 / region_areas
        keep = torchvision.ops.nms(pred_bboxes, scores, crop_nms_thresh)
        pred_masks, pred_bboxes, pred_scores = pred_masks[keep], pred_bboxes[keep], pred_scores[keep]

    return pred_masks, pred_scores, pred_bboxes
def postprocess(self, preds, img, orig_imgs):
    """
    Turn SAM's raw outputs into a list holding one Results object for the current image.

    Masks are rescaled to the original image size and thresholded with `self.model.mask_threshold`. In
    segment-all mode the boxes from `preds` are rescaled as well and extended with the score and a running
    class index per mask. Segment-all mode is switched off again before returning.

    Args:
        preds (tuple): The output from SAM model inference, containing masks, scores, and optional bounding boxes.
        img (torch.Tensor): The processed input image tensor.
        orig_imgs (list | torch.Tensor): The original, unprocessed images.

    Returns:
        (list): List with a single Results object containing masks, optional boxes, and other metadata.
    """
    pred_masks, pred_scores = preds[:2]
    pred_bboxes = preds[2] if self.segment_all else None
    # Every mask gets its own index as a stringified class name
    names = {idx: str(idx) for idx in range(len(pred_masks))}

    if not isinstance(orig_imgs, list):  # a tensor batch was passed instead of a list of images
        orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

    # Only one image is handled, so index 0 is used throughout
    orig_img = orig_imgs[0]
    if pred_bboxes is not None:
        scaled = ops.scale_boxes(img.shape[2:], pred_bboxes.float(), orig_img.shape, padding=False)
        cls = torch.arange(len(pred_masks), dtype=torch.int32, device=pred_masks.device)
        pred_bboxes = torch.cat([scaled, pred_scores[:, None], cls[:, None]], dim=-1)

    full_masks = ops.scale_masks(pred_masks[None].float(), orig_img.shape[:2], padding=False)[0]
    binary_masks = full_masks > self.model.mask_threshold  # to bool
    result = Results(orig_img, path=self.batch[0][0], names=names, masks=binary_masks, boxes=pred_bboxes)

    # Reset segment-all mode.
    self.segment_all = False
    return [result]
@staticmethod
def remove_small_regions(masks, min_area=0, nms_thresh=0.7):
    """
    Remove small disconnected regions and holes from masks, then de-duplicate them with NMS.

    Each mask is cleaned with the module-level `remove_small_regions` helper, first in 'holes' mode and then
    in 'islands' mode. Boxes are recomputed from the cleaned masks and Non-Maximum Suppression (NMS) is run
    with score 1 for masks that were left unchanged and 0 for masks that were altered, so unchanged masks are
    preferred.

    Args:
        masks (torch.Tensor): A tensor containing the masks to be processed. Shape should be (N, H, W), where N is
            the number of masks, H is height, and W is width.
        min_area (int): The minimum area below which disconnected regions and holes will be removed. Defaults to 0.
        nms_thresh (float): The IoU threshold for the NMS algorithm. Defaults to 0.7.

    Returns:
        (tuple([torch.Tensor, torch.Tensor])):
            - new_masks (torch.Tensor): The masks that survive NMS, on the device and with the dtype of `masks`.
            - keep (torch.Tensor): Indices of the surviving masks, which can be used to filter the boxes.
              For empty input both elements are empty.
    """
    if len(masks) == 0:
        # Keep the (masks, keep) contract on empty input so callers can always unpack two values.
        return masks, torch.zeros(0, dtype=torch.int64)

    # Filter small disconnected regions and holes
    new_masks = []
    scores = []
    for mask in masks:
        mask = mask.cpu().numpy().astype(np.uint8)
        mask, changed = remove_small_regions(mask, min_area, mode='holes')
        unchanged = not changed
        mask, changed = remove_small_regions(mask, min_area, mode='islands')
        unchanged = unchanged and not changed

        new_masks.append(torch.as_tensor(mask).unsqueeze(0))
        # Give score=0 to changed masks and 1 to unchanged masks so NMS prefers masks not needing postprocessing
        scores.append(float(unchanged))

    # Recalculate boxes and remove any new duplicates
    new_masks = torch.cat(new_masks, dim=0)
    boxes = batched_mask_to_box(new_masks)
    keep = torchvision.ops.nms(boxes.float(), torch.as_tensor(scores), nms_thresh)

    return new_masks[keep].to(device=masks.device, dtype=masks.dtype), keep
+ aux_loss (bool): Whether to compute auxiliary losses. + use_fl (bool): Use FocalLoss or not. + use_vfl (bool): Use VarifocalLoss or not. + use_uni_match (bool): Whether to use a fixed layer to assign labels for the auxiliary branch. + uni_match_ind (int): The fixed indices of a layer to use if `use_uni_match` is True. + matcher (HungarianMatcher): Object to compute matching cost and indices. + fl (FocalLoss or None): Focal Loss object if `use_fl` is True, otherwise None. + vfl (VarifocalLoss or None): Varifocal Loss object if `use_vfl` is True, otherwise None. + device (torch.device): Device on which tensors are stored. + """ + + def __init__(self, + nc=80, + loss_gain=None, + aux_loss=True, + use_fl=True, + use_vfl=False, + use_uni_match=False, + uni_match_ind=0): + """ + DETR loss function. + + Args: + nc (int): The number of classes. + loss_gain (dict): The coefficient of loss. + aux_loss (bool): If 'aux_loss = True', loss at each decoder layer are to be used. + use_vfl (bool): Use VarifocalLoss or not. + use_uni_match (bool): Whether to use a fixed layer to assign labels for auxiliary branch. + uni_match_ind (int): The fixed indices of a layer. 
+ """ + super().__init__() + + if loss_gain is None: + loss_gain = {'class': 1, 'bbox': 5, 'giou': 2, 'no_object': 0.1, 'mask': 1, 'dice': 1} + self.nc = nc + self.matcher = HungarianMatcher(cost_gain={'class': 2, 'bbox': 5, 'giou': 2}) + self.loss_gain = loss_gain + self.aux_loss = aux_loss + self.fl = FocalLoss() if use_fl else None + self.vfl = VarifocalLoss() if use_vfl else None + + self.use_uni_match = use_uni_match + self.uni_match_ind = uni_match_ind + self.device = None + + def _get_loss_class(self, pred_scores, targets, gt_scores, num_gts, postfix=''): + """Computes the classification loss based on predictions, target values, and ground truth scores.""" + # Logits: [b, query, num_classes], gt_class: list[[n, 1]] + name_class = f'loss_class{postfix}' + bs, nq = pred_scores.shape[:2] + # one_hot = F.one_hot(targets, self.nc + 1)[..., :-1] # (bs, num_queries, num_classes) + one_hot = torch.zeros((bs, nq, self.nc + 1), dtype=torch.int64, device=targets.device) + one_hot.scatter_(2, targets.unsqueeze(-1), 1) + one_hot = one_hot[..., :-1] + gt_scores = gt_scores.view(bs, nq, 1) * one_hot + + if self.fl: + if num_gts and self.vfl: + loss_cls = self.vfl(pred_scores, gt_scores, one_hot) + else: + loss_cls = self.fl(pred_scores, one_hot.float()) + loss_cls /= max(num_gts, 1) / nq + else: + loss_cls = nn.BCEWithLogitsLoss(reduction='none')(pred_scores, gt_scores).mean(1).sum() # YOLO CLS loss + + return {name_class: loss_cls.squeeze() * self.loss_gain['class']} + + def _get_loss_bbox(self, pred_bboxes, gt_bboxes, postfix=''): + """Calculates and returns the bounding box loss and GIoU loss for the predicted and ground truth bounding + boxes. 
+ """ + # Boxes: [b, query, 4], gt_bbox: list[[n, 4]] + name_bbox = f'loss_bbox{postfix}' + name_giou = f'loss_giou{postfix}' + + loss = {} + if len(gt_bboxes) == 0: + loss[name_bbox] = torch.tensor(0., device=self.device) + loss[name_giou] = torch.tensor(0., device=self.device) + return loss + + loss[name_bbox] = self.loss_gain['bbox'] * F.l1_loss(pred_bboxes, gt_bboxes, reduction='sum') / len(gt_bboxes) + loss[name_giou] = 1.0 - bbox_iou(pred_bboxes, gt_bboxes, xywh=True, GIoU=True) + loss[name_giou] = loss[name_giou].sum() / len(gt_bboxes) + loss[name_giou] = self.loss_gain['giou'] * loss[name_giou] + return {k: v.squeeze() for k, v in loss.items()} + + # This function is for future RT-DETR Segment models + # def _get_loss_mask(self, masks, gt_mask, match_indices, postfix=''): + # # masks: [b, query, h, w], gt_mask: list[[n, H, W]] + # name_mask = f'loss_mask{postfix}' + # name_dice = f'loss_dice{postfix}' + # + # loss = {} + # if sum(len(a) for a in gt_mask) == 0: + # loss[name_mask] = torch.tensor(0., device=self.device) + # loss[name_dice] = torch.tensor(0., device=self.device) + # return loss + # + # num_gts = len(gt_mask) + # src_masks, target_masks = self._get_assigned_bboxes(masks, gt_mask, match_indices) + # src_masks = F.interpolate(src_masks.unsqueeze(0), size=target_masks.shape[-2:], mode='bilinear')[0] + # # TODO: torch does not have `sigmoid_focal_loss`, but it's not urgent since we don't use mask branch for now. 
+ # loss[name_mask] = self.loss_gain['mask'] * F.sigmoid_focal_loss(src_masks, target_masks, + # torch.tensor([num_gts], dtype=torch.float32)) + # loss[name_dice] = self.loss_gain['dice'] * self._dice_loss(src_masks, target_masks, num_gts) + # return loss + + # This function is for future RT-DETR Segment models + # @staticmethod + # def _dice_loss(inputs, targets, num_gts): + # inputs = F.sigmoid(inputs).flatten(1) + # targets = targets.flatten(1) + # numerator = 2 * (inputs * targets).sum(1) + # denominator = inputs.sum(-1) + targets.sum(-1) + # loss = 1 - (numerator + 1) / (denominator + 1) + # return loss.sum() / num_gts + + def _get_loss_aux(self, + pred_bboxes, + pred_scores, + gt_bboxes, + gt_cls, + gt_groups, + match_indices=None, + postfix='', + masks=None, + gt_mask=None): + """Get auxiliary losses.""" + # NOTE: loss class, bbox, giou, mask, dice + loss = torch.zeros(5 if masks is not None else 3, device=pred_bboxes.device) + if match_indices is None and self.use_uni_match: + match_indices = self.matcher(pred_bboxes[self.uni_match_ind], + pred_scores[self.uni_match_ind], + gt_bboxes, + gt_cls, + gt_groups, + masks=masks[self.uni_match_ind] if masks is not None else None, + gt_mask=gt_mask) + for i, (aux_bboxes, aux_scores) in enumerate(zip(pred_bboxes, pred_scores)): + aux_masks = masks[i] if masks is not None else None + loss_ = self._get_loss(aux_bboxes, + aux_scores, + gt_bboxes, + gt_cls, + gt_groups, + masks=aux_masks, + gt_mask=gt_mask, + postfix=postfix, + match_indices=match_indices) + loss[0] += loss_[f'loss_class{postfix}'] + loss[1] += loss_[f'loss_bbox{postfix}'] + loss[2] += loss_[f'loss_giou{postfix}'] + # if masks is not None and gt_mask is not None: + # loss_ = self._get_loss_mask(aux_masks, gt_mask, match_indices, postfix) + # loss[3] += loss_[f'loss_mask{postfix}'] + # loss[4] += loss_[f'loss_dice{postfix}'] + + loss = { + f'loss_class_aux{postfix}': loss[0], + f'loss_bbox_aux{postfix}': loss[1], + f'loss_giou_aux{postfix}': loss[2]} + # 
if masks is not None and gt_mask is not None: + # loss[f'loss_mask_aux{postfix}'] = loss[3] + # loss[f'loss_dice_aux{postfix}'] = loss[4] + return loss + + @staticmethod + def _get_index(match_indices): + """Returns batch indices, source indices, and destination indices from provided match indices.""" + batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(match_indices)]) + src_idx = torch.cat([src for (src, _) in match_indices]) + dst_idx = torch.cat([dst for (_, dst) in match_indices]) + return (batch_idx, src_idx), dst_idx + + def _get_assigned_bboxes(self, pred_bboxes, gt_bboxes, match_indices): + """Assigns predicted bounding boxes to ground truth bounding boxes based on the match indices.""" + pred_assigned = torch.cat([ + t[I] if len(I) > 0 else torch.zeros(0, t.shape[-1], device=self.device) + for t, (I, _) in zip(pred_bboxes, match_indices)]) + gt_assigned = torch.cat([ + t[J] if len(J) > 0 else torch.zeros(0, t.shape[-1], device=self.device) + for t, (_, J) in zip(gt_bboxes, match_indices)]) + return pred_assigned, gt_assigned + + def _get_loss(self, + pred_bboxes, + pred_scores, + gt_bboxes, + gt_cls, + gt_groups, + masks=None, + gt_mask=None, + postfix='', + match_indices=None): + """Get losses.""" + if match_indices is None: + match_indices = self.matcher(pred_bboxes, + pred_scores, + gt_bboxes, + gt_cls, + gt_groups, + masks=masks, + gt_mask=gt_mask) + + idx, gt_idx = self._get_index(match_indices) + pred_bboxes, gt_bboxes = pred_bboxes[idx], gt_bboxes[gt_idx] + + bs, nq = pred_scores.shape[:2] + targets = torch.full((bs, nq), self.nc, device=pred_scores.device, dtype=gt_cls.dtype) + targets[idx] = gt_cls[gt_idx] + + gt_scores = torch.zeros([bs, nq], device=pred_scores.device) + if len(gt_bboxes): + gt_scores[idx] = bbox_iou(pred_bboxes.detach(), gt_bboxes, xywh=True).squeeze(-1) + + loss = {} + loss.update(self._get_loss_class(pred_scores, targets, gt_scores, len(gt_bboxes), postfix)) + 
loss.update(self._get_loss_bbox(pred_bboxes, gt_bboxes, postfix)) + # if masks is not None and gt_mask is not None: + # loss.update(self._get_loss_mask(masks, gt_mask, match_indices, postfix)) + return loss + + def forward(self, pred_bboxes, pred_scores, batch, postfix='', **kwargs): + """ + Args: + pred_bboxes (torch.Tensor): [l, b, query, 4] + pred_scores (torch.Tensor): [l, b, query, num_classes] + batch (dict): A dict includes: + gt_cls (torch.Tensor) with shape [num_gts, ], + gt_bboxes (torch.Tensor): [num_gts, 4], + gt_groups (List(int)): a list of batch size length includes the number of gts of each image. + postfix (str): postfix of loss name. + """ + self.device = pred_bboxes.device + match_indices = kwargs.get('match_indices', None) + gt_cls, gt_bboxes, gt_groups = batch['cls'], batch['bboxes'], batch['gt_groups'] + + total_loss = self._get_loss(pred_bboxes[-1], + pred_scores[-1], + gt_bboxes, + gt_cls, + gt_groups, + postfix=postfix, + match_indices=match_indices) + + if self.aux_loss: + total_loss.update( + self._get_loss_aux(pred_bboxes[:-1], pred_scores[:-1], gt_bboxes, gt_cls, gt_groups, match_indices, + postfix)) + + return total_loss + + +class RTDETRDetectionLoss(DETRLoss): + """ + Real-Time DeepTracker (RT-DETR) Detection Loss class that extends the DETRLoss. + + This class computes the detection loss for the RT-DETR model, which includes the standard detection loss as well as + an additional denoising training loss when provided with denoising metadata. + """ + + def forward(self, preds, batch, dn_bboxes=None, dn_scores=None, dn_meta=None): + """ + Forward pass to compute the detection loss. + + Args: + preds (tuple): Predicted bounding boxes and scores. + batch (dict): Batch data containing ground truth information. + dn_bboxes (torch.Tensor, optional): Denoising bounding boxes. Default is None. + dn_scores (torch.Tensor, optional): Denoising scores. Default is None. + dn_meta (dict, optional): Metadata for denoising. Default is None. 
+ + Returns: + (dict): Dictionary containing the total loss and, if applicable, the denoising loss. + """ + pred_bboxes, pred_scores = preds + total_loss = super().forward(pred_bboxes, pred_scores, batch) + + # Check for denoising metadata to compute denoising training loss + if dn_meta is not None: + dn_pos_idx, dn_num_group = dn_meta['dn_pos_idx'], dn_meta['dn_num_group'] + assert len(batch['gt_groups']) == len(dn_pos_idx) + + # Get the match indices for denoising + match_indices = self.get_dn_match_indices(dn_pos_idx, dn_num_group, batch['gt_groups']) + + # Compute the denoising training loss + dn_loss = super().forward(dn_bboxes, dn_scores, batch, postfix='_dn', match_indices=match_indices) + total_loss.update(dn_loss) + else: + # If no denoising metadata is provided, set denoising loss to zero + total_loss.update({f'{k}_dn': torch.tensor(0., device=self.device) for k in total_loss.keys()}) + + return total_loss + + @staticmethod + def get_dn_match_indices(dn_pos_idx, dn_num_group, gt_groups): + """ + Get the match indices for denoising. + + Args: + dn_pos_idx (List[torch.Tensor]): List of tensors containing positive indices for denoising. + dn_num_group (int): Number of denoising groups. + gt_groups (List[int]): List of integers representing the number of ground truths for each image. + + Returns: + (List[tuple]): List of tuples containing matched indices for denoising. + """ + dn_match_indices = [] + idx_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0) + for i, num_gt in enumerate(gt_groups): + if num_gt > 0: + gt_idx = torch.arange(end=num_gt, dtype=torch.long) + idx_groups[i] + gt_idx = gt_idx.repeat(dn_num_group) + assert len(dn_pos_idx[i]) == len(gt_idx), 'Expected the same length, ' + f'but got {len(dn_pos_idx[i])} and {len(gt_idx)} respectively.' 
+ dn_match_indices.append((dn_pos_idx[i], gt_idx)) + else: + dn_match_indices.append((torch.zeros([0], dtype=torch.long), torch.zeros([0], dtype=torch.long))) + return dn_match_indices diff --git a/ultralytics/models/utils/ops.py b/ultralytics/models/utils/ops.py new file mode 100644 index 0000000..902756d --- /dev/null +++ b/ultralytics/models/utils/ops.py @@ -0,0 +1,260 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +import torch +import torch.nn as nn +import torch.nn.functional as F +from scipy.optimize import linear_sum_assignment + +from ultralytics.utils.metrics import bbox_iou +from ultralytics.utils.ops import xywh2xyxy, xyxy2xywh + + +class HungarianMatcher(nn.Module): + """ + A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in an + end-to-end fashion. + + HungarianMatcher performs optimal assignment over the predicted and ground truth bounding boxes using a cost + function that considers classification scores, bounding box coordinates, and optionally, mask predictions. + + Attributes: + cost_gain (dict): Dictionary of cost coefficients: 'class', 'bbox', 'giou', 'mask', and 'dice'. + use_fl (bool): Indicates whether to use Focal Loss for the classification cost calculation. + with_mask (bool): Indicates whether the model makes mask predictions. + num_sample_points (int): The number of sample points used in mask cost calculation. + alpha (float): The alpha factor in Focal Loss calculation. + gamma (float): The gamma factor in Focal Loss calculation. + + Methods: + forward(pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups, masks=None, gt_mask=None): Computes the + assignment between predictions and ground truths for a batch. + _cost_mask(bs, num_gts, masks=None, gt_mask=None): Computes the mask cost and dice cost if masks are predicted. 
+ """ + + def __init__(self, cost_gain=None, use_fl=True, with_mask=False, num_sample_points=12544, alpha=0.25, gamma=2.0): + """Initializes HungarianMatcher with cost coefficients, Focal Loss, mask prediction, sample points, and alpha + gamma factors. + """ + super().__init__() + if cost_gain is None: + cost_gain = {'class': 1, 'bbox': 5, 'giou': 2, 'mask': 1, 'dice': 1} + self.cost_gain = cost_gain + self.use_fl = use_fl + self.with_mask = with_mask + self.num_sample_points = num_sample_points + self.alpha = alpha + self.gamma = gamma + + def forward(self, pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups, masks=None, gt_mask=None): + """ + Forward pass for HungarianMatcher. This function computes costs based on prediction and ground truth + (classification cost, L1 cost between boxes and GIoU cost between boxes) and finds the optimal matching between + predictions and ground truth based on these costs. + + Args: + pred_bboxes (Tensor): Predicted bounding boxes with shape [batch_size, num_queries, 4]. + pred_scores (Tensor): Predicted scores with shape [batch_size, num_queries, num_classes]. + gt_cls (torch.Tensor): Ground truth classes with shape [num_gts, ]. + gt_bboxes (torch.Tensor): Ground truth bounding boxes with shape [num_gts, 4]. + gt_groups (List[int]): List of length equal to batch size, containing the number of ground truths for + each image. + masks (Tensor, optional): Predicted masks with shape [batch_size, num_queries, height, width]. + Defaults to None. + gt_mask (List[Tensor], optional): List of ground truth masks, each with shape [num_masks, Height, Width]. + Defaults to None. 
+ + Returns: + (List[Tuple[Tensor, Tensor]]): A list of size batch_size, each element is a tuple (index_i, index_j), where: + - index_i is the tensor of indices of the selected predictions (in order) + - index_j is the tensor of indices of the corresponding selected ground truth targets (in order) + For each batch element, it holds: + len(index_i) = len(index_j) = min(num_queries, num_target_boxes) + """ + + bs, nq, nc = pred_scores.shape + + if sum(gt_groups) == 0: + return [(torch.tensor([], dtype=torch.long), torch.tensor([], dtype=torch.long)) for _ in range(bs)] + + # We flatten to compute the cost matrices in a batch + # [batch_size * num_queries, num_classes] + pred_scores = pred_scores.detach().view(-1, nc) + pred_scores = F.sigmoid(pred_scores) if self.use_fl else F.softmax(pred_scores, dim=-1) + # [batch_size * num_queries, 4] + pred_bboxes = pred_bboxes.detach().view(-1, 4) + + # Compute the classification cost + pred_scores = pred_scores[:, gt_cls] + if self.use_fl: + neg_cost_class = (1 - self.alpha) * (pred_scores ** self.gamma) * (-(1 - pred_scores + 1e-8).log()) + pos_cost_class = self.alpha * ((1 - pred_scores) ** self.gamma) * (-(pred_scores + 1e-8).log()) + cost_class = pos_cost_class - neg_cost_class + else: + cost_class = -pred_scores + + # Compute the L1 cost between boxes + cost_bbox = (pred_bboxes.unsqueeze(1) - gt_bboxes.unsqueeze(0)).abs().sum(-1) # (bs*num_queries, num_gt) + + # Compute the GIoU cost between boxes, (bs*num_queries, num_gt) + cost_giou = 1.0 - bbox_iou(pred_bboxes.unsqueeze(1), gt_bboxes.unsqueeze(0), xywh=True, GIoU=True).squeeze(-1) + + # Final cost matrix + C = self.cost_gain['class'] * cost_class + \ + self.cost_gain['bbox'] * cost_bbox + \ + self.cost_gain['giou'] * cost_giou + # Compute the mask cost and dice cost + if self.with_mask: + C += self._cost_mask(bs, gt_groups, masks, gt_mask) + + # Set invalid values (NaNs and infinities) to 0 (fixes ValueError: matrix contains invalid numeric entries) + C[C.isnan() | 
def get_cdn_group(batch,
                  num_classes,
                  num_queries,
                  class_embed,
                  num_dn=100,
                  cls_noise_ratio=0.5,
                  box_noise_scale=1.0,
                  training=False):
    """
    Build a contrastive denoising (CDN) training group.

    The ground truths are replicated into `num_group` groups, each holding one positive and one negative copy.
    Class labels are randomly flipped and boxes are jittered (negatives more strongly than positives); the result is
    padded per image and returned together with an attention mask and bookkeeping metadata.

    Args:
        batch (dict): A dict with the keys 'cls' (torch.Tensor, [num_gts, ]), 'bboxes' (torch.Tensor, [num_gts, 4]),
            'batch_idx' (torch.Tensor, [num_gts, ], image index of every gt; used as an index tensor) and
            'gt_groups' (List[int], number of gts of each image).
        num_classes (int): Number of classes.
        num_queries (int): Number of queries.
        class_embed (torch.Tensor): Embedding weights to map class labels to embedding space.
        num_dn (int, optional): Number of denoising queries requested. Defaults to 100.
        cls_noise_ratio (float, optional): Noise ratio for class labels. Defaults to 0.5.
        box_noise_scale (float, optional): Noise scale for bounding box coordinates. Defaults to 1.0.
        training (bool, optional): If it's in training mode. Defaults to False.

    Returns:
        (Tuple[Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Dict]]): The padded class embeddings
            [bs, num_dn, embed_dim], padded boxes [bs, num_dn, 4], boolean attention mask
            [num_dn + num_queries, num_dn + num_queries] (True = blocked) and the denoising metadata. All four are
            None when not training, when `num_dn` <= 0 or when the batch holds no ground truth.
    """
    if (not training) or num_dn <= 0:
        return None, None, None, None
    gt_groups = batch['gt_groups']
    total_num = sum(gt_groups)
    # `default=0` lets an empty batch take the same "nothing to denoise" exit as a batch without ground truths.
    max_nums = max(gt_groups, default=0)
    if max_nums == 0:
        return None, None, None, None

    # At least one group, even if a single image already has more gts than `num_dn`.
    num_group = max(num_dn // max_nums, 1)
    bs = len(gt_groups)
    gt_cls = batch['cls']  # (bs*num, )
    gt_bbox = batch['bboxes']  # bs*num, 4
    b_idx = batch['batch_idx']

    # Each group has positive and negative queries: the first half of the copies is positive, the second negative.
    dn_cls = gt_cls.repeat(2 * num_group)  # (2*num_group*bs*num, )
    dn_bbox = gt_bbox.repeat(2 * num_group, 1)  # 2*num_group*bs*num, 4
    dn_b_idx = b_idx.repeat(2 * num_group).view(-1)  # (2*num_group*bs*num, )

    # Indices of the negative samples inside the flattened copies
    neg_idx = torch.arange(total_num * num_group, dtype=torch.long, device=gt_bbox.device) + num_group * total_num

    if cls_noise_ratio > 0:
        # Flip a label with probability cls_noise_ratio / 2
        mask = torch.rand(dn_cls.shape) < (cls_noise_ratio * 0.5)
        idx = torch.nonzero(mask).squeeze(-1)
        new_label = torch.randint_like(idx, 0, num_classes, dtype=dn_cls.dtype, device=dn_cls.device)
        dn_cls[idx] = new_label

    if box_noise_scale > 0:
        known_bbox = xywh2xyxy(dn_bbox)

        diff = (dn_bbox[..., 2:] * 0.5).repeat(1, 2) * box_noise_scale  # 2*num_group*bs*num, 4

        rand_sign = torch.randint_like(dn_bbox, 0, 2) * 2.0 - 1.0
        rand_part = torch.rand_like(dn_bbox)
        rand_part[neg_idx] += 1.0  # negatives are pushed further away than positives
        rand_part *= rand_sign
        known_bbox += rand_part * diff
        known_bbox.clip_(min=0.0, max=1.0)
        dn_bbox = xyxy2xywh(known_bbox)
        dn_bbox = torch.logit(dn_bbox, eps=1e-6)  # inverse sigmoid

    num_dn = int(max_nums * 2 * num_group)  # total denoising queries per image after padding
    dn_cls_embed = class_embed[dn_cls]  # bs*num * 2 * num_group, embed_dim
    padding_cls = torch.zeros(bs, num_dn, dn_cls_embed.shape[-1], device=gt_cls.device)
    padding_bbox = torch.zeros(bs, num_dn, 4, device=gt_bbox.device)

    # Slot of every gt inside its image (0..num-1), then shifted by max_nums for each further copy
    map_indices = torch.cat([torch.arange(num, dtype=torch.long) for num in gt_groups])
    pos_idx = torch.stack([map_indices + max_nums * i for i in range(num_group)], dim=0)

    map_indices = torch.cat([map_indices + max_nums * i for i in range(2 * num_group)])
    padding_cls[(dn_b_idx, map_indices)] = dn_cls_embed
    padding_bbox[(dn_b_idx, map_indices)] = dn_bbox

    tgt_size = num_dn + num_queries
    attn_mask = torch.zeros([tgt_size, tgt_size], dtype=torch.bool)
    # Matching queries cannot see the denoising (reconstruction) queries
    attn_mask[num_dn:, :num_dn] = True
    # Denoising groups cannot see each other: block every denoising column outside the group's own span
    group_size = max_nums * 2
    for i in range(num_group):
        start, stop = group_size * i, group_size * (i + 1)
        attn_mask[start:stop, stop:num_dn] = True
        attn_mask[start:stop, :start] = True
    dn_meta = {
        'dn_pos_idx': [p.reshape(-1) for p in pos_idx.cpu().split(list(gt_groups), dim=1)],
        'dn_num_group': num_group,
        'dn_num_split': [num_dn, num_queries]}

    return padding_cls.to(class_embed.device), padding_bbox.to(class_embed.device), attn_mask.to(
        class_embed.device), dn_meta
class ClassificationPredictor(BasePredictor):
    """
    A class extending the BasePredictor class for prediction based on a classification model.

    Notes:
        - Torchvision classification models can also be passed to the 'model' argument, i.e. model='resnet18'.

    Example:
        ```python
        from ultralytics.utils import ASSETS
        from ultralytics.models.yolo.classify import ClassificationPredictor

        args = dict(model='yolov8n-cls.pt', source=ASSETS)
        predictor = ClassificationPredictor(overrides=args)
        predictor.predict_cli()
        ```
    """

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """Initializes ClassificationPredictor setting the task to 'classify'."""
        super().__init__(cfg, overrides, _callbacks)
        self.args.task = 'classify'

    def preprocess(self, img):
        """
        Convert the input images into a model-ready tensor.

        Args:
            img (torch.Tensor | iterable): Either an already batched tensor or an iterable of images, each of which
                is passed through `self.transforms` and stacked along a new batch dimension.

        Returns:
            (torch.Tensor): Batch on the model device, half precision if `self.model.fp16` is set, float32 otherwise.
        """
        if not isinstance(img, torch.Tensor):
            img = torch.stack([self.transforms(im) for im in img], dim=0)
        # `img` is always a tensor here, so no numpy conversion is required before moving it to the device.
        img = img.to(self.model.device)
        return img.half() if self.model.fp16 else img.float()  # uint8 to fp16/32

    def postprocess(self, preds, img, orig_imgs):
        """
        Wrap the raw predictions into Results objects.

        Args:
            preds (iterable): Per-image predictions, stored as `probs` of each result.
            img (torch.Tensor): The preprocessed batch (unused).
            orig_imgs (list | torch.Tensor): Original images; a tensor batch is converted to numpy arrays first.

        Returns:
            (list[Results]): One Results object per prediction.
        """
        if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
            orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

        paths = self.batch[0]
        return [
            Results(orig_imgs[i], path=paths[i], names=self.model.names, probs=pred) for i, pred in enumerate(preds)]
class ClassificationTrainer(BaseTrainer):
    """
    A class extending the BaseTrainer class for training based on a classification model.

    Notes:
        - Torchvision classification models can also be passed to the 'model' argument, i.e. model='resnet18'.

    Example:
        ```python
        from ultralytics.models.yolo.classify import ClassificationTrainer

        args = dict(model='yolov8n-cls.pt', data='imagenet10', epochs=3)
        trainer = ClassificationTrainer(overrides=args)
        trainer.train()
        ```
    """

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """Initialize the trainer, forcing task='classify' and defaulting `imgsz` to 224 when it is not given."""
        if overrides is None:
            overrides = {}
        overrides['task'] = 'classify'
        if overrides.get('imgsz') is None:
            overrides['imgsz'] = 224
        super().__init__(cfg, overrides, _callbacks)

    def set_model_attributes(self):
        """Set the YOLO model's class names from the loaded dataset."""
        self.model.names = self.data['names']

    def get_model(self, cfg=None, weights=None, verbose=True):
        """
        Build a ClassificationModel configured for training.

        Args:
            cfg (str | dict, optional): Model configuration forwarded to ClassificationModel.
            weights (optional): Weights to load into the new model when truthy.
            verbose (bool): Print model information (only when RANK is -1).

        Returns:
            (ClassificationModel): Model with all parameters trainable.
        """
        model = ClassificationModel(cfg, nc=self.data['nc'], verbose=verbose and RANK == -1)
        if weights:
            model.load(weights)

        for m in model.modules():
            # Training from scratch: re-initialise every layer that knows how to reset itself
            if not self.args.pretrained and hasattr(m, 'reset_parameters'):
                m.reset_parameters()
            if isinstance(m, torch.nn.Dropout) and self.args.dropout:
                m.p = self.args.dropout  # set dropout
        for p in model.parameters():
            p.requires_grad = True  # for training
        return model

    def setup_model(self):
        """
        Load, create or download the model.

        Returns:
            (dict | None): The checkpoint when the model came from a '.pt' file; None otherwise, including when
                `self.model` already is a `torch.nn.Module`.

        Raises:
            FileNotFoundError: If `self.model` is neither a '.pt' file, a YAML file nor a torchvision model name.
        """
        if isinstance(self.model, torch.nn.Module):  # if model is loaded beforehand. No setup needed
            return

        model, ckpt = str(self.model), None
        # Load a YOLO model locally, from torchvision, or from Ultralytics assets
        if model.endswith('.pt'):
            self.model, ckpt = attempt_load_one_weight(model, device='cpu')
            for p in self.model.parameters():
                p.requires_grad = True  # for training
        elif model.split('.')[-1] in ('yaml', 'yml'):
            self.model = self.get_model(cfg=model)
        elif model in torchvision.models.__dict__:
            self.model = torchvision.models.__dict__[model](weights='IMAGENET1K_V1' if self.args.pretrained else None)
        else:
            # The exception must be raised; otherwise `self.model` stays a plain string and the call below fails
            # with an unrelated error.
            raise FileNotFoundError(f'ERROR: model={model} not found locally or online. Please check model name.')
        ClassificationModel.reshape_outputs(self.model, self.data['nc'])

        return ckpt

    def build_dataset(self, img_path, mode='train', batch=None):
        """Creates a ClassificationDataset instance given an image path, and mode (train/test etc.)."""
        return ClassificationDataset(root=img_path, args=self.args, augment=mode == 'train', prefix=mode)

    def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'):
        """Build the dataloader; for non-training modes also attach the dataset's transforms to the model."""
        with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
            dataset = self.build_dataset(dataset_path, mode)

        loader = build_dataloader(dataset, batch_size, self.args.workers, rank=rank)
        # Attach inference transforms
        if mode != 'train':
            if is_parallel(self.model):
                self.model.module.transforms = loader.dataset.torch_transforms
            else:
                self.model.transforms = loader.dataset.torch_transforms
        return loader

    def preprocess_batch(self, batch):
        """Move the images and class labels of a batch to the training device."""
        batch['img'] = batch['img'].to(self.device)
        batch['cls'] = batch['cls'].to(self.device)
        return batch

    def progress_string(self):
        """Returns a formatted string showing training progress."""
        return ('\n' + '%11s' * (4 + len(self.loss_names))) % \
            ('Epoch', 'GPU_mem', *self.loss_names, 'Instances', 'Size')

    def get_validator(self):
        """Set the loss names and return a ClassificationValidator for validation."""
        self.loss_names = ['loss']
        return yolo.classify.ClassificationValidator(self.test_loader, self.save_dir)

    def label_loss_items(self, loss_items=None, prefix='train'):
        """
        Returns a loss dict with labelled training loss items tensor.

        Not needed for classification but necessary for segmentation & detection.

        Args:
            loss_items (optional): A single loss value convertible to float; when None only the keys are returned.
            prefix (str): Prefix of every key, e.g. 'train' gives 'train/loss'.

        Returns:
            (list[str] | dict): The keys, or the keys mapped to the loss rounded to 5 decimals.
        """
        keys = [f'{prefix}/{x}' for x in self.loss_names]
        if loss_items is None:
            return keys
        loss_items = [round(float(loss_items), 5)]
        return dict(zip(keys, loss_items))

    def plot_metrics(self):
        """Plots metrics from a CSV file."""
        plot_results(file=self.csv, classify=True, on_plot=self.on_plot)  # save results.png

    def final_eval(self):
        """Strip the optimizer from the saved checkpoints and validate the best one."""
        for f in self.last, self.best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
                if f is self.best:
                    LOGGER.info(f'\nValidating {f}...')
                    self.validator.args.data = self.args.data
                    self.validator.args.plots = self.args.plots
                    self.metrics = self.validator(model=f)
                    self.metrics.pop('fitness', None)
                    self.run_callbacks('on_fit_epoch_end')
        LOGGER.info(f"Results saved to {colorstr('bold', self.save_dir)}")

    def plot_training_samples(self, batch, ni):
        """Plots training samples with their annotations."""
        plot_images(
            images=batch['img'],
            batch_idx=torch.arange(len(batch['img'])),
            cls=batch['cls'].view(-1),  # warning: use .view(), not .squeeze() for Classify models
            fname=self.save_dir / f'train_batch{ni}.jpg',
            on_plot=self.on_plot)
def preprocess(self, batch):
    """
    Move a validation batch to the validator's device and cast the images.

    Images are cast to half precision when `self.args.half` is truthy, otherwise to float32.
    Class targets are only moved to the device.

    Args:
        batch (dict): Batch with 'img' and 'cls' tensors.

    Returns:
        (dict): The same dict object with 'img' and 'cls' updated in place.
    """
    images = batch['img'].to(self.device, non_blocking=True)
    if self.args.half:
        images = images.half()
    else:
        images = images.float()
    batch['img'] = images
    batch['cls'] = batch['cls'].to(self.device)
    return batch
def print_results(self):
    """Log one summary row with the top-1 and top-5 accuracy held in `self.metrics`."""
    # One '%11.3g' column is emitted per metric key; the row below supplies top1 and top5.
    row_format = '%22s' + '%11.3g' * len(self.metrics.keys)
    summary = ('all', self.metrics.top1, self.metrics.top5)
    LOGGER.info(row_format % summary)
class DetectionPredictor(BasePredictor):
    """
    Predictor for detection models, built on BasePredictor.

    Example:
        ```python
        from ultralytics.utils import ASSETS
        from ultralytics.models.yolo.detect import DetectionPredictor

        args = dict(model='yolov8n.pt', source=ASSETS)
        predictor = DetectionPredictor(overrides=args)
        predictor.predict_cli()
        ```
    """

    def postprocess(self, preds, img, orig_imgs):
        """
        Run NMS on raw predictions and wrap each image's detections in a Results object.

        Args:
            preds: Raw model output passed to `ops.non_max_suppression`.
            img: Preprocessed input batch; `img.shape[2:]` is used as the size boxes are scaled from.
            orig_imgs (list | torch.Tensor): Original images; a non-list input is converted with
                `ops.convert_torch2numpy_batch`.

        Returns:
            (list): One Results object per element of the NMS output.
        """
        nms_options = dict(agnostic=self.args.agnostic_nms, max_det=self.args.max_det, classes=self.args.classes)
        detections = ops.non_max_suppression(preds, self.args.conf, self.args.iou, **nms_options)

        if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
            orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

        results = []
        for index, det in enumerate(detections):
            source_img = orig_imgs[index]
            # Boxes are rescaled in place from the network input size to the original image size.
            det[:, :4] = ops.scale_boxes(img.shape[2:], det[:, :4], source_img.shape)
            source_path = self.batch[0][index]
            results.append(Results(source_img, path=source_path, names=self.model.names, boxes=det))
        return results
def preprocess_batch(self, batch):
    """
    Move the batch images to the trainer's device and scale them by 1/255 as floats.

    Args:
        batch (dict): Batch with an 'img' tensor.

    Returns:
        (dict): The same dict object with 'img' replaced by the float, scaled tensor.
    """
    images = batch['img'].to(self.device, non_blocking=True)
    batch['img'] = images.float() / 255
    return batch
def label_loss_items(self, loss_items=None, prefix='train'):
    """
    Label loss values with '<prefix>/<loss name>' keys.

    Args:
        loss_items (iterable, optional): Loss values, each convertible with `float()`.
            When None, only the keys are returned.
        prefix (str): Prefix placed before each loss name, e.g. 'train' or 'val'.

    Returns:
        (list | dict): The list of keys when `loss_items` is None, otherwise a dict pairing each
            key with its loss rounded to 5 decimal places.
    """
    labels = [f'{prefix}/{name}' for name in self.loss_names]
    if loss_items is None:
        return labels
    rounded = [round(float(value), 5) for value in loss_items]  # tensors/numbers -> 5 decimal place floats
    return dict(zip(labels, rounded))
plot_training_labels(self): + """Create a labeled training plot of the YOLO model.""" + boxes = np.concatenate([lb['bboxes'] for lb in self.train_loader.dataset.labels], 0) + cls = np.concatenate([lb['cls'] for lb in self.train_loader.dataset.labels], 0) + plot_labels(boxes, cls.squeeze(), names=self.data['names'], save_dir=self.save_dir, on_plot=self.on_plot) diff --git a/ultralytics/models/yolo/detect/val.py b/ultralytics/models/yolo/detect/val.py new file mode 100644 index 0000000..4d43933 --- /dev/null +++ b/ultralytics/models/yolo/detect/val.py @@ -0,0 +1,266 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +import os +from pathlib import Path + +import numpy as np +import torch + +from ultralytics.data import build_dataloader, build_yolo_dataset, converter +from ultralytics.engine.validator import BaseValidator +from ultralytics.utils import LOGGER, ops +from ultralytics.utils.checks import check_requirements +from ultralytics.utils.metrics import ConfusionMatrix, DetMetrics, box_iou +from ultralytics.utils.plotting import output_to_target, plot_images + + +class DetectionValidator(BaseValidator): + """ + A class extending the BaseValidator class for validation based on a detection model. 
def preprocess(self, batch):
    """
    Prepare a validation batch: move tensors to the device and scale images by 1/255.

    Images are cast to half precision when `self.args.half` is truthy, otherwise to float32.
    When `self.args.save_hybrid` is truthy, `self.lb` is rebuilt with one tensor per image that
    concatenates that image's class column with its boxes scaled by (width, height, width, height)
    of the batch images. When it is falsy, `self.lb` is left untouched.

    Args:
        batch (dict): Batch with 'img', 'batch_idx', 'cls' and 'bboxes' tensors.

    Returns:
        (dict): The same dict object, updated in place.
    """
    batch['img'] = batch['img'].to(self.device, non_blocking=True)
    batch['img'] = (batch['img'].half() if self.args.half else batch['img'].float()) / 255
    for k in ('batch_idx', 'cls', 'bboxes'):
        batch[k] = batch[k].to(self.device)

    if self.args.save_hybrid:
        height, width = batch['img'].shape[2:]
        scale = torch.tensor((width, height, width, height), device=self.device)
        bboxes = batch['bboxes'] * scale  # new tensor; batch['bboxes'] itself is not modified
        batch_idx = batch['batch_idx']
        labels_per_image = []
        for i in range(len(batch['img'])):
            mask = batch_idx == i  # rows belonging to image i, computed once and reused
            labels_per_image.append(torch.cat([batch['cls'][mask], bboxes[mask]], dim=-1))
        self.lb = labels_per_image  # for autolabelling

    return batch
def get_desc(self):
    """Return the header row for the detection metrics table: a 22-wide first column, then 11-wide columns."""
    headers = ('Class', 'Images', 'Instances', 'Box(P', 'R', 'mAP50', 'mAP50-95)')
    layout = '%22s' + '%11s' * (len(headers) - 1)
    return layout % headers
def get_stats(self):
    """
    Aggregate the per-image statistics and return the metrics results dictionary.

    The tuples collected in `self.stats` are concatenated column-wise and converted to numpy.
    Metrics are processed only when the first column (the correctness matrix) has at least one
    true entry. `self.nt_per_class` is set to the number of targets per class, taken from the
    last column.

    If `self.stats` is empty (no image had labels or predictions), `self.nt_per_class` is set to
    all zeros instead of indexing into an empty list, which previously raised IndexError.

    Returns:
        (dict): `self.metrics.results_dict`.
    """
    stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*self.stats)]  # to numpy
    if not stats:
        # Nothing was recorded: there are no targets to count and nothing to process.
        self.nt_per_class = np.zeros(self.nc, dtype=int)
        return self.metrics.results_dict
    if stats[0].any():
        self.metrics.process(*stats)
    self.nt_per_class = np.bincount(stats[-1].astype(int), minlength=self.nc)  # number of targets per class
    return self.metrics.results_dict
def save_one_txt(self, predn, save_conf, shape, file):
    """
    Append detections to a txt file as normalized `cls x y w h [conf]` rows.

    Args:
        predn (torch.Tensor): Detections; each row is unpacked as four box values, then conf, then cls.
        save_conf (bool): Whether to append the confidence as the last field of each row.
        shape: Image shape indexed as [1, 0, 1, 0] to build the whwh normalization gain.
        file: Path of the txt file to append to.

    The file is opened once and all rows are written together, instead of reopening it for
    every detection. Nothing is written, and no file is created, when `predn` has no rows.
    """
    rows = predn.tolist()
    if not rows:
        return  # keep the original behavior of not creating an empty label file

    gn = torch.tensor(shape)[[1, 0, 1, 0]]  # normalization gain whwh
    lines = []
    for *xyxy, conf, cls in rows:
        xywh = (ops.xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
        lines.append(('%g ' * len(line)).rstrip() % line + '\n')

    with open(file, 'a') as f:
        f.writelines(lines)
class YOLO(Model):
    """YOLO (You Only Look Once) object detection model."""

    @property
    def task_map(self):
        """
        Map each task name to its model, trainer, validator and predictor classes.

        Returns:
            (dict): Keys 'classify', 'detect', 'segment' and 'pose', each mapping to a dict with
                keys 'model', 'trainer', 'validator' and 'predictor'.
        """
        roles = ('model', 'trainer', 'validator', 'predictor')
        # Each tuple is ordered to match `roles`.
        components = {
            'classify': (
                ClassificationModel,
                yolo.classify.ClassificationTrainer,
                yolo.classify.ClassificationValidator,
                yolo.classify.ClassificationPredictor),
            'detect': (
                DetectionModel,
                yolo.detect.DetectionTrainer,
                yolo.detect.DetectionValidator,
                yolo.detect.DetectionPredictor),
            'segment': (
                SegmentationModel,
                yolo.segment.SegmentationTrainer,
                yolo.segment.SegmentationValidator,
                yolo.segment.SegmentationPredictor),
            'pose': (
                PoseModel,
                yolo.pose.PoseTrainer,
                yolo.pose.PoseValidator,
                yolo.pose.PosePredictor)}
        return {task: dict(zip(roles, parts)) for task, parts in components.items()}
class PosePredictor(DetectionPredictor):
    """
    Predictor for pose models, built on DetectionPredictor.

    Example:
        ```python
        from ultralytics.utils import ASSETS
        from ultralytics.models.yolo.pose import PosePredictor

        args = dict(model='yolov8n-pose.pt', source=ASSETS)
        predictor = PosePredictor(overrides=args)
        predictor.predict_cli()
        ```
    """

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """Initialize the predictor, force the task to 'pose', and warn when the device string is 'mps'."""
        super().__init__(cfg, overrides, _callbacks)
        self.args.task = 'pose'
        device = self.args.device
        on_mps = isinstance(device, str) and device.lower() == 'mps'
        if on_mps:
            LOGGER.warning("WARNING ⚠️ Apple MPS known Pose bug. Recommend 'device=cpu' for Pose models. "
                           'See https://github.com/ultralytics/ultralytics/issues/4031.')

    def postprocess(self, preds, img, orig_imgs):
        """
        Run NMS on raw predictions and wrap each image's boxes and keypoints in a Results object.

        Args:
            preds: Raw model output passed to `ops.non_max_suppression`.
            img: Preprocessed input batch; `img.shape[2:]` is used as the size coordinates are scaled from.
            orig_imgs (list | torch.Tensor): Original images; a non-list input is converted with
                `ops.convert_torch2numpy_batch`.

        Returns:
            (list): One Results object per element of the NMS output.
        """
        nms_options = dict(agnostic=self.args.agnostic_nms,
                           max_det=self.args.max_det,
                           classes=self.args.classes,
                           nc=len(self.model.names))
        detections = ops.non_max_suppression(preds, self.args.conf, self.args.iou, **nms_options)

        if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
            orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

        results = []
        for index, det in enumerate(detections):
            source_img = orig_imgs[index]
            # Boxes are rescaled in place to the original image size and rounded.
            det[:, :4] = ops.scale_boxes(img.shape[2:], det[:, :4], source_img.shape).round()
            # Columns from 6 onward hold the keypoints; reshape only when there are detections.
            if len(det):
                keypoints = det[:, 6:].view(len(det), *self.model.kpt_shape)
            else:
                keypoints = det[:, 6:]
            keypoints = ops.scale_coords(img.shape[2:], keypoints, source_img.shape)
            source_path = self.batch[0][index]
            results.append(
                Results(source_img, path=source_path, names=self.model.names, boxes=det[:, :6], keypoints=keypoints))
        return results
def plot_training_samples(self, batch, ni):
    """
    Plot one training batch with its class labels, boxes and keypoints.

    Args:
        batch (dict): Batch with 'img', 'batch_idx', 'cls', 'bboxes', 'keypoints' and 'im_file' entries.
        ni (int): Value embedded in the output file name `train_batch{ni}.jpg` under `self.save_dir`.
    """
    output_file = self.save_dir / f'train_batch{ni}.jpg'
    plot_images(batch['img'],
                batch['batch_idx'],
                batch['cls'].squeeze(-1),
                batch['bboxes'],
                kpts=batch['keypoints'],
                paths=batch['im_file'],
                fname=output_file,
                on_plot=self.on_plot)
"""Plots training/val metrics.""" + plot_results(file=self.csv, pose=True, on_plot=self.on_plot) # save results.png diff --git a/ultralytics/models/yolo/pose/val.py b/ultralytics/models/yolo/pose/val.py new file mode 100644 index 0000000..b8ebf57 --- /dev/null +++ b/ultralytics/models/yolo/pose/val.py @@ -0,0 +1,215 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from pathlib import Path + +import numpy as np +import torch + +from ultralytics.models.yolo.detect import DetectionValidator +from ultralytics.utils import LOGGER, ops +from ultralytics.utils.checks import check_requirements +from ultralytics.utils.metrics import OKS_SIGMA, PoseMetrics, box_iou, kpt_iou +from ultralytics.utils.plotting import output_to_target, plot_images + + +class PoseValidator(DetectionValidator): + """ + A class extending the DetectionValidator class for validation based on a pose model. + + Example: + ```python + from ultralytics.models.yolo.pose import PoseValidator + + args = dict(model='yolov8n-pose.pt', data='coco8-pose.yaml') + validator = PoseValidator(args=args) + validator() + ``` + """ + + def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None): + """Initialize a 'PoseValidator' object with custom parameters and assigned attributes.""" + super().__init__(dataloader, save_dir, pbar, args, _callbacks) + self.sigma = None + self.kpt_shape = None + self.args.task = 'pose' + self.metrics = PoseMetrics(save_dir=self.save_dir, on_plot=self.on_plot) + if isinstance(self.args.device, str) and self.args.device.lower() == 'mps': + LOGGER.warning("WARNING ⚠️ Apple MPS known Pose bug. Recommend 'device=cpu' for Pose models. 
" + 'See https://github.com/ultralytics/ultralytics/issues/4031.') + + def preprocess(self, batch): + """Preprocesses the batch by converting the 'keypoints' data into a float and moving it to the device.""" + batch = super().preprocess(batch) + batch['keypoints'] = batch['keypoints'].to(self.device).float() + return batch + + def get_desc(self): + """Returns description of evaluation metrics in string format.""" + return ('%22s' + '%11s' * 10) % ('Class', 'Images', 'Instances', 'Box(P', 'R', 'mAP50', 'mAP50-95)', 'Pose(P', + 'R', 'mAP50', 'mAP50-95)') + + def postprocess(self, preds): + """Apply non-maximum suppression and return detections with high confidence scores.""" + return ops.non_max_suppression(preds, + self.args.conf, + self.args.iou, + labels=self.lb, + multi_label=True, + agnostic=self.args.single_cls, + max_det=self.args.max_det, + nc=self.nc) + + def init_metrics(self, model): + """Initiate pose estimation metrics for YOLO model.""" + super().init_metrics(model) + self.kpt_shape = self.data['kpt_shape'] + is_pose = self.kpt_shape == [17, 3] + nkpt = self.kpt_shape[0] + self.sigma = OKS_SIGMA if is_pose else np.ones(nkpt) / nkpt + + def update_metrics(self, preds, batch): + """Metrics.""" + for si, pred in enumerate(preds): + idx = batch['batch_idx'] == si + cls = batch['cls'][idx] + bbox = batch['bboxes'][idx] + kpts = batch['keypoints'][idx] + nl, npr = cls.shape[0], pred.shape[0] # number of labels, predictions + nk = kpts.shape[1] # number of keypoints + shape = batch['ori_shape'][si] + correct_kpts = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device) # init + correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device) # init + self.seen += 1 + + if npr == 0: + if nl: + self.stats.append((correct_bboxes, correct_kpts, *torch.zeros( + (2, 0), device=self.device), cls.squeeze(-1))) + if self.args.plots: + self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1)) + continue + + # Predictions + 
if self.args.single_cls: + pred[:, 5] = 0 + predn = pred.clone() + ops.scale_boxes(batch['img'][si].shape[1:], predn[:, :4], shape, + ratio_pad=batch['ratio_pad'][si]) # native-space pred + pred_kpts = predn[:, 6:].view(npr, nk, -1) + ops.scale_coords(batch['img'][si].shape[1:], pred_kpts, shape, ratio_pad=batch['ratio_pad'][si]) + + # Evaluate + if nl: + height, width = batch['img'].shape[2:] + tbox = ops.xywh2xyxy(bbox) * torch.tensor( + (width, height, width, height), device=self.device) # target boxes + ops.scale_boxes(batch['img'][si].shape[1:], tbox, shape, + ratio_pad=batch['ratio_pad'][si]) # native-space labels + tkpts = kpts.clone() + tkpts[..., 0] *= width + tkpts[..., 1] *= height + tkpts = ops.scale_coords(batch['img'][si].shape[1:], tkpts, shape, ratio_pad=batch['ratio_pad'][si]) + labelsn = torch.cat((cls, tbox), 1) # native-space labels + correct_bboxes = self._process_batch(predn[:, :6], labelsn) + correct_kpts = self._process_batch(predn[:, :6], labelsn, pred_kpts, tkpts) + if self.args.plots: + self.confusion_matrix.process_batch(predn, labelsn) + + # Append correct_masks, correct_boxes, pconf, pcls, tcls + self.stats.append((correct_bboxes, correct_kpts, pred[:, 4], pred[:, 5], cls.squeeze(-1))) + + # Save + if self.args.save_json: + self.pred_to_json(predn, batch['im_file'][si]) + # if self.args.save_txt: + # save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt') + + def _process_batch(self, detections, labels, pred_kpts=None, gt_kpts=None): + """ + Return correct prediction matrix. + + Args: + detections (torch.Tensor): Tensor of shape [N, 6] representing detections. + Each detection is of the format: x1, y1, x2, y2, conf, class. + labels (torch.Tensor): Tensor of shape [M, 5] representing labels. + Each label is of the format: class, x1, y1, x2, y2. + pred_kpts (torch.Tensor, optional): Tensor of shape [N, 51] representing predicted keypoints. + 51 corresponds to 17 keypoints each with 3 values. 
+ gt_kpts (torch.Tensor, optional): Tensor of shape [N, 51] representing ground truth keypoints. + + Returns: + torch.Tensor: Correct prediction matrix of shape [N, 10] for 10 IoU levels. + """ + if pred_kpts is not None and gt_kpts is not None: + # `0.53` is from https://github.com/jin-s13/xtcocoapi/blob/master/xtcocotools/cocoeval.py#L384 + area = ops.xyxy2xywh(labels[:, 1:])[:, 2:].prod(1) * 0.53 + iou = kpt_iou(gt_kpts, pred_kpts, sigma=self.sigma, area=area) + else: # boxes + iou = box_iou(labels[:, 1:], detections[:, :4]) + + return self.match_predictions(detections[:, 5], labels[:, 0], iou) + + def plot_val_samples(self, batch, ni): + """Plots and saves validation set samples with predicted bounding boxes and keypoints.""" + plot_images(batch['img'], + batch['batch_idx'], + batch['cls'].squeeze(-1), + batch['bboxes'], + kpts=batch['keypoints'], + paths=batch['im_file'], + fname=self.save_dir / f'val_batch{ni}_labels.jpg', + names=self.names, + on_plot=self.on_plot) + + def plot_predictions(self, batch, preds, ni): + """Plots predictions for YOLO model.""" + pred_kpts = torch.cat([p[:, 6:].view(-1, *self.kpt_shape) for p in preds], 0) + plot_images(batch['img'], + *output_to_target(preds, max_det=self.args.max_det), + kpts=pred_kpts, + paths=batch['im_file'], + fname=self.save_dir / f'val_batch{ni}_pred.jpg', + names=self.names, + on_plot=self.on_plot) # pred + + def pred_to_json(self, predn, filename): + """Converts YOLO predictions to COCO JSON format.""" + stem = Path(filename).stem + image_id = int(stem) if stem.isnumeric() else stem + box = ops.xyxy2xywh(predn[:, :4]) # xywh + box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner + for p, b in zip(predn.tolist(), box.tolist()): + self.jdict.append({ + 'image_id': image_id, + 'category_id': self.class_map[int(p[5])], + 'bbox': [round(x, 3) for x in b], + 'keypoints': p[6:], + 'score': round(p[4], 5)}) + + def eval_json(self, stats): + """Evaluates object detection model using COCO JSON format.""" + 
if self.args.save_json and self.is_coco and len(self.jdict): + anno_json = self.data['path'] / 'annotations/person_keypoints_val2017.json' # annotations + pred_json = self.save_dir / 'predictions.json' # predictions + LOGGER.info(f'\nEvaluating pycocotools mAP using {pred_json} and {anno_json}...') + try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb + check_requirements('pycocotools>=2.0.6') + from pycocotools.coco import COCO # noqa + from pycocotools.cocoeval import COCOeval # noqa + + for x in anno_json, pred_json: + assert x.is_file(), f'{x} file not found' + anno = COCO(str(anno_json)) # init annotations api + pred = anno.loadRes(str(pred_json)) # init predictions api (must pass string, not Path) + for i, eval in enumerate([COCOeval(anno, pred, 'bbox'), COCOeval(anno, pred, 'keypoints')]): + if self.is_coco: + eval.params.imgIds = [int(Path(x).stem) for x in self.dataloader.dataset.im_files] # im to eval + eval.evaluate() + eval.accumulate() + eval.summarize() + idx = i * 4 + 2 + stats[self.metrics.keys[idx + 1]], stats[ + self.metrics.keys[idx]] = eval.stats[:2] # update mAP50-95 and mAP50 + except Exception as e: + LOGGER.warning(f'pycocotools unable to run: {e}') + return stats diff --git a/ultralytics/models/yolo/segment/__init__.py b/ultralytics/models/yolo/segment/__init__.py new file mode 100644 index 0000000..c84a570 --- /dev/null +++ b/ultralytics/models/yolo/segment/__init__.py @@ -0,0 +1,7 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from .predict import SegmentationPredictor +from .train import SegmentationTrainer +from .val import SegmentationValidator + +__all__ = 'SegmentationPredictor', 'SegmentationTrainer', 'SegmentationValidator' diff --git a/ultralytics/models/yolo/segment/predict.py b/ultralytics/models/yolo/segment/predict.py new file mode 100644 index 0000000..ba44a48 --- /dev/null +++ b/ultralytics/models/yolo/segment/predict.py @@ -0,0 +1,55 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from 
ultralytics.engine.results import Results +from ultralytics.models.yolo.detect.predict import DetectionPredictor +from ultralytics.utils import DEFAULT_CFG, ops + + +class SegmentationPredictor(DetectionPredictor): + """ + A class extending the DetectionPredictor class for prediction based on a segmentation model. + + Example: + ```python + from ultralytics.utils import ASSETS + from ultralytics.models.yolo.segment import SegmentationPredictor + + args = dict(model='yolov8n-seg.pt', source=ASSETS) + predictor = SegmentationPredictor(overrides=args) + predictor.predict_cli() + ``` + """ + + def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): + """Initializes the SegmentationPredictor with the provided configuration, overrides, and callbacks.""" + super().__init__(cfg, overrides, _callbacks) + self.args.task = 'segment' + + def postprocess(self, preds, img, orig_imgs): + """Applies non-max suppression and processes detections for each image in an input batch.""" + p = ops.non_max_suppression(preds[0], + self.args.conf, + self.args.iou, + agnostic=self.args.agnostic_nms, + max_det=self.args.max_det, + nc=len(self.model.names), + classes=self.args.classes) + + if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list + orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) + + results = [] + proto = preds[1][-1] if len(preds[1]) == 3 else preds[1] # second output is len 3 if pt, but only 1 if exported + for i, pred in enumerate(p): + orig_img = orig_imgs[i] + img_path = self.batch[0][i] + if not len(pred): # save empty boxes + masks = None + elif self.args.retina_masks: + pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) + masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2]) # HWC + else: + masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True) # HWC + pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) + 
results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks)) + return results diff --git a/ultralytics/models/yolo/segment/train.py b/ultralytics/models/yolo/segment/train.py new file mode 100644 index 0000000..b290192 --- /dev/null +++ b/ultralytics/models/yolo/segment/train.py @@ -0,0 +1,58 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from copy import copy + +from ultralytics.models import yolo +from ultralytics.nn.tasks import SegmentationModel +from ultralytics.utils import DEFAULT_CFG, RANK +from ultralytics.utils.plotting import plot_images, plot_results + + +class SegmentationTrainer(yolo.detect.DetectionTrainer): + """ + A class extending the DetectionTrainer class for training based on a segmentation model. + + Example: + ```python + from ultralytics.models.yolo.segment import SegmentationTrainer + + args = dict(model='yolov8n-seg.pt', data='coco8-seg.yaml', epochs=3) + trainer = SegmentationTrainer(overrides=args) + trainer.train() + ``` + """ + + def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): + """Initialize a SegmentationTrainer object with given arguments.""" + if overrides is None: + overrides = {} + overrides['task'] = 'segment' + super().__init__(cfg, overrides, _callbacks) + + def get_model(self, cfg=None, weights=None, verbose=True): + """Return SegmentationModel initialized with specified config and weights.""" + model = SegmentationModel(cfg, ch=3, nc=self.data['nc'], verbose=verbose and RANK == -1) + if weights: + model.load(weights) + + return model + + def get_validator(self): + """Return an instance of SegmentationValidator for validation of YOLO model.""" + self.loss_names = 'box_loss', 'seg_loss', 'cls_loss', 'dfl_loss' + return yolo.segment.SegmentationValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args)) + + def plot_training_samples(self, batch, ni): + """Creates a plot of training sample images with labels and box coordinates.""" + 
plot_images(batch['img'], + batch['batch_idx'], + batch['cls'].squeeze(-1), + batch['bboxes'], + batch['masks'], + paths=batch['im_file'], + fname=self.save_dir / f'train_batch{ni}.jpg', + on_plot=self.on_plot) + + def plot_metrics(self): + """Plots training/val metrics.""" + plot_results(file=self.csv, segment=True, on_plot=self.on_plot) # save results.png diff --git a/ultralytics/models/yolo/segment/val.py b/ultralytics/models/yolo/segment/val.py new file mode 100644 index 0000000..599b0d5 --- /dev/null +++ b/ultralytics/models/yolo/segment/val.py @@ -0,0 +1,247 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from multiprocessing.pool import ThreadPool +from pathlib import Path + +import numpy as np +import torch +import torch.nn.functional as F + +from ultralytics.models.yolo.detect import DetectionValidator +from ultralytics.utils import LOGGER, NUM_THREADS, ops +from ultralytics.utils.checks import check_requirements +from ultralytics.utils.metrics import SegmentMetrics, box_iou, mask_iou +from ultralytics.utils.plotting import output_to_target, plot_images + + +class SegmentationValidator(DetectionValidator): + """ + A class extending the DetectionValidator class for validation based on a segmentation model. 
+ + Example: + ```python + from ultralytics.models.yolo.segment import SegmentationValidator + + args = dict(model='yolov8n-seg.pt', data='coco8-seg.yaml') + validator = SegmentationValidator(args=args) + validator() + ``` + """ + + def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None): + """Initialize SegmentationValidator and set task to 'segment', metrics to SegmentMetrics.""" + super().__init__(dataloader, save_dir, pbar, args, _callbacks) + self.plot_masks = None + self.process = None + self.args.task = 'segment' + self.metrics = SegmentMetrics(save_dir=self.save_dir, on_plot=self.on_plot) + + def preprocess(self, batch): + """Preprocesses batch by converting masks to float and sending to device.""" + batch = super().preprocess(batch) + batch['masks'] = batch['masks'].to(self.device).float() + return batch + + def init_metrics(self, model): + """Initialize metrics and select mask processing function based on save_json flag.""" + super().init_metrics(model) + self.plot_masks = [] + if self.args.save_json: + check_requirements('pycocotools>=2.0.6') + self.process = ops.process_mask_upsample # more accurate + else: + self.process = ops.process_mask # faster + + def get_desc(self): + """Return a formatted description of evaluation metrics.""" + return ('%22s' + '%11s' * 10) % ('Class', 'Images', 'Instances', 'Box(P', 'R', 'mAP50', 'mAP50-95)', 'Mask(P', + 'R', 'mAP50', 'mAP50-95)') + + def postprocess(self, preds): + """Post-processes YOLO predictions and returns output detections with proto.""" + p = ops.non_max_suppression(preds[0], + self.args.conf, + self.args.iou, + labels=self.lb, + multi_label=True, + agnostic=self.args.single_cls, + max_det=self.args.max_det, + nc=self.nc) + proto = preds[1][-1] if len(preds[1]) == 3 else preds[1] # second output is len 3 if pt, but only 1 if exported + return p, proto + + def update_metrics(self, preds, batch): + """Metrics.""" + for si, (pred, proto) in enumerate(zip(preds[0], 
preds[1])): + idx = batch['batch_idx'] == si + cls = batch['cls'][idx] + bbox = batch['bboxes'][idx] + nl, npr = cls.shape[0], pred.shape[0] # number of labels, predictions + shape = batch['ori_shape'][si] + correct_masks = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device) # init + correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device) # init + self.seen += 1 + + if npr == 0: + if nl: + self.stats.append((correct_bboxes, correct_masks, *torch.zeros( + (2, 0), device=self.device), cls.squeeze(-1))) + if self.args.plots: + self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1)) + continue + + # Masks + midx = [si] if self.args.overlap_mask else idx + gt_masks = batch['masks'][midx] + pred_masks = self.process(proto, pred[:, 6:], pred[:, :4], shape=batch['img'][si].shape[1:]) + + # Predictions + if self.args.single_cls: + pred[:, 5] = 0 + predn = pred.clone() + ops.scale_boxes(batch['img'][si].shape[1:], predn[:, :4], shape, + ratio_pad=batch['ratio_pad'][si]) # native-space pred + + # Evaluate + if nl: + height, width = batch['img'].shape[2:] + tbox = ops.xywh2xyxy(bbox) * torch.tensor( + (width, height, width, height), device=self.device) # target boxes + ops.scale_boxes(batch['img'][si].shape[1:], tbox, shape, + ratio_pad=batch['ratio_pad'][si]) # native-space labels + labelsn = torch.cat((cls, tbox), 1) # native-space labels + correct_bboxes = self._process_batch(predn, labelsn) + # TODO: maybe remove these `self.` arguments as they already are member variable + correct_masks = self._process_batch(predn, + labelsn, + pred_masks, + gt_masks, + overlap=self.args.overlap_mask, + masks=True) + if self.args.plots: + self.confusion_matrix.process_batch(predn, labelsn) + + # Append correct_masks, correct_boxes, pconf, pcls, tcls + self.stats.append((correct_bboxes, correct_masks, pred[:, 4], pred[:, 5], cls.squeeze(-1))) + + pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8) + if self.args.plots 
and self.batch_i < 3: + self.plot_masks.append(pred_masks[:15].cpu()) # filter top 15 to plot + + # Save + if self.args.save_json: + pred_masks = ops.scale_image(pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), + shape, + ratio_pad=batch['ratio_pad'][si]) + self.pred_to_json(predn, batch['im_file'][si], pred_masks) + # if self.args.save_txt: + # save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt') + + def finalize_metrics(self, *args, **kwargs): + """Sets speed and confusion matrix for evaluation metrics.""" + self.metrics.speed = self.speed + self.metrics.confusion_matrix = self.confusion_matrix + + def _process_batch(self, detections, labels, pred_masks=None, gt_masks=None, overlap=False, masks=False): + """ + Return correct prediction matrix. + + Args: + detections (array[N, 6]), x1, y1, x2, y2, conf, class + labels (array[M, 5]), class, x1, y1, x2, y2 + + Returns: + correct (array[N, 10]), for 10 IoU levels + """ + if masks: + if overlap: + nl = len(labels) + index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1 + gt_masks = gt_masks.repeat(nl, 1, 1) # shape(1,640,640) -> (n,640,640) + gt_masks = torch.where(gt_masks == index, 1.0, 0.0) + if gt_masks.shape[1:] != pred_masks.shape[1:]: + gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode='bilinear', align_corners=False)[0] + gt_masks = gt_masks.gt_(0.5) + iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1)) + else: # boxes + iou = box_iou(labels[:, 1:], detections[:, :4]) + + return self.match_predictions(detections[:, 5], labels[:, 0], iou) + + def plot_val_samples(self, batch, ni): + """Plots validation samples with bounding box labels.""" + plot_images(batch['img'], + batch['batch_idx'], + batch['cls'].squeeze(-1), + batch['bboxes'], + batch['masks'], + paths=batch['im_file'], + fname=self.save_dir / f'val_batch{ni}_labels.jpg', + names=self.names, + on_plot=self.on_plot) + + def 
plot_predictions(self, batch, preds, ni): + """Plots batch predictions with masks and bounding boxes.""" + plot_images( + batch['img'], + *output_to_target(preds[0], max_det=15), # not set to self.args.max_det due to slow plotting speed + torch.cat(self.plot_masks, dim=0) if len(self.plot_masks) else self.plot_masks, + paths=batch['im_file'], + fname=self.save_dir / f'val_batch{ni}_pred.jpg', + names=self.names, + on_plot=self.on_plot) # pred + self.plot_masks.clear() + + def pred_to_json(self, predn, filename, pred_masks): + """Save one JSON result.""" + # Example result = {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} + from pycocotools.mask import encode # noqa + + def single_encode(x): + """Encode predicted masks as RLE and append results to jdict.""" + rle = encode(np.asarray(x[:, :, None], order='F', dtype='uint8'))[0] + rle['counts'] = rle['counts'].decode('utf-8') + return rle + + stem = Path(filename).stem + image_id = int(stem) if stem.isnumeric() else stem + box = ops.xyxy2xywh(predn[:, :4]) # xywh + box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner + pred_masks = np.transpose(pred_masks, (2, 0, 1)) + with ThreadPool(NUM_THREADS) as pool: + rles = pool.map(single_encode, pred_masks) + for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): + self.jdict.append({ + 'image_id': image_id, + 'category_id': self.class_map[int(p[5])], + 'bbox': [round(x, 3) for x in b], + 'score': round(p[4], 5), + 'segmentation': rles[i]}) + + def eval_json(self, stats): + """Return COCO-style object detection evaluation metrics.""" + if self.args.save_json and self.is_coco and len(self.jdict): + anno_json = self.data['path'] / 'annotations/instances_val2017.json' # annotations + pred_json = self.save_dir / 'predictions.json' # predictions + LOGGER.info(f'\nEvaluating pycocotools mAP using {pred_json} and {anno_json}...') + try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb + 
check_requirements('pycocotools>=2.0.6') + from pycocotools.coco import COCO # noqa + from pycocotools.cocoeval import COCOeval # noqa + + for x in anno_json, pred_json: + assert x.is_file(), f'{x} file not found' + anno = COCO(str(anno_json)) # init annotations api + pred = anno.loadRes(str(pred_json)) # init predictions api (must pass string, not Path) + for i, eval in enumerate([COCOeval(anno, pred, 'bbox'), COCOeval(anno, pred, 'segm')]): + if self.is_coco: + eval.params.imgIds = [int(Path(x).stem) for x in self.dataloader.dataset.im_files] # im to eval + eval.evaluate() + eval.accumulate() + eval.summarize() + idx = i * 4 + 2 + stats[self.metrics.keys[idx + 1]], stats[ + self.metrics.keys[idx]] = eval.stats[:2] # update mAP50-95 and mAP50 + except Exception as e: + LOGGER.warning(f'pycocotools unable to run: {e}') + return stats diff --git a/ultralytics/nn/__init__.py b/ultralytics/nn/__init__.py new file mode 100644 index 0000000..9889b7e --- /dev/null +++ b/ultralytics/nn/__init__.py @@ -0,0 +1,9 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from .tasks import (BaseModel, ClassificationModel, DetectionModel, SegmentationModel, attempt_load_one_weight, + attempt_load_weights, guess_model_scale, guess_model_task, parse_model, torch_safe_load, + yaml_model_load) + +__all__ = ('attempt_load_one_weight', 'attempt_load_weights', 'parse_model', 'yaml_model_load', 'guess_model_task', + 'guess_model_scale', 'torch_safe_load', 'DetectionModel', 'SegmentationModel', 'ClassificationModel', + 'BaseModel') diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py new file mode 100644 index 0000000..596d9bd --- /dev/null +++ b/ultralytics/nn/autobackend.py @@ -0,0 +1,514 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +import ast +import contextlib +import json +import platform +import zipfile +from collections import OrderedDict, namedtuple +from pathlib import Path + +import cv2 +import numpy as np +import torch +import torch.nn as nn +from PIL import Image 
+ +from ultralytics.utils import ARM64, LINUX, LOGGER, ROOT, yaml_load +from ultralytics.utils.checks import check_requirements, check_suffix, check_version, check_yaml +from ultralytics.utils.downloads import attempt_download_asset, is_url + + +def check_class_names(names): + """ + Check class names. + + Map imagenet class codes to human-readable names if required. Convert lists to dicts. + """ + if isinstance(names, list): # names is a list + names = dict(enumerate(names)) # convert to dict + if isinstance(names, dict): + # Convert 1) string keys to int, i.e. '0' to 0, and non-string values to strings, i.e. True to 'True' + names = {int(k): str(v) for k, v in names.items()} + n = len(names) + if max(names.keys()) >= n: + raise KeyError(f'{n}-class dataset requires class indices 0-{n - 1}, but you have invalid class indices ' + f'{min(names.keys())}-{max(names.keys())} defined in your dataset YAML.') + if isinstance(names[0], str) and names[0].startswith('n0'): # imagenet class codes, i.e. 'n01440764' + names_map = yaml_load(ROOT / 'cfg/datasets/ImageNet.yaml')['map'] # human-readable names + names = {k: names_map[v] for k, v in names.items()} + return names + + +class AutoBackend(nn.Module): + """ + Handles dynamic backend selection for running inference using Ultralytics YOLO models. + + The AutoBackend class is designed to provide an abstraction layer for various inference engines. 
It supports a wide + range of formats, each with specific naming conventions as outlined below: + + Supported Formats and Naming Conventions: + | Format | File Suffix | + |-----------------------|------------------| + | PyTorch | *.pt | + | TorchScript | *.torchscript | + | ONNX Runtime | *.onnx | + | ONNX OpenCV DNN | *.onnx (dnn=True)| + | OpenVINO | *openvino_model/ | + | CoreML | *.mlpackage | + | TensorRT | *.engine | + | TensorFlow SavedModel | *_saved_model | + | TensorFlow GraphDef | *.pb | + | TensorFlow Lite | *.tflite | + | TensorFlow Edge TPU | *_edgetpu.tflite | + | PaddlePaddle | *_paddle_model | + | ncnn | *_ncnn_model | + + This class offers dynamic backend switching capabilities based on the input model format, making it easier to deploy + models across various platforms. + """ + + @torch.no_grad() + def __init__(self, + weights='yolov8n.pt', + device=torch.device('cpu'), + dnn=False, + data=None, + fp16=False, + fuse=True, + verbose=True): + """ + Initialize the AutoBackend for inference. + + Args: + weights (str): Path to the model weights file. Defaults to 'yolov8n.pt'. + device (torch.device): Device to run the model on. Defaults to CPU. + dnn (bool): Use OpenCV DNN module for ONNX inference. Defaults to False. + data (str | Path | optional): Path to the additional data.yaml file containing class names. Optional. + fp16 (bool): Enable half-precision inference. Supported only on specific backends. Defaults to False. + fuse (bool): Fuse Conv2D + BatchNorm layers for optimization. Defaults to True. + verbose (bool): Enable verbose logging. Defaults to True. 
+ """ + super().__init__() + w = str(weights[0] if isinstance(weights, list) else weights) + nn_module = isinstance(weights, torch.nn.Module) + pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, ncnn, triton = \ + self._model_type(w) + fp16 &= pt or jit or onnx or xml or engine or nn_module or triton # FP16 + nhwc = coreml or saved_model or pb or tflite or edgetpu # BHWC formats (vs torch BCWH) + stride = 32 # default stride + model, metadata = None, None + + # Set device + cuda = torch.cuda.is_available() and device.type != 'cpu' # use CUDA + if cuda and not any([nn_module, pt, jit, engine]): # GPU dataloader formats + device = torch.device('cpu') + cuda = False + + # Download if not local + if not (pt or triton or nn_module): + w = attempt_download_asset(w) + + # Load model + if nn_module: # in-memory PyTorch model + model = weights.to(device) + model = model.fuse(verbose=verbose) if fuse else model + if hasattr(model, 'kpt_shape'): + kpt_shape = model.kpt_shape # pose-only + stride = max(int(model.stride.max()), 32) # model stride + names = model.module.names if hasattr(model, 'module') else model.names # get class names + model.half() if fp16 else model.float() + self.model = model # explicitly assign for to(), cpu(), cuda(), half() + pt = True + elif pt: # PyTorch + from ultralytics.nn.tasks import attempt_load_weights + model = attempt_load_weights(weights if isinstance(weights, list) else w, + device=device, + inplace=True, + fuse=fuse) + if hasattr(model, 'kpt_shape'): + kpt_shape = model.kpt_shape # pose-only + stride = max(int(model.stride.max()), 32) # model stride + names = model.module.names if hasattr(model, 'module') else model.names # get class names + model.half() if fp16 else model.float() + self.model = model # explicitly assign for to(), cpu(), cuda(), half() + elif jit: # TorchScript + LOGGER.info(f'Loading {w} for TorchScript inference...') + extra_files = {'config.txt': ''} # model metadata + model = 
torch.jit.load(w, _extra_files=extra_files, map_location=device) + model.half() if fp16 else model.float() + if extra_files['config.txt']: # load metadata dict + metadata = json.loads(extra_files['config.txt'], object_hook=lambda x: dict(x.items())) + elif dnn: # ONNX OpenCV DNN + LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...') + check_requirements('opencv-python>=4.5.4') + net = cv2.dnn.readNetFromONNX(w) + elif onnx: # ONNX Runtime + LOGGER.info(f'Loading {w} for ONNX Runtime inference...') + check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime')) + import onnxruntime + providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider'] + session = onnxruntime.InferenceSession(w, providers=providers) + output_names = [x.name for x in session.get_outputs()] + metadata = session.get_modelmeta().custom_metadata_map # metadata + elif xml: # OpenVINO + LOGGER.info(f'Loading {w} for OpenVINO inference...') + check_requirements('openvino>=2023.0') # requires openvino-dev: https://pypi.org/project/openvino-dev/ + from openvino.runtime import Core, Layout, get_batch # noqa + core = Core() + w = Path(w) + if not w.is_file(): # if not *.xml + w = next(w.glob('*.xml')) # get *.xml file from *_openvino_model dir + ov_model = core.read_model(model=str(w), weights=w.with_suffix('.bin')) + if ov_model.get_parameters()[0].get_layout().empty: + ov_model.get_parameters()[0].set_layout(Layout('NCHW')) + batch_dim = get_batch(ov_model) + if batch_dim.is_static: + batch_size = batch_dim.get_length() + ov_compiled_model = core.compile_model(ov_model, device_name='AUTO') # AUTO selects best available device + metadata = w.parent / 'metadata.yaml' + elif engine: # TensorRT + LOGGER.info(f'Loading {w} for TensorRT inference...') + try: + import tensorrt as trt # noqa https://developer.nvidia.com/nvidia-tensorrt-download + except ImportError: + if LINUX: + check_requirements('nvidia-tensorrt', cmds='-U --index-url 
https://pypi.ngc.nvidia.com') + import tensorrt as trt # noqa + check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0 + if device.type == 'cpu': + device = torch.device('cuda:0') + Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr')) + logger = trt.Logger(trt.Logger.INFO) + # Read file + with open(w, 'rb') as f, trt.Runtime(logger) as runtime: + meta_len = int.from_bytes(f.read(4), byteorder='little') # read metadata length + metadata = json.loads(f.read(meta_len).decode('utf-8')) # read metadata + model = runtime.deserialize_cuda_engine(f.read()) # read engine + context = model.create_execution_context() + bindings = OrderedDict() + output_names = [] + fp16 = False # default updated below + dynamic = False + for i in range(model.num_bindings): + name = model.get_binding_name(i) + dtype = trt.nptype(model.get_binding_dtype(i)) + if model.binding_is_input(i): + if -1 in tuple(model.get_binding_shape(i)): # dynamic + dynamic = True + context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2])) + if dtype == np.float16: + fp16 = True + else: # output + output_names.append(name) + shape = tuple(context.get_binding_shape(i)) + im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device) + bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr())) + binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items()) + batch_size = bindings['images'].shape[0] # if dynamic, this is instead max batch size + elif coreml: # CoreML + LOGGER.info(f'Loading {w} for CoreML inference...') + import coremltools as ct + model = ct.models.MLModel(w) + metadata = dict(model.user_defined_metadata) + elif saved_model: # TF SavedModel + LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...') + import tensorflow as tf + keras = False # assume TF1 saved_model + model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w) + metadata = Path(w) / 'metadata.yaml' + elif pb: # GraphDef 
https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt + LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...') + import tensorflow as tf + + from ultralytics.engine.exporter import gd_outputs + + def wrap_frozen_graph(gd, inputs, outputs): + """Wrap frozen graphs for deployment.""" + x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=''), []) # wrapped + ge = x.graph.as_graph_element + return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs)) + + gd = tf.Graph().as_graph_def() # TF GraphDef + with open(w, 'rb') as f: + gd.ParseFromString(f.read()) + frozen_func = wrap_frozen_graph(gd, inputs='x:0', outputs=gd_outputs(gd)) + elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python + try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu + from tflite_runtime.interpreter import Interpreter, load_delegate + except ImportError: + import tensorflow as tf + Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate + if edgetpu: # TF Edge TPU https://coral.ai/software/#edgetpu-runtime + LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...') + delegate = { + 'Linux': 'libedgetpu.so.1', + 'Darwin': 'libedgetpu.1.dylib', + 'Windows': 'edgetpu.dll'}[platform.system()] + interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)]) + else: # TFLite + LOGGER.info(f'Loading {w} for TensorFlow Lite inference...') + interpreter = Interpreter(model_path=w) # load TFLite model + interpreter.allocate_tensors() # allocate + input_details = interpreter.get_input_details() # inputs + output_details = interpreter.get_output_details() # outputs + # Load metadata + with contextlib.suppress(zipfile.BadZipFile): + with zipfile.ZipFile(w, 'r') as model: + meta_file = model.namelist()[0] + metadata = ast.literal_eval(model.read(meta_file).decode('utf-8')) + 
elif tfjs: # TF.js + raise NotImplementedError('YOLOv8 TF.js inference is not currently supported.') + elif paddle: # PaddlePaddle + LOGGER.info(f'Loading {w} for PaddlePaddle inference...') + check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle') + import paddle.inference as pdi # noqa + w = Path(w) + if not w.is_file(): # if not *.pdmodel + w = next(w.rglob('*.pdmodel')) # get *.pdmodel file from *_paddle_model dir + config = pdi.Config(str(w), str(w.with_suffix('.pdiparams'))) + if cuda: + config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0) + predictor = pdi.create_predictor(config) + input_handle = predictor.get_input_handle(predictor.get_input_names()[0]) + output_names = predictor.get_output_names() + metadata = w.parents[1] / 'metadata.yaml' + elif ncnn: # ncnn + LOGGER.info(f'Loading {w} for ncnn inference...') + check_requirements('git+https://github.com/Tencent/ncnn.git' if ARM64 else 'ncnn') # requires ncnn + import ncnn as pyncnn + net = pyncnn.Net() + net.opt.use_vulkan_compute = cuda + w = Path(w) + if not w.is_file(): # if not *.param + w = next(w.glob('*.param')) # get *.param file from *_ncnn_model dir + net.load_param(str(w)) + net.load_model(str(w.with_suffix('.bin'))) + metadata = w.parent / 'metadata.yaml' + elif triton: # NVIDIA Triton Inference Server + check_requirements('tritonclient[all]') + from ultralytics.utils.triton import TritonRemoteModel + model = TritonRemoteModel(w) + else: + from ultralytics.engine.exporter import export_formats + raise TypeError(f"model='{w}' is not a supported model format. " + 'See https://docs.ultralytics.com/modes/predict for help.' 
def forward(self, im, augment=False, visualize=False):
    """
    Run inference on whichever backend was selected when the model was loaded.

    Exactly one backend branch runs, chosen by the boolean backend flags stored on the instance
    (`pt`, `jit`, `dnn`, `onnx`, `xml`, `engine`, `coreml`, `paddle`, `ncnn`, `triton`, and the
    TensorFlow family as the fallback).

    Args:
        im (torch.Tensor): Input batch; its shape is unpacked as (batch, channels, height, width).
        augment (bool): Forwarded to the model on the PyTorch path only. Defaults to False.
        visualize (bool): Forwarded to the model on the PyTorch path only. Defaults to False.

    Returns:
        The backend output passed through `self.from_numpy`. A list/tuple with a single element is
        unwrapped to that element; a list/tuple with several elements becomes a list with one converted
        entry per output; any other output is converted directly.

    Raises:
        TypeError: If a CoreML model returns a 'confidence' entry, i.e. it was exported with an NMS pipeline.
        AssertionError: If the TensorRT input shape does not match the shape bound to 'images'.
    """
    # Spatial size is captured before any layout change; the TFLite branch needs it to denormalize boxes.
    _, _, height, width = im.shape  # batch, channel, height, width
    if self.fp16 and im.dtype != torch.float16:
        im = im.half()  # to FP16
    if self.nhwc:
        im = im.permute(0, 2, 3, 1)  # torch BCHW to numpy BHWC shape(1,320,192,3)

    if self.pt or self.nn_module:  # PyTorch
        # Only pass the extra keywords when requested so plain callables keep working.
        y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
    elif self.jit:  # TorchScript
        y = self.model(im)
    elif self.dnn:  # ONNX OpenCV DNN
        im = im.cpu().numpy()  # torch to numpy
        self.net.setInput(im)
        y = self.net.forward()
    elif self.onnx:  # ONNX Runtime
        im = im.cpu().numpy()  # torch to numpy
        y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
    elif self.xml:  # OpenVINO
        im = im.cpu().numpy()  # FP32
        y = list(self.ov_compiled_model(im).values())
    elif self.engine:  # TensorRT
        if self.dynamic and im.shape != self.bindings['images'].shape:
            # Dynamic engine: rebind the input shape and resize every output buffer to match.
            i = self.model.get_binding_index('images')
            self.context.set_binding_shape(i, im.shape)  # reshape if dynamic
            self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
            for name in self.output_names:
                i = self.model.get_binding_index(name)
                self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
        s = self.bindings['images'].shape
        assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
        self.binding_addrs['images'] = int(im.data_ptr())
        self.context.execute_v2(list(self.binding_addrs.values()))
        y = [self.bindings[x].data for x in sorted(self.output_names)]
    elif self.coreml:  # CoreML
        im = im[0].cpu().numpy()
        im_pil = Image.fromarray((im * 255).astype('uint8'))
        y = self.model.predict({'image': im_pil})  # coordinates are xywh normalized
        if 'confidence' in y:
            # Report the weights path stored on the instance; the local width must not be used here.
            raise TypeError('Ultralytics only supports inference of non-pipelined CoreML models exported with '
                            f"'nms=False', but 'model={self.w}' has an NMS pipeline created by an 'nms=True' export.")
            # TODO: CoreML NMS inference handling
        elif len(y) == 1:  # classification model
            y = list(y.values())
        elif len(y) == 2:  # segmentation model
            y = list(reversed(y.values()))  # reversed for segmentation models (pred, proto)
    elif self.paddle:  # PaddlePaddle
        im = im.cpu().numpy().astype(np.float32)
        self.input_handle.copy_from_cpu(im)
        self.predictor.run()
        y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
    elif self.ncnn:  # ncnn
        mat_in = self.pyncnn.Mat(im[0].cpu().numpy())
        ex = self.net.create_extractor()
        input_names, output_names = self.net.input_names(), self.net.output_names()
        ex.input(input_names[0], mat_in)
        y = []
        for output_name in output_names:
            mat_out = self.pyncnn.Mat()
            ex.extract(output_name, mat_out)
            y.append(np.array(mat_out)[None])  # [None] restores the leading batch axis
    elif self.triton:  # NVIDIA Triton Inference Server
        im = im.cpu().numpy()  # torch to numpy
        y = self.model(im)
    else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
        im = im.cpu().numpy()
        if self.saved_model:  # SavedModel
            y = self.model(im, training=False) if self.keras else self.model(im)
            if not isinstance(y, list):
                y = [y]
        elif self.pb:  # GraphDef
            y = self.frozen_func(x=self.tf.constant(im))
            if len(y) == 2 and len(self.names) == 999:  # segments and names not defined
                # 999 names is the placeholder default; derive the real class count from output shapes.
                ip, ib = (0, 1) if len(y[0].shape) == 4 else (1, 0)  # index of protos, boxes
                nc = y[ib].shape[1] - y[ip].shape[3] - 4  # y = (1, 160, 160, 32), (1, 116, 8400)
                self.names = {i: f'class{i}' for i in range(nc)}
        else:  # Lite or Edge TPU
            details = self.input_details[0]
            integer = details['dtype'] in (np.int8, np.int16)  # is TFLite quantized int8 or int16 model
            if integer:
                scale, zero_point = details['quantization']
                im = (im / scale + zero_point).astype(details['dtype'])  # de-scale
            self.interpreter.set_tensor(details['index'], im)
            self.interpreter.invoke()
            y = []
            for output in self.output_details:
                x = self.interpreter.get_tensor(output['index'])
                if integer:
                    scale, zero_point = output['quantization']
                    x = (x.astype(np.float32) - zero_point) * scale  # re-scale
                if x.ndim > 2:  # if task is not classification
                    # Denormalize xywh by image size. See https://github.com/ultralytics/ultralytics/pull/1695
                    # xywh are normalized in TFLite/EdgeTPU to mitigate quantization error of integer models
                    x[:, [0, 2]] *= width
                    x[:, [1, 3]] *= height
                y.append(x)
        # TF segment fixes: export is reversed vs ONNX export and protos are transposed
        if len(y) == 2:  # segment with (det, proto) output order reversed
            if len(y[1].shape) != 4:
                y = list(reversed(y))  # should be y = (1, 116, 8400), (1, 160, 160, 32)
            y[1] = np.transpose(y[1], (0, 3, 1, 2))  # should be y = (1, 116, 8400), (1, 32, 160, 160)
        y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]

    if isinstance(y, (list, tuple)):
        return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
    else:
        return self.from_numpy(y)
@staticmethod
def _model_type(p='path/to/model.pt'):
    """
    Classify a model path or URL into per-format boolean flags.

    Args:
        p: Path (or URL) of the model file. Defaults to 'path/to/model.pt'.

    Returns:
        (list[bool]): One flag per suffix returned by `export_formats().Suffix`, in that order, with a
            final flag appended that is True when `p` looks like a Triton server URL.
    """
    # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
    # Flag order follows the rows of export_formats(); the indices used below are 5=coreml, 8=tflite, 9=edgetpu.
    from ultralytics.engine.exporter import export_formats
    sf = list(export_formats().Suffix)  # export suffixes
    # NOTE(review): the suffix check only runs for non-str, non-URL inputs — confirm this is intended.
    if not is_url(p, check=False) and not isinstance(p, str):
        check_suffix(p, sf)  # checks
    name = Path(p).name
    types = [s in name for s in sf]
    types[5] |= name.endswith('.mlmodel')  # retain support for older Apple CoreML *.mlmodel formats
    types[8] &= not types[9]  # tflite &= not edgetpu
    if any(types):
        triton = False
    else:
        from urllib.parse import urlsplit
        url = urlsplit(p)
        # bool() keeps the flag a real boolean; the bare `and` chain could yield '' for an empty netloc/path.
        triton = bool(url.netloc and url.path and url.scheme in {'http', 'grpc'})

    return types + [triton]
+ ```python + from ultralytics.nn.modules import * + import torch + import os + + x = torch.ones(1, 128, 40, 40) + m = Conv(128, 128) + f = f'{m._get_name()}.onnx' + torch.onnx.export(m, x, f) + os.system(f'onnxsim {f} {f} && open {f}') + ``` +""" + +from .block import (C1, C2, C3, C3TR, DFL, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, GhostBottleneck, + HGBlock, HGStem, Proto, RepC3) +from .conv import (CBAM, ChannelAttention, Concat, Conv, Conv2, ConvTranspose, DWConv, DWConvTranspose2d, Focus, + GhostConv, LightConv, RepConv, SpatialAttention) +from .head import Classify, Detect, Pose, RTDETRDecoder, Segment +from .transformer import (AIFI, MLP, DeformableTransformerDecoder, DeformableTransformerDecoderLayer, LayerNorm2d, + MLPBlock, MSDeformAttn, TransformerBlock, TransformerEncoderLayer, TransformerLayer) + +__all__ = ('Conv', 'Conv2', 'LightConv', 'RepConv', 'DWConv', 'DWConvTranspose2d', 'ConvTranspose', 'Focus', + 'GhostConv', 'ChannelAttention', 'SpatialAttention', 'CBAM', 'Concat', 'TransformerLayer', + 'TransformerBlock', 'MLPBlock', 'LayerNorm2d', 'DFL', 'HGBlock', 'HGStem', 'SPP', 'SPPF', 'C1', 'C2', 'C3', + 'C2f', 'C3x', 'C3TR', 'C3Ghost', 'GhostBottleneck', 'Bottleneck', 'BottleneckCSP', 'Proto', 'Detect', + 'Segment', 'Pose', 'Classify', 'TransformerEncoderLayer', 'RepC3', 'RTDETRDecoder', 'AIFI', + 'DeformableTransformerDecoder', 'DeformableTransformerDecoderLayer', 'MSDeformAttn', 'MLP') diff --git a/ultralytics/nn/modules/block.py b/ultralytics/nn/modules/block.py new file mode 100644 index 0000000..778dcec --- /dev/null +++ b/ultralytics/nn/modules/block.py @@ -0,0 +1,333 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +"""Block modules.""" + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .conv import Conv, DWConv, GhostConv, LightConv, RepConv +from .transformer import TransformerBlock + +__all__ = ('DFL', 'HGBlock', 'HGStem', 'SPP', 'SPPF', 'C1', 'C2', 'C3', 'C2f', 'C3x', 'C3TR', 'C3Ghost', + 
class DFL(nn.Module):
    """
    Integral module of Distribution Focal Loss (DFL).

    Turns per-side discrete distributions over `c1` bins into a single expected value per side.

    Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
    """

    def __init__(self, c1=16):
        """Build a frozen 1x1 convolution whose weights are the bin indices 0..c1-1."""
        super().__init__()
        self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
        bin_values = torch.arange(c1, dtype=torch.float).view(1, c1, 1, 1)
        self.conv.weight.data[:] = nn.Parameter(bin_values)
        self.c1 = c1

    def forward(self, x):
        """Map a (batch, 4 * c1, anchors) tensor to (batch, 4, anchors) expected bin values."""
        batch, _, anchors = x.shape
        # Move the bin axis to dim 1 so softmax and the 1x1 conv both act over the bins.
        probs = x.view(batch, 4, self.c1, anchors).transpose(2, 1).softmax(1)
        expected = self.conv(probs)
        return expected.view(batch, 4, anchors)
class HGBlock(nn.Module):
    """
    HG_Block of PPHGNetV2: a chain of convolutions whose outputs are concatenated, squeezed and re-expanded.

    https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
    """

    def __init__(self, c1, cm, c2, k=3, n=6, lightconv=False, shortcut=False, act=nn.ReLU()):
        """
        Build the block.

        Args:
            c1: Input channels.
            cm: Channels produced by each of the `n` chained layers.
            c2: Output channels.
            k: Kernel size of the chained layers.
            n: Number of chained layers.
            lightconv: Use LightConv instead of Conv for the chained layers.
            shortcut: Add the input to the output when `c1 == c2`.
            act: Activation handed to every layer.
        """
        super().__init__()
        layer_type = LightConv if lightconv else Conv
        chain = []
        for idx in range(n):
            ch_in = cm if idx else c1  # only the first layer sees the block input
            chain.append(layer_type(ch_in, cm, k=k, act=act))
        self.m = nn.ModuleList(chain)
        self.sc = Conv(c1 + n * cm, c2 // 2, 1, 1, act=act)  # squeeze conv
        self.ec = Conv(c2 // 2, c2, 1, 1, act=act)  # excitation conv
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Run the chain, concatenate input and all intermediate outputs, then squeeze and excite."""
        feats = [x]
        for layer in self.m:
            feats.append(layer(feats[-1]))
        out = self.ec(self.sc(torch.cat(feats, 1)))
        if self.add:
            return out + x
        return out
class C1(nn.Module):
    """CSP Bottleneck with 1 convolution."""

    def __init__(self, c1, c2, n=1):
        """Create a 1x1 projection from `c1` to `c2` channels followed by `n` stacked 3x3 convolutions."""
        super().__init__()
        self.cv1 = Conv(c1, c2, 1, 1)
        stacked = [Conv(c2, c2, 3) for _ in range(n)]
        self.m = nn.Sequential(*stacked)

    def forward(self, x):
        """Project the input, run the 3x3 stack on it, and add the projection back as a residual."""
        projected = self.cv1(x)
        refined = self.m(projected)
        return refined + projected
class RepC3(nn.Module):
    """Rep C3: two parallel 1x1 projections, one followed by a RepConv stack, summed and projected out."""

    def __init__(self, c1, c2, n=3, e=1.0):
        """
        Build the module.

        Args:
            c1: Input channels.
            c2: Output channels.
            n: Number of stacked RepConv layers.
            e: Expansion ratio; the hidden width is `int(c2 * e)`.
        """
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        # Both branches must emit c_ channels: the RepConv stack is built for c_ -> c_ and the two
        # branches are summed element-wise in forward(). Identical to the previous layout when e == 1.0.
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.m = nn.Sequential(*[RepConv(c_, c_) for _ in range(n)])
        # Only project back to c2 when the hidden width differs from the output width.
        self.cv3 = Conv(c_, c2, 1, 1) if c_ != c2 else nn.Identity()

    def forward(self, x):
        """Sum the RepConv branch with the plain 1x1 branch, then apply the output projection."""
        return self.cv3(self.m(self.cv1(x)) + self.cv2(x))
def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """
    Return the padding that gives 'same'-shaped outputs.

    Args:
        k: Kernel size, either an int or a sequence of ints.
        p: Explicit padding; returned unchanged when it is not None.
        d: Dilation; values above 1 enlarge the effective kernel size before padding is derived.

    Returns:
        `p` if given, otherwise half the effective kernel size (an int for int `k`, a list otherwise).
    """
    scalar = isinstance(k, int)
    if d > 1:
        # Effective extent of a dilated kernel.
        k = d * (k - 1) + 1 if scalar else [d * (size - 1) + 1 for size in k]
    if p is not None:
        return p
    if scalar:
        return k // 2
    return [size // 2 for size in k]
class ConvTranspose(nn.Module):
    """Convolution transpose 2d layer."""
    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True):
        """
        Build a transposed convolution with optional batch normalization and activation.

        Args:
            c1: Input channels.
            c2: Output channels.
            k: Kernel size.
            s: Stride.
            p: Padding.
            bn: Add BatchNorm2d after the convolution; the convolution only has a bias when this is False.
            act: True for the class default activation, an nn.Module to use it as-is, anything else for none.
        """
        super().__init__()
        self.conv_transpose = nn.ConvTranspose2d(c1, c2, k, s, p, bias=not bn)
        if bn:
            self.bn = nn.BatchNorm2d(c2)
        else:
            self.bn = nn.Identity()
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Apply the transposed convolution, then normalization, then activation."""
        upsampled = self.conv_transpose(x)
        return self.act(self.bn(upsampled))

    def forward_fuse(self, x):
        """Apply the transposed convolution and activation, skipping the normalization layer."""
        upsampled = self.conv_transpose(x)
        return self.act(upsampled)
class RepConv(nn.Module):
    """
    RepConv is a basic rep-style block, including training and deploy status.

    In training form the output is the activated sum of a 3x3 Conv branch, a 1x1 Conv branch and an optional
    BatchNorm identity branch. `fuse_convs` folds all branches into one biased 3x3 `nn.Conv2d` (`self.conv`),
    which `forward_fuse` then uses.

    This module is used in RT-DETR.
    Based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
    """
    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
        """
        Build the training-form branches.

        Args:
            c1: Input channels.
            c2: Output channels.
            k: Kernel size; must be 3.
            s: Stride.
            p: Padding; must be 1.
            g: Groups.
            d: Accepted but not used by this implementation.
            act: True for the class default activation, an nn.Module to use it as-is, anything else for none.
            bn: Request the BatchNorm identity branch (only created when `c1 == c2` and `s == 1`).
            deploy: Accepted but not used by this implementation.
        """
        super().__init__()
        assert k == 3 and p == 1
        self.g = g
        self.c1 = c1
        self.c2 = c2
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

        # The identity branch is only possible when input and output tensors have the same shape.
        with_identity = bn and c2 == c1 and s == 1
        self.bn = nn.BatchNorm2d(num_features=c1) if with_identity else None
        self.conv1 = Conv(c1, c2, k, s, p=p, g=g, act=False)
        self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False)

    def forward_fuse(self, x):
        """Forward pass through the single fused convolution created by `fuse_convs`."""
        fused = self.conv(x)
        return self.act(fused)

    def forward(self, x):
        """Forward pass through the un-fused branches: 3x3 + 1x1 + optional identity, then activation."""
        identity = self.bn(x) if self.bn is not None else 0
        return self.act(self.conv1(x) + self.conv2(x) + identity)

    def get_equivalent_kernel_bias(self):
        """Return the (kernel, bias) of one 3x3 convolution equivalent to the sum of all branches."""
        k3, b3 = self._fuse_bn_tensor(self.conv1)
        k1, b1 = self._fuse_bn_tensor(self.conv2)
        k_id, b_id = self._fuse_bn_tensor(self.bn)
        kernel = k3 + self._pad_1x1_to_3x3_tensor(k1) + k_id
        bias = b3 + b1 + b_id
        return kernel, bias

    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
        """Zero-pad a 1x1 kernel by one on every spatial side; a missing kernel contributes 0."""
        if kernel1x1 is None:
            return 0
        return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])

    def _fuse_bn_tensor(self, branch):
        """
        Fold a branch's batch-norm statistics into an equivalent convolution kernel and bias.

        `branch` may be None (contributes 0, 0), a Conv (its conv weight and its bn), or a bare
        nn.BatchNorm2d (treated as an identity 3x3 kernel followed by that norm).
        """
        if branch is None:
            return 0, 0
        if isinstance(branch, Conv):
            kernel, norm = branch.conv.weight, branch.bn
        elif isinstance(branch, nn.BatchNorm2d):
            if not hasattr(self, 'id_tensor'):
                # Build (and cache) a grouped identity kernel: a single 1 at the centre tap per output channel.
                per_group = self.c1 // self.g
                identity = np.zeros((self.c1, per_group, 3, 3), dtype=np.float32)
                out_idx = np.arange(self.c1)
                identity[out_idx, out_idx % per_group, 1, 1] = 1
                self.id_tensor = torch.from_numpy(identity).to(branch.weight.device)
            kernel, norm = self.id_tensor, branch
        std = (norm.running_var + norm.eps).sqrt()
        scale = (norm.weight / std).reshape(-1, 1, 1, 1)
        return kernel * scale, norm.bias - norm.running_mean * norm.weight / std

    def fuse_convs(self):
        """Replace the training branches with one fused convolution; does nothing if already fused."""
        if hasattr(self, 'conv'):
            return
        kernel, bias = self.get_equivalent_kernel_bias()
        ref = self.conv1.conv  # the fused layer copies the 3x3 branch's geometry
        self.conv = nn.Conv2d(in_channels=ref.in_channels,
                              out_channels=ref.out_channels,
                              kernel_size=ref.kernel_size,
                              stride=ref.stride,
                              padding=ref.padding,
                              dilation=ref.dilation,
                              groups=ref.groups,
                              bias=True).requires_grad_(False)
        self.conv.weight.data = kernel
        self.conv.bias.data = bias
        for param in self.parameters():
            param.detach_()
        # Drop everything that only the un-fused form needs.
        for required in ('conv1', 'conv2'):
            delattr(self, required)
        for optional in ('nm', 'bn', 'id_tensor'):
            if hasattr(self, optional):
                delattr(self, optional)
using activation on convolutions of the input, optionally using batch normalization.""" + return x * self.act(self.fc(self.pool(x))) + + +class SpatialAttention(nn.Module): + """Spatial-attention module.""" + + def __init__(self, kernel_size=7): + """Initialize Spatial-attention module with kernel size argument.""" + super().__init__() + assert kernel_size in (3, 7), 'kernel size must be 3 or 7' + padding = 3 if kernel_size == 7 else 1 + self.cv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False) + self.act = nn.Sigmoid() + + def forward(self, x): + """Apply channel and spatial attention on input for feature recalibration.""" + return x * self.act(self.cv1(torch.cat([torch.mean(x, 1, keepdim=True), torch.max(x, 1, keepdim=True)[0]], 1))) + + +class CBAM(nn.Module): + """Convolutional Block Attention Module.""" + + def __init__(self, c1, kernel_size=7): + """Initialize CBAM with given input channel (c1) and kernel size.""" + super().__init__() + self.channel_attention = ChannelAttention(c1) + self.spatial_attention = SpatialAttention(kernel_size) + + def forward(self, x): + """Applies the forward pass through C1 module.""" + return self.spatial_attention(self.channel_attention(x)) + + +class Concat(nn.Module): + """Concatenate a list of tensors along dimension.""" + + def __init__(self, dimension=1): + """Concatenates a list of tensors along a specified dimension.""" + super().__init__() + self.d = dimension + + def forward(self, x): + """Forward pass for the YOLOv8 mask Proto module.""" + return torch.cat(x, self.d) diff --git a/ultralytics/nn/modules/head.py b/ultralytics/nn/modules/head.py new file mode 100644 index 0000000..7b04f87 --- /dev/null +++ b/ultralytics/nn/modules/head.py @@ -0,0 +1,396 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +"""Model head modules.""" + +import math + +import torch +import torch.nn as nn +from torch.nn.init import constant_, xavier_uniform_ + +from ultralytics.utils.tal import TORCH_1_10, dist2bbox, make_anchors + +from 
.block import DFL, Proto +from .conv import Conv +from .transformer import MLP, DeformableTransformerDecoder, DeformableTransformerDecoderLayer +from .utils import bias_init_with_prob, linear_init_ + +__all__ = 'Detect', 'Segment', 'Pose', 'Classify', 'RTDETRDecoder' + + +class Detect(nn.Module): + """YOLOv8 Detect head for detection models.""" + dynamic = False # force grid reconstruction + export = False # export mode + shape = None + anchors = torch.empty(0) # init + strides = torch.empty(0) # init + + def __init__(self, nc=80, ch=()): + """Initializes the YOLOv8 detection layer with specified number of classes and channels.""" + super().__init__() + self.nc = nc # number of classes + self.nl = len(ch) # number of detection layers + self.reg_max = 16 # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x) + self.no = nc + self.reg_max * 4 # number of outputs per anchor + self.stride = torch.zeros(self.nl) # strides computed during build + c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], min(self.nc, 100)) # channels + self.cv2 = nn.ModuleList( + nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch) + self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch) + self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity() + + def forward(self, x): + """Concatenates and returns predicted bounding boxes and class probabilities.""" + shape = x[0].shape # BCHW + for i in range(self.nl): + x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1) + if self.training: + return x + elif self.dynamic or self.shape != shape: + self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5)) + self.shape = shape + + x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2) + if self.export and self.format in ('saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs'): # avoid TF FlexSplitV ops + box = x_cat[:, :self.reg_max * 
4] + cls = x_cat[:, self.reg_max * 4:] + else: + box, cls = x_cat.split((self.reg_max * 4, self.nc), 1) + dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + + if self.export and self.format in ('tflite', 'edgetpu'): + # Normalize xywh with image size to mitigate quantization error of TFLite integer models as done in YOLOv5: + # https://github.com/ultralytics/yolov5/blob/0c8de3fca4a702f8ff5c435e67f378d1fce70243/models/tf.py#L307-L309 + # See this PR for details: https://github.com/ultralytics/ultralytics/pull/1695 + img_h = shape[2] * self.stride[0] + img_w = shape[3] * self.stride[0] + img_size = torch.tensor([img_w, img_h, img_w, img_h], device=dbox.device).reshape(1, 4, 1) + dbox /= img_size + + y = torch.cat((dbox, cls.sigmoid()), 1) + return y if self.export else (y, x) + + def bias_init(self): + """Initialize Detect() biases, WARNING: requires stride availability.""" + m = self # self.model[-1] # Detect() module + # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1 + # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # nominal class frequency + for a, b, s in zip(m.cv2, m.cv3, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (.01 objects, 80 classes, 640 img) + + +class Segment(Detect): + """YOLOv8 Segment head for segmentation models.""" + + def __init__(self, nc=80, nm=32, npr=256, ch=()): + """Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers.""" + super().__init__(nc, ch) + self.nm = nm # number of masks + self.npr = npr # number of protos + self.proto = Proto(ch[0], self.npr, self.nm) # protos + self.detect = Detect.forward + + c4 = max(ch[0] // 4, self.nm) + self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch) + + def forward(self, x): + """Return 
model outputs and mask coefficients if training, otherwise return outputs and mask coefficients.""" + p = self.proto(x[0]) # mask protos + bs = p.shape[0] # batch size + + mc = torch.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2) # mask coefficients + x = self.detect(self, x) + if self.training: + return x, mc, p + return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p)) + + +class Pose(Detect): + """YOLOv8 Pose head for keypoints models.""" + + def __init__(self, nc=80, kpt_shape=(17, 3), ch=()): + """Initialize YOLO network with default parameters and Convolutional Layers.""" + super().__init__(nc, ch) + self.kpt_shape = kpt_shape # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) + self.nk = kpt_shape[0] * kpt_shape[1] # number of keypoints total + self.detect = Detect.forward + + c4 = max(ch[0] // 4, self.nk) + self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch) + + def forward(self, x): + """Perform forward pass through YOLO model and return predictions.""" + bs = x[0].shape[0] # batch size + kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1) # (bs, 17*3, h*w) + x = self.detect(self, x) + if self.training: + return x, kpt + pred_kpt = self.kpts_decode(bs, kpt) + return torch.cat([x, pred_kpt], 1) if self.export else (torch.cat([x[0], pred_kpt], 1), (x[1], kpt)) + + def kpts_decode(self, bs, kpts): + """Decodes keypoints.""" + ndim = self.kpt_shape[1] + if self.export: # required for TFLite export to avoid 'PLACEHOLDER_FOR_GREATER_OP_CODES' bug + y = kpts.view(bs, *self.kpt_shape, -1) + a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides + if ndim == 3: + a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2) + return a.view(bs, self.nk, -1) + else: + y = kpts.clone() + if ndim == 3: + y[:, 2::3] = y[:, 2::3].sigmoid() # sigmoid (WARNING: inplace .sigmoid_() Apple MPS bug) + y[:, 
0::ndim] = (y[:, 0::ndim] * 2.0 + (self.anchors[0] - 0.5)) * self.strides + y[:, 1::ndim] = (y[:, 1::ndim] * 2.0 + (self.anchors[1] - 0.5)) * self.strides + return y + + +class Classify(nn.Module): + """YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2).""" + + def __init__(self, c1, c2, k=1, s=1, p=None, g=1): + """Initializes YOLOv8 classification head with specified input and output channels, kernel size, stride, + padding, and groups. + """ + super().__init__() + c_ = 1280 # efficientnet_b0 size + self.conv = Conv(c1, c_, k, s, p, g) + self.pool = nn.AdaptiveAvgPool2d(1) # to x(b,c_,1,1) + self.drop = nn.Dropout(p=0.0, inplace=True) + self.linear = nn.Linear(c_, c2) # to x(b,c2) + + def forward(self, x): + """Performs a forward pass of the YOLO model on input image data.""" + if isinstance(x, list): + x = torch.cat(x, 1) + x = self.linear(self.drop(self.pool(self.conv(x)).flatten(1))) + return x if self.training else x.softmax(1) + + +class RTDETRDecoder(nn.Module): + """ + Real-Time Deformable Transformer Decoder (RTDETRDecoder) module for object detection. + + This decoder module utilizes Transformer architecture along with deformable convolutions to predict bounding boxes + and class labels for objects in an image. It integrates features from multiple layers and runs through a series of + Transformer decoder layers to output the final predictions. + """ + export = False # export mode + + def __init__( + self, + nc=80, + ch=(512, 1024, 2048), + hd=256, # hidden dim + nq=300, # num queries + ndp=4, # num decoder points + nh=8, # num head + ndl=6, # num decoder layers + d_ffn=1024, # dim of feedforward + dropout=0., + act=nn.ReLU(), + eval_idx=-1, + # Training args + nd=100, # num denoising + label_noise_ratio=0.5, + box_noise_scale=1.0, + learnt_init_query=False): + """ + Initializes the RTDETRDecoder module with the given parameters. + + Args: + nc (int): Number of classes. Default is 80. + ch (tuple): Channels in the backbone feature maps. 
Default is (512, 1024, 2048). + hd (int): Dimension of hidden layers. Default is 256. + nq (int): Number of query points. Default is 300. + ndp (int): Number of decoder points. Default is 4. + nh (int): Number of heads in multi-head attention. Default is 8. + ndl (int): Number of decoder layers. Default is 6. + d_ffn (int): Dimension of the feed-forward networks. Default is 1024. + dropout (float): Dropout rate. Default is 0. + act (nn.Module): Activation function. Default is nn.ReLU. + eval_idx (int): Evaluation index. Default is -1. + nd (int): Number of denoising. Default is 100. + label_noise_ratio (float): Label noise ratio. Default is 0.5. + box_noise_scale (float): Box noise scale. Default is 1.0. + learnt_init_query (bool): Whether to learn initial query embeddings. Default is False. + """ + super().__init__() + self.hidden_dim = hd + self.nhead = nh + self.nl = len(ch) # num level + self.nc = nc + self.num_queries = nq + self.num_decoder_layers = ndl + + # Backbone feature projection + self.input_proj = nn.ModuleList(nn.Sequential(nn.Conv2d(x, hd, 1, bias=False), nn.BatchNorm2d(hd)) for x in ch) + # NOTE: simplified version but it's not consistent with .pt weights. 
+ # self.input_proj = nn.ModuleList(Conv(x, hd, act=False) for x in ch) + + # Transformer module + decoder_layer = DeformableTransformerDecoderLayer(hd, nh, d_ffn, dropout, act, self.nl, ndp) + self.decoder = DeformableTransformerDecoder(hd, decoder_layer, ndl, eval_idx) + + # Denoising part + self.denoising_class_embed = nn.Embedding(nc, hd) + self.num_denoising = nd + self.label_noise_ratio = label_noise_ratio + self.box_noise_scale = box_noise_scale + + # Decoder embedding + self.learnt_init_query = learnt_init_query + if learnt_init_query: + self.tgt_embed = nn.Embedding(nq, hd) + self.query_pos_head = MLP(4, 2 * hd, hd, num_layers=2) + + # Encoder head + self.enc_output = nn.Sequential(nn.Linear(hd, hd), nn.LayerNorm(hd)) + self.enc_score_head = nn.Linear(hd, nc) + self.enc_bbox_head = MLP(hd, hd, 4, num_layers=3) + + # Decoder head + self.dec_score_head = nn.ModuleList([nn.Linear(hd, nc) for _ in range(ndl)]) + self.dec_bbox_head = nn.ModuleList([MLP(hd, hd, 4, num_layers=3) for _ in range(ndl)]) + + self._reset_parameters() + + def forward(self, x, batch=None): + """Runs the forward pass of the module, returning bounding box and classification scores for the input.""" + from ultralytics.models.utils.ops import get_cdn_group + + # Input projection and embedding + feats, shapes = self._get_encoder_input(x) + + # Prepare denoising training + dn_embed, dn_bbox, attn_mask, dn_meta = \ + get_cdn_group(batch, + self.nc, + self.num_queries, + self.denoising_class_embed.weight, + self.num_denoising, + self.label_noise_ratio, + self.box_noise_scale, + self.training) + + embed, refer_bbox, enc_bboxes, enc_scores = \ + self._get_decoder_input(feats, shapes, dn_embed, dn_bbox) + + # Decoder + dec_bboxes, dec_scores = self.decoder(embed, + refer_bbox, + feats, + shapes, + self.dec_bbox_head, + self.dec_score_head, + self.query_pos_head, + attn_mask=attn_mask) + x = dec_bboxes, dec_scores, enc_bboxes, enc_scores, dn_meta + if self.training: + return x + # (bs, 300, 4+nc) + 
y = torch.cat((dec_bboxes.squeeze(0), dec_scores.squeeze(0).sigmoid()), -1) + return y if self.export else (y, x) + + def _generate_anchors(self, shapes, grid_size=0.05, dtype=torch.float32, device='cpu', eps=1e-2): + """Generates anchor bounding boxes for given shapes with specific grid size and validates them.""" + anchors = [] + for i, (h, w) in enumerate(shapes): + sy = torch.arange(end=h, dtype=dtype, device=device) + sx = torch.arange(end=w, dtype=dtype, device=device) + grid_y, grid_x = torch.meshgrid(sy, sx, indexing='ij') if TORCH_1_10 else torch.meshgrid(sy, sx) + grid_xy = torch.stack([grid_x, grid_y], -1) # (h, w, 2) + + valid_WH = torch.tensor([h, w], dtype=dtype, device=device) + grid_xy = (grid_xy.unsqueeze(0) + 0.5) / valid_WH # (1, h, w, 2) + wh = torch.ones_like(grid_xy, dtype=dtype, device=device) * grid_size * (2.0 ** i) + anchors.append(torch.cat([grid_xy, wh], -1).view(-1, h * w, 4)) # (1, h*w, 4) + + anchors = torch.cat(anchors, 1) # (1, h*w*nl, 4) + valid_mask = ((anchors > eps) * (anchors < 1 - eps)).all(-1, keepdim=True) # 1, h*w*nl, 1 + anchors = torch.log(anchors / (1 - anchors)) + anchors = anchors.masked_fill(~valid_mask, float('inf')) + return anchors, valid_mask + + def _get_encoder_input(self, x): + """Processes and returns encoder inputs by getting projection features from input and concatenating them.""" + # Get projection features + x = [self.input_proj[i](feat) for i, feat in enumerate(x)] + # Get encoder inputs + feats = [] + shapes = [] + for feat in x: + h, w = feat.shape[2:] + # [b, c, h, w] -> [b, h*w, c] + feats.append(feat.flatten(2).permute(0, 2, 1)) + # [nl, 2] + shapes.append([h, w]) + + # [b, h*w, c] + feats = torch.cat(feats, 1) + return feats, shapes + + def _get_decoder_input(self, feats, shapes, dn_embed=None, dn_bbox=None): + """Generates and prepares the input required for the decoder from the provided features and shapes.""" + bs = len(feats) + # Prepare input for decoder + anchors, valid_mask = 
self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device) + features = self.enc_output(valid_mask * feats) # bs, h*w, 256 + + enc_outputs_scores = self.enc_score_head(features) # (bs, h*w, nc) + + # Query selection + # (bs, num_queries) + topk_ind = torch.topk(enc_outputs_scores.max(-1).values, self.num_queries, dim=1).indices.view(-1) + # (bs, num_queries) + batch_ind = torch.arange(end=bs, dtype=topk_ind.dtype).unsqueeze(-1).repeat(1, self.num_queries).view(-1) + + # (bs, num_queries, 256) + top_k_features = features[batch_ind, topk_ind].view(bs, self.num_queries, -1) + # (bs, num_queries, 4) + top_k_anchors = anchors[:, topk_ind].view(bs, self.num_queries, -1) + + # Dynamic anchors + static content + refer_bbox = self.enc_bbox_head(top_k_features) + top_k_anchors + + enc_bboxes = refer_bbox.sigmoid() + if dn_bbox is not None: + refer_bbox = torch.cat([dn_bbox, refer_bbox], 1) + enc_scores = enc_outputs_scores[batch_ind, topk_ind].view(bs, self.num_queries, -1) + + embeddings = self.tgt_embed.weight.unsqueeze(0).repeat(bs, 1, 1) if self.learnt_init_query else top_k_features + if self.training: + refer_bbox = refer_bbox.detach() + if not self.learnt_init_query: + embeddings = embeddings.detach() + if dn_embed is not None: + embeddings = torch.cat([dn_embed, embeddings], 1) + + return embeddings, refer_bbox, enc_bboxes, enc_scores + + # TODO + def _reset_parameters(self): + """Initializes or resets the parameters of the model's various components with predefined weights and biases.""" + # Class and bbox head init + bias_cls = bias_init_with_prob(0.01) / 80 * self.nc + # NOTE: the weight initialization in `linear_init_` would cause NaN when training with custom datasets. + # linear_init_(self.enc_score_head) + constant_(self.enc_score_head.bias, bias_cls) + constant_(self.enc_bbox_head.layers[-1].weight, 0.) + constant_(self.enc_bbox_head.layers[-1].bias, 0.) 
+ for cls_, reg_ in zip(self.dec_score_head, self.dec_bbox_head): + # linear_init_(cls_) + constant_(cls_.bias, bias_cls) + constant_(reg_.layers[-1].weight, 0.) + constant_(reg_.layers[-1].bias, 0.) + + linear_init_(self.enc_output[0]) + xavier_uniform_(self.enc_output[0].weight) + if self.learnt_init_query: + xavier_uniform_(self.tgt_embed.weight) + xavier_uniform_(self.query_pos_head.layers[0].weight) + xavier_uniform_(self.query_pos_head.layers[1].weight) + for layer in self.input_proj: + xavier_uniform_(layer[0].weight) diff --git a/ultralytics/nn/modules/transformer.py b/ultralytics/nn/modules/transformer.py new file mode 100644 index 0000000..9fe9597 --- /dev/null +++ b/ultralytics/nn/modules/transformer.py @@ -0,0 +1,409 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +"""Transformer modules.""" + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.init import constant_, xavier_uniform_ + +from .conv import Conv +from .utils import _get_clones, inverse_sigmoid, multi_scale_deformable_attn_pytorch + +__all__ = ('TransformerEncoderLayer', 'TransformerLayer', 'TransformerBlock', 'MLPBlock', 'LayerNorm2d', 'AIFI', + 'DeformableTransformerDecoder', 'DeformableTransformerDecoderLayer', 'MSDeformAttn', 'MLP') + + +class TransformerEncoderLayer(nn.Module): + """Defines a single layer of the transformer encoder.""" + + def __init__(self, c1, cm=2048, num_heads=8, dropout=0.0, act=nn.GELU(), normalize_before=False): + """Initialize the TransformerEncoderLayer with specified parameters.""" + super().__init__() + from ...utils.torch_utils import TORCH_1_9 + if not TORCH_1_9: + raise ModuleNotFoundError( + 'TransformerEncoderLayer() requires torch>=1.9 to use nn.MultiheadAttention(batch_first=True).') + self.ma = nn.MultiheadAttention(c1, num_heads, dropout=dropout, batch_first=True) + # Implementation of Feedforward model + self.fc1 = nn.Linear(c1, cm) + self.fc2 = nn.Linear(cm, c1) + + self.norm1 = nn.LayerNorm(c1) + 
self.norm2 = nn.LayerNorm(c1) + self.dropout = nn.Dropout(dropout) + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + + self.act = act + self.normalize_before = normalize_before + + @staticmethod + def with_pos_embed(tensor, pos=None): + """Add position embeddings to the tensor if provided.""" + return tensor if pos is None else tensor + pos + + def forward_post(self, src, src_mask=None, src_key_padding_mask=None, pos=None): + """Performs forward pass with post-normalization.""" + q = k = self.with_pos_embed(src, pos) + src2 = self.ma(q, k, value=src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0] + src = src + self.dropout1(src2) + src = self.norm1(src) + src2 = self.fc2(self.dropout(self.act(self.fc1(src)))) + src = src + self.dropout2(src2) + return self.norm2(src) + + def forward_pre(self, src, src_mask=None, src_key_padding_mask=None, pos=None): + """Performs forward pass with pre-normalization.""" + src2 = self.norm1(src) + q = k = self.with_pos_embed(src2, pos) + src2 = self.ma(q, k, value=src2, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0] + src = src + self.dropout1(src2) + src2 = self.norm2(src) + src2 = self.fc2(self.dropout(self.act(self.fc1(src2)))) + return src + self.dropout2(src2) + + def forward(self, src, src_mask=None, src_key_padding_mask=None, pos=None): + """Forward propagates the input through the encoder module.""" + if self.normalize_before: + return self.forward_pre(src, src_mask, src_key_padding_mask, pos) + return self.forward_post(src, src_mask, src_key_padding_mask, pos) + + +class AIFI(TransformerEncoderLayer): + """Defines the AIFI transformer layer.""" + + def __init__(self, c1, cm=2048, num_heads=8, dropout=0, act=nn.GELU(), normalize_before=False): + """Initialize the AIFI instance with specified parameters.""" + super().__init__(c1, cm, num_heads, dropout, act, normalize_before) + + def forward(self, x): + """Forward pass for the AIFI transformer layer.""" + c, h, w = 
x.shape[1:] + pos_embed = self.build_2d_sincos_position_embedding(w, h, c) + # Flatten [B, C, H, W] to [B, HxW, C] + x = super().forward(x.flatten(2).permute(0, 2, 1), pos=pos_embed.to(device=x.device, dtype=x.dtype)) + return x.permute(0, 2, 1).view([-1, c, h, w]).contiguous() + + @staticmethod + def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.0): + """Builds 2D sine-cosine position embedding.""" + grid_w = torch.arange(int(w), dtype=torch.float32) + grid_h = torch.arange(int(h), dtype=torch.float32) + grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing='ij') + assert embed_dim % 4 == 0, \ + 'Embed dimension must be divisible by 4 for 2D sin-cos position embedding' + pos_dim = embed_dim // 4 + omega = torch.arange(pos_dim, dtype=torch.float32) / pos_dim + omega = 1. / (temperature ** omega) + + out_w = grid_w.flatten()[..., None] @ omega[None] + out_h = grid_h.flatten()[..., None] @ omega[None] + + return torch.cat([torch.sin(out_w), torch.cos(out_w), torch.sin(out_h), torch.cos(out_h)], 1)[None] + + +class TransformerLayer(nn.Module): + """Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance).""" + + def __init__(self, c, num_heads): + """Initializes a self-attention mechanism using linear transformations and multi-head attention.""" + super().__init__() + self.q = nn.Linear(c, c, bias=False) + self.k = nn.Linear(c, c, bias=False) + self.v = nn.Linear(c, c, bias=False) + self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads) + self.fc1 = nn.Linear(c, c, bias=False) + self.fc2 = nn.Linear(c, c, bias=False) + + def forward(self, x): + """Apply a transformer block to the input x and return the output.""" + x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x + return self.fc2(self.fc1(x)) + x + + +class TransformerBlock(nn.Module): + """Vision Transformer https://arxiv.org/abs/2010.11929.""" + + def __init__(self, c1, c2, num_heads, num_layers): + """Initialize a Transformer 
class MLP(nn.Module):
    """Plain feed-forward stack of linear layers with ReLU between them (also called FFN)."""

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        """
        Build the layer stack.

        Args:
            input_dim (int): Width of the input features.
            hidden_dim (int): Width of every intermediate layer.
            output_dim (int): Width of the final output.
            num_layers (int): Number of linear layers.
        """
        super().__init__()
        self.num_layers = num_layers
        # Consecutive pairs of this list are the (in, out) widths of each linear layer
        widths = [input_dim] + [hidden_dim] * (num_layers - 1) + [output_dim]
        self.layers = nn.ModuleList(nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(widths[:-1], widths[1:]))

    def forward(self, x):
        """Run x through every layer, applying ReLU after all but the last one."""
        last = self.num_layers - 1
        for idx, linear in enumerate(self.layers):
            x = linear(x)
            if idx < last:
                x = F.relu(x)
        return x
class MSDeformAttn(nn.Module):
    """
    Multi-Scale Deformable Attention Module based on Deformable-DETR and PaddleDetection implementations.

    https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/ops/modules/ms_deform_attn.py
    """

    def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4):
        """
        Initialize MSDeformAttn with the given parameters.

        Args:
            d_model (int): Channel width of queries and values.
            n_levels (int): Number of feature levels that are sampled.
            n_heads (int): Number of attention heads; must divide d_model.
            n_points (int): Number of sampling points per head and per level.

        Raises:
            ValueError: If d_model is not divisible by n_heads.
        """
        super().__init__()
        if d_model % n_heads != 0:
            raise ValueError(f'd_model must be divisible by n_heads, but got {d_model} and {n_heads}')
        # Better to keep the per-head width (d_model // n_heads) a power of 2, which is more efficient in a CUDA
        # implementation. Divisibility is already guaranteed by the check above.

        self.im2col_step = 64

        self.d_model = d_model
        self.n_levels = n_levels
        self.n_heads = n_heads
        self.n_points = n_points

        self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points * 2)
        self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points)
        self.value_proj = nn.Linear(d_model, d_model)
        self.output_proj = nn.Linear(d_model, d_model)

        self._reset_parameters()

    def _reset_parameters(self):
        """
        Reset module parameters.

        Offset and attention-weight projections start at zero weight, so the initial sampling pattern comes from the
        offset bias alone: each head gets one direction on the unit square and the k-th point sits k + 1 steps along it.
        """
        constant_(self.sampling_offsets.weight.data, 0.)
        thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads)
        grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
        # Normalize by the larger |component| so every direction touches the unit square
        grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2).repeat(
            1, self.n_levels, self.n_points, 1)
        for i in range(self.n_points):
            grid_init[:, :, i, :] *= i + 1
        with torch.no_grad():
            self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1))
        constant_(self.attention_weights.weight.data, 0.)
        constant_(self.attention_weights.bias.data, 0.)
        xavier_uniform_(self.value_proj.weight.data)
        constant_(self.value_proj.bias.data, 0.)
        xavier_uniform_(self.output_proj.weight.data)
        constant_(self.output_proj.bias.data, 0.)

    def forward(self, query, refer_bbox, value, value_shapes, value_mask=None):
        """
        Perform forward pass for multiscale deformable attention.

        https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py

        Args:
            query (torch.Tensor): [bs, query_length, C]
            refer_bbox (torch.Tensor): [bs, query_length, n_levels, 2] or [bs, query_length, n_levels, 4],
                range in [0, 1], top-left (0,0), bottom-right (1, 1), including padding area
            value (torch.Tensor): [bs, value_length, C]
            value_shapes (List): [n_levels, 2], [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})]
            value_mask (Tensor): [bs, value_length]. Positions where the mask is True are zeroed in the projected
                value (see the masked_fill below).
                NOTE(review): earlier documentation described True as "non-padding", which is the opposite of what
                masked_fill does here; confirm the polarity callers pass.

        Returns:
            output (Tensor): [bs, Length_{query}, C]

        Raises:
            ValueError: If the last dimension of refer_bbox is neither 2 nor 4.
        """
        bs, len_q = query.shape[:2]
        len_v = value.shape[1]
        assert sum(s[0] * s[1] for s in value_shapes) == len_v

        value = self.value_proj(value)
        if value_mask is not None:
            value = value.masked_fill(value_mask[..., None], float(0))
        value = value.view(bs, len_v, self.n_heads, self.d_model // self.n_heads)
        sampling_offsets = self.sampling_offsets(query).view(bs, len_q, self.n_heads, self.n_levels, self.n_points, 2)
        attention_weights = self.attention_weights(query).view(bs, len_q, self.n_heads, self.n_levels * self.n_points)
        # Softmax jointly over all levels and points of a head, then restore the level/point axes
        attention_weights = F.softmax(attention_weights, -1).view(bs, len_q, self.n_heads, self.n_levels, self.n_points)
        # N, Len_q, n_heads, n_levels, n_points, 2
        num_points = refer_bbox.shape[-1]
        if num_points == 2:
            # Reference points: offsets are divided by each level's (W, H), hence the flip of (H, W)
            offset_normalizer = torch.as_tensor(value_shapes, dtype=query.dtype, device=query.device).flip(-1)
            add = sampling_offsets / offset_normalizer[None, None, None, :, None, :]
            sampling_locations = refer_bbox[:, :, None, :, None, :] + add
        elif num_points == 4:
            # Reference boxes: offsets are scaled by half of the last two box components
            add = sampling_offsets / self.n_points * refer_bbox[:, :, None, :, None, 2:] * 0.5
            sampling_locations = refer_bbox[:, :, None, :, None, :2] + add
        else:
            raise ValueError(f'Last dim of reference_points must be 2 or 4, but got {num_points}.')
        output = multi_scale_deformable_attn_pytorch(value, value_shapes, sampling_locations, attention_weights)
        return self.output_proj(output)
@staticmethod
def with_pos_embed(tensor, pos):
    """
    Return tensor with the positional embedding added.

    Args:
        tensor: Value to which the embedding is added.
        pos: Positional embedding, or None to leave tensor untouched.

    Returns:
        tensor + pos, or the very same tensor object when pos is None.
    """
    if pos is not None:
        return tensor + pos
    return tensor
class DeformableTransformerDecoder(nn.Module):
    """
    Implementation of Deformable Transformer Decoder based on PaddleDetection.

    https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
    """

    def __init__(self, hidden_dim, decoder_layer, num_layers, eval_idx=-1):
        """
        Stack num_layers deep copies of decoder_layer.

        Args:
            hidden_dim (int): Stored as-is on the module.
            decoder_layer (nn.Module): Layer that is cloned num_layers times.
            num_layers (int): Number of decoder layers.
            eval_idx (int): Layer whose output is returned in eval mode; negative values count from the end.
        """
        super().__init__()
        self.layers = _get_clones(decoder_layer, num_layers)
        self.num_layers = num_layers
        self.hidden_dim = hidden_dim
        if eval_idx < 0:
            eval_idx = num_layers + eval_idx
        self.eval_idx = eval_idx

    def forward(
            self,
            embed,  # decoder embeddings
            refer_bbox,  # anchor
            feats,  # image features
            shapes,  # feature shapes
            bbox_head,
            score_head,
            pos_mlp,
            attn_mask=None,
            padding_mask=None):
        """
        Run the decoder stack with iterative box refinement.

        In training mode every layer contributes a box and a score prediction; in eval mode only layer
        `self.eval_idx` does, and the loop stops there.

        Returns:
            (tuple): Stacked box predictions and stacked class scores.
        """
        boxes_out, scores_out = [], []
        hidden = embed
        prev_refined = None
        refer_bbox = refer_bbox.sigmoid()
        for idx, layer in enumerate(self.layers):
            hidden = layer(hidden, refer_bbox, feats, shapes, padding_mask, attn_mask, pos_mlp(refer_bbox))

            delta = bbox_head[idx](hidden)
            refined = torch.sigmoid(delta + inverse_sigmoid(refer_bbox))

            if self.training:
                scores_out.append(score_head[idx](hidden))
                if idx == 0:
                    boxes_out.append(refined)
                else:
                    # Later layers refine the previous layer's non-detached box for the loss
                    boxes_out.append(torch.sigmoid(delta + inverse_sigmoid(prev_refined)))
            elif idx == self.eval_idx:
                scores_out.append(score_head[idx](hidden))
                boxes_out.append(refined)
                break

            prev_refined = refined
            # Training detaches the reference fed to the next layer
            refer_bbox = refined.detach() if self.training else refined

        return torch.stack(boxes_out), torch.stack(scores_out)
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""Module utils."""

import copy
import math

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.init import uniform_

__all__ = 'multi_scale_deformable_attn_pytorch', 'inverse_sigmoid'


def _get_clones(module, n):
    """Return an nn.ModuleList holding n independent deep copies of module."""
    copies = []
    for _ in range(n):
        copies.append(copy.deepcopy(module))
    return nn.ModuleList(copies)


def bias_init_with_prob(prior_prob=0.01):
    """Return the bias value whose sigmoid equals prior_prob, i.e. -log((1 - p) / p)."""
    odds_against = (1 - prior_prob) / prior_prob
    return float(-np.log(odds_against))  # return bias_init


def linear_init_(module):
    """Fill weight (and bias, when present) uniformly in +-1/sqrt(weight.shape[0]), in place."""
    limit = 1 / math.sqrt(module.weight.shape[0])
    uniform_(module.weight, -limit, limit)
    if getattr(module, 'bias', None) is not None:
        uniform_(module.bias, -limit, limit)


def inverse_sigmoid(x, eps=1e-5):
    """Compute log(x / (1 - x)) with x clamped to [0, 1] and both terms floored at eps."""
    x = x.clamp(min=0, max=1)
    numerator = x.clamp(min=eps)
    denominator = (1 - x).clamp(min=eps)
    return torch.log(numerator / denominator)


def multi_scale_deformable_attn_pytorch(value: torch.Tensor, value_spatial_shapes: torch.Tensor,
                                        sampling_locations: torch.Tensor,
                                        attention_weights: torch.Tensor) -> torch.Tensor:
    """
    Multi-scale deformable attention.

    Each level's values are bilinearly sampled at the given locations, then combined with the attention weights.

    https://github.com/IDEA-Research/detrex/blob/main/detrex/layers/multi_scale_deform_attn.py
    """

    bs, _, num_heads, embed_dims = value.shape
    _, num_queries, num_heads, num_levels, num_points, _ = sampling_locations.shape
    per_level_values = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1)
    # grid_sample expects coordinates in [-1, 1]
    grids = 2 * sampling_locations - 1
    sampled = []
    for level, (H_, W_) in enumerate(value_spatial_shapes):
        # (bs, H_*W_, num_heads, embed_dims) -> (bs*num_heads, embed_dims, H_, W_)
        feat = per_level_values[level].flatten(2).transpose(1, 2).reshape(bs * num_heads, embed_dims, H_, W_)
        # (bs, num_queries, num_heads, num_points, 2) -> (bs*num_heads, num_queries, num_points, 2)
        grid = grids[:, :, :, level].transpose(1, 2).flatten(0, 1)
        # (bs*num_heads, embed_dims, num_queries, num_points)
        sampled.append(F.grid_sample(feat, grid, mode='bilinear', padding_mode='zeros', align_corners=False))
    # (bs, num_queries, num_heads, num_levels, num_points) -> (bs*num_heads, 1, num_queries, num_levels*num_points)
    weights = attention_weights.transpose(1, 2).reshape(bs * num_heads, 1, num_queries, num_levels * num_points)
    stacked = torch.stack(sampled, dim=-2).flatten(-2)
    output = (stacked * weights).sum(-1).view(bs, num_heads * embed_dims, num_queries)
    return output.transpose(1, 2).contiguous()
class BaseModel(nn.Module):
    """
    The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family.

    Methods here read `self.model` (an iterable of layers carrying `f`, `i`, `type` and `np` attributes) and
    `self.save`; both are expected to be set by subclasses.
    """

    def forward(self, x, *args, **kwargs):
        """
        Dispatch to `loss` or `predict` depending on the input type.

        Args:
            x (torch.Tensor | dict): The input image tensor or a dict including image tensor and gt labels.
            *args: Forwarded unchanged to `loss` / `predict`.
            **kwargs: Forwarded unchanged to `loss` / `predict`.

        Returns:
            The result of `self.loss(...)` when x is a dict, otherwise the result of `self.predict(...)`.
        """
        if isinstance(x, dict):  # for cases of training and validating while training.
            return self.loss(x, *args, **kwargs)
        return self.predict(x, *args, **kwargs)

    def predict(self, x, profile=False, visualize=False, augment=False):
        """
        Perform a forward pass through the network.

        Args:
            x (torch.Tensor): The input tensor to the model.
            profile (bool): Print the computation time of each layer if True, defaults to False.
            visualize (bool): Save the feature maps of the model if True, defaults to False.
            augment (bool): Augment image during prediction, defaults to False. When True, `profile` and
                `visualize` are not forwarded.

        Returns:
            (torch.Tensor): The last output of the model.
        """
        if augment:
            return self._predict_augment(x)
        return self._predict_once(x, profile, visualize)

    def _predict_once(self, x, profile=False, visualize=False):
        """
        Run every layer of `self.model` once, in order.

        Args:
            x (torch.Tensor): The input tensor to the model.
            profile (bool): Print the computation time of each layer if True, defaults to False.
            visualize (bool): Passed to `feature_visualization` as `save_dir` when truthy, defaults to False.

        Returns:
            (torch.Tensor): The last output of the model.
        """
        y, dt = [], []  # outputs
        for m in self.model:
            if m.f != -1:  # if not from previous layer
                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
            if profile:
                self._profile_one_layer(m, x, dt)
            x = m(x)  # run
            # Keep only outputs that later layers reference; None keeps list positions aligned with layer indices
            y.append(x if m.i in self.save else None)  # save output
            if visualize:
                feature_visualization(x, m.type, m.i, save_dir=visualize)
        return x

    def _predict_augment(self, x):
        """Log a warning that augmented inference is unsupported here and fall back to `_predict_once`."""
        LOGGER.warning(f'WARNING ⚠️ {self.__class__.__name__} does not support augmented inference yet. '
                       f'Reverting to single-scale inference instead.')
        return self._predict_once(x)

    def _profile_one_layer(self, m, x, dt):
        """
        Profile the computation time and FLOPs of a single layer of the model on a given input. Appends the results to
        the provided list.

        Args:
            m (nn.Module): The layer to be profiled.
            x (torch.Tensor): The input data to the layer.
            dt (list): A list to store the computation time of the layer.

        Returns:
            None
        """
        c = m == self.model[-1] and isinstance(x, list)  # is final layer list, copy input as inplace fix
        # FLOPs are reported as 0 when thop is not installed
        flops = thop.profile(m, inputs=[x.copy() if c else x], verbose=False)[0] / 1E9 * 2 if thop else 0  # FLOPs
        t = time_sync()
        for _ in range(10):
            m(x.copy() if c else x)
        # Ten runs are timed together; x100 presumably turns total seconds into ms per run
        # (assumes time_sync() returns seconds - TODO confirm)
        dt.append((time_sync() - t) * 100)
        if m == self.model[0]:
            LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module")
        LOGGER.info(f'{dt[-1]:10.2f} {flops:10.2f} {m.np:10.0f} {m.type}')
        if c:
            LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total")

    def fuse(self, verbose=True):
        """
        Fuse the `Conv2d()` and `BatchNorm2d()` layers of the model into a single layer, in order to improve the
        computation efficiency.

        Nothing is done when `is_fused()` already reports True.

        Args:
            verbose (bool): Forwarded to `self.info` after fusing.

        Returns:
            (nn.Module): The fused model is returned.
        """
        if not self.is_fused():
            for m in self.model.modules():
                if isinstance(m, (Conv, Conv2, DWConv)) and hasattr(m, 'bn'):
                    if isinstance(m, Conv2):
                        m.fuse_convs()
                    m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
                    delattr(m, 'bn')  # remove batchnorm
                    m.forward = m.forward_fuse  # update forward
                if isinstance(m, ConvTranspose) and hasattr(m, 'bn'):
                    m.conv_transpose = fuse_deconv_and_bn(m.conv_transpose, m.bn)
                    delattr(m, 'bn')  # remove batchnorm
                    m.forward = m.forward_fuse  # update forward
                if isinstance(m, RepConv):
                    m.fuse_convs()
                    m.forward = m.forward_fuse  # update forward
            self.info(verbose=verbose)

        return self

    def is_fused(self, thresh=10):
        """
        Check if the model has less than a certain threshold of normalization layers.

        Every class in `torch.nn` whose name contains 'Norm' is counted, not only BatchNorm.

        Args:
            thresh (int, optional): The threshold number of normalization layers. Default is 10.

        Returns:
            (bool): True if the number of normalization layers in the model is less than the threshold, False otherwise.
        """
        bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k)  # normalization layers, i.e. BatchNorm2d()
        return sum(isinstance(v, bn) for v in self.modules()) < thresh  # True if < 'thresh' BatchNorm layers in model

    def info(self, detailed=False, verbose=True, imgsz=640):
        """
        Prints model information by delegating to `model_info`.

        Args:
            detailed (bool): if True, prints out detailed information about the model. Defaults to False
            verbose (bool): if True, prints out the model information. Defaults to True
            imgsz (int): image size forwarded to `model_info`. Defaults to 640
        """
        return model_info(self, detailed=detailed, verbose=verbose, imgsz=imgsz)

    def _apply(self, fn):
        """
        Apply fn through `nn.Module._apply`, then also to the plain tensor attributes of a Detect/Segment head.

        `stride`, `anchors` and `strides` of the last layer are handled explicitly here.
        NOTE(review): presumably because they are not registered parameters or buffers; confirm against the head.

        Args:
            fn (function): the function to apply to the model

        Returns:
            (BaseModel): An updated BaseModel object.
        """
        self = super()._apply(fn)
        m = self.model[-1]  # Detect()
        if isinstance(m, (Detect, Segment)):
            m.stride = fn(m.stride)
            m.anchors = fn(m.anchors)
            m.strides = fn(m.strides)
        return self

    def load(self, weights, verbose=True):
        """
        Load the weights into the model.

        Only entries kept by `intersect_dicts` are loaded, and loading is non-strict.

        Args:
            weights (dict | torch.nn.Module): The pre-trained weights to be loaded. A dict is expected to hold the
                module under the 'model' key.
            verbose (bool, optional): Whether to log the transfer progress. Defaults to True.
        """
        model = weights['model'] if isinstance(weights, dict) else weights  # torchvision models are not dicts
        csd = model.float().state_dict()  # checkpoint state_dict as FP32
        csd = intersect_dicts(csd, self.state_dict())  # intersect
        self.load_state_dict(csd, strict=False)  # load
        if verbose:
            LOGGER.info(f'Transferred {len(csd)}/{len(self.model.state_dict())} items from pretrained weights')

    def loss(self, batch, preds=None):
        """
        Compute loss.

        The criterion is created on first use via `init_criterion` and cached on `self.criterion`.

        Args:
            batch (dict): Batch to compute loss on; `batch['img']` is fed to `forward` when preds is None.
            preds (torch.Tensor | List[torch.Tensor]): Predictions.

        Returns:
            Whatever `self.criterion(preds, batch)` returns.
        """
        if not hasattr(self, 'criterion'):
            self.criterion = self.init_criterion()

        preds = self.forward(batch['img']) if preds is None else preds
        return self.criterion(preds, batch)

    def init_criterion(self):
        """Initialize the loss criterion for the BaseModel; subclasses must override this."""
        raise NotImplementedError('compute_loss() needs to be implemented by task heads')
RTDETR + + # Init weights, biases + initialize_weights(self) + if verbose: + self.info() + LOGGER.info('') + + def _predict_augment(self, x): + """Perform augmentations on input image x and return augmented inference and train outputs.""" + img_size = x.shape[-2:] # height, width + s = [1, 0.83, 0.67] # scales + f = [None, 3, None] # flips (2-ud, 3-lr) + y = [] # outputs + for si, fi in zip(s, f): + xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) + yi = super().predict(xi)[0] # forward + yi = self._descale_pred(yi, fi, si, img_size) + y.append(yi) + y = self._clip_augmented(y) # clip augmented tails + return torch.cat(y, -1), None # augmented inference, train + + @staticmethod + def _descale_pred(p, flips, scale, img_size, dim=1): + """De-scale predictions following augmented inference (inverse operation).""" + p[:, :4] /= scale # de-scale + x, y, wh, cls = p.split((1, 1, 2, p.shape[dim] - 4), dim) + if flips == 2: + y = img_size[0] - y # de-flip ud + elif flips == 3: + x = img_size[1] - x # de-flip lr + return torch.cat((x, y, wh, cls), dim) + + def _clip_augmented(self, y): + """Clip YOLO augmented inference tails.""" + nl = self.model[-1].nl # number of detection layers (P3-P5) + g = sum(4 ** x for x in range(nl)) # grid points + e = 1 # exclude layer count + i = (y[0].shape[-1] // g) * sum(4 ** x for x in range(e)) # indices + y[0] = y[0][..., :-i] # large + i = (y[-1].shape[-1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices + y[-1] = y[-1][..., i:] # small + return y + + def init_criterion(self): + """Initialize the loss criterion for the DetectionModel.""" + return v8DetectionLoss(self) + + +class SegmentationModel(DetectionModel): + """YOLOv8 segmentation model.""" + + def __init__(self, cfg='yolov8n-seg.yaml', ch=3, nc=None, verbose=True): + """Initialize YOLOv8 segmentation model with given config and parameters.""" + super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose) + + def init_criterion(self): + """Initialize 
class PoseModel(DetectionModel):
    """YOLOv8 pose model."""

    def __init__(self, cfg='yolov8n-pose.yaml', ch=3, nc=None, data_kpt_shape=(None, None), verbose=True):
        """
        Initialize YOLOv8 Pose model.

        Args:
            cfg (str | dict): Model YAML path or an already loaded config dict. A dict is updated in place when the
                keypoint shape is overridden.
            ch (int): Number of input channels.
            nc (int, optional): Number of classes.
            data_kpt_shape (tuple): Keypoint shape from the dataset; replaces the YAML `kpt_shape` when any element
                is truthy and it differs from the YAML value.
            verbose (bool): Forwarded to the parent constructor.
        """
        model_cfg = cfg if isinstance(cfg, dict) else yaml_model_load(cfg)  # load model YAML
        wants_override = any(data_kpt_shape) and list(data_kpt_shape) != list(model_cfg['kpt_shape'])
        if wants_override:
            LOGGER.info(f"Overriding model.yaml kpt_shape={model_cfg['kpt_shape']} with kpt_shape={data_kpt_shape}")
            model_cfg['kpt_shape'] = data_kpt_shape
        super().__init__(cfg=model_cfg, ch=ch, nc=nc, verbose=verbose)

    def init_criterion(self):
        """Return the pose loss bound to this model."""
        return v8PoseLoss(self)
@staticmethod
def reshape_outputs(model, nc):
    """
    Update a TorchVision classification model to class count 'n' if required.

    The last child module of `model.model` (or of `model` itself when it has no `model` attribute) is inspected and
    its output layer is replaced by a freshly initialized one when the class count differs.

    Args:
        model (nn.Module): Classification model to adapt in place.
        nc (int): Required number of output classes.
    """
    # The module that actually owns the last child; a replacement must be set on this same object
    container = model.model if hasattr(model, 'model') else model
    name, m = list(container.named_children())[-1]  # last module
    if isinstance(m, Classify):  # YOLO Classify() head
        if m.linear.out_features != nc:
            m.linear = nn.Linear(m.linear.in_features, nc)
    elif isinstance(m, nn.Linear):  # ResNet, EfficientNet
        if m.out_features != nc:
            # Previously set on `model`, which left the real head untouched whenever `model.model` exists
            setattr(container, name, nn.Linear(m.in_features, nc))
    elif isinstance(m, nn.Sequential):
        types = [type(x) for x in m]
        if nn.Linear in types:
            i = types.index(nn.Linear)  # nn.Linear index
            if m[i].out_features != nc:
                m[i] = nn.Linear(m[i].in_features, nc)
        elif nn.Conv2d in types:
            i = types.index(nn.Conv2d)  # nn.Conv2d index
            if m[i].out_channels != nc:
                m[i] = nn.Conv2d(m[i].in_channels, nc, m[i].kernel_size, m[i].stride, bias=m[i].bias is not None)
def loss(self, batch, preds=None):
    """
    Compute the loss for the given batch of data.

    Args:
        batch (dict): Dictionary containing image and label data ('img', 'batch_idx', 'cls', 'bboxes').
        preds (torch.Tensor, optional): Precomputed model predictions. Defaults to None.

    Returns:
        (tuple): A tuple containing the total loss and main three losses in a tensor.
    """
    if not hasattr(self, 'criterion'):
        self.criterion = self.init_criterion()

    img = batch['img']
    device = img.device
    sample_ids = batch['batch_idx']
    # NOTE: preprocess gt_bbox and gt_labels to list.
    gt_groups = [(sample_ids == i).sum().item() for i in range(len(img))]
    targets = {
        'cls': batch['cls'].to(device, dtype=torch.long).view(-1),
        'bboxes': batch['bboxes'].to(device=device),
        'batch_idx': sample_ids.to(device, dtype=torch.long).view(-1),
        'gt_groups': gt_groups}

    if preds is None:
        preds = self.predict(img, batch=targets)
    # Outside training mode the five-tuple is the second element of the predictions
    outputs = preds if self.training else preds[1]
    dec_bboxes, dec_scores, enc_bboxes, enc_scores, dn_meta = outputs

    dn_bboxes = dn_scores = None
    if dn_meta is not None:
        # Separate the denoising queries from the regular ones along the query axis
        split_sizes = dn_meta['dn_num_split']
        dn_bboxes, dec_bboxes = torch.split(dec_bboxes, split_sizes, dim=2)
        dn_scores, dec_scores = torch.split(dec_scores, split_sizes, dim=2)

    # Prepend the encoder output as an extra "layer"
    dec_bboxes = torch.cat([enc_bboxes.unsqueeze(0), dec_bboxes])  # (7, bs, 300, 4)
    dec_scores = torch.cat([enc_scores.unsqueeze(0), dec_scores])

    loss = self.criterion((dec_bboxes, dec_scores),
                          targets,
                          dn_bboxes=dn_bboxes,
                          dn_scores=dn_scores,
                          dn_meta=dn_meta)
    # NOTE: There are like 12 losses in RTDETR, backward with all losses but only show the main three losses.
    shown = [loss[k].detach() for k in ['loss_giou', 'loss_class', 'loss_bbox']]
    return sum(loss.values()), torch.as_tensor(shown, device=device)
@contextlib.contextmanager
def temporary_modules(modules=None):
    """
    Context manager for temporarily adding or modifying modules in Python's module cache (`sys.modules`).

    This function can be used to change the module paths during runtime. It's useful when refactoring code,
    where you've moved a module from one location to another, but you still want to support the old import
    paths for backwards compatibility.

    Args:
        modules (dict, optional): A dictionary mapping old module paths to new module paths.

    Example:
        ```python
        with temporary_modules({'old.module.path': 'new.module.path'}):
            import old.module.path  # this will now import new.module.path
        ```

    Note:
        The changes are only in effect inside the context manager and are undone once the context manager exits:
        aliases that did not exist before are removed, and entries that already existed under an old name are
        restored to the module they pointed to before.
        Be aware that directly manipulating `sys.modules` can lead to unpredictable results, especially in larger
        applications or libraries. Use this function with caution.
    """
    if not modules:
        modules = {}

    import importlib
    import sys

    absent = object()  # sentinel: the old name was not in sys.modules before we touched it
    previous = {}  # old name -> prior sys.modules entry, recorded only for names we actually handle
    try:
        # Set modules in sys.modules under their old name
        for old, new in modules.items():
            previous[old] = sys.modules.get(old, absent)
            sys.modules[old] = importlib.import_module(new)

        yield
    finally:
        # Undo only what was touched, putting back any entry that was there before
        for old, prior in previous.items():
            if prior is absent:
                sys.modules.pop(old, None)
            else:
                sys.modules[old] = prior
+ """ + from ultralytics.utils.downloads import attempt_download_asset + + check_suffix(file=weight, suffix='.pt') + file = attempt_download_asset(weight) # search online if missing locally + try: + with temporary_modules({ + 'ultralytics.yolo.utils': 'ultralytics.utils', + 'ultralytics.yolo.v8': 'ultralytics.models.yolo', + 'ultralytics.yolo.data': 'ultralytics.data'}): # for legacy 8.0 Classify and Pose models + return torch.load(file, map_location='cpu'), file # load + + except ModuleNotFoundError as e: # e.name is missing module name + if e.name == 'models': + raise TypeError( + emojis(f'ERROR ❌️ {weight} appears to be an Ultralytics YOLOv5 model originally trained ' + f'with https://github.com/ultralytics/yolov5.\nThis model is NOT forwards compatible with ' + f'YOLOv8 at https://github.com/ultralytics/ultralytics.' + f"\nRecommend fixes are to train a new model using the latest 'ultralytics' package or to " + f"run a command with an official YOLOv8 model, i.e. 'yolo predict model=yolov8n.pt'")) from e + LOGGER.warning(f"WARNING ⚠️ {weight} appears to require '{e.name}', which is not in ultralytics requirements." + f"\nAutoInstall will run now for '{e.name}' but this feature will be removed in the future." + f"\nRecommend fixes are to train a new model using the latest 'ultralytics' package or to " + f"run a command with an official YOLOv8 model, i.e. 
def attempt_load_weights(weights, device=None, inplace=True, fuse=False):
    """
    Load one model, or an ensemble of models, from weights=[a,b,c], weights=[a] or weights=a.

    Args:
        weights (str | list): A single weight file or a list of weight files.
        device (optional): Target passed to `.to()` for every loaded model.
        inplace (bool): Value assigned to the `inplace` attribute of activation and head modules.
        fuse (bool): When True, models that expose `fuse()` are fused before being put in eval mode.

    Returns:
        The single loaded model when exactly one was loaded, otherwise the `Ensemble` holding all of them.
    """
    weight_list = weights if isinstance(weights, list) else [weights]

    ensemble = Ensemble()
    for entry in weight_list:
        ckpt, pt_path = torch_safe_load(entry)  # load ckpt
        if 'train_args' in ckpt:
            combined_args = {**DEFAULT_CFG_DICT, **ckpt['train_args']}  # checkpoint args override defaults
        else:
            combined_args = None
        model = (ckpt.get('ema') or ckpt['model']).to(device).float()  # FP32 model

        # Model compatibility updates
        model.args = combined_args
        model.pt_path = pt_path
        model.task = guess_model_task(model)
        if not hasattr(model, 'stride'):
            model.stride = torch.tensor([32.])

        # Append, always in eval mode
        if fuse and hasattr(model, 'fuse'):
            model = model.fuse()
        ensemble.append(model.eval())

    # Module updates
    inplace_types = (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Segment)
    for module in ensemble.modules():
        module_type = type(module)
        if module_type in inplace_types:
            module.inplace = inplace
        elif module_type is nn.Upsample and not hasattr(module, 'recompute_scale_factor'):
            module.recompute_scale_factor = None  # torch 1.11.0 compatibility

    # A single model is returned directly rather than wrapped
    if len(ensemble) == 1:
        return ensemble[-1]

    # Otherwise copy shared metadata from the first model onto the ensemble
    LOGGER.info(f'Ensemble created with {weights}\n')
    for attr in 'names', 'nc', 'yaml':
        setattr(ensemble, attr, getattr(ensemble[0], attr))
    max_strides = torch.tensor([m.stride.max() for m in ensemble])
    ensemble.stride = ensemble[torch.argmax(max_strides).int()].stride  # stride of the largest-stride model
    assert all(ensemble[0].nc == m.nc for m in ensemble), f'Models differ in class counts {[m.nc for m in ensemble]}'
    return ensemble
def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
    """
    Parse a YOLO model.yaml dictionary into a PyTorch model.

    Args:
        d (dict): Model dictionary. The 'backbone' and 'head' layer lists are required; 'nc', 'activation',
            'scales', 'scale', 'depth_multiple', 'width_multiple' and 'kpt_shape' are read when present.
        ch (int): Number of input channels.
        verbose (bool): When True, log the activation override and one summary row per layer.

    Returns:
        (tuple): An `nn.Sequential` of the built layers and the sorted list of layer indices whose outputs
            later layers take as input.
    """
    import ast

    # Args
    max_channels = float('inf')
    nc, act, scales = (d.get(x) for x in ('nc', 'activation', 'scales'))
    depth, width, kpt_shape = (d.get(x, 1.0) for x in ('depth_multiple', 'width_multiple', 'kpt_shape'))
    if scales:
        scale = d.get('scale')
        if not scale:
            scale = tuple(scales.keys())[0]
            LOGGER.warning(f"WARNING ⚠️ no model scale passed. Assuming scale='{scale}'.")
        depth, width, max_channels = scales[scale]

    if act:
        # NOTE(review): eval() executes the 'activation' string taken from the model dict. Only load model
        # YAML files from trusted sources.
        Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
        if verbose:
            LOGGER.info(f"{colorstr('activation:')} {act}")  # print

    if verbose:
        LOGGER.info(f"\n{'':>3}{'from':>20}{'n':>3}{'params':>10} {'module':<45}{'arguments':<30}")
    ch = [ch]
    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        m = getattr(torch.nn, m[3:]) if 'nn.' in m else globals()[m]  # get module
        # String args are resolved against this function's local names (e.g. 'nc') or parsed as literals;
        # strings that are neither are left untouched.
        for j, a in enumerate(args):
            if isinstance(a, str):
                with contextlib.suppress(ValueError):
                    args[j] = locals()[a] if a in locals() else ast.literal_eval(a)

        n = n_ = max(round(n * depth), 1) if n > 1 else n  # depth gain
        if m in (Classify, Conv, ConvTranspose, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, Focus,
                 BottleneckCSP, C1, C2, C2f, C3, C3TR, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x, RepC3):
            c1, c2 = ch[f], args[0]
            if c2 != nc:  # if c2 not equal to number of classes (i.e. for Classify() output)
                c2 = make_divisible(min(c2, max_channels) * width, 8)

            args = [c1, c2, *args[1:]]
            if m in (BottleneckCSP, C1, C2, C2f, C3, C3TR, C3Ghost, C3x, RepC3):
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is AIFI:
            args = [ch[f], *args]
        elif m in (HGStem, HGBlock):
            c1, cm, c2 = ch[f], args[0], args[1]
            args = [c1, cm, c2, *args[2:]]
            if m is HGBlock:
                args.insert(4, n)  # number of repeats
                n = 1

        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[x] for x in f)
        elif m in (Detect, Segment, Pose):
            args.append([ch[x] for x in f])
            if m is Segment:
                args[2] = make_divisible(min(args[2], max_channels) * width, 8)
        elif m is RTDETRDecoder:  # special case, channels arg must be passed in index 1
            args.insert(1, [ch[x] for x in f])
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        # Attach the count to the built layer, not to the module class: a class attribute would be shared by
        # every layer of that type and would be missing on nn.Sequential-wrapped layers.
        m_.np = sum(x.numel() for x in m_.parameters())  # number params
        m_.i, m_.f, m_.type = i, f, t  # attach index, 'from' index, type
        if verbose:
            LOGGER.info(f'{i:>3}{str(f):>20}{n_:>3}{m_.np:10.0f} {t:<45}{str(args):<30}')  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []  # drop the input-channel entry so ch[k] is the output width of layer k
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)
def guess_model_task(model):
    """
    Guess the task of a PyTorch model from its architecture or configuration.

    The sources are tried in order: a YAML-style dict, then (for `nn.Module`) the `args` and `yaml` attributes
    of the model and of up to two nested `.model` attributes, then the types of its submodules, and finally
    (for `str`/`Path`) the file name and path parts.

    Args:
        model (nn.Module | dict | str | Path): PyTorch model, model configuration in YAML format, or model path.

    Returns:
        (str): Task of the model ('detect', 'segment', 'classify', 'pose'). When nothing matches, a warning is
            logged and 'detect' is returned.
    """

    def cfg2task(cfg):
        """Guess from YAML dictionary; returns None when the output module name is not recognized."""
        m = cfg['head'][-1][-2].lower()  # output module name
        if m in ('classify', 'classifier', 'cls', 'fc'):
            return 'classify'
        if m in ('detect', 'segment', 'pose'):
            return m
        return None

    # Guess from model cfg
    if isinstance(model, dict):
        with contextlib.suppress(Exception):
            task = cfg2task(model)
            if task:  # an unrecognized head falls through to the default instead of returning None
                return task

    # Guess from PyTorch model
    if isinstance(model, nn.Module):  # PyTorch model
        # Collect model, model.model and model.model.model with plain attribute access (no eval)
        candidates = [model]
        with contextlib.suppress(Exception):
            candidates.append(candidates[-1].model)
            candidates.append(candidates[-1].model)

        for obj in candidates:
            with contextlib.suppress(Exception):
                task = obj.args['task']
                if task:
                    return task
        for obj in candidates:
            with contextlib.suppress(Exception):
                task = cfg2task(obj.yaml)
                if task:
                    return task

        # NOTE(review): the specific heads are tested before Detect so that a head which subclasses Detect
        # is not reported as 'detect' — confirm the class hierarchy of Segment/Pose.
        for m in model.modules():
            if isinstance(m, Segment):
                return 'segment'
            elif isinstance(m, Classify):
                return 'classify'
            elif isinstance(m, Pose):
                return 'pose'
            elif isinstance(m, Detect):
                return 'detect'

    # Guess from model filename
    if isinstance(model, (str, Path)):
        model = Path(model)
        if '-seg' in model.stem or 'segment' in model.parts:
            return 'segment'
        elif '-cls' in model.stem or 'classify' in model.parts:
            return 'classify'
        elif '-pose' in model.stem or 'pose' in model.parts:
            return 'pose'
        elif 'detect' in model.parts:
            return 'detect'

    # Unable to determine task from model
    LOGGER.warning("WARNING ⚠️ Unable to automatically guess model task, assuming 'task=detect'. "
                   "Explicitly define task for your model, i.e. 'task=detect', 'segment', 'classify', or 'pose'.")
    return 'detect'  # assume detect
+- **Customizable Tracker Configurations:** Tailor the tracking algorithm to meet specific requirements by adjusting various parameters. + +## Available Trackers + +Ultralytics YOLO supports the following tracking algorithms. They can be enabled by passing the relevant YAML configuration file such as `tracker=tracker_type.yaml`: + +- [BoT-SORT](https://github.com/NirAharon/BoT-SORT) - Use `botsort.yaml` to enable this tracker. +- [ByteTrack](https://github.com/ifzhang/ByteTrack) - Use `bytetrack.yaml` to enable this tracker. + +The default tracker is BoT-SORT. + +## Tracking + +To run the tracker on video streams, use a trained Detect, Segment or Pose model such as YOLOv8n, YOLOv8n-seg and YOLOv8n-pose. + +#### Python + +```python +from ultralytics import YOLO + +# Load an official or custom model +model = YOLO("yolov8n.pt") # Load an official Detect model +model = YOLO("yolov8n-seg.pt") # Load an official Segment model +model = YOLO("yolov8n-pose.pt") # Load an official Pose model +model = YOLO("path/to/best.pt") # Load a custom trained model + +# Perform tracking with the model +results = model.track( + source="https://youtu.be/LNwODJXcvt4", show=True +) # Tracking with default tracker +results = model.track( + source="https://youtu.be/LNwODJXcvt4", show=True, tracker="bytetrack.yaml" +) # Tracking with ByteTrack tracker +``` + +#### CLI + +```bash +# Perform tracking with various models using the command line interface +yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" # Official Detect model +yolo track model=yolov8n-seg.pt source="https://youtu.be/LNwODJXcvt4" # Official Segment model +yolo track model=yolov8n-pose.pt source="https://youtu.be/LNwODJXcvt4" # Official Pose model +yolo track model=path/to/best.pt source="https://youtu.be/LNwODJXcvt4" # Custom trained model + +# Track using ByteTrack tracker +yolo track model=path/to/best.pt tracker="bytetrack.yaml" +``` + +As can be seen in the above usage, tracking is available for all Detect, 
Segment and Pose models run on videos or streaming sources. + +## Configuration + +### Tracking Arguments + +Tracking configuration shares properties with Predict mode, such as `conf`, `iou`, and `show`. For further configurations, refer to the [Predict](https://docs.ultralytics.com/modes/predict/) mode page. + +#### Python + +```python +from ultralytics import YOLO + +# Configure the tracking parameters and run the tracker +model = YOLO("yolov8n.pt") +results = model.track( + source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True +) +``` + +#### CLI + +```bash +# Configure tracking parameters and run the tracker using the command line interface +yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3 iou=0.5 show +``` + +### Tracker Selection + +Ultralytics also allows you to use a modified tracker configuration file. To do this, simply make a copy of a tracker config file (for example, `custom_tracker.yaml`) from [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers) and modify any configurations (except the `tracker_type`) as per your needs. + +#### Python + +```python +from ultralytics import YOLO + +# Load the model and run the tracker with a custom configuration file +model = YOLO("yolov8n.pt") +results = model.track( + source="https://youtu.be/LNwODJXcvt4", tracker="custom_tracker.yaml" +) +``` + +#### CLI + +```bash +# Load the model and run the tracker with a custom configuration file using the command line interface +yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml' +``` + +For a comprehensive list of tracking arguments, refer to the [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers) page. + +## Python Examples + +### Persisting Tracks Loop + +Here is a Python script using OpenCV (`cv2`) and YOLOv8 to run object tracking on video frames.
This script still assumes you have already installed the necessary packages (`opencv-python` and `ultralytics`). The `persist=True` argument tells the tracker that the current image or frame is the next in a sequence and to expect tracks from the previous image in the current image. + +#### Python + +```python +import cv2 +from ultralytics import YOLO + +# Load the YOLOv8 model +model = YOLO("yolov8n.pt") + +# Open the video file +video_path = "path/to/video.mp4" +cap = cv2.VideoCapture(video_path) + +# Loop through the video frames +while cap.isOpened(): + # Read a frame from the video + success, frame = cap.read() + + if success: + # Run YOLOv8 tracking on the frame, persisting tracks between frames + results = model.track(frame, persist=True) + + # Visualize the results on the frame + annotated_frame = results[0].plot() + + # Display the annotated frame + cv2.imshow("YOLOv8 Tracking", annotated_frame) + + # Break the loop if 'q' is pressed + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # Break the loop if the end of the video is reached + break + +# Release the video capture object and close the display window +cap.release() +cv2.destroyAllWindows() +``` + +Please note the change from `model(frame)` to `model.track(frame)`, which enables object tracking instead of simple detection. This modified script will run the tracker on each frame of the video, visualize the results, and display them in a window. The loop can be exited by pressing 'q'. + +### Plotting Tracks Over Time + +Visualizing object tracks over consecutive frames can provide valuable insights into the movement patterns and behavior of detected objects within a video. With Ultralytics YOLOv8, plotting these tracks is a seamless and efficient process. + +In the following example, we demonstrate how to utilize YOLOv8's tracking capabilities to plot the movement of detected objects across multiple video frames.
This script involves opening a video file, reading it frame by frame, and utilizing the YOLO model to identify and track various objects. By retaining the center points of the detected bounding boxes and connecting them, we can draw lines that represent the paths followed by the tracked objects. + +#### Python + +```python +from collections import defaultdict + +import cv2 +import numpy as np + +from ultralytics import YOLO + +# Load the YOLOv8 model +model = YOLO("yolov8n.pt") + +# Open the video file +video_path = "path/to/video.mp4" +cap = cv2.VideoCapture(video_path) + +# Store the track history +track_history = defaultdict(lambda: []) + +# Loop through the video frames +while cap.isOpened(): + # Read a frame from the video + success, frame = cap.read() + + if success: + # Run YOLOv8 tracking on the frame, persisting tracks between frames + results = model.track(frame, persist=True) + + # Get the boxes and track IDs + boxes = results[0].boxes.xywh.cpu() + track_ids = results[0].boxes.id.int().cpu().tolist() + + # Visualize the results on the frame + annotated_frame = results[0].plot() + + # Plot the tracks + for box, track_id in zip(boxes, track_ids): + x, y, w, h = box + track = track_history[track_id] + track.append((float(x), float(y))) # x, y center point + if len(track) > 30: # retain the last 30 points (30 frames) per track + track.pop(0) + + # Draw the tracking lines + points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2)) + cv2.polylines( + annotated_frame, + [points], + isClosed=False, + color=(230, 230, 230), + thickness=10, + ) + + # Display the annotated frame + cv2.imshow("YOLOv8 Tracking", annotated_frame) + + # Break the loop if 'q' is pressed + if cv2.waitKey(1) & 0xFF == ord("q"): + break + else: + # Break the loop if the end of the video is reached + break + +# Release the video capture object and close the display window +cap.release() +cv2.destroyAllWindows() +``` + +### Multithreaded Tracking + +Multithreaded tracking provides the capability to run
object tracking on multiple video streams simultaneously. This is particularly useful when handling multiple video inputs, such as from multiple surveillance cameras, where concurrent processing can greatly enhance efficiency and performance. + +In the provided Python script, we make use of Python's `threading` module to run multiple instances of the tracker concurrently. Each thread is responsible for running the tracker on one video file, and all the threads run simultaneously in the background. + +To ensure that each thread receives the correct parameters (the video file and the model to use), we define a function `run_tracker_in_thread` that accepts these parameters and contains the main tracking loop. This function reads the video frame by frame, runs the tracker, and displays the results. + +Two different models are used in this example: `yolov8n.pt` and `yolov8n-seg.pt`, each tracking objects in a different video file. The video files are specified in `video_file1` and `video_file2`. + +The `daemon=True` parameter in `threading.Thread` means that these threads will be closed as soon as the main program finishes. We then start the threads with `start()` and use `join()` to make the main thread wait until both tracker threads have finished. + +Finally, after all threads have completed their task, the windows displaying the results are closed using `cv2.destroyAllWindows()`. 
+ +#### Python + +```python +import threading + +import cv2 +from ultralytics import YOLO + + +def run_tracker_in_thread(filename, model): + video = cv2.VideoCapture(filename) + frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) + for _ in range(frames): + ret, frame = video.read() + if ret: + results = model.track(source=frame, persist=True) + res_plotted = results[0].plot() + cv2.imshow("p", res_plotted) + if cv2.waitKey(1) == ord("q"): + break + + +# Load the models +model1 = YOLO("yolov8n.pt") +model2 = YOLO("yolov8n-seg.pt") + +# Define the video files for the trackers +video_file1 = "path/to/video1.mp4" +video_file2 = "path/to/video2.mp4" + +# Create the tracker threads +tracker_thread1 = threading.Thread( + target=run_tracker_in_thread, args=(video_file1, model1), daemon=True +) +tracker_thread2 = threading.Thread( + target=run_tracker_in_thread, args=(video_file2, model2), daemon=True +) + +# Start the tracker threads +tracker_thread1.start() +tracker_thread2.start() + +# Wait for the tracker threads to finish +tracker_thread1.join() +tracker_thread2.join() + +# Clean up and close windows +cv2.destroyAllWindows() +``` + +This example can easily be extended to handle more video files and models by creating more threads and applying the same methodology. + +## Contribute New Trackers + +Are you proficient in multi-object tracking and have successfully implemented or adapted a tracking algorithm with Ultralytics YOLO? We invite you to contribute to our Trackers section in [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers)! Your real-world applications and solutions could be invaluable for users working on tracking tasks. + +By contributing to this section, you help expand the scope of tracking solutions available within the Ultralytics YOLO framework, adding another layer of functionality and utility for the community. 
+ +To initiate your contribution, please refer to our [Contributing Guide](https://docs.ultralytics.com/help/contributing) for comprehensive instructions on submitting a Pull Request (PR) 🛠️. We are excited to see what you bring to the table! + +Together, let's enhance the tracking capabilities of the Ultralytics YOLO ecosystem 🙏! diff --git a/ultralytics/trackers/__init__.py b/ultralytics/trackers/__init__.py new file mode 100644 index 0000000..46e178e --- /dev/null +++ b/ultralytics/trackers/__init__.py @@ -0,0 +1,7 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from .bot_sort import BOTSORT +from .byte_tracker import BYTETracker +from .track import register_tracker + +__all__ = 'register_tracker', 'BOTSORT', 'BYTETracker' # allow simpler import diff --git a/ultralytics/trackers/basetrack.py b/ultralytics/trackers/basetrack.py new file mode 100644 index 0000000..3c7b0f7 --- /dev/null +++ b/ultralytics/trackers/basetrack.py @@ -0,0 +1,71 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from collections import OrderedDict + +import numpy as np + + +class TrackState: + """Enumeration of possible object tracking states.""" + + New = 0 + Tracked = 1 + Lost = 2 + Removed = 3 + + +class BaseTrack: + """Base class for object tracking, handling basic track attributes and operations.""" + + _count = 0 + + track_id = 0 + is_activated = False + state = TrackState.New + + history = OrderedDict() + features = [] + curr_feature = None + score = 0 + start_frame = 0 + frame_id = 0 + time_since_update = 0 + + # Multi-camera + location = (np.inf, np.inf) + + @property + def end_frame(self): + """Return the last frame ID of the track.""" + return self.frame_id + + @staticmethod + def next_id(): + """Increment and return the global track ID counter.""" + BaseTrack._count += 1 + return BaseTrack._count + + def activate(self, *args): + """Activate the track with the provided arguments.""" + raise NotImplementedError + + def predict(self): + """Predict the next state of the track.""" + 
class BOTrack(STrack):
    """
    An extended version of the STrack class for YOLOv8, adding object tracking features.

    Attributes:
        shared_kalman (KalmanFilterXYWH): A shared Kalman filter for all instances of BOTrack.
        smooth_feat (np.ndarray): Smoothed feature vector.
        curr_feat (np.ndarray): Current feature vector.
        features (deque): A deque to store feature vectors with a maximum length defined by `feat_history`.
        alpha (float): Smoothing factor for the exponential moving average of features.
        mean (np.ndarray): The mean state of the Kalman filter.
        covariance (np.ndarray): The covariance matrix of the Kalman filter.

    Methods:
        update_features(feat): Update features vector and smooth it using exponential moving average.
        predict(): Predicts the mean and covariance using Kalman filter.
        re_activate(new_track, frame_id, new_id): Reactivates a track with updated features and optionally new ID.
        update(new_track, frame_id): Update the YOLOv8 instance with new track and frame ID.
        tlwh: Property that gets the current position in tlwh format `(top left x, top left y, width, height)`.
        multi_predict(stracks): Predicts the mean and covariance of multiple object tracks using shared Kalman filter.
        convert_coords(tlwh): Converts tlwh bounding box coordinates to xywh format.
        tlwh_to_xywh(tlwh): Convert bounding box to xywh format `(center x, center y, width, height)`.

    Usage:
        bo_track = BOTrack(tlwh, score, cls, feat)
        bo_track.predict()
        bo_track.update(new_track, frame_id)
    """
    shared_kalman = KalmanFilterXYWH()

    def __init__(self, tlwh, score, cls, feat=None, feat_history=50):
        """
        Initialize the track with its box, score, class and optional appearance feature.

        Args:
            tlwh: Bounding box forwarded to `STrack.__init__`.
            score: Detection score forwarded to `STrack.__init__`.
            cls: Class label forwarded to `STrack.__init__`.
            feat (np.ndarray, optional): Initial feature vector. Defaults to None.
            feat_history (int): Maximum number of feature vectors kept in `features`. Defaults to 50.
        """
        super().__init__(tlwh, score, cls)

        self.smooth_feat = None
        self.curr_feat = None
        # The per-instance history and the smoothing factor must exist before the first update_features()
        # call; otherwise the first feature is appended to the class-level list shared by every track.
        self.features = deque([], maxlen=feat_history)
        self.alpha = 0.9
        if feat is not None:
            self.update_features(feat)

    def update_features(self, feat):
        """Normalize `feat`, store it as the current feature and fold it into the smoothed feature (EMA)."""
        # Normalize into a new array so the caller's array is not modified in place
        feat = np.asarray(feat) / np.linalg.norm(feat)
        self.curr_feat = feat
        if self.smooth_feat is None:
            smooth = feat
        else:
            smooth = self.alpha * self.smooth_feat + (1 - self.alpha) * feat
        self.features.append(feat)
        # Re-normalize into a new array so smooth_feat never aliases curr_feat or a stored history entry
        self.smooth_feat = smooth / np.linalg.norm(smooth)

    def predict(self):
        """Predicts the mean and covariance using Kalman filter."""
        mean_state = self.mean.copy()
        if self.state != TrackState.Tracked:
            # Zero state components 6 and 7 for tracks that are not currently tracked
            mean_state[6] = 0
            mean_state[7] = 0

        self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)

    def re_activate(self, new_track, frame_id, new_id=False):
        """Reactivates a track with updated features and optionally assigns a new ID."""
        if new_track.curr_feat is not None:
            self.update_features(new_track.curr_feat)
        super().re_activate(new_track, frame_id, new_id)

    def update(self, new_track, frame_id):
        """Update the YOLOv8 instance with new track and frame ID."""
        if new_track.curr_feat is not None:
            self.update_features(new_track.curr_feat)
        super().update(new_track, frame_id)

    @property
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y, width, height)`."""
        if self.mean is None:
            return self._tlwh.copy()
        ret = self.mean[:4].copy()
        ret[:2] -= ret[2:] / 2  # center -> top-left
        return ret

    @staticmethod
    def multi_predict(stracks):
        """Predicts the mean and covariance of multiple object tracks using shared Kalman filter."""
        if len(stracks) <= 0:
            return
        multi_mean = np.asarray([st.mean.copy() for st in stracks])
        multi_covariance = np.asarray([st.covariance for st in stracks])
        for i, st in enumerate(stracks):
            if st.state != TrackState.Tracked:
                multi_mean[i][6] = 0
                multi_mean[i][7] = 0
        multi_mean, multi_covariance = BOTrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
        for st, mean, cov in zip(stracks, multi_mean, multi_covariance):
            st.mean = mean
            st.covariance = cov

    def convert_coords(self, tlwh):
        """Converts Top-Left-Width-Height bounding box coordinates to X-Y-Width-Height format."""
        return self.tlwh_to_xywh(tlwh)

    @staticmethod
    def tlwh_to_xywh(tlwh):
        """Convert bounding box to format `(center x, center y, width, height)`."""
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2  # top-left -> center
        return ret
+ + Methods: + get_kalmanfilter(): Returns an instance of KalmanFilterXYWH for object tracking. + init_track(dets, scores, cls, img): Initialize track with detections, scores, and classes. + get_dists(tracks, detections): Get distances between tracks and detections using IoU and (optionally) ReID. + multi_predict(tracks): Predict and track multiple objects with YOLOv8 model. + + Usage: + bot_sort = BOTSORT(args, frame_rate) + bot_sort.init_track(dets, scores, cls, img) + bot_sort.multi_predict(tracks) + + Note: + The class is designed to work with the YOLOv8 object detection model and supports ReID only if enabled via args. + """ + + def __init__(self, args, frame_rate=30): + """Initialize YOLOv8 object with ReID module and GMC algorithm.""" + super().__init__(args, frame_rate) + # ReID module + self.proximity_thresh = args.proximity_thresh + self.appearance_thresh = args.appearance_thresh + + if args.with_reid: + # Haven't supported BoT-SORT(reid) yet + self.encoder = None + self.gmc = GMC(method=args.gmc_method) + + def get_kalmanfilter(self): + """Returns an instance of KalmanFilterXYWH for object tracking.""" + return KalmanFilterXYWH() + + def init_track(self, dets, scores, cls, img=None): + """Initialize track with detections, scores, and classes.""" + if len(dets) == 0: + return [] + if self.args.with_reid and self.encoder is not None: + features_keep = self.encoder.inference(img, dets) + return [BOTrack(xyxy, s, c, f) for (xyxy, s, c, f) in zip(dets, scores, cls, features_keep)] # detections + else: + return [BOTrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)] # detections + + def get_dists(self, tracks, detections): + """Get distances between tracks and detections using IoU and (optionally) ReID embeddings.""" + dists = matching.iou_distance(tracks, detections) + dists_mask = (dists > self.proximity_thresh) + + # TODO: mot20 + # if not self.args.mot20: + dists = matching.fuse_score(dists, detections) + + if self.args.with_reid and self.encoder 
is not None: + emb_dists = matching.embedding_distance(tracks, detections) / 2.0 + emb_dists[emb_dists > self.appearance_thresh] = 1.0 + emb_dists[dists_mask] = 1.0 + dists = np.minimum(dists, emb_dists) + return dists + + def multi_predict(self, tracks): + """Predict and track multiple objects with YOLOv8 model.""" + BOTrack.multi_predict(tracks) + + def reset(self): + """Reset tracker.""" + super().reset() + self.gmc.reset_params() diff --git a/ultralytics/trackers/byte_tracker.py b/ultralytics/trackers/byte_tracker.py new file mode 100644 index 0000000..1a612f8 --- /dev/null +++ b/ultralytics/trackers/byte_tracker.py @@ -0,0 +1,429 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +import numpy as np + +from .basetrack import BaseTrack, TrackState +from .utils import matching +from .utils.kalman_filter import KalmanFilterXYAH + + +class STrack(BaseTrack): + """ + Single object tracking representation that uses Kalman filtering for state estimation. + + This class is responsible for storing all the information regarding individual tracklets and performs state updates + and predictions based on Kalman filter. + + Attributes: + shared_kalman (KalmanFilterXYAH): Shared Kalman filter that is used across all STrack instances for prediction. + _tlwh (np.ndarray): Private attribute to store top-left corner coordinates and width and height of bounding box. + kalman_filter (KalmanFilterXYAH): Instance of Kalman filter used for this particular object track. + mean (np.ndarray): Mean state estimate vector. + covariance (np.ndarray): Covariance of state estimate. + is_activated (bool): Boolean flag indicating if the track has been activated. + score (float): Confidence score of the track. + tracklet_len (int): Length of the tracklet. + cls (any): Class label for the object. + idx (int): Index or identifier for the object. + frame_id (int): Current frame ID. + start_frame (int): Frame where the object was first detected. 
+ + Methods: + predict(): Predict the next state of the object using Kalman filter. + multi_predict(stracks): Predict the next states for multiple tracks. + multi_gmc(stracks, H): Update multiple track states using a homography matrix. + activate(kalman_filter, frame_id): Activate a new tracklet. + re_activate(new_track, frame_id, new_id): Reactivate a previously lost tracklet. + update(new_track, frame_id): Update the state of a matched track. + convert_coords(tlwh): Convert bounding box to x-y-angle-height format. + tlwh_to_xyah(tlwh): Convert tlwh bounding box to xyah format. + tlbr_to_tlwh(tlbr): Convert tlbr bounding box to tlwh format. + tlwh_to_tlbr(tlwh): Convert tlwh bounding box to tlbr format. + """ + + shared_kalman = KalmanFilterXYAH() + + def __init__(self, tlwh, score, cls): + """Initialize new STrack instance.""" + self._tlwh = np.asarray(self.tlbr_to_tlwh(tlwh[:-1]), dtype=np.float32) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.tracklet_len = 0 + self.cls = cls + self.idx = tlwh[-1] + + def predict(self): + """Predicts mean and covariance using Kalman filter.""" + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) + + @staticmethod + def multi_predict(stracks): + """Perform multi-object predictive tracking using Kalman filter for given stracks.""" + if len(stracks) <= 0: + return + multi_mean = np.asarray([st.mean.copy() for st in stracks]) + multi_covariance = np.asarray([st.covariance for st in stracks]) + for i, st in enumerate(stracks): + if st.state != TrackState.Tracked: + multi_mean[i][7] = 0 + multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + stracks[i].mean = mean + stracks[i].covariance = cov + + @staticmethod + def 
multi_gmc(stracks, H=np.eye(2, 3)): + """Update state tracks positions and covariances using a homography matrix.""" + if len(stracks) > 0: + multi_mean = np.asarray([st.mean.copy() for st in stracks]) + multi_covariance = np.asarray([st.covariance for st in stracks]) + + R = H[:2, :2] + R8x8 = np.kron(np.eye(4, dtype=float), R) + t = H[:2, 2] + + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + mean = R8x8.dot(mean) + mean[:2] += t + cov = R8x8.dot(cov).dot(R8x8.transpose()) + + stracks[i].mean = mean + stracks[i].covariance = cov + + def activate(self, kalman_filter, frame_id): + """Start a new tracklet.""" + self.kalman_filter = kalman_filter + self.track_id = self.next_id() + self.mean, self.covariance = self.kalman_filter.initiate(self.convert_coords(self._tlwh)) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + if frame_id == 1: + self.is_activated = True + self.frame_id = frame_id + self.start_frame = frame_id + + def re_activate(self, new_track, frame_id, new_id=False): + """Reactivates a previously lost track with a new detection.""" + self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance, + self.convert_coords(new_track.tlwh)) + self.tracklet_len = 0 + self.state = TrackState.Tracked + self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + self.score = new_track.score + self.cls = new_track.cls + self.idx = new_track.idx + + def update(self, new_track, frame_id): + """ + Update the state of a matched track. + + Args: + new_track (STrack): The new track containing updated information. + frame_id (int): The ID of the current frame. 
+ """ + self.frame_id = frame_id + self.tracklet_len += 1 + + new_tlwh = new_track.tlwh + self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance, + self.convert_coords(new_tlwh)) + self.state = TrackState.Tracked + self.is_activated = True + + self.score = new_track.score + self.cls = new_track.cls + self.idx = new_track.idx + + def convert_coords(self, tlwh): + """Convert a bounding box's top-left-width-height format to its x-y-angle-height equivalent.""" + return self.tlwh_to_xyah(tlwh) + + @property + def tlwh(self): + """Get current position in bounding box format (top left x, top left y, width, height).""" + if self.mean is None: + return self._tlwh.copy() + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + def tlbr(self): + """Convert bounding box to format (min x, min y, max x, max y), i.e., (top left, bottom right).""" + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + def tlwh_to_xyah(tlwh): + """Convert bounding box to format (center x, center y, aspect ratio, height), where the aspect ratio is width / + height. + """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + @staticmethod + def tlbr_to_tlwh(tlbr): + """Converts top-left bottom-right format to top-left width height format.""" + ret = np.asarray(tlbr).copy() + ret[2:] -= ret[:2] + return ret + + @staticmethod + def tlwh_to_tlbr(tlwh): + """Converts tlwh bounding box format to tlbr format.""" + ret = np.asarray(tlwh).copy() + ret[2:] += ret[:2] + return ret + + def __repr__(self): + """Return a string representation of the BYTETracker object with start and end frames and track ID.""" + return f'OT_{self.track_id}_({self.start_frame}-{self.end_frame})' + + +class BYTETracker: + """ + BYTETracker: A tracking algorithm built on top of YOLOv8 for object detection and tracking. 
+ + The class is responsible for initializing, updating, and managing the tracks for detected objects in a video + sequence. It maintains the state of tracked, lost, and removed tracks over frames, utilizes Kalman filtering for + predicting the new object locations, and performs data association. + + Attributes: + tracked_stracks (list[STrack]): List of successfully activated tracks. + lost_stracks (list[STrack]): List of lost tracks. + removed_stracks (list[STrack]): List of removed tracks. + frame_id (int): The current frame ID. + args (namespace): Command-line arguments. + max_time_lost (int): The maximum frames for a track to be considered as 'lost'. + kalman_filter (object): Kalman Filter object. + + Methods: + update(results, img=None): Updates object tracker with new detections. + get_kalmanfilter(): Returns a Kalman filter object for tracking bounding boxes. + init_track(dets, scores, cls, img=None): Initialize object tracking with detections. + get_dists(tracks, detections): Calculates the distance between tracks and detections. + multi_predict(tracks): Predicts the location of tracks. + reset_id(): Resets the ID counter of STrack. + joint_stracks(tlista, tlistb): Combines two lists of stracks. + sub_stracks(tlista, tlistb): Filters out the stracks present in the second list from the first list. + remove_duplicate_stracks(stracksa, stracksb): Removes duplicate stracks based on IOU. 
+ """ + + def __init__(self, args, frame_rate=30): + """Initialize a YOLOv8 object to track objects with given arguments and frame rate.""" + self.tracked_stracks = [] # type: list[STrack] + self.lost_stracks = [] # type: list[STrack] + self.removed_stracks = [] # type: list[STrack] + + self.frame_id = 0 + self.args = args + self.max_time_lost = int(frame_rate / 30.0 * args.track_buffer) + self.kalman_filter = self.get_kalmanfilter() + self.reset_id() + + def update(self, results, img=None): + """Updates object tracker with new detections and returns tracked object bounding boxes.""" + self.frame_id += 1 + activated_stracks = [] + refind_stracks = [] + lost_stracks = [] + removed_stracks = [] + + scores = results.conf + bboxes = results.xyxy + # Add index + bboxes = np.concatenate([bboxes, np.arange(len(bboxes)).reshape(-1, 1)], axis=-1) + cls = results.cls + + remain_inds = scores > self.args.track_high_thresh + inds_low = scores > self.args.track_low_thresh + inds_high = scores < self.args.track_high_thresh + + inds_second = np.logical_and(inds_low, inds_high) + dets_second = bboxes[inds_second] + dets = bboxes[remain_inds] + scores_keep = scores[remain_inds] + scores_second = scores[inds_second] + cls_keep = cls[remain_inds] + cls_second = cls[inds_second] + + detections = self.init_track(dets, scores_keep, cls_keep, img) + # Add newly detected tracklets to tracked_stracks + unconfirmed = [] + tracked_stracks = [] # type: list[STrack] + for track in self.tracked_stracks: + if not track.is_activated: + unconfirmed.append(track) + else: + tracked_stracks.append(track) + # Step 2: First association, with high score detection boxes + strack_pool = self.joint_stracks(tracked_stracks, self.lost_stracks) + # Predict the current location with KF + self.multi_predict(strack_pool) + if hasattr(self, 'gmc') and img is not None: + warp = self.gmc.apply(img, dets) + STrack.multi_gmc(strack_pool, warp) + STrack.multi_gmc(unconfirmed, warp) + + dists = 
self.get_dists(strack_pool, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.args.match_thresh) + + for itracked, idet in matches: + track = strack_pool[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_stracks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + # Step 3: Second association, with low score detection boxes association the untrack to the low score detections + detections_second = self.init_track(dets_second, scores_second, cls_second, img) + r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] + # TODO + dists = matching.iou_distance(r_tracked_stracks, detections_second) + matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5) + for itracked, idet in matches: + track = r_tracked_stracks[itracked] + det = detections_second[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_stracks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + for it in u_track: + track = r_tracked_stracks[it] + if track.state != TrackState.Lost: + track.mark_lost() + lost_stracks.append(track) + # Deal with unconfirmed tracks, usually tracks with only one beginning frame + detections = [detections[i] for i in u_detection] + dists = self.get_dists(unconfirmed, detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id) + activated_stracks.append(unconfirmed[itracked]) + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_stracks.append(track) + # Step 4: Init new stracks + for inew in u_detection: + track = detections[inew] + if track.score < self.args.new_track_thresh: + 
continue + track.activate(self.kalman_filter, self.frame_id) + activated_stracks.append(track) + # Step 5: Update state + for track in self.lost_stracks: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_stracks.append(track) + + self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] + self.tracked_stracks = self.joint_stracks(self.tracked_stracks, activated_stracks) + self.tracked_stracks = self.joint_stracks(self.tracked_stracks, refind_stracks) + self.lost_stracks = self.sub_stracks(self.lost_stracks, self.tracked_stracks) + self.lost_stracks.extend(lost_stracks) + self.lost_stracks = self.sub_stracks(self.lost_stracks, self.removed_stracks) + self.tracked_stracks, self.lost_stracks = self.remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) + self.removed_stracks.extend(removed_stracks) + if len(self.removed_stracks) > 1000: + self.removed_stracks = self.removed_stracks[-999:] # clip remove stracks to 1000 maximum + return np.asarray( + [x.tlbr.tolist() + [x.track_id, x.score, x.cls, x.idx] for x in self.tracked_stracks if x.is_activated], + dtype=np.float32) + + def get_kalmanfilter(self): + """Returns a Kalman filter object for tracking bounding boxes.""" + return KalmanFilterXYAH() + + def init_track(self, dets, scores, cls, img=None): + """Initialize object tracking with detections and scores using STrack algorithm.""" + return [STrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)] if len(dets) else [] # detections + + def get_dists(self, tracks, detections): + """Calculates the distance between tracks and detections using IOU and fuses scores.""" + dists = matching.iou_distance(tracks, detections) + # TODO: mot20 + # if not self.args.mot20: + dists = matching.fuse_score(dists, detections) + return dists + + def multi_predict(self, tracks): + """Returns the predicted tracks using the YOLOv8 network.""" + STrack.multi_predict(tracks) + + def reset_id(self): + 
"""Resets the ID counter of STrack.""" + STrack.reset_id() + + def reset(self): + """Reset tracker.""" + self.tracked_stracks = [] # type: list[STrack] + self.lost_stracks = [] # type: list[STrack] + self.removed_stracks = [] # type: list[STrack] + self.frame_id = 0 + self.kalman_filter = self.get_kalmanfilter() + self.reset_id() + + @staticmethod + def joint_stracks(tlista, tlistb): + """Combine two lists of stracks into a single one.""" + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + + @staticmethod + def sub_stracks(tlista, tlistb): + """DEPRECATED CODE in https://github.com/ultralytics/ultralytics/pull/1890/ + stracks = {t.track_id: t for t in tlista} + for t in tlistb: + tid = t.track_id + if stracks.get(tid, 0): + del stracks[tid] + return list(stracks.values()) + """ + track_ids_b = {t.track_id for t in tlistb} + return [t for t in tlista if t.track_id not in track_ids_b] + + @staticmethod + def remove_duplicate_stracks(stracksa, stracksb): + """Remove duplicate stracks with non-maximum IOU distance.""" + pdist = matching.iou_distance(stracksa, stracksb) + pairs = np.where(pdist < 0.15) + dupa, dupb = [], [] + for p, q in zip(*pairs): + timep = stracksa[p].frame_id - stracksa[p].start_frame + timeq = stracksb[q].frame_id - stracksb[q].start_frame + if timep > timeq: + dupb.append(q) + else: + dupa.append(p) + resa = [t for i, t in enumerate(stracksa) if i not in dupa] + resb = [t for i, t in enumerate(stracksb) if i not in dupb] + return resa, resb diff --git a/ultralytics/trackers/track.py b/ultralytics/trackers/track.py new file mode 100644 index 0000000..39e2275 --- /dev/null +++ b/ultralytics/trackers/track.py @@ -0,0 +1,70 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from functools import partial +from pathlib import Path + +import torch + +from ultralytics.utils import IterableSimpleNamespace, 
yaml_load +from ultralytics.utils.checks import check_yaml + +from .bot_sort import BOTSORT +from .byte_tracker import BYTETracker + +TRACKER_MAP = {'bytetrack': BYTETracker, 'botsort': BOTSORT} + + +def on_predict_start(predictor, persist=False): + """ + Initialize trackers for object tracking during prediction. + + Args: + predictor (object): The predictor object to initialize trackers for. + persist (bool, optional): Whether to persist the trackers if they already exist. Defaults to False. + + Raises: + AssertionError: If the tracker_type is not 'bytetrack' or 'botsort'. + """ + if hasattr(predictor, 'trackers') and persist: + return + tracker = check_yaml(predictor.args.tracker) + cfg = IterableSimpleNamespace(**yaml_load(tracker)) + assert cfg.tracker_type in ['bytetrack', 'botsort'], \ + f"Only support 'bytetrack' and 'botsort' for now, but got '{cfg.tracker_type}'" + trackers = [] + for _ in range(predictor.dataset.bs): + tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30) + trackers.append(tracker) + predictor.trackers = trackers + + +def on_predict_postprocess_end(predictor, persist=False): + """Postprocess detected boxes and update with object tracking.""" + bs = predictor.dataset.bs + path, im0s = predictor.batch[:2] + + for i in range(bs): + if not persist and predictor.vid_path[i] != str(predictor.save_dir / Path(path[i]).name): # new video + predictor.trackers[i].reset() + + det = predictor.results[i].boxes.cpu().numpy() + if len(det) == 0: + continue + tracks = predictor.trackers[i].update(det, im0s[i]) + if len(tracks) == 0: + continue + idx = tracks[:, -1].astype(int) + predictor.results[i] = predictor.results[i][idx] + predictor.results[i].update(boxes=torch.as_tensor(tracks[:, :-1])) + + +def register_tracker(model, persist): + """ + Register tracking callbacks to the model for object tracking during prediction. + + Args: + model (object): The model object to register tracking callbacks for. 
+ persist (bool): Whether to persist the trackers if they already exist. + """ + model.add_callback('on_predict_start', partial(on_predict_start, persist=persist)) + model.add_callback('on_predict_postprocess_end', partial(on_predict_postprocess_end, persist=persist)) diff --git a/ultralytics/trackers/utils/__init__.py b/ultralytics/trackers/utils/__init__.py new file mode 100644 index 0000000..9e68dc1 --- /dev/null +++ b/ultralytics/trackers/utils/__init__.py @@ -0,0 +1 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license diff --git a/ultralytics/trackers/utils/gmc.py b/ultralytics/trackers/utils/gmc.py new file mode 100644 index 0000000..60c46a1 --- /dev/null +++ b/ultralytics/trackers/utils/gmc.py @@ -0,0 +1,309 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +import copy + +import cv2 +import numpy as np + +from ultralytics.utils import LOGGER + + +class GMC: + """ + Generalized Motion Compensation (GMC) class for tracking and object detection in video frames. + + This class provides methods for tracking and detecting objects based on several tracking algorithms including ORB, + SIFT, ECC, and Sparse Optical Flow. It also supports downscaling of frames for computational efficiency. + + Attributes: + method (str): The method used for tracking. Options include 'orb', 'sift', 'ecc', 'sparseOptFlow', 'none'. + downscale (int): Factor by which to downscale the frames for processing. + prevFrame (np.array): Stores the previous frame for tracking. + prevKeyPoints (list): Stores the keypoints from the previous frame. + prevDescriptors (np.array): Stores the descriptors from the previous frame. + initializedFirstFrame (bool): Flag to indicate if the first frame has been processed. + + Methods: + __init__(self, method='sparseOptFlow', downscale=2): Initializes a GMC object with the specified method + and downscale factor. + apply(self, raw_frame, detections=None): Applies the chosen method to a raw frame and optionally uses + provided detections. 
+ applyEcc(self, raw_frame, detections=None): Applies the ECC algorithm to a raw frame. + applyFeatures(self, raw_frame, detections=None): Applies feature-based methods like ORB or SIFT to a raw frame. + applySparseOptFlow(self, raw_frame, detections=None): Applies the Sparse Optical Flow method to a raw frame. + """ + + def __init__(self, method='sparseOptFlow', downscale=2): + """Initialize a video tracker with specified parameters.""" + super().__init__() + + self.method = method + self.downscale = max(1, int(downscale)) + + if self.method == 'orb': + self.detector = cv2.FastFeatureDetector_create(20) + self.extractor = cv2.ORB_create() + self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING) + + elif self.method == 'sift': + self.detector = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20) + self.extractor = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20) + self.matcher = cv2.BFMatcher(cv2.NORM_L2) + + elif self.method == 'ecc': + number_of_iterations = 5000 + termination_eps = 1e-6 + self.warp_mode = cv2.MOTION_EUCLIDEAN + self.criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps) + + elif self.method == 'sparseOptFlow': + self.feature_params = dict(maxCorners=1000, + qualityLevel=0.01, + minDistance=1, + blockSize=3, + useHarrisDetector=False, + k=0.04) + + elif self.method in ['none', 'None', None]: + self.method = None + else: + raise ValueError(f'Error: Unknown GMC method:{method}') + + self.prevFrame = None + self.prevKeyPoints = None + self.prevDescriptors = None + + self.initializedFirstFrame = False + + def apply(self, raw_frame, detections=None): + """Apply object detection on a raw frame using specified method.""" + if self.method in ['orb', 'sift']: + return self.applyFeatures(raw_frame, detections) + elif self.method == 'ecc': + return self.applyEcc(raw_frame, detections) + elif self.method == 'sparseOptFlow': + return self.applySparseOptFlow(raw_frame, 
detections) + else: + return np.eye(2, 3) + + def applyEcc(self, raw_frame, detections=None): + """Initialize.""" + height, width, _ = raw_frame.shape + frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY) + H = np.eye(2, 3, dtype=np.float32) + + # Downscale image (TODO: consider using pyramids) + if self.downscale > 1.0: + frame = cv2.GaussianBlur(frame, (3, 3), 1.5) + frame = cv2.resize(frame, (width // self.downscale, height // self.downscale)) + width = width // self.downscale + height = height // self.downscale + + # Handle first frame + if not self.initializedFirstFrame: + # Initialize data + self.prevFrame = frame.copy() + + # Initialization done + self.initializedFirstFrame = True + + return H + + # Run the ECC algorithm. The results are stored in warp_matrix. + # (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria) + try: + (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria, None, 1) + except Exception as e: + LOGGER.warning(f'WARNING: find transform failed. 
Set warp as identity {e}') + + return H + + def applyFeatures(self, raw_frame, detections=None): + """Initialize.""" + height, width, _ = raw_frame.shape + frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY) + H = np.eye(2, 3) + + # Downscale image (TODO: consider using pyramids) + if self.downscale > 1.0: + # frame = cv2.GaussianBlur(frame, (3, 3), 1.5) + frame = cv2.resize(frame, (width // self.downscale, height // self.downscale)) + width = width // self.downscale + height = height // self.downscale + + # Find the keypoints + mask = np.zeros_like(frame) + # mask[int(0.05 * height): int(0.95 * height), int(0.05 * width): int(0.95 * width)] = 255 + mask[int(0.02 * height):int(0.98 * height), int(0.02 * width):int(0.98 * width)] = 255 + if detections is not None: + for det in detections: + tlbr = (det[:4] / self.downscale).astype(np.int_) + mask[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2]] = 0 + + keypoints = self.detector.detect(frame, mask) + + # Compute the descriptors + keypoints, descriptors = self.extractor.compute(frame, keypoints) + + # Handle first frame + if not self.initializedFirstFrame: + # Initialize data + self.prevFrame = frame.copy() + self.prevKeyPoints = copy.copy(keypoints) + self.prevDescriptors = copy.copy(descriptors) + + # Initialization done + self.initializedFirstFrame = True + + return H + + # Match descriptors. 
+ knnMatches = self.matcher.knnMatch(self.prevDescriptors, descriptors, 2) + + # Filtered matches based on smallest spatial distance + matches = [] + spatialDistances = [] + + maxSpatialDistance = 0.25 * np.array([width, height]) + + # Handle empty matches case + if len(knnMatches) == 0: + # Store to next iteration + self.prevFrame = frame.copy() + self.prevKeyPoints = copy.copy(keypoints) + self.prevDescriptors = copy.copy(descriptors) + + return H + + for m, n in knnMatches: + if m.distance < 0.9 * n.distance: + prevKeyPointLocation = self.prevKeyPoints[m.queryIdx].pt + currKeyPointLocation = keypoints[m.trainIdx].pt + + spatialDistance = (prevKeyPointLocation[0] - currKeyPointLocation[0], + prevKeyPointLocation[1] - currKeyPointLocation[1]) + + if (np.abs(spatialDistance[0]) < maxSpatialDistance[0]) and \ + (np.abs(spatialDistance[1]) < maxSpatialDistance[1]): + spatialDistances.append(spatialDistance) + matches.append(m) + + meanSpatialDistances = np.mean(spatialDistances, 0) + stdSpatialDistances = np.std(spatialDistances, 0) + + inliers = (spatialDistances - meanSpatialDistances) < 2.5 * stdSpatialDistances + + goodMatches = [] + prevPoints = [] + currPoints = [] + for i in range(len(matches)): + if inliers[i, 0] and inliers[i, 1]: + goodMatches.append(matches[i]) + prevPoints.append(self.prevKeyPoints[matches[i].queryIdx].pt) + currPoints.append(keypoints[matches[i].trainIdx].pt) + + prevPoints = np.array(prevPoints) + currPoints = np.array(currPoints) + + # Draw the keypoint matches on the output image + # if False: + # import matplotlib.pyplot as plt + # matches_img = np.hstack((self.prevFrame, frame)) + # matches_img = cv2.cvtColor(matches_img, cv2.COLOR_GRAY2BGR) + # W = np.size(self.prevFrame, 1) + # for m in goodMatches: + # prev_pt = np.array(self.prevKeyPoints[m.queryIdx].pt, dtype=np.int_) + # curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_) + # curr_pt[0] += W + # color = np.random.randint(0, 255, 3) + # color = (int(color[0]), 
def applySparseOptFlow(self, raw_frame, detections=None):
    """
    Estimate frame-to-frame camera motion with sparse (pyramidal Lucas-Kanade) optical flow.

    Args:
        raw_frame (np.ndarray): Current frame. It is unpacked as (height, width, channels) and converted to
            grayscale with cv2.COLOR_BGR2GRAY.
        detections (list, optional): Not used by this method.

    Returns:
        (np.ndarray): 2x3 matrix mapping previous-frame points to current-frame points. The identity
            (`np.eye(2, 3)`) is returned on the first frame, when there is nothing to track, when 4 or fewer
            points were tracked, or when no transform could be estimated.
    """
    height, width, _ = raw_frame.shape
    frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
    H = np.eye(2, 3)

    # Downscale image
    if self.downscale > 1.0:
        frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))

    # Find the keypoints (the result may hold no corners at all on a featureless frame)
    keypoints = cv2.goodFeaturesToTrack(frame, mask=None, **self.feature_params)

    # Handle first frame: there is nothing to compare against yet, so only remember the current state
    if not self.initializedFirstFrame:
        self.prevFrame = frame.copy()
        self.prevKeyPoints = copy.copy(keypoints)
        self.initializedFirstFrame = True
        return H

    # Find correspondences, but only when the previous frame actually produced keypoints to track
    prev_keypoints = self.prevKeyPoints
    if prev_keypoints is not None and len(prev_keypoints) > 0:
        matched_keypoints, status, _ = cv2.calcOpticalFlowPyrLK(self.prevFrame, frame, prev_keypoints, None)

        # Leave good correspondences only (status is non-zero where the flow was found)
        tracked = np.asarray(status).ravel().astype(bool)
        prevPoints = np.asarray(prev_keypoints)[tracked]
        currPoints = np.asarray(matched_keypoints)[tracked]
    else:
        prevPoints = currPoints = np.empty((0, 1, 2), dtype=np.float32)

    # Find rigid matrix: needs more than 4 pairs and equally sized point sets
    if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(currPoints, 0)):
        estimate, _ = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)

        if estimate is not None:
            H = estimate

            # Handle downscale: the translation was measured on the reduced image
            if self.downscale > 1.0:
                H[0, 2] *= self.downscale
                H[1, 2] *= self.downscale
        else:
            # Keep the identity instead of indexing into a missing estimate
            LOGGER.warning('WARNING: affine transform could not be estimated')
    else:
        LOGGER.warning('WARNING: not enough matching points')

    # Store to next iteration
    self.prevFrame = frame.copy()
    self.prevKeyPoints = copy.copy(keypoints)

    return H
/ 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """ + Create track from unassociated measurement. + + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, a, h) with center position (x, y), + aspect ratio a, and height h. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + mean = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[3], 2 * self._std_weight_position * measurement[3], 1e-2, + 2 * self._std_weight_position * measurement[3], 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], 1e-5, 10 * self._std_weight_velocity * measurement[3]] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """ + Run Kalman filter prediction step. + + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous time step. + covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the previous time step. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted state. Unobserved velocities are + initialized to 0 mean. 
+ """ + std_pos = [ + self._std_weight_position * mean[3], self._std_weight_position * mean[3], 1e-2, + self._std_weight_position * mean[3]] + std_vel = [ + self._std_weight_velocity * mean[3], self._std_weight_velocity * mean[3], 1e-5, + self._std_weight_velocity * mean[3]] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + # mean = np.dot(self._motion_mat, mean) + mean = np.dot(mean, self._motion_mat.T) + covariance = np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """ + Project state distribution to measurement space. + + Parameters + ---------- + mean : ndarray + The state's mean vector (8 dimensional array). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + + Returns + ------- + (ndarray, ndarray) + Returns the projected mean and covariance matrix of the given state estimate. + """ + std = [ + self._std_weight_position * mean[3], self._std_weight_position * mean[3], 1e-1, + self._std_weight_position * mean[3]] + innovation_cov = np.diag(np.square(std)) + + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot((self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov + + def multi_predict(self, mean, covariance): + """ + Run Kalman filter prediction step (Vectorized version). + + Parameters + ---------- + mean : ndarray + The Nx8 dimensional mean matrix of the object states at the previous time step. + covariance : ndarray + The Nx8x8 dimensional covariance matrix of the object states at the previous time step. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted state. Unobserved velocities are + initialized to 0 mean. 
+ """ + std_pos = [ + self._std_weight_position * mean[:, 3], self._std_weight_position * mean[:, 3], + 1e-2 * np.ones_like(mean[:, 3]), self._std_weight_position * mean[:, 3]] + std_vel = [ + self._std_weight_velocity * mean[:, 3], self._std_weight_velocity * mean[:, 3], + 1e-5 * np.ones_like(mean[:, 3]), self._std_weight_velocity * mean[:, 3]] + sqr = np.square(np.r_[std_pos, std_vel]).T + + motion_cov = [np.diag(sqr[i]) for i in range(len(mean))] + motion_cov = np.asarray(motion_cov) + + mean = np.dot(mean, self._motion_mat.T) + left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) + covariance = np.dot(left, self._motion_mat.T) + motion_cov + + return mean, covariance + + def update(self, mean, covariance, measurement): + """ + Run Kalman filter correction step. + + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, a, h), where (x, y) is the center position, a the aspect + ratio, and h the height of the bounding box. + + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. + """ + projected_mean, projected_cov = self.project(mean, covariance) + + chol_factor, lower = scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False) + kalman_gain = scipy.linalg.cho_solve((chol_factor, lower), + np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + innovation = measurement - projected_mean + + new_mean = mean + np.dot(innovation, kalman_gain.T) + new_covariance = covariance - np.linalg.multi_dot((kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, only_position=False, metric='maha'): + """ + Compute gating distance between state distribution and measurements. 
class KalmanFilterXYWH(KalmanFilterXYAH):
    """
    For BoT-SORT. A simple Kalman filter for tracking bounding boxes in image space.

    The state is 8-dimensional, (x, y, w, h, vx, vy, vw, vh): box center (x, y), width w, height h and the
    velocity of each of those four quantities.

    Motion is modelled as constant velocity, and the box (x, y, w, h) is observed directly (linear observation
    model). Noise levels are scaled by the current width and height.
    """

    def initiate(self, measurement):
        """
        Create a track from an unassociated measurement.

        Parameters
        ----------
        measurement : ndarray
            Bounding box (x, y, w, h): center position (x, y), width w and height h.

        Returns
        -------
        (ndarray, ndarray)
            The 8 dimensional mean vector and the 8x8 covariance matrix of the new track. Velocities start at
            zero mean.
        """
        box_w, box_h = measurement[2], measurement[3]
        pos_scale = 2 * self._std_weight_position
        vel_scale = 10 * self._std_weight_velocity

        std = [
            pos_scale * box_w, pos_scale * box_h, pos_scale * box_w, pos_scale * box_h,
            vel_scale * box_w, vel_scale * box_h, vel_scale * box_w, vel_scale * box_h]

        mean = np.r_[measurement, np.zeros_like(measurement)]
        return mean, np.diag(np.square(std))

    def predict(self, mean, covariance):
        """
        Run the Kalman filter prediction step for a single track.

        Parameters
        ----------
        mean : ndarray
            The 8 dimensional mean vector of the object state at the previous time step.
        covariance : ndarray
            The 8x8 dimensional covariance matrix of the object state at the previous time step.

        Returns
        -------
        (ndarray, ndarray)
            The mean vector and covariance matrix of the predicted state.
        """
        box_w, box_h = mean[2], mean[3]
        w_pos, w_vel = self._std_weight_position, self._std_weight_velocity

        position_std = [w_pos * box_w, w_pos * box_h, w_pos * box_w, w_pos * box_h]
        velocity_std = [w_vel * box_w, w_vel * box_h, w_vel * box_w, w_vel * box_h]
        process_noise = np.diag(np.square(np.r_[position_std, velocity_std]))

        transition = self._motion_mat
        predicted_mean = np.dot(mean, transition.T)
        predicted_cov = np.linalg.multi_dot((transition, covariance, transition.T)) + process_noise
        return predicted_mean, predicted_cov

    def project(self, mean, covariance):
        """
        Project the state distribution to measurement space.

        Parameters
        ----------
        mean : ndarray
            The state's mean vector (8 dimensional array).
        covariance : ndarray
            The state's covariance matrix (8x8 dimensional).

        Returns
        -------
        (ndarray, ndarray)
            The projected mean and the projected covariance with the measurement noise added.
        """
        box_w, box_h = mean[2], mean[3]
        w_pos = self._std_weight_position
        measurement_noise = np.diag(np.square([w_pos * box_w, w_pos * box_h, w_pos * box_w, w_pos * box_h]))

        observe = self._update_mat
        projected_mean = np.dot(observe, mean)
        projected_cov = np.linalg.multi_dot((observe, covariance, observe.T))
        return projected_mean, projected_cov + measurement_noise

    def multi_predict(self, mean, covariance):
        """
        Run the Kalman filter prediction step for N tracks at once (vectorized version).

        Parameters
        ----------
        mean : ndarray
            The Nx8 dimensional mean matrix of the object states at the previous time step.
        covariance : ndarray
            The Nx8x8 dimensional covariance matrix of the object states at the previous time step.

        Returns
        -------
        (ndarray, ndarray)
            The mean matrix and covariance matrices of the predicted states.
        """
        widths, heights = mean[:, 2], mean[:, 3]
        w_pos, w_vel = self._std_weight_position, self._std_weight_velocity

        position_std = [w_pos * widths, w_pos * heights, w_pos * widths, w_pos * heights]
        velocity_std = [w_vel * widths, w_vel * heights, w_vel * widths, w_vel * heights]
        # One row of 8 variances per track
        variances = np.square(np.r_[position_std, velocity_std]).T

        process_noise = np.asarray([np.diag(variances[i]) for i in range(len(mean))])

        transition = self._motion_mat
        predicted_mean = np.dot(mean, transition.T)
        # dot() puts the track axis second; move it back to the front before the right-hand product
        left = np.dot(transition, covariance).transpose((1, 0, 2))
        predicted_cov = np.dot(left, transition.T) + process_noise
        return predicted_mean, predicted_cov

    def update(self, mean, covariance, measurement):
        """
        Run the Kalman filter correction step by delegating to the parent implementation.

        Parameters
        ----------
        mean : ndarray
            The predicted state's mean vector (8 dimensional).
        covariance : ndarray
            The state's covariance matrix (8x8 dimensional).
        measurement : ndarray
            The 4 dimensional measurement vector (x, y, w, h), where (x, y) is the center position, w the width,
            and h the height of the bounding box.

        Returns
        -------
        (ndarray, ndarray)
            The measurement-corrected state distribution.
        """
        return super().update(mean, covariance, measurement)
def iou_distance(atracks, btracks):
    """
    Build an IoU-based cost matrix between two collections of tracks or boxes.

    Args:
        atracks (list[STrack] | list[np.ndarray]): Tracks 'a', or their bounding boxes.
        btracks (list[STrack] | list[np.ndarray]): Tracks 'b', or their bounding boxes.

    Returns:
        (np.ndarray): Matrix of shape (len(atracks), len(btracks)) holding `1 - IoU`.
    """
    # If either side already holds raw arrays, both sides are used as boxes without conversion
    raw_boxes_given = any(len(group) > 0 and isinstance(group[0], np.ndarray) for group in (atracks, btracks))
    if raw_boxes_given:
        boxes_a, boxes_b = atracks, btracks
    else:
        boxes_a = [track.tlbr for track in atracks]
        boxes_b = [track.tlbr for track in btracks]

    if not (len(boxes_a) and len(boxes_b)):
        # Nothing to compare: the overlap is an (empty) all-zero matrix
        overlap = np.zeros((len(boxes_a), len(boxes_b)), dtype=np.float32)
    else:
        overlap = bbox_ioa(np.ascontiguousarray(boxes_a, dtype=np.float32),
                           np.ascontiguousarray(boxes_b, dtype=np.float32),
                           iou=True)
    return 1 - overlap  # cost matrix
+ """ + + if cost_matrix.size == 0: + return cost_matrix + iou_sim = 1 - cost_matrix + det_scores = np.array([det.score for det in detections]) + det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) + fuse_sim = iou_sim * det_scores + return 1 - fuse_sim # fuse_cost diff --git a/ultralytics/utils/__init__.py b/ultralytics/utils/__init__.py new file mode 100644 index 0000000..88655d2 --- /dev/null +++ b/ultralytics/utils/__init__.py @@ -0,0 +1,942 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +import contextlib +import inspect +import logging.config +import os +import platform +import re +import subprocess +import sys +import threading +import urllib +import uuid +from pathlib import Path +from types import SimpleNamespace +from typing import Union + +import cv2 +import matplotlib.pyplot as plt +import numpy as np +import torch +import yaml +from tqdm import tqdm as tqdm_original + +from ultralytics import __version__ + +# PyTorch Multi-GPU DDP Constants +RANK = int(os.getenv('RANK', -1)) +LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html + +# Other Constants +FILE = Path(__file__).resolve() +ROOT = FILE.parents[1] # YOLO +ASSETS = ROOT / 'assets' # default images +DEFAULT_CFG_PATH = ROOT / 'cfg/default.yaml' +NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLOv5 multiprocessing threads +AUTOINSTALL = str(os.getenv('YOLO_AUTOINSTALL', True)).lower() == 'true' # global auto-install mode +VERBOSE = str(os.getenv('YOLO_VERBOSE', True)).lower() == 'true' # global verbose mode +TQDM_BAR_FORMAT = '{l_bar}{bar:10}{r_bar}' if VERBOSE else None # tqdm bar format +LOGGING_NAME = 'ultralytics' +MACOS, LINUX, WINDOWS = (platform.system() == x for x in ['Darwin', 'Linux', 'Windows']) # environment booleans +ARM64 = platform.machine() in ('arm64', 'aarch64') # ARM64 booleans +HELP_MSG = \ + """ + Usage examples for running YOLOv8: + + 1. 
Install the ultralytics package: + + pip install ultralytics + + 2. Use the Python SDK: + + from ultralytics import YOLO + + # Load a model + model = YOLO('yolov8n.yaml') # build a new model from scratch + model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + + # Use the model + results = model.train(data="coco128.yaml", epochs=3) # train the model + results = model.val() # evaluate model performance on the validation set + results = model('https://ultralytics.com/images/bus.jpg') # predict on an image + success = model.export(format='onnx') # export the model to ONNX format + + 3. Use the command line interface (CLI): + + YOLOv8 'yolo' CLI commands use the following syntax: + + yolo TASK MODE ARGS + + Where TASK (optional) is one of [detect, segment, classify] + MODE (required) is one of [train, val, predict, export] + ARGS (optional) are any number of custom 'arg=value' pairs like 'imgsz=320' that override defaults. + See all ARGS at https://docs.ultralytics.com/usage/cfg or with 'yolo cfg' + + - Train a detection model for 10 epochs with an initial learning_rate of 0.01 + yolo detect train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01 + + - Predict a YouTube video using a pretrained segmentation model at image size 320: + yolo segment predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 + + - Val a pretrained detection model at batch-size 1 and image size 640: + yolo detect val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640 + + - Export a YOLOv8n classification model to ONNX format at image size 224 by 128 (no TASK required) + yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128 + + - Run special commands: + yolo help + yolo checks + yolo version + yolo settings + yolo copy-cfg + yolo cfg + + Docs: https://docs.ultralytics.com + Community: https://community.ultralytics.com + GitHub: https://github.com/ultralytics/ultralytics + """ + +# Settings +torch.set_printoptions(linewidth=320, 
class SimpleClass:
    """Ultralytics SimpleClass is a base class providing helpful string representation, error reporting, and attribute
    access methods for easier debugging and usage.
    """

    # NOTE: the class docstring above is emitted at runtime by __getattr__, so its text is part of the behavior.

    def __str__(self):
        """Return a human-readable listing of the public, non-callable attributes of the object."""
        lines = []
        for attr_name in dir(self):
            value = getattr(self, attr_name)
            if callable(value) or attr_name.startswith('_'):
                continue
            if isinstance(value, SimpleClass):
                # Nested SimpleClass objects are shown by module and class name only
                described = f'{attr_name}: {value.__module__}.{value.__class__.__name__} object'
            else:
                described = f'{attr_name}: {repr(value)}'
            lines.append(described)
        header = f'{self.__module__}.{self.__class__.__name__} object with attributes:\n\n'
        return header + '\n'.join(lines)

    def __repr__(self):
        """Return the same text as `__str__`."""
        return self.__str__()

    def __getattr__(self, attr):
        """Raise an AttributeError naming the missing attribute and including the class documentation."""
        name = self.__class__.__name__
        raise AttributeError(f"'{name}' object has no attribute '{attr}'. See valid attributes below.\n{self.__doc__}")
def plt_settings(rcparams=None, backend='Agg'):
    """
    Decorator factory that runs a plotting function with temporary matplotlib rc parameters and backend.

    Example:
        @plt_settings({"font.size": 12})
        def plot(): ...

    Args:
        rcparams (dict, optional): rc parameters applied while the function runs. Defaults to
            {'font.size': 11} when None.
        backend (str, optional): Name of the backend to use while the function runs. Defaults to 'Agg'.

    Returns:
        (Callable): A decorator. The decorated function keeps its name and docstring, and the backend that was
            active before the call is restored afterwards, even when the function raises.
    """

    if rcparams is None:
        rcparams = {'font.size': 11}

    def decorator(func):
        """Wrap `func` so it runs under the requested rc parameters and backend."""
        from functools import wraps

        @wraps(func)
        def wrapper(*args, **kwargs):
            """Switch settings, call the original function, then restore the previous backend."""
            original_backend = plt.get_backend()
            switched = backend != original_backend
            if switched:
                plt.close('all')  # auto-close()ing of figures upon backend switching is deprecated since 3.8
                plt.switch_backend(backend)

            try:
                with plt.rc_context(rcparams):
                    return func(*args, **kwargs)
            finally:
                # Restore on success and on error, so a failing plot cannot leave the wrong backend active
                if switched:
                    plt.close('all')
                    plt.switch_backend(original_backend)

        return wrapper

    return decorator
class ThreadingLocked:
    """
    Decorator class that serializes calls to a function or method across threads. Each decorator instance owns one
    lock, and the decorated callable runs only while that lock is held.

    Attributes:
        lock (threading.Lock): The lock held for the duration of every call to the decorated function.

    Example:
        ```python
        from ultralytics.utils import ThreadingLocked

        @ThreadingLocked()
        def my_function():
            # Your code here
            pass
        ```
    """

    def __init__(self):
        """Create the lock that guards the decorated callable."""
        self.lock = threading.Lock()

    def __call__(self, f):
        """Return a wrapper around `f` that holds the lock while `f` runs."""
        import functools

        def decorated(*args, **kwargs):
            """Acquire the lock, call the wrapped function, and always release the lock."""
            self.lock.acquire()
            try:
                return f(*args, **kwargs)
            finally:
                self.lock.release()

        # Copy name, docstring and other metadata from the wrapped function
        return functools.update_wrapper(decorated, f)
def yaml_load(file='data.yaml', append_filename=False):
    """
    Read a YAML file into a dictionary.

    Args:
        file (str, optional): Path of the file to read; its suffix must be '.yaml' or '.yml'.
            Default is 'data.yaml'.
        append_filename (bool): When True, store the file name under the 'yaml_file' key. Default is False.

    Returns:
        (dict): Parsed YAML content (an empty dict for an empty file), optionally with the file name added.
    """
    path = Path(file)
    assert path.suffix in ('.yaml', '.yml'), f'Attempting to load non-YAML file {file} with yaml_load()'
    text = path.read_text(encoding='utf-8', errors='ignore')

    # Strip characters outside the accepted ranges before parsing
    if not text.isprintable():
        text = re.sub(r'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]+', '', text)

    parsed = yaml.safe_load(text)
    if not parsed:
        parsed = {}  # yaml.safe_load() may return None for empty files
    if append_filename:
        parsed['yaml_file'] = str(file)
    return parsed
def is_kaggle():
    """
    Report whether the environment variables look like a Kaggle kernel.

    Returns:
        (bool): True when PWD is '/kaggle/working' and KAGGLE_URL_BASE is 'https://www.kaggle.com',
            False otherwise.
    """
    env = os.environ
    in_working_dir = env.get('PWD') == '/kaggle/working'
    has_kaggle_url = env.get('KAGGLE_URL_BASE') == 'https://www.kaggle.com'
    return in_working_dir and has_kaggle_url
def is_pip_package(filepath: str = __name__) -> bool:
    """
    Determines if the module with the given dotted name is importable from an on-disk (or built-in) origin.

    Args:
        filepath (str): Dotted module name to look up (despite the parameter name, this is passed to
            importlib.util.find_spec() as a module name). Defaults to this module's __name__.

    Returns:
        (bool): True if a module spec with a non-None origin is found, False otherwise. Names that cannot be
            resolved at all (missing parent package, module without a __spec__) also yield False.
    """
    import importlib.util

    try:
        # find_spec() imports parent packages of dotted names and may raise instead of returning None
        spec = importlib.util.find_spec(filepath)
    except (ImportError, ValueError):
        # ModuleNotFoundError (an ImportError): parent package is missing
        # ValueError: module is in sys.modules but its __spec__ is None
        return False

    # A spec without an origin (e.g. a namespace package) is not treated as an installed package
    return spec is not None and spec.origin is not None
def get_git_dir():
    """
    Walk up from this file's location and return the first ancestor directory that contains a '.git' directory.

    Returns:
        (Path | None): The closest ancestor holding a '.git' directory, or None when no ancestor has one.
    """
    ancestors = Path(__file__).parents
    return next((folder for folder in ancestors if (folder / '.git').is_dir()), None)
def colorstr(*input):
    r"""
    Wrap a string in ANSI escape codes for the requested colors and styles.
    See https://en.wikipedia.org/wiki/ANSI_escape_code for background.

    Two call forms are accepted:
        - colorstr('color', 'style', 'your string'): every argument but the last names a color or style.
        - colorstr('your string'): a single argument is rendered with 'blue' and 'bold'.

    Args:
        *input (str): Color/style names followed by the value to render as the final argument.

    Supported Colors and Styles:
        Basic Colors: 'black', 'red', 'green', 'yellow', 'blue', 'magenta', 'cyan', 'white'
        Bright Colors: 'bright_black', 'bright_red', 'bright_green', 'bright_yellow',
                       'bright_blue', 'bright_magenta', 'bright_cyan', 'bright_white'
        Misc: 'end', 'bold', 'underline'

    Returns:
        (str): The rendered value prefixed with the selected codes and suffixed with the reset code.

    Examples:
        >>> colorstr('blue', 'bold', 'hello world')
        >>> '\033[34m\033[1mhello world\033[0m'
    """
    if len(input) > 1:
        *styles, text = input  # leading names are styles, the final item is the payload
    else:
        styles, text = ['blue', 'bold'], input[0]  # default styling for the single-argument form

    colors = {
        'black': '\033[30m',  # basic colors
        'red': '\033[31m',
        'green': '\033[32m',
        'yellow': '\033[33m',
        'blue': '\033[34m',
        'magenta': '\033[35m',
        'cyan': '\033[36m',
        'white': '\033[37m',
        'bright_black': '\033[90m',  # bright colors
        'bright_red': '\033[91m',
        'bright_green': '\033[92m',
        'bright_yellow': '\033[93m',
        'bright_blue': '\033[94m',
        'bright_magenta': '\033[95m',
        'bright_cyan': '\033[96m',
        'bright_white': '\033[97m',
        'end': '\033[0m',  # misc
        'bold': '\033[1m',
        'underline': '\033[4m'}

    prefix = ''.join(colors[style] for style in styles)
    return prefix + f'{text}' + colors['end']
def threaded(func):
    """
    Run the decorated function in a daemon thread on every call and return the started thread.

    Use as @threaded decorator.

    Args:
        func (callable): The function to execute in a background thread.

    Returns:
        (callable): A wrapper with the metadata of `func` (name, docstring) that starts a daemon
            `threading.Thread` running `func(*args, **kwargs)` and returns that thread. The return value of
            `func` itself is not propagated to the caller.
    """
    from functools import wraps  # local import keeps this block self-contained

    @wraps(func)  # preserve __name__/__doc__ of the wrapped function for logging and introspection
    def wrapper(*args, **kwargs):
        """Start `func` in a daemon thread and return the thread."""
        thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
        thread.start()
        return thread

    return wrapper
+ + Conditions required to send errors (ALL conditions must be met or no errors will be reported): + - sentry_sdk package is installed + - sync=True in YOLO settings + - pytest is not running + - running in a pip package installation + - running in a non-git directory + - running with rank -1 or 0 + - online environment + - CLI used to run package (checked with 'yolo' as the name of the main CLI command) + + The function also configures Sentry SDK to ignore KeyboardInterrupt and FileNotFoundError + exceptions and to exclude events with 'out of memory' in their exception message. + + Additionally, the function sets custom tags and user information for Sentry events. + """ + + def before_send(event, hint): + """ + Modify the event before sending it to Sentry based on specific exception types and messages. + + Args: + event (dict): The event dictionary containing information about the error. + hint (dict): A dictionary containing additional information about the error. + + Returns: + dict: The modified event or None if the event should not be sent to Sentry. 
+ """ + if 'exc_info' in hint: + exc_type, exc_value, tb = hint['exc_info'] + if exc_type in (KeyboardInterrupt, FileNotFoundError) \ + or 'out of memory' in str(exc_value): + return None # do not send event + + event['tags'] = { + 'sys_argv': sys.argv[0], + 'sys_argv_name': Path(sys.argv[0]).name, + 'install': 'git' if is_git_dir() else 'pip' if is_pip_package() else 'other', + 'os': ENVIRONMENT} + return event + + if SETTINGS['sync'] and \ + RANK in (-1, 0) and \ + Path(sys.argv[0]).name == 'yolo' and \ + not TESTS_RUNNING and \ + ONLINE and \ + is_pip_package() and \ + not is_git_dir(): + + # If sentry_sdk package is not installed then return and do not use Sentry + try: + import sentry_sdk # noqa + except ImportError: + return + + sentry_sdk.init( + dsn='https://5ff1556b71594bfea135ff0203a0d290@o4504521589325824.ingest.sentry.io/4504521592406016', + debug=False, + traces_sample_rate=1.0, + release=__version__, + environment='production', # 'dev' or 'production' + before_send=before_send, + ignore_errors=[KeyboardInterrupt, FileNotFoundError]) + sentry_sdk.set_user({'id': SETTINGS['uuid']}) # SHA-256 anonymized UUID hash + + +class SettingsManager(dict): + """ + Manages Ultralytics settings stored in a YAML file. + + Args: + file (str | Path): Path to the Ultralytics settings YAML file. Default is USER_CONFIG_DIR / 'settings.yaml'. + version (str): Settings version. In case of local version mismatch, new default settings will be saved. + """ + + def __init__(self, file=SETTINGS_YAML, version='0.0.4'): + """Initialize the SettingsManager with default settings, load and validate current settings from the YAML + file. 
+ """ + import copy + import hashlib + + from ultralytics.utils.checks import check_version + from ultralytics.utils.torch_utils import torch_distributed_zero_first + + git_dir = get_git_dir() + root = git_dir or Path() + datasets_root = (root.parent if git_dir and is_dir_writeable(root.parent) else root).resolve() + + self.file = Path(file) + self.version = version + self.defaults = { + 'settings_version': version, + 'datasets_dir': str(datasets_root / 'datasets'), + 'weights_dir': str(root / 'weights'), + 'runs_dir': str(root / 'runs'), + 'uuid': hashlib.sha256(str(uuid.getnode()).encode()).hexdigest(), + 'sync': True, + 'api_key': '', + 'clearml': True, # integrations + 'comet': True, + 'dvc': True, + 'hub': True, + 'mlflow': True, + 'neptune': True, + 'raytune': True, + 'tensorboard': True, + 'wandb': True} + + super().__init__(copy.deepcopy(self.defaults)) + + with torch_distributed_zero_first(RANK): + if not self.file.exists(): + self.save() + + self.load() + correct_keys = self.keys() == self.defaults.keys() + correct_types = all(type(a) is type(b) for a, b in zip(self.values(), self.defaults.values())) + correct_version = check_version(self['settings_version'], self.version) + if not (correct_keys and correct_types and correct_version): + LOGGER.warning( + 'WARNING ⚠️ Ultralytics settings reset to default values. This may be due to a possible problem ' + 'with your settings or a recent ultralytics package update. ' + f"\nView settings with 'yolo settings' or at '{self.file}'" + "\nUpdate settings with 'yolo settings key=value', i.e. 
def deprecation_warn(arg, new_arg, version=None):
    """
    Issue a deprecation warning when a deprecated argument is used, suggesting an updated argument.

    Args:
        arg (str): Name of the deprecated argument.
        new_arg (str): Name of the argument to use instead.
        version (float | str, optional): Version in which `arg` will be removed. When falsy, it is derived from the
            first three characters of the package `__version__` plus 0.2.
    """
    if not version:
        # round() keeps float artifacts out of the message (e.g. 8.1 + 0.2 == 8.299999999999999)
        version = round(float(__version__[:3]) + 0.2, 1)  # deprecate after 2nd major release
    LOGGER.warning(f"WARNING ⚠️ '{arg}' is deprecated and will be removed in 'ultralytics {version}' in the future. "
                   f"Please use '{new_arg}' instead.")
https://url.com/file.txt?auth -> file.txt.""" + return Path(clean_url(url)).name + + +# Run below code on utils init ------------------------------------------------------------------------------------ + +# Check first-install steps +PREFIX = colorstr('Ultralytics: ') +SETTINGS = SettingsManager() # initialize settings +DATASETS_DIR = Path(SETTINGS['datasets_dir']) # global datasets directory +WEIGHTS_DIR = Path(SETTINGS['weights_dir']) # global weights directory +RUNS_DIR = Path(SETTINGS['runs_dir']) # global runs directory +ENVIRONMENT = 'Colab' if is_colab() else 'Kaggle' if is_kaggle() else 'Jupyter' if is_jupyter() else \ + 'Docker' if is_docker() else platform.system() +TESTS_RUNNING = is_pytest_running() or is_github_action_running() +set_sentry() + +# Apply monkey patches +from .patches import imread, imshow, imwrite, torch_save + +torch.save = torch_save +if WINDOWS: + # Apply cv2 patches for non-ASCII and non-UTF characters in image paths + cv2.imread, cv2.imwrite, cv2.imshow = imread, imwrite, imshow diff --git a/ultralytics/utils/autobatch.py b/ultralytics/utils/autobatch.py new file mode 100644 index 0000000..172a4c1 --- /dev/null +++ b/ultralytics/utils/autobatch.py @@ -0,0 +1,88 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +"""Functions for estimating the best YOLO batch size to use a fraction of the available CUDA memory in PyTorch.""" + +from copy import deepcopy + +import numpy as np +import torch + +from ultralytics.utils import DEFAULT_CFG, LOGGER, colorstr +from ultralytics.utils.torch_utils import profile + + +def check_train_batch_size(model, imgsz=640, amp=True): + """ + Check YOLO training batch size using the autobatch() function. + + Args: + model (torch.nn.Module): YOLO model to check batch size for. + imgsz (int): Image size used for training. + amp (bool): If True, use automatic mixed precision (AMP) for training. + + Returns: + (int): Optimal batch size computed using the autobatch() function. 
def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch):
    """
    Automatically estimate the best YOLO batch size to use a fraction of the available CUDA memory.

    The model is profiled at a few small batch sizes, a straight line is fitted to the measured memory use, and the
    line is solved for the batch size that would consume `fraction` of the memory estimated as free.

    Args:
        model (torch.nn.module): YOLO model to compute batch size for.
        imgsz (int, optional): The image size used as input for the YOLO model. Defaults to 640.
        fraction (float, optional): The fraction of available CUDA memory to use. Defaults to 0.60.
        batch_size (int, optional): The default batch size to use if an error is detected. Defaults to
            DEFAULT_CFG.batch.

    Returns:
        (int): The optimal batch size, or `batch_size` when running on CPU, when cudnn.benchmark is enabled, when the
            estimate falls outside 1..1024, or when any exception is raised while profiling.
    """

    # Check device
    prefix = colorstr('AutoBatch: ')
    LOGGER.info(f'{prefix}Computing optimal batch size for imgsz={imgsz}')
    device = next(model.parameters()).device  # get model device
    if device.type == 'cpu':
        LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}')
        return batch_size
    if torch.backends.cudnn.benchmark:
        LOGGER.info(f'{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}')
        return batch_size

    # Inspect CUDA memory
    gb = 1 << 30  # bytes to GiB (1024 ** 3)
    d = str(device).upper()  # 'CUDA:0'
    properties = torch.cuda.get_device_properties(device)  # device properties
    t = properties.total_memory / gb  # GiB total
    r = torch.cuda.memory_reserved(device) / gb  # GiB reserved
    a = torch.cuda.memory_allocated(device) / gb  # GiB allocated
    f = t - (r + a)  # GiB free
    LOGGER.info(f'{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free')

    # Profile batch sizes
    batch_sizes = [1, 2, 4, 8, 16]
    try:
        img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes]
        results = profile(img, model, n=3, device=device)

        # Fit a solution
        # Pair every successful measurement with the batch size that produced it, so a failure that is not the
        # last entry cannot shift later memory readings onto smaller batch sizes
        fit_points = [(bs, res[2]) for bs, res in zip(batch_sizes, results) if res]  # memory [2]
        x, y = zip(*fit_points)  # raises if every size failed; handled by the except below
        p = np.polyfit(x, y, deg=1)  # first degree polynomial fit
        b = int((f * fraction - p[1]) / p[0])  # y intercept (optimal batch size)
        if None in results:  # some sizes failed
            i = results.index(None)  # first fail index
            if b >= batch_sizes[i]:  # y intercept above failure point
                b = batch_sizes[max(i - 1, 0)]  # select prior safe point
        if b < 1 or b > 1024:  # b outside of safe range
            b = batch_size
            LOGGER.info(f'{prefix}WARNING ⚠️ CUDA anomaly detected, using default batch-size {batch_size}.')

        fraction = (np.polyval(p, b) + r + a) / t  # actual fraction predicted
        LOGGER.info(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅')
        return b
    except Exception as e:
        # Deliberate best-effort: any profiling or fitting problem falls back to the default batch size
        LOGGER.warning(f'{prefix}WARNING ⚠️ error detected: {e}, using default batch-size {batch_size}.')
        return batch_size
def benchmark(model=WEIGHTS_DIR / 'yolov8n.pt',
              data=None,
              imgsz=160,
              half=False,
              int8=False,
              device='cpu',
              verbose=False):
    """
    Benchmark a YOLO model across different formats for speed and accuracy.

    For every row of export_formats() the model is exported, used for one prediction, and validated; the outcome
    of each format is collected into a table which is logged and appended to 'benchmarks.log'.

    Args:
        model (str | Path | optional): Path to the model file or directory. Default is
            Path(SETTINGS['weights_dir']) / 'yolov8n.pt'.
        data (str, optional): Dataset to evaluate on, inherited from TASK2DATA if not passed. Default is None.
        imgsz (int, optional): Image size for the benchmark. Default is 160.
        half (bool, optional): Use half-precision for the model if True. Default is False.
        int8 (bool, optional): Use int8-precision for the model if True. Default is False.
        device (str, optional): Device to run the benchmark on, either 'cpu' or 'cuda'. Default is 'cpu'.
        verbose (bool | float | optional): If True or a float, assert benchmarks pass with given metric.
            Default is False.

    Returns:
        df (pandas.DataFrame): A pandas DataFrame with benchmark results for each format, including file size,
            metric, and inference time.

    Raises:
        AssertionError: If `verbose` is truthy and a format fails with anything other than an AssertionError, or
            if `verbose` is a float and a measured metric does not exceed it.

    Example:
        ```python
        from ultralytics.utils.benchmarks import benchmark

        benchmark(model='yolov8n.pt', imgsz=640)
        ```
    """

    import pandas as pd
    pd.options.display.max_columns = 10
    pd.options.display.width = 120
    device = select_device(device, verbose=False)
    if isinstance(model, (str, Path)):
        model = YOLO(model)

    # Resolve dataset and metric key once, up front: `key` names a DataFrame column below and must exist even if
    # every format fails before reaching validation
    data = data or TASK2DATA[model.task]  # task to dataset, i.e. coco8.yaml for task=detect
    key = TASK2METRIC[model.task]  # task to metric, i.e. metrics/mAP50-95(B) for task=detect

    y = []
    t0 = time.time()
    for i, (name, fmt, suffix, cpu, gpu) in export_formats().iterrows():  # index, (name, format, suffix, CPU, GPU)
        emoji, filename = '❌', None  # export defaults
        try:
            assert i != 9 or LINUX, 'Edge TPU export only supported on Linux'
            if i == 10:
                assert MACOS or LINUX, 'TF.js export only supported on macOS and Linux'
            elif i == 11:
                assert sys.version_info < (3, 11), 'PaddlePaddle export only supported on Python<=3.10'
            if 'cpu' in device.type:
                assert cpu, 'inference not supported on CPU'
            if 'cuda' in device.type:
                assert gpu, 'inference not supported on GPU'

            # Export
            if fmt == '-':
                filename = model.ckpt_path or model.cfg
                exported_model = model  # PyTorch format
            else:
                filename = model.export(imgsz=imgsz, format=fmt, half=half, int8=int8, device=device, verbose=False)
                exported_model = YOLO(filename, task=model.task)
                assert suffix in str(filename), 'export failed'
            emoji = '❎'  # indicates export succeeded

            # Predict
            assert model.task != 'pose' or i != 7, 'GraphDef Pose inference is not supported'
            assert i not in (9, 10), 'inference not supported'  # Edge TPU and TF.js are unsupported
            assert i != 5 or platform.system() == 'Darwin', 'inference only supported on macOS>=10.13'  # CoreML
            exported_model.predict(ASSETS / 'bus.jpg', imgsz=imgsz, device=device, half=half)

            # Validate
            results = exported_model.val(data=data,
                                         batch=1,
                                         imgsz=imgsz,
                                         plots=False,
                                         device=device,
                                         half=half,
                                         int8=int8,
                                         verbose=False)
            metric, speed = results.results_dict[key], results.speed['inference']
            y.append([name, '✅', round(file_size(filename), 1), round(metric, 4), round(speed, 2)])
        except Exception as e:
            if verbose:
                # In verbose mode only the assertion-based "unsupported" skips above are tolerated
                assert type(e) is AssertionError, f'Benchmark failure for {name}: {e}'
            LOGGER.warning(f'ERROR ❌️ Benchmark failure for {name}: {e}')
            y.append([name, emoji, round(file_size(filename), 1), None, None])  # mAP, t_inference

    # Print results
    check_yolo(device=device)  # print system info
    df = pd.DataFrame(y, columns=['Format', 'Status❔', 'Size (MB)', key, 'Inference time (ms/im)'])

    # Same fallback as the export step: models built from a YAML have no checkpoint path
    name = Path(model.ckpt_path or model.cfg).name
    s = f'\nBenchmarks complete for {name} on {data} at imgsz={imgsz} ({time.time() - t0:.2f}s)\n{df}\n'
    LOGGER.info(s)
    with open('benchmarks.log', 'a', errors='ignore', encoding='utf-8') as f:
        f.write(s)

    if verbose and isinstance(verbose, float):
        metrics = df[key].array  # values to compare to floor
        floor = verbose  # minimum metric floor to pass, i.e. = 0.29 mAP for YOLOv5n
        assert all(x > floor for x in metrics if pd.notna(x)), f'Benchmark failure: metric(s) < floor {floor}'

    return df
+ num_timed_runs (int): Number of timed runs for the profiling. Default is 100. + num_warmup_runs (int): Number of warmup runs before profiling. Default is 10. + min_time (float): Minimum number of seconds to profile for. Default is 60. + imgsz (int): Image size used in the models. Default is 640. + + Methods: + profile(): Profiles the models and prints the result. + + Example: + ```python + from ultralytics.utils.benchmarks import ProfileModels + + ProfileModels(['yolov8n.yaml', 'yolov8s.yaml'], imgsz=640).profile() + ``` + """ + + def __init__(self, + paths: list, + num_timed_runs=100, + num_warmup_runs=10, + min_time=60, + imgsz=640, + half=True, + trt=True, + device=None): + """ + Initialize the ProfileModels class for profiling models. + + Args: + paths (list): List of paths of the models to be profiled. + num_timed_runs (int, optional): Number of timed runs for the profiling. Default is 100. + num_warmup_runs (int, optional): Number of warmup runs before the actual profiling starts. Default is 10. + min_time (float, optional): Minimum time in seconds for profiling a model. Default is 60. + imgsz (int, optional): Size of the image used during profiling. Default is 640. + half (bool, optional): Flag to indicate whether to use half-precision floating point for profiling. Default is True. + trt (bool, optional): Flag to indicate whether to profile using TensorRT. Default is True. + device (torch.device, optional): Device used for profiling. If None, it is determined automatically. Default is None. 
+ """ + self.paths = paths + self.num_timed_runs = num_timed_runs + self.num_warmup_runs = num_warmup_runs + self.min_time = min_time + self.imgsz = imgsz + self.half = half + self.trt = trt # run TensorRT profiling + self.device = device or torch.device(0 if torch.cuda.is_available() else 'cpu') + + def profile(self): + """Logs the benchmarking results of a model, checks metrics against floor and returns the results.""" + files = self.get_files() + + if not files: + print('No matching *.pt or *.onnx files found.') + return + + table_rows = [] + output = [] + for file in files: + engine_file = file.with_suffix('.engine') + if file.suffix in ('.pt', '.yaml', '.yml'): + model = YOLO(str(file)) + model.fuse() # to report correct params and GFLOPs in model.info() + model_info = model.info() + if self.trt and self.device.type != 'cpu' and not engine_file.is_file(): + engine_file = model.export(format='engine', + half=self.half, + imgsz=self.imgsz, + device=self.device, + verbose=False) + onnx_file = model.export(format='onnx', + half=self.half, + imgsz=self.imgsz, + simplify=True, + device=self.device, + verbose=False) + elif file.suffix == '.onnx': + model_info = self.get_onnx_model_info(file) + onnx_file = file + else: + continue + + t_engine = self.profile_tensorrt_model(str(engine_file)) + t_onnx = self.profile_onnx_model(str(onnx_file)) + table_rows.append(self.generate_table_row(file.stem, t_onnx, t_engine, model_info)) + output.append(self.generate_results_dict(file.stem, t_onnx, t_engine, model_info)) + + self.print_table(table_rows) + return output + + def get_files(self): + """Returns a list of paths for all relevant model files given by the user.""" + files = [] + for path in self.paths: + path = Path(path) + if path.is_dir(): + extensions = ['*.pt', '*.onnx', '*.yaml'] + files.extend([file for ext in extensions for file in glob.glob(str(path / ext))]) + elif path.suffix in {'.pt', '.yaml', '.yml'}: # add non-existing + files.append(str(path)) + else: + 
def iterative_sigma_clipping(self, data, sigma=2, max_iters=3):
    """
    Repeatedly discard samples lying `sigma` standard deviations or more from the mean.

    Args:
        data (array-like): Numeric samples to clip.
        sigma (float, optional): Half-width of the accepted window in standard deviations. Default is 2.
        max_iters (int, optional): Maximum number of clipping passes. Default is 3.

    Returns:
        (np.ndarray): The samples that survived clipping. A pass that would reject every sample (for example when
            all remaining samples are identical and the standard deviation is zero) is not applied, so a
            non-empty input never yields an empty result.
    """
    data = np.array(data)
    for _ in range(max_iters):
        mean, std = np.mean(data), np.std(data)
        clipped_data = data[(data > mean - sigma * std) & (data < mean + sigma * std)]
        # Stop when converged (nothing removed) or when the strict window is empty (zero spread); continuing in
        # the latter case would discard all samples and make the caller's mean/std NaN
        if len(clipped_data) in (0, len(data)):
            break
        data = clipped_data
    return data
onnx_file: str, eps: float = 1e-3): + """Profiles an ONNX model by executing it multiple times and returns the mean and standard deviation of run + times. + """ + check_requirements('onnxruntime') + import onnxruntime as ort + + # Session with either 'TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider' + sess_options = ort.SessionOptions() + sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL + sess_options.intra_op_num_threads = 8 # Limit the number of threads + sess = ort.InferenceSession(onnx_file, sess_options, providers=['CPUExecutionProvider']) + + input_tensor = sess.get_inputs()[0] + input_type = input_tensor.type + + # Mapping ONNX datatype to numpy datatype + if 'float16' in input_type: + input_dtype = np.float16 + elif 'float' in input_type: + input_dtype = np.float32 + elif 'double' in input_type: + input_dtype = np.float64 + elif 'int64' in input_type: + input_dtype = np.int64 + elif 'int32' in input_type: + input_dtype = np.int32 + else: + raise ValueError(f'Unsupported ONNX datatype {input_type}') + + input_data = np.random.rand(*input_tensor.shape).astype(input_dtype) + input_name = input_tensor.name + output_name = sess.get_outputs()[0].name + + # Warmup runs + elapsed = 0.0 + for _ in range(3): + start_time = time.time() + for _ in range(self.num_warmup_runs): + sess.run([output_name], {input_name: input_data}) + elapsed = time.time() - start_time + + # Compute number of runs as higher of min_time or num_timed_runs + num_runs = max(round(self.min_time / (elapsed + eps) * self.num_warmup_runs), self.num_timed_runs) + + # Timed runs + run_times = [] + for _ in TQDM(range(num_runs), desc=onnx_file): + start_time = time.time() + sess.run([output_name], {input_name: input_data}) + run_times.append((time.time() - start_time) * 1000) # Convert to milliseconds + + run_times = self.iterative_sigma_clipping(np.array(run_times), sigma=2, max_iters=5) # sigma clipping + return np.mean(run_times), 
np.std(run_times) + + def generate_table_row(self, model_name, t_onnx, t_engine, model_info): + """Generates a formatted string for a table row that includes model performance and metric details.""" + layers, params, gradients, flops = model_info + return f'| {model_name:18s} | {self.imgsz} | - | {t_onnx[0]:.2f} ± {t_onnx[1]:.2f} ms | {t_engine[0]:.2f} ± {t_engine[1]:.2f} ms | {params / 1e6:.1f} | {flops:.1f} |' + + def generate_results_dict(self, model_name, t_onnx, t_engine, model_info): + """Generates a dictionary of model details including name, parameters, GFLOPS and speed metrics.""" + layers, params, gradients, flops = model_info + return { + 'model/name': model_name, + 'model/parameters': params, + 'model/GFLOPs': round(flops, 3), + 'model/speed_ONNX(ms)': round(t_onnx[0], 3), + 'model/speed_TensorRT(ms)': round(t_engine[0], 3)} + + def print_table(self, table_rows): + """Formats and prints a comparison table for different models with given statistics and performance data.""" + gpu = torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'GPU' + header = f'| Model | size
(pixels) | mAPval
50-95 | Speed
CPU ONNX
(ms) | Speed
{gpu} TensorRT
(ms) | params
(M) | FLOPs
(B) |' + separator = '|-------------|---------------------|--------------------|------------------------------|-----------------------------------|------------------|-----------------|' + + print(f'\n\n{header}') + print(separator) + for row in table_rows: + print(row) diff --git a/ultralytics/utils/callbacks/__init__.py b/ultralytics/utils/callbacks/__init__.py new file mode 100644 index 0000000..8ad4ad6 --- /dev/null +++ b/ultralytics/utils/callbacks/__init__.py @@ -0,0 +1,5 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from .base import add_integration_callbacks, default_callbacks, get_default_callbacks + +__all__ = 'add_integration_callbacks', 'default_callbacks', 'get_default_callbacks' diff --git a/ultralytics/utils/callbacks/base.py b/ultralytics/utils/callbacks/base.py new file mode 100644 index 0000000..211ae5b --- /dev/null +++ b/ultralytics/utils/callbacks/base.py @@ -0,0 +1,218 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +"""Base callbacks.""" + +from collections import defaultdict +from copy import deepcopy + +# Trainer callbacks ---------------------------------------------------------------------------------------------------- + + +def on_pretrain_routine_start(trainer): + """Called before the pretraining routine starts.""" + pass + + +def on_pretrain_routine_end(trainer): + """Called after the pretraining routine ends.""" + pass + + +def on_train_start(trainer): + """Called when the training starts.""" + pass + + +def on_train_epoch_start(trainer): + """Called at the start of each training epoch.""" + pass + + +def on_train_batch_start(trainer): + """Called at the start of each training batch.""" + pass + + +def optimizer_step(trainer): + """Called when the optimizer takes a step.""" + pass + + +def on_before_zero_grad(trainer): + """Called before the gradients are set to zero.""" + pass + + +def on_train_batch_end(trainer): + """Called at the end of each training batch.""" + pass + + +def on_train_epoch_end(trainer): + """Called at the end of each 
training epoch.""" + pass + + +def on_fit_epoch_end(trainer): + """Called at the end of each fit epoch (train + val).""" + pass + + +def on_model_save(trainer): + """Called when the model is saved.""" + pass + + +def on_train_end(trainer): + """Called when the training ends.""" + pass + + +def on_params_update(trainer): + """Called when the model parameters are updated.""" + pass + + +def teardown(trainer): + """Called during the teardown of the training process.""" + pass + + +# Validator callbacks -------------------------------------------------------------------------------------------------- + + +def on_val_start(validator): + """Called when the validation starts.""" + pass + + +def on_val_batch_start(validator): + """Called at the start of each validation batch.""" + pass + + +def on_val_batch_end(validator): + """Called at the end of each validation batch.""" + pass + + +def on_val_end(validator): + """Called when the validation ends.""" + pass + + +# Predictor callbacks -------------------------------------------------------------------------------------------------- + + +def on_predict_start(predictor): + """Called when the prediction starts.""" + pass + + +def on_predict_batch_start(predictor): + """Called at the start of each prediction batch.""" + pass + + +def on_predict_batch_end(predictor): + """Called at the end of each prediction batch.""" + pass + + +def on_predict_postprocess_end(predictor): + """Called after the post-processing of the prediction ends.""" + pass + + +def on_predict_end(predictor): + """Called when the prediction ends.""" + pass + + +# Exporter callbacks --------------------------------------------------------------------------------------------------- + + +def on_export_start(exporter): + """Called when the model export starts.""" + pass + + +def on_export_end(exporter): + """Called when the model export ends.""" + pass + + +default_callbacks = { + # Run in trainer + 'on_pretrain_routine_start': [on_pretrain_routine_start], + 
'on_pretrain_routine_end': [on_pretrain_routine_end], + 'on_train_start': [on_train_start], + 'on_train_epoch_start': [on_train_epoch_start], + 'on_train_batch_start': [on_train_batch_start], + 'optimizer_step': [optimizer_step], + 'on_before_zero_grad': [on_before_zero_grad], + 'on_train_batch_end': [on_train_batch_end], + 'on_train_epoch_end': [on_train_epoch_end], + 'on_fit_epoch_end': [on_fit_epoch_end], # fit = train + val + 'on_model_save': [on_model_save], + 'on_train_end': [on_train_end], + 'on_params_update': [on_params_update], + 'teardown': [teardown], + + # Run in validator + 'on_val_start': [on_val_start], + 'on_val_batch_start': [on_val_batch_start], + 'on_val_batch_end': [on_val_batch_end], + 'on_val_end': [on_val_end], + + # Run in predictor + 'on_predict_start': [on_predict_start], + 'on_predict_batch_start': [on_predict_batch_start], + 'on_predict_postprocess_end': [on_predict_postprocess_end], + 'on_predict_batch_end': [on_predict_batch_end], + 'on_predict_end': [on_predict_end], + + # Run in exporter + 'on_export_start': [on_export_start], + 'on_export_end': [on_export_end]} + + +def get_default_callbacks(): + """ + Return a copy of the default_callbacks dictionary with lists as default values. + + Returns: + (defaultdict): A defaultdict with keys from default_callbacks and empty lists as default values. + """ + return defaultdict(list, deepcopy(default_callbacks)) + + +def add_integration_callbacks(instance): + """ + Add integration callbacks from various sources to the instance's callbacks. + + Args: + instance (Trainer, Predictor, Validator, Exporter): An object with a 'callbacks' attribute that is a dictionary + of callback lists. 
+ """ + + # Load HUB callbacks + from .hub import callbacks as hub_cb + callbacks_list = [hub_cb] + + # Load training callbacks + if 'Trainer' in instance.__class__.__name__: + from .clearml import callbacks as clear_cb + from .comet import callbacks as comet_cb + from .dvc import callbacks as dvc_cb + from .mlflow import callbacks as mlflow_cb + from .neptune import callbacks as neptune_cb + from .raytune import callbacks as tune_cb + from .tensorboard import callbacks as tb_cb + from .wb import callbacks as wb_cb + callbacks_list.extend([clear_cb, comet_cb, dvc_cb, mlflow_cb, neptune_cb, tune_cb, tb_cb, wb_cb]) + + # Add the callbacks to the callbacks dictionary + for callbacks in callbacks_list: + for k, v in callbacks.items(): + if v not in instance.callbacks[k]: + instance.callbacks[k].append(v) diff --git a/ultralytics/utils/callbacks/clearml.py b/ultralytics/utils/callbacks/clearml.py new file mode 100644 index 0000000..dfb2203 --- /dev/null +++ b/ultralytics/utils/callbacks/clearml.py @@ -0,0 +1,140 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING + +try: + assert not TESTS_RUNNING # do not log pytest + assert SETTINGS['clearml'] is True # verify integration is enabled + import clearml + from clearml import Task + from clearml.binding.frameworks.pytorch_bind import PatchPyTorchModelIO + from clearml.binding.matplotlib_bind import PatchedMatplotlib + + assert hasattr(clearml, '__version__') # verify package is not directory + +except (ImportError, AssertionError): + clearml = None + + +def _log_debug_samples(files, title='Debug Samples') -> None: + """ + Log files (images) as debug samples in the ClearML task. + + Args: + files (list): A list of file paths in PosixPath format. + title (str): A title that groups together images with the same values. 
+ """ + import re + + if task := Task.current_task(): + for f in files: + if f.exists(): + it = re.search(r'_batch(\d+)', f.name) + iteration = int(it.groups()[0]) if it else 0 + task.get_logger().report_image(title=title, + series=f.name.replace(it.group(), ''), + local_path=str(f), + iteration=iteration) + + +def _log_plot(title, plot_path) -> None: + """ + Log an image as a plot in the plot section of ClearML. + + Args: + title (str): The title of the plot. + plot_path (str): The path to the saved image file. + """ + import matplotlib.image as mpimg + import matplotlib.pyplot as plt + + img = mpimg.imread(plot_path) + fig = plt.figure() + ax = fig.add_axes([0, 0, 1, 1], frameon=False, aspect='auto', xticks=[], yticks=[]) # no ticks + ax.imshow(img) + + Task.current_task().get_logger().report_matplotlib_figure(title=title, + series='', + figure=fig, + report_interactive=False) + + +def on_pretrain_routine_start(trainer): + """Runs at start of pretraining routine; initializes and connects/ logs task to ClearML.""" + try: + if task := Task.current_task(): + # Make sure the automatic pytorch and matplotlib bindings are disabled! + # We are logging these plots and model files manually in the integration + PatchPyTorchModelIO.update_current_task(None) + PatchedMatplotlib.update_current_task(None) + else: + task = Task.init(project_name=trainer.args.project or 'YOLOv8', + task_name=trainer.args.name, + tags=['YOLOv8'], + output_uri=True, + reuse_last_task_id=False, + auto_connect_frameworks={ + 'pytorch': False, + 'matplotlib': False}) + LOGGER.warning('ClearML Initialized a new task. If you want to run remotely, ' + 'please add clearml-init and connect your arguments before initializing YOLO.') + task.connect(vars(trainer.args), name='General') + except Exception as e: + LOGGER.warning(f'WARNING ⚠️ ClearML installed but not initialized correctly, not logging this run. 
{e}') + + +def on_train_epoch_end(trainer): + """Logs debug samples for the first epoch of YOLO training and report current training progress.""" + if task := Task.current_task(): + # Log debug samples + if trainer.epoch == 1: + _log_debug_samples(sorted(trainer.save_dir.glob('train_batch*.jpg')), 'Mosaic') + # Report the current training progress + for k, v in trainer.validator.metrics.results_dict.items(): + task.get_logger().report_scalar('train', k, v, iteration=trainer.epoch) + + +def on_fit_epoch_end(trainer): + """Reports model information to logger at the end of an epoch.""" + if task := Task.current_task(): + # You should have access to the validation bboxes under jdict + task.get_logger().report_scalar(title='Epoch Time', + series='Epoch Time', + value=trainer.epoch_time, + iteration=trainer.epoch) + if trainer.epoch == 0: + from ultralytics.utils.torch_utils import model_info_for_loggers + for k, v in model_info_for_loggers(trainer).items(): + task.get_logger().report_single_value(k, v) + + +def on_val_end(validator): + """Logs validation results including labels and predictions.""" + if Task.current_task(): + # Log val_labels and val_pred + _log_debug_samples(sorted(validator.save_dir.glob('val*.jpg')), 'Validation') + + +def on_train_end(trainer): + """Logs final model and its name on training completion.""" + if task := Task.current_task(): + # Log final results, CM matrix + PR plots + files = [ + 'results.png', 'confusion_matrix.png', 'confusion_matrix_normalized.png', + *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] + files = [(trainer.save_dir / f) for f in files if (trainer.save_dir / f).exists()] # filter + for f in files: + _log_plot(title=f.stem, plot_path=f) + # Report final metrics + for k, v in trainer.validator.metrics.results_dict.items(): + task.get_logger().report_single_value(k, v) + # Log the final model + task.update_output_model(model_path=str(trainer.best), model_name=trainer.args.name, auto_delete_file=False) + + +callbacks = 
{ + 'on_pretrain_routine_start': on_pretrain_routine_start, + 'on_train_epoch_end': on_train_epoch_end, + 'on_fit_epoch_end': on_fit_epoch_end, + 'on_val_end': on_val_end, + 'on_train_end': on_train_end} if clearml else {} diff --git a/ultralytics/utils/callbacks/comet.py b/ultralytics/utils/callbacks/comet.py new file mode 100644 index 0000000..e8016f4 --- /dev/null +++ b/ultralytics/utils/callbacks/comet.py @@ -0,0 +1,378 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from ultralytics.utils import LOGGER, RANK, SETTINGS, TESTS_RUNNING, ops + +try: + assert not TESTS_RUNNING # do not log pytest + assert SETTINGS['comet'] is True # verify integration is enabled + import comet_ml + + assert hasattr(comet_ml, '__version__') # verify package is not directory + + import os + from pathlib import Path + + # Ensures certain logging functions only run for supported tasks + COMET_SUPPORTED_TASKS = ['detect'] + + # Names of plots created by YOLOv8 that are logged to Comet + EVALUATION_PLOT_NAMES = 'F1_curve', 'P_curve', 'R_curve', 'PR_curve', 'confusion_matrix' + LABEL_PLOT_NAMES = 'labels', 'labels_correlogram' + + _comet_image_prediction_count = 0 + +except (ImportError, AssertionError): + comet_ml = None + + +def _get_comet_mode(): + """Returns the mode of comet set in the environment variables, defaults to 'online' if not set.""" + return os.getenv('COMET_MODE', 'online') + + +def _get_comet_model_name(): + """Returns the model name for Comet from the environment variable 'COMET_MODEL_NAME' or defaults to 'YOLOv8'.""" + return os.getenv('COMET_MODEL_NAME', 'YOLOv8') + + +def _get_eval_batch_logging_interval(): + """Get the evaluation batch logging interval from environment variable or use default value 1.""" + return int(os.getenv('COMET_EVAL_BATCH_LOGGING_INTERVAL', 1)) + + +def _get_max_image_predictions_to_log(): + """Get the maximum number of image predictions to log from the environment variables.""" + return int(os.getenv('COMET_MAX_IMAGE_PREDICTIONS', 100)) + + 
+def _scale_confidence_score(score): + """Scales the given confidence score by a factor specified in an environment variable.""" + scale = float(os.getenv('COMET_MAX_CONFIDENCE_SCORE', 100.0)) + return score * scale + + +def _should_log_confusion_matrix(): + """Determines if the confusion matrix should be logged based on the environment variable settings.""" + return os.getenv('COMET_EVAL_LOG_CONFUSION_MATRIX', 'false').lower() == 'true' + + +def _should_log_image_predictions(): + """Determines whether to log image predictions based on a specified environment variable.""" + return os.getenv('COMET_EVAL_LOG_IMAGE_PREDICTIONS', 'true').lower() == 'true' + + +def _get_experiment_type(mode, project_name): + """Return an experiment based on mode and project name.""" + if mode == 'offline': + return comet_ml.OfflineExperiment(project_name=project_name) + + return comet_ml.Experiment(project_name=project_name) + + +def _create_experiment(args): + """Ensures that the experiment object is only created in a single process during distributed training.""" + if RANK not in (-1, 0): + return + try: + comet_mode = _get_comet_mode() + _project_name = os.getenv('COMET_PROJECT_NAME', args.project) + experiment = _get_experiment_type(comet_mode, _project_name) + experiment.log_parameters(vars(args)) + experiment.log_others({ + 'eval_batch_logging_interval': _get_eval_batch_logging_interval(), + 'log_confusion_matrix_on_eval': _should_log_confusion_matrix(), + 'log_image_predictions': _should_log_image_predictions(), + 'max_image_predictions': _get_max_image_predictions_to_log(), }) + experiment.log_other('Created from', 'yolov8') + + except Exception as e: + LOGGER.warning(f'WARNING ⚠️ Comet installed but not initialized correctly, not logging this run. 
{e}') + + +def _fetch_trainer_metadata(trainer): + """Returns metadata for YOLO training including epoch and asset saving status.""" + curr_epoch = trainer.epoch + 1 + + train_num_steps_per_epoch = len(trainer.train_loader.dataset) // trainer.batch_size + curr_step = curr_epoch * train_num_steps_per_epoch + final_epoch = curr_epoch == trainer.epochs + + save = trainer.args.save + save_period = trainer.args.save_period + save_interval = curr_epoch % save_period == 0 + save_assets = save and save_period > 0 and save_interval and not final_epoch + + return dict( + curr_epoch=curr_epoch, + curr_step=curr_step, + save_assets=save_assets, + final_epoch=final_epoch, + ) + + +def _scale_bounding_box_to_original_image_shape(box, resized_image_shape, original_image_shape, ratio_pad): + """ + YOLOv8 resizes images during training and the label values are normalized based on this resized shape. + + This function rescales the bounding box labels to the original image shape. + """ + + resized_image_height, resized_image_width = resized_image_shape + + # Convert normalized xywh format predictions to xyxy in resized scale format + box = ops.xywhn2xyxy(box, h=resized_image_height, w=resized_image_width) + # Scale box predictions from resized image scale back to original image scale + box = ops.scale_boxes(resized_image_shape, box, original_image_shape, ratio_pad) + # Convert bounding box format from xyxy to xywh for Comet logging + box = ops.xyxy2xywh(box) + # Adjust xy center to correspond top-left corner + box[:2] -= box[2:] / 2 + box = box.tolist() + + return box + + +def _format_ground_truth_annotations_for_detection(img_idx, image_path, batch, class_name_map=None): + """Format ground truth annotations for detection.""" + indices = batch['batch_idx'] == img_idx + bboxes = batch['bboxes'][indices] + if len(bboxes) == 0: + LOGGER.debug(f'COMET WARNING: Image: {image_path} has no bounding boxes labels') + return None + + cls_labels = batch['cls'][indices].squeeze(1).tolist() + if 
class_name_map: + cls_labels = [str(class_name_map[label]) for label in cls_labels] + + original_image_shape = batch['ori_shape'][img_idx] + resized_image_shape = batch['resized_shape'][img_idx] + ratio_pad = batch['ratio_pad'][img_idx] + + data = [] + for box, label in zip(bboxes, cls_labels): + box = _scale_bounding_box_to_original_image_shape(box, resized_image_shape, original_image_shape, ratio_pad) + data.append({ + 'boxes': [box], + 'label': f'gt_{label}', + 'score': _scale_confidence_score(1.0), }) + + return {'name': 'ground_truth', 'data': data} + + +def _format_prediction_annotations_for_detection(image_path, metadata, class_label_map=None): + """Format YOLO predictions for object detection visualization.""" + stem = image_path.stem + image_id = int(stem) if stem.isnumeric() else stem + + predictions = metadata.get(image_id) + if not predictions: + LOGGER.debug(f'COMET WARNING: Image: {image_path} has no bounding boxes predictions') + return None + + data = [] + for prediction in predictions: + boxes = prediction['bbox'] + score = _scale_confidence_score(prediction['score']) + cls_label = prediction['category_id'] + if class_label_map: + cls_label = str(class_label_map[cls_label]) + + data.append({'boxes': [boxes], 'label': cls_label, 'score': score}) + + return {'name': 'prediction', 'data': data} + + +def _fetch_annotations(img_idx, image_path, batch, prediction_metadata_map, class_label_map): + """Join the ground truth and prediction annotations if they exist.""" + ground_truth_annotations = _format_ground_truth_annotations_for_detection(img_idx, image_path, batch, + class_label_map) + prediction_annotations = _format_prediction_annotations_for_detection(image_path, prediction_metadata_map, + class_label_map) + + annotations = [ + annotation for annotation in [ground_truth_annotations, prediction_annotations] if annotation is not None] + return [annotations] if annotations else None + + +def _create_prediction_metadata_map(model_predictions): + 
"""Create metadata map for model predictions by groupings them based on image ID.""" + pred_metadata_map = {} + for prediction in model_predictions: + pred_metadata_map.setdefault(prediction['image_id'], []) + pred_metadata_map[prediction['image_id']].append(prediction) + + return pred_metadata_map + + +def _log_confusion_matrix(experiment, trainer, curr_step, curr_epoch): + """Log the confusion matrix to Comet experiment.""" + conf_mat = trainer.validator.confusion_matrix.matrix + names = list(trainer.data['names'].values()) + ['background'] + experiment.log_confusion_matrix( + matrix=conf_mat, + labels=names, + max_categories=len(names), + epoch=curr_epoch, + step=curr_step, + ) + + +def _log_images(experiment, image_paths, curr_step, annotations=None): + """Logs images to the experiment with optional annotations.""" + if annotations: + for image_path, annotation in zip(image_paths, annotations): + experiment.log_image(image_path, name=image_path.stem, step=curr_step, annotations=annotation) + + else: + for image_path in image_paths: + experiment.log_image(image_path, name=image_path.stem, step=curr_step) + + +def _log_image_predictions(experiment, validator, curr_step): + """Logs predicted boxes for a single image during training.""" + global _comet_image_prediction_count + + task = validator.args.task + if task not in COMET_SUPPORTED_TASKS: + return + + jdict = validator.jdict + if not jdict: + return + + predictions_metadata_map = _create_prediction_metadata_map(jdict) + dataloader = validator.dataloader + class_label_map = validator.names + + batch_logging_interval = _get_eval_batch_logging_interval() + max_image_predictions = _get_max_image_predictions_to_log() + + for batch_idx, batch in enumerate(dataloader): + if (batch_idx + 1) % batch_logging_interval != 0: + continue + + image_paths = batch['im_file'] + for img_idx, image_path in enumerate(image_paths): + if _comet_image_prediction_count >= max_image_predictions: + return + + image_path = 
Path(image_path) + annotations = _fetch_annotations( + img_idx, + image_path, + batch, + predictions_metadata_map, + class_label_map, + ) + _log_images( + experiment, + [image_path], + curr_step, + annotations=annotations, + ) + _comet_image_prediction_count += 1 + + +def _log_plots(experiment, trainer): + """Logs evaluation plots and label plots for the experiment.""" + plot_filenames = [trainer.save_dir / f'{plots}.png' for plots in EVALUATION_PLOT_NAMES] + _log_images(experiment, plot_filenames, None) + + label_plot_filenames = [trainer.save_dir / f'{labels}.jpg' for labels in LABEL_PLOT_NAMES] + _log_images(experiment, label_plot_filenames, None) + + +def _log_model(experiment, trainer): + """Log the best-trained model to Comet.ml.""" + model_name = _get_comet_model_name() + experiment.log_model( + model_name, + file_or_folder=str(trainer.best), + file_name='best.pt', + overwrite=True, + ) + + +def on_pretrain_routine_start(trainer): + """Creates or resumes a CometML experiment at the start of a YOLO pre-training routine.""" + experiment = comet_ml.get_global_experiment() + is_alive = getattr(experiment, 'alive', False) + if not experiment or not is_alive: + _create_experiment(trainer.args) + + +def on_train_epoch_end(trainer): + """Log metrics and save batch images at the end of training epochs.""" + experiment = comet_ml.get_global_experiment() + if not experiment: + return + + metadata = _fetch_trainer_metadata(trainer) + curr_epoch = metadata['curr_epoch'] + curr_step = metadata['curr_step'] + + experiment.log_metrics( + trainer.label_loss_items(trainer.tloss, prefix='train'), + step=curr_step, + epoch=curr_epoch, + ) + + if curr_epoch == 1: + _log_images(experiment, trainer.save_dir.glob('train_batch*.jpg'), curr_step) + + +def on_fit_epoch_end(trainer): + """Logs model assets at the end of each epoch.""" + experiment = comet_ml.get_global_experiment() + if not experiment: + return + + metadata = _fetch_trainer_metadata(trainer) + curr_epoch = 
metadata['curr_epoch'] + curr_step = metadata['curr_step'] + save_assets = metadata['save_assets'] + + experiment.log_metrics(trainer.metrics, step=curr_step, epoch=curr_epoch) + experiment.log_metrics(trainer.lr, step=curr_step, epoch=curr_epoch) + if curr_epoch == 1: + from ultralytics.utils.torch_utils import model_info_for_loggers + experiment.log_metrics(model_info_for_loggers(trainer), step=curr_step, epoch=curr_epoch) + + if not save_assets: + return + + _log_model(experiment, trainer) + if _should_log_confusion_matrix(): + _log_confusion_matrix(experiment, trainer, curr_step, curr_epoch) + if _should_log_image_predictions(): + _log_image_predictions(experiment, trainer.validator, curr_step) + + +def on_train_end(trainer): + """Perform operations at the end of training.""" + experiment = comet_ml.get_global_experiment() + if not experiment: + return + + metadata = _fetch_trainer_metadata(trainer) + curr_epoch = metadata['curr_epoch'] + curr_step = metadata['curr_step'] + plots = trainer.args.plots + + _log_model(experiment, trainer) + if plots: + _log_plots(experiment, trainer) + + _log_confusion_matrix(experiment, trainer, curr_step, curr_epoch) + _log_image_predictions(experiment, trainer.validator, curr_step) + experiment.end() + + global _comet_image_prediction_count + _comet_image_prediction_count = 0 + + +callbacks = { + 'on_pretrain_routine_start': on_pretrain_routine_start, + 'on_train_epoch_end': on_train_epoch_end, + 'on_fit_epoch_end': on_fit_epoch_end, + 'on_train_end': on_train_end} if comet_ml else {} diff --git a/ultralytics/utils/callbacks/dvc.py b/ultralytics/utils/callbacks/dvc.py new file mode 100644 index 0000000..7fa05c6 --- /dev/null +++ b/ultralytics/utils/callbacks/dvc.py @@ -0,0 +1,138 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING, checks + +try: + assert not TESTS_RUNNING # do not log pytest + assert SETTINGS['dvc'] is True # verify integration is enabled + import dvclive 
+ assert checks.check_version('dvclive', '2.11.0', verbose=True) + + import os + import re + from pathlib import Path + + # DVCLive logger instance + live = None + _processed_plots = {} + + # `on_fit_epoch_end` is called on final validation (probably need to be fixed) for now this is the way we + # distinguish final evaluation of the best model vs last epoch validation + _training_epoch = False + +except (ImportError, AssertionError, TypeError): + dvclive = None + + +def _log_images(path, prefix=''): + """Logs images at specified path with an optional prefix using DVCLive.""" + if live: + name = path.name + + # Group images by batch to enable sliders in UI + if m := re.search(r'_batch(\d+)', name): + ni = m[1] + new_stem = re.sub(r'_batch(\d+)', '_batch', path.stem) + name = (Path(new_stem) / ni).with_suffix(path.suffix) + + live.log_image(os.path.join(prefix, name), path) + + +def _log_plots(plots, prefix=''): + """Logs plot images for training progress if they have not been previously processed.""" + for name, params in plots.items(): + timestamp = params['timestamp'] + if _processed_plots.get(name) != timestamp: + _log_images(name, prefix) + _processed_plots[name] = timestamp + + +def _log_confusion_matrix(validator): + """Logs the confusion matrix for the given validator using DVCLive.""" + targets = [] + preds = [] + matrix = validator.confusion_matrix.matrix + names = list(validator.names.values()) + if validator.confusion_matrix.task == 'detect': + names += ['background'] + + for ti, pred in enumerate(matrix.T.astype(int)): + for pi, num in enumerate(pred): + targets.extend([names[ti]] * num) + preds.extend([names[pi]] * num) + + live.log_sklearn_plot('confusion_matrix', targets, preds, name='cf.json', normalized=True) + + +def on_pretrain_routine_start(trainer): + """Initializes DVCLive logger for training metadata during pre-training routine.""" + try: + global live + live = dvclive.Live(save_dvc_exp=True, cache_images=True) + LOGGER.info("DVCLive is 
detected and auto logging is enabled (run 'yolo settings dvc=False' to disable).") + except Exception as e: + LOGGER.warning(f'WARNING ⚠️ DVCLive installed but not initialized correctly, not logging this run. {e}') + + +def on_pretrain_routine_end(trainer): + """Logs plots related to the training process at the end of the pretraining routine.""" + _log_plots(trainer.plots, 'train') + + +def on_train_start(trainer): + """Logs the training parameters if DVCLive logging is active.""" + if live: + live.log_params(trainer.args) + + +def on_train_epoch_start(trainer): + """Sets the global variable _training_epoch value to True at the start of training each epoch.""" + global _training_epoch + _training_epoch = True + + +def on_fit_epoch_end(trainer): + """Logs training metrics and model info, and advances to next step on the end of each fit epoch.""" + global _training_epoch + if live and _training_epoch: + all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics, **trainer.lr} + for metric, value in all_metrics.items(): + live.log_metric(metric, value) + + if trainer.epoch == 0: + from ultralytics.utils.torch_utils import model_info_for_loggers + for metric, value in model_info_for_loggers(trainer).items(): + live.log_metric(metric, value, plot=False) + + _log_plots(trainer.plots, 'train') + _log_plots(trainer.validator.plots, 'val') + + live.next_step() + _training_epoch = False + + +def on_train_end(trainer): + """Logs the best metrics, plots, and confusion matrix at the end of training if DVCLive is active.""" + if live: + # At the end log the best metrics. It runs validator on the best model internally. 
+ all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics, **trainer.lr} + for metric, value in all_metrics.items(): + live.log_metric(metric, value, plot=False) + + _log_plots(trainer.plots, 'val') + _log_plots(trainer.validator.plots, 'val') + _log_confusion_matrix(trainer.validator) + + if trainer.best.exists(): + live.log_artifact(trainer.best, copy=True, type='model') + + live.end() + + +callbacks = { + 'on_pretrain_routine_start': on_pretrain_routine_start, + 'on_pretrain_routine_end': on_pretrain_routine_end, + 'on_train_start': on_train_start, + 'on_train_epoch_start': on_train_epoch_start, + 'on_fit_epoch_end': on_fit_epoch_end, + 'on_train_end': on_train_end} if dvclive else {} diff --git a/ultralytics/utils/callbacks/hub.py b/ultralytics/utils/callbacks/hub.py new file mode 100644 index 0000000..7171fb9 --- /dev/null +++ b/ultralytics/utils/callbacks/hub.py @@ -0,0 +1,87 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +import json +from time import time + +from ultralytics.hub.utils import HUB_WEB_ROOT, PREFIX, events +from ultralytics.utils import LOGGER, SETTINGS + + +def on_pretrain_routine_end(trainer): + """Logs info before starting timer for upload rate limit.""" + session = getattr(trainer, 'hub_session', None) + if session: + # Start timer for upload rate limit + LOGGER.info(f'{PREFIX}View model at {HUB_WEB_ROOT}/models/{session.model_id} 🚀') + session.timers = {'metrics': time(), 'ckpt': time()} # start timer on session.rate_limit + + +def on_fit_epoch_end(trainer): + """Uploads training progress metrics at the end of each epoch.""" + session = getattr(trainer, 'hub_session', None) + if session: + # Upload metrics after val end + all_plots = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics} + if trainer.epoch == 0: + from ultralytics.utils.torch_utils import model_info_for_loggers + all_plots = {**all_plots, **model_info_for_loggers(trainer)} + session.metrics_queue[trainer.epoch] = 
def on_model_save(trainer):
    """
    Upload the latest checkpoint to Ultralytics HUB, at most once per checkpoint rate-limit interval.

    Does nothing when the trainer has no truthy `hub_session` attribute.

    Args:
        trainer: Training object providing `best_fitness`, `fitness`, `epoch` and `last`.
    """
    session = getattr(trainer, 'hub_session', None)
    if not session:
        return

    # The current checkpoint counts as best when its fitness equals the best fitness seen so far
    is_best = trainer.best_fitness == trainer.fitness
    rate_limited = not (time() - session.timers['ckpt'] > session.rate_limits['ckpt'])
    if rate_limited:
        return

    LOGGER.info(f'{PREFIX}Uploading checkpoint {HUB_WEB_ROOT}/models/{session.model_id}')
    session.upload_model(trainer.epoch, trainer.last, is_best)
    session.timers['ckpt'] = time()  # restart the rate-limit timer
def on_pretrain_routine_end(trainer):
    """
    Configure MLflow and log the trainer arguments as run parameters at the end of the pretraining routine.

    Resolves the tracking URI, experiment name and run name from environment variables with fallbacks to trainer
    arguments, enables autologging, then reuses the active run or starts a new one and logs the parameters. An
    exception raised while starting the run or logging parameters is reported as a warning instead of propagating.

    Args:
        trainer (ultralytics.engine.trainer.BaseTrainer): The training object with arguments and parameters to log.

    Environment Variables:
        MLFLOW_TRACKING_URI: The URI for MLflow tracking. If not set, falls back to the 'mlflow' folder in RUNS_DIR.
        MLFLOW_EXPERIMENT_NAME: The experiment name. If not set, falls back to trainer.args.project, then to
            '/Shared/YOLOv8'.
        MLFLOW_RUN: The run name. If not set, falls back to trainer.args.name.
    """
    env = os.environ.get

    uri = env('MLFLOW_TRACKING_URI') or str(RUNS_DIR / 'mlflow')
    LOGGER.debug(f'{PREFIX} tracking uri: {uri}')
    mlflow.set_tracking_uri(uri)

    # Resolve experiment and run names before selecting the experiment
    experiment_name = env('MLFLOW_EXPERIMENT_NAME') or trainer.args.project or '/Shared/YOLOv8'
    run_name = env('MLFLOW_RUN') or trainer.args.name
    mlflow.set_experiment(experiment_name)

    mlflow.autolog()
    try:
        active_run = mlflow.active_run()
        if not active_run:  # no run in progress, start one
            active_run = mlflow.start_run(run_name=run_name)
        LOGGER.info(f'{PREFIX}logging run_id({active_run.info.run_id}) to {uri}')
        if Path(uri).is_dir():  # local file store, so point the user at a local server
            LOGGER.info(f"{PREFIX}view at http://127.0.0.1:5000 with 'mlflow server --backend-store-uri {uri}'")
        LOGGER.info(f"{PREFIX}disable with 'yolo settings mlflow=False'")
        mlflow.log_params(dict(trainer.args))
    except Exception as e:
        LOGGER.warning(f'{PREFIX}WARNING ⚠️ Failed to initialize: {e}\n'
                       f'{PREFIX}WARNING ⚠️ Not tracking this run')
def _log_plot(title, plot_path):
    """
    Log a saved plot image to the NeptuneAI experiment logger.

    The image is read from disk, drawn on a borderless matplotlib figure without ticks, and uploaded under
    'Plots/<title>' on the module-level `run`. The figure is closed afterwards so that repeated calls do not
    accumulate open matplotlib figures.

    Args:
        title (str): Title of the plot.
        plot_path (PosixPath | str): Path to the saved image file.
    """
    import matplotlib.image as mpimg
    import matplotlib.pyplot as plt

    img = mpimg.imread(plot_path)
    fig = plt.figure()
    try:
        ax = fig.add_axes([0, 0, 1, 1], frameon=False, aspect='auto', xticks=[], yticks=[])  # no ticks
        ax.imshow(img)
        run[f'Plots/{title}'].upload(fig)
    finally:
        plt.close(fig)  # pyplot keeps every figure alive until it is closed explicitly
def on_fit_epoch_end(trainer):
    """
    Send the training metrics of the finished epoch to Ray Tune.

    Nothing is reported unless a Ray Tune session is enabled. The report is a copy of `trainer.metrics` with an
    additional 'epoch' entry; the trainer's own metrics dict is left untouched so the extra key does not leak into
    other consumers of `trainer.metrics`.

    Args:
        trainer: Training object providing `metrics` (dict) and `epoch`.
    """
    if ray.tune.is_session_enabled():
        session.report({**trainer.metrics, 'epoch': trainer.epoch})
def on_pretrain_routine_start(trainer):
    """
    Create the module-level TensorBoard SummaryWriter in the trainer's save directory.

    Does nothing when `SummaryWriter` is falsy. Any exception raised while creating the writer is logged as a
    warning instead of propagating.

    Args:
        trainer: Training object providing `save_dir`.
    """
    global WRITER

    if not SummaryWriter:
        return

    try:
        WRITER = SummaryWriter(str(trainer.save_dir))
        prefix = colorstr('TensorBoard: ')
        LOGGER.info(f"{prefix}Start with 'tensorboard --logdir {trainer.save_dir}', view at http://localhost:6006/")
    except Exception as e:
        LOGGER.warning(f'WARNING ⚠️ TensorBoard not initialized correctly, not logging this run. {e}')
def _plot_curve(x,
                y,
                names=None,
                id='precision-recall',
                title='Precision Recall Curve',
                x_title='Recall',
                y_title='Precision',
                num_x=100,
                only_mean=False):
    """
    Log a metric curve visualization.

    The x range is resampled to `num_x` evenly spaced points and the y data is interpolated onto it. With
    `only_mean=True` only the mean curve is logged as a line plot; otherwise the mean curve and one curve per row of
    `y` are logged together through `_custom_table`.

    Args:
        x (np.ndarray): Data points for the x-axis with length N.
        y (np.ndarray): Corresponding data points for the y-axis with shape CxN, where C is the number of classes.
        names (list, optional): Names of the classes corresponding to the rows of `y`. Rows without a name are
            labelled with their index. Defaults to None (no names).
        id (str, optional): Key under which the per-class table is logged. Defaults to 'precision-recall'.
        title (str, optional): Title for the visualization plot. Defaults to 'Precision Recall Curve'.
        x_title (str, optional): Label for the x-axis. Defaults to 'Recall'.
        y_title (str, optional): Label for the y-axis. Defaults to 'Precision'.
        num_x (int, optional): Number of interpolated data points for visualization. Defaults to 100.
        only_mean (bool, optional): Flag to indicate if only the mean curve should be plotted. Defaults to False.
    """
    if names is None:
        names = []

    # Resample the x-axis
    x_new = np.linspace(x[0], x[-1], num_x).round(5)

    # Start the logged series with the mean curve over all classes
    x_log = x_new.tolist()
    y_log = np.interp(x_new, x, np.mean(y, axis=0)).round(3).tolist()

    if only_mean:
        table = wb.Table(data=list(zip(x_log, y_log)), columns=[x_title, y_title])
        wb.run.log({title: wb.plot.line(table, x_title, y_title, title=title)})
    else:
        classes = ['mean'] * len(x_log)
        for i, yi in enumerate(y):
            # Fall back to the row index when fewer names than curves were given (previously an IndexError)
            label = names[i] if i < len(names) else str(i)
            x_log.extend(x_new)  # add new x
            y_log.extend(np.interp(x_new, x, yi))  # interpolate y to new x
            classes.extend([label] * len(x_new))  # add class names
        wb.log({id: _custom_table(x_log, y_log, classes, title, x_title, y_title)}, commit=False)
def on_train_end(trainer):
    """
    Log the final plots, the best model artifact and the validator metric curves, then finish the wandb run.

    Args:
        trainer: Training object providing `epoch`, `plots`, `best` and `validator` (with `plots` and `metrics`).
    """
    _log_plots(trainer.validator.plots, step=trainer.epoch + 1)
    _log_plots(trainer.plots, step=trainer.epoch + 1)

    # The artifact is only populated and logged when a best checkpoint exists on disk
    art = wb.Artifact(type='model', name=f'run_{wb.run.id}_model')
    if trainer.best.exists():
        art.add_file(trainer.best)
        wb.run.log_artifact(art, aliases=['best'])

    curves = zip(trainer.validator.metrics.curves, trainer.validator.metrics.curves_results)
    for curve_name, (x, y, x_title, y_title) in curves:
        _plot_curve(
            x,
            y,
            names=list(trainer.validator.metrics.names.values()),
            id=f'curves/{curve_name}',
            title=curve_name,
            x_title=x_title,
            y_title=y_title,
        )

    wb.run.finish()  # required or run continues on dashboard
def parse_version(version='0.0.0') -> tuple:
    """
    Convert a version string to a 3-tuple of integers, ignoring any extra non-numeric string attached to the version.
    This function replaces deprecated 'pkg_resources.parse_version(v)'.

    A local version label (everything after '+', i.e. '+cpu' or '+cu118') is dropped before parsing so that its
    digits cannot be mistaken for a version component, and versions with fewer than three numeric components are
    padded with zeros so that '2.0' and '2.0.0' compare equal.

    Args:
        version (str): Version string, i.e. '2.0.1+cpu'

    Returns:
        (tuple): Tuple of three integers, i.e. '2.0.1+cpu' -> (2, 0, 1) and '22.04' -> (22, 4, 0). Returns (0, 0, 0)
            and logs a warning if the value cannot be parsed.
    """
    try:
        release = version.split('+', 1)[0]  # '2.0+cu118' -> '2.0'
        parts = [int(x) for x in re.findall(r'\d+', release)[:3]]
        return tuple(parts + [0] * (3 - len(parts)))  # pad, since tuples of unequal length compare unequal
    except Exception as e:
        LOGGER.warning(f'WARNING ⚠️ failure for parse_version({version}), returning (0, 0, 0): {e}')
        return 0, 0, 0
+ + Args: + s (str): String to be checked. + + Returns: + bool: True if the string is composed only of ASCII characters, False otherwise. + """ + # Convert list, tuple, None, etc. to string + s = str(s) + + # Check if the string is composed of only ASCII characters + return all(ord(c) < 128 for c in s) + + +def check_imgsz(imgsz, stride=32, min_dim=1, max_dim=2, floor=0): + """ + Verify image size is a multiple of the given stride in each dimension. If the image size is not a multiple of the + stride, update it to the nearest multiple of the stride that is greater than or equal to the given floor value. + + Args: + imgsz (int | cList[int]): Image size. + stride (int): Stride value. + min_dim (int): Minimum number of dimensions. + max_dim (int): Maximum number of dimensions. + floor (int): Minimum allowed value for image size. + + Returns: + (List[int]): Updated image size. + """ + # Convert stride to integer if it is a tensor + stride = int(stride.max() if isinstance(stride, torch.Tensor) else stride) + + # Convert image size to list if it is an integer + if isinstance(imgsz, int): + imgsz = [imgsz] + elif isinstance(imgsz, (list, tuple)): + imgsz = list(imgsz) + else: + raise TypeError(f"'imgsz={imgsz}' is of invalid type {type(imgsz).__name__}. " + f"Valid imgsz types are int i.e. 'imgsz=640' or list i.e. 'imgsz=[640,640]'") + + # Apply max_dim + if len(imgsz) > max_dim: + msg = "'train' and 'val' imgsz must be an integer, while 'predict' and 'export' imgsz may be a [h, w] list " \ + "or an integer, i.e. 'yolo export imgsz=640,480' or 'yolo export imgsz=640'" + if max_dim != 1: + raise ValueError(f'imgsz={imgsz} is not a valid image size. {msg}') + LOGGER.warning(f"WARNING ⚠️ updating to 'imgsz={max(imgsz)}'. 
def check_version(current: str = '0.0.0',
                  required: str = '0.0.0',
                  name: str = 'version',
                  hard: bool = False,
                  verbose: bool = False,
                  msg: str = '') -> bool:
    """
    Check current version against the required version or range.

    Args:
        current (str): Current version or package name to get version from.
        required (str): Required version or range (in pip-style format). Constraints are comma-separated and may be
            surrounded by whitespace, i.e. '>=1.0, <2.0'.
        name (str, optional): Name to be used in warning message.
        hard (bool, optional): If True, raise an exception if the requirement is not met.
        verbose (bool, optional): If True, print warning message if requirement is not met.
        msg (str, optional): Extra message to display if verbose.

    Returns:
        (bool): True if requirement is met, False otherwise.

    Raises:
        ModuleNotFoundError: If `hard` is True and the package is not installed or a constraint is not met.
        AttributeError: If a constraint contains no numeric version and therefore cannot be parsed.

    Example:
        ```python
        # Check if current version is exactly 22.04
        check_version(current='22.04', required='==22.04')

        # Check if current version is greater than or equal to 22.04
        check_version(current='22.10', required='22.04')  # assumes '>=' inequality if none passed

        # Check if current version is less than or equal to 22.04
        check_version(current='22.04', required='<=22.04')

        # Check if current version is between 20.04 (inclusive) and 22.04 (exclusive)
        check_version(current='21.10', required='>20.04,<22.04')
        ```
    """
    if not current:  # if current is '' or None
        LOGGER.warning(f'WARNING ⚠️ invalid check_version({current}, {required}) requested, please check values.')
        return True
    elif not current[0].isdigit():  # current is package name rather than version string, i.e. current='ultralytics'
        try:
            name = current  # assigned package name to 'name' arg
            current = metadata.version(current)  # get version string from package name
        except metadata.PackageNotFoundError:
            if hard:
                raise ModuleNotFoundError(emojis(f'WARNING ⚠️ {current} package is required but not installed'))
            else:
                return False

    if not required:  # if required is '' or None
        return True

    first_failure = None  # (op, version) of the first constraint that is not met
    c = parse_version(current)  # '1.2.3' -> (1, 2, 3)
    for r in required.strip(',').split(','):
        r = r.strip()  # tolerate '>=1.0, <2.0'; a leading space used to make the constraint silently pass
        if not r:
            continue
        op, version = re.match(r'([^0-9]*)([\d.]+)', r).groups()  # split '>=22.04' -> ('>=', '22.04')
        op = op.strip()
        v = parse_version(version)  # '1.2.3' -> (1, 2, 3)
        if op == '==':
            met = c == v
        elif op == '!=':
            met = c != v
        elif op in ('>=', ''):  # if no constraint passed assume '>=required'
            met = c >= v
        elif op == '<=':
            met = c <= v
        elif op == '>':
            met = c > v
        elif op == '<':
            met = c < v
        else:
            met = True  # any other operator is not evaluated and never fails the check
        if not met and first_failure is None:
            first_failure = (op, version)

    result = first_failure is None
    if not result:
        op, version = first_failure  # report the constraint that failed, not the last one parsed
        warning = f'WARNING ⚠️ {name}{op}{version} is required, but {name}=={current} is currently installed {msg}'
        if hard:
            raise ModuleNotFoundError(emojis(warning))  # assert version requirements met
        if verbose:
            LOGGER.warning(warning)
    return result
def check_python(minimum: str = '3.8.0') -> bool:
    """
    Check current python version against the required minimum version.

    The check is delegated to `check_version` with `hard=True`, so an interpreter older than `minimum` raises an
    exception instead of returning False.

    Args:
        minimum (str): Required minimum version of python.

    Returns:
        (bool): True if the running interpreter meets the minimum version.
    """
    return check_version(platform.python_version(), minimum, name='Python ', hard=True)
+ + Example: + ```python + from ultralytics.utils.checks import check_requirements + + # Check a requirements.txt file + check_requirements('path/to/requirements.txt') + + # Check a single package + check_requirements('ultralytics>=8.0.0') + + # Check multiple packages + check_requirements(['numpy', 'ultralytics>=8.0.0']) + ``` + """ + + prefix = colorstr('red', 'bold', 'requirements:') + check_python() # check python version + check_torchvision() # check torch-torchvision compatibility + if isinstance(requirements, Path): # requirements.txt file + file = requirements.resolve() + assert file.exists(), f'{prefix} {file} not found, check failed.' + requirements = [f'{x.name}{x.specifier}' for x in parse_requirements(file) if x.name not in exclude] + elif isinstance(requirements, str): + requirements = [requirements] + + pkgs = [] + for r in requirements: + r_stripped = r.split('/')[-1].replace('.git', '') # replace git+https://org/repo.git -> 'repo' + match = re.match(r'([a-zA-Z0-9-_]+)([<>!=~]+.*)?', r_stripped) + name, required = match[1], match[2].strip() if match[2] else '' + try: + assert check_version(metadata.version(name), required) # exception if requirements not met + except (AssertionError, metadata.PackageNotFoundError): + pkgs.append(r) + + s = ' '.join(f'"{x}"' for x in pkgs) # console string + if s: + if install and AUTOINSTALL: # check environment variable + n = len(pkgs) # number of packages updates + LOGGER.info(f"{prefix} Ultralytics requirement{'s' * (n > 1)} {pkgs} not found, attempting AutoUpdate...") + try: + t = time.time() + assert is_online(), 'AutoUpdate skipped (offline)' + LOGGER.info(subprocess.check_output(f'pip install --no-cache {s} {cmds}', shell=True).decode()) + dt = time.time() - t + LOGGER.info( + f"{prefix} AutoUpdate success ✅ {dt:.1f}s, installed {n} package{'s' * (n > 1)}: {pkgs}\n" + f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n") + except Exception as e: + 
def check_yolov5u_filename(file: str, verbose: bool = True):
    """
    Translate legacy YOLOv3/YOLOv5 filenames into their YOLOv5u-style equivalents.

    Args:
        file (str): Filename or path to inspect.
        verbose (bool): If True, log a tip whenever a '.pt' name was rewritten.

    Returns:
        (str): The rewritten filename, or `file` unchanged when no rule applies.
    """
    # Only names mentioning yolov3 or yolov5 are candidates for renaming
    if not ('yolov3' in file or 'yolov5' in file):
        return file

    # YAML configs lose the 'u', i.e. yolov5nu.yaml -> yolov5n.yaml
    if 'u.yaml' in file:
        return file.replace('u.yaml', '.yaml')

    # Weights gain a 'u', but only when the string contains no 'u' anywhere
    if '.pt' not in file or 'u' in file:
        return file

    original_file = file
    patterns = (
        r'(.*yolov5([nsmlx]))\.pt',  # i.e. yolov5n.pt -> yolov5nu.pt
        r'(.*yolov5([nsmlx])6)\.pt',  # i.e. yolov5n6.pt -> yolov5n6u.pt
        r'(.*yolov3(|-tiny|-spp))\.pt',  # i.e. yolov3-spp.pt -> yolov3-sppu.pt
    )
    for pattern in patterns:
        file = re.sub(pattern, '\\1u.pt', file)

    if verbose and file != original_file:
        LOGGER.info(
            f"PRO TIP 💡 Replace 'model={original_file}' with new 'model={file}'.\nYOLOv5 'u' models are "
            f'trained with https://github.com/ultralytics/ultralytics and feature improved performance vs '
            f'standard YOLOv5 models trained with https://github.com/ultralytics/yolov5.\n')
    return file
def check_is_path_safe(basedir, path):
    """
    Tell whether `path` resolves to an existing file located under `basedir`, guarding against path traversal.

    Args:
        basedir (Path | str): The intended directory.
        path (Path | str): The path to check.

    Returns:
        (bool): True if the resolved path is a file whose leading components equal the resolved base directory,
            False otherwise.
    """
    root_parts = Path(basedir).resolve().parts
    target = Path(path).resolve()

    # Anything that is not an existing regular file is rejected outright
    if not target.is_file():
        return False

    # Compare component-wise so that '/data/base2' is not mistaken for a child of '/data/base'
    return target.parts[:len(root_parts)] == root_parts
def check_amp(model):
    """
    Check that PyTorch Automatic Mixed Precision (AMP) gives results close to FP32 when running YOLOv8n.

    The device is taken from the first parameter of `model`; the comparison itself is run with a freshly loaded
    'yolov8n.pt' on the bundled 'bus.jpg' asset. A failed comparison is logged as a reason to disable AMP during
    training.

    Args:
        model (nn.Module): A YOLOv8 model instance, used only to determine the device.

    Returns:
        (bool): False when the device type is 'cpu' or 'mps' or when the FP32/AMP comparison fails. True when the
            comparison passes, and also when the check is skipped (ConnectionError, AttributeError or
            ModuleNotFoundError while loading/running YOLOv8n).

    Example:
        ```python
        from ultralytics import YOLO
        from ultralytics.utils.checks import check_amp

        model = YOLO('yolov8n.pt').model.cuda()
        check_amp(model)
        ```
    """
    device = next(model.parameters()).device  # device of the supplied model
    if device.type in ('cpu', 'mps'):
        return False  # AMP only used on CUDA devices

    def amp_allclose(m, im):
        """Run `m` on `im` in FP32 and under autocast, then compare the predicted box tensors."""
        fp32_boxes = m(im, device=device, verbose=False)[0].boxes.data  # FP32 inference
        with torch.cuda.amp.autocast(True):
            amp_boxes = m(im, device=device, verbose=False)[0].boxes.data  # AMP inference
        del m
        if fp32_boxes.shape != amp_boxes.shape:
            return False
        return torch.allclose(fp32_boxes, amp_boxes.float(), atol=0.5)  # close to 0.5 absolute tolerance

    prefix = colorstr('AMP: ')
    warning_msg = "Setting 'amp=True'. If you experience zero-mAP or NaN losses you can disable AMP with amp=False."
    image = ASSETS / 'bus.jpg'  # image to check
    LOGGER.info(f'{prefix}running Automatic Mixed Precision (AMP) checks with YOLOv8n...')
    try:
        from ultralytics import YOLO
        assert amp_allclose(YOLO('yolov8n.pt'), image)
        LOGGER.info(f'{prefix}checks passed ✅')
    except ConnectionError:
        LOGGER.warning(f'{prefix}checks skipped ⚠️, offline and unable to download YOLOv8n. {warning_msg}')
    except (AttributeError, ModuleNotFoundError):
        LOGGER.warning(f'{prefix}checks skipped ⚠️. '
                       f'Unable to load YOLOv8n due to possible Ultralytics package modifications. {warning_msg}')
    except AssertionError:
        LOGGER.warning(f'{prefix}checks failed ❌. Anomalies were detected with AMP on your system that may lead to '
                       f'NaN losses or zero-mAP results, so AMP will be disabled during training.')
        return False
    return True
def cuda_device_count() -> int:
    """
    Report how many NVIDIA GPUs `nvidia-smi` sees in this environment.

    Returns:
        (int): The integer parsed from the first line of the `nvidia-smi` output, or 0 when the command fails,
            is not installed, or prints something that is not an integer.
    """
    query = ['nvidia-smi', '--query-gpu=count', '--format=csv,noheader,nounits']
    try:
        report = subprocess.check_output(query, encoding='utf-8')
        # Only the first line of the stripped output is parsed
        return int(report.strip().split('\n')[0])
    except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
        # Command failed, nvidia-smi missing, or output not an integer: assume no GPUs
        return 0
def find_free_network_port() -> int:
    """
    Return a currently unused TCP port on localhost.

    Handy for single-node training, where no real main node is contacted but the `MASTER_PORT` environment
    variable still has to be set.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.bind(('127.0.0.1', 0))  # port 0: let the OS choose the port
        _host, port = probe.getsockname()
    return port
def ddp_cleanup(trainer, file):
    """Remove `file` when its name carries the '<id(trainer)>.py' marker; otherwise leave it alone."""
    marker = f'{id(trainer)}.py'  # compared against the path as a plain substring
    if marker not in file:
        return
    os.remove(file)
def delete_dsstore(path, files_to_delete=('.DS_Store', '__MACOSX')):
    """
    Deletes all ".DS_Store" files and "__MACOSX" folders under a specified directory.

    Args:
        path (str | Path): The directory that is searched recursively.
        files_to_delete (tuple): Names of the files or directories to be deleted.

    Example:
        ```python
        from ultralytics.utils.downloads import delete_dsstore

        delete_dsstore('path/to/dir')
        ```

    Note:
        ".DS_store" files are created by the Apple operating system and contain metadata about folders and files. They
        are hidden system files and can cause issues when transferring files between different operating systems.
    """
    for file in files_to_delete:
        matches = list(Path(path).rglob(file))
        LOGGER.info(f'Deleting {file} files: {matches}')
        for f in matches:
            if not (f.exists() or f.is_symlink()):
                continue  # already removed together with a matching parent directory
            if f.is_dir() and not f.is_symlink():
                shutil.rmtree(f)  # Path.unlink() cannot remove directories such as '__MACOSX'
            else:
                f.unlink()
+ + Example: + ```python + from ultralytics.utils.downloads import zip_directory + + file = zip_directory('path/to/dir') + ``` + """ + from zipfile import ZIP_DEFLATED, ZIP_STORED, ZipFile + + delete_dsstore(directory) + directory = Path(directory) + if not directory.is_dir(): + raise FileNotFoundError(f"Directory '{directory}' does not exist.") + + # Unzip with progress bar + files_to_zip = [f for f in directory.rglob('*') if f.is_file() and all(x not in f.name for x in exclude)] + zip_file = directory.with_suffix('.zip') + compression = ZIP_DEFLATED if compress else ZIP_STORED + with ZipFile(zip_file, 'w', compression) as f: + for file in TQDM(files_to_zip, desc=f'Zipping {directory} to {zip_file}...', unit='file', disable=not progress): + f.write(file, file.relative_to(directory)) + + return zip_file # return path to zip file + + +def unzip_file(file, path=None, exclude=('.DS_Store', '__MACOSX'), exist_ok=False, progress=True): + """ + Unzips a *.zip file to the specified path, excluding files containing strings in the exclude list. + + If the zipfile does not contain a single top-level directory, the function will create a new + directory with the same name as the zipfile (without the extension) to extract its contents. + If a path is not provided, the function will use the parent directory of the zipfile as the default path. + + Args: + file (str): The path to the zipfile to be extracted. + path (str, optional): The path to extract the zipfile to. Defaults to None. + exclude (tuple, optional): A tuple of filename strings to be excluded. Defaults to ('.DS_Store', '__MACOSX'). + exist_ok (bool, optional): Whether to overwrite existing contents if they exist. Defaults to False. + progress (bool, optional): Whether to display a progress bar. Defaults to True. + + Raises: + BadZipFile: If the provided file does not exist or is not a valid zipfile. + + Returns: + (Path): The path to the directory where the zipfile was extracted. 
def check_disk_space(url='https://ultralytics.com/assets/coco128.zip', sf=1.5, hard=True):
    """
    Compare the size a URL reports for its file against the free space on the root filesystem.

    Args:
        url (str, optional): The URL to the file. Defaults to 'https://ultralytics.com/assets/coco128.zip'.
        sf (float, optional): Safety factor, the multiplier for the required free space. Defaults to 1.5.
        hard (bool, optional): Whether to throw an error or not on insufficient disk space. Defaults to True.

    Returns:
        (bool): True if there is sufficient disk space, or if the HEAD request raised or returned a status of 400
            or above. False if space is insufficient and `hard` is False.

    Raises:
        MemoryError: If space is insufficient and `hard` is True.
    """
    try:
        response = requests.head(url)
        assert response.status_code < 400, f'URL error for {url}: {response.status_code} {response.reason}'
    except Exception:
        return True  # requests issue, default to True

    bytes_per_gib = 1 << 30
    data = int(response.headers.get('Content-Length', 0)) / bytes_per_gib  # reported file size in GiB
    free = shutil.disk_usage('/').free / bytes_per_gib  # free space on '/' in GiB
    if data * sf < free:
        return True  # sufficient space

    # Insufficient space: raise or warn depending on `hard`
    message = (f'WARNING ⚠️ Insufficient free disk space {free:.1f} GB < {data * sf:.3f} GB required, '
               f'Please free {data * sf - free:.1f} GB additional disk space and try again.')
    if hard:
        raise MemoryError(message)
    LOGGER.warning(message)
    return False
def safe_download(url,
                  file=None,
                  dir=None,
                  unzip=True,
                  delete=False,
                  curl=False,
                  retry=3,
                  min_bytes=1E0,
                  progress=True):
    """
    Downloads files from a URL, with options for retrying, unzipping, and deleting the downloaded file.

    Args:
        url (str): The URL of the file to be downloaded.
        file (str, optional): The filename of the downloaded file.
            If not provided, the file will be saved with the same name as the URL.
        dir (str | Path, optional): The directory to save the downloaded file.
            If not provided, the file will be saved in the current working directory.
        unzip (bool, optional): Whether to unzip the downloaded file. Default: True.
        delete (bool, optional): Whether to delete the downloaded file after unzipping. Default: False.
        curl (bool, optional): Whether to use curl command line tool for downloading. Default: False.
        retry (int, optional): The number of times to retry the download in case of failure. Default: 3.
        min_bytes (float, optional): The minimum number of bytes that the downloaded file should have, to be considered
            a successful download. Default: 1E0.
        progress (bool, optional): Whether to display a progress bar during the download. Default: True.

    Returns:
        (Path | None): The extraction directory when `unzip` is True and the file exists with a suffix of
            '', '.zip', '.tar' or '.gz'; otherwise None.

    Raises:
        ConnectionError: If the first attempt fails while offline, or the retry limit is reached.

    Example:
        ```python
        from ultralytics.utils.downloads import safe_download

        link = "https://ultralytics.com/assets/bus.jpg"
        path = safe_download(link)
        ```
    """
    gdrive = url.startswith('https://drive.google.com/')  # check if the URL is a Google Drive link
    if gdrive:
        url, file = get_google_drive_file_info(url)

    f = Path(dir or '.') / (file or url2file(url))  # URL converted to filename
    if '://' not in str(url) and Path(url).is_file():  # URL exists ('://' check required in Windows Python<3.10)
        f = Path(url)  # filename
    elif not f.is_file():  # URL and file do not exist
        desc = f"Downloading {url if gdrive else clean_url(url)} to '{f}'"
        LOGGER.info(f'{desc}...')
        f.parent.mkdir(parents=True, exist_ok=True)  # make directory if missing
        check_disk_space(url)
        for i in range(retry + 1):
            try:
                if curl or i > 0:  # curl download with retry, continue
                    s = 'sS' * (not progress)  # silent
                    r = subprocess.run(['curl', '-#', f'-{s}L', url, '-o', f, '--retry', '3', '-C', '-']).returncode
                    assert r == 0, f'Curl return value {r}'
                else:  # urllib download
                    method = 'torch'
                    if method == 'torch':
                        torch.hub.download_url_to_file(url, f, progress=progress)
                    else:
                        with request.urlopen(url) as response, TQDM(total=int(response.getheader('Content-Length', 0)),
                                                                    desc=desc,
                                                                    disable=not progress,
                                                                    unit='B',
                                                                    unit_scale=True,
                                                                    unit_divisor=1024) as pbar:
                            with open(f, 'wb') as f_opened:
                                for data in response:
                                    f_opened.write(data)
                                    pbar.update(len(data))

                if f.exists():
                    if f.stat().st_size > min_bytes:
                        break  # success
                    f.unlink()  # remove partial downloads
            except Exception as e:
                if i == 0 and not is_online():
                    raise ConnectionError(emojis(f'❌ Download failure for {url}. Environment is not online.')) from e
                elif i >= retry:
                    raise ConnectionError(emojis(f'❌ Download failure for {url}. Retry limit reached.')) from e
                LOGGER.warning(f'⚠️ Download failure, retrying {i + 1}/{retry} {url}...')

    if unzip and f.exists() and f.suffix in ('', '.zip', '.tar', '.gz'):
        from zipfile import is_zipfile

        # Coerce to Path first: `dir` may be a plain string, which has no .resolve()
        unzip_dir = Path(dir or f.parent).resolve()  # unzip to dir if provided else unzip in place
        if is_zipfile(f):
            unzip_dir = unzip_file(file=f, path=unzip_dir, progress=progress)  # unzip
        elif f.suffix in ('.tar', '.gz'):
            LOGGER.info(f'Unzipping {f} to {unzip_dir}...')
            subprocess.run(['tar', 'xf' if f.suffix == '.tar' else 'xfz', f, '--directory', unzip_dir], check=True)
        if delete:
            f.unlink()  # remove zip
        return unzip_dir
tags/v6.2 + url = f'https://api.github.com/repos/{repo}/releases/{version}' + r = requests.get(url) # github api + if r.status_code != 200 and r.reason != 'rate limit exceeded' and retry: # failed and not 403 rate limit exceeded + r = requests.get(url) # try again + if r.status_code != 200: + LOGGER.warning(f'⚠️ GitHub assets check failure for {url}: {r.status_code} {r.reason}') + return '', [] + data = r.json() + return data['tag_name'], [x['name'] for x in data['assets']] # tag, assets i.e. ['yolov8n.pt', 'yolov8s.pt', ...] + + +def attempt_download_asset(file, repo='ultralytics/assets', release='v0.0.0', **kwargs): + """ + Attempt to download a file from GitHub release assets if it is not found locally. The function checks for the file + locally first, then tries to download it from the specified GitHub repository release. + + Args: + file (str | Path): The filename or file path to be downloaded. + repo (str, optional): The GitHub repository in the format 'owner/repo'. Defaults to 'ultralytics/assets'. + release (str, optional): The specific release version to be downloaded. Defaults to 'v0.0.0'. + **kwargs: Additional keyword arguments for the download process. + + Returns: + str: The path to the downloaded file. + + Example: + ```python + file_path = attempt_download_asset('yolov5s.pt', repo='ultralytics/assets', release='latest') + ``` + """ + from ultralytics.utils import SETTINGS # scoped for circular import + + # YOLOv3/5u updates + file = str(file) + file = checks.check_yolov5u_filename(file) + file = Path(file.strip().replace("'", '')) + if file.exists(): + return str(file) + elif (SETTINGS['weights_dir'] / file).exists(): + return str(SETTINGS['weights_dir'] / file) + else: + # URL specified + name = Path(parse.unquote(str(file))).name # decode '%2F' to '/' etc. 
def download(url, dir=Path.cwd(), unzip=True, delete=False, curl=False, threads=1, retry=3):
    """
    Downloads files from specified URLs to a given directory. Supports concurrent downloads if multiple threads are
    specified.

    Args:
        url (str | list): The URL or list of URLs of the files to be downloaded.
        dir (Path, optional): The directory where the files will be saved. Defaults to the working directory that
            was current when this function was defined (the default is evaluated once, at import time).
        unzip (bool, optional): Flag to unzip the files after downloading. Defaults to True.
        delete (bool, optional): Flag to delete the zip files after extraction. Defaults to False.
        curl (bool, optional): Flag to use curl for downloading. Defaults to False.
        threads (int, optional): Number of threads to use for concurrent downloads. Defaults to 1.
        retry (int, optional): Number of retries in case of download failure. Defaults to 3.

    Example:
        ```python
        download('https://ultralytics.com/assets/example.zip', dir='path/to/dir', unzip=True)
        ```
    """
    dir = Path(dir)
    dir.mkdir(parents=True, exist_ok=True)  # make directory

    # Wrap a single URL in a list for both branches, so a lone string is never iterated character by character
    urls = [url] if isinstance(url, (str, Path)) else url

    if threads > 1:
        with ThreadPool(threads) as pool:
            # Progress bars are switched off for concurrent downloads
            pool.map(
                lambda u: safe_download(
                    url=u, dir=dir, unzip=unzip, delete=delete, curl=curl, retry=retry, progress=False),
                urls)
            pool.close()
            pool.join()
    else:
        for u in urls:
            safe_download(url=u, dir=dir, unzip=unzip, delete=delete, curl=curl, retry=retry)
class WorkingDirectory(contextlib.ContextDecorator):
    """
    Temporarily switch the process working directory.

    Usable as a decorator (@WorkingDirectory(dir)) or as a context manager ('with WorkingDirectory(dir):').
    The directory to return to is recorded when the object is constructed, not when the context is entered.
    """

    def __init__(self, new_dir):
        """Store the target directory and the resolved directory that is current at construction time."""
        self.dir, self.cwd = new_dir, Path.cwd().resolve()

    def __enter__(self):
        """Change into the target directory."""
        os.chdir(self.dir)

    def __exit__(self, exc_type, exc_val, exc_tb):  # noqa
        """Change back to the directory recorded at construction; exceptions are not suppressed."""
        os.chdir(self.cwd)
def increment_path(path, exist_ok=False, sep='', mkdir=False):
    """
    Return a path that does not collide with an existing one, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ...

    When the path already exists and exist_ok is False, the first free candidate with a numeric suffix
    (starting at 2) is returned. For an existing file the number goes before the file extension; for a
    directory it is appended to the end. When the path does not exist, or exist_ok is True, the path is
    returned unchanged. With mkdir=True the returned path is also created as a directory.

    Args:
        path (str, pathlib.Path): Path to increment.
        exist_ok (bool, optional): If True, the path will not be incremented and returned as-is. Defaults to False.
        sep (str, optional): Separator to use between the path and the incrementation number. Defaults to ''.
        mkdir (bool, optional): Create a directory if it does not exist. Defaults to False.

    Returns:
        (pathlib.Path): Incremented path.
    """
    target = Path(path)  # os-agnostic

    if target.exists() and not exist_ok:
        if target.is_file():
            stem, ext = target.with_suffix(''), target.suffix
        else:
            stem, ext = target, ''

        candidates = (f'{stem}{sep}{n}{ext}' for n in range(2, 9999))
        # If every candidate is taken the last one tried (n=9998) is used, matching a loop that never breaks
        fallback = f'{stem}{sep}9998{ext}'
        target = Path(next((c for c in candidates if not os.path.exists(c)), fallback))

    if mkdir:
        target.mkdir(parents=True, exist_ok=True)  # make directory

    return target
def _ntuple(n):
    """From PyTorch internals: build a function that broadcasts a non-iterable value to an n-tuple."""

    def parse(x):
        """Return x unchanged if it is iterable, otherwise a tuple holding n copies of x."""
        return x if isinstance(x, abc.Iterable) else tuple(repeat(x, n))

    return parse


to_2tuple = _ntuple(2)
to_4tuple = _ntuple(4)

# `xyxy` means left top and right bottom
# `xywh` means center x, center y and width, height(YOLO format)
# `ltwh` means left top and width, height(COCO format)
_formats = ['xyxy', 'xywh', 'ltwh']

__all__ = 'Bboxes',  # tuple or list


class Bboxes:
    """
    A class for handling bounding boxes.

    The class supports various bounding box formats like 'xyxy', 'xywh', and 'ltwh'.
    Bounding box data should be provided in numpy arrays.

    Attributes:
        bboxes (numpy.ndarray): The bounding boxes stored in a 2D numpy array of shape (N, 4).
        format (str): The format of the bounding boxes ('xyxy', 'xywh', or 'ltwh').

    Note:
        This class does not handle normalization or denormalization of bounding boxes.
    """

    def __init__(self, bboxes, format='xyxy') -> None:
        """
        Initializes the Bboxes class with bounding box data in a specified format.

        Args:
            bboxes (numpy.ndarray): Array of shape (N, 4), or a single box of shape (4,) which is promoted to (1, 4).
            format (str): One of 'xyxy', 'xywh', 'ltwh'.
        """
        assert format in _formats, f'Invalid bounding box format: {format}, format must be one of {_formats}'
        bboxes = bboxes[None, :] if bboxes.ndim == 1 else bboxes
        assert bboxes.ndim == 2
        assert bboxes.shape[1] == 4
        self.bboxes = bboxes
        self.format = format

    def convert(self, format):
        """Converts the stored boxes in place from the current format to `format` (no-op if already equal)."""
        assert format in _formats, f'Invalid bounding box format: {format}, format must be one of {_formats}'
        if self.format == format:
            return
        elif self.format == 'xyxy':
            func = xyxy2xywh if format == 'xywh' else xyxy2ltwh
        elif self.format == 'xywh':
            func = xywh2xyxy if format == 'xyxy' else xywh2ltwh
        else:
            func = ltwh2xyxy if format == 'xyxy' else ltwh2xywh
        self.bboxes = func(self.bboxes)
        self.format = format

    def areas(self):
        """
        Return box areas as an array of shape (N,).

        The area is computed directly from the current format, so querying it no longer converts the
        object to 'xyxy' as a side effect.
        """
        if self.format == 'xyxy':
            return (self.bboxes[:, 2] - self.bboxes[:, 0]) * (self.bboxes[:, 3] - self.bboxes[:, 1])
        # 'xywh' and 'ltwh' both store width and height in columns 2 and 3
        return self.bboxes[:, 2] * self.bboxes[:, 3]

    def mul(self, scale):
        """
        Multiply the four coordinate columns in place.

        Args:
            scale (tuple | list | int): the scale for four coords; a single number is applied to all four.
        """
        if isinstance(scale, Number):
            scale = to_4tuple(scale)
        assert isinstance(scale, (tuple, list))
        assert len(scale) == 4
        self.bboxes[:, 0] *= scale[0]
        self.bboxes[:, 1] *= scale[1]
        self.bboxes[:, 2] *= scale[2]
        self.bboxes[:, 3] *= scale[3]

    def add(self, offset):
        """
        Add an offset to the four coordinate columns in place.

        Args:
            offset (tuple | list | int): the offset for four coords; a single number is applied to all four.
        """
        if isinstance(offset, Number):
            offset = to_4tuple(offset)
        assert isinstance(offset, (tuple, list))
        assert len(offset) == 4
        self.bboxes[:, 0] += offset[0]
        self.bboxes[:, 1] += offset[1]
        self.bboxes[:, 2] += offset[2]
        self.bboxes[:, 3] += offset[3]

    def __len__(self):
        """Return the number of boxes."""
        return len(self.bboxes)

    @classmethod
    def concatenate(cls, boxes_list: List['Bboxes'], axis=0) -> 'Bboxes':
        """
        Concatenate a list of Bboxes objects into a single Bboxes object.

        Args:
            boxes_list (List[Bboxes]): A list of Bboxes objects to concatenate.
            axis (int, optional): The axis along which to concatenate the bounding boxes.
                                   Defaults to 0.

        Returns:
            Bboxes: A new Bboxes object containing the concatenated bounding boxes. An empty input yields
                an empty (0, 4) container; a single-element input returns that element itself (not a copy).

        Note:
            The input should be a list or tuple of Bboxes objects that all share the same format; the
            result keeps that format.
        """
        assert isinstance(boxes_list, (list, tuple))
        if not boxes_list:
            # A 1-D empty array would be promoted to shape (1, 0) and fail the column check in __init__
            return cls(np.empty((0, 4)))
        assert all(isinstance(box, Bboxes) for box in boxes_list)

        if len(boxes_list) == 1:
            return boxes_list[0]
        fmt = boxes_list[0].format
        assert all(box.format == fmt for box in boxes_list), 'Cannot concatenate Bboxes with different formats'
        return cls(np.concatenate([b.bboxes for b in boxes_list], axis=axis), format=fmt)

    def __getitem__(self, index) -> 'Bboxes':
        """
        Retrieve a specific bounding box or a set of bounding boxes using indexing.

        Args:
            index (int, slice, or np.ndarray): The index, slice, or boolean array to select
                                               the desired bounding boxes.

        Returns:
            Bboxes: A new Bboxes object containing the selected bounding boxes, in the same format as this one.

        Raises:
            AssertionError: If the indexed bounding boxes do not form a 2-dimensional matrix.

        Note:
            When using boolean indexing, make sure to provide a boolean array with the same
            length as the number of bounding boxes.
        """
        if isinstance(index, (int, np.integer)):
            # ndarray.view() takes a dtype, not a shape; reshape is what turns one row into a (1, 4) matrix
            return Bboxes(self.bboxes[index].reshape(1, -1), format=self.format)
        b = self.bboxes[index]
        assert b.ndim == 2, f'Indexing on Bboxes with {index} failed to return a matrix!'
        return Bboxes(b, format=self.format)
+ """ + if segments is None: + segments = [] + self._bboxes = Bboxes(bboxes=bboxes, format=bbox_format) + self.keypoints = keypoints + self.normalized = normalized + + if len(segments) > 0: + # List[np.array(1000, 2)] * num_samples + segments = resample_segments(segments) + # (N, 1000, 2) + segments = np.stack(segments, axis=0) + else: + segments = np.zeros((0, 1000, 2), dtype=np.float32) + self.segments = segments + + def convert_bbox(self, format): + """Convert bounding box format.""" + self._bboxes.convert(format=format) + + @property + def bbox_areas(self): + """Calculate the area of bounding boxes.""" + return self._bboxes.areas() + + def scale(self, scale_w, scale_h, bbox_only=False): + """This might be similar with denormalize func but without normalized sign.""" + self._bboxes.mul(scale=(scale_w, scale_h, scale_w, scale_h)) + if bbox_only: + return + self.segments[..., 0] *= scale_w + self.segments[..., 1] *= scale_h + if self.keypoints is not None: + self.keypoints[..., 0] *= scale_w + self.keypoints[..., 1] *= scale_h + + def denormalize(self, w, h): + """Denormalizes boxes, segments, and keypoints from normalized coordinates.""" + if not self.normalized: + return + self._bboxes.mul(scale=(w, h, w, h)) + self.segments[..., 0] *= w + self.segments[..., 1] *= h + if self.keypoints is not None: + self.keypoints[..., 0] *= w + self.keypoints[..., 1] *= h + self.normalized = False + + def normalize(self, w, h): + """Normalize bounding boxes, segments, and keypoints to image dimensions.""" + if self.normalized: + return + self._bboxes.mul(scale=(1 / w, 1 / h, 1 / w, 1 / h)) + self.segments[..., 0] /= w + self.segments[..., 1] /= h + if self.keypoints is not None: + self.keypoints[..., 0] /= w + self.keypoints[..., 1] /= h + self.normalized = True + + def add_padding(self, padw, padh): + """Handle rect and mosaic situation.""" + assert not self.normalized, 'you should add padding with absolute coordinates.' 
+ self._bboxes.add(offset=(padw, padh, padw, padh)) + self.segments[..., 0] += padw + self.segments[..., 1] += padh + if self.keypoints is not None: + self.keypoints[..., 0] += padw + self.keypoints[..., 1] += padh + + def __getitem__(self, index) -> 'Instances': + """ + Retrieve a specific instance or a set of instances using indexing. + + Args: + index (int, slice, or np.ndarray): The index, slice, or boolean array to select + the desired instances. + + Returns: + Instances: A new Instances object containing the selected bounding boxes, + segments, and keypoints if present. + + Note: + When using boolean indexing, make sure to provide a boolean array with the same + length as the number of instances. + """ + segments = self.segments[index] if len(self.segments) else self.segments + keypoints = self.keypoints[index] if self.keypoints is not None else None + bboxes = self.bboxes[index] + bbox_format = self._bboxes.format + return Instances( + bboxes=bboxes, + segments=segments, + keypoints=keypoints, + bbox_format=bbox_format, + normalized=self.normalized, + ) + + def flipud(self, h): + """Flips the coordinates of bounding boxes, segments, and keypoints vertically.""" + if self._bboxes.format == 'xyxy': + y1 = self.bboxes[:, 1].copy() + y2 = self.bboxes[:, 3].copy() + self.bboxes[:, 1] = h - y2 + self.bboxes[:, 3] = h - y1 + else: + self.bboxes[:, 1] = h - self.bboxes[:, 1] + self.segments[..., 1] = h - self.segments[..., 1] + if self.keypoints is not None: + self.keypoints[..., 1] = h - self.keypoints[..., 1] + + def fliplr(self, w): + """Reverses the order of the bounding boxes and segments horizontally.""" + if self._bboxes.format == 'xyxy': + x1 = self.bboxes[:, 0].copy() + x2 = self.bboxes[:, 2].copy() + self.bboxes[:, 0] = w - x2 + self.bboxes[:, 2] = w - x1 + else: + self.bboxes[:, 0] = w - self.bboxes[:, 0] + self.segments[..., 0] = w - self.segments[..., 0] + if self.keypoints is not None: + self.keypoints[..., 0] = w - self.keypoints[..., 0] + + def 
clip(self, w, h): + """Clips bounding boxes, segments, and keypoints values to stay within image boundaries.""" + ori_format = self._bboxes.format + self.convert_bbox(format='xyxy') + self.bboxes[:, [0, 2]] = self.bboxes[:, [0, 2]].clip(0, w) + self.bboxes[:, [1, 3]] = self.bboxes[:, [1, 3]].clip(0, h) + if ori_format != 'xyxy': + self.convert_bbox(format=ori_format) + self.segments[..., 0] = self.segments[..., 0].clip(0, w) + self.segments[..., 1] = self.segments[..., 1].clip(0, h) + if self.keypoints is not None: + self.keypoints[..., 0] = self.keypoints[..., 0].clip(0, w) + self.keypoints[..., 1] = self.keypoints[..., 1].clip(0, h) + + def remove_zero_area_boxes(self): + """ + Remove zero-area boxes, i.e. after clipping some boxes may have zero width or height. + + This removes them. + """ + good = self.bbox_areas > 0 + if not all(good): + self._bboxes = self._bboxes[good] + if len(self.segments): + self.segments = self.segments[good] + if self.keypoints is not None: + self.keypoints = self.keypoints[good] + return good + + def update(self, bboxes, segments=None, keypoints=None): + """Updates instance variables.""" + self._bboxes = Bboxes(bboxes, format=self._bboxes.format) + if segments is not None: + self.segments = segments + if keypoints is not None: + self.keypoints = keypoints + + def __len__(self): + """Return the length of the instance list.""" + return len(self.bboxes) + + @classmethod + def concatenate(cls, instances_list: List['Instances'], axis=0) -> 'Instances': + """ + Concatenates a list of Instances objects into a single Instances object. + + Args: + instances_list (List[Instances]): A list of Instances objects to concatenate. + axis (int, optional): The axis along which the arrays will be concatenated. Defaults to 0. + + Returns: + Instances: A new Instances object containing the concatenated bounding boxes, + segments, and keypoints if present. 
+ + Note: + The `Instances` objects in the list should have the same properties, such as + the format of the bounding boxes, whether keypoints are present, and if the + coordinates are normalized. + """ + assert isinstance(instances_list, (list, tuple)) + if not instances_list: + return cls(np.empty(0)) + assert all(isinstance(instance, Instances) for instance in instances_list) + + if len(instances_list) == 1: + return instances_list[0] + + use_keypoint = instances_list[0].keypoints is not None + bbox_format = instances_list[0]._bboxes.format + normalized = instances_list[0].normalized + + cat_boxes = np.concatenate([ins.bboxes for ins in instances_list], axis=axis) + cat_segments = np.concatenate([b.segments for b in instances_list], axis=axis) + cat_keypoints = np.concatenate([b.keypoints for b in instances_list], axis=axis) if use_keypoint else None + return cls(cat_boxes, cat_segments, cat_keypoints, bbox_format, normalized) + + @property + def bboxes(self): + """Return bounding boxes.""" + return self._bboxes.bboxes diff --git a/ultralytics/utils/loss.py b/ultralytics/utils/loss.py new file mode 100644 index 0000000..da2e584 --- /dev/null +++ b/ultralytics/utils/loss.py @@ -0,0 +1,528 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ultralytics.utils.metrics import OKS_SIGMA +from ultralytics.utils.ops import crop_mask, xywh2xyxy, xyxy2xywh +from ultralytics.utils.tal import TaskAlignedAssigner, dist2bbox, make_anchors + +from .metrics import bbox_iou +from .tal import bbox2dist + + +class VarifocalLoss(nn.Module): + """ + Varifocal loss by Zhang et al. + + https://arxiv.org/abs/2008.13367. 
class FocalLoss(nn.Module):
    """
    Focal loss computed on logits with binary cross-entropy as the base loss.

    The per-element BCE is scaled by (1 - p_t) ** gamma and, when alpha > 0, by an alpha balancing factor.
    """

    def __init__(self):
        """Initializer for FocalLoss class with no parameters."""
        super().__init__()

    @staticmethod
    def forward(pred, label, gamma=1.5, alpha=0.25):
        """
        Compute the focal loss, averaged over dim 1 and summed over the remaining elements.

        Args:
            pred: Logits.
            label: Targets, broadcast-compatible with `pred`.
            gamma (float): Exponent of the modulating factor (1 - p_t).
            alpha (float): Weight given to positive targets (1 - alpha for negatives); ignored when <= 0.
        """
        # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
        bce = F.binary_cross_entropy_with_logits(pred, label, reduction='none')
        prob = pred.sigmoid()  # prob from logits
        p_t = label * prob + (1 - label) * (1 - prob)
        focal = bce * (1.0 - p_t) ** gamma
        if alpha > 0:
            focal = focal * (label * alpha + (1 - label) * (1 - alpha))
        return focal.mean(1).sum()
class v8DetectionLoss:
    """
    Criterion class for computing detection training losses.

    Produces a 3-element loss vector ordered (box, cls, dfl): the box and DFL terms come from BboxLoss,
    the classification term from BCEWithLogitsLoss against the scores returned by the assigner.
    """

    def __init__(self, model):  # model must be de-paralleled
        """
        Initializes v8DetectionLoss with the model, defining model-related properties and BCE loss function.

        Reads `model.args` (hyperparameters) and the last module of `model.model` (stride, nc, no, reg_max).
        """
        device = next(model.parameters()).device  # get model device
        h = model.args  # hyperparameters

        m = model.model[-1]  # Detect() module
        self.bce = nn.BCEWithLogitsLoss(reduction='none')
        self.hyp = h
        self.stride = m.stride  # model strides
        self.nc = m.nc  # number of classes
        self.no = m.no
        self.reg_max = m.reg_max
        self.device = device

        # DFL is only used when the head predicts more than one bin per box side
        self.use_dfl = m.reg_max > 1

        self.assigner = TaskAlignedAssigner(topk=10, num_classes=self.nc, alpha=0.5, beta=6.0)
        self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=self.use_dfl).to(device)
        # Bin values 0..reg_max-1, used by bbox_decode as weights for the softmaxed distribution
        self.proj = torch.arange(m.reg_max, dtype=torch.float, device=device)

    def preprocess(self, targets, batch_size, scale_tensor):
        """
        Regroup flat targets into a zero-padded per-image tensor.

        Column 0 of `targets` is read as the image index; the remaining columns are copied into a
        (batch_size, max targets per image, 5) tensor. Columns 1:5 are then multiplied by `scale_tensor`
        and passed through xywh2xyxy. With no targets an empty (batch_size, 0, 5) tensor is returned.
        """
        if targets.shape[0] == 0:
            out = torch.zeros(batch_size, 0, 5, device=self.device)
        else:
            i = targets[:, 0]  # image index
            _, counts = i.unique(return_counts=True)
            counts = counts.to(dtype=torch.int32)
            out = torch.zeros(batch_size, counts.max(), 5, device=self.device)
            for j in range(batch_size):
                matches = i == j
                n = matches.sum()
                if n:
                    out[j, :n] = targets[matches, 1:]
            # mul_ scales the box columns in place before the format conversion
            out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor))
        return out

    def bbox_decode(self, anchor_points, pred_dist):
        """
        Decode predicted object bounding box coordinates from anchor points and distribution.

        With DFL enabled the channel dimension is split into 4 groups, softmaxed, and reduced to one
        expected value per group via `self.proj`; the result is handed to dist2bbox with xywh=False.
        """
        if self.use_dfl:
            b, a, c = pred_dist.shape  # batch, anchors, channels
            pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
            # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype))
            # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2)
        return dist2bbox(pred_dist, anchor_points, xywh=False)

    def __call__(self, preds, batch):
        """
        Calculate the sum of the loss for box, cls and dfl multiplied by batch size.

        Args:
            preds: Feature maps, or a tuple whose second element holds the feature maps.
            batch (dict): Must provide 'batch_idx', 'cls' and 'bboxes'.

        Returns:
            (tuple): `loss.sum() * batch_size` and the detached (box, cls, dfl) loss vector.
        """
        loss = torch.zeros(3, device=self.device)  # box, cls, dfl
        feats = preds[1] if isinstance(preds, tuple) else preds
        # Flatten every feature map to (batch, no, -1), join along the last dim, then split channels
        # into the box-distribution part (reg_max * 4) and the class-score part (nc)
        pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
            (self.reg_max * 4, self.nc), 1)

        pred_scores = pred_scores.permute(0, 2, 1).contiguous()
        pred_distri = pred_distri.permute(0, 2, 1).contiguous()

        dtype = pred_scores.dtype
        batch_size = pred_scores.shape[0]
        imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
        anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

        # Targets
        targets = torch.cat((batch['batch_idx'].view(-1, 1), batch['cls'].view(-1, 1), batch['bboxes']), 1)
        # imgsz is (h, w); index [1, 0, 1, 0] yields (w, h, w, h) for scaling the box columns
        targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
        gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
        # Padded rows are all-zero, so a positive coordinate sum marks a real target
        mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)

        # Pboxes
        pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)

        _, target_bboxes, target_scores, fg_mask, _ = self.assigner(
            pred_scores.detach().sigmoid(), (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
            anchor_points * stride_tensor, gt_labels, gt_bboxes, mask_gt)

        # Clamp the normaliser to at least 1 so the divisions below never divide by zero
        target_scores_sum = max(target_scores.sum(), 1)

        # Cls loss
        # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
        loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

        # Bbox loss
        if fg_mask.sum():
            # Undo the stride scaling applied for the assigner so targets match pred_bboxes (in place)
            target_bboxes /= stride_tensor
            loss[0], loss[2] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores,
                                              target_scores_sum, fg_mask)

        loss[0] *= self.hyp.box  # box gain
        loss[1] *= self.hyp.cls  # cls gain
        loss[2] *= self.hyp.dfl  # dfl gain

        return loss.sum() * batch_size, loss.detach()  # loss(box, cls, dfl)
class v8SegmentationLoss(v8DetectionLoss):
    """
    Criterion class for computing segmentation training losses.

    Extends the detection criterion with a mask term; the loss vector has four entries ordered
    (box, seg, cls, dfl).
    """

    def __init__(self, model):  # model must be de-paralleled
        """Initializes the v8SegmentationLoss class, taking a de-paralleled model as argument."""
        super().__init__(model)
        self.overlap = model.args.overlap_mask

    def __call__(self, preds, batch):
        """
        Calculate and return the loss for the YOLO segmentation model.

        Args:
            preds: Either a 3-tuple (feats, pred_masks, proto) or a container whose second element is that tuple.
            batch (dict): Must provide 'batch_idx', 'cls', 'bboxes' and, when any anchor is positive, 'masks'.

        Returns:
            (tuple): `loss.sum() * batch_size` and the detached (box, seg, cls, dfl) loss vector.

        Raises:
            TypeError: If building the targets raises a RuntimeError (reported as a dataset-format problem).
        """
        loss = torch.zeros(4, device=self.device)  # box, seg, cls, dfl
        feats, pred_masks, proto = preds if len(preds) == 3 else preds[1]
        batch_size, _, mask_h, mask_w = proto.shape  # batch size, number of masks, mask height, mask width
        pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
            (self.reg_max * 4, self.nc), 1)

        # B, grids, ..
        pred_scores = pred_scores.permute(0, 2, 1).contiguous()
        pred_distri = pred_distri.permute(0, 2, 1).contiguous()
        pred_masks = pred_masks.permute(0, 2, 1).contiguous()

        dtype = pred_scores.dtype
        imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
        anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

        # Targets
        try:
            batch_idx = batch['batch_idx'].view(-1, 1)
            targets = torch.cat((batch_idx, batch['cls'].view(-1, 1), batch['bboxes']), 1)
            # imgsz is (h, w); index [1, 0, 1, 0] yields (w, h, w, h) for scaling the box columns
            targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
            gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
            # Padded rows are all-zero, so a positive coordinate sum marks a real target
            mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)
        except RuntimeError as e:
            raise TypeError('ERROR ❌ segment dataset incorrectly formatted or not a segment dataset.\n'
                            "This error can occur when incorrectly training a 'segment' model on a 'detect' dataset, "
                            "i.e. 'yolo train model=yolov8n-seg.pt data=coco128.yaml'.\nVerify your dataset is a "
                            "correctly formatted 'segment' dataset using 'data=coco128-seg.yaml' "
                            'as an example.\nSee https://docs.ultralytics.com/tasks/segment/ for help.') from e

        # Pboxes
        pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)

        _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner(
            pred_scores.detach().sigmoid(), (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
            anchor_points * stride_tensor, gt_labels, gt_bboxes, mask_gt)

        # Clamp the normaliser to at least 1 so the divisions below never divide by zero
        target_scores_sum = max(target_scores.sum(), 1)

        # Cls loss
        # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
        loss[2] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

        if fg_mask.sum():
            # Bbox loss (targets divided by stride out of place: target_bboxes is reused unscaled below)
            loss[0], loss[3] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes / stride_tensor,
                                              target_scores, target_scores_sum, fg_mask)
            # Masks loss
            masks = batch['masks'].to(self.device).float()
            if tuple(masks.shape[-2:]) != (mask_h, mask_w):  # downsample
                masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0]

            loss[1] = self.calculate_segmentation_loss(fg_mask, masks, target_gt_idx, target_bboxes, batch_idx, proto,
                                                       pred_masks, imgsz, self.overlap)

        # WARNING: lines below prevent Multi-GPU DDP 'unused gradient' PyTorch errors, do not remove
        else:
            loss[1] += (proto * 0).sum() + (pred_masks * 0).sum()  # inf sums may lead to nan loss

        loss[0] *= self.hyp.box  # box gain
        loss[1] *= self.hyp.box  # seg gain (reuses the box gain hyperparameter)
        loss[2] *= self.hyp.cls  # cls gain
        loss[3] *= self.hyp.dfl  # dfl gain

        return loss.sum() * batch_size, loss.detach()  # loss(box, seg, cls, dfl)

    @staticmethod
    def single_mask_loss(gt_mask: torch.Tensor, pred: torch.Tensor, proto: torch.Tensor, xyxy: torch.Tensor,
                         area: torch.Tensor) -> torch.Tensor:
        """
        Compute the instance segmentation loss for a single image.

        Args:
            gt_mask (torch.Tensor): Ground truth mask of shape (n, H, W), where n is the number of objects.
            pred (torch.Tensor): Predicted mask coefficients of shape (n, 32).
            proto (torch.Tensor): Prototype masks of shape (32, H, W).
            xyxy (torch.Tensor): Ground truth bounding boxes in xyxy format, of shape (n, 4), used to crop the
                per-pixel loss. NOTE(review): the caller in this class passes boxes scaled to the mask size
                rather than normalized to [0, 1] - confirm against crop_mask.
            area (torch.Tensor): Area of each ground truth bounding box of shape (n,).

        Returns:
            (torch.Tensor): The calculated mask loss for a single image.

        Notes:
            The function uses the equation pred_mask = torch.einsum('in,nhw->ihw', pred, proto) to produce the
            predicted masks from the prototype masks and predicted mask coefficients.
        """
        pred_mask = torch.einsum('in,nhw->ihw', pred, proto)  # (n, 32) @ (32, 80, 80) -> (n, 80, 80)
        loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction='none')
        # Per-object mean of the cropped pixel loss, divided by box area, summed over objects
        return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).sum()

    def calculate_segmentation_loss(
        self,
        fg_mask: torch.Tensor,
        masks: torch.Tensor,
        target_gt_idx: torch.Tensor,
        target_bboxes: torch.Tensor,
        batch_idx: torch.Tensor,
        proto: torch.Tensor,
        pred_masks: torch.Tensor,
        imgsz: torch.Tensor,
        overlap: bool,
    ) -> torch.Tensor:
        """
        Calculate the loss for instance segmentation.

        Args:
            fg_mask (torch.Tensor): A binary tensor of shape (BS, N_anchors) indicating which anchors are positive.
            masks (torch.Tensor): Ground truth masks. With `overlap` True each per-image slice is compared
                against (target index + 1), so it presumably is one index map per image; with `overlap`
                False it is filtered by `batch_idx`, so it presumably holds one mask per label - TODO confirm.
            target_gt_idx (torch.Tensor): Indexes of ground truth objects for each anchor of shape (BS, N_anchors).
            target_bboxes (torch.Tensor): Ground truth bounding boxes for each anchor of shape (BS, N_anchors, 4).
            batch_idx (torch.Tensor): Batch indices of shape (N_labels_in_batch, 1).
            proto (torch.Tensor): Prototype masks of shape (BS, 32, H, W).
            pred_masks (torch.Tensor): Predicted masks for each anchor of shape (BS, N_anchors, 32).
            imgsz (torch.Tensor): Size of the input image as a tensor of shape (2), i.e., (H, W).
            overlap (bool): Whether the masks in `masks` tensor overlap.

        Returns:
            (torch.Tensor): The calculated loss for instance segmentation, i.e. the accumulated per-image
                loss divided by the number of positive anchors (`fg_mask.sum()`).

        Notes:
            The batch loss can be computed for improved speed at higher memory usage.
            For example, pred_mask can be computed as follows:
                pred_mask = torch.einsum('in,nhw->ihw', pred, proto)  # (i, 32) @ (32, 160, 160) -> (i, 160, 160)
        """
        _, _, mask_h, mask_w = proto.shape
        loss = 0

        # Normalize to 0-1
        target_bboxes_normalized = target_bboxes / imgsz[[1, 0, 1, 0]]

        # Areas of target bboxes
        marea = xyxy2xywh(target_bboxes_normalized)[..., 2:].prod(2)

        # Normalize to mask size
        mxyxy = target_bboxes_normalized * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=proto.device)

        # Iterate image by image; zip stops at the shortest of the zipped tensors
        for i, single_i in enumerate(zip(fg_mask, target_gt_idx, pred_masks, proto, mxyxy, marea, masks)):
            fg_mask_i, target_gt_idx_i, pred_masks_i, proto_i, mxyxy_i, marea_i, masks_i = single_i
            if fg_mask_i.any():
                mask_idx = target_gt_idx_i[fg_mask_i]
                if overlap:
                    # Compare the per-image map against each (index + 1) to get one binary mask per positive anchor
                    gt_mask = masks_i == (mask_idx + 1).view(-1, 1, 1)
                    gt_mask = gt_mask.float()
                else:
                    # Select this image's masks by batch index, then pick the assigned one per positive anchor
                    gt_mask = masks[batch_idx.view(-1) == i][mask_idx]

                loss += self.single_mask_loss(gt_mask, pred_masks_i[fg_mask_i], proto_i, mxyxy_i[fg_mask_i],
                                              marea_i[fg_mask_i])

            # WARNING: lines below prevents Multi-GPU DDP 'unused gradient' PyTorch errors, do not remove
            else:
                loss += (proto * 0).sum() + (pred_masks * 0).sum()  # inf sums may lead to nan loss

        return loss / fg_mask.sum()
3] + nkpt = self.kpt_shape[0] # number of keypoints + sigmas = torch.from_numpy(OKS_SIGMA).to(self.device) if is_pose else torch.ones(nkpt, device=self.device) / nkpt + self.keypoint_loss = KeypointLoss(sigmas=sigmas) + + def __call__(self, preds, batch): + """Calculate the total loss and detach it.""" + loss = torch.zeros(5, device=self.device) # box, cls, dfl, kpt_location, kpt_visibility + feats, pred_kpts = preds if isinstance(preds[0], list) else preds[1] + pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split( + (self.reg_max * 4, self.nc), 1) + + # B, grids, .. + pred_scores = pred_scores.permute(0, 2, 1).contiguous() + pred_distri = pred_distri.permute(0, 2, 1).contiguous() + pred_kpts = pred_kpts.permute(0, 2, 1).contiguous() + + dtype = pred_scores.dtype + imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) + anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) + + # Targets + batch_size = pred_scores.shape[0] + batch_idx = batch['batch_idx'].view(-1, 1) + targets = torch.cat((batch_idx, batch['cls'].view(-1, 1), batch['bboxes']), 1) + targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]]) + gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy + mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) + + # Pboxes + pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) + pred_kpts = self.kpts_decode(anchor_points, pred_kpts.view(batch_size, -1, *self.kpt_shape)) # (b, h*w, 17, 3) + + _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner( + pred_scores.detach().sigmoid(), (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype), + anchor_points * stride_tensor, gt_labels, gt_bboxes, mask_gt) + + target_scores_sum = max(target_scores.sum(), 1) + + # Cls loss + # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / 
target_scores_sum # VFL way + loss[3] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE + + # Bbox loss + if fg_mask.sum(): + target_bboxes /= stride_tensor + loss[0], loss[4] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, + target_scores_sum, fg_mask) + keypoints = batch['keypoints'].to(self.device).float().clone() + keypoints[..., 0] *= imgsz[1] + keypoints[..., 1] *= imgsz[0] + + loss[1], loss[2] = self.calculate_keypoints_loss(fg_mask, target_gt_idx, keypoints, batch_idx, + stride_tensor, target_bboxes, pred_kpts) + + loss[0] *= self.hyp.box # box gain + loss[1] *= self.hyp.pose # pose gain + loss[2] *= self.hyp.kobj # kobj gain + loss[3] *= self.hyp.cls # cls gain + loss[4] *= self.hyp.dfl # dfl gain + + return loss.sum() * batch_size, loss.detach() # loss(box, cls, dfl) + + @staticmethod + def kpts_decode(anchor_points, pred_kpts): + """Decodes predicted keypoints to image coordinates.""" + y = pred_kpts.clone() + y[..., :2] *= 2.0 + y[..., 0] += anchor_points[:, [0]] - 0.5 + y[..., 1] += anchor_points[:, [1]] - 0.5 + return y + + def calculate_keypoints_loss(self, masks, target_gt_idx, keypoints, batch_idx, stride_tensor, target_bboxes, + pred_kpts): + """ + Calculate the keypoints loss for the model. + + This function calculates the keypoints loss and keypoints object loss for a given batch. The keypoints loss is + based on the difference between the predicted keypoints and ground truth keypoints. The keypoints object loss is + a binary classification loss that classifies whether a keypoint is present or not. + + Args: + masks (torch.Tensor): Binary mask tensor indicating object presence, shape (BS, N_anchors). + target_gt_idx (torch.Tensor): Index tensor mapping anchors to ground truth objects, shape (BS, N_anchors). + keypoints (torch.Tensor): Ground truth keypoints, shape (N_kpts_in_batch, N_kpts_per_object, kpts_dim). 
+ batch_idx (torch.Tensor): Batch index tensor for keypoints, shape (N_kpts_in_batch, 1). + stride_tensor (torch.Tensor): Stride tensor for anchors, shape (N_anchors, 1). + target_bboxes (torch.Tensor): Ground truth boxes in (x1, y1, x2, y2) format, shape (BS, N_anchors, 4). + pred_kpts (torch.Tensor): Predicted keypoints, shape (BS, N_anchors, N_kpts_per_object, kpts_dim). + + Returns: + (tuple): Returns a tuple containing: + - kpts_loss (torch.Tensor): The keypoints loss. + - kpts_obj_loss (torch.Tensor): The keypoints object loss. + """ + batch_idx = batch_idx.flatten() + batch_size = len(masks) + + # Find the maximum number of keypoints in a single image + max_kpts = torch.unique(batch_idx, return_counts=True)[1].max() + + # Create a tensor to hold batched keypoints + batched_keypoints = torch.zeros((batch_size, max_kpts, keypoints.shape[1], keypoints.shape[2]), + device=keypoints.device) + + # TODO: any idea how to vectorize this? + # Fill batched_keypoints with keypoints based on batch_idx + for i in range(batch_size): + keypoints_i = keypoints[batch_idx == i] + batched_keypoints[i, :keypoints_i.shape[0]] = keypoints_i + + # Expand dimensions of target_gt_idx to match the shape of batched_keypoints + target_gt_idx_expanded = target_gt_idx.unsqueeze(-1).unsqueeze(-1) + + # Use target_gt_idx_expanded to select keypoints from batched_keypoints + selected_keypoints = batched_keypoints.gather( + 1, target_gt_idx_expanded.expand(-1, -1, keypoints.shape[1], keypoints.shape[2])) + + # Divide coordinates by stride + selected_keypoints /= stride_tensor.view(1, -1, 1, 1) + + kpts_loss = 0 + kpts_obj_loss = 0 + + if masks.any(): + gt_kpt = selected_keypoints[masks] + area = xyxy2xywh(target_bboxes[masks])[:, 2:].prod(1, keepdim=True) + pred_kpt = pred_kpts[masks] + kpt_mask = gt_kpt[..., 2] != 0 if gt_kpt.shape[-1] == 3 else torch.full_like(gt_kpt[..., 0], True) + kpts_loss = self.keypoint_loss(pred_kpt, gt_kpt, kpt_mask, area) # pose loss + + if pred_kpt.shape[-1] == 3: 
class v8ClassificationLoss:
    """Criterion for image classification: mean cross-entropy between predicted logits and class labels."""

    def __call__(self, preds, batch):
        """
        Evaluate the classification loss.

        Args:
            preds (torch.Tensor): Class predictions passed straight to `cross_entropy` as its input.
            batch (dict): Batch dictionary; the targets are read from its 'cls' entry.

        Returns:
            (tuple): The differentiable loss and a detached copy of it for logging.
        """
        labels = batch['cls']
        total = torch.nn.functional.cross_entropy(preds, labels, reduction='mean')
        return total, total.detach()
def box_iou(box1, box2, eps=1e-7):
    """
    Compute the pairwise intersection-over-union between two sets of (x1, y1, x2, y2) boxes.
    Based on https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py

    Args:
        box1 (torch.Tensor): A tensor of shape (N, 4) representing N bounding boxes.
        box2 (torch.Tensor): A tensor of shape (M, 4) representing M bounding boxes.
        eps (float, optional): A small value added to the denominator to avoid division by zero. Defaults to 1e-7.

    Returns:
        (torch.Tensor): An (N, M) tensor holding the IoU of every box1/box2 pair.
    """
    # Split each box into its top-left and bottom-right corner, shaped for (N, M) broadcasting
    tl1, br1 = box1.unsqueeze(1).chunk(2, 2)  # each (N, 1, 2)
    tl2, br2 = box2.unsqueeze(0).chunk(2, 2)  # each (1, M, 2)

    # Overlap width/height, clamped so disjoint boxes contribute zero area
    overlap_wh = torch.min(br1, br2) - torch.max(tl1, tl2)
    inter = overlap_wh.clamp_(0).prod(2)

    area1 = (br1 - tl1).prod(2)
    area2 = (br2 - tl2).prod(2)

    # IoU = inter / (area1 + area2 - inter)
    return inter / (area1 + area2 - inter + eps)
Defaults to True. + GIoU (bool, optional): If True, calculate Generalized IoU. Defaults to False. + DIoU (bool, optional): If True, calculate Distance IoU. Defaults to False. + CIoU (bool, optional): If True, calculate Complete IoU. Defaults to False. + eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7. + + Returns: + (torch.Tensor): IoU, GIoU, DIoU, or CIoU values depending on the specified flags. + """ + + # Get the coordinates of bounding boxes + if xywh: # transform from xywh to xyxy + (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1) + w1_, h1_, w2_, h2_ = w1 / 2, h1 / 2, w2 / 2, h2 / 2 + b1_x1, b1_x2, b1_y1, b1_y2 = x1 - w1_, x1 + w1_, y1 - h1_, y1 + h1_ + b2_x1, b2_x2, b2_y1, b2_y2 = x2 - w2_, x2 + w2_, y2 - h2_, y2 + h2_ + else: # x1, y1, x2, y2 = box1 + b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, -1) + b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, -1) + w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps + w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps + + # Intersection area + inter = (b1_x2.minimum(b2_x2) - b1_x1.maximum(b2_x1)).clamp_(0) * \ + (b1_y2.minimum(b2_y2) - b1_y1.maximum(b2_y1)).clamp_(0) + + # Union Area + union = w1 * h1 + w2 * h2 - inter + eps + + # IoU + iou = inter / union + if CIoU or DIoU or GIoU: + cw = b1_x2.maximum(b2_x2) - b1_x1.minimum(b2_x1) # convex (smallest enclosing box) width + ch = b1_y2.maximum(b2_y2) - b1_y1.minimum(b2_y1) # convex height + if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 + c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared + rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center dist ** 2 + if CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 + v = (4 / math.pi ** 2) * (torch.atan(w2 / h2) - torch.atan(w1 / h1)).pow(2) + with torch.no_grad(): + alpha = v / (v - iou + (1 + eps)) + return iou - (rho2 / c2 + v * alpha) # CIoU + return iou - rho2 / c2 # 
def mask_iou(mask1, mask2, eps=1e-7):
    """
    Compute the pairwise IoU between two sets of flattened masks.

    Args:
        mask1 (torch.Tensor): A tensor of shape (N, n) where N is the number of ground truth objects and n is the
                        product of image width and height.
        mask2 (torch.Tensor): A tensor of shape (M, n) where M is the number of predicted objects and n is the
                        product of image width and height.
        eps (float, optional): A small value added to the denominator to avoid division by zero. Defaults to 1e-7.

    Returns:
        (torch.Tensor): A tensor of shape (N, M) representing masks IoU.
    """
    # The dot product of two flattened masks is their overlap
    overlap = torch.matmul(mask1, mask2.T).clamp_(0)
    area1 = mask1.sum(1)[:, None]  # (N, 1)
    area2 = mask2.sum(1)[None]  # (1, M)
    union = (area1 + area2) - overlap
    return overlap / (union + eps)
class ConfusionMatrix:
    """
    A class for calculating and updating a confusion matrix for object detection and classification tasks.

    Rows are indexed by the predicted class and columns by the true class. For the 'detect' task one extra
    row/column (index `nc`) represents background.

    Attributes:
        task (str): The type of task, either 'detect' or 'classify'.
        matrix (np.array): The confusion matrix, (nc + 1, nc + 1) for 'detect' and (nc, nc) otherwise.
        nc (int): The number of classes.
        conf (float): The confidence threshold for detections.
        iou_thres (float): The Intersection over Union threshold.
    """

    def __init__(self, nc, conf=0.25, iou_thres=0.45, task='detect'):
        """
        Initialize an empty confusion matrix.

        Args:
            nc (int): Number of classes.
            conf (float | None): Confidence threshold; `None` and 0.001 are replaced by 0.25.
            iou_thres (float): IoU threshold above which a detection may match a label.
            task (str): 'detect' adds a background row/column, any other value does not.
        """
        self.task = task
        self.matrix = np.zeros((nc + 1, nc + 1)) if self.task == 'detect' else np.zeros((nc, nc))
        self.nc = nc  # number of classes
        self.conf = 0.25 if conf in (None, 0.001) else conf  # apply 0.25 if default val conf is passed
        self.iou_thres = iou_thres

    def process_cls_preds(self, preds, targets):
        """
        Update confusion matrix for classification task.

        Args:
            preds (Array[N, min(nc,5)]): Predicted class labels; only column 0 is used.
            targets (Array[N, 1]): Ground truth class labels.
        """
        preds, targets = torch.cat(preds)[:, 0], torch.cat(targets)
        for p, t in zip(preds.cpu().numpy(), targets.cpu().numpy()):
            self.matrix[p][t] += 1

    def process_batch(self, detections, labels):
        """
        Update confusion matrix for object detection task.

        Args:
            detections (Array[N, 6] | None): Detected bounding boxes and their associated information.
                                      Each row should contain (x1, y1, x2, y2, conf, class).
            labels (Array[M, 5]): Ground truth bounding boxes and their associated class labels.
                                  Each row should contain (class, x1, y1, x2, y2).
        """
        if labels.size(0) == 0:  # no ground truth: every confident detection is a false positive
            if detections is not None:
                detections = detections[detections[:, 4] > self.conf]
                detection_classes = detections[:, 5].int()
                for dc in detection_classes:
                    self.matrix[dc, self.nc] += 1  # false positives
            return
        if detections is None:  # no detections: every label is missed
            gt_classes = labels[:, 0].int()  # class is column 0; iterating whole rows would index with box coords
            for gc in gt_classes:
                self.matrix[self.nc, gc] += 1  # background FN
            return

        detections = detections[detections[:, 4] > self.conf]
        gt_classes = labels[:, 0].int()
        detection_classes = detections[:, 5].int()
        iou = box_iou(labels[:, 1:], detections[:, :4])

        x = torch.where(iou > self.iou_thres)
        if x[0].shape[0]:
            # Rows of (label index, detection index, iou) for every pair above the threshold
            matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
            if x[0].shape[0] > 1:
                # Sort by IoU descending, then keep one pair per detection and one pair per label
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
        else:
            matches = np.zeros((0, 3))

        n = matches.shape[0] > 0
        m0, m1, _ = matches.transpose().astype(int)  # matched label indices, matched detection indices
        for i, gc in enumerate(gt_classes):
            j = m0 == i
            if n and j.sum() == 1:
                self.matrix[detection_classes[m1[j]], gc] += 1  # correct
            else:
                self.matrix[self.nc, gc] += 1  # true background

        # Count every unmatched detection, including when there are no matches at all; guarding this loop with
        # `if n` silently dropped all false positives for images where nothing matched.
        for i, dc in enumerate(detection_classes):
            if not (m1 == i).any():
                self.matrix[dc, self.nc] += 1  # predicted background

    def tp_fp(self):
        """Returns true positives and false positives."""
        tp = self.matrix.diagonal()  # true positives
        fp = self.matrix.sum(1) - tp  # false positives
        # fn = self.matrix.sum(0) - tp  # false negatives (missed detections)
        return (tp[:-1], fp[:-1]) if self.task == 'detect' else (tp, fp)  # remove background class if task=detect

    @TryExcept('WARNING ⚠️ ConfusionMatrix plot failure')
    @plt_settings()
    def plot(self, normalize=True, save_dir='', names=(), on_plot=None):
        """
        Plot the confusion matrix using seaborn and save it to a file.

        Args:
            normalize (bool): Whether to normalize the confusion matrix column-wise.
            save_dir (str): Directory where the plot will be saved.
            names (tuple): Names of classes, used as labels on the plot.
            on_plot (func): An optional callback to pass plots path and data when they are rendered.
        """
        import seaborn as sn

        array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1E-9) if normalize else 1)  # normalize columns
        array[array < 0.005] = np.nan  # don't annotate (would appear as 0.00)

        fig, ax = plt.subplots(1, 1, figsize=(12, 9), tight_layout=True)
        nc, nn = self.nc, len(names)  # number of classes, names
        sn.set(font_scale=1.0 if nc < 50 else 0.8)  # for label size
        labels = (0 < nn < 99) and (nn == nc)  # apply names to ticklabels
        ticklabels = (list(names) + ['background']) if labels else 'auto'
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress empty matrix RuntimeWarning: All-NaN slice encountered
            sn.heatmap(array,
                       ax=ax,
                       annot=nc < 30,
                       annot_kws={
                           'size': 8},
                       cmap='Blues',
                       fmt='.2f' if normalize else '.0f',
                       square=True,
                       vmin=0.0,
                       xticklabels=ticklabels,
                       yticklabels=ticklabels).set_facecolor((1, 1, 1))
        title = 'Confusion Matrix' + ' Normalized' * normalize
        ax.set_xlabel('True')
        ax.set_ylabel('Predicted')
        ax.set_title(title)
        plot_fname = Path(save_dir) / f'{title.lower().replace(" ", "_")}.png'
        fig.savefig(plot_fname, dpi=250)
        plt.close(fig)
        if on_plot:
            on_plot(plot_fname)

    def print(self):
        """Print the confusion matrix to the console, one row per line."""
        # Iterate the rows actually present: the matrix has nc + 1 rows for 'detect' but only nc otherwise,
        # so indexing range(nc + 1) raised IndexError for classification matrices.
        for row in self.matrix:
            LOGGER.info(' '.join(map(str, row)))
def compute_ap(recall, precision):
    """
    Compute the average precision (AP) given the recall and precision curves.

    Args:
        recall (list | np.ndarray): The recall curve.
        precision (list | np.ndarray): The precision curve.

    Returns:
        (float): Average precision.
        (np.ndarray): Precision envelope curve.
        (np.ndarray): Modified recall curve with sentinel values added at the beginning and end.
    """

    # Append sentinel values to beginning and end
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([1.0], precision, [0.0]))

    # Compute the precision envelope (running maximum taken from the right)
    mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))

    # Integrate area under curve
    method = 'interp'  # methods: 'continuous', 'interp'
    if method == 'interp':
        x = np.linspace(0, 1, 101)  # 101-point interp (COCO)
        # NumPy 2.0 renamed `trapz` to `trapezoid` and deprecated the old name; prefer the new one when present
        integrate = np.trapezoid if hasattr(np, 'trapezoid') else np.trapz
        ap = integrate(np.interp(x, mrec, mpre), x)  # integrate
    else:  # 'continuous'
        i = np.where(mrec[1:] != mrec[:-1])[0]  # points where x-axis (recall) changes
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])  # area under curve

    return ap, mpre, mrec
+ fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,). + p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,). + r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,). + f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,). + ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10). + unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,). + p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000). + r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000). + f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000). + x (np.ndarray): X-axis values for the curves. Shape: (1000,). + prec_values: Precision values at mAP@0.5 for each class. Shape: (nc, 1000). + """ + + # Sort by objectness + i = np.argsort(-conf) + tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] + + # Find unique classes + unique_classes, nt = np.unique(target_cls, return_counts=True) + nc = unique_classes.shape[0] # number of classes, number of detections + + # Create Precision-Recall curve and compute AP for each class + x, prec_values = np.linspace(0, 1, 1000), [] + + # Average precision, precision and recall curves + ap, p_curve, r_curve = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) + for ci, c in enumerate(unique_classes): + i = pred_cls == c + n_l = nt[ci] # number of labels + n_p = i.sum() # number of predictions + if n_p == 0 or n_l == 0: + continue + + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum(0) + tpc = tp[i].cumsum(0) + + # Recall + recall = tpc / (n_l + eps) # recall curve + r_curve[ci] = np.interp(-x, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases + + # Precision + precision = tpc / (tpc + fpc) # precision curve + p_curve[ci] = 
class Metric(SimpleClass):
    """
    Container for per-class evaluation results of a YOLOv8 model, with derived summary statistics.

    Attributes:
        p (list): Precision for each class. Shape: (nc,).
        r (list): Recall for each class. Shape: (nc,).
        f1 (list): F1 score for each class. Shape: (nc,).
        all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10).
        ap_class_index (list): Index of class for each AP score. Shape: (nc,).
        nc (int): Number of classes.

    Methods:
        ap50(): AP at IoU threshold of 0.5 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
        ap(): AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
        mp(): Mean precision of all classes. Returns: Float.
        mr(): Mean recall of all classes. Returns: Float.
        map50(): Mean AP at IoU threshold of 0.5 for all classes. Returns: Float.
        map75(): Mean AP at IoU threshold of 0.75 for all classes. Returns: Float.
        map(): Mean AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: Float.
        mean_results(): Mean of results, returns mp, mr, map50, map.
        class_result(i): Class-aware result, returns p[i], r[i], ap50[i], ap[i].
        maps(): mAP of each class. Returns: Array of mAP scores, shape: (nc,).
        fitness(): Model fitness as a weighted combination of metrics. Returns: Float.
        update(results): Update metric attributes with new evaluation results.
    """

    def __init__(self) -> None:
        """Create an empty Metric; all per-class containers start empty and `nc` starts at 0."""
        self.p = []  # (nc, )
        self.r = []  # (nc, )
        self.f1 = []  # (nc, )
        self.all_ap = []  # (nc, 10)
        self.ap_class_index = []  # (nc, )
        self.nc = 0

    @property
    def ap50(self):
        """
        Per-class Average Precision at the first IoU threshold (0.5).

        Returns:
            (np.ndarray, list): Column 0 of `all_ap`, or an empty list when no results are stored.
        """
        if not len(self.all_ap):
            return []
        return self.all_ap[:, 0]

    @property
    def ap(self):
        """
        Per-class Average Precision averaged over all stored IoU thresholds (0.5-0.95).

        Returns:
            (np.ndarray, list): Row-wise mean of `all_ap`, or an empty list when no results are stored.
        """
        if not len(self.all_ap):
            return []
        return self.all_ap.mean(1)

    @property
    def mp(self):
        """
        Mean precision over all classes.

        Returns:
            (float): Mean of `p`, or 0.0 when `p` is empty.
        """
        if not len(self.p):
            return 0.0
        return self.p.mean()

    @property
    def mr(self):
        """
        Mean recall over all classes.

        Returns:
            (float): Mean of `r`, or 0.0 when `r` is empty.
        """
        if not len(self.r):
            return 0.0
        return self.r.mean()

    @property
    def map50(self):
        """
        Mean Average Precision at an IoU threshold of 0.5.

        Returns:
            (float): Mean of column 0 of `all_ap`, or 0.0 when no results are stored.
        """
        if not len(self.all_ap):
            return 0.0
        return self.all_ap[:, 0].mean()

    @property
    def map75(self):
        """
        Mean Average Precision at an IoU threshold of 0.75.

        Returns:
            (float): Mean of column 5 of `all_ap`, or 0.0 when no results are stored.
        """
        if not len(self.all_ap):
            return 0.0
        return self.all_ap[:, 5].mean()

    @property
    def map(self):
        """
        Mean Average Precision over IoU thresholds of 0.5 - 0.95 in steps of 0.05.

        Returns:
            (float): Mean of every entry of `all_ap`, or 0.0 when no results are stored.
        """
        if not len(self.all_ap):
            return 0.0
        return self.all_ap.mean()

    def mean_results(self):
        """Return the summary list [mp, mr, map50, map]."""
        return [self.mp, self.mr, self.map50, self.map]

    def class_result(self, i):
        """Return the tuple (p[i], r[i], ap50[i], ap[i]) for the class stored at position i."""
        return self.p[i], self.r[i], self.ap50[i], self.ap[i]

    @property
    def maps(self):
        """Per-class mAP array of length nc; classes without a stored AP take the overall mAP."""
        per_class = np.zeros(self.nc) + self.map
        for row, class_id in enumerate(self.ap_class_index):
            per_class[class_id] = self.ap[row]
        return per_class

    def fitness(self):
        """Model fitness as a weighted combination of metrics."""
        weights = [0.0, 0.0, 0.1, 0.9]  # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
        summary = np.array(self.mean_results())
        return (summary * weights).sum()

    def update(self, results):
        """
        Replace the stored evaluation results with a new set.

        Args:
            results (tuple): A 10-element tuple, unpacked in this order:
                - p (list): Precision for each class. Shape: (nc,).
                - r (list): Recall for each class. Shape: (nc,).
                - f1 (list): F1 score for each class. Shape: (nc,).
                - all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10).
                - ap_class_index (list): Index of class for each AP score. Shape: (nc,).
                - p_curve, r_curve, f1_curve: Per-class precision, recall and F1 curves.
                - px: X-axis values shared by the curves.
                - prec_values: Precision values used for the precision-recall curve.

        Side Effects:
            Overwrites `self.p`, `self.r`, `self.f1`, `self.all_ap`, `self.ap_class_index`, `self.p_curve`,
            `self.r_curve`, `self.f1_curve`, `self.px` and `self.prec_values`.
        """
        p, r, f1, all_ap, ap_class_index, p_curve, r_curve, f1_curve, px, prec_values = results
        self.p = p
        self.r = r
        self.f1 = f1
        self.all_ap = all_ap
        self.ap_class_index = ap_class_index
        self.p_curve = p_curve
        self.r_curve = r_curve
        self.f1_curve = f1_curve
        self.px = px
        self.prec_values = prec_values

    @property
    def curves(self):
        """Returns a list of curve names; this base implementation returns an empty list."""
        return []

    @property
    def curves_results(self):
        """Returns the stored curves as [x, y, x-label, y-label] entries: PR, F1, precision and recall."""
        pr_entry = [self.px, self.prec_values, 'Recall', 'Precision']
        f1_entry = [self.px, self.f1_curve, 'Confidence', 'F1']
        p_entry = [self.px, self.p_curve, 'Confidence', 'Precision']
        r_entry = [self.px, self.r_curve, 'Confidence', 'Recall']
        return [pr_entry, f1_entry, p_entry, r_entry]
def process(self, tp, conf, pred_cls, target_cls):
    """
    Run `ap_per_class` on the given predictions and store the outcome on `self.box`.

    Args:
        tp: True-positive indicators, forwarded unchanged to `ap_per_class`.
        conf: Confidence scores, forwarded unchanged to `ap_per_class`.
        pred_cls: Predicted classes, forwarded unchanged to `ap_per_class`.
        target_cls: Target classes, forwarded unchanged to `ap_per_class`.
    """
    stats = ap_per_class(tp,
                         conf,
                         pred_cls,
                         target_cls,
                         plot=self.plot,
                         save_dir=self.save_dir,
                         names=self.names,
                         on_plot=self.on_plot)
    trimmed = stats[2:]  # the first two returned items are not stored by Metric.update
    self.box.nc = len(self.names)
    self.box.update(trimmed)
self.box.mean_results() + + def class_result(self, i): + """Return the result of evaluating the performance of an object detection model on a specific class.""" + return self.box.class_result(i) + + @property + def maps(self): + """Returns mean Average Precision (mAP) scores per class.""" + return self.box.maps + + @property + def fitness(self): + """Returns the fitness of box object.""" + return self.box.fitness() + + @property + def ap_class_index(self): + """Returns the average precision index per class.""" + return self.box.ap_class_index + + @property + def results_dict(self): + """Returns dictionary of computed performance metrics and statistics.""" + return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness])) + + @property + def curves(self): + """Returns a list of curves for accessing specific metrics curves.""" + return ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)'] + + @property + def curves_results(self): + """Returns dictionary of computed performance metrics and statistics.""" + return self.box.curves_results + + +class SegmentMetrics(SimpleClass): + """ + Calculates and aggregates detection and segmentation metrics over a given set of classes. + + Args: + save_dir (Path): Path to the directory where the output plots should be saved. Default is the current directory. + plot (bool): Whether to save the detection and segmentation plots. Default is False. + on_plot (func): An optional callback to pass plots path and data when they are rendered. Defaults to None. + names (list): List of class names. Default is an empty list. + + Attributes: + save_dir (Path): Path to the directory where the output plots should be saved. + plot (bool): Whether to save the detection and segmentation plots. + on_plot (func): An optional callback to pass plots path and data when they are rendered. + names (list): List of class names. 
def process(self, tp_b, tp_m, conf, pred_cls, target_cls):
    """
    Evaluate mask and box predictions with `ap_per_class` and store the results on `self.seg` and `self.box`.

    Args:
        tp_b (list): List of True Positive boxes.
        tp_m (list): List of True Positive masks.
        conf (list): List of confidence scores.
        pred_cls (list): List of predicted classes.
        target_cls (list): List of target classes.
    """
    # Masks first, then boxes; each pass differs only in the TP input, the plot prefix and the target Metric.
    for metric, tp, prefix in ((self.seg, tp_m, 'Mask'), (self.box, tp_b, 'Box')):
        stats = ap_per_class(tp,
                             conf,
                             pred_cls,
                             target_cls,
                             plot=self.plot,
                             on_plot=self.on_plot,
                             save_dir=self.save_dir,
                             names=self.names,
                             prefix=prefix)
        metric.nc = len(self.names)
        metric.update(stats[2:])  # the first two returned items are not stored by Metric.update
class PoseMetrics(SegmentMetrics):
    """
    Calculates and aggregates detection and pose metrics over a given set of classes.

    Args:
        save_dir (Path): Path to the directory where the output plots should be saved. Default is the current directory.
        plot (bool): Whether to save the detection and pose plots. Default is False.
        on_plot (func): An optional callback to pass plots path and data when they are rendered. Defaults to None.
        names (list): List of class names. Default is an empty tuple.

    Attributes:
        save_dir (Path): Path to the directory where the output plots should be saved.
        plot (bool): Whether to save the detection and pose plots.
        on_plot (func): An optional callback to pass plots path and data when they are rendered.
        names (list): List of class names.
        box (Metric): An instance of the Metric class holding box detection metrics.
        pose (Metric): An instance of the Metric class holding pose (keypoint) metrics.
        speed (dict): Dictionary to store the time taken in different phases of inference.

    Methods:
        process(tp_b, tp_p, conf, pred_cls, target_cls): Processes metrics over the given set of predictions.
        mean_results(): Returns the mean box metrics followed by the mean pose metrics.
        class_result(i): Returns the box metrics followed by the pose metrics of class `i`.
        maps: Returns `box.maps + pose.maps`.
        fitness: Returns the sum of the pose and box fitness scores.
        ap_class_index: Inherited; returns the box metric's class indices.
        results_dict: Inherited; maps `keys` plus 'fitness' to the mean results and the fitness score.
    """

    def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
        """Initialize the PoseMetrics class with directory path, class names, and plotting options."""
        # Forward every argument in the parent's (save_dir, plot, on_plot, names) order. Passing only
        # (save_dir, plot, names) would bind `names` to the parent's `on_plot` parameter.
        super().__init__(save_dir, plot, on_plot, names)
        # The parent already set save_dir, plot, on_plot, names, box and speed; only pose-specific state remains.
        self.pose = Metric()
        self.task = 'pose'

    def process(self, tp_b, tp_p, conf, pred_cls, target_cls):
        """
        Processes the detection and pose metrics over the given set of predictions.

        Args:
            tp_b (list): List of True Positive boxes.
            tp_p (list): List of True Positive keypoints.
            conf (list): List of confidence scores.
            pred_cls (list): List of predicted classes.
            target_cls (list): List of target classes.
        """
        results_pose = ap_per_class(tp_p,
                                    conf,
                                    pred_cls,
                                    target_cls,
                                    plot=self.plot,
                                    on_plot=self.on_plot,
                                    save_dir=self.save_dir,
                                    names=self.names,
                                    prefix='Pose')[2:]
        self.pose.nc = len(self.names)
        self.pose.update(results_pose)
        results_box = ap_per_class(tp_b,
                                   conf,
                                   pred_cls,
                                   target_cls,
                                   plot=self.plot,
                                   on_plot=self.on_plot,
                                   save_dir=self.save_dir,
                                   names=self.names,
                                   prefix='Box')[2:]
        self.box.nc = len(self.names)
        self.box.update(results_box)

    @property
    def keys(self):
        """Returns list of evaluation metric keys: box (B) keys first, then pose (P) keys."""
        return [
            'metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)',
            'metrics/precision(P)', 'metrics/recall(P)', 'metrics/mAP50(P)', 'metrics/mAP50-95(P)']

    def mean_results(self):
        """Return the mean results of box followed by pose."""
        return self.box.mean_results() + self.pose.mean_results()

    def class_result(self, i):
        """Return the class-wise box results followed by the pose results for class index `i`."""
        return self.box.class_result(i) + self.pose.class_result(i)

    @property
    def maps(self):
        """Returns `self.box.maps + self.pose.maps` (per-class mAP of both metrics combined with `+`)."""
        return self.box.maps + self.pose.maps

    @property
    def fitness(self):
        """Returns the sum of the pose fitness and the box fitness."""
        return self.pose.fitness() + self.box.fitness()

    @property
    def curves(self):
        """Returns a list of curve names: box (B) curves first, then pose (P) curves."""
        return [
            'Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)',
            'Precision-Recall(P)', 'F1-Confidence(P)', 'Precision-Confidence(P)', 'Recall-Confidence(P)']

    @property
    def curves_results(self):
        """Returns the box curve data followed by the pose curve data."""
        return self.box.curves_results + self.pose.curves_results
def process(self, targets, pred):
    """
    Compute top-1 and top-5 accuracy from batched targets and predictions and store them on the instance.

    Args:
        targets (list[torch.Tensor]): Per-batch target class tensors; concatenated along dim 0.
        pred (list[torch.Tensor]): Per-batch predicted class tensors; concatenated along dim 0. Column 0 is compared
            for top-1, any column for top-5.
    """
    all_pred = torch.cat(pred)
    all_targets = torch.cat(targets)
    hits = (all_targets[:, None] == all_pred).float()  # 1.0 where a predicted column equals the target
    top1_hits = hits[:, 0]
    topk_hits = hits.max(1).values
    per_sample = torch.stack((top1_hits, topk_hits), dim=1)  # (top1, top5) accuracy
    self.top1, self.top5 = per_sample.mean(0).tolist()
def segment2box(segment, width=640, height=640):
    """
    Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy).

    Args:
        segment (np.ndarray): The segment label as xy points; its transpose must unpack into x and y, and its dtype is
            reused for the returned array.
        width (int): The width of the image. Defaults to 640
        height (int): The height of the image. Defaults to 640

    Returns:
        (np.ndarray): [x_min, y_min, x_max, y_max] over the points lying inside the image (bounds inclusive), or four
            zeros when no point lies inside.
    """
    x, y = segment.T  # segment xy
    inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
    x, y = x[inside], y[inside]
    # Test for emptiness, not truthiness of the coordinates: points lying on x == 0 are valid and must not be
    # mistaken for "no points inside".
    if len(x) == 0:
        return np.zeros(4, dtype=segment.dtype)
    return np.array([x.min(), y.min(), x.max(), y.max()], dtype=segment.dtype)  # xyxy
def make_divisible(x, divisor):
    """
    Round `x` up to the next multiple of `divisor`.

    Args:
        x (int): The number to make divisible.
        divisor (int | torch.Tensor): The divisor. For a tensor, its maximum element (converted to int) is used.

    Returns:
        (int): The smallest multiple of the divisor that is greater than or equal to `x`.
    """
    step = int(divisor.max()) if isinstance(divisor, torch.Tensor) else divisor
    multiples = math.ceil(x / step)  # ceil, so the result never falls below x
    return multiples * step
+ max_wh (int): The maximum box width and height in pixels + + Returns: + (List[torch.Tensor]): A list of length batch_size, where each element is a tensor of + shape (num_boxes, 6 + num_masks) containing the kept boxes, with columns + (x1, y1, x2, y2, confidence, class, mask1, mask2, ...). + """ + + # Checks + assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0' + assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0' + if isinstance(prediction, (list, tuple)): # YOLOv8 model in validation model, output = (inference_out, loss_out) + prediction = prediction[0] # select only inference output + + bs = prediction.shape[0] # batch size + nc = nc or (prediction.shape[1] - 4) # number of classes + nm = prediction.shape[1] - nc - 4 + mi = 4 + nc # mask start index + xc = prediction[:, 4:mi].amax(1) > conf_thres # candidates + + # Settings + # min_wh = 2 # (pixels) minimum box width and height + time_limit = 0.5 + max_time_img * bs # seconds to quit after + multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) + + prediction = prediction.transpose(-1, -2) # shape(1,84,6300) to shape(1,6300,84) + prediction[..., :4] = xywh2xyxy(prediction[..., :4]) # xywh to xyxy + + t = time.time() + output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs + for xi, x in enumerate(prediction): # image index, image inference + # Apply constraints + # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0 # width-height + x = x[xc[xi]] # confidence + + # Cat apriori labels if autolabelling + if labels and len(labels[xi]): + lb = labels[xi] + v = torch.zeros((len(lb), nc + nm + 4), device=x.device) + v[:, :4] = xywh2xyxy(lb[:, 1:5]) # box + v[range(len(lb)), lb[:, 0].long() + 4] = 1.0 # cls + x = torch.cat((x, v), 0) + + # If none remain process next image + if not x.shape[0]: + continue + + # Detections matrix nx6 (xyxy, conf, cls) + box, cls, mask = x.split((4, nc, 
def clip_boxes(boxes, shape):
    """
    Clip xyxy bounding boxes in place to the limits given by an image shape.

    Args:
        boxes (torch.Tensor | numpy.ndarray): The bounding boxes to clip; x values sit at indices 0 and 2 of the last
            axis, y values at indices 1 and 3.
        shape (tuple): The image shape as (height, width, ...); only the first two entries are read.

    Returns:
        (torch.Tensor | numpy.ndarray): The same `boxes` object, modified in place.
    """
    max_y, max_x = shape[0], shape[1]
    if not isinstance(boxes, torch.Tensor):  # np.array (faster grouped)
        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, max_x)  # x1, x2
        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, max_y)  # y1, y2
        return boxes
    # Tensor path: one column at a time with out-of-place clamp (WARNING: inplace .clamp_() Apple MPS bug)
    for column, upper in enumerate((max_x, max_y, max_x, max_y)):  # x1, y1, x2, y2
        boxes[..., column] = boxes[..., column].clamp(0, upper)
    return boxes
def xyxy2xywh(x):
    """
    Convert boxes from corner format (x1, y1, x2, y2) to center format (x_center, y_center, width, height).

    Args:
        x (np.ndarray | torch.Tensor): Boxes whose last dimension has size 4, in (x1, y1, x2, y2) order.

    Returns:
        y (np.ndarray | torch.Tensor): A new array/tensor of the same type and shape in (x, y, width, height) order.
    """
    assert x.shape[-1] == 4, f'input shape last dimension expected 4 but input shape is {x.shape}'
    if isinstance(x, torch.Tensor):
        y = torch.empty_like(x)  # every element is overwritten below, so no need to clone/copy
    else:
        y = np.empty_like(x)
    left, top, right, bottom = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
    y[..., 0] = (left + right) / 2  # x center
    y[..., 1] = (top + bottom) / 2  # y center
    y[..., 2] = right - left  # width
    y[..., 3] = bottom - top  # height
    return y
def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
    """
    Convert normalized center-format boxes (x, y, width, height) to pixel corner-format boxes (x1, y1, x2, y2).

    Args:
        x (np.ndarray | torch.Tensor): Boxes whose last dimension has size 4, in (x, y, width, height) order.
        w (int): Width of the image, used to scale x values. Defaults to 640
        h (int): Height of the image, used to scale y values. Defaults to 640
        padw (int): Offset added to x values after scaling. Defaults to 0
        padh (int): Offset added to y values after scaling. Defaults to 0

    Returns:
        y (np.ndarray | torch.Tensor): A new array/tensor in [x1, y1, x2, y2] order where x1,y1 is the top-left corner
            and x2,y2 is the bottom-right corner of the bounding box.
    """
    assert x.shape[-1] == 4, f'input shape last dimension expected 4 but input shape is {x.shape}'
    if isinstance(x, torch.Tensor):
        y = torch.empty_like(x)  # every element is overwritten below, so no need to clone/copy
    else:
        y = np.empty_like(x)
    center_x, center_y = x[..., 0], x[..., 1]
    half_w, half_h = x[..., 2] / 2, x[..., 3] / 2
    y[..., 0] = w * (center_x - half_w) + padw  # top left x
    y[..., 1] = h * (center_y - half_h) + padh  # top left y
    y[..., 2] = w * (center_x + half_w) + padw  # bottom right x
    y[..., 3] = h * (center_y + half_h) + padh  # bottom right y
    return y
def xywh2ltwh(x):
    """
    Convert centre-format boxes [x, y, w, h] to top-left-format boxes [x1, y1, w, h].

    Args:
        x (np.ndarray | torch.Tensor): Boxes in xywh format (centre x, centre y, width, height).

    Returns:
        (np.ndarray | torch.Tensor): A copy of `x` whose first two components are the top-left corner; width and
            height are carried over unchanged.
    """
    if isinstance(x, torch.Tensor):
        out = x.clone()
    else:
        out = np.copy(x)
    # Component `axis` is a centre coordinate and `axis + 2` its matching extent (x/width, then y/height)
    for axis in (0, 1):
        out[..., axis] = x[..., axis] - x[..., axis + 2] / 2
    return out
def xywhr2xyxyxyxy(center):
    """
    Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, xy3, xy4].

    The input is never modified: the degree-to-radian conversion is done on a new array/tensor rather than in place
    on the column view obtained from `center.T`.

    Args:
        center (numpy.ndarray | torch.Tensor): Input data in [cx, cy, w, h, rotation] format of shape (n, 5), with
            the rotation given in degrees.

    Returns:
        (numpy.ndarray | torch.Tensor): Converted corner points of shape (n, 8), ordered x1, y1, ..., x4, y4.
    """
    is_numpy = isinstance(center, np.ndarray)
    cos, sin = (np.cos, np.sin) if is_numpy else (torch.cos, torch.sin)

    cx, cy, w, h, rotation = center.T
    # `rotation` is a view into `center`; an in-place `*=` would overwrite the caller's data (and fail for integer
    # inputs), so build a fresh value instead.
    rotation = rotation * (math.pi / 180.0)  # degrees to radians

    dx = w / 2
    dy = h / 2

    cos_rot = cos(rotation)
    sin_rot = sin(rotation)
    dx_cos_rot = dx * cos_rot
    dx_sin_rot = dx * sin_rot
    dy_cos_rot = dy * cos_rot
    dy_sin_rot = dy * sin_rot

    x1 = cx - dx_cos_rot - dy_sin_rot
    y1 = cy + dx_sin_rot - dy_cos_rot
    x2 = cx + dx_cos_rot - dy_sin_rot
    y2 = cy - dx_sin_rot - dy_cos_rot
    x3 = cx + dx_cos_rot + dy_sin_rot
    y3 = cy - dx_sin_rot + dy_cos_rot
    x4 = cx - dx_cos_rot + dy_sin_rot
    y4 = cy + dx_sin_rot + dy_cos_rot

    corners = (x1, y1, x2, y2, x3, y3, x4, y4)
    return np.vstack(corners).T if is_numpy else torch.stack(corners, dim=1)
def crop_mask(masks, boxes):
    """
    Zero out every mask pixel that lies outside the mask's own bounding box.

    A pixel at column c / row r is kept when x1 <= c < x2 and y1 <= r < y2.

    Args:
        masks (torch.Tensor): [n, h, w] tensor of masks.
        boxes (torch.Tensor): [n, 4] tensor of (x1, y1, x2, y2) boxes expressed in mask pixel coordinates.

    Returns:
        (torch.Tensor): [n, h, w] masks multiplied by the in-box indicator.
    """
    _, height, width = masks.shape
    left, top, right, bottom = torch.chunk(boxes[:, :, None], 4, 1)  # each has shape (n, 1, 1)
    col_idx = torch.arange(width, device=masks.device, dtype=left.dtype)[None, None, :]  # shape (1, 1, w)
    row_idx = torch.arange(height, device=masks.device, dtype=left.dtype)[None, :, None]  # shape (1, h, 1)

    inside_x = (col_idx >= left) * (col_idx < right)
    inside_y = (row_idx >= top) * (row_idx < bottom)
    return masks * (inside_x * inside_y)
def process_mask(protos, masks_in, bboxes, shape, upsample=False):
    """
    Build binary instance masks from mask-head prototypes and crop each one to its bounding box.

    Args:
        protos (torch.Tensor): Prototype tensor of shape [mask_dim, mask_h, mask_w].
        masks_in (torch.Tensor): Mask coefficients of shape [n, mask_dim], n being the number of masks after NMS.
        bboxes (torch.Tensor): Boxes of shape [n, 4] given in input-image coordinates.
        shape (tuple): Input image size as (h, w).
        upsample (bool): If True, bilinearly resize the cropped masks to `shape` before thresholding.
            Defaults to False.

    Returns:
        (torch.Tensor): Masks thresholded at 0.5, of shape [n, mask_h, mask_w], or [n, h, w] when `upsample` is True.
    """
    channels, proto_h, proto_w = protos.shape  # CHW
    img_h, img_w = shape
    flat_protos = protos.float().view(channels, -1)
    masks = (masks_in @ flat_protos).sigmoid().view(-1, proto_h, proto_w)  # CHW

    # Bring the boxes from image resolution down to prototype resolution (on a copy, `bboxes` stays untouched)
    x_ratio = proto_w / img_w
    y_ratio = proto_h / img_h
    scaled_boxes = bboxes.clone()
    scaled_boxes[:, 0] *= x_ratio
    scaled_boxes[:, 1] *= y_ratio
    scaled_boxes[:, 2] *= x_ratio
    scaled_boxes[:, 3] *= y_ratio

    masks = crop_mask(masks, scaled_boxes)  # CHW
    if upsample:
        masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
    return masks.gt_(0.5)
def scale_masks(masks, shape, padding=True):
    """
    Crop away letterbox padding from a batch of masks and resize them to `shape`.

    Args:
        masks (torch.Tensor): Masks of shape (N, C, H, W).
        shape (tuple): Target (height, width).
        padding (bool): If True, the padding is treated as split evenly between both sides (YOLO-style letterbox)
            and the crop starts after the leading half. If False, the crop starts at the origin.

    Returns:
        (torch.Tensor): The cropped masks, bilinearly interpolated to `shape` (NCHW).
    """
    src_h, src_w = masks.shape[2:]
    gain = min(src_h / shape[0], src_w / shape[1])  # gain = old / new
    pad_w = src_w - shape[1] * gain
    pad_h = src_h - shape[0] * gain
    if padding:
        pad_w /= 2
        pad_h /= 2
        top = int(round(pad_h - 0.1))
        left = int(round(pad_w - 0.1))
    else:
        top = left = 0
    bottom = int(round(src_h - pad_h + 0.1))
    right = int(round(src_w - pad_w + 0.1))

    cropped = masks[..., top:bottom, left:right]
    return F.interpolate(cropped, shape, mode='bilinear', align_corners=False)  # NCHW
def masks2segments(masks, strategy='largest'):
    """
    Convert a batch of masks (n, h, w) into a list of n polygon segments, each an array of xy points.

    Args:
        masks (torch.Tensor): Mask tensor of shape (n, h, w); it is converted to uint8 before contour extraction.
        strategy (str): 'concat' joins the points of all external contours of a mask, 'largest' keeps only the
            contour with the most points. Defaults to 'largest'.

    Returns:
        (List[np.ndarray]): One float32 array of shape (k, 2) per mask; shape (0, 2) when no contour is found.

    Raises:
        ValueError: If `strategy` is neither 'concat' nor 'largest'.
    """
    # Reject unknown strategies explicitly; otherwise the raw contour tuple would reach `.astype` below and fail
    # with an unrelated AttributeError.
    if strategy not in ('concat', 'largest'):
        raise ValueError(f"strategy must be 'concat' or 'largest', but got {strategy!r}")

    segments = []
    for mask in masks.int().cpu().numpy().astype('uint8'):
        contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
        if contours:
            if strategy == 'concat':  # concatenate all segments
                points = np.concatenate([c.reshape(-1, 2) for c in contours])
            else:  # 'largest': select the contour with the most points
                largest = np.array([len(c) for c in contours]).argmax()
                points = np.array(contours[largest]).reshape(-1, 2)
        else:
            points = np.zeros((0, 2))  # no segments found
        segments.append(points.astype('float32'))
    return segments
def clean_str(s):
    """
    Replace every special character in a string with an underscore.

    Args:
        s (str): The string to sanitise.

    Returns:
        (str): `s` with each character from the special-character set swapped for '_'.
    """
    special_chars = re.compile('[|@#!¡·$€%&()=?¿^*;:,¨´><+]')
    return special_chars.sub('_', s)
class Colors:
    """
    Ultralytics default color palette https://ultralytics.com/.

    Holds a fixed list of RGB colors parsed from hex codes and a separate pose palette, and hands out colors by
    integer index.

    Attributes:
        palette (list of tuple): RGB tuples, one per hex code.
        n (int): Number of colors in `palette`.
        pose_palette (np.ndarray): Pose color table with dtype np.uint8, one RGB row per entry.
    """

    def __init__(self):
        """Build the RGB palette from the built-in hex codes and create the pose palette array."""
        hex_codes = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334',
                     '00D4BB', '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF',
                     'FF95C8', 'FF37C7')
        self.palette = []
        for code in hex_codes:
            self.palette.append(self.hex2rgb(f'#{code}'))
        self.n = len(self.palette)
        self.pose_palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102], [230, 230, 0], [255, 153, 255],
                                      [153, 204, 255], [255, 102, 255], [255, 51, 255], [102, 178, 255], [51, 153, 255],
                                      [255, 153, 153], [255, 102, 102], [255, 51, 51], [153, 255, 153], [102, 255, 102],
                                      [51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0], [255, 255, 255]],
                                     dtype=np.uint8)

    def __call__(self, i, bgr=False):
        """Return the palette color for index `i` (wrapping around the palette), as BGR when `bgr` is True."""
        rgb = self.palette[int(i) % self.n]
        if bgr:
            return rgb[2], rgb[1], rgb[0]
        return rgb

    @staticmethod
    def hex2rgb(h):
        """Convert a '#RRGGBB' hex color code to an (R, G, B) tuple of ints (i.e. default PIL order)."""
        return int(h[1:3], 16), int(h[3:5], 16), int(h[5:7], 16)


colors = Colors()  # create instance for 'from utils.plots import colors'
def __init__(self, im, line_width=None, font_size=None, font='Arial.ttf', pil=False, example='abc'):
    """
    Initialize the Annotator with an image, line width and the color palettes for keypoints and limbs.

    Args:
        im (Image.Image | np.ndarray): Image to annotate. A numpy image must be contiguous. A PIL image always
            selects the PIL drawing backend.
        line_width (int, optional): Line width; when falsy it is derived from the image size.
        font_size (int, optional): Font size for the PIL backend; when falsy it is derived from the image size.
        font (str): Font file requested through `check_font` for the PIL backend.
        pil (bool): Force the PIL backend instead of cv2.
        example (str): Sample label text; non-ASCII text switches to the PIL backend with a Unicode font.
    """
    # PIL images expose neither `.data` nor `.shape`, so the numpy-only checks must be skipped for them
    input_is_pil = isinstance(im, Image.Image)
    if not input_is_pil:
        assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images.'
    non_ascii = not is_ascii(example)  # non-latin labels, i.e. asian, arabic, cyrillic
    self.pil = pil or non_ascii or input_is_pil
    dims = im.size if input_is_pil else im.shape
    self.lw = line_width or max(round(sum(dims) / 2 * 0.003), 2)  # line width
    if self.pil:  # use PIL
        self.im = im if input_is_pil else Image.fromarray(im)
        self.draw = ImageDraw.Draw(self.im)
        try:
            font = check_font('Arial.Unicode.ttf' if non_ascii else font)
            size = font_size or max(round(sum(self.im.size) / 2 * 0.035), 12)
            self.font = ImageFont.truetype(str(font), size)
        except Exception:
            # Best effort: fall back to PIL's built-in font when the requested one cannot be loaded
            self.font = ImageFont.load_default()
        # Deprecation fix for w, h = getsize(string) -> _, _, w, h = getbox(string)
        if check_version(pil_version, '9.2.0'):
            self.font.getsize = lambda x: self.font.getbbox(x)[2:4]  # text width, height
    else:  # use cv2
        self.im = im
    self.tf = max(self.lw - 1, 1)  # font thickness used by the cv2 drawing paths
    self.sf = self.lw / 3  # font scale used by the cv2 drawing paths
    # Pose
    self.skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], [7, 9],
                     [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]

    self.limb_color = colors.pose_palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]]
    self.kpt_color = colors.pose_palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]]
def masks(self, masks, colors, im_gpu, alpha=0.5, retina_masks=False):
    """
    Plot masks on image.

    With no masks the image is simply overwritten with `im_gpu` scaled to 0-255 and nothing is blended. Otherwise
    the masks are alpha-blended over the channel-flipped `im_gpu` and the result is written into `self.im`.

    Args:
        masks (tensor): Predicted masks, shape: [n, h, w]
        colors (List[List[Int]]): Colors for predicted masks, [[r, g, b] * n]
        im_gpu (tensor): Image tensor, shape: [3, h, w], range: [0, 1]; moved to the masks' device when they differ
        alpha (float): Mask transparency: 0.0 fully transparent, 1.0 opaque
        retina_masks (bool): If True the blended image is written as is, otherwise it is first resized with
            `ops.scale_image` to the shape of `self.im`. Defaults to False.
    """
    if self.pil:
        # Convert to numpy first
        self.im = np.asarray(self.im).copy()
    if len(masks) == 0:
        # Nothing to blend. The reductions below (max over dim 0, indexing the last cumprod entry) are undefined
        # for zero masks, so stop after copying the image.
        self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255
    else:
        if im_gpu.device != masks.device:
            im_gpu = im_gpu.to(masks.device)
        colors = torch.tensor(colors, device=masks.device, dtype=torch.float32) / 255.0  # shape(n,3)
        colors = colors[:, None, None]  # shape(n,1,1,3)
        masks = masks.unsqueeze(3)  # shape(n,h,w,1)
        masks_color = masks * (colors * alpha)  # shape(n,h,w,3)

        inv_alpha_masks = (1 - masks * alpha).cumprod(0)  # shape(n,h,w,1)
        mcs = masks_color.max(dim=0).values  # shape(h,w,3)

        im_gpu = im_gpu.flip(dims=[0])  # flip channel
        im_gpu = im_gpu.permute(1, 2, 0).contiguous()  # shape(h,w,3)
        im_gpu = im_gpu * inv_alpha_masks[-1] + mcs
        im_mask_np = (im_gpu * 255).byte().cpu().numpy()
        self.im[:] = im_mask_np if retina_masks else ops.scale_image(im_mask_np, self.im.shape)
    if self.pil:
        # Convert im back to PIL and update draw
        self.fromarray(self.im)
def text(self, xy, text, txt_color=(255, 255, 255), anchor='top', box_style=False):
    """
    Add text to the image using PIL or cv2.

    The position is copied into local variables, so `xy` may be any two-element sequence (tuple or list) and is
    never modified.

    Args:
        xy (Sequence[int]): (x, y) position of the text.
        text (str): Text to draw. In PIL mode, text containing newlines is drawn line by line.
        txt_color (tuple): Text color, or the box fill color when `box_style` is True.
        anchor (str): 'bottom' shifts y up by the font-measured text height minus one; this uses `self.font`.
        box_style (bool): If True, draw a filled box in `txt_color` behind the text and draw the text in white.
    """
    x, y = xy[0], xy[1]  # work on copies: tuples cannot be updated in place and lists belong to the caller
    if anchor == 'bottom':  # start y from font bottom
        w, h = self.font.getsize(text)  # text width, height
        y += 1 - h
    if self.pil:
        if box_style:
            w, h = self.font.getsize(text)
            self.draw.rectangle((x, y, x + w + 1, y + h + 1), fill=txt_color)
            # Using `txt_color` for background and draw fg with white color
            txt_color = (255, 255, 255)
        if '\n' in text:
            _, h = self.font.getsize(text)
            for line in text.split('\n'):
                self.draw.text((x, y), line, fill=txt_color, font=self.font)
                y += h  # advance to the next line
        else:
            self.draw.text((x, y), text, fill=txt_color, font=self.font)
    else:
        origin = (x, y)
        if box_style:
            w, h = cv2.getTextSize(text, 0, fontScale=self.sf, thickness=self.tf)[0]  # text width, height
            outside = y - h >= 3
            p2 = x + w, y - h - 3 if outside else y + h + 3
            cv2.rectangle(self.im, origin, p2, txt_color, -1, cv2.LINE_AA)  # filled
            # Using `txt_color` for background and draw fg with white color
            txt_color = (255, 255, 255)
        cv2.putText(self.im, text, origin, 0, self.sf, txt_color, thickness=self.tf, lineType=cv2.LINE_AA)
def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, BGR=False, save=True):
    """
    Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop.

    The box is converted to centre format, optionally squared, enlarged by `gain` and `pad`, clipped to the image
    and used to slice `im`.

    Args:
        xyxy (torch.Tensor | list): Bounding box in xyxy format. A list may hold tensors or plain numbers.
        im (numpy.ndarray): The input image.
        file (Path | str, optional): Where the crop is saved; the suffix is replaced by '.jpg' and the name is
            passed through `increment_path`. Defaults to 'im.jpg'.
        gain (float, optional): Multiplicative factor applied to the box width and height. Defaults to 1.02.
        pad (int, optional): Pixels added to the box width and height after the gain. Defaults to 10.
        square (bool, optional): If True, both sides are set to the longer side before gain/pad. Defaults to False.
        BGR (bool, optional): If True the returned crop keeps the channel order of `im`, otherwise its channel axis
            is reversed. Defaults to False.
        save (bool, optional): If True, the cropped image is written to disk. Defaults to True.

    Returns:
        (numpy.ndarray): The cropped image.

    Example:
        ```python
        from ultralytics.utils.plotting import save_one_box

        xyxy = [50, 50, 150, 150]
        im = cv2.imread('image.jpg')
        cropped_im = save_one_box(xyxy, im, file='cropped.jpg', square=True)
        ```
    """

    if not isinstance(xyxy, torch.Tensor):  # may be list
        # torch.stack only accepts tensors, so plain numbers (as in the example above) are wrapped first
        xyxy = torch.stack([v if isinstance(v, torch.Tensor) else torch.tensor(float(v)) for v in xyxy])
    b = ops.xyxy2xywh(xyxy.view(-1, 4))  # boxes
    if square:
        b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # attempt rectangle to square
    b[:, 2:] = b[:, 2:] * gain + pad  # box wh * gain + pad
    xyxy = ops.xywh2xyxy(b).long()
    xyxy = ops.clip_boxes(xyxy, im.shape)
    crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)]
    if save:
        file = Path(file)  # accept plain string paths as well
        file.parent.mkdir(parents=True, exist_ok=True)  # make directory
        f = str(increment_path(file).with_suffix('.jpg'))
        # cv2.imwrite(f, crop)  # save BGR, https://github.com/ultralytics/yolov5/issues/7007 chroma subsampling issue
        Image.fromarray(crop[..., ::-1]).save(f, quality=95, subsampling=0)  # save RGB
    return crop
dtype=np.float32), + paths=None, + fname='images.jpg', + names=None, + on_plot=None): + """Plot image grid with labels.""" + if isinstance(images, torch.Tensor): + images = images.cpu().float().numpy() + if isinstance(cls, torch.Tensor): + cls = cls.cpu().numpy() + if isinstance(bboxes, torch.Tensor): + bboxes = bboxes.cpu().numpy() + if isinstance(masks, torch.Tensor): + masks = masks.cpu().numpy().astype(int) + if isinstance(kpts, torch.Tensor): + kpts = kpts.cpu().numpy() + if isinstance(batch_idx, torch.Tensor): + batch_idx = batch_idx.cpu().numpy() + + max_size = 1920 # max image size + max_subplots = 16 # max image subplots, i.e. 4x4 + bs, _, h, w = images.shape # batch size, _, height, width + bs = min(bs, max_subplots) # limit plot images + ns = np.ceil(bs ** 0.5) # number of subplots (square) + if np.max(images[0]) <= 1: + images *= 255 # de-normalise (optional) + + # Build Image + mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init + for i, im in enumerate(images): + if i == max_subplots: # if last batch has fewer images than we expect + break + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + im = im.transpose(1, 2, 0) + mosaic[y:y + h, x:x + w, :] = im + + # Resize (optional) + scale = max_size / ns / max(h, w) + if scale < 1: + h = math.ceil(scale * h) + w = math.ceil(scale * w) + mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) + + # Annotate + fs = int((h + w) * ns * 0.01) # font size + annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names) + for i in range(i + 1): + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders + if paths: + annotator.text((x + 5, y + 5), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames + if len(cls) > 0: + idx = batch_idx == i + classes = cls[idx].astype('int') + + if len(bboxes): + boxes = ops.xywh2xyxy(bboxes[idx, 
:4]).T + labels = bboxes.shape[1] == 4 # labels if no conf column + conf = None if labels else bboxes[idx, 4] # check for confidence presence (label vs pred) + + if boxes.shape[1]: + if boxes.max() <= 1.01: # if normalized with tolerance 0.01 + boxes[[0, 2]] *= w # scale to pixels + boxes[[1, 3]] *= h + elif scale < 1: # absolute coords need scale if image scales + boxes *= scale + boxes[[0, 2]] += x + boxes[[1, 3]] += y + for j, box in enumerate(boxes.T.tolist()): + c = classes[j] + color = colors(c) + c = names.get(c, c) if names else c + if labels or conf[j] > 0.25: # 0.25 conf thresh + label = f'{c}' if labels else f'{c} {conf[j]:.1f}' + annotator.box_label(box, label, color=color) + elif len(classes): + for c in classes: + color = colors(c) + c = names.get(c, c) if names else c + annotator.text((x, y), f'{c}', txt_color=color, box_style=True) + + # Plot keypoints + if len(kpts): + kpts_ = kpts[idx].copy() + if len(kpts_): + if kpts_[..., 0].max() <= 1.01 or kpts_[..., 1].max() <= 1.01: # if normalized with tolerance .01 + kpts_[..., 0] *= w # scale to pixels + kpts_[..., 1] *= h + elif scale < 1: # absolute coords need scale if image scales + kpts_ *= scale + kpts_[..., 0] += x + kpts_[..., 1] += y + for j in range(len(kpts_)): + if labels or conf[j] > 0.25: # 0.25 conf thresh + annotator.kpts(kpts_[j]) + + # Plot masks + if len(masks): + if idx.shape[0] == masks.shape[0]: # overlap_masks=False + image_masks = masks[idx] + else: # overlap_masks=True + image_masks = masks[[i]] # (1, 640, 640) + nl = idx.sum() + index = np.arange(nl).reshape((nl, 1, 1)) + 1 + image_masks = np.repeat(image_masks, nl, axis=0) + image_masks = np.where(image_masks == index, 1.0, 0.0) + + im = np.asarray(annotator.im).copy() + for j, box in enumerate(boxes.T.tolist()): + if labels or conf[j] > 0.25: # 0.25 conf thresh + color = colors(classes[j]) + mh, mw = image_masks[j].shape + if mh != h or mw != w: + mask = image_masks[j].astype(np.uint8) + mask = cv2.resize(mask, (w, h)) + mask 
= mask.astype(bool) + else: + mask = image_masks[j].astype(bool) + with contextlib.suppress(Exception): + im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 + annotator.fromarray(im) + annotator.im.save(fname) # save + if on_plot: + on_plot(fname) + + +@plt_settings() +def plot_results(file='path/to/results.csv', dir='', segment=False, pose=False, classify=False, on_plot=None): + """ + Plot training results from a results CSV file. The function supports various types of data including segmentation, + pose estimation, and classification. Plots are saved as 'results.png' in the directory where the CSV is located. + + Args: + file (str, optional): Path to the CSV file containing the training results. Defaults to 'path/to/results.csv'. + dir (str, optional): Directory where the CSV file is located if 'file' is not provided. Defaults to ''. + segment (bool, optional): Flag to indicate if the data is for segmentation. Defaults to False. + pose (bool, optional): Flag to indicate if the data is for pose estimation. Defaults to False. + classify (bool, optional): Flag to indicate if the data is for classification. Defaults to False. + on_plot (callable, optional): Callback function to be executed after plotting. Takes filename as an argument. + Defaults to None. 
def plt_color_scatter(v, f, bins=20, cmap='viridis', alpha=0.8, edgecolors='none'):
    """
    Plots a scatter plot with points colored by the count of the 2D histogram bin they fall into.

    Args:
        v (array-like): Values for the x-axis.
        f (array-like): Values for the y-axis.
        bins (int, optional): Number of bins for the histogram. Defaults to 20.
        cmap (str, optional): Colormap for the scatter plot. Defaults to 'viridis'.
        alpha (float, optional): Alpha for the scatter plot. Defaults to 0.8.
        edgecolors (str, optional): Edge colors for the scatter plot. Defaults to 'none'.

    Examples:
        >>> v = np.random.rand(100)
        >>> f = np.random.rand(100)
        >>> plt_color_scatter(v, f)
    """

    # Calculate 2D histogram and the bin index of every point
    hist, xedges, yedges = np.histogram2d(v, f, bins=bins)
    # histogram2d bins are half-open [e_i, e_i+1) except the last, which also includes its right edge.
    # digitize(right=False) follows the same convention; clipping folds the maximum into the last bin
    # and keeps the minimum in bin 0 (with right=True the minimum produced index -1, i.e. the last bin).
    x_bin = np.clip(np.digitize(v, xedges) - 1, 0, hist.shape[0] - 1)
    y_bin = np.clip(np.digitize(f, yedges) - 1, 0, hist.shape[1] - 1)
    density = hist[x_bin, y_bin]

    # Scatter plot
    plt.scatter(v, f, c=density, cmap=cmap, alpha=alpha, edgecolors=edgecolors)
def output_to_target(output, max_det=300):
    """
    Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting.

    Args:
        output (list[torch.Tensor]): One tensor per image; the first six columns of each row are split into
            a 4-column box (converted with `ops.xyxy2xywh`), a confidence column and a class column.
        max_det (int, optional): Maximum number of rows taken from each image. Defaults to 300.

    Returns:
        (tuple[np.ndarray, np.ndarray, np.ndarray]): Batch ids of shape (n,), class ids of shape (n,) and the
            remaining columns [x, y, w, h, conf] of shape (n, 5). All three are empty when `output` is empty.
    """
    targets = []
    for i, o in enumerate(output):
        box, conf, cls = o[:max_det, :6].cpu().split((4, 1, 1), 1)
        j = torch.full((conf.shape[0], 1), i)  # image index column
        targets.append(torch.cat((j, cls, ops.xyxy2xywh(box), conf), 1))
    # torch.cat() raises on an empty list, so fall back to an explicit (0, 7) result
    targets = (torch.cat(targets, 0) if targets else torch.zeros((0, 7))).numpy()
    return targets[:, 0], targets[:, 1], targets[:, 2:]
class TaskAlignedAssigner(nn.Module):
    """
    A task-aligned assigner for object detection.

    This class assigns ground-truth (gt) objects to anchors based on the task-aligned metric, which combines both
    classification and localization information.

    Attributes:
        topk (int): The number of top candidates to consider.
        num_classes (int): The number of object classes.
        bg_idx (int): Label used for background anchors; equal to `num_classes`.
        alpha (float): The alpha parameter for the classification component of the task-aligned metric.
        beta (float): The beta parameter for the localization component of the task-aligned metric.
        eps (float): A small value to prevent division by zero.
    """

    def __init__(self, topk=13, num_classes=80, alpha=1.0, beta=6.0, eps=1e-9):
        """Initialize a TaskAlignedAssigner object with customizable hyperparameters."""
        super().__init__()
        self.topk = topk
        self.num_classes = num_classes
        self.bg_idx = num_classes
        self.alpha = alpha
        self.beta = beta
        self.eps = eps

    @torch.no_grad()
    def forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt):
        """
        Compute the task-aligned assignment. Reference code is available at
        https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py.

        Args:
            pd_scores (Tensor): shape(bs, num_total_anchors, num_classes)
            pd_bboxes (Tensor): shape(bs, num_total_anchors, 4)
            anc_points (Tensor): shape(num_total_anchors, 2)
            gt_labels (Tensor): shape(bs, n_max_boxes, 1)
            gt_bboxes (Tensor): shape(bs, n_max_boxes, 4)
            mask_gt (Tensor): shape(bs, n_max_boxes, 1)

        Returns:
            target_labels (Tensor): shape(bs, num_total_anchors)
            target_bboxes (Tensor): shape(bs, num_total_anchors, 4)
            target_scores (Tensor): shape(bs, num_total_anchors, num_classes)
            fg_mask (Tensor): shape(bs, num_total_anchors)
            target_gt_idx (Tensor): shape(bs, num_total_anchors)
        """
        # Cached on the instance because the helper methods below read them
        self.bs = pd_scores.size(0)
        self.n_max_boxes = gt_bboxes.size(1)

        if self.n_max_boxes == 0:
            # No ground truth in the whole batch: every anchor is background and all targets are zero
            device = gt_bboxes.device
            return (torch.full_like(pd_scores[..., 0], self.bg_idx).to(device), torch.zeros_like(pd_bboxes).to(device),
                    torch.zeros_like(pd_scores).to(device), torch.zeros_like(pd_scores[..., 0]).to(device),
                    torch.zeros_like(pd_scores[..., 0]).to(device))

        mask_pos, align_metric, overlaps = self.get_pos_mask(pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points,
                                                             mask_gt)

        target_gt_idx, fg_mask, mask_pos = select_highest_overlaps(mask_pos, overlaps, self.n_max_boxes)

        # Assigned target
        target_labels, target_bboxes, target_scores = self.get_targets(gt_labels, gt_bboxes, target_gt_idx, fg_mask)

        # Normalize: rescale each gt's metric so that its best anchor scores that gt's best overlap
        align_metric *= mask_pos
        pos_align_metrics = align_metric.amax(dim=-1, keepdim=True)  # b, max_num_obj
        pos_overlaps = (overlaps * mask_pos).amax(dim=-1, keepdim=True)  # b, max_num_obj
        norm_align_metric = (align_metric * pos_overlaps / (pos_align_metrics + self.eps)).amax(-2).unsqueeze(-1)
        target_scores = target_scores * norm_align_metric

        return target_labels, target_bboxes, target_scores, fg_mask.bool(), target_gt_idx

    def get_pos_mask(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt):
        """
        Build the positive-anchor mask of shape (b, max_num_obj, h*w).

        Returns:
            (tuple): `mask_pos`, the product of the top-k mask, the in-gt mask and `mask_gt`, followed by the
                `align_metric` and `overlaps` tensors produced by `get_box_metrics`.
        """
        mask_in_gts = select_candidates_in_gts(anc_points, gt_bboxes)
        # Get anchor_align metric, (b, max_num_obj, h*w)
        align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes, mask_in_gts * mask_gt)
        # Get topk_metric mask, (b, max_num_obj, h*w)
        mask_topk = self.select_topk_candidates(align_metric, topk_mask=mask_gt.expand(-1, -1, self.topk).bool())
        # Merge all mask to a final mask, (b, max_num_obj, h*w)
        mask_pos = mask_topk * mask_in_gts * mask_gt

        return mask_pos, align_metric, overlaps

    def get_box_metrics(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, mask_gt):
        """
        Compute alignment metric given predicted and ground truth bounding boxes.

        Scores and overlaps are only evaluated where `mask_gt` is set; every other entry stays zero.

        Returns:
            (tuple[Tensor, Tensor]): `align_metric = scores**alpha * overlaps**beta` and `overlaps`, both of
                shape (b, max_num_obj, h*w).
        """
        na = pd_bboxes.shape[-2]
        mask_gt = mask_gt.bool()  # b, max_num_obj, h*w
        overlaps = torch.zeros([self.bs, self.n_max_boxes, na], dtype=pd_bboxes.dtype, device=pd_bboxes.device)
        bbox_scores = torch.zeros([self.bs, self.n_max_boxes, na], dtype=pd_scores.dtype, device=pd_scores.device)

        # Build the index tensor on the same device as the tensor it indexes to avoid host/device round trips
        idx_device = pd_scores.device
        ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long, device=idx_device)  # 2, b, max_num_obj
        ind[0] = torch.arange(end=self.bs, device=idx_device).view(-1, 1).expand(-1, self.n_max_boxes)  # b, max_num_obj
        ind[1] = gt_labels.squeeze(-1)  # b, max_num_obj
        # Get the scores of each grid for each gt cls
        bbox_scores[mask_gt] = pd_scores[ind[0], :, ind[1]][mask_gt]  # b, max_num_obj, h*w

        # (b, max_num_obj, 1, 4), (b, 1, h*w, 4)
        pd_boxes = pd_bboxes.unsqueeze(1).expand(-1, self.n_max_boxes, -1, -1)[mask_gt]
        gt_boxes = gt_bboxes.unsqueeze(2).expand(-1, -1, na, -1)[mask_gt]
        overlaps[mask_gt] = bbox_iou(gt_boxes, pd_boxes, xywh=False, CIoU=True).squeeze(-1).clamp_(0)

        align_metric = bbox_scores.pow(self.alpha) * overlaps.pow(self.beta)
        return align_metric, overlaps

    def select_topk_candidates(self, metrics, largest=True, topk_mask=None):
        """
        Select the top-k candidates based on the given metrics.

        Args:
            metrics (Tensor): A tensor of shape (b, max_num_obj, h*w), where b is the batch size,
                              max_num_obj is the maximum number of objects, and h*w represents the
                              total number of anchor points.
            largest (bool): If True, select the largest values; otherwise, select the smallest values.
            topk_mask (Tensor): An optional boolean tensor of shape (b, max_num_obj, topk), where
                                topk is the number of top candidates to consider. If not provided,
                                the top-k values are automatically computed based on the given metrics.

        Returns:
            (Tensor): A tensor of shape (b, max_num_obj, h*w) containing the selected top-k candidates.
        """

        # (b, max_num_obj, topk)
        topk_metrics, topk_idxs = torch.topk(metrics, self.topk, dim=-1, largest=largest)
        if topk_mask is None:
            topk_mask = (topk_metrics.max(-1, keepdim=True)[0] > self.eps).expand_as(topk_idxs)
        # (b, max_num_obj, topk): masked-out rows all point at anchor 0 and are removed by the >1 filter below
        topk_idxs.masked_fill_(~topk_mask, 0)

        # (b, max_num_obj, topk, h*w) -> (b, max_num_obj, h*w)
        count_tensor = torch.zeros(metrics.shape, dtype=torch.int8, device=topk_idxs.device)
        ones = torch.ones_like(topk_idxs[:, :, :1], dtype=torch.int8, device=topk_idxs.device)
        for k in range(self.topk):
            # Expand topk_idxs for each value of k and add 1 at the specified positions
            count_tensor.scatter_add_(-1, topk_idxs[:, :, k:k + 1], ones)
        # Filter invalid bboxes
        count_tensor.masked_fill_(count_tensor > 1, 0)

        return count_tensor.to(metrics.dtype)

    def get_targets(self, gt_labels, gt_bboxes, target_gt_idx, fg_mask):
        """
        Compute target labels, target bounding boxes, and target scores for the positive anchor points.

        Args:
            gt_labels (Tensor): Ground truth labels of shape (b, max_num_obj, 1), where b is the
                                batch size and max_num_obj is the maximum number of objects.
            gt_bboxes (Tensor): Ground truth bounding boxes of shape (b, max_num_obj, 4).
            target_gt_idx (Tensor): Indices of the assigned ground truth objects for positive
                                    anchor points, with shape (b, h*w), where h*w is the total
                                    number of anchor points.
            fg_mask (Tensor): A tensor of shape (b, h*w) that is greater than zero at the positive
                              (foreground) anchor points.

        Returns:
            (Tuple[Tensor, Tensor, Tensor]): A tuple containing the following tensors:
                - target_labels (Tensor): Shape (b, h*w), containing the target labels for
                                          positive anchor points.
                - target_bboxes (Tensor): Shape (b, h*w, 4), containing the target bounding boxes
                                          for positive anchor points.
                - target_scores (Tensor): Shape (b, h*w, num_classes), containing the one-hot target
                                          scores, zeroed for anchors that are not foreground.
        """

        # Offset the per-image gt index so it addresses the flattened (b * max_num_obj) gt tensors
        batch_ind = torch.arange(end=self.bs, dtype=torch.int64, device=gt_labels.device)[..., None]
        target_gt_idx = target_gt_idx + batch_ind * self.n_max_boxes  # (b, h*w)
        target_labels = gt_labels.long().flatten()[target_gt_idx]  # (b, h*w)

        # Assigned target boxes, (b, max_num_obj, 4) -> (b, h*w, 4)
        target_bboxes = gt_bboxes.view(-1, 4)[target_gt_idx]

        # Assigned target scores
        target_labels.clamp_(0)

        # 10x faster than F.one_hot()
        target_scores = torch.zeros((target_labels.shape[0], target_labels.shape[1], self.num_classes),
                                    dtype=torch.int64,
                                    device=target_labels.device)  # (b, h*w, 80)
        target_scores.scatter_(2, target_labels.unsqueeze(-1), 1)

        # Broadcast the (b, h*w, 1) foreground mask over the class axis instead of materialising a repeat
        target_scores = torch.where(fg_mask[:, :, None] > 0, target_scores, 0)

        return target_labels, target_bboxes, target_scores
@contextmanager
def torch_distributed_zero_first(local_rank: int):
    """
    Context manager that makes all processes in distributed training wait for the local master to go first.

    When torch.distributed is available and initialized, every process whose `local_rank` is neither -1 nor 0
    blocks on a barrier before entering the body, and the `local_rank == 0` process joins that barrier after
    leaving the body. Otherwise the body simply runs.

    Args:
        local_rank (int): Local rank of the calling process.
    """
    initialized = torch.distributed.is_available() and torch.distributed.is_initialized()
    if initialized and local_rank not in (-1, 0):
        dist.barrier(device_ids=[local_rank])
    try:
        yield
    finally:
        # Reach the barrier even if the body raised, so the waiting ranks are released instead of left blocked
        if initialized and local_rank == 0:
            dist.barrier(device_ids=[0])
def select_device(device='', batch=0, newline=False, verbose=True):
    """
    Selects the appropriate PyTorch device based on the provided arguments.

    The function takes a string specifying the device or a torch.device object and returns a torch.device object
    representing the selected device. The function also validates the number of available devices and raises an
    exception if the requested device(s) are not available.

    Args:
        device (str | torch.device, optional): Device string or torch.device object.
            Options are 'None', 'cpu', or 'cuda', or '0' or '0,1,2,3'. Defaults to an empty string, which auto-selects
            the first available GPU, or CPU if no GPU is available.
        batch (int, optional): Batch size being used in your model. Defaults to 0.
        newline (bool, optional): If True, adds a newline at the end of the log string. Defaults to False.
        verbose (bool, optional): If True, logs the device information. Defaults to True.

    Returns:
        (torch.device): Selected device. A torch.device argument is returned unchanged.

    Raises:
        ValueError: If the specified device is not available or if the batch size is not a multiple of the number of
            devices when using multiple GPUs.

    Examples:
        >>> select_device('cuda:0')
        device(type='cuda', index=0)

        >>> select_device('cpu')
        device(type='cpu')

    Note:
        Sets the 'CUDA_VISIBLE_DEVICES' environment variable for specifying which GPUs to use.
    """

    if isinstance(device, torch.device):
        return device

    s = f'Ultralytics YOLOv{__version__} 🚀 Python-{platform.python_version()} torch-{torch.__version__} '
    device = str(device).lower()
    for remove in 'cuda:', 'none', '(', ')', '[', ']', "'", ' ':
        device = device.replace(remove, '')  # to string, 'cuda:0' -> '0' and '(0, 1)' -> '0,1'
    cpu = device == 'cpu'
    mps = device in ('mps', 'mps:0')  # Apple Metal Performance Shaders (MPS)
    if cpu or mps:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # force torch.cuda.is_available() = False
    elif device:  # non-cpu device requested
        if device == 'cuda':
            device = '0'
        visible = os.environ.get('CUDA_VISIBLE_DEVICES', None)
        os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable - must be before assert is_available()
        # Count comma-separated ids, not characters: a multi-digit id such as '10' is one device, not two
        n_requested = sum(1 for d in device.split(',') if d)
        if not (torch.cuda.is_available() and torch.cuda.device_count() >= n_requested):
            LOGGER.info(s)
            install = 'See https://pytorch.org/get-started/locally/ for up-to-date torch install instructions if no ' \
                      'CUDA devices are seen by torch.\n' if torch.cuda.device_count() == 0 else ''
            raise ValueError(f"Invalid CUDA 'device={device}' requested."
                             f" Use 'device=cpu' or pass valid CUDA device(s) if available,"
                             f" i.e. 'device=0' or 'device=0,1,2,3' for Multi-GPU.\n"
                             f'\ntorch.cuda.is_available(): {torch.cuda.is_available()}'
                             f'\ntorch.cuda.device_count(): {torch.cuda.device_count()}'
                             f"\nos.environ['CUDA_VISIBLE_DEVICES']: {visible}\n"
                             f'{install}')

    if not cpu and not mps and torch.cuda.is_available():  # prefer GPU if available
        devices = device.split(',') if device else '0'  # range(torch.cuda.device_count())  # i.e. 0,1,6,7
        n = len(devices)  # device count
        if n > 1 and batch > 0 and batch % n != 0:  # check batch_size is divisible by device_count
            raise ValueError(f"'batch={batch}' must be a multiple of GPU count {n}. Try 'batch={batch // n * n}' or "
                             f"'batch={batch // n * n + n}', the nearest batch sizes evenly divisible by {n}.")
        space = ' ' * (len(s) + 1)
        for i, d in enumerate(devices):
            # CUDA_VISIBLE_DEVICES was set above, so the i-th visible device is the requested id `d`
            p = torch.cuda.get_device_properties(i)
            s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n"  # bytes to MB
        arg = 'cuda:0'
    elif mps and TORCH_2_0 and torch.backends.mps.is_available():
        # Prefer MPS if available
        s += f'MPS ({get_cpu_info()})\n'
        arg = 'mps'
    else:  # revert to CPU
        s += f'CPU ({get_cpu_info()})\n'
        arg = 'cpu'

    if verbose:
        LOGGER.info(s if newline else s.rstrip())
    return torch.device(arg)
def model_info(model, detailed=False, verbose=True, imgsz=640):
    """
    Log a one-line model summary and return its headline numbers.

    Args:
        model: Model exposing `modules()`, `parameters()` and `named_parameters()`.
        detailed (bool, optional): If True, additionally log one row per named parameter. Defaults to False.
        verbose (bool, optional): If False, nothing is computed or logged and None is returned. Defaults to True.
        imgsz (int | list, optional): Image size forwarded to `get_flops`, i.e. imgsz=640 or imgsz=[640, 320].

    Returns:
        (tuple | None): (number of layers, number of parameters, number of gradients, GFLOPs), or None when
            `verbose` is False.
    """
    if not verbose:
        return
    num_params = get_num_params(model)  # number of parameters
    num_grads = get_num_gradients(model)  # number of gradients
    num_layers = len(list(model.modules()))  # number of layers
    if detailed:
        LOGGER.info(
            f"{'layer':>5} {'name':>40} {'gradient':>9} {'parameters':>12} {'shape':>20} {'mu':>10} {'sigma':>10}")
        for row, (param_name, param) in enumerate(model.named_parameters()):
            short_name = param_name.replace('module_list.', '')
            row_values = (row, short_name, param.requires_grad, param.numel(), list(param.shape), param.mean(),
                          param.std(), param.dtype)
            LOGGER.info('%5g %40s %9s %12g %20s %10.3g %10.3g %10s' % row_values)

    flops = get_flops(model, imgsz)

    # Optional summary fragments
    is_fused = getattr(model, 'is_fused', lambda: False)
    fused_tag = ' (fused)' if is_fused() else ''
    flops_tag = f', {flops:.1f} GFLOPs' if flops else ''

    # Display name derives from the model's yaml file when one is recorded, otherwise 'Model'
    yaml_path = getattr(model, 'yaml_file', '')
    if not yaml_path:
        yaml_path = getattr(model, 'yaml', {}).get('yaml_file', '')
    display_name = Path(yaml_path).stem.replace('yolo', 'YOLO') or 'Model'

    LOGGER.info(f'{display_name} summary{fused_tag}: {num_layers} layers, {num_params} parameters, '
                f'{num_grads} gradients{flops_tag}')
    return num_layers, num_params, num_grads, flops
+ + Example: + YOLOv8n info for loggers + ```python + results = {'model/parameters': 3151904, + 'model/GFLOPs': 8.746, + 'model/speed_ONNX(ms)': 41.244, + 'model/speed_TensorRT(ms)': 3.211, + 'model/speed_PyTorch(ms)': 18.755} + ``` + """ + if trainer.args.profile: # profile ONNX and TensorRT times + from ultralytics.utils.benchmarks import ProfileModels + results = ProfileModels([trainer.last], device=trainer.device).profile()[0] + results.pop('model/name') + else: # only return PyTorch times from most recent validation + results = { + 'model/parameters': get_num_params(trainer.model), + 'model/GFLOPs': round(get_flops(trainer.model), 3)} + results['model/speed_PyTorch(ms)'] = round(trainer.validator.speed['inference'], 3) + return results + + +def get_flops(model, imgsz=640): + """Return a YOLO model's FLOPs.""" + try: + model = de_parallel(model) + p = next(model.parameters()) + stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32 # max stride + im = torch.empty((1, p.shape[1], stride, stride), device=p.device) # input image in BCHW format + flops = thop.profile(deepcopy(model), inputs=[im], verbose=False)[0] / 1E9 * 2 if thop else 0 # stride GFLOPs + imgsz = imgsz if isinstance(imgsz, list) else [imgsz, imgsz] # expand if int/float + return flops * imgsz[0] / stride * imgsz[1] / stride # 640x640 GFLOPs + except Exception: + return 0 + + +def get_flops_with_torch_profiler(model, imgsz=640): + """Compute model FLOPs (thop alternative).""" + if TORCH_2_0: + model = de_parallel(model) + p = next(model.parameters()) + stride = (max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32) * 2 # max stride + im = torch.zeros((1, p.shape[1], stride, stride), device=p.device) # input image in BCHW format + with torch.profiler.profile(with_flops=True) as prof: + model(im) + flops = sum(x.flops for x in prof.key_averages()) / 1E9 + imgsz = imgsz if isinstance(imgsz, list) else [imgsz, imgsz] # expand if int/float + flops = flops * 
imgsz[0] / stride * imgsz[1] / stride # 640x640 GFLOPs + return flops + return 0 + + +def initialize_weights(model): + """Initialize model weights to random values.""" + for m in model.modules(): + t = type(m) + if t is nn.Conv2d: + pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif t is nn.BatchNorm2d: + m.eps = 1e-3 + m.momentum = 0.03 + elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: + m.inplace = True + + +def scale_img(img, ratio=1.0, same_shape=False, gs=32): + """Scales and pads an image tensor of shape img(bs,3,y,x) based on given ratio and grid size gs, optionally + retaining the original shape. + """ + if ratio == 1.0: + return img + h, w = img.shape[2:] + s = (int(h * ratio), int(w * ratio)) # new size + img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize + if not same_shape: # pad/crop img + h, w = (math.ceil(x * ratio / gs) * gs for x in (h, w)) + return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean + + +def make_divisible(x, divisor): + """Returns nearest x divisible by divisor.""" + if isinstance(divisor, torch.Tensor): + divisor = int(divisor.max()) # to int + return math.ceil(x / divisor) * divisor + + +def copy_attr(a, b, include=(), exclude=()): + """Copies attributes from object 'b' to object 'a', with options to include/exclude certain attributes.""" + for k, v in b.__dict__.items(): + if (len(include) and k not in include) or k.startswith('_') or k in exclude: + continue + else: + setattr(a, k, v) + + +def get_latest_opset(): + """Return second-most (for maturity) recently supported ONNX opset by this version of torch.""" + return max(int(k[14:]) for k in vars(torch.onnx) if 'symbolic_opset' in k) - 1 # opset + + +def intersect_dicts(da, db, exclude=()): + """Returns a dictionary of intersecting keys with matching shapes, excluding 'exclude' keys, using da values.""" + return {k: v for k, v in da.items() if k in db and all(x not in k 
for x in exclude) and v.shape == db[k].shape} + + +def is_parallel(model): + """Returns True if model is of type DP or DDP.""" + return isinstance(model, (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)) + + +def de_parallel(model): + """De-parallelize a model: returns single-GPU model if model is of type DP or DDP.""" + return model.module if is_parallel(model) else model + + +def one_cycle(y1=0.0, y2=1.0, steps=100): + """Returns a lambda function for sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf.""" + return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1 + + +def init_seeds(seed=0, deterministic=False): + """Initialize random number generator (RNG) seeds https://pytorch.org/docs/stable/notes/randomness.html.""" + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) # for Multi-GPU, exception safe + # torch.backends.cudnn.benchmark = True # AutoBatch problem https://github.com/ultralytics/yolov5/issues/9287 + if deterministic: + if TORCH_2_0: + torch.use_deterministic_algorithms(True, warn_only=True) # warn if deterministic is not possible + torch.backends.cudnn.deterministic = True + os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' + os.environ['PYTHONHASHSEED'] = str(seed) + else: + LOGGER.warning('WARNING ⚠️ Upgrade to torch>=2.0.0 for deterministic training.') + else: + torch.use_deterministic_algorithms(False) + torch.backends.cudnn.deterministic = False + + +class ModelEMA: + """Updated Exponential Moving Average (EMA) from https://github.com/rwightman/pytorch-image-models + Keeps a moving average of everything in the model state_dict (parameters and buffers) + For EMA details see https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage + To disable EMA set the `enabled` attribute to `False`. 
+ """ + + def __init__(self, model, decay=0.9999, tau=2000, updates=0): + """Create EMA.""" + self.ema = deepcopy(de_parallel(model)).eval() # FP32 EMA + self.updates = updates # number of EMA updates + self.decay = lambda x: decay * (1 - math.exp(-x / tau)) # decay exponential ramp (to help early epochs) + for p in self.ema.parameters(): + p.requires_grad_(False) + self.enabled = True + + def update(self, model): + """Update EMA parameters.""" + if self.enabled: + self.updates += 1 + d = self.decay(self.updates) + + msd = de_parallel(model).state_dict() # model state_dict + for k, v in self.ema.state_dict().items(): + if v.dtype.is_floating_point: # true for FP16 and FP32 + v *= d + v += (1 - d) * msd[k].detach() + # assert v.dtype == msd[k].dtype == torch.float32, f'{k}: EMA {v.dtype}, model {msd[k].dtype}' + + def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): + """Updates attributes and saves stripped model with optimizer removed.""" + if self.enabled: + copy_attr(self.ema, model, include, exclude) + + +def strip_optimizer(f: Union[str, Path] = 'best.pt', s: str = '') -> None: + """ + Strip optimizer from 'f' to finalize training, optionally save as 's'. + + Args: + f (str): file path to model to strip the optimizer from. Default is 'best.pt'. + s (str): file path to save the model with stripped optimizer to. If not provided, 'f' will be overwritten. 
+ + Returns: + None + + Example: + ```python + from pathlib import Path + from ultralytics.utils.torch_utils import strip_optimizer + + for f in Path('path/to/weights').rglob('*.pt'): + strip_optimizer(f) + ``` + """ + x = torch.load(f, map_location=torch.device('cpu')) + if 'model' not in x: + LOGGER.info(f'Skipping {f}, not a valid Ultralytics model.') + return + + if hasattr(x['model'], 'args'): + x['model'].args = dict(x['model'].args) # convert from IterableSimpleNamespace to dict + args = {**DEFAULT_CFG_DICT, **x['train_args']} if 'train_args' in x else None # combine args + if x.get('ema'): + x['model'] = x['ema'] # replace model with ema + for k in 'optimizer', 'best_fitness', 'ema', 'updates': # keys + x[k] = None + x['epoch'] = -1 + x['model'].half() # to FP16 + for p in x['model'].parameters(): + p.requires_grad = False + x['train_args'] = {k: v for k, v in args.items() if k in DEFAULT_CFG_KEYS} # strip non-default keys + # x['model'].args = x['train_args'] + torch.save(x, s or f) + mb = os.path.getsize(s or f) / 1E6 # file size + LOGGER.info(f"Optimizer stripped from {f},{f' saved as {s},' if s else ''} {mb:.1f}MB") + + +def profile(input, ops, n=10, device=None): + """ + Ultralytics speed, memory and FLOPs profiler. 
+ + Example: + ```python + from ultralytics.utils.torch_utils import profile + + input = torch.randn(16, 3, 640, 640) + m1 = lambda x: x * torch.sigmoid(x) + m2 = nn.SiLU() + profile(input, [m1, m2], n=100) # profile over 100 iterations + ``` + """ + results = [] + if not isinstance(device, torch.device): + device = select_device(device) + LOGGER.info(f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}" + f"{'input':>24s}{'output':>24s}") + + for x in input if isinstance(input, list) else [input]: + x = x.to(device) + x.requires_grad = True + for m in ops if isinstance(ops, list) else [ops]: + m = m.to(device) if hasattr(m, 'to') else m # device + m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m + tf, tb, t = 0, 0, [0, 0, 0] # dt forward, backward + try: + flops = thop.profile(m, inputs=[x], verbose=False)[0] / 1E9 * 2 if thop else 0 # GFLOPs + except Exception: + flops = 0 + + try: + for _ in range(n): + t[0] = time_sync() + y = m(x) + t[1] = time_sync() + try: + (sum(yi.sum() for yi in y) if isinstance(y, list) else y).sum().backward() + t[2] = time_sync() + except Exception: # no backward method + # print(e) # for debug + t[2] = float('nan') + tf += (t[1] - t[0]) * 1000 / n # ms per op forward + tb += (t[2] - t[1]) * 1000 / n # ms per op backward + mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0 # (GB) + s_in, s_out = (tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' for x in (x, y)) # shapes + p = sum(x.numel() for x in m.parameters()) if isinstance(m, nn.Module) else 0 # parameters + LOGGER.info(f'{p:12}{flops:12.4g}{mem:>14.3f}{tf:14.4g}{tb:14.4g}{str(s_in):>24s}{str(s_out):>24s}') + results.append([p, flops, mem, tf, tb, s_in, s_out]) + except Exception as e: + LOGGER.info(e) + results.append(None) + torch.cuda.empty_cache() + return results + + +class EarlyStopping: + """Early stopping class that stops training when a 
specified number of epochs have passed without improvement.""" + + def __init__(self, patience=50): + """ + Initialize early stopping object. + + Args: + patience (int, optional): Number of epochs to wait after fitness stops improving before stopping. + """ + self.best_fitness = 0.0 # i.e. mAP + self.best_epoch = 0 + self.patience = patience or float('inf') # epochs to wait after fitness stops improving to stop + self.possible_stop = False # possible stop may occur next epoch + + def __call__(self, epoch, fitness): + """ + Check whether to stop training. + + Args: + epoch (int): Current epoch of training + fitness (float): Fitness value of current epoch + + Returns: + (bool): True if training should stop, False otherwise + """ + if fitness is None: # check if fitness=None (happens when val=False) + return False + + if fitness >= self.best_fitness: # >= 0 to allow for early zero-fitness stage of training + self.best_epoch = epoch + self.best_fitness = fitness + delta = epoch - self.best_epoch # epochs without improvement + self.possible_stop = delta >= (self.patience - 1) # possible stop may occur next epoch + stop = delta >= self.patience # stop training if patience exceeded + if stop: + LOGGER.info(f'Stopping training early as no improvement observed in last {self.patience} epochs. ' + f'Best results observed at epoch {self.best_epoch}, best model saved as best.pt.\n' + f'To update EarlyStopping(patience={self.patience}) pass a new patience value, ' + f'i.e. `patience=300` or use `patience=0` to disable EarlyStopping.') + return stop diff --git a/ultralytics/utils/triton.py b/ultralytics/utils/triton.py new file mode 100644 index 0000000..b79b04b --- /dev/null +++ b/ultralytics/utils/triton.py @@ -0,0 +1,90 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from typing import List +from urllib.parse import urlsplit + +import numpy as np + + +class TritonRemoteModel: + """ + Client for interacting with a remote Triton Inference Server model. 
+ + Attributes: + endpoint (str): The name of the model on the Triton server. + url (str): The URL of the Triton server. + triton_client: The Triton client (either HTTP or gRPC). + InferInput: The input class for the Triton client. + InferRequestedOutput: The output request class for the Triton client. + input_formats (List[str]): The data types of the model inputs. + np_input_formats (List[type]): The numpy data types of the model inputs. + input_names (List[str]): The names of the model inputs. + output_names (List[str]): The names of the model outputs. + """ + + def __init__(self, url: str, endpoint: str = '', scheme: str = ''): + """ + Initialize the TritonRemoteModel. + + Arguments may be provided individually or parsed from a collective 'url' argument of the form + ://// + + Args: + url (str): The URL of the Triton server. + endpoint (str): The name of the model on the Triton server. + scheme (str): The communication scheme ('http' or 'grpc'). + """ + if not endpoint and not scheme: # Parse all args from URL string + splits = urlsplit(url) + endpoint = splits.path.strip('/').split('/')[0] + scheme = splits.scheme + url = splits.netloc + + self.endpoint = endpoint + self.url = url + + # Choose the Triton client based on the communication scheme + if scheme == 'http': + import tritonclient.http as client # noqa + self.triton_client = client.InferenceServerClient(url=self.url, verbose=False, ssl=False) + config = self.triton_client.get_model_config(endpoint) + else: + import tritonclient.grpc as client # noqa + self.triton_client = client.InferenceServerClient(url=self.url, verbose=False, ssl=False) + config = self.triton_client.get_model_config(endpoint, as_json=True)['config'] + + # Sort output names alphabetically, i.e. 'output0', 'output1', etc. 
+ config['output'] = sorted(config['output'], key=lambda x: x.get('name')) + + # Define model attributes + type_map = {'TYPE_FP32': np.float32, 'TYPE_FP16': np.float16, 'TYPE_UINT8': np.uint8} + self.InferRequestedOutput = client.InferRequestedOutput + self.InferInput = client.InferInput + self.input_formats = [x['data_type'] for x in config['input']] + self.np_input_formats = [type_map[x] for x in self.input_formats] + self.input_names = [x['name'] for x in config['input']] + self.output_names = [x['name'] for x in config['output']] + + def __call__(self, *inputs: np.ndarray) -> List[np.ndarray]: + """ + Call the model with the given inputs. + + Args: + *inputs (List[np.ndarray]): Input data to the model. + + Returns: + List[np.ndarray]: Model outputs. + """ + infer_inputs = [] + input_format = inputs[0].dtype + for i, x in enumerate(inputs): + if x.dtype != self.np_input_formats[i]: + x = x.astype(self.np_input_formats[i]) + infer_input = self.InferInput(self.input_names[i], [*x.shape], self.input_formats[i].replace('TYPE_', '')) + infer_input.set_data_from_numpy(x) + infer_inputs.append(infer_input) + + infer_outputs = [self.InferRequestedOutput(output_name) for output_name in self.output_names] + outputs = self.triton_client.infer(model_name=self.endpoint, inputs=infer_inputs, outputs=infer_outputs) + + return [outputs.as_numpy(output_name).astype(input_format) for output_name in self.output_names] diff --git a/ultralytics/utils/tuner.py b/ultralytics/utils/tuner.py new file mode 100644 index 0000000..a06f813 --- /dev/null +++ b/ultralytics/utils/tuner.py @@ -0,0 +1,144 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +import subprocess + +from ultralytics.cfg import TASK2DATA, TASK2METRIC, get_save_dir +from ultralytics.utils import DEFAULT_CFG, DEFAULT_CFG_DICT, LOGGER, NUM_THREADS + + +def run_ray_tune(model, + space: dict = None, + grace_period: int = 10, + gpu_per_trial: int = None, + max_samples: int = 10, + **train_args): + """ + Runs hyperparameter tuning 
using Ray Tune. + + Args: + model (YOLO): Model to run the tuner on. + space (dict, optional): The hyperparameter search space. Defaults to None. + grace_period (int, optional): The grace period in epochs of the ASHA scheduler. Defaults to 10. + gpu_per_trial (int, optional): The number of GPUs to allocate per trial. Defaults to None. + max_samples (int, optional): The maximum number of trials to run. Defaults to 10. + train_args (dict, optional): Additional arguments to pass to the `train()` method. Defaults to {}. + + Returns: + (dict): A dictionary containing the results of the hyperparameter search. + + Example: + ```python + from ultralytics import YOLO + + # Load a YOLOv8n model + model = YOLO('yolov8n.pt') + + # Start tuning hyperparameters for YOLOv8n training on the COCO8 dataset + result_grid = model.tune(data='coco8.yaml', use_ray=True) + ``` + """ + + LOGGER.info('💡 Learn about RayTune at https://docs.ultralytics.com/integrations/ray-tune') + if train_args is None: + train_args = {} + + try: + subprocess.run('pip install ray[tune]'.split(), check=True) + + import ray + from ray import tune + from ray.air import RunConfig + from ray.air.integrations.wandb import WandbLoggerCallback + from ray.tune.schedulers import ASHAScheduler + except ImportError: + raise ModuleNotFoundError('Tuning hyperparameters requires Ray Tune. 
Install with: pip install "ray[tune]"') + + try: + import wandb + + assert hasattr(wandb, '__version__') + except (ImportError, AssertionError): + wandb = False + + default_space = { + # 'optimizer': tune.choice(['SGD', 'Adam', 'AdamW', 'NAdam', 'RAdam', 'RMSProp']), + 'lr0': tune.uniform(1e-5, 1e-1), + 'lrf': tune.uniform(0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf) + 'momentum': tune.uniform(0.6, 0.98), # SGD momentum/Adam beta1 + 'weight_decay': tune.uniform(0.0, 0.001), # optimizer weight decay 5e-4 + 'warmup_epochs': tune.uniform(0.0, 5.0), # warmup epochs (fractions ok) + 'warmup_momentum': tune.uniform(0.0, 0.95), # warmup initial momentum + 'box': tune.uniform(0.02, 0.2), # box loss gain + 'cls': tune.uniform(0.2, 4.0), # cls loss gain (scale with pixels) + 'hsv_h': tune.uniform(0.0, 0.1), # image HSV-Hue augmentation (fraction) + 'hsv_s': tune.uniform(0.0, 0.9), # image HSV-Saturation augmentation (fraction) + 'hsv_v': tune.uniform(0.0, 0.9), # image HSV-Value augmentation (fraction) + 'degrees': tune.uniform(0.0, 45.0), # image rotation (+/- deg) + 'translate': tune.uniform(0.0, 0.9), # image translation (+/- fraction) + 'scale': tune.uniform(0.0, 0.9), # image scale (+/- gain) + 'shear': tune.uniform(0.0, 10.0), # image shear (+/- deg) + 'perspective': tune.uniform(0.0, 0.001), # image perspective (+/- fraction), range 0-0.001 + 'flipud': tune.uniform(0.0, 1.0), # image flip up-down (probability) + 'fliplr': tune.uniform(0.0, 1.0), # image flip left-right (probability) + 'mosaic': tune.uniform(0.0, 1.0), # image mixup (probability) + 'mixup': tune.uniform(0.0, 1.0), # image mixup (probability) + 'copy_paste': tune.uniform(0.0, 1.0)} # segment copy-paste (probability) + + # Put the model in ray store + task = model.task + model_in_store = ray.put(model) + + def _tune(config): + """ + Trains the YOLO model with the specified hyperparameters and additional arguments. + + Args: + config (dict): A dictionary of hyperparameters to use for training. 
+ + Returns: + None. + """ + model_to_train = ray.get(model_in_store) # get the model from ray store for tuning + model_to_train.reset_callbacks() + config.update(train_args) + results = model_to_train.train(**config) + return results.results_dict + + # Get search space + if not space: + space = default_space + LOGGER.warning('WARNING ⚠️ search space not provided, using default search space.') + + # Get dataset + data = train_args.get('data', TASK2DATA[task]) + space['data'] = data + if 'data' not in train_args: + LOGGER.warning(f'WARNING ⚠️ data not provided, using default "data={data}".') + + # Define the trainable function with allocated resources + trainable_with_resources = tune.with_resources(_tune, {'cpu': NUM_THREADS, 'gpu': gpu_per_trial or 0}) + + # Define the ASHA scheduler for hyperparameter search + asha_scheduler = ASHAScheduler(time_attr='epoch', + metric=TASK2METRIC[task], + mode='max', + max_t=train_args.get('epochs') or DEFAULT_CFG_DICT['epochs'] or 100, + grace_period=grace_period, + reduction_factor=3) + + # Define the callbacks for the hyperparameter search + tuner_callbacks = [WandbLoggerCallback(project='YOLOv8-tune')] if wandb else [] + + # Create the Ray Tune hyperparameter search tuner + tune_dir = get_save_dir(DEFAULT_CFG, name='tune').resolve() # must be absolute dir + tune_dir.mkdir(parents=True, exist_ok=True) + tuner = tune.Tuner(trainable_with_resources, + param_space=space, + tune_config=tune.TuneConfig(scheduler=asha_scheduler, num_samples=max_samples), + run_config=RunConfig(callbacks=tuner_callbacks, storage_path=tune_dir)) + + # Run the hyperparameter search + tuner.fit() + + # Return the results of the hyperparameter search + return tuner.get_results() diff --git a/wzq.py b/wzq.py new file mode 100644 index 0000000..c74cefa --- /dev/null +++ b/wzq.py @@ -0,0 +1,17 @@ + +from ultralytics import YOLO +import os +os.environ["GIT_PYTHON_REFRESH"] = "quiet" +os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" +if __name__ == '__main__': + # # 
训练数据集: + # model = YOLO('yolov8n.pt') # 如果要训练如pose,该对应的权重即可 + # results = model.train(data='data.yaml', epochs=200) + + #预测结果 + model = YOLO('yolov8n.pt') #常用模型yolov8n-seg.pt、yolov8n.pt、yolov8n-pose.pt + model.predict("ultralytics/assets", save=True) #测试图片文件夹,并且设置保存True + +#如果中断后,可以改为以下代码: +# model = YOLO('last.pt') # last.pt文件的路径 +# results = model.train(resume=True) \ No newline at end of file diff --git a/xmltotxt.py b/xmltotxt.py new file mode 100644 index 0000000..55593ee --- /dev/null +++ b/xmltotxt.py @@ -0,0 +1,55 @@ +import xml.etree.ElementTree as ET +import pickle +import os +from os import listdir, getcwd +from os.path import join + + +def convert(size, box): + x_center = (box[0] + box[1]) / 2.0 + y_center = (box[2] + box[3]) / 2.0 + x = x_center / size[0] + y = y_center / size[1] + w = (box[1] - box[0]) / size[0] + h = (box[3] - box[2]) / size[1] + return (x, y, w, h) + + +def convert_annotation(xml_files_path, save_txt_files_path, classes): + xml_files = os.listdir(xml_files_path) + print(xml_files) + for xml_name in xml_files: + print(xml_name) + xml_file = os.path.join(xml_files_path, xml_name) + out_txt_path = os.path.join(save_txt_files_path, xml_name.split('.')[0] + '.txt') + out_txt_f = open(out_txt_path, 'w') + tree = ET.parse(xml_file) + root = tree.getroot() + size = root.find('size') + w = int(size.find('width').text) + h = int(size.find('height').text) + + for obj in root.iter('object'): + difficult = obj.find('difficult').text + cls = obj.find('name').text + if cls not in classes or int(difficult) == 1: + continue + cls_id = classes.index(cls) + xmlbox = obj.find('bndbox') + b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), + float(xmlbox.find('ymax').text)) + # b=(xmin, xmax, ymin, ymax) + print(w, h, b) + bb = convert((w, h), b) + out_txt_f.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') + + +if __name__ == "__main__": + # 把标签名,全部写这里,用逗号隔开 + classes1 = ['替换成自己的标签名'] + 
# 2、voc格式的xml标签文件路径 + xml_files1 = r'替换成xml文件夹路径' + # 3、转化为yolo格式的txt标签文件存储路径 + save_txt_files1 = r'替换txt标签保存的路径' + + convert_annotation(xml_files1, save_txt_files1, classes1) \ No newline at end of file