diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b25f8979..d98402e7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -98,9 +98,9 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-14] + os: [ubuntu-latest, windows-latest, macos-14] python-version: ["3.11"] - model: [yolov8n] + model: [yolo11n] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -116,24 +116,27 @@ jobs: run: | yolo checks pip list + - name: Benchmark DetectionModel + shell: bash + run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}.pt' imgsz=160 verbose=0.309 - name: Benchmark ClassificationModel shell: bash - run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-cls.pt' imgsz=160 verbose=0.166 + run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-cls.pt' imgsz=160 verbose=0.249 - name: Benchmark YOLOWorld DetectionModel shell: bash - run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/yolov8s-worldv2.pt' imgsz=160 verbose=0.318 + run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/yolov8s-worldv2.pt' imgsz=160 verbose=0.337 - name: Benchmark SegmentationModel shell: bash - run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-seg.pt' imgsz=160 verbose=0.279 + run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-seg.pt' imgsz=160 verbose=0.195 - name: Benchmark PoseModel shell: bash - run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-pose.pt' imgsz=160 verbose=0.183 + run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-pose.pt' imgsz=160 verbose=0.197 - name: Benchmark OBBModel shell: bash - run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-obb.pt' imgsz=160 verbose=0.472 + run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-obb.pt' imgsz=160 verbose=0.597 - name: Benchmark YOLOv10Model shell: bash - run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/yolov10n.pt' imgsz=160 verbose=0.178 + run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/yolov10n.pt' imgsz=160 verbose=0.205 - name: Merge Coverage Reports run: | coverage xml -o coverage-benchmarks.xml @@ -251,15 +254,17 @@ jobs: - name: Pytest tests run: pytest --slow tests/ - name: Benchmark ClassificationModel - run: python -m ultralytics.cfg.__init__ benchmark model='yolov8n-cls.pt' imgsz=160 verbose=0.166 + run: python -m ultralytics.cfg.__init__ benchmark model='yolo11n-cls.pt' imgsz=160 verbose=0.249 - name: Benchmark YOLOWorld DetectionModel - run: python -m ultralytics.cfg.__init__ benchmark model='yolov8s-worldv2.pt' imgsz=160 verbose=0.318 + run: python -m ultralytics.cfg.__init__ benchmark model='yolo11s-worldv2.pt' imgsz=160 verbose=0.337 - name: Benchmark SegmentationModel - run: python -m ultralytics.cfg.__init__ benchmark 
model='yolov8n-seg.pt' imgsz=160 verbose=0.267 + run: python -m ultralytics.cfg.__init__ benchmark model='yolo11n-seg.pt' imgsz=160 verbose=0.195 - name: Benchmark PoseModel - run: python -m ultralytics.cfg.__init__ benchmark model='yolov8n-pose.pt' imgsz=160 verbose=0.179 + run: python -m ultralytics.cfg.__init__ benchmark model='yolo11n-pose.pt' imgsz=160 verbose=0.197 - name: Benchmark OBBModel - run: python -m ultralytics.cfg.__init__ benchmark model='yolov8n-obb.pt' imgsz=160 verbose=0.472 + run: python -m ultralytics.cfg.__init__ benchmark model='yolo11n-obb.pt' imgsz=160 verbose=0.597 + - name: Benchmark YOLOv10Model + run: python -m ultralytics.cfg.__init__ benchmark model='yolov10n.pt' imgsz=160 verbose=0.205 - name: Benchmark Summary run: | cat benchmarks.log diff --git a/.github/workflows/cla.yml b/.github/workflows/cla.yml index a9b57b3a..1edf59bb 100644 --- a/.github/workflows/cla.yml +++ b/.github/workflows/cla.yml @@ -26,7 +26,7 @@ jobs: steps: - name: CLA Assistant if: (github.event.comment.body == 'recheck' || github.event.comment.body == 'I have read the CLA Document and I sign the CLA') || github.event_name == 'pull_request_target' - uses: contributor-assistant/github-action@v2.5.1 + uses: contributor-assistant/github-action@v2.5.2 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Must be repository secret PAT diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index d1c9810c..02bc506a 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -17,7 +17,7 @@ name: Publish Docs on: push: branches: [main] - pull_request_target: + pull_request: branches: [main] workflow_dispatch: @@ -48,7 +48,7 @@ jobs: continue-on-error: true run: ruff check --fix --unsafe-fixes --select D --ignore=D100,D104,D203,D205,D212,D213,D401,D406,D407,D413 . - name: Update Docs Reference Section and Push Changes - if: github.event_name == 'pull_request_target' + continue-on-error: true run: | python docs/build_reference.py git pull origin ${{ github.head_ref || github.ref }} @@ -68,7 +68,7 @@ jobs: python docs/build_docs.py - name: Commit and Push Docs changes continue-on-error: true - if: always() && github.event_name == 'pull_request_target' + if: always() run: | git pull origin ${{ github.head_ref || github.ref }} git add --update # only add updated files @@ -81,21 +81,18 @@ jobs: fi - name: Publish Docs to https://docs.ultralytics.com if: github.event_name == 'push' - env: - PERSONAL_ACCESS_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} - INDEXNOW_KEY: ${{ secrets.INDEXNOW_KEY_DOCS }} run: | git clone https://github.com/ultralytics/docs.git docs-repo cd docs-repo git checkout gh-pages || git checkout -b gh-pages rm -rf * cp -R ../site/* . - echo "$INDEXNOW_KEY" > "$INDEXNOW_KEY.txt" + echo "${{ secrets.INDEXNOW_KEY_DOCS }}" > "${{ secrets.INDEXNOW_KEY_DOCS }}.txt" git add . 
if git diff --staged --quiet; then echo "No changes to commit" else LATEST_HASH=$(git rev-parse --short=7 HEAD) git commit -m "Update Docs for 'ultralytics ${{ steps.check_pypi.outputs.version }} - $LATEST_HASH'" - git push https://$PERSONAL_ACCESS_TOKEN@github.com/ultralytics/docs.git gh-pages + git push https://${{ secrets.PERSONAL_ACCESS_TOKEN }}@github.com/ultralytics/docs.git gh-pages fi diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 67651715..9befe5c9 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -7,6 +7,8 @@ name: Ultralytics Actions on: issues: types: [opened, edited] + discussion: + types: [created] pull_request_target: branches: [main] types: [opened, closed, synchronize, review_requested] @@ -27,3 +29,34 @@ jobs: summary: true # print PR summary with GPT4o (requires 'openai_api_key') openai_azure_api_key: ${{ secrets.OPENAI_AZURE_API_KEY }} openai_azure_endpoint: ${{ secrets.OPENAI_AZURE_ENDPOINT }} + first_issue_response: | + 👋 Hello @${{ github.actor }}, thank you for your interest in Ultralytics 🚀! We recommend a visit to the [Docs](https://docs.ultralytics.com) for new users where you can find many [Python](https://docs.ultralytics.com/usage/python/) and [CLI](https://docs.ultralytics.com/usage/cli/) usage examples and where many of the most common questions may already be answered. + + If this is a 🐛 Bug Report, please provide a [minimum reproducible example](https://docs.ultralytics.com/help/minimum_reproducible_example/) to help us debug it. + + If this is a custom training ❓ Question, please provide as much information as possible, including dataset image examples and training logs, and verify you are following our [Tips for Best Training Results](https://docs.ultralytics.com/guides/model-training-tips/). + + Join the Ultralytics community where it suits you best. For real-time chat, head to [Discord](https://ultralytics.com/discord) 🎧. Prefer in-depth discussions? Check out [Discourse](https://community.ultralytics.com). Or dive into threads on our [Subreddit](https://reddit.com/r/ultralytics) to share knowledge with the community. + + ## Upgrade + + Upgrade to the latest `ultralytics` package including all [requirements](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) in a [**Python>=3.8**](https://www.python.org/) environment with [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/) to verify your issue is not already resolved in the latest version: + + ```bash + pip install -U ultralytics + ``` + + ## Environments + + YOLOv8 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled): + + - **Notebooks** with free GPU: + - **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial/) + - **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/) + - **Docker Image**. See [Docker Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/docker_image_quickstart_tutorial/) + + ## Status + + + + If this badge is green, all [Ultralytics CI](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml?query=event%3Aschedule) tests are currently passing. 
CI tests verify correct operation of all YOLOv8 [Modes](https://docs.ultralytics.com/modes/) and [Tasks](https://docs.ultralytics.com/tasks/) on macOS, Windows, and Ubuntu every 24 hours and on every commit. diff --git a/.github/workflows/greetings.yml b/.github/workflows/greetings.yml deleted file mode 100644 index 19b099eb..00000000 --- a/.github/workflows/greetings.yml +++ /dev/null @@ -1,58 +0,0 @@ -# Ultralytics YOLO 🚀, AGPL-3.0 license - -name: Greetings - -on: - pull_request_target: - types: [opened] - issues: - types: [opened] - -jobs: - greeting: - runs-on: ubuntu-latest - steps: - - uses: actions/first-interaction@v1 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - pr-message: | - 👋 Hello @${{ github.actor }}, thank you for submitting an Ultralytics 🚀 PR! To allow your work to be integrated as seamlessly as possible, we advise you to: - - - ✅ Verify your PR is **up-to-date** with `ultralytics/ultralytics` `main` branch. If your PR is behind you can update your code by clicking the 'Update branch' button or by running `git pull` and `git merge main` locally. - - ✅ Verify all Ultralytics Continuous Integration (CI) **checks are passing**. - - ✅ Update Ultralytics [Docs](https://docs.ultralytics.com) for any new or updated features. - - ✅ Reduce changes to the absolute **minimum** required for your bug fix or feature addition. _"It is not daily increase but daily decrease, hack away the unessential. The closer to the source, the less wastage there is."_ — Bruce Lee - - See our [Contributing Guide](https://docs.ultralytics.com/help/contributing) for details and let us know if you have any questions! - - issue-message: | - 👋 Hello @${{ github.actor }}, thank you for your interest in Ultralytics 🚀! We recommend a visit to the [Docs](https://docs.ultralytics.com) for new users where you can find many [Python](https://docs.ultralytics.com/usage/python/) and [CLI](https://docs.ultralytics.com/usage/cli/) usage examples and where many of the most common questions may already be answered. - - If this is a 🐛 Bug Report, please provide a [minimum reproducible example](https://docs.ultralytics.com/help/minimum_reproducible_example/) to help us debug it. - - If this is a custom training ❓ Question, please provide as much information as possible, including dataset image examples and training logs, and verify you are following our [Tips for Best Training Results](https://docs.ultralytics.com/guides/model-training-tips/). - - Join the Ultralytics community where it suits you best. For real-time chat, head to [Discord](https://ultralytics.com/discord) 🎧. Prefer in-depth discussions? Check out [Discourse](https://community.ultralytics.com). Or dive into threads on our [Subreddit](https://reddit.com/r/ultralytics) to share knowledge with the community. - - ## Install - - Pip install the `ultralytics` package including all [requirements](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) in a [**Python>=3.8**](https://www.python.org/) environment with [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/). - - ```bash - pip install ultralytics - ``` - - ## Environments - - YOLOv8 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled): - - - **Notebooks** with free GPU: - - **Google Cloud** Deep Learning VM. 
See [GCP Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial/) - - **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/) - - **Docker Image**. See [Docker Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/docker_image_quickstart_tutorial/) - - ## Status - - - - If this badge is green, all [Ultralytics CI](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml?query=event%3Aschedule) tests are currently passing. CI tests verify correct operation of all YOLOv8 [Modes](https://docs.ultralytics.com/modes/) and [Tasks](https://docs.ultralytics.com/tasks/) on macOS, Windows, and Ubuntu every 24 hours and on every commit. diff --git a/.github/workflows/merge-main-into-prs.yml b/.github/workflows/merge-main-into-prs.yml index 438967a9..347ec1b9 100644 --- a/.github/workflows/merge-main-into-prs.yml +++ b/.github/workflows/merge-main-into-prs.yml @@ -33,8 +33,8 @@ jobs: import os import time - g = Github(os.getenv('GITHUB_TOKEN')) - repo = g.get_repo(os.getenv('GITHUB_REPOSITORY')) + g = Github("${{ secrets.PERSONAL_ACCESS_TOKEN }}") + repo = g.get_repo("${{ github.repository }}") # Fetch the default branch name default_branch_name = repo.default_branch @@ -85,7 +85,3 @@ jobs: print(f"Branches updated: {updated_branches}") print(f"Branches already up-to-date: {up_to_date_branches}") print(f"Total errors: {errors}") - - env: - GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} - GITHUB_REPOSITORY: ${{ github.repository }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 3cb83f27..8276a769 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -85,6 +85,12 @@ jobs: if publish: print('Ready to publish new version to PyPI ✅.') id: check_pypi + - name: Publish to PyPI + continue-on-error: true + if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True' + run: | + python -m build + python -m twine upload dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }} - name: Publish new tag if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True' run: | @@ -100,14 +106,6 @@ jobs: run: | curl -s "https://raw.githubusercontent.com/ultralytics/actions/main/utils/summarize_release.py" | python - shell: bash - - name: Publish to PyPI - continue-on-error: true - if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True' - env: - PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} - run: | - python -m build - python -m twine upload dist/* -u __token__ -p $PYPI_TOKEN - name: Extract PR Details env: GH_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} diff --git a/CITATION.cff b/CITATION.cff index bee0abe2..6c294c0d 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -11,14 +11,14 @@ authors: family-names: Jocher affiliation: Ultralytics orcid: 'https://orcid.org/0000-0001-5950-6979' - - given-names: Ayush - family-names: Chaurasia - affiliation: Ultralytics - orcid: 'https://orcid.org/0000-0002-7603-6750' - family-names: Qiu given-names: Jing affiliation: Ultralytics orcid: 'https://orcid.org/0000-0003-3783-7069' + - given-names: Ayush + family-names: Chaurasia + affiliation: Ultralytics + orcid: 'https://orcid.org/0000-0002-7603-6750' repository-code: 'https://github.com/ultralytics/ultralytics' url: 
'https://ultralytics.com' license: AGPL-3.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d884e43b..b3dbfe16 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -25,7 +25,7 @@ Welcome! We're thrilled that you're considering contributing to our [Ultralytics ## Code of Conduct -To ensure a welcoming and inclusive environment for everyone, all contributors must adhere to our [Code of Conduct](https://docs.ultralytics.com/help/code_of_conduct). Respect, kindness, and professionalism are at the heart of our community. +To ensure a welcoming and inclusive environment for everyone, all contributors must adhere to our [Code of Conduct](https://docs.ultralytics.com/help/code_of_conduct/). Respect, kindness, and professionalism are at the heart of our community. ## Contributing via Pull Requests @@ -45,7 +45,7 @@ We greatly appreciate contributions in the form of pull requests. To make the re ### CLA Signing -Before we can merge your pull request, you must sign our [Contributor License Agreement (CLA)](https://docs.ultralytics.com/help/CLA). This legal agreement ensures that your contributions are properly licensed, allowing the project to continue being distributed under the AGPL-3.0 license. +Before we can merge your pull request, you must sign our [Contributor License Agreement (CLA)](https://docs.ultralytics.com/help/CLA/). This legal agreement ensures that your contributions are properly licensed, allowing the project to continue being distributed under the AGPL-3.0 license. After submitting your pull request, the CLA bot will guide you through the signing process. To sign the CLA, simply add a comment in your PR stating: @@ -117,11 +117,11 @@ def example_small_function(arg1: int, arg2: int = 4) -> bool: ### GitHub Actions CI Tests -All pull requests must pass the GitHub Actions [Continuous Integration](https://docs.ultralytics.com/help/CI) (CI) tests before they can be merged. These tests include linting, unit tests, and other checks to ensure that your changes meet the project's quality standards. Review the CI output and address any issues that arise. +All pull requests must pass the GitHub Actions [Continuous Integration](https://docs.ultralytics.com/help/CI/) (CI) tests before they can be merged. These tests include linting, unit tests, and other checks to ensure that your changes meet the project's quality standards. Review the CI output and address any issues that arise. ## Reporting Bugs -We highly value bug reports as they help us maintain the quality of our projects. When reporting a bug, please provide a [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum_reproducible_example)—a simple, clear code example that consistently reproduces the issue. This allows us to quickly identify and resolve the problem. +We highly value bug reports as they help us maintain the quality of our projects. When reporting a bug, please provide a [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum_reproducible_example/)—a simple, clear code example that consistently reproduces the issue. This allows us to quickly identify and resolve the problem. ## License @@ -163,4 +163,4 @@ the project's quality standards. Review the CI output and fix any issues. For de ### How do I report a bug in Ultralytics YOLO repositories? -To report a bug, provide a clear and concise [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum_reproducible_example) along with your bug report. This helps developers quickly identify and fix the issue. 
Ensure your example is minimal yet sufficient to replicate the problem. For more detailed steps on reporting bugs, refer to the [Reporting Bugs](#reporting-bugs) section. +To report a bug, provide a clear and concise [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum_reproducible_example/) along with your bug report. This helps developers quickly identify and fix the issue. Ensure your example is minimal yet sufficient to replicate the problem. For more detailed steps on reporting bugs, refer to the [Reporting Bugs](#reporting-bugs) section. diff --git a/README.md b/README.md index 1cc1686e..851d6a2d 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@
@@ -16,7 +16,7 @@ The [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) (Canadian Institute
allowfullscreen>
- Watch: How to Train an Image Classification Model with CIFAR-10 Dataset using Ultralytics YOLOv8
+ Watch: How to Train an [Image Classification](https://www.ultralytics.com/glossary/image-classification) Model with CIFAR-10 Dataset using Ultralytics YOLOv8
@@ -16,7 +16,7 @@ The [Fashion-MNIST](https://github.com/zalandoresearch/fashion-mnist) dataset is
allowfullscreen>
- Watch: How to do Image Classification on Fashion MNIST Dataset using Ultralytics YOLOv8
+ Watch: How to do [Image Classification](https://www.ultralytics.com/glossary/image-classification) on Fashion MNIST Dataset using Ultralytics YOLOv8
@@ -29,7 +29,7 @@ The African wildlife objects detection dataset is split into three subsets:
## Applications
-This dataset can be applied in various computer vision tasks such as object detection, object tracking, and research. Specifically, it can be used to train and evaluate models for identifying African wildlife objects in images, which can have applications in wildlife conservation, ecological research, and monitoring efforts in natural reserves and protected areas. Additionally, it can serve as a valuable resource for educational purposes, enabling students and researchers to study and understand the characteristics and behaviors of different animal species.
+This dataset can be applied in various computer vision tasks such as [object detection](https://www.ultralytics.com/glossary/object-detection), object tracking, and research. Specifically, it can be used to train and evaluate models for identifying African wildlife objects in images, which can have applications in wildlife conservation, ecological research, and monitoring efforts in natural reserves and protected areas. Additionally, it can serve as a valuable resource for educational purposes, enabling students and researchers to study and understand the characteristics and behaviors of different animal species.
## Dataset YAML
@@ -43,7 +43,7 @@ A YAML (Yet Another Markup Language) file defines the dataset configuration, inc
## Usage
-To train a YOLOv8n model on the African wildlife dataset for 100 epochs with an image size of 640, use the provided code samples. For a comprehensive list of available parameters, refer to the model's [Training](../../modes/train.md) page.
+To train a YOLOv8n model on the African wildlife dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, use the provided code samples. For a comprehensive list of available parameters, refer to the model's [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -136,7 +136,7 @@ For additional training parameters and options, refer to the [Training](../../mo
### Where can I find the YAML configuration file for the African Wildlife Dataset?
-The YAML configuration file for the African Wildlife Dataset, named `african-wildlife.yaml`, can be found at [this GitHub link](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/african-wildlife.yaml). This file defines the dataset configuration, including paths, classes, and other details crucial for training machine learning models. See the [Dataset YAML](#dataset-yaml) section for more details.
+The YAML configuration file for the African Wildlife Dataset, named `african-wildlife.yaml`, can be found at [this GitHub link](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/african-wildlife.yaml). This file defines the dataset configuration, including paths, classes, and other details crucial for training [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models. See the [Dataset YAML](#dataset-yaml) section for more details.
### Can I see sample images and annotations from the African Wildlife Dataset?
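For readers following the Usage text in the dataset pages touched above, here is a minimal, illustrative sketch of the training call those docs describe (100 epochs at `imgsz=640`) using the public `ultralytics` Python API. The `african-wildlife.yaml` name comes from the docs themselves; the sample image path is a placeholder.

```python
# Minimal training sketch matching the Usage text above (illustrative, not part of the patch).
from ultralytics import YOLO

# Start from a pretrained YOLOv8n detection checkpoint
model = YOLO("yolov8n.pt")

# Train on the African wildlife dataset for 100 epochs at 640x640
results = model.train(data="african-wildlife.yaml", epochs=100, imgsz=640)

# Quick check: run inference with the trained weights on a placeholder image
predictions = model("path/to/image.jpg")
```

The CLI form referenced in the docs is roughly equivalent: `yolo detect train data=african-wildlife.yaml model=yolov8n.pt epochs=100 imgsz=640`.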
diff --git a/docs/en/datasets/detect/argoverse.md b/docs/en/datasets/detect/argoverse.md
index 47ef822b..a834be90 100644
--- a/docs/en/datasets/detect/argoverse.md
+++ b/docs/en/datasets/detect/argoverse.md
@@ -29,7 +29,7 @@ The Argoverse dataset is organized into three main subsets:
## Applications
-The Argoverse dataset is widely used for training and evaluating deep learning models in autonomous driving tasks such as 3D object tracking, motion forecasting, and stereo depth estimation. The dataset's diverse set of sensor data, object annotations, and map information make it a valuable resource for researchers and practitioners in the field of autonomous driving.
+The Argoverse dataset is widely used for training and evaluating [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models in autonomous driving tasks such as 3D object tracking, motion forecasting, and stereo depth estimation. The dataset's diverse set of sensor data, object annotations, and map information make it a valuable resource for researchers and practitioners in the field of autonomous driving.
## Dataset YAML
@@ -43,7 +43,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the Argoverse dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLOv8n model on the Argoverse dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
diff --git a/docs/en/datasets/detect/brain-tumor.md b/docs/en/datasets/detect/brain-tumor.md
index aa48c5f9..9f108e73 100644
--- a/docs/en/datasets/detect/brain-tumor.md
+++ b/docs/en/datasets/detect/brain-tumor.md
@@ -6,7 +6,7 @@ keywords: brain tumor dataset, MRI scans, CT scans, brain tumor detection, medic
# Brain Tumor Dataset
-A brain tumor detection dataset consists of medical images from MRI or CT scans, containing information about brain tumor presence, location, and characteristics. This dataset is essential for training computer vision algorithms to automate brain tumor identification, aiding in early diagnosis and treatment planning.
+A brain tumor detection dataset consists of medical images from MRI or CT scans, containing information about brain tumor presence, location, and characteristics. This dataset is essential for training [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) algorithms to automate brain tumor identification, aiding in early diagnosis and treatment planning.
@@ -42,7 +42,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the brain tumor dataset for 100 epochs with an image size of 640, utilize the provided code snippets. For a detailed list of available arguments, consult the model's [Training](../../modes/train.md) page.
+To train a YOLOv8n model on the brain tumor dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, utilize the provided code snippets. For a detailed list of available arguments, consult the model's [Training](../../modes/train.md) page.
!!! example "Train Example"
diff --git a/docs/en/datasets/detect/coco.md b/docs/en/datasets/detect/coco.md
index 25878ed9..d0901428 100644
--- a/docs/en/datasets/detect/coco.md
+++ b/docs/en/datasets/detect/coco.md
@@ -6,7 +6,7 @@ keywords: COCO dataset, object detection, segmentation, benchmarking, computer v
# COCO Dataset
-The [COCO](https://cocodataset.org/#home) (Common Objects in Context) dataset is a large-scale object detection, segmentation, and captioning dataset. It is designed to encourage research on a wide variety of object categories and is commonly used for benchmarking computer vision models. It is an essential dataset for researchers and developers working on object detection, segmentation, and pose estimation tasks.
+The [COCO](https://cocodataset.org/#home) (Common Objects in Context) dataset is a large-scale object detection, segmentation, and captioning dataset. It is designed to encourage research on a wide variety of object categories and is commonly used for benchmarking [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models. It is an essential dataset for researchers and developers working on object detection, segmentation, and pose estimation tasks.
@@ -34,7 +34,7 @@ The [COCO](https://cocodataset.org/#home) (Common Objects in Context) dataset is
- COCO contains 330K images, with 200K images having annotations for object detection, segmentation, and captioning tasks.
- The dataset comprises 80 object categories, including common objects like cars, bicycles, and animals, as well as more specific categories such as umbrellas, handbags, and sports equipment.
- Annotations include object bounding boxes, segmentation masks, and captions for each image.
-- COCO provides standardized evaluation metrics like mean Average Precision (mAP) for object detection, and mean Average Recall (mAR) for segmentation tasks, making it suitable for comparing model performance.
+- COCO provides standardized evaluation metrics like [mean Average Precision](https://www.ultralytics.com/glossary/mean-average-precision-map) (mAP) for object detection, and mean Average [Recall](https://www.ultralytics.com/glossary/recall) (mAR) for segmentation tasks, making it suitable for comparing model performance.
## Dataset Structure
@@ -46,7 +46,7 @@ The COCO dataset is split into three subsets:
## Applications
-The COCO dataset is widely used for training and evaluating deep learning models in object detection (such as YOLO, Faster R-CNN, and SSD), instance segmentation (such as Mask R-CNN), and keypoint detection (such as OpenPose). The dataset's diverse set of object categories, large number of annotated images, and standardized evaluation metrics make it an essential resource for computer vision researchers and practitioners.
+The COCO dataset is widely used for training and evaluating [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models in object detection (such as YOLO, Faster R-CNN, and SSD), [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation) (such as Mask R-CNN), and keypoint detection (such as OpenPose). The dataset's diverse set of object categories, large number of annotated images, and standardized evaluation metrics make it an essential resource for computer vision researchers and practitioners.
## Dataset YAML
@@ -60,7 +60,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the COCO dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLOv8n model on the COCO dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -118,7 +118,7 @@ We would like to acknowledge the COCO Consortium for creating and maintaining th
### What is the COCO dataset and why is it important for computer vision?
-The [COCO dataset](https://cocodataset.org/#home) (Common Objects in Context) is a large-scale dataset used for object detection, segmentation, and captioning. It contains 330K images with detailed annotations for 80 object categories, making it essential for benchmarking and training computer vision models. Researchers use COCO due to its diverse categories and standardized evaluation metrics like mean Average Precision (mAP).
+The [COCO dataset](https://cocodataset.org/#home) (Common Objects in Context) is a large-scale dataset used for [object detection](https://www.ultralytics.com/glossary/object-detection), segmentation, and captioning. It contains 330K images with detailed annotations for 80 object categories, making it essential for benchmarking and training computer vision models. Researchers use COCO due to its diverse categories and standardized evaluation metrics like mean Average [Precision](https://www.ultralytics.com/glossary/precision) (mAP).
### How can I train a YOLO model using the COCO dataset?
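As a companion to the mAP/mAR wording adjusted above, a hedged sketch of how those COCO-style metrics are obtained through the `ultralytics` validation mode; the small `coco8.yaml` split is used here as a lightweight stand-in for full COCO, and the attribute names are assumed to follow the current metrics API.

```python
# Illustrative validation sketch for the COCO-style mAP metrics discussed above.
from ultralytics import YOLO

model = YOLO("yolov8n.pt")                         # pretrained detection model
metrics = model.val(data="coco8.yaml", imgsz=640)  # validate on the tiny COCO8 split

print(metrics.box.map)    # mAP at IoU 0.50:0.95
print(metrics.box.map50)  # mAP at IoU 0.50
print(metrics.box.map75)  # mAP at IoU 0.75
```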
diff --git a/docs/en/datasets/detect/coco8.md b/docs/en/datasets/detect/coco8.md
index a0972693..4a8ad5a8 100644
--- a/docs/en/datasets/detect/coco8.md
+++ b/docs/en/datasets/detect/coco8.md
@@ -8,7 +8,7 @@ keywords: COCO8, Ultralytics, dataset, object detection, YOLOv8, training, valid
## Introduction
-[Ultralytics](https://www.ultralytics.com/) COCO8 is a small, but versatile object detection dataset composed of the first 8 images of the COCO train 2017 set, 4 for training and 4 for validation. This dataset is ideal for testing and debugging object detection models, or for experimenting with new detection approaches. With 8 images, it is small enough to be easily manageable, yet diverse enough to test training pipelines for errors and act as a sanity check before training larger datasets.
+[Ultralytics](https://www.ultralytics.com/) COCO8 is a small, but versatile [object detection](https://www.ultralytics.com/glossary/object-detection) dataset composed of the first 8 images of the COCO train 2017 set, 4 for training and 4 for validation. This dataset is ideal for testing and debugging object detection models, or for experimenting with new detection approaches. With 8 images, it is small enough to be easily manageable, yet diverse enough to test training pipelines for errors and act as a sanity check before training larger datasets.
@@ -35,7 +35,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the COCO8 dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLOv8n model on the COCO8 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -87,7 +87,7 @@ If you use the COCO dataset in your research or development work, please cite th
}
```
-We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home).
+We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home).
## FAQ
diff --git a/docs/en/datasets/detect/globalwheat2020.md b/docs/en/datasets/detect/globalwheat2020.md
index 37b9759f..ef7ff7ac 100644
--- a/docs/en/datasets/detect/globalwheat2020.md
+++ b/docs/en/datasets/detect/globalwheat2020.md
@@ -24,7 +24,7 @@ The Global Wheat Head Dataset is organized into two main subsets:
## Applications
-The Global Wheat Head Dataset is widely used for training and evaluating deep learning models in wheat head detection tasks. The dataset's diverse set of images, capturing a wide range of appearances, environments, and conditions, make it a valuable resource for researchers and practitioners in the field of plant phenotyping and crop management.
+The Global Wheat Head Dataset is widely used for training and evaluating [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models in wheat head detection tasks. The dataset's diverse set of images, capturing a wide range of appearances, environments, and conditions, makes it a valuable resource for researchers and practitioners in the field of plant phenotyping and crop management.
## Dataset YAML
@@ -38,7 +38,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the Global Wheat Head Dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLOv8n model on the Global Wheat Head Dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -130,7 +130,7 @@ Key features of the Global Wheat Head Dataset include:
- Over 3,000 training images from Europe (France, UK, Switzerland) and North America (Canada).
- Approximately 1,000 test images from Australia, Japan, and China.
- High variability in wheat head appearances due to different growing environments.
-- Detailed annotations with wheat head bounding boxes to aid object detection models.
+- Detailed annotations with wheat head bounding boxes to aid [object detection](https://www.ultralytics.com/glossary/object-detection) models.
These features facilitate the development of robust models capable of generalization across multiple regions.
diff --git a/docs/en/datasets/detect/index.md b/docs/en/datasets/detect/index.md
index f76ba40b..61640480 100644
--- a/docs/en/datasets/detect/index.md
+++ b/docs/en/datasets/detect/index.md
@@ -6,7 +6,7 @@ keywords: Ultralytics, YOLO, object detection datasets, dataset formats, COCO, d
# Object Detection Datasets Overview
-Training a robust and accurate object detection model requires a comprehensive dataset. This guide introduces various formats of datasets that are compatible with the Ultralytics YOLO model and provides insights into their structure, usage, and how to convert between different formats.
+Training a robust and accurate [object detection](https://www.ultralytics.com/glossary/object-detection) model requires a comprehensive dataset. This guide introduces various formats of datasets that are compatible with the Ultralytics YOLO model and provides insights into their structure, usage, and how to convert between different formats.
## Supported Dataset Formats
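Since the overview page above covers converting between dataset formats, a short sketch of the COCO-to-YOLO conversion helper is included here for reference; the paths are placeholders and the exact keyword arguments may differ between `ultralytics` versions.

```python
# Hedged sketch: convert COCO JSON annotations to YOLO txt labels (paths are placeholders).
from ultralytics.data.converter import convert_coco

convert_coco(
    labels_dir="path/to/coco/annotations/",  # folder containing instances_*.json files
    save_dir="coco_converted/",              # where YOLO-format labels are written
    use_segments=False,                      # True to also export segmentation polygons
    cls91to80=True,                          # map the 91 COCO classes onto the standard 80
)
```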
diff --git a/docs/en/datasets/detect/lvis.md b/docs/en/datasets/detect/lvis.md
index 8c06920f..c4a4ff76 100644
--- a/docs/en/datasets/detect/lvis.md
+++ b/docs/en/datasets/detect/lvis.md
@@ -6,7 +6,7 @@ keywords: LVIS dataset, object detection, instance segmentation, Facebook AI Res
# LVIS Dataset
-The [LVIS dataset](https://www.lvisdataset.org/) is a large-scale, fine-grained vocabulary-level annotation dataset developed and released by Facebook AI Research (FAIR). It is primarily used as a research benchmark for object detection and instance segmentation with a large vocabulary of categories, aiming to drive further advancements in computer vision field.
+The [LVIS dataset](https://www.lvisdataset.org/) is a large-scale, fine-grained vocabulary-level annotation dataset developed and released by Facebook AI Research (FAIR). It is primarily used as a research benchmark for object detection and [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation) with a large vocabulary of categories, aiming to drive further advancements in the computer vision field.
@@ -28,7 +28,7 @@ The [LVIS dataset](https://www.lvisdataset.org/) is a large-scale, fine-grained
- LVIS contains 160k images and 2M instance annotations for object detection, segmentation, and captioning tasks.
- The dataset comprises 1203 object categories, including common objects like cars, bicycles, and animals, as well as more specific categories such as umbrellas, handbags, and sports equipment.
- Annotations include object bounding boxes, segmentation masks, and captions for each image.
-- LVIS provides standardized evaluation metrics like mean Average Precision (mAP) for object detection, and mean Average Recall (mAR) for segmentation tasks, making it suitable for comparing model performance.
+- LVIS provides standardized evaluation metrics like [mean Average Precision](https://www.ultralytics.com/glossary/mean-average-precision-map) (mAP) for object detection, and mean Average [Recall](https://www.ultralytics.com/glossary/recall) (mAR) for segmentation tasks, making it suitable for comparing model performance.
- LVIS uses exactly the same images as [COCO](./coco.md) dataset, but with different splits and different annotations.
## Dataset Structure
@@ -42,7 +42,7 @@ The LVIS dataset is split into three subsets:
## Applications
-The LVIS dataset is widely used for training and evaluating deep learning models in object detection (such as YOLO, Faster R-CNN, and SSD), instance segmentation (such as Mask R-CNN). The dataset's diverse set of object categories, large number of annotated images, and standardized evaluation metrics make it an essential resource for computer vision researchers and practitioners.
+The LVIS dataset is widely used for training and evaluating [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models in object detection (such as YOLO, Faster R-CNN, and SSD) and instance segmentation (such as Mask R-CNN). The dataset's diverse set of object categories, large number of annotated images, and standardized evaluation metrics make it an essential resource for computer vision researchers and practitioners.
## Dataset YAML
@@ -56,7 +56,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the LVIS dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLOv8n model on the LVIS dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -106,7 +106,7 @@ If you use the LVIS dataset in your research or development work, please cite th
}
```
-We would like to acknowledge the LVIS Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the LVIS dataset and its creators, visit the [LVIS dataset website](https://www.lvisdataset.org/).
+We would like to acknowledge the LVIS Consortium for creating and maintaining this valuable resource for the [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) community. For more information about the LVIS dataset and its creators, visit the [LVIS dataset website](https://www.lvisdataset.org/).
## FAQ
@@ -144,11 +144,11 @@ For detailed training configurations, refer to the [Training](../../modes/train.
### How does the LVIS dataset differ from the COCO dataset?
-The images in the LVIS dataset are the same as those in the [COCO dataset](./coco.md), but the two differ in terms of splitting and annotations. LVIS provides a larger and more detailed vocabulary with 1203 object categories compared to COCO's 80 categories. Additionally, LVIS focuses on annotation completeness and diversity, aiming to push the limits of object detection and instance segmentation models by offering more nuanced and comprehensive data.
+The images in the LVIS dataset are the same as those in the [COCO dataset](./coco.md), but the two differ in terms of splitting and annotations. LVIS provides a larger and more detailed vocabulary with 1203 object categories compared to COCO's 80 categories. Additionally, LVIS focuses on annotation completeness and diversity, aiming to push the limits of [object detection](https://www.ultralytics.com/glossary/object-detection) and instance segmentation models by offering more nuanced and comprehensive data.
### Why should I use Ultralytics YOLO for training on the LVIS dataset?
-Ultralytics YOLO models, including the latest YOLOv8, are optimized for real-time object detection with state-of-the-art accuracy and speed. They support a wide range of annotations, such as the fine-grained ones provided by the LVIS dataset, making them ideal for advanced computer vision applications. Moreover, Ultralytics offers seamless integration with various [training](../../modes/train.md), [validation](../../modes/val.md), and [prediction](../../modes/predict.md) modes, ensuring efficient model development and deployment.
+Ultralytics YOLO models, including the latest YOLOv8, are optimized for real-time object detection with state-of-the-art [accuracy](https://www.ultralytics.com/glossary/accuracy) and speed. They support a wide range of annotations, such as the fine-grained ones provided by the LVIS dataset, making them ideal for advanced computer vision applications. Moreover, Ultralytics offers seamless integration with various [training](../../modes/train.md), [validation](../../modes/val.md), and [prediction](../../modes/predict.md) modes, ensuring efficient model development and deployment.
### Can I see some sample annotations from the LVIS dataset?
diff --git a/docs/en/datasets/detect/objects365.md b/docs/en/datasets/detect/objects365.md
index f2b44f24..49947617 100644
--- a/docs/en/datasets/detect/objects365.md
+++ b/docs/en/datasets/detect/objects365.md
@@ -24,7 +24,7 @@ The Objects365 dataset is organized into a single set of images with correspondi
## Applications
-The Objects365 dataset is widely used for training and evaluating deep learning models in object detection tasks. The dataset's diverse set of object categories and high-quality annotations make it a valuable resource for researchers and practitioners in the field of computer vision.
+The Objects365 dataset is widely used for training and evaluating deep learning models in object detection tasks. The dataset's diverse set of object categories and high-quality annotations make it a valuable resource for researchers and practitioners in the field of [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv).
## Dataset YAML
@@ -38,7 +38,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the Objects365 dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLOv8n model on the Objects365 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -63,7 +63,7 @@ To train a YOLOv8n model on the Objects365 dataset for 100 epochs with an image
## Sample Data and Annotations
-The Objects365 dataset contains a diverse set of high-resolution images with objects from 365 categories, providing rich context for object detection tasks. Here are some examples of the images in the dataset:
+The Objects365 dataset contains a diverse set of high-resolution images with objects from 365 categories, providing rich context for [object detection](https://www.ultralytics.com/glossary/object-detection) tasks. Here are some examples of the images in the dataset:
![Dataset sample image](https://github.com/ultralytics/docs/releases/download/0/objects365-sample-image.avif)
@@ -95,7 +95,7 @@ We would like to acknowledge the team of researchers who created and maintain th
### What is the Objects365 dataset used for?
-The [Objects365 dataset](https://www.objects365.org/) is designed for object detection tasks in machine learning and computer vision. It provides a large-scale, high-quality dataset with 2 million annotated images and 30 million bounding boxes across 365 categories. Leveraging such a diverse dataset helps improve the performance and generalization of object detection models, making it invaluable for research and development in the field.
+The [Objects365 dataset](https://www.objects365.org/) is designed for object detection tasks in [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) and computer vision. It provides a large-scale, high-quality dataset with 2 million annotated images and 30 million bounding boxes across 365 categories. Leveraging such a diverse dataset helps improve the performance and generalization of object detection models, making it invaluable for research and development in the field.
### How can I train a YOLOv8 model on the Objects365 dataset?
@@ -138,4 +138,4 @@ The YAML configuration file for the Objects365 dataset is available at [Objects3
### How does the dataset structure of Objects365 enhance object detection modeling?
-The [Objects365 dataset](https://www.objects365.org/) is organized with 2 million high-resolution images and comprehensive annotations of over 30 million bounding boxes. This structure ensures a robust dataset for training deep learning models in object detection, offering a wide variety of objects and scenarios. Such diversity and volume help in developing models that are more accurate and capable of generalizing well to real-world applications. For more details on the dataset structure, refer to the [Dataset YAML](#dataset-yaml) section.
+The [Objects365 dataset](https://www.objects365.org/) is organized with 2 million high-resolution images and comprehensive annotations of over 30 million bounding boxes. This structure ensures a robust dataset for training [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models in object detection, offering a wide variety of objects and scenarios. Such diversity and volume help in developing models that are more accurate and capable of generalizing well to real-world applications. For more details on the dataset structure, refer to the [Dataset YAML](#dataset-yaml) section.
diff --git a/docs/en/datasets/detect/open-images-v7.md b/docs/en/datasets/detect/open-images-v7.md
index f6b0c63b..1e6f1f7e 100644
--- a/docs/en/datasets/detect/open-images-v7.md
+++ b/docs/en/datasets/detect/open-images-v7.md
@@ -6,7 +6,7 @@ keywords: Open Images V7, Google dataset, computer vision, YOLOv8 models, object
# Open Images V7 Dataset
-[Open Images V7](https://storage.googleapis.com/openimages/web/index.html) is a versatile and expansive dataset championed by Google. Aimed at propelling research in the realm of computer vision, it boasts a vast collection of images annotated with a plethora of data, including image-level labels, object bounding boxes, object segmentation masks, visual relationships, and localized narratives.
+[Open Images V7](https://storage.googleapis.com/openimages/web/index.html) is a versatile and expansive dataset championed by Google. Aimed at propelling research in the realm of [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv), it boasts a vast collection of images annotated with a plethora of data, including image-level labels, object bounding boxes, object segmentation masks, visual relationships, and localized narratives.
@@ -16,7 +16,7 @@ keywords: Open Images V7, Google dataset, computer vision, YOLOv8 models, object
allowfullscreen>
- Watch: Object Detection using OpenImagesV7 Pretrained Model
+ Watch: [Object Detection](https://www.ultralytics.com/glossary/object-detection) using OpenImagesV7 Pretrained Model
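To make the video caption above concrete, a small inference sketch with an Open Images V7 pretrained detector; the `yolov8n-oiv7.pt` weight name and the sample image URL are assumptions, not taken from this patch.

```python
# Sketch of Open Images V7 inference (weight name and image URL are assumptions).
from ultralytics import YOLO

model = YOLO("yolov8n-oiv7.pt")                            # OIV7-pretrained detector
results = model("https://ultralytics.com/images/bus.jpg")  # run prediction on a sample image
results[0].show()                                          # display detections
```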
@@ -37,7 +37,7 @@ The SKU-110k dataset is organized into three main subsets:
## Applications
-The SKU-110k dataset is widely used for training and evaluating deep learning models in object detection tasks, especially in densely packed scenes such as retail shelf displays. The dataset's diverse set of SKU categories and densely packed object arrangements make it a valuable resource for researchers and practitioners in the field of computer vision.
+The SKU-110k dataset is widely used for training and evaluating deep learning models in object detection tasks, especially in densely packed scenes such as retail shelf displays. The dataset's diverse set of SKU categories and densely packed object arrangements make it a valuable resource for researchers and practitioners in the field of [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv).
## Dataset YAML
@@ -51,7 +51,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the SKU-110K dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLOv8n model on the SKU-110K dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -151,7 +151,7 @@ Refer to the [Dataset Structure](#dataset-structure) section for more details.
The SKU-110k dataset configuration is defined in a YAML file, which includes details about the dataset's paths, classes, and other relevant information. The `SKU-110K.yaml` file is maintained at [SKU-110K.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/SKU-110K.yaml). For example, you can train a model using this configuration as shown in our [Usage](#usage) section.
-### What are the key features of the SKU-110k dataset in the context of deep learning?
+### What are the key features of the SKU-110k dataset in the context of [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl)?
The SKU-110k dataset features images of store shelves from around the world, showcasing densely packed objects that pose significant challenges for object detectors:
diff --git a/docs/en/datasets/detect/visdrone.md b/docs/en/datasets/detect/visdrone.md
index c1060e99..99b182cb 100644
--- a/docs/en/datasets/detect/visdrone.md
+++ b/docs/en/datasets/detect/visdrone.md
@@ -6,7 +6,7 @@ keywords: VisDrone, drone dataset, computer vision, object detection, object tra
# VisDrone Dataset
-The [VisDrone Dataset](https://github.com/VisDrone/VisDrone-Dataset) is a large-scale benchmark created by the AISKYEYE team at the Lab of Machine Learning and Data Mining, Tianjin University, China. It contains carefully annotated ground truth data for various computer vision tasks related to drone-based image and video analysis.
+The [VisDrone Dataset](https://github.com/VisDrone/VisDrone-Dataset) is a large-scale benchmark created by the AISKYEYE team at the Lab of [Machine Learning](https://www.ultralytics.com/glossary/machine-learning-ml) and Data Mining, Tianjin University, China. It contains carefully annotated ground truth data for various computer vision tasks related to drone-based image and video analysis.
@@ -33,7 +33,7 @@ The VisDrone dataset is organized into five main subsets, each focusing on a spe
## Applications
-The VisDrone dataset is widely used for training and evaluating deep learning models in drone-based computer vision tasks such as object detection, object tracking, and crowd counting. The dataset's diverse set of sensor data, object annotations, and attributes make it a valuable resource for researchers and practitioners in the field of drone-based computer vision.
+The VisDrone dataset is widely used for training and evaluating deep learning models in drone-based [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) tasks such as object detection, object tracking, and crowd counting. The dataset's diverse set of sensor data, object annotations, and attributes make it a valuable resource for researchers and practitioners in the field of drone-based computer vision.
## Dataset YAML
@@ -47,7 +47,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the VisDrone dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLOv8n model on the VisDrone dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -76,7 +76,7 @@ The VisDrone dataset contains a diverse set of images and videos captured by dro
![Dataset sample image](https://github.com/ultralytics/docs/releases/download/0/visdrone-object-detection-sample.avif)
-- **Task 1**: Object detection in images - This image demonstrates an example of object detection in images, where objects are annotated with bounding boxes. The dataset provides a wide variety of images taken from different locations, environments, and densities to facilitate the development of models for this task.
+- **Task 1**: [Object detection](https://www.ultralytics.com/glossary/object-detection) in images - This image demonstrates an example of object detection in images, where objects are annotated with bounding boxes. The dataset provides a wide variety of images taken from different locations, environments, and densities to facilitate the development of models for this task.
The example showcases the variety and complexity of the data in the VisDrone dataset and highlights the importance of high-quality sensor data for drone-based computer vision tasks.
@@ -100,7 +100,7 @@ If you use the VisDrone dataset in your research or development work, please cit
doi={10.1109/TPAMI.2021.3119563}}
```
-We would like to acknowledge the AISKYEYE team at the Lab of Machine Learning and Data Mining, Tianjin University, China, for creating and maintaining the VisDrone dataset as a valuable resource for the drone-based computer vision research community. For more information about the VisDrone dataset and its creators, visit the [VisDrone Dataset GitHub repository](https://github.com/VisDrone/VisDrone-Dataset).
+We would like to acknowledge the AISKYEYE team at the Lab of Machine Learning and [Data Mining](https://www.ultralytics.com/glossary/data-mining), Tianjin University, China, for creating and maintaining the VisDrone dataset as a valuable resource for the drone-based computer vision research community. For more information about the VisDrone dataset and its creators, visit the [VisDrone Dataset GitHub repository](https://github.com/VisDrone/VisDrone-Dataset).
## FAQ
@@ -150,7 +150,7 @@ The VisDrone dataset is divided into five main subsets, each tailored for a spec
4. **Task 4**: Multi-object tracking.
5. **Task 5**: Crowd counting.
-These subsets are widely used for training and evaluating deep learning models in drone-based applications such as surveillance, traffic monitoring, and public safety.
+These subsets are widely used for training and evaluating [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models in drone-based applications such as surveillance, traffic monitoring, and public safety.
### Where can I find the configuration file for the VisDrone dataset in Ultralytics?
diff --git a/docs/en/datasets/detect/voc.md b/docs/en/datasets/detect/voc.md
index 0afc9205..7dc67fb5 100644
--- a/docs/en/datasets/detect/voc.md
+++ b/docs/en/datasets/detect/voc.md
@@ -13,7 +13,7 @@ The [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/) (Visual Object Classes
- VOC dataset includes two main challenges: VOC2007 and VOC2012.
- The dataset comprises 20 object categories, including common objects like cars, bicycles, and animals, as well as more specific categories such as boats, sofas, and dining tables.
- Annotations include object bounding boxes and class labels for object detection and classification tasks, and segmentation masks for the segmentation tasks.
-- VOC provides standardized evaluation metrics like mean Average Precision (mAP) for object detection and classification, making it suitable for comparing model performance.
+- VOC provides standardized evaluation metrics like [mean Average Precision](https://www.ultralytics.com/glossary/mean-average-precision-map) (mAP) for object detection and classification, making it suitable for comparing model performance.
## Dataset Structure
@@ -25,7 +25,7 @@ The VOC dataset is split into three subsets:
## Applications
-The VOC dataset is widely used for training and evaluating deep learning models in object detection (such as YOLO, Faster R-CNN, and SSD), instance segmentation (such as Mask R-CNN), and image classification. The dataset's diverse set of object categories, large number of annotated images, and standardized evaluation metrics make it an essential resource for computer vision researchers and practitioners.
+The VOC dataset is widely used for training and evaluating [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models in object detection (such as YOLO, Faster R-CNN, and SSD), [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation) (such as Mask R-CNN), and [image classification](https://www.ultralytics.com/glossary/image-classification). The dataset's diverse set of object categories, large number of annotated images, and standardized evaluation metrics make it an essential resource for computer vision researchers and practitioners.
## Dataset YAML
@@ -39,7 +39,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the VOC dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLOv8n model on the VOC dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -91,13 +91,13 @@ If you use the VOC dataset in your research or development work, please cite the
}
```
-We would like to acknowledge the PASCAL VOC Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the VOC dataset and its creators, visit the [PASCAL VOC dataset website](http://host.robots.ox.ac.uk/pascal/VOC/).
+We would like to acknowledge the PASCAL VOC Consortium for creating and maintaining this valuable resource for the [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) community. For more information about the VOC dataset and its creators, visit the [PASCAL VOC dataset website](http://host.robots.ox.ac.uk/pascal/VOC/).
## FAQ
### What is the PASCAL VOC dataset and why is it important for computer vision tasks?
-The [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/) (Visual Object Classes) dataset is a renowned benchmark for object detection, segmentation, and classification in computer vision. It includes comprehensive annotations like bounding boxes, class labels, and segmentation masks across 20 different object categories. Researchers use it widely to evaluate the performance of models like Faster R-CNN, YOLO, and Mask R-CNN due to its standardized evaluation metrics such as mean Average Precision (mAP).
+The [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/) (Visual Object Classes) dataset is a renowned benchmark for [object detection](https://www.ultralytics.com/glossary/object-detection), segmentation, and classification in computer vision. It includes comprehensive annotations like bounding boxes, class labels, and segmentation masks across 20 different object categories. Researchers use it widely to evaluate the performance of models like Faster R-CNN, YOLO, and Mask R-CNN due to its standardized evaluation metrics such as mean Average Precision (mAP).
### How do I train a YOLOv8 model using the VOC dataset?
@@ -130,8 +130,8 @@ The VOC dataset includes two main challenges: VOC2007 and VOC2012. These challen
### How does the PASCAL VOC dataset enhance model benchmarking and evaluation?
-The PASCAL VOC dataset enhances model benchmarking and evaluation through its detailed annotations and standardized metrics like mean Average Precision (mAP). These metrics are crucial for assessing the performance of object detection and classification models. The dataset's diverse and complex images ensure comprehensive model evaluation across various real-world scenarios.
+The PASCAL VOC dataset enhances model benchmarking and evaluation through its detailed annotations and standardized metrics like mean Average [Precision](https://www.ultralytics.com/glossary/precision) (mAP). These metrics are crucial for assessing the performance of object detection and classification models. The dataset's diverse and complex images ensure comprehensive model evaluation across various real-world scenarios.
-### How do I use the VOC dataset for semantic segmentation in YOLO models?
+### How do I use the VOC dataset for [semantic segmentation](https://www.ultralytics.com/glossary/semantic-segmentation) in YOLO models?
To use the VOC dataset for semantic segmentation tasks with YOLO models, you need to configure the dataset properly in a YAML file. The YAML file defines paths and classes needed for training segmentation models. Check the VOC dataset YAML configuration file at [VOC.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/VOC.yaml) for detailed setups.
diff --git a/docs/en/datasets/detect/xview.md b/docs/en/datasets/detect/xview.md
index e7e2f3d3..df8e4933 100644
--- a/docs/en/datasets/detect/xview.md
+++ b/docs/en/datasets/detect/xview.md
@@ -6,7 +6,7 @@ keywords: xView dataset, overhead imagery, satellite images, object detection, h
# xView Dataset
-The [xView](http://xviewdataset.org/) dataset is one of the largest publicly available datasets of overhead imagery, containing images from complex scenes around the world annotated using bounding boxes. The goal of the xView dataset is to accelerate progress in four computer vision frontiers:
+The [xView](http://xviewdataset.org/) dataset is one of the largest publicly available datasets of overhead imagery, containing images from complex scenes around the world annotated using bounding boxes. The goal of the xView dataset is to accelerate progress in four [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) frontiers:
1. Reduce minimum resolution for detection.
2. Improve learning efficiency.
@@ -19,8 +19,8 @@ xView builds on the success of challenges like Common Objects in Context (COCO)
- xView contains over 1 million object instances across 60 classes.
- The dataset has a resolution of 0.3 meters, providing higher resolution imagery than most public satellite imagery datasets.
-- xView features a diverse collection of small, rare, fine-grained, and multi-type objects with bounding box annotation.
-- Comes with a pre-trained baseline model using the TensorFlow object detection API and an example for PyTorch.
+- xView features a diverse collection of small, rare, fine-grained, and multi-type objects with [bounding box](https://www.ultralytics.com/glossary/bounding-box) annotation.
+- Comes with a pre-trained baseline model using the TensorFlow object detection API and an example for [PyTorch](https://www.ultralytics.com/glossary/pytorch).
## Dataset Structure
@@ -42,7 +42,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a model on the xView dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a model on the xView dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -71,7 +71,7 @@ The xView dataset contains high-resolution satellite images with a diverse set o
![Dataset sample image](https://github.com/ultralytics/docs/releases/download/0/overhead-imagery-object-detection.avif)
-- **Overhead Imagery**: This image demonstrates an example of object detection in overhead imagery, where objects are annotated with bounding boxes. The dataset provides high-resolution satellite images to facilitate the development of models for this task.
+- **Overhead Imagery**: This image demonstrates an example of [object detection](https://www.ultralytics.com/glossary/object-detection) in overhead imagery, where objects are annotated with bounding boxes. The dataset provides high-resolution satellite images to facilitate the development of models for this task.
The example showcases the variety and complexity of the data in the xView dataset and highlights the importance of high-quality satellite imagery for object detection tasks.
@@ -137,11 +137,11 @@ The xView dataset stands out due to its comprehensive set of features:
- Over 1 million object instances across 60 distinct classes.
- High-resolution imagery at 0.3 meters.
- Diverse object types including small, rare, and fine-grained objects, all annotated with bounding boxes.
-- Availability of a pre-trained baseline model and examples in TensorFlow and PyTorch.
+- Availability of a pre-trained baseline model and examples in [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and PyTorch.
### What is the dataset structure of xView, and how is it annotated?
-The xView dataset comprises high-resolution satellite images collected from WorldView-3 satellites at a 0.3m ground sample distance. It encompasses over 1 million objects across 60 classes in approximately 1,400 km² of imagery. Each object within the dataset is annotated with bounding boxes, making it ideal for training and evaluating deep learning models for object detection in overhead imagery. For a detailed overview, you can look at the dataset structure section [here](#dataset-structure).
+The xView dataset comprises high-resolution satellite images collected from WorldView-3 satellites at a 0.3m ground sample distance. It encompasses over 1 million objects across 60 classes in approximately 1,400 km² of imagery. Each object within the dataset is annotated with bounding boxes, making it ideal for training and evaluating [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models for object detection in overhead imagery. For a detailed overview, you can look at the dataset structure section [here](#dataset-structure).
### How do I cite the xView dataset in my research?
diff --git a/docs/en/datasets/explorer/api.md b/docs/en/datasets/explorer/api.md
index 905142ef..6c716c14 100644
--- a/docs/en/datasets/explorer/api.md
+++ b/docs/en/datasets/explorer/api.md
@@ -50,7 +50,7 @@ dataframe = explorer.get_similar(idx=0)
!!! note
- Embeddings table for a given dataset and model pair is only created once and reused. These use [LanceDB](https://lancedb.github.io/lancedb/) under the hood, which scales on-disk, so you can create and reuse embeddings for large datasets like COCO without running out of memory.
+ [Embeddings](https://www.ultralytics.com/glossary/embeddings) table for a given dataset and model pair is only created once and reused. These use [LanceDB](https://lancedb.github.io/lancedb/) under the hood, which scales on-disk, so you can create and reuse embeddings for large datasets like COCO without running out of memory.
In case you want to force update the embeddings table, you can pass `force=True` to `create_embeddings_table` method.
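A minimal sketch of that flow, assuming the optional Explorer dependencies are installed (`pip install ultralytics[explorer]`); the dataset and model names here are illustrative defaults, not requirements:

```python
from ultralytics import Explorer

# Build (or rebuild) the embeddings table, then query it
explorer = Explorer(data="coco128.yaml", model="yolov8n.pt")
explorer.create_embeddings_table(force=True)  # force=True recreates the table even if one already exists

similar = explorer.get_similar(idx=0)  # pandas DataFrame of the images closest to image 0
print(similar.head())
```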
@@ -339,7 +339,7 @@ Try our GUI Demo based on Explorer API
### What is the Ultralytics Explorer API used for?
-The Ultralytics Explorer API is designed for comprehensive dataset exploration. It allows users to filter and search datasets using SQL queries, vector similarity search, and semantic search. This powerful Python API can handle large datasets, making it ideal for various computer vision tasks using Ultralytics models.
+The Ultralytics Explorer API is designed for comprehensive dataset exploration. It allows users to filter and search datasets using SQL queries, vector similarity search, and semantic search. This powerful Python API can handle large datasets, making it ideal for various [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) tasks using Ultralytics models.
### How do I install the Ultralytics Explorer API?
diff --git a/docs/en/datasets/explorer/dashboard.md b/docs/en/datasets/explorer/dashboard.md
index ed5760be..3bc3a21e 100644
--- a/docs/en/datasets/explorer/dashboard.md
+++ b/docs/en/datasets/explorer/dashboard.md
@@ -29,14 +29,14 @@ Explorer GUI is like a playground build using [Ultralytics Explorer API](api.md)
pip install ultralytics[explorer]
```
-!!! note "Note"
+!!! note
Ask AI feature works using OpenAI, so you'll be prompted to set the api key for OpenAI when you first run the GUI.
You can set it like this - `yolo settings openai_api_key="..."`
## Vector Semantic Similarity Search
-Semantic search is a technique for finding similar images to a given image. It is based on the idea that similar images will have similar embeddings. In the UI, you can select one of more images and search for the images similar to them. This can be useful when you want to find images similar to a given image or a set of images that don't perform as expected.
+Semantic search is a technique for finding similar images to a given image. It is based on the idea that similar images will have similar [embeddings](https://www.ultralytics.com/glossary/embeddings). In the UI, you can select one or more images and search for images similar to them. This can be useful when you want to find images similar to a given image or a set of images that don't perform as expected.
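The underlying idea can be illustrated with a toy cosine-similarity check on two embedding vectors (the values are made up; real embeddings have hundreds of dimensions):

```python
import numpy as np


def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine similarity between two embedding vectors."""
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))


query = np.array([0.12, 0.80, 0.35])  # embedding of a selected image (toy values)
candidate = np.array([0.10, 0.78, 0.40])  # embedding of another image in the dataset
print(cosine_similarity(query, candidate))  # values near 1.0 indicate visually similar images
```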
For example:
In this VOC Exploration dashboard, user selects a couple airplane images like this:
@@ -79,7 +79,7 @@ This is a Demo build using the Explorer API. You can use the API to build your o
### What is Ultralytics Explorer GUI and how do I install it?
-Ultralytics Explorer GUI is a powerful interface that unlocks advanced data exploration capabilities using the [Ultralytics Explorer API](api.md). It allows you to run semantic/vector similarity search, SQL queries, and natural language queries using the Ask AI feature powered by Large Language Models (LLMs).
+Ultralytics Explorer GUI is a powerful interface that unlocks advanced data exploration capabilities using the [Ultralytics Explorer API](api.md). It allows you to run semantic/vector similarity search, SQL queries, and natural language queries using the Ask AI feature powered by [Large Language Models](https://www.ultralytics.com/glossary/large-language-model-llm) (LLMs).
To install the Explorer GUI, you can use pip:
@@ -91,7 +91,7 @@ Note: To use the Ask AI feature, you'll need to set the OpenAI API key: `yolo se
### How does the semantic search feature in Ultralytics Explorer GUI work?
-The semantic search feature in Ultralytics Explorer GUI allows you to find images similar to a given image based on their embeddings. This technique is useful for identifying and exploring images that share visual similarities. To use this feature, select one or more images in the UI and execute a search for similar images. The result will display images that closely resemble the selected ones, facilitating efficient dataset exploration and anomaly detection.
+The semantic search feature in Ultralytics Explorer GUI allows you to find images similar to a given image based on their embeddings. This technique is useful for identifying and exploring images that share visual similarities. To use this feature, select one or more images in the UI and execute a search for similar images. The result will display images that closely resemble the selected ones, facilitating efficient dataset exploration and [anomaly detection](https://www.ultralytics.com/glossary/anomaly-detection).
Learn more about semantic search and other features by visiting the [Feature Overview](#vector-semantic-similarity-search) section.
diff --git a/docs/en/datasets/explorer/index.md b/docs/en/datasets/explorer/index.md
index c709aa2a..d7e7ab66 100644
--- a/docs/en/datasets/explorer/index.md
+++ b/docs/en/datasets/explorer/index.md
@@ -44,13 +44,13 @@ Learn more about the Explorer API [here](api.md).
## GUI Explorer Usage
-The GUI demo runs in your browser allowing you to create embeddings for your dataset and search for similar images, run SQL queries and perform semantic search. It can be run using the following command:
+The GUI demo runs in your browser, allowing you to create [embeddings](https://www.ultralytics.com/glossary/embeddings) for your dataset, search for similar images, run SQL queries, and perform semantic search. It can be run using the following command:
```bash
yolo explorer
```
-!!! note "Note"
+!!! note
Ask AI feature works using OpenAI, so you'll be prompted to set the api key for OpenAI when you first run the GUI.
You can set it like this - `yolo settings openai_api_key="..."`
@@ -63,7 +63,7 @@ yolo explorer
### What is Ultralytics Explorer and how can it help with CV datasets?
-Ultralytics Explorer is a powerful tool designed for exploring computer vision (CV) datasets through semantic search, SQL queries, vector similarity search, and even natural language. This versatile tool provides both a GUI and a Python API, allowing users to seamlessly interact with their datasets. By leveraging technologies like LanceDB, Ultralytics Explorer ensures efficient, scalable access to large datasets without excessive memory usage. Whether you're performing detailed dataset analysis or exploring data patterns, Ultralytics Explorer streamlines the entire process.
+Ultralytics Explorer is a powerful tool designed for exploring [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) (CV) datasets through semantic search, SQL queries, vector similarity search, and even natural language. This versatile tool provides both a GUI and a Python API, allowing users to seamlessly interact with their datasets. By leveraging technologies like LanceDB, Ultralytics Explorer ensures efficient, scalable access to large datasets without excessive memory usage. Whether you're performing detailed dataset analysis or exploring data patterns, Ultralytics Explorer streamlines the entire process.
Learn more about the [Explorer API](api.md).
diff --git a/docs/en/datasets/index.md b/docs/en/datasets/index.md
index f1e154ef..a53d2040 100644
--- a/docs/en/datasets/index.md
+++ b/docs/en/datasets/index.md
@@ -6,7 +6,7 @@ keywords: Ultralytics, datasets, computer vision, object detection, instance seg
# Datasets Overview
-Ultralytics provides support for various datasets to facilitate computer vision tasks such as detection, instance segmentation, pose estimation, classification, and multi-object tracking. Below is a list of the main Ultralytics datasets, followed by a summary of each computer vision task and the respective datasets.
+Ultralytics provides support for various datasets to facilitate computer vision tasks such as detection, [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation), pose estimation, classification, and multi-object tracking. Below is a list of the main Ultralytics datasets, followed by a summary of each computer vision task and the respective datasets.
@@ -21,7 +21,7 @@ Ultralytics provides support for various datasets to facilitate computer vision
## NEW 🚀 Ultralytics Explorer
-Create embeddings for your dataset, search for similar images, run SQL queries, perform semantic search and even search using natural language! You can get started with our GUI app or build your own using the API. Learn more [here](explorer/index.md).
+Create [embeddings](https://www.ultralytics.com/glossary/embeddings) for your dataset, search for similar images, run SQL queries, perform semantic search and even search using natural language! You can get started with our GUI app or build your own using the API. Learn more [here](explorer/index.md).
@@ -32,7 +32,7 @@ Create embeddings for your dataset, search for similar images, run SQL queries,
## [Object Detection](detect/index.md)
-Bounding box object detection is a computer vision technique that involves detecting and localizing objects in an image by drawing a bounding box around each object.
+[Bounding box](https://www.ultralytics.com/glossary/bounding-box) object detection is a computer vision technique that involves detecting and localizing objects in an image by drawing a bounding box around each object.
- [Argoverse](detect/argoverse.md): A dataset containing 3D tracking and motion forecasting data from urban environments with rich annotations.
- [COCO](detect/coco.md): Common Objects in Context (COCO) is a large-scale object detection, segmentation, and captioning dataset with 80 object categories.
@@ -69,10 +69,11 @@ Pose estimation is a technique used to determine the pose of the object relative
- [COCO](pose/coco.md): A large-scale dataset with human pose annotations designed for pose estimation tasks.
- [COCO8-pose](pose/coco8-pose.md): A smaller dataset for pose estimation tasks, containing a subset of 8 COCO images with human pose annotations.
- [Tiger-pose](pose/tiger-pose.md): A compact dataset consisting of 263 images focused on tigers, annotated with 12 keypoints per tiger for pose estimation tasks.
+- [Hand-Keypoints](pose/hand-keypoints.md): A concise dataset featuring over 26,000 images centered on human hands, annotated with 21 keypoints per hand, designed for pose estimation tasks.
## [Classification](classify/index.md)
-Image classification is a computer vision task that involves categorizing an image into one or more predefined classes or categories based on its visual content.
+[Image classification](https://www.ultralytics.com/glossary/image-classification) is a computer vision task that involves categorizing an image into one or more predefined classes or categories based on its visual content.
- [Caltech 101](classify/caltech101.md): A dataset containing images of 101 object categories for image classification tasks.
- [Caltech 256](classify/caltech256.md): An extended version of Caltech 101 with 256 object categories and more challenging images.
@@ -123,7 +124,7 @@ Contributing a new dataset involves several steps to ensure that it aligns well
5. **Create a `data.yaml` File**: In your dataset's root directory, create a `data.yaml` file that describes the dataset, classes, and other necessary information.
6. **Optimize Images (Optional)**: If you want to reduce the size of the dataset for more efficient processing, you can optimize the images using the code below. This is not required, but recommended for smaller dataset sizes and faster download speeds.
7. **Zip Dataset**: Compress the entire dataset folder into a zip file.
-8. **Document and PR**: Create a documentation page describing your dataset and how it fits into the existing framework. After that, submit a Pull Request (PR). Refer to [Ultralytics Contribution Guidelines](https://docs.ultralytics.com/help/contributing) for more details on how to submit a PR.
+8. **Document and PR**: Create a documentation page describing your dataset and how it fits into the existing framework. After that, submit a Pull Request (PR). Refer to [Ultralytics Contribution Guidelines](https://docs.ultralytics.com/help/contributing/) for more details on how to submit a PR.
### Example Code to Optimize and Zip a Dataset
@@ -152,7 +153,7 @@ By following these steps, you can contribute a new dataset that integrates well
## FAQ
-### What datasets does Ultralytics support for object detection?
+### What datasets does Ultralytics support for [object detection](https://www.ultralytics.com/glossary/object-detection)?
Ultralytics supports a wide variety of datasets for object detection, including:
@@ -175,7 +176,7 @@ Contributing a new dataset involves several steps:
5. **Create a `data.yaml` File**: Include dataset descriptions, classes, and other relevant information.
6. **Optimize Images (Optional)**: Reduce dataset size for efficiency.
7. **Zip Dataset**: Compress the dataset into a zip file.
-8. **Document and PR**: Describe your dataset and submit a Pull Request following [Ultralytics Contribution Guidelines](https://docs.ultralytics.com/help/contributing).
+8. **Document and PR**: Describe your dataset and submit a Pull Request following [Ultralytics Contribution Guidelines](https://docs.ultralytics.com/help/contributing/).
Visit [Contribute New Datasets](#contribute-new-datasets) for a comprehensive guide.
@@ -190,7 +191,7 @@ Ultralytics Explorer offers powerful features for dataset analysis, including:
Explore the [Ultralytics Explorer](explorer/index.md) for more information and to try the [GUI Demo](explorer/index.md).
-### What are the unique features of Ultralytics YOLO models for computer vision?
+### What are the unique features of Ultralytics YOLO models for [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv)?
Ultralytics YOLO models provide several unique features:
diff --git a/docs/en/datasets/obb/dota-v2.md b/docs/en/datasets/obb/dota-v2.md
index 70cac23c..76024cac 100644
--- a/docs/en/datasets/obb/dota-v2.md
+++ b/docs/en/datasets/obb/dota-v2.md
@@ -6,7 +6,7 @@ keywords: DOTA dataset, object detection, aerial images, oriented bounding boxes
# DOTA Dataset with OBB
-[DOTA](https://captain-whu.github.io/DOTA/index.html) stands as a specialized dataset, emphasizing object detection in aerial images. Originating from the DOTA series of datasets, it offers annotated images capturing a diverse array of aerial scenes with Oriented Bounding Boxes (OBB).
+[DOTA](https://captain-whu.github.io/DOTA/index.html) stands as a specialized dataset, emphasizing [object detection](https://www.ultralytics.com/glossary/object-detection) in aerial images. Originating from the DOTA series of datasets, it offers annotated images capturing a diverse array of aerial scenes with Oriented Bounding Boxes (OBB).
![DOTA classes visual](https://github.com/ultralytics/docs/releases/download/0/dota-classes-visual.avif)
@@ -111,15 +111,15 @@ To train a model on the DOTA v1 dataset, you can utilize the following code snip
# Create a new YOLOv8n-OBB model from scratch
model = YOLO("yolov8n-obb.yaml")
- # Train the model on the DOTAv2 dataset
- results = model.train(data="DOTAv1.yaml", epochs=100, imgsz=640)
+ # Train the model on the DOTAv1 dataset
+ results = model.train(data="DOTAv1.yaml", epochs=100, imgsz=1024)
```
=== "CLI"
```bash
- # Train a new YOLOv8n-OBB model on the DOTAv2 dataset
- yolo obb train data=DOTAv1.yaml model=yolov8n-obb.pt epochs=100 imgsz=640
+ # Train a new YOLOv8n-OBB model on the DOTAv1 dataset
+ yolo obb train data=DOTAv1.yaml model=yolov8n-obb.pt epochs=100 imgsz=1024
```
## Sample Data and Annotations
@@ -128,7 +128,7 @@ Having a glance at the dataset illustrates its depth:
![Dataset sample image](https://github.com/ultralytics/docs/releases/download/0/instances-DOTA.avif)
-- **DOTA examples**: This snapshot underlines the complexity of aerial scenes and the significance of Oriented Bounding Box annotations, capturing objects in their natural orientation.
+- **DOTA examples**: This snapshot underlines the complexity of aerial scenes and the significance of Oriented [Bounding Box](https://www.ultralytics.com/glossary/bounding-box) annotations, capturing objects in their natural orientation.
The dataset's richness offers invaluable insights into object detection challenges exclusive to aerial imagery.
@@ -180,14 +180,14 @@ To train a model on the DOTA dataset, you can use the following example with Ult
model = YOLO("yolov8n-obb.yaml")
# Train the model on the DOTAv1 dataset
- results = model.train(data="DOTAv1.yaml", epochs=100, imgsz=640)
+ results = model.train(data="DOTAv1.yaml", epochs=100, imgsz=1024)
```
=== "CLI"
```bash
# Train a new YOLOv8n-OBB model on the DOTAv1 dataset
- yolo obb train data=DOTAv1.yaml model=yolov8n-obb.pt epochs=100 imgsz=640
+ yolo obb train data=DOTAv1.yaml model=yolov8n-obb.pt epochs=100 imgsz=1024
```
For more details on how to split and preprocess the DOTA images, refer to the [split DOTA images section](#split-dota-images).
diff --git a/docs/en/datasets/obb/dota8.md b/docs/en/datasets/obb/dota8.md
index 0188c317..f24ea5bc 100644
--- a/docs/en/datasets/obb/dota8.md
+++ b/docs/en/datasets/obb/dota8.md
@@ -8,7 +8,7 @@ keywords: DOTA8 dataset, Ultralytics, YOLOv8, object detection, debugging, train
## Introduction
-[Ultralytics](https://www.ultralytics.com/) DOTA8 is a small, but versatile oriented object detection dataset composed of the first 8 images of 8 images of the split DOTAv1 set, 4 for training and 4 for validation. This dataset is ideal for testing and debugging object detection models, or for experimenting with new detection approaches. With 8 images, it is small enough to be easily manageable, yet diverse enough to test training pipelines for errors and act as a sanity check before training larger datasets.
+[Ultralytics](https://www.ultralytics.com/) DOTA8 is a small but versatile oriented [object detection](https://www.ultralytics.com/glossary/object-detection) dataset composed of the first 8 images of the split DOTAv1 set, 4 for training and 4 for validation. This dataset is ideal for testing and debugging object detection models, or for experimenting with new detection approaches. With 8 images, it is small enough to be easily manageable, yet diverse enough to test training pipelines for errors and act as a sanity check before training larger datasets.
This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLOv8](https://github.com/ultralytics/ultralytics).
@@ -24,7 +24,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n-obb model on the DOTA8 dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLOv8n-obb model on the DOTA8 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -121,4 +121,4 @@ Mosaicing combines multiple images into one during training, increasing the vari
### Why should I use Ultralytics YOLOv8 for object detection tasks?
-Ultralytics YOLOv8 provides state-of-the-art real-time object detection capabilities, including features like oriented bounding boxes (OBB), instance segmentation, and a highly versatile training pipeline. It's suitable for various applications and offers pretrained models for efficient fine-tuning. Explore further about the advantages and usage in the [Ultralytics YOLOv8 documentation](https://github.com/ultralytics/ultralytics).
+Ultralytics YOLOv8 provides state-of-the-art real-time object detection capabilities, including features like oriented bounding boxes (OBB), [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation), and a highly versatile training pipeline. It's suitable for various applications and offers pretrained models for efficient fine-tuning. Explore further about the advantages and usage in the [Ultralytics YOLOv8 documentation](https://github.com/ultralytics/ultralytics).
diff --git a/docs/en/datasets/obb/index.md b/docs/en/datasets/obb/index.md
index 02cd08e3..edeffb83 100644
--- a/docs/en/datasets/obb/index.md
+++ b/docs/en/datasets/obb/index.md
@@ -6,7 +6,7 @@ keywords: Oriented Bounding Box, OBB Datasets, YOLO, Ultralytics, Object Detecti
# Oriented Bounding Box (OBB) Datasets Overview
-Training a precise object detection model with oriented bounding boxes (OBB) requires a thorough dataset. This guide explains the various OBB dataset formats compatible with Ultralytics YOLO models, offering insights into their structure, application, and methods for format conversions.
+Training a precise [object detection](https://www.ultralytics.com/glossary/object-detection) model with oriented bounding boxes (OBB) requires a thorough dataset. This guide explains the various OBB dataset formats compatible with Ultralytics YOLO models, offering insights into their structure, application, and methods for format conversions.
## Supported OBB Dataset Formats
@@ -18,7 +18,7 @@ The YOLO OBB format designates bounding boxes by their four corner points with c
class_index x1 y1 x2 y2 x3 y3 x4 y4
```
-Internally, YOLO processes losses and outputs in the `xywhr` format, which represents the bounding box's center point (xy), width, height, and rotation.
+Internally, YOLO processes losses and outputs in the `xywhr` format, which represents the [bounding box](https://www.ultralytics.com/glossary/bounding-box)'s center point (xy), width, height, and rotation.
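To illustrate the relationship between the two formats, the sketch below converts one corner-point label to an `xywhr`-style tuple with OpenCV's `minAreaRect`; this is only an illustration (the label values are made up, and Ultralytics' internal angle convention may differ from OpenCV's):

```python
import cv2
import numpy as np

# One YOLO OBB label line: class index followed by four normalized (x, y) corner points
line = "0 0.52 0.31 0.60 0.33 0.58 0.41 0.50 0.39"
cls, *coords = line.split()
corners = np.array(coords, dtype=np.float32).reshape(4, 2)

# minAreaRect recovers center (x, y), width, height, and a rotation angle in degrees
(cx, cy), (w, h), angle_deg = cv2.minAreaRect(corners)
print(cls, cx, cy, w, h, np.deg2rad(angle_deg))
```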
@@ -42,21 +42,23 @@ To train a model using these OBB formats:
# Create a new YOLOv8n-OBB model from scratch
model = YOLO("yolov8n-obb.yaml")
- # Train the model on the DOTAv2 dataset
- results = model.train(data="DOTAv1.yaml", epochs=100, imgsz=640)
+ # Train the model on the DOTAv1 dataset
+ results = model.train(data="DOTAv1.yaml", epochs=100, imgsz=1024)
```
=== "CLI"
```bash
- # Train a new YOLOv8n-OBB model on the DOTAv2 dataset
- yolo obb train data=DOTAv1.yaml model=yolov8n-obb.pt epochs=100 imgsz=640
+ # Train a new YOLOv8n-OBB model on the DOTAv1 dataset
+ yolo obb train data=DOTAv1.yaml model=yolov8n-obb.pt epochs=100 imgsz=1024
```
## Supported Datasets
Currently, the following datasets with Oriented Bounding Boxes are supported:
+- [DOTA-v1](dota-v2.md): The first version of the DOTA dataset, providing a comprehensive set of aerial images with oriented bounding boxes for object detection.
+- [DOTA-v1.5](dota-v2.md): An intermediate version of the DOTA dataset, offering additional annotations and improvements over DOTA-v1 for enhanced object detection tasks.
- [DOTA-v2](dota-v2.md): DOTA (A Large-scale Dataset for Object Detection in Aerial Images) version 2, emphasizes detection from aerial perspectives and contains oriented bounding boxes with 1.7 million instances and 11,268 images.
- [DOTA8](dota8.md): A small, 8-image subset of the full DOTA dataset suitable for testing workflows and Continuous Integration (CI) checks of OBB training in the `ultralytics` repository.
@@ -127,12 +129,14 @@ Training a YOLOv8 model with OBBs involves ensuring your dataset is in the YOLO
yolo obb train data=your_dataset.yaml model=yolov8n-obb.yaml epochs=100 imgsz=640
```
-This ensures your model leverages the detailed OBB annotations for improved detection accuracy.
+This ensures your model leverages the detailed OBB annotations for improved detection [accuracy](https://www.ultralytics.com/glossary/accuracy).
### What datasets are currently supported for OBB training in Ultralytics YOLO models?
Currently, Ultralytics supports the following datasets for OBB training:
+- [DOTA-v1](dota-v2.md): The first version of the DOTA dataset, providing a comprehensive set of aerial images with oriented bounding boxes for object detection.
+- [DOTA-v1.5](dota-v2.md): An intermediate version of the DOTA dataset, offering additional annotations and improvements over DOTA-v1 for enhanced object detection tasks.
- [DOTA-v2](dota-v2.md): This dataset includes 1.7 million instances with oriented bounding boxes and 11,268 images, primarily focusing on aerial object detection.
- [DOTA8](dota8.md): A smaller, 8-image subset of the DOTA dataset used for testing and continuous integration (CI) checks.
diff --git a/docs/en/datasets/pose/coco.md b/docs/en/datasets/pose/coco.md
index 8addbd96..20042b40 100644
--- a/docs/en/datasets/pose/coco.md
+++ b/docs/en/datasets/pose/coco.md
@@ -37,7 +37,7 @@ The COCO-Pose dataset is split into three subsets:
## Applications
-The COCO-Pose dataset is specifically used for training and evaluating deep learning models in keypoint detection and pose estimation tasks, such as OpenPose. The dataset's large number of annotated images and standardized evaluation metrics make it an essential resource for computer vision researchers and practitioners focused on pose estimation.
+The COCO-Pose dataset is specifically used for training and evaluating [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models in keypoint detection and pose estimation tasks, such as OpenPose. The dataset's large number of annotated images and standardized evaluation metrics make it an essential resource for [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) researchers and practitioners focused on pose estimation.
## Dataset YAML
@@ -51,7 +51,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n-pose model on the COCO-Pose dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLOv8n-pose model on the COCO-Pose dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -140,7 +140,7 @@ For more details on the training process and available arguments, check the [tra
### What are the different metrics provided by the COCO-Pose dataset for evaluating model performance?
-The COCO-Pose dataset provides several standardized evaluation metrics for pose estimation tasks, similar to the original COCO dataset. Key metrics include the Object Keypoint Similarity (OKS), which evaluates the accuracy of predicted keypoints against ground truth annotations. These metrics allow for thorough performance comparisons between different models. For instance, the COCO-Pose pretrained models such as YOLOv8n-pose, YOLOv8s-pose, and others have specific performance metrics listed in the documentation, like mAPpose50-95 and mAPpose50.
+The COCO-Pose dataset provides several standardized evaluation metrics for pose estimation tasks, similar to the original COCO dataset. Key metrics include the Object Keypoint Similarity (OKS), which evaluates the [accuracy](https://www.ultralytics.com/glossary/accuracy) of predicted keypoints against ground truth annotations. These metrics allow for thorough performance comparisons between different models. For instance, the COCO-Pose pretrained models such as YOLOv8n-pose, YOLOv8s-pose, and others have specific performance metrics listed in the documentation, like mAPpose50-95 and mAPpose50.
### How is the dataset structured and split for the COCO-Pose dataset?
diff --git a/docs/en/datasets/pose/coco8-pose.md b/docs/en/datasets/pose/coco8-pose.md
index c5847e11..95157b79 100644
--- a/docs/en/datasets/pose/coco8-pose.md
+++ b/docs/en/datasets/pose/coco8-pose.md
@@ -8,7 +8,7 @@ keywords: COCO8-Pose, Ultralytics, pose detection dataset, object detection, YOL
## Introduction
-[Ultralytics](https://www.ultralytics.com/) COCO8-Pose is a small, but versatile pose detection dataset composed of the first 8 images of the COCO train 2017 set, 4 for training and 4 for validation. This dataset is ideal for testing and debugging object detection models, or for experimenting with new detection approaches. With 8 images, it is small enough to be easily manageable, yet diverse enough to test training pipelines for errors and act as a sanity check before training larger datasets.
+[Ultralytics](https://www.ultralytics.com/) COCO8-Pose is a small but versatile pose detection dataset composed of the first 8 images of the COCO train 2017 set, 4 for training and 4 for validation. This dataset is ideal for testing and debugging [object detection](https://www.ultralytics.com/glossary/object-detection) models, or for experimenting with new detection approaches. With 8 images, it is small enough to be easily manageable, yet diverse enough to test training pipelines for errors and act as a sanity check before training larger datasets.
This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLOv8](https://github.com/ultralytics/ultralytics).
@@ -24,7 +24,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -76,7 +76,7 @@ If you use the COCO dataset in your research or development work, please cite th
}
```
-We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home).
+We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home).
## FAQ
diff --git a/docs/en/datasets/pose/hand-keypoints.md b/docs/en/datasets/pose/hand-keypoints.md
new file mode 100644
index 00000000..c14bd5c5
--- /dev/null
+++ b/docs/en/datasets/pose/hand-keypoints.md
@@ -0,0 +1,175 @@
+---
+comments: true
+description: Explore the hand keypoints estimation dataset for advanced pose estimation. Learn about datasets, pretrained models, metrics, and applications for training with YOLO.
+keywords: Hand KeyPoints, pose estimation, dataset, keypoints, MediaPipe, YOLO, deep learning, computer vision
+---
+
+# Hand Keypoints Dataset
+
+## Introduction
+
+The hand-keypoints dataset contains 26,768 images of hands annotated with keypoints, making it suitable for training models like Ultralytics YOLO for pose estimation tasks. The annotations were generated using the Google MediaPipe library, ensuring high accuracy and consistency, and the dataset is compatible with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) formats.
+
+## Hand Landmarks
+
+![Hand Landmarks](https://github.com/ultralytics/docs/releases/download/0/hand_landmarks.jpg)
+
+## KeyPoints
+
+The dataset includes keypoints for hand detection. The keypoints are annotated as follows:
+
+1. Wrist
+2. Thumb (4 points)
+3. Index finger (4 points)
+4. Middle finger (4 points)
+5. Ring finger (4 points)
+6. Little finger (4 points)
+
+Each hand has a total of 21 keypoints.
+
+## Key Features
+
+- **Large Dataset**: 26,768 images with hand keypoint annotations.
+- **YOLOv8 Compatibility**: Ready for use with YOLOv8 models.
+- **21 Keypoints**: Detailed hand pose representation.
+
+## Dataset Structure
+
+The hand keypoint dataset is split into two subsets:
+
+1. **Train**: This subset contains 18,776 images from the hand keypoints dataset, annotated for training pose estimation models.
+2. **Val**: This subset contains 7,992 images that can be used for validation purposes during model training.
+
+## Applications
+
+Hand keypoints can be used for gesture recognition, AR/VR controls, robotic manipulation, and hand movement analysis in healthcare. They can also be applied in animation for motion capture and in biometric authentication systems for security.
+
+## Dataset YAML
+
+A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. In the case of the Hand Keypoints dataset, the `hand-keypoints.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/hand-keypoints.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/hand-keypoints.yaml).
+
+!!! example "ultralytics/cfg/datasets/hand-keypoints.yaml"
+
+    ```yaml
+    --8<-- "ultralytics/cfg/datasets/hand-keypoints.yaml"
+    ```
+
+## Usage
+
+To train a YOLOv8n-pose model on the Hand Keypoints dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+
+!!! example "Train Example"
+
+    === "Python"
+
+        ```python
+        from ultralytics import YOLO
+
+        # Load a model
+        model = YOLO("yolov8n-pose.pt")  # load a pretrained model (recommended for training)
+
+        # Train the model
+        results = model.train(data="hand-keypoints.yaml", epochs=100, imgsz=640)
+        ```
+
+    === "CLI"
+
+        ```bash
+        # Start training from a pretrained *.pt model
+        yolo pose train data=hand-keypoints.yaml model=yolov8n-pose.pt epochs=100 imgsz=640
+        ```
+
+## Sample Images and Annotations
+
+The Hand keypoints dataset contains a diverse set of images with human hands annotated with keypoints. Here are some examples of images from the dataset, along with their corresponding annotations:
+
+![Dataset sample image](https://github.com/ultralytics/docs/releases/download/0/human-hand-pose.jpg)
+
+- **Mosaiced Image**: This image demonstrates a training batch composed of mosaiced dataset images. Mosaicing is a technique used during training that combines multiple images into a single image to increase the variety of objects and scenes within each training batch. This helps improve the model's ability to generalize to different object sizes, aspect ratios, and contexts.
+
+The example showcases the variety and complexity of the images in the Hand Keypoints dataset and the benefits of using mosaicing during the training process.
+
+## Citations and Acknowledgments
+
+If you use the hand-keypoints dataset in your research or development work, please acknowledge the following sources:
+
+!!! quote ""
+
+    === "Credits"
+
+        We would like to thank the following sources for providing the images used in this dataset:
+
+        - [11k Hands](https://sites.google.com/view/11khands)
+        - [2000 Hand Gestures](https://www.kaggle.com/datasets/ritikagiridhar/2000-hand-gestures)
+        - [Gesture Recognition](https://www.kaggle.com/datasets/imsparsh/gesture-recognition)
+
+        The images were collected and used under the respective licenses provided by each platform and are distributed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-nc-sa/4.0/).
+
+We would also like to acknowledge the creator of this dataset, [Rion Dsilva](https://www.linkedin.com/in/rion-dsilva-043464229/), for his great contribution to Vision AI research.
+
+## FAQ
+
+### How do I train a YOLOv8 model on the Hand Keypoints dataset?
+
+To train a YOLOv8 model on the Hand Keypoints dataset, you can use either Python or the command line interface (CLI). Here's an example for training a YOLOv8n-pose model for 100 epochs with an image size of 640:
+
+!!! Example
+
+    === "Python"
+
+        ```python
+        from ultralytics import YOLO
+
+        # Load a model
+        model = YOLO("yolov8n-pose.pt")  # load a pretrained model (recommended for training)
+
+        # Train the model
+        results = model.train(data="hand-keypoints.yaml", epochs=100, imgsz=640)
+        ```
+
+    === "CLI"
+
+        ```bash
+        # Start training from a pretrained *.pt model
+        yolo pose train data=hand-keypoints.yaml model=yolov8n-pose.pt epochs=100 imgsz=640
+        ```
+
+For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+
+### What are the key features of the Hand Keypoints dataset?
+
+The Hand Keypoints dataset is designed for advanced pose estimation tasks and includes several key features:
+
+- **Large Dataset**: Contains 26,768 images with hand keypoint annotations.
+- **YOLOv8 Compatibility**: Ready for use with YOLOv8 models.
+- **21 Keypoints**: Detailed hand pose representation, including wrist and finger joints.
+
+For more details, you can explore the [Hand Keypoints Dataset](#introduction) section.
+
+### What applications can benefit from using the Hand Keypoints dataset?
+
+The Hand Keypoints dataset can be applied in various fields, including:
+
+- **Gesture Recognition**: Enhancing human-computer interaction.
+- **AR/VR Controls**: Improving user experience in augmented and virtual reality.
+- **Robotic Manipulation**: Enabling precise control of robotic hands.
+- **Healthcare**: Analyzing hand movements for medical diagnostics.
+- **Animation**: Capturing motion for realistic animations.
+- **Biometric Authentication**: Enhancing security systems.
+
+For more information, refer to the [Applications](#applications) section.
+
+### How is the Hand Keypoints dataset structured?
+
+The Hand Keypoints dataset is divided into two subsets:
+
+1. **Train**: Contains 18,776 images for training pose estimation models.
+2. **Val**: Contains 7,992 images for validation purposes during model training.
+
+This structure ensures a comprehensive training and validation process. For more details, see the [Dataset Structure](#dataset-structure) section.
+
+### How do I use the dataset YAML file for training?
+
+The dataset configuration is defined in a YAML file, which includes paths, classes, and other relevant information. The `hand-keypoints.yaml` file can be found at [hand-keypoints.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/hand-keypoints.yaml).
+
+To use this YAML file for training, specify it in your training script or CLI command as shown in the training example above. For more details, refer to the [Dataset YAML](#dataset-yaml) section.
diff --git a/docs/en/datasets/pose/index.md b/docs/en/datasets/pose/index.md
index 42884971..a8bfd222 100644
--- a/docs/en/datasets/pose/index.md
+++ b/docs/en/datasets/pose/index.md
@@ -34,7 +34,7 @@ Format with Dim = 3
+ Watch: How to generate Analytical Graphs using Ultralytics | Line Graphs, Bar Plots, Area and Pie Charts
@@ -98,11 +98,11 @@ These open-source tools are budget-friendly and provide a range of features to m
### Some More Things to Consider Before Annotating Data
-Before you dive into annotating your data, there are a few more things to keep in mind. You should be aware of accuracy, precision, outliers, and quality control to avoid labeling your data in a counterproductive manner.
+Before you dive into annotating your data, there are a few more things to keep in mind. You should be aware of accuracy, [precision](https://www.ultralytics.com/glossary/precision), outliers, and quality control to avoid labeling your data in a counterproductive manner.
#### Understanding Accuracy and Precision
-It's important to understand the difference between accuracy and precision and how it relates to annotation. Accuracy refers to how close the annotated data is to the true values. It helps us measure how closely the labels reflect real-world scenarios. Precision indicates the consistency of annotations. It checks if you are giving the same label to the same object or feature throughout the dataset. High accuracy and precision lead to better-trained models by reducing noise and improving the model's ability to generalize from the training data.
+It's important to understand the difference between accuracy and precision and how it relates to annotation. Accuracy refers to how close the annotated data is to the true values. It helps us measure how closely the labels reflect real-world scenarios. Precision indicates the consistency of annotations. It checks if you are giving the same label to the same object or feature throughout the dataset. High accuracy and precision lead to better-trained models by reducing noise and improving the model's ability to generalize from the [training data](https://www.ultralytics.com/glossary/training-data).
@@ -114,9 +114,9 @@ Outliers are data points that deviate quite a bit from other observations in the
You can use various methods to detect and correct outliers:
-- **Statistical Techniques**: To detect outliers in numerical features like pixel values, bounding box coordinates, or object sizes, you can use methods such as box plots, histograms, or z-scores.
+- **Statistical Techniques**: To detect outliers in numerical features like pixel values, [bounding box](https://www.ultralytics.com/glossary/bounding-box) coordinates, or object sizes, you can use methods such as box plots, histograms, or z-scores.
- **Visual Techniques**: To spot anomalies in categorical features like object classes, colors, or shapes, use visual methods like plotting images, labels, or heat maps.
-- **Algorithmic Methods**: Use tools like clustering (e.g., K-means clustering, DBSCAN) and anomaly detection algorithms to identify outliers based on data distribution patterns.
+- **Algorithmic Methods**: Use tools like clustering (e.g., K-means clustering, DBSCAN) and [anomaly detection](https://www.ultralytics.com/glossary/anomaly-detection) algorithms to identify outliers based on data distribution patterns.
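As a concrete illustration of the statistical technique in the first bullet, the sketch below flags annotations whose bounding-box areas sit far from the rest of the distribution. The sample values and the z-score threshold of 2 are arbitrary placeholders, not part of any Ultralytics API.

```python
import numpy as np

# Hypothetical bounding-box areas (w * h, in pixels) collected from your annotations
box_areas = np.array([1200, 1350, 1280, 90, 1310, 15000, 1295, 1260], dtype=float)

# Z-score: how many standard deviations each area lies from the mean
z_scores = (box_areas - box_areas.mean()) / box_areas.std()

# Flag annotations beyond a chosen threshold (2-3 is a common starting point) for manual review
outliers = np.where(np.abs(z_scores) > 2)[0]
print("Annotations worth reviewing:", outliers)
```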
#### Quality Control of Annotated Data
@@ -132,7 +132,7 @@ While reviewing, if you find errors, correct them and update the guidelines to a
## Share Your Thoughts with the Community
-Bouncing your ideas and queries off other computer vision enthusiasts can help accelerate your projects. Here are some great ways to learn, troubleshoot, and network:
+Bouncing your ideas and queries off other [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) enthusiasts can help accelerate your projects. Here are some great ways to learn, troubleshoot, and network:
### Where to Find Help and Support
@@ -159,7 +159,7 @@ Ensuring high consistency and accuracy in data annotation involves establishing
### How many images do I need for training Ultralytics YOLO models?
-For effective transfer learning and object detection with Ultralytics YOLO models, start with a minimum of a few hundred annotated objects per class. If training for just one class, begin with at least 100 annotated images and train for approximately 100 epochs. More complex tasks might require thousands of images per class to achieve high reliability and performance. Quality annotations are crucial, so ensure your data collection and annotation processes are rigorous and aligned with your project's specific goals. Explore detailed training strategies in the [YOLOv8 training guide](../modes/train.md).
+For effective [transfer learning](https://www.ultralytics.com/glossary/transfer-learning) and object detection with Ultralytics YOLO models, start with a minimum of a few hundred annotated objects per class. If training for just one class, begin with at least 100 annotated images and train for approximately 100 [epochs](https://www.ultralytics.com/glossary/epoch). More complex tasks might require thousands of images per class to achieve high reliability and performance. Quality annotations are crucial, so ensure your data collection and annotation processes are rigorous and aligned with your project's specific goals. Explore detailed training strategies in the [YOLOv8 training guide](../modes/train.md).
### What are some popular tools for data annotation?
@@ -167,7 +167,7 @@ Several popular open-source tools can streamline the data annotation process:
- **[Label Studio](https://github.com/HumanSignal/label-studio)**: A flexible tool supporting various annotation tasks, project management, and quality control features.
- **[CVAT](https://www.cvat.ai/)**: Offers multiple annotation formats and customizable workflows, making it suitable for complex projects.
-- **[Labelme](https://github.com/labelmeai/labelme)**: Ideal for quick and straightforward image annotation with polygons.
+- **[Labelme](https://github.com/wkentaro/labelme)**: Ideal for quick and straightforward image annotation with polygons.
These tools can help enhance the efficiency and accuracy of your annotation workflows. For extensive feature lists and guides, refer to our [data annotation tools documentation](../datasets/index.md).
@@ -177,7 +177,7 @@ Different types of data annotation cater to various computer vision tasks:
- **Bounding Boxes**: Used primarily for object detection, these are rectangular boxes around objects in an image.
- **Polygons**: Provide more precise object outlines suitable for instance segmentation tasks.
-- **Masks**: Offer pixel-level detail, used in semantic segmentation to differentiate objects from the background.
+- **Masks**: Offer pixel-level detail, used in [semantic segmentation](https://www.ultralytics.com/glossary/semantic-segmentation) to differentiate objects from the background.
- **Keypoints**: Identify specific points of interest within an image, useful for tasks like pose estimation and facial landmark detection.
Selecting the appropriate annotation type depends on your project's requirements. Learn more about how to implement these annotations and their formats in our [data annotation guide](#what-is-data-annotation).
diff --git a/docs/en/guides/deepstream-nvidia-jetson.md b/docs/en/guides/deepstream-nvidia-jetson.md
index cd114b86..ab15009b 100644
--- a/docs/en/guides/deepstream-nvidia-jetson.md
+++ b/docs/en/guides/deepstream-nvidia-jetson.md
@@ -27,7 +27,7 @@ This comprehensive guide provides a detailed walkthrough for deploying Ultralyti
## What is NVIDIA DeepStream?
-[NVIDIA's DeepStream SDK](https://developer.nvidia.com/deepstream-sdk) is a complete streaming analytics toolkit based on GStreamer for AI-based multi-sensor processing, video, audio, and image understanding. It's ideal for vision AI developers, software partners, startups, and OEMs building IVA (Intelligent Video Analytics) apps and services. You can now create stream-processing pipelines that incorporate neural networks and other complex processing tasks like tracking, video encoding/decoding, and video rendering. These pipelines enable real-time analytics on video, image, and sensor data. DeepStream's multi-platform support gives you a faster, easier way to develop vision AI applications and services on-premise, at the edge, and in the cloud.
+[NVIDIA's DeepStream SDK](https://developer.nvidia.com/deepstream-sdk) is a complete streaming analytics toolkit based on GStreamer for AI-based multi-sensor processing, video, audio, and image understanding. It's ideal for vision AI developers, software partners, startups, and OEMs building IVA (Intelligent Video Analytics) apps and services. You can now create stream-processing pipelines that incorporate [neural networks](https://www.ultralytics.com/glossary/neural-network-nn) and other complex processing tasks like tracking, video encoding/decoding, and video rendering. These pipelines enable real-time analytics on video, image, and sensor data. DeepStream's multi-platform support gives you a faster, easier way to develop vision AI applications and services on-premise, at the edge, and in the cloud.
## Prerequisites
@@ -183,7 +183,7 @@ deepstream-app -c deepstream_app_config.txt
!!! tip
- If you want to convert the model to FP16 precision, simply set `model-engine-file=model_b1_gpu0_fp16.engine` and `network-mode=2` inside `config_infer_primary_yoloV8.txt`
+ If you want to convert the model to FP16 [precision](https://www.ultralytics.com/glossary/precision), simply set `model-engine-file=model_b1_gpu0_fp16.engine` and `network-mode=2` inside `config_infer_primary_yoloV8.txt`
## INT8 Calibration
@@ -219,7 +219,7 @@ If you want to use INT8 precision for inference, you need to follow the steps be
!!! note
- NVIDIA recommends at least 500 images to get a good accuracy. On this example, 1000 images are chosen to get better accuracy (more images = more accuracy). You can set it from **head -1000**. For example, for 2000 images, **head -2000**. This process can take a long time.
+ NVIDIA recommends at least 500 images to get a good [accuracy](https://www.ultralytics.com/glossary/accuracy). In this example, 1000 images are chosen to get better accuracy (more images = more accuracy). You can set the number of images with **head -1000**; for example, for 2000 images, use **head -2000**. This process can take a long time.
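(No content inserted here.)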
6. Create the `calibration.txt` file with all selected images
@@ -323,7 +323,7 @@ To set up Ultralytics YOLOv8 on an [NVIDIA Jetson](https://www.nvidia.com/en-us/
### What is the benefit of using TensorRT with YOLOv8 on NVIDIA Jetson?
-Using TensorRT with YOLOv8 optimizes the model for inference, significantly reducing latency and improving throughput on NVIDIA Jetson devices. TensorRT provides high-performance, low-latency deep learning inference through layer fusion, precision calibration, and kernel auto-tuning. This leads to faster and more efficient execution, particularly useful for real-time applications like video analytics and autonomous machines.
+Using TensorRT with YOLOv8 optimizes the model for inference, significantly reducing latency and improving throughput on NVIDIA Jetson devices. TensorRT provides high-performance, low-latency [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) inference through layer fusion, precision calibration, and kernel auto-tuning. This leads to faster and more efficient execution, particularly useful for real-time applications like video analytics and autonomous machines.
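To make the workflow above concrete, here is a minimal sketch of how a TensorRT export and inference run typically look with the Ultralytics Python API on a Jetson (or any machine with TensorRT available); the sample image URL is only an illustration.

```python
from ultralytics import YOLO

# Export the PyTorch model to a TensorRT engine (requires TensorRT on the device)
model = YOLO("yolov8n.pt")
model.export(format="engine", half=True)  # half=True requests FP16 precision

# Load the exported engine and run inference
trt_model = YOLO("yolov8n.engine")
results = trt_model("https://ultralytics.com/images/bus.jpg")
```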
### Can I run Ultralytics YOLOv8 with DeepStream SDK across different NVIDIA Jetson hardware?
diff --git a/docs/en/guides/defining-project-goals.md b/docs/en/guides/defining-project-goals.md
index fcd32f12..c5e3c58c 100644
--- a/docs/en/guides/defining-project-goals.md
+++ b/docs/en/guides/defining-project-goals.md
@@ -4,7 +4,7 @@ description: Learn how to define clear goals and objectives for your computer vi
keywords: computer vision, project planning, problem statement, measurable objectives, dataset preparation, model selection, YOLOv8, Ultralytics
---
-# A Practical Guide for Defining Your Computer Vision Project
+# A Practical Guide for Defining Your [Computer Vision](https://www.ultralytics.com/glossary/computer-vision-cv) Project
## Introduction
@@ -33,7 +33,7 @@ Consider a computer vision project where you want to [estimate the speed of vehi
@@ -103,7 +103,7 @@ If you want to use the classes the model was pre-trained on, a practical approac
- **Edge Devices**: Deploying on edge devices like smartphones or IoT devices requires lightweight models due to their limited computational resources. Example technologies include [TensorFlow Lite](../integrations/tflite.md) and [ONNX Runtime](../integrations/onnx.md), which are optimized for such environments.
- **Cloud Servers**: Cloud deployments can handle more complex models with larger computational demands. Cloud platforms like [AWS](../integrations/amazon-sagemaker.md), Google Cloud, and Azure offer robust hardware options that can scale based on the project's needs.
-- **On-Premise Servers**: For scenarios requiring high data privacy and security, deploying on-premise might be necessary. This involves significant upfront hardware investment but allows full control over the data and infrastructure.
+- **On-Premise Servers**: For scenarios requiring high [data privacy](https://www.ultralytics.com/glossary/data-privacy) and security, deploying on-premise might be necessary. This involves significant upfront hardware investment but allows full control over the data and infrastructure.
- **Hybrid Solutions**: Some projects might benefit from a hybrid approach, where some processing is done on the edge, while more complex analyses are offloaded to the cloud. This can balance performance needs with cost and latency considerations.
Each deployment option offers different benefits and challenges, and the choice depends on specific project requirements like performance, cost, and security.
@@ -158,7 +158,7 @@ For example, "Achieve 95% accuracy in speed detection within six months using a
Deployment options critically impact the performance of your Ultralytics YOLO models. Here are key options:
-- **Edge Devices:** Use lightweight models like TensorFlow Lite or ONNX Runtime for deployment on devices with limited resources.
+- **Edge Devices:** Use lightweight models like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) Lite or ONNX Runtime for deployment on devices with limited resources.
- **Cloud Servers:** Utilize robust cloud platforms like AWS, Google Cloud, or Azure for handling complex models.
- **On-Premise Servers:** High data privacy and security needs may require on-premise deployments.
- **Hybrid Solutions:** Combine edge and cloud approaches for balanced performance and cost-efficiency.
diff --git a/docs/en/guides/distance-calculation.md b/docs/en/guides/distance-calculation.md
index 00af619b..443b208b 100644
--- a/docs/en/guides/distance-calculation.md
+++ b/docs/en/guides/distance-calculation.md
@@ -8,7 +8,7 @@ keywords: Ultralytics, YOLOv8, distance calculation, computer vision, object tra
## What is Distance Calculation?
-Measuring the gap between two objects is known as distance calculation within a specified space. In the case of [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics), the bounding box centroid is employed to calculate the distance for bounding boxes highlighted by the user.
+Measuring the gap between two objects is known as distance calculation within a specified space. In the case of [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics), the [bounding box](https://www.ultralytics.com/glossary/bounding-box) centroid is employed to calculate the distance for bounding boxes highlighted by the user.
@@ -162,7 +162,7 @@ This is a plot displaying fitness (typically a performance metric like AP50) aga
#### tune_results.csv
-A CSV file containing detailed results of each iteration during the tuning. Each row in the file represents one iteration, and it includes metrics like fitness score, precision, recall, as well as the hyperparameters used.
+A CSV file containing detailed results of each iteration during the tuning. Each row in the file represents one iteration, and it includes metrics like fitness score, [precision](https://www.ultralytics.com/glossary/precision), [recall](https://www.ultralytics.com/glossary/recall), as well as the hyperparameters used.
- **Format**: CSV
- **Usage**: Per-iteration results tracking.
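If it helps to see how such a file is typically inspected, a short pandas sketch is below; the `runs/detect/tune/` path and the `fitness` column name are assumptions based on the description above, so adjust them to your actual run.

```python
import pandas as pd

# Assumed output location of a tuning run; adjust to your own directory
df = pd.read_csv("runs/detect/tune/tune_results.csv")

# Rank iterations by fitness to see which hyperparameter sets worked best
print(df.sort_values("fitness", ascending=False).head())
```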
@@ -187,7 +187,7 @@ This file contains scatter plots generated from `tune_results.csv`, helping you
#### weights/
-This directory contains the saved PyTorch models for the last and the best iterations during the hyperparameter tuning process.
+This directory contains the saved [PyTorch](https://www.ultralytics.com/glossary/pytorch) models for the last and the best iterations during the hyperparameter tuning process.
- **`last.pt`**: The last.pt are the weights from the last epoch of training.
- **`best.pt`**: The best.pt weights for the iteration that achieved the best fitness score.
@@ -208,7 +208,7 @@ For deeper insights, you can explore the `Tuner` class source code and accompany
## FAQ
-### How do I optimize the learning rate for Ultralytics YOLO during hyperparameter tuning?
+### How do I optimize the [learning rate](https://www.ultralytics.com/glossary/learning-rate) for Ultralytics YOLO during hyperparameter tuning?
To optimize the learning rate for Ultralytics YOLO, start by setting an initial learning rate using the `lr0` parameter. Common values range from `0.001` to `0.01`. During the hyperparameter tuning process, this value will be mutated to find the optimal setting. You can utilize the `model.tune()` method to automate this process. For example:
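A sketch of what that might look like is shown below; the dataset name and iteration counts are placeholders, and `lr0` is passed as the starting value described above.

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")

# Tune hyperparameters starting from lr0=0.01; small epochs/iterations here purely for illustration
model.tune(data="coco8.yaml", lr0=0.01, epochs=10, iterations=30, optimizer="AdamW", plots=False, save=False, val=False)
```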
@@ -250,7 +250,7 @@ When evaluating model performance during hyperparameter tuning in YOLO, you can
- **AP50**: The average precision at IoU threshold of 0.50.
- **F1-Score**: The harmonic mean of precision and recall.
-- **Precision and Recall**: Individual metrics indicating the model's accuracy in identifying true positives versus false positives and false negatives.
+- **Precision and Recall**: Individual metrics indicating the model's [accuracy](https://www.ultralytics.com/glossary/accuracy) in identifying true positives versus false positives and false negatives.
These metrics help you understand different aspects of your model's performance. Refer to the [Ultralytics YOLO performance metrics](../guides/yolo-performance-metrics.md) guide for a comprehensive overview.
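As a rough illustration of reading these metrics programmatically, the snippet below runs validation with the Ultralytics API and prints a few of the box metrics; the tiny `coco8.yaml` dataset is used only as a stand-in for your own validation set.

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
metrics = model.val(data="coco8.yaml")  # run validation and collect metrics

print(metrics.box.map50)  # average precision at IoU 0.50
print(metrics.box.mp, metrics.box.mr)  # mean precision and mean recall across classes
```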
diff --git a/docs/en/guides/index.md b/docs/en/guides/index.md
index e1cb5341..1ad70434 100644
--- a/docs/en/guides/index.md
+++ b/docs/en/guides/index.md
@@ -6,9 +6,9 @@ keywords: Ultralytics, YOLO, tutorials, guides, object detection, deep learning,
# Comprehensive Tutorials to Ultralytics YOLO
-Welcome to the Ultralytics' YOLO 🚀 Guides! Our comprehensive tutorials cover various aspects of the YOLO object detection model, ranging from training and prediction to deployment. Built on PyTorch, YOLO stands out for its exceptional speed and accuracy in real-time object detection tasks.
+Welcome to the Ultralytics YOLO 🚀 Guides! Our comprehensive tutorials cover various aspects of the YOLO [object detection](https://www.ultralytics.com/glossary/object-detection) model, ranging from training and prediction to deployment. Built on [PyTorch](https://www.ultralytics.com/glossary/pytorch), YOLO stands out for its exceptional speed and [accuracy](https://www.ultralytics.com/glossary/accuracy) in real-time object detection tasks.
-Whether you're a beginner or an expert in deep learning, our tutorials offer valuable insights into the implementation and optimization of YOLO for your computer vision projects. Let's dive in!
+Whether you're a beginner or an expert in [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl), our tutorials offer valuable insights into the implementation and optimization of YOLO for your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) projects. Let's dive in!
+
@@ -62,7 +73,7 @@ Quantization converts the model's weights and activations from high precision (l
### Knowledge Distillation
-Knowledge distillation involves training a smaller, simpler model (the student) to mimic the outputs of a larger, more complex model (the teacher). The student model learns to approximate the teacher's predictions, resulting in a compact model that retains much of the teacher's accuracy. This technique is beneficial for creating efficient models suitable for deployment on edge devices with constrained resources.
+Knowledge distillation involves training a smaller, simpler model (the student) to mimic the outputs of a larger, more complex model (the teacher). The student model learns to approximate the teacher's predictions, resulting in a compact model that retains much of the teacher's [accuracy](https://www.ultralytics.com/glossary/accuracy). This technique is beneficial for creating efficient models suitable for deployment on edge devices with constrained resources.
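For readers who want to see the idea in code, below is a minimal PyTorch-style sketch of the classic soft-target distillation loss; it is a generic illustration, not a feature exposed by the Ultralytics API, and the temperature and weighting values are arbitrary.

```python
import torch
import torch.nn.functional as F


def distillation_loss(student_logits, teacher_logits, labels, T=2.0, alpha=0.5):
    """Blend soft-target KL loss (teacher guidance) with the usual hard-label loss."""
    soft = F.kl_div(
        F.log_softmax(student_logits / T, dim=1),
        F.softmax(teacher_logits / T, dim=1),
        reduction="batchmean",
    ) * (T * T)
    hard = F.cross_entropy(student_logits, labels)
    return alpha * soft + (1 - alpha) * hard


# Toy batch of classifier outputs standing in for real teacher/student models
student_logits, teacher_logits = torch.randn(8, 10), torch.randn(8, 10)
labels = torch.randint(0, 10, (8,))
print(distillation_loss(student_logits, teacher_logits, labels))
```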
@@ -87,7 +98,7 @@ When deploying YOLOv8, several factors can affect model accuracy. Converting mod
### Inferences Are Taking Longer Than You Expected
-When deploying machine learning models, it's important that they run efficiently. If inferences are taking longer than expected, it can affect the user experience and the effectiveness of your application. Here are some steps to help you identify and resolve the problem:
+When deploying [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models, it's important that they run efficiently. If inferences are taking longer than expected, it can affect the user experience and the effectiveness of your application. Here are some steps to help you identify and resolve the problem:
- **Implement Warm-Up Runs**: Initial runs often include setup overhead, which can skew latency measurements. Perform a few warm-up inferences before measuring latency. Excluding these initial runs provides a more accurate measurement of the model's performance.
- **Optimize the Inference Engine:** Double-check that the inference engine is fully optimized for your specific GPU architecture. Use the latest drivers and software versions tailored to your hardware to ensure maximum performance and compatibility.
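The warm-up advice in the first bullet can be illustrated with a short timing sketch like the one below; the dummy 640x640 frame and run counts are placeholders for your real input and measurement protocol.

```python
import time

import numpy as np
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
frame = np.zeros((640, 640, 3), dtype=np.uint8)  # dummy frame standing in for real input

# Warm-up runs: exclude one-time setup costs (model loading, kernel compilation) from timing
for _ in range(3):
    model(frame, verbose=False)

# Timed runs
n = 20
start = time.perf_counter()
for _ in range(n):
    model(frame, verbose=False)
print(f"Average latency: {(time.perf_counter() - start) / n * 1000:.1f} ms")
```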
@@ -113,7 +124,7 @@ It's essential to control who can access your model and its data to prevent unau
### Model Obfuscation
-Protecting your model from being reverse-engineered or misuse can be done through model obfuscation. It involves encrypting model parameters, such as weights and biases in neural networks, to make it difficult for unauthorized individuals to understand or alter the model. You can also obfuscate the model's architecture by renaming layers and parameters or adding dummy layers, making it harder for attackers to reverse-engineer it. You can also serve the model in a secure environment, like a secure enclave or using a trusted execution environment (TEE), can provide an extra layer of protection during inference.
+Protecting your model from being reverse-engineered or misused can be done through model obfuscation. It involves encrypting model parameters, such as weights and biases in [neural networks](https://www.ultralytics.com/glossary/neural-network-nn), to make it difficult for unauthorized individuals to understand or alter the model. You can also obfuscate the model's architecture by renaming layers and parameters or adding dummy layers, making it harder for attackers to reverse-engineer it. Serving the model in a secure environment, such as a secure enclave or a trusted execution environment (TEE), can also provide an extra layer of protection during inference.
## Share Ideas With Your Peers
diff --git a/docs/en/guides/model-evaluation-insights.md b/docs/en/guides/model-evaluation-insights.md
index 22c44e5d..ef9389c2 100644
--- a/docs/en/guides/model-evaluation-insights.md
+++ b/docs/en/guides/model-evaluation-insights.md
@@ -24,19 +24,19 @@ _Quick Tip:_ When running inferences, if you aren't seeing any predictions and y
### Intersection over Union
-Intersection over Union (IoU) is a metric in object detection that measures how well the predicted bounding box overlaps with the ground truth bounding box. IoU values range from 0 to 1, where one stands for a perfect match. IoU is essential because it measures how closely the predicted boundaries match the actual object boundaries.
+[Intersection over Union](https://www.ultralytics.com/glossary/intersection-over-union-iou) (IoU) is a metric in [object detection](https://www.ultralytics.com/glossary/object-detection) that measures how well the predicted [bounding box](https://www.ultralytics.com/glossary/bounding-box) overlaps with the ground truth bounding box. IoU values range from 0 to 1, where one stands for a perfect match. IoU is essential because it measures how closely the predicted boundaries match the actual object boundaries.
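For reference, IoU can be computed with a few lines of plain Python; the sketch below uses `(x1, y1, x2, y2)` corner coordinates and is only meant to make the definition concrete.

```python
def box_iou(box1, box2):
    """IoU of two boxes given as (x1, y1, x2, y2) corner coordinates."""
    ix1, iy1 = max(box1[0], box2[0]), max(box1[1], box2[1])
    ix2, iy2 = min(box1[2], box2[2]), min(box1[3], box2[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    return inter / (area1 + area2 - inter + 1e-9)


print(box_iou((0, 0, 100, 100), (50, 50, 150, 150)))  # partially overlapping boxes, IoU ≈ 0.14
```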
@@ -108,8 +108,8 @@ These are some of the key elements that should be included in project documentat
- **[Project Overview](./steps-of-a-cv-project.md)**: Provide a high-level summary of the project, including the problem statement, solution approach, expected outcomes, and project scope. Explain the role of computer vision in addressing the problem and outline the stages and deliverables.
- **Model Architecture**: Detail the structure and design of the model, including its components, layers, and connections. Explain the chosen hyperparameters and the rationale behind these choices.
- **[Data Preparation](./data-collection-and-annotation.md)**: Describe the data sources, types, formats, sizes, and preprocessing steps. Discuss data quality, reliability, and any transformations applied before training the model.
-- **[Training Process](./model-training-tips.md)**: Document the training procedure, including the datasets used, training parameters, and loss functions. Explain how the model was trained and any challenges encountered during training.
-- **[Evaluation Metrics](./model-evaluation-insights.md)**: Specify the metrics used to evaluate the model's performance, such as accuracy, precision, recall, and F1-score. Include performance results and an analysis of these metrics.
+- **[Training Process](./model-training-tips.md)**: Document the training procedure, including the datasets used, training parameters, and [loss functions](https://www.ultralytics.com/glossary/loss-function). Explain how the model was trained and any challenges encountered during training.
+- **[Evaluation Metrics](./model-evaluation-insights.md)**: Specify the metrics used to evaluate the model's performance, such as accuracy, [precision](https://www.ultralytics.com/glossary/precision), [recall](https://www.ultralytics.com/glossary/recall), and F1-score. Include performance results and an analysis of these metrics.
- **[Deployment Steps](./model-deployment-options.md)**: Outline the steps taken to deploy the model, including the tools and platforms used, deployment configurations, and any specific challenges or considerations.
- **Monitoring and Maintenance Procedure**: Provide a detailed plan for monitoring the model's performance post-deployment. Include methods for detecting and addressing data and model drift, and describe the process for regular updates and retraining.
@@ -155,7 +155,7 @@ Maintaining computer vision models involves regular updates, retraining, and mon
Data drift detection is essential because it helps identify when the statistical properties of the input data change over time, which can degrade model performance. Techniques like continuous monitoring, statistical tests (e.g., Kolmogorov-Smirnov test), and feature drift analysis can help spot issues early. Addressing data drift ensures that your model remains accurate and relevant in changing environments. Learn more about data drift detection in our [Data Drift Detection](#data-drift-detection) section.
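As a small illustration of the Kolmogorov-Smirnov approach mentioned above, the sketch below compares a reference feature distribution against recent production values; the numbers and the 0.05 significance level are placeholders.

```python
from scipy.stats import ks_2samp

# Hypothetical 1-D feature values, e.g. mean image brightness per frame
reference = [0.41, 0.39, 0.44, 0.40, 0.42, 0.38, 0.43, 0.40]
production = [0.55, 0.58, 0.52, 0.57, 0.56, 0.54, 0.59, 0.53]

stat, p_value = ks_2samp(reference, production)
if p_value < 0.05:
    print(f"Possible data drift (KS statistic={stat:.2f}, p={p_value:.4f})")
```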
-### What tools can I use for anomaly detection in computer vision models?
+### What tools can I use for [anomaly detection](https://www.ultralytics.com/glossary/anomaly-detection) in computer vision models?
For anomaly detection in computer vision models, tools like [Prometheus](https://prometheus.io/), [Grafana](https://grafana.com/), and [Evidently AI](https://www.evidentlyai.com/) are highly effective. These tools can help you set up alert systems to detect unusual data points or patterns that deviate from expected behavior. Configurable alerts and standardized messages can help you respond quickly to potential issues. Explore more in our [Anomaly Detection and Alert Systems](#anomaly-detection-and-alert-systems) section.
@@ -168,5 +168,5 @@ Effective documentation of a computer vision project should include:
- **Data Preparation**: Information on data sources, preprocessing steps, and transformations.
- **Training Process**: Description of the training procedure, datasets used, and challenges encountered.
- **Evaluation Metrics**: Metrics used for performance evaluation and analysis.
-- **Deployment Steps**: Steps taken for model deployment and any specific challenges.
+- **Deployment Steps**: Steps taken for [model deployment](https://www.ultralytics.com/glossary/model-deployment) and any specific challenges.
- **Monitoring and Maintenance Procedure**: Plan for ongoing monitoring and maintenance. For more comprehensive guidelines, refer to our [Documentation](#documentation) section.
diff --git a/docs/en/guides/model-testing.md b/docs/en/guides/model-testing.md
index 71ff69b0..8d324679 100644
--- a/docs/en/guides/model-testing.md
+++ b/docs/en/guides/model-testing.md
@@ -10,19 +10,19 @@ keywords: Overfitting and Underfitting in Machine Learning, Model Testing, Data
After [training](./model-training-tips.md) and [evaluating](./model-evaluation-insights.md) your model, it's time to test it. Model testing involves assessing how well it performs in real-world scenarios. Testing considers factors like accuracy, reliability, fairness, and how easy it is to understand the model's decisions. The goal is to make sure the model performs as intended, delivers the expected results, and fits into the [overall objective of your application](./defining-project-goals.md) or project.
-Model testing is quite similar to model evaluation, but they are two distinct [steps in a computer vision project](./steps-of-a-cv-project.md). Model evaluation involves metrics and plots to assess the model's accuracy. On the other hand, model testing checks if the model's learned behavior is the same as expectations. In this guide, we'll explore strategies for testing your computer vision models.
+Model testing is quite similar to model evaluation, but they are two distinct [steps in a computer vision project](./steps-of-a-cv-project.md). Model evaluation involves metrics and plots to assess the model's accuracy. On the other hand, model testing checks if the model's learned behavior is the same as expectations. In this guide, we'll explore strategies for testing your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models.
## Model Testing Vs. Model Evaluation
First, let's understand the difference between model evaluation and testing with an example.
-Suppose you have trained a computer vision model to recognize cats and dogs, and you want to deploy this model at a pet store to monitor the animals. During the model evaluation phase, you use a labeled dataset to calculate metrics like accuracy, precision, recall, and F1 score. For instance, the model might have an accuracy of 98% in distinguishing between cats and dogs in a given dataset.
+Suppose you have trained a computer vision model to recognize cats and dogs, and you want to deploy this model at a pet store to monitor the animals. During the model evaluation phase, you use a labeled dataset to calculate metrics like accuracy, [precision](https://www.ultralytics.com/glossary/precision), [recall](https://www.ultralytics.com/glossary/recall), and F1 score. For instance, the model might have an accuracy of 98% in distinguishing between cats and dogs in a given dataset.
After evaluation, you test the model using images from a pet store to see how well it identifies cats and dogs in more varied and realistic conditions. You check if it can correctly label cats and dogs when they are moving, in different lighting conditions, or partially obscured by objects like toys or furniture. Model testing checks that the model behaves as expected outside the controlled evaluation environment.
## Preparing for Model Testing
-Computer vision models learn from datasets by detecting patterns, making predictions, and evaluating their performance. These [datasets](./preprocessing_annotated_data.md) are usually divided into training and testing sets to simulate real-world conditions. Training data teaches the model while testing data verifies its accuracy.
+Computer vision models learn from datasets by detecting patterns, making predictions, and evaluating their performance. These [datasets](./preprocessing_annotated_data.md) are usually divided into training and testing sets to simulate real-world conditions. [Training data](https://www.ultralytics.com/glossary/training-data) teaches the model while testing data verifies its accuracy.
Here are two points to keep in mind before testing your model:
@@ -61,7 +61,7 @@ If you want to test your trained YOLOv8 model on multiple images stored in a fol
If you are interested in testing the basic YOLOv8 model to understand whether it can be used for your application without custom training, you can use the prediction mode. While the model is pre-trained on datasets like COCO, running predictions on your own dataset can give you a quick sense of how well it might perform in your specific context.
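A quick sketch of that kind of smoke test is shown below; `path/to/test_images` is a placeholder for your own folder.

```python
from ultralytics import YOLO

# Pre-trained COCO model, no custom training
model = YOLO("yolov8n.pt")

# Run prediction on every image in a folder and save annotated copies
results = model.predict(source="path/to/test_images", save=True)
for r in results:
    print(r.path, len(r.boxes), "objects detected")
```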
-## Overfitting and Underfitting in Machine Learning
+## Overfitting and [Underfitting](https://www.ultralytics.com/glossary/underfitting) in [Machine Learning](https://www.ultralytics.com/glossary/machine-learning-ml)
When testing a machine learning model, especially in computer vision, it's important to watch out for overfitting and underfitting. These issues can significantly affect how well your model works with new data.
@@ -71,7 +71,7 @@ Overfitting happens when your model learns the training data too well, including
#### Signs of Overfitting
-- **High Training Accuracy, Low Validation Accuracy:** If your model performs very well on training data but poorly on validation or test data, it's likely overfitting.
+- **High Training Accuracy, Low Validation Accuracy:** If your model performs very well on training data but poorly on validation or [test data](https://www.ultralytics.com/glossary/test-data), it's likely overfitting.
- **Visual Inspection:** Sometimes, you can see overfitting if your model is too sensitive to minor changes or irrelevant details in images.
### Underfitting
@@ -102,7 +102,7 @@ Data leakage can be tricky to spot and often comes from hidden biases in the tra
- **Camera Bias:** Different angles, lighting, shadows, and camera movements can introduce unwanted patterns.
- **Overlay Bias:** Logos, timestamps, or other overlays in images can mislead the model.
- **Font and Object Bias:** Specific fonts or objects that frequently appear in certain classes can skew the model's learning.
-- **Spatial Bias:** Imbalances in foreground-background, bounding box distributions, and object locations can affect training.
+- **Spatial Bias:** Imbalances in foreground-background, [bounding box](https://www.ultralytics.com/glossary/bounding-box) distributions, and object locations can affect training.
- **Label and Domain Bias:** Incorrect labels or shifts in data types can lead to leakage.
### Detecting Data Leakage
@@ -139,13 +139,13 @@ These resources will help you navigate challenges and remain updated on the late
## In Summary
-Building trustworthy computer vision models relies on rigorous model testing. By testing the model with previously unseen data, we can analyze it and spot weaknesses like overfitting and data leakage. Addressing these issues before deployment helps the model perform well in real-world applications. It's important to remember that model testing is just as crucial as model evaluation in guaranteeing the model's long-term success and effectiveness.
+Building trustworthy computer vision models relies on rigorous model testing. By testing the model with previously unseen data, we can analyze it and spot weaknesses like [overfitting](https://www.ultralytics.com/glossary/overfitting) and data leakage. Addressing these issues before deployment helps the model perform well in real-world applications. It's important to remember that model testing is just as crucial as model evaluation in guaranteeing the model's long-term success and effectiveness.
## FAQ
### What are the key differences between model evaluation and model testing in computer vision?
-Model evaluation and model testing are distinct steps in a computer vision project. Model evaluation involves using a labeled dataset to compute metrics such as accuracy, precision, recall, and F1 score, providing insights into the model's performance with a controlled dataset. Model testing, on the other hand, assesses the model's performance in real-world scenarios by applying it to new, unseen data, ensuring the model's learned behavior aligns with expectations outside the evaluation environment. For a detailed guide, refer to the [steps in a computer vision project](./steps-of-a-cv-project.md).
+Model evaluation and model testing are distinct steps in a computer vision project. Model evaluation involves using a labeled dataset to compute metrics such as [accuracy](https://www.ultralytics.com/glossary/accuracy), precision, recall, and [F1 score](https://www.ultralytics.com/glossary/f1-score), providing insights into the model's performance with a controlled dataset. Model testing, on the other hand, assesses the model's performance in real-world scenarios by applying it to new, unseen data, ensuring the model's learned behavior aligns with expectations outside the evaluation environment. For a detailed guide, refer to the [steps in a computer vision project](./steps-of-a-cv-project.md).
### How can I test my Ultralytics YOLOv8 model on multiple images?
@@ -155,7 +155,7 @@ To test your Ultralytics YOLOv8 model on multiple images, you can use the [predi
To address **overfitting**:
-- Regularization techniques like dropout.
+- [Regularization](https://www.ultralytics.com/glossary/regularization) techniques like dropout.
- Increase the size of the training dataset.
- Simplify the model architecture.
@@ -163,7 +163,7 @@ To address **underfitting**:
- Use a more complex model.
- Provide more relevant features.
-- Increase training iterations or epochs.
+- Increase training iterations or [epochs](https://www.ultralytics.com/glossary/epoch).
Review misclassified images, perform thorough error analysis, and regularly track performance metrics to maintain a balance. For more information on these concepts, explore our section on [Overfitting and Underfitting](#overfitting-and-underfitting-in-machine-learning).
@@ -190,7 +190,7 @@ Post-testing, if the model performance meets the project goals, proceed with dep
- Error analysis.
- Gathering more diverse and high-quality data.
-- Hyperparameter tuning.
+- [Hyperparameter tuning](https://www.ultralytics.com/glossary/hyperparameter-tuning).
- Retraining the model.
Gain insights from the [Model Testing Vs. Model Evaluation](#model-testing-vs-model-evaluation) section to refine and enhance model effectiveness in real-world applications.
diff --git a/docs/en/guides/model-training-tips.md b/docs/en/guides/model-training-tips.md
index 2efa53e7..725081a2 100644
--- a/docs/en/guides/model-training-tips.md
+++ b/docs/en/guides/model-training-tips.md
@@ -10,19 +10,30 @@ keywords: Model Training Machine Learning, AI Model Training, Number of Epochs,
One of the most important steps when working on a [computer vision project](./steps-of-a-cv-project.md) is model training. Before reaching this step, you need to [define your goals](./defining-project-goals.md) and [collect and annotate your data](./data-collection-and-annotation.md). After [preprocessing the data](./preprocessing_annotated_data.md) to make sure it is clean and consistent, you can move on to training your model.
+
+
@@ -114,7 +125,7 @@ Local training provides greater control and customization, letting you tailor yo
## Selecting an Optimizer
-An optimizer is an algorithm that adjusts the weights of your neural network to minimize the loss function, which measures how well the model is performing. In simpler terms, the optimizer helps the model learn by tweaking its parameters to reduce errors. Choosing the right optimizer directly affects how quickly and accurately the model learns.
+An optimizer is an algorithm that adjusts the weights of your neural network to minimize the [loss function](https://www.ultralytics.com/glossary/loss-function), which measures how well the model is performing. In simpler terms, the optimizer helps the model learn by tweaking its parameters to reduce errors. Choosing the right optimizer directly affects how quickly and accurately the model learns.
You can also fine-tune optimizer parameters to improve model performance. Adjusting the learning rate sets the size of the steps when updating parameters. For stability, you might start with a moderate learning rate and gradually decrease it over time to improve long-term learning. Additionally, setting the momentum determines how much influence past updates have on current updates. A common value for momentum is around 0.9. It generally provides a good balance.
@@ -136,7 +147,7 @@ Different optimizers have various strengths and weaknesses. Let's take a glimpse
- **RMSProp (Root Mean Square Propagation)**:
- Adjusts the learning rate for each parameter by dividing the gradient by a running average of the magnitudes of recent gradients.
- - Helps in handling the vanishing gradient problem and is effective for recurrent neural networks.
+ - Helps in handling the vanishing gradient problem and is effective for [recurrent neural networks](https://www.ultralytics.com/glossary/recurrent-neural-network-rnn).
For YOLOv8, the `optimizer` parameter lets you choose from various optimizers, including SGD, Adam, AdamW, NAdam, RAdam, and RMSProp, or you can set it to `auto` for automatic selection based on model configuration.
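A minimal sketch of picking an optimizer along with the learning-rate and momentum values discussed above might look like this; the dataset and epoch count are placeholders.

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")

# Explicit optimizer choice with a moderate learning rate and the commonly used 0.9 momentum
model.train(data="coco8.yaml", epochs=10, optimizer="SGD", lr0=0.01, momentum=0.9)
```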
@@ -157,7 +168,7 @@ Using these resources will help you solve challenges and stay up-to-date with th
## Key Takeaways
-Training computer vision models involves following good practices, optimizing your strategies, and solving problems as they arise. Techniques like adjusting batch sizes, mixed precision training, and starting with pre-trained weights can make your models work better and train faster. Methods like subset training and early stopping help you save time and resources. Staying connected with the community and keeping up with new trends will help you keep improving your model training skills.
+Training computer vision models involves following good practices, optimizing your strategies, and solving problems as they arise. Techniques like adjusting batch sizes, mixed [precision](https://www.ultralytics.com/glossary/precision) training, and starting with pre-trained weights can make your models work better and train faster. Methods like subset training and early stopping help you save time and resources. Staying connected with the community and keeping up with new trends will help you keep improving your model training skills.
## FAQ
@@ -167,7 +178,7 @@ To improve GPU utilization, set the `batch_size` parameter in your training conf
### What is mixed precision training, and how do I enable it in YOLOv8?
-Mixed precision training utilizes both 16-bit (FP16) and 32-bit (FP32) floating-point types to balance computational speed and precision. This approach speeds up training and reduces memory usage without sacrificing model accuracy. To enable mixed precision training in YOLOv8, set the `amp` parameter to `True` in your training configuration. This activates Automatic Mixed Precision (AMP) training. For more details on this optimization technique, see the [training configuration](../modes/train.md).
+Mixed precision training utilizes both 16-bit (FP16) and 32-bit (FP32) floating-point types to balance computational speed and precision. This approach speeds up training and reduces memory usage without sacrificing model [accuracy](https://www.ultralytics.com/glossary/accuracy). To enable mixed precision training in YOLOv8, set the `amp` parameter to `True` in your training configuration. This activates Automatic Mixed Precision (AMP) training. For more details on this optimization technique, see the [training configuration](../modes/train.md).
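For illustration, enabling AMP is a single training argument; the sketch below uses a small placeholder dataset.

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")

# amp=True turns on Automatic Mixed Precision (it is the default; shown explicitly for clarity)
model.train(data="coco8.yaml", epochs=10, amp=True)
```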
### How does multiscale training enhance YOLOv8 model performance?
diff --git a/docs/en/guides/nvidia-jetson.md b/docs/en/guides/nvidia-jetson.md
index 9f5dbc9b..f352c76b 100644
--- a/docs/en/guides/nvidia-jetson.md
+++ b/docs/en/guides/nvidia-jetson.md
@@ -27,7 +27,7 @@ This comprehensive guide provides a detailed walkthrough for deploying Ultralyti
## What is NVIDIA Jetson?
-NVIDIA Jetson is a series of embedded computing boards designed to bring accelerated AI (artificial intelligence) computing to edge devices. These compact and powerful devices are built around NVIDIA's GPU architecture and are capable of running complex AI algorithms and deep learning models directly on the device, without needing to rely on cloud computing resources. Jetson boards are often used in robotics, autonomous vehicles, industrial automation, and other applications where AI inference needs to be performed locally with low latency and high efficiency. Additionally, these boards are based on the ARM64 architecture and runs on lower power compared to traditional GPU computing devices.
+NVIDIA Jetson is a series of embedded computing boards designed to bring accelerated AI (artificial intelligence) computing to edge devices. These compact and powerful devices are built around NVIDIA's GPU architecture and are capable of running complex AI algorithms and [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models directly on the device, without needing to rely on [cloud computing](https://www.ultralytics.com/glossary/cloud-computing) resources. Jetson boards are often used in robotics, autonomous vehicles, industrial automation, and other applications where AI inference needs to be performed locally with low latency and high efficiency. Additionally, these boards are based on the ARM64 architecture and run on lower power compared to traditional GPU computing devices.
## NVIDIA Jetson Series Comparison
@@ -46,7 +46,7 @@ For a more detailed comparison table, please visit the **Technical Specification
## What is NVIDIA JetPack?
-[NVIDIA JetPack SDK](https://developer.nvidia.com/embedded/jetpack) powering the Jetson modules is the most comprehensive solution and provides full development environment for building end-to-end accelerated AI applications and shortens time to market. JetPack includes Jetson Linux with bootloader, Linux kernel, Ubuntu desktop environment, and a complete set of libraries for acceleration of GPU computing, multimedia, graphics, and computer vision. It also includes samples, documentation, and developer tools for both host computer and developer kit, and supports higher level SDKs such as DeepStream for streaming video analytics, Isaac for robotics, and Riva for conversational AI.
+[NVIDIA JetPack SDK](https://developer.nvidia.com/embedded/jetpack) powering the Jetson modules is the most comprehensive solution and provides a full development environment for building end-to-end accelerated AI applications and shortens time to market. JetPack includes Jetson Linux with bootloader, Linux kernel, Ubuntu desktop environment, and a complete set of libraries for acceleration of GPU computing, multimedia, graphics, and [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv). It also includes samples, documentation, and developer tools for both host computer and developer kit, and supports higher level SDKs such as DeepStream for streaming video analytics, Isaac for robotics, and Riva for conversational AI.
## Flash JetPack to NVIDIA Jetson
@@ -110,7 +110,7 @@ For a native installation without Docker, please refer to the steps below.
#### Install Ultralytics Package
-Here we will install Ultralytics package on the Jetson with optional dependencies so that we can export the PyTorch models to other different formats. We will mainly focus on [NVIDIA TensorRT exports](../integrations/tensorrt.md) because TensorRT will make sure we can get the maximum performance out of the Jetson devices.
+Here we will install the Ultralytics package on the Jetson with optional dependencies so that we can export the [PyTorch](https://www.ultralytics.com/glossary/pytorch) models to different formats. We will mainly focus on [NVIDIA TensorRT exports](../integrations/tensorrt.md) because TensorRT will make sure we can get the maximum performance out of the Jetson devices.
1. Update packages list, install pip and upgrade to latest
@@ -280,14 +280,14 @@ The YOLOv8n model in PyTorch format is converted to TensorRT to run inference wi
## NVIDIA Jetson Orin YOLOv8 Benchmarks
-YOLOv8 benchmarks were run by the Ultralytics team on 10 different model formats measuring speed and accuracy: PyTorch, TorchScript, ONNX, OpenVINO, TensorRT, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, NCNN. Benchmarks were run on Seeed Studio reComputer J4012 powered by Jetson Orin NX 16GB device at FP32 precision with default input image size of 640.
+YOLOv8 benchmarks were run by the Ultralytics team on 10 different model formats measuring speed and [accuracy](https://www.ultralytics.com/glossary/accuracy): PyTorch, TorchScript, ONNX, OpenVINO, TensorRT, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, NCNN. Benchmarks were run on Seeed Studio reComputer J4012 powered by Jetson Orin NX 16GB device at FP32 [precision](https://www.ultralytics.com/glossary/precision) with default input image size of 640.
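Similar benchmarks can be reproduced with the Ultralytics benchmark utility; the sketch below is an approximation of such a run, with the image size and device as placeholders rather than the exact settings used for the published numbers.

```python
from ultralytics.utils.benchmarks import benchmark

# Benchmark YOLOv8n across export formats at FP32 (half=False) on GPU device 0
benchmark(model="yolov8n.pt", imgsz=640, half=False, device=0)
```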
### Comparison Chart
Even though all model exports are working with NVIDIA Jetson, we have only included **PyTorch, TorchScript, TensorRT** for the comparison chart below because, they make use of the GPU on the Jetson and are guaranteed to produce the best results. All the other exports only utilize the CPU and the performance is not as good as the above three. You can find benchmarks for all exports in the section after this chart.
@@ -151,7 +151,7 @@ Data preprocessing is essential in computer vision projects because it ensures t
### How can I use Ultralytics YOLO for data augmentation?
-For data augmentation with Ultralytics YOLOv8, you need to modify the dataset configuration file (.yaml). In this file, you can specify various augmentation techniques such as random crops, horizontal flips, and brightness adjustments. This can be effectively done using the training configurations [explained here](../modes/train.md). Data augmentation helps create a more robust dataset, reduce overfitting, and improve model generalization.
+For data augmentation with Ultralytics YOLOv8, you need to modify the dataset configuration file (.yaml). In this file, you can specify various augmentation techniques such as random crops, horizontal flips, and brightness adjustments. This can be effectively done using the training configurations [explained here](../modes/train.md). Data augmentation helps create a more robust dataset, reduce [overfitting](https://www.ultralytics.com/glossary/overfitting), and improve model generalization.
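Note that augmentation hyperparameters can also be passed straight to `model.train()`; the sketch below shows that route with a few illustrative values, which are assumptions rather than recommended settings.

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")

# Illustrative augmentation settings: color jitter, small rotations, and horizontal flips
model.train(data="coco8.yaml", epochs=10, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=10.0, fliplr=0.5)
```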
### What are the best data normalization techniques for computer vision data?
@@ -164,7 +164,7 @@ For YOLOv8, normalization is handled automatically, including conversion to RGB
### How should I split my annotated dataset for training?
-To split your dataset, a common practice is to divide it into 70% for training, 20% for validation, and 10% for testing. It is important to maintain the data distribution of classes across these splits and avoid data leakage by performing augmentation only on the training set. Use tools like scikit-learn or TensorFlow for efficient dataset splitting. See the detailed guide on [dataset preparation](../guides/data-collection-and-annotation.md).
+To split your dataset, a common practice is to divide it into 70% for training, 20% for validation, and 10% for testing. It is important to maintain the data distribution of classes across these splits and avoid data leakage by performing augmentation only on the training set. Use tools like scikit-learn or [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) for efficient dataset splitting. See the detailed guide on [dataset preparation](../guides/data-collection-and-annotation.md).
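A scikit-learn sketch of the 70/20/10 split described above is shown below; the file list is hypothetical, and in practice you would also keep class distributions balanced across the splits.

```python
from sklearn.model_selection import train_test_split

image_paths = [f"images/img_{i:04d}.jpg" for i in range(1000)]  # hypothetical annotated images

# 70% train, then split the remaining 30% into 20% val and 10% test
train_files, rest = train_test_split(image_paths, train_size=0.7, random_state=0)
val_files, test_files = train_test_split(rest, train_size=2 / 3, random_state=0)
print(len(train_files), len(val_files), len(test_files))  # 700, 200, 100
```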
### Can I handle varying image sizes in YOLOv8 without manual resizing?
diff --git a/docs/en/guides/queue-management.md b/docs/en/guides/queue-management.md
index 7abc31d3..9fb4897e 100644
--- a/docs/en/guides/queue-management.md
+++ b/docs/en/guides/queue-management.md
@@ -56,15 +56,13 @@ Queue management using [Ultralytics YOLOv8](https://github.com/ultralytics/ultra
names=model.names,
reg_pts=queue_region,
line_thickness=3,
- fontsize=1.0,
- region_color=(255, 144, 31),
)
while cap.isOpened():
success, im0 = cap.read()
if success:
- tracks = model.track(im0, show=False, persist=True, verbose=False)
+ tracks = model.track(im0, persist=True)
out = queue.process_queue(im0, tracks)
video_writer.write(im0)
@@ -100,15 +98,13 @@ Queue management using [Ultralytics YOLOv8](https://github.com/ultralytics/ultra
names=model.names,
reg_pts=queue_region,
line_thickness=3,
- fontsize=1.0,
- region_color=(255, 144, 31),
)
while cap.isOpened():
success, im0 = cap.read()
if success:
- tracks = model.track(im0, show=False, persist=True, verbose=False, classes=0) # Only person class
+ tracks = model.track(im0, persist=True, classes=0) # Only person class
out = queue.process_queue(im0, tracks)
video_writer.write(im0)
@@ -125,32 +121,17 @@ Queue management using [Ultralytics YOLOv8](https://github.com/ultralytics/ultra
### Arguments `QueueManager`
-| Name | Type | Default | Description |
-| ------------------- | ---------------- | -------------------------- | ----------------------------------------------------------------------------------- |
-| `names` | `dict` | `model.names` | A dictionary mapping class IDs to class names. |
-| `reg_pts` | `list of tuples` | `[(20, 400), (1260, 400)]` | Points defining the counting region polygon. Defaults to a predefined rectangle. |
-| `line_thickness` | `int` | `2` | Thickness of the annotation lines. |
-| `track_thickness` | `int` | `2` | Thickness of the track lines. |
-| `view_img` | `bool` | `False` | Whether to display the image frames. |
-| `region_color` | `tuple` | `(255, 0, 255)` | Color of the counting region lines (BGR). |
-| `view_queue_counts` | `bool` | `True` | Whether to display the queue counts. |
-| `draw_tracks` | `bool` | `False` | Whether to draw tracks of the objects. |
-| `count_txt_color` | `tuple` | `(255, 255, 255)` | Color of the count text (BGR). |
-| `track_color` | `tuple` | `None` | Color of the tracks. If `None`, different colors will be used for different tracks. |
-| `region_thickness` | `int` | `5` | Thickness of the counting region lines. |
-| `fontsize` | `float` | `0.7` | Font size for the text annotations. |
+| Name | Type | Default | Description |
+| ---------------- | ---------------- | -------------------------- | -------------------------------------------------------------------------------- |
+| `names` | `dict` | `model.names` | A dictionary mapping class IDs to class names. |
+| `reg_pts` | `list of tuples` | `[(20, 400), (1260, 400)]` | Points defining the counting region polygon. Defaults to a predefined rectangle. |
+| `line_thickness` | `int` | `2` | Thickness of the annotation lines. |
+| `view_img` | `bool` | `False` | Whether to display the image frames. |
+| `draw_tracks` | `bool` | `False` | Whether to draw tracks of the objects. |
### Arguments `model.track`
-| Name | Type | Default | Description |
-| --------- | ------- | -------------- | ----------------------------------------------------------- |
-| `source` | `im0` | `None` | source directory for images or videos |
-| `persist` | `bool` | `False` | persisting tracks between frames |
-| `tracker` | `str` | `botsort.yaml` | Tracking method 'bytetrack' or 'botsort' |
-| `conf` | `float` | `0.3` | Confidence Threshold |
-| `iou` | `float` | `0.5` | IOU Threshold |
-| `classes` | `list` | `None` | filter results by class, i.e. classes=0, or classes=[0,2,3] |
-| `verbose` | `bool` | `True` | Display the object tracking results |
+{% include "macros/track-args.md" %}
## FAQ
@@ -178,8 +159,6 @@ queue = solutions.QueueManager(
names=model.names,
reg_pts=queue_region,
line_thickness=3,
- fontsize=1.0,
- region_color=(255, 144, 31),
)
while cap.isOpened():
@@ -208,7 +187,7 @@ Using Ultralytics YOLOv8 for queue management offers several benefits:
For more details, explore our [Queue Management](https://docs.ultralytics.com/reference/solutions/queue_management/) solutions.
-### Why should I choose Ultralytics YOLOv8 over competitors like TensorFlow or Detectron2 for queue management?
+### Why should I choose Ultralytics YOLOv8 over competitors like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) or Detectron2 for queue management?
Ultralytics YOLOv8 has several advantages over TensorFlow and Detectron2 for queue management:
@@ -231,8 +210,6 @@ queue_airport = solutions.QueueManager(
names=model.names,
reg_pts=queue_region_airport,
line_thickness=3,
- fontsize=1.0,
- region_color=(0, 255, 0),
)
```
diff --git a/docs/en/guides/raspberry-pi.md b/docs/en/guides/raspberry-pi.md
index 674bc847..c25557e8 100644
--- a/docs/en/guides/raspberry-pi.md
+++ b/docs/en/guides/raspberry-pi.md
@@ -49,7 +49,7 @@ The first thing to do after getting your hands on a Raspberry Pi is to flash a m
## Set Up Ultralytics
-There are two ways of setting up Ultralytics package on Raspberry Pi to build your next Computer Vision project. You can use either of them.
+There are two ways of setting up Ultralytics package on Raspberry Pi to build your next [Computer Vision](https://www.ultralytics.com/glossary/computer-vision-cv) project. You can use either of them.
- [Start with Docker](#start-with-docker)
- [Start without Docker](#start-without-docker)
@@ -70,7 +70,7 @@ After this is done, skip to [Use NCNN on Raspberry Pi section](#use-ncnn-on-rasp
#### Install Ultralytics Package
-Here we will install Ultralytics package on the Raspberry Pi with optional dependencies so that we can export the PyTorch models to other different formats.
+Here we will install the Ultralytics package on the Raspberry Pi with optional dependencies so that we can export [PyTorch](https://www.ultralytics.com/glossary/pytorch) models to other formats.
1. Update packages list, install pip and upgrade to latest
@@ -94,7 +94,7 @@ Here we will install Ultralytics package on the Raspberry Pi with optional depen
## Use NCNN on Raspberry Pi
-Out of all the model export formats supported by Ultralytics, [NCNN](https://docs.ultralytics.com/integrations/ncnn) delivers the best inference performance when working with Raspberry Pi devices because NCNN is highly optimized for mobile/ embedded platforms (such as ARM architecture). Therefor our recommendation is to use NCNN with Raspberry Pi.
+Out of all the model export formats supported by Ultralytics, [NCNN](https://docs.ultralytics.com/integrations/ncnn/) delivers the best inference performance when working with Raspberry Pi devices because NCNN is highly optimized for mobile/embedded platforms (such as ARM architecture). Therefore, our recommendation is to use NCNN with Raspberry Pi.
## Convert Model to NCNN and Run Inference
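As a condensed sketch of the export-and-inference flow this section walks through (the checkpoint, image URL, and exported folder name are assumptions based on default naming):

```python
from ultralytics import YOLO

# Export a PyTorch checkpoint to NCNN (folder name below assumes default naming).
model = YOLO("yolov8n.pt")
model.export(format="ncnn")  # creates an NCNN model folder, e.g. 'yolov8n_ncnn_model'

# Load the exported NCNN model and run inference on a sample image.
ncnn_model = YOLO("yolov8n_ncnn_model")
results = ncnn_model("https://ultralytics.com/images/bus.jpg")
```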
@@ -132,11 +132,11 @@ The YOLOv8n model in PyTorch format is converted to NCNN to run inference with t
!!! tip
- For more details about supported export options, visit the [Ultralytics documentation page on deployment options](https://docs.ultralytics.com/guides/model-deployment-options).
+ For more details about supported export options, visit the [Ultralytics documentation page on deployment options](https://docs.ultralytics.com/guides/model-deployment-options/).
## Raspberry Pi 5 vs Raspberry Pi 4 YOLOv8 Benchmarks
-YOLOv8 benchmarks were run by the Ultralytics team on nine different model formats measuring speed and accuracy: PyTorch, TorchScript, ONNX, OpenVINO, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, NCNN. Benchmarks were run on both Raspberry Pi 5 and Raspberry Pi 4 at FP32 precision with default input image size of 640.
+YOLOv8 benchmarks were run by the Ultralytics team on nine different model formats measuring speed and [accuracy](https://www.ultralytics.com/glossary/accuracy): PyTorch, TorchScript, ONNX, OpenVINO, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, NCNN. Benchmarks were run on both Raspberry Pi 5 and Raspberry Pi 4 at FP32 [precision](https://www.ultralytics.com/glossary/precision) with a default input image size of 640.
!!! note
diff --git a/docs/en/guides/region-counting.md b/docs/en/guides/region-counting.md
index f2be9d45..a27c2b4e 100644
--- a/docs/en/guides/region-counting.md
+++ b/docs/en/guides/region-counting.md
@@ -8,7 +8,7 @@ keywords: object counting, regions, YOLOv8, computer vision, Ultralytics, effici
## What is Object Counting in Regions?
-[Object counting](../guides/object-counting.md) in regions with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) involves precisely determining the number of objects within specified areas using advanced computer vision. This approach is valuable for optimizing processes, enhancing security, and improving efficiency in various applications.
+[Object counting](../guides/object-counting.md) in regions with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) involves precisely determining the number of objects within specified areas using advanced [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv). This approach is valuable for optimizing processes, enhancing security, and improving efficiency in various applications.
@@ -129,7 +129,7 @@ while True:
rospy.spin()
```
-??? Example "Complete code"
+??? example "Complete code"
```python
import time
@@ -297,7 +297,7 @@ while True:
rospy.spin()
```
-??? Example "Complete code"
+??? example "Complete code"
```python
import time
@@ -360,7 +360,7 @@ A point cloud is a collection of data points defined within a three-dimensional
Point Clouds can be obtained using various sensors:
- 1. **LIDAR (Light Detection and Ranging)**: Uses laser pulses to measure distances to objects and create high-precision 3D maps.
+ 1. **LIDAR (Light Detection and Ranging)**: Uses laser pulses to measure distances to objects and create high-[precision](https://www.ultralytics.com/glossary/precision) 3D maps.
2. **Depth Cameras**: Capture depth information for each pixel, allowing for 3D reconstruction of the scene.
3. **Stereo Cameras**: Utilize two or more cameras to obtain depth information through triangulation.
4. **Structured Light Scanners**: Project a known pattern onto a surface and measure the deformation to calculate depth.
@@ -448,7 +448,7 @@ for index, class_id in enumerate(classes):
o3d.visualization.draw_geometries([pcd])
```
-??? Example "Complete code"
+??? example "Complete code"
```python
import sys
diff --git a/docs/en/guides/sahi-tiled-inference.md b/docs/en/guides/sahi-tiled-inference.md
index 6b23b21c..1137d6b8 100644
--- a/docs/en/guides/sahi-tiled-inference.md
+++ b/docs/en/guides/sahi-tiled-inference.md
@@ -6,7 +6,7 @@ keywords: YOLOv8, SAHI, Sliced Inference, Object Detection, Ultralytics, High-re
# Ultralytics Docs: Using YOLOv8 with SAHI for Sliced Inference
-Welcome to the Ultralytics documentation on how to use YOLOv8 with [SAHI](https://github.com/obss/sahi) (Slicing Aided Hyper Inference). This comprehensive guide aims to furnish you with all the essential knowledge you'll need to implement SAHI alongside YOLOv8. We'll deep-dive into what SAHI is, why sliced inference is critical for large-scale applications, and how to integrate these functionalities with YOLOv8 for enhanced object detection performance.
+Welcome to the Ultralytics documentation on how to use YOLOv8 with [SAHI](https://github.com/obss/sahi) (Slicing Aided Hyper Inference). This comprehensive guide aims to furnish you with all the essential knowledge you'll need to implement SAHI alongside YOLOv8. We'll deep-dive into what SAHI is, why sliced inference is critical for large-scale applications, and how to integrate these functionalities with YOLOv8 for enhanced [object detection](https://www.ultralytics.com/glossary/object-detection) performance.
@@ -31,7 +31,7 @@ SAHI (Slicing Aided Hyper Inference) is an innovative library designed to optimi
- **Seamless Integration**: SAHI integrates effortlessly with YOLO models, meaning you can start slicing and detecting without a lot of code modification.
- **Resource Efficiency**: By breaking down large images into smaller parts, SAHI optimizes the memory usage, allowing you to run high-quality detection on hardware with limited resources.
-- **High Accuracy**: SAHI maintains the detection accuracy by employing smart algorithms to merge overlapping detection boxes during the stitching process.
+- **High [Accuracy](https://www.ultralytics.com/glossary/accuracy)**: SAHI maintains the detection accuracy by employing smart algorithms to merge overlapping detection boxes during the stitching process.
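For context, here is a hedged sketch of what sliced inference with SAHI and a YOLOv8 checkpoint can look like; the weights file, image path, and slice sizes are placeholder choices, and the SAHI API may differ slightly between versions.

```python
from sahi import AutoDetectionModel
from sahi.predict import get_sliced_prediction

# Wrap a YOLOv8 checkpoint with SAHI (weights path and threshold are placeholders).
detection_model = AutoDetectionModel.from_pretrained(
    model_type="yolov8",
    model_path="yolov8n.pt",
    confidence_threshold=0.3,
    device="cpu",
)

# Run sliced inference on a local image; slice sizes and overlaps are example values.
result = get_sliced_prediction(
    "demo_image.jpg",
    detection_model,
    slice_height=512,
    slice_width=512,
    overlap_height_ratio=0.2,
    overlap_width_ratio=0.2,
)
print(len(result.object_prediction_list), "objects detected")
```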
## What is Sliced Inference?
@@ -202,7 +202,7 @@ If you use SAHI in your research or development work, please cite the original S
}
```
-We extend our thanks to the SAHI research group for creating and maintaining this invaluable resource for the computer vision community. For more information about SAHI and its creators, visit the [SAHI GitHub repository](https://github.com/obss/sahi).
+We extend our thanks to the SAHI research group for creating and maintaining this invaluable resource for the [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) community. For more information about SAHI and its creators, visit the [SAHI GitHub repository](https://github.com/obss/sahi).
## FAQ
diff --git a/docs/en/guides/security-alarm-system.md b/docs/en/guides/security-alarm-system.md
index 78bc2a9c..d90e44a5 100644
--- a/docs/en/guides/security-alarm-system.md
+++ b/docs/en/guides/security-alarm-system.md
@@ -8,10 +8,10 @@ keywords: YOLOv8, Security Alarm System, real-time object detection, Ultralytics
-The Security Alarm System Project utilizing Ultralytics YOLOv8 integrates advanced computer vision capabilities to enhance security measures. YOLOv8, developed by Ultralytics, provides real-time object detection, allowing the system to identify and respond to potential security threats promptly. This project offers several advantages:
+The Security Alarm System Project utilizing Ultralytics YOLOv8 integrates advanced [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) capabilities to enhance security measures. YOLOv8, developed by Ultralytics, provides real-time object detection, allowing the system to identify and respond to potential security threats promptly. This project offers several advantages:
- **Real-time Detection:** YOLOv8's efficiency enables the Security Alarm System to detect and respond to security incidents in real-time, minimizing response time.
-- **Accuracy:** YOLOv8 is known for its accuracy in object detection, reducing false positives and enhancing the reliability of the security alarm system.
+- **[Accuracy](https://www.ultralytics.com/glossary/accuracy):** YOLOv8 is known for its accuracy in object detection, reducing false positives and enhancing the reliability of the security alarm system.
- **Integration Capabilities:** The project can be seamlessly integrated with existing security infrastructure, providing an upgraded layer of intelligent surveillance.
@@ -22,14 +22,14 @@ The Security Alarm System Project utilizing Ultralytics YOLOv8 integrates advanc
allowfullscreen>
@@ -69,7 +69,7 @@ Next, upload your images, and any pre-existing annotations you have from other t
Select the batch of images you have uploaded on the Annotate page to which you are taken after uploading images. Then, click "Start Annotating" to label images.
-To label with bounding boxes, press the `B` key on your keyboard or click the box icon in the sidebar. Click on a point where you want to start your bounding box, then drag to create the box:
+To label with bounding boxes, press the `B` key on your keyboard or click the box icon in the sidebar. Click on a point where you want to start your [bounding box](https://www.ultralytics.com/glossary/bounding-box), then drag to create the box:
@@ -194,7 +194,7 @@ You can also use your uploaded model as a [labeling assistant](https://docs.robo
Roboflow provides a range of features for use in evaluating models.
-Once you have uploaded a model to Roboflow, you can access our model evaluation tool, which provides a confusion matrix showing the performance of your model as well as an interactive vector analysis plot. These features can help you find opportunities to improve your model.
+Once you have uploaded a model to Roboflow, you can access our model evaluation tool, which provides a [confusion matrix](https://www.ultralytics.com/glossary/confusion-matrix) showing the performance of your model as well as an interactive vector analysis plot. These features can help you find opportunities to improve your model.
To access a confusion matrix, go to your model page on the Roboflow dashboard, then click "View Detailed Evaluation":
@@ -248,7 +248,7 @@ Below are a few of the many pieces of feedback we have received for using YOLOv8
Labeling data for YOLOv8 models using Roboflow is straightforward with Roboflow Annotate. First, create a project on Roboflow and upload your images. After uploading, select the batch of images and click "Start Annotating." You can use the `B` key for bounding boxes or the `P` key for polygons. For faster annotation, use the SAM-based label assistant by clicking the cursor icon in the sidebar. Detailed steps can be found [here](#upload-convert-and-label-data-for-yolov8-format).
-### What services does Roboflow offer for collecting YOLOv8 training data?
+### What services does Roboflow offer for collecting YOLOv8 [training data](https://www.ultralytics.com/glossary/training-data)?
Roboflow provides two key services for collecting YOLOv8 training data: [Universe](https://universe.roboflow.com/?ref=ultralytics) and [Collect](https://github.com/roboflow/roboflow-collect?ref=ultralytics). Universe offers access to over 250,000 vision datasets, while Collect helps you gather images using a webcam and automated prompts.
diff --git a/docs/en/integrations/tensorboard.md b/docs/en/integrations/tensorboard.md
index 59add925..d563aca1 100644
--- a/docs/en/integrations/tensorboard.md
+++ b/docs/en/integrations/tensorboard.md
@@ -6,7 +6,7 @@ keywords: YOLOv8, TensorBoard, model training, visualization, machine learning,
# Gain Visual Insights with YOLOv8's Integration with TensorBoard
-Understanding and fine-tuning computer vision models like [Ultralytics' YOLOv8](https://www.ultralytics.com/) becomes more straightforward when you take a closer look at their training processes. Model training visualization helps with getting insights into the model's learning patterns, performance metrics, and overall behavior. YOLOv8's integration with TensorBoard makes this process of visualization and analysis easier and enables more efficient and informed adjustments to the model.
+Understanding and fine-tuning [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models like [Ultralytics' YOLOv8](https://www.ultralytics.com/) becomes more straightforward when you take a closer look at their training processes. Model training visualization helps with getting insights into the model's learning patterns, performance metrics, and overall behavior. YOLOv8's integration with TensorBoard makes this process of visualization and analysis easier and enables more efficient and informed adjustments to the model.
This guide covers how to use TensorBoard with YOLOv8. You'll learn about various visualizations, from tracking metrics to analyzing model graphs. These tools will help you understand your YOLOv8 model's performance better.
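As a minimal, hedged sketch of how TensorBoard logging is typically enabled for a training run (the dataset YAML and epoch count are placeholders):

```python
from ultralytics import YOLO, settings

# Make sure TensorBoard logging is enabled in the Ultralytics settings.
settings.update({"tensorboard": True})

# Train briefly; TensorBoard event files are written to the run directory.
model = YOLO("yolov8n.pt")
model.train(data="coco8.yaml", epochs=3, imgsz=640)

# Then view the logs with: tensorboard --logdir runs/detect/train
```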
@@ -16,7 +16,7 @@ This guide covers how to use TensorBoard with YOLOv8. You'll learn about various
@@ -44,7 +44,7 @@ Before we look at the code for exporting YOLOv8 models to the TensorRT format, l
TensorRT offers several deployment options, and each option balances ease of integration, performance optimization, and flexibility differently:
-- **Deploying within TensorFlow**: This method integrates TensorRT into TensorFlow, allowing optimized models to run in a familiar TensorFlow environment. It's useful for models with a mix of supported and unsupported layers, as TF-TRT can handle these efficiently.
+- **Deploying within [TensorFlow](https://www.ultralytics.com/glossary/tensorflow)**: This method integrates TensorRT into TensorFlow, allowing optimized models to run in a familiar TensorFlow environment. It's useful for models with a mix of supported and unsupported layers, as TF-TRT can handle these efficiently.
@@ -111,7 +111,7 @@ For more details about the export process, visit the [Ultralytics documentation
### Exporting TensorRT with INT8 Quantization
-Exporting Ultralytics YOLO models using TensorRT with INT8 precision executes post-training quantization (PTQ). TensorRT uses calibration for PTQ, which measures the distribution of activations within each activation tensor as the YOLO model processes inference on representative input data, and then uses that distribution to estimate scale values for each tensor. Each activation tensor that is a candidate for quantization has an associated scale that is deduced by a calibration process.
+Exporting Ultralytics YOLO models using TensorRT with INT8 [precision](https://www.ultralytics.com/glossary/precision) executes post-training quantization (PTQ). TensorRT uses calibration for PTQ, which measures the distribution of activations within each activation tensor as the YOLO model processes inference on representative input data, and then uses that distribution to estimate scale values for each tensor. Each activation tensor that is a candidate for quantization has an associated scale that is deduced by a calibration process.
When processing implicitly quantized networks TensorRT uses INT8 opportunistically to optimize layer execution time. If a layer runs faster in INT8 and has assigned quantization scales on its data inputs and outputs, then a kernel with INT8 precision is assigned to that layer, otherwise TensorRT selects a precision of either FP32 or FP16 for the kernel based on whichever results in faster execution time for that layer.
@@ -125,7 +125,7 @@ The arguments provided when using [export](../modes/export.md) for an Ultralytic
- `workspace` : Controls the size (in GiB) of the device memory allocation while converting the model weights.
- - Adjust the `workspace` value according to your calibration needs and resource availability. While a larger `workspace` may increase calibration time, it allows TensorRT to explore a wider range of optimization tactics, potentially enhancing model performance and accuracy. Conversely, a smaller `workspace` can reduce calibration time but may limit the optimization strategies, affecting the quality of the quantized model.
+ - Adjust the `workspace` value according to your calibration needs and resource availability. While a larger `workspace` may increase calibration time, it allows TensorRT to explore a wider range of optimization tactics, potentially enhancing model performance and [accuracy](https://www.ultralytics.com/glossary/accuracy). Conversely, a smaller `workspace` can reduce calibration time but may limit the optimization strategies, affecting the quality of the quantized model.
- Default is `workspace=4` (GiB), this value may need to be increased if calibration crashes (exits without warning).
@@ -139,7 +139,7 @@ The arguments provided when using [export](../modes/export.md) for an Ultralytic
!!! note
- During calibration, twice the `batch` size provided will be used. Using small batches can lead to inaccurate scaling during calibration. This is because the process adjusts based on the data it sees. Small batches might not capture the full range of values, leading to issues with the final calibration, so the `batch` size is doubled automatically. If no batch size is specified `batch=1`, calibration will be run at `batch=1 * 2` to reduce calibration scaling errors.
+ During calibration, twice the `batch` size provided will be used. Using small batches can lead to inaccurate scaling during calibration. This is because the process adjusts based on the data it sees. Small batches might not capture the full range of values, leading to issues with the final calibration, so the `batch` size is doubled automatically. If no [batch size](https://www.ultralytics.com/glossary/batch-size) is specified `batch=1`, calibration will be run at `batch=1 * 2` to reduce calibration scaling errors.
Experimentation by NVIDIA led them to recommend using at least 500 calibration images that are representative of the data for your model, with INT8 quantization calibration. This is a guideline and not a _hard_ requirement, and **you will need to experiment with what is required to perform well for your dataset**. Since the calibration data is required for INT8 calibration with TensorRT, make certain to use the `data` argument when `int8=True` for TensorRT and use `data="my_dataset.yaml"`, which will use the images from [validation](../modes/val.md) to calibrate with. When no value is passed for `data` with export to TensorRT with INT8 quantization, the default will be to use one of the ["small" example datasets based on the model task](../datasets/index.md) instead of throwing an error.
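Putting the arguments above together, a hedged sketch of an INT8 TensorRT export might look like the following; the checkpoint and dataset YAML are placeholders.

```python
from ultralytics import YOLO

# Sketch only: checkpoint and dataset YAML are placeholders.
model = YOLO("yolov8n.pt")
model.export(
    format="engine",  # TensorRT engine
    int8=True,  # enable INT8 post-training quantization
    data="coco8.yaml",  # calibration images are drawn from this dataset's val split
    batch=8,  # calibration runs at twice this value internally
    workspace=4,  # device memory (GiB) available during conversion
    imgsz=640,
)
```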
diff --git a/docs/en/integrations/tf-graphdef.md b/docs/en/integrations/tf-graphdef.md
index 24ae0dd9..15cbd484 100644
--- a/docs/en/integrations/tf-graphdef.md
+++ b/docs/en/integrations/tf-graphdef.md
@@ -6,7 +6,7 @@ keywords: YOLOv8, export, TensorFlow, GraphDef, model deployment, TensorFlow Ser
# How to Export to TF GraphDef from YOLOv8 for Deployment
-When you are deploying cutting-edge computer vision models, like YOLOv8, in different environments, you might run into compatibility issues. Google's TensorFlow GraphDef, or TF GraphDef, offers a solution by providing a serialized, platform-independent representation of your model. Using the TF GraphDef model format, you can deploy your YOLOv8 model in environments where the complete TensorFlow ecosystem may not be available, such as mobile devices or specialized hardware.
+When you are deploying cutting-edge [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models, like YOLOv8, in different environments, you might run into compatibility issues. Google's [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) GraphDef, or TF GraphDef, offers a solution by providing a serialized, platform-independent representation of your model. Using the TF GraphDef model format, you can deploy your YOLOv8 model in environments where the complete TensorFlow ecosystem may not be available, such as mobile devices or specialized hardware.
In this guide, we'll walk you step by step through how to export your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models to the TF GraphDef model format. By converting your model, you can streamline deployment and use YOLOv8's computer vision capabilities in a broader range of applications and platforms.
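A minimal sketch of the export call discussed in this guide, assuming a placeholder checkpoint and using `format="pb"` as the TF GraphDef key in the Ultralytics exporter:

```python
from ultralytics import YOLO

# Export a placeholder checkpoint to TF GraphDef (*.pb).
model = YOLO("yolov8n.pt")
pb_path = model.export(format="pb")  # returns the path of the exported file
print(pb_path)
```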
@@ -24,7 +24,7 @@ GraphDef models can use hardware accelerators such as GPUs, TPUs, and AI chips,
## Key Features of TF GraphDef Models
-TF GraphDef offers distinct features for streamlining model deployment and optimization.
+TF GraphDef offers distinct features for streamlining [model deployment](https://www.ultralytics.com/glossary/model-deployment) and optimization.
Here's a look at its key characteristics:
@@ -111,7 +111,7 @@ Once you've exported your YOLOv8 model to the TF GraphDef format, the next step
However, for more information on deploying your TF GraphDef models, take a look at the following resources:
-- **[TensorFlow Serving](https://www.tensorflow.org/tfx/guide/serving)**: A guide on TensorFlow Serving that teaches how to deploy and serve machine learning models efficiently in production environments.
+- **[TensorFlow Serving](https://www.tensorflow.org/tfx/guide/serving)**: A guide on TensorFlow Serving that teaches how to deploy and serve [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models efficiently in production environments.
- **[TensorFlow Lite](https://www.tensorflow.org/api_docs/python/tf/lite/TFLiteConverter)**: This page describes how to convert machine learning models into a format optimized for on-device inference with TensorFlow Lite.
@@ -173,11 +173,11 @@ Exporting YOLOv8 models to the TF GraphDef format offers multiple advantages, in
Read more about the benefits in the [TF GraphDef section](#why-should-you-export-to-tf-graphdef) of our documentation.
-### Why should I use Ultralytics YOLOv8 over other object detection models?
+### Why should I use Ultralytics YOLOv8 over other [object detection](https://www.ultralytics.com/glossary/object-detection) models?
Ultralytics YOLOv8 offers numerous advantages compared to other models like YOLOv5 and YOLOv7. Some key benefits include:
-1. **State-of-the-Art Performance**: YOLOv8 provides exceptional speed and accuracy for real-time object detection, segmentation, and classification.
+1. **State-of-the-Art Performance**: YOLOv8 provides exceptional speed and [accuracy](https://www.ultralytics.com/glossary/accuracy) for real-time object detection, segmentation, and classification.
2. **Ease of Use**: Features a user-friendly API for model training, validation, prediction, and export, making it accessible for both beginners and experts.
3. **Broad Compatibility**: Supports multiple export formats including ONNX, TensorRT, CoreML, and TensorFlow, for versatile deployment options.
diff --git a/docs/en/integrations/tf-savedmodel.md b/docs/en/integrations/tf-savedmodel.md
index 5de706a8..9f04dc78 100644
--- a/docs/en/integrations/tf-savedmodel.md
+++ b/docs/en/integrations/tf-savedmodel.md
@@ -6,7 +6,7 @@ keywords: YOLOv8, TF SavedModel, Ultralytics, TensorFlow, model export, model de
# Understand How to Export to TF SavedModel Format From YOLOv8
-Deploying machine learning models can be challenging. However, using an efficient and flexible model format can make your job easier. TF SavedModel is an open-source machine-learning framework used by TensorFlow to load machine-learning models in a consistent way. It is like a suitcase for TensorFlow models, making them easy to carry and use on different devices and systems.
+Deploying [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models can be challenging. However, using an efficient and flexible model format can make your job easier. TF SavedModel is an open-source model format used by TensorFlow to save and load machine-learning models in a consistent way. It is like a suitcase for TensorFlow models, making them easy to carry and use on different devices and systems.
Learning how to export to TF SavedModel from [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models can help you deploy models easily across different platforms and environments. In this guide, we'll walk through how to convert your models to the TF SavedModel format, simplifying the process of running inferences with your models on different devices.
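As a rough sketch of the export-then-predict flow this guide walks through (the checkpoint, image URL, and exported folder name are assumptions based on default naming):

```python
from ultralytics import YOLO

# Export to TF SavedModel; the output folder name assumes default naming.
model = YOLO("yolov8n.pt")
model.export(format="saved_model")  # e.g. creates 'yolov8n_saved_model/'

# The exported SavedModel can be loaded back for inference.
tf_model = YOLO("yolov8n_saved_model")
results = tf_model("https://ultralytics.com/images/bus.jpg")
```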
@@ -28,7 +28,7 @@ Here are the key features that make TF SavedModel a great option for AI develope
- **Ease of Deployment**: TF SavedModel bundles the computational graph, trained parameters, and necessary metadata into a single package. They can be easily loaded and used for inference without requiring the original code that built the model. This makes the deployment of TensorFlow models straightforward and efficient in various production environments.
-- **Asset Management**: TF SavedModel supports the inclusion of external assets such as vocabularies, embeddings, or lookup tables. These assets are stored alongside the graph definition and variables, ensuring they are available when the model is loaded. This feature simplifies the management and distribution of models that rely on external resources.
+- **Asset Management**: TF SavedModel supports the inclusion of external assets such as vocabularies, [embeddings](https://www.ultralytics.com/glossary/embeddings), or lookup tables. These assets are stored alongside the graph definition and variables, ensuring they are available when the model is loaded. This feature simplifies the management and distribution of models that rely on external resources.
## Deployment Options with TF SavedModel
@@ -42,7 +42,7 @@ TF SavedModel provides a range of options to deploy your machine learning models
- **Mobile and Embedded Devices:** TensorFlow Lite, a lightweight solution for running machine learning models on mobile, embedded, and IoT devices, supports converting TF SavedModels to the TensorFlow Lite format. This allows you to deploy your models on a wide range of devices, from smartphones and tablets to microcontrollers and edge devices.
-- **TensorFlow Runtime:** TensorFlow Runtime (`tfrt`) is a high-performance runtime for executing TensorFlow graphs. It provides lower-level APIs for loading and running TF SavedModels in C++ environments. TensorFlow Runtime offers better performance compared to the standard TensorFlow runtime. It is suitable for deployment scenarios that require low-latency inference and tight integration with existing C++ codebases.
+- **TensorFlow Runtime:** TensorFlow Runtime (`tfrt`) is a high-performance runtime for executing [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) graphs. It provides lower-level APIs for loading and running TF SavedModels in C++ environments. TensorFlow Runtime offers better performance compared to the standard TensorFlow runtime. It is suitable for deployment scenarios that require low-latency inference and tight integration with existing C++ codebases.
## Exporting YOLOv8 Models to TF SavedModel
@@ -157,7 +157,7 @@ Refer to the [Ultralytics Export documentation](../modes/export.md) for more det
### Why should I use the TensorFlow SavedModel format?
-The TensorFlow SavedModel format offers several advantages for model deployment:
+The TensorFlow SavedModel format offers several advantages for [model deployment](https://www.ultralytics.com/glossary/model-deployment):
- **Portability:** It provides a language-neutral format, making it easy to share and deploy models across different environments.
- **Compatibility:** Integrates seamlessly with tools like TensorFlow Serving, TensorFlow Lite, and TensorFlow.js, which are essential for deploying models on various platforms, including web and mobile applications.
diff --git a/docs/en/integrations/tfjs.md b/docs/en/integrations/tfjs.md
index 726bba25..ea2d613c 100644
--- a/docs/en/integrations/tfjs.md
+++ b/docs/en/integrations/tfjs.md
@@ -6,9 +6,9 @@ keywords: YOLOv8, TensorFlow.js, TF.js, model export, machine learning, object d
# Export to TF.js Model Format From a YOLOv8 Model Format
-Deploying machine learning models directly in the browser or on Node.js can be tricky. You'll need to make sure your model format is optimized for faster performance so that the model can be used to run interactive applications locally on the user's device. The TensorFlow.js, or TF.js, model format is designed to use minimal power while delivering fast performance.
+Deploying [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models directly in the browser or on Node.js can be tricky. You'll need to make sure your model format is optimized for faster performance so that the model can be used to run interactive applications locally on the user's device. The TensorFlow.js, or TF.js, model format is designed to use minimal power while delivering fast performance.
-The 'export to TF.js model format' feature allows you to optimize your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models for high-speed and locally-run object detection inference. In this guide, we'll walk you through converting your models to the TF.js format, making it easier for your models to perform well on various local browsers and Node.js applications.
+The 'export to TF.js model format' feature allows you to optimize your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models for high-speed and locally-run [object detection](https://www.ultralytics.com/glossary/object-detection) inference. In this guide, we'll walk you through converting your models to the TF.js format, making it easier for your models to perform well on various local browsers and Node.js applications.
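A minimal sketch of the conversion step, assuming a placeholder checkpoint; the exported web-model directory can then be served to a browser or Node.js app with the TF.js runtime.

```python
from ultralytics import YOLO

# Convert a placeholder checkpoint to the TF.js web model format.
model = YOLO("yolov8n.pt")
tfjs_dir = model.export(format="tfjs")  # returns the exported directory path
print(tfjs_dir)
```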
## Why Should You Export to TF.js?
@@ -18,7 +18,7 @@ Exporting your machine learning models to TensorFlow.js, developed by the Tensor
@@ -122,7 +122,7 @@ apt-get install libgl1
#### HTTP Server
-DeepSparse Server runs on top of the popular FastAPI web framework and Uvicorn web server. With just a single CLI command, you can easily setup a model service endpoint with DeepSparse. The Server supports any Pipeline from DeepSparse, including object detection with YOLOv5, enabling you to send raw images to the endpoint and receive the bounding boxes.
+DeepSparse Server runs on top of the popular FastAPI web framework and Uvicorn web server. With just a single CLI command, you can easily set up a model service endpoint with DeepSparse. The Server supports any Pipeline from DeepSparse, including [object detection](https://www.ultralytics.com/glossary/object-detection) with YOLOv5, enabling you to send raw images to the endpoint and receive the bounding boxes.
Spin up the Server with the pruned-quantized YOLOv5s:
diff --git a/docs/en/yolov5/tutorials/pytorch_hub_model_loading.md b/docs/en/yolov5/tutorials/pytorch_hub_model_loading.md
index b00d94db..27e26f14 100644
--- a/docs/en/yolov5/tutorials/pytorch_hub_model_loading.md
+++ b/docs/en/yolov5/tutorials/pytorch_hub_model_loading.md
@@ -4,7 +4,7 @@ description: Learn how to load YOLOv5 from PyTorch Hub for seamless model infere
keywords: YOLOv5, PyTorch Hub, model loading, Ultralytics, object detection, machine learning, AI, tutorial, inference
---
-📚 This guide explains how to load YOLOv5 🚀 from PyTorch Hub at [https://pytorch.org/hub/ultralytics_yolov5](https://pytorch.org/hub/ultralytics_yolov5/).
+📚 This guide explains how to load YOLOv5 🚀 from [PyTorch](https://www.ultralytics.com/glossary/pytorch) Hub at [https://pytorch.org/hub/ultralytics_yolov5](https://pytorch.org/hub/ultralytics_yolov5/).
## Before You Start
@@ -44,7 +44,7 @@ results.pandas().xyxy[0]
### Detailed Example
-This example shows **batched inference** with **PIL** and **OpenCV** image sources. `results` can be **printed** to console, **saved** to `runs/hub`, **showed** to screen on supported environments, and returned as **tensors** or **pandas** dataframes.
+This example shows **batched inference** with **PIL** and **[OpenCV](https://www.ultralytics.com/glossary/opencv)** image sources. `results` can be **printed** to console, **saved** to `runs/hub`, **shown** to screen on supported environments, and returned as **tensors** or **pandas** dataframes.
```python
import cv2
diff --git a/docs/en/yolov5/tutorials/roboflow_datasets_integration.md b/docs/en/yolov5/tutorials/roboflow_datasets_integration.md
index 3047e48c..55728f21 100644
--- a/docs/en/yolov5/tutorials/roboflow_datasets_integration.md
+++ b/docs/en/yolov5/tutorials/roboflow_datasets_integration.md
@@ -52,7 +52,7 @@ We have released a custom training tutorial demonstrating all of the above capab
## Active Learning
-The real world is messy and your model will invariably encounter situations your dataset didn't anticipate. Using [active learning](https://blog.roboflow.com/what-is-active-learning/?ref=ultralytics) is an important strategy to iteratively improve your dataset and model. With the Roboflow and YOLOv5 integration, you can quickly make improvements on your model deployments by using a battle tested machine learning pipeline.
+The real world is messy and your model will invariably encounter situations your dataset didn't anticipate. Using [active learning](https://blog.roboflow.com/what-is-active-learning/?ref=ultralytics) is an important strategy to iteratively improve your dataset and model. With the Roboflow and YOLOv5 integration, you can quickly make improvements to your [model deployments](https://www.ultralytics.com/glossary/model-deployment) by using a battle-tested [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) pipeline.
@@ -38,7 +38,7 @@ The [existing guide](https://coral.ai/docs/accelerator/get-started/) by Coral on
- [Raspberry Pi 4B](https://www.raspberrypi.com/products/raspberry-pi-4-model-b/) (2GB or more recommended) or [Raspberry Pi 5](https://www.raspberrypi.com/products/raspberry-pi-5/) (Recommended)
- [Raspberry Pi OS](https://www.raspberrypi.com/software/) Bullseye/Bookworm (64-bit) with desktop (Recommended)
- [Coral USB Accelerator](https://coral.ai/products/accelerator/)
-- A non-ARM based platform for exporting an Ultralytics PyTorch model
+- A non-ARM based platform for exporting an Ultralytics [PyTorch](https://www.ultralytics.com/glossary/pytorch) model
## Installation Walkthrough
@@ -85,7 +85,7 @@ After installing the runtime, you need to plug in your Coral Edge TPU into a USB
To use the Edge TPU, you need to convert your model into a compatible format. It is recommended that you run export on Google Colab, x86_64 Linux machine, using the official [Ultralytics Docker container](docker-quickstart.md), or using [Ultralytics HUB](../hub/quickstart.md), since the Edge TPU compiler is not available on ARM. See the [Export Mode](../modes/export.md) for the available arguments.
-!!! exporting the model
+!!! note "Exporting the model"
=== "Python"
@@ -111,7 +111,7 @@ The exported model will be saved in the `
@@ -23,13 +23,13 @@ Measuring the gap between two objects is known as distance calculation within a
## Visuals
-| Distance Calculation using Ultralytics YOLOv8 |
-| :----------------------------------------------------------------------------------------------------------------------------------------------: |
-| ![Ultralytics YOLOv8 Distance Calculation](https://github.com/ultralytics/docs/releases/download/0/ultralytics-yolov8-distance-calculation.avif) |
+| Distance Calculation using Ultralytics YOLOv8 |
+| :---------------------------------------------------------------------------------------------------------------------------: |
+| ![Ultralytics YOLOv8 Distance Calculation](https://github.com/ultralytics/docs/releases/download/0/distance-calculation.avif) |
## Advantages of Distance Calculation?
-- **Localization Precision:** Enhances accurate spatial positioning in computer vision tasks.
+- **Localization [Precision](https://www.ultralytics.com/glossary/precision):** Enhances accurate spatial positioning in [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) tasks.
- **Size Estimation:** Allows estimation of object size for better contextual understanding.
???+ tip "Distance Calculation"
@@ -73,7 +73,7 @@ Measuring the gap between two objects is known as distance calculation within a
cv2.destroyAllWindows()
```
-???+ tip "Note"
+???+ note
- Mouse Right Click will delete all drawn points
- Mouse Left Click can be used to draw points
@@ -94,15 +94,7 @@ Measuring the gap between two objects is known as distance calculation within a
### Arguments `model.track`
-| Name | Type | Default | Description |
-| --------- | ------- | -------------- | ----------------------------------------------------------- |
-| `source` | `im0` | `None` | source directory for images or videos |
-| `persist` | `bool` | `False` | persisting tracks between frames |
-| `tracker` | `str` | `botsort.yaml` | Tracking method 'bytetrack' or 'botsort' |
-| `conf` | `float` | `0.3` | Confidence Threshold |
-| `iou` | `float` | `0.5` | IOU Threshold |
-| `classes` | `list` | `None` | filter results by class, i.e. classes=0, or classes=[0,2,3] |
-| `verbose` | `bool` | `True` | Display the object tracking results |
+{% include "macros/track-args.md" %}
## FAQ
@@ -120,7 +112,7 @@ Using distance calculation with Ultralytics YOLOv8 offers several advantages:
### Can I perform distance calculation in real-time video streams with Ultralytics YOLOv8?
-Yes, you can perform distance calculation in real-time video streams with Ultralytics YOLOv8. The process involves capturing video frames using OpenCV, running YOLOv8 object detection, and using the `DistanceCalculation` class to calculate distances between objects in successive frames. For a detailed implementation, see the [video stream example](#distance-calculation-using-ultralytics-yolov8).
+Yes, you can perform distance calculation in real-time video streams with Ultralytics YOLOv8. The process involves capturing video frames using [OpenCV](https://www.ultralytics.com/glossary/opencv), running YOLOv8 [object detection](https://www.ultralytics.com/glossary/object-detection), and using the `DistanceCalculation` class to calculate distances between objects in successive frames. For a detailed implementation, see the [video stream example](#distance-calculation-using-ultralytics-yolov8).
### How do I delete points drawn during distance calculation using Ultralytics YOLOv8?
diff --git a/docs/en/guides/docker-quickstart.md b/docs/en/guides/docker-quickstart.md
index 6d08fac0..3ee48946 100644
--- a/docs/en/guides/docker-quickstart.md
+++ b/docs/en/guides/docker-quickstart.md
@@ -237,7 +237,7 @@ sudo docker pull ultralytics/ultralytics:latest
For detailed steps, refer to our [Docker Quickstart Guide](../quickstart.md).
-### What are the benefits of using Ultralytics Docker images for machine learning projects?
+### What are the benefits of using Ultralytics Docker images for [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) projects?
Using Ultralytics Docker images ensures a consistent environment across different machines, replicating the same software and dependencies. This is particularly useful for collaborating across teams, running models on various hardware, and maintaining reproducibility. For GPU-based training, Ultralytics provides optimized Docker images such as `Dockerfile` for general GPU usage and `Dockerfile-jetson` for NVIDIA Jetson devices. Explore [Ultralytics Docker Hub](https://hub.docker.com/r/ultralytics/ultralytics) for more details.
diff --git a/docs/en/guides/heatmaps.md b/docs/en/guides/heatmaps.md
index 83972695..d2ebd4b1 100644
--- a/docs/en/guides/heatmaps.md
+++ b/docs/en/guides/heatmaps.md
@@ -4,7 +4,7 @@ description: Transform complex data into insightful heatmaps using Ultralytics Y
keywords: Ultralytics, YOLOv8, heatmaps, data visualization, data analysis, complex data, patterns, trends, anomalies
---
-# Advanced Data Visualization: Heatmaps using Ultralytics YOLOv8 🚀
+# Advanced [Data Visualization](https://www.ultralytics.com/glossary/data-visualization): Heatmaps using Ultralytics YOLOv8 🚀
## Introduction to Heatmaps
@@ -293,14 +293,7 @@ A heatmap generated with [Ultralytics YOLOv8](https://github.com/ultralytics/ult
### Arguments `model.track`
-| Name | Type | Default | Description |
-| --------- | ------- | -------------- | ----------------------------------------------------------- |
-| `source` | `im0` | `None` | source directory for images or videos |
-| `persist` | `bool` | `False` | persisting tracks between frames |
-| `tracker` | `str` | `botsort.yaml` | Tracking method 'bytetrack' or 'botsort' |
-| `conf` | `float` | `0.3` | Confidence Threshold |
-| `iou` | `float` | `0.5` | IOU Threshold |
-| `classes` | `list` | `None` | filter results by class, i.e. classes=0, or classes=[0,2,3] |
+{% include "macros/track-args.md" %}
### Heatmap COLORMAPs
@@ -366,9 +359,9 @@ cv2.destroyAllWindows()
For further guidance, check the [Tracking Mode](../modes/track.md) page.
-### What makes Ultralytics YOLOv8 heatmaps different from other data visualization tools like those from OpenCV or Matplotlib?
+### What makes Ultralytics YOLOv8 heatmaps different from other data visualization tools like those from [OpenCV](https://www.ultralytics.com/glossary/opencv) or Matplotlib?
-Ultralytics YOLOv8 heatmaps are specifically designed for integration with its object detection and tracking models, providing an end-to-end solution for real-time data analysis. Unlike generic visualization tools like OpenCV or Matplotlib, YOLOv8 heatmaps are optimized for performance and automated processing, supporting features like persistent tracking, decay factor adjustment, and real-time video overlay. For more information on YOLOv8's unique features, visit the [Ultralytics YOLOv8 Introduction](https://www.ultralytics.com/blog/introducing-ultralytics-yolov8).
+Ultralytics YOLOv8 heatmaps are specifically designed for integration with its [object detection](https://www.ultralytics.com/glossary/object-detection) and tracking models, providing an end-to-end solution for real-time data analysis. Unlike generic visualization tools like OpenCV or Matplotlib, YOLOv8 heatmaps are optimized for performance and automated processing, supporting features like persistent tracking, decay factor adjustment, and real-time video overlay. For more information on YOLOv8's unique features, visit the [Ultralytics YOLOv8 Introduction](https://www.ultralytics.com/blog/introducing-ultralytics-yolov8).
### How can I visualize only specific object classes in heatmaps using Ultralytics YOLOv8?
@@ -400,4 +393,4 @@ cv2.destroyAllWindows()
### Why should businesses choose Ultralytics YOLOv8 for heatmap generation in data analysis?
-Ultralytics YOLOv8 offers seamless integration of advanced object detection and real-time heatmap generation, making it an ideal choice for businesses looking to visualize data more effectively. The key advantages include intuitive data distribution visualization, efficient pattern detection, and enhanced spatial analysis for better decision-making. Additionally, YOLOv8's cutting-edge features such as persistent tracking, customizable colormaps, and support for various export formats make it superior to other tools like TensorFlow and OpenCV for comprehensive data analysis. Learn more about business applications at [Ultralytics Plans](https://www.ultralytics.com/plans).
+Ultralytics YOLOv8 offers seamless integration of advanced object detection and real-time heatmap generation, making it an ideal choice for businesses looking to visualize data more effectively. The key advantages include intuitive data distribution visualization, efficient pattern detection, and enhanced spatial analysis for better decision-making. Additionally, YOLOv8's cutting-edge features such as persistent tracking, customizable colormaps, and support for various export formats make it superior to other tools like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and OpenCV for comprehensive data analysis. Learn more about business applications at [Ultralytics Plans](https://www.ultralytics.com/plans).
diff --git a/docs/en/guides/hyperparameter-tuning.md b/docs/en/guides/hyperparameter-tuning.md
index b9b1723d..d715820f 100644
--- a/docs/en/guides/hyperparameter-tuning.md
+++ b/docs/en/guides/hyperparameter-tuning.md
@@ -4,19 +4,19 @@ description: Master hyperparameter tuning for Ultralytics YOLO to optimize model
keywords: Ultralytics YOLO, hyperparameter tuning, machine learning, model optimization, genetic algorithms, learning rate, batch size, epochs
---
-# Ultralytics YOLO Hyperparameter Tuning Guide
+# Ultralytics YOLO [Hyperparameter Tuning](https://www.ultralytics.com/glossary/hyperparameter-tuning) Guide
## Introduction
-Hyperparameter tuning is not just a one-time set-up but an iterative process aimed at optimizing the machine learning model's performance metrics, such as accuracy, precision, and recall. In the context of Ultralytics YOLO, these hyperparameters could range from learning rate to architectural details, such as the number of layers or types of activation functions used.
+Hyperparameter tuning is not just a one-time set-up but an iterative process aimed at optimizing the [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) model's performance metrics, such as accuracy, precision, and recall. In the context of Ultralytics YOLO, these hyperparameters could range from learning rate to architectural details, such as the number of layers or types of activation functions used.
### What are Hyperparameters?
Hyperparameters are high-level, structural settings for the algorithm. They are set prior to the training phase and remain constant during it. Here are some commonly tuned hyperparameters in Ultralytics YOLO:
-- **Learning Rate** `lr0`: Determines the step size at each iteration while moving towards a minimum in the loss function.
-- **Batch Size** `batch`: Number of images processed simultaneously in a forward pass.
-- **Number of Epochs** `epochs`: An epoch is one complete forward and backward pass of all the training examples.
+- **Learning Rate** `lr0`: Determines the step size at each iteration while moving towards a minimum in the [loss function](https://www.ultralytics.com/glossary/loss-function).
+- **[Batch Size](https://www.ultralytics.com/glossary/batch-size)** `batch`: Number of images processed simultaneously in a forward pass.
+- **Number of [Epochs](https://www.ultralytics.com/glossary/epoch)** `epochs`: An epoch is one complete forward and backward pass of all the training examples.
- **Architecture Specifics**: Such as channel counts, number of layers, types of activation functions, etc.
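For reference, here is a hedged sketch of how these hyperparameters map onto a training call; the values are illustrative, not recommendations, and the dataset YAML is a placeholder.

```python
from ultralytics import YOLO

# Illustrative values only; tune them for your own dataset.
model = YOLO("yolov8n.pt")
model.train(
    data="coco8.yaml",  # placeholder dataset YAML
    epochs=30,  # number of complete passes over the training data
    batch=16,  # images per forward pass
    lr0=0.01,  # initial learning rate
)
```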
@@ -26,12 +26,12 @@ Whether you're a beginner or an expert in deep learning, our tutorials offer val
Here's a compilation of in-depth guides to help you master different aspects of Ultralytics YOLO.
- [YOLO Common Issues](yolo-common-issues.md) ⭐ RECOMMENDED: Practical solutions and troubleshooting tips to the most frequently encountered issues when working with Ultralytics YOLO models.
-- [YOLO Performance Metrics](yolo-performance-metrics.md) ⭐ ESSENTIAL: Understand the key metrics like mAP, IoU, and F1 score used to evaluate the performance of your YOLO models. Includes practical examples and tips on how to improve detection accuracy and speed.
-- [Model Deployment Options](model-deployment-options.md): Overview of YOLO model deployment formats like ONNX, OpenVINO, and TensorRT, with pros and cons for each to inform your deployment strategy.
+- [YOLO Performance Metrics](yolo-performance-metrics.md) ⭐ ESSENTIAL: Understand the key metrics like mAP, IoU, and [F1 score](https://www.ultralytics.com/glossary/f1-score) used to evaluate the performance of your YOLO models. Includes practical examples and tips on how to improve detection accuracy and speed.
+- [Model Deployment Options](model-deployment-options.md): Overview of YOLO [model deployment](https://www.ultralytics.com/glossary/model-deployment) formats like ONNX, OpenVINO, and TensorRT, with pros and cons for each to inform your deployment strategy.
- [K-Fold Cross Validation](kfold-cross-validation.md) 🚀 NEW: Learn how to improve model generalization using K-Fold cross-validation technique.
- [Hyperparameter Tuning](hyperparameter-tuning.md) 🚀 NEW: Discover how to optimize your YOLO models by fine-tuning hyperparameters using the Tuner class and genetic evolution algorithms.
- [SAHI Tiled Inference](sahi-tiled-inference.md) 🚀 NEW: Comprehensive guide on leveraging SAHI's sliced inference capabilities with YOLOv8 for object detection in high-resolution images.
-- [AzureML Quickstart](azureml-quickstart.md) 🚀 NEW: Get up and running with Ultralytics YOLO models on Microsoft's Azure Machine Learning platform. Learn how to train, deploy, and scale your object detection projects in the cloud.
+- [AzureML Quickstart](azureml-quickstart.md) 🚀 NEW: Get up and running with Ultralytics YOLO models on Microsoft's Azure [Machine Learning](https://www.ultralytics.com/glossary/machine-learning-ml) platform. Learn how to train, deploy, and scale your object detection projects in the cloud.
- [Conda Quickstart](conda-quickstart.md) 🚀 NEW: Step-by-step guide to setting up a [Conda](https://anaconda.org/conda-forge/ultralytics) environment for Ultralytics. Learn how to install and start using the Ultralytics package efficiently with Conda.
- [Docker Quickstart](docker-quickstart.md) 🚀 NEW: Complete guide to setting up and using Ultralytics YOLO models with [Docker](https://hub.docker.com/r/ultralytics/ultralytics). Learn how to install Docker, manage GPU support, and run YOLO models in isolated containers for consistent development and deployment.
- [Raspberry Pi](raspberry-pi.md) 🚀 NEW: Quickstart tutorial to run YOLO models to the latest Raspberry Pi hardware.
@@ -47,7 +47,7 @@ Here's a compilation of in-depth guides to help you master different aspects of
- [Defining A Computer Vision Project's Goals](defining-project-goals.md) 🚀 NEW: Walk through how to effectively define clear and measurable goals for your computer vision project. Learn the importance of a well-defined problem statement and how it creates a roadmap for your project.
- [Data Collection and Annotation](data-collection-and-annotation.md) 🚀 NEW: Explore the tools, techniques, and best practices for collecting and annotating data to create high-quality inputs for your computer vision models.
- [Preprocessing Annotated Data](preprocessing_annotated_data.md) 🚀 NEW: Learn about preprocessing and augmenting image data in computer vision projects using YOLOv8, including normalization, dataset augmentation, splitting, and exploratory data analysis (EDA).
-- [Tips for Model Training](model-training-tips.md) 🚀 NEW: Explore tips on optimizing batch sizes, using mixed precision, applying pre-trained weights, and more to make training your computer vision model a breeze.
+- [Tips for Model Training](model-training-tips.md) 🚀 NEW: Explore tips on optimizing [batch sizes](https://www.ultralytics.com/glossary/batch-size), using [mixed precision](https://www.ultralytics.com/glossary/mixed-precision), applying pre-trained weights, and more to make training your computer vision model a breeze.
- [Insights on Model Evaluation and Fine-Tuning](model-evaluation-insights.md) 🚀 NEW: Gain insights into the strategies and best practices for evaluating and fine-tuning your computer vision models. Learn about the iterative process of refining models to achieve optimal results.
- [A Guide on Model Testing](model-testing.md) 🚀 NEW: A thorough guide on testing your computer vision models in realistic settings. Learn how to verify accuracy, reliability, and performance in line with project goals.
- [Best Practices for Model Deployment](model-deployment-practices.md) 🚀 NEW: Walk through tips and best practices for efficiently deploying models in computer vision projects, with a focus on optimization, troubleshooting, and security.
@@ -89,7 +89,7 @@ For detailed dataset formatting and additional options, refer to our [Tips for M
### What performance metrics should I use to evaluate my YOLO model?
-Evaluating your YOLO model performance is crucial to understanding its efficacy. Key metrics include Mean Average Precision (mAP), Intersection over Union (IoU), and F1 score. These metrics help assess the accuracy and precision of object detection tasks. You can learn more about these metrics and how to improve your model in our [YOLO Performance Metrics](yolo-performance-metrics.md) guide.
+Evaluating your YOLO model performance is crucial to understanding its efficacy. Key metrics include [Mean Average Precision](https://www.ultralytics.com/glossary/mean-average-precision-map) (mAP), [Intersection over Union](https://www.ultralytics.com/glossary/intersection-over-union-iou) (IoU), and F1 score. These metrics help assess the accuracy and [precision](https://www.ultralytics.com/glossary/precision) of object detection tasks. You can learn more about these metrics and how to improve your model in our [YOLO Performance Metrics](yolo-performance-metrics.md) guide.
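As a small sketch of how these metrics are read back after validation (the checkpoint and dataset YAML are placeholders):

```python
from ultralytics import YOLO

# Validate a placeholder checkpoint and read the detection metrics.
model = YOLO("yolov8n.pt")
metrics = model.val(data="coco8.yaml")
print(metrics.box.map)  # mAP50-95
print(metrics.box.map50)  # mAP50
print(metrics.box.map75)  # mAP75
```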
### Why should I use Ultralytics HUB for my computer vision projects?
@@ -97,7 +97,7 @@ Ultralytics HUB is a no-code platform that simplifies managing, training, and de
### What are the common issues faced during YOLO model training, and how can I resolve them?
-Common issues during YOLO model training include data formatting errors, model architecture mismatches, and insufficient training data. To address these, ensure your dataset is correctly formatted, check for compatible model versions, and augment your training data. For a comprehensive list of solutions, refer to our [YOLO Common Issues](yolo-common-issues.md) guide.
+Common issues during YOLO model training include data formatting errors, model architecture mismatches, and insufficient [training data](https://www.ultralytics.com/glossary/training-data). To address these, ensure your dataset is correctly formatted, check for compatible model versions, and augment your training data. For a comprehensive list of solutions, refer to our [YOLO Common Issues](yolo-common-issues.md) guide.
### How can I deploy my YOLO model for real-time object detection on edge devices?
diff --git a/docs/en/guides/instance-segmentation-and-tracking.md b/docs/en/guides/instance-segmentation-and-tracking.md
index 52a11a3a..95e91a8c 100644
--- a/docs/en/guides/instance-segmentation-and-tracking.md
+++ b/docs/en/guides/instance-segmentation-and-tracking.md
@@ -6,9 +6,9 @@ keywords: instance segmentation, tracking, YOLOv8, Ultralytics, object detection
# Instance Segmentation and Tracking using Ultralytics YOLOv8 🚀
-## What is Instance Segmentation?
+## What is [Instance Segmentation](https://www.ultralytics.com/glossary/instance-segmentation)?
-[Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) instance segmentation involves identifying and outlining individual objects in an image, providing a detailed understanding of spatial distribution. Unlike semantic segmentation, it uniquely labels and precisely delineates each object, crucial for tasks like object detection and medical imaging.
+[Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) instance segmentation involves identifying and outlining individual objects in an image, providing a detailed understanding of spatial distribution. Unlike [semantic segmentation](https://www.ultralytics.com/glossary/semantic-segmentation), it uniquely labels and precisely delineates each object, crucial for tasks like [object detection](https://www.ultralytics.com/glossary/object-detection) and medical imaging.
There are two types of instance segmentation tracking available in the Ultralytics package:
@@ -194,7 +194,7 @@ Instance segmentation identifies and outlines individual objects within an image
### Why should I use Ultralytics YOLOv8 for instance segmentation and tracking over other models like Mask R-CNN or Faster R-CNN?
-Ultralytics YOLOv8 offers real-time performance, superior accuracy, and ease of use compared to other models like Mask R-CNN or Faster R-CNN. YOLOv8 provides a seamless integration with Ultralytics HUB, allowing users to manage models, datasets, and training pipelines efficiently. Discover more about the benefits of YOLOv8 in the [Ultralytics blog](https://www.ultralytics.com/blog/introducing-ultralytics-yolov8).
+Ultralytics YOLOv8 offers real-time performance, superior [accuracy](https://www.ultralytics.com/glossary/accuracy), and ease of use compared to other models like Mask R-CNN or Faster R-CNN. YOLOv8 provides a seamless integration with Ultralytics HUB, allowing users to manage models, datasets, and training pipelines efficiently. Discover more about the benefits of YOLOv8 in the [Ultralytics blog](https://www.ultralytics.com/blog/introducing-ultralytics-yolov8).
### How can I implement object tracking using Ultralytics YOLOv8?
diff --git a/docs/en/guides/isolating-segmentation-objects.md b/docs/en/guides/isolating-segmentation-objects.md
index 9f099c8c..737510e9 100644
--- a/docs/en/guides/isolating-segmentation-objects.md
+++ b/docs/en/guides/isolating-segmentation-objects.md
@@ -96,7 +96,7 @@ After performing the [Segment Task](../tasks/segment.md), it's sometimes desirab
1. For more info on `c.masks.xy` see [Masks Section from Predict Mode](../modes/predict.md#masks).
- 2. Here the values are cast into `np.int32` for compatibility with `drawContours()` function from OpenCV.
+ 2. Here the values are cast into `np.int32` for compatibility with `drawContours()` function from [OpenCV](https://www.ultralytics.com/glossary/opencv).
3. The OpenCV `drawContours()` function expects contours to have a shape of `[N, 1, 2]` expand section below for more details.
@@ -137,7 +137,7 @@ After performing the [Segment Task](../tasks/segment.md), it's sometimes desirab
### Object Isolation Options
- !!! example ""
+ !!! example
=== "Black Background Pixels"
@@ -178,7 +178,7 @@ After performing the [Segment Task](../tasks/segment.md), it's sometimes desirab
iso_crop = isolated[y1:y2, x1:x2]
```
- 1. For more information on bounding box results, see [Boxes Section from Predict Mode](../modes/predict.md/#boxes)
+ 1. For more information on [bounding box](https://www.ultralytics.com/glossary/bounding-box) results, see [Boxes Section from Predict Mode](../modes/predict.md/#boxes)
??? question "What does this code do?"
@@ -253,7 +253,7 @@ After performing the [Segment Task](../tasks/segment.md), it's sometimes desirab
_ = cv2.imwrite(f"{img_name}_{label}-{ci}.png", iso_crop)
```
- - In this example, the `img_name` is the base-name of the source image file, `label` is the detected class-name, and `ci` is the index of the object detection (in case of multiple instances with the same class name).
+ - In this example, the `img_name` is the base-name of the source image file, `label` is the detected class-name, and `ci` is the index of the [object detection](https://www.ultralytics.com/glossary/object-detection) (in case of multiple instances with the same class name).
## Full Example code
diff --git a/docs/en/guides/kfold-cross-validation.md b/docs/en/guides/kfold-cross-validation.md
index a0a34533..381aef0e 100644
--- a/docs/en/guides/kfold-cross-validation.md
+++ b/docs/en/guides/kfold-cross-validation.md
@@ -8,13 +8,13 @@ keywords: Ultralytics, YOLO, K-Fold Cross Validation, object detection, sklearn,
## Introduction
-This comprehensive guide illustrates the implementation of K-Fold Cross Validation for object detection datasets within the Ultralytics ecosystem. We'll leverage the YOLO detection format and key Python libraries such as sklearn, pandas, and PyYaml to guide you through the necessary setup, the process of generating feature vectors, and the execution of a K-Fold dataset split.
+This comprehensive guide illustrates the implementation of K-Fold Cross Validation for [object detection](https://www.ultralytics.com/glossary/object-detection) datasets within the Ultralytics ecosystem. We'll leverage the YOLO detection format and key Python libraries such as sklearn, pandas, and PyYaml to guide you through the necessary setup, the process of generating feature vectors, and the execution of a K-Fold dataset split.
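+
+A hedged sketch of the core split (toy per-image class-count feature vectors standing in for a real label index; sklearn's `KFold` does the work):
+
+```python
+import pandas as pd
+from sklearn.model_selection import KFold
+
+# Rows = images, columns = object-class counts extracted from YOLO label files
+df = pd.DataFrame({"0": [3, 1, 0, 2, 5], "1": [0, 2, 1, 1, 0]}, index=[f"img_{i}.jpg" for i in range(5)])
+
+kf = KFold(n_splits=5, shuffle=True, random_state=20)
+for i, (train_idx, val_idx) in enumerate(kf.split(df)):
+    print(f"fold {i}: train={df.index[train_idx].tolist()} val={df.index[val_idx].tolist()}")
+```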
+
+ Watch: How to Optimize and Deploy AI Models: Best Practices, Troubleshooting, and Security Considerations
+
+
+ Watch: Model Training Tips | How to Handle Large Datasets | Batch Size, GPU Utilization and [Mixed Precision](https://www.ultralytics.com/glossary/mixed-precision)
+
@@ -90,6 +90,46 @@ Object counting with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly
cv2.destroyAllWindows()
```
+ === "OBB Object Counting"
+
+ ```python
+ import cv2
+
+ from ultralytics import YOLO, solutions
+
+ model = YOLO("yolov8n-obb.pt")
+ cap = cv2.VideoCapture("path/to/video/file.mp4")
+ assert cap.isOpened(), "Error reading video file"
+ w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
+
+ # Define region points
+ region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)]
+
+ # Video writer
+ video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
+
+ # Init Object Counter
+ counter = solutions.ObjectCounter(
+ view_img=True,
+ reg_pts=region_points,
+ names=model.names,
+ line_thickness=2,
+ )
+
+ while cap.isOpened():
+ success, im0 = cap.read()
+ if not success:
+ print("Video frame is empty or video processing has been successfully completed.")
+ break
+ tracks = model.track(im0, persist=True, show=False)
+ im0 = counter.start_counting(im0, tracks)
+ video_writer.write(im0)
+
+ cap.release()
+ video_writer.release()
+ cv2.destroyAllWindows()
+ ```
+
=== "Count in Polygon"
```python
@@ -123,7 +163,6 @@ Object counting with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly
print("Video frame is empty or video processing has been successfully completed.")
break
tracks = model.track(im0, persist=True, show=False)
-
im0 = counter.start_counting(im0, tracks)
video_writer.write(im0)
@@ -165,7 +204,6 @@ Object counting with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly
print("Video frame is empty or video processing has been successfully completed.")
break
tracks = model.track(im0, persist=True, show=False)
-
im0 = counter.start_counting(im0, tracks)
video_writer.write(im0)
@@ -207,7 +245,6 @@ Object counting with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly
print("Video frame is empty or video processing has been successfully completed.")
break
tracks = model.track(im0, persist=True, show=False, classes=classes_to_count)
-
im0 = counter.start_counting(im0, tracks)
video_writer.write(im0)
@@ -236,15 +273,7 @@ Here's a table with the `ObjectCounter` arguments:
### Arguments `model.track`
-| Name | Type | Default | Description |
-| --------- | ------- | -------------- | ----------------------------------------------------------- |
-| `source` | `im0` | `None` | source directory for images or videos |
-| `persist` | `bool` | `False` | persisting tracks between frames |
-| `tracker` | `str` | `botsort.yaml` | Tracking method 'bytetrack' or 'botsort' |
-| `conf` | `float` | `0.3` | Confidence Threshold |
-| `iou` | `float` | `0.5` | IOU Threshold |
-| `classes` | `list` | `None` | filter results by class, i.e. classes=0, or classes=[0,2,3] |
-| `verbose` | `bool` | `True` | Display the object tracking results |
+{% include "macros/track-args.md" %}
## FAQ
@@ -348,16 +377,16 @@ count_specific_classes("path/to/video.mp4", "output_specific_classes.avi", "yolo
In this example, `classes_to_count=[0, 2]`, which means it counts objects of class `0` and `2` (e.g., person and car).
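+
+For instance (a hedged sketch assuming the `ultralytics` package and COCO class indices), the same class filter is passed straight to the tracker:
+
+```python
+from ultralytics import YOLO
+
+# COCO indices: 0 = person, 2 = car; only these classes are tracked
+model = YOLO("yolov8n.pt")
+results = model.track("https://ultralytics.com/images/bus.jpg", persist=True, classes=[0, 2])
+print(results[0].boxes.cls.tolist())  # class ids restricted to 0 and 2
+```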
-### Why should I use YOLOv8 over other object detection models for real-time applications?
+### Why should I use YOLOv8 over other [object detection](https://www.ultralytics.com/glossary/object-detection) models for real-time applications?
Ultralytics YOLOv8 provides several advantages over other object detection models like Faster R-CNN, SSD, and previous YOLO versions:
1. **Speed and Efficiency:** YOLOv8 offers real-time processing capabilities, making it ideal for applications requiring high-speed inference, such as surveillance and autonomous driving.
-2. **Accuracy:** It provides state-of-the-art accuracy for object detection and tracking tasks, reducing the number of false positives and improving overall system reliability.
+2. **[Accuracy](https://www.ultralytics.com/glossary/accuracy):** It provides state-of-the-art accuracy for object detection and tracking tasks, reducing the number of false positives and improving overall system reliability.
3. **Ease of Integration:** YOLOv8 offers seamless integration with various platforms and devices, including mobile and edge devices, which is crucial for modern AI applications.
4. **Flexibility:** Supports various tasks like object detection, segmentation, and tracking with configurable models to meet specific use-case requirements.
-Check out Ultralytics [YOLOv8 Documentation](https://docs.ultralytics.com/models/yolov8) for a deeper dive into its features and performance comparisons.
+Check out Ultralytics [YOLOv8 Documentation](https://docs.ultralytics.com/models/yolov8/) for a deeper dive into its features and performance comparisons.
### Can I use YOLOv8 for advanced applications like crowd analysis and traffic management?
diff --git a/docs/en/guides/object-cropping.md b/docs/en/guides/object-cropping.md
index caf831fe..f4b50ed0 100644
--- a/docs/en/guides/object-cropping.md
+++ b/docs/en/guides/object-cropping.md
@@ -25,7 +25,7 @@ Object cropping with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly
- **Focused Analysis**: YOLOv8 facilitates targeted object cropping, allowing for in-depth examination or processing of individual items within a scene.
- **Reduced Data Volume**: By extracting only relevant objects, object cropping helps in minimizing data size, making it efficient for storage, transmission, or subsequent computational tasks.
-- **Enhanced Precision**: YOLOv8's object detection accuracy ensures that the cropped objects maintain their spatial relationships, preserving the integrity of the visual information for detailed analysis.
+- **Enhanced Precision**: YOLOv8's [object detection](https://www.ultralytics.com/glossary/object-detection) [accuracy](https://www.ultralytics.com/glossary/accuracy) ensures that the cropped objects maintain their spatial relationships, preserving the integrity of the visual information for detailed analysis.
## Visuals
@@ -94,29 +94,13 @@ Object cropping with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly
### Arguments `model.predict`
-| Argument | Type | Default | Description |
-| --------------- | -------------- | ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `source` | `str` | `'ultralytics/assets'` | Specifies the data source for inference. Can be an image path, video file, directory, URL, or device ID for live feeds. Supports a wide range of formats and sources, enabling flexible application across different types of input. |
-| `conf` | `float` | `0.25` | Sets the minimum confidence threshold for detections. Objects detected with confidence below this threshold will be disregarded. Adjusting this value can help reduce false positives. |
-| `iou` | `float` | `0.7` | Intersection Over Union (IoU) threshold for Non-Maximum Suppression (NMS). Lower values result in fewer detections by eliminating overlapping boxes, useful for reducing duplicates. |
-| `imgsz` | `int or tuple` | `640` | Defines the image size for inference. Can be a single integer `640` for square resizing or a (height, width) tuple. Proper sizing can improve detection accuracy and processing speed. |
-| `half` | `bool` | `False` | Enables half-precision (FP16) inference, which can speed up model inference on supported GPUs with minimal impact on accuracy. |
-| `device` | `str` | `None` | Specifies the device for inference (e.g., `cpu`, `cuda:0` or `0`). Allows users to select between CPU, a specific GPU, or other compute devices for model execution. |
-| `max_det` | `int` | `300` | Maximum number of detections allowed per image. Limits the total number of objects the model can detect in a single inference, preventing excessive outputs in dense scenes. |
-| `vid_stride` | `int` | `1` | Frame stride for video inputs. Allows skipping frames in videos to speed up processing at the cost of temporal resolution. A value of 1 processes every frame, higher values skip frames. |
-| `stream_buffer` | `bool` | `False` | Determines if all frames should be buffered when processing video streams (`True`), or if the model should return the most recent frame (`False`). Useful for real-time applications. |
-| `visualize` | `bool` | `False` | Activates visualization of model features during inference, providing insights into what the model is "seeing". Useful for debugging and model interpretation. |
-| `augment` | `bool` | `False` | Enables test-time augmentation (TTA) for predictions, potentially improving detection robustness at the cost of inference speed. |
-| `agnostic_nms` | `bool` | `False` | Enables class-agnostic Non-Maximum Suppression (NMS), which merges overlapping boxes of different classes. Useful in multi-class detection scenarios where class overlap is common. |
-| `classes` | `list[int]` | `None` | Filters predictions to a set of class IDs. Only detections belonging to the specified classes will be returned. Useful for focusing on relevant objects in multi-class detection tasks. |
-| `retina_masks` | `bool` | `False` | Uses high-resolution segmentation masks if available in the model. This can enhance mask quality for segmentation tasks, providing finer detail. |
-| `embed` | `list[int]` | `None` | Specifies the layers from which to extract feature vectors or embeddings. Useful for downstream tasks like clustering or similarity search. |
+{% include "macros/predict-args.md" %}
## FAQ
### What is object cropping in Ultralytics YOLOv8 and how does it work?
-Object cropping using [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) involves isolating and extracting specific objects from an image or video based on YOLOv8's detection capabilities. This process allows for focused analysis, reduced data volume, and enhanced precision by leveraging YOLOv8 to identify objects with high accuracy and crop them accordingly. For an in-depth tutorial, refer to the [object cropping example](#object-cropping-using-ultralytics-yolov8).
+Object cropping using [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) involves isolating and extracting specific objects from an image or video based on YOLOv8's detection capabilities. This process allows for focused analysis, reduced data volume, and enhanced [precision](https://www.ultralytics.com/glossary/precision) by leveraging YOLOv8 to identify objects with high accuracy and crop them accordingly. For an in-depth tutorial, refer to the [object cropping example](#object-cropping-using-ultralytics-yolov8).
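+
+A minimal, hedged sketch (assuming the `ultralytics` package and OpenCV) of cropping each detected box out of an image:
+
+```python
+import cv2
+
+from ultralytics import YOLO
+
+# Detect objects, then save each bounding-box region as its own image
+model = YOLO("yolov8n.pt")
+results = model("https://ultralytics.com/images/bus.jpg")
+im0 = results[0].orig_img
+for i, (x1, y1, x2, y2) in enumerate(results[0].boxes.xyxy.int().tolist()):
+    cv2.imwrite(f"crop_{i}.png", im0[y1:y2, x1:x2])
+```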
### Why should I use Ultralytics YOLOv8 for object cropping over other solutions?
diff --git a/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md b/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md
index 92c840f3..154ec7a8 100644
--- a/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md
+++ b/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md
@@ -10,7 +10,7 @@ keywords: Ultralytics YOLO, OpenVINO optimization, deep learning, model inferenc
## Introduction
-When deploying deep learning models, particularly those for object detection such as Ultralytics YOLO models, achieving optimal performance is crucial. This guide delves into leveraging Intel's OpenVINO toolkit to optimize inference, focusing on latency and throughput. Whether you're working on consumer-grade applications or large-scale deployments, understanding and applying these optimization strategies will ensure your models run efficiently on various devices.
+When deploying [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models, particularly those for [object detection](https://www.ultralytics.com/glossary/object-detection) such as Ultralytics YOLO models, achieving optimal performance is crucial. This guide delves into leveraging Intel's OpenVINO toolkit to optimize inference, focusing on latency and throughput. Whether you're working on consumer-grade applications or large-scale deployments, understanding and applying these optimization strategies will ensure your models run efficiently on various devices.
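+
+A hedged sketch (assuming the `ultralytics` package with OpenVINO export support installed) of the export-then-infer flow this guide builds on:
+
+```python
+from ultralytics import YOLO
+
+# Export a detection model to OpenVINO format, then run inference with the exported model
+model = YOLO("yolov8n.pt")
+model.export(format="openvino")  # creates 'yolov8n_openvino_model/'
+
+ov_model = YOLO("yolov8n_openvino_model/")
+results = ov_model("https://ultralytics.com/images/bus.jpg")
+```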
## Optimizing for Latency
@@ -121,8 +121,8 @@ Using OpenVINO's high-level performance hints and multi-device modes can help st
Yes, Ultralytics YOLO models are highly versatile and can be integrated with various AI frameworks. Options include:
-- **TensorRT:** For NVIDIA GPU optimization, follow the [TensorRT integration guide](https://docs.ultralytics.com/integrations/tensorrt).
-- **CoreML:** For Apple devices, refer to our [CoreML export instructions](https://docs.ultralytics.com/integrations/coreml).
-- **TensorFlow.js:** For web and Node.js apps, see the [TF.js conversion guide](https://docs.ultralytics.com/integrations/tfjs).
+- **TensorRT:** For NVIDIA GPU optimization, follow the [TensorRT integration guide](https://docs.ultralytics.com/integrations/tensorrt/).
+- **CoreML:** For Apple devices, refer to our [CoreML export instructions](https://docs.ultralytics.com/integrations/coreml/).
+- **[TensorFlow](https://www.ultralytics.com/glossary/tensorflow).js:** For web and Node.js apps, see the [TF.js conversion guide](https://docs.ultralytics.com/integrations/tfjs/).
-Explore more integrations on the [Ultralytics Integrations page](https://docs.ultralytics.com/integrations).
+Explore more integrations on the [Ultralytics Integrations page](https://docs.ultralytics.com/integrations/).
diff --git a/docs/en/guides/parking-management.md b/docs/en/guides/parking-management.md
index bd5b0edd..78686bd0 100644
--- a/docs/en/guides/parking-management.md
+++ b/docs/en/guides/parking-management.md
@@ -74,9 +74,6 @@ Parking management with [Ultralytics YOLOv8](https://github.com/ultralytics/ultr
from ultralytics import solutions
- # Path to json file, that created with above point selection app
- polygon_json_path = "bounding_boxes.json"
-
# Video capture
cap = cv2.VideoCapture("Path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
@@ -86,22 +83,16 @@ Parking management with [Ultralytics YOLOv8](https://github.com/ultralytics/ultr
video_writer = cv2.VideoWriter("parking management.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
# Initialize parking management object
- management = solutions.ParkingManagement(model_path="yolov8n.pt")
+ parking_manager = solutions.ParkingManagement(
+ model="yolov8n.pt", # path to model file
+ json_file="bounding_boxes.json", # path to parking annotations file
+ )
while cap.isOpened():
ret, im0 = cap.read()
if not ret:
break
-
- json_data = management.parking_regions_extraction(polygon_json_path)
- results = management.model.track(im0, persist=True, show=False)
-
- if results[0].boxes.id is not None:
- boxes = results[0].boxes.xyxy.cpu().tolist()
- clss = results[0].boxes.cls.cpu().tolist()
- management.process_data(json_data, im0, boxes, clss)
-
- management.display_frames(im0)
+ im0 = parking_manager.process_data(im0)
video_writer.write(im0)
cap.release()
@@ -111,26 +102,16 @@ Parking management with [Ultralytics YOLOv8](https://github.com/ultralytics/ultr
### Optional Arguments `ParkingManagement`
-| Name | Type | Default | Description |
-| ------------------------ | ------- | ----------------- | -------------------------------------- |
-| `model_path` | `str` | `None` | Path to the YOLOv8 model. |
-| `txt_color` | `tuple` | `(0, 0, 0)` | RGB color tuple for text. |
-| `bg_color` | `tuple` | `(255, 255, 255)` | RGB color tuple for background. |
-| `occupied_region_color` | `tuple` | `(0, 255, 0)` | RGB color tuple for occupied regions. |
-| `available_region_color` | `tuple` | `(0, 0, 255)` | RGB color tuple for available regions. |
-| `margin` | `int` | `10` | Margin for text display. |
+| Name | Type | Default | Description |
+| ------------------------ | ------- | ------------- | -------------------------------------------------------------- |
+| `model` | `str` | `None` | Path to the YOLOv8 model. |
+| `json_file` | `str` | `None` | Path to the JSON file with all parking coordinates data. |
+| `occupied_region_color` | `tuple` | `(0, 0, 255)` | RGB color for occupied regions. |
+| `available_region_color` | `tuple` | `(0, 255, 0)` | RGB color for available regions. |
### Arguments `model.track`
-| Name | Type | Default | Description |
-| --------- | ------- | -------------- | ----------------------------------------------------------- |
-| `source` | `im0` | `None` | source directory for images or videos |
-| `persist` | `bool` | `False` | persisting tracks between frames |
-| `tracker` | `str` | `botsort.yaml` | Tracking method 'bytetrack' or 'botsort' |
-| `conf` | `float` | `0.3` | Confidence Threshold |
-| `iou` | `float` | `0.5` | IOU Threshold |
-| `classes` | `list` | `None` | filter results by class, i.e. classes=0, or classes=[0,2,3] |
-| `verbose` | `bool` | `True` | Display the object tracking results |
+{% include "macros/track-args.md" %}
## FAQ
diff --git a/docs/en/guides/preprocessing_annotated_data.md b/docs/en/guides/preprocessing_annotated_data.md
index 36fcf9f9..fcd329c7 100644
--- a/docs/en/guides/preprocessing_annotated_data.md
+++ b/docs/en/guides/preprocessing_annotated_data.md
@@ -4,7 +4,7 @@ description: Learn essential data preprocessing techniques for annotated compute
keywords: data preprocessing, computer vision, image resizing, normalization, data augmentation, training dataset, validation dataset, test dataset, YOLOv8
---
-# Data Preprocessing Techniques for Annotated Computer Vision Data
+# Data Preprocessing Techniques for Annotated [Computer Vision](https://www.ultralytics.com/glossary/computer-vision-cv) Data
## Introduction
@@ -33,7 +33,7 @@ You can resize your images using the following methods:
To make resizing a simpler task, you can use the following tools:
-- **OpenCV**: A popular computer vision library with extensive functions for image processing.
+- **[OpenCV](https://www.ultralytics.com/glossary/opencv)**: A popular computer vision library with extensive functions for image processing.
- **PIL (Pillow)**: A Python Imaging Library for opening, manipulating, and saving image files.
With respect to YOLOv8, the 'imgsz' parameter during [model training](../modes/train.md) allows for flexible input sizes. When set to a specific size, such as 640, the model will resize input images so their largest dimension is 640 pixels while maintaining the original aspect ratio.
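+
+For example (a hedged sketch assuming the `ultralytics` package and the small `coco8.yaml` sample dataset), `imgsz` is simply passed as a training argument:
+
+```python
+from ultralytics import YOLO
+
+# Inputs are resized so their largest dimension is 640 px while keeping aspect ratio
+model = YOLO("yolov8n.pt")
+model.train(data="coco8.yaml", epochs=3, imgsz=640)
+```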
@@ -65,7 +65,7 @@ The most commonly discussed data preprocessing step is data augmentation. Data a
Here are some other benefits of data augmentation:
- **Creates a More Robust Dataset**: Data augmentation can make the model more robust to variations and distortions in the input data. This includes changes in lighting, orientation, and scale.
-- **Cost-Effective**: Data augmentation is a cost-effective way to increase the amount of training data without collecting and labeling new data.
+- **Cost-Effective**: Data augmentation is a cost-effective way to increase the amount of [training data](https://www.ultralytics.com/glossary/training-data) without collecting and labeling new data.
- **Better Use of Data**: Every available data point is used to its maximum potential by creating new variations
#### Data Augmentation Methods
@@ -96,7 +96,7 @@ Here's what each step of preprocessing would look like for this project:
- Resizing Images: Since YOLOv8 handles flexible input sizes and performs resizing automatically, manual resizing is not required. The model will adjust the image size according to the specified 'imgsz' parameter during training.
- Normalizing Pixel Values: YOLOv8 automatically normalizes pixel values to a range of 0 to 1 during preprocessing, so it's not required.
- Splitting the Dataset: Divide the dataset into training (70%), validation (20%), and test (10%) sets using tools like scikit-learn.
-- Data Augmentation: Modify the dataset configuration file (.yaml) to include data augmentation techniques such as random crops, horizontal flips, and brightness adjustments.
+- [Data Augmentation](https://www.ultralytics.com/glossary/data-augmentation): Modify the dataset configuration file (.yaml) to include data augmentation techniques such as random crops, horizontal flips, and brightness adjustments.
These steps make sure the dataset is prepared without any potential issues and is ready for Exploratory Data Analysis (EDA).
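+
+A hedged sketch of the 70/20/10 split step above, assuming a flat list of image paths and scikit-learn:
+
+```python
+from sklearn.model_selection import train_test_split
+
+images = [f"img_{i}.jpg" for i in range(100)]  # toy list of image paths
+
+# 70% train, then split the remaining 30% into 20% val / 10% test
+train, rest = train_test_split(images, test_size=0.30, random_state=0)
+val, test = train_test_split(rest, test_size=1 / 3, random_state=0)
+print(len(train), len(val), len(test))  # 70 20 10
+```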
@@ -120,7 +120,7 @@ Common tools for visualizations include:
### Using Ultralytics Explorer for EDA
-For a more advanced approach to EDA, you can use the Ultralytics Explorer tool. It offers robust capabilities for exploring computer vision datasets. By supporting semantic search, SQL queries, and vector similarity search, the tool makes it easy to analyze and understand your data. With Ultralytics Explorer, you can create embeddings for your dataset to find similar images, run SQL queries for detailed analysis, and perform semantic searches, all through a user-friendly graphical interface.
+For a more advanced approach to EDA, you can use the Ultralytics Explorer tool. It offers robust capabilities for exploring computer vision datasets. By supporting semantic search, SQL queries, and vector similarity search, the tool makes it easy to analyze and understand your data. With Ultralytics Explorer, you can create [embeddings](https://www.ultralytics.com/glossary/embeddings) for your dataset to find similar images, run SQL queries for detailed analysis, and perform semantic searches, all through a user-friendly graphical interface.
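+
+A hedged sketch, assuming the `Explorer` API shipped in the `ultralytics` package at the time of writing:
+
+```python
+from ultralytics import Explorer
+
+# Build an embeddings table for a dataset, then look up visually similar images
+exp = Explorer(data="coco8.yaml", model="yolov8n.pt")
+exp.create_embeddings_table()
+similar = exp.get_similar(idx=1, limit=10)  # images most similar to dataset index 1
+print(similar)
+```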
@@ -23,7 +23,7 @@ keywords: object counting, regions, YOLOv8, computer vision, Ultralytics, effici
## Advantages of Object Counting in Regions?
-- **Precision and Accuracy:** Object counting in regions with advanced computer vision ensures precise and accurate counts, minimizing errors often associated with manual counting.
+- **[Precision](https://www.ultralytics.com/glossary/precision) and Accuracy:** Object counting in regions with advanced computer vision ensures precise and accurate counts, minimizing errors often associated with manual counting.
- **Efficiency Improvement:** Automated object counting enhances operational efficiency, providing real-time results and streamlining processes across different applications.
- **Versatility and Application:** The versatility of object counting in regions makes it applicable across various domains, from manufacturing and surveillance to traffic monitoring, contributing to its widespread utility and effectiveness.
@@ -75,21 +75,21 @@ python yolov8_region_counter.py --source "path/to/video.mp4" --view-img
### Optional Arguments
-| Name | Type | Default | Description |
-| -------------------- | ------ | ------------ | ------------------------------------------ |
-| `--source` | `str` | `None` | Path to video file, for webcam 0 |
-| `--line_thickness` | `int` | `2` | Bounding Box thickness |
-| `--save-img` | `bool` | `False` | Save the predicted video/image |
-| `--weights` | `str` | `yolov8n.pt` | Weights file path |
-| `--classes` | `list` | `None` | Detect specific classes i.e. --classes 0 2 |
-| `--region-thickness` | `int` | `2` | Region Box thickness |
-| `--track-thickness` | `int` | `2` | Tracking line thickness |
+| Name | Type | Default | Description |
+| -------------------- | ------ | ------------ | --------------------------------------------------------------------------- |
+| `--source` | `str` | `None` | Path to video file, for webcam 0 |
+| `--line_thickness` | `int` | `2` | [Bounding Box](https://www.ultralytics.com/glossary/bounding-box) thickness |
+| `--save-img` | `bool` | `False` | Save the predicted video/image |
+| `--weights` | `str` | `yolov8n.pt` | Weights file path |
+| `--classes` | `list` | `None` | Detect specific classes i.e. --classes 0 2 |
+| `--region-thickness` | `int` | `2` | Region Box thickness |
+| `--track-thickness` | `int` | `2` | Tracking line thickness |
## FAQ
### What is object counting in specified regions using Ultralytics YOLOv8?
-Object counting in specified regions with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) involves detecting and tallying the number of objects within defined areas using advanced computer vision. This precise method enhances efficiency and accuracy across various applications like manufacturing, surveillance, and traffic monitoring.
+Object counting in specified regions with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) involves detecting and tallying the number of objects within defined areas using advanced computer vision. This precise method enhances efficiency and [accuracy](https://www.ultralytics.com/glossary/accuracy) across various applications like manufacturing, surveillance, and traffic monitoring.
### How do I run the object counting script with Ultralytics YOLOv8?
diff --git a/docs/en/guides/ros-quickstart.md b/docs/en/guides/ros-quickstart.md
index c0691ffa..27371131 100644
--- a/docs/en/guides/ros-quickstart.md
+++ b/docs/en/guides/ros-quickstart.md
@@ -69,7 +69,7 @@ Apart from the ROS environment, you will need to install the following dependenc
## Use Ultralytics with ROS `sensor_msgs/Image`
-The `sensor_msgs/Image` [message type](https://docs.ros.org/en/api/sensor_msgs/html/msg/Image.html) is commonly used in ROS for representing image data. It contains fields for encoding, height, width, and pixel data, making it suitable for transmitting images captured by cameras or other sensors. Image messages are widely used in robotic applications for tasks such as visual perception, object detection, and navigation.
+The `sensor_msgs/Image` [message type](https://docs.ros.org/en/api/sensor_msgs/html/msg/Image.html) is commonly used in ROS for representing image data. It contains fields for encoding, height, width, and pixel data, making it suitable for transmitting images captured by cameras or other sensors. Image messages are widely used in robotic applications for tasks such as visual perception, [object detection](https://www.ultralytics.com/glossary/object-detection), and navigation.
- Watch: Security Alarm System Project with Ultralytics YOLOv8 Object Detection
+ Watch: Security Alarm System Project with Ultralytics YOLOv8 [Object Detection](https://www.ultralytics.com/glossary/object-detection)
@@ -72,7 +72,7 @@ keywords: Ultralytics YOLOv8, speed estimation, object tracking, computer vision
print("Video frame is empty or video processing has been successfully completed.")
break
- tracks = model.track(im0, persist=True, show=False)
+ tracks = model.track(im0, persist=True)
im0 = speed_obj.estimate_speed(im0, tracks)
video_writer.write(im0)
@@ -94,26 +94,17 @@ keywords: Ultralytics YOLOv8, speed estimation, object tracking, computer vision
| `reg_pts` | `list` | `[(20, 400), (1260, 400)]` | List of region points for speed estimation. |
| `view_img` | `bool` | `False` | Whether to display the image with annotations. |
| `line_thickness` | `int` | `2` | Thickness of the lines for drawing boxes and tracks. |
-| `region_thickness` | `int` | `5` | Thickness of the region lines. |
| `spdl_dist_thresh` | `int` | `10` | Distance threshold for speed calculation. |
### Arguments `model.track`
-| Name | Type | Default | Description |
-| --------- | ------- | -------------- | ----------------------------------------------------------- |
-| `source` | `im0` | `None` | source directory for images or videos |
-| `persist` | `bool` | `False` | persisting tracks between frames |
-| `tracker` | `str` | `botsort.yaml` | Tracking method 'bytetrack' or 'botsort' |
-| `conf` | `float` | `0.3` | Confidence Threshold |
-| `iou` | `float` | `0.5` | IOU Threshold |
-| `classes` | `list` | `None` | filter results by class, i.e. classes=0, or classes=[0,2,3] |
-| `verbose` | `bool` | `True` | Display the object tracking results |
+{% include "macros/track-args.md" %}
## FAQ
### How do I estimate object speed using Ultralytics YOLOv8?
-Estimating object speed with Ultralytics YOLOv8 involves combining object detection and tracking techniques. First, you need to detect objects in each frame using the YOLOv8 model. Then, track these objects across frames to calculate their movement over time. Finally, use the distance traveled by the object between frames and the frame rate to estimate its speed.
+Estimating object speed with Ultralytics YOLOv8 involves combining [object detection](https://www.ultralytics.com/glossary/object-detection) and tracking techniques. First, you need to detect objects in each frame using the YOLOv8 model. Then, track these objects across frames to calculate their movement over time. Finally, use the distance traveled by the object between frames and the frame rate to estimate its speed.
**Example**:
@@ -161,7 +152,7 @@ Using Ultralytics YOLOv8 for speed estimation offers significant advantages in t
For more applications, see [advantages of speed estimation](#advantages-of-speed-estimation).
-### Can YOLOv8 be integrated with other AI frameworks like TensorFlow or PyTorch?
+### Can YOLOv8 be integrated with other AI frameworks like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) or [PyTorch](https://www.ultralytics.com/glossary/pytorch)?
Yes, YOLOv8 can be integrated with other AI frameworks like TensorFlow and PyTorch. Ultralytics provides support for exporting YOLOv8 models to various formats like ONNX, TensorRT, and CoreML, ensuring smooth interoperability with other ML frameworks.
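+
+For example (a hedged sketch assuming the `ultralytics` package), exporting to ONNX produces a file that other runtimes and frameworks can consume:
+
+```python
+from ultralytics import YOLO
+
+# Export a YOLOv8 model to ONNX for use outside of PyTorch
+model = YOLO("yolov8n.pt")
+onnx_path = model.export(format="onnx")
+print(onnx_path)  # e.g. 'yolov8n.onnx'
+```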
@@ -175,7 +166,7 @@ Learn more about exporting models in our [guide on export](../modes/export.md).
### How accurate is the speed estimation using Ultralytics YOLOv8?
-The accuracy of speed estimation using Ultralytics YOLOv8 depends on several factors, including the quality of the object tracking, the resolution and frame rate of the video, and environmental variables. While the speed estimator provides reliable estimates, it may not be 100% accurate due to variances in frame processing speed and object occlusion.
+The [accuracy](https://www.ultralytics.com/glossary/accuracy) of speed estimation using Ultralytics YOLOv8 depends on several factors, including the quality of the object tracking, the resolution and frame rate of the video, and environmental variables. While the speed estimator provides reliable estimates, it may not be 100% accurate due to variances in frame processing speed and object occlusion.
**Note**: Always consider margin of error and validate the estimates with ground truth data when possible.
diff --git a/docs/en/guides/steps-of-a-cv-project.md b/docs/en/guides/steps-of-a-cv-project.md
index ec9b84ff..7f50440a 100644
--- a/docs/en/guides/steps-of-a-cv-project.md
+++ b/docs/en/guides/steps-of-a-cv-project.md
@@ -8,7 +8,7 @@ keywords: Computer Vision, AI, Object Detection, Image Classification, Instance
## Introduction
-Computer vision is a subfield of artificial intelligence (AI) that helps computers see and understand the world like humans do. It processes and analyzes images or videos to extract information, recognize patterns, and make decisions based on that data.
+Computer vision is a subfield of [artificial intelligence](https://www.ultralytics.com/glossary/artificial-intelligence-ai) (AI) that helps computers see and understand the world like humans do. It processes and analyzes images or videos to extract information, recognize patterns, and make decisions based on that data.
@@ -18,7 +18,7 @@ Computer vision is a subfield of artificial intelligence (AI) that helps compute
allowfullscreen>
- Watch: How to Do Computer Vision Projects | A Step-by-Step Guide
+ Watch: How to Do [Computer Vision](https://www.ultralytics.com/glossary/computer-vision-cv) Projects | A Step-by-Step Guide
@@ -18,7 +18,7 @@ Streamlit makes it simple to build and deploy interactive web applications. Comb
allowfullscreen>
- Watch: How to Use Streamlit with Ultralytics for Real-Time Computer Vision in Your Browser
+ Watch: How to Use Streamlit with Ultralytics for Real-Time [Computer Vision](https://www.ultralytics.com/glossary/computer-vision-cv) in Your Browser
@@ -21,7 +21,7 @@ The [Triton Inference Server](https://developer.nvidia.com/triton-inference-serv
## What is Triton Inference Server?
-Triton Inference Server is designed to deploy a variety of AI models in production. It supports a wide range of deep learning and machine learning frameworks, including TensorFlow, PyTorch, ONNX Runtime, and many others. Its primary use cases are:
+Triton Inference Server is designed to deploy a variety of AI models in production. It supports a wide range of deep learning and [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) frameworks, including TensorFlow, [PyTorch](https://www.ultralytics.com/glossary/pytorch), ONNX Runtime, and many others. Its primary use cases are:
- Serving multiple models from a single server instance.
- Dynamic model loading and unloading without server restart.
@@ -147,7 +147,7 @@ By following the above steps, you can deploy and run Ultralytics YOLOv8 models e
### How do I set up Ultralytics YOLOv8 with NVIDIA Triton Inference Server?
-Setting up [Ultralytics YOLOv8](https://docs.ultralytics.com/models/yolov8) with [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server) involves a few key steps:
+Setting up [Ultralytics YOLOv8](https://docs.ultralytics.com/models/yolov8/) with [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server) involves a few key steps:
1. **Export YOLOv8 to ONNX format**:
@@ -216,7 +216,7 @@ This setup can help you efficiently deploy YOLOv8 models at scale on Triton Infe
Integrating [Ultralytics YOLOv8](../models/yolov8.md) with [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server) provides several advantages:
- **Scalable AI Inference**: Triton allows serving multiple models from a single server instance, supporting dynamic model loading and unloading, making it highly scalable for diverse AI workloads.
-- **High Performance**: Optimized for NVIDIA GPUs, Triton Inference Server ensures high-speed inference operations, perfect for real-time applications such as object detection.
+- **High Performance**: Optimized for NVIDIA GPUs, Triton Inference Server ensures high-speed inference operations, perfect for real-time applications such as [object detection](https://www.ultralytics.com/glossary/object-detection).
- **Ensemble and Model Versioning**: Triton's ensemble mode enables combining multiple models to improve results, and its model versioning supports A/B testing and rolling updates.
For detailed instructions on setting up and running YOLOv8 with Triton, you can refer to the [setup guide](#setting-up-triton-model-repository).
@@ -256,11 +256,11 @@ results = model("path/to/image.jpg")
For an in-depth guide on setting up and running Triton Server with YOLOv8, refer to the [running triton inference server](#running-triton-inference-server) section.
-### How does Ultralytics YOLOv8 compare to TensorFlow and PyTorch models for deployment?
+### How does Ultralytics YOLOv8 compare to [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and PyTorch models for deployment?
-[Ultralytics YOLOv8](https://docs.ultralytics.com/models/yolov8) offers several unique advantages compared to TensorFlow and PyTorch models for deployment:
+[Ultralytics YOLOv8](https://docs.ultralytics.com/models/yolov8/) offers several unique advantages compared to TensorFlow and PyTorch models for deployment:
-- **Real-time Performance**: Optimized for real-time object detection tasks, YOLOv8 provides state-of-the-art accuracy and speed, making it ideal for applications requiring live video analytics.
+- **Real-time Performance**: Optimized for real-time object detection tasks, YOLOv8 provides state-of-the-art [accuracy](https://www.ultralytics.com/glossary/accuracy) and speed, making it ideal for applications requiring live video analytics.
- **Ease of Use**: YOLOv8 integrates seamlessly with Triton Inference Server and supports diverse export formats (ONNX, TensorRT, CoreML), making it flexible for various deployment scenarios.
- **Advanced Features**: YOLOv8 includes features like dynamic model loading, model versioning, and ensemble inference, which are crucial for scalable and reliable AI deployments.
diff --git a/docs/en/guides/view-results-in-terminal.md b/docs/en/guides/view-results-in-terminal.md
index c599f664..cb6ce1c0 100644
--- a/docs/en/guides/view-results-in-terminal.md
+++ b/docs/en/guides/view-results-in-terminal.md
@@ -58,7 +58,7 @@ The VSCode compatible protocols for viewing images using the integrated terminal
1. See [plot method parameters](../modes/predict.md#plot-method-parameters) to see possible arguments to use.
-4. Now, use OpenCV to convert the `numpy.ndarray` to `bytes` data. Then use `io.BytesIO` to make a "file-like" object.
+4. Now, use [OpenCV](https://www.ultralytics.com/glossary/opencv) to convert the `numpy.ndarray` to `bytes` data. Then use `io.BytesIO` to make a "file-like" object.
```{ .py .annotate }
import io
diff --git a/docs/en/guides/vision-eye.md b/docs/en/guides/vision-eye.md
index 891a589b..48cff274 100644
--- a/docs/en/guides/vision-eye.md
+++ b/docs/en/guides/vision-eye.md
@@ -8,7 +8,7 @@ keywords: VisionEye, YOLOv8, Ultralytics, object mapping, object tracking, dista
## What is VisionEye Object Mapping?
-[Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) VisionEye offers the capability for computers to identify and pinpoint objects, simulating the observational precision of the human eye. This functionality enables computers to discern and focus on specific objects, much like the way the human eye observes details from a particular viewpoint.
+[Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) VisionEye offers the capability for computers to identify and pinpoint objects, simulating the observational [precision](https://www.ultralytics.com/glossary/precision) of the human eye. This functionality enables computers to discern and focus on specific objects, much like the way the human eye observes details from a particular viewpoint.
## Samples
@@ -182,7 +182,7 @@ For any inquiries, feel free to post your questions in the [Ultralytics Issue Se
### How do I start using VisionEye Object Mapping with Ultralytics YOLOv8?
-To start using VisionEye Object Mapping with Ultralytics YOLOv8, first, you'll need to install the Ultralytics YOLO package via pip. Then, you can use the sample code provided in the documentation to set up object detection with VisionEye. Here's a simple example to get you started:
+To start using VisionEye Object Mapping with Ultralytics YOLOv8, first, you'll need to install the Ultralytics YOLO package via pip. Then, you can use the sample code provided in the documentation to set up [object detection](https://www.ultralytics.com/glossary/object-detection) with VisionEye. Here's a simple example to get you started:
```python
import cv2
@@ -292,7 +292,7 @@ For detailed instructions, refer to the [VisionEye with Distance Calculation](#s
### Why should I use Ultralytics YOLOv8 for object mapping and tracking?
-Ultralytics YOLOv8 is renowned for its speed, accuracy, and ease of integration, making it a top choice for object mapping and tracking. Key advantages include:
+Ultralytics YOLOv8 is renowned for its speed, [accuracy](https://www.ultralytics.com/glossary/accuracy), and ease of integration, making it a top choice for object mapping and tracking. Key advantages include:
1. **State-of-the-art Performance**: Delivers high accuracy in real-time object detection.
2. **Flexibility**: Supports various tasks such as detection, tracking, and distance calculation.
@@ -301,7 +301,7 @@ Ultralytics YOLOv8 is renowned for its speed, accuracy, and ease of integration,
For more information on applications and benefits, check out the [Ultralytics YOLOv8 documentation](https://docs.ultralytics.com/models/yolov8/).
-### How can I integrate VisionEye with other machine learning tools like Comet or ClearML?
+### How can I integrate VisionEye with other [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) tools like Comet or ClearML?
Ultralytics YOLOv8 can integrate seamlessly with various machine learning tools like Comet and ClearML, enhancing experiment tracking, collaboration, and reproducibility. Follow the detailed guides on [how to use YOLOv5 with Comet](https://www.ultralytics.com/blog/how-to-use-yolov5-with-comet) and [integrate YOLOv8 with ClearML](https://docs.ultralytics.com/integrations/clearml/) to get started.
diff --git a/docs/en/guides/workouts-monitoring.md b/docs/en/guides/workouts-monitoring.md
index 7134a566..45856316 100644
--- a/docs/en/guides/workouts-monitoring.md
+++ b/docs/en/guides/workouts-monitoring.md
@@ -123,35 +123,11 @@ Monitoring workouts through pose estimation with [Ultralytics YOLOv8](https://gi
### Arguments `model.predict`
-| Name | Type | Default | Description |
-| --------------- | -------------- | ---------------------- | -------------------------------------------------------------------------- |
-| `source` | `str` | `'ultralytics/assets'` | source directory for images or videos |
-| `conf` | `float` | `0.25` | object confidence threshold for detection |
-| `iou` | `float` | `0.7` | intersection over union (IoU) threshold for NMS |
-| `imgsz` | `int or tuple` | `640` | image size as scalar or (h, w) list, i.e. (640, 480) |
-| `half` | `bool` | `False` | use half precision (FP16) |
-| `device` | `None or str` | `None` | device to run on, i.e. cuda device=0/1/2/3 or device=cpu |
-| `max_det` | `int` | `300` | maximum number of detections per image |
-| `vid_stride` | `bool` | `False` | video frame-rate stride |
-| `stream_buffer` | `bool` | `False` | buffer all streaming frames (True) or return the most recent frame (False) |
-| `visualize` | `bool` | `False` | visualize model features |
-| `augment` | `bool` | `False` | apply image augmentation to prediction sources |
-| `agnostic_nms` | `bool` | `False` | class-agnostic NMS |
-| `classes` | `list[int]` | `None` | filter results by class, i.e. classes=0, or classes=[0,2,3] |
-| `retina_masks` | `bool` | `False` | use high-resolution segmentation masks |
-| `embed` | `list[int]` | `None` | return feature vectors/embeddings from given layers |
+{% include "macros/predict-args.md" %}
### Arguments `model.track`
-| Name | Type | Default | Description |
-| --------- | ------- | -------------- | ----------------------------------------------------------- |
-| `source` | `im0` | `None` | source directory for images or videos |
-| `persist` | `bool` | `False` | persisting tracks between frames |
-| `tracker` | `str` | `botsort.yaml` | Tracking method 'bytetrack' or 'botsort' |
-| `conf` | `float` | `0.3` | Confidence Threshold |
-| `iou` | `float` | `0.5` | IOU Threshold |
-| `classes` | `list` | `None` | filter results by class, i.e. classes=0, or classes=[0,2,3] |
-| `verbose` | `bool` | `True` | Display the object tracking results |
+{% include "macros/track-args.md" %}
## FAQ
@@ -203,7 +179,7 @@ You can watch a [YouTube video demonstration](https://www.youtube.com/watch?v=LG
### How accurate is Ultralytics YOLOv8 in detecting and tracking exercises?
-Ultralytics YOLOv8 is highly accurate in detecting and tracking exercises due to its state-of-the-art pose estimation capabilities. It can accurately track key body landmarks and joints, providing real-time feedback on exercise form and performance metrics. The model's pretrained weights and robust architecture ensure high precision and reliability. For real-world examples, check out the [real-world applications](#real-world-applications) section in the documentation, which showcases pushups and pullups counting.
+Ultralytics YOLOv8 is highly accurate in detecting and tracking exercises due to its state-of-the-art pose estimation capabilities. It can accurately track key body landmarks and joints, providing real-time feedback on exercise form and performance metrics. The model's pretrained weights and robust architecture ensure high [precision](https://www.ultralytics.com/glossary/precision) and reliability. For real-world examples, check out the [real-world applications](#real-world-applications) section in the documentation, which showcases pushups and pullups counting.
### Can I use Ultralytics YOLOv8 for custom workout routines?
diff --git a/docs/en/guides/yolo-common-issues.md b/docs/en/guides/yolo-common-issues.md
index 77351eaa..6da5d164 100644
--- a/docs/en/guides/yolo-common-issues.md
+++ b/docs/en/guides/yolo-common-issues.md
@@ -33,7 +33,7 @@ Installation errors can arise due to various reasons, such as incompatible versi
- You're using Python 3.8 or later as recommended.
-- Ensure that you have the correct version of PyTorch (1.8 or later) installed.
+- Ensure that you have the correct version of [PyTorch](https://www.ultralytics.com/glossary/pytorch) (1.8 or later) installed.
- Consider using virtual environments to avoid conflicts.
@@ -86,7 +86,7 @@ model.train(data="/path/to/your/data.yaml", batch=4)
**Issue**: Training is slow on a single GPU, and you want to speed up the process using multiple GPUs.
-**Solution**: Increasing the batch size can accelerate training, but it's essential to consider GPU memory capacity. To speed up training with multiple GPUs, follow these steps:
+**Solution**: Increasing the [batch size](https://www.ultralytics.com/glossary/batch-size) can accelerate training, but it's essential to consider GPU memory capacity. To speed up training with multiple GPUs, follow these steps:
- Ensure that you have multiple GPUs available.
@@ -109,7 +109,7 @@ model.train(data="/path/to/your/data.yaml", batch=32, multi_scale=True)
- Precision
- Recall
-- Mean Average Precision (mAP)
+- [Mean Average Precision](https://www.ultralytics.com/glossary/mean-average-precision-map) (mAP)
You can access these metrics from the training logs or by using tools like TensorBoard or wandb for visualization. Implementing early stopping based on these metrics can help you achieve better results.
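+
+A hedged sketch of metric-based early stopping via the `patience` training argument (assuming the `ultralytics` package and the `coco8.yaml` sample dataset):
+
+```python
+from ultralytics import YOLO
+
+# Training stops early if validation metrics do not improve for `patience` epochs
+model = YOLO("yolov8n.pt")
+model.train(data="coco8.yaml", epochs=100, patience=20)
+```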
@@ -119,7 +119,7 @@ You can access these metrics from the training logs or by using tools like Tenso
**Solution**: To track and visualize training progress, you can consider using the following tools:
-- [TensorBoard](https://www.tensorflow.org/tensorboard): TensorBoard is a popular choice for visualizing training metrics, including loss, accuracy, and more. You can integrate it with your YOLOv8 training process.
+- [TensorBoard](https://www.tensorflow.org/tensorboard): TensorBoard is a popular choice for visualizing training metrics, including loss, [accuracy](https://www.ultralytics.com/glossary/accuracy), and more. You can integrate it with your YOLOv8 training process.
- [Comet](https://bit.ly/yolov8-readme-comet): Comet provides an extensive toolkit for experiment tracking and comparison. It allows you to track metrics, hyperparameters, and even model weights. Integration with YOLO models is also straightforward, providing you with a complete overview of your experiment cycle.
- [Ultralytics HUB](https://hub.ultralytics.com/): Ultralytics HUB offers a specialized environment for tracking YOLO models, giving you a one-stop platform to manage metrics, datasets, and even collaborate with your team. Given its tailored focus on YOLO, it offers more customized tracking options.
@@ -145,17 +145,17 @@ Here are some things to keep in mind, if you are facing issues related to model
**Dataset Format and Labels**
-- Importance: The foundation of any machine learning model lies in the quality and format of the data it is trained on.
+- Importance: The foundation of any [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) model lies in the quality and format of the data it is trained on.
- Recommendation: Ensure that your custom dataset and its associated labels adhere to the expected format. It's crucial to verify that annotations are accurate and of high quality. Incorrect or subpar annotations can derail the model's learning process, leading to unpredictable outcomes.
**Model Convergence**
-- Importance: Achieving model convergence ensures that the model has sufficiently learned from the training data.
+- Importance: Achieving model convergence ensures that the model has sufficiently learned from the [training data](https://www.ultralytics.com/glossary/training-data).
-- Recommendation: When training a model 'from scratch', it's vital to ensure that the model reaches a satisfactory level of convergence. This might necessitate a longer training duration, with more epochs, compared to when you're fine-tuning an existing model.
+- Recommendation: When training a model 'from scratch', it's vital to ensure that the model reaches a satisfactory level of convergence. This might necessitate a longer training duration, with more [epochs](https://www.ultralytics.com/glossary/epoch), compared to when you're fine-tuning an existing model.
-**Learning Rate and Batch Size**
+**[Learning Rate](https://www.ultralytics.com/glossary/learning-rate) and Batch Size**
- Importance: These hyperparameters play a pivotal role in determining how the model updates its weights during training.
@@ -207,9 +207,9 @@ yolo task=detect mode=segment model=yolov8n-seg.pt source='path/to/car.mp4' show
#### Understanding Precision Metrics in YOLOv8
-**Issue**: Confusion regarding the difference between box precision, mask precision, and confusion matrix precision in YOLOv8.
+**Issue**: Confusion regarding the difference between box precision, mask precision, and [confusion matrix](https://www.ultralytics.com/glossary/confusion-matrix) precision in YOLOv8.
-**Solution**: Box precision measures the accuracy of predicted bounding boxes compared to the actual ground truth boxes using IoU (Intersection over Union) as the metric. Mask precision assesses the agreement between predicted segmentation masks and ground truth masks in pixel-wise object classification. Confusion matrix precision, on the other hand, focuses on overall classification accuracy across all classes and does not consider the geometric accuracy of predictions. It's important to note that a bounding box can be geometrically accurate (true positive) even if the class prediction is wrong, leading to differences between box precision and confusion matrix precision. These metrics evaluate distinct aspects of a model's performance, reflecting the need for different evaluation metrics in various tasks.
+**Solution**: Box precision measures the accuracy of predicted bounding boxes compared to the actual ground truth boxes using IoU (Intersection over Union) as the metric. Mask precision assesses the agreement between predicted segmentation masks and ground truth masks in pixel-wise object classification. Confusion matrix precision, on the other hand, focuses on overall classification accuracy across all classes and does not consider the geometric accuracy of predictions. It's important to note that a [bounding box](https://www.ultralytics.com/glossary/bounding-box) can be geometrically accurate (true positive) even if the class prediction is wrong, leading to differences between box precision and confusion matrix precision. These metrics evaluate distinct aspects of a model's performance, reflecting the need for different evaluation metrics in various tasks.
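+
+For reference, a minimal (not Ultralytics-specific) sketch of the IoU computation that underlies box precision:
+
+```python
+def box_iou(a, b):
+    """IoU of two axis-aligned boxes given as (x1, y1, x2, y2)."""
+    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
+    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
+    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
+    union = (a[2] - a[0]) * (a[3] - a[1]) + (b[2] - b[0]) * (b[3] - b[1]) - inter
+    return inter / union if union else 0.0
+
+
+print(box_iou((0, 0, 100, 100), (50, 50, 150, 150)))  # ≈ 0.143
+```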
#### Extracting Object Dimensions in YOLOv8
@@ -280,7 +280,7 @@ These resources should provide a solid foundation for troubleshooting and improv
## Conclusion
-Troubleshooting is an integral part of any development process, and being equipped with the right knowledge can significantly reduce the time and effort spent in resolving issues. This guide aimed to address the most common challenges faced by users of the YOLOv8 model within the Ultralytics ecosystem. By understanding and addressing these common issues, you can ensure smoother project progress and achieve better results with your computer vision tasks.
+Troubleshooting is an integral part of any development process, and being equipped with the right knowledge can significantly reduce the time and effort spent in resolving issues. This guide aimed to address the most common challenges faced by users of the YOLOv8 model within the Ultralytics ecosystem. By understanding and addressing these common issues, you can ensure smoother project progress and achieve better results with your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) tasks.
Remember, the Ultralytics community is a valuable resource. Engaging with fellow developers and experts can provide additional insights and solutions that might not be covered in standard documentation. Always keep learning, experimenting, and sharing your experiences to contribute to the collective knowledge of the community.
@@ -312,7 +312,7 @@ This sets the training process to the first GPU. Consult the `nvidia-smi` comman
### How can I monitor and track my YOLOv8 model training progress?
-Tracking and visualizing training progress can be efficiently managed through tools like [TensorBoard](https://www.tensorflow.org/tensorboard), [Comet](https://bit.ly/yolov8-readme-comet), and [Ultralytics HUB](https://hub.ultralytics.com/). These tools allow you to log and visualize metrics such as loss, precision, recall, and mAP. Implementing [early stopping](#continuous-monitoring-parameters) based on these metrics can also help achieve better training outcomes.
+Tracking and visualizing training progress can be efficiently managed through tools like [TensorBoard](https://www.tensorflow.org/tensorboard), [Comet](https://bit.ly/yolov8-readme-comet), and [Ultralytics HUB](https://hub.ultralytics.com/). These tools allow you to log and visualize metrics such as loss, [precision](https://www.ultralytics.com/glossary/precision), [recall](https://www.ultralytics.com/glossary/recall), and mAP. Implementing [early stopping](#continuous-monitoring-parameters) based on these metrics can also help achieve better training outcomes.
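+
+As a minimal sketch (dataset, model, and values are illustrative), early stopping is typically enabled through the `patience` training argument, which halts training when the validation metrics stop improving:
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolov8n.pt")
+
+# Stop training if no validation improvement is observed for 50 consecutive epochs
+results = model.train(data="coco8.yaml", epochs=300, patience=50)
+```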
### What should I do if YOLOv8 is not recognizing my dataset format?
diff --git a/docs/en/guides/yolo-performance-metrics.md b/docs/en/guides/yolo-performance-metrics.md
index d885b9ea..aeed8235 100644
--- a/docs/en/guides/yolo-performance-metrics.md
+++ b/docs/en/guides/yolo-performance-metrics.md
@@ -8,7 +8,7 @@ keywords: YOLOv8 performance metrics, mAP, IoU, F1 Score, Precision, Recall, obj
## Introduction
-Performance metrics are key tools to evaluate the accuracy and efficiency of object detection models. They shed light on how effectively a model can identify and localize objects within images. Additionally, they help in understanding the model's handling of false positives and false negatives. These insights are crucial for evaluating and enhancing the model's performance. In this guide, we will explore various performance metrics associated with YOLOv8, their significance, and how to interpret them.
+Performance metrics are key tools to evaluate the [accuracy](https://www.ultralytics.com/glossary/accuracy) and efficiency of [object detection](https://www.ultralytics.com/glossary/object-detection) models. They shed light on how effectively a model can identify and localize objects within images. Additionally, they help in understanding the model's handling of false positives and false negatives. These insights are crucial for evaluating and enhancing the model's performance. In this guide, we will explore various performance metrics associated with YOLOv8, their significance, and how to interpret them.
@@ -18,14 +18,14 @@ Performance metrics are key tools to evaluate the accuracy and efficiency of obj
allowfullscreen>
- Watch: Ultralytics YOLOv8 Performance Metrics | MAP, F1 Score, Precision, IoU & Accuracy
+ Watch: Ultralytics YOLOv8 Performance Metrics | MAP, F1 Score, [Precision](https://www.ultralytics.com/glossary/precision), IoU & Accuracy
@@ -44,7 +44,7 @@ The Ultralytics Android App is a powerful tool that allows you to run YOLO model
## Quantization and Acceleration
-To achieve real-time performance on your Android device, YOLO models are quantized to either FP16 or INT8 precision. Quantization is a process that reduces the numerical precision of the model's weights and biases, thus reducing the model's size and the amount of computation required. This results in faster inference times without significantly affecting the model's accuracy.
+To achieve real-time performance on your Android device, YOLO models are quantized to either FP16 or INT8 [precision](https://www.ultralytics.com/glossary/precision). Quantization is a process that reduces the numerical precision of the model's weights and biases, thus reducing the model's size and the amount of computation required. This results in faster inference times without significantly affecting the model's [accuracy](https://www.ultralytics.com/glossary/accuracy).
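+
+As a hedged sketch of how such quantized models can be produced with the Ultralytics export API (the model file is illustrative; `half` and `int8` are the standard export flags):
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolov8n.pt")
+
+# FP16 (half-precision) TFLite export
+model.export(format="tflite", half=True)
+
+# INT8 TFLite export, trading a small amount of mAP for extra speed
+# (INT8 calibration may require a representative dataset; see the Export docs)
+model.export(format="tflite", int8=True)
+```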
### FP16 Quantization
@@ -52,7 +52,7 @@ FP16 (or half-precision) quantization converts the model's 32-bit floating-point
### INT8 Quantization
-INT8 (or 8-bit integer) quantization further reduces the model's size and computation requirements by converting its 32-bit floating-point numbers to 8-bit integers. This quantization method can result in a significant speedup, but it may lead to a slight reduction in mean average precision (mAP) due to the lower numerical precision.
+INT8 (or 8-bit integer) quantization further reduces the model's size and computation requirements by converting its 32-bit floating-point numbers to 8-bit integers. This quantization method can result in a significant speedup, but it may lead to a slight reduction in [mean average precision](https://www.ultralytics.com/glossary/mean-average-precision-map) (mAP) due to the lower numerical precision.
!!! tip "mAP Reduction in INT8 Models"
@@ -65,7 +65,7 @@ Different delegates are available on Android devices to accelerate model inferen
1. **CPU**: The default option, with reasonable performance on most devices.
2. **GPU**: Utilizes the device's GPU for faster inference. It can provide a significant performance boost on devices with powerful GPUs.
3. **Hexagon**: Leverages Qualcomm's Hexagon DSP for faster and more efficient processing. This option is available on devices with Qualcomm Snapdragon processors.
-4. **NNAPI**: The Android Neural Networks API (NNAPI) serves as an abstraction layer for running ML models on Android devices. NNAPI can utilize various hardware accelerators, such as CPU, GPU, and dedicated AI chips (e.g., Google's Edge TPU, or the Pixel Neural Core).
+4. **NNAPI**: The Android [Neural Networks](https://www.ultralytics.com/glossary/neural-network-nn) API (NNAPI) serves as an abstraction layer for running ML models on Android devices. NNAPI can utilize various hardware accelerators, such as CPU, GPU, and dedicated AI chips (e.g., Google's Edge TPU, or the Pixel Neural Core).
Here's a table showing the primary vendors, their product lines, popular devices, and supported delegates:
diff --git a/docs/en/hub/app/index.md b/docs/en/hub/app/index.md
index 9266a0bd..e812d686 100644
--- a/docs/en/hub/app/index.md
+++ b/docs/en/hub/app/index.md
@@ -31,11 +31,11 @@ keywords: Ultralytics HUB, YOLO models, mobile app, iOS, Android, hardware accel
-Welcome to the Ultralytics HUB App! We are excited to introduce this powerful mobile app that allows you to run YOLOv5 and YOLOv8 models directly on your [iOS](https://apps.apple.com/xk/app/ultralytics/id1583935240) and [Android](https://play.google.com/store/apps/details?id=com.ultralytics.ultralytics_app) devices. With the HUB App, you can utilize hardware acceleration features like Apple's Neural Engine (ANE) or Android GPU and Neural Network API (NNAPI) delegates to achieve impressive performance on your mobile device.
+Welcome to the Ultralytics HUB App! We are excited to introduce this powerful mobile app that allows you to run YOLOv5 and YOLOv8 models directly on your [iOS](https://apps.apple.com/xk/app/ultralytics/id1583935240) and [Android](https://play.google.com/store/apps/details?id=com.ultralytics.ultralytics_app) devices. With the HUB App, you can utilize hardware acceleration features like Apple's Neural Engine (ANE) or Android GPU and [Neural Network](https://www.ultralytics.com/glossary/neural-network-nn) API (NNAPI) delegates to achieve impressive performance on your mobile device.
## Features
-- **Run YOLOv5 and YOLOv8 models**: Experience the power of YOLO models on your mobile device for real-time object detection and image recognition tasks.
+- **Run YOLOv5 and YOLOv8 models**: Experience the power of YOLO models on your mobile device for real-time [object detection](https://www.ultralytics.com/glossary/object-detection) and [image recognition](https://www.ultralytics.com/glossary/image-recognition) tasks.
- **Hardware Acceleration**: Benefit from Apple ANE on iOS devices or Android GPU and NNAPI delegates for optimized performance.
- **Custom Model Training**: Train custom models with the Ultralytics HUB platform and preview them live using the HUB App.
- **Mobile Compatibility**: The HUB App supports both iOS and Android devices, bringing the power of YOLO models to a wide range of users.
diff --git a/docs/en/hub/app/ios.md b/docs/en/hub/app/ios.md
index 5468633f..be896fe8 100644
--- a/docs/en/hub/app/ios.md
+++ b/docs/en/hub/app/ios.md
@@ -4,7 +4,7 @@ description: Discover the Ultralytics iOS App for running YOLO models on your iP
keywords: Ultralytics, iOS App, YOLO models, real-time object detection, Apple Neural Engine, Core ML, FP16 quantization, INT8 quantization, machine learning
---
-# Ultralytics iOS App: Real-time Object Detection with YOLO Models
+# Ultralytics iOS App: Real-time [Object Detection](https://www.ultralytics.com/glossary/object-detection) with YOLO Models
@@ -44,7 +44,7 @@ The Ultralytics iOS App is a powerful tool that allows you to run YOLO models di
## Quantization and Acceleration
-To achieve real-time performance on your iOS device, YOLO models are quantized to either FP16 or INT8 precision. Quantization is a process that reduces the numerical precision of the model's weights and biases, thus reducing the model's size and the amount of computation required. This results in faster inference times without significantly affecting the model's accuracy.
+To achieve real-time performance on your iOS device, YOLO models are quantized to either FP16 or INT8 [precision](https://www.ultralytics.com/glossary/precision). Quantization is a process that reduces the numerical precision of the model's weights and biases, thus reducing the model's size and the amount of computation required. This results in faster inference times without significantly affecting the model's [accuracy](https://www.ultralytics.com/glossary/accuracy).
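+
+For reference, a minimal sketch (model file is illustrative) of producing an FP16 Core ML model with the Ultralytics export API:
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolov8n.pt")
+
+# Export to Core ML with FP16 weights for deployment on Apple hardware
+model.export(format="coreml", half=True)
+```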
### FP16 Quantization
@@ -56,7 +56,7 @@ INT8 (or 8-bit integer) quantization further reduces the model's size and comput
## Apple Neural Engine
-The Apple Neural Engine (ANE) is a dedicated hardware component integrated into Apple's A-series and M-series chips. It's designed to accelerate machine learning tasks, particularly for neural networks, allowing for faster and more efficient execution of your YOLO models.
+The Apple Neural Engine (ANE) is a dedicated hardware component integrated into Apple's A-series and M-series chips. It's designed to accelerate [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) tasks, particularly for [neural networks](https://www.ultralytics.com/glossary/neural-network-nn), allowing for faster and more efficient execution of your YOLO models.
By combining quantized YOLO models with the Apple Neural Engine, the Ultralytics iOS App achieves real-time object detection on your iOS device without compromising on accuracy or performance.
diff --git a/docs/en/hub/cloud-training.md b/docs/en/hub/cloud-training.md
index 42dd6810..f0303673 100644
--- a/docs/en/hub/cloud-training.md
+++ b/docs/en/hub/cloud-training.md
@@ -28,7 +28,7 @@ Follow the [Train Model](./models.md#train-model) instructions from the [Models]
![Ultralytics HUB screenshot of the Train Model dialog with arrows pointing to the Cloud Training options and the Start Training button](https://github.com/ultralytics/docs/releases/download/0/ultralytics-hub-train-model-dialog.avif)
-??? note "Note"
+??? note
When you are on this step, you have the option to close the **Train Model** dialog and start training your model from the Model page later.
@@ -36,15 +36,15 @@ Follow the [Train Model](./models.md#train-model) instructions from the [Models]
Most of the time, you will use the Epochs training. The number of epochs can be adjusted at this step (if training hasn't started yet) and represents the number of times your dataset needs to go through the cycle of train, label, and test. The exact pricing based on the number of epochs is hard to determine, which is why we only allow the [Account Balance](./pro.md#account-balance) payment method.
-!!! note "Note"
+!!! note
When using the Epochs training, your [account balance](./pro.md#account-balance) needs to be at least US$5.00 to start training. If your balance is low, you can top up directly from this step.
![Ultralytics HUB screenshot of the Train Model dialog with an arrow pointing to the Top-Up button](https://github.com/ultralytics/docs/releases/download/0/ultralytics-hub-train-model-dialog-top-up.avif)
-!!! note "Note"
+!!! note
- When using the Epochs training, the [account balance](./pro.md#account-balance) is deducted after every epoch.
+ When using the Epochs training, the [account balance](./pro.md#account-balance) is deducted after every [epoch](https://www.ultralytics.com/glossary/epoch).
Also, after every epoch, we check whether your [account balance](./pro.md#account-balance) covers the next epoch. If it doesn't, we will stop the training session, allowing you to resume training your model from the last saved checkpoint.
@@ -64,7 +64,7 @@ Before the training session starts, the initialization process spins up a dedica
![Ultralytics HUB screenshot of the Model page during the initialization process](https://github.com/ultralytics/docs/releases/download/0/model-page-initialization-process.avif)
-!!! note "Note"
+!!! note
The account balance is not deducted during the initialization process (before the training session starts).
@@ -74,7 +74,7 @@ If needed, you can stop the training by clicking on the **Stop Training** button
![Ultralytics HUB screenshot of the Model page of a model that is currently training with an arrow pointing to the Stop Training button](https://github.com/ultralytics/docs/releases/download/0/model-page-training-stop-button.avif)
-!!! note "Note"
+!!! note
You can resume training your model from the last checkpoint saved.
@@ -90,7 +90,7 @@ If needed, you can stop the training by clicking on the **Stop Training** button
Watch: Pause and Resume Model Training Using Ultralytics HUB
@@ -27,7 +27,7 @@ This guide showcases Ultralytics YOLOv8 integration with Weights & Biases' for e
@@ -43,13 +44,13 @@ Here are some of the key models supported:
This example provides simple YOLO training and inference examples. For full documentation on these and other [modes](../modes/index.md) see the [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) and [Export](../modes/export.md) docs pages.
-Note the below example is for YOLOv8 [Detect](../tasks/detect.md) models for object detection. For additional supported tasks see the [Segment](../tasks/segment.md), [Classify](../tasks/classify.md) and [Pose](../tasks/pose.md) docs.
+Note the below example is for YOLOv8 [Detect](../tasks/detect.md) models for [object detection](https://www.ultralytics.com/glossary/object-detection). For additional supported tasks see the [Segment](../tasks/segment.md), [Classify](../tasks/classify.md) and [Pose](../tasks/pose.md) docs.
!!! example
=== "Python"
- PyTorch pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()`, `SAM()`, `NAS()` and `RTDETR()` classes to create a model instance in Python:
+ [PyTorch](https://www.ultralytics.com/glossary/pytorch) pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()`, `SAM()`, `NAS()` and `RTDETR()` classes to create a model instance in Python:
```python
from ultralytics import YOLO
@@ -101,7 +102,7 @@ For detailed steps, consult our [Contributing Guide](../help/contributing.md).
### What are the key advantages of using Ultralytics YOLOv8 for object detection?
-Ultralytics YOLOv8 offers enhanced capabilities such as real-time object detection, instance segmentation, pose estimation, and classification. Its optimized architecture ensures high-speed performance without sacrificing accuracy, making it ideal for a variety of applications. YOLOv8 also includes built-in compatibility with popular datasets and models, as detailed on the [YOLOv8 documentation page](../models/yolov8.md).
+Ultralytics YOLOv8 offers enhanced capabilities such as real-time object detection, instance segmentation, pose estimation, and classification. Its optimized architecture ensures high-speed performance without sacrificing [accuracy](https://www.ultralytics.com/glossary/accuracy), making it ideal for a variety of applications. YOLOv8 also includes built-in compatibility with popular datasets and models, as detailed on the [YOLOv8 documentation page](../models/yolov8.md).
### How can I train a YOLOv8 model on custom data?
@@ -133,7 +134,7 @@ For more detailed instructions, visit the [Train](../modes/train.md) documentati
Ultralytics supports a comprehensive range of YOLO (You Only Look Once) versions from YOLOv3 to YOLOv10, along with models like NAS, SAM, and RT-DETR. Each version is optimized for various tasks such as detection, segmentation, and classification. For detailed information on each model, refer to the [Models Supported by Ultralytics](../models/index.md) documentation.
-### Why should I use Ultralytics HUB for machine learning projects?
+### Why should I use Ultralytics HUB for [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) projects?
Ultralytics HUB provides a no-code, end-to-end platform for training, deploying, and managing YOLO models. It simplifies complex workflows, enabling users to focus on model performance and application. The HUB also offers cloud training capabilities, comprehensive dataset management, and user-friendly interfaces. Learn more about it on the [Ultralytics HUB](../hub/index.md) documentation page.
diff --git a/docs/en/models/mobile-sam.md b/docs/en/models/mobile-sam.md
index ed3ab1c9..26c92f68 100644
--- a/docs/en/models/mobile-sam.md
+++ b/docs/en/models/mobile-sam.md
@@ -122,7 +122,7 @@ If you find MobileSAM useful in your research or development work, please consid
### What is MobileSAM and how does it differ from the original SAM model?
-MobileSAM is a lightweight, fast image segmentation model designed for mobile applications. It retains the same pipeline as the original SAM but replaces the heavyweight ViT-H encoder (632M parameters) with a smaller Tiny-ViT encoder (5M parameters). This change results in MobileSAM being approximately 5 times smaller and 7 times faster than the original SAM. For instance, MobileSAM operates at about 12ms per image, compared to the original SAM's 456ms. You can learn more about the MobileSAM implementation in various projects [here](https://github.com/ChaoningZhang/MobileSAM).
+MobileSAM is a lightweight, fast [image segmentation](https://www.ultralytics.com/glossary/image-segmentation) model designed for mobile applications. It retains the same pipeline as the original SAM but replaces the heavyweight ViT-H encoder (632M parameters) with a smaller Tiny-ViT encoder (5M parameters). This change results in MobileSAM being approximately 5 times smaller and 7 times faster than the original SAM. For instance, MobileSAM operates at about 12ms per image, compared to the original SAM's 456ms. You can learn more about the MobileSAM implementation in various projects [here](https://github.com/ChaoningZhang/MobileSAM).
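+
+For quick reference, a minimal point-prompt inference sketch (image path and point coordinates are illustrative):
+
+```python
+from ultralytics import SAM
+
+# Load the MobileSAM weights
+model = SAM("mobile_sam.pt")
+
+# Segment the object at a single point prompt (label 1 = foreground)
+model.predict("path/to/image.jpg", points=[900, 370], labels=[1])
+```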
### How can I test MobileSAM using Ultralytics?
@@ -152,7 +152,7 @@ MobileSAM was trained on a single GPU with a 100k dataset, which is 1% of the or
MobileSAM is designed for fast and efficient image segmentation in mobile environments. Primary use cases include:
-- **Real-time object detection and segmentation** for mobile applications.
+- **Real-time [object detection](https://www.ultralytics.com/glossary/object-detection) and segmentation** for mobile applications.
- **Low-latency image processing** in devices with limited computational resources.
- **Integration in AI-driven mobile apps** for tasks such as augmented reality (AR) and real-time analytics.
diff --git a/docs/en/models/rtdetr.md b/docs/en/models/rtdetr.md
index 46269e44..47710e03 100644
--- a/docs/en/models/rtdetr.md
+++ b/docs/en/models/rtdetr.md
@@ -4,11 +4,11 @@ description: Explore Baidu's RT-DETR, a Vision Transformer-based real-time objec
keywords: RT-DETR, Baidu, Vision Transformer, real-time object detection, PaddlePaddle, Ultralytics, pre-trained models, AI, machine learning, computer vision
---
-# Baidu's RT-DETR: A Vision Transformer-Based Real-Time Object Detector
+# Baidu's RT-DETR: A Vision [Transformer](https://www.ultralytics.com/glossary/transformer)-Based Real-Time Object Detector
## Overview
-Real-Time Detection Transformer (RT-DETR), developed by Baidu, is a cutting-edge end-to-end object detector that provides real-time performance while maintaining high accuracy. It is based on the idea of DETR (the NMS-free framework), meanwhile introducing conv-based backbone and an efficient hybrid encoder to gain real-time speed. RT-DETR efficiently processes multiscale features by decoupling intra-scale interaction and cross-scale fusion. The model is highly adaptable, supporting flexible adjustment of inference speed using different decoder layers without retraining. RT-DETR excels on accelerated backends like CUDA with TensorRT, outperforming many other real-time object detectors.
+Real-Time Detection Transformer (RT-DETR), developed by Baidu, is a cutting-edge end-to-end object detector that provides real-time performance while maintaining high [accuracy](https://www.ultralytics.com/glossary/accuracy). It builds on the idea of DETR (the NMS-free framework) while introducing a convolution-based backbone and an efficient hybrid encoder to achieve real-time speed. RT-DETR efficiently processes multiscale features by decoupling intra-scale interaction and cross-scale fusion. The model is highly adaptable, supporting flexible adjustment of inference speed using different decoder layers without retraining. RT-DETR excels on accelerated backends like CUDA with TensorRT, outperforming many other real-time object detectors.
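+
+A minimal usage sketch (weight file and image path are illustrative) for loading a pre-trained RT-DETR model through the Ultralytics API:
+
+```python
+from ultralytics import RTDETR
+
+# Load a COCO-pretrained RT-DETR-l model
+model = RTDETR("rtdetr-l.pt")
+
+# Run inference on an image
+results = model("path/to/bus.jpg")
+```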
@@ -25,7 +25,7 @@ Real-Time Detection Transformer (RT-DETR), developed by Baidu, is a cutting-edge
### Key Features
-- **Efficient Hybrid Encoder:** Baidu's RT-DETR uses an efficient hybrid encoder that processes multiscale features by decoupling intra-scale interaction and cross-scale fusion. This unique Vision Transformers-based design reduces computational costs and allows for real-time object detection.
+- **Efficient Hybrid Encoder:** Baidu's RT-DETR uses an efficient hybrid encoder that processes multiscale features by decoupling intra-scale interaction and cross-scale fusion. This unique Vision Transformers-based design reduces computational costs and allows for real-time [object detection](https://www.ultralytics.com/glossary/object-detection).
- **IoU-aware Query Selection:** Baidu's RT-DETR improves object query initialization by utilizing IoU-aware query selection. This allows the model to focus on the most relevant objects in the scene, enhancing the detection accuracy.
- **Adaptable Inference Speed:** Baidu's RT-DETR supports flexible adjustments of inference speed by using different decoder layers without the need for retraining. This adaptability facilitates practical application in various real-time object detection scenarios.
@@ -98,7 +98,7 @@ If you use Baidu's RT-DETR in your research or development work, please cite the
}
```
-We would like to acknowledge Baidu and the [PaddlePaddle](https://github.com/PaddlePaddle/PaddleDetection) team for creating and maintaining this valuable resource for the computer vision community. Their contribution to the field with the development of the Vision Transformers-based real-time object detector, RT-DETR, is greatly appreciated.
+We would like to acknowledge Baidu and the [PaddlePaddle](https://github.com/PaddlePaddle/PaddleDetection) team for creating and maintaining this valuable resource for the [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) community. Their contribution to the field with the development of the Vision Transformers-based real-time object detector, RT-DETR, is greatly appreciated.
## FAQ
@@ -146,7 +146,7 @@ Baidu's RT-DETR stands out due to its efficient hybrid encoder and IoU-aware que
### How does RT-DETR support adaptable inference speed for different real-time applications?
-Baidu's RT-DETR allows flexible adjustments of inference speed by using different decoder layers without requiring retraining. This adaptability is crucial for scaling performance across various real-time object detection tasks. Whether you need faster processing for lower precision needs or slower, more accurate detections, RT-DETR can be tailored to meet your specific requirements.
+Baidu's RT-DETR allows flexible adjustments of inference speed by using different decoder layers without requiring retraining. This adaptability is crucial for scaling performance across various real-time object detection tasks. Whether you need faster processing for lower [precision](https://www.ultralytics.com/glossary/precision) needs or slower, more accurate detections, RT-DETR can be tailored to meet your specific requirements.
### Can I use RT-DETR models with other Ultralytics modes, such as training, validation, and export?
diff --git a/docs/en/models/sam-2.md b/docs/en/models/sam-2.md
index 336b294e..025c18d2 100644
--- a/docs/en/models/sam-2.md
+++ b/docs/en/models/sam-2.md
@@ -38,18 +38,18 @@ For a deeper understanding of SAM 2's architecture and capabilities, explore the
SAM 2 sets a new benchmark in the field, outperforming previous models on various metrics:
-| Metric | SAM 2 | Previous SOTA |
-| ---------------------------------- | ------------- | ------------- |
-| **Interactive Video Segmentation** | **Best** | - |
-| **Human Interactions Required** | **3x fewer** | Baseline |
-| **Image Segmentation Accuracy** | **Improved** | SAM |
-| **Inference Speed** | **6x faster** | SAM |
+| Metric | SAM 2 | Previous SOTA |
+| ------------------------------------------------------------------------------------------ | ------------- | ------------- |
+| **Interactive Video Segmentation** | **Best** | - |
+| **Human Interactions Required** | **3x fewer** | Baseline |
+| **[Image Segmentation](https://www.ultralytics.com/glossary/image-segmentation) Accuracy** | **Improved** | SAM |
+| **Inference Speed** | **6x faster** | SAM |
## Model Architecture
### Core Components
-- **Image and Video Encoder**: Utilizes a transformer-based architecture to extract high-level features from both images and video frames. This component is responsible for understanding the visual content at each timestep.
+- **Image and Video Encoder**: Utilizes a [transformer](https://www.ultralytics.com/glossary/transformer)-based architecture to extract high-level features from both images and video frames. This component is responsible for understanding the visual content at each timestep.
- **Prompt Encoder**: Processes user-provided prompts (points, boxes, masks) to guide the segmentation task. This allows SAM 2 to adapt to user input and target specific objects within a scene.
- **Memory Mechanism**: Includes a memory encoder, memory bank, and memory attention module. These components collectively store and utilize information from past frames, enabling the model to maintain consistent object tracking over time.
- **Mask Decoder**: Generates the final segmentation masks based on the encoded image features and prompts. In video, it also uses memory context to ensure accurate tracking across frames.
@@ -242,7 +242,7 @@ Despite its strengths, SAM 2 has certain limitations:
- **Tracking Stability**: SAM 2 may lose track of objects during extended sequences or significant viewpoint changes.
- **Object Confusion**: The model can sometimes confuse similar-looking objects, particularly in crowded scenes.
- **Efficiency with Multiple Objects**: Segmentation efficiency decreases when processing multiple objects simultaneously due to the lack of inter-object communication.
-- **Detail Accuracy**: May miss fine details, especially with fast-moving objects. Additional prompts can partially address this issue, but temporal smoothness is not guaranteed.
+- **Detail [Accuracy](https://www.ultralytics.com/glossary/accuracy)**: May miss fine details, especially with fast-moving objects. Additional prompts can partially address this issue, but temporal smoothness is not guaranteed.
## Citations and Acknowledgements
@@ -327,7 +327,7 @@ This mechanism ensures continuity even when objects are temporarily obscured or
### How does SAM 2 compare to other segmentation models like YOLOv8?
-SAM 2 and Ultralytics YOLOv8 serve different purposes and excel in different areas. While SAM 2 is designed for comprehensive object segmentation with advanced features like zero-shot generalization and real-time performance, YOLOv8 is optimized for speed and efficiency in object detection and segmentation tasks. Here's a comparison:
+SAM 2 and Ultralytics YOLOv8 serve different purposes and excel in different areas. While SAM 2 is designed for comprehensive object segmentation with advanced features like zero-shot generalization and real-time performance, YOLOv8 is optimized for speed and efficiency in [object detection](https://www.ultralytics.com/glossary/object-detection) and segmentation tasks. Here's a comparison:
| Model                                          | Size<br>(MB)             | Parameters<br>(M)            | Speed (CPU)<br>(ms/im)            |
| ---------------------------------------------- | ----------------------- | ---------------------------- | --------------------------------- |
diff --git a/docs/en/models/sam.md b/docs/en/models/sam.md
index 37680413..304fc002 100644
--- a/docs/en/models/sam.md
+++ b/docs/en/models/sam.md
@@ -6,7 +6,7 @@ keywords: Segment Anything, SAM, image segmentation, promptable segmentation, ze
# Segment Anything Model (SAM)
-Welcome to the frontier of image segmentation with the Segment Anything Model, or SAM. This revolutionary model has changed the game by introducing promptable image segmentation with real-time performance, setting new standards in the field.
+Welcome to the frontier of [image segmentation](https://www.ultralytics.com/glossary/image-segmentation) with the Segment Anything Model, or SAM. This revolutionary model has changed the game by introducing promptable image segmentation with real-time performance, setting new standards in the field.
## Introduction to SAM: The Segment Anything Model
@@ -21,7 +21,7 @@ SAM's advanced design allows it to adapt to new image distributions and tasks wi
- **Promptable Segmentation Task:** SAM was designed with a promptable segmentation task in mind, allowing it to generate valid segmentation masks from any given prompt, such as spatial or text clues identifying an object.
- **Advanced Architecture:** The Segment Anything Model employs a powerful image encoder, a prompt encoder, and a lightweight mask decoder. This unique architecture enables flexible prompting, real-time mask computation, and ambiguity awareness in segmentation tasks.
- **The SA-1B Dataset:** Introduced by the Segment Anything project, the SA-1B dataset features over 1 billion masks on 11 million images. As the largest segmentation dataset to date, it provides SAM with a diverse and large-scale training data source.
-- **Zero-Shot Performance:** SAM displays outstanding zero-shot performance across various segmentation tasks, making it a ready-to-use tool for diverse applications with minimal need for prompt engineering.
+- **Zero-Shot Performance:** SAM displays outstanding zero-shot performance across various segmentation tasks, making it a ready-to-use tool for diverse applications with minimal need for [prompt engineering](https://www.ultralytics.com/glossary/prompt-engineering).
For an in-depth look at the Segment Anything Model and the SA-1B dataset, please visit the [Segment Anything website](https://segment-anything.com/) and check out the research paper [Segment Anything](https://arxiv.org/abs/2304.02643).
@@ -36,7 +36,7 @@ This table presents the available models with their specific pre-trained weights
## How to Use SAM: Versatility and Power in Image Segmentation
-The Segment Anything Model can be employed for a multitude of downstream tasks that go beyond its training data. This includes edge detection, object proposal generation, instance segmentation, and preliminary text-to-mask prediction. With prompt engineering, SAM can swiftly adapt to new tasks and data distributions in a zero-shot manner, establishing it as a versatile and potent tool for all your image segmentation needs.
+The Segment Anything Model can be employed for a multitude of downstream tasks that go beyond its training data. This includes edge detection, object proposal generation, [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation), and preliminary text-to-mask prediction. With prompt engineering, SAM can swiftly adapt to new tasks and data distributions in a zero-shot manner, establishing it as a versatile and potent tool for all your image segmentation needs.
### SAM prediction example
@@ -222,7 +222,7 @@ If you find SAM useful in your research or development work, please consider cit
}
```
-We would like to express our gratitude to Meta AI for creating and maintaining this valuable resource for the computer vision community.
+We would like to express our gratitude to Meta AI for creating and maintaining this valuable resource for the [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) community.
## FAQ
@@ -273,4 +273,4 @@ This function takes the path to your images and optional arguments for pre-train
### What datasets are used to train the Segment Anything Model (SAM)?
-SAM is trained on the extensive [SA-1B dataset](https://ai.facebook.com/datasets/segment-anything/) which comprises over 1 billion masks across 11 million images. SA-1B is the largest segmentation dataset to date, providing high-quality and diverse training data, ensuring impressive zero-shot performance in varied segmentation tasks. For more details, visit the [Dataset section](#key-features-of-the-segment-anything-model-sam).
+SAM is trained on the extensive [SA-1B dataset](https://ai.facebook.com/datasets/segment-anything/) which comprises over 1 billion masks across 11 million images. SA-1B is the largest segmentation dataset to date, providing high-quality and diverse [training data](https://www.ultralytics.com/glossary/training-data), ensuring impressive zero-shot performance in varied segmentation tasks. For more details, visit the [Dataset section](#key-features-of-the-segment-anything-model-sam).
diff --git a/docs/en/models/yolo-nas.md b/docs/en/models/yolo-nas.md
index f0aff53b..5523cb1b 100644
--- a/docs/en/models/yolo-nas.md
+++ b/docs/en/models/yolo-nas.md
@@ -8,7 +8,7 @@ keywords: YOLO-NAS, Deci AI, object detection, deep learning, Neural Architectur
## Overview
-Developed by Deci AI, YOLO-NAS is a groundbreaking object detection foundational model. It is the product of advanced Neural Architecture Search technology, meticulously designed to address the limitations of previous YOLO models. With significant improvements in quantization support and accuracy-latency trade-offs, YOLO-NAS represents a major leap in object detection.
+Developed by Deci AI, YOLO-NAS is a groundbreaking object detection foundational model. It is the product of advanced Neural Architecture Search technology, meticulously designed to address the limitations of previous YOLO models. With significant improvements in quantization support and [accuracy](https://www.ultralytics.com/glossary/accuracy)-latency trade-offs, YOLO-NAS represents a major leap in object detection.
![Model example image](https://github.com/ultralytics/docs/releases/download/0/yolo-nas-coco-map-metrics.avif) **Overview of YOLO-NAS.** YOLO-NAS employs quantization-aware blocks and selective quantization for optimal performance. The model, when converted to its INT8 quantized version, experiences a minimal precision drop, a significant improvement over other models. These advancements culminate in a superior architecture with unprecedented object detection capabilities and outstanding performance.
@@ -31,7 +31,7 @@ Experience the power of next-generation object detection with the pre-trained YO
| YOLO-NAS M INT-8 | 51.0 | 3.78 |
| YOLO-NAS L INT-8 | 52.1 | 4.78 |
-Each model variant is designed to offer a balance between Mean Average Precision (mAP) and latency, helping you optimize your object detection tasks for both performance and speed.
+Each model variant is designed to offer a balance between [Mean Average Precision](https://www.ultralytics.com/glossary/mean-average-precision-map) (mAP) and latency, helping you optimize your object detection tasks for both performance and speed.
## Usage Examples
@@ -49,7 +49,7 @@ In this example we validate YOLO-NAS-s on the COCO8 dataset.
=== "Python"
- PyTorch pretrained `*.pt` models files can be passed to the `NAS()` class to create a model instance in python:
+ [PyTorch](https://www.ultralytics.com/glossary/pytorch) pretrained `*.pt` model files can be passed to the `NAS()` class to create a model instance in Python:
```python
from ultralytics import NAS
@@ -84,7 +84,7 @@ In this example we validate YOLO-NAS-s on the COCO8 dataset.
We offer three variants of the YOLO-NAS models: Small (s), Medium (m), and Large (l). Each variant is designed to cater to different computational and performance needs:
- **YOLO-NAS-s**: Optimized for environments where computational resources are limited but efficiency is key.
-- **YOLO-NAS-m**: Offers a balanced approach, suitable for general-purpose object detection with higher accuracy.
+- **YOLO-NAS-m**: Offers a balanced approach, suitable for general-purpose [object detection](https://www.ultralytics.com/glossary/object-detection) with higher accuracy.
- **YOLO-NAS-l**: Tailored for scenarios requiring the highest accuracy, where computational resources are less of a constraint.
Below is a detailed overview of each model, including links to their pre-trained weights, the tasks they support, and their compatibility with different operating modes.
@@ -115,7 +115,7 @@ If you employ YOLO-NAS in your research or development work, please cite SuperGr
}
```
-We express our gratitude to Deci AI's [SuperGradients](https://github.com/Deci-AI/super-gradients/) team for their efforts in creating and maintaining this valuable resource for the computer vision community. We believe YOLO-NAS, with its innovative architecture and superior object detection capabilities, will become a critical tool for developers and researchers alike.
+We express our gratitude to Deci AI's [SuperGradients](https://github.com/Deci-AI/super-gradients/) team for their efforts in creating and maintaining this valuable resource for the [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) community. We believe YOLO-NAS, with its innovative architecture and superior object detection capabilities, will become a critical tool for developers and researchers alike.
## FAQ
@@ -146,7 +146,7 @@ For more information, refer to the [Inference and Validation Examples](#inferenc
YOLO-NAS introduces several key features that make it a superior choice for object detection tasks:
-- **Quantization-Friendly Basic Block:** Enhanced architecture that improves model performance with minimal precision drop post quantization.
+- **Quantization-Friendly Basic Block:** Enhanced architecture that improves model performance with minimal [precision](https://www.ultralytics.com/glossary/precision) drop post quantization.
- **Sophisticated Training and Quantization:** Employs advanced training schemes and post-training quantization techniques.
- **AutoNAC Optimization and Pre-training:** Utilizes AutoNAC optimization and is pre-trained on prominent datasets like COCO, Objects365, and Roboflow 100.
These features contribute to its high accuracy, efficient performance, and suitability for deployment in production environments. Learn more in the [Key Features](#key-features) section.
diff --git a/docs/en/models/yolo-world.md b/docs/en/models/yolo-world.md
index 06b69798..96a6e0b6 100644
--- a/docs/en/models/yolo-world.md
+++ b/docs/en/models/yolo-world.md
@@ -23,7 +23,7 @@ The YOLO-World Model introduces an advanced, real-time [Ultralytics](https://www
## Overview
-YOLO-World tackles the challenges faced by traditional Open-Vocabulary detection models, which often rely on cumbersome Transformer models requiring extensive computational resources. These models' dependence on pre-defined object categories also restricts their utility in dynamic scenarios. YOLO-World revitalizes the YOLOv8 framework with open-vocabulary detection capabilities, employing vision-language modeling and pre-training on expansive datasets to excel at identifying a broad array of objects in zero-shot scenarios with unmatched efficiency.
+YOLO-World tackles the challenges faced by traditional Open-Vocabulary detection models, which often rely on cumbersome [Transformer](https://www.ultralytics.com/glossary/transformer) models requiring extensive computational resources. These models' dependence on pre-defined object categories also restricts their utility in dynamic scenarios. YOLO-World revitalizes the YOLOv8 framework with open-vocabulary detection capabilities, employing vision-[language modeling](https://www.ultralytics.com/glossary/language-modeling) and pre-training on expansive datasets to excel at identifying a broad array of objects in zero-shot scenarios with unmatched efficiency.
## Key Features
@@ -77,17 +77,17 @@ The YOLO-World models are easy to integrate into your Python applications. Ultra
### Train Usage
-!!! tip "Tip"
+!!! tip
We strongly recommend using the `yolov8-worldv2` model for custom training, because it supports deterministic training and is also easy to export to other formats, e.g. ONNX or TensorRT.
-Object detection is straightforward with the `train` method, as illustrated below:
+Training an [object detection](https://www.ultralytics.com/glossary/object-detection) model is straightforward with the `train` method, as illustrated below:
!!! example
=== "Python"
- PyTorch pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLOWorld()` class to create a model instance in python:
+ [PyTorch](https://www.ultralytics.com/glossary/pytorch) pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLOWorld()` class to create a model instance in Python:
```python
from ultralytics import YOLOWorld
@@ -197,7 +197,7 @@ Object tracking with YOLO-World model on a video/images is streamlined as follow
![YOLO-World prompt class names overview](https://github.com/ultralytics/docs/releases/download/0/yolo-world-prompt-class-names-overview.avif)
-The YOLO-World framework allows for the dynamic specification of classes through custom prompts, empowering users to tailor the model to their specific needs **without retraining**. This feature is particularly useful for adapting the model to new domains or specific tasks that were not originally part of the training data. By setting custom prompts, users can essentially guide the model's focus towards objects of interest, enhancing the relevance and accuracy of the detection results.
+The YOLO-World framework allows for the dynamic specification of classes through custom prompts, empowering users to tailor the model to their specific needs **without retraining**. This feature is particularly useful for adapting the model to new domains or specific tasks that were not originally part of the [training data](https://www.ultralytics.com/glossary/training-data). By setting custom prompts, users can essentially guide the model's focus towards objects of interest, enhancing the relevance and accuracy of the detection results.
For instance, if your application only requires detecting 'person' and 'bus' objects, you can specify these classes directly:
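+
+A minimal sketch of that workflow (weight file and image path are illustrative; `set_classes` is the prompt API referenced on this page):
+
+```python
+from ultralytics import YOLOWorld
+
+# Load a pretrained YOLO-World model
+model = YOLOWorld("yolov8s-world.pt")
+
+# Restrict the open vocabulary to the classes of interest, no retraining required
+model.set_classes(["person", "bus"])
+
+# Only 'person' and 'bus' detections will be returned
+results = model.predict("path/to/image.jpg")
+```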
@@ -262,7 +262,7 @@ You can also save a model after setting custom classes. By doing this you create
- **Efficiency**: Streamlines the detection process by focusing on relevant objects, reducing computational overhead and speeding up inference.
- **Flexibility**: Allows for easy adaptation of the model to new or niche detection tasks without the need for extensive retraining or data collection.
- **Simplicity**: Simplifies deployment by eliminating the need to repeatedly specify custom classes at runtime, making the model directly usable with its embedded vocabulary.
-- **Performance**: Enhances detection accuracy for specified classes by focusing the model's attention and resources on recognizing the defined objects.
+- **Performance**: Enhances detection [accuracy](https://www.ultralytics.com/glossary/accuracy) for specified classes by focusing the model's attention and resources on recognizing the defined objects.
This approach provides a powerful means of customizing state-of-the-art object detection models for specific tasks, making advanced AI more accessible and applicable to a broader range of practical applications.
@@ -345,7 +345,7 @@ The YOLO-World model is an advanced, real-time object detection approach based o
### How does YOLO-World handle inference with custom prompts?
-YOLO-World supports a "prompt-then-detect" strategy, which utilizes an offline vocabulary to enhance efficiency. Custom prompts like captions or specific object categories are pre-encoded and stored as offline vocabulary embeddings. This approach streamlines the detection process without the need for retraining. You can dynamically set these prompts within the model to tailor it to specific detection tasks, as shown below:
+YOLO-World supports a "prompt-then-detect" strategy, which utilizes an offline vocabulary to enhance efficiency. Custom prompts like captions or specific object categories are pre-encoded and stored as offline vocabulary [embeddings](https://www.ultralytics.com/glossary/embeddings). This approach streamlines the detection process without the need for retraining. You can dynamically set these prompts within the model to tailor it to specific detection tasks, as shown below:
```python
from ultralytics import YOLOWorld
diff --git a/docs/en/models/yolo11.md b/docs/en/models/yolo11.md
new file mode 100644
index 00000000..fcda8726
--- /dev/null
+++ b/docs/en/models/yolo11.md
@@ -0,0 +1,228 @@
+---
+comments: true
+description: Discover YOLO11, the latest advancement in state-of-the-art object detection, offering unmatched accuracy and efficiency for diverse computer vision tasks.
+keywords: YOLO11, state-of-the-art object detection, YOLO series, Ultralytics, computer vision, AI, machine learning, deep learning
+---
+
+# Ultralytics YOLO11
+
+## Overview
+
+YOLO11 is the latest iteration in the [Ultralytics](https://www.ultralytics.com) YOLO series of real-time object detectors, redefining what's possible with cutting-edge [accuracy](https://www.ultralytics.com/glossary/accuracy), speed, and efficiency. Building upon the impressive advancements of previous YOLO versions, YOLO11 introduces significant improvements in architecture and training methods, making it a versatile choice for a wide range of [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) tasks.
+
+![Ultralytics YOLO11 Comparison Plots](https://github.com/user-attachments/assets/a311a4ed-bbf2-43b5-8012-5f183a28a845)
+
+
+ Watch: Ultralytics YOLO11 Announcement at YOLO Vision 2024
+
+    | Model                                                                                 | size<br>(pixels) | mAP<sup>val</sup><br>50-95 | Speed<br>CPU ONNX<br>(ms) | Speed<br>T4 TensorRT10<br>(ms) | params<br>(M) | FLOPs<br>(B) |
+ | ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------ | --------------------------------------- | ------------------ | ----------------- |
+ | [YOLO11n](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt) | 640 | 39.5 | 56.12 ± 0.82 ms | 1.55 ± 0.01 ms | 2.6 | 6.5 |
+ | [YOLO11s](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt) | 640 | 47.0 | 90.01 ± 1.17 ms | 2.46 ± 0.00 ms | 9.4 | 21.5 |
+ | [YOLO11m](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt) | 640 | 51.5 | 183.20 ± 2.04 ms | 4.70 ± 0.06 ms | 20.1 | 68.0 |
+ | [YOLO11l](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l.pt) | 640 | 53.4 | 238.64 ± 1.39 ms | 6.16 ± 0.08 ms | 25.3 | 86.9 |
+ | [YOLO11x](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt) | 640 | 54.7 | 462.78 ± 6.66 ms | 11.31 ± 0.24 ms | 56.9 | 194.9 |
+
+ === "Segmentation (COCO)"
+
+ See [Segmentation Docs](../tasks/segment.md) for usage examples with these models trained on [COCO](../datasets/segment/coco.md), which include 80 pre-trained classes.
+
+    | Model                                                                                         | size<br>(pixels) | mAP<sup>box</sup><br>50-95 | mAP<sup>mask</sup><br>50-95 | Speed<br>CPU ONNX<br>(ms) | Speed<br>T4 TensorRT10<br>(ms) | params<br>(M) | FLOPs<br>(B) |
+ | -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | --------------------------------------- | ------------------ | ----------------- |
+ | [YOLO11n-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-seg.pt) | 640 | 38.9 | 32.0 | 65.90 ± 1.14 ms | 1.84 ± 0.00 ms | 2.9 | 10.4 |
+ | [YOLO11s-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-seg.pt) | 640 | 46.6 | 37.8 | 117.56 ± 4.89 ms | 2.94 ± 0.01 ms | 10.1 | 35.5 |
+ | [YOLO11m-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-seg.pt) | 640 | 51.5 | 41.5 | 281.63 ± 1.16 ms | 6.31 ± 0.09 ms | 22.4 | 123.3 |
+ | [YOLO11l-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-seg.pt) | 640 | 53.4 | 42.9 | 344.16 ± 3.17 ms | 7.78 ± 0.16 ms | 27.6 | 142.2 |
+ | [YOLO11x-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-seg.pt) | 640 | 54.7 | 43.8 | 664.50 ± 3.24 ms | 15.75 ± 0.67 ms | 62.1 | 319.0 |
+
+ === "Classification (ImageNet)"
+
+ See [Classification Docs](../tasks/classify.md) for usage examples with these models trained on [ImageNet](../datasets/classify/imagenet.md), which include 1000 pre-trained classes.
+
+    | Model                                                                                         | size<br>(pixels) | acc<br>top1 | acc<br>top5 | Speed<br>CPU ONNX<br>(ms) | Speed<br>T4 TensorRT10<br>(ms) | params<br>(M) | FLOPs<br>(B) at 640 |
+ | -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ------------------------------ | --------------------------------------- | ------------------ | ------------------------ |
+ | [YOLO11n-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-cls.pt) | 224 | 70.0 | 89.4 | 5.03 ± 0.32 ms | 1.10 ± 0.01 ms | 1.6 | 3.3 |
+ | [YOLO11s-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-cls.pt) | 224 | 75.4 | 92.7 | 7.89 ± 0.18 ms | 1.34 ± 0.01 ms | 5.5 | 12.1 |
+ | [YOLO11m-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-cls.pt) | 224 | 77.3 | 93.9 | 17.17 ± 0.40 ms | 1.95 ± 0.00 ms | 10.4 | 39.3 |
+ | [YOLO11l-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-cls.pt) | 224 | 78.3 | 94.3 | 23.17 ± 0.29 ms | 2.76 ± 0.00 ms | 12.9 | 49.4 |
+ | [YOLO11x-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-cls.pt) | 224 | 79.5 | 94.9 | 41.41 ± 0.94 ms | 3.82 ± 0.00 ms | 28.4 | 110.4 |
+
+ === "Pose (COCO)"
+
+ See [Pose Estimation Docs](../tasks/pose.md) for usage examples with these models trained on [COCO](../datasets/pose/coco.md), which include 1 pre-trained class, 'person'.
+
+    | Model                                                                                           | size<br>(pixels) | mAP<sup>pose</sup><br>50-95 | mAP<sup>pose</sup><br>50 | Speed<br>CPU ONNX<br>(ms) | Speed<br>T4 TensorRT10<br>(ms) | params<br>(M) | FLOPs<br>(B) |
+ | ---------------------------------------------------------------------------------------------- | --------------------- | --------------------- | ------------------ | ------------------------------ | --------------------------------------- | ------------------ | ----------------- |
+ | [YOLO11n-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-pose.pt) | 640 | 50.0 | 81.0 | 52.40 ± 0.51 ms | 1.72 ± 0.01 ms | 2.9 | 7.6 |
+ | [YOLO11s-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-pose.pt) | 640 | 58.9 | 86.3 | 90.54 ± 0.59 ms | 2.57 ± 0.00 ms | 9.9 | 23.2 |
+ | [YOLO11m-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-pose.pt) | 640 | 64.9 | 89.4 | 187.28 ± 0.77 ms | 4.94 ± 0.05 ms | 20.9 | 71.7 |
+ | [YOLO11l-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-pose.pt) | 640 | 66.1 | 89.9 | 247.69 ± 1.10 ms | 6.42 ± 0.13 ms | 26.2 | 90.7 |
+ | [YOLO11x-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-pose.pt) | 640 | 69.5 | 91.1 | 487.97 ± 13.91 ms | 12.06 ± 0.20 ms | 58.8 | 203.3 |
+
+ === "OBB (DOTAv1)"
+
+ See [Oriented Detection Docs](../tasks/obb.md) for usage examples with these models trained on [DOTAv1](../datasets/obb/dota-v2.md#dota-v10), which include 15 pre-trained classes.
+
+    | Model                                                                                         | size<br>(pixels) | mAP<sup>test</sup><br>50 | Speed<br>CPU ONNX<br>(ms) | Speed<br>T4 TensorRT10<br>(ms) | params<br>(M) | FLOPs<br>(B) |
+ | -------------------------------------------------------------------------------------------- | --------------------- | ------------------ | ------------------------------ | --------------------------------------- | ------------------ | ----------------- |
+ | [YOLO11n-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-obb.pt) | 1024 | 78.4 | 117.56 ± 0.80 ms | 4.43 ± 0.01 ms | 2.7 | 17.2 |
+ | [YOLO11s-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-obb.pt) | 1024 | 79.5 | 219.41 ± 4.00 ms | 5.13 ± 0.02 ms | 9.7 | 57.5 |
+ | [YOLO11m-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-obb.pt) | 1024 | 80.9 | 562.81 ± 2.87 ms | 10.07 ± 0.38 ms | 20.9 | 183.5 |
+ | [YOLO11l-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-obb.pt) | 1024 | 81.0 | 712.49 ± 4.98 ms | 13.46 ± 0.55 ms | 26.2 | 232.0 |
+ | [YOLO11x-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-obb.pt) | 1024 | 81.3 | 1408.63 ± 7.67 ms | 28.59 ± 0.96 ms | 58.8 | 520.2 |
+
+## Usage Examples
+
+This section provides simple YOLO11 training and inference examples. For full documentation on these and other [modes](../modes/index.md), see the [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md), and [Export](../modes/export.md) docs pages.
+
+Note that the example below is for YOLO11 [Detect](../tasks/detect.md) models for [object detection](https://www.ultralytics.com/glossary/object-detection). For additional supported tasks, see the [Segment](../tasks/segment.md), [Classify](../tasks/classify.md), [OBB](../tasks/obb.md), and [Pose](../tasks/pose.md) docs.
+
+!!! example
+
+ === "Python"
+
+ [PyTorch](https://www.ultralytics.com/glossary/pytorch) pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()` class to create a model instance in Python:
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load a COCO-pretrained YOLO11n model
+ model = YOLO("yolo11n.pt")
+
+ # Train the model on the COCO8 example dataset for 100 epochs
+ results = model.train(data="coco8.yaml", epochs=100, imgsz=640)
+
+ # Run inference with the YOLO11n model on the 'bus.jpg' image
+ results = model("path/to/bus.jpg")
+ ```
+
+ === "CLI"
+
+ CLI commands are available to directly run the models:
+
+ ```bash
+ # Load a COCO-pretrained YOLO11n model and train it on the COCO8 example dataset for 100 epochs
+ yolo train model=yolo11n.pt data=coco8.yaml epochs=100 imgsz=640
+
+ # Load a COCO-pretrained YOLO11n model and run inference on the 'bus.jpg' image
+ yolo predict model=yolo11n.pt source=path/to/bus.jpg
+ ```
+
+## Citations and Acknowledgements
+
+If you use YOLO11 or any other software from this repository in your work, please cite it using the following format:
+
+!!! quote ""
+
+ === "BibTeX"
+
+ ```bibtex
+ @software{yolo11_ultralytics,
+ author = {Glenn Jocher and Jing Qiu},
+ title = {Ultralytics YOLO11},
+ version = {11.0.0},
+ year = {2024},
+ url = {https://github.com/ultralytics/ultralytics},
+ orcid = {0000-0001-5950-6979, 0000-0002-7603-6750, 0000-0003-3783-7069},
+ license = {AGPL-3.0}
+ }
+ ```
+
+Please note that the DOI is pending and will be added to the citation once it is available. YOLO11 models are provided under [AGPL-3.0](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) and [Enterprise](https://www.ultralytics.com/license) licenses.
+
+## FAQ
+
+### What are the key improvements in Ultralytics YOLO11 compared to previous versions?
+
+Ultralytics YOLO11 introduces several significant advancements over its predecessors. Key improvements include:
+
+- **Enhanced Feature Extraction:** YOLO11 employs an improved backbone and neck architecture, enhancing [feature extraction](https://www.ultralytics.com/glossary/feature-extraction) capabilities for more precise object detection.
+- **Optimized Efficiency and Speed:** Refined architectural designs and optimized training pipelines deliver faster processing speeds while maintaining a balance between accuracy and performance.
+- **Greater Accuracy with Fewer Parameters:** YOLO11m achieves higher mean Average [Precision](https://www.ultralytics.com/glossary/precision) (mAP) on the COCO dataset with 22% fewer parameters than YOLOv8m, making it computationally efficient without compromising accuracy.
+- **Adaptability Across Environments:** YOLO11 can be deployed across various environments, including edge devices, cloud platforms, and systems supporting NVIDIA GPUs.
+- **Broad Range of Supported Tasks:** YOLO11 supports diverse computer vision tasks such as object detection, [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation), image classification, pose estimation, and oriented object detection (OBB).
+
+### How do I train a YOLO11 model for object detection?
+
+Training a YOLO11 model for object detection can be done using Python or CLI commands. Below are examples for both methods:
+
+!!! example
+
+ === "Python"
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load a COCO-pretrained YOLO11n model
+ model = YOLO("yolo11n.pt")
+
+ # Train the model on the COCO8 example dataset for 100 epochs
+ results = model.train(data="coco8.yaml", epochs=100, imgsz=640)
+ ```
+
+ === "CLI"
+
+ ```bash
+ # Load a COCO-pretrained YOLO11n model and train it on the COCO8 example dataset for 100 epochs
+ yolo train model=yolo11n.pt data=coco8.yaml epochs=100 imgsz=640
+ ```
+
+For more detailed instructions, refer to the [Train](../modes/train.md) documentation.
+
+### What tasks can YOLO11 models perform?
+
+YOLO11 models are versatile and support a wide range of computer vision tasks, including:
+
+- **Object Detection:** Identifying and locating objects within an image.
+- **Instance Segmentation:** Detecting objects and delineating their boundaries.
+- **[Image Classification](https://www.ultralytics.com/glossary/image-classification):** Categorizing images into predefined classes.
+- **Pose Estimation:** Detecting and tracking keypoints on human bodies.
+- **Oriented Object Detection (OBB):** Detecting objects with rotation for higher precision.
+
+For more information on each task, see the [Detection](../tasks/detect.md), [Instance Segmentation](../tasks/segment.md), [Classification](../tasks/classify.md), [Pose Estimation](../tasks/pose.md), and [Oriented Detection](../tasks/obb.md) documentation.
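+
+Each task uses its own pretrained weights but shares the same `YOLO()` interface; a rough sketch (weight filenames follow the pattern in the model tables above):
+
+```python
+from ultralytics import YOLO
+
+# Task-specific YOLO11 nano models: detect, segment, classify, pose, and OBB
+models = {
+    "detect": YOLO("yolo11n.pt"),
+    "segment": YOLO("yolo11n-seg.pt"),
+    "classify": YOLO("yolo11n-cls.pt"),
+    "pose": YOLO("yolo11n-pose.pt"),
+    "obb": YOLO("yolo11n-obb.pt"),
+}
+
+# Every model is used the same way for inference
+results = models["segment"]("path/to/bus.jpg")
+```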
+
+### How does YOLO11 achieve greater accuracy with fewer parameters?
+
+YOLO11 achieves greater accuracy with fewer parameters through advancements in model design and optimization techniques. The improved architecture allows for efficient feature extraction and processing, resulting in higher mean Average Precision (mAP) on datasets like COCO while using 22% fewer parameters than YOLOv8m. This makes YOLO11 computationally efficient without compromising on accuracy and well suited to deployment on resource-constrained devices.
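+
+To check the parameter and FLOP counts yourself, one option is the built-in model summary; a minimal sketch, assuming the pretrained weights can be downloaded:
+
+```python
+from ultralytics import YOLO
+
+# Print a model summary: layers, parameters, gradients, and GFLOPs
+YOLO("yolo11m.pt").info()
+YOLO("yolov8m.pt").info()  # compare against the previous generation
+```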
+
+### Can YOLO11 be deployed on edge devices?
+
+Yes, YOLO11 is designed for adaptability across various environments, including edge devices. Its optimized architecture and efficient processing capabilities make it well suited to deployment on edge hardware, cloud platforms, and systems with NVIDIA GPUs. This flexibility means YOLO11 can serve diverse applications, from real-time detection on mobile devices to complex segmentation tasks in cloud environments. For more details on deployment options, refer to the [Export](../modes/export.md) documentation.
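+
+As a concrete starting point, exporting to a lightweight format such as ONNX is often the first step for edge deployment; a minimal sketch:
+
+```python
+from ultralytics import YOLO
+
+# Load a COCO-pretrained YOLO11n model and export it for edge deployment
+model = YOLO("yolo11n.pt")
+model.export(format="onnx")  # see the Export docs for other formats such as TF Lite or TensorRT
+```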
diff --git a/docs/en/models/yolov10.md b/docs/en/models/yolov10.md
index babf9726..4cfd0cf9 100644
--- a/docs/en/models/yolov10.md
+++ b/docs/en/models/yolov10.md
@@ -4,7 +4,7 @@ description: Discover YOLOv10, the latest in real-time object detection, elimina
keywords: YOLOv10, real-time object detection, NMS-free, deep learning, Tsinghua University, Ultralytics, machine learning, neural networks, performance optimization
---
-# YOLOv10: Real-Time End-to-End Object Detection
+# YOLOv10: Real-Time End-to-End [Object Detection](https://www.ultralytics.com/glossary/object-detection)
YOLOv10, built on the [Ultralytics](https://www.ultralytics.com/) [Python package](https://pypi.org/project/ultralytics/) by researchers at [Tsinghua University](https://www.tsinghua.edu.cn/en/), introduces a new approach to real-time object detection, addressing both the post-processing and model architecture deficiencies found in previous YOLO versions. By eliminating non-maximum suppression (NMS) and optimizing various model components, YOLOv10 achieves state-of-the-art performance with significantly reduced computational overhead. Extensive experiments demonstrate its superior accuracy-latency trade-offs across multiple model scales.
@@ -29,7 +29,7 @@ Real-time object detection aims to accurately predict object categories and posi
The architecture of YOLOv10 builds upon the strengths of previous YOLO models while introducing several key innovations. The model architecture consists of the following components:
-1. **Backbone**: Responsible for feature extraction, the backbone in YOLOv10 uses an enhanced version of CSPNet (Cross Stage Partial Network) to improve gradient flow and reduce computational redundancy.
+1. **Backbone**: Responsible for [feature extraction](https://www.ultralytics.com/glossary/feature-extraction), the backbone in YOLOv10 uses an enhanced version of CSPNet (Cross Stage Partial Network) to improve gradient flow and reduce computational redundancy.
2. **Neck**: The neck is designed to aggregate features from different scales and passes them to the head. It includes PAN (Path Aggregation Network) layers for effective multiscale feature fusion.
3. **One-to-Many Head**: Generates multiple predictions per object during training to provide rich supervisory signals and improve learning accuracy.
4. **One-to-One Head**: Generates a single best prediction per object during inference to eliminate the need for NMS, thereby reducing latency and improving efficiency.
@@ -72,7 +72,7 @@ Latency measured with TensorRT FP16 on T4 GPU.
YOLOv10 employs dual label assignments, combining one-to-many and one-to-one strategies during training to ensure rich supervision and efficient end-to-end deployment. The consistent matching metric aligns the supervision between both strategies, enhancing the quality of predictions during inference.
-### Holistic Efficiency-Accuracy Driven Model Design
+### Holistic Efficiency-[Accuracy](https://www.ultralytics.com/glossary/accuracy) Driven Model Design
#### Efficiency Enhancements
@@ -202,20 +202,20 @@ The YOLOv10 models series offers a range of models, each optimized for high-perf
Due to the new operations introduced with YOLOv10, not all export formats provided by Ultralytics are currently supported. The following table outlines which formats have been successfully converted using Ultralytics for YOLOv10. Feel free to open a pull request if you're able to [provide a contribution change](../help/contributing.md) for adding export support of additional formats for YOLOv10.
-| Export Format | Export Support | Exported Model Inference | Notes |
-| ------------------------------------------------- | -------------- | ------------------------ | ------------------------------------------- |
-| [TorchScript](../integrations/torchscript.md) | ✅ | ✅ | Standard PyTorch model format. |
-| [ONNX](../integrations/onnx.md) | ✅ | ✅ | Widely supported for deployment. |
-| [OpenVINO](../integrations/openvino.md) | ✅ | ✅ | Optimized for Intel hardware. |
-| [TensorRT](../integrations/tensorrt.md) | ✅ | ✅ | Optimized for NVIDIA GPUs. |
-| [CoreML](../integrations/coreml.md) | ✅ | ✅ | Limited to Apple devices. |
-| [TF SavedModel](../integrations/tf-savedmodel.md) | ✅ | ✅ | TensorFlow's standard model format. |
-| [TF GraphDef](../integrations/tf-graphdef.md) | ✅ | ✅ | Legacy TensorFlow format. |
-| [TF Lite](../integrations/tflite.md) | ✅ | ✅ | Optimized for mobile and embedded. |
-| [TF Edge TPU](../integrations/edge-tpu.md) | ✅ | ✅ | Specific to Google's Edge TPU devices. |
-| [TF.js](../integrations/tfjs.md) | ✅ | ✅ | JavaScript environment for browser use. |
-| [PaddlePaddle](../integrations/paddlepaddle.md) | ❌ | ❌ | Popular in China; less global support. |
-| [NCNN](../integrations/ncnn.md) | ✅ | ❌ | Layer `torch.topk` not exists or registered |
+| Export Format | Export Support | Exported Model Inference | Notes |
+| ------------------------------------------------- | -------------- | ------------------------ | -------------------------------------------------------------------------------------- |
+| [TorchScript](../integrations/torchscript.md) | ✅ | ✅ | Standard [PyTorch](https://www.ultralytics.com/glossary/pytorch) model format. |
+| [ONNX](../integrations/onnx.md) | ✅ | ✅ | Widely supported for deployment. |
+| [OpenVINO](../integrations/openvino.md) | ✅ | ✅ | Optimized for Intel hardware. |
+| [TensorRT](../integrations/tensorrt.md) | ✅ | ✅ | Optimized for NVIDIA GPUs. |
+| [CoreML](../integrations/coreml.md) | ✅ | ✅ | Limited to Apple devices. |
+| [TF SavedModel](../integrations/tf-savedmodel.md) | ✅ | ✅ | [TensorFlow](https://www.ultralytics.com/glossary/tensorflow)'s standard model format. |
+| [TF GraphDef](../integrations/tf-graphdef.md) | ✅ | ✅ | Legacy TensorFlow format. |
+| [TF Lite](../integrations/tflite.md) | ✅ | ✅ | Optimized for mobile and embedded. |
+| [TF Edge TPU](../integrations/edge-tpu.md) | ✅ | ✅ | Specific to Google's Edge TPU devices. |
+| [TF.js](../integrations/tfjs.md) | ✅ | ✅ | JavaScript environment for browser use. |
+| [PaddlePaddle](../integrations/paddlepaddle.md) | ❌ | ❌ | Popular in China; less global support. |
+| [NCNN](../integrations/ncnn.md) | ✅ | ❌ | Layer `torch.topk` not exists or registered |
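+
+For the formats marked as supported, export goes through the standard Ultralytics interface; a minimal sketch, assuming a local `yolov10n.pt` checkpoint:
+
+```python
+from ultralytics import YOLO
+
+# Export a YOLOv10 model to ONNX, one of the formats marked as supported above
+model = YOLO("yolov10n.pt")
+model.export(format="onnx")
+```
+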
## Conclusion
diff --git a/docs/en/models/yolov3.md b/docs/en/models/yolov3.md
index 182ccbcd..33e3cd82 100644
--- a/docs/en/models/yolov3.md
+++ b/docs/en/models/yolov3.md
@@ -20,7 +20,7 @@ This document presents an overview of three closely related object detection mod
## Key Features
-- **YOLOv3:** Introduced the use of three different scales for detection, leveraging three different sizes of detection kernels: 13x13, 26x26, and 52x52. This significantly improved detection accuracy for objects of different sizes. Additionally, YOLOv3 added features such as multi-label predictions for each bounding box and a better feature extractor network.
+- **YOLOv3:** Introduced the use of three different scales for detection, leveraging three different sizes of detection kernels: 13x13, 26x26, and 52x52. This significantly improved detection accuracy for objects of different sizes. Additionally, YOLOv3 added features such as multi-label predictions for each [bounding box](https://www.ultralytics.com/glossary/bounding-box) and a better feature extractor network.
- **YOLOv3-Ultralytics:** Ultralytics' implementation of YOLOv3 provides the same performance as the original model but comes with added support for more pre-trained models, additional training methods, and easier customization options. This makes it more versatile and user-friendly for practical applications.
@@ -30,7 +30,7 @@ This document presents an overview of three closely related object detection mod
The YOLOv3 series, including YOLOv3, YOLOv3-Ultralytics, and YOLOv3u, are designed specifically for object detection tasks. These models are renowned for their effectiveness in various real-world scenarios, balancing accuracy and speed. Each variant offers unique features and optimizations, making them suitable for a range of applications.
-All three models support a comprehensive set of modes, ensuring versatility in various stages of model deployment and development. These modes include [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), and [Export](../modes/export.md), providing users with a complete toolkit for effective object detection.
+All three models support a comprehensive set of modes, ensuring versatility in various stages of [model deployment](https://www.ultralytics.com/glossary/model-deployment) and development. These modes include [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), and [Export](../modes/export.md), providing users with a complete toolkit for effective object detection.
| Model Type | Tasks Supported | Inference | Validation | Training | Export |
| ------------------ | -------------------------------------- | --------- | ---------- | -------- | ------ |
@@ -48,7 +48,7 @@ This example provides simple YOLOv3 training and inference examples. For full do
=== "Python"
- PyTorch pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()` class to create a model instance in python:
+ [PyTorch](https://www.ultralytics.com/glossary/pytorch) pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()` class to create a model instance in Python:
```python
from ultralytics import YOLO
@@ -101,7 +101,7 @@ Thank you to Joseph Redmon and Ali Farhadi for developing the original YOLOv3.
### What are the differences between YOLOv3, YOLOv3-Ultralytics, and YOLOv3u?
-YOLOv3 is the third iteration of the YOLO (You Only Look Once) object detection algorithm developed by Joseph Redmon, known for its balance of accuracy and speed, utilizing three different scales (13x13, 26x26, and 52x52) for detections. YOLOv3-Ultralytics is Ultralytics' adaptation of YOLOv3 that adds support for more pre-trained models and facilitates easier model customization. YOLOv3u is an upgraded variant of YOLOv3-Ultralytics, integrating the anchor-free, objectness-free split head from YOLOv8, improving detection robustness and accuracy for various object sizes. For more details on the variants, refer to the [YOLOv3 series](https://github.com/ultralytics/yolov3).
+YOLOv3 is the third iteration of the YOLO (You Only Look Once) [object detection](https://www.ultralytics.com/glossary/object-detection) algorithm developed by Joseph Redmon, known for its balance of [accuracy](https://www.ultralytics.com/glossary/accuracy) and speed, utilizing three different scales (13x13, 26x26, and 52x52) for detections. YOLOv3-Ultralytics is Ultralytics' adaptation of YOLOv3 that adds support for more pre-trained models and facilitates easier model customization. YOLOv3u is an upgraded variant of YOLOv3-Ultralytics, integrating the anchor-free, objectness-free split head from YOLOv8, improving detection robustness and accuracy for various object sizes. For more details on the variants, refer to the [YOLOv3 series](https://github.com/ultralytics/yolov3).
### How can I train a YOLOv3 model using Ultralytics?
diff --git a/docs/en/models/yolov4.md b/docs/en/models/yolov4.md
index c6f7d0da..959de10f 100644
--- a/docs/en/models/yolov4.md
+++ b/docs/en/models/yolov4.md
@@ -12,11 +12,11 @@ Welcome to the Ultralytics documentation page for YOLOv4, a state-of-the-art, re
## Introduction
-YOLOv4 stands for You Only Look Once version 4. It is a real-time object detection model developed to address the limitations of previous YOLO versions like [YOLOv3](yolov3.md) and other object detection models. Unlike other convolutional neural network (CNN) based object detectors, YOLOv4 is not only applicable for recommendation systems but also for standalone process management and human input reduction. Its operation on conventional graphics processing units (GPUs) allows for mass usage at an affordable price, and it is designed to work in real-time on a conventional GPU while requiring only one such GPU for training.
+YOLOv4 stands for You Only Look Once version 4. It is a real-time object detection model developed to address the limitations of previous YOLO versions like [YOLOv3](yolov3.md) and other object detection models. Unlike other [convolutional neural network](https://www.ultralytics.com/glossary/convolutional-neural-network-cnn) (CNN) based object detectors, YOLOv4 is not only applicable for recommendation systems but also for standalone process management and human input reduction. Its operation on conventional graphics processing units (GPUs) allows for mass usage at an affordable price, and it is designed to work in real-time on a conventional GPU while requiring only one such GPU for training.
## Architecture
-YOLOv4 makes use of several innovative features that work together to optimize its performance. These include Weighted-Residual-Connections (WRC), Cross-Stage-Partial-connections (CSP), Cross mini-Batch Normalization (CmBN), Self-adversarial-training (SAT), Mish-activation, Mosaic data augmentation, DropBlock regularization, and CIoU loss. These features are combined to achieve state-of-the-art results.
+YOLOv4 makes use of several innovative features that work together to optimize its performance. These include Weighted-Residual-Connections (WRC), Cross-Stage-Partial-connections (CSP), Cross mini-Batch Normalization (CmBN), Self-adversarial-training (SAT), Mish-activation, Mosaic [data augmentation](https://www.ultralytics.com/glossary/data-augmentation), DropBlock [regularization](https://www.ultralytics.com/glossary/regularization), and CIoU loss. These features are combined to achieve state-of-the-art results.
A typical object detector is composed of several parts including the input, the backbone, the neck, and the head. The backbone of YOLOv4 is pre-trained on ImageNet and is used to predict classes and bounding boxes of objects. The backbone could be from several models including VGG, ResNet, ResNeXt, or DenseNet. The neck part of the detector is used to collect feature maps from different stages and usually includes several bottom-up paths and several top-down paths. The head part is what is used to make the final object detections and classifications.
@@ -71,9 +71,9 @@ The original YOLOv4 paper can be found on [arXiv](https://arxiv.org/abs/2004.109
## FAQ
-### What is YOLOv4 and why should I use it for object detection?
+### What is YOLOv4 and why should I use it for [object detection](https://www.ultralytics.com/glossary/object-detection)?
-YOLOv4, which stands for "You Only Look Once version 4," is a state-of-the-art real-time object detection model developed by Alexey Bochkovskiy in 2020. It achieves an optimal balance between speed and accuracy, making it highly suitable for real-time applications. YOLOv4's architecture incorporates several innovative features like Weighted-Residual-Connections (WRC), Cross-Stage-Partial-connections (CSP), and Self-adversarial-training (SAT), among others, to achieve state-of-the-art results. If you're looking for a high-performance model that operates efficiently on conventional GPUs, YOLOv4 is an excellent choice.
+YOLOv4, which stands for "You Only Look Once version 4," is a state-of-the-art real-time object detection model developed by Alexey Bochkovskiy in 2020. It achieves an optimal balance between speed and [accuracy](https://www.ultralytics.com/glossary/accuracy), making it highly suitable for real-time applications. YOLOv4's architecture incorporates several innovative features like Weighted-Residual-Connections (WRC), Cross-Stage-Partial-connections (CSP), and Self-adversarial-training (SAT), among others, to achieve state-of-the-art results. If you're looking for a high-performance model that operates efficiently on conventional GPUs, YOLOv4 is an excellent choice.
### How does the architecture of YOLOv4 enhance its performance?
@@ -85,7 +85,7 @@ The architecture of YOLOv4 includes several key components: the backbone, the ne
### Why is YOLOv4 considered suitable for real-time object detection on conventional GPUs?
-YOLOv4 is designed to optimize both speed and accuracy, making it ideal for real-time object detection tasks that require quick and reliable performance. It operates efficiently on conventional GPUs, needing only one for both training and inference. This makes it accessible and practical for various applications ranging from recommendation systems to standalone process management, thereby reducing the need for extensive hardware setups and making it a cost-effective solution for real-time object detection.
+YOLOv4 is designed to optimize both speed and accuracy, making it ideal for real-time object detection tasks that require quick and reliable performance. It operates efficiently on conventional GPUs, needing only one for both training and inference. This makes it accessible and practical for various applications ranging from [recommendation systems](https://www.ultralytics.com/glossary/recommendation-system) to standalone process management, thereby reducing the need for extensive hardware setups and making it a cost-effective solution for real-time object detection.
### How can I get started with YOLOv4 if Ultralytics does not currently support it?
diff --git a/docs/en/models/yolov5.md b/docs/en/models/yolov5.md
index 4baa08e9..8ff1c36e 100644
--- a/docs/en/models/yolov5.md
+++ b/docs/en/models/yolov5.md
@@ -8,7 +8,7 @@ keywords: YOLOv5, YOLOv5u, object detection, Ultralytics, anchor-free, pre-train
## Overview
-YOLOv5u represents an advancement in object detection methodologies. Originating from the foundational architecture of the [YOLOv5](https://github.com/ultralytics/yolov5) model developed by Ultralytics, YOLOv5u integrates the anchor-free, objectness-free split head, a feature previously introduced in the [YOLOv8](yolov8.md) models. This adaptation refines the model's architecture, leading to an improved accuracy-speed tradeoff in object detection tasks. Given the empirical results and its derived features, YOLOv5u provides an efficient alternative for those seeking robust solutions in both research and practical applications.
+YOLOv5u represents an advancement in [object detection](https://www.ultralytics.com/glossary/object-detection) methodologies. Originating from the foundational architecture of the [YOLOv5](https://github.com/ultralytics/yolov5) model developed by Ultralytics, YOLOv5u integrates the anchor-free, objectness-free split head, a feature previously introduced in the [YOLOv8](yolov8.md) models. This adaptation refines the model's architecture, leading to an improved accuracy-speed tradeoff in object detection tasks. Given the empirical results and its derived features, YOLOv5u provides an efficient alternative for those seeking robust solutions in both research and practical applications.
![Ultralytics YOLOv5](https://github.com/ultralytics/docs/releases/download/0/ultralytics-yolov5-splash.avif)
@@ -60,7 +60,7 @@ This example provides simple YOLOv5 training and inference examples. For full do
=== "Python"
- PyTorch pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()` class to create a model instance in python:
+ [PyTorch](https://www.ultralytics.com/glossary/pytorch) pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()` class to create a model instance in Python:
```python
from ultralytics import YOLO
@@ -117,7 +117,7 @@ Please note that YOLOv5 models are provided under [AGPL-3.0](https://github.com/
### What is Ultralytics YOLOv5u and how does it differ from YOLOv5?
-Ultralytics YOLOv5u is an advanced version of YOLOv5, integrating the anchor-free, objectness-free split head that enhances the accuracy-speed tradeoff for real-time object detection tasks. Unlike the traditional YOLOv5, YOLOv5u adopts an anchor-free detection mechanism, making it more flexible and adaptive in diverse scenarios. For more detailed information on its features, you can refer to the [YOLOv5 Overview](#overview).
+Ultralytics YOLOv5u is an advanced version of YOLOv5, integrating the anchor-free, objectness-free split head that enhances the [accuracy](https://www.ultralytics.com/glossary/accuracy)-speed tradeoff for real-time object detection tasks. Unlike the traditional YOLOv5, YOLOv5u adopts an anchor-free detection mechanism, making it more flexible and adaptive in diverse scenarios. For more detailed information on its features, you can refer to the [YOLOv5 Overview](#overview).
### How does the anchor-free Ultralytics head improve object detection performance in YOLOv5u?
diff --git a/docs/en/models/yolov6.md b/docs/en/models/yolov6.md
index 5cf52931..c41b40c8 100644
--- a/docs/en/models/yolov6.md
+++ b/docs/en/models/yolov6.md
@@ -24,13 +24,13 @@ keywords: Meituan YOLOv6, object detection, real-time applications, BiC module,
YOLOv6 provides various pre-trained models with different scales:
-- YOLOv6-N: 37.5% AP on COCO val2017 at 1187 FPS with NVIDIA Tesla T4 GPU.
+- YOLOv6-N: 37.5% AP on COCO val2017 at 1187 FPS with NVIDIA T4 GPU.
- YOLOv6-S: 45.0% AP at 484 FPS.
- YOLOv6-M: 50.0% AP at 226 FPS.
- YOLOv6-L: 52.8% AP at 116 FPS.
- YOLOv6-L6: State-of-the-art accuracy in real-time.
-YOLOv6 also provides quantized models for different precisions and models optimized for mobile platforms.
+YOLOv6 also provides quantized models for different [precisions](https://www.ultralytics.com/glossary/precision) and models optimized for mobile platforms.
## Usage Examples
@@ -40,7 +40,7 @@ This example provides simple YOLOv6 training and inference examples. For full do
=== "Python"
- PyTorch pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()` class to create a model instance in python:
+ [PyTorch](https://www.ultralytics.com/glossary/pytorch) pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()` class to create a model instance in Python:
```python
from ultralytics import YOLO
@@ -72,7 +72,7 @@ This example provides simple YOLOv6 training and inference examples. For full do
## Supported Tasks and Modes
-The YOLOv6 series offers a range of models, each optimized for high-performance [Object Detection](../tasks/detect.md). These models cater to varying computational needs and accuracy requirements, making them versatile for a wide array of applications.
+The YOLOv6 series offers a range of models, each optimized for high-performance [Object Detection](../tasks/detect.md). These models cater to varying computational needs and [accuracy](https://www.ultralytics.com/glossary/accuracy) requirements, making them versatile for a wide array of applications.
| Model Type | Pre-trained Weights | Tasks Supported | Inference | Validation | Training | Export |
| ---------- | ------------------- | -------------------------------------- | --------- | ---------- | -------- | ------ |
@@ -82,7 +82,7 @@ The YOLOv6 series offers a range of models, each optimized for high-performance
| YOLOv6-L | `yolov6-l.pt` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ |
| YOLOv6-L6 | `yolov6-l6.pt` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ |
-This table provides a detailed overview of the YOLOv6 model variants, highlighting their capabilities in object detection tasks and their compatibility with various operational modes such as [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), and [Export](../modes/export.md). This comprehensive support ensures that users can fully leverage the capabilities of YOLOv6 models in a broad range of object detection scenarios.
+This table provides a detailed overview of the YOLOv6 model variants, highlighting their capabilities in [object detection](https://www.ultralytics.com/glossary/object-detection) tasks and their compatibility with various operational modes such as [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), and [Export](../modes/export.md). This comprehensive support ensures that users can fully leverage the capabilities of YOLOv6 models in a broad range of object detection scenarios.
## Citations and Acknowledgements
@@ -151,11 +151,11 @@ YOLOv6 offers multiple versions, each optimized for different performance requir
- YOLOv6-L: 52.8% AP at 116 FPS
- YOLOv6-L6: State-of-the-art accuracy in real-time scenarios
-These models are evaluated on the COCO dataset using an NVIDIA Tesla T4 GPU. For more on performance metrics, see the [Performance Metrics](#performance-metrics) section.
+These models are evaluated on the COCO dataset using an NVIDIA T4 GPU. For more on performance metrics, see the [Performance Metrics](#performance-metrics) section.
### How does the Anchor-Aided Training (AAT) strategy benefit YOLOv6?
-Anchor-Aided Training (AAT) in YOLOv6 combines elements of anchor-based and anchor-free approaches, enhancing the model's detection capabilities without compromising inference efficiency. This strategy leverages anchors during training to improve bounding box predictions, making YOLOv6 effective in diverse object detection tasks.
+Anchor-Aided Training (AAT) in YOLOv6 combines elements of anchor-based and anchor-free approaches, enhancing the model's detection capabilities without compromising inference efficiency. This strategy leverages anchors during training to improve [bounding box](https://www.ultralytics.com/glossary/bounding-box) predictions, making YOLOv6 effective in diverse object detection tasks.
### Which operational modes are supported by YOLOv6 models in Ultralytics?
diff --git a/docs/en/models/yolov7.md b/docs/en/models/yolov7.md
index b13467a9..1ba9dc27 100644
--- a/docs/en/models/yolov7.md
+++ b/docs/en/models/yolov7.md
@@ -6,7 +6,7 @@ keywords: YOLOv7, real-time object detection, Ultralytics, AI, computer vision,
# YOLOv7: Trainable Bag-of-Freebies
-YOLOv7 is a state-of-the-art real-time object detector that surpasses all known object detectors in both speed and accuracy in the range from 5 FPS to 160 FPS. It has the highest accuracy (56.8% AP) among all known real-time object detectors with 30 FPS or higher on GPU V100. Moreover, YOLOv7 outperforms other object detectors such as YOLOR, YOLOX, Scaled-YOLOv4, YOLOv5, and many others in speed and accuracy. The model is trained on the MS COCO dataset from scratch without using any other datasets or pre-trained weights. Source code for YOLOv7 is available on GitHub.
+YOLOv7 is a state-of-the-art real-time object detector that surpasses all known object detectors in both speed and [accuracy](https://www.ultralytics.com/glossary/accuracy) in the range from 5 FPS to 160 FPS. It has the highest accuracy (56.8% AP) among all known real-time object detectors with 30 FPS or higher on GPU V100. Moreover, YOLOv7 outperforms other object detectors such as YOLOR, YOLOX, Scaled-YOLOv4, YOLOv5, and many others in speed and accuracy. The model is trained on the MS COCO dataset from scratch without using any other datasets or pre-trained weights. Source code for YOLOv7 is available on GitHub.
![YOLOv7 comparison with SOTA object detectors](https://github.com/ultralytics/docs/releases/download/0/yolov7-comparison-sota-object-detectors.avif)
@@ -62,7 +62,7 @@ From the results in the YOLO comparison table we know that the proposed method h
## Overview
-Real-time object detection is an important component in many computer vision systems, including multi-object tracking, autonomous driving, robotics, and medical image analysis. In recent years, real-time object detection development has focused on designing efficient architectures and improving the inference speed of various CPUs, GPUs, and neural processing units (NPUs). YOLOv7 supports both mobile GPU and GPU devices, from the edge to the cloud.
+Real-time object detection is an important component in many [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) systems, including multi-object tracking, autonomous driving, robotics, and medical image analysis. In recent years, real-time object detection development has focused on designing efficient architectures and improving the inference speed of various CPUs, GPUs, and neural processing units (NPUs). YOLOv7 supports both mobile GPU and GPU devices, from the edge to the cloud.
Unlike traditional real-time object detectors that focus on architecture optimization, YOLOv7 introduces a focus on the optimization of the training process. This includes modules and optimization methods designed to improve the accuracy of object detection without increasing the inference cost, a concept known as the "trainable bag-of-freebies".
@@ -115,7 +115,7 @@ The original YOLOv7 paper can be found on [arXiv](https://arxiv.org/pdf/2207.026
## FAQ
-### What is YOLOv7 and why is it considered a breakthrough in real-time object detection?
+### What is YOLOv7 and why is it considered a breakthrough in real-time [object detection](https://www.ultralytics.com/glossary/object-detection)?
YOLOv7 is a cutting-edge real-time object detection model that achieves unparalleled speed and accuracy. It surpasses other models, such as YOLOX, YOLOv5, and PPYOLOE, in both parameters usage and inference speed. YOLOv7's distinguishing features include its model re-parameterization and dynamic label assignment, which optimize its performance without increasing inference costs. For more technical details about its architecture and comparison metrics with other state-of-the-art object detectors, refer to the [YOLOv7 paper](https://arxiv.org/pdf/2207.02696.pdf).
diff --git a/docs/en/models/yolov8.md b/docs/en/models/yolov8.md
index c95902be..036cd305 100644
--- a/docs/en/models/yolov8.md
+++ b/docs/en/models/yolov8.md
@@ -4,11 +4,11 @@ description: Discover YOLOv8, the latest advancement in real-time object detecti
keywords: YOLOv8, real-time object detection, YOLO series, Ultralytics, computer vision, advanced object detection, AI, machine learning, deep learning
---
-# YOLOv8
+# Ultralytics YOLOv8
## Overview
-YOLOv8 is the latest iteration in the YOLO series of real-time object detectors, offering cutting-edge performance in terms of accuracy and speed. Building upon the advancements of previous YOLO versions, YOLOv8 introduces new features and optimizations that make it an ideal choice for various object detection tasks in a wide range of applications.
+YOLOv8 is the latest iteration in the YOLO series of real-time object detectors, offering cutting-edge performance in terms of accuracy and speed. Building upon the advancements of previous YOLO versions, YOLOv8 introduces new features and optimizations that make it an ideal choice for various [object detection](https://www.ultralytics.com/glossary/object-detection) tasks in a wide range of applications.
![Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/yolov8-comparison-plots.avif)
@@ -25,14 +25,14 @@ YOLOv8 is the latest iteration in the YOLO series of real-time object detectors,
## Key Features
-- **Advanced Backbone and Neck Architectures:** YOLOv8 employs state-of-the-art backbone and neck architectures, resulting in improved feature extraction and object detection performance.
+- **Advanced Backbone and Neck Architectures:** YOLOv8 employs state-of-the-art backbone and neck architectures, resulting in improved [feature extraction](https://www.ultralytics.com/glossary/feature-extraction) and object detection performance.
- **Anchor-free Split Ultralytics Head:** YOLOv8 adopts an anchor-free split Ultralytics head, which contributes to better accuracy and a more efficient detection process compared to anchor-based approaches.
- **Optimized Accuracy-Speed Tradeoff:** With a focus on maintaining an optimal balance between accuracy and speed, YOLOv8 is suitable for real-time object detection tasks in diverse application areas.
- **Variety of Pre-trained Models:** YOLOv8 offers a range of pre-trained models to cater to various tasks and performance requirements, making it easier to find the right model for your specific use case.
## Supported Tasks and Modes
-The YOLOv8 series offers a diverse range of models, each specialized for specific tasks in computer vision. These models are designed to cater to various requirements, from object detection to more complex tasks like instance segmentation, pose/keypoints detection, oriented object detection, and classification.
+The YOLOv8 series offers a diverse range of models, each specialized for specific tasks in computer vision. These models are designed to cater to various requirements, from object detection to more complex tasks like [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation), pose/keypoints detection, oriented object detection, and classification.
Each variant of the YOLOv8 series is optimized for its respective task, ensuring high performance and accuracy. Additionally, these models are compatible with various operational modes including [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), and [Export](../modes/export.md), facilitating their use in different stages of deployment and development.
@@ -44,7 +44,7 @@ Each variant of the YOLOv8 series is optimized for its respective task, ensuring
| YOLOv8-obb | `yolov8n-obb.pt` `yolov8s-obb.pt` `yolov8m-obb.pt` `yolov8l-obb.pt` `yolov8x-obb.pt` | [Oriented Detection](../tasks/obb.md) | ✅ | ✅ | ✅ | ✅ |
| YOLOv8-cls | `yolov8n-cls.pt` `yolov8s-cls.pt` `yolov8m-cls.pt` `yolov8l-cls.pt` `yolov8x-cls.pt` | [Classification](../tasks/classify.md) | ✅ | ✅ | ✅ | ✅ |
-This table provides an overview of the YOLOv8 model variants, highlighting their applicability in specific tasks and their compatibility with various operational modes such as Inference, Validation, Training, and Export. It showcases the versatility and robustness of the YOLOv8 series, making them suitable for a variety of applications in computer vision.
+This table provides an overview of the YOLOv8 model variants, highlighting their applicability in specific tasks and their compatibility with various operational modes such as Inference, Validation, Training, and Export. It showcases the versatility and robustness of the YOLOv8 series, making them suitable for a variety of applications in [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv).
## Performance Metrics
@@ -133,7 +133,7 @@ Note the below example is for YOLOv8 [Detect](../tasks/detect.md) models for obj
=== "Python"
- PyTorch pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()` class to create a model instance in python:
+ [PyTorch](https://www.ultralytics.com/glossary/pytorch) pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()` class to create a model instance in Python:
```python
from ultralytics import YOLO
@@ -189,7 +189,7 @@ Please note that the DOI is pending and will be added to the citation once it is
### What is YOLOv8 and how does it differ from previous YOLO versions?
-YOLOv8 is the latest iteration in the Ultralytics YOLO series, designed to improve real-time object detection performance with advanced features. Unlike earlier versions, YOLOv8 incorporates an **anchor-free split Ultralytics head**, state-of-the-art backbone and neck architectures, and offers optimized accuracy-speed tradeoff, making it ideal for diverse applications. For more details, check the [Overview](#overview) and [Key Features](#key-features) sections.
+YOLOv8 is the latest iteration in the Ultralytics YOLO series, designed to improve real-time object detection performance with advanced features. Unlike earlier versions, YOLOv8 incorporates an **anchor-free split Ultralytics head**, state-of-the-art backbone and neck architectures, and offers optimized [accuracy](https://www.ultralytics.com/glossary/accuracy)-speed tradeoff, making it ideal for diverse applications. For more details, check the [Overview](#overview) and [Key Features](#key-features) sections.
### How can I use YOLOv8 for different computer vision tasks?
@@ -201,7 +201,7 @@ YOLOv8 models achieve state-of-the-art performance across various benchmarking d
### How do I train a YOLOv8 model?
-Training a YOLOv8 model can be done using either Python or CLI. Below are examples for training a model using a COCO-pretrained YOLOv8 model on the COCO8 dataset for 100 epochs:
+Training a YOLOv8 model can be done using either Python or CLI. Below are examples for training a model using a COCO-pretrained YOLOv8 model on the COCO8 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch):
!!! example
diff --git a/docs/en/models/yolov9.md b/docs/en/models/yolov9.md
index a83f85d3..a0b2b748 100644
--- a/docs/en/models/yolov9.md
+++ b/docs/en/models/yolov9.md
@@ -4,7 +4,7 @@ description: Explore YOLOv9, the latest leap in real-time object detection, feat
keywords: YOLOv9, object detection, real-time, PGI, GELAN, deep learning, MS COCO, AI, neural networks, model efficiency, accuracy, Ultralytics
---
-# YOLOv9: A Leap Forward in Object Detection Technology
+# YOLOv9: A Leap Forward in [Object Detection](https://www.ultralytics.com/glossary/object-detection) Technology
YOLOv9 marks a significant advancement in real-time object detection, introducing groundbreaking techniques such as Programmable Gradient Information (PGI) and the Generalized Efficient Layer Aggregation Network (GELAN). This model demonstrates remarkable improvements in efficiency, accuracy, and adaptability, setting new benchmarks on the MS COCO dataset. The YOLOv9 project, while developed by a separate open-source team, builds upon the robust codebase provided by [Ultralytics](https://www.ultralytics.com/) [YOLOv5](yolov5.md), showcasing the collaborative spirit of the AI research community.
@@ -23,7 +23,7 @@ YOLOv9 marks a significant advancement in real-time object detection, introducin
## Introduction to YOLOv9
-In the quest for optimal real-time object detection, YOLOv9 stands out with its innovative approach to overcoming information loss challenges inherent in deep neural networks. By integrating PGI and the versatile GELAN architecture, YOLOv9 not only enhances the model's learning capacity but also ensures the retention of crucial information throughout the detection process, thereby achieving exceptional accuracy and performance.
+In the quest for optimal real-time object detection, YOLOv9 stands out with its innovative approach to overcoming information loss challenges inherent in deep [neural networks](https://www.ultralytics.com/glossary/neural-network-nn). By integrating PGI and the versatile GELAN architecture, YOLOv9 not only enhances the model's learning capacity but also ensures the retention of crucial information throughout the detection process, thereby achieving exceptional accuracy and performance.
## Core Innovations of YOLOv9
@@ -47,7 +47,7 @@ The concept of Reversible Functions is another cornerstone of YOLOv9's design. A
X = v_zeta(r_psi(X))
```
-with `psi` and `zeta` as parameters for the reversible and its inverse function, respectively. This property is crucial for deep learning architectures, as it allows the network to retain a complete information flow, thereby enabling more accurate updates to the model's parameters. YOLOv9 incorporates reversible functions within its architecture to mitigate the risk of information degradation, especially in deeper layers, ensuring the preservation of critical data for object detection tasks.
+with `psi` and `zeta` as parameters for the reversible and its inverse function, respectively. This property is crucial for [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) architectures, as it allows the network to retain a complete information flow, thereby enabling more accurate updates to the model's parameters. YOLOv9 incorporates reversible functions within its architecture to mitigate the risk of information degradation, especially in deeper layers, ensuring the preservation of critical data for object detection tasks.
### Impact on Lightweight Models
@@ -86,7 +86,7 @@ By benchmarking, you can ensure that your model not only performs well in contro
## Performance on MS COCO Dataset
-The performance of YOLOv9 on the [COCO dataset](../datasets/detect/coco.md) exemplifies its significant advancements in real-time object detection, setting new benchmarks across various model sizes. Table 1 presents a comprehensive comparison of state-of-the-art real-time object detectors, illustrating YOLOv9's superior efficiency and accuracy.
+The performance of YOLOv9 on the [COCO dataset](../datasets/detect/coco.md) exemplifies its significant advancements in real-time object detection, setting new benchmarks across various model sizes. Table 1 presents a comprehensive comparison of state-of-the-art real-time object detectors, illustrating YOLOv9's superior efficiency and [accuracy](https://www.ultralytics.com/glossary/accuracy).
**Table 1. Comparison of State-of-the-Art Real-Time Object Detectors**
@@ -109,7 +109,7 @@ The performance of YOLOv9 on the [COCO dataset](../datasets/detect/coco.md) exem
| [YOLOv9c-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov9c-seg.pt) | 640 | 52.4 | 42.2 | 27.9 | 159.4 |
| [YOLOv9e-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov9e-seg.pt) | 640 | 55.1 | 44.3 | 60.5 | 248.4 |
-YOLOv9's iterations, ranging from the tiny `t` variant to the extensive `e` model, demonstrate improvements not only in accuracy (mAP metrics) but also in efficiency with a reduced number of parameters and computational needs (FLOPs). This table underscores YOLOv9's ability to deliver high precision while maintaining or reducing the computational overhead compared to prior versions and competing models.
+YOLOv9's iterations, ranging from the tiny `t` variant to the extensive `e` model, demonstrate improvements not only in accuracy (mAP metrics) but also in efficiency with a reduced number of parameters and computational needs (FLOPs). This table underscores YOLOv9's ability to deliver high [precision](https://www.ultralytics.com/glossary/precision) while maintaining or reducing the computational overhead compared to prior versions and competing models.
Comparatively, YOLOv9 exhibits remarkable gains:
@@ -118,7 +118,7 @@ Comparatively, YOLOv9 exhibits remarkable gains:
The YOLOv9c model, in particular, highlights the effectiveness of the architecture's optimizations. It operates with 42% fewer parameters and 21% less computational demand than YOLOv7 AF, yet it achieves comparable accuracy, demonstrating YOLOv9's significant efficiency improvements. Furthermore, the YOLOv9e model sets a new standard for large models, with 15% fewer parameters and 25% less computational need than [YOLOv8x](yolov8.md), alongside an incremental 1.7% improvement in AP.
-These results showcase YOLOv9's strategic advancements in model design, emphasizing its enhanced efficiency without compromising on the precision essential for real-time object detection tasks. The model not only pushes the boundaries of performance metrics but also emphasizes the importance of computational efficiency, making it a pivotal development in the field of computer vision.
+These results showcase YOLOv9's strategic advancements in model design, emphasizing its enhanced efficiency without compromising on the precision essential for real-time object detection tasks. The model not only pushes the boundaries of performance metrics but also emphasizes the importance of computational efficiency, making it a pivotal development in the field of [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv).
## Conclusion
@@ -132,7 +132,7 @@ This example provides simple YOLOv9 training and inference examples. For full do
=== "Python"
- PyTorch pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()` class to create a model instance in python:
+ [PyTorch](https://www.ultralytics.com/glossary/pytorch) pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()` class to create a model instance in Python:
```python
from ultralytics import YOLO
@@ -235,4 +235,4 @@ YOLOv9 is designed to mitigate information loss, which is particularly important
### What tasks and modes does YOLOv9 support?
-YOLOv9 supports various tasks including object detection and instance segmentation. It is compatible with multiple operational modes such as inference, validation, training, and export. This versatility makes YOLOv9 adaptable to diverse real-time computer vision applications. Refer to the [supported tasks and modes](#supported-tasks-and-modes) section for more information.
+YOLOv9 supports various tasks including object detection and [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation). It is compatible with multiple operational modes such as inference, validation, training, and export. This versatility makes YOLOv9 adaptable to diverse real-time computer vision applications. Refer to the [supported tasks and modes](#supported-tasks-and-modes) section for more information.
diff --git a/docs/en/modes/benchmark.md b/docs/en/modes/benchmark.md
index 8e24c0a0..209a0e03 100644
--- a/docs/en/modes/benchmark.md
+++ b/docs/en/modes/benchmark.md
@@ -10,7 +10,7 @@ keywords: model benchmarking, YOLOv8, Ultralytics, performance evaluation, expor
## Introduction
-Once your model is trained and validated, the next logical step is to evaluate its performance in various real-world scenarios. Benchmark mode in Ultralytics YOLOv8 serves this purpose by providing a robust framework for assessing the speed and accuracy of your model across a range of export formats.
+Once your model is trained and validated, the next logical step is to evaluate its performance in various real-world scenarios. Benchmark mode in Ultralytics YOLOv8 serves this purpose by providing a robust framework for assessing the speed and [accuracy](https://www.ultralytics.com/glossary/accuracy) of your model across a range of export formats.
@@ -32,8 +32,8 @@ Once your model is trained and validated, the next logical step is to evaluate i
### Key Metrics in Benchmark Mode
-- **mAP50-95:** For object detection, segmentation, and pose estimation.
-- **accuracy_top5:** For image classification.
+- **mAP50-95:** For [object detection](https://www.ultralytics.com/glossary/object-detection), segmentation, and pose estimation.
+- **accuracy_top5:** For [image classification](https://www.ultralytics.com/glossary/image-classification).
- **Inference Time:** Time taken for each image in milliseconds.
### Supported Export Formats
@@ -43,7 +43,7 @@ Once your model is trained and validated, the next logical step is to evaluate i
- **OpenVINO:** For Intel hardware optimization
- **CoreML, TensorFlow SavedModel, and More:** For diverse deployment needs.
-!!! tip "Tip"
+!!! tip
* Export to ONNX or OpenVINO for up to 3x CPU speedup.
* Export to TensorRT for up to 5x GPU speedup.
@@ -73,15 +73,15 @@ Run YOLOv8n benchmarks on all supported export formats including ONNX, TensorRT
Arguments such as `model`, `data`, `imgsz`, `half`, `device`, and `verbose` provide users with the flexibility to fine-tune the benchmarks to their specific needs and compare the performance of different export formats with ease.
-| Key | Default Value | Description |
-| --------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `model` | `None` | Specifies the path to the model file. Accepts both `.pt` and `.yaml` formats, e.g., `"yolov8n.pt"` for pre-trained models or configuration files. |
-| `data` | `None` | Path to a YAML file defining the dataset for benchmarking, typically including paths and settings for validation data. Example: `"coco8.yaml"`. |
-| `imgsz` | `640` | The input image size for the model. Can be a single integer for square images or a tuple `(width, height)` for non-square, e.g., `(640, 480)`. |
-| `half` | `False` | Enables FP16 (half-precision) inference, reducing memory usage and possibly increasing speed on compatible hardware. Use `half=True` to enable. |
-| `int8` | `False` | Activates INT8 quantization for further optimized performance on supported devices, especially useful for edge devices. Set `int8=True` to use. |
-| `device` | `None` | Defines the computation device(s) for benchmarking, such as `"cpu"`, `"cuda:0"`, or a list of devices like `"cuda:0,1"` for multi-GPU setups. |
-| `verbose` | `False` | Controls the level of detail in logging output. A boolean value; set `verbose=True` for detailed logs or a float for thresholding errors. |
+| Key | Default Value | Description |
+| --------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `model` | `None` | Specifies the path to the model file. Accepts both `.pt` and `.yaml` formats, e.g., `"yolov8n.pt"` for pre-trained models or configuration files. |
+| `data` | `None` | Path to a YAML file defining the dataset for benchmarking, typically including paths and settings for [validation data](https://www.ultralytics.com/glossary/validation-data). Example: `"coco8.yaml"`. |
+| `imgsz` | `640` | The input image size for the model. Can be a single integer for square images or a tuple `(width, height)` for non-square, e.g., `(640, 480)`. |
+| `half` | `False` | Enables FP16 (half-precision) inference, reducing memory usage and possibly increasing speed on compatible hardware. Use `half=True` to enable. |
+| `int8` | `False` | Activates INT8 quantization for further optimized performance on supported devices, especially useful for edge devices. Set `int8=True` to use. |
+| `device` | `None` | Defines the computation device(s) for benchmarking, such as `"cpu"`, `"cuda:0"`, or a list of devices like `"cuda:0,1"` for multi-GPU setups. |
+| `verbose` | `False` | Controls the level of detail in logging output. A boolean value; set `verbose=True` for detailed logs or a float for thresholding errors. |
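+
+As a rough sketch of how these arguments combine in the Python API (the `benchmark` helper from `ultralytics.utils.benchmarks`):
+
+```python
+from ultralytics.utils.benchmarks import benchmark
+
+# Benchmark yolov8n.pt on GPU 0 with FP16 inference;
+# passing a float for verbose sets a metric threshold as described in the table above
+benchmark(model="yolov8n.pt", data="coco8.yaml", imgsz=640, half=True, device=0, verbose=0.26)
+```
+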
## Export Formats
@@ -95,7 +95,7 @@ See full `export` details in the [Export](../modes/export.md) page.
### How do I benchmark my YOLOv8 model's performance using Ultralytics?
-Ultralytics YOLOv8 offers a Benchmark mode to assess your model's performance across different export formats. This mode provides insights into key metrics such as mean Average Precision (mAP50-95), accuracy, and inference time in milliseconds. To run benchmarks, you can use either Python or CLI commands. For example, to benchmark on a GPU:
+Ultralytics YOLOv8 offers a Benchmark mode to assess your model's performance across different export formats. This mode provides insights into key metrics such as [mean Average Precision](https://www.ultralytics.com/glossary/mean-average-precision-map) (mAP50-95), accuracy, and inference time in milliseconds. To run benchmarks, you can use either Python or CLI commands. For example, to benchmark on a GPU:
!!! example
@@ -142,7 +142,7 @@ YOLOv8 supports a variety of export formats, each tailored for specific hardware
- **ONNX:** Best for CPU performance.
- **TensorRT:** Ideal for GPU efficiency.
- **OpenVINO:** Optimized for Intel hardware.
-- **CoreML & TensorFlow:** Useful for iOS and general ML applications.
+- **CoreML & [TensorFlow](https://www.ultralytics.com/glossary/tensorflow):** Useful for iOS and general ML applications.
For a complete list of supported formats and their respective advantages, check out the [Supported Export Formats](#supported-export-formats) section.
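As a minimal sketch of the typical workflow with the `ultralytics` Python API, a model can be exported to one of these formats and the exported file reloaded for inference:

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
onnx_path = model.export(format="onnx")  # export() returns the path of the exported file

onnx_model = YOLO(onnx_path)  # exported models can be loaded like any other weights file
results = onnx_model("https://ultralytics.com/images/bus.jpg")
```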
### What arguments can I use to fine-tune my YOLOv8 benchmarks?
diff --git a/docs/en/modes/export.md b/docs/en/modes/export.md
index bd14bfd9..706dd91c 100644
--- a/docs/en/modes/export.md
+++ b/docs/en/modes/export.md
@@ -39,14 +39,14 @@ Here are some of the standout functionalities:
- **Optimized Inference:** Exported models are optimized for quicker inference times.
- **Tutorial Videos:** In-depth guides and tutorials for a smooth exporting experience.
-!!! tip "Tip"
+!!! tip
* Export to [ONNX](../integrations/onnx.md) or [OpenVINO](../integrations/openvino.md) for up to 3x CPU speedup.
* Export to [TensorRT](../integrations/tensorrt.md) for up to 5x GPU speedup.
## Usage Examples
-Export a YOLOv8n model to a different format like ONNX or TensorRT. See Arguments section below for a full list of export arguments.
+Export a YOLOv8n model to a different format like ONNX or TensorRT. See the Arguments section below for a full list of export arguments.
!!! example
@@ -76,7 +76,7 @@ This table details the configurations and options available for exporting YOLO m
{% include "macros/export-args.md" %}
-Adjusting these parameters allows for customization of the export process to fit specific requirements, such as deployment environment, hardware constraints, and performance targets. Selecting the appropriate format and settings is essential for achieving the best balance between model size, speed, and accuracy.
+Adjusting these parameters allows for customization of the export process to fit specific requirements, such as deployment environment, hardware constraints, and performance targets. Selecting the appropriate format and settings is essential for achieving the best balance between model size, speed, and [accuracy](https://www.ultralytics.com/glossary/accuracy).
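For instance, a minimal sketch combining several of these export arguments (format, image size, dynamic axes, graph simplification) might look like:

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
# ONNX export with dynamic input shapes and a simplified graph
model.export(format="onnx", imgsz=640, dynamic=True, simplify=True)
```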
## Export Formats
@@ -112,7 +112,7 @@ Exporting a YOLOv8 model to ONNX format is straightforward with Ultralytics. It
yolo export model=path/to/best.pt format=onnx # export custom trained model
```
-For more details on the process, including advanced options like handling different input sizes, refer to the [ONNX](../integrations/onnx.md) section.
+For more details on the process, including advanced options like handling different input sizes, refer to the [ONNX section](../integrations/onnx.md).
### What are the benefits of using TensorRT for model export?
@@ -122,7 +122,7 @@ Using TensorRT for model export offers significant performance improvements. YOL
- **Speed:** Achieve faster inference through advanced optimizations.
- **Compatibility:** Integrate smoothly with NVIDIA hardware.
-To learn more about integrating TensorRT, see the [TensorRT](../integrations/tensorrt.md) integration guide.
+To learn more about integrating TensorRT, see the [TensorRT integration guide](../integrations/tensorrt.md).
### How do I enable INT8 quantization when exporting my YOLOv8 model?
@@ -145,7 +145,7 @@ INT8 quantization is an excellent way to compress the model and speed up inferen
yolo export model=yolov8n.pt format=onnx int8=True # export model with INT8 quantization
```
-INT8 quantization can be applied to various formats, such as TensorRT and CoreML. More details can be found in the [Export](../modes/export.md) section.
+INT8 quantization can be applied to various formats, such as TensorRT and CoreML. More details can be found in the [Export section](../modes/export.md).
### Why is dynamic input size important when exporting models?
@@ -182,4 +182,4 @@ Understanding and configuring export arguments is crucial for optimizing model p
- **`optimize:`** Applies specific optimizations for mobile or constrained environments.
- **`int8:`** Enables INT8 quantization, highly beneficial for edge deployments.
-For a detailed list and explanations of all the export arguments, visit the [Export Arguments](#arguments) section.
+For a detailed list and explanations of all the export arguments, visit the [Export Arguments section](#arguments).
diff --git a/docs/en/modes/index.md b/docs/en/modes/index.md
index 270ce4a8..2b56680e 100644
--- a/docs/en/modes/index.md
+++ b/docs/en/modes/index.md
@@ -10,7 +10,7 @@ keywords: Ultralytics, YOLOv8, machine learning, model training, validation, pre
## Introduction
-Ultralytics YOLOv8 is not just another object detection model; it's a versatile framework designed to cover the entire lifecycle of machine learning models—from data ingestion and model training to validation, deployment, and real-world tracking. Each mode serves a specific purpose and is engineered to offer you the flexibility and efficiency required for different tasks and use-cases.
+Ultralytics YOLOv8 is not just another object detection model; it's a versatile framework designed to cover the entire lifecycle of [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models—from data ingestion and model training to validation, deployment, and real-world tracking. Each mode serves a specific purpose and is engineered to offer you the flexibility and efficiency required for different tasks and use-cases.
@@ -30,7 +30,7 @@ Understanding the different **modes** that Ultralytics YOLOv8 supports is critic
- **Train** mode: Fine-tune your model on custom or preloaded datasets.
- **Val** mode: A post-training checkpoint to validate model performance.
- **Predict** mode: Unleash the predictive power of your model on real-world data.
-- **Export** mode: Make your model deployment-ready in various formats.
+- **Export** mode: Make your [model deployment](https://www.ultralytics.com/glossary/model-deployment)-ready in various formats.
- **Track** mode: Extend your object detection model into real-time tracking applications.
- **Benchmark** mode: Analyze the speed and accuracy of your model in diverse deployment environments.
@@ -68,13 +68,13 @@ Track mode is used for tracking objects in real-time using a YOLOv8 model. In th
## [Benchmark](benchmark.md)
-Benchmark mode is used to profile the speed and accuracy of various export formats for YOLOv8. The benchmarks provide information on the size of the exported format, its `mAP50-95` metrics (for object detection, segmentation and pose) or `accuracy_top5` metrics (for classification), and the inference time in milliseconds per image across various export formats like ONNX, OpenVINO, TensorRT and others. This information can help users choose the optimal export format for their specific use case based on their requirements for speed and accuracy.
+Benchmark mode is used to profile the speed and accuracy of various export formats for YOLOv8. The benchmarks provide information on the size of the exported format, its `mAP50-95` metrics (for object detection, segmentation, and pose) or `accuracy_top5` metrics (for classification), and the inference time in milliseconds per image across various formats like ONNX, OpenVINO, TensorRT, and others. This information can help users choose the optimal export format for their specific use case based on their requirements for speed and accuracy.
[Benchmark Examples](benchmark.md){ .md-button }
## FAQ
-### How do I train a custom object detection model with Ultralytics YOLOv8?
+### How do I train a custom [object detection](https://www.ultralytics.com/glossary/object-detection) model with Ultralytics YOLOv8?
Training a custom object detection model with Ultralytics YOLOv8 involves using the train mode. You need a dataset formatted in YOLO format, containing images and corresponding annotation files. Use the following command to start the training process:
@@ -104,7 +104,7 @@ Ultralytics YOLOv8 uses various metrics during the validation process to assess
- **mAP (mean Average Precision)**: This evaluates the accuracy of object detection.
- **IOU (Intersection over Union)**: Measures the overlap between predicted and ground truth bounding boxes.
-- **Precision and Recall**: Precision measures the ratio of true positive detections to the total detected positives, while recall measures the ratio of true positive detections to the total actual positives.
+- **[Precision](https://www.ultralytics.com/glossary/precision) and [Recall](https://www.ultralytics.com/glossary/recall)**: Precision measures the ratio of true positive detections to the total detected positives, while recall measures the ratio of true positive detections to the total actual positives.
You can run the following command to start the validation:
@@ -154,7 +154,7 @@ Detailed steps for each export format can be found in the [Export Guide](../mode
### What is the purpose of the benchmark mode in Ultralytics YOLOv8?
-Benchmark mode in Ultralytics YOLOv8 is used to analyze the speed and accuracy of various export formats such as ONNX, TensorRT, and OpenVINO. It provides metrics like model size, `mAP50-95` for object detection, and inference time across different hardware setups, helping you choose the most suitable format for your deployment needs.
+Benchmark mode in Ultralytics YOLOv8 is used to analyze the speed and [accuracy](https://www.ultralytics.com/glossary/accuracy) of various export formats such as ONNX, TensorRT, and OpenVINO. It provides metrics like model size, `mAP50-95` for object detection, and inference time across different hardware setups, helping you choose the most suitable format for your deployment needs.
!!! example
diff --git a/docs/en/modes/predict.md b/docs/en/modes/predict.md
index f89780f8..196d9e20 100644
--- a/docs/en/modes/predict.md
+++ b/docs/en/modes/predict.md
@@ -10,7 +10,7 @@ keywords: Ultralytics, YOLOv8, model prediction, inference, predict mode, real-t
## Introduction
-In the world of machine learning and computer vision, the process of making sense out of visual data is called 'inference' or 'prediction'. Ultralytics YOLOv8 offers a powerful feature known as **predict mode** that is tailored for high-performance, real-time inference on a wide range of data sources.
+In the world of [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) and [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv), the process of making sense out of visual data is called 'inference' or 'prediction'. Ultralytics YOLOv8 offers a powerful feature known as **predict mode** that is tailored for high-performance, real-time inference on a wide range of data sources.
@@ -35,7 +35,7 @@ In the world of machine learning and computer vision, the process of making sens
Here's why you should consider YOLOv8's predict mode for your various inference needs:
- **Versatility:** Capable of making inferences on images, videos, and even live streams.
-- **Performance:** Engineered for real-time, high-speed processing without sacrificing accuracy.
+- **Performance:** Engineered for real-time, high-speed processing without sacrificing [accuracy](https://www.ultralytics.com/glossary/accuracy).
- **Ease of Use:** Intuitive Python and CLI interfaces for rapid deployment and testing.
- **Highly Customizable:** Various settings and parameters to tune the model's inference behavior according to your specific requirements.
@@ -61,7 +61,7 @@ Ultralytics YOLO models return either a Python list of `Results` objects, or a m
model = YOLO("yolov8n.pt") # pretrained YOLOv8n model
# Run batched inference on a list of images
- results = model(["im1.jpg", "im2.jpg"]) # return a list of Results objects
+ results = model(["image1.jpg", "image2.jpg"]) # return a list of Results objects
# Process results list
for result in results:
@@ -83,7 +83,7 @@ Ultralytics YOLO models return either a Python list of `Results` objects, or a m
model = YOLO("yolov8n.pt") # pretrained YOLOv8n model
# Run batched inference on a list of images
- results = model(["im1.jpg", "im2.jpg"], stream=True) # return a generator of Results objects
+ results = model(["image1.jpg", "image2.jpg"], stream=True) # return a generator of Results objects
# Process results generator
for result in results:
@@ -100,26 +100,26 @@ Ultralytics YOLO models return either a Python list of `Results` objects, or a m
YOLOv8 can process different types of input sources for inference, as shown in the table below. The sources include static images, video streams, and various data formats. The table also indicates whether each source can be used in streaming mode with the argument `stream=True` ✅. Streaming mode is beneficial for processing videos or live streams as it creates a generator of results instead of loading all frames into memory.
-!!! tip "Tip"
+!!! tip
Use `stream=True` for processing long videos or large datasets to efficiently manage memory. When `stream=False`, the results for all frames or data points are stored in memory, which can quickly add up and cause out-of-memory errors for large inputs. In contrast, `stream=True` utilizes a generator, which only keeps the results of the current frame or data point in memory, significantly reducing memory consumption and preventing out-of-memory issues.
-| Source | Example | Type | Notes |
-| --------------- | ------------------------------------------ | --------------- | ------------------------------------------------------------------------------------------- |
-| image | `'image.jpg'` | `str` or `Path` | Single image file. |
-| URL | `'https://ultralytics.com/images/bus.jpg'` | `str` | URL to an image. |
-| screenshot | `'screen'` | `str` | Capture a screenshot. |
-| PIL | `Image.open('im.jpg')` | `PIL.Image` | HWC format with RGB channels. |
-| OpenCV | `cv2.imread('im.jpg')` | `np.ndarray` | HWC format with BGR channels `uint8 (0-255)`. |
-| numpy | `np.zeros((640,1280,3))` | `np.ndarray` | HWC format with BGR channels `uint8 (0-255)`. |
-| torch | `torch.zeros(16,3,320,640)` | `torch.Tensor` | BCHW format with RGB channels `float32 (0.0-1.0)`. |
-| CSV | `'sources.csv'` | `str` or `Path` | CSV file containing paths to images, videos, or directories. |
-| video ✅ | `'video.mp4'` | `str` or `Path` | Video file in formats like MP4, AVI, etc. |
-| directory ✅ | `'path/'` | `str` or `Path` | Path to a directory containing images or videos. |
-| glob ✅ | `'path/*.jpg'` | `str` | Glob pattern to match multiple files. Use the `*` character as a wildcard. |
-| YouTube ✅ | `'https://youtu.be/LNwODJXcvt4'` | `str` | URL to a YouTube video. |
-| stream ✅ | `'rtsp://example.com/media.mp4'` | `str` | URL for streaming protocols such as RTSP, RTMP, TCP, or an IP address. |
-| multi-stream ✅ | `'list.streams'` | `str` or `Path` | `*.streams` text file with one stream URL per row, i.e. 8 streams will run at batch-size 8. |
+| Source | Example | Type | Notes |
+| ----------------------------------------------------- | ------------------------------------------ | --------------- | ------------------------------------------------------------------------------------------- |
+| image | `'image.jpg'` | `str` or `Path` | Single image file. |
+| URL | `'https://ultralytics.com/images/bus.jpg'` | `str` | URL to an image. |
+| screenshot | `'screen'` | `str` | Capture a screenshot. |
+| PIL | `Image.open('image.jpg')` | `PIL.Image` | HWC format with RGB channels. |
+| [OpenCV](https://www.ultralytics.com/glossary/opencv) | `cv2.imread('image.jpg')` | `np.ndarray` | HWC format with BGR channels `uint8 (0-255)`. |
+| numpy | `np.zeros((640,1280,3))` | `np.ndarray` | HWC format with BGR channels `uint8 (0-255)`. |
+| torch | `torch.zeros(16,3,320,640)` | `torch.Tensor` | BCHW format with RGB channels `float32 (0.0-1.0)`. |
+| CSV | `'sources.csv'` | `str` or `Path` | CSV file containing paths to images, videos, or directories. |
+| video ✅ | `'video.mp4'` | `str` or `Path` | Video file in formats like MP4, AVI, etc. |
+| directory ✅ | `'path/'` | `str` or `Path` | Path to a directory containing images or videos. |
+| glob ✅ | `'path/*.jpg'` | `str` | Glob pattern to match multiple files. Use the `*` character as a wildcard. |
+| YouTube ✅ | `'https://youtu.be/LNwODJXcvt4'` | `str` | URL to a YouTube video. |
+| stream ✅ | `'rtsp://example.com/media.mp4'` | `str` | URL for streaming protocols such as RTSP, RTMP, TCP, or an IP address. |
+| multi-stream ✅ | `'list.streams'` | `str` or `Path` | `*.streams` text file with one stream URL per row, i.e. 8 streams will run at batch-size 8. |
Below are code examples for using each source type:
@@ -229,7 +229,7 @@ Below are code examples for using each source type:
=== "torch"
- Run inference on an image represented as a PyTorch tensor.
+ Run inference on an image represented as a [PyTorch](https://www.ultralytics.com/glossary/pytorch) tensor.
```python
import torch
@@ -328,9 +328,10 @@ Below are code examples for using each source type:
results = model(source, stream=True) # generator of Results objects
```
- === "Streams"
+ === "Stream"
+
+ Use the stream mode to run inference on live video streams using RTSP, RTMP, TCP, or IP address protocols. If a single stream is provided, the model runs inference with a [batch size](https://www.ultralytics.com/glossary/batch-size) of 1. For multiple streams, a `.streams` text file can be used to perform batched inference, where the batch size is determined by the number of streams provided (e.g., batch-size 8 for 8 streams).
- Run inference on remote streaming sources using RTSP, RTMP, TCP and IP address protocols. If multiple streams are provided in a `*.streams` text file then batched inference will run, i.e. 8 streams will run at batch-size 8, otherwise single streams will run at batch-size 1.
```python
from ultralytics import YOLO
@@ -338,15 +339,43 @@ Below are code examples for using each source type:
model = YOLO("yolov8n.pt")
# Single stream with batch-size 1 inference
- source = "rtsp://example.com/media.mp4" # RTSP, RTMP, TCP or IP streaming address
+ source = "rtsp://example.com/media.mp4" # RTSP, RTMP, TCP, or IP streaming address
+
+ # Run inference on the source
+ results = model(source, stream=True) # generator of Results objects
+ ```
+
+ For single stream usage, the batch size is set to 1 by default, allowing efficient real-time processing of the video feed.
+
+ === "Multi-Stream"
+
+ To handle multiple video streams simultaneously, use a `.streams` text file containing the streaming sources. The model will run batched inference where the batch size equals the number of streams. This setup enables efficient processing of multiple feeds concurrently.
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load a pretrained YOLOv8n model
+ model = YOLO("yolov8n.pt")
- # Multiple streams with batched inference (i.e. batch-size 8 for 8 streams)
- source = "path/to/list.streams" # *.streams text file with one streaming address per row
+ # Multiple streams with batched inference (e.g., batch-size 8 for 8 streams)
+ source = "path/to/list.streams" # *.streams text file with one streaming address per line
# Run inference on the source
results = model(source, stream=True) # generator of Results objects
```
+ Example `.streams` text file:
+
+ ```txt
+ rtsp://example.com/media1.mp4
+ rtsp://example.com/media2.mp4
+ rtmp://example2.com/live
+ tcp://192.168.1.100:554
+ ...
+ ```
+
+ Each row in the file represents a streaming source, allowing you to monitor and perform inference on several video streams at once.
+
## Inference Arguments
`model.predict()` accepts multiple arguments that can be passed at inference time to override defaults:
@@ -365,38 +394,11 @@ Below are code examples for using each source type:
Inference arguments:
-| Argument | Type | Default | Description |
-| --------------- | -------------- | ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `source` | `str` | `'ultralytics/assets'` | Specifies the data source for inference. Can be an image path, video file, directory, URL, or device ID for live feeds. Supports a wide range of formats and sources, enabling flexible application across different types of input. |
-| `conf` | `float` | `0.25` | Sets the minimum confidence threshold for detections. Objects detected with confidence below this threshold will be disregarded. Adjusting this value can help reduce false positives. |
-| `iou` | `float` | `0.7` | Intersection Over Union (IoU) threshold for Non-Maximum Suppression (NMS). Lower values result in fewer detections by eliminating overlapping boxes, useful for reducing duplicates. |
-| `imgsz` | `int or tuple` | `640` | Defines the image size for inference. Can be a single integer `640` for square resizing or a (height, width) tuple. Proper sizing can improve detection accuracy and processing speed. |
-| `half` | `bool` | `False` | Enables half-precision (FP16) inference, which can speed up model inference on supported GPUs with minimal impact on accuracy. |
-| `device` | `str` | `None` | Specifies the device for inference (e.g., `cpu`, `cuda:0` or `0`). Allows users to select between CPU, a specific GPU, or other compute devices for model execution. |
-| `max_det` | `int` | `300` | Maximum number of detections allowed per image. Limits the total number of objects the model can detect in a single inference, preventing excessive outputs in dense scenes. |
-| `vid_stride` | `int` | `1` | Frame stride for video inputs. Allows skipping frames in videos to speed up processing at the cost of temporal resolution. A value of 1 processes every frame, higher values skip frames. |
-| `stream_buffer` | `bool` | `False` | Determines if all frames should be buffered when processing video streams (`True`), or if the model should return the most recent frame (`False`). Useful for real-time applications. |
-| `visualize` | `bool` | `False` | Activates visualization of model features during inference, providing insights into what the model is "seeing". Useful for debugging and model interpretation. |
-| `augment` | `bool` | `False` | Enables test-time augmentation (TTA) for predictions, potentially improving detection robustness at the cost of inference speed. |
-| `agnostic_nms` | `bool` | `False` | Enables class-agnostic Non-Maximum Suppression (NMS), which merges overlapping boxes of different classes. Useful in multi-class detection scenarios where class overlap is common. |
-| `classes` | `list[int]` | `None` | Filters predictions to a set of class IDs. Only detections belonging to the specified classes will be returned. Useful for focusing on relevant objects in multi-class detection tasks. |
-| `retina_masks` | `bool` | `False` | Uses high-resolution segmentation masks if available in the model. This can enhance mask quality for segmentation tasks, providing finer detail. |
-| `embed` | `list[int]` | `None` | Specifies the layers from which to extract feature vectors or embeddings. Useful for downstream tasks like clustering or similarity search. |
+{% include "macros/predict-args.md" %}
Visualization arguments:
-| Argument | Type | Default | Description |
-| ------------- | --------------- | ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `show` | `bool` | `False` | If `True`, displays the annotated images or videos in a window. Useful for immediate visual feedback during development or testing. |
-| `save` | `bool` | `False` or `True` | Enables saving of the annotated images or videos to file. Useful for documentation, further analysis, or sharing results. Defaults to True when using CLI & False when used in Python. |
-| `save_frames` | `bool` | `False` | When processing videos, saves individual frames as images. Useful for extracting specific frames or for detailed frame-by-frame analysis. |
-| `save_txt` | `bool` | `False` | Saves detection results in a text file, following the format `[class] [x_center] [y_center] [width] [height] [confidence]`. Useful for integration with other analysis tools. |
-| `save_conf` | `bool` | `False` | Includes confidence scores in the saved text files. Enhances the detail available for post-processing and analysis. |
-| `save_crop` | `bool` | `False` | Saves cropped images of detections. Useful for dataset augmentation, analysis, or creating focused datasets for specific objects. |
-| `show_labels` | `bool` | `True` | Displays labels for each detection in the visual output. Provides immediate understanding of detected objects. |
-| `show_conf` | `bool` | `True` | Displays the confidence score for each detection alongside the label. Gives insight into the model's certainty for each detection. |
-| `show_boxes` | `bool` | `True` | Draws bounding boxes around detected objects. Essential for visual identification and location of objects in images or video frames. |
-| `line_width` | `None` or `int` | `None` | Specifies the line width of bounding boxes. If `None`, the line width is automatically adjusted based on the image size. Provides visual customization for clarity. |
+{% include "macros/visualization-args.md" %}
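As a brief sketch, a few inference and visualization arguments from the tables above can be combined in a single `predict` call:

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
# Raise the confidence threshold, keep only class 0 ('person' in COCO),
# and save annotated output with thin boxes and no confidence scores
results = model.predict("https://ultralytics.com/images/bus.jpg", conf=0.5, classes=[0], save=True, line_width=1, show_conf=False)
```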
## Image and Video Formats
@@ -737,16 +739,16 @@ When using YOLO models in a multi-threaded application, it's important to instan
from ultralytics import YOLO
- def thread_safe_predict(image_path):
+ def thread_safe_predict(model, image_path):
"""Performs thread-safe prediction on an image using a locally instantiated YOLO model."""
- local_model = YOLO("yolov8n.pt")
- results = local_model.predict(image_path)
+ model = YOLO(model)  # instantiate a fresh model inside the thread
+ results = model.predict(image_path)
# Process results
# Starting threads that each have their own model instance
- Thread(target=thread_safe_predict, args=("image1.jpg",)).start()
- Thread(target=thread_safe_predict, args=("image2.jpg",)).start()
+ Thread(target=thread_safe_predict, args=("yolov8n.pt", "image1.jpg")).start()
+ Thread(target=thread_safe_predict, args=("yolov8n.pt", "image2.jpg")).start()
```
For an in-depth look at thread-safe inference with YOLO models and step-by-step instructions, please refer to our [YOLO Thread-Safe Inference Guide](../guides/yolo-thread-safe-inference.md). This guide will provide you with all the necessary information to avoid common pitfalls and ensure that your multi-threaded inference runs smoothly.
@@ -806,7 +808,7 @@ This script will run predictions on each frame of the video, visualize the resul
### What is Ultralytics YOLOv8 and its predict mode for real-time inference?
-Ultralytics YOLOv8 is a state-of-the-art model for real-time object detection, segmentation, and classification. Its **predict mode** allows users to perform high-speed inference on various data sources such as images, videos, and live streams. Designed for performance and versatility, it also offers batch processing and streaming modes. For more details on its features, check out the [Ultralytics YOLOv8 predict mode](#key-features-of-predict-mode).
+Ultralytics YOLOv8 is a state-of-the-art model for real-time [object detection](https://www.ultralytics.com/glossary/object-detection), segmentation, and classification. Its **predict mode** allows users to perform high-speed inference on various data sources such as images, videos, and live streams. Designed for performance and versatility, it also offers batch processing and streaming modes. For more details on its features, check out the [Ultralytics YOLOv8 predict mode](#key-features-of-predict-mode).
### How can I run inference using Ultralytics YOLOv8 on different data sources?
diff --git a/docs/en/modes/track.md b/docs/en/modes/track.md
index cfeb8c90..46c43b0b 100644
--- a/docs/en/modes/track.md
+++ b/docs/en/modes/track.md
@@ -12,9 +12,9 @@ Object tracking in the realm of video analytics is a critical task that not only
## Why Choose Ultralytics YOLO for Object Tracking?
-The output from Ultralytics trackers is consistent with standard object detection but has the added value of object IDs. This makes it easy to track objects in video streams and perform subsequent analytics. Here's why you should consider using Ultralytics YOLO for your object tracking needs:
+The output from Ultralytics trackers is consistent with standard [object detection](https://www.ultralytics.com/glossary/object-detection) but has the added value of object IDs. This makes it easy to track objects in video streams and perform subsequent analytics. Here's why you should consider using Ultralytics YOLO for your object tracking needs:
-- **Efficiency:** Process video streams in real-time without compromising accuracy.
+- **Efficiency:** Process video streams in real-time without compromising [accuracy](https://www.ultralytics.com/glossary/accuracy).
- **Flexibility:** Supports multiple tracking algorithms and configurations.
- **Ease of Use:** Simple Python API and CLI options for quick integration and deployment.
- **Customizability:** Easy to use with custom trained YOLO models, allowing integration into domain-specific applications.
@@ -153,7 +153,7 @@ For a comprehensive list of tracking arguments, refer to the [ultralytics/cfg/tr
### Persisting Tracks Loop
-Here is a Python script using OpenCV (`cv2`) and YOLOv8 to run object tracking on video frames. This script still assumes you have already installed the necessary packages (`opencv-python` and `ultralytics`). The `persist=True` argument tells the tracker that the current image or frame is the next in a sequence and to expect tracks from the previous image in the current image.
+Here is a Python script using [OpenCV](https://www.ultralytics.com/glossary/opencv) (`cv2`) and YOLOv8 to run object tracking on video frames. This script still assumes you have already installed the necessary packages (`opencv-python` and `ultralytics`). The `persist=True` argument tells the tracker that the current image or frame is the next in a sequence and to expect tracks from the previous image in the current image.
!!! example "Streaming for-loop with tracking"
@@ -290,63 +290,35 @@ Finally, after all threads have completed their task, the windows displaying the
from ultralytics import YOLO
+ # Define model names and video sources
+ MODEL_NAMES = ["yolov8n.pt", "yolov8n-seg.pt"]
+ SOURCES = ["path/to/video.mp4", "0"] # local video, 0 for webcam
- def run_tracker_in_thread(filename, model, file_index):
- """
- Runs a video file or webcam stream concurrently with the YOLOv8 model using threading.
- This function captures video frames from a given file or camera source and utilizes the YOLOv8 model for object
- tracking. The function runs in its own thread for concurrent processing.
+ def run_tracker_in_thread(model_name, filename):
+ """
+ Run YOLO tracker in its own thread for concurrent processing.
Args:
+ model_name (str): Name or path of the YOLOv8 model to load (e.g., "yolov8n.pt").
filename (str): The path to the video file or the identifier for the webcam/external camera source.
- model (obj): The YOLOv8 model object.
- file_index (int): An index to uniquely identify the file being processed, used for display purposes.
-
- Note:
- Press 'q' to quit the video display window.
"""
- video = cv2.VideoCapture(filename) # Read the video file
-
- while True:
- ret, frame = video.read() # Read the video frames
-
- # Exit the loop if no more frames in either video
- if not ret:
- break
-
- # Track objects in frames if available
- results = model.track(frame, persist=True)
- res_plotted = results[0].plot()
- cv2.imshow(f"Tracking_Stream_{file_index}", res_plotted)
-
- key = cv2.waitKey(1)
- if key == ord("q"):
- break
-
- # Release video sources
- video.release()
-
+ model = YOLO(model_name)
+ results = model.track(filename, save=True, stream=True)
+ for r in results:
+ pass  # consume the results generator so each frame is processed
- # Load the models
- model1 = YOLO("yolov8n.pt")
- model2 = YOLO("yolov8n-seg.pt")
- # Define the video files for the trackers
- video_file1 = "path/to/video1.mp4" # Path to video file, 0 for webcam
- video_file2 = 0 # Path to video file, 0 for webcam, 1 for external camera
+ # Create and start tracker threads using a for loop
+ tracker_threads = []
+ for video_file, model_name in zip(SOURCES, MODEL_NAMES):
+ thread = threading.Thread(target=run_tracker_in_thread, args=(model_name, video_file), daemon=True)
+ tracker_threads.append(thread)
+ thread.start()
- # Create the tracker threads
- tracker_thread1 = threading.Thread(target=run_tracker_in_thread, args=(video_file1, model1, 1), daemon=True)
- tracker_thread2 = threading.Thread(target=run_tracker_in_thread, args=(video_file2, model2, 2), daemon=True)
-
- # Start the tracker threads
- tracker_thread1.start()
- tracker_thread2.start()
-
- # Wait for the tracker threads to finish
- tracker_thread1.join()
- tracker_thread2.join()
+ # Wait for all tracker threads to finish
+ for thread in tracker_threads:
+ thread.join()
# Clean up and close windows
cv2.destroyAllWindows()
@@ -408,35 +380,37 @@ To run object tracking on multiple video streams simultaneously, you can use Pyt
from ultralytics import YOLO
+ # Define model names and video sources
+ MODEL_NAMES = ["yolov8n.pt", "yolov8n-seg.pt"]
+ SOURCES = ["path/to/video.mp4", "0"] # local video, 0 for webcam
- def run_tracker_in_thread(filename, model, file_index):
- video = cv2.VideoCapture(filename)
- while True:
- ret, frame = video.read()
- if not ret:
- break
- results = model.track(frame, persist=True)
- res_plotted = results[0].plot()
- cv2.imshow(f"Tracking_Stream_{file_index}", res_plotted)
- if cv2.waitKey(1) & 0xFF == ord("q"):
- break
- video.release()
+ def run_tracker_in_thread(model_name, filename):
+ """
+ Run YOLO tracker in its own thread for concurrent processing.
- model1 = YOLO("yolov8n.pt")
- model2 = YOLO("yolov8n-seg.pt")
- video_file1 = "path/to/video1.mp4"
- video_file2 = 0 # Path to a second video file, or 0 for a webcam
+ Args:
+ model_name (str): Name or path of the YOLOv8 model to load (e.g., "yolov8n.pt").
+ filename (str): The path to the video file or the identifier for the webcam/external camera source.
+ """
+ model = YOLO(model_name)
+ results = model.track(filename, save=True, stream=True)
+ for r in results:
+ pass  # consume the results generator so each frame is processed
- tracker_thread1 = threading.Thread(target=run_tracker_in_thread, args=(video_file1, model1, 1), daemon=True)
- tracker_thread2 = threading.Thread(target=run_tracker_in_thread, args=(video_file2, model2, 2), daemon=True)
- tracker_thread1.start()
- tracker_thread2.start()
+ # Create and start tracker threads using a for loop
+ tracker_threads = []
+ for video_file, model_name in zip(SOURCES, MODEL_NAMES):
+ thread = threading.Thread(target=run_tracker_in_thread, args=(model_name, video_file), daemon=True)
+ tracker_threads.append(thread)
+ thread.start()
- tracker_thread1.join()
- tracker_thread2.join()
+ # Wait for all tracker threads to finish
+ for thread in tracker_threads:
+ thread.join()
+ # Clean up and close windows
cv2.destroyAllWindows()
```
diff --git a/docs/en/modes/train.md b/docs/en/modes/train.md
index 4e5fcb05..f5722b72 100644
--- a/docs/en/modes/train.md
+++ b/docs/en/modes/train.md
@@ -10,7 +10,7 @@ keywords: Ultralytics, YOLOv8, model training, deep learning, object detection,
## Introduction
-Training a deep learning model involves feeding it data and adjusting its parameters so that it can make accurate predictions. Train mode in Ultralytics YOLOv8 is engineered for effective and efficient training of object detection models, fully utilizing modern hardware capabilities. This guide aims to cover all the details you need to get started with training your own models using YOLOv8's robust set of features.
+Training a [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) model involves feeding it data and adjusting its parameters so that it can make accurate predictions. Train mode in Ultralytics YOLOv8 is engineered for effective and efficient training of object detection models, fully utilizing modern hardware capabilities. This guide aims to cover all the details you need to get started with training your own models using YOLOv8's robust set of features.
@@ -41,13 +41,13 @@ The following are some notable features of YOLOv8's Train mode:
- **Hyperparameter Configuration:** The option to modify hyperparameters through YAML configuration files or CLI arguments.
- **Visualization and Monitoring:** Real-time tracking of training metrics and visualization of the learning process for better insights.
-!!! tip "Tip"
+!!! tip
* YOLOv8 datasets like COCO, VOC, ImageNet and many others automatically download on first use, i.e. `yolo train data=coco.yaml`
## Usage Examples
-Train YOLOv8n on the COCO8 dataset for 100 epochs at image size 640. The training device can be specified using the `device` argument. If no argument is passed GPU `device=0` will be used if available, otherwise `device='cpu'` will be used. See Arguments section below for a full list of training arguments.
+Train YOLOv8n on the COCO8 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) at image size 640. The training device can be specified using the `device` argument. If no argument is passed, GPU `device=0` will be used if available; otherwise `device='cpu'` will be used. See the Arguments section below for a full list of training arguments.
!!! example "Single-GPU and CPU Training Example"
@@ -123,14 +123,14 @@ To enable training on Apple M1 and M2 chips, you should specify 'mps' as your de
# Load a model
model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
- # Train the model with 2 GPUs
+ # Train the model with MPS
results = model.train(data="coco8.yaml", epochs=100, imgsz=640, device="mps")
```
=== "CLI"
```bash
- # Start training from a pretrained *.pt model using GPUs 0 and 1
+ # Start training from a pretrained *.pt model using MPS
yolo detect train data=coco8.yaml model=yolov8n.pt epochs=100 imgsz=640 device=mps
```
@@ -140,7 +140,7 @@ While leveraging the computational power of the M1/M2 chips, this enables more e
Resuming training from a previously saved state is a crucial feature when working with deep learning models. This can come in handy in various scenarios, like when the training process has been unexpectedly interrupted, or when you wish to continue training a model with new data or for more epochs.
-When training is resumed, Ultralytics YOLO loads the weights from the last saved model and also restores the optimizer state, learning rate scheduler, and the epoch number. This allows you to continue the training process seamlessly from where it was left off.
+When training is resumed, Ultralytics YOLO loads the weights from the last saved model and also restores the optimizer state, [learning rate](https://www.ultralytics.com/glossary/learning-rate) scheduler, and the epoch number. This allows you to continue the training process seamlessly from where it was left off.
You can easily resume training in Ultralytics YOLO by setting the `resume` argument to `True` when calling the `train` method, and specifying the path to the `.pt` file containing the partially trained model weights.
@@ -169,94 +169,27 @@ Below is an example of how to resume an interrupted training using Python and vi
By setting `resume=True`, the `train` function will continue training from where it left off, using the state stored in the 'path/to/last.pt' file. If the `resume` argument is omitted or set to `False`, the `train` function will start a new training session.
-Remember that checkpoints are saved at the end of every epoch by default, or at fixed interval using the `save_period` argument, so you must complete at least 1 epoch to resume a training run.
+Remember that checkpoints are saved at the end of every epoch by default, or at fixed intervals using the `save_period` argument, so you must complete at least 1 epoch to resume a training run.
## Train Settings
-The training settings for YOLO models encompass various hyperparameters and configurations used during the training process. These settings influence the model's performance, speed, and accuracy. Key training settings include batch size, learning rate, momentum, and weight decay. Additionally, the choice of optimizer, loss function, and training dataset composition can impact the training process. Careful tuning and experimentation with these settings are crucial for optimizing performance.
-
-| Argument | Default | Description |
-| ----------------- | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `model` | `None` | Specifies the model file for training. Accepts a path to either a `.pt` pretrained model or a `.yaml` configuration file. Essential for defining the model structure or initializing weights. |
-| `data` | `None` | Path to the dataset configuration file (e.g., `coco8.yaml`). This file contains dataset-specific parameters, including paths to training and validation data, class names, and number of classes. |
-| `epochs` | `100` | Total number of training epochs. Each epoch represents a full pass over the entire dataset. Adjusting this value can affect training duration and model performance. |
-| `time` | `None` | Maximum training time in hours. If set, this overrides the `epochs` argument, allowing training to automatically stop after the specified duration. Useful for time-constrained training scenarios. |
-| `patience` | `100` | Number of epochs to wait without improvement in validation metrics before early stopping the training. Helps prevent overfitting by stopping training when performance plateaus. |
-| `batch` | `16` | Batch size, with three modes: set as an integer (e.g., `batch=16`), auto mode for 60% GPU memory utilization (`batch=-1`), or auto mode with specified utilization fraction (`batch=0.70`). |
-| `imgsz` | `640` | Target image size for training. All images are resized to this dimension before being fed into the model. Affects model accuracy and computational complexity. |
-| `save` | `True` | Enables saving of training checkpoints and final model weights. Useful for resuming training or model deployment. |
-| `save_period` | `-1` | Frequency of saving model checkpoints, specified in epochs. A value of -1 disables this feature. Useful for saving interim models during long training sessions. |
-| `cache` | `False` | Enables caching of dataset images in memory (`True`/`ram`), on disk (`disk`), or disables it (`False`). Improves training speed by reducing disk I/O at the cost of increased memory usage. |
-| `device` | `None` | Specifies the computational device(s) for training: a single GPU (`device=0`), multiple GPUs (`device=0,1`), CPU (`device=cpu`), or MPS for Apple silicon (`device=mps`). |
-| `workers` | `8` | Number of worker threads for data loading (per `RANK` if Multi-GPU training). Influences the speed of data preprocessing and feeding into the model, especially useful in multi-GPU setups. |
-| `project` | `None` | Name of the project directory where training outputs are saved. Allows for organized storage of different experiments. |
-| `name` | `None` | Name of the training run. Used for creating a subdirectory within the project folder, where training logs and outputs are stored. |
-| `exist_ok` | `False` | If True, allows overwriting of an existing project/name directory. Useful for iterative experimentation without needing to manually clear previous outputs. |
-| `pretrained` | `True` | Determines whether to start training from a pretrained model. Can be a boolean value or a string path to a specific model from which to load weights. Enhances training efficiency and model performance. |
-| `optimizer` | `'auto'` | Choice of optimizer for training. Options include `SGD`, `Adam`, `AdamW`, `NAdam`, `RAdam`, `RMSProp` etc., or `auto` for automatic selection based on model configuration. Affects convergence speed and stability. |
-| `verbose` | `False` | Enables verbose output during training, providing detailed logs and progress updates. Useful for debugging and closely monitoring the training process. |
-| `seed` | `0` | Sets the random seed for training, ensuring reproducibility of results across runs with the same configurations. |
-| `deterministic` | `True` | Forces deterministic algorithm use, ensuring reproducibility but may affect performance and speed due to the restriction on non-deterministic algorithms. |
-| `single_cls` | `False` | Treats all classes in multi-class datasets as a single class during training. Useful for binary classification tasks or when focusing on object presence rather than classification. |
-| `rect` | `False` | Enables rectangular training, optimizing batch composition for minimal padding. Can improve efficiency and speed but may affect model accuracy. |
-| `cos_lr` | `False` | Utilizes a cosine learning rate scheduler, adjusting the learning rate following a cosine curve over epochs. Helps in managing learning rate for better convergence. |
-| `close_mosaic` | `10` | Disables mosaic data augmentation in the last N epochs to stabilize training before completion. Setting to 0 disables this feature. |
-| `resume` | `False` | Resumes training from the last saved checkpoint. Automatically loads model weights, optimizer state, and epoch count, continuing training seamlessly. |
-| `amp` | `True` | Enables Automatic Mixed Precision (AMP) training, reducing memory usage and possibly speeding up training with minimal impact on accuracy. |
-| `fraction` | `1.0` | Specifies the fraction of the dataset to use for training. Allows for training on a subset of the full dataset, useful for experiments or when resources are limited. |
-| `profile` | `False` | Enables profiling of ONNX and TensorRT speeds during training, useful for optimizing model deployment. |
-| `freeze` | `None` | Freezes the first N layers of the model or specified layers by index, reducing the number of trainable parameters. Useful for fine-tuning or transfer learning. |
-| `lr0` | `0.01` | Initial learning rate (i.e. `SGD=1E-2`, `Adam=1E-3`) . Adjusting this value is crucial for the optimization process, influencing how rapidly model weights are updated. |
-| `lrf` | `0.01` | Final learning rate as a fraction of the initial rate = (`lr0 * lrf`), used in conjunction with schedulers to adjust the learning rate over time. |
-| `momentum` | `0.937` | Momentum factor for SGD or beta1 for Adam optimizers, influencing the incorporation of past gradients in the current update. |
-| `weight_decay` | `0.0005` | L2 regularization term, penalizing large weights to prevent overfitting. |
-| `warmup_epochs` | `3.0` | Number of epochs for learning rate warmup, gradually increasing the learning rate from a low value to the initial learning rate to stabilize training early on. |
-| `warmup_momentum` | `0.8` | Initial momentum for warmup phase, gradually adjusting to the set momentum over the warmup period. |
-| `warmup_bias_lr` | `0.1` | Learning rate for bias parameters during the warmup phase, helping stabilize model training in the initial epochs. |
-| `box` | `7.5` | Weight of the box loss component in the loss function, influencing how much emphasis is placed on accurately predicting bounding box coordinates. |
-| `cls` | `0.5` | Weight of the classification loss in the total loss function, affecting the importance of correct class prediction relative to other components. |
-| `dfl` | `1.5` | Weight of the distribution focal loss, used in certain YOLO versions for fine-grained classification. |
-| `pose` | `12.0` | Weight of the pose loss in models trained for pose estimation, influencing the emphasis on accurately predicting pose keypoints. |
-| `kobj` | `2.0` | Weight of the keypoint objectness loss in pose estimation models, balancing detection confidence with pose accuracy. |
-| `label_smoothing` | `0.0` | Applies label smoothing, softening hard labels to a mix of the target label and a uniform distribution over labels, can improve generalization. |
-| `nbs` | `64` | Nominal batch size for normalization of loss. |
-| `overlap_mask` | `True` | Determines whether segmentation masks should overlap during training, applicable in instance segmentation tasks. |
-| `mask_ratio` | `4` | Downsample ratio for segmentation masks, affecting the resolution of masks used during training. |
-| `dropout` | `0.0` | Dropout rate for regularization in classification tasks, preventing overfitting by randomly omitting units during training. |
-| `val` | `True` | Enables validation during training, allowing for periodic evaluation of model performance on a separate dataset. |
-| `plots` | `False` | Generates and saves plots of training and validation metrics, as well as prediction examples, providing visual insights into model performance and learning progression. |
+The training settings for YOLO models encompass various hyperparameters and configurations used during the training process. These settings influence the model's performance, speed, and [accuracy](https://www.ultralytics.com/glossary/accuracy). Key training settings include batch size, learning rate, momentum, and weight decay. Additionally, the choice of optimizer, [loss function](https://www.ultralytics.com/glossary/loss-function), and training dataset composition can impact the training process. Careful tuning and experimentation with these settings are crucial for optimizing performance.
+
+{% include "macros/train-args.md" %}
!!! info "Note on Batch-size Settings"
The `batch` argument can be configured in three ways:
- - **Fixed Batch Size**: Set an integer value (e.g., `batch=16`), specifying the number of images per batch directly.
+ - **Fixed [Batch Size](https://www.ultralytics.com/glossary/batch-size)**: Set an integer value (e.g., `batch=16`), specifying the number of images per batch directly.
- **Auto Mode (60% GPU Memory)**: Use `batch=-1` to automatically adjust batch size for approximately 60% CUDA memory utilization.
- **Auto Mode with Utilization Fraction**: Set a fraction value (e.g., `batch=0.70`) to adjust batch size based on the specified fraction of GPU memory usage.
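As a sketch, the three modes above map directly onto the `batch` argument of `model.train()`:

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
model.train(data="coco8.yaml", epochs=10, batch=16)  # fixed batch size of 16 images
# batch=-1 would auto-size for ~60% GPU memory; batch=0.70 would target 70% utilization
```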
## Augmentation Settings and Hyperparameters
-Augmentation techniques are essential for improving the robustness and performance of YOLO models by introducing variability into the training data, helping the model generalize better to unseen data. The following table outlines the purpose and effect of each augmentation argument:
-
-| Argument | Type | Default | Range | Description |
-| --------------- | ------- | ------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `hsv_h` | `float` | `0.015` | `0.0 - 1.0` | Adjusts the hue of the image by a fraction of the color wheel, introducing color variability. Helps the model generalize across different lighting conditions. |
-| `hsv_s` | `float` | `0.7` | `0.0 - 1.0` | Alters the saturation of the image by a fraction, affecting the intensity of colors. Useful for simulating different environmental conditions. |
-| `hsv_v` | `float` | `0.4` | `0.0 - 1.0` | Modifies the value (brightness) of the image by a fraction, helping the model to perform well under various lighting conditions. |
-| `degrees` | `float` | `0.0` | `-180 - +180` | Rotates the image randomly within the specified degree range, improving the model's ability to recognize objects at various orientations. |
-| `translate` | `float` | `0.1` | `0.0 - 1.0` | Translates the image horizontally and vertically by a fraction of the image size, aiding in learning to detect partially visible objects. |
-| `scale` | `float` | `0.5` | `>=0.0` | Scales the image by a gain factor, simulating objects at different distances from the camera. |
-| `shear` | `float` | `0.0` | `-180 - +180` | Shears the image by a specified degree, mimicking the effect of objects being viewed from different angles. |
-| `perspective` | `float` | `0.0` | `0.0 - 0.001` | Applies a random perspective transformation to the image, enhancing the model's ability to understand objects in 3D space. |
-| `flipud` | `float` | `0.0` | `0.0 - 1.0` | Flips the image upside down with the specified probability, increasing the data variability without affecting the object's characteristics. |
-| `fliplr` | `float` | `0.5` | `0.0 - 1.0` | Flips the image left to right with the specified probability, useful for learning symmetrical objects and increasing dataset diversity. |
-| `bgr` | `float` | `0.0` | `0.0 - 1.0` | Flips the image channels from RGB to BGR with the specified probability, useful for increasing robustness to incorrect channel ordering. |
-| `mosaic` | `float` | `1.0` | `0.0 - 1.0` | Combines four training images into one, simulating different scene compositions and object interactions. Highly effective for complex scene understanding. |
-| `mixup` | `float` | `0.0` | `0.0 - 1.0` | Blends two images and their labels, creating a composite image. Enhances the model's ability to generalize by introducing label noise and visual variability. |
-| `copy_paste` | `float` | `0.0` | `0.0 - 1.0` | Copies objects from one image and pastes them onto another, useful for increasing object instances and learning object occlusion. |
-| `auto_augment` | `str` | `randaugment` | - | Automatically applies a predefined augmentation policy (`randaugment`, `autoaugment`, `augmix`), optimizing for classification tasks by diversifying the visual features. |
-| `erasing` | `float` | `0.4` | `0.0 - 0.9` | Randomly erases a portion of the image during classification training, encouraging the model to focus on less obvious features for recognition. |
-| `crop_fraction` | `float` | `1.0` | `0.1 - 1.0` | Crops the classification image to a fraction of its size to emphasize central features and adapt to object scales, reducing background distractions. |
+Augmentation techniques are essential for improving the robustness and performance of YOLO models by introducing variability into the [training data](https://www.ultralytics.com/glossary/training-data), helping the model generalize better to unseen data. The following table outlines the purpose and effect of each augmentation argument:
+
+{% include "macros/augmentation-args.md" %}
These settings can be adjusted to meet the specific requirements of the dataset and task at hand. Experimenting with different values can help find the optimal augmentation strategy that leads to the best model performance.
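For example, a minimal sketch that overrides a few of these augmentation hyperparameters at train time (values chosen purely for illustration) might be:

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
# Add mild rotation and translation, keep horizontal flips and mosaic enabled
model.train(data="coco8.yaml", epochs=50, degrees=10.0, translate=0.2, fliplr=0.5, mosaic=1.0)
```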
@@ -310,7 +243,7 @@ After running this script, you will need to sign in to your ClearML account on t
### TensorBoard
-[TensorBoard](https://www.tensorflow.org/tensorboard) is a visualization toolkit for TensorFlow. It allows you to visualize your TensorFlow graph, plot quantitative metrics about the execution of your graph, and show additional data like images that pass through it.
+[TensorBoard](https://www.tensorflow.org/tensorboard) is a visualization toolkit for [TensorFlow](https://www.ultralytics.com/glossary/tensorflow). It allows you to visualize your TensorFlow graph, plot quantitative metrics about the execution of your graph, and show additional data like images that pass through it.
To use TensorBoard in [Google Colab](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb):
@@ -339,7 +272,7 @@ After setting up your logger, you can then proceed with your model training. All
## FAQ
-### How do I train an object detection model using Ultralytics YOLOv8?
+### How do I train an [object detection](https://www.ultralytics.com/glossary/object-detection) model using Ultralytics YOLOv8?
To train an object detection model using Ultralytics YOLOv8, you can either use the Python API or the CLI. Below is an example for both:
diff --git a/docs/en/modes/val.md b/docs/en/modes/val.md
index c5c496a6..91eb4c2a 100644
--- a/docs/en/modes/val.md
+++ b/docs/en/modes/val.md
@@ -10,7 +10,7 @@ keywords: Ultralytics, YOLOv8, model validation, machine learning, object detect
## Introduction
-Validation is a critical step in the machine learning pipeline, allowing you to assess the quality of your trained models. Val mode in Ultralytics YOLOv8 provides a robust suite of tools and metrics for evaluating the performance of your object detection models. This guide serves as a complete resource for understanding how to effectively use the Val mode to ensure that your models are both accurate and reliable.
+Validation is a critical step in the [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) pipeline, allowing you to assess the quality of your trained models. Val mode in Ultralytics YOLOv8 provides a robust suite of tools and metrics for evaluating the performance of your [object detection](https://www.ultralytics.com/glossary/object-detection) models. This guide serves as a complete resource for understanding how to effectively use the Val mode to ensure that your models are both accurate and reliable.
@@ -30,7 +30,7 @@ Here's why using YOLOv8's Val mode is advantageous:
- **Precision:** Get accurate metrics like mAP50, mAP75, and mAP50-95 to comprehensively evaluate your model.
- **Convenience:** Utilize built-in features that remember training settings, simplifying the validation process.
- **Flexibility:** Validate your model with the same or different datasets and image sizes.
-- **Hyperparameter Tuning:** Use validation metrics to fine-tune your model for better performance.
+- **[Hyperparameter Tuning](https://www.ultralytics.com/glossary/hyperparameter-tuning):** Use validation metrics to fine-tune your model for better performance.
### Key Features of Val Mode
@@ -41,13 +41,13 @@ These are the notable functionalities offered by YOLOv8's Val mode:
- **CLI and Python API:** Choose from command-line interface or Python API based on your preference for validation.
- **Data Compatibility:** Works seamlessly with datasets used during the training phase as well as custom datasets.
-!!! tip "Tip"
+!!! tip
* YOLOv8 models automatically remember their training settings, so you can validate a model at the same image size and on the original dataset easily with just `yolo val model=yolov8n.pt` or `model('yolov8n.pt').val()`
## Usage Examples
-Validate trained YOLOv8n model accuracy on the COCO8 dataset. No argument need to passed as the `model` retains its training `data` and arguments as model attributes. See Arguments section below for a full list of export arguments.
+Validate trained YOLOv8n model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the COCO8 dataset. No arguments are needed as the `model` retains its training `data` and arguments as model attributes. See Arguments section below for a full list of export arguments.
!!! example
@@ -79,22 +79,7 @@ Validate trained YOLOv8n model accuracy on the COCO8 dataset. No argument need t
When validating YOLO models, several arguments can be fine-tuned to optimize the evaluation process. These arguments control aspects such as input image size, batch processing, and performance thresholds. Below is a detailed breakdown of each argument to help you customize your validation settings effectively.
-| Argument | Type | Default | Description |
-| ------------- | ------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `data` | `str` | `None` | Specifies the path to the dataset configuration file (e.g., `coco8.yaml`). This file includes paths to validation data, class names, and number of classes. |
-| `imgsz` | `int` | `640` | Defines the size of input images. All images are resized to this dimension before processing. |
-| `batch` | `int` | `16` | Sets the number of images per batch. Use `-1` for AutoBatch, which automatically adjusts based on GPU memory availability. |
-| `save_json` | `bool` | `False` | If `True`, saves the results to a JSON file for further analysis or integration with other tools. |
-| `save_hybrid` | `bool` | `False` | If `True`, saves a hybrid version of labels that combines original annotations with additional model predictions. |
-| `conf` | `float` | `0.001` | Sets the minimum confidence threshold for detections. Detections with confidence below this threshold are discarded. |
-| `iou` | `float` | `0.6` | Sets the Intersection Over Union (IoU) threshold for Non-Maximum Suppression (NMS). Helps in reducing duplicate detections. |
-| `max_det` | `int` | `300` | Limits the maximum number of detections per image. Useful in dense scenes to prevent excessive detections. |
-| `half` | `bool` | `True` | Enables half-precision (FP16) computation, reducing memory usage and potentially increasing speed with minimal impact on accuracy. |
-| `device` | `str` | `None` | Specifies the device for validation (`cpu`, `cuda:0`, etc.). Allows flexibility in utilizing CPU or GPU resources. |
-| `dnn` | `bool` | `False` | If `True`, uses the OpenCV DNN module for ONNX model inference, offering an alternative to PyTorch inference methods. |
-| `plots` | `bool` | `False` | When set to `True`, generates and saves plots of predictions versus ground truth for visual evaluation of the model's performance. |
-| `rect` | `bool` | `False` | If `True`, uses rectangular inference for batching, reducing padding and potentially increasing speed and efficiency. |
-| `split` | `str` | `val` | Determines the dataset split to use for validation (`val`, `test`, or `train`). Allows flexibility in choosing the data segment for performance evaluation. |
+{% include "macros/validation-args.md" %}
Each of these settings plays a vital role in the validation process, allowing for a customizable and efficient evaluation of YOLO models. Adjusting these parameters according to your specific needs and resources can help achieve the best balance between accuracy and performance.
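For instance, a stricter confidence threshold, a custom IoU, and JSON export can be combined in a single call. This is a minimal sketch; the dataset config is a placeholder.

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")

# Validate with a tighter confidence threshold and save COCO-style JSON results
metrics = model.val(data="coco8.yaml", imgsz=640, conf=0.25, iou=0.6, save_json=True)
print(metrics.box.map)  # mAP50-95
```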
@@ -171,7 +156,7 @@ For a complete performance evaluation, it's crucial to review all these metrics.
Using Ultralytics YOLO for validation provides several advantages:
-- **Precision:** YOLOv8 offers accurate performance metrics including mAP50, mAP75, and mAP50-95.
+- **[Precision](https://www.ultralytics.com/glossary/precision):** YOLOv8 offers accurate performance metrics including mAP50, mAP75, and mAP50-95.
- **Convenience:** The models remember their training settings, making validation straightforward.
- **Flexibility:** You can validate against the same or different datasets and image sizes.
- **Hyperparameter Tuning:** Validation metrics help in fine-tuning models for better performance.
@@ -180,7 +165,7 @@ These benefits ensure that your models are evaluated thoroughly and can be optim
### Can I validate my YOLOv8 model using a custom dataset?
-Yes, you can validate your YOLOv8 model using a custom dataset. Specify the `data` argument with the path to your dataset configuration file. This file should include paths to the validation data, class names, and other relevant details.
+Yes, you can validate your YOLOv8 model using a [custom dataset](https://docs.ultralytics.com/datasets/). Specify the `data` argument with the path to your dataset configuration file. This file should include paths to the [validation data](https://www.ultralytics.com/glossary/validation-data), class names, and other relevant details.
Example in Python:
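A minimal sketch, assuming a hypothetical `custom_dataset.yaml` that follows the standard Ultralytics dataset format:

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")

# Point validation at a custom dataset config (hypothetical path)
metrics = model.val(data="path/to/custom_dataset.yaml")
print(metrics.box.map50)  # mAP at IoU 0.50
```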
diff --git a/docs/en/quickstart.md b/docs/en/quickstart.md
index f8d3a7cf..204623cc 100644
--- a/docs/en/quickstart.md
+++ b/docs/en/quickstart.md
@@ -1,12 +1,12 @@
---
comments: true
-description: Learn how to install Ultralytics using pip, conda, or Docker. Follow our step-by-step guide for a seamless setup of YOLOv8 with thorough instructions.
-keywords: Ultralytics, YOLOv8, Install Ultralytics, pip, conda, Docker, GitHub, machine learning, object detection
+description: Learn how to install Ultralytics using pip, conda, or Docker. Follow our step-by-step guide for a seamless setup of YOLO with thorough instructions.
+keywords: Ultralytics, YOLO11, Install Ultralytics, pip, conda, Docker, GitHub, machine learning, object detection
---
## Install Ultralytics
-Ultralytics provides various installation methods including pip, conda, and Docker. Install YOLOv8 via the `ultralytics` pip package for the latest stable release or by cloning the [Ultralytics GitHub repository](https://github.com/ultralytics/ultralytics) for the most up-to-date version. Docker can be used to execute the package in an isolated container, avoiding local installation.
+Ultralytics provides various installation methods including pip, conda, and Docker. Install YOLO via the `ultralytics` pip package for the latest stable release or by cloning the [Ultralytics GitHub repository](https://github.com/ultralytics/ultralytics) for the most up-to-date version. Docker can be used to execute the package in an isolated container, avoiding local installation.
-    [video embed] Watch: Object Detection using Ultralytics YOLOv8 Oriented Bounding Boxes (YOLOv8-OBB)
-    [video embed] Watch: Object Detection with YOLOv8-OBB using Ultralytics HUB
+    [video embed] Watch: Object Detection using Ultralytics YOLOv8 Oriented Bounding Boxes (YOLOv8-OBB)
+    [video embed] Watch: How to Train Ultralytics YOLOv8-OBB (Oriented Bounding Boxes) Models on DOTA Dataset using Ultralytics HUB
+    [video embed] Watch: How to Detect and Track Storage Tanks using Ultralytics YOLOv8-OBB | Oriented Bounding Boxes | DOTA
@@ -57,9 +57,9 @@ YOLO models can be used for a variety of tasks, including detection, segmentatio
- **Pose**: For identifying objects and estimating their keypoints in an image or video.
- **OBB**: Oriented (i.e. rotated) bounding boxes suitable for satellite or medical imagery.
-| Argument | Default | Description |
-| -------- | ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `task` | `'detect'` | Specifies the YOLO task to be executed. Options include `detect` for object detection, `segment` for segmentation, `classify` for classification, `pose` for pose estimation and `OBB` for oriented bounding boxes. Each task is tailored to specific types of output and problems within image and video analysis. |
+| Argument | Default | Description |
+| -------- | ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `task` | `'detect'` | Specifies the YOLO task to be executed. Options include `detect` for [object detection](https://www.ultralytics.com/glossary/object-detection), `segment` for segmentation, `classify` for classification, `pose` for pose estimation and `OBB` for oriented bounding boxes. Each task is tailored to specific types of output and problems within image and video analysis. |
[Tasks Guide](../tasks/index.md){ .md-button }
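In practice the task is usually inferred from the model weights, but it can also be stated explicitly on the CLI. A brief sketch under that assumption:

```python
from ultralytics import YOLO

# Task is inferred from the weights: detection, segmentation, pose, etc.
det_model = YOLO("yolov8n.pt")  # task='detect'
seg_model = YOLO("yolov8n-seg.pt")  # task='segment'

print(det_model.task, seg_model.task)

# On the CLI the task can be given explicitly, e.g.:
#   yolo segment predict model=yolov8n-seg.pt source=path/to/image.jpg
```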
@@ -82,64 +82,15 @@ YOLO models can be used in different modes depending on the specific problem you
## Train Settings
-The training settings for YOLO models encompass various hyperparameters and configurations used during the training process. These settings influence the model's performance, speed, and accuracy. Key training settings include batch size, learning rate, momentum, and weight decay. Additionally, the choice of optimizer, loss function, and training dataset composition can impact the training process. Careful tuning and experimentation with these settings are crucial for optimizing performance.
-
-| Argument | Default | Description |
-| ----------------- | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `model` | `None` | Specifies the model file for training. Accepts a path to either a `.pt` pretrained model or a `.yaml` configuration file. Essential for defining the model structure or initializing weights. |
-| `data` | `None` | Path to the dataset configuration file (e.g., `coco8.yaml`). This file contains dataset-specific parameters, including paths to training and validation data, class names, and number of classes. |
-| `epochs` | `100` | Total number of training epochs. Each epoch represents a full pass over the entire dataset. Adjusting this value can affect training duration and model performance. |
-| `time` | `None` | Maximum training time in hours. If set, this overrides the `epochs` argument, allowing training to automatically stop after the specified duration. Useful for time-constrained training scenarios. |
-| `patience` | `100` | Number of epochs to wait without improvement in validation metrics before early stopping the training. Helps prevent overfitting by stopping training when performance plateaus. |
-| `batch` | `16` | Batch size, with three modes: set as an integer (e.g., `batch=16`), auto mode for 60% GPU memory utilization (`batch=-1`), or auto mode with specified utilization fraction (`batch=0.70`). |
-| `imgsz` | `640` | Target image size for training. All images are resized to this dimension before being fed into the model. Affects model accuracy and computational complexity. |
-| `save` | `True` | Enables saving of training checkpoints and final model weights. Useful for resuming training or model deployment. |
-| `save_period` | `-1` | Frequency of saving model checkpoints, specified in epochs. A value of -1 disables this feature. Useful for saving interim models during long training sessions. |
-| `cache` | `False` | Enables caching of dataset images in memory (`True`/`ram`), on disk (`disk`), or disables it (`False`). Improves training speed by reducing disk I/O at the cost of increased memory usage. |
-| `device` | `None` | Specifies the computational device(s) for training: a single GPU (`device=0`), multiple GPUs (`device=0,1`), CPU (`device=cpu`), or MPS for Apple silicon (`device=mps`). |
-| `workers` | `8` | Number of worker threads for data loading (per `RANK` if Multi-GPU training). Influences the speed of data preprocessing and feeding into the model, especially useful in multi-GPU setups. |
-| `project` | `None` | Name of the project directory where training outputs are saved. Allows for organized storage of different experiments. |
-| `name` | `None` | Name of the training run. Used for creating a subdirectory within the project folder, where training logs and outputs are stored. |
-| `exist_ok` | `False` | If True, allows overwriting of an existing project/name directory. Useful for iterative experimentation without needing to manually clear previous outputs. |
-| `pretrained` | `True` | Determines whether to start training from a pretrained model. Can be a boolean value or a string path to a specific model from which to load weights. Enhances training efficiency and model performance. |
-| `optimizer` | `'auto'` | Choice of optimizer for training. Options include `SGD`, `Adam`, `AdamW`, `NAdam`, `RAdam`, `RMSProp` etc., or `auto` for automatic selection based on model configuration. Affects convergence speed and stability. |
-| `verbose` | `False` | Enables verbose output during training, providing detailed logs and progress updates. Useful for debugging and closely monitoring the training process. |
-| `seed` | `0` | Sets the random seed for training, ensuring reproducibility of results across runs with the same configurations. |
-| `deterministic` | `True` | Forces deterministic algorithm use, ensuring reproducibility but may affect performance and speed due to the restriction on non-deterministic algorithms. |
-| `single_cls` | `False` | Treats all classes in multi-class datasets as a single class during training. Useful for binary classification tasks or when focusing on object presence rather than classification. |
-| `rect` | `False` | Enables rectangular training, optimizing batch composition for minimal padding. Can improve efficiency and speed but may affect model accuracy. |
-| `cos_lr` | `False` | Utilizes a cosine learning rate scheduler, adjusting the learning rate following a cosine curve over epochs. Helps in managing learning rate for better convergence. |
-| `close_mosaic` | `10` | Disables mosaic data augmentation in the last N epochs to stabilize training before completion. Setting to 0 disables this feature. |
-| `resume` | `False` | Resumes training from the last saved checkpoint. Automatically loads model weights, optimizer state, and epoch count, continuing training seamlessly. |
-| `amp` | `True` | Enables Automatic Mixed Precision (AMP) training, reducing memory usage and possibly speeding up training with minimal impact on accuracy. |
-| `fraction` | `1.0` | Specifies the fraction of the dataset to use for training. Allows for training on a subset of the full dataset, useful for experiments or when resources are limited. |
-| `profile` | `False` | Enables profiling of ONNX and TensorRT speeds during training, useful for optimizing model deployment. |
-| `freeze` | `None` | Freezes the first N layers of the model or specified layers by index, reducing the number of trainable parameters. Useful for fine-tuning or transfer learning. |
-| `lr0` | `0.01` | Initial learning rate (i.e. `SGD=1E-2`, `Adam=1E-3`) . Adjusting this value is crucial for the optimization process, influencing how rapidly model weights are updated. |
-| `lrf` | `0.01` | Final learning rate as a fraction of the initial rate = (`lr0 * lrf`), used in conjunction with schedulers to adjust the learning rate over time. |
-| `momentum` | `0.937` | Momentum factor for SGD or beta1 for Adam optimizers, influencing the incorporation of past gradients in the current update. |
-| `weight_decay` | `0.0005` | L2 regularization term, penalizing large weights to prevent overfitting. |
-| `warmup_epochs` | `3.0` | Number of epochs for learning rate warmup, gradually increasing the learning rate from a low value to the initial learning rate to stabilize training early on. |
-| `warmup_momentum` | `0.8` | Initial momentum for warmup phase, gradually adjusting to the set momentum over the warmup period. |
-| `warmup_bias_lr` | `0.1` | Learning rate for bias parameters during the warmup phase, helping stabilize model training in the initial epochs. |
-| `box` | `7.5` | Weight of the box loss component in the loss function, influencing how much emphasis is placed on accurately predicting bounding box coordinates. |
-| `cls` | `0.5` | Weight of the classification loss in the total loss function, affecting the importance of correct class prediction relative to other components. |
-| `dfl` | `1.5` | Weight of the distribution focal loss, used in certain YOLO versions for fine-grained classification. |
-| `pose` | `12.0` | Weight of the pose loss in models trained for pose estimation, influencing the emphasis on accurately predicting pose keypoints. |
-| `kobj` | `2.0` | Weight of the keypoint objectness loss in pose estimation models, balancing detection confidence with pose accuracy. |
-| `label_smoothing` | `0.0` | Applies label smoothing, softening hard labels to a mix of the target label and a uniform distribution over labels, can improve generalization. |
-| `nbs` | `64` | Nominal batch size for normalization of loss. |
-| `overlap_mask` | `True` | Determines whether segmentation masks should overlap during training, applicable in instance segmentation tasks. |
-| `mask_ratio` | `4` | Downsample ratio for segmentation masks, affecting the resolution of masks used during training. |
-| `dropout` | `0.0` | Dropout rate for regularization in classification tasks, preventing overfitting by randomly omitting units during training. |
-| `val` | `True` | Enables validation during training, allowing for periodic evaluation of model performance on a separate dataset. |
-| `plots` | `False` | Generates and saves plots of training and validation metrics, as well as prediction examples, providing visual insights into model performance and learning progression. |
+The training settings for YOLO models encompass various hyperparameters and configurations used during the training process. These settings influence the model's performance, speed, and accuracy. Key training settings include batch size, [learning rate](https://www.ultralytics.com/glossary/learning-rate), momentum, and weight decay. Additionally, the choice of optimizer, [loss function](https://www.ultralytics.com/glossary/loss-function), and training dataset composition can impact the training process. Careful tuning and experimentation with these settings are crucial for optimizing performance.
+
+{% include "macros/train-args.md" %}
!!! info "Note on Batch-size Settings"
The `batch` argument can be configured in three ways:
- - **Fixed Batch Size**: Set an integer value (e.g., `batch=16`), specifying the number of images per batch directly.
+ - **Fixed [Batch Size](https://www.ultralytics.com/glossary/batch-size)**: Set an integer value (e.g., `batch=16`), specifying the number of images per batch directly.
- **Auto Mode (60% GPU Memory)**: Use `batch=-1` to automatically adjust batch size for approximately 60% CUDA memory utilization.
- **Auto Mode with Utilization Fraction**: Set a fraction value (e.g., `batch=0.70`) to adjust batch size based on the specified fraction of GPU memory usage.
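The three modes map directly onto the `batch` keyword of `train()`; a minimal sketch with a placeholder dataset config:

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")

# Fixed batch size
model.train(data="coco8.yaml", epochs=3, batch=16)

# AutoBatch targeting ~60% of GPU memory
# model.train(data="coco8.yaml", epochs=3, batch=-1)

# AutoBatch targeting ~70% of GPU memory
# model.train(data="coco8.yaml", epochs=3, batch=0.70)
```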
@@ -151,38 +102,11 @@ The prediction settings for YOLO models encompass a range of hyperparameters and
Inference arguments:
-| Argument | Type | Default | Description |
-| --------------- | -------------- | ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `source` | `str` | `'ultralytics/assets'` | Specifies the data source for inference. Can be an image path, video file, directory, URL, or device ID for live feeds. Supports a wide range of formats and sources, enabling flexible application across different types of input. |
-| `conf` | `float` | `0.25` | Sets the minimum confidence threshold for detections. Objects detected with confidence below this threshold will be disregarded. Adjusting this value can help reduce false positives. |
-| `iou` | `float` | `0.7` | Intersection Over Union (IoU) threshold for Non-Maximum Suppression (NMS). Lower values result in fewer detections by eliminating overlapping boxes, useful for reducing duplicates. |
-| `imgsz` | `int or tuple` | `640` | Defines the image size for inference. Can be a single integer `640` for square resizing or a (height, width) tuple. Proper sizing can improve detection accuracy and processing speed. |
-| `half` | `bool` | `False` | Enables half-precision (FP16) inference, which can speed up model inference on supported GPUs with minimal impact on accuracy. |
-| `device` | `str` | `None` | Specifies the device for inference (e.g., `cpu`, `cuda:0` or `0`). Allows users to select between CPU, a specific GPU, or other compute devices for model execution. |
-| `max_det` | `int` | `300` | Maximum number of detections allowed per image. Limits the total number of objects the model can detect in a single inference, preventing excessive outputs in dense scenes. |
-| `vid_stride` | `int` | `1` | Frame stride for video inputs. Allows skipping frames in videos to speed up processing at the cost of temporal resolution. A value of 1 processes every frame, higher values skip frames. |
-| `stream_buffer` | `bool` | `False` | Determines if all frames should be buffered when processing video streams (`True`), or if the model should return the most recent frame (`False`). Useful for real-time applications. |
-| `visualize` | `bool` | `False` | Activates visualization of model features during inference, providing insights into what the model is "seeing". Useful for debugging and model interpretation. |
-| `augment` | `bool` | `False` | Enables test-time augmentation (TTA) for predictions, potentially improving detection robustness at the cost of inference speed. |
-| `agnostic_nms` | `bool` | `False` | Enables class-agnostic Non-Maximum Suppression (NMS), which merges overlapping boxes of different classes. Useful in multi-class detection scenarios where class overlap is common. |
-| `classes` | `list[int]` | `None` | Filters predictions to a set of class IDs. Only detections belonging to the specified classes will be returned. Useful for focusing on relevant objects in multi-class detection tasks. |
-| `retina_masks` | `bool` | `False` | Uses high-resolution segmentation masks if available in the model. This can enhance mask quality for segmentation tasks, providing finer detail. |
-| `embed` | `list[int]` | `None` | Specifies the layers from which to extract feature vectors or embeddings. Useful for downstream tasks like clustering or similarity search. |
+{% include "macros/predict-args.md" %}
Visualization arguments:
-| Argument | Type | Default | Description |
-| ------------- | --------------- | ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `show` | `bool` | `False` | If `True`, displays the annotated images or videos in a window. Useful for immediate visual feedback during development or testing. |
-| `save` | `bool` | `False` or `True` | Enables saving of the annotated images or videos to file. Useful for documentation, further analysis, or sharing results. Defaults to True when using CLI & False when used in Python. |
-| `save_frames` | `bool` | `False` | When processing videos, saves individual frames as images. Useful for extracting specific frames or for detailed frame-by-frame analysis. |
-| `save_txt` | `bool` | `False` | Saves detection results in a text file, following the format `[class] [x_center] [y_center] [width] [height] [confidence]`. Useful for integration with other analysis tools. |
-| `save_conf` | `bool` | `False` | Includes confidence scores in the saved text files. Enhances the detail available for post-processing and analysis. |
-| `save_crop` | `bool` | `False` | Saves cropped images of detections. Useful for dataset augmentation, analysis, or creating focused datasets for specific objects. |
-| `show_labels` | `bool` | `True` | Displays labels for each detection in the visual output. Provides immediate understanding of detected objects. |
-| `show_conf` | `bool` | `True` | Displays the confidence score for each detection alongside the label. Gives insight into the model's certainty for each detection. |
-| `show_boxes` | `bool` | `True` | Draws bounding boxes around detected objects. Essential for visual identification and location of objects in images or video frames. |
-| `line_width` | `None` or `int` | `None` | Specifies the line width of bounding boxes. If `None`, the line width is automatically adjusted based on the image size. Provides visual customization for clarity. |
+{% include "macros/visualization-args.md" %}
[Predict Guide](../modes/predict.md){ .md-button }
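Both groups of arguments are passed as keywords to `predict()`. The sketch below combines a few inference and visualization options; the source path is a placeholder.

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")

# Run inference with custom thresholds and save annotated output
results = model.predict(
    source="path/to/images",  # placeholder directory
    conf=0.4,  # higher confidence threshold
    iou=0.5,  # stricter NMS
    imgsz=640,
    save=True,  # write annotated images to disk
    show_labels=True,
    line_width=2,
)
print(len(results), "images processed")
```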
@@ -190,24 +114,9 @@ Visualization arguments:
The val (validation) settings for YOLO models involve various hyperparameters and configurations used to evaluate the model's performance on a validation dataset. These settings influence the model's performance, speed, and accuracy. Common YOLO validation settings include batch size, validation frequency during training, and performance evaluation metrics. Other factors affecting the validation process include the validation dataset's size and composition, as well as the specific task the model is employed for.
-| Argument | Type | Default | Description |
-| ------------- | ------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `data` | `str` | `None` | Specifies the path to the dataset configuration file (e.g., `coco8.yaml`). This file includes paths to validation data, class names, and number of classes. |
-| `imgsz` | `int` | `640` | Defines the size of input images. All images are resized to this dimension before processing. |
-| `batch` | `int` | `16` | Sets the number of images per batch. Use `-1` for AutoBatch, which automatically adjusts based on GPU memory availability. |
-| `save_json` | `bool` | `False` | If `True`, saves the results to a JSON file for further analysis or integration with other tools. |
-| `save_hybrid` | `bool` | `False` | If `True`, saves a hybrid version of labels that combines original annotations with additional model predictions. |
-| `conf` | `float` | `0.001` | Sets the minimum confidence threshold for detections. Detections with confidence below this threshold are discarded. |
-| `iou` | `float` | `0.6` | Sets the Intersection Over Union (IoU) threshold for Non-Maximum Suppression (NMS). Helps in reducing duplicate detections. |
-| `max_det` | `int` | `300` | Limits the maximum number of detections per image. Useful in dense scenes to prevent excessive detections. |
-| `half` | `bool` | `True` | Enables half-precision (FP16) computation, reducing memory usage and potentially increasing speed with minimal impact on accuracy. |
-| `device` | `str` | `None` | Specifies the device for validation (`cpu`, `cuda:0`, etc.). Allows flexibility in utilizing CPU or GPU resources. |
-| `dnn` | `bool` | `False` | If `True`, uses the OpenCV DNN module for ONNX model inference, offering an alternative to PyTorch inference methods. |
-| `plots` | `bool` | `False` | When set to `True`, generates and saves plots of predictions versus ground truth for visual evaluation of the model's performance. |
-| `rect` | `bool` | `False` | If `True`, uses rectangular inference for batching, reducing padding and potentially increasing speed and efficiency. |
-| `split` | `str` | `val` | Determines the dataset split to use for validation (`val`, `test`, or `train`). Allows flexibility in choosing the data segment for performance evaluation. |
-
-Careful tuning and experimentation with these settings are crucial to ensure optimal performance on the validation dataset and detect and prevent overfitting.
+{% include "macros/validation-args.md" %}
+
+Careful tuning and experimentation with these settings are crucial to ensure optimal performance on the validation dataset and detect and prevent [overfitting](https://www.ultralytics.com/glossary/overfitting).
[Val Guide](../modes/val.md){ .md-button }
@@ -223,27 +132,9 @@ It is crucial to thoughtfully configure these settings to ensure the exported mo
## Augmentation Settings
-Augmentation techniques are essential for improving the robustness and performance of YOLO models by introducing variability into the training data, helping the model generalize better to unseen data. The following table outlines the purpose and effect of each augmentation argument:
-
-| Argument | Type | Default | Range | Description |
-| --------------- | ------- | ------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `hsv_h` | `float` | `0.015` | `0.0 - 1.0` | Adjusts the hue of the image by a fraction of the color wheel, introducing color variability. Helps the model generalize across different lighting conditions. |
-| `hsv_s` | `float` | `0.7` | `0.0 - 1.0` | Alters the saturation of the image by a fraction, affecting the intensity of colors. Useful for simulating different environmental conditions. |
-| `hsv_v` | `float` | `0.4` | `0.0 - 1.0` | Modifies the value (brightness) of the image by a fraction, helping the model to perform well under various lighting conditions. |
-| `degrees` | `float` | `0.0` | `-180 - +180` | Rotates the image randomly within the specified degree range, improving the model's ability to recognize objects at various orientations. |
-| `translate` | `float` | `0.1` | `0.0 - 1.0` | Translates the image horizontally and vertically by a fraction of the image size, aiding in learning to detect partially visible objects. |
-| `scale` | `float` | `0.5` | `>=0.0` | Scales the image by a gain factor, simulating objects at different distances from the camera. |
-| `shear` | `float` | `0.0` | `-180 - +180` | Shears the image by a specified degree, mimicking the effect of objects being viewed from different angles. |
-| `perspective` | `float` | `0.0` | `0.0 - 0.001` | Applies a random perspective transformation to the image, enhancing the model's ability to understand objects in 3D space. |
-| `flipud` | `float` | `0.0` | `0.0 - 1.0` | Flips the image upside down with the specified probability, increasing the data variability without affecting the object's characteristics. |
-| `fliplr` | `float` | `0.5` | `0.0 - 1.0` | Flips the image left to right with the specified probability, useful for learning symmetrical objects and increasing dataset diversity. |
-| `bgr` | `float` | `0.0` | `0.0 - 1.0` | Flips the image channels from RGB to BGR with the specified probability, useful for increasing robustness to incorrect channel ordering. |
-| `mosaic` | `float` | `1.0` | `0.0 - 1.0` | Combines four training images into one, simulating different scene compositions and object interactions. Highly effective for complex scene understanding. |
-| `mixup` | `float` | `0.0` | `0.0 - 1.0` | Blends two images and their labels, creating a composite image. Enhances the model's ability to generalize by introducing label noise and visual variability. |
-| `copy_paste` | `float` | `0.0` | `0.0 - 1.0` | Copies objects from one image and pastes them onto another, useful for increasing object instances and learning object occlusion. |
-| `auto_augment` | `str` | `randaugment` | - | Automatically applies a predefined augmentation policy (`randaugment`, `autoaugment`, `augmix`), optimizing for classification tasks by diversifying the visual features. |
-| `erasing` | `float` | `0.4` | `0.0 - 0.9` | Randomly erases a portion of the image during classification training, encouraging the model to focus on less obvious features for recognition. |
-| `crop_fraction` | `float` | `1.0` | `0.1 - 1.0` | Crops the classification image to a fraction of its size to emphasize central features and adapt to object scales, reducing background distractions. |
+Augmentation techniques are essential for improving the robustness and performance of YOLO models by introducing variability into the [training data](https://www.ultralytics.com/glossary/training-data), helping the model generalize better to unseen data. The following table outlines the purpose and effect of each augmentation argument:
+
+{% include "macros/augmentation-args.md" %}
These settings can be adjusted to meet the specific requirements of the dataset and task at hand. Experimenting with different values can help find the optimal augmentation strategy that leads to the best model performance.
@@ -258,19 +149,19 @@ Logging, checkpoints, plotting, and file management are important considerations
Effective logging, checkpointing, plotting, and file management can help you keep track of the model's progress and make it easier to debug and optimize the training process.
-| Argument | Default | Description |
-| ---------- | -------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `project` | `'runs'` | Specifies the root directory for saving training runs. Each run will be saved in a separate subdirectory within this directory. |
-| `name` | `'exp'` | Defines the name of the experiment. If not specified, YOLO automatically increments this name for each run, e.g., `exp`, `exp2`, etc., to avoid overwriting previous experiments. |
-| `exist_ok` | `False` | Determines whether to overwrite an existing experiment directory if one with the same name already exists. Setting this to `True` allows overwriting, while `False` prevents it. |
-| `plots` | `False` | Controls the generation and saving of training and validation plots. Set to `True` to create plots such as loss curves, precision-recall curves, and sample predictions. Useful for visually tracking model performance over time. |
-| `save` | `False` | Enables the saving of training checkpoints and final model weights. Set to `True` to periodically save model states, allowing training to be resumed from these checkpoints or models to be deployed. |
+| Argument | Default | Description |
+| ---------- | -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `project` | `'runs'` | Specifies the root directory for saving training runs. Each run will be saved in a separate subdirectory within this directory. |
+| `name` | `'exp'` | Defines the name of the experiment. If not specified, YOLO automatically increments this name for each run, e.g., `exp`, `exp2`, etc., to avoid overwriting previous experiments. |
+| `exist_ok` | `False` | Determines whether to overwrite an existing experiment directory if one with the same name already exists. Setting this to `True` allows overwriting, while `False` prevents it. |
+| `plots` | `False` | Controls the generation and saving of training and validation plots. Set to `True` to create plots such as loss curves, [precision](https://www.ultralytics.com/glossary/precision)-[recall](https://www.ultralytics.com/glossary/recall) curves, and sample predictions. Useful for visually tracking model performance over time. |
+| `save` | `False` | Enables the saving of training checkpoints and final model weights. Set to `True` to periodically save model states, allowing training to be resumed from these checkpoints or models to be deployed. |
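These options are also plain keyword arguments to `train()`; a minimal sketch that keeps each experiment in its own folder (the project directory is a hypothetical choice):

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")

# Store this run under runs/experiments/exp1 and generate training plots
model.train(
    data="coco8.yaml",
    epochs=10,
    project="runs/experiments",  # hypothetical project directory
    name="exp1",
    exist_ok=True,  # allow re-running into the same folder
    plots=True,
    save=True,
)
```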
## FAQ
### How do I improve the performance of my YOLO model during training?
-Improving YOLO model performance involves tuning hyperparameters like batch size, learning rate, momentum, and weight decay. Adjusting augmentation settings, selecting the right optimizer, and employing techniques like early stopping or mixed precision can also help. For detailed guidance on training settings, refer to the [Train Guide](../modes/train.md).
+Improving YOLO model performance involves tuning hyperparameters like batch size, learning rate, momentum, and weight decay. Adjusting augmentation settings, selecting the right optimizer, and employing techniques like early stopping or [mixed precision](https://www.ultralytics.com/glossary/mixed-precision) can also help. For detailed guidance on training settings, refer to the [Train Guide](../modes/train.md).
### What are the key hyperparameters to consider for YOLO model accuracy?
diff --git a/docs/en/usage/cli.md b/docs/en/usage/cli.md
index d2ba4927..d1d7c8de 100644
--- a/docs/en/usage/cli.md
+++ b/docs/en/usage/cli.md
@@ -35,7 +35,7 @@ The YOLO command line interface (CLI) allows for simple single-line commands wit
=== "Train"
- Train a detection model for 10 epochs with an initial learning_rate of 0.01
+ Train a detection model for 10 [epochs](https://www.ultralytics.com/glossary/epoch) with an initial learning_rate of 0.01
```bash
yolo train data=coco8.yaml model=yolov8n.pt epochs=10 lr0=0.01
```
@@ -79,7 +79,7 @@ Where:
- `MODE` (required) is one of `[train, val, predict, export, track, benchmark]`
- `ARGS` (optional) are any number of custom `arg=value` pairs like `imgsz=320` that override defaults. For a full list of available `ARGS` see the [Configuration](cfg.md) page and `defaults.yaml`
-!!! warning "Warning"
+!!! warning
Arguments must be passed as `arg=val` pairs, split by an equals `=` sign and delimited by spaces ` ` between pairs. Do not use `--` argument prefixes or commas `,` between arguments.
@@ -91,7 +91,7 @@ Where:
Train YOLOv8n on the COCO8 dataset for 100 epochs at image size 640. For a full list of available arguments see the [Configuration](cfg.md) page.
-!!! example "Example"
+!!! example
=== "Train"
@@ -109,9 +109,9 @@ Train YOLOv8n on the COCO8 dataset for 100 epochs at image size 640. For a full
## Val
-Validate trained YOLOv8n model accuracy on the COCO8 dataset. No argument need to passed as the `model` retains its training `data` and arguments as model attributes.
+Validate trained YOLOv8n model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the COCO8 dataset. No arguments are needed as the `model` retains its training `data` and arguments as model attributes.
-!!! example "Example"
+!!! example
=== "Official"
@@ -131,7 +131,7 @@ Validate trained YOLOv8n model accuracy on the COCO8 dataset. No argument need t
Use a trained YOLOv8n model to run predictions on images.
-!!! example "Example"
+!!! example
=== "Official"
@@ -151,7 +151,7 @@ Use a trained YOLOv8n model to run predictions on images.
Export a YOLOv8n model to a different format like ONNX, CoreML, etc.
-!!! example "Example"
+!!! example
=== "Official"
@@ -177,7 +177,7 @@ See full `export` details in the [Export](../modes/export.md) page.
Default arguments can be overridden by simply passing them as arguments in the CLI in `arg=value` pairs.
-!!! tip ""
+!!! tip
=== "Train"
@@ -221,7 +221,7 @@ This will create `default_copy.yaml`, which you can then pass as `cfg=default_co
### How do I use the Ultralytics YOLOv8 command line interface (CLI) for model training?
-To train a YOLOv8 model using the CLI, you can execute a simple one-line command in the terminal. For example, to train a detection model for 10 epochs with a learning rate of 0.01, you would run:
+To train a YOLOv8 model using the CLI, you can execute a simple one-line command in the terminal. For example, to train a detection model for 10 epochs with a [learning rate](https://www.ultralytics.com/glossary/learning-rate) of 0.01, you would run:
```bash
yolo train data=coco8.yaml model=yolov8n.pt epochs=10 lr0=0.01
@@ -241,7 +241,7 @@ Each task can be customized with various arguments. For detailed syntax and exam
### How can I validate the accuracy of a trained YOLOv8 model using the CLI?
-To validate a YOLOv8 model's accuracy, use the `val` mode. For example, to validate a pretrained detection model with a batch size of 1 and image size of 640, run:
+To validate a YOLOv8 model's accuracy, use the `val` mode. For example, to validate a pretrained detection model with a [batch size](https://www.ultralytics.com/glossary/batch-size) of 1 and image size of 640, run:
```bash
yolo val model=yolov8n.pt data=coco8.yaml batch=1 imgsz=640
diff --git a/docs/en/usage/engine.md b/docs/en/usage/engine.md
index 7e43e08e..dc44047f 100644
--- a/docs/en/usage/engine.md
+++ b/docs/en/usage/engine.md
@@ -128,7 +128,7 @@ For more details on the customization and source code, see the [`BaseTrainer` Re
### How can I add a callback to the Ultralytics YOLOv8 DetectionTrainer?
-You can add callbacks to monitor and modify the training process in Ultralytics YOLOv8 `DetectionTrainer`. For instance, here's how you can add a callback to log model weights after every training epoch:
+You can add callbacks to monitor and modify the training process in Ultralytics YOLOv8 `DetectionTrainer`. For instance, here's how you can add a callback to log model weights after every training [epoch](https://www.ultralytics.com/glossary/epoch):
```python
from ultralytics.models.yolo.detect import DetectionTrainer
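
# The lines below are a hedged sketch rather than the doc's verbatim example:
# they assume DetectionTrainer accepts an `overrides` dict and exposes the
# `add_callback` entry point used throughout the Ultralytics engine.


def log_model(trainer):
    """Log the path of the most recent checkpoint after each training epoch."""
    print(trainer.last)


trainer = DetectionTrainer(overrides={"model": "yolov8n.pt", "data": "coco8.yaml", "epochs": 1})
trainer.add_callback("on_train_epoch_end", log_model)  # register alongside existing callbacks
trainer.train()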
@@ -153,8 +153,8 @@ For further details on callback events and entry points, refer to our [Callbacks
Ultralytics YOLOv8 offers a high-level abstraction on powerful engine executors, making it ideal for rapid development and customization. Key benefits include:
- **Ease of Use**: Both command-line and Python interfaces simplify complex tasks.
-- **Performance**: Optimized for real-time object detection and various vision AI applications.
-- **Customization**: Easily extendable for custom models, loss functions, and dataloaders.
+- **Performance**: Optimized for real-time [object detection](https://www.ultralytics.com/glossary/object-detection) and various vision AI applications.
+- **Customization**: Easily extendable for custom models, [loss functions](https://www.ultralytics.com/glossary/loss-function), and dataloaders.
Learn more about YOLOv8's capabilities by visiting [Ultralytics YOLO](https://www.ultralytics.com/yolo).
diff --git a/docs/en/usage/python.md b/docs/en/usage/python.md
index 66b2e6c6..5236af0a 100644
--- a/docs/en/usage/python.md
+++ b/docs/en/usage/python.md
@@ -6,7 +6,7 @@ keywords: YOLOv8, Python, object detection, segmentation, classification, machin
# Python Usage
-Welcome to the YOLOv8 Python Usage documentation! This guide is designed to help you seamlessly integrate YOLOv8 into your Python projects for object detection, segmentation, and classification. Here, you'll learn how to load and use pretrained models, train new models, and perform predictions on images. The easy-to-use Python interface is a valuable resource for anyone looking to incorporate YOLOv8 into their Python projects, allowing you to quickly implement advanced object detection capabilities. Let's get started!
+Welcome to the YOLOv8 Python Usage documentation! This guide is designed to help you seamlessly integrate YOLOv8 into your Python projects for [object detection](https://www.ultralytics.com/glossary/object-detection), segmentation, and classification. Here, you'll learn how to load and use pretrained models, train new models, and perform predictions on images. The easy-to-use Python interface is a valuable resource for anyone looking to incorporate YOLOv8 into their Python projects, allowing you to quickly implement advanced object detection capabilities. Let's get started!
@@ -80,7 +80,7 @@ Train mode is used for training a YOLOv8 model on a custom dataset. In this mode
## [Val](../modes/val.md)
-Val mode is used for validating a YOLOv8 model after it has been trained. In this mode, the model is evaluated on a validation set to measure its accuracy and generalization performance. This mode can be used to tune the hyperparameters of the model to improve its performance.
+Val mode is used for validating a YOLOv8 model after it has been trained. In this mode, the model is evaluated on a validation set to measure its [accuracy](https://www.ultralytics.com/glossary/accuracy) and generalization performance. This mode can be used to tune the hyperparameters of the model to improve its performance.
!!! example "Val"
@@ -355,7 +355,7 @@ See more detailed examples in our [Predict Mode](../modes/predict.md) section.
### What are the different modes available in YOLOv8?
-Ultralytics YOLOv8 provides various modes to cater to different machine learning workflows. These include:
+Ultralytics YOLOv8 provides various modes to cater to different [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) workflows. These include:
- **[Train](../modes/train.md)**: Train a model using custom datasets.
- **[Val](../modes/val.md)**: Validate model performance on a validation set.
diff --git a/docs/en/usage/simple-utilities.md b/docs/en/usage/simple-utilities.md
index ed59f8db..e40a2478 100644
--- a/docs/en/usage/simple-utilities.md
+++ b/docs/en/usage/simple-utilities.md
@@ -31,7 +31,7 @@ The `ultralytics` package comes with a myriad of utilities that can support, enh
### Auto Labeling / Annotations
-Dataset annotation is a very resource intensive and time-consuming process. If you have a YOLO object detection model trained on a reasonable amount of data, you can use it and [SAM](../models/sam.md) to auto-annotate additional data (segmentation format).
+Dataset annotation is a very resource intensive and time-consuming process. If you have a YOLO [object detection](https://www.ultralytics.com/glossary/object-detection) model trained on a reasonable amount of data, you can use it and [SAM](../models/sam.md) to auto-annotate additional data (segmentation format).
```{ .py .annotate }
from ultralytics.data.annotator import auto_annotate
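
# Hedged sketch of a typical call (paths and model files are placeholders):
# auto_annotate runs the detection model over the images and uses SAM to
# produce segmentation-format labels alongside them.
auto_annotate(
    data="path/to/images",  # folder of unlabeled images (placeholder)
    det_model="yolov8x.pt",  # trained detection weights
    sam_model="sam_b.pt",  # SAM weights used to generate masks
)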
@@ -86,7 +86,7 @@ convert_coco( # (1)!
For additional information about the `convert_coco` function, [visit the reference page](../reference/data/converter.md#ultralytics.data.converter.convert_coco)
-### Get Bounding Box Dimensions
+### Get [Bounding Box](https://www.ultralytics.com/glossary/bounding-box) Dimensions
```{.py .annotate }
from ultralytics.utils.plotting import Annotator
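# Hedged sketch: derive box width and height directly from the xyxy
# coordinates returned by a prediction (image path and weights are placeholders).
import cv2

from ultralytics import YOLO

model = YOLO("yolov8n.pt")
im = cv2.imread("path/to/image.jpg")
results = model.predict(im)

annotator = Annotator(im)
for box, cls in zip(results[0].boxes.xyxy.cpu(), results[0].boxes.cls.cpu().tolist()):
    x1, y1, x2, y2 = box.tolist()
    annotator.box_label(box, label=model.names[int(cls)])  # draw the box for reference
    print(f"{model.names[int(cls)]}: width={x2 - x1:.1f}px, height={y2 - y1:.1f}px")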
@@ -460,15 +460,16 @@ for obb in obb_boxes:
image_with_obb = ann.result()
```
-#### Bounding Boxes Circle Annotation ([Circle Label](https://docs.ultralytics.com/reference/utils/plotting/#ultralytics.utils.plotting.Annotator.circle_label))
+#### Bounding Boxes Circle Annotation [Circle Label](https://docs.ultralytics.com/reference/utils/plotting/#ultralytics.utils.plotting.Annotator.circle_label)
```python
import cv2
from ultralytics import YOLO
-from ultralytics.utils.plotting import Annotator, colors
+from ultralytics.utils.plotting import Annotator
model = YOLO("yolov8s.pt")
+names = model.names
cap = cv2.VideoCapture("path/to/video/file.mp4")
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
@@ -479,15 +480,13 @@ while True:
if not ret:
break
- annotator = Annotator(im0, line_width=2)
-
+ annotator = Annotator(im0)
results = model.predict(im0)
boxes = results[0].boxes.xyxy.cpu()
clss = results[0].boxes.cls.cpu().tolist()
for box, cls in zip(boxes, clss):
- x1, y1 = int((box[0] + box[2]) // 2), int((box[1] + box[3]) // 2)
- annotator.circle_label(box, label=model.names[int(cls)], color=colors(int(cls), True))
+ annotator.circle_label(box, label=names[int(cls)])
writer.write(im0)
cv2.imshow("Ultralytics circle annotation", im0)
@@ -500,15 +499,16 @@ cap.release()
cv2.destroyAllWindows()
```
-#### Bounding Boxes Text Annotation ([Text Label](https://docs.ultralytics.com/reference/utils/plotting/#ultralytics.utils.plotting.Annotator.text_label))
+#### Bounding Boxes Text Annotation [Text Label](https://docs.ultralytics.com/reference/utils/plotting/#ultralytics.utils.plotting.Annotator.text_label)
```python
import cv2
from ultralytics import YOLO
-from ultralytics.utils.plotting import Annotator, colors
+from ultralytics.utils.plotting import Annotator
model = YOLO("yolov8s.pt")
+names = model.names
cap = cv2.VideoCapture("path/to/video/file.mp4")
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
@@ -519,15 +519,13 @@ while True:
if not ret:
break
- annotator = Annotator(im0, line_width=2)
-
+ annotator = Annotator(im0)
results = model.predict(im0)
boxes = results[0].boxes.xyxy.cpu()
clss = results[0].boxes.cls.cpu().tolist()
for box, cls in zip(boxes, clss):
- x1, y1 = int((box[0] + box[2]) // 2), int((box[1] + box[3]) // 2)
- annotator.text_label(box, label=model.names[int(cls)], color=colors(int(cls), True))
+ annotator.text_label(box, label=names[int(cls)])
writer.write(im0)
cv2.imshow("Ultralytics text annotation", im0)
@@ -587,7 +585,7 @@ make_divisible(7, 2)
## FAQ
-### What utilities are included in the Ultralytics package to enhance machine learning workflows?
+### What utilities are included in the Ultralytics package to enhance [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) workflows?
The Ultralytics package includes a variety of utilities designed to streamline and optimize machine learning workflows. Key utilities include [auto-annotation](../reference/data/annotator.md#ultralytics.data.annotator.auto_annotate) for labeling datasets, converting COCO to YOLO format with [convert_coco](../reference/data/converter.md#ultralytics.data.converter.convert_coco), compressing images, and dataset auto-splitting. These tools aim to reduce manual effort, ensure consistency, and enhance data processing efficiency.
diff --git a/docs/en/yolov5/environments/aws_quickstart_tutorial.md b/docs/en/yolov5/environments/aws_quickstart_tutorial.md
index ffa6a2c9..0e5daf2f 100644
--- a/docs/en/yolov5/environments/aws_quickstart_tutorial.md
+++ b/docs/en/yolov5/environments/aws_quickstart_tutorial.md
@@ -6,7 +6,7 @@ keywords: YOLOv5, AWS, Deep Learning, Machine Learning, AWS EC2, YOLOv5 setup, D
# YOLOv5 🚀 on AWS Deep Learning Instance: Your Complete Guide
-Setting up a high-performance deep learning environment can be daunting for newcomers, but fear not! 🛠️ With this guide, we'll walk you through the process of getting YOLOv5 up and running on an AWS Deep Learning instance. By leveraging the power of Amazon Web Services (AWS), even those new to machine learning can get started quickly and cost-effectively. The AWS platform's scalability is perfect for both experimentation and production deployment.
+Setting up a high-performance deep learning environment can be daunting for newcomers, but fear not! 🛠️ With this guide, we'll walk you through the process of getting YOLOv5 up and running on an AWS Deep Learning instance. By leveraging the power of Amazon Web Services (AWS), even those new to [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) can get started quickly and cost-effectively. The AWS platform's scalability is perfect for both experimentation and production deployment.
Other quickstart options for YOLOv5 include our [Colab Notebook](https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb) , [GCP Deep Learning VM](./google_cloud_quickstart_tutorial.md), and our Docker image at [Docker Hub](https://hub.docker.com/r/ultralytics/yolov5) .
@@ -24,7 +24,7 @@ In the EC2 dashboard, you'll find the **Launch Instance** button which is your g
### Selecting the Right Amazon Machine Image (AMI)
-Here's where you choose the operating system and software stack for your instance. Type 'Deep Learning' into the search field and select the latest Ubuntu-based Deep Learning AMI, unless your needs dictate otherwise. Amazon's Deep Learning AMIs come pre-installed with popular frameworks and GPU drivers to streamline your setup process.
+Here's where you choose the operating system and software stack for your instance. Type '[Deep Learning](https://www.ultralytics.com/glossary/deep-learning-dl)' into the search field and select the latest Ubuntu-based Deep Learning AMI, unless your needs dictate otherwise. Amazon's Deep Learning AMIs come pre-installed with popular frameworks and GPU drivers to streamline your setup process.
![Choose AMI](https://github.com/ultralytics/docs/releases/download/0/choose-ami.avif)
@@ -92,4 +92,4 @@ sudo swapon /swapfile # activate swap file
free -h # verify swap memory
```
-And that's it! 🎉 You've successfully created an AWS Deep Learning instance and run YOLOv5. Whether you're just starting with object detection or scaling up for production, this setup can help you achieve your machine learning goals. Happy training, validating, and deploying! If you encounter any hiccups along the way, the robust AWS documentation and the active Ultralytics community are here to support you.
+And that's it! 🎉 You've successfully created an AWS Deep Learning instance and run YOLOv5. Whether you're just starting with [object detection](https://www.ultralytics.com/glossary/object-detection) or scaling up for production, this setup can help you achieve your machine learning goals. Happy training, validating, and deploying! If you encounter any hiccups along the way, the robust AWS documentation and the active Ultralytics community are here to support you.
diff --git a/docs/en/yolov5/environments/azureml_quickstart_tutorial.md b/docs/en/yolov5/environments/azureml_quickstart_tutorial.md
index 2fd33a86..82a083d1 100644
--- a/docs/en/yolov5/environments/azureml_quickstart_tutorial.md
+++ b/docs/en/yolov5/environments/azureml_quickstart_tutorial.md
@@ -61,7 +61,7 @@ Train the YOLOv5 model:
python train.py
```
-Validate the model for Precision, Recall, and mAP
+Validate the model for [Precision](https://www.ultralytics.com/glossary/precision), [Recall](https://www.ultralytics.com/glossary/recall), and mAP
```bash
python val.py --weights yolov5s.pt
diff --git a/docs/en/yolov5/environments/docker_image_quickstart_tutorial.md b/docs/en/yolov5/environments/docker_image_quickstart_tutorial.md
index 93c9f0a1..55b92316 100644
--- a/docs/en/yolov5/environments/docker_image_quickstart_tutorial.md
+++ b/docs/en/yolov5/environments/docker_image_quickstart_tutorial.md
@@ -36,7 +36,7 @@ sudo docker run --ipc=host -it ultralytics/yolov5:latest
### Container with local file access:
-To run a container with access to local files (e.g., COCO training data in `/datasets`), use the `-v` flag:
+To run a container with access to local files (e.g., COCO [training data](https://www.ultralytics.com/glossary/training-data) in `/datasets`), use the `-v` flag:
```bash
sudo docker run --ipc=host -it -v "$(pwd)"/datasets:/usr/src/datasets ultralytics/yolov5:latest
diff --git a/docs/en/yolov5/environments/google_cloud_quickstart_tutorial.md b/docs/en/yolov5/environments/google_cloud_quickstart_tutorial.md
index cdd397dc..69e2fa80 100644
--- a/docs/en/yolov5/environments/google_cloud_quickstart_tutorial.md
+++ b/docs/en/yolov5/environments/google_cloud_quickstart_tutorial.md
@@ -6,19 +6,19 @@ keywords: YOLOv5, Google Cloud Platform, GCP, Deep Learning VM, object detection
# Mastering YOLOv5 🚀 Deployment on Google Cloud Platform (GCP) Deep Learning Virtual Machine (VM) ⭐
-Embarking on the journey of artificial intelligence and machine learning can be exhilarating, especially when you leverage the power and flexibility of a cloud platform. Google Cloud Platform (GCP) offers robust tools tailored for machine learning enthusiasts and professionals alike. One such tool is the Deep Learning VM that is preconfigured for data science and ML tasks. In this tutorial, we will navigate through the process of setting up YOLOv5 on a GCP Deep Learning VM. Whether you're taking your first steps in ML or you're a seasoned practitioner, this guide is designed to provide you with a clear pathway to implementing object detection models powered by YOLOv5.
+Embarking on the journey of [artificial intelligence](https://www.ultralytics.com/glossary/artificial-intelligence-ai) and machine learning can be exhilarating, especially when you leverage the power and flexibility of a cloud platform. Google Cloud Platform (GCP) offers robust tools tailored for machine learning enthusiasts and professionals alike. One such tool is the Deep Learning VM that is preconfigured for data science and ML tasks. In this tutorial, we will navigate through the process of setting up YOLOv5 on a GCP Deep Learning VM. Whether you're taking your first steps in ML or you're a seasoned practitioner, this guide is designed to provide you with a clear pathway to implementing object detection models powered by YOLOv5.
🆓 Plus, if you're a fresh GCP user, you're in luck with a [$300 free credit offer](https://cloud.google.com/free/docs/free-cloud-features#free-trial) to kickstart your projects.
In addition to GCP, explore other accessible quickstart options for YOLOv5, like our [Colab Notebook](https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb) for a browser-based experience, or the scalability of [Amazon AWS](./aws_quickstart_tutorial.md). Furthermore, container aficionados can utilize our official Docker image at [Docker Hub](https://hub.docker.com/r/ultralytics/yolov5) for an encapsulated environment.
-## Step 1: Create and Configure Your Deep Learning VM
+## Step 1: Create and Configure Your [Deep Learning](https://www.ultralytics.com/glossary/deep-learning-dl) VM
Let's begin by creating a virtual machine that's tuned for deep learning:
1. Head over to the [GCP marketplace](https://console.cloud.google.com/marketplace/details/click-to-deploy-images/deeplearning) and select the **Deep Learning VM**.
2. Opt for a **n1-standard-8** instance; it offers a balance of 8 vCPUs and 30 GB of memory, ideally suited for our needs.
-3. Next, select a GPU. This depends on your workload; even a basic one like the Tesla T4 will markedly accelerate your model training.
+3. Next, select a GPU. This depends on your workload; even a basic one like the T4 will markedly accelerate your model training.
4. Tick the box for 'Install NVIDIA GPU driver automatically on first startup?' for hassle-free setup.
5. Allocate a 300 GB SSD Persistent Disk to ensure you don't bottleneck on I/O operations.
6. Hit 'Deploy' and let GCP do its magic in provisioning your custom Deep Learning VM.
@@ -42,7 +42,7 @@ cd yolov5
pip install -r requirements.txt
```
-This setup process ensures you're working with a Python environment version 3.8.0 or newer and PyTorch 1.8 or above. Our scripts smoothly download [models](https://github.com/ultralytics/yolov5/tree/master/models) and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) rending from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases), making it hassle-free to start model training.
+This setup process ensures you're working with a Python 3.8.0 or newer environment and [PyTorch](https://www.ultralytics.com/glossary/pytorch) 1.8 or above. Our scripts smoothly download [models](https://github.com/ultralytics/yolov5/tree/master/models) and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) directly from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases), making it hassle-free to start model training.
## Step 3: Train and Deploy Your YOLOv5 Models 🌐
@@ -62,7 +62,7 @@ python detect.py --weights yolov5s.pt --source path/to/images
python export.py --weights yolov5s.pt --include onnx coreml tflite
```
-With just a few commands, YOLOv5 allows you to train custom object detection models tailored to your specific needs or utilize pre-trained weights for quick results on a variety of tasks.
+With just a few commands, YOLOv5 allows you to train custom [object detection](https://www.ultralytics.com/glossary/object-detection) models tailored to your specific needs or utilize pre-trained weights for quick results on a variety of tasks.
![Terminal command image illustrating model training on a GCP Deep Learning VM](https://github.com/ultralytics/docs/releases/download/0/terminal-command-model-training.avif)
@@ -80,7 +80,7 @@ free -h # confirm the memory increment
### Concluding Thoughts
-Congratulations! You are now empowered to harness the capabilities of YOLOv5 with the computational prowess of Google Cloud Platform. This combination provides scalability, efficiency, and versatility for your object detection tasks. Whether for personal projects, academic research, or industrial applications, you have taken a pivotal step into the world of AI and machine learning on the cloud.
+Congratulations! You are now empowered to harness the capabilities of YOLOv5 with the computational prowess of Google Cloud Platform. This combination provides scalability, efficiency, and versatility for your object detection tasks. Whether for personal projects, academic research, or industrial applications, you have taken a pivotal step into the world of AI and [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) on the cloud.
Do remember to document your journey, share insights with the Ultralytics community, and leverage the collaborative arenas such as [GitHub discussions](https://github.com/ultralytics/yolov5/discussions) to grow further. Now, go forth and innovate with YOLOv5 and GCP! 🌟
diff --git a/docs/en/yolov5/index.md b/docs/en/yolov5/index.md
index ba4e4e3a..36050584 100644
--- a/docs/en/yolov5/index.md
+++ b/docs/en/yolov5/index.md
@@ -22,11 +22,11 @@ keywords: YOLOv5, Ultralytics, object detection, computer vision, deep learning,
-Welcome to the Ultralytics' YOLOv5🚀 Documentation! YOLOv5, the fifth iteration of the revolutionary "You Only Look Once" object detection model, is designed to deliver high-speed, high-accuracy results in real-time.
+Welcome to the Ultralytics YOLOv5 🚀 Documentation! YOLOv5, the fifth iteration of the revolutionary "You Only Look Once" [object detection](https://www.ultralytics.com/glossary/object-detection) model, is designed to deliver high-speed, high-accuracy results in real-time.
-Built on PyTorch, this powerful deep learning framework has garnered immense popularity for its versatility, ease of use, and high performance. Our documentation guides you through the installation process, explains the architectural nuances of the model, showcases various use-cases, and provides a series of detailed tutorials. These resources will help you harness the full potential of YOLOv5 for your computer vision projects. Let's get started!
+Built on PyTorch, this powerful [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) framework has garnered immense popularity for its versatility, ease of use, and high performance. Our documentation guides you through the installation process, explains the architectural nuances of the model, showcases various use-cases, and provides a series of detailed tutorials. These resources will help you harness the full potential of YOLOv5 for your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) projects. Let's get started!
@@ -42,10 +42,10 @@ Here's a compilation of comprehensive tutorials that will guide you through diff
- [Test-Time Augmentation (TTA)](tutorials/test_time_augmentation.md): Explore how to use TTA to improve your model's prediction accuracy.
- [Model Ensembling](tutorials/model_ensembling.md): Learn the strategy of combining multiple models for improved performance.
- [Model Pruning/Sparsity](tutorials/model_pruning_and_sparsity.md): Understand pruning and sparsity concepts, and how to create a more efficient model.
-- [Hyperparameter Evolution](tutorials/hyperparameter_evolution.md): Discover the process of automated hyperparameter tuning for better model performance.
-- [Transfer Learning with Frozen Layers](tutorials/transfer_learning_with_frozen_layers.md): Learn how to implement transfer learning by freezing layers in YOLOv5.
+- [Hyperparameter Evolution](tutorials/hyperparameter_evolution.md): Discover the process of automated [hyperparameter tuning](https://www.ultralytics.com/glossary/hyperparameter-tuning) for better model performance.
+- [Transfer Learning with Frozen Layers](tutorials/transfer_learning_with_frozen_layers.md): Learn how to implement [transfer learning](https://www.ultralytics.com/glossary/transfer-learning) by freezing layers in YOLOv5.
- [Architecture Summary](tutorials/architecture_description.md) 🌟 Delve into the structural details of the YOLOv5 model.
-- [Roboflow for Datasets](tutorials/roboflow_datasets_integration.md): Understand how to utilize Roboflow for dataset management, labeling, and active learning.
+- [Roboflow for Datasets](tutorials/roboflow_datasets_integration.md): Understand how to utilize Roboflow for dataset management, labeling, and [active learning](https://www.ultralytics.com/glossary/active-learning).
- [ClearML Logging](tutorials/clearml_logging_integration.md) 🌟 Learn how to integrate ClearML for efficient logging during your model training.
- [YOLOv5 with Neural Magic](tutorials/neural_magic_pruning_quantization.md) Discover how to use Neural Magic's Deepsparse to prune and quantize your YOLOv5 model.
- [Comet Logging](tutorials/comet_logging_integration.md) 🌟 NEW: Explore how to utilize Comet for improved model training logging.
@@ -95,7 +95,7 @@ We're excited to see the innovative ways you'll use YOLOv5. Dive in, experiment,
### What are the key features of Ultralytics YOLOv5?
-Ultralytics YOLOv5 is renowned for its high-speed and high-accuracy object detection capabilities. Built on PyTorch, it is versatile and user-friendly, making it suitable for various computer vision projects. Key features include real-time inference, support for multiple training tricks like Test-Time Augmentation (TTA) and Model Ensembling, and compatibility with export formats such as TFLite, ONNX, CoreML, and TensorRT. To delve deeper into how Ultralytics YOLOv5 can elevate your project, explore our [TFLite, ONNX, CoreML, TensorRT Export guide](tutorials/model_export.md).
+Ultralytics YOLOv5 is renowned for its high-speed and high-[accuracy](https://www.ultralytics.com/glossary/accuracy) object detection capabilities. Built on [PyTorch](https://www.ultralytics.com/glossary/pytorch), it is versatile and user-friendly, making it suitable for various computer vision projects. Key features include real-time inference, support for multiple training tricks like Test-Time Augmentation (TTA) and Model Ensembling, and compatibility with export formats such as TFLite, ONNX, CoreML, and TensorRT. To delve deeper into how Ultralytics YOLOv5 can elevate your project, explore our [TFLite, ONNX, CoreML, TensorRT Export guide](tutorials/model_export.md).
### How can I train a custom YOLOv5 model on my dataset?
@@ -107,7 +107,7 @@ Ultralytics YOLOv5 is preferred over models like RCNN due to its superior speed
### How can I optimize YOLOv5 model performance during training?
-Optimizing YOLOv5 model performance involves tuning various hyperparameters and incorporating techniques like data augmentation and transfer learning. Ultralytics provides comprehensive resources on hyperparameter evolution and pruning/sparsity to improve model efficiency. You can discover practical tips in our [Tips for Best Training Results guide](tutorials/tips_for_best_training_results.md), which offers actionable insights for achieving optimal performance during training.
+Optimizing YOLOv5 model performance involves tuning various hyperparameters and incorporating techniques like [data augmentation](https://www.ultralytics.com/glossary/data-augmentation) and transfer learning. Ultralytics provides comprehensive resources on hyperparameter evolution and pruning/sparsity to improve model efficiency. You can discover practical tips in our [Tips for Best Training Results guide](tutorials/tips_for_best_training_results.md), which offers actionable insights for achieving optimal performance during training.
### What environments are supported for running YOLOv5 applications?
diff --git a/docs/en/yolov5/quickstart_tutorial.md b/docs/en/yolov5/quickstart_tutorial.md
index f8cabb9f..e6ddb8fd 100644
--- a/docs/en/yolov5/quickstart_tutorial.md
+++ b/docs/en/yolov5/quickstart_tutorial.md
@@ -6,7 +6,7 @@ keywords: YOLOv5, Quickstart, real-time object detection, AI, ML, PyTorch, infer
# YOLOv5 Quickstart 🚀
-Embark on your journey into the dynamic realm of real-time object detection with YOLOv5! This guide is crafted to serve as a comprehensive starting point for AI enthusiasts and professionals aiming to master YOLOv5. From initial setup to advanced training techniques, we've got you covered. By the end of this guide, you'll have the knowledge to implement YOLOv5 into your projects confidently. Let's ignite the engines and soar into YOLOv5!
+Embark on your journey into the dynamic realm of real-time [object detection](https://www.ultralytics.com/glossary/object-detection) with YOLOv5! This guide is crafted to serve as a comprehensive starting point for AI enthusiasts and professionals aiming to master YOLOv5. From initial setup to advanced training techniques, we've got you covered. By the end of this guide, you'll have the knowledge to implement YOLOv5 into your projects confidently. Let's ignite the engines and soar into YOLOv5!
## Install
@@ -18,7 +18,7 @@ cd yolov5
pip install -r requirements.txt # install dependencies
```
-## Inference with PyTorch Hub
+## Inference with [PyTorch](https://www.ultralytics.com/glossary/pytorch) Hub
Experience the simplicity of YOLOv5 [PyTorch Hub](./tutorials/pytorch_hub_model_loading.md) inference, where [models](https://github.com/ultralytics/yolov5/tree/master/models) are seamlessly downloaded from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases).
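For reference, a minimal sketch of the PyTorch Hub workflow described here; the model name and image URL are just examples:

```python
import torch

# Load a pretrained YOLOv5s model from PyTorch Hub (weights download on first use)
model = torch.hub.load("ultralytics/yolov5", "yolov5s")

# Run inference on an example image (any local path or URL works)
results = model("https://ultralytics.com/images/zidane.jpg")

# Inspect predictions
results.print()  # class counts and speed summary
results.xyxy[0]  # tensor of [xmin, ymin, xmax, ymax, confidence, class] per detection
```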
@@ -44,8 +44,8 @@ Harness `detect.py` for versatile inference on various sources. It automatically
```bash
python detect.py --weights yolov5s.pt --source 0 # webcam
- img.jpg # image
- vid.mp4 # video
+ image.jpg # image
+ video.mp4 # video
screen # screenshot
path/ # directory
list.txt # list of images
@@ -57,7 +57,7 @@ python detect.py --weights yolov5s.pt --source 0 #
## Training
-Replicate the YOLOv5 [COCO](https://github.com/ultralytics/yolov5/blob/master/data/scripts/get_coco.sh) benchmarks with the instructions below. The necessary [models](https://github.com/ultralytics/yolov5/tree/master/models) and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) are pulled directly from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). Training YOLOv5n/s/m/l/x on a V100 GPU should typically take 1/2/4/6/8 days respectively (note that [Multi-GPU](./tutorials/multi_gpu_training.md) setups work faster). Maximize performance by using the highest possible `--batch-size` or use `--batch-size -1` for the YOLOv5 [AutoBatch](https://github.com/ultralytics/yolov5/pull/5092) feature. The following batch sizes are ideal for V100-16GB GPUs.
+Replicate the YOLOv5 [COCO](https://github.com/ultralytics/yolov5/blob/master/data/scripts/get_coco.sh) benchmarks with the instructions below. The necessary [models](https://github.com/ultralytics/yolov5/tree/master/models) and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) are pulled directly from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). Training YOLOv5n/s/m/l/x on a V100 GPU should typically take 1/2/4/6/8 days respectively (note that [Multi-GPU](./tutorials/multi_gpu_training.md) setups work faster). Maximize performance by using the highest possible `--batch-size` or use `--batch-size -1` for the YOLOv5 [AutoBatch](https://github.com/ultralytics/yolov5/pull/5092) feature. The following [batch sizes](https://www.ultralytics.com/glossary/batch-size) are ideal for V100-16GB GPUs.
```bash
python train.py --data coco.yaml --epochs 300 --weights '' --cfg yolov5n.yaml --batch-size 128
@@ -69,4 +69,4 @@ python train.py --data coco.yaml --epochs 300 --weights '' --cfg yolov5n.yaml -
-To conclude, YOLOv5 is not only a state-of-the-art tool for object detection but also a testament to the power of machine learning in transforming the way we interact with the world through visual understanding. As you progress through this guide and begin applying YOLOv5 to your projects, remember that you are at the forefront of a technological revolution, capable of achieving remarkable feats. Should you need further insights or support from fellow visionaries, you're invited to our [GitHub repository](https://github.com/ultralytics/yolov5) home to a thriving community of developers and researchers. Keep exploring, keep innovating, and enjoy the marvels of YOLOv5. Happy detecting! 🌠🔍
+To conclude, YOLOv5 is not only a state-of-the-art tool for object detection but also a testament to the power of [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) in transforming the way we interact with the world through visual understanding. As you progress through this guide and begin applying YOLOv5 to your projects, remember that you are at the forefront of a technological revolution, capable of achieving remarkable feats. Should you need further insights or support from fellow visionaries, you're invited to our [GitHub repository](https://github.com/ultralytics/yolov5), home to a thriving community of developers and researchers. Keep exploring, keep innovating, and enjoy the marvels of YOLOv5. Happy detecting! 🌠🔍
diff --git a/docs/en/yolov5/tutorials/architecture_description.md b/docs/en/yolov5/tutorials/architecture_description.md
index bb9ade69..53788270 100644
--- a/docs/en/yolov5/tutorials/architecture_description.md
+++ b/docs/en/yolov5/tutorials/architecture_description.md
@@ -6,7 +6,7 @@ keywords: YOLOv5 architecture, object detection, Ultralytics, YOLO, model struct
# Ultralytics YOLOv5 Architecture
-YOLOv5 (v6.0/6.1) is a powerful object detection algorithm developed by Ultralytics. This article dives deep into the YOLOv5 architecture, data augmentation strategies, training methodologies, and loss computation techniques. This comprehensive understanding will help improve your practical application of object detection in various fields, including surveillance, autonomous vehicles, and image recognition.
+YOLOv5 (v6.0/6.1) is a powerful object detection algorithm developed by Ultralytics. This article dives deep into the YOLOv5 architecture, [data augmentation](https://www.ultralytics.com/glossary/data-augmentation) strategies, training methodologies, and loss computation techniques. This comprehensive understanding will help improve your practical application of object detection in various fields, including surveillance, autonomous vehicles, and [image recognition](https://www.ultralytics.com/glossary/image-recognition).
## 1. Model Structure
@@ -104,9 +104,9 @@ SPPF time: 0.20780706405639648
## 2. Data Augmentation Techniques
-YOLOv5 employs various data augmentation techniques to improve the model's ability to generalize and reduce overfitting. These techniques include:
+YOLOv5 employs various data augmentation techniques to improve the model's ability to generalize and reduce [overfitting](https://www.ultralytics.com/glossary/overfitting). These techniques include:
-- **Mosaic Augmentation**: An image processing technique that combines four training images into one in ways that encourage object detection models to better handle various object scales and translations.
+- **Mosaic Augmentation**: An image processing technique that combines four training images into one in ways that encourage [object detection](https://www.ultralytics.com/glossary/object-detection) models to better handle various object scales and translations.
![mosaic](https://github.com/ultralytics/docs/releases/download/0/mosaic-augmentation.avif)
@@ -138,9 +138,9 @@ YOLOv5 applies several sophisticated training strategies to enhance the model's
- **Multiscale Training**: The input images are randomly rescaled within a range of 0.5 to 1.5 times their original size during the training process.
- **AutoAnchor**: This strategy optimizes the prior anchor boxes to match the statistical characteristics of the ground truth boxes in your custom data.
-- **Warmup and Cosine LR Scheduler**: A method to adjust the learning rate to enhance model performance.
+- **Warmup and Cosine LR Scheduler**: A method to adjust the [learning rate](https://www.ultralytics.com/glossary/learning-rate) to enhance model performance.
- **Exponential Moving Average (EMA)**: A strategy that uses the average of parameters over past steps to stabilize the training process and reduce generalization error.
-- **Mixed Precision Training**: A method to perform operations in half-precision format, reducing memory usage and enhancing computational speed.
+- **[Mixed Precision](https://www.ultralytics.com/glossary/mixed-precision) Training**: A method to perform operations in half-precision format, reducing memory usage and enhancing computational speed.
- **Hyperparameter Evolution**: A strategy to automatically tune hyperparameters to achieve optimal performance.
## 4. Additional Features
@@ -153,7 +153,7 @@ The loss in YOLOv5 is computed as a combination of three individual loss compone
- **Objectness Loss (BCE Loss)**: Another Binary Cross-Entropy loss, calculates the error in detecting whether an object is present in a particular grid cell or not.
- **Location Loss (CIoU Loss)**: Complete IoU loss, measures the error in localizing the object within the grid cell.
-The overall loss function is depicted by:
+The overall [loss function](https://www.ultralytics.com/glossary/loss-function) is depicted by:
![loss](https://latex.codecogs.com/svg.image?Loss=\lambda_1L_{cls}+\lambda_2L_{obj}+\lambda_3L_{loc})
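As a rough illustration of how such a weighted sum is computed (the λ weights and scalar losses below are placeholders, not YOLOv5's actual gains):

```python
import torch

def total_loss(l_cls: torch.Tensor, l_obj: torch.Tensor, l_loc: torch.Tensor,
               lambdas=(0.5, 1.0, 0.05)) -> torch.Tensor:
    """Weighted sum of classification, objectness, and localization losses (illustrative weights)."""
    l1, l2, l3 = lambdas
    return l1 * l_cls + l2 * l_obj + l3 * l_loc

# Example with dummy scalar loss values
print(total_loss(torch.tensor(0.8), torch.tensor(0.4), torch.tensor(1.2)))
```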
@@ -176,7 +176,7 @@ The YOLOv5 architecture makes some important changes to the box prediction strat
However, in YOLOv5, the formula for predicting the box coordinates has been updated to reduce grid sensitivity and prevent the model from predicting unbounded box dimensions.
-The revised formulas for calculating the predicted bounding box are as follows:
+The revised formulas for calculating the predicted [bounding box](https://www.ultralytics.com/glossary/bounding-box) are as follows:
![bx](…)

Use SyncBatchNorm (click to expand)
-[SyncBatchNorm](https://pytorch.org/docs/master/generated/torch.nn.SyncBatchNorm.html) could increase accuracy for multiple gpu training, however, it will slow down training by a significant factor. It is **only** available for Multiple GPU DistributedDataParallel training.
+[SyncBatchNorm](https://pytorch.org/docs/master/generated/torch.nn.SyncBatchNorm.html) could increase [accuracy](https://www.ultralytics.com/glossary/accuracy) for multiple-GPU training; however, it will slow down training by a significant factor. It is **only** available for Multiple GPU DistributedDataParallel training.
It is best used when the batch-size on **each** GPU is small (<= 8).
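A minimal sketch of enabling SyncBatchNorm before wrapping a model in DistributedDataParallel (assumes the process group is already initialized and `local_rank` identifies the current GPU):

```python
import torch.nn as nn
from torch.nn.parallel import DistributedDataParallel as DDP

def wrap_with_syncbn(model: nn.Module, local_rank: int) -> nn.Module:
    # Replace every BatchNorm layer so statistics are synchronized across all GPUs
    model = nn.SyncBatchNorm.convert_sync_batchnorm(model).to(local_rank)
    # Only meaningful under DistributedDataParallel training
    return DDP(model, device_ids=[local_rank])
```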
@@ -121,7 +121,7 @@ python -m torch.distributed.run --master_port 1234 --nproc_per_node 2 ...
## Results
-DDP profiling results on an [AWS EC2 P4d instance](../environments/aws_quickstart_tutorial.md) with 8x A100 SXM4-40GB for YOLOv5l for 1 COCO epoch.
+DDP profiling results on an [AWS EC2 P4d instance](../environments/aws_quickstart_tutorial.md) with 8x A100 SXM4-40GB for YOLOv5l for 1 COCO [epoch](https://www.ultralytics.com/glossary/epoch).
Profiling code
diff --git a/docs/en/yolov5/tutorials/neural_magic_pruning_quantization.md b/docs/en/yolov5/tutorials/neural_magic_pruning_quantization.md
index 663a5d14..1d2a9bca 100644
--- a/docs/en/yolov5/tutorials/neural_magic_pruning_quantization.md
+++ b/docs/en/yolov5/tutorials/neural_magic_pruning_quantization.md
@@ -30,7 +30,7 @@ DeepSparse is an inference runtime with exceptional performance on CPUs. For ins
- header = (
-     f"| Model | size<br>(pixels) | mAP<sup>val<br>50-95 | Speed<br>CPU ONNX<br>(ms) | "
-     f"Speed<br>{gpu} TensorRT<br>(ms) | params<br>(M) | FLOPs<br>(B) |"
- )
- separator = (
-     "|-------------|---------------------|--------------------|------------------------------|"
-     "-----------------------------------|------------------|-----------------|"
- )
+ headers = [
+     "Model",
+     "size<br>(pixels)",
+     "mAP<sup>val<br>50-95",
+     f"Speed<br>CPU ({get_cpu_info()}) ONNX<br>(ms)",
+     f"Speed<br>{gpu} TensorRT<br>(ms)",
+     "params<br>(M)",
+     "FLOPs<br>(B)",
+ ]
+ header = "|" + "|".join(f" {h} " for h in headers) + "|"
+ separator = "|" + "|".join("-" * (len(h) + 2) for h in headers) + "|"
print(f"\n\n{header}")
print(separator)
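The header/separator construction above can be exercised on its own; the snippet below uses placeholder column names instead of the real `get_cpu_info()` and `gpu` values:

```python
# Build a Markdown table header and a matching separator from a list of column names
headers = ["Model", "size<br>(pixels)", "mAP<sup>val<br>50-95"]
header = "|" + "|".join(f" {h} " for h in headers) + "|"
separator = "|" + "|".join("-" * (len(h) + 2) for h in headers) + "|"

print(header)     # | Model | size<br>(pixels) | mAP<sup>val<br>50-95 |
print(separator)  # each dashed cell is exactly as wide as its padded header
```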
diff --git a/ultralytics/utils/checks.py b/ultralytics/utils/checks.py
index 6b308bc1..70d3d088 100644
--- a/ultralytics/utils/checks.py
+++ b/ultralytics/utils/checks.py
@@ -656,9 +656,10 @@ def check_amp(model):
def amp_allclose(m, im):
"""All close FP32 vs AMP results."""
- a = m(im, device=device, verbose=False)[0].boxes.data # FP32 inference
+ batch = [im] * 8
+ a = m(batch, imgsz=128, device=device, verbose=False)[0].boxes.data # FP32 inference
with autocast(enabled=True):
- b = m(im, device=device, verbose=False)[0].boxes.data # AMP inference
+ b = m(batch, imgsz=128, device=device, verbose=False)[0].boxes.data # AMP inference
del m
return a.shape == b.shape and torch.allclose(a, b.float(), atol=0.5) # close to 0.5 absolute tolerance
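The same FP32-versus-AMP comparison can be sketched generically for any model; the small CNN, image size, and tolerance below are illustrative stand-ins, not the library's `check_amp` routine:

```python
import torch
import torch.nn as nn

def amp_matches_fp32(model: nn.Module, x: torch.Tensor, atol: float = 0.5) -> bool:
    """Return True if the autocast (AMP) output stays close to the FP32 output."""
    model, x = model.cuda().eval(), x.cuda()
    with torch.inference_mode():
        a = model(x).float()                     # FP32 reference
        with torch.autocast(device_type="cuda"):
            b = model(x).float()                 # mixed-precision result
    return a.shape == b.shape and torch.allclose(a, b, atol=atol)

# Example: a small CNN on a random batch (requires a CUDA device)
if torch.cuda.is_available():
    net = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.ReLU(), nn.Conv2d(8, 4, 1))
    print(amp_matches_fp32(net, torch.randn(8, 3, 128, 128)))
```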
diff --git a/ultralytics/utils/downloads.py b/ultralytics/utils/downloads.py
index 79c3f6b0..5cbc868a 100644
--- a/ultralytics/utils/downloads.py
+++ b/ultralytics/utils/downloads.py
@@ -18,6 +18,7 @@
GITHUB_ASSETS_REPO = "ultralytics/assets"
GITHUB_ASSETS_NAMES = (
[f"yolov8{k}{suffix}.pt" for k in "nsmlx" for suffix in ("", "-cls", "-seg", "-pose", "-obb", "-oiv7")]
+ + [f"yolo11{k}{suffix}.pt" for k in "nsmlx" for suffix in ("", "-cls", "-seg", "-pose", "-obb")]
+ [f"yolov5{k}{resolution}u.pt" for k in "nsmlx" for resolution in ("", "6")]
+ [f"yolov3{k}u.pt" for k in ("", "-spp", "-tiny")]
+ [f"yolov8{k}-world.pt" for k in "smlx"]
@@ -408,7 +409,7 @@ def get_github_assets(repo="ultralytics/assets", version="latest", retry=False):
return data["tag_name"], [x["name"] for x in data["assets"]] # tag, assets i.e. ['yolov8n.pt', 'yolov8s.pt', ...]
-def attempt_download_asset(file, repo="ultralytics/assets", release="v8.2.0", **kwargs):
+def attempt_download_asset(file, repo="ultralytics/assets", release="v8.3.0", **kwargs):
"""
Attempt to download a file from GitHub release assets if it is not found locally. The function checks for the file
locally first, then tries to download it from the specified GitHub repository release.
@@ -416,7 +417,7 @@ def attempt_download_asset(file, repo="ultralytics/assets", release="v8.2.0", **
Args:
file (str | Path): The filename or file path to be downloaded.
repo (str, optional): The GitHub repository in the format 'owner/repo'. Defaults to 'ultralytics/assets'.
- release (str, optional): The specific release version to be downloaded. Defaults to 'v8.2.0'.
+ release (str, optional): The specific release version to be downloaded. Defaults to 'v8.3.0'.
**kwargs (any): Additional keyword arguments for the download process.
Returns:
diff --git a/ultralytics/utils/loss.py b/ultralytics/utils/loss.py
index 8d4dfd4e..97a98ac6 100644
--- a/ultralytics/utils/loss.py
+++ b/ultralytics/utils/loss.py
@@ -259,8 +259,11 @@ def __call__(self, preds, batch):
# Pboxes
pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4)
+ # dfl_conf = pred_distri.view(batch_size, -1, 4, self.reg_max).detach().softmax(-1)
+ # dfl_conf = (dfl_conf.amax(-1).mean(-1) + dfl_conf.amax(-1).amin(-1)) / 2
_, target_bboxes, target_scores, fg_mask, _ = self.assigner(
+ # pred_scores.detach().sigmoid() * 0.8 + dfl_conf.unsqueeze(-1) * 0.2,
pred_scores.detach().sigmoid(),
(pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
anchor_points * stride_tensor,
diff --git a/ultralytics/utils/plotting.py b/ultralytics/utils/plotting.py
index d30a4780..f3edf781 100644
--- a/ultralytics/utils/plotting.py
+++ b/ultralytics/utils/plotting.py
@@ -20,7 +20,7 @@
class Colors:
"""
- Ultralytics default color palette https://ultralytics.com/.
+ Ultralytics color palette https://docs.ultralytics.com/reference/utils/plotting/#ultralytics.utils.plotting.Colors.
This class provides methods to work with the Ultralytics color palette, including converting hex color codes to
RGB values.
@@ -29,6 +29,60 @@ class Colors:
palette (list of tuple): List of RGB color values.
n (int): The number of colors in the palette.
pose_palette (np.ndarray): A specific color palette array with dtype np.uint8.
+
+ ## Ultralytics Color Palette
+
+ | Index | Color | HEX | RGB |
+ |-------|-------------------------------------------------------------------|-----------|-------------------|
+ | 0 | | `#042aff` | (4, 42, 255) |
+ | 1 | | `#0bdbeb` | (11, 219, 235) |
+ | 2 | | `#f3f3f3` | (243, 243, 243) |
+ | 3 | | `#00dfb7` | (0, 223, 183) |
+ | 4 | | `#111f68` | (17, 31, 104) |
+ | 5 | | `#ff6fdd` | (255, 111, 221) |
+ | 6 | | `#ff444f` | (255, 68, 79) |
+ | 7 | | `#cced00` | (204, 237, 0) |
+ | 8 | | `#00f344` | (0, 243, 68) |
+ | 9 | | `#bd00ff` | (189, 0, 255) |
+ | 10 | | `#00b4ff` | (0, 180, 255) |
+ | 11 | | `#dd00ba` | (221, 0, 186) |
+ | 12 | | `#00ffff` | (0, 255, 255) |
+ | 13 | | `#26c000` | (38, 192, 0) |
+ | 14 | | `#01ffb3` | (1, 255, 179) |
+ | 15 | | `#7d24ff` | (125, 36, 255) |
+ | 16 | | `#7b0068` | (123, 0, 104) |
+ | 17 | | `#ff1b6c` | (255, 27, 108) |
+ | 18 | | `#fc6d2f` | (252, 109, 47) |
+ | 19 | | `#a2ff0b` | (162, 255, 11) |
+
+ ## Pose Color Palette
+
+ | Index | Color | HEX | RGB |
+ |-------|-------------------------------------------------------------------|-----------|-------------------|
+ | 0 | | `#ff8000` | (255, 128, 0) |
+ | 1 | | `#ff9933` | (255, 153, 51) |
+ | 2 | | `#ffb266` | (255, 178, 102) |
+ | 3 | | `#e6e600` | (230, 230, 0) |
+ | 4 | | `#ff99ff` | (255, 153, 255) |
+ | 5 | | `#99ccff` | (153, 204, 255) |
+ | 6 | | `#ff66ff` | (255, 102, 255) |
+ | 7 | | `#ff33ff` | (255, 51, 255) |
+ | 8 | | `#66b2ff` | (102, 178, 255) |
+ | 9 | | `#3399ff` | (51, 153, 255) |
+ | 10 | | `#ff9999` | (255, 153, 153) |
+ | 11 | | `#ff6666` | (255, 102, 102) |
+ | 12 | | `#ff3333` | (255, 51, 51) |
+ | 13 | | `#99ff99` | (153, 255, 153) |
+ | 14 | | `#66ff66` | (102, 255, 102) |
+ | 15 | | `#33ff33` | (51, 255, 51) |
+ | 16 | | `#00ff00` | (0, 255, 0) |
+ | 17 | | `#0000ff` | (0, 0, 255) |
+ | 18 | | `#ff0000` | (255, 0, 0) |
+ | 19 | | `#ffffff` | (255, 255, 255) |
+
+ !!! note "Ultralytics Brand Colors"
+
+ For Ultralytics brand colors see [https://www.ultralytics.com/brand](https://www.ultralytics.com/brand). Please use the official Ultralytics colors for all marketing materials.
"""
def __init__(self):
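For context, the documented palette is exposed through the module-level `colors` callable in `ultralytics.utils.plotting`; a minimal usage sketch (values correspond to index 0 in the table above):

```python
from ultralytics.utils.plotting import colors

# Index into the Ultralytics palette; bgr=True returns the tuple in OpenCV channel order
print(colors(0))            # (4, 42, 255) in RGB, i.e. #042aff
print(colors(0, bgr=True))  # (255, 42, 4) for cv2 drawing calls
```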
@@ -365,7 +419,7 @@ def masks(self, masks, colors, im_gpu, alpha=0.5, retina_masks=False):
# Convert im back to PIL and update draw
self.fromarray(self.im)
- def kpts(self, kpts, shape=(640, 640), radius=5, kpt_line=True, conf_thres=0.25, kpt_color=None):
+ def kpts(self, kpts, shape=(640, 640), radius=None, kpt_line=True, conf_thres=0.25, kpt_color=None):
"""
Plot keypoints on the image.
@@ -382,6 +436,7 @@ def kpts(self, kpts, shape=(640, 640), radius=5, kpt_line=True, conf_thres=0.25,
- Modifies self.im in-place.
- If self.pil is True, converts image to numpy array and back to PIL.
"""
+ radius = radius if radius is not None else self.lw
if self.pil:
# Convert to numpy first
self.im = np.asarray(self.im).copy()
@@ -417,7 +472,7 @@ def kpts(self, kpts, shape=(640, 640), radius=5, kpt_line=True, conf_thres=0.25,
pos1,
pos2,
kpt_color or self.limb_color[i].tolist(),
- thickness=2,
+ thickness=int(np.ceil(self.lw / 2)),
lineType=cv2.LINE_AA,
)
if self.pil:
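A small sketch of how the new radius default behaves when drawing keypoints; the blank image, line width, and random keypoints are illustrative:

```python
import numpy as np
import torch
from ultralytics.utils.plotting import Annotator

im = np.zeros((640, 640, 3), dtype=np.uint8)
ann = Annotator(im, line_width=4)

# 17 COCO-style keypoints: x, y, confidence
kpts = torch.rand(17, 3) * torch.tensor([640.0, 640.0, 1.0])

ann.kpts(kpts)            # radius=None now falls back to the annotator line width (4 here)
ann.kpts(kpts, radius=8)  # an explicit radius is still honored
```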
diff --git a/ultralytics/utils/torch_utils.py b/ultralytics/utils/torch_utils.py
index 7cde9dc7..e7fcca0a 100644
--- a/ultralytics/utils/torch_utils.py
+++ b/ultralytics/utils/torch_utils.py
@@ -110,15 +110,17 @@ def autocast(enabled: bool, device: str = "cuda"):
def get_cpu_info():
"""Return a string with system CPU information, i.e. 'Apple M2'."""
- with contextlib.suppress(Exception):
- import cpuinfo # pip install py-cpuinfo
+ from ultralytics.utils import PERSISTENT_CACHE # avoid circular import error
- k = "brand_raw", "hardware_raw", "arch_string_raw" # keys sorted by preference (not all keys always available)
- info = cpuinfo.get_cpu_info() # info dict
- string = info.get(k[0] if k[0] in info else k[1] if k[1] in info else k[2], "unknown")
- return string.replace("(R)", "").replace("CPU ", "").replace("@ ", "")
+ if "cpu_info" not in PERSISTENT_CACHE:
+ with contextlib.suppress(Exception):
+ import cpuinfo # pip install py-cpuinfo
- return "unknown"
+ k = "brand_raw", "hardware_raw", "arch_string_raw" # keys sorted by preference
+ info = cpuinfo.get_cpu_info() # info dict
+ string = info.get(k[0] if k[0] in info else k[1] if k[1] in info else k[2], "unknown")
+ PERSISTENT_CACHE["cpu_info"] = string.replace("(R)", "").replace("CPU ", "").replace("@ ", "")
+ return PERSISTENT_CACHE.get("cpu_info", "unknown")
def select_device(device="", batch=0, newline=False, verbose=True):
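The caching pattern introduced above can be illustrated with a generic memoized lookup; `functools.lru_cache` stands in for `PERSISTENT_CACHE` in this sketch:

```python
import functools

@functools.lru_cache(maxsize=1)
def cpu_info_cached() -> str:
    """Hypothetical stand-in: compute the CPU description once, then reuse the cached string."""
    try:
        import cpuinfo  # pip install py-cpuinfo

        info = cpuinfo.get_cpu_info()
        for key in ("brand_raw", "hardware_raw", "arch_string_raw"):  # keys by preference
            if key in info:
                return info[key].replace("(R)", "").replace("CPU ", "").replace("@ ", "")
    except Exception:
        pass
    return "unknown"

print(cpu_info_cached())  # repeat calls return the memoized value without re-querying cpuinfo
```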
@@ -157,7 +159,7 @@ def select_device(device="", batch=0, newline=False, verbose=True):
if isinstance(device, torch.device):
return device
- s = f"Ultralytics YOLOv{__version__} 🚀 Python-{PYTHON_VERSION} torch-{torch.__version__} "
+ s = f"Ultralytics {__version__} 🚀 Python-{PYTHON_VERSION} torch-{torch.__version__} "
device = str(device).lower()
for remove in "cuda:", "none", "(", ")", "[", "]", "'", " ":
device = device.replace(remove, "") # to string, 'cuda:0' -> '0' and '(0, 1)' -> '0,1'
@@ -247,7 +249,7 @@ def fuse_conv_and_bn(conv, bn):
)
# Prepare filters
- w_conv = conv.weight.clone().view(conv.out_channels, -1)
+ w_conv = conv.weight.view(conv.out_channels, -1)
w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))
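A self-contained sanity check of the Conv+BatchNorm fusion math used here; the layer shapes and randomized BN statistics are illustrative:

```python
import torch
import torch.nn as nn

# Build an unfused pair in eval mode with non-trivial BatchNorm statistics
conv = nn.Conv2d(3, 8, 3, padding=1, bias=False).eval()
bn = nn.BatchNorm2d(8).eval()
with torch.no_grad():
    bn.weight.uniform_(0.5, 1.5)
    bn.bias.uniform_(-0.5, 0.5)
    bn.running_mean.uniform_(-0.5, 0.5)
    bn.running_var.uniform_(0.5, 1.5)

# Fuse: w' = diag(gamma / sqrt(var + eps)) @ w,  b' = beta - gamma * mu / sqrt(var + eps)
scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)
fused = nn.Conv2d(3, 8, 3, padding=1, bias=True).eval()
with torch.no_grad():
    fused.weight.copy_((torch.diag(scale) @ conv.weight.view(8, -1)).view_as(conv.weight))
    fused.bias.copy_(bn.bias - scale * bn.running_mean)

x = torch.randn(1, 3, 32, 32)
print(torch.allclose(bn(conv(x)), fused(x), atol=1e-5))  # True
```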
@@ -278,7 +280,7 @@ def fuse_deconv_and_bn(deconv, bn):
)
# Prepare filters
- w_deconv = deconv.weight.clone().view(deconv.out_channels, -1)
+ w_deconv = deconv.weight.view(deconv.out_channels, -1)
w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
fuseddconv.weight.copy_(torch.mm(w_bn, w_deconv).view(fuseddconv.weight.shape))
@@ -533,16 +535,17 @@ def update_attr(self, model, include=(), exclude=("process_group", "reducer")):
copy_attr(self.ema, model, include, exclude)
-def strip_optimizer(f: Union[str, Path] = "best.pt", s: str = "") -> None:
+def strip_optimizer(f: Union[str, Path] = "best.pt", s: str = "", updates: dict = None) -> dict:
"""
Strip optimizer from 'f' to finalize training, optionally save as 's'.
Args:
f (str): file path to model to strip the optimizer from. Default is 'best.pt'.
s (str): file path to save the model with stripped optimizer to. If not provided, 'f' will be overwritten.
+ updates (dict): a dictionary of updates to overlay onto the checkpoint before saving.
Returns:
- None
+ (dict): The combined checkpoint dictionary.
Example:
```python
@@ -562,9 +565,9 @@ def strip_optimizer(f: Union[str, Path] = "best.pt", s: str = "") -> None:
assert "model" in x, "'model' missing from checkpoint"
except Exception as e:
LOGGER.warning(f"WARNING ⚠️ Skipping {f}, not a valid Ultralytics model: {e}")
- return
+ return {}
- updates = {
+ metadata = {
"date": datetime.now().isoformat(),
"version": __version__,
"license": "AGPL-3.0 License (https://ultralytics.com/license)",
@@ -591,9 +594,11 @@ def strip_optimizer(f: Union[str, Path] = "best.pt", s: str = "") -> None:
# x['model'].args = x['train_args']
# Save
- torch.save({**updates, **x}, s or f, use_dill=False) # combine dicts (prefer to the right)
+ combined = {**metadata, **x, **(updates or {})}
+ torch.save(combined, s or f, use_dill=False) # combine dicts (prefer to the right)
mb = os.path.getsize(s or f) / 1e6 # file size
LOGGER.info(f"Optimizer stripped from {f},{f' saved as {s},' if s else ''} {mb:.1f}MB")
+ return combined
def convert_optimizer_state_dict_to_fp16(state_dict):
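With the updated signature, callers can overlay extra keys and keep the returned checkpoint dict; a minimal usage sketch (the `best.pt` path and the `train_results` key are illustrative):

```python
from ultralytics.utils.torch_utils import strip_optimizer

# Overlay custom metadata while stripping the optimizer; the combined checkpoint is returned
ckpt = strip_optimizer("best.pt", updates={"train_results": "results.csv"})
if ckpt:  # an empty dict means the file was not a valid Ultralytics checkpoint
    print(ckpt["version"], ckpt.get("train_results"))
```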