From 90fd1e2602076f850e191c360d72b347056e7c97 Mon Sep 17 00:00:00 2001 From: "changjun.zhu" Date: Fri, 5 Nov 2021 15:11:40 +0800 Subject: [PATCH] docs: use template to generate rst files in "Examples" --- docs/code/LeedsSportsPose.py | 4 + docs/code/NeolixOD.py | 4 + docs/source/conf.py | 34 +++ docs/source/quick_start/examples/BSTLD.rst | 35 ++- .../quick_start/examples/DogsVsCats.rst | 66 ++++-- .../quick_start/examples/LeedsSportsPose.rst | 65 ++++-- docs/source/quick_start/examples/NeolixOD.rst | 74 +++--- .../quick_start/examples/Newsgroups20.rst | 76 +++---- docs/source/quick_start/examples/THCHS30.rst | 76 ++++--- .../examples/examples.rst.template | 215 ++++++++++++++++++ requirements_linter.txt | 1 + 11 files changed, 505 insertions(+), 145 deletions(-) create mode 100644 docs/source/quick_start/examples/examples.rst.template diff --git a/docs/code/LeedsSportsPose.py b/docs/code/LeedsSportsPose.py index 67e43dca9..4bb983b83 100644 --- a/docs/code/LeedsSportsPose.py +++ b/docs/code/LeedsSportsPose.py @@ -43,6 +43,10 @@ dataset = Dataset("LeedsSportsPose", gas) """""" +"""Read Dataset / list segment names""" +dataset.keys() +"""""" + """Read Dataset / get segment""" segment = dataset[0] """""" diff --git a/docs/code/NeolixOD.py b/docs/code/NeolixOD.py index eb60f677b..5909bc30e 100644 --- a/docs/code/NeolixOD.py +++ b/docs/code/NeolixOD.py @@ -42,6 +42,10 @@ dataset = Dataset("NeolixOD", gas) """""" +"""Read Dataset / list segment names""" +dataset.keys() +"""""" + """Read Dataset / get segment""" segment = dataset[0] """""" diff --git a/docs/source/conf.py b/docs/source/conf.py index 3bb1f2b56..77072f9ab 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -15,9 +15,12 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. 
# """Configuration file for the Sphinx documentation builder.""" +import os import sys from pathlib import Path +import jinja2 + sys.path.insert(0, str(Path(__file__).parents[2])) @@ -79,3 +82,34 @@ # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". # html_static_path = ["_static"] + +source_path = os.path.dirname(os.path.abspath(__file__)) +example_path = os.path.join(source_path, "quick_start", "examples") +datasets = ( + "Dogs Vs Cats", + "20 Newsgroups", + "BSTLD", + "Neolix OD", + "Leeds Sports Pose", + "THCHS-30", +) +label_types = ( + "Classification", + "Classification", + "Box2D", + "Box3D", + "Keypoints2D", + "Sentence", +) +file_names = ("DogsVsCats", "Newsgroups20", "BSTLD", "NeolixOD", "LeedsSportsPose", "THCHS30") +for dataset_name, label_type, file_name in zip(datasets, label_types, file_names): + with open(os.path.join(example_path, "examples.rst.template"), encoding="utf-8") as f: + t = jinja2.Template(f.read()) + with open(os.path.join(example_path, f"{file_name}.rst"), "w", encoding="utf-8") as f: + f.write( + t.render( + dataset_name=dataset_name, + label_type=label_type, + file_name=file_name, + ) + ) diff --git a/docs/source/quick_start/examples/BSTLD.rst b/docs/source/quick_start/examples/BSTLD.rst index 062b22be7..331351310 100644 --- a/docs/source/quick_start/examples/BSTLD.rst +++ b/docs/source/quick_start/examples/BSTLD.rst @@ -1,9 +1,12 @@ -######## + + +################## BSTLD -######## +################## This topic describes how to manage the `BSTLD Dataset `_, -which is a dataset with :ref:`reference/label_format/Box2D:Box2D` label(:numref:`Fig. %s `). +which is a dataset with :ref:`reference/label_format/Box2D:Box2D` label +(:numref:`Fig. %s `). .. _example-bstld: @@ -13,6 +16,7 @@ which is a dataset with :ref:`reference/label_format/Box2D:Box2D` label(:numref: The preview of a cropped image with labels from "BSTLD". 
+ ***************************** Authorize a Client Instance ***************************** @@ -48,16 +52,28 @@ Step 1: Write the Catalog ========================= A :ref:`reference/dataset_structure:catalog` contains all label information of one dataset, which -is typically stored in a json file like ``catalog.json``. +is typically stored in a json file like ``catalog.json``. + .. literalinclude:: ../../../../tensorbay/opendataset/BSTLD/catalog.json :language: json :name: BSTLD-catalog :linenos: + + The only annotation type for "BSTLD" is :ref:`reference/label_format/Box2D:Box2D`, and there are 13 :ref:`reference/label_format/CommonLabelProperties:category` types and one :ref:`reference/label_format/CommonLabelProperties:attributes` type. + + + + + + + + + .. note:: By passing the path of the ``catalog.json``, :func:`~tensorbay.dataset.dataset.DatasetBase.load_catalog` supports loading the catalog into dataset. @@ -114,7 +130,7 @@ The organized "BSTLD" dataset can be uploaded to TensorBay for sharing, reuse, e :end-before: """""" .. note:: - Set `skip_uploaded_files=True` to skip uploaded data. + Set ``skip_uploaded_files=True`` to skip uploaded data. The data will be skiped if its name and segment name is the same as remote data. Similar with Git, the commit step after uploading can record changes to the dataset as a version. @@ -132,8 +148,6 @@ Now "BSTLD" dataset can be read from TensorBay. :start-after: """Read Dataset / get dataset""" :end-before: """""" -In :ref:`reference/dataset_structure:dataset` "BSTLD", there are three -:ref:`segments `: ``train``, ``test`` and ``additional``. Get the segment names by listing them all. .. literalinclude:: ../../../../docs/code/BSTLD.py @@ -148,8 +162,7 @@ Get a segment by passing the required segment name. 
:start-after: """Read Dataset / get segment""" :end-before: """""" - -In the train :ref:`reference/dataset_structure:segment`, there is a sequence of :ref:`reference/dataset_structure:data`, +In the :ref:`reference/dataset_structure:segment`, there is a sequence of :ref:`reference/dataset_structure:data`, which can be obtained by index. .. literalinclude:: ../../../../docs/code/BSTLD.py @@ -167,11 +180,13 @@ which can be obtained by index. :end-before: """""" There is only one label type in "BSTLD" dataset, which is ``box2d``. + The information stored in :ref:`reference/label_format/CommonLabelProperties:category` is one of the names in "categories" list of :ref:`catalog.json `. The information stored in :ref:`reference/label_format/CommonLabelProperties:attributes` is one or several of the attributes in "attributes" list of :ref:`catalog.json `. See :ref:`reference/label_format/Box2D:Box2D` label format for more details. + **************** Delete Dataset **************** @@ -179,4 +194,4 @@ See :ref:`reference/label_format/Box2D:Box2D` label format for more details. .. literalinclude:: ../../../../docs/code/BSTLD.py :language: python :start-after: """Delete Dataset""" - :end-before: """""" + :end-before: """""" \ No newline at end of file diff --git a/docs/source/quick_start/examples/DogsVsCats.rst b/docs/source/quick_start/examples/DogsVsCats.rst index c166acc7a..c0d704740 100644 --- a/docs/source/quick_start/examples/DogsVsCats.rst +++ b/docs/source/quick_start/examples/DogsVsCats.rst @@ -1,9 +1,11 @@ -############## - Dogs vs Cats -############## -This topic describes how to manage the `Dogs vs Cats Dataset `_, -which is a dataset with :ref:`reference/label_format/Classification:Classification` label. 
+ +################## + Dogs Vs Cats +################## + +This topic describes how to manage the `Dogs Vs Cats Dataset `_, +which is a dataset with :ref:`reference/label_format/Classification:Classification` label ***************************** Authorize a Client Instance @@ -29,10 +31,10 @@ An :ref:`reference/glossary:accesskey` is needed to authenticate identity when u Organize Dataset ****************** -Normally, ``dataloader.py`` and ``catalog.json`` are required to organize the "Dogs vs Cats" dataset into the :class:`~tensorbay.dataset.dataset.Dataset` instance. +Normally, ``dataloader.py`` and ``catalog.json`` are required to organize the "Dogs Vs Cats" dataset into the :class:`~tensorbay.dataset.dataset.Dataset` instance. In this example, they are stored in the same directory like:: - Dogs vs Cats/ + Dogs Vs Cats/ catalog.json dataloader.py @@ -40,16 +42,28 @@ Step 1: Write the Catalog ========================= A :ref:`reference/dataset_structure:catalog` contains all label information of one dataset, which -is typically stored in a json file like ``catalog.json``. +is typically stored in a json file like ``catalog.json``. + .. literalinclude:: ../../../../tensorbay/opendataset/DogsVsCats/catalog.json :language: json - :name: dogsvscats-catalog + :name: DogsVsCats-catalog :linenos: -The only annotation type for "Dogs vs Cats" is :ref:`reference/label_format/Classification:Classification`, and there are 2 + + + + +The only annotation type for "Dogs Vs Cats" is :ref:`reference/label_format/Classification:Classification`, and there are 2 :ref:`reference/label_format/CommonLabelProperties:category` types. + + + + + + + .. note:: By passing the path of the ``catalog.json``, :func:`~tensorbay.dataset.dataset.DatasetBase.load_catalog` supports loading the catalog into dataset. 
@@ -61,19 +75,17 @@ The only annotation type for "Dogs vs Cats" is :ref:`reference/label_format/Clas Step 2: Write the Dataloader ============================ -A :ref:`reference/glossary:dataloader` is needed to organize the dataset into -a :class:`~tensorbay.dataset.dataset.Dataset` instance. +A :ref:`reference/glossary:dataloader` is needed to organize the dataset into a :class:`~tensorbay.dataset.dataset.Dataset` instance. .. literalinclude:: ../../../../tensorbay/opendataset/DogsVsCats/loader.py :language: python - :name: dogsvscats-dataloader + :name: DogsVsCats-dataloader :linenos: See :ref:`Classification annotation ` for more details. - There are already a number of dataloaders in TensorBay SDK provided by the community. -Thus, instead of writing, importing an available dataloadert is also feasible. +Thus, instead of writing, importing an available dataloader is also feasible. .. literalinclude:: ../../../../docs/code/DogsVsCats.py :language: python @@ -86,7 +98,7 @@ Thus, instead of writing, importing an available dataloadert is also feasible. .. important:: - See :ref:`dataloader table ` for more examples of dataloaders with different label types. + See :ref:`dataloader table ` for dataloaders with different label types. ******************* Visualize Dataset @@ -100,13 +112,17 @@ Please see :ref:`features/visualization:Visualization` for more details. Upload Dataset **************** -The organized "Dogs vs Cats" dataset can be uploaded to TensorBay for sharing, reuse, etc. +The organized "Dogs Vs Cats" dataset can be uploaded to TensorBay for sharing, reuse, etc. .. literalinclude:: ../../../../docs/code/DogsVsCats.py :language: python :start-after: """Upload Dataset""" :end-before: """""" +.. note:: + Set ``skip_uploaded_files=True`` to skip uploaded data. + The data will be skipped if its name and segment name are the same as remote data. + Similar with Git, the commit step after uploading can record changes to the dataset as a version.
If needed, do the modifications and commit again. Please see :ref:`features/version_control/index:Version Control` for more details. @@ -115,15 +131,13 @@ Please see :ref:`features/version_control/index:Version Control` for more detail Read Dataset ************** -Now "Dogs vs Cats" dataset can be read from TensorBay. +Now "Dogs Vs Cats" dataset can be read from TensorBay. .. literalinclude:: ../../../../docs/code/DogsVsCats.py :language: python :start-after: """Read Dataset / get dataset""" :end-before: """""" -In :ref:`reference/dataset_structure:dataset` "Dogs vs Cats", there are two -:ref:`segments `: ``train`` and ``test``. Get the segment names by listing them all. .. literalinclude:: ../../../../docs/code/DogsVsCats.py @@ -138,7 +152,7 @@ Get a segment by passing the required segment name. :start-after: """Read Dataset / get segment""" :end-before: """""" -In the train :ref:`reference/dataset_structure:segment`, there is a sequence of :ref:`reference/dataset_structure:data`, +In the :ref:`reference/dataset_structure:segment`, there is a sequence of :ref:`reference/dataset_structure:data`, which can be obtained by index. .. literalinclude:: ../../../../docs/code/DogsVsCats.py @@ -155,10 +169,14 @@ which can be obtained by index. :start-after: """Read Dataset / get label""" :end-before: """""" -There is only one label type in "Dogs vs Cats" dataset, which is ``classification``. The information stored in :ref:`reference/label_format/CommonLabelProperties:category` is -one of the names in "categories" list of :ref:`catalog.json `. +There is only one label type in "Dogs Vs Cats" dataset, which is ``classification``. + +The information stored in :ref:`reference/label_format/CommonLabelProperties:category` is +one of the names in "categories" list of :ref:`catalog.json `. The information stored +in :ref:`reference/label_format/CommonLabelProperties:attributes` is one or several of the attributes in "attributes" list of :ref:`catalog.json `. 
See :ref:`reference/label_format/Classification:Classification` label format for more details. + **************** Delete Dataset **************** @@ -166,4 +184,4 @@ See :ref:`reference/label_format/Classification:Classification` label format for .. literalinclude:: ../../../../docs/code/DogsVsCats.py :language: python :start-after: """Delete Dataset""" - :end-before: """""" + :end-before: """""" \ No newline at end of file diff --git a/docs/source/quick_start/examples/LeedsSportsPose.rst b/docs/source/quick_start/examples/LeedsSportsPose.rst index fa8ef1ad7..6270af148 100644 --- a/docs/source/quick_start/examples/LeedsSportsPose.rst +++ b/docs/source/quick_start/examples/LeedsSportsPose.rst @@ -1,17 +1,21 @@ -################### + + +################## Leeds Sports Pose -################### +################## This topic describes how to manage the `Leeds Sports Pose Dataset `_, -which is a dataset with :ref:`reference/label_format/Keypoints2D:Keypoints2D` label(:numref:`Fig. %s `). +which is a dataset with :ref:`reference/label_format/Keypoints2D:Keypoints2D` label +(:numref:`Fig. %s `). .. _example-leedssportspose: .. figure:: ../../images/example-Keypoints2D.png - :scale: 80 % + :scale: 50 % :align: center - The preview of an image with labels from "Leeds Sports Pose". + The preview of a cropped image with labels from "Leeds Sports Pose". + ***************************** Authorize a Client Instance @@ -48,15 +52,27 @@ Step 1: Write the Catalog ========================= A :ref:`reference/dataset_structure:catalog` contains all label information of one dataset, which -is typically stored in a json file like ``catalog.json``. +is typically stored in a json file like ``catalog.json``. + .. literalinclude:: ../../../../tensorbay/opendataset/LeedsSportsPose/catalog.json :language: json :name: LeedsSportsPose-catalog :linenos: + + + + + + The only annotation type for "Leeds Sports Pose" is :ref:`reference/label_format/Keypoints2D:Keypoints2D`. + + + + + .. 
note:: By passing the path of the ``catalog.json``, :func:`~tensorbay.dataset.dataset.DatasetBase.load_catalog` supports loading the catalog into dataset. @@ -68,16 +84,14 @@ The only annotation type for "Leeds Sports Pose" is :ref:`reference/label_format Step 2: Write the Dataloader ============================ -A :ref:`reference/glossary:dataloader` is needed to organize the dataset into -a :class:`~tensorbay.dataset.dataset.Dataset` instance. +A :ref:`reference/glossary:dataloader` is needed to organize the dataset into a :class:`~tensorbay.dataset.dataset.Dataset` instance. .. literalinclude:: ../../../../tensorbay/opendataset/LeedsSportsPose/loader.py :language: python :name: LeedsSportsPose-dataloader :linenos: -See :ref:`Keipoints2D annotation ` for more details. - +See :ref:`Keypoints2D annotation ` for more details. There are already a number of dataloaders in TensorBay SDK provided by the community. Thus, instead of writing, importing an available dataloader is also feasible. @@ -107,13 +121,17 @@ Please see :ref:`features/visualization:Visualization` for more details. Upload Dataset **************** -The organized "BSTLD" dataset can be uploaded to TensorBay for sharing, reuse, etc. +The organized "Leeds Sports Pose" dataset can be uploaded to TensorBay for sharing, reuse, etc. .. literalinclude:: ../../../../docs/code/LeedsSportsPose.py :language: python :start-after: """Upload Dataset""" :end-before: """""" +.. note:: + Set ``skip_uploaded_files=True`` to skip uploaded data. + The data will be skipped if its name and segment name are the same as remote data. + Similar with Git, the commit step after uploading can record changes to the dataset as a version. If needed, do the modifications and commit again. Please see :ref:`features/version_control/index:Version Control` for more details. @@ -129,15 +147,21 @@ Now "Leeds Sports Pose" dataset can be read from TensorBay.
:start-after: """Read Dataset / get dataset""" :end-before: """""" -In :ref:`reference/dataset_structure:dataset` "Leeds Sports Pose", there is one -:ref:`reference/dataset_structure:segment` named ``default``. Get it by passing the segment name or the index. +Get the segment names by listing them all. + +.. literalinclude:: ../../../../docs/code/LeedsSportsPose.py + :language: python + :start-after: """Read Dataset / list segment names""" + :end-before: """""" + +Get a segment by passing the required segment name. .. literalinclude:: ../../../../docs/code/LeedsSportsPose.py :language: python :start-after: """Read Dataset / get segment""" :end-before: """""" -In the default :ref:`reference/dataset_structure:segment`, there is a sequence of :ref:`reference/dataset_structure:data`, +In the :ref:`reference/dataset_structure:segment`, there is a sequence of :ref:`reference/dataset_structure:data`, which can be obtained by index. .. literalinclude:: ../../../../docs/code/LeedsSportsPose.py @@ -154,10 +178,13 @@ which can be obtained by index. :start-after: """Read Dataset / get label""" :end-before: """""" -There is only one label type in "Leeds Sports Pose" dataset, which is ``keypoints2d``. The information stored in ``x`` (``y``) is -the x (y) coordinate of one keypoint of one keypoints list. The information stored in ``v`` is -the visible status of one keypoint of one keypoints list. See :ref:`reference/label_format/Keypoints2D:Keypoints2D` -label format for more details. +There is only one label type in "Leeds Sports Pose" dataset, which is ``keypoints2d``. + +The information stored in :ref:`reference/label_format/CommonLabelProperties:category` is +one of the names in "categories" list of :ref:`catalog.json `. The information stored +in :ref:`reference/label_format/CommonLabelProperties:attributes` is one or several of the attributes in "attributes" list of :ref:`catalog.json `. 
+See :ref:`reference/label_format/Keypoints2D:Keypoints2D` label format for more details. + **************** Delete Dataset @@ -166,4 +193,4 @@ label format for more details. .. literalinclude:: ../../../../docs/code/LeedsSportsPose.py :language: python :start-after: """Delete Dataset""" - :end-before: """""" + :end-before: """""" \ No newline at end of file diff --git a/docs/source/quick_start/examples/NeolixOD.rst b/docs/source/quick_start/examples/NeolixOD.rst index 4107765db..b203cfac9 100644 --- a/docs/source/quick_start/examples/NeolixOD.rst +++ b/docs/source/quick_start/examples/NeolixOD.rst @@ -1,20 +1,21 @@ -########### + + +################## Neolix OD -########### +################## -This topic describes how to manage the `Neolix OD dataset`_, -which is a dataset with :ref:`reference/label_format/Box3D:Box3D` label type +This topic describes how to manage the `Neolix OD Dataset `_, +which is a dataset with :ref:`reference/label_format/Box3D:Box3D` label (:numref:`Fig. %s `). -.. _Neolix OD dataset: https://gas.graviti.cn/dataset/graviti-open-dataset/NeolixOD - .. _example-neolixod: .. figure:: ../../images/example-Box3D.png :scale: 50 % :align: center - The preview of a point cloud from "Neolix OD" with Box3D labels. + The preview of a cropped image with labels from "Neolix OD". + ***************************** Authorize a Client Instance @@ -31,7 +32,6 @@ An :ref:`reference/glossary:accesskey` is needed to authenticate identity when u Create Dataset **************** - .. literalinclude:: ../../../../docs/code/NeolixOD.py :language: python :start-after: """Create Dataset""" @@ -51,17 +51,29 @@ In this example, they are stored in the same directory like:: Step 1: Write the Catalog ========================= -A :ref:`Catalog ` contains all label information of one dataset, -which is typically stored in a json file like ``catalog.json``. 
+A :ref:`reference/dataset_structure:catalog` contains all label information of one dataset, which +is typically stored in a json file like ``catalog.json``. + .. literalinclude:: ../../../../tensorbay/opendataset/NeolixOD/catalog.json :language: json - :name: neolixod-catalog + :name: NeolixOD-catalog :linenos: + + + + + + + + The only annotation type for "Neolix OD" is :ref:`reference/label_format/Box3D:Box3D`, and there are 15 :ref:`reference/label_format/CommonLabelProperties:Category` types and 3 :ref:`reference/label_format/CommonLabelProperties:Attributes` types. + + + .. note:: By passing the path of the ``catalog.json``, :func:`~tensorbay.dataset.dataset.DatasetBase.load_catalog` supports loading the catalog into dataset. @@ -73,17 +85,15 @@ The only annotation type for "Neolix OD" is :ref:`reference/label_format/Box3D:B Step 2: Write the Dataloader ============================ -A :ref:`reference/glossary:dataloader` is needed to organize the dataset into -a :class:`~tensorbay.dataset.dataset.Dataset` instance. +A :ref:`reference/glossary:dataloader` is needed to organize the dataset into a :class:`~tensorbay.dataset.dataset.Dataset` instance. .. literalinclude:: ../../../../tensorbay/opendataset/NeolixOD/loader.py :language: python - :name: neolixod-dataloader + :name: NeolixOD-dataloader :linenos: See :ref:`Box3D annotation ` for more details. - There are already a number of dataloaders in TensorBay SDK provided by the community. Thus, instead of writing, importing an available dataloader is also feasible. @@ -112,13 +122,17 @@ Please see :ref:`features/visualization:Visualization` for more details. Upload Dataset **************** -The organized "Neolix OD" dataset can be uploaded to tensorBay for sharing, reuse, etc. +The organized "Neolix OD" dataset can be uploaded to TensorBay for sharing, reuse, etc. .. literalinclude:: ../../../../docs/code/NeolixOD.py :language: python :start-after: """Upload Dataset""" :end-before: """""" +.. 
note:: + Set ``skip_uploaded_files=True`` to skip uploaded data. + The data will be skipped if its name and segment name are the same as remote data. + Similar with Git, the commit step after uploading can record changes to the dataset as a version. If needed, do the modifications and commit again. Please see :ref:`features/version_control/index:Version Control` for more details. @@ -134,17 +148,21 @@ Now "Neolix OD" dataset can be read from TensorBay. :start-after: """Read Dataset / get dataset""" :end-before: """""" -In :ref:`reference/dataset_structure:Dataset` "Neolix OD", there is only one -:ref:`segment `: ``default``. -Get a segment by passing the required segment name or the index. +Get the segment names by listing them all. + +.. literalinclude:: ../../../../docs/code/NeolixOD.py + :language: python + :start-after: """Read Dataset / list segment names""" + :end-before: """""" + +Get a segment by passing the required segment name. .. literalinclude:: ../../../../docs/code/NeolixOD.py :language: python :start-after: """Read Dataset / get segment""" :end-before: """""" -In the default :ref:`reference/dataset_structure:Segment`, -there is a sequence of :ref:`reference/dataset_structure:Data`, +In the :ref:`reference/dataset_structure:segment`, there is a sequence of :ref:`reference/dataset_structure:data`, which can be obtained by index. .. literalinclude:: ../../../../docs/code/NeolixOD.py @@ -152,8 +170,9 @@ which can be obtained by index. :start-after: """Read Dataset / get data""" :end-before: """""" -In each :ref:`reference/dataset_structure:Data`, +In each :ref:`reference/dataset_structure:data`, there is a sequence of :ref:`reference/label_format/Box3D:Box3D` annotations, +which can be obtained by index. ..
literalinclude:: ../../../../docs/code/NeolixOD.py :language: python @@ -161,12 +180,13 @@ there is a sequence of :ref:`reference/label_format/Box3D:Box3D` annotations, :end-before: """""" There is only one label type in "Neolix OD" dataset, which is ``box3d``. -The information stored in :ref:`reference/label_format/CommonLabelProperties:Category` is -one of the category names in "categories" list of :ref:`catalog.json `. -The information stored in :ref:`reference/label_format/CommonLabelProperties:Attributes` -is one of the attributes in "attributes" list of :ref:`catalog.json `. + +The information stored in :ref:`reference/label_format/CommonLabelProperties:category` is +one of the names in "categories" list of :ref:`catalog.json `. The information stored +in :ref:`reference/label_format/CommonLabelProperties:attributes` is one or several of the attributes in "attributes" list of :ref:`catalog.json `. See :ref:`reference/label_format/Box3D:Box3D` label format for more details. + **************** Delete Dataset **************** @@ -174,4 +194,4 @@ See :ref:`reference/label_format/Box3D:Box3D` label format for more details. .. literalinclude:: ../../../../docs/code/NeolixOD.py :language: python :start-after: """Delete Dataset""" - :end-before: """""" + :end-before: """""" \ No newline at end of file diff --git a/docs/source/quick_start/examples/Newsgroups20.rst b/docs/source/quick_start/examples/Newsgroups20.rst index 02045b430..bdda147d0 100644 --- a/docs/source/quick_start/examples/Newsgroups20.rst +++ b/docs/source/quick_start/examples/Newsgroups20.rst @@ -1,11 +1,11 @@ -############### - 20 Newsgroups -############### -This topic describes how to manage the `20 Newsgroups dataset`_, which is a dataset -with :ref:`reference/label_format/Classification:Classification` label type. -.. 
_20 Newsgroups dataset: https://gas.graviti.cn/dataset/data-decorators/Newsgroups20 +################## + 20 Newsgroups +################## + +This topic describes how to manage the `20 Newsgroups Dataset `_, +which is a dataset with :ref:`reference/label_format/Classification:Classification` label ***************************** Authorize a Client Instance @@ -21,7 +21,7 @@ An :ref:`reference/glossary:accesskey` is needed to authenticate identity when u **************** Create Dataset **************** - + .. literalinclude:: ../../../../docs/code/Newsgroups20.py :language: python :start-after: """Create Dataset""" @@ -38,30 +38,35 @@ In this example, they are stored in the same directory like:: catalog.json dataloader.py - -It takes the following steps to organize the "20 Newsgroups" dataset by -the :class:`~tensorbay.dataset.dataset.Dataset` instance. - Step 1: Write the Catalog ========================= -A :ref:`Catalog ` contains all label information of one dataset, -which is typically stored in a json file like ``catalog.json``. +A :ref:`reference/dataset_structure:catalog` contains all label information of one dataset, which +is typically stored in a json file like ``catalog.json``. + .. literalinclude:: ../../../../tensorbay/opendataset/Newsgroups20/catalog.json :language: json :name: Newsgroups20-catalog :linenos: + + + + + + + + + + The only annotation type for "20 Newsgroups" is :ref:`reference/label_format/Classification:Classification`, and there are 20 :ref:`reference/label_format/CommonLabelProperties:Category` types. + .. note:: - * The :ref:`categories` in - :ref:`reference/dataset_structure:Dataset` "20 Newsgroups" have parent-child relationship, - and it use "." to sparate different levels. - * By passing the path of the ``catalog.json``, :func:`~tensorbay.dataset.dataset.DatasetBase.load_catalog` supports loading the catalog into dataset. 
+ By passing the path of the ``catalog.json``, :func:`~tensorbay.dataset.dataset.DatasetBase.load_catalog` supports loading the catalog into dataset. .. important:: @@ -70,8 +75,7 @@ and there are 20 :ref:`reference/label_format/CommonLabelProperties:Category` ty Step 2: Write the Dataloader ============================ -A :ref:`reference/glossary:Dataloader` is neeeded to organize the dataset into a -:class:`~tensorbay.dataset.dataset.Dataset` instance. +A :ref:`reference/glossary:dataloader` is needed to organize the dataset into a :class:`~tensorbay.dataset.dataset.Dataset` instance. .. literalinclude:: ../../../../tensorbay/opendataset/Newsgroups20/loader.py :language: python @@ -80,13 +84,6 @@ A :ref:`reference/glossary:Dataloader` is neeeded to organize the dataset into a See :ref:`Classification annotation ` for more details. -.. note:: - - The data in "20 Newsgroups" do not have extensions - so that a "txt" extension is added to the remote path of each data file - to ensure the loaded dataset could function well on TensorBay. - - There are already a number of dataloaders in TensorBay SDK provided by the community. Thus, instead of writing, importing an available dataloader is also feasible. @@ -122,6 +119,10 @@ The organized "20 Newsgroups" dataset can be uploaded to TensorBay for sharing, :start-after: """Upload Dataset""" :end-before: """""" +.. note:: + Set ``skip_uploaded_files=True`` to skip uploaded data. + The data will be skipped if its name and segment name are the same as remote data. + Similar with Git, the commit step after uploading can record changes to the dataset as a version. If needed, do the modifications and commit again. Please see :ref:`features/version_control/index:Version Control` for more details. @@ -137,9 +138,6 @@ Now "20 Newsgroups" dataset can be read from TensorBay.
:start-after: """Read Dataset / get dataset""" :end-before: """""" -In :ref:`reference/dataset_structure:Dataset` "20 Newsgroups", there are four -:ref:`Segments `: ``20news-18828``, -``20news-bydate-test`` and ``20news-bydate-train``, ``20_newsgroups``. Get the segment names by listing them all. .. literalinclude:: ../../../../docs/code/Newsgroups20.py @@ -154,7 +152,7 @@ Get a segment by passing the required segment name. :start-after: """Read Dataset / get segment""" :end-before: """""" -In the 20news-18828 :ref:`reference/dataset_structure:Segment`, there is a sequence of :ref:`reference/dataset_structure:Data`, +In the :ref:`reference/dataset_structure:segment`, there is a sequence of :ref:`reference/dataset_structure:data`, which can be obtained by index. .. literalinclude:: ../../../../docs/code/Newsgroups20.py @@ -162,7 +160,7 @@ which can be obtained by index. :start-after: """Read Dataset / get data""" :end-before: """""" -In each :ref:`reference/dataset_structure:Data`, +In each :ref:`reference/dataset_structure:data`, there is a sequence of :ref:`reference/label_format/Classification:Classification` annotations, which can be obtained by index. @@ -171,12 +169,14 @@ which can be obtained by index. :start-after: """Read Dataset / get label""" :end-before: """""" -There is only one label type in "20 Newsgroups" dataset, which is ``Classification``. -The information stored in :ref:`reference/label_format/CommonLabelProperties:Category` is -one of the category names in "categories" list of :ref:`catalog.json `. -See :ref:`this page ` for more details about the -structure of Classification. - +There is only one label type in "20 Newsgroups" dataset, which is ``classification``. + +The information stored in :ref:`reference/label_format/CommonLabelProperties:category` is +one of the names in "categories" list of :ref:`catalog.json `. 
The information stored +in :ref:`reference/label_format/CommonLabelProperties:attributes` is one or several of the attributes in "attributes" list of :ref:`catalog.json `. +See :ref:`reference/label_format/Classification:Classification` label format for more details. + + **************** Delete Dataset **************** @@ -184,4 +184,4 @@ structure of Classification. .. literalinclude:: ../../../../docs/code/Newsgroups20.py :language: python :start-after: """Delete Dataset""" - :end-before: """""" + :end-before: """""" \ No newline at end of file diff --git a/docs/source/quick_start/examples/THCHS30.rst b/docs/source/quick_start/examples/THCHS30.rst index 66020089a..9d9d5bb11 100644 --- a/docs/source/quick_start/examples/THCHS30.rst +++ b/docs/source/quick_start/examples/THCHS30.rst @@ -1,12 +1,12 @@ -########### + + +################## THCHS-30 -########### +################## -This topic describes how to manage the `THCHS-30 Dataset`_, +This topic describes how to manage the `THCHS-30 Dataset `_, which is a dataset with :ref:`reference/label_format/Sentence:Sentence` label -.. _THCHS-30 Dataset: https://www.graviti.com/open-datasets/data-decorators/THCHS30 - ***************************** Authorize a Client Instance ***************************** @@ -28,29 +28,50 @@ An :ref:`reference/glossary:accesskey` is needed to authenticate identity when u :end-before: """""" ****************** -Organize Dataset + Organize Dataset ****************** -It takes the following steps to organize the “THCHS-30” dataset by the :class:`~tensorbay.dataset.dataset.Dataset` instance. +Normally, ``dataloader.py`` and ``catalog.json`` are required to organize the "THCHS-30" dataset into the :class:`~tensorbay.dataset.dataset.Dataset` instance. 
+In this example, they are stored in the same directory like:: + + THCHS-30/ + catalog.json + dataloader.py Step 1: Write the Catalog ========================= -A :ref:`Catalog ` contains all label information of one -dataset, which is typically stored in a json file. However the catalog of THCHS-30 is too +A :ref:`reference/dataset_structure:catalog` contains all label information of one dataset, which +is typically stored in a json file like ``catalog.json``. +However the catalog of THCHS-30 is too large, instead of reading it from json file, we read it by mapping from subcatalog that is loaded by the raw file. Check the :ref:`dataloader ` below for more details. + + + + + + + + + + + + + +.. note:: + + By passing the path of the ``catalog.json``, :func:`~tensorbay.dataset.dataset.DatasetBase.load_catalog` supports loading the catalog into dataset. + .. important:: - See :ref:`catalog table ` for more catalogs with different - label types. + See :ref:`catalog table ` for more catalogs with different label types. Step 2: Write the Dataloader ============================ -A :ref:`dataloader ` is needed to organize the dataset -into a :class:`~tensorbay.dataset.dataset.Dataset` instance. +A :ref:`reference/glossary:dataloader` is needed to organize the dataset into a :class:`~tensorbay.dataset.dataset.Dataset` instance. .. literalinclude:: ../../../../tensorbay/opendataset/THCHS30/loader.py :language: python @@ -59,9 +80,8 @@ into a :class:`~tensorbay.dataset.dataset.Dataset` instance. See :ref:`Sentence annotation ` for more details. - There are already a number of dataloaders in TensorBay SDK provided by the community. -Thus, instead of writing, importing an available dataloadert is also feasible. +Thus, instead of writing, importing an available dataloader is also feasible. .. literalinclude:: ../../../../docs/code/THCHS30.py :language: python @@ -85,7 +105,7 @@ This step can help users to check whether the dataset is correctly organized. 
Please see :ref:`features/visualization:Visualization` for more details. **************** -Upload Dataset + Upload Dataset **************** The organized "THCHS-30" dataset can be uploaded to TensorBay for sharing, reuse, etc. @@ -95,12 +115,16 @@ The organized "THCHS-30" dataset can be uploaded to TensorBay for sharing, reuse :start-after: """Upload Dataset""" :end-before: """""" +.. note:: + Set ``skip_uploaded_files=True`` to skip uploaded data. + The data will be skipped if its name and segment name are the same as remote data. + Similar with Git, the commit step after uploading can record changes to the dataset as a version. If needed, do the modifications and commit again. Please see :ref:`features/version_control/index:Version Control` for more details. ************** -Read Dataset + Read Dataset ************** Now "THCHS-30" dataset can be read from TensorBay. @@ -110,9 +134,6 @@ Now "THCHS-30" dataset can be read from TensorBay. :start-after: """Read Dataset / get dataset""" :end-before: """""" -In :ref:`reference/dataset_structure:Dataset` "THCHS-30", there are three -:ref:`Segments `: -``dev``, ``train`` and ``test``. Get the segment names by listing them all. .. literalinclude:: ../../../../docs/code/THCHS30.py @@ -127,8 +148,7 @@ Get a segment by passing the required segment name. :start-after: """Read Dataset / get segment""" :end-before: """""" -In the dev :ref:`reference/dataset_structure:Segment`, -there is a sequence of :ref:`reference/dataset_structure:Data`, +In the :ref:`reference/dataset_structure:segment`, there is a sequence of :ref:`reference/dataset_structure:data`, which can be obtained by index. .. literalinclude:: ../../../../docs/code/THCHS30.py @@ -136,7 +156,7 @@ which can be obtained by index.
:start-after: """Read Dataset / get data""" :end-before: """""" -In each :ref:`reference/dataset_structure:Data`, +In each :ref:`reference/dataset_structure:data`, there is a sequence of :ref:`reference/label_format/Sentence:Sentence` annotations, which can be obtained by index. @@ -145,15 +165,17 @@ which can be obtained by index. :start-after: """Read Dataset / get label""" :end-before: """""" -There is only one label type in "THCHS-30" dataset, which is ``Sentence``. It contains -``sentence``, ``spell`` and ``phone`` information. See :ref:`Sentence ` +There is only one label type in "THCHS-30" dataset, which is ``sentence``. + +It contains ``sentence``, ``spell`` and ``phone`` information. See :ref:`Sentence ` label format for more details. + **************** -Delete Dataset + Delete Dataset **************** .. literalinclude:: ../../../../docs/code/THCHS30.py :language: python :start-after: """Delete Dataset""" - :end-before: """""" + :end-before: """""" \ No newline at end of file diff --git a/docs/source/quick_start/examples/examples.rst.template b/docs/source/quick_start/examples/examples.rst.template new file mode 100644 index 000000000..073b32e54 --- /dev/null +++ b/docs/source/quick_start/examples/examples.rst.template @@ -0,0 +1,215 @@ +{% set datasets_with_image=("BSTLD", "Neolix OD", "Leeds Sports Pose", ) %} + +################## + {{dataset_name}} +################## + +This topic describes how to manage the `{{dataset_name}} Dataset `_, +which is a dataset with :ref:`reference/label_format/{{label_type}}:{{label_type}}` label{% if dataset_name in datasets_with_image %} +(:numref:`Fig. %s `). + +.. _example-{{file_name | lower}}: + +.. figure:: ../../images/example-{{label_type}}.png + :scale: 50 % + :align: center + + The preview of a cropped image with labels from "{{dataset_name}}". 
+{% endif %} + +***************************** + Authorize a Client Instance +***************************** + +An :ref:`reference/glossary:accesskey` is needed to authenticate identity when using TensorBay. + +.. literalinclude:: ../../../../docs/code/{{file_name}}.py + :language: python + :start-after: """Authorize a Client Instance""" + :end-before: """""" + +**************** + Create Dataset +**************** + +.. literalinclude:: ../../../../docs/code/{{file_name}}.py + :language: python + :start-after: """Create Dataset""" + :end-before: """""" + +****************** + Organize Dataset +****************** + +Normally, ``dataloader.py`` and ``catalog.json`` are required to organize the "{{dataset_name}}" dataset into the :class:`~tensorbay.dataset.dataset.Dataset` instance. +In this example, they are stored in the same directory like:: + + {{dataset_name}}/ + catalog.json + dataloader.py + +Step 1: Write the Catalog +========================= + +A :ref:`reference/dataset_structure:catalog` contains all label information of one dataset, which +is typically stored in a json file like ``catalog.json``. {% if dataset_name == "THCHS-30" %} +However the catalog of THCHS-30 is too +large, instead of reading it from json file, we read it by mapping from subcatalog that is +loaded by the raw file. Check the :ref:`dataloader ` below for more details. +{% endif %} + +{% if dataset_name != "THCHS-30" %} +.. literalinclude:: ../../../../tensorbay/opendataset/{{file_name}}/catalog.json + :language: json + :name: {{file_name}}-catalog + :linenos: +{% endif %} + +{% if dataset_name == "BSTLD" %} +The only annotation type for "{{dataset_name}}" is :ref:`reference/label_format/{{label_type}}:{{label_type}}`, and there are 13 +:ref:`reference/label_format/CommonLabelProperties:category` types and one :ref:`reference/label_format/CommonLabelProperties:attributes` type. 
+{% endif %} + +{% if dataset_name == "Dogs Vs Cats" %} +The only annotation type for "{{dataset_name}}" is :ref:`reference/label_format/{{label_type}}:{{label_type}}`, and there are 2 +:ref:`reference/label_format/CommonLabelProperties:category` types. +{% endif %} + +{% if dataset_name == "Leeds Sports Pose" %} +The only annotation type for "{{dataset_name}}" is :ref:`reference/label_format/{{label_type}}:{{label_type}}`. +{% endif %} + +{% if dataset_name == "Neolix OD" %} +The only annotation type for "{{dataset_name}}" is :ref:`reference/label_format/{{label_type}}:{{label_type}}`, and there are 15 +:ref:`reference/label_format/CommonLabelProperties:Category` types and 3 :ref:`reference/label_format/CommonLabelProperties:Attributes` types. +{% endif %} + +{% if dataset_name == "20 Newsgroups" %} +The only annotation type for "{{dataset_name}}" is :ref:`reference/label_format/{{label_type}}:{{label_type}}`, +and there are 20 :ref:`reference/label_format/CommonLabelProperties:Category` types. +{% endif %} + +.. note:: + + By passing the path of the ``catalog.json``, :func:`~tensorbay.dataset.dataset.DatasetBase.load_catalog` supports loading the catalog into dataset. + +.. important:: + + See :ref:`catalog table ` for more catalogs with different label types. + +Step 2: Write the Dataloader +============================ + +A :ref:`reference/glossary:dataloader` is needed to organize the dataset into a :class:`~tensorbay.dataset.dataset.Dataset` instance. + +.. literalinclude:: ../../../../tensorbay/opendataset/{{file_name}}/loader.py + :language: python + :name: {{file_name}}-dataloader + :linenos: + +See :ref:`{{label_type}} annotation ` for more details. + +There are already a number of dataloaders in TensorBay SDK provided by the community. +Thus, instead of writing, importing an available dataloader is also feasible. + +.. 
literalinclude:: ../../../../docs/code/{{file_name}}.py + :language: python + :start-after: """Organize dataset / import dataloader""" + :end-before: """""" + +.. note:: + + Note that catalogs are automatically loaded in available dataloaders, users do not have to write them again. + +.. important:: + + See :ref:`dataloader table ` for dataloaders with different label types. + +******************* + Visualize Dataset +******************* + +Optionally, the organized dataset can be visualized by **Pharos**, which is a TensorBay SDK plug-in. +This step can help users to check whether the dataset is correctly organized. +Please see :ref:`features/visualization:Visualization` for more details. + +**************** + Upload Dataset +**************** + +The organized "{{dataset_name}}" dataset can be uploaded to TensorBay for sharing, reuse, etc. + +.. literalinclude:: ../../../../docs/code/{{file_name}}.py + :language: python + :start-after: """Upload Dataset""" + :end-before: """""" + +.. note:: + Set ``skip_uploaded_files=True`` to skip uploaded data. + The data will be skipped if its name and segment name are the same as remote data. + +Similar to Git, the commit step after uploading can record changes to the dataset as a version. +If needed, do the modifications and commit again. +Please see :ref:`features/version_control/index:Version Control` for more details. + +************** + Read Dataset +************** + +Now "{{dataset_name}}" dataset can be read from TensorBay. + +.. literalinclude:: ../../../../docs/code/{{file_name}}.py + :language: python + :start-after: """Read Dataset / get dataset""" + :end-before: """""" + +Get the segment names by listing them all. + +.. literalinclude:: ../../../../docs/code/{{file_name}}.py + :language: python + :start-after: """Read Dataset / list segment names""" + :end-before: """""" + +Get a segment by passing the required segment name. + +..
literalinclude:: ../../../../docs/code/{{file_name}}.py + :language: python + :start-after: """Read Dataset / get segment""" + :end-before: """""" + +In the :ref:`reference/dataset_structure:segment`, there is a sequence of :ref:`reference/dataset_structure:data`, +which can be obtained by index. + +.. literalinclude:: ../../../../docs/code/{{file_name}}.py + :language: python + :start-after: """Read Dataset / get data""" + :end-before: """""" + +In each :ref:`reference/dataset_structure:data`, +there is a sequence of :ref:`reference/label_format/{{label_type}}:{{label_type}}` annotations, +which can be obtained by index. + +.. literalinclude:: ../../../../docs/code/{{file_name}}.py + :language: python + :start-after: """Read Dataset / get label""" + :end-before: """""" + +There is only one label type in "{{dataset_name}}" dataset, which is ``{{label_type | lower}}``. +{% if dataset_name != "THCHS-30" %} +The information stored in :ref:`reference/label_format/CommonLabelProperties:category` is +one of the names in "categories" list of :ref:`catalog.json <{{file_name}}-catalog>`. The information stored +in :ref:`reference/label_format/CommonLabelProperties:attributes` is one or several of the attributes in "attributes" list of :ref:`catalog.json <{{file_name}}-catalog>`. +See :ref:`reference/label_format/{{label_type}}:{{label_type}}` label format for more details. +{% else %} +It contains ``sentence``, ``spell`` and ``phone`` information. See :ref:`Sentence ` +label format for more details. +{% endif %} + +**************** + Delete Dataset +**************** + +.. literalinclude:: ../../../../docs/code/{{file_name}}.py + :language: python + :start-after: """Delete Dataset""" + :end-before: """""" diff --git a/requirements_linter.txt b/requirements_linter.txt index a5d323d66..1fdbb8223 100644 --- a/requirements_linter.txt +++ b/requirements_linter.txt @@ -14,3 +14,4 @@ xmltodict >= 0.2 pyyaml >= 5.1 h5py >= 3.2.1 pillow >= 2.0.0 +jinja2 >= 3.0.1