Merge pull request #40 from sdevenes/issue_39/update_documentation

Issue 39/update documentation
sdevenes · Oct 12, 2020 · 15574ec · 15574ec
2 parents 1b24f9f + 534c353
commit 15574ec
Show file tree

Hide file tree

Showing 27 changed files with 1,039 additions and 53 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -13,7 +13,7 @@ script:
   # Run unitTests
   - nosetests --nocapture -v rr --with-coverage --cover-package=rr
   # Generate rst files from docstring
-  - sphinx-apidoc -fMeT -o docs/source/api rr rr/tests
+  - sphinx-apidoc -fMeT -o docs/source/api rr
   # Build documentation
   - sphinx-build docs/source docs/build
   - touch docs/build/.nojekyll

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,3 +1,4 @@
 include LICENSE README.rst buildout.cfg requirements.txt
 recursive-include doc conf.py *.rst *.ico *.png
+include rr/experiment/experiment.ini
 graft tests/inputs
diff --git a/README.rst b/README.rst
@@ -18,22 +18,3 @@
 
 This mini project was developed during the M05 course "reproducibility and open science"
 taught by Idiap as part of the AI master 2020-2021.
-
-Working hypothesis
-------------------
-
-It is possible to perform human activity recognition using data from continuous ambient sensors
-
-Dataset
--------
-
-The dataset that was used for this project is the following: https://archive.ics.uci.edu/ml/datasets/Human+Activity+Recognition+from+Continuous+Ambient+Sensor+Data.
-It represents ambient data collected in 30 homes with volunteer residents.
-Data are collected continuously while residents perform their normal routines.
-It contains 36 features measured plus one output for the classification label of the activity, for
-a total of 13956534 entries.
-
-Machine learning
-----------------
-
-Random forest classifiers from scikit-learn were used to obtains the results presented in this project.
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -13,7 +13,7 @@
 import os
 import sys
 import subprocess
-sys.path.insert(0, os.path.abspath('../../experiment'))
+sys.path.insert(0, os.path.abspath('../..'))
 
 
 # -- Project information -----------------------------------------------------

diff --git a/docs/source/guide.rst b/docs/source/guide.rst
@@ -4,22 +4,118 @@
 
 This guide explains how to use this package and obtain results published in our
 paper.  Results can be re-generated automatically by executing the following
-command:
+commands:
 
 .. code-block:: sh
 
-   (project) $ python paper.py
+   $ rr-data 'datapath/'
+   $ rr-paper 'datapath/csh101/csh101.ann.features.csv' 'output/'
 
 
 For your reference, the paper tables are repeated below, so you can check the
 reproducibility of our solution.
 
+Working hypothesis
+------------------
+
+It is possible to perform human activity recognition using data from continuous ambient sensors.
+
+Dataset
+-------
+
+The dataset that was used for this project is the UCI ML Repository's Human Activity Recognition from Continuous Ambient Sensor Data Data Set [dua2019]_ [cook2012]_.
+It represents ambient data collected in 30 homes with volunteer residents.
+Data are collected continuously while residents perform their normal routines.
+It contains 36 features measured plus one output for the classification label of the activity, for
+a total of 13956534 entries.
+
+Machine learning
+----------------
+
+Random forest classifiers from scikit-learn were used to obtain the results presented in this project.
+
 
 Results for Protocol `proto1`
 -----------------------------
 
-.. warning::
-   This graph is only a example, we need to update it with final results !
+Impact of number of trees with maximum depth of 15
+==================================================
+
+1 tree in forest
+
+.. raw:: html
+   :file: img/table_1.html
+
+5 trees in forest
+
+.. raw:: html
+   :file: img/table_2.html
+
+10 trees in forest
+
+.. raw:: html
+   :file: img/table_3.html
+
+
+Impact of tree depth with 10 trees per forest
+=============================================
+
+Depth of 5
+
+.. raw:: html
+   :file: img/table_7.html
+
+Depth of 10
+
+.. raw:: html
+   :file: img/table_8.html
+
+Depth of 15
+
+.. raw:: html
+   :file: img/table_9.html
+
+
+Results for Protocol `proto2`
+-----------------------------
+
+Impact of number of trees with maximum depth of 15
+==================================================
+
+1 tree in forest
 
 .. raw:: html
-   :file: img/result_exemple.html
+   :file: img/table_4.html
+
+5 trees in forest
+
+.. raw:: html
+   :file: img/table_5.html
+
+10 trees in forest
+
+.. raw:: html
+   :file: img/table_6.html
+
+
+Impact of tree depth with 10 trees per forest
+=============================================
+
+Depth of 5
+
+.. raw:: html
+   :file: img/table_10.html
+
+Depth of 10
+
+.. raw:: html
+   :file: img/table_11.html
+
+Depth of 15
+
+.. raw:: html
+   :file: img/table_12.html
+
+
+.. [dua2019] Dua, D. and Graff, C. (2019). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.
+.. [cook2012] D. Cook. Learning setting-generalized activity models for smart spaces. IEEE Intelligent Systems, 27(1):32-38, 2012.
diff --git a/docs/source/img/table_1.html b/docs/source/img/table_1.html
diff --git a/docs/source/img/table_10.html b/docs/source/img/table_10.html
diff --git a/docs/source/img/table_11.html b/docs/source/img/table_11.html
diff --git a/docs/source/img/table_12.html b/docs/source/img/table_12.html
diff --git a/docs/source/img/table_2.html b/docs/source/img/table_2.html
diff --git a/docs/source/img/table_3.html b/docs/source/img/table_3.html
diff --git a/docs/source/img/table_4.html b/docs/source/img/table_4.html
diff --git a/docs/source/img/table_5.html b/docs/source/img/table_5.html
diff --git a/docs/source/img/table_6.html b/docs/source/img/table_6.html
diff --git a/docs/source/img/table_7.html b/docs/source/img/table_7.html
diff --git a/docs/source/img/table_8.html b/docs/source/img/table_8.html
diff --git a/docs/source/img/table_9.html b/docs/source/img/table_9.html
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -21,7 +21,7 @@ Authors
 License
 -------
 
-todo
+This work is licensed under an MIT license
 
 Indices and tables
 ------------------

diff --git a/docs/source/installation.rst b/docs/source/installation.rst
@@ -2,21 +2,12 @@
 Installation
 ------------
 
-This package depends on the python libraries listed in `requirements.txt <https://github.com/sdevenes/M05_MiniProject/blob/master/requirements.txt>`_  to run properly. 
-Please install a modern version of these packages before trying to run the code examples.
-Or create directly a conda virtual environment using the requirements.txt file as in the following
-exemple.
-
-To download a copy of this package, clone it from its repository in GitHub:
+Use pip:
 
 .. code:: sh
 
-   $ git clone https://github.com/sdevenes/M05_MiniProject.git project
-   $ cd project
-   $ conda create -q -n project python=3.8 -c plotly --file requirements.txt
-   $ conda activate project
+   $ pip install --extra-index-url https://test.pypi.org/simple rr_sdas
 
-   (project) $ #you are ready to run baselines!
 
 Building the documentation
 --------------------------
@@ -25,10 +16,10 @@ The project documentation is generated with sphinx and can simply be build using
 
 .. code:: sh
 
-   $ sphinx-apidoc -fMeT -o docs/source/api scripts
+   $ sphinx-apidoc -fMeT -o docs/source/api rr
    $ sphinx-build docs/source docs/build
 
 The first command will generate reStructuredText files containing API documentation for all the python 
 modules located in `scripts <https://github.com/sdevenes/M05_MiniProject/tree/master/scripts>`_ 
 based on their docstrings. The second command will package all the rst files together and build this
-html doc.
+html doc.
diff --git a/docs/source/troubleshooting.rst b/docs/source/troubleshooting.rst
@@ -5,9 +5,8 @@ Unit tests are available for the project and can be run simply with the followin
 
 .. code-block:: shell
 
-   (project) $ nosetests --nocapture -v tests/test.py
+   (project) $ nosetests --nocapture -v rr
 
 .. note::
-   the tests are implemented using `nose <https://pypi.org/project/nose/>`_ python package, it needs to be installed beforhand
-   in the case where the conda environment provided is not activated
+   the tests are implemented using the `nose <https://pypi.org/project/nose/>`_ python package, which needs to be installed beforehand.
 
diff --git a/rr/download_data/__main__.py b/rr/download_data/__main__.py
@@ -9,7 +9,12 @@ def main():
     parser = argparse.ArgumentParser(
         description="M05 mini-project: Download dataset.zip online"
     )
-    parser.add_argument("source", type=str, help="Data zip url")
+    parser.add_argument(
+        "--source",
+        type=str,
+        help="Data zip url",
+        default="https://archive.ics.uci.edu/ml/machine-learning-databases/00506/casas-dataset.zip",
+    )
     parser.add_argument("destination", type=str, help="Destination folder")
     args = parser.parse_args()
 

diff --git a/rr/experiment/__main__.py b/rr/experiment/__main__.py
@@ -6,14 +6,18 @@
 
 
 def main():
+    basepath = os.path.dirname(os.path.realpath(__file__))
     parser = argparse.ArgumentParser(description="M05 mini-project: experiments")
     parser.add_argument("datapath", type=str, help="Dataset file in .csv")
     parser.add_argument("output", type=str, help="Destination folder for the results")
     parser.add_argument(
-        "config", type=str, help="Filepath for experiments configuration file in .ini"
+        "--config",
+        type=str,
+        help="Filepath for experiments configuration file in .ini",
+        default=basepath + "/experiment.ini",
     )
     args = parser.parse_args()
-
+    print(basepath + "/experiment.ini")
     config = configparser.ConfigParser()
     config.read(args.config)
 

diff --git a/rr/experiment/experiment.ini b/rr/experiment/experiment.ini
@@ -0,0 +1,7 @@
+[nb_trees_experiment]
+nb_trees = 1, 5, 10
+tree_depth = 15
+
+[tree_depth_experiment]
+nb_trees = 10
+tree_depth = 5, 10, 15
diff --git a/rr/tests/__init__.py b/rr/tests/__init__.py
diff --git a/experiment.ini → rr/tests/inputs/test_experiment.ini b/experiment.ini → rr/tests/inputs/test_experiment.ini
@@ -1,7 +1,7 @@
 [nb_trees_experiment]
 nb_trees = 1, 2
-tree_depth = 10
+tree_depth = 1
 
 [tree_depth_experiment]
-nb_trees = 10
+nb_trees = 1
 tree_depth = 1, 2
diff --git a/rr/tests/test.py b/rr/tests/test.py
@@ -2,7 +2,7 @@
 import sys
 
 sys.path.insert(0, os.path.dirname(os.path.realpath(__file__)) + "/../experiment/")
-from rr.experiment import database, analysis
+from rr.experiment import database, analysis, experiments
 from rr.download_data import download_data
 import zipfile
 import requests
@@ -176,3 +176,100 @@ def test_get_limited_classes():
         msg="y contains other classes than: {}".format(classes),
     )
     nose.tools.ok_(x.shape[0] == y.shape[0], msg="x.shape[0] != y.shape[0]")
+
+
+def test_base_experiement():
+    datapath = base_path + "/inputs/test_set.csv"
+    cm = experiments.base_experiment("proto1", database.VARIABLES, datapath, 1, 1)
+    nose.tools.ok_(
+        cm.shape == (len(database.CLASSES), len(database.CLASSES)),
+        msg="Confusion matrix size does not match number of classes",
+    )
+
+
+def test_experiment_impact_nb_trees():
+    datapath = base_path + "/inputs/test_set.csv"
+    output_path = base_path + "/output/"
+    results = experiments.experiment_impact_nb_trees(
+        100, datapath, [1, 2], 1, output_path
+    )
+    # Check table counter
+    nose.tools.ok_(
+        "Table 100: Confusion matrix with 1 tree(s) for Protocol `proto1`" in results,
+        msg="Bad table numbering",
+    )
+    nose.tools.ok_(
+        "Table 101: Confusion matrix with 2 tree(s) for Protocol `proto1`" in results,
+        msg="Bad table numbering",
+    )
+    nose.tools.ok_(
+        "Table 102: Confusion matrix with 1 tree(s) for Protocol `proto2`" in results,
+        msg="Bad table numbering",
+    )
+    nose.tools.ok_(
+        "Table 103: Confusion matrix with 2 tree(s) for Protocol `proto2`" in results,
+        msg="Bad table numbering",
+    )
+    # Check table saving
+    nose.tools.ok_(
+        os.path.isfile(output_path + "table_100.html"),
+        msg="Confusion matrix not created",
+    )
+    nose.tools.ok_(
+        os.path.isfile(output_path + "table_101.html"),
+        msg="Confusion matrix not created",
+    )
+    nose.tools.ok_(
+        os.path.isfile(output_path + "table_102.html"),
+        msg="Confusion matrix not created",
+    )
+    nose.tools.ok_(
+        os.path.isfile(output_path + "table_103.html"),
+        msg="Confusion matrix not created",
+    )
+
+
+def test_experiment_impact_tree_depth():
+    datapath = base_path + "/inputs/test_set.csv"
+    output_path = base_path + "/output/"
+    results = experiments.experiment_impact_tree_depth(
+        200, datapath, 1, [1, 2], base_path + "/output/"
+    )
+    # Check table counter
+    nose.tools.ok_(
+        "Table 200: Confusion matrix with trees maximum depth of 1 for Protocol `proto1`"
+        in results,
+        msg="Bad table numbering",
+    )
+    nose.tools.ok_(
+        "Table 201: Confusion matrix with trees maximum depth of 2 for Protocol `proto1`"
+        in results,
+        msg="Bad table numbering",
+    )
+    nose.tools.ok_(
+        "Table 202: Confusion matrix with trees maximum depth of 1 for Protocol `proto2`"
+        in results,
+        msg="Bad table numbering",
+    )
+    nose.tools.ok_(
+        "Table 203: Confusion matrix with trees maximum depth of 2 for Protocol `proto2`"
+        in results,
+        msg="Bad table numbering",
+    )
+    # Check table saving
+    nose.tools.ok_(
+        os.path.isfile(output_path + "table_200.html"),
+        msg="Confusion matrix not created",
+    )
+    nose.tools.ok_(
+        os.path.isfile(output_path + "table_201.html"),
+        msg="Confusion matrix not created",
+    )
+    nose.tools.ok_(
+        os.path.isfile(output_path + "table_202.html"),
+        msg="Confusion matrix not created",
+    )
+    nose.tools.ok_(
+        os.path.isfile(output_path + "table_203.html"),
+        msg="Confusion matrix not created",
+    )
-Original file line number
+Diff line change
@@ Expand Up / @@ -21,7 +21,7 @@ Authors @@
     License
     -------
-    todo
+    This work is licensed under an MIT license
     Indices and tables
     ------------------
@@ Expand Down @@