From 002f5c28688535e5d02087276daa1ad102f1e286 Mon Sep 17 00:00:00 2001 From: SteveDevenes Date: Sun, 20 Sep 2020 11:25:37 +0200 Subject: [PATCH] add automatic code documentation using sphinx --- .gitignore | 8 +++ docs/Makefile | 41 +++++++++++++ docs/source/algorithm.rst | 7 +++ docs/source/analysis.rst | 7 +++ docs/source/conf.py | 111 ++++++++++++++++++++++++++++++++++ docs/source/database.rst | 7 +++ docs/source/download_data.rst | 7 +++ docs/source/index.rst | 40 ++++++++++++ docs/source/modules.rst | 11 ++++ docs/source/test.rst | 7 +++ requirements.txt | 4 +- scripts/algorithm.py | 44 +++++++++++++- scripts/analysis.py | 30 ++++++--- scripts/download_data.py | 36 ++++++++--- 14 files changed, 343 insertions(+), 17 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/source/algorithm.rst create mode 100644 docs/source/analysis.rst create mode 100644 docs/source/conf.py create mode 100644 docs/source/database.rst create mode 100644 docs/source/download_data.rst create mode 100644 docs/source/index.rst create mode 100644 docs/source/modules.rst create mode 100644 docs/source/test.rst diff --git a/.gitignore b/.gitignore index 8fce603..fb0457c 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,9 @@ +### Data source files ### data/ + +### Sphinx Doc ### +docs/build/* +public/* + +### Python execution ### +scripts/__pycache__/ diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..8f4743e --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,41 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build +LIBDIR = ../scripts +OUTPUTDIR = ../public + +# Put it first so that "make" without argument is like "make help". 
+help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +#%: Makefile +# @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: clean +clean: + rm -rf $(BUILDDIR)/* + +.PHONY: gendocs +gendocs: + sphinx-apidoc -f -o "$(SOURCEDIR)" "$(LIBDIR)" --separate + +.PHONY: html +html: gendocs + $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + mkdir -p $(OUTPUTDIR)/html/ + cp -R $(BUILDDIR)/html/* $(OUTPUTDIR) + @echo "html files copied; the html files are in $(OUTPUTDIR)." + + diff --git a/docs/source/algorithm.rst b/docs/source/algorithm.rst new file mode 100644 index 0000000..8dbc89f --- /dev/null +++ b/docs/source/algorithm.rst @@ -0,0 +1,7 @@ +algorithm module +================ + +.. automodule:: algorithm + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/analysis.rst b/docs/source/analysis.rst new file mode 100644 index 0000000..932b0bc --- /dev/null +++ b/docs/source/analysis.rst @@ -0,0 +1,7 @@ +analysis module +=============== + +.. automodule:: analysis + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..a11b770 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,111 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. 
If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import sys +import subprocess +sys.path.insert(0, os.path.abspath('../../scripts')) + + +# -- Project information ----------------------------------------------------- +AUTHOR='Spano Amara, Devènes Steve' +VERSION = "0.1.0" # subprocess.check_output(["git", "describe"]).decode('UTF-8') + +project = 'M05 miniProject' +copyright = '2020' +author = 'Spano Amara, Devènes Steve' + +author = AUTHOR + +# The short X.Y version +version = VERSION +# The full version, including alpha/beta/rc tags +release = VERSION + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + #'sphinx.ext.autodoc', + 'sphinx.ext.napoleon', +] + +# Also document special classes like __init__ +autoclass_content = 'both' + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'pastie' + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. 
+# +# html_theme_options = {} +# html_favicon = '../../img/***.png' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + + +# -- Extension configuration ------------------------------------------------- +#autodoc_mock_imports = ['pandas'] + +napoleon_google_docstring = True +napoleon_numpy_docstring = False + +html_theme_options = { + 'logo_only': False, + 'display_version': True, + 'prev_next_buttons_location': 'bottom', + 'style_external_links': False, + #'vcs_pageview_mode': 'blob', + #'style_nav_header_background': '#da0066', + # Toc options + 'collapse_navigation': False, + 'sticky_navigation': True, + 'navigation_depth': -1, + 'includehidden': True, + 'titles_only': False, +} + +# html_context = { +# "display_gitlab": True, +# "gitlab_user": "", +# "gitlab_repo": "", +# "gitlab_version": "master", +# "conf_py_path": "/docs/source/", +# "gitlab_host": "", +# } + +# html_logo = '../../img/****.svg' +html_title = 'M05 MiniProject docs' + +# Option for linkcheck +linkcheck_anchors=False diff --git a/docs/source/database.rst b/docs/source/database.rst new file mode 100644 index 0000000..e4be239 --- /dev/null +++ b/docs/source/database.rst @@ -0,0 +1,7 @@ +database module +=============== + +.. automodule:: database + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/download_data.rst b/docs/source/download_data.rst new file mode 100644 index 0000000..72cca50 --- /dev/null +++ b/docs/source/download_data.rst @@ -0,0 +1,7 @@ +download\_data module +===================== + +.. 
automodule:: download_data + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..8ce02c4 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,40 @@ +Welcome to M05 miniProject's documentation! +=========================================== + +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: Contents + :glob: + + modules + +Getting started +--------------- + +You can view the content as a: + +* `Webpage `_ +* `pdf `_ + + +Authors +------- + +* `Amara Spano - Github Profile `_ +* `Steve Devenes - Github Profile `_ + +License +------- + +todo + +Indices and tables +------------------ + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + + + diff --git a/docs/source/modules.rst b/docs/source/modules.rst new file mode 100644 index 0000000..99f0960 --- /dev/null +++ b/docs/source/modules.rst @@ -0,0 +1,11 @@ +scripts +======= + +.. toctree:: + :maxdepth: 4 + + algorithm + analysis + database + download_data + test diff --git a/docs/source/test.rst b/docs/source/test.rst new file mode 100644 index 0000000..206500f --- /dev/null +++ b/docs/source/test.rst @@ -0,0 +1,7 @@ +test module +=========== + +.. 
automodule:: test + :members: + :undoc-members: + :show-inheritance: diff --git a/requirements.txt b/requirements.txt index d5d80e1..019eb3c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ requests sklearn -plotly.express \ No newline at end of file +plotly.express +sphinx +sphinx_rtd_theme \ No newline at end of file diff --git a/scripts/algorithm.py b/scripts/algorithm.py index 0403dba..e7e672b 100644 --- a/scripts/algorithm.py +++ b/scripts/algorithm.py @@ -7,18 +7,48 @@ def make_labels(X): - return np.hstack([k*np.ones(len(X[k]), dtype=int) for k in range(len(X))]) + """Generate label array from the given data + + Args: + X (list): A list of 1D arrays (with a dtype of float64) showing the input + training samples, where each item of the list corresponds to one class. + Returns: + numpy.ndarray: A 1D array (with a dtype of int) containing the + label for each sample + Raises: + None + """ + return np.hstack([k*np.ones(len(X[k]), dtype=int) for k in range(len(X))]) class Model: def __init__(self, nb_tree_per_forest=50, max_depth=10): + """Create a new ML model (Random forest classifier from scikit-learn) + + Args: + nb_tree_per_forest: number of decision trees in the forest + max_depth: max depth of the trees + Returns: + None + Raises: + None + """ # Create a random forest model self.model = RandomForestClassifier(n_estimators=nb_tree_per_forest, max_depth=max_depth, random_state=0) def train(self, X): + """Train the model using the given data + Args: + X (list): A list of 1D arrays (with a dtype of float64) showing the input training samples, + where each item of the list corresponds to one class. 
+ Returns: + None + Raises: + None + """ # Get features X_features = np.vstack([k for k in X]) @@ -30,6 +60,18 @@ def train(self, X): def predict(self, X): + """Make a prediction on the data using the trained model + + Args: + X (list): A list of 1D arrays (with a dtype of float64) showing the input training samples, + where each item of the list corresponds to one class. + Returns: + numpy.ndarray: A 1D array (with a dtype of int) containing the predicted + label for each sample + + Raises: + None + """ # Get features X_features = np.vstack([k for k in X]) diff --git a/scripts/analysis.py b/scripts/analysis.py index 168dda7..8d938e3 100644 --- a/scripts/analysis.py +++ b/scripts/analysis.py @@ -4,14 +4,30 @@ import itertools def get_confusion_matrix(prediction_label, true_label): - return confusion_matrix(true_label, prediction_label) + """Get the confusion matrix given the predicted and true labels -def plot_confusion_matrix(cm, classes, - normalize=False, - title='Confusion matrix'): + Args: + prediction_label (list): Estimated targets as returned by a classifier. + true_label (list): Ground truth (correct) target values. + Returns: + numpy.ndarray: A 2D array (with a dtype of int) containing the confusion matrix. + Raises: + None """ - This function prints and plots the confusion matrix. - Normalization can be applied by setting `normalize=True`. + return confusion_matrix(true_label, prediction_label) + +def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix'): + """Generate a plot of the given confusion matrix using plotly express + + Args: + cm (numpy.ndarray): A 2D array (with a dtype of int) containing the confusion matrix. 
+ classes (numpy.ndarray): A 1D array (with a dtype of str) containing the label name for each class + normalize (boolean): Flag to normalize the data + title (str): The title of the plot + Returns: + None + Raises: + None """ # Normalize if wanted if normalize: @@ -24,5 +40,5 @@ def plot_confusion_matrix(cm, classes, y=classes, color_continuous_scale='Blues', title=title - ) + ) fig.show() diff --git a/scripts/download_data.py b/scripts/download_data.py index 8646b06..62d5e90 100644 --- a/scripts/download_data.py +++ b/scripts/download_data.py @@ -5,6 +5,16 @@ # Function to download a file through http.get using requests def download_url(url, save_path): + """Download a file from the given url using http + + Args: + url (str): The url from which the file needs to be downloaded + save_path (str): The filename where the contents should be saved + Returns: + None + Raises: + None + """ with open(save_path, "wb") as f: print("Downloading {} from {}".format(save_path, url)) response = requests.get(url, stream=True) @@ -26,17 +36,27 @@ def download_url(url, save_path): # Function to unzip files def unzip_file(path_to_zip_file, directory_to_extract_to): + """Unzip a .zip file + + Args: + path_to_zip_file (str): The file path of the zip to extract + directory_to_extract_to (str): The directory path where the contents should be extracted + Returns: + None + Raises: + None + """ print("Unzip files..") with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref: zip_ref.extractall(directory_to_extract_to) if __name__ == '__main__': - url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00506/casas-dataset.zip" - url_test = "https://archive.ics.uci.edu/ml/machine-learning-databases/00405/Postures.zip" # Smaller zip to test - save_path = "../data/casas-dataset.zip" - # Download zip file - download_url(url, save_path) - # Unzip it - unzip_file(save_path, "../data_test/") - print("Done") \ No newline at end of file + url = 
"https://archive.ics.uci.edu/ml/machine-learning-databases/00506/casas-dataset.zip" + url_test = "https://archive.ics.uci.edu/ml/machine-learning-databases/00405/Postures.zip" # Smaller zip to test + save_path = "../data/casas-dataset.zip" + # Download zip file + download_url(url, save_path) + # Unzip it + unzip_file(save_path, "../data_test/") + print("Done") \ No newline at end of file