diff --git a/HOWTO.md b/HOWTO.md index ad7e708..91ffcb4 100644 --- a/HOWTO.md +++ b/HOWTO.md @@ -6,6 +6,8 @@ This document explains the installation requirements, installation procedure and See also the general information on coleto in the [README](https://github.com/dh-trier/coleto/blob/main/README.md). +For detailed documentation of all functions, please open the [API reference](/docs/index.html) in your browser. + ## Requirements Python 3. This package has been tested with Python 3.7. It should also work with Python 3.6+ diff --git a/coleto/meta_documentation.py b/coleto/meta_documentation.py index a0fafb8..157bc1e 100644 --- a/coleto/meta_documentation.py +++ b/coleto/meta_documentation.py @@ -3,7 +3,9 @@ # Author: Christof Schöch, 2016-2019. """ -Script to save settings and parameters to disk for documentation. +Script to save settings and parameters to disk for documentation. +Each time coleto is run, a timestamped file is created with this information. +The purpose is to document, for any results, the parameters used. Part of coleto, see: https://github.com/dh-trier/coleto. """ diff --git a/coleto/meta_parameters.py b/coleto/meta_parameters.py index 62832d9..8b6403d 100644 --- a/coleto/meta_parameters.py +++ b/coleto/meta_parameters.py @@ -3,7 +3,11 @@ # Author: Christof Schöch, 2016-2021. -"""Script to define and generate parameters, such as paths and filenames. +""" +Script to collect and generate parameters, such as paths and filenames. +The user-defined parameters from the config.yaml file are included here. +Additional parameters are generated based on them. +All parameters are packaged up in the 'params' variable. The parameters are then used by the various coleto modules. Part of coleto, see: https://github.com/dh-trier/coleto. diff --git a/coleto/run_coleto.py b/coleto/run_coleto.py index fa9d084..b4ae2e9 100644 --- a/coleto/run_coleto.py +++ b/coleto/run_coleto.py @@ -4,8 +4,10 @@ """ -Script that runs the coleto text collation pipeline. 
-Please see the readme.md file and the documentation. +This is the main script that controls the coleto text collation pipeline. + +For more information, please see the README.md and HOWTO.md files. +An API reference can be found in the docs folder. """ diff --git a/coleto/stats_distribution.py b/coleto/stats_distribution.py index 2140e41..99498df 100644 --- a/coleto/stats_distribution.py +++ b/coleto/stats_distribution.py @@ -4,7 +4,8 @@ """ Script to create some statistics from the diff analysis data. -These statistics focus on the edit types. +These statistics focus on the frequency of the various edit types. +Based on these statistics, several barcharts can be generated. Part of coleto, see: https://github.com/dh-trier/coleto. """ diff --git a/coleto/text_wdiff.py b/coleto/text_wdiff.py index 350b5de..a618c05 100644 --- a/coleto/text_wdiff.py +++ b/coleto/text_wdiff.py @@ -5,6 +5,10 @@ """ Uses subprocess to call wdiff from the command line. You need to have wdiff installed. +Based on the input texts split into sentences, +wdiff first aligns the text, identifying insertions and deletions. +It then identifies each location of difference between two aligned sentences. +A sanity check is included to make sure wdiff has run correctly. """ diff --git a/coleto/viz_distribution.py b/coleto/viz_distribution.py index ca740fb..858eb93 100644 --- a/coleto/viz_distribution.py +++ b/coleto/viz_distribution.py @@ -4,6 +4,8 @@ """ Script to visualize some basic collation statistics. +The key information visualized is frequency of edit types. +Uses the pygal library to create several barcharts in SVG. Part of coleto, see: https://github.com/dh-trier/coleto. """ diff --git a/coleto/viz_progression.py b/coleto/viz_progression.py index c06a7e3..196c804 100644 --- a/coleto/viz_progression.py +++ b/coleto/viz_progression.py @@ -5,6 +5,10 @@ """ Visualize edit intensity over textual progression. Relies on cumulated Levenshtein distances per sentence. 
+Uses the pygal library to create a barchart. +Uses scipy for some smoothing. +Input: The distribution statistics. +Output: Saves an SVG file to disk. Part of coleto, see: https://github.com/dh-trier/coleto. """