Merge pull request #2 from johentsch/plotting
Plotting
johentsch authored Nov 7, 2023
2 parents 5c832be + d4cfd6f · commit 5fa0ff2
Showing 34 changed files with 2,753 additions and 368 deletions.
2 changes: 1 addition & 1 deletion .isort.cfg
@@ -1,3 +1,3 @@
[settings]
-known_third_party = _pytest,dimcat,frictionless,git,marshmallow,ms3,music21,pandas,plotly,pytest,setuptools,tqdm,typing_extensions,yaml
+known_third_party = _pytest,dimcat,frictionless,git,kaleido,marshmallow,matplotlib,ms3,music21,numpy,pandas,plotly,pytest,scipy,seaborn,setuptools,tqdm,typing_extensions,yaml
profile = black
2 changes: 1 addition & 1 deletion docs/notebooks
Submodule notebooks updated 45 files
+1 −1 .isort.cfg
+312 −0 accents.md
+72 −71 annotations.md
+69 −121 bass_degrees.md
+438 −0 bass_degrees.py
+24 −12 cadences.md
+33 −32 chromatic_bass.md
+2 −1 dft.md
+84 −72 harmonies.md
+296 −0 harmonies.py
+266 −0 ismir.md
+240 −0 ismir.py
+12 −11 keys.md
+55 −126 line_of_fifths.md
+115 −0 line_of_fifths.py
+22 −21 modulations.md
+2 −1 notes_stats.md
+ outputs/accents/corpuswise_proportion_of_accented_and_staccatissimo_positions.png
+ outputs/accents/corpuswise_proportion_of_accented_positions.png
+ outputs/bass_degrees/bass_degree_bigrams.png
+ outputs/bass_degrees/bass_degree_bigrams_scale_order.png
+ outputs/bass_degrees/bass_degree_unigrams.png
+ outputs/bass_degrees/bass_degree_unigrams_major.png
+ outputs/bass_degrees/bass_degree_unigrams_minor.png
+ outputs/bass_degrees/bass_progression_intervals_within_all_key_segments_sorted_bars.png
+6,578 −5,889 outputs/bass_degrees/key_regions.tsv
+ outputs/bass_degrees/n_labels_per_key_segment_histogram.png
+ outputs/ismir/bass_degree_bigrams.png
+ outputs/ismir/chord_type_distribution_cumulative.png
+ outputs/ismir/complete_pitch_class_distribution_absolute_bars.png
+ outputs/ismir/fun_fun_fun.png
+ outputs/ismir/localkey_distributions.png
+ outputs/ismir/localkey_transition_matrix.pdf
+ outputs/ismir/major_minor_key_segments_corpuswise_absolute_stacked_bars.png
+ outputs/line_of_fifths/all_pitch_class_distributions_piecewise_bubbles.png
+ outputs/line_of_fifths/all_pitch_class_distributions_yearwise_bubbles.png
+ outputs/line_of_fifths/complete_pitch_class_distribution_absolute_bars.png
+ outputs/line_of_fifths/debussy_la_mer_beginning_barwise_pitch_class_distributions_bubbles.png
+ outputs/line_of_fifths/debussy_la_mer_beginning_pitch_class_distribution_bars.png
+18,612 −0 outputs/reduction/bass_note_bigram_counts.tsv
+222,437 −0 outputs/reduction/bass_note_bigrams.tsv
+48 −20 overview.md
+1,067 −0 reduction.md
+57 −11 scale_degrees.md
+361 −684 utils.py
9 changes: 5 additions & 4 deletions setup.cfg
@@ -49,12 +49,13 @@ python_requires = >=3.10
# For more information, check out https://semver.org/.
install_requires =
    frictionless[zenodo,pandas,visidata]~=5.15.10
-    pandas>=1.5.3
+    pandas>=2.0.0
    marshmallow>=3.20.1
-    ms3>=2.2.2
+    ms3>=2.4.0
    music21>=9.1.0
-    plotly>=5.16.1
-    seaborn>=0.12.2
+    plotly>=5.18.0
+    scipy~=1.11.3
+    seaborn>=0.13.0
    setuptools~=68.2.0

[options.packages.find]
5 changes: 2 additions & 3 deletions src/dimcat/__init__.py
@@ -22,10 +22,9 @@
    get_class,
    get_schema,
)
+from .data import catalogs, datasets, packages, resources
from .data.datasets.base import Dataset
-from .data.resources.dc import PieceIndex
-from .steps.extractors.base import FeatureExtractor  # required
-from .steps.loaders.base import PackageLoader
+from .steps import analyzers, extractors, groupers, loaders, pipelines, slicers
from .steps.pipelines.base import Pipeline

logger = logging.getLogger(__name__)
32 changes: 32 additions & 0 deletions src/dimcat/base.py
@@ -431,6 +431,8 @@ def __init__(
    ):
        if isinstance(options, DimcatConfig):
            options = options.options
+        elif isinstance(options, str) and dtype is None:
+            options = dict(dtype=options)
        options = dict(options, **kwargs)
        if dtype is None:
            if "dtype" not in options:
@@ -725,6 +727,14 @@ class DimcatSettings(DimcatObject):
    )
    default_basepath: str = "~/dimcat_data"
    """where to serialize data if no other basepath is specified"""
+    default_figure_path: str = "~/dimcat_data"
+    """where to store figures if no other path was specified"""
+    default_figure_format: str = ".png"
+    """default format for all figures stored by DiMCAT."""
+    default_figure_width: int = 2880
+    """default width in pixels for figures stored by DiMCAT"""
+    default_figure_height: int = 1620
+    """default height in pixels for figures stored by DiMCAT"""
    default_resource_name: str = "unnamed"
    never_store_unvalidated_data: bool = True
    """setting this to False allows for skipping mandatory validations; set to True for production"""
@@ -762,6 +772,28 @@ class Schema(DimcatObject.Schema):
                "description": "where to serialize data if no other basepath is specified"
            },
        )
+        default_figure_path = mm.fields.String(
+            required=True,
+            metadata={
+                "description": "where to store figures if no other path was specified"
+            },
+        )
+        default_figure_format = mm.fields.String(
+            required=True,
+            metadata={"description": "default format for all figures stored by DiMCAT"},
+        )
+        default_figure_width = mm.fields.Integer(
+            required=True,
+            metadata={
+                "description": "default width in pixels for figures stored by DiMCAT"
+            },
+        )
+        default_figure_height = mm.fields.Integer(
+            required=True,
+            metadata={
+                "description": "default height in pixels for figures stored by DiMCAT"
+            },
+        )
        default_resource_name = mm.fields.String(required=True)
        never_store_unvalidated_data = mm.fields.Boolean(
            required=True,
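
The new branch in the constructor hunk above (assuming it belongs to DimcatConfig.__init__) lets a bare string stand in for the dtype option, while the DimcatSettings additions register default figure path, format, width, and height. A minimal sketch of the string shorthand, using "Notes" purely as an illustrative dtype name:

    from dimcat.base import DimcatConfig

    # After this change, a bare string is treated as the dtype option;
    # "Notes" is used here only for illustration.
    config_from_dict = DimcatConfig(dict(dtype="Notes"))
    config_from_str = DimcatConfig("Notes")
    assert config_from_str.options == config_from_dict.options
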
6 changes: 4 additions & 2 deletions src/dimcat/data/catalogs/outputs.py
@@ -1,21 +1,23 @@
from __future__ import annotations

-from typing import Iterator, Tuple
+from typing import Iterator, Optional, Tuple

from dimcat.data.catalogs.base import DimcatCatalog
from dimcat.data.resources.dc import DimcatResource
from dimcat.data.resources.features import FeatureSpecs, feature_specs2config


class OutputsCatalog(DimcatCatalog):
-    def get_feature(self, feature: FeatureSpecs) -> DimcatResource:
+    def get_feature(self, feature: Optional[FeatureSpecs] = None) -> DimcatResource:
        """Looks up the given feature in the "features" package and returns it.

        Raises:
            PackageNotFoundError: If no package with the name "features" is loaded.
            NoMatchingResourceFoundError: If no resource matching the specs is found in the "features" package.
        """
        package = self.get_package_by_name("features")
+        if feature is None:
+            return package.get_resource_by_name()
        feature_config = feature_specs2config(feature)
        return package.get_resource_by_config(feature_config)

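
At the catalog level, passing no feature spec now means "return the feature resource added most recently". A sketch, assuming catalog is an OutputsCatalog that already contains a populated "features" package:

    def latest_feature(catalog):
        """catalog: an OutputsCatalog whose "features" package is already populated."""
        return catalog.get_feature()  # with feature=None, the last resource in "features" is returned
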
9 changes: 8 additions & 1 deletion src/dimcat/data/datasets/base.py
@@ -248,7 +248,7 @@ def extract_feature(self, feature: FeatureSpecs) -> Feature:
        self._pipeline.add_step(feature_extractor)
        return extracted

-    def get_feature(self, feature: FeatureSpecs) -> Feature:
+    def get_feature(self, feature: Optional[FeatureSpecs] = None) -> Feature:
        """High-level method that first looks up a feature fitting the specs in the outputs catalog,
        and adds a FeatureExtractor to the dataset's pipeline otherwise."""
        feature_config = feature_specs2config(feature)
@@ -299,6 +299,13 @@ def get_metadata(self) -> SomeDataframe:
        metadata = self.inputs.get_metadata()
        return metadata

+    def load(
+        self,
+        package: PackageSpecs,
+    ):
+        """High-level method that tries to infer what it is that you want to load."""
+        self.load_package(package=package)
+
    def load_package(
        self,
        package: PackageSpecs,
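
Together with the catalog change, Dataset gains a load() convenience method that simply delegates to load_package(), and get_feature() becomes callable without arguments. A minimal sketch, assuming an empty Dataset can be instantiated directly, that the (hypothetical) descriptor path points to an existing datapackage, and that "notes" is a valid feature specification:

    from dimcat import Dataset

    dataset = Dataset()
    dataset.load("path/to/datapackage.json")  # currently just delegates to load_package()
    notes = dataset.get_feature("notes")

With no argument, get_feature() presumably falls back to the most recently extracted feature, mirroring OutputsCatalog.get_feature().
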
37 changes: 32 additions & 5 deletions src/dimcat/data/datasets/processed.py
@@ -5,7 +5,10 @@
from __future__ import annotations

import logging
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING, List, Optional
+
+from dimcat.base import DimcatConfig
+from dimcat.dc_exceptions import NoMatchingResourceFoundError

from .base import Dataset

@@ -39,12 +42,36 @@ def add_result(self, result: Result):
        """Adds a result to the outputs catalog."""
        self.add_output(resource=result, package_name="results")

-    def get_result(self, analyzer_name: Optional[str] = None):
-        """Returns the result of the previously applied analyzer with the given name."""
+    def get_result(self, regex: Optional[str] = None):
+        """Returns the last result that matches the given regex or, if None, the last result added."""
        results = self.outputs.get_package("results")
-        if analyzer_name is None:
+        if regex is None:
            return results.get_resource_by_name()
-        raise NotImplementedError("get_result with analyzer_name not implemented yet.")
+        results = self.get_results_by_regex(regex=regex)
+        if not results:
+            raise NoMatchingResourceFoundError(regex, results.package_name)
+        else:
+            return results[-1]
+
+    def get_result_by_config(self, config: DimcatConfig) -> Result:
+        """Returns the result of the previously applied analyzer with the given name."""
+        results = self.outputs.get_package("results")
+        return results.get_resource_by_config(config=config)
+
+    def get_result_by_name(self, name: str) -> Result:
+        """Returns the result of the previously applied analyzer with the given name."""
+        results = self.outputs.get_package("results")
+        return results.get_resource_by_name(name=name)
+
+    def get_results_by_regex(self, regex: str) -> List[Result]:
+        """Returns the result of the previously applied analyzer with the given name."""
+        results = self.outputs.get_package("results")
+        return results.get_resources_by_regex(regex=regex)
+
+    def get_results_by_type(self, resource_type: type) -> List[Result]:
+        """Returns the result of the previously applied analyzer with the given name."""
+        results = self.outputs.get_package("results")
+        return results.get_resources_by_type(resource_type=resource_type)


class SlicedGroupedAnalyzedDataset(
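
The result-retrieval helpers can be exercised roughly as follows, assuming analyzed is an AnalyzedDataset (or subclass) whose outputs catalog already holds a "results" package, e.g. after applying an analyzer; the regex and resource name are illustrative only:

    def pick_results(analyzed):
        """Sketch of the result-retrieval methods introduced in this commit."""
        last_added = analyzed.get_result()             # no regex: the result added most recently
        last_match = analyzed.get_result("counted")    # last result whose name matches the regex
        by_name = analyzed.get_result_by_name("notes.counted")  # hypothetical resource name
        return analyzed.get_results_by_regex("counted")  # every matching result
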
3 changes: 2 additions & 1 deletion src/dimcat/data/packages/base.py
@@ -1139,7 +1139,8 @@ def get_metadata(self) -> SomeDataframe:
            raise NotImplementedError(
                f"More than one metadata resource found: {resources!r}"
            )
-        metadata = resources[0].df
+        metadata = resources[0]
+        metadata.load()
        return metadata

    def get_resource_by_config(self, config: DimcatConfig) -> Resource:
1 change: 1 addition & 0 deletions src/dimcat/data/resources/base.py
@@ -333,6 +333,7 @@ def from_descriptor_path(
            **kwargs: Subclasses can use this method.
        """
        basepath, descriptor_filename = os.path.split(descriptor_path)
+        basepath = resolve_path(basepath)  # could be relative
        if "basepath" in kwargs:
            kw_basepath = kwargs.pop("basepath")
            if kw_basepath != basepath: