Merge pull request #2 from johentsch/plotting
Plotting
johentsch authored Nov 7, 2023
2 parents 5c832be + d4cfd6f · commit 5fa0ff2
Showing 34 changed files with 2,753 additions and 368 deletions.
2 changes: 1 addition & 1 deletion .isort.cfg
@@ -1,3 +1,3 @@
[settings]
-known_third_party = _pytest,dimcat,frictionless,git,marshmallow,ms3,music21,pandas,plotly,pytest,setuptools,tqdm,typing_extensions,yaml
+known_third_party = _pytest,dimcat,frictionless,git,kaleido,marshmallow,matplotlib,ms3,music21,numpy,pandas,plotly,pytest,scipy,seaborn,setuptools,tqdm,typing_extensions,yaml
profile = black
2 changes: 1 addition & 1 deletion docs/notebooks
Submodule notebooks updated 45 files
+1 −1 .isort.cfg
+312 −0 accents.md
+72 −71 annotations.md
+69 −121 bass_degrees.md
+438 −0 bass_degrees.py
+24 −12 cadences.md
+33 −32 chromatic_bass.md
+2 −1 dft.md
+84 −72 harmonies.md
+296 −0 harmonies.py
+266 −0 ismir.md
+240 −0 ismir.py
+12 −11 keys.md
+55 −126 line_of_fifths.md
+115 −0 line_of_fifths.py
+22 −21 modulations.md
+2 −1 notes_stats.md
+ outputs/accents/corpuswise_proportion_of_accented_and_staccatissimo_positions.png
+ outputs/accents/corpuswise_proportion_of_accented_positions.png
+ outputs/bass_degrees/bass_degree_bigrams.png
+ outputs/bass_degrees/bass_degree_bigrams_scale_order.png
+ outputs/bass_degrees/bass_degree_unigrams.png
+ outputs/bass_degrees/bass_degree_unigrams_major.png
+ outputs/bass_degrees/bass_degree_unigrams_minor.png
+ outputs/bass_degrees/bass_progression_intervals_within_all_key_segments_sorted_bars.png
+6,578 −5,889 outputs/bass_degrees/key_regions.tsv
+ outputs/bass_degrees/n_labels_per_key_segment_histogram.png
+ outputs/ismir/bass_degree_bigrams.png
+ outputs/ismir/chord_type_distribution_cumulative.png
+ outputs/ismir/complete_pitch_class_distribution_absolute_bars.png
+ outputs/ismir/fun_fun_fun.png
+ outputs/ismir/localkey_distributions.png
+ outputs/ismir/localkey_transition_matrix.pdf
+ outputs/ismir/major_minor_key_segments_corpuswise_absolute_stacked_bars.png
+ outputs/line_of_fifths/all_pitch_class_distributions_piecewise_bubbles.png
+ outputs/line_of_fifths/all_pitch_class_distributions_yearwise_bubbles.png
+ outputs/line_of_fifths/complete_pitch_class_distribution_absolute_bars.png
+ outputs/line_of_fifths/debussy_la_mer_beginning_barwise_pitch_class_distributions_bubbles.png
+ outputs/line_of_fifths/debussy_la_mer_beginning_pitch_class_distribution_bars.png
+18,612 −0 outputs/reduction/bass_note_bigram_counts.tsv
+222,437 −0 outputs/reduction/bass_note_bigrams.tsv
+48 −20 overview.md
+1,067 −0 reduction.md
+57 −11 scale_degrees.md
+361 −684 utils.py
9 changes: 5 additions & 4 deletions setup.cfg
@@ -49,12 +49,13 @@ python_requires = >=3.10
# For more information, check out https://semver.org/.
install_requires =
    frictionless[zenodo,pandas,visidata]~=5.15.10
-    pandas>=1.5.3
+    pandas>=2.0.0
    marshmallow>=3.20.1
-    ms3>=2.2.2
+    ms3>=2.4.0
    music21>=9.1.0
-    plotly>=5.16.1
-    seaborn>=0.12.2
+    plotly>=5.18.0
+    scipy~=1.11.3
+    seaborn>=0.13.0
    setuptools~=68.2.0

[options.packages.find]
5 changes: 2 additions & 3 deletions src/dimcat/__init__.py
@@ -22,10 +22,9 @@
    get_class,
    get_schema,
)
+from .data import catalogs, datasets, packages, resources
from .data.datasets.base import Dataset
-from .data.resources.dc import PieceIndex
-from .steps.extractors.base import FeatureExtractor  # required
-from .steps.loaders.base import PackageLoader
+from .steps import analyzers, extractors, groupers, loaders, pipelines, slicers
from .steps.pipelines.base import Pipeline

logger = logging.getLogger(__name__)
32 changes: 32 additions & 0 deletions src/dimcat/base.py
@@ -431,6 +431,8 @@ def __init__(
    ):
        if isinstance(options, DimcatConfig):
            options = options.options
+        elif isinstance(options, str) and dtype is None:
+            options = dict(dtype=options)
        options = dict(options, **kwargs)
        if dtype is None:
            if "dtype" not in options:
@@ -725,6 +727,14 @@ class DimcatSettings(DimcatObject):
    )
    default_basepath: str = "~/dimcat_data"
    """where to serialize data if no other basepath is specified"""
+    default_figure_path: str = "~/dimcat_data"
+    """where to store figures if no other path was specified"""
+    default_figure_format: str = ".png"
+    """default format for all figures stored by DiMCAT."""
+    default_figure_width: int = 2880
+    """default width in pixels for figures stored by DiMCAT"""
+    default_figure_height: int = 1620
+    """default height in pixels for figures stored by DiMCAT"""
    default_resource_name: str = "unnamed"
    never_store_unvalidated_data: bool = True
    """setting this to False allows for skipping mandatory validations; set to True for production"""
@@ -762,6 +772,28 @@ class Schema(DimcatObject.Schema):
                "description": "where to serialize data if no other basepath is specified"
            },
        )
+        default_figure_path = mm.fields.String(
+            required=True,
+            metadata={
+                "description": "where to store figures if no other path was specified"
+            },
+        )
+        default_figure_format = mm.fields.String(
+            required=True,
+            metadata={"description": "default format for all figures stored by DiMCAT"},
+        )
+        default_figure_width = mm.fields.Integer(
+            required=True,
+            metadata={
+                "description": "default width in pixels for figures stored by DiMCAT"
+            },
+        )
+        default_figure_height = mm.fields.Integer(
+            required=True,
+            metadata={
+                "description": "default height in pixels for figures stored by DiMCAT"
+            },
+        )
        default_resource_name = mm.fields.String(required=True)
        never_store_unvalidated_data = mm.fields.Boolean(
            required=True,
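
The new branch in the constructor hunk above (assuming it belongs to DimcatConfig.__init__) lets a bare string stand in for the dtype option, while the DimcatSettings additions register default figure path, format, width, and height. A minimal sketch of the string shorthand, using "Notes" purely as an illustrative dtype name:

    from dimcat.base import DimcatConfig

    # After this change, a bare string is treated as the dtype option;
    # "Notes" is used here only for illustration.
    config_from_dict = DimcatConfig(dict(dtype="Notes"))
    config_from_str = DimcatConfig("Notes")
    assert config_from_str.options == config_from_dict.options
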
6 changes: 4 additions & 2 deletions src/dimcat/data/catalogs/outputs.py
@@ -1,21 +1,23 @@
from __future__ import annotations

-from typing import Iterator, Tuple
+from typing import Iterator, Optional, Tuple

from dimcat.data.catalogs.base import DimcatCatalog
from dimcat.data.resources.dc import DimcatResource
from dimcat.data.resources.features import FeatureSpecs, feature_specs2config


class OutputsCatalog(DimcatCatalog):
-    def get_feature(self, feature: FeatureSpecs) -> DimcatResource:
+    def get_feature(self, feature: Optional[FeatureSpecs] = None) -> DimcatResource:
        """Looks up the given feature in the "features" package and returns it.

        Raises:
            PackageNotFoundError: If no package with the name "features" is loaded.
            NoMatchingResourceFoundError: If no resource matching the specs is found in the "features" package.
        """
        package = self.get_package_by_name("features")
+        if feature is None:
+            return package.get_resource_by_name()
        feature_config = feature_specs2config(feature)
        return package.get_resource_by_config(feature_config)

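
At the catalog level, passing no feature spec now means "return the feature resource added most recently". A sketch, assuming catalog is an OutputsCatalog that already contains a populated "features" package:

    def latest_feature(catalog):
        """catalog: an OutputsCatalog whose "features" package is already populated."""
        return catalog.get_feature()  # with feature=None, the last resource in "features" is returned
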
9 changes: 8 additions & 1 deletion src/dimcat/data/datasets/base.py
@@ -248,7 +248,7 @@ def extract_feature(self, feature: FeatureSpecs) -> Feature:
        self._pipeline.add_step(feature_extractor)
        return extracted

-    def get_feature(self, feature: FeatureSpecs) -> Feature:
+    def get_feature(self, feature: Optional[FeatureSpecs] = None) -> Feature:
        """High-level method that first looks up a feature fitting the specs in the outputs catalog,
        and adds a FeatureExtractor to the dataset's pipeline otherwise."""
        feature_config = feature_specs2config(feature)
@@ -299,6 +299,13 @@ def get_metadata(self) -> SomeDataframe:
        metadata = self.inputs.get_metadata()
        return metadata

+    def load(
+        self,
+        package: PackageSpecs,
+    ):
+        """High-level method that tries to infer what it is that you want to load."""
+        self.load_package(package=package)
+
    def load_package(
        self,
        package: PackageSpecs,
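
Together with the catalog change, Dataset gains a load() convenience method that simply delegates to load_package(), and get_feature() becomes callable without arguments. A minimal sketch, assuming an empty Dataset can be instantiated directly, that the (hypothetical) descriptor path points to an existing datapackage, and that "notes" is a valid feature specification:

    from dimcat import Dataset

    dataset = Dataset()
    dataset.load("path/to/datapackage.json")  # currently just delegates to load_package()
    notes = dataset.get_feature("notes")

With no argument, get_feature() presumably falls back to the most recently extracted feature, mirroring OutputsCatalog.get_feature().
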
37 changes: 32 additions & 5 deletions src/dimcat/data/datasets/processed.py
@@ -5,7 +5,10 @@
from __future__ import annotations

import logging
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING, List, Optional
+
+from dimcat.base import DimcatConfig
+from dimcat.dc_exceptions import NoMatchingResourceFoundError

from .base import Dataset

@@ -39,12 +42,36 @@ def add_result(self, result: Result):
        """Adds a result to the outputs catalog."""
        self.add_output(resource=result, package_name="results")

-    def get_result(self, analyzer_name: Optional[str] = None):
-        """Returns the result of the previously applied analyzer with the given name."""
+    def get_result(self, regex: Optional[str] = None):
+        """Returns the last result that matches the given regex or, if None, the last result added."""
        results = self.outputs.get_package("results")
-        if analyzer_name is None:
+        if regex is None:
            return results.get_resource_by_name()
-        raise NotImplementedError("get_result with analyzer_name not implemented yet.")
+        results = self.get_results_by_regex(regex=regex)
+        if not results:
+            raise NoMatchingResourceFoundError(regex, results.package_name)
+        else:
+            return results[-1]
+
+    def get_result_by_config(self, config: DimcatConfig) -> Result:
+        """Returns the result of the previously applied analyzer with the given name."""
+        results = self.outputs.get_package("results")
+        return results.get_resource_by_config(config=config)
+
+    def get_result_by_name(self, name: str) -> Result:
+        """Returns the result of the previously applied analyzer with the given name."""
+        results = self.outputs.get_package("results")
+        return results.get_resource_by_name(name=name)
+
+    def get_results_by_regex(self, regex: str) -> List[Result]:
+        """Returns the result of the previously applied analyzer with the given name."""
+        results = self.outputs.get_package("results")
+        return results.get_resources_by_regex(regex=regex)
+
+    def get_results_by_type(self, resource_type: type) -> List[Result]:
+        """Returns the result of the previously applied analyzer with the given name."""
+        results = self.outputs.get_package("results")
+        return results.get_resources_by_type(resource_type=resource_type)


class SlicedGroupedAnalyzedDataset(
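
The result-retrieval helpers can be exercised roughly as follows, assuming analyzed is an AnalyzedDataset (or subclass) whose outputs catalog already holds a "results" package, e.g. after applying an analyzer; the regex and resource name are illustrative only:

    def pick_results(analyzed):
        """Sketch of the result-retrieval methods introduced in this commit."""
        last_added = analyzed.get_result()             # no regex: the result added most recently
        last_match = analyzed.get_result("counted")    # last result whose name matches the regex
        by_name = analyzed.get_result_by_name("notes.counted")  # hypothetical resource name
        return analyzed.get_results_by_regex("counted")  # every matching result
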
3 changes: 2 additions & 1 deletion src/dimcat/data/packages/base.py
@@ -1139,7 +1139,8 @@ def get_metadata(self) -> SomeDataframe:
            raise NotImplementedError(
                f"More than one metadata resource found: {resources!r}"
            )
-        metadata = resources[0].df
+        metadata = resources[0]
+        metadata.load()
        return metadata

    def get_resource_by_config(self, config: DimcatConfig) -> Resource:
1 change: 1 addition & 0 deletions src/dimcat/data/resources/base.py
@@ -333,6 +333,7 @@ def from_descriptor_path(
            **kwargs: Subclasses can use this method.
        """
        basepath, descriptor_filename = os.path.split(descriptor_path)
+        basepath = resolve_path(basepath)  # could be relative
        if "basepath" in kwargs:
            kw_basepath = kwargs.pop("basepath")
            if kw_basepath != basepath: