diff --git a/src/dimcat/data/resources/dc.py b/src/dimcat/data/resources/dc.py index 618bff3c..a8954f65 100644 --- a/src/dimcat/data/resources/dc.py +++ b/src/dimcat/data/resources/dc.py @@ -151,7 +151,7 @@ class DimcatResource(Resource, Generic[D]): creating the resource, any row containing a missing value in one of the feature columns is dropped.""" # endregion column name class variables # region associated object types - _default_analyzer: ClassVar[str] = "Proportions" + _default_analyzer: ClassVar[StepSpecs] = "Proportions" """Name of the Analyzer that is used by default for plotting the resource. Needs to return a :obj:`Result`.""" _extractable_features: ClassVar[Optional[Tuple[FeatureName, ...]]] = None """Tuple of :obj:`FeatureNames ` corresponding to the features that can be extracted from this diff --git a/src/dimcat/data/resources/features.py b/src/dimcat/data/resources/features.py index f0537f7d..c4b66782 100644 --- a/src/dimcat/data/resources/features.py +++ b/src/dimcat/data/resources/features.py @@ -26,7 +26,8 @@ class Metadata(Feature): - pass + _default_analyzer = dict(dtype="Proportions", dimension_column="length_qb") + _default_value_column = "piece" # region Annotations diff --git a/src/dimcat/data/resources/results.py b/src/dimcat/data/resources/results.py index efa8f977..8acdeb60 100644 --- a/src/dimcat/data/resources/results.py +++ b/src/dimcat/data/resources/results.py @@ -163,7 +163,7 @@ def value_column(self, value_column: str): @property def x_column(self) -> str: """Name of the result column from which to create one marker per distinct value to show over the x-axis.""" - if self.uses_line_of_fifths_colors: + if self.uses_line_of_fifths_colors or not self.formatted_column: return self.value_column else: return self.formatted_column diff --git a/src/dimcat/steps/analyzers/base.py b/src/dimcat/steps/analyzers/base.py index 95f5dcea..c7c5fe83 100644 --- a/src/dimcat/steps/analyzers/base.py +++ b/src/dimcat/steps/analyzers/base.py @@ -40,7 +40,7 @@ class Analyzer(FeatureProcessingStep): The base class performs no analysis, instantiating it serves mere testing purpose. """ - _dimension_column_name: ClassVar[Optional[str]] = None + _default_dimension_column: ClassVar[Optional[str]] = None """Name of a column, contained in the Results produced by this analyzer, containing some dimension, e.g. one to be interpreted as quantity (durations, counts, etc.) or as color.""" _enum_type: ClassVar[Type[Enum]] = AnalyzerName @@ -140,14 +140,32 @@ def __init__( features: Optional[FeatureSpecs | Iterable[FeatureSpecs]] = None, strategy: DispatchStrategy = DispatchStrategy.GROUPBY_APPLY, smallest_unit: UnitOfAnalysis = UnitOfAnalysis.SLICE, - fill_na: Any = None, + dimension_column: str = None, ): super().__init__(features=features) self._strategy: DispatchStrategy = None self.strategy = strategy self._smallest_unit: UnitOfAnalysis = None self.smallest_unit = smallest_unit - self.fill_na: Any = fill_na + self._dimension_column = None + self.dimension_column = dimension_column + + @property + def dimension_column(self) -> Optional[str]: + """Name of a column, contained in the Results produced by this analyzer, containing some dimension, + e.g. one to be interpreted as quantity (durations, counts, etc.) or as color.""" + return self._dimension_column + + @dimension_column.setter + def dimension_column(self, dimension_column: Optional[str]): + if dimension_column is None: + self._dimension_column = self._default_dimension_column + return + if not isinstance(dimension_column, str): + raise TypeError( + f"dimension_column must be a string, not {type(dimension_column)}" + ) + self._dimension_column = dimension_column @property def strategy(self) -> DispatchStrategy: @@ -183,10 +201,11 @@ def _make_new_resource(self, resource: Feature) -> Result: formatted_column = resource.formatted_column else: formatted_column = None + print("DIMCOL: ", self.dimension_column) result = result_constructor.from_dataframe( analyzed_resource=resource, value_column=value_column, - dimension_column=self._dimension_column_name, + dimension_column=self.dimension_column, formatted_column=formatted_column, df=results, resource_name=result_name, diff --git a/src/dimcat/steps/analyzers/counters.py b/src/dimcat/steps/analyzers/counters.py index 697872a8..3bfa6aa3 100644 --- a/src/dimcat/steps/analyzers/counters.py +++ b/src/dimcat/steps/analyzers/counters.py @@ -14,7 +14,7 @@ class Counter(Analyzer): - _dimension_column_name = "count" + _default_dimension_column = "count" _new_resource_type = Counts @staticmethod @@ -25,12 +25,15 @@ def compute(feature: Feature, **kwargs) -> D: and feature.formatted_column not in groupby ): groupby.append(feature.formatted_column) - result = feature.groupby(groupby)[Counter._dimension_column_name].value_counts( - dropna=False - ) - result = result.to_frame(Counter._dimension_column_name) + result = feature.groupby(groupby)[ + Counter._default_dimension_column + ].value_counts(dropna=False) + result = result.to_frame(Counter._default_dimension_column) return result + class Schema(Analyzer.Schema): + dimension_column = mm.fields.Str(load_default="count") + def groupby_apply(self, feature: Feature, groupby: SomeSeries = None, **kwargs): """Performs the computation on a groupby. The value of ``groupby`` needs to be a Series of the same length as ``feature`` or otherwise work as positional argument to feature.groupby(). @@ -47,7 +50,7 @@ def groupby_apply(self, feature: Feature, groupby: SomeSeries = None, **kwargs): ): groupby.append(feature.formatted_column) result = feature.groupby(groupby).size() - result = result.to_frame(self._dimension_column_name) + result = result.to_frame(self.dimension_column) return result diff --git a/src/dimcat/steps/analyzers/proportions.py b/src/dimcat/steps/analyzers/proportions.py index f863a47e..b9a73675 100644 --- a/src/dimcat/steps/analyzers/proportions.py +++ b/src/dimcat/steps/analyzers/proportions.py @@ -1,5 +1,6 @@ import logging +import marshmallow as mm from dimcat.data.resources.base import D, FeatureName, SomeSeries from dimcat.data.resources.dc import DimcatResource, Feature from dimcat.data.resources.results import Durations @@ -10,7 +11,7 @@ class Proportions(Analyzer): - _dimension_column_name = "duration_qb" + _default_dimension_column = "duration_qb" _new_resource_type = Durations @staticmethod @@ -22,13 +23,16 @@ def compute(feature: Feature, **kwargs) -> D: ): groupby.append(feature.formatted_column) result = ( - feature.groupby(groupby)[Proportions._dimension_column_name] + feature.groupby(groupby)[Proportions._default_dimension_column] .sum() .astype(float) ) result = result.to_frame() return result + class Schema(Analyzer.Schema): + dimension_column = mm.fields.Str(load_default="duration_qb") + def check_resource(self, resource: DimcatResource) -> None: """Check if the resource has a value column.""" super().check_resource(resource) @@ -53,7 +57,7 @@ def groupby_apply(self, feature: Feature, groupby: SomeSeries = None, **kwargs): ): groupby.append(feature.formatted_column) result = ( - feature.groupby(groupby, group_keys=False)[self._dimension_column_name] + feature.groupby(groupby, group_keys=False)[self.dimension_column] .sum() .astype(float) )