From 36c06ea9127f2baba90f079e421dfe7c9da9dc46 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa
Date: Wed, 7 Feb 2024 04:31:17 +0900
Subject: [PATCH] Cleanup dataframes (#1360)

### Summary

This PR updates the implementation of `ScatterTable` and
`AnalysisResultTable` based on the
[comment](https://github.com/Qiskit-Extensions/qiskit-experiments/pull/1319#issuecomment-1827343405)
from @itoko .

### Details and comments

The current pattern relies heavily on inheritance, i.e.
`Table(DataFrame, MixIn)`, but this causes several problems. The Qiskit
Experiments classes directly depend on the third-party library, resulting
in Sphinx directive mismatches and poor robustness of the API. Instead of
using inheritance, these classes are refactored with composition and
delegation, namely

```python
class Table:
    def __init__(self):
        self._data = DataFrame(...)
```

This pattern is also common in other software libraries that use dataframes.
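For example, data access can be exposed through a handful of explicit
properties instead of the whole inherited pandas API. A minimal sketch of
the pattern (illustrative only, not the exact implementation in this PR):

```python
import numpy as np
import pandas as pd


class Table:
    """Sketch: the table owns a dataframe instead of being one."""

    COLUMNS = ["xval", "yval"]

    def __init__(self):
        self._data = pd.DataFrame(columns=self.COLUMNS)

    @property
    def dataframe(self) -> pd.DataFrame:
        # A single property exposes the underlying data, so the public
        # surface of the class no longer tracks the pandas API.
        return self._data

    @property
    def x(self) -> np.ndarray:
        # Typed accessors delegate to the dataframe and pin the dtype.
        return self._data.xval.to_numpy(dtype=float)
```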
Since this PR removes unreleased public classes, it should be merged
before the release. Although this PR updates many files, the changes are
just delegation of the data handling logic to the container classes
themselves, which simplifies the implementation of the classes that
operate on the container objects. The new pattern also allows stricter
dtype management with the dataframe.

---------

Co-authored-by: Will Shanks
---
 docs/howtos/rerun_analysis.rst                |   4 +-
 docs/tutorials/curve_analysis.rst             | 157 ++++--
 qiskit_experiments/curve_analysis/__init__.py |   1 +
 .../composite_curve_analysis.py               |  74 +--
 .../curve_analysis/curve_analysis.py          | 217 ++++----
 .../curve_analysis/scatter_table.py           | 491 ++++++++++++++----
 .../standard_analysis/bloch_trajectory.py     |   6 +-
 .../framework/analysis_result_table.py        | 402 ++++++++------
 .../framework/composite/composite_analysis.py |   5 +-
 .../framework/experiment_data.py              | 172 ++----
 qiskit_experiments/framework/table_mixin.py   | 109 ----
 .../analysis/drag_analysis.py                 |   4 +-
 .../analysis/ramsey_xy_analysis.py            |   4 +-
 .../characterization/analysis/t1_analysis.py  |   4 +-
 .../analysis/zz_ramsey_analysis.py            |   4 +-
 .../driven_freq_tuning/coefficients.py        |   1 +
 .../ramsey_amp_scan_analysis.py               |  88 ++--
 .../interleaved_rb_analysis.py                |   4 +-
 requirements.txt                              |   2 +-
 test/curve_analysis/test_scatter_table.py     | 285 ++++++++++
 test/extended_equality.py                     |   4 +-
 test/framework/test_analysis_results_table.py | 138 +++++
 test/framework/test_composite.py              |   8 +
 test/framework/test_data_table.py             | 221 --------
 24 files changed, 1425 insertions(+), 980 deletions(-)
 delete mode 100644 qiskit_experiments/framework/table_mixin.py
 create mode 100644 test/curve_analysis/test_scatter_table.py
 create mode 100644 test/framework/test_analysis_results_table.py
 delete mode 100644 test/framework/test_data_table.py

diff --git a/docs/howtos/rerun_analysis.rst b/docs/howtos/rerun_analysis.rst
index 532d968cf2..c5d8653fcf 100644
--- a/docs/howtos/rerun_analysis.rst
+++ b/docs/howtos/rerun_analysis.rst
@@ -17,7 +17,7 @@ Solution
 consult the `migration guide `_.\
 Once you recreate the exact experiment you ran and all of its parameters and options,
-you can call the :meth:`.add_jobs` method with a list of :class:`Job
+you can call the :meth:`.ExperimentData.add_jobs` method with a list of :class:`Job
 ` objects to generate the new :class:`.ExperimentData` object.
 The following example retrieves jobs from a provider that has access to them via their
 job IDs:
@@ -47,7 +47,7 @@ job IDs:
     instead of overwriting the existing one.
 
 If you have the job data in the form of a :class:`~qiskit.result.Result` object, you can
-invoke the :meth:`.add_data` method instead of :meth:`.add_jobs`:
+invoke the :meth:`.ExperimentData.add_data` method instead of :meth:`.ExperimentData.add_jobs`:
 
 .. jupyter-input::
 
diff --git a/docs/tutorials/curve_analysis.rst b/docs/tutorials/curve_analysis.rst
index 8da46cbc75..4b243ef48d 100644
--- a/docs/tutorials/curve_analysis.rst
+++ b/docs/tutorials/curve_analysis.rst
@@ -240,6 +240,85 @@ generate initial guesses for parameters, from the ``AnalysisA`` class in the fir
 On the other hand, in the latter case, you need to manually copy and paste
 every logic defined in ``AnalysisA``.
 
+.. _data_management_with_scatter_table:
+
+Managing intermediate data
+--------------------------
+
+:class:`.ScatterTable` is the single source of truth for the data used in the curve fit analysis.
+Each data point in a 1-D curve fit may consist of the x value, y value, and
+standard error of the y value.
+In addition, such an analysis may internally create several data subsets.
+Each data point is given a metadata triplet (`series_id`, `category`, `analysis`)
+to identify the subset to which it belongs.
+
+* The `series_id` is an integer key representing a label of the data, which may be classified by the fit models.
+  When an analysis consists of multiple fit models and performs a multi-objective fit,
+  the created table may contain multiple datasets, one for each fit model.
+  Usually the series index matches the index of the fit model in the analysis.
+  The table also provides a `series_name` column, which is a human-friendly text notation of the `series_id`.
+  The `series_name` and the corresponding `series_id` must refer to the same data subset,
+  and the `series_name` typically matches the name of the fit model.
+  You can find a particular data subset by either `series_id` or `series_name`.
+
+* The `category` is a string tag categorizing a group of data points.
+  The measured outcomes input as-is to the curve analysis are categorized as "raw".
+  In a standard :class:`.CurveAnalysis` subclass, the input data is formatted for
+  the fitting and the formatted data is also stored in the table with the "formatted" category.
+  You can filter the formatted data to run curve fitting with your custom program.
+  After the fit is successfully conducted and the model parameters are identified,
+  data points on the interpolated fit curves are stored with the "fitted" category
+  for visualization. The management of the data groups depends on the design of
+  the curve analysis protocol, and the naming convention for categories may
+  differ between analyses.
+
+* The `analysis` is a string key representing the name of
+  the analysis instance that generated the data point.
+  This allows a user to combine multiple tables from different analyses without collapsing the data points.
+  For a simple analysis class, all rows will have the same value,
+  but a :class:`.CompositeCurveAnalysis` instance consists of
+  nested component analysis instances containing statistically independent fit models.
+  Each component is given a unique analysis name, and the datasets generated from each instance
+  are merged into a single table stored in the outermost composite analysis.
+
+Users must be aware of this triplet to extract data points that belong to a
+particular data subset. For example,
+
+.. code-block:: python
+
+    mini_table = table.filter(series="my_experiment1", category="raw", analysis="AnalysisA")
+    mini_x = mini_table.x
+    mini_y = mini_table.y
+
+This operation is equivalent to
+
+.. code-block:: python
+
+    mini_x = table.xvals(series="my_experiment1", category="raw", analysis="AnalysisA")
+    mini_y = table.yvals(series="my_experiment1", category="raw", analysis="AnalysisA")
+
+When an analysis only has a single model and the table is created from a single
+analysis instance, the `series_id` and `analysis` are trivial, and you only need to
+specify the `category` to get the subset of interest.
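+
+As a quick illustration, the following sketch builds a small table by hand
+(the numbers and names are made up) and extracts one subset with the triplet:
+
+.. code-block:: python
+
+    from qiskit_experiments.curve_analysis import ScatterTable
+
+    table = ScatterTable()
+    # Two raw data points from two different series of the same analysis.
+    table.add_row(
+        xval=0.1, yval=0.2, series_name="exp1", series_id=0, category="raw", analysis="AnalysisA"
+    )
+    table.add_row(
+        xval=0.1, yval=0.8, series_name="exp2", series_id=1, category="raw", analysis="AnalysisA"
+    )
+
+    mini_table = table.filter(series="exp1", category="raw", analysis="AnalysisA")
+    print(mini_table.x, mini_table.y)  # [0.1] [0.2]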
+
+The full description of each :class:`.ScatterTable` column is as follows:
+
+- `xval`: Parameter scanned in the experiment. This value must be defined in the circuit metadata.
+- `yval`: Nominal part of the outcome. The outcome is something like an expectation value,
+  which is computed from the experiment result with the data processor.
+- `yerr`: Standard error of the outcome, which is mainly due to sampling error.
+- `series_name`: Human readable name of the data series. This is defined by the ``data_subfit_map`` option in the :class:`.CurveAnalysis`.
+- `series_id`: Integer corresponding to the name of the data series. This number is automatically assigned.
+- `category`: A tag for the data group. This is defined by the developer of the curve analysis.
+- `shots`: Number of measurement shots used to acquire a data point. This value can be defined in the circuit metadata.
+- `analysis`: The name of the curve analysis instance that generated a data point.
+
+This object helps an analysis developer write a custom analysis class
+without the overhead of complex data management, and helps end-users
+retrieve and reuse the intermediate data in their custom fitting workflows
+outside our curve fitting framework.
+Note that a :class:`.ScatterTable` instance may be saved in the :class:`.ExperimentData` as an artifact.
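+
+Because the underlying data is exposed as a dataframe, tables can also be
+merged or handed to any pandas-based tooling. A sketch (``table_a`` and
+``table_b`` are assumed to be existing :class:`.ScatterTable` instances):
+
+.. code-block:: python
+
+    import pandas as pd
+    from qiskit_experiments.curve_analysis import ScatterTable
+
+    # Combine two tables, e.g. from two different analyses, into one.
+    merged = ScatterTable.from_dataframe(
+        pd.concat([table_a.dataframe, table_b.dataframe])
+    )
+
+    # Export the data points with plain pandas for external tooling.
+    table_a.dataframe.to_csv("scatter_data.csv")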
+
 .. _curve_analysis_workflow:
 
 Curve Analysis workflow
 -----------------------
 
@@ -271,67 +350,71 @@ the data processor in the analysis option is internally called.
 This consumes input experiment results and creates the :class:`.ScatterTable` dataframe.
 This table may look like:
 
-.. code-block::
-
-         xval      yval      yerr  name  class_id category  shots
-    0     0.1  0.153659  0.011258     A         0      raw   1024
-    1     0.1  0.590732  0.015351     B         1      raw   1024
-    2     0.1  0.315610  0.014510     A         0      raw   1024
-    3     0.1  0.376098  0.015123     B         1      raw   1024
-    4     0.2  0.937073  0.007581     A         0      raw   1024
-    5     0.2  0.323415  0.014604     B         1      raw   1024
-    6     0.2  0.538049  0.015565     A         0      raw   1024
-    7     0.2  0.530244  0.015581     B         1      raw   1024
-    8     0.3  0.143902  0.010958     A         0      raw   1024
-    9     0.3  0.261951  0.013727     B         1      raw   1024
-    10    0.3  0.830732  0.011707     A         0      raw   1024
-    11    0.3  0.874634  0.010338     B         1      raw   1024
+.. jupyter-input::
+
+    table = analysis._run_data_processing(experiment_data.data())
+    print(table)
+
+.. jupyter-output::
+
+         xval      yval      yerr series_name  series_id category  shots    analysis
+    0     0.1  0.153659  0.011258           A          0      raw   1024  MyAnalysis
+    1     0.1  0.590732  0.015351           B          1      raw   1024  MyAnalysis
+    2     0.1  0.315610  0.014510           A          0      raw   1024  MyAnalysis
+    3     0.1  0.376098  0.015123           B          1      raw   1024  MyAnalysis
+    4     0.2  0.937073  0.007581           A          0      raw   1024  MyAnalysis
+    5     0.2  0.323415  0.014604           B          1      raw   1024  MyAnalysis
+    6     0.2  0.538049  0.015565           A          0      raw   1024  MyAnalysis
+    7     0.2  0.530244  0.015581           B          1      raw   1024  MyAnalysis
+    8     0.3  0.143902  0.010958           A          0      raw   1024  MyAnalysis
+    9     0.3  0.261951  0.013727           B          1      raw   1024  MyAnalysis
+    10    0.3  0.830732  0.011707           A          0      raw   1024  MyAnalysis
+    11    0.3  0.874634  0.010338           B          1      raw   1024  MyAnalysis
 
 where the experiment consists of two subset series A and B, and the experiment parameter (xval)
 is scanned from 0.1 to 0.3 in each subset. In this example, the experiment is run twice
-for each condition. The role of each column is as follows:
-
-- ``xval``: Parameter scanned in the experiment. This value must be defined in the circuit metadata.
-- ``yval``: Nominal part of the outcome. The outcome is something like expectation value, which is computed from the experiment result with the data processor.
-- ``yerr``: Standard error of the outcome, which is mainly due to sampling error.
-- ``name``: Unique identifier of the result class. This is defined by the ``data_subfit_map`` option.
-- ``class_id``: Numerical index corresponding to the result class. This number is automatically assigned.
-- ``category``: The attribute of data set. The "raw" category indicates an output from the data processing.
-- ``shots``: Number of measurement shots used to acquire this result.
+for each condition.
+See :ref:`data_management_with_scatter_table` for the details of the columns.
 
 3. Formatting
 ^^^^^^^^^^^^^
 
-Next, the processed dataset is converted into another format suited for the fitting and
-every valid result is assigned a class corresponding to a fit model.
+Next, the processed dataset is converted into another format suited for fitting.
 By default, the formatter takes average of the outcomes in the processed dataset
 over the same x values, followed by the sorting in the ascending order of x values.
 This allows the analysis to easily estimate the slope of the curves to
 create algorithmic initial guess of fit parameters.
 A developer can inject extra data processing, for example, filtering, smoothing,
 or elimination of outliers for better fitting.
-The new class_id is given here so that its value corresponds to the fit model object index
-in this analysis class. This index mapping is done based upon the correspondence of
-the data name and the fit model name.
+The new `series_id` is given here so that its value corresponds to the fit model index
+defined in this analysis class. This index mapping is done based on the correspondence
+between the `series_name` and the fit model name.
 This is done by calling :meth:`_format_data` method.
 This may return new scatter table object with the addition of rows like the following below.
 
-.. code-block::
+.. jupyter-input::
+
+    table = analysis._format_data(table)
+    print(table)
+
+.. jupyter-output::
 
-    12   0.1 0.234634  0.009183    A       0  formatted  2048
-    13   0.2 0.737561  0.008656    A       0  formatted  2048
-    14   0.3 0.487317  0.008018    A       0  formatted  2048
-    15   0.1 0.483415  0.010774    B       1  formatted  2048
-    16   0.2 0.426829  0.010678    B       1  formatted  2048
-    17   0.3 0.568293  0.008592    B       1  formatted  2048
+         xval      yval      yerr series_name  series_id   category  shots    analysis
+    ...
+    12    0.1  0.234634  0.009183           A          0  formatted   2048  MyAnalysis
+    13    0.2  0.737561  0.008656           A          0  formatted   2048  MyAnalysis
+    14    0.3  0.487317  0.008018           A          0  formatted   2048  MyAnalysis
+    15    0.1  0.483415  0.010774           B          1  formatted   2048  MyAnalysis
+    16    0.2  0.426829  0.010678           B          1  formatted   2048  MyAnalysis
+    17    0.3  0.568293  0.008592           B          1  formatted   2048  MyAnalysis
 
 The default :meth:`_format_data` method adds its output data with the category "formatted".
 This category name must be also specified in the analysis option ``fit_category``.
 If overriding this method to do additional processing after the default formatting,
 the ``fit_category`` analysis option can be set to choose a different category name to use to
 select the data to pass to the fitting routine.
-The (x, y) value in each row is passed to the corresponding fit model object
+The (xval, yval) values in each row are passed to the corresponding fit model object
 to compute residual values for the least square optimization.
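+
+A minimal sketch of overriding :meth:`_format_data` in this way is shown
+below (the class name, the outlier threshold, and the ``curve`` import alias
+are illustrative assumptions, not part of the framework):
+
+.. code-block:: python
+
+    import qiskit_experiments.curve_analysis as curve
+
+    class MyAnalysis(curve.CurveAnalysis):
+
+        @classmethod
+        def _default_options(cls):
+            options = super()._default_options()
+            # Point the fitter at the custom category added below.
+            options.fit_category = "cleaned"
+            return options
+
+        def _format_data(self, curve_data, category="cleaned"):
+            # Run the default averaging first, then re-register only the
+            # low-noise points under the custom "cleaned" category.
+            curve_data = super()._format_data(curve_data, category="formatted")
+            formatted = curve_data.filter(category="formatted")
+            for xval, yval, yerr, name, sid in zip(
+                formatted.x,
+                formatted.y,
+                formatted.y_err,
+                formatted.series_name,
+                formatted.series_id,
+            ):
+                if yerr < 0.1:  # arbitrary outlier threshold
+                    curve_data.add_row(
+                        xval=xval,
+                        yval=yval,
+                        yerr=yerr,
+                        series_name=name,
+                        series_id=sid,
+                        category=category,
+                        analysis=self.name,
+                    )
+            return curve_data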

 3. Fitting

diff --git a/qiskit_experiments/curve_analysis/__init__.py b/qiskit_experiments/curve_analysis/__init__.py
index 2db6044fff..992946d28e 100644
--- a/qiskit_experiments/curve_analysis/__init__.py
+++ b/qiskit_experiments/curve_analysis/__init__.py
@@ -39,6 +39,7 @@
 .. autosummary::
     :toctree: ../stubs/
 
+    ScatterTable
     SeriesDef
     CurveData
     CurveFitResult
diff --git a/qiskit_experiments/curve_analysis/composite_curve_analysis.py b/qiskit_experiments/curve_analysis/composite_curve_analysis.py
index 77dad880d8..2a116f24f9 100644
--- a/qiskit_experiments/curve_analysis/composite_curve_analysis.py
+++ b/qiskit_experiments/curve_analysis/composite_curve_analysis.py
@@ -230,34 +230,35 @@ def _create_figures(
             A list of figures.
         """
         for analysis in self.analyses():
-            sub_data = curve_data[curve_data.group == analysis.name]
-            for name, data in list(sub_data.groupby("name")):
-                full_name = f"{name}_{analysis.name}"
+            group_data = curve_data.filter(analysis=analysis.name)
+            model_names = analysis.model_names()
+            for series_id, sub_data in group_data.iter_by_series_id():
+                full_name = f"{model_names[series_id]}_{analysis.name}"
                 # Plot raw data scatters
                 if analysis.options.plot_raw_data:
-                    raw_data = data[data.category == "raw"]
+                    raw_data = sub_data.filter(category="raw")
                     self.plotter.set_series_data(
                         series_name=full_name,
-                        x=raw_data.xval.to_numpy(),
-                        y=raw_data.yval.to_numpy(),
+                        x=raw_data.x,
+                        y=raw_data.y,
                     )
                 # Plot formatted data scatters
-                formatted_data = data[data.category == analysis.options.fit_category]
+                formatted_data = sub_data.filter(category=analysis.options.fit_category)
                 self.plotter.set_series_data(
                     series_name=full_name,
-                    x_formatted=formatted_data.xval.to_numpy(),
-                    y_formatted=formatted_data.yval.to_numpy(),
-                    y_formatted_err=formatted_data.yerr.to_numpy(),
+                    x_formatted=formatted_data.x,
+                    y_formatted=formatted_data.y,
+                    y_formatted_err=formatted_data.y_err,
                 )
                 # Plot fit lines
-                line_data = data[data.category == "fitted"]
+                line_data = sub_data.filter(category="fitted")
                 if len(line_data) == 0:
                     continue
-                fit_stdev = line_data.yerr.to_numpy()
+                fit_stdev = line_data.y_err
                 self.plotter.set_series_data(
                     series_name=full_name,
-                    x_interp=line_data.xval.to_numpy(),
-                    y_interp=line_data.yval.to_numpy(),
+                    x_interp=line_data.x,
+                    y_interp=line_data.y,
                     y_interp_err=fit_stdev if np.isfinite(fit_stdev).all() else None,
                 )
 
@@ -354,7 +355,7 @@ def _run_analysis(
             metadata["group"] = analysis.name
             table = analysis._format_data(analysis._run_data_processing(experiment_data.data()))
-            formatted_subset = table[table.category == 
analysis.options.fit_category] + formatted_subset = table.filter(category=analysis.options.fit_category) fit_data = analysis._run_curve_fit(formatted_subset) fit_dataset[analysis.name] = fit_data @@ -376,32 +377,35 @@ def _run_analysis( if fit_data.success: # Add fit data to curve data table - fit_curves = [] - columns = list(table.columns) model_names = analysis.model_names() - for i, sub_data in list(formatted_subset.groupby("class_id")): - xval = sub_data.xval.to_numpy() + for series_id, sub_data in formatted_subset.iter_by_series_id(): + xval = sub_data.x if len(xval) == 0: # If data is empty, skip drawing this model. # This is the case when fit model exist but no data to fit is provided. continue # Compute X, Y values with fit parameters. - xval_fit = np.linspace(np.min(xval), np.max(xval), num=100) - yval_fit = eval_with_uncertainties( - x=xval_fit, - model=analysis.models[i], + xval_arr_fit = np.linspace(np.min(xval), np.max(xval), num=100, dtype=float) + uval_arr_fit = eval_with_uncertainties( + x=xval_arr_fit, + model=analysis.models[series_id], params=fit_data.ufloat_params, ) - model_fit = np.full((100, len(columns)), np.nan, dtype=object) - fit_curves.append(model_fit) - model_fit[:, columns.index("xval")] = xval_fit - model_fit[:, columns.index("yval")] = unp.nominal_values(yval_fit) + yval_arr_fit = unp.nominal_values(uval_arr_fit) if fit_data.covar is not None: - model_fit[:, columns.index("yerr")] = unp.std_devs(yval_fit) - model_fit[:, columns.index("name")] = model_names[i] - model_fit[:, columns.index("class_id")] = i - model_fit[:, columns.index("category")] = "fitted" - table = table.append_list_values(other=np.vstack(fit_curves)) + yerr_arr_fit = unp.std_devs(uval_arr_fit) + else: + yerr_arr_fit = np.zeros_like(xval_arr_fit) + for xval, yval, yerr in zip(xval_arr_fit, yval_arr_fit, yerr_arr_fit): + table.add_row( + xval=xval, + yval=yval, + yerr=yerr, + series_name=model_names[series_id], + series_id=series_id, + category="fitted", + analysis=analysis.name, + ) analysis_results.extend( analysis._create_analysis_results( fit_data=fit_data, @@ -416,11 +420,11 @@ def _run_analysis( analysis._create_curve_data(curve_data=formatted_subset, **metadata) ) - # Add extra column to identify the fit model - table["group"] = analysis.name curve_data_set.append(table) - combined_curve_data = pd.concat(curve_data_set) + combined_curve_data = ScatterTable.from_dataframe( + pd.concat([d.dataframe for d in curve_data_set]) + ) total_quality = self._evaluate_quality(fit_dataset) # After the quality is determined, plot can become a boolean flag for whether diff --git a/qiskit_experiments/curve_analysis/curve_analysis.py b/qiskit_experiments/curve_analysis/curve_analysis.py index d2eea799cc..b08366ec0d 100644 --- a/qiskit_experiments/curve_analysis/curve_analysis.py +++ b/qiskit_experiments/curve_analysis/curve_analysis.py @@ -17,8 +17,6 @@ from typing import Dict, List, Tuple, Union, Optional from functools import partial -from itertools import groupby -from operator import itemgetter import lmfit import numpy as np @@ -178,18 +176,20 @@ def _run_data_processing( else: to_process = raw_data - # This must align with ScatterTable columns. Use struct array. - dtypes = np.dtype( - [ - ("xval", float), - ("yval", float), - ("yerr", float), - ("name", "U30"), - ("class_id", int), - ("category", "U30"), - ("shots", int), - ] - ) + # Compute y value + if not self.options.data_processor: + raise ValueError( + f"Data processor is not set for the {self.__class__.__name__} instance. 
" + "Initialize the instance with the experiment data, or set the " + "data_processor analysis options." + ) + processed = self.options.data_processor(to_process) + yvals = unp.nominal_values(processed).flatten() + with np.errstate(invalid="ignore"): + # For averaged data, the processed std dev will be NaN. + # Setting std_devs to NaN will trigger floating point exceptions + # which we can ignore. See https://stackoverflow.com/q/75656026 + yerrs = unp.std_devs(processed).flatten() # Prepare circuit metadata to data class mapper from data_subfit_map value. if len(self._models) == 1: @@ -197,8 +197,8 @@ def _run_data_processing( else: classifier = self.options.data_subfit_map - source = np.empty(len(to_process), dtype=dtypes) - for idx, datum in enumerate(to_process): + table = ScatterTable() + for datum, yval, yerr in zip(to_process, yvals, yerrs): metadata = datum["metadata"] try: xval = metadata[opt.x_key] @@ -206,44 +206,25 @@ def _run_data_processing( raise DataProcessorError( f"X value key {opt.x_key} is not defined in the circuit metadata." ) from ex - source[idx]["xval"] = xval - source[idx]["shots"] = datum.get("shots", -1) - # Assign entry name and class id - for class_id, (name, spec) in enumerate(classifier.items()): + # Assign series name and series id + for series_id, (series_name, spec) in enumerate(classifier.items()): if spec.items() <= metadata.items(): - source[idx]["class_id"] = class_id - source[idx]["name"] = name break else: # This is unclassified data. - # Assume that normal ID will never become negative number. - # This is numpy struct array object and cannot store pandas nullable integer. - source[idx]["class_id"] = -1 - source[idx]["name"] = "" - - # Compute y value - if not self.options.data_processor: - raise ValueError( - f"Data processor is not set for the {self.__class__.__name__} instance. " - "Initialize the instance with the experiment data, or set the " - "data_processor analysis options." + series_name = pd.NA + series_id = pd.NA + table.add_row( + xval=xval, + yval=yval, + yerr=yerr, + series_name=series_name, + series_id=series_id, + category=category, + shots=datum.get("shots", pd.NA), + analysis=self.name, ) - processed_values = self.options.data_processor(to_process) - source["yval"] = unp.nominal_values(processed_values).flatten() - with np.errstate(invalid="ignore"): - # For averaged data, the processed std dev will be NaN. - # Setting std_devs to NaN will trigger floating point exceptions - # which we can ignore. See https://stackoverflow.com/q/75656026 - source["yerr"] = unp.std_devs(processed_values).flatten() - source["category"] = category - - table = ScatterTable(data=source) - - # Replace temporary -1 value with nullable integer - table["class_id"] = table["class_id"].replace(-1, pd.NA) - table["shots"] = table["shots"].replace(-1, pd.NA) - return table def _format_data( @@ -265,39 +246,31 @@ def _format_data( "iwv": inverse_weighted_variance, "sample": sample_average, } - - columns = list(curve_data.columns) - sort_by = itemgetter( - columns.index("class_id"), - columns.index("xval"), - ) - # Use python native groupby method on ndarray. This is more performant than pandas one. 
average = averaging_methods[self.options.average_method] model_names = self.model_names() - formatted = [] - for (_, xv), g in groupby(sorted(curve_data.values, key=sort_by), key=sort_by): - g_values = np.array(list(g)) - g_dict = dict(zip(columns, g_values.T)) - avg_yval, avg_yerr, shots = average(g_dict["yval"], g_dict["yerr"], g_dict["shots"]) - data_name = g_dict["name"][0] + + for (series_name, xval), sub_data in curve_data.iter_groups("series_name", "xval"): + avg_yval, avg_yerr, shots = average( + sub_data.y, + sub_data.y_err, + sub_data.shots, + ) try: - # Map data index to model index through assigned name. - # Data name should match with the model name. - # Otherwise, the model index is unclassified. - model_id = model_names.index(data_name) + series_id = model_names.index(series_name) except ValueError: - model_id = pd.NA - averaged = dict.fromkeys(columns) - averaged["category"] = category - averaged["xval"] = xv - averaged["yval"] = avg_yval - averaged["yerr"] = avg_yerr - averaged["name"] = data_name - averaged["class_id"] = model_id - averaged["shots"] = shots - formatted.append(list(averaged.values())) - - return curve_data.append_list_values(formatted) + series_id = pd.NA + curve_data.add_row( + xval=xval, + yval=avg_yval, + yerr=avg_yerr, + series_name=series_name, + series_id=series_id, + category=category, + shots=shots, + analysis=self.name, + ) + + return curve_data def _generate_fit_guesses( self, @@ -365,13 +338,13 @@ def _run_curve_fit( # Create convenient function to compute residual of the models. partial_residuals = [] - valid_uncertainty = np.all(np.isfinite(curve_data.yerr.to_numpy())) - for i, sub_data in list(curve_data.groupby("class_id")): + valid_uncertainty = np.all(np.isfinite(curve_data.y_err)) + for idx, sub_data in curve_data.iter_by_series_id(): if valid_uncertainty: nonzero_yerr = np.where( - np.isclose(sub_data.yerr, 0.0), + np.isclose(sub_data.y_err, 0.0), np.finfo(float).eps, - sub_data.yerr, + sub_data.y_err, ) raw_weights = 1 / nonzero_yerr # Remove outlier. When all sample values are the same with sample average, @@ -383,10 +356,10 @@ def _run_curve_fit( else: weights = None model_residual = partial( - self._models[i]._residual, - data=sub_data.yval.to_numpy(), + self._models[idx]._residual, + data=sub_data.y, weights=weights, - x=sub_data.xval.to_numpy(), + x=sub_data.x, ) partial_residuals.append(model_residual) @@ -428,8 +401,8 @@ def _run_curve_fit( return convert_lmfit_result( res, self._models, - curve_data.xval.to_numpy(), - curve_data.yval.to_numpy(), + curve_data.x, + curve_data.y, ) def _create_figures( @@ -444,36 +417,37 @@ def _create_figures( Returns: A list of figures. 
""" - for name, data in list(curve_data.groupby("name")): + for series_id, sub_data in curve_data.iter_by_series_id(): + model_name = self.model_names()[series_id] # Plot raw data scatters if self.options.plot_raw_data: - raw_data = data[data.category == "raw"] + raw_data = sub_data.filter(category="raw") self.plotter.set_series_data( - series_name=name, - x=raw_data.xval.to_numpy(), - y=raw_data.yval.to_numpy(), + series_name=model_name, + x=raw_data.x, + y=raw_data.y, ) # Plot formatted data scatters - formatted_data = data[data.category == self.options.fit_category] + formatted_data = sub_data.filter(category=self.options.fit_category) self.plotter.set_series_data( - series_name=name, - x_formatted=formatted_data.xval.to_numpy(), - y_formatted=formatted_data.yval.to_numpy(), - y_formatted_err=formatted_data.yerr.to_numpy(), + series_name=model_name, + x_formatted=formatted_data.x, + y_formatted=formatted_data.y, + y_formatted_err=formatted_data.y_err, ) # Plot fit lines - line_data = data[data.category == "fitted"] + line_data = sub_data.filter(category="fitted") if len(line_data) == 0: continue self.plotter.set_series_data( - series_name=name, - x_interp=line_data.xval.to_numpy(), - y_interp=line_data.yval.to_numpy(), + series_name=model_name, + x_interp=line_data.x, + y_interp=line_data.y, ) - fit_stdev = line_data.yerr.to_numpy() + fit_stdev = line_data.y_err if np.isfinite(fit_stdev).all(): self.plotter.set_series_data( - series_name=name, + series_name=model_name, y_interp_err=fit_stdev, ) @@ -499,7 +473,7 @@ def _run_analysis( self._initialize(experiment_data) table = self._format_data(self._run_data_processing(experiment_data.data())) - formatted_subset = table[table.category == self.options.fit_category] + formatted_subset = table.filter(category=self.options.fit_category) fit_data = self._run_curve_fit(formatted_subset) if fit_data.success: @@ -524,32 +498,35 @@ def _run_analysis( if fit_data.success: # Add fit data to curve data table - fit_curves = [] - columns = list(table.columns) model_names = self.model_names() - for i, sub_data in list(formatted_subset.groupby("class_id")): - xval = sub_data.xval.to_numpy() + for series_id, sub_data in formatted_subset.iter_by_series_id(): + xval = sub_data.x if len(xval) == 0: # If data is empty, skip drawing this model. # This is the case when fit model exist but no data to fit is provided. continue # Compute X, Y values with fit parameters. 
- xval_fit = np.linspace(np.min(xval), np.max(xval), num=100, dtype=float) - yval_fit = eval_with_uncertainties( - x=xval_fit, - model=self._models[i], + xval_arr_fit = np.linspace(np.min(xval), np.max(xval), num=100, dtype=float) + uval_arr_fit = eval_with_uncertainties( + x=xval_arr_fit, + model=self._models[series_id], params=fit_data.ufloat_params, ) - model_fit = np.full((100, len(columns)), None, dtype=object) - fit_curves.append(model_fit) - model_fit[:, columns.index("xval")] = xval_fit - model_fit[:, columns.index("yval")] = unp.nominal_values(yval_fit) + yval_arr_fit = unp.nominal_values(uval_arr_fit) if fit_data.covar is not None: - model_fit[:, columns.index("yerr")] = unp.std_devs(yval_fit) - model_fit[:, columns.index("name")] = model_names[i] - model_fit[:, columns.index("class_id")] = i - model_fit[:, columns.index("category")] = "fitted" - table = table.append_list_values(other=np.vstack(fit_curves)) + yerr_arr_fit = unp.std_devs(uval_arr_fit) + else: + yerr_arr_fit = np.zeros_like(xval_arr_fit) + for xval, yval, yerr in zip(xval_arr_fit, yval_arr_fit, yerr_arr_fit): + table.add_row( + xval=xval, + yval=yval, + yerr=yerr, + series_name=model_names[series_id], + series_id=series_id, + category="fitted", + analysis=self.name, + ) analysis_results.extend( self._create_analysis_results( fit_data=fit_data, diff --git a/qiskit_experiments/curve_analysis/scatter_table.py b/qiskit_experiments/curve_analysis/scatter_table.py index 4361274b9e..bbb5f91bbd 100644 --- a/qiskit_experiments/curve_analysis/scatter_table.py +++ b/qiskit_experiments/curve_analysis/scatter_table.py @@ -9,185 +9,462 @@ # Any modifications or derivative works of this code must retain this # copyright notice, and modified files need to carry a notice indicating # that they have been altered from the originals. + """Table representation of the x, y data for curve fitting.""" +from __future__ import annotations import logging -from typing import List, Sequence, Dict, Any, Union +import warnings +from collections.abc import Iterator +from typing import Any +from functools import reduce +from itertools import product import numpy as np import pandas as pd from qiskit.utils import deprecate_func -from qiskit_experiments.framework.table_mixin import DefaultColumnsMixIn - LOG = logging.getLogger(__name__) -class ScatterTable(pd.DataFrame, DefaultColumnsMixIn): - """A table to store x and y data with metadata associated with the data point. +class ScatterTable: + """A table-like dataset for the intermediate data used for curve fitting. - This class is implemented upon the pandas dataframe. - See `pandas dataframe documentation `_ - for the base class API documentation. + Default table columns are defined in the class attribute :attr:`.COLUMNS`. + This table cannot be expanded with user-provided column names. - A single ``ScatterTable`` object can contain different kind of intermediate data - generated through the curve fitting, which are classified by the fit model. - When an experiment has sub-data for ``sub_exp_1``, the formatted x, y, and y-error - array data may be obtained from the original table object as follows: + In a standard :class:`.CurveAnalysis` subclass, a ScatterTable instance may be + stored in the :class:`.ExperimentData` as an artifact. + Users can retrieve the table data at a later time to rerun a fitting with a homemade program + or with different fit options, or to visualize the curves in a preferred format. 
+ This table dataset is designed to seamlessly provide such information + that an experimentalist may want to reuse for a custom workflow. - .. code-block::python + .. note:: - abc_data = table[ - (table.name == "sub_exp_1") & (table.category == "formatted") - ] - x, y, e = abc_data.xval.to_numpy(), abc_data.yval.to_numpy(), abc_data.yerr.to_numpy() + This dataset is not thread safe. Do not use the same instance in multiple threads. + + See the tutorial of :ref:`data_management_with_scatter_table` for the + role of each table column and how values are typically provided. """ - # TODO Add this to toctree. In current mechanism all pandas DataFrame members are rendered - # and it fails in the Sphinx build process. We may need a custom directive to - # exclude class members from an external package. + COLUMNS = [ + "xval", + "yval", + "yerr", + "series_name", + "series_id", + "category", + "shots", + "analysis", + ] - @classmethod - def _default_columns(cls) -> List[str]: - return [ - "xval", - "yval", - "yerr", - "name", - "class_id", - "category", - "shots", - ] + DTYPES = [ + "Float64", + "Float64", + "Float64", + "string", + "Int64", + "string", + "Int64", + "string", + ] - @deprecate_func( - since="0.6", - additional_msg="Curve data uses dataframe representation. Use dataframe filtering method.", - pending=True, - package_name="qiskit-experiments", - ) - def get_subset_of(self, index: Union[str, int]) -> "ScatterTable": - """Filter data by series name or index. + def __init__(self): + self._lazy_add_rows = [] + self._dump = pd.DataFrame(columns=self.COLUMNS) + + @classmethod + def from_dataframe( + cls, + data: pd.DataFrame, + ) -> "ScatterTable": + """Create new dataset with existing dataframe. Args: - index: Series index of name. + data: Data dataframe object. Returns: - A subset of data corresponding to a particular series. + A new ScatterTable instance. """ - if isinstance(index, int): - index = self.labels[index] - return self[self.name == index] + if list(data.columns) != cls.COLUMNS: + raise ValueError("Input dataframe columns don't match with the ScatterTable spec.") + format_data = cls._format_table(data) + return cls._create_new_instance(format_data) + + @classmethod + def _create_new_instance( + cls, + data: pd.DataFrame, + ) -> "ScatterTable": + # A shortcut for creating instance. + # This bypasses data formatting and column compatibility check. + # User who calls this method must guarantee the quality of the input data. + instance = object.__new__(cls) + instance._lazy_add_rows = [] + instance._dump = data + return instance + + @property + def dataframe(self): + """Dataframe object of data points.""" + if self._lazy_add_rows: + # Add data when table element is called. + # Adding rows in loop is extremely slow in pandas. + tmp_df = pd.DataFrame(self._lazy_add_rows, columns=self.COLUMNS) + tmp_df = self._format_table(tmp_df) + if len(self._dump) == 0: + self._dump = tmp_df + else: + self._dump = pd.concat([self._dump, tmp_df], ignore_index=True) + self._lazy_add_rows.clear() + return self._dump @property - @deprecate_func( - since="0.6", - additional_msg="Curve data uses dataframe representation. 
Call .xval.to_numpy() instead.", - pending=True, - package_name="qiskit-experiments", - is_property=True, - ) def x(self) -> np.ndarray: """X values.""" - return self.xval.to_numpy() + # For backward compatibility with CurveData.x + return self.dataframe.xval.to_numpy(dtype=float, na_value=np.nan) + + @x.setter + def x(self, new_values): + self.dataframe.loc[:, "xval"] = new_values + + def xvals( + self, + series: int | str | None = None, + category: str | None = None, + analysis: str | None = None, + check_unique: bool = True, + ) -> np.ndarray: + """Get subset of X values. + + A convenient shortcut for getting X data with filtering. + + Args: + series: Identifier of the data series, either integer series index or name. + category: Name of data category. + analysis: Name of analysis. + check_unique: Set True to check if multiple series are contained. + When multiple series are contained, it raises a user warning. + + Returns: + Numpy array of X values. + """ + sub_table = self.filter(series, category, analysis) + if check_unique: + sub_table._warn_composite_data() + return sub_table.x @property - @deprecate_func( - since="0.6", - additional_msg="Curve data uses dataframe representation. Call .yval.to_numpy() instead.", - pending=True, - package_name="qiskit-experiments", - is_property=True, - ) def y(self) -> np.ndarray: """Y values.""" - return self.yval.to_numpy() + # For backward compatibility with CurveData.y + return self.dataframe.yval.to_numpy(dtype=float, na_value=np.nan) + + @y.setter + def y(self, new_values: np.ndarray): + self.dataframe.loc[:, "yval"] = new_values + + def yvals( + self, + series: int | str | None = None, + category: str | None = None, + analysis: str | None = None, + check_unique: bool = True, + ) -> np.ndarray: + """Get subset of Y values. + + A convenient shortcut for getting Y data with filtering. + + Args: + series: Identifier of the data series, either integer series index or name. + category: Name of data category. + analysis: Name of analysis. + check_unique: Set True to check if multiple series are contained. + When multiple series are contained, it raises a user warning. + + Returns: + Numpy array of Y values. + """ + sub_table = self.filter(series, category, analysis) + if check_unique: + sub_table._warn_composite_data() + return sub_table.y @property - @deprecate_func( - since="0.6", - additional_msg="Curve data uses dataframe representation. Call .yerr.to_numpy() instead.", - pending=True, - package_name="qiskit-experiments", - is_property=True, - ) def y_err(self) -> np.ndarray: - """Standard deviation of y values.""" - return self.yerr.to_numpy() + """Standard deviation of Y values.""" + # For backward compatibility with CurveData.y_err + return self.dataframe.yerr.to_numpy(dtype=float, na_value=np.nan) + + @y_err.setter + def y_err(self, new_values: np.ndarray): + self.dataframe.loc[:, "yerr"] = new_values + + def yerrs( + self, + series: int | str | None = None, + category: str | None = None, + analysis: str | None = None, + check_unique: bool = True, + ) -> np.ndarray: + """Get subset of standard deviation of Y values. + + A convenient shortcut for getting Y error data with filtering. + + Args: + series: Identifier of the data series, either integer series index or name. + category: Name of data category. + analysis: Name of analysis. + check_unique: Set True to check if multiple series are contained. + When multiple series are contained, it raises a user warning. + + Returns: + Numpy array of Y error values. 
+ """ + sub_table = self.filter(series, category, analysis) + if check_unique: + sub_table._warn_composite_data() + return sub_table.y_err @property - @deprecate_func( - since="0.6", - additional_msg="Curve data uses dataframe representation. Call .shots.to_numpy() instead.", - pending=True, - package_name="qiskit-experiments", - is_property=True, - ) - def shots(self): - """Shot number of data points.""" - return self["shots"].to_numpy() + def series_name(self) -> np.ndarray: + """Corresponding data name for each data point.""" + return self.dataframe.series_name.to_numpy(dtype=object, na_value=None) + + @series_name.setter + def series_name(self, new_values: np.ndarray): + self.dataframe.loc[:, "series_name"] = new_values + + @property + def series_id(self) -> np.ndarray: + """Corresponding data UID for each data point.""" + return self.dataframe.series_id.to_numpy(dtype=object, na_value=None) + + @series_id.setter + def series_id(self, new_values: np.ndarray): + self.dataframe.loc[:, "series_id"] = new_values + + @property + def category(self) -> np.ndarray: + """Array of categories of the data points.""" + return self.dataframe.category.to_numpy(dtype=object, na_value=None) + + @category.setter + def category(self, new_values: np.ndarray): + self.dataframe.loc[:, "category"] = new_values + + @property + def shots(self) -> np.ndarray: + """Shot number used to acquire each data point.""" + return self.dataframe.shots.to_numpy(dtype=object, na_value=np.nan) + + @shots.setter + def shots(self, new_values: np.ndarray): + self.dataframe.loc[:, "shots"] = new_values + + @property + def analysis(self) -> np.ndarray: + """Corresponding analysis name for each data point.""" + return self.dataframe.analysis.to_numpy(dtype=object, na_value=None) + + @analysis.setter + def analysis(self, new_values: np.ndarray): + self.dataframe.loc[:, "analysis"] = new_values + + def filter( + self, + series: int | str | None = None, + category: str | None = None, + analysis: str | None = None, + ) -> ScatterTable: + """Filter data by series, category, and/or analysis name. + + Args: + series: Identifier of the data series, either integer series index or name. + category: Name of data category. + analysis: Name of analysis. + + Returns: + New ScatterTable object with filtered data. + """ + filt_data = self.dataframe + + if series is not None: + if isinstance(series, int): + index = filt_data.series_id == series + elif isinstance(series, str): + index = filt_data.series_name == series + else: + raise ValueError( + f"Invalid series identifier {series}. This must be integer or string." + ) + filt_data = filt_data.loc[index, :] + if category is not None: + index = filt_data.category == category + filt_data = filt_data.loc[index, :] + if analysis is not None: + index = filt_data.analysis == analysis + filt_data = filt_data.loc[index, :] + return ScatterTable._create_new_instance(filt_data) + + def iter_by_series_id(self) -> Iterator[tuple[int, "ScatterTable"]]: + """Iterate over subset of data sorted by the data series index. + + Yields: + Tuple of data series index and subset of ScatterTable. + """ + id_values = self.dataframe.series_id + for did in id_values.dropna().sort_values().unique(): + yield did, ScatterTable._create_new_instance(self.dataframe.loc[id_values == did, :]) + + def iter_groups( + self, + *group_by: str, + ) -> Iterator[tuple[tuple[Any, ...], "ScatterTable"]]: + """Iterate over the subset sorted by multiple column values. + + Args: + group_by: Names of columns to group by. 
+ + Yields: + Tuple of values for the grouped columns and the corresponding subset of the scatter table. + """ + out = self.dataframe + try: + values_iter = product(*[out.get(col).unique() for col in group_by]) + except AttributeError as ex: + raise ValueError( + f"Specified columns don't exist: {group_by} is not a subset of {self.COLUMNS}." + ) from ex + + for values in sorted(values_iter): + each_matched = [out.get(c) == v for c, v in zip(group_by, values)] + all_matched = reduce(lambda x, y: x & y, each_matched) + if not any(all_matched): + continue + yield values, ScatterTable._create_new_instance(out.loc[all_matched, :]) + + def add_row( + self, + xval: float | pd.NA = pd.NA, + yval: float | pd.NA = pd.NA, + yerr: float | pd.NA = pd.NA, + series_name: str | pd.NA = pd.NA, + series_id: int | pd.NA = pd.NA, + category: str | pd.NA = pd.NA, + shots: float | pd.NA = pd.NA, + analysis: str | pd.NA = pd.NA, + ): + """Add new data point to the table. + + Data must be the same length. + + Args: + xval: X value. + yval: Y value. + yerr: Standard deviation of y value. + series_name: Name of this data series if available. + series_id: Index of this data series if available. + category: Data category if available. + shots: Shot number used to acquire this data point. + analysis: Analysis name if available. + """ + self._lazy_add_rows.append( + [xval, yval, yerr, series_name, series_id, category, shots, analysis] + ) + + @classmethod + def _format_table(cls, data: pd.DataFrame) -> pd.DataFrame: + return ( + data.replace(np.nan, pd.NA) + .astype(dict(zip(cls.COLUMNS, cls.DTYPES))) + .reset_index(drop=True) + ) + + def _warn_composite_data(self): + if len(self.dataframe.series_name.unique()) > 1: + warnings.warn( + "Table data contains multiple data series. " + "You may want to filter the data by a specific series_id or series_name.", + UserWarning, + ) + if len(self.dataframe.category.unique()) > 1: + warnings.warn( + "Table data contains multiple categories. " + "You may want to filter the data by a specific category name.", + UserWarning, + ) + if len(self.dataframe.analysis.unique()) > 1: + warnings.warn( + "Table data contains multiple datasets from different component analyses. " + "You may want to filter the data by a specific analysis name.", + UserWarning, + ) @property @deprecate_func( since="0.6", - additional_msg="Curve data uses dataframe representation. Call .model_id.to_numpy() instead.", + additional_msg="Curve data uses dataframe representation. Call .series_id instead.", pending=True, package_name="qiskit-experiments", is_property=True, ) def data_allocation(self) -> np.ndarray: """Index of corresponding fit model.""" - # pylint: disable=no-member - return self.class_id.to_numpy() + return self.series_id @property @deprecate_func( since="0.6", - additional_msg="Curve data uses dataframe representation. Labels are a part of table.", + additional_msg="No alternative is provided. Use .series_name with set operation.", pending=True, package_name="qiskit-experiments", is_property=True, ) - def labels(self) -> List[str]: + def labels(self) -> list[str]: """List of model names.""" # Order sensitive - name_id_tups = self.groupby(["name", "class_id"]).groups.keys() + name_id_tups = self.dataframe.groupby(["series_name", "series_id"]).groups.keys() return [k[0] for k in sorted(name_id_tups, key=lambda k: k[1])] - def append_list_values( - self, - other: Sequence, - ) -> "ScatterTable": - """Add another list of dataframe values to this dataframe. 
+ @deprecate_func( + since="0.6", + additional_msg="Use filter method instead.", + pending=True, + package_name="qiskit-experiments", + ) + def get_subset_of(self, index: str | int) -> "ScatterTable": + """Filter data by series name or index. Args: - other: List of dataframe values to be added. + index: Series index of name. Returns: - New scatter table instance including both self and added data. + A subset of data corresponding to a particular series. """ - return ScatterTable(data=[*self.values, *other], columns=self.columns) + return self.filter(series=index) + + def __len__(self): + """Return the number of data points stored in the table.""" + return len(self.dataframe) + + def __eq__(self, other): + return self.dataframe.equals(other.dataframe) - def __json_encode__(self) -> Dict[str, Any]: + def __json_encode__(self) -> dict[str, Any]: return { "class": "ScatterTable", - "data": self.to_dict(orient="index"), + "data": self.dataframe.to_dict(orient="index"), } @classmethod - def __json_decode__(cls, value: Dict[str, Any]) -> "ScatterTable": + def __json_decode__(cls, value: dict[str, Any]) -> "ScatterTable": if not value.get("class", None) == "ScatterTable": raise ValueError("JSON decoded value for ScatterTable is not valid class type.") - - instance = cls.from_dict( - data=value.get("data", {}), - orient="index", - ).replace({np.nan: None}) - return instance - - @property - def _constructor(self): - # https://pandas.pydata.org/pandas-docs/stable/development/extending.html - return ScatterTable + tmp_df = pd.DataFrame.from_dict(value.get("data", {}), orient="index") + return ScatterTable.from_dataframe(tmp_df) diff --git a/qiskit_experiments/curve_analysis/standard_analysis/bloch_trajectory.py b/qiskit_experiments/curve_analysis/standard_analysis/bloch_trajectory.py index a155eebe58..5219ff662f 100644 --- a/qiskit_experiments/curve_analysis/standard_analysis/bloch_trajectory.py +++ b/qiskit_experiments/curve_analysis/standard_analysis/bloch_trajectory.py @@ -170,9 +170,9 @@ def _generate_fit_guesses( user_opt.bounds.set_if_empty(t_off=(0, np.inf), b=(-1, 1)) user_opt.p0.set_if_empty(b=1e-9) - x_data = curve_data.get_subset_of("x") - y_data = curve_data.get_subset_of("y") - z_data = curve_data.get_subset_of("z") + x_data = curve_data.filter(series="x") + y_data = curve_data.filter(series="y") + z_data = curve_data.filter(series="z") omega_xyz = [] for data in (x_data, y_data, z_data): diff --git a/qiskit_experiments/framework/analysis_result_table.py b/qiskit_experiments/framework/analysis_result_table.py index 16b8716874..2c84a08e2a 100644 --- a/qiskit_experiments/framework/analysis_result_table.py +++ b/qiskit_experiments/framework/analysis_result_table.py @@ -10,256 +10,312 @@ # copyright notice, and modified files need to carry a notice indicating # that they have been altered from the originals. 
-"""Table representation of analysis results.""" +"""A table-like dataset for analysis results.""" +from __future__ import annotations -import logging -import threading import re +import threading import uuid import warnings -from typing import List, Dict, Union, Optional, Any +from typing import Any import numpy as np import pandas as pd -from qiskit_experiments.database_service.utils import ThreadSafeContainer - -from .table_mixin import DefaultColumnsMixIn - -LOG = logging.getLogger(__name__) - - -class AnalysisResultContainer(pd.DataFrame, DefaultColumnsMixIn): - """Data container of the thread-safe analysis result table.""" - - @classmethod - def _default_columns(cls) -> List[str]: - return [ - "name", - "experiment", - "components", - "value", - "quality", - "experiment_id", - "result_id", - "tags", - "backend", - "run_time", - "created_time", - ] +from qiskit_experiments.database_service.exceptions import ExperimentEntryNotFound - @property - def _constructor(self): - # https://pandas.pydata.org/pandas-docs/stable/development/extending.html - return AnalysisResultContainer +class AnalysisResultTable: + """A table-like dataset for analysis results. -class AnalysisResultTable(ThreadSafeContainer): - """A thread-safe table form container of analysis results. + Default table columns are defined in the class attribute :attr:`.DEFAULT_COLUMNS`. + The table is automatically expanded when an extra key is included in the + input dictionary data. Missing columns in the input data are filled with a null value. - This table is a dataframe wrapper with the thread-safe mechanism with predefined columns. - This object is attached to the :class:`.ExperimentData` container to store - analysis results. Each table row contains series of metadata in addition to the - result value itself. + Table row index (i.e. entry ID) is created by truncating the result_id string which + is basically a UUID-4 string. A random unique ID is generated when the result_id + is missing in the input data. - User can rely on the dataframe filtering mechanism to analyze large scale experiment - results, e.g. massive parallel experiment and batch experiment outcomes, efficiently. - See `pandas dataframe documentation `_ - for more details. + Any operation on the table value via the instance methods guarantees thread safety. """ VALID_ID_REGEX = re.compile(r"\A(?P\w{8})-\w{4}-\w{4}-\w{4}-\w{12}\Z") - def _init_container(self, init_values: Any): - if init_values is None: - return AnalysisResultContainer() - return init_values - - def result_ids(self) -> List[str]: - """Return all result IDs in this table.""" - with self._lock: - return self._container["result_id"].to_list() + DEFAULT_COLUMNS = [ + "name", + "experiment", + "components", + "value", + "quality", + "experiment_id", + "result_id", + "tags", + "backend", + "run_time", + "created_time", + ] + + def __init__(self): + """Create new dataset.""" + self._data = pd.DataFrame(columns=self.DEFAULT_COLUMNS) + self._lock = threading.RLock() - def filter_columns(self, columns: Union[str, List[str]]) -> List[str]: - """Filter columns names available in this table. + @classmethod + def from_dataframe(cls, data: pd.DataFrame) -> "AnalysisResultTable": + """Create new dataset with existing dataframe. Args: - columns: Specifying a set of columns to return. You can pass a list of each - column name to return, otherwise builtin column groups are available: - - * ``all``: Return all columns, including metadata to communicate - with experiment service, such as entry IDs. 
- * ``default``: Return columns including analysis result with supplementary - information about experiment. - * ``minimal``: Return only analysis subroutine returns. - + data: Bare dataframe object. - Raises: - ValueError: When column is given in string which doesn't match with any builtin group. + Returns: + A new AnalysisResults instance. """ - with self._lock: - if columns == "all": - return self._container.columns - if columns == "default": - return [ - "name", - "experiment", - "components", - "value", - "quality", - "backend", - "run_time", - ] + self._container.extra_columns() - if columns == "minimal": - return [ - "name", - "components", - "value", - "quality", - ] + self._container.extra_columns() - if not isinstance(columns, str): - out = [] - for column in columns: - if column in self._container.columns: - out.append(column) - else: - warnings.warn( - f"Specified column name {column} does not exist in this table.", - UserWarning, - ) - return out - raise ValueError( - f"Column group {columns} is not valid name. Use either 'all', 'default', 'minimal'." - ) - - def get_entry( - self, - index: str, - ) -> pd.Series: - """Get entry from the dataframe. - - Args: - index: Name of entry to acquire. + instance = AnalysisResultTable() + instance._data = pd.concat([instance._data, data]) + return instance - Returns: - Pandas Series of acquired entry. This doesn't mutate the table. + @property + def dataframe(self) -> pd.DataFrame: + """Dataframe object of analysis results.""" + with self._lock: + return self._data.copy(deep=False) - Raises: - ValueError: When index is not in this table. - """ + @property + def result_ids(self) -> list[str]: + """Result IDs in current dataset.""" with self._lock: - if index not in self._container.index: - raise ValueError(f"Table index {index} doesn't exist in this table.") + return list(self._data.result_id) - return self._container.loc[index] + @property + def columns(self) -> list[str]: + """All columns in current dataset.""" + with self._lock: + return list(self._data.columns) - # pylint: disable=arguments-renamed - def add_entry( + def add_data( self, - result_id: Optional[str] = None, - **kwargs, - ) -> pd.Series: - """Add new entry to the table. + *, + result_id: str | None = None, + **data, + ) -> str: + """Add new data to this dataset. Args: - result_id: Result ID. Automatically generated when not provided. - This must be valid hexadecimal UUID string. - kwargs: Description of new entry to register. + result_id: A unique UUID-4 string for this data entry. + The full string is used to identify the data in the experiment service database, + and a short ID is created by truncating this string as a dataframe index. + data: Arbitrary key-value pairs representing a single data entry. + Missing values for default columns are filled with ``None``. Returns: - Pandas Series of added entry. This doesn't mutate the table. - - Raises: - ValueError: When the truncated result id causes a collision in the table. + Assigned analysis result ID. """ - if not result_id: - result_id = self._unique_table_index() + result_id = result_id or self._create_unique_hash() - matched = self.VALID_ID_REGEX.match(result_id) - if matched is None: + if matched := re.match(self.VALID_ID_REGEX, result_id): + # Short unique index is generated from result id. + # Showing full result id unnecessary occupies horizontal space of the html table. + # This mechanism is inspired by the github commit hash. 
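+            # e.g. "0123abcd-aaaa-bbbb-cccc-dddddddddddd" is indexed as "0123abcd".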
+ index = matched.group("short_id") + else: warnings.warn( - f"The result ID {result_id} is not a valid result ID string. " - "This entry might fail in saving with the experiment service.", + f"Result ID of {result_id} is not a valid UUID-4 string. ", UserWarning, ) - short_id = result_id[:8] - else: - # Short unique index is generated from result id. - # Showing full result id unnecessary occupies horizontal space of the html table. - # This mechanism is similar with the github commit hash. - short_id = matched.group("short_id") + index = result_id[:8] with self._lock: - if short_id in self._container.index: + if index in self._data.index: raise ValueError( - f"The short ID of the result_id '{short_id}' already exists in the " - "experiment data. Please use another ID to avoid index collision." + f"Table entry index {index} already exists. " + "Please use another ID to avoid index collision." ) - return self._container.add_entry( - index=short_id, - result_id=result_id, - **kwargs, - ) - - def drop_entry( + # Add missing columns to the table + if missing := data.keys() - set(self._data.columns): + for k in data: + # Order sensitive + if k in missing: + loc = len(self._data.columns) + self._data.insert(loc, k, value=None) + + # A hack to avoid unwanted dtype update. Appending new row with .loc indexer + # performs enlargement and implicitly changes dtype. This often induces a confusion of + # NaN (numeric container) and None (object container) for missing values. + # Filling a row with None values before assigning actual values can keep column dtype, + # but this behavior might change in future pandas version. + # https://github.com/pandas-dev/pandas/issues/6485 + # Also see test.framework.test_data_table.TestBaseTable.test_type_* + self._data.loc[index, :] = [None] * len(self._data.columns) + template = dict.fromkeys(self.columns, None) + template["result_id"] = result_id + template.update(data) + self._data.loc[index, :] = pd.array(list(template.values()), dtype=object) + + return index + + def get_data( self, - index: str, - ): - """Drop specified labels from rows. + key: str | int | slice | None = None, + columns: str | list[str] = "default", + ) -> pd.DataFrame: + """Get matched entries from this dataset. - This directly calls :meth:`.drop` of the DataFrame container object. + Args: + key: Identifier of the entry of interest. + columns: List of names or a policy (default, minimal, all) + of data columns included in the returned data frame. + + Returns: + Matched entries in a single data frame or series. + """ + if key is None: + with self._lock: + out = self._data.copy() + else: + uids = self._resolve_key(key) + with self._lock: + out = self._data.filter(items=uids, axis=0) + if columns != "all": + valid_columns = self._resolve_columns(columns) + out = out[valid_columns] + return out + + def del_data( + self, + key: str | int, + ) -> list[str]: + """Delete matched entries from this dataset. Args: - index: Name of entry to drop. + key: Identifier of the entry of interest. - Raises: - ValueError: When index is not in this table. + Returns: + Deleted analysis result IDs. 
""" + uids = self._resolve_key(key) with self._lock: - if index not in self._container.index: - raise ValueError(f"Table index {index} doesn't exist in this table.") - self._container.drop(index, inplace=True) + self._data.drop(uids, inplace=True) + + return uids def clear(self): - """Remove all elements from this container.""" + """Clear all table entries.""" with self._lock: - self._container = AnalysisResultContainer() + self._data = pd.DataFrame(columns=self.DEFAULT_COLUMNS) + + def copy(self): + """Create new thread-safe instance with the same data. - def _unique_table_index(self): - """Generate unique UUID which is unique in the table with first 8 characters.""" + .. note:: + This returns a new object with shallow copied data frame. + """ + with self._lock: + # Hold the lock so that no data can be added + new_instance = self.__class__() + new_instance._data = self._data.copy(deep=False) + return new_instance + + def _create_unique_hash(self) -> str: with self._lock: n = 0 while n < 1000: tmp_id = str(uuid.uuid4()) - if tmp_id[:8] not in self._container.index: + if tmp_id[:8] not in self._data.index: return tmp_id raise RuntimeError( "Unique result_id string cannot be prepared for this table within 1000 trials. " "Reduce number of entries, or manually provide a unique result_id." ) - def _repr_html_(self) -> Union[str, None]: - """Return HTML representation of this dataframe.""" + def _resolve_columns(self, columns: str | list[str]): with self._lock: - return self._container._repr_html_() + extra_columns = [c for c in self._data.columns if c not in self.DEFAULT_COLUMNS] + if columns == "default": + return [ + "name", + "experiment", + "components", + "value", + "quality", + "backend", + "run_time", + ] + extra_columns + if columns == "minimal": + return [ + "name", + "components", + "value", + "quality", + ] + extra_columns + if not isinstance(columns, str): + out = [] + for column in columns: + if column in self._data.columns: + out.append(column) + else: + warnings.warn( + f"Specified column {column} does not exist in this table.", + UserWarning, + ) + return out + raise ValueError( + f"Column group {columns} is not valid name. Use either 'all', 'default', 'minimal'." + ) - def __json_encode__(self) -> Dict[str, Any]: + def _resolve_key(self, key: int | slice | str) -> list[str]: + with self._lock: + if isinstance(key, int): + if key >= len(self): + raise ExperimentEntryNotFound(f"Analysis result {key} not found.") + return [self._data.index[key]] + if isinstance(key, slice): + keys = list(self._data.index)[key] + if len(keys) == 0: + raise ExperimentEntryNotFound(f"Analysis result {key} not found.") + return keys + if isinstance(key, str): + if key in self._data.index: + return [key] + # This key is name of entry + loc = self._data["name"] == key + if not any(loc): + raise ExperimentEntryNotFound(f"Analysis result {key} not found.") + return list(self._data.index[loc]) + + raise TypeError(f"Invalid key type {type(key)}. 
The key must be either int, slice, or str.") + + def __len__(self): + return len(self._data) + + def __contains__(self, item): + return item in self._data.index + + def __json_encode__(self) -> dict[str, Any]: with self._lock: return { "class": "AnalysisResultTable", - "data": self._container.to_dict(orient="index"), + "data": self._data.to_dict(orient="index"), } @classmethod - def __json_decode__(cls, value: Dict[str, Any]) -> "AnalysisResultTable": + def __json_decode__(cls, value: dict[str, Any]) -> "AnalysisResultTable": if not value.get("class", None) == "AnalysisResultTable": raise ValueError("JSON decoded value for AnalysisResultTable is not valid class type.") instance = object.__new__(cls) instance._lock = threading.RLock() - instance._container = AnalysisResultContainer.from_dict( + instance._data = pd.DataFrame.from_dict( data=value.get("data", {}), orient="index", ).replace({np.nan: None}) return instance + + def __getstate__(self): + state = self.__dict__.copy() + del state["_lock"] + return state + + def __setstate__(self, state): + self.__dict__.update(state) + self._lock = threading.RLock() diff --git a/qiskit_experiments/framework/composite/composite_analysis.py b/qiskit_experiments/framework/composite/composite_analysis.py index 66f6b1642a..18c2c1f576 100644 --- a/qiskit_experiments/framework/composite/composite_analysis.py +++ b/qiskit_experiments/framework/composite/composite_analysis.py @@ -235,7 +235,10 @@ def _marginalized_component_data(self, composite_data: List[Dict]) -> List[List[ if index not in marginalized_data: # Initialize data list for marginalized marginalized_data[index] = [] - sub_data = {"metadata": metadata["composite_metadata"][i]} + sub_data = { + k: v for k, v in datum.items() if k not in ("metadata", "counts", "memory") + } + sub_data["metadata"] = metadata["composite_metadata"][i] if "counts" in datum: if composite_clbits is not None: sub_data["counts"] = marginal_distribution( diff --git a/qiskit_experiments/framework/experiment_data.py b/qiskit_experiments/framework/experiment_data.py index 9913ec24ec..a7b8ce9620 100644 --- a/qiskit_experiments/framework/experiment_data.py +++ b/qiskit_experiments/framework/experiment_data.py @@ -21,7 +21,7 @@ from datetime import datetime, timezone from concurrent import futures from threading import Event -from functools import wraps, singledispatch +from functools import wraps from collections import deque import contextlib import copy @@ -686,7 +686,7 @@ def hgp(self, new_hgp: str) -> None: def _clear_results(self): """Delete all currently stored analysis results and figures""" # Schedule existing analysis results for deletion next save call - self._deleted_analysis_results.extend(list(self._analysis_results.result_ids())) + self._deleted_analysis_results.extend(list(self._analysis_results.result_ids)) self._analysis_results.clear() # Schedule existing figures for deletion next save call for key in self._figures.keys(): @@ -1397,7 +1397,7 @@ def add_analysis_results( backend = extra_values.pop("backend", self.backend_name) run_time = extra_values.pop("run_time", self.running_time) created_time = extra_values.pop("created_time", None) - self._analysis_results.add_entry( + self._analysis_results.add_data( name=result.name, value=result.value, quality=result.quality, @@ -1419,7 +1419,7 @@ def add_analysis_results( tags = tags or [] backend = backend or self.backend_name - self._analysis_results.add_entry( + uid = self._analysis_results.add_data( result_id=result_id, name=name, value=value, @@ -1429,14 +1429,13 
@@ def add_analysis_results( experiment_id=experiment_id, tags=tags or [], backend=backend, - run_time=run_time, # TODO add job RUNNING time + run_time=run_time, created_time=created_time, **extra_values, ) if self.auto_save: - last_index = self._analysis_results.result_ids()[-1][:8] service_result = _series_to_service_result( - series=self._analysis_results.get_entry(last_index), + series=self._analysis_results.get_data(uid, columns="all").iloc[0], service=self._service, auto_save=False, ) @@ -1446,39 +1445,28 @@ def add_analysis_results( def delete_analysis_result( self, result_key: Union[int, str], - ) -> str: + ) -> list[str]: """Delete the analysis result. Args: result_key: ID or index of the analysis result to be deleted. Returns: - Analysis result ID. + Deleted analysis result IDs. Raises: ExperimentEntryNotFound: If analysis result not found or multiple entries are found. """ - # Retrieve from DB if needed. - to_delete = self.analysis_results( - index=result_key, - block=False, - columns="all", - dataframe=True, - ) - if not isinstance(to_delete, pd.Series): - raise ExperimentEntryNotFound( - f"Multiple entries are found with result_key = {result_key}. " - "Try another key that can uniquely determine entry to delete." - ) + uids = self._analysis_results.del_data(result_key) - self._analysis_results.drop_entry(str(to_delete.name)) if self._service and self.auto_save: with service_exception_to_warning(): - self.service.delete_analysis_result(result_id=to_delete.result_id) + for uid in uids: + self.service.delete_analysis_result(result_id=uid) else: - self._deleted_analysis_results.append(to_delete.result_id) + self._deleted_analysis_results.extend(uids) - return to_delete.result_id + return uids def _retrieve_analysis_results(self, refresh: bool = False): """Retrieve service analysis results. @@ -1500,7 +1488,7 @@ def _retrieve_analysis_results(self, refresh: bool = False): extra = result.result_data["_extra"] if result.chisq is not None: extra["chisq"] = result.chisq - self._analysis_results.add_entry( + self._analysis_results.add_data( name=result.result_type, value=result.result_data["_value"], quality=cano_quality, @@ -1523,15 +1511,48 @@ def _retrieve_analysis_results(self, refresh: bool = False): ) def analysis_results( self, - index: Optional[Union[int, slice, str]] = None, + index: int | slice | str | None = None, refresh: bool = False, block: bool = True, - timeout: Optional[float] = None, - columns: Union[str, List[str]] = "default", + timeout: float | None = None, + columns: str | list[str] = "default", dataframe: bool = False, - ) -> Union[AnalysisResult, List[AnalysisResult], pd.DataFrame, pd.Series]: + ) -> AnalysisResult | list[AnalysisResult] | pd.DataFrame: """Return analysis results associated with this experiment. + When this method is called with ``dataframe=True``, the entries matching the + ``index`` condition are returned in dataframe format. + You can access a particular entry value by its row number or by its + short index string. For example, + + .. jupyter-input:: + + results = exp_data.analysis_results("res1", dataframe=True) + + print(results) + + .. jupyter-output:: + + name experiment components value quality backend run_time + 7dd286f4 res1 MyExp [Q0, Q1] 1 good test1 2024-02-06 13:46 + f62042a7 res1 MyExp [Q2, Q3] 2 good test1 2024-02-06 13:46 + + Getting the first result value with a row number (``iloc``). + + .. 
code-block:: python + + value = results.iloc[0].value + + Getting the first result value with a short index (``loc``). + + .. code-block:: python + + value = results.loc["7dd286f4"].value + + See the pandas `DataFrame`_ documentation for tips on data handling. + + .. _DataFrame: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html + Args: index: Index of the analysis result to be returned. Several types are accepted for convenience: @@ -1569,30 +1590,14 @@ def analysis_results( ) self._retrieve_analysis_results(refresh=refresh) - out = self._analysis_results.copy() - - if index is not None: - out = _filter_analysis_results(index, out) - if out is None: - msg = [f"Analysis result {index} not found."] - errors = self.errors() - if errors: - msg.append(f"Errors: {errors}") - raise ExperimentEntryNotFound("\n".join(msg)) - if dataframe: - valid_columns = self._analysis_results.filter_columns(columns) - out = out[valid_columns] - if len(out) == 1 and index is not None: - # For backward compatibility. - # One can directly access attributes with Series. e.g. out.value - return out.iloc[0] - return out + return self._analysis_results.get_data(index, columns=columns) # Convert back into List[AnalysisResult] which is payload for IBM experiment service. # This will be removed in future version. + tmp_df = self._analysis_results.get_data(index, columns="all") service_results = [] - for _, series in out.iterrows(): + for _, series in tmp_df.iterrows(): service_results.append( _series_to_service_result( series=series, @@ -1731,7 +1736,7 @@ def save( return analysis_results_to_create = [] - for _, series in self._analysis_results.copy().iterrows(): + for _, series in self._analysis_results.dataframe.iterrows(): # TODO We should support saving entire dataframe # Calling API per entry takes huge amount of time. legacy_result = _series_to_service_result( @@ -2343,7 +2348,7 @@ def copy(self, copy_results: bool = True) -> "ExperimentData": # Copy results and figures. # This requires analysis callbacks to finish self._wait_for_futures(self._analysis_futures.values(), name="analysis") - new_instance._analysis_results = self._analysis_results.copy_object() + new_instance._analysis_results = self._analysis_results.copy() with self._figures.lock: new_instance._figures = ThreadSafeOrderedDict() new_instance.add_figures(self._figures.values()) @@ -2730,68 +2735,3 @@ def _series_to_service_result( service_result.auto_save = auto_save return service_result - - -def _filter_analysis_results( - search_key: Union[int, slice, str], - data: pd.DataFrame, -) -> pd.DataFrame: - """Helper function to search result data for given key. - - Args: - search_key: Key to search for. - data: Full result dataframe. - - Returns: - Truncated dataframe. - """ - out = _search_data(search_key, data) - if isinstance(out, pd.Series): - return pd.DataFrame([out]) - return out - - -@singledispatch -def _search_data(search_key, data): - if search_key is None: - return data - raise TypeError( - f"Invalid search key {search_key}. " f"This must be either int, slice or str type." 
- ) - - -@_search_data.register -def _search_with_int( - search_key: int, - data: pd.DataFrame, -): - if search_key >= len(data): - return None - return data.iloc[search_key] - - -@_search_data.register -def _search_with_slice( - search_key: slice, - data: pd.DataFrame, -): - out = data[search_key] - if len(out) == 0: - return None - return out - - -@_search_data.register -def _search_with_str( - search_key: str, - data: pd.DataFrame, -): - if search_key in data.index: - # This key is table entry hash - return data.loc[search_key] - - # This key is name of entry - out = data[data["name"] == search_key] - if len(out) == 0: - return None - return out diff --git a/qiskit_experiments/framework/table_mixin.py b/qiskit_experiments/framework/table_mixin.py deleted file mode 100644 index fc59745199..0000000000 --- a/qiskit_experiments/framework/table_mixin.py +++ /dev/null @@ -1,109 +0,0 @@ -# This code is part of Qiskit. -# -# (C) Copyright IBM 2023. -# -# This code is licensed under the Apache License, Version 2.0. You may -# obtain a copy of this license in the LICENSE.txt file in the root directory -# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. -# -# Any modifications or derivative works of this code must retain this -# copyright notice, and modified files need to carry a notice indicating -# that they have been altered from the originals. - -# pylint: disable=no-member - -"""MinIn class for pandas dataframe.""" -from typing import List, Callable -from functools import wraps - -import pandas as pd - - -class DefaultColumnsMixIn: - """A mixin that sets a default data columns to a dataframe subclass. - - Subclass must define _default_columns class method to provide default column names. - This name list is order sensitive and the first element will show up at the - most left column of the dataframe table. - - .. note:: - - This Mix-in class is designed for use with the pandas DataFrame. - Implementation of this class may change in the future without notification - when we switch to other data container. - - """ - - _default_columns: Callable - - def __init_subclass__(cls, **kwargs): - # To make sure the mixin constructor is called. - super().__init_subclass__(**kwargs) - - @wraps(cls.__init__, assigned=("__annotations__",)) - def _call_init_and_set_columns(self, *init_args, **init_kwargs): - super(cls, self).__init__(*init_args, **init_kwargs) - if len(self.columns) == 0: - self.add_columns(*cls._default_columns()) - - # Monkey patch the mixed class constructor to make sure default columns are added - cls.__init__ = _call_init_and_set_columns - - def add_columns( - self: pd.DataFrame, - *new_columns: str, - ): - """Add new columns to the table. - - This operation mutates the current container. - - Args: - new_columns: Name of columns to add. - """ - # Order sensitive - new_columns = [c for c in new_columns if c not in self.columns] - if len(new_columns) == 0: - return - - # Update columns - for new_column in new_columns: - loc = len(self.columns) - self.insert(loc, new_column, value=None) - - def add_entry( - self: pd.DataFrame, - index: str, - **kwargs, - ): - """Add new entry to the dataframe. - - Args: - index: Name of this entry. Must be unique in this table. - kwargs: Description of new entry to register. - - Returns: - Pandas Series of added entry. This doesn't mutate the table. - """ - if not isinstance(index, str): - index = str(index) - if kwargs.keys() - set(self.columns): - self.add_columns(*kwargs.keys()) - - # A hack to avoid unwanted dtype update. 
Appending new row with .loc indexer - # performs enlargement and implicitly changes dtype. This often induces a confusion of - # NaN (numeric container) and None (object container) for missing values. - # Filling a row with None values before assigning actual values can keep column dtype, - # but this behavior might change in future pandas version. - # https://github.com/pandas-dev/pandas/issues/6485 - # Also see test.framework.test_data_table.TestBaseTable.test_type_* - self.loc[index] = [None] * len(self.columns) - - template = dict.fromkeys(self.columns, None) - template.update(kwargs) - self.loc[index] = pd.array(list(template.values()), dtype=object) - - def extra_columns( - self: pd.DataFrame, - ) -> List[str]: - """Return a list of columns added by a user.""" - return [c for c in self.columns if c not in self._default_columns()] diff --git a/qiskit_experiments/library/characterization/analysis/drag_analysis.py b/qiskit_experiments/library/characterization/analysis/drag_analysis.py index b6c9915a4f..351349ec08 100644 --- a/qiskit_experiments/library/characterization/analysis/drag_analysis.py +++ b/qiskit_experiments/library/characterization/analysis/drag_analysis.py @@ -116,9 +116,9 @@ def _generate_fit_guesses( List of fit options that are passed to the fitter function. """ # Use the highest-frequency curve to estimate the oscillation frequency. - max_rep_model_name = self._models[-1]._name + max_rep_model_name = self.model_names()[-1] max_rep = self.options.data_subfit_map[max_rep_model_name]["nrep"] - curve_data = curve_data.get_subset_of(max_rep_model_name) + curve_data = curve_data.filter(series=max_rep_model_name) x_data = curve_data.x min_beta, max_beta = min(x_data), max(x_data) diff --git a/qiskit_experiments/library/characterization/analysis/ramsey_xy_analysis.py b/qiskit_experiments/library/characterization/analysis/ramsey_xy_analysis.py index 27a588a550..77c87ef17c 100644 --- a/qiskit_experiments/library/characterization/analysis/ramsey_xy_analysis.py +++ b/qiskit_experiments/library/characterization/analysis/ramsey_xy_analysis.py @@ -113,8 +113,8 @@ def _generate_fit_guesses( Returns: List of fit options that are passed to the fitter function. """ - ramx_data = curve_data.get_subset_of("X") - ramy_data = curve_data.get_subset_of("Y") + ramx_data = curve_data.filter(series="X") + ramy_data = curve_data.filter(series="Y") # At very low frequency, y value of X (Y) curve stay at P=1.0 (0.5) for all x values. # Computing y peak-to-peak with combined data gives fake amplitude of 0.25. diff --git a/qiskit_experiments/library/characterization/analysis/t1_analysis.py b/qiskit_experiments/library/characterization/analysis/t1_analysis.py index 9ef0ed3bc3..f793da1108 100644 --- a/qiskit_experiments/library/characterization/analysis/t1_analysis.py +++ b/qiskit_experiments/library/characterization/analysis/t1_analysis.py @@ -127,8 +127,8 @@ def _format_data( New scatter table instance including fit data. 
""" # check if the SVD decomposition categorized 0 as 1 by calculating the average slope - diff_y = np.diff(curve_data.yval) + diff_y = np.diff(curve_data.y) avg_slope = sum(diff_y) / len(diff_y) if avg_slope > 0: - curve_data.yval = 1 - curve_data.yval + curve_data.y = 1 - curve_data.y return super()._format_data(curve_data) diff --git a/qiskit_experiments/library/characterization/analysis/zz_ramsey_analysis.py b/qiskit_experiments/library/characterization/analysis/zz_ramsey_analysis.py index 5f5c9770bc..c657f21596 100644 --- a/qiskit_experiments/library/characterization/analysis/zz_ramsey_analysis.py +++ b/qiskit_experiments/library/characterization/analysis/zz_ramsey_analysis.py @@ -142,8 +142,8 @@ def _generate_fit_guesses( y_ptp = y_max - y_min x_max = np.max(curve_data.x) - data_0 = curve_data.get_subset_of("0") - data_1 = curve_data.get_subset_of("1") + data_0 = curve_data.filter(series="0") + data_1 = curve_data.filter(series="1") def typical_step(arr): """Find the typical step size of an array""" diff --git a/qiskit_experiments/library/driven_freq_tuning/coefficients.py b/qiskit_experiments/library/driven_freq_tuning/coefficients.py index 89dd840ed2..c487f87f0a 100644 --- a/qiskit_experiments/library/driven_freq_tuning/coefficients.py +++ b/qiskit_experiments/library/driven_freq_tuning/coefficients.py @@ -181,6 +181,7 @@ def __eq__(self, other): self.neg_coef_o1 == other.neg_coef_o1, self.neg_coef_o2 == other.neg_coef_o2, self.neg_coef_o3 == other.neg_coef_o3, + self.offset == other.offset, ] ) diff --git a/qiskit_experiments/library/driven_freq_tuning/ramsey_amp_scan_analysis.py b/qiskit_experiments/library/driven_freq_tuning/ramsey_amp_scan_analysis.py index 9ced48b07a..bde5750c32 100644 --- a/qiskit_experiments/library/driven_freq_tuning/ramsey_amp_scan_analysis.py +++ b/qiskit_experiments/library/driven_freq_tuning/ramsey_amp_scan_analysis.py @@ -254,26 +254,21 @@ def _format_data( ) -> curve.ScatterTable: curve_data = super()._format_data(curve_data, category="ramsey_xy") - ramsey_xy = curve_data[curve_data.category == "ramsey_xy"] + ramsey_xy = curve_data.filter(category="ramsey_xy") + y_mean = ramsey_xy.y.mean() # Create phase data by arctan(Y/X) - columns = list(curve_data.columns) - phase_data = np.empty((0, len(columns))) - y_mean = ramsey_xy.yval.mean() - - grouped = ramsey_xy.groupby("name") - for m_id, direction in enumerate(("pos", "neg")): - x_quadrature = grouped.get_group(f"X{direction}") - y_quadrature = grouped.get_group(f"Y{direction}") - if not np.array_equal(x_quadrature.xval, y_quadrature.xval): + for data_id, direction in enumerate(("pos", "neg")): + x_quadrature = ramsey_xy.filter(series=f"X{direction}") + y_quadrature = ramsey_xy.filter(series=f"Y{direction}") + if not np.array_equal(x_quadrature.x, y_quadrature.x): raise ValueError( "Amplitude values of X and Y quadrature are different. " "Same values must be used." 
) - x_uarray = unp.uarray(x_quadrature.yval, x_quadrature.yerr) - y_uarray = unp.uarray(y_quadrature.yval, y_quadrature.yerr) - - amplitudes = x_quadrature.xval.to_numpy() + x_uarray = unp.uarray(x_quadrature.y, x_quadrature.y_err) + y_uarray = unp.uarray(y_quadrature.y, y_quadrature.y_err) + amplitudes = x_quadrature.x # pylint: disable=no-member phase = unp.arctan2(y_uarray - y_mean, x_uarray - y_mean) @@ -288,17 +283,24 @@ def _format_data( unwrapped_phase = unwrapped_phase + (phase_n[-1] - unwrapped_phase[-1]) # Store new data - tmp = np.empty((len(amplitudes), len(columns)), dtype=object) - tmp[:, columns.index("xval")] = amplitudes - tmp[:, columns.index("yval")] = unwrapped_phase / self._freq_phase_coef() - tmp[:, columns.index("yerr")] = phase_s / self._freq_phase_coef() - tmp[:, columns.index("name")] = f"FREQ{direction}" - tmp[:, columns.index("class_id")] = m_id - tmp[:, columns.index("shots")] = x_quadrature.shots + y_quadrature.shots - tmp[:, columns.index("category")] = category - phase_data = np.r_[phase_data, tmp] - - return curve_data.append_list_values(other=phase_data) + unwrapped_phase /= self._freq_phase_coef() + phase_s /= self._freq_phase_coef() + shot_sums = x_quadrature.shots + y_quadrature.shots + for new_x, new_y, new_y_err, shot in zip( + amplitudes, unwrapped_phase, phase_s, shot_sums + ): + curve_data.add_row( + xval=new_x, + yval=new_y, + yerr=new_y_err, + series_name=f"FREQ{direction}", + series_id=data_id, + shots=shot, + category=category, + analysis=self.name, + ) + + return curve_data def _generate_fit_guesses( self, @@ -355,39 +357,39 @@ def _create_figures( ) -> List["matplotlib.figure.Figure"]: # plot unwrapped phase on first axis - for d in ("pos", "neg"): - sub_data = curve_data[(curve_data.name == f"FREQ{d}") & (curve_data.category == "freq")] + for direction in ("pos", "neg"): + sub_data = curve_data.filter(series=f"FREQ{direction}", category="freq") self.plotter.set_series_data( - series_name=f"F{d}", - x_formatted=sub_data.xval.to_numpy(), - y_formatted=sub_data.yval.to_numpy(), - y_formatted_err=sub_data.yerr.to_numpy(), + series_name=f"F{direction}", + x_formatted=sub_data.x, + y_formatted=sub_data.y, + y_formatted_err=sub_data.y_err, ) # plot raw RamseyXY plot on second axis for name in ("Xpos", "Ypos", "Xneg", "Yneg"): - sub_data = curve_data[(curve_data.name == name) & (curve_data.category == "ramsey_xy")] + sub_data = curve_data.filter(series=name, category="ramsey_xy") self.plotter.set_series_data( series_name=name, - x_formatted=sub_data.xval.to_numpy(), - y_formatted=sub_data.yval.to_numpy(), - y_formatted_err=sub_data.yerr.to_numpy(), + x_formatted=sub_data.x, + y_formatted=sub_data.y, + y_formatted_err=sub_data.y_err, ) # find base and amplitude guess - ramsey_xy = curve_data[curve_data.category == "ramsey_xy"] - offset_guess = 0.5 * (ramsey_xy.yval.min() + ramsey_xy.yval.max()) - amp_guess = 0.5 * np.ptp(ramsey_xy.yval) + ramsey_xy = curve_data.filter(category="ramsey_xy") + offset_guess = 0.5 * (np.min(ramsey_xy.y) + np.max(ramsey_xy.y)) + amp_guess = 0.5 * np.ptp(ramsey_xy.y) # plot frequency and Ramsey fit lines - line_data = curve_data[curve_data.category == "fitted"] + line_data = curve_data.filter(category="fitted") for direction in ("pos", "neg"): - sub_data = line_data[line_data.name == f"FREQ{direction}"] + sub_data = line_data.filter(series=f"FREQ{direction}") if len(sub_data) == 0: continue - xval = sub_data.xval.to_numpy() - yn = sub_data.yval.to_numpy() - ys = sub_data.yerr.to_numpy() + xval = sub_data.x + yn = 
sub_data.y + ys = sub_data.y_err yval = unp.uarray(yn, ys) * self._freq_phase_coef() # Ramsey fit lines are predicted from the phase fit line. diff --git a/qiskit_experiments/library/randomized_benchmarking/interleaved_rb_analysis.py b/qiskit_experiments/library/randomized_benchmarking/interleaved_rb_analysis.py index 7864b20436..b389cbd74d 100644 --- a/qiskit_experiments/library/randomized_benchmarking/interleaved_rb_analysis.py +++ b/qiskit_experiments/library/randomized_benchmarking/interleaved_rb_analysis.py @@ -141,12 +141,12 @@ def _generate_fit_guesses( b_guess = 1 / 2**self._num_qubits # for standard RB curve - std_curve = curve_data.get_subset_of("standard") + std_curve = curve_data.filter(series="standard") alpha_std = curve.guess.rb_decay(std_curve.x, std_curve.y, b=b_guess) a_std = (std_curve.y[0] - b_guess) / (alpha_std ** std_curve.x[0]) # for interleaved RB curve - int_curve = curve_data.get_subset_of("interleaved") + int_curve = curve_data.filter(series="interleaved") alpha_int = curve.guess.rb_decay(int_curve.x, int_curve.y, b=b_guess) a_int = (int_curve.y[0] - b_guess) / (alpha_int ** int_curve.x[0]) diff --git a/requirements.txt b/requirements.txt index 2bda6ad888..54ea5ea51c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,4 @@ matplotlib>=3.4 uncertainties lmfit rustworkx -pandas>=1.1.5,<2.2.0 +pandas>=1.1.5 diff --git a/test/curve_analysis/test_scatter_table.py b/test/curve_analysis/test_scatter_table.py new file mode 100644 index 0000000000..7166b64a25 --- /dev/null +++ b/test/curve_analysis/test_scatter_table.py @@ -0,0 +1,285 @@ +# This code is part of Qiskit. +# +# (C) Copyright IBM 2021. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. + +"""Test scatter table.""" + +from test.base import QiskitExperimentsTestCase +import pandas as pd +import numpy as np + +from qiskit_experiments.curve_analysis.scatter_table import ScatterTable + + +class TestScatterTable(QiskitExperimentsTestCase): + """Test cases for curve analysis ScatterTable.""" + + def setUp(self): + super().setUp() + + source = { + "xval": [0.100, 0.100, 0.200, 0.200, 0.100, 0.200, 0.100, 0.200, 0.100, 0.200], + "yval": [0.192, 0.784, 0.854, 0.672, 0.567, 0.488, 0.379, 0.671, 0.784, 0.672], + "yerr": [0.002, 0.091, 0.090, 0.027, 0.033, 0.038, 0.016, 0.048, 0.091, 0.027], + "series_name": [ + "model1", + "model2", + "model1", + "model2", + "model1", + "model1", + "model1", + "model1", + "model2", + "model2", + ], + "series_id": [0, 1, 0, 1, 0, 0, 0, 0, 1, 1], + "category": [ + "raw", + "raw", + "raw", + "raw", + "raw", + "raw", + "formatted", + "formatted", + "formatted", + "formatted", + ], + "shots": [ + 1000, + 1000, + 1000, + 1000, + 1000, + 1000, + 2000, + 2000, + 1000, + 1000, + ], + "analysis": [ + "Fit1", + "Fit1", + "Fit1", + "Fit1", + "Fit2", + "Fit2", + "Fit1", + "Fit1", + "Fit1", + "Fit1", + ], + } + self.reference = pd.DataFrame.from_dict(source) + + def test_create_table_from_dataframe(self): + """Test creating table from dataframe and output dataframe.""" + # ScatterTable automatically converts dtype. + # For pure dataframe equality check pre-format the source. 
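+ # _format_table applies that dtype conversion up front, so the equality check below is exact.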
+ formatted_ref = ScatterTable._format_table(self.reference) + + obj = ScatterTable.from_dataframe(formatted_ref) + self.assertTrue(obj.dataframe.equals(formatted_ref)) + + def test_factory_method_check_all_members(self): + """Test to check the factory method populates all instance members.""" + to_test = ScatterTable.from_dataframe(pd.DataFrame(columns=ScatterTable.COLUMNS)) + ref = ScatterTable() + self.assertEqual(to_test.__dict__.keys(), ref.__dict__.keys()) + + def test_two_construction_method_identical(self): + """Check if two tables constructed differently from the same source are identical.""" + new_table = ScatterTable() + for _, row_data in self.reference.iterrows(): + new_table.add_row(**row_data) + + ref_table = ScatterTable.from_dataframe(self.reference) + self.assertEqual(new_table, ref_table) + + def test_add_row(self): + """Test adding single row to the table without and with missing data.""" + obj = ScatterTable() + obj.add_row( + xval=0.1, + yval=2.3, + yerr=0.4, + series_name="model1", + series_id=0, + category="raw", + shots=1000, + analysis="Test", + ) + obj.add_row( + category="raw", + xval=0.2, + yval=3.4, + ) + self.assertEqual(len(obj), 2) + np.testing.assert_array_equal(obj.x, np.array([0.1, 0.2])) + np.testing.assert_array_equal(obj.y, np.array([2.3, 3.4])) + np.testing.assert_array_equal(obj.y_err, np.array([0.4, np.nan])) + np.testing.assert_array_equal(obj.series_name, np.array(["model1", None])) + np.testing.assert_array_equal(obj.series_id, np.array([0, None])) + np.testing.assert_array_equal(obj.category, np.array(["raw", "raw"])) + np.testing.assert_array_equal( + # Numpy tries to handle nan strictly, but isnan only works for float dtype. + # Original data is object type, because we want to keep shot number integer, + # and there is no Numpy nullable integer. 
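+ # Casting the object dtype to float turns the missing None into NaN, which assert_array_equal treats as equal when it appears in both arrays.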
+ obj.shots.astype(float), + np.array([1000, np.nan], dtype=float), + ) + np.testing.assert_array_equal(obj.analysis, np.array(["Test", None])) + + def test_set_values(self): + """Test setting new column values through setter.""" + obj = ScatterTable() + # add three empty rows + obj.add_row() + obj.add_row() + obj.add_row() + + # Set sequence + obj.x = [0.1, 0.2, 0.3] + obj.y = [1.3, 1.4, 1.5] + obj.y_err = [0.3, 0.5, 0.7] + + # Broadcast single value + obj.series_id = 0 + obj.series_name = "model0" + + np.testing.assert_array_equal(obj.x, np.array([0.1, 0.2, 0.3])) + np.testing.assert_array_equal(obj.y, np.array([1.3, 1.4, 1.5])) + np.testing.assert_array_equal(obj.y_err, np.array([0.3, 0.5, 0.7])) + np.testing.assert_array_equal(obj.series_id, np.array([0, 0, 0])) + np.testing.assert_array_equal(obj.series_name, np.array(["model0", "model0", "model0"])) + + def test_get_subset_numbers(self): + """Test end-user shortcut for getting the subset of x, y, y_err data.""" + obj = ScatterTable.from_dataframe(self.reference) + + np.testing.assert_array_equal(obj.xvals("model1", "raw", "Fit1"), np.array([0.100, 0.200])) + np.testing.assert_array_equal(obj.yvals("model1", "raw", "Fit1"), np.array([0.192, 0.854])) + np.testing.assert_array_equal(obj.yerrs("model1", "raw", "Fit1"), np.array([0.002, 0.090])) + + def test_warn_composite_values(self): + """Test raise warning when returned x, y, y_err data contains multiple data series.""" + obj = ScatterTable.from_dataframe(self.reference) + + with self.assertWarns(UserWarning): + obj.xvals() + with self.assertWarns(UserWarning): + obj.yvals() + with self.assertWarns(UserWarning): + obj.yerrs() + + def test_filter_data_by_series_id(self): + """Test filter table data with series index.""" + obj = ScatterTable.from_dataframe(self.reference) + + filtered = obj.filter(series=0) + self.assertEqual(len(filtered), 6) + np.testing.assert_array_equal(filtered.x, np.array([0.1, 0.2, 0.1, 0.2, 0.1, 0.2])) + np.testing.assert_array_equal(filtered.series_id, np.array([0, 0, 0, 0, 0, 0])) + + def test_filter_data_by_series_name(self): + """Test filter table data with series name.""" + obj = ScatterTable.from_dataframe(self.reference) + + filtered = obj.filter(series="model1") + self.assertEqual(len(filtered), 6) + np.testing.assert_array_equal(filtered.x, np.array([0.1, 0.2, 0.1, 0.2, 0.1, 0.2])) + np.testing.assert_array_equal( + filtered.series_name, + np.array(["model1", "model1", "model1", "model1", "model1", "model1"]), + ) + + def test_filter_data_by_category(self): + """Test filter table data with data category.""" + obj = ScatterTable.from_dataframe(self.reference) + + filtered = obj.filter(category="formatted") + self.assertEqual(len(filtered), 4) + np.testing.assert_array_equal(filtered.x, np.array([0.1, 0.2, 0.1, 0.2])) + np.testing.assert_array_equal( + filtered.category, np.array(["formatted", "formatted", "formatted", "formatted"]) + ) + + def test_filter_data_by_analysis(self): + """Test filter table data with associated analysis class.""" + obj = ScatterTable.from_dataframe(self.reference) + + filtered = obj.filter(analysis="Fit2") + self.assertEqual(len(filtered), 2) + np.testing.assert_array_equal(filtered.x, np.array([0.1, 0.2])) + np.testing.assert_array_equal(filtered.analysis, np.array(["Fit2", "Fit2"])) + + def test_filter_multiple(self): + """Test filter table data with multiple attributes.""" + obj = ScatterTable.from_dataframe(self.reference) + + filtered = obj.filter(series=0, category="raw", analysis="Fit1") + 
self.assertEqual(len(filtered), 2) + np.testing.assert_array_equal(filtered.x, np.array([0.1, 0.2])) + np.testing.assert_array_equal(filtered.series_id, np.array([0, 0])) + np.testing.assert_array_equal(filtered.category, np.array(["raw", "raw"])) + np.testing.assert_array_equal(filtered.analysis, np.array(["Fit1", "Fit1"])) + + def test_iter_class(self): + """Test iterating over mini tables associated with different series indices.""" + obj = ScatterTable.from_dataframe(self.reference).filter(category="raw") + + class_iter = obj.iter_by_series_id() + + series_id, table0 = next(class_iter) + ref_table_cls0 = obj.filter(series=0) + self.assertEqual(series_id, 0) + self.assertEqual(table0, ref_table_cls0) + + series_id, table1 = next(class_iter) + ref_table_cls1 = obj.filter(series=1) + self.assertEqual(series_id, 1) + self.assertEqual(table1, ref_table_cls1) + + def test_iter_groups(self): + """Test iterating over mini tables associated with multiple attributes.""" + obj = ScatterTable.from_dataframe(self.reference).filter(category="raw") + + class_iter = obj.iter_groups("series_id", "xval") + + (series_id, xval), table0 = next(class_iter) + self.assertEqual(series_id, 0) + self.assertEqual(xval, 0.1) + self.assertEqual(len(table0), 2) + np.testing.assert_array_equal(table0.y, [0.192, 0.567]) + + (series_id, xval), table1 = next(class_iter) + self.assertEqual(series_id, 0) + self.assertEqual(xval, 0.2) + self.assertEqual(len(table1), 2) + np.testing.assert_array_equal(table1.y, [0.854, 0.488]) + + (series_id, xval), table2 = next(class_iter) + self.assertEqual(series_id, 1) + self.assertEqual(xval, 0.1) + self.assertEqual(len(table2), 1) + np.testing.assert_array_equal(table2.y, [0.784]) + + (series_id, xval), table3 = next(class_iter) + self.assertEqual(series_id, 1) + self.assertEqual(xval, 0.2) + self.assertEqual(len(table3), 1) + np.testing.assert_array_equal(table3.y, [0.672]) + + def test_roundtrip_table(self): + """Test ScatterTable is JSON serializable.""" + obj = ScatterTable.from_dataframe(self.reference) + self.assertRoundTripSerializable(obj) diff --git a/test/extended_equality.py b/test/extended_equality.py index 9131492904..369d6f169d 100644 --- a/test/extended_equality.py +++ b/test/extended_equality.py @@ -294,8 +294,8 @@ def _check_result_table( **kwargs, ): """Check equality of data frame which may involve Qiskit Experiments class value.""" - table1 = data1.copy().to_dict(orient="index") - table2 = data2.copy().to_dict(orient="index") + table1 = data1.dataframe.to_dict(orient="index") + table2 = data2.dataframe.to_dict(orient="index") for table in (table1, table2): for result in table.values(): result.pop("created_time") diff --git a/test/framework/test_analysis_results_table.py b/test/framework/test_analysis_results_table.py new file mode 100644 index 0000000000..ea8e566ab4 --- /dev/null +++ b/test/framework/test_analysis_results_table.py @@ -0,0 +1,138 @@ +# This code is part of Qiskit. +# +# (C) Copyright IBM 2023. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. 
+ +"""Test case for data table.""" + +from test.base import QiskitExperimentsTestCase + +import uuid +from qiskit_experiments.framework.analysis_result_table import AnalysisResultTable +from qiskit_experiments.database_service.exceptions import ExperimentEntryNotFound + + +class TestAnalysisTable(QiskitExperimentsTestCase): + """Test case for extra functionality of analysis table.""" + + def test_add_get_entry_with_result_id(self): + """Test adding entry with result_id. Index is created by truncating long string.""" + table = AnalysisResultTable() + table.add_data(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=0.123) + self.assertEqual(table.get_data("9a0bdec8").iloc[0].value, 0.123) + + def test_drop_entry(self): + """Test drop entry from the table.""" + table = AnalysisResultTable() + table.add_data(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=0.123) + table.del_data("9a0bdec8") + + self.assertEqual(len(table), 0) + + def test_drop_non_existing_entry(self): + """Test dropping non-existing entry raises ValueError.""" + table = AnalysisResultTable() + with self.assertRaises(ExperimentEntryNotFound): + table.del_data("9a0bdec8") + + def test_raises_adding_duplicated_index(self): + """Test adding duplicated index should raise.""" + table = AnalysisResultTable() + table.add_data(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=0.0) + + with self.assertRaises(ValueError): + # index 9a0bdec8 is already used + table.add_data(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=1.0) + + def test_clear_container(self): + """Test reset table.""" + table = AnalysisResultTable() + table.add_data(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=0.0, extra=123) + self.assertEqual(len(table), 1) + + table.clear() + self.assertEqual(len(table), 0) + self.assertListEqual(table.columns, AnalysisResultTable.DEFAULT_COLUMNS) + + def test_extra_column_name_is_always_returned(self): + """Test extra column names are always returned in filtered column names.""" + table = AnalysisResultTable() + table.add_data(extra=0.123) + + minimal_columns = table.get_data(0, "minimal") + self.assertTrue("extra" in minimal_columns.columns) + + default_columns = table.get_data(0, "default") + self.assertTrue("extra" in default_columns.columns) + + all_columns = table.get_data(0, "all") + self.assertTrue("extra" in all_columns.columns) + + def test_get_custom_columns(self): + """Test getting entry with user-specified columns.""" + table = AnalysisResultTable() + table.add_data(name="test", value=0) + + cols = ["name", "value"] + custom_columns = table.get_data(0, cols) + self.assertListEqual(list(custom_columns.columns), cols) + + def test_warning_non_existing_columns(self): + """Test raise user warning when attempt to get non-existing column.""" + table = AnalysisResultTable() + table.add_data(name="test", value=0) + + with self.assertWarns(UserWarning): + table.get_data(0, ["not_existing_column"]) + + def test_listing_result_id(self): + """Test returning result IDs of all stored entries.""" + table = AnalysisResultTable() + + ref_ids = [str(uuid.uuid4()) for _ in range(10)] + for ref_id in ref_ids: + table.add_data(result_id=ref_id, value=0) + + self.assertListEqual(table.result_ids, ref_ids) + + def test_no_overlap_result_id(self): + """Test automatically prepare unique result IDs for sufficient number of entries.""" + table = AnalysisResultTable() + + for i in range(100): + table.add_data(value=i) + + self.assertEqual(len(table), 100) + + def test_round_trip(self): + """Test JSON 
roundtrip serialization with the experiment encoder.""" + table = AnalysisResultTable() + table.add_data(result_id="30d5d05c-c074-4d3c-9530-07a83d48883a", name="x", value=0.0) + table.add_data(result_id="7c305972-858d-42a0-9b5e-57162efe20a1", name="y", value=1.0) + table.add_data(result_id="61d8d351-c0cf-4a0a-ae57-fde0f3baa00d", name="z", value=2.0) + + self.assertRoundTripSerializable(table) + + def test_round_trip_with_extra(self): + """Test JSON roundtrip serialization with extra columns containing missing value.""" + table = AnalysisResultTable() + table.add_data( + result_id="30d5d05c-c074-4d3c-9530-07a83d48883a", + name="x", + value=0.0, + extra1=2, + ) + table.add_data( + result_id="7c305972-858d-42a0-9b5e-57162efe20a1", + name="y", + value=1.0, + extra2=0.123, + ) + self.assertRoundTripSerializable(table) diff --git a/test/framework/test_composite.py b/test/framework/test_composite.py index 791b7b9689..1ca1e5a5b6 100644 --- a/test/framework/test_composite.py +++ b/test/framework/test_composite.py @@ -719,6 +719,8 @@ def test_composite_count_memory_marginalization(self, memory): "metadata": {"experiment_type": "FineXAmplitude", "qubits": [0]}, "counts": {"0": 6, "1": 4}, "memory": ["0", "0", "1", "0", "0", "1", "1", "0", "0", "1"], + "shots": 10, + "meas_level": 2, } ], [ @@ -726,6 +728,8 @@ def test_composite_count_memory_marginalization(self, memory): "metadata": {"experiment_type": "FineXAmplitude", "qubits": [1]}, "counts": {"0": 5, "1": 5}, "memory": ["0", "1", "1", "0", "0", "0", "1", "0", "1", "1"], + "shots": 10, + "meas_level": 2, } ], ] @@ -775,6 +779,8 @@ def test_composite_single_kerneled_memory_marginalization(self): [[idx + 0.3, idx + 0.3]], [[idx + 0.4, idx + 0.4]], ], + "shots": 5, + "meas_level": 1, } self.assertEqual(expected, sub_data[0]) @@ -813,6 +819,8 @@ def test_composite_avg_kerneled_memory_marginalization(self): expected = { "metadata": {"experiment_type": "FineXAmplitude", "qubits": [idx]}, "memory": [[idx + 0.0, idx + 0.1]], + "shots": 5, + "meas_level": 1, } self.assertEqual(expected, sub_data[0]) diff --git a/test/framework/test_data_table.py b/test/framework/test_data_table.py deleted file mode 100644 index a1e34e7a1f..0000000000 --- a/test/framework/test_data_table.py +++ /dev/null @@ -1,221 +0,0 @@ -# This code is part of Qiskit. -# -# (C) Copyright IBM 2023. -# -# This code is licensed under the Apache License, Version 2.0. You may -# obtain a copy of this license in the LICENSE.txt file in the root directory -# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. -# -# Any modifications or derivative works of this code must retain this -# copyright notice, and modified files need to carry a notice indicating -# that they have been altered from the originals. 
- -"""Test case for data table.""" - -from test.base import QiskitExperimentsTestCase - -import uuid -import pandas as pd - -from qiskit_experiments.framework.analysis_result_table import AnalysisResultTable -from qiskit_experiments.framework.table_mixin import DefaultColumnsMixIn - - -def _callable_thread_local_add_entry(args, thread_table): - """A test callable that is called from multi-thread.""" - index, kwargs = args - thread_table.add_entry(index, **kwargs) - - -class TestBaseTable(QiskitExperimentsTestCase): - """Test case for default columns mix-in.""" - - class TestTable(pd.DataFrame, DefaultColumnsMixIn): - """A table class under test with test columns.""" - - @classmethod - def _default_columns(cls): - return ["value1", "value2", "value3"] - - def test_initializing_with_dict(self): - """Test initializing table with dictionary.""" - table = TestBaseTable.TestTable.from_dict( - { - "x": {"value1": 1.0, "value2": 2.0, "value3": 3.0}, - "y": {"value1": 4.0, "value2": 5.0, "value3": 6.0}, - }, - orient="index", - ) - self.assertListEqual(list(table.columns), ["value1", "value2", "value3"]) - - def test_add_entry(self): - """Test adding data with default keys to table.""" - table = TestBaseTable.TestTable() - table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0) - - self.assertListEqual(table.loc["x"].to_list(), [0.0, 1.0, 2.0]) - - def test_add_entry_with_missing_key(self): - """Test adding entry with partly specified keys.""" - table = TestBaseTable.TestTable() - table.add_entry(index="x", value1=0.0, value3=2.0) - self.assertListEqual(table.loc["x"].to_list(), [0.0, None, 2.0]) - - def test_add_entry_with_new_key(self): - """Test adding data with new keys to table.""" - table = TestBaseTable.TestTable() - table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0, extra=3.0) - - self.assertListEqual(list(table.columns), ["value1", "value2", "value3", "extra"]) - self.assertListEqual(table.loc["x"].to_list(), [0.0, 1.0, 2.0, 3.0]) - - def test_add_entry_with_multiple_new_keys(self): - """Test new keys are added to column and the key order is preserved.""" - table = TestBaseTable.TestTable() - table.add_entry(index="x", phi=0.1, lamb=0.2, theta=0.3) - - self.assertListEqual( - list(table.columns), ["value1", "value2", "value3", "phi", "lamb", "theta"] - ) - - def test_dtype_missing_value_is_none(self): - """Test if missing value is always None. - - Deta frame implicitly convert None into NaN for numeric container. - This should not happen. 
- """ - table = TestBaseTable.TestTable() - table.add_entry(index="x", value1=1.0) - table.add_entry(index="y", value2=1.0) - - self.assertEqual(table.loc["x", "value2"], None) - self.assertEqual(table.loc["y", "value1"], None) - - def test_dtype_adding_extra_later(self): - """Test adding new row later with a numeric value doesn't change None to NaN.""" - table = TestBaseTable.TestTable() - table.add_entry(index="x") - table.add_entry(index="y", extra=1.0) - - self.assertListEqual(table.loc["x"].to_list(), [None, None, None, None]) - - def test_dtype_adding_null_row(self): - """Test adding new row with empty value doesn't change dtype of the columns.""" - table = TestBaseTable.TestTable() - table.add_entry(index="x", extra1=1, extra2=1.0, extra3=True, extra4="abc") - table.add_entry(index="y") - - self.assertIsInstance(table.loc["x", "extra1"], int) - self.assertIsInstance(table.loc["x", "extra2"], float) - self.assertIsInstance(table.loc["x", "extra3"], bool) - self.assertIsInstance(table.loc["x", "extra4"], str) - - def test_filter_columns(self): - """Test filtering table with columns.""" - table = TestBaseTable.TestTable() - table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0) - - filt_table = table[["value1", "value3"]] - self.assertListEqual(filt_table.loc["x"].to_list(), [0.0, 2.0]) - - -class TestAnalysisTable(QiskitExperimentsTestCase): - """Test case for extra functionality of analysis table.""" - - def test_add_get_entry_with_result_id(self): - """Test adding entry with result_id. Index is created by truncating long string.""" - table = AnalysisResultTable() - table.add_entry(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=0.123) - self.assertEqual(table.get_entry("9a0bdec8").value, 0.123) - - def test_drop_entry(self): - """Test drop entry from the table.""" - table = AnalysisResultTable() - table.add_entry(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=0.123) - table.drop_entry("9a0bdec8") - - self.assertEqual(len(table), 0) - - def test_drop_non_existing_entry(self): - """Test dropping non-existing entry raises ValueError.""" - table = AnalysisResultTable() - with self.assertRaises(ValueError): - table.drop_entry("9a0bdec8") - - def test_raises_adding_duplicated_index(self): - """Test adding duplicated index should raise.""" - table = AnalysisResultTable() - table.add_entry(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=0.0) - - with self.assertRaises(ValueError): - # index 9a0bdec8 is already used - table.add_entry(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=1.0) - - def test_clear_container(self): - """Test reset table.""" - table = AnalysisResultTable() - table.add_entry(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=0.0, extra=123) - self.assertEqual(len(table), 1) - - table.clear() - self.assertEqual(len(table), 0) - self.assertListEqual(table.copy().extra_columns(), []) - - def test_extra_column_name_is_always_returned(self): - """Test extra column names are always returned in filtered column names.""" - table = AnalysisResultTable() - table.add_entry(extra=0.123) - - minimal_columns = table.filter_columns("minimal") - self.assertTrue("extra" in minimal_columns) - - default_columns = table.filter_columns("default") - self.assertTrue("extra" in default_columns) - - all_columns = table.filter_columns("all") - self.assertTrue("extra" in all_columns) - - def test_listing_result_id(self): - """Test returning result IDs of all stored entries.""" - table = AnalysisResultTable() - - ref_ids = [str(uuid.uuid4()) for _ in 
range(10)] - for ref_id in ref_ids: - table.add_entry(result_id=ref_id, value=0) - - self.assertListEqual(table.result_ids(), ref_ids) - - def test_no_overlap_result_id(self): - """Test automatically prepare unique result IDs for sufficient number of entries.""" - table = AnalysisResultTable() - - for i in range(100): - table.add_entry(value=i) - - self.assertEqual(len(table), 100) - - def test_round_trip(self): - """Test JSON roundtrip serialization with the experiment encoder.""" - table = AnalysisResultTable() - table.add_entry(result_id="30d5d05c-c074-4d3c-9530-07a83d48883a", name="x", value=0.0) - table.add_entry(result_id="7c305972-858d-42a0-9b5e-57162efe20a1", name="y", value=1.0) - table.add_entry(result_id="61d8d351-c0cf-4a0a-ae57-fde0f3baa00d", name="z", value=2.0) - - self.assertRoundTripSerializable(table) - - def test_round_trip_with_extra(self): - """Test JSON roundtrip serialization with extra columns containing missing value.""" - table = AnalysisResultTable() - table.add_entry( - result_id="30d5d05c-c074-4d3c-9530-07a83d48883a", - name="x", - value=0.0, - extra1=2, - ) - table.add_entry( - result_id="7c305972-858d-42a0-9b5e-57162efe20a1", - name="y", - value=1.0, - extra2=0.123, - ) - self.assertRoundTripSerializable(table)
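For reference, a minimal usage sketch of the refactored `AnalysisResultTable` API introduced by this patch. Method names, signatures, and defaults are taken from the diff above; the concrete values (such as the `name="T1"` entry) are illustrative only.

```python
from qiskit_experiments.framework.analysis_result_table import AnalysisResultTable

table = AnalysisResultTable()

# add_data truncates the UUID-4 string to an 8-character short ID
# (in the manner of a git short hash) and returns it.
short_id = table.add_data(
    result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b",
    name="T1",
    value=0.123,
)
assert short_id == "9a0bdec8"

# Entries resolve by short ID, entry name, row number, or slice;
# "columns" selects the "default", "minimal", or "all" column group.
frame = table.get_data(short_id, columns="minimal")
print(frame.iloc[0].value)  # 0.123

# Deletion accepts the same key types and returns the removed IDs.
table.del_data(short_id)
assert len(table) == 0
```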