From 8b579c9ac139bc56aab2541401afe444c48da8c3 Mon Sep 17 00:00:00 2001
From: dlegland
Date: Tue, 22 Dec 2020 10:20:51 +0100
Subject: [PATCH] Table: add pairPlot function

---
 matStats/@Table/corrcoef.m           |   4 +-
 matStats/@Table/correlationCircles.m |   4 +-
 matStats/@Table/pairPlot.m           | 184 +++++++++++++++++++++++++++
 tests/test_pairPlot.m                |  40 ++++++
 4 files changed, 228 insertions(+), 4 deletions(-)
 create mode 100644 matStats/@Table/pairPlot.m
 create mode 100644 tests/test_pairPlot.m

diff --git a/matStats/@Table/corrcoef.m b/matStats/@Table/corrcoef.m
index 244f5a2..351aa1d 100644
--- a/matStats/@Table/corrcoef.m
+++ b/matStats/@Table/corrcoef.m
@@ -47,7 +47,7 @@
 
 % ------
 % Author: David Legland
-% e-mail: david.legland@inra.fr
+% e-mail: david.legland@inrae.fr
 % Created: 2012-01-10, using Matlab 7.9.0.529 (R2009b)
 % Copyright 2012 INRA - Cepia Software Platform.
 
@@ -77,7 +77,7 @@
 
     % correlation coefficient matrix
     mat = corrcoef(data1, data2);
-    % foramt into a 2-by-2 Table
+    % format into a 2-by-2 Table
     names = {name1, name2};
     res = Table(mat, names, names);
 end
diff --git a/matStats/@Table/correlationCircles.m b/matStats/@Table/correlationCircles.m
index df67ccf..ac7f09b 100644
--- a/matStats/@Table/correlationCircles.m
+++ b/matStats/@Table/correlationCircles.m
@@ -21,7 +21,7 @@
 %     correlationCircles(tab)
 %
 %   See also
-%     corrcoef, plotmatrix
+%     corrcoef, pairPlot, plotmatrix
 %
 %   References
 %   It is inspired by the contribution of Taiyun Wei for R:
@@ -30,7 +30,7 @@
 
 % ------
 % Author: David Legland, based on plotmatrix code
-% e-mail: david.legland@inra.fr
+% e-mail: david.legland@inrae.fr
 % Created: 2012-07-16, using Matlab 7.9.0.529 (R2009b)
 % Copyright 2012 INRA - Cepia Software Platform.
 
diff --git a/matStats/@Table/pairPlot.m b/matStats/@Table/pairPlot.m
new file mode 100644
index 0000000..81adc04
--- /dev/null
+++ b/matStats/@Table/pairPlot.m
@@ -0,0 +1,184 @@
+function pairPlot(obj, varargin)
+% Pairwise scatter plots and histograms of table columns.
+%
+%   pairPlot(TAB)
+%   Pair-wise plot of the columns in the specified table.
+%
+%   pairPlot(TAB, GRP)
+%   Pair-wise plot of the columns in the specified table, using a
+%   coloration by group.
+%
+%   pairPlot(..., PNAME, PVALUE)
+%   Specifies additional parameter name-value pairs. Parameters can be:
+%   * 'Colors': the color associated to each group, as a NG-by-3 array
+%   * 'HistMode': the display mode of the histogram on the diagonal. Can
+%       be one of {'histogram', 'bar', 'kde', 'cdf'} (case-insensitive).
+%
+%   Examples
+%     % Display pair-wise plot of a simple table
+%     iris = Table.read('fisherIris');
+%     figure;
+%     pairPlot(iris(:,1:4));
+%     % Same display using different color per group
+%     figure;
+%     colors = parula(3);
+%     pairPlot(iris(:,1:4), iris(:,5), 'Colors', colors);
+%
+%   See also
+%     correlationCircles, plot, histogram, violinPlot
+%
+%   References:
+%   Rewritten from the function 'pairplot', by Ryosuke Takeuchi on the
+%   FileExchange
+%   https://fr.mathworks.com/matlabcentral/fileexchange/60866-pairplot-meas-label-group-colors-mode
+%
+
+% ------
+% Author: David Legland
+% e-mail: david.legland@inrae.fr
+% INRAE - BIA Research Unit - BIBS Platform (Nantes)
+% Created: 2020-12-21, using Matlab 9.8.0.1323502 (R2020a)
+% Copyright 2020 INRAE.
+
+
+%% Parse input arguments
+
+% Check if group argument is provided (either a Table or a cell array)
+groups = {};
+if nargin > 1
+    var1 = varargin{1};
+    if isa(var1, 'Table')
+        [groups, groupNames] = parseGroupInfos(var1);
+        groups = groupNames(groups);
+        nGroups = length(groupNames);
+        varargin(1) = [];
+
+    elseif iscell(varargin{1})
+        groups = var1;
+        groupNames = unique(groups);
+        nGroups = length(groupNames);
+        varargin(1) = [];
+    end
+end
+
+% parse group colors (empty 0-by-3 array when no group is specified)
+colors = lines(length(unique(groups)));
+if ~isempty(varargin) && isnumeric(varargin{1})
+    colors = varargin{1};
+    varargin(1) = [];
+end
+
+% parse optional arguments
+[colors, varargin] = parseInputOption(varargin, 'Colors', colors);
+[histMode, varargin] = parseInputOption(varargin, 'HistMode', 'histogram'); %#ok
+
+
+%% Pre processing
+
+% retrieve data and feature names
+data = obj.Data;
+nc = size(data, 2);
+labels = obj.ColNames;
+
+
+%% Scatter plots
+for i = 1:nc
+    for j = 1:nc
+        % select appropriate axis (column i, row j of the subplot grid)
+        ax = subplot(nc, nc, sub2ind([nc nc], i, j));
+
+        if i == 1
+            ylabel(labels{j});
+        end
+        if j == nc
+            xlabel(labels{i});
+        end
+
+        hold on;
+
+        if i == j
+            continue;
+        end
+
+        if isempty(groups)
+            plot(ax, data(:, i), data(:, j), '.');
+        else
+            for g = 1:nGroups
+                inds = strcmp(groups, groupNames{g});
+                plot(ax, data(inds, i), data(inds, j), ...
+                    '.', 'Color', colors(g,:));
+            end
+        end
+        xlim([min(data(:, i)) max(data(:, i))])
+    end
+end
+
+%% Plot histograms
+for i = 1:nc
+    % select appropriate axis (diagonal of the subplot grid)
+    ax = subplot(nc, nc, sub2ind([nc nc], i, i));
+    hold on;
+
+    bins = linspace(min(data(:,i)), max(data(:,i)), 20);
+
+    if strcmpi(histMode, 'histogram')
+        % Display histograms
+        if isempty(groups)
+            histogram(ax, data(:, i), bins, 'Normalization', 'probability');
+        else
+            for g = 1:nGroups
+                inds = strcmp(groups, groupNames{g});
+                histogram(ax, data(inds, i), bins, 'FaceColor', colors(g,:), ...
+                    'Normalization', 'probability');
+            end
+        end
+        xlim([bins(1) bins(end)]);
+
+    elseif strcmpi(histMode, 'bar')
+        % Display bars
+
+        % convert bin centers to edges
+        db = diff(bins) / 2;
+        edges = [bins(1)-db(1), bins(1:end-1)+db, bins(end)+db(end)];
+        edges(2:end) = edges(2:end) + eps(edges(2:end));
+
+        if isempty(groups)
+            [counts, ~] = histcounts(data(:, i), edges);
+            bar(ax, bins, counts, 'BarWidth', 1); % no group: keep default face color
+        else
+            for g = 1:nGroups
+                inds = strcmp(groups, groupNames{g});
+                [counts, ~] = histcounts(data(inds, i), edges);
+                bar(ax, bins, counts, 'BarWidth', 1, 'FaceColor', colors(g,:))
+            end
+        end
+        xlim([edges(1) edges(end)]);
+
+    elseif strcmpi(histMode, 'kde')
+        % Use kernel-density estimate
+        if isempty(groups)
+            [f, xf] = ksdensity(data(:,i));
+            plot(ax, xf, f);
+        else
+            for g = 1:nGroups
+                inds = strcmp(groups, groupNames{g});
+                [f, xf] = ksdensity(data(inds,i));
+                plot(ax, xf, f, 'Color', colors(g,:));
+            end
+        end
+
+    elseif strcmpi(histMode, 'cdf')
+        % Display empirical cumulative distribution functions
+        if isempty(groups)
+            [f, x] = ecdf(data(:, i)); % no group: use all the rows
+            plot(ax, x, f);
+        else
+            for g = 1:nGroups
+                inds = strcmp(groups, groupNames{g});
+                [f, x] = ecdf(data(inds, i));
+                plot(ax, x, f, 'Color', colors(g,:));
+            end
+        end
+    end
+end
+
diff --git a/tests/test_pairPlot.m b/tests/test_pairPlot.m
new file mode 100644
index 0000000..61efd25
--- /dev/null
+++ b/tests/test_pairPlot.m
@@ -0,0 +1,40 @@
+function tests = test_pairPlot
+% Test suite for the file pairPlot.
+%
+%   Test suite for the file pairPlot
+%
+%   Example
+%   test_pairPlot
+%
+%   See also
+%     pairPlot
+
+% ------
+% Author: David Legland
+% e-mail: david.legland@inrae.fr
+% Created: 2020-12-22, using Matlab 9.8.0.1323502 (R2020a)
+% Copyright 2020 INRAE - BIA-BIBS.
+
+tests = functiontests(localfunctions);
+
+function test_Iris(testCase) %#ok<*DEFNU>
+% Test call of function without group argument.
+
+iris = Table.read('fisherIris');
+hFig = figure;
+
+pairPlot(iris(:,1:4));
+
+close(hFig);
+
+
+function test_Iris_bySpecies(testCase) %#ok<*DEFNU>
+% Test call of function with a group argument (coloration by species).
+
+iris = Table.read('fisherIris');
+hFig = figure;
+
+pairPlot(iris(:,1:4), iris(:,5));
+
+close(hFig);
+