diff --git a/astetik/__init__.py b/astetik/__init__.py index e647fb8..e708c8e 100755 --- a/astetik/__init__.py +++ b/astetik/__init__.py @@ -16,6 +16,8 @@ from .plots.multikde import multikde from .plots.compare import compare from .plots.multicount import multicount +from .plots.bar1 import bar1 +from .plots.animate import Animation # from .plots.words import words @@ -25,4 +27,6 @@ from .tables.text import text # from .tables.timeseries import timeseries -__version__ = "1.1" +from .utils.transform import boolcols_to_cat + +__version__ = "1.9" diff --git a/astetik/plots/animate.py b/astetik/plots/animate.py new file mode 100644 index 0000000..24b51fa --- /dev/null +++ b/astetik/plots/animate.py @@ -0,0 +1,60 @@ +import glob +import os +import numpy as np +import matplotlib.pyplot as plt + +from IPython.display import clear_output + + +class Animation(): + + '''ANIMATED CHARTS + + Supports dataformat where x and y are compared against each other, and + label_col provides the title (e.g. year) for each frame in the animation. + + y and x are used for the legend labels when relevant. 
+ + ''' + + def __init__(self, data, x, y, label_col, plot_type, filename='animation', + dpi=72, palette='default'): + + self.plot_type = plot_type + self.data = data + self.x = x + self.y = y + self.label_col = label_col + self.palette = palette + self.filename = filename + self.frames = len(data) + self.dpi = dpi + + # RUNTIME + self._ = self._create_plots() + self._ = self._create_gif() + + def _create_plots(self): + + for i in range(self.frames): + data = np.array([self.data[self.x][i], self.data[self.y][i]]).astype(int) + self.plot_type(data, + labels=[self.x, self.y], + palette=self.palette, + sub_title=self.data[self.label_col][i], + save=True, + dpi=self.dpi) + clear_output() + plt.show() + + def _create_gif(self): + + gif_name = self.filename + file_list = glob.glob('astetik_*.png') + list.sort(file_list, key=lambda x: int(x.split('_')[1].split('.png')[0])) + + with open('image_list.txt', 'w') as file: + for item in file_list: + file.write("%s\n" % item) + + os.system('convert @image_list.txt {}.gif'.format(gif_name)) # magick on windows diff --git a/astetik/plots/bar1.py b/astetik/plots/bar1.py new file mode 100644 index 0000000..5cd88d6 --- /dev/null +++ b/astetik/plots/bar1.py @@ -0,0 +1,140 @@ +# EXCEPTIONAL IMPORT # +import matplotlib +matplotlib.use('Agg') +# ENDS # + +import matplotlib.pyplot as plt +from matplotlib.pyplot import rcParams +import seaborn as sns + +from ..style.template import _header, _footer +from ..utils.utils import _limiter, _scaler +from ..utils.utils import factorplot_sizing +from ..style.titles import _titles + + +def bar1(data, + x, + y, + multi_color=False, + palette='default', + style='astetik', + dpi=72, + title='', + sub_title='', + x_label='', + y_label='', + legend=True, + x_scale='linear', + y_scale='linear', + x_limit='auto', + y_limit='auto', + save=False): + + '''BAR PLOT + + A 1-dimensional bar graph for the case where there is a single + value per label. + + Inputs: 2 + + 1. 
USE + ====== + ast.bar1(data=patients, + x='icu_days', + y='insurance') + + 2. PARAMETERS + ============= + 2.1 INPUT PARAMETERS + -------------------- + data :: pandas dataframe + + x :: x-axis data (single value per label) + + y :: y-axis data (labels) + + -------------------- + 2.2. PLOT PARAMETERS + -------------------- + multi_color :: If True, label values will be used for hue. + + ---------------------- + 2.3. COMMON PARAMETERS + ---------------------- + palette :: One of the astetik palettes: + 'default' + 'colorblind' + 'blue_to_red' + 'blue_to_green' + 'red_to_green' + 'green_to_red' + 'violet_to_blue' + 'brown_to_green' + 'green_to_marine' + + Or use any cmap, seaborn or matplotlib + color or palette code, or hex value. + + style :: Use one of the three core styles: + 'astetik' # white + '538' # grey + 'solarized' # sepia + + Or alternatively use any matplotlib or seaborn + style definition. + + dpi :: the resolution of the plot (int value) + + title :: the title of the plot (string value) + + sub_title :: a secondary title to be shown below the title + + x_label :: string value for x-axis label + + y_label :: string value for y-axis label + + x_scale :: 'linear' or 'log' or 'symlog' + + y_scale :: 'linear' or 'log' or 'symlog' + + x_limit :: int or list with two ints + + y_limit :: int or list with two ints + + outliers :: Remove outliers using either 'zscore' or 'iqr' + ''' + + size, aspect = factorplot_sizing(data[x]) + + if multi_color == True: + n_colors = len(data[x].unique()) + else: + n_colors = 1 + + # HEADER STARTS >>> + palette = _header(palette, + style, + n_colors=n_colors, + dpi=dpi, + fig_height=None, + fig_width=None) + # <<< HEADER ENDS + p = sns.factorplot(data=data, + x=x, + y=y, + palette=palette, + aspect=aspect, + size=size, + kind='bar') + + # SCALING AND LIMITS STARTS >>> + if x_scale != 'linear' or y_scale != 'linear': + _scaler(p, x_scale, y_scale) + + # FOOTER STARTS >>> + _titles(title, sub_title=sub_title) + _footer(p, 
x_label, y_label, save=save) + + if data[x].min() < 0: + sns.despine(left=True) diff --git a/astetik/plots/bars.py b/astetik/plots/bars.py index c3cf922..84ac647 100644 --- a/astetik/plots/bars.py +++ b/astetik/plots/bars.py @@ -37,7 +37,7 @@ def bars(data, A multi-dimension bar plot that takes up to 5 features at a time. Inputs: 2 to 5 - Features: At least one continuous (or stepped) variable and results + Features: At least one continuous (or stepped) variable and rest can be categorical. 1. USE @@ -116,6 +116,8 @@ def bars(data, outliers :: Remove outliers using either 'zscore' or 'iqr' ''' + aspect = int(len(data[x].unique()) / 5) + if hue != None: n_colors = len(data[hue].unique()) else: @@ -137,6 +139,7 @@ def bars(data, col=col, col_wrap=col_wrap, palette=palette, + aspect=aspect, size=4, kind='bar') diff --git a/astetik/plots/corr.py b/astetik/plots/corr.py index e3b1732..5276452 100644 --- a/astetik/plots/corr.py +++ b/astetik/plots/corr.py @@ -133,4 +133,4 @@ def corr(data, _titles(title, sub_title=sub_title) _footer(p, x_label, y_label, save=save) - p.set_xticklabels(data, rotation=45) + p.set_xticklabels(data) diff --git a/astetik/plots/count.py b/astetik/plots/count.py index ee6c5dc..ca16059 100644 --- a/astetik/plots/count.py +++ b/astetik/plots/count.py @@ -7,6 +7,7 @@ def count(data, x, + sort=None, palette='default', style='astetik', dpi=72, @@ -44,7 +45,7 @@ def count(data, -------------------- 2.2. PLOT PARAMETERS -------------------- - None + sort :: If True, will be sorted based on input values. ---------------------- 2.3. 
COMMON PARAMETERS @@ -95,6 +96,9 @@ def count(data, aspect = int(len(data[x].unique()) / 5) + if sort != None: + sort = data[x].value_counts().index.values + # HEADER STARTS >>> palette = _header(palette, style, @@ -104,17 +108,18 @@ def count(data, fig_width=None) p = sns.factorplot(data=data, - x=x, + y=x, palette=palette, size=4, aspect=aspect, kind='count', legend=legend, - legend_out=False) + legend_out=False, + order=sort) # FOOTER _titles(title, sub_title) - _thousand_sep(p, p.ax) + _thousand_sep(p, p.ax, y_sep=False) _footer(p, x_label, y_label, save=save) p.set_xticklabels(None, rotation=90) diff --git a/astetik/plots/line.py b/astetik/plots/line.py index 46105e5..fca2e5d 100644 --- a/astetik/plots/line.py +++ b/astetik/plots/line.py @@ -1,6 +1,7 @@ import numpy as np +import pandas as pd import matplotlib.pyplot as plt -from matplotlib.dates import HourLocator +from matplotlib import ticker from ..style.formats import _thousand_sep from ..style.style import params @@ -164,7 +165,8 @@ def line(data, func=interval_func, freq=interval) - markers = ["o", "+", "x", "|", "-", ",", ".", "^", "v"] + markers = ["o", "+", "x", "|", "1", "8", "s", "p", + "o", "+", "x", "|", "1", "8", "s", "p"] # <<< END OF PLOT SPECIFIC # START OF HEADER >>> @@ -205,7 +207,8 @@ def line(data, # DATETIME FORMAT if time_frame != None: - date_handler(data[x[0]], ax, time_frame) + data[y] = pd.to_datetime(data[y]) + date_handler(data[y], ax, time_frame) # LIMITS if x_limit != None or y_limit != None: @@ -217,7 +220,6 @@ def line(data, _footer(p, x_label, y_label, save=save) if legend != False: - plt.legend(x, loc=1, ncol=lines) - if y != None: - hours = HourLocator() - ax.xaxis.set_minor_locator(hours) + plt.legend(x, loc=1, ncol=1, bbox_to_anchor=(1.15, 1.0)) + + ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=8)) diff --git a/astetik/plots/overlap.py b/astetik/plots/overlap.py index d10cc18..96e50f5 100644 --- a/astetik/plots/overlap.py +++ b/astetik/plots/overlap.py @@ -1,5 +1,6 
@@ import seaborn as sns import matplotlib.pyplot as plt +import matplotlib.patches as mpatches from ..style.titles import _titles from ..style.template import _header @@ -12,6 +13,7 @@ def overlap(data, y, label_col, sort=None, + limit=None, transform_func=False, palette='default', style='astetik', @@ -67,6 +69,8 @@ def overlap(data, sort :: either True or False for ascending sort based on the x-axis data. + limit :: limit the number of items to be shown + transform_func :: If not False, the selected function such as 'mean' will be used to group by the label_col. Available functions: @@ -151,19 +155,25 @@ def overlap(data, sns.barplot(data=data, x=x, y=label_col, + orient='h', color=palette[0]) sns.barplot(data=data, x=y, y=label_col, + orient='h', color=palette[1]) # # # # PLOT ENDS # # # # + if legend != False: + x_patch = mpatches.Patch(color=palette[0], label=x) + y_patch = mpatches.Patch(color=palette[1], label=y) + ax.legend(handles=[x_patch, y_patch], ncol=1, loc="upper right", frameon=True) - ax.legend(ncol=2, loc="lower right", frameon=True) ax.set(ylabel=y_label, xlabel=x_label) sns.despine(bottom=True) ax.xaxis.set_major_locator(plt.MaxNLocator(5)) _thousand_sep(p, ax) - _titles(title, sub_title=sub_title) + if len(title) + len(sub_title) < 0: + _titles(title, sub_title=sub_title) diff --git a/astetik/plots/pie.py b/astetik/plots/pie.py index 1f9fe27..bf0332e 100644 --- a/astetik/plots/pie.py +++ b/astetik/plots/pie.py @@ -1,13 +1,15 @@ import pandas as pd import matplotlib.pyplot as plt +from ..style.style import params +from ..utils.exceptions import MissingLabel from ..style.titles import _titles from ..style.template import _header, _footer def pie(data, - x, - quantile_cut=None, + x=None, + labels=None, palette='default', style='astetik', dpi=72, @@ -24,7 +26,11 @@ def pie(data, '''PIE PLOT - A classic pie chart. + A classic pie chart. If dataframe is given as input, column sums + will be automatically used as values. 
If you want to use a given + row of data instead, indicate it using data.iloc[4] in the case of + wanting to use the 5th row. If array is given as input, provide labels + separately. Inputs: 1 Features: 1 categorical or continuous @@ -44,9 +50,7 @@ def pie(data, -------------------- 2.2. PLOT PARAMETERS -------------------- - quantile_cut :: An int value for the number of buckets data will be cut. - This will always yield an evenly split pie, and is useful - for showing the IQR ranges for a given feature. + NONE ---------------------- 2.3. COMMON PARAMETERS @@ -95,22 +99,35 @@ def pie(data, ''' - # PLOT SPECIFIC START >>> - if quantile_cut != None: - data = data.copy(deep=True) - data[x] = pd.qcut(data[x], quantile_cut) - n_colors = len(data[x].unique()) + if x != None: + + # more than single column + try: + data[x].shape[1] + labels = data[x].sum().index.values + data = data[x].sum().values + + # single column + except IndexError: + labels = data[x].value_counts().index.values + data = data[x].value_counts().values + else: - n_colors = len(x) - data = data.sort_values(x) - labels = data[x].value_counts().index.values - data = data[x].value_counts().values - # << PLOT SPECIFIC END + try: + data.shape[1] + labels = data.sum().index.values + data = data.sum().values + except IndexError or ValueError: + pass + + n_colors = len(data) # HEADER STARTS >>> palette = _header(palette, style, n_colors, dpi) # <<< HEADER ENDS + p, ax = plt.subplots(figsize=(8, 8)) + # # # # # # PLOT CODE STARTS # # # # # # p = plt.pie(x=data, colors=palette, @@ -121,8 +138,13 @@ def pie(data, # # # # # # PLOT CODE ENDS # # # # # # # LEGEND STARTS >>> - if legend != False: - plt.legend(p[0], labels, loc='center left', bbox_to_anchor=(1.1, 0.5)) + + try: + if legend != False: + plt.legend(p[0], labels, loc='center left', bbox_to_anchor=(1.1, 0.5)) + except TypeError: + MissingLabel("Looks like you didn't provide 'label' parameter for legend") + # <<< LEGEND ENDS # START OF TITLES >>> diff --git 
a/astetik/plots/scat.py b/astetik/plots/scat.py index 1694c2c..a9f9881 100644 --- a/astetik/plots/scat.py +++ b/astetik/plots/scat.py @@ -25,8 +25,8 @@ def scat(data, legend=True, x_scale='linear', y_scale='linear', - x_limit='auto', - y_limit='auto', + x_limit=None, + y_limit=None, outliers=False, save=False): @@ -117,6 +117,7 @@ def scat(data, n = len(data[hue].unique()) else: n = 1 + legend = False if size == None: size = 8 @@ -152,4 +153,4 @@ def scat(data, _thousand_sep(p, ax) _footer(p, x_label, y_label, legend, n, save) - p.set(xscale='linear') + ax.xaxis.set_major_locator(plt.MaxNLocator(5)) diff --git a/astetik/style/color_picker.py b/astetik/style/color_picker.py index 4b60a02..38a58d7 100644 --- a/astetik/style/color_picker.py +++ b/astetik/style/color_picker.py @@ -70,19 +70,54 @@ def color_picker(palette, center='light', n_colors=10, show=False): return out -def color_blind(): +def color_blind(mode='colorblind'): '''COLOR BLIND COLORS Provides a color palette that is colorblind friendly. 
''' - colors = [[0, 0, 0], - [230/255, 159/255, 0], - [86/255, 180/255, 233/255], - [0, 158/255, 115/255], - [213/255, 94/255, 0], - [0, 114/255, 178/255]] + if mode == 'colorblind': + + colors = [[0, 0, 0], + [0, 73/255, 73/255], + [0, 146/255, 146/255], + [255/255, 109/255, 182/255], + [255/255, 182/255, 119/255], + [73/255, 0, 146/255], + [0, 109/255, 219/255], + [182/255, 109/255, 255/255], + [109/255, 182/255, 255/255], + [182/255, 219/255, 255/255], + [146/255, 0, 0], + [146/255, 73/255, 0], + [219/255, 209/255, 0], + [36/255, 255/255, 36/255], + [255/255, 255/255, 109/255]] + + elif mode == 'colorblind6': + colors = [[0, 0, 0], + [230/255, 159/255, 0], + [86/255, 180/255, 233/255], + [0, 158/255, 115/255], + [213/255, 94/255, 0], + [0, 114/255, 178/255]] + + elif mode == 'colorblind1': + colors = [[222/255, 188/255, 146/255], + [251/255, 175/255, 148/255], + [131/255, 215/255, 142/255], + [225/255, 191/255, 147/255], + [250/255, 109/255, 81/255], + [101/255, 170/255, 53/255], + [204/255, 146/255, 68/255], + [221/255, 51/255, 48/255], + [95/255, 130/255, 24/255], + [149/255, 115/255, 32/255], + [164/255, 23/255, 30/255], + [61/255, 105/255, 22/255], + [119/255, 81/255, 24/255]] + return colors diff --git a/astetik/style/random_colors.py b/astetik/style/random_colors.py new file mode 100644 index 0000000..ff7fcbb --- /dev/null +++ b/astetik/style/random_colors.py @@ -0,0 +1,14 @@ +import numpy as np + + +def randomcolor(): + + ''' PICKS COLORS RANDOMLY + + ''' + + colors = [] + for i in range(20): + colors.append(list((np.random.randint(0, 255, 3) / 255))) + + return colors diff --git a/astetik/style/template.py b/astetik/style/template.py index 6f32199..ec86bb9 100644 --- a/astetik/style/template.py +++ b/astetik/style/template.py @@ -1,4 +1,5 @@ import time +from datetime import datetime import seaborn as sns import matplotlib.pyplot as plt @@ -6,6 +7,7 @@ from ..style.color_picker import color_picker, color_blind, _label_to_hex from ..utils.utils import 
_n_decider from ..style.style import styles, default_colors +from ..style.random_colors import randomcolor def _header(palette, @@ -15,13 +17,20 @@ def _header(palette, fig_width=None, fig_height=None): - if style != 'astetik': + if palette == 'random': + palette = randomcolor() + + elif style != 'astetik': plt.style.use(style) n = _n_decider(n_colors) - if palette == 'colorblind': - palette = color_blind() + try: + if palette.startswith('colorblind'): + palette = color_blind(palette) + except AttributeError: + palette = palette + else: try: palette = color_picker(palette=palette, n_colors=n) @@ -68,6 +77,8 @@ def _footer(p, # SAVING THE PLOT if save != False: - time_stamp = time.strftime('%Y%m%d_%H%M%S') + + dt = datetime.now() + time_stamp = time.strftime('%Y%m%d_%H%M%S_' + str(dt.microsecond)) filename = "astetik_" + time_stamp + ".png" plt.savefig(filename, dpi=72) diff --git a/astetik/style/titles.py b/astetik/style/titles.py index 9fc4791..6f25bce 100644 --- a/astetik/style/titles.py +++ b/astetik/style/titles.py @@ -29,13 +29,13 @@ def _titles(title, NOTE: At the moment works with one dimensional data. 
''' - - title = title.replace(' ', '\,') - - plt.title(r"$\bf{" + title + "}$" + '\n' + sub_title, - loc=location, - fontsize=fontsize, - fontname=fontname, - weight='normal', - y=1.03, - color="grey"); + if len(title) + len(sub_title) > 0: + title = title.replace(' ', '\,') + + plt.title(r"$\bf{" + title + "}$" + '\n' + sub_title, + loc=location, + fontsize=fontsize, + fontname=fontname, + weight='normal', + y=1.03, + color="grey"); diff --git a/astetik/utils/datetime.py b/astetik/utils/datetime.py index 7f2fc82..8371319 100644 --- a/astetik/utils/datetime.py +++ b/astetik/utils/datetime.py @@ -2,17 +2,23 @@ from matplotlib.dates import HourLocator, DayLocator, MonthLocator, YearLocator, MinuteLocator, SecondLocator from matplotlib.dates import DateFormatter +from matplotlib import ticker -def _time_freq(time_data, divider1, divider2, ): - out = round((time_data.time_stamp.max() - time_data.time_stamp.min()).days / divider2) - return round(out / divider1) +def _time_freq(time_data, divider1, divider2): + + out = (time_data.max() - time_data.min()).days + print(out) + out = round(out / divider2 / divider1) + print(out) + return out def date_handler(time_data, ax, time_frame): + ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=8)) + if time_frame == 'year': - ax.xaxis.set_major_locator(YearLocator(_time_freq(time_data, 10, 365))) ax.xaxis.set_major_formatter(DateFormatter('%Y')) elif time_frame == 'month': diff --git a/astetik/utils/exceptions.py b/astetik/utils/exceptions.py index 8dc2c86..7139ca2 100644 --- a/astetik/utils/exceptions.py +++ b/astetik/utils/exceptions.py @@ -1,2 +1,5 @@ class MissingParameter(Exception): """This error is raised when something goes wrong due to missing parameter""" + +class MissingLabel(Exception): + """This error is raised when something goes wrong due to missing labels""" diff --git a/astetik/utils/load_data.py b/astetik/utils/load_data.py new file mode 100644 index 0000000..32447ae --- /dev/null +++ 
b/astetik/utils/load_data.py @@ -0,0 +1,8 @@ +import pandas as pd + + +def read(): + + def csv(filename): + out = pd.read_csv(filename, sep=',', encoding='latin-1', error_bad_lines=True) + return out diff --git a/astetik/utils/transform.py b/astetik/utils/transform.py index 78e85e8..355eb5f 100644 --- a/astetik/utils/transform.py +++ b/astetik/utils/transform.py @@ -135,21 +135,26 @@ def mean_zero(data, retain=None): ''' # avoiding transformation of y, labels, etc - if retain is not None: - col_temp = pd.DataFrame(data[retain]) - data = data.drop(retain, axis=1) - # storing the temp values - data_mean = data.mean(axis=0) - data_std = data.std(axis=0) + data = data.copy(deep=True) - # transforming the data - data = data - data_mean - data = data / data_std + try: + col_list = list(data.columns) + except AttributeError: + col_list = list(pd.DataFrame(data.columns)) - # putting retained cols as first columns if retain is not None: - data = pd.merge(col_temp, data, left_index=True, right_index=True) + col_list.remove(retain) + + for col in col_list: + + # storing the temp values + data_mean = data[col].mean(axis=0) + data_std = data[col].std(axis=0) + + # transforming the data + col_data = data[col] - data_mean + data[col] = col_data / data_std return data @@ -172,3 +177,26 @@ def _groupby(data, by, func): temp = data.groupby(by) return groupby_func(data=temp, func=func) + + +def boolcols_to_cat(data, labels, other_label='NA'): + + '''CONVERT BOOLEAN COLS TO CATEGORICAL + + Returns a single categorical label sequence + that is produced from 2 or more boolean columns. 
+ + ''' + + c = len(data) + l = ['~'] * c + + for i in range(c): + for label in labels: + if data[label][i] == True: + l[i] = label + break + + l = [label.replace('~', other_label) for label in l] + + return l diff --git a/astetik/utils/utils.py b/astetik/utils/utils.py index 43f1fe9..26f4f84 100644 --- a/astetik/utils/utils.py +++ b/astetik/utils/utils.py @@ -287,3 +287,25 @@ def multicol_transform(transform, data, x=None, y=None, func=None, freq=None): out[y] = temp[y] return out + + +def factorplot_sizing(data): + + '''COMPUTE SIZE AND ASPECT + + This is for the sole purpose of standardizing + the factorplot sizing so that bar thickness + is same regardless of the number of bars in the figure. + ''' + items = len(data) + + # choose these first + width = 9 # larger the wider + thickness = 3 # smaller the thicker + + # compute the values + value = items + 2.5 + size = value / thickness + aspect = (width / size) + + return size, aspect