-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
27,921 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
"hw1","hw2","hw3","hw4","midterm","hw5","hw6","hw7","final" | ||
"95","88","100","95","80","96","99","0","103" | ||
"0","74","74","0","53","83","97","0","79" | ||
"100","0","105","100","91","96","100","96","122" | ||
"0","90","76","100","63","91","95","0","78" | ||
"100","96","99","100","91","93","100","92","135" | ||
"90","83","95","100","73","89","100","90","117" | ||
"95","98","100","100","59","98","98","94","135" | ||
"80","100","97","100","69","94","98","101","123" | ||
"95","90","98","90","78","95","99","100","109" | ||
"90","94","95","98","91","94","100","89","126" | ||
"100","100","99","100","79","99","100","103","124" | ||
"100","95","87","100","81","0","100","84","126" | ||
"95","71","95","98","71","94","100","97","130" | ||
"100","93","97","80","74","95","98","90","117" | ||
"100","95","92","100","76","95","99","97","133" | ||
"100","100","100","95","80","94","100","99","118" | ||
"100","97","92","100","89","89","100","90","127" | ||
"100","95","100","100","91","98","100","0","118" | ||
"100","100","91","100","90","94","100","97","138" | ||
"85","90","100","90","93","90","98","100","143" | ||
"95","100","99","95","96","98","100","95","135" | ||
"100","73","92","100","88","90","98","0","129" | ||
"75","90","91","78","90","86","100","92","91" | ||
"95","100","99","100","79","92","99","0","95" | ||
"95","0","105","100","81","95","98","89","121" | ||
"100","100","98","100","83","97","100","102","131" | ||
"100","99","91","100","94","0","100","92","144" | ||
"100","98","97","100","70","91","100","95","112" | ||
"0","90","85","100","79","94","99","89","132" | ||
"80","0","74","0","74","15","97","16","121" | ||
"100","95","98","100","84","96","99","0","121" | ||
"90","96","97","95","79","0","99","89","126" | ||
"95","94","93","95","91","99","100","0","116" | ||
"100","100","98","100","77","98","100","92","112" | ||
"90","85","91","94","65","95","100","96","114" | ||
"100","95","100","100","76","92","100","0","121" | ||
"100","85","99","95","70","90","0","0","126" | ||
"90","100","100","95","80","98","100","90","122" | ||
"70","81","95","100","77","90","99","86","118" | ||
"100","99","99","98","89","96","99","0","123" | ||
"0","56","95","100","70","88","98","94","87" | ||
"70","95","92","100","88","96","100","98","136" | ||
"95","95","89","95","71","91","100","99","120" | ||
"95","85","97","95","66","96","99","90","140" | ||
"100","98","95","98","81","98","100","96","125" | ||
"90","90","98","94","67","93","100","93","86" | ||
"100","0","0","77","81","0","0","85","145" | ||
"85","93","97","95","79","92","99","97","128" | ||
"90","86","97","90","68","92","100","100","121" | ||
"100","61","94","95","77","92","98","98","98" | ||
"100","100","97","100","70","99","0","97","134" | ||
"95","94","80","93","75","94","98","94","99" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
""" | ||
Imports and helpful functions that we use in DSC 80 lectures. Use `make | ||
setup-lec` to copy this (and custom-rise-styles.css) to the lecture folders. | ||
Usage: | ||
from dsc80_utils import * | ||
""" | ||
import pandas as pd | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
import seaborn as sns | ||
from matplotlib_inline.backend_inline import set_matplotlib_formats | ||
from IPython.display import display, IFrame, HTML | ||
|
||
import plotly | ||
import plotly.figure_factory as ff | ||
import plotly.graph_objects as go | ||
import plotly.express as px | ||
from plotly.subplots import make_subplots | ||
import plotly.io as pio | ||
pio.renderers.default = "notebook"

# DSC 80 preferred styles: 30px margins on every side, a 600x400 canvas,
# grid lines on both axes, and horizontally centered titles.
pio.templates["dsc80"] = go.layout.Template(
    layout={
        "margin": {"l": 30, "r": 30, "t": 30, "b": 30},
        "autosize": True,
        "width": 600,
        "height": 400,
        "xaxis": {"showgrid": True},
        "yaxis": {"showgrid": True},
        "title": {"x": 0.5, "xanchor": "center"},
    }
)
# Default template: "simple_white" combined with the course template above.
pio.templates.default = "simple_white+dsc80"
|
||
# matplotlib / seaborn defaults: SVG output, poster-sized text, white grid.
set_matplotlib_formats("svg")
sns.set_context("poster")
sns.set_style("whitegrid")
plt.rcParams["figure.figsize"] = (10, 5)

# display options for numpy and pandas: keep printed output short and
# rounded to two decimals
np.set_printoptions(precision=2, suppress=True, threshold=20)
pd.options.display.max_rows = 7
pd.options.display.max_columns = 8
pd.options.display.precision = 2

# Use plotly as default plotting engine
pd.set_option("plotting.backend", "plotly")
|
||
|
||
def display_df(
    df, rows=pd.options.display.max_rows, cols=pd.options.display.max_columns
):
    """Display ``df`` with temporary row and column limits.

    ``rows`` and ``cols`` are applied as ``display.max_rows`` and
    ``display.max_columns`` only while ``df`` is being displayed. Their
    default values are read from the pandas options once, when this
    function is defined.
    """
    overrides = ("display.max_rows", rows, "display.max_columns", cols)
    with pd.option_context(*overrides):
        display(df)
|
||
|
||
def dfs_side_by_side(*dfs):
    """
    Displays two or more dataframes side by side.

    Each dataframe is rendered with ``to_html`` and the resulting tables are
    placed together inside a single flex container.
    """
    tables = ''.join(frame.to_html() for frame in dfs)
    markup = (
        "\n"
        '<div style="display: flex; gap: 1rem;">\n'
        f"{tables}\n"
        "</div>\n"
    )
    display(HTML(markup))
|
||
from pathlib import Path | ||
|
||
# The stuff below is for Lecture 7/8. | ||
def create_kde_plotly(df, group_col, group1, group2, vals_col, title=''):
    """Return a plotly distplot comparing ``vals_col`` across two groups.

    Rows of ``df`` are split by whether ``group_col`` equals ``group1`` or
    ``group2``; each group's ``vals_col`` values become one curve. The
    histogram and rug layers are switched off.
    """
    samples = [
        df.loc[df[group_col] == label, vals_col] for label in (group1, group2)
    ]
    fig = ff.create_distplot(
        hist_data=samples,
        group_labels=[group1, group2],
        show_hist=False,
        show_rug=False,
    )
    fig.update_layout(title=title)
    return fig
|
||
def multiple_hists(df_map, histnorm="probability", title="", col="child"):
    """Overlay one histogram per dataset for a shared column.

    Args:
        df_map: Mapping from a dataset label to a DataFrame that has a
            ``col`` column.
        histnorm: Forwarded to ``px.histogram`` as ``histnorm``.
        title: Figure title.
        col: Name of the column to plot. Defaults to ``"child"``, the
            column this helper was originally hard-coded to.

    Returns:
        The plotly figure, with one overlaid histogram per key of
        ``df_map``.
    """
    labels = list(df_map)
    values = [df_map[label][col].dropna() for label in labels]

    # Concatenating with ``keys`` stores each value's dataset label in the
    # outermost index level; ``reset_index`` exposes it as "level_0".
    all_sets = pd.concat(values, keys=labels)
    all_sets = all_sets.reset_index()[["level_0", col]].rename(
        columns={"level_0": "dataset"}
    )

    fig = px.histogram(
        all_sets,
        color="dataset",
        x=col,
        barmode="overlay",
        histnorm=histnorm,
    )
    fig.update_layout(title=title)
    return fig
|
||
|
||
def multiple_kdes(df_map, title="", col="child"):
    """Draw one KDE curve per dataset for a shared column.

    Args:
        df_map: Mapping from a dataset label to a DataFrame that has a
            ``col`` column.
        title: Figure title.
        col: Name of the column to plot; also used as the x-axis title.
            Defaults to ``"child"``, the column this helper was originally
            hard-coded to.

    Returns:
        The plotly distplot figure (curves only; histogram and rug layers
        are switched off).
    """
    labels = list(df_map)
    values = [df_map[label][col].dropna() for label in labels]
    fig = ff.create_distplot(
        hist_data=values,
        group_labels=labels,
        show_rug=False,
        show_hist=False,
        # Take as many colors from the Dark2 palette as there are datasets.
        colors=px.colors.qualitative.Dark2[: len(labels)],
    )
    return fig.update_layout(title=title).update_xaxes(title=col)
|
||
def multiple_describe(df_map, col="child"):
    """Summarize one column of several datasets by mean and standard deviation.

    Args:
        df_map: Mapping from a dataset label to a DataFrame that has a
            ``col`` column.
        col: Name of the column to summarize. Defaults to ``"child"``, the
            column this helper was originally hard-coded to.

    Returns:
        A float DataFrame indexed by ``"Dataset"`` (the keys of ``df_map``,
        in order) with columns ``"Mean"`` and ``"Standard Deviation"``.
    """
    labels = list(df_map)
    # One [mean, std] row per dataset. Building the frame in a single call
    # (instead of enlarging an empty frame row by row) gives numeric columns.
    rows = [df_map[label][col].agg(["mean", "std"]).tolist() for label in labels]
    return pd.DataFrame(
        rows,
        index=pd.Index(labels, name="Dataset"),
        columns=["Mean", "Standard Deviation"],
        dtype=float,
    )
|
||
def make_mcar(data, col, pct=0.5):
    """Create MCAR from complete data.

    Args:
        data: Complete DataFrame; it is not modified.
        col: Column in which values are blanked out.
        pct: Fraction of rows, sampled uniformly without replacement, whose
            ``col`` value is replaced with NaN.

    Returns:
        A copy of ``data`` with the sampled rows' ``col`` set to NaN. Rows
        are selected by index label, so labels are assumed to be unique.
    """
    missing = data.copy()
    idx = data.sample(frac=pct, replace=False).index
    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was removed
    # in NumPy 2.0.
    missing.loc[idx, col] = np.nan
    return missing
|
||
|
||
def make_mar_on_cat(data, col, dep_col, pct=0.5):
    """Create MAR from complete data. The dependency is
    created on dep_col, which is assumed to be categorical.
    This is only *one* of many ways to create MAR data.
    For the lecture examples only.

    Args:
        data: Complete DataFrame; it is not modified.
        col: Column in which values are blanked out.
        dep_col: Column whose value drives the chance of being blanked.
        pct: Fraction of rows whose ``col`` value is replaced with NaN.

    Returns:
        A copy of ``data`` with the sampled rows' ``col`` set to NaN.
    """
    missing = data.copy()
    # pick one value to blank out a lot
    high_val = np.random.choice(missing[dep_col].unique())
    # Rows carrying the chosen value get sampling weight 0.9, all others 0.1.
    weights = pd.Series(
        np.where(missing[dep_col] == high_val, 0.9, 0.1), index=missing.index
    )
    idx = data.sample(frac=pct, replace=False, weights=weights).index
    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was removed
    # in NumPy 2.0.
    missing.loc[idx, col] = np.nan
    return missing
|
||
|
||
def make_mar_on_num(data, col, dep_col, pct=0.5):
    """Create MAR from complete data. The dependency is
    created on dep_col, which is assumed to be numeric.
    This is only *one* of many ways to create MAR data.
    For the lecture examples only.

    Args:
        data: Complete DataFrame; it is not modified.
        col: Column in which values are blanked out.
        dep_col: Numeric column whose value drives the chance of being
            blanked.
        pct: Fraction of rows whose ``col`` value is replaced with NaN.

    Returns:
        A copy of ``data`` with the sampled rows' ``col`` set to NaN.
    """
    # 50th percentile of dep_col splits the rows into a low and a high half.
    thresh = np.percentile(data[dep_col], 50)

    missing = data.copy()
    # Rows at or above the threshold get sampling weight 0.75, the rest 0.25.
    weights = pd.Series(
        np.where(missing[dep_col] >= thresh, 0.75, 0.25), index=missing.index
    )
    idx = missing.sample(frac=pct, replace=False, weights=weights).index

    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was removed
    # in NumPy 2.0.
    missing.loc[idx, col] = np.nan
    return missing
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Oops, something went wrong.