Skip to content

Commit

Permalink
Update 05-16
Browse files Browse the repository at this point in the history
  • Loading branch information
SamLau95 committed May 16, 2024
1 parent fa64de2 commit a62369c
Show file tree
Hide file tree
Showing 12 changed files with 27,921 additions and 0 deletions.
2 changes: 2 additions & 0 deletions _modules/week-07.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ days:
- name: LEC 13
type: lecture
title: Linear Regression
blank: resources/lectures/lec13/lec13-live.html
filled: resources/lectures/lec13/lec13.html
reading: '[Ch. 15.0-15.6](https://learningds.org/ch/15/linear_intro.html)'
- date: '2024-05-17'
events:
Expand Down
53 changes: 53 additions & 0 deletions resources/lectures/lec13/data/gradesW4315.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"hw1","hw2","hw3","hw4","midterm","hw5","hw6","hw7","final"
"95","88","100","95","80","96","99","0","103"
"0","74","74","0","53","83","97","0","79"
"100","0","105","100","91","96","100","96","122"
"0","90","76","100","63","91","95","0","78"
"100","96","99","100","91","93","100","92","135"
"90","83","95","100","73","89","100","90","117"
"95","98","100","100","59","98","98","94","135"
"80","100","97","100","69","94","98","101","123"
"95","90","98","90","78","95","99","100","109"
"90","94","95","98","91","94","100","89","126"
"100","100","99","100","79","99","100","103","124"
"100","95","87","100","81","0","100","84","126"
"95","71","95","98","71","94","100","97","130"
"100","93","97","80","74","95","98","90","117"
"100","95","92","100","76","95","99","97","133"
"100","100","100","95","80","94","100","99","118"
"100","97","92","100","89","89","100","90","127"
"100","95","100","100","91","98","100","0","118"
"100","100","91","100","90","94","100","97","138"
"85","90","100","90","93","90","98","100","143"
"95","100","99","95","96","98","100","95","135"
"100","73","92","100","88","90","98","0","129"
"75","90","91","78","90","86","100","92","91"
"95","100","99","100","79","92","99","0","95"
"95","0","105","100","81","95","98","89","121"
"100","100","98","100","83","97","100","102","131"
"100","99","91","100","94","0","100","92","144"
"100","98","97","100","70","91","100","95","112"
"0","90","85","100","79","94","99","89","132"
"80","0","74","0","74","15","97","16","121"
"100","95","98","100","84","96","99","0","121"
"90","96","97","95","79","0","99","89","126"
"95","94","93","95","91","99","100","0","116"
"100","100","98","100","77","98","100","92","112"
"90","85","91","94","65","95","100","96","114"
"100","95","100","100","76","92","100","0","121"
"100","85","99","95","70","90","0","0","126"
"90","100","100","95","80","98","100","90","122"
"70","81","95","100","77","90","99","86","118"
"100","99","99","98","89","96","99","0","123"
"0","56","95","100","70","88","98","94","87"
"70","95","92","100","88","96","100","98","136"
"95","95","89","95","71","91","100","99","120"
"95","85","97","95","66","96","99","90","140"
"100","98","95","98","81","98","100","96","125"
"90","90","98","94","67","93","100","93","86"
"100","0","0","77","81","0","0","85","145"
"85","93","97","95","79","92","99","97","128"
"90","86","97","90","68","92","100","100","121"
"100","61","94","95","77","92","98","98","98"
"100","100","97","100","70","99","0","97","134"
"95","94","80","93","75","94","98","94","99"
168 changes: 168 additions & 0 deletions resources/lectures/lec13/dsc80_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
"""
Imports and helpful functions that we use in DSC 80 lectures. Use `make
setup-lec` to copy this (and custom-rise-styles.css) to the lecture folders.
Usage:
from dsc80_utils import *
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib_inline.backend_inline import set_matplotlib_formats
from IPython.display import display, IFrame, HTML

import plotly
import plotly.figure_factory as ff
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.io as pio
pio.renderers.default = "notebook"

# DSC 80 preferred styles
# Register a custom plotly layout template under the key "dsc80": fixed
# 600x400 figures with 30px margins, grid lines on both axes, and a
# horizontally centered title.
pio.templates["dsc80"] = go.layout.Template(
layout=dict(
margin=dict(l=30, r=30, t=30, b=30),
autosize=True,
width=600,
height=400,
xaxis=dict(showgrid=True),
yaxis=dict(showgrid=True),
title=dict(x=0.5, xanchor="center"),
)
)
# Must come after the registration above, since the default template string
# refers to the "dsc80" key by name.
pio.templates.default = "simple_white+dsc80"

# matplotlib / seaborn defaults: inline figures are requested in the "svg"
# format, with seaborn's "poster" context, "whitegrid" style, and a 10x5
# default figure size.
set_matplotlib_formats("svg")
sns.set_context("poster")
sns.set_style("whitegrid")
plt.rcParams["figure.figsize"] = (10, 5)

# display options for numpy and pandas
# NOTE: display_df (defined later in this module) reads the pandas max_rows /
# max_columns options as its default arguments at definition time, so these
# values need to be set before that function is defined.
np.set_printoptions(threshold=20, precision=2, suppress=True)
pd.set_option("display.max_rows", 7)
pd.set_option("display.max_columns", 8)
pd.set_option("display.precision", 2)

# Use plotly as default plotting engine
pd.options.plotting.backend = "plotly"


def display_df(
    df, rows=pd.options.display.max_rows, cols=pd.options.display.max_columns
):
    """Display `df` with temporary limits on the rows and columns shown.

    Parameters
    ----------
    df : object to display (typically a DataFrame).
    rows : value used for pandas' "display.max_rows" while displaying.
    cols : value used for pandas' "display.max_columns" while displaying.

    The defaults are read from the pandas options once, when this function
    is defined, not on each call. The overrides only apply inside this
    call; the previous option values are restored afterwards.
    """
    overrides = ("display.max_rows", rows, "display.max_columns", cols)
    with pd.option_context(*overrides):
        display(df)


def dfs_side_by_side(*dfs):
    """
    Displays two or more dataframes side by side.

    Each dataframe is rendered with `to_html()`, and the resulting tables
    are placed inside a single flexbox `<div>` that is shown via `display`.
    """
    tables = "".join([frame.to_html() for frame in dfs])
    # Assemble the wrapper line by line: a leading newline, the opening div,
    # the concatenated tables, the closing div, and a trailing newline.
    markup = "\n".join(
        [
            "",
            '<div style="display: flex; gap: 1rem;">',
            tables,
            "</div>",
            "",
        ]
    )
    display(HTML(markup))

from pathlib import Path

# The stuff below is for Lecture 7/8.
def create_kde_plotly(df, group_col, group1, group2, vals_col, title=''):
    """Build a plotly distplot comparing `vals_col` across two groups.

    Rows of `df` are selected where `group_col` equals `group1` and where it
    equals `group2`; the `vals_col` values of each selection are passed to
    `ff.create_distplot` with the rug and histogram layers turned off.
    Returns the figure with its layout title set to `title`.
    """
    labels = [group1, group2]
    samples = [df.loc[df[group_col] == label, vals_col] for label in labels]
    fig = ff.create_distplot(
        hist_data=samples,
        group_labels=labels,
        show_rug=False,
        show_hist=False,
    )
    return fig.update_layout(title=title)

def multiple_hists(df_map, histnorm="probability", title=""):
    """Overlay histograms of the "child" column from several dataframes.

    `df_map` maps a dataset name to a dataframe that has a "child" column.
    Missing "child" values are dropped, the remaining values are stacked
    into one long table with a "dataset" column holding the originating
    key, and the result is drawn with `px.histogram` (one color per
    dataset, bars overlaid). Returns the figure with `title` applied.
    """
    names = list(df_map.keys())
    child_values = [df_map[name]["child"].dropna() for name in names]

    # Concatenating with keys puts the dataset name in index level 0;
    # reset_index exposes it as the "level_0" column, which is then renamed.
    stacked = pd.concat(child_values, keys=names)
    long_form = stacked.reset_index()[["level_0", "child"]]
    long_form = long_form.rename(columns={"level_0": "dataset"})

    fig = px.histogram(
        long_form,
        color="dataset",
        x="child",
        barmode="overlay",
        histnorm=histnorm,
    )
    fig.update_layout(title=title)
    return fig


def multiple_kdes(df_map, title=""):
    """Draw one distplot curve per dataframe for the "child" column.

    `df_map` maps a label to a dataframe that has a "child" column. Missing
    values are dropped from each column before plotting. Colors are the
    first `len(df_map)` entries of plotly's qualitative Dark2 palette.
    Returns the figure with `title` applied and the x-axis titled "child".
    """
    labels = list(df_map.keys())
    samples = [df_map[label]["child"].dropna() for label in labels]
    palette = px.colors.qualitative.Dark2[: len(df_map)]
    fig = ff.create_distplot(
        hist_data=samples,
        group_labels=labels,
        show_rug=False,
        show_hist=False,
        colors=palette,
    )
    fig = fig.update_layout(title=title)
    return fig.update_xaxes(title="child")

def multiple_describe(df_map):
    """Summarize the "child" column of each dataframe in `df_map`.

    Returns a dataframe indexed by the keys of `df_map`, with columns
    "Mean" and "Standard Deviation" holding the mean and std of the
    corresponding dataframe's "child" column.
    """
    summary = pd.DataFrame(columns=["Dataset", "Mean", "Standard Deviation"])
    summary = summary.set_index("Dataset")
    for name, frame in df_map.items():
        # apply with a list of names yields a Series ordered [mean, std].
        stats = frame["child"].apply(["mean", "std"])
        summary.loc[name] = stats.to_numpy()
    return summary

def make_mcar(data, col, pct=0.5):
    """Create MCAR (missing completely at random) data from complete data.

    Parameters
    ----------
    data : DataFrame to copy; it is not modified.
    col : label of the column in which values are blanked out.
    pct : fraction of rows to blank, sampled uniformly without replacement.

    Returns
    -------
    A copy of `data` in which `col` is NaN for the sampled rows.
    """
    missing = data.copy()
    idx = data.sample(frac=pct, replace=False).index
    # np.nan rather than np.NaN: the capitalized alias was removed in
    # NumPy 2.0 and raises AttributeError there.
    missing.loc[idx, col] = np.nan
    return missing


def make_mar_on_cat(data, col, dep_col, pct=0.5):
    """Create MAR from complete data. The dependency is
    created on dep_col, which is assumed to be categorical.
    This is only *one* of many ways to create MAR data.
    For the lecture examples only.

    Parameters
    ----------
    data : DataFrame to copy; it is not modified.
    col : label of the column in which values are blanked out.
    dep_col : label of the column that drives the missingness.
    pct : fraction of rows to blank, sampled without replacement.

    Returns
    -------
    A copy of `data` in which `col` is NaN for the sampled rows. Rows whose
    `dep_col` equals one randomly chosen value get sampling weight 0.9;
    all other rows get weight 0.1.
    """
    missing = data.copy()
    # pick one value to blank out a lot
    high_val = np.random.choice(missing[dep_col].unique())
    weights = missing[dep_col].apply(lambda x: 0.9 if x == high_val else 0.1)
    idx = data.sample(frac=pct, replace=False, weights=weights).index
    # np.nan rather than np.NaN: the capitalized alias was removed in
    # NumPy 2.0 and raises AttributeError there.
    missing.loc[idx, col] = np.nan

    return missing


def make_mar_on_num(data, col, dep_col, pct=0.5):
    """Create MAR from complete data. The dependency is
    created on dep_col, which is assumed to be numeric.
    This is only *one* of many ways to create MAR data.
    For the lecture examples only.

    Parameters
    ----------
    data : DataFrame to copy; it is not modified.
    col : label of the column in which values are blanked out.
    dep_col : label of the numeric column that drives the missingness.
    pct : fraction of rows to blank, sampled without replacement.

    Returns
    -------
    A copy of `data` in which `col` is NaN for the sampled rows. Rows whose
    `dep_col` is at or above that column's 50th percentile get sampling
    weight 0.75; the rest get weight 0.25.
    """
    thresh = np.percentile(data[dep_col], 50)

    def blank_above_middle(val):
        # Rows at or above the median are three times as likely to be drawn.
        return 0.75 if val >= thresh else 0.25

    missing = data.copy()
    weights = missing[dep_col].apply(blank_above_middle)
    idx = missing.sample(frac=pct, replace=False, weights=weights).index

    # np.nan rather than np.NaN: the capitalized alias was removed in
    # NumPy 2.0 and raises AttributeError there.
    missing.loc[idx, col] = np.nan
    return missing
Binary file added resources/lectures/lec13/imgs/constant-convo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit a62369c

Please sign in to comment.