diff --git a/.nojekyll b/.nojekyll
index 8a7a775..4e2bbb8 100644
--- a/.nojekyll
+++ b/.nojekyll
@@ -1 +1 @@
-6339edee
\ No newline at end of file
+b46b290e
\ No newline at end of file
diff --git a/exams/2023-midterm.html b/exams/2023-midterm.html
index 71db11b..6a5799c 100644
--- a/exams/2023-midterm.html
+++ b/exams/2023-midterm.html
@@ -288,9 +288,9 @@
Tasks
}
df_means
-## y mean
-## 1 Group 1 9.950203
-## 2 Group 2 19.922667
+## y mean
+## 1 Group 1 10.08756
+## 2 Group 2 20.01365
# Demonstration of na.rm
mean(c(NA, 1, 2, 3), na.rm = T) # Remove NAs
diff --git a/exams/2024-midterm.html b/exams/2024-midterm.html
index 48ff79d..1495fcc 100644
--- a/exams/2024-midterm.html
+++ b/exams/2024-midterm.html
@@ -289,9 +289,9 @@ Tasks
}
df_means
-## y mean
-## 1 Group 1 10.08169
-## 2 Group 2 20.04041
+## y mean
+## 1 Group 1 9.909545
+## 2 Group 2 20.083487
# Demonstration of na.rm
mean(c(NA, 1, 2, 3), na.rm = T) # Remove NAs
diff --git a/figs/calendar.pdf b/figs/calendar.pdf
index 8e83d57..b92474c 100644
Binary files a/figs/calendar.pdf and b/figs/calendar.pdf differ
diff --git a/index.html b/index.html
index 9f3542d..25e6751 100644
--- a/index.html
+++ b/index.html
@@ -246,7 +246,7 @@ Course Materials
-
+
May 10, 2024
|
@@ -260,7 +260,7 @@ Course Materials
-
+
May 8, 2024
|
@@ -288,7 +288,7 @@ Course Materials
-
+
May 3, 2024
|
@@ -316,7 +316,7 @@ Course Materials
-
+
Apr 26, 2024
|
@@ -344,7 +344,7 @@ Course Materials
-
+
Apr 19, 2024
|
@@ -358,7 +358,7 @@ Course Materials
-
+
Apr 17, 2024
|
@@ -386,7 +386,7 @@ Course Materials
-
+
Apr 12, 2024
|
@@ -400,7 +400,7 @@ Course Materials
-
+
Apr 10, 2024
|
@@ -428,7 +428,7 @@ Course Materials
-
+
Apr 5, 2024
|
@@ -442,7 +442,7 @@ Course Materials
-
+
Apr 2, 2024
|
@@ -484,7 +484,7 @@ Course Materials
-
+
Mar 29, 2024
|
@@ -498,7 +498,7 @@ Course Materials
-
+
Mar 27, 2024
|
@@ -526,7 +526,7 @@ Course Materials
-
+
Mar 22, 2024
|
@@ -540,7 +540,7 @@ Course Materials
-
+
Mar 20, 2024
|
@@ -568,7 +568,7 @@ Course Materials
-
+
Mar 7, 2024
|
@@ -596,7 +596,7 @@ Course Materials
-
+
Mar 1, 2024
|
@@ -610,7 +610,7 @@ Course Materials
-
+
Mar 1, 2024
|
@@ -624,7 +624,7 @@ Course Materials
-
+
Feb 28, 2024
|
@@ -652,7 +652,7 @@ Course Materials
-
+
Feb 23, 2024
|
@@ -666,7 +666,7 @@ Course Materials
-
+
Feb 21, 2024
|
@@ -694,7 +694,7 @@ Course Materials
-
+
Feb 16, 2024
|
@@ -708,7 +708,7 @@ Course Materials
-
+
Feb 14, 2024
|
@@ -750,7 +750,7 @@ Course Materials
-
+
Feb 9, 2024
|
@@ -764,7 +764,7 @@ Course Materials
-
+
Feb 7, 2024
|
@@ -806,7 +806,7 @@ Course Materials
-
+
Feb 2, 2024
|
@@ -820,7 +820,7 @@ Course Materials
-
+
Jan 31, 2024
|
@@ -862,7 +862,7 @@ Course Materials
-
+
Jan 24, 2024
|
@@ -876,7 +876,7 @@ Course Materials
-
+
Jan 21, 2024
|
diff --git a/search.json b/search.json
index fe43ccd..bb616db 100644
--- a/search.json
+++ b/search.json
@@ -739,7 +739,7 @@
"href": "exams/2023-midterm.html#tasks",
"title": "Practice Midterm",
"section": "Tasks",
- "text": "Tasks\n\nRead in the data and create a data frame that you will work with for this exam.\nCreate a new column variable, decade, in your data frame.\n\n\nYou will need to take the response year and truncate it to the decade, so that 1972 becomes 1970 and 1989 becomes 1980. You can use a series of logical statements if you want, but it may be more effective to find a numerical function or combination of functions that will perform the operation you want.\nfloor() and math.floor() in R and python respectively are good places to start.\nCreate a scatterplot (use geom_point) of your happy year vs decade to show that your approach succeeded.\n\n\nCreate a new data set by iterating through each year to find the proportion of people who are very happy. Use a for loop. Using your new data frame, plot the proportion of very happy people over time.\nNote: You may have to pass an argument to the mean function to tell it to exclude missing values from the calculation, such as na.rm or skipna. Or, you can remove the NAs from happy using a function like na.omit or dropna, but be careful to only drop rows with an NA in variables we care about, like happy or year.\n\nThe code below provides an example of how to create a summary dataset and handle NAs in R and python. You may modify this code to help you answer part 3.\n\n# Create sample data\ndf <- data.frame(x = c(rnorm(100, 10), rnorm(100, 20)),\n y = rep(c(\"Group 1\", \"Group 2\"), each = 100))\n\ndf_means <- data.frame(y = NULL, mean = NULL)\n\n# For each y group, what is the mean of x?\nfor (i in unique(df$y)) {\n sub_df <- subset(df, y == i)\n df_means <- rbind(df_means, \n data.frame(y = i, mean = mean(sub_df$x, na.rm = T)))\n}\n\ndf_means\n## y mean\n## 1 Group 1 9.950203\n## 2 Group 2 19.922667\n\n# Demonstration of na.rm\nmean(c(NA, 1, 2, 3), na.rm = T) # Remove NAs\n## [1] 2\nmean(c(NA, 1, 2, 3), na.rm = F) # Don't remove NAs\n## [1] NA\n\n\nimport pandas as pd\nimport numpy as np\n\n# Create a new data frame\ndf = pd.DataFrame({\n 'y': np.repeat(['Group1', 'Group2'], (100, 100)), \n 'x': np.concatenate((np.random.normal(loc = 10, size = 100), np.random.normal(loc = 12, size = 100)), axis = None)\n })\n\n# Create an empty dataframe\ndf_means = pd.DataFrame(columns = ['y', 'mean'])\n\n# For each age, how many values?\nfor i in np.unique(df.y):\n # Create the subset\n df_sub = df.loc[df.y == i]\n # Drop NAs from the data frame\n # This step isn't necessary because mean() uses skipna = T by default\n # df_sub = df_sub.dropna(subset = ['x', 'y']) \n # Add a new row to the end of df_means\n df_means.loc[len(df_means.index)] = [i, df_sub.x.mean()]\n\n\n# Demonstrating skipna parameter of mean\npd.DataFrame({'y':[1, 2, 3, np.nan]}).y.mean(skipna = True)\n## 2.0\npd.DataFrame({'y':[1, 2, 3, np.nan]}).y.mean(skipna = False)\n## nan"
+ "text": "Tasks\n\nRead in the data and create a data frame that you will work with for this exam.\nCreate a new column variable, decade, in your data frame.\n\n\nYou will need to take the response year and truncate it to the decade, so that 1972 becomes 1970 and 1989 becomes 1980. You can use a series of logical statements if you want, but it may be more effective to find a numerical function or combination of functions that will perform the operation you want.\nfloor() and math.floor() in R and python respectively are good places to start.\nCreate a scatterplot (use geom_point) of your happy year vs decade to show that your approach succeeded.\n\n\nCreate a new data set by iterating through each year to find the proportion of people who are very happy. Use a for loop. Using your new data frame, plot the proportion of very happy people over time.\nNote: You may have to pass an argument to the mean function to tell it to exclude missing values from the calculation, such as na.rm or skipna. Or, you can remove the NAs from happy using a function like na.omit or dropna, but be careful to only drop rows with an NA in variables we care about, like happy or year.\n\nThe code below provides an example of how to create a summary dataset and handle NAs in R and python. You may modify this code to help you answer part 3.\n\n# Create sample data\ndf <- data.frame(x = c(rnorm(100, 10), rnorm(100, 20)),\n y = rep(c(\"Group 1\", \"Group 2\"), each = 100))\n\ndf_means <- data.frame(y = NULL, mean = NULL)\n\n# For each y group, what is the mean of x?\nfor (i in unique(df$y)) {\n sub_df <- subset(df, y == i)\n df_means <- rbind(df_means, \n data.frame(y = i, mean = mean(sub_df$x, na.rm = T)))\n}\n\ndf_means\n## y mean\n## 1 Group 1 10.08756\n## 2 Group 2 20.01365\n\n# Demonstration of na.rm\nmean(c(NA, 1, 2, 3), na.rm = T) # Remove NAs\n## [1] 2\nmean(c(NA, 1, 2, 3), na.rm = F) # Don't remove NAs\n## [1] NA\n\n\nimport pandas as pd\nimport numpy as np\n\n# Create a new data frame\ndf = pd.DataFrame({\n 'y': np.repeat(['Group1', 'Group2'], (100, 100)), \n 'x': np.concatenate((np.random.normal(loc = 10, size = 100), np.random.normal(loc = 12, size = 100)), axis = None)\n })\n\n# Create an empty dataframe\ndf_means = pd.DataFrame(columns = ['y', 'mean'])\n\n# For each age, how many values?\nfor i in np.unique(df.y):\n # Create the subset\n df_sub = df.loc[df.y == i]\n # Drop NAs from the data frame\n # This step isn't necessary because mean() uses skipna = T by default\n # df_sub = df_sub.dropna(subset = ['x', 'y']) \n # Add a new row to the end of df_means\n df_means.loc[len(df_means.index)] = [i, df_sub.x.mean()]\n\n\n# Demonstrating skipna parameter of mean\npd.DataFrame({'y':[1, 2, 3, np.nan]}).y.mean(skipna = True)\n## 2.0\npd.DataFrame({'y':[1, 2, 3, np.nan]}).y.mean(skipna = False)\n## nan"
},
{
"objectID": "exams/2023-midterm.html#solutions",
@@ -781,7 +781,7 @@
"href": "exams/2024-midterm.html#tasks",
"title": "2024 Midterm",
"section": "Tasks",
- "text": "Tasks\n\nRead in the data and create a data frame that you will work with for this exam.\nCreate a new column variable, decade, in your data frame.\n\n\nYou will need to take the response year and truncate it to the decade, so that 1972 becomes 1970 and 1989 becomes 1980. You can use a series of logical statements if you want, but it may be more effective to find a numerical function or combination of functions that will perform the operation you want.\nfloor() and math.floor() in R and python respectively are good places to start.\nCreate a scatterplot (use geom_point) of your happy year vs decade to show that your approach succeeded.\n\n\nCreate a new data set by iterating through each year to find the proportion of people who are very happy. Use a for loop. Using your new data frame, plot the proportion of very happy people over time.\nNote: You may have to pass an argument to the mean function to tell it to exclude missing values from the calculation, such as na.rm or skipna. Or, you can remove the NAs from happy using a function like na.omit or dropna, but be careful to only drop rows with an NA in variables we care about, like happy or year.\n\nThe code below provides an example of how to create a summary dataset and handle NAs in R and python. You may modify this code to help you answer part 3.\n\n# Create sample data\ndf <- data.frame(x = c(rnorm(100, 10), rnorm(100, 20)),\n y = rep(c(\"Group 1\", \"Group 2\"), each = 100))\n\ndf_means <- data.frame(y = NULL, mean = NULL)\n\n# For each y group, what is the mean of x?\nfor (i in unique(df$y)) {\n sub_df <- subset(df, y == i)\n df_means <- rbind(df_means, \n data.frame(y = i, mean = mean(sub_df$x, na.rm = T)))\n}\n\ndf_means\n## y mean\n## 1 Group 1 10.08169\n## 2 Group 2 20.04041\n\n# Demonstration of na.rm\nmean(c(NA, 1, 2, 3), na.rm = T) # Remove NAs\n## [1] 2\nmean(c(NA, 1, 2, 3), na.rm = F) # Don't remove NAs\n## [1] NA\n\n\nimport pandas as pd\nimport numpy as np\n\n# Create a new data frame\ndf = pd.DataFrame({\n 'y': np.repeat(['Group1', 'Group2'], (100, 100)), \n 'x': np.concatenate((np.random.normal(loc = 10, size = 100), np.random.normal(loc = 12, size = 100)), axis = None)\n })\n\n# Create an empty dataframe\ndf_means = pd.DataFrame(columns = ['y', 'mean'])\n\n# For each age, how many values?\nfor i in np.unique(df.y):\n # Create the subset\n df_sub = df.loc[df.y == i]\n # Drop NAs from the data frame\n # This step isn't necessary because mean() uses skipna = T by default\n # df_sub = df_sub.dropna(subset = ['x', 'y']) \n # Add a new row to the end of df_means\n df_means.loc[len(df_means.index)] = [i, df_sub.x.mean()]\n\n\n# Demonstrating skipna parameter of mean\npd.DataFrame({'y':[1, 2, 3, np.nan]}).y.mean(skipna = True)\n## 2.0\npd.DataFrame({'y':[1, 2, 3, np.nan]}).y.mean(skipna = False)\n## nan"
+ "text": "Tasks\n\nRead in the data and create a data frame that you will work with for this exam.\nCreate a new column variable, decade, in your data frame.\n\n\nYou will need to take the response year and truncate it to the decade, so that 1972 becomes 1970 and 1989 becomes 1980. You can use a series of logical statements if you want, but it may be more effective to find a numerical function or combination of functions that will perform the operation you want.\nfloor() and math.floor() in R and python respectively are good places to start.\nCreate a scatterplot (use geom_point) of your happy year vs decade to show that your approach succeeded.\n\n\nCreate a new data set by iterating through each year to find the proportion of people who are very happy. Use a for loop. Using your new data frame, plot the proportion of very happy people over time.\nNote: You may have to pass an argument to the mean function to tell it to exclude missing values from the calculation, such as na.rm or skipna. Or, you can remove the NAs from happy using a function like na.omit or dropna, but be careful to only drop rows with an NA in variables we care about, like happy or year.\n\nThe code below provides an example of how to create a summary dataset and handle NAs in R and python. You may modify this code to help you answer part 3.\n\n# Create sample data\ndf <- data.frame(x = c(rnorm(100, 10), rnorm(100, 20)),\n y = rep(c(\"Group 1\", \"Group 2\"), each = 100))\n\ndf_means <- data.frame(y = NULL, mean = NULL)\n\n# For each y group, what is the mean of x?\nfor (i in unique(df$y)) {\n sub_df <- subset(df, y == i)\n df_means <- rbind(df_means, \n data.frame(y = i, mean = mean(sub_df$x, na.rm = T)))\n}\n\ndf_means\n## y mean\n## 1 Group 1 9.909545\n## 2 Group 2 20.083487\n\n# Demonstration of na.rm\nmean(c(NA, 1, 2, 3), na.rm = T) # Remove NAs\n## [1] 2\nmean(c(NA, 1, 2, 3), na.rm = F) # Don't remove NAs\n## [1] NA\n\n\nimport pandas as pd\nimport numpy as np\n\n# Create a new data frame\ndf = pd.DataFrame({\n 'y': np.repeat(['Group1', 'Group2'], (100, 100)), \n 'x': np.concatenate((np.random.normal(loc = 10, size = 100), np.random.normal(loc = 12, size = 100)), axis = None)\n })\n\n# Create an empty dataframe\ndf_means = pd.DataFrame(columns = ['y', 'mean'])\n\n# For each age, how many values?\nfor i in np.unique(df.y):\n # Create the subset\n df_sub = df.loc[df.y == i]\n # Drop NAs from the data frame\n # This step isn't necessary because mean() uses skipna = T by default\n # df_sub = df_sub.dropna(subset = ['x', 'y']) \n # Add a new row to the end of df_means\n df_means.loc[len(df_means.index)] = [i, df_sub.x.mean()]\n\n\n# Demonstrating skipna parameter of mean\npd.DataFrame({'y':[1, 2, 3, np.nan]}).y.mean(skipna = True)\n## 2.0\npd.DataFrame({'y':[1, 2, 3, np.nan]}).y.mean(skipna = False)\n## nan"
},
{
"objectID": "exams/2024-midterm.html#solutions",
diff --git a/syllabus.pdf b/syllabus.pdf
index fd1c3e0..82b8976 100644
Binary files a/syllabus.pdf and b/syllabus.pdf differ