diff --git a/.nojekyll b/.nojekyll index baeee3e..dedc3fe 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -e171e7fb \ No newline at end of file +2b6f684e \ No newline at end of file diff --git a/exams/2023-midterm.html b/exams/2023-midterm.html index 20d86a4..c276e1e 100644 --- a/exams/2023-midterm.html +++ b/exams/2023-midterm.html @@ -288,9 +288,9 @@

Tasks

} df_means -## y mean -## 1 Group 1 10.00839 -## 2 Group 2 20.17497 +## y mean +## 1 Group 1 9.893969 +## 2 Group 2 20.003999 # Demonstration of na.rm mean(c(NA, 1, 2, 3), na.rm = T) # Remove NAs diff --git a/exams/2024-midterm.html b/exams/2024-midterm.html index 0b278c4..448cf38 100644 --- a/exams/2024-midterm.html +++ b/exams/2024-midterm.html @@ -290,8 +290,8 @@

Tasks

df_means ## y mean -## 1 Group 1 9.822427 -## 2 Group 2 19.955377 +## 1 Group 1 9.968658 +## 2 Group 2 20.120513 # Demonstration of na.rm mean(c(NA, 1, 2, 3), na.rm = T) # Remove NAs diff --git a/figs/calendar.pdf b/figs/calendar.pdf index 8064cd4..541b37c 100644 Binary files a/figs/calendar.pdf and b/figs/calendar.pdf differ diff --git a/index.html b/index.html index acab336..e44a241 100644 --- a/index.html +++ b/index.html @@ -246,7 +246,7 @@

Course Materials

- + May 10, 2024 @@ -260,7 +260,7 @@

Course Materials

- + May 8, 2024 @@ -288,7 +288,7 @@

Course Materials

- + May 3, 2024 @@ -316,7 +316,7 @@

Course Materials

- + Apr 26, 2024 @@ -344,7 +344,7 @@

Course Materials

- + Apr 19, 2024 @@ -358,7 +358,7 @@

Course Materials

- + Apr 17, 2024 @@ -386,7 +386,7 @@

Course Materials

- + Apr 12, 2024 @@ -400,7 +400,7 @@

Course Materials

- + Apr 10, 2024 @@ -428,7 +428,7 @@

Course Materials

- + Apr 5, 2024 @@ -442,7 +442,7 @@

Course Materials

- + Apr 2, 2024 @@ -484,7 +484,7 @@

Course Materials

- + Mar 29, 2024 @@ -498,7 +498,7 @@

Course Materials

- + Mar 27, 2024 @@ -526,7 +526,7 @@

Course Materials

- + Mar 22, 2024 @@ -540,7 +540,7 @@

Course Materials

- + Mar 20, 2024 @@ -568,7 +568,7 @@

Course Materials

- + Mar 7, 2024 @@ -596,7 +596,7 @@

Course Materials

- + Mar 1, 2024 @@ -610,7 +610,7 @@

Course Materials

- + Mar 1, 2024 @@ -624,7 +624,7 @@

Course Materials

- + Feb 28, 2024 @@ -652,7 +652,7 @@

Course Materials

- + Feb 23, 2024 @@ -666,7 +666,7 @@

Course Materials

- + Feb 21, 2024 @@ -694,7 +694,7 @@

Course Materials

- + Feb 16, 2024 @@ -708,7 +708,7 @@

Course Materials

- + Feb 14, 2024 @@ -750,7 +750,7 @@

Course Materials

- + Feb 9, 2024 @@ -764,7 +764,7 @@

Course Materials

- + Feb 7, 2024 @@ -806,7 +806,7 @@

Course Materials

- + Feb 2, 2024 @@ -820,7 +820,7 @@

Course Materials

- + Jan 31, 2024 @@ -862,7 +862,7 @@

Course Materials

- + Jan 24, 2024 @@ -876,7 +876,7 @@

Course Materials

- + Jan 21, 2024 diff --git a/search.json b/search.json index c493783..e02bac7 100644 --- a/search.json +++ b/search.json @@ -739,7 +739,7 @@ "href": "exams/2023-midterm.html#tasks", "title": "Practice Midterm", "section": "Tasks", - "text": "Tasks\n\nRead in the data and create a data frame that you will work with for this exam.\nCreate a new column variable, decade, in your data frame.\n\n\nYou will need to take the response year and truncate it to the decade, so that 1972 becomes 1970 and 1989 becomes 1980. You can use a series of logical statements if you want, but it may be more effective to find a numerical function or combination of functions that will perform the operation you want.\nfloor() and math.floor() in R and python respectively are good places to start.\nCreate a scatterplot (use geom_point) of your happy year vs decade to show that your approach succeeded.\n\n\nCreate a new data set by iterating through each year to find the proportion of people who are very happy. Use a for loop. Using your new data frame, plot the proportion of very happy people over time.\nNote: You may have to pass an argument to the mean function to tell it to exclude missing values from the calculation, such as na.rm or skipna. Or, you can remove the NAs from happy using a function like na.omit or dropna, but be careful to only drop rows with an NA in variables we care about, like happy or year.\n\nThe code below provides an example of how to create a summary dataset and handle NAs in R and python. You may modify this code to help you answer part 3.\n\n# Create sample data\ndf <- data.frame(x = c(rnorm(100, 10), rnorm(100, 20)),\n y = rep(c(\"Group 1\", \"Group 2\"), each = 100))\n\ndf_means <- data.frame(y = NULL, mean = NULL)\n\n# For each y group, what is the mean of x?\nfor (i in unique(df$y)) {\n sub_df <- subset(df, y == i)\n df_means <- rbind(df_means, \n data.frame(y = i, mean = mean(sub_df$x, na.rm = T)))\n}\n\ndf_means\n## y mean\n## 1 Group 1 10.00839\n## 2 Group 2 20.17497\n\n# Demonstration of na.rm\nmean(c(NA, 1, 2, 3), na.rm = T) # Remove NAs\n## [1] 2\nmean(c(NA, 1, 2, 3), na.rm = F) # Don't remove NAs\n## [1] NA\n\n\nimport pandas as pd\nimport numpy as np\n\n# Create a new data frame\ndf = pd.DataFrame({\n 'y': np.repeat(['Group1', 'Group2'], (100, 100)), \n 'x': np.concatenate((np.random.normal(loc = 10, size = 100), np.random.normal(loc = 12, size = 100)), axis = None)\n })\n\n# Create an empty dataframe\ndf_means = pd.DataFrame(columns = ['y', 'mean'])\n\n# For each age, how many values?\nfor i in np.unique(df.y):\n # Create the subset\n df_sub = df.loc[df.y == i]\n # Drop NAs from the data frame\n # This step isn't necessary because mean() uses skipna = T by default\n # df_sub = df_sub.dropna(subset = ['x', 'y']) \n # Add a new row to the end of df_means\n df_means.loc[len(df_means.index)] = [i, df_sub.x.mean()]\n\n\n# Demonstrating skipna parameter of mean\npd.DataFrame({'y':[1, 2, 3, np.nan]}).y.mean(skipna = True)\n## 2.0\npd.DataFrame({'y':[1, 2, 3, np.nan]}).y.mean(skipna = False)\n## nan" + "text": "Tasks\n\nRead in the data and create a data frame that you will work with for this exam.\nCreate a new column variable, decade, in your data frame.\n\n\nYou will need to take the response year and truncate it to the decade, so that 1972 becomes 1970 and 1989 becomes 1980. You can use a series of logical statements if you want, but it may be more effective to find a numerical function or combination of functions that will perform the operation you want.\nfloor() and math.floor() in R and python respectively are good places to start.\nCreate a scatterplot (use geom_point) of your happy year vs decade to show that your approach succeeded.\n\n\nCreate a new data set by iterating through each year to find the proportion of people who are very happy. Use a for loop. Using your new data frame, plot the proportion of very happy people over time.\nNote: You may have to pass an argument to the mean function to tell it to exclude missing values from the calculation, such as na.rm or skipna. Or, you can remove the NAs from happy using a function like na.omit or dropna, but be careful to only drop rows with an NA in variables we care about, like happy or year.\n\nThe code below provides an example of how to create a summary dataset and handle NAs in R and python. You may modify this code to help you answer part 3.\n\n# Create sample data\ndf <- data.frame(x = c(rnorm(100, 10), rnorm(100, 20)),\n y = rep(c(\"Group 1\", \"Group 2\"), each = 100))\n\ndf_means <- data.frame(y = NULL, mean = NULL)\n\n# For each y group, what is the mean of x?\nfor (i in unique(df$y)) {\n sub_df <- subset(df, y == i)\n df_means <- rbind(df_means, \n data.frame(y = i, mean = mean(sub_df$x, na.rm = T)))\n}\n\ndf_means\n## y mean\n## 1 Group 1 9.893969\n## 2 Group 2 20.003999\n\n# Demonstration of na.rm\nmean(c(NA, 1, 2, 3), na.rm = T) # Remove NAs\n## [1] 2\nmean(c(NA, 1, 2, 3), na.rm = F) # Don't remove NAs\n## [1] NA\n\n\nimport pandas as pd\nimport numpy as np\n\n# Create a new data frame\ndf = pd.DataFrame({\n 'y': np.repeat(['Group1', 'Group2'], (100, 100)), \n 'x': np.concatenate((np.random.normal(loc = 10, size = 100), np.random.normal(loc = 12, size = 100)), axis = None)\n })\n\n# Create an empty dataframe\ndf_means = pd.DataFrame(columns = ['y', 'mean'])\n\n# For each age, how many values?\nfor i in np.unique(df.y):\n # Create the subset\n df_sub = df.loc[df.y == i]\n # Drop NAs from the data frame\n # This step isn't necessary because mean() uses skipna = T by default\n # df_sub = df_sub.dropna(subset = ['x', 'y']) \n # Add a new row to the end of df_means\n df_means.loc[len(df_means.index)] = [i, df_sub.x.mean()]\n\n\n# Demonstrating skipna parameter of mean\npd.DataFrame({'y':[1, 2, 3, np.nan]}).y.mean(skipna = True)\n## 2.0\npd.DataFrame({'y':[1, 2, 3, np.nan]}).y.mean(skipna = False)\n## nan" }, { "objectID": "exams/2023-midterm.html#solutions", @@ -781,7 +781,7 @@ "href": "exams/2024-midterm.html#tasks", "title": "2024 Midterm", "section": "Tasks", - "text": "Tasks\n\nRead in the data and create a data frame that you will work with for this exam.\nCreate a new column variable, decade, in your data frame.\n\n\nYou will need to take the response year and truncate it to the decade, so that 1972 becomes 1970 and 1989 becomes 1980. You can use a series of logical statements if you want, but it may be more effective to find a numerical function or combination of functions that will perform the operation you want.\nfloor() and math.floor() in R and python respectively are good places to start.\nCreate a scatterplot (use geom_point) of your happy year vs decade to show that your approach succeeded.\n\n\nCreate a new data set by iterating through each year to find the proportion of people who are very happy. Use a for loop. Using your new data frame, plot the proportion of very happy people over time.\nNote: You may have to pass an argument to the mean function to tell it to exclude missing values from the calculation, such as na.rm or skipna. Or, you can remove the NAs from happy using a function like na.omit or dropna, but be careful to only drop rows with an NA in variables we care about, like happy or year.\n\nThe code below provides an example of how to create a summary dataset and handle NAs in R and python. You may modify this code to help you answer part 3.\n\n# Create sample data\ndf <- data.frame(x = c(rnorm(100, 10), rnorm(100, 20)),\n y = rep(c(\"Group 1\", \"Group 2\"), each = 100))\n\ndf_means <- data.frame(y = NULL, mean = NULL)\n\n# For each y group, what is the mean of x?\nfor (i in unique(df$y)) {\n sub_df <- subset(df, y == i)\n df_means <- rbind(df_means, \n data.frame(y = i, mean = mean(sub_df$x, na.rm = T)))\n}\n\ndf_means\n## y mean\n## 1 Group 1 9.822427\n## 2 Group 2 19.955377\n\n# Demonstration of na.rm\nmean(c(NA, 1, 2, 3), na.rm = T) # Remove NAs\n## [1] 2\nmean(c(NA, 1, 2, 3), na.rm = F) # Don't remove NAs\n## [1] NA\n\n\nimport pandas as pd\nimport numpy as np\n\n# Create a new data frame\ndf = pd.DataFrame({\n 'y': np.repeat(['Group1', 'Group2'], (100, 100)), \n 'x': np.concatenate((np.random.normal(loc = 10, size = 100), np.random.normal(loc = 12, size = 100)), axis = None)\n })\n\n# Create an empty dataframe\ndf_means = pd.DataFrame(columns = ['y', 'mean'])\n\n# For each age, how many values?\nfor i in np.unique(df.y):\n # Create the subset\n df_sub = df.loc[df.y == i]\n # Drop NAs from the data frame\n # This step isn't necessary because mean() uses skipna = T by default\n # df_sub = df_sub.dropna(subset = ['x', 'y']) \n # Add a new row to the end of df_means\n df_means.loc[len(df_means.index)] = [i, df_sub.x.mean()]\n\n\n# Demonstrating skipna parameter of mean\npd.DataFrame({'y':[1, 2, 3, np.nan]}).y.mean(skipna = True)\n## 2.0\npd.DataFrame({'y':[1, 2, 3, np.nan]}).y.mean(skipna = False)\n## nan" + "text": "Tasks\n\nRead in the data and create a data frame that you will work with for this exam.\nCreate a new column variable, decade, in your data frame.\n\n\nYou will need to take the response year and truncate it to the decade, so that 1972 becomes 1970 and 1989 becomes 1980. You can use a series of logical statements if you want, but it may be more effective to find a numerical function or combination of functions that will perform the operation you want.\nfloor() and math.floor() in R and python respectively are good places to start.\nCreate a scatterplot (use geom_point) of your happy year vs decade to show that your approach succeeded.\n\n\nCreate a new data set by iterating through each year to find the proportion of people who are very happy. Use a for loop. Using your new data frame, plot the proportion of very happy people over time.\nNote: You may have to pass an argument to the mean function to tell it to exclude missing values from the calculation, such as na.rm or skipna. Or, you can remove the NAs from happy using a function like na.omit or dropna, but be careful to only drop rows with an NA in variables we care about, like happy or year.\n\nThe code below provides an example of how to create a summary dataset and handle NAs in R and python. You may modify this code to help you answer part 3.\n\n# Create sample data\ndf <- data.frame(x = c(rnorm(100, 10), rnorm(100, 20)),\n y = rep(c(\"Group 1\", \"Group 2\"), each = 100))\n\ndf_means <- data.frame(y = NULL, mean = NULL)\n\n# For each y group, what is the mean of x?\nfor (i in unique(df$y)) {\n sub_df <- subset(df, y == i)\n df_means <- rbind(df_means, \n data.frame(y = i, mean = mean(sub_df$x, na.rm = T)))\n}\n\ndf_means\n## y mean\n## 1 Group 1 9.968658\n## 2 Group 2 20.120513\n\n# Demonstration of na.rm\nmean(c(NA, 1, 2, 3), na.rm = T) # Remove NAs\n## [1] 2\nmean(c(NA, 1, 2, 3), na.rm = F) # Don't remove NAs\n## [1] NA\n\n\nimport pandas as pd\nimport numpy as np\n\n# Create a new data frame\ndf = pd.DataFrame({\n 'y': np.repeat(['Group1', 'Group2'], (100, 100)), \n 'x': np.concatenate((np.random.normal(loc = 10, size = 100), np.random.normal(loc = 12, size = 100)), axis = None)\n })\n\n# Create an empty dataframe\ndf_means = pd.DataFrame(columns = ['y', 'mean'])\n\n# For each age, how many values?\nfor i in np.unique(df.y):\n # Create the subset\n df_sub = df.loc[df.y == i]\n # Drop NAs from the data frame\n # This step isn't necessary because mean() uses skipna = T by default\n # df_sub = df_sub.dropna(subset = ['x', 'y']) \n # Add a new row to the end of df_means\n df_means.loc[len(df_means.index)] = [i, df_sub.x.mean()]\n\n\n# Demonstrating skipna parameter of mean\npd.DataFrame({'y':[1, 2, 3, np.nan]}).y.mean(skipna = True)\n## 2.0\npd.DataFrame({'y':[1, 2, 3, np.nan]}).y.mean(skipna = False)\n## nan" }, { "objectID": "exams/2024-midterm.html#solutions", diff --git a/syllabus.pdf b/syllabus.pdf index 4c38b7b..296793a 100644 Binary files a/syllabus.pdf and b/syllabus.pdf differ