diff --git a/STAT660-01_f18-team-3_project2_data_analysis_by_MP.sas b/STAT660-01_f18-team-3_project2_data_analysis_by_MP.sas index 73ff70c..d35d9d1 100755 --- a/STAT660-01_f18-team-3_project2_data_analysis_by_MP.sas +++ b/STAT660-01_f18-team-3_project2_data_analysis_by_MP.sas @@ -7,9 +7,10 @@ This file uses the following analytic dataset to address several research questions regarding country's happiness. -Dataset Name: cotw_2016_analytic_file created in external file +Dataset Name: cotw_2016_analytic_file, cotw_2016_analytic_file_sort_hs, +and cotw_2016_analytic_file_sort_hr were created in external file STAT660-01_f18-team-3_project2_data_preparation.sas, which is assumed to be -in the same directory as this file +in the same directory as this file. See included file for dataset properties */ @@ -19,12 +20,12 @@ See included file for dataset properties * set relative file import path to current directory (using standard SAS trick); X "cd ""%substr(%sysget(SAS_EXECFILEPATH),1,%eval(%length(%sysget(SAS_EXECFILEPATH))-%length(%sysget(SAS_EXECFILENAME))))"""; -* load external file that generates analytic datasets cde_2014_analytic_file, - cde_2014_analytic_file_sort_frpm, and cde_2014_analytic_file_sort_sat; +* load external file that generates analytic datasets cotw_2016_analytic_file, + cotw_2016_analytic_file_sort_hs , and cotw_2016_analytic_file_sort_hr ; %include '.\STAT660-01_f18-team-3_project2_data_preparation.sas'; *******************************************************************************; -* Research Question Analysis Starting Point; +* Research Question Analysis Starting Point ; *******************************************************************************; title1 "Research Question: For the 20 largest countries, what are the top five countries that experienced the biggest decrease in Happiness Score between 2015 and 2016?" 
@@ -110,7 +111,7 @@ proc sgplot run; *******************************************************************************; -* Research Question Analysis Starting Point; +* Research Question Analysis Starting Point ; *******************************************************************************; title1 'Research Question: Can "GPI" predict the "Happiness Score" in 2016?' @@ -149,6 +150,15 @@ correlated. If not continue to build model than check test model assumptions. - Error variance is the same for all observations 3) Y observations are not correlated with each other + Model Results + Happiness Score = 7.648 + (-1.104)*GPI + Type III SS p-value < 0.0001 + 22% of the variability in happiness score is explained by GPI + + Correlation shows a -46.911% correlation. Thus, not correlated, so can go + to next step. Test for residual normality shows Shapiro-Wilk 0.2089>=0.05, + fail to reject Ho; residuals are normally distributed. + Goal: Find straight line that minimizes sum of squared distances from actual weight to fitted line @@ -159,20 +169,6 @@ only 22% can be explained by GPI. Follow-up Steps: A possible follow-up is add additional X variables to improve model predictiveness. */ -*******************************************************************************; -/* -Model Results -Happiness Score = 7.648 + (-1.104)*GPI - -Results: -Type III SS p-value < 0.0001 -22% of the variability in happiness score is explained by GPI - -Correlation shows show -46.911% correlation. Thusly, not correlated can go -to next step. Test for Residual normality, shows Shapiro-Wilk 0.2089 >= 0.05, -failed to reject Ho, residuals are normally distributed. 
-*/ -*******************************************************************************; proc glm data= cotw_2016_analytic_file diff --git a/STAT660-01_f18-team-3_project2_data_preparation.sas b/STAT660-01_f18-team-3_project2_data_preparation.sas index 4a02b93..54832ea 100755 --- a/STAT660-01_f18-team-3_project2_data_preparation.sas +++ b/STAT660-01_f18-team-3_project2_data_preparation.sas @@ -122,7 +122,27 @@ proc format; -0.014<- 0.000="Q2 Happiness Score %" 0.000<- 0.021="Q3 Happiness Score %" 0.021<- high ="Q4 Happiness Score %" - ; + ; + value $ country + 'Palestinian Territorie' = 'Palestinian Territories' + 'Somaliland region' = 'Somalia' + 'Taiwan Province of China' = 'Taiwan' + 'Congo, Democratic Republ' = 'Congo (Kinshasa)' + 'Congo' = 'Congo (Brazzaville)' + 'Iran, Islamic Republic o' = 'Iran' + "Lao People's Democratic" = 'Laos' + 'Macedonia TFYR' = 'Macedonia' + 'Korea, Republic of' = 'South Korea' + 'Korea, Democratic People' = 'North Korea' + 'Syrian Arab Republic' = 'Syria' + 'Tanzania, United Republi' = 'Tanzania' + 'United States of America' = 'United States' + 'Venezuela, Bolivarian Re' = 'Venezuela' + 'Viet Nam' = 'Vietnam' + 'Palestine' = 'Palestinian Territories' + 'Republic of the Congo' = 'Congo (Kinshasa)' + ; + run; * setup environmental parameters; @@ -181,57 +201,8 @@ run; set &dsn ; - if country = 'Palestinian Territorie' - then country = 'Palestinian Territories' - ; - if country = 'Somaliland region' - then country = 'Somalia' - ; - if country = 'Taiwan Province of China' - then country = 'Taiwan' - ; - if country = 'Congo, Democratic Republ' - then country = 'Congo (Kinshasa)' - ; - if country = 'Congo' - then country = 'Congo (Brazzaville)' - ; - if country = 'Iran, Islamic Republic o' - then country = 'Iran' - ; - if country = "Lao People's Democratic" - then country = 'Laos' - ; - if country = 'Macedonia TFYR' - then country = 'Macedonia' - ; - if country = 'Korea, Republic of' - then country = 'South Korea' - ; - if country = 
'Korea, Democratic People' - then country = 'North Korea' - ; - if country = 'Syrian Arab Republic' - then country = 'Syria' - ; - if country = 'Tanzania, United Republi' - then country = 'Tanzania' - ; - if country = 'United States of America' - then country = 'United States' - ; - if country = 'Venezuela, Bolivarian Re' - then country = 'Venezuela' - ; - if country = 'Viet Nam' - then country = 'Vietnam' - ; - if country = 'Palestine' - then country = 'Palestinian Territories' - ; - if country = 'Republic of the Congo' - then country = 'Congo (Kinshasa)' - ; + country=put(country,country.) + ; run; ***************************************************************************; @@ -480,8 +451,9 @@ run; cotw_2016_analytic_file_sort_hs by descending happiness_score_yoy for largest 20 countries ; *******************************************************************************; -proc sort nodupkey +proc sort data = cotw_2016_analytic_file + out = cotw_2016_analytic_file_sort_hs ; by descending population_mm @@ -490,7 +462,7 @@ run; data cotw_2016_analytic_file_sort_hs; set - cotw_2016_analytic_file + cotw_2016_analytic_file_sort_hs ; if _n_<=20 ;