-
Notifications
You must be signed in to change notification settings - Fork 0
/
plot
83 lines (61 loc) · 2.71 KB
/
plot
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# W2F5: Descriptive statistics and data cleaning
# Data source: http://www.hscic.gov.uk/catalogue/PUB13648
# Read 12 rows of the table, skipping the 4 lines at the top of the file.
obesity <- read.csv(
  "http://www.hscic.gov.uk/catalogue/PUB13648/Obes-phys-acti-diet-eng-2014-tab_CSV.csv",
  skip = 4,
  nrows = 12
)
str(obesity)

# Delete the commas in the numbers before converting them to numeric.
# gsub() already returns a character vector, so no as.character() is needed.
obesity$Males <- as.numeric(gsub(",", "", obesity$Males, fixed = TRUE))
obesity$Females <- as.numeric(gsub(",", "", obesity$Females, fixed = TRUE))

# Drop the first row, column 2 and columns 5 to 12.
obesity <- obesity[-1, -c(2, 5:12)]
obesity

# This is the so-called wide format.
# We would like to have the long format: one row - one observation.
# reshape2 provides melt() for this (somewhat like transposing the matrix).
# Install the package only when it is missing instead of on every run.
if (!requireNamespace("reshape2", quietly = TRUE)) {
  install.packages("reshape2")
}
library("reshape2")
obesitylong <- melt(obesity)
obesitylong # long format
plot(obesitylong$value ~ obesitylong$variable)
# The lubridate package (install.packages("lubridate")) can change the time
# format.
# Setting the argument colClasses= in read.table() can reduce the import time
# of large datasets.
body <- read.table("http://www.amstat.org/publications/jse/datasets/body.dat.txt")
dim(body)
str(body)

# Column names, in the order in which the columns appear in the file.
BodyMeasurements <- c(
  "Biacromial_diameter", "Biiliac_diameter", "Bitrochanteric_diameter",
  "Chest_depth", "Chest_diameter", "Elbow_diameter", "Wrist_diameter",
  "Knee_diameter", "Ankle_diameter", "Shoulder_girth", "Chest_girth",
  "Waist_girth", "Navel_girth", "Hip_girth", "Thigh_girth", "Bicep_girth",
  "Forearm_girth", "Knee_girth", "Calf_max_girth", "Ankle_min_girth",
  "Wrist_min_girth", "Age", "Weight", "Height", "Gender"
)
names(body) <- BodyMeasurements
summary(body)
boxplot(body)
?par()

# Select Age by name: it is the 22nd column, so the positional index 21 used
# previously picked Wrist_min_girth instead.
Age <- body[["Age"]]
boxplot(Age, col = "red", xlab = "Age")

# The same box plot drawn with ggplot2; aes() looks up Age inside body.
library(ggplot2)
ggplot(body) + geom_boxplot(aes(1, y = Age))
#
# Save the current graphical parameters so that they can be restored later.
# no.readonly = TRUE leaves out the parameters that can only be queried, so
# par(keep.par) restores the settings without warnings.
keep.par <- par(no.readonly = TRUE)
# Widen the first margin (the bottom one) so that the rotated axis labels of
# the box plot below have room.
par(mar = c(10, 4, 4, 2) + 0.1)
# What does + 0.1 do? Test the code:
c(5, 4, 4, 2) + 0.1
# las = 3 draws the axis labels vertically.
boxplot(body, las = 3)
# To restore the parameters to their saved values, use: par(keep.par)
# or close your plotting window.
# A few examples of visualization: position, colour, size, plot character.
# Many different variables can be visualized in the same graph.
x <- seq_len(10)
set.seed(23)
y <- x + rnorm(length(x))
plot(x, y)                             # position
plot(x, y, col = x)                    # colour
plot(x, y, col = x, cex = x)           # size
plot(x, y, col = x, cex = x, pch = x)  # plot character

# A 10 x 10 grid of points, each drawn with a different plot character.
x <- rep(seq_len(10), times = 10)
y <- rep(seq_len(10), each = 10)
z <- seq_len(100)
plot(x, y, pch = z)
# Thigh girth against bicep girth, first plain, then with Gender shown as the
# plot character, then with Gender shown as the colour.
plot(body$Thigh_girth, body$Bicep_girth)
plot(body$Thigh_girth, body$Bicep_girth, pch = body$Gender)
# NOTE(review): the + 1 presumably avoids colour index 0 - confirm the coding
# of Gender.
plot(body$Thigh_girth, body$Bicep_girth, col = body$Gender + 1)

# Summarize a variable by binning: 5-year age groups.
# The breaks start at the minimum age and run far enough to cover the maximum.
# Together with include.lowest = TRUE this puts every age in a group; with
# breaks that stop short of the maximum and the default left-open first
# interval, the youngest and the oldest subjects would get NA and be missing
# from the coloured plots below.
age_limits <- range(body$Age)
n_bins <- max(1, ceiling((age_limits[2] - age_limits[1]) / 5))
breaks <- age_limits[1] + 5 * (0:n_bins)
Age_group <- cut(body$Age, breaks, include.lowest = TRUE)
body$Age_group <- Age_group

# Four variables in one graph: position, plot character (Gender), colour
# (Age_group) and, in the second plot, point size (Weight / 10).
plot(body$Thigh_girth, body$Bicep_girth, pch = body$Gender, col = body$Age_group)
plot(body$Thigh_girth, body$Bicep_girth, pch = body$Gender, col = body$Age_group, cex = (body$Weight / 10))
#Do continue exploring the dataset body