-
Notifications
You must be signed in to change notification settings - Fork 0
/
Cleaning data
62 lines (44 loc) · 1.77 KB
/
Cleaning data
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# Read tab-separated data, with a header row, from the system clipboard.
# This works only if the clipboard currently holds data (Windows or Linux):
# first copy the contents of List_P_3D_data.csv (found below the video).
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
a <- read.table("clipboard", header = TRUE, sep = "\t")
head(a)
names(a)

# Build the replacement column names step by step: every measurement gets a
# ".3D" and a ".2D" variant, and the first column is called "subject".
measurements <- c("EDV", "ESV", "SV", "EF")
paste(measurements, "3D", sep = ".")
c(paste(measurements, "3D", sep = "."), paste(measurements, "2D", sep = "."))
names(a) <- c(
  "subject",
  paste(measurements, "3D", sep = "."),
  paste(measurements, "2D", sep = ".")
)
# Point the working directory at the folder that holds the data files.
# The placeholder below must be replaced with your own path before running.
setwd(" here you must write your own relevant path ")
dir()  # list the files in the working directory

# Read the semicolon-separated file, treating its first line as the header.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
b <- read.table(file = "List_P_3D_data.csv", header = TRUE, sep = ";")
dim(b)
head(b)
tail(b)

# Remove rows 31 through 33 and columns 10 through 12.
# -(31:33) selects the same indices as -31:-33 but reads unambiguously.
b <- b[-(31:33), -(10:12)]
tail(b)  # last 6 rows
# First attempt: this call returns an error, since the file has missing
# columns. It is wrapped in try() so the error message is still shown but the
# rest of the script keeps running when the file is sourced as a whole.
try(
  c <- read.table(
    file = "List_P_3D_data_missing-columns.txt",
    header = TRUE,
    sep = ";"
  )
)

# Second attempt: fill = TRUE fills the empty columns, so rows with fewer
# fields than the others no longer stop the import.
c <- read.table(
  file = "List_P_3D_data_missing-columns.txt",
  header = TRUE,
  sep = ";",
  fill = TRUE
)

# Related import functions: read.csv() read.fwf() scan() readLines()
# Open the help page for read.table.
?read.table
# Install the HistData package. The function is install.packages();
# installPackages() does not exist in R and fails with
# "could not find function".
install.packages("HistData")
#http://www.data.gov/
#http://www.healthdata.gov/dataset/search
#http://data.gov.uk/data/search
#http://data.gov.in/
#http://data.gov.in/community/developer-community
# data.gov.uk
# http://www.hscic.gov.uk/catalogue/PUB13648
# Read a CSV file directly from a URL into the data frame `test`.
# NOTE(review): needs network access; the columns used below (Total,
# Patient_Type, Urgent_Occasional, Region_Code) are assumed to exist in the
# downloaded file -- confirm against the actual data.
test <- read.csv("http://www.hscic.gov.uk/catalogue/PUB14142/nhs-dent-stat-eng-2013-14-thir-quar-anx4u-UDA-CCG.csv")
# Display the structure of the imported object.
str(test)
# Box plot of Total, one box per value of Patient_Type.
boxplot(test$Total ~ test$Patient_Type)
# Box plot of Urgent_Occasional as a single group.
boxplot(test$Urgent_Occasional)
# Box plot of Urgent_Occasional, one box per value of Region_Code.
boxplot(test$Urgent_Occasional ~ test$Region_Code)
# Box plot of Urgent_Occasional, one box per value of Patient_Type.
boxplot(test$Urgent_Occasional ~ test$Patient_Type)
# install.packages("RCurl")
# Source page: http://www.ncbi.nlm.nih.gov/gds searched with the word
# 'diabetes'.
# Fetch the search-result page as a character vector, one element per line.
webq <- readLines(con = "http://www.ncbi.nlm.nih.gov/gds/?term=diabetes")
head(x = webq)
# Positions of the lines that contain "diabetes" ...
grep(pattern = "diabetes", x = webq)
# ... and the matching lines themselves.
grep(pattern = "diabetes", x = webq, value = TRUE)
# Current date and time as a character string.
date()