-
Notifications
You must be signed in to change notification settings - Fork 0
/
snpBAC_NA_filtering.R
executable file
·56 lines (47 loc) · 2.07 KB
/
snpBAC_NA_filtering.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env Rscript
# This script removes every row that has an empty Query_SNP cell or a missing (NA) PhysPos value.
# Takes one argument:
# 1) Tab-delimited SNP_BAC.txt file with a header line that includes the Query_SNP, PhysPos and Chr_2016 columns
# Script written by Chaochih Liu
# September 20, 2016
# To run the script: Rscript ./snpBAC_NA_filtering.R <SNP_BAC.txt>
# Collect the command line arguments supplied by the user.
# trailingOnly = TRUE requests only the user-supplied arguments, so the
# first element is expected to be the input file path.
# The resulting character vector is read later by main() as args[1].
args <- commandArgs(trailingOnly = TRUE)
# Load a tab-delimited SNP_BAC table (with physical positions and new
# chromosome info) from disk.
#
# Args:
#   filename: path to the SNP_BAC.txt file; its first line is the header.
#
# Returns:
#   A data.frame of the file contents. Fields holding the string "NA" are
#   read as missing values, and rows with too few fields are filled.
readFileSnpBAC <- function(filename) {
  snp_table <- read.delim(filename, header = TRUE, fill = TRUE, na.strings = "NA")
  snp_table
}
# Drop rows that lack a query SNP name or a physical position.
#
# Args:
#   data.file: data.frame with columns named exactly Query_SNP and PhysPos.
#
# Returns:
#   The rows of data.file whose Query_SNP is neither NA nor "" and whose
#   PhysPos is not NA. All columns and the original row names are kept.
#
# Stops with an error when a required column is absent; without this check
# a missing column would silently filter out every row.
removeEmptyAndNA <- function(data.file) {
  required <- c("Query_SNP", "PhysPos")
  absent <- setdiff(required, names(data.file))
  if (length(absent) > 0) {
    stop(
      "Input is missing required column(s): ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  query_snp <- data.file[["Query_SNP"]]
  # NA is tested explicitly: comparing NA with "" yields NA, and an NA in a
  # logical row index produces an all-NA row instead of dropping the row.
  has_snp <- !is.na(query_snp) & query_snp != ""
  has_pos <- !is.na(data.file[["PhysPos"]])

  data.file[has_snp & has_pos, , drop = FALSE]
}
# Write the filtered table to "<input name>_filtered.txt".
#
# Args:
#   filename: path of the original input file; the output path is derived
#             from it by replacing a trailing ".txt" with "_filtered.txt"
#             (or appending "_filtered.txt" when there is no ".txt" suffix).
#   filtered: data.frame to write.
#
# The output is tab-separated and unquoted, with a header row and no row
# names.
writeOutFile <- function(filename, filtered) {
  # Strip only a literal ".txt" at the very end of the name. Splitting on
  # the unescaped pattern ".txt" would treat "." as "any character" and cut
  # the name at every match (e.g. at "_txt"), yielding several output names.
  inputName <- sub("\\.txt$", "", filename)
  outputName <- paste(inputName, "filtered.txt", sep = "_")
  write.table(
    x = filtered,
    file = outputName,
    quote = FALSE,
    sep = "\t",
    eol = "\n",
    col.names = TRUE,
    row.names = FALSE
  )
}
# Driver function: read the SNP_BAC file named by the first command line
# argument, remove rows with an empty Query_SNP or NA PhysPos, and write
# the remaining rows to the derived "_filtered.txt" output file.
#
# Reads the script-level `args` vector. Stops with a usage message when no
# input file was given, instead of passing NA on to the file reader.
main <- function() {
  if (length(args) < 1 || is.na(args[1])) {
    stop("Usage: Rscript ./snpBAC_NA_filtering.R <SNP_BAC.txt>", call. = FALSE)
  }
  snpBACPhysPos <- args[1] # SNP_BAC.txt file with Query_SNP and PhysPos columns
  dataToFilter <- readFileSnpBAC(filename = snpBACPhysPos)
  finalFiltered <- removeEmptyAndNA(data.file = dataToFilter)
  writeOutFile(filename = snpBACPhysPos, filtered = finalFiltered)
}
main() # Entry point: runs the read / filter / write steps when the script is executed