-
Notifications
You must be signed in to change notification settings - Fork 55
/
density.py
97 lines (80 loc) · 3.43 KB
/
density.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
'''!
Density Generator
Date created: 15th April 2019
License: GNU General Public License version 3 for academic or
not-for-profit use only
Bactome package is free software: you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
'''
import math
import fire
def cumulative_density(inputfile, column=1, increment=1, header=1):
    '''!
    Function to generate cumulative density (commonly known as
    cumulative frequency) from a data set.

    Reads one column of a comma-delimited file and prints, for each
    threshold, the fraction of values less than or equal to that
    threshold. Thresholds start at floor(minimum value) and advance by
    the increment until one of them reaches or passes the maximum
    value, so the last density printed is always 1.0.

    Usage:

        python density.py CDF --inputfile=<input file path> --column=<column to use> --increment=<incremental value> --header=<number of header rows>

    @param inputfile String: Path of file to process.
    @param column Integer: Positional value of column to use. Default
    = 1 (first column).
    @param increment Float: Incremental value for bin generation.
    Must be greater than 0. Default = 1.
    @param header Integer: Denotes the number of header rows to be
    removed. Default = 1
    @exception ValueError: If increment is not greater than 0, if no
    data rows remain after removing the header, or if a value is not a
    finite number.
    '''
    column = int(column) - 1
    increment = float(increment)
    # "not >" (rather than "<=") also rejects NaN; a zero, negative or
    # NaN increment would otherwise never let the loop terminate.
    if not increment > 0:
        raise ValueError('increment must be greater than 0')
    # The context manager closes the file as soon as it has been read.
    with open(inputfile, 'r') as infile:
        rows = infile.readlines()[int(header):]
    # Blank rows (such as a trailing empty line) hold no data.
    data = [float(row.split(',')[column]) for row in rows if row.strip()]
    if not data:
        raise ValueError('no data rows found in %s' % str(inputfile))
    # NaN or infinity would make the threshold walk meaningless or
    # endless, so reject them up front.
    if not all(math.isfinite(x) for x in data):
        raise ValueError('data contains non-finite values')
    threshold = float(math.floor(min(data)))
    max_value = max(data)
    num_of_data = len(data)
    print(' : '.join(['Threshold', 'Density']))
    while True:
        count = sum(1 for x in data if x <= threshold)
        density = count / num_of_data
        print('%s : %s' % (str(threshold), str(density)))
        # Stop only once a threshold has covered the largest value;
        # stopping at floor(max) would leave the top of the data out.
        if not threshold < max_value:
            break
        threshold = threshold + increment
def probability_density(inputfile, column=1, increment=1, header=1):
    '''!
    Function to generate probability density (commonly known as
    frequency) from a data set.

    Reads one column of a comma-delimited file and prints, for each
    threshold, the fraction of values falling in the bin
    (previous threshold, threshold]. Thresholds start at
    floor(minimum value) and advance by the increment until one of
    them reaches or passes the maximum value, so every value is
    counted in exactly one bin and the densities sum to 1.

    Usage:

        python density.py PDF --inputfile=<input file path> --column=<column to use> --increment=<incremental value> --header=<number of header rows>

    @param inputfile String: Path of file to process.
    @param column Integer: Positional value of column to use. Default
    = 1 (first column).
    @param increment Float: Incremental value for bin generation.
    Must be greater than 0. Default = 1.
    @param header Integer: Denotes the number of header rows to be
    removed. Default = 1
    @exception ValueError: If increment is not greater than 0, if no
    data rows remain after removing the header, or if a value is not a
    finite number.
    '''
    column = int(column) - 1
    increment = float(increment)
    # "not >" (rather than "<=") also rejects NaN; a zero, negative or
    # NaN increment would otherwise never let the loop terminate.
    if not increment > 0:
        raise ValueError('increment must be greater than 0')
    # The context manager closes the file as soon as it has been read.
    with open(inputfile, 'r') as infile:
        rows = infile.readlines()[int(header):]
    # Blank rows (such as a trailing empty line) hold no data.
    data = [float(row.split(',')[column]) for row in rows if row.strip()]
    if not data:
        raise ValueError('no data rows found in %s' % str(inputfile))
    # NaN or infinity would make the threshold walk meaningless or
    # endless, so reject them up front.
    if not all(math.isfinite(x) for x in data):
        raise ValueError('data contains non-finite values')
    threshold = float(math.floor(min(data)))
    # Lower (exclusive) edge of the current bin. After the first bin it
    # is carried over from the previous threshold instead of being
    # recomputed, so float rounding cannot open a gap or an overlap
    # between neighbouring bins.
    lower = threshold - increment
    max_value = max(data)
    num_of_data = len(data)
    print(' : '.join(['Threshold', 'Density']))
    while True:
        count = sum(1 for x in data if lower < x <= threshold)
        density = count / num_of_data
        print('%s : %s' % (str(threshold), str(density)))
        # Stop only once a bin has covered the largest value;
        # stopping at floor(max) would leave the top of the data out.
        if not threshold < max_value:
            break
        lower = threshold
        threshold = threshold + increment
if __name__ == '__main__':
    # Keys are the sub-command names given on the command line
    # (python density.py CDF ... / python density.py PDF ...); values
    # are the functions handed to Fire for each of them.
    fire.Fire({
        'CDF': cumulative_density,
        'PDF': probability_density,
    })