This repository has been archived by the owner on May 2, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathfeatures.go
152 lines (131 loc) · 3.36 KB
/
features.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
/*
* Copyright 2011 Daniel Arndt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* @author: Daniel Arndt <[email protected]>
*
*/
package main
import (
"fmt"
)
// Feature is the minimal contract shared by every statistic collected in
// this file. Implementations decide what "adding" a value means and which
// number Get reports.
type Feature interface {
	// Mutators.
	Add(int64) // Fold one more observed value into the feature
	Set(int64) // Overwrite the feature's state using the given value

	// Readers.
	Get() int64     // Report one int64 summarising the feature (meaning is implementation-specific)
	Export() string // Render the feature's contents as a string
}
// BinFeature is a histogram-style Feature: every value passed to Add is
// counted in exactly one of a fixed number of equal-width bins.
type BinFeature struct {
	num_bins int   // Index of the last bin, i.e. the number of bins minus one
	bin_sep  int   // Width of the value range covered by each bin (>= 1 after Init)
	bins     []int // Number of values counted in each bin
	min      int   // Lower bound of the first bin, as passed to Init
}

// Init sets up num_bins empty bins for the range [min, max].
//
// Bin i (for every bin but the last) counts values in
// [min + i*width, min + (i+1)*width), where width is (max-min)/(num_bins-1)
// using integer division. Anything below min lands in the first bin and
// anything beyond the second-to-last bin lands in the last bin.
//
// Degenerate arguments are tolerated rather than causing a panic: fewer than
// one bin is treated as one bin, and a width that would come out below one
// (a range narrower than the bin count, or max < min) is raised to one.
func (f *BinFeature) Init(min int, max int, num_bins int) {
	if num_bins < 1 {
		num_bins = 1
	}
	f.min = min
	f.num_bins = num_bins - 1

	f.bin_sep = 1
	if f.num_bins > 0 {
		// With a single bin there is nothing to divide the range between.
		f.bin_sep = (max - min) / f.num_bins
	}
	if f.bin_sep < 1 {
		// A zero or negative width would make Add divide by zero or
		// compute a negative index.
		f.bin_sep = 1
	}

	// make already zeroes every element, so no explicit clearing is needed.
	f.bins = make([]int, num_bins)
}

// Add counts val in the bin whose range contains it, clamping out-of-range
// values into the first or last bin. It is a no-op if Init has not been
// called.
func (f *BinFeature) Add(val int64) {
	if len(f.bins) == 0 {
		return
	}
	last := int64(len(f.bins) - 1)

	// The arithmetic is done in int64 so that val is never truncated on
	// platforms where int is 32 bits wide.
	var bin int64
	if val > int64(f.min) {
		bin = (val - int64(f.min)) / int64(f.bin_sep)
		// bin < 0 can only come from the subtraction overflowing, which
		// means val lies far above the range.
		if bin < 0 || bin > last {
			bin = last
		}
	}
	f.bins[bin]++
}

// Export returns the bin counts, first bin to last, as a comma-separated
// list of decimal integers. It returns the empty string when there are no
// bins.
func (f *BinFeature) Export() string {
	ret := ""
	for i, count := range f.bins {
		if i > 0 {
			ret += ","
		}
		ret += fmt.Sprintf("%d", count)
	}
	return ret
}

// Get returns the count held in the first bin, or 0 when there are no bins.
func (f *BinFeature) Get() int64 {
	if len(f.bins) == 0 {
		return 0
	}
	return int64(f.bins[0])
}

// Set overwrites the count of every bin with val (converted to int).
func (f *BinFeature) Set(val int64) {
	for i := range f.bins {
		f.bins[i] = int(val)
	}
}
// DistributionFeature keeps running totals over the values it is given so
// that the minimum, mean, maximum and standard deviation can be exported
// without storing the individual values.
type DistributionFeature struct {
	sum   int64 // Sum of the values seen so far
	sumsq int64 // Sum of the squares of the values seen so far
	count int64 // Number of values seen so far
	min   int64 // Smallest value seen so far (meaningful only when count > 0)
	max   int64 // Largest value seen so far (meaningful only when count > 0)
}

// Init prepares the feature by delegating to Set(val).
func (f *DistributionFeature) Init(val int64) {
	f.Set(val)
}

// Add folds val into the running totals and updates min and max.
//
// The first value added always becomes both min and max. This is decided by
// count rather than by comparing against 0, so a genuine observation of 0 is
// kept as the minimum and negative values can become the maximum.
//
// NOTE: val*val is computed in int64 and wraps silently once |val| exceeds
// roughly 3.03e9.
func (f *DistributionFeature) Add(val int64) {
	f.sum += val
	f.sumsq += val * val
	f.count++

	first := f.count == 1
	if first || val < f.min {
		f.min = val
	}
	if first || val > f.max {
		f.max = val
	}
}

// Export returns "min,mean,max,stddev" as decimal integers.
//
// The mean uses integer division and the standard deviation is truncated to
// an integer; both are reported as 0 while no values have been counted. The
// standard deviation comes from the stddev helper defined elsewhere in this
// package, called with the sum of squares, the sum and the count.
func (f *DistributionFeature) Export() string {
	var mean, stdDev int64
	if f.count > 0 {
		stdDev = int64(stddev(float64(f.sumsq), float64(f.sum), f.count))
		mean = f.sum / f.count
	}
	return fmt.Sprintf("%d,%d,%d,%d", f.min, mean, f.max, stdDev)
}

// Get returns the number of values counted so far.
func (f *DistributionFeature) Get() int64 {
	return f.count
}

// Set discards the current state and makes val the single value in the
// feature, so count becomes 1.
//
// Set(0) is the exception: it produces the empty state (count 0), which keeps
// Init(0) usable for creating a feature with no observations.
// NOTE(review): that callers rely on Init(0)/Set(0) as a reset is an
// assumption; confirm against the callers.
func (f *DistributionFeature) Set(val int64) {
	f.sum = val
	f.sumsq = val * val
	f.min = val
	f.max = val

	f.count = 0
	if val != 0 {
		f.count = 1
	}
}
// ValueFeature is the simplest Feature: a single int64 that added values
// are accumulated into.
type ValueFeature struct {
	value int64 // Current accumulated value
}

// Init gives the feature its starting value.
func (v *ValueFeature) Init(val int64) {
	v.value = val
}

// Add increases the stored value by val.
func (v *ValueFeature) Add(val int64) {
	v.value = v.value + val
}

// Export returns the stored value formatted as a decimal integer.
func (v *ValueFeature) Export() string {
	current := v.value
	return fmt.Sprintf("%d", current)
}

// Get returns the stored value.
func (v *ValueFeature) Get() int64 {
	return v.value
}

// Set replaces the stored value with val.
func (v *ValueFeature) Set(val int64) {
	v.value = val
}