forked from jsyoon0823/VIME
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvime_utils.py
132 lines (105 loc) · 3.21 KB
/
vime_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""VIME: Extending the Success of Self- and Semi-supervised Learning to Tabular Domain (VIME) Codebase.
Reference: Jinsung Yoon, Yao Zhang, James Jordon, Mihaela van der Schaar,
"VIME: Extending the Success of Self- and Semi-supervised Learning to Tabular Domain,"
Neural Information Processing Systems (NeurIPS), 2020.
Paper link: TBD
Last updated Date: October 11th 2020
Code author: Jinsung Yoon ([email protected])
-----------------------------
vime_utils.py
- Various utility functions for VIME framework
(1) mask_generator: Generate mask vector for self and semi-supervised learning
(2) pretext_generator: Generate corrupted samples for self and semi-supervised learning
(3) perf_metric: prediction performances in terms of AUROC or accuracy
(4) convert_matrix_to_vector: Convert two dimensional matrix into one dimensional vector
(5) convert_vector_to_matrix: Convert one dimensional vector into one dimensional matrix
"""
# Necessary packages
import numpy as np
from sklearn.metrics import accuracy_score, roc_auc_score
#%%
def mask_generator (p_m, x):
"""Generate mask vector.
Args:
- p_m: corruption probability
- x: feature matrix
Returns:
- mask: binary mask matrix
"""
mask = np.random.binomial(1, p_m, x.shape)
return mask
#%%
def pretext_generator (m, x):
"""Generate corrupted samples.
Args:
m: mask matrix
x: feature matrix
Returns:
m_new: final mask matrix after corruption
x_tilde: corrupted feature matrix
"""
# Parameters
no, dim = x.shape
# Randomly (and column-wise) shuffle data
x_bar = np.zeros([no, dim])
for i in range(dim):
idx = np.random.permutation(no)
x_bar[:, i] = x[idx, i]
# Corrupt samples
x_tilde = x * (1-m) + x_bar * m
# Define new mask matrix
m_new = 1 * (x != x_tilde)
return m_new, x_tilde
#%%
def perf_metric (metric, y_test, y_test_hat):
"""Evaluate performance.
Args:
- metric: acc or auc
- y_test: ground truth label
- y_test_hat: predicted values
Returns:
- performance: Accuracy or AUROC performance
"""
# Accuracy metric
if metric == 'acc':
result = accuracy_score(np.argmax(y_test, axis = 1),
np.argmax(y_test_hat, axis = 1))
# AUROC metric
elif metric == 'auc':
result = roc_auc_score(y_test[:, 1], y_test_hat[:, 1])
return result
#%%
def convert_matrix_to_vector(matrix):
"""Convert two dimensional matrix into one dimensional vector
Args:
- matrix: two dimensional matrix
Returns:
- vector: one dimensional vector
"""
# Parameters
no, dim = matrix.shape
# Define output
vector = np.zeros([no,])
# Convert matrix to vector
for i in range(dim):
idx = np.where(matrix[:, i] == 1)
vector[idx] = i
return vector
#%%
def convert_vector_to_matrix(vector):
"""Convert one dimensional vector into two dimensional matrix
Args:
- vector: one dimensional vector
Returns:
- matrix: two dimensional matrix
"""
# Parameters
no = len(vector)
dim = len(np.unique(vector))
# Define output
matrix = np.zeros([no,dim])
# Convert vector to matrix
for i in range(dim):
idx = np.where(vector == i)
matrix[idx, i] = 1
return matrix