-
Notifications
You must be signed in to change notification settings - Fork 0
/
preprocessing.py
92 lines (84 loc) · 3.68 KB
/
preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# Preprocessing for MNIST: http://yann.lecun.com/exdb/mnist/
# used library : https://docs.python.org/3/library/struct.html
from struct import unpack_from
from torch import tensor
# Normalization settings shared by training_set_image() and test_set_image().
# Pixels are first scaled by 1/255, then transformed as (x - mean) / var.
mean = 0
# NOTE(review): named like a variance, but it is used directly as the divisor
# (no square root is taken) -- confirm this is the intended scale factor.
var = 0.04
#
# The label values are 0 to 9.
# TRAINING SET IMAGE FILE (train-images-idx3-ubyte):
# [offset] [type] [value] [description]
# 0000 32 bit integer 0x00000803(2051) magic number
# 0004 32 bit integer 60000 number of images
# 0008 32 bit integer 28 number of rows
# 0012 32 bit integer 28 number of columns
# 0016 unsigned byte ?? pixel
# 0017 unsigned byte ?? pixel
# ........
# xxxx unsigned byte ?? pixel
def training_set_image(path='data/raw/train-images-idx3-ubyte'):
    """Load the MNIST training images as a normalized float tensor.

    Parses an IDX3 file: a 16-byte big-endian header (magic number 2051,
    image count, rows, columns) followed by one unsigned byte per pixel.

    Args:
        path: Location of the IDX3 image file. Defaults to the path this
            function has always read, so existing callers are unaffected.

    Returns:
        A float tensor of shape ``(num, 1, rows, cols)`` holding
        ``(pixel / 255.0 - mean) / var``, where ``mean`` and ``var`` are
        the module-level normalization settings.

    Raises:
        AssertionError: If the magic number is not 2051.
        struct.error: If the file ends before the header or the pixel
            data is complete.
    """
    with open(path, 'rb') as f:
        magic_num, num, rows, cols = unpack_from('>4i', f.read(16))
        # Raised explicitly (instead of using `assert`) so the check still
        # runs when Python is started with -O.
        if magic_num != 2051:
            raise AssertionError(magic_num)
        print(num, rows, cols)
        # Read and decode the whole pixel payload in one go rather than
        # issuing one read/unpack per image.
        pixel_count = num * rows * cols
        pixels = unpack_from(str(pixel_count) + 'B', f.read(pixel_count))
    t = (tensor(pixels).float() / 255.0 - mean) / var
    # (N, C, H, W): height (rows) comes before width (cols). The IDX format
    # stores pixels row-wise, so this matters for non-square images.
    return t.view(num, 1, rows, cols)
# TRAINING SET LABEL FILE (train-labels-idx1-ubyte):
# [offset] [type] [value] [description]
# 0000 32 bit integer 0x00000801(2049) magic number (MSB first)
# 0004 32 bit integer 60000 number of items
# 0008 unsigned byte ?? label
# 0009 unsigned byte ?? label
# ........
# xxxx unsigned byte ?? label
def training_set_label(path='data/raw/train-labels-idx1-ubyte'):
    """Load the MNIST training labels as a 1-D tensor.

    Parses an IDX1 file: an 8-byte big-endian header (magic number 2049,
    item count) followed by one unsigned byte per label.

    Args:
        path: Location of the IDX1 label file. Defaults to the path this
            function has always read, so existing callers are unaffected.

    Returns:
        A tensor of shape ``(num,)`` containing the label values.

    Raises:
        AssertionError: If the magic number is not 2049.
        struct.error: If the file ends before the header or the label
            data is complete.
    """
    with open(path, 'rb') as f:
        magic_num, num = unpack_from('>2i', f.read(8))
        # Raised explicitly (instead of using `assert`) so the check still
        # runs when Python is started with -O.
        if magic_num != 2049:
            raise AssertionError(magic_num)
        labels = unpack_from(str(num) + 'B', f.read(num))
    # tensor() accepts the unpacked tuple directly; no intermediate list.
    return tensor(labels).view(num)
# TEST SET LABEL FILE (t10k-labels-idx1-ubyte):
# [offset] [type] [value] [description]
# 0000 32 bit integer 0x00000801(2049) magic number (MSB first)
# 0004 32 bit integer 10000 number of items
# 0008 unsigned byte ?? label
# 0009 unsigned byte ?? label
# ........
# xxxx unsigned byte ?? label
def test_set_label():
with open('data/raw/t10k-labels-idx1-ubyte', 'rb') as f:
(magic_num, num) = unpack_from('>2i', f.read(8))
assert 2049 == magic_num, magic_num
labels = unpack_from(str(num) + 'B', f.read(num))
return tensor(list(labels)).view(num)
# The label values are 0 to 9.
# TEST SET IMAGE FILE (t10k-images-idx3-ubyte):
# [offset] [type] [value] [description]
# 0000 32 bit integer 0x00000803(2051) magic number
# 0004 32 bit integer 10000 number of images
# 0008 32 bit integer 28 number of rows
# 0012 32 bit integer 28 number of columns
# 0016 unsigned byte ?? pixel
# 0017 unsigned byte ?? pixel
# ........
# xxxx unsigned byte ?? pixel
def test_set_image(path='data/raw/t10k-images-idx3-ubyte'):
    """Load the MNIST test images as a normalized float tensor.

    Parses an IDX3 file: a 16-byte big-endian header (magic number 2051,
    image count, rows, columns) followed by one unsigned byte per pixel.

    Args:
        path: Location of the IDX3 image file. Defaults to the path this
            function has always read, so existing callers are unaffected.

    Returns:
        A float tensor of shape ``(num, 1, rows, cols)`` holding
        ``(pixel / 255.0 - mean) / var``, where ``mean`` and ``var`` are
        the module-level normalization settings.

    Raises:
        AssertionError: If the magic number is not 2051.
        struct.error: If the file ends before the header or the pixel
            data is complete.
    """
    with open(path, 'rb') as f:
        magic_num, num, rows, cols = unpack_from('>4i', f.read(16))
        # Raised explicitly (instead of using `assert`) so the check still
        # runs when Python is started with -O.
        if magic_num != 2051:
            raise AssertionError(magic_num)
        print(num, rows, cols)
        # Read and decode the whole pixel payload in one go rather than
        # issuing one read/unpack per image.
        pixel_count = num * rows * cols
        pixels = unpack_from(str(pixel_count) + 'B', f.read(pixel_count))
    t = (tensor(pixels).float() / 255.0 - mean) / var
    # (N, C, H, W): height (rows) comes before width (cols). The IDX format
    # stores pixels row-wise, so this matters for non-square images.
    return t.view(num, 1, rows, cols)
if __name__ == "__main__":
    # Smoke test when run as a script: print a marker, then load and print
    # the training labels.
    print('ok')
    train_labels = training_set_label()
    print(train_labels)