-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_to_hdf5.py
38 lines (32 loc) · 1.26 KB
/
data_to_hdf5.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import h5py
import os
class HDF5writer:
    """Buffered writer that stores image rows and labels in an HDF5 file.

    Rows passed to :meth:`add` are collected in an in-memory buffer and
    written to the ``"images"`` and ``"labels"`` datasets in one slice
    assignment once the buffer holds at least ``buffSize`` rows.

    The writer can be used as a context manager; leaving the ``with`` block
    calls :meth:`close`, which writes any remaining rows and closes the file.
    """

    def __init__(self, img_dims, label_dims, outputPath, buffSize=1000):
        """Create the output file and its two datasets.

        Args:
            img_dims: Shape passed to ``create_dataset`` for ``"images"``.
            label_dims: Shape passed to ``create_dataset`` for ``"labels"``.
            outputPath: Path of the HDF5 file to create.
            buffSize: Number of buffered rows that triggers a flush.

        Raises:
            ValueError: If ``outputPath`` already exists.
        """
        if os.path.exists(outputPath):
            raise ValueError("The supplied ‘outputPath‘ already "
                             "exists and cannot be overwritten. Manually delete "
                             "the file before continuing.", outputPath)

        # Database
        self.db = h5py.File(outputPath, "w")

        # Datasets. If either creation fails, close the file handle before
        # re-raising so it is not left open.
        try:
            self.images = self.db.create_dataset(
                "images", img_dims, dtype="float")
            self.labels = self.db.create_dataset(
                "labels", label_dims, dtype="float")
        except BaseException:
            self.db.close()
            raise

        # Buffer
        self.buffSize = buffSize
        self.buffer = {"images": [], "labels": []}
        # Index of the next free row in the datasets.
        self.idx = 0

    def __enter__(self):
        """Return the writer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Flush and close the file; never suppresses an exception."""
        self.close()
        return False

    def add(self, rows, labels):
        """Append rows and their labels to the buffer.

        The buffer is flushed to disk as soon as it holds at least
        ``buffSize`` image rows.

        Args:
            rows: Iterable of image rows to append.
            labels: Iterable of labels to append.
        """
        self.buffer["images"].extend(rows)
        self.buffer["labels"].extend(labels)
        if len(self.buffer["images"]) >= self.buffSize:
            self.flush()

    def flush(self):
        """Write the buffered rows to the datasets and reset the buffer.

        Does nothing when no image rows are buffered, so no zero-length
        write is attempted.
        """
        if not self.buffer["images"]:
            return
        end = self.idx + len(self.buffer["images"])
        self.images[self.idx:end] = self.buffer["images"]
        self.labels[self.idx:end] = self.buffer["labels"]
        self.idx = end
        self.buffer = {"images": [], "labels": []}

    def close(self):
        """Write any remaining buffered rows and close the file.

        The file is closed even if the final flush raises; the exception
        still propagates to the caller.
        """
        try:
            self.flush()
        finally:
            self.db.close()