PUF_env.py
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 31 10:24:34 2022
@author: Mieszko Ferens
"""
import numpy as np
from pypuf.simulation import XORArbiterPUF, InterposePUF
from pypuf.io import random_inputs

class PUF_env():
    """
    Class that implements an episodic training environment for RL agents,
    using the pypuf library to generate a PUF instance.

    Reference:
        N. Wisiol, C. Gräbnitz, C. Mühl, B. Zengin, T. Soroceanu, N. Pirnay,
        K. T. Mursi, & A. Baliuka, "pypuf: Cryptanalysis of Physically
        Unclonable Functions" (Version 2, June 2021). Zenodo.
        https://doi.org/10.5281/zenodo.3901410

    Arguments:
        challenge_len (int): Challenge length (number of challenge bits)
        k (int): Number of arbiter chains (XOR size, or lower-layer size for
            the Interpose PUF)
        puf (str, default="XORPUF"): PUF type, either "XORPUF" or "IPUF"
        seed (int, default=None): The seed to pass to the RNG

    Note: Requires the pypuf library to be installed (>=2.2.0)

    Returns:
        PUF environment object
    """

    def __init__(self, challenge_len, k, puf="XORPUF", seed=None):
        # Create PUF
        if(puf == "XORPUF"):
            self.puf = XORArbiterPUF(n=challenge_len, k=k, seed=seed)
        elif(puf == "IPUF"):
            self.puf = InterposePUF(n=challenge_len, k_down=k, seed=seed)
        else:
            raise ValueError("Unknown PUF type: " + str(puf))
# Create RNG
self.rng = np.random.default_rng(seed=seed)
self.seed = seed
# Challenges
self.c = np.array([])
# Responses
self.r = np.array([])
# Current CRP
self.CRP = 0
# Observation
self.obs_size = challenge_len
self.obs = np.zeros(self.obs_size)
# Action space
self.n_actions = 2 # 0 or 1
self.action = 0

    def step(self, action, episodic=True):
        """
        Check if an action matches the correct response of the PUF and end
        the episode if training is episodic and the action is correct.

        Arguments:
            action (int): The action to test
            episodic (boolean, default=True): Whether to use episodic or
                continuous training

        Returns:
            Tuple that contains the observation (used challenge), reward and
            a boolean indicating whether the episode is over
        """
# Check if action matches the response to the challenge
if(action == self.r[self.CRP]):
reward = +1
done = True
else:
reward = 0
done = False
if(not episodic):
done = False
return self.obs, reward, done

    def reset(self, train_samples=10000, test_samples=1000):
        """
        Reset the environment by generating all CRPs and splitting them into
        training and testing datasets.

        Arguments:
            train_samples (int, default=10000): Number of training CRP samples
                for the model
            test_samples (int, default=1000): Number of testing CRP samples
                for the model
        """
# Get challenges ((-1,1) values)
self.c = random_inputs(self.obs_size, train_samples + test_samples,
seed=self.seed)
# Get responses ((0,1) values)
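        # (pypuf's eval() returns (-1,1) responses; 0.5 - 0.5*r maps +1 -> 0
        #  and -1 -> 1)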
self.r = np.int8(0.5 - 0.5*self.puf.eval(self.c))
        # Record the train/test split sizes: the first train_samples CRPs are
        # used for training and the remaining test_samples for testing
        self.train = train_samples
        self.test = test_samples

    def set_CRP(self, index):
        """
        Method that sets the current CRP used in the environment and
        calculates the observation (transformed challenge).

        Arguments:
            index (int): The index of the CRP

        Returns:
            Observation
        """
# Set CRP index
self.CRP = index
# Transform challenge
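        # (Cumulative product of the (-1,1) challenge bits taken from the last
        #  stage backwards, i.e. the usual parity-style feature map for
        #  delay-based PUF modelling)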
self.obs = np.cumprod(
np.fliplr(np.array([self.c[index]])), axis=1, dtype=np.int8)[0]
return self.obs

    def next_CRP(self):
        """
        Method that sets the current CRP used in the environment to the next
        one in the list, and calculates the observation (transformed
        challenge).

        Returns:
            Observation
        """
        if(self.CRP+1 == self.train):
            print("Warning: Stepping out of training CRPs and into testing "
                  "CRPs")
        return self.set_CRP(self.CRP+1)
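

# --- Example usage (illustrative sketch, not part of the original module) ---
# The snippet below shows one way the environment above might be driven. The
# constant-0 "policy" and the parameter values (64-bit challenges, 4 chains,
# 1000/100 samples) are assumptions chosen only for the demo; only PUF_env
# and its methods are taken from this file.
if __name__ == "__main__":
    # Build a 4-XOR Arbiter PUF environment and generate its CRPs
    env = PUF_env(challenge_len=64, k=4, puf="XORPUF", seed=0)
    env.reset(train_samples=1000, test_samples=100)
    correct = 0
    for i in range(env.train):
        env.set_CRP(i)                    # select the i-th training CRP
        action = 0                        # placeholder policy: always guess 0
        _, reward, _ = env.step(action)   # reward is 1 on a correct guess
        correct += reward
    print("Accuracy of the constant-0 baseline:", correct / env.train)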