-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathquiz5-21.py
128 lines (110 loc) · 4.18 KB
/
quiz5-21.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# -----------------
# User Instructions
#
# Write a function, max_diffs, that maximizes the point differential
# of a player. This function will often return the same action as
# max_wins, but sometimes the strategies will differ.
#
# Enter your code at line 101.
from functools import update_wrapper
def decorator(d):
"Make function d a decorator: d wraps a function fn."
def _d(fn):
return update_wrapper(d(fn), fn)
update_wrapper(_d, d)
return _d
@decorator
def memo(f):
"""Decorator that caches the return value for each call to f(args).
Then when called again with same args, we can just look it up."""
cache = {}
def _f(*args):
try:
return cache[args]
except KeyError:
cache[args] = result = f(*args)
return result
except TypeError:
# some element of args can't be a dict key
return f(args)
return _f
other = {1:0, 0:1}
def roll(state, d):
"""Apply the roll action to a state (and a die roll d) to yield a new state:
If d is 1, get 1 point (losing any accumulated 'pending' points),
and it is the other player's turn. If d > 1, add d to 'pending' points."""
(p, me, you, pending) = state
if d == 1:
return (other[p], you, me+1, 0) # pig out; other player's turn
else:
return (p, me, you, pending+d) # accumulate die roll in pending
def hold(state):
"""Apply the hold action to a state to yield a new state:
Reap the 'pending' points and it becomes the other player's turn."""
(p, me, you, pending) = state
return (other[p], you, me+pending, 0)
def Q_pig(state, action, Pwin):
"The expected value of choosing action in state."
if action == 'hold':
return 1 - Pwin(hold(state))
if action == 'roll':
return (1 - Pwin(roll(state, 1))
+ sum(Pwin(roll(state, d)) for d in (2,3,4,5,6))) / 6.
raise ValueError
def best_action(state, actions, Q, U):
"Return the optimal action for a state, given U."
def EU(action): return Q(state, action, U)
return max(actions(state), key=EU)
def pig_actions(state):
"The legal actions from a state."
_, _, _, pending = state
return ['roll', 'hold'] if pending else ['roll']
goal = 40
@memo
def Pwin(state):
"""The utility of a state; here just the probability that an optimal player
whose turn it is to move can win from the current state."""
# Assumes opponent also plays with optimal strategy.
(p, me, you, pending) = state
if me + pending >= goal:
return 1
elif you >= goal:
return 0
else:
return max(Q_pig(state, action, Pwin)
for action in pig_actions(state))
@memo
def win_diff(state):
"The utility of a state: here the winning differential (pos or neg)."
(p, me, you, pending) = state
if me + pending >= goal or you >= goal:
return (me + pending - you)
else:
return max(Q_pig(state, action, win_diff)
for action in pig_actions(state))
def max_diffs(state):
"""A strategy that maximizes the expected difference between my final score
and my opponent's."""
return best_action(state, pig_actions, Q_pig, win_diff)
def max_wins(state):
"The optimal pig strategy chooses an action with the highest win probability."
return best_action(state, pig_actions, Q_pig, Pwin)
def test():
# The first three test cases are examples where max_wins and
# max_diffs return the same action.
assert(max_diffs((1, 26, 21, 15))) == "hold"
assert(max_diffs((1, 23, 36, 7))) == "roll"
assert(max_diffs((0, 29, 4, 3))) == "roll"
# The remaining test cases are examples where max_wins and
# max_diffs return different actions.
assert(max_diffs((0, 36, 32, 5))) == "roll"
assert(max_diffs((1, 37, 16, 3))) == "roll"
assert(max_diffs((1, 33, 39, 7))) == "roll"
assert(max_diffs((0, 7, 9, 18))) == "hold"
assert(max_diffs((1, 0, 35, 35))) == "hold"
assert(max_diffs((0, 36, 7, 4))) == "roll"
assert(max_diffs((1, 5, 12, 21))) == "hold"
assert(max_diffs((0, 3, 13, 27))) == "hold"
assert(max_diffs((0, 0, 39, 37))) == "hold"
return 'tests pass'
print test()