-
Notifications
You must be signed in to change notification settings - Fork 0
/
PValgo.py
328 lines (270 loc) · 11.3 KB
/
PValgo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
from enum import Enum
from math import inf
from time import time
from collections import defaultdict
from transpositiontable import BoundType, TTEntry, TranspositionTable
from utils.basealgo import BaseAlgo
from utils.xiangqi import Move, Xiangqi, MoveMode
from evaluation import Evaluation
from movepicker import MovePicker
class SearchTimeout(Exception):
pass
class NodeType(Enum):
NONE = -1
ROOT = 0
PV = 1
NON_PV = 2
class PVSeqNode:
def __init__(self, move: Move | None, next = None):
self.move = move
self.next = next
def to_list(self):
seq = []
cur = self
while cur:
seq.append(cur.move)
cur = cur.next
return seq
def __str__(self):
return self.to_list().__str__()
class SearchInfo:
def __init__(self, max_time):
self.max_time = max_time
self.start_time = time()
self.depth = 0
self.ply = 0
self.root_pv: Move | None = None
self.move_stack: list[Move] = []
self.counter_moves: dict
self.history_heuristic: dict
self.optimal_seq: list[PVSeqNode | None]
self.root_eval: list
def check_timer(self):
if time() - self.start_time >= self.max_time:
raise SearchTimeout
def prioritize_tt_move(generated_moves, tt_move):
if tt_move is None:
return
for i, move in enumerate(generated_moves):
if move == tt_move:
# a simple swap!
generated_moves[0], generated_moves[i] = tt_move, generated_moves[0]
return
# assert False, f"there is no {tt_move} in {generated_moves}"
def move_coords(move: Move):
return move.from_coords, move.to_coords if move else None
VALUE_LOSE = -10101010
class PVAlgo(BaseAlgo):
def __init__(self):
self.evaluation = Evaluation()
self.movepicker = MovePicker()
self.transposition_table_q = TranspositionTable()
self.transposition_table_p = TranspositionTable()
def quiescence(self, xiangqi: Xiangqi, alpha: int, beta: int, depth: int = 5,
node_type: NodeType = NodeType.NONE):
info = self.info
info.check_timer()
if depth == 0:
return (1 if xiangqi.turn else -1) * self.evaluation.evaluate(xiangqi)
pv_node = node_type == NodeType.PV
tt_entry: TTEntry = self.transposition_table_p.lookup(xiangqi)
tt_score = tt_entry and tt_entry.score
tt_bound = tt_entry and tt_entry.bound
tt_depth = tt_entry and tt_entry.depth
tt_move = tt_entry and tt_entry.move
if (not pv_node
and tt_depth is not None
and tt_depth >= depth
and (
(tt_bound == BoundType.EXACT)
or (tt_bound == BoundType.UPPER and tt_score <= alpha)
or (tt_bound == BoundType.LOWER and tt_score >= beta)
)):
return tt_score
score = (1 if xiangqi.turn else -1) * self.evaluation.evaluate(xiangqi)
if score >= beta:
# score is out of bound, we create a new entry in the transposition table
# signal that the actual valuation of this node is bigger than beta but
# there is no need for us to actually search here to find out the actual value
tt_entry = TTEntry(xiangqi, depth, BoundType.LOWER, score, None)
self.transposition_table_q.update(tt_entry)
return beta
if score > alpha:
# we know that the score lies between alpha and beta, so in this case
# we know that this score the EXACT score
alpha = score
tt_bound = BoundType.EXACT
else:
# otherwise, the actual score of this node is bounded by alpha. We don't know
# what is the actual value, we only know the bound!
tt_bound = BoundType.UPPER
best_score = score
best_move: Move | None = None
next_depth = depth - 1
moves = self.movepicker.move_gen(xiangqi)
if not moves:
return VALUE_LOSE + info.ply
moves = self.movepicker.move_order(tt_move, None,
None, mode=MoveMode.CAPTURE | MoveMode.CHECK)
for move in moves:
next_xiangqi = xiangqi.move(move)
info.ply += 1
score = -self.quiescence(next_xiangqi, -beta, -alpha, next_depth, node_type)
info.ply -= 1
if score > best_score:
# we found a better move!
best_score = score
best_move = move
if score >= beta:
# there is no need to continue. The actual value will be bigger than beta
# is this correct though?
tt_entry = TTEntry(xiangqi, depth, BoundType.LOWER, beta, best_move)
self.transposition_table_q.update(tt_entry)
return beta
if score > alpha:
alpha = score
tt_bound = BoundType.EXACT
tt_entry = TTEntry(xiangqi, depth, tt_bound, alpha, best_move)
self.transposition_table_q.update(tt_entry)
return alpha
def principal_variation(self, xiangqi: Xiangqi, alpha: int, beta: int, depth: int,
node_type: NodeType = NodeType.NONE):
info = self.info
info.check_timer()
is_root = node_type == NodeType.ROOT
pv_node = node_type != NodeType.NON_PV
if depth <= 0:
return self.quiescence(xiangqi, alpha, beta, 5,
NodeType.PV if pv_node else NodeType.NON_PV)
tt_entry: TTEntry = self.transposition_table_p.lookup(xiangqi)
tt_score = tt_entry and tt_entry.score
tt_bound = tt_entry and tt_entry.bound
tt_depth = tt_entry and tt_entry.depth
tt_move = tt_entry and tt_entry.move
tt_capture = tt_move and tt_move.is_capture(xiangqi)
# at non-PV nodes we check for an early TT cutoff
if (not pv_node
and tt_depth is not None
and tt_depth >= depth
and (
(tt_bound == BoundType.EXACT)
or (tt_bound == BoundType.UPPER and tt_score <= alpha)
or (tt_bound == BoundType.LOWER and tt_score >= beta)
)):
return tt_score
# static evaluation of the position
if tt_bound == BoundType.EXACT:
static_eval = tt_score
else:
static_eval = (1 if xiangqi.turn else -1) * self.evaluation.evaluate(xiangqi)
if (tt_score is not None
and (
(tt_bound == BoundType.UPPER and tt_score <= static_eval) or
(tt_bound == BoundType.LOWER and tt_score >= static_eval)
)):
static_eval = tt_score
# futility pruning
if (not pv_node
and depth < 3 # TODO: tune this
and static_eval >= beta
and (not tt_move or tt_capture)):
return static_eval
if not is_root and not tt_move and not xiangqi.is_in_check():
depth -= 2
# only pv_node can cause depth <= at this location
if depth <= 0:
return self.quiescence(xiangqi, alpha, beta, 5, NodeType.PV)
best_move: Move | None = None
best_score = -inf
best_seq = None
# are we on the principal variation right now?
# if so, we will conduct a null-window search to verify that we are indeed on
# the principal variation
is_pvs = False
tt_bound = BoundType.UPPER
move_count = 0
prev_move = info.move_stack[-1] if info.move_stack else None
counter_move = prev_move and info.counter_moves.get(prev_move.from_coords, prev_move.to_coords)
history_heuristic = info.history_heuristic[xiangqi.turn]
moves = self.movepicker.move_gen(xiangqi)
if not moves:
return VALUE_LOSE + info.ply
moves = self.movepicker.move_order(tt_move, counter_move,
history_heuristic, mode=MoveMode.ALL)
for move in moves:
move_count += 1
info.ply += 1
info.move_stack.append(move)
is_capture = move.is_capture(xiangqi)
is_check = move.is_check(xiangqi)
next_xiangqi = xiangqi.move(move)
next_depth = depth - 1
# late move reduction
if (not pv_node
and depth >= 2
and move_count >= 2
and not is_capture
and not is_check):
next_depth -= 1
if is_pvs:
score = -self.principal_variation(next_xiangqi, -alpha - 1, -alpha,
next_depth, NodeType.NON_PV)
do_full_search = alpha < score < beta
else:
do_full_search = True
if do_full_search:
score = -self.principal_variation(next_xiangqi, -beta, -alpha,
next_depth, NodeType.PV)
info.ply -= 1
info.move_stack.pop()
if score > best_score:
best_score = score
best_move = move
best_seq = info.optimal_seq[info.ply + 1]
if score >= beta:
if not is_capture:
info.counter_moves[move_coords(prev_move)] = move
history_heuristic[move_coords(move)] += 1 << depth
tt_entry = TTEntry(xiangqi, depth, BoundType.LOWER, beta, best_move)
self.transposition_table_p.update(tt_entry)
return beta
if score > alpha:
alpha = score
tt_bound = BoundType.EXACT
is_pvs = True
if is_root:
info.root_eval.append((move.to_notation(xiangqi), score))
if is_root:
info.root_pv = best_move
tt_entry = TTEntry(xiangqi, depth, tt_bound, alpha, best_move)
self.transposition_table_p.update(tt_entry)
info.optimal_seq[info.ply] = PVSeqNode(best_move, best_seq)
return alpha
def iterative_deepening(self, xiangqi: Xiangqi, max_depth=7):
info = self.info
alpha = -inf
beta = inf
for depth in range(1, max_depth + 1):
info.depth = depth
info.optimal_seq = [None for _ in range(depth + 1)]
info.counter_moves = dict()
info.root_eval = []
info.history_heuristic = [defaultdict(lambda: 0), defaultdict(lambda: 0)]
value = self.principal_variation(xiangqi, alpha, beta, depth, NodeType.ROOT)
# print(f"{depth=}")
# print(f"{info.root_eval=}")
# print(f"{info.root_pv=}")
# if optimal_seq := info.optimal_seq[0]:
# print(f"optimal seq at {depth=}: {optimal_seq.to_list()}")
# print(f"valuation at {depth=}: {value}")
# print(f"{info.evaluations=}")
def next_move(self, xiangqi: Xiangqi) -> Move | None:
self.info = SearchInfo(max_time=20)
try:
self.iterative_deepening(xiangqi)
except SearchTimeout:
pass
best_move = self.info.root_pv
# if best_move is None:
# raise Exception("Game is over, one side will win")
return best_move