"""movepicker.py: move generation and move ordering for the Xiangqi search."""
from utils.xiangqi import Xiangqi, MoveMode, Move, King, Advisor, Elephant, Horse, Rook, Cannon, Pawn, manhanttan_distance
from typing import List
class Pieces:
    """Namespace of aliases for the piece classes imported from utils.xiangqi.

    The aliases are accessed as dotted names (``Pieces.ROOK``) so they can be
    used as value patterns in ``match`` statements; a bare name in a ``case``
    would be treated as a capture pattern instead of being compared.
    """

    KING = King
    ADVISOR = Advisor
    ELEPHANT = Elephant
    HORSE = Horse
    ROOK = Rook
    CANNON = Cannon
    PAWN = Pawn
class MovePicker:
def move_gen(self, xiangqi: Xiangqi):
self.xiangqi = xiangqi
self.moves = xiangqi.actions()
return self.moves
def move_order(self, tt_move: Move | None, counter_move: Move | None,
history_heuristic: dict | None, mode: int) -> List[Move]:
"""Returns the sorted list of move dicts in the given board,
in the order that the main search routine should try.
Moves are ordered in the following:
* good captures
* good quiets
* bonus for generating checks
* bonus for escaping captures
* malus for putting piece en prise (in threat of capture)
* bad captures
* bad quiets
The goodness of a capture move is evaluated using MVV/LVA.
In future, SEE can also be incorporated.
MVV/LVA can potentially be improved by incorporating activity of a piece.
Good quiets do not make use of the search history.
In future, transposition table can be used to score moves more accurately.
To obtain the bonus for escaping captures and malus for putting piece en prise,
we need to get the available actions from the opponent in the current board.
"""
xiangqi = self.xiangqi
threats, supports = xiangqi.get_threats_and_supports()
moves = []
for move in self.moves:
move_mode = move.get_mode(xiangqi)
if not (move_mode & mode):
continue
move.value = self.score(move, xiangqi, threats, supports,
tt_move, counter_move, history_heuristic)
moves.append(move)
moves.sort(key=lambda move: -move.value)
return moves
def score(self, move: Move, xiangqi: Xiangqi,
threats, supports, tt_move, counter_move, history_heuristic: dict):
"""Returns the score of the given move.
See explanation for move_order above.
For captures, if value of attacker <= value of victim,
or if there is no threat at the target position (threat level = 0), it is a good capture,
otherwise it is a bad capture. Note that SEE is not yet implemented.
Good captures have value of at least 2000.
Quiets have value of at most 2000.
Bad captures have value at most 0.
For quiets, we calculate the bonus of escaping from capture, and malus for moving towards a capture.
Forward moves are given a slightly higher priority, and backward moves are given a slightly lower priority.
King movement from safety position is heavily penalised.
"""
if move == tt_move:
return 100000
if move.mode & MoveMode.QUIET:
counter_bonus = 2000 if move == counter_move else 0
history_bonus = history_heuristic.get((move.from_coords, move.to_coords), 0) if history_heuristic else 0
check_bonus = 500 if move.mode & MoveMode.CHECK else 0
escape_bonus = self.get_bonus(move, move.from_coords, threats, supports)
en_prise_malus = -self.get_bonus(move, move.to_coords, threats, supports)
king_or_forward_bonus = self.get_king_or_forward_bonus(move, xiangqi)
return history_bonus + counter_bonus + check_bonus + escape_bonus + en_prise_malus + king_or_forward_bonus
else:
piece_from = xiangqi.board[move.from_coords[0]][move.from_coords[1]]
piece_to = xiangqi.board[move.to_coords[0]][move.to_coords[1]]
piece_count = xiangqi.get_piece_count()
difference = abs(piece_to.value(move.from_coords, piece_count)) - abs(piece_from.value(move.to_coords, piece_count))
is_captured_piece_not_threatened = threats[move.to_coords[0]][move.to_coords[1]] == 0
return 2000 + max(difference, 0) if difference >= 0 or is_captured_piece_not_threatened else difference
def get_bonus(self, move, position, threats, supports):
"""Obtains the bonus/malus if a piece moves into/away from the position.
Returns a positive value.
"""
threat_value = threats[position[0]][position[1]]
primary_support_value, secondary_support_value, support_piece_position = supports[position[0]][position[1]]
support_value = primary_support_value if move.from_coords != support_piece_position else secondary_support_value
if threat_value == 0:
return 0
match move.piecetype:
case Pieces.ROOK:
if threat_value > 1:
return 1500
elif support_value > 0:
return 200
else:
return 1500
case Pieces.KING:
return 0 # Do not add bonus on king movement
case Pieces.HORSE | Pieces.CANNON:
if threat_value > 2:
return 750
elif support_value > 0:
return 100 if threat_value == 2 else 0
else:
return 750
case Pieces.ADVISOR | Pieces.ELEPHANT | Pieces.PAWN:
if threat_value > 3:
return 400
elif support_value > 0:
return 50 if threat_value == 3 else 0
else:
return 200
def get_king_or_forward_bonus(self, move: Move, xiangqi: Xiangqi):
"""Returns the reward for the move dependent on whether the move is a king movement.
If it is a king movement, give a reward/penalty according to manhattan distance from the safety position.
Otherwise, reward if it is a forward move, penalise if it is a backward move.
"""
match move.piecetype:
case Pieces.KING:
safety_position = (9, 4) if xiangqi.turn else (0, 4)
return (manhanttan_distance(move.from_coords, safety_position) - manhanttan_distance(move.to_coords, safety_position)) * 450
case Pieces.ROOK | Pieces.HORSE | Pieces.CANNON | Pieces.PAWN:
return move.from_coords[0] - move.to_coords[0] if xiangqi.turn else move.to_coords[0] - move.from_coords[0]
case Pieces.ADVISOR | Pieces.ELEPHANT:
return 0