Skip to content

Commit

Permalink
Merge pull request #646 from FederatedAI/feature-2.5.2-fix
Browse files (browse the repository at this point in the history)
fix bug
  • Loading branch information
sagewe authored Sep 11, 2023
2 parents ccdddf2 + 8cd5642 commit bd65899
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion python/eggroll/roll_pair/egg_pair.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -904,6 +904,9 @@ def serve(args):

def exit_gracefully(signum, frame):
nonlocal run
if cluster_manager:
myself._status = ProcessorStatus.STOPPED
send_heartbeat(node_manager_client, myself)
run = False
L.info(
f'egg_pair {args.processor_id} at port={port}, transfer_port={transfer_port}, pid={pid} receives signum={signal.getsignal(signum)}, stopping gracefully.')
Expand All @@ -912,8 +915,8 @@ def exit_gracefully(signum, frame):
signal.signal(signal.SIGINT, exit_gracefully)

while run:
time.sleep(int(RollPairConfKeys.EGGROLL_ROLLPAIR_EGGPAIR_SERVER_HEARTBEAT_INTERVAL.get()))
send_heartbeat(node_manager_client,myself)
time.sleep(int(RollPairConfKeys.EGGROLL_ROLLPAIR_EGGPAIR_SERVER_HEARTBEAT_INTERVAL.get()))

L.info(f'sending exit heartbeat to cm')
if cluster_manager:
Expand Down

0 comments on commit bd65899

Please sign in to comment.