Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

do not sleep to kill a process that has already cleaned up and exited by itself #672

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 54 additions & 7 deletions runner/util_linux.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package runner

import (
"context"
"errors"
"io"
"os/exec"
"syscall"
Expand All @@ -12,20 +14,65 @@ import (
func (e *Engine) killCmd(cmd *exec.Cmd) (pid int, err error) {
pid = cmd.Process.Pid

var killDelay time.Duration

waitResult := make(chan error)
go func() {
defer close(waitResult)
_, _ = cmd.Process.Wait()
}()

if e.config.Build.SendInterrupt {
// Sending a signal to make it clear to the process that it is time to turn off
e.mainDebug("sending interrupt to process %d", pid)
// Sending a signal to make it clear to the process group that it is time to turn off
if err = syscall.Kill(-pid, syscall.SIGINT); err != nil {
return
}
time.Sleep(e.config.killDelay())
// the kill delay is 0 by default unless the user has configured send_interrupt=true
// in which case it is fetched from the kill_delay setting in the .air.toml
killDelay = e.config.killDelay()
e.mainDebug("setting a kill timer for %s", killDelay.String())
}

// https://stackoverflow.com/questions/22470193/why-wont-go-kill-a-child-process-correctly
err = syscall.Kill(-pid, syscall.SIGKILL)
// prepare a cancel context that can stop the killing if it is not needed
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

killResult := make(chan error)
// Spawn a goroutine that will kill the process after kill delay if we have not
// received a wait result before that.
go func() {
select {
case <-time.After(killDelay):
e.mainDebug("kill timer expired")
// https://stackoverflow.com/questions/22470193/why-wont-go-kill-a-child-process-correctly
killResult <- syscall.Kill(-pid, syscall.SIGKILL)
case <-ctx.Done():
e.mainDebug("kill timer canceled")
return
}
}()

results := make([]error, 0, 2)

// Wait releases any resources associated with the Process.
_, _ = cmd.Process.Wait()
return
for {
// collect the responses from the kill and wait goroutines
select {
case err = <-killResult:
results = append(results, err)
case err = <-waitResult:
results = append(results, err)
// if we have a kill delay, we ignore the kill result
if killDelay > 0 && len(results) == 1 {
results = append(results, nil)
}
}

if len(results) == 2 {
err = errors.Join(results...)
return
}
}
}

func (e *Engine) startCmd(cmd string) (*exec.Cmd, io.ReadCloser, io.ReadCloser, error) {
Expand Down
23 changes: 0 additions & 23 deletions runner/util_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package runner

import (
"errors"
"fmt"
"os"
"os/exec"
Expand All @@ -10,7 +9,6 @@ import (
"runtime"
"strconv"
"strings"
"syscall"
"testing"
"time"

Expand Down Expand Up @@ -148,27 +146,6 @@ func TestAdaptToVariousPlatforms(t *testing.T) {
}
}

func Test_killCmd_no_process(t *testing.T) {
e := Engine{
config: &Config{
Build: cfgBuild{
SendInterrupt: false,
},
},
}
_, err := e.killCmd(&exec.Cmd{
Process: &os.Process{
Pid: 9999,
},
})
if err == nil {
t.Errorf("expected error but got none")
}
if !errors.Is(err, syscall.ESRCH) {
t.Errorf("expected '%s' but got '%s'", syscall.ESRCH, errors.Unwrap(err))
}
}

func Test_killCmd_SendInterrupt_false(t *testing.T) {
_, b, _, _ := runtime.Caller(0)

Expand Down
63 changes: 56 additions & 7 deletions runner/util_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
package runner

import (
"context"
"errors"
"io"
"os"
"os/exec"
Expand All @@ -13,18 +15,65 @@ import (
func (e *Engine) killCmd(cmd *exec.Cmd) (pid int, err error) {
pid = cmd.Process.Pid

var killDelay time.Duration

waitResult := make(chan error)
go func() {
defer close(waitResult)
_, _ = cmd.Process.Wait()
}()

if e.config.Build.SendInterrupt {
// Sending a signal to make it clear to the process that it is time to turn off
e.mainDebug("sending interrupt to process %d", pid)
// Sending a signal to make it clear to the process group that it is time to turn off
if err = syscall.Kill(-pid, syscall.SIGINT); err != nil {
return
}
time.Sleep(e.config.killDelay())
// the kill delay is 0 by default unless the user has configured send_interrupt=true
// in which case it is fetched from the kill_delay setting in the .air.toml
killDelay = e.config.killDelay()
e.mainDebug("setting a kill timer for %s", killDelay.String())
}

// prepare a cancel context that can stop the killing if it is not needed
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

killResult := make(chan error)
// Spawn a goroutine that will kill the process after kill delay if we have not
// received a wait result before that.
go func() {
select {
case <-time.After(killDelay):
e.mainDebug("kill timer expired")
// https://stackoverflow.com/questions/22470193/why-wont-go-kill-a-child-process-correctly
killResult <- syscall.Kill(-pid, syscall.SIGKILL)
case <-ctx.Done():
e.mainDebug("kill timer canceled")
return
}
}()

results := make([]error, 0, 2)

for {
// collect the responses from the kill and wait goroutines
select {
case err = <-killResult:
results = append(results, err)
case err = <-waitResult:
results = append(results, err)
// if we have a kill delay, we ignore the kill result
if killDelay > 0 && len(results) == 1 {
results = append(results, nil)
}
}

if len(results) == 2 {
err = errors.Join(results...)
return
}
}
// https://stackoverflow.com/questions/22470193/why-wont-go-kill-a-child-process-correctly
err = syscall.Kill(-pid, syscall.SIGKILL)
// Wait releases any resources associated with the Process.
_, _ = cmd.Process.Wait()
return pid, err
}

func (e *Engine) startCmd(cmd string) (*exec.Cmd, io.ReadCloser, io.ReadCloser, error) {
Expand Down