Skip to content

Commit

Permalink
Make tests verbose about startup failures
Browse files Browse the repository at this point in the history
In the past we've had situations where the CI integration test reports
were not extensive enough to clarify what was going on, and we ended up
launching a full-blown debugging process just to find out that the
probes were missing.

To make initialization issues more visible, make integration tests
verbose about them:

* Introduce a HEALTHCHECK for the Collector docker image, based on the
/ready Civet API. This command is ignored on K8S, but we can use it on
CI to filter containers.

* Wait for the Collector container to become healthy before proceeding
with the rest of the tests. This will make it immediately clear if the
test is failing due to missing probes.

* Make self-checks verification a hard failure. Currently, we wait for
the self-checks, but do not report if they're missing. This change will
make it clear if something is badly broken to the extent that we don't
receive any events.
  • Loading branch information
erthalion committed Dec 15, 2023
1 parent 499f3f4 commit acd379f
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 3 deletions.
8 changes: 8 additions & 0 deletions collector/container/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ RUN echo '/usr/local/lib' > /etc/ld.so.conf.d/usrlocallib.conf && \

EXPOSE 8080 9090

HEALTHCHECK \
# health checks within the first 5s are not counted as failure
--start-period=5s \
# perform health check every 5s
--interval=5s \
# the command uses /ready API
CMD /usr/local/bin/status-check.sh

ENTRYPOINT ["/bootstrap.sh"]

CMD collector-wrapper.sh \
Expand Down
21 changes: 21 additions & 0 deletions collector/container/status-check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env bash

# Container health probe. Exits 0 when the /ready API reports a status of
# "ok" and 1 in every other case, including when the endpoint cannot be
# reached (curl then prints nothing and STATUS stays empty).
#
# /ready API will return the following formatted response:
# {
#    "collector" : {
#       "drops" : 0,
#       "events" : 9330070,
#       "node" : "node.name",
#       "preemptions" : 0
#    },
#    "status" : "ok"
# }
#
# Split every line by ":" and keep the value only from the line whose *key*
# is "status", then trim spaces and quotes with xargs. Matching on the key
# field (rather than grepping the whole line for 'status') keeps unrelated
# lines out of the result, e.g. a "node" entry whose hostname happens to
# contain the word "status"; such an extra match would be joined into STATUS
# and make the comparison below fail forever.
# NOTE: this relies on the response being pretty-printed with one key per
# line, as in the sample above.
STATUS=$(curl -s localhost:8080/ready | awk -F ':' '$1 ~ /"status"/ {print $2}' | xargs)

if [[ "${STATUS}" = "ok" ]]; then
    exit 0
else
    exit 1
fi
35 changes: 32 additions & 3 deletions integration-tests/suites/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,17 @@ func (s *IntegrationTestSuiteBase) StartCollector(disableGRPC bool, options *com

s.Require().NoError(s.Collector().Setup(options))
s.Require().NoError(s.Collector().Launch())
// wait for collector to report healthy, includes initial setup and probes
// loading
_, err = s.waitForContainerToBecomeHealthy(
"collector",
s.Collector().ContainerID,
defaultWaitTickSeconds)
s.Require().NoError(err)

// wait for self-check process to guarantee collector is started
s.Sensor().WaitProcessesN(s.Collector().ContainerID, 30*time.Second, 1)
selfCheckOk := s.Sensor().WaitProcessesN(s.Collector().ContainerID, 30*time.Second, 1)
s.Require().True(selfCheckOk)
}

// StopCollector will tear down the collector container and stop
Expand Down Expand Up @@ -235,11 +243,16 @@ func (s *IntegrationTestSuiteBase) launchContainer(name string, args ...string)
return outLines[len(outLines)-1], err
}

func (s *IntegrationTestSuiteBase) waitForContainerToExit(containerName, containerID string, tickSeconds time.Duration) (bool, error) {
func (s *IntegrationTestSuiteBase) waitForContainerStatus(
containerName string,
containerID string,
tickSeconds time.Duration,
filter string) (bool, error) {

cmd := []string{
common.RuntimeCommand, "ps", "-qa",
"--filter", "id=" + containerID,
"--filter", "status=exited",
"--filter", filter,
}

start := time.Now()
Expand Down Expand Up @@ -267,6 +280,22 @@ func (s *IntegrationTestSuiteBase) waitForContainerToExit(containerName, contain
}
}

// waitForContainerToBecomeHealthy waits until the container identified by
// containerID matches the runtime's "health=healthy" filter.
//
// It is a thin wrapper around waitForContainerStatus, to which containerName,
// containerID and tickSeconds are forwarded unchanged; the returned values are
// whatever that helper reports.
func (s *IntegrationTestSuiteBase) waitForContainerToBecomeHealthy(
	containerName string,
	containerID string,
	tickSeconds time.Duration) (bool, error) {

	// waitForContainerStatus is a method of the suite, so it has to be
	// invoked through the receiver; a bare call does not compile.
	return s.waitForContainerStatus(containerName, containerID, tickSeconds, "health=healthy")
}

// waitForContainerToExit waits until the container identified by containerID
// matches the runtime's "status=exited" filter.
//
// It is a thin wrapper around waitForContainerStatus, to which containerName,
// containerID and tickSeconds are forwarded unchanged; the returned values are
// whatever that helper reports.
func (s *IntegrationTestSuiteBase) waitForContainerToExit(
	containerName string,
	containerID string,
	tickSeconds time.Duration) (bool, error) {

	// waitForContainerStatus is a method of the suite, so it has to be
	// invoked through the receiver; a bare call does not compile.
	return s.waitForContainerStatus(containerName, containerID, tickSeconds, "status=exited")
}

func (s *IntegrationTestSuiteBase) execContainer(containerName string, command []string) (string, error) {
cmd := []string{common.RuntimeCommand, "exec", containerName}
cmd = append(cmd, command...)
Expand Down

0 comments on commit acd379f

Please sign in to comment.