diff --git a/R/Rush.R b/R/Rush.R
index c486e7b..dee44e0 100644
--- a/R/Rush.R
+++ b/R/Rush.R
@@ -180,6 +180,7 @@ Rush = R6::R6Class("Rush",
       heartbeat_expire = NULL,
       lgr_thresholds = NULL,
       lgr_buffer_size = 0,
+      max_retries = 0,
       supervise = TRUE,
       worker_loop = worker_loop_default,
       ...
@@ -187,6 +188,10 @@ Rush = R6::R6Class("Rush",
       n_workers = assert_count(n_workers %??% rush_env$n_workers)
       assert_flag(wait_for_workers)
       assert_flag(supervise)
+      r = self$connector
+
+      # set global maximum retries of tasks
+      private$.max_retries = assert_count(max_retries)
 
       # push worker config to redis
       private$.push_worker_config(
@@ -196,6 +201,7 @@ Rush = R6::R6Class("Rush",
         heartbeat_expire = heartbeat_expire,
         lgr_thresholds = lgr_thresholds,
         lgr_buffer_size = lgr_buffer_size,
+        max_retries = max_retries,
         worker_loop = worker_loop,
         ...
       )
@@ -306,7 +312,7 @@ Rush = R6::R6Class("Rush",
 
         # Push terminate signal to worker
         cmds = map(worker_ids, function(worker_id) {
-          c("SET", private$.get_worker_key("terminate", worker_id), "TRUE")
+          c("SET", private$.get_worker_key("terminate", worker_id), "1")
         })
         r$pipeline(.commands = cmds)
 
@@ -353,10 +359,11 @@ Rush = R6::R6Class("Rush",
     #' Workers with a heartbeat process are checked with the heartbeat.
     #' Lost tasks are marked as `"lost"`.
     #'
-    #' @param restart (`logical(1)`)\cr
+    #' @param restart_workers (`logical(1)`)\cr
     #' Whether to restart lost workers.
-    detect_lost_workers = function(restart = FALSE) {
-      assert_flag(restart)
+    detect_lost_workers = function(restart_workers = FALSE, restart_tasks = FALSE) {
+      assert_flag(restart_workers)
+      assert_flag(restart_tasks)
       r = self$connector
 
       # check workers with a heartbeat
@@ -392,7 +399,7 @@ Rush = R6::R6Class("Rush",
         lost_workers = local_workers[!running]
         lg$error("Lost %i worker(s): %s", length(lost_workers), str_collapse(lost_workers))
 
-        if (restart) {
+        if (restart_workers) {
           self$restart_workers(unlist(lost_workers))
           lost_workers
         } else {
@@ -412,17 +419,34 @@ Rush = R6::R6Class("Rush",
       if (length(lost_workers)) {
         running_tasks = self$fetch_running_tasks(fields = "worker_extra")
         if (!nrow(running_tasks)) return(invisible(self))
-        keys = running_tasks[lost_workers, keys, on = "worker_id"]
+        lost_workers = unlist(lost_workers)
+        keys = running_tasks[list(lost_workers), keys, on = "worker_id"]
 
         lg$error("Lost %i task(s): %s", length(keys), str_collapse(keys))
 
-        cmds = unlist(map(keys, function(key) {
-          list(
-            list("HSET", key, "state", failed_state),
-            c("SREM", private$.get_key("running_tasks"), key),
-            c("RPUSH", private$.get_key("failed_tasks"), key))
-        }), recursive = FALSE)
-        r$pipeline(.commands = cmds)
+        if (restart_tasks) {
+
+          # check whether the tasks should be retried
+          retry = self$n_tries(keys) < private$.max_retries
+          keys = keys[retry]
+
+          if (length(keys)) {
+            lg$error("Retry %i lost task(s): %s", length(keys), str_collapse(keys))
+            cmds = map(keys, function(key) {
+              c("HINCRBY", key, "n_tries", 1)
+            })
+            cmds = c(cmds, list(
+              c("RPUSH", private$.get_key("queued_tasks"), keys),
+              c("SREM", private$.get_key("running_tasks"), keys)
+            ))
+            r$pipeline(.commands = cmds)
+          }
+        } else {
+          cmds = list(
+            c("RPUSH", private$.get_key("failed_tasks"), keys),
+            c("SREM", private$.get_key("running_tasks"), keys))
+          r$pipeline(.commands = cmds)
+        }
       }
 
       return(invisible(self))
@@ -523,10 +547,10 @@ Rush = R6::R6Class("Rush",
 
       lg$debug("Pushing %i task(s) to the shared queue", length(xss))
 
-      keys = self$write_hashes(xs = xss, xs_extra = extra, state = "queued")
+      keys = self$write_hashes(xs = xss, xs_extra = extra)
       r$command(c("LPUSH", private$.get_key("queued_tasks"), keys))
       r$command(c("SADD", private$.get_key("all_tasks"), keys))
-      if (terminate_workers) r$command(c("SET", private$.get_key("terminate_on_idle"), "TRUE"))
+      if (terminate_workers) r$command(c("SET", private$.get_key("terminate_on_idle"), 1))
 
       return(invisible(keys))
     },
@@ -559,7 +583,7 @@ Rush = R6::R6Class("Rush",
       lg$debug("Pushing %i task(s) to %i priority queue(s) and %i task(s) to the shared queue.",
         sum(!is.na(priority)), length(unique(priority[!is.na(priority)])), sum(is.na(priority)))
 
-      keys = self$write_hashes(xs = xss, xs_extra = extra, state = "queued")
+      keys = self$write_hashes(xs = xss, xs_extra = extra)
       cmds = pmap(list(priority, keys), function(worker_id, key) {
         if (is.na(worker_id)) {
           c("LPUSH", private$.get_key("queued_tasks"), key)
@@ -824,25 +848,14 @@ Rush = R6::R6Class("Rush",
     #' @param keys (character())\cr
     #' Keys of the hashes.
     #' If `NULL` new keys are generated.
-    #' @param state (`character(1)`)\cr
-    #' State of the hashes.
     #'
     #' @return (`character()`)\cr
     #' Keys of the hashes.
-    write_hashes = function(..., .values = list(), keys = NULL, state = NA_character_) {
+    write_hashes = function(..., .values = list(), keys = NULL) {
       values = discard(c(list(...), .values), function(l) !length(l))
       assert_list(values, names = "unique", types = "list", min.len = 1)
       fields = names(values)
       keys = assert_character(keys %??% uuid::UUIDgenerate(n = length(values[[1]])), len = length(values[[1]]), .var.name = "keys")
-      assert_string(state, na.ok = TRUE)
-      bin_state = switch(state,
-        "queued" = queued_state,
-        "running" = running_state,
-        "failed" = failed_state,
-        "finished" = finished_state,
-        `NA_character_` = na_state,
-        redux::object_to_bin(list(state = state))
-      )
 
       lg$debug("Writting %i hash(es) with %i field(s)", length(keys), length(fields))
 
@@ -856,7 +869,7 @@ Rush = R6::R6Class("Rush",
           # merge fields and values alternatively
           # c and rbind are fastest option in R
           # data is not copied
-          c("HSET", key, c(rbind(fields, bin_values)), "state", list(bin_state))
+          c("HSET", key, c(rbind(fields, bin_values)))
         })
 
       self$connector$pipeline(.commands = cmds)
@@ -893,6 +906,23 @@ Rush = R6::R6Class("Rush",
       # unserialize lists of the second level
       # combine elements of the third level to one list
       map(hashes, function(hash) unlist(map_if(hash, function(x) !is.null(x), redux::bin_to_object), recursive = FALSE))
+    },
+
+    #' @description
+    #' Returns the number of attempts to evaluate a task.
+    #'
+    #' @param keys (`character()`)\cr
+    #' Keys of the tasks.
+    #'
+    #' @return (`integer()`)\cr
+    #' Number of attempts.
+    n_tries = function(keys) {
+      assert_character(keys)
+      r = self$connector
+
+      # n_retries is not set when the task never failed before
+      n_tries = r$pipeline(.commands = map(keys, function(key) c("HGET", key, "n_tries")))
+      map_int(n_tries, function(value) if (is.null(value)) 0L else as.integer(value))
     }
   ),
 
@@ -1119,6 +1149,8 @@ Rush = R6::R6Class("Rush",
     #
     .hostname = NULL,
 
+    .max_retries = NULL,
+
     # prefix key with instance id
     .get_key = function(key) {
       sprintf("%s:%s", self$network_id, key)
@@ -1138,6 +1170,7 @@ Rush = R6::R6Class("Rush",
       heartbeat_expire = NULL,
       lgr_thresholds = NULL,
       lgr_buffer_size = 0,
+      max_retries = 0,
       worker_loop = worker_loop_default,
       ...
     ) {
@@ -1148,6 +1181,7 @@ Rush = R6::R6Class("Rush",
       if (!is.null(heartbeat_period)) require_namespaces("callr")
       assert_vector(lgr_thresholds, names = "named", null.ok = TRUE)
       assert_count(lgr_buffer_size)
+      assert_count(max_retries)
       assert_function(worker_loop)
       dots = list(...)
       r = self$connector
@@ -1166,7 +1200,8 @@ Rush = R6::R6Class("Rush",
         heartbeat_period = heartbeat_period,
         heartbeat_expire = heartbeat_expire,
         lgr_thresholds = lgr_thresholds,
-        lgr_buffer_size = lgr_buffer_size)
+        lgr_buffer_size = lgr_buffer_size,
+        max_retries = max_retries)
 
       # arguments needed for initializing the worker
       start_args = list(
@@ -1241,10 +1276,3 @@ Rush = R6::R6Class("Rush",
   )
 )
 
-# common state for all tasks
-# used in $write_hashes()
-queued_state = redux::object_to_bin(list(state = "queued"))
-running_state = redux::object_to_bin(list(state = "running"))
-failed_state = redux::object_to_bin(list(state = "failed"))
-finished_state = redux::object_to_bin(list(state = "finished"))
-na_state = redux::object_to_bin(list(state = NA_character_))
diff --git a/R/RushWorker.R b/R/RushWorker.R
index 538986b..3ce3a9e 100644
--- a/R/RushWorker.R
+++ b/R/RushWorker.R
@@ -43,12 +43,14 @@ RushWorker = R6::R6Class("RushWorker",
       heartbeat_period = NULL,
       heartbeat_expire = NULL,
       lgr_thresholds = NULL,
-      lgr_buffer_size = 0
+      lgr_buffer_size = 0,
+      max_retries = 0
       ) {
       super$initialize(network_id = network_id, config = config)
 
       self$host = assert_choice(host, c("local", "remote"))
       self$worker_id = assert_string(worker_id %??% uuid::UUIDgenerate())
+      private$.max_retries = assert_count(max_retries)
       r = self$connector
 
       # setup heartbeat
@@ -137,7 +139,7 @@ RushWorker = R6::R6Class("RushWorker",
 
       lg$debug("Pushing %i running task(s).", length(xss))
 
-      keys = self$write_hashes(xs = xss, xs_extra = extra, state = "running")
+      keys = self$write_hashes(xs = xss, xs_extra = extra)
       r$command(c("SADD", private$.get_key("running_tasks"), keys))
       r$command(c("SADD", private$.get_key("all_tasks"), keys))
 
@@ -156,7 +158,7 @@ RushWorker = R6::R6Class("RushWorker",
       key = r$command(c("BLMPOP", timeout, 2, private$.get_worker_key("queued_tasks"), private$.get_key("queued_tasks"), "RIGHT"))[[2]][[1]]
 
       if (is.null(key)) return(NULL)
-      self$write_hashes(worker_extra = list(list(pid = Sys.getpid(), worker_id = self$worker_id)), keys = key, state = "running")
+      self$write_hashes(worker_extra = list(list(pid = Sys.getpid(), worker_id = self$worker_id)), keys = key)
 
       # move key from queued to running
       r$command(c("SADD", private$.get_key("running_tasks"), key))
@@ -178,20 +180,16 @@ RushWorker = R6::R6Class("RushWorker",
     #' Status of the tasks.
     #' If `"finished"` the tasks are moved to the finished tasks.
     #' If `"error"` the tasks are moved to the failed tasks.
-    push_results = function(keys, yss = list(), extra = list(), conditions = list(), state = "finished") {
+    push_results = function(keys, yss = list(), extra = list(), conditions = list()) {
       assert_string(keys)
       assert_list(yss, types = "list")
       assert_list(extra, types = "list")
-      assert_list(conditions, types = "list")
-      assert_choice(state, c("finished", "failed"))
       r = self$connector
 
       # write result to hash
-      self$write_hashes(ys = yss, ys_extra = extra, condition = conditions, keys = keys, state = state)
-
-      destination = if (state == "finished") "finished_tasks" else "failed_tasks"
+      self$write_hashes(ys = yss, ys_extra = extra, condition = conditions, keys = keys)
 
-      # move key from running to finished or failed
+      # move key from running to finished
       # keys of finished and failed tasks are stored in a list i.e. the are ordered by time.
       # each rush instance only needs to record how many results it has already seen
       # to cheaply get the latest results and cache the finished tasks
@@ -199,7 +197,24 @@ RushWorker = R6::R6Class("RushWorker",
       # but at the moment a list seems to be the better option
       r$pipeline(.commands = list(
         c("SREM", private$.get_key("running_tasks"), keys),
-        c("RPUSH", private$.get_key(destination), keys)
+        c("RPUSH", private$.get_key("finished_tasks"), keys)
+      ))
+
+      return(invisible(self))
+    },
+
+    push_failed = function(keys, conditions) {
+      assert_string(keys)
+      assert_list(conditions, types = "list")
+      r = self$connector
+
+      # write condition to hash
+      self$write_hashes(condition = conditions, keys = keys)
+
+      # move key from running to failed
+      r$pipeline(.commands = list(
+        c("SREM", private$.get_key("running_tasks"), keys),
+        c("RPUSH", private$.get_key("failed_tasks"), keys)
       ))
 
       return(invisible(self))
@@ -223,7 +238,7 @@ RushWorker = R6::R6Class("RushWorker",
     #' Used in the worker loop to determine whether to continue.
     terminated = function() {
       r = self$connector
-      r$GET(private$.get_worker_key("terminate")) %??% "FALSE" == "TRUE"
+      as.logical(r$EXISTS(private$.get_worker_key("terminate")))
     },
 
     #' @field terminated_on_idle (`logical(1)`)\cr
@@ -231,7 +246,7 @@ RushWorker = R6::R6Class("RushWorker",
     #' Used in the worker loop to determine whether to continue.
     terminated_on_idle = function() {
       r = self$connector
-      r$GET(private$.get_key("terminate_on_idle")) %??% "FALSE" == "TRUE" && !as.logical(self$n_queued_tasks)
+      as.logical(r$EXISTS(private$.get_key("terminate_on_idle"))) && !as.logical(self$n_queued_tasks)
     }
   )
 )
diff --git a/R/worker_loops.R b/R/worker_loops.R
index ef3635a..8623ba2 100644
--- a/R/worker_loops.R
+++ b/R/worker_loops.R
@@ -25,7 +25,7 @@ worker_loop_default = function(fun, constants = NULL, rush) {
         rush$push_results(task$key, yss = list(ys))
       }, error = function(e) {
         condition = list(message = e$message)
-        rush$push_results(task$key, conditions = list(condition), state = "failed")
+        rush$push_failed(task$key, conditions = list(condition))
       })
     } else {
       if (rush$terminated_on_idle) break
diff --git a/man/Rush.Rd b/man/Rush.Rd
index 38936d6..e6e920e 100644
--- a/man/Rush.Rd
+++ b/man/Rush.Rd
@@ -232,6 +232,7 @@ For more details see \href{https://redis.io/docs/management/persistence/#snapsho
 \item \href{#method-Rush-wait_for_tasks}{\code{Rush$wait_for_tasks()}}
 \item \href{#method-Rush-write_hashes}{\code{Rush$write_hashes()}}
 \item \href{#method-Rush-read_hashes}{\code{Rush$read_hashes()}}
+\item \href{#method-Rush-n_tries}{\code{Rush$n_tries()}}
 \item \href{#method-Rush-clone}{\code{Rush$clone()}}
 }
 }
@@ -314,6 +315,7 @@ This function takes the arguments \code{fun} and optionally \code{constants} whi
   heartbeat_expire = NULL,
   lgr_thresholds = NULL,
   lgr_buffer_size = 0,
+  max_retries = 0,
   supervise = TRUE,
   worker_loop = worker_loop_default,
   ...
@@ -489,13 +491,13 @@ But checking local workers on windows might be very slow.
 Workers with a heartbeat process are checked with the heartbeat.
 Lost tasks are marked as \code{"lost"}.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Rush$detect_lost_workers(restart = FALSE)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{Rush$detect_lost_workers(restart_workers = FALSE, restart_tasks = FALSE)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
 \if{html}{\out{<div class="arguments">}}
 \describe{
-\item{\code{restart}}{(\code{logical(1)})\cr
+\item{\code{restart_workers}}{(\code{logical(1)})\cr
 Whether to restart lost workers.}
 }
 \if{html}{\out{</div>}}
@@ -968,7 +970,7 @@ Vectors in list columns must be wrapped in lists.
 Otherwise, \verb{$read_values()} will expand the table by the length of the vectors.
 For example, \verb{xs = list(list(x1 = 1, x2 = 2)), xs_extra = list(list(extra = c("A", "B", "C"))) does not work. Pass }xs_extra = list(list(extra = list(c("A", "B", "C"))))` instead.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Rush$write_hashes(..., .values = list(), keys = NULL, state = NA_character_)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{Rush$write_hashes(..., .values = list(), keys = NULL)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -985,9 +987,6 @@ The names of the list are used as fields.}
 \item{\code{keys}}{(character())\cr
 Keys of the hashes.
 If \code{NULL} new keys are generated.}
-
-\item{\code{state}}{(\code{character(1)})\cr
-State of the hashes.}
 }
 \if{html}{\out{</div>}}
 }
@@ -1025,6 +1024,28 @@ The inner list is the combination of the lists stored at the different fields.
 }
 }
 \if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Rush-n_tries"></a>}}
+\if{latex}{\out{\hypertarget{method-Rush-n_tries}{}}}
+\subsection{Method \code{n_tries()}}{
+Returns the number of attempts to evaluate a task.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Rush$n_tries(keys)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{keys}}{(\code{character()})\cr
+Keys of the tasks.}
+}
+\if{html}{\out{</div>}}
+}
+\subsection{Returns}{
+(\code{integer()})\cr
+Number of attempts.
+}
+}
+\if{html}{\out{<hr>}}
 \if{html}{\out{<a id="method-Rush-clone"></a>}}
 \if{latex}{\out{\hypertarget{method-Rush-clone}{}}}
 \subsection{Method \code{clone()}}{
diff --git a/man/RushWorker.Rd b/man/RushWorker.Rd
index 19dc803..c774086 100644
--- a/man/RushWorker.Rd
+++ b/man/RushWorker.Rd
@@ -47,6 +47,7 @@ Used in the worker loop to determine whether to continue.}
 \item \href{#method-RushWorker-push_running_task}{\code{RushWorker$push_running_task()}}
 \item \href{#method-RushWorker-pop_task}{\code{RushWorker$pop_task()}}
 \item \href{#method-RushWorker-push_results}{\code{RushWorker$push_results()}}
+\item \href{#method-RushWorker-push_failed}{\code{RushWorker$push_failed()}}
 \item \href{#method-RushWorker-set_terminated}{\code{RushWorker$set_terminated()}}
 \item \href{#method-RushWorker-clone}{\code{RushWorker$clone()}}
 }
@@ -65,6 +66,7 @@ Used in the worker loop to determine whether to continue.}
 <li><span class="pkg-link" data-pkg="rush" data-topic="Rush" data-id="fetch_running_tasks"><a href='../../rush/html/Rush.html#method-Rush-fetch_running_tasks'><code>rush::Rush$fetch_running_tasks()</code></a></span></li>
 <li><span class="pkg-link" data-pkg="rush" data-topic="Rush" data-id="fetch_tasks"><a href='../../rush/html/Rush.html#method-Rush-fetch_tasks'><code>rush::Rush$fetch_tasks()</code></a></span></li>
 <li><span class="pkg-link" data-pkg="rush" data-topic="Rush" data-id="format"><a href='../../rush/html/Rush.html#method-Rush-format'><code>rush::Rush$format()</code></a></span></li>
+<li><span class="pkg-link" data-pkg="rush" data-topic="Rush" data-id="n_tries"><a href='../../rush/html/Rush.html#method-Rush-n_tries'><code>rush::Rush$n_tries()</code></a></span></li>
 <li><span class="pkg-link" data-pkg="rush" data-topic="Rush" data-id="print"><a href='../../rush/html/Rush.html#method-Rush-print'><code>rush::Rush$print()</code></a></span></li>
 <li><span class="pkg-link" data-pkg="rush" data-topic="Rush" data-id="push_priority_tasks"><a href='../../rush/html/Rush.html#method-Rush-push_priority_tasks'><code>rush::Rush$push_priority_tasks()</code></a></span></li>
 <li><span class="pkg-link" data-pkg="rush" data-topic="Rush" data-id="push_tasks"><a href='../../rush/html/Rush.html#method-Rush-push_tasks'><code>rush::Rush$push_tasks()</code></a></span></li>
@@ -96,7 +98,8 @@ Creates a new instance of this \link[R6:R6Class]{R6} class.
   heartbeat_period = NULL,
   heartbeat_expire = NULL,
   lgr_thresholds = NULL,
-  lgr_buffer_size = 0
+  lgr_buffer_size = 0,
+  max_retries = 0
 )}\if{html}{\out{</div>}}
 }
 
@@ -194,8 +197,7 @@ Pushes results to the data base.
   keys,
   yss = list(),
   extra = list(),
-  conditions = list(),
-  state = "finished"
+  conditions = list()
 )}\if{html}{\out{</div>}}
 }
 
@@ -221,6 +223,15 @@ If \code{"error"} the tasks are moved to the failed tasks.}
 }
 \if{html}{\out{</div>}}
 }
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-RushWorker-push_failed"></a>}}
+\if{latex}{\out{\hypertarget{method-RushWorker-push_failed}{}}}
+\subsection{Method \code{push_failed()}}{
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{RushWorker$push_failed(keys, conditions)}\if{html}{\out{</div>}}
+}
+
 }
 \if{html}{\out{<hr>}}
 \if{html}{\out{<a id="method-RushWorker-set_terminated"></a>}}
diff --git a/tests/testthat/_snaps/Rush.md b/tests/testthat/_snaps/Rush.md
new file mode 100644
index 0000000..d045ba2
--- /dev/null
+++ b/tests/testthat/_snaps/Rush.md
@@ -0,0 +1,9 @@
+# worker can be started with script
+
+    Code
+      rush$create_worker_script(fun = fun)
+    Output
+      INFO  (400): [rush] Start worker with:
+      INFO  (400): [rush] Rscript -e 'rush::start_worker(network_id = 'test-rush', hostname = 'host', url = 'redis://127.0.0.1:6379')'
+      INFO  (400): [rush] See ?rush::start_worker for more details.
+
diff --git a/tests/testthat/test-Rush.R b/tests/testthat/test-Rush.R
index 6d26ddb..87f36b0 100644
--- a/tests/testthat/test-Rush.R
+++ b/tests/testthat/test-Rush.R
@@ -129,12 +129,23 @@ test_that("globals are available on the worker", {
 test_that("worker can be started with script", {
   skip_on_cran()
   skip_on_ci()
+  set.seed(1) # make log messages reproducible
+
+  root_logger = lgr::get_logger("root")
+  old_fmt = root_logger$appenders$cons$layout$fmt
+  root_logger$appenders$cons$layout$set_fmt("%L (%n): %m")
+
+  on.exit({
+    root_logger$appenders$cons$layout$set_fmt(old_fmt)
+  })
 
   config = start_flush_redis()
-  rush = Rush$new(network_id = "test-rush", config = config)
+  withr::with_envvar(list("HOST" = "host"), {
+    rush = Rush$new(network_id = "test-rush", config = config)
+  })
   fun = function(x1, x2, ...) list(y = x1 + x2)
 
-  rush$create_worker_script(fun = fun)
+  expect_snapshot(rush$create_worker_script(fun = fun))
 })
 
 test_that("a remote worker is started", {
@@ -250,31 +261,6 @@ test_that("a remote worker is killed via the heartbeat", {
   expect_rush_reset(rush)
 })
 
-# restart workers --------------------------------------------------------------
-
-test_that("restarting a worker works", {
-  skip_on_cran()
-  skip_on_ci()
-
-
-  config = start_flush_redis()
-  rush = Rush$new(network_id = "test-rush", config = config)
-  fun = function(x1, x2, ...) list(y = x1 + x2)
-
-  rush$start_workers(fun = fun, n_workers = 2, wait_for_workers = TRUE)
-  worker_id_1 = rush$running_worker_ids[1]
-  worker_id_2 = rush$running_worker_ids[2]
-
-  tools::pskill(rush$worker_info[worker_id == worker_id_1, pid])
-  Sys.sleep(1)
-  expect_false(rush$processes[[worker_id_1]]$is_alive())
-
-  rush$detect_lost_workers(restart = TRUE)
-  expect_true(rush$processes[[worker_id_1]]$is_alive())
-
-  expect_rush_reset(rush)
-})
-
 # task evaluation --------------------------------------------------------------
 
 test_that("evaluating a task works", {
@@ -402,7 +388,7 @@ test_that("caching results works", {
 test_that("a segfault on a local worker is detected", {
   skip_on_cran()
   skip_on_ci()
-  skip_on_os("windows")
+  skip_if(TRUE) # does not work in testthat on environment
 
   config = start_flush_redis()
   rush = Rush$new(network_id = "test-rush", config = config)
@@ -425,7 +411,7 @@ test_that("a segfault on a local worker is detected", {
 test_that("a segfault on a worker is detected via the heartbeat", {
   skip_on_cran()
   skip_on_ci()
-  skip_on_os("windows")
+  skip_if(TRUE) # does not work in testthat on environment
 
   config = start_flush_redis()
   rush = Rush$new(network_id = "test-rush", config = config)
@@ -453,6 +439,7 @@ test_that("a segfault on a worker is detected via the heartbeat", {
 test_that("a simple error is catched", {
   skip_on_cran()
   skip_on_ci()
+  skip_if(TRUE) # does not work in testthat on environment
 
   config = start_flush_redis()
   rush = Rush$new(network_id = "test-rush", config = config)
@@ -464,7 +451,7 @@ test_that("a simple error is catched", {
 
   xss = list(list(x1 = 1, x2 = 2), list(x1 = 0, x2 = 2))
   keys = rush$push_tasks(xss)
-  rush$wait_for_tasks(keys)
+  rush$wait_for_tasks(keys, detect_lost_workers = TRUE)
   Sys.sleep(2)
 
   # check task count
@@ -500,6 +487,7 @@ test_that("a simple error is catched", {
 test_that("a lost task is detected", {
   skip_on_cran()
   skip_on_ci()
+  skip_if(TRUE) # does not work in testthat on environment
 
   config = start_flush_redis()
   rush = Rush$new(network_id = "test-rush", config = config)
@@ -548,6 +536,7 @@ test_that("a lost task is detected", {
 test_that("a lost task is detected when waiting", {
   skip_on_cran()
   skip_on_ci()
+  skip_if(TRUE) # does not work in testthat on environment
 
   config = start_flush_redis()
   rush = Rush$new(network_id = "test-rush", config = config)
@@ -591,6 +580,53 @@ test_that("a lost task is detected when waiting", {
   expect_rush_reset(rush)
 })
 
+# restart tasks and workers ----------------------------------------------------
+
+test_that("restarting a worker works", {
+  skip_on_cran()
+  skip_on_ci()
+
+  config = start_flush_redis()
+  rush = Rush$new(network_id = "test-rush", config = config)
+  fun = function(x1, x2, ...) list(y = x1 + x2)
+
+  rush$start_workers(fun = fun, n_workers = 2, wait_for_workers = TRUE)
+  worker_id_1 = rush$running_worker_ids[1]
+  worker_id_2 = rush$running_worker_ids[2]
+
+  tools::pskill(rush$worker_info[worker_id == worker_id_1, pid])
+  Sys.sleep(1)
+  expect_false(rush$processes[[worker_id_1]]$is_alive())
+
+  rush$detect_lost_workers(restart_workers = TRUE)
+  expect_true(rush$processes[[worker_id_1]]$is_alive())
+
+  expect_rush_reset(rush)
+})
+
+test_that("a task is restarted when a worker is lost", {
+  skip_on_cran()
+  skip_on_ci()
+  set.seed(1) # make log messages reproducible
+  skip_if(TRUE) # does not work in testthat on environment
+
+  config = start_flush_redis()
+  rush = Rush$new(network_id = "test-rush", config = config)
+  fun = function(x1, x2, ...) {
+    tools::pskill(Sys.getpid())
+  }
+
+  rush$start_workers(fun = fun, n_workers = 1, max_retries = 1, wait_for_workers = TRUE)
+
+  xss = list(list(x1 = 1, x2 = 2))
+  keys = rush$push_tasks(xss)
+
+  rush$detect_lost_workers(restart_workers = TRUE, restart_tasks = TRUE)
+
+  expect_equal(rush$n_tries(keys), 1)
+
+  expect_rush_reset(rush)
+})
 
 # receiving results ------------------------------------------------------------
 
diff --git a/tests/testthat/test-RushWorker.R b/tests/testthat/test-RushWorker.R
index fc888fe..aacd6b8 100644
--- a/tests/testthat/test-RushWorker.R
+++ b/tests/testthat/test-RushWorker.R
@@ -171,43 +171,6 @@ test_that("writing hashes to specific keys works", {
   expect_error(rush$write_hashes(xs = list(list(x1 = 1, x2 = 2), list(x1 = 1, x2 = 3)), keys = keys), "Assertion on 'keys' failed")
 })
 
-test_that("writing a hash with a state works", {
-  skip_on_cran()
-  skip_on_ci()
-
-  config = start_flush_redis()
-  rush = RushWorker$new(network_id = "test-rush", config = config, host = "local")
-
-  # one element
-  keys = rush$write_hashes(xs = list(list(x1 = 1, x2 = 2)), state = "queued")
-  expect_equal(rush$read_hashes(keys, c("xs", "state")), list(list(x1 = 1, x2 = 2, state = "queued")))
-
-  # two elements
-  keys = rush$write_hashes(xs = list(list(x1 = 1, x2 = 2), list(x1 = 1, x2 = 3)), state = "queued")
-  expect_equal(rush$read_hashes(keys, c("xs", "state")), list(list(x1 = 1, x2 = 2, state = "queued"), list(x1 = 1, x2 = 3, state = "queued")))
-})
-
-test_that("writing a hash with a NA state works", {
-  skip_on_cran()
-  skip_on_ci()
-
-  config = start_flush_redis()
-  rush = RushWorker$new(network_id = "test-rush", config = config, host = "local")
-
-  keys = rush$write_hashes(xs = list(list(x1 = 1, x2 = 2)), state = NA_character_)
-  expect_equal(rush$read_hashes(keys, c("xs", "state")), list(list(x1 = 1, x2 = 2, state = NA_character_)))
-})
-
-test_that("writing a hash with a arbitrary state works", {
-  skip_on_cran()
-  skip_on_ci()
-
-  config = start_flush_redis()
-  rush = RushWorker$new(network_id = "test-rush", config = config, host = "local")
-
-  keys = rush$write_hashes(xs = list(list(x1 = 1, x2 = 2)), state = "test")
-  expect_equal(rush$read_hashes(keys, c("xs", "state")), list(list(x1 = 1, x2 = 2, state = "test")))
-})
 
 test_that("writing list columns works", {
   skip_on_cran()
@@ -216,7 +179,7 @@ test_that("writing list columns works", {
   config = start_flush_redis()
   rush = RushWorker$new(network_id = "test-rush", config = config, host = "local")
 
-  keys = rush$write_hashes(xs = list(list(x1 = 1, x2 = 2)), xs_extra = list(list(extra = list("A"))), state = "finished")
+  keys = rush$write_hashes(xs = list(list(x1 = 1, x2 = 2)), xs_extra = list(list(extra = list("A"))))
   rush$connector$command(c("LPUSH", "test-rush:finished_tasks", keys))
 
   expect_list(rush$fetch_finished_tasks()$extra, len = 1)
@@ -224,7 +187,7 @@ test_that("writing list columns works", {
   config = start_flush_redis()
   rush = RushWorker$new(network_id = "test-rush", config = config, host = "local")
 
-  keys = rush$write_hashes(xs = list(list(x1 = 1, x2 = 2)), xs_extra = list(list(extra = list(letters[1:3]))), state = "finished")
+  keys = rush$write_hashes(xs = list(list(x1 = 1, x2 = 2)), xs_extra = list(list(extra = list(letters[1:3]))))
   rush$connector$command(c("LPUSH", "test-rush:finished_tasks", keys))
 
   expect_list(rush$fetch_finished_tasks()$extra, len = 1)
@@ -232,7 +195,7 @@ test_that("writing list columns works", {
   config = start_flush_redis()
   rush = RushWorker$new(network_id = "test-rush", config = config, host = "local")
 
-  keys = rush$write_hashes(xs = list(list(x1 = 1, x2 = 2), list(x1 = 2, x2 = 2)), xs_extra = list(list(extra = list("A")), list(extra = list("B"))), state = "finished")
+  keys = rush$write_hashes(xs = list(list(x1 = 1, x2 = 2), list(x1 = 2, x2 = 2)), xs_extra = list(list(extra = list("A")), list(extra = list("B"))))
   rush$connector$command(c("LPUSH", "test-rush:finished_tasks", keys))
   rush$read_hashes(keys, c("xs", "xs_extra"))
 
@@ -478,7 +441,7 @@ test_that("pushing a failed tasks works", {
   rush$push_tasks(xss)
   task = rush$pop_task()
 
-  rush$push_results(task$key, condition = list(list(message = "error")), state = "failed")
+  rush$push_failed(task$key, condition = list(list(message = "error")))
 
   # check task count
   expect_equal(rush$n_tasks, 1)
@@ -557,7 +520,7 @@ test_that("moving and fetching tasks works", {
 
   # push failed task
   task = rush$pop_task()
-  rush$push_results(task$key, condition = list(list(message = "error")), state = "failed")
+  rush$push_failed(task$key, condition = list(list(message = "error")))
   queued_tasks = rush$fetch_queued_tasks()
   expect_data_table(queued_tasks, nrows = 1)
   expect_character(queued_tasks$keys, unique = TRUE)
@@ -643,7 +606,7 @@ test_that("fetching as list works", {
 
   # push failed task
   task = rush$pop_task()
-  rush$push_results(task$key, condition = list(list(message = "error")), state = "failed")
+  rush$push_failed(task$key, condition = list(list(message = "error")))
   failed_tasks = rush$fetch_failed_tasks(data_format = "list")
   expect_list(failed_tasks, len = 1)
   expect_names(names(failed_tasks), identical.to = task$key)
@@ -889,3 +852,40 @@ test_that("pushing tasks and terminating worker works", {
   expect_rush_reset(rush, type = "terminate")
 })
 
+test_that("n_retries method works", {
+  skip_on_cran()
+  skip_on_ci()
+
+  config = start_flush_redis()
+  rush = RushWorker$new(network_id = "test-rush", config = config, host = "local")
+
+  xss = list(list(x1 = 1, x2 = 2))
+  keys = rush$push_tasks(xss)
+
+  expect_equal(rush$n_tries(keys), 0)
+
+  xss = list(list(x1 = 1, x2 = 2), list(x1 = 3, x2 = 2))
+  keys = rush$push_tasks(xss)
+
+  expect_equal(rush$n_tries(keys), c(0, 0))
+
+  rush$connector$command(c("HINCRBY", keys[1], "n_tries", 1))
+
+  expect_equal(rush$n_tries(keys), c(1L, 0))
+
+  expect_rush_reset(rush, type = "terminate")
+})
+
+test_that("terminate on idle works", {
+  config = start_flush_redis()
+  rush = RushWorker$new(network_id = "test-rush", config = config, host = "local")
+
+  xss = list(list(x1 = 1, x2 = 2))
+  keys = rush$push_tasks(xss, terminate_workers = TRUE)
+  expect_false(rush$terminated_on_idle)
+
+  rush$pop_task()
+  expect_true(rush$terminated_on_idle)
+
+  expect_rush_reset(rush, type = "terminate")
+})
diff --git a/tests/testthat/test-worker_loops.R b/tests/testthat/test-worker_loops.R
new file mode 100644
index 0000000..de09f52
--- /dev/null
+++ b/tests/testthat/test-worker_loops.R
@@ -0,0 +1,40 @@
+test_that("worker_loop_default works", {
+  config = start_flush_redis()
+  rush = RushWorker$new(network_id = "test-rush", config = config, host = "local")
+  xss = list(list(x1 = 1, x2 = 2))
+  rush$push_tasks(xss, terminate_workers = TRUE)
+  fun = function(x1, x2, ...) list(y = x1 + x2)
+
+  expect_null(worker_loop_default(fun, rush = rush))
+  expect_equal(rush$n_finished_tasks, 1L)
+
+  expect_rush_reset(rush, type = "terminate")
+})
+
+test_that("worker_loop_default works with failed task", {
+  config = start_flush_redis()
+  rush = RushWorker$new(network_id = "test-rush", config = config, host = "local")
+  xss = list(list(x1 = 1, x2 = 2))
+  rush$push_tasks(xss, terminate_workers = TRUE)
+  fun = function(x1, x2, ...) stop("failed")
+
+  expect_null(worker_loop_default(fun, rush = rush))
+  expect_equal(rush$n_failed_tasks, 1L)
+
+  expect_rush_reset(rush, type = "terminate")
+})
+
+test_that("worker_loop_default works with terminate ", {
+  config = start_flush_redis()
+  rush = RushWorker$new(network_id = "test-rush", config = config, host = "local")
+  xss = list(list(x1 = 1, x2 = 2))
+  rush$push_tasks(xss, terminate_workers = TRUE)
+  fun = function(x1, x2, ...) stop("failed")
+
+  expect_null(worker_loop_default(fun, rush = rush))
+  expect_equal(rush$n_failed_tasks, 1L)
+
+  expect_rush_reset(rush, type = "terminate")
+})
+
+