Skip to content

Commit

Permalink
Fix re-bind logic (#270)
Browse files Browse the repository at this point in the history
Fixes rebinding of hosting services: a failed re-bind is now retried continually with exponential backoff until it succeeds, the hosting connection is closed, or the service is no longer accessible for hosting
  • Loading branch information
ekoby authored Apr 20, 2021
1 parent ac503aa commit e231b31
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 8 deletions.
2 changes: 2 additions & 0 deletions includes/ziti/error_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ limitations under the License.
XX(MFA_INVALID_TOKEN, "the token provided was invalid") \
/** attempted to verify or retrieve details of an MFA enrollment that has not been completed */ \
XX(MFA_NOT_ENROLLED, "the current identity has not completed MFA enrollment") \
/** not found, usually indicates a stale reference or insufficient permissions */ \
XX(NOT_FOUND, "entity no longer exists or is no longer accessible") \
/** Inspired by the Android SDK: What a Terrible Failure. A condition that should never happen. */ \
XX(WTF, "WTF: programming error")

Expand Down
41 changes: 33 additions & 8 deletions library/connect.c
Original file line number Diff line number Diff line change
Expand Up @@ -289,11 +289,11 @@ static void complete_conn_req(struct ziti_conn *conn, int code) {
conn_set_state(conn, code == ZITI_TIMEOUT ? Timedout : Disconnected);
conn->conn_req->failed = true;
}
conn->conn_req->cb(conn, code);
conn->conn_req->cb = NULL;
if(conn->conn_req->conn_timeout != NULL) {
uv_timer_stop(conn->conn_req->conn_timeout);
}
conn->conn_req->cb(conn, code);
conn->conn_req->cb = NULL;
} else {
CONN_LOG(WARN, "connection attempt was already completed");
}
Expand Down Expand Up @@ -410,10 +410,12 @@ static void connect_get_net_session_cb(ziti_net_session * s, ziti_error *err, vo
if (err->err == ZITI_NOT_AUTHORIZED) {
ziti_force_session_refresh(ztx);
restart_connect(conn);
}
else {
} else {
if (err->err == ZITI_NOT_FOUND) {
err->err = ZITI_SERVICE_UNAVAILABLE;
}
CONN_LOG(ERROR, "failed to get session for service[%s]: %s(%s)", conn->service, err->code, err->message);
complete_conn_req(conn, ZITI_SERVICE_UNAVAILABLE);
complete_conn_req(conn, err->err);
}
uv_close((uv_handle_t *) ar, free_handle);
}
Expand Down Expand Up @@ -1062,26 +1064,49 @@ int ziti_bind(ziti_connection conn, const char *service, ziti_listen_opts *liste
return uv_async_send(async_cr);
}

static void ziti_rebind(ziti_connection conn);

// Timer callback: re-attempts the bind for the connection stored in the
// timer handle's data pointer.
static void rebind_delay_cb(uv_timer_t *t) {
    ziti_rebind(t->data);
}

// Completion callback for a re-bind attempt on `conn`.
//
// status == ZITI_OK                  : resets the retry counter and logs success.
// status == ZITI_SERVICE_UNAVAILABLE : logs a warning and reports the status to
//                                      conn->client_cb; no retry is scheduled.
// any other status                   : bumps the retry counter and arms a one-shot
//                                      timer that runs rebind_delay_cb after a
//                                      randomised back-off delay.
static void rebind_cb(ziti_connection conn, int status) {
if (status == ZITI_OK) {
conn->conn_req->retry_count = 0;
CONN_LOG(DEBUG, "re-bound successfully");
// NOTE(review): the next two lines look like the pre-change branch carried over
// from a rendered diff; together with the `else if` that follows they leave the
// if/else chain ill-formed as shown -- confirm against the actual file.
} else {
CONN_LOG(DEBUG, "failed to re-bind [%d/%s]", status, ziti_errorstr(status));
} else if (status == ZITI_SERVICE_UNAVAILABLE) {
CONN_LOG(WARN, "failed to re-bind [%d/%s]", status, ziti_errorstr(status));
// hand the failure to the client callback instead of retrying
conn->client_cb(conn, NULL, status, NULL);
} else {
conn->conn_req->retry_count++;
// the shift is capped at 5, so the multiplier never exceeds 32
int backoff_count = 1 << MIN(conn->conn_req->retry_count, 5);
uint32_t random;
// NOTE(review): the uv_random() result is not checked; if the call fails,
// `random` is read uninitialised below -- confirm whether that can happen here.
uv_random(conn->ziti_ctx->loop, NULL, &random, sizeof(random), 0, NULL);
// delay is drawn from [0, backoff_count * 5000) ms, i.e. at most 160000 ms
long backoff_time = random % (backoff_count * 5000);
CONN_LOG(DEBUG, "failed to re-bind[%d/%s], retrying in %ldms", status, ziti_errorstr(status), backoff_time);
// create the timer only if this request does not have one yet
if (conn->conn_req->conn_timeout == NULL) {
// NOTE(review): the calloc() result is used without a NULL check
conn->conn_req->conn_timeout = calloc(1, sizeof(uv_timer_t));
uv_timer_init(conn->ziti_ctx->loop, conn->conn_req->conn_timeout);
// rebind_delay_cb reads the connection back from the timer's data pointer
conn->conn_req->conn_timeout->data = conn;
}
// repeat argument is 0: the timer is not set to repeat
uv_timer_start(conn->conn_req->conn_timeout, rebind_delay_cb, backoff_time, 0);
}
}

// reset connection and Bind again with same options
static void ziti_rebind(ziti_connection conn) {
const char *service = conn->service;
struct ziti_conn_req *req = conn->conn_req;
int count = req->retry_count;
ziti_listen_opts *opts = req->listen_opts;

conn->channel = NULL;

CONN_LOG(DEBUG, "rebinding to service[%s]", service);

ziti_bind(conn, service, opts, rebind_cb, conn->client_cb);
conn->conn_req->retry_count = count;

FREE(service);
free_conn_req(req);
Expand Down Expand Up @@ -1276,7 +1301,7 @@ static void process_edge_message(struct ziti_conn *conn, message *msg, int code)

case Bound:
conn_set_state(conn, Disconnected);
conn->client_cb(conn, NULL, ZITI_CONN_CLOSED, NULL);
ziti_rebind(conn);
break;

case Connected:
Expand Down
1 change: 1 addition & 0 deletions library/ziti_ctrl.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ IMPL_MODEL(api_resp, API_RESP_MODEL)
int code_to_error(const char *code) {

#define CODE_MAP(XX) \
XX(NOT_FOUND, ZITI_NOT_FOUND) \
XX(CONTROLLER_UNAVAILABLE, ZITI_CONTROLLER_UNAVAILABLE) \
XX(NO_ROUTABLE_INGRESS_NODES, ZITI_GATEWAY_UNAVAILABLE) \
XX(NO_EDGE_ROUTERS_AVAILABLE, ZITI_GATEWAY_UNAVAILABLE) \
Expand Down

0 comments on commit e231b31

Please sign in to comment.