Fix deadlock with tap/tun devices for arm guests #1232

Draft: wants to merge 5 commits into base: dev

15 changes: 15 additions & 0 deletions cpu-exec.c
@@ -472,8 +472,10 @@ static inline bool cpu_handle_halt(CPUState *cpu)
         if ((cpu->interrupt_request & CPU_INTERRUPT_POLL)
             && replay_interrupt()) {
             X86CPU *x86_cpu = X86_CPU(cpu);
+            qemu_mutex_lock_iothread();
             apic_poll_irq(x86_cpu->apic_state);
             cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
+            qemu_mutex_unlock_iothread();
         }
 #endif
         if (!cpu_has_work(cpu) && !rr_in_replay()) {
@@ -548,7 +550,9 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
         rr_exception_index_at(RR_CALLSITE_CPU_EXCEPTION_INDEX, &cpu->exception_index);
 #endif
         CPUClass *cc = CPU_GET_CLASS(cpu);
+        qemu_mutex_lock_iothread();
         cc->do_interrupt(cpu);
+        qemu_mutex_unlock_iothread();
         cpu->exception_index = -1;
     } else if (!replay_has_interrupt()) {
         /* give a chance to iothread in replay mode */
@@ -592,13 +596,15 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
     }
 #endif
     if (unlikely(interrupt_request)) {
+        qemu_mutex_lock_iothread();
         if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
             /* Mask out external interrupts for this step. */
             interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
         }
         if (interrupt_request & CPU_INTERRUPT_DEBUG) {
             cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
             cpu->exception_index = EXCP_DEBUG;
+            qemu_mutex_unlock_iothread();
             return true;
         }
         if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
@@ -608,6 +614,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
             cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
             cpu->halted = 1;
             cpu->exception_index = EXCP_HLT;
+            qemu_mutex_unlock_iothread();
             return true;
         }
 #if defined(TARGET_I386)
@@ -618,12 +625,14 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
             cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
             do_cpu_init(x86_cpu);
             cpu->exception_index = EXCP_HALTED;
+            qemu_mutex_unlock_iothread();
             return true;
         }
 #else
         else if (interrupt_request & CPU_INTERRUPT_RESET) {
             replay_interrupt();
             cpu_reset(cpu);
+            qemu_mutex_unlock_iothread();
             return true;
         }
 #endif
@@ -654,6 +663,9 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
                the program flow was changed */
             *last_tb = NULL;
         }
+
+        /* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
+        qemu_mutex_unlock_iothread();
     }
     if (unlikely(atomic_read(&cpu->exit_request) || replay_has_interrupt())) {
         atomic_set(&cpu->exit_request, 0);
@@ -801,6 +813,9 @@ int cpu_exec(CPUState *cpu)
 #endif /* buggy compiler */
         cpu->can_do_io = 1;
         tb_lock_reset();
+        if (qemu_mutex_iothread_locked()) {
+            qemu_mutex_unlock_iothread();
+        }
     }

     /* if an exception is pending, we execute it here */
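
Note: the cpu-exec.c hunks all follow one locking discipline. The Big QEMU Lock (BQL, the "iothread mutex") is taken only around code that touches shared device state (the APIC poll, do_interrupt, CPU reset), and every early return releases it before control goes back to the lock-free TCG loop; the longjmp path back into cpu_exec() is covered by the new unconditional unlock after tb_lock_reset(). A minimal sketch of the discipline, with a hypothetical predicate that is not part of the patch:

    /* Sketch only: every path out of the locked region must unlock,
     * except cpu_loop_exit()/longjmp, which cpu_exec() mops up. */
    static bool handle_one_interrupt(CPUState *cpu)
    {
        qemu_mutex_lock_iothread();        /* irqchip/device state below */
        if (take_interrupt(cpu)) {         /* hypothetical predicate */
            cpu->exception_index = EXCP_INTERRUPT;
            qemu_mutex_unlock_iothread();  /* early return: unlock first */
            return true;
        }
        qemu_mutex_unlock_iothread();
        return false;
    }
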
4 changes: 4 additions & 0 deletions cpus.c
@@ -1182,6 +1182,7 @@ static int tcg_cpu_exec(CPUState *cpu)
 #ifdef CONFIG_PROFILER
     ti = profile_getclock();
 #endif
+    qemu_mutex_unlock_iothread();
     if (use_icount) {
         int64_t count;
         int decr;
@@ -1199,6 +1200,7 @@ static int tcg_cpu_exec(CPUState *cpu)
     cpu_exec_start(cpu);
     ret = cpu_exec(cpu);
     cpu_exec_end(cpu);
+    qemu_mutex_lock_iothread();
 #ifdef CONFIG_PROFILER
     tcg_time += profile_getclock() - ti;
 #endif
@@ -1445,6 +1447,7 @@ bool qemu_mutex_iothread_locked(void)

 void qemu_mutex_lock_iothread(void)
 {
+    g_assert(!qemu_mutex_iothread_locked());
     atomic_inc(&iothread_requesting_mutex);
     /* In the simple case there is no need to bump the VCPU thread out of
      * TCG code execution.
@@ -1466,6 +1469,7 @@ void qemu_mutex_lock_iothread(void)

 void qemu_mutex_unlock_iothread(void)
 {
+    g_assert(qemu_mutex_iothread_locked());
     iothread_locked = false;
     qemu_mutex_unlock(&qemu_global_mutex);
 }
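
Note: tcg_cpu_exec() now drops the BQL for the whole of guest code execution and re-takes it afterwards, so device emulation no longer serializes against the vCPU thread. The new g_assert() calls turn lock misuse into an immediate abort instead of a silent deadlock; a toy illustration (hypothetical callers, not in the patch):

    static void double_lock(void)
    {
        qemu_mutex_lock_iothread();
        qemu_mutex_lock_iothread();    /* aborts: g_assert(!locked) */
    }

    static void stray_unlock(void)     /* called without the lock held */
    {
        qemu_mutex_unlock_iothread();  /* aborts: g_assert(locked) */
    }
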
2 changes: 2 additions & 0 deletions memory.c
@@ -915,6 +915,8 @@ void memory_region_transaction_commit(void)
     AddressSpace *as;

     assert(memory_region_transaction_depth);
+    assert(qemu_mutex_iothread_locked());
+
     --memory_region_transaction_depth;
     if (!memory_region_transaction_depth) {
         if (memory_region_update_pending) {
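
Note: the assertion encodes the rule that memory-topology changes may only be committed while holding the BQL; now that vCPU threads run TCG code without the lock, a commit from an unlocked context fails loudly instead of racing. An assumed caller shape, for illustration only:

    /* Sketch: device code is expected to hold the BQL when committing. */
    memory_region_transaction_begin();
    memory_region_set_enabled(mr, true);   /* some topology change */
    memory_region_transaction_commit();    /* asserts BQL is held */
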
2 changes: 1 addition & 1 deletion net/tap-aix.c
@@ -37,7 +37,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
 {
 }

-int tap_probe_vnet_hdr(int fd)
+int tap_probe_vnet_hdr(int fd, Error **errp)
 {
     return 0;
 }
2 changes: 1 addition & 1 deletion net/tap-bsd.c
@@ -211,7 +211,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
 {
 }

-int tap_probe_vnet_hdr(int fd)
+int tap_probe_vnet_hdr(int fd, Error **errp)
 {
     return 0;
 }
8 changes: 5 additions & 3 deletions net/tap-linux.c
@@ -147,13 +147,15 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
     }
 }

-int tap_probe_vnet_hdr(int fd)
+int tap_probe_vnet_hdr(int fd, Error **errp)
 {
     struct ifreq ifr;

     if (ioctl(fd, TUNGETIFF, &ifr) != 0) {
-        error_report("TUNGETIFF ioctl() failed: %s", strerror(errno));
-        return 0;
+        /* TUNGETIFF is available since kernel v2.6.27 */
+        error_setg_errno(errp, errno,
+                         "Unable to query TUNGETIFF on FD %d", fd);
+        return -1;
     }

     return ifr.ifr_flags & IFF_VNET_HDR;
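
Note: tap_probe_vnet_hdr() now distinguishes "no vnet header support" (0) from "probe failed" (-1 with *errp set) instead of collapsing both into 0. A sketch of the caller-side contract, using QEMU's standard error API:

    Error *err = NULL;
    int vnet_hdr = tap_probe_vnet_hdr(fd, &err);
    if (vnet_hdr < 0) {
        error_report_err(err);   /* print and free the Error */
        close(fd);
        return -1;
    }
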
2 changes: 1 addition & 1 deletion net/tap-solaris.c
@@ -206,7 +206,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
 {
 }

-int tap_probe_vnet_hdr(int fd)
+int tap_probe_vnet_hdr(int fd, Error **errp)
 {
     return 0;
 }
31 changes: 24 additions & 7 deletions net/tap.c
@@ -40,6 +40,7 @@
 #include "qemu-common.h"
 #include "qemu/cutils.h"
 #include "qemu/error-report.h"
+#include "qemu/sockets.h"

 #include "net/tap.h"

@@ -591,8 +592,12 @@ int net_init_bridge(const Netdev *netdev, const char *name,
         return -1;
     }

-    fcntl(fd, F_SETFL, O_NONBLOCK);
-    vnet_hdr = tap_probe_vnet_hdr(fd);
+    qemu_set_nonblock(fd);
+    vnet_hdr = tap_probe_vnet_hdr(fd, errp);
+    if (vnet_hdr < 0) {
+        close(fd);
+        return -1;
+    }
     s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr);

     snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper,
@@ -689,6 +694,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
                 error_propagate(errp, err);
                 return;
             }
+            qemu_set_nonblock(vhostfd);
         } else {
             vhostfd = open("/dev/vhost-net", O_RDWR);
             if (vhostfd < 0) {
@@ -779,7 +785,11 @@ int net_init_tap(const Netdev *netdev, const char *name,

         fcntl(fd, F_SETFL, O_NONBLOCK);

-        vnet_hdr = tap_probe_vnet_hdr(fd);
+        vnet_hdr = tap_probe_vnet_hdr(fd, errp);
+        if (vnet_hdr < 0) {
+            close(fd);
+            return -1;
+        }

         net_init_tap_one(tap, peer, "tap", name, NULL,
                          script, downscript,
@@ -825,8 +835,11 @@ int net_init_tap(const Netdev *netdev, const char *name,
            fcntl(fd, F_SETFL, O_NONBLOCK);

            if (i == 0) {
-                vnet_hdr = tap_probe_vnet_hdr(fd);
-            } else if (vnet_hdr != tap_probe_vnet_hdr(fd)) {
+                vnet_hdr = tap_probe_vnet_hdr(fd, errp);
+                if (vnet_hdr < 0) {
+                    goto free_fail;
+                }
+            } else if (vnet_hdr != tap_probe_vnet_hdr(fd, NULL)) {
                 error_setg(errp,
                            "vnet_hdr not consistent across given tap fds");
                 goto free_fail;
@@ -869,8 +882,12 @@ int net_init_tap(const Netdev *netdev, const char *name,
             return -1;
         }

-        fcntl(fd, F_SETFL, O_NONBLOCK);
-        vnet_hdr = tap_probe_vnet_hdr(fd);
+        qemu_set_nonblock(fd);
+        vnet_hdr = tap_probe_vnet_hdr(fd, errp);
+        if (vnet_hdr < 0) {
+            close(fd);
+            return -1;
+        }

         net_init_tap_one(tap, peer, "bridge", name, ifname,
                          script, downscript, vhostfdname,
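
Note: two behavioural details in net/tap.c. First, the repeated probes over extra fds pass a NULL errp; a failed probe returns -1, which can never equal the first fd's 0/1 result, so it is reported through the existing "vnet_hdr not consistent" path. Second, the raw fcntl(fd, F_SETFL, O_NONBLOCK) calls become qemu_set_nonblock(), which preserves flags already set on the descriptor. Roughly (sketch; the real helper in util/oslib-posix.c also has a win32 variant):

    static void set_nonblock_sketch(int fd)
    {
        int f = fcntl(fd, F_GETFL);          /* read current flags */
        fcntl(fd, F_SETFL, f | O_NONBLOCK);  /* OR in, don't clobber */
    }
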
2 changes: 1 addition & 1 deletion net/tap_int.h
@@ -35,7 +35,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
 ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen);

 void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp);
-int tap_probe_vnet_hdr(int fd);
+int tap_probe_vnet_hdr(int fd, Error **errp);
 int tap_probe_vnet_hdr_len(int fd, int len);
 int tap_probe_has_ufo(int fd);
 void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo);
8 changes: 8 additions & 0 deletions qom/cpu.c
@@ -115,7 +115,15 @@ static void cpu_common_get_memory_mapping(CPUState *cpu,

 void cpu_reset_interrupt(CPUState *cpu, int mask)
 {
+    bool need_lock = !qemu_mutex_iothread_locked();
+
+    if (need_lock) {
+        qemu_mutex_lock_iothread();
+    }
     cpu->interrupt_request &= ~mask;
+    if (need_lock) {
+        qemu_mutex_unlock_iothread();
+    }
 }

 void cpu_exit(CPUState *cpu)
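
Note: cpu_reset_interrupt() can now be reached both with and without the BQL held (cpu_handle_halt() above takes the lock itself before calling it), so it uses a take-if-needed pattern rather than assuming either state. The same idiom as a reusable sketch (hypothetical helper name):

    static inline bool lock_iothread_if_needed(void)
    {
        if (!qemu_mutex_iothread_locked()) {
            qemu_mutex_lock_iothread();
            return true;     /* caller must unlock */
        }
        return false;        /* already held; leave it alone */
    }
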
19 changes: 17 additions & 2 deletions util/aio-posix.c
@@ -575,6 +575,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
     int64_t timeout;
     int64_t start = 0;

+    /*
+     * There cannot be two concurrent aio_poll calls for the same AioContext (or
+     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
+     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
+     */
+    assert(aio_context_in_iothread(ctx));
+
     /* aio_notify can avoid the expensive event_notifier_set if
      * everything (file descriptors, bottom halves, timers) will
      * be re-evaluated before the next blocking poll(). This is
@@ -583,7 +590,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
      * so disable the optimization now.
      */
     if (blocking) {
-        atomic_add(&ctx->notify_me, 2);
+        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
+        /*
+         * Write ctx->notify_me before computing the timeout
+         * (reading bottom half flags, etc.). Pairs with
+         * smp_mb in aio_notify().
+         */
+        smp_mb();
     }

     qemu_lockcnt_inc(&ctx->list_lock);
@@ -624,7 +637,9 @@ bool aio_poll(AioContext *ctx, bool blocking)
     }

     if (blocking) {
-        atomic_sub(&ctx->notify_me, 2);
+        /* Finish the poll before clearing the flag. */
+        atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
+        aio_notify_accept(ctx);
     }

     /* Adjust polling time */
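
Note: the notify_me handshake is a Dekker-style store/load pairing between the poller and aio_notify(). Each side publishes its own flag, issues a full barrier, then reads the other side's flag. If either barrier is missing, the poller can read a stale "no work pending" while the notifier reads a stale notify_me == 0; nobody sets the event notifier and the poll blocks forever, which is exactly the tap/tun deadlock this PR addresses. Reduced to its essentials (sketch, simplified away from AioContext):

    static int notify_me;      /* written by the poller */
    static int work_pending;   /* written by the notifier */

    static void poller(void)
    {
        atomic_set(&notify_me, 1);
        smp_mb();                          /* store flag, then load */
        if (!atomic_read(&work_pending)) {
            /* safe to block: the notifier will see notify_me set */
        }
    }

    static void notifier(void)
    {
        atomic_set(&work_pending, 1);
        smp_mb();                          /* store work, then load */
        if (atomic_read(&notify_me)) {
            /* poller may be about to block: kick its event notifier */
        }
    }
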
17 changes: 15 additions & 2 deletions util/aio-win32.c
@@ -323,6 +323,12 @@ bool aio_poll(AioContext *ctx, bool blocking)
     int count;
     int timeout;

+    /*
+     * There cannot be two concurrent aio_poll calls for the same AioContext (or
+     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
+     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
+     */
+    assert(in_aio_context_home_thread(ctx));
     progress = false;

     /* aio_notify can avoid the expensive event_notifier_set if
@@ -333,7 +339,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
      * so disable the optimization now.
      */
     if (blocking) {
-        atomic_add(&ctx->notify_me, 2);
+        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
+        /*
+         * Write ctx->notify_me before computing the timeout
+         * (reading bottom half flags, etc.). Pairs with
+         * smp_mb in aio_notify().
+         */
+        smp_mb();
     }

     qemu_lockcnt_inc(&ctx->list_lock);
@@ -366,7 +378,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
         ret = WaitForMultipleObjects(count, events, FALSE, timeout);
         if (blocking) {
             assert(first);
-            atomic_sub(&ctx->notify_me, 2);
+            atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
+            aio_notify_accept(ctx);
         }

         if (first) {
16 changes: 12 additions & 4 deletions util/async.c
@@ -221,7 +221,14 @@ aio_ctx_prepare(GSource *source, gint *timeout)
 {
     AioContext *ctx = (AioContext *) source;

-    atomic_or(&ctx->notify_me, 1);
+    atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) | 1);
+
+    /*
+     * Write ctx->notify_me before computing the timeout
+     * (reading bottom half flags, etc.). Pairs with
+     * smp_mb in aio_notify().
+     */
+    smp_mb();

     /* We assume there is no timeout already supplied */
     *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
@@ -239,7 +246,8 @@ aio_ctx_check(GSource *source)
     AioContext *ctx = (AioContext *) source;
     QEMUBH *bh;

-    atomic_and(&ctx->notify_me, ~1);
+    /* Finish computing the timeout before clearing the flag. */
+    atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) & ~1);
     aio_notify_accept(ctx);

     for (bh = ctx->first_bh; bh; bh = bh->next) {
@@ -335,10 +343,10 @@ LinuxAioState *aio_get_linux_aio(AioContext *ctx)
 void aio_notify(AioContext *ctx)
 {
     /* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs
-     * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
+     * with smp_mb in aio_ctx_prepare or aio_poll.
      */
     smp_mb();
-    if (ctx->notify_me) {
+    if (atomic_read(&ctx->notify_me)) {
         event_notifier_set(&ctx->notifier);
         atomic_mb_set(&ctx->notified, true);
     }
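
Note: aio_poll() and the GSource callbacks are asserted to run only in the context's home thread, so notify_me has exactly one writer. The atomic read-modify-write operations (atomic_add/atomic_or/atomic_sub/atomic_and) can therefore be relaxed to atomic_read()/atomic_set() pairs, with an explicit smp_mb() on the set side and a release store on the clear side. The release store keeps the whole poll ordered before the flag is cleared, and the new atomic_read() in aio_notify() makes the racing load explicit. The clear side is roughly equivalent to the following, the release form being the weaker and cheaper of the two (sketch):

    /* Order everything before the clear; release semantics suffice. */
    smp_mb();
    atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) & ~1);
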