diff --git a/cpu-exec.c b/cpu-exec.c
index 11e44d13bf9..10b3ffaad19 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -472,8 +472,10 @@ static inline bool cpu_handle_halt(CPUState *cpu)
         if ((cpu->interrupt_request & CPU_INTERRUPT_POLL)
             && replay_interrupt()) {
             X86CPU *x86_cpu = X86_CPU(cpu);
+            qemu_mutex_lock_iothread();
             apic_poll_irq(x86_cpu->apic_state);
             cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
+            qemu_mutex_unlock_iothread();
         }
 #endif
         if (!cpu_has_work(cpu) && !rr_in_replay()) {
@@ -548,7 +550,9 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
             rr_exception_index_at(RR_CALLSITE_CPU_EXCEPTION_INDEX, &cpu->exception_index);
 #endif
             CPUClass *cc = CPU_GET_CLASS(cpu);
+            qemu_mutex_lock_iothread();
             cc->do_interrupt(cpu);
+            qemu_mutex_unlock_iothread();
             cpu->exception_index = -1;
         } else if (!replay_has_interrupt()) {
             /* give a chance to iothread in replay mode */
@@ -592,6 +596,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
     }
 #endif
     if (unlikely(interrupt_request)) {
+        qemu_mutex_lock_iothread();
         if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
             /* Mask out external interrupts for this step. */
             interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
@@ -599,6 +604,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
         if (interrupt_request & CPU_INTERRUPT_DEBUG) {
             cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
             cpu->exception_index = EXCP_DEBUG;
+            qemu_mutex_unlock_iothread();
             return true;
         }
         if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
@@ -608,6 +614,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
             cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
             cpu->halted = 1;
             cpu->exception_index = EXCP_HLT;
+            qemu_mutex_unlock_iothread();
             return true;
         }
 #if defined(TARGET_I386)
@@ -618,12 +625,14 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
             cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
             do_cpu_init(x86_cpu);
             cpu->exception_index = EXCP_HALTED;
+            qemu_mutex_unlock_iothread();
             return true;
         }
 #else
         else if (interrupt_request & CPU_INTERRUPT_RESET) {
             replay_interrupt();
             cpu_reset(cpu);
+            qemu_mutex_unlock_iothread();
             return true;
         }
 #endif
@@ -654,6 +663,9 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
                the program flow was changed */
             *last_tb = NULL;
         }
+
+        /* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
+        qemu_mutex_unlock_iothread();
     }
     if (unlikely(atomic_read(&cpu->exit_request) || replay_has_interrupt())) {
         atomic_set(&cpu->exit_request, 0);
@@ -801,6 +813,9 @@ int cpu_exec(CPUState *cpu)
 #endif /* buggy compiler */
         cpu->can_do_io = 1;
         tb_lock_reset();
+        if (qemu_mutex_iothread_locked()) {
+            qemu_mutex_unlock_iothread();
+        }
     }
 
     /* if an exception is pending, we execute it here */
diff --git a/cpus.c b/cpus.c
index 1756c8c343c..186219f3ee2 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1182,6 +1182,7 @@ static int tcg_cpu_exec(CPUState *cpu)
 #ifdef CONFIG_PROFILER
     ti = profile_getclock();
 #endif
+    qemu_mutex_unlock_iothread();
     if (use_icount) {
         int64_t count;
         int decr;
@@ -1199,6 +1200,7 @@
     cpu_exec_start(cpu);
     ret = cpu_exec(cpu);
     cpu_exec_end(cpu);
+    qemu_mutex_lock_iothread();
 #ifdef CONFIG_PROFILER
     tcg_time += profile_getclock() - ti;
 #endif
@@ -1445,6 +1447,7 @@ bool qemu_mutex_iothread_locked(void)
 
 void qemu_mutex_lock_iothread(void)
 {
+    g_assert(!qemu_mutex_iothread_locked());
     atomic_inc(&iothread_requesting_mutex);
     /* In the simple case there is no need to bump the VCPU thread out of
      * TCG code execution.
@@ -1466,6 +1469,7 @@ void qemu_mutex_lock_iothread(void)
 
 void qemu_mutex_unlock_iothread(void)
 {
+    g_assert(qemu_mutex_iothread_locked());
     iothread_locked = false;
     qemu_mutex_unlock(&qemu_global_mutex);
 }
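The cpu-exec.c and cpus.c hunks above all serve one discipline: the iothread mutex (BQL) is never taken recursively, is only released by the thread that holds it, and is dropped while translated guest code runs, so tcg_cpu_exec() unlocks around cpu_exec() and the vCPU loop re-acquires it only for the device-touching paths (APIC poll, do_interrupt, reset). A standalone sketch of that discipline in plain pthreads, not QEMU code (big_lock, big_lock_held and the function names are illustrative):

    #include <assert.h>
    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
    static __thread bool big_lock_held;   /* per-thread, like iothread_locked */

    static void big_lock_acquire(void)
    {
        assert(!big_lock_held);           /* no recursive locking */
        pthread_mutex_lock(&big_lock);
        big_lock_held = true;
    }

    static void big_lock_release(void)
    {
        assert(big_lock_held);            /* only the holder may unlock */
        big_lock_held = false;
        pthread_mutex_unlock(&big_lock);
    }

    int main(void)
    {
        big_lock_acquire();               /* device emulation: lock held */
        big_lock_release();               /* enter guest code: lock dropped */
        /* ... translated code runs; re-take the lock only for devices ... */
        big_lock_acquire();
        big_lock_release();
        return 0;
    }

The per-thread flag is also what makes the unlock-if-locked test at the sigsetjmp re-entry point in cpu_exec() work: a cpu_loop_exit() longjmp can land there with the lock still held, so the state is probed and reset instead of asserted.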
diff --git a/memory.c b/memory.c
index d7680d841e9..38e43b457c5 100644
--- a/memory.c
+++ b/memory.c
@@ -915,6 +915,8 @@ void memory_region_transaction_commit(void)
     AddressSpace *as;
 
     assert(memory_region_transaction_depth);
+    assert(qemu_mutex_iothread_locked());
+
     --memory_region_transaction_depth;
     if (!memory_region_transaction_depth) {
         if (memory_region_update_pending) {
diff --git a/net/tap-aix.c b/net/tap-aix.c
index 0e6da639637..dce2c64238a 100644
--- a/net/tap-aix.c
+++ b/net/tap-aix.c
@@ -37,7 +37,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
 {
 }
 
-int tap_probe_vnet_hdr(int fd)
+int tap_probe_vnet_hdr(int fd, Error **errp)
 {
     return 0;
 }
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index 6c9692263d3..4f1d633b089 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -211,7 +211,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
 {
 }
 
-int tap_probe_vnet_hdr(int fd)
+int tap_probe_vnet_hdr(int fd, Error **errp)
 {
     return 0;
 }
diff --git a/net/tap-linux.c b/net/tap-linux.c
index a503fa9c6ee..9910e62efd4 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -147,13 +147,15 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
     }
 }
 
-int tap_probe_vnet_hdr(int fd)
+int tap_probe_vnet_hdr(int fd, Error **errp)
 {
     struct ifreq ifr;
 
     if (ioctl(fd, TUNGETIFF, &ifr) != 0) {
-        error_report("TUNGETIFF ioctl() failed: %s", strerror(errno));
-        return 0;
+        /* TUNGETIFF is available since kernel v2.6.27 */
+        error_setg_errno(errp, errno,
+                         "Unable to query TUNGETIFF on FD %d", fd);
+        return -1;
     }
 
     return ifr.ifr_flags & IFF_VNET_HDR;
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
index a2a92356c1a..3437838a92f 100644
--- a/net/tap-solaris.c
+++ b/net/tap-solaris.c
@@ -206,7 +206,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
 {
 }
 
-int tap_probe_vnet_hdr(int fd)
+int tap_probe_vnet_hdr(int fd, Error **errp)
 {
     return 0;
 }
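With the stubs and the Linux implementation above, tap_probe_vnet_hdr() now distinguishes a failed probe (-1 with *errp populated) from a successful one (0 or the IFF_VNET_HDR flag); previously an ioctl failure was reported to stderr and silently folded into "no vnet header support". The callers in net/tap.c below check for the negative return and propagate the error. A minimal sketch of this calling convention, with a toy Error type standing in for QEMU's and made-up names (error_set_msg, probe_feature):

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    typedef struct Error { char msg[128]; } Error;

    static void error_set_msg(Error **errp, int os_err, const char *what)
    {
        if (errp) {                       /* a NULL errp discards the error */
            *errp = malloc(sizeof(**errp));
            snprintf((*errp)->msg, sizeof((*errp)->msg), "%s: %s",
                     what, strerror(os_err));
        }
    }

    /* Returns a flag value >= 0, or -1 with *errp set on failure. */
    static int probe_feature(int fd, Error **errp)
    {
        if (fd < 0) {
            error_set_msg(errp, EBADF, "unable to query feature");
            return -1;
        }
        return 1;                         /* pretend the feature is present */
    }

    int main(void)
    {
        Error *err = NULL;

        if (probe_feature(-1, &err) < 0) {
            fprintf(stderr, "%s\n", err->msg);
            free(err);
            return 1;
        }
        return 0;
    }

The NULL-errp case matters below: the consistency re-probe across multiple tap fds passes NULL on purpose, since the only error worth reporting there is the mismatch itself.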
diff --git a/net/tap.c b/net/tap.c
index 979e622e607..24e4697e4e7 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -40,6 +40,7 @@
 #include "qemu-common.h"
 #include "qemu/cutils.h"
 #include "qemu/error-report.h"
+#include "qemu/sockets.h"
 
 #include "net/tap.h"
 
@@ -591,8 +592,12 @@ int net_init_bridge(const Netdev *netdev, const char *name,
         return -1;
     }
 
-    fcntl(fd, F_SETFL, O_NONBLOCK);
-    vnet_hdr = tap_probe_vnet_hdr(fd);
+    qemu_set_nonblock(fd);
+    vnet_hdr = tap_probe_vnet_hdr(fd, errp);
+    if (vnet_hdr < 0) {
+        close(fd);
+        return -1;
+    }
 
     s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr);
     snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper,
@@ -689,6 +694,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
             error_propagate(errp, err);
             return;
         }
+        qemu_set_nonblock(vhostfd);
     } else {
         vhostfd = open("/dev/vhost-net", O_RDWR);
         if (vhostfd < 0) {
@@ -779,7 +785,11 @@
 
         fcntl(fd, F_SETFL, O_NONBLOCK);
 
-        vnet_hdr = tap_probe_vnet_hdr(fd);
+        vnet_hdr = tap_probe_vnet_hdr(fd, errp);
+        if (vnet_hdr < 0) {
+            close(fd);
+            return -1;
+        }
 
         net_init_tap_one(tap, peer, "tap", name, NULL,
                          script, downscript,
@@ -825,8 +835,11 @@ int net_init_tap(const Netdev *netdev, const char *name,
             fcntl(fd, F_SETFL, O_NONBLOCK);
 
             if (i == 0) {
-                vnet_hdr = tap_probe_vnet_hdr(fd);
-            } else if (vnet_hdr != tap_probe_vnet_hdr(fd)) {
+                vnet_hdr = tap_probe_vnet_hdr(fd, errp);
+                if (vnet_hdr < 0) {
+                    goto free_fail;
+                }
+            } else if (vnet_hdr != tap_probe_vnet_hdr(fd, NULL)) {
                 error_setg(errp,
                            "vnet_hdr not consistent across given tap fds");
                 goto free_fail;
@@ -869,8 +882,12 @@ int net_init_tap(const Netdev *netdev, const char *name,
             return -1;
         }
 
-        fcntl(fd, F_SETFL, O_NONBLOCK);
-        vnet_hdr = tap_probe_vnet_hdr(fd);
+        qemu_set_nonblock(fd);
+        vnet_hdr = tap_probe_vnet_hdr(fd, errp);
+        if (vnet_hdr < 0) {
+            close(fd);
+            return -1;
+        }
 
         net_init_tap_one(tap, peer, "bridge", name, ifname,
                          script, downscript, vhostfdname,
diff --git a/net/tap_int.h b/net/tap_int.h
index ae6888f74ae..0d137686159 100644
--- a/net/tap_int.h
+++ b/net/tap_int.h
@@ -35,7 +35,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
 ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen);
 
 void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp);
-int tap_probe_vnet_hdr(int fd);
+int tap_probe_vnet_hdr(int fd, Error **errp);
 int tap_probe_vnet_hdr_len(int fd, int len);
 int tap_probe_has_ufo(int fd);
 void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo);
diff --git a/qom/cpu.c b/qom/cpu.c
index ba42b364946..bbfaae7a50f 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -115,7 +115,15 @@ static void cpu_common_get_memory_mapping(CPUState *cpu,
 
 void cpu_reset_interrupt(CPUState *cpu, int mask)
 {
+    bool need_lock = !qemu_mutex_iothread_locked();
+
+    if (need_lock) {
+        qemu_mutex_lock_iothread();
+    }
     cpu->interrupt_request &= ~mask;
+    if (need_lock) {
+        qemu_mutex_unlock_iothread();
+    }
 }
 
 void cpu_exit(CPUState *cpu)
diff --git a/util/aio-posix.c b/util/aio-posix.c
index 2d51239ec6f..67437b7b6b4 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -575,6 +575,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
     int64_t timeout;
     int64_t start = 0;
 
+    /*
+     * There cannot be two concurrent aio_poll calls for the same AioContext (or
+     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
+     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
+     */
+    assert(aio_context_in_iothread(ctx));
+
     /* aio_notify can avoid the expensive event_notifier_set if
      * everything (file descriptors, bottom halves, timers) will
      * be re-evaluated before the next blocking poll().  This is
@@ -583,7 +590,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
      * so disable the optimization now.
      */
     if (blocking) {
-        atomic_add(&ctx->notify_me, 2);
+        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
+        /*
+         * Write ctx->notify_me before computing the timeout
+         * (reading bottom half flags, etc.). Pairs with
+         * smp_mb in aio_notify().
+         */
+        smp_mb();
     }
 
     qemu_lockcnt_inc(&ctx->list_lock);
@@ -624,7 +637,9 @@ bool aio_poll(AioContext *ctx, bool blocking)
     }
 
     if (blocking) {
-        atomic_sub(&ctx->notify_me, 2);
+        /* Finish the poll before clearing the flag. */
+        atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
+        aio_notify_accept(ctx);
     }
 
     /* Adjust polling time */
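The aio-posix.c hunk above (and the aio-win32.c and util/async.c hunks below) relies on notify_me having a single updater: the assertion pins all updates to the context's home thread, so the locked atomic_add/atomic_sub can become plain read/modify/store sequences, and the ordering those locked operations used to provide is restated as explicit barriers. What the smp_mb() pairing buys is the classic store-buffering guarantee: each side publishes its write, issues a full barrier, then reads the other side's flag, so at least one of them sees the other. A C11 sketch of just that protocol (poller_prepare and notifier are illustrative stand-ins for aio_poll/aio_ctx_prepare and aio_notify):

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_int notify_me;       /* poller: "kick me out of the wait" */
    static atomic_bool work_queued;    /* notifier: "there is work pending" */

    /* Home thread only, so a load-then-store read-modify-write is safe. */
    static bool poller_prepare(void)
    {
        atomic_store(&notify_me, atomic_load(&notify_me) + 2);
        atomic_thread_fence(memory_order_seq_cst);  /* pairs with notifier() */
        return atomic_load(&work_queued);           /* then compute the timeout */
    }

    static void notifier(void)
    {
        atomic_store(&work_queued, true);
        atomic_thread_fence(memory_order_seq_cst);  /* pairs with poller_prepare() */
        if (atomic_load(&notify_me)) {
            /* event_notifier_set(): wake the poller */
        }
    }

Either poller_prepare() sees work_queued and polls without blocking, or notifier() sees a non-zero notify_me and kicks the event notifier; the lost-wakeup window is closed from both sides. The release store on the way out of aio_poll() complements this: the poll's effects are ordered before the flag is cleared, and aio_notify_accept() then re-arms the notifier.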
diff --git a/util/aio-win32.c b/util/aio-win32.c
index bca496a47a4..0a7390e91ba 100644
--- a/util/aio-win32.c
+++ b/util/aio-win32.c
@@ -323,6 +323,12 @@ bool aio_poll(AioContext *ctx, bool blocking)
     int count;
     int timeout;
 
+    /*
+     * There cannot be two concurrent aio_poll calls for the same AioContext (or
+     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
+     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
+     */
+    assert(aio_context_in_iothread(ctx));
     progress = false;
 
     /* aio_notify can avoid the expensive event_notifier_set if
@@ -333,7 +339,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
      * so disable the optimization now.
      */
     if (blocking) {
-        atomic_add(&ctx->notify_me, 2);
+        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
+        /*
+         * Write ctx->notify_me before computing the timeout
+         * (reading bottom half flags, etc.). Pairs with
+         * smp_mb in aio_notify().
+         */
+        smp_mb();
     }
 
     qemu_lockcnt_inc(&ctx->list_lock);
@@ -366,7 +378,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
         ret = WaitForMultipleObjects(count, events, FALSE, timeout);
         if (blocking) {
             assert(first);
-            atomic_sub(&ctx->notify_me, 2);
+            atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
+            aio_notify_accept(ctx);
         }
 
         if (first) {
diff --git a/util/async.c b/util/async.c
index 355af73ee71..9596cd288dd 100644
--- a/util/async.c
+++ b/util/async.c
@@ -221,7 +221,14 @@ aio_ctx_prepare(GSource *source, gint *timeout)
 {
     AioContext *ctx = (AioContext *) source;
 
-    atomic_or(&ctx->notify_me, 1);
+    atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) | 1);
+
+    /*
+     * Write ctx->notify_me before computing the timeout
+     * (reading bottom half flags, etc.). Pairs with
+     * smp_mb in aio_notify().
+     */
+    smp_mb();
 
     /* We assume there is no timeout already supplied */
     *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
@@ -239,7 +246,8 @@ aio_ctx_check(GSource *source)
     AioContext *ctx = (AioContext *) source;
     QEMUBH *bh;
 
-    atomic_and(&ctx->notify_me, ~1);
+    /* Finish computing the timeout before clearing the flag. */
+    atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) & ~1);
     aio_notify_accept(ctx);
 
     for (bh = ctx->first_bh; bh; bh = bh->next) {
@@ -335,10 +343,10 @@ LinuxAioState *aio_get_linux_aio(AioContext *ctx)
 void aio_notify(AioContext *ctx)
 {
     /* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs
-     * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
+     * with smp_mb in aio_ctx_prepare or aio_poll.
      */
     smp_mb();
-    if (ctx->notify_me) {
+    if (atomic_read(&ctx->notify_me)) {
         event_notifier_set(&ctx->notifier);
         atomic_mb_set(&ctx->notified, true);
     }
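One hunk earlier in the series deserves a closing note: cpu_reset_interrupt() in qom/cpu.c can now be reached both from the vCPU loop, which already holds the BQL after the cpu-exec.c changes, and from device or iothread code, which may not. It therefore takes the lock conditionally instead of making the mutex recursive, which would have defeated the g_assert() checks added in cpus.c. A standalone sketch of that shape (plain pthreads, illustrative names, not QEMU code):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
    static __thread bool big_lock_held;

    /* Clears bits in a field guarded by big_lock, whether or not the
     * caller already holds the lock. */
    static void clear_flags(unsigned *field, unsigned mask)
    {
        bool need_lock = !big_lock_held;

        if (need_lock) {
            pthread_mutex_lock(&big_lock);
            big_lock_held = true;
        }
        *field &= ~mask;   /* the read-modify-write is what needs the lock */
        if (need_lock) {
            big_lock_held = false;
            pthread_mutex_unlock(&big_lock);
        }
    }

    int main(void)
    {
        unsigned irq_pending = 0x5;

        clear_flags(&irq_pending, 0x1);
        return irq_pending == 0x4 ? 0 : 1;
    }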