diff --git a/hfo2/src/api.rs b/hfo2/src/api.rs index 60584b86a..b4b30f9f9 100644 --- a/hfo2/src/api.rs +++ b/hfo2/src/api.rs @@ -22,7 +22,6 @@ use crate::abi::*; use crate::addr::*; use crate::arch::*; use crate::cpu::*; -use crate::dlog::*; use crate::list::*; use crate::mm::*; use crate::mpool::*; @@ -45,13 +44,17 @@ use crate::vm::*; // of a page. const_assert_eq!(hf_mailbox_size; HF_MAILBOX_SIZE, PAGE_SIZE); +/// A global page pool for sharing memories. Its mutability is needed only for +/// initialization. static mut API_PAGE_POOL: MaybeUninit = MaybeUninit::uninit(); /// Initialises the API page pool by taking ownership of the contents of the /// given page pool. +/// TODO(HfO2): The ownership of `ppool` is actually moved from `one_time_init` +/// to here. Refactor this function like `Api::new(ppool: MPool) -> Api`. (#31) #[no_mangle] -pub unsafe extern "C" fn api_init(ppool: *mut MPool) { - mpool_init_from(API_PAGE_POOL.get_mut(), ppool); +pub unsafe extern "C" fn api_init(ppool: *const MPool) { + API_PAGE_POOL = MaybeUninit::new(MPool::new_from(&*ppool)); } /// Switches the physical CPU back to the corresponding vcpu of the primary VM. @@ -212,7 +215,7 @@ pub unsafe extern "C" fn api_vcpu_get_count( } vm = vm_find(vm_id); - if vm == ptr::null_mut() { + if vm.is_null() { return 0; } @@ -227,26 +230,6 @@ pub unsafe extern "C" fn api_regs_state_saved(vcpu: *mut VCpu) { sl_unlock(&(*vcpu).execution_lock); } -/// Retrieves the next waiter and removes it from the wait list if the VM's -/// mailbox is in a writable state. -unsafe fn api_fetch_waiter(locked_vm: VmLocked) -> *mut WaitEntry { - let entry: *mut WaitEntry; - let vm = locked_vm.vm; - - if (*vm).mailbox.state != MailboxState::Empty - || (*vm).mailbox.recv == ptr::null_mut() - || list_empty(&(*vm).mailbox.waiter_list) - { - // The mailbox is not writable or there are no waiters. - return ptr::null_mut(); - } - - // Remove waiter from the wait list. - entry = container_of!((*vm).mailbox.waiter_list.next, WaitEntry, wait_links); - list_remove(&mut (*entry).wait_links); - entry -} - /// Assuming that the arguments have already been checked by the caller, injects /// a virtual interrupt of the given ID into the given target vCPU. This doesn't /// cause the vCPU to actually be run immediately; it will be taken when the @@ -340,16 +323,6 @@ unsafe fn api_vcpu_prepare_run( return false; } - // The VM needs to be locked to deliver mailbox messages. - // The VM lock is not needed in the common case so it must only be taken - // when it is going to be needed. This ensures there are no inter-vCPU - // dependencies in the common run case meaning the sensitive context - // switch performance is consistent. - let need_vm_lock = (*vcpu).state == VCpuStatus::BlockedMailbox; - if need_vm_lock { - sl_lock(&(*(*vcpu).vm).lock); - } - if (*(*vcpu).vm).aborting.load(Ordering::Relaxed) { if (*vcpu).state != VCpuStatus::Aborted { dlog!( @@ -360,10 +333,6 @@ unsafe fn api_vcpu_prepare_run( (*vcpu).state = VCpuStatus::Aborted; } ret = false; - // goto out; - if need_vm_lock { - sl_unlock(&(*(*vcpu).vm).lock); - } if !ret { sl_unlock(&(*vcpu).execution_lock); @@ -375,10 +344,6 @@ unsafe fn api_vcpu_prepare_run( match (*vcpu).state { VCpuStatus::Off | VCpuStatus::Aborted => { ret = false; - // goto out; - if need_vm_lock { - sl_unlock(&(*(*vcpu).vm).lock); - } if !ret { sl_unlock(&(*vcpu).execution_lock); @@ -389,12 +354,14 @@ unsafe fn api_vcpu_prepare_run( // A pending message allows the vCPU to run so the message can be // delivered directly. 
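// The guard below folds the old `mailbox.state == MailboxState::Received`
// check into `inner.lock().try_read()`, so the state test and the transition
// to Read happen together under the VM lock. A minimal standalone sketch of
// the same shape, using std::sync::Mutex and simplified names (not this
// patch's types):

use std::sync::Mutex;

#[derive(PartialEq)]
enum State {
    Empty,
    Received,
    Read,
}

struct Inner {
    state: State,
}

impl Inner {
    // Marks a pending message as read and reports whether one existed.
    fn try_read(&mut self) -> bool {
        if self.state == State::Received {
            self.state = State::Read;
            true
        } else {
            false
        }
    }
}

fn can_run(blocked_on_mailbox: bool, inner: &Mutex<Inner>) -> bool {
    match blocked_on_mailbox {
        // The state check and update happen under the lock, inside the guard.
        true if inner.lock().unwrap().try_read() => true,
        _ => false,
    }
}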
- VCpuStatus::BlockedMailbox if (*(*vcpu).vm).mailbox.state == MailboxState::Received => { + // The VM needs to be locked to deliver mailbox messages. + // The VM lock is not needed in the common case so it must only be taken + // when it is going to be needed. This ensures there are no inter-vCPU + // dependencies in the common run case meaning the sensitive context + // switch performance is consistent. + VCpuStatus::BlockedMailbox if (*(*vcpu).vm).inner.lock().try_read() => { arch_regs_set_retval(&mut (*vcpu).regs, SpciReturn::Success as uintreg_t); - (*(*vcpu).vm).mailbox.state = MailboxState::Read; - // break; } - // Fall through. (TODO: isn't it too verbose?) // Allow virtual interrupts to be delivered. VCpuStatus::BlockedMailbox | VCpuStatus::BlockedInterrupt @@ -424,10 +391,6 @@ unsafe fn api_vcpu_prepare_run( } ret = false; - // goto out; - if need_vm_lock { - sl_unlock(&(*(*vcpu).vm).lock); - } if !ret { sl_unlock(&(*vcpu).execution_lock); @@ -446,11 +409,6 @@ unsafe fn api_vcpu_prepare_run( ret = true; - // out: - if need_vm_lock { - sl_unlock(&(*(*vcpu).vm).lock); - } - if !ret { sl_unlock(&(*vcpu).execution_lock); } @@ -484,7 +442,7 @@ pub unsafe extern "C" fn api_vcpu_run( // The requested VM must exist. vm = vm_find(vm_id); - if vm == ptr::null_mut() { + if vm.is_null() { return ret.into_raw(); } @@ -524,25 +482,24 @@ pub unsafe extern "C" fn api_vcpu_run( return ret.into_raw(); } -/// Check that the mode indicates memory that is vaid, owned and exclusive. -fn api_mode_valid_owned_and_exclusive(mode: Mode) -> bool { - (mode & (Mode::INVALID | Mode::UNOWNED | Mode::SHARED)).is_empty() -} - /// Determines the value to be returned by api_vm_configure and /// api_mailbox_clear after they've succeeded. If a secondary VM is running and /// there are waiters, it also switches back to the primary VM for it to wake /// waiters up. -unsafe fn api_waiter_result(locked_vm: VmLocked, current: *mut VCpu, next: *mut *mut VCpu) -> i64 { - let vm = locked_vm.vm; +unsafe fn waiter_result( + vm_id: spci_vm_id_t, + vm_inner: &VmInner, + current: *mut VCpu, + next: *mut *mut VCpu, +) -> i64 { let ret = HfVCpuRunReturn::NotifyWaiters; - if list_empty(&(*vm).mailbox.waiter_list) { + if vm_inner.is_waiter_list_empty() { // No waiters, nothing else to do. return 0; } - if (*vm).id == HF_PRIMARY_VM_ID { + if vm_id == HF_PRIMARY_VM_ID { // The caller is the primary VM. Tell it to wake up waiters. return 1; } @@ -554,223 +511,6 @@ unsafe fn api_waiter_result(locked_vm: VmLocked, current: *mut VCpu, next: *mut 0 } -/// Configures the hypervisor's stage-1 view of the send and receive pages. The -/// stage-1 page tables must be locked so memory cannot be taken by another core -/// which could result in this transaction being unable to roll back in the case -/// of an error. -unsafe fn api_vm_configure_stage1( - vm_locked: VmLocked, - pa_send_begin: paddr_t, - pa_send_end: paddr_t, - pa_recv_begin: paddr_t, - pa_recv_end: paddr_t, - local_page_pool: *mut MPool, -) -> bool { - let ret; - let mut mm_stage1_locked = mm_lock_stage1(); - - // Map the send page as read-only in the hypervisor address space. - (*vm_locked.vm).mailbox.send = mm_identity_map( - mm_stage1_locked, - pa_send_begin, - pa_send_end, - Mode::R, - local_page_pool, - ) as usize as *const SpciMessage; - if (*vm_locked.vm).mailbox.send == ptr::null() { - // TODO: partial defrag of failed range. - // Recover any memory consumed in failed mapping. 
- mm_defrag(mm_stage1_locked, local_page_pool); - - // goto fail; - ret = false; - - mm_unlock_stage1(&mut mm_stage1_locked); - return ret; - } - - // Map the receive page as writable in the hypervisor address space. On - // failure, unmap the send page before returning. - (*vm_locked.vm).mailbox.recv = mm_identity_map( - mm_stage1_locked, - pa_recv_begin, - pa_recv_end, - Mode::W, - local_page_pool, - ) as usize as *mut SpciMessage; - if (*vm_locked.vm).mailbox.recv == ptr::null_mut() { - // TODO: parital defrag of failed range. - // Recover any memory consumed in failed mapping. - mm_defrag(mm_stage1_locked, local_page_pool); - - // goto fail_undo_send; - (*vm_locked.vm).mailbox.send = ptr::null(); - assert!(mm_unmap( - mm_stage1_locked, - pa_send_begin, - pa_send_end, - local_page_pool - )); - - ret = false; - - mm_unlock_stage1(&mut mm_stage1_locked); - return ret; - } - - ret = true; - // goto out; - mm_unlock_stage1(&mut mm_stage1_locked); - return ret; - - // The following mappings will not require more memory than is available - // in the local pool. - // fail_undo_send: - (*vm_locked.vm).mailbox.send = ptr::null(); - assert!(mm_unmap( - mm_stage1_locked, - pa_send_begin, - pa_send_end, - local_page_pool - )); - - // fail: - ret = false; - - // out: - mm_unlock_stage1(&mut mm_stage1_locked); - return ret; -} - -/// Configures the send and receive pages in the VM stage-2 and hypervisor -/// stage-1 page tables. Locking of the page tables combined with a local memory -/// pool ensures there will always be enough memory to recover from any errors -/// that arise. -unsafe fn api_vm_configure_pages( - vm_locked: VmLocked, - pa_send_begin: paddr_t, - pa_send_end: paddr_t, - orig_send_mode: Mode, - pa_recv_begin: paddr_t, - pa_recv_end: paddr_t, - orig_recv_mode: Mode, -) -> bool { - let ret; - let mut local_page_pool: MPool = mem::uninitialized(); - - // Create a local pool so any freed memory can't be used by another thread. - // This is to ensure the original mapping can be restored if any stage of - // the process fails. - mpool_init_with_fallback(&mut local_page_pool, API_PAGE_POOL.get_ref()); - - // Take memory ownership away from the VM and mark as shared. - if !mm_vm_identity_map( - &mut (*vm_locked.vm).ptable, - pa_send_begin, - pa_send_end, - (Mode::UNOWNED | Mode::SHARED | Mode::R | Mode::W), - ptr::null_mut(), - &mut local_page_pool, - ) { - //goto fail; - ret = false; - mpool_fini(&mut local_page_pool); - return ret; - } - - if !mm_vm_identity_map( - &mut (*vm_locked.vm).ptable, - pa_recv_begin, - pa_recv_end, - (Mode::UNOWNED | Mode::SHARED | Mode::R), - ptr::null_mut(), - &mut local_page_pool, - ) { - // TODO: partial defrag of failed range. - // Recover any memory consumed in failed mapping. 
- mm_vm_defrag(&mut (*vm_locked.vm).ptable, &mut local_page_pool); - // goto fail_undo_send; - assert!(mm_vm_identity_map( - &mut (*vm_locked.vm).ptable, - pa_send_begin, - pa_send_end, - orig_send_mode, - ptr::null_mut(), - &mut local_page_pool - )); - ret = false; - mpool_fini(&mut local_page_pool); - return ret; - } - - if !api_vm_configure_stage1( - vm_locked, - pa_send_begin, - pa_send_end, - pa_recv_begin, - pa_recv_end, - &mut local_page_pool, - ) { - // goto fail_undo_send_and_recv; - assert!(mm_vm_identity_map( - &mut (*vm_locked.vm).ptable, - pa_recv_begin, - pa_recv_end, - orig_recv_mode, - ptr::null_mut(), - &mut local_page_pool - )); - - assert!(mm_vm_identity_map( - &mut (*vm_locked.vm).ptable, - pa_send_begin, - pa_send_end, - orig_send_mode, - ptr::null_mut(), - &mut local_page_pool - )); - - ret = false; - - mpool_fini(&mut local_page_pool); - return ret; - } - - ret = true; - // goto out; - mpool_fini(&mut local_page_pool); - return ret; - - // The following mappings will not require more memory than is available in - // the local pool. - // fail_undo_send_and_recv: - assert!(mm_vm_identity_map( - &mut (*vm_locked.vm).ptable, - pa_recv_begin, - pa_recv_end, - orig_recv_mode, - ptr::null_mut(), - &mut local_page_pool - )); - - // fail_undo_send: - assert!(mm_vm_identity_map( - &mut (*vm_locked.vm).ptable, - pa_send_begin, - pa_send_end, - orig_send_mode, - ptr::null_mut(), - &mut local_page_pool - )); - - // fail: - ret = false; - - // out: - mpool_fini(&mut local_page_pool); - return ret; -} - /// Configures the VM to send/receive data through the specified pages. The /// pages must not be shared. /// @@ -788,26 +528,6 @@ pub unsafe extern "C" fn api_vm_configure( next: *mut *mut VCpu, ) -> i64 { let vm = (*current).vm; - let ret; - let mut orig_send_mode = mem::uninitialized(); - let mut orig_recv_mode = mem::uninitialized(); - - // Fail if addresses are not page-aligned. - if !is_aligned(ipa_addr(send), PAGE_SIZE) || !is_aligned(ipa_addr(recv), PAGE_SIZE) { - return -1; - } - - // Convert to physical addresses. - let pa_send_begin = pa_from_ipa(send); - let pa_send_end = pa_add(pa_send_begin, PAGE_SIZE); - - let pa_recv_begin = pa_from_ipa(recv); - let pa_recv_end = pa_add(pa_recv_begin, PAGE_SIZE); - - // Fail if the same page is used for the send and receive pages. - if pa_addr(pa_send_begin) == pa_addr(pa_recv_begin) { - return -1; - } // The hypervisor's memory map must be locked for the duration of this // operation to ensure there will be sufficient memory to recover from @@ -815,74 +535,16 @@ pub unsafe extern "C" fn api_vm_configure( // // TODO: the scope of the can be reduced but will require restructing // to keep a single unlock point. - let mut vm_locked = vm_lock(vm); - - // We only allow these to be setup once. - if (*vm).mailbox.send != ptr::null() || (*vm).mailbox.recv != ptr::null_mut() { - // goto fail; - ret = -1; - vm_unlock(&mut vm_locked); - return ret; - } - - // Ensure the pages are valid, owned and exclusive to the VM and that the - // VM has the required access to the memory. 
- if !mm_vm_get_mode( - &mut (*vm).ptable, - send, - ipa_add(send, PAGE_SIZE), - &mut orig_send_mode, - ) || !api_mode_valid_owned_and_exclusive(orig_send_mode) - || !orig_send_mode.contains(Mode::R) - || !orig_send_mode.contains(Mode::W) - { - // goto fail; - ret = -1; - vm_unlock(&mut vm_locked); - return ret; - } - - if !mm_vm_get_mode( - &mut (*vm).ptable, - recv, - ipa_add(recv, PAGE_SIZE), - &mut orig_recv_mode, - ) || !api_mode_valid_owned_and_exclusive(orig_recv_mode) - || !orig_recv_mode.contains(Mode::R) + let mut vm_inner = (*vm).inner.lock(); + if vm_inner + .configure(send, recv, API_PAGE_POOL.get_ref()) + .is_err() { - // goto fail; - ret = -1; - vm_unlock(&mut vm_locked); - return ret; - } - - if !api_vm_configure_pages( - vm_locked, - pa_send_begin, - pa_send_end, - orig_send_mode, - pa_recv_begin, - pa_recv_end, - orig_recv_mode, - ) { - // goto fail; - ret = -1; - vm_unlock(&mut vm_locked); - return ret; + return -1; } // Tell caller about waiters, if any. - ret = api_waiter_result(vm_locked, current, next); - // goto exit; - vm_unlock(&mut vm_locked); - return ret; - - // fail: - ret = -1; - - // exit: - vm_unlock(&mut vm_locked); - return ret; + waiter_result((*vm).id, &vm_inner, current, next) } /// Copies data from the sender's send buffer to the recipient's receive buffer @@ -908,11 +570,9 @@ pub unsafe extern "C" fn api_spci_msg_send( // header. If the tx mailbox at from_msg is configured (i.e. // from_msg != ptr::null()) then it can be safely accessed after releasing // the lock since the tx mailbox address can only be configured once. - sl_lock(&(*from).lock); - let from_msg = (*from).mailbox.send; - sl_unlock(&(*from).lock); + let from_msg = (*from).inner.lock().get_send_ptr(); - if from_msg == ptr::null() { + if from_msg.is_null() { return SpciReturn::InvalidParameters; } @@ -937,7 +597,7 @@ pub unsafe extern "C" fn api_spci_msg_send( // Ensure the target VM exists. let to = vm_find(from_msg_replica.target_vm_id); - if to == ptr::null_mut() { + if to.is_null() { return SpciReturn::InvalidParameters; } @@ -946,29 +606,19 @@ pub unsafe extern "C" fn api_spci_msg_send( // buffer. Since in spci_msg_handle_architected_message we may call // api_spci_share_memory which must hold the `from` lock, we must hold the // `from` lock at this point to prevent a deadlock scenario. - let mut vm_from_to_lock = vm_lock_both(to, from); + let (mut to_inner, mut from_inner) = SpinLock::lock_both(&(*to).inner, &(*from).inner); - if (*to).mailbox.state != MailboxState::Empty || (*to).mailbox.recv == ptr::null_mut() { + if !to_inner.is_empty() || !to_inner.is_configured() { // Fail if the target isn't currently ready to receive data, // setting up for notification if requested. if notify { - let entry = &mut (*(*current).vm).wait_entries[from_msg_replica.target_vm_id as usize]; - - // Append waiter only if it's not there yet. - if list_empty(&(*entry).wait_links) { - list_append(&mut (*to).mailbox.waiter_list, &mut (*entry).wait_links); - } + from_inner.wait(&mut to_inner, (*to).id); } - ret = SpciReturn::Busy; - // goto out; - vm_unlock(&mut vm_from_to_lock.vm1); - vm_unlock(&mut vm_from_to_lock.vm2); - - return ret; + return SpciReturn::Busy; } - let to_msg = (*to).mailbox.recv; + let to_msg = to_inner.get_recv_ptr(); // Handle architected messages. 
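// `SpinLock::lock_both(&(*to).inner, &(*from).inner)` above hands back both
// guards from a single call, so both VM locks are always taken in one agreed
// order. A standalone sketch of one way to implement such an ordering with
// std::sync::Mutex, picking the lower address first (illustrative only; the
// patch's SpinLock::lock_both defers to RawSpinLock::lock_both for the actual
// ordering):

use std::sync::{Mutex, MutexGuard};

fn lock_both<'a, T>(
    a: &'a Mutex<T>,
    b: &'a Mutex<T>,
) -> (MutexGuard<'a, T>, MutexGuard<'a, T>) {
    // Locking the lower-addressed mutex first means two callers that pass the
    // same pair in opposite order can never deadlock each other.
    if (a as *const Mutex<T> as usize) <= (b as *const Mutex<T> as usize) {
        let ga = a.lock().unwrap();
        let gb = b.lock().unwrap();
        (ga, gb)
    } else {
        let gb = b.lock().unwrap();
        let ga = a.lock().unwrap();
        (ga, gb)
    }
}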
if !from_msg_replica.flags.contains(SpciMessageFlags::IMPDEF) { @@ -978,24 +628,14 @@ pub unsafe extern "C" fn api_spci_msg_send( + mem::size_of::() + mem::size_of::()] = mem::uninitialized(); - let architected_header = spci_get_architected_message_header((*from).mailbox.send); + let architected_header = spci_get_architected_message_header(from_msg); if from_msg_replica.length as usize > message_buffer.len() { - ret = SpciReturn::InvalidParameters; - // goto out; - vm_unlock(&mut vm_from_to_lock.vm1); - vm_unlock(&mut vm_from_to_lock.vm2); - - return ret; + return SpciReturn::InvalidParameters; } if (from_msg_replica.length as usize) < mem::size_of::() { - ret = SpciReturn::InvalidParameters; - // goto out; - vm_unlock(&mut vm_from_to_lock.vm1); - vm_unlock(&mut vm_from_to_lock.vm2); - - return ret; + return SpciReturn::InvalidParameters; } // Copy the architected message into an internal buffer. @@ -1014,19 +654,17 @@ pub unsafe extern "C" fn api_spci_msg_send( // at spci_msg_handle_architected_message will make several accesses to // fields in message_buffer. The memory area message_buffer must be // exclusively owned by Hf so that TOCTOU issues do not arise. + // TODO(HfO2): This code looks unsafe. Port spci_architected_message.c + // and avoid creating VmLocked manually. ret = spci_msg_handle_architected_message( - vm_from_to_lock.vm1, - vm_from_to_lock.vm2, + VmLocked { vm: to }, + VmLocked { vm: from }, architected_message_replica, &mut from_msg_replica, to_msg, ); if ret != SpciReturn::Success { - //goto out; - vm_unlock(&mut vm_from_to_lock.vm1); - vm_unlock(&mut vm_from_to_lock.vm2); - return ret; } } else { @@ -1034,7 +672,11 @@ pub unsafe extern "C" fn api_spci_msg_send( memcpy_s( &mut (*to_msg).payload as *mut _ as usize as _, SPCI_MSG_PAYLOAD_MAX, - &(*(*from).mailbox.send).payload as *const _ as usize as _, + // HfO2: below was &(*(*from).mailbox.send).payload, but we can + // safely assume it is equal to &(*from_msg).payload, even though + // from_msg was defined before entering critical section. That's + // because we do not allow vm to be configured more than once. + &(*from_msg).payload as *const _ as usize as _, size, ); *to_msg = from_msg_replica; @@ -1045,27 +687,18 @@ pub unsafe extern "C" fn api_spci_msg_send( // Messages for the primary VM are delivered directly. if (*to).id == HF_PRIMARY_VM_ID { - (*to).mailbox.state = MailboxState::Read; + to_inner.set_read(); *next = api_switch_to_primary(current, primary_ret, VCpuStatus::Ready); - - // goto out; - vm_unlock(&mut vm_from_to_lock.vm1); - vm_unlock(&mut vm_from_to_lock.vm2); - return ret; } - (*to).mailbox.state = MailboxState::Received; + to_inner.set_received(); // Return to the primary VM directly or with a switch. if (*from).id != HF_PRIMARY_VM_ID { *next = api_switch_to_primary(current, primary_ret, VCpuStatus::Ready); } - // out: - vm_unlock(&mut vm_from_to_lock.vm1); - vm_unlock(&mut vm_from_to_lock.vm2); - return ret; } @@ -1079,35 +712,26 @@ pub unsafe extern "C" fn api_spci_msg_recv( current: *mut VCpu, next: *mut *mut VCpu, ) -> SpciReturn { - let vm = (*current).vm; + let vm = &*(*current).vm; let return_code: SpciReturn; let block = attributes.contains(SpciMsgRecvAttributes::BLOCK); // The primary VM will receive messages as a status code from running vcpus // and must not call this function. - if (*vm).id == HF_PRIMARY_VM_ID { + if vm.id == HF_PRIMARY_VM_ID { return SpciReturn::Interrupted; } - sl_lock(&(*vm).lock); + let mut vm_inner = vm.inner.lock(); // Return pending messages without blocking. 
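// The removals below drop the `// goto out` + sl_unlock bookkeeping: the
// guard bound to `vm_inner` releases the lock automatically on every early
// return. A self-contained illustration of that RAII pattern with
// std::sync::Mutex:

use std::sync::Mutex;

fn read_or_fail(lock: &Mutex<u32>, fail_fast: bool) -> Result<u32, ()> {
    let guard = lock.lock().unwrap();
    if fail_fast {
        // No explicit unlock: dropping `guard` on this return releases the
        // lock, which is what the deleted `out:` labels used to do by hand.
        return Err(());
    }
    Ok(*guard)
}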
- if (*vm).mailbox.state == MailboxState::Received { - (*vm).mailbox.state = MailboxState::Read; - return_code = SpciReturn::Success; - // goto out; - sl_unlock(&(*vm).lock); - - return return_code; + if vm_inner.try_read() { + return SpciReturn::Success; } // No pending message so fail if not allowed to block. if !block { - return_code = SpciReturn::Retry; - // goto out; - sl_unlock(&(*vm).lock); - - return return_code; + return SpciReturn::Retry; } // From this point onward this call can only be interrupted or a message @@ -1118,9 +742,6 @@ pub unsafe extern "C" fn api_spci_msg_recv( // Don't block if there are enabled and pending interrupts, to match // behaviour of wait_for_interrupt. if (*current).interrupts.enabled_and_pending_count > 0 { - // goto out; - sl_unlock(&(*vm).lock); - return return_code; } @@ -1134,9 +755,6 @@ pub unsafe extern "C" fn api_spci_msg_recv( *next = api_switch_to_primary(current, run_return, VCpuStatus::BlockedMailbox); } - // out: - sl_unlock(&(*vm).lock); - return return_code; } @@ -1152,24 +770,12 @@ pub unsafe extern "C" fn api_spci_msg_recv( #[no_mangle] pub unsafe extern "C" fn api_mailbox_writable_get(current: *const VCpu) -> i64 { let vm = (*current).vm; - let ret; + let mut vm_inner = (*vm).inner.lock(); - sl_lock(&(*vm).lock); - if list_empty(&(*vm).mailbox.ready_list) { - ret = -1; - // goto exit; - sl_unlock(&(*vm).lock); - return ret; + match vm_inner.dequeue_ready_list() { + Some(id) => id as i64, + None => -1, } - - let entry: *mut WaitEntry = - container_of!((*vm).mailbox.ready_list.next, WaitEntry, ready_links); - list_remove(&mut (*entry).ready_links); - ret = entry.offset_from((*vm).wait_entries.as_ptr()) as i64; - - // exit: - sl_unlock(&(*vm).lock); - return ret; } /// Retrieves the next VM waiting to be notified that the mailbox of the @@ -1185,30 +791,24 @@ pub unsafe extern "C" fn api_mailbox_waiter_get(vm_id: spci_vm_id_t, current: *c } let vm = vm_find(vm_id); - if vm == ptr::null_mut() { + if vm.is_null() { return -1; } // Check if there are outstanding notifications from given vm. - let mut locked = vm_lock(vm); - let entry = api_fetch_waiter(locked); - vm_unlock(&mut locked); + let entry = (*vm).inner.lock().fetch_waiter(); - if entry == ptr::null_mut() { + if entry.is_null() { return -1; } // Enqueue notification to waiting VM. 
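// Both api_mailbox_writable_get and api_mailbox_waiter_get map a "nothing to
// report" result onto the C convention of returning -1. For the Option-based
// variant, the same conversion can be written in one line with map_or
// (sketch only; u16 stands in for spci_vm_id_t):

fn id_or_minus_one(id: Option<u16>) -> i64 {
    // None (no ready entry) becomes -1, Some(id) becomes the index itself.
    id.map_or(-1, i64::from)
}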
- let waiting_vm = (*entry).waiting_vm as *mut Vm; + let waiting_vm = (*entry).waiting_vm; - sl_lock(&(*waiting_vm).lock); + let mut vm_inner = (*waiting_vm).inner.lock(); if list_empty(&(*entry).ready_links) { - list_append( - &mut (*waiting_vm).mailbox.ready_list, - &mut (*entry).ready_links, - ); + vm_inner.enqueue_ready_list(&mut *entry); } - sl_unlock(&(*waiting_vm).lock); (*waiting_vm).id as i64 } @@ -1227,8 +827,8 @@ pub unsafe extern "C" fn api_mailbox_waiter_get(vm_id: spci_vm_id_t, current: *c pub unsafe extern "C" fn api_mailbox_clear(current: *mut VCpu, next: *mut *mut VCpu) -> i64 { let vm = (*current).vm; let ret; - let mut locked = vm_lock(vm); - match (*vm).mailbox.state { + let mut vm_inner = (*vm).inner.lock(); + match vm_inner.get_state() { MailboxState::Empty => { ret = 0; } @@ -1236,12 +836,11 @@ pub unsafe extern "C" fn api_mailbox_clear(current: *mut VCpu, next: *mut *mut V ret = -1; } MailboxState::Read => { - ret = api_waiter_result(locked, current, next); - (*vm).mailbox.state = MailboxState::Empty; + ret = waiter_result((*vm).id, &vm_inner, current, next); + vm_inner.set_empty(); } } - vm_unlock(&mut locked); ret } @@ -1319,8 +918,8 @@ pub unsafe extern "C" fn api_interrupt_get(current: *mut VCpu) -> intid_t { /// Returns whether the current vCPU is allowed to inject an interrupt into the /// given VM and vCPU. #[inline] -unsafe fn is_injection_allowed(target_vm_id: spci_vm_id_t, current: *const VCpu) -> bool { - let current_vm_id = (*(*current).vm).id; +fn is_injection_allowed(target_vm_id: spci_vm_id_t, current: &VCpu) -> bool { + let current_vm_id = unsafe { (*current.vm).id }; // The primary VM is allowed to inject interrupts into any VM. Secondary // VMs are only allowed to inject interrupts into their own vCPUs. @@ -1352,7 +951,7 @@ pub unsafe extern "C" fn api_interrupt_inject( return -1; } - if target_vm == ptr::null_mut() { + if target_vm.is_null() { return -1; } @@ -1361,7 +960,7 @@ pub unsafe extern "C" fn api_interrupt_inject( return -1; } - if !is_injection_allowed(target_vm_id, current) { + if !is_injection_allowed(target_vm_id, &*current) { return -1; } @@ -1380,44 +979,30 @@ pub unsafe extern "C" fn api_interrupt_inject( /// Clears a region of physical memory by overwriting it with zeros. The data is /// flushed from the cache so the memory has been cleared across the system. -unsafe fn api_clear_memory(begin: paddr_t, end: paddr_t, ppool: *mut MPool) -> bool { +fn clear_memory(begin: paddr_t, end: paddr_t, ppool: &MPool) -> bool { + let mut hypervisor_ptable = HYPERVISOR_PAGE_TABLE.lock(); + let size = pa_difference(begin, end); + let region = pa_addr(begin); + // TODO: change this to a cpu local single page window rather than a global // mapping of the whole range. Such an approach will limit the // changes to stage-1 tables and will allow only local invalidation. - let ret; - let mut stage1_locked = mm_lock_stage1(); - // TODO: Refactor result variable name. - // But mm_identity_map returns begin if succeed or null pointer otherwise. - // Hence the name is not important. - let ptr_ = mm_identity_map(stage1_locked, begin, end, Mode::W, ppool); - let size = pa_difference(begin, end); - - if ptr_ == ptr::null_mut() { + if hypervisor_ptable.identity_map(begin, end, Mode::W, ppool).is_none() { // TODO: partial defrag of failed range. // Recover any memory consumed in failed mapping. 
- mm_defrag(stage1_locked, ppool); - // goto fail; - ret = false; - mm_unlock_stage1(&mut stage1_locked); - return ret; + hypervisor_ptable.defrag(ppool); + return false; } - memset_s(ptr_ as usize as _, size, 0, size); - arch_mm_write_back_dcache(ptr_ as usize, size); - mm_unmap(stage1_locked, begin, end, ppool); - - ret = true; - // goto out; - mm_unlock_stage1(&mut stage1_locked); - return ret; + unsafe { + memset_s(region as usize as _, size, 0, size); + arch_mm_write_back_dcache(region as usize, size); + } - // fail: - ret = false; + hypervisor_ptable.unmap(begin, end, ppool); - // out: - mm_unlock_stage1(&mut stage1_locked); - ret + true } // TODO: Move function to spci_architectted_message.c. (How in Rust?) @@ -1442,20 +1027,18 @@ pub unsafe extern "C" fn api_spci_share_memory( memory_to_attributes: u32, share: usize, ) -> SpciReturn { - let to = to_locked.vm; - let from = from_locked.vm; - let ret; + let to_inner = (*to_locked.vm).inner.get_mut_unchecked(); + let from_inner = (*from_locked.vm).inner.get_mut_unchecked(); // Disallow reflexive shares as this suggests an error in the VM. - if to == from { + if to_locked.vm == from_locked.vm { return SpciReturn::InvalidParameters; } // Create a local pool so any freed memory can't be used by another thread. // This is to ensure the original mapping can be restored if any stage of // the process fails. - let mut local_page_pool: MPool = mem::uninitialized(); - mpool_init_with_fallback(&mut local_page_pool, API_PAGE_POOL.get_ref()); + let local_page_pool: MPool = MPool::new_with_fallback(API_PAGE_POOL.get_ref()); // Obtain the single contiguous set of pages from the memory_region. // TODO: Add support for multiple constituent regions. @@ -1477,8 +1060,8 @@ pub unsafe extern "C" fn api_spci_share_memory( }; if !spci_msg_check_transition( - to, - from, + to_locked.vm, + from_locked.vm, share, &mut orig_from_mode, begin, @@ -1495,53 +1078,33 @@ pub unsafe extern "C" fn api_spci_share_memory( // First update the mapping for the sender so there is not overlap with the // recipient. - if !mm_vm_identity_map( - &mut (*from).ptable, - pa_begin, - pa_end, - from_mode, - ptr::null_mut(), - &mut local_page_pool, - ) { - ret = SpciReturn::NoMemory; - // goto out; - mpool_fini(&mut local_page_pool); - return ret; + if from_inner + .ptable + .identity_map(pa_begin, pa_end, from_mode, &local_page_pool) + .is_none() + { + return SpciReturn::NoMemory; } // Complete the transfer by mapping the memory into the recipient. - if !mm_vm_identity_map( - &mut (*to).ptable, - pa_begin, - pa_end, - to_mode, - ptr::null_mut(), - &mut local_page_pool, - ) { + if to_inner + .ptable + .identity_map(pa_begin, pa_end, to_mode, &local_page_pool) + .is_none() + { // TODO: partial defrag of failed range. // Recover any memory consumed in failed mapping. - mm_vm_defrag(&mut (*from).ptable, &mut local_page_pool); - - ret = SpciReturn::NoMemory; - - assert!(mm_vm_identity_map( - &mut (*from).ptable, - pa_begin, - pa_end, - orig_from_mode, - ptr::null_mut(), - &mut local_page_pool - )); - // goto out; - mpool_fini(&mut local_page_pool); - return ret; - } + from_inner.ptable.defrag(&local_page_pool); - ret = SpciReturn::Success; + assert!(from_inner + .ptable + .identity_map(pa_begin, pa_end, orig_from_mode, &local_page_pool) + .is_some()); - // out: - mpool_fini(&mut local_page_pool); - return ret; + return SpciReturn::NoMemory; + } + + SpciReturn::Success } /// Shares memory from the calling VM with another. 
The memory can be shared in @@ -1552,47 +1115,36 @@ pub unsafe extern "C" fn api_spci_share_memory( /// of the memory they have been given, opting to not wipe the memory and /// possibly allowing multiple blocks to be transferred. What this will /// look like is TBD. -#[no_mangle] -pub unsafe extern "C" fn api_share_memory( +fn share_memory( vm_id: spci_vm_id_t, addr: ipaddr_t, - size: size_t, - share: usize, - current: *mut VCpu, -) -> i64 { - let from = (*current).vm; + size: usize, + share: HfShare, + current: &VCpu, +) -> Result<(), ()> { + let from: &Vm = unsafe { &*current.vm }; // Disallow reflexive shares as this suggests an error in the VM. - if vm_id == (*from).id { - assert!(false); - return -1; + if vm_id == from.id { + return Err(()); } // Ensure the target VM exists. - let to = vm_find(vm_id); - if to == ptr::null_mut() { - return -1; + let to = unsafe { vm_find(vm_id) }; + if to.is_null() { + return Err(()); } + let to = unsafe { &*to }; + let begin = addr; let end = ipa_add(addr, size); // Fail if addresses are not page-aligned. if !is_aligned(ipa_addr(begin), PAGE_SIZE) || !is_aligned(ipa_addr(end), PAGE_SIZE) { - return -1; + return Err(()); } - // Convert the sharing request to memory management modes. - let share = match share { - 0 => HfShare::Give, - 1 => HfShare::Lend, - 2 => HfShare::Share, - _ => { - // The input is untrusted so might not be a valid value. - return -1; - } - }; - let (from_mode, to_mode) = match share { HfShare::Give => ( (Mode::INVALID | Mode::UNOWNED), @@ -1605,77 +1157,40 @@ pub unsafe extern "C" fn api_share_memory( ), }; - // Create a local pool so any freed memory can't be used by antoher thread. + // Create a local pool so any freed memory can't be used by another thread. // This is to ensure the original mapping can be restored if any stage of // the process fails. // TODO: So that's reason why Hafnium use local_page_pool! We need to verify // this. - let mut local_page_pool = mem::uninitialized(); - mpool_init_with_fallback(&mut local_page_pool, API_PAGE_POOL.get_ref()); + let local_page_pool = MPool::new_with_fallback(unsafe { API_PAGE_POOL.get_ref() }); - sl_lock_both(&(*from).lock, &(*to).lock); - - let ret; + let (mut from_inner, mut to_inner) = SpinLock::lock_both(&(*from).inner, &(*to).inner); // Ensure that the memory range is mapped with the same mode so that // changes can be reverted if the process fails. - let mut orig_from_mode = mem::uninitialized(); - if !mm_vm_get_mode(&mut (*from).ptable, begin, end, &mut orig_from_mode) { - // goto fail; - ret = -1; - - sl_unlock(&(*from).lock); - sl_unlock(&(*to).lock); - - mpool_fini(&mut local_page_pool); - - return ret; - } - - // Ensure the memory range is valid for the sender. If it isn't, the sender - // has either shared it with another VM already or has no claim to the - // memory. - if orig_from_mode.contains(Mode::INVALID) { - // goto fail; - ret = -1; - - sl_unlock(&(*from).lock); - sl_unlock(&(*to).lock); - - mpool_fini(&mut local_page_pool); - - return ret; - } + // Also ensure the memory range is valid for the sender. If it isn't, the + // sender has either shared it with another VM already or has no claim to + // the memory. + let orig_from_mode = from_inner + .ptable + .get_mode(begin, end) + .filter(|mode| !mode.contains(Mode::INVALID)) + .ok_or(())?; // The sender must own the memory and have exclusive access to it in order // to share it. Alternatively, it is giving memory back to the owning VM. 
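// The checks above and below replace the old mm_vm_get_mode + goto-fail
// sequence with an Option pipeline: `get_mode(..).filter(..).ok_or(())?`. A
// reduced standalone sketch of that shape (a plain u32 bitmask stands in for
// the Mode bitflags):

const MODE_INVALID: u32 = 1 << 0;

fn get_mode(mapped: bool) -> Option<u32> {
    // Stand-in for PageTable::get_mode: None when the range is not uniformly
    // mapped, Some(mode) otherwise.
    if mapped { Some(0) } else { None }
}

fn check_sender(mapped: bool) -> Result<u32, ()> {
    // An unmapped range (None) and an INVALID mapping both collapse to Err(()).
    get_mode(mapped)
        .filter(|mode| mode & MODE_INVALID == 0)
        .ok_or(())
}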
if orig_from_mode.contains(Mode::UNOWNED) { - let mut orig_to_mode = mem::uninitialized(); - - if share != HfShare::Give - || !mm_vm_get_mode(&mut (*to).ptable, begin, end, &mut orig_to_mode) - || orig_to_mode.contains(Mode::UNOWNED) - { - // goto fail; - ret = -1; - - sl_unlock(&(*from).lock); - sl_unlock(&(*to).lock); - - mpool_fini(&mut local_page_pool); - - return ret; + to_inner + .ptable + .get_mode(begin, end) + .filter(|mode| !mode.contains(Mode::UNOWNED)) + .ok_or(())?; + + if share != HfShare::Give { + return Err(()); } } else if orig_from_mode.contains(Mode::SHARED) { - // goto fail; - ret = -1; - - sl_unlock(&(*from).lock); - sl_unlock(&(*to).lock); - - mpool_fini(&mut local_page_pool); - - return ret; + return Err(()); } let pa_begin = pa_from_ipa(begin); @@ -1683,113 +1198,69 @@ pub unsafe extern "C" fn api_share_memory( // First update the mapping for the sender so there is not overlap with the // recipient. - if !mm_vm_identity_map( - &mut (*from).ptable, - pa_begin, - pa_end, - from_mode, - ptr::null_mut(), - &mut local_page_pool, - ) { - // goto fail; - ret = -1; - - sl_unlock(&(*from).lock); - sl_unlock(&(*to).lock); - - mpool_fini(&mut local_page_pool); - - return ret; - } + from_inner + .ptable + .identity_map(pa_begin, pa_end, from_mode, &local_page_pool) + .ok_or(())?; // Clear the memory so no VM or device can see the previous contents. - if !api_clear_memory(pa_begin, pa_end, &mut local_page_pool) { - // goto fail_return_to_sender; - assert!(mm_vm_identity_map( - &mut (*from).ptable, - pa_begin, - pa_end, - orig_from_mode, - ptr::null_mut(), - &mut local_page_pool - )); + if !clear_memory(pa_begin, pa_end, &local_page_pool) { + assert!(from_inner + .ptable + .identity_map(pa_begin, pa_end, orig_from_mode, &local_page_pool) + .is_some()); - ret = -1; - - sl_unlock(&(*from).lock); - sl_unlock(&(*to).lock); - - mpool_fini(&mut local_page_pool); - return ret; + return Err(()); } // Complete the transfer by mapping the memory into the recipient. - if !mm_vm_identity_map( - &mut (*to).ptable, - pa_begin, - pa_end, - to_mode, - ptr::null_mut(), - &mut local_page_pool, - ) { + if to_inner + .ptable + .identity_map(pa_begin, pa_end, to_mode, &local_page_pool) + .is_none() + { // TODO: partial defrag of failed range. // Recover any memory consumed in failed mapping. - mm_vm_defrag(&mut (*from).ptable, &mut local_page_pool); + from_inner.ptable.defrag(&local_page_pool); // goto fail_return_to_sender; - assert!(mm_vm_identity_map( - &mut (*from).ptable, - pa_begin, - pa_end, - orig_from_mode, - ptr::null_mut(), - &mut local_page_pool - )); - - // fail: - ret = -1; - - // out: - sl_unlock(&(*from).lock); - sl_unlock(&(*to).lock); - - mpool_fini(&mut local_page_pool); - return ret; - } - - ret = 0; - // goto out; - sl_unlock(&(*from).lock); - sl_unlock(&(*to).lock); + assert!(from_inner + .ptable + .identity_map(pa_begin, pa_end, orig_from_mode, &local_page_pool) + .is_some()); - mpool_fini(&mut local_page_pool); - - return ret; - - // fail_return_to_sender: - assert!(mm_vm_identity_map( - &mut (*from).ptable, - pa_begin, - pa_end, - orig_from_mode, - ptr::null_mut(), - &mut local_page_pool - )); - - // fail: - ret = -1; - - // out: - sl_unlock(&(*from).lock); - sl_unlock(&(*to).lock); + return Err(()); + } - mpool_fini(&mut local_page_pool); + Ok(()) +} +#[no_mangle] +pub unsafe extern "C" fn api_share_memory( + vm_id: spci_vm_id_t, + addr: ipaddr_t, + size: size_t, + share: usize, + current: *const VCpu, +) -> i64 { + // Convert the sharing request to memory management modes. 
+ let share = match share { + 0 => HfShare::Give, + 1 => HfShare::Lend, + 2 => HfShare::Share, + _ => { + // The input is untrusted so might not be a valid value. + return -1; + } + }; - return ret; + match share_memory(vm_id, addr, size, share, &*current) { + Ok(_) => 0, + Err(_) => -1, + } } /// Returns the version of the implemented SPCI specification. #[no_mangle] -pub unsafe extern "C" fn api_spci_version() -> i32 { +pub extern "C" fn api_spci_version() -> i32 { // Ensure that both major and minor revision representation occupies at // most 15 bits. const_assert!(0x8000 > SPCI_VERSION_MAJOR); @@ -1801,18 +1272,6 @@ pub unsafe extern "C" fn api_spci_version() -> i32 { #[no_mangle] pub unsafe extern "C" fn api_debug_log(c: c_char, current: *mut VCpu) -> i64 { let vm = (*current).vm; - let mut vm_locked = vm_lock(vm); - - if c == '\n' as u32 as u8 - || c == '\0' as u32 as u8 - || (*vm).log_buffer_length == (*vm).log_buffer.len() - { - dlog_flush_vm_buffer(vm_locked); - } else { - (*vm).log_buffer[(*vm).log_buffer_length] = c; - (*vm).log_buffer_length += 1; - } - - vm_unlock(&mut vm_locked); + (*vm).debug_log(c); 0 } diff --git a/hfo2/src/arch/aarch64.rs b/hfo2/src/arch/aarch64.rs index 9e097670d..2405243d6 100644 --- a/hfo2/src/arch/aarch64.rs +++ b/hfo2/src/arch/aarch64.rs @@ -19,6 +19,7 @@ use core::mem; +use crate::cpu::*; use crate::types::*; const FLOAT_REG_BYTES: usize = 16; @@ -84,6 +85,30 @@ pub struct ArchRegs { peripherals: ArchPeriRegs, } +// from src/arch/aarch64/hypervisor/offset.h +// Note: always keep this constants same as ones in offset.h +const CPU_ID: usize = 0; +const CPU_STACK_BOTTOM: usize = 8; +const VCPU_REGS: usize = 32; +const REGS_LAZY: usize = 264; +const REGS_FREGS: usize = REGS_LAZY + 232; +//#[cfg(any(feature = "GIC_VERSION=3", feature = "GIC_VERSION=4"))] +const REGS_GIC: usize = REGS_FREGS + 528; + +/// Checks above constants are correct. +/// HfO2: This checking was originally done in compile time in C. But it was +/// impossible because Rust compiler rejects construction of variables with +/// interior mutability (`VCpu` has `SpinLock`) in constant expressions. Hence +/// we check those constants in runtime. +pub fn arch_cpu_module_init() { + assert_eq!(offset_of!(Cpu, id), CPU_ID); + assert_eq!(offset_of!(Cpu, stack_bottom), CPU_STACK_BOTTOM); + assert_eq!(offset_of!(VCpu, regs), VCPU_REGS); + assert_eq!(offset_of!(ArchRegs, lazy), REGS_LAZY); + assert_eq!(offset_of!(ArchRegs, fp), REGS_FREGS); + assert_eq!(offset_of!(ArchRegs, gic_ich_hcr_el2), REGS_GIC); +} + #[repr(C)] pub struct ArchSysRegs { vmpidr_el2: uintreg_t, diff --git a/hfo2/src/arch/fake.rs b/hfo2/src/arch/fake.rs index ef9f834ca..d8d5d9b52 100644 --- a/hfo2/src/arch/fake.rs +++ b/hfo2/src/arch/fake.rs @@ -47,3 +47,7 @@ pub struct ArchRegs { vcpu_id: cpu_id_t, virtual_interrupt: bool, } + +pub fn arch_cpu_module_init() { + // Do nothing. +} diff --git a/hfo2/src/cpu.rs b/hfo2/src/cpu.rs index 876979c75..c3aff2e75 100644 --- a/hfo2/src/cpu.rs +++ b/hfo2/src/cpu.rs @@ -138,7 +138,8 @@ pub struct Cpu { pub id: cpu_id_t, /// Pointer to bottom of the stack. - stack_bottom: *mut c_void, + /// `pub` here is only required by `arch_cpu_module_init`. + pub stack_bottom: *mut c_void, /// Enabling/disabling irqs are counted per-cpu. They are enabled when the count is zero, and /// disabled when it's non-zero. 
@@ -184,6 +185,8 @@ pub unsafe extern "C" fn cpu_module_init(cpu_ids: *mut cpu_id_t, count: usize) { let boot_cpu_id: cpu_id_t = cpus.get_ref()[0].id; let mut found_boot_cpu: bool = false; + arch_cpu_module_init(); + cpu_count = count as u32; // Initialize CPUs with the IDs from the configuration passed in. The @@ -321,6 +324,36 @@ pub unsafe extern "C" fn vcpu_index(vcpu: *const VCpu) -> spci_vcpu_index_t { index as u16 } +#[no_mangle] +pub unsafe extern "C" fn vcpu_get_regs(vcpu: *mut VCpu) -> *mut ArchRegs { + &mut (*vcpu).regs +} + +#[no_mangle] +pub unsafe extern "C" fn vcpu_get_regs_const(vcpu: *const VCpu) -> *const ArchRegs { + &(*vcpu).regs +} + +#[no_mangle] +pub unsafe extern "C" fn vcpu_get_vm(vcpu: *mut VCpu) -> *mut Vm { + (*vcpu).vm +} + +#[no_mangle] +pub unsafe extern "C" fn vcpu_get_cpu(vcpu: *mut VCpu) -> *mut Cpu { + (*vcpu).cpu +} + +#[no_mangle] +pub unsafe extern "C" fn vcpu_set_cpu(vcpu: *mut VCpu, cpu: *mut Cpu) { + (*vcpu).cpu = cpu; +} + +#[no_mangle] +pub unsafe extern "C" fn vcpu_get_interrupts(vcpu: *mut VCpu) -> *mut Interrupts { + &mut (*vcpu).interrupts +} + /// Check whether the given vcpu_state is an off state, for the purpose of /// turning vCPUs on and off. Note that aborted still counts as on in this /// context. @@ -368,7 +401,7 @@ pub unsafe extern "C" fn vcpu_secondary_reset_and_start( false, (*vm).id, vcpu_index(vcpu) as cpu_id_t, - (*vm).ptable.root, + (*vm).get_ptable_raw(), ); vcpu_on(vcpu_execution_locked, entry, arg); } @@ -385,14 +418,11 @@ pub unsafe extern "C" fn vcpu_secondary_reset_and_start( #[no_mangle] pub unsafe extern "C" fn vcpu_handle_page_fault( current: *const VCpu, - f: *mut VCpuFaultInfo, + f: *const VCpuFaultInfo, ) -> bool { let vm = (*current).vm; - let mut mode = mem::uninitialized(); // to avoid use-of-uninitialized error let mask = (*f).mode | Mode::INVALID; - let resume; - - sl_lock(&(*vm).lock); + let vm_inner = (*vm).inner.lock(); // Check if this is a legitimate fault, i.e., if the page table doesn't // allow the access attemped by the VM. @@ -402,14 +432,11 @@ pub unsafe extern "C" fn vcpu_handle_page_fault( // invalidations while holding the VM lock, so we don't need to do // anything else to recover from it. (Acquiring/releasing the lock // ensured that the invalidations have completed.) - resume = mm_vm_get_mode( - &mut (*vm).ptable, - (*f).ipaddr, - ipa_add((*f).ipaddr, 1), - &mut mode, - ) && (mode & mask) == (*f).mode; - - sl_unlock(&(*vm).lock); + let resume = vm_inner + .ptable + .get_mode((*f).ipaddr, ipa_add((*f).ipaddr, 1)) + .map(|mode| mode & mask == (*f).mode) + .unwrap_or(false); if !resume { dlog!( diff --git a/hfo2/src/dlog.rs b/hfo2/src/dlog.rs index 4efb02c46..bf13b901e 100644 --- a/hfo2/src/dlog.rs +++ b/hfo2/src/dlog.rs @@ -17,7 +17,7 @@ use core::fmt; use crate::spinlock::*; -use crate::vm::*; +use crate::types::*; extern "C" { fn plat_console_putchar(c: u8); @@ -54,22 +54,3 @@ pub fn _print(args: fmt::Arguments) { use core::fmt::Write; WRITER.lock().write_fmt(args).unwrap(); } - -/// Send the contents of the given VM's log buffer to the log, preceded by the -/// VM ID and followed by a newline. 
-pub unsafe extern "C" fn dlog_flush_vm_buffer(vm: VmLocked) { - use core::fmt::Write; - let mut writer = WRITER.lock(); - - writer.write_str("VM "); - writer.write_fmt(format_args!("{}", (*vm.vm).id)); - writer.write_str(": "); - - for i in 0..(*vm.vm).log_buffer_length { - plat_console_putchar((*vm.vm).log_buffer[i]); - (*vm.vm).log_buffer[i] = '\0' as u32 as u8; - } - - (*vm.vm).log_buffer_length = 0; - plat_console_putchar('\n' as u32 as u8); -} diff --git a/hfo2/src/lib.rs b/hfo2/src/lib.rs index dcbc92895..f115d86f9 100644 --- a/hfo2/src/lib.rs +++ b/hfo2/src/lib.rs @@ -21,6 +21,7 @@ #![feature(maybe_uninit_ref)] #![feature(ptr_offset_from)] #![feature(const_raw_ptr_to_usize_cast)] +#![feature(bind_by_move_pattern_guards)] #[macro_use] extern crate bitflags; diff --git a/hfo2/src/mm.rs b/hfo2/src/mm.rs index 1b59bdf55..71489519e 100644 --- a/hfo2/src/mm.rs +++ b/hfo2/src/mm.rs @@ -24,6 +24,9 @@ //! //! We assume that the stage 1 and stage 2 page table addresses are `usize`. It looks like that //! assumption might not be holding so we need to check that everything is going to be okay. +//! +//! TODO(HfO2): Many functions return Option<()> to represent success or fail. +//! Change them to return Result<(), ()> (#34.) use core::cmp; use core::marker::PhantomData; @@ -141,6 +144,13 @@ bitflags! { } } +impl Mode { + /// Check that the mode indicates memory that is vaid, owned and exclusive. + pub fn valid_owned_and_exclusive(&self) -> bool { + (*self & (Mode::INVALID | Mode::UNOWNED | Mode::SHARED)).is_empty() + } +} + bitflags! { /// Flags for memory management operations. struct Flags: u32 { @@ -160,7 +170,7 @@ type ptable_addr_t = uintvaddr_t; const_assert_eq!(addr_size_eq; mem::size_of::(), mem::size_of::()); /// The hypervisor page table. -static HYPERVISOR_PAGE_TABLE: SpinLock> = +pub static HYPERVISOR_PAGE_TABLE: SpinLock> = SpinLock::new(unsafe { PageTable::null() }); /// Is stage2 invalidation enabled? @@ -801,8 +811,13 @@ impl PageTable { mem::forget(self); } - fn get_raw(&self) -> *const RawPage { - pa_addr(self.root) as *const RawPage + /// Returns the address of the root of this page table. The return type is + /// paddr_t, physically addressed raw pointer. That means calling this + /// method is safe but accessing the memory of returned address is unsafe. + /// TODO: Better return type is PAddr (meaning of *mut RawPage + /// which is address physically.) + pub fn as_raw(&self) -> paddr_t { + self.root } fn deref(&self) -> &[RawPageTable] { diff --git a/hfo2/src/mpool.rs b/hfo2/src/mpool.rs index 6339627b5..f174a2e31 100644 --- a/hfo2/src/mpool.rs +++ b/hfo2/src/mpool.rs @@ -179,6 +179,9 @@ impl Pool { } } +/// Memory pool equipped with spinlock and fallback pool. +/// TODO(HfO2): Make a trait, which generalizes Pool (linked list) and a pair of +/// (pool, fallback) (#35.) 
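// MPool below pairs a spinlocked free list with an optional fallback pool:
// allocations that miss locally fall through to the fallback, while freed
// pages stay in the local pool, which is why api.rs builds a local
// MPool::new_with_fallback around every operation that may need to roll back.
// A toy standalone sketch of that behaviour (Vec<usize> stands in for the
// real page free list; the ownership structure here is simplified):

struct FallbackPool {
    pages: Vec<usize>,
    fallback: Option<Box<FallbackPool>>,
}

impl FallbackPool {
    fn alloc(&mut self) -> Option<usize> {
        // Serve from the local free list first, then from the fallback chain.
        self.pages.pop().or_else(|| self.fallback.as_mut()?.alloc())
    }

    fn free(&mut self, page: usize) {
        // Freed entries stay local, so a failed operation can restore the
        // original mappings from memory it just released.
        self.pages.push(page);
    }
}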
#[repr(C)] pub struct MPool { pool: SpinLock, diff --git a/hfo2/src/spinlock.rs b/hfo2/src/spinlock.rs index 5a6b7661b..7c62f8284 100644 --- a/hfo2/src/spinlock.rs +++ b/hfo2/src/spinlock.rs @@ -97,6 +97,23 @@ impl SpinLock { pub unsafe fn get_mut_unchecked(&self) -> &mut T { &mut *self.data.get() } + + pub fn lock_both<'s>( + lhs: &'s Self, + rhs: &'s Self, + ) -> (SpinLockGuard<'s, T>, SpinLockGuard<'s, T>) { + RawSpinLock::lock_both(&lhs.lock, &rhs.lock); + ( + SpinLockGuard { + lock: lhs, + _marker: PhantomData, + }, + SpinLockGuard { + lock: rhs, + _marker: PhantomData, + }, + ) + } } pub struct SpinLockGuard<'s, T> { diff --git a/hfo2/src/vm.rs b/hfo2/src/vm.rs index 01267e682..903940f91 100644 --- a/hfo2/src/vm.rs +++ b/hfo2/src/vm.rs @@ -17,13 +17,19 @@ use core::mem; use core::mem::MaybeUninit; use core::ptr; +use core::str; use core::sync::atomic::AtomicBool; +use arrayvec::ArrayVec; + +use crate::addr::*; use crate::arch::*; use crate::cpu::*; +use crate::dlog::*; use crate::list::*; use crate::mm::*; use crate::mpool::*; +use crate::page::*; use crate::spci::*; use crate::spinlock::*; use crate::std::*; @@ -32,7 +38,7 @@ use crate::types::*; const LOG_BUFFER_SIZE: usize = 256; #[repr(C)] -#[derive(PartialEq)] +#[derive(PartialEq, Debug, Clone, Copy)] pub enum MailboxState { /// There is no message in the mailbox. Empty, @@ -58,36 +64,444 @@ pub struct WaitEntry { #[repr(C)] pub struct Mailbox { - pub state: MailboxState, - pub recv: *mut SpciMessage, - pub send: *const SpciMessage, + state: MailboxState, + + // Addresses to page used for receiving and sending messages. + // Those pages are not protected by lock -- sender and receiver should + // have a proper protocol so that Hafnium copies synchronized data. + recv: *mut SpciMessage, + send: *const SpciMessage, + + /// List of wait_entry structs representing VMs that want to be notified + /// when the mailbox becomes writable. Once the mailbox does become + /// writable, the entry is removed from this list and added to the waiting + /// VM's ready_list. + waiter_list: list_entry, + + /// List of wait_entry structs representing VMs whose mailboxes became + /// writable since the owner of the mailbox registers for notification. + ready_list: list_entry, +} + +impl Mailbox { + /// Initializes the mailbox. + /// TODO: Refactor `vm_init` and make `Mailbox::new()` instead of this. + pub unsafe fn init(&mut self) { + self.state = MailboxState::Empty; + self.recv = ptr::null_mut(); + self.send = ptr::null(); + + list_init(&mut self.waiter_list); + list_init(&mut self.ready_list); + } + + /// Retrieves the next waiter and removes it from the wait list if the VM's + /// mailbox is in a writable state. + pub unsafe fn fetch_waiter(&mut self) -> *mut WaitEntry { + let entry: *mut WaitEntry; + + if self.state != MailboxState::Empty || self.recv.is_null() || list_empty(&self.waiter_list) + { + // The mailbox is not writable or there are no waiters. + return ptr::null_mut(); + } + + // Remove waiter from the wait list. + entry = container_of!(self.waiter_list.next, WaitEntry, wait_links); + list_remove(&mut (*entry).wait_links); + entry + } + + /// Checks if any waiters exists. + pub fn is_waiter_list_empty(&self) -> bool { + unsafe { list_empty(&self.waiter_list) } + } + + /// Checks whether there exists a pending message. If one exists, marks the + /// mailbox read. 
+ pub fn try_read(&mut self) -> bool { + if self.state == MailboxState::Received { + self.state = MailboxState::Read; + true + } else { + false + } + } + + /// Set the arrived message is read. + pub fn set_read(&mut self) { + self.state = MailboxState::Read; + } + + /// Set a message is arrived. + pub fn set_received(&mut self) { + self.state = MailboxState::Received; + } - /// List of wait_entry structs representing VMs that want to be notified when the mailbox - /// becomes writable. Once the mailbox does become writable, the entry is removed from this list - /// and added to the waiting VM's ready_list. - pub waiter_list: list_entry, + /// Configures the hypervisor's stage-1 view of the send and receive pages. + /// The stage-1 page tables must be locked so memory cannot be taken by + /// another core which could result in this transaction being unable to + /// roll back in the case of an error. + pub fn configure_stage1( + &mut self, + pa_send_begin: paddr_t, + pa_send_end: paddr_t, + pa_recv_begin: paddr_t, + pa_recv_end: paddr_t, + local_page_pool: &MPool, + ) -> Result<(), ()> { + let mut hypervisor_ptable = HYPERVISOR_PAGE_TABLE.lock(); + + // Map the send page as read-only in the hypervisor address space. + if hypervisor_ptable.identity_map(pa_send_begin, pa_send_end, Mode::R, local_page_pool).is_some() + { + self.send = pa_addr(pa_send_begin) as usize as *const SpciMessage; + } else { + // TODO: partial defrag of failed range. + // Recover any memory consumed in failed mapping. + hypervisor_ptable.defrag(local_page_pool); + return Err(()); + } + + // Map the receive page as writable in the hypervisor address space. On + // failure, unmap the send page before returning. + if hypervisor_ptable.identity_map(pa_recv_begin, pa_recv_end, Mode::W, local_page_pool).is_some() + { + self.recv = pa_addr(pa_recv_begin) as usize as *mut SpciMessage; + } else { + // TODO: parital defrag of failed range. + // Recover any memory consumed in failed mapping. + hypervisor_ptable.defrag(local_page_pool); + self.send = ptr::null(); + assert!(hypervisor_ptable + .unmap(pa_send_begin, pa_send_end, local_page_pool) + .is_some()); + + return Err(()); + } + + Ok(()) + } + + pub fn get_send_ptr(&self) -> *const SpciMessage { + self.send + } - /// List of wait_entry structs representing VMs whose mailboxes became writable since the owner - /// of the mailbox registers for notification. - pub ready_list: list_entry, + pub fn get_recv_ptr(&self) -> *mut SpciMessage { + self.recv + } +} + +pub struct VmInner { + log_buffer: ArrayVec<[c_char; LOG_BUFFER_SIZE]>, + pub ptable: PageTable, + mailbox: Mailbox, + + /// Wait entries to be used when waiting on other VM mailboxes. + wait_entries: [WaitEntry; MAX_VMS], + arch: ArchVm, +} + +impl VmInner { + /// Initializes VmInner. + pub unsafe fn init(&mut self, vm: *mut Vm, ppool: &mut MPool) -> Result<(), ()> { + self.mailbox.init(); + + if !mm_vm_init(&mut self.ptable, ppool) { + return Err(()); + } + + // Initialise waiter entries. + for i in 0..MAX_VMS { + self.wait_entries[i].waiting_vm = vm; + list_init(&mut self.wait_entries[i].wait_links); + list_init(&mut self.wait_entries[i].ready_links); + } + + Ok(()) + } + + /// Retrieves the next waiter and removes it from the wait list if the VM's + /// mailbox is in a writable state. + pub unsafe fn fetch_waiter(&mut self) -> *mut WaitEntry { + self.mailbox.fetch_waiter() + } + + /// Checks if any waiters exists. 
+ pub fn is_waiter_list_empty(&self) -> bool { + self.mailbox.is_waiter_list_empty() + } + + /// Checks whether there exists a pending message. If one exists, marks the + /// mailbox read. + pub fn try_read(&mut self) -> bool { + self.mailbox.try_read() + } + + /// Sets the arrived message is read. + pub fn set_read(&mut self) { + self.mailbox.set_read() + } + + /// Sets a message is arrived. + pub fn set_received(&mut self) { + self.mailbox.set_received() + } + + /// Configures the send and receive pages in the VM stage-2 and hypervisor + /// stage-1 page tables. Locking of the page tables combined with a local + /// memory pool ensures there will always be enough memory to recover from + /// any errors that arise. + /// TODO: Clean up this function using RAII. + fn configure_pages( + &mut self, + pa_send_begin: paddr_t, + pa_send_end: paddr_t, + orig_send_mode: Mode, + pa_recv_begin: paddr_t, + pa_recv_end: paddr_t, + orig_recv_mode: Mode, + fallback_mpool: &MPool, + ) -> Result<(), ()> { + // Create a local pool so any freed memory can't be used by another + // thread. This is to ensure the original mapping can be restored if + // any stage of the process fails. + let local_page_pool: MPool = MPool::new_with_fallback(fallback_mpool); + + // Take memory ownership away from the VM and mark as shared. + self.ptable.identity_map( + pa_send_begin, + pa_send_end, + Mode::UNOWNED | Mode::SHARED | Mode::R | Mode::W, + &local_page_pool, + ).ok_or(())?; + + if self.ptable.identity_map( + pa_recv_begin, + pa_recv_end, + Mode::UNOWNED | Mode::SHARED | Mode::R, + &local_page_pool, + ).is_none() { + // TODO: partial defrag of failed range. + // Recover any memory consumed in failed mapping. + self.ptable.defrag(&local_page_pool); + + assert!(self + .ptable + .identity_map( + pa_send_begin, + pa_send_end, + orig_send_mode, + &local_page_pool + ) + .is_some()); + return Err(()); + } + + if self.mailbox.configure_stage1( + pa_send_begin, + pa_send_end, + pa_recv_begin, + pa_recv_end, + &local_page_pool, + ).is_err() { + assert!(self + .ptable + .identity_map( + pa_recv_begin, + pa_recv_end, + orig_recv_mode, + &local_page_pool + ) + .is_some()); + + assert!(self + .ptable + .identity_map( + pa_send_begin, + pa_send_end, + orig_send_mode, + &local_page_pool + ) + .is_some()); + + return Err(()); + } + + Ok(()) + } + + /// Configures the VM to send/receive data through the specified pages. The + /// pages must not be shared. + /// + /// Returns: + /// - None on failure. + /// - Some(()) on success. + pub fn configure( + &mut self, + send: ipaddr_t, + recv: ipaddr_t, + fallback_mpool: &MPool, + ) -> Result<(), ()> { + // Fail if addresses are not page-aligned. + if !is_aligned(ipa_addr(send), PAGE_SIZE) || !is_aligned(ipa_addr(recv), PAGE_SIZE) { + return Err(()); + } + + // Convert to physical addresses. + let pa_send_begin = pa_from_ipa(send); + let pa_send_end = pa_add(pa_send_begin, PAGE_SIZE); + + let pa_recv_begin = pa_from_ipa(recv); + let pa_recv_end = pa_add(pa_recv_begin, PAGE_SIZE); + + // Fail if the same page is used for the send and receive pages. + if pa_addr(pa_send_begin) == pa_addr(pa_recv_begin) { + return Err(()); + } + + // We only allow these to be setup once. + if self.is_configured() { + return Err(()); + } + + // Ensure the pages are valid, owned and exclusive to the VM and that + // the VM has the required access to the memory. 
+ let orig_send_mode = self + .ptable + .get_mode(send, ipa_add(send, PAGE_SIZE)) + .filter(|mode| mode.valid_owned_and_exclusive()) + .filter(|mode| mode.contains(Mode::R)) + .filter(|mode| mode.contains(Mode::W)).ok_or(())?; + + let orig_recv_mode = self + .ptable + .get_mode(recv, ipa_add(recv, PAGE_SIZE)) + .filter(|mode| mode.valid_owned_and_exclusive()) + .filter(|mode| mode.contains(Mode::R)).ok_or(())?; + + self.configure_pages( + pa_send_begin, + pa_send_end, + orig_send_mode, + pa_recv_begin, + pa_recv_end, + orig_recv_mode, + fallback_mpool, + ) + } + + /// Checks whether `configure` is called before. + pub fn is_configured(&self) -> bool { + !self.mailbox.send.is_null() && !self.mailbox.recv.is_null() + } + + /// Checks whether mailbox is empty. + pub fn is_empty(&self) -> bool { + self.mailbox.state == MailboxState::Empty + } + + pub fn dequeue_ready_list(&mut self) -> Option { + unsafe { + if list_empty(&self.mailbox.ready_list) { + return None; + } + + let ret = { + let entry: *mut WaitEntry = + container_of!(self.mailbox.ready_list.next, WaitEntry, ready_links); + list_remove(&mut (*entry).ready_links); + entry.offset_from(self.wait_entries.as_ptr()) as spci_vm_id_t + }; + + Some(ret) + } + } + + pub fn enqueue_ready_list(&mut self, entry: &mut WaitEntry) { + debug_assert!(unsafe { list_empty(&entry.ready_links) }); + + unsafe { + list_append(&mut self.mailbox.ready_list, &mut entry.ready_links); + } + } + + pub fn get_state(&self) -> MailboxState { + self.mailbox.state + } + + pub fn set_empty(&mut self) { + debug_assert_eq!(self.mailbox.state, MailboxState::Read); + self.mailbox.state = MailboxState::Empty; + } + + /// Adds `self` into the waiter list of `target`, if `self` is not waiting + /// for another now. Returns false if `self` is waiting for another. + /// TODO: better name? + pub fn wait(&mut self, target: &mut Self, target_id: spci_vm_id_t) -> bool { + let entry = &mut self.wait_entries[target_id as usize]; + + // Append waiter only if it's not there yet. + if unsafe { !list_empty(&(*entry).wait_links) } { + return false; + } + + unsafe { + list_append(&mut target.mailbox.waiter_list, &mut (*entry).wait_links); + } + true + } + + pub fn get_send_ptr(&self) -> *const SpciMessage { + self.mailbox.get_send_ptr() + } + + pub fn get_recv_ptr(&self) -> *mut SpciMessage { + self.mailbox.get_recv_ptr() + } + + pub fn debug_log(&mut self, id: spci_vm_id_t, c: c_char) { + if c == '\n' as u32 as u8 || c == '\0' as u32 as u8 || self.log_buffer.is_full() { + // flush the buffer. + let log = str::from_utf8(&self.log_buffer).unwrap_or("non-UTF8 bytes"); + dlog!("VM {}: {}\n", id, log); + self.log_buffer.clear(); + } else { + self.log_buffer.push(c); + } + } } -#[repr(C)] pub struct Vm { pub id: spci_vm_id_t, - /// See api.c for the partial ordering on locks. - pub lock: RawSpinLock, pub vcpu_count: spci_vcpu_count_t, + + /// VCpus of this vm. + /// Note: This field is regarded as a kind of mutable states of Vm, but is + /// not contained in VmInner, because + /// 1. Mutable inner fields are contained in VCpuState. + /// 2. VCpuState has higher lock order than one of Vm. It is nonsense to + /// lock VmInner to acquire VCpuState. pub vcpus: [VCpu; MAX_CPUS], - pub ptable: PageTable, - pub mailbox: Mailbox, - pub log_buffer: [c_char; LOG_BUFFER_SIZE], - pub log_buffer_length: usize, - /// Wait entries to be used when waiting on other VM mailboxes. - pub wait_entries: [WaitEntry; MAX_VMS], + /// See api.c for the partial ordering on locks. 
+ pub inner: SpinLock<VmInner>, pub aborting: AtomicBool, - pub arch: ArchVm, +} + +impl Vm { + /// Returns the root address of the page table of this VM. It is safe not to + /// lock `self.inner` because the value of `ptable.as_raw()` doesn't change + /// after `ptable` is initialized. Of course, the actual page table contents + /// may change while the VM runs. That's why this function returns `paddr_t` + /// rather than `&RawPage`. + pub fn get_ptable_raw(&self) -> paddr_t { + unsafe { self.inner.get_unchecked().ptable.as_raw() } + } + + pub fn debug_log(&self, c: c_char) { + self.inner.lock().debug_log(self.id, c) + } } /// Encapsulates a VM whose lock is held. @@ -128,25 +542,10 @@ pub unsafe extern "C" fn vm_init( mem::size_of::<Vm>(), ); - list_init(&mut (*vm).mailbox.waiter_list); - list_init(&mut (*vm).mailbox.ready_list); - sl_init(&mut (*vm).lock); - (*vm).id = vm_count; (*vm).vcpu_count = vcpu_count; - (*vm).mailbox.state = MailboxState::Empty; (*vm).aborting = AtomicBool::new(false); - - if !mm_vm_init(&mut (*vm).ptable, ppool) { - return false; - } - - // Initialise waiter entries. - for i in 0..MAX_VMS { - (*vm).wait_entries[i].waiting_vm = vm; - list_init(&mut (*vm).wait_entries[i].wait_links); - list_init(&mut (*vm).wait_entries[i].ready_links); - } + (*vm).inner.get_mut_unchecked().init(vm, &mut *ppool); // Do basic initialization of vcpus. for i in 0..vcpu_count { @@ -179,7 +578,7 @@ pub unsafe extern "C" fn vm_find(id: spci_vm_id_t) -> *mut Vm { pub unsafe extern "C" fn vm_lock(vm: *mut Vm) -> VmLocked { let locked = VmLocked { vm }; - sl_lock(&(*vm).lock); + (*vm).inner.lock().into_raw(); locked } @@ -193,7 +592,7 @@ pub unsafe extern "C" fn vm_lock_both(vm1: *mut Vm, vm2: *mut Vm) -> TwoVmLocked vm2: VmLocked { vm: vm2 }, }; - sl_lock_both(&(*vm1).lock, &(*vm2).lock); + SpinLock::lock_both(&(*vm1).inner, &(*vm2).inner); dual_lock } @@ -202,7 +601,9 @@ pub unsafe extern "C" fn vm_lock_both(vm1: *mut Vm, vm2: *mut Vm) -> TwoVmLocked /// the fact that the VM is no longer locked. #[no_mangle] pub unsafe extern "C" fn vm_unlock(locked: *mut VmLocked) { - sl_unlock(&(*(*locked).vm).lock); + let guard = + SpinLockGuard::<'static, VmInner>::from_raw(&(*(*locked).vm).inner as *const _ as usize); + mem::drop(guard); (*locked).vm = ptr::null_mut(); } @@ -213,3 +614,23 @@ pub unsafe extern "C" fn vm_get_vcpu(vm: *mut Vm, vcpu_index: spci_vcpu_index_t) assert!(vcpu_index < (*vm).vcpu_count); &mut (*vm).vcpus[vcpu_index as usize] } + +#[no_mangle] +pub unsafe extern "C" fn vm_get_id(vm: *const Vm) -> spci_vm_id_t { + (*vm).id +} + +#[no_mangle] +pub unsafe extern "C" fn vm_get_ptable(vm: *mut Vm) -> *mut PageTable { + &mut (*vm).inner.get_mut_unchecked().ptable +} + +#[no_mangle] +pub unsafe extern "C" fn vm_get_arch(vm: *mut Vm) -> *mut ArchVm { + &mut (*vm).inner.get_mut_unchecked().arch +} + +#[no_mangle] +pub unsafe extern "C" fn vm_get_vcpu_count(vm: *const Vm) -> spci_vcpu_count_t { + (*vm).vcpu_count +} diff --git a/inc/hf/cpu.h b/inc/hf/cpu.h index b829a7695..2f75a78c1 100644 --- a/inc/hf/cpu.h +++ b/inc/hf/cpu.h @@ -67,32 +67,15 @@ struct vcpu_fault_info { int mode; }; -struct vcpu { - /* - * Protects accesses to vCPU's state and architecture registers. If a - * vCPU is running, its execution lock is logically held by the - * running pCPU. - */ - struct spinlock execution_lock; - - - /* - * Protects accesses to vCPU's interrupts. - */ - struct spinlock interrupts_lock; - - /* - * The state is only changed in the context of the vCPU being run.
This - * ensures the scheduler can easily keep track of the vCPU state as - * transitions are indicated by the return code from the run call. - */ - enum vcpu_state state; - - struct cpu *cpu; - struct vm *vm; - struct arch_regs regs; - struct interrupts interrupts; -}; +/** + * Vcpu has a forward declaration only; its detailed structure has moved to + * the Rust code (vcpu.rs). + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvisibility" +struct vcpu; +struct vm; +#pragma GCC diagnostic pop /** Encapsulates a vCPU whose execution lock is held. */ struct vcpu_execution_locked { @@ -127,6 +110,12 @@ void vcpu_unlock(struct vcpu_execution_locked *locked); void vcpu_init(struct vcpu *vcpu, struct vm *vm); void vcpu_on(struct vcpu_execution_locked vcpu, ipaddr_t entry, uintreg_t arg); spci_vcpu_index_t vcpu_index(const struct vcpu *vcpu); +struct arch_regs *vcpu_get_regs(struct vcpu *vcpu); +const struct arch_regs *vcpu_get_regs_const(const struct vcpu *vcpu); +struct vm *vcpu_get_vm(struct vcpu *vcpu); +struct cpu *vcpu_get_cpu(struct vcpu *vcpu); +void vcpu_set_cpu(struct vcpu *vcpu, struct cpu *cpu); +struct interrupts *vcpu_get_interrupts(struct vcpu *vcpu); bool vcpu_is_off(struct vcpu_execution_locked vcpu); bool vcpu_secondary_reset_and_start(struct vcpu *vcpu, ipaddr_t entry, uintreg_t arg); diff --git a/inc/hf/vm.h b/inc/hf/vm.h index 4abff00e0..954a3eb24 100644 --- a/inc/hf/vm.h +++ b/inc/hf/vm.h @@ -77,25 +77,14 @@ struct mailbox { struct list_entry ready_list; }; -struct vm { - spci_vm_id_t id; - /** See api.c for the partial ordering on locks. */ - struct spinlock lock; - spci_vcpu_count_t vcpu_count; - struct vcpu vcpus[MAX_CPUS]; - struct mm_ptable ptable; - struct mailbox mailbox; - char log_buffer[LOG_BUFFER_SIZE]; - size_t log_buffer_length; - - /** Wait entries to be used when waiting on other VM mailboxes. */ - struct wait_entry wait_entries[MAX_VMS]; - - atomic_bool aborting; - - /** Arch-specific VM information. */ - struct arch_vm arch; -}; +/** + * Vm has a forward declaration only; its detailed structure has moved to the + * Rust code (vm.rs). + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvisibility" +struct vm; +#pragma GCC diagnostic pop /** Encapsulates a VM whose lock is held.
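 * The spinlock that guards the VM now lives on the Rust side (vm.rs); this * struct only records which VM is currently locked, for use by C callers.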
*/ struct vm_locked { @@ -116,3 +105,7 @@ struct vm_locked vm_lock(struct vm *vm); struct two_vm_locked vm_lock_both(struct vm *vm1, struct vm *vm2); void vm_unlock(struct vm_locked *locked); struct vcpu *vm_get_vcpu(struct vm *vm, spci_vcpu_index_t vcpu_index); +spci_vm_id_t vm_get_id(struct vm *vm); +struct mm_ptable *vm_get_ptable(struct vm *vm); +struct arch_vm *vm_get_arch(struct vm *vm); +spci_vcpu_count_t vm_get_vcpu_count(struct vm *vm); diff --git a/src/arch/aarch64/hypervisor/BUILD.gn b/src/arch/aarch64/hypervisor/BUILD.gn index 984adbfe3..85b2e3773 100644 --- a/src/arch/aarch64/hypervisor/BUILD.gn +++ b/src/arch/aarch64/hypervisor/BUILD.gn @@ -24,7 +24,6 @@ source_set("hypervisor") { sources += [ "handler.c", - "offsets.c", "psci_handler.c", ] diff --git a/src/arch/aarch64/hypervisor/handler.c b/src/arch/aarch64/hypervisor/handler.c index c3cc5072b..5a4177cc8 100644 --- a/src/arch/aarch64/hypervisor/handler.c +++ b/src/arch/aarch64/hypervisor/handler.c @@ -52,8 +52,8 @@ static struct vcpu *current(void) */ void complete_saving_state(struct vcpu *vcpu) { - vcpu->regs.peripherals.cntv_cval_el0 = read_msr(cntv_cval_el0); - vcpu->regs.peripherals.cntv_ctl_el0 = read_msr(cntv_ctl_el0); + vcpu_get_regs(vcpu)->peripherals.cntv_cval_el0 = read_msr(cntv_cval_el0); + vcpu_get_regs(vcpu)->peripherals.cntv_ctl_el0 = read_msr(cntv_ctl_el0); api_regs_state_saved(vcpu); @@ -63,7 +63,7 @@ void complete_saving_state(struct vcpu *vcpu) * This is used to emulate the virtual timer for the primary in case it * should fire while the secondary is running. */ - if (vcpu->vm->id == HF_PRIMARY_VM_ID) { + if (vm_get_id(vcpu_get_vm(vcpu)) == HF_PRIMARY_VM_ID) { /* * Clear timer control register before copying compare value, to * avoid a spurious timer interrupt. This could be a problem if @@ -87,15 +87,15 @@ void begin_restoring_state(struct vcpu *vcpu) * is configured as edge-triggered, as it would then be latched in. */ write_msr(cntv_ctl_el0, 0); - write_msr(cntv_cval_el0, vcpu->regs.peripherals.cntv_cval_el0); - write_msr(cntv_ctl_el0, vcpu->regs.peripherals.cntv_ctl_el0); + write_msr(cntv_cval_el0, vcpu_get_regs(vcpu)->peripherals.cntv_cval_el0); + write_msr(cntv_ctl_el0, vcpu_get_regs(vcpu)->peripherals.cntv_ctl_el0); /* * If we are switching (back) to the primary, disable the EL2 physical * timer which was being used to emulate the EL0 virtual timer, as the * virtual timer is now running for the primary again. */ - if (vcpu->vm->id == HF_PRIMARY_VM_ID) { + if (vm_get_id(vcpu_get_vm(vcpu)) == HF_PRIMARY_VM_ID) { write_msr(cnthp_ctl_el2, 0); write_msr(cnthp_cval_el2, 0); } @@ -149,10 +149,10 @@ static void invalidate_vm_tlb(void) */ void maybe_invalidate_tlb(struct vcpu *vcpu) { - size_t current_cpu_index = cpu_index(vcpu->cpu); + size_t current_cpu_index = cpu_index(vcpu_get_cpu(vcpu)); spci_vcpu_index_t new_vcpu_index = vcpu_index(vcpu); - if (vcpu->vm->arch.last_vcpu_on_cpu[current_cpu_index] != + if (vm_get_arch(vcpu_get_vm(vcpu))->last_vcpu_on_cpu[current_cpu_index] != new_vcpu_index) { /* * The vCPU has changed since the last time this VM was run on @@ -161,7 +161,7 @@ void maybe_invalidate_tlb(struct vcpu *vcpu) invalidate_vm_tlb(); /* Record the fact that this vCPU is now running on this CPU. */ - vcpu->vm->arch.last_vcpu_on_cpu[current_cpu_index] = + vm_get_arch(vcpu_get_vm(vcpu))->last_vcpu_on_cpu[current_cpu_index] = new_vcpu_index; } } @@ -360,15 +360,15 @@ struct hvc_handler_return hvc_handler(uintreg_t arg0, uintreg_t arg1, * directly in the register. 
*/ set_virtual_interrupt_current( - current()->interrupts.enabled_and_pending_count > 0); + vcpu_get_interrupts(current())->enabled_and_pending_count > 0); } else { /* * About to switch vCPUs, set the bit for the vCPU to which we * are switching in the saved copy of the register. */ set_virtual_interrupt( - &ret.new->regs, - ret.new->interrupts.enabled_and_pending_count > 0); + vcpu_get_regs(ret.new), + vcpu_get_interrupts(ret.new)->enabled_and_pending_count > 0); } return ret; @@ -412,7 +412,7 @@ static struct vcpu_fault_info fault_info_init(uintreg_t esr, struct vcpu_fault_info r; r.mode = mode; - r.pc = va_init(vcpu->regs.pc); + r.pc = va_init(vcpu_get_regs_const(vcpu)->pc); /* * Check the FnV bit, which is only valid if dfsc/ifsc is 010000. It @@ -439,7 +439,7 @@ struct vcpu *sync_lower_exception(uintreg_t esr) switch (esr >> 26) { case 0x01: /* EC = 000001, WFI or WFE. */ /* Skip the instruction. */ - vcpu->regs.pc += (esr & (1u << 25)) ? 4 : 2; + vcpu_get_regs(vcpu)->pc += (esr & (1u << 25)) ? 4 : 2; /* Check TI bit of ISS, 0 = WFI, 1 = WFE. */ if (esr & 1) { /* WFE */ @@ -469,27 +469,27 @@ struct vcpu *sync_lower_exception(uintreg_t esr) break; case 0x17: /* EC = 010111, SMC instruction. */ { - uintreg_t smc_pc = vcpu->regs.pc; + uintreg_t smc_pc = vcpu_get_regs(vcpu)->pc; uintreg_t ret; struct vcpu *next = NULL; - if (!smc_handler(vcpu, vcpu->regs.r[0], vcpu->regs.r[1], - vcpu->regs.r[2], vcpu->regs.r[3], &ret, + if (!smc_handler(vcpu, vcpu_get_regs(vcpu)->r[0], vcpu_get_regs(vcpu)->r[1], + vcpu_get_regs(vcpu)->r[2], vcpu_get_regs(vcpu)->r[3], &ret, &next)) { - dlog("Unsupported SMC call: 0x%x\n", vcpu->regs.r[0]); + dlog("Unsupported SMC call: 0x%x\n", vcpu_get_regs(vcpu)->r[0]); ret = PSCI_ERROR_NOT_SUPPORTED; } /* Skip the SMC instruction. */ - vcpu->regs.pc = smc_pc + (esr & (1u << 25) ? 4 : 2); - vcpu->regs.r[0] = ret; + vcpu_get_regs(vcpu)->pc = smc_pc + (esr & (1u << 25) ? 4 : 2); + vcpu_get_regs(vcpu)->r[0] = ret; return next; } default: dlog("Unknown lower sync exception pc=0x%x, esr=0x%x, " "ec=0x%x\n", - vcpu->regs.pc, esr, esr >> 26); + vcpu_get_regs(vcpu)->pc, esr, esr >> 26); break; } diff --git a/src/arch/aarch64/hypervisor/offsets.c b/src/arch/aarch64/hypervisor/offsets.c deleted file mode 100644 index 1f32581e6..000000000 --- a/src/arch/aarch64/hypervisor/offsets.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright 2018 The Hafnium Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "offsets.h" - -#include "hf/cpu.h" -#include "hf/static_assert.h" - -#define CHECK_OFFSET(name, type, field) \ - CHECK_OFFSET_1(#name, name, offsetof(type, field)) -#define CHECK_OFFSET_1(name, actual, expected) \ - static_assert((actual) == (expected), \ - "Offset " name " should be " #expected \ - " and not " #actual) - -CHECK_OFFSET(CPU_ID, struct cpu, id); -CHECK_OFFSET(CPU_STACK_BOTTOM, struct cpu, stack_bottom); -CHECK_OFFSET(VCPU_REGS, struct vcpu, regs); -CHECK_OFFSET(VCPU_LAZY, struct vcpu, regs.lazy); -CHECK_OFFSET(VCPU_FREGS, struct vcpu, regs.fp); - -#ifdef VCPU_GIC -CHECK_OFFSET(VCPU_GIC, struct vcpu, regs.gic); -#endif diff --git a/src/arch/aarch64/hypervisor/psci_handler.c b/src/arch/aarch64/hypervisor/psci_handler.c index d5668e5f4..cbc4333bb 100644 --- a/src/arch/aarch64/hypervisor/psci_handler.c +++ b/src/arch/aarch64/hypervisor/psci_handler.c @@ -160,14 +160,14 @@ bool psci_primary_vm_handler(struct vcpu *vcpu, uint32_t func, uintreg_t arg0, * standby power state, the SMC will return and the updated * vcpu registers will be ignored. */ - arch_regs_set_pc_arg(&vcpu->regs, ipa_init(arg1), arg2); + arch_regs_set_pc_arg(vcpu_get_regs(vcpu), ipa_init(arg1), arg2); *ret = smc64(PSCI_CPU_SUSPEND, arg0, (uintreg_t)&cpu_entry, - (uintreg_t)vcpu->cpu); + (uintreg_t)vcpu_get_cpu(vcpu)); break; } case PSCI_CPU_OFF: - cpu_off(vcpu->cpu); + cpu_off(vcpu_get_cpu(vcpu)); smc32(PSCI_CPU_OFF, 0, 0, 0); panic("CPU off failed"); break; @@ -281,7 +281,7 @@ bool psci_secondary_vm_handler(struct vcpu *vcpu, uint32_t func, uintreg_t arg0, case PSCI_AFFINITY_INFO: { cpu_id_t target_affinity = arg0; uint32_t lowest_affinity_level = arg1; - struct vm *vm = vcpu->vm; + struct vm *vm = vcpu_get_vm(vcpu); struct vcpu *target_vcpu; struct vcpu_execution_locked vcpu_locked; spci_vcpu_index_t target_vcpu_index = @@ -293,7 +293,7 @@ bool psci_secondary_vm_handler(struct vcpu *vcpu, uint32_t func, uintreg_t arg0, break; } - if (target_vcpu_index >= vm->vcpu_count) { + if (target_vcpu_index >= vm_get_vcpu_count(vm)) { *ret = PSCI_ERROR_INVALID_PARAMETERS; break; } @@ -336,10 +336,10 @@ bool psci_secondary_vm_handler(struct vcpu *vcpu, uint32_t func, uintreg_t arg0, uint64_t context_id = arg2; spci_vcpu_index_t target_vcpu_index = vcpu_id_to_index(target_cpu); - struct vm *vm = vcpu->vm; + struct vm *vm = vcpu_get_vm(vcpu); struct vcpu *target_vcpu; - if (target_vcpu_index >= vm->vcpu_count) { + if (target_vcpu_index >= vm_get_vcpu_count(vm)) { *ret = PSCI_ERROR_INVALID_PARAMETERS; break; } @@ -397,7 +397,7 @@ bool psci_handler(struct vcpu *vcpu, uint32_t func, uintreg_t arg0, uintreg_t arg1, uintreg_t arg2, uintreg_t *ret, struct vcpu **next) { - if (vcpu->vm->id == HF_PRIMARY_VM_ID) { + if (vm_get_id(vcpu_get_vm(vcpu)) == HF_PRIMARY_VM_ID) { return psci_primary_vm_handler(vcpu, func, arg0, arg1, arg2, ret); } diff --git a/src/load.c b/src/load.c index 8effd012e..57167d4a8 100644 --- a/src/load.c +++ b/src/load.c @@ -94,7 +94,7 @@ bool load_primary(struct mm_stage1_locked stage1_locked, return false; } - if (vm->id != HF_PRIMARY_VM_ID) { + if (vm_get_id(vm) != HF_PRIMARY_VM_ID) { dlog("Primary vm was not given correct id\n"); return false; } @@ -102,14 +102,14 @@ bool load_primary(struct mm_stage1_locked stage1_locked, /* Map the 1TB of memory. */ /* TODO: We should do a whitelist rather than a blacklist. 
*/ if (!mm_vm_identity_map( - &vm->ptable, pa_init(0), + vm_get_ptable(vm), pa_init(0), pa_init(UINT64_C(1024) * 1024 * 1024 * 1024), MM_MODE_R | MM_MODE_W | MM_MODE_X, NULL, ppool)) { dlog("Unable to initialise memory for primary vm\n"); return false; } - if (!mm_vm_unmap_hypervisor(&vm->ptable, ppool)) { + if (!mm_vm_unmap_hypervisor(vm_get_ptable(vm), ppool)) { dlog("Unable to unmap hypervisor from primary vm\n"); return false; } @@ -286,7 +286,7 @@ bool load_secondary(struct mm_stage1_locked stage1_locked, } /* Grant the VM access to the memory. */ - if (!mm_vm_identity_map(&vm->ptable, secondary_mem_begin, + if (!mm_vm_identity_map(vm_get_ptable(vm), secondary_mem_begin, secondary_mem_end, MM_MODE_R | MM_MODE_W | MM_MODE_X, &secondary_entry, ppool)) { @@ -295,7 +295,7 @@ bool load_secondary(struct mm_stage1_locked stage1_locked, } /* Deny the primary VM access to this memory. */ - if (!mm_vm_unmap(&primary->ptable, secondary_mem_begin, + if (!mm_vm_unmap(vm_get_ptable(primary), secondary_mem_begin, secondary_mem_end, ppool)) { dlog("Unable to unmap secondary VM from primary VM\n"); return false; diff --git a/src/main.c b/src/main.c index 9a66975a2..6695292cc 100644 --- a/src/main.c +++ b/src/main.c @@ -157,11 +157,11 @@ struct vcpu *cpu_main(struct cpu *c) } vcpu = vm_get_vcpu(vm_find(HF_PRIMARY_VM_ID), cpu_index(c)); - vm = vcpu->vm; - vcpu->cpu = c; + vm = vcpu_get_vm(vcpu); + vcpu_set_cpu(vcpu, c); /* Reset the registers to give a clean start for the primary's vCPU. */ - arch_regs_reset(&vcpu->regs, true, vm->id, c->id, vm->ptable.root); + arch_regs_reset(vcpu_get_regs(vcpu), true, vm_get_id(vm), c->id, vm_get_ptable(vm)->root); return vcpu; } diff --git a/src/spci_architected_message.c b/src/spci_architected_message.c index 09055319b..552a078c2 100644 --- a/src/spci_architected_message.c +++ b/src/spci_architected_message.c @@ -221,8 +221,8 @@ bool spci_msg_check_transition(struct vm *to, struct vm *from, * Ensure that the memory range is mapped with the same * mode. */ - if (!mm_vm_get_mode(&from->ptable, begin, end, orig_from_mode) || - !mm_vm_get_mode(&to->ptable, begin, end, &orig_to_mode)) { + if (!mm_vm_get_mode(vm_get_ptable(from), begin, end, orig_from_mode) || + !mm_vm_get_mode(vm_get_ptable(to), begin, end, &orig_to_mode)) { return false; }