From d43d229192d97f9165e5483ac921fff00645857a Mon Sep 17 00:00:00 2001 From: Dan Cross Date: Tue, 15 Aug 2023 00:54:47 +0000 Subject: [PATCH] stm32h7-eth: Properly maintain Tx tail pointer Analagous to #1493, we should maintain the Tx tail pointer properly, as well. Note that since the device won't examine a Tx descriptor until the tail pointer is set appropriately, we can relax stores and rely on the barrier in `tx_notify` to flush everything to RAM properly before writing the tail pointer and kicking off a DMA. --- drv/stm32h7-eth/src/lib.rs | 14 +++++++------ drv/stm32h7-eth/src/ring.rs | 42 ++++++++++++++++++------------------- 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/drv/stm32h7-eth/src/lib.rs b/drv/stm32h7-eth/src/lib.rs index 0be985597..2079d09d5 100644 --- a/drv/stm32h7-eth/src/lib.rs +++ b/drv/stm32h7-eth/src/lib.rs @@ -150,14 +150,16 @@ impl Ethernet { dma.dmacrx_rlr .write(|w| unsafe { w.rdrl().bits(rx_ring_len) }); - // Poke both tail pointers so the hardware looks at the descriptors. We - // completely initialize the descriptor array, so the tail pointer is - // always the end. + // Poke the receive tail pointer so that the hardware looks at + // descriptors. We completely initialize the descriptor array, so the + // tail pointer is always as close to the end as we can make it. // // Doing the same drop-bottom-two-bits stuff that we had to do for DLARs // above. - dma.dmactx_dtpr - .write(|w| unsafe { w.tdt().bits(tx_ring.tail_ptr() as u32 >> 2) }); + // + // We don't set the transmit tail pointer until we enqueue a packet, + // as we don't want the hardware to race against software filling in + // descriptors. atomic::fence(Ordering::Release); dma.dmacrx_dtpr.write(|w| unsafe { w.rdt().bits(rx_ring.first_tail_ptr() as u32 >> 2) @@ -337,7 +339,7 @@ impl Ethernet { // Poke the tail pointer so the hardware knows to recheck (dropping two // bottom bits because svd2rust) self.dma.dmactx_dtpr.write(|w| unsafe { - w.tdt().bits(self.tx_ring.tail_ptr() as u32 >> 2) + w.tdt().bits(self.tx_ring.next_tail_ptr() as u32 >> 2) }); } diff --git a/drv/stm32h7-eth/src/ring.rs b/drv/stm32h7-eth/src/ring.rs index 93982931d..23574a3c9 100644 --- a/drv/stm32h7-eth/src/ring.rs +++ b/drv/stm32h7-eth/src/ring.rs @@ -144,12 +144,12 @@ impl TxRing { // ensure that they're owned by us (not the hardware). for desc in storage { #[cfg(not(feature = "vlan"))] - desc.tdes[3].store(0, Ordering::Release); + desc.tdes[3].store(0, Ordering::Relaxed); #[cfg(feature = "vlan")] { - desc.tdes[0][3].store(0, Ordering::Release); - desc.tdes[1][3].store(0, Ordering::Release); + desc.tdes[0][3].store(0, Ordering::Relaxed); + desc.tdes[1][3].store(0, Ordering::Relaxed); } } Self { @@ -165,12 +165,10 @@ impl TxRing { self.storage.as_ptr() } - /// Returns a pointer to the byte just past the end of the `TxDesc` ring. - /// This too gets loaded into the DMA controller, so that it knows what - /// section of the ring is initialized and can be read. (The answer is "all - /// of it.") - pub fn tail_ptr(&self) -> *const TxDesc { - self.storage.as_ptr_range().end + /// Returns a pointer to the "next" descriptor in the ring. We load this + /// into the device so that the DMA engine knows what descriptors are free. + pub fn next_tail_ptr(&self) -> *const TxDesc { + self.base_ptr().wrapping_add(self.next.get()) } /// Increments the `next` descriptor index, modulo the length of the ring. @@ -241,10 +239,10 @@ impl TxRing { let result = body(buffer); - // Program the descriptor to represent the packet. We program - // carefully to ensure that the memory accesses happen in the right - // order: the entire descriptor must be written before the OWN bit - // is set in TDES3 using a RELEASE store. + // Program the descriptor to represent the packet. We use relaxed + // memory ordering here as we currently own the descriptor, and + // a memory barrier will be performed before we update the tail + // pointer register to transfer descriptor ownership to the device. d.tdes[0].store(buffer.as_ptr() as u32, Ordering::Relaxed); d.tdes[1].store(0, Ordering::Relaxed); d.tdes[2].store(len as u32, Ordering::Relaxed); @@ -253,7 +251,7 @@ impl TxRing { | 1 << TDES3_LD_BIT | TDES3_CIC_CHECKSUMS_ENABLED << TDES3_CIC_BIT | len as u32; - d.tdes[3].store(tdes3, Ordering::Release); // <-- release + d.tdes[3].store(tdes3, Ordering::Relaxed); self.incr_next(); @@ -317,14 +315,14 @@ impl TxRing { let result = body(buffer); // Program the context descriptor to configure the VLAN tag. We - // program carefully to ensure that the memory accesses happen - // in the right order: the entire descriptor must be written before - // the OWN bit is set in TDES3 using a RELEASE store. + // use relaxed ordering here because we own the descriptor, and the + // code that transfers descriptor ownership to the device must + // perform a barrier before doing so. let tdes3 = 1 << TDES3_OWN_BIT | 1 << TDES3_CTXT_BIT | 1 << TDES3_VLTV_BIT | u32::from(vid); - d.tdes[0][3].store(tdes3, Ordering::Release); // <-- release + d.tdes[0][3].store(tdes3, Ordering::Relaxed); // Program the tx descriptor to represent the packet, using the // same strategy as above for memory access ordering. @@ -337,7 +335,7 @@ impl TxRing { | 1 << TDES3_LD_BIT | TDES3_CIC_CHECKSUMS_ENABLED << TDES3_CIC_BIT | len as u32; - d.tdes[1][3].store(tdes3, Ordering::Release); // <-- release + d.tdes[1][3].store(tdes3, Ordering::Relaxed); self.incr_next(); @@ -473,8 +471,10 @@ impl RxRing { } /// Programs the words in `d` to prepare to receive into `buffer` and sets - /// `d` accessible to hardware. The final write to make it accessible is - /// performed with Release ordering to get a barrier. + /// `d` accessible to hardware. We use relaxed ordering here, since we own + /// the descriptor currently and any code that writes to the tail pointer + /// register to transfer ownership to the device must first perform a + /// memory barrier. fn set_descriptor(d: &RxDesc, buffer: *mut [u8; BUFSZ]) { d.rdes[0].store(buffer as u32, Ordering::Relaxed); d.rdes[1].store(0, Ordering::Relaxed);