From 6f256d7a5c405613efc030c0c646d13f909b42f6 Mon Sep 17 00:00:00 2001 From: lihuijin <501296508@qq.com> Date: Tue, 12 Nov 2024 15:57:15 +0800 Subject: [PATCH] fix(DCache): modify format * combined tag_resp and meta_resp in Bundle * TagArray writePorts = 1 can be omitted --- .../cache/dcache/DCacheWrapper.scala | 20 ++++++------ .../cache/dcache/meta/TagArray.scala | 31 ++++++++++--------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala b/src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala index aeaf78fa08..e56e163cfb 100644 --- a/src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala +++ b/src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala @@ -949,7 +949,7 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame val errorArray = Module(new L1FlagMetaArray(readPorts = LoadPipelineWidth + 1, writePorts = 1)) val prefetchArray = Module(new L1PrefetchSourceArray(readPorts = PrefetchArrayReadPort, writePorts = 1 + LoadPipelineWidth)) // prefetch flag array val accessArray = Module(new L1FlagMetaArray(readPorts = AccessArrayReadPort, writePorts = LoadPipelineWidth + 1)) - val tagArray = Module(new DuplicatedTagArray(readPorts = TagReadPort, writePorts=1)) + val tagArray = Module(new DuplicatedTagArray(readPorts = TagReadPort)) val prefetcherMonitor = Module(new PrefetcherMonitor) val fdpMonitor = Module(new FDPrefetcherMonitor) val bloomFilter = Module(new BloomFilter(BLOOM_FILTER_ENTRY_NUM, true)) @@ -1032,15 +1032,17 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame // refillPipe.io.meta_write ) if(StorePrefetchL1Enabled) { - meta_resp_ports.zip(tagArray.io.meta_resp).foreach { case (p, r) => p := r } + meta_resp_ports.zipWithIndex.foreach({ case (meta_resp, i) => + meta_resp := tagArray.io.resp(i).meta }) } else { (meta_resp_ports.take(HybridLoadReadBase + 1) ++ - 
meta_resp_ports.takeRight(backendParams.HyuCnt)).zip(tagArray.io.meta_resp).foreach { case (p, r) => p := r } + meta_resp_ports.takeRight(backendParams.HyuCnt)).zipWithIndex.foreach({ case (meta_resp, i) => + meta_resp := tagArray.io.resp(i).meta }) meta_read_ports.drop(HybridLoadReadBase + 1).take(HybridStoreReadBase).foreach { case p => p.ready := false.B } meta_resp_ports.drop(HybridLoadReadBase + 1).take(HybridStoreReadBase).foreach { case p => p := 0.U.asTypeOf(p) } } - meta_write_ports.zip(tagArray.io.meta_write).foreach { case (p, w) => w <> p } + mainPipe.io.meta_write <> tagArray.io.meta_write // read extra meta (exclude stu) (meta_read_ports.take(HybridLoadReadBase + 1) ++ meta_read_ports.takeRight(backendParams.HyuCnt)).zip(errorArray.io.read).foreach { case (p, r) => r <> p } @@ -1123,14 +1125,14 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame ldu.take(HybridLoadReadBase).zipWithIndex.foreach { case (ld, i) => tagArray.io.read(i) <> ld.io.tag_read - ld.io.tag_resp := tagArray.io.tag_resp(i) + ld.io.tag_resp := tagArray.io.resp(i).tag ld.io.tag_read.ready := !tag_write_intend } if(StorePrefetchL1Enabled) { stu.take(HybridStoreReadBase).zipWithIndex.foreach { case (st, i) => tagArray.io.read(HybridLoadReadBase + i) <> st.io.tag_read - st.io.tag_resp := tagArray.io.tag_resp(HybridLoadReadBase + i) + st.io.tag_resp := tagArray.io.resp(HybridLoadReadBase + i).tag st.io.tag_read.ready := !tag_write_intend } }else { @@ -1167,11 +1169,11 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame } // tag resp - ldu(HybridLoadTagReadPort).io.tag_resp := tagArray.io.tag_resp(TagReadPort) - stu(HybridStoreTagReadPort).io.tag_resp := tagArray.io.tag_resp(TagReadPort) + ldu(HybridLoadTagReadPort).io.tag_resp := tagArray.io.resp(TagReadPort).tag + stu(HybridStoreTagReadPort).io.tag_resp := tagArray.io.resp(TagReadPort).tag } tagArray.io.read.last <> mainPipe.io.tag_read - mainPipe.io.tag_resp := 
tagArray.io.tag_resp.last + mainPipe.io.tag_resp := tagArray.io.resp.last.tag val fake_tag_read_conflict_this_cycle = PopCount(ldu.map(ld=> ld.io.tag_read.valid)) XSPerfAccumulate("fake_tag_read_conflict", fake_tag_read_conflict_this_cycle) diff --git a/src/main/scala/xiangshan/cache/dcache/meta/TagArray.scala b/src/main/scala/xiangshan/cache/dcache/meta/TagArray.scala index ff13b4cc51..a411f212a8 100644 --- a/src/main/scala/xiangshan/cache/dcache/meta/TagArray.scala +++ b/src/main/scala/xiangshan/cache/dcache/meta/TagArray.scala @@ -39,6 +39,11 @@ class TagEccWriteReq(implicit p: Parameters) extends TagReadReq { val ecc = UInt(eccTagBits.W) } +class TagMetaResp(implicit p: Parameters) extends DCacheBundle { + val tag = Vec(nWays, UInt(tagBits.W)) + val meta = Vec(nWays, new Meta) +} + case object HasTagEccParam abstract class AbstractTagArray(implicit p: Parameters) extends DCacheModule { @@ -48,8 +53,7 @@ abstract class AbstractTagArray(implicit p: Parameters) extends DCacheModule { class TagArray(implicit p: Parameters) extends AbstractTagArray { val io = IO(new Bundle() { val read = Flipped(DecoupledIO(new TagReadReq)) - val tag_resp = Output(Vec(nWays, UInt(tagBits.W))) - val meta_resp = Output(Vec(nWays, new Meta)) + val resp = Output(new TagMetaResp) val tag_write = Flipped(DecoupledIO(new TagWriteReq)) val meta_write = Flipped(DecoupledIO(new CohMetaWriteReq)) // ecc @@ -127,8 +131,8 @@ class TagArray(implicit p: Parameters) extends AbstractTagArray { tag_array.io.r.req.valid := ren tag_array.io.r.req.bits.apply(setIdx = io.read.bits.idx) tag_array.clock := ClockGate(false.B, ren | wen, clock) - io.tag_resp := tag_array.io.r.resp.data.map(r => r(tagBits - 1, 0)) - io.meta_resp := VecInit(tag_array.io.r.resp.data.map(r => r(metaBits + tagBits - 1, tagBits).asTypeOf(new Meta))) + io.resp.tag := tag_array.io.r.resp.data.map(r => r(tagBits - 1, 0)) + io.resp.meta := VecInit(tag_array.io.r.resp.data.map(r => r(metaBits + tagBits - 1, tagBits).asTypeOf(new 
Meta))) XSPerfAccumulate("part_tag_read_counter", tag_array.io.r.req.valid) val ecc_ren = io.ecc_read.fire @@ -155,13 +159,12 @@ class TagArray(implicit p: Parameters) extends AbstractTagArray { } } -class DuplicatedTagArray(readPorts: Int, writePorts: Int)(implicit p: Parameters) extends AbstractTagArray { +class DuplicatedTagArray(readPorts: Int)(implicit p: Parameters) extends AbstractTagArray { val io = IO(new Bundle() { val read = Vec(readPorts, Flipped(DecoupledIO(new TagReadReq))) - val tag_resp = Output(Vec(readPorts, Vec(nWays, UInt(encTagBits.W)))) - val meta_resp = Output(Vec(readPorts, Vec(nWays, new Meta))) + val resp = Output(Vec(readPorts, new TagMetaResp)) val tag_write = Flipped(DecoupledIO(new TagWriteReq)) - val meta_write = Vec(writePorts, Flipped(DecoupledIO(new CohMetaWriteReq))) + val meta_write = Flipped(DecoupledIO(new CohMetaWriteReq)) // customized cache op port val cacheOp = Flipped(new L1CacheInnerOpIO) val cacheOp_req_dup = Vec(DCacheDupNum, Flipped(Valid(new CacheCtrlReqInfo))) @@ -180,8 +183,8 @@ class DuplicatedTagArray(readPorts: Int, writePorts: Int)(implicit p: Parameters // normal read / write array(i).io.tag_write.valid := io.tag_write.valid array(i).io.tag_write.bits := io.tag_write.bits - array(i).io.meta_write.valid := io.meta_write(0).valid - array(i).io.meta_write.bits := io.meta_write(0).bits + array(i).io.meta_write.valid := io.meta_write.valid + array(i).io.meta_write.bits := io.meta_write.bits array(i).io.ecc_write.valid := io.tag_write.valid array(i).io.ecc_write.bits.idx := io.tag_write.bits.idx array(i).io.ecc_write.bits.way_en := io.tag_write.bits.way_en @@ -191,8 +194,8 @@ class DuplicatedTagArray(readPorts: Int, writePorts: Int)(implicit p: Parameters array(i).io.read <> io.read(i) array(i).io.ecc_read.valid := io.read(i).valid array(i).io.ecc_read.bits := io.read(i).bits - io.tag_resp(i) := (array(i).io.ecc_resp zip array(i).io.tag_resp).map { case (e, r) => Cat(e, r) } - io.meta_resp(i) := array(i).io.meta_resp 
+ io.resp(i).tag := (array(i).io.ecc_resp zip array(i).io.resp.tag).map { case (e, r) => Cat(e, r) } + io.resp(i).meta := array(i).io.resp.meta // extra ports for cache op // array(i).io.ecc_write.valid := false.B // array(i).io.ecc_write.bits := DontCare @@ -201,7 +204,7 @@ class DuplicatedTagArray(readPorts: Int, writePorts: Int)(implicit p: Parameters } XSPerfAccumulate("tag_read_counter", tag_read_oh.reduce(_ + _)) io.tag_write.ready := true.B - io.meta_write(0).ready := true.B + io.meta_write.ready := true.B require(nWays <= 32) io.cacheOp.resp.bits := DontCare @@ -248,6 +251,6 @@ class DuplicatedTagArray(readPorts: Int, writePorts: Int)(implicit p: Parameters } io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp) - io.cacheOp.resp.bits.read_tag_low := Mux(io.cacheOp.resp.valid, array(0).io.tag_resp(RegNext(io.cacheOp.req.bits.wayNum)), 0.U) + io.cacheOp.resp.bits.read_tag_low := Mux(io.cacheOp.resp.valid, array(0).io.resp.tag(RegNext(io.cacheOp.req.bits.wayNum)), 0.U) io.cacheOp.resp.bits.read_tag_ecc := Mux(io.cacheOp.resp.valid, array(0).io.ecc_resp(RegNext(io.cacheOp.req.bits.wayNum)), 0.U) }