Skip to content

Commit

Permalink
area(DCache): combined tag and meta
Browse files Browse the repository at this point in the history
  • Loading branch information
jin120811 committed Nov 12, 2024
1 parent 393755c commit 874d0c3
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 50 deletions.
39 changes: 17 additions & 22 deletions src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala
Original file line number Diff line number Diff line change
Expand Up @@ -946,11 +946,10 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
//----------------------------------------
// core data structures
val bankedDataArray = if(dwpuParam.enWPU) Module(new SramedDataArray) else Module(new BankedDataArray)
val metaArray = Module(new L1CohMetaArray(readPorts = LoadPipelineWidth + 1, writePorts = 1))
val errorArray = Module(new L1FlagMetaArray(readPorts = LoadPipelineWidth + 1, writePorts = 1))
val prefetchArray = Module(new L1PrefetchSourceArray(readPorts = PrefetchArrayReadPort, writePorts = 1 + LoadPipelineWidth)) // prefetch flag array
val accessArray = Module(new L1FlagMetaArray(readPorts = AccessArrayReadPort, writePorts = LoadPipelineWidth + 1))
val tagArray = Module(new DuplicatedTagArray(readPorts = TagReadPort))
val tagArray = Module(new DuplicatedTagArray(readPorts = TagReadPort, writePorts=1))
val prefetcherMonitor = Module(new PrefetcherMonitor)
val fdpMonitor = Module(new FDPrefetcherMonitor)
val bloomFilter = Module(new BloomFilter(BLOOM_FILTER_ENTRY_NUM, true))
Expand Down Expand Up @@ -1033,19 +1032,15 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
// refillPipe.io.meta_write
)
if(StorePrefetchL1Enabled) {
meta_read_ports.zip(metaArray.io.read).foreach { case (p, r) => r <> p }
meta_resp_ports.zip(metaArray.io.resp).foreach { case (p, r) => p := r }
meta_resp_ports.zip(tagArray.io.meta_resp).foreach { case (p, r) => p := r }
} else {
(meta_read_ports.take(HybridLoadReadBase + 1) ++
meta_read_ports.takeRight(backendParams.HyuCnt)).zip(metaArray.io.read).foreach { case (p, r) => r <> p }
(meta_resp_ports.take(HybridLoadReadBase + 1) ++
meta_resp_ports.takeRight(backendParams.HyuCnt)).zip(metaArray.io.resp).foreach { case (p, r) => p := r }
meta_resp_ports.takeRight(backendParams.HyuCnt)).zip(tagArray.io.meta_resp).foreach { case (p, r) => p := r }

meta_read_ports.drop(HybridLoadReadBase + 1).take(HybridStoreReadBase).foreach { case p => p.ready := false.B }
meta_resp_ports.drop(HybridLoadReadBase + 1).take(HybridStoreReadBase).foreach { case p => p := 0.U.asTypeOf(p) }
}
meta_write_ports.zip(metaArray.io.write).foreach { case (p, w) => w <> p }

meta_write_ports.zip(tagArray.io.meta_write).foreach { case (p, w) => w <> p }
// read extra meta (excluding stu)
(meta_read_ports.take(HybridLoadReadBase + 1) ++
meta_read_ports.takeRight(backendParams.HyuCnt)).zip(errorArray.io.read).foreach { case (p, r) => r <> p }
Expand Down Expand Up @@ -1123,19 +1118,19 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
require(tagArray.io.read.size == (LoadPipelineWidth + 1))
}
// val tag_write_intend = missQueue.io.refill_pipe_req.valid || mainPipe.io.tag_write_intend
val tag_write_intend = mainPipe.io.tag_write_intend
assert(!RegNext(!tag_write_intend && tagArray.io.write.valid))
val tag_write_intend = mainPipe.io.tag_write_intend || mainPipe.io.meta_write.valid
assert(!RegNext(!tag_write_intend && tagArray.io.tag_write.valid))
ldu.take(HybridLoadReadBase).zipWithIndex.foreach {
case (ld, i) =>
tagArray.io.read(i) <> ld.io.tag_read
ld.io.tag_resp := tagArray.io.resp(i)
ld.io.tag_resp := tagArray.io.tag_resp(i)
ld.io.tag_read.ready := !tag_write_intend
}
if(StorePrefetchL1Enabled) {
stu.take(HybridStoreReadBase).zipWithIndex.foreach {
case (st, i) =>
tagArray.io.read(HybridLoadReadBase + i) <> st.io.tag_read
st.io.tag_resp := tagArray.io.resp(HybridLoadReadBase + i)
st.io.tag_resp := tagArray.io.tag_resp(HybridLoadReadBase + i)
st.io.tag_read.ready := !tag_write_intend
}
}else {
Expand Down Expand Up @@ -1172,23 +1167,23 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
}

// tag resp
ldu(HybridLoadTagReadPort).io.tag_resp := tagArray.io.resp(TagReadPort)
stu(HybridStoreTagReadPort).io.tag_resp := tagArray.io.resp(TagReadPort)
ldu(HybridLoadTagReadPort).io.tag_resp := tagArray.io.tag_resp(TagReadPort)
stu(HybridStoreTagReadPort).io.tag_resp := tagArray.io.tag_resp(TagReadPort)
}
tagArray.io.read.last <> mainPipe.io.tag_read
mainPipe.io.tag_resp := tagArray.io.resp.last
mainPipe.io.tag_resp := tagArray.io.tag_resp.last

val fake_tag_read_conflict_this_cycle = PopCount(ldu.map(ld=> ld.io.tag_read.valid))
XSPerfAccumulate("fake_tag_read_conflict", fake_tag_read_conflict_this_cycle)

val tag_write_arb = Module(new Arbiter(new TagWriteReq, 1))
// tag_write_arb.io.in(0) <> refillPipe.io.tag_write
tag_write_arb.io.in(0) <> mainPipe.io.tag_write
tagArray.io.write <> tag_write_arb.io.out
tagArray.io.tag_write <> tag_write_arb.io.out

ldu.map(m => {
m.io.vtag_update.valid := tagArray.io.write.valid
m.io.vtag_update.bits := tagArray.io.write.bits
m.io.vtag_update.valid := tagArray.io.tag_write.valid
m.io.vtag_update.bits := tagArray.io.tag_write.bits
})

//----------------------------------------
Expand Down Expand Up @@ -1253,9 +1248,9 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
dwpu.io.lookup_upd(i) <> ldu(i).io.dwpu.lookup_upd(0)
dwpu.io.cfpred(i) <> ldu(i).io.dwpu.cfpred(0)
}
dwpu.io.tagwrite_upd.valid := tagArray.io.write.valid
dwpu.io.tagwrite_upd.bits.vaddr := tagArray.io.write.bits.vaddr
dwpu.io.tagwrite_upd.bits.s1_real_way_en := tagArray.io.write.bits.way_en
dwpu.io.tagwrite_upd.valid := tagArray.io.tag_write.valid
dwpu.io.tagwrite_upd.bits.vaddr := tagArray.io.tag_write.bits.vaddr
dwpu.io.tagwrite_upd.bits.s1_real_way_en := tagArray.io.tag_write.bits.way_en
} else {
for(i <- 0 until LoadPipelineWidth){
ldu(i).io.dwpu.req(0).ready := true.B
Expand Down
5 changes: 3 additions & 2 deletions src/main/scala/xiangshan/cache/dcache/mainpipe/MainPipe.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1422,9 +1422,10 @@ class MainPipe(implicit p: Parameters) extends DCacheModule with HasPerfEvents w
io.meta_read.bits.idx := get_idx(s0_req.vaddr)
io.meta_read.bits.way_en := Mux(s0_req.replace, s0_req.replace_way_en, ~0.U(nWays.W))

io.tag_read.valid := req.valid && !set_conflict && !s0_req.replace
io.tag_read.valid := req.valid && !set_conflict
io.tag_read.valid := req.valid && s1_ready && !set_conflict
io.tag_read.bits.idx := get_idx(s0_req.vaddr)
io.tag_read.bits.way_en := ~0.U(nWays.W)
io.tag_read.bits.way_en := Mux(s0_req.replace, s0_req.replace_way_en, ~0.U(nWays.W))

io.data_read_intend := s1_valid_dup(3) && s1_need_data
io.data_readline.valid := s1_valid_dup(4) && s1_need_data
Expand Down
84 changes: 58 additions & 26 deletions src/main/scala/xiangshan/cache/dcache/meta/TagArray.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ import chisel3._
import chisel3.util._
import utility.{SRAMTemplate, XSPerfAccumulate, ClockGate}
import xiangshan.cache.CacheInstrucion._
import xiangshan.cache.Meta
import freechips.rocketchip._
import freechips.rocketchip.tilelink.ClientMetadata

class TagReadReq(implicit p: Parameters) extends DCacheBundle {
val idx = UInt(idxBits.W)
Expand All @@ -45,27 +48,39 @@ abstract class AbstractTagArray(implicit p: Parameters) extends DCacheModule {
class TagArray(implicit p: Parameters) extends AbstractTagArray {
val io = IO(new Bundle() {
val read = Flipped(DecoupledIO(new TagReadReq))
val resp = Output(Vec(nWays, UInt(tagBits.W)))
val write = Flipped(DecoupledIO(new TagWriteReq))
val tag_resp = Output(Vec(nWays, UInt(tagBits.W)))
val meta_resp = Output(Vec(nWays, new Meta))
val tag_write = Flipped(DecoupledIO(new TagWriteReq))
val meta_write = Flipped(DecoupledIO(new CohMetaWriteReq))
// ecc
val ecc_read = Flipped(DecoupledIO(new TagReadReq))
val ecc_resp = Output(Vec(nWays, UInt(eccTagBits.W)))
val ecc_write = Flipped(DecoupledIO(new TagEccWriteReq))
})
def metaBits = 2 //freechips.rocketchip.tilelink.ClientMetadata.width
// TODO: is this reset sequence actually necessary?
val rst_cnt = RegInit(0.U(log2Up(nSets + 1).W))
val rst = rst_cnt < nSets.U
val rstVal = 0.U
val waddr = Mux(rst, rst_cnt, io.write.bits.idx)
val wdata = Mux(rst, rstVal, io.write.bits.tag)
val wmask = Mux(rst || (nWays == 1).B, (-1).asSInt, io.write.bits.way_en.asSInt).asBools
val write_idx = WireInit(0.U)
val write_way_en = WireInit(0.U(nWays.W))
when(io.tag_write.valid) {
write_idx := io.tag_write.bits.idx
write_way_en := io.tag_write.bits.way_en
}.elsewhen(io.meta_write.valid) {
write_idx := io.meta_write.bits.idx
write_way_en := io.meta_write.bits.way_en
}
val waddr = Mux(rst, rst_cnt, write_idx)
val wdata = Mux(rst, rstVal, Cat(io.meta_write.bits.meta.coh.asUInt, io.tag_write.bits.tag))
val wmask = Mux(rst || (nWays == 1).B, (-1).asSInt, write_way_en.asSInt).asBools
val rmask = Mux(rst || (nWays == 1).B, (-1).asSInt, io.read.bits.way_en.asSInt).asBools
when (rst) {
rst_cnt := rst_cnt + 1.U
}

val tag_array = Module(new SRAMTemplate(UInt(tagBits.W), set = nSets, way = nWays,
shouldReset = false, holdRead = false, singlePort = true))
val tag_array = Module(new SRAMTemplate(UInt((metaBits + tagBits).W), set = nSets, way = nWays,
shouldReset = true, holdRead = false, singlePort = true, useBitmask = true))

val ecc_array = TagEccParam.map {
case _ =>
Expand All @@ -74,11 +89,20 @@ class TagArray(implicit p: Parameters) extends AbstractTagArray {
ecc
}

val wen = rst || io.write.valid
val wen = rst || io.tag_write.valid || io.meta_write.valid
val write_bitmask = WireInit(0.U((metaBits + tagBits).W))
when(io.meta_write.valid && io.tag_write.valid) {
write_bitmask := Fill(metaBits + tagBits, 1.U(1.W))
}.elsewhen(io.meta_write.valid && !io.tag_write.valid) {
write_bitmask := Cat(Fill(metaBits, 1.U(1.W)), Fill(tagBits, 0.U(1.W)))
}.elsewhen(!io.meta_write.valid && io.tag_write.valid) {
write_bitmask := Cat(Fill(metaBits, 0.U(1.W)), Fill(tagBits, 1.U(1.W)))
}
tag_array.io.w.req.valid := wen
tag_array.io.w.req.bits.apply(
setIdx = waddr,
data = wdata,
bitmask = write_bitmask,
waymask = VecInit(wmask).asUInt
)

Expand All @@ -103,7 +127,8 @@ class TagArray(implicit p: Parameters) extends AbstractTagArray {
tag_array.io.r.req.valid := ren
tag_array.io.r.req.bits.apply(setIdx = io.read.bits.idx)
tag_array.clock := ClockGate(false.B, ren | wen, clock)
io.resp := tag_array.io.r.resp.data
io.tag_resp := tag_array.io.r.resp.data.map(r => r(tagBits - 1, 0))
io.meta_resp := VecInit(tag_array.io.r.resp.data.map(r => r(metaBits + tagBits - 1, tagBits).asTypeOf(new Meta)))
XSPerfAccumulate("part_tag_read_counter", tag_array.io.r.req.valid)

val ecc_ren = io.ecc_read.fire
Expand All @@ -117,7 +142,8 @@ class TagArray(implicit p: Parameters) extends AbstractTagArray {
io.ecc_resp := 0.U.asTypeOf(io.ecc_resp)
}

io.write.ready := !rst
io.tag_write.ready := !rst
io.meta_write.ready := !rst
io.read.ready := !wen
ecc_array match {
case Some(ecc) =>
Expand All @@ -129,11 +155,13 @@ class TagArray(implicit p: Parameters) extends AbstractTagArray {
}
}

class DuplicatedTagArray(readPorts: Int)(implicit p: Parameters) extends AbstractTagArray {
class DuplicatedTagArray(readPorts: Int, writePorts: Int)(implicit p: Parameters) extends AbstractTagArray {
val io = IO(new Bundle() {
val read = Vec(readPorts, Flipped(DecoupledIO(new TagReadReq)))
val resp = Output(Vec(readPorts, Vec(nWays, UInt(encTagBits.W))))
val write = Flipped(DecoupledIO(new TagWriteReq))
val tag_resp = Output(Vec(readPorts, Vec(nWays, UInt(encTagBits.W))))
val meta_resp = Output(Vec(readPorts, Vec(nWays, new Meta)))
val tag_write = Flipped(DecoupledIO(new TagWriteReq))
val meta_write = Vec(writePorts, Flipped(DecoupledIO(new CohMetaWriteReq)))
// customized cache op port
val cacheOp = Flipped(new L1CacheInnerOpIO)
val cacheOp_req_dup = Vec(DCacheDupNum, Flipped(Valid(new CacheCtrlReqInfo)))
Expand All @@ -150,26 +178,30 @@ class DuplicatedTagArray(readPorts: Int)(implicit p: Parameters) extends Abstrac
val tag_read_oh = WireInit(VecInit(Seq.fill(readPorts)(0.U(XLEN.W))))
for (i <- 0 until readPorts) {
// normal read / write
array(i).io.write.valid := io.write.valid
array(i).io.write.bits := io.write.bits
array(i).io.ecc_write.valid := io.write.valid
array(i).io.ecc_write.bits.idx := io.write.bits.idx
array(i).io.ecc_write.bits.way_en := io.write.bits.way_en
val ecc = getECCFromEncTag(cacheParams.tagCode.encode(io.write.bits.tag))
array(i).io.tag_write.valid := io.tag_write.valid
array(i).io.tag_write.bits := io.tag_write.bits
array(i).io.meta_write.valid := io.meta_write(0).valid
array(i).io.meta_write.bits := io.meta_write(0).bits
array(i).io.ecc_write.valid := io.tag_write.valid
array(i).io.ecc_write.bits.idx := io.tag_write.bits.idx
array(i).io.ecc_write.bits.way_en := io.tag_write.bits.way_en
val ecc = getECCFromEncTag(cacheParams.tagCode.encode(io.tag_write.bits.tag))
array(i).io.ecc_write.bits.ecc := ecc

array(i).io.read <> io.read(i)
array(i).io.ecc_read.valid := io.read(i).valid
array(i).io.ecc_read.bits := io.read(i).bits
io.resp(i) := (array(i).io.ecc_resp zip array(i).io.resp).map { case (e, r) => Cat(e, r) }
io.tag_resp(i) := (array(i).io.ecc_resp zip array(i).io.tag_resp).map { case (e, r) => Cat(e, r) }
io.meta_resp(i) := array(i).io.meta_resp
// extra ports for cache op
// array(i).io.ecc_write.valid := false.B
// array(i).io.ecc_write.bits := DontCare
io.read(i).ready := array(i).io.read.ready && array(i).io.ecc_read.ready
tag_read_oh(i) := PopCount(array(i).io.read.fire)
}
XSPerfAccumulate("tag_read_counter", tag_read_oh.reduce(_ + _))
io.write.ready := true.B
io.tag_write.ready := true.B
io.meta_write(0).ready := true.B

require(nWays <= 32)
io.cacheOp.resp.bits := DontCare
Expand Down Expand Up @@ -198,10 +230,10 @@ class DuplicatedTagArray(readPorts: Int)(implicit p: Parameters) extends Abstrac
}
wdata_dup_vec.zipWithIndex.map{ case(dupIdx, idx) =>
when(io.cacheOp_req_dup(dupIdx).valid && isWriteTag(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
array(idx).io.write.valid := true.B
array(idx).io.write.bits.idx := io.cacheOp.req.bits.index
array(idx).io.write.bits.way_en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
array(idx).io.write.bits.tag := io.cacheOp.req.bits.write_tag_low
array(idx).io.tag_write.valid := true.B
array(idx).io.tag_write.bits.idx := io.cacheOp.req.bits.index
array(idx).io.tag_write.bits.way_en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
array(idx).io.tag_write.bits.tag := io.cacheOp.req.bits.write_tag_low
cacheOpShouldResp := true.B
}
}
Expand All @@ -216,6 +248,6 @@ class DuplicatedTagArray(readPorts: Int)(implicit p: Parameters) extends Abstrac
}

io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp)
io.cacheOp.resp.bits.read_tag_low := Mux(io.cacheOp.resp.valid, array(0).io.resp(RegNext(io.cacheOp.req.bits.wayNum)), 0.U)
io.cacheOp.resp.bits.read_tag_low := Mux(io.cacheOp.resp.valid, array(0).io.tag_resp(RegNext(io.cacheOp.req.bits.wayNum)), 0.U)
io.cacheOp.resp.bits.read_tag_ecc := Mux(io.cacheOp.resp.valid, array(0).io.ecc_resp(RegNext(io.cacheOp.req.bits.wayNum)), 0.U)
}

0 comments on commit 874d0c3

Please sign in to comment.