diff --git a/coupledL2 b/coupledL2 index 3357550d9e..77d854b729 160000 --- a/coupledL2 +++ b/coupledL2 @@ -1 +1 @@ -Subproject commit 3357550d9e405c2ce0c2077ad029a07e4489b861 +Subproject commit 77d854b729dd4ea335ac81a4b040cf73690f7b52 diff --git a/src/main/scala/xiangshan/backend/Backend.scala b/src/main/scala/xiangshan/backend/Backend.scala index afe9389619..9f44ee3236 100644 --- a/src/main/scala/xiangshan/backend/Backend.scala +++ b/src/main/scala/xiangshan/backend/Backend.scala @@ -12,6 +12,13 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * * See the Mulan PSL v2 for more details. +* +* +* Acknowledgement +* +* This implementation is inspired by several key papers: +* [1] Robert M. Tomasulo. "[An efficient algorithm for exploiting multiple arithmetic units.] +* (https://doi.org/10.1147/rd.111.0025)" IBM Journal of Research and Development 11.1 (1967): 25-33. ***************************************************************************************/ package xiangshan.backend diff --git a/src/main/scala/xiangshan/backend/fu/Multiplier.scala b/src/main/scala/xiangshan/backend/fu/Multiplier.scala index e05f924864..4b78704787 100644 --- a/src/main/scala/xiangshan/backend/fu/Multiplier.scala +++ b/src/main/scala/xiangshan/backend/fu/Multiplier.scala @@ -12,6 +12,15 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * * See the Mulan PSL v2 for more details. +* +* +* Acknowledgement +* +* This implementation is inspired by several key papers: +* [1] Andrew D. Booth. "[A signed binary multiplication technique.](https://doi.org/10.1093/qjmam/4.2.236)" The +* Quarterly Journal of Mechanics and Applied Mathematics 4.2 (1951): 236-240. +* [2] Christopher S. Wallace. "[A suggestion for a fast multiplier.](https://doi.org/10.1109/PGEC.1964.263830)" IEEE +* Transactions on Electronic Computers 1 (1964): 14-17.
***************************************************************************************/ package xiangshan.backend.fu diff --git a/src/main/scala/xiangshan/backend/fu/SRT16Divider.scala b/src/main/scala/xiangshan/backend/fu/SRT16Divider.scala index fddb4d7916..948bcc9831 100644 --- a/src/main/scala/xiangshan/backend/fu/SRT16Divider.scala +++ b/src/main/scala/xiangshan/backend/fu/SRT16Divider.scala @@ -13,6 +13,13 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * * See the Mulan PSL v2 for more details. +* +* +* Acknowledgement +* +* This implementation is inspired by several key papers: +* [1] Elisardo Antelo, Tomas Lang, Paolo Montuschi, and Alberto Nannarelli. "[Digit-recurrence dividers with reduced +* logical depth.](https://doi.org/10.1109/TC.2005.115)" IEEE Transactions on Computers 54.7 (2005): 837-851. ***************************************************************************************/ // This file contains components originally written by Yifei He, see diff --git a/src/main/scala/xiangshan/backend/rename/CompressUnit.scala b/src/main/scala/xiangshan/backend/rename/CompressUnit.scala index 5099fe0fc0..2130058baa 100644 --- a/src/main/scala/xiangshan/backend/rename/CompressUnit.scala +++ b/src/main/scala/xiangshan/backend/rename/CompressUnit.scala @@ -1,3 +1,28 @@ +/*************************************************************************************** +* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* Copyright (c) 2020-2021 Peng Cheng Laboratory +* +* XiangShan is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. 
+* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. +* +* +* Acknowledgement +* +* This implementation is inspired by several key papers: +* [1] Fernando Latorre, Grigorios Magklis, Jose González, Pedro Chaparro, and Antonio González. "[Crob: implementing a +* large instruction window through compression.](https://doi.org/10.1007/978-3-642-19448-1_7)" Transactions on high- +* performance embedded architectures and compilers III. Berlin, Heidelberg: Springer Berlin Heidelberg, 2011. 115-134. +***************************************************************************************/ + package xiangshan.backend.rename import org.chipsalliance.cde.config.Parameters diff --git a/src/main/scala/xiangshan/backend/rob/Rob.scala b/src/main/scala/xiangshan/backend/rob/Rob.scala index 1b1d3e7f87..0e14d3aca8 100644 --- a/src/main/scala/xiangshan/backend/rob/Rob.scala +++ b/src/main/scala/xiangshan/backend/rob/Rob.scala @@ -12,6 +12,14 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * * See the Mulan PSL v2 for more details. +* +* +* Acknowledgement +* +* This implementation is inspired by several key papers: +* [1] James E. Smith, and Andrew R. Pleszkun. "[Implementation of precise interrupts in pipelined processors.] +* (https://dl.acm.org/doi/10.5555/327010.327125)" Proceedings of the 12th annual international symposium on Computer +* architecture. 1985. 
***************************************************************************************/ package xiangshan.backend.rob diff --git a/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala b/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala index bb78b1b985..b74f7fe4a7 100644 --- a/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala +++ b/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala @@ -13,6 +13,14 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * * See the Mulan PSL v2 for more details. +* +* +* Acknowledgement +* +* This implementation is inspired by several key papers: +* [1] Gurindar S. Sohi, and Manoj Franklin. "[High-bandwidth data memory systems for superscalar processors.] +* (https://doi.org/10.1145/106973.106980)" Proceedings of the fourth international conference on Architectural support +* for programming languages and operating systems. 1991. ***************************************************************************************/ package xiangshan.cache diff --git a/src/main/scala/xiangshan/cache/dcache/mainpipe/MissQueue.scala b/src/main/scala/xiangshan/cache/dcache/mainpipe/MissQueue.scala index f943d572a0..cf66b343a0 100644 --- a/src/main/scala/xiangshan/cache/dcache/mainpipe/MissQueue.scala +++ b/src/main/scala/xiangshan/cache/dcache/mainpipe/MissQueue.scala @@ -12,6 +12,14 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * * See the Mulan PSL v2 for more details. +* +* +* Acknowledgement +* +* This implementation is inspired by several key papers: +* [1] David Kroft. "[Lockup-free instruction fetch/prefetch cache organization.] +* (https://dl.acm.org/doi/10.5555/800052.801868)" Proceedings of the 8th annual symposium on Computer Architecture. +* 1981.
***************************************************************************************/ package xiangshan.cache diff --git a/src/main/scala/xiangshan/cache/mmu/TLB.scala b/src/main/scala/xiangshan/cache/mmu/TLB.scala index 4707584425..b9e4cf7fd7 100644 --- a/src/main/scala/xiangshan/cache/mmu/TLB.scala +++ b/src/main/scala/xiangshan/cache/mmu/TLB.scala @@ -12,6 +12,14 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * * See the Mulan PSL v2 for more details. +* +* +* Acknowledgement +* +* This implementation is inspired by several key papers: +* [1] Binh Pham, Viswanathan Vaidyanathan, Aamer Jaleel, and Abhishek Bhattacharjee. +* "[Colt: Coalesced large-reach tlbs.](https://doi.org/10.1109/MICRO.2012.32)" 2012 45th Annual IEEE/ACM International +* Symposium on Microarchitecture. IEEE, 2012. ***************************************************************************************/ package xiangshan.cache.mmu diff --git a/src/main/scala/xiangshan/frontend/Frontend.scala b/src/main/scala/xiangshan/frontend/Frontend.scala index c91839cf9f..e486d6515b 100644 --- a/src/main/scala/xiangshan/frontend/Frontend.scala +++ b/src/main/scala/xiangshan/frontend/Frontend.scala @@ -12,6 +12,19 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * * See the Mulan PSL v2 for more details. +* +* +* Acknowledgement +* +* This implementation is inspired by several key papers: +* [1] Alex Ramirez, Oliverio J. Santana, Josep L. Larriba-Pey, and Mateo Valero. "[Fetching instruction streams.] +* (https://doi.org/10.1109/MICRO.2002.1176264)" 35th Annual IEEE/ACM International Symposium on Microarchitecture, +* 2002.(MICRO-35). Proceedings. IEEE, 2002. +* [2] Yasuo Ishii, Jaekyu Lee, Krishnendra Nathella, and Dam Sunwoo. "[Rebasing instruction prefetching: An industry +* perspective.](https://doi.org/10.1109/LCA.2020.3035068)" IEEE Computer Architecture Letters 19.2 (2020): 147-150. +* [3] Yasuo Ishii, Jaekyu Lee, Krishnendra Nathella, and Dam Sunwoo. 
"[Re-establishing fetch-directed instruction +* prefetching: An industry perspective.](https://doi.org/10.1109/ISPASS51385.2021.00034)" 2021 IEEE International +* Symposium on Performance Analysis of Systems and Software (ISPASS). IEEE, 2021. ***************************************************************************************/ package xiangshan.frontend diff --git a/src/main/scala/xiangshan/frontend/ITTAGE.scala b/src/main/scala/xiangshan/frontend/ITTAGE.scala index 8010c97901..b81b4ff53d 100644 --- a/src/main/scala/xiangshan/frontend/ITTAGE.scala +++ b/src/main/scala/xiangshan/frontend/ITTAGE.scala @@ -12,6 +12,13 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * * See the Mulan PSL v2 for more details. + * + * + * Acknowledgement + * + * This implementation is inspired by several key papers: + * [1] André Seznec. "[A 64-Kbytes ITTAGE indirect branch predictor.](https://inria.hal.science/hal-00639041)" JWAC-2: + * Championship Branch Prediction. 2011. ***************************************************************************************/ package xiangshan.frontend diff --git a/src/main/scala/xiangshan/frontend/NewFtq.scala b/src/main/scala/xiangshan/frontend/NewFtq.scala index 088ca01725..06ad64bc62 100644 --- a/src/main/scala/xiangshan/frontend/NewFtq.scala +++ b/src/main/scala/xiangshan/frontend/NewFtq.scala @@ -12,6 +12,15 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * * See the Mulan PSL v2 for more details. +* +* +* Acknowledgement +* +* This implementation is inspired by several key papers: +* [1] Glenn Reinman, Todd Austin, and Brad Calder. "[A scalable front-end architecture for fast instruction delivery.] +* (https://doi.org/10.1109/ISCA.1999.765954)" Proceedings of the 26th International Symposium on Computer Architecture. +* IEEE, 1999. 
+* ***************************************************************************************/ package xiangshan.frontend diff --git a/src/main/scala/xiangshan/frontend/SC.scala b/src/main/scala/xiangshan/frontend/SC.scala index 1ba2a10976..258ec85bb8 100644 --- a/src/main/scala/xiangshan/frontend/SC.scala +++ b/src/main/scala/xiangshan/frontend/SC.scala @@ -12,6 +12,15 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * * See the Mulan PSL v2 for more details. +* +* +* Acknowledgement +* +* This implementation is inspired by several key papers: +* [1] André Seznec. "[Tage-sc-l branch predictors.](https://inria.hal.science/hal-01086920)" JILP-Championship Branch +* Prediction. 2014. +* [2] André Seznec. "[Tage-sc-l branch predictors again.](https://inria.hal.science/hal-01354253)" 5th JILP Workshop on +* Computer Architecture Competitions (JWAC-5): Championship Branch Prediction (CBP-5). 2016. ***************************************************************************************/ package xiangshan.frontend diff --git a/src/main/scala/xiangshan/frontend/Tage.scala b/src/main/scala/xiangshan/frontend/Tage.scala index 4d6f8a146b..3e6b96005b 100644 --- a/src/main/scala/xiangshan/frontend/Tage.scala +++ b/src/main/scala/xiangshan/frontend/Tage.scala @@ -12,6 +12,18 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * * See the Mulan PSL v2 for more details. +* +* +* Acknowledgement +* +* This implementation is inspired by several key papers: +* [1] Pierre Michaud. "[A PPM-like, tag-based branch predictor.](https://inria.hal.science/hal-03406188)" The Journal +* of Instruction-Level Parallelism 7 (2005): 10. +* [2] André Seznec, and Pierre Michaud. "[A case for (partially) tagged geometric history length branch prediction.] +* (https://inria.hal.science/hal-03408381)" The Journal of Instruction-Level Parallelism 8 (2006): 23. +* [3] André Seznec. 
"[A 256 kbits l-tage branch predictor.](http://www.irisa.fr/caps/people/seznec/L-TAGE.pdf)" Journal +* of Instruction-Level Parallelism (JILP) Special Issue: The Second Championship Branch Prediction Competition (CBP-2) +* 9 (2007): 1-6. ***************************************************************************************/ package xiangshan.frontend diff --git a/src/main/scala/xiangshan/frontend/icache/ICache.scala b/src/main/scala/xiangshan/frontend/icache/ICache.scala index 603d53e384..9c37076f40 100644 --- a/src/main/scala/xiangshan/frontend/icache/ICache.scala +++ b/src/main/scala/xiangshan/frontend/icache/ICache.scala @@ -13,6 +13,14 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * * See the Mulan PSL v2 for more details. +* +* +* Acknowledgement +* +* This implementation is inspired by several key papers: +* [1] Glenn Reinman, Brad Calder, and Todd Austin. "[Fetch directed instruction prefetching.] +* (https://doi.org/10.1109/MICRO.1999.809439)" MICRO-32. Proceedings of the 32nd Annual ACM/IEEE International +* Symposium on Microarchitecture. IEEE, 1999. ***************************************************************************************/ package xiangshan.frontend.icache diff --git a/src/main/scala/xiangshan/frontend/newRAS.scala b/src/main/scala/xiangshan/frontend/newRAS.scala index 4d3a0f74fc..318cc40e38 100644 --- a/src/main/scala/xiangshan/frontend/newRAS.scala +++ b/src/main/scala/xiangshan/frontend/newRAS.scala @@ -13,6 +13,17 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * * See the Mulan PSL v2 for more details. +* +* +* Acknowledgement +* +* This implementation is inspired by several key papers: +* [1] Kevin Skadron, Pritpal S. Ahuja, Margaret Martonosi, and Douglas W. Clark. "[Improving prediction for procedure +* returns with return-address-stack repair mechanisms.](https://doi.org/10.1109/MICRO.1998.742787)" Proceedings. 31st +* Annual ACM/IEEE International Symposium on Microarchitecture. IEEE, 1998. 
+* [2] Tan Hongze, and Wang Jian. "[A Return Address Predictor Based on Persistent Stack.] +* (https://crad.ict.ac.cn/en/article/doi/10.7544/issn1000-1239.202111274)" Journal of Computer Research and Development +* 60.6 (2023): 1337-1345. ***************************************************************************************/ package xiangshan.frontend diff --git a/src/main/scala/xiangshan/mem/mdp/StoreSet.scala b/src/main/scala/xiangshan/mem/mdp/StoreSet.scala index a2022dff87..13b42b1817 100644 --- a/src/main/scala/xiangshan/mem/mdp/StoreSet.scala +++ b/src/main/scala/xiangshan/mem/mdp/StoreSet.scala @@ -12,6 +12,13 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * * See the Mulan PSL v2 for more details. +* +* +* Acknowledgement +* +* This implementation is inspired by several key papers: +* [1] George Z. Chrysos, and Joel S. Emer. "[Memory dependence prediction using store sets.] +* (https://doi.org/10.1145/279361.279378)" ACM SIGARCH Computer Architecture News 26.3 (1998): 142-153. ***************************************************************************************/ package xiangshan.mem.mdp diff --git a/src/main/scala/xiangshan/mem/mdp/WaitTable.scala b/src/main/scala/xiangshan/mem/mdp/WaitTable.scala index dd4adc7163..5a468b1fa5 100644 --- a/src/main/scala/xiangshan/mem/mdp/WaitTable.scala +++ b/src/main/scala/xiangshan/mem/mdp/WaitTable.scala @@ -12,6 +12,13 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * * See the Mulan PSL v2 for more details. +* +* +* Acknowledgement +* +* This implementation is inspired by several key papers: +* [1] Richard Kessler. "[The alpha 21264 microprocessor.](https://doi.org/10.1109/40.755465)" IEEE Micro 19.2 (1999): +* 24-36.
***************************************************************************************/ package xiangshan.mem.mdp diff --git a/src/main/scala/xiangshan/mem/prefetch/FDP.scala b/src/main/scala/xiangshan/mem/prefetch/FDP.scala index a27939ef1e..0744209ba4 100644 --- a/src/main/scala/xiangshan/mem/prefetch/FDP.scala +++ b/src/main/scala/xiangshan/mem/prefetch/FDP.scala @@ -12,6 +12,14 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * * See the Mulan PSL v2 for more details. +* +* +* Acknowledgement +* +* This implementation is inspired by several key papers: +* [1] Santhosh Srinath, Onur Mutlu, Hyesoon Kim, and Yale N. Patt. "[Feedback directed prefetching: Improving the +* performance and bandwidth-efficiency of hardware prefetchers.](https://doi.org/10.1109/HPCA.2007.346185)" 2007 IEEE +* 13th International Symposium on High Performance Computer Architecture. IEEE, 2007. ***************************************************************************************/ package xiangshan.mem.prefetch diff --git a/src/main/scala/xiangshan/mem/prefetch/L1StridePrefetcher.scala b/src/main/scala/xiangshan/mem/prefetch/L1StridePrefetcher.scala index 62537fa74a..be1e30849d 100644 --- a/src/main/scala/xiangshan/mem/prefetch/L1StridePrefetcher.scala +++ b/src/main/scala/xiangshan/mem/prefetch/L1StridePrefetcher.scala @@ -1,3 +1,27 @@ +/*************************************************************************************** +* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* Copyright (c) 2020-2021 Peng Cheng Laboratory +* +* XiangShan is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2.
+* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. +* +* +* Acknowledgement +* +* This implementation is inspired by several key papers: +* [1] Jean-Loup Baer, and Tien-Fu Chen. "[An effective on-chip preloading scheme to reduce data access penalty.] +* (https://doi.org/10.1145/125826.125932)" Proceedings of the 1991 ACM/IEEE conference on Supercomputing. 1991. +***************************************************************************************/ + package xiangshan.mem.prefetch import org.chipsalliance.cde.config.Parameters diff --git a/src/main/scala/xiangshan/mem/prefetch/SMSPrefetcher.scala b/src/main/scala/xiangshan/mem/prefetch/SMSPrefetcher.scala index c79d0dfd6e..d503729902 100644 --- a/src/main/scala/xiangshan/mem/prefetch/SMSPrefetcher.scala +++ b/src/main/scala/xiangshan/mem/prefetch/SMSPrefetcher.scala @@ -1,3 +1,28 @@ +/*************************************************************************************** +* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* Copyright (c) 2020-2021 Peng Cheng Laboratory +* +* XiangShan is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. 
+* +* +* Acknowledgement +* +* This implementation is inspired by several key papers: +* [1] Stephen Somogyi, Thomas F. Wenisch, Anastassia Ailamaki, Babak Falsafi, and Andreas Moshovos. +* "[Spatial memory streaming.](https://doi.org/10.1109/ISCA.2006.38)" 33rd International Symposium on Computer +* Architecture. 2006. +***************************************************************************************/ + package xiangshan.mem.prefetch import org.chipsalliance.cde.config.Parameters