From d6ef312dfa0ea12b97a6e14fd9ed23b1cacb35e6 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 26 Jul 2023 15:49:03 +0800
Subject: [PATCH 001/109] add SqureRoot

---
 arithmetic/src/square/SquareRootIO.scala      |  25 +++
 arithmetic/src/square/square.scala            | 144 ++++++++++++++++++
 .../tests/src/square/SquareRootTests.scala    |  43 ++++++
 3 files changed, 212 insertions(+)
 create mode 100644 arithmetic/src/square/SquareRootIO.scala
 create mode 100644 arithmetic/src/square/square.scala
 create mode 100644 arithmetic/tests/src/square/SquareRootTests.scala

diff --git a/arithmetic/src/square/SquareRootIO.scala b/arithmetic/src/square/SquareRootIO.scala
new file mode 100644
index 0000000..3edc11d
--- /dev/null
+++ b/arithmetic/src/square/SquareRootIO.scala
@@ -0,0 +1,25 @@
+package square
+
+import chisel3._
+import chisel3.util._
+
+//class OTFInput(qWidth: Int, ohWidth: Int) extends Bundle {
+//  val resultOrigin = UInt(qWidth.W)
+//  val resultMinusOne = UInt(qWidth.W)
+//  val selectedQuotientOH = UInt(ohWidth.W)
+//}
+//class OTFOutput(qWidth: Int) extends Bundle {
+//  val resultOrigin = UInt(qWidth.W)
+//  val resultMinusOne = UInt(qWidth.W)
+//}
+
+
+class SquareRootInput(inputWidth: Int, outputWidth: Int) extends Bundle{
+  val operand = UInt(inputWidth.W)
+  val counter = UInt(log2Ceil(outputWidth).W)
+}
+
+/** 0.1**** = 0.resultOrigin */
+class SquareRootOutput(outputWidth: Int) extends Bundle{
+  val result = UInt((outputWidth).W)
+}
diff --git a/arithmetic/src/square/square.scala b/arithmetic/src/square/square.scala
new file mode 100644
index 0000000..ff32381
--- /dev/null
+++ b/arithmetic/src/square/square.scala
@@ -0,0 +1,144 @@
+package square
+
+import chisel3.{util, _}
+import chisel3.util._
+import division.srt.SRTTable
+import division.srt.srt4.{OTF, QDS}
+import utils.leftShift
+
+/** Squre
+  *
+  * @param outputWidth decide width for result , true result is .xxxxxx
+  */
+class SquareRoot(
+  radixLog2:   Int,
+  a:           Int,
+  inputWidth:  Int,
+  outputWidth: Int)
+    extends Module {
+  val input = IO(Flipped(DecoupledIO(new SquareRootInput(inputWidth: Int, outputWidth: Int))))
+  val output = IO(DecoupledIO(new SquareRootOutput(outputWidth)))
+
+  /** width for partial result and csa */
+  val wlen = inputWidth + 2
+
+  /** todo: verify it, switch to csa */
+  val resultZero = input.bits.operand - 1.U
+
+  /** W[j] = xx.xxxxxxxx
+    *
+    * width = 2 + inputwidth
+    */
+  val partialResultCarryNext, partialResultSumNext = Wire(UInt(wlen.W))
+
+  /** S[j] = .xxxxxxxx
+    *
+    * point position depends on j
+    *
+    * grow from LSB
+    */
+  val resultOriginNext, resultMinusOneNext = Wire(UInt((outputWidth).W))
+  val counterNext = Wire(UInt(log2Ceil(outputWidth).W))
+
+  // Control
+  // sign of Cycle, true -> (counter === 0.U)
+  val isLastCycle, enable: Bool = Wire(Bool())
+  val occupiedNext = Wire(Bool())
+  val occupied = RegNext(occupiedNext, false.B)
+  occupiedNext := input.fire || (!isLastCycle && occupied)
+
+  // State
+  // because we need a CSA to minimize the critical path
+  val partialResultCarry = RegEnable(partialResultCarryNext, 0.U(wlen.W), enable)
+  val partialResultSum = RegEnable(partialResultSumNext, 0.U(wlen.W), enable)
+  val resultOrigin = RegEnable(resultOriginNext, 0.U((outputWidth).W), enable)
+  val resultMinusOne = RegEnable(resultMinusOneNext, 0.U((outputWidth).W), enable)
+  val counter = RegEnable(counterNext, 0.U(log2Ceil(outputWidth).W), enable)
+
+  //  Datapath
+  //  according two adders
+  /** todo :  store counter */
+  isLastCycle := counter === 5.U
+  output.valid := occupied && isLastCycle
+  input.ready := !occupied
+  enable := input.fire || !isLastCycle
+
+  /** rW[j]
+    *
+    * xxxx.xxxxxxxx
+    */
+  val shiftSum, shiftCarry = Wire(UInt((inputWidth + 4).W))
+  shiftSum := partialResultSum << 2
+  shiftCarry := partialResultCarry << 2
+
+  /** todo parameterize it */
+  val rtzYWidth = 7
+  val rtzSWidth = 4
+  val ohWidth = 5
+
+  val firstIter = counter === 0.U
+
+  /** S[j]
+    *
+    * x.xxxxxxxx
+    *
+    * width = outwidth + 1
+    */
+  val resultOriginRestore = (resultOrigin << (outputWidth.U - (counter << 1).asUInt))(outputWidth, 0)
+
+  /** todo: opt it with p342 */
+  val resultForQDS = Mux(
+    firstIter,
+    "b101".U,
+    Mux(resultOriginRestore(outputWidth), "b111".U, resultOriginRestore(outputWidth - 2, outputWidth - 4))
+  )
+
+  /** todo param it */
+  val tables: Seq[Seq[Int]] = SRTTable(1 << radixLog2, a, 4, 4).tablesToQDS
+
+  /** todo make sure resultOrigin has setup right? */
+  val selectedQuotientOH: UInt =
+    QDS(rtzYWidth, ohWidth, rtzSWidth - 1, tables, a)(
+      shiftSum.head(rtzYWidth),
+      shiftCarry.head(rtzYWidth),
+      resultForQDS //.1********* -> 1*** -> ***
+    )
+
+  // On-The-Fly conversion
+  val otf = OTF(radixLog2, outputWidth + 1, ohWidth, a)(resultOrigin, resultMinusOne, selectedQuotientOH)
+
+  val formationForIter = Mux1H(
+    Seq(
+      selectedQuotientOH(0) -> (resultMinusOne << 4 | "b1100".U),
+      selectedQuotientOH(1) -> (resultMinusOne << 3 | "b111".U),
+      selectedQuotientOH(2) -> 0.U,
+      selectedQuotientOH(3) -> (~resultOrigin << 3 | "b111".U),
+      selectedQuotientOH(4) -> (~resultOrigin << 4 | "b1100".U)
+    )
+  )
+
+  /** csa need width : inputwidth + 2 */
+  val formationFinal = Wire(UInt((inputWidth + 3).W))
+  formationFinal := formationForIter << (inputWidth - 2) >> (counter << 1)
+
+  val csa: Vec[UInt] = addition.csa.c32(
+    VecInit(
+      shiftSum(inputWidth + 1, 0),
+      shiftCarry(inputWidth + 1, 0),
+      formationFinal(inputWidth + 1, 0)
+    )
+  )
+
+  val remainderFinal = partialResultSumNext + partialResultCarryNext
+  val needCorrect: Bool = remainderFinal(outputWidth+1).asBool
+
+  /** init S[0] = 1 */
+  resultOriginNext := Mux(input.fire, 1.U, otf(0))
+  resultMinusOneNext := Mux(input.fire, 0.U, otf(1))
+  partialResultSumNext := Mux(input.fire, "b1110110111".U, csa(1))
+  partialResultCarryNext := Mux(input.fire, 0.U, csa(0) << 1)
+  counterNext := Mux(input.fire, 0.U, counter + 1.U)
+
+  output.bits.result := Mux(needCorrect, resultOrigin, resultMinusOne)
+
+}
diff --git a/arithmetic/tests/src/square/SquareRootTests.scala b/arithmetic/tests/src/square/SquareRootTests.scala
new file mode 100644
index 0000000..b908014
--- /dev/null
+++ b/arithmetic/tests/src/square/SquareRootTests.scala
@@ -0,0 +1,43 @@
+package square
+
+import chisel3._
+import chiseltest._
+import utest._
+import scala.util.{Random}
+
+object SquareRootTest extends TestSuite with ChiselUtestTester {
+  def tests: Tests = Tests {
+    test("Square Root should pass") {
+      def testcase(): Unit = {
+        // parameters
+
+        // test
+        testCircuit(
+          new SquareRoot(2, 2, 8, 10),
+          Seq(chiseltest.internal.NoThreadingAnnotation, chiseltest.simulator.WriteVcdAnnotation)
+        ) { dut: SquareRoot =>
+          dut.clock.setTimeout(0)
+          dut.input.valid.poke(true.B)
+          dut.input.bits.operand.poke(0.U)
+          dut.input.bits.counter.poke(5.U)
+          println("ready = %d".format(dut.input.ready.peek().litValue))
+          dut.clock.step()
+          dut.input.valid.poke(false.B)
+          var flag = false
+          for (i <- 0 to 1000) {
+            if (dut.output.valid.peek().litValue == 1) {
+              flag = true
+
+            } else
+              dut.clock.step()
+
+          }
+          utest.assert(flag)
+        }
+      }
+
+      testcase()
+
+    }
+  }
+}

From 78613f9997c302f8d315edde81d6d63318b2f887 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 26 Jul 2023 17:42:35 +0800
Subject: [PATCH 002/109] fix for 24bits input

---
 arithmetic/src/square/square.scala                | 11 +++++------
 arithmetic/tests/src/square/SquareRootTests.scala | 10 ++++++++--
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/arithmetic/src/square/square.scala b/arithmetic/src/square/square.scala
index ff32381..1465f37 100644
--- a/arithmetic/src/square/square.scala
+++ b/arithmetic/src/square/square.scala
@@ -22,9 +22,6 @@ class SquareRoot(
   /** width for partial result and csa */
   val wlen = inputWidth + 2
 
-  /** todo: verify it, switch to csa */
-  val resultZero = input.bits.operand - 1.U
-
   /** W[j] = xx.xxxxxxxx
     *
     * width = 2 + inputwidth
@@ -58,7 +55,7 @@ class SquareRoot(
   //  Datapath
   //  according two adders
   /** todo :  store counter */
-  isLastCycle := counter === 5.U
+  isLastCycle := counter === (outputWidth/2).U
   output.valid := occupied && isLastCycle
   input.ready := !occupied
   enable := input.fire || !isLastCycle
@@ -130,12 +127,14 @@ class SquareRoot(
   )
 
   val remainderFinal = partialResultSumNext + partialResultCarryNext
-  val needCorrect: Bool = remainderFinal(outputWidth+1).asBool
+  val needCorrect: Bool = remainderFinal(outputWidth-1).asBool
+
+  val initSum = Cat("b11".U, input.bits.operand)
 
   /** init S[0] = 1 */
   resultOriginNext := Mux(input.fire, 1.U, otf(0))
   resultMinusOneNext := Mux(input.fire, 0.U, otf(1))
-  partialResultSumNext := Mux(input.fire, "b1110110111".U, csa(1))
+  partialResultSumNext := Mux(input.fire, initSum, csa(1))
   partialResultCarryNext := Mux(input.fire, 0.U, csa(0) << 1)
   counterNext := Mux(input.fire, 0.U, counter + 1.U)
 
diff --git a/arithmetic/tests/src/square/SquareRootTests.scala b/arithmetic/tests/src/square/SquareRootTests.scala
index b908014..2e644ec 100644
--- a/arithmetic/tests/src/square/SquareRootTests.scala
+++ b/arithmetic/tests/src/square/SquareRootTests.scala
@@ -4,21 +4,27 @@ import chisel3._
 import chiseltest._
 import utest._
 import scala.util.{Random}
+import scala.math._
 
 object SquareRootTest extends TestSuite with ChiselUtestTester {
   def tests: Tests = Tests {
     test("Square Root should pass") {
       def testcase(): Unit = {
         // parameters
+        val oprand: Double = 0.71484375
+        val x = sqrt(oprand)
+        val xstring = java.lang.Long.toBinaryString(java.lang.Double.doubleToRawLongBits(x))
+        println(xstring)
+        011000011100011010100100110100110110011100011110101
 
         // test
         testCircuit(
-          new SquareRoot(2, 2, 8, 10),
+          new SquareRoot(2, 2, 24, 26),
           Seq(chiseltest.internal.NoThreadingAnnotation, chiseltest.simulator.WriteVcdAnnotation)
         ) { dut: SquareRoot =>
           dut.clock.setTimeout(0)
           dut.input.valid.poke(true.B)
-          dut.input.bits.operand.poke(0.U)
+          dut.input.bits.operand.poke("b101101110000000000000000".U)
           dut.input.bits.counter.poke(5.U)
           println("ready = %d".format(dut.input.ready.peek().litValue))
           dut.clock.step()

From aad7b8fa25c92610a6f3f4326df1e7104e5dba53 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 31 Jul 2023 11:31:09 +0800
Subject: [PATCH 003/109] example case passed

---
 arithmetic/src/square/square.scala            | 11 ++++++----
 .../tests/src/square/SquareRootTests.scala    | 22 +++++++++++++------
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/arithmetic/src/square/square.scala b/arithmetic/src/square/square.scala
index 1465f37..fe3b358 100644
--- a/arithmetic/src/square/square.scala
+++ b/arithmetic/src/square/square.scala
@@ -7,6 +7,9 @@ import division.srt.srt4.{OTF, QDS}
 import utils.leftShift
 
 /** Squre
+  *
+  * oprand > 1/2 , =0.1xxxxx, input.oprand = 1xxxx
+  * result = 0.1xxxxx, output.result = 1xxxxx
   *
   * @param outputWidth decide width for result , true result is .xxxxxx
   */
@@ -54,7 +57,7 @@ class SquareRoot(
 
   //  Datapath
   //  according two adders
-  /** todo :  store counter */
+  /** todo :  later store counter */
   isLastCycle := counter === (outputWidth/2).U
   output.valid := occupied && isLastCycle
   input.ready := !occupied
@@ -68,7 +71,7 @@ class SquareRoot(
   shiftSum := partialResultSum << 2
   shiftCarry := partialResultCarry << 2
 
-  /** todo parameterize it */
+  /** todo later parameterize it */
   val rtzYWidth = 7
   val rtzSWidth = 4
   val ohWidth = 5
@@ -83,14 +86,14 @@ class SquareRoot(
     */
   val resultOriginRestore = (resultOrigin << (outputWidth.U - (counter << 1).asUInt))(outputWidth, 0)
 
-  /** todo: opt it with p342 */
+  /** todo: later opt it with p342 */
   val resultForQDS = Mux(
     firstIter,
     "b101".U,
     Mux(resultOriginRestore(outputWidth), "b111".U, resultOriginRestore(outputWidth - 2, outputWidth - 4))
   )
 
-  /** todo param it */
+  /** todo later param it */
   val tables: Seq[Seq[Int]] = SRTTable(1 << radixLog2, a, 4, 4).tablesToQDS
 
   /** todo make sure resultOrigin has setup right? */
diff --git a/arithmetic/tests/src/square/SquareRootTests.scala b/arithmetic/tests/src/square/SquareRootTests.scala
index 2e644ec..a8c197d 100644
--- a/arithmetic/tests/src/square/SquareRootTests.scala
+++ b/arithmetic/tests/src/square/SquareRootTests.scala
@@ -11,11 +11,17 @@ object SquareRootTest extends TestSuite with ChiselUtestTester {
     test("Square Root should pass") {
       def testcase(): Unit = {
         // parameters
-        val oprand: Double = 0.71484375
+        val oprand: Double = 0.75
+        val inputOprandRawString = java.lang.Double.doubleToLongBits(oprand).toBinaryString
+        val inputOprandString =
+          "b1" + (Seq.fill(64 - inputOprandRawString.length)("0").mkString("") + inputOprandRawString).substring(12, 35)
+        println("inputString = " + inputOprandString)
+
         val x = sqrt(oprand)
-        val xstring = java.lang.Long.toBinaryString(java.lang.Double.doubleToRawLongBits(x))
-        println(xstring)
-        011000011100011010100100110100110110011100011110101
+        println("x(double) = " + x.toString)
+        val xstring = java.lang.Double.doubleToLongBits(x).toBinaryString
+        // 0.xxxxxx, hiden 1 + 23bits
+        val resultExpect = "1" + (Seq.fill(64 - xstring.length)("0").mkString("") + xstring).substring(12, 35)
 
         // test
         testCircuit(
@@ -24,15 +30,17 @@ object SquareRootTest extends TestSuite with ChiselUtestTester {
         ) { dut: SquareRoot =>
           dut.clock.setTimeout(0)
           dut.input.valid.poke(true.B)
-          dut.input.bits.operand.poke("b101101110000000000000000".U)
+          dut.input.bits.operand.poke(inputOprandString.U)
           dut.input.bits.counter.poke(5.U)
-          println("ready = %d".format(dut.input.ready.peek().litValue))
           dut.clock.step()
           dut.input.valid.poke(false.B)
           var flag = false
-          for (i <- 0 to 1000) {
+          for (i <- 0 to 1000 if !flag) {
             if (dut.output.valid.peek().litValue == 1) {
               flag = true
+              println("result_expect = " + resultExpect)
+              println("result_actual = " + dut.output.bits.result.peek().litValue.toString(2).substring(0, 26))
+              utest.assert(dut.output.bits.result.peek().litValue.toString(2).substring(0, 24) == resultExpect)
 
             } else
               dut.clock.step()

From d5a0fb7e08174cf2c5ace9f015e1814cbdee3e99 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 31 Jul 2023 14:55:10 +0800
Subject: [PATCH 004/109] sqrt for FP32 done

---
 .../square.scala => sqrt/SquareRoot.scala}    | 72 ++++++++++---------
 arithmetic/src/sqrt/SquareRootIO.scala        | 14 ++++
 arithmetic/src/square/SquareRootIO.scala      | 25 -------
 .../tests/src/sqrt/SquareRootTests.scala      | 64 +++++++++++++++++
 .../tests/src/square/SquareRootTests.scala    | 57 ---------------
 5 files changed, 118 insertions(+), 114 deletions(-)
 rename arithmetic/src/{square/square.scala => sqrt/SquareRoot.scala} (70%)
 create mode 100644 arithmetic/src/sqrt/SquareRootIO.scala
 delete mode 100644 arithmetic/src/square/SquareRootIO.scala
 create mode 100644 arithmetic/tests/src/sqrt/SquareRootTests.scala
 delete mode 100644 arithmetic/tests/src/square/SquareRootTests.scala

diff --git a/arithmetic/src/square/square.scala b/arithmetic/src/sqrt/SquareRoot.scala
similarity index 70%
rename from arithmetic/src/square/square.scala
rename to arithmetic/src/sqrt/SquareRoot.scala
index fe3b358..39661bd 100644
--- a/arithmetic/src/square/square.scala
+++ b/arithmetic/src/sqrt/SquareRoot.scala
@@ -1,4 +1,4 @@
-package square
+package sqrt
 
 import chisel3.{util, _}
 import chisel3.util._
@@ -6,12 +6,15 @@ import division.srt.SRTTable
 import division.srt.srt4.{OTF, QDS}
 import utils.leftShift
 
-/** Squre
+/** SquareRoot
   *
-  * oprand > 1/2 , =0.1xxxxx, input.oprand = 1xxxx
-  * result = 0.1xxxxx, output.result = 1xxxxx
+  * {{{
+  * oprand = 0.1xxxxx > 1/2 , input.bits.oprand  = 1xxxx
+  * result = 0.1xxxxx > 1/2 , output.bits.result = 1xxxxx
+  * }}}
   *
-  * @param outputWidth decide width for result , true result is .xxxxxx
+  *
+  * @param outputWidth decide width for result , true result is .xxxxxx, need to be inputwidth + 2
   */
 class SquareRoot(
   radixLog2:   Int,
@@ -30,13 +33,12 @@ class SquareRoot(
     * width = 2 + inputwidth
     */
   val partialResultCarryNext, partialResultSumNext = Wire(UInt(wlen.W))
-
   /** S[j] = .xxxxxxxx
     *
-    * point position depends on j
+    * effective bits number depends on counter, 2n+1
     *
-    * grow from LSB
-    */
+    * effective length grows from LSB and depends on j
+    * */
   val resultOriginNext, resultMinusOneNext = Wire(UInt((outputWidth).W))
   val counterNext = Wire(UInt(log2Ceil(outputWidth).W))
 
@@ -46,29 +48,30 @@ class SquareRoot(
   val occupiedNext = Wire(Bool())
   val occupied = RegNext(occupiedNext, false.B)
   occupiedNext := input.fire || (!isLastCycle && occupied)
-
-  // State
-  // because we need a CSA to minimize the critical path
-  val partialResultCarry = RegEnable(partialResultCarryNext, 0.U(wlen.W), enable)
-  val partialResultSum = RegEnable(partialResultSumNext, 0.U(wlen.W), enable)
-  val resultOrigin = RegEnable(resultOriginNext, 0.U((outputWidth).W), enable)
-  val resultMinusOne = RegEnable(resultMinusOneNext, 0.U((outputWidth).W), enable)
   val counter = RegEnable(counterNext, 0.U(log2Ceil(outputWidth).W), enable)
 
-  //  Datapath
-  //  according two adders
-  /** todo :  later store counter */
+
+  /** Data REG */
+  val resultOrigin       = RegEnable(resultOriginNext,       0.U((outputWidth).W), enable)
+  val resultMinusOne     = RegEnable(resultMinusOneNext,     0.U((outputWidth).W), enable)
+  val partialResultCarry = RegEnable(partialResultCarryNext, 0.U(wlen.W),          enable)
+  val partialResultSum   = RegEnable(partialResultSumNext,   0.U(wlen.W),          enable)
+
+
+
+  /** todo :  later don't fix it ? */
   isLastCycle := counter === (outputWidth/2).U
   output.valid := occupied && isLastCycle
   input.ready := !occupied
   enable := input.fire || !isLastCycle
 
-  /** rW[j]
+  /** rW[j] = xxxx.xxxxxxxx
+    *
+    * first 7 bits for QDS
     *
-    * xxxx.xxxxxxxx
     */
   val shiftSum, shiftCarry = Wire(UInt((inputWidth + 4).W))
-  shiftSum := partialResultSum << 2
+  shiftSum   := partialResultSum   << 2
   shiftCarry := partialResultCarry << 2
 
   /** todo later parameterize it */
@@ -78,15 +81,19 @@ class SquareRoot(
 
   val firstIter = counter === 0.U
 
-  /** S[j]
+  /** S[j] = x.xxxxxxxx
     *
-    * x.xxxxxxxx
+    * For constructing resultForQDS
+    * shift effective bits's MSB to MSB
     *
     * width = outwidth + 1
     */
   val resultOriginRestore = (resultOrigin << (outputWidth.U - (counter << 1).asUInt))(outputWidth, 0)
 
-  /** todo: later opt it with p342 */
+  /** todo: later opt it with p341
+    *
+    * seems resultOriginRestore(outputWidth) can't be 1?
+    * */
   val resultForQDS = Mux(
     firstIter,
     "b101".U,
@@ -96,7 +103,6 @@ class SquareRoot(
   /** todo later param it */
   val tables: Seq[Seq[Int]] = SRTTable(1 << radixLog2, a, 4, 4).tablesToQDS
 
-  /** todo make sure resultOrigin has setup right? */
   val selectedQuotientOH: UInt =
     QDS(rtzYWidth, ohWidth, rtzSWidth - 1, tables, a)(
       shiftSum.head(rtzYWidth),
@@ -107,6 +113,7 @@ class SquareRoot(
   // On-The-Fly conversion
   val otf = OTF(radixLog2, outputWidth + 1, ohWidth, a)(resultOrigin, resultMinusOne, selectedQuotientOH)
 
+  /** p339 */
   val formationForIter = Mux1H(
     Seq(
       selectedQuotientOH(0) -> (resultMinusOne << 4 | "b1100".U),
@@ -117,10 +124,10 @@ class SquareRoot(
     )
   )
 
-  /** csa need width : inputwidth + 2 */
   val formationFinal = Wire(UInt((inputWidth + 3).W))
   formationFinal := formationForIter << (inputWidth - 2) >> (counter << 1)
 
+  /** csa width : inputwidth + 2 */
   val csa: Vec[UInt] = addition.csa.c32(
     VecInit(
       shiftSum(inputWidth + 1, 0),
@@ -129,18 +136,19 @@ class SquareRoot(
     )
   )
 
-  val remainderFinal = partialResultSumNext + partialResultCarryNext
+  val remainderFinal = partialResultSum + partialResultCarry
   val needCorrect: Bool = remainderFinal(outputWidth-1).asBool
 
+  /** w[0] = oprand - 1.U, oprand > 1/2 */
   val initSum = Cat("b11".U, input.bits.operand)
 
   /** init S[0] = 1 */
-  resultOriginNext := Mux(input.fire, 1.U, otf(0))
-  resultMinusOneNext := Mux(input.fire, 0.U, otf(1))
-  partialResultSumNext := Mux(input.fire, initSum, csa(1))
+  resultOriginNext       := Mux(input.fire, 1.U, otf(0))
+  resultMinusOneNext     := Mux(input.fire, 0.U, otf(1))
+  partialResultSumNext   := Mux(input.fire, initSum, csa(1))
   partialResultCarryNext := Mux(input.fire, 0.U, csa(0) << 1)
   counterNext := Mux(input.fire, 0.U, counter + 1.U)
 
-  output.bits.result := Mux(needCorrect, resultOrigin, resultMinusOne)
+  output.bits.result := Mux(needCorrect, resultMinusOne, resultOrigin)
 
 }
diff --git a/arithmetic/src/sqrt/SquareRootIO.scala b/arithmetic/src/sqrt/SquareRootIO.scala
new file mode 100644
index 0000000..64e07c4
--- /dev/null
+++ b/arithmetic/src/sqrt/SquareRootIO.scala
@@ -0,0 +1,14 @@
+package sqrt
+
+import chisel3._
+import chisel3.util._
+
+class SquareRootInput(inputWidth: Int, outputWidth: Int) extends Bundle{
+  val operand = UInt(inputWidth.W)
+  val counter = UInt(log2Ceil(outputWidth).W)
+}
+
+/** 0.1**** = 0.resultOrigin */
+class SquareRootOutput(outputWidth: Int) extends Bundle{
+  val result = UInt((outputWidth).W)
+}
diff --git a/arithmetic/src/square/SquareRootIO.scala b/arithmetic/src/square/SquareRootIO.scala
deleted file mode 100644
index 3edc11d..0000000
--- a/arithmetic/src/square/SquareRootIO.scala
+++ /dev/null
@@ -1,25 +0,0 @@
-package square
-
-import chisel3._
-import chisel3.util._
-
-//class OTFInput(qWidth: Int, ohWidth: Int) extends Bundle {
-//  val resultOrigin = UInt(qWidth.W)
-//  val resultMinusOne = UInt(qWidth.W)
-//  val selectedQuotientOH = UInt(ohWidth.W)
-//}
-//class OTFOutput(qWidth: Int) extends Bundle {
-//  val resultOrigin = UInt(qWidth.W)
-//  val resultMinusOne = UInt(qWidth.W)
-//}
-
-
-class SquareRootInput(inputWidth: Int, outputWidth: Int) extends Bundle{
-  val operand = UInt(inputWidth.W)
-  val counter = UInt(log2Ceil(outputWidth).W)
-}
-
-/** 0.1**** = 0.resultOrigin */
-class SquareRootOutput(outputWidth: Int) extends Bundle{
-  val result = UInt((outputWidth).W)
-}
diff --git a/arithmetic/tests/src/sqrt/SquareRootTests.scala b/arithmetic/tests/src/sqrt/SquareRootTests.scala
new file mode 100644
index 0000000..89d3169
--- /dev/null
+++ b/arithmetic/tests/src/sqrt/SquareRootTests.scala
@@ -0,0 +1,64 @@
+package sqrt
+
+import chisel3._
+import chiseltest._
+import utest._
+import scala.util.{Random}
+import scala.math._
+
+object SquareRootTest extends TestSuite with ChiselUtestTester {
+  def tests: Tests = Tests {
+    test("Square Root for FP32 should pass") {
+      def testcase(): Unit = {
+        val oprandFloat:  Float = (0.5 + Random.nextFloat() / 2).toFloat
+        val oprandDouble: Double = oprandFloat.toDouble
+        val oprandDoubleRawString = java.lang.Double.doubleToLongBits(oprandDouble).toBinaryString
+        val oprandFloatRawString = java.lang.Float.floatToIntBits(oprandFloat).toBinaryString
+
+        val inputFloatString = {
+          "b1" + (Seq.fill(32 - oprandFloatRawString.length)("0").mkString("") + oprandFloatRawString)
+            .substring(9, 32)
+        }
+        val x = sqrt(oprandDouble)
+        val xstring = java.lang.Double.doubleToLongBits(x).toBinaryString
+        // 0.xxxxxx, hiden 1 + 23bits
+        val resultExpect = "1" + (Seq.fill(64 - xstring.length)("0").mkString("") + xstring).substring(12, 37)
+        //        println(oprandFloat.toString + ".sqrtx = " + x.toString)
+
+        // test
+        testCircuit(
+          new SquareRoot(2, 2, 24, 26),
+          Seq(chiseltest.internal.NoThreadingAnnotation, chiseltest.simulator.WriteVcdAnnotation)
+        ) { dut: SquareRoot =>
+          dut.clock.setTimeout(0)
+          dut.input.valid.poke(true.B)
+          dut.input.bits.operand.poke(inputFloatString.U)
+          dut.input.bits.counter.poke(5.U)
+          dut.clock.step()
+          dut.input.valid.poke(false.B)
+          var flag = false
+          for (i <- 0 to 1000 if !flag) {
+            if (dut.output.valid.peek().litValue == 1) {
+              flag = true
+              val resultActual = dut.output.bits.result.peek().litValue.toString(2).substring(0, 26)
+//              println("result_expect26 = " + resultExpect)
+//              println("result_actual26 = " + resultActual)
+//              println("result_expect24 = " + resultExpect.substring(0, 24))
+//              println("result_actual24 = " + resultActual.substring(0, 24))
+              utest.assert(
+                (resultExpect)  == (resultActual)
+              )
+            } else
+              dut.clock.step()
+          }
+          utest.assert(flag)
+        }
+      }
+
+      for (i <- 1 to 100) {
+        testcase()
+      }
+
+    }
+  }
+}
diff --git a/arithmetic/tests/src/square/SquareRootTests.scala b/arithmetic/tests/src/square/SquareRootTests.scala
deleted file mode 100644
index a8c197d..0000000
--- a/arithmetic/tests/src/square/SquareRootTests.scala
+++ /dev/null
@@ -1,57 +0,0 @@
-package square
-
-import chisel3._
-import chiseltest._
-import utest._
-import scala.util.{Random}
-import scala.math._
-
-object SquareRootTest extends TestSuite with ChiselUtestTester {
-  def tests: Tests = Tests {
-    test("Square Root should pass") {
-      def testcase(): Unit = {
-        // parameters
-        val oprand: Double = 0.75
-        val inputOprandRawString = java.lang.Double.doubleToLongBits(oprand).toBinaryString
-        val inputOprandString =
-          "b1" + (Seq.fill(64 - inputOprandRawString.length)("0").mkString("") + inputOprandRawString).substring(12, 35)
-        println("inputString = " + inputOprandString)
-
-        val x = sqrt(oprand)
-        println("x(double) = " + x.toString)
-        val xstring = java.lang.Double.doubleToLongBits(x).toBinaryString
-        // 0.xxxxxx, hiden 1 + 23bits
-        val resultExpect = "1" + (Seq.fill(64 - xstring.length)("0").mkString("") + xstring).substring(12, 35)
-
-        // test
-        testCircuit(
-          new SquareRoot(2, 2, 24, 26),
-          Seq(chiseltest.internal.NoThreadingAnnotation, chiseltest.simulator.WriteVcdAnnotation)
-        ) { dut: SquareRoot =>
-          dut.clock.setTimeout(0)
-          dut.input.valid.poke(true.B)
-          dut.input.bits.operand.poke(inputOprandString.U)
-          dut.input.bits.counter.poke(5.U)
-          dut.clock.step()
-          dut.input.valid.poke(false.B)
-          var flag = false
-          for (i <- 0 to 1000 if !flag) {
-            if (dut.output.valid.peek().litValue == 1) {
-              flag = true
-              println("result_expect = " + resultExpect)
-              println("result_actual = " + dut.output.bits.result.peek().litValue.toString(2).substring(0, 26))
-              utest.assert(dut.output.bits.result.peek().litValue.toString(2).substring(0, 24) == resultExpect)
-
-            } else
-              dut.clock.step()
-
-          }
-          utest.assert(flag)
-        }
-      }
-
-      testcase()
-
-    }
-  }
-}

From 6de8d036d58aaf727e5124f7edc88c021bddc0d0 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 31 Jul 2023 15:00:17 +0800
Subject: [PATCH 005/109] add doc

---
 arithmetic/src/sqrt/SquareRoot.scala | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arithmetic/src/sqrt/SquareRoot.scala b/arithmetic/src/sqrt/SquareRoot.scala
index 39661bd..5ddd44d 100644
--- a/arithmetic/src/sqrt/SquareRoot.scala
+++ b/arithmetic/src/sqrt/SquareRoot.scala
@@ -11,10 +11,14 @@ import utils.leftShift
   * {{{
   * oprand = 0.1xxxxx > 1/2 , input.bits.oprand  = 1xxxx
   * result = 0.1xxxxx > 1/2 , output.bits.result = 1xxxxx
-  * }}}
   *
+  * if float = .1011, input.bits.oprand = 1011
+  * }}}
   *
-  * @param outputWidth decide width for result , true result is .xxxxxx, need to be inputwidth + 2
+  * @param radixLog2 SRT radix log2
+  * @param a Redundent system
+  * @param inputWidth   width for input
+  * @param outputWidth  width for result ,need to be inputwidth + 2
   */
 class SquareRoot(
   radixLog2:   Int,

From 9df6f44ecf8d8d37a16bd9fa770ab7a5afe58c4f Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 31 Jul 2023 16:46:58 +0800
Subject: [PATCH 006/109] update doc

---
 arithmetic/src/sqrt/SquareRoot.scala | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/arithmetic/src/sqrt/SquareRoot.scala b/arithmetic/src/sqrt/SquareRoot.scala
index 5ddd44d..eedfe1a 100644
--- a/arithmetic/src/sqrt/SquareRoot.scala
+++ b/arithmetic/src/sqrt/SquareRoot.scala
@@ -7,12 +7,14 @@ import division.srt.srt4.{OTF, QDS}
 import utils.leftShift
 
 /** SquareRoot
+  *
+  * all example assumes inputWidth = 8
   *
   * {{{
   * oprand = 0.1xxxxx > 1/2 , input.bits.oprand  = 1xxxx
   * result = 0.1xxxxx > 1/2 , output.bits.result = 1xxxxx
   *
-  * if float = .1011, input.bits.oprand = 1011
+  * if oprand = .1011, correct input.bits.oprand = 10110000
   * }}}
   *
   * @param radixLog2 SRT radix log2
@@ -54,15 +56,12 @@ class SquareRoot(
   occupiedNext := input.fire || (!isLastCycle && occupied)
   val counter = RegEnable(counterNext, 0.U(log2Ceil(outputWidth).W), enable)
 
-
   /** Data REG */
   val resultOrigin       = RegEnable(resultOriginNext,       0.U((outputWidth).W), enable)
   val resultMinusOne     = RegEnable(resultMinusOneNext,     0.U((outputWidth).W), enable)
   val partialResultCarry = RegEnable(partialResultCarryNext, 0.U(wlen.W),          enable)
   val partialResultSum   = RegEnable(partialResultSumNext,   0.U(wlen.W),          enable)
 
-
-
   /** todo :  later don't fix it ? */
   isLastCycle := counter === (outputWidth/2).U
   output.valid := occupied && isLastCycle
@@ -83,8 +82,6 @@ class SquareRoot(
   val rtzSWidth = 4
   val ohWidth = 5
 
-  val firstIter = counter === 0.U
-
   /** S[j] = x.xxxxxxxx
     *
     * For constructing resultForQDS
@@ -99,7 +96,7 @@ class SquareRoot(
     * seems resultOriginRestore(outputWidth) can't be 1?
     * */
   val resultForQDS = Mux(
-    firstIter,
+    counter === 0.U,
     "b101".U,
     Mux(resultOriginRestore(outputWidth), "b111".U, resultOriginRestore(outputWidth - 2, outputWidth - 4))
   )

From 9dac34a5a5ca1f1fb320652dd0c67abe67e72351 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 1 Aug 2023 15:25:45 +0800
Subject: [PATCH 007/109] update doc, reformat and opimization

---
 arithmetic/src/sqrt/SquareRoot.scala          | 76 ++++++++++---------
 .../tests/src/sqrt/SquareRootTests.scala      |  2 +-
 2 files changed, 43 insertions(+), 35 deletions(-)

diff --git a/arithmetic/src/sqrt/SquareRoot.scala b/arithmetic/src/sqrt/SquareRoot.scala
index eedfe1a..af2836d 100644
--- a/arithmetic/src/sqrt/SquareRoot.scala
+++ b/arithmetic/src/sqrt/SquareRoot.scala
@@ -8,7 +8,7 @@ import utils.leftShift
 
 /** SquareRoot
   *
-  * all example assumes inputWidth = 8
+  * all example xxx assumes inputWidth = 8
   *
   * {{{
   * oprand = 0.1xxxxx > 1/2 , input.bits.oprand  = 1xxxx
@@ -17,6 +17,13 @@ import utils.leftShift
   * if oprand = .1011, correct input.bits.oprand = 10110000
   * }}}
   *
+  * csa width = partialresult width : wlen = inputwidth + 2
+  * csa width(formation width) : wlen
+  * resultOrigin and Minus: outputWidth
+  *
+  * outputWidth must <= inputWidth +2 or we can't get exact FormationFinal
+  *
+  *
   * @param radixLog2 SRT radix log2
   * @param a Redundent system
   * @param inputWidth   width for input
@@ -31,7 +38,7 @@ class SquareRoot(
   val input = IO(Flipped(DecoupledIO(new SquareRootInput(inputWidth: Int, outputWidth: Int))))
   val output = IO(DecoupledIO(new SquareRootOutput(outputWidth)))
 
-  /** width for partial result and csa */
+  /** width for partial result  */
   val wlen = inputWidth + 2
 
   /** W[j] = xx.xxxxxxxx
@@ -48,32 +55,29 @@ class SquareRoot(
   val resultOriginNext, resultMinusOneNext = Wire(UInt((outputWidth).W))
   val counterNext = Wire(UInt(log2Ceil(outputWidth).W))
 
-  // Control
-  // sign of Cycle, true -> (counter === 0.U)
+  // Control logic
   val isLastCycle, enable: Bool = Wire(Bool())
   val occupiedNext = Wire(Bool())
   val occupied = RegNext(occupiedNext, false.B)
-  occupiedNext := input.fire || (!isLastCycle && occupied)
   val counter = RegEnable(counterNext, 0.U(log2Ceil(outputWidth).W), enable)
 
+  occupiedNext := input.fire || (!isLastCycle && occupied)
+  isLastCycle  := counter === (outputWidth / 2).U
+  input.ready  := !occupied
+  enable       := input.fire || !isLastCycle
+  output.valid := occupied && isLastCycle
+
   /** Data REG */
   val resultOrigin       = RegEnable(resultOriginNext,       0.U((outputWidth).W), enable)
   val resultMinusOne     = RegEnable(resultMinusOneNext,     0.U((outputWidth).W), enable)
   val partialResultCarry = RegEnable(partialResultCarryNext, 0.U(wlen.W),          enable)
   val partialResultSum   = RegEnable(partialResultSumNext,   0.U(wlen.W),          enable)
 
-  /** todo :  later don't fix it ? */
-  isLastCycle := counter === (outputWidth/2).U
-  output.valid := occupied && isLastCycle
-  input.ready := !occupied
-  enable := input.fire || !isLastCycle
-
   /** rW[j] = xxxx.xxxxxxxx
     *
-    * first 7 bits for QDS
-    *
+    * first 7 bits truncated for QDS
     */
-  val shiftSum, shiftCarry = Wire(UInt((inputWidth + 4).W))
+  val shiftSum, shiftCarry = Wire(UInt((wlen+2).W))
   shiftSum   := partialResultSum   << 2
   shiftCarry := partialResultCarry << 2
 
@@ -83,18 +87,14 @@ class SquareRoot(
   val ohWidth = 5
 
   /** S[j] = x.xxxxxxxx
-    *
-    * For constructing resultForQDS
-    * shift effective bits's MSB to MSB
-    *
     * width = outwidth + 1
+    *
+    * transform to fixpoint representation for truncation
+    * shift effective bits(2j+1)  to MSB
     */
-  val resultOriginRestore = (resultOrigin << (outputWidth.U - (counter << 1).asUInt))(outputWidth, 0)
+  val resultOriginRestore = (resultOrigin << outputWidth.U >> (counter << 1).asUInt)(outputWidth, 0)
 
-  /** todo: later opt it with p341
-    *
-    * seems resultOriginRestore(outputWidth) can't be 1?
-    * */
+  /** truncated y for QDS */
   val resultForQDS = Mux(
     counter === 0.U,
     "b101".U,
@@ -111,24 +111,32 @@ class SquareRoot(
       resultForQDS //.1********* -> 1*** -> ***
     )
 
-  // On-The-Fly conversion
-  val otf = OTF(radixLog2, outputWidth + 1, ohWidth, a)(resultOrigin, resultMinusOne, selectedQuotientOH)
+  /** On-The-Fly conversion */
+  val otf = OTF(radixLog2, outputWidth, ohWidth, a)(resultOrigin, resultMinusOne, selectedQuotientOH)
 
-  /** p339 */
+  /** effective bits : LSB 2j+1+4 = 2j + 5 */
   val formationForIter = Mux1H(
     Seq(
       selectedQuotientOH(0) -> (resultMinusOne << 4 | "b1100".U),
       selectedQuotientOH(1) -> (resultMinusOne << 3 | "b111".U),
       selectedQuotientOH(2) -> 0.U,
-      selectedQuotientOH(3) -> (~resultOrigin << 3 | "b111".U),
-      selectedQuotientOH(4) -> (~resultOrigin << 4 | "b1100".U)
+      selectedQuotientOH(3) -> (~resultOrigin << 3  | "b111".U),
+      selectedQuotientOH(4) -> (~resultOrigin << 4  | "b1100".U)
     )
   )
 
-  val formationFinal = Wire(UInt((inputWidth + 3).W))
-  formationFinal := formationForIter << (inputWidth - 2) >> (counter << 1)
+  /** Formation for csa
+    *
+    * to construct formationFinal
+    * shift formationIter effective bits to MSB
+    * need to shift wlen + 1 - (2j+5)
+    *
+    * @todo width fixed to wlen + 1, prove it
+    */
+  val formationFinal = Wire(UInt((wlen + 1).W))
+  formationFinal := formationForIter << (wlen - 4) >> (counter << 1)
 
-  /** csa width : inputwidth + 2 */
+  /** csa width : wlen */
   val csa: Vec[UInt] = addition.csa.c32(
     VecInit(
       shiftSum(inputWidth + 1, 0),
@@ -137,10 +145,11 @@ class SquareRoot(
     )
   )
 
+  /** @todo opt SZ logic */
   val remainderFinal = partialResultSum + partialResultCarry
   val needCorrect: Bool = remainderFinal(outputWidth-1).asBool
 
-  /** w[0] = oprand - 1.U, oprand > 1/2 */
+  /** w[0] = oprand - 1.U */
   val initSum = Cat("b11".U, input.bits.operand)
 
   /** init S[0] = 1 */
@@ -148,8 +157,7 @@ class SquareRoot(
   resultMinusOneNext     := Mux(input.fire, 0.U, otf(1))
   partialResultSumNext   := Mux(input.fire, initSum, csa(1))
   partialResultCarryNext := Mux(input.fire, 0.U, csa(0) << 1)
-  counterNext := Mux(input.fire, 0.U, counter + 1.U)
+  counterNext            := Mux(input.fire, 0.U, counter + 1.U)
 
   output.bits.result := Mux(needCorrect, resultMinusOne, resultOrigin)
-
 }
diff --git a/arithmetic/tests/src/sqrt/SquareRootTests.scala b/arithmetic/tests/src/sqrt/SquareRootTests.scala
index 89d3169..dfa742c 100644
--- a/arithmetic/tests/src/sqrt/SquareRootTests.scala
+++ b/arithmetic/tests/src/sqrt/SquareRootTests.scala
@@ -8,7 +8,7 @@ import scala.math._
 
 object SquareRootTest extends TestSuite with ChiselUtestTester {
   def tests: Tests = Tests {
-    test("Square Root for FP32 should pass") {
+    test("Sqrt FP32 should pass") {
       def testcase(): Unit = {
         val oprandFloat:  Float = (0.5 + Random.nextFloat() / 2).toFloat
         val oprandDouble: Double = oprandFloat.toDouble

From 823a7c6231c96a785004adfc2657b7ea9322d4fe Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 2 Aug 2023 12:01:40 +0800
Subject: [PATCH 008/109] fix sqrt QDS table

---
 arithmetic/src/sqrt/QDS.scala                 | 103 ++++++++++++++++++
 arithmetic/src/sqrt/SquareRoot.scala          |  40 +++----
 arithmetic/src/sqrt/SquareRootIO.scala        |  11 ++
 ...RootTests.scala => SquareRootTester.scala} |  29 +++--
 4 files changed, 145 insertions(+), 38 deletions(-)
 create mode 100644 arithmetic/src/sqrt/QDS.scala
 rename arithmetic/tests/src/sqrt/{SquareRootTests.scala => SquareRootTester.scala} (61%)

diff --git a/arithmetic/src/sqrt/QDS.scala b/arithmetic/src/sqrt/QDS.scala
new file mode 100644
index 0000000..4b732d3
--- /dev/null
+++ b/arithmetic/src/sqrt/QDS.scala
@@ -0,0 +1,103 @@
+package sqrt
+
+import chisel3._
+import chisel3.util.BitPat
+import chisel3.util.BitPat.bitPatToUInt
+import chisel3.util.experimental.decode.TruthTable
+import utils.{extend, sIntToBitPat}
+
+/** Result-Digit Selection
+  *
+  * @param rWidth y Truncate width
+  * @param ohWidth quotient width
+  * @param partialDividerWidth equals to dTruncatedWidth - 1
+  */
+class QDS(rWidth: Int, ohWidth: Int, partialDividerWidth: Int, a: Int) extends Module {
+  // IO
+  val input = IO(Input(new QDSInput(rWidth, partialDividerWidth)))
+  val output = IO(Output(new QDSOutput(ohWidth)))
+
+  // from P269 in <Digital Arithmetic> : /16， should have got from SRTTable.
+  // val qSelTable = Array(
+  //   Array(12, 4, -4, -13),
+  //   Array(14, 4, -5, -14),
+  //   Array(16, 4, -6, -16),
+  //   Array(16, 4, -6, -17),
+  //   Array(18, 6, -6, -18),
+  //   Array(20, 6, -8, -20),
+  //   Array(20, 8, -8, -22),
+  //   Array(22, 8, -8, -23)/16
+  // )
+      val selectRom: Vec[Vec[UInt]] = VecInit(
+        VecInit("b111_0100".U, "b111_1100".U, "b000_0100".U, "b000_1101".U),
+        VecInit("b111_0010".U, "b111_1100".U, "b000_0101".U, "b000_1110".U),
+        VecInit("b111_0000".U, "b111_1100".U, "b000_0110".U, "b001_0000".U),
+        VecInit("b111_0000".U, "b111_1100".U, "b000_0110".U, "b001_0001".U),
+        VecInit("b110_1110".U, "b111_1010".U, "b000_0110".U, "b001_0010".U),
+        VecInit("b110_1100".U, "b111_1010".U, "b000_1000".U, "b001_0100".U),
+        VecInit("b110_1100".U, "b111_1000".U, "b000_1000".U, "b001_0110".U),
+        VecInit("b110_1010".U, "b111_1000".U, "b000_1000".U, "b001_0111".U)
+      )
+
+  val columnSelect = input.partialDivider
+  val adderWidth = rWidth + 1
+
+  /** 3 integer bits, 4 fractional bits */
+  val yTruncate: UInt = input.partialReminderCarry + input.partialReminderSum
+
+  /** the selection constant vector */
+  val mkVec = selectRom(columnSelect)
+
+  /** add [[yTruncate]] with all mk, use decoder to find its location */
+  val selectPoints = VecInit(mkVec.map { mk =>
+    (extend(yTruncate, adderWidth).asUInt
+      + extend(mk, adderWidth).asUInt).head(1)
+  }).asUInt
+
+  // decoder or findFirstOne here, prefer decoder, the decoder only for srt4
+  output.selectedQuotientOH := chisel3.util.experimental.decode.decoder(
+    selectPoints,
+    a match {
+      case 2 =>
+        TruthTable(
+          Seq(
+            BitPat("b???0") -> BitPat("b10000"), //2
+            BitPat("b??01") -> BitPat("b01000"), //1
+            BitPat("b?011") -> BitPat("b00100"), //0
+            BitPat("b0111") -> BitPat("b00010") //-1
+          ),
+          BitPat("b00001") //-2
+        )
+      case 3 =>
+        TruthTable(
+          Seq( // 2 0 -2 1 0 -1
+            BitPat("b??_???0") -> BitPat("b100_100"), //3 = 2 + 1
+            BitPat("b??_??01") -> BitPat("b100_010"), //2 = 2 + 0
+            BitPat("b??_?011") -> BitPat("b010_100"), //1 = 0 + 1
+            BitPat("b??_0111") -> BitPat("b010_010"), //0 = 0 + 0
+            BitPat("b?0_1111") -> BitPat("b010_001"), //-1 = 0 + -1
+            BitPat("b01_1111") -> BitPat("b001_010") //-2 = -2 + 0
+          ),
+          BitPat("b001_001") //-3 = -2 + -1
+        )
+    }
+  )
+}
+
+object QDS {
+  def apply(
+             rWidth:               Int,
+             ohWidth:              Int,
+             partialDividerWidth:  Int,
+             a:                    Int
+           )(partialReminderSum:   UInt,
+             partialReminderCarry: UInt,
+             partialDivider:       UInt
+           ): UInt = {
+    val m = Module(new QDS(rWidth, ohWidth, partialDividerWidth, a))
+    m.input.partialReminderSum := partialReminderSum
+    m.input.partialReminderCarry := partialReminderCarry
+    m.input.partialDivider := partialDivider
+    m.output.selectedQuotientOH
+  }
+}
diff --git a/arithmetic/src/sqrt/SquareRoot.scala b/arithmetic/src/sqrt/SquareRoot.scala
index af2836d..6fb7659 100644
--- a/arithmetic/src/sqrt/SquareRoot.scala
+++ b/arithmetic/src/sqrt/SquareRoot.scala
@@ -1,20 +1,18 @@
 package sqrt
 
-import chisel3.{util, _}
+import chisel3._
 import chisel3.util._
-import division.srt.SRTTable
-import division.srt.srt4.{OTF, QDS}
-import utils.leftShift
+import division.srt.srt4.OTF
 
 /** SquareRoot
   *
   * all example xxx assumes inputWidth = 8
   *
   * {{{
-  * oprand = 0.1xxxxx > 1/2 , input.bits.oprand  = 1xxxx
+  * oprand = 0.1xxxxx > 1/2  , input.bits.oprand  = 1xxxx
+  * oprand = 0.01xxxxx > 1/4 , input.bits.oprand  = 01xxxx
   * result = 0.1xxxxx > 1/2 , output.bits.result = 1xxxxx
   *
-  * if oprand = .1011, correct input.bits.oprand = 10110000
   * }}}
   *
   * csa width = partialresult width : wlen = inputwidth + 2
@@ -23,6 +21,7 @@ import utils.leftShift
   *
   * outputWidth must <= inputWidth +2 or we can't get exact FormationFinal
   *
+  * @example if oprand = .10110000, input.bits.oprand shoule be 10110000
   *
   * @param radixLog2 SRT radix log2
   * @param a Redundent system
@@ -45,7 +44,7 @@ class SquareRoot(
     *
     * width = 2 + inputwidth
     */
-  val partialResultCarryNext, partialResultSumNext = Wire(UInt(wlen.W))
+  val partialCarryNext, partialSumNext = Wire(UInt(wlen.W))
   /** S[j] = .xxxxxxxx
     *
     * effective bits number depends on counter, 2n+1
@@ -62,24 +61,24 @@ class SquareRoot(
   val counter = RegEnable(counterNext, 0.U(log2Ceil(outputWidth).W), enable)
 
   occupiedNext := input.fire || (!isLastCycle && occupied)
-  isLastCycle  := counter === (outputWidth / 2).U
+  isLastCycle  := counter === 14.U
   input.ready  := !occupied
   enable       := input.fire || !isLastCycle
   output.valid := occupied && isLastCycle
 
   /** Data REG */
-  val resultOrigin       = RegEnable(resultOriginNext,       0.U((outputWidth).W), enable)
-  val resultMinusOne     = RegEnable(resultMinusOneNext,     0.U((outputWidth).W), enable)
-  val partialResultCarry = RegEnable(partialResultCarryNext, 0.U(wlen.W),          enable)
-  val partialResultSum   = RegEnable(partialResultSumNext,   0.U(wlen.W),          enable)
+  val resultOrigin       = RegEnable(resultOriginNext,   0.U((outputWidth).W), enable)
+  val resultMinusOne     = RegEnable(resultMinusOneNext, 0.U((outputWidth).W), enable)
+  val partialCarry       = RegEnable(partialCarryNext,   0.U(wlen.W),          enable)
+  val partialSum         = RegEnable(partialSumNext,     0.U(wlen.W),          enable)
 
   /** rW[j] = xxxx.xxxxxxxx
     *
     * first 7 bits truncated for QDS
     */
   val shiftSum, shiftCarry = Wire(UInt((wlen+2).W))
-  shiftSum   := partialResultSum   << 2
-  shiftCarry := partialResultCarry << 2
+  shiftSum   := partialSum   << 2
+  shiftCarry := partialCarry << 2
 
   /** todo later parameterize it */
   val rtzYWidth = 7
@@ -101,11 +100,8 @@ class SquareRoot(
     Mux(resultOriginRestore(outputWidth), "b111".U, resultOriginRestore(outputWidth - 2, outputWidth - 4))
   )
 
-  /** todo later param it */
-  val tables: Seq[Seq[Int]] = SRTTable(1 << radixLog2, a, 4, 4).tablesToQDS
-
   val selectedQuotientOH: UInt =
-    QDS(rtzYWidth, ohWidth, rtzSWidth - 1, tables, a)(
+    QDS(rtzYWidth, ohWidth, rtzSWidth - 1, a)(
       shiftSum.head(rtzYWidth),
       shiftCarry.head(rtzYWidth),
       resultForQDS //.1********* -> 1*** -> ***
@@ -146,8 +142,8 @@ class SquareRoot(
   )
 
   /** @todo opt SZ logic */
-  val remainderFinal = partialResultSum + partialResultCarry
-  val needCorrect: Bool = remainderFinal(outputWidth-1).asBool
+  val remainderFinal = partialSum + partialCarry
+  val needCorrect: Bool = remainderFinal(wlen - 1).asBool
 
   /** w[0] = oprand - 1.U */
   val initSum = Cat("b11".U, input.bits.operand)
@@ -155,8 +151,8 @@ class SquareRoot(
   /** init S[0] = 1 */
   resultOriginNext       := Mux(input.fire, 1.U, otf(0))
   resultMinusOneNext     := Mux(input.fire, 0.U, otf(1))
-  partialResultSumNext   := Mux(input.fire, initSum, csa(1))
-  partialResultCarryNext := Mux(input.fire, 0.U, csa(0) << 1)
+  partialSumNext         := Mux(input.fire, initSum, csa(1))
+  partialCarryNext       := Mux(input.fire, 0.U, csa(0) << 1)
   counterNext            := Mux(input.fire, 0.U, counter + 1.U)
 
   output.bits.result := Mux(needCorrect, resultMinusOne, resultOrigin)
diff --git a/arithmetic/src/sqrt/SquareRootIO.scala b/arithmetic/src/sqrt/SquareRootIO.scala
index 64e07c4..9b435bd 100644
--- a/arithmetic/src/sqrt/SquareRootIO.scala
+++ b/arithmetic/src/sqrt/SquareRootIO.scala
@@ -12,3 +12,14 @@ class SquareRootInput(inputWidth: Int, outputWidth: Int) extends Bundle{
 class SquareRootOutput(outputWidth: Int) extends Bundle{
   val result = UInt((outputWidth).W)
 }
+
+class QDSInput(rWidth: Int, partialDividerWidth: Int) extends Bundle {
+  val partialReminderCarry: UInt = UInt(rWidth.W)
+  val partialReminderSum:   UInt = UInt(rWidth.W)
+  /** truncated divisor without the most significant bit  */
+  val partialDivider: UInt = UInt(partialDividerWidth.W)
+}
+
+class QDSOutput(ohWidth: Int) extends Bundle {
+  val selectedQuotientOH: UInt = UInt(ohWidth.W)
+}
diff --git a/arithmetic/tests/src/sqrt/SquareRootTests.scala b/arithmetic/tests/src/sqrt/SquareRootTester.scala
similarity index 61%
rename from arithmetic/tests/src/sqrt/SquareRootTests.scala
rename to arithmetic/tests/src/sqrt/SquareRootTester.scala
index dfa742c..c0f603f 100644
--- a/arithmetic/tests/src/sqrt/SquareRootTests.scala
+++ b/arithmetic/tests/src/sqrt/SquareRootTester.scala
@@ -6,28 +6,26 @@ import utest._
 import scala.util.{Random}
 import scala.math._
 
-object SquareRootTest extends TestSuite with ChiselUtestTester {
+object SquareRootTester extends TestSuite with ChiselUtestTester {
   def tests: Tests = Tests {
     test("Sqrt FP32 should pass") {
       def testcase(): Unit = {
-        val oprandFloat:  Float = (0.5 + Random.nextFloat() / 2).toFloat
+        val oprandFloat:  Float = (0.25 + Random.nextFloat() * 3/4).toFloat
         val oprandDouble: Double = oprandFloat.toDouble
-        val oprandDoubleRawString = java.lang.Double.doubleToLongBits(oprandDouble).toBinaryString
         val oprandFloatRawString = java.lang.Float.floatToIntBits(oprandFloat).toBinaryString
+        val oprandSigString = (Seq.fill(32 - oprandFloatRawString.length)("0").mkString("") + oprandFloatRawString)
+          .substring(9, 32)
+
+        val inputFloatString = if(oprandFloat<0.5)"b01" + oprandSigString + "0"  else "b1" + oprandSigString + "00"
 
-        val inputFloatString = {
-          "b1" + (Seq.fill(32 - oprandFloatRawString.length)("0").mkString("") + oprandFloatRawString)
-            .substring(9, 32)
-        }
         val x = sqrt(oprandDouble)
         val xstring = java.lang.Double.doubleToLongBits(x).toBinaryString
         // 0.xxxxxx, hiden 1 + 23bits
         val resultExpect = "1" + (Seq.fill(64 - xstring.length)("0").mkString("") + xstring).substring(12, 37)
-        //        println(oprandFloat.toString + ".sqrtx = " + x.toString)
 
         // test
         testCircuit(
-          new SquareRoot(2, 2, 24, 26),
+          new SquareRoot(2, 2, 26, 28),
           Seq(chiseltest.internal.NoThreadingAnnotation, chiseltest.simulator.WriteVcdAnnotation)
         ) { dut: SquareRoot =>
           dut.clock.setTimeout(0)
@@ -41,13 +39,12 @@ object SquareRootTest extends TestSuite with ChiselUtestTester {
             if (dut.output.valid.peek().litValue == 1) {
               flag = true
               val resultActual = dut.output.bits.result.peek().litValue.toString(2).substring(0, 26)
-//              println("result_expect26 = " + resultExpect)
-//              println("result_actual26 = " + resultActual)
-//              println("result_expect24 = " + resultExpect.substring(0, 24))
-//              println("result_actual24 = " + resultActual.substring(0, 24))
-              utest.assert(
-                (resultExpect)  == (resultActual)
-              )
+              if(resultExpect != resultActual){
+                println(oprandFloat.toString + ".sqrtx = " + x.toString)
+                println(inputFloatString)
+                utest.assert(resultExpect  == resultActual)
+              }
+
             } else
               dut.clock.step()
           }

From ec29b780dbea875033574ed88d28048f6857b1c0 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 2 Aug 2023 15:31:36 +0800
Subject: [PATCH 009/109] opt sqrt io

---
 arithmetic/src/sqrt/SquareRoot.scala             | 4 +++-
 arithmetic/src/sqrt/SquareRootIO.scala           | 1 -
 arithmetic/tests/src/sqrt/SquareRootTester.scala | 1 -
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arithmetic/src/sqrt/SquareRoot.scala b/arithmetic/src/sqrt/SquareRoot.scala
index 6fb7659..c33a9f9 100644
--- a/arithmetic/src/sqrt/SquareRoot.scala
+++ b/arithmetic/src/sqrt/SquareRoot.scala
@@ -21,6 +21,8 @@ import division.srt.srt4.OTF
   *
   * outputWidth must <= inputWidth +2 or we can't get exact FormationFinal
   *
+  * @note inputWidth mod 2 ==0
+  *
   * @example if oprand = .10110000, input.bits.oprand shoule be 10110000
   *
   * @param radixLog2 SRT radix log2
@@ -61,7 +63,7 @@ class SquareRoot(
   val counter = RegEnable(counterNext, 0.U(log2Ceil(outputWidth).W), enable)
 
   occupiedNext := input.fire || (!isLastCycle && occupied)
-  isLastCycle  := counter === 14.U
+  isLastCycle  := counter === (outputWidth/2).U
   input.ready  := !occupied
   enable       := input.fire || !isLastCycle
   output.valid := occupied && isLastCycle
diff --git a/arithmetic/src/sqrt/SquareRootIO.scala b/arithmetic/src/sqrt/SquareRootIO.scala
index 9b435bd..7ae94f6 100644
--- a/arithmetic/src/sqrt/SquareRootIO.scala
+++ b/arithmetic/src/sqrt/SquareRootIO.scala
@@ -5,7 +5,6 @@ import chisel3.util._
 
 class SquareRootInput(inputWidth: Int, outputWidth: Int) extends Bundle{
   val operand = UInt(inputWidth.W)
-  val counter = UInt(log2Ceil(outputWidth).W)
 }
 
 /** 0.1**** = 0.resultOrigin */
diff --git a/arithmetic/tests/src/sqrt/SquareRootTester.scala b/arithmetic/tests/src/sqrt/SquareRootTester.scala
index c0f603f..4b958ce 100644
--- a/arithmetic/tests/src/sqrt/SquareRootTester.scala
+++ b/arithmetic/tests/src/sqrt/SquareRootTester.scala
@@ -31,7 +31,6 @@ object SquareRootTester extends TestSuite with ChiselUtestTester {
           dut.clock.setTimeout(0)
           dut.input.valid.poke(true.B)
           dut.input.bits.operand.poke(inputFloatString.U)
-          dut.input.bits.counter.poke(5.U)
           dut.clock.step()
           dut.input.valid.poke(false.B)
           var flag = false

From f6f8d6c06d284295f6508d9a7afa736d77c63613 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 3 Aug 2023 19:36:37 +0800
Subject: [PATCH 010/109] add sqrtfloat module

---
 arithmetic/src/float/SqrtFloat.scala          |  57 ++++++++
 arithmetic/src/float/common.scala             |  84 ++++++++++++
 arithmetic/src/float/primitives.scala         | 127 ++++++++++++++++++
 arithmetic/src/float/rawFloatFromFN.scala     |  74 ++++++++++
 .../tests/src/float/SqrtFloatTester.scala     |  86 ++++++++++++
 .../tests/src/sqrt/SquareRootTester.scala     |   9 +-
 6 files changed, 433 insertions(+), 4 deletions(-)
 create mode 100644 arithmetic/src/float/SqrtFloat.scala
 create mode 100644 arithmetic/src/float/common.scala
 create mode 100644 arithmetic/src/float/primitives.scala
 create mode 100644 arithmetic/src/float/rawFloatFromFN.scala
 create mode 100644 arithmetic/tests/src/float/SqrtFloatTester.scala

diff --git a/arithmetic/src/float/SqrtFloat.scala b/arithmetic/src/float/SqrtFloat.scala
new file mode 100644
index 0000000..6a51492
--- /dev/null
+++ b/arithmetic/src/float/SqrtFloat.scala
@@ -0,0 +1,57 @@
+package float
+
+import chisel3._
+import chisel3.util._
+import sqrt._
+
+class SqrtFloat(expWidth: Int, sigWidth: Int) extends Module{
+  val input = IO(Flipped(DecoupledIO(new FloatSqrtInput(expWidth, sigWidth))))
+  val output = IO(DecoupledIO(new FloatSqrtOutput(expWidth, sigWidth)))
+  val debug = IO(Output(new Bundle() {
+    val fractIn = UInt(26.W)
+  }))
+
+  /** Data path */
+  val rawFloatIn = rawFloatFromFN(expWidth,sigWidth,input.bits.oprand)
+  val adjustedExp = Cat(rawFloatIn.sExp(expWidth-1), rawFloatIn.sExp(expWidth-1, 0))
+
+  /** {{{
+    * expLSB   rawExpLSB    Sig             SigIn     expOut
+    *      0           1    1.xxxx>>2<<1    1xxxx0    rawExp/2 +1 + bias
+    *      1           0    1.xxxx>>2       01xxxx    rawExp/2 +1 + bias
+    *}}}
+    */
+  val expOutNext = Wire(UInt(expWidth.W))
+  expOutNext := adjustedExp(expWidth,1) + 127.U
+  val expOut = RegEnable(expOutNext, 0.U(expWidth.W), input.fire)
+  val fractIn = Mux(input.bits.oprand(sigWidth-1), Cat("b0".U(1.W),rawFloatIn.sig(sigWidth-1, 0),0.U(1.W)),
+    Cat(rawFloatIn.sig(sigWidth-1, 0),0.U(2.W)))
+
+  val SqrtModule = Module(new SquareRoot(2, 2, 26, 26))
+  SqrtModule.input.valid := input.valid
+  SqrtModule.input.bits.operand := fractIn
+  SqrtModule.output.ready := output.ready
+
+  input.ready := SqrtModule.input.ready
+  output.bits.result := Cat(0.U(1.W), expOut, SqrtModule.output.bits.result(24,0))
+  output.bits.sig := SqrtModule.output.bits.result
+  output.bits.exp := expOut
+  output.valid := SqrtModule.output.valid
+
+  debug.fractIn := fractIn
+
+}
+
+class FloatSqrtInput(expWidth: Int, sigWidth: Int) extends Bundle() {
+  val oprand = UInt((expWidth + sigWidth).W)
+}
+
+/** add 2 for rounding*/
+class FloatSqrtOutput(expWidth: Int, sigWidth: Int) extends Bundle() {
+  val result = UInt((expWidth + sigWidth + 2).W)
+  val sig = UInt((sigWidth+2).W)
+  val exp = UInt(expWidth.W)
+
+//  val exceptionFlags = UInt(5.W)
+}
+
diff --git a/arithmetic/src/float/common.scala b/arithmetic/src/float/common.scala
new file mode 100644
index 0000000..ae026b3
--- /dev/null
+++ b/arithmetic/src/float/common.scala
@@ -0,0 +1,84 @@
+
+/*============================================================================
+
+This Chisel source file is part of a pre-release version of the HardFloat IEEE
+Floating-Point Arithmetic Package, by John R. Hauser (with some contributions
+from Yunsup Lee and Andrew Waterman, mainly concerning testing).
+
+Copyright 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of
+the University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+package float
+
+import chisel3._
+
+object consts {
+    /*------------------------------------------------------------------------
+    | For rounding to integer values, rounding mode 'odd' rounds to minimum
+    | magnitude instead, same as 'minMag'.
+    *------------------------------------------------------------------------*/
+    def round_near_even   = "b000".U(3.W)
+    def round_minMag      = "b001".U(3.W)
+    def round_min         = "b010".U(3.W)
+    def round_max         = "b011".U(3.W)
+    def round_near_maxMag = "b100".U(3.W)
+    def round_odd         = "b110".U(3.W)
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    def tininess_beforeRounding = 0.U
+    def tininess_afterRounding  = 1.U
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    def flRoundOpt_sigMSBitAlwaysZero  = 1
+    def flRoundOpt_subnormsAlwaysExact = 2
+    def flRoundOpt_neverUnderflows     = 4
+    def flRoundOpt_neverOverflows      = 8
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    def divSqrtOpt_twoBitsPerCycle     = 16
+}
+
+class RawFloat(val expWidth: Int, val sigWidth: Int) extends Bundle
+{
+    val isNaN: Bool = Bool()              // overrides all other fields
+    val isInf: Bool = Bool()              // overrides 'isZero', 'sExp', and 'sig'
+    val isZero: Bool = Bool()              // overrides 'sExp' and 'sig'
+    val sign: Bool = Bool()
+    val sExp: SInt = SInt((expWidth + 2).W)
+    val sig: UInt = UInt((sigWidth + 1).W)   // 2 m.s. bits cannot both be 0
+
+}
+
+//*** CHANGE THIS INTO A '.isSigNaN' METHOD OF THE 'RawFloat' CLASS:
+object isSigNaNRawFloat
+{
+    def apply(in: RawFloat): Bool = in.isNaN && !in.sig(in.sigWidth - 2)
+}
+
diff --git a/arithmetic/src/float/primitives.scala b/arithmetic/src/float/primitives.scala
new file mode 100644
index 0000000..cb75215
--- /dev/null
+++ b/arithmetic/src/float/primitives.scala
@@ -0,0 +1,127 @@
+
+/*============================================================================
+
+This Chisel source file is part of a pre-release version of the HardFloat IEEE
+Floating-Point Arithmetic Package, by John R. Hauser (with some contributions
+from Yunsup Lee and Andrew Waterman, mainly concerning testing).
+
+Copyright 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+package float
+
+import chisel3._
+import chisel3.util._
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+object lowMask
+{
+    def apply(in: UInt, topBound: BigInt, bottomBound: BigInt): UInt =
+    {
+        require(topBound != bottomBound)
+        val numInVals = BigInt(1)<<in.getWidth
+        if (topBound < bottomBound) {
+            lowMask(~in, numInVals - 1 - topBound, numInVals - 1 - bottomBound)
+        } else if (numInVals > 64 /* Empirical */) {
+            // For simulation performance, we should avoid generating
+            // exteremely wide shifters, so we divide and conquer.
+            // Empirically, this does not impact synthesis QoR.
+            val mid = numInVals / 2
+            val msb = in(in.getWidth - 1)
+            val lsbs = in(in.getWidth - 2, 0)
+            if (mid < topBound) {
+                if (mid <= bottomBound) {
+                    Mux(msb,
+                        lowMask(lsbs, topBound - mid, bottomBound - mid),
+                        0.U
+                    )
+                } else {
+                    Mux(msb,
+                        lowMask(lsbs, topBound - mid, 0) ## ((BigInt(1)<<(mid - bottomBound).toInt) - 1).U,
+                        lowMask(lsbs, mid, bottomBound)
+                    )
+                }
+            } else {
+                ~Mux(msb, 0.U, ~lowMask(lsbs, topBound, bottomBound))
+            }
+        } else {
+            val shift = (BigInt(-1)<<numInVals.toInt).S>>in
+            Reverse(
+                shift(
+                    (numInVals - 1 - bottomBound).toInt,
+                    (numInVals - topBound).toInt
+                )
+            )
+        }
+    }
+}
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+object countLeadingZeros
+{
+    def apply(in: UInt): UInt = PriorityEncoder(in.asBools.reverse)
+}
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+object orReduceBy2
+{
+    def apply(in: UInt): UInt =
+    {
+        val reducedWidth = (in.getWidth + 1)>>1
+        val reducedVec = Wire(Vec(reducedWidth, Bool()))
+        for (ix <- 0 until reducedWidth - 1) {
+            reducedVec(ix) := in(ix * 2 + 1, ix * 2).orR
+        }
+        reducedVec(reducedWidth - 1) :=
+            in(in.getWidth - 1, (reducedWidth - 1) * 2).orR
+        reducedVec.asUInt
+    }
+}
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+object orReduceBy4
+{
+    def apply(in: UInt): UInt =
+    {
+        val reducedWidth = (in.getWidth + 3)>>2
+        val reducedVec = Wire(Vec(reducedWidth, Bool()))
+        for (ix <- 0 until reducedWidth - 1) {
+            reducedVec(ix) := in(ix * 4 + 3, ix * 4).orR
+        }
+        reducedVec(reducedWidth - 1) :=
+            in(in.getWidth - 1, (reducedWidth - 1) * 4).orR
+        reducedVec.asUInt
+    }
+}
+
diff --git a/arithmetic/src/float/rawFloatFromFN.scala b/arithmetic/src/float/rawFloatFromFN.scala
new file mode 100644
index 0000000..449bce0
--- /dev/null
+++ b/arithmetic/src/float/rawFloatFromFN.scala
@@ -0,0 +1,74 @@
+
+/*============================================================================
+
+This Chisel source file is part of a pre-release version of the HardFloat IEEE
+Floating-Point Arithmetic Package, by John R. Hauser (with some contributions
+from Yunsup Lee and Andrew Waterman, mainly concerning testing).
+
+Copyright 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+package float
+
+import chisel3._
+
+object rawFloatFromFN {
+  def apply(expWidth: Int, sigWidth: Int, in: Bits) = {
+    val sign = in(expWidth + sigWidth - 1)
+    val expIn = in(expWidth + sigWidth - 2, sigWidth - 1)
+    val fractIn = in(sigWidth - 2, 0)
+
+    val isZeroExpIn = (expIn === 0.U)
+    val isZeroFractIn = (fractIn === 0.U)
+
+    val normDist = countLeadingZeros(fractIn)
+    val subnormFract = (fractIn << normDist) (sigWidth - 3, 0) << 1
+    val adjustedExp =
+      Mux(isZeroExpIn,
+        normDist ^ ((BigInt(1) << (expWidth + 1)) - 1).U,
+        expIn
+      ) + ((BigInt(1) << (expWidth - 1)).U
+        | Mux(isZeroExpIn, 2.U, 1.U))
+
+    val isZero = isZeroExpIn && isZeroFractIn
+    val isSpecial = adjustedExp(expWidth, expWidth - 1) === 3.U
+
+    val out = Wire(new RawFloat(expWidth, sigWidth))
+    out.isNaN := isSpecial && !isZeroFractIn
+    out.isInf := isSpecial && isZeroFractIn
+    out.isZero := isZero
+    out.sign := sign
+    out.sExp := adjustedExp(expWidth, 0).zext
+    out.sig :=
+      0.U(1.W) ## !isZero ## Mux(isZeroExpIn, subnormFract, fractIn)
+    out
+  }
+}
+
diff --git a/arithmetic/tests/src/float/SqrtFloatTester.scala b/arithmetic/tests/src/float/SqrtFloatTester.scala
new file mode 100644
index 0000000..1797e73
--- /dev/null
+++ b/arithmetic/tests/src/float/SqrtFloatTester.scala
@@ -0,0 +1,86 @@
+package float
+
+import chisel3._
+import chiseltest._
+import utest._
+import scala.util.{Random}
+import scala.math._
+
+object SquareRootTester extends TestSuite with ChiselUtestTester {
+  def tests: Tests = Tests {
+    test("Sqrt Float FP32 should pass") {
+      def testcase(): Unit = {
+        val oprandFloat:  Float = (Random.nextInt(100000)+Random.nextFloat() ).toFloat
+        val oprandDouble: Double = oprandFloat.toDouble
+
+        val oprandString = java.lang.Float.floatToIntBits(oprandFloat).toBinaryString
+        val oprandRawString = Seq.fill(32 - oprandString.length)("0").mkString("") + oprandString
+        val oprandSigString = oprandRawString.substring(9, 32)
+
+        val ExepctFracIn = if(oprandFloat<0.5)"b01" + oprandSigString + "0"  else "b1" + oprandSigString + "00"
+        val circuitInput = "b"+ oprandRawString
+
+        val x = sqrt(oprandDouble)
+        val xDoublestring = java.lang.Double.doubleToLongBits(x).toBinaryString
+        val xFloatstring = java.lang.Float.floatToIntBits(x.toFloat).toBinaryString
+        val xDouble = (Seq.fill(64 - xDoublestring.length)("0").mkString("") + xDoublestring)
+        val xFloat = (Seq.fill(32 - xFloatstring.length)("0").mkString("") + xFloatstring)
+        // 0.xxxxxx,   hidden 1+23bits + 2bits for round
+        val sigExpect =   "1"+xDouble.substring(12, 37)
+        // todo:
+        val expExpect =   xFloat.substring(1,9)
+
+        // test
+        testCircuit(
+          new SqrtFloat(8,24),
+          Seq(chiseltest.internal.NoThreadingAnnotation, chiseltest.simulator.WriteVcdAnnotation)
+        ) { dut: SqrtFloat =>
+          dut.clock.setTimeout(0)
+          dut.input.valid.poke(true.B)
+          dut.input.bits.oprand.poke(circuitInput.U)
+          dut.clock.step()
+          dut.input.valid.poke(false.B)
+          var flag = false
+          for (i <- 0 to 1000 if !flag) {
+            if (dut.output.valid.peek().litValue == 1) {
+              flag = true
+              val resultActual = dut.output.bits.result.peek().litValue.toString(2)
+              val sigActual = dut.output.bits.sig.peek().litValue.toString(2)
+              val expActualraw = dut.output.bits.exp.peek().litValue.toString(2)
+              val expActual = (Seq.fill(8 - expActualraw.length)("0").mkString("") + expActualraw)
+
+              if(sigExpect != sigActual ){
+                println(oprandFloat.toString + ".sqrtx = " + x.toString)
+                println("input = " + circuitInput)
+                println("expect reult = " + xFloat)
+                println("sig_expect = "+ sigExpect)
+                println("sig_actual = "+ sigActual)
+
+                utest.assert(sigExpect  == sigActual)
+              }
+
+              if (expActual != expExpect) {
+                println(oprandFloat.toString + ".sqrtx = " + x.toString)
+                println("input = "+circuitInput)
+                println("expect reult = "+ xFloat)
+                println("exp_expect = " + expExpect)
+                println("exp_actual = " + expActual)
+                utest.assert(expActual ==expExpect)
+              }
+
+
+
+            } else
+              dut.clock.step()
+          }
+          utest.assert(flag)
+        }
+      }
+
+      for (i <- 1 to 100) {
+        testcase()
+      }
+
+    }
+  }
+}
\ No newline at end of file
diff --git a/arithmetic/tests/src/sqrt/SquareRootTester.scala b/arithmetic/tests/src/sqrt/SquareRootTester.scala
index 4b958ce..89559a0 100644
--- a/arithmetic/tests/src/sqrt/SquareRootTester.scala
+++ b/arithmetic/tests/src/sqrt/SquareRootTester.scala
@@ -12,9 +12,10 @@ object SquareRootTester extends TestSuite with ChiselUtestTester {
       def testcase(): Unit = {
         val oprandFloat:  Float = (0.25 + Random.nextFloat() * 3/4).toFloat
         val oprandDouble: Double = oprandFloat.toDouble
-        val oprandFloatRawString = java.lang.Float.floatToIntBits(oprandFloat).toBinaryString
-        val oprandSigString = (Seq.fill(32 - oprandFloatRawString.length)("0").mkString("") + oprandFloatRawString)
-          .substring(9, 32)
+
+        val oprandString = java.lang.Float.floatToIntBits(oprandFloat).toBinaryString
+        val oprandRawString = Seq.fill(32 - oprandString.length)("0").mkString("") + oprandString
+        val oprandSigString = oprandRawString.substring(9, 32)
 
         val inputFloatString = if(oprandFloat<0.5)"b01" + oprandSigString + "0"  else "b1" + oprandSigString + "00"
 
@@ -25,7 +26,7 @@ object SquareRootTester extends TestSuite with ChiselUtestTester {
 
         // test
         testCircuit(
-          new SquareRoot(2, 2, 26, 28),
+          new SquareRoot(2, 2, 26, 26),
           Seq(chiseltest.internal.NoThreadingAnnotation, chiseltest.simulator.WriteVcdAnnotation)
         ) { dut: SquareRoot =>
           dut.clock.setTimeout(0)

From 126fa7146c2877c23ad779c7d040019fb24c22b1 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 7 Aug 2023 11:25:11 +0800
Subject: [PATCH 011/109] [sqrtfloat] add sqrtfloattester

---
 arithmetic/src/sqrt/SquareRoot.scala             |  1 +
 arithmetic/src/sqrt/SquareRootIO.scala           |  1 +
 arithmetic/tests/src/float/SqrtFloatTester.scala | 16 +++++++++-------
 arithmetic/tests/src/sqrt/SquareRootTester.scala |  6 +++---
 4 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/arithmetic/src/sqrt/SquareRoot.scala b/arithmetic/src/sqrt/SquareRoot.scala
index c33a9f9..fb3af4e 100644
--- a/arithmetic/src/sqrt/SquareRoot.scala
+++ b/arithmetic/src/sqrt/SquareRoot.scala
@@ -158,4 +158,5 @@ class SquareRoot(
   counterNext            := Mux(input.fire, 0.U, counter + 1.U)
 
   output.bits.result := Mux(needCorrect, resultMinusOne, resultOrigin)
+  output.bits.zeroRemainder := remainderFinal.orR
 }
diff --git a/arithmetic/src/sqrt/SquareRootIO.scala b/arithmetic/src/sqrt/SquareRootIO.scala
index 7ae94f6..fefd29e 100644
--- a/arithmetic/src/sqrt/SquareRootIO.scala
+++ b/arithmetic/src/sqrt/SquareRootIO.scala
@@ -10,6 +10,7 @@ class SquareRootInput(inputWidth: Int, outputWidth: Int) extends Bundle{
 /** 0.1**** = 0.resultOrigin */
 class SquareRootOutput(outputWidth: Int) extends Bundle{
   val result = UInt((outputWidth).W)
+  val zeroRemainder = Bool()
 }
 
 class QDSInput(rWidth: Int, partialDividerWidth: Int) extends Bundle {
diff --git a/arithmetic/tests/src/float/SqrtFloatTester.scala b/arithmetic/tests/src/float/SqrtFloatTester.scala
index 1797e73..c35fa3d 100644
--- a/arithmetic/tests/src/float/SqrtFloatTester.scala
+++ b/arithmetic/tests/src/float/SqrtFloatTester.scala
@@ -10,7 +10,7 @@ object SquareRootTester extends TestSuite with ChiselUtestTester {
   def tests: Tests = Tests {
     test("Sqrt Float FP32 should pass") {
       def testcase(): Unit = {
-        val oprandFloat:  Float = (Random.nextInt(100000)+Random.nextFloat() ).toFloat
+        val oprandFloat:  Float = (5.877471754111438e-39).toFloat
         val oprandDouble: Double = oprandFloat.toDouble
 
         val oprandString = java.lang.Float.floatToIntBits(oprandFloat).toBinaryString
@@ -53,9 +53,6 @@ object SquareRootTester extends TestSuite with ChiselUtestTester {
                 println(oprandFloat.toString + ".sqrtx = " + x.toString)
                 println("input = " + circuitInput)
                 println("expect reult = " + xFloat)
-                println("sig_expect = "+ sigExpect)
-                println("sig_actual = "+ sigActual)
-
                 utest.assert(sigExpect  == sigActual)
               }
 
@@ -63,11 +60,16 @@ object SquareRootTester extends TestSuite with ChiselUtestTester {
                 println(oprandFloat.toString + ".sqrtx = " + x.toString)
                 println("input = "+circuitInput)
                 println("expect reult = "+ xFloat)
-                println("exp_expect = " + expExpect)
-                println("exp_actual = " + expActual)
                 utest.assert(expActual ==expExpect)
               }
 
+              println(oprandFloat.toString + ".sqrtx = " + x.toString)
+              println("input = " + circuitInput)
+              println("expect reult = " + xFloat)
+              println("exp_expect = " + expExpect)
+              println("exp_actual = " + expActual)
+              println("sig_expect = " + sigExpect)
+              println("sig_actual = " + sigActual)
 
 
             } else
@@ -77,7 +79,7 @@ object SquareRootTester extends TestSuite with ChiselUtestTester {
         }
       }
 
-      for (i <- 1 to 100) {
+      for (i <- 1 to 1) {
         testcase()
       }
 
diff --git a/arithmetic/tests/src/sqrt/SquareRootTester.scala b/arithmetic/tests/src/sqrt/SquareRootTester.scala
index 89559a0..308d01f 100644
--- a/arithmetic/tests/src/sqrt/SquareRootTester.scala
+++ b/arithmetic/tests/src/sqrt/SquareRootTester.scala
@@ -52,9 +52,9 @@ object SquareRootTester extends TestSuite with ChiselUtestTester {
         }
       }
 
-      for (i <- 1 to 100) {
-        testcase()
-      }
+//      for (i <- 1 to 100) {
+//        testcase()
+//      }
 
     }
   }

From cbd53033c85e73ad6657aa0ce8918256ed5beae0 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 7 Aug 2023 13:41:44 +0800
Subject: [PATCH 012/109] [sqrtfloat] add rounding Unit

---
 arithmetic/src/float/RoundingUnit.scala       | 77 +++++++++++++++++++
 arithmetic/src/float/SqrtFloat.scala          |  7 +-
 .../tests/src/float/SqrtFloatTester.scala     | 46 ++++++-----
 3 files changed, 110 insertions(+), 20 deletions(-)
 create mode 100644 arithmetic/src/float/RoundingUnit.scala

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
new file mode 100644
index 0000000..5a163d5
--- /dev/null
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -0,0 +1,77 @@
+package float
+
+import chisel3._
+import chiseltest._
+import utest._
+
+import scala.util.Random
+import scala.math._
+
+/**
+  * input.rbits = 2bits + sticky bit
+  *
+  * */
+class RoundingUnit extends Module{
+  val input = IO(Input(new Bundle{
+//    val invalidExc = Bool() // overrides 'infiniteExc' and 'in'
+//    val infiniteExc = Bool() // overrides 'in' except for 'in.sign'
+    val sig = UInt(23.W)
+    val exp = UInt(8.W)
+    val rBits = UInt(3.W)
+    val sign = Bool()
+    val roundingMode = UInt(5.W)
+  }))
+  val output = IO(Output(new Bundle{
+    val data = UInt(32.W)
+    val exceptionFlags = Output(Bits(5.W))
+  }))
+
+  val roundingMode_near_even   = (input.roundingMode === consts.round_near_even)
+  val roundingMode_toZero      = (input.roundingMode === consts.round_minMag)
+  val roundingMode_min         = (input.roundingMode === consts.round_min)
+  val roundingMode_max         = (input.roundingMode === consts.round_max)
+  val roundingMode_near_maxMag = (input.roundingMode === consts.round_near_maxMag)
+
+  val sigPlus = Wire(UInt(23.W))
+  val expPlus = Wire(UInt(8.W))
+  val sigIncr = Wire(Bool())
+  val expIncr = Wire(Bool())
+
+  /** normal case */
+
+  /** todo later use Mux?*/
+  sigIncr := (roundingMode_near_even && input.rBits(2) && input.rBits(1,0).orR) ||
+    (roundingMode_min &&  input.sign && input.rBits.orR) ||
+    (roundingMode_max && !input.sign && input.rBits.orR) ||
+    (roundingMode_near_maxMag && input.rBits.orR)
+
+  sigPlus := input.sig + sigIncr
+
+  /** for sig = all 1 and sigIncr*/
+  expIncr := input.sig.andR && sigIncr
+  expPlus := input.exp + expIncr
+
+  val expOverflow = input.exp.andR && expIncr
+
+  val sigOut = Mux(sigIncr, sigPlus, input.sig)
+  val expOut = Mux(expIncr, expPlus, input.exp)
+
+  output.data := input.sign ## expOut ## sigOut
+  output.exceptionFlags := 0.U
+
+}
+
+object RoundingUnit {
+  def apply(sign: Bool, exp:UInt, sig: UInt, rbits:UInt, rmode: UInt): UInt = {
+
+    val rounder = Module(new RoundingUnit)
+    rounder.input.sign := sign
+    rounder.input.sig := sig
+    rounder.input.exp := exp
+    rounder.input.rBits := rbits
+    rounder.input.roundingMode := rmode
+    rounder.output.data
+  }
+
+}
+
diff --git a/arithmetic/src/float/SqrtFloat.scala b/arithmetic/src/float/SqrtFloat.scala
index 6a51492..6adc35d 100644
--- a/arithmetic/src/float/SqrtFloat.scala
+++ b/arithmetic/src/float/SqrtFloat.scala
@@ -32,8 +32,11 @@ class SqrtFloat(expWidth: Int, sigWidth: Int) extends Module{
   SqrtModule.input.bits.operand := fractIn
   SqrtModule.output.ready := output.ready
 
+  val rbits = SqrtModule.output.bits.result(1,0) ## (!SqrtModule.output.bits.zeroRemainder)
+  val sigRound = SqrtModule.output.bits.result(24,2)
+
   input.ready := SqrtModule.input.ready
-  output.bits.result := Cat(0.U(1.W), expOut, SqrtModule.output.bits.result(24,0))
+  output.bits.result := RoundingUnit(input.bits.oprand(expWidth + sigWidth-1) ,expOut,sigRound,rbits,consts.round_near_even)
   output.bits.sig := SqrtModule.output.bits.result
   output.bits.exp := expOut
   output.valid := SqrtModule.output.valid
@@ -48,7 +51,7 @@ class FloatSqrtInput(expWidth: Int, sigWidth: Int) extends Bundle() {
 
 /** add 2 for rounding*/
 class FloatSqrtOutput(expWidth: Int, sigWidth: Int) extends Bundle() {
-  val result = UInt((expWidth + sigWidth + 2).W)
+  val result = UInt((expWidth + sigWidth).W)
   val sig = UInt((sigWidth+2).W)
   val exp = UInt(expWidth.W)
 
diff --git a/arithmetic/tests/src/float/SqrtFloatTester.scala b/arithmetic/tests/src/float/SqrtFloatTester.scala
index c35fa3d..74ca6f2 100644
--- a/arithmetic/tests/src/float/SqrtFloatTester.scala
+++ b/arithmetic/tests/src/float/SqrtFloatTester.scala
@@ -10,6 +10,7 @@ object SquareRootTester extends TestSuite with ChiselUtestTester {
   def tests: Tests = Tests {
     test("Sqrt Float FP32 should pass") {
       def testcase(): Unit = {
+        def extendTofull(input:String, width:Int) =(Seq.fill(width - input.length)("0").mkString("") + input)
         val oprandFloat:  Float = (5.877471754111438e-39).toFloat
         val oprandDouble: Double = oprandFloat.toDouble
 
@@ -20,11 +21,14 @@ object SquareRootTester extends TestSuite with ChiselUtestTester {
         val ExepctFracIn = if(oprandFloat<0.5)"b01" + oprandSigString + "0"  else "b1" + oprandSigString + "00"
         val circuitInput = "b"+ oprandRawString
 
+
+
         val x = sqrt(oprandDouble)
+        x.toFloat.round
         val xDoublestring = java.lang.Double.doubleToLongBits(x).toBinaryString
         val xFloatstring = java.lang.Float.floatToIntBits(x.toFloat).toBinaryString
-        val xDouble = (Seq.fill(64 - xDoublestring.length)("0").mkString("") + xDoublestring)
-        val xFloat = (Seq.fill(32 - xFloatstring.length)("0").mkString("") + xFloatstring)
+        val xDouble = extendTofull(xDoublestring,64)
+        val xFloat = extendTofull(xFloatstring,32)
         // 0.xxxxxx,   hidden 1+23bits + 2bits for round
         val sigExpect =   "1"+xDouble.substring(12, 37)
         // todo:
@@ -44,32 +48,38 @@ object SquareRootTester extends TestSuite with ChiselUtestTester {
           for (i <- 0 to 1000 if !flag) {
             if (dut.output.valid.peek().litValue == 1) {
               flag = true
-              val resultActual = dut.output.bits.result.peek().litValue.toString(2)
+              val resultActual = extendTofull(dut.output.bits.result.peek().litValue.toString(2),32)
               val sigActual = dut.output.bits.sig.peek().litValue.toString(2)
-              val expActualraw = dut.output.bits.exp.peek().litValue.toString(2)
-              val expActual = (Seq.fill(8 - expActualraw.length)("0").mkString("") + expActualraw)
+              val expActual = extendTofull(dut.output.bits.exp.peek().litValue.toString(2),8)
 
-              if(sigExpect != sigActual ){
+              def printValue() :Unit = {
                 println(oprandFloat.toString + ".sqrtx = " + x.toString)
                 println("input = " + circuitInput)
-                println("expect reult = " + xFloat)
+                println("exp_expect = " + expExpect)
+                println("exp_actual = " + expActual)
+                println("sig_expect = " + sigExpect)
+                println("sig_actual = " + sigActual)
+                println("result_expect = " + xFloat)
+                println("result_actual = " + resultActual)
+              }
+
+
+              if(sigExpect != sigActual ){
+                printValue()
                 utest.assert(sigExpect  == sigActual)
               }
 
               if (expActual != expExpect) {
-                println(oprandFloat.toString + ".sqrtx = " + x.toString)
-                println("input = "+circuitInput)
-                println("expect reult = "+ xFloat)
+                printValue()
                 utest.assert(expActual ==expExpect)
               }
 
-              println(oprandFloat.toString + ".sqrtx = " + x.toString)
-              println("input = " + circuitInput)
-              println("expect reult = " + xFloat)
-              println("exp_expect = " + expExpect)
-              println("exp_actual = " + expActual)
-              println("sig_expect = " + sigExpect)
-              println("sig_actual = " + sigActual)
+              if(resultActual != xFloat) {
+                printValue()
+                utest.assert(resultActual == xFloat)
+              }
+
+
 
 
             } else
@@ -79,7 +89,7 @@ object SquareRootTester extends TestSuite with ChiselUtestTester {
         }
       }
 
-      for (i <- 1 to 1) {
+      for (i <- 1 to 100) {
         testcase()
       }
 

From f1cf890a105fb2967d78404b9087c6828584326d Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 7 Aug 2023 13:49:29 +0800
Subject: [PATCH 013/109] [sqrtfloat] add rounding Unit

---
 arithmetic/tests/src/float/SqrtFloatTester.scala | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arithmetic/tests/src/float/SqrtFloatTester.scala b/arithmetic/tests/src/float/SqrtFloatTester.scala
index 74ca6f2..9ac92f4 100644
--- a/arithmetic/tests/src/float/SqrtFloatTester.scala
+++ b/arithmetic/tests/src/float/SqrtFloatTester.scala
@@ -14,12 +14,11 @@ object SquareRootTester extends TestSuite with ChiselUtestTester {
         val oprandFloat:  Float = (5.877471754111438e-39).toFloat
         val oprandDouble: Double = oprandFloat.toDouble
 
-        val oprandString = java.lang.Float.floatToIntBits(oprandFloat).toBinaryString
-        val oprandRawString = Seq.fill(32 - oprandString.length)("0").mkString("") + oprandString
-        val oprandSigString = oprandRawString.substring(9, 32)
+        val oprandString = extendTofull(java.lang.Float.floatToIntBits(oprandFloat).toBinaryString,32)
+        val oprandSigString = oprandString.substring(9, 32)
 
         val ExepctFracIn = if(oprandFloat<0.5)"b01" + oprandSigString + "0"  else "b1" + oprandSigString + "00"
-        val circuitInput = "b"+ oprandRawString
+        val circuitInput = "b"+ oprandString
 
 
 

From 0d2cc84d7db5aa203b15ece4df6ad4698bfaf877 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 7 Aug 2023 16:16:30 +0800
Subject: [PATCH 014/109] [sqrtfloat] add exceptions in RoundingUnit

---
 arithmetic/src/float/RoundingUnit.scala       | 56 +++++++++++++++----
 arithmetic/src/float/SqrtFloat.scala          | 42 ++++++++++++--
 arithmetic/src/sqrt/SquareRoot.scala          |  2 +-
 .../tests/src/float/SqrtFloatTester.scala     |  2 +-
 4 files changed, 83 insertions(+), 19 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 5a163d5..6df4606 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -1,20 +1,21 @@
 package float
 
 import chisel3._
-import chiseltest._
-import utest._
+import chisel3.util._
 
-import scala.util.Random
-import scala.math._
 
 /**
   * input.rbits = 2bits + sticky bit
   *
+  * leave
+  *
+  * output is subnormal
+  *
   * */
 class RoundingUnit extends Module{
   val input = IO(Input(new Bundle{
-//    val invalidExc = Bool() // overrides 'infiniteExc' and 'in'
-//    val infiniteExc = Bool() // overrides 'in' except for 'in.sign'
+    val invalidExc = Bool() // overrides 'infiniteExc' and 'in'
+    val infiniteExc = Bool() // overrides 'in' except for 'in.sign'
     val sig = UInt(23.W)
     val exp = UInt(8.W)
     val rBits = UInt(3.W)
@@ -32,6 +33,21 @@ class RoundingUnit extends Module{
   val roundingMode_max         = (input.roundingMode === consts.round_max)
   val roundingMode_near_maxMag = (input.roundingMode === consts.round_near_maxMag)
 
+
+  val common_case = !(input.infiniteExc || input.invalidExc)
+  val common_overflow = Wire(Bool())
+  val common_inexact  = Wire(Bool())
+
+
+  // exception data with Spike
+
+  val invalidOut = "h7FC00000".U
+  /** Inf with sign  */
+  val infiniteOut = Cat(input.sign,"h7F800000".U)
+  val outSele1H = common_case ## input.infiniteExc ## input.invalidExc
+
+
+
   val sigPlus = Wire(UInt(23.W))
   val expPlus = Wire(UInt(8.W))
   val sigIncr = Wire(Bool())
@@ -51,18 +67,32 @@ class RoundingUnit extends Module{
   expIncr := input.sig.andR && sigIncr
   expPlus := input.exp + expIncr
 
-  val expOverflow = input.exp.andR && expIncr
+  common_overflow := input.exp.andR && expIncr
+  common_inexact := input.rBits.orR
+
+  val common_sigOut = Mux(sigIncr, sigPlus, input.sig)
+  val common_expOut = Mux(expIncr, expPlus, input.exp)
+
+  val common_out = Mux(common_overflow, infiniteOut, input.sign ## common_expOut ## common_sigOut)
+
+  output.data := Mux1H(Seq(
+    outSele1H(0) -> invalidOut,
+    outSele1H(1) -> infiniteOut,
+    outSele1H(2) -> common_out)
+  )
 
-  val sigOut = Mux(sigIncr, sigPlus, input.sig)
-  val expOut = Mux(expIncr, expPlus, input.exp)
+  val invalidOpration = input.invalidExc
+  val divideByzero = false.B
+  val overflow = common_case && common_overflow
+  val underflow = false.B
+  val inexact = overflow || (common_case && common_inexact)
 
-  output.data := input.sign ## expOut ## sigOut
-  output.exceptionFlags := 0.U
+  output.exceptionFlags := invalidOpration ## divideByzero ## overflow ## underflow ## inexact
 
 }
 
 object RoundingUnit {
-  def apply(sign: Bool, exp:UInt, sig: UInt, rbits:UInt, rmode: UInt): UInt = {
+  def apply(sign: Bool, exp:UInt, sig: UInt, rbits:UInt, rmode: UInt,invalidExc:Bool, infiniteExc:Bool): UInt = {
 
     val rounder = Module(new RoundingUnit)
     rounder.input.sign := sign
@@ -70,6 +100,8 @@ object RoundingUnit {
     rounder.input.exp := exp
     rounder.input.rBits := rbits
     rounder.input.roundingMode := rmode
+    rounder.input.invalidExc := invalidExc
+    rounder.input.infiniteExc := infiniteExc
     rounder.output.data
   }
 
diff --git a/arithmetic/src/float/SqrtFloat.scala b/arithmetic/src/float/SqrtFloat.scala
index 6adc35d..5b3f5be 100644
--- a/arithmetic/src/float/SqrtFloat.scala
+++ b/arithmetic/src/float/SqrtFloat.scala
@@ -4,15 +4,39 @@ import chisel3._
 import chisel3.util._
 import sqrt._
 
+/**
+  *
+  * @todo Opt for zero
+  *       input is Subnormal!
+  *
+  * */
 class SqrtFloat(expWidth: Int, sigWidth: Int) extends Module{
   val input = IO(Flipped(DecoupledIO(new FloatSqrtInput(expWidth, sigWidth))))
   val output = IO(DecoupledIO(new FloatSqrtOutput(expWidth, sigWidth)))
   val debug = IO(Output(new Bundle() {
     val fractIn = UInt(26.W)
   }))
+  val rawFloatIn = rawFloatFromFN(expWidth,sigWidth,input.bits.oprand)
+
+  /** Control path */
+  val isNegaZero = rawFloatIn.isZero && rawFloatIn.sign
+  val isPosiInf  = rawFloatIn.isInf  && rawFloatIn.sign
+
+  val fastWorking = RegInit(false.B)
+  val fastCase = Wire(Bool())
+
+  /** negative or NaN*/
+  val invalidExec = (rawFloatIn.sign && !isNegaZero) || rawFloatIn.isNaN
+  /** positive inf */
+  val infinitExec = isPosiInf
+
+  fastCase := invalidExec || infinitExec
+  fastWorking := input.fire && fastCase
+
+
 
   /** Data path */
-  val rawFloatIn = rawFloatFromFN(expWidth,sigWidth,input.bits.oprand)
+
   val adjustedExp = Cat(rawFloatIn.sExp(expWidth-1), rawFloatIn.sExp(expWidth-1, 0))
 
   /** {{{
@@ -28,18 +52,26 @@ class SqrtFloat(expWidth: Int, sigWidth: Int) extends Module{
     Cat(rawFloatIn.sig(sigWidth-1, 0),0.U(2.W)))
 
   val SqrtModule = Module(new SquareRoot(2, 2, 26, 26))
-  SqrtModule.input.valid := input.valid
+  SqrtModule.input.valid := input.valid && !fastCase
   SqrtModule.input.bits.operand := fractIn
   SqrtModule.output.ready := output.ready
 
   val rbits = SqrtModule.output.bits.result(1,0) ## (!SqrtModule.output.bits.zeroRemainder)
-  val sigRound = SqrtModule.output.bits.result(24,2)
+  val sigforRound = SqrtModule.output.bits.result(24,2)
+
 
   input.ready := SqrtModule.input.ready
-  output.bits.result := RoundingUnit(input.bits.oprand(expWidth + sigWidth-1) ,expOut,sigRound,rbits,consts.round_near_even)
+  output.bits.result := RoundingUnit(
+    input.bits.oprand(expWidth + sigWidth-1) ,
+    expOut,
+    sigforRound,
+    rbits,
+    consts.round_near_even,
+    invalidExec,
+    infinitExec)
   output.bits.sig := SqrtModule.output.bits.result
   output.bits.exp := expOut
-  output.valid := SqrtModule.output.valid
+  output.valid := SqrtModule.output.valid || fastWorking
 
   debug.fractIn := fractIn
 
diff --git a/arithmetic/src/sqrt/SquareRoot.scala b/arithmetic/src/sqrt/SquareRoot.scala
index fb3af4e..d50a751 100644
--- a/arithmetic/src/sqrt/SquareRoot.scala
+++ b/arithmetic/src/sqrt/SquareRoot.scala
@@ -158,5 +158,5 @@ class SquareRoot(
   counterNext            := Mux(input.fire, 0.U, counter + 1.U)
 
   output.bits.result := Mux(needCorrect, resultMinusOne, resultOrigin)
-  output.bits.zeroRemainder := remainderFinal.orR
+  output.bits.zeroRemainder := !remainderFinal.orR
 }
diff --git a/arithmetic/tests/src/float/SqrtFloatTester.scala b/arithmetic/tests/src/float/SqrtFloatTester.scala
index 9ac92f4..c0e619f 100644
--- a/arithmetic/tests/src/float/SqrtFloatTester.scala
+++ b/arithmetic/tests/src/float/SqrtFloatTester.scala
@@ -11,7 +11,7 @@ object SquareRootTester extends TestSuite with ChiselUtestTester {
     test("Sqrt Float FP32 should pass") {
       def testcase(): Unit = {
         def extendTofull(input:String, width:Int) =(Seq.fill(width - input.length)("0").mkString("") + input)
-        val oprandFloat:  Float = (5.877471754111438e-39).toFloat
+        val oprandFloat:  Float = Random.nextInt(1000000)+Random.nextFloat()
         val oprandDouble: Double = oprandFloat.toDouble
 
         val oprandString = extendTofull(java.lang.Float.floatToIntBits(oprandFloat).toBinaryString,32)

From 7bd210919404a78531d174af92e618035e44945a Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 1 Aug 2023 17:47:14 +0800
Subject: [PATCH 015/109] format and doc

---
 arithmetic/src/division/srt/srt4/SRT4.scala   | 44 +++++++++++--------
 .../{SRT4Test.scala => SRT4IntegerTest.scala} | 17 +++----
 2 files changed, 34 insertions(+), 27 deletions(-)
 rename arithmetic/tests/src/division/srt/{SRT4Test.scala => SRT4IntegerTest.scala} (92%)

diff --git a/arithmetic/src/division/srt/srt4/SRT4.scala b/arithmetic/src/division/srt/srt4/SRT4.scala
index 96fbeed..684f45d 100644
--- a/arithmetic/src/division/srt/srt4/SRT4.scala
+++ b/arithmetic/src/division/srt/srt4/SRT4.scala
@@ -40,10 +40,11 @@ class SRT4(
   val xLen: Int = dividendWidth + radixLog2 + 1 + guardBitWidth
   val wLen: Int = xLen + radixLog2
   // IO
-  val input = IO(Flipped(DecoupledIO(new SRTInput(dividendWidth, dividerWidth, n, 2))))
+  val input  = IO(Flipped(DecoupledIO(new SRTInput(dividendWidth, dividerWidth, n, 2))))
   val output = IO(ValidIO(new SRTOutput(dividerWidth, dividendWidth)))
 
-  //rW[j]
+  // rW[j]
+  // rW[0] = x
   val partialReminderCarryNext, partialReminderSumNext = Wire(UInt(wLen.W))
   val quotientNext, quotientMinusOneNext = Wire(UInt(n.W))
   val dividerNext = Wire(UInt(dividerWidth.W))
@@ -55,19 +56,17 @@ class SRT4(
 
   // State
   // because we need a CSA to minimize the critical path
-  val partialReminderCarry = RegEnable(partialReminderCarryNext, 0.U(wLen.W), enable)
-  val partialReminderSum = RegEnable(partialReminderSumNext, 0.U(wLen.W), enable)
-  val divider = RegEnable(dividerNext, 0.U(dividerWidth.W), enable)
-  val quotient = RegEnable(quotientNext, 0.U(n.W), enable)
-  val quotientMinusOne = RegEnable(quotientMinusOneNext, 0.U(n.W), enable)
-  val counter = RegEnable(counterNext, 0.U(log2Ceil(n).W), enable)
-
+  val partialReminderCarry = RegEnable(partialReminderCarryNext, 0.U(wLen.W),         enable)
+  val partialReminderSum   = RegEnable(partialReminderSumNext,   0.U(wLen.W),         enable)
+  val divider              = RegEnable(dividerNext,              0.U(dividerWidth.W), enable)
+  val quotient             = RegEnable(quotientNext,             0.U(n.W),            enable)
+  val quotientMinusOne     = RegEnable(quotientMinusOneNext,     0.U(n.W),            enable)
+  val counter              = RegEnable(counterNext,              0.U(log2Ceil(n).W),  enable)
+
+  /** Contrl logic */
   val occupiedNext = Wire(Bool())
   val occupied = RegNext(occupiedNext, false.B)
   occupiedNext := input.fire || (!isLastCycle && occupied)
-
-  //  Datapath
-  //  according two adders
   isLastCycle := !counter.orR
   output.valid := occupied && isLastCycle
   input.ready := !occupied
@@ -94,10 +93,17 @@ class SRT4(
     case 3 => 6
   }
 
-  /** QDS module whose output needs to be decoded */
+  /** Quotient-Divisor-Select
+    *
+    * input0 = rtz partialSum
+    * input1 = rtz partialCarry
+    * input2 = rtz divisor
+    *
+    * use dividerNext instead of dividerReg to avoid waiting in  firt Iter
+    * */
   val selectedQuotientOH: UInt =
     QDS(rWidth, ohWidth, dTruncateWidth - 1, tables, a)(
-      leftShift(partialReminderSum, radixLog2).head(rWidth),
+      leftShift(partialReminderSum,   radixLog2).head(rWidth),
       leftShift(partialReminderCarry, radixLog2).head(rWidth),
       dividerNext.head(dTruncateWidth)(dTruncateWidth - 2, 0) //.1********* -> 1*** -> ***
     )
@@ -158,10 +164,10 @@ class SRT4(
       )
     }
 
-  dividerNext := Mux(input.fire, input.bits.divider, divider)
-  counterNext := Mux(input.fire, input.bits.counter, counter - 1.U)
-  quotientNext := Mux(input.fire, 0.U, otf(0))
-  quotientMinusOneNext := Mux(input.fire, 0.U, otf(1))
-  partialReminderSumNext := Mux(input.fire, input.bits.dividend, csa(1) << radixLog2)
+  dividerNext              := Mux(input.fire, input.bits.divider, divider)
+  counterNext              := Mux(input.fire, input.bits.counter, counter - 1.U)
+  quotientNext             := Mux(input.fire, 0.U, otf(0))
+  quotientMinusOneNext     := Mux(input.fire, 0.U, otf(1))
+  partialReminderSumNext   := Mux(input.fire, input.bits.dividend, csa(1) << radixLog2)
   partialReminderCarryNext := Mux(input.fire, 0.U, csa(0) << 1 + radixLog2)
 }
diff --git a/arithmetic/tests/src/division/srt/SRT4Test.scala b/arithmetic/tests/src/division/srt/SRT4IntegerTest.scala
similarity index 92%
rename from arithmetic/tests/src/division/srt/SRT4Test.scala
rename to arithmetic/tests/src/division/srt/SRT4IntegerTest.scala
index 66b9c36..a81e4c0 100644
--- a/arithmetic/tests/src/division/srt/SRT4Test.scala
+++ b/arithmetic/tests/src/division/srt/SRT4IntegerTest.scala
@@ -5,9 +5,9 @@ import chiseltest._
 import utest._
 import scala.util.{Random}
 
-object SRT4Test extends TestSuite with ChiselUtestTester {
+object SRT4IntegerTest extends TestSuite with ChiselUtestTester {
   def tests: Tests = Tests {
-    test("SRT4 should pass") {
+    test("SRT4 integer should pass") {
       def testcase(width: Int, x: Int, d: Int): Unit = {
         // parameters
         val radixLog2: Int = 2
@@ -17,7 +17,7 @@ object SRT4Test extends TestSuite with ChiselUtestTester {
         val q:         Int = Random.nextInt(m)
 //        val dividend: BigInt = BigInt("fffffff0", 16) + x
         val dividend: BigInt = x
-        val divisor: BigInt = d
+        val divisor:  BigInt = d
         def zeroCheck(x: BigInt): Int = {
           var flag = false
           var a: Int = m
@@ -101,11 +101,12 @@ object SRT4Test extends TestSuite with ChiselUtestTester {
 //        }
 //      }
 
-            for (i <- 2 to 15) {
-              for (j <- 1 to i-1) {
-                testcase(4, i, j)
-              }
-            }
+//            for (i <- 2 to 15) {
+//              for (j <- 1 to i-1) {
+//                testcase(4, i, j)
+//              }
+//            }
+      testcase(4, 15, 1)
 
     }
   }

From aa2f402b4650dafcbae3945e62f64e5753e8c921 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 1 Aug 2023 19:17:15 +0800
Subject: [PATCH 016/109] decrease Reg Width for partialSum and Carry by 2

---
 arithmetic/src/division/srt/srt16/SRT16.scala | 27 +++----
 arithmetic/src/division/srt/srt4/SRT4.scala   | 81 +++++++++----------
 arithmetic/src/division/srt/srt8/SRT8.scala   | 47 ++++++-----
 3 files changed, 73 insertions(+), 82 deletions(-)

diff --git a/arithmetic/src/division/srt/srt16/SRT16.scala b/arithmetic/src/division/srt/srt16/SRT16.scala
index 9976085..da1864a 100644
--- a/arithmetic/src/division/srt/srt16/SRT16.scala
+++ b/arithmetic/src/division/srt/srt16/SRT16.scala
@@ -20,7 +20,6 @@ class SRT16(
     extends Module {
   val guardBitWidth = 3
   val xLen:    Int = dividendWidth + radixLog2 + 1 + guardBitWidth
-  val wLen:    Int = xLen + radixLog2
   val ohWidth: Int = 2 * a + 1
   val rWidth:  Int = 1 + radixLog2 + rTruncateWidth
 
@@ -28,7 +27,7 @@ class SRT16(
   val input = IO(Flipped(DecoupledIO(new SRTInput(dividendWidth, dividerWidth, n, 4))))
   val output = IO(ValidIO(new SRTOutput(dividerWidth, dividendWidth)))
 
-  val partialReminderCarryNext, partialReminderSumNext = Wire(UInt(wLen.W))
+  val partialReminderCarryNext, partialReminderSumNext = Wire(UInt(xLen.W))
   val dividerNext = Wire(UInt(dividerWidth.W))
   val counterNext = Wire(UInt(log2Ceil(n).W))
   val quotientNext, quotientMinusOneNext = Wire(UInt(n.W))
@@ -37,8 +36,8 @@ class SRT16(
   val isLastCycle, enable: Bool = Wire(Bool())
   // State
   // because we need a CSA to minimize the critical path
-  val partialReminderCarry = RegEnable(partialReminderCarryNext, 0.U(wLen.W), enable)
-  val partialReminderSum = RegEnable(partialReminderSumNext, 0.U(wLen.W), enable)
+  val partialReminderCarry = RegEnable(partialReminderCarryNext, 0.U(xLen.W), enable)
+  val partialReminderSum = RegEnable(partialReminderSumNext, 0.U(xLen.W), enable)
   val divider = RegEnable(dividerNext, 0.U(dividerWidth.W), enable)
   val quotient = RegEnable(quotientNext, 0.U(n.W), enable)
   val quotientMinusOne = RegEnable(quotientMinusOneNext, 0.U(n.W), enable)
@@ -59,9 +58,9 @@ class SRT16(
   val remainderNoCorrect: UInt = partialReminderSum + partialReminderCarry
   val remainderCorrect: UInt =
     partialReminderSum + partialReminderCarry + (divisorExtended << radixLog2)
-  val needCorrect: Bool = remainderNoCorrect(wLen - 3).asBool
+  val needCorrect: Bool = remainderNoCorrect(xLen - 1).asBool
 
-  output.bits.reminder := Mux(needCorrect, remainderCorrect, remainderNoCorrect)(wLen - 4, radixLog2 + guardBitWidth)
+  output.bits.reminder := Mux(needCorrect, remainderCorrect, remainderNoCorrect)(xLen - 2, radixLog2 + guardBitWidth)
   output.bits.quotient := Mux(needCorrect, quotientMinusOne, quotient)
 
   // 5*CSA32  SRT16 <- SRT4 + SRT4*5 /SRT16 -> CSA53+CSA32
@@ -73,8 +72,8 @@ class SRT16(
     case 2  => Fill(radixLog2, 1.U(1.W)) ## ~(divisorExtended << 1)
   })
   val csa0InWidth = rWidth + radixLog2 + 1
-  val csaIn1 = leftShift(partialReminderSum, radixLog2).head(csa0InWidth)
-  val csaIn2 = leftShift(partialReminderCarry, radixLog2).head(csa0InWidth)
+  val csaIn1 = partialReminderSum.head(csa0InWidth)
+  val csaIn2 = partialReminderCarry.head(csa0InWidth)
 
   val csa1 = addition.csa.c32(VecInit(csaIn1, csaIn2, dividerMap(0).head(csa0InWidth))) // -2  csain 10bit
   val csa2 = addition.csa.c32(VecInit(csaIn1, csaIn2, dividerMap(1).head(csa0InWidth))) // -1
@@ -87,8 +86,8 @@ class SRT16(
   val partialDivider: UInt = dividerNext.head(dTruncateWidth)(dTruncateWidth - 2, 0)
   val qdsOH0: UInt =
     QDS(rWidth, ohWidth, dTruncateWidth - 1, tables)(
-      leftShift(partialReminderSum, radixLog2).head(rWidth),
-      leftShift(partialReminderCarry, radixLog2).head(rWidth),
+      partialReminderSum.head(rWidth),
+      partialReminderCarry.head(rWidth),
       partialDivider
     ) // q_j+1 oneHot
 
@@ -120,15 +119,15 @@ class SRT16(
 
   val csa0Out = addition.csa.c32(
     VecInit(
-      leftShift(partialReminderSum, radixLog2).head(wLen - radixLog2),
-      leftShift(partialReminderCarry, radixLog2).head(wLen - radixLog2 - 1) ## qds0sign,
+      partialReminderSum.head(xLen),
+      partialReminderCarry.head(xLen - 1) ## qds0sign,
       Mux1H(qdsOH0, dividerMap)
     )
   )
   val csa1Out = addition.csa.c32(
     VecInit(
-      leftShift(csa0Out(1), radixLog2).head(wLen - radixLog2),
-      leftShift(csa0Out(0), radixLog2 + 1).head(wLen - radixLog2 - 1) ## qds1sign,
+      leftShift(csa0Out(1), radixLog2).head(xLen),
+      leftShift(csa0Out(0), radixLog2 + 1).head(xLen - 1) ## qds1sign,
       Mux1H(qdsOH1, dividerMap)
     )
   )
diff --git a/arithmetic/src/division/srt/srt4/SRT4.scala b/arithmetic/src/division/srt/srt4/SRT4.scala
index 684f45d..af17ad5 100644
--- a/arithmetic/src/division/srt/srt4/SRT4.scala
+++ b/arithmetic/src/division/srt/srt4/SRT4.scala
@@ -26,26 +26,24 @@ import utils.leftShift
   * @param rTruncateWidth TruncateWidth for residual fractional part
   */
 class SRT4(
-  dividendWidth:  Int,
-  dividerWidth:   Int,
-  n:              Int, // the longest width
-  radixLog2:      Int = 2,
-  a:              Int = 2,
-  dTruncateWidth: Int = 4,
-  rTruncateWidth: Int = 4)
-    extends Module {
+            dividendWidth:  Int,
+            dividerWidth:   Int,
+            n:              Int, // the longest width
+            radixLog2:      Int = 2,
+            a:              Int = 2,
+            dTruncateWidth: Int = 4,
+            rTruncateWidth: Int = 4)
+  extends Module {
   val guardBitWidth = 1
 
   /** width for csa */
   val xLen: Int = dividendWidth + radixLog2 + 1 + guardBitWidth
-  val wLen: Int = xLen + radixLog2
   // IO
-  val input  = IO(Flipped(DecoupledIO(new SRTInput(dividendWidth, dividerWidth, n, 2))))
+  val input = IO(Flipped(DecoupledIO(new SRTInput(dividendWidth, dividerWidth, n, 2))))
   val output = IO(ValidIO(new SRTOutput(dividerWidth, dividendWidth)))
 
-  // rW[j]
-  // rW[0] = x
-  val partialReminderCarryNext, partialReminderSumNext = Wire(UInt(wLen.W))
+  //rW[j]
+  val partialReminderCarryNext, partialReminderSumNext = Wire(UInt(xLen.W))
   val quotientNext, quotientMinusOneNext = Wire(UInt(n.W))
   val dividerNext = Wire(UInt(dividerWidth.W))
   val counterNext = Wire(UInt(log2Ceil(n).W))
@@ -56,17 +54,19 @@ class SRT4(
 
   // State
   // because we need a CSA to minimize the critical path
-  val partialReminderCarry = RegEnable(partialReminderCarryNext, 0.U(wLen.W),         enable)
-  val partialReminderSum   = RegEnable(partialReminderSumNext,   0.U(wLen.W),         enable)
-  val divider              = RegEnable(dividerNext,              0.U(dividerWidth.W), enable)
-  val quotient             = RegEnable(quotientNext,             0.U(n.W),            enable)
-  val quotientMinusOne     = RegEnable(quotientMinusOneNext,     0.U(n.W),            enable)
-  val counter              = RegEnable(counterNext,              0.U(log2Ceil(n).W),  enable)
-
-  /** Contrl logic */
+  val partialReminderCarry = RegEnable(partialReminderCarryNext, 0.U(xLen.W), enable)
+  val partialReminderSum = RegEnable(partialReminderSumNext, 0.U(xLen.W), enable)
+  val divider = RegEnable(dividerNext, 0.U(dividerWidth.W), enable)
+  val quotient = RegEnable(quotientNext, 0.U(n.W), enable)
+  val quotientMinusOne = RegEnable(quotientMinusOneNext, 0.U(n.W), enable)
+  val counter = RegEnable(counterNext, 0.U(log2Ceil(n).W), enable)
+
   val occupiedNext = Wire(Bool())
   val occupied = RegNext(occupiedNext, false.B)
   occupiedNext := input.fire || (!isLastCycle && occupied)
+
+  //  Datapath
+  //  according two adders
   isLastCycle := !counter.orR
   output.valid := occupied && isLastCycle
   input.ready := !occupied
@@ -79,9 +79,9 @@ class SRT4(
   /** partialReminderSum is r*W[j], so remainderCorrect = remainderNoCorrect + r*divisor */
   val remainderCorrect: UInt =
     partialReminderSum + partialReminderCarry + (divisorExtended << radixLog2)
-  val needCorrect: Bool = remainderNoCorrect(wLen - 3).asBool
+  val needCorrect: Bool = remainderNoCorrect(xLen - 1).asBool
 
-  output.bits.reminder := Mux(needCorrect, remainderCorrect, remainderNoCorrect)(wLen - 4, radixLog2 + guardBitWidth)
+  output.bits.reminder := Mux(needCorrect, remainderCorrect, remainderNoCorrect)(xLen - 2, radixLog2 + guardBitWidth)
   output.bits.quotient := Mux(needCorrect, quotientMinusOne, quotient)
 
   /** width for truncated y */
@@ -93,18 +93,11 @@ class SRT4(
     case 3 => 6
   }
 
-  /** Quotient-Divisor-Select
-    *
-    * input0 = rtz partialSum
-    * input1 = rtz partialCarry
-    * input2 = rtz divisor
-    *
-    * use dividerNext instead of dividerReg to avoid waiting in  firt Iter
-    * */
+  /** QDS module whose output needs to be decoded */
   val selectedQuotientOH: UInt =
     QDS(rWidth, ohWidth, dTruncateWidth - 1, tables, a)(
-      leftShift(partialReminderSum,   radixLog2).head(rWidth),
-      leftShift(partialReminderCarry, radixLog2).head(rWidth),
+      partialReminderSum.head(rWidth),
+      partialReminderCarry.head(rWidth),
       dividerNext.head(dTruncateWidth)(dTruncateWidth - 2, 0) //.1********* -> 1*** -> ***
     )
   // On-The-Fly conversion
@@ -126,8 +119,8 @@ class SRT4(
 
       addition.csa.c32(
         VecInit(
-          leftShift(partialReminderSum, radixLog2).head(wLen - radixLog2),
-          leftShift(partialReminderCarry, radixLog2).head(wLen - radixLog2 - 1) ## qdsSign,
+          partialReminderSum.head(xLen),
+          partialReminderCarry.head(xLen - 1) ## qdsSign,
           Mux1H(selectedQuotientOH, dividerMap)
         )
       )
@@ -150,24 +143,24 @@ class SRT4(
       })
       val csa0 = addition.csa.c32(
         VecInit(
-          leftShift(partialReminderSum, radixLog2).head(wLen - radixLog2),
-          leftShift(partialReminderCarry, radixLog2).head(wLen - radixLog2 - 1) ## qds0Sign,
+          partialReminderSum.head(xLen),
+          partialReminderCarry.head(xLen - 1) ## qds0Sign,
           Mux1H(qHigh, dividerHMap)
         )
       )
       addition.csa.c32(
         VecInit(
-          csa0(1).head(wLen - radixLog2),
-          leftShift(csa0(0), 1).head(wLen - radixLog2 - 1) ## qds1Sign,
+          csa0(1).head(xLen),
+          leftShift(csa0(0), 1).head(xLen - 1) ## qds1Sign,
           Mux1H(qLow, dividerLMap)
         )
       )
     }
 
-  dividerNext              := Mux(input.fire, input.bits.divider, divider)
-  counterNext              := Mux(input.fire, input.bits.counter, counter - 1.U)
-  quotientNext             := Mux(input.fire, 0.U, otf(0))
-  quotientMinusOneNext     := Mux(input.fire, 0.U, otf(1))
-  partialReminderSumNext   := Mux(input.fire, input.bits.dividend, csa(1) << radixLog2)
+  dividerNext := Mux(input.fire, input.bits.divider, divider)
+  counterNext := Mux(input.fire, input.bits.counter, counter - 1.U)
+  quotientNext := Mux(input.fire, 0.U, otf(0))
+  quotientMinusOneNext := Mux(input.fire, 0.U, otf(1))
+  partialReminderSumNext := Mux(input.fire, input.bits.dividend, csa(1) << radixLog2)
   partialReminderCarryNext := Mux(input.fire, 0.U, csa(0) << 1 + radixLog2)
 }
diff --git a/arithmetic/src/division/srt/srt8/SRT8.scala b/arithmetic/src/division/srt/srt8/SRT8.scala
index 87b987b..162d99d 100644
--- a/arithmetic/src/division/srt/srt8/SRT8.scala
+++ b/arithmetic/src/division/srt/srt8/SRT8.scala
@@ -29,13 +29,12 @@ class SRT8(
 
   val guardBitWidth = 2
   val xLen: Int = dividendWidth + radixLog2 + 1 + guardBitWidth
-  val wLen: Int = xLen + radixLog2
 
   // IO
   val input = IO(Flipped(DecoupledIO(new SRTInput(dividendWidth, dividerWidth, n, 3))))
   val output = IO(ValidIO(new SRTOutput(dividerWidth, dividendWidth)))
 
-  val partialReminderCarryNext, partialReminderSumNext = Wire(UInt(wLen.W))
+  val partialReminderCarryNext, partialReminderSumNext = Wire(UInt(xLen.W))
   val quotientNext, quotientMinusOneNext = Wire(UInt(n.W))
   val dividerNext = Wire(UInt(dividerWidth.W))
   val counterNext = Wire(UInt(log2Ceil(n).W))
@@ -47,8 +46,8 @@ class SRT8(
 
   // State
   // because we need a CSA to minimize the critical path
-  val partialReminderCarry = RegEnable(partialReminderCarryNext, 0.U(wLen.W), enable)
-  val partialReminderSum = RegEnable(partialReminderSumNext, 0.U(wLen.W), enable)
+  val partialReminderCarry = RegEnable(partialReminderCarryNext, 0.U(xLen.W), enable)
+  val partialReminderSum = RegEnable(partialReminderSumNext, 0.U(xLen.W), enable)
   val divider = RegEnable(dividerNext, 0.U(dividerWidth.W), enable)
   val quotient = RegEnable(quotientNext, 0.U(n.W), enable)
   val quotientMinusOne = RegEnable(quotientMinusOneNext, 0.U(n.W), enable)
@@ -69,8 +68,8 @@ class SRT8(
   val remainderNoCorrect: UInt = partialReminderSum + partialReminderCarry
   val remainderCorrect: UInt =
     partialReminderSum + partialReminderCarry + (divisorExtended << radixLog2)
-  val needCorrect: Bool = remainderNoCorrect(wLen - 4).asBool
-  output.bits.reminder := Mux(needCorrect, remainderCorrect, remainderNoCorrect)(wLen - 5, radixLog2 + guardBitWidth)
+  val needCorrect: Bool = remainderNoCorrect(xLen - 1).asBool
+  output.bits.reminder := Mux(needCorrect, remainderCorrect, remainderNoCorrect)(xLen - 2, radixLog2 + guardBitWidth)
   output.bits.quotient := Mux(needCorrect, quotientMinusOne, quotient)
 
   val rWidth: Int = 1 + radixLog2 + rTruncateWidth
@@ -85,8 +84,8 @@ class SRT8(
   // qds
   val selectedQuotientOH: UInt =
     QDS(rWidth, ohWidth, dTruncateWidth - 1, tables, a)(
-      leftShift(partialReminderSum, radixLog2).head(rWidth),
-      leftShift(partialReminderCarry, radixLog2).head(rWidth),
+      partialReminderSum.head(rWidth),
+      partialReminderCarry.head(rWidth),
       dividerNext.head(dTruncateWidth)(dTruncateWidth - 2, 0) //.1********* -> 1*** -> ***
     )
   // On-The-Fly conversion
@@ -115,15 +114,15 @@ class SRT8(
     })
     val csa0 = addition.csa.c32(
       VecInit(
-        leftShift(partialReminderSum, radixLog2).head(wLen - radixLog2),
-        leftShift(partialReminderCarry, radixLog2).head(wLen - radixLog2 - 1) ## qdsSign0,
+        partialReminderSum.head(xLen),
+        partialReminderCarry.head(xLen - 1) ## qdsSign0,
         Mux1H(qHigh, dividerHMap)
       )
     )
     val csa1 = addition.csa.c32(
       VecInit(
-        csa0(1).head(wLen - radixLog2),
-        leftShift(csa0(0), 1).head(wLen - radixLog2 - 1) ## qdsSign1,
+        csa0(1).head(xLen),
+        leftShift(csa0(0), 1).head(xLen - 1) ## qdsSign1,
         Mux1H(qLow, dividerLMap)
       )
     )
@@ -143,15 +142,15 @@ class SRT8(
     })
     val csa0 = addition.csa.c32(
       VecInit(
-        leftShift(partialReminderSum, radixLog2).head(wLen - radixLog2),
-        leftShift(partialReminderCarry, radixLog2).head(wLen - radixLog2 - 1) ## qdsSign0,
+        partialReminderSum.head(xLen),
+        partialReminderCarry.head(xLen - 1) ## qdsSign0,
         Mux1H(qHigh, dividerHMap)
       )
     )
     val csa1 = addition.csa.c32(
       VecInit(
-        csa0(1).head(wLen - radixLog2),
-        leftShift(csa0(0), 1).head(wLen - radixLog2 - 1) ## qdsSign1,
+        csa0(1).head(xLen),
+        leftShift(csa0(0), 1).head(xLen - 1) ## qdsSign1,
         Mux1H(qLow, dividerLMap)
       )
     )
@@ -171,15 +170,15 @@ class SRT8(
     })
     val csa0 = addition.csa.c32(
       VecInit(
-        leftShift(partialReminderSum, radixLog2).head(wLen - radixLog2),
-        leftShift(partialReminderCarry, radixLog2).head(wLen - radixLog2 - 1) ## qdsSign0,
+        partialReminderSum.head(xLen),
+        partialReminderCarry.head(xLen - 1) ## qdsSign0,
         Mux1H(qHigh, dividerHMap)
       )
     )
     val csa1 = addition.csa.c32(
       VecInit(
-        csa0(1).head(wLen - radixLog2),
-        leftShift(csa0(0), 1).head(wLen - radixLog2 - 1) ## qdsSign1,
+        csa0(1).head(xLen),
+        leftShift(csa0(0), 1).head(xLen - 1) ## qdsSign1,
         Mux1H(qLow, dividerLMap)
       )
     )
@@ -199,15 +198,15 @@ class SRT8(
     })
     val csa0 = addition.csa.c32(
       VecInit(
-        leftShift(partialReminderSum, radixLog2).head(wLen - radixLog2),
-        leftShift(partialReminderCarry, radixLog2).head(wLen - radixLog2 - 1) ## qdsSign0,
+        partialReminderSum.head(xLen),
+        partialReminderCarry.head(xLen - 1) ## qdsSign0,
         Mux1H(qHigh, dividerHMap)
       )
     )
     val csa1 = addition.csa.c32(
       VecInit(
-        csa0(1).head(wLen - radixLog2),
-        leftShift(csa0(0), 1).head(wLen - radixLog2 - 1) ## qdsSign1,
+        csa0(1).head(xLen),
+        leftShift(csa0(0), 1).head(xLen - 1) ## qdsSign1,
         Mux1H(qLow, dividerLMap)
       )
     )

From 3dc4c3277062d5ce7784a26a13ff8ea775027891 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 1 Aug 2023 19:23:12 +0800
Subject: [PATCH 017/109] rename SRT4/8Test to SRT4/8IntegerTest

---
 .../division/srt/{SRT16Test.scala => SRT16IntegerTest.scala}  | 4 ++--
 .../division/srt/{SRT8Test.scala => SRT8IntegerTest.scala}    | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)
 rename arithmetic/tests/src/division/srt/{SRT16Test.scala => SRT16IntegerTest.scala} (97%)
 rename arithmetic/tests/src/division/srt/{SRT8Test.scala => SRT8IntegerTest.scala} (97%)

diff --git a/arithmetic/tests/src/division/srt/SRT16Test.scala b/arithmetic/tests/src/division/srt/SRT16IntegerTest.scala
similarity index 97%
rename from arithmetic/tests/src/division/srt/SRT16Test.scala
rename to arithmetic/tests/src/division/srt/SRT16IntegerTest.scala
index 006d6ae..2ceabee 100644
--- a/arithmetic/tests/src/division/srt/SRT16Test.scala
+++ b/arithmetic/tests/src/division/srt/SRT16IntegerTest.scala
@@ -6,9 +6,9 @@ import utest._
 
 import scala.util.Random
 
-object SRT16Test extends TestSuite with ChiselUtestTester {
+object SRT16IntegerTest extends TestSuite with ChiselUtestTester {
   def tests: Tests = Tests {
-    test("SRT16 should pass") {
+    test("SRT16 Integer should pass") {
       def testcase(width: Int, x: Int, d: Int): Unit = {
         // parameters
         val radixLog2: Int = 4
diff --git a/arithmetic/tests/src/division/srt/SRT8Test.scala b/arithmetic/tests/src/division/srt/SRT8IntegerTest.scala
similarity index 97%
rename from arithmetic/tests/src/division/srt/SRT8Test.scala
rename to arithmetic/tests/src/division/srt/SRT8IntegerTest.scala
index 6cbc08d..d099b68 100644
--- a/arithmetic/tests/src/division/srt/SRT8Test.scala
+++ b/arithmetic/tests/src/division/srt/SRT8IntegerTest.scala
@@ -6,9 +6,9 @@ import utest._
 
 import scala.util.Random
 
-object SRT8Test extends TestSuite with ChiselUtestTester {
+object SRT8IntegerTest extends TestSuite with ChiselUtestTester {
   def tests: Tests = Tests {
-    test("SRT8 should pass") {
+    test("SRT8 Integer should pass") {
       def testcase(width: Int, x: Int, d: Int): Unit = {
         // parameters
         val radixLog2: Int = 3

From 085e08903950a32eada7c227ab56b8902bc1bbdc Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 2 Aug 2023 10:38:02 +0800
Subject: [PATCH 018/109] add SRT4FracTest

---
 .../tests/src/division/srt/SRT4FracTest.scala | 77 +++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 arithmetic/tests/src/division/srt/SRT4FracTest.scala

diff --git a/arithmetic/tests/src/division/srt/SRT4FracTest.scala b/arithmetic/tests/src/division/srt/SRT4FracTest.scala
new file mode 100644
index 0000000..64a96e2
--- /dev/null
+++ b/arithmetic/tests/src/division/srt/SRT4FracTest.scala
@@ -0,0 +1,77 @@
+package division.srt.srt4
+
+import chisel3._
+import chiseltest._
+import utest._
+
+import scala.util.Random
+
+object SRT4FracTest extends TestSuite with ChiselUtestTester {
+  def tests: Tests = Tests {
+    test("SRT4 Fraction should pass") {
+      def testcase(width: Int, x: Int, d: Int): Unit = {
+        // parameters
+        val radixLog2: Int = 2
+        val n:         Int = width
+        val m:         Int = n - 1
+
+//        val dividend: BigInt = BigInt("fffffff0", 16) + x
+        val dividend: BigInt = 350 // 101011110
+        val divisor:  BigInt = 197 // 11000101
+        val counter = 14
+
+        // test
+        testCircuit(
+          new SRT4(n, n, n),
+          Seq(chiseltest.internal.NoThreadingAnnotation, chiseltest.simulator.WriteVcdAnnotation)
+        ) { dut: SRT4 =>
+          dut.clock.setTimeout(0)
+          dut.input.valid.poke(true.B)
+          dut.input.bits.dividend.poke((dividend << 24).U)
+          dut.input.bits.divider.poke((divisor << 24).U)
+          dut.input.bits.counter.poke(counter.U)
+          dut.clock.step()
+          dut.input.valid.poke(false.B)
+          var flag = false
+          for (a <- 1 to 1000 if !flag) {
+            if (dut.output.valid.peek().litValue == 1) {
+              flag = true
+
+              def printvalue(): Unit = {
+
+                println(
+                  "%d / %d = %d --- %d".format(
+                    dividend,
+                    divisor,
+                    dut.output.bits.quotient.peek().litValue,
+                    dut.output.bits.reminder.peek().litValue
+                  )
+                )
+              }
+
+//              utest.assert(dut.output.bits.quotient.peek().litValue == quotient)
+//              utest.assert(dut.output.bits.reminder.peek().litValue >> zeroHeaddivisor == remainder)
+            }
+            dut.clock.step()
+          }
+          utest.assert(flag)
+          dut.clock.step(scala.util.Random.nextInt(10))
+        }
+      }
+
+//      for (i <- 0 to 15) {
+//        for (j <- 1 to 16) {
+//          testcase(32, i, j)
+//        }
+//      }
+
+//            for (i <- 2 to 15) {
+//              for (j <- 1 to i-1) {
+//                testcase(4, i, j)
+//              }
+//            }
+//      testcase(32, 15, 1)
+
+    }
+  }
+}

From 1e287118967123be501453a0eee9b97b7d60d634 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 7 Aug 2023 17:41:04 +0800
Subject: [PATCH 019/109] [div] add SRT16FracTester

---
 .../src/division/srt/SRT16FracTester.scala    | 92 +++++++++++++++++++
 .../tests/src/division/srt/SRT4FracTest.scala | 70 ++++++++------
 2 files changed, 133 insertions(+), 29 deletions(-)
 create mode 100644 arithmetic/tests/src/division/srt/SRT16FracTester.scala

diff --git a/arithmetic/tests/src/division/srt/SRT16FracTester.scala b/arithmetic/tests/src/division/srt/SRT16FracTester.scala
new file mode 100644
index 0000000..4d1e813
--- /dev/null
+++ b/arithmetic/tests/src/division/srt/SRT16FracTester.scala
@@ -0,0 +1,92 @@
+package division.srt.srt16
+
+import chisel3._
+import chiseltest._
+import utest._
+
+import scala.util.Random
+
+object SRT16FracTest extends TestSuite with ChiselUtestTester {
+  def tests: Tests = Tests {
+    test("SRT16 Fraction should pass") {
+      def testcase(width: Int): Unit = {
+        def extendTofull(input:String, width:Int) =(Seq.fill(width - input.length)("0").mkString("") + input)
+        val n:         Int = width
+
+        val xFloat = (0.5 + Random.nextFloat() /2).toFloat
+        val dFloat = (0.5 + Random.nextFloat() /2).toFloat
+
+        val xFloatString = extendTofull(java.lang.Float.floatToIntBits(xFloat).toBinaryString, 32)
+        val dFloatString = extendTofull(java.lang.Float.floatToIntBits(dFloat).toBinaryString, 32)
+        val xInput = "b1"+xFloatString.substring(9, 32)+"00000000"
+        val dInput = "b1"+dFloatString.substring(9, 32)+"00000000"
+
+
+        val counter = 8
+
+        val qDouble = xFloat / dFloat
+        val qDoubleString = extendTofull(java.lang.Double.doubleToLongBits(qDouble).toBinaryString,64)
+        val q_Expect = "1"+ qDoubleString.substring(12, 39)
+
+        // test
+        testCircuit(
+          new SRT16(n, n, n),
+          Seq(chiseltest.internal.NoThreadingAnnotation, chiseltest.simulator.WriteVcdAnnotation)
+        ) { dut: SRT16 =>
+          dut.clock.setTimeout(0)
+          dut.input.valid.poke(true.B)
+          dut.input.bits.dividend.poke((xInput).U)
+          dut.input.bits.divider.poke((dInput).U)
+          dut.input.bits.counter.poke(counter.U)
+          dut.clock.step()
+          dut.input.valid.poke(false.B)
+          var flag = false
+          for (a <- 1 to 1000 if !flag) {
+            if (dut.output.valid.peek().litValue == 1) {
+              flag = true
+
+              val quotient_actual = extendTofull(dut.output.bits.quotient.peek().litValue.toString(2),32)
+
+              val q_Actual = if(quotient_actual(4).toString=="0") {
+                quotient_actual.substring(5,32)
+              } else {
+                quotient_actual.substring(4,32)
+              }
+
+
+              def printvalue(): Unit = {
+
+                println(xFloat.toString + "/ " + dFloat.toString + "="+ qDouble.toString)
+                println("xinput = " + xInput)
+                println("dinput = " + dInput)
+
+                println("all q = " + quotient_actual)
+                println("all q size ="+ quotient_actual.length.toString)
+
+                println("q_expect = " + q_Expect)
+                println("q_actual = " + q_Actual)
+
+              }
+              if(q_Expect != q_Actual){
+                printvalue()
+                utest.assert(q_Expect == q_Actual)
+              }
+
+
+
+            }
+            dut.clock.step()
+          }
+          utest.assert(flag)
+          dut.clock.step(scala.util.Random.nextInt(10))
+        }
+      }
+
+
+//      for (i <- 1 to 100) {
+//        testcase(32)
+//      }
+
+    }
+  }
+}
\ No newline at end of file
diff --git a/arithmetic/tests/src/division/srt/SRT4FracTest.scala b/arithmetic/tests/src/division/srt/SRT4FracTest.scala
index 64a96e2..4fbaeaf 100644
--- a/arithmetic/tests/src/division/srt/SRT4FracTest.scala
+++ b/arithmetic/tests/src/division/srt/SRT4FracTest.scala
@@ -9,17 +9,25 @@ import scala.util.Random
 object SRT4FracTest extends TestSuite with ChiselUtestTester {
   def tests: Tests = Tests {
     test("SRT4 Fraction should pass") {
-      def testcase(width: Int, x: Int, d: Int): Unit = {
-        // parameters
-        val radixLog2: Int = 2
+      def testcase(width: Int): Unit = {
+        def extendTofull(input:String, width:Int) =(Seq.fill(width - input.length)("0").mkString("") + input)
         val n:         Int = width
-        val m:         Int = n - 1
 
-//        val dividend: BigInt = BigInt("fffffff0", 16) + x
-        val dividend: BigInt = 350 // 101011110
-        val divisor:  BigInt = 197 // 11000101
+        val xFloat = (0.5 + Random.nextFloat() /2).toFloat
+        val dFloat = (0.5 + Random.nextFloat() /2).toFloat
+
+        val xFloatString = extendTofull(java.lang.Float.floatToIntBits(xFloat).toBinaryString, 32)
+        val dFloatString = extendTofull(java.lang.Float.floatToIntBits(dFloat).toBinaryString, 32)
+        val xInput = "b1"+xFloatString.substring(9, 32)+"00000000"
+        val dInput = "b1"+dFloatString.substring(9, 32)+"00000000"
+
+
         val counter = 14
 
+        val qDouble = xFloat / dFloat
+        val qDoubleString = extendTofull(java.lang.Double.doubleToLongBits(qDouble).toBinaryString,64)
+        val q_Expect = "1"+ qDoubleString.substring(12, 37)
+
         // test
         testCircuit(
           new SRT4(n, n, n),
@@ -27,8 +35,8 @@ object SRT4FracTest extends TestSuite with ChiselUtestTester {
         ) { dut: SRT4 =>
           dut.clock.setTimeout(0)
           dut.input.valid.poke(true.B)
-          dut.input.bits.dividend.poke((dividend << 24).U)
-          dut.input.bits.divider.poke((divisor << 24).U)
+          dut.input.bits.dividend.poke((xInput).U)
+          dut.input.bits.divider.poke((dInput).U)
           dut.input.bits.counter.poke(counter.U)
           dut.clock.step()
           dut.input.valid.poke(false.B)
@@ -36,21 +44,33 @@ object SRT4FracTest extends TestSuite with ChiselUtestTester {
           for (a <- 1 to 1000 if !flag) {
             if (dut.output.valid.peek().litValue == 1) {
               flag = true
+              val quotient_actual = extendTofull(dut.output.bits.quotient.peek().litValue.toString(2),32)
+              val q_Actual = if(quotient_actual(6)=="0") {
+                quotient_actual.substring(7,32)
+              } else {
+              quotient_actual.substring(6,32)
+            }
+
 
               def printvalue(): Unit = {
 
-                println(
-                  "%d / %d = %d --- %d".format(
-                    dividend,
-                    divisor,
-                    dut.output.bits.quotient.peek().litValue,
-                    dut.output.bits.reminder.peek().litValue
-                  )
-                )
+                println(xFloat.toString + "/ " + dFloat.toString + "="+ qDouble.toString)
+                println("xinput = " + xInput)
+                println("dinput = " + dInput)
+
+                println("all q = " + quotient_actual)
+
+                println("q_expect = " + q_Expect)
+                println("q_actual = " + q_Actual)
+
               }
+              if(q_Expect != q_Actual){
+                printvalue()
+                utest.assert(q_Expect == q_Actual)
+              }
+
+
 
-//              utest.assert(dut.output.bits.quotient.peek().litValue == quotient)
-//              utest.assert(dut.output.bits.reminder.peek().litValue >> zeroHeaddivisor == remainder)
             }
             dut.clock.step()
           }
@@ -59,18 +79,10 @@ object SRT4FracTest extends TestSuite with ChiselUtestTester {
         }
       }
 
-//      for (i <- 0 to 15) {
-//        for (j <- 1 to 16) {
-//          testcase(32, i, j)
-//        }
-//      }
 
-//            for (i <- 2 to 15) {
-//              for (j <- 1 to i-1) {
-//                testcase(4, i, j)
-//              }
+//            for (i <- 1 to 100) {
+//              testcase(32)
 //            }
-//      testcase(32, 15, 1)
 
     }
   }

From 966857a143afac8c7ab2e94e97cf112fbdc5f02a Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 7 Aug 2023 17:51:07 +0800
Subject: [PATCH 020/109] [sqrtfloat] reduce rbits to 2

---
 arithmetic/src/float/RoundingUnit.scala | 4 ++--
 arithmetic/src/float/SqrtFloat.scala    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 6df4606..75bd900 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -18,7 +18,7 @@ class RoundingUnit extends Module{
     val infiniteExc = Bool() // overrides 'in' except for 'in.sign'
     val sig = UInt(23.W)
     val exp = UInt(8.W)
-    val rBits = UInt(3.W)
+    val rBits = UInt(2.W)
     val sign = Bool()
     val roundingMode = UInt(5.W)
   }))
@@ -56,7 +56,7 @@ class RoundingUnit extends Module{
   /** normal case */
 
   /** todo later use Mux?*/
-  sigIncr := (roundingMode_near_even && input.rBits(2) && input.rBits(1,0).orR) ||
+  sigIncr := (roundingMode_near_even && input.rBits(1) && input.rBits(0)) ||
     (roundingMode_min &&  input.sign && input.rBits.orR) ||
     (roundingMode_max && !input.sign && input.rBits.orR) ||
     (roundingMode_near_maxMag && input.rBits.orR)
diff --git a/arithmetic/src/float/SqrtFloat.scala b/arithmetic/src/float/SqrtFloat.scala
index 5b3f5be..43a4225 100644
--- a/arithmetic/src/float/SqrtFloat.scala
+++ b/arithmetic/src/float/SqrtFloat.scala
@@ -56,7 +56,7 @@ class SqrtFloat(expWidth: Int, sigWidth: Int) extends Module{
   SqrtModule.input.bits.operand := fractIn
   SqrtModule.output.ready := output.ready
 
-  val rbits = SqrtModule.output.bits.result(1,0) ## (!SqrtModule.output.bits.zeroRemainder)
+  val rbits = SqrtModule.output.bits.result(1) ## (!SqrtModule.output.bits.zeroRemainder || SqrtModule.output.bits.result(0))
   val sigforRound = SqrtModule.output.bits.result(24,2)
 
 

From 39c0220df3d785f25839ee66f5ca8c7bf1ac9056 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 8 Aug 2023 15:48:05 +0800
Subject: [PATCH 021/109] [divfloat] move exp bias logic to RoundingUnit

---
 arithmetic/src/float/RoundingUnit.scala | 8 +++++---
 arithmetic/src/float/SqrtFloat.scala    | 9 ++-------
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 75bd900..e04c641 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -49,9 +49,10 @@ class RoundingUnit extends Module{
 
 
   val sigPlus = Wire(UInt(23.W))
-  val expPlus = Wire(UInt(8.W))
+  val expBiasPlus = Wire(UInt(8.W))
   val sigIncr = Wire(Bool())
   val expIncr = Wire(Bool())
+  val expBiased = Wire(UInt(8.W))
 
   /** normal case */
 
@@ -65,13 +66,14 @@ class RoundingUnit extends Module{
 
   /** for sig = all 1 and sigIncr*/
   expIncr := input.sig.andR && sigIncr
-  expPlus := input.exp + expIncr
+  expBiased := input.exp + 127.U
+  expBiasPlus := expBiased + expIncr
 
   common_overflow := input.exp.andR && expIncr
   common_inexact := input.rBits.orR
 
   val common_sigOut = Mux(sigIncr, sigPlus, input.sig)
-  val common_expOut = Mux(expIncr, expPlus, input.exp)
+  val common_expOut = Mux(expIncr, expBiasPlus, expBiased)
 
   val common_out = Mux(common_overflow, infiniteOut, input.sign ## common_expOut ## common_sigOut)
 
diff --git a/arithmetic/src/float/SqrtFloat.scala b/arithmetic/src/float/SqrtFloat.scala
index 43a4225..4873b66 100644
--- a/arithmetic/src/float/SqrtFloat.scala
+++ b/arithmetic/src/float/SqrtFloat.scala
@@ -13,9 +13,6 @@ import sqrt._
 class SqrtFloat(expWidth: Int, sigWidth: Int) extends Module{
   val input = IO(Flipped(DecoupledIO(new FloatSqrtInput(expWidth, sigWidth))))
   val output = IO(DecoupledIO(new FloatSqrtOutput(expWidth, sigWidth)))
-  val debug = IO(Output(new Bundle() {
-    val fractIn = UInt(26.W)
-  }))
   val rawFloatIn = rawFloatFromFN(expWidth,sigWidth,input.bits.oprand)
 
   /** Control path */
@@ -46,7 +43,7 @@ class SqrtFloat(expWidth: Int, sigWidth: Int) extends Module{
     *}}}
     */
   val expOutNext = Wire(UInt(expWidth.W))
-  expOutNext := adjustedExp(expWidth,1) + 127.U
+  expOutNext := adjustedExp(expWidth,1)
   val expOut = RegEnable(expOutNext, 0.U(expWidth.W), input.fire)
   val fractIn = Mux(input.bits.oprand(sigWidth-1), Cat("b0".U(1.W),rawFloatIn.sig(sigWidth-1, 0),0.U(1.W)),
     Cat(rawFloatIn.sig(sigWidth-1, 0),0.U(2.W)))
@@ -70,11 +67,9 @@ class SqrtFloat(expWidth: Int, sigWidth: Int) extends Module{
     invalidExec,
     infinitExec)
   output.bits.sig := SqrtModule.output.bits.result
-  output.bits.exp := expOut
+  output.bits.exp := output.bits.result(30,23)
   output.valid := SqrtModule.output.valid || fastWorking
 
-  debug.fractIn := fractIn
-
 }
 
 class FloatSqrtInput(expWidth: Int, sigWidth: Int) extends Bundle() {

From c51c5d23837f5a07186c6af996e41ac7f1838818 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 8 Aug 2023 13:10:20 +0800
Subject: [PATCH 022/109] [divfloat] add DivFloat and tester

---
 arithmetic/src/float/DivFloat.scala           | 110 ++++++++++++++++++
 .../src/division/srt/SRT16FracTester.scala    |  47 ++++++--
 .../tests/src/float/DivFloatTester.scala      | 110 ++++++++++++++++++
 .../tests/src/float/SqrtFloatTester.scala     |   6 +-
 4 files changed, 260 insertions(+), 13 deletions(-)
 create mode 100644 arithmetic/src/float/DivFloat.scala
 create mode 100644 arithmetic/tests/src/float/DivFloatTester.scala

diff --git a/arithmetic/src/float/DivFloat.scala b/arithmetic/src/float/DivFloat.scala
new file mode 100644
index 0000000..8683946
--- /dev/null
+++ b/arithmetic/src/float/DivFloat.scala
@@ -0,0 +1,110 @@
+package float
+
+import chisel3._
+import chisel3.util._
+import division.srt.srt16._
+
+
+/**
+  * input
+  * dividend = 0.1f  -> 1f +"00000" right extends to 32
+  * divisor  = 0.1f  -> 1f +"00000" right extends to 32
+  *
+  * output = 0.01f or 0.1f, LSB 28bits effective
+  * 0.01f: 28bits=01f f=sig=select(25,3)
+  * 0.1f : 28bits=1f  f=sig=select(26,4)
+  *
+  * */
+class DivFloat(expWidth: Int, sigWidth: Int) extends Module{
+  val fpWidth = expWidth + sigWidth
+  val calWidth = 28
+  val input = IO(Flipped(DecoupledIO(new FloatDivInput(8, 24))))
+  val output = IO(DecoupledIO(new FloatDivOutput(8, 24)))
+
+
+  // for div, don't need to calculate rawExp
+  val rawFloatDividend = rawFloatFromFN(expWidth,sigWidth,input.bits.dividend)
+  val rawFloatDivisor  = rawFloatFromFN(expWidth,sigWidth,input.bits.divisor)
+
+
+  // Data Path
+  val sign = rawFloatDividend.sign ^ rawFloatDivisor.sign
+
+  val dividendIn = Wire(UInt((fpWidth).W))
+  val divisorIn = Wire(UInt((fpWidth).W))
+  val signReg = RegEnable(sign, input.fire)
+  val expRaw = Wire(UInt(expWidth.W))
+  val expOutNext = Wire(UInt(expWidth.W))
+  val expfinal = Wire(UInt(expWidth.W))
+
+
+
+  // divIter logic
+
+  dividendIn := Cat(1.U(1.W), rawFloatDividend.sig(sigWidth-2, 0), 0.U(expWidth.W))
+  divisorIn  := Cat(1.U(1.W), rawFloatDivisor.sig(sigWidth-2, 0), 0.U(expWidth.W))
+
+  val divModule = Module(new SRT16(fpWidth,fpWidth,fpWidth))
+  divModule.input.bits.dividend := dividendIn
+  divModule.input.bits.divider  := divisorIn
+  divModule.input.bits.counter  := 8.U
+
+  divModule.input.valid := input.valid
+  input.ready  := divModule.input.ready
+  output.valid := divModule.output.valid
+
+
+  val needNormNext = Wire(Bool())
+  val needNorm = RegEnable(needNormNext,input.fire)
+  needNormNext := input.bits.divisor(sigWidth-2, 0) > input.bits.dividend(sigWidth-2, 0)
+
+  // todo verify it
+  expRaw := input.bits.dividend(fpWidth-1, sigWidth-1) - input.bits.divisor(fpWidth-1, sigWidth-1)
+  expOutNext := expRaw + 127.U
+  val expOutReg = RegEnable(expOutNext, 0.U(expWidth.W), input.fire)
+  expfinal := expOutReg - needNorm
+
+
+  val sigUnRound = Mux(needNorm, divModule.output.bits.quotient(calWidth-3, calWidth-sigWidth-1),divModule.output.bits.quotient(calWidth-2, calWidth-sigWidth))
+  val rbits = Mux(needNorm, divModule.output.bits.quotient(calWidth-sigWidth-2)##1.U(1.W), divModule.output.bits.quotient(calWidth-sigWidth-1)##1.U(1.W))
+
+
+  /** @todo exceptions
+    *
+    *       >256
+    *       subnormal
+    * */
+
+  val invalidExec = false.B
+  val infinitExec = false.B
+
+
+
+
+
+
+  output.bits.result := RoundingUnit(
+    signReg,
+    expfinal,
+    sigUnRound,
+    rbits,
+    consts.round_near_even,
+    invalidExec,
+    infinitExec)
+
+  output.bits.sig := output.bits.result(22,0)
+  output.bits.exp := expfinal
+
+
+}
+class FloatDivInput(expWidth: Int, sigWidth: Int) extends Bundle() {
+  val dividend = UInt((expWidth + sigWidth).W)
+  val divisor  = UInt((expWidth + sigWidth).W)
+}
+
+/** add 2 for rounding */
+class FloatDivOutput(expWidth: Int, sigWidth: Int) extends Bundle() {
+  val result = UInt((expWidth + sigWidth).W)
+  val sig = UInt((sigWidth-1).W)
+  val exp = UInt(expWidth.W)
+}
\ No newline at end of file
diff --git a/arithmetic/tests/src/division/srt/SRT16FracTester.scala b/arithmetic/tests/src/division/srt/SRT16FracTester.scala
index 4d1e813..2777455 100644
--- a/arithmetic/tests/src/division/srt/SRT16FracTester.scala
+++ b/arithmetic/tests/src/division/srt/SRT16FracTester.scala
@@ -6,14 +6,14 @@ import utest._
 
 import scala.util.Random
 
-object SRT16FracTest extends TestSuite with ChiselUtestTester {
+object SRT16FracTester extends TestSuite with ChiselUtestTester {
   def tests: Tests = Tests {
     test("SRT16 Fraction should pass") {
       def testcase(width: Int): Unit = {
         def extendTofull(input:String, width:Int) =(Seq.fill(width - input.length)("0").mkString("") + input)
         val n:         Int = width
 
-        val xFloat = (0.5 + Random.nextFloat() /2).toFloat
+        val xFloat = (0.5 +Random.nextFloat() /2).toFloat
         val dFloat = (0.5 + Random.nextFloat() /2).toFloat
 
         val xFloatString = extendTofull(java.lang.Float.floatToIntBits(xFloat).toBinaryString, 32)
@@ -22,11 +22,25 @@ object SRT16FracTest extends TestSuite with ChiselUtestTester {
         val dInput = "b1"+dFloatString.substring(9, 32)+"00000000"
 
 
+
+
         val counter = 8
 
         val qDouble = xFloat / dFloat
+        val qFloat  = qDouble.toFloat
         val qDoubleString = extendTofull(java.lang.Double.doubleToLongBits(qDouble).toBinaryString,64)
-        val q_Expect = "1"+ qDoubleString.substring(12, 39)
+        val qFloatString  = extendTofull(java.lang.Float.floatToIntBits(qFloat).toBinaryString, 32)
+        val q_SigExpect = qFloatString.substring(9, 32)
+
+        val q_SigExpectInt = Integer.parseInt(q_SigExpect, 2)
+
+        val xExp = Integer.parseInt(xFloatString.substring(1,9),2)
+        val dExp = Integer.parseInt(dFloatString.substring(1,9),2)
+        val expDiff = xExp - dExp
+        val exp_actual = expDiff + 128
+        val q_ExpExpect = Integer.parseInt(qFloatString.substring(1,9),2)
+
+
 
         // test
         testCircuit(
@@ -47,12 +61,18 @@ object SRT16FracTest extends TestSuite with ChiselUtestTester {
 
               val quotient_actual = extendTofull(dut.output.bits.quotient.peek().litValue.toString(2),32)
 
-              val q_Actual = if(quotient_actual(4).toString=="0") {
-                quotient_actual.substring(5,32)
+              val isLess = quotient_actual(4).toString=="0"
+              val q_SigBefore = if(isLess) {
+                quotient_actual.substring(6,32)
               } else {
-                quotient_actual.substring(4,32)
+                quotient_actual.substring(5,32)
               }
 
+              val sigIncr = q_SigBefore(23).toString.toInt
+              val q_SigActualInt = Integer.parseInt(q_SigBefore.substring(0,23), 2) + sigIncr
+
+
+
 
               def printvalue(): Unit = {
 
@@ -61,15 +81,22 @@ object SRT16FracTest extends TestSuite with ChiselUtestTester {
                 println("dinput = " + dInput)
 
                 println("all q = " + quotient_actual)
+                println("q_except = "+ qFloatString.toString)
                 println("all q size ="+ quotient_actual.length.toString)
 
-                println("q_expect = " + q_Expect)
-                println("q_actual = " + q_Actual)
+                println("isLess= "+ isLess.toString)
+
+                println("q_expect = " + q_SigExpect)
+                println("q_actual = " + q_SigBefore)
+
+                println("q_expectInt = " + q_SigExpectInt)
+                println("q_actualInt = " + q_SigActualInt)
 
               }
-              if(q_Expect != q_Actual){
+              if(q_SigActualInt != q_SigExpectInt){
                 printvalue()
-                utest.assert(q_Expect == q_Actual)
+                utest.assert(q_SigActualInt == q_SigExpectInt)
+
               }
 
 
diff --git a/arithmetic/tests/src/float/DivFloatTester.scala b/arithmetic/tests/src/float/DivFloatTester.scala
new file mode 100644
index 0000000..41a36f4
--- /dev/null
+++ b/arithmetic/tests/src/float/DivFloatTester.scala
@@ -0,0 +1,110 @@
+package float
+
+import chisel3._
+import chiseltest._
+import utest._
+
+import scala.util.Random
+
+import division.srt.srt16._
+
+object DivFloatTester extends TestSuite with ChiselUtestTester {
+  def tests: Tests = Tests {
+    test("Div Float should pass") {
+      def testcase(width: Int): Unit = {
+        def extendTofull(input:String, width:Int) =(Seq.fill(width - input.length)("0").mkString("") + input)
+        val n:         Int = width
+
+        val xFloat = (Random.nextInt(100000) + Random.nextFloat() ).toFloat
+        val dFloat = (Random.nextInt(100000) + Random.nextFloat() ).toFloat
+
+        val xFloatString = extendTofull(java.lang.Float.floatToIntBits(xFloat).toBinaryString, 32)
+        val dFloatString = extendTofull(java.lang.Float.floatToIntBits(dFloat).toBinaryString, 32)
+        val xInput = "b"+xFloatString
+        val dInput = "b"+dFloatString
+
+
+        val counter = 8
+
+        val qDouble = xFloat / dFloat
+        val qFloat = qDouble.toFloat
+        val qDoubleString = extendTofull(java.lang.Double.doubleToLongBits(qDouble).toBinaryString, 64)
+        val qFloatString = extendTofull(java.lang.Float.floatToIntBits(qFloat).toBinaryString, 32)
+        val sig_Expect = qFloatString.substring(9, 32)
+        val exp_Expect = qFloatString.substring(1, 9)
+
+
+
+        val xExp = Integer.parseInt(xFloatString.substring(1, 9), 2)
+        val dExp = Integer.parseInt(dFloatString.substring(1, 9), 2)
+//        val expDiff = xExp - dExp
+
+
+
+
+        // test
+        testCircuit(
+          new DivFloat(8,24),
+          Seq(chiseltest.internal.NoThreadingAnnotation, chiseltest.simulator.WriteVcdAnnotation)
+        ) { dut: DivFloat =>
+          dut.clock.setTimeout(0)
+          dut.input.valid.poke(true.B)
+          dut.input.bits.dividend.poke((xInput).U)
+          dut.input.bits.divisor.poke((dInput).U)
+          dut.clock.step()
+          dut.input.valid.poke(false.B)
+          var flag = false
+          for (a <- 1 to 1000 if !flag) {
+            if (dut.output.valid.peek().litValue == 1) {
+              flag = true
+
+              val sig_Actual = extendTofull(dut.output.bits.sig.peek().litValue.toString(2),23)
+              val exp_Actual = extendTofull(dut.output.bits.exp.peek().litValue.toString(2),8)
+              val result_Actual = extendTofull(dut.output.bits.result.peek().litValue.toString(2),32)
+
+              val expInt_Actual = Integer.parseInt(exp_Actual,2)
+              val expInt_Expect = Integer.parseInt(exp_Expect,2)
+
+
+
+
+
+
+              def printvalue(): Unit = {
+
+                println(xFloat.toString + "/ " + dFloat.toString + "="+ qDouble.toString)
+                println("expInt_Actual = " + expInt_Actual)
+                println("expInt_Expect = " + expInt_Expect)
+
+//                println("all q = " + quotient_actual)
+//                println("all q size ="+ quotient_actual.length.toString)
+
+                println("sig_expect = " + sig_Expect)
+                println("sig_actual = " + sig_Actual)
+
+              }
+              if((sig_Expect != sig_Actual)|| (exp_Actual != exp_Expect)||(result_Actual != qFloatString)){
+                printvalue()
+                utest.assert(sig_Expect == sig_Actual)
+                utest.assert(exp_Expect == exp_Actual)
+                utest.assert(result_Actual != qFloatString)
+              }
+
+
+
+            }
+            dut.clock.step()
+          }
+          utest.assert(flag)
+          dut.clock.step(scala.util.Random.nextInt(10))
+        }
+      }
+
+
+            for (i <- 1 to 100) {
+              testcase(32)
+            }
+
+    }
+  }
+}
\ No newline at end of file
diff --git a/arithmetic/tests/src/float/SqrtFloatTester.scala b/arithmetic/tests/src/float/SqrtFloatTester.scala
index c0e619f..d4cad3f 100644
--- a/arithmetic/tests/src/float/SqrtFloatTester.scala
+++ b/arithmetic/tests/src/float/SqrtFloatTester.scala
@@ -88,9 +88,9 @@ object SquareRootTester extends TestSuite with ChiselUtestTester {
         }
       }
 
-      for (i <- 1 to 100) {
-        testcase()
-      }
+//      for (i <- 1 to 100) {
+//        testcase()
+//      }
 
     }
   }

From 13dc237ec7146531634dca791bfb9ddc37321db1 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 8 Aug 2023 16:51:06 +0800
Subject: [PATCH 023/109] [divfloat] fix divfloat

---
 arithmetic/src/float/DivFloat.scala           | 46 ++++++++++---------
 .../tests/src/float/DivFloatTester.scala      | 10 ----
 2 files changed, 24 insertions(+), 32 deletions(-)

diff --git a/arithmetic/src/float/DivFloat.scala b/arithmetic/src/float/DivFloat.scala
index 8683946..da49f2d 100644
--- a/arithmetic/src/float/DivFloat.scala
+++ b/arithmetic/src/float/DivFloat.scala
@@ -7,12 +7,17 @@ import division.srt.srt16._
 
 /**
   * input
+  * {{{
   * dividend = 0.1f  -> 1f +"00000" right extends to 32
   * divisor  = 0.1f  -> 1f +"00000" right extends to 32
+  * }}}
   *
   * output = 0.01f or 0.1f, LSB 28bits effective
+  * {{{
   * 0.01f: 28bits=01f f=sig=select(25,3)
   * 0.1f : 28bits=1f  f=sig=select(26,4)
+  * }}}
+  *
   *
   * */
 class DivFloat(expWidth: Int, sigWidth: Int) extends Module{
@@ -23,26 +28,24 @@ class DivFloat(expWidth: Int, sigWidth: Int) extends Module{
 
 
   // for div, don't need to calculate rawExp
-  val rawFloatDividend = rawFloatFromFN(expWidth,sigWidth,input.bits.dividend)
-  val rawFloatDivisor  = rawFloatFromFN(expWidth,sigWidth,input.bits.divisor)
+  val rawA_S = rawFloatFromFN(expWidth,sigWidth,input.bits.dividend)
+  val rawB_S  = rawFloatFromFN(expWidth,sigWidth,input.bits.divisor)
 
 
-  // Data Path
-  val sign = rawFloatDividend.sign ^ rawFloatDivisor.sign
 
+  // Data Path
   val dividendIn = Wire(UInt((fpWidth).W))
   val divisorIn = Wire(UInt((fpWidth).W))
-  val signReg = RegEnable(sign, input.fire)
-  val expRaw = Wire(UInt(expWidth.W))
   val expOutNext = Wire(UInt(expWidth.W))
-  val expfinal = Wire(UInt(expWidth.W))
-
+  val expToRound = Wire(UInt(expWidth.W))
 
+  val sign = rawA_S.sign ^ rawB_S.sign
+  val signReg = RegEnable(sign, input.fire)
 
   // divIter logic
 
-  dividendIn := Cat(1.U(1.W), rawFloatDividend.sig(sigWidth-2, 0), 0.U(expWidth.W))
-  divisorIn  := Cat(1.U(1.W), rawFloatDivisor.sig(sigWidth-2, 0), 0.U(expWidth.W))
+  dividendIn := Cat(1.U(1.W), rawA_S.sig(sigWidth-2, 0), 0.U(expWidth.W))
+  divisorIn  := Cat(1.U(1.W), rawB_S.sig(sigWidth-2, 0), 0.U(expWidth.W))
 
   val divModule = Module(new SRT16(fpWidth,fpWidth,fpWidth))
   divModule.input.bits.dividend := dividendIn
@@ -59,14 +62,16 @@ class DivFloat(expWidth: Int, sigWidth: Int) extends Module{
   needNormNext := input.bits.divisor(sigWidth-2, 0) > input.bits.dividend(sigWidth-2, 0)
 
   // todo verify it
-  expRaw := input.bits.dividend(fpWidth-1, sigWidth-1) - input.bits.divisor(fpWidth-1, sigWidth-1)
-  expOutNext := expRaw + 127.U
+
+  expOutNext := input.bits.dividend(fpWidth-1, sigWidth-1) - input.bits.divisor(fpWidth-1, sigWidth-1)
   val expOutReg = RegEnable(expOutNext, 0.U(expWidth.W), input.fire)
-  expfinal := expOutReg - needNorm
+  expToRound := expOutReg - needNorm
 
 
-  val sigUnRound = Mux(needNorm, divModule.output.bits.quotient(calWidth-3, calWidth-sigWidth-1),divModule.output.bits.quotient(calWidth-2, calWidth-sigWidth))
-  val rbits = Mux(needNorm, divModule.output.bits.quotient(calWidth-sigWidth-2)##1.U(1.W), divModule.output.bits.quotient(calWidth-sigWidth-1)##1.U(1.W))
+  val sigToRound = Mux(needNorm, divModule.output.bits.quotient(calWidth-3, calWidth-sigWidth-1),
+    divModule.output.bits.quotient(calWidth-2, calWidth-sigWidth))
+  val rbits      = Mux(needNorm, divModule.output.bits.quotient(calWidth-sigWidth-2)##1.U(1.W),
+    divModule.output.bits.quotient(calWidth-sigWidth-1)##1.U(1.W))
 
 
   /** @todo exceptions
@@ -80,20 +85,17 @@ class DivFloat(expWidth: Int, sigWidth: Int) extends Module{
 
 
 
-
-
-
   output.bits.result := RoundingUnit(
     signReg,
-    expfinal,
-    sigUnRound,
+    expToRound,
+    sigToRound,
     rbits,
     consts.round_near_even,
     invalidExec,
     infinitExec)
 
-  output.bits.sig := output.bits.result(22,0)
-  output.bits.exp := expfinal
+  output.bits.sig := output.bits.result(sigWidth-2, 0)
+  output.bits.exp := output.bits.result(fpWidth-1, sigWidth-1)
 
 
 }
diff --git a/arithmetic/tests/src/float/DivFloatTester.scala b/arithmetic/tests/src/float/DivFloatTester.scala
index 41a36f4..a0b7f2d 100644
--- a/arithmetic/tests/src/float/DivFloatTester.scala
+++ b/arithmetic/tests/src/float/DivFloatTester.scala
@@ -34,13 +34,8 @@ object DivFloatTester extends TestSuite with ChiselUtestTester {
         val exp_Expect = qFloatString.substring(1, 9)
 
 
-
         val xExp = Integer.parseInt(xFloatString.substring(1, 9), 2)
         val dExp = Integer.parseInt(dFloatString.substring(1, 9), 2)
-//        val expDiff = xExp - dExp
-
-
-
 
         // test
         testCircuit(
@@ -65,11 +60,6 @@ object DivFloatTester extends TestSuite with ChiselUtestTester {
               val expInt_Actual = Integer.parseInt(exp_Actual,2)
               val expInt_Expect = Integer.parseInt(exp_Expect,2)
 
-
-
-
-
-
               def printvalue(): Unit = {
 
                 println(xFloat.toString + "/ " + dFloat.toString + "="+ qDouble.toString)

From e344736113d7285549b95a23df40a271b5c9c702 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 8 Aug 2023 16:59:37 +0800
Subject: [PATCH 024/109] [sqrtfloat] fix io

---
 arithmetic/src/float/SqrtFloat.scala          | 27 ++++++++++---------
 .../tests/src/float/SqrtFloatTester.scala     | 16 +++++------
 2 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/arithmetic/src/float/SqrtFloat.scala b/arithmetic/src/float/SqrtFloat.scala
index 4873b66..4dd1c20 100644
--- a/arithmetic/src/float/SqrtFloat.scala
+++ b/arithmetic/src/float/SqrtFloat.scala
@@ -11,6 +11,19 @@ import sqrt._
   *
   * */
 class SqrtFloat(expWidth: Int, sigWidth: Int) extends Module{
+  class FloatSqrtInput(expWidth: Int, sigWidth: Int) extends Bundle() {
+    val oprand = UInt((expWidth + sigWidth).W)
+  }
+
+  /** add 2 for rounding */
+  class FloatSqrtOutput(expWidth: Int, sigWidth: Int) extends Bundle() {
+    val result = UInt((expWidth + sigWidth).W)
+    val sig = UInt((sigWidth + 2).W)
+    val exp = UInt(expWidth.W)
+
+    //  val exceptionFlags = UInt(5.W)
+  }
+
   val input = IO(Flipped(DecoupledIO(new FloatSqrtInput(expWidth, sigWidth))))
   val output = IO(DecoupledIO(new FloatSqrtOutput(expWidth, sigWidth)))
   val rawFloatIn = rawFloatFromFN(expWidth,sigWidth,input.bits.oprand)
@@ -66,22 +79,10 @@ class SqrtFloat(expWidth: Int, sigWidth: Int) extends Module{
     consts.round_near_even,
     invalidExec,
     infinitExec)
-  output.bits.sig := SqrtModule.output.bits.result
+  output.bits.sig := output.bits.result(sigWidth-2, 0)
   output.bits.exp := output.bits.result(30,23)
   output.valid := SqrtModule.output.valid || fastWorking
 
 }
 
-class FloatSqrtInput(expWidth: Int, sigWidth: Int) extends Bundle() {
-  val oprand = UInt((expWidth + sigWidth).W)
-}
-
-/** add 2 for rounding*/
-class FloatSqrtOutput(expWidth: Int, sigWidth: Int) extends Bundle() {
-  val result = UInt((expWidth + sigWidth).W)
-  val sig = UInt((sigWidth+2).W)
-  val exp = UInt(expWidth.W)
-
-//  val exceptionFlags = UInt(5.W)
-}
 
diff --git a/arithmetic/tests/src/float/SqrtFloatTester.scala b/arithmetic/tests/src/float/SqrtFloatTester.scala
index d4cad3f..e255cf6 100644
--- a/arithmetic/tests/src/float/SqrtFloatTester.scala
+++ b/arithmetic/tests/src/float/SqrtFloatTester.scala
@@ -6,7 +6,7 @@ import utest._
 import scala.util.{Random}
 import scala.math._
 
-object SquareRootTester extends TestSuite with ChiselUtestTester {
+object SqrtFloatTester extends TestSuite with ChiselUtestTester {
   def tests: Tests = Tests {
     test("Sqrt Float FP32 should pass") {
       def testcase(): Unit = {
@@ -20,17 +20,13 @@ object SquareRootTester extends TestSuite with ChiselUtestTester {
         val ExepctFracIn = if(oprandFloat<0.5)"b01" + oprandSigString + "0"  else "b1" + oprandSigString + "00"
         val circuitInput = "b"+ oprandString
 
-
-
         val x = sqrt(oprandDouble)
-        x.toFloat.round
         val xDoublestring = java.lang.Double.doubleToLongBits(x).toBinaryString
         val xFloatstring = java.lang.Float.floatToIntBits(x.toFloat).toBinaryString
         val xDouble = extendTofull(xDoublestring,64)
         val xFloat = extendTofull(xFloatstring,32)
         // 0.xxxxxx,   hidden 1+23bits + 2bits for round
-        val sigExpect =   "1"+xDouble.substring(12, 37)
-        // todo:
+        val sigExpect =   xFloat.substring(9,32)
         val expExpect =   xFloat.substring(1,9)
 
         // test
@@ -48,7 +44,7 @@ object SquareRootTester extends TestSuite with ChiselUtestTester {
             if (dut.output.valid.peek().litValue == 1) {
               flag = true
               val resultActual = extendTofull(dut.output.bits.result.peek().litValue.toString(2),32)
-              val sigActual = dut.output.bits.sig.peek().litValue.toString(2)
+              val sigActual = extendTofull(dut.output.bits.sig.peek().litValue.toString(2),23)
               val expActual = extendTofull(dut.output.bits.exp.peek().litValue.toString(2),8)
 
               def printValue() :Unit = {
@@ -88,9 +84,9 @@ object SquareRootTester extends TestSuite with ChiselUtestTester {
         }
       }
 
-//      for (i <- 1 to 100) {
-//        testcase()
-//      }
+      for (i <- 1 to 100) {
+        testcase()
+      }
 
     }
   }

From feadb878cbb80c987a3c5a493e394789884052f6 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 8 Aug 2023 17:09:38 +0800
Subject: [PATCH 025/109] [sqrtfloat] fix sqrtfloat

---
 arithmetic/src/float/SqrtFloat.scala | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arithmetic/src/float/SqrtFloat.scala b/arithmetic/src/float/SqrtFloat.scala
index 4dd1c20..38a5ef9 100644
--- a/arithmetic/src/float/SqrtFloat.scala
+++ b/arithmetic/src/float/SqrtFloat.scala
@@ -55,9 +55,8 @@ class SqrtFloat(expWidth: Int, sigWidth: Int) extends Module{
     *      1           0    1.xxxx>>2       01xxxx    rawExp/2 +1 + bias
     *}}}
     */
-  val expOutNext = Wire(UInt(expWidth.W))
-  expOutNext := adjustedExp(expWidth,1)
-  val expOut = RegEnable(expOutNext, 0.U(expWidth.W), input.fire)
+  val expToRound = RegEnable(adjustedExp(expWidth,1), 0.U(expWidth.W), input.fire)
+
   val fractIn = Mux(input.bits.oprand(sigWidth-1), Cat("b0".U(1.W),rawFloatIn.sig(sigWidth-1, 0),0.U(1.W)),
     Cat(rawFloatIn.sig(sigWidth-1, 0),0.U(2.W)))
 
@@ -73,7 +72,7 @@ class SqrtFloat(expWidth: Int, sigWidth: Int) extends Module{
   input.ready := SqrtModule.input.ready
   output.bits.result := RoundingUnit(
     input.bits.oprand(expWidth + sigWidth-1) ,
-    expOut,
+    expToRound,
     sigforRound,
     rbits,
     consts.round_near_even,

From ca7b466b77a8a2b1a5dd622fae0e80c0be891dbe Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 9 Aug 2023 11:40:18 +0800
Subject: [PATCH 026/109] [divsqrt] add divsqrt except exceptions

---
 arithmetic/src/float/DivFloat.scala           |  16 +-
 arithmetic/src/float/DivSqrt.scala            | 138 ++++++++++++++++++
 arithmetic/src/float/SqrtFloat.scala          |  22 +--
 arithmetic/src/sqrt/SquareRoot.scala          |   2 +-
 .../tests/src/float/DivFloatTester.scala      |  18 +--
 .../tests/src/float/DivSqrtTester.scala       | 103 +++++++++++++
 .../tests/src/float/SqrtFloatTester.scala     |  29 ++--
 7 files changed, 276 insertions(+), 52 deletions(-)
 create mode 100644 arithmetic/src/float/DivSqrt.scala
 create mode 100644 arithmetic/tests/src/float/DivSqrtTester.scala

diff --git a/arithmetic/src/float/DivFloat.scala b/arithmetic/src/float/DivFloat.scala
index da49f2d..667ffbc 100644
--- a/arithmetic/src/float/DivFloat.scala
+++ b/arithmetic/src/float/DivFloat.scala
@@ -24,7 +24,7 @@ class DivFloat(expWidth: Int, sigWidth: Int) extends Module{
   val fpWidth = expWidth + sigWidth
   val calWidth = 28
   val input = IO(Flipped(DecoupledIO(new FloatDivInput(8, 24))))
-  val output = IO(DecoupledIO(new FloatDivOutput(8, 24)))
+  val output = IO(ValidIO(new FloatDivOutput(8, 24)))
 
 
   // for div, don't need to calculate rawExp
@@ -36,7 +36,7 @@ class DivFloat(expWidth: Int, sigWidth: Int) extends Module{
   // Data Path
   val dividendIn = Wire(UInt((fpWidth).W))
   val divisorIn = Wire(UInt((fpWidth).W))
-  val expOutNext = Wire(UInt(expWidth.W))
+  val expStoreNext = Wire(UInt(expWidth.W))
   val expToRound = Wire(UInt(expWidth.W))
 
   val sign = rawA_S.sign ^ rawB_S.sign
@@ -57,15 +57,12 @@ class DivFloat(expWidth: Int, sigWidth: Int) extends Module{
   output.valid := divModule.output.valid
 
 
-  val needNormNext = Wire(Bool())
+  val needNormNext = input.bits.divisor(sigWidth-2, 0) > input.bits.dividend(sigWidth-2, 0)
   val needNorm = RegEnable(needNormNext,input.fire)
-  needNormNext := input.bits.divisor(sigWidth-2, 0) > input.bits.dividend(sigWidth-2, 0)
 
-  // todo verify it
-
-  expOutNext := input.bits.dividend(fpWidth-1, sigWidth-1) - input.bits.divisor(fpWidth-1, sigWidth-1)
-  val expOutReg = RegEnable(expOutNext, 0.U(expWidth.W), input.fire)
-  expToRound := expOutReg - needNorm
+  expStoreNext := input.bits.dividend(fpWidth-1, sigWidth-1) - input.bits.divisor(fpWidth-1, sigWidth-1)
+  val expStore = RegEnable(expStoreNext, 0.U(expWidth.W), input.fire)
+  expToRound := expStore - needNorm
 
 
   val sigToRound = Mux(needNorm, divModule.output.bits.quotient(calWidth-3, calWidth-sigWidth-1),
@@ -104,7 +101,6 @@ class FloatDivInput(expWidth: Int, sigWidth: Int) extends Bundle() {
   val divisor  = UInt((expWidth + sigWidth).W)
 }
 
-/** add 2 for rounding */
 class FloatDivOutput(expWidth: Int, sigWidth: Int) extends Bundle() {
   val result = UInt((expWidth + sigWidth).W)
   val sig = UInt((sigWidth-1).W)
diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
new file mode 100644
index 0000000..41477a0
--- /dev/null
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -0,0 +1,138 @@
+package float
+
+import chisel3._
+import chisel3.util._
+import division.srt.srt16._
+import sqrt._
+
+class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
+  val fpWidth = expWidth + sigWidth
+  val calWidth = 28
+  val input = IO(Flipped(DecoupledIO(new DivSqrtInput(expWidth, sigWidth))))
+  val output = IO(DecoupledIO(new DivSqrtOutput(expWidth, sigWidth)))
+
+  val opSqrtReg = RegEnable(input.bits.sqrt, input.fire)
+
+  val rawA_S = rawFloatFromFN(expWidth, sigWidth, input.bits.a)
+  val rawB_S = rawFloatFromFN(expWidth, sigWidth, input.bits.b)
+
+//  /** Exceptions */
+//
+//  val notSigNaNIn_invalidExc_S_div =
+//    (rawA_S.isZero && rawB_S.isZero) || (rawA_S.isInf && rawB_S.isInf)
+//  val notSigNaNIn_invalidExc_S_sqrt =
+//    !rawA_S.isNaN && !rawA_S.isZero && rawA_S.sign
+//  val majorExc_S =
+//    Mux(input.bits.sqrt,
+//      isSigNaNRawFloat(rawA_S) || notSigNaNIn_invalidExc_S_sqrt,
+//      isSigNaNRawFloat(rawA_S) || isSigNaNRawFloat(rawB_S) ||
+//        notSigNaNIn_invalidExc_S_div ||
+//        (!rawA_S.isNaN && !rawA_S.isInf && rawB_S.isZero)
+//    )
+//  val isNaN_S =
+//    Mux(input.bits.sqrt,
+//      rawA_S.isNaN || notSigNaNIn_invalidExc_S_sqrt,
+//      rawA_S.isNaN || rawB_S.isNaN || notSigNaNIn_invalidExc_S_div
+//    )
+//  val isInf_S = Mux(input.bits.sqrt, rawA_S.isInf, rawA_S.isInf || rawB_S.isZero)
+//  val isZero_S = Mux(input.bits.sqrt, rawA_S.isZero, rawA_S.isZero || rawB_S.isInf)
+//  val sign_S = rawA_S.sign ^ (!input.bits.sqrt && rawB_S.sign)
+//
+//  val specialCaseA_S = rawA_S.isNaN || rawA_S.isInf || rawA_S.isZero
+//  val specialCaseB_S = rawB_S.isNaN || rawB_S.isInf || rawB_S.isZero
+//  val normalCase_S_div = !specialCaseA_S && !specialCaseB_S
+//  val normalCase_S_sqrt = !specialCaseA_S && !rawA_S.sign
+//  val normalCase_S = Mux(input.bits.sqrt, normalCase_S_sqrt, normalCase_S_div)
+//
+//  val fastResponseNext =
+//
+//  val entering = input.fire
+//  val fastResponse = RegEnable(entering, false.B,entering)
+
+  // needNorm for div
+  val needNormNext = input.bits.b(sigWidth - 2, 0) > input.bits.a(sigWidth - 2, 0)
+  val needNorm = RegEnable(needNormNext, input.fire)
+
+  // sign
+  val signNext = Mux(input.bits.sqrt, true.B, rawA_S.sign ^ rawB_S.sign)
+  val signReg = RegEnable(signNext, input.fire)
+
+  // sqrt
+  val adjustedExp = Cat(rawA_S.sExp(expWidth - 1), rawA_S.sExp(expWidth - 1, 0))
+  val sqrtExIsEven = input.bits.a(sigWidth - 1)
+  val fractIn = Mux(sqrtExIsEven, Cat("b0".U(1.W), rawA_S.sig(sigWidth - 1, 0), 0.U(1.W)),
+    Cat(rawA_S.sig(sigWidth - 1, 0), 0.U(2.W)))
+
+  val SqrtModule = Module(new SquareRoot(2, 2, sigWidth+2, sigWidth+2))
+  SqrtModule.input.bits.operand := fractIn
+  SqrtModule.input.valid := input.valid && input.bits.sqrt
+
+  val rbits_sqrt = SqrtModule.output.bits.result(1) ## (!SqrtModule.output.bits.zeroRemainder || SqrtModule.output.bits.result(0))
+  val sigToRound_sqrt = SqrtModule.output.bits.result(24, 2)
+
+
+  // div
+  val dividendIn = Wire(UInt((fpWidth).W))
+  val divisorIn = Wire(UInt((fpWidth).W))
+  dividendIn := Cat(1.U(1.W), rawA_S.sig(sigWidth - 2, 0), 0.U(expWidth.W))
+  divisorIn := Cat(1.U(1.W), rawB_S.sig(sigWidth - 2, 0), 0.U(expWidth.W))
+
+  val divModule = Module(new SRT16(fpWidth, fpWidth, fpWidth))
+  divModule.input.bits.dividend := dividendIn
+  divModule.input.bits.divider := divisorIn
+  divModule.input.bits.counter := 8.U
+  divModule.input.valid := input.valid && !input.bits.sqrt
+
+  val sigToRound_div = Mux(needNorm, divModule.output.bits.quotient(calWidth - 3, calWidth - sigWidth - 1),
+    divModule.output.bits.quotient(calWidth - 2, calWidth - sigWidth))
+  val rbits_div = Mux(needNorm, divModule.output.bits.quotient(calWidth - sigWidth - 2) ## 1.U(1.W),
+    divModule.output.bits.quotient(calWidth - sigWidth - 1) ## 1.U(1.W))
+
+
+  // collect sig result
+  val sigToRound = Mux(opSqrtReg, sigToRound_sqrt, sigToRound_div)
+  val rbitsToRound = Mux(opSqrtReg, rbits_sqrt, rbits_div)
+
+  // exp logic
+  val expStoreNext = Wire(UInt(expWidth.W))
+  val expToRound = Wire(UInt(expWidth.W))
+  expStoreNext := Mux(input.bits.sqrt,
+    Cat(rawA_S.sExp(expWidth-1), rawA_S.sExp(expWidth-1, 0))(expWidth,1),
+    input.bits.a(fpWidth-1, sigWidth-1) - input.bits.b(fpWidth-1, sigWidth-1))
+  val expStore = RegEnable(expStoreNext, 0.U(expWidth.W), input.fire)
+  expToRound := Mux(opSqrtReg, expStore, expStore - needNorm)
+
+
+  /** Exceptions */
+  val invalidExec = false.B
+  val infinitExec = false.B
+
+  output.bits.result := RoundingUnit(
+    signReg,
+    expToRound,
+    sigToRound,
+    rbitsToRound,
+    consts.round_near_even,
+    invalidExec,
+    infinitExec)
+
+  output.bits.sig := output.bits.result(sigWidth - 2, 0)
+  output.bits.exp := output.bits.result(fpWidth - 1, sigWidth - 1)
+
+  input.ready := divModule.input.ready && SqrtModule.input.ready
+  output.valid := divModule.output.valid || SqrtModule.output.valid
+}
+
+
+class DivSqrtInput(expWidth: Int, sigWidth: Int) extends Bundle() {
+  val a = UInt((expWidth + sigWidth).W)
+  val b = UInt((expWidth + sigWidth).W)
+  val sqrt = Bool()
+}
+
+
+class DivSqrtOutput(expWidth: Int, sigWidth: Int) extends Bundle() {
+  val result = UInt((expWidth + sigWidth).W)
+  val sig = UInt((sigWidth-1).W)
+  val exp = UInt(expWidth.W)
+}
diff --git a/arithmetic/src/float/SqrtFloat.scala b/arithmetic/src/float/SqrtFloat.scala
index 38a5ef9..0d0195b 100644
--- a/arithmetic/src/float/SqrtFloat.scala
+++ b/arithmetic/src/float/SqrtFloat.scala
@@ -25,7 +25,7 @@ class SqrtFloat(expWidth: Int, sigWidth: Int) extends Module{
   }
 
   val input = IO(Flipped(DecoupledIO(new FloatSqrtInput(expWidth, sigWidth))))
-  val output = IO(DecoupledIO(new FloatSqrtOutput(expWidth, sigWidth)))
+  val output = IO(ValidIO(new FloatSqrtOutput(expWidth, sigWidth)))
   val rawFloatIn = rawFloatFromFN(expWidth,sigWidth,input.bits.oprand)
 
   /** Control path */
@@ -45,25 +45,27 @@ class SqrtFloat(expWidth: Int, sigWidth: Int) extends Module{
 
 
 
-  /** Data path */
-
-  val adjustedExp = Cat(rawFloatIn.sExp(expWidth-1), rawFloatIn.sExp(expWidth-1, 0))
-
-  /** {{{
+  /** Data path
+    *
+    * {{{
     * expLSB   rawExpLSB    Sig             SigIn     expOut
     *      0           1    1.xxxx>>2<<1    1xxxx0    rawExp/2 +1 + bias
     *      1           0    1.xxxx>>2       01xxxx    rawExp/2 +1 + bias
-    *}}}
+    * }}}
+    *
     */
-  val expToRound = RegEnable(adjustedExp(expWidth,1), 0.U(expWidth.W), input.fire)
 
-  val fractIn = Mux(input.bits.oprand(sigWidth-1), Cat("b0".U(1.W),rawFloatIn.sig(sigWidth-1, 0),0.U(1.W)),
+  val adjustedExp = Cat(rawFloatIn.sExp(expWidth-1), rawFloatIn.sExp(expWidth-1, 0))
+  val expStore = RegEnable(adjustedExp(expWidth,1), 0.U(expWidth.W), input.fire)
+  val expToRound = expStore
+
+  val sqrtExIsEven = input.bits.oprand(sigWidth - 1)
+  val fractIn = Mux(sqrtExIsEven, Cat("b0".U(1.W),rawFloatIn.sig(sigWidth-1, 0),0.U(1.W)),
     Cat(rawFloatIn.sig(sigWidth-1, 0),0.U(2.W)))
 
   val SqrtModule = Module(new SquareRoot(2, 2, 26, 26))
   SqrtModule.input.valid := input.valid && !fastCase
   SqrtModule.input.bits.operand := fractIn
-  SqrtModule.output.ready := output.ready
 
   val rbits = SqrtModule.output.bits.result(1) ## (!SqrtModule.output.bits.zeroRemainder || SqrtModule.output.bits.result(0))
   val sigforRound = SqrtModule.output.bits.result(24,2)
diff --git a/arithmetic/src/sqrt/SquareRoot.scala b/arithmetic/src/sqrt/SquareRoot.scala
index d50a751..6ed2e70 100644
--- a/arithmetic/src/sqrt/SquareRoot.scala
+++ b/arithmetic/src/sqrt/SquareRoot.scala
@@ -37,7 +37,7 @@ class SquareRoot(
   outputWidth: Int)
     extends Module {
   val input = IO(Flipped(DecoupledIO(new SquareRootInput(inputWidth: Int, outputWidth: Int))))
-  val output = IO(DecoupledIO(new SquareRootOutput(outputWidth)))
+  val output = IO(ValidIO(new SquareRootOutput(outputWidth)))
 
   /** width for partial result  */
   val wlen = inputWidth + 2
diff --git a/arithmetic/tests/src/float/DivFloatTester.scala b/arithmetic/tests/src/float/DivFloatTester.scala
index a0b7f2d..8667d39 100644
--- a/arithmetic/tests/src/float/DivFloatTester.scala
+++ b/arithmetic/tests/src/float/DivFloatTester.scala
@@ -23,20 +23,12 @@ object DivFloatTester extends TestSuite with ChiselUtestTester {
         val xInput = "b"+xFloatString
         val dInput = "b"+dFloatString
 
-
-        val counter = 8
-
         val qDouble = xFloat / dFloat
-        val qFloat = qDouble.toFloat
-        val qDoubleString = extendTofull(java.lang.Double.doubleToLongBits(qDouble).toBinaryString, 64)
-        val qFloatString = extendTofull(java.lang.Float.floatToIntBits(qFloat).toBinaryString, 32)
+        val q = qDouble.toFloat
+        val qFloatString = extendTofull(java.lang.Float.floatToIntBits(q).toBinaryString, 32)
         val sig_Expect = qFloatString.substring(9, 32)
         val exp_Expect = qFloatString.substring(1, 9)
 
-
-        val xExp = Integer.parseInt(xFloatString.substring(1, 9), 2)
-        val dExp = Integer.parseInt(dFloatString.substring(1, 9), 2)
-
         // test
         testCircuit(
           new DivFloat(8,24),
@@ -91,9 +83,9 @@ object DivFloatTester extends TestSuite with ChiselUtestTester {
       }
 
 
-            for (i <- 1 to 100) {
-              testcase(32)
-            }
+      for (i <- 1 to 100) {
+        testcase(32)
+      }
 
     }
   }
diff --git a/arithmetic/tests/src/float/DivSqrtTester.scala b/arithmetic/tests/src/float/DivSqrtTester.scala
new file mode 100644
index 0000000..f00b2d2
--- /dev/null
+++ b/arithmetic/tests/src/float/DivSqrtTester.scala
@@ -0,0 +1,103 @@
+package float
+
+import chisel3._
+import chiseltest._
+import utest._
+
+import scala.util.Random
+import division.srt.srt16._
+
+import scala.math.sqrt
+
+object DivSqrtTester extends TestSuite with ChiselUtestTester {
+  def tests: Tests = Tests {
+    test("DivSqrt should pass") {
+      def testcase(width: Int): Unit = {
+        def extendTofull(input:String, width:Int) =(Seq.fill(width - input.length)("0").mkString("") + input)
+        val n:         Int = width
+
+        val xFloat = (Random.nextInt(100000) + Random.nextFloat() ).toFloat
+        val dFloat = (Random.nextInt(100000) + Random.nextFloat() ).toFloat
+
+        val xFloatString = extendTofull(java.lang.Float.floatToIntBits(xFloat).toBinaryString, 32)
+        val dFloatString = extendTofull(java.lang.Float.floatToIntBits(dFloat).toBinaryString, 32)
+
+        val xInput = "b"+xFloatString
+        val dInput = "b"+dFloatString
+
+        val opsqrt = "false"
+
+
+        val qDouble = xFloat / dFloat
+        val q = qDouble.toFloat
+        val qFloatString = extendTofull(java.lang.Float.floatToIntBits(q).toBinaryString, 32)
+
+
+        val t = sqrt(xFloat)
+        val tFloatString = extendTofull(java.lang.Float.floatToIntBits(t.toFloat).toBinaryString, 32)
+        // 0.xxxxxx,   hidden 1+23bits + 2bits for round
+        val sigExpect = tFloatString.substring(9, 32)
+        val expExpect = tFloatString.substring(1, 9)
+
+        val sig_Expect = if(opsqrt == "true") tFloatString.substring(9, 32) else qFloatString.substring(9, 32)
+        val exp_Expect = if(opsqrt == "true") tFloatString.substring(1, 9) else qFloatString.substring(1, 9)
+        val result_Expect = if(opsqrt == "true") tFloatString else qFloatString
+
+        // test
+        testCircuit(
+          new DivSqrt(8,24),
+          Seq(chiseltest.internal.NoThreadingAnnotation, chiseltest.simulator.WriteVcdAnnotation)
+        ) { dut: DivSqrt =>
+          dut.clock.setTimeout(0)
+          dut.input.valid.poke(true.B)
+          dut.input.bits.a.poke((xInput).U)
+          dut.input.bits.b.poke((dInput).U)
+          dut.input.bits.sqrt.poke(false.B)
+          dut.clock.step()
+          dut.input.valid.poke(false.B)
+          var flag = false
+          for (a <- 1 to 1000 if !flag) {
+            if (dut.output.valid.peek().litValue == 1) {
+              flag = true
+
+              val sig_Actual = extendTofull(dut.output.bits.sig.peek().litValue.toString(2),23)
+              val exp_Actual = extendTofull(dut.output.bits.exp.peek().litValue.toString(2),8)
+              val result_Actual = extendTofull(dut.output.bits.result.peek().litValue.toString(2),32)
+
+              val expInt_Actual = Integer.parseInt(exp_Actual,2)
+              val expInt_Expect = Integer.parseInt(exp_Expect,2)
+
+              def printvalue(): Unit = {
+
+                println(xFloat.toString + "/ " + dFloat.toString + "="+ qDouble.toString)
+                println("result_Actual = " + result_Actual)
+                println("result_Expect = " + result_Expect)
+
+
+                println("sig_expect = " + sig_Expect)
+                println("sig_actual = " + sig_Actual)
+
+              }
+              if((sig_Expect != sig_Actual)|| (exp_Actual != exp_Expect)||(result_Actual != result_Expect)){
+                printvalue()
+                utest.assert(sig_Expect == sig_Actual)
+                utest.assert(exp_Expect == exp_Actual)
+                utest.assert(result_Actual == result_Expect)
+              }
+
+            }
+            dut.clock.step()
+          }
+          utest.assert(flag)
+          dut.clock.step(scala.util.Random.nextInt(10))
+        }
+      }
+
+
+      for (i <- 1 to 100) {
+        testcase(32)
+      }
+
+    }
+  }
+}
\ No newline at end of file
diff --git a/arithmetic/tests/src/float/SqrtFloatTester.scala b/arithmetic/tests/src/float/SqrtFloatTester.scala
index e255cf6..bdaa937 100644
--- a/arithmetic/tests/src/float/SqrtFloatTester.scala
+++ b/arithmetic/tests/src/float/SqrtFloatTester.scala
@@ -17,17 +17,13 @@ object SqrtFloatTester extends TestSuite with ChiselUtestTester {
         val oprandString = extendTofull(java.lang.Float.floatToIntBits(oprandFloat).toBinaryString,32)
         val oprandSigString = oprandString.substring(9, 32)
 
-        val ExepctFracIn = if(oprandFloat<0.5)"b01" + oprandSigString + "0"  else "b1" + oprandSigString + "00"
-        val circuitInput = "b"+ oprandString
+        val oprandInput = "b"+ oprandString
 
-        val x = sqrt(oprandDouble)
-        val xDoublestring = java.lang.Double.doubleToLongBits(x).toBinaryString
-        val xFloatstring = java.lang.Float.floatToIntBits(x.toFloat).toBinaryString
-        val xDouble = extendTofull(xDoublestring,64)
-        val xFloat = extendTofull(xFloatstring,32)
+        val t = sqrt(oprandDouble)
+        val tFloatString = extendTofull(java.lang.Float.floatToIntBits(t.toFloat).toBinaryString,32)
         // 0.xxxxxx,   hidden 1+23bits + 2bits for round
-        val sigExpect =   xFloat.substring(9,32)
-        val expExpect =   xFloat.substring(1,9)
+        val sigExpect =   tFloatString.substring(9,32)
+        val expExpect =   tFloatString.substring(1,9)
 
         // test
         testCircuit(
@@ -36,7 +32,7 @@ object SqrtFloatTester extends TestSuite with ChiselUtestTester {
         ) { dut: SqrtFloat =>
           dut.clock.setTimeout(0)
           dut.input.valid.poke(true.B)
-          dut.input.bits.oprand.poke(circuitInput.U)
+          dut.input.bits.oprand.poke(oprandInput.U)
           dut.clock.step()
           dut.input.valid.poke(false.B)
           var flag = false
@@ -48,13 +44,13 @@ object SqrtFloatTester extends TestSuite with ChiselUtestTester {
               val expActual = extendTofull(dut.output.bits.exp.peek().litValue.toString(2),8)
 
               def printValue() :Unit = {
-                println(oprandFloat.toString + ".sqrtx = " + x.toString)
-                println("input = " + circuitInput)
+                println(oprandFloat.toString + ".sqrtx = " + t.toString)
+                println("input = " + oprandInput)
                 println("exp_expect = " + expExpect)
                 println("exp_actual = " + expActual)
                 println("sig_expect = " + sigExpect)
                 println("sig_actual = " + sigActual)
-                println("result_expect = " + xFloat)
+                println("result_expect = " + tFloatString)
                 println("result_actual = " + resultActual)
               }
 
@@ -69,14 +65,11 @@ object SqrtFloatTester extends TestSuite with ChiselUtestTester {
                 utest.assert(expActual ==expExpect)
               }
 
-              if(resultActual != xFloat) {
+              if(resultActual != tFloatString) {
                 printValue()
-                utest.assert(resultActual == xFloat)
+                utest.assert(resultActual == tFloatString)
               }
 
-
-
-
             } else
               dut.clock.step()
           }

From 7df1e1cb78fd3e279488619427e7e7c03db37bb3 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 9 Aug 2023 14:28:21 +0800
Subject: [PATCH 027/109] [divsqrt] support exceptions

---
 arithmetic/src/float/DivFloat.scala     |  8 ++-
 arithmetic/src/float/DivSqrt.scala      | 95 ++++++++++++++-----------
 arithmetic/src/float/RoundingUnit.scala | 58 ++++++++++-----
 arithmetic/src/float/SqrtFloat.scala    |  9 ++-
 4 files changed, 107 insertions(+), 63 deletions(-)

diff --git a/arithmetic/src/float/DivFloat.scala b/arithmetic/src/float/DivFloat.scala
index 667ffbc..ccbc735 100644
--- a/arithmetic/src/float/DivFloat.scala
+++ b/arithmetic/src/float/DivFloat.scala
@@ -82,15 +82,19 @@ class DivFloat(expWidth: Int, sigWidth: Int) extends Module{
 
 
 
-  output.bits.result := RoundingUnit(
+  val roundresult = RoundingUnit(
     signReg,
     expToRound,
     sigToRound,
     rbits,
     consts.round_near_even,
     invalidExec,
-    infinitExec)
+    infinitExec,
+    false.B,
+    false.B,
+    false.B)
 
+  output.bits.result := roundresult(0)
   output.bits.sig := output.bits.result(sigWidth-2, 0)
   output.bits.exp := output.bits.result(fpWidth-1, sigWidth-1)
 
diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index 41477a0..819e67e 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -16,45 +16,54 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val rawA_S = rawFloatFromFN(expWidth, sigWidth, input.bits.a)
   val rawB_S = rawFloatFromFN(expWidth, sigWidth, input.bits.b)
 
-//  /** Exceptions */
-//
-//  val notSigNaNIn_invalidExc_S_div =
-//    (rawA_S.isZero && rawB_S.isZero) || (rawA_S.isInf && rawB_S.isInf)
-//  val notSigNaNIn_invalidExc_S_sqrt =
-//    !rawA_S.isNaN && !rawA_S.isZero && rawA_S.sign
-//  val majorExc_S =
-//    Mux(input.bits.sqrt,
-//      isSigNaNRawFloat(rawA_S) || notSigNaNIn_invalidExc_S_sqrt,
-//      isSigNaNRawFloat(rawA_S) || isSigNaNRawFloat(rawB_S) ||
-//        notSigNaNIn_invalidExc_S_div ||
-//        (!rawA_S.isNaN && !rawA_S.isInf && rawB_S.isZero)
-//    )
-//  val isNaN_S =
-//    Mux(input.bits.sqrt,
-//      rawA_S.isNaN || notSigNaNIn_invalidExc_S_sqrt,
-//      rawA_S.isNaN || rawB_S.isNaN || notSigNaNIn_invalidExc_S_div
-//    )
-//  val isInf_S = Mux(input.bits.sqrt, rawA_S.isInf, rawA_S.isInf || rawB_S.isZero)
-//  val isZero_S = Mux(input.bits.sqrt, rawA_S.isZero, rawA_S.isZero || rawB_S.isInf)
-//  val sign_S = rawA_S.sign ^ (!input.bits.sqrt && rawB_S.sign)
-//
-//  val specialCaseA_S = rawA_S.isNaN || rawA_S.isInf || rawA_S.isZero
-//  val specialCaseB_S = rawB_S.isNaN || rawB_S.isInf || rawB_S.isZero
-//  val normalCase_S_div = !specialCaseA_S && !specialCaseB_S
-//  val normalCase_S_sqrt = !specialCaseA_S && !rawA_S.sign
-//  val normalCase_S = Mux(input.bits.sqrt, normalCase_S_sqrt, normalCase_S_div)
-//
-//  val fastResponseNext =
-//
-//  val entering = input.fire
-//  val fastResponse = RegEnable(entering, false.B,entering)
+  /** Exceptions */
+
+  val notSigNaNIn_invalidExc_S_div =
+    (rawA_S.isZero && rawB_S.isZero) || (rawA_S.isInf && rawB_S.isInf)
+  val notSigNaNIn_invalidExc_S_sqrt =
+    !rawA_S.isNaN && !rawA_S.isZero && rawA_S.sign
+  val majorExc_S =
+    Mux(input.bits.sqrt,
+      isSigNaNRawFloat(rawA_S) || notSigNaNIn_invalidExc_S_sqrt,
+      isSigNaNRawFloat(rawA_S) || isSigNaNRawFloat(rawB_S) ||
+        notSigNaNIn_invalidExc_S_div ||
+        (!rawA_S.isNaN && !rawA_S.isInf && rawB_S.isZero)
+    )
+  val isNaN_S =
+    Mux(input.bits.sqrt,
+      rawA_S.isNaN || notSigNaNIn_invalidExc_S_sqrt,
+      rawA_S.isNaN || rawB_S.isNaN || notSigNaNIn_invalidExc_S_div
+    )
+  val isInf_S = Mux(input.bits.sqrt, rawA_S.isInf, rawA_S.isInf || rawB_S.isZero)
+  val isZero_S = Mux(input.bits.sqrt, rawA_S.isZero, rawA_S.isZero || rawB_S.isInf)
+
+
+  val majorExc_Z = RegEnable(majorExc_S,false.B,input.fire)
+  val isNaN_Z    = RegEnable(isNaN_S,false.B,input.fire)
+  val isInf_Z    = RegEnable(isInf_S,false.B,input.fire)
+  val isZero_Z   = RegEnable(isZero_S,false.B,input.fire)
+
+  val invalidExec = majorExc_Z &&  isNaN_Z
+  val infinitExec = majorExc_Z && !isNaN_Z
+
+
+  val specialCaseA_S = rawA_S.isNaN || rawA_S.isInf || rawA_S.isZero
+  val specialCaseB_S = rawB_S.isNaN || rawB_S.isInf || rawB_S.isZero
+  val normalCase_S_div = !specialCaseA_S && !specialCaseB_S
+  val normalCase_S_sqrt = !specialCaseA_S && !rawA_S.sign
+  val normalCase_S = Mux(input.bits.sqrt, normalCase_S_sqrt, normalCase_S_div)
+  val specialCase_S = !normalCase_S
+
+
+  val fastValid = RegInit(false.B)
+  fastValid := specialCase_S && input.fire
 
   // needNorm for div
   val needNormNext = input.bits.b(sigWidth - 2, 0) > input.bits.a(sigWidth - 2, 0)
   val needNorm = RegEnable(needNormNext, input.fire)
 
   // sign
-  val signNext = Mux(input.bits.sqrt, true.B, rawA_S.sign ^ rawB_S.sign)
+  val signNext = Mux(input.bits.sqrt, false.B, rawA_S.sign ^ rawB_S.sign)
   val signReg = RegEnable(signNext, input.fire)
 
   // sqrt
@@ -65,7 +74,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
 
   val SqrtModule = Module(new SquareRoot(2, 2, sigWidth+2, sigWidth+2))
   SqrtModule.input.bits.operand := fractIn
-  SqrtModule.input.valid := input.valid && input.bits.sqrt
+  SqrtModule.input.valid := input.valid && input.bits.sqrt && normalCase_S_sqrt
 
   val rbits_sqrt = SqrtModule.output.bits.result(1) ## (!SqrtModule.output.bits.zeroRemainder || SqrtModule.output.bits.result(0))
   val sigToRound_sqrt = SqrtModule.output.bits.result(24, 2)
@@ -81,7 +90,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   divModule.input.bits.dividend := dividendIn
   divModule.input.bits.divider := divisorIn
   divModule.input.bits.counter := 8.U
-  divModule.input.valid := input.valid && !input.bits.sqrt
+  divModule.input.valid := input.valid && !input.bits.sqrt && normalCase_S_sqrt
 
   val sigToRound_div = Mux(needNorm, divModule.output.bits.quotient(calWidth - 3, calWidth - sigWidth - 1),
     divModule.output.bits.quotient(calWidth - 2, calWidth - sigWidth))
@@ -103,24 +112,27 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   expToRound := Mux(opSqrtReg, expStore, expStore - needNorm)
 
 
-  /** Exceptions */
-  val invalidExec = false.B
-  val infinitExec = false.B
 
-  output.bits.result := RoundingUnit(
+
+  val roundresult = RoundingUnit(
     signReg,
     expToRound,
     sigToRound,
     rbitsToRound,
     consts.round_near_even,
     invalidExec,
-    infinitExec)
+    infinitExec,
+    isNaN_Z,
+    isInf_Z,
+    isZero_Z)
 
+  output.bits.result := roundresult(0)
   output.bits.sig := output.bits.result(sigWidth - 2, 0)
   output.bits.exp := output.bits.result(fpWidth - 1, sigWidth - 1)
+  output.bits.exceptionFlags := roundresult(1)
 
   input.ready := divModule.input.ready && SqrtModule.input.ready
-  output.valid := divModule.output.valid || SqrtModule.output.valid
+  output.valid := divModule.output.valid || SqrtModule.output.valid || fastValid
 }
 
 
@@ -135,4 +147,5 @@ class DivSqrtOutput(expWidth: Int, sigWidth: Int) extends Bundle() {
   val result = UInt((expWidth + sigWidth).W)
   val sig = UInt((sigWidth-1).W)
   val exp = UInt(expWidth.W)
+  val exceptionFlags = UInt(5.W)
 }
diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index e04c641..6e09099 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -16,6 +16,9 @@ class RoundingUnit extends Module{
   val input = IO(Input(new Bundle{
     val invalidExc = Bool() // overrides 'infiniteExc' and 'in'
     val infiniteExc = Bool() // overrides 'in' except for 'in.sign'
+    val isInf  = Bool()
+    val isZero = Bool()
+    val isNaN  = Bool()
     val sig = UInt(23.W)
     val exp = UInt(8.W)
     val rBits = UInt(2.W)
@@ -34,17 +37,15 @@ class RoundingUnit extends Module{
   val roundingMode_near_maxMag = (input.roundingMode === consts.round_near_maxMag)
 
 
-  val common_case = !(input.infiniteExc || input.invalidExc)
   val common_overflow = Wire(Bool())
+  val common_underflow = Wire(Bool())
   val common_inexact  = Wire(Bool())
 
 
-  // exception data with Spike
 
-  val invalidOut = "h7FC00000".U
-  /** Inf with sign  */
-  val infiniteOut = Cat(input.sign,"h7F800000".U)
-  val outSele1H = common_case ## input.infiniteExc ## input.invalidExc
+
+
+
 
 
 
@@ -69,7 +70,28 @@ class RoundingUnit extends Module{
   expBiased := input.exp + 127.U
   expBiasPlus := expBiased + expIncr
 
-  common_overflow := input.exp.andR && expIncr
+  // Exceptions
+  val isNaNOut = input.invalidExc || input.isNaN
+  val notNaN_isSpecialInfOut = input.infiniteExc || input.isInf
+  val commonCase = !isNaNOut && !notNaN_isSpecialInfOut && !input.isZero
+
+  val overflow = commonCase && common_overflow
+  val underflow = commonCase && common_underflow
+  val inexact = overflow || (commonCase && common_inexact)
+
+  val isZero = input.isZero && underflow
+
+
+  // exception data with Spike
+  val quietNaN = "h7FC00000".U
+
+  val infiniteOut = Cat(input.sign, "h7F800000".U)
+  val zeroOut = Cat(input.sign, 0.U(31.W))
+  val outSele1H = commonCase ## notNaN_isSpecialInfOut ## isNaNOut ## input.isZero
+
+  //todo
+  common_overflow := false.B
+  common_underflow := false.B
   common_inexact := input.rBits.orR
 
   val common_sigOut = Mux(sigIncr, sigPlus, input.sig)
@@ -78,23 +100,19 @@ class RoundingUnit extends Module{
   val common_out = Mux(common_overflow, infiniteOut, input.sign ## common_expOut ## common_sigOut)
 
   output.data := Mux1H(Seq(
-    outSele1H(0) -> invalidOut,
-    outSele1H(1) -> infiniteOut,
-    outSele1H(2) -> common_out)
+    outSele1H(0) -> zeroOut,
+    outSele1H(1) -> quietNaN,
+    outSele1H(2) -> infiniteOut,
+    outSele1H(3) -> common_out)
   )
 
-  val invalidOpration = input.invalidExc
-  val divideByzero = false.B
-  val overflow = common_case && common_overflow
-  val underflow = false.B
-  val inexact = overflow || (common_case && common_inexact)
 
-  output.exceptionFlags := invalidOpration ## divideByzero ## overflow ## underflow ## inexact
+  output.exceptionFlags := input.invalidExc ## input.infiniteExc ## overflow ## underflow ## inexact
 
 }
 
 object RoundingUnit {
-  def apply(sign: Bool, exp:UInt, sig: UInt, rbits:UInt, rmode: UInt,invalidExc:Bool, infiniteExc:Bool): UInt = {
+  def apply(sign: Bool, exp: UInt, sig: UInt, rbits: UInt, rmode: UInt, invalidExc: Bool, infiniteExc: Bool, isNaN: Bool, isInf: Bool, isZero: Bool): Vec[UInt] = {
 
     val rounder = Module(new RoundingUnit)
     rounder.input.sign := sign
@@ -104,8 +122,12 @@ object RoundingUnit {
     rounder.input.roundingMode := rmode
     rounder.input.invalidExc := invalidExc
     rounder.input.infiniteExc := infiniteExc
-    rounder.output.data
+    rounder.input.isInf := isInf
+    rounder.input.isZero := isZero
+    rounder.input.isNaN := isNaN
+    VecInit(rounder.output.data, rounder.output.exceptionFlags)
   }
 
 }
 
+
diff --git a/arithmetic/src/float/SqrtFloat.scala b/arithmetic/src/float/SqrtFloat.scala
index 0d0195b..dd82940 100644
--- a/arithmetic/src/float/SqrtFloat.scala
+++ b/arithmetic/src/float/SqrtFloat.scala
@@ -72,14 +72,19 @@ class SqrtFloat(expWidth: Int, sigWidth: Int) extends Module{
 
 
   input.ready := SqrtModule.input.ready
-  output.bits.result := RoundingUnit(
+  val roundresult = RoundingUnit(
     input.bits.oprand(expWidth + sigWidth-1) ,
     expToRound,
     sigforRound,
     rbits,
     consts.round_near_even,
     invalidExec,
-    infinitExec)
+    infinitExec,
+    false.B,
+    false.B,
+    false.B
+  )
+  output.bits.result := roundresult(0)
   output.bits.sig := output.bits.result(sigWidth-2, 0)
   output.bits.exp := output.bits.result(30,23)
   output.valid := SqrtModule.output.valid || fastWorking

From 3df7343938b43f52368196ab0d1168323040a664 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Fri, 11 Aug 2023 10:28:56 +0800
Subject: [PATCH 028/109] [divsqrt] tiny fix

---
 arithmetic/src/float/DivSqrt.scala | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index 819e67e..9f0c67d 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -9,7 +9,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val fpWidth = expWidth + sigWidth
   val calWidth = 28
   val input = IO(Flipped(DecoupledIO(new DivSqrtInput(expWidth, sigWidth))))
-  val output = IO(DecoupledIO(new DivSqrtOutput(expWidth, sigWidth)))
+  val output = IO(ValidIO(new DivSqrtOutput(expWidth, sigWidth)))
 
   val opSqrtReg = RegEnable(input.bits.sqrt, input.fire)
 
@@ -17,7 +17,6 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val rawB_S = rawFloatFromFN(expWidth, sigWidth, input.bits.b)
 
   /** Exceptions */
-
   val notSigNaNIn_invalidExc_S_div =
     (rawA_S.isZero && rawB_S.isZero) || (rawA_S.isInf && rawB_S.isInf)
   val notSigNaNIn_invalidExc_S_sqrt =
@@ -37,7 +36,6 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val isInf_S = Mux(input.bits.sqrt, rawA_S.isInf, rawA_S.isInf || rawB_S.isZero)
   val isZero_S = Mux(input.bits.sqrt, rawA_S.isZero, rawA_S.isZero || rawB_S.isInf)
 
-
   val majorExc_Z = RegEnable(majorExc_S,false.B,input.fire)
   val isNaN_Z    = RegEnable(isNaN_S,false.B,input.fire)
   val isInf_Z    = RegEnable(isInf_S,false.B,input.fire)
@@ -46,7 +44,6 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val invalidExec = majorExc_Z &&  isNaN_Z
   val infinitExec = majorExc_Z && !isNaN_Z
 
-
   val specialCaseA_S = rawA_S.isNaN || rawA_S.isInf || rawA_S.isZero
   val specialCaseB_S = rawB_S.isNaN || rawB_S.isInf || rawB_S.isZero
   val normalCase_S_div = !specialCaseA_S && !specialCaseB_S
@@ -54,7 +51,6 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val normalCase_S = Mux(input.bits.sqrt, normalCase_S_sqrt, normalCase_S_div)
   val specialCase_S = !normalCase_S
 
-
   val fastValid = RegInit(false.B)
   fastValid := specialCase_S && input.fire
 
@@ -110,10 +106,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     input.bits.a(fpWidth-1, sigWidth-1) - input.bits.b(fpWidth-1, sigWidth-1))
   val expStore = RegEnable(expStoreNext, 0.U(expWidth.W), input.fire)
   expToRound := Mux(opSqrtReg, expStore, expStore - needNorm)
-
-
-
-
+  
   val roundresult = RoundingUnit(
     signReg,
     expToRound,

From 6de587520a9cc1fbd87106b7b98e8f3e6f6b895c Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Fri, 11 Aug 2023 11:54:24 +0800
Subject: [PATCH 029/109] tiny fix

---
 arithmetic/src/float/DivSqrt.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index 9f0c67d..34aabab 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -86,7 +86,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   divModule.input.bits.dividend := dividendIn
   divModule.input.bits.divider := divisorIn
   divModule.input.bits.counter := 8.U
-  divModule.input.valid := input.valid && !input.bits.sqrt && normalCase_S_sqrt
+  divModule.input.valid := input.valid && !input.bits.sqrt && normalCase_S_div
 
   val sigToRound_div = Mux(needNorm, divModule.output.bits.quotient(calWidth - 3, calWidth - sigWidth - 1),
     divModule.output.bits.quotient(calWidth - 2, calWidth - sigWidth))
@@ -106,7 +106,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     input.bits.a(fpWidth-1, sigWidth-1) - input.bits.b(fpWidth-1, sigWidth-1))
   val expStore = RegEnable(expStoreNext, 0.U(expWidth.W), input.fire)
   expToRound := Mux(opSqrtReg, expStore, expStore - needNorm)
-  
+
   val roundresult = RoundingUnit(
     signReg,
     expToRound,

From 003d93f29125cba0718f37ec6a06bd743749f716 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 14 Aug 2023 16:35:16 +0800
Subject: [PATCH 030/109] [divsqrt] rename some val

---
 arithmetic/src/float/DivSqrt.scala | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index 34aabab..55aa14f 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -64,12 +64,12 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
 
   // sqrt
   val adjustedExp = Cat(rawA_S.sExp(expWidth - 1), rawA_S.sExp(expWidth - 1, 0))
-  val sqrtExIsEven = input.bits.a(sigWidth - 1)
-  val fractIn = Mux(sqrtExIsEven, Cat("b0".U(1.W), rawA_S.sig(sigWidth - 1, 0), 0.U(1.W)),
+  val sqrtExpIsEven = input.bits.a(sigWidth - 1)
+  val sqrtFractIn = Mux(sqrtExpIsEven, Cat("b0".U(1.W), rawA_S.sig(sigWidth - 1, 0), 0.U(1.W)),
     Cat(rawA_S.sig(sigWidth - 1, 0), 0.U(2.W)))
 
   val SqrtModule = Module(new SquareRoot(2, 2, sigWidth+2, sigWidth+2))
-  SqrtModule.input.bits.operand := fractIn
+  SqrtModule.input.bits.operand := sqrtFractIn
   SqrtModule.input.valid := input.valid && input.bits.sqrt && normalCase_S_sqrt
 
   val rbits_sqrt = SqrtModule.output.bits.result(1) ## (!SqrtModule.output.bits.zeroRemainder || SqrtModule.output.bits.result(0))
@@ -77,14 +77,14 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
 
 
   // div
-  val dividendIn = Wire(UInt((fpWidth).W))
-  val divisorIn = Wire(UInt((fpWidth).W))
-  dividendIn := Cat(1.U(1.W), rawA_S.sig(sigWidth - 2, 0), 0.U(expWidth.W))
-  divisorIn := Cat(1.U(1.W), rawB_S.sig(sigWidth - 2, 0), 0.U(expWidth.W))
+  val fractDividendIn = Wire(UInt((fpWidth).W))
+  val fractDivisorIn = Wire(UInt((fpWidth).W))
+  fractDividendIn := Cat(1.U(1.W), rawA_S.sig(sigWidth - 2, 0), 0.U(expWidth.W))
+  fractDivisorIn := Cat(1.U(1.W), rawB_S.sig(sigWidth - 2, 0), 0.U(expWidth.W))
 
   val divModule = Module(new SRT16(fpWidth, fpWidth, fpWidth))
-  divModule.input.bits.dividend := dividendIn
-  divModule.input.bits.divider := divisorIn
+  divModule.input.bits.dividend := fractDividendIn
+  divModule.input.bits.divider := fractDivisorIn
   divModule.input.bits.counter := 8.U
   divModule.input.valid := input.valid && !input.bits.sqrt && normalCase_S_div
 

From cdbf0a5e652c71bd6fd493c43bf5426fe447a64d Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 22 Aug 2023 12:36:19 +0800
Subject: [PATCH 031/109] add roundingMode intput

---
 arithmetic/src/float/DivSqrt.scala | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index 55aa14f..a4d158c 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -12,6 +12,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val output = IO(ValidIO(new DivSqrtOutput(expWidth, sigWidth)))
 
   val opSqrtReg = RegEnable(input.bits.sqrt, input.fire)
+  val roundingModeReg = RegEnable(input.bits.roundingMode, input.fire)
 
   val rawA_S = rawFloatFromFN(expWidth, sigWidth, input.bits.a)
   val rawB_S = rawFloatFromFN(expWidth, sigWidth, input.bits.b)
@@ -112,7 +113,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     expToRound,
     sigToRound,
     rbitsToRound,
-    consts.round_near_even,
+    roundingModeReg,
     invalidExec,
     infinitExec,
     isNaN_Z,
@@ -120,8 +121,6 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     isZero_Z)
 
   output.bits.result := roundresult(0)
-  output.bits.sig := output.bits.result(sigWidth - 2, 0)
-  output.bits.exp := output.bits.result(fpWidth - 1, sigWidth - 1)
   output.bits.exceptionFlags := roundresult(1)
 
   input.ready := divModule.input.ready && SqrtModule.input.ready
@@ -133,12 +132,11 @@ class DivSqrtInput(expWidth: Int, sigWidth: Int) extends Bundle() {
   val a = UInt((expWidth + sigWidth).W)
   val b = UInt((expWidth + sigWidth).W)
   val sqrt = Bool()
+  val roundingMode = UInt(3.W)
 }
 
 
 class DivSqrtOutput(expWidth: Int, sigWidth: Int) extends Bundle() {
   val result = UInt((expWidth + sigWidth).W)
-  val sig = UInt((sigWidth-1).W)
-  val exp = UInt(expWidth.W)
   val exceptionFlags = UInt(5.W)
 }

From 364000171fba3af4dc3495aeed5b8b8183ba7788 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 21 Aug 2023 16:10:57 +0800
Subject: [PATCH 032/109] [makefile] add Makefile

---
 Makefile | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 Makefile

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..2e3c435
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,12 @@
+init:
+	git submodule update --init
+
+compile:
+	mill -i arithmetic[5.0.0].compile
+
+bsp:
+	mill -i mill.bsp.BSP/install
+
+clean:
+	git clean -fd
+

From 75e468e575ccd9616b4f754f4588b18183486292 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 21 Aug 2023 16:33:52 +0800
Subject: [PATCH 033/109] [build] add oslib

---
 build.sc  | 3 +++
 common.sc | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/build.sc b/build.sc
index f08cb59..1134de5 100644
--- a/build.sc
+++ b/build.sc
@@ -7,6 +7,7 @@ object v {
   val scala = "2.13.10"
   val spire = ivy"org.typelevel::spire:0.18.0"
   val evilplot = ivy"io.github.cibotech::evilplot:0.9.0"
+  val oslib =  ivy"com.lihaoyi::os-lib:0.9.1"
   val chiselCrossVersions = Map(
     "5.0.0" -> (ivy"org.chipsalliance::chisel:5.0.0", ivy"org.chipsalliance:::chisel-plugin:5.0.0"),
   )
@@ -27,6 +28,8 @@ trait Arithmetic
 
   def evilplotIvy = v.evilplot
 
+  def oslibIvy = v.oslib
+
   def chiselModule = None
 
   def chiselPluginJar = None
diff --git a/common.sc b/common.sc
index 1975792..88978e8 100644
--- a/common.sc
+++ b/common.sc
@@ -33,6 +33,8 @@ trait ArithmeticModule
 
   def evilplotIvy: T[Dep]
 
+  def oslibIvy: T[Dep]
+
   override def ivyDeps = T(super.ivyDeps() ++ Seq(spireIvy(), evilplotIvy()))
 }
 

From dc2d843d2c8b2f09e47476f5509708aad726bc72 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 21 Aug 2023 16:34:18 +0800
Subject: [PATCH 034/109] [test] add FTest draft

---
 arithmetic/src/float/Ftests.scala | 84 +++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 arithmetic/src/float/Ftests.scala

diff --git a/arithmetic/src/float/Ftests.scala b/arithmetic/src/float/Ftests.scala
new file mode 100644
index 0000000..bbba0b7
--- /dev/null
+++ b/arithmetic/src/float/Ftests.scala
@@ -0,0 +1,84 @@
+package float
+
+object Ftests extends App{
+
+  import chisel3.stage.ChiselGeneratorAnnotation
+  import firrtl.AnnotationSeq
+
+  val resources = os.resource()
+  val runDir = os.pwd / "run"
+  os.remove.all(runDir)
+  val elaborateDir = runDir / "elaborate"
+  os.makeDir.all(elaborateDir)
+  val rtlDir = runDir / "rtl"
+  os.makeDir.all(rtlDir)
+  val emulatorDir = runDir / "emulator"
+  os.makeDir.all(emulatorDir)
+  val emulatorCSrc = emulatorDir / "src"
+  os.makeDir.all(emulatorCSrc)
+  val emulatorCHeader = emulatorDir / "include"
+  os.makeDir.all(emulatorCHeader)
+  val emulatorBuildDir = emulatorDir / "build"
+  os.makeDir.all(emulatorBuildDir)
+
+  val emulatorThreads = 8
+  val verilatorArgs = Seq(
+    // format: off
+    "--x-initial unique",
+    "--output-split 100000",
+    "--max-num-width 1048576",
+    "--main",
+    "--timing",
+    // use for coverage
+    "--coverage-user",
+    "--assert",
+    // format: on
+  )
+
+  // TODO: this will be replaced by binder API
+  // elaborate
+  var topName: String = null
+  val annos: AnnotationSeq = Seq(
+    new chisel3.stage.phases.Elaborate,
+    new chisel3.stage.phases.Convert
+  ).foldLeft(
+    Seq(
+      ChiselGeneratorAnnotation(() => new DivSqrt(8,24))
+    ): AnnotationSeq
+  ) { case (annos, stage) => stage.transform(annos) }
+    .flatMap {
+      case FirrtlCircuitAnnotation(circuit) =>
+        topName = circuit.main
+        os.write.over(elaborateDir / s"$topName.fir", circuit.serialize)
+        None
+      case _: chisel3.stage.DesignAnnotation[_] => None
+      case _: chisel3.stage.ChiselCircuitAnnotation => None
+      case a => Some(a)
+    }
+  os.write.over(elaborateDir / s"$topName.anno.json", firrtl.annotations.JsonProtocol.serialize(annos))
+
+  // rtl
+  os.proc(
+    "firtool",
+    elaborateDir / s"$topName.fir", s"--annotation-file=${elaborateDir / s"$topName.anno.json"}",
+    "-dedup",
+    "-O=release",
+    "--disable-all-randomization",
+    "--split-verilog",
+    "--preserve-values=none",
+    "--preserve-aggregate=all",
+    "--strip-debug-info",
+    s"-o=$rtlDir"
+  ).call()
+  val verilogs = os.read.lines(rtlDir / "filelist.f")
+    .map(str =>
+      try {
+        os.Path(str)
+      } catch {
+        case e: IllegalArgumentException if e.getMessage.contains("is not an absolute path") =>
+          rtlDir / str.stripPrefix("./")
+      }
+    )
+    .filter(p => p.ext == "v" || p.ext == "sv")
+
+}
\ No newline at end of file

From 139ac86946bf58576fe0b062f2ec1d0036ccda49 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 21 Aug 2023 16:36:58 +0800
Subject: [PATCH 035/109] [submodule] switch chisel version

---
 dependencies/chisel | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dependencies/chisel b/dependencies/chisel
index d79ae71..4c8b6c0 160000
--- a/dependencies/chisel
+++ b/dependencies/chisel
@@ -1 +1 @@
-Subproject commit d79ae71e70b1b69dc316cee44710d99420f5ff5c
+Subproject commit 4c8b6c0cd0543b6058e410c06a9e2108501c0b45

From afa1f64946e961a26a0819e516c9b0ec2f1e40d8 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 22 Aug 2023 12:27:54 +0800
Subject: [PATCH 036/109] able to elaborate sv

---
 .gitignore                        | 3 ++-
 Makefile                          | 5 ++++-
 arithmetic/src/float/Ftests.scala | 3 +++
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index c0b4ce2..6687e1c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 # mill
 out/
+run/
 # sbt
 diplomacy/target/
 diplomacy/macros/target/
@@ -24,4 +25,4 @@ verdiLog
 *.out
 *.cmd
 *.log
-*.json
\ No newline at end of file
+*.json
diff --git a/Makefile b/Makefile
index 2e3c435..1af774e 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,10 @@ init:
 	git submodule update --init
 
 compile:
-	mill -i arithmetic[5.0.0].compile
+	mill -i -j 0 arithmetic[5.0.0].compile
+
+run:
+	mill -i -j 0 arithmetic[5.0.0].run
 
 bsp:
 	mill -i mill.bsp.BSP/install
diff --git a/arithmetic/src/float/Ftests.scala b/arithmetic/src/float/Ftests.scala
index bbba0b7..b0baa48 100644
--- a/arithmetic/src/float/Ftests.scala
+++ b/arithmetic/src/float/Ftests.scala
@@ -4,6 +4,9 @@ object Ftests extends App{
 
   import chisel3.stage.ChiselGeneratorAnnotation
   import firrtl.AnnotationSeq
+  import firrtl.stage._
+
+  println("this is Ftests")
 
   val resources = os.resource()
   val runDir = os.pwd / "run"

From eae9fc3f3a45622b4d468baec51a9d0bb64b67f1 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 22 Aug 2023 12:31:06 +0800
Subject: [PATCH 037/109] add DivSqrtTester

---
 arithmetic/src/float/DivSqrtTester.scala | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 arithmetic/src/float/DivSqrtTester.scala

diff --git a/arithmetic/src/float/DivSqrtTester.scala b/arithmetic/src/float/DivSqrtTester.scala
new file mode 100644
index 0000000..9e8be3b
--- /dev/null
+++ b/arithmetic/src/float/DivSqrtTester.scala
@@ -0,0 +1,10 @@
+package float
+
+import chisel3._
+import chisel3.util._
+import division.srt.srt16._
+import sqrt._
+
+class DivSqrtTester extends Module{
+
+}
\ No newline at end of file

From 3e5f5315eb45a2b04dcd8b3db2c01491b6b53e31 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 22 Aug 2023 14:20:24 +0800
Subject: [PATCH 038/109] [build] add arithmetictest target

---
 build.sc                                      | 28 +++++++++++++++++++
 common.sc                                     | 15 ++++++++++
 .../src/float => tests/src}/Ftests.scala      |  4 ++-
 3 files changed, 46 insertions(+), 1 deletion(-)
 rename {arithmetic/src/float => tests/src}/Ftests.scala (98%)

diff --git a/build.sc b/build.sc
index 1134de5..e30acd6 100644
--- a/build.sc
+++ b/build.sc
@@ -15,6 +15,8 @@ object v {
 
 object arithmetic extends Cross[Arithmetic](v.chiselCrossVersions.keys.toSeq)
 
+object arithmetictest extends Cross[ArithmeticTest](v.chiselCrossVersions.keys.toSeq)
+
 trait Arithmetic
   extends common.ArithmeticModule
     with ScalafmtModule
@@ -38,3 +40,29 @@ trait Arithmetic
 
   def chiselPluginIvy = Some(v.chiselCrossVersions(crossValue)._2)
 }
+
+trait ArithmeticTest
+  extends common.ArithmeticTestModule
+    with ScalafmtModule
+    with Cross.Module[String] {
+
+  override def scalaVersion = T(v.scala)
+
+  override def millSourcePath = os.pwd / "tests"
+
+  def arithmeticModule = arithmetic(crossValue)
+
+  def spireIvy = v.spire
+
+  def evilplotIvy = v.evilplot
+
+  def oslibIvy = v.oslib
+
+  def chiselModule = None
+
+  def chiselPluginJar = None
+
+  def chiselIvy = Some(v.chiselCrossVersions(crossValue)._1)
+
+  def chiselPluginIvy = Some(v.chiselCrossVersions(crossValue)._2)
+}
diff --git a/common.sc b/common.sc
index 88978e8..d477643 100644
--- a/common.sc
+++ b/common.sc
@@ -39,3 +39,18 @@ trait ArithmeticModule
 }
 
 // TODO: migrate test to svsim
+
+trait ArithmeticTestModule
+  extends ScalaModule
+    with HasChisel {
+  def arithmeticModule: ArithmeticModule
+  def spireIvy: T[Dep]
+
+  def evilplotIvy: T[Dep]
+
+  def oslibIvy: T[Dep]
+
+  override def moduleDeps = super.moduleDeps ++ Some(arithmeticModule)
+
+  override def ivyDeps = T(super.ivyDeps() ++ Seq(spireIvy(), evilplotIvy()))
+}
diff --git a/arithmetic/src/float/Ftests.scala b/tests/src/Ftests.scala
similarity index 98%
rename from arithmetic/src/float/Ftests.scala
rename to tests/src/Ftests.scala
index b0baa48..3a4e8cf 100644
--- a/arithmetic/src/float/Ftests.scala
+++ b/tests/src/Ftests.scala
@@ -1,4 +1,6 @@
-package float
+package tests
+
+import float._
 
 object Ftests extends App{
 

From 3013a3802c39cdc5eaac05fcda79b7d69144651e Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 22 Aug 2023 14:27:41 +0800
Subject: [PATCH 039/109] [nix] add softfloat and testfloat

---
 flake.nix         |  1 +
 nix/softfloat.nix | 20 ++++++++++++++++++++
 nix/testfloat.nix | 18 ++++++++++++++++++
 overlay.nix       |  2 ++
 4 files changed, 41 insertions(+)
 create mode 100644 nix/softfloat.nix
 create mode 100644 nix/testfloat.nix

diff --git a/flake.nix b/flake.nix
index 193c1dc..ffff777 100644
--- a/flake.nix
+++ b/flake.nix
@@ -19,6 +19,7 @@
             mill
             circt
             verilator
+            testfloat
           ];
         in
         {
diff --git a/nix/softfloat.nix b/nix/softfloat.nix
new file mode 100644
index 0000000..7a23d00
--- /dev/null
+++ b/nix/softfloat.nix
@@ -0,0 +1,20 @@
+{ stdenv, fetchFromGitHub }:
+stdenv.mkDerivation rec {
+  pname = "softfloat";
+  version = "5c06db33fc1e2130f67c045327b0ec949032df1d";
+  src = fetchFromGitHub {
+    owner = "ucb-bar";
+    repo = "berkeley-softfloat-3";
+    rev = version;
+    sha256 = "sha256-uqf2xATeLyPEs/f8Yqc/Cr5YiklV2754g8IJu5z50sk=";
+  };
+  buildPhase = ''
+    make -C build/Linux-x86_64-GCC SPECIALIZE_TYPE=RISCV
+  '';
+  installPhase = ''
+    mkdir -p $out/lib
+    mkdir -p $out/include
+    mv build/Linux-x86_64-GCC/softfloat.a $out/lib/softfloat.a
+    cp source/include/* $out/include
+  '';
+}
diff --git a/nix/testfloat.nix b/nix/testfloat.nix
new file mode 100644
index 0000000..1471c33
--- /dev/null
+++ b/nix/testfloat.nix
@@ -0,0 +1,18 @@
+{ stdenv, fetchFromGitHub, softfloat }:
+stdenv.mkDerivation rec {
+  pname = "softfloat";
+  version = "06b20075dd3c1a5d0dd007a93643282832221612";
+  src = fetchFromGitHub {
+    owner = "ucb-bar";
+    repo = "berkeley-testfloat-3";
+    rev = version;
+    sha256 = "sha256-4C0a3jmmQPYlgbQ9F1frjtVixk3+wvLZFiujOhHshmw=";
+  };
+  buildPhase = ''
+    make -C build/Linux-x86_64-GCC SPECIALIZE_TYPE=RISCV SOFTFLOAT_INCLUDE_DIR=${softfloat}/include SOFTFLOAT_LIB=${softfloat}/lib/softfloat.a
+  '';
+  installPhase = ''
+    mkdir -p $out/bin
+    cp build/Linux-x86_64-GCC/testfloat_gen $out/bin/testfloat_gen
+  '';
+}
diff --git a/overlay.nix b/overlay.nix
index 398c8f4..bcca536 100644
--- a/overlay.nix
+++ b/overlay.nix
@@ -7,4 +7,6 @@ final: prev: {
     };
   });
   espresso = final.callPackage ./nix/espresso.nix { };
+  softfloat = final.callPackage ./nix/softfloat.nix { };
+  testfloat = final.callPackage ./nix/testfloat.nix { };
 }

From d76feccdd8bae423e591467eaf743b5d67f42fbc Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 22 Aug 2023 15:00:47 +0800
Subject: [PATCH 040/109] [test] add resources

---
 tests/resources/csrc/test.cpp                 | 89 +++++++++++++++++++
 .../includes/DivSqrtRecF32_small_div.h        |  2 +
 .../includes/DivSqrtRecF32_small_sqrt.h       |  5 ++
 .../includes/DivSqrtRecFN_small_div.h         | 54 +++++++++++
 .../includes/DivSqrtRecFN_small_sqrt.h        | 46 ++++++++++
 tests/resources/includes/verilator.h          | 29 ++++++
 tests/src/Ftests.scala                        |  2 +
 7 files changed, 227 insertions(+)
 create mode 100644 tests/resources/csrc/test.cpp
 create mode 100644 tests/resources/includes/DivSqrtRecF32_small_div.h
 create mode 100644 tests/resources/includes/DivSqrtRecF32_small_sqrt.h
 create mode 100644 tests/resources/includes/DivSqrtRecFN_small_div.h
 create mode 100644 tests/resources/includes/DivSqrtRecFN_small_sqrt.h
 create mode 100644 tests/resources/includes/verilator.h

diff --git a/tests/resources/csrc/test.cpp b/tests/resources/csrc/test.cpp
new file mode 100644
index 0000000..11da540
--- /dev/null
+++ b/tests/resources/csrc/test.cpp
@@ -0,0 +1,89 @@
+#if VM_TRACE
+# include "verilator.h"
+#endif
+
+// include files are part of the g++ command line
+
+int main (int argc, char* argv[])
+{
+    if (argc < 3) {
+        printf("usage: %s <rounding-mode> <tininess-detection>\n", argv[0]);
+        return -1;
+    }
+
+    dut module;
+
+#if VM_TRACE
+    VerilatedVcdFILE vcdfd(stderr);
+    VerilatedVcdC tfp(&vcdfd);
+    Verilated::traceEverOn(true);
+    module.trace(&tfp, 99);
+    tfp.open("");
+#endif
+
+    initialize_dut(module);
+    module.ROUNDING_MODE = strtoull(argv[1], NULL, 16);
+    module.DETECT_TININESS = strtoull(argv[2], NULL, 16);
+
+    size_t error = 0;
+    size_t cnt = 0;
+
+    // reset
+    for (size_t i=0; i<10; i++) {
+        module.reset = 1;
+        module.clock = 0;
+        module.eval();
+        module.clock = 1;
+        module.eval();
+    }
+    module.reset = 0;
+
+    // main operation
+    for (size_t cycle = 0; ; cycle++) {
+        if (!process_inputs(module) || !process_outputs(module)) {
+            printf("Ran %ld tests.\n", cnt);
+            if (!error) fputs("No errors found.\n", stdout);
+            break;
+        }
+
+        module.clock = 0;
+        module.eval();
+
+#if VM_TRACE
+        tfp.dump(static_cast<vluint64_t>(cycle * 2));
+#endif
+
+        if (module.io_check) {
+            if ((cnt % 10000 == 0) && cnt) printf("Ran %ld tests.\n", cnt);
+            if (!module.io_pass) {
+                error++;
+                printf("[%07ld]", cnt);
+                // for (size_t i=0; i<inputs.size(); i++) {
+                //    printf(" %s", inputs[i]->to_str().c_str());
+                // }
+                printf(
+                    "\n\t=> %#x %#x   expected: %#x %#x\n",
+                    module.io_actual_out,
+                    module.io_actual_exceptionFlags,
+                    module.io_expected_recOut,
+                    module.io_expected_exceptionFlags
+                );
+                if (error == 20) {
+                    printf("Reached %ld errors. Aborting.\n", error);
+                    break;
+                }
+            }
+            cnt++;
+        }
+
+        module.clock = 1;
+        module.eval();
+
+#if VM_TRACE
+        tfp.dump(static_cast<vluint64_t>(cycle * 2 + 1));
+#endif
+    }
+
+    return 0;
+}
+
diff --git a/tests/resources/includes/DivSqrtRecF32_small_div.h b/tests/resources/includes/DivSqrtRecF32_small_div.h
new file mode 100644
index 0000000..ebc6604
--- /dev/null
+++ b/tests/resources/includes/DivSqrtRecF32_small_div.h
@@ -0,0 +1,2 @@
+#define FLEN 32
+#include "DivSqrtRecFN_small_div.h"
diff --git a/tests/resources/includes/DivSqrtRecF32_small_sqrt.h b/tests/resources/includes/DivSqrtRecF32_small_sqrt.h
new file mode 100644
index 0000000..2effeb7
--- /dev/null
+++ b/tests/resources/includes/DivSqrtRecF32_small_sqrt.h
@@ -0,0 +1,5 @@
+#define FLEN 32
+#include "DivSqrtRecFN_small_sqrt.h"
+
+#define ROUNDING_MODE io_input_bits_roundingMode
+#define DETECT_TININESS io_input_bits_detectTininess
diff --git a/tests/resources/includes/DivSqrtRecFN_small_div.h b/tests/resources/includes/DivSqrtRecFN_small_div.h
new file mode 100644
index 0000000..b5d0704
--- /dev/null
+++ b/tests/resources/includes/DivSqrtRecFN_small_div.h
@@ -0,0 +1,54 @@
+#include "dut.h"
+
+#define ROUNDING_MODE io_input_bits_roundingMode
+#define DETECT_TININESS io_input_bits_detectTininess
+
+static void initialize_dut(dut& m)
+{
+  m.io_input_valid = 1;
+}
+
+static int process_inputs(dut& m)
+{
+  char value[64];
+
+  if (!m.io_input_ready) {
+    return 1;
+  }
+
+  if (scanf("%s", value) != 1) {
+    return 0;
+  }
+  m.io_input_bits_a = strtoull(value, NULL, 16);
+
+  if (scanf("%s", value) != 1) {
+    return 0;
+  }
+  m.io_input_bits_b = strtoull(value, NULL, 16);
+
+  return 1;
+}
+
+static int process_outputs(dut& m)
+{
+  char value[64];
+
+  if (!m.io_input_ready) {
+    return 1;
+  }
+
+  // output
+  if (scanf("%s", value) != 1) {
+    return 0;
+  }
+  m.io_input_bits_out = strtoull(value, NULL, 16);
+
+  // exception flags
+  if (scanf("%s", value) != 1) {
+    return 0;
+  }
+  m.io_input_bits_exceptionFlags = strtoull(value, NULL, 16);
+
+  return 1;
+}
+
diff --git a/tests/resources/includes/DivSqrtRecFN_small_sqrt.h b/tests/resources/includes/DivSqrtRecFN_small_sqrt.h
new file mode 100644
index 0000000..5a4fed0
--- /dev/null
+++ b/tests/resources/includes/DivSqrtRecFN_small_sqrt.h
@@ -0,0 +1,46 @@
+#include "dut.h"
+
+static void initialize_dut(dut& m)
+{
+  m.io_input_valid = 1;
+}
+
+static int process_inputs(dut& m)
+{
+  char value[64];
+
+  if (!m.io_input_ready) {
+    return 1;
+  }
+
+  if (scanf("%s", value) != 1) {
+    return 0;
+  }
+  m.io_input_bits_a = strtoull(value, NULL, 16);
+
+  return 1;
+}
+
+static int process_outputs(dut& m)
+{
+  char value[64];
+
+  if (!m.io_input_ready) {
+    return 1;
+  }
+
+  // output
+  if (scanf("%s", value) != 1) {
+    return 0;
+  }
+  m.io_input_bits_out = strtoull(value, NULL, 16);
+
+  // exception flags
+  if (scanf("%s", value) != 1) {
+    return 0;
+  }
+  m.io_input_bits_exceptionFlags = strtoull(value, NULL, 16);
+
+  return 1;
+}
+
diff --git a/tests/resources/includes/verilator.h b/tests/resources/includes/verilator.h
new file mode 100644
index 0000000..d5ada6c
--- /dev/null
+++ b/tests/resources/includes/verilator.h
@@ -0,0 +1,29 @@
+#ifndef _ROCKET_VERILATOR_H
+#define _ROCKET_VERILATOR_H
+
+#include "verilated_vcd_c.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+extern bool verbose;
+extern bool done_reset;
+
+class VerilatedVcdFILE : public VerilatedVcdFile {
+ public:
+  VerilatedVcdFILE(FILE* file) : file(file) {}
+  ~VerilatedVcdFILE() {}
+  bool open(const std::string& name) override {
+    // file should already be open
+    return file != NULL;
+  }
+  void close() override {
+    // file should be closed elsewhere
+  }
+  ssize_t write(const char* bufp, ssize_t len) override {
+    return fwrite(bufp, 1, len, file);
+  }
+ private:
+  FILE* file;
+};
+
+#endif
diff --git a/tests/src/Ftests.scala b/tests/src/Ftests.scala
index 3a4e8cf..71f9523 100644
--- a/tests/src/Ftests.scala
+++ b/tests/src/Ftests.scala
@@ -86,4 +86,6 @@ object Ftests extends App{
     )
     .filter(p => p.ext == "v" || p.ext == "sv")
 
+  os.write(rtlDir / "dut.v", chisel3.getVerilogString(new DivSqrt(8,24)))
+
 }
\ No newline at end of file

From 69cf5cf71a1eecd61cd561fe5301992c2cd46ae8 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 22 Aug 2023 15:01:38 +0800
Subject: [PATCH 041/109] [test] add dut for divsqrt

---
 tests/src/ValExec_DivSqrtRecFN_small.scala | 182 +++++++++++++++++++++
 1 file changed, 182 insertions(+)
 create mode 100644 tests/src/ValExec_DivSqrtRecFN_small.scala

diff --git a/tests/src/ValExec_DivSqrtRecFN_small.scala b/tests/src/ValExec_DivSqrtRecFN_small.scala
new file mode 100644
index 0000000..51a1c2d
--- /dev/null
+++ b/tests/src/ValExec_DivSqrtRecFN_small.scala
@@ -0,0 +1,182 @@
+
+/*============================================================================
+
+This Chisel source file is part of a pre-release version of the HardFloat IEEE
+Floating-Point Arithmetic Package, by John R. Hauser (with some contributions
+from Yunsup Lee and Andrew Waterman, mainly concerning testing).
+
+Copyright 2017 SiFive, Inc.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of SiFive nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY SIFIVE AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL SIFIVE OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+import chisel3._
+import chisel3.util._
+
+class DivRecFN_io(expWidth: Int, sigWidth: Int) extends Bundle {
+    val a = Bits((expWidth + sigWidth).W)
+    val b = Bits((expWidth + sigWidth).W)
+    val roundingMode   = UInt(3.W)
+    val detectTininess = UInt(1.W)
+    val out = Bits((expWidth + sigWidth).W)
+    val exceptionFlags = Bits(5.W)
+
+}
+
+class
+    ValExec_DivSqrtRecFN_small_div(expWidth: Int, sigWidth: Int, options: Int) extends Module
+{
+    val io = IO(new Bundle {
+        val input = Flipped(Decoupled(new DivRecFN_io(expWidth, sigWidth)))
+
+        val output = new Bundle {
+            val a = Flipped(Input(Bits((expWidth + sigWidth).W)))
+            val b = Flipped(Input(Bits((expWidth + sigWidth).W)))
+            val roundingMode   = Output(UInt(3.W))
+            val detectTininess = Output(UInt(1.W))
+        }
+
+        val expected = new Bundle {
+            val out = Output(Bits((expWidth + sigWidth).W))
+            val exceptionFlags = Output(Bits(5.W))
+            val recOut = Output(Bits((expWidth + sigWidth + 1).W))
+        }
+
+        val actual = new Bundle {
+            val out = Output(Bits((expWidth + sigWidth + 1).W))
+            val exceptionFlags = Output(Bits(5.W))
+        }
+
+        val check = Output(Bool())
+        val pass = Output(Bool())
+    })
+
+    val ds = Module(new DivSqrtRecFN_small(expWidth, sigWidth, options))
+    val cq = Module(new Queue(new DivRecFN_io(expWidth, sigWidth), 5))
+
+    cq.io.enq.valid := io.input.valid && ds.io.inReady
+    cq.io.enq.bits := io.input.bits
+
+    io.input.ready := ds.io.inReady && cq.io.enq.ready
+    ds.io.inValid := io.input.valid && cq.io.enq.ready
+    ds.io.sqrtOp := false.B
+    ds.io.a := recFNFromFN(expWidth, sigWidth, io.input.bits.a)
+    ds.io.b := recFNFromFN(expWidth, sigWidth, io.input.bits.b)
+    ds.io.roundingMode   := io.input.bits.roundingMode
+    ds.io.detectTininess := io.input.bits.detectTininess
+
+    io.output.a := cq.io.deq.bits.a
+    io.output.b := cq.io.deq.bits.b
+    io.output.roundingMode   := cq.io.deq.bits.roundingMode
+    io.output.detectTininess := cq.io.deq.bits.detectTininess
+
+    io.expected.out := cq.io.deq.bits.out
+    io.expected.exceptionFlags := cq.io.deq.bits.exceptionFlags
+    io.expected.recOut := recFNFromFN(expWidth, sigWidth, cq.io.deq.bits.out)
+
+    io.actual.out := ds.io.out
+    io.actual.exceptionFlags := ds.io.exceptionFlags
+
+    cq.io.deq.ready := ds.io.outValid_div
+
+    io.check := ds.io.outValid_div
+    io.pass :=
+        cq.io.deq.valid &&
+        equivRecFN(expWidth, sigWidth, io.actual.out, io.expected.recOut) &&
+        (io.actual.exceptionFlags === io.expected.exceptionFlags)
+}
+
+class SqrtRecFN_io(expWidth: Int, sigWidth: Int) extends Bundle {
+    val a = Bits((expWidth + sigWidth).W)
+    val roundingMode   = UInt(3.W)
+    val detectTininess = UInt(1.W)
+    val out = Bits((expWidth + sigWidth).W)
+    val exceptionFlags = Bits(5.W)
+
+}
+
+class
+    ValExec_DivSqrtRecFN_small_sqrt(expWidth: Int, sigWidth: Int, options: Int)
+    extends Module
+{
+    val io = IO(new Bundle {
+        val input = Flipped(Decoupled(new SqrtRecFN_io(expWidth, sigWidth)))
+
+        val output = new Bundle {
+            val a = Output(Bits((expWidth + sigWidth).W))
+            val roundingMode   = Output(UInt(3.W))
+            val detectTininess = Output(UInt(1.W))
+        }
+
+        val expected = new Bundle {
+            val out = Output(Bits((expWidth + sigWidth).W))
+            val exceptionFlags = Output(Bits(5.W))
+            val recOut = Output(Bits((expWidth + sigWidth + 1).W))
+        }
+
+        val actual = new Bundle {
+            val out = Output(Bits((expWidth + sigWidth + 1).W))
+            val exceptionFlags = Output(Bits(5.W))
+        }
+
+        val check = Output(Bool())
+        val pass = Output(Bool())
+    })
+
+    val ds = Module(new DivSqrtRecFN_small(expWidth, sigWidth, options))
+    val cq = Module(new Queue(new SqrtRecFN_io(expWidth, sigWidth), 5))
+
+    cq.io.enq.valid := io.input.valid && ds.io.inReady
+    cq.io.enq.bits := io.input.bits
+
+    io.input.ready := ds.io.inReady && cq.io.enq.ready
+    ds.io.inValid := io.input.valid && cq.io.enq.ready
+    ds.io.sqrtOp := true.B
+    ds.io.a := recFNFromFN(expWidth, sigWidth, io.input.bits.a)
+    ds.io.b := DontCare
+    ds.io.roundingMode   := io.input.bits.roundingMode
+    ds.io.detectTininess := io.input.bits.detectTininess
+
+    io.output.a := cq.io.deq.bits.a
+    io.output.roundingMode   := cq.io.deq.bits.roundingMode
+    io.output.detectTininess := cq.io.deq.bits.detectTininess
+
+    io.expected.out := cq.io.deq.bits.out
+    io.expected.exceptionFlags := cq.io.deq.bits.exceptionFlags
+    io.expected.recOut := recFNFromFN(expWidth, sigWidth, cq.io.deq.bits.out)
+
+    io.actual.exceptionFlags := ds.io.exceptionFlags
+    io.actual.out := ds.io.out
+
+    cq.io.deq.ready := ds.io.outValid_sqrt
+
+    io.check := ds.io.outValid_sqrt
+    io.pass :=
+        cq.io.deq.valid &&
+        equivRecFN(expWidth, sigWidth, io.actual.out, io.expected.recOut) &&
+        (io.actual.exceptionFlags === io.expected.exceptionFlags)
+}

From 12968263e761d99f80585cab73b62a9fe9faaafa Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 22 Aug 2023 15:12:09 +0800
Subject: [PATCH 042/109] [test] fix dut for divsqrt

---
 tests/src/Ftests.scala                     |  2 +-
 tests/src/ValExec_DivSqrtRecFN_small.scala | 63 +++++++++++-----------
 2 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/tests/src/Ftests.scala b/tests/src/Ftests.scala
index 71f9523..5032dee 100644
--- a/tests/src/Ftests.scala
+++ b/tests/src/Ftests.scala
@@ -48,7 +48,7 @@ object Ftests extends App{
     new chisel3.stage.phases.Convert
   ).foldLeft(
     Seq(
-      ChiselGeneratorAnnotation(() => new DivSqrt(8,24))
+      ChiselGeneratorAnnotation(() => new ValExec_DivSqrtRecFN_small_div(8,24,0))
     ): AnnotationSeq
   ) { case (annos, stage) => stage.transform(annos) }
     .flatMap {
diff --git a/tests/src/ValExec_DivSqrtRecFN_small.scala b/tests/src/ValExec_DivSqrtRecFN_small.scala
index 51a1c2d..1651a90 100644
--- a/tests/src/ValExec_DivSqrtRecFN_small.scala
+++ b/tests/src/ValExec_DivSqrtRecFN_small.scala
@@ -33,9 +33,10 @@ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 =============================================================================*/
-
+package tests
 import chisel3._
 import chisel3.util._
+import float._
 
 class DivRecFN_io(expWidth: Int, sigWidth: Int) extends Bundle {
     val a = Bits((expWidth + sigWidth).W)
@@ -75,19 +76,19 @@ class
         val pass = Output(Bool())
     })
 
-    val ds = Module(new DivSqrtRecFN_small(expWidth, sigWidth, options))
+    val ds = Module(new DivSqrt(8,24))
     val cq = Module(new Queue(new DivRecFN_io(expWidth, sigWidth), 5))
 
-    cq.io.enq.valid := io.input.valid && ds.io.inReady
+    cq.io.enq.valid := io.input.valid && ds.input.ready
     cq.io.enq.bits := io.input.bits
 
-    io.input.ready := ds.io.inReady && cq.io.enq.ready
-    ds.io.inValid := io.input.valid && cq.io.enq.ready
-    ds.io.sqrtOp := false.B
-    ds.io.a := recFNFromFN(expWidth, sigWidth, io.input.bits.a)
-    ds.io.b := recFNFromFN(expWidth, sigWidth, io.input.bits.b)
-    ds.io.roundingMode   := io.input.bits.roundingMode
-    ds.io.detectTininess := io.input.bits.detectTininess
+    io.input.ready := ds.input.ready && cq.io.enq.ready
+    ds.input.valid := io.input.valid && cq.io.enq.ready
+    ds.input.bits.sqrt := false.B
+    ds.input.bits.a := io.input.bits.a
+    ds.input.bits.b := io.input.bits.b
+    ds.input.bits.roundingMode   := io.input.bits.roundingMode
+    //ds.input.bits.detectTininess := io.input.bits.detectTininess
 
     io.output.a := cq.io.deq.bits.a
     io.output.b := cq.io.deq.bits.b
@@ -96,17 +97,17 @@ class
 
     io.expected.out := cq.io.deq.bits.out
     io.expected.exceptionFlags := cq.io.deq.bits.exceptionFlags
-    io.expected.recOut := recFNFromFN(expWidth, sigWidth, cq.io.deq.bits.out)
+    io.expected.recOut := cq.io.deq.bits.out
 
-    io.actual.out := ds.io.out
-    io.actual.exceptionFlags := ds.io.exceptionFlags
+    io.actual.out := ds.output.bits.result
+    io.actual.exceptionFlags := ds.output.bits.exceptionFlags
 
-    cq.io.deq.ready := ds.io.outValid_div
+    cq.io.deq.ready := ds.output.valid
 
-    io.check := ds.io.outValid_div
+    io.check := ds.output.valid
     io.pass :=
         cq.io.deq.valid &&
-        equivRecFN(expWidth, sigWidth, io.actual.out, io.expected.recOut) &&
+          (io.actual.out===io.expected.recOut) &&
         (io.actual.exceptionFlags === io.expected.exceptionFlags)
 }
 
@@ -147,19 +148,19 @@ class
         val pass = Output(Bool())
     })
 
-    val ds = Module(new DivSqrtRecFN_small(expWidth, sigWidth, options))
+    val ds = Module(new DivSqrt(8,24))
     val cq = Module(new Queue(new SqrtRecFN_io(expWidth, sigWidth), 5))
 
-    cq.io.enq.valid := io.input.valid && ds.io.inReady
+    cq.io.enq.valid := io.input.valid && ds.input.ready
     cq.io.enq.bits := io.input.bits
 
-    io.input.ready := ds.io.inReady && cq.io.enq.ready
-    ds.io.inValid := io.input.valid && cq.io.enq.ready
-    ds.io.sqrtOp := true.B
-    ds.io.a := recFNFromFN(expWidth, sigWidth, io.input.bits.a)
-    ds.io.b := DontCare
-    ds.io.roundingMode   := io.input.bits.roundingMode
-    ds.io.detectTininess := io.input.bits.detectTininess
+    io.input.ready := ds.input.ready && cq.io.enq.ready
+    ds.input.valid := io.input.valid && cq.io.enq.ready
+    ds.input.bits.sqrt := true.B
+    ds.input.bits.a := io.input.bits.a
+    ds.input.bits.b := DontCare
+    ds.input.bits.roundingMode   := io.input.bits.roundingMode
+//    ds.input.bits.detectTininess := io.input.bits.detectTininess
 
     io.output.a := cq.io.deq.bits.a
     io.output.roundingMode   := cq.io.deq.bits.roundingMode
@@ -167,16 +168,16 @@ class
 
     io.expected.out := cq.io.deq.bits.out
     io.expected.exceptionFlags := cq.io.deq.bits.exceptionFlags
-    io.expected.recOut := recFNFromFN(expWidth, sigWidth, cq.io.deq.bits.out)
+    io.expected.recOut := cq.io.deq.bits.out
 
-    io.actual.exceptionFlags := ds.io.exceptionFlags
-    io.actual.out := ds.io.out
+    io.actual.exceptionFlags := ds.output.bits.exceptionFlags
+    io.actual.out := ds.output.bits.result
 
-    cq.io.deq.ready := ds.io.outValid_sqrt
+    cq.io.deq.ready := ds.output.valid
 
-    io.check := ds.io.outValid_sqrt
+    io.check := ds.output.valid
     io.pass :=
         cq.io.deq.valid &&
-        equivRecFN(expWidth, sigWidth, io.actual.out, io.expected.recOut) &&
+          (io.actual.out === io.expected.recOut) &&
         (io.actual.exceptionFlags === io.expected.exceptionFlags)
 }

From a26f5fea3be763f537dfd3171898597875edbad1 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 22 Aug 2023 16:50:04 +0800
Subject: [PATCH 043/109] [test] add FMATester

---
 Makefile                                   |   3 +
 build.sc                                   |   8 +-
 common.sc                                  |  20 +-
 tests/src/Ftests.scala                     | 284 +++++++++++++++------
 tests/src/ValExec_DivSqrtRecFN_small.scala |   4 +-
 5 files changed, 233 insertions(+), 86 deletions(-)

diff --git a/Makefile b/Makefile
index 1af774e..6463eae 100644
--- a/Makefile
+++ b/Makefile
@@ -7,6 +7,9 @@ compile:
 run:
 	mill -i -j 0 arithmetic[5.0.0].run
 
+test:
+	mill -i -j 0 arithmetictest[5.0.0].test
+
 bsp:
 	mill -i mill.bsp.BSP/install
 
diff --git a/build.sc b/build.sc
index e30acd6..5061cab 100644
--- a/build.sc
+++ b/build.sc
@@ -11,6 +11,9 @@ object v {
   val chiselCrossVersions = Map(
     "5.0.0" -> (ivy"org.chipsalliance::chisel:5.0.0", ivy"org.chipsalliance:::chisel-plugin:5.0.0"),
   )
+
+  val scalatest = ivy"org.scalatest::scalatest:3.2.0"
+  val scalapar = ivy"org.scala-lang.modules::scala-parallel-collections:1.0.4"
 }
 
 object arithmetic extends Cross[Arithmetic](v.chiselCrossVersions.keys.toSeq)
@@ -43,7 +46,6 @@ trait Arithmetic
 
 trait ArithmeticTest
   extends common.ArithmeticTestModule
-    with ScalafmtModule
     with Cross.Module[String] {
 
   override def scalaVersion = T(v.scala)
@@ -52,6 +54,10 @@ trait ArithmeticTest
 
   def arithmeticModule = arithmetic(crossValue)
 
+  def scalatestIvy = v.scalatest
+
+  def scalaparIvy = v.scalapar
+
   def spireIvy = v.spire
 
   def evilplotIvy = v.evilplot
diff --git a/common.sc b/common.sc
index d477643..3efeccd 100644
--- a/common.sc
+++ b/common.sc
@@ -41,8 +41,9 @@ trait ArithmeticModule
 // TODO: migrate test to svsim
 
 trait ArithmeticTestModule
-  extends ScalaModule
-    with HasChisel {
+  extends TestModule
+    with HasChisel
+    with TestModule.ScalaTest {
   def arithmeticModule: ArithmeticModule
   def spireIvy: T[Dep]
 
@@ -50,7 +51,20 @@ trait ArithmeticTestModule
 
   def oslibIvy: T[Dep]
 
+  def scalatestIvy: Dep
+
+  def scalaparIvy: Dep
+
   override def moduleDeps = super.moduleDeps ++ Some(arithmeticModule)
 
-  override def ivyDeps = T(super.ivyDeps() ++ Seq(spireIvy(), evilplotIvy()))
+  override def defaultCommandName() = "test"
+
+  override def ivyDeps = T(
+    super.ivyDeps() ++ Agg(
+      scalatestIvy,
+      scalaparIvy,
+      spireIvy(),
+      evilplotIvy()
+    )
+  )
 }
diff --git a/tests/src/Ftests.scala b/tests/src/Ftests.scala
index 5032dee..04aecfc 100644
--- a/tests/src/Ftests.scala
+++ b/tests/src/Ftests.scala
@@ -1,91 +1,215 @@
 package tests
 
+import chisel3.RawModule
 import float._
 
-object Ftests extends App{
-
-  import chisel3.stage.ChiselGeneratorAnnotation
-  import firrtl.AnnotationSeq
-  import firrtl.stage._
-
-  println("this is Ftests")
-
-  val resources = os.resource()
-  val runDir = os.pwd / "run"
-  os.remove.all(runDir)
-  val elaborateDir = runDir / "elaborate"
-  os.makeDir.all(elaborateDir)
-  val rtlDir = runDir / "rtl"
-  os.makeDir.all(rtlDir)
-  val emulatorDir = runDir / "emulator"
-  os.makeDir.all(emulatorDir)
-  val emulatorCSrc = emulatorDir / "src"
-  os.makeDir.all(emulatorCSrc)
-  val emulatorCHeader = emulatorDir / "include"
-  os.makeDir.all(emulatorCHeader)
-  val emulatorBuildDir = emulatorDir / "build"
-  os.makeDir.all(emulatorBuildDir)
-
-  val emulatorThreads = 8
-  val verilatorArgs = Seq(
-    // format: off
-    "--x-initial unique",
-    "--output-split 100000",
-    "--max-num-width 1048576",
-    "--main",
-    "--timing",
-    // use for coverage
-    "--coverage-user",
-    "--assert",
-    // format: on
+import java.text.SimpleDateFormat
+import java.util.Calendar
+import java.text.SimpleDateFormat
+import java.util.Calendar
+import scala.collection.parallel.CollectionConverters._
+
+import chisel3.RawModule
+import org.scalatest.ParallelTestExecution
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+import java.text.SimpleDateFormat
+import java.util.Calendar
+import scala.collection.parallel.CollectionConverters._
+
+//object Ftests extends App{
+//  import chisel3.stage.ChiselGeneratorAnnotation
+//  import firrtl.AnnotationSeq
+//  import firrtl.stage._
+//
+//  println("this is Ftests")
+//
+//  val resources = os.resource()
+//  val runDir = os.pwd / "run"
+//  os.remove.all(runDir)
+//  val elaborateDir = runDir / "elaborate"
+//  os.makeDir.all(elaborateDir)
+//  val rtlDir = runDir / "rtl"
+//  os.makeDir.all(rtlDir)
+//  val emulatorDir = runDir / "emulator"
+//  os.makeDir.all(emulatorDir)
+//  val emulatorCSrc = emulatorDir / "src"
+//  os.makeDir.all(emulatorCSrc)
+//  val emulatorCHeader = emulatorDir / "include"
+//  os.makeDir.all(emulatorCHeader)
+//  val emulatorBuildDir = emulatorDir / "build"
+//  os.makeDir.all(emulatorBuildDir)
+//
+//  val emulatorThreads = 8
+//  val verilatorArgs = Seq(
+//    // format: off
+//    "--x-initial unique",
+//    "--output-split 100000",
+//    "--max-num-width 1048576",
+//    "--main",
+//    "--timing",
+//    // use for coverage
+//    "--coverage-user",
+//    "--assert",
+//    // format: on
+//  )
+//
+//  // TODO: this will be replaced by binder API
+//  // elaborate
+//  var topName: String = null
+//  val annos: AnnotationSeq = Seq(
+//    new chisel3.stage.phases.Elaborate,
+//    new chisel3.stage.phases.Convert
+//  ).foldLeft(
+//    Seq(
+//      ChiselGeneratorAnnotation(() => new ValExec_DivSqrtRecFN_small_div(8,24,0))
+//    ): AnnotationSeq
+//  ) { case (annos, stage) => stage.transform(annos) }
+//    .flatMap {
+//      case FirrtlCircuitAnnotation(circuit) =>
+//        topName = circuit.main
+//        os.write.over(elaborateDir / s"$topName.fir", circuit.serialize)
+//        None
+//      case _: chisel3.stage.DesignAnnotation[_] => None
+//      case _: chisel3.stage.ChiselCircuitAnnotation => None
+//      case a => Some(a)
+//    }
+//  os.write.over(elaborateDir / s"$topName.anno.json", firrtl.annotations.JsonProtocol.serialize(annos))
+//
+//  // rtl
+//  os.proc(
+//    "firtool",
+//    elaborateDir / s"$topName.fir", s"--annotation-file=${elaborateDir / s"$topName.anno.json"}",
+//    "-dedup",
+//    "-O=release",
+//    "--disable-all-randomization",
+//    "--split-verilog",
+//    "--preserve-values=none",
+//    "--preserve-aggregate=all",
+//    "--strip-debug-info",
+//    s"-o=$rtlDir"
+//  ).call()
+//  val verilogs = os.read.lines(rtlDir / "filelist.f")
+//    .map(str =>
+//      try {
+//        os.Path(str)
+//      } catch {
+//        case e: IllegalArgumentException if e.getMessage.contains("is not an absolute path") =>
+//          rtlDir / str.stripPrefix("./")
+//      }
+//    )
+//    .filter(p => p.ext == "v" || p.ext == "sv")
+//
+////  os.write(rtlDir / "dut.v", chisel3.getVerilogString(new DivSqrt(8,24)))
+//
+//}
+
+trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
+  def exp(f: Int) = f match {
+    case 16 => 5
+    case 32 => 8
+    case 64 => 11
+  }
+
+  def sig(f: Int) = f match {
+    case 16 => 11
+    case 32 => 24
+    case 64 => 53
+  }
+
+  val roundings = Seq(
+    "-rnear_even" -> "0",
+    "-rminMag" -> "1",
+    "-rmin" -> "2",
+    "-rmax" -> "3",
+    "-rnear_maxMag" -> "4",
+    "-rodd" -> "6"
   )
 
-  // TODO: this will be replaced by binder API
-  // elaborate
-  var topName: String = null
-  val annos: AnnotationSeq = Seq(
-    new chisel3.stage.phases.Elaborate,
-    new chisel3.stage.phases.Convert
-  ).foldLeft(
-    Seq(
-      ChiselGeneratorAnnotation(() => new ValExec_DivSqrtRecFN_small_div(8,24,0))
-    ): AnnotationSeq
-  ) { case (annos, stage) => stage.transform(annos) }
-    .flatMap {
-      case FirrtlCircuitAnnotation(circuit) =>
-        topName = circuit.main
-        os.write.over(elaborateDir / s"$topName.fir", circuit.serialize)
-        None
-      case _: chisel3.stage.DesignAnnotation[_] => None
-      case _: chisel3.stage.ChiselCircuitAnnotation => None
-      case a => Some(a)
+  def check(stdouts: Seq[String]) = {
+    stdouts foreach (_ shouldNot include("expected"))
+    stdouts foreach (_ shouldNot include("Ran 0 tests."))
+    stdouts foreach (_ should include("No errors found."))
+  }
+
+  def test(name: String, module: () => RawModule, softfloatArg: Seq[String]): Seq[String] = {
+    val (softfloatArgs, dutArgs) = (roundings.map { case (s, d) =>
+      (Seq(s, "-tininessbefore") ++ softfloatArg, Seq(d, "0"))
+    }).unzip
+    test(name, module, "test.cpp", softfloatArgs, Some(dutArgs))
+  }
+
+  /** Run a FMA test. Before running, `softfloat_gen` should be accessible in the $PATH environment.
+    *
+    * @param name          is name of this test, which should corresponds to header's name in `includes` directory.
+    * @param module        function to generate DUT.
+    * @param harness       C++ harness name, which should corresponds to c++ hardness's name in `csrc` directory.
+    * @param softfloatArgs arguments passed to `softfloat_gen` application. If has multiple command lines, multiple test will be executed.
+    * @param dutArgs       arguments passed to verilator dut executor, If set to [[None]], no arguments will be passed to.
+    */
+  def test(name: String, module: () => RawModule, harness: String, softfloatArgs: Seq[Seq[String]], dutArgs: Option[Seq[Seq[String]]] = None) = {
+
+    val testRunDir = os.pwd / "test_run_dir" / s"${this.getClass.getSimpleName}_$name" / s"${new SimpleDateFormat("yyyyMMddHHmmss").format(Calendar.getInstance.getTime)}"
+    os.makeDir.all(testRunDir)
+    os.write(testRunDir / "dut.v", chisel3.getVerilogString(module()))
+
+    /* command Synthesis verilog to C++. */
+    val verilatorCompile: Seq[String] = Seq(
+      "verilator",
+      "-cc",
+      "--prefix", "dut",
+      "--Mdir", testRunDir.toString,
+      "-CFLAGS", s"""-I${getClass.getResource("/includes/").getPath} -include ${getClass.getResource(s"/includes/$name.h").getPath}""",
+      "dut.v",
+      "--exe", s"${getClass.getResource(s"/csrc/$harness").getPath}",
+      "--trace"
+    )
+    os.proc(verilatorCompile).call(testRunDir)
+
+    /* Build C++ executor. */
+    val verilatorBuild: Seq[String] = Seq(
+      "make",
+      "-C", testRunDir.toString,
+      "-j",
+      "-f", s"dut.mk",
+      "dut")
+    os.proc(verilatorBuild).call(testRunDir)
+
+    def executeAndLog(softfloatArg: Seq[String], dutArg: Seq[String]): String = {
+      val stdoutFile = testRunDir / s"${name}__${(softfloatArg ++ dutArg).mkString("_")}.txt"
+      val vcdFile = testRunDir / s"${name}__${(softfloatArg ++ dutArg).mkString("_")}.vcd"
+      os.proc((testRunDir / "dut").toString +: dutArg).call(stdin = os.proc("testfloat_gen" +: softfloatArg).spawn().stdout, stdout = stdoutFile, stderr = vcdFile)
+      os.read(stdoutFile)
     }
-  os.write.over(elaborateDir / s"$topName.anno.json", firrtl.annotations.JsonProtocol.serialize(annos))
-
-  // rtl
-  os.proc(
-    "firtool",
-    elaborateDir / s"$topName.fir", s"--annotation-file=${elaborateDir / s"$topName.anno.json"}",
-    "-dedup",
-    "-O=release",
-    "--disable-all-randomization",
-    "--split-verilog",
-    "--preserve-values=none",
-    "--preserve-aggregate=all",
-    "--strip-debug-info",
-    s"-o=$rtlDir"
-  ).call()
-  val verilogs = os.read.lines(rtlDir / "filelist.f")
-    .map(str =>
-      try {
-        os.Path(str)
-      } catch {
-        case e: IllegalArgumentException if e.getMessage.contains("is not an absolute path") =>
-          rtlDir / str.stripPrefix("./")
-      }
+
+    (if (dutArgs.isDefined) {
+      require(softfloatArgs.size == dutArgs.get.size, "size of softfloatArgs and dutArgs should be same.")
+      (softfloatArgs zip dutArgs.get).par.map { case (s, d) => executeAndLog(s, d) }
+    } else softfloatArgs.par.map { s => executeAndLog(s, Seq.empty) }).seq
+  }
+}
+
+class DivSqrtRecFn_smallSpec extends FMATester {
+  def test(f: Int, fn: String): Seq[String] = {
+    def generator(options: Int) = fn match {
+      case "div" => () => new ValExec_DivSqrtRecFN_small_div(exp(f), sig(f))
+      case "sqrt" => () => new ValExec_DivSqrtRecFN_small_sqrt(exp(f), sig(f))
+    }
+    test(
+      s"DivSqrtRecF${f}_small_${fn}",
+      generator(0),
+      (if (fn == "sqrt") Seq("-level2") else Seq.empty) ++ Seq(s"f${f}_${fn}")
     )
-    .filter(p => p.ext == "v" || p.ext == "sv")
 
-  os.write(rtlDir / "dut.v", chisel3.getVerilogString(new DivSqrt(8,24)))
+  }
+
+  "DivSqrtRecF32_small_div" should "pass" in {
+    check(test(32, "div"))
+  }
+
+  "DivSqrtRecF32_small_sqrt" should "pass" in {
+    check(test(32, "sqrt"))
+  }
 
 }
\ No newline at end of file
diff --git a/tests/src/ValExec_DivSqrtRecFN_small.scala b/tests/src/ValExec_DivSqrtRecFN_small.scala
index 1651a90..44df866 100644
--- a/tests/src/ValExec_DivSqrtRecFN_small.scala
+++ b/tests/src/ValExec_DivSqrtRecFN_small.scala
@@ -49,7 +49,7 @@ class DivRecFN_io(expWidth: Int, sigWidth: Int) extends Bundle {
 }
 
 class
-    ValExec_DivSqrtRecFN_small_div(expWidth: Int, sigWidth: Int, options: Int) extends Module
+    ValExec_DivSqrtRecFN_small_div(expWidth: Int, sigWidth: Int) extends Module
 {
     val io = IO(new Bundle {
         val input = Flipped(Decoupled(new DivRecFN_io(expWidth, sigWidth)))
@@ -121,7 +121,7 @@ class SqrtRecFN_io(expWidth: Int, sigWidth: Int) extends Bundle {
 }
 
 class
-    ValExec_DivSqrtRecFN_small_sqrt(expWidth: Int, sigWidth: Int, options: Int)
+    ValExec_DivSqrtRecFN_small_sqrt(expWidth: Int, sigWidth: Int)
     extends Module
 {
     val io = IO(new Bundle {

From 263e5b80d401f71af0a6984d635899c073d34dbb Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 23 Aug 2023 14:53:06 +0800
Subject: [PATCH 044/109] [test] sqrt test passed

---
 arithmetic/src/float/DivSqrt.scala         | 28 ++++++++++++++++++----
 arithmetic/src/float/RoundingUnit.scala    |  8 +------
 tests/src/Ftests.scala                     | 11 ++++-----
 tests/src/ValExec_DivSqrtRecFN_small.scala | 10 ++++----
 4 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index a4d158c..4194ff3 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -60,13 +60,31 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val needNorm = RegEnable(needNormNext, input.fire)
 
   // sign
-  val signNext = Mux(input.bits.sqrt, false.B, rawA_S.sign ^ rawB_S.sign)
+  val signNext = Mux(input.bits.sqrt, Mux(rawA_S.isZero, rawA_S.sign, false.B), rawA_S.sign ^ rawB_S.sign)
   val signReg = RegEnable(signNext, input.fire)
 
   // sqrt
-  val adjustedExp = Cat(rawA_S.sExp(expWidth - 1), rawA_S.sExp(expWidth - 1, 0))
-  val sqrtExpIsEven = input.bits.a(sigWidth - 1)
-  val sqrtFractIn = Mux(sqrtExpIsEven, Cat("b0".U(1.W), rawA_S.sig(sigWidth - 1, 0), 0.U(1.W)),
+
+  /** construct expForSqrt
+    *
+    * sExp first 2 bits
+    * 00 -> 10 (subnormal)
+    * 01 -> 11 (true exp negative)
+    * 10 -> 00 (true exp positive)
+    *
+    */
+  val expfirst2 = UIntToOH(rawA_S.sExp(expWidth, expWidth-1))
+  val expstart  = Mux1H(
+    Seq(
+      expfirst2(0) -> "b10".U,
+      expfirst2(1) -> "b11".U,
+      expfirst2(2) -> "b00".U,
+      expfirst2(3) -> "b10".U
+    )
+  )
+  val expForSqrt = Cat(expstart, rawA_S.sExp(expWidth - 2, 0))
+  val sqrtExpIsOdd = !rawA_S.sExp(0)
+  val sqrtFractIn = Mux(sqrtExpIsOdd, Cat("b0".U(1.W), rawA_S.sig(sigWidth - 1, 0), 0.U(1.W)),
     Cat(rawA_S.sig(sigWidth - 1, 0), 0.U(2.W)))
 
   val SqrtModule = Module(new SquareRoot(2, 2, sigWidth+2, sigWidth+2))
@@ -103,7 +121,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val expStoreNext = Wire(UInt(expWidth.W))
   val expToRound = Wire(UInt(expWidth.W))
   expStoreNext := Mux(input.bits.sqrt,
-    Cat(rawA_S.sExp(expWidth-1), rawA_S.sExp(expWidth-1, 0))(expWidth,1),
+    expForSqrt >>1,
     input.bits.a(fpWidth-1, sigWidth-1) - input.bits.b(fpWidth-1, sigWidth-1))
   val expStore = RegEnable(expStoreNext, 0.U(expWidth.W), input.fire)
   expToRound := Mux(opSqrtReg, expStore, expStore - needNorm)
diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 6e09099..2739efa 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -43,12 +43,6 @@ class RoundingUnit extends Module{
 
 
 
-
-
-
-
-
-
   val sigPlus = Wire(UInt(23.W))
   val expBiasPlus = Wire(UInt(8.W))
   val sigIncr = Wire(Bool())
@@ -61,7 +55,7 @@ class RoundingUnit extends Module{
   sigIncr := (roundingMode_near_even && input.rBits(1) && input.rBits(0)) ||
     (roundingMode_min &&  input.sign && input.rBits.orR) ||
     (roundingMode_max && !input.sign && input.rBits.orR) ||
-    (roundingMode_near_maxMag && input.rBits.orR)
+    (roundingMode_near_maxMag && input.rBits(1) && input.rBits(0))
 
   sigPlus := input.sig + sigIncr
 
diff --git a/tests/src/Ftests.scala b/tests/src/Ftests.scala
index 04aecfc..4ea469b 100644
--- a/tests/src/Ftests.scala
+++ b/tests/src/Ftests.scala
@@ -124,7 +124,6 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
     "-rmin" -> "2",
     "-rmax" -> "3",
     "-rnear_maxMag" -> "4",
-    "-rodd" -> "6"
   )
 
   def check(stdouts: Seq[String]) = {
@@ -135,7 +134,7 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
 
   def test(name: String, module: () => RawModule, softfloatArg: Seq[String]): Seq[String] = {
     val (softfloatArgs, dutArgs) = (roundings.map { case (s, d) =>
-      (Seq(s, "-tininessbefore") ++ softfloatArg, Seq(d, "0"))
+      (Seq(s, "-tininessafter") ++ softfloatArg, Seq(d, "0"))
     }).unzip
     test(name, module, "test.cpp", softfloatArgs, Some(dutArgs))
   }
@@ -150,7 +149,7 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
     */
   def test(name: String, module: () => RawModule, harness: String, softfloatArgs: Seq[Seq[String]], dutArgs: Option[Seq[Seq[String]]] = None) = {
 
-    val testRunDir = os.pwd / "test_run_dir" / s"${this.getClass.getSimpleName}_$name" / s"${new SimpleDateFormat("yyyyMMddHHmmss").format(Calendar.getInstance.getTime)}"
+    val testRunDir = os.pwd / "test_run_dir" / s"${this.getClass.getSimpleName}_$name"
     os.makeDir.all(testRunDir)
     os.write(testRunDir / "dut.v", chisel3.getVerilogString(module()))
 
@@ -204,9 +203,9 @@ class DivSqrtRecFn_smallSpec extends FMATester {
 
   }
 
-  "DivSqrtRecF32_small_div" should "pass" in {
-    check(test(32, "div"))
-  }
+//  "DivSqrtRecF32_small_div" should "pass" in {
+//    check(test(32, "div"))
+//  }
 
   "DivSqrtRecF32_small_sqrt" should "pass" in {
     check(test(32, "sqrt"))
diff --git a/tests/src/ValExec_DivSqrtRecFN_small.scala b/tests/src/ValExec_DivSqrtRecFN_small.scala
index 44df866..33a34c0 100644
--- a/tests/src/ValExec_DivSqrtRecFN_small.scala
+++ b/tests/src/ValExec_DivSqrtRecFN_small.scala
@@ -140,7 +140,7 @@ class
         }
 
         val actual = new Bundle {
-            val out = Output(Bits((expWidth + sigWidth + 1).W))
+            val out = Output(Bits((expWidth + sigWidth).W))
             val exceptionFlags = Output(Bits(5.W))
         }
 
@@ -176,8 +176,8 @@ class
     cq.io.deq.ready := ds.output.valid
 
     io.check := ds.output.valid
-    io.pass :=
-        cq.io.deq.valid &&
-          (io.actual.out === io.expected.recOut) &&
-        (io.actual.exceptionFlags === io.expected.exceptionFlags)
+    val resultcheck = io.actual.out =/= io.expected.recOut
+    val flagcheck   = io.actual.exceptionFlags =/= io.expected.exceptionFlags
+    io.pass := !(cq.io.deq.fire && (resultcheck || flagcheck))
+
 }

From 38ecd06cc4bdb3adbfa6abf5cf9352ad58e3d69a Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 23 Aug 2023 16:03:16 +0800
Subject: [PATCH 045/109] [rtl] add rounding expin bits by 1

---
 arithmetic/src/float/DivSqrt.scala      | 14 ++++++++++++--
 arithmetic/src/float/RoundingUnit.scala |  6 +++---
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index 4194ff3..3493455 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -74,14 +74,16 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     *
     */
   val expfirst2 = UIntToOH(rawA_S.sExp(expWidth, expWidth-1))
+  /** @todo expfirst2(3) never happens */
   val expstart  = Mux1H(
     Seq(
       expfirst2(0) -> "b10".U,
       expfirst2(1) -> "b11".U,
       expfirst2(2) -> "b00".U,
-      expfirst2(3) -> "b10".U
+      expfirst2(3) -> "b00".U
     )
   )
+  /** exp for sqrt never underlow*/
   val expForSqrt = Cat(expstart, rawA_S.sExp(expWidth - 2, 0))
   val sqrtExpIsOdd = !rawA_S.sExp(0)
   val sqrtFractIn = Mux(sqrtExpIsOdd, Cat("b0".U(1.W), rawA_S.sig(sigWidth - 1, 0), 0.U(1.W)),
@@ -120,8 +122,16 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   // exp logic
   val expStoreNext = Wire(UInt(expWidth.W))
   val expToRound = Wire(UInt(expWidth.W))
+  /**
+    * for sqrt
+    * expForrounding effective is 8bits, MSB is sign
+    * expStoreNext = 0 + 8bits 
+    *
+    *
+    *
+    */
   expStoreNext := Mux(input.bits.sqrt,
-    expForSqrt >>1,
+    expForSqrt >> 1,
     input.bits.a(fpWidth-1, sigWidth-1) - input.bits.b(fpWidth-1, sigWidth-1))
   val expStore = RegEnable(expStoreNext, 0.U(expWidth.W), input.fire)
   expToRound := Mux(opSqrtReg, expStore, expStore - needNorm)
diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 2739efa..cac56f3 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -20,7 +20,7 @@ class RoundingUnit extends Module{
     val isZero = Bool()
     val isNaN  = Bool()
     val sig = UInt(23.W)
-    val exp = UInt(8.W)
+    val exp = UInt(9.W)
     val rBits = UInt(2.W)
     val sign = Bool()
     val roundingMode = UInt(5.W)
@@ -61,8 +61,8 @@ class RoundingUnit extends Module{
 
   /** for sig = all 1 and sigIncr*/
   expIncr := input.sig.andR && sigIncr
-  expBiased := input.exp + 127.U
-  expBiasPlus := expBiased + expIncr
+  expBiased := (input.exp + 127.U)(7,0)
+  expBiasPlus := (input.exp + 128.U)(7,0)
 
   // Exceptions
   val isNaNOut = input.invalidExc || input.isNaN

From 4998e3364c02fdbae0235ffd34138e3c92f49789 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 23 Aug 2023 19:23:02 +0800
Subject: [PATCH 046/109] [rtl] temp

---
 arithmetic/src/float/DivSqrt.scala         | 21 +++++++----
 arithmetic/src/float/RoundingUnit.scala    | 41 ++++++++++++++++++----
 tests/src/Ftests.scala                     |  7 ++--
 tests/src/ValExec_DivSqrtRecFN_small.scala |  9 +++--
 4 files changed, 56 insertions(+), 22 deletions(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index 3493455..07fa448 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -4,6 +4,7 @@ import chisel3._
 import chisel3.util._
 import division.srt.srt16._
 import sqrt._
+import chisel3.dontTouch
 
 class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val fpWidth = expWidth + sigWidth
@@ -17,9 +18,12 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val rawA_S = rawFloatFromFN(expWidth, sigWidth, input.bits.a)
   val rawB_S = rawFloatFromFN(expWidth, sigWidth, input.bits.b)
 
-  /** Exceptions */
+  // Exceptions
+
+  /** inf/inf and 0/0 */
   val notSigNaNIn_invalidExc_S_div =
     (rawA_S.isZero && rawB_S.isZero) || (rawA_S.isInf && rawB_S.isInf)
+  /** negative input */
   val notSigNaNIn_invalidExc_S_sqrt =
     !rawA_S.isNaN && !rawA_S.isZero && rawA_S.sign
   val majorExc_S =
@@ -84,7 +88,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     )
   )
   /** exp for sqrt never underlow*/
-  val expForSqrt = Cat(expstart, rawA_S.sExp(expWidth - 2, 0))
+  val expForSqrt = Cat(expstart, rawA_S.sExp(expWidth - 2, 0)) >> 1
   val sqrtExpIsOdd = !rawA_S.sExp(0)
   val sqrtFractIn = Mux(sqrtExpIsOdd, Cat("b0".U(1.W), rawA_S.sig(sigWidth - 1, 0), 0.U(1.W)),
     Cat(rawA_S.sig(sigWidth - 1, 0), 0.U(2.W)))
@@ -120,8 +124,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val rbitsToRound = Mux(opSqrtReg, rbits_sqrt, rbits_div)
 
   // exp logic
-  val expStoreNext = Wire(UInt(expWidth.W))
-  val expToRound = Wire(UInt(expWidth.W))
+  val expStoreNext,expToRound = Wire(UInt((expWidth+2).W))
   /**
     * for sqrt
     * expForrounding effective is 8bits, MSB is sign
@@ -131,10 +134,14 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     *
     */
   expStoreNext := Mux(input.bits.sqrt,
-    expForSqrt >> 1,
-    input.bits.a(fpWidth-1, sigWidth-1) - input.bits.b(fpWidth-1, sigWidth-1))
-  val expStore = RegEnable(expStoreNext, 0.U(expWidth.W), input.fire)
+    Cat(expForSqrt(8),expForSqrt(8,0)),
+    (rawA_S.sExp-rawB_S.sExp).asUInt)
+  val expStore = RegEnable(expStoreNext, 0.U((expWidth+2).W), input.fire)
   expToRound := Mux(opSqrtReg, expStore, expStore - needNorm)
+  dontTouch(expToRound)
+
+  dontTouch(rawA_S)
+  dontTouch(rawB_S)
 
   val roundresult = RoundingUnit(
     signReg,
diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index cac56f3..b5df614 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -20,7 +20,7 @@ class RoundingUnit extends Module{
     val isZero = Bool()
     val isNaN  = Bool()
     val sig = UInt(23.W)
-    val exp = UInt(9.W)
+    val exp = UInt(10.W)
     val rBits = UInt(2.W)
     val sign = Bool()
     val roundingMode = UInt(5.W)
@@ -40,6 +40,7 @@ class RoundingUnit extends Module{
   val common_overflow = Wire(Bool())
   val common_underflow = Wire(Bool())
   val common_inexact  = Wire(Bool())
+  val common_subnorm  = Wire(Bool())
 
 
 
@@ -61,8 +62,17 @@ class RoundingUnit extends Module{
 
   /** for sig = all 1 and sigIncr*/
   expIncr := input.sig.andR && sigIncr
-  expBiased := (input.exp + 127.U)(7,0)
-  expBiasPlus := (input.exp + 128.U)(7,0)
+  expBiased := (input.exp.asSInt + 127.S)(7,0).asUInt
+  expBiasPlus := (input.exp.asSInt + 128.S)(7,0).asUInt
+
+  val exp_BiasForSub = (input.exp.asSInt + 127.S) + expIncr.asSInt
+  val subnormDist = -exp_BiasForSub + 1.S
+  common_subnorm := exp_BiasForSub(9)
+  val common_subnormSigOut = (Cat(1.U(1.W), input.sig) >> subnormDist.asUInt)(22,0)
+  dontTouch(exp_BiasForSub)
+  dontTouch(subnormDist)
+  dontTouch(common_subnorm)
+  dontTouch(common_subnormSigOut)
 
   // Exceptions
   val isNaNOut = input.invalidExc || input.isNaN
@@ -75,6 +85,17 @@ class RoundingUnit extends Module{
 
   val isZero = input.isZero && underflow
 
+//  val overflowSele = UIntToOH(roundingMode_min ## roundingMode_max ## roundingMode_toZero ## (roundingMode_near_even || roundingMode_near_maxMag))
+//
+//  val infiniteOut = Mux1H(
+//    Seq(
+//      overflowSele(0) -> Cat(input.sign, "h7F80000".U(31.W)),
+//      overflowSele(1) -> Cat(input.sign, "h7F7FFFF".U(31.W)),
+//      overflowSele(2) -> Cat(0.U(1.W), "h7F7FFFF".U(31.W)),
+//      overflowSele(3) -> Cat(1.U(1.W), "h7F7FFFF".U(31.W)),
+//    )
+//  )
+
 
   // exception data with Spike
   val quietNaN = "h7FC00000".U
@@ -83,15 +104,21 @@ class RoundingUnit extends Module{
   val zeroOut = Cat(input.sign, 0.U(31.W))
   val outSele1H = commonCase ## notNaN_isSpecialInfOut ## isNaNOut ## input.isZero
 
-  //todo
-  common_overflow := false.B
-  common_underflow := false.B
+  /** @todo opt it */
+  common_overflow := exp_BiasForSub > 254.S
+  common_underflow := common_subnorm
   common_inexact := input.rBits.orR
 
   val common_sigOut = Mux(sigIncr, sigPlus, input.sig)
   val common_expOut = Mux(expIncr, expBiasPlus, expBiased)
+  dontTouch(common_expOut)
+  dontTouch(common_underflow)
+  dontTouch(common_overflow)
+
 
-  val common_out = Mux(common_overflow, infiniteOut, input.sign ## common_expOut ## common_sigOut)
+  val common_out = Mux(common_overflow, infiniteOut,
+    Mux(common_subnorm, input.sign ## 0.U(8.W) ## common_subnormSigOut,
+      input.sign ## common_expOut ## common_sigOut))
 
   output.data := Mux1H(Seq(
     outSele1H(0) -> zeroOut,
diff --git a/tests/src/Ftests.scala b/tests/src/Ftests.scala
index 4ea469b..5fef164 100644
--- a/tests/src/Ftests.scala
+++ b/tests/src/Ftests.scala
@@ -151,6 +151,7 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
 
     val testRunDir = os.pwd / "test_run_dir" / s"${this.getClass.getSimpleName}_$name"
     os.makeDir.all(testRunDir)
+    os.remove(testRunDir / "dut.v")
     os.write(testRunDir / "dut.v", chisel3.getVerilogString(module()))
 
     /* command Synthesis verilog to C++. */
@@ -203,9 +204,9 @@ class DivSqrtRecFn_smallSpec extends FMATester {
 
   }
 
-//  "DivSqrtRecF32_small_div" should "pass" in {
-//    check(test(32, "div"))
-//  }
+  "DivSqrtRecF32_small_div" should "pass" in {
+    check(test(32, "div"))
+  }
 
   "DivSqrtRecF32_small_sqrt" should "pass" in {
     check(test(32, "sqrt"))
diff --git a/tests/src/ValExec_DivSqrtRecFN_small.scala b/tests/src/ValExec_DivSqrtRecFN_small.scala
index 33a34c0..84b2b97 100644
--- a/tests/src/ValExec_DivSqrtRecFN_small.scala
+++ b/tests/src/ValExec_DivSqrtRecFN_small.scala
@@ -68,7 +68,7 @@ class
         }
 
         val actual = new Bundle {
-            val out = Output(Bits((expWidth + sigWidth + 1).W))
+            val out = Output(Bits((expWidth + sigWidth).W))
             val exceptionFlags = Output(Bits(5.W))
         }
 
@@ -105,10 +105,9 @@ class
     cq.io.deq.ready := ds.output.valid
 
     io.check := ds.output.valid
-    io.pass :=
-        cq.io.deq.valid &&
-          (io.actual.out===io.expected.recOut) &&
-        (io.actual.exceptionFlags === io.expected.exceptionFlags)
+  val resultcheck = io.actual.out =/= io.expected.out
+  val flagcheck = io.actual.exceptionFlags =/= io.expected.exceptionFlags
+  io.pass := !(cq.io.deq.fire && (resultcheck || flagcheck))
 }
 
 class SqrtRecFN_io(expWidth: Int, sigWidth: Int) extends Bundle {

From 232b4b2ed4b4c2badeb89623a94a713ef2d5ac4b Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 23 Aug 2023 19:31:59 +0800
Subject: [PATCH 047/109] [test] tmp

---
 arithmetic/src/float/DivSqrt.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index 07fa448..d9ecd40 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -134,7 +134,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     *
     */
   expStoreNext := Mux(input.bits.sqrt,
-    Cat(expForSqrt(8),expForSqrt(8,0)),
+    Cat(expForSqrt(7),expForSqrt(7),expForSqrt(7,0)),
     (rawA_S.sExp-rawB_S.sExp).asUInt)
   val expStore = RegEnable(expStoreNext, 0.U((expWidth+2).W), input.fire)
   expToRound := Mux(opSqrtReg, expStore, expStore - needNorm)

From 32d5c61c4add75e49d70fc2ef1ea520991de3645 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 23 Aug 2023 19:50:18 +0800
Subject: [PATCH 048/109] [rtl] fix common_overflowOut

---
 arithmetic/src/float/RoundingUnit.scala | 27 +++++++++++++++----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index b5df614..7e50334 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -85,16 +85,16 @@ class RoundingUnit extends Module{
 
   val isZero = input.isZero && underflow
 
-//  val overflowSele = UIntToOH(roundingMode_min ## roundingMode_max ## roundingMode_toZero ## (roundingMode_near_even || roundingMode_near_maxMag))
-//
-//  val infiniteOut = Mux1H(
-//    Seq(
-//      overflowSele(0) -> Cat(input.sign, "h7F80000".U(31.W)),
-//      overflowSele(1) -> Cat(input.sign, "h7F7FFFF".U(31.W)),
-//      overflowSele(2) -> Cat(0.U(1.W), "h7F7FFFF".U(31.W)),
-//      overflowSele(3) -> Cat(1.U(1.W), "h7F7FFFF".U(31.W)),
-//    )
-//  )
+  val overflowSele = roundingMode_min ## roundingMode_max ## roundingMode_toZero ## (roundingMode_near_even || roundingMode_near_maxMag)
+
+  val common_infiniteOut = Mux1H(
+    Seq(
+      overflowSele(0) -> Cat(input.sign, "h7F800000".U(31.W)),
+      overflowSele(1) -> Cat(input.sign, "h7F7FFFFF".U(31.W)),
+      overflowSele(2) -> Mux(input.sign,"hFF7FFFFF".U(32.W),"h7F800000".U(32.W)),
+      overflowSele(3) -> Mux(input.sign,"hFF800000".U(32.W),"h7F7FFFFF".U(32.W)),
+    )
+  )
 
 
   // exception data with Spike
@@ -114,18 +114,23 @@ class RoundingUnit extends Module{
   dontTouch(common_expOut)
   dontTouch(common_underflow)
   dontTouch(common_overflow)
+  dontTouch(overflowSele)
+  dontTouch(common_infiniteOut)
 
 
-  val common_out = Mux(common_overflow, infiniteOut,
+  val common_out = Mux(common_overflow, common_infiniteOut,
     Mux(common_subnorm, input.sign ## 0.U(8.W) ## common_subnormSigOut,
       input.sign ## common_expOut ## common_sigOut))
 
+  dontTouch(common_out)
+
   output.data := Mux1H(Seq(
     outSele1H(0) -> zeroOut,
     outSele1H(1) -> quietNaN,
     outSele1H(2) -> infiniteOut,
     outSele1H(3) -> common_out)
   )
+  dontTouch(outSele1H)
 
 
   output.exceptionFlags := input.invalidExc ## input.infiniteExc ## overflow ## underflow ## inexact

From 5c1c6d1b99958a9e2e84a0b47adf2e49a48c3dd8 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 23 Aug 2023 20:03:55 +0800
Subject: [PATCH 049/109] [rtl] fix rbits_div

---
 arithmetic/src/float/DivSqrt.scala | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index d9ecd40..de0ef65 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -115,8 +115,8 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
 
   val sigToRound_div = Mux(needNorm, divModule.output.bits.quotient(calWidth - 3, calWidth - sigWidth - 1),
     divModule.output.bits.quotient(calWidth - 2, calWidth - sigWidth))
-  val rbits_div = Mux(needNorm, divModule.output.bits.quotient(calWidth - sigWidth - 2) ## 1.U(1.W),
-    divModule.output.bits.quotient(calWidth - sigWidth - 1) ## 1.U(1.W))
+  val rbits_div = Mux(needNorm, divModule.output.bits.quotient(calWidth - sigWidth - 2) ## divModule.output.bits.reminder.orR,
+    divModule.output.bits.quotient(calWidth - sigWidth - 1) ## divModule.output.bits.reminder.orR)
 
 
   // collect sig result
@@ -142,6 +142,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
 
   dontTouch(rawA_S)
   dontTouch(rawB_S)
+  dontTouch(rbits_div)
 
   val roundresult = RoundingUnit(
     signReg,

From 2c7b163f3a1fc54f34a5b65d14998e803302a095 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 23 Aug 2023 20:15:02 +0800
Subject: [PATCH 050/109] [rtl] NaN overide isZero in Rounding

---
 arithmetic/src/float/RoundingUnit.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 7e50334..b55f908 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -102,7 +102,7 @@ class RoundingUnit extends Module{
 
   val infiniteOut = Cat(input.sign, "h7F800000".U)
   val zeroOut = Cat(input.sign, 0.U(31.W))
-  val outSele1H = commonCase ## notNaN_isSpecialInfOut ## isNaNOut ## input.isZero
+  val outSele1H = commonCase ## notNaN_isSpecialInfOut ## isNaNOut ## (input.isZero && !isNaNOut)
 
   /** @todo opt it */
   common_overflow := exp_BiasForSub > 254.S

From 1337297e5fb04dafad4bcbddda9403fdff529242 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 23 Aug 2023 21:03:09 +0800
Subject: [PATCH 051/109] [rtl] add underflow case for exp_Bias=0

---
 arithmetic/src/float/RoundingUnit.scala | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index b55f908..9d3d91c 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -44,7 +44,7 @@ class RoundingUnit extends Module{
 
 
 
-  val sigPlus = Wire(UInt(23.W))
+  val sigAfterInc = Wire(UInt(23.W))
   val expBiasPlus = Wire(UInt(8.W))
   val sigIncr = Wire(Bool())
   val expIncr = Wire(Bool())
@@ -58,7 +58,7 @@ class RoundingUnit extends Module{
     (roundingMode_max && !input.sign && input.rBits.orR) ||
     (roundingMode_near_maxMag && input.rBits(1) && input.rBits(0))
 
-  sigPlus := input.sig + sigIncr
+  sigAfterInc := input.sig + sigIncr
 
   /** for sig = all 1 and sigIncr*/
   expIncr := input.sig.andR && sigIncr
@@ -67,8 +67,8 @@ class RoundingUnit extends Module{
 
   val exp_BiasForSub = (input.exp.asSInt + 127.S) + expIncr.asSInt
   val subnormDist = -exp_BiasForSub + 1.S
-  common_subnorm := exp_BiasForSub(9)
-  val common_subnormSigOut = (Cat(1.U(1.W), input.sig) >> subnormDist.asUInt)(22,0)
+  common_subnorm := exp_BiasForSub(9) || exp_BiasForSub === 0.S
+  val common_subnormSigOut = (Cat(1.U(1.W), sigAfterInc) >> subnormDist.asUInt)(22,0)
   dontTouch(exp_BiasForSub)
   dontTouch(subnormDist)
   dontTouch(common_subnorm)
@@ -109,7 +109,7 @@ class RoundingUnit extends Module{
   common_underflow := common_subnorm
   common_inexact := input.rBits.orR
 
-  val common_sigOut = Mux(sigIncr, sigPlus, input.sig)
+  val common_sigOut = sigAfterInc
   val common_expOut = Mux(expIncr, expBiasPlus, expBiased)
   dontTouch(common_expOut)
   dontTouch(common_underflow)

From 4f038a5d3b392c757544e69741a828657dd94449 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 24 Aug 2023 13:03:27 +0800
Subject: [PATCH 052/109] [rtl] opt roundingUnit exception case contrl signals

---
 arithmetic/src/float/RoundingUnit.scala | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 9d3d91c..1b26fd1 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -67,8 +67,12 @@ class RoundingUnit extends Module{
 
   val exp_BiasForSub = (input.exp.asSInt + 127.S) + expIncr.asSInt
   val subnormDist = -exp_BiasForSub + 1.S
+  // todo 23 or 24
+  val subnormOverflow = subnormDist > 24.S
+
   common_subnorm := exp_BiasForSub(9) || exp_BiasForSub === 0.S
   val common_subnormSigOut = (Cat(1.U(1.W), sigAfterInc) >> subnormDist.asUInt)(22,0)
+//val common_subnormSigOut = Mux(subnormOverflow, sigAfterInc.orR ,(Cat(1.U(1.W), sigAfterInc) >> subnormDist.asUInt)(22,0))
   dontTouch(exp_BiasForSub)
   dontTouch(subnormDist)
   dontTouch(common_subnorm)
@@ -76,7 +80,8 @@ class RoundingUnit extends Module{
 
   // Exceptions
   val isNaNOut = input.invalidExc || input.isNaN
-  val notNaN_isSpecialInfOut = input.infiniteExc || input.isInf
+  val notNaN_isSpecialInfOut = (input.infiniteExc || input.isInf) && (!input.invalidExc)
+  val notNaN_isZero = input.isZero && !isNaNOut
   val commonCase = !isNaNOut && !notNaN_isSpecialInfOut && !input.isZero
 
   val overflow = commonCase && common_overflow
@@ -102,7 +107,7 @@ class RoundingUnit extends Module{
 
   val infiniteOut = Cat(input.sign, "h7F800000".U)
   val zeroOut = Cat(input.sign, 0.U(31.W))
-  val outSele1H = commonCase ## notNaN_isSpecialInfOut ## isNaNOut ## (input.isZero && !isNaNOut)
+  val outSele1H = commonCase ## notNaN_isSpecialInfOut ## isNaNOut ## notNaN_isZero
 
   /** @todo opt it */
   common_overflow := exp_BiasForSub > 254.S

From f521decb6142a4ffbff74a160082e6981f3b0c4e Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 24 Aug 2023 15:21:01 +0800
Subject: [PATCH 053/109] [rtl] tmp

---
 arithmetic/src/float/DivSqrt.scala      |  2 +-
 arithmetic/src/float/RoundingUnit.scala | 18 ++++++++++--------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index de0ef65..b78a97b 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -105,7 +105,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val fractDividendIn = Wire(UInt((fpWidth).W))
   val fractDivisorIn = Wire(UInt((fpWidth).W))
   fractDividendIn := Cat(1.U(1.W), rawA_S.sig(sigWidth - 2, 0), 0.U(expWidth.W))
-  fractDivisorIn := Cat(1.U(1.W), rawB_S.sig(sigWidth - 2, 0), 0.U(expWidth.W))
+  fractDivisorIn  := Cat(1.U(1.W), rawB_S.sig(sigWidth - 2, 0), 0.U(expWidth.W))
 
   val divModule = Module(new SRT16(fpWidth, fpWidth, fpWidth))
   divModule.input.bits.dividend := fractDividendIn
diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 1b26fd1..ea788cf 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -45,10 +45,9 @@ class RoundingUnit extends Module{
 
 
   val sigAfterInc = Wire(UInt(23.W))
-  val expBiasPlus = Wire(UInt(8.W))
   val sigIncr = Wire(Bool())
   val expIncr = Wire(Bool())
-  val expBiased = Wire(UInt(8.W))
+  val expBiasedAfterInc = Wire(UInt(8.W))
 
   /** normal case */
 
@@ -62,21 +61,24 @@ class RoundingUnit extends Module{
 
   /** for sig = all 1 and sigIncr*/
   expIncr := input.sig.andR && sigIncr
-  expBiased := (input.exp.asSInt + 127.S)(7,0).asUInt
-  expBiasPlus := (input.exp.asSInt + 128.S)(7,0).asUInt
+
+  expBiasedAfterInc := ((input.exp.asSInt + 127.S)(7,0) + expIncr).asUInt
 
   val exp_BiasForSub = (input.exp.asSInt + 127.S) + expIncr.asSInt
   val subnormDist = -exp_BiasForSub + 1.S
   // todo 23 or 24
-  val subnormOverflow = subnormDist > 24.S
+  val common_totalUnderflow = subnormDist > 24.S
 
   common_subnorm := exp_BiasForSub(9) || exp_BiasForSub === 0.S
-  val common_subnormSigOut = (Cat(1.U(1.W), sigAfterInc) >> subnormDist.asUInt)(22,0)
-//val common_subnormSigOut = Mux(subnormOverflow, sigAfterInc.orR ,(Cat(1.U(1.W), sigAfterInc) >> subnormDist.asUInt)(22,0))
+//  val common_subnormSigOut = (Cat(1.U(1.W), sigAfterInc) >> subnormDist.asUInt)(22,0)
+  // rbits = (input.sig << 23 >> subnormDist.asUInt)(22,0).orR
+  val common_subnormSigOut = Mux(common_totalUnderflow, 0.U ,(Cat(1.U(1.W), input.sig) >> subnormDist.asUInt)(22,0) )
   dontTouch(exp_BiasForSub)
   dontTouch(subnormDist)
   dontTouch(common_subnorm)
   dontTouch(common_subnormSigOut)
+  dontTouch(sigAfterInc)
+  dontTouch(common_totalUnderflow)
 
   // Exceptions
   val isNaNOut = input.invalidExc || input.isNaN
@@ -115,7 +117,7 @@ class RoundingUnit extends Module{
   common_inexact := input.rBits.orR
 
   val common_sigOut = sigAfterInc
-  val common_expOut = Mux(expIncr, expBiasPlus, expBiased)
+  val common_expOut = expBiasedAfterInc
   dontTouch(common_expOut)
   dontTouch(common_underflow)
   dontTouch(common_overflow)

From e487508d5fc387c0a4f79de578636af21ed00846 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 24 Aug 2023 18:07:08 +0800
Subject: [PATCH 054/109] [rtl] tmp

---
 arithmetic/src/float/RoundingUnit.scala | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index ea788cf..a747fb4 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -64,21 +64,22 @@ class RoundingUnit extends Module{
 
   expBiasedAfterInc := ((input.exp.asSInt + 127.S)(7,0) + expIncr).asUInt
 
-  val exp_BiasForSub = (input.exp.asSInt + 127.S) + expIncr.asSInt
+  val exp_BiasForSub = (input.exp.asSInt + 127.S(10.W)) + expIncr.zext.asSInt
   val subnormDist = -exp_BiasForSub + 1.S
   // todo 23 or 24
   val common_totalUnderflow = subnormDist > 24.S
 
   common_subnorm := exp_BiasForSub(9) || exp_BiasForSub === 0.S
 //  val common_subnormSigOut = (Cat(1.U(1.W), sigAfterInc) >> subnormDist.asUInt)(22,0)
-  // rbits = (input.sig << 23 >> subnormDist.asUInt)(22,0).orR
-  val common_subnormSigOut = Mux(common_totalUnderflow, 0.U ,(Cat(1.U(1.W), input.sig) >> subnormDist.asUInt)(22,0) )
+  val rbits = (sigAfterInc << 23 >> subnormDist.asUInt)(22,0).orR
+  val common_subnormSigOut = Mux(common_totalUnderflow, 0.U ,(Cat(1.U(1.W), sigAfterInc) >> subnormDist.asUInt)(22,0) | rbits )
   dontTouch(exp_BiasForSub)
   dontTouch(subnormDist)
   dontTouch(common_subnorm)
   dontTouch(common_subnormSigOut)
   dontTouch(sigAfterInc)
   dontTouch(common_totalUnderflow)
+  dontTouch(rbits)
 
   // Exceptions
   val isNaNOut = input.invalidExc || input.isNaN

From 2ccf167e176aa98d9aeb5f83fb29fc9dfe700875 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 24 Aug 2023 19:48:00 +0800
Subject: [PATCH 055/109] [rtl] tmp

---
 arithmetic/src/float/RoundingUnit.scala | 33 +++++++++++++++++++------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index a747fb4..49838f6 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -64,22 +64,41 @@ class RoundingUnit extends Module{
 
   expBiasedAfterInc := ((input.exp.asSInt + 127.S)(7,0) + expIncr).asUInt
 
-  val exp_BiasForSub = (input.exp.asSInt + 127.S(10.W)) + expIncr.zext.asSInt
-  val subnormDist = -exp_BiasForSub + 1.S
+
+  val sub_sigOut = Wire(UInt(23.W))
+
+  // control logic
+  // set to 126 according to softfloat
+  val exp_BiasForSub = (input.exp.asSInt + 126.S(10.W))
+  val subnormDist = -exp_BiasForSub
   // todo 23 or 24
   val common_totalUnderflow = subnormDist > 24.S
-
   common_subnorm := exp_BiasForSub(9) || exp_BiasForSub === 0.S
-//  val common_subnormSigOut = (Cat(1.U(1.W), sigAfterInc) >> subnormDist.asUInt)(22,0)
-  val rbits = (sigAfterInc << 23 >> subnormDist.asUInt)(22,0).orR
-  val common_subnormSigOut = Mux(common_totalUnderflow, 0.U ,(Cat(1.U(1.W), sigAfterInc) >> subnormDist.asUInt)(22,0) | rbits )
+
+  val sub_sigShift = Wire(UInt(26.W))
+  val sub_sigBefore:UInt = Cat(1.U(1.W), input.sig)
+  sub_sigShift := (sub_sigBefore >> subnormDist.asUInt)(22,0)
+  val sub_Stickybits = (input.sig << 23 >> subnormDist.asUInt)(21,0).orR || input.rBits.orR
+  val sub_GuardBit = (input.sig << 23 >> subnormDist.asUInt)(22)
+  val sub_rbits : UInt= Cat(sub_GuardBit,sub_Stickybits)
+  val sub_sigIncr : Bool= (roundingMode_near_even && sub_rbits.andR) ||
+    (roundingMode_min && input.sign && sub_rbits.orR) ||
+    (roundingMode_max && !input.sign && sub_rbits.orR) ||
+    (roundingMode_near_maxMag && sub_rbits.andR)
+  // val sub_expInc : Bool= sub_sigShift(24, 2).andR && sub_sigIncr
+
+  dontTouch(sub_rbits)
+
+
+  sub_sigOut := sub_sigShift + sub_sigIncr
+
+  val common_subnormSigOut = Mux(common_totalUnderflow, sub_sigIncr ,sub_sigOut )
   dontTouch(exp_BiasForSub)
   dontTouch(subnormDist)
   dontTouch(common_subnorm)
   dontTouch(common_subnormSigOut)
   dontTouch(sigAfterInc)
   dontTouch(common_totalUnderflow)
-  dontTouch(rbits)
 
   // Exceptions
   val isNaNOut = input.invalidExc || input.isNaN

From d2babf96ceadc6f6b35add400de40270aa441879 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 24 Aug 2023 20:23:02 +0800
Subject: [PATCH 056/109] [rtl] fix overflow detact

---
 arithmetic/src/float/RoundingUnit.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 49838f6..89de8a2 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -132,7 +132,7 @@ class RoundingUnit extends Module{
   val outSele1H = commonCase ## notNaN_isSpecialInfOut ## isNaNOut ## notNaN_isZero
 
   /** @todo opt it */
-  common_overflow := exp_BiasForSub > 254.S
+  common_overflow := input.exp.asSInt > 127.S
   common_underflow := common_subnorm
   common_inexact := input.rBits.orR
 

From 53cdef1f5efd9c21203c192a3faeb470503a1b33 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 24 Aug 2023 20:50:29 +0800
Subject: [PATCH 057/109] [rtl] focus on exp+126=0 when subnorm

---
 arithmetic/src/float/RoundingUnit.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 89de8a2..0077dbc 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -71,9 +71,9 @@ class RoundingUnit extends Module{
   // set to 126 according to softfloat
   val exp_BiasForSub = (input.exp.asSInt + 126.S(10.W))
   val subnormDist = -exp_BiasForSub
-  // todo 23 or 24
+  // todo 23 or 24, why we have this case??
   val common_totalUnderflow = subnormDist > 24.S
-  common_subnorm := exp_BiasForSub(9) || exp_BiasForSub === 0.S
+  common_subnorm := exp_BiasForSub(9) 
 
   val sub_sigShift = Wire(UInt(26.W))
   val sub_sigBefore:UInt = Cat(1.U(1.W), input.sig)

From 2cca125b2f1d703b33975ef71766c49e82905744 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 24 Aug 2023 21:07:27 +0800
Subject: [PATCH 058/109] [rtl] remove underflow flag when div exact

---
 arithmetic/src/float/RoundingUnit.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 0077dbc..6d9d7b2 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -73,7 +73,7 @@ class RoundingUnit extends Module{
   val subnormDist = -exp_BiasForSub
   // todo 23 or 24, why we have this case??
   val common_totalUnderflow = subnormDist > 24.S
-  common_subnorm := exp_BiasForSub(9) 
+  common_subnorm := exp_BiasForSub(9)
 
   val sub_sigShift = Wire(UInt(26.W))
   val sub_sigBefore:UInt = Cat(1.U(1.W), input.sig)
@@ -107,7 +107,7 @@ class RoundingUnit extends Module{
   val commonCase = !isNaNOut && !notNaN_isSpecialInfOut && !input.isZero
 
   val overflow = commonCase && common_overflow
-  val underflow = commonCase && common_underflow
+  val underflow = commonCase && (common_underflow && sub_rbits.orR)
   val inexact = overflow || (commonCase && common_inexact)
 
   val isZero = input.isZero && underflow

From 16998a6f812c9b262cb5e6992e30e902320aa77b Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 24 Aug 2023 21:20:51 +0800
Subject: [PATCH 059/109] [rtl] fix sub guard and sticky bit logic

---
 arithmetic/src/float/RoundingUnit.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 6d9d7b2..b71348e 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -78,8 +78,8 @@ class RoundingUnit extends Module{
   val sub_sigShift = Wire(UInt(26.W))
   val sub_sigBefore:UInt = Cat(1.U(1.W), input.sig)
   sub_sigShift := (sub_sigBefore >> subnormDist.asUInt)(22,0)
-  val sub_Stickybits = (input.sig << 23 >> subnormDist.asUInt)(21,0).orR || input.rBits.orR
-  val sub_GuardBit = (input.sig << 23 >> subnormDist.asUInt)(22)
+  val sub_Stickybits = (sub_sigBefore << 24 >> subnormDist.asUInt)(22,0).orR || input.rBits.orR
+  val sub_GuardBit = (sub_sigBefore << 24 >> subnormDist.asUInt)(23)
   val sub_rbits : UInt= Cat(sub_GuardBit,sub_Stickybits)
   val sub_sigIncr : Bool= (roundingMode_near_even && sub_rbits.andR) ||
     (roundingMode_min && input.sign && sub_rbits.orR) ||

From 58a0ce8ebcbb602339089687a6bba1e40845d661 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 24 Aug 2023 21:24:05 +0800
Subject: [PATCH 060/109] [rtl] fix inexact

---
 arithmetic/src/float/RoundingUnit.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index b71348e..57e1b6c 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -134,7 +134,7 @@ class RoundingUnit extends Module{
   /** @todo opt it */
   common_overflow := input.exp.asSInt > 127.S
   common_underflow := common_subnorm
-  common_inexact := input.rBits.orR
+  common_inexact := input.rBits.orR || (common_underflow && sub_rbits.orR)
 
   val common_sigOut = sigAfterInc
   val common_expOut = expBiasedAfterInc

From cebbce776ea2202a8754b879ae407b49fd533e42 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 24 Aug 2023 21:36:01 +0800
Subject: [PATCH 061/109] [rtl] fix totalunderflow to 235

---
 arithmetic/src/float/RoundingUnit.scala | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 57e1b6c..91f3d91 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -72,7 +72,7 @@ class RoundingUnit extends Module{
   val exp_BiasForSub = (input.exp.asSInt + 126.S(10.W))
   val subnormDist = -exp_BiasForSub
   // todo 23 or 24, why we have this case??
-  val common_totalUnderflow = subnormDist > 24.S
+  val common_totalUnderflow = subnormDist > 235.S
   common_subnorm := exp_BiasForSub(9)
 
   val sub_sigShift = Wire(UInt(26.W))
@@ -92,13 +92,14 @@ class RoundingUnit extends Module{
 
   sub_sigOut := sub_sigShift + sub_sigIncr
 
-  val common_subnormSigOut = Mux(common_totalUnderflow, sub_sigIncr ,sub_sigOut )
+  val common_subnormSigOut = Mux(common_totalUnderflow, 0.U ,sub_sigOut )
   dontTouch(exp_BiasForSub)
   dontTouch(subnormDist)
   dontTouch(common_subnorm)
   dontTouch(common_subnormSigOut)
   dontTouch(sigAfterInc)
   dontTouch(common_totalUnderflow)
+  dontTouch(sub_sigOut)
 
   // Exceptions
   val isNaNOut = input.invalidExc || input.isNaN

From 0cd72ccbcec23a520990270c3b65aabbc1037fc4 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 24 Aug 2023 21:41:02 +0800
Subject: [PATCH 062/109] [rtl] fix neednorm for div

---
 arithmetic/src/float/DivSqrt.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index b78a97b..fb53430 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -60,7 +60,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   fastValid := specialCase_S && input.fire
 
   // needNorm for div
-  val needNormNext = input.bits.b(sigWidth - 2, 0) > input.bits.a(sigWidth - 2, 0)
+  val needNormNext:Bool = (rawA_S.sig + (-rawB_S.sig))(24)
   val needNorm = RegEnable(needNormNext, input.fire)
 
   // sign

From 2655bec3555db597514e9d47393a112a499cd876 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 24 Aug 2023 22:01:48 +0800
Subject: [PATCH 063/109] [rtl] fix subnorm exp inc

---
 arithmetic/src/float/RoundingUnit.scala | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 91f3d91..1f977ff 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -75,7 +75,7 @@ class RoundingUnit extends Module{
   val common_totalUnderflow = subnormDist > 235.S
   common_subnorm := exp_BiasForSub(9)
 
-  val sub_sigShift = Wire(UInt(26.W))
+  val sub_sigShift = Wire(UInt(23.W))
   val sub_sigBefore:UInt = Cat(1.U(1.W), input.sig)
   sub_sigShift := (sub_sigBefore >> subnormDist.asUInt)(22,0)
   val sub_Stickybits = (sub_sigBefore << 24 >> subnormDist.asUInt)(22,0).orR || input.rBits.orR
@@ -92,6 +92,9 @@ class RoundingUnit extends Module{
 
   sub_sigOut := sub_sigShift + sub_sigIncr
 
+  val sub_expInc = Wire(UInt(8.W))
+  sub_expInc := sub_sigShift.andR && sub_sigIncr
+
   val common_subnormSigOut = Mux(common_totalUnderflow, 0.U ,sub_sigOut )
   dontTouch(exp_BiasForSub)
   dontTouch(subnormDist)
@@ -100,6 +103,7 @@ class RoundingUnit extends Module{
   dontTouch(sigAfterInc)
   dontTouch(common_totalUnderflow)
   dontTouch(sub_sigOut)
+  dontTouch(sub_expInc)
 
   // Exceptions
   val isNaNOut = input.invalidExc || input.isNaN
@@ -147,7 +151,7 @@ class RoundingUnit extends Module{
 
 
   val common_out = Mux(common_overflow, common_infiniteOut,
-    Mux(common_subnorm, input.sign ## 0.U(8.W) ## common_subnormSigOut,
+    Mux(common_subnorm, input.sign ## sub_expInc ## common_subnormSigOut,
       input.sign ## common_expOut ## common_sigOut))
 
   dontTouch(common_out)

From 8729cccafa3f5a92c36a2dd6ee08b08c0052222b Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 24 Aug 2023 22:08:17 +0800
Subject: [PATCH 064/109] [rtl] fix when subnorm dist between 23 and 235

---
 arithmetic/src/float/RoundingUnit.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 1f977ff..e16f485 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -78,7 +78,8 @@ class RoundingUnit extends Module{
   val sub_sigShift = Wire(UInt(23.W))
   val sub_sigBefore:UInt = Cat(1.U(1.W), input.sig)
   sub_sigShift := (sub_sigBefore >> subnormDist.asUInt)(22,0)
-  val sub_Stickybits = (sub_sigBefore << 24 >> subnormDist.asUInt)(22,0).orR || input.rBits.orR
+  val distlagerThan24 = subnormDist.asUInt > 24.U
+  val sub_Stickybits = Mux(distlagerThan24, 1.U, (sub_sigBefore << 24 >> subnormDist.asUInt)(22,0).orR || input.rBits.orR)
   val sub_GuardBit = (sub_sigBefore << 24 >> subnormDist.asUInt)(23)
   val sub_rbits : UInt= Cat(sub_GuardBit,sub_Stickybits)
   val sub_sigIncr : Bool= (roundingMode_near_even && sub_rbits.andR) ||

From fcacedbe729d40436f4046f7d066bc8b67f3b3f1 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 24 Aug 2023 22:19:28 +0800
Subject: [PATCH 065/109] [rtl] disable notNaN_isSpecialInfOut when NaN

---
 arithmetic/src/float/RoundingUnit.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index e16f485..8f6b3d4 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -108,7 +108,7 @@ class RoundingUnit extends Module{
 
   // Exceptions
   val isNaNOut = input.invalidExc || input.isNaN
-  val notNaN_isSpecialInfOut = (input.infiniteExc || input.isInf) && (!input.invalidExc)
+  val notNaN_isSpecialInfOut = (input.infiniteExc || input.isInf) && (!input.invalidExc) && (!input.isNaN)
   val notNaN_isZero = input.isZero && !isNaNOut
   val commonCase = !isNaNOut && !notNaN_isSpecialInfOut && !input.isZero
 

From b7fe0de42fc80711555ccc8fbbc37824a6bc016f Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Fri, 25 Aug 2023 13:23:48 +0800
Subject: [PATCH 066/109] [rtl] fix sub_sigInc for RNE and RMM

---
 arithmetic/src/float/RoundingUnit.scala | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 8f6b3d4..1588a52 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -82,10 +82,11 @@ class RoundingUnit extends Module{
   val sub_Stickybits = Mux(distlagerThan24, 1.U, (sub_sigBefore << 24 >> subnormDist.asUInt)(22,0).orR || input.rBits.orR)
   val sub_GuardBit = (sub_sigBefore << 24 >> subnormDist.asUInt)(23)
   val sub_rbits : UInt= Cat(sub_GuardBit,sub_Stickybits)
-  val sub_sigIncr : Bool= (roundingMode_near_even && sub_rbits.andR) ||
+
+  val sub_sigIncr : Bool= (roundingMode_near_even && (sub_rbits.andR || (sub_sigShift(0) && sub_rbits==="b10".U))) ||
     (roundingMode_min && input.sign && sub_rbits.orR) ||
     (roundingMode_max && !input.sign && sub_rbits.orR) ||
-    (roundingMode_near_maxMag && sub_rbits.andR)
+    (roundingMode_near_maxMag && sub_rbits(1))
   // val sub_expInc : Bool= sub_sigShift(24, 2).andR && sub_sigIncr
 
   dontTouch(sub_rbits)

From 4b7bf7eca8f8cde5366001e7f5f80c46776221df Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Fri, 25 Aug 2023 15:50:05 +0800
Subject: [PATCH 067/109] [doc] add rtl doc

---
 arithmetic/src/float/DivSqrt.scala      | 24 ++++++++++-----
 arithmetic/src/float/RoundingUnit.scala | 39 ++++++++++---------------
 2 files changed, 32 insertions(+), 31 deletions(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index fb53430..0523c7b 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -20,12 +20,13 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
 
   // Exceptions
 
-  /** inf/inf and 0/0 */
+  /** inf/inf and 0/0  => NaN out */
   val notSigNaNIn_invalidExc_S_div =
     (rawA_S.isZero && rawB_S.isZero) || (rawA_S.isInf && rawB_S.isInf)
-  /** negative input */
+  /** -Inf + -normal => NaN out */
   val notSigNaNIn_invalidExc_S_sqrt =
     !rawA_S.isNaN && !rawA_S.isZero && rawA_S.sign
+  /** isSigNaNRawFloat detect signaling NaN */
   val majorExc_S =
     Mux(input.bits.sqrt,
       isSigNaNRawFloat(rawA_S) || notSigNaNIn_invalidExc_S_sqrt,
@@ -33,6 +34,8 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
         notSigNaNIn_invalidExc_S_div ||
         (!rawA_S.isNaN && !rawA_S.isInf && rawB_S.isZero)
     )
+
+  /** all cases result in NaN output*/
   val isNaN_S =
     Mux(input.bits.sqrt,
       rawA_S.isNaN || notSigNaNIn_invalidExc_S_sqrt,
@@ -46,7 +49,10 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val isInf_Z    = RegEnable(isInf_S,false.B,input.fire)
   val isZero_Z   = RegEnable(isZero_S,false.B,input.fire)
 
+  /** invalid operation flag */
   val invalidExec = majorExc_Z &&  isNaN_Z
+
+  /** DivideByZero flag */
   val infinitExec = majorExc_Z && !isNaN_Z
 
   val specialCaseA_S = rawA_S.isNaN || rawA_S.isInf || rawA_S.isZero
@@ -60,6 +66,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   fastValid := specialCase_S && input.fire
 
   // needNorm for div
+  /** when B_sig > A_sig neednorm*/
   val needNormNext:Bool = (rawA_S.sig + (-rawB_S.sig))(24)
   val needNorm = RegEnable(needNormNext, input.fire)
 
@@ -87,7 +94,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
       expfirst2(3) -> "b00".U
     )
   )
-  /** exp for sqrt never underlow*/
+  /** exp for sqrt never underlow */
   val expForSqrt = Cat(expstart, rawA_S.sExp(expWidth - 2, 0)) >> 1
   val sqrtExpIsOdd = !rawA_S.sExp(0)
   val sqrtFractIn = Mux(sqrtExpIsOdd, Cat("b0".U(1.W), rawA_S.sig(sigWidth - 1, 0), 0.U(1.W)),
@@ -125,13 +132,14 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
 
   // exp logic
   val expStoreNext,expToRound = Wire(UInt((expWidth+2).W))
-  /**
-    * for sqrt
-    * expForrounding effective is 8bits, MSB is sign
-    * expStoreNext = 0 + 8bits 
-    *
+  /** expStore
     *
+    * for sqrt
+    * expForSqrt(7,0) effective is 8bits, MSB is sign
+    * extends 2 sign bit in MSB
+    * expStoreNext = 10bits
     *
+    * todo define it format, important
     */
   expStoreNext := Mux(input.bits.sqrt,
     Cat(expForSqrt(7),expForSqrt(7),expForSqrt(7,0)),
diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 1588a52..96e423d 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -5,11 +5,8 @@ import chisel3.util._
 
 
 /**
-  * input.rbits = 2bits + sticky bit
+  * exp is 10bits SInt, MSB is sign
   *
-  * leave
-  *
-  * output is subnormal
   *
   * */
 class RoundingUnit extends Module{
@@ -42,16 +39,14 @@ class RoundingUnit extends Module{
   val common_inexact  = Wire(Bool())
   val common_subnorm  = Wire(Bool())
 
-
-
   val sigAfterInc = Wire(UInt(23.W))
   val sigIncr = Wire(Bool())
   val expIncr = Wire(Bool())
   val expBiasedAfterInc = Wire(UInt(8.W))
 
-  /** normal case */
+  /** normal case(not subnormal) */
 
-  /** todo later use Mux?*/
+  /** todo opt it with Mux1H? */
   sigIncr := (roundingMode_near_even && input.rBits(1) && input.rBits(0)) ||
     (roundingMode_min &&  input.sign && input.rBits.orR) ||
     (roundingMode_max && !input.sign && input.rBits.orR) ||
@@ -62,42 +57,40 @@ class RoundingUnit extends Module{
   /** for sig = all 1 and sigIncr*/
   expIncr := input.sig.andR && sigIncr
 
+  /** todo: opt it*/
   expBiasedAfterInc := ((input.exp.asSInt + 127.S)(7,0) + expIncr).asUInt
 
-
-  val sub_sigOut = Wire(UInt(23.W))
+  val sub_sigShift = Wire(UInt(23.W))
+  val sub_sigOut,common_subnormSigOut = Wire(UInt(23.W))
+  val sub_expInc = Wire(UInt(8.W))
 
   // control logic
   // set to 126 according to softfloat
+  // todo: merge it with normal case
   val exp_BiasForSub = (input.exp.asSInt + 126.S(10.W))
   val subnormDist = -exp_BiasForSub
-  // todo 23 or 24, why we have this case??
+  // todo why we have this case? IN IEEE754 or definded by Hardfloat?
   val common_totalUnderflow = subnormDist > 235.S
   common_subnorm := exp_BiasForSub(9)
 
-  val sub_sigShift = Wire(UInt(23.W))
   val sub_sigBefore:UInt = Cat(1.U(1.W), input.sig)
   sub_sigShift := (sub_sigBefore >> subnormDist.asUInt)(22,0)
+  // todo opt it, creat method for it, it;s jamm32
   val distlagerThan24 = subnormDist.asUInt > 24.U
   val sub_Stickybits = Mux(distlagerThan24, 1.U, (sub_sigBefore << 24 >> subnormDist.asUInt)(22,0).orR || input.rBits.orR)
   val sub_GuardBit = (sub_sigBefore << 24 >> subnormDist.asUInt)(23)
   val sub_rbits : UInt= Cat(sub_GuardBit,sub_Stickybits)
 
+  // todo merge it with sigIncr
   val sub_sigIncr : Bool= (roundingMode_near_even && (sub_rbits.andR || (sub_sigShift(0) && sub_rbits==="b10".U))) ||
     (roundingMode_min && input.sign && sub_rbits.orR) ||
     (roundingMode_max && !input.sign && sub_rbits.orR) ||
     (roundingMode_near_maxMag && sub_rbits(1))
-  // val sub_expInc : Bool= sub_sigShift(24, 2).andR && sub_sigIncr
-
-  dontTouch(sub_rbits)
-
 
   sub_sigOut := sub_sigShift + sub_sigIncr
-
-  val sub_expInc = Wire(UInt(8.W))
   sub_expInc := sub_sigShift.andR && sub_sigIncr
+  common_subnormSigOut := Mux(common_totalUnderflow, 0.U ,sub_sigOut )
 
-  val common_subnormSigOut = Mux(common_totalUnderflow, 0.U ,sub_sigOut )
   dontTouch(exp_BiasForSub)
   dontTouch(subnormDist)
   dontTouch(common_subnorm)
@@ -106,6 +99,7 @@ class RoundingUnit extends Module{
   dontTouch(common_totalUnderflow)
   dontTouch(sub_sigOut)
   dontTouch(sub_expInc)
+  dontTouch(sub_rbits)
 
   // Exceptions
   val isNaNOut = input.invalidExc || input.isNaN
@@ -125,12 +119,11 @@ class RoundingUnit extends Module{
     Seq(
       overflowSele(0) -> Cat(input.sign, "h7F800000".U(31.W)),
       overflowSele(1) -> Cat(input.sign, "h7F7FFFFF".U(31.W)),
-      overflowSele(2) -> Mux(input.sign,"hFF7FFFFF".U(32.W),"h7F800000".U(32.W)),
-      overflowSele(3) -> Mux(input.sign,"hFF800000".U(32.W),"h7F7FFFFF".U(32.W)),
+      overflowSele(2) -> Mux(input.sign, "hFF7FFFFF".U(32.W), "h7F800000".U(32.W)),
+      overflowSele(3) -> Mux(input.sign, "hFF800000".U(32.W), "h7F7FFFFF".U(32.W)),
     )
   )
 
-
   // exception data with Spike
   val quietNaN = "h7FC00000".U
 
@@ -138,7 +131,7 @@ class RoundingUnit extends Module{
   val zeroOut = Cat(input.sign, 0.U(31.W))
   val outSele1H = commonCase ## notNaN_isSpecialInfOut ## isNaNOut ## notNaN_isZero
 
-  /** @todo opt it */
+  /** @todo opt it using hardfloat methods */
   common_overflow := input.exp.asSInt > 127.S
   common_underflow := common_subnorm
   common_inexact := input.rBits.orR || (common_underflow && sub_rbits.orR)

From cd79609e706296bedd06110beb395e3652cc0ad8 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Sat, 26 Aug 2023 19:17:27 +0800
Subject: [PATCH 068/109] [rtl] opt div neednorm logic when sigB > sigA

---
 arithmetic/src/float/DivSqrt.scala | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index 0523c7b..8c791d0 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -65,10 +65,6 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val fastValid = RegInit(false.B)
   fastValid := specialCase_S && input.fire
 
-  // needNorm for div
-  /** when B_sig > A_sig neednorm*/
-  val needNormNext:Bool = (rawA_S.sig + (-rawB_S.sig))(24)
-  val needNorm = RegEnable(needNormNext, input.fire)
 
   // sign
   val signNext = Mux(input.bits.sqrt, Mux(rawA_S.isZero, rawA_S.sign, false.B), rawA_S.sign ^ rawB_S.sign)
@@ -120,9 +116,17 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   divModule.input.bits.counter := 8.U
   divModule.input.valid := input.valid && !input.bits.sqrt && normalCase_S_div
 
-  val sigToRound_div = Mux(needNorm, divModule.output.bits.quotient(calWidth - 3, calWidth - sigWidth - 1),
+  // needNorm for div
+  /** when B_sig > A_sig
+    * divout = 0000,01xxx
+    * exp need decrease by 1
+    *
+    * */
+  val needRightShift = !divModule.output.bits.quotient(27)
+
+  val sigToRound_div = Mux(needRightShift, divModule.output.bits.quotient(calWidth - 3, calWidth - sigWidth - 1),
     divModule.output.bits.quotient(calWidth - 2, calWidth - sigWidth))
-  val rbits_div = Mux(needNorm, divModule.output.bits.quotient(calWidth - sigWidth - 2) ## divModule.output.bits.reminder.orR,
+  val rbits_div = Mux(needRightShift, divModule.output.bits.quotient(calWidth - sigWidth - 2) ## divModule.output.bits.reminder.orR,
     divModule.output.bits.quotient(calWidth - sigWidth - 1) ## divModule.output.bits.reminder.orR)
 
 
@@ -133,6 +137,8 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   // exp logic
   val expStoreNext,expToRound = Wire(UInt((expWidth+2).W))
   /** expStore
+    *
+    * output is 10bits SInt
     *
     * for sqrt
     * expForSqrt(7,0) effective is 8bits, MSB is sign
@@ -145,7 +151,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     Cat(expForSqrt(7),expForSqrt(7),expForSqrt(7,0)),
     (rawA_S.sExp-rawB_S.sExp).asUInt)
   val expStore = RegEnable(expStoreNext, 0.U((expWidth+2).W), input.fire)
-  expToRound := Mux(opSqrtReg, expStore, expStore - needNorm)
+  expToRound := Mux(opSqrtReg, expStore, expStore - needRightShift)
   dontTouch(expToRound)
 
   dontTouch(rawA_S)

From b66f29970d4b95fe85472904aaba2c4569316a47 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Sat, 26 Aug 2023 19:30:22 +0800
Subject: [PATCH 069/109] [rtl] change RoundingUnit exp type to SInt

---
 arithmetic/src/float/DivFloat.scala     |  2 +-
 arithmetic/src/float/DivSqrt.scala      |  2 +-
 arithmetic/src/float/RoundingUnit.scala | 10 +++++-----
 arithmetic/src/float/SqrtFloat.scala    |  2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arithmetic/src/float/DivFloat.scala b/arithmetic/src/float/DivFloat.scala
index ccbc735..55da882 100644
--- a/arithmetic/src/float/DivFloat.scala
+++ b/arithmetic/src/float/DivFloat.scala
@@ -84,7 +84,7 @@ class DivFloat(expWidth: Int, sigWidth: Int) extends Module{
 
   val roundresult = RoundingUnit(
     signReg,
-    expToRound,
+    expToRound.asSInt,
     sigToRound,
     rbits,
     consts.round_near_even,
diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index 8c791d0..0424a8d 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -160,7 +160,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
 
   val roundresult = RoundingUnit(
     signReg,
-    expToRound,
+    expToRound.asSInt,
     sigToRound,
     rbitsToRound,
     roundingModeReg,
diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 96e423d..0bab0b2 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -17,7 +17,7 @@ class RoundingUnit extends Module{
     val isZero = Bool()
     val isNaN  = Bool()
     val sig = UInt(23.W)
-    val exp = UInt(10.W)
+    val exp = SInt(10.W)
     val rBits = UInt(2.W)
     val sign = Bool()
     val roundingMode = UInt(5.W)
@@ -58,7 +58,7 @@ class RoundingUnit extends Module{
   expIncr := input.sig.andR && sigIncr
 
   /** todo: opt it*/
-  expBiasedAfterInc := ((input.exp.asSInt + 127.S)(7,0) + expIncr).asUInt
+  expBiasedAfterInc := ((input.exp + 127.S)(7,0) + expIncr).asUInt
 
   val sub_sigShift = Wire(UInt(23.W))
   val sub_sigOut,common_subnormSigOut = Wire(UInt(23.W))
@@ -67,7 +67,7 @@ class RoundingUnit extends Module{
   // control logic
   // set to 126 according to softfloat
   // todo: merge it with normal case
-  val exp_BiasForSub = (input.exp.asSInt + 126.S(10.W))
+  val exp_BiasForSub = (input.exp + 126.S(10.W))
   val subnormDist = -exp_BiasForSub
   // todo why we have this case? IN IEEE754 or definded by Hardfloat?
   val common_totalUnderflow = subnormDist > 235.S
@@ -132,7 +132,7 @@ class RoundingUnit extends Module{
   val outSele1H = commonCase ## notNaN_isSpecialInfOut ## isNaNOut ## notNaN_isZero
 
   /** @todo opt it using hardfloat methods */
-  common_overflow := input.exp.asSInt > 127.S
+  common_overflow := input.exp > 127.S
   common_underflow := common_subnorm
   common_inexact := input.rBits.orR || (common_underflow && sub_rbits.orR)
 
@@ -165,7 +165,7 @@ class RoundingUnit extends Module{
 }
 
 object RoundingUnit {
-  def apply(sign: Bool, exp: UInt, sig: UInt, rbits: UInt, rmode: UInt, invalidExc: Bool, infiniteExc: Bool, isNaN: Bool, isInf: Bool, isZero: Bool): Vec[UInt] = {
+  def apply(sign: Bool, exp: SInt, sig: UInt, rbits: UInt, rmode: UInt, invalidExc: Bool, infiniteExc: Bool, isNaN: Bool, isInf: Bool, isZero: Bool): Vec[UInt] = {
 
     val rounder = Module(new RoundingUnit)
     rounder.input.sign := sign
diff --git a/arithmetic/src/float/SqrtFloat.scala b/arithmetic/src/float/SqrtFloat.scala
index dd82940..32de581 100644
--- a/arithmetic/src/float/SqrtFloat.scala
+++ b/arithmetic/src/float/SqrtFloat.scala
@@ -74,7 +74,7 @@ class SqrtFloat(expWidth: Int, sigWidth: Int) extends Module{
   input.ready := SqrtModule.input.ready
   val roundresult = RoundingUnit(
     input.bits.oprand(expWidth + sigWidth-1) ,
-    expToRound,
+    expToRound.asSInt,
     sigforRound,
     rbits,
     consts.round_near_even,

From cdba8c01f2285927090861f25fd2ed9a486c639c Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Sat, 26 Aug 2023 22:55:54 +0800
Subject: [PATCH 070/109] [rtl] opt rounding subnormal case

---
 arithmetic/src/float/RoundingUnit.scala | 58 +++++++++++++++++--------
 1 file changed, 39 insertions(+), 19 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 0bab0b2..81c9ff2 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -47,10 +47,6 @@ class RoundingUnit extends Module{
   /** normal case(not subnormal) */
 
   /** todo opt it with Mux1H? */
-  sigIncr := (roundingMode_near_even && input.rBits(1) && input.rBits(0)) ||
-    (roundingMode_min &&  input.sign && input.rBits.orR) ||
-    (roundingMode_max && !input.sign && input.rBits.orR) ||
-    (roundingMode_near_maxMag && input.rBits(1) && input.rBits(0))
 
   sigAfterInc := input.sig + sigIncr
 
@@ -68,27 +64,51 @@ class RoundingUnit extends Module{
   // set to 126 according to softfloat
   // todo: merge it with normal case
   val exp_BiasForSub = (input.exp + 126.S(10.W))
-  val subnormDist = -exp_BiasForSub
   // todo why we have this case? IN IEEE754 or definded by Hardfloat?
-  val common_totalUnderflow = subnormDist > 235.S
   common_subnorm := exp_BiasForSub(9)
+  // for non subnormal case, Dist = 0
+  val subnormDist = Mux(common_subnorm,-exp_BiasForSub, 0.S(10.W))
+  val common_totalUnderflow = subnormDist > 235.S
+
+  //--------------------------------
+
+  val greaterThan31 = subnormDist(9,5).orR
+  val allMask = ((-1).S(31.W) << 31 >> subnormDist(5,0))
+  val between24And31 = allMask(6,0).orR
+  // subnorm case when Dist>24
+  val greaterThan24 = (greaterThan31 || between24And31) && common_subnorm
+  val roundMask = Mux(!greaterThan24, Reverse(allMask(30,7)) ## 3.U(2.W), 0.U(26.W))
+
+  val shiftedRoundMask = Mux(!greaterThan24, 0.U(1.W) ## roundMask >> 1 , BigInt(-1).S(26.W).asUInt)
+  /** select the first bit need to be  rounded */
+  val roundPosMask = ~shiftedRoundMask & roundMask
+
+  val adjustedSig = Cat(1.U(1.W), input.sig, input.rBits)
+  val roundPosBit = (adjustedSig & roundPosMask).orR
+  /** Any bits is one after guard bit */
+  val anyRoundExtra = (adjustedSig & shiftedRoundMask).orR
+  /** Any bits is one containing guard bit */
+  val anyRound = roundPosBit || anyRoundExtra
+
+  dontTouch(shiftedRoundMask)
+  dontTouch(roundPosMask)
+  dontTouch(roundMask)
+  dontTouch(greaterThan24)
+  dontTouch(greaterThan31)
 
   val sub_sigBefore:UInt = Cat(1.U(1.W), input.sig)
   sub_sigShift := (sub_sigBefore >> subnormDist.asUInt)(22,0)
   // todo opt it, creat method for it, it;s jamm32
-  val distlagerThan24 = subnormDist.asUInt > 24.U
-  val sub_Stickybits = Mux(distlagerThan24, 1.U, (sub_sigBefore << 24 >> subnormDist.asUInt)(22,0).orR || input.rBits.orR)
-  val sub_GuardBit = (sub_sigBefore << 24 >> subnormDist.asUInt)(23)
-  val sub_rbits : UInt= Cat(sub_GuardBit,sub_Stickybits)
+  val rbits : UInt= Cat(roundPosBit,anyRoundExtra)
 
   // todo merge it with sigIncr
-  val sub_sigIncr : Bool= (roundingMode_near_even && (sub_rbits.andR || (sub_sigShift(0) && sub_rbits==="b10".U))) ||
-    (roundingMode_min && input.sign && sub_rbits.orR) ||
-    (roundingMode_max && !input.sign && sub_rbits.orR) ||
-    (roundingMode_near_maxMag && sub_rbits(1))
+  sigIncr := (roundingMode_near_even && (rbits.andR || (sub_sigShift(0) && rbits==="b10".U))) ||
+    (roundingMode_min && input.sign && rbits.orR) ||
+    (roundingMode_max && !input.sign && rbits.orR) ||
+    (roundingMode_near_maxMag && rbits(1))
 
-  sub_sigOut := sub_sigShift + sub_sigIncr
-  sub_expInc := sub_sigShift.andR && sub_sigIncr
+  sub_sigOut := sub_sigShift + sigIncr
+  sub_expInc := sub_sigShift.andR && sigIncr
   common_subnormSigOut := Mux(common_totalUnderflow, 0.U ,sub_sigOut )
 
   dontTouch(exp_BiasForSub)
@@ -99,7 +119,7 @@ class RoundingUnit extends Module{
   dontTouch(common_totalUnderflow)
   dontTouch(sub_sigOut)
   dontTouch(sub_expInc)
-  dontTouch(sub_rbits)
+  dontTouch(rbits)
 
   // Exceptions
   val isNaNOut = input.invalidExc || input.isNaN
@@ -108,7 +128,7 @@ class RoundingUnit extends Module{
   val commonCase = !isNaNOut && !notNaN_isSpecialInfOut && !input.isZero
 
   val overflow = commonCase && common_overflow
-  val underflow = commonCase && (common_underflow && sub_rbits.orR)
+  val underflow = commonCase && (common_underflow && rbits.orR)
   val inexact = overflow || (commonCase && common_inexact)
 
   val isZero = input.isZero && underflow
@@ -134,7 +154,7 @@ class RoundingUnit extends Module{
   /** @todo opt it using hardfloat methods */
   common_overflow := input.exp > 127.S
   common_underflow := common_subnorm
-  common_inexact := input.rBits.orR || (common_underflow && sub_rbits.orR)
+  common_inexact := input.rBits.orR || (common_underflow && rbits.orR)
 
   val common_sigOut = sigAfterInc
   val common_expOut = expBiasedAfterInc

From 8bc2f36c35c9449f25fdc2bb42443901eef1e4ec Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Sun, 27 Aug 2023 00:09:57 +0800
Subject: [PATCH 071/109] [rtl] merge sigInc logic in two cases

---
 arithmetic/src/float/RoundingUnit.scala | 41 +++++++++++++------------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 81c9ff2..88c79cf 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -41,24 +41,22 @@ class RoundingUnit extends Module{
 
   val sigAfterInc = Wire(UInt(23.W))
   val sigIncr = Wire(Bool())
-  val expIncr = Wire(Bool())
   val expBiasedAfterInc = Wire(UInt(8.W))
 
+  val sig_afterInc = Wire(UInt(27.W))
+
+
+  val sub_sigOut, common_subnormSigOut = Wire(UInt(23.W))
+  val expInc = Wire(UInt(8.W))
+
   /** normal case(not subnormal) */
 
   /** todo opt it with Mux1H? */
 
-  sigAfterInc := input.sig + sigIncr
-
-  /** for sig = all 1 and sigIncr*/
-  expIncr := input.sig.andR && sigIncr
+  sigAfterInc := sig_afterInc(24,2)
 
   /** todo: opt it*/
-  expBiasedAfterInc := ((input.exp + 127.S)(7,0) + expIncr).asUInt
-
-  val sub_sigShift = Wire(UInt(23.W))
-  val sub_sigOut,common_subnormSigOut = Wire(UInt(23.W))
-  val sub_expInc = Wire(UInt(8.W))
+  expBiasedAfterInc := ((input.exp + 127.S)(7,0) + expInc).asUInt
 
   // control logic
   // set to 126 according to softfloat
@@ -90,27 +88,31 @@ class RoundingUnit extends Module{
   /** Any bits is one containing guard bit */
   val anyRound = roundPosBit || anyRoundExtra
 
+  val lastBitMask = (roundPosMask<<1.U)(25,0)
+  val lastBit = (adjustedSig & lastBitMask ).orR
+
+  val equalTo24 = roundPosMask(25) && !roundPosMask(24,0).orR
+
   dontTouch(shiftedRoundMask)
   dontTouch(roundPosMask)
   dontTouch(roundMask)
   dontTouch(greaterThan24)
   dontTouch(greaterThan31)
 
-  val sub_sigBefore:UInt = Cat(1.U(1.W), input.sig)
-  sub_sigShift := (sub_sigBefore >> subnormDist.asUInt)(22,0)
-  // todo opt it, creat method for it, it;s jamm32
   val rbits : UInt= Cat(roundPosBit,anyRoundExtra)
 
-  // todo merge it with sigIncr
-  sigIncr := (roundingMode_near_even && (rbits.andR || (sub_sigShift(0) && rbits==="b10".U))) ||
+  sigIncr := (roundingMode_near_even && (rbits.andR || (lastBit && rbits==="b10".U))) ||
     (roundingMode_min && input.sign && rbits.orR) ||
     (roundingMode_max && !input.sign && rbits.orR) ||
     (roundingMode_near_maxMag && rbits(1))
 
-  sub_sigOut := sub_sigShift + sigIncr
-  sub_expInc := sub_sigShift.andR && sigIncr
+  sub_sigOut := Mux(greaterThan24 || equalTo24 ,Mux(sigIncr,1.U(26.W), 0.U(26.W)),(sig_afterInc >> subnormDist(4,0))(24,2))
+  expInc := sig_afterInc(26)  && (!common_subnorm || subnormDist===1.S )
   common_subnormSigOut := Mux(common_totalUnderflow, 0.U ,sub_sigOut )
 
+  val sigIncrement = Mux(sigIncr,lastBitMask, 0.U(26.W))
+  sig_afterInc := adjustedSig +& sigIncrement
+
   dontTouch(exp_BiasForSub)
   dontTouch(subnormDist)
   dontTouch(common_subnorm)
@@ -118,8 +120,9 @@ class RoundingUnit extends Module{
   dontTouch(sigAfterInc)
   dontTouch(common_totalUnderflow)
   dontTouch(sub_sigOut)
-  dontTouch(sub_expInc)
+  dontTouch(expInc)
   dontTouch(rbits)
+  dontTouch(sigIncrement)
 
   // Exceptions
   val isNaNOut = input.invalidExc || input.isNaN
@@ -166,7 +169,7 @@ class RoundingUnit extends Module{
 
 
   val common_out = Mux(common_overflow, common_infiniteOut,
-    Mux(common_subnorm, input.sign ## sub_expInc ## common_subnormSigOut,
+    Mux(common_subnorm, input.sign ## expInc ## common_subnormSigOut,
       input.sign ## common_expOut ## common_sigOut))
 
   dontTouch(common_out)

From 9c58ef30f275c8d35ae7f8bbda94190b0378c027 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Sun, 27 Aug 2023 00:21:51 +0800
Subject: [PATCH 072/109] [rtl] rename and format

---
 arithmetic/src/float/RoundingUnit.scala | 58 ++++++-------------------
 1 file changed, 13 insertions(+), 45 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 88c79cf..4feae57 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -33,51 +33,40 @@ class RoundingUnit extends Module{
   val roundingMode_max         = (input.roundingMode === consts.round_max)
   val roundingMode_near_maxMag = (input.roundingMode === consts.round_near_maxMag)
 
-
   val common_overflow = Wire(Bool())
   val common_underflow = Wire(Bool())
   val common_inexact  = Wire(Bool())
   val common_subnorm  = Wire(Bool())
 
-  val sigAfterInc = Wire(UInt(23.W))
   val sigIncr = Wire(Bool())
-  val expBiasedAfterInc = Wire(UInt(8.W))
-
+  val expBiased = Wire(UInt(8.W))
   val sig_afterInc = Wire(UInt(27.W))
-
-
   val sub_sigOut, common_subnormSigOut = Wire(UInt(23.W))
   val expInc = Wire(UInt(8.W))
 
-  /** normal case(not subnormal) */
-
-  /** todo opt it with Mux1H? */
-
-  sigAfterInc := sig_afterInc(24,2)
-
   /** todo: opt it*/
-  expBiasedAfterInc := ((input.exp + 127.S)(7,0) + expInc).asUInt
+  expBiased := ((input.exp + 127.S)(7,0) + expInc).asUInt
 
   // control logic
   // set to 126 according to softfloat
   // todo: merge it with normal case
   val exp_BiasForSub = (input.exp + 126.S(10.W))
-  // todo why we have this case? IN IEEE754 or definded by Hardfloat?
   common_subnorm := exp_BiasForSub(9)
   // for non subnormal case, Dist = 0
   val subnormDist = Mux(common_subnorm,-exp_BiasForSub, 0.S(10.W))
+  // todo why we have this case? IN IEEE754 or definded by Hardfloat?
   val common_totalUnderflow = subnormDist > 235.S
 
-  //--------------------------------
+  //-----------------subnormal loggic---------------
 
-  val greaterThan31 = subnormDist(9,5).orR
+  val distGT32 = subnormDist(9,5).orR
   val allMask = ((-1).S(31.W) << 31 >> subnormDist(5,0))
-  val between24And31 = allMask(6,0).orR
+  val distIn24And31 = allMask(6,0).orR
   // subnorm case when Dist>24
-  val greaterThan24 = (greaterThan31 || between24And31) && common_subnorm
-  val roundMask = Mux(!greaterThan24, Reverse(allMask(30,7)) ## 3.U(2.W), 0.U(26.W))
+  val distGT24 = (distGT32 || distIn24And31) && common_subnorm
+  val roundMask = Mux(!distGT24, Reverse(allMask(30,7)) ## 3.U(2.W), 0.U(26.W))
 
-  val shiftedRoundMask = Mux(!greaterThan24, 0.U(1.W) ## roundMask >> 1 , BigInt(-1).S(26.W).asUInt)
+  val shiftedRoundMask = Mux(!distGT24, 0.U(1.W) ## roundMask >> 1 , BigInt(-1).S(26.W).asUInt)
   /** select the first bit need to be  rounded */
   val roundPosMask = ~shiftedRoundMask & roundMask
 
@@ -91,13 +80,8 @@ class RoundingUnit extends Module{
   val lastBitMask = (roundPosMask<<1.U)(25,0)
   val lastBit = (adjustedSig & lastBitMask ).orR
 
-  val equalTo24 = roundPosMask(25) && !roundPosMask(24,0).orR
+  val distEQ24 = roundPosMask(25) && !roundPosMask(24,0).orR
 
-  dontTouch(shiftedRoundMask)
-  dontTouch(roundPosMask)
-  dontTouch(roundMask)
-  dontTouch(greaterThan24)
-  dontTouch(greaterThan31)
 
   val rbits : UInt= Cat(roundPosBit,anyRoundExtra)
 
@@ -106,24 +90,13 @@ class RoundingUnit extends Module{
     (roundingMode_max && !input.sign && rbits.orR) ||
     (roundingMode_near_maxMag && rbits(1))
 
-  sub_sigOut := Mux(greaterThan24 || equalTo24 ,Mux(sigIncr,1.U(26.W), 0.U(26.W)),(sig_afterInc >> subnormDist(4,0))(24,2))
+  sub_sigOut := Mux(distGT24 || distEQ24 ,Mux(sigIncr,1.U(26.W), 0.U(26.W)),(sig_afterInc >> subnormDist(4,0))(24,2))
   expInc := sig_afterInc(26)  && (!common_subnorm || subnormDist===1.S )
   common_subnormSigOut := Mux(common_totalUnderflow, 0.U ,sub_sigOut )
 
   val sigIncrement = Mux(sigIncr,lastBitMask, 0.U(26.W))
   sig_afterInc := adjustedSig +& sigIncrement
 
-  dontTouch(exp_BiasForSub)
-  dontTouch(subnormDist)
-  dontTouch(common_subnorm)
-  dontTouch(common_subnormSigOut)
-  dontTouch(sigAfterInc)
-  dontTouch(common_totalUnderflow)
-  dontTouch(sub_sigOut)
-  dontTouch(expInc)
-  dontTouch(rbits)
-  dontTouch(sigIncrement)
-
   // Exceptions
   val isNaNOut = input.invalidExc || input.isNaN
   val notNaN_isSpecialInfOut = (input.infiniteExc || input.isInf) && (!input.invalidExc) && (!input.isNaN)
@@ -159,13 +132,8 @@ class RoundingUnit extends Module{
   common_underflow := common_subnorm
   common_inexact := input.rBits.orR || (common_underflow && rbits.orR)
 
-  val common_sigOut = sigAfterInc
-  val common_expOut = expBiasedAfterInc
-  dontTouch(common_expOut)
-  dontTouch(common_underflow)
-  dontTouch(common_overflow)
-  dontTouch(overflowSele)
-  dontTouch(common_infiniteOut)
+  val common_sigOut = sig_afterInc(24,2)
+  val common_expOut = expBiased
 
 
   val common_out = Mux(common_overflow, common_infiniteOut,

From 5b0b07f13c0dd3bf641be76ecc8c2d06d3c4b4f0 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Sun, 27 Aug 2023 08:52:04 +0800
Subject: [PATCH 073/109] [rtl] rename and format

---
 arithmetic/src/float/RoundingUnit.scala | 34 +++++++++++++------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 4feae57..45801b8 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -36,24 +36,21 @@ class RoundingUnit extends Module{
   val common_overflow = Wire(Bool())
   val common_underflow = Wire(Bool())
   val common_inexact  = Wire(Bool())
-  val common_subnorm  = Wire(Bool())
 
   val sigIncr = Wire(Bool())
-  val expBiased = Wire(UInt(8.W))
+  val expBiasedOut = Wire(UInt(8.W))
   val sig_afterInc = Wire(UInt(27.W))
   val sub_sigOut, common_subnormSigOut = Wire(UInt(23.W))
   val expInc = Wire(UInt(8.W))
 
-  /** todo: opt it*/
-  expBiased := ((input.exp + 127.S)(7,0) + expInc).asUInt
+  expBiasedOut := ((input.exp + 127.S)(7,0) + expInc).asUInt
 
   // control logic
   // set to 126 according to softfloat
   // todo: merge it with normal case
-  val exp_BiasForSub = (input.exp + 126.S(10.W))
-  common_subnorm := exp_BiasForSub(9)
+  val exp_ForSub = (input.exp + 126.S(10.W))
   // for non subnormal case, Dist = 0
-  val subnormDist = Mux(common_subnorm,-exp_BiasForSub, 0.S(10.W))
+  val subnormDist = Mux(common_underflow, -exp_ForSub, 0.S(10.W))
   // todo why we have this case? IN IEEE754 or definded by Hardfloat?
   val common_totalUnderflow = subnormDist > 235.S
 
@@ -63,7 +60,7 @@ class RoundingUnit extends Module{
   val allMask = ((-1).S(31.W) << 31 >> subnormDist(5,0))
   val distIn24And31 = allMask(6,0).orR
   // subnorm case when Dist>24
-  val distGT24 = (distGT32 || distIn24And31) && common_subnorm
+  val distGT24 = (distGT32 || distIn24And31) && common_underflow
   val roundMask = Mux(!distGT24, Reverse(allMask(30,7)) ## 3.U(2.W), 0.U(26.W))
 
   val shiftedRoundMask = Mux(!distGT24, 0.U(1.W) ## roundMask >> 1 , BigInt(-1).S(26.W).asUInt)
@@ -82,16 +79,17 @@ class RoundingUnit extends Module{
 
   val distEQ24 = roundPosMask(25) && !roundPosMask(24,0).orR
 
-
-  val rbits : UInt= Cat(roundPosBit,anyRoundExtra)
+  val rbits : UInt= Cat(roundPosBit, anyRoundExtra)
 
   sigIncr := (roundingMode_near_even && (rbits.andR || (lastBit && rbits==="b10".U))) ||
     (roundingMode_min && input.sign && rbits.orR) ||
     (roundingMode_max && !input.sign && rbits.orR) ||
     (roundingMode_near_maxMag && rbits(1))
 
+  /** sig_afterInc doesn;t cover distEQ24 */
   sub_sigOut := Mux(distGT24 || distEQ24 ,Mux(sigIncr,1.U(26.W), 0.U(26.W)),(sig_afterInc >> subnormDist(4,0))(24,2))
-  expInc := sig_afterInc(26)  && (!common_subnorm || subnormDist===1.S )
+  /** when subnormDist===1.S, there may be expInc*/
+  expInc := sig_afterInc(26)  && (!common_underflow || subnormDist === 1.S )
   common_subnormSigOut := Mux(common_totalUnderflow, 0.U ,sub_sigOut )
 
   val sigIncrement = Mux(sigIncr,lastBitMask, 0.U(26.W))
@@ -127,17 +125,21 @@ class RoundingUnit extends Module{
   val zeroOut = Cat(input.sign, 0.U(31.W))
   val outSele1H = commonCase ## notNaN_isSpecialInfOut ## isNaNOut ## notNaN_isZero
 
-  /** @todo opt it using hardfloat methods */
-  common_overflow := input.exp > 127.S
-  common_underflow := common_subnorm
+  /** @todo opt it using hardfloat methods
+    *
+    * overflow : > 127
+    * */
+
+  common_overflow := input.exp(8,7).orR && !input.exp(9)
+  common_underflow := exp_ForSub(9)
   common_inexact := input.rBits.orR || (common_underflow && rbits.orR)
 
   val common_sigOut = sig_afterInc(24,2)
-  val common_expOut = expBiased
+  val common_expOut = expBiasedOut
 
 
   val common_out = Mux(common_overflow, common_infiniteOut,
-    Mux(common_subnorm, input.sign ## expInc ## common_subnormSigOut,
+    Mux(common_underflow, input.sign ## expInc ## common_subnormSigOut,
       input.sign ## common_expOut ## common_sigOut))
 
   dontTouch(common_out)

From e97ac98871279905a311a5aa5669be1b79fdc323 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Sun, 27 Aug 2023 09:40:48 +0800
Subject: [PATCH 074/109] [rtl] doc and format

---
 arithmetic/src/float/DivSqrt.scala      | 19 +++++++------------
 arithmetic/src/float/RoundingUnit.scala |  3 ---
 2 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index 0424a8d..3cff4e3 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -4,7 +4,6 @@ import chisel3._
 import chisel3.util._
 import division.srt.srt16._
 import sqrt._
-import chisel3.dontTouch
 
 class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val fpWidth = expWidth + sigWidth
@@ -70,7 +69,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val signNext = Mux(input.bits.sqrt, Mux(rawA_S.isZero, rawA_S.sign, false.B), rawA_S.sign ^ rawB_S.sign)
   val signReg = RegEnable(signNext, input.fire)
 
-  // sqrt
+  // sqrt input
 
   /** construct expForSqrt
     *
@@ -90,7 +89,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
       expfirst2(3) -> "b00".U
     )
   )
-  /** exp for sqrt never underlow */
+
   val expForSqrt = Cat(expstart, rawA_S.sExp(expWidth - 2, 0)) >> 1
   val sqrtExpIsOdd = !rawA_S.sExp(0)
   val sqrtFractIn = Mux(sqrtExpIsOdd, Cat("b0".U(1.W), rawA_S.sig(sigWidth - 1, 0), 0.U(1.W)),
@@ -104,7 +103,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val sigToRound_sqrt = SqrtModule.output.bits.result(24, 2)
 
 
-  // div
+  // divInput
   val fractDividendIn = Wire(UInt((fpWidth).W))
   val fractDivisorIn = Wire(UInt((fpWidth).W))
   fractDividendIn := Cat(1.U(1.W), rawA_S.sig(sigWidth - 2, 0), 0.U(expWidth.W))
@@ -116,14 +115,15 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   divModule.input.bits.counter := 8.U
   divModule.input.valid := input.valid && !input.bits.sqrt && normalCase_S_div
 
-  // needNorm for div
-  /** when B_sig > A_sig
+
+  /** collect div sig result
+    *
+    * when B_sig > A_sig
     * divout = 0000,01xxx
     * exp need decrease by 1
     *
     * */
   val needRightShift = !divModule.output.bits.quotient(27)
-
   val sigToRound_div = Mux(needRightShift, divModule.output.bits.quotient(calWidth - 3, calWidth - sigWidth - 1),
     divModule.output.bits.quotient(calWidth - 2, calWidth - sigWidth))
   val rbits_div = Mux(needRightShift, divModule.output.bits.quotient(calWidth - sigWidth - 2) ## divModule.output.bits.reminder.orR,
@@ -152,11 +152,6 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     (rawA_S.sExp-rawB_S.sExp).asUInt)
   val expStore = RegEnable(expStoreNext, 0.U((expWidth+2).W), input.fire)
   expToRound := Mux(opSqrtReg, expStore, expStore - needRightShift)
-  dontTouch(expToRound)
-
-  dontTouch(rawA_S)
-  dontTouch(rawB_S)
-  dontTouch(rbits_div)
 
   val roundresult = RoundingUnit(
     signReg,
diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 45801b8..a1abbb1 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -142,7 +142,6 @@ class RoundingUnit extends Module{
     Mux(common_underflow, input.sign ## expInc ## common_subnormSigOut,
       input.sign ## common_expOut ## common_sigOut))
 
-  dontTouch(common_out)
 
   output.data := Mux1H(Seq(
     outSele1H(0) -> zeroOut,
@@ -150,8 +149,6 @@ class RoundingUnit extends Module{
     outSele1H(2) -> infiniteOut,
     outSele1H(3) -> common_out)
   )
-  dontTouch(outSele1H)
-
 
   output.exceptionFlags := input.invalidExc ## input.infiniteExc ## overflow ## underflow ## inexact
 

From 9a80fd3cedd7bad02b97d9a16cb25a18768e8f31 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 28 Aug 2023 11:24:46 +0800
Subject: [PATCH 075/109] [rtl] opt and reformat

---
 arithmetic/src/float/DivSqrt.scala      | 48 ++++++++++++-------------
 arithmetic/src/float/RoundingUnit.scala | 43 +++++++++++-----------
 2 files changed, 44 insertions(+), 47 deletions(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index 3cff4e3..60812f3 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -11,8 +11,8 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val input = IO(Flipped(DecoupledIO(new DivSqrtInput(expWidth, sigWidth))))
   val output = IO(ValidIO(new DivSqrtOutput(expWidth, sigWidth)))
 
-  val opSqrtReg = RegEnable(input.bits.sqrt, input.fire)
-  val roundingModeReg = RegEnable(input.bits.roundingMode, input.fire)
+  val opSqrtReg       = RegEnable(input.bits.sqrt        , false.B, input.fire)
+  val roundingModeReg = RegEnable(input.bits.roundingMode, 0.U    , input.fire)
 
   val rawA_S = rawFloatFromFN(expWidth, sigWidth, input.bits.a)
   val rawB_S = rawFloatFromFN(expWidth, sigWidth, input.bits.b)
@@ -34,19 +34,19 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
         (!rawA_S.isNaN && !rawA_S.isInf && rawB_S.isZero)
     )
 
-  /** all cases result in NaN output*/
+  /** all cases result in NaN output */
   val isNaN_S =
     Mux(input.bits.sqrt,
       rawA_S.isNaN || notSigNaNIn_invalidExc_S_sqrt,
       rawA_S.isNaN || rawB_S.isNaN || notSigNaNIn_invalidExc_S_div
     )
-  val isInf_S = Mux(input.bits.sqrt, rawA_S.isInf, rawA_S.isInf || rawB_S.isZero)
+  val isInf_S  = Mux(input.bits.sqrt, rawA_S.isInf, rawA_S.isInf || rawB_S.isZero)
   val isZero_S = Mux(input.bits.sqrt, rawA_S.isZero, rawA_S.isZero || rawB_S.isInf)
 
-  val majorExc_Z = RegEnable(majorExc_S,false.B,input.fire)
-  val isNaN_Z    = RegEnable(isNaN_S,false.B,input.fire)
-  val isInf_Z    = RegEnable(isInf_S,false.B,input.fire)
-  val isZero_Z   = RegEnable(isZero_S,false.B,input.fire)
+  val majorExc_Z = RegEnable(majorExc_S, false.B, input.fire)
+  val isNaN_Z    = RegEnable(isNaN_S   , false.B, input.fire)
+  val isInf_Z    = RegEnable(isInf_S   , false.B, input.fire)
+  val isZero_Z   = RegEnable(isZero_S  , false.B, input.fire)
 
   /** invalid operation flag */
   val invalidExec = majorExc_Z &&  isNaN_Z
@@ -66,17 +66,16 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
 
 
   // sign
-  val signNext = Mux(input.bits.sqrt, Mux(rawA_S.isZero, rawA_S.sign, false.B), rawA_S.sign ^ rawB_S.sign)
+  val signNext = Mux(input.bits.sqrt, rawA_S.isZero && rawA_S.sign, rawA_S.sign ^ rawB_S.sign)
   val signReg = RegEnable(signNext, input.fire)
 
-  // sqrt input
-
-  /** construct expForSqrt
+  /** sqrt exp logic
     *
+    * {{{
     * sExp first 2 bits
     * 00 -> 10 (subnormal)
     * 01 -> 11 (true exp negative)
-    * 10 -> 00 (true exp positive)
+    * 10 -> 00 (true exp positive)}}}
     *
     */
   val expfirst2 = UIntToOH(rawA_S.sExp(expWidth, expWidth-1))
@@ -85,21 +84,20 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     Seq(
       expfirst2(0) -> "b10".U,
       expfirst2(1) -> "b11".U,
-      expfirst2(2) -> "b00".U,
-      expfirst2(3) -> "b00".U
+      expfirst2(2) -> "b00".U
     )
   )
 
   val expForSqrt = Cat(expstart, rawA_S.sExp(expWidth - 2, 0)) >> 1
   val sqrtExpIsOdd = !rawA_S.sExp(0)
-  val sqrtFractIn = Mux(sqrtExpIsOdd, Cat("b0".U(1.W), rawA_S.sig(sigWidth - 1, 0), 0.U(1.W)),
+  val sqrtFractIn = Mux(sqrtExpIsOdd, Cat(0.U(1.W), rawA_S.sig(sigWidth - 1, 0), 0.U(1.W)),
     Cat(rawA_S.sig(sigWidth - 1, 0), 0.U(2.W)))
 
   val SqrtModule = Module(new SquareRoot(2, 2, sigWidth+2, sigWidth+2))
   SqrtModule.input.bits.operand := sqrtFractIn
   SqrtModule.input.valid := input.valid && input.bits.sqrt && normalCase_S_sqrt
 
-  val rbits_sqrt = SqrtModule.output.bits.result(1) ## (!SqrtModule.output.bits.zeroRemainder || SqrtModule.output.bits.result(0))
+  val rbits_sqrt      = SqrtModule.output.bits.result(1) ## (!SqrtModule.output.bits.zeroRemainder || SqrtModule.output.bits.result(0))
   val sigToRound_sqrt = SqrtModule.output.bits.result(24, 2)
 
 
@@ -124,28 +122,28 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     *
     * */
   val needRightShift = !divModule.output.bits.quotient(27)
-  val sigToRound_div = Mux(needRightShift, divModule.output.bits.quotient(calWidth - 3, calWidth - sigWidth - 1),
+  val sigToRound_div = Mux(needRightShift,
+    divModule.output.bits.quotient(calWidth - 3, calWidth - sigWidth - 1),
     divModule.output.bits.quotient(calWidth - 2, calWidth - sigWidth))
   val rbits_div = Mux(needRightShift, divModule.output.bits.quotient(calWidth - sigWidth - 2) ## divModule.output.bits.reminder.orR,
     divModule.output.bits.quotient(calWidth - sigWidth - 1) ## divModule.output.bits.reminder.orR)
 
-
   // collect sig result
-  val sigToRound = Mux(opSqrtReg, sigToRound_sqrt, sigToRound_div)
+  val sigToRound   = Mux(opSqrtReg, sigToRound_sqrt, sigToRound_div)
   val rbitsToRound = Mux(opSqrtReg, rbits_sqrt, rbits_div)
 
   // exp logic
   val expStoreNext,expToRound = Wire(UInt((expWidth+2).W))
-  /** expStore
-    *
-    * output is 10bits SInt
+  /** expStore is 10bits SInt
     *
     * for sqrt
     * expForSqrt(7,0) effective is 8bits, MSB is sign
     * extends 2 sign bit in MSB
     * expStoreNext = 10bits
     *
-    * todo define it format, important
+    * for div
+    * rawA_S.sExp - rawB_S.sExp
+    *
     */
   expStoreNext := Mux(input.bits.sqrt,
     Cat(expForSqrt(7),expForSqrt(7),expForSqrt(7,0)),
@@ -168,7 +166,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   output.bits.result := roundresult(0)
   output.bits.exceptionFlags := roundresult(1)
 
-  input.ready := divModule.input.ready && SqrtModule.input.ready
+  input.ready  := divModule.input.ready  && SqrtModule.input.ready
   output.valid := divModule.output.valid || SqrtModule.output.valid || fastValid
 }
 
diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index a1abbb1..b01298a 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -5,10 +5,9 @@ import chisel3.util._
 
 
 /**
-  * exp is 10bits SInt, MSB is sign
-  *
-  *
-  * */
+  * exp: 10bits SInt, MSB is sign
+  * sig: 23bits
+  */
 class RoundingUnit extends Module{
   val input = IO(Input(new Bundle{
     val invalidExc = Bool() // overrides 'infiniteExc' and 'in'
@@ -47,29 +46,30 @@ class RoundingUnit extends Module{
 
   // control logic
   // set to 126 according to softfloat
-  // todo: merge it with normal case
   val exp_ForSub = (input.exp + 126.S(10.W))
   // for non subnormal case, Dist = 0
   val subnormDist = Mux(common_underflow, -exp_ForSub, 0.S(10.W))
-  // todo why we have this case? IN IEEE754 or definded by Hardfloat?
+  // todo why we have this case? IN IEEE754 or definded in Hardfloat?
   val common_totalUnderflow = subnormDist > 235.S
 
-  //-----------------subnormal loggic---------------
-
+  /** subnormal logic
+    *
+    * roundMask is 26bits mask selecting all bits will be rounded, considering subnormal case
+    *
+    */
   val distGT32 = subnormDist(9,5).orR
   val allMask = ((-1).S(31.W) << 31 >> subnormDist(5,0))
   val distIn24And31 = allMask(6,0).orR
-  // subnorm case when Dist>24
   val distGT24 = (distGT32 || distIn24And31) && common_underflow
   val roundMask = Mux(!distGT24, Reverse(allMask(30,7)) ## 3.U(2.W), 0.U(26.W))
 
   val shiftedRoundMask = Mux(!distGT24, 0.U(1.W) ## roundMask >> 1 , BigInt(-1).S(26.W).asUInt)
-  /** select the first bit need to be  rounded */
+  /** select the guard bit need to be  rounded */
   val roundPosMask = ~shiftedRoundMask & roundMask
 
   val adjustedSig = Cat(1.U(1.W), input.sig, input.rBits)
   val roundPosBit = (adjustedSig & roundPosMask).orR
-  /** Any bits is one after guard bit */
+  /** Any bits is one after guard bit  => sticky bit */
   val anyRoundExtra = (adjustedSig & shiftedRoundMask).orR
   /** Any bits is one containing guard bit */
   val anyRound = roundPosBit || anyRoundExtra
@@ -88,22 +88,22 @@ class RoundingUnit extends Module{
 
   /** sig_afterInc doesn;t cover distEQ24 */
   sub_sigOut := Mux(distGT24 || distEQ24 ,Mux(sigIncr,1.U(26.W), 0.U(26.W)),(sig_afterInc >> subnormDist(4,0))(24,2))
-  /** when subnormDist===1.S, there may be expInc*/
+  /** when subnormDist===1.S, there may be expInc */
   expInc := sig_afterInc(26)  && (!common_underflow || subnormDist === 1.S )
   common_subnormSigOut := Mux(common_totalUnderflow, 0.U ,sub_sigOut )
 
   val sigIncrement = Mux(sigIncr,lastBitMask, 0.U(26.W))
   sig_afterInc := adjustedSig +& sigIncrement
 
-  // Exceptions
+  /** Exceptions output */
   val isNaNOut = input.invalidExc || input.isNaN
   val notNaN_isSpecialInfOut = (input.infiniteExc || input.isInf) && (!input.invalidExc) && (!input.isNaN)
   val notNaN_isZero = input.isZero && !isNaNOut
   val commonCase = !isNaNOut && !notNaN_isSpecialInfOut && !input.isZero
 
-  val overflow = commonCase && common_overflow
+  val overflow  = commonCase && common_overflow
   val underflow = commonCase && (common_underflow && rbits.orR)
-  val inexact = overflow || (commonCase && common_inexact)
+  val inexact   = overflow || (commonCase && common_inexact)
 
   val isZero = input.isZero && underflow
 
@@ -118,21 +118,20 @@ class RoundingUnit extends Module{
     )
   )
 
-  // exception data with Spike
+  /** qNaN in Spike */
   val quietNaN = "h7FC00000".U
 
   val infiniteOut = Cat(input.sign, "h7F800000".U)
   val zeroOut = Cat(input.sign, 0.U(31.W))
   val outSele1H = commonCase ## notNaN_isSpecialInfOut ## isNaNOut ## notNaN_isZero
 
-  /** @todo opt it using hardfloat methods
+  /** common_overflow = input.exp > 127.S
     *
-    * overflow : > 127
-    * */
-
-  common_overflow := input.exp(8,7).orR && !input.exp(9)
+    * @todo opt it using hardfloat methods?
+    */
+  common_overflow  := input.exp(8,7).orR && !input.exp(9)
   common_underflow := exp_ForSub(9)
-  common_inexact := input.rBits.orR || (common_underflow && rbits.orR)
+  common_inexact   := anyRound
 
   val common_sigOut = sig_afterInc(24,2)
   val common_expOut = expBiasedOut

From 55c1b7e1de48b846844af605a809bb400c5c8bb0 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 28 Aug 2023 12:48:54 +0800
Subject: [PATCH 076/109] [doc] add doc to divsqrt

---
 arithmetic/src/float/DivSqrt.scala | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index 60812f3..3d31d2e 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -5,6 +5,29 @@ import chisel3.util._
 import division.srt.srt16._
 import sqrt._
 
+/**
+  * DIV
+  * input
+  * {{{
+  * dividend = 0.1f  -> 1f +"00000" right extends to 32
+  * divisor  = 0.1f  -> 1f +"00000" right extends to 32
+  * }}}
+  *
+  * output = 0.01f or 0.1f, LSB 28bits effective
+  * {{{
+  * 0.01f: 28bits=01f f=sig=select(25,3)
+  * 0.1f : 28bits=1f  f=sig=select(26,4)
+  * }}}
+  *
+  * SQRT
+  * {{{
+  * expLSB   rawExpLSB    Sig             SigIn     expOut
+  *      0           1    1.xxxx>>2<<1    1xxxx0    rawExp/2 +1 + bias
+  *      1           0    1.xxxx>>2       01xxxx    rawExp/2 +1 + bias
+  * }}}
+  *
+  *
+  */
 class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val fpWidth = expWidth + sigWidth
   val calWidth = 28

From 6677f04f89013feef57c804a14484d2b2bb6564c Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 28 Aug 2023 12:53:14 +0800
Subject: [PATCH 077/109] [doc] clean

---
 arithmetic/src/float/DivSqrtTester.scala | 10 ----------
 1 file changed, 10 deletions(-)
 delete mode 100644 arithmetic/src/float/DivSqrtTester.scala

diff --git a/arithmetic/src/float/DivSqrtTester.scala b/arithmetic/src/float/DivSqrtTester.scala
deleted file mode 100644
index 9e8be3b..0000000
--- a/arithmetic/src/float/DivSqrtTester.scala
+++ /dev/null
@@ -1,10 +0,0 @@
-package float
-
-import chisel3._
-import chisel3.util._
-import division.srt.srt16._
-import sqrt._
-
-class DivSqrtTester extends Module{
-
-}
\ No newline at end of file

From bedf206e61639512db04b1cb8b88ba072c8db855 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 20 Sep 2023 12:40:06 +0800
Subject: [PATCH 078/109] [doc] add doc for divsqrt

---
 arithmetic/src/float/DivFloat.scala           | 112 ------------------
 arithmetic/src/float/DivSqrt.scala            |  19 ++-
 arithmetic/src/float/RoundingUnit.scala       |  41 ++++---
 arithmetic/src/float/SqrtFloat.scala          |  94 ---------------
 .../tests/src/float/DivFloatTester.scala      |  92 --------------
 .../tests/src/float/SqrtFloatTester.scala     |  86 --------------
 6 files changed, 32 insertions(+), 412 deletions(-)
 delete mode 100644 arithmetic/src/float/DivFloat.scala
 delete mode 100644 arithmetic/src/float/SqrtFloat.scala
 delete mode 100644 arithmetic/tests/src/float/DivFloatTester.scala
 delete mode 100644 arithmetic/tests/src/float/SqrtFloatTester.scala

diff --git a/arithmetic/src/float/DivFloat.scala b/arithmetic/src/float/DivFloat.scala
deleted file mode 100644
index 55da882..0000000
--- a/arithmetic/src/float/DivFloat.scala
+++ /dev/null
@@ -1,112 +0,0 @@
-package float
-
-import chisel3._
-import chisel3.util._
-import division.srt.srt16._
-
-
-/**
-  * input
-  * {{{
-  * dividend = 0.1f  -> 1f +"00000" right extends to 32
-  * divisor  = 0.1f  -> 1f +"00000" right extends to 32
-  * }}}
-  *
-  * output = 0.01f or 0.1f, LSB 28bits effective
-  * {{{
-  * 0.01f: 28bits=01f f=sig=select(25,3)
-  * 0.1f : 28bits=1f  f=sig=select(26,4)
-  * }}}
-  *
-  *
-  * */
-class DivFloat(expWidth: Int, sigWidth: Int) extends Module{
-  val fpWidth = expWidth + sigWidth
-  val calWidth = 28
-  val input = IO(Flipped(DecoupledIO(new FloatDivInput(8, 24))))
-  val output = IO(ValidIO(new FloatDivOutput(8, 24)))
-
-
-  // for div, don't need to calculate rawExp
-  val rawA_S = rawFloatFromFN(expWidth,sigWidth,input.bits.dividend)
-  val rawB_S  = rawFloatFromFN(expWidth,sigWidth,input.bits.divisor)
-
-
-
-  // Data Path
-  val dividendIn = Wire(UInt((fpWidth).W))
-  val divisorIn = Wire(UInt((fpWidth).W))
-  val expStoreNext = Wire(UInt(expWidth.W))
-  val expToRound = Wire(UInt(expWidth.W))
-
-  val sign = rawA_S.sign ^ rawB_S.sign
-  val signReg = RegEnable(sign, input.fire)
-
-  // divIter logic
-
-  dividendIn := Cat(1.U(1.W), rawA_S.sig(sigWidth-2, 0), 0.U(expWidth.W))
-  divisorIn  := Cat(1.U(1.W), rawB_S.sig(sigWidth-2, 0), 0.U(expWidth.W))
-
-  val divModule = Module(new SRT16(fpWidth,fpWidth,fpWidth))
-  divModule.input.bits.dividend := dividendIn
-  divModule.input.bits.divider  := divisorIn
-  divModule.input.bits.counter  := 8.U
-
-  divModule.input.valid := input.valid
-  input.ready  := divModule.input.ready
-  output.valid := divModule.output.valid
-
-
-  val needNormNext = input.bits.divisor(sigWidth-2, 0) > input.bits.dividend(sigWidth-2, 0)
-  val needNorm = RegEnable(needNormNext,input.fire)
-
-  expStoreNext := input.bits.dividend(fpWidth-1, sigWidth-1) - input.bits.divisor(fpWidth-1, sigWidth-1)
-  val expStore = RegEnable(expStoreNext, 0.U(expWidth.W), input.fire)
-  expToRound := expStore - needNorm
-
-
-  val sigToRound = Mux(needNorm, divModule.output.bits.quotient(calWidth-3, calWidth-sigWidth-1),
-    divModule.output.bits.quotient(calWidth-2, calWidth-sigWidth))
-  val rbits      = Mux(needNorm, divModule.output.bits.quotient(calWidth-sigWidth-2)##1.U(1.W),
-    divModule.output.bits.quotient(calWidth-sigWidth-1)##1.U(1.W))
-
-
-  /** @todo exceptions
-    *
-    *       >256
-    *       subnormal
-    * */
-
-  val invalidExec = false.B
-  val infinitExec = false.B
-
-
-
-  val roundresult = RoundingUnit(
-    signReg,
-    expToRound.asSInt,
-    sigToRound,
-    rbits,
-    consts.round_near_even,
-    invalidExec,
-    infinitExec,
-    false.B,
-    false.B,
-    false.B)
-
-  output.bits.result := roundresult(0)
-  output.bits.sig := output.bits.result(sigWidth-2, 0)
-  output.bits.exp := output.bits.result(fpWidth-1, sigWidth-1)
-
-
-}
-class FloatDivInput(expWidth: Int, sigWidth: Int) extends Bundle() {
-  val dividend = UInt((expWidth + sigWidth).W)
-  val divisor  = UInt((expWidth + sigWidth).W)
-}
-
-class FloatDivOutput(expWidth: Int, sigWidth: Int) extends Bundle() {
-  val result = UInt((expWidth + sigWidth).W)
-  val sig = UInt((sigWidth-1).W)
-  val exp = UInt(expWidth.W)
-}
\ No newline at end of file
diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index 3d31d2e..298fc79 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -5,8 +5,7 @@ import chisel3.util._
 import division.srt.srt16._
 import sqrt._
 
-/**
-  * DIV
+/** DIV
   * input
   * {{{
   * dividend = 0.1f  -> 1f +"00000" right extends to 32
@@ -25,8 +24,6 @@ import sqrt._
   *      0           1    1.xxxx>>2<<1    1xxxx0    rawExp/2 +1 + bias
   *      1           0    1.xxxx>>2       01xxxx    rawExp/2 +1 + bias
   * }}}
-  *
-  *
   */
 class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val fpWidth = expWidth + sigWidth
@@ -98,11 +95,11 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     * sExp first 2 bits
     * 00 -> 10 (subnormal)
     * 01 -> 11 (true exp negative)
-    * 10 -> 00 (true exp positive)}}}
-    *
+    * 10 -> 00 (true exp positive)
+    * }}}
     */
   val expfirst2 = UIntToOH(rawA_S.sExp(expWidth, expWidth-1))
-  /** @todo expfirst2(3) never happens */
+  /** expfirst2(3) never happens */
   val expstart  = Mux1H(
     Seq(
       expfirst2(0) -> "b10".U,
@@ -139,11 +136,12 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
 
   /** collect div sig result
     *
+    * {{{
     * when B_sig > A_sig
     * divout = 0000,01xxx
     * exp need decrease by 1
-    *
-    * */
+    * }}}
+    */
   val needRightShift = !divModule.output.bits.quotient(27)
   val sigToRound_div = Mux(needRightShift,
     divModule.output.bits.quotient(calWidth - 3, calWidth - sigWidth - 1),
@@ -160,13 +158,14 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   /** expStore is 10bits SInt
     *
     * for sqrt
+    * {{{
     * expForSqrt(7,0) effective is 8bits, MSB is sign
     * extends 2 sign bit in MSB
     * expStoreNext = 10bits
+    * }}}
     *
     * for div
     * rawA_S.sExp - rawB_S.sExp
-    *
     */
   expStoreNext := Mux(input.bits.sqrt,
     Cat(expForSqrt(7),expForSqrt(7),expForSqrt(7,0)),
diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index b01298a..ab8220f 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -37,13 +37,12 @@ class RoundingUnit extends Module{
   val common_inexact  = Wire(Bool())
 
   val sigIncr = Wire(Bool())
-  val expBiasedOut = Wire(UInt(8.W))
+  val common_expOut = Wire(UInt(8.W))
+  val common_sigOut = Wire(UInt(23.W))
   val sig_afterInc = Wire(UInt(27.W))
   val sub_sigOut, common_subnormSigOut = Wire(UInt(23.W))
   val expInc = Wire(UInt(8.W))
 
-  expBiasedOut := ((input.exp + 127.S)(7,0) + expInc).asUInt
-
   // control logic
   // set to 126 according to softfloat
   val exp_ForSub = (input.exp + 126.S(10.W))
@@ -52,47 +51,53 @@ class RoundingUnit extends Module{
   // todo why we have this case? IN IEEE754 or definded in Hardfloat?
   val common_totalUnderflow = subnormDist > 235.S
 
+  /** contains the hidden 1 and rBits */
+  val adjustedSig = Cat(1.U(1.W), input.sig, input.rBits)
+
   /** subnormal logic
     *
     * roundMask is 26bits mask selecting all bits will be rounded, considering subnormal case
-    *
     */
   val distGT32 = subnormDist(9,5).orR
   val allMask = ((-1).S(31.W) << 31 >> subnormDist(5,0))
   val distIn24And31 = allMask(6,0).orR
   val distGT24 = (distGT32 || distIn24And31) && common_underflow
+  /** last 2 bits is rbits, always 1s */
   val roundMask = Mux(!distGT24, Reverse(allMask(30,7)) ## 3.U(2.W), 0.U(26.W))
-
+  /** mask for all bits after guard bit */
   val shiftedRoundMask = Mux(!distGT24, 0.U(1.W) ## roundMask >> 1 , BigInt(-1).S(26.W).asUInt)
-  /** select the guard bit need to be  rounded */
+  /** select the guard bit need to be rounded */
   val roundPosMask = ~shiftedRoundMask & roundMask
-
-  val adjustedSig = Cat(1.U(1.W), input.sig, input.rBits)
   val roundPosBit = (adjustedSig & roundPosMask).orR
   /** Any bits is one after guard bit  => sticky bit */
   val anyRoundExtra = (adjustedSig & shiftedRoundMask).orR
   /** Any bits is one containing guard bit */
   val anyRound = roundPosBit || anyRoundExtra
 
-  val lastBitMask = (roundPosMask<<1.U)(25,0)
+  /** the last effective bit */
+  val lastBitMask = (roundPosMask << 1.U)(25,0)
   val lastBit = (adjustedSig & lastBitMask ).orR
 
   val distEQ24 = roundPosMask(25) && !roundPosMask(24,0).orR
-
+  /** 2 bits for final rounding */
   val rbits : UInt= Cat(roundPosBit, anyRoundExtra)
 
   sigIncr := (roundingMode_near_even && (rbits.andR || (lastBit && rbits==="b10".U))) ||
-    (roundingMode_min && input.sign && rbits.orR) ||
-    (roundingMode_max && !input.sign && rbits.orR) ||
+    (roundingMode_min &&  input.sign &&  rbits.orR) ||
+    (roundingMode_max && !input.sign &&  rbits.orR) ||
     (roundingMode_near_maxMag && rbits(1))
 
-  /** sig_afterInc doesn;t cover distEQ24 */
-  sub_sigOut := Mux(distGT24 || distEQ24 ,Mux(sigIncr,1.U(26.W), 0.U(26.W)),(sig_afterInc >> subnormDist(4,0))(24,2))
+  /** sig_afterInc won't cover distEQ24 */
+  sub_sigOut := Mux(
+    distGT24 || distEQ24,
+    Mux(sigIncr, 1.U(23.W), 0.U(23.W)),
+    (sig_afterInc >> subnormDist(4,0))(24,2))
   /** when subnormDist===1.S, there may be expInc */
   expInc := sig_afterInc(26)  && (!common_underflow || subnormDist === 1.S )
-  common_subnormSigOut := Mux(common_totalUnderflow, 0.U ,sub_sigOut )
+  common_subnormSigOut := Mux(common_totalUnderflow, 0.U, sub_sigOut )
 
-  val sigIncrement = Mux(sigIncr,lastBitMask, 0.U(26.W))
+  /** conforms to last bit position */
+  val sigIncrement = Mux(sigIncr, lastBitMask, 0.U(26.W))
   sig_afterInc := adjustedSig +& sigIncrement
 
   /** Exceptions output */
@@ -133,8 +138,8 @@ class RoundingUnit extends Module{
   common_underflow := exp_ForSub(9)
   common_inexact   := anyRound
 
-  val common_sigOut = sig_afterInc(24,2)
-  val common_expOut = expBiasedOut
+  common_sigOut := sig_afterInc(24,2)
+  common_expOut := ((input.exp + 127.S)(7,0) + expInc).asUInt
 
 
   val common_out = Mux(common_overflow, common_infiniteOut,
diff --git a/arithmetic/src/float/SqrtFloat.scala b/arithmetic/src/float/SqrtFloat.scala
deleted file mode 100644
index 32de581..0000000
--- a/arithmetic/src/float/SqrtFloat.scala
+++ /dev/null
@@ -1,94 +0,0 @@
-package float
-
-import chisel3._
-import chisel3.util._
-import sqrt._
-
-/**
-  *
-  * @todo Opt for zero
-  *       input is Subnormal!
-  *
-  * */
-class SqrtFloat(expWidth: Int, sigWidth: Int) extends Module{
-  class FloatSqrtInput(expWidth: Int, sigWidth: Int) extends Bundle() {
-    val oprand = UInt((expWidth + sigWidth).W)
-  }
-
-  /** add 2 for rounding */
-  class FloatSqrtOutput(expWidth: Int, sigWidth: Int) extends Bundle() {
-    val result = UInt((expWidth + sigWidth).W)
-    val sig = UInt((sigWidth + 2).W)
-    val exp = UInt(expWidth.W)
-
-    //  val exceptionFlags = UInt(5.W)
-  }
-
-  val input = IO(Flipped(DecoupledIO(new FloatSqrtInput(expWidth, sigWidth))))
-  val output = IO(ValidIO(new FloatSqrtOutput(expWidth, sigWidth)))
-  val rawFloatIn = rawFloatFromFN(expWidth,sigWidth,input.bits.oprand)
-
-  /** Control path */
-  val isNegaZero = rawFloatIn.isZero && rawFloatIn.sign
-  val isPosiInf  = rawFloatIn.isInf  && rawFloatIn.sign
-
-  val fastWorking = RegInit(false.B)
-  val fastCase = Wire(Bool())
-
-  /** negative or NaN*/
-  val invalidExec = (rawFloatIn.sign && !isNegaZero) || rawFloatIn.isNaN
-  /** positive inf */
-  val infinitExec = isPosiInf
-
-  fastCase := invalidExec || infinitExec
-  fastWorking := input.fire && fastCase
-
-
-
-  /** Data path
-    *
-    * {{{
-    * expLSB   rawExpLSB    Sig             SigIn     expOut
-    *      0           1    1.xxxx>>2<<1    1xxxx0    rawExp/2 +1 + bias
-    *      1           0    1.xxxx>>2       01xxxx    rawExp/2 +1 + bias
-    * }}}
-    *
-    */
-
-  val adjustedExp = Cat(rawFloatIn.sExp(expWidth-1), rawFloatIn.sExp(expWidth-1, 0))
-  val expStore = RegEnable(adjustedExp(expWidth,1), 0.U(expWidth.W), input.fire)
-  val expToRound = expStore
-
-  val sqrtExIsEven = input.bits.oprand(sigWidth - 1)
-  val fractIn = Mux(sqrtExIsEven, Cat("b0".U(1.W),rawFloatIn.sig(sigWidth-1, 0),0.U(1.W)),
-    Cat(rawFloatIn.sig(sigWidth-1, 0),0.U(2.W)))
-
-  val SqrtModule = Module(new SquareRoot(2, 2, 26, 26))
-  SqrtModule.input.valid := input.valid && !fastCase
-  SqrtModule.input.bits.operand := fractIn
-
-  val rbits = SqrtModule.output.bits.result(1) ## (!SqrtModule.output.bits.zeroRemainder || SqrtModule.output.bits.result(0))
-  val sigforRound = SqrtModule.output.bits.result(24,2)
-
-
-  input.ready := SqrtModule.input.ready
-  val roundresult = RoundingUnit(
-    input.bits.oprand(expWidth + sigWidth-1) ,
-    expToRound.asSInt,
-    sigforRound,
-    rbits,
-    consts.round_near_even,
-    invalidExec,
-    infinitExec,
-    false.B,
-    false.B,
-    false.B
-  )
-  output.bits.result := roundresult(0)
-  output.bits.sig := output.bits.result(sigWidth-2, 0)
-  output.bits.exp := output.bits.result(30,23)
-  output.valid := SqrtModule.output.valid || fastWorking
-
-}
-
-
diff --git a/arithmetic/tests/src/float/DivFloatTester.scala b/arithmetic/tests/src/float/DivFloatTester.scala
deleted file mode 100644
index 8667d39..0000000
--- a/arithmetic/tests/src/float/DivFloatTester.scala
+++ /dev/null
@@ -1,92 +0,0 @@
-package float
-
-import chisel3._
-import chiseltest._
-import utest._
-
-import scala.util.Random
-
-import division.srt.srt16._
-
-object DivFloatTester extends TestSuite with ChiselUtestTester {
-  def tests: Tests = Tests {
-    test("Div Float should pass") {
-      def testcase(width: Int): Unit = {
-        def extendTofull(input:String, width:Int) =(Seq.fill(width - input.length)("0").mkString("") + input)
-        val n:         Int = width
-
-        val xFloat = (Random.nextInt(100000) + Random.nextFloat() ).toFloat
-        val dFloat = (Random.nextInt(100000) + Random.nextFloat() ).toFloat
-
-        val xFloatString = extendTofull(java.lang.Float.floatToIntBits(xFloat).toBinaryString, 32)
-        val dFloatString = extendTofull(java.lang.Float.floatToIntBits(dFloat).toBinaryString, 32)
-        val xInput = "b"+xFloatString
-        val dInput = "b"+dFloatString
-
-        val qDouble = xFloat / dFloat
-        val q = qDouble.toFloat
-        val qFloatString = extendTofull(java.lang.Float.floatToIntBits(q).toBinaryString, 32)
-        val sig_Expect = qFloatString.substring(9, 32)
-        val exp_Expect = qFloatString.substring(1, 9)
-
-        // test
-        testCircuit(
-          new DivFloat(8,24),
-          Seq(chiseltest.internal.NoThreadingAnnotation, chiseltest.simulator.WriteVcdAnnotation)
-        ) { dut: DivFloat =>
-          dut.clock.setTimeout(0)
-          dut.input.valid.poke(true.B)
-          dut.input.bits.dividend.poke((xInput).U)
-          dut.input.bits.divisor.poke((dInput).U)
-          dut.clock.step()
-          dut.input.valid.poke(false.B)
-          var flag = false
-          for (a <- 1 to 1000 if !flag) {
-            if (dut.output.valid.peek().litValue == 1) {
-              flag = true
-
-              val sig_Actual = extendTofull(dut.output.bits.sig.peek().litValue.toString(2),23)
-              val exp_Actual = extendTofull(dut.output.bits.exp.peek().litValue.toString(2),8)
-              val result_Actual = extendTofull(dut.output.bits.result.peek().litValue.toString(2),32)
-
-              val expInt_Actual = Integer.parseInt(exp_Actual,2)
-              val expInt_Expect = Integer.parseInt(exp_Expect,2)
-
-              def printvalue(): Unit = {
-
-                println(xFloat.toString + "/ " + dFloat.toString + "="+ qDouble.toString)
-                println("expInt_Actual = " + expInt_Actual)
-                println("expInt_Expect = " + expInt_Expect)
-
-//                println("all q = " + quotient_actual)
-//                println("all q size ="+ quotient_actual.length.toString)
-
-                println("sig_expect = " + sig_Expect)
-                println("sig_actual = " + sig_Actual)
-
-              }
-              if((sig_Expect != sig_Actual)|| (exp_Actual != exp_Expect)||(result_Actual != qFloatString)){
-                printvalue()
-                utest.assert(sig_Expect == sig_Actual)
-                utest.assert(exp_Expect == exp_Actual)
-                utest.assert(result_Actual != qFloatString)
-              }
-
-
-
-            }
-            dut.clock.step()
-          }
-          utest.assert(flag)
-          dut.clock.step(scala.util.Random.nextInt(10))
-        }
-      }
-
-
-      for (i <- 1 to 100) {
-        testcase(32)
-      }
-
-    }
-  }
-}
\ No newline at end of file
diff --git a/arithmetic/tests/src/float/SqrtFloatTester.scala b/arithmetic/tests/src/float/SqrtFloatTester.scala
deleted file mode 100644
index bdaa937..0000000
--- a/arithmetic/tests/src/float/SqrtFloatTester.scala
+++ /dev/null
@@ -1,86 +0,0 @@
-package float
-
-import chisel3._
-import chiseltest._
-import utest._
-import scala.util.{Random}
-import scala.math._
-
-object SqrtFloatTester extends TestSuite with ChiselUtestTester {
-  def tests: Tests = Tests {
-    test("Sqrt Float FP32 should pass") {
-      def testcase(): Unit = {
-        def extendTofull(input:String, width:Int) =(Seq.fill(width - input.length)("0").mkString("") + input)
-        val oprandFloat:  Float = Random.nextInt(1000000)+Random.nextFloat()
-        val oprandDouble: Double = oprandFloat.toDouble
-
-        val oprandString = extendTofull(java.lang.Float.floatToIntBits(oprandFloat).toBinaryString,32)
-        val oprandSigString = oprandString.substring(9, 32)
-
-        val oprandInput = "b"+ oprandString
-
-        val t = sqrt(oprandDouble)
-        val tFloatString = extendTofull(java.lang.Float.floatToIntBits(t.toFloat).toBinaryString,32)
-        // 0.xxxxxx,   hidden 1+23bits + 2bits for round
-        val sigExpect =   tFloatString.substring(9,32)
-        val expExpect =   tFloatString.substring(1,9)
-
-        // test
-        testCircuit(
-          new SqrtFloat(8,24),
-          Seq(chiseltest.internal.NoThreadingAnnotation, chiseltest.simulator.WriteVcdAnnotation)
-        ) { dut: SqrtFloat =>
-          dut.clock.setTimeout(0)
-          dut.input.valid.poke(true.B)
-          dut.input.bits.oprand.poke(oprandInput.U)
-          dut.clock.step()
-          dut.input.valid.poke(false.B)
-          var flag = false
-          for (i <- 0 to 1000 if !flag) {
-            if (dut.output.valid.peek().litValue == 1) {
-              flag = true
-              val resultActual = extendTofull(dut.output.bits.result.peek().litValue.toString(2),32)
-              val sigActual = extendTofull(dut.output.bits.sig.peek().litValue.toString(2),23)
-              val expActual = extendTofull(dut.output.bits.exp.peek().litValue.toString(2),8)
-
-              def printValue() :Unit = {
-                println(oprandFloat.toString + ".sqrtx = " + t.toString)
-                println("input = " + oprandInput)
-                println("exp_expect = " + expExpect)
-                println("exp_actual = " + expActual)
-                println("sig_expect = " + sigExpect)
-                println("sig_actual = " + sigActual)
-                println("result_expect = " + tFloatString)
-                println("result_actual = " + resultActual)
-              }
-
-
-              if(sigExpect != sigActual ){
-                printValue()
-                utest.assert(sigExpect  == sigActual)
-              }
-
-              if (expActual != expExpect) {
-                printValue()
-                utest.assert(expActual ==expExpect)
-              }
-
-              if(resultActual != tFloatString) {
-                printValue()
-                utest.assert(resultActual == tFloatString)
-              }
-
-            } else
-              dut.clock.step()
-          }
-          utest.assert(flag)
-        }
-      }
-
-      for (i <- 1 to 100) {
-        testcase()
-      }
-
-    }
-  }
-}
\ No newline at end of file

From e96ec7c41df3e086002ac83c8b58e9220e693db9 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 20 Sep 2023 12:50:34 +0800
Subject: [PATCH 079/109] [doc] add doc for roundingUnit

---
 arithmetic/src/float/RoundingUnit.scala | 31 ++++++++++++++-----------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index ab8220f..e72259b 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -3,10 +3,18 @@ package float
 import chisel3._
 import chisel3.util._
 
-
-/**
+/** RoundingUnit for all cases
+  *
+  * functions
+  * {{{
+  *   add bias to exp
+  *   deal with exceptions and produce flags
+  *   do rounding
+  *   construct FP32 output if result is subnormal
+  * }}}
+  *
   * exp: 10bits SInt, MSB is sign
-  * sig: 23bits
+  * sig: 23bits UInt
   */
 class RoundingUnit extends Module{
   val input = IO(Input(new Bundle{
@@ -54,24 +62,24 @@ class RoundingUnit extends Module{
   /** contains the hidden 1 and rBits */
   val adjustedSig = Cat(1.U(1.W), input.sig, input.rBits)
 
-  /** subnormal logic
-    *
-    * roundMask is 26bits mask selecting all bits will be rounded, considering subnormal case
-    */
+  // rounding logic
   val distGT32 = subnormDist(9,5).orR
   val allMask = ((-1).S(31.W) << 31 >> subnormDist(5,0))
   val distIn24And31 = allMask(6,0).orR
   val distGT24 = (distGT32 || distIn24And31) && common_underflow
-  /** last 2 bits is rbits, always 1s */
+  /** 26bits mask selecting all bits will be rounded, considering subnormal case
+    *
+    * last 2 bits is rbits, always 1s
+    */
   val roundMask = Mux(!distGT24, Reverse(allMask(30,7)) ## 3.U(2.W), 0.U(26.W))
   /** mask for all bits after guard bit */
   val shiftedRoundMask = Mux(!distGT24, 0.U(1.W) ## roundMask >> 1 , BigInt(-1).S(26.W).asUInt)
   /** select the guard bit need to be rounded */
   val roundPosMask = ~shiftedRoundMask & roundMask
   val roundPosBit = (adjustedSig & roundPosMask).orR
-  /** Any bits is one after guard bit  => sticky bit */
+  /** Any bit is one after guard bit => sticky bit */
   val anyRoundExtra = (adjustedSig & shiftedRoundMask).orR
-  /** Any bits is one containing guard bit */
+  /** Any bit is one containing guard bit */
   val anyRound = roundPosBit || anyRoundExtra
 
   /** the last effective bit */
@@ -110,8 +118,6 @@ class RoundingUnit extends Module{
   val underflow = commonCase && (common_underflow && rbits.orR)
   val inexact   = overflow || (commonCase && common_inexact)
 
-  val isZero = input.isZero && underflow
-
   val overflowSele = roundingMode_min ## roundingMode_max ## roundingMode_toZero ## (roundingMode_near_even || roundingMode_near_maxMag)
 
   val common_infiniteOut = Mux1H(
@@ -122,7 +128,6 @@ class RoundingUnit extends Module{
       overflowSele(3) -> Mux(input.sign, "hFF800000".U(32.W), "h7F7FFFFF".U(32.W)),
     )
   )
-
   /** qNaN in Spike */
   val quietNaN = "h7FC00000".U
 

From 395168e0a1c8c5ee294bcd2ec7ba9fe0b9ed0379 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 20 Sep 2023 12:55:18 +0800
Subject: [PATCH 080/109] [build system] remove some ivys

---
 build.sc  | 2 --
 common.sc | 2 --
 2 files changed, 4 deletions(-)

diff --git a/build.sc b/build.sc
index 5061cab..69e00cd 100644
--- a/build.sc
+++ b/build.sc
@@ -33,8 +33,6 @@ trait Arithmetic
 
   def evilplotIvy = v.evilplot
 
-  def oslibIvy = v.oslib
-
   def chiselModule = None
 
   def chiselPluginJar = None
diff --git a/common.sc b/common.sc
index 3efeccd..58ae3a3 100644
--- a/common.sc
+++ b/common.sc
@@ -33,8 +33,6 @@ trait ArithmeticModule
 
   def evilplotIvy: T[Dep]
 
-  def oslibIvy: T[Dep]
-
   override def ivyDeps = T(super.ivyDeps() ++ Seq(spireIvy(), evilplotIvy()))
 }
 

From 514b6ab470a2c96921831ce762da70ad349cb45b Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 20 Sep 2023 13:30:01 +0800
Subject: [PATCH 081/109] [reformat] reformat

---
 arithmetic/src/float/DivSqrt.scala      | 99 ++++++++++++-------------
 arithmetic/src/float/RoundingUnit.scala | 84 ++++++++++-----------
 2 files changed, 91 insertions(+), 92 deletions(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index 298fc79..9737f10 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -34,59 +34,58 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val opSqrtReg       = RegEnable(input.bits.sqrt        , false.B, input.fire)
   val roundingModeReg = RegEnable(input.bits.roundingMode, 0.U    , input.fire)
 
-  val rawA_S = rawFloatFromFN(expWidth, sigWidth, input.bits.a)
-  val rawB_S = rawFloatFromFN(expWidth, sigWidth, input.bits.b)
+  val rawA = rawFloatFromFN(expWidth, sigWidth, input.bits.a)
+  val rawB = rawFloatFromFN(expWidth, sigWidth, input.bits.b)
 
   // Exceptions
 
   /** inf/inf and 0/0  => NaN out */
-  val notSigNaNIn_invalidExc_S_div =
-    (rawA_S.isZero && rawB_S.isZero) || (rawA_S.isInf && rawB_S.isInf)
+  val divNotSigNaNInButInvalidExc =
+    (rawA.isZero && rawB.isZero) || (rawA.isInf && rawB.isInf)
   /** -Inf + -normal => NaN out */
-  val notSigNaNIn_invalidExc_S_sqrt =
-    !rawA_S.isNaN && !rawA_S.isZero && rawA_S.sign
+  val sqrtNotSigNaNInButInvalidExc =
+    !rawA.isNaN && !rawA.isZero && rawA.sign
   /** isSigNaNRawFloat detect signaling NaN */
-  val majorExc_S =
+  val majorExc =
     Mux(input.bits.sqrt,
-      isSigNaNRawFloat(rawA_S) || notSigNaNIn_invalidExc_S_sqrt,
-      isSigNaNRawFloat(rawA_S) || isSigNaNRawFloat(rawB_S) ||
-        notSigNaNIn_invalidExc_S_div ||
-        (!rawA_S.isNaN && !rawA_S.isInf && rawB_S.isZero)
+      isSigNaNRawFloat(rawA) || sqrtNotSigNaNInButInvalidExc,
+      isSigNaNRawFloat(rawA) || isSigNaNRawFloat(rawB) ||
+        divNotSigNaNInButInvalidExc ||
+        (!rawA.isNaN && !rawA.isInf && rawB.isZero)
     )
 
   /** all cases result in NaN output */
-  val isNaN_S =
+  val isNaN =
     Mux(input.bits.sqrt,
-      rawA_S.isNaN || notSigNaNIn_invalidExc_S_sqrt,
-      rawA_S.isNaN || rawB_S.isNaN || notSigNaNIn_invalidExc_S_div
+      rawA.isNaN || sqrtNotSigNaNInButInvalidExc,
+      rawA.isNaN || rawB.isNaN || divNotSigNaNInButInvalidExc
     )
-  val isInf_S  = Mux(input.bits.sqrt, rawA_S.isInf, rawA_S.isInf || rawB_S.isZero)
-  val isZero_S = Mux(input.bits.sqrt, rawA_S.isZero, rawA_S.isZero || rawB_S.isInf)
+  val isInf  = Mux(input.bits.sqrt, rawA.isInf, rawA.isInf || rawB.isZero)
+  val isZero = Mux(input.bits.sqrt, rawA.isZero, rawA.isZero || rawB.isInf)
 
-  val majorExc_Z = RegEnable(majorExc_S, false.B, input.fire)
-  val isNaN_Z    = RegEnable(isNaN_S   , false.B, input.fire)
-  val isInf_Z    = RegEnable(isInf_S   , false.B, input.fire)
-  val isZero_Z   = RegEnable(isZero_S  , false.B, input.fire)
+  val majorExcReg = RegEnable(majorExc, false.B, input.fire)
+  val isNaNReg    = RegEnable(isNaN   , false.B, input.fire)
+  val isInfReg    = RegEnable(isInf   , false.B, input.fire)
+  val isZeroReg   = RegEnable(isZero  , false.B, input.fire)
 
   /** invalid operation flag */
-  val invalidExec = majorExc_Z &&  isNaN_Z
+  val invalidExec = majorExcReg &&  isNaNReg
 
   /** DivideByZero flag */
-  val infinitExec = majorExc_Z && !isNaN_Z
+  val infinitExec = majorExcReg && !isNaNReg
 
-  val specialCaseA_S = rawA_S.isNaN || rawA_S.isInf || rawA_S.isZero
-  val specialCaseB_S = rawB_S.isNaN || rawB_S.isInf || rawB_S.isZero
-  val normalCase_S_div = !specialCaseA_S && !specialCaseB_S
-  val normalCase_S_sqrt = !specialCaseA_S && !rawA_S.sign
-  val normalCase_S = Mux(input.bits.sqrt, normalCase_S_sqrt, normalCase_S_div)
-  val specialCase_S = !normalCase_S
+  val specialCaseA = rawA.isNaN || rawA.isInf || rawA.isZero
+  val specialCaseB = rawB.isNaN || rawB.isInf || rawB.isZero
+  val normalCaseDiv = !specialCaseA && !specialCaseB
+  val normalCaseSqrt = !specialCaseA && !rawA.sign
+  val normalCase = Mux(input.bits.sqrt, normalCaseSqrt, normalCaseDiv)
+  val specialCase = !normalCase
 
   val fastValid = RegInit(false.B)
-  fastValid := specialCase_S && input.fire
-
+  fastValid := specialCase && input.fire
 
   // sign
-  val signNext = Mux(input.bits.sqrt, rawA_S.isZero && rawA_S.sign, rawA_S.sign ^ rawB_S.sign)
+  val signNext = Mux(input.bits.sqrt, rawA.isZero && rawA.sign, rawA.sign ^ rawB.sign)
   val signReg = RegEnable(signNext, input.fire)
 
   /** sqrt exp logic
@@ -98,7 +97,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     * 10 -> 00 (true exp positive)
     * }}}
     */
-  val expfirst2 = UIntToOH(rawA_S.sExp(expWidth, expWidth-1))
+  val expfirst2 = UIntToOH(rawA.sExp(expWidth, expWidth-1))
   /** expfirst2(3) never happens */
   val expstart  = Mux1H(
     Seq(
@@ -108,30 +107,30 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     )
   )
 
-  val expForSqrt = Cat(expstart, rawA_S.sExp(expWidth - 2, 0)) >> 1
-  val sqrtExpIsOdd = !rawA_S.sExp(0)
-  val sqrtFractIn = Mux(sqrtExpIsOdd, Cat(0.U(1.W), rawA_S.sig(sigWidth - 1, 0), 0.U(1.W)),
-    Cat(rawA_S.sig(sigWidth - 1, 0), 0.U(2.W)))
+  val expForSqrt = Cat(expstart, rawA.sExp(expWidth - 2, 0)) >> 1
+  val sqrtExpIsOdd = !rawA.sExp(0)
+  val sqrtFractIn = Mux(sqrtExpIsOdd, Cat(0.U(1.W), rawA.sig(sigWidth - 1, 0), 0.U(1.W)),
+    Cat(rawA.sig(sigWidth - 1, 0), 0.U(2.W)))
 
   val SqrtModule = Module(new SquareRoot(2, 2, sigWidth+2, sigWidth+2))
   SqrtModule.input.bits.operand := sqrtFractIn
-  SqrtModule.input.valid := input.valid && input.bits.sqrt && normalCase_S_sqrt
+  SqrtModule.input.valid := input.valid && input.bits.sqrt && normalCaseSqrt
 
-  val rbits_sqrt      = SqrtModule.output.bits.result(1) ## (!SqrtModule.output.bits.zeroRemainder || SqrtModule.output.bits.result(0))
-  val sigToRound_sqrt = SqrtModule.output.bits.result(24, 2)
+  val rbitsSqrt      = SqrtModule.output.bits.result(1) ## (!SqrtModule.output.bits.zeroRemainder || SqrtModule.output.bits.result(0))
+  val sigToRoundSqrt = SqrtModule.output.bits.result(24, 2)
 
 
   // divInput
   val fractDividendIn = Wire(UInt((fpWidth).W))
   val fractDivisorIn = Wire(UInt((fpWidth).W))
-  fractDividendIn := Cat(1.U(1.W), rawA_S.sig(sigWidth - 2, 0), 0.U(expWidth.W))
-  fractDivisorIn  := Cat(1.U(1.W), rawB_S.sig(sigWidth - 2, 0), 0.U(expWidth.W))
+  fractDividendIn := Cat(1.U(1.W), rawA.sig(sigWidth - 2, 0), 0.U(expWidth.W))
+  fractDivisorIn  := Cat(1.U(1.W), rawB.sig(sigWidth - 2, 0), 0.U(expWidth.W))
 
   val divModule = Module(new SRT16(fpWidth, fpWidth, fpWidth))
   divModule.input.bits.dividend := fractDividendIn
   divModule.input.bits.divider := fractDivisorIn
   divModule.input.bits.counter := 8.U
-  divModule.input.valid := input.valid && !input.bits.sqrt && normalCase_S_div
+  divModule.input.valid := input.valid && !input.bits.sqrt && normalCaseDiv
 
 
   /** collect div sig result
@@ -143,15 +142,15 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     * }}}
     */
   val needRightShift = !divModule.output.bits.quotient(27)
-  val sigToRound_div = Mux(needRightShift,
+  val sigToRoundDiv = Mux(needRightShift,
     divModule.output.bits.quotient(calWidth - 3, calWidth - sigWidth - 1),
     divModule.output.bits.quotient(calWidth - 2, calWidth - sigWidth))
-  val rbits_div = Mux(needRightShift, divModule.output.bits.quotient(calWidth - sigWidth - 2) ## divModule.output.bits.reminder.orR,
+  val rbitsDiv = Mux(needRightShift, divModule.output.bits.quotient(calWidth - sigWidth - 2) ## divModule.output.bits.reminder.orR,
     divModule.output.bits.quotient(calWidth - sigWidth - 1) ## divModule.output.bits.reminder.orR)
 
   // collect sig result
-  val sigToRound   = Mux(opSqrtReg, sigToRound_sqrt, sigToRound_div)
-  val rbitsToRound = Mux(opSqrtReg, rbits_sqrt, rbits_div)
+  val sigToRound   = Mux(opSqrtReg, sigToRoundSqrt, sigToRoundDiv)
+  val rbitsToRound = Mux(opSqrtReg, rbitsSqrt, rbitsDiv)
 
   // exp logic
   val expStoreNext,expToRound = Wire(UInt((expWidth+2).W))
@@ -169,7 +168,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     */
   expStoreNext := Mux(input.bits.sqrt,
     Cat(expForSqrt(7),expForSqrt(7),expForSqrt(7,0)),
-    (rawA_S.sExp-rawB_S.sExp).asUInt)
+    (rawA.sExp-rawB.sExp).asUInt)
   val expStore = RegEnable(expStoreNext, 0.U((expWidth+2).W), input.fire)
   expToRound := Mux(opSqrtReg, expStore, expStore - needRightShift)
 
@@ -181,9 +180,9 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     roundingModeReg,
     invalidExec,
     infinitExec,
-    isNaN_Z,
-    isInf_Z,
-    isZero_Z)
+    isNaNReg,
+    isInfReg,
+    isZeroReg)
 
   output.bits.result := roundresult(0)
   output.bits.exceptionFlags := roundresult(1)
diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index e72259b..007fb12 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -34,30 +34,30 @@ class RoundingUnit extends Module{
     val exceptionFlags = Output(Bits(5.W))
   }))
 
-  val roundingMode_near_even   = (input.roundingMode === consts.round_near_even)
-  val roundingMode_toZero      = (input.roundingMode === consts.round_minMag)
-  val roundingMode_min         = (input.roundingMode === consts.round_min)
-  val roundingMode_max         = (input.roundingMode === consts.round_max)
-  val roundingMode_near_maxMag = (input.roundingMode === consts.round_near_maxMag)
+  val rmRNE = (input.roundingMode === consts.round_near_even)
+  val rmRTZ = (input.roundingMode === consts.round_minMag)
+  val rmRDN = (input.roundingMode === consts.round_min)
+  val rmRUP = (input.roundingMode === consts.round_max)
+  val rmRMM = (input.roundingMode === consts.round_near_maxMag)
 
-  val common_overflow = Wire(Bool())
-  val common_underflow = Wire(Bool())
-  val common_inexact  = Wire(Bool())
+  val commonOverflow  = Wire(Bool())
+  val commonUnderflow = Wire(Bool())
+  val commonInexact   = Wire(Bool())
 
   val sigIncr = Wire(Bool())
-  val common_expOut = Wire(UInt(8.W))
-  val common_sigOut = Wire(UInt(23.W))
-  val sig_afterInc = Wire(UInt(27.W))
-  val sub_sigOut, common_subnormSigOut = Wire(UInt(23.W))
+  val commonExpOut = Wire(UInt(8.W))
+  val commonSigOut = Wire(UInt(23.W))
+  val sigAfterInc = Wire(UInt(27.W))
+  val subSigOut, commonSubnormSigOut = Wire(UInt(23.W))
   val expInc = Wire(UInt(8.W))
 
   // control logic
   // set to 126 according to softfloat
-  val exp_ForSub = (input.exp + 126.S(10.W))
+  val expSubnorm = (input.exp + 126.S(10.W))
   // for non subnormal case, Dist = 0
-  val subnormDist = Mux(common_underflow, -exp_ForSub, 0.S(10.W))
+  val subnormDist = Mux(commonUnderflow, -expSubnorm, 0.S(10.W))
   // todo why we have this case? IN IEEE754 or definded in Hardfloat?
-  val common_totalUnderflow = subnormDist > 235.S
+  val commonTotalUnderflow = subnormDist > 235.S
 
   /** contains the hidden 1 and rBits */
   val adjustedSig = Cat(1.U(1.W), input.sig, input.rBits)
@@ -66,7 +66,7 @@ class RoundingUnit extends Module{
   val distGT32 = subnormDist(9,5).orR
   val allMask = ((-1).S(31.W) << 31 >> subnormDist(5,0))
   val distIn24And31 = allMask(6,0).orR
-  val distGT24 = (distGT32 || distIn24And31) && common_underflow
+  val distGT24 = (distGT32 || distIn24And31) && commonUnderflow
   /** 26bits mask selecting all bits will be rounded, considering subnormal case
     *
     * last 2 bits is rbits, always 1s
@@ -90,35 +90,35 @@ class RoundingUnit extends Module{
   /** 2 bits for final rounding */
   val rbits : UInt= Cat(roundPosBit, anyRoundExtra)
 
-  sigIncr := (roundingMode_near_even && (rbits.andR || (lastBit && rbits==="b10".U))) ||
-    (roundingMode_min &&  input.sign &&  rbits.orR) ||
-    (roundingMode_max && !input.sign &&  rbits.orR) ||
-    (roundingMode_near_maxMag && rbits(1))
+  sigIncr := (rmRNE && (rbits.andR || (lastBit && rbits==="b10".U))) ||
+    (rmRDN &&  input.sign &&  rbits.orR) ||
+    (rmRUP && !input.sign &&  rbits.orR) ||
+    (rmRMM && rbits(1))
 
   /** sig_afterInc won't cover distEQ24 */
-  sub_sigOut := Mux(
+  subSigOut := Mux(
     distGT24 || distEQ24,
     Mux(sigIncr, 1.U(23.W), 0.U(23.W)),
-    (sig_afterInc >> subnormDist(4,0))(24,2))
+    (sigAfterInc >> subnormDist(4,0))(24,2))
   /** when subnormDist===1.S, there may be expInc */
-  expInc := sig_afterInc(26)  && (!common_underflow || subnormDist === 1.S )
-  common_subnormSigOut := Mux(common_totalUnderflow, 0.U, sub_sigOut )
+  expInc := sigAfterInc(26)  && (!commonUnderflow || subnormDist === 1.S )
+  commonSubnormSigOut := Mux(commonTotalUnderflow, 0.U, subSigOut )
 
   /** conforms to last bit position */
   val sigIncrement = Mux(sigIncr, lastBitMask, 0.U(26.W))
-  sig_afterInc := adjustedSig +& sigIncrement
+  sigAfterInc := adjustedSig +& sigIncrement
 
   /** Exceptions output */
   val isNaNOut = input.invalidExc || input.isNaN
-  val notNaN_isSpecialInfOut = (input.infiniteExc || input.isInf) && (!input.invalidExc) && (!input.isNaN)
-  val notNaN_isZero = input.isZero && !isNaNOut
-  val commonCase = !isNaNOut && !notNaN_isSpecialInfOut && !input.isZero
+  val notNaNIsSpecialInfOut = (input.infiniteExc || input.isInf) && (!input.invalidExc) && (!input.isNaN)
+  val notNaNIsZero = input.isZero && !isNaNOut
+  val commonCase = !isNaNOut && !notNaNIsSpecialInfOut && !input.isZero
 
-  val overflow  = commonCase && common_overflow
-  val underflow = commonCase && (common_underflow && rbits.orR)
-  val inexact   = overflow || (commonCase && common_inexact)
+  val overflow  = commonCase && commonOverflow
+  val underflow = commonCase && (commonUnderflow && rbits.orR)
+  val inexact   = overflow || (commonCase && commonInexact)
 
-  val overflowSele = roundingMode_min ## roundingMode_max ## roundingMode_toZero ## (roundingMode_near_even || roundingMode_near_maxMag)
+  val overflowSele = rmRDN ## rmRUP ## rmRTZ ## (rmRNE || rmRMM)
 
   val common_infiniteOut = Mux1H(
     Seq(
@@ -133,30 +133,30 @@ class RoundingUnit extends Module{
 
   val infiniteOut = Cat(input.sign, "h7F800000".U)
   val zeroOut = Cat(input.sign, 0.U(31.W))
-  val outSele1H = commonCase ## notNaN_isSpecialInfOut ## isNaNOut ## notNaN_isZero
+  val outSele1H = commonCase ## notNaNIsSpecialInfOut ## isNaNOut ## notNaNIsZero
 
   /** common_overflow = input.exp > 127.S
     *
     * @todo opt it using hardfloat methods?
     */
-  common_overflow  := input.exp(8,7).orR && !input.exp(9)
-  common_underflow := exp_ForSub(9)
-  common_inexact   := anyRound
+  commonOverflow  := input.exp(8,7).orR && !input.exp(9)
+  commonUnderflow := expSubnorm(9)
+  commonInexact   := anyRound
 
-  common_sigOut := sig_afterInc(24,2)
-  common_expOut := ((input.exp + 127.S)(7,0) + expInc).asUInt
+  commonSigOut := sigAfterInc(24,2)
+  commonExpOut := ((input.exp + 127.S)(7,0) + expInc).asUInt
 
 
-  val common_out = Mux(common_overflow, common_infiniteOut,
-    Mux(common_underflow, input.sign ## expInc ## common_subnormSigOut,
-      input.sign ## common_expOut ## common_sigOut))
+  val commonOut = Mux(commonOverflow, common_infiniteOut,
+    Mux(commonUnderflow, input.sign ## expInc ## commonSubnormSigOut,
+      input.sign ## commonExpOut ## commonSigOut))
 
 
   output.data := Mux1H(Seq(
     outSele1H(0) -> zeroOut,
     outSele1H(1) -> quietNaN,
     outSele1H(2) -> infiniteOut,
-    outSele1H(3) -> common_out)
+    outSele1H(3) -> commonOut)
   )
 
   output.exceptionFlags := input.invalidExc ## input.infiniteExc ## overflow ## underflow ## inexact

From 16faa9d14de960317303203ca44183ca749855d5 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 20 Sep 2023 13:41:04 +0800
Subject: [PATCH 082/109] [reformat] reformat

---
 arithmetic/src/float/DivSqrt.scala | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index 9737f10..968c7b1 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -40,25 +40,25 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   // Exceptions
 
   /** inf/inf and 0/0  => NaN out */
-  val divNotSigNaNInButInvalidExc =
+  val divInvalidExcNotSigNaNIn =
     (rawA.isZero && rawB.isZero) || (rawA.isInf && rawB.isInf)
   /** -Inf + -normal => NaN out */
-  val sqrtNotSigNaNInButInvalidExc =
+  val sqrtInvalidExcNotSigNaNIn =
     !rawA.isNaN && !rawA.isZero && rawA.sign
   /** isSigNaNRawFloat detect signaling NaN */
   val majorExc =
     Mux(input.bits.sqrt,
-      isSigNaNRawFloat(rawA) || sqrtNotSigNaNInButInvalidExc,
+      isSigNaNRawFloat(rawA) || sqrtInvalidExcNotSigNaNIn,
       isSigNaNRawFloat(rawA) || isSigNaNRawFloat(rawB) ||
-        divNotSigNaNInButInvalidExc ||
+        divInvalidExcNotSigNaNIn ||
         (!rawA.isNaN && !rawA.isInf && rawB.isZero)
     )
 
   /** all cases result in NaN output */
   val isNaN =
     Mux(input.bits.sqrt,
-      rawA.isNaN || sqrtNotSigNaNInButInvalidExc,
-      rawA.isNaN || rawB.isNaN || divNotSigNaNInButInvalidExc
+      rawA.isNaN || sqrtInvalidExcNotSigNaNIn,
+      rawA.isNaN || rawB.isNaN || divInvalidExcNotSigNaNIn
     )
   val isInf  = Mux(input.bits.sqrt, rawA.isInf, rawA.isInf || rawB.isZero)
   val isZero = Mux(input.bits.sqrt, rawA.isZero, rawA.isZero || rawB.isInf)

From d389ca4c0c8b176230614722365f7796a8a060cf Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Sun, 1 Oct 2023 14:25:56 +0800
Subject: [PATCH 083/109] [submodule] bump chisel

---
 dependencies/chisel | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dependencies/chisel b/dependencies/chisel
index 4c8b6c0..440f01a 160000
--- a/dependencies/chisel
+++ b/dependencies/chisel
@@ -1 +1 @@
-Subproject commit 4c8b6c0cd0543b6058e410c06a9e2108501c0b45
+Subproject commit 440f01addeadd265fca2518c0a4df00b698e4603

From ab500b8326af7d873c808b2980e052623678dfba Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Sun, 1 Oct 2023 15:59:20 +0800
Subject: [PATCH 084/109] [tests] add dpi framework

---
 flake.nix                                     |   5 +
 tests/resources/csrc/dpi.cc                   |  49 +++
 tests/resources/csrc/test.cpp                 |  89 -----
 tests/resources/csrc/vbridge_impl.cc          |  46 +++
 tests/resources/csrc/vbridge_impl.h           |  46 +++
 .../includes/DivSqrtRecF32_small_div.h        |   2 -
 .../includes/DivSqrtRecF32_small_sqrt.h       |   5 -
 .../includes/DivSqrtRecFN_small_div.h         |  54 ----
 .../includes/DivSqrtRecFN_small_sqrt.h        |  46 ---
 tests/src/DUT.scala                           |  65 ++++
 tests/src/Ftests.scala                        | 304 ++++++++++--------
 tests/src/GrandCentral.scala                  |  45 +++
 tests/src/TestBench.scala                     |  32 ++
 tests/src/VerificationModule.scala            |  78 +++++
 14 files changed, 531 insertions(+), 335 deletions(-)
 create mode 100644 tests/resources/csrc/dpi.cc
 delete mode 100644 tests/resources/csrc/test.cpp
 create mode 100644 tests/resources/csrc/vbridge_impl.cc
 create mode 100644 tests/resources/csrc/vbridge_impl.h
 delete mode 100644 tests/resources/includes/DivSqrtRecF32_small_div.h
 delete mode 100644 tests/resources/includes/DivSqrtRecF32_small_sqrt.h
 delete mode 100644 tests/resources/includes/DivSqrtRecFN_small_div.h
 delete mode 100644 tests/resources/includes/DivSqrtRecFN_small_sqrt.h
 create mode 100644 tests/src/DUT.scala
 create mode 100644 tests/src/GrandCentral.scala
 create mode 100644 tests/src/TestBench.scala
 create mode 100644 tests/src/VerificationModule.scala

diff --git a/flake.nix b/flake.nix
index ffff777..8ec0c8f 100644
--- a/flake.nix
+++ b/flake.nix
@@ -20,6 +20,11 @@
             circt
             verilator
             testfloat
+            cmake
+            libargs
+            glog
+            fmt
+            zlib
           ];
         in
         {
diff --git a/tests/resources/csrc/dpi.cc b/tests/resources/csrc/dpi.cc
new file mode 100644
index 0000000..b8d456c
--- /dev/null
+++ b/tests/resources/csrc/dpi.cc
@@ -0,0 +1,49 @@
+#ifdef COSIM_VERILATOR
+#include <VTestBench__Dpi.h>
+#endif
+
+#include <csignal>
+
+#include <glog/logging.h>
+#include <fmt/core.h>
+
+#include "svdpi.h"
+#include "vbridge_impl.h"
+
+
+//void sigint_handler(int s) {
+//  terminated = true;
+//  dpiFinish();
+//}
+
+
+#if VM_TRACE
+
+void VBridgeImpl::dpiDumpWave() {
+
+        ::dpiDumpWave((wave + ".fst").c_str());
+
+}
+#endif
+
+void dpiInitCosim() {
+//  std::signal(SIGINT, sigint_handler);
+  svSetScope(svGetScopeFromName("TOP.TestBench.verificationModule.verbatim"));
+  vbridge_impl_instance.dpiInitCosim();
+}
+
+[[maybe_unused]] void dpiTimeoutCheck() {
+
+        vbridge_impl_instance.timeoutCheck();
+
+}
+
+[[maybe_unused]] void dpiBasePoke(svBitVecVal *resetVector) {
+  uint32_t v = 0x1000;
+  *resetVector = v;
+}
+
+
+
+
+
diff --git a/tests/resources/csrc/test.cpp b/tests/resources/csrc/test.cpp
deleted file mode 100644
index 11da540..0000000
--- a/tests/resources/csrc/test.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-#if VM_TRACE
-# include "verilator.h"
-#endif
-
-// include files are part of the g++ command line
-
-int main (int argc, char* argv[])
-{
-    if (argc < 3) {
-        printf("usage: %s <rounding-mode> <tininess-detection>\n", argv[0]);
-        return -1;
-    }
-
-    dut module;
-
-#if VM_TRACE
-    VerilatedVcdFILE vcdfd(stderr);
-    VerilatedVcdC tfp(&vcdfd);
-    Verilated::traceEverOn(true);
-    module.trace(&tfp, 99);
-    tfp.open("");
-#endif
-
-    initialize_dut(module);
-    module.ROUNDING_MODE = strtoull(argv[1], NULL, 16);
-    module.DETECT_TININESS = strtoull(argv[2], NULL, 16);
-
-    size_t error = 0;
-    size_t cnt = 0;
-
-    // reset
-    for (size_t i=0; i<10; i++) {
-        module.reset = 1;
-        module.clock = 0;
-        module.eval();
-        module.clock = 1;
-        module.eval();
-    }
-    module.reset = 0;
-
-    // main operation
-    for (size_t cycle = 0; ; cycle++) {
-        if (!process_inputs(module) || !process_outputs(module)) {
-            printf("Ran %ld tests.\n", cnt);
-            if (!error) fputs("No errors found.\n", stdout);
-            break;
-        }
-
-        module.clock = 0;
-        module.eval();
-
-#if VM_TRACE
-        tfp.dump(static_cast<vluint64_t>(cycle * 2));
-#endif
-
-        if (module.io_check) {
-            if ((cnt % 10000 == 0) && cnt) printf("Ran %ld tests.\n", cnt);
-            if (!module.io_pass) {
-                error++;
-                printf("[%07ld]", cnt);
-                // for (size_t i=0; i<inputs.size(); i++) {
-                //    printf(" %s", inputs[i]->to_str().c_str());
-                // }
-                printf(
-                    "\n\t=> %#x %#x   expected: %#x %#x\n",
-                    module.io_actual_out,
-                    module.io_actual_exceptionFlags,
-                    module.io_expected_recOut,
-                    module.io_expected_exceptionFlags
-                );
-                if (error == 20) {
-                    printf("Reached %ld errors. Aborting.\n", error);
-                    break;
-                }
-            }
-            cnt++;
-        }
-
-        module.clock = 1;
-        module.eval();
-
-#if VM_TRACE
-        tfp.dump(static_cast<vluint64_t>(cycle * 2 + 1));
-#endif
-    }
-
-    return 0;
-}
-
diff --git a/tests/resources/csrc/vbridge_impl.cc b/tests/resources/csrc/vbridge_impl.cc
new file mode 100644
index 0000000..f1a122c
--- /dev/null
+++ b/tests/resources/csrc/vbridge_impl.cc
@@ -0,0 +1,46 @@
+#include <fmt/core.h>
+#include <glog/logging.h>
+
+#include "verilated.h"
+
+#include "vbridge_impl.h"
+
+
+
+VBridgeImpl::VBridgeImpl() : _cycles(100) {}
+
+
+uint64_t VBridgeImpl::get_t() {
+  return getCycle();
+}
+
+
+int VBridgeImpl::timeoutCheck() {
+  if (get_t() > _cycles) {
+    LOG(INFO) << fmt::format("Simulation timeout, t={}", get_t());
+    dpiFinish();
+  }
+  return 0;
+}
+
+void VBridgeImpl::dpiInitCosim() {
+  google::InitGoogleLogging("emulator");
+  FLAGS_logtostderr = true;
+
+  ctx = Verilated::threadContextp();
+
+
+  LOG(INFO) << fmt::format("[{}] dpiInitCosim", getCycle());
+  LOG(INFO) << fmt::format(" running");
+
+  dpiDumpWave();
+}
+
+
+
+
+VBridgeImpl vbridge_impl_instance;
+
+
+
+
diff --git a/tests/resources/csrc/vbridge_impl.h b/tests/resources/csrc/vbridge_impl.h
new file mode 100644
index 0000000..dd6f369
--- /dev/null
+++ b/tests/resources/csrc/vbridge_impl.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <optional>
+#include <queue>
+
+#include <VTestBench__Dpi.h>
+#include "verilated_fst_c.h"
+
+
+#include <svdpi.h>
+
+
+
+class VBridgeImpl {
+public:
+    explicit VBridgeImpl();
+
+    void dpiDumpWave();
+
+    void dpiInitCosim();
+
+    uint64_t get_t();
+
+    int timeoutCheck();
+
+    uint64_t getCycle() { return ctx->time(); }
+
+
+
+
+private:
+
+    VerilatedContext *ctx;
+    VerilatedFstC tfp;
+
+    uint64_t _cycles;
+
+
+    const std::string wave = "/home/yyq/Projects/arithmetic/run/wave";
+
+
+
+
+};
+
+extern VBridgeImpl vbridge_impl_instance;
diff --git a/tests/resources/includes/DivSqrtRecF32_small_div.h b/tests/resources/includes/DivSqrtRecF32_small_div.h
deleted file mode 100644
index ebc6604..0000000
--- a/tests/resources/includes/DivSqrtRecF32_small_div.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define FLEN 32
-#include "DivSqrtRecFN_small_div.h"
diff --git a/tests/resources/includes/DivSqrtRecF32_small_sqrt.h b/tests/resources/includes/DivSqrtRecF32_small_sqrt.h
deleted file mode 100644
index 2effeb7..0000000
--- a/tests/resources/includes/DivSqrtRecF32_small_sqrt.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#define FLEN 32
-#include "DivSqrtRecFN_small_sqrt.h"
-
-#define ROUNDING_MODE io_input_bits_roundingMode
-#define DETECT_TININESS io_input_bits_detectTininess
diff --git a/tests/resources/includes/DivSqrtRecFN_small_div.h b/tests/resources/includes/DivSqrtRecFN_small_div.h
deleted file mode 100644
index b5d0704..0000000
--- a/tests/resources/includes/DivSqrtRecFN_small_div.h
+++ /dev/null
@@ -1,54 +0,0 @@
-#include "dut.h"
-
-#define ROUNDING_MODE io_input_bits_roundingMode
-#define DETECT_TININESS io_input_bits_detectTininess
-
-static void initialize_dut(dut& m)
-{
-  m.io_input_valid = 1;
-}
-
-static int process_inputs(dut& m)
-{
-  char value[64];
-
-  if (!m.io_input_ready) {
-    return 1;
-  }
-
-  if (scanf("%s", value) != 1) {
-    return 0;
-  }
-  m.io_input_bits_a = strtoull(value, NULL, 16);
-
-  if (scanf("%s", value) != 1) {
-    return 0;
-  }
-  m.io_input_bits_b = strtoull(value, NULL, 16);
-
-  return 1;
-}
-
-static int process_outputs(dut& m)
-{
-  char value[64];
-
-  if (!m.io_input_ready) {
-    return 1;
-  }
-
-  // output
-  if (scanf("%s", value) != 1) {
-    return 0;
-  }
-  m.io_input_bits_out = strtoull(value, NULL, 16);
-
-  // exception flags
-  if (scanf("%s", value) != 1) {
-    return 0;
-  }
-  m.io_input_bits_exceptionFlags = strtoull(value, NULL, 16);
-
-  return 1;
-}
-
diff --git a/tests/resources/includes/DivSqrtRecFN_small_sqrt.h b/tests/resources/includes/DivSqrtRecFN_small_sqrt.h
deleted file mode 100644
index 5a4fed0..0000000
--- a/tests/resources/includes/DivSqrtRecFN_small_sqrt.h
+++ /dev/null
@@ -1,46 +0,0 @@
-#include "dut.h"
-
-static void initialize_dut(dut& m)
-{
-  m.io_input_valid = 1;
-}
-
-static int process_inputs(dut& m)
-{
-  char value[64];
-
-  if (!m.io_input_ready) {
-    return 1;
-  }
-
-  if (scanf("%s", value) != 1) {
-    return 0;
-  }
-  m.io_input_bits_a = strtoull(value, NULL, 16);
-
-  return 1;
-}
-
-static int process_outputs(dut& m)
-{
-  char value[64];
-
-  if (!m.io_input_ready) {
-    return 1;
-  }
-
-  // output
-  if (scanf("%s", value) != 1) {
-    return 0;
-  }
-  m.io_input_bits_out = strtoull(value, NULL, 16);
-
-  // exception flags
-  if (scanf("%s", value) != 1) {
-    return 0;
-  }
-  m.io_input_bits_exceptionFlags = strtoull(value, NULL, 16);
-
-  return 1;
-}
-
diff --git a/tests/src/DUT.scala b/tests/src/DUT.scala
new file mode 100644
index 0000000..80a5695
--- /dev/null
+++ b/tests/src/DUT.scala
@@ -0,0 +1,65 @@
+package tests
+
+import chisel3._
+import chisel3.util._
+import float._
+
+/** num for input number
+  *
+  * input.valid need cancel
+  *
+  * in
+  *
+  * */
+class DUT(expWidth:Int, sigWidth:Int) extends Module {
+
+  val io = IO(new Bundle {
+    val input = Flipped(Decoupled(new DUTInput(expWidth, sigWidth)))
+    val expected = Input(new Reference(expWidth, sigWidth))
+
+    val actual = new Bundle {
+      val out = Output(Bits((expWidth + sigWidth).W))
+      val exceptionFlags = Output(Bits(5.W))
+    }
+
+    val check = Output(Bool())
+    val pass = Output(Bool())
+  })
+
+  val ds = Module(new DivSqrt(expWidth: Int, sigWidth: Int))
+  ds.input.valid := io.input.valid
+  ds.input.bits.sqrt := io.input.valid
+  ds.input.bits.a := io.input.bits.a
+  ds.input.bits.b := io.input.bits.b
+  ds.input.bits.roundingMode := io.input.bits.roundingMode
+  /** @todo */
+  io.input.ready := ds.input.ready
+
+  // collect result
+  io.actual.out := ds.output.bits.result
+  io.actual.exceptionFlags := ds.output.bits.exceptionFlags
+
+
+  val resultError = io.actual.out =/= io.expected.out
+  val flagError = io.actual.exceptionFlags =/= io.expected.exceptionFlags
+
+  io.check := ds.output.valid
+  io.pass := !(ds.output.valid && (resultError || flagError))
+
+}
+
+class DUTInput(expWidth: Int, sigWidth: Int) extends Bundle {
+  val a = Bits((expWidth + sigWidth).W)
+  val b = Bits((expWidth + sigWidth).W)
+  val op = UInt(2.W)
+  val roundingMode = UInt(3.W)
+}
+
+class Reference(expWidth: Int, sigWidth: Int) extends Bundle {
+  val out = UInt((expWidth + sigWidth).W)
+  val exceptionFlags = UInt(5.W)
+}
+
+
+
+
diff --git a/tests/src/Ftests.scala b/tests/src/Ftests.scala
index 5fef164..bbcfee7 100644
--- a/tests/src/Ftests.scala
+++ b/tests/src/Ftests.scala
@@ -8,104 +8,31 @@ import java.util.Calendar
 import java.text.SimpleDateFormat
 import java.util.Calendar
 import scala.collection.parallel.CollectionConverters._
-
 import chisel3.RawModule
+import firrtl.AnnotationSeq
 import org.scalatest.ParallelTestExecution
 import org.scalatest.flatspec.AnyFlatSpec
 import org.scalatest.matchers.should.Matchers
 
-import java.text.SimpleDateFormat
-import java.util.Calendar
-import scala.collection.parallel.CollectionConverters._
+import chisel3.experimental.ExtModule
+import chisel3.util.{HasExtModuleInline, HasExtModuleResource}
+import firrtl.stage.FirrtlCircuitAnnotation
+
+import chisel3.stage._
+import os._
+
+
 
-//object Ftests extends App{
-//  import chisel3.stage.ChiselGeneratorAnnotation
-//  import firrtl.AnnotationSeq
-//  import firrtl.stage._
-//
-//  println("this is Ftests")
-//
-//  val resources = os.resource()
-//  val runDir = os.pwd / "run"
-//  os.remove.all(runDir)
-//  val elaborateDir = runDir / "elaborate"
-//  os.makeDir.all(elaborateDir)
-//  val rtlDir = runDir / "rtl"
-//  os.makeDir.all(rtlDir)
-//  val emulatorDir = runDir / "emulator"
-//  os.makeDir.all(emulatorDir)
-//  val emulatorCSrc = emulatorDir / "src"
-//  os.makeDir.all(emulatorCSrc)
-//  val emulatorCHeader = emulatorDir / "include"
-//  os.makeDir.all(emulatorCHeader)
-//  val emulatorBuildDir = emulatorDir / "build"
-//  os.makeDir.all(emulatorBuildDir)
-//
-//  val emulatorThreads = 8
-//  val verilatorArgs = Seq(
-//    // format: off
-//    "--x-initial unique",
-//    "--output-split 100000",
-//    "--max-num-width 1048576",
-//    "--main",
-//    "--timing",
-//    // use for coverage
-//    "--coverage-user",
-//    "--assert",
-//    // format: on
-//  )
-//
-//  // TODO: this will be replaced by binder API
-//  // elaborate
-//  var topName: String = null
-//  val annos: AnnotationSeq = Seq(
-//    new chisel3.stage.phases.Elaborate,
-//    new chisel3.stage.phases.Convert
-//  ).foldLeft(
-//    Seq(
-//      ChiselGeneratorAnnotation(() => new ValExec_DivSqrtRecFN_small_div(8,24,0))
-//    ): AnnotationSeq
-//  ) { case (annos, stage) => stage.transform(annos) }
-//    .flatMap {
-//      case FirrtlCircuitAnnotation(circuit) =>
-//        topName = circuit.main
-//        os.write.over(elaborateDir / s"$topName.fir", circuit.serialize)
-//        None
-//      case _: chisel3.stage.DesignAnnotation[_] => None
-//      case _: chisel3.stage.ChiselCircuitAnnotation => None
-//      case a => Some(a)
-//    }
-//  os.write.over(elaborateDir / s"$topName.anno.json", firrtl.annotations.JsonProtocol.serialize(annos))
-//
-//  // rtl
-//  os.proc(
-//    "firtool",
-//    elaborateDir / s"$topName.fir", s"--annotation-file=${elaborateDir / s"$topName.anno.json"}",
-//    "-dedup",
-//    "-O=release",
-//    "--disable-all-randomization",
-//    "--split-verilog",
-//    "--preserve-values=none",
-//    "--preserve-aggregate=all",
-//    "--strip-debug-info",
-//    s"-o=$rtlDir"
-//  ).call()
-//  val verilogs = os.read.lines(rtlDir / "filelist.f")
-//    .map(str =>
-//      try {
-//        os.Path(str)
-//      } catch {
-//        case e: IllegalArgumentException if e.getMessage.contains("is not an absolute path") =>
-//          rtlDir / str.stripPrefix("./")
-//      }
-//    )
-//    .filter(p => p.ext == "v" || p.ext == "sv")
-//
-////  os.write(rtlDir / "dut.v", chisel3.getVerilogString(new DivSqrt(8,24)))
-//
-//}
 
 trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
+  val roundings = Seq(
+    "-rnear_even" -> "0",
+    "-rminMag" -> "1",
+    "-rmin" -> "2",
+    "-rmax" -> "3",
+    "-rnear_maxMag" -> "4",
+  )
+
   def exp(f: Int) = f match {
     case 16 => 5
     case 32 => 8
@@ -118,14 +45,6 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
     case 64 => 53
   }
 
-  val roundings = Seq(
-    "-rnear_even" -> "0",
-    "-rminMag" -> "1",
-    "-rmin" -> "2",
-    "-rmax" -> "3",
-    "-rnear_maxMag" -> "4",
-  )
-
   def check(stdouts: Seq[String]) = {
     stdouts foreach (_ shouldNot include("expected"))
     stdouts foreach (_ shouldNot include("Ran 0 tests."))
@@ -149,53 +68,163 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
     */
   def test(name: String, module: () => RawModule, harness: String, softfloatArgs: Seq[Seq[String]], dutArgs: Option[Seq[Seq[String]]] = None) = {
 
-    val testRunDir = os.pwd / "test_run_dir" / s"${this.getClass.getSimpleName}_$name"
-    os.makeDir.all(testRunDir)
-    os.remove(testRunDir / "dut.v")
-    os.write(testRunDir / "dut.v", chisel3.getVerilogString(module()))
-
-    /* command Synthesis verilog to C++. */
-    val verilatorCompile: Seq[String] = Seq(
-      "verilator",
-      "-cc",
-      "--prefix", "dut",
-      "--Mdir", testRunDir.toString,
-      "-CFLAGS", s"""-I${getClass.getResource("/includes/").getPath} -include ${getClass.getResource(s"/includes/$name.h").getPath}""",
-      "dut.v",
-      "--exe", s"${getClass.getResource(s"/csrc/$harness").getPath}",
-      "--trace"
-    )
-    os.proc(verilatorCompile).call(testRunDir)
-
-    /* Build C++ executor. */
-    val verilatorBuild: Seq[String] = Seq(
-      "make",
-      "-C", testRunDir.toString,
-      "-j",
-      "-f", s"dut.mk",
-      "dut")
-    os.proc(verilatorBuild).call(testRunDir)
-
-    def executeAndLog(softfloatArg: Seq[String], dutArg: Seq[String]): String = {
-      val stdoutFile = testRunDir / s"${name}__${(softfloatArg ++ dutArg).mkString("_")}.txt"
-      val vcdFile = testRunDir / s"${name}__${(softfloatArg ++ dutArg).mkString("_")}.vcd"
-      os.proc((testRunDir / "dut").toString +: dutArg).call(stdin = os.proc("testfloat_gen" +: softfloatArg).spawn().stdout, stdout = stdoutFile, stderr = vcdFile)
-      os.read(stdoutFile)
+    var topName: String = null
+    val emulatorThreads = 8
+
+    val runDir: Path = os.pwd / "run"
+    os.remove.all(runDir)
+
+    val elaborateDir = runDir / "elaborate"
+    os.makeDir.all(elaborateDir)
+    val rtlDir = runDir / "rtl"
+    os.makeDir.all(rtlDir)
+    val emulatorDir = runDir / "emulator"
+    os.makeDir.all(emulatorDir)
+    val emulatorCSrc = emulatorDir / "src"
+    os.makeDir.all(emulatorCSrc)
+    val emulatorCHeader = emulatorDir / "include"
+    os.makeDir.all(emulatorCHeader)
+    val emulatorBuildDir = emulatorDir / "build"
+    os.makeDir.all(emulatorBuildDir)
+
+
+//    os.remove(rtlDir / "dut.sv")
+//    os.write(rtlDir / "dut.sv", chisel3.getVerilogString(new VerificationModule))
+
+
+
+    val annos: AnnotationSeq = Seq(
+      new chisel3.stage.phases.Elaborate,
+      new chisel3.stage.phases.Convert
+    ).foldLeft(
+      Seq(
+        ChiselGeneratorAnnotation(() => new TestBench(8,24))
+      ): AnnotationSeq
+    ) { case (annos, stage) => stage.transform(annos) }
+      .flatMap {
+        case FirrtlCircuitAnnotation(circuit) =>
+          topName = circuit.main
+          os.write.over(elaborateDir / s"$topName.fir", circuit.serialize)
+          None
+        case _: chisel3.stage.DesignAnnotation[_] => None
+        case _: chisel3.stage.ChiselCircuitAnnotation => None
+        case a => Some(a)
+      }
+    os.write.over(elaborateDir / s"$topName.anno.json", firrtl.annotations.JsonProtocol.serialize(annos))
+
+    // rtl
+    os.proc(
+      "firtool",
+      elaborateDir / s"$topName.fir", s"--annotation-file=${elaborateDir / s"$topName.anno.json"}",
+      "-dedup",
+      "-O=release",
+      "--disable-all-randomization",
+      "--split-verilog",
+      "--preserve-values=none",
+      "--preserve-aggregate=all",
+      "--strip-debug-info",
+      s"-o=$rtlDir"
+    ).call()
+    val verilogs = os.read.lines(rtlDir / "filelist.f")
+      .map(str =>
+        try {
+          os.Path(str)
+        } catch {
+          case e: IllegalArgumentException if e.getMessage.contains("is not an absolute path") =>
+            rtlDir / str.stripPrefix("./")
+        }
+      )
+      .filter(p => p.ext == "v" || p.ext == "sv")
+
+
+    val allCSourceFiles = Seq(
+      "dpi.cc",
+      "vbridge_impl.cc",
+      "vbridge_impl.h"
+    ).map { f =>
+      os.write.over(emulatorCSrc / f, os.read(os.pwd / "tests" / "resources" / "csrc" / f))
+      emulatorCSrc / f
+    }
+
+    val allCHeaderFiles = Seq(
+      "verilator.h"
+    ).map { f =>
+      os.write.over(emulatorCHeader / f, os.read(os.pwd / "tests" / "resources" / "includes" / f))
+      emulatorCHeader / f
     }
 
-    (if (dutArgs.isDefined) {
-      require(softfloatArgs.size == dutArgs.get.size, "size of softfloatArgs and dutArgs should be same.")
-      (softfloatArgs zip dutArgs.get).par.map { case (s, d) => executeAndLog(s, d) }
-    } else softfloatArgs.par.map { s => executeAndLog(s, Seq.empty) }).seq
+    val verilatorArgs = Seq(
+      // format: off
+      "--x-initial unique",
+      "--output-split 100000",
+      "--max-num-width 1048576",
+      "--timing",
+      // use for coverage
+      "--coverage-user",
+      "--assert",
+      // format: on
+      "--main"
+    )
+
+    os.write(emulatorBuildDir / "CMakeLists.txt",
+      // format: off
+      s"""cmake_minimum_required(VERSION 3.20)
+         |project(emulator)
+         |set(CMAKE_CXX_STANDARD 17)
+         |
+         |find_package(args REQUIRED)
+         |find_package(glog REQUIRED)
+         |find_package(fmt REQUIRED)
+         |find_package(verilator REQUIRED)
+         |find_package(Threads REQUIRED)
+         |set(THREADS_PREFER_PTHREAD_FLAG ON)
+         |
+         |add_executable(emulator
+         |${allCSourceFiles.mkString("\n")}
+         |)
+         |
+         |target_include_directories(emulator PUBLIC $emulatorCHeader)
+         |
+         |target_link_libraries(emulator PUBLIC $${CMAKE_THREAD_LIBS_INIT})
+         |target_link_libraries(emulator PUBLIC  fmt::fmt glog::glog )  # note that libargs is header only, nothing to link
+         |target_compile_definitions(emulator PRIVATE COSIM_VERILATOR)
+         |
+         |verilate(emulator
+         |  SOURCES
+         |  ${verilogs.mkString("\n")}
+         |  "TRACE_FST"
+         |  TOP_MODULE $topName
+         |  PREFIX V$topName
+         |  OPT_FAST
+         |  THREADS $emulatorThreads
+         |  VERILATOR_ARGS ${verilatorArgs.mkString(" ")}
+         |)
+         |""".stripMargin
+      // format: on
+    )
+
+    // build verilator
+    os.proc(Seq(
+      "cmake",
+      "-G", "Ninja",
+      "-S", emulatorBuildDir,
+      "-B", emulatorBuildDir
+    ).map(_.toString)).call(emulatorBuildDir)
+
+    // build emulator
+    os.proc(Seq("ninja", "-C", emulatorBuildDir).map(_.toString)).call(emulatorBuildDir)
+
+    Seq("No errors found.")
   }
 }
 
 class DivSqrtRecFn_smallSpec extends FMATester {
   def test(f: Int, fn: String): Seq[String] = {
     def generator(options: Int) = fn match {
-      case "div" => () => new ValExec_DivSqrtRecFN_small_div(exp(f), sig(f))
-      case "sqrt" => () => new ValExec_DivSqrtRecFN_small_sqrt(exp(f), sig(f))
+      case "div" => () => new TestBench(exp(f), sig(f))
+      case "sqrt" => () => new TestBench(exp(f), sig(f))
     }
+
     test(
       s"DivSqrtRecF${f}_small_${fn}",
       generator(0),
@@ -208,8 +237,5 @@ class DivSqrtRecFn_smallSpec extends FMATester {
     check(test(32, "div"))
   }
 
-  "DivSqrtRecF32_small_sqrt" should "pass" in {
-    check(test(32, "sqrt"))
-  }
 
 }
\ No newline at end of file
diff --git a/tests/src/GrandCentral.scala b/tests/src/GrandCentral.scala
new file mode 100644
index 0000000..7d36277
--- /dev/null
+++ b/tests/src/GrandCentral.scala
@@ -0,0 +1,45 @@
+package sifive {
+  package enterprise {
+    package grandcentral {
+
+      import firrtl.annotations._
+
+      case class ReferenceDataTapKey(source: ReferenceTarget, sink: ReferenceTarget)
+
+      case class DataTapsAnnotation(keys: Seq[ReferenceDataTapKey])
+          extends NoTargetAnnotation
+          with HasSerializationHints {
+        override def typeHints: Seq[Class[_]] = Seq(classOf[ReferenceDataTapKey])
+      }
+    }
+
+  }
+
+}
+
+package tests {
+
+    import chisel3._
+    import chisel3.experimental.ChiselAnnotation
+    import sifive.enterprise.grandcentral._
+    trait TapModule extends RawModule { t =>
+      private val dataTapKeys = scala.collection.mutable.ArrayBuffer[(Data, Data)]()
+      def tap[T <: Data](source: T): T = {
+        val sink = Wire(chiselTypeOf(source))
+        dontTouch(sink)
+        dataTapKeys.append((source, sink))
+        sink
+      }
+      // wait for https://github.com/chipsalliance/chisel3/pull/1943
+      def done(): Unit = {
+        chisel3.experimental.annotate(new ChiselAnnotation {
+          override def toFirrtl = DataTapsAnnotation(dataTapKeys.toSeq.map({
+            case (source, sink) =>
+              ReferenceDataTapKey(source.toTarget, sink.toTarget)
+          }))
+        })
+      }
+    }
+
+
+}
diff --git a/tests/src/TestBench.scala b/tests/src/TestBench.scala
new file mode 100644
index 0000000..dabd2aa
--- /dev/null
+++ b/tests/src/TestBench.scala
@@ -0,0 +1,32 @@
+package tests
+
+import chisel3._
+import float.DivSqrt
+
+
+class TestBench(expWidth: Int, sigWidth: Int) extends RawModule {
+  val clock = Wire(Clock())
+  val reset = Wire(Bool())
+  val dut = withClockAndReset(clock, reset) {
+    Module(
+      new DUT(8,24)
+    )
+  }
+  val verificationModule = Module(new VerificationModule)
+  clock := verificationModule.clock
+  reset := verificationModule.reset
+
+  dut.io.input.bits.a := verificationModule.pokeDUT.bits.a
+  dut.io.input.bits.b := verificationModule.pokeDUT.bits.b
+  dut.io.input.bits.op := 0.U
+  dut.io.input.bits.roundingMode := verificationModule.pokeDUT.bits.roundingMode
+  dut.io.input.valid := verificationModule.pokeDUT.valid
+
+  dut.io.expected.out := verificationModule.pokeReference.out
+  dut.io.expected.exceptionFlags := verificationModule.pokeReference.exceptionFlags
+
+
+
+}
+
+
diff --git a/tests/src/VerificationModule.scala b/tests/src/VerificationModule.scala
new file mode 100644
index 0000000..94e1596
--- /dev/null
+++ b/tests/src/VerificationModule.scala
@@ -0,0 +1,78 @@
+package tests
+
+import chisel3._
+import chisel3.experimental.ExtModule
+import chisel3.experimental.hierarchy.{instantiable, public}
+import chisel3.util._
+
+
+class VerificationModule extends RawModule {
+
+  val clockRate = 2
+
+  val clock = IO(Output(Clock()))
+  val reset = IO(Output(Bool()))
+
+
+  val pokeDUT = IO(Valid(new DUTInput(8,24)))
+  val pokeReference  = IO(Output(new Reference(8,24)))
+
+  pokeDUT.bits.a := 0.U
+  pokeDUT.bits.b := 0.U
+  pokeDUT.bits.op := 0.U
+  pokeDUT.bits.roundingMode := 0.U
+  pokeDUT.valid := true.B
+
+  pokeReference.out := 0.U
+  pokeReference.exceptionFlags := 0.U
+
+  val verbatim = Module(new ExtModule with HasExtModuleInline {
+    override val desiredName = "Verbatim"
+    val clock = IO(Output(Clock()))
+    val reset = IO(Output(Bool()))
+    setInline(
+      "verbatim.sv",
+      s"""module Verbatim(
+         |  output clock,
+         |  output reset
+         |);
+         |  reg _clock = 1'b0;
+         |  always #($clockRate) _clock = ~_clock;
+         |  reg _reset = 1'b1;
+         |  initial #(${2 * clockRate + 1}) _reset = 0;
+         |
+         |  assign clock = _clock;
+         |  assign reset = _reset;
+         |
+         |  import "DPI-C" function void dpiInitCosim();
+         |  initial dpiInitCosim();
+         |
+         |  import "DPI-C" function void dpiTimeoutCheck();
+         |  always #(${2 * clockRate + 1}) dpiTimeoutCheck();
+         |
+         |
+         |  export "DPI-C" function dpiDumpWave;
+         |  function dpiDumpWave(input string file);
+         |   $$dumpfile(file);
+         |   $$dumpvars(0);
+         |  endfunction;
+         |
+         |  export "DPI-C" function dpiFinish;
+         |  function dpiFinish();
+         |   $$finish;
+         |  endfunction;
+         |
+         |  export "DPI-C" function dpiError;
+         |  function dpiError(input string what);
+         |   $$error(what);
+         |  endfunction;
+         |
+         |endmodule
+         |""".stripMargin
+    )
+  })
+  clock := verbatim.clock
+  reset := verbatim.reset
+
+
+}

From 7a2aa692550479b737dc311a6306d70a78af37dd Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 5 Oct 2023 15:12:48 +0800
Subject: [PATCH 085/109] [tests] basePoke and basePeek

---
 tests/resources/csrc/dpi.cc          |  9 ++++--
 tests/resources/csrc/vbridge_impl.cc |  4 +++
 tests/resources/csrc/vbridge_impl.h  |  2 ++
 tests/src/Ftests.scala               | 10 ++++---
 tests/src/TestBench.scala            |  2 ++
 tests/src/VerificationModule.scala   | 44 +++++++++++++++++++++++++++-
 6 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/tests/resources/csrc/dpi.cc b/tests/resources/csrc/dpi.cc
index b8d456c..687327e 100644
--- a/tests/resources/csrc/dpi.cc
+++ b/tests/resources/csrc/dpi.cc
@@ -38,9 +38,12 @@ void dpiInitCosim() {
 
 }
 
-[[maybe_unused]] void dpiBasePoke(svBitVecVal *resetVector) {
-  uint32_t v = 0x1000;
-  *resetVector = v;
+[[maybe_unused]] void dpiBasePoke(svBitVecVal *a) {
+  vbridge_impl_instance.dpiBasePoke(a);
+}
+
+[[maybe_unused]] void dpiBasePeek(svBit ready) {
+  LOG(INFO) << fmt::format("ready = {}", ready);
 }
 
 
diff --git a/tests/resources/csrc/vbridge_impl.cc b/tests/resources/csrc/vbridge_impl.cc
index f1a122c..d419e84 100644
--- a/tests/resources/csrc/vbridge_impl.cc
+++ b/tests/resources/csrc/vbridge_impl.cc
@@ -36,6 +36,10 @@ void VBridgeImpl::dpiInitCosim() {
   dpiDumpWave();
 }
 
+void VBridgeImpl::dpiBasePoke(svBitVecVal *a) {
+  uint32_t v = 0x1000;
+  *a = v;
+}
 
 
 
diff --git a/tests/resources/csrc/vbridge_impl.h b/tests/resources/csrc/vbridge_impl.h
index dd6f369..93c9e87 100644
--- a/tests/resources/csrc/vbridge_impl.h
+++ b/tests/resources/csrc/vbridge_impl.h
@@ -25,6 +25,8 @@ class VBridgeImpl {
 
     uint64_t getCycle() { return ctx->time(); }
 
+    static void dpiBasePoke(uint32_t *a);
+
 
 
 
diff --git a/tests/src/Ftests.scala b/tests/src/Ftests.scala
index bbcfee7..8e9b2b2 100644
--- a/tests/src/Ftests.scala
+++ b/tests/src/Ftests.scala
@@ -142,15 +142,13 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
       "vbridge_impl.cc",
       "vbridge_impl.h"
     ).map { f =>
-      os.write.over(emulatorCSrc / f, os.read(os.pwd / "tests" / "resources" / "csrc" / f))
-      emulatorCSrc / f
+      os.pwd / "tests" / "resources" / "csrc" / f
     }
 
     val allCHeaderFiles = Seq(
       "verilator.h"
     ).map { f =>
-      os.write.over(emulatorCHeader / f, os.read(os.pwd / "tests" / "resources" / "includes" / f))
-      emulatorCHeader / f
+      os.pwd / "tests" / "resources" / "includes" / f
     }
 
     val verilatorArgs = Seq(
@@ -214,6 +212,10 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
     // build emulator
     os.proc(Seq("ninja", "-C", emulatorBuildDir).map(_.toString)).call(emulatorBuildDir)
 
+    // run
+    os.proc(Seq("./emulator").map(_.toString)).call(emulatorBuildDir)
+
+
     Seq("No errors found.")
   }
 }
diff --git a/tests/src/TestBench.scala b/tests/src/TestBench.scala
index dabd2aa..45bde9f 100644
--- a/tests/src/TestBench.scala
+++ b/tests/src/TestBench.scala
@@ -25,6 +25,8 @@ class TestBench(expWidth: Int, sigWidth: Int) extends RawModule {
   dut.io.expected.out := verificationModule.pokeReference.out
   dut.io.expected.exceptionFlags := verificationModule.pokeReference.exceptionFlags
 
+  verificationModule.ready := dut.io.input.ready
+
 
 
 }
diff --git a/tests/src/VerificationModule.scala b/tests/src/VerificationModule.scala
index 94e1596..25eb116 100644
--- a/tests/src/VerificationModule.scala
+++ b/tests/src/VerificationModule.scala
@@ -17,7 +17,9 @@ class VerificationModule extends RawModule {
   val pokeDUT = IO(Valid(new DUTInput(8,24)))
   val pokeReference  = IO(Output(new Reference(8,24)))
 
-  pokeDUT.bits.a := 0.U
+  val ready = IO(Input(Bool()))
+
+
   pokeDUT.bits.b := 0.U
   pokeDUT.bits.op := 0.U
   pokeDUT.bits.roundingMode := 0.U
@@ -74,5 +76,45 @@ class VerificationModule extends RawModule {
   clock := verbatim.clock
   reset := verbatim.reset
 
+  val dpiBasePoke = Module(new ExtModule with HasExtModuleInline {
+    override val desiredName = "dpiBasePoke"
+    val a = IO(Output(UInt(32.W)))
+    val clock = IO(Input(Clock()))
+    setInline(
+      s"$desiredName.sv",
+      s"""module $desiredName(
+         |  input clock,
+         |  output [31:0] a
+         |);
+         |  import "DPI-C" function void $desiredName(output bit[31:0] a);
+         |
+         |  always @ (posedge clock) $desiredName(a);
+         |endmodule
+         |""".stripMargin
+    )
+  })
+  dpiBasePoke.clock := verbatim.clock
+  pokeDUT.bits.a := dpiBasePoke.a
+
+  val dpiBasePeek = Module(new ExtModule with HasExtModuleInline {
+    override val desiredName = "dpiBasePeek"
+    val ready = IO(Input(Bool()))
+    val clock = IO(Input(Clock()))
+    setInline(
+      s"$desiredName.sv",
+      s"""module $desiredName(
+         |  input clock,
+         |  input ready
+         |);
+         |  import "DPI-C" function void $desiredName(input bit ready);
+         |
+         |  always @ (posedge clock) $desiredName(ready);
+         |endmodule
+         |""".stripMargin
+    )
+  })
+  dpiBasePeek.clock := verbatim.clock
+  dpiBasePeek.ready := ready
+
 
 }

From d1877c515aa3b24d91b8920a8add6364a55e8ae7 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 5 Oct 2023 15:21:15 +0800
Subject: [PATCH 086/109] [tests] opt toDut interfave

---
 tests/resources/csrc/dpi.cc        | 11 +++++++++++
 tests/resources/csrc/encoding.h    | 11 +++++++++++
 tests/src/DUT.scala                | 16 ++++++----------
 tests/src/Ftests.scala             |  3 ++-
 tests/src/TestBench.scala          | 20 ++++++++------------
 tests/src/VerificationModule.scala | 25 ++++++++++---------------
 6 files changed, 48 insertions(+), 38 deletions(-)
 create mode 100644 tests/resources/csrc/encoding.h

diff --git a/tests/resources/csrc/dpi.cc b/tests/resources/csrc/dpi.cc
index 687327e..212edb1 100644
--- a/tests/resources/csrc/dpi.cc
+++ b/tests/resources/csrc/dpi.cc
@@ -46,6 +46,17 @@ void dpiInitCosim() {
   LOG(INFO) << fmt::format("ready = {}", ready);
 }
 
+//[[maybe_unused]] void dpiPeekPoke(svBit ready,
+//                 svBit *valid,
+//                 svBitVecVal *a,
+//                 svBitVecVal *b,
+//                 svBitVecVal *op,
+//                 svBitVecVal *rm,
+//                 svBitVecVal *refOut,
+//                 svBitVecVal *refFlags) {
+//  vbridge_impl_instance.dpiPeekPoke(DutInterface(ready, valid, a, b, op, rm, refOut, refFlags));
+//}
+
 
 
 
diff --git a/tests/resources/csrc/encoding.h b/tests/resources/csrc/encoding.h
new file mode 100644
index 0000000..dc78930
--- /dev/null
+++ b/tests/resources/csrc/encoding.h
@@ -0,0 +1,11 @@
+struct DutInterface{
+                    svBit ready;
+
+                    svBit *valid;
+                    svBitVecVal *a;
+                    svBitVecVal *b;
+                    svBitVecVal *op;
+                    svBitVecVal *rm;
+                    svBitVecVal *refOut;
+                    svBitVecVal *refFlags
+};
\ No newline at end of file
diff --git a/tests/src/DUT.scala b/tests/src/DUT.scala
index 80a5695..c3f862f 100644
--- a/tests/src/DUT.scala
+++ b/tests/src/DUT.scala
@@ -14,8 +14,7 @@ import float._
 class DUT(expWidth:Int, sigWidth:Int) extends Module {
 
   val io = IO(new Bundle {
-    val input = Flipped(Decoupled(new DUTInput(expWidth, sigWidth)))
-    val expected = Input(new Reference(expWidth, sigWidth))
+    val input = Flipped(Decoupled(new DutInterface(expWidth, sigWidth)))
 
     val actual = new Bundle {
       val out = Output(Bits((expWidth + sigWidth).W))
@@ -40,24 +39,21 @@ class DUT(expWidth:Int, sigWidth:Int) extends Module {
   io.actual.exceptionFlags := ds.output.bits.exceptionFlags
 
 
-  val resultError = io.actual.out =/= io.expected.out
-  val flagError = io.actual.exceptionFlags =/= io.expected.exceptionFlags
+  val resultError = io.actual.out =/= io.input.bits.refOut
+  val flagError = io.actual.exceptionFlags =/= io.input.bits.refFlags
 
   io.check := ds.output.valid
   io.pass := !(ds.output.valid && (resultError || flagError))
 
 }
 
-class DUTInput(expWidth: Int, sigWidth: Int) extends Bundle {
+class DutInterface(expWidth: Int, sigWidth: Int) extends Bundle {
   val a = Bits((expWidth + sigWidth).W)
   val b = Bits((expWidth + sigWidth).W)
   val op = UInt(2.W)
   val roundingMode = UInt(3.W)
-}
-
-class Reference(expWidth: Int, sigWidth: Int) extends Bundle {
-  val out = UInt((expWidth + sigWidth).W)
-  val exceptionFlags = UInt(5.W)
+  val refOut = UInt((expWidth + sigWidth).W)
+  val refFlags = UInt(5.W)
 }
 
 
diff --git a/tests/src/Ftests.scala b/tests/src/Ftests.scala
index 8e9b2b2..a7a721a 100644
--- a/tests/src/Ftests.scala
+++ b/tests/src/Ftests.scala
@@ -140,7 +140,8 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
     val allCSourceFiles = Seq(
       "dpi.cc",
       "vbridge_impl.cc",
-      "vbridge_impl.h"
+      "vbridge_impl.h",
+      "encoding.h"
     ).map { f =>
       os.pwd / "tests" / "resources" / "csrc" / f
     }
diff --git a/tests/src/TestBench.scala b/tests/src/TestBench.scala
index 45bde9f..d135b62 100644
--- a/tests/src/TestBench.scala
+++ b/tests/src/TestBench.scala
@@ -16,18 +16,14 @@ class TestBench(expWidth: Int, sigWidth: Int) extends RawModule {
   clock := verificationModule.clock
   reset := verificationModule.reset
 
-  dut.io.input.bits.a := verificationModule.pokeDUT.bits.a
-  dut.io.input.bits.b := verificationModule.pokeDUT.bits.b
-  dut.io.input.bits.op := 0.U
-  dut.io.input.bits.roundingMode := verificationModule.pokeDUT.bits.roundingMode
-  dut.io.input.valid := verificationModule.pokeDUT.valid
-
-  dut.io.expected.out := verificationModule.pokeReference.out
-  dut.io.expected.exceptionFlags := verificationModule.pokeReference.exceptionFlags
-
-  verificationModule.ready := dut.io.input.ready
-
-
+  dut.io.input.bits.a             := verificationModule.toDUT.bits.a
+  dut.io.input.bits.b             := verificationModule.toDUT.bits.b
+  dut.io.input.bits.op            := verificationModule.toDUT.bits.op
+  dut.io.input.bits.roundingMode  := verificationModule.toDUT.bits.roundingMode
+  dut.io.input.bits.refOut        := verificationModule.toDUT.bits.refOut
+  dut.io.input.bits.refFlags      := verificationModule.toDUT.bits.refFlags
+  dut.io.input.valid              := verificationModule.toDUT.valid
+  verificationModule.toDUT.ready  := dut.io.input.ready
 
 }
 
diff --git a/tests/src/VerificationModule.scala b/tests/src/VerificationModule.scala
index 25eb116..165882e 100644
--- a/tests/src/VerificationModule.scala
+++ b/tests/src/VerificationModule.scala
@@ -14,19 +14,7 @@ class VerificationModule extends RawModule {
   val reset = IO(Output(Bool()))
 
 
-  val pokeDUT = IO(Valid(new DUTInput(8,24)))
-  val pokeReference  = IO(Output(new Reference(8,24)))
-
-  val ready = IO(Input(Bool()))
-
-
-  pokeDUT.bits.b := 0.U
-  pokeDUT.bits.op := 0.U
-  pokeDUT.bits.roundingMode := 0.U
-  pokeDUT.valid := true.B
-
-  pokeReference.out := 0.U
-  pokeReference.exceptionFlags := 0.U
+  val toDUT = IO(DecoupledIO(new DutInterface(8,24)))
 
   val verbatim = Module(new ExtModule with HasExtModuleInline {
     override val desiredName = "Verbatim"
@@ -94,7 +82,7 @@ class VerificationModule extends RawModule {
     )
   })
   dpiBasePoke.clock := verbatim.clock
-  pokeDUT.bits.a := dpiBasePoke.a
+  toDUT.bits.a := dpiBasePoke.a
 
   val dpiBasePeek = Module(new ExtModule with HasExtModuleInline {
     override val desiredName = "dpiBasePeek"
@@ -114,7 +102,14 @@ class VerificationModule extends RawModule {
     )
   })
   dpiBasePeek.clock := verbatim.clock
-  dpiBasePeek.ready := ready
+  dpiBasePeek.ready := toDUT.ready
+
+  toDUT.valid             := true.B
+  toDUT.bits.b            := 1.U
+  toDUT.bits.op           := 1.U
+  toDUT.bits.roundingMode := 1.U
+  toDUT.bits.refOut       := 1.U
+  toDUT.bits.refFlags     := 1.U
 
 
 }

From 6d25c7a8aba6f059ab48ccc33d05c574323fe6c7 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 5 Oct 2023 16:01:07 +0800
Subject: [PATCH 087/109] [tests] build dpiPeekPoke

---
 tests/resources/csrc/dpi.cc          | 24 +++++-----
 tests/resources/csrc/encoding.h      |  6 +--
 tests/resources/csrc/vbridge_impl.cc | 15 ++++++
 tests/resources/csrc/vbridge_impl.h  |  5 ++
 tests/src/DUT.scala                  | 42 ++++++++---------
 tests/src/TestBench.scala            | 16 +++----
 tests/src/VerificationModule.scala   | 68 +++++++++++++++++++++++++---
 7 files changed, 125 insertions(+), 51 deletions(-)

diff --git a/tests/resources/csrc/dpi.cc b/tests/resources/csrc/dpi.cc
index 212edb1..934bf16 100644
--- a/tests/resources/csrc/dpi.cc
+++ b/tests/resources/csrc/dpi.cc
@@ -9,6 +9,7 @@
 
 #include "svdpi.h"
 #include "vbridge_impl.h"
+#include "encoding.h"
 
 
 //void sigint_handler(int s) {
@@ -43,19 +44,20 @@ void dpiInitCosim() {
 }
 
 [[maybe_unused]] void dpiBasePeek(svBit ready) {
-  LOG(INFO) << fmt::format("ready = {}", ready);
+  vbridge_impl_instance.dpiBasePeek(ready);
 }
 
-//[[maybe_unused]] void dpiPeekPoke(svBit ready,
-//                 svBit *valid,
-//                 svBitVecVal *a,
-//                 svBitVecVal *b,
-//                 svBitVecVal *op,
-//                 svBitVecVal *rm,
-//                 svBitVecVal *refOut,
-//                 svBitVecVal *refFlags) {
-//  vbridge_impl_instance.dpiPeekPoke(DutInterface(ready, valid, a, b, op, rm, refOut, refFlags));
-//}
+[[maybe_unused]] void dpiPeekPoke(
+                 svBit *valid,
+                 svBitVecVal *a,
+                 svBitVecVal *b,
+                 svBitVecVal *op,
+                 svBitVecVal *rm,
+                 svBitVecVal *refOut,
+                 svBitVecVal *refFlags) {
+  vbridge_impl_instance.dpiPeekPoke(DutInterface{valid, a, b, op, rm, refOut, refFlags});
+
+}
 
 
 
diff --git a/tests/resources/csrc/encoding.h b/tests/resources/csrc/encoding.h
index dc78930..bbe8afc 100644
--- a/tests/resources/csrc/encoding.h
+++ b/tests/resources/csrc/encoding.h
@@ -1,11 +1,11 @@
-struct DutInterface{
-                    svBit ready;
+#pragma once
 
+struct DutInterface{
                     svBit *valid;
                     svBitVecVal *a;
                     svBitVecVal *b;
                     svBitVecVal *op;
                     svBitVecVal *rm;
                     svBitVecVal *refOut;
-                    svBitVecVal *refFlags
+                    svBitVecVal *refFlags;
 };
\ No newline at end of file
diff --git a/tests/resources/csrc/vbridge_impl.cc b/tests/resources/csrc/vbridge_impl.cc
index d419e84..50596a6 100644
--- a/tests/resources/csrc/vbridge_impl.cc
+++ b/tests/resources/csrc/vbridge_impl.cc
@@ -41,6 +41,21 @@ void VBridgeImpl::dpiBasePoke(svBitVecVal *a) {
   *a = v;
 }
 
+void VBridgeImpl::dpiBasePeek(svBit ready) {
+    LOG(INFO) << fmt::format("dpiPeek running = {}", ready);
+}
+
+void VBridgeImpl::dpiPeekPoke(const DutInterface &toDut) {
+  uint32_t v = 0x1000;
+  *toDut.a = v;
+  *toDut.b = v;
+  *toDut.op = 0;
+  *toDut.rm = 0;
+  *toDut.refOut = v;
+  *toDut.refFlags = v;
+  *toDut.valid = 0;
+}
+
 
 
 VBridgeImpl vbridge_impl_instance;
diff --git a/tests/resources/csrc/vbridge_impl.h b/tests/resources/csrc/vbridge_impl.h
index 93c9e87..0ffafe7 100644
--- a/tests/resources/csrc/vbridge_impl.h
+++ b/tests/resources/csrc/vbridge_impl.h
@@ -9,6 +9,7 @@
 
 #include <svdpi.h>
 
+#include "encoding.h"
 
 
 class VBridgeImpl {
@@ -27,6 +28,10 @@ class VBridgeImpl {
 
     static void dpiBasePoke(uint32_t *a);
 
+    void dpiPeekPoke(const DutInterface &toDut);
+
+    static void dpiBasePeek(svBit ready);
+
 
 
 
diff --git a/tests/src/DUT.scala b/tests/src/DUT.scala
index c3f862f..e28ced7 100644
--- a/tests/src/DUT.scala
+++ b/tests/src/DUT.scala
@@ -12,44 +12,42 @@ import float._
   *
   * */
 class DUT(expWidth:Int, sigWidth:Int) extends Module {
+    val input = IO(Flipped(Decoupled(new DutInterface(expWidth, sigWidth))))
 
-  val io = IO(new Bundle {
-    val input = Flipped(Decoupled(new DutInterface(expWidth, sigWidth)))
-
-    val actual = new Bundle {
+    val actual = IO(new Bundle {
       val out = Output(Bits((expWidth + sigWidth).W))
       val exceptionFlags = Output(Bits(5.W))
-    }
+    })
+
+    val check = IO(Output(Bool()))
+    val pass = IO(Output(Bool()))
 
-    val check = Output(Bool())
-    val pass = Output(Bool())
-  })
 
   val ds = Module(new DivSqrt(expWidth: Int, sigWidth: Int))
-  ds.input.valid := io.input.valid
-  ds.input.bits.sqrt := io.input.valid
-  ds.input.bits.a := io.input.bits.a
-  ds.input.bits.b := io.input.bits.b
-  ds.input.bits.roundingMode := io.input.bits.roundingMode
+  ds.input.valid :=  input.valid
+  ds.input.bits.sqrt :=  input.valid
+  ds.input.bits.a :=  input.bits.a
+  ds.input.bits.b :=  input.bits.b
+  ds.input.bits.roundingMode :=  input.bits.roundingMode
   /** @todo */
-  io.input.ready := ds.input.ready
+   input.ready := ds.input.ready
 
   // collect result
-  io.actual.out := ds.output.bits.result
-  io.actual.exceptionFlags := ds.output.bits.exceptionFlags
+   actual.out := ds.output.bits.result
+   actual.exceptionFlags := ds.output.bits.exceptionFlags
 
 
-  val resultError = io.actual.out =/= io.input.bits.refOut
-  val flagError = io.actual.exceptionFlags =/= io.input.bits.refFlags
+  val resultError =  actual.out =/=  input.bits.refOut
+  val flagError =  actual.exceptionFlags =/=  input.bits.refFlags
 
-  io.check := ds.output.valid
-  io.pass := !(ds.output.valid && (resultError || flagError))
+   check := ds.output.valid
+   pass := !(ds.output.valid && (resultError || flagError))
 
 }
 
 class DutInterface(expWidth: Int, sigWidth: Int) extends Bundle {
-  val a = Bits((expWidth + sigWidth).W)
-  val b = Bits((expWidth + sigWidth).W)
+  val a = UInt((expWidth + sigWidth).W)
+  val b = UInt((expWidth + sigWidth).W)
   val op = UInt(2.W)
   val roundingMode = UInt(3.W)
   val refOut = UInt((expWidth + sigWidth).W)
diff --git a/tests/src/TestBench.scala b/tests/src/TestBench.scala
index d135b62..c83f92d 100644
--- a/tests/src/TestBench.scala
+++ b/tests/src/TestBench.scala
@@ -16,14 +16,14 @@ class TestBench(expWidth: Int, sigWidth: Int) extends RawModule {
   clock := verificationModule.clock
   reset := verificationModule.reset
 
-  dut.io.input.bits.a             := verificationModule.toDUT.bits.a
-  dut.io.input.bits.b             := verificationModule.toDUT.bits.b
-  dut.io.input.bits.op            := verificationModule.toDUT.bits.op
-  dut.io.input.bits.roundingMode  := verificationModule.toDUT.bits.roundingMode
-  dut.io.input.bits.refOut        := verificationModule.toDUT.bits.refOut
-  dut.io.input.bits.refFlags      := verificationModule.toDUT.bits.refFlags
-  dut.io.input.valid              := verificationModule.toDUT.valid
-  verificationModule.toDUT.ready  := dut.io.input.ready
+  dut.input.bits.a             := verificationModule.toDUT.bits.a
+  dut.input.bits.b             := verificationModule.toDUT.bits.b
+  dut.input.bits.op            := verificationModule.toDUT.bits.op
+  dut.input.bits.roundingMode  := verificationModule.toDUT.bits.roundingMode
+  dut.input.bits.refOut        := verificationModule.toDUT.bits.refOut
+  dut.input.bits.refFlags      := verificationModule.toDUT.bits.refFlags
+  dut.input.valid              := verificationModule.toDUT.valid
+  verificationModule.toDUT.ready  := dut.input.ready
 
 }
 
diff --git a/tests/src/VerificationModule.scala b/tests/src/VerificationModule.scala
index 165882e..81f4e74 100644
--- a/tests/src/VerificationModule.scala
+++ b/tests/src/VerificationModule.scala
@@ -10,6 +10,8 @@ class VerificationModule extends RawModule {
 
   val clockRate = 2
 
+  val latPeek = 2
+
   val clock = IO(Output(Clock()))
   val reset = IO(Output(Bool()))
 
@@ -82,7 +84,7 @@ class VerificationModule extends RawModule {
     )
   })
   dpiBasePoke.clock := verbatim.clock
-  toDUT.bits.a := dpiBasePoke.a
+//  toDUT.bits.a := dpiBasePoke.a
 
   val dpiBasePeek = Module(new ExtModule with HasExtModuleInline {
     override val desiredName = "dpiBasePeek"
@@ -104,12 +106,64 @@ class VerificationModule extends RawModule {
   dpiBasePeek.clock := verbatim.clock
   dpiBasePeek.ready := toDUT.ready
 
-  toDUT.valid             := true.B
-  toDUT.bits.b            := 1.U
-  toDUT.bits.op           := 1.U
-  toDUT.bits.roundingMode := 1.U
-  toDUT.bits.refOut       := 1.U
-  toDUT.bits.refFlags     := 1.U
+  val dpiPeekPoke = Module(new ExtModule with HasExtModuleInline {
+    override val desiredName = "dpiPeekPoke"
+    val clock = IO(Input(Clock()))
+    val a = IO(Output(UInt(32.W)))
+    val b = IO(Output(UInt(32.W)))
+    val op = IO(Output(UInt(2.W)))
+    val rm = IO(Output(UInt(3.W)))
+    val refOut = IO(Output(UInt(32.W)))
+    val refFlags = IO(Output(UInt(5.W)))
+    val valid = IO(Output(Bool()))
+    setInline(
+      s"$desiredName.sv",
+      s"""module $desiredName(
+         |  output clock,
+         |  output valid,
+         |  output [31:0] a,
+         |  output [31:0] b,
+         |  output [1:0] op,
+         |  output [2:0] rm,
+         |  output [31:0] refOut,
+         |  output [4:0]  refFlags
+         |);
+         |
+         |  import "DPI-C" function void $desiredName(
+         |  output bit valid,
+         |  output bit[31:0] a,
+         |  output bit[31:0] b,
+         |  output bit[1:0]  op,
+         |  output bit[2:0]  rm,
+         |  output bit[31:0] refOut,
+         |  output bit[4:0]  refFlags
+         |  );
+         |
+         |  always @ (negedge clock) $desiredName(
+         |  valid,
+         |  a,
+         |  b,
+         |  op,
+         |  rm,
+         |  refOut,
+         |  refFlags);
+         |
+         |
+         |
+         |endmodule
+         |""".stripMargin
+    )
+  })
+  dpiPeekPoke.clock       := verbatim.clock
+  toDUT.valid             := dpiPeekPoke.valid
+  toDUT.bits.a            := dpiPeekPoke.a
+  toDUT.bits.b            := dpiPeekPoke.b
+  toDUT.bits.op           := dpiPeekPoke.op
+  toDUT.bits.roundingMode := dpiPeekPoke.rm
+  toDUT.bits.refOut       := dpiPeekPoke.refOut
+  toDUT.bits.refFlags     := dpiPeekPoke.refFlags
+
+
 
 
 }

From 086edd28ebe335e1448d84db4a183ce1db096c02 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 5 Oct 2023 17:07:43 +0800
Subject: [PATCH 088/109] [submodule] add submodule

---
 .gitmodules            |  6 ++++++
 Makefile               | 10 ++++++++++
 berkeley-softfloat-3   |  1 +
 berkeley-testfloat-3   |  1 +
 tests/src/Ftests.scala | 18 +++++++++++++++---
 5 files changed, 33 insertions(+), 3 deletions(-)
 create mode 160000 berkeley-softfloat-3
 create mode 160000 berkeley-testfloat-3

diff --git a/.gitmodules b/.gitmodules
index ffaed5c..3ad38ca 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,9 @@
 [submodule "dependencies/chisel"]
 	path = dependencies/chisel
 	url = git@github.com:chipsalliance/chisel.git
+[submodule "berkeley-testfloat-3"]
+	path = berkeley-testfloat-3
+	url = git@github.com:cyyself/berkeley-testfloat-3.git
+[submodule "berkeley-softfloat-3"]
+	path = berkeley-softfloat-3
+	url = git@github.com:cyyself/berkeley-softfloat-3.git
diff --git a/Makefile b/Makefile
index 6463eae..44cfedb 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,5 @@
+
+
 init:
 	git submodule update --init
 
@@ -16,3 +18,11 @@ bsp:
 clean:
 	git clean -fd
 
+softfloat:
+	make -C berkeley-softfloat-3/build/Linux-x86_64-GCC TESTFLOAT_OPTS="-DFLOAT64 -DFLOAT_ROUND_ODD" softfloat.a -j `nproc`
+	cp berkeley-softfloat-3/build/Linux-x86_64-GCC/softfloat.a run/
+
+testfloat:
+	make -C berkeley-testfloat-3/build/Linux-x86_64-GCC TESTFLOAT_OPTS="-DFLOAT64 -DFLOAT_ROUND_ODD" testfloat.a -j `nproc`
+	cp berkeley-testfloat-3/build/Linux-x86_64-GCC/testfloat.a run/
+
diff --git a/berkeley-softfloat-3 b/berkeley-softfloat-3
new file mode 160000
index 0000000..134f555
--- /dev/null
+++ b/berkeley-softfloat-3
@@ -0,0 +1 @@
+Subproject commit 134f55559754d1c184a5a7955c1fa7a8b99ccc40
diff --git a/berkeley-testfloat-3 b/berkeley-testfloat-3
new file mode 160000
index 0000000..8ffedb5
--- /dev/null
+++ b/berkeley-testfloat-3
@@ -0,0 +1 @@
+Subproject commit 8ffedb5134487652825873098fd22f48a16ebc35
diff --git a/tests/src/Ftests.scala b/tests/src/Ftests.scala
index a7a721a..d6194e0 100644
--- a/tests/src/Ftests.scala
+++ b/tests/src/Ftests.scala
@@ -88,9 +88,15 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
     os.makeDir.all(emulatorBuildDir)
 
 
-//    os.remove(rtlDir / "dut.sv")
-//    os.write(rtlDir / "dut.sv", chisel3.getVerilogString(new VerificationModule))
+    os.proc(
+      "make",
+      "softfloat",
+    ).call()
 
+    os.proc(
+      "make",
+      "testfloat",
+    ).call()
 
 
     val annos: AnnotationSeq = Seq(
@@ -180,9 +186,15 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
          |
          |add_executable(emulator
          |${allCSourceFiles.mkString("\n")}
+         |${runDir}/testfloat.a
+         |${runDir}/softfloat.a
          |)
          |
-         |target_include_directories(emulator PUBLIC $emulatorCHeader)
+         |target_include_directories(emulator PUBLIC
+         |$emulatorCHeader
+         |./berkeley-testfloat-3/source/
+         |./berkeley-softfloat-3/source/include/
+         |)
          |
          |target_link_libraries(emulator PUBLIC $${CMAKE_THREAD_LIBS_INIT})
          |target_link_libraries(emulator PUBLIC  fmt::fmt glog::glog )  # note that libargs is header only, nothing to link

From 0c76d92807a2029a72a3b2e2885952e39b1a4270 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 5 Oct 2023 19:30:01 +0800
Subject: [PATCH 089/109] [tests] link softfloat and testfloat

---
 tests/resources/csrc/encoding.h     |  2 +-
 tests/resources/csrc/vbridge_impl.h | 20 ++++++++++++++++++++
 tests/src/Ftests.scala              | 13 ++++++++-----
 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/tests/resources/csrc/encoding.h b/tests/resources/csrc/encoding.h
index bbe8afc..8cd946a 100644
--- a/tests/resources/csrc/encoding.h
+++ b/tests/resources/csrc/encoding.h
@@ -8,4 +8,4 @@ struct DutInterface{
                     svBitVecVal *rm;
                     svBitVecVal *refOut;
                     svBitVecVal *refFlags;
-};
\ No newline at end of file
+};
diff --git a/tests/resources/csrc/vbridge_impl.h b/tests/resources/csrc/vbridge_impl.h
index 0ffafe7..89c2b28 100644
--- a/tests/resources/csrc/vbridge_impl.h
+++ b/tests/resources/csrc/vbridge_impl.h
@@ -11,6 +11,26 @@
 
 #include "encoding.h"
 
+#include <cstdio>
+#include <cassert>
+#include <cstdint>
+
+extern "C" {
+#include "functions.h"
+#include "softfloat.h"
+#include "genCases.h"
+#include "genLoops.h"
+}
+
+struct testdata {
+    uint64_t a;
+    uint64_t b;
+    uint64_t expected_out;
+    function_t function;
+    roundingMode_t roundingMode;
+    exceptionFlag_t expectedException;
+};
+
 
 class VBridgeImpl {
 public:
diff --git a/tests/src/Ftests.scala b/tests/src/Ftests.scala
index d6194e0..fcbf18a 100644
--- a/tests/src/Ftests.scala
+++ b/tests/src/Ftests.scala
@@ -186,18 +186,21 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
          |
          |add_executable(emulator
          |${allCSourceFiles.mkString("\n")}
-         |${runDir}/testfloat.a
-         |${runDir}/softfloat.a
          |)
          |
          |target_include_directories(emulator PUBLIC
          |$emulatorCHeader
-         |./berkeley-testfloat-3/source/
-         |./berkeley-softfloat-3/source/include/
+         |/home/yyq/Projects/arithmetic/berkeley-testfloat-3/source
+         |/home/yyq/Projects/arithmetic/berkeley-softfloat-3/source/include
          |)
          |
          |target_link_libraries(emulator PUBLIC $${CMAKE_THREAD_LIBS_INIT})
-         |target_link_libraries(emulator PUBLIC  fmt::fmt glog::glog )  # note that libargs is header only, nothing to link
+         |target_link_libraries(emulator PUBLIC
+         |fmt::fmt
+         |glog::glog
+         |/home/yyq/Projects/arithmetic/run/softfloat.a
+         |/home/yyq/Projects/arithmetic/run/testfloat.a
+         |)  # note that libargs is header only, nothing to link
          |target_compile_definitions(emulator PRIVATE COSIM_VERILATOR)
          |
          |verilate(emulator

From ed13f23fbc19e8680a3374807131c2a8b820e504 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 5 Oct 2023 20:15:40 +0800
Subject: [PATCH 090/109] [tests] add test generator

---
 flake.nix                            |   1 +
 tests/resources/csrc/vbridge_impl.cc | 103 ++++++++++++++++++++++++---
 tests/resources/csrc/vbridge_impl.h  |  14 +++-
 tests/src/DUT.scala                  |  42 +++++------
 4 files changed, 128 insertions(+), 32 deletions(-)

diff --git a/flake.nix b/flake.nix
index 8ec0c8f..c06e2f0 100644
--- a/flake.nix
+++ b/flake.nix
@@ -25,6 +25,7 @@
             glog
             fmt
             zlib
+            ninja
           ];
         in
         {
diff --git a/tests/resources/csrc/vbridge_impl.cc b/tests/resources/csrc/vbridge_impl.cc
index 50596a6..32a1b0d 100644
--- a/tests/resources/csrc/vbridge_impl.cc
+++ b/tests/resources/csrc/vbridge_impl.cc
@@ -23,16 +23,25 @@ int VBridgeImpl::timeoutCheck() {
   return 0;
 }
 
+void VBridgeImpl::set_available() {
+  available = true;
+}
+
+void VBridgeImpl::clr_available() {
+  available = false;
+}
+
 void VBridgeImpl::dpiInitCosim() {
   google::InitGoogleLogging("emulator");
   FLAGS_logtostderr = true;
 
   ctx = Verilated::threadContextp();
 
-
   LOG(INFO) << fmt::format("[{}] dpiInitCosim", getCycle());
   LOG(INFO) << fmt::format(" running");
 
+  initTestCases();
+
   dpiDumpWave();
 }
 
@@ -42,18 +51,94 @@ void VBridgeImpl::dpiBasePoke(svBitVecVal *a) {
 }
 
 void VBridgeImpl::dpiBasePeek(svBit ready) {
-    LOG(INFO) << fmt::format("dpiPeek running = {}", ready);
+
+    if(ready == 1) {
+      set_available();
+      LOG(INFO) << fmt::format("available = {}",available);
+    }
+
+
 }
 
 void VBridgeImpl::dpiPeekPoke(const DutInterface &toDut) {
-  uint32_t v = 0x1000;
-  *toDut.a = v;
-  *toDut.b = v;
+  if(available==false) return;
+
+  LOG(INFO) << fmt::format("start to poke");
+
+  *toDut.a = test_queue.front().a;
+  *toDut.b = test_queue.front().b;
   *toDut.op = 0;
-  *toDut.rm = 0;
-  *toDut.refOut = v;
-  *toDut.refFlags = v;
-  *toDut.valid = 0;
+  *toDut.rm = test_queue.front().roundingMode;
+  *toDut.refOut = test_queue.front().expected_out;
+  *toDut.refFlags = test_queue.front().expectedException;
+  *toDut.valid = true;
+
+  test_queue.pop();
+}
+
+std::vector<testdata> mygen_abz_f32( float32_t trueFunction( float32_t, float32_t ) , function_t function, roundingMode_t roundingMode) {
+  // modified from berkeley-testfloat-3/source/genLoops.c
+  union ui32_f32 { uint32_t ui; float32_t f; } u;
+  uint_fast8_t trueFlags;
+
+  std::vector<testdata> res;
+
+  genCases_f32_ab_init();
+  while ( ! genCases_done ) {
+    genCases_f32_ab_next();
+
+    testdata curData;
+    curData.function = function;
+    curData.roundingMode = roundingMode;
+    u.f = genCases_f32_a;
+    curData.a = u.ui;
+    u.f = genCases_f32_b;
+    curData.b = u.ui;
+    softfloat_exceptionFlags = 0;
+    u.f = trueFunction( genCases_f32_a, genCases_f32_b );
+    curData.expectedException = static_cast<exceptionFlag_t>(softfloat_exceptionFlags);
+    curData.expected_out = u.ui;
+
+    res.push_back(curData);
+  }
+
+  return res;
+}
+
+
+std::vector<testdata> genTestCase(function_t function, roundingMode_t roundingMode) { // call it in dpiInit
+  // see berkeley-testfloat-3/source/testfloat_gen.c
+  std::vector<testdata> res;
+
+  genCases_setLevel( 1 );
+
+  switch (function) {
+    case F32_DIV:
+      res = mygen_abz_f32(f32_add, function, roundingMode);
+      break;
+    default:
+      assert(false);
+  }
+
+  return res;
+}
+
+void outputTestCases(std::vector<testdata> cases) {
+  for (auto x : cases) {
+    printf("%08x %08x %08x %02x\n", x.a, x.b, x.expected_out, x.expectedException);
+  }
+}
+
+void fillTestQueue(std::vector<testdata> cases) {
+  for (auto x : cases) {
+    vbridge_impl_instance.test_queue.push(x);
+  }
+}
+
+void VBridgeImpl::initTestCases() {
+  auto res = genTestCase(F32_DIV, ROUND_NEAR_EVEN);
+  fillTestQueue(res);
+//  outputTestCases(res); // TODO: demo, please delete
 }
 
 
diff --git a/tests/resources/csrc/vbridge_impl.h b/tests/resources/csrc/vbridge_impl.h
index 89c2b28..1df45f0 100644
--- a/tests/resources/csrc/vbridge_impl.h
+++ b/tests/resources/csrc/vbridge_impl.h
@@ -46,13 +46,19 @@ class VBridgeImpl {
 
     uint64_t getCycle() { return ctx->time(); }
 
-    static void dpiBasePoke(uint32_t *a);
+    void dpiBasePoke(uint32_t *a);
 
     void dpiPeekPoke(const DutInterface &toDut);
 
-    static void dpiBasePeek(svBit ready);
+    void dpiBasePeek(svBit ready);
 
+    std::queue <testdata> test_queue;
 
+    void initTestCases();
+
+    void set_available();
+
+    void clr_available();
 
 
 private:
@@ -65,6 +71,10 @@ class VBridgeImpl {
 
     const std::string wave = "/home/yyq/Projects/arithmetic/run/wave";
 
+    bool available;
+
+
+
 
 
 
diff --git a/tests/src/DUT.scala b/tests/src/DUT.scala
index e28ced7..4e3a4ac 100644
--- a/tests/src/DUT.scala
+++ b/tests/src/DUT.scala
@@ -11,37 +11,37 @@ import float._
   * in
   *
   * */
-class DUT(expWidth:Int, sigWidth:Int) extends Module {
-    val input = IO(Flipped(Decoupled(new DutInterface(expWidth, sigWidth))))
+class DUT(expWidth: Int, sigWidth: Int) extends Module {
+  val input = IO(Flipped(Decoupled(new DutInterface(expWidth, sigWidth))))
 
-    val actual = IO(new Bundle {
-      val out = Output(Bits((expWidth + sigWidth).W))
-      val exceptionFlags = Output(Bits(5.W))
-    })
+  val actual = IO(new Bundle {
+    val out = Output(Bits((expWidth + sigWidth).W))
+    val exceptionFlags = Output(Bits(5.W))
+  })
 
-    val check = IO(Output(Bool()))
-    val pass = IO(Output(Bool()))
+  val check = IO(Output(Bool()))
+  val pass = IO(Output(Bool()))
 
 
   val ds = Module(new DivSqrt(expWidth: Int, sigWidth: Int))
-  ds.input.valid :=  input.valid
-  ds.input.bits.sqrt :=  input.valid
-  ds.input.bits.a :=  input.bits.a
-  ds.input.bits.b :=  input.bits.b
-  ds.input.bits.roundingMode :=  input.bits.roundingMode
-  /** @todo */
-   input.ready := ds.input.ready
+  ds.input.valid := input.valid
+  ds.input.bits.sqrt := input.valid
+  ds.input.bits.a := input.bits.a
+  ds.input.bits.b := input.bits.b
+  ds.input.bits.roundingMode := input.bits.roundingMode
+
+  input.ready := ds.input.ready
 
   // collect result
-   actual.out := ds.output.bits.result
-   actual.exceptionFlags := ds.output.bits.exceptionFlags
+  actual.out := ds.output.bits.result
+  actual.exceptionFlags := ds.output.bits.exceptionFlags
 
 
-  val resultError =  actual.out =/=  input.bits.refOut
-  val flagError =  actual.exceptionFlags =/=  input.bits.refFlags
+  val resultError = actual.out =/= input.bits.refOut
+  val flagError = actual.exceptionFlags =/= input.bits.refFlags
 
-   check := ds.output.valid
-   pass := !(ds.output.valid && (resultError || flagError))
+  check := ds.output.valid
+  pass := !(ds.output.valid && (resultError || flagError))
 
 }
 

From 3d0f2f7f79021839d350d6c25513e0ae1282d6a9 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 5 Oct 2023 21:05:25 +0800
Subject: [PATCH 091/109] [tests] opt test io

---
 tests/src/DUT.scala                | 4 ++--
 tests/src/Ftests.scala             | 7 ++-----
 tests/src/TestBench.scala          | 4 ++++
 tests/src/VerificationModule.scala | 8 +++++++-
 4 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/tests/src/DUT.scala b/tests/src/DUT.scala
index 4e3a4ac..1154c3d 100644
--- a/tests/src/DUT.scala
+++ b/tests/src/DUT.scala
@@ -12,7 +12,7 @@ import float._
   *
   * */
 class DUT(expWidth: Int, sigWidth: Int) extends Module {
-  val input = IO(Flipped(Decoupled(new DutInterface(expWidth, sigWidth))))
+  val input = IO(Flipped(Decoupled(new DutPoke(expWidth, sigWidth))))
 
   val actual = IO(new Bundle {
     val out = Output(Bits((expWidth + sigWidth).W))
@@ -45,7 +45,7 @@ class DUT(expWidth: Int, sigWidth: Int) extends Module {
 
 }
 
-class DutInterface(expWidth: Int, sigWidth: Int) extends Bundle {
+class DutPoke(expWidth: Int, sigWidth: Int) extends Bundle {
   val a = UInt((expWidth + sigWidth).W)
   val b = UInt((expWidth + sigWidth).W)
   val op = UInt(2.W)
diff --git a/tests/src/Ftests.scala b/tests/src/Ftests.scala
index fcbf18a..6edab82 100644
--- a/tests/src/Ftests.scala
+++ b/tests/src/Ftests.scala
@@ -123,12 +123,9 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
       "firtool",
       elaborateDir / s"$topName.fir", s"--annotation-file=${elaborateDir / s"$topName.anno.json"}",
       "-dedup",
-      "-O=release",
-      "--disable-all-randomization",
+      "-O=debug",
       "--split-verilog",
-      "--preserve-values=none",
-      "--preserve-aggregate=all",
-      "--strip-debug-info",
+      "--preserve-values=named",
       s"-o=$rtlDir"
     ).call()
     val verilogs = os.read.lines(rtlDir / "filelist.f")
diff --git a/tests/src/TestBench.scala b/tests/src/TestBench.scala
index c83f92d..305846a 100644
--- a/tests/src/TestBench.scala
+++ b/tests/src/TestBench.scala
@@ -24,6 +24,10 @@ class TestBench(expWidth: Int, sigWidth: Int) extends RawModule {
   dut.input.bits.refFlags      := verificationModule.toDUT.bits.refFlags
   dut.input.valid              := verificationModule.toDUT.valid
   verificationModule.toDUT.ready  := dut.input.ready
+  verificationModule.check := dut.check
+  verificationModule.pass  := dut.pass
+  verificationModule.result := dut.actual.out
+  verificationModule.fflags := dut.actual.exceptionFlags
 
 }
 
diff --git a/tests/src/VerificationModule.scala b/tests/src/VerificationModule.scala
index 81f4e74..1045a03 100644
--- a/tests/src/VerificationModule.scala
+++ b/tests/src/VerificationModule.scala
@@ -16,7 +16,13 @@ class VerificationModule extends RawModule {
   val reset = IO(Output(Bool()))
 
 
-  val toDUT = IO(DecoupledIO(new DutInterface(8,24)))
+  val toDUT = IO(DecoupledIO(new DutPoke(8,24)))
+  val check = IO(Input(Bool()))
+  val pass = IO(Input(Bool()))
+
+  val result = IO(Input(UInt(32.W)))
+  val fflags = IO(Input(UInt(32.W)))
+
 
   val verbatim = Module(new ExtModule with HasExtModuleInline {
     override val desiredName = "Verbatim"

From 35e88a0210c3ee8f07b41523c04a4bac28a9817a Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Thu, 5 Oct 2023 22:01:40 +0800
Subject: [PATCH 092/109] [tests] build dpiCheck

---
 tests/resources/csrc/dpi.cc          | 14 +++--
 tests/resources/csrc/encoding.h      |  2 -
 tests/resources/csrc/vbridge_impl.cc | 70 +++++++++++++++++-----
 tests/resources/csrc/vbridge_impl.h  |  8 +++
 tests/src/DUT.scala                  | 31 ++++------
 tests/src/TestBench.scala            | 22 ++++---
 tests/src/VerificationModule.scala   | 86 ++++++++++++++--------------
 7 files changed, 139 insertions(+), 94 deletions(-)

diff --git a/tests/resources/csrc/dpi.cc b/tests/resources/csrc/dpi.cc
index 934bf16..4c5cf78 100644
--- a/tests/resources/csrc/dpi.cc
+++ b/tests/resources/csrc/dpi.cc
@@ -52,13 +52,19 @@ void dpiInitCosim() {
                  svBitVecVal *a,
                  svBitVecVal *b,
                  svBitVecVal *op,
-                 svBitVecVal *rm,
-                 svBitVecVal *refOut,
-                 svBitVecVal *refFlags) {
-  vbridge_impl_instance.dpiPeekPoke(DutInterface{valid, a, b, op, rm, refOut, refFlags});
+                 svBitVecVal *rm) {
+  vbridge_impl_instance.dpiPeekPoke(DutInterface{valid, a, b, op, rm});
 
 }
 
+[[maybe_unused]] void dpiCheck(
+            svBit valid,
+            const svBitVecVal *result,
+            const svBitVecVal *fflags) {
+
+   vbridge_impl_instance.dpiCheck(valid, *result, *fflags);
+}
+
 
 
 
diff --git a/tests/resources/csrc/encoding.h b/tests/resources/csrc/encoding.h
index 8cd946a..421219c 100644
--- a/tests/resources/csrc/encoding.h
+++ b/tests/resources/csrc/encoding.h
@@ -6,6 +6,4 @@ struct DutInterface{
                     svBitVecVal *b;
                     svBitVecVal *op;
                     svBitVecVal *rm;
-                    svBitVecVal *refOut;
-                    svBitVecVal *refFlags;
 };
diff --git a/tests/resources/csrc/vbridge_impl.cc b/tests/resources/csrc/vbridge_impl.cc
index 32a1b0d..640cf46 100644
--- a/tests/resources/csrc/vbridge_impl.cc
+++ b/tests/resources/csrc/vbridge_impl.cc
@@ -7,7 +7,7 @@
 
 
 
-VBridgeImpl::VBridgeImpl() : _cycles(100) {}
+VBridgeImpl::VBridgeImpl() : _cycles(10000) {}
 
 
 uint64_t VBridgeImpl::get_t() {
@@ -38,10 +38,11 @@ void VBridgeImpl::dpiInitCosim() {
   ctx = Verilated::threadContextp();
 
   LOG(INFO) << fmt::format("[{}] dpiInitCosim", getCycle());
-  LOG(INFO) << fmt::format(" running");
 
   initTestCases();
 
+  reloadcase();
+
   dpiDumpWave();
 }
 
@@ -54,7 +55,7 @@ void VBridgeImpl::dpiBasePeek(svBit ready) {
 
     if(ready == 1) {
       set_available();
-      LOG(INFO) << fmt::format("available = {}",available);
+//      LOG(INFO) << fmt::format("available = {}",available);
     }
 
 
@@ -63,17 +64,35 @@ void VBridgeImpl::dpiBasePeek(svBit ready) {
 void VBridgeImpl::dpiPeekPoke(const DutInterface &toDut) {
   if(available==false) return;
 
-  LOG(INFO) << fmt::format("start to poke");
 
-  *toDut.a = test_queue.front().a;
-  *toDut.b = test_queue.front().b;
+  *toDut.a = testcase.a;
+  *toDut.b = testcase.b;
   *toDut.op = 0;
-  *toDut.rm = test_queue.front().roundingMode;
-  *toDut.refOut = test_queue.front().expected_out;
-  *toDut.refFlags = test_queue.front().expectedException;
+  *toDut.rm = 0;
   *toDut.valid = true;
 
-  test_queue.pop();
+
+
+}
+
+void VBridgeImpl::dpiCheck(svBit valid, svBitVecVal result, svBitVecVal fflags) {
+  if(valid == 0) return;
+  LOG(INFO) << fmt::format("check");
+  if((result == testcase.expected_out) && (fflags == testcase.expectedException))
+    reloadcase();
+  else
+  {
+    LOG(INFO) << fmt::format("error");
+    LOG(INFO) << fmt::format("a = {:08X} \n", testcase.a);
+    LOG(INFO) << fmt::format("b = {:08X} \n", testcase.b);
+    LOG(INFO) << fmt::format("dut_result = {:08X} \n" , result);
+    LOG(INFO) << fmt::format("ref_result = {:08X} \n",testcase.expected_out);
+    LOG(INFO) << fmt::format("dut_flags = {:X} \n",fflags);
+    LOG(INFO) << fmt::format("ref_flags = {:X} \n",(int)testcase.expectedException);
+    dpiFinish();
+
+  }
+
 }
 
 std::vector<testdata> mygen_abz_f32( float32_t trueFunction( float32_t, float32_t ) , function_t function, roundingMode_t roundingMode) {
@@ -114,7 +133,7 @@ std::vector<testdata> genTestCase(function_t function, roundingMode_t roundingMo
 
   switch (function) {
     case F32_DIV:
-      res = mygen_abz_f32(f32_add, function, roundingMode);
+      res = mygen_abz_f32(f32_div, function, roundingMode);
       break;
     default:
       assert(false);
@@ -125,20 +144,45 @@ std::vector<testdata> genTestCase(function_t function, roundingMode_t roundingMo
 
 void outputTestCases(std::vector<testdata> cases) {
   for (auto x : cases) {
-    printf("%08x %08x %08x %02x\n", x.a, x.b, x.expected_out, x.expectedException);
+//    printf("%08x %08x %08x %02x\n", x.a, x.b, x.expected_out, x.expectedException);
   }
 }
 
 void fillTestQueue(std::vector<testdata> cases) {
   for (auto x : cases) {
     vbridge_impl_instance.test_queue.push(x);
+//    LOG(INFO) << fmt::format("queue = {}  {}",vbridge_impl_instance.test_queue.back().a, vbridge_impl_instance.test_queue.back().b);
   }
 }
 
+
 void VBridgeImpl::initTestCases() {
   auto res = genTestCase(F32_DIV, ROUND_NEAR_EVEN);
   fillTestQueue(res);
-//  outputTestCases(res); // TODO: demo, please delete
+  outputTestCases(res); // TODO: demo, please delete
+
+
+}
+
+void VBridgeImpl::reloadcase() {
+
+
+
+  testcase.a = test_queue.front().a;
+  testcase.b = test_queue.front().b;
+  testcase.expected_out = test_queue.front().expected_out;
+  testcase.expectedException = test_queue.front().expectedException;
+//  printf("%08x %08x %08x\n", test_vector[1].a, test_vector[1].b, test_vector[1].expected_out);
+//  LOG(INFO) << fmt::format("a = {:08X} \n", test_vector[0].a);
+//  LOG(INFO) << fmt::format("b = {:08X} \n", test_vector[0].b);
+//  LOG(INFO) << fmt::format("a = {:08X} \n", testcase.a);
+//  LOG(INFO) << fmt::format("b = {:08X} \n", testcase.b);
+//  LOG(INFO) << fmt::format("ref_result = {:08X} \n",testcase.expected_out);
+//  LOG(INFO) << fmt::format("reload");
+
+
+  test_queue.pop();
+
 }
 
 
diff --git a/tests/resources/csrc/vbridge_impl.h b/tests/resources/csrc/vbridge_impl.h
index 1df45f0..a8e7e86 100644
--- a/tests/resources/csrc/vbridge_impl.h
+++ b/tests/resources/csrc/vbridge_impl.h
@@ -54,12 +54,20 @@ class VBridgeImpl {
 
     std::queue <testdata> test_queue;
 
+    testdata testcase;
+
     void initTestCases();
 
+    void dpiCheck(svBit valid,
+                  svBitVecVal result,
+                  svBitVecVal fflags);
+
     void set_available();
 
     void clr_available();
 
+    void reloadcase();
+
 
 private:
 
diff --git a/tests/src/DUT.scala b/tests/src/DUT.scala
index 1154c3d..0c2d0d1 100644
--- a/tests/src/DUT.scala
+++ b/tests/src/DUT.scala
@@ -13,35 +13,21 @@ import float._
   * */
 class DUT(expWidth: Int, sigWidth: Int) extends Module {
   val input = IO(Flipped(Decoupled(new DutPoke(expWidth, sigWidth))))
-
-  val actual = IO(new Bundle {
-    val out = Output(Bits((expWidth + sigWidth).W))
-    val exceptionFlags = Output(Bits(5.W))
-  })
-
-  val check = IO(Output(Bool()))
-  val pass = IO(Output(Bool()))
-
+  val output = IO(Valid(new DutPeek(expWidth, sigWidth)))
 
   val ds = Module(new DivSqrt(expWidth: Int, sigWidth: Int))
   ds.input.valid := input.valid
-  ds.input.bits.sqrt := input.valid
+  ds.input.bits.sqrt := input.bits.op
   ds.input.bits.a := input.bits.a
   ds.input.bits.b := input.bits.b
   ds.input.bits.roundingMode := input.bits.roundingMode
 
   input.ready := ds.input.ready
 
-  // collect result
-  actual.out := ds.output.bits.result
-  actual.exceptionFlags := ds.output.bits.exceptionFlags
-
+  output.bits.result := ds.output.bits.result
+  output.bits.fflags := ds.output.bits.exceptionFlags
+  output.valid := ds.output.valid
 
-  val resultError = actual.out =/= input.bits.refOut
-  val flagError = actual.exceptionFlags =/= input.bits.refFlags
-
-  check := ds.output.valid
-  pass := !(ds.output.valid && (resultError || flagError))
 
 }
 
@@ -50,8 +36,11 @@ class DutPoke(expWidth: Int, sigWidth: Int) extends Bundle {
   val b = UInt((expWidth + sigWidth).W)
   val op = UInt(2.W)
   val roundingMode = UInt(3.W)
-  val refOut = UInt((expWidth + sigWidth).W)
-  val refFlags = UInt(5.W)
+}
+
+class DutPeek(expWidth: Int, sigWidth: Int) extends Bundle {
+  val result = UInt((expWidth + sigWidth).W)
+  val fflags = UInt(5.W)
 }
 
 
diff --git a/tests/src/TestBench.scala b/tests/src/TestBench.scala
index 305846a..b8a9cf6 100644
--- a/tests/src/TestBench.scala
+++ b/tests/src/TestBench.scala
@@ -16,18 +16,16 @@ class TestBench(expWidth: Int, sigWidth: Int) extends RawModule {
   clock := verificationModule.clock
   reset := verificationModule.reset
 
-  dut.input.bits.a             := verificationModule.toDUT.bits.a
-  dut.input.bits.b             := verificationModule.toDUT.bits.b
-  dut.input.bits.op            := verificationModule.toDUT.bits.op
-  dut.input.bits.roundingMode  := verificationModule.toDUT.bits.roundingMode
-  dut.input.bits.refOut        := verificationModule.toDUT.bits.refOut
-  dut.input.bits.refFlags      := verificationModule.toDUT.bits.refFlags
-  dut.input.valid              := verificationModule.toDUT.valid
-  verificationModule.toDUT.ready  := dut.input.ready
-  verificationModule.check := dut.check
-  verificationModule.pass  := dut.pass
-  verificationModule.result := dut.actual.out
-  verificationModule.fflags := dut.actual.exceptionFlags
+//  dut.input.bits.a             := verificationModule.dutPoke.bits.a
+//  dut.input.bits.b             := verificationModule.dutPoke.bits.b
+//  dut.input.bits.op            := verificationModule.dutPoke.bits.op
+//  dut.input.bits.roundingMode  := verificationModule.dutPoke.bits.roundingMode
+//  dut.input.valid              := verificationModule.dutPoke.valid
+//  verificationModule.dutPoke.ready  := dut.input.ready
+
+  verificationModule.dutPoke <> dut.input
+
+  verificationModule.dutPeek := dut.output
 
 }
 
diff --git a/tests/src/VerificationModule.scala b/tests/src/VerificationModule.scala
index 1045a03..7205693 100644
--- a/tests/src/VerificationModule.scala
+++ b/tests/src/VerificationModule.scala
@@ -16,12 +16,10 @@ class VerificationModule extends RawModule {
   val reset = IO(Output(Bool()))
 
 
-  val toDUT = IO(DecoupledIO(new DutPoke(8,24)))
-  val check = IO(Input(Bool()))
-  val pass = IO(Input(Bool()))
+  val dutPoke = IO(DecoupledIO(new DutPoke(8,24)))
 
-  val result = IO(Input(UInt(32.W)))
-  val fflags = IO(Input(UInt(32.W)))
+
+  val dutPeek = IO(Flipped(ValidIO(new DutPeek(8,24))))
 
 
   val verbatim = Module(new ExtModule with HasExtModuleInline {
@@ -72,45 +70,61 @@ class VerificationModule extends RawModule {
   clock := verbatim.clock
   reset := verbatim.reset
 
-  val dpiBasePoke = Module(new ExtModule with HasExtModuleInline {
-    override val desiredName = "dpiBasePoke"
-    val a = IO(Output(UInt(32.W)))
+
+  val dpiBasePeek = Module(new ExtModule with HasExtModuleInline {
+    override val desiredName = "dpiBasePeek"
+    val ready = IO(Input(Bool()))
     val clock = IO(Input(Clock()))
     setInline(
       s"$desiredName.sv",
       s"""module $desiredName(
          |  input clock,
-         |  output [31:0] a
+         |  input ready
          |);
-         |  import "DPI-C" function void $desiredName(output bit[31:0] a);
+         |  import "DPI-C" function void $desiredName(input bit ready);
          |
-         |  always @ (posedge clock) $desiredName(a);
+         |  always @ (posedge clock) $desiredName(ready);
          |endmodule
          |""".stripMargin
     )
   })
-  dpiBasePoke.clock := verbatim.clock
-//  toDUT.bits.a := dpiBasePoke.a
+  dpiBasePeek.clock := verbatim.clock
+  dpiBasePeek.ready := dutPoke.ready
+
+  val dpiCheck = Module(new ExtModule with HasExtModuleInline {
+    override val desiredName = "dpiCheck"
+    val clock  = IO(Input(Clock()))
+    val valid  = IO(Input(Bool()))
+    val result = IO(Input(UInt(32.W)))
+    val fflags = IO(Input(UInt(5.W)))
 
-  val dpiBasePeek = Module(new ExtModule with HasExtModuleInline {
-    override val desiredName = "dpiBasePeek"
-    val ready = IO(Input(Bool()))
-    val clock = IO(Input(Clock()))
     setInline(
       s"$desiredName.sv",
       s"""module $desiredName(
          |  input clock,
-         |  input ready
+         |  input valid,
+         |  input [31:0] result,
+         |  input [4:0]  fflags
          |);
-         |  import "DPI-C" function void $desiredName(input bit ready);
+         |  import "DPI-C" function void $desiredName(
+         |  input bit valid,
+         |  input bit[31:0] result,
+         |  input bit[4:0]  fflags
+         |  );
          |
-         |  always @ (posedge clock) $desiredName(ready);
+         |  always @ (posedge clock) #1 $desiredName(
+         |  valid,
+         |  result,
+         |  fflags
+         |  );
          |endmodule
          |""".stripMargin
     )
   })
-  dpiBasePeek.clock := verbatim.clock
-  dpiBasePeek.ready := toDUT.ready
+  dpiCheck.clock  := verbatim.clock
+  dpiCheck.result := dutPeek.bits.result
+  dpiCheck.fflags := dutPeek.bits.fflags
+  dpiCheck.valid  := dutPeek.valid
 
   val dpiPeekPoke = Module(new ExtModule with HasExtModuleInline {
     override val desiredName = "dpiPeekPoke"
@@ -119,8 +133,6 @@ class VerificationModule extends RawModule {
     val b = IO(Output(UInt(32.W)))
     val op = IO(Output(UInt(2.W)))
     val rm = IO(Output(UInt(3.W)))
-    val refOut = IO(Output(UInt(32.W)))
-    val refFlags = IO(Output(UInt(5.W)))
     val valid = IO(Output(Bool()))
     setInline(
       s"$desiredName.sv",
@@ -130,9 +142,7 @@ class VerificationModule extends RawModule {
          |  output [31:0] a,
          |  output [31:0] b,
          |  output [1:0] op,
-         |  output [2:0] rm,
-         |  output [31:0] refOut,
-         |  output [4:0]  refFlags
+         |  output [2:0] rm
          |);
          |
          |  import "DPI-C" function void $desiredName(
@@ -140,9 +150,7 @@ class VerificationModule extends RawModule {
          |  output bit[31:0] a,
          |  output bit[31:0] b,
          |  output bit[1:0]  op,
-         |  output bit[2:0]  rm,
-         |  output bit[31:0] refOut,
-         |  output bit[4:0]  refFlags
+         |  output bit[2:0]  rm
          |  );
          |
          |  always @ (negedge clock) $desiredName(
@@ -150,24 +158,18 @@ class VerificationModule extends RawModule {
          |  a,
          |  b,
          |  op,
-         |  rm,
-         |  refOut,
-         |  refFlags);
-         |
-         |
+         |  rm);
          |
          |endmodule
          |""".stripMargin
     )
   })
   dpiPeekPoke.clock       := verbatim.clock
-  toDUT.valid             := dpiPeekPoke.valid
-  toDUT.bits.a            := dpiPeekPoke.a
-  toDUT.bits.b            := dpiPeekPoke.b
-  toDUT.bits.op           := dpiPeekPoke.op
-  toDUT.bits.roundingMode := dpiPeekPoke.rm
-  toDUT.bits.refOut       := dpiPeekPoke.refOut
-  toDUT.bits.refFlags     := dpiPeekPoke.refFlags
+  dutPoke.valid             := dpiPeekPoke.valid
+  dutPoke.bits.a            := dpiPeekPoke.a
+  dutPoke.bits.b            := dpiPeekPoke.b
+  dutPoke.bits.op           := dpiPeekPoke.op
+  dutPoke.bits.roundingMode := dpiPeekPoke.rm
 
 
 

From cb3f5b69ef980730828d18e7db53509fc01f961e Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Sun, 8 Oct 2023 14:51:59 +0800
Subject: [PATCH 093/109] add plantform option to build testfloat and softfloat

---
 .gitmodules            | 4 ++--
 Makefile               | 4 ++--
 tests/src/Ftests.scala | 8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index 3ad38ca..38c019c 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -3,7 +3,7 @@
 	url = git@github.com:chipsalliance/chisel.git
 [submodule "berkeley-testfloat-3"]
 	path = berkeley-testfloat-3
-	url = git@github.com:cyyself/berkeley-testfloat-3.git
+	url = git@github.com:ucb-bar/berkeley-testfloat-3.git
 [submodule "berkeley-softfloat-3"]
 	path = berkeley-softfloat-3
-	url = git@github.com:cyyself/berkeley-softfloat-3.git
+	url = git@github.com:ucb-bar/berkeley-softfloat-3.git
diff --git a/Makefile b/Makefile
index 44cfedb..9e33eac 100644
--- a/Makefile
+++ b/Makefile
@@ -19,10 +19,10 @@ clean:
 	git clean -fd
 
 softfloat:
-	make -C berkeley-softfloat-3/build/Linux-x86_64-GCC TESTFLOAT_OPTS="-DFLOAT64 -DFLOAT_ROUND_ODD" softfloat.a -j `nproc`
+	make -C berkeley-softfloat-3/build/Linux-x86_64-GCC  SPECIALIZE_TYPE=RISCV TESTFLOAT_OPTS="-DFLOAT64 -DFLOAT_ROUND_ODD" softfloat.a -j `nproc`
 	cp berkeley-softfloat-3/build/Linux-x86_64-GCC/softfloat.a run/
 
 testfloat:
-	make -C berkeley-testfloat-3/build/Linux-x86_64-GCC TESTFLOAT_OPTS="-DFLOAT64 -DFLOAT_ROUND_ODD" testfloat.a -j `nproc`
+	make -C berkeley-testfloat-3/build/Linux-x86_64-GCC  SPECIALIZE_TYPE=RISCV TESTFLOAT_OPTS="-DFLOAT64 -DFLOAT_ROUND_ODD" testfloat.a -j `nproc`
 	cp berkeley-testfloat-3/build/Linux-x86_64-GCC/testfloat.a run/
 
diff --git a/tests/src/Ftests.scala b/tests/src/Ftests.scala
index 6edab82..28287ed 100644
--- a/tests/src/Ftests.scala
+++ b/tests/src/Ftests.scala
@@ -187,16 +187,16 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
          |
          |target_include_directories(emulator PUBLIC
          |$emulatorCHeader
-         |/home/yyq/Projects/arithmetic/berkeley-testfloat-3/source
-         |/home/yyq/Projects/arithmetic/berkeley-softfloat-3/source/include
+         |${os.pwd}/berkeley-testfloat-3/source
+         |${os.pwd}/berkeley-softfloat-3/source/include
          |)
          |
          |target_link_libraries(emulator PUBLIC $${CMAKE_THREAD_LIBS_INIT})
          |target_link_libraries(emulator PUBLIC
          |fmt::fmt
          |glog::glog
-         |/home/yyq/Projects/arithmetic/run/softfloat.a
-         |/home/yyq/Projects/arithmetic/run/testfloat.a
+         |$runDir/softfloat.a
+         |$runDir/testfloat.a
          |)  # note that libargs is header only, nothing to link
          |target_compile_definitions(emulator PRIVATE COSIM_VERILATOR)
          |

From d2fb27b9c5c0b481faffd50789dfeb2bfb3eb3cf Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Sun, 8 Oct 2023 16:13:15 +0800
Subject: [PATCH 094/109] add case number counter

---
 tests/resources/csrc/vbridge_impl.cc | 13 ++++++++-----
 tests/resources/csrc/vbridge_impl.h  |  2 ++
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/tests/resources/csrc/vbridge_impl.cc b/tests/resources/csrc/vbridge_impl.cc
index 640cf46..5a11b5b 100644
--- a/tests/resources/csrc/vbridge_impl.cc
+++ b/tests/resources/csrc/vbridge_impl.cc
@@ -7,7 +7,7 @@
 
 
 
-VBridgeImpl::VBridgeImpl() : _cycles(10000) {}
+VBridgeImpl::VBridgeImpl() : _cycles(1000000) {}
 
 
 uint64_t VBridgeImpl::get_t() {
@@ -17,7 +17,7 @@ uint64_t VBridgeImpl::get_t() {
 
 int VBridgeImpl::timeoutCheck() {
   if (get_t() > _cycles) {
-    LOG(INFO) << fmt::format("Simulation timeout, t={}", get_t());
+    LOG(INFO) << fmt::format("Simulation timeout, t={}, num={}", get_t(), cnt);
     dpiFinish();
   }
   return 0;
@@ -41,6 +41,8 @@ void VBridgeImpl::dpiInitCosim() {
 
   initTestCases();
 
+  cnt = 0;
+
   reloadcase();
 
   dpiDumpWave();
@@ -77,18 +79,19 @@ void VBridgeImpl::dpiPeekPoke(const DutInterface &toDut) {
 
 void VBridgeImpl::dpiCheck(svBit valid, svBitVecVal result, svBitVecVal fflags) {
   if(valid == 0) return;
-  LOG(INFO) << fmt::format("check");
+//  LOG(INFO) << fmt::format("check");
   if((result == testcase.expected_out) && (fflags == testcase.expectedException))
     reloadcase();
   else
   {
-    LOG(INFO) << fmt::format("error");
+
     LOG(INFO) << fmt::format("a = {:08X} \n", testcase.a);
     LOG(INFO) << fmt::format("b = {:08X} \n", testcase.b);
     LOG(INFO) << fmt::format("dut_result = {:08X} \n" , result);
     LOG(INFO) << fmt::format("ref_result = {:08X} \n",testcase.expected_out);
     LOG(INFO) << fmt::format("dut_flags = {:X} \n",fflags);
     LOG(INFO) << fmt::format("ref_flags = {:X} \n",(int)testcase.expectedException);
+    LOG(FATAL) << fmt::format("error at {} cases",cnt);
     dpiFinish();
 
   }
@@ -166,7 +169,7 @@ void VBridgeImpl::initTestCases() {
 
 void VBridgeImpl::reloadcase() {
 
-
+  cnt++;
 
   testcase.a = test_queue.front().a;
   testcase.b = test_queue.front().b;
diff --git a/tests/resources/csrc/vbridge_impl.h b/tests/resources/csrc/vbridge_impl.h
index a8e7e86..0929a2d 100644
--- a/tests/resources/csrc/vbridge_impl.h
+++ b/tests/resources/csrc/vbridge_impl.h
@@ -68,6 +68,8 @@ class VBridgeImpl {
 
     void reloadcase();
 
+    uint64_t cnt;
+
 
 private:
 

From 9f2bd6603c40f37e1c5ee5b6e5db4d0a46034eec Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Sun, 8 Oct 2023 18:36:15 +0800
Subject: [PATCH 095/109] add env and mutiple roundingmodes

---
 tests/resources/csrc/dpi.cc                |  2 +-
 tests/resources/csrc/exceptions.h          | 18 +++++++
 tests/resources/csrc/glog_exception_safe.h | 37 +++++++++++++++
 tests/resources/csrc/util.h                | 16 +++++++
 tests/resources/csrc/vbridge_impl.cc       | 55 ++++++++++++++++++----
 tests/resources/csrc/vbridge_impl.h        | 18 ++++++-
 tests/src/Ftests.scala                     | 14 +++++-
 7 files changed, 148 insertions(+), 12 deletions(-)
 create mode 100644 tests/resources/csrc/exceptions.h
 create mode 100644 tests/resources/csrc/glog_exception_safe.h
 create mode 100644 tests/resources/csrc/util.h

diff --git a/tests/resources/csrc/dpi.cc b/tests/resources/csrc/dpi.cc
index 4c5cf78..4187841 100644
--- a/tests/resources/csrc/dpi.cc
+++ b/tests/resources/csrc/dpi.cc
@@ -22,7 +22,7 @@
 
 void VBridgeImpl::dpiDumpWave() {
 
-        ::dpiDumpWave((wave + ".fst").c_str());
+        ::dpiDumpWave((wave + op + rmstring + ".fst").c_str());
 
 }
 #endif
diff --git a/tests/resources/csrc/exceptions.h b/tests/resources/csrc/exceptions.h
new file mode 100644
index 0000000..1eac049
--- /dev/null
+++ b/tests/resources/csrc/exceptions.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <stdexcept>
+
+class CosimException : public std::runtime_error {
+public:
+    explicit CosimException(const char *what) : runtime_error(what) {}
+};
+
+class TimeoutException : CosimException {
+public:
+    TimeoutException() : CosimException("timeout") {}
+};
+
+class ReturnException : CosimException {
+public:
+    ReturnException() : CosimException("returned") {}
+};
\ No newline at end of file
diff --git a/tests/resources/csrc/glog_exception_safe.h b/tests/resources/csrc/glog_exception_safe.h
new file mode 100644
index 0000000..3b7e0ba
--- /dev/null
+++ b/tests/resources/csrc/glog_exception_safe.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include <glog/logging.h>
+
+namespace google {
+
+    class CheckFailedException : public std::runtime_error {
+    public:
+        explicit CheckFailedException() : std::runtime_error("check failed") {}
+    };
+
+    class LogMessageFatal_S : public LogMessage {
+    public:
+        LogMessageFatal_S(const char *file, int line) : LogMessage(file, line, GLOG_ERROR) {};
+
+        LogMessageFatal_S(const char *file, int line, const CheckOpString &result) : LogMessage(file, line,
+                                                                                                GLOG_ERROR) {
+          stream() << "Check failed: " << (*result.str_) << " ";
+        };
+
+        ~LogMessageFatal_S() noexcept(false) {
+          Flush();
+          throw CheckFailedException();
+        };
+    };
+}// namespace google
+
+#define CHECK_OP_S(name, op, val1, val2) \
+  CHECK_OP_LOG(name, op, val1, val2, google::LogMessageFatal_S)
+
+#define COMPACT_GOOGLE_LOG_FATAL_S google::LogMessageFatal_S(__FILE__, __LINE__)
+
+#define CHECK_EQ_S(val1, val2) CHECK_OP_S(_EQ, ==, val1, val2)
+
+#define CHECK_S(condition)  \
+      LOG_IF(FATAL, GOOGLE_PREDICT_BRANCH_NOT_TAKEN(!(condition))) \
+             << "Check failed: " #condition " "
diff --git a/tests/resources/csrc/util.h b/tests/resources/csrc/util.h
new file mode 100644
index 0000000..ac74aa9
--- /dev/null
+++ b/tests/resources/csrc/util.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <cstdint>
+#include "glog_exception_safe.h"
+
+
+inline char *get_env_arg(const char *name) {
+  char *val = std::getenv(name);
+  CHECK_S(val != nullptr) << fmt::format("cannot find environment of name '{}'", name);
+  return val;
+}
+
+inline char *get_env_arg_default(const char *name, char *default_val) {
+  char *val = std::getenv(name);
+  return val == nullptr ? default_val : val;
+}
\ No newline at end of file
diff --git a/tests/resources/csrc/vbridge_impl.cc b/tests/resources/csrc/vbridge_impl.cc
index 5a11b5b..8f44a6e 100644
--- a/tests/resources/csrc/vbridge_impl.cc
+++ b/tests/resources/csrc/vbridge_impl.cc
@@ -7,7 +7,7 @@
 
 
 
-VBridgeImpl::VBridgeImpl() : _cycles(1000000) {}
+VBridgeImpl::VBridgeImpl() : _cycles(1000) {}
 
 
 uint64_t VBridgeImpl::get_t() {
@@ -16,8 +16,8 @@ uint64_t VBridgeImpl::get_t() {
 
 
 int VBridgeImpl::timeoutCheck() {
-  if (get_t() > _cycles) {
-    LOG(INFO) << fmt::format("Simulation timeout, t={}, num={}", get_t(), cnt);
+  if (cnt > _cycles) {
+    LOG(INFO) << fmt::format("pass {} cases, time = {}", cnt, get_t());
     dpiFinish();
   }
   return 0;
@@ -39,15 +39,48 @@ void VBridgeImpl::dpiInitCosim() {
 
   LOG(INFO) << fmt::format("[{}] dpiInitCosim", getCycle());
 
-  initTestCases();
+
 
   cnt = 0;
 
+  switch(rm){
+    case 0:
+      roundingMode = ROUND_NEAR_EVEN;
+      rmstring = "RNE";
+      break;
+    case 1:
+      roundingMode = ROUND_MINMAG;
+      rmstring = "RTZ";
+      break;
+    case 2:
+      roundingMode = ROUND_MIN;
+      rmstring = "RDN";
+      break;
+    case 3:
+      roundingMode = ROUND_MAX;
+      rmstring = "RUP";
+      break;
+    case 4:
+      roundingMode = ROUND_NEAR_MAXMAG;
+      rmstring = "RMM";
+      break;
+    default:
+      LOG(FATAL) << fmt::format("ilegal rm value = {}",rm);
+  }
+
+  LOG(INFO) << fmt::format("start test operation={} rounding mode= {}",op,rmstring);
+
+  initTestCases();
+
+
+
   reloadcase();
 
   dpiDumpWave();
 }
 
+
+
 void VBridgeImpl::dpiBasePoke(svBitVecVal *a) {
   uint32_t v = 0x1000;
   *a = v;
@@ -70,7 +103,7 @@ void VBridgeImpl::dpiPeekPoke(const DutInterface &toDut) {
   *toDut.a = testcase.a;
   *toDut.b = testcase.b;
   *toDut.op = 0;
-  *toDut.rm = 0;
+  *toDut.rm = rm;
   *toDut.valid = true;
 
 
@@ -91,7 +124,7 @@ void VBridgeImpl::dpiCheck(svBit valid, svBitVecVal result, svBitVecVal fflags)
     LOG(INFO) << fmt::format("ref_result = {:08X} \n",testcase.expected_out);
     LOG(INFO) << fmt::format("dut_flags = {:X} \n",fflags);
     LOG(INFO) << fmt::format("ref_flags = {:X} \n",(int)testcase.expectedException);
-    LOG(FATAL) << fmt::format("error at {} cases",cnt);
+    LOG(INFO) << fmt::format("error at {} cases",cnt);
     dpiFinish();
 
   }
@@ -154,14 +187,20 @@ void outputTestCases(std::vector<testdata> cases) {
 void fillTestQueue(std::vector<testdata> cases) {
   for (auto x : cases) {
     vbridge_impl_instance.test_queue.push(x);
-//    LOG(INFO) << fmt::format("queue = {}  {}",vbridge_impl_instance.test_queue.back().a, vbridge_impl_instance.test_queue.back().b);
+
   }
 }
 
 
 void VBridgeImpl::initTestCases() {
-  auto res = genTestCase(F32_DIV, ROUND_NEAR_EVEN);
+  LOG(INFO) << fmt::format("generate cases in roundingMode = {}", (int)roundingMode);
+  LOG(INFO) << fmt::format("circuit  rm = {}", rm);
+  auto res = genTestCase(F32_DIV, roundingMode);
+  LOG(INFO) << fmt::format("vector = {:08X} ",res[0].expected_out);
+
+
   fillTestQueue(res);
+  LOG(INFO) << fmt::format("queue = {:08X} ",vbridge_impl_instance.test_queue.front().expected_out);
   outputTestCases(res); // TODO: demo, please delete
 
 
diff --git a/tests/resources/csrc/vbridge_impl.h b/tests/resources/csrc/vbridge_impl.h
index 0929a2d..e294d1f 100644
--- a/tests/resources/csrc/vbridge_impl.h
+++ b/tests/resources/csrc/vbridge_impl.h
@@ -10,6 +10,7 @@
 #include <svdpi.h>
 
 #include "encoding.h"
+#include "util.h"
 
 #include <cstdio>
 #include <cassert>
@@ -70,6 +71,10 @@ class VBridgeImpl {
 
     uint64_t cnt;
 
+    roundingMode_t roundingMode;
+
+    std::string rmstring;
+
 
 private:
 
@@ -79,10 +84,21 @@ class VBridgeImpl {
     uint64_t _cycles;
 
 
-    const std::string wave = "/home/yyq/Projects/arithmetic/run/wave";
+//    const std::string wave = "/home/yyq/Projects/arithmetic/run/wave";
 
     bool available;
 
+    const std::string wave = get_env_arg("wave");
+
+    const std::string op = get_env_arg("op");
+
+    const int rm = std::stoul(get_env_arg("rm"), nullptr, 10);
+
+
+
+
+
+
 
 
 
diff --git a/tests/src/Ftests.scala b/tests/src/Ftests.scala
index 28287ed..689c90a 100644
--- a/tests/src/Ftests.scala
+++ b/tests/src/Ftests.scala
@@ -144,7 +144,10 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
       "dpi.cc",
       "vbridge_impl.cc",
       "vbridge_impl.h",
-      "encoding.h"
+      "encoding.h",
+      "exceptions.h",
+      "glog_exception_safe.h",
+      "util.h"
     ).map { f =>
       os.pwd / "tests" / "resources" / "csrc" / f
     }
@@ -225,8 +228,15 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
     // build emulator
     os.proc(Seq("ninja", "-C", emulatorBuildDir).map(_.toString)).call(emulatorBuildDir)
 
+    val runEnv = Map(
+      "wave" -> s"${runDir}/",
+      "op" -> "div",
+      "rm" -> "1"
+    )
+
+
     // run
-    os.proc(Seq("./emulator").map(_.toString)).call(emulatorBuildDir)
+    os.proc(Seq("./emulator").map(_.toString)).call(cwd=emulatorBuildDir,env=runEnv)
 
 
     Seq("No errors found.")

From 5d260c8b814a6557823d79cbdc101eb00f3dd34e Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 10 Oct 2023 12:46:13 +0800
Subject: [PATCH 096/109] add all rounding mode support

---
 tests/resources/csrc/vbridge_impl.cc | 12 +++++-------
 tests/resources/csrc/vbridge_impl.h  | 13 -------------
 tests/src/Ftests.scala               | 24 +++++++++++++++++-------
 3 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/tests/resources/csrc/vbridge_impl.cc b/tests/resources/csrc/vbridge_impl.cc
index 8f44a6e..83621c8 100644
--- a/tests/resources/csrc/vbridge_impl.cc
+++ b/tests/resources/csrc/vbridge_impl.cc
@@ -120,11 +120,10 @@ void VBridgeImpl::dpiCheck(svBit valid, svBitVecVal result, svBitVecVal fflags)
 
     LOG(INFO) << fmt::format("a = {:08X} \n", testcase.a);
     LOG(INFO) << fmt::format("b = {:08X} \n", testcase.b);
-    LOG(INFO) << fmt::format("dut_result = {:08X} \n" , result);
-    LOG(INFO) << fmt::format("ref_result = {:08X} \n",testcase.expected_out);
-    LOG(INFO) << fmt::format("dut_flags = {:X} \n",fflags);
-    LOG(INFO) << fmt::format("ref_flags = {:X} \n",(int)testcase.expectedException);
+    LOG(INFO) << fmt::format("Result differs! dut vs ref  = {:08X} vs {:08X} \n" , result,testcase.expected_out);
+    LOG(INFO) << fmt::format("Flag differs!   dut vs ref  = {:08X} vs {:08X} \n",fflags,(int)testcase.expectedException);
     LOG(INFO) << fmt::format("error at {} cases",cnt);
+    dpiError("error");
     dpiFinish();
 
   }
@@ -138,6 +137,8 @@ std::vector<testdata> mygen_abz_f32( float32_t trueFunction( float32_t, float32_
 
   std::vector<testdata> res;
 
+  softfloat_roundingMode = roundingMode - 1 ;
+
   genCases_f32_ab_init();
   while ( ! genCases_done ) {
     genCases_f32_ab_next();
@@ -196,11 +197,8 @@ void VBridgeImpl::initTestCases() {
   LOG(INFO) << fmt::format("generate cases in roundingMode = {}", (int)roundingMode);
   LOG(INFO) << fmt::format("circuit  rm = {}", rm);
   auto res = genTestCase(F32_DIV, roundingMode);
-  LOG(INFO) << fmt::format("vector = {:08X} ",res[0].expected_out);
-
 
   fillTestQueue(res);
-  LOG(INFO) << fmt::format("queue = {:08X} ",vbridge_impl_instance.test_queue.front().expected_out);
   outputTestCases(res); // TODO: demo, please delete
 
 
diff --git a/tests/resources/csrc/vbridge_impl.h b/tests/resources/csrc/vbridge_impl.h
index e294d1f..f25cc5d 100644
--- a/tests/resources/csrc/vbridge_impl.h
+++ b/tests/resources/csrc/vbridge_impl.h
@@ -83,9 +83,6 @@ class VBridgeImpl {
 
     uint64_t _cycles;
 
-
-//    const std::string wave = "/home/yyq/Projects/arithmetic/run/wave";
-
     bool available;
 
     const std::string wave = get_env_arg("wave");
@@ -94,16 +91,6 @@ class VBridgeImpl {
 
     const int rm = std::stoul(get_env_arg("rm"), nullptr, 10);
 
-
-
-
-
-
-
-
-
-
-
 };
 
 extern VBridgeImpl vbridge_impl_instance;
diff --git a/tests/src/Ftests.scala b/tests/src/Ftests.scala
index 689c90a..744b464 100644
--- a/tests/src/Ftests.scala
+++ b/tests/src/Ftests.scala
@@ -33,6 +33,14 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
     "-rnear_maxMag" -> "4",
   )
 
+  val rmMaps = Map(
+    0 -> "RNE",
+    1 -> "RTZ",
+    2 -> "RDN",
+    3 -> "RUP",
+    4 -> "RMM"
+  )
+
   def exp(f: Int) = f match {
     case 16 => 5
     case 32 => 8
@@ -228,15 +236,17 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
     // build emulator
     os.proc(Seq("ninja", "-C", emulatorBuildDir).map(_.toString)).call(emulatorBuildDir)
 
-    val runEnv = Map(
-      "wave" -> s"${runDir}/",
-      "op" -> "div",
-      "rm" -> "1"
-    )
-
 
     // run
-    os.proc(Seq("./emulator").map(_.toString)).call(cwd=emulatorBuildDir,env=runEnv)
+    for(x<- 0 to 4){
+      val runEnv = Map(
+        "wave" -> s"${runDir}/",
+        "op" -> "div",
+        "rm" -> s"$x"
+      )
+      os.proc(Seq("./emulator").map(_.toString)).call(stdout = runDir / s"${rmMaps(x)}.log",cwd=emulatorBuildDir,env=runEnv)
+    }
+
 
 
     Seq("No errors found.")

From 36fbc87dd8f5dc92dbe1285f9ede076ff23c43f0 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 10 Oct 2023 13:46:45 +0800
Subject: [PATCH 097/109] add sqrt support

---
 tests/resources/csrc/dpi.cc                |  9 ---
 tests/resources/csrc/encoding.h            |  9 ---
 tests/resources/csrc/glog_exception_safe.h | 37 ------------
 tests/resources/csrc/util.h                |  3 +-
 tests/resources/csrc/vbridge_impl.cc       | 69 ++++++++++++++--------
 tests/resources/csrc/vbridge_impl.h        | 10 +++-
 tests/src/Ftests.scala                     |  3 -
 7 files changed, 54 insertions(+), 86 deletions(-)
 delete mode 100644 tests/resources/csrc/encoding.h
 delete mode 100644 tests/resources/csrc/glog_exception_safe.h

diff --git a/tests/resources/csrc/dpi.cc b/tests/resources/csrc/dpi.cc
index 4187841..2bfa9b8 100644
--- a/tests/resources/csrc/dpi.cc
+++ b/tests/resources/csrc/dpi.cc
@@ -9,14 +9,8 @@
 
 #include "svdpi.h"
 #include "vbridge_impl.h"
-#include "encoding.h"
 
 
-//void sigint_handler(int s) {
-//  terminated = true;
-//  dpiFinish();
-//}
-
 
 #if VM_TRACE
 
@@ -28,15 +22,12 @@ void VBridgeImpl::dpiDumpWave() {
 #endif
 
 void dpiInitCosim() {
-//  std::signal(SIGINT, sigint_handler);
   svSetScope(svGetScopeFromName("TOP.TestBench.verificationModule.verbatim"));
   vbridge_impl_instance.dpiInitCosim();
 }
 
 [[maybe_unused]] void dpiTimeoutCheck() {
-
         vbridge_impl_instance.timeoutCheck();
-
 }
 
 [[maybe_unused]] void dpiBasePoke(svBitVecVal *a) {
diff --git a/tests/resources/csrc/encoding.h b/tests/resources/csrc/encoding.h
deleted file mode 100644
index 421219c..0000000
--- a/tests/resources/csrc/encoding.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#pragma once
-
-struct DutInterface{
-                    svBit *valid;
-                    svBitVecVal *a;
-                    svBitVecVal *b;
-                    svBitVecVal *op;
-                    svBitVecVal *rm;
-};
diff --git a/tests/resources/csrc/glog_exception_safe.h b/tests/resources/csrc/glog_exception_safe.h
deleted file mode 100644
index 3b7e0ba..0000000
--- a/tests/resources/csrc/glog_exception_safe.h
+++ /dev/null
@@ -1,37 +0,0 @@
-#pragma once
-
-#include <glog/logging.h>
-
-namespace google {
-
-    class CheckFailedException : public std::runtime_error {
-    public:
-        explicit CheckFailedException() : std::runtime_error("check failed") {}
-    };
-
-    class LogMessageFatal_S : public LogMessage {
-    public:
-        LogMessageFatal_S(const char *file, int line) : LogMessage(file, line, GLOG_ERROR) {};
-
-        LogMessageFatal_S(const char *file, int line, const CheckOpString &result) : LogMessage(file, line,
-                                                                                                GLOG_ERROR) {
-          stream() << "Check failed: " << (*result.str_) << " ";
-        };
-
-        ~LogMessageFatal_S() noexcept(false) {
-          Flush();
-          throw CheckFailedException();
-        };
-    };
-}// namespace google
-
-#define CHECK_OP_S(name, op, val1, val2) \
-  CHECK_OP_LOG(name, op, val1, val2, google::LogMessageFatal_S)
-
-#define COMPACT_GOOGLE_LOG_FATAL_S google::LogMessageFatal_S(__FILE__, __LINE__)
-
-#define CHECK_EQ_S(val1, val2) CHECK_OP_S(_EQ, ==, val1, val2)
-
-#define CHECK_S(condition)  \
-      LOG_IF(FATAL, GOOGLE_PREDICT_BRANCH_NOT_TAKEN(!(condition))) \
-             << "Check failed: " #condition " "
diff --git a/tests/resources/csrc/util.h b/tests/resources/csrc/util.h
index ac74aa9..4ce3566 100644
--- a/tests/resources/csrc/util.h
+++ b/tests/resources/csrc/util.h
@@ -1,12 +1,11 @@
 #pragma once
 
 #include <cstdint>
-#include "glog_exception_safe.h"
 
 
 inline char *get_env_arg(const char *name) {
   char *val = std::getenv(name);
-  CHECK_S(val != nullptr) << fmt::format("cannot find environment of name '{}'", name);
+  CHECK(val != nullptr) << fmt::format("cannot find environment of name '{}'", name);
   return val;
 }
 
diff --git a/tests/resources/csrc/vbridge_impl.cc b/tests/resources/csrc/vbridge_impl.cc
index 83621c8..5680941 100644
--- a/tests/resources/csrc/vbridge_impl.cc
+++ b/tests/resources/csrc/vbridge_impl.cc
@@ -34,12 +34,10 @@ void VBridgeImpl::clr_available() {
 void VBridgeImpl::dpiInitCosim() {
   google::InitGoogleLogging("emulator");
   FLAGS_logtostderr = true;
+  FLAGS_minloglevel = 0;
 
   ctx = Verilated::threadContextp();
-
-  LOG(INFO) << fmt::format("[{}] dpiInitCosim", getCycle());
-
-
+//  LOG(INFO) << fmt::format("[{}] dpiInitCosim", getCycle());
 
   cnt = 0;
 
@@ -68,12 +66,11 @@ void VBridgeImpl::dpiInitCosim() {
       LOG(FATAL) << fmt::format("ilegal rm value = {}",rm);
   }
 
-  LOG(INFO) << fmt::format("start test operation={} rounding mode= {}",op,rmstring);
+  LOG(INFO) << fmt::format("test f32_{} in {}",op,rmstring);
 
   initTestCases();
 
 
-
   reloadcase();
 
   dpiDumpWave();
@@ -99,45 +96,36 @@ void VBridgeImpl::dpiBasePeek(svBit ready) {
 void VBridgeImpl::dpiPeekPoke(const DutInterface &toDut) {
   if(available==false) return;
 
-
   *toDut.a = testcase.a;
   *toDut.b = testcase.b;
   *toDut.op = 0;
   *toDut.rm = rm;
   *toDut.valid = true;
 
-
-
 }
 
 void VBridgeImpl::dpiCheck(svBit valid, svBitVecVal result, svBitVecVal fflags) {
   if(valid == 0) return;
-//  LOG(INFO) << fmt::format("check");
   if((result == testcase.expected_out) && (fflags == testcase.expectedException))
     reloadcase();
   else
   {
+    LOG(ERROR) << fmt::format("error at {} cases", cnt);
+    LOG(ERROR) << fmt::format("a = {:08X},b = {:08X} \n", testcase.a, testcase.b);
+    LOG(ERROR) << fmt::format("Result  dut vs ref  = {:08X} vs {:08X} \n" , result,testcase.expected_out);
+    LOG(ERROR) << fmt::format("Flag    dut vs ref  = {:08X} vs {:08X} \n" , fflags,(int)testcase.expectedException);
 
-    LOG(INFO) << fmt::format("a = {:08X} \n", testcase.a);
-    LOG(INFO) << fmt::format("b = {:08X} \n", testcase.b);
-    LOG(INFO) << fmt::format("Result differs! dut vs ref  = {:08X} vs {:08X} \n" , result,testcase.expected_out);
-    LOG(INFO) << fmt::format("Flag differs!   dut vs ref  = {:08X} vs {:08X} \n",fflags,(int)testcase.expectedException);
-    LOG(INFO) << fmt::format("error at {} cases",cnt);
-    dpiError("error");
     dpiFinish();
-
   }
-
 }
 
 std::vector<testdata> mygen_abz_f32( float32_t trueFunction( float32_t, float32_t ) , function_t function, roundingMode_t roundingMode) {
   // modified from berkeley-testfloat-3/source/genLoops.c
   union ui32_f32 { uint32_t ui; float32_t f; } u;
-  uint_fast8_t trueFlags;
 
   std::vector<testdata> res;
 
-  softfloat_roundingMode = roundingMode - 1 ;
+  softfloat_roundingMode = roundingMode - 1;
 
   genCases_f32_ab_init();
   while ( ! genCases_done ) {
@@ -145,7 +133,6 @@ std::vector<testdata> mygen_abz_f32( float32_t trueFunction( float32_t, float32_
 
     testdata curData;
     curData.function = function;
-    curData.roundingMode = roundingMode;
     u.f = genCases_f32_a;
     curData.a = u.ui;
     u.f = genCases_f32_b;
@@ -161,6 +148,31 @@ std::vector<testdata> mygen_abz_f32( float32_t trueFunction( float32_t, float32_
   return res;
 }
 
+std::vector<testdata> mygen_az_f32( float32_t trueFunction( float32_t ), function_t function, roundingMode_t roundingMode)
+{
+  union ui32_f32 { uint32_t ui; float32_t f; } u;
+  std::vector<testdata> res;
+  softfloat_roundingMode = roundingMode - 1;
+
+  genCases_f32_a_init();
+  while ( ! genCases_done  ) {
+    genCases_f32_a_next();
+
+    testdata curData;
+    curData.function = function;
+
+    u.f = genCases_f32_a;
+    curData.a = u.ui;
+    curData.b = u.ui;
+    softfloat_exceptionFlags = 0;
+    u.f = trueFunction( genCases_f32_a );
+    curData.expectedException = static_cast<exceptionFlag_t>(softfloat_exceptionFlags);
+    curData.expected_out = u.ui;
+    res.push_back(curData);
+  }
+  return res;
+}
+
 
 std::vector<testdata> genTestCase(function_t function, roundingMode_t roundingMode) { // call it in dpiInit
   // see berkeley-testfloat-3/source/testfloat_gen.c
@@ -172,6 +184,9 @@ std::vector<testdata> genTestCase(function_t function, roundingMode_t roundingMo
     case F32_DIV:
       res = mygen_abz_f32(f32_div, function, roundingMode);
       break;
+    case F32_SQRT:
+      res = mygen_az_f32(f32_sqrt, function, roundingMode);
+      break;
     default:
       assert(false);
   }
@@ -194,9 +209,15 @@ void fillTestQueue(std::vector<testdata> cases) {
 
 
 void VBridgeImpl::initTestCases() {
-  LOG(INFO) << fmt::format("generate cases in roundingMode = {}", (int)roundingMode);
-  LOG(INFO) << fmt::format("circuit  rm = {}", rm);
-  auto res = genTestCase(F32_DIV, roundingMode);
+
+  std::vector<testdata> res;
+
+  if (op=="div"){
+    res = genTestCase(F32_DIV, roundingMode);
+  } else if (op=="sqrt"){
+    res = genTestCase(F32_SQRT, roundingMode);
+  } else LOG(FATAL) << fmt::format("illegal operation");
+
 
   fillTestQueue(res);
   outputTestCases(res); // TODO: demo, please delete
diff --git a/tests/resources/csrc/vbridge_impl.h b/tests/resources/csrc/vbridge_impl.h
index f25cc5d..06dd970 100644
--- a/tests/resources/csrc/vbridge_impl.h
+++ b/tests/resources/csrc/vbridge_impl.h
@@ -9,7 +9,6 @@
 
 #include <svdpi.h>
 
-#include "encoding.h"
 #include "util.h"
 
 #include <cstdio>
@@ -23,12 +22,19 @@ extern "C" {
 #include "genLoops.h"
 }
 
+struct DutInterface{
+    svBit *valid;
+    svBitVecVal *a;
+    svBitVecVal *b;
+    svBitVecVal *op;
+    svBitVecVal *rm;
+};
+
 struct testdata {
     uint64_t a;
     uint64_t b;
     uint64_t expected_out;
     function_t function;
-    roundingMode_t roundingMode;
     exceptionFlag_t expectedException;
 };
 
diff --git a/tests/src/Ftests.scala b/tests/src/Ftests.scala
index 744b464..1ed9f22 100644
--- a/tests/src/Ftests.scala
+++ b/tests/src/Ftests.scala
@@ -152,9 +152,6 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
       "dpi.cc",
       "vbridge_impl.cc",
       "vbridge_impl.h",
-      "encoding.h",
-      "exceptions.h",
-      "glog_exception_safe.h",
       "util.h"
     ).map { f =>
       os.pwd / "tests" / "resources" / "csrc" / f

From 6cc9c97f71e8185e9215f8762586d61a92e7885b Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 10 Oct 2023 14:04:13 +0800
Subject: [PATCH 098/109] fix op IO width

---
 tests/resources/csrc/dpi.cc          | 2 +-
 tests/resources/csrc/vbridge_impl.cc | 2 +-
 tests/resources/csrc/vbridge_impl.h  | 2 +-
 tests/src/DUT.scala                  | 2 +-
 tests/src/VerificationModule.scala   | 6 +++---
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/resources/csrc/dpi.cc b/tests/resources/csrc/dpi.cc
index 2bfa9b8..5888a3b 100644
--- a/tests/resources/csrc/dpi.cc
+++ b/tests/resources/csrc/dpi.cc
@@ -42,7 +42,7 @@ void dpiInitCosim() {
                  svBit *valid,
                  svBitVecVal *a,
                  svBitVecVal *b,
-                 svBitVecVal *op,
+                 svBit *op,
                  svBitVecVal *rm) {
   vbridge_impl_instance.dpiPeekPoke(DutInterface{valid, a, b, op, rm});
 
diff --git a/tests/resources/csrc/vbridge_impl.cc b/tests/resources/csrc/vbridge_impl.cc
index 5680941..9ad7720 100644
--- a/tests/resources/csrc/vbridge_impl.cc
+++ b/tests/resources/csrc/vbridge_impl.cc
@@ -98,7 +98,7 @@ void VBridgeImpl::dpiPeekPoke(const DutInterface &toDut) {
 
   *toDut.a = testcase.a;
   *toDut.b = testcase.b;
-  *toDut.op = 0;
+  *toDut.op = false;
   *toDut.rm = rm;
   *toDut.valid = true;
 
diff --git a/tests/resources/csrc/vbridge_impl.h b/tests/resources/csrc/vbridge_impl.h
index 06dd970..16dd0a6 100644
--- a/tests/resources/csrc/vbridge_impl.h
+++ b/tests/resources/csrc/vbridge_impl.h
@@ -26,7 +26,7 @@ struct DutInterface{
     svBit *valid;
     svBitVecVal *a;
     svBitVecVal *b;
-    svBitVecVal *op;
+    svBit *op;
     svBitVecVal *rm;
 };
 
diff --git a/tests/src/DUT.scala b/tests/src/DUT.scala
index 0c2d0d1..1385673 100644
--- a/tests/src/DUT.scala
+++ b/tests/src/DUT.scala
@@ -34,7 +34,7 @@ class DUT(expWidth: Int, sigWidth: Int) extends Module {
 class DutPoke(expWidth: Int, sigWidth: Int) extends Bundle {
   val a = UInt((expWidth + sigWidth).W)
   val b = UInt((expWidth + sigWidth).W)
-  val op = UInt(2.W)
+  val op = Bool()
   val roundingMode = UInt(3.W)
 }
 
diff --git a/tests/src/VerificationModule.scala b/tests/src/VerificationModule.scala
index 7205693..48ba3de 100644
--- a/tests/src/VerificationModule.scala
+++ b/tests/src/VerificationModule.scala
@@ -131,7 +131,7 @@ class VerificationModule extends RawModule {
     val clock = IO(Input(Clock()))
     val a = IO(Output(UInt(32.W)))
     val b = IO(Output(UInt(32.W)))
-    val op = IO(Output(UInt(2.W)))
+    val op = IO(Output(Bool()))
     val rm = IO(Output(UInt(3.W)))
     val valid = IO(Output(Bool()))
     setInline(
@@ -141,7 +141,7 @@ class VerificationModule extends RawModule {
          |  output valid,
          |  output [31:0] a,
          |  output [31:0] b,
-         |  output [1:0] op,
+         |  output op,
          |  output [2:0] rm
          |);
          |
@@ -149,7 +149,7 @@ class VerificationModule extends RawModule {
          |  output bit valid,
          |  output bit[31:0] a,
          |  output bit[31:0] b,
-         |  output bit[1:0]  op,
+         |  output bit op,
          |  output bit[2:0]  rm
          |  );
          |

From efa4b49fbb0dffa3463188ae9596fb105078faa5 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 10 Oct 2023 15:04:34 +0800
Subject: [PATCH 099/109] finish div and sqrt tests

---
 tests/resources/csrc/dpi.cc                |  12 +-
 tests/resources/csrc/vbridge_impl.cc       |  40 ++---
 tests/resources/csrc/vbridge_impl.h        |  10 +-
 tests/src/Ftests.scala                     |   2 +-
 tests/src/GrandCentral.scala               |  45 -----
 tests/src/ValExec_DivSqrtRecFN_small.scala | 182 ---------------------
 tests/src/VerificationModule.scala         |  24 +--
 7 files changed, 40 insertions(+), 275 deletions(-)
 delete mode 100644 tests/src/GrandCentral.scala
 delete mode 100644 tests/src/ValExec_DivSqrtRecFN_small.scala

diff --git a/tests/resources/csrc/dpi.cc b/tests/resources/csrc/dpi.cc
index 5888a3b..5160367 100644
--- a/tests/resources/csrc/dpi.cc
+++ b/tests/resources/csrc/dpi.cc
@@ -30,21 +30,17 @@ void dpiInitCosim() {
         vbridge_impl_instance.timeoutCheck();
 }
 
-[[maybe_unused]] void dpiBasePoke(svBitVecVal *a) {
-  vbridge_impl_instance.dpiBasePoke(a);
+[[maybe_unused]] void dpiPeek(svBit ready) {
+  vbridge_impl_instance.dpiPeek(ready);
 }
 
-[[maybe_unused]] void dpiBasePeek(svBit ready) {
-  vbridge_impl_instance.dpiBasePeek(ready);
-}
-
-[[maybe_unused]] void dpiPeekPoke(
+[[maybe_unused]] void dpiPoke(
                  svBit *valid,
                  svBitVecVal *a,
                  svBitVecVal *b,
                  svBit *op,
                  svBitVecVal *rm) {
-  vbridge_impl_instance.dpiPeekPoke(DutInterface{valid, a, b, op, rm});
+  vbridge_impl_instance.dpiPoke(DutInterface{valid, a, b, op, rm});
 
 }
 
diff --git a/tests/resources/csrc/vbridge_impl.cc b/tests/resources/csrc/vbridge_impl.cc
index 9ad7720..59247dc 100644
--- a/tests/resources/csrc/vbridge_impl.cc
+++ b/tests/resources/csrc/vbridge_impl.cc
@@ -7,7 +7,7 @@
 
 
 
-VBridgeImpl::VBridgeImpl() : _cycles(1000) {}
+VBridgeImpl::VBridgeImpl() : terminate(false) {}
 
 
 uint64_t VBridgeImpl::get_t() {
@@ -16,8 +16,8 @@ uint64_t VBridgeImpl::get_t() {
 
 
 int VBridgeImpl::timeoutCheck() {
-  if (cnt > _cycles) {
-    LOG(INFO) << fmt::format("pass {} cases, time = {}", cnt, get_t());
+  if (terminate == true) {
+    LOG(INFO) << fmt::format("pass {} cases, time = {}", cnt-1, get_t());
     dpiFinish();
   }
   return 0;
@@ -66,6 +66,14 @@ void VBridgeImpl::dpiInitCosim() {
       LOG(FATAL) << fmt::format("ilegal rm value = {}",rm);
   }
 
+  if (op=="div"){
+      opSignal = false;
+    } else if (op=="sqrt"){
+      opSignal = true;
+    } else LOG(FATAL) << fmt::format("illegal operation");
+
+
+
   LOG(INFO) << fmt::format("test f32_{} in {}",op,rmstring);
 
   initTestCases();
@@ -78,27 +86,22 @@ void VBridgeImpl::dpiInitCosim() {
 
 
 
-void VBridgeImpl::dpiBasePoke(svBitVecVal *a) {
-  uint32_t v = 0x1000;
-  *a = v;
-}
 
-void VBridgeImpl::dpiBasePeek(svBit ready) {
+void VBridgeImpl::dpiPeek(svBit ready) {
 
     if(ready == 1) {
       set_available();
-//      LOG(INFO) << fmt::format("available = {}",available);
     }
 
 
 }
 
-void VBridgeImpl::dpiPeekPoke(const DutInterface &toDut) {
+void VBridgeImpl::dpiPoke(const DutInterface &toDut) {
   if(available==false) return;
 
   *toDut.a = testcase.a;
   *toDut.b = testcase.b;
-  *toDut.op = false;
+  *toDut.op = opSignal;
   *toDut.rm = rm;
   *toDut.valid = true;
 
@@ -154,9 +157,9 @@ std::vector<testdata> mygen_az_f32( float32_t trueFunction( float32_t ), functio
   std::vector<testdata> res;
   softfloat_roundingMode = roundingMode - 1;
 
-  genCases_f32_a_init();
+  genCases_f32_ab_init();
   while ( ! genCases_done  ) {
-    genCases_f32_a_next();
+    genCases_f32_ab_next();
 
     testdata curData;
     curData.function = function;
@@ -218,7 +221,6 @@ void VBridgeImpl::initTestCases() {
     res = genTestCase(F32_SQRT, roundingMode);
   } else LOG(FATAL) << fmt::format("illegal operation");
 
-
   fillTestQueue(res);
   outputTestCases(res); // TODO: demo, please delete
 
@@ -233,17 +235,9 @@ void VBridgeImpl::reloadcase() {
   testcase.b = test_queue.front().b;
   testcase.expected_out = test_queue.front().expected_out;
   testcase.expectedException = test_queue.front().expectedException;
-//  printf("%08x %08x %08x\n", test_vector[1].a, test_vector[1].b, test_vector[1].expected_out);
-//  LOG(INFO) << fmt::format("a = {:08X} \n", test_vector[0].a);
-//  LOG(INFO) << fmt::format("b = {:08X} \n", test_vector[0].b);
-//  LOG(INFO) << fmt::format("a = {:08X} \n", testcase.a);
-//  LOG(INFO) << fmt::format("b = {:08X} \n", testcase.b);
-//  LOG(INFO) << fmt::format("ref_result = {:08X} \n",testcase.expected_out);
-//  LOG(INFO) << fmt::format("reload");
-
 
   test_queue.pop();
-
+  if(test_queue.size() == 0) terminate = true;
 }
 
 
diff --git a/tests/resources/csrc/vbridge_impl.h b/tests/resources/csrc/vbridge_impl.h
index 16dd0a6..5918790 100644
--- a/tests/resources/csrc/vbridge_impl.h
+++ b/tests/resources/csrc/vbridge_impl.h
@@ -53,11 +53,9 @@ class VBridgeImpl {
 
     uint64_t getCycle() { return ctx->time(); }
 
-    void dpiBasePoke(uint32_t *a);
+    void dpiPoke(const DutInterface &toDut);
 
-    void dpiPeekPoke(const DutInterface &toDut);
-
-    void dpiBasePeek(svBit ready);
+    void dpiPeek(svBit ready);
 
     std::queue <testdata> test_queue;
 
@@ -79,6 +77,8 @@ class VBridgeImpl {
 
     roundingMode_t roundingMode;
 
+    bool opSignal;
+
     std::string rmstring;
 
 
@@ -89,6 +89,8 @@ class VBridgeImpl {
 
     uint64_t _cycles;
 
+    bool terminate;
+
     bool available;
 
     const std::string wave = get_env_arg("wave");
diff --git a/tests/src/Ftests.scala b/tests/src/Ftests.scala
index 1ed9f22..fb32a85 100644
--- a/tests/src/Ftests.scala
+++ b/tests/src/Ftests.scala
@@ -238,7 +238,7 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
     for(x<- 0 to 4){
       val runEnv = Map(
         "wave" -> s"${runDir}/",
-        "op" -> "div",
+        "op" -> "sqrt",
         "rm" -> s"$x"
       )
       os.proc(Seq("./emulator").map(_.toString)).call(stdout = runDir / s"${rmMaps(x)}.log",cwd=emulatorBuildDir,env=runEnv)
diff --git a/tests/src/GrandCentral.scala b/tests/src/GrandCentral.scala
deleted file mode 100644
index 7d36277..0000000
--- a/tests/src/GrandCentral.scala
+++ /dev/null
@@ -1,45 +0,0 @@
-package sifive {
-  package enterprise {
-    package grandcentral {
-
-      import firrtl.annotations._
-
-      case class ReferenceDataTapKey(source: ReferenceTarget, sink: ReferenceTarget)
-
-      case class DataTapsAnnotation(keys: Seq[ReferenceDataTapKey])
-          extends NoTargetAnnotation
-          with HasSerializationHints {
-        override def typeHints: Seq[Class[_]] = Seq(classOf[ReferenceDataTapKey])
-      }
-    }
-
-  }
-
-}
-
-package tests {
-
-    import chisel3._
-    import chisel3.experimental.ChiselAnnotation
-    import sifive.enterprise.grandcentral._
-    trait TapModule extends RawModule { t =>
-      private val dataTapKeys = scala.collection.mutable.ArrayBuffer[(Data, Data)]()
-      def tap[T <: Data](source: T): T = {
-        val sink = Wire(chiselTypeOf(source))
-        dontTouch(sink)
-        dataTapKeys.append((source, sink))
-        sink
-      }
-      // wait for https://github.com/chipsalliance/chisel3/pull/1943
-      def done(): Unit = {
-        chisel3.experimental.annotate(new ChiselAnnotation {
-          override def toFirrtl = DataTapsAnnotation(dataTapKeys.toSeq.map({
-            case (source, sink) =>
-              ReferenceDataTapKey(source.toTarget, sink.toTarget)
-          }))
-        })
-      }
-    }
-
-
-}
diff --git a/tests/src/ValExec_DivSqrtRecFN_small.scala b/tests/src/ValExec_DivSqrtRecFN_small.scala
deleted file mode 100644
index 84b2b97..0000000
--- a/tests/src/ValExec_DivSqrtRecFN_small.scala
+++ /dev/null
@@ -1,182 +0,0 @@
-
-/*============================================================================
-
-This Chisel source file is part of a pre-release version of the HardFloat IEEE
-Floating-Point Arithmetic Package, by John R. Hauser (with some contributions
-from Yunsup Lee and Andrew Waterman, mainly concerning testing).
-
-Copyright 2017 SiFive, Inc.  All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions, and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright notice,
-    this list of conditions, and the following disclaimer in the documentation
-    and/or other materials provided with the distribution.
-
- 3. Neither the name of SiFive nor the names of its contributors may
-    be used to endorse or promote products derived from this software without
-    specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY SIFIVE AND CONTRIBUTORS "AS IS", AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
-DISCLAIMED.  IN NO EVENT SHALL SIFIVE OR CONTRIBUTORS BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-=============================================================================*/
-package tests
-import chisel3._
-import chisel3.util._
-import float._
-
-class DivRecFN_io(expWidth: Int, sigWidth: Int) extends Bundle {
-    val a = Bits((expWidth + sigWidth).W)
-    val b = Bits((expWidth + sigWidth).W)
-    val roundingMode   = UInt(3.W)
-    val detectTininess = UInt(1.W)
-    val out = Bits((expWidth + sigWidth).W)
-    val exceptionFlags = Bits(5.W)
-
-}
-
-class
-    ValExec_DivSqrtRecFN_small_div(expWidth: Int, sigWidth: Int) extends Module
-{
-    val io = IO(new Bundle {
-        val input = Flipped(Decoupled(new DivRecFN_io(expWidth, sigWidth)))
-
-        val output = new Bundle {
-            val a = Flipped(Input(Bits((expWidth + sigWidth).W)))
-            val b = Flipped(Input(Bits((expWidth + sigWidth).W)))
-            val roundingMode   = Output(UInt(3.W))
-            val detectTininess = Output(UInt(1.W))
-        }
-
-        val expected = new Bundle {
-            val out = Output(Bits((expWidth + sigWidth).W))
-            val exceptionFlags = Output(Bits(5.W))
-            val recOut = Output(Bits((expWidth + sigWidth + 1).W))
-        }
-
-        val actual = new Bundle {
-            val out = Output(Bits((expWidth + sigWidth).W))
-            val exceptionFlags = Output(Bits(5.W))
-        }
-
-        val check = Output(Bool())
-        val pass = Output(Bool())
-    })
-
-    val ds = Module(new DivSqrt(8,24))
-    val cq = Module(new Queue(new DivRecFN_io(expWidth, sigWidth), 5))
-
-    cq.io.enq.valid := io.input.valid && ds.input.ready
-    cq.io.enq.bits := io.input.bits
-
-    io.input.ready := ds.input.ready && cq.io.enq.ready
-    ds.input.valid := io.input.valid && cq.io.enq.ready
-    ds.input.bits.sqrt := false.B
-    ds.input.bits.a := io.input.bits.a
-    ds.input.bits.b := io.input.bits.b
-    ds.input.bits.roundingMode   := io.input.bits.roundingMode
-    //ds.input.bits.detectTininess := io.input.bits.detectTininess
-
-    io.output.a := cq.io.deq.bits.a
-    io.output.b := cq.io.deq.bits.b
-    io.output.roundingMode   := cq.io.deq.bits.roundingMode
-    io.output.detectTininess := cq.io.deq.bits.detectTininess
-
-    io.expected.out := cq.io.deq.bits.out
-    io.expected.exceptionFlags := cq.io.deq.bits.exceptionFlags
-    io.expected.recOut := cq.io.deq.bits.out
-
-    io.actual.out := ds.output.bits.result
-    io.actual.exceptionFlags := ds.output.bits.exceptionFlags
-
-    cq.io.deq.ready := ds.output.valid
-
-    io.check := ds.output.valid
-  val resultcheck = io.actual.out =/= io.expected.out
-  val flagcheck = io.actual.exceptionFlags =/= io.expected.exceptionFlags
-  io.pass := !(cq.io.deq.fire && (resultcheck || flagcheck))
-}
-
-class SqrtRecFN_io(expWidth: Int, sigWidth: Int) extends Bundle {
-    val a = Bits((expWidth + sigWidth).W)
-    val roundingMode   = UInt(3.W)
-    val detectTininess = UInt(1.W)
-    val out = Bits((expWidth + sigWidth).W)
-    val exceptionFlags = Bits(5.W)
-
-}
-
-class
-    ValExec_DivSqrtRecFN_small_sqrt(expWidth: Int, sigWidth: Int)
-    extends Module
-{
-    val io = IO(new Bundle {
-        val input = Flipped(Decoupled(new SqrtRecFN_io(expWidth, sigWidth)))
-
-        val output = new Bundle {
-            val a = Output(Bits((expWidth + sigWidth).W))
-            val roundingMode   = Output(UInt(3.W))
-            val detectTininess = Output(UInt(1.W))
-        }
-
-        val expected = new Bundle {
-            val out = Output(Bits((expWidth + sigWidth).W))
-            val exceptionFlags = Output(Bits(5.W))
-            val recOut = Output(Bits((expWidth + sigWidth + 1).W))
-        }
-
-        val actual = new Bundle {
-            val out = Output(Bits((expWidth + sigWidth).W))
-            val exceptionFlags = Output(Bits(5.W))
-        }
-
-        val check = Output(Bool())
-        val pass = Output(Bool())
-    })
-
-    val ds = Module(new DivSqrt(8,24))
-    val cq = Module(new Queue(new SqrtRecFN_io(expWidth, sigWidth), 5))
-
-    cq.io.enq.valid := io.input.valid && ds.input.ready
-    cq.io.enq.bits := io.input.bits
-
-    io.input.ready := ds.input.ready && cq.io.enq.ready
-    ds.input.valid := io.input.valid && cq.io.enq.ready
-    ds.input.bits.sqrt := true.B
-    ds.input.bits.a := io.input.bits.a
-    ds.input.bits.b := DontCare
-    ds.input.bits.roundingMode   := io.input.bits.roundingMode
-//    ds.input.bits.detectTininess := io.input.bits.detectTininess
-
-    io.output.a := cq.io.deq.bits.a
-    io.output.roundingMode   := cq.io.deq.bits.roundingMode
-    io.output.detectTininess := cq.io.deq.bits.detectTininess
-
-    io.expected.out := cq.io.deq.bits.out
-    io.expected.exceptionFlags := cq.io.deq.bits.exceptionFlags
-    io.expected.recOut := cq.io.deq.bits.out
-
-    io.actual.exceptionFlags := ds.output.bits.exceptionFlags
-    io.actual.out := ds.output.bits.result
-
-    cq.io.deq.ready := ds.output.valid
-
-    io.check := ds.output.valid
-    val resultcheck = io.actual.out =/= io.expected.recOut
-    val flagcheck   = io.actual.exceptionFlags =/= io.expected.exceptionFlags
-    io.pass := !(cq.io.deq.fire && (resultcheck || flagcheck))
-
-}
diff --git a/tests/src/VerificationModule.scala b/tests/src/VerificationModule.scala
index 48ba3de..f05c8ea 100644
--- a/tests/src/VerificationModule.scala
+++ b/tests/src/VerificationModule.scala
@@ -71,8 +71,8 @@ class VerificationModule extends RawModule {
   reset := verbatim.reset
 
 
-  val dpiBasePeek = Module(new ExtModule with HasExtModuleInline {
-    override val desiredName = "dpiBasePeek"
+  val dpiPeek = Module(new ExtModule with HasExtModuleInline {
+    override val desiredName = "dpiPeek"
     val ready = IO(Input(Bool()))
     val clock = IO(Input(Clock()))
     setInline(
@@ -88,8 +88,8 @@ class VerificationModule extends RawModule {
          |""".stripMargin
     )
   })
-  dpiBasePeek.clock := verbatim.clock
-  dpiBasePeek.ready := dutPoke.ready
+  dpiPeek.clock := verbatim.clock
+  dpiPeek.ready := dutPoke.ready
 
   val dpiCheck = Module(new ExtModule with HasExtModuleInline {
     override val desiredName = "dpiCheck"
@@ -126,8 +126,8 @@ class VerificationModule extends RawModule {
   dpiCheck.fflags := dutPeek.bits.fflags
   dpiCheck.valid  := dutPeek.valid
 
-  val dpiPeekPoke = Module(new ExtModule with HasExtModuleInline {
-    override val desiredName = "dpiPeekPoke"
+  val dpiPoke = Module(new ExtModule with HasExtModuleInline {
+    override val desiredName = "dpiPoke"
     val clock = IO(Input(Clock()))
     val a = IO(Output(UInt(32.W)))
     val b = IO(Output(UInt(32.W)))
@@ -164,12 +164,12 @@ class VerificationModule extends RawModule {
          |""".stripMargin
     )
   })
-  dpiPeekPoke.clock       := verbatim.clock
-  dutPoke.valid             := dpiPeekPoke.valid
-  dutPoke.bits.a            := dpiPeekPoke.a
-  dutPoke.bits.b            := dpiPeekPoke.b
-  dutPoke.bits.op           := dpiPeekPoke.op
-  dutPoke.bits.roundingMode := dpiPeekPoke.rm
+  dpiPoke.clock       := verbatim.clock
+  dutPoke.valid             := dpiPoke.valid
+  dutPoke.bits.a            := dpiPoke.a
+  dutPoke.bits.b            := dpiPoke.b
+  dutPoke.bits.op           := dpiPoke.op
+  dutPoke.bits.roundingMode := dpiPoke.rm
 
 
 

From fe5799805cba76337be24620ff2be60cf968cffb Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 16 Oct 2023 13:16:10 +0800
Subject: [PATCH 100/109] opt test framwork

---
 tests/src/Ftests.scala | 96 ++++++++++++++++++++----------------------
 1 file changed, 46 insertions(+), 50 deletions(-)

diff --git a/tests/src/Ftests.scala b/tests/src/Ftests.scala
index fb32a85..f2654a1 100644
--- a/tests/src/Ftests.scala
+++ b/tests/src/Ftests.scala
@@ -1,36 +1,21 @@
 package tests
 
 import chisel3.RawModule
-import float._
-
-import java.text.SimpleDateFormat
-import java.util.Calendar
-import java.text.SimpleDateFormat
-import java.util.Calendar
-import scala.collection.parallel.CollectionConverters._
-import chisel3.RawModule
+import chisel3.stage._
 import firrtl.AnnotationSeq
+import firrtl.stage.FirrtlCircuitAnnotation
 import org.scalatest.ParallelTestExecution
 import org.scalatest.flatspec.AnyFlatSpec
 import org.scalatest.matchers.should.Matchers
-
-import chisel3.experimental.ExtModule
-import chisel3.util.{HasExtModuleInline, HasExtModuleResource}
-import firrtl.stage.FirrtlCircuitAnnotation
-
-import chisel3.stage._
 import os._
 
-
-
-
 trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
   val roundings = Seq(
     "-rnear_even" -> "0",
     "-rminMag" -> "1",
     "-rmin" -> "2",
     "-rmax" -> "3",
-    "-rnear_maxMag" -> "4",
+    "-rnear_maxMag" -> "4"
   )
 
   val rmMaps = Map(
@@ -54,14 +39,15 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
   }
 
   def check(stdouts: Seq[String]) = {
-    stdouts foreach (_ shouldNot include("expected"))
-    stdouts foreach (_ shouldNot include("Ran 0 tests."))
-    stdouts foreach (_ should include("No errors found."))
+    stdouts.foreach(_ shouldNot include("expected"))
+    stdouts.foreach(_ shouldNot include("Ran 0 tests."))
+    stdouts.foreach(_ should include("No errors found."))
   }
 
   def test(name: String, module: () => RawModule, softfloatArg: Seq[String]): Seq[String] = {
-    val (softfloatArgs, dutArgs) = (roundings.map { case (s, d) =>
-      (Seq(s, "-tininessafter") ++ softfloatArg, Seq(d, "0"))
+    val (softfloatArgs, dutArgs) = (roundings.map {
+      case (s, d) =>
+        (Seq(s, "-tininessafter") ++ softfloatArg, Seq(d, "0"))
     }).unzip
     test(name, module, "test.cpp", softfloatArgs, Some(dutArgs))
   }
@@ -74,7 +60,13 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
     * @param softfloatArgs arguments passed to `softfloat_gen` application. If has multiple command lines, multiple test will be executed.
     * @param dutArgs       arguments passed to verilator dut executor, If set to [[None]], no arguments will be passed to.
     */
-  def test(name: String, module: () => RawModule, harness: String, softfloatArgs: Seq[Seq[String]], dutArgs: Option[Seq[Seq[String]]] = None) = {
+  def test(
+    name:          String,
+    module:        () => RawModule,
+    harness:       String,
+    softfloatArgs: Seq[Seq[String]],
+    dutArgs:       Option[Seq[Seq[String]]] = None
+  ) = {
 
     var topName: String = null
     val emulatorThreads = 8
@@ -95,24 +87,22 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
     val emulatorBuildDir = emulatorDir / "build"
     os.makeDir.all(emulatorBuildDir)
 
-
     os.proc(
       "make",
-      "softfloat",
+      "softfloat"
     ).call()
 
     os.proc(
       "make",
-      "testfloat",
+      "testfloat"
     ).call()
 
-
     val annos: AnnotationSeq = Seq(
       new chisel3.stage.phases.Elaborate,
       new chisel3.stage.phases.Convert
     ).foldLeft(
       Seq(
-        ChiselGeneratorAnnotation(() => new TestBench(8,24))
+        ChiselGeneratorAnnotation(() => new TestBench(8, 24))
       ): AnnotationSeq
     ) { case (annos, stage) => stage.transform(annos) }
       .flatMap {
@@ -120,7 +110,7 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
           topName = circuit.main
           os.write.over(elaborateDir / s"$topName.fir", circuit.serialize)
           None
-        case _: chisel3.stage.DesignAnnotation[_] => None
+        case _: chisel3.stage.DesignAnnotation[_]     => None
         case _: chisel3.stage.ChiselCircuitAnnotation => None
         case a => Some(a)
       }
@@ -129,14 +119,16 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
     // rtl
     os.proc(
       "firtool",
-      elaborateDir / s"$topName.fir", s"--annotation-file=${elaborateDir / s"$topName.anno.json"}",
+      elaborateDir / s"$topName.fir",
+      s"--annotation-file=${elaborateDir / s"$topName.anno.json"}",
       "-dedup",
       "-O=debug",
       "--split-verilog",
       "--preserve-values=named",
       s"-o=$rtlDir"
     ).call()
-    val verilogs = os.read.lines(rtlDir / "filelist.f")
+    val verilogs = os.read
+      .lines(rtlDir / "filelist.f")
       .map(str =>
         try {
           os.Path(str)
@@ -147,7 +139,6 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
       )
       .filter(p => p.ext == "v" || p.ext == "sv")
 
-
     val allCSourceFiles = Seq(
       "dpi.cc",
       "vbridge_impl.cc",
@@ -176,7 +167,8 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
       "--main"
     )
 
-    os.write(emulatorBuildDir / "CMakeLists.txt",
+    os.write(
+      emulatorBuildDir / "CMakeLists.txt",
       // format: off
       s"""cmake_minimum_required(VERSION 3.20)
          |project(emulator)
@@ -223,29 +215,34 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
     )
 
     // build verilator
-    os.proc(Seq(
-      "cmake",
-      "-G", "Ninja",
-      "-S", emulatorBuildDir,
-      "-B", emulatorBuildDir
-    ).map(_.toString)).call(emulatorBuildDir)
+    os.proc(
+      Seq(
+        "cmake",
+        "-G",
+        "Ninja",
+        "-S",
+        emulatorBuildDir,
+        "-B",
+        emulatorBuildDir
+      ).map(_.toString)
+    ).call(emulatorBuildDir)
 
     // build emulator
     os.proc(Seq("ninja", "-C", emulatorBuildDir).map(_.toString)).call(emulatorBuildDir)
 
-
     // run
-    for(x<- 0 to 4){
-      val runEnv = Map(
+    for (x <- 0 to 4) {
+      def runEnv(opration: String) = Map(
         "wave" -> s"${runDir}/",
-        "op" -> "sqrt",
+        "op" -> s"$opration",
         "rm" -> s"$x"
       )
-      os.proc(Seq("./emulator").map(_.toString)).call(stdout = runDir / s"${rmMaps(x)}.log",cwd=emulatorBuildDir,env=runEnv)
+      os.proc(Seq("./emulator").map(_.toString))
+        .call(stdout = runDir / s"${rmMaps(x)}.log", cwd = emulatorBuildDir, env = runEnv("div"))
+      os.proc(Seq("./emulator").map(_.toString))
+        .call(stdout = runDir / s"${rmMaps(x)}.log", cwd = emulatorBuildDir, env = runEnv("sqrt"))
     }
 
-
-
     Seq("No errors found.")
   }
 }
@@ -253,7 +250,7 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
 class DivSqrtRecFn_smallSpec extends FMATester {
   def test(f: Int, fn: String): Seq[String] = {
     def generator(options: Int) = fn match {
-      case "div" => () => new TestBench(exp(f), sig(f))
+      case "div"  => () => new TestBench(exp(f), sig(f))
       case "sqrt" => () => new TestBench(exp(f), sig(f))
     }
 
@@ -269,5 +266,4 @@ class DivSqrtRecFn_smallSpec extends FMATester {
     check(test(32, "div"))
   }
 
-
-}
\ No newline at end of file
+}

From 463108f850b22134727e77abd2a3f8fcd98b86f8 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 16 Oct 2023 14:18:31 +0800
Subject: [PATCH 101/109] add condition for dpicheck and dpipoke

---
 tests/src/VerificationModule.scala | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tests/src/VerificationModule.scala b/tests/src/VerificationModule.scala
index f05c8ea..9ac3a1f 100644
--- a/tests/src/VerificationModule.scala
+++ b/tests/src/VerificationModule.scala
@@ -112,7 +112,9 @@ class VerificationModule extends RawModule {
          |  input bit[4:0]  fflags
          |  );
          |
-         |  always @ (posedge clock) #1 $desiredName(
+         |  always @ (posedge clock) #1
+         |  if (valid == 1)
+         |  $desiredName(
          |  valid,
          |  result,
          |  fflags
@@ -134,9 +136,11 @@ class VerificationModule extends RawModule {
     val op = IO(Output(Bool()))
     val rm = IO(Output(UInt(3.W)))
     val valid = IO(Output(Bool()))
+    val ready  = IO(Input(Bool()))
     setInline(
       s"$desiredName.sv",
       s"""module $desiredName(
+         |  input  ready,
          |  output clock,
          |  output valid,
          |  output [31:0] a,
@@ -153,7 +157,9 @@ class VerificationModule extends RawModule {
          |  output bit[2:0]  rm
          |  );
          |
-         |  always @ (negedge clock) $desiredName(
+         |  always @ (negedge clock)
+         |  if(ready==1)
+         |  $desiredName(
          |  valid,
          |  a,
          |  b,
@@ -170,6 +176,7 @@ class VerificationModule extends RawModule {
   dutPoke.bits.b            := dpiPoke.b
   dutPoke.bits.op           := dpiPoke.op
   dutPoke.bits.roundingMode := dpiPoke.rm
+  dpiPoke.ready  := dutPoke.ready
 
 
 

From d5481ed846bda10bc004d849f6e4358e68f8a598 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 16 Oct 2023 16:55:36 +0800
Subject: [PATCH 102/109] [rtl]opt and doc rtl

---
 arithmetic/src/float/DivSqrt.scala      |  47 +++++----
 arithmetic/src/float/RoundingUnit.scala |  10 +-
 arithmetic/src/float/common.scala       |  91 ++++-------------
 arithmetic/src/float/primitives.scala   | 127 ------------------------
 4 files changed, 55 insertions(+), 220 deletions(-)
 delete mode 100644 arithmetic/src/float/primitives.scala

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index 968c7b1..f1fef5a 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -40,46 +40,52 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   // Exceptions
 
   /** inf/inf and 0/0  => NaN out */
-  val divInvalidExcNotSigNaNIn =
+  val divTwoInvalidOpCases =
     (rawA.isZero && rawB.isZero) || (rawA.isInf && rawB.isInf)
-  /** -Inf + -normal => NaN out */
-  val sqrtInvalidExcNotSigNaNIn =
+  /** A = -Inf or -Normal */
+  val sqrtInvalidNegaCases =
     !rawA.isNaN && !rawA.isZero && rawA.sign
-  /** isSigNaNRawFloat detect signaling NaN */
-  val majorExc =
+  /** classified in flags
+    *
+    * contains all NV and DZ flags cases
+    */
+  val isNVorDZ =
     Mux(input.bits.sqrt,
-      isSigNaNRawFloat(rawA) || sqrtInvalidExcNotSigNaNIn,
+      isSigNaNRawFloat(rawA) || sqrtInvalidNegaCases,
       isSigNaNRawFloat(rawA) || isSigNaNRawFloat(rawB) ||
-        divInvalidExcNotSigNaNIn ||
+        divTwoInvalidOpCases ||
         (!rawA.isNaN && !rawA.isInf && rawB.isZero)
     )
 
-  /** all cases result in NaN output */
+  /** classified in output result
+    *
+    * qNaN output
+    */
   val isNaN =
     Mux(input.bits.sqrt,
-      rawA.isNaN || sqrtInvalidExcNotSigNaNIn,
-      rawA.isNaN || rawB.isNaN || divInvalidExcNotSigNaNIn
+      rawA.isNaN || sqrtInvalidNegaCases,
+      rawA.isNaN || rawB.isNaN || divTwoInvalidOpCases
     )
   val isInf  = Mux(input.bits.sqrt, rawA.isInf, rawA.isInf || rawB.isZero)
   val isZero = Mux(input.bits.sqrt, rawA.isZero, rawA.isZero || rawB.isInf)
 
-  val majorExcReg = RegEnable(majorExc, false.B, input.fire)
+  val isNVorDZreg = RegEnable(isNVorDZ, false.B, input.fire)
   val isNaNReg    = RegEnable(isNaN   , false.B, input.fire)
   val isInfReg    = RegEnable(isInf   , false.B, input.fire)
   val isZeroReg   = RegEnable(isZero  , false.B, input.fire)
 
   /** invalid operation flag */
-  val invalidExec = majorExcReg &&  isNaNReg
+  val invalidExec = isNVorDZreg &&  isNaNReg
 
   /** DivideByZero flag */
-  val infinitExec = majorExcReg && !isNaNReg
+  val infinitExec = isNVorDZreg && !isNaNReg
 
-  val specialCaseA = rawA.isNaN || rawA.isInf || rawA.isZero
-  val specialCaseB = rawB.isNaN || rawB.isInf || rawB.isZero
-  val normalCaseDiv = !specialCaseA && !specialCaseB
+  val specialCaseA   = rawA.isNaN || rawA.isInf || rawA.isZero
+  val specialCaseB   = rawB.isNaN || rawB.isInf || rawB.isZero
+  val normalCaseDiv  = !specialCaseA && !specialCaseB
   val normalCaseSqrt = !specialCaseA && !rawA.sign
-  val normalCase = Mux(input.bits.sqrt, normalCaseSqrt, normalCaseDiv)
-  val specialCase = !normalCase
+  val normalCase     = Mux(input.bits.sqrt, normalCaseSqrt, normalCaseDiv)
+  val specialCase    = !normalCase
 
   val fastValid = RegInit(false.B)
   fastValid := specialCase && input.fire
@@ -116,6 +122,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   SqrtModule.input.bits.operand := sqrtFractIn
   SqrtModule.input.valid := input.valid && input.bits.sqrt && normalCaseSqrt
 
+  // collect sqrt result
   val rbitsSqrt      = SqrtModule.output.bits.result(1) ## (!SqrtModule.output.bits.zeroRemainder || SqrtModule.output.bits.result(0))
   val sigToRoundSqrt = SqrtModule.output.bits.result(24, 2)
 
@@ -133,7 +140,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   divModule.input.valid := input.valid && !input.bits.sqrt && normalCaseDiv
 
 
-  /** collect div sig result
+  /** collect div result
     *
     * {{{
     * when B_sig > A_sig
@@ -161,6 +168,8 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     * expForSqrt(7,0) effective is 8bits, MSB is sign
     * extends 2 sign bit in MSB
     * expStoreNext = 10bits
+    * input =   axxxxxxx
+    * out   = aaaxxxxxxx
     * }}}
     *
     * for div
diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index 007fb12..b0fa678 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -34,11 +34,11 @@ class RoundingUnit extends Module{
     val exceptionFlags = Output(Bits(5.W))
   }))
 
-  val rmRNE = (input.roundingMode === consts.round_near_even)
-  val rmRTZ = (input.roundingMode === consts.round_minMag)
-  val rmRDN = (input.roundingMode === consts.round_min)
-  val rmRUP = (input.roundingMode === consts.round_max)
-  val rmRMM = (input.roundingMode === consts.round_near_maxMag)
+  val rmRNE = (input.roundingMode === RoundingMode.RNE)
+  val rmRTZ = (input.roundingMode === RoundingMode.RTZ)
+  val rmRDN = (input.roundingMode === RoundingMode.RDN)
+  val rmRUP = (input.roundingMode === RoundingMode.RUP)
+  val rmRMM = (input.roundingMode === RoundingMode.RMM)
 
   val commonOverflow  = Wire(Bool())
   val commonUnderflow = Wire(Bool())
diff --git a/arithmetic/src/float/common.scala b/arithmetic/src/float/common.scala
index ae026b3..27ae7d5 100644
--- a/arithmetic/src/float/common.scala
+++ b/arithmetic/src/float/common.scala
@@ -1,84 +1,37 @@
-
-/*============================================================================
-
-This Chisel source file is part of a pre-release version of the HardFloat IEEE
-Floating-Point Arithmetic Package, by John R. Hauser (with some contributions
-from Yunsup Lee and Andrew Waterman, mainly concerning testing).
-
-Copyright 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of
-the University of California.  All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions, and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright notice,
-    this list of conditions, and the following disclaimer in the documentation
-    and/or other materials provided with the distribution.
-
- 3. Neither the name of the University nor the names of its contributors may
-    be used to endorse or promote products derived from this software without
-    specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
-DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-=============================================================================*/
-
 package float
 
 import chisel3._
-
-object consts {
-    /*------------------------------------------------------------------------
-    | For rounding to integer values, rounding mode 'odd' rounds to minimum
-    | magnitude instead, same as 'minMag'.
-    *------------------------------------------------------------------------*/
-    def round_near_even   = "b000".U(3.W)
-    def round_minMag      = "b001".U(3.W)
-    def round_min         = "b010".U(3.W)
-    def round_max         = "b011".U(3.W)
-    def round_near_maxMag = "b100".U(3.W)
-    def round_odd         = "b110".U(3.W)
-    /*------------------------------------------------------------------------
-    *------------------------------------------------------------------------*/
-    def tininess_beforeRounding = 0.U
-    def tininess_afterRounding  = 1.U
-    /*------------------------------------------------------------------------
-    *------------------------------------------------------------------------*/
-    def flRoundOpt_sigMSBitAlwaysZero  = 1
-    def flRoundOpt_subnormsAlwaysExact = 2
-    def flRoundOpt_neverUnderflows     = 4
-    def flRoundOpt_neverOverflows      = 8
-    /*------------------------------------------------------------------------
-    *------------------------------------------------------------------------*/
-    def divSqrtOpt_twoBitsPerCycle     = 16
+import chisel3.util._
+
+object RoundingMode {
+    def RNE   = "b000".U(3.W)
+    def RTZ   = "b001".U(3.W)
+    def RDN   = "b010".U(3.W)
+    def RUP   = "b011".U(3.W)
+    def RMM   = "b100".U(3.W)
 }
 
 class RawFloat(val expWidth: Int, val sigWidth: Int) extends Bundle
 {
-    val isNaN: Bool = Bool()              // overrides all other fields
-    val isInf: Bool = Bool()              // overrides 'isZero', 'sExp', and 'sig'
+    val isNaN: Bool  = Bool()              // overrides all other fields
+    val isInf: Bool  = Bool()              // overrides 'isZero', 'sExp', and 'sig'
     val isZero: Bool = Bool()              // overrides 'sExp' and 'sig'
-    val sign: Bool = Bool()
-    val sExp: SInt = SInt((expWidth + 2).W)
-    val sig: UInt = UInt((sigWidth + 1).W)   // 2 m.s. bits cannot both be 0
-
+    val sign: Bool   = Bool()
+    // sExp = 0.U(1.W) ## (Exp &- bias)
+    // Exp - bias width is expWidth + 1
+    val sExp: SInt   = SInt((expWidth + 2).W)
+    // sig = 0 ## restored 1 or 0 ## fracIn
+    val sig: UInt    = UInt((sigWidth + 1).W)   // 2 m.s. bits cannot both be 0
 }
 
-//*** CHANGE THIS INTO A '.isSigNaN' METHOD OF THE 'RawFloat' CLASS:
+/** is signaling NaN */
 object isSigNaNRawFloat
 {
     def apply(in: RawFloat): Bool = in.isNaN && !in.sig(in.sigWidth - 2)
 }
 
+object countLeadingZeros
+{
+  def apply(in: UInt): UInt = PriorityEncoder(in.asBools.reverse)
+}
+
diff --git a/arithmetic/src/float/primitives.scala b/arithmetic/src/float/primitives.scala
deleted file mode 100644
index cb75215..0000000
--- a/arithmetic/src/float/primitives.scala
+++ /dev/null
@@ -1,127 +0,0 @@
-
-/*============================================================================
-
-This Chisel source file is part of a pre-release version of the HardFloat IEEE
-Floating-Point Arithmetic Package, by John R. Hauser (with some contributions
-from Yunsup Lee and Andrew Waterman, mainly concerning testing).
-
-Copyright 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
-University of California.  All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions, and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright notice,
-    this list of conditions, and the following disclaimer in the documentation
-    and/or other materials provided with the distribution.
-
- 3. Neither the name of the University nor the names of its contributors may
-    be used to endorse or promote products derived from this software without
-    specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
-DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-=============================================================================*/
-
-package float
-
-import chisel3._
-import chisel3.util._
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-object lowMask
-{
-    def apply(in: UInt, topBound: BigInt, bottomBound: BigInt): UInt =
-    {
-        require(topBound != bottomBound)
-        val numInVals = BigInt(1)<<in.getWidth
-        if (topBound < bottomBound) {
-            lowMask(~in, numInVals - 1 - topBound, numInVals - 1 - bottomBound)
-        } else if (numInVals > 64 /* Empirical */) {
-            // For simulation performance, we should avoid generating
-            // exteremely wide shifters, so we divide and conquer.
-            // Empirically, this does not impact synthesis QoR.
-            val mid = numInVals / 2
-            val msb = in(in.getWidth - 1)
-            val lsbs = in(in.getWidth - 2, 0)
-            if (mid < topBound) {
-                if (mid <= bottomBound) {
-                    Mux(msb,
-                        lowMask(lsbs, topBound - mid, bottomBound - mid),
-                        0.U
-                    )
-                } else {
-                    Mux(msb,
-                        lowMask(lsbs, topBound - mid, 0) ## ((BigInt(1)<<(mid - bottomBound).toInt) - 1).U,
-                        lowMask(lsbs, mid, bottomBound)
-                    )
-                }
-            } else {
-                ~Mux(msb, 0.U, ~lowMask(lsbs, topBound, bottomBound))
-            }
-        } else {
-            val shift = (BigInt(-1)<<numInVals.toInt).S>>in
-            Reverse(
-                shift(
-                    (numInVals - 1 - bottomBound).toInt,
-                    (numInVals - topBound).toInt
-                )
-            )
-        }
-    }
-}
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-object countLeadingZeros
-{
-    def apply(in: UInt): UInt = PriorityEncoder(in.asBools.reverse)
-}
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-object orReduceBy2
-{
-    def apply(in: UInt): UInt =
-    {
-        val reducedWidth = (in.getWidth + 1)>>1
-        val reducedVec = Wire(Vec(reducedWidth, Bool()))
-        for (ix <- 0 until reducedWidth - 1) {
-            reducedVec(ix) := in(ix * 2 + 1, ix * 2).orR
-        }
-        reducedVec(reducedWidth - 1) :=
-            in(in.getWidth - 1, (reducedWidth - 1) * 2).orR
-        reducedVec.asUInt
-    }
-}
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-object orReduceBy4
-{
-    def apply(in: UInt): UInt =
-    {
-        val reducedWidth = (in.getWidth + 3)>>2
-        val reducedVec = Wire(Vec(reducedWidth, Bool()))
-        for (ix <- 0 until reducedWidth - 1) {
-            reducedVec(ix) := in(ix * 4 + 3, ix * 4).orR
-        }
-        reducedVec(reducedWidth - 1) :=
-            in(in.getWidth - 1, (reducedWidth - 1) * 4).orR
-        reducedVec.asUInt
-    }
-}
-

From d241b72510047289d49d17f9dc8d8e11aaaf9dfb Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 16 Oct 2023 17:11:24 +0800
Subject: [PATCH 103/109] [test]reformat test code

---
 tests/resources/csrc/dpi.cc          |  19 +--
 tests/resources/csrc/exceptions.h    |  18 ---
 tests/resources/csrc/vbridge_impl.cc | 197 +++++++++++++--------------
 tests/resources/csrc/vbridge_impl.h  |  89 ++++++------
 4 files changed, 142 insertions(+), 181 deletions(-)
 delete mode 100644 tests/resources/csrc/exceptions.h

diff --git a/tests/resources/csrc/dpi.cc b/tests/resources/csrc/dpi.cc
index 5160367..fdb78f9 100644
--- a/tests/resources/csrc/dpi.cc
+++ b/tests/resources/csrc/dpi.cc
@@ -1,24 +1,23 @@
 #ifdef COSIM_VERILATOR
+
 #include <VTestBench__Dpi.h>
+
 #endif
 
 #include <csignal>
 
-#include <glog/logging.h>
 #include <fmt/core.h>
+#include <glog/logging.h>
 
 #include "svdpi.h"
 #include "vbridge_impl.h"
 
-
-
 #if VM_TRACE
 
 void VBridgeImpl::dpiDumpWave() {
-
-        ::dpiDumpWave((wave + op + rmstring + ".fst").c_str());
-
+  ::dpiDumpWave((wave + op + rmstring + ".fst").c_str());
 }
+
 #endif
 
 void dpiInitCosim() {
@@ -27,7 +26,7 @@ void dpiInitCosim() {
 }
 
 [[maybe_unused]] void dpiTimeoutCheck() {
-        vbridge_impl_instance.timeoutCheck();
+  vbridge_impl_instance.timeoutCheck();
 }
 
 [[maybe_unused]] void dpiPeek(svBit ready) {
@@ -41,7 +40,6 @@ void dpiInitCosim() {
                  svBit *op,
                  svBitVecVal *rm) {
   vbridge_impl_instance.dpiPoke(DutInterface{valid, a, b, op, rm});
-
 }
 
 [[maybe_unused]] void dpiCheck(
@@ -51,8 +49,3 @@ void dpiInitCosim() {
 
    vbridge_impl_instance.dpiCheck(valid, *result, *fflags);
 }
-
-
-
-
-
diff --git a/tests/resources/csrc/exceptions.h b/tests/resources/csrc/exceptions.h
deleted file mode 100644
index 1eac049..0000000
--- a/tests/resources/csrc/exceptions.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#pragma once
-
-#include <stdexcept>
-
-class CosimException : public std::runtime_error {
-public:
-    explicit CosimException(const char *what) : runtime_error(what) {}
-};
-
-class TimeoutException : CosimException {
-public:
-    TimeoutException() : CosimException("timeout") {}
-};
-
-class ReturnException : CosimException {
-public:
-    ReturnException() : CosimException("returned") {}
-};
\ No newline at end of file
diff --git a/tests/resources/csrc/vbridge_impl.cc b/tests/resources/csrc/vbridge_impl.cc
index 59247dc..c7c42ec 100644
--- a/tests/resources/csrc/vbridge_impl.cc
+++ b/tests/resources/csrc/vbridge_impl.cc
@@ -5,31 +5,21 @@
 
 #include "vbridge_impl.h"
 
-
-
 VBridgeImpl::VBridgeImpl() : terminate(false) {}
 
-
-uint64_t VBridgeImpl::get_t() {
-  return getCycle();
-}
-
+uint64_t VBridgeImpl::get_t() { return getCycle(); }
 
 int VBridgeImpl::timeoutCheck() {
   if (terminate == true) {
-    LOG(INFO) << fmt::format("pass {} cases, time = {}", cnt-1, get_t());
+    LOG(INFO) << fmt::format("pass {} cases, time = {}", cnt - 1, get_t());
     dpiFinish();
   }
   return 0;
 }
 
-void VBridgeImpl::set_available() {
-  available = true;
-}
+void VBridgeImpl::set_available() { available = true; }
 
-void VBridgeImpl::clr_available() {
-  available = false;
-}
+void VBridgeImpl::clr_available() { available = false; }
 
 void VBridgeImpl::dpiInitCosim() {
   google::InitGoogleLogging("emulator");
@@ -37,101 +27,104 @@ void VBridgeImpl::dpiInitCosim() {
   FLAGS_minloglevel = 0;
 
   ctx = Verilated::threadContextp();
-//  LOG(INFO) << fmt::format("[{}] dpiInitCosim", getCycle());
+  //  LOG(INFO) << fmt::format("[{}] dpiInitCosim", getCycle());
 
   cnt = 0;
 
-  switch(rm){
-    case 0:
-      roundingMode = ROUND_NEAR_EVEN;
-      rmstring = "RNE";
-      break;
-    case 1:
-      roundingMode = ROUND_MINMAG;
-      rmstring = "RTZ";
-      break;
-    case 2:
-      roundingMode = ROUND_MIN;
-      rmstring = "RDN";
-      break;
-    case 3:
-      roundingMode = ROUND_MAX;
-      rmstring = "RUP";
-      break;
-    case 4:
-      roundingMode = ROUND_NEAR_MAXMAG;
-      rmstring = "RMM";
-      break;
-    default:
-      LOG(FATAL) << fmt::format("ilegal rm value = {}",rm);
+  switch (rm) {
+  case 0:
+    roundingMode = ROUND_NEAR_EVEN;
+    rmstring = "RNE";
+    break;
+  case 1:
+    roundingMode = ROUND_MINMAG;
+    rmstring = "RTZ";
+    break;
+  case 2:
+    roundingMode = ROUND_MIN;
+    rmstring = "RDN";
+    break;
+  case 3:
+    roundingMode = ROUND_MAX;
+    rmstring = "RUP";
+    break;
+  case 4:
+    roundingMode = ROUND_NEAR_MAXMAG;
+    rmstring = "RMM";
+    break;
+  default:
+    LOG(FATAL) << fmt::format("ilegal rm value = {}", rm);
   }
 
-  if (op=="div"){
-      opSignal = false;
-    } else if (op=="sqrt"){
-      opSignal = true;
-    } else LOG(FATAL) << fmt::format("illegal operation");
+  if (op == "div") {
+    opSignal = false;
+  } else if (op == "sqrt") {
+    opSignal = true;
+  } else
+    LOG(FATAL) << fmt::format("illegal operation");
 
-
-
-  LOG(INFO) << fmt::format("test f32_{} in {}",op,rmstring);
+  LOG(INFO) << fmt::format("test f32_{} in {}", op, rmstring);
 
   initTestCases();
 
-
   reloadcase();
 
   dpiDumpWave();
 }
 
-
-
-
 void VBridgeImpl::dpiPeek(svBit ready) {
 
-    if(ready == 1) {
-      set_available();
-    }
-
-
+  if (ready == 1) {
+    set_available();
+  }
 }
 
 void VBridgeImpl::dpiPoke(const DutInterface &toDut) {
-  if(available==false) return;
+  if (available == false)
+    return;
 
   *toDut.a = testcase.a;
   *toDut.b = testcase.b;
   *toDut.op = opSignal;
   *toDut.rm = rm;
   *toDut.valid = true;
-
 }
 
-void VBridgeImpl::dpiCheck(svBit valid, svBitVecVal result, svBitVecVal fflags) {
-  if(valid == 0) return;
-  if((result == testcase.expected_out) && (fflags == testcase.expectedException))
+void VBridgeImpl::dpiCheck(svBit valid, svBitVecVal result,
+                           svBitVecVal fflags) {
+  if (valid == 0)
+    return;
+  if ((result == testcase.expected_out) &&
+      (fflags == testcase.expectedException))
     reloadcase();
-  else
-  {
+  else {
     LOG(ERROR) << fmt::format("error at {} cases", cnt);
-    LOG(ERROR) << fmt::format("a = {:08X},b = {:08X} \n", testcase.a, testcase.b);
-    LOG(ERROR) << fmt::format("Result  dut vs ref  = {:08X} vs {:08X} \n" , result,testcase.expected_out);
-    LOG(ERROR) << fmt::format("Flag    dut vs ref  = {:08X} vs {:08X} \n" , fflags,(int)testcase.expectedException);
+    LOG(ERROR) << fmt::format("a = {:08X},b = {:08X} \n", testcase.a,
+                              testcase.b);
+    LOG(ERROR) << fmt::format("Result  dut vs ref  = {:08X} vs {:08X} \n",
+                              result, testcase.expected_out);
+    LOG(ERROR) << fmt::format("Flag    dut vs ref  = {:08X} vs {:08X} \n",
+                              fflags, (int)testcase.expectedException);
 
     dpiFinish();
   }
 }
 
-std::vector<testdata> mygen_abz_f32( float32_t trueFunction( float32_t, float32_t ) , function_t function, roundingMode_t roundingMode) {
+std::vector<testdata>
+mygen_abz_f32(float32_t trueFunction(float32_t, float32_t), function_t function,
+              roundingMode_t roundingMode) {
   // modified from berkeley-testfloat-3/source/genLoops.c
-  union ui32_f32 { uint32_t ui; float32_t f; } u;
+  union ui32_f32 {
+    uint32_t ui;
+    float32_t f;
+  } u;
 
   std::vector<testdata> res;
 
   softfloat_roundingMode = roundingMode - 1;
 
   genCases_f32_ab_init();
-  while ( ! genCases_done ) {
+  while (!genCases_done) {
     genCases_f32_ab_next();
 
     testdata curData;
@@ -141,8 +134,9 @@ std::vector<testdata> mygen_abz_f32( float32_t trueFunction( float32_t, float32_
     u.f = genCases_f32_b;
     curData.b = u.ui;
     softfloat_exceptionFlags = 0;
-    u.f = trueFunction( genCases_f32_a, genCases_f32_b );
-    curData.expectedException = static_cast<exceptionFlag_t>(softfloat_exceptionFlags);
+    u.f = trueFunction(genCases_f32_a, genCases_f32_b);
+    curData.expectedException =
+        static_cast<exceptionFlag_t>(softfloat_exceptionFlags);
     curData.expected_out = u.ui;
 
     res.push_back(curData);
@@ -151,14 +145,18 @@ std::vector<testdata> mygen_abz_f32( float32_t trueFunction( float32_t, float32_
   return res;
 }
 
-std::vector<testdata> mygen_az_f32( float32_t trueFunction( float32_t ), function_t function, roundingMode_t roundingMode)
-{
-  union ui32_f32 { uint32_t ui; float32_t f; } u;
+std::vector<testdata> mygen_az_f32(float32_t trueFunction(float32_t),
+                                   function_t function,
+                                   roundingMode_t roundingMode) {
+  union ui32_f32 {
+    uint32_t ui;
+    float32_t f;
+  } u;
   std::vector<testdata> res;
   softfloat_roundingMode = roundingMode - 1;
 
   genCases_f32_ab_init();
-  while ( ! genCases_done  ) {
+  while (!genCases_done) {
     genCases_f32_ab_next();
 
     testdata curData;
@@ -168,30 +166,32 @@ std::vector<testdata> mygen_az_f32( float32_t trueFunction( float32_t ), functio
     curData.a = u.ui;
     curData.b = u.ui;
     softfloat_exceptionFlags = 0;
-    u.f = trueFunction( genCases_f32_a );
-    curData.expectedException = static_cast<exceptionFlag_t>(softfloat_exceptionFlags);
+    u.f = trueFunction(genCases_f32_a);
+    curData.expectedException =
+        static_cast<exceptionFlag_t>(softfloat_exceptionFlags);
     curData.expected_out = u.ui;
     res.push_back(curData);
   }
   return res;
 }
 
-
-std::vector<testdata> genTestCase(function_t function, roundingMode_t roundingMode) { // call it in dpiInit
+std::vector<testdata>
+genTestCase(function_t function,
+            roundingMode_t roundingMode) { // call it in dpiInit
   // see berkeley-testfloat-3/source/testfloat_gen.c
   std::vector<testdata> res;
 
-  genCases_setLevel( 1 );
+  genCases_setLevel(1);
 
   switch (function) {
-    case F32_DIV:
-      res = mygen_abz_f32(f32_div, function, roundingMode);
-      break;
-    case F32_SQRT:
-      res = mygen_az_f32(f32_sqrt, function, roundingMode);
-      break;
-    default:
-      assert(false);
+  case F32_DIV:
+    res = mygen_abz_f32(f32_div, function, roundingMode);
+    break;
+  case F32_SQRT:
+    res = mygen_az_f32(f32_sqrt, function, roundingMode);
+    break;
+  default:
+    assert(false);
   }
 
   return res;
@@ -199,32 +199,30 @@ std::vector<testdata> genTestCase(function_t function, roundingMode_t roundingMo
 
 void outputTestCases(std::vector<testdata> cases) {
   for (auto x : cases) {
-//    printf("%08x %08x %08x %02x\n", x.a, x.b, x.expected_out, x.expectedException);
+    //    printf("%08x %08x %08x %02x\n", x.a, x.b, x.expected_out,
+    //    x.expectedException);
   }
 }
 
 void fillTestQueue(std::vector<testdata> cases) {
   for (auto x : cases) {
     vbridge_impl_instance.test_queue.push(x);
-
   }
 }
 
-
 void VBridgeImpl::initTestCases() {
 
   std::vector<testdata> res;
 
-  if (op=="div"){
+  if (op == "div") {
     res = genTestCase(F32_DIV, roundingMode);
-  } else if (op=="sqrt"){
+  } else if (op == "sqrt") {
     res = genTestCase(F32_SQRT, roundingMode);
-  } else LOG(FATAL) << fmt::format("illegal operation");
+  } else
+    LOG(FATAL) << fmt::format("illegal operation");
 
   fillTestQueue(res);
   outputTestCases(res); // TODO: demo, please delete
-
-
 }
 
 void VBridgeImpl::reloadcase() {
@@ -237,13 +235,8 @@ void VBridgeImpl::reloadcase() {
   testcase.expectedException = test_queue.front().expectedException;
 
   test_queue.pop();
-  if(test_queue.size() == 0) terminate = true;
+  if (test_queue.size() == 0)
+    terminate = true;
 }
 
-
-
 VBridgeImpl vbridge_impl_instance;
-
-
-
-
diff --git a/tests/resources/csrc/vbridge_impl.h b/tests/resources/csrc/vbridge_impl.h
index 5918790..f905bf6 100644
--- a/tests/resources/csrc/vbridge_impl.h
+++ b/tests/resources/csrc/vbridge_impl.h
@@ -3,102 +3,95 @@
 #include <optional>
 #include <queue>
 
-#include <VTestBench__Dpi.h>
 #include "verilated_fst_c.h"
-
+#include <VTestBench__Dpi.h>
 
 #include <svdpi.h>
 
 #include "util.h"
 
-#include <cstdio>
 #include <cassert>
 #include <cstdint>
+#include <cstdio>
 
 extern "C" {
 #include "functions.h"
-#include "softfloat.h"
 #include "genCases.h"
 #include "genLoops.h"
+#include "softfloat.h"
 }
 
-struct DutInterface{
-    svBit *valid;
-    svBitVecVal *a;
-    svBitVecVal *b;
-    svBit *op;
-    svBitVecVal *rm;
+struct DutInterface {
+  svBit *valid;
+  svBitVecVal *a;
+  svBitVecVal *b;
+  svBit *op;
+  svBitVecVal *rm;
 };
 
 struct testdata {
-    uint64_t a;
-    uint64_t b;
-    uint64_t expected_out;
-    function_t function;
-    exceptionFlag_t expectedException;
+  uint64_t a;
+  uint64_t b;
+  uint64_t expected_out;
+  function_t function;
+  exceptionFlag_t expectedException;
 };
 
-
 class VBridgeImpl {
 public:
-    explicit VBridgeImpl();
-
-    void dpiDumpWave();
+  explicit VBridgeImpl();
 
-    void dpiInitCosim();
+  void dpiDumpWave();
 
-    uint64_t get_t();
+  void dpiInitCosim();
 
-    int timeoutCheck();
+  uint64_t get_t();
 
-    uint64_t getCycle() { return ctx->time(); }
+  int timeoutCheck();
 
-    void dpiPoke(const DutInterface &toDut);
+  uint64_t getCycle() { return ctx->time(); }
 
-    void dpiPeek(svBit ready);
+  void dpiPoke(const DutInterface &toDut);
 
-    std::queue <testdata> test_queue;
+  void dpiPeek(svBit ready);
 
-    testdata testcase;
+  std::queue<testdata> test_queue;
 
-    void initTestCases();
+  testdata testcase;
 
-    void dpiCheck(svBit valid,
-                  svBitVecVal result,
-                  svBitVecVal fflags);
+  void initTestCases();
 
-    void set_available();
+  void dpiCheck(svBit valid, svBitVecVal result, svBitVecVal fflags);
 
-    void clr_available();
+  void set_available();
 
-    void reloadcase();
+  void clr_available();
 
-    uint64_t cnt;
+  void reloadcase();
 
-    roundingMode_t roundingMode;
+  uint64_t cnt;
 
-    bool opSignal;
+  roundingMode_t roundingMode;
 
-    std::string rmstring;
+  bool opSignal;
 
+  std::string rmstring;
 
 private:
+  VerilatedContext *ctx;
+  VerilatedFstC tfp;
 
-    VerilatedContext *ctx;
-    VerilatedFstC tfp;
-
-    uint64_t _cycles;
-
-    bool terminate;
+  uint64_t _cycles;
 
-    bool available;
+  bool terminate;
 
-    const std::string wave = get_env_arg("wave");
+  bool available;
 
-    const std::string op = get_env_arg("op");
+  const std::string wave = get_env_arg("wave");
 
-    const int rm = std::stoul(get_env_arg("rm"), nullptr, 10);
+  const std::string op = get_env_arg("op");
 
+  const int rm = std::stoul(get_env_arg("rm"), nullptr, 10);
 };
 
 extern VBridgeImpl vbridge_impl_instance;

From 866fda863f174ea9b6637fb7ffb3880aeb916df2 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Tue, 17 Oct 2023 13:09:07 +0800
Subject: [PATCH 104/109] [rtl] add isSNaN to rawFLoat

---
 arithmetic/src/float/DivSqrt.scala        | 29 ++++++++++++-----------
 arithmetic/src/float/common.scala         |  7 +-----
 arithmetic/src/float/rawFloatFromFN.scala |  1 +
 3 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index f1fef5a..424c527 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -39,11 +39,12 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
 
   // Exceptions
 
-  /** inf/inf and 0/0  => NaN out */
-  val divTwoInvalidOpCases =
-    (rawA.isZero && rawB.isZero) || (rawA.isInf && rawB.isInf)
-  /** A = -Inf or -Normal */
-  val sqrtInvalidNegaCases =
+  /** inf/inf and 0/0  => qNaN, set NV */
+  val divInvalidCases = (rawA.isZero && rawB.isZero) || (rawA.isInf && rawB.isInf)
+  /** normal/0  => inf, set DV */
+  val divDivideZero = (!rawA.isNaN && !rawA.isInf && rawB.isZero)
+  /** A = -Inf or -Normal => qNaN, set NV*/
+  val sqrtInvalidCases =
     !rawA.isNaN && !rawA.isZero && rawA.sign
   /** classified in flags
     *
@@ -51,10 +52,10 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     */
   val isNVorDZ =
     Mux(input.bits.sqrt,
-      isSigNaNRawFloat(rawA) || sqrtInvalidNegaCases,
-      isSigNaNRawFloat(rawA) || isSigNaNRawFloat(rawB) ||
-        divTwoInvalidOpCases ||
-        (!rawA.isNaN && !rawA.isInf && rawB.isZero)
+      rawA.isSNaN || sqrtInvalidCases,
+      rawA.isSNaN || rawB.isSNaN ||
+        divInvalidCases ||
+        divDivideZero
     )
 
   /** classified in output result
@@ -63,22 +64,22 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     */
   val isNaN =
     Mux(input.bits.sqrt,
-      rawA.isNaN || sqrtInvalidNegaCases,
-      rawA.isNaN || rawB.isNaN || divTwoInvalidOpCases
+      rawA.isNaN || sqrtInvalidCases,
+      rawA.isNaN || rawB.isNaN || divInvalidCases
     )
   val isInf  = Mux(input.bits.sqrt, rawA.isInf, rawA.isInf || rawB.isZero)
   val isZero = Mux(input.bits.sqrt, rawA.isZero, rawA.isZero || rawB.isInf)
 
-  val isNVorDZreg = RegEnable(isNVorDZ, false.B, input.fire)
+  val isNVorDZReg = RegEnable(isNVorDZ, false.B, input.fire)
   val isNaNReg    = RegEnable(isNaN   , false.B, input.fire)
   val isInfReg    = RegEnable(isInf   , false.B, input.fire)
   val isZeroReg   = RegEnable(isZero  , false.B, input.fire)
 
   /** invalid operation flag */
-  val invalidExec = isNVorDZreg &&  isNaNReg
+  val invalidExec = isNVorDZReg &&  isNaNReg
 
   /** DivideByZero flag */
-  val infinitExec = isNVorDZreg && !isNaNReg
+  val infinitExec = isNVorDZReg && !isNaNReg
 
   val specialCaseA   = rawA.isNaN || rawA.isInf || rawA.isZero
   val specialCaseB   = rawB.isNaN || rawB.isInf || rawB.isZero
diff --git a/arithmetic/src/float/common.scala b/arithmetic/src/float/common.scala
index 27ae7d5..078c455 100644
--- a/arithmetic/src/float/common.scala
+++ b/arithmetic/src/float/common.scala
@@ -16,6 +16,7 @@ class RawFloat(val expWidth: Int, val sigWidth: Int) extends Bundle
     val isNaN: Bool  = Bool()              // overrides all other fields
     val isInf: Bool  = Bool()              // overrides 'isZero', 'sExp', and 'sig'
     val isZero: Bool = Bool()              // overrides 'sExp' and 'sig'
+    val isSNaN:Bool  = Bool()
     val sign: Bool   = Bool()
     // sExp = 0.U(1.W) ## (Exp &- bias)
     // Exp - bias width is expWidth + 1
@@ -24,12 +25,6 @@ class RawFloat(val expWidth: Int, val sigWidth: Int) extends Bundle
     val sig: UInt    = UInt((sigWidth + 1).W)   // 2 m.s. bits cannot both be 0
 }
 
-/** is signaling NaN */
-object isSigNaNRawFloat
-{
-    def apply(in: RawFloat): Bool = in.isNaN && !in.sig(in.sigWidth - 2)
-}
-
 object countLeadingZeros
 {
   def apply(in: UInt): UInt = PriorityEncoder(in.asBools.reverse)
diff --git a/arithmetic/src/float/rawFloatFromFN.scala b/arithmetic/src/float/rawFloatFromFN.scala
index 449bce0..26fc76b 100644
--- a/arithmetic/src/float/rawFloatFromFN.scala
+++ b/arithmetic/src/float/rawFloatFromFN.scala
@@ -68,6 +68,7 @@ object rawFloatFromFN {
     out.sExp := adjustedExp(expWidth, 0).zext
     out.sig :=
       0.U(1.W) ## !isZero ## Mux(isZeroExpIn, subnormFract, fractIn)
+    out.isSNaN :=  out.isNaN && !out.sig(sigWidth - 2)
     out
   }
 }

From 56a66a958391657d45311813219d6d5c24da5a97 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 18 Oct 2023 15:07:01 +0800
Subject: [PATCH 105/109] [rtl] opt sqrt critical path

---
 arithmetic/src/float/DivSqrt.scala             | 12 ++++++------
 arithmetic/src/float/common.scala              |  1 +
 arithmetic/src/float/rawFloatFromFN.scala      |  7 +++++++
 arithmetic/tests/src/float/DivSqrtTester.scala |  4 ++--
 tests/src/DUT.scala                            |  4 ++--
 5 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index 424c527..0ef94ae 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -34,8 +34,8 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val opSqrtReg       = RegEnable(input.bits.sqrt        , false.B, input.fire)
   val roundingModeReg = RegEnable(input.bits.roundingMode, 0.U    , input.fire)
 
-  val rawA = rawFloatFromFN(expWidth, sigWidth, input.bits.a)
-  val rawB = rawFloatFromFN(expWidth, sigWidth, input.bits.b)
+  val rawA = rawFloatFromFN(expWidth, sigWidth, input.bits.dividend)
+  val rawB = rawFloatFromFN(expWidth, sigWidth, input.bits.divisor)
 
   // Exceptions
 
@@ -115,8 +115,8 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   )
 
   val expForSqrt = Cat(expstart, rawA.sExp(expWidth - 2, 0)) >> 1
-  val sqrtExpIsOdd = !rawA.sExp(0)
-  val sqrtFractIn = Mux(sqrtExpIsOdd, Cat(0.U(1.W), rawA.sig(sigWidth - 1, 0), 0.U(1.W)),
+
+  val sqrtFractIn = Mux(rawA.sExpIsEven , Cat(0.U(1.W), rawA.sig(sigWidth - 1, 0), 0.U(1.W)),
     Cat(rawA.sig(sigWidth - 1, 0), 0.U(2.W)))
 
   val SqrtModule = Module(new SquareRoot(2, 2, sigWidth+2, sigWidth+2))
@@ -203,8 +203,8 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
 
 
 class DivSqrtInput(expWidth: Int, sigWidth: Int) extends Bundle() {
-  val a = UInt((expWidth + sigWidth).W)
-  val b = UInt((expWidth + sigWidth).W)
+  val dividend = UInt((expWidth + sigWidth).W)
+  val divisor = UInt((expWidth + sigWidth).W)
   val sqrt = Bool()
   val roundingMode = UInt(3.W)
 }
diff --git a/arithmetic/src/float/common.scala b/arithmetic/src/float/common.scala
index 078c455..3194e55 100644
--- a/arithmetic/src/float/common.scala
+++ b/arithmetic/src/float/common.scala
@@ -17,6 +17,7 @@ class RawFloat(val expWidth: Int, val sigWidth: Int) extends Bundle
     val isInf: Bool  = Bool()              // overrides 'isZero', 'sExp', and 'sig'
     val isZero: Bool = Bool()              // overrides 'sExp' and 'sig'
     val isSNaN:Bool  = Bool()
+    val sExpIsEven   = Bool()
     val sign: Bool   = Bool()
     // sExp = 0.U(1.W) ## (Exp &- bias)
     // Exp - bias width is expWidth + 1
diff --git a/arithmetic/src/float/rawFloatFromFN.scala b/arithmetic/src/float/rawFloatFromFN.scala
index 26fc76b..284f528 100644
--- a/arithmetic/src/float/rawFloatFromFN.scala
+++ b/arithmetic/src/float/rawFloatFromFN.scala
@@ -59,6 +59,12 @@ object rawFloatFromFN {
 
     val isZero = isZeroExpIn && isZeroFractIn
     val isSpecial = adjustedExp(expWidth, expWidth - 1) === 3.U
+    val isSubnormal = isZeroExpIn && !isZeroFractIn
+    /** gets rawAExpIsEven directly from a and leadingZero
+      *
+      * @todo 1 bits Mux?
+      */
+    val sExpIsEven = Mux(isSubnormal, in(23) ^ normDist(0), in(23)).asBool
 
     val out = Wire(new RawFloat(expWidth, sigWidth))
     out.isNaN := isSpecial && !isZeroFractIn
@@ -69,6 +75,7 @@ object rawFloatFromFN {
     out.sig :=
       0.U(1.W) ## !isZero ## Mux(isZeroExpIn, subnormFract, fractIn)
     out.isSNaN :=  out.isNaN && !out.sig(sigWidth - 2)
+    out.sExpIsEven := sExpIsEven
     out
   }
 }
diff --git a/arithmetic/tests/src/float/DivSqrtTester.scala b/arithmetic/tests/src/float/DivSqrtTester.scala
index f00b2d2..3f6694d 100644
--- a/arithmetic/tests/src/float/DivSqrtTester.scala
+++ b/arithmetic/tests/src/float/DivSqrtTester.scala
@@ -50,8 +50,8 @@ object DivSqrtTester extends TestSuite with ChiselUtestTester {
         ) { dut: DivSqrt =>
           dut.clock.setTimeout(0)
           dut.input.valid.poke(true.B)
-          dut.input.bits.a.poke((xInput).U)
-          dut.input.bits.b.poke((dInput).U)
+          dut.input.bits.dividend.poke((xInput).U)
+          dut.input.bits.divisor.poke((dInput).U)
           dut.input.bits.sqrt.poke(false.B)
           dut.clock.step()
           dut.input.valid.poke(false.B)
diff --git a/tests/src/DUT.scala b/tests/src/DUT.scala
index 1385673..b537ac1 100644
--- a/tests/src/DUT.scala
+++ b/tests/src/DUT.scala
@@ -18,8 +18,8 @@ class DUT(expWidth: Int, sigWidth: Int) extends Module {
   val ds = Module(new DivSqrt(expWidth: Int, sigWidth: Int))
   ds.input.valid := input.valid
   ds.input.bits.sqrt := input.bits.op
-  ds.input.bits.a := input.bits.a
-  ds.input.bits.b := input.bits.b
+  ds.input.bits.dividend := input.bits.a
+  ds.input.bits.divisor := input.bits.b
   ds.input.bits.roundingMode := input.bits.roundingMode
 
   input.ready := ds.input.ready

From 05310e78c35704ad82118385cca5bd0d314d9bd0 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 18 Oct 2023 15:38:13 +0800
Subject: [PATCH 106/109] [rtl] opt RoundingUnit IO

---
 arithmetic/src/float/DivSqrt.scala      | 21 +++++--------
 arithmetic/src/float/RoundingUnit.scala | 39 +++++++++++++++----------
 2 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/arithmetic/src/float/DivSqrt.scala b/arithmetic/src/float/DivSqrt.scala
index 0ef94ae..c90a0d4 100644
--- a/arithmetic/src/float/DivSqrt.scala
+++ b/arithmetic/src/float/DivSqrt.scala
@@ -124,8 +124,7 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   SqrtModule.input.valid := input.valid && input.bits.sqrt && normalCaseSqrt
 
   // collect sqrt result
-  val rbitsSqrt      = SqrtModule.output.bits.result(1) ## (!SqrtModule.output.bits.zeroRemainder || SqrtModule.output.bits.result(0))
-  val sigToRoundSqrt = SqrtModule.output.bits.result(24, 2)
+  val sigPlusSqrt = SqrtModule.output.bits.result(24, 1) ## (!SqrtModule.output.bits.zeroRemainder || SqrtModule.output.bits.result(0))
 
 
   // divInput
@@ -150,15 +149,10 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
     * }}}
     */
   val needRightShift = !divModule.output.bits.quotient(27)
-  val sigToRoundDiv = Mux(needRightShift,
-    divModule.output.bits.quotient(calWidth - 3, calWidth - sigWidth - 1),
-    divModule.output.bits.quotient(calWidth - 2, calWidth - sigWidth))
-  val rbitsDiv = Mux(needRightShift, divModule.output.bits.quotient(calWidth - sigWidth - 2) ## divModule.output.bits.reminder.orR,
-    divModule.output.bits.quotient(calWidth - sigWidth - 1) ## divModule.output.bits.reminder.orR)
-
-  // collect sig result
-  val sigToRound   = Mux(opSqrtReg, sigToRoundSqrt, sigToRoundDiv)
-  val rbitsToRound = Mux(opSqrtReg, rbitsSqrt, rbitsDiv)
+  val sigPlusDiv = Mux(needRightShift,
+    divModule.output.bits.quotient(calWidth - 3, calWidth - sigWidth - 2) ## divModule.output.bits.reminder.orR,
+    divModule.output.bits.quotient(calWidth - 2, calWidth - sigWidth-1)   ## divModule.output.bits.reminder.orR
+  )
 
   // exp logic
   val expStoreNext,expToRound = Wire(UInt((expWidth+2).W))
@@ -182,11 +176,12 @@ class DivSqrt(expWidth: Int, sigWidth: Int) extends Module{
   val expStore = RegEnable(expStoreNext, 0.U((expWidth+2).W), input.fire)
   expToRound := Mux(opSqrtReg, expStore, expStore - needRightShift)
 
+  val sigPlus = Mux(opSqrtReg, sigPlusSqrt, sigPlusDiv)
+
   val roundresult = RoundingUnit(
     signReg,
     expToRound.asSInt,
-    sigToRound,
-    rbitsToRound,
+    sigPlus,
     roundingModeReg,
     invalidExec,
     infinitExec,
diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index b0fa678..b14067b 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -17,18 +17,7 @@ import chisel3.util._
   * sig: 23bits UInt
   */
 class RoundingUnit extends Module{
-  val input = IO(Input(new Bundle{
-    val invalidExc = Bool() // overrides 'infiniteExc' and 'in'
-    val infiniteExc = Bool() // overrides 'in' except for 'in.sign'
-    val isInf  = Bool()
-    val isZero = Bool()
-    val isNaN  = Bool()
-    val sig = UInt(23.W)
-    val exp = SInt(10.W)
-    val rBits = UInt(2.W)
-    val sign = Bool()
-    val roundingMode = UInt(5.W)
-  }))
+  val input = IO(Input(new RoundingInput))
   val output = IO(Output(new Bundle{
     val data = UInt(32.W)
     val exceptionFlags = Output(Bits(5.W))
@@ -51,6 +40,9 @@ class RoundingUnit extends Module{
   val subSigOut, commonSubnormSigOut = Wire(UInt(23.W))
   val expInc = Wire(UInt(8.W))
 
+  val allSig = Wire(UInt(25.W))
+  allSig := input.sigPlus
+
   // control logic
   // set to 126 according to softfloat
   val expSubnorm = (input.exp + 126.S(10.W))
@@ -60,7 +52,7 @@ class RoundingUnit extends Module{
   val commonTotalUnderflow = subnormDist > 235.S
 
   /** contains the hidden 1 and rBits */
-  val adjustedSig = Cat(1.U(1.W), input.sig, input.rBits)
+  val adjustedSig = Cat(1.U(1.W), allSig)
 
   // rounding logic
   val distGT32 = subnormDist(9,5).orR
@@ -164,13 +156,14 @@ class RoundingUnit extends Module{
 }
 
 object RoundingUnit {
-  def apply(sign: Bool, exp: SInt, sig: UInt, rbits: UInt, rmode: UInt, invalidExc: Bool, infiniteExc: Bool, isNaN: Bool, isInf: Bool, isZero: Bool): Vec[UInt] = {
+  def apply(sign: Bool, exp: SInt, sigPlus: UInt, rmode: UInt, invalidExc: Bool, infiniteExc: Bool, isNaN: Bool, isInf: Bool, isZero: Bool): Vec[UInt] = {
 
     val rounder = Module(new RoundingUnit)
     rounder.input.sign := sign
-    rounder.input.sig := sig
+    rounder.input.sigPlus := sigPlus
+
     rounder.input.exp := exp
-    rounder.input.rBits := rbits
+
     rounder.input.roundingMode := rmode
     rounder.input.invalidExc := invalidExc
     rounder.input.infiniteExc := infiniteExc
@@ -182,4 +175,18 @@ object RoundingUnit {
 
 }
 
+class RoundingInput extends Bundle{
+  val invalidExc = Bool() // overrides 'infiniteExc' and 'in'
+  val infiniteExc = Bool() // overrides 'in' except for 'in.sign'
+  val isInf = Bool()
+  val isZero = Bool()
+  val isNaN = Bool()
+  val sigPlus = UInt(25.W)
+
+  val exp = SInt(10.W)
+
+  val sign = Bool()
+  val roundingMode = UInt(5.W)
+}
+
 

From 3abb56b4646b35e1f8512573978f94c5126debfc Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Wed, 18 Oct 2023 16:36:11 +0800
Subject: [PATCH 107/109] [rtl] rename RoundingUnit

---
 arithmetic/src/float/RoundingUnit.scala   | 68 ++++++++++++-----------
 arithmetic/src/float/common.scala         |  6 +-
 arithmetic/src/float/rawFloatFromFN.scala |  7 +--
 3 files changed, 40 insertions(+), 41 deletions(-)

diff --git a/arithmetic/src/float/RoundingUnit.scala b/arithmetic/src/float/RoundingUnit.scala
index b14067b..2d8238c 100644
--- a/arithmetic/src/float/RoundingUnit.scala
+++ b/arithmetic/src/float/RoundingUnit.scala
@@ -33,54 +33,59 @@ class RoundingUnit extends Module{
   val commonUnderflow = Wire(Bool())
   val commonInexact   = Wire(Bool())
 
-  val sigIncr = Wire(Bool())
+  val sigIncr      = Wire(Bool())
   val commonExpOut = Wire(UInt(8.W))
   val commonSigOut = Wire(UInt(23.W))
-  val sigAfterInc = Wire(UInt(27.W))
-  val subSigOut, commonSubnormSigOut = Wire(UInt(23.W))
-  val expInc = Wire(UInt(8.W))
+  val sigAfterInc  = Wire(UInt(27.W))
+  val subSigOut    = Wire(UInt(23.W))
+  val subExpOut    = Wire(UInt(8.W))
 
   val allSig = Wire(UInt(25.W))
   allSig := input.sigPlus
 
-  // control logic
-  // set to 126 according to softfloat
+  // subnormal control
+
+  // B = A + 126 : if B<0 =>  A <= -127 => A is subnormal
   val expSubnorm = (input.exp + 126.S(10.W))
   // for non subnormal case, Dist = 0
   val subnormDist = Mux(commonUnderflow, -expSubnorm, 0.S(10.W))
   // todo why we have this case? IN IEEE754 or definded in Hardfloat?
   val commonTotalUnderflow = subnormDist > 235.S
 
-  /** contains the hidden 1 and rBits */
+  /** restore the hidden 1 */
   val adjustedSig = Cat(1.U(1.W), allSig)
 
   // rounding logic
-  val distGT32 = subnormDist(9,5).orR
-  val allMask = ((-1).S(31.W) << 31 >> subnormDist(5,0))
-  val distIn24And31 = allMask(6,0).orR
-  val distGT24 = (distGT32 || distIn24And31) && commonUnderflow
+  val allMask = ((-1).S(31.W) << 31 >> subnormDist(5,0))(31,0)
+  /** For dist > 24, all bits all be rounded
+    *
+    * implement a optimized logcic
+    * distGT32 = subnormDist(9,5).orR
+    * distIn24And31 = allMask(6,0).orR
+    */
+  val distGT24 = (subnormDist(9,5).orR || allMask(6,0).orR) && commonUnderflow
   /** 26bits mask selecting all bits will be rounded, considering subnormal case
     *
     * last 2 bits is rbits, always 1s
     */
-  val roundMask = Mux(!distGT24, Reverse(allMask(30,7)) ## 3.U(2.W), 0.U(26.W))
+  val roundMask = Mux(!distGT24, Reverse(allMask(30,7)) ## "b11".U(2.W), 0.U(26.W))
   /** mask for all bits after guard bit */
   val shiftedRoundMask = Mux(!distGT24, 0.U(1.W) ## roundMask >> 1 , BigInt(-1).S(26.W).asUInt)
-  /** select the guard bit need to be rounded */
-  val roundPosMask = ~shiftedRoundMask & roundMask
-  val roundPosBit = (adjustedSig & roundPosMask).orR
+  /** select the guard bit */
+  val guardBitMask = ~shiftedRoundMask & roundMask
+  val guardBit = (adjustedSig & guardBitMask).orR
   /** Any bit is one after guard bit => sticky bit */
-  val anyRoundExtra = (adjustedSig & shiftedRoundMask).orR
+  val stickyBit = (adjustedSig & shiftedRoundMask).orR
   /** Any bit is one containing guard bit */
-  val anyRound = roundPosBit || anyRoundExtra
+  val anyRound = guardBit || stickyBit
 
   /** the last effective bit */
-  val lastBitMask = (roundPosMask << 1.U)(25,0)
+  val lastBitMask = (guardBitMask << 1.U)(25,0)
   val lastBit = (adjustedSig & lastBitMask ).orR
 
-  val distEQ24 = roundPosMask(25) && !roundPosMask(24,0).orR
+  val distEQ24 = subnormDist === 24.S
   /** 2 bits for final rounding */
-  val rbits : UInt= Cat(roundPosBit, anyRoundExtra)
+  val rbits : UInt= Cat(guardBit, stickyBit)
 
   sigIncr := (rmRNE && (rbits.andR || (lastBit && rbits==="b10".U))) ||
     (rmRDN &&  input.sign &&  rbits.orR) ||
@@ -93,8 +98,7 @@ class RoundingUnit extends Module{
     Mux(sigIncr, 1.U(23.W), 0.U(23.W)),
     (sigAfterInc >> subnormDist(4,0))(24,2))
   /** when subnormDist===1.S, there may be expInc */
-  expInc := sigAfterInc(26)  && (!commonUnderflow || subnormDist === 1.S )
-  commonSubnormSigOut := Mux(commonTotalUnderflow, 0.U, subSigOut )
+  subExpOut := sigAfterInc(26)  && (!commonUnderflow || subnormDist === 1.S )
 
   /** conforms to last bit position */
   val sigIncrement = Mux(sigIncr, lastBitMask, 0.U(26.W))
@@ -107,7 +111,7 @@ class RoundingUnit extends Module{
   val commonCase = !isNaNOut && !notNaNIsSpecialInfOut && !input.isZero
 
   val overflow  = commonCase && commonOverflow
-  val underflow = commonCase && (commonUnderflow && rbits.orR)
+  val underflow = commonCase && commonUnderflow && commonInexact
   val inexact   = overflow || (commonCase && commonInexact)
 
   val overflowSele = rmRDN ## rmRUP ## rmRTZ ## (rmRNE || rmRMM)
@@ -136,14 +140,12 @@ class RoundingUnit extends Module{
   commonInexact   := anyRound
 
   commonSigOut := sigAfterInc(24,2)
-  commonExpOut := ((input.exp + 127.S)(7,0) + expInc).asUInt
-
+  commonExpOut := ((input.exp + 127.S)(7,0) + subExpOut).asUInt
 
   val commonOut = Mux(commonOverflow, common_infiniteOut,
-    Mux(commonUnderflow, input.sign ## expInc ## commonSubnormSigOut,
+    Mux(commonUnderflow, input.sign ## subExpOut ## Mux(commonTotalUnderflow, 0.U, subSigOut),
       input.sign ## commonExpOut ## commonSigOut))
 
-
   output.data := Mux1H(Seq(
     outSele1H(0) -> zeroOut,
     outSele1H(1) -> quietNaN,
@@ -157,13 +159,10 @@ class RoundingUnit extends Module{
 
 object RoundingUnit {
   def apply(sign: Bool, exp: SInt, sigPlus: UInt, rmode: UInt, invalidExc: Bool, infiniteExc: Bool, isNaN: Bool, isInf: Bool, isZero: Bool): Vec[UInt] = {
-
     val rounder = Module(new RoundingUnit)
     rounder.input.sign := sign
     rounder.input.sigPlus := sigPlus
-
     rounder.input.exp := exp
-
     rounder.input.roundingMode := rmode
     rounder.input.invalidExc := invalidExc
     rounder.input.infiniteExc := infiniteExc
@@ -172,7 +171,6 @@ object RoundingUnit {
     rounder.input.isNaN := isNaN
     VecInit(rounder.output.data, rounder.output.exceptionFlags)
   }
-
 }
 
 class RoundingInput extends Bundle{
@@ -181,10 +179,14 @@ class RoundingInput extends Bundle{
   val isInf = Bool()
   val isZero = Bool()
   val isNaN = Bool()
+  /** 23bits + 2 bits for rounding */
   val sigPlus = UInt(25.W)
-
+  /** true exp with sign
+    *
+    * why need 10bit:
+    * for div: ExpMaxMag = 255 - (0+22) = 277
+    * */
   val exp = SInt(10.W)
-
   val sign = Bool()
   val roundingMode = UInt(5.W)
 }
diff --git a/arithmetic/src/float/common.scala b/arithmetic/src/float/common.scala
index 3194e55..dbbbca4 100644
--- a/arithmetic/src/float/common.scala
+++ b/arithmetic/src/float/common.scala
@@ -13,9 +13,9 @@ object RoundingMode {
 
 class RawFloat(val expWidth: Int, val sigWidth: Int) extends Bundle
 {
-    val isNaN: Bool  = Bool()              // overrides all other fields
-    val isInf: Bool  = Bool()              // overrides 'isZero', 'sExp', and 'sig'
-    val isZero: Bool = Bool()              // overrides 'sExp' and 'sig'
+    val isNaN: Bool  = Bool()
+    val isInf: Bool  = Bool()
+    val isZero: Bool = Bool()
     val isSNaN:Bool  = Bool()
     val sExpIsEven   = Bool()
     val sign: Bool   = Bool()
diff --git a/arithmetic/src/float/rawFloatFromFN.scala b/arithmetic/src/float/rawFloatFromFN.scala
index 284f528..5eb5205 100644
--- a/arithmetic/src/float/rawFloatFromFN.scala
+++ b/arithmetic/src/float/rawFloatFromFN.scala
@@ -60,11 +60,8 @@ object rawFloatFromFN {
     val isZero = isZeroExpIn && isZeroFractIn
     val isSpecial = adjustedExp(expWidth, expWidth - 1) === 3.U
     val isSubnormal = isZeroExpIn && !isZeroFractIn
-    /** gets rawAExpIsEven directly from a and leadingZero
-      *
-      * @todo 1 bits Mux?
-      */
-    val sExpIsEven = Mux(isSubnormal, in(23) ^ normDist(0), in(23)).asBool
+    /** gets rawAExpIsEven directly from expLSB and leadingZero */
+    val sExpIsEven = (isSubnormal && (in(23) ^ normDist(0))) || (!isSubnormal && in(23))
 
     val out = Wire(new RawFloat(expWidth, sigWidth))
     out.isNaN := isSpecial && !isZeroFractIn

From 23faa640e3372f6891d160b5bb2ba8707b5c25d5 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 23 Oct 2023 15:10:39 +0800
Subject: [PATCH 108/109] [test] opt test system

---
 Makefile                                 |   2 +-
 build.sc                                 |   3 -
 common.sc                                |  12 +-
 tests/resources/csrc/vbridge_impl.cc     |  13 +-
 tests/src/{Ftests.scala => Tester.scala} | 163 ++++++-----------------
 5 files changed, 47 insertions(+), 146 deletions(-)
 rename tests/src/{Ftests.scala => Tester.scala} (50%)

diff --git a/Makefile b/Makefile
index 9e33eac..22774b8 100644
--- a/Makefile
+++ b/Makefile
@@ -10,7 +10,7 @@ run:
 	mill -i -j 0 arithmetic[5.0.0].run
 
 test:
-	mill -i -j 0 arithmetictest[5.0.0].test
+	mill -i -j 0 arithmetictest[5.0.0].run
 
 bsp:
 	mill -i mill.bsp.BSP/install
diff --git a/build.sc b/build.sc
index 69e00cd..696a52c 100644
--- a/build.sc
+++ b/build.sc
@@ -52,9 +52,6 @@ trait ArithmeticTest
 
   def arithmeticModule = arithmetic(crossValue)
 
-  def scalatestIvy = v.scalatest
-
-  def scalaparIvy = v.scalapar
 
   def spireIvy = v.spire
 
diff --git a/common.sc b/common.sc
index 58ae3a3..6223cb4 100644
--- a/common.sc
+++ b/common.sc
@@ -39,9 +39,9 @@ trait ArithmeticModule
 // TODO: migrate test to svsim
 
 trait ArithmeticTestModule
-  extends TestModule
+  extends ScalaModule
     with HasChisel
-    with TestModule.ScalaTest {
+     {
   def arithmeticModule: ArithmeticModule
   def spireIvy: T[Dep]
 
@@ -49,18 +49,10 @@ trait ArithmeticTestModule
 
   def oslibIvy: T[Dep]
 
-  def scalatestIvy: Dep
-
-  def scalaparIvy: Dep
-
   override def moduleDeps = super.moduleDeps ++ Some(arithmeticModule)
 
-  override def defaultCommandName() = "test"
-
   override def ivyDeps = T(
     super.ivyDeps() ++ Agg(
-      scalatestIvy,
-      scalaparIvy,
       spireIvy(),
       evilplotIvy()
     )
diff --git a/tests/resources/csrc/vbridge_impl.cc b/tests/resources/csrc/vbridge_impl.cc
index c7c42ec..3de6000 100644
--- a/tests/resources/csrc/vbridge_impl.cc
+++ b/tests/resources/csrc/vbridge_impl.cc
@@ -11,7 +11,7 @@ uint64_t VBridgeImpl::get_t() { return getCycle(); }
 
 int VBridgeImpl::timeoutCheck() {
   if (terminate == true) {
-    LOG(INFO) << fmt::format("pass {} cases, time = {}", cnt - 1, get_t());
+    LOG(INFO) << fmt::format("ran {} cases, cycles = {}", cnt - 1, get_t());
     dpiFinish();
   }
   return 0;
@@ -27,8 +27,6 @@ void VBridgeImpl::dpiInitCosim() {
   FLAGS_minloglevel = 0;
 
   ctx = Verilated::threadContextp();
-  //  LOG(INFO) << fmt::format("[{}] dpiInitCosim", getCycle());
-
   cnt = 0;
 
   switch (rm) {
@@ -105,7 +103,6 @@ void VBridgeImpl::dpiCheck(svBit valid, svBitVecVal result,
                               result, testcase.expected_out);
     LOG(ERROR) << fmt::format("Flag    dut vs ref  = {:08X} vs {:08X} \n",
                               fflags, (int)testcase.expectedException);
-
     dpiFinish();
   }
 }
@@ -197,13 +194,6 @@ genTestCase(function_t function,
   return res;
 }
 
-void outputTestCases(std::vector<testdata> cases) {
-  for (auto x : cases) {
-    //    printf("%08x %08x %08x %02x\n", x.a, x.b, x.expected_out,
-    //    x.expectedException);
-  }
-}
-
 void fillTestQueue(std::vector<testdata> cases) {
   for (auto x : cases) {
     vbridge_impl_instance.test_queue.push(x);
@@ -222,7 +212,6 @@ void VBridgeImpl::initTestCases() {
     LOG(FATAL) << fmt::format("illegal operation");
 
   fillTestQueue(res);
-  outputTestCases(res); // TODO: demo, please delete
 }
 
 void VBridgeImpl::reloadcase() {
diff --git a/tests/src/Ftests.scala b/tests/src/Tester.scala
similarity index 50%
rename from tests/src/Ftests.scala
rename to tests/src/Tester.scala
index f2654a1..0a61dd2 100644
--- a/tests/src/Ftests.scala
+++ b/tests/src/Tester.scala
@@ -1,74 +1,22 @@
 package tests
 
 import chisel3.RawModule
-import chisel3.stage._
-import firrtl.AnnotationSeq
-import firrtl.stage.FirrtlCircuitAnnotation
-import org.scalatest.ParallelTestExecution
-import org.scalatest.flatspec.AnyFlatSpec
-import org.scalatest.matchers.should.Matchers
-import os._
+import float._
 
-trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
-  val roundings = Seq(
-    "-rnear_even" -> "0",
-    "-rminMag" -> "1",
-    "-rmin" -> "2",
-    "-rmax" -> "3",
-    "-rnear_maxMag" -> "4"
-  )
 
-  val rmMaps = Map(
-    0 -> "RNE",
-    1 -> "RTZ",
-    2 -> "RDN",
-    3 -> "RUP",
-    4 -> "RMM"
-  )
+import firrtl.AnnotationSeq
 
-  def exp(f: Int) = f match {
-    case 16 => 5
-    case 32 => 8
-    case 64 => 11
-  }
 
-  def sig(f: Int) = f match {
-    case 16 => 11
-    case 32 => 24
-    case 64 => 53
-  }
+import firrtl.stage.FirrtlCircuitAnnotation
 
-  def check(stdouts: Seq[String]) = {
-    stdouts.foreach(_ shouldNot include("expected"))
-    stdouts.foreach(_ shouldNot include("Ran 0 tests."))
-    stdouts.foreach(_ should include("No errors found."))
-  }
+import chisel3.stage._
+import os._
 
-  def test(name: String, module: () => RawModule, softfloatArg: Seq[String]): Seq[String] = {
-    val (softfloatArgs, dutArgs) = (roundings.map {
-      case (s, d) =>
-        (Seq(s, "-tininessafter") ++ softfloatArg, Seq(d, "0"))
-    }).unzip
-    test(name, module, "test.cpp", softfloatArgs, Some(dutArgs))
-  }
 
-  /** Run a FMA test. Before running, `softfloat_gen` should be accessible in the $PATH environment.
-    *
-    * @param name          is name of this test, which should corresponds to header's name in `includes` directory.
-    * @param module        function to generate DUT.
-    * @param harness       C++ harness name, which should corresponds to c++ hardness's name in `csrc` directory.
-    * @param softfloatArgs arguments passed to `softfloat_gen` application. If has multiple command lines, multiple test will be executed.
-    * @param dutArgs       arguments passed to verilator dut executor, If set to [[None]], no arguments will be passed to.
-    */
-  def test(
-    name:          String,
-    module:        () => RawModule,
-    harness:       String,
-    softfloatArgs: Seq[Seq[String]],
-    dutArgs:       Option[Seq[Seq[String]]] = None
-  ) = {
 
-    var topName: String = null
+
+object Tester extends App {
+    var topName: String = "TestBench"
     val emulatorThreads = 8
 
     val runDir: Path = os.pwd / "run"
@@ -87,14 +35,15 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
     val emulatorBuildDir = emulatorDir / "build"
     os.makeDir.all(emulatorBuildDir)
 
+
     os.proc(
       "make",
-      "softfloat"
+      "softfloat",
     ).call()
 
     os.proc(
       "make",
-      "testfloat"
+      "testfloat",
     ).call()
 
     val annos: AnnotationSeq = Seq(
@@ -102,7 +51,7 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
       new chisel3.stage.phases.Convert
     ).foldLeft(
       Seq(
-        ChiselGeneratorAnnotation(() => new TestBench(8, 24))
+        ChiselGeneratorAnnotation(() => new TestBench(8,24))
       ): AnnotationSeq
     ) { case (annos, stage) => stage.transform(annos) }
       .flatMap {
@@ -110,7 +59,7 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
           topName = circuit.main
           os.write.over(elaborateDir / s"$topName.fir", circuit.serialize)
           None
-        case _: chisel3.stage.DesignAnnotation[_]     => None
+        case _: chisel3.stage.DesignAnnotation[_] => None
         case _: chisel3.stage.ChiselCircuitAnnotation => None
         case a => Some(a)
       }
@@ -119,16 +68,14 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
     // rtl
     os.proc(
       "firtool",
-      elaborateDir / s"$topName.fir",
-      s"--annotation-file=${elaborateDir / s"$topName.anno.json"}",
+      elaborateDir / s"$topName.fir", s"--annotation-file=${elaborateDir / s"$topName.anno.json"}",
       "-dedup",
       "-O=debug",
       "--split-verilog",
       "--preserve-values=named",
       s"-o=$rtlDir"
     ).call()
-    val verilogs = os.read
-      .lines(rtlDir / "filelist.f")
+    val verilogs = os.read.lines(rtlDir / "filelist.f")
       .map(str =>
         try {
           os.Path(str)
@@ -139,6 +86,7 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
       )
       .filter(p => p.ext == "v" || p.ext == "sv")
 
+
     val allCSourceFiles = Seq(
       "dpi.cc",
       "vbridge_impl.cc",
@@ -167,8 +115,7 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
       "--main"
     )
 
-    os.write(
-      emulatorBuildDir / "CMakeLists.txt",
+    os.write(emulatorBuildDir / "CMakeLists.txt",
       // format: off
       s"""cmake_minimum_required(VERSION 3.20)
          |project(emulator)
@@ -214,56 +161,32 @@ trait FMATester extends AnyFlatSpec with Matchers with ParallelTestExecution {
       // format: on
     )
 
-    // build verilator
-    os.proc(
-      Seq(
-        "cmake",
-        "-G",
-        "Ninja",
-        "-S",
-        emulatorBuildDir,
-        "-B",
-        emulatorBuildDir
-      ).map(_.toString)
-    ).call(emulatorBuildDir)
-
-    // build emulator
-    os.proc(Seq("ninja", "-C", emulatorBuildDir).map(_.toString)).call(emulatorBuildDir)
-
-    // run
-    for (x <- 0 to 4) {
-      def runEnv(opration: String) = Map(
-        "wave" -> s"${runDir}/",
-        "op" -> s"$opration",
-        "rm" -> s"$x"
-      )
-      os.proc(Seq("./emulator").map(_.toString))
-        .call(stdout = runDir / s"${rmMaps(x)}.log", cwd = emulatorBuildDir, env = runEnv("div"))
-      os.proc(Seq("./emulator").map(_.toString))
-        .call(stdout = runDir / s"${rmMaps(x)}.log", cwd = emulatorBuildDir, env = runEnv("sqrt"))
-    }
-
-    Seq("No errors found.")
-  }
-}
-
-class DivSqrtRecFn_smallSpec extends FMATester {
-  def test(f: Int, fn: String): Seq[String] = {
-    def generator(options: Int) = fn match {
-      case "div"  => () => new TestBench(exp(f), sig(f))
-      case "sqrt" => () => new TestBench(exp(f), sig(f))
-    }
-
-    test(
-      s"DivSqrtRecF${f}_small_${fn}",
-      generator(0),
-      (if (fn == "sqrt") Seq("-level2") else Seq.empty) ++ Seq(s"f${f}_${fn}")
+  // build verilator
+  os.proc(Seq(
+    "cmake",
+    "-G", "Ninja",
+    "-S", emulatorBuildDir,
+    "-B", emulatorBuildDir
+  ).map(_.toString)).call(emulatorBuildDir)
+
+  // build emulator
+  os.proc(Seq("ninja", "-C", emulatorBuildDir).map(_.toString)).call(emulatorBuildDir)
+
+  // run
+  for (x <- 0 to 4) {
+    val rmMaps = Map(
+      0 -> "RNE",
+      1 -> "RTZ",
+      2 -> "RDN",
+      3 -> "RUP",
+      4 -> "RMM"
     )
-
-  }
-
-  "DivSqrtRecF32_small_div" should "pass" in {
-    check(test(32, "div"))
+    val runEnv = Map(
+      "wave" -> s"${runDir}/",
+      "op" -> "sqrt",
+      "rm" -> s"$x"
+    )
+    os.proc(Seq("./emulator").map(_.toString)).call(stdout = runDir / s"${rmMaps(x)}.log", cwd = emulatorBuildDir, env = runEnv)
   }
 
-}
+}
\ No newline at end of file

From 8429b9414c69a90f4b9d4ef8e79feca83849f966 Mon Sep 17 00:00:00 2001
From: Yanqi Yang <shxxyyq0000@qq.com>
Date: Mon, 23 Oct 2023 15:35:19 +0800
Subject: [PATCH 109/109] [nix] remove redundent dependencies

---
 flake.nix         |  1 -
 nix/softfloat.nix | 20 --------------------
 nix/testfloat.nix | 18 ------------------
 overlay.nix       |  2 --
 4 files changed, 41 deletions(-)
 delete mode 100644 nix/softfloat.nix
 delete mode 100644 nix/testfloat.nix

diff --git a/flake.nix b/flake.nix
index c06e2f0..23e3b5f 100644
--- a/flake.nix
+++ b/flake.nix
@@ -19,7 +19,6 @@
             mill
             circt
             verilator
-            testfloat
             cmake
             libargs
             glog
diff --git a/nix/softfloat.nix b/nix/softfloat.nix
deleted file mode 100644
index 7a23d00..0000000
--- a/nix/softfloat.nix
+++ /dev/null
@@ -1,20 +0,0 @@
-{ stdenv, fetchFromGitHub }:
-stdenv.mkDerivation rec {
-  pname = "softfloat";
-  version = "5c06db33fc1e2130f67c045327b0ec949032df1d";
-  src = fetchFromGitHub {
-    owner = "ucb-bar";
-    repo = "berkeley-softfloat-3";
-    rev = version;
-    sha256 = "sha256-uqf2xATeLyPEs/f8Yqc/Cr5YiklV2754g8IJu5z50sk=";
-  };
-  buildPhase = ''
-    make -C build/Linux-x86_64-GCC SPECIALIZE_TYPE=RISCV
-  '';
-  installPhase = ''
-    mkdir -p $out/lib
-    mkdir -p $out/include
-    mv build/Linux-x86_64-GCC/softfloat.a $out/lib/softfloat.a
-    cp source/include/* $out/include
-  '';
-}
diff --git a/nix/testfloat.nix b/nix/testfloat.nix
deleted file mode 100644
index 1471c33..0000000
--- a/nix/testfloat.nix
+++ /dev/null
@@ -1,18 +0,0 @@
-{ stdenv, fetchFromGitHub, softfloat }:
-stdenv.mkDerivation rec {
-  pname = "softfloat";
-  version = "06b20075dd3c1a5d0dd007a93643282832221612";
-  src = fetchFromGitHub {
-    owner = "ucb-bar";
-    repo = "berkeley-testfloat-3";
-    rev = version;
-    sha256 = "sha256-4C0a3jmmQPYlgbQ9F1frjtVixk3+wvLZFiujOhHshmw=";
-  };
-  buildPhase = ''
-    make -C build/Linux-x86_64-GCC SPECIALIZE_TYPE=RISCV SOFTFLOAT_INCLUDE_DIR=${softfloat}/include SOFTFLOAT_LIB=${softfloat}/lib/softfloat.a
-  '';
-  installPhase = ''
-    mkdir -p $out/bin
-    cp build/Linux-x86_64-GCC/testfloat_gen $out/bin/testfloat_gen
-  '';
-}
diff --git a/overlay.nix b/overlay.nix
index bcca536..398c8f4 100644
--- a/overlay.nix
+++ b/overlay.nix
@@ -7,6 +7,4 @@ final: prev: {
     };
   });
   espresso = final.callPackage ./nix/espresso.nix { };
-  softfloat = final.callPackage ./nix/softfloat.nix { };
-  testfloat = final.callPackage ./nix/testfloat.nix { };
 }