diff --git a/apps/riscv-tests/isa/rv64uv/vid.c b/apps/riscv-tests/isa/rv64uv/vid.c
index 7db9a1fc5..465d6fdd4 100644
--- a/apps/riscv-tests/isa/rv64uv/vid.c
+++ b/apps/riscv-tests/isa/rv64uv/vid.c
@@ -11,6 +11,12 @@ void TEST_CASE1() {
   VSET(16, e8, m1);
   __asm__ volatile("vid.v v1");
   VCMP_U8(1, v1, 0, 1, 2, 3, 4, 5, 6, 7);
+  VSET(10, e8, m1);
+
+  VLOAD_8(v1, 0b00001110, 0b00000111, 0b00010000, 0b01001011, 0b00110100, 0b01111101, 0b11001100, 0b00011000, 0b01000111, 0b00010100);
+  VSET(77, e8, m1);
+  __asm__ volatile("vid.v v2");
+  VCMP_U8(2, v2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76);
 }
 
 void TEST_CASE2() {
@@ -18,7 +24,7 @@ void TEST_CASE2() {
   VLOAD_8(v0, 85, 0, 0, 0, 0, 0, 0, 0);
   VCLEAR(v1);
   __asm__ volatile("vid.v v1, v0.t");
-  VCMP_U8(2, v1, 0, 0, 2, 0, 4, 0, 6, 0);
+  VCMP_U8(3, v1, 0, 0, 2, 0, 4, 0, 6, 0);
 }
 
 int main(void) {
diff --git a/hardware/src/ara_dispatcher.sv b/hardware/src/ara_dispatcher.sv
index 818207b45..ca3e95801 100644
--- a/hardware/src/ara_dispatcher.sv
+++ b/hardware/src/ara_dispatcher.sv
@@ -1300,7 +1300,10 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                         ara_req_d.vtype.vsew = eew_q[ara_req_d.vd];
                       end
                       5'b10000: ara_req_d.op = ara_pkg::VIOTA;
-                      5'b10001: ara_req_d.op = ara_pkg::VID;
+                      5'b10001: begin
+                        ara_req_d.op = ara_pkg::VID;
+                        ara_req_d.use_vs2 = 1'b0;
+                      end
                     endcase
                   end
                   6'b001000: ara_req_d.op = ara_pkg::VAADDU;
diff --git a/hardware/src/lane/valu.sv b/hardware/src/lane/valu.sv
index a61242f67..9bb8fd3e7 100644
--- a/hardware/src/lane/valu.sv
+++ b/hardware/src/lane/valu.sv
@@ -445,10 +445,10 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
     prevent_commit = 1'b0;
 
     // How many elements are we processing this cycle?
-    element_cnt_buf_issue = (unsigned'(EW64) - unsigned'(vinsn_issue_q.vtype.vsew));
+    element_cnt_buf_issue = 1 << (unsigned'(EW64) - unsigned'(vinsn_issue_q.vtype.vsew));
     element_cnt_issue = vinsn_issue_q.op inside {[VMSBF:VMXNOR]} ? ELEN : {2'b0, element_cnt_buf_issue};
 
-    element_cnt_buf_commit = (unsigned'(EW64) - unsigned'(vinsn_commit.vtype.vsew));
+    element_cnt_buf_commit = 1 << (unsigned'(EW64) - unsigned'(vinsn_commit.vtype.vsew));
     element_cnt_commit = vinsn_commit.op inside {[VMSBF:VMXNOR]} ? ELEN : {2'b0, element_cnt_buf_commit};
 
     ////////////////////////////////////////
diff --git a/hardware/src/masku/masku.sv b/hardware/src/masku/masku.sv
index d7bc525c6..9766ebfc4 100644
--- a/hardware/src/masku/masku.sv
+++ b/hardware/src/masku/masku.sv
@@ -560,14 +560,14 @@ module masku import ara_pkg::*; import rvv_pkg::*; #(
 
   always_comb begin
     // Tail-agnostic bus
-    alu_result          = '0;
-    alu_result_vm       = '0;
-    alu_result_vm_m     = '0;
-    alu_result_vm_shuf  = '0;
-    alu_result_vmsif_vm = '0;
-    alu_result_vmsbf_vm = '0;
-    alu_result_vmsof_vm = '0;
-    alu_result_vm       = '0;
+    alu_result          = '1;
+    alu_result_vm       = '1;
+    alu_result_vm_m     = '1;
+    alu_result_vm_shuf  = '1;
+    alu_result_vmsif_vm = '1;
+    alu_result_vmsbf_vm = '1;
+    alu_result_vmsof_vm = '1;
+    alu_result_vm       = '1;
 
     vcpop_operand = '0;
 
@@ -624,9 +624,9 @@ module masku import ara_pkg::*; import rvv_pkg::*; #(
           // Mask the input vector
           // VID uses the same datapath of VIOTA, but with implicit input vector at '1
           masku_operand_alu_seq_m = (vinsn_issue.op == VID)
-                                  ? masku_operand_m_seq | {NrLanes*DataWidth{vinsn_issue.vm}}
+                                  ? '1 // VID mask does NOT modify the count
                                   : masku_operand_alu_seq
-                                    & (masku_operand_m_seq | {NrLanes*DataWidth{vinsn_issue.vm}});
+                                    & (masku_operand_m_seq | {NrLanes*DataWidth{vinsn_issue.vm}}); // VIOTA mask DOES modify the count
 
           // Compute output results on `ViotaParallelism 16-bit adders
           viota_res[0] = viota_acc_q;
@@ -656,19 +656,19 @@ module masku import ara_pkg::*; import rvv_pkg::*; #(
 		  unique case (vinsn_issue.vtype.vsew)
             EW8: for (int i = 0; i < ViotaParallelism; i++) begin
               be_viota_seq_d[out_valid_cnt_q[idx_width(NrLanes*DataWidth/8/ViotaParallelism)-1:0] * ViotaParallelism * 1 + 1*i +: 1] =
-                {1{vinsn_issue.vm}} | {1{masku_operand_m_seq[out_valid_cnt_q[idx_width(NrLanes*DataWidth/8/ViotaParallelism)-1:0] * ViotaParallelism + i]}};
+                {1{vinsn_issue.vm}} | {1{masku_operand_m_seq[in_m_ready_cnt_q[idx_width(NrLanes*DataWidth/ViotaParallelism)-1:0] * ViotaParallelism + i]}};
             end
             EW16: for (int i = 0; i < ViotaParallelism; i++) begin
               be_viota_seq_d[out_valid_cnt_q[idx_width(NrLanes*DataWidth/16/ViotaParallelism)-1:0] * ViotaParallelism * 2 + 2*i +: 2] =
-                {2{vinsn_issue.vm}} | {2{masku_operand_m_seq[out_valid_cnt_q[idx_width(NrLanes*DataWidth/8/ViotaParallelism)-1:0] * ViotaParallelism + i]}};
+                {2{vinsn_issue.vm}} | {2{masku_operand_m_seq[in_m_ready_cnt_q[idx_width(NrLanes*DataWidth/ViotaParallelism)-1:0] * ViotaParallelism + i]}};
             end
             EW32: for (int i = 0; i < ViotaParallelism; i++) begin
               be_viota_seq_d[out_valid_cnt_q[idx_width(NrLanes*DataWidth/32/ViotaParallelism)-1:0] * ViotaParallelism * 4 + 4*i +: 4] =
-                {4{vinsn_issue.vm}} | {4{masku_operand_m_seq[out_valid_cnt_q[idx_width(NrLanes*DataWidth/8/ViotaParallelism)-1:0] * ViotaParallelism + i]}};
+                {4{vinsn_issue.vm}} | {4{masku_operand_m_seq[in_m_ready_cnt_q[idx_width(NrLanes*DataWidth/ViotaParallelism)-1:0] * ViotaParallelism + i]}};
             end
             default: for (int i = 0; i < ViotaParallelism; i++) begin // EW64
               be_viota_seq_d[out_valid_cnt_q[idx_width(NrLanes*DataWidth/64/ViotaParallelism)-1:0] * ViotaParallelism * 8 + 8*i +: 8] =
-                {8{vinsn_issue.vm}} | {8{masku_operand_m_seq[out_valid_cnt_q[idx_width(NrLanes*DataWidth/8/ViotaParallelism)-1:0] * ViotaParallelism + i]}};
+                {8{vinsn_issue.vm}} | {8{masku_operand_m_seq[in_m_ready_cnt_q[idx_width(NrLanes*DataWidth/ViotaParallelism)-1:0] * ViotaParallelism + i]}};
             end
           endcase
         end
@@ -689,7 +689,7 @@ module masku import ara_pkg::*; import rvv_pkg::*; #(
     // Shuffle the VIOTA,VID BE
     for (int b = 0; b < (NrLanes*StrbWidth); b++) begin
       automatic int shuffle_byte  = shuffle_index(b, NrLanes, vinsn_issue.vtype.vsew);
-      be_viota_shuf[shuffle_byte] = be_viota_seq_q[b];
+      be_viota_shuf[shuffle_byte] = be_viota_seq_d[b];
     end
 
     // alu_result propagation mux
@@ -920,7 +920,7 @@ module masku import ara_pkg::*; import rvv_pkg::*; #(
         wdata: result_queue_q[result_queue_write_pnt_q][lane].wdata, // Retain the last-cycle's data
 		// VIOTA, VID generate a non-mask vector and should comply with undisturbed policy
         // This means that we can use the byte-enable signal
-        be   : vinsn_issue.op inside {[VIOTA:VID]} ? be(elm_per_lane + additional_elm[lane], vinsn_issue.vtype.vsew) | be_viota_shuf : '1,
+        be   : vinsn_issue.op inside {[VIOTA:VID]} ? be(elm_per_lane + additional_elm[lane], vinsn_issue.vtype.vsew) & be_viota_shuf[lane*StrbWidth +: StrbWidth] : '1,
         addr : vaddr(vinsn_issue.vd, NrLanes, VLEN) + iteration_cnt_q,
         id   : vinsn_issue.id
       };
@@ -941,7 +941,7 @@ module masku import ara_pkg::*; import rvv_pkg::*; #(
         for (int unsigned lane = 0; lane < NrLanes; lane++) begin
           result_queue_background_data[lane] = (out_valid_cnt_q != '0)
                                              ? result_queue_q[result_queue_write_pnt_q][lane].wdata
-                                             : masku_operand_vd[lane];
+                                             : vinsn_issue.op inside {[VIOTA:VID]} ? '1 : masku_operand_vd[lane];
         end
         for (int unsigned lane = 0; lane < NrLanes; lane++) begin
           // The mask vector writes at 1 (tail-agnostic ok value) both the background body
@@ -1007,8 +1007,8 @@ module masku import ara_pkg::*; import rvv_pkg::*; #(
 
         // Have we finished insn execution? Clear MASKU ALU state
         if (issue_cnt_d == '0) begin
+          be_viota_seq_d = '1; // Default: write (all byte-enable bits set)
           viota_acc_d    = '0;
-          be_viota_seq_d = '0;
           found_one_d    = '0;
         end
       end
@@ -1291,7 +1291,7 @@ module masku import ara_pkg::*; import rvv_pkg::*; #(
       out_valid_threshold_q  <= '0;
       viota_acc_q            <= '0;
       found_one_q            <= '0;
-      be_viota_seq_q         <= '0;
+      be_viota_seq_q         <= '1; // Default: write (all byte-enable bits set)
     end else begin
       vinsn_running_q        <= vinsn_running_d;
       read_cnt_q             <= read_cnt_d;
diff --git a/hardware/src/masku/masku_operands.sv b/hardware/src/masku/masku_operands.sv
index ff1c246cc..f63f6b44e 100644
--- a/hardware/src/masku/masku_operands.sv
+++ b/hardware/src/masku/masku_operands.sv
@@ -219,7 +219,7 @@ module masku_operands import ara_pkg::*; import rvv_pkg::*; #(
   // Compress ALU/FPU results into a mask vector
   // -------------------------------------------
   always_comb begin
-    alu_result_compressed_o = '0;
+    alu_result_compressed_o = '1;
     for (int b = 0; b < ELENB * NrLanes; b++) begin
       if ((b % (1 << vinsn_issue_i.vtype.vsew)) == '0) begin
         automatic int src_byte        = shuffle_index(b, NrLanes, vinsn_issue_i.vtype.vsew);