@@ -40,6 +40,7 @@ module VX_decode import VX_gpu_pkg::*; #(
4040 reg [NUM_SRC_OPDS : 0 ] use_regs;
4141 reg [NUM_XREGS - 1 : 0 ] rd_xregs;
4242 reg [NUM_XREGS - 1 : 0 ] wr_xregs;
43+ reg [BYTESEL_BITS - 1 : 0 ] bytesel;
4344 reg is_wstall;
4445
4546 wire [31 : 0 ] instr = fetch_if.data.instr;
@@ -157,6 +158,7 @@ module VX_decode import VX_gpu_pkg::*; #(
157158 use_regs = '0 ;
158159 rd_xregs = '0 ;
159160 wr_xregs = '0 ;
161+ bytesel = BYTESEL_DEFAULT ;
160162 is_wstall = 0 ;
161163
162164 case (opcode)
@@ -297,7 +299,8 @@ module VX_decode import VX_gpu_pkg::*; #(
297299 op_type = INST_LSU_FENCE ;
298300 op_args.lsu.is_store = 0 ;
299301 op_args.lsu.is_float = 0 ;
300- op_args.lsu.offset = 0 ;
302+ op_args.lsu.pack = 0 ;
303+ op_args.lsu.offset = 0 ;
301304 end
302305 INST_SYS : begin
303306 if (funct3[1 : 0 ] != 0 ) begin
@@ -332,7 +335,8 @@ module VX_decode import VX_gpu_pkg::*; #(
332335 op_type = INST_OP_BITS ' ({ 1'b0 , funct3} );
333336 op_args.lsu.is_store = 0 ;
334337 op_args.lsu.is_float = opcode[2 ];
335- op_args.lsu.offset = u_12;
338+ op_args.lsu.pack = 0 ;
339+ op_args.lsu.offset = u_12;
336340 `USED_IREG (rs1);
337341 `ifdef EXT_F_ENABLE
338342 `USED_REG (opcode[2 ], rd, 1'b1 );
@@ -348,7 +352,8 @@ module VX_decode import VX_gpu_pkg::*; #(
348352 op_type = INST_OP_BITS ' ({ 1'b1 , funct3} );
349353 op_args.lsu.is_store = 1 ;
350354 op_args.lsu.is_float = opcode[2 ];
351- op_args.lsu.offset = s_imm;
355+ op_args.lsu.pack = 0 ;
356+ op_args.lsu.offset = s_imm;
352357 `USED_IREG (rs1);
353358 `ifdef EXT_F_ENABLE
354359 `USED_REG (opcode[2 ], rs2, 1'b1 );
@@ -544,19 +549,6 @@ module VX_decode import VX_gpu_pkg::*; #(
544549 end
545550 op_type = INST_OP_BITS ' (funct3);
546551 end
547- `ifdef EXT_DXA_ENABLE
548- 7'h03 : begin // DXA issue (dimension-specific)
549- // funct3 encodes dimensionality: 0=1D .. 4=5D.
550- // Expanded into micro-ops by VX_dxa_uops.
551- if (funct3 <= 3'd4 ) begin
552- ex_type = EX_SFU ;
553- op_type = INST_OP_BITS ' (INST_SFU_DXA );
554- op_args.dxa.op = funct3;
555- `USED_IREG (rs1);
556- `USED_IREG (rs2);
557- end
558- end
559- `endif
560552 `ifdef EXT_TCU_ENABLE
561553 7'h02 : begin
562554 if (funct3 == 3'h0 ) begin
@@ -592,6 +584,46 @@ module VX_decode import VX_gpu_pkg::*; #(
592584 `endif
593585 end
594586 `endif
587+ `ifdef EXT_DXA_ENABLE
588+ 7'h03 : begin // DXA issue (dimension-specific)
589+ // funct3 encodes dimensionality: 0=1D .. 4=5D.
590+ // Expanded into micro-ops by VX_dxa_uops.
591+ if (funct3 <= 3'd4 ) begin // funct3 values 5..7 assign nothing in this arm
592+ ex_type = EX_SFU ;
593+ op_type = INST_OP_BITS ' (INST_SFU_DXA );
594+ op_args.dxa.op = funct3; // raw funct3 is forwarded as the DXA op argument
595+ `USED_IREG (rs1); // both source operands are marked as integer registers
596+ `USED_IREG (rs2);
597+ end
598+ end
599+ `endif
600+ 7'h04 : begin // Load packing: vx_packlb_f / vx_packlh_f
601+ case (funct3) // funct3 selects the packing variant: 1 = byte, 2 = halfword
602+ 3'h1 : begin // vx_packlb_f — pack 4 strided bytes into float
603+ ex_type = EX_LSU ;
604+ op_type = INST_OP_BITS ' (INST_LSU_LBU ); // issued with the LBU op type
605+ op_args.lsu.is_store = 0 ;
606+ op_args.lsu.is_float = 1 ;
607+ op_args.lsu.pack = 2'b01 ; // pack code used for the byte variant
608+ op_args.lsu.offset = '0 ; // no immediate offset is encoded for this instruction
609+ `USED_FREG (rd); // destination is marked as a float register
610+ `USED_IREG (rs1);
611+ `USED_IREG (rs2); // NOTE(review): rs2 presumably carries the stride — confirm against the LSU
612+ end
613+ 3'h2 : begin // vx_packlh_f — pack 2 strided halfwords into float
614+ ex_type = EX_LSU ;
615+ op_type = INST_OP_BITS ' (INST_LSU_LHU ); // issued with the LHU op type
616+ op_args.lsu.is_store = 0 ;
617+ op_args.lsu.is_float = 1 ;
618+ op_args.lsu.pack = 2'b10 ; // pack code used for the halfword variant
619+ op_args.lsu.offset = '0 ; // no immediate offset is encoded for this instruction
620+ `USED_FREG (rd); // destination is marked as a float register
621+ `USED_IREG (rs1);
622+ `USED_IREG (rs2); // NOTE(review): rs2 presumably carries the stride — confirm against the LSU
623+ end
624+ default : ; // other funct3 values assign nothing in this arm
625+ endcase
626+ end
595627 default : ;
596628 endcase
597629 end
@@ -610,8 +642,8 @@ module VX_decode import VX_gpu_pkg::*; #(
610642 .reset (reset),
611643 .valid_in (fetch_if.valid),
612644 .ready_in (fetch_if.ready),
613- .data_in ({ fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC , ex_type, op_type, op_args, wb, rd_xregs, wr_xregs, use_regs[3 : 1 ], reg_ids[RV_RD ], reg_ids[RV_RS1 ], reg_ids[RV_RS2 ], reg_ids[RV_RS3 ]} ),
614- .data_out ({ decode_if.data.uuid, decode_if.data.wid, decode_if.data.tmask, decode_if.data.PC , decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args, decode_if.data.wb, decode_if.data.rd_xregs, decode_if.data.wr_xregs, decode_if.data.used_rs, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3} ),
645+ .data_in ({ fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC , ex_type, op_type, op_args, wb, rd_xregs, wr_xregs, use_regs[3 : 1 ], reg_ids[RV_RD ], bytesel, reg_ids[RV_RS1 ], reg_ids[RV_RS2 ], reg_ids[RV_RS3 ]} ),
646+ .data_out ({ decode_if.data.uuid, decode_if.data.wid, decode_if.data.tmask, decode_if.data.PC , decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args, decode_if.data.wb, decode_if.data.rd_xregs, decode_if.data.wr_xregs, decode_if.data.used_rs, decode_if.data.rd, decode_if.data.bytesel, decode_if.data. rs1, decode_if.data.rs2, decode_if.data.rs3} ),
615647 .valid_out (decode_if.valid),
616648 .ready_out (decode_if.ready)
617649 );
0 commit comments