Skip to content

Commit

Permalink
Add Arm64 encodings for IF_SVE_EQ_3A to IF_SVE_HR_3A (dotnet#95679)
Browse files Browse the repository at this point in the history
  • Loading branch information
a74nh authored Dec 7, 2023
1 parent 1aae18a commit eafe818
Show file tree
Hide file tree
Showing 3 changed files with 271 additions and 3 deletions.
72 changes: 72 additions & 0 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10513,6 +10513,30 @@ void CodeGen::genArm64EmitterUnitTests()
INS_OPTS_SCALABLE_D); /* REVH <Zd>.<T>, <Pg>/M, <Zn>.<T> */
theEmitter->emitIns_R_R_R(INS_sve_revw, EA_SCALABLE, REG_V25, REG_P4, REG_V16,
INS_OPTS_SCALABLE_D); /* REVW <Zd>.D, <Pg>/M, <Zn>.D */

// IF_SVE_EQ_3A
// Note: Scalable size is the size of the destination <T>, not the source <Tb>.
theEmitter->emitIns_R_R_R(INS_sve_sadalp, EA_SCALABLE, REG_V26, REG_P3, REG_V8,
INS_OPTS_SCALABLE_H); /* SADALP <Zda>.<T>, <Pg>/M, <Zn>.<Tb> */
theEmitter->emitIns_R_R_R(INS_sve_uadalp, EA_SCALABLE, REG_V27, REG_P2, REG_V9,
INS_OPTS_SCALABLE_S); /* UADALP <Zda>.<T>, <Pg>/M, <Zn>.<Tb> */
theEmitter->emitIns_R_R_R(INS_sve_uadalp, EA_SCALABLE, REG_V28, REG_P0, REG_V31,
INS_OPTS_SCALABLE_D); /* UADALP <Zda>.<T>, <Pg>/M, <Zn>.<Tb> */

// IF_SVE_ES_3A
theEmitter->emitIns_R_R_R(INS_sve_sqabs, EA_SCALABLE, REG_V29, REG_P7, REG_V0,
INS_OPTS_SCALABLE_B); /* SQABS <Zd>.<T>, <Pg>/M, <Zn>.<T> */
theEmitter->emitIns_R_R_R(INS_sve_sqneg, EA_SCALABLE, REG_V31, REG_P6, REG_V1,
INS_OPTS_SCALABLE_H); /* SQNEG <Zd>.<T>, <Pg>/M, <Zn>.<T> */
theEmitter->emitIns_R_R_R(INS_sve_sqneg, EA_SCALABLE, REG_V0, REG_P5, REG_V2,
INS_OPTS_SCALABLE_S); /* SQNEG <Zd>.<T>, <Pg>/M, <Zn>.<T> */
theEmitter->emitIns_R_R_R(INS_sve_sqneg, EA_SCALABLE, REG_V1, REG_P4, REG_V3,
INS_OPTS_SCALABLE_D); /* SQNEG <Zd>.<T>, <Pg>/M, <Zn>.<T> */
theEmitter->emitIns_R_R_R(INS_sve_urecpe, EA_SCALABLE, REG_V2, REG_P3, REG_V4,
INS_OPTS_SCALABLE_S); /* URECPE <Zd>.S, <Pg>/M, <Zn>.S */
theEmitter->emitIns_R_R_R(INS_sve_ursqrte, EA_SCALABLE, REG_V3, REG_P0, REG_V5,
INS_OPTS_SCALABLE_S); /* URSQRTE <Zd>.S, <Pg>/M, <Zn>.S */

// IF_SVE_GA_2A
theEmitter->emitIns_R_R_I(INS_sve_sqrshrn, EA_SCALABLE, REG_V0, REG_V0, 5,
INS_OPTS_SCALABLE_H); // SQRSHRN <Zd>.H, {<Zn1>.S-<Zn2>.S }, #<const>
Expand All @@ -10533,6 +10557,54 @@ void CodeGen::genArm64EmitterUnitTests()
theEmitter->emitIns_R_R_I(INS_sve_uqrshrn, EA_SCALABLE, REG_V15, REG_V12, 1,
INS_OPTS_SCALABLE_H); // UQRSHRN <Zd>.H, {<Zn1>.S-<Zn2>.S }, #<const>

// IF_SVE_GS_3A
#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
theEmitter->emitIns_R_R_R(INS_sve_faddqv, EA_8BYTE, REG_V16, REG_P0, REG_V12,
INS_OPTS_SCALABLE_H_WITH_SIMD_VECTOR); /* FADDQV <Vd>.<T>, <Pg>, <Zn>.<Tb> */
theEmitter->emitIns_R_R_R(INS_sve_fmaxnmqv, EA_8BYTE, REG_V17, REG_P1, REG_V11,
INS_OPTS_SCALABLE_S_WITH_SIMD_VECTOR); /* FMAXNMQV <Vd>.<T>, <Pg>, <Zn>.<Tb> */
theEmitter->emitIns_R_R_R(INS_sve_fmaxqv, EA_8BYTE, REG_V18, REG_P3, REG_V10,
INS_OPTS_SCALABLE_D_WITH_SIMD_VECTOR); /* FMAXQV <Vd>.<T>, <Pg>, <Zn>.<Tb> */
theEmitter->emitIns_R_R_R(INS_sve_fminnmqv, EA_8BYTE, REG_V19, REG_P4, REG_V9,
INS_OPTS_SCALABLE_H_WITH_SIMD_VECTOR); /* FMINNMQV <Vd>.<T>, <Pg>, <Zn>.<Tb> */
theEmitter->emitIns_R_R_R(INS_sve_fminqv, EA_8BYTE, REG_V20, REG_P5, REG_V8,
INS_OPTS_SCALABLE_D_WITH_SIMD_VECTOR); /* FMINQV <Vd>.<T>, <Pg>, <Zn>.<Tb> */
#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED

// IF_SVE_HE_3A
theEmitter->emitIns_R_R_R(INS_sve_faddv, EA_2BYTE, REG_V21, REG_P7, REG_V7,
INS_OPTS_SCALABLE_H_WITH_SIMD_SCALAR); /* FADDV <V><d>, <Pg>, <Zn>.<T> */
theEmitter->emitIns_R_R_R(INS_sve_fmaxnmv, EA_2BYTE, REG_V22, REG_P6, REG_V6,
INS_OPTS_SCALABLE_H_WITH_SIMD_SCALAR); /* FMAXNMV <V><d>, <Pg>, <Zn>.<T> */
theEmitter->emitIns_R_R_R(INS_sve_fmaxv, EA_4BYTE, REG_V23, REG_P5, REG_V5,
INS_OPTS_SCALABLE_S_WITH_SIMD_SCALAR); /* FMAXV <V><d>, <Pg>, <Zn>.<T> */
theEmitter->emitIns_R_R_R(INS_sve_fminnmv, EA_8BYTE, REG_V24, REG_P4, REG_V4,
INS_OPTS_SCALABLE_D_WITH_SIMD_SCALAR); /* FMINNMV <V><d>, <Pg>, <Zn>.<T> */
theEmitter->emitIns_R_R_R(INS_sve_fminv, EA_4BYTE, REG_V25, REG_P3, REG_V3,
INS_OPTS_SCALABLE_S_WITH_SIMD_SCALAR); /* FMINV <V><d>, <Pg>, <Zn>.<T> */

// IF_SVE_HQ_3A
theEmitter->emitIns_R_R_R(INS_sve_frinta, EA_SCALABLE, REG_V26, REG_P7, REG_V2,
INS_OPTS_SCALABLE_H); /* FRINTA <Zd>.<T>, <Pg>/M, <Zn>.<T> */
theEmitter->emitIns_R_R_R(INS_sve_frinti, EA_SCALABLE, REG_V27, REG_P6, REG_V1,
INS_OPTS_SCALABLE_S); /* FRINTI <Zd>.<T>, <Pg>/M, <Zn>.<T> */
theEmitter->emitIns_R_R_R(INS_sve_frintm, EA_SCALABLE, REG_V28, REG_P5, REG_V0,
INS_OPTS_SCALABLE_D); /* FRINTM <Zd>.<T>, <Pg>/M, <Zn>.<T> */
theEmitter->emitIns_R_R_R(INS_sve_frintn, EA_SCALABLE, REG_V29, REG_P4, REG_V10,
INS_OPTS_SCALABLE_H); /* FRINTN <Zd>.<T>, <Pg>/M, <Zn>.<T> */
theEmitter->emitIns_R_R_R(INS_sve_frintp, EA_SCALABLE, REG_V30, REG_P3, REG_V11,
INS_OPTS_SCALABLE_S); /* FRINTP <Zd>.<T>, <Pg>/M, <Zn>.<T> */
theEmitter->emitIns_R_R_R(INS_sve_frintx, EA_SCALABLE, REG_V31, REG_P2, REG_V12,
INS_OPTS_SCALABLE_D); /* FRINTX <Zd>.<T>, <Pg>/M, <Zn>.<T> */
theEmitter->emitIns_R_R_R(INS_sve_frintz, EA_SCALABLE, REG_V0, REG_P0, REG_V13,
INS_OPTS_SCALABLE_H); /* FRINTZ <Zd>.<T>, <Pg>/M, <Zn>.<T> */

// IF_SVE_HR_3A
theEmitter->emitIns_R_R_R(INS_sve_frecpx, EA_SCALABLE, REG_V5, REG_P5, REG_V5,
INS_OPTS_SCALABLE_H); /* FRECPX <Zd>.<T>, <Pg>/M, <Zn>.<T> */
theEmitter->emitIns_R_R_R(INS_sve_fsqrt, EA_SCALABLE, REG_V6, REG_P6, REG_V6,
INS_OPTS_SCALABLE_S); /* FSQRT <Zd>.<T>, <Pg>/M, <Zn>.<T> */

#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE

#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -1705,7 +1705,8 @@ class emitter
#define PERFSCORE_THROUGHPUT_10C 10.0f // slower - 10 cycles
#define PERFSCORE_THROUGHPUT_11C 11.0f // slower - 11 cycles
#define PERFSCORE_THROUGHPUT_13C 13.0f // slower - 13 cycles
#define PERFSCORE_THROUGHPUT_14C 13.0f // slower - 13 cycles
#define PERFSCORE_THROUGHPUT_14C 14.0f // slower - 14 cycles
#define PERFSCORE_THROUGHPUT_16C 16.0f // slower - 16 cycles
#define PERFSCORE_THROUGHPUT_19C 19.0f // slower - 19 cycles
#define PERFSCORE_THROUGHPUT_25C 25.0f // slower - 25 cycles
#define PERFSCORE_THROUGHPUT_33C 33.0f // slower - 33 cycles
Expand All @@ -1730,6 +1731,7 @@ class emitter
// Fixed latency costs: the <N>C suffix in each name is the cycle count, and it matches the value.
#define PERFSCORE_LATENCY_11C 11.0f
#define PERFSCORE_LATENCY_12C 12.0f
#define PERFSCORE_LATENCY_13C 13.0f
#define PERFSCORE_LATENCY_14C 14.0f
#define PERFSCORE_LATENCY_15C 15.0f
#define PERFSCORE_LATENCY_16C 16.0f
#define PERFSCORE_LATENCY_18C 18.0f
Expand Down
Loading

0 comments on commit eafe818

Please sign in to comment.