Skip to content

Commit

Permalink
feat: Add vrecps[s|d]_[f32|f64]
Browse files Browse the repository at this point in the history
  • Loading branch information
howjmay committed Jul 30, 2024
1 parent 1d79156 commit d6d5ec3
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 6 deletions.
4 changes: 2 additions & 2 deletions neon2rvv.h
Original file line number Diff line number Diff line change
Expand Up @@ -4796,9 +4796,9 @@ FORCE_INLINE float64x2_t vrecpsq_f64(float64x2_t a, float64x2_t b) {
return __riscv_vfnmsac_vv_f64m1(vdupq_n_f64(2.0), a, b, 2);
}

// FORCE_INLINE float32_t vrecpss_f32(float32_t a, float32_t b);
// Scalar reciprocal step: returns 2 - a * b.
// The constant is written as a float literal so the subtraction is not
// promoted to double and then narrowed back on return (assumes float32_t is
// the single-precision float type, as the float-based test data suggests).
FORCE_INLINE float32_t vrecpss_f32(float32_t a, float32_t b) { return 2.0f - a * b; }

// FORCE_INLINE float64_t vrecpsd_f64(float64_t a, float64_t b);
// Scalar reciprocal step on float64_t operands: returns 2 - a * b.
FORCE_INLINE float64_t vrecpsd_f64(float64_t a, float64_t b) {
  return 2.0 - a * b;
}

// FORCE_INLINE float32x2_t vsqrt_f32(float32x2_t a);

Expand Down
26 changes: 24 additions & 2 deletions tests/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16984,9 +16984,31 @@ result_t test_vrecpsq_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#endif // ENABLE_TEST_ALL
}

// Stub form: ignores both arguments and always reports TEST_UNIMPL.
result_t test_vrecpss_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
  return TEST_UNIMPL;
}
// Checks vrecpss_f32 against the reference expression 2 - a * b, using the
// first element of each float test buffer as the operands. The result must
// agree with the reference within a tolerance of 0.001.
// Returns TEST_UNIMPL when ENABLE_TEST_ALL is not defined.
result_t test_vrecpss_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  const float lhs = ((const float *)impl.test_cases_float_pointer1)[0];
  const float rhs = ((const float *)impl.test_cases_float_pointer2)[0];

  // Reference value, then the value produced by the function under test.
  const float expected = 2.0 - lhs * rhs;
  const float actual = vrecpss_f32(lhs, rhs);

  return validate_float_error(actual, expected, 0.001f);
#else
  return TEST_UNIMPL;
#endif  // ENABLE_TEST_ALL
}

// Stub form: ignores both arguments and always reports TEST_UNIMPL.
result_t test_vrecpsd_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
  return TEST_UNIMPL;
}
// Checks vrecpsd_f64 against the reference expression 2 - a * b, using the
// first double read from each test buffer as the operands. The result must
// agree with the reference within a tolerance of 0.001.
// Returns TEST_UNIMPL when ENABLE_TEST_ALL is not defined.
result_t test_vrecpsd_f64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  // NOTE(review): test_cases_float_pointer1/2 are read through double
  // pointers here; confirm the underlying buffers are large and aligned
  // enough for a double-sized load.
  const double lhs = ((const double *)impl.test_cases_float_pointer1)[0];
  const double rhs = ((const double *)impl.test_cases_float_pointer2)[0];

  // Reference value, then the value produced by the function under test.
  const double expected = 2.0 - lhs * rhs;
  const double actual = vrecpsd_f64(lhs, rhs);

  return validate_double_error(actual, expected, 0.001f);
#else
  return TEST_UNIMPL;
#endif  // ENABLE_TEST_ALL
}

// Stub: ignores both arguments and always reports TEST_UNIMPL.
result_t test_vsqrt_f32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
  return TEST_UNIMPL;
}

Expand Down
4 changes: 2 additions & 2 deletions tests/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -959,8 +959,8 @@
_(vrecpsq_f32) \
_(vrecps_f64) \
_(vrecpsq_f64) \
/*_(vrecpss_f32) */ \
/*_(vrecpsd_f64) */ \
_(vrecpss_f32) \
_(vrecpsd_f64) \
/*_(vsqrt_f32) */ \
/*_(vsqrtq_f32) */ \
/*_(vsqrt_f64) */ \
Expand Down

0 comments on commit d6d5ec3

Please sign in to comment.