Skip to content

Commit

Permalink
improve speed of the Gamma loss function
Browse files Browse the repository at this point in the history
  • Loading branch information
paulbkoch committed Sep 28, 2024
1 parent 2b5c09f commit 66fa2ae
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 55 deletions.
32 changes: 16 additions & 16 deletions shared/libebm/compute/avx2_ebm/avx2_32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,24 @@ namespace DEFINED_ZONE_NAME {
#error DEFINED_ZONE_NAME must be defined
#endif // DEFINED_ZONE_NAME

// this is super-special and included inside the zone namespace
#include "objective_registrations.hpp"

static constexpr size_t k_cAlignment = 32;

struct alignas(k_cAlignment) Avx2_32_Float;
struct alignas(k_cAlignment) Avx2_32_Int;

// Forward declarations of the Exp and Log helpers for Avx2_32_Float.
// They sit ahead of the objective_registrations.hpp include that follows, so the
// names and their default template arguments are already declared when that header
// is processed.
//
// Exp: the Gamma deviance objective in this commit uses Exp<true>(score) in place of
// 1 / Exp(score), i.e. bNegateInput = true evaluates the exponential of -val.
// NOTE(review): the b*Possible flags presumably let a caller promise that a special
// case (NaN, underflow, overflow) cannot occur so its handling can be dropped; the
// definition is not visible here -- confirm.
template<bool bNegateInput = false,
bool bNaNPossible = true,
bool bUnderflowPossible = true,
bool bOverflowPossible = true>
inline Avx2_32_Float Exp(const Avx2_32_Float& val) noexcept;
// Log: NOTE(review): bNegateOutput presumably yields -log(val), and the remaining
// flags describe which special inputs (NaN, negative, zero, +infinity) may occur --
// confirm against the definition.
template<bool bNegateOutput = false,
bool bNaNPossible = true,
bool bNegativePossible = true,
bool bZeroPossible = true,
bool bPositiveInfinityPossible = true>
inline Avx2_32_Float Log(const Avx2_32_Float& val) noexcept;

// Special case: this header is deliberately included inside the zone namespace, not at file scope.
#include "objective_registrations.hpp"

struct alignas(k_cAlignment) Avx2_32_Int final {
friend Avx2_32_Float;
Expand Down Expand Up @@ -138,18 +150,6 @@ struct alignas(k_cAlignment) Avx2_32_Int final {
static_assert(std::is_standard_layout<Avx2_32_Int>::value && std::is_trivially_copyable<Avx2_32_Int>::value,
"This allows offsetof, memcpy, memset, inter-language, GPU and cross-machine use where needed");

// Forward declarations of Exp and Log for Avx2_32_Float, placed directly before the
// Avx2_32_Float definition, which names Exp as a friend.
// NOTE(review): an identical pair of declarations, with the same default template
// arguments, appears earlier in this listing ahead of the objective_registrations.hpp
// include. Default template arguments may only be specified once per scope, so only
// one copy can exist in the real file -- presumably this is the pre-commit location;
// confirm.
template<bool bNegateInput = false,
bool bNaNPossible = true,
bool bUnderflowPossible = true,
bool bOverflowPossible = true>
inline Avx2_32_Float Exp(const Avx2_32_Float& val) noexcept;
template<bool bNegateOutput = false,
bool bNaNPossible = true,
bool bNegativePossible = true,
bool bZeroPossible = true,
bool bPositiveInfinityPossible = true>
inline Avx2_32_Float Log(const Avx2_32_Float& val) noexcept;

struct alignas(k_cAlignment) Avx2_32_Float final {
template<bool bNegateInput, bool bNaNPossible, bool bUnderflowPossible, bool bOverflowPossible>
friend Avx2_32_Float Exp(const Avx2_32_Float& val) noexcept;
Expand Down
32 changes: 16 additions & 16 deletions shared/libebm/compute/avx512f_ebm/avx512f_32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,24 @@ namespace DEFINED_ZONE_NAME {
#error DEFINED_ZONE_NAME must be defined
#endif // DEFINED_ZONE_NAME

// this is super-special and included inside the zone namespace
#include "objective_registrations.hpp"

static constexpr size_t k_cAlignment = 64;

struct alignas(k_cAlignment) Avx512f_32_Float;
struct alignas(k_cAlignment) Avx512f_32_Int;

// Forward declarations of the Exp and Log helpers for Avx512f_32_Float.
// They sit ahead of the objective_registrations.hpp include that follows, so the
// names and their default template arguments are already declared when that header
// is processed.
//
// Exp: the Gamma deviance objective in this commit uses Exp<true>(score) in place of
// 1 / Exp(score), i.e. bNegateInput = true evaluates the exponential of -val.
// NOTE(review): the b*Possible flags presumably let a caller promise that a special
// case (NaN, underflow, overflow) cannot occur so its handling can be dropped; the
// definition is not visible here -- confirm.
template<bool bNegateInput = false,
bool bNaNPossible = true,
bool bUnderflowPossible = true,
bool bOverflowPossible = true>
inline Avx512f_32_Float Exp(const Avx512f_32_Float& val) noexcept;
// Log: NOTE(review): bNegateOutput presumably yields -log(val), and the remaining
// flags describe which special inputs (NaN, negative, zero, +infinity) may occur --
// confirm against the definition.
template<bool bNegateOutput = false,
bool bNaNPossible = true,
bool bNegativePossible = true,
bool bZeroPossible = true,
bool bPositiveInfinityPossible = true>
inline Avx512f_32_Float Log(const Avx512f_32_Float& val) noexcept;

// Special case: this header is deliberately included inside the zone namespace, not at file scope.
#include "objective_registrations.hpp"

struct alignas(k_cAlignment) Avx512f_32_Int final {
friend Avx512f_32_Float;
Expand Down Expand Up @@ -152,18 +164,6 @@ struct alignas(k_cAlignment) Avx512f_32_Int final {
static_assert(std::is_standard_layout<Avx512f_32_Int>::value && std::is_trivially_copyable<Avx512f_32_Int>::value,
"This allows offsetof, memcpy, memset, inter-language, GPU and cross-machine use where needed");

// Forward declarations of Exp and Log for Avx512f_32_Float, placed directly before
// the Avx512f_32_Float definition, which names Exp as a friend.
// NOTE(review): an identical pair of declarations, with the same default template
// arguments, appears earlier in this listing ahead of the objective_registrations.hpp
// include. Default template arguments may only be specified once per scope, so only
// one copy can exist in the real file -- presumably this is the pre-commit location;
// confirm.
template<bool bNegateInput = false,
bool bNaNPossible = true,
bool bUnderflowPossible = true,
bool bOverflowPossible = true>
inline Avx512f_32_Float Exp(const Avx512f_32_Float& val) noexcept;
template<bool bNegateOutput = false,
bool bNaNPossible = true,
bool bNegativePossible = true,
bool bZeroPossible = true,
bool bPositiveInfinityPossible = true>
inline Avx512f_32_Float Log(const Avx512f_32_Float& val) noexcept;

struct alignas(k_cAlignment) Avx512f_32_Float final {
template<bool bNegateInput, bool bNaNPossible, bool bUnderflowPossible, bool bOverflowPossible>
friend Avx512f_32_Float Exp(const Avx512f_32_Float& val) noexcept;
Expand Down
29 changes: 15 additions & 14 deletions shared/libebm/compute/cpu_ebm/cpu_64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,24 @@ namespace DEFINED_ZONE_NAME {
#error DEFINED_ZONE_NAME must be defined
#endif // DEFINED_ZONE_NAME

struct Cpu_64_Float;
struct Cpu_64_Int;

// Forward declarations of the Exp and Log helpers for Cpu_64_Float.
// They sit ahead of the objective_registrations.hpp include that follows, so the
// names and their default template arguments are already declared when that header
// is processed.
//
// Exp: the Gamma deviance objective in this commit uses Exp<true>(score) in place of
// 1 / Exp(score), i.e. bNegateInput = true evaluates the exponential of -val.
// NOTE(review): the b*Possible flags presumably let a caller promise that a special
// case (NaN, underflow, overflow) cannot occur so its handling can be dropped; the
// definition is not visible here -- confirm.
template<bool bNegateInput = false,
bool bNaNPossible = true,
bool bUnderflowPossible = true,
bool bOverflowPossible = true>
inline Cpu_64_Float Exp(const Cpu_64_Float& val) noexcept;
// Log: NOTE(review): bNegateOutput presumably yields -log(val), and the remaining
// flags describe which special inputs (NaN, negative, zero, +infinity) may occur --
// confirm against the definition.
template<bool bNegateOutput = false,
bool bNaNPossible = true,
bool bNegativePossible = true,
bool bZeroPossible = true,
bool bPositiveInfinityPossible = true>
inline Cpu_64_Float Log(const Cpu_64_Float& val) noexcept;

// Special case: this header is deliberately included inside the zone namespace, not at file scope.
#include "objective_registrations.hpp"

struct Cpu_64_Float;

struct Cpu_64_Int final {
friend Cpu_64_Float;
friend inline Cpu_64_Float IfEqual(const Cpu_64_Int& cmp1,
Expand Down Expand Up @@ -96,18 +109,6 @@ struct Cpu_64_Int final {
static_assert(std::is_standard_layout<Cpu_64_Int>::value && std::is_trivially_copyable<Cpu_64_Int>::value,
"This allows offsetof, memcpy, memset, inter-language, GPU and cross-machine use where needed");

// Forward declarations of Exp and Log for Cpu_64_Float, placed directly before the
// Cpu_64_Float definition, which names Exp as a friend.
// NOTE(review): an identical pair of declarations, with the same default template
// arguments, appears earlier in this listing ahead of the objective_registrations.hpp
// include. Default template arguments may only be specified once per scope, so only
// one copy can exist in the real file -- presumably this is the pre-commit location;
// confirm.
template<bool bNegateInput = false,
bool bNaNPossible = true,
bool bUnderflowPossible = true,
bool bOverflowPossible = true>
inline Cpu_64_Float Exp(const Cpu_64_Float& val) noexcept;
template<bool bNegateOutput = false,
bool bNaNPossible = true,
bool bNegativePossible = true,
bool bZeroPossible = true,
bool bPositiveInfinityPossible = true>
inline Cpu_64_Float Log(const Cpu_64_Float& val) noexcept;

struct Cpu_64_Float final {
template<bool bNegateInput, bool bNaNPossible, bool bUnderflowPossible, bool bOverflowPossible>
friend Cpu_64_Float Exp(const Cpu_64_Float& val) noexcept;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,25 +52,23 @@ template<typename TFloat> struct GammaDevianceRegressionObjective : RegressionOb
inline double FinishMetric(const double metricSum) const noexcept { return 2.0 * metricSum; }

// Per-sample metric term for the Gamma deviance under a log link:
//   frac - 1 - Log(frac), where frac is target divided by the prediction Exp(score).
// FinishMetric (just above) multiplies the accumulated sum by 2.
// NOTE(review): this listing carries two variants of the computation back to back --
// Exp(score) followed by a division, and Exp<true>(score) followed by a multiplication.
// Both declare `frac`, so only one pair can be live in the real file. Given the commit
// title ("improve speed of the Gamma loss function"), the multiply form is presumably
// the current one -- confirm.
GPU_DEVICE inline TFloat CalcMetric(const TFloat& score, const TFloat& target) const noexcept {
const TFloat prediction = Exp(score); // log link function
const TFloat frac = target / prediction;
// Exp<true>(score) is used as the reciprocal of the prediction, so the division
// above becomes a multiplication.
const TFloat invPrediction = Exp<true>(score); // log link function
const TFloat frac = target * invPrediction;
const TFloat metric = frac - 1.0 - Log(frac);
return metric;
}

// Gradient term for the Gamma deviance under a log link: 1 - target / Exp(score).
// NOTE(review): this listing carries two variants back to back -- one that divides by
// Exp(score) and subtracts from 1.0, and one that feeds Exp<true>(score) into
// FusedNegateMultiplyAdd. Both declare `gradient`, so only one can be live in the
// real file; the fused form is presumably the current one -- confirm.
GPU_DEVICE inline TFloat CalcGradient(const TFloat& score, const TFloat& target) const noexcept {
const TFloat prediction = Exp(score); // log link function
const TFloat frac = target / prediction;
const TFloat gradient = 1.0 - frac;
// Exp<true>(score) is used as the reciprocal of the prediction.
const TFloat invPrediction = Exp<true>(score); // log link function
// NOTE(review): stands in for `1.0 - target * invPrediction`, so
// FusedNegateMultiplyAdd(a, b, c) presumably evaluates -(a * b) + c in one fused
// step; its definition is not visible here.
const TFloat gradient = FusedNegateMultiplyAdd(target, invPrediction, 1.0);
return gradient;
}

// Gradient and hessian terms for the Gamma deviance under a log link, returned
// together through MakeGradientHessian:
//   gradient = 1 - target / Exp(score)
//   hessian  = target / Exp(score)
// NOTE(review): this listing carries two variants back to back -- one built on a
// division by Exp(score), and one built on Exp<true>(score) with a multiplication.
// `gradient` and `hessian` are each declared twice, so only one variant can be live
// in the real file; the multiply form is presumably the current one -- confirm.
GPU_DEVICE inline GradientHessian<TFloat> CalcGradientHessian(
const TFloat& score, const TFloat& target) const noexcept {
const TFloat prediction = Exp(score); // log link function
const TFloat frac = target / prediction;
const TFloat gradient = 1.0 - frac;
const TFloat hessian = frac;
// Exp<true>(score) is used as the reciprocal of the prediction.
const TFloat invPrediction = Exp<true>(score); // log link function
// NOTE(review): stands in for `1.0 - target * invPrediction`, so
// FusedNegateMultiplyAdd(a, b, c) presumably evaluates -(a * b) + c in one fused
// step; its definition is not visible here.
const TFloat gradient = FusedNegateMultiplyAdd(target, invPrediction, 1.0);
const TFloat hessian = target * invPrediction;
return MakeGradientHessian(gradient, hessian);
}
};

0 comments on commit 66fa2ae

Please sign in to comment.