Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix CUDA build #5

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/CudaHelp.hh
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@
#define MIN_CTAS_PER_SM 4
#define MAX_REDUCTION_CTAS 1024

#ifdef __CUDACC__
#ifdef USE_CUDA
#include <cuda_runtime.h>
#include "legion.h"
#ifndef __CUDA_HD__
#define __CUDA_HD__ __host__ __device__
#endif

#ifdef __CUDACC__
template<typename REDUCTION>
__device__ __forceinline__
void reduce_double(Legion::DeferredReduction<REDUCTION> result, double value)
Expand Down Expand Up @@ -39,6 +42,7 @@ void reduce_double(Legion::DeferredReduction<REDUCTION> result, double value)
__threadfence_system();
}
}
#endif

#else
#define __CUDA_HD__
Expand Down
10 changes: 5 additions & 5 deletions src/Hydro.cc
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,8 @@ Hydro::Hydro(
tts = new TTS(inp, this);
qcs = new QCS(inp, this);

const double2 vfixx = double2(1., 0.);
const double2 vfixy = double2(0., 1.);
const double2 vfixx = make_double2(1., 0.);
const double2 vfixy = make_double2(0., 1.);
for (int i = 0; i < bcx.size(); ++i)
bcs.push_back(new HydroBC(mesh, vfixx, bcx[i], true/*xplane*/));
for (int i = 0; i < bcy.size(); ++i)
Expand Down Expand Up @@ -331,7 +331,7 @@ void Hydro::init() {
}
else
{
const double2 zero2(0., 0.);
const double2 zero2 = make_double2(0., 0.);
FillLauncher launcher(lrp, lrp, TaskArgument(&zero2,sizeof(zero2)));
launcher.add_field(FID_PU);
runtime->fill_fields(ctx, launcher);
Expand Down Expand Up @@ -375,7 +375,7 @@ Future Hydro::doCycle(
launchffd.argument = TaskArgument(ffdargs, sizeof(ffdargs));
launchffd.predicate = p_not_done;

double2 ffd2args[] = { double2(0., 0.) };
double2 ffd2args[] = { make_double2(0., 0.) };
IndexFillLauncher launchffd2;
launchffd2.launch_space = ispc;
launchffd2.projection = 0;
Expand Down Expand Up @@ -1802,7 +1802,7 @@ void Hydro::initRadialVelTask(
if (pmag > args->eps)
acc_pu[*itr] = args->vel * px / pmag;
else
acc_pu[*itr] = double2(0., 0.);
acc_pu[*itr] = make_double2(0., 0.);
}
}

Expand Down
8 changes: 4 additions & 4 deletions src/Mesh.cc
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ const int SumOp<int>::identity = 0;
template <>
const double SumOp<double>::identity = 0.;
template <>
const double2 SumOp<double2>::identity = double2(0., 0.);
const double2 SumOp<double2>::identity = make_double2(0., 0.);
template <>
const double MinOp<double>::identity = DBL_MAX;
template <>
Expand Down Expand Up @@ -817,7 +817,7 @@ void Mesh::calcCtrsTask(

const IndexSpace& isz = task->regions[1].region.get_index_space();
for (PointIterator itr(runtime, isz); itr(); itr++)
acc_zx[*itr] = double2(0., 0.);
acc_zx[*itr] = make_double2(0., 0.);

const IndexSpace& iss = task->regions[0].region.get_index_space();
for (PointIterator itr(runtime, iss); itr(); itr++)
Expand Down Expand Up @@ -863,7 +863,7 @@ void Mesh::calcCtrsOMPTask(
const Rect<1> rectz = runtime->get_index_space_domain(isz);
#pragma omp parallel for
for (coord_t z = rectz.lo[0]; z <= rectz.hi[0]; z++)
acc_zx[z] = double2(0., 0.);
acc_zx[z] = make_double2(0., 0.);

const IndexSpace& iss = task->regions[0].region.get_index_space();
// This will assert if it is not dense
Expand Down Expand Up @@ -1288,7 +1288,7 @@ void Mesh::calcCtrs(

int zfirst = mapsz[sfirst];
int zlast = (slast < nums ? mapsz[slast] : numz);
fill(&zx[zfirst], &zx[zlast], double2(0., 0.));
fill(&zx[zfirst], &zx[zlast], make_double2(0., 0.));

for (int s = sfirst; s < slast; ++s) {
int p1 = mapsp1[s];
Expand Down
4 changes: 2 additions & 2 deletions src/QCS.cc
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ void QCS::setCornerDivTask(
// [1] Compute a zone-centered velocity
const IndexSpace& isz = task->regions[1].region.get_index_space();
for (PointIterator itz(runtime, isz); itz(); itz++)
acc_zuc[*itz] = double2(0., 0.);
acc_zuc[*itz] = make_double2(0., 0.);

const IndexSpace& iss = task->regions[0].region.get_index_space();
for (PointIterator its(runtime, iss); its(); its++)
Expand Down Expand Up @@ -488,7 +488,7 @@ void QCS::setCornerDivOMPTask(
const Rect<1> rectz = runtime->get_index_space_domain(isz);
#pragma omp parallel for
for (coord_t z = rectz.lo[0]; z <= rectz.hi[0]; z++)
acc_zuc[z] = double2(0., 0.);
acc_zuc[z] = make_double2(0., 0.);

const IndexSpace& iss = task->regions[0].region.get_index_space();
// This will assert if it is not dense
Expand Down
66 changes: 35 additions & 31 deletions src/Vec2.hh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
// This struct is defined with all functions inline,
// to give the compiler maximum opportunity to optimize.

#ifndef __CUDACC__
#ifndef USE_CUDA
struct double2
{
typedef double value_type;
Expand All @@ -37,38 +37,10 @@ struct double2
return(*this);
}

inline double2& operator+=(const double2& v2)
{
x += v2.x;
y += v2.y;
return(*this);
}

inline double2& operator-=(const double2& v2)
{
x -= v2.x;
y -= v2.y;
return(*this);
}

inline double2& operator*=(const double& r)
{
x *= r;
y *= r;
return(*this);
}

inline double2& operator/=(const double& r)
{
x /= r;
y /= r;
return(*this);
}

}; // double2
#endif // __CUDACC__
#endif // USE_CUDA

#ifndef __CUDACC__
#ifndef USE_CUDA
// CUDA already provides a declaration of make_double2
inline double2 make_double2(double x_, double y_) {
return(double2(x_, y_));
Expand Down Expand Up @@ -119,20 +91,44 @@ inline double2 operator+(const double2& v1, const double2& v2)
return make_double2(v1.x + v2.x, v1.y + v2.y);
}

// add in place: accumulates v2 into v1 component by component
// and hands back a reference to the updated v1
__CUDA_HD__
inline double2& operator+=(double2& v1, const double2& v2)
{
    v1.x = v1.x + v2.x;
    v1.y = v1.y + v2.y;
    return v1;
}

// subtract: component-wise difference v1 - v2, returned as a new vector
__CUDA_HD__
inline double2 operator-(const double2& v1, const double2& v2)
{
    const double dx = v1.x - v2.x;
    const double dy = v1.y - v2.y;
    return make_double2(dx, dy);
}

// subtract in place: removes v2 from v1 component by component
// and hands back a reference to the updated v1
__CUDA_HD__
inline double2& operator-=(double2& v1, const double2& v2)
{
    v1.x = v1.x - v2.x;
    v1.y = v1.y - v2.y;
    return v1;
}

// multiply vector by scalar: scales both components of v by r
// and returns the result as a new vector
__CUDA_HD__
inline double2 operator*(const double2& v, const double& r)
{
    const double sx = v.x * r;
    const double sy = v.y * r;
    return make_double2(sx, sy);
}

// multiply vector by scalar in place: scales both components of v by r
// and returns a reference to the updated v.
//
// r arrives by const reference, so it may refer to a component of v
// itself (e.g. `v *= v.x`). The factor is copied before any component
// is written so that both components are scaled by the same value.
__CUDA_HD__
inline double2& operator*=(double2& v, const double& r)
{
    const double scale = r;  // snapshot: r may alias v.x or v.y
    v.x *= scale;
    v.y *= scale;
    return v;
}

// multiply scalar by vector
__CUDA_HD__
inline double2 operator*(const double& r, const double2& v)
Expand All @@ -148,6 +144,14 @@ inline double2 operator/(const double2& v, const double& r)
return make_double2(v.x * rinv, v.y * rinv);
}

// divide vector by scalar in place: the reciprocal of r is computed once
// and both components of v are multiplied by it. No check is made for
// r == 0. Returns a reference to the updated v.
__CUDA_HD__
inline double2& operator/=(double2& v, const double& r)
{
    const double rinv = 1. / r;
    v.x = v.x * rinv;
    v.y = v.y * rinv;
    return v;
}

// other vector operations:

Expand Down