Skip to content

Commit

Permalink
[release] okwxl
Browse files Browse the repository at this point in the history
  • Loading branch information
archibate committed Aug 2, 2022
2 parents e3a24e6 + 44e4fc7 commit cb22c77
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 92 deletions.
27 changes: 22 additions & 5 deletions projects/CUDA/fem/Codim.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2145,7 +2145,11 @@ struct CodimStepping : INode {
auto relDX3D = point_point_rel_dx(p0, p1);
auto relDX = basis.transpose() * relDX3D;
auto relDXNorm2 = relDX.l2NormSqr();
auto E = f0_SF(relDXNorm2, epsvh) * fn;
T E = 0;
if (relDXNorm2 > epsvh * epsvh)
E = fn * zs::sqrt(relDXNorm2);
else
E = fn * f0_SF(relDXNorm2, epsvh);
reduce_to(fppi, n, E, es[fppi / 32]);
});
Es.push_back(reduce(pol, es) * fricMu);
Expand All @@ -2171,7 +2175,11 @@ struct CodimStepping : INode {
auto relDX3D = point_edge_rel_dx(p, e0, e1, yita);
auto relDX = basis.transpose() * relDX3D;
auto relDXNorm2 = relDX.l2NormSqr();
auto E = f0_SF(relDXNorm2, epsvh) * fn;
T E = 0;
if (relDXNorm2 > epsvh * epsvh)
E = fn * zs::sqrt(relDXNorm2);
else
E = fn * f0_SF(relDXNorm2, epsvh);
reduce_to(fpei, n, E, es[fpei / 32]);
});
Es.push_back(reduce(pol, es) * fricMu);
Expand Down Expand Up @@ -2200,7 +2208,11 @@ struct CodimStepping : INode {
point_triangle_rel_dx(p, v0, v1, v2, betas[0], betas[1]);
auto relDX = basis.transpose() * relDX3D;
auto relDXNorm2 = relDX.l2NormSqr();
auto E = f0_SF(relDXNorm2, epsvh) * fn;
T E = 0;
if (relDXNorm2 > epsvh * epsvh)
E = fn * zs::sqrt(relDXNorm2);
else
E = fn * f0_SF(relDXNorm2, epsvh);
reduce_to(fpti, n, E, es[fpti / 32]);
});
Es.push_back(reduce(pol, es) * fricMu);
Expand Down Expand Up @@ -2229,7 +2241,11 @@ struct CodimStepping : INode {
edge_edge_rel_dx(e0, e1, e2, e3, gammas[0], gammas[1]);
auto relDX = basis.transpose() * relDX3D;
auto relDXNorm2 = relDX.l2NormSqr();
auto E = f0_SF(relDXNorm2, epsvh) * fn;
T E = 0;
if (relDXNorm2 > epsvh * epsvh)
E = fn * zs::sqrt(relDXNorm2);
else
E = fn * f0_SF(relDXNorm2, epsvh);
reduce_to(feei, n, E, es[feei / 32]);
});
Es.push_back(reduce(pol, es) * fricMu);
Expand Down Expand Up @@ -4497,7 +4513,8 @@ struct CodimStepping : INode {
if constexpr (s_enableFriction)
if (epsv == 0) {
epsv = dHat;
}
} else
epsv *= dHat;
// extForce here means gravity acceleration (not actually force)
// targetGRes = std::min(targetGRes, extForce.norm() * dt * dt * (T)0.5 /
// nSubsteps / nSubsteps);
Expand Down
148 changes: 64 additions & 84 deletions projects/CUDA/fem/FricIpc.inl
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,12 @@ void CodimStepping::IPCSystem::computeFrictionBarrierGradientAndHessian(
auto relDX = basis.transpose() * relDX3D;
auto relDXNorm2 = relDX.l2NormSqr();
auto relDXNorm = zs::sqrt(relDXNorm2);
auto f1_div_relDXNorm = zs::f1_SF_div_rel_dx_norm(relDXNorm2, epsvh);
relDX *= f1_div_relDXNorm * fricMu * fn;
if (relDXNorm > epsvh)
relDX /= relDXNorm;
else
relDX *= zs::f1_SF_div_rel_dx_norm(relDXNorm2, epsvh);
auto TTTDX = -point_point_rel_dx_tan_to_mesh(relDX, basis);
TTTDX *= fricMu * fn;
// gradient
for (int d = 0; d != 3; ++d) {
atomic_add(exec_cuda, &vtemp(gTag, d, fpp[0]), TTTDX(0, d));
Expand All @@ -37,30 +40,22 @@ void CodimStepping::IPCSystem::computeFrictionBarrierGradientAndHessian(
return;
relDX = basis.transpose() * relDX3D;
auto TT = point_point_TT(basis); // 2x6
auto f2_term = f2_SF_term(relDXNorm2, epsvh);
using HessT = zs::vec<T, 6, 6>;
auto hess = HessT::zeros();
if (relDXNorm2 >= epsvh * epsvh) {
zs::vec<T, 2> ubar{-relDX[1], relDX[0]};
hess = dyadic_prod(
TT.transpose() *
((fricMu * fn * f1_div_relDXNorm / relDXNorm2) * ubar),
ubar * TT);
auto innerMtr = zs::vec<T, 2, 2>::identity();
if (relDXNorm2 > epsvh * epsvh) {
innerMtr /= relDXNorm;
innerMtr -= dyadic_prod(relDX, relDX) / (relDXNorm2 * relDXNorm);
} else {
if (relDXNorm == 0) {
if (fn > 0)
hess = fricMu * fn * f1_div_relDXNorm * TT.transpose() * TT;
// or ignored
} else {
auto innerMtr = dyadic_prod((f2_term / relDXNorm) * relDX, relDX);
innerMtr(0, 0) += f1_div_relDXNorm;
innerMtr(1, 1) += f1_div_relDXNorm;
innerMtr *= fricMu * fn;
//
make_pd(innerMtr);
hess = TT.transpose() * innerMtr * TT;
auto f1_div_relDXNorm = zs::f1_SF_div_rel_dx_norm(relDXNorm2, epsvh);
auto f2_term = f2_SF_term(relDXNorm2, epsvh);
innerMtr *= f1_div_relDXNorm;
if (f2_term != f1_div_relDXNorm && relDXNorm2) {
innerMtr -= dyadic_prod(relDX, relDX) * ((f1_div_relDXNorm - f2_term) / relDXNorm2);
}
}
make_pd(innerMtr);
hess = fricMu * fn * TT.transpose() * innerMtr * TT;
#else
#endif
// rotate and project
Expand Down Expand Up @@ -101,9 +96,12 @@ void CodimStepping::IPCSystem::computeFrictionBarrierGradientAndHessian(
auto relDX = basis.transpose() * relDX3D;
auto relDXNorm2 = relDX.l2NormSqr();
auto relDXNorm = zs::sqrt(relDXNorm2);
auto f1_div_relDXNorm = zs::f1_SF_div_rel_dx_norm(relDXNorm2, epsvh);
relDX *= f1_div_relDXNorm * fricMu * fn;
if (relDXNorm > epsvh)
relDX /= relDXNorm;
else
relDX *= zs::f1_SF_div_rel_dx_norm(relDXNorm2, epsvh);
auto TTTDX = -point_edge_rel_dx_tan_to_mesh(relDX, basis, yita);
TTTDX *= fricMu * fn;
// gradient
for (int d = 0; d != 3; ++d) {
atomic_add(exec_cuda, &vtemp(gTag, d, fpe[0]), TTTDX(0, d));
Expand All @@ -115,30 +113,22 @@ void CodimStepping::IPCSystem::computeFrictionBarrierGradientAndHessian(
return;
relDX = basis.transpose() * relDX3D;
auto TT = point_edge_TT(basis, yita); // 2x9
auto f2_term = f2_SF_term(relDXNorm2, epsvh);
using HessT = zs::vec<T, 9, 9>;
auto hess = HessT::zeros();
if (relDXNorm2 >= epsvh * epsvh) {
zs::vec<T, 2> ubar{-relDX[1], relDX[0]};
hess = dyadic_prod(
TT.transpose() *
((fricMu * fn * f1_div_relDXNorm / relDXNorm2) * ubar),
ubar * TT);
auto innerMtr = zs::vec<T, 2, 2>::identity();
if (relDXNorm2 > epsvh * epsvh) {
innerMtr /= relDXNorm;
innerMtr -= dyadic_prod(relDX, relDX) / (relDXNorm2 * relDXNorm);
} else {
if (relDXNorm == 0) {
if (fn > 0)
hess = fricMu * fn * f1_div_relDXNorm * TT.transpose() * TT;
// or ignored
} else {
auto innerMtr = dyadic_prod((f2_term / relDXNorm) * relDX, relDX);
innerMtr(0, 0) += f1_div_relDXNorm;
innerMtr(1, 1) += f1_div_relDXNorm;
innerMtr *= fricMu * fn;
//
make_pd(innerMtr);
hess = TT.transpose() * innerMtr * TT;
auto f1_div_relDXNorm = zs::f1_SF_div_rel_dx_norm(relDXNorm2, epsvh);
auto f2_term = f2_SF_term(relDXNorm2, epsvh);
innerMtr *= f1_div_relDXNorm;
if (f2_term != f1_div_relDXNorm && relDXNorm2) {
innerMtr -= dyadic_prod(relDX, relDX) * ((f1_div_relDXNorm - f2_term) / relDXNorm2);
}
}
make_pd(innerMtr);
hess = fricMu * fn * TT.transpose() * innerMtr * TT;
#else
#endif
// rotate and project
Expand Down Expand Up @@ -180,10 +170,13 @@ void CodimStepping::IPCSystem::computeFrictionBarrierGradientAndHessian(
auto relDX = basis.transpose() * relDX3D;
auto relDXNorm2 = relDX.l2NormSqr();
auto relDXNorm = zs::sqrt(relDXNorm2);
auto f1_div_relDXNorm = zs::f1_SF_div_rel_dx_norm(relDXNorm2, epsvh);
relDX *= f1_div_relDXNorm * fricMu * fn;
if (relDXNorm > epsvh)
relDX /= relDXNorm;
else
relDX *= zs::f1_SF_div_rel_dx_norm(relDXNorm2, epsvh);
auto TTTDX = -point_triangle_rel_dx_tan_to_mesh(relDX, basis, betas[0],
betas[1]);
TTTDX *= fricMu * fn;
// gradient
for (int d = 0; d != 3; ++d) {
atomic_add(exec_cuda, &vtemp(gTag, d, fpt[0]), TTTDX(0, d));
Expand All @@ -196,30 +189,22 @@ void CodimStepping::IPCSystem::computeFrictionBarrierGradientAndHessian(
return;
relDX = basis.transpose() * relDX3D;
auto TT = point_triangle_TT(basis, betas[0], betas[1]); // 2x12
auto f2_term = f2_SF_term(relDXNorm2, epsvh);
using HessT = zs::vec<T, 12, 12>;
auto hess = HessT::zeros();
if (relDXNorm2 >= epsvh * epsvh) {
zs::vec<T, 2> ubar{-relDX[1], relDX[0]};
hess = dyadic_prod(
TT.transpose() *
((fricMu * fn * f1_div_relDXNorm / relDXNorm2) * ubar),
ubar * TT);
auto innerMtr = zs::vec<T, 2, 2>::identity();
if (relDXNorm2 > epsvh * epsvh) {
innerMtr /= relDXNorm;
innerMtr -= dyadic_prod(relDX, relDX) / (relDXNorm2 * relDXNorm);
} else {
if (relDXNorm == 0) {
if (fn > 0)
hess = fricMu * fn * f1_div_relDXNorm * TT.transpose() * TT;
// or ignored
} else {
auto innerMtr = dyadic_prod((f2_term / relDXNorm) * relDX, relDX);
innerMtr(0, 0) += f1_div_relDXNorm;
innerMtr(1, 1) += f1_div_relDXNorm;
innerMtr *= fricMu * fn;
//
make_pd(innerMtr);
hess = TT.transpose() * innerMtr * TT;
auto f1_div_relDXNorm = zs::f1_SF_div_rel_dx_norm(relDXNorm2, epsvh);
auto f2_term = f2_SF_term(relDXNorm2, epsvh);
innerMtr *= f1_div_relDXNorm;
if (f2_term != f1_div_relDXNorm && relDXNorm2) {
innerMtr -= dyadic_prod(relDX, relDX) * ((f1_div_relDXNorm - f2_term) / relDXNorm2);
}
}
make_pd(innerMtr);
hess = fricMu * fn * TT.transpose() * innerMtr * TT;
#else
#endif
// rotate and project
Expand Down Expand Up @@ -261,10 +246,13 @@ void CodimStepping::IPCSystem::computeFrictionBarrierGradientAndHessian(
auto relDX = basis.transpose() * relDX3D;
auto relDXNorm2 = relDX.l2NormSqr();
auto relDXNorm = zs::sqrt(relDXNorm2);
auto f1_div_relDXNorm = zs::f1_SF_div_rel_dx_norm(relDXNorm2, epsvh);
relDX *= f1_div_relDXNorm * fricMu * fn;
if (relDXNorm > epsvh)
relDX /= relDXNorm;
else
relDX *= zs::f1_SF_div_rel_dx_norm(relDXNorm2, epsvh);
auto TTTDX =
-edge_edge_rel_dx_tan_to_mesh(relDX, basis, gammas[0], gammas[1]);
TTTDX *= fricMu * fn;
// gradient
for (int d = 0; d != 3; ++d) {
atomic_add(exec_cuda, &vtemp(gTag, d, fee[0]), TTTDX(0, d));
Expand All @@ -277,30 +265,22 @@ void CodimStepping::IPCSystem::computeFrictionBarrierGradientAndHessian(
return;
relDX = basis.transpose() * relDX3D;
auto TT = edge_edge_TT(basis, gammas[0], gammas[1]); // 2x12
auto f2_term = f2_SF_term(relDXNorm2, epsvh);
using HessT = zs::vec<T, 12, 12>;
auto hess = HessT::zeros();
if (relDXNorm2 >= epsvh * epsvh) {
zs::vec<T, 2> ubar{-relDX[1], relDX[0]};
hess = dyadic_prod(
TT.transpose() *
((fricMu * fn * f1_div_relDXNorm / relDXNorm2) * ubar),
ubar * TT);
auto innerMtr = zs::vec<T, 2, 2>::identity();
if (relDXNorm2 > epsvh * epsvh) {
innerMtr /= relDXNorm;
innerMtr -= dyadic_prod(relDX, relDX) / (relDXNorm2 * relDXNorm);
} else {
if (relDXNorm == 0) {
if (fn > 0)
hess = fricMu * fn * f1_div_relDXNorm * TT.transpose() * TT;
// or ignored
} else {
auto innerMtr = dyadic_prod((f2_term / relDXNorm) * relDX, relDX);
innerMtr(0, 0) += f1_div_relDXNorm;
innerMtr(1, 1) += f1_div_relDXNorm;
innerMtr *= fricMu * fn;
//
make_pd(innerMtr);
hess = TT.transpose() * innerMtr * TT;
auto f1_div_relDXNorm = zs::f1_SF_div_rel_dx_norm(relDXNorm2, epsvh);
auto f2_term = f2_SF_term(relDXNorm2, epsvh);
innerMtr *= f1_div_relDXNorm;
if (f2_term != f1_div_relDXNorm && relDXNorm2) {
innerMtr -= dyadic_prod(relDX, relDX) * ((f1_div_relDXNorm - f2_term) / relDXNorm2);
}
}
make_pd(innerMtr);
hess = fricMu * fn * TT.transpose() * innerMtr * TT;
#else
#endif
// rotate and project
Expand Down
1 change: 1 addition & 0 deletions projects/zenvdb/VDBVisualize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <zeno/PrimitiveObject.h>
#include <zeno/StringObject.h>
#include <zeno/VDBGrid.h>
#include <zeno/utils/log.h>
#include <zeno/utils/vec.h>
#include <zeno/zeno.h>
#include <zeno/ZenoInc.h>
Expand Down
57 changes: 55 additions & 2 deletions zeno/src/nodes/neo/PrimProject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
#include <zeno/types/StringObject.h>
#include <zeno/utils/arrayindex.h>
#include <zeno/utils/variantswitch.h>
#include <zeno/core/INode.h>
#include <zeno/zeno.h>
#if defined(_OPENMP)
#define WXL 0//1
#define WXL 1
#else
#define WXL 0
#endif
Expand Down Expand Up @@ -56,6 +57,33 @@ static float tri_intersect(Cond cond, vec3f const &ro, vec3f const &rd, vec3f co
return std::numeric_limits<float>::infinity();
}

/// ref: An Efficient and Robust Ray-Box Intersection Algorithm, 2005
static bool ray_box_intersect(vec3f const &ro, vec3f const &rd, std::pair<vec3f, vec3f> const &box) {
vec3f invd{1 / rd[0], 1 / rd[1], 1 / rd[2]};
int sign[3] = {invd[0] < 0, invd[1] < 0, invd[2] < 0};
float tmin, tmax, tymin, tymax, tzmin, tzmax;

tmin = ((sign[0] ? box.second : box.first)[0] - ro[0]) * invd[0];
tmax = ((sign[0] ? box.first : box.second)[0] - ro[0]) * invd[0];
tymin = ((sign[1] ? box.second : box.first)[1] - ro[1]) * invd[1];
tymax = ((sign[1] ? box.first : box.second)[1] - ro[1]) * invd[1];
if (tmin > tymax || tymin > tmax)
return false;
if (tymin > tmin)
tmin = tymin;
if (tymax < tmax)
tmax = tymax;
tzmin = ((sign[2] ? box.second : box.first)[2] - ro[2]) * invd[2];
tzmax = ((sign[2] ? box.first : box.second)[2] - ro[2]) * invd[2];
if (tmin > tzmax || tzmin > tmax)
return false;
if (tzmin > tmin)
tmin = tzmin;
if (tzmax < tmax)
tmax = tzmax;
return tmax >= 0.f;
}

struct BVH { // TODO: WXL please complete this to accel up
PrimitiveObject const *prim{};
#if WXL
Expand Down Expand Up @@ -337,7 +365,7 @@ struct BVH { // TODO: WXL please complete this to accel up
Ti level = levels[node];
// level and node are always in sync
for (; level; --level, ++node)
if (auto d = ray_box_distance(sortedBvs[node], ro, rd); std::abs(d) > std::abs(ret))
if (!ray_box_intersect(ro, rd, sortedBvs[node]))
break;
// leaf node check
if (level == 0) {
Expand Down Expand Up @@ -454,5 +482,30 @@ ZENDEFNODE(PrimProject, {
{"primitive"},
});

struct TestRayBox : INode {
void apply() override {
auto origin = get_input2<vec3f>("ray_origin");
auto dir = get_input2<vec3f>("ray_dir");
auto bmin = get_input2<vec3f>("box_min");
auto bmax = get_input2<vec3f>("box_max");
set_output("predicate",
std::make_shared<NumericObject>((int)ray_box_intersect(origin, dir, std::make_pair(bmin, bmax))));
}
};

ZENDEFNODE(TestRayBox, {
{
{"ray_origin"},
{"ray_dir"},
{"box_min"},
{"box_max"},
},
{
{"predicate"},
},
{},
{"primitive"},
});

} // namespace
} // namespace zeno

0 comments on commit cb22c77

Please sign in to comment.