Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

hw04作业 #26

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
build
GNUmakefile
bench.txt
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ if (NOT CMAKE_BUILD_TYPE)
endif()

add_executable(main main.cpp)
target_compile_options(main PUBLIC -ffast-math -march=native)
140 changes: 94 additions & 46 deletions main.cpp
Original file line number Diff line number Diff line change
@@ -1,73 +1,121 @@
#include <chrono>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <tuple>
#include <vector>
#include <chrono>
#include <cmath>

float frand() {
return (float)rand() / RAND_MAX * 2 - 1;
#define VECTOR_SIZE 48

/* static float frand() {
float INV_RAND_MAX = 1.0 / RAND_MAX;
return (float)rand() * (INV_RAND_MAX * 2) - 1;
} */
static std::tuple<float, float, float, float, float, float, float> frand7() {
float INV_RAND_MAX = 1.0 / RAND_MAX;
float a = (float)rand() * (INV_RAND_MAX * 2) - 1;
float b = (float)rand() * (INV_RAND_MAX * 2) - 1;
float c = (float)rand() * (INV_RAND_MAX * 2) - 1;
float d = (float)rand() * (INV_RAND_MAX * 2) - 1;
float e = (float)rand() * (INV_RAND_MAX * 2) - 1;
float f = (float)rand() * (INV_RAND_MAX * 2) - 1;
float g = (float)rand() * (INV_RAND_MAX * 2) - 1;
return std::make_tuple(a, b, c, d, e, f, g);
}

struct Star {
float px, py, pz;
float vx, vy, vz;
float mass;
struct alignas(32) Star {
// float px, py, pz;
// float vx, vy, vz;
// float mass;
float px[VECTOR_SIZE];
float py[VECTOR_SIZE];
float pz[VECTOR_SIZE];
float vx[VECTOR_SIZE];
float vy[VECTOR_SIZE];
float vz[VECTOR_SIZE];
float mass[VECTOR_SIZE];
};

std::vector<Star> stars;
// std::vector<Star> stars;
Star stars;

void init() {
for (int i = 0; i < 48; i++) {
stars.push_back({
frand(), frand(), frand(),
frand(), frand(), frand(),
frand() + 1,
});
static void init() {
#pragma GCC unroll 8
for (size_t i = 0; i < VECTOR_SIZE; i++) {
// stars.push_back({
// frand(), frand(), frand(),
// frand(), frand(), frand(),
// frand() + 1,
// });
// stars.px.emplace_back(frand());
// stars.py.emplace_back(frand());
// stars.pz.emplace_back(frand());
// stars.vx.emplace_back(frand());
// stars.vy.emplace_back(frand());
// stars.vz.emplace_back(frand());
// stars.mass.emplace_back(frand() + 1);
auto [a, b, c, d, e, f, g] = frand7();
stars.px[i] = a;
stars.py[i] = b;
stars.pz[i] = c;
stars.vx[i] = d;
stars.vy[i] = e;
stars.vz[i] = f;
stars.mass[i] = g + 1;
}
}

float G = 0.001;
float eps = 0.001;
float dt = 0.01;
constexpr float G = 0.001;
constexpr float eps = 0.001;
constexpr float dt = 0.01;
constexpr float eps_times_eps = eps * eps; // eps * eps

void step() {
for (auto &star: stars) {
for (auto &other: stars) {
float dx = other.px - star.px;
float dy = other.py - star.py;
float dz = other.pz - star.pz;
float d2 = dx * dx + dy * dy + dz * dz + eps * eps;
d2 *= sqrt(d2);
star.vx += dx * other.mass * G * dt / d2;
star.vy += dy * other.mass * G * dt / d2;
star.vz += dz * other.mass * G * dt / d2;
static void step() {
#pragma GCC ivdep
for (int i = 0; i < VECTOR_SIZE; ++i) {
for (int j = 0; j < VECTOR_SIZE; ++j) {
if (i == j)
continue;
float dx = stars.px[j] - stars.px[i];
float dy = stars.py[j] - stars.py[i];
float dz = stars.pz[j] - stars.pz[i];
float d2 = dx * dx + dy * dy + dz * dz + eps_times_eps;
d2 *= std::sqrt(d2);
float inv_d2 = 1.0 / d2;
float tmp = stars.mass[j] * (G * dt) * inv_d2;
stars.vx[i] += dx * tmp;
stars.vy[i] += dy * tmp;
stars.vz[i] += dz * tmp;
}
}
for (auto &star: stars) {
star.px += star.vx * dt;
star.py += star.vy * dt;
star.pz += star.vz * dt;
#pragma GCC unroll 16
for (int i = 0; i < VECTOR_SIZE; ++i) {
stars.px[i] += stars.vx[i] * dt;
stars.py[i] += stars.vy[i] * dt;
stars.pz[i] += stars.vz[i] * dt;
}
}

float calc() {
static float calc() {
float energy = 0;
for (auto &star: stars) {
float v2 = star.vx * star.vx + star.vy * star.vy + star.vz * star.vz;
energy += star.mass * v2 / 2;
for (auto &other: stars) {
float dx = other.px - star.px;
float dy = other.py - star.py;
float dz = other.pz - star.pz;
float d2 = dx * dx + dy * dy + dz * dz + eps * eps;
energy -= other.mass * star.mass * G / sqrt(d2) / 2;
#pragma GCC ivdep
for (int i = 0; i < VECTOR_SIZE; ++i) {
float v2 = stars.vx[i] * stars.vx[i] + stars.vy[i] * stars.vy[i] + stars.vz[i] * stars.vz[i];
energy += stars.mass[i] * v2 / 2;
for (int j = 0; j < VECTOR_SIZE; ++j) {
float dx = stars.px[j] - stars.px[i];
float dy = stars.py[j] - stars.py[i];
float dz = stars.pz[j] - stars.pz[i];
float d2 = dx * dx + dy * dy + dz * dz + eps_times_eps;
float inv_sqrt_d2 = 1 / std::sqrt(d2);
energy -= stars.mass[j] * stars.mass[i] * (G * 0.5f) * inv_sqrt_d2;
}
}
return energy;
}

template <class Func>
long benchmark(Func const &func) {
static long benchmark(Func const& func) {
auto t0 = std::chrono::steady_clock::now();
func();
auto t1 = std::chrono::steady_clock::now();
Expand All @@ -79,7 +127,7 @@ int main() {
init();
printf("Initial energy: %f\n", calc());
auto dt = benchmark([&] {
for (int i = 0; i < 100000; i++)
for (size_t i = 0; i < 100000; i++)
step();
});
printf("Final energy: %f\n", calc());
Expand Down