-
Notifications
You must be signed in to change notification settings - Fork 2
/
fastscape_BW+PI.cpp
439 lines (360 loc) · 17.4 KB
/
fastscape_BW+PI.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
//This file contains a serial implementation of Braun and Willett's FastScape
//algorithm. The implementation was developed by adapting Fortran code provided
//by Braun and attempts to be a faithful reproduction of the ideas therein.
#include <array>
#include <cmath>
#include <cstdlib>
#include <fenv.h> //Used to catch floating point NaN issues
#include <fstream>
#include <iomanip>
#include <iostream>
#include <limits>
#include "random.hpp"
#include <vector>
#include "CumulativeTimer.hpp"
///This is a quick-and-dirty, zero-dependency function for saving the outputs of
///the model in ArcGIS ASCII DEM format (aka Arc/Info ASCII Grid, AAIGrid).
///Production code for experimentation should probably use GeoTIFF or a similar
///format as it will have a smaller file size and, thus, save quicker.
void PrintDEM(
const std::string filename,
const std::vector<double>& h,
const int width,
const int height
){
std::ofstream fout(filename.c_str());
//Since the outer ring of the dataset is a halo used for simplifying
//neighbour-finding logic, we do not save it to the output here.
fout<<"ncols "<<(width- 2)<<"\n";
fout<<"nrows "<<(height-2)<<"\n";
fout<<"xllcorner 637500.000\n"; //Arbitrarily chosen value
fout<<"yllcorner 206000.000\n"; //Arbitrarily chosen value
fout<<"cellsize 500.000\n"; //Arbitrarily chosen value
fout<<"NODATA_value -9999\n"; //Value which is guaranteed not to correspond to an actual data value
for(int y=1;y<height-1;y++){
for(int x=1;x<width-1;x++)
fout<<h[y*width+x]<<" ";
fout<<"\n";
}
}
///The entire model is contained in a handy class, which makes it easy to set up
///and solve many such models.
class FastScape_BWPF {
private:
//Value used to indicate that a cell had no downhill neighbour and, thus, does
//not flow anywhere.
const int NO_FLOW = -1;
const double SQRT2 = 1.414213562373095048801688724209698078569671875376948; //Yup, this is overkill.
public:
//NOTE: Having these constants specified in the class rather than globally
//results in a significant speed loss. However, it is better to have them here
//under the assumption that they'd be dynamic in a real implementation.
const double keq = 2e-6; //Stream power equation constant (coefficient)
const double neq = 2; //Stream power equation constant (slope modifier)
const double meq = 0.8; //Stream power equation constant (area modifier)
const double ueq = 2e-3; //Rate of uplift
const double dt = 1000.; //Timestep interval
const double dr[8] = {1,SQRT2,1,SQRT2,1,SQRT2,1,SQRT2}; //Distance between adjacent cell centers on a rectangular grid arbitrarily scale to cell edge lengths of 1
const double tol = 1e-3; //Tolerance for Newton-Rhapson convergence while solving implicit Euler
const double cell_area = 40000; //Area of a single cell
private:
int width; //Width of DEM
int height; //Height of DEM
int size; //Size of DEM (width*height)
//The dataset is set up so that the outermost edge is never actually used:
//it's a halo which allows every cell that is actually processed to consider
//its neighbours in all 8 directions without having to check first to see if
//it is an edge cell. The ring of second-most outer cells is set to a fixed
//value to which everything erodes (in this model).
//Rec directions (also used for nshift offsets) - see below for details
//1 2 3
//0 4
//7 6 5
std::vector<double> h; //Digital elevation model (height)
std::vector<double> accum; //Flow accumulation at each point
std::vector<int> rec; //Direction of receiving cell
std::vector<int> donor; //Indices of a cell's donor cells
std::vector<int> ndon; //How many donors a cell has
std::vector<int> stack; //Indices of cells in the order they should be processed
std::array<int,8> nshift; //Offset from a focal cell's index to its neighbours in terms of flat indexing
//Indicates where each tree of the stack begins
std::vector<int> stack_start;
//Timers for keeping track of how long each part of the code takes
CumulativeTimer Tmr_Step1_Initialize;
CumulativeTimer Tmr_Step2_DetermineReceivers;
CumulativeTimer Tmr_Step3_DetermineDonors;
CumulativeTimer Tmr_Step4_GenerateOrder;
CumulativeTimer Tmr_Step5_FlowAcc;
CumulativeTimer Tmr_Step6_Uplift;
CumulativeTimer Tmr_Step7_Erosion;
CumulativeTimer Tmr_Overall;
private:
void GenerateRandomTerrain(){
//srand(std::random_device()());
for(int y=0;y<height;y++)
for(int x=0;x<width;x++){
const int c = y*width+x;
h[c] = uniform_rand_real(0,1);
//Outer edge is set to 0 and never touched again. It is used only as a
//convenience so we don't have to worry when a focal cell looks at its
//neighbours.
if(x == 0 || y==0 || x==width-1 || y==height-1)
h[c] = 0;
//Second outer-most edge is set to 0 and never touched again. This is the
//baseline to which all cells would erode where it not for uplift. You can
//think of this as being "sea level".
if(x == 1 || y==1 || x==width-2 || y==height-2)
h[c] = 0;
}
}
public:
FastScape_BWPF(const int width0, const int height0)
//Initialize code for finding neighbours of a cell
: nshift{-1,-width0-1,-width0,-width0+1,1,width0+1,width0,width0-1}
{
Tmr_Overall.start();
Tmr_Step1_Initialize.start();
width = width0;
height = height0;
size = width*height;
//Each edge node of the DEM is the root of a stack. The `stack_start` array
//indicates where these nodes are located in the global stack. This is then
//used to induce parallelism. Note that, especially at the beginning of the
//program, there may be interior nodes with in depressions or with no local
//gradient. These may become the starts of stacks as well, so the
//`stack_start` array may be larger than just the perimeter of the DEM.
stack_start.reserve(3*width+2*height);
h.resize(size); //Memory for terrain height
GenerateRandomTerrain(); //Could replace this with custom initializer
Tmr_Step1_Initialize.stop();
Tmr_Overall.stop();
}
private:
///The receiver of a focal cell is the cell which receives the focal cells'
///flow. Here, we model the receiving cell as being the one connected to the
///focal cell by the steppest gradient. If there is no local gradient, than
///the special value NO_FLOW is assigned.
void ComputeReceivers(){
//Edge cells do not have receivers because they do not distribute their flow
//to anywhere.
for(int y=2;y<height-2;y++)
for(int x=2;x<width-2;x++){
const int c = y*width+x;
//The slope must be greater than zero for there to be downhill flow;
//otherwise, the cell is marked NO_FLOW.
double max_slope = 0; //Maximum slope seen so far amongst neighbours
int max_n = NO_FLOW; //Direction of neighbour which had maximum slope to focal cell
//Loop over neighbours
for(int n=0;n<8;n++){
const double slope = (h[c] - h[c+nshift[n]])/dr[n]; //Slope to neighbour n
if(slope>max_slope){ //Is this the steepest slope we've seen?
max_slope = slope; //If so, make a note of the slope
max_n = n; //And which cell it came from
}
}
rec[c] = max_n; //Having considered all neighbours, this is the steepest
}
}
///The donors of a focal cell are the neighbours from which it receives flow.
///Here, we identify those neighbours by inverting the Receivers array.
void ComputeDonors(){
//The B&W method of developing the donor array has each focal cell F inform
//its receiving cell R that F is a donor of R. Unfortunately, parallelizing
//this is difficult because more than one cell might be informing R at any
//given time. Atomics are a solution, but they impose a performance cost
//(though using the latest and greatest hardware decreases this penalty).
//Instead, we invert the operation. Each focal cell now examines its
//neighbours to see if it receives from them. Each focal cell is then
//guaranteed to have sole write-access to its location in the donor array.
#pragma omp parallel for collapse(2)
for(int y=1;y<height-1;y++)
for(int x=1;x<width-1;x++){
const int c = y*width+x;
ndon[c] = 0;
for(int ni=0;ni<8;ni++){
const int n = c+nshift[ni];
if(rec[n]!=NO_FLOW && n+nshift[rec[n]]==c){
donor[8*c+ndon[c]] = n;
ndon[c]++;
}
}
}
}
void FindStack(const int c, int &nstack){
for(int k=0;k<ndon[c];k++){
const auto n = donor[8*c+k];
stack[nstack++] = n;
FindStack(n,nstack);
}
}
///Cells must be ordered so that they can be traversed such that higher cells
///are processed before their lower neighbouring cells. This method creates
///such an order.
void GenerateOrder(){
//The `stack_start` array has an unpredictable size, so we fill it
//dynamically and reset it each time. This should have minimal impact on the
//algorithm's speed since std::vector's memory is not actually reallocated.
stack_start.clear();
int nstack=0;
for(int c=0;c<size;c++){
if(rec[c]==NO_FLOW){
stack_start.push_back(nstack);
stack[nstack++] = c;
FindStack(c,nstack);
}
}
//We add an additional note to the end of `stack_start` that serves as an
//upper bound on the locations of the cells in the final stack. See b
stack_start.push_back(nstack);
}
void ComputeFlowAcc(){
//Initialize cell areas to their weights. Here, all the weights are the
//same.
for(int i=0;i<size;i++)
accum[i] = cell_area;
//Highly-elevated cells pass their flow to less elevated neighbour cells.
//The stack is ordered so that higher cells are keyed to higher indices in
//the stack; therefore, parsing the stack in reverse ensures that fluid
//flows downhill.
#pragma omp parallel for schedule(dynamic)
for(unsigned int ss=0;ss<stack_start.size()-1;ss++){
const int sstart = stack_start.at(ss);
const int send = stack_start.at(ss+1);
for(int s=send-1;s>=sstart;s--){
const int c = stack[s];
if(rec[c]!=NO_FLOW){
const int n = c+nshift[rec[c]];
accum[n] += accum[c];
}
}
}
}
///Raise each cell in the landscape by some amount, otherwise it wil get worn
///flat (in this model, with these settings)
void AddUplift(){
//We exclude two exterior rings of cells in this example. The outermost ring
//(the edges of the dataset) allows us to ignore the edges of the dataset,
//the second-most outer ring (the cells bordering the edge cells of the
//dataset) are fixed to a specified height in this model. All other cells
//have heights which actively change and they are altered here.
#pragma omp parallel for collapse(2)
for(int y=2;y<height-2;y++)
for(int x=2;x<width-2;x++){
const int c = y*width+x;
h[c] += ueq*dt;
}
}
///Decrease he height of cells according to the stream power equation; that
///is, based on a constant K, flow accumulation A, the local slope between
///the cell and its receiving neighbour, and some judiciously-chosen constants
///m and n.
/// h_next = h_current - K*dt*(A^m)*(Slope)^n
///We solve this equation implicitly to preserve accuracy
void Erode(){
#pragma omp parallel for schedule(dynamic)
for(unsigned int ss=0;ss<stack_start.size()-1;ss++){
const int sstart = stack_start.at(ss);
const int send = stack_start.at(ss+1);
for(int s=sstart;s<send;s++){
const int c = stack[s]; //Cell from which flow originates
if(rec[c]==NO_FLOW)
continue;
const int n = c+nshift[rec[c]]; //Cell receiving the flow
const double length = dr[rec[c]];
const double fact = keq*dt*std::pow(accum[c],meq)/std::pow(length,neq);
const double h0 = h[c]; //Elevation of focal cell
const double hn = h[n]; //Elevation of neighbouring (receiving, lower) cell
double hnew = h0; //Current updated value of focal cell
double hp = h0; //Previous updated value of focal cell
double diff = 2*tol; //Difference between current and previous updated values
while(std::abs(diff)>tol){ //Newton-Rhapson method (run until subsequent values differ by less than a tolerance, which can be set to any desired precision)
hnew -= (hnew-h0+fact*std::pow(hnew-hn,neq))/(1.+fact*neq*std::pow(hnew-hn,neq-1));
diff = hnew - hp; //Difference between previous and current value of the iteration
hp = hnew; //Update previous value to new value
}
h[c] = hnew; //Update value in array
}
}
}
public:
///Run the model forward for a specified number of timesteps. No new
///initialization is done. This allows the model to be stopped, the terrain
///altered, and the model continued. For space-efficiency, a number of
///temporary arrays are created each time this is run, so repeatedly running
///this function for the same model will likely not be performant due to
///reallocations. If that is your use case, you'll want to modify your code
///appropriately.
void run(const int nstep){
Tmr_Overall.start();
Tmr_Step1_Initialize.start();
accum.resize( size); //Stores flow accumulation
rec.resize ( size); //Array of Receiver directions
ndon.resize ( size); //Number of donors each cell has
donor.resize(8*size); //Array listing the donors of each cell (up to 8 for a rectangular grid)
stack.resize( size); //Order in which to process cells
///All receivers initially point to nowhere
for(int i=0;i<size;i++)
rec[i] = NO_FLOW;
Tmr_Step1_Initialize.stop();
for(int step=0;step<=nstep;step++){
Tmr_Step2_DetermineReceivers.start (); ComputeReceivers (); Tmr_Step2_DetermineReceivers.stop ();
Tmr_Step3_DetermineDonors.start (); ComputeDonors (); Tmr_Step3_DetermineDonors.stop ();
Tmr_Step4_GenerateOrder.start (); GenerateOrder (); Tmr_Step4_GenerateOrder.stop ();
Tmr_Step5_FlowAcc.start (); ComputeFlowAcc (); Tmr_Step5_FlowAcc.stop ();
Tmr_Step6_Uplift.start (); AddUplift (); Tmr_Step6_Uplift.stop ();
Tmr_Step7_Erosion.start (); Erode (); Tmr_Step7_Erosion.stop ();
if( step%20==0 ) //Show progress
std::cout<<"p Step = "<<step<<std::endl;
}
Tmr_Overall.stop();
std::cout<<"t Step1: Initialize = "<<std::setw(15)<<Tmr_Step1_Initialize.elapsed() <<" microseconds"<<std::endl;
std::cout<<"t Step2: DetermineReceivers = "<<std::setw(15)<<Tmr_Step2_DetermineReceivers.elapsed() <<" microseconds"<<std::endl;
std::cout<<"t Step3: DetermineDonors = "<<std::setw(15)<<Tmr_Step3_DetermineDonors.elapsed() <<" microseconds"<<std::endl;
std::cout<<"t Step4: GenerateOrder = "<<std::setw(15)<<Tmr_Step4_GenerateOrder.elapsed() <<" microseconds"<<std::endl;
std::cout<<"t Step5: FlowAcc = "<<std::setw(15)<<Tmr_Step5_FlowAcc.elapsed() <<" microseconds"<<std::endl;
std::cout<<"t Step6: Uplift = "<<std::setw(15)<<Tmr_Step6_Uplift.elapsed() <<" microseconds"<<std::endl;
std::cout<<"t Step7: Erosion = "<<std::setw(15)<<Tmr_Step7_Erosion.elapsed() <<" microseconds"<<std::endl;
std::cout<<"t Overall = "<<std::setw(15)<<Tmr_Overall.elapsed() <<" microseconds"<<std::endl;
//Free up memory, except for the resulting landscape height field prior to
//exiting so that unnecessary space is not used when the model is not being
//run.
accum .clear(); accum .shrink_to_fit();
rec .clear(); rec .shrink_to_fit();
ndon .clear(); ndon .shrink_to_fit();
stack .clear(); stack .shrink_to_fit();
donor .clear(); donor .shrink_to_fit();
}
///Returns a pointer to the data so that it can be copied, printed, &c.
std::vector<double>& getH() {
return h;
}
};
int main(int argc, char **argv){
//Enable this to stop the program if a floating-point exception happens
//feenableexcept(FE_ALL_EXCEPT);
if(argc!=5){
std::cerr<<"Syntax: "<<argv[0]<<" <Dimension> <Steps> <Output Name> <Seed>"<<std::endl;
return -1;
}
const int width = std::stoi (argv[1]);
const int height = std::stoi (argv[1]);
const int nstep = std::stoi (argv[2]);
const std::string output_name = argv[3] ;
const auto rand_seed = std::stoul(argv[4]);
seed_rand(rand_seed);
//Uses the RichDEM machine-readable line prefixes
//Name of algorithm
std::cout<<"A FastScape B&W+PI"<<std::endl;
//Citation for algorithm
std::cout<<"C Richard Barnes TODO"<<std::endl;
//Git hash of code used to produce outputs of algorithm
std::cout<<"h git_hash = "<<GIT_HASH<<std::endl;
//Random seed used to produce outputs
std::cout<<"m Random seed = "<<rand_seed<<std::endl;
CumulativeTimer tmr(true);
FastScape_BWPF tm(width,height);
tm.run(nstep);
std::cout<<"t Total calculation time = "<<std::setw(15)<<tmr.elapsed()<<" microseconds"<<std::endl;
PrintDEM(output_name, tm.getH(), width, height);
return 0;
}