File Testing.cu
File List > src > Testing.cu
Go to the documentation of this file
#include "Testing.h"
template <class T> bool Testing(Param XParam, Forcing<float> XForcing, Model<T> XModel, Model<T> XModel_g)
{
bool isfailed = false;
std::string result;
log("\nRunning internal test(s):");
int mytest;
mytest = XParam.test;
if (XParam.test == 99)
{
mytest = 0;
}
while (mytest <= XParam.test)
{
if (mytest == 0)
{
bool bumptest, bumptestComp;
bool bumptestGPU = true;
// Test 0 is pure bump test
log("\t ### Gaussian wave on Cartesian grid ###");
//set gpu is -1 for cpu test
bumptest = GaussianHumptest(0.1, -1, false);
result = bumptest ? "successful" : "failed";
log("\t\tCPU test: " + result);
// If original XParam tried to use GPU we try also
if (XParam.GPUDEVICE >= 0)
{
bumptestGPU = GaussianHumptest(0.1, XParam.GPUDEVICE, false);
result = bumptestGPU ? "successful" : "failed";
log("\t\tGPU test: " + result);
if (!bumptestGPU)
{
bumptestComp = GaussianHumptest(0.1, XParam.GPUDEVICE, true);
}
}
isfailed = ((bumptest == true) && (bumptestGPU == true)) ? false : true;
}
if (mytest == 1)
{
bool rivertest;
// Test 1 is vertical discharge on a flat uniorm cartesian mesh (GPU and CU version)
log("\t ### River Mass conservation grid ###");
rivertest = Rivertest(0.1, -1);
result = rivertest ? "successful" : "failed";
log("\t\tCPU test: " + result);
isfailed = (!rivertest || isfailed) ? true : false;
log(" \t\t\t GPU device= " + std::to_string(XParam.GPUDEVICE));
if (XParam.GPUDEVICE >= 0)
{
rivertest = Rivertest(0.1, XParam.GPUDEVICE);
result = rivertest ? "successful" : "failed";
log("\t\tGPU test: " + result);
isfailed = (!rivertest || isfailed) ? true : false;
}
rivertest = RiverVolumeAdapt(XParam, T(0.4));
result = rivertest ? "successful" : "failed";
log("\t\tRiver Volume Adapt: " + result);
isfailed = (!rivertest || isfailed) ? true : false;
}
if (mytest == 2)
{
if (XParam.GPUDEVICE >= 0)
{
bool GPUvsCPUtest;
log("\t### Gaussian wave on Cartesian grid: CPU vs GPU ###");
GPUvsCPUtest = GaussianHumptest(0.1, XParam.GPUDEVICE, true);
result = GPUvsCPUtest ? "successful" : "failed";
log("\t\tCPU vs GPU test: " + result);
isfailed = (!GPUvsCPUtest || isfailed) ? true : false;
}
else
{
log("Specify GPU device to run test 2 (CPU vs GPU comparison)");
}
}
if (mytest == 3)
{
bool testresults;
bool testreduction = true;
// Iterate this test niter times:
int niter = 1000;
srand(time(0));
log("\t### Reduction Test ###");
for (int iter = 0; iter < niter; iter++)
{
testresults = reductiontest(XParam, XModel, XModel_g);
testreduction = testreduction && testresults;
}
result = testreduction ? "successful" : "failed";
log("\t\tReduction test: " + result);
isfailed = (!testreduction || isfailed) ? true : false;
}
if (mytest == 4)
{
log("\t### Boundary Test ###");
bool testBound = testboundaries(XParam, T(0.1));
result = testBound ? "successful" : "failed";
isfailed = (!testBound || isfailed) ? true : false;
log("\t\tboundaries test: " + result);
}
if (mytest == 5)
{
log("\t### Lake-at-rest Test ###");
bool testTLAR = ThackerLakeAtRest(XParam, T(0.0));
result = testTLAR ? "successful" : "failed";
isfailed = (!testTLAR || isfailed) ? true : false;
log("\t\tThaker lake-at-rest test: " + result);
testTLAR = LakeAtRest(XParam, XModel);
isfailed = (!testTLAR || isfailed) ? true : false;
log("\t\tLake-at-rest test: " + result);
}
if (mytest == 6)
{
log("\t### Mass conservation Test ###");
bool testSteepSlope = MassConserveSteepSlope(XParam.zsinit, XParam.GPUDEVICE);
result = testSteepSlope ? "successful" : "failed";
isfailed = (!testSteepSlope || isfailed) ? true : false;
log("\t\tMass conservation test: " + result);
}
if (mytest == 7)
{
bool testrainGPU, testrainCPU;
/* Test 7 is homogeneous rain on a uniform slope for cartesian mesh (GPU and CPU version)
The input parameters are :
- the initial water level (zs)
- GPU option
- the slope (%)
*/
log("\t### Homogeneous rain on grid Mass conservation test ###");
if (XParam.GPUDEVICE >= 0)
{
testrainGPU = Raintest(0.0, 0, 10, XParam.engine);
result = testrainGPU ? "successful" : "failed";
log("\t\tHomogeneous rain on grid test GPU: " + result);
}
testrainCPU = Raintest(0.0, -1, 10, XParam.engine);
result = testrainCPU ? "successful" : "failed";
log("\t\tHomogeneous rain on grid test CPU: " + result);
isfailed = (!testrainCPU || !testrainGPU || isfailed) ? true : false;
}
if (XParam.test == 8)
{
bool raintest2;
/* Test 8 is non-homogeneous rain on a non-uniform slope for cartesian mesh (GPU and CPU version)
It is based on a teste case from litterature (Iwagaki1955) and tests the different
rain inputs (time serie for 1D input or netCDF file).
*/
log("\t non-uniform rain forcing on slope based on Aureli2020");
int gpu = 0;
raintest2 = Raintestinput(gpu);
result = raintest2 ? "successful" : "failed";
log("\t\tNon-uniform rain forcing : " + result);
}
if (mytest == 9)
{
bool testzoneOutDef, testzoneOutUser;
/* Test 9 is basic configuration to test the zoned outputs, with different resolutions.
The default (without zoned defined by user) configuration is tested.
Then, the creation of 3 zones is then tested(whole, zoned complexe, zoned with part of the levels).
The size of the created nc files is used to verified this fonctionnality.
Parameter: nbzones: number of zones for output defined by the user
zsinit: initial water elevation
*/
log("\t### Test zoned output ###");
int nbzones = 0;
T zsinit = 0.01;
testzoneOutDef = ZoneOutputTest(nbzones, zsinit);
result = testzoneOutDef ? "successful" : "failed";
log("\n\nDefault zoned Outputs: " + result);
nbzones = 3; // 3 only
testzoneOutUser = ZoneOutputTest(nbzones, zsinit);
result = testzoneOutUser ? "successful" : "failed";
log("\n\nUser defined zones Outputs: " + result);
isfailed = (!testzoneOutDef || !testzoneOutUser || isfailed) ? true : false;
}
if (mytest == 10)
{
bool testrainlossesGPU, testrainlossesCPU;
/* Test 10 is to test the Initial / Continuous Losses of rain, on a uniform slope, under uniform rain
for cartesian mesh (GPU and CPU version)
The input parameters are :
- the initial water level (zs)
- GPU option
- the slope (%)
*/
log("\t### IL-CL Rain losses test on GPU ###");
testrainlossesGPU = Rainlossestest(0.0, 0, 10);
result = testrainlossesGPU ? "successful" : "failed";
log("\n\n\t IL-CL Rain losses test GPU: " + result);
testrainlossesCPU = Rainlossestest(0.0, -1, 10);
result = testrainlossesCPU ? "successful" : "failed";
log("\n\n\t IL-CL Rain losses test CPU: " + result);
isfailed = (!testrainlossesCPU || !testrainlossesGPU || isfailed) ? true : false;
}
if (mytest == 11)
{
bool instab;
log("\t### Wet/dry Instability test with Conserve Elevation ###");
instab = TestInstability(XParam, XModel, XModel_g);
result = instab ? "successful" : "failed";
log("\t\tWet/dry Instability test : " + result);
}
if (mytest == 12)
{
/* Test 12 is to test the calendar time to second conversion
This test will fail if the system or compiler does not suport long long
*/
bool timetest;
timetest = testime1(1) && testime2(2);
result = timetest ? "successful" : "failed";
log("\t\tCalendar time test : " + result);
}
if (mytest == 13)
{
/* Test 13 is to test the input of different roughness maps (and different bathymetry at the same time)
Test1: 2 DEM and 2 roughness netcdf files are created and saved; then read.
The max / min values are check to see if the z/z0 maps are created as expected
Test2: A roughness file name is changed to have a number in first position. We check that the
file is read and not the number taken as z0 value.
Test3: A roughness is entered as a value, test that it is implemented for the whole domain.
Test4 : Test value input for initial loss / continuous loss
*/
bool RoughBathyresult, RoughInput, RoughtInputnumber, ILCLInputnumber;
log("\t### Different bathy and different roughness file inputs ###");
RoughBathyresult = TestMultiBathyRough(0, 0.0, 0);//&& TestRoughness(XParam, XModel, XModel_g);
result = RoughBathyresult ? "successful" : "failed";
log("\t\t ##### \n");
log("\t\t ##### Different Bathy and Roughness test : " + result + "\n");
RoughInput = TestMultiBathyRough(0, 0.0, 1);//&& TestRoughness(XParam, XModel, XModel_g);
result = RoughInput ? "successful" : "failed";
log("\t\t ##### \n");
log("\t\t ##### Roughness file name test : " + result + "\n");
RoughtInputnumber = TestMultiBathyRough(0, 0.0, 2);//&& TestRoughness(XParam, XModel, XModel_g);
result = RoughtInputnumber ? "successful" : "failed";
log("\t\t ##### \n");
log("\t\t ##### Roughness value input test : " + result + "\n");
log("\t\t ##### \n");
ILCLInputnumber = TestMultiBathyRough(0, 0.0, 3);//&& TestRoughness(XParam, XModel, XModel_g);
result = ILCLInputnumber ? "successful" : "failed";
log("\t\t ##### \n");
log("\t\t ##### Initial Loss / Continuous Loss value input test : " + result + "\n");
log("\t\t ##### \n");
isfailed = (!RoughBathyresult || !RoughInput || !RoughtInputnumber || !ILCLInputnumber || isfailed) ? true : false;
}
if (mytest == 14)
{
/* Test 14 This test AOI bnds aswall to start with
*/
bool wallbndleft, wallbndright, wallbndbot, wallbndtop;
log("\t###AOI bnd wall test ###");
wallbndleft = TestAIObnd(XParam, XModel, XModel_g, false, false, false);
wallbndright = TestAIObnd(XParam, XModel, XModel_g, false, true, false);
wallbndbot = TestAIObnd(XParam, XModel, XModel_g, true, false, false);
wallbndtop = TestAIObnd(XParam, XModel, XModel_g, true, true, false);
result = (wallbndleft & wallbndright & wallbndbot & wallbndtop) ? "successful" : "failed";
log("\t\tBBox bnd wall test : " + result);
wallbndleft = TestAIObnd(XParam, XModel, XModel_g, false, false, true);
wallbndright = TestAIObnd(XParam, XModel, XModel_g, false, true, true);
wallbndbot = TestAIObnd(XParam, XModel, XModel_g, true, false, true);
wallbndtop = TestAIObnd(XParam, XModel, XModel_g, true, true, true);
result = (wallbndleft & wallbndright & wallbndbot & wallbndtop) ? "successful" : "failed";
log("\t\tAOI bnd wall test : " + result);
}
if (mytest == 15)
/* Test 15 is to test the input of flexible times outputs (general and in zone_outputs)
Test1: Test of times in second/durations (for general and zone_outputs)
The data is read from paramfile and we test the reading and nc files created.
*/
{
bool FlexibleOutTime;
log("\t### Tests for flexible time outputs (general and zones outputs) ###");
FlexibleOutTime = TestFlexibleOutputTimes(0, 0.0, 0);
result = FlexibleOutTime ? "successful" : "failed";
log("\t\t ##### \n");
log("\t\t ##### Flexible output times reading test : " + result + "\n");
log("\t\t ##### \n");
isfailed = (!FlexibleOutTime || isfailed) ? true : false;
}
if (mytest == 993)
{
//pinned pageable Memory test
TestPinMem(XParam, XModel, XModel_g);
}
if (mytest == 900)
{
GaussianHumptest(0.1, XParam.GPUDEVICE, false);
}
if (mytest == 994)
{
Testzbinit(XParam, XForcing, XModel, XModel_g);
}
if (mytest == 995)
{
TestFirsthalfstep(XParam, XForcing, XModel, XModel_g);
}
if (mytest == 996)
{
TestHaloSpeed(XParam, XModel, XModel_g);
}
if (mytest == 997)
{
TestGradientSpeed(XParam, XModel, XModel_g);
}
if (mytest == 998)
{
//
bool testresults;
log("\t### CPU vs GPU Test ###");
testresults = CPUGPUtest(XParam, XModel, XModel_g);
isfailed = (!testresults || isfailed) ? true : false;
if (testresults)
{
exit(0);
}
else
{
exit(1);
}
}
if (XParam.test == 999)
{
//
DebugLoop(XParam, XForcing, XModel, XModel_g);
}
mytest++;
}
return(isfailed);
}
template bool Testing<float>(Param XParam, Forcing<float> XForcing, Model<float> XModel, Model<float> XModel_g);
template bool Testing<double>(Param XParam, Forcing<float> XForcing, Model<double> XModel, Model<double> XModel_g);
template <class T> bool GaussianHumptest(T zsnit, int gpu, bool compare)
{
log("#####");
// this is a preplica of the tutorial case for Basilisk
Param XParam;
T x, y, delta;
T cc = T(0.05);// Match the 200 in chracteristic radius used in Basilisk 1/(2*cc^2)=200
//XParam.engine = 2;
T a = T(1.0); //Gaussian wave amplitude
// Verification data
// This is a transect across iy=15:16:127 at ix=127 (or vice versa because the solution is symetrical)
// These values are based on single precision output from Netcdf file so are only accurate to 10-7
//double ZsVerifKurganov[8] = { 0.100000000023, 0.100000063119, 0.100110376004, 0.195039970749, 0.136739044168, 0.0848024805994, 0.066275833049, 0.0637058445888 };
//double ZsVerification[8] = { 0.100000008904, 0.187920326216, 0.152329657390, 0.117710230042, 0.0828616638138, 0.0483274739972, 0.0321501737555, 0.0307609731288 };
double ZsVerifButtinger[8] = { 0.100000000023, 0.100000063119, 0.100093580546, 0.195088199869, 0.136767978925, 0.0850706353898, 0.0663028448129, 0.063727949607 };
// initialise domain and required resolution
XParam.dx = 1.0 / ((1 << 8));
XParam.xo = -0.5;
XParam.yo = -0.5;
XParam.xmax = 0.5;
XParam.ymax = 0.5;
//level 8 is
XParam.initlevel = 0;
XParam.minlevel = 0;
XParam.maxlevel = 0;
XParam.zsinit = zsnit;
XParam.zsoffset = 0.0;
XParam.aoibnd = 3;
//Output times for comparisons
XParam.endtime = 1.0;
XParam.outputtimestep = 0.1;
XParam.smallnc = 0;
XParam.cf = 0.0;
XParam.frictionmodel = 0;
// Enforece GPU/CPU
XParam.GPUDEVICE = gpu;
std::string outvi[18] = { "zb","h","zs","u","v","Fqux","Fqvx","Fquy","Fqvy", "Fhu", "Fhv", "dh", "dhu", "dhv", "ho", "vo", "uo", "cf" };
std::vector<std::string> outv;
for (int nv = 0; nv < 18; nv++)
{
outv.push_back(outvi[nv]);
}
XParam.outvars = outv;
// create Model setup
Model<T> XModel;
Model<T> XModel_g;
Forcing<float> XForcing;
StaticForcingP<float> bathy;
XForcing.Bathy.push_back(bathy);
// initialise forcing bathymetry to 0
XForcing.Bathy[0].xo = -1.0;
XForcing.Bathy[0].yo = -1.0;
XForcing.Bathy[0].xmax = 1.0;
XForcing.Bathy[0].ymax = 1.0;
XForcing.Bathy[0].nx = 3;
XForcing.Bathy[0].ny = 3;
XForcing.Bathy[0].dx = 1.0;
XForcing.Bathy[0].dy = XForcing.Bathy[0].dx;
//AllocateCPU(1, 1, XForcing.left.blks, XForcing.right.blks, XForcing.top.blks, XForcing.bot.blks);
AllocateCPU(XForcing.Bathy[0].nx, XForcing.Bathy[0].ny, XForcing.Bathy[0].val);
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
for (int i = 0; i < XForcing.Bathy[0].nx; i++)
{
XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx] = 0.0f;
}
}
checkparamsanity(XParam, XForcing);
InitMesh(XParam, XForcing, XModel);
// Recreate the initia;l conditions
//InitArrayBUQ(XParam, XModel.blocks, T(0.0), XModel.zb);
//InitArrayBUQ(XParam, XModel.blocks, zsnit, XModel.evolv.zs);
//zs is initialised here:
InitialConditions(XParam, XForcing, XModel);
T xorigin = T(0.0);
T yorigin = T(0.0);
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
//printf("bl=%d\tblockxo[bl]=%f\tblockyo[bl]=%f\n", bl, blockxo[bl], blockyo[bl]);
int ib = XModel.blocks.active[ibl];
delta = T(calcres(XParam.dx, XModel.blocks.level[ib]));
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < XParam.blkwidth; ix++)
{
//
int n = memloc(XParam, ix, iy, ib);
x = T(XParam.xo) + XModel.blocks.xo[ib] + ix * delta;
y = T(XParam.yo) + XModel.blocks.yo[ib] + iy * delta;
XModel.evolv.zs[n] = XModel.evolv.zs[n] + a * exp(T(-1.0) * ((x - xorigin) * (x - xorigin) + (y - yorigin) * (y - yorigin)) / (T(2.0) * cc * cc));
XModel.evolv.h[n] = utils::max(XModel.evolv.zs[n] - XModel.zb[n], T(0.0));
}
}
}
SetupGPU(XParam, XModel, XForcing, XModel_g);
Loop<T> XLoop;
Loop<T> XLoop_g;
XLoop.hugenegval = std::numeric_limits<T>::min();
XLoop.hugeposval = std::numeric_limits<T>::max();
XLoop.epsilon = std::numeric_limits<T>::epsilon();
XLoop.totaltime = 0.0;
//InitSave2Netcdf(XParam, XModel);
XLoop.nextoutputtime = XParam.outputtimestep;
XLoop.dtmax = initdt(XParam, XLoop, XModel);
//XLoop_g = XLoop;
XLoop_g.hugenegval = std::numeric_limits<T>::min();
XLoop_g.hugeposval = std::numeric_limits<T>::max();
XLoop_g.epsilon = std::numeric_limits<T>::epsilon();
XLoop_g.totaltime = 0.0;
//InitSave2Netcdf(XParam, XModel);
XLoop_g.nextoutputtime = XParam.outputtimestep;
XLoop_g.dtmax = XLoop.dtmax;
if (XParam.GPUDEVICE >= 0 && compare)
{
CompareCPUvsGPU(XParam, XModel, XModel_g, outv, false);
}
bool modelgood = true;
fillHaloC(XParam, XModel.blocks, XModel.zb);
gradientC(XParam, XModel.blocks, XModel.zb, XModel.grad.dzbdx, XModel.grad.dzbdy);
gradientHalo(XParam, XModel.blocks, XModel.zb, XModel.grad.dzbdx, XModel.grad.dzbdy);
refine_linear(XParam, XModel.blocks, XModel.zb, XModel.grad.dzbdx, XModel.grad.dzbdy);
gradientHalo(XParam, XModel.blocks, XModel.zb, XModel.grad.dzbdx, XModel.grad.dzbdy);
while (XLoop.totaltime < XLoop.nextoutputtime)
{
if (XParam.GPUDEVICE >= 0)
{
if (XParam.engine == 5)
{
FlowMLGPU(XParam, XLoop, XForcing, XModel_g);
}
else
{
FlowGPU(XParam, XLoop_g, XForcing, XModel_g);
}
XLoop.dt = XLoop_g.dt;
}
else
{
FlowCPU(XParam, XLoop, XForcing, XModel);
}
if (XParam.GPUDEVICE >= 0 && compare)
{
int GPUdev = XParam.GPUDEVICE;
XParam.GPUDEVICE = -1;
FlowCPU(XParam, XLoop, XForcing, XModel);
XParam.GPUDEVICE = GPUdev;
T diffdt = T(XLoop_g.dt - XLoop.dt);
if (abs(diffdt) > T(100.0) * (XLoop.epsilon))
{
printf("Timestep Difference=%f\n", diffdt);
compare = false;
}
CompareCPUvsGPU(XParam, XModel, XModel_g, outv, false);
}
//diffdh(XParam, XModel.blocks, XModel.flux.Su, diff, shuffle);
//diffSource(XParam, XModel.blocks, XModel.flux.Fqux, XModel.flux.Su, diff);
XLoop.totaltime = XLoop.totaltime + XLoop.dt;
XLoop_g.totaltime = XLoop_g.totaltime + XLoop_g.dt;
if (XLoop.nextoutputtime - XLoop.totaltime <= XLoop.dt * T(0.00001) && XParam.outputtimestep > 0.0)
{
if (XParam.GPUDEVICE >= 0)
{
for (int ivar = 0; ivar < XParam.outvars.size(); ivar++)
{
CUDA_CHECK(cudaMemcpy(XModel.OutputVarMap[XParam.outvars[ivar]], XModel_g.OutputVarMap[XParam.outvars[ivar]], XParam.nblkmem * XParam.blksize * sizeof(T), cudaMemcpyDeviceToHost));
}
}
//Save2Netcdf(XParam, XModel);
// Verify the Validity of results
double diff;
for (int iv = 0; iv < 8; iv++)
{
int ix, iy, ib, ii, jj, ibx, iby, nbx;
jj = 127;
ii = (iv + 1) * 16 - 1;
// Theoretical size is 255x255
nbx = 256 / 16;
ibx = ftoi(floor(ii / XParam.blkwidth));
iby = ftoi(floor(jj / XParam.blkwidth));
ib = (iby)*nbx + ibx;
ix = ii - ibx * XParam.blkwidth;
iy = jj - iby * XParam.blkwidth;
int n = memloc(XParam, ix, iy, ib);
diff = abs(T(XModel.evolv.zs[n]) - ZsVerifButtinger[iv]);
if (diff > 1e-6)//Tolerance is 1e-6 or 1e-7/1e-8??
{
printf("ib=%d, ix=%d, iy=%d; simulated=%f; expected=%f; diff=%e\n", ib, ix, iy, XModel.evolv.zs[n], ZsVerifButtinger[iv], diff);
modelgood = false;
creatncfileBUQ(XParam, XModel.blocks);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "zs", 3, XModel.evolv.zs, XModel.blocks.outZone[0]);
}
}
//XLoop.nextoutputtime = min(XLoop.nextoutputtime + XParam.outputtimestep, XParam.endtime);
}
}
log("#####");
return modelgood;
}
template bool GaussianHumptest<float>(float zsnit, int gpu, bool compare);
template bool GaussianHumptest<double>(double zsnit, int gpu, bool compare);
template <class T> bool Rivertest(T zsnit, int gpu)
{
log("#####");
Param XParam;
T delta = 0;
T initVol = 0;
T finalVol = 0;
T TheoryInput = 0;
// initialise domain and required resolution
XParam.dx = 1.0 / ((1 << 4));
XParam.xo = -0.5;
XParam.yo = -0.5;
XParam.xmax = 0.5;
XParam.ymax = 0.5;
//level 8 is
XParam.initlevel = 0;
XParam.minlevel = 0;
XParam.maxlevel = 0;
XParam.zsinit = zsnit;
XParam.zsoffset = 0.0;
//Output times for comparisons
XParam.endtime = 1.0;
XParam.outputtimestep = 0.1;
XParam.smallnc = 0;
XParam.cf = 0.0;
XParam.frictionmodel = 0;
// Enforece GPU/CPU
XParam.GPUDEVICE = gpu;
std::vector<std::string> outv = { "zb","h","zs","u","v","Fqux","Fqvx","Fquy","Fqvy", "Fhu", "Fhv", "dh", "dhu", "dhv", "Su", "Sv","dhdx", "dhdy", "dudx", "dvdx", "dzsdx", "twet", "hUmax", "Umean" };
XParam.outvars = outv;
XParam.outmax = true;
XParam.outmean = true;
XParam.outtwet = true;
XParam.ForceMassConserve = true;
// create Model setup
Model<T> XModel;
Model<T> XModel_g;
Forcing<float> XForcing;
StaticForcingP<float> bathy;
XForcing.Bathy.push_back(bathy);
// initialise forcing bathymetry to 0
XForcing.Bathy[0].xo = -1.0;
XForcing.Bathy[0].yo = -1.0;
XForcing.Bathy[0].xmax = 1.0;
XForcing.Bathy[0].ymax = 1.0;
XForcing.Bathy[0].nx = 3;
XForcing.Bathy[0].ny = 3;
XForcing.Bathy[0].dx = 1.0;
XForcing.Bathy[0].dy = 1.0;
AllocateCPU(1, 1, XForcing.left.blks, XForcing.right.blks, XForcing.top.blks, XForcing.bot.blks);
AllocateCPU(XForcing.Bathy[0].nx, XForcing.Bathy[0].ny, XForcing.Bathy[0].val);
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
for (int i = 0; i < XForcing.Bathy[0].nx; i++)
{
XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx] = 0.0f;
}
}
//
//
//
T Q = T(0.001);
TheoryInput = Q * T(XParam.outputtimestep);
//Create a temporary file with river fluxes
std::ofstream river_file(
"testriver.tmp", std::ios_base::out | std::ios_base::trunc);
river_file << "0.0 " + std::to_string(Q) << std::endl;
river_file << "3600.0 " + std::to_string(Q) << std::endl;
river_file.close(); //destructor implicitly does it
River thisriver;
thisriver.Riverflowfile = "testriver.tmp";
thisriver.xstart = -1.0 * XParam.dx * 3.0;
thisriver.xend = XParam.dx * 3.0;
thisriver.ystart = -1.0 * XParam.dx * 3.0;
thisriver.yend = XParam.dx * 3.0;
XForcing.rivers.push_back(thisriver);
XForcing.rivers[0].flowinput = readFlowfile(XForcing.rivers[0].Riverflowfile, XParam.reftime);
checkparamsanity(XParam, XForcing);
InitMesh(XParam, XForcing, XModel);
InitialConditions(XParam, XForcing, XModel);
InitialAdaptation(XParam, XForcing, XModel);
SetupGPU(XParam, XModel, XForcing, XModel_g);
Loop<T> XLoop;
XLoop.hugenegval = std::numeric_limits<T>::min();
XLoop.hugeposval = std::numeric_limits<T>::max();
XLoop.epsilon = std::numeric_limits<T>::epsilon();
XLoop.totaltime = 0.0;
//InitSave2Netcdf(XParam, XModel);
XLoop.nextoutputtime = XParam.outputtimestep;
XLoop.dtmax = initdt(XParam, XLoop, XModel);
initVol = T(0.0);
fillHaloC(XParam, XModel.blocks, XModel.zb);
// Calculate initial water volume
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
//printf("bl=%d\tblockxo[bl]=%f\tblockyo[bl]=%f\n", bl, blockxo[bl], blockyo[bl]);
int ib = XModel.blocks.active[ibl];
delta = calcres(T(XParam.delta), XModel.blocks.level[ib]);
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < XParam.blkwidth; ix++)
{
//
int n = memloc(XParam, ix, iy, ib);
//printf("h[%d] = %f\n", n, XModel.evolv.h[n]);
initVol = initVol + XModel.evolv.h[n] * delta * delta;
}
}
}
//InitSave2Netcdf(XParam, XModel);
bool modelgood = true;
while (XLoop.totaltime < XLoop.nextoutputtime)
{
if (XParam.GPUDEVICE >= 0)
{
FlowGPU(XParam, XLoop, XForcing, XModel_g);
}
else
{
printf("h[1] = %f\n", XModel.evolv.h[1]);
FlowCPU(XParam, XLoop, XForcing, XModel);
}
XLoop.totaltime = XLoop.totaltime + XLoop.dt;
//Save2Netcdf(XParam, XLoop, XModel);
if (XLoop.nextoutputtime - XLoop.totaltime <= XLoop.dt * T(0.00001))
{
if (XParam.GPUDEVICE >= 0)
{
for (int ivar = 0; ivar < XParam.outvars.size(); ivar++)
{
CUDA_CHECK(cudaMemcpy(XModel.OutputVarMap[XParam.outvars[ivar]], XModel_g.OutputVarMap[XParam.outvars[ivar]], XParam.nblkmem * XParam.blksize * sizeof(T), cudaMemcpyDeviceToHost));
}
}
//Save2Netcdf(XParam, XLoop, XModel);
// Verify the Validity of results
finalVol = T(0.0);
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
//printf("bl=%d\tblockxo[bl]=%f\tblockyo[bl]=%f\n", bl, blockxo[bl], blockyo[bl]);
int ib = XModel.blocks.active[ibl];
delta = calcres(T(XParam.delta), XModel.blocks.level[ib]);
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < XParam.blkwidth; ix++)
{
//
int n = memloc(XParam, ix, iy, ib);
//printf("h[%d] = %f\n", n, XModel.evolv.h[n]);
finalVol = finalVol + XModel.evolv.h[n] * delta * delta;
}
}
}
T error = ((finalVol - initVol) - TheoryInput) / TheoryInput;
printf("error = %g %%, initial volume=%4.4f; final Volume=%4.4f; abs. difference=%4.4f, Theoretical input=%4.4f \n", error, initVol, finalVol, abs(finalVol - initVol), TheoryInput);
modelgood = abs(error) < 0.05;
}
}
if (!modelgood)
{
InitSave2Netcdf(XParam, XModel);
}
log("#####");
return modelgood;
}
template bool Rivertest<float>(float zsnit, int gpu);
template bool Rivertest<double>(double zsnit, int gpu);
template <class T> bool MassConserveSteepSlope(T zsnit, int gpu)
{
log("#####");
Param XParam;
T delta, initVol, finalVol, TheoryInput;
// initialise domain and required resolution
XParam.dx = 1.0 / ((1 << 8));
XParam.xo = -0.5;
XParam.yo = -0.5;
XParam.xmax = 0.5;
XParam.ymax = 0.5;
//level 8 is
XParam.initlevel = 0;
XParam.minlevel = -1;
XParam.maxlevel = 1;
XParam.AdaptCrit = "Threshold";
XParam.Adapt_arg1 = "3.5";
XParam.Adapt_arg2 = "zb";
XParam.zsinit = zsnit;
XParam.zsoffset = 0.0;
//Output times for comparisons
XParam.endtime = 1.0;
XParam.outputtimestep = 0.04;//0.035;
XParam.smallnc = 0;
XParam.cf = 0.001;
XParam.frictionmodel = 1;
XParam.conserveElevation = false;
XParam.ForceMassConserve = true;
// Enforce GPU/CPU
XParam.GPUDEVICE = gpu;
std::vector<std::string> outv = { "zb","h","zs","u","v","Fqux","Fqvx","Fquy","Fqvy", "Fhu", "Fhv", "dh", "dhu", "dhv", "Su", "Sv","dhdx", "dhdy" };
XParam.outvars = outv;
// create Model setup
Model<T> XModel;
Model<T> XModel_g;
Forcing<float> XForcing;
StaticForcingP<float> bathy;
XForcing.Bathy.push_back(bathy);
// initialise forcing bathymetry to 0
XForcing.Bathy[0].xo = -1.0;
XForcing.Bathy[0].yo = -1.0;
XForcing.Bathy[0].xmax = 1.0;
XForcing.Bathy[0].ymax = 1.0;
XForcing.Bathy[0].nx = 3;
XForcing.Bathy[0].ny = 3;
XForcing.Bathy[0].dx = 1.0;
XForcing.Bathy[0].dy = XForcing.Bathy[0].dx;
AllocateCPU(1, 1, XForcing.left.blks, XForcing.right.blks, XForcing.top.blks, XForcing.bot.blks);
AllocateCPU(XForcing.Bathy[0].nx, XForcing.Bathy[0].ny, XForcing.Bathy[0].val);
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
for (int i = 0; i < XForcing.Bathy[0].nx; i++)
{
XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx] = T(i * 4);
}
}
//
//
//
T Q = T(0.10);
TheoryInput = Q * T(XParam.outputtimestep);
//Create a temporary file with river fluxes
std::ofstream river_file(
"testriver.tmp", std::ios_base::out | std::ios_base::trunc);
river_file << "0.0 " + std::to_string(Q) << std::endl;
river_file << "3600.0 " + std::to_string(Q) << std::endl;
river_file.close(); //destructor implicitly does it
River thisriver;
thisriver.Riverflowfile = "testriver.tmp";
thisriver.xstart = -1.0 * XParam.dx * 3.0;
thisriver.xend = XParam.dx * 3.0;
thisriver.ystart = -1.0 * XParam.dx * 3.0;
thisriver.yend = XParam.dx * 3.0;
XForcing.rivers.push_back(thisriver);
XForcing.rivers[0].flowinput = readFlowfile(XForcing.rivers[0].Riverflowfile, XParam.reftime);
checkparamsanity(XParam, XForcing);
InitMesh(XParam, XForcing, XModel);
InitialConditions(XParam, XForcing, XModel);
InitialAdaptation(XParam, XForcing, XModel);
SetupGPU(XParam, XModel, XForcing, XModel_g);
Loop<T> XLoop;
XLoop.hugenegval = std::numeric_limits<T>::min();
XLoop.hugeposval = std::numeric_limits<T>::max();
XLoop.epsilon = std::numeric_limits<T>::epsilon();
XLoop.totaltime = 0.0;
InitSave2Netcdf(XParam, XModel);
XLoop.nextoutputtime = XParam.outputtimestep;
XLoop.dtmax = 0.025;// initdt(XParam, XLoop, XModel);
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
//printf("bl=%d\tblockxo[bl]=%f\tblockyo[bl]=%f\n", bl, blockxo[bl], blockyo[bl]);
int ib = XModel.blocks.active[ibl];
//delta = calcres(XParam.dx, XModel.blocks.level[ib]);
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < XParam.blkwidth; ix++)
{
//
int n = memloc(XParam, ix, iy, ib);
if (XModel.zb[n] < XParam.eps)
{
printf("ix=%d, iy=%d, ib=%d, n=%d; zb=%f \n", ix, iy, ib, n, XModel.zb[n]);
}
}
}
}
if (XParam.GPUDEVICE >= 0)
{
cudaStream_t stream;
CUDA_CHECK(cudaStreamCreate(&stream));
fillHaloGPU(XParam, XModel_g.blocks, stream, XModel_g.zb);
cudaStreamDestroy(stream);
}
else
{
fillHaloC(XParam, XModel.blocks, XModel.zb);
}
initVol = T(0.0);
// Calculate initial water volume
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
//printf("bl=%d\tblockxo[bl]=%f\tblockyo[bl]=%f\n", bl, blockxo[bl], blockyo[bl]);
int ib = XModel.blocks.active[ibl];
delta = calcres(T(XParam.delta), XModel.blocks.level[ib]);
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < XParam.blkwidth; ix++)
{
//
int n = memloc(XParam, ix, iy, ib);
initVol = initVol + XModel.evolv.h[n] * delta * delta;
}
}
}
//InitSave2Netcdf(XParam, XModel);+
bool modelgood = true;
while (XLoop.totaltime < XLoop.nextoutputtime)
{
if (XParam.GPUDEVICE >= 0)
{
FlowGPU(XParam, XLoop, XForcing, XModel_g);
}
else
{
FlowCPU(XParam, XLoop, XForcing, XModel);
}
XLoop.totaltime = XLoop.totaltime + XLoop.dt;
//Save2Netcdf(XParam, XLoop, XModel);
if (XLoop.nextoutputtime - XLoop.totaltime <= XLoop.dt * T(0.00001) && XParam.outputtimestep > 0.0)
{
if (XParam.GPUDEVICE >= 0)
{
for (int ivar = 0; ivar < XParam.outvars.size(); ivar++)
{
CUDA_CHECK(cudaMemcpy(XModel.OutputVarMap[XParam.outvars[ivar]], XModel_g.OutputVarMap[XParam.outvars[ivar]], XParam.nblkmem * XParam.blksize * sizeof(T), cudaMemcpyDeviceToHost));
}
}
Save2Netcdf(XParam, XLoop, XModel);
// Verify the Validity of results
finalVol = T(0.0);
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
//printf("bl=%d\tblockxo[bl]=%f\tblockyo[bl]=%f\n", bl, blockxo[bl], blockyo[bl]);
int ib = XModel.blocks.active[ibl];
delta = calcres(T(XParam.delta), XModel.blocks.level[ib]);
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < XParam.blkwidth; ix++)
{
//
int n = memloc(XParam, ix, iy, ib);
finalVol = finalVol + XModel.evolv.h[n] * delta * delta;
}
}
}
T error = (finalVol - initVol) - TheoryInput;
modelgood = error / TheoryInput < 0.05;
}
}
log("#####");
return modelgood;
}
template bool MassConserveSteepSlope<float>(float zsnit, int gpu);
template bool MassConserveSteepSlope<double>(double zsnit, int gpu);
template <class T> bool reductiontest(Param XParam, Model<T> XModel, Model<T> XModel_g)
{
dim3 blockDim(XParam.blkwidth, XParam.blkwidth, 1);
dim3 gridDim(XParam.nblk, 1, 1);
//srand(seed);
T mininput = T(rand()) / T(RAND_MAX);
bool test = true;
Loop<T> XLoop;
XLoop.hugenegval = std::numeric_limits<T>::min();
XLoop.hugeposval = std::numeric_limits<T>::max();
XLoop.epsilon = std::numeric_limits<T>::epsilon();
XLoop.totaltime = 0.0;
//InitSave2Netcdf(XParam, XModel);
XLoop.nextoutputtime = mininput * T(2.0);
XLoop.dtmax = mininput * T(2.01);
// Fill in dtmax with random values that are larger than mininput
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XModel.blocks.active[ibl];
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < XParam.blkwidth; ix++)
{
//
int n = memloc(XParam, ix, iy, ib);
XModel.time.dtmax[n] = mininput * T(1.1) + utils::max(T(rand()) / T(RAND_MAX), T(0.0));
}
}
}
// randomly select a block a i and a j were the maximum value will be relocated
int ibbl = ftoi(floor(T(rand()) / T(RAND_MAX) * XParam.nblk));
int ibb = XModel.blocks.active[ibbl];
int ixx = ftoi(floor(T(rand()) / T(RAND_MAX) * XParam.blkwidth));
int iyy = ftoi(floor(T(rand()) / T(RAND_MAX) * XParam.blkwidth));
int nn = memloc(XParam, ixx, iyy, ibb);
XModel.time.dtmax[nn] = mininput;
T reducedt = CalctimestepCPU(XParam, XLoop, XModel.blocks, XModel.time);
test = abs(reducedt - mininput) < T(100.0) * (XLoop.epsilon);
bool testgpu;
if (!test)
{
char buffer[256]; sprintf(buffer, "%e", abs(reducedt - mininput));
std::string str(buffer);
log("\t\t CPU test failed! : Expected=" + std::to_string(mininput) + "; Reduced=" + std::to_string(reducedt) + "; error=" + str);
}
if (XParam.GPUDEVICE >= 0)
{
reset_var <<< gridDim, blockDim, 0 >>> (XParam.halowidth, XModel_g.blocks.active, XLoop.hugeposval, XModel_g.time.dtmax);
CUDA_CHECK(cudaDeviceSynchronize());
CopytoGPU(XParam.nblkmem, XParam.blksize, XModel.time.dtmax, XModel_g.time.dtmax);
T reducedtgpu = CalctimestepGPU(XParam, XLoop, XModel_g.blocks, XModel_g.time);
testgpu = abs(reducedtgpu - mininput) < T(100.0) * (XLoop.epsilon);
if (!testgpu)
{
char buffer[256]; sprintf(buffer, "%e", abs(reducedtgpu - mininput));
std::string str(buffer);
log("\t\t GPU test failed! : Expected=" + std::to_string(mininput) + "; Reduced=" + std::to_string(reducedtgpu) + "; error=" + str);
}
if (abs(reducedtgpu - reducedt) > T(100.0) * (XLoop.epsilon))
{
char buffer[256]; sprintf(buffer, "%e", abs(reducedtgpu - reducedt));
std::string str(buffer);
log("\t\t CPU vs GPU test failed! : Expected=" + std::to_string(reducedt) + "; Reduced=" + std::to_string(reducedtgpu) + "; error=" + str);
}
test = test && testgpu;
}
return test;
}
template bool reductiontest<float>(Param XParam, Model<float> XModel, Model<float> XModel_g);
template bool reductiontest<double>(Param XParam, Model<double> XModel, Model<double> XModel_g);
template<class T> bool CPUGPUtest(Param XParam, Model<T> XModel, Model<T> XModel_g)
{
bool test = true;
T initdepth = T(0.1);
T testamp = T(1.0);
dim3 blockDim(XParam.blkwidth, XParam.blkwidth, 1);
dim3 gridDim(XParam.nblk, 1, 1);
// for flux reconstruction the loop overlap the right(or top for the y direction) halo
dim3 blockDimKX(XParam.blkwidth + XParam.halowidth, XParam.blkwidth, 1);
dim3 blockDimKY(XParam.blkwidth, XParam.blkwidth + XParam.halowidth, 1);
InitArrayBUQ(XParam, XModel.blocks, T(0.0), XModel.zb);
InitArrayBUQ(XParam, XModel.blocks, T(initdepth), XModel.evolv.zs);
InitArrayBUQ(XParam, XModel.blocks, T(initdepth), XModel.evolv.h);
InitArrayBUQ(XParam, XModel.blocks, T(0.0), XModel.evolv.u);
InitArrayBUQ(XParam, XModel.blocks, T(0.0), XModel.evolv.v);
reset_var <<< gridDim, blockDim, 0 >>> (XParam.halowidth, XModel_g.blocks.active, T(0.0), XModel_g.zb);
CUDA_CHECK(cudaDeviceSynchronize());
// Create some usefull vectors
std::string evolvst[4] = { "h","zs","u","v" };
std::vector<std::string> evolvVar;
for (int nv = 0; nv < 4; nv++)
{
evolvVar.push_back(evolvst[nv]);
}
// Check fillhalo function
// fill with all evolv array with random value
/*
fillrandom(XParam, XModel.blocks, XModel.evolv.zs);
fillrandom(XParam, XModel.blocks, XModel.evolv.h);
fillrandom(XParam, XModel.blocks, XModel.evolv.u);
fillrandom(XParam, XModel.blocks, XModel.evolv.v);
*/
fillgauss(XParam, XModel.blocks, testamp, XModel.evolv.zs);
fillgauss(XParam, XModel.blocks, testamp, XModel.evolv.h);
fillgauss(XParam, XModel.blocks, T(0.5 * testamp), XModel.evolv.u);
fillgauss(XParam, XModel.blocks, T(0.5 * testamp), XModel.evolv.v);
//copy to GPU
CopytoGPU(XParam.nblkmem, XParam.blksize, XModel.evolv, XModel_g.evolv);
//============================================
// Fill the halo for gradient reconstruction
fillHalo(XParam, XModel.blocks, XModel.evolv, XModel.zb);
fillHaloGPU(XParam, XModel_g.blocks, XModel_g.evolv, XModel_g.zb);
CompareCPUvsGPU(XParam, XModel, XModel_g, evolvVar, true);
//============================================
//perform gradient reconstruction
//gradientCPU(XParam, XModel.blocks, XModel.evolv, XModel.grad, XModel.zb);
//gradientGPU(XParam, XModel_g.blocks, XModel_g.evolv, XModel_g.grad, XModel.zb);
// CPU gradients
std::thread t0(&gradientC<T>, XParam, XModel.blocks, XModel.evolv.h, XModel.grad.dhdx, XModel.grad.dhdy);
std::thread t1(&gradientC<T>, XParam, XModel.blocks, XModel.evolv.zs, XModel.grad.dzsdx, XModel.grad.dzsdy);
std::thread t2(&gradientC<T>, XParam, XModel.blocks, XModel.evolv.u, XModel.grad.dudx, XModel.grad.dudy);
std::thread t3(&gradientC<T>, XParam, XModel.blocks, XModel.evolv.v, XModel.grad.dvdx, XModel.grad.dvdy);
t0.join();
t1.join();
t2.join();
t3.join();
//GPU gradients
gradient <<< gridDim, blockDim, 0 >>> (XParam.halowidth, XModel_g.blocks.active, XModel_g.blocks.level, (T)XParam.theta, (T)XParam.delta, XModel_g.evolv.h, XModel_g.grad.dhdx, XModel_g.grad.dhdy);
CUDA_CHECK(cudaDeviceSynchronize());
gradient <<< gridDim, blockDim, 0 >>> (XParam.halowidth, XModel_g.blocks.active, XModel_g.blocks.level, (T)XParam.theta, (T)XParam.delta, XModel_g.evolv.zs, XModel_g.grad.dzsdx, XModel_g.grad.dzsdy);
CUDA_CHECK(cudaDeviceSynchronize());
gradient <<< gridDim, blockDim, 0 >>> (XParam.halowidth, XModel_g.blocks.active, XModel_g.blocks.level, (T)XParam.theta, (T)XParam.delta, XModel_g.evolv.u, XModel_g.grad.dudx, XModel_g.grad.dudy);
CUDA_CHECK(cudaDeviceSynchronize());
gradient <<< gridDim, blockDim, 0 >>> (XParam.halowidth, XModel_g.blocks.active, XModel_g.blocks.level, (T)XParam.theta, (T)XParam.delta, XModel_g.evolv.v, XModel_g.grad.dvdx, XModel_g.grad.dvdy);
CUDA_CHECK(cudaDeviceSynchronize());
std::string gradst[8] = { "dhdx","dzsdx","dudx","dvdx","dhdy","dzsdy","dudy","dvdy" };
std::vector<std::string> gradVar;
for (int nv = 0; nv < 8; nv++)
{
gradVar.push_back(gradst[nv]);
}
CompareCPUvsGPU(XParam, XModel, XModel_g, gradVar, false);
// Gradient in Halo
// CPU
gradientHalo(XParam, XModel.blocks, XModel.evolv.h, XModel.grad.dhdx, XModel.grad.dhdy);
gradientHalo(XParam, XModel.blocks, XModel.evolv.zs, XModel.grad.dzsdx, XModel.grad.dzsdy);
gradientHalo(XParam, XModel.blocks, XModel.evolv.u, XModel.grad.dudx, XModel.grad.dudy);
gradientHalo(XParam, XModel.blocks, XModel.evolv.v, XModel.grad.dvdx, XModel.grad.dvdy);
// GPU
gradientHaloGPU(XParam, XModel_g.blocks, XModel_g.evolv.h, XModel_g.grad.dhdx, XModel_g.grad.dhdy);
gradientHaloGPU(XParam, XModel_g.blocks, XModel_g.evolv.zs, XModel_g.grad.dzsdx, XModel_g.grad.dzsdy);
gradientHaloGPU(XParam, XModel_g.blocks, XModel_g.evolv.u, XModel_g.grad.dudx, XModel_g.grad.dudy);
gradientHaloGPU(XParam, XModel_g.blocks, XModel_g.evolv.v, XModel_g.grad.dvdx, XModel_g.grad.dvdy);
CompareCPUvsGPU(XParam, XModel, XModel_g, gradVar, true);
//============================================
// Kurganov scheme
std::string fluxst[8] = { "Fhu","Su","Fqux","Fqvx","Fhv","Sv","Fqvy","Fquy" };
std::vector<std::string> fluxVar;
for (int nv = 0; nv < 8; nv++)
{
fluxVar.push_back(fluxst[nv]);
}
updateKurgXCPU(XParam, XModel.blocks, XModel.evolv, XModel.grad, XModel.flux, XModel.time.dtmax, XModel.zb);
//GPU part
updateKurgXGPU <<< gridDim, blockDimKX, 0 >>> (XParam, XModel_g.blocks, XModel_g.evolv, XModel_g.grad, XModel_g.flux, XModel_g.time.dtmax, XModel_g.zb);
CUDA_CHECK(cudaDeviceSynchronize());
// Y- direction
updateKurgYCPU(XParam, XModel.blocks, XModel.evolv, XModel.grad, XModel.flux, XModel.time.dtmax, XModel.zb);
updateKurgYGPU <<< gridDim, blockDimKY, 0 >>> (XParam, XModel_g.blocks, XModel_g.evolv, XModel_g.grad, XModel_g.flux, XModel_g.time.dtmax, XModel_g.zb);
CUDA_CHECK(cudaDeviceSynchronize());
CompareCPUvsGPU(XParam, XModel, XModel_g, fluxVar, false);
fillHalo(XParam, XModel.blocks, XModel.flux);
fillHaloGPU(XParam, XModel_g.blocks, XModel_g.flux);
CompareCPUvsGPU(XParam, XModel, XModel_g, fluxVar, true);
//============================================
// Update step
std::string advst[3] = { "dh","dhu","dhv" };
std::vector<std::string> advVar;
for (int nv = 0; nv < 3; nv++)
{
advVar.push_back(advst[nv]);
}
updateEVCPU(XParam, XModel.blocks, XModel.evolv, XModel.flux, XModel.adv);
updateEVGPU <<< gridDim, blockDim, 0 >>> (XParam, XModel_g.blocks, XModel_g.evolv, XModel_g.flux, XModel_g.adv);
CUDA_CHECK(cudaDeviceSynchronize());
CompareCPUvsGPU(XParam, XModel, XModel_g, advVar, false);
//============================================
// Advance step
std::string evost[4] = { "zso","ho","uo","vo" };
std::vector<std::string> evoVar;
for (int nv = 0; nv < 4; nv++)
{
evoVar.push_back(evost[nv]);
}
AdvkernelCPU(XParam, XModel.blocks, T(0.0005), XModel.zb, XModel.evolv, XModel.adv, XModel.evolv_o);
AdvkernelGPU <<< gridDim, blockDim, 0 >>> (XParam, XModel_g.blocks, T(0.0005), XModel_g.zb, XModel_g.evolv, XModel_g.adv, XModel_g.evolv_o);
CUDA_CHECK(cudaDeviceSynchronize());
CompareCPUvsGPU(XParam, XModel, XModel_g, evoVar, false);
//============================================
// Bottom friction
bottomfrictionCPU(XParam, XModel.blocks, T(0.5), XModel.cf, XModel.evolv_o);
bottomfrictionGPU <<< gridDim, blockDim, 0 >>> (XParam, XModel_g.blocks, T(0.5), XModel_g.cf, XModel_g.evolv_o);
CUDA_CHECK(cudaDeviceSynchronize());
CompareCPUvsGPU(XParam, XModel, XModel_g, evoVar, false);
//============================================
// Repeat the full test
Loop<T> XLoop;
Loop<T> XLoop_g;
XParam.endtime = utils::min(0.5 * (XParam.ymax - XParam.yo), 0.5 * (XParam.xmax - XParam.xo)) / (sqrt(XParam.g * (testamp + initdepth)));
XParam.outputtimestep = XParam.endtime / 10.0;
XLoop.hugenegval = std::numeric_limits<T>::min();
XLoop.hugeposval = std::numeric_limits<T>::max();
XLoop.epsilon = std::numeric_limits<T>::epsilon();
XLoop.totaltime = 0.0;
//InitSave2Netcdf(XParam, XModel);
XLoop.nextoutputtime = XParam.outputtimestep;
XLoop.dtmax = initdt(XParam, XLoop, XModel);
XLoop_g.hugenegval = std::numeric_limits<T>::min();
XLoop_g.hugeposval = std::numeric_limits<T>::max();
XLoop_g.epsilon = std::numeric_limits<T>::epsilon();
XLoop_g.totaltime = 0.0;
//InitSave2Netcdf(XParam, XModel);
XLoop_g.nextoutputtime = XLoop.nextoutputtime;
XLoop_g.dtmax = XLoop.dtmax;
std::string outvi[18] = { "zb","h","zs","u","v","Fqux","Fqvx","Fquy","Fqvy", "Fhu", "Fhv", "dh", "dhu", "dhv", "ho", "vo", "uo", "cf" };
std::vector<std::string> outv;
for (int nv = 0; nv < 18; nv++)
{
outv.push_back(outvi[nv]);
}
InitArrayBUQ(XParam, XModel.blocks, T(0.0), XModel.evolv.u);
InitArrayBUQ(XParam, XModel.blocks, T(0.0), XModel.evolv.v);
reset_var <<< gridDim, blockDim, 0 >>> (XParam.halowidth, XModel_g.blocks.active, T(0.0), XModel_g.evolv.u);
CUDA_CHECK(cudaDeviceSynchronize());
reset_var <<< gridDim, blockDim, 0 >>> (XParam.halowidth, XModel_g.blocks.active, T(0.0), XModel_g.evolv.v);
CUDA_CHECK(cudaDeviceSynchronize());
Forcing<float> XForcing;
while (XLoop.totaltime < XParam.endtime)
{
FlowGPU(XParam, XLoop_g, XForcing, XModel_g);
FlowCPU(XParam, XLoop, XForcing, XModel);
XLoop.totaltime = XLoop.totaltime + XLoop.dt;
XLoop_g.totaltime = XLoop_g.totaltime + XLoop_g.dt;
if (XLoop.nextoutputtime - XLoop.totaltime <= XLoop.dt * T(0.00001) && XParam.outputtimestep > 0.0)
{
CompareCPUvsGPU(XParam, XModel, XModel_g, outv, false);
XLoop.nextoutputtime = min(XLoop.nextoutputtime + XParam.outputtimestep, XParam.endtime);
XLoop_g.nextoutputtime = XLoop.nextoutputtime;
}
}
return test;
}
template <class T> T ValleyBathy(T x, T y, T slope, T center)
{
T bathy;
bathy = (abs(x - center) + y) * slope;
return bathy;
}
template <class T> T ThackerBathy(T x, T y, T L, T D)
{
T bathy = D * ((x * x + y * y) / (L * L) - 1.0);
return bathy;
}
template <class T> bool ThackerLakeAtRest(Param XParam, T zsinit)
{
bool test = true;
// Make a Parabolic bathy
auto modeltype = XParam.doubleprecision < 1 ? float() : double();
Model<decltype(modeltype)> XModel; // For CPU pointers
Model<decltype(modeltype)> XModel_g; // For GPU pointers
Forcing<float> XForcing;
StaticForcingP<float> bathy;
XForcing.Bathy.push_back(bathy);
// initialise forcing bathymetry to 0
T Lo = T(2500.0);
T Do = T(1.0);
T x, y;
XForcing.Bathy[0].xo = -4000.0;
XForcing.Bathy[0].yo = -4000.0;
XForcing.Bathy[0].xmax = 4000.0;
XForcing.Bathy[0].ymax = 4000.0;
XForcing.Bathy[0].nx = 64;
XForcing.Bathy[0].ny = 64;
XForcing.Bathy[0].dx = 126.0;
XForcing.Bathy[0].dy = XForcing.Bathy[0].dx;
AllocateCPU(1, 1, XForcing.left.blks, XForcing.right.blks, XForcing.top.blks, XForcing.bot.blks);
AllocateCPU(XForcing.Bathy[0].nx, XForcing.Bathy[0].ny, XForcing.Bathy[0].val);
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
for (int i = 0; i < XForcing.Bathy[0].nx; i++)
{
x = T(XForcing.Bathy[0].xo + i * XForcing.Bathy[0].dx);
y = T(XForcing.Bathy[0].yo + j * XForcing.Bathy[0].dx);
XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx] = float(ThackerBathy(x, y, Lo, Do));
}
}
// Overrule whatever may be set in the param file
XParam.xmax = XForcing.Bathy[0].xmax;
XParam.ymax = XForcing.Bathy[0].ymax;
XParam.xo = XForcing.Bathy[0].xo;
XParam.yo = XForcing.Bathy[0].yo;
XParam.dx = XForcing.Bathy[0].dx;
XParam.zsinit = zsinit;
XParam.endtime = 1390.0;
XParam.outputtimestep = XParam.endtime;
checkparamsanity(XParam, XForcing);
InitMesh(XParam, XForcing, XModel);
InitialConditions(XParam, XForcing, XModel);
InitialAdaptation(XParam, XForcing, XModel);
SetupGPU(XParam, XModel, XForcing, XModel_g);
MainLoop(XParam, XForcing, XModel, XModel_g);
// Check Lake at rest state?
// all velocities should be very small
T smallvel = T(1e-6);
int i;
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XModel.blocks.active[ibl];
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < (XParam.blkwidth); ix++)
{
i = memloc(XParam, ix, iy, ib);
if (abs(XModel.evolv.u[i]) > smallvel || abs(XModel.evolv.v[i]) > smallvel)
{
log("Lake at rest state not acheived!");
test = false;
}
}
}
}
return test;
}
template bool ThackerLakeAtRest<float>(Param XParam, float zsinit);
template bool ThackerLakeAtRest<double>(Param XParam, double zsinit);
template <class T> bool RiverVolumeAdapt(Param XParam, T maxslope)
{
//T maxslope = 0.45; // tthe mass conservation is better with smaller slopes
bool UnitestA, UnitestB, UnitestC, UnitestD;
bool ctofA, ctofB, ctofC, ctofD;
bool ftocA, ftocB, ftocC, ftocD;
std::string details;
XParam.minlevel = 1;
XParam.maxlevel = 1;
XParam.initlevel = 1;
XParam.ForceMassConserve = true;
UnitestA=RiverVolumeAdapt(XParam, maxslope, false, false);
UnitestB=RiverVolumeAdapt(XParam, maxslope, true, false);
UnitestC=RiverVolumeAdapt(XParam, maxslope, false, true);
UnitestD=RiverVolumeAdapt(XParam, maxslope, true, true);
if (UnitestA && UnitestB && UnitestC && UnitestD)
{
log("River Volume Conservation Test: Uniform mesh: Success");
}
else
{
log("River Volume Conservation Test: Uniform mesh: Failed");
details = UnitestA ? "successful" : "failed";
log("\t Uniform mesh A :" + details);
details = UnitestB ? "successful" : "failed";
log("\t Uniform mesh B :" + details);
details = UnitestC ? "successful" : "failed";
log("\t Uniform mesh C :" + details);
details = UnitestD ? "successful" : "failed";
log("\t Uniform mesh D :" + details);
}
XParam.minlevel = 0;
XParam.maxlevel = 1;
XParam.initlevel = 0;
//Fine to coarse
// Change arg 1 and 2 if the slope is changed
XParam.AdaptCrit = "Inrange";
XParam.Adapt_arg1 = "28.0";
XParam.Adapt_arg2 = "40.0";
XParam.Adapt_arg3 = "zb";
ftocA = RiverVolumeAdapt(XParam, maxslope, false, false);
ftocB = RiverVolumeAdapt(XParam, maxslope, true, false);
ftocC = RiverVolumeAdapt(XParam, maxslope, false, true);
ftocD = RiverVolumeAdapt(XParam, maxslope, true, true);
if (ftocA && ftocB && ftocC && ftocD)
{
log("River Volume Conservation Test: Flow from fine to coarse adapted mesh: Success");
}
else
{
log("River Volume Conservation Test: Flow from fine to coarse adapted mesh: Failed");
details = ftocA ? "successful" : "failed";
log("\t Flow from fine to coarse adapted mesh A :" + details);
details = ftocB ? "successful" : "failed";
log("\t Flow from fine to coarse adapted mesh B :" + details);
details = ftocC ? "successful" : "failed";
log("\t Flow from fine to coarse adapted mesh C :" + details);
details = ftocD ? "successful" : "failed";
log("\t Flow from fine to coarse adapted mesh D :" + details);
}
//coarse to fine
// Change arg 1 and 2 if the slope is changed
XParam.AdaptCrit = "Inrange";
XParam.Adapt_arg1 = "0.0";
XParam.Adapt_arg2 = "2.0";
XParam.Adapt_arg3 = "zb";
ctofA = RiverVolumeAdapt(XParam, maxslope, false, false);
ctofB = RiverVolumeAdapt(XParam, maxslope, true, false);
ctofC = RiverVolumeAdapt(XParam, maxslope, false, true);
ctofD = RiverVolumeAdapt(XParam, maxslope, true, true);
if (ctofA && ctofB && ctofC && ctofD)
{
log("River Volume Conservation Test: Flow from coarse to fine adapted mesh: Success");
}
else
{
log("River Volume Conservation Test: Flow from coarse to fine adapted: Failed");
details = ctofA ? "successful" : "failed";
log("\t Flow from coarse to fine adapted mesh A :" + details);
details = ctofB ? "successful" : "failed";
log("\t Flow from coarse to fine adapted mesh B :" + details);
details = ctofC ? "successful" : "failed";
log("\t Flow from coarse to fine adapted mesh C :" + details);
details = ctofD ? "successful" : "failed";
log("\t Flow from coarse to fine adapted mesh D :" + details);
}
return (UnitestA * UnitestB * UnitestC * UnitestD * ctofA * ctofB * ctofC * ctofD * ftocA * ftocB * ftocC * ftocD);
}
template <class T> bool RiverVolumeAdapt(Param XParam, T slope, bool bottop, bool flip)
{
//bool test = true;
//
auto modeltype = XParam.doubleprecision < 1 ? float() : double();
Model<decltype(modeltype)> XModel; // For CPU pointers
Model<decltype(modeltype)> XModel_g; // For GPU pointers
Forcing<float> XForcing;
XForcing = MakValleyBathy(XParam, slope, bottop, flip);
T x, y;
T center = T(10.5);
float maxtopo = std::numeric_limits<float>::min();
float mintopo = std::numeric_limits<float>::max();
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
for (int i = 0; i < XForcing.Bathy[0].nx; i++)
{
maxtopo = max(XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx], maxtopo);
mintopo = min(XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx], mintopo);
}
}
// Overrule whatever is set in the river forcing
T Q = T(1.0);
double upstream = !flip ? 24.0 : 8;
double riverx = !bottop ? upstream : center;
double rivery = !bottop ? center : upstream;
//Create a temporary file with river fluxes
std::ofstream river_file(
"testriver.tmp", std::ios_base::out | std::ios_base::trunc);
river_file << "0.0 " + std::to_string(Q) << std::endl;
river_file << "3600.0 " + std::to_string(Q) << std::endl;
river_file.close(); //destructor implicitly does it
River thisriver;
thisriver.Riverflowfile = "testriver.tmp";
thisriver.xstart = riverx - 1.0;
thisriver.xend = riverx + 1.0;
thisriver.ystart = rivery - 1.0;
thisriver.yend = rivery + 1.0;
XForcing.rivers.push_back(thisriver);
XForcing.rivers[0].flowinput = readFlowfile(XForcing.rivers[0].Riverflowfile, XParam.reftime);
// Overrule whatever may be set in the param file
XParam.xmax = XForcing.Bathy[0].xmax;
XParam.ymax = XForcing.Bathy[0].ymax;
XParam.xo = XForcing.Bathy[0].xo;
XParam.yo = XForcing.Bathy[0].yo;
XParam.dx = XForcing.Bathy[0].dx;
XParam.zsinit = mintopo + 0.5;// Had a small amount of water to avoid a huge first step that would surely break the setup
XParam.endtime = 20.0;
XParam.outputtimestep = XParam.endtime;
checkparamsanity(XParam, XForcing);
InitMesh(XParam, XForcing, XModel);
InitialConditions(XParam, XForcing, XModel);
InitialAdaptation(XParam, XForcing, XModel);
SetupGPU(XParam, XModel, XForcing, XModel_g);
T initVol = T(0.0);
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XModel.blocks.active[ibl];
T delta = calcres(XParam.delta, XModel.blocks.level[ib]);
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < (XParam.blkwidth); ix++)
{
int i = memloc(XParam, ix, iy, ib);
initVol = initVol + T(XModel.evolv.h[i]) * delta * delta;
}
}
}
MainLoop(XParam, XForcing, XModel, XModel_g);
T TheoryInput = Q * XParam.endtime;
T SimulatedVolume = T(0.0);
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XModel.blocks.active[ibl];
T delta = calcres(XParam.delta, XModel.blocks.level[ib]);
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < (XParam.blkwidth); ix++)
{
int i = memloc(XParam, ix, iy, ib);
SimulatedVolume = SimulatedVolume + XModel.evolv.h[i] * delta * delta;
}
}
}
SimulatedVolume = SimulatedVolume - initVol;
T error = abs(SimulatedVolume - TheoryInput);
return error / TheoryInput < 0.05;
}
template <class T> bool testboundaries(Param XParam, T maxslope)
{
//T maxslope = 0.45; // the mass conservation is better with smaller slopes
bool Wall_B;// , Wall_R, Wall_L, Wall_T;
//bool ctofA, ctofB, ctofC, ctofD;
//bool ftocA, ftocB, ftocC, ftocD;
std::string details;
int Bound_type;
XParam.GPUDEVICE = 0;
maxslope = 0.0;
//Dir = 3;
Bound_type = -1;
Wall_B = RiverOnBoundary(XParam, maxslope, 3, Bound_type);
//Wall_R = RiverOnBoundary(XParam, maxslope, 0, 0);
//Wall_L = RiverOnBoundary(XParam, maxslope, 1, 0);
//Wall_T = RiverOnBoundary(XParam, maxslope, 2, 0);
/*
if (UnitestA && UnitestB && UnitestC && UnitestD)
{
log("River Volume Conservation Test: Uniform mesh: Success");
}
else
{
log("River Volume Conservation Test: Uniform mesh: Failed");
details = UnitestA ? "successful" : "failed";
log("\t Uniform mesh A :" + details);
details = UnitestB ? "successful" : "failed";
log("\t Uniform mesh B :" + details);
details = UnitestC ? "successful" : "failed";
log("\t Uniform mesh C :" + details);
details = UnitestD ? "successful" : "failed";
log("\t Uniform mesh D :" + details);
}
XParam.minlevel = 0;
XParam.maxlevel = 1;
XParam.initlevel = 0;
//Fine to coarse
// Change arg 1 and 2 if the slope is changed
XParam.AdaptCrit = "Inrange";
XParam.Adapt_arg1 = "28.0";
XParam.Adapt_arg2 = "40.0";
XParam.Adapt_arg3 = "zb";
ftocA = RiverVolumeAdapt(XParam, maxslope, false, false);
ftocB = RiverVolumeAdapt(XParam, maxslope, true, false);
ftocC = RiverVolumeAdapt(XParam, maxslope, false, true);
ftocD = RiverVolumeAdapt(XParam, maxslope, true, true);
if (ftocA && ftocB && ftocC && ftocD)
{
log("River Volume Conservation Test: Flow from fine to coarse adapted mesh: Success");
}
else
{
log("River Volume Conservation Test: Flow from fine to coarse adapted mesh: Failed");
details = ftocA ? "successful" : "failed";
log("\t Flow from fine to coarse adapted mesh A :" + details);
details = ftocB ? "successful" : "failed";
log("\t Flow from fine to coarse adapted mesh B :" + details);
details = ftocC ? "successful" : "failed";
log("\t Flow from fine to coarse adapted mesh C :" + details);
details = ftocD ? "successful" : "failed";
log("\t Flow from fine to coarse adapted mesh D :" + details);
}
//coarse to fine
// Change arg 1 and 2 if the slope is changed
XParam.AdaptCrit = "Inrange";
XParam.Adapt_arg1 = "0.0";
XParam.Adapt_arg2 = "2.0";
XParam.Adapt_arg3 = "zb";
ctofA = RiverVolumeAdapt(XParam, maxslope, false, false);
ctofB = RiverVolumeAdapt(XParam, maxslope, true, false);
ctofC = RiverVolumeAdapt(XParam, maxslope, false, true);
ctofD = RiverVolumeAdapt(XParam, maxslope, true, true);
if (ctofA && ctofB && ctofC && ctofD)
{
log("River Volume Conservation Test: Flow from coarse to fine adapted mesh: Success");
}
else
{
log("River Volume Conservation Test: Flow from coarse to fine adapted: Failed");
details = ctofA ? "successful" : "failed";
log("\t Flow from coarse to fine adapted mesh A :" + details);
details = ctofB ? "successful" : "failed";
log("\t Flow from coarse to fine adapted mesh B :" + details);
details = ctofC ? "successful" : "failed";
log("\t Flow from coarse to fine adapted mesh C :" + details);
details = ctofD ? "successful" : "failed";
log("\t Flow from coarse to fine adapted mesh D :" + details);
}*/
//return (UnitestA * UnitestB * UnitestC * UnitestD * ctofA * ctofB * ctofC * ctofD * ftocA * ftocB * ftocC * ftocD);
return(Wall_B);
}
template <class T> bool RiverOnBoundary(Param XParam, T slope, int Dir, int Bound_type)
{
//bool test = true;
// Make a Parabolic bathy
//Param XParam;
XParam.GPUDEVICE = -1;
auto modeltype = XParam.doubleprecision < 1 ? float() : double();
Model<decltype(modeltype)> XModel; // For CPU pointers
Model<decltype(modeltype)> XModel_g; // For GPU pointers
Forcing<float> XForcing;
StaticForcingP<float> bathy;
float* dummybathy;
//Boundary conditions
XForcing.top.type = 0;
XForcing.bot.type = 0;
XForcing.right.type = 0;
XForcing.left.type = 0;
//Physical wall boundary condition
bool PhysWall = 0;
if (Bound_type == -1)
{
PhysWall = 1;
Bound_type = 0;
}
if (Dir == 0) //To right
{
XForcing.right.type = Bound_type;
XForcing.top.type = 0;
}
else if (Dir == 1) //To left
{
XForcing.left.type = Bound_type;
XForcing.bot.type = 0;
}
else if (Dir == 2) //To top
{
XForcing.top.type = Bound_type;
XForcing.left.type = 0;
}
else if (Dir == 3) //To bottom
{
XForcing.bot.type = Bound_type;
XForcing.right.type = 0;
}
XForcing.Bathy.push_back(bathy);
XForcing.Bathy[0].xo = 0.0;
XForcing.Bathy[0].yo = 0.0;
XForcing.Bathy[0].xmax = 31.0;
XForcing.Bathy[0].ymax = 31.0;
XForcing.Bathy[0].nx = 32;
XForcing.Bathy[0].ny = 32;
XForcing.Bathy[0].dx = 1.0;
XForcing.Bathy[0].dy = XForcing.Bathy[0].dx;
T x, y;
T center = T(31.0);
AllocateCPU(1, 1, XForcing.left.blks, XForcing.right.blks, XForcing.top.blks, XForcing.bot.blks);
AllocateCPU(XForcing.Bathy[0].nx, XForcing.Bathy[0].ny, XForcing.Bathy[0].val);
AllocateCPU(XForcing.Bathy[0].nx, XForcing.Bathy[0].ny, dummybathy);
//float maxtopo = std::numeric_limits<float>::min();
float mintopo = 1000000000000.0f;
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
for (int i = 0; i < XForcing.Bathy[0].nx; i++)
{
x = T(XForcing.Bathy[0].xo + i * XForcing.Bathy[0].dx);
y = T(XForcing.Bathy[0].yo + j * XForcing.Bathy[0].dx);
dummybathy[i + j * XForcing.Bathy[0].nx] = float(ValleyBathy(y, x, slope, center));
//Add physical walls
if (PhysWall == 1)
{
//if (j < 3)
//{
// dummybathy[i + j * XForcing.Bathy[0].nx] = 100.0;
//}
if (j > XForcing.Bathy[0].ny - 3)
{
dummybathy[i + j * XForcing.Bathy[0].nx] = 100.0;
}
if (i > XForcing.Bathy[0].nx - 3)
{
dummybathy[i + j * XForcing.Bathy[0].nx] = 100.0;
}
if (i < 17)
{
dummybathy[i + j * XForcing.Bathy[0].nx] = 1000.0;
}
}
mintopo = utils::min(dummybathy[i + j * XForcing.Bathy[0].nx], mintopo);
//maxtopo = max(dummybathy[i + j * XForcing.Bathy[0].nx], maxtopo);
}
}
// Flip or rotate the bathy according to what is requested
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
for (int i = 0; i < XForcing.Bathy[0].nx; i++)
{
if (Dir == 1) //left wise
{
XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx] = dummybathy[i + j * XForcing.Bathy[0].nx];
}
else if (Dir == 0) //right wise
{
XForcing.Bathy[0].val[(XForcing.Bathy[0].nx - 1 - i) + j * XForcing.Bathy[0].nx] = dummybathy[i + j * XForcing.Bathy[0].nx];
}
else if (Dir == 3) //bottom wise
{
XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx] = dummybathy[j + i * XForcing.Bathy[0].nx];
}
else if (Dir == 2) //top wise
{
XForcing.Bathy[0].val[i + (XForcing.Bathy[0].ny - 1 - j) * XForcing.Bathy[0].nx] = dummybathy[j + i * XForcing.Bathy[0].nx];
}
}
}
free(dummybathy);
// Overrule whatever is set in the river forcing
T Q = T(1.0);
double riverx = (Dir == 0 | Dir == 2) ? 6.0 : 25.0; //Dir=1 =>leftward
double rivery = (Dir == 2 | Dir == 1) ? 6.0 : 25.0; //Dir=2 =>topward
//Create a temporary file with river fluxes
std::ofstream river_file(
"testriver.tmp", std::ios_base::out | std::ios_base::trunc);
river_file << "0.0 " + std::to_string(Q) << std::endl;
river_file << "3600.0 " + std::to_string(Q) << std::endl;
river_file.close(); //destructor implicitly does it
River thisriver;
thisriver.Riverflowfile = "testriver.tmp";
thisriver.xstart = riverx - 1.0;
thisriver.xend = riverx + 1.0;
thisriver.ystart = rivery - 1.0;
thisriver.yend = rivery + 1.0;
XForcing.rivers.push_back(thisriver);
XForcing.rivers[0].flowinput = readFlowfile(XForcing.rivers[0].Riverflowfile, XParam.reftime);
// Overrule whatever may be set in the param file
XParam.xmax = XForcing.Bathy[0].xmax;
XParam.ymax = XForcing.Bathy[0].ymax;
XParam.xo = XForcing.Bathy[0].xo;
XParam.yo = XForcing.Bathy[0].yo;
XParam.dx = XForcing.Bathy[0].dx;
XParam.zsinit = mintopo + 0.5;// Had a small amount of water to avoid a huge first step that would surely break the setup
//XParam.zsoffset = 0.2;
XParam.endtime = 50.0;
XParam.dtinit = 0.1;
XParam.mask = 999.0;
XParam.outishift = 0;
XParam.outjshift = 0;
XParam.ForceMassConserve = true;
XParam.outputtimestep = 10.0;// XParam.endtime;
checkparamsanity(XParam, XForcing);
InitMesh(XParam, XForcing, XModel);
InitialConditions(XParam, XForcing, XModel);
InitialAdaptation(XParam, XForcing, XModel);
InitSave2Netcdf(XParam, XModel);
SetupGPU(XParam, XModel, XForcing, XModel_g);
T initVol = T(0.0);
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XModel.blocks.active[ibl];
T delta = calcres(XParam.delta, XModel.blocks.level[ib]);
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < (XParam.blkwidth); ix++)
{
int i = memloc(XParam, ix, iy, ib);
initVol = initVol + XModel.evolv.h[i] * delta * delta;
}
}
}
MainLoop(XParam, XForcing, XModel, XModel_g);
T TheoryInput = Q * (T)XParam.endtime;
T SimulatedVolume = T(0.0);
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XModel.blocks.active[ibl];
T delta = calcres(XParam.delta, XModel.blocks.level[ib]);
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < (XParam.blkwidth); ix++)
{
int i = memloc(XParam, ix, iy, ib);
SimulatedVolume = SimulatedVolume + XModel.evolv.h[i] * delta * delta;
}
}
}
printf("End Volume : %f \n", SimulatedVolume);
printf("Init Volume : %f \n", initVol);
SimulatedVolume = SimulatedVolume - initVol;
printf("End Volume - Init volume : %f \n", SimulatedVolume);
T error = abs(SimulatedVolume - TheoryInput);
printf("error : %f \n", error);
printf("Theory input : %f \n", TheoryInput);
printf("return : %f \n", (error / TheoryInput));
return error / TheoryInput < 0.01;
}
template <class T> bool LakeAtRest(Param XParam, Model<T> XModel)
{
T epsi = T(1e-5);
int ib;
bool test = true;
Loop<T> XLoop = InitLoop(XParam, XModel);
fillHaloC(XParam, XModel.blocks, XModel.zb);
gradientC(XParam, XModel.blocks, XModel.zb, XModel.grad.dzbdx, XModel.grad.dzbdy);
gradientHalo(XParam, XModel.blocks, XModel.zb, XModel.grad.dzbdx, XModel.grad.dzbdy);
refine_linear(XParam, XModel.blocks, XModel.zb, XModel.grad.dzbdx, XModel.grad.dzbdy);
gradientHalo(XParam, XModel.blocks, XModel.zb, XModel.grad.dzbdx, XModel.grad.dzbdy);
//============================================
// Predictor step in reimann solver
//============================================
//============================================
// Fill the halo for gradient reconstruction
fillHalo(XParam, XModel.blocks, XModel.evolv, XModel.zb);
//============================================
// Reset DTmax
InitArrayBUQ(XParam, XModel.blocks, XLoop.hugeposval, XModel.time.dtmax);
//============================================
// Calculate gradient for evolving parameters
gradientCPU(XParam, XModel.blocks, XModel.evolv, XModel.grad, XModel.zb);
//============================================
// Flux and Source term reconstruction
// X- direction
//updateKurgXCPU(XParam, XModel.blocks, XModel.evolv, XModel.grad, XModel.flux, XModel.time.dtmax, XModel.zb);
UpdateButtingerXCPU(XParam, XModel.blocks, XModel.evolv, XModel.grad, XModel.flux, XModel.time.dtmax, XModel.zb);
//AddSlopeSourceXCPU(XParam, XModel.blocks, XModel.evolv, XModel.grad, XModel.flux, XModel.zb);
// Y- direction
//updateKurgYCPU(XParam, XModel.blocks, XModel.evolv, XModel.grad, XModel.flux, XModel.time.dtmax, XModel.zb);
UpdateButtingerYCPU(XParam, XModel.blocks, XModel.evolv, XModel.grad, XModel.flux, XModel.time.dtmax, XModel.zb);
//AddSlopeSourceYCPU(XParam, XModel.blocks, XModel.evolv, XModel.grad, XModel.flux, XModel.zb);
//============================================
// Fill Halo for flux from fine to coarse
fillHalo(XParam, XModel.blocks, XModel.flux);
// Do we need to check also before fill halo part?
// Check Fhu and Fhv (they should be zero)
int i, iright;
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
ib = XModel.blocks.active[ibl];
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < (XParam.blkwidth); ix++)
{
i = memloc(XParam, ix, iy, ib);
iright = memloc(XParam, ix + 1, iy, ib);
//ileft = memloc(XParam, ix - 1, iy, ib);
//itop = memloc(XParam, ix, iy + 1, ib);
//ibot = memloc(XParam, ix, iy - 1, ib);
if (abs(XModel.flux.Fhu[i]) > epsi)
{
log("Fhu is not zero. Lake at rest not preserved!!!");
test = false;
}
if (abs(XModel.flux.Fhv[i]) > epsi)
{
log("Fhv is not zero. Lake at rest not preserved!!!");
test = false;
}
T dhus = (XModel.flux.Fqux[i] - XModel.flux.Su[iright]);
if (abs(dhus) > epsi)
{
test = false;
log("dhu is not zero. Lake at rest not preserved!!!");
printf("Fhu[i]=%f\n", XModel.flux.Fhu[i]);
printf("Fqux[i]=%f; Su[iright]=%f; Diff=%f \n", XModel.flux.Fqux[i], XModel.flux.Su[iright], (XModel.flux.Fqux[i] - XModel.flux.Su[iright]));
printf(" At i: (ib=%d; ix=%d; iy=%d)\n", ib, ix, iy);
testButtingerX(XParam, ib, ix, iy, XModel);
printf(" At iright: (ib=%d; ix=%d; iy=%d)\n", ib, ix + 1, iy);
testButtingerX(XParam, ib, ix + 1, iy, XModel);
}
}
}
}
if (!test)
{
copyID2var(XParam, XModel.blocks, XModel.flux.Fhu);
copyBlockinfo2var(XParam, XModel.blocks, XModel.blocks.LeftBot, XModel.grad.dhdx);
copyBlockinfo2var(XParam, XModel.blocks, XModel.blocks.LeftTop, XModel.grad.dhdy);
copyBlockinfo2var(XParam, XModel.blocks, XModel.blocks.TopLeft, XModel.grad.dzsdx);
copyBlockinfo2var(XParam, XModel.blocks, XModel.blocks.TopRight, XModel.grad.dzsdy);
copyBlockinfo2var(XParam, XModel.blocks, XModel.blocks.RightTop, XModel.grad.dudx);
copyBlockinfo2var(XParam, XModel.blocks, XModel.blocks.RightBot, XModel.grad.dudy);
copyBlockinfo2var(XParam, XModel.blocks, XModel.blocks.BotRight, XModel.grad.dvdx);
copyBlockinfo2var(XParam, XModel.blocks, XModel.blocks.BotLeft, XModel.grad.dvdy);
creatncfileBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "blockID", 3, XModel.flux.Fhu, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "LeftBot", 3, XModel.grad.dhdx, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "LeftTop", 3, XModel.grad.dhdy, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "TopLeft", 3, XModel.grad.dzsdx, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "TopRight", 3, XModel.grad.dzsdy, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "RightTop", 3, XModel.grad.dudx, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "RightBot", 3, XModel.grad.dudy, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "BotLeft", 3, XModel.grad.dvdx, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "BotRight", 3, XModel.grad.dvdy, XModel.blocks.outZone[0]);
}
return test;
}
template <class T> void testButtingerX(Param XParam, int ib, int ix, int iy, Model<T> XModel)
{
int RB, levRB, LBRB, LB, levLB, RBLB;
int i = memloc(XParam.halowidth, XParam.blkmemwidth, ix, iy, ib);
int ileft = memloc(XParam.halowidth, XParam.blkmemwidth, ix - 1, iy, ib);
int lev = XModel.blocks.level[ib];
T delta = calcres(T(XParam.delta), lev);
T g = T(XParam.g);
T CFL = T(XParam.CFL);
T epsi = nextafter(T(1.0), T(2.0)) - T(1.0);
T eps = T(XParam.eps) + epsi;
// neighbours for source term
RB = XModel.blocks.RightBot[ib];
levRB = XModel.blocks.level[RB];
LBRB = XModel.blocks.LeftBot[RB];
LB = XModel.blocks.LeftBot[ib];
levLB = XModel.blocks.level[LB];
RBLB = XModel.blocks.RightBot[LB];
T cm = T(1.0);
T fmu = T(1.0);
T hi = XModel.evolv.h[i];
T hn = XModel.evolv.h[ileft];
//if (hi > eps || hn > eps)
{
T dx, zi, zn, hr, hl, etar, etal, zr, zl, zA, zCN, hCNr, hCNl;
T ui, vi, uli, vli, dhdxi, dhdxil, dudxi, dudxil, dvdxi, dvdxil;
T ga = g * T(0.5);
// along X
dx = delta * T(0.5);
zi = XModel.zb[i];
zn = XModel.zb[ileft];
ui = XModel.evolv.u[i];
vi = XModel.evolv.v[i];
uli = XModel.evolv.u[ileft];
vli = XModel.evolv.v[ileft];
dhdxi = XModel.grad.dhdx[i];
dhdxil = XModel.grad.dhdx[ileft];
dudxi = XModel.grad.dudx[i];
dudxil = XModel.grad.dudx[ileft];
dvdxi = XModel.grad.dvdx[i];
dvdxil = XModel.grad.dvdx[ileft];
hr = hi - dx * dhdxi;
hl = hn + dx * dhdxil;
etar = XModel.evolv.zs[i] - dx * XModel.grad.dzsdx[i];
etal = XModel.evolv.zs[ileft] + dx * XModel.grad.dzsdx[ileft];
//define the topography term at the interfaces
zr = zi - dx * XModel.grad.dzbdx[i];
zl = zn + dx * XModel.grad.dzbdx[ileft];
//define the Audusse terms
zA = max(zr, zl);
// Now the CN terms
zCN = min(zA, min(etal, etar));
hCNr = max(T(0.0), min(etar - zCN, hr));
hCNl = max(T(0.0), min(etal - zCN, hl));
//Velocity reconstruction
//To avoid high velocities near dry cells, we reconstruct velocities according to Bouchut.
T ul, ur, vl, vr, sl, sr;
if (hi > eps) {
ur = ui - (T(1.) + dx * dhdxi / hi) * dx * dudxi;
vr = vi - (T(1.) + dx * dhdxi / hi) * dx * dvdxi;
}
else {
ur = ui - dx * dudxi;
vr = vi - dx * dvdxi;
}
if (hn > eps) {
ul = uli + (T(1.) - dx * dhdxil / hn) * dx * dudxil;
vl = vli + (T(1.) - dx * dhdxil / hn) * dx * dvdxil;
}
else {
ul = uli + dx * dudxil;
vl = vli + dx * dvdxil;
}
T fh, fu, fv, dt;
//solver below also modifies fh and fu
dt = hllc(g, delta, epsi, CFL, cm, fmu, hCNl, hCNr, ul, ur, fh, fu);
//hllc(T g, T delta, T epsi, T CFL, T cm, T fm, T hm, T hp, T um, T up, T & fh, T & fq)
fv = (fh > 0. ? vl : vr) * fh;
// Topographic source term
// In the case of adaptive refinement, care must be taken to ensure
// well-balancing at coarse/fine faces (see [notes/balanced.tm]()).
if ((ix == XParam.blkwidth) && levRB < lev)//(ix==16) i.e. in the right halo
{
int jj = LBRB == ib ? ftoi(floor(iy * (T)0.5)) : ftoi(floor(iy * (T)0.5) + XParam.blkwidth / 2);
int iright = memloc(XParam.halowidth, XParam.blkmemwidth, 0, jj, RB);;
hi = XModel.evolv.h[iright];
zi = XModel.zb[iright];
}
if ((ix == 0) && levLB < lev)//(ix==16) i.e. in the right halo if you
{
int jj = RBLB == ib ? ftoi(floor(iy * (T)0.5)) : ftoi(floor(iy * (T)0.5) + XParam.blkwidth / 2);
int ilc = memloc(XParam.halowidth, XParam.blkmemwidth, XParam.blkwidth - 1, jj, LB);
//int ilc = memloc(halowidth, blkmemwidth, -1, iy, ib);
hn = XModel.evolv.h[ilc];
zn = XModel.zb[ilc];
}
sl = ga * (hi + hCNr) * (zi - zCN);
sr = ga * (hCNl + hn) * (zn - zCN);
printf("dt=%f; etar=%f; etal=%f; zCN=%f; zi=%f; zn=%f; zA=%f, zr=%f, zl=%f\n", dt, etar, etal, zCN, zi, zn, zA, zr, zl);
printf("hi=%f; hn=%f,fh=%f; fu=%f; sl=%f; sr=%f; hCNl=%f; hCNr=%f; hr=%f; hl=%f; zr=%f; zl=%f;\n", hi, hn, fh, fu, sl, sr, hCNl, hCNr, hr, hl, zr, zl);
printf("h[i]=%f; h[ileft]=%f dhdx[i]=%f, dhdx[ileft]=%f, zs[i]=%f, zs[ileft]=%f, dzsdx[i]=%f, dzsdx[ileft]=%f, dzbdx[i]=%f, dzbdx[ileft]=%f\n\n", XModel.evolv.h[i], XModel.evolv.h[ileft], XModel.grad.dhdx[i], XModel.grad.dhdx[ileft], XModel.evolv.zs[i], XModel.evolv.zs[ileft], XModel.grad.dzsdx[i], XModel.grad.dzsdx[ileft], XModel.grad.dzbdx[i], XModel.grad.dzbdx[ileft]);
}
}
template <class T> void testkurganovX(Param XParam, int ib, int ix, int iy, Model<T> XModel)
{
int RB, levRB, LBRB, LB, levLB, RBLB;
int i = memloc(XParam.halowidth, XParam.blkmemwidth, ix, iy, ib);
int ileft = memloc(XParam.halowidth, XParam.blkmemwidth, ix - 1, iy, ib);
int lev = XModel.blocks.level[ib];
T delta = calcres(T(XParam.delta), lev);
T g = T(XParam.g);
T CFL = T(XParam.CFL);
T epsi = nextafter(T(1.0), T(2.0)) - T(1.0);
T eps = T(XParam.eps) + epsi;
// neighbours for source term
RB = XModel.blocks.RightBot[ib];
levRB = XModel.blocks.level[RB];
LBRB = XModel.blocks.LeftBot[RB];
LB = XModel.blocks.LeftBot[ib];
levLB = XModel.blocks.level[LB];
RBLB = XModel.blocks.RightBot[LB];
T dhdxi = XModel.grad.dhdx[i];
T dhdxmin = XModel.grad.dhdx[ileft];
T cm = T(1.0);
T fmu = T(1.0);
T hi = XModel.evolv.h[i];
T hn = XModel.evolv.h[ileft];
T dx, zi, zl, zn, zr, zlr, hl, up, hp, hr, um, hm, ga;
// along X
dx = delta * T(0.5);
zi = XModel.evolv.zs[i] - hi;
//printf("%f\n", zi);
//zl = zi - dx*(dzsdx[i] - dhdx[i]);
zl = zi - dx * (XModel.grad.dzsdx[i] - dhdxi);
//printf("%f\n", zl);
zn = XModel.evolv.zs[ileft] - hn;
//printf("%f\n", zn);
zr = zn + dx * (XModel.grad.dzsdx[ileft] - dhdxmin);
zlr = max(zl, zr);
//hl = hi - dx*dhdx[i];
hl = hi - dx * dhdxi;
up = XModel.evolv.u[i] - dx * XModel.grad.dudx[i];
hp = max(T(0.0), hl + zl - zlr);
hr = hn + dx * dhdxmin;
um = XModel.evolv.u[ileft] + dx * XModel.grad.dudx[ileft];
hm = max(T(0.0), hr + zr - zlr);
ga = g * T(0.5);
T fh, fu, fv, sl, sr, dt;
//solver below also modifies fh and fu
dt = KurgSolver(g, delta, epsi, CFL, cm, fmu, hp, hm, up, um, fh, fu);
if ((ix == XParam.blkwidth) && levRB < lev)//(ix==16) i.e. in the right halo
{
int jj = LBRB == ib ? floor(iy * (T)0.5) : floor(iy * (T)0.5) + XParam.blkwidth / 2;
int iright = memloc(XParam.halowidth, XParam.blkmemwidth, 0, jj, RB);;
hi = XModel.evolv.h[iright];
zi = XModel.zb[iright];
}
if ((ix == 0) && levLB < lev)//(ix==16) i.e. in the right halo if you
{
int jj = RBLB == ib ? floor(iy * (T)0.5) : floor(iy * (T)0.5) + XParam.blkwidth / 2;
int ilc = memloc(XParam.halowidth, XParam.blkmemwidth, XParam.blkwidth - 1, jj, LB);
hn = XModel.evolv.h[ilc];
zn = XModel.zb[ilc];
}
sl = ga * (utils::sq(hp) - utils::sq(hl) + (hl + hi) * (zi - zl));
sr = ga * (utils::sq(hm) - utils::sq(hr) + (hr + hn) * (zn - zr));
//Fhu[i] = fmu * fh;
//Fqux[i] = fmu * (fu - sl);
//Su[i] = fmu * (fu - sr);
//Fqvx[i] = fmu * fv;
printf("hi=%f; hn=%f,fh=%f; fu=%f; sl=%f; sr=%f; hp=%f; hm=%f; hr=%f; hl=%f; zr=%f; zl=%f;\n", hi, hn, fh, fu, sl, sr, hp, hm, hr, hl, zr, zl);
printf("h[i]=%f; h[ileft]=%f dhdx[i]=%f, dhdx[ileft]=%f, zs[i]=%f, zs[ileft]=%f, dzsdx[i]=%f, dzsdx[ileft]=%f\n", XModel.evolv.h[i], XModel.evolv.h[ileft], XModel.grad.dhdx[i], XModel.grad.dhdx[ileft], XModel.evolv.zs[i], XModel.evolv.zs[ileft], XModel.grad.dzsdx[i], XModel.grad.dzsdx[ileft]);
}
template <class T> bool Raintest(T zsnit, int gpu, float alpha,int engine)
{
log("#####");
Param XParam;
T initVol, TheoryInput;
TheoryInput = T(0.0);
// initialise domain and required resolution
XParam.dx = 1.0 / ((1 << 6)); //1<<8 = 2^8
XParam.xo = -0.5;
XParam.yo = -0.5;
XParam.xmax = 0.5;
XParam.ymax = 0.5;
XParam.engine = engine;
//XParam.initlevel = 0;
//XParam.minlevel = 0;
//XParam.maxlevel = 0;
XParam.zsinit = zsnit;
//XParam.zsoffset = 0.0;
//Output times for comparisons
XParam.endtime = 1.0;
XParam.outputtimestep = 0.1;
XParam.smallnc = 0;
XParam.cf = 0.01;
XParam.frictionmodel = 0;
//Specification of the test
//XParam.test = 7;
XParam.rainforcing = true;
XParam.ForceMassConserve = true;
// Enforce GPU/CPU
XParam.GPUDEVICE = gpu;
XParam.rainbnd = true;
//output vars
std::vector<std::string> outv = { "zb","h","zs","u","v" };
XParam.outvars = outv;
// create Model setup
Model<T> XModel;
Model<T> XModel_g;
Forcing<float> XForcing;
StaticForcingP<float> bathy;
XForcing.Bathy.push_back(bathy);
// initialise forcing bathymetry to 0
XForcing.Bathy[0].xo = -1.0;
XForcing.Bathy[0].yo = -1.0;
XForcing.Bathy[0].xmax = 1.0;
XForcing.Bathy[0].ymax = 1.0;
XForcing.Bathy[0].nx = 3;
XForcing.Bathy[0].ny = 3;
XForcing.Bathy[0].dx = 1.0;
XForcing.Bathy[0].dy = XForcing.Bathy[0].dx;
AllocateCPU(1, 1, XForcing.left.blks, XForcing.right.blks, XForcing.top.blks, XForcing.bot.blks);
AllocateCPU(XForcing.Bathy[0].nx, XForcing.Bathy[0].ny, XForcing.Bathy[0].val);
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
for (int i = 0; i < XForcing.Bathy[0].nx; i++)
{
XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx] = i * alpha / 100;
}
}
// Add wall boundary conditions
XForcing.right.type = 0;
XForcing.left.type = 0;
XForcing.top.type = 0;
XForcing.bot.type = 0;
//Value definition for surface rain fall
T Q = 300; // mm/hr
std::cout << "# Theoretical volume of water input during the simulation in m3: " << TheoryInput << ", from a rain input of: " << Q << "mm/hr." << std::endl;
//Create a temporary file with rain fluxes
std::ofstream rain_file(
"testrain.tmp", std::ios_base::out | std::ios_base::trunc);
rain_file << "0.0 " + std::to_string(Q) << std::endl;
rain_file << "3600.0 " + std::to_string(Q) << std::endl;
rain_file.close(); //destructor implicitly does it
XForcing.Rain.inputfile = "testrain.tmp";
XForcing.Rain.uniform = true;
// Reading rain forcing from file for CPU and unifor rain
XForcing.Rain.unidata = readINfileUNI(XForcing.Rain.inputfile, XParam.reftime);
checkparamsanity(XParam, XForcing);
InitMesh(XParam, XForcing, XModel);
InitialConditions(XParam, XForcing, XModel);
InitialAdaptation(XParam, XForcing, XModel);
SetupGPU(XParam, XModel, XForcing, XModel_g);
initVol = T(0.0);
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XModel.blocks.active[ibl];
T delta = calcres(XParam.delta, XModel.blocks.level[ib]);
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < (XParam.blkwidth); ix++)
{
int i = memloc(XParam, ix, iy, ib);
initVol = initVol + XModel.evolv.h[i] * delta * delta;
}
}
}
MainLoop(XParam, XForcing, XModel, XModel_g);
TheoryInput = Q / T(1000.0) / T(3600.0) * XParam.endtime;
T SimulatedVolume = T(0.0);
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XModel.blocks.active[ibl];
T delta = calcres(XParam.delta, XModel.blocks.level[ib]);
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < (XParam.blkwidth); ix++)
{
int i = memloc(XParam, ix, iy, ib);
SimulatedVolume = SimulatedVolume + XModel.evolv.h[i] * delta * delta;
}
}
}
SimulatedVolume = SimulatedVolume - initVol;
T error = abs(SimulatedVolume - TheoryInput);
T modelgood = error / TheoryInput < 0.05;
//log("#####");
return modelgood;
}
bool Raintestinput(int gpu)
{
//Results of the experiment of Aureli, interpolated to output values
bool modelgood1, modelgood2;
std::string result;
//int dim_flux;
std::vector<float> Flux1D, Flux3DUni, Flux3D, Flux_obs;
float diff, ref, error;
//Comparison between the 1D forcing and the 3D hommgeneous forcing
Flux1D = Raintestmap(gpu, 1, -0.03);
Flux3DUni = Raintestmap(gpu, 31, -0.03);
ref = 0.0;
diff = 0.0;
for (int i = 0; i < Flux1D.size(); i++)
{
diff = diff + Flux1D[i] - Flux3DUni[i];
ref = ref + Flux1D[i];
}
error = abs(diff / ref);
printf("Error %f \n", error);
modelgood1 = error < 0.005;
result = modelgood1 ? "successful" : "failed";
log("\t\tRain test input 1D vs 3D homogeneous: " + result);
//Comparison between the 3D forcing and the observations from Iwagaki1955.
//From Observations
//Flux_obs = { 1.75136262, 4.31856716, 24.36350225, 32.02235696, 32.41207121,
// 31.68632601, 29.8140878 , 47.9632521 , 68.78608061, 57.03656989 };
//From BG_run of the testcase
Flux_obs = { 4.003079, 12.664897, 25.376514, 33.214722, 34.987427, 34.054474,
32.696472, 30.718161, 89.497993, 58.156021 };
Flux3D = Raintestmap(gpu, 3, -0.03);
ref = 0.0;
diff = 0.0;
for (int i = 0; i < Flux3D.size(); i++)
{
diff = diff + Flux_obs[i] - Flux3D[i];
ref = ref + Flux3D[i];
}
error = abs(diff / ref);
printf("Error %f \n", error);
modelgood2 = error < 0.00005;
result = modelgood2 ? "successful" : "failed";
log("\t\tRain test input 3D map vs Iwagaki1955: " + result);
return (modelgood1 * modelgood2);
}
template <class T> std::vector<float> Raintestmap(int gpu, int dimf, T zinit)
{
log("#####");
int k;
T rainDuration = 10.0;
int NX = 2502;
int NY = 22;
int NT;
double* xRain;
double* yRain;
double* tRain;
double* rainForcing;
Param XParam;
T delta;
// initialise domain and required resolution
XParam.xo = 0;
XParam.yo = 0;
XParam.ymax = 0.196;
XParam.dx = (XParam.ymax - XParam.yo) / (1 << 1);
XParam.delta = XParam.dx;
double Xmax_exp = 28.0; //minimum Xmax position (adjust to have a "full blocks" config)
//Calculating xmax to have full blocs with at least a full block behaving as a reservoir
XParam.xmax = XParam.xo + (16 * XParam.dx) * std::ceil((Xmax_exp - XParam.xo) / (16 * XParam.dx)) + (16 * XParam.dx);
//Surf = T((XParam.xmax - XParam.xo) * (XParam.ymax - XParam.yo));
XParam.nblk = ftoi(((XParam.xmax - XParam.xo) / XParam.dx / 16) * ((XParam.ymax - XParam.yo) / XParam.dx / 16));
XParam.rainbnd = true;
XParam.zsinit = zinit;
//Output times for comparisons
XParam.endtime = 30.0;
XParam.outputtimestep = 3.0;
XParam.smallnc = 0;
//Specification of the test
XParam.test = 8;
// Enforce GPU/CPU
XParam.GPUDEVICE = gpu;
//Bottom friction
XParam.frictionmodel = -1; //Manning model
XParam.cf = 0.009; //n in m^(-1/3)s
std::string outvi[16] = { "zb","h","zs","u","v","Fqux","Fqvx","Fquy","Fqvy", "Fhu", "Fhv", "dh", "dhu", "dhv", "Su", "Sv" };
std::vector<std::string> outv;
for (int nv = 0; nv < 15; nv++)
{
outv.push_back(outvi[nv]);
}
XParam.outvars = outv;
// create Model setup
Model<T> XModel;
Model<T> XModel_g;
Forcing<float> XForcing;
StaticForcingP<float> bathy;
XForcing.Bathy.push_back(bathy);
// initialise forcing bathymetry to 0
XForcing.Bathy[0].xo = -1.0;
XForcing.Bathy[0].yo = -1.0;
XForcing.Bathy[0].xmax = 28.0;
XForcing.Bathy[0].ymax = 1.0;
XForcing.Bathy[0].dx = 0.1;
XForcing.Bathy[0].dy = XForcing.Bathy[0].dx;
XForcing.Bathy[0].nx = ftoi((XForcing.Bathy[0].xmax - XForcing.Bathy[0].xo) / XForcing.Bathy[0].dx + 1);
XForcing.Bathy[0].ny = ftoi((XForcing.Bathy[0].ymax - XForcing.Bathy[0].yo) / XForcing.Bathy[0].dx + 1);
AllocateCPU(1, 1, XForcing.left.blks, XForcing.right.blks, XForcing.top.blks, XForcing.bot.blks);
AllocateCPU(XForcing.Bathy[0].nx, XForcing.Bathy[0].ny, XForcing.Bathy[0].val);
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
for (int i = 0; i < XForcing.Bathy[0].nx; i++)
{
XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx] = -10.0;
if (i < (9 / XForcing.Bathy[0].dx + 1))
{
XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx] = T(0.2 + (9.0 - i * XForcing.Bathy[0].dx) * 2.0 / 100.0);
}
else if (i < (17 / XForcing.Bathy[0].dx + 1))
{
XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx] = T(0.08 + (17.0 - i * XForcing.Bathy[0].dx) * 1.5 / 100.0);
}
else if (i < (25 / XForcing.Bathy[0].dx + 1))
{
XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx] = T(0.0 + (25.0 - i * XForcing.Bathy[0].dx) * 1.0 / 100.0);
}
}
}
// Add wall boundary conditions but at the bottom of the slope
//XForcing.right.type = 0;
XForcing.left.type = 0;
//XForcing.top.type = 0;
//XForcing.bot.type = 0;
//Value definition for surface rain fall
T r1 = T(3888.0); // mm/hr
T r2 = T(2296.8); //mm/hr
T r3 = T(2880.0); //mm/hr
T Q = (r1 + r2 + r3) / 3;
//TheoryInput = Q * XParam.outputtimestep / T(1000.0) / T(3600.0) * Surf; //m3/s
//printf("# Theoretical volume of water input during the simulation in m3: %f , from a mean rain input of: %f mm/hr.\n", TheoryInput, Q);
double eps = 0.0001;
// Create the rain forcing file
if (dimf == 1)
{
//Create a temporary file with rain fluxes for uniform rain
std::ofstream rain_file(
"testrain.tmp", std::ios_base::out | std::ios_base::trunc);
rain_file << "0.0 " + std::to_string(Q) << std::endl;
rain_file << std::to_string(rainDuration) + " " + std::to_string(Q) << std::endl;
rain_file << std::to_string(rainDuration + eps) + " 0.0" << std::endl;
rain_file << std::to_string(rainDuration + 360000) + " 0.0" << std::endl;
rain_file.close(); //destructor implicitly does it
XForcing.Rain.inputfile = "testrain.tmp";
XForcing.Rain.uniform = true;
// Reading rain forcing from file for CPU and uniform rain
XForcing.Rain.unidata = readINfileUNI(XForcing.Rain.inputfile, XParam.reftime);
printf("1D rain forcing read\n");
}
else //non-uniform forcing
{
XForcing.Rain.uniform = false;
//X Y variables
xRain = (double*)malloc(sizeof(double) * NX);
yRain = (double*)malloc(sizeof(double) * NY);
for (int i = 0; i < NX; i++) { xRain[i] = -0.005 + 0.01 * i; }
for (int j = 0; j < NY; j++) { yRain[j] = -0.01 + 0.01 * j; }
NT = 601;
tRain = (double*)malloc(sizeof(double) * NT);
for (int tt = 0; tt < NT; tt++) { tRain[tt] = XParam.endtime / (NT - 1) * tt; }
rainForcing = (double*)malloc(sizeof(double) * NT * NY * NX);
//Create a non-uniform time-variable rain forcing
if (dimf == 3)
{
//Create the rain forcing:
for (k = 0; k < NT; k++)
{
for (int j = 0; j < NY; j++)
{
for (int i = 0; i < NX; i++)
{
if (tRain[k] < rainDuration + eps)
{
if (xRain[i] < 8.0)
{
rainForcing[k * (NX * NY) + j * NX + i] = r1;
}
else if (xRain[i] < 16.0)
{
rainForcing[k * (NX * NY) + j * NX + i] = r2;
}
else
{
rainForcing[k * (NX * NY) + j * NX + i] = r3;
}
}
else
{
rainForcing[k * (NX * NY) + i * NY + j] = 0.0;
}
}
}
}
//Write the netcdf file
create3dnc("rainTemp.nc", NX, NY, NT, xRain, yRain, tRain, rainForcing, "myrainforcing");
printf("non-uniform forcing\n");
//End creation of the nc file for rain forcing
}
//Create a uniform time-variable rain forcing using a map forcing (nc file)
else if (dimf == 31)
{
//Create the rain forcing:
for (k = 0; k < NT; k++)
{
for (int j = 0; j < NY; j++)
{
for (int i = 0; i < NX; i++)
{
if (tRain[k] < rainDuration + eps)
{
rainForcing[k * (NX * NY) + j * NX + i] = Q;
}
else
{
rainForcing[k * (NX * NY) + i * NY + j] = 0.0;
}
}
}
}
//Write the netcdf file
create3dnc("rainTemp.nc", NX, NY, NT, xRain, yRain, tRain, rainForcing, "myrainforcing");
printf("non-uniform forcing 31\n");
//End creation of the nc file for rain forcing
}
/*
//2D forcing (map without time variation is not working)
else if (dimf == 2)//dimf==2 for rain forcing
{
//Create a non-uniform time-constant rain forcing
rainForcing = (double*)malloc(sizeof(double) * NY * NX);
//Create the rain forcing:
for (int j = 0; j < NY; j++)
{
for (int i = 0; i < NX; i++)
{
if (xRain[i] < 8.0)
{
rainForcing[j * NX + i] = r1;
}
else if (xRain[i] < 16.0)
{
rainForcing[j * NX + i] = r2;
}
else
{
rainForcing[j * NX + i] = r3;
}
}
}
create2dnc("rainTempt.nc", NX, NY, xRain, yRain, rainForcing, "myrainforcing");
//End creation of the nc file for rain forcing
}
*/
else { printf("Error in rain forcing dimension (should be in [1,3,31])\n"); }
//Reading non-unform forcing
bool gpgpu = 0;
if (XParam.GPUDEVICE != -1)
{
gpgpu = 1;
}
XForcing.Rain = readfileinfo("rainTemp.nc", XForcing.Rain);
XForcing.Rain.uniform = 0;
XForcing.Rain.varname = "myrainforcing";
InitDynforcing(gpgpu, XParam, XForcing.Rain);
//readDynforcing(gpgpu, XParam.totaltime, XForcing.Rain);
free(rainForcing);
free(xRain);
free(yRain);
free(tRain);
}
checkparamsanity(XParam, XForcing);
InitMesh(XParam, XForcing, XModel);
InitialConditions(XParam, XForcing, XModel);
InitialAdaptation(XParam, XForcing, XModel);
SetupGPU(XParam, XModel, XForcing, XModel_g);
log("Initialising model main loop");
Loop<T> XLoop = InitLoop(XParam, XModel);
//Define some useful variables
Initmeanmax(XParam, XLoop, XModel, XModel_g);
log("\t\tCompleted");
log("Model Running...");
std::vector<float> Flux;
while (XLoop.totaltime < XParam.endtime)
{
// Calculate Forcing at this step
updateforcing(XParam, XLoop, XForcing);
// Core engine
if (XParam.GPUDEVICE >= 0)
{
FlowGPU(XParam, XLoop, XForcing, XModel_g);
}
else
{
FlowCPU(XParam, XLoop, XForcing, XModel);
}
// Time keeping
XLoop.totaltime = XLoop.totaltime + XLoop.dt;
//printf("\tTime = %f \n", XLoop.totaltime);
//if Toutput, calculate the flux at x=24m;
// Getting the coordinate for the flux calculation
int bl, ixx, ibl, ix, ib;
T dist = T(1000000000.0);
for (ibl = 0; ibl < XParam.nblk; ibl++)
{
ib = XModel.blocks.active[ibl];
delta = calcres(T(XParam.dx), XModel.blocks.level[ib]);
for (ix = 0; ix < XParam.blkwidth; ix++)
{
//n = memloc(XParam, ix, 0, ib);
if (abs(XModel.blocks.xo[ibl] + ix * delta - 24.0) < dist)
{
ixx = ix;
bl = ibl;
dist = T(abs(XModel.blocks.xo[ibl] + ix * delta - 24.0));
}
}
}
if (XLoop.nextoutputtime - XLoop.totaltime <= XLoop.dt * T(0.00001) && XParam.outputtimestep > 0.0)
{
T finalFlux = T(0.0);
if (XParam.GPUDEVICE >= 0)
{
for (int ivar = 0; ivar < XParam.outvars.size(); ivar++)
{
CUDA_CHECK(cudaMemcpy(XModel.OutputVarMap[XParam.outvars[ivar]], XModel_g.OutputVarMap[XParam.outvars[ivar]], XParam.nblkmem * XParam.blksize * sizeof(T), cudaMemcpyDeviceToHost));
}
}
Save2Netcdf(XParam, XLoop, XModel);
//Calculation of the flux at the bottom of the slope (x=24m)
ib = XModel.blocks.active[bl];
delta = calcres(T(XParam.delta), XModel.blocks.level[ib]);
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
int n = memloc(XParam, ixx, iy, ib);
finalFlux = finalFlux + XModel.evolv.h[n] * XModel.evolv.u[n] * delta;
}
finalFlux = finalFlux / float(XParam.ymax - XParam.yo) * 100.0f * 100.0f;
Flux.push_back(finalFlux);
XLoop.nextoutputtime = XLoop.nextoutputtime + XParam.outputtimestep;
printf("\tTime = %f, Flux at bottom end of slope : %f \n", XLoop.totaltime, finalFlux);
}
}
/*
for (int n = 0; n < Flux.size(); n++)
{
printf("Flux at %i : %f \n", n, Flux[n]);
}
*/
return Flux;
}
template std::vector<float> Raintestmap<float>(int gpu, int dimf, float Zsinit);
template std::vector<float> Raintestmap<double>(int gpu, int dimf, double Zsinit);
template <class T> bool ZoneOutputTest(int nzones, T zsinit)
//template bool ZoneOutputTest<float>(int nzones, float zsinit);
{
log("#####");
Param XParam;
Forcing<float> XForcing;
if (nzones == 3)
{
// read param file
//readforcing(XParam, XForcing);
outzoneP zone;
zone.outname = "whole.nc";
zone.xstart = -10;
zone.xend = 10;
zone.ystart = -10;
zone.yend = 10;
XParam.outzone.push_back(zone);
zone.outname = "zoomed.nc";
zone.xstart = 1;
zone.xend = 2;
zone.ystart = -2;
zone.yend = 2;
XParam.outzone.push_back(zone);
zone.outname = "zoomed2.nc";
zone.xstart = -2;
zone.xend = 2;
zone.ystart = -4;
zone.yend = 2;
XParam.outzone.push_back(zone);
}
// initialise domain and required resolution
XParam.dx = 1.0 / ((1 << 6)); //1<<8 = 2^8
XParam.xo = -5;
XParam.yo = -5;
XParam.xmax = 5;
XParam.ymax = 5;
XParam.initlevel = 0;
XParam.minlevel = -1;
XParam.maxlevel = 1;
XParam.zsinit = zsinit;
//XParam.zsoffset = 0.0;
//Output times for comparisons
XParam.endtime = 1.0;
XParam.outputtimestep = 0.5;
XParam.smallnc = 0;
XParam.cf = 0.0001;
XParam.frictionmodel = 1;
//Specification of the test
//XParam.test = 7;
XParam.rainforcing = true;
// Enforce GPU/CPU
//XParam.GPUDEVICE = gpu;
//XParam.rainbnd = true;
// create Model setup
Model<T> XModel;
Model<T> XModel_g;
StaticForcingP<float> bathy;
XForcing.Bathy.push_back(bathy);
// initialise forcing bathymetry to a central hill
XForcing.Bathy[0].xo = -10.0;
XForcing.Bathy[0].yo = -10.0;
XForcing.Bathy[0].xmax = 10.0;
XForcing.Bathy[0].ymax = 10.0;
XForcing.Bathy[0].nx = 501;
XForcing.Bathy[0].ny = 501;
XForcing.Bathy[0].dx = 0.1;
XForcing.Bathy[0].dy = XForcing.Bathy[0].dx;
AllocateCPU(1, 1, XForcing.left.blks, XForcing.right.blks, XForcing.top.blks, XForcing.bot.blks);
AllocateCPU(XForcing.Bathy[0].nx, XForcing.Bathy[0].ny, XForcing.Bathy[0].val);
float rs, x, y, r, hm;
rs = 20; //hill radio
hm = 5; //hill top
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
for (int i = 0; i < XForcing.Bathy[0].nx; i++)
{
x = float(XForcing.Bathy[0].xo + i * XForcing.Bathy[0].dx);
y = float(XForcing.Bathy[0].yo + j * XForcing.Bathy[0].dx);
r = sqrt(x * x + y * y);
if (r < rs)
{
XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx] = hm * (1 - r / rs);
}
if (x < -4.7 | x > 4.7 | y < -4.7 | y > 4.7)
{
XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx] = 10;
}
}
}
//Adaptation
XParam.AdaptCrit = "Targetlevel";
StaticForcingP<int> Target;
XForcing.targetadapt.push_back(Target);
XForcing.targetadapt[0].xo = -10;
XForcing.targetadapt[0].yo = -10;
XForcing.targetadapt[0].xmax = 10.0;
XForcing.targetadapt[0].ymax = 10.0;
XForcing.targetadapt[0].nx = 501;
XForcing.targetadapt[0].ny = 501;
XForcing.targetadapt[0].dx = 0.1;
AllocateCPU(XForcing.targetadapt[0].nx, XForcing.targetadapt[0].ny, XForcing.targetadapt[0].val);
for (int j = 0; j < XForcing.targetadapt[0].ny; j++)
{
for (int i = 0; i < XForcing.targetadapt[0].nx; i++)
{
x = float(XForcing.targetadapt[0].xo + i * XForcing.targetadapt[0].dx);
y = float(XForcing.targetadapt[0].yo + j * XForcing.targetadapt[0].dx);
if (x < 0.0)
{
XForcing.targetadapt[0].val[i + j * XForcing.targetadapt[0].nx] = -1;
}
else
{
if (y < 0.0)
{
XForcing.targetadapt[0].val[i + j * XForcing.targetadapt[0].nx] = 0;
}
else
{
XForcing.targetadapt[0].val[i + j * XForcing.targetadapt[0].nx] = 1;
}
}
}
}
// Add wall boundary conditions
XForcing.right.type = 0;
XForcing.left.type = 0;
XForcing.top.type = 0;
XForcing.bot.type = 0;
//Create a temporary file with river fluxes
float Q = 1;
std::ofstream river_file(
"testriver.tmp", std::ios_base::out | std::ios_base::trunc);
river_file << "0.0 " + std::to_string(Q) << std::endl;
river_file << "3600.0 " + std::to_string(Q) << std::endl;
river_file.close(); //destructor implicitly does it
River thisriver;
thisriver.Riverflowfile = "testriver.tmp";
thisriver.xstart = -0.2;
thisriver.xend = 0.2;
thisriver.ystart = -0.2;
thisriver.yend = 0.2;
XForcing.rivers.push_back(thisriver);
XForcing.rivers[0].flowinput = readFlowfile(XForcing.rivers[0].Riverflowfile, XParam.reftime);
checkparamsanity(XParam, XForcing);
InitMesh(XParam, XForcing, XModel);
InitialConditions(XParam, XForcing, XModel);
InitialAdaptation(XParam, XForcing, XModel);
SetupGPU(XParam, XModel, XForcing, XModel_g);
MainLoop(XParam, XForcing, XModel, XModel_g);
//Test if file exist and can be open:
int error = 1;
std::vector<int> observedSize{ 473251462,23304761,130802886 };
for (int o = 0; o < XModel.blocks.outZone.size(); o++)
{
std::ifstream fs(XModel.blocks.outZone[o].outname);
if (fs.fail())
{
error++;
}
else
{
//Calculate the size of the file in bytes
std::ifstream in_file(XModel.blocks.outZone[o].outname, std::ios::binary);
in_file.seekg(0, std::ios::end);
int file_size = in_file.tellg();
printf("sizes : %i in bytes\n", file_size);
error = error * (observedSize[o] / file_size);
}
}
bool modelgood = (1 - abs(error)) < 0.05;
//log("#####");
return modelgood;
}
template bool ZoneOutputTest<float>(int nzones, float zsinit);
template bool ZoneOutputTest<double>(int nzones, double zsinit);
template <class T> bool Rainlossestest(T zsinit, int gpu, float alpha)
{
int NX = 21;
int NY = 21;
double* xLoss;
double* yLoss;
double* ilForcing;
double* clForcing;
log("#####");
Param XParam;
T initVol, TheoryInput;
TheoryInput = T(0.0);
// initialise domain and required resolution
XParam.dx = 1.0 / ((1 << 6)); //1<<8 = 2^8
XParam.xo = -0.5;
XParam.yo = -0.5;
XParam.xmax = 0.5;
XParam.ymax = 0.5;
XParam.zsinit = zsinit;
//XParam.zsoffset = 0.0;
XParam.infiltration = true;
//Output times for comparisons
XParam.endtime = 1.0;
XParam.outputtimestep = 0.1;
XParam.smallnc = 0;
XParam.cf = 0.01;
XParam.frictionmodel = 0;
// Enforce GPU/CPU
XParam.GPUDEVICE = gpu;
XParam.rainbnd = true;
//output vars
std::string outvi[17] = { "zb","h","zs","u","v","Fqux","Fqvx","Fquy","Fqvy", "Fhu", "Fhv", "dh", "dhu", "dhv", "Su", "Sv", "hgw" };
std::vector<std::string> outv;
for (int nv = 0; nv < 17; nv++)
{
outv.push_back(outvi[nv]);
}
XParam.outvars = outv;
// create Model setup
Model<T> XModel;
Model<T> XModel_g;
Forcing<float> XForcing;
StaticForcingP<float> bathy;
XForcing.Bathy.push_back(bathy);
// initialise forcing bathymetry to 0
XForcing.Bathy[0].xo = -1.0;
XForcing.Bathy[0].yo = -1.0;
XForcing.Bathy[0].xmax = 1.0;
XForcing.Bathy[0].ymax = 1.0;
XForcing.Bathy[0].nx = 3;
XForcing.Bathy[0].ny = 3;
XForcing.Bathy[0].dx = 1.0;
XForcing.Bathy[0].dy = XForcing.Bathy[0].dx;
AllocateCPU(1, 1, XForcing.left.blks, XForcing.right.blks, XForcing.top.blks, XForcing.bot.blks);
AllocateCPU(XForcing.Bathy[0].nx, XForcing.Bathy[0].ny, XForcing.Bathy[0].val);
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
for (int i = 0; i < XForcing.Bathy[0].nx; i++)
{
XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx] = T(0.0);
}
}
// Add wall boundary conditions
XForcing.right.type = 0;
XForcing.left.type = 0;
XForcing.top.type = 0;
XForcing.bot.type = 0;
//Value definition for surface rain fall
T Q = 300; // mm/hr
std::cout << "# Theoretical volume of water input during the simulation in m3: " << TheoryInput << ", from a rain input of: " << Q << "mm/hr." << std::endl;
//Create a temporary file with rain fluxes
std::ofstream rain_file(
"testrain.tmp", std::ios_base::out | std::ios_base::trunc);
rain_file << "0.0 " + std::to_string(Q) << std::endl;
rain_file << "3600.0 " + std::to_string(Q) << std::endl;
rain_file.close(); //destructor implicitly does it
XForcing.Rain.inputfile = "testrain.tmp";
XForcing.Rain.uniform = true;
// Reading rain forcing from file for CPU and unifor rain
XForcing.Rain.unidata = readINfileUNI(XForcing.Rain.inputfile, XParam.reftime);
//Value definition for surface IL-CL
T IL = 0.02; // mm
T CL = 100; // mm/hr
//Create a uniform map of IL-CL forcing (nc file)
xLoss = (double*)malloc(sizeof(double) * NX);
yLoss = (double*)malloc(sizeof(double) * NY);
for (int i = 0; i < NX; i++) { xLoss[i] = -1.0 + 0.1 * i; }
for (int j = 0; j < NY; j++) { yLoss[j] = -1.0 + 0.1 * j; }
ilForcing = (double*)malloc(sizeof(double) * NY * NX);
clForcing = (double*)malloc(sizeof(double) * NY * NX);
//Create the Losses forcing:
for (int j = 0; j < NY; j++)
{
for (int i = 0; i < NX; i++)
{
if (xLoss[i] < 0)
{
ilForcing[j * NX + i] = 0;
clForcing[j * NX + i] = 0;
}
else
{
ilForcing[j * NX + i] = IL;
clForcing[j * NX + i] = CL;
}
}
}
create2dnc("ilrainlossTempt.nc", NX, NY, xLoss, yLoss, ilForcing, "initialloss");
create2dnc("clrainlossTempt.nc", NX, NY, xLoss, yLoss, clForcing, "continuousloss");
//Reading non-unform forcing
bool gpgpu = 0;
if (XParam.GPUDEVICE != -1)
{
gpgpu = 1;
}
XForcing.il = readfileinfo("ilrainlossTempt.nc", XForcing.il);
XForcing.il.varname = "initialloss";
XForcing.cl = readfileinfo("clrainlossTempt.nc", XForcing.cl);
XForcing.cl.varname = "continuousloss";
readstaticforcing(XForcing.il);
readstaticforcing(XForcing.cl);
free(ilForcing);
free(clForcing);
free(xLoss);
free(yLoss);
//XParam.infiltration = false;
checkparamsanity(XParam, XForcing);
//printf("h: %f \n", XModel.evolv.h[10]);
InitMesh(XParam, XForcing, XModel);
InitialConditions(XParam, XForcing, XModel);
InitialAdaptation(XParam, XForcing, XModel);
SetupGPU(XParam, XModel, XForcing, XModel_g);
initVol = T(0.0);
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XModel.blocks.active[ibl];
T delta = calcres(XParam.dx, XModel.blocks.level[ib]);
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < (XParam.blkwidth); ix++)
{
int i = memloc(XParam, ix, iy, ib);
initVol = initVol + XModel.evolv.h[i] * delta * delta;
}
}
}
MainLoop(XParam, XForcing, XModel, XModel_g);
TheoryInput = Q / T(1000.0) / T(3600.0) * XParam.endtime;
T SimulatedVolume = T(0.0);
T Infiltration_model = T(0.0);
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XModel.blocks.active[ibl];
T delta = calcres(XParam.dx, XModel.blocks.level[ib]);
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < (XParam.blkwidth); ix++)
{
int i = memloc(XParam, ix, iy, ib);
SimulatedVolume = SimulatedVolume + XModel.evolv.h[i] * delta * delta;
Infiltration_model = Infiltration_model + XModel.hgw[i] * delta * delta;
}
}
}
SimulatedVolume = SimulatedVolume - initVol;
T error = abs(SimulatedVolume - TheoryInput + Infiltration_model);
T modelgood = error / abs(TheoryInput) < 0.05;
//printf("Simulatedvolume: %f , Theory input: %f , Calcultated loss: %f\n", SimulatedVolume, TheoryInput, Infiltration_model);
//log("#####");
return modelgood;
}
template <class T> int TestGradientSpeed(Param XParam, Model<T> XModel, Model<T> XModel_g)
{
//
int fastest = 1;
dim3 blockDim(XParam.blkwidth, XParam.blkwidth, 1);
dim3 gridDim(XParam.nblk, 1, 1);
// for flux reconstruction the loop overlap the right(or top for the y direction) halo
dim3 blockDimX2(XParam.blkwidth + XParam.halowidth * 2, XParam.blkwidth + XParam.halowidth * 2, 1);
// Allocate CUDA events that we'll use for timing
cudaEvent_t startA, startB, startC, startG, startGnew;
cudaEvent_t stopA, stopB, stopC, stopG, stopGnew;
fillHalo(XParam, XModel.blocks, XModel.evolv, XModel.zb);
std::thread t0(&gradientC<T>, XParam, XModel.blocks, XModel.zb, XModel.grad.dzbdx, XModel.grad.dzbdy);
t0.join();
Loop<T> XLoop;
// GPU stuff
XLoop.hugenegval = std::numeric_limits<T>::min();
XLoop.hugeposval = std::numeric_limits<T>::max();
XLoop.epsilon = std::numeric_limits<T>::epsilon();
XLoop.totaltime = 0.0;
XLoop.nextoutputtime = 3600.0;
cudaEventCreate(&startA);
cudaEventCreate(&stopA);
// Record the start event
cudaEventRecord(startA, NULL);
gradient <<< gridDim, blockDim, 0 >>> (XParam.halowidth, XModel_g.blocks.active, XModel_g.blocks.level, (T)XParam.theta, (T)XParam.delta, XModel_g.zb, XModel_g.grad.dzbdx, XModel_g.grad.dzbdy);
CUDA_CHECK(cudaDeviceSynchronize());
// Record the stop event
cudaEventRecord(stopA, NULL);
// Wait for the stop event to complete
cudaEventSynchronize(stopA);
float msecTotalGrad = 0.0f;
cudaEventElapsedTime(&msecTotalGrad, startA, stopA);
cudaEventDestroy(startA);
cudaEventDestroy(stopA);
cudaEventCreate(&startB);
cudaEventCreate(&stopB);
// Record the start event
cudaEventRecord(startB, NULL);
gradientSM <<< gridDim, blockDim >>> (XParam.halowidth, XModel_g.blocks.active, XModel_g.blocks.level, (T)XParam.theta, (T)XParam.delta, XModel_g.zb, XModel_g.grad.dzsdx, XModel_g.grad.dzsdy);
CUDA_CHECK(cudaDeviceSynchronize());
// Record the stop event
cudaEventRecord(stopB, NULL);
// Wait for the stop event to complete
cudaEventSynchronize(stopB);
float msecTotalSM = 0.0f;
cudaEventElapsedTime(&msecTotalSM, startB, stopB);
cudaEventDestroy(startB);
cudaEventDestroy(stopB);
cudaEventCreate(&startC);
cudaEventCreate(&stopC);
// Record the start event
cudaEventRecord(startC, NULL);
gradientSMC <<< gridDim, blockDim >>> (XParam.halowidth, XModel_g.blocks.active, XModel_g.blocks.level, (T)XParam.theta, (T)XParam.delta, XModel_g.zb, XModel_g.grad.dhdx, XModel_g.grad.dhdy);
CUDA_CHECK(cudaDeviceSynchronize());
// Record the stop event
cudaEventRecord(stopC, NULL);
// Wait for the stop event to complete
cudaEventSynchronize(stopC);
float msecTotalSMB = 0.0f;
cudaEventElapsedTime(&msecTotalSMB, startC, stopC);
cudaEventDestroy(startC);
cudaEventDestroy(stopC);
CopyGPUtoCPU(XParam.nblkmem, XParam.blksize, XModel.grad.dudx, XModel_g.grad.dzbdx);
CopyGPUtoCPU(XParam.nblkmem, XParam.blksize, XModel.grad.dudy, XModel_g.grad.dzbdy);
CopyGPUtoCPU(XParam.nblkmem, XParam.blksize, XModel.grad.dzsdx, XModel_g.grad.dzsdx);
CopyGPUtoCPU(XParam.nblkmem, XParam.blksize, XModel.grad.dzsdy, XModel_g.grad.dzsdy);
CopyGPUtoCPU(XParam.nblkmem, XParam.blksize, XModel.grad.dhdx, XModel_g.grad.dhdx);
CopyGPUtoCPU(XParam.nblkmem, XParam.blksize, XModel.grad.dhdy, XModel_g.grad.dhdy);
printf("Runtime : normal=%f, shared mem=%f, SharedmemB=%f in msec\n", msecTotalGrad, msecTotalSM, msecTotalSMB);
/*
creatncfileBUQ(XParam, XModel.blocks);
std::vector<std::string> varlist = { "zb", "dzbdx", "dzbdy" };
for (int ivar = 0; ivar < varlist.size(); ivar++)
{
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, varlist[ivar], 3, XModel.OutputVarMap[varlist[ivar]], XModel.blocks.outZone[0]);
}
diffArray(XParam, XLoop, XModel.blocks, "SMdx", false, XModel.grad.dzbdx, XModel_g.grad.dzsdx, XModel.time.arrmax, XModel.grad.dzsdx);
diffArray(XParam, XLoop, XModel.blocks, "SMBdx", false, XModel.grad.dzbdx, XModel_g.grad.dhdx, XModel.time.arrmax, XModel.grad.dhdx);
diffArray(XParam, XLoop, XModel.blocks, "SMBdy", false, XModel.grad.dzbdy, XModel_g.grad.dhdy, XModel.time.arrmax, XModel.grad.dhdy);
diffArray(XParam, XLoop, XModel.blocks, "SMdy", false, XModel.grad.dzbdy, XModel_g.grad.dzsdy, XModel.time.arrmax, XModel.grad.dzsdy);
*/
T maxdiffx, maxdiffy;
maxdiffx = T(0.0);
maxdiffy = T(0.0);
T maxdiffsmx, maxdiffsmy;
maxdiffsmx = T(0.0);
maxdiffsmy = T(0.0);
T maxdiffsmbx, maxdiffsmby;
maxdiffsmbx = T(0.0);
maxdiffsmby = T(0.0);
T diffsm, diffsmb;
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XModel.blocks.active[ibl];
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < XParam.blkwidth; ix++)
{
int i = memloc(XParam.halowidth, XParam.blkmemwidth, ix, iy, ib);
diffsm = abs(XModel.grad.dzbdx[i] - XModel.grad.dudx[i]);
maxdiffx = max(maxdiffx, diffsm);
diffsm = abs(XModel.grad.dzbdy[i] - XModel.grad.dudy[i]);
maxdiffx = max(maxdiffx, diffsm);
diffsm = abs(XModel.grad.dzbdx[i] - XModel.grad.dzsdx[i]);
maxdiffsmx = max(maxdiffsmx, diffsm);
diffsm = abs(XModel.grad.dzbdy[i] - XModel.grad.dzsdy[i]);
maxdiffsmy = max(maxdiffsmy, diffsm);
diffsm = abs(XModel.grad.dzbdx[i] - XModel.grad.dhdx[i]);
maxdiffsmbx = max(maxdiffsmbx, diffsm);
diffsm = abs(XModel.grad.dzbdy[i] - XModel.grad.dhdy[i]);
maxdiffsmby = max(maxdiffsmby, diffsm);
//
}
}
}
printf("max error : normx=%e, normy=%e, smx=%e, smy=%e, smbx=%e, smby=%e in m\n", maxdiffx, maxdiffy, maxdiffsmx, maxdiffsmy, maxdiffsmbx, maxdiffsmby);
gradientCPU(XParam, XModel.blocks, XModel.evolv, XModel.grad, XModel.zb);
cudaEventCreate(&startG);
cudaEventCreate(&stopG);
cudaEventRecord(startG, NULL);
gradientGPU(XParam, XModel_g.blocks, XModel_g.evolv, XModel_g.grad, XModel_g.zb);
cudaEventRecord(stopG, NULL);
// Wait for the stop event to complete
cudaEventSynchronize(stopG);
float msecTotalG = 0.0f;
cudaEventElapsedTime(&msecTotalG, startG, stopG);
cudaEventDestroy(startG);
cudaEventDestroy(stopG);
CompareCPUvsGPU(XParam, XModel, XModel_g, { "dhdx","dhdy", "dzsdx","dzsdy","dudx","dudy","dvdx","dvdy" }, true);
//CopyGPUtoCPU(XParam.nblkmem, XParam.blksize, XModel.grad.dzbdx, XModel_g.grad.dzbdx);
//CopyGPUtoCPU(XParam.nblkmem, XParam.blksize, XModel.grad.dzbdy, XModel_g.grad.dzbdy);
//CopyGPUtoCPU(XParam.nblkmem, XParam.blksize, XModel.grad.dzsdx, XModel_g.grad.dzsdx);
//CopyGPUtoCPU(XParam.nblkmem, XParam.blksize, XModel.grad.dzsdy, XModel_g.grad.dzsdy);
//CopyGPUtoCPU(XParam.nblkmem, XParam.blksize, XModel.grad.dhdx, XModel_g.grad.dhdx);
//CopyGPUtoCPU(XParam.nblkmem, XParam.blksize, XModel.grad.dhdy, XModel_g.grad.dhdy);
cudaEventCreate(&startGnew);
cudaEventCreate(&stopGnew);
cudaEventRecord(startGnew, NULL);
gradientGPUnew(XParam, XModel_g.blocks, XModel_g.evolv, XModel_g.grad, XModel_g.zb);
cudaEventRecord(stopGnew, NULL);
// Wait for the stop event to complete
cudaEventSynchronize(stopGnew);
float msecTotalGnew = 0.0f;
cudaEventElapsedTime(&msecTotalGnew, startGnew, stopGnew);
cudaEventDestroy(startGnew);
cudaEventDestroy(stopGnew);
CompareCPUvsGPU(XParam, XModel, XModel_g, { "dhdx","dhdy", "dzsdx","dzsdy","dudx","dudy","dvdx","dvdy" }, true);
printf("Runtime : old gradient=%f, new Gradient=%f in msec\n", msecTotalG, msecTotalGnew);
return fastest;
}
template <class T> bool TestHaloSpeed(Param XParam, Model<T> XModel, Model<T> XModel_g)
{
Forcing<float> XForcing;
XForcing = MakValleyBathy(XParam, T(0.4), true, true);
float maxtopo = std::numeric_limits<float>::min();
float mintopo = std::numeric_limits<float>::max();
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
for (int i = 0; i < XForcing.Bathy[0].nx; i++)
{
maxtopo = max(XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx], maxtopo);
mintopo = min(XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx], mintopo);
}
}
// Overrule whatever may be set in the param file
XParam.xmax = XForcing.Bathy[0].xmax;
XParam.ymax = XForcing.Bathy[0].ymax;
XParam.xo = XForcing.Bathy[0].xo;
XParam.yo = XForcing.Bathy[0].yo;
XParam.dx = XForcing.Bathy[0].dx;
XParam.zsinit = mintopo + 0.5;// Had a small amount of water to avoid a huge first step that would surely break the setup
XParam.endtime = 20.0;
XParam.outputtimestep = XParam.endtime;
XParam.minlevel = 0;
XParam.maxlevel = 1;
XParam.initlevel = 0;
//coarse to fine
// Change arg 1 and 2 if the slope is changed
XParam.AdaptCrit = "Inrange";
XParam.Adapt_arg1 = "0.0";
XParam.Adapt_arg2 = "2.0";
XParam.Adapt_arg3 = "zb";
checkparamsanity(XParam, XForcing);
InitMesh(XParam, XForcing, XModel);
InitialConditions(XParam, XForcing, XModel);
InitialAdaptation(XParam, XForcing, XModel);
SetupGPU(XParam, XModel, XForcing, XModel_g);
// Copy zs from CPU to GPU ... again
CopytoGPU(XParam.nblkmem, XParam.blksize, XModel.evolv.zs, XModel_g.evolv_o.zs);
CopytoGPU(XParam.nblkmem, XParam.blksize, XModel.evolv.zs, XModel_g.evolv.zs);
cudaStream_t streams[2];
CUDA_CHECK(cudaStreamCreate(&streams[0]));
CUDA_CHECK(cudaStreamCreate(&streams[1]));
fillHaloC(XParam, XModel.blocks, XModel.evolv.zs);
fillHaloGPU(XParam, XModel_g.blocks, streams[0], XModel_g.evolv.zs);
fillHaloGPUnew(XParam, XModel_g.blocks, streams[1], XModel_g.evolv_o.zs);
CUDA_CHECK(cudaDeviceSynchronize());
cudaStreamDestroy(streams[0]);
cudaStreamDestroy(streams[1]);
//CopyGPUtoCPU(XParam.nblkmem, XParam.blksize, XModel.evolv.u, XModel_g.evolv.zs);
//CopyGPUtoCPU(XParam.nblkmem, XParam.blksize, XModel.evolv.v, XModel_g.evolvo.zs);
diffArray(XParam, XModel.blocks, "GPU_old", true, XModel.evolv.zs, XModel_g.evolv.zs, XModel.evolv.u, XModel.evolv_o.u);
diffArray(XParam, XModel.blocks, "GPU_new", true, XModel.evolv.zs, XModel_g.evolv_o.zs, XModel.evolv.v, XModel.evolv_o.v);
return true;
}
template <class T> int TestInstability(Param XParam, Model<T> XModel, Model<T> XModel_g)
{
Forcing<float> XForcing;
XForcing = MakValleyBathy(XParam, T(0.4), true, true);
XParam.conserveElevation = true;
float maxtopo = std::numeric_limits<float>::min();
float mintopo = std::numeric_limits<float>::max();
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
for (int i = 0; i < XForcing.Bathy[0].nx; i++)
{
maxtopo = max(XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx], maxtopo);
mintopo = min(XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx], mintopo);
}
}
// Overrule whatever may be set in the param file
XParam.xmax = XForcing.Bathy[0].xmax;
XParam.ymax = XForcing.Bathy[0].ymax;
XParam.xo = XForcing.Bathy[0].xo;
XParam.yo = XForcing.Bathy[0].yo;
XParam.dx = XForcing.Bathy[0].dx;
XParam.zsinit = mintopo + 6.9;// Had a water level so that the wet and dry affects the
XParam.endtime = 20.0;
XParam.outputtimestep = XParam.endtime;
XParam.minlevel = 0;
XParam.maxlevel = 2;
XParam.initlevel = 0;
// coarse to fine
// Change arg 1 and 2 if the slope is changed
XParam.AdaptCrit = "Targetlevel";
XParam.Adapt_arg1 = "";
XParam.Adapt_arg2 = "";
XParam.Adapt_arg3 = "";
StaticForcingP<int> targetlevel;
XForcing.targetadapt.push_back(targetlevel);
XForcing.targetadapt[0].xo = 0.0;
XForcing.targetadapt[0].yo = 0.0;
XForcing.targetadapt[0].xmax = 31.0;
XForcing.targetadapt[0].ymax = 31.0;
XForcing.targetadapt[0].nx = 32;
XForcing.targetadapt[0].ny = 32;
XForcing.targetadapt[0].dx = 1.0;
AllocateCPU(XForcing.Bathy[0].nx, XForcing.Bathy[0].ny, XForcing.targetadapt[0].val);
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
for (int i = 0; i < XForcing.Bathy[0].nx; i++)
{
XForcing.targetadapt[0].val[i + j * XForcing.Bathy[0].nx] = 1;
}
}
XForcing.targetadapt[0].val[12 + 12 * XForcing.Bathy[0].nx] = 2;
// Setup Model(s)
checkparamsanity(XParam, XForcing);
InitMesh(XParam, XForcing, XModel);
InitialConditions(XParam, XForcing, XModel);
InitialAdaptation(XParam, XForcing, XModel);
SetupGPU(XParam, XModel, XForcing, XModel_g);
// Run first full step (i.e. 2 half steps)
Loop<T> XLoop = InitLoop(XParam, XModel);
//FlowCPU(XParam, XLoop, XForcing, XModel);
HalfStepCPU(XParam, XLoop, XForcing, XModel);
T maxu = std::numeric_limits<float>::min();
T maxv = std::numeric_limits<float>::min();
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XModel.blocks.active[ibl];
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < XParam.blkwidth; ix++)
{
int i = memloc(XParam.halowidth, XParam.blkmemwidth, ix, iy, ib);
maxu = max(maxu, abs(XModel.evolv.u[i]));
maxv = max(maxv, abs(XModel.evolv.v[i]));
}
}
}
bool test = false;
if (maxu > T(std::numeric_limits<T>::epsilon() * T(1000.0)) || maxv > T(std::numeric_limits<T>::epsilon() * T(1000.0)))
{
//test = true;
XParam.outvars = { "zb","h","zs","u","v","Fqux","Fqvx","Fquy","Fqvy", "Fhu", "Fhv", "dh", "dhu", "dhv", "Su", "Sv","dhdx", "dhdy", "dzsdx", "dzsdy" };
InitSave2Netcdf(XParam, XModel);
}
else
{
test = true;
}
return test;
}
//TestMultiBathyRough(int gpu, T ref, int scenario)
template <class T> bool TestMultiBathyRough(int gpu, T ref, int scenario)
{
T Z0 = ref + 0.0;
T Z1 = ref + 2.0;
T R0 = 0.000001;
T R1 = 0.1;
T IL = 8.6;
T CL = 7.2;
T eps;
int NX = 21;
int NY = 21;
double* xz;
double* yz;
double* map;
Param XParam;
Forcing<float> XForcing;
Model<float> XModel;
Model<float> XModel_g;
char* name_file_R1;
//Creation of a Bathy file
xz = (double*)malloc(sizeof(double) * NX);
yz = (double*)malloc(sizeof(double) * NY);
for (int i = 0; i < NX; i++) { xz[i] = -1.0 + 0.1 * i; }
for (int j = 0; j < NY; j++) { yz[j] = -1.0 + 0.1 * j; }
map = (double*)malloc(sizeof(double) * NY * NX);
for (int j = 0; j < NY; j++)
{
for (int i = 0; i < NX; i++)
{
map[j * NX + i] = Z0; //+ (yz[j] + 1) * 0.5;
}
}
create2dnc("Z0_map.nc", NX, NY, xz, yz, map, "z");
//Creation of a smaller Bathy file
//xz = (double*)malloc(sizeof(double) * NX);
//yz = (double*)malloc(sizeof(double) * NY);
for (int i = 0; i < NX; i++) { xz[i] = 0.0 + 0.05 * i; }
for (int j = 0; j < NY; j++) { yz[j] = -1.0 + 0.05 * j; }
//map = (double*)malloc(sizeof(double) * NY * NX);
//Create the Losses forcing:
for (int j = 0; j < NY; j++)
{
for (int i = 0; i < NX; i++)
{
map[j * NX + i] = Z1; // -(yz[j] + 1) * 0.5;
}
}
create2dnc("Z1_map.nc", NX, NY, xz, yz, map, "z");
//Creation of a roughness file
//xz = (double*)malloc(sizeof(double) * NX);
//yz = (double*)malloc(sizeof(double) * NY);
for (int i = 0; i < NX; i++) { xz[i] = -1.0 + 0.1 * i; }
for (int j = 0; j < NY; j++) { yz[j] = -1.0 + 0.1 * j; }
//map = (double*)malloc(sizeof(double) * NY * NX);
for (int j = 0; j < NY; j++)
{
for (int i = 0; i < NX; i++)
{
map[j * NX + i] = R0;
}
}
create2dnc("R0_map.nc", NX, NY, xz, yz, map, "z0");
//Creation of a smaller Roughness file
//xz = (double*)malloc(sizeof(double) * NX);
//yz = (double*)malloc(sizeof(double) * NY);
for (int i = 0; i < NX; i++) { xz[i] = 0.0 + 0.05 * i; }
for (int j = 0; j < NY; j++) { yz[j] = 0.0 + 0.05 * j; }
//map = (double*)malloc(sizeof(double) * NY * NX);
//Create the Losses forcing:
for (int j = 0; j < NY; j++)
{
for (int i = 0; i < NX; i++)
{
map[j * NX + i] = R1;
}
}
if (scenario < 0.5)
{
name_file_R1 = "R1_map.nc";
}
else
{
name_file_R1 = "1R_map.nc";
}
create2dnc(name_file_R1, NX, NY, xz, yz, map, "z0");
//Creation of a refinement file
//xz = (double*)malloc(sizeof(double) * NX);
//yz = (double*)malloc(sizeof(double) * NY);
for (int i = 0; i < NX; i++) { xz[i] = -1.0 + 0.1 * i; }
for (int j = 0; j < NY; j++) { yz[j] = -1.0 + 0.1 * j; }
//map = (double*)malloc(sizeof(double) * NY * NX);
for (int j = 0; j < NY; j++)
{
for (int i = 0; i < NX; i++)
{
map[j * NX + i] = 0;
if ((abs(xz[i]) < 0.5) && (abs(yz[j]) < 0.5))
{
map[j * NX + i] = 1;
}
}
}
create2dnc("refinement.nc", NX, NY, xz, yz, map, "z");
/*// Creation of a rain fall file
std::ofstream rain_file(
"rainTest13.txt", std::ios_base::out | std::ios_base::trunc);
rain_file << "0.000000\t10.00" << std::endl;
rain_file << "1000.000\t10.00" << std::endl;
rain_file.close();*/
// Creation of BG_param_test13.txt file
std::ofstream param_file(
"BG_param_test13.txt", std::ios_base::out | std::ios_base::trunc);
//Add Bathymetries to the file
param_file << "bathy = Z0_map.nc?z ;" << std::endl;
param_file << "bathy = Z1_map.nc?z ;" << std::endl;
//Add Roughness to the file
if (scenario > 1.5)
{
R1 = 3.56;
param_file << "cfmap = " << R1 << std::endl;
R0 = 3.56;
}
else
{
param_file << "cfmap = R0_map.nc?z0 ;" << std::endl;
param_file << "cfmap = " << name_file_R1 << "?z0 ;" << std::endl;
}
//param_file << "cfmap = R1_map.nc?z0 ;" << std::endl;
param_file << "frictionmodel=1 ;" << std::endl;
//Add refinement to the file
param_file << "Adaptation = Targetlevel,refinement.nc?z ;" << std::endl;
param_file << "initlevel = 0; " << std::endl;
param_file << "maxlevel = 1; " << std::endl;
param_file << "minlevel = 0; " << std::endl;
//Add River forcing
//param_file << "rainfile = rainTest13.txt ;" << std::endl;
//Add endtime and outputvar
param_file << "endtime = 10.0 ;" << std::endl;
param_file << "outvars = zs,h,u,v,zb,cf;" << std::endl;
param_file << "dx = 0.01;" << std::endl;
param_file << "zsinit = 0.1;" << std::endl;
param_file << "smallnc = 0;" << std::endl;
param_file << "doubleprecision = 1;" << std::endl;
if (scenario > 2.5)
{
param_file << "il = " << IL << std::endl;
param_file << "cl = " << CL << std::endl;
}
param_file.close();
//read param file
Readparamfile(XParam, XForcing, "BG_param_test13.txt"); // "BG_param_test13.txt");
//readforcing
readforcing(XParam, XForcing);
checkparamsanity(XParam, XForcing);
InitMesh(XParam, XForcing, XModel);
InitialConditions(XParam, XForcing, XModel);
InitialAdaptation(XParam, XForcing, XModel);
SetupGPU(XParam, XModel, XForcing, XModel_g);
// Run first full step (i.e. 2 half steps)
//Loop<T> XLoop = InitLoop(XParam, XModel);
MainLoop(XParam, XForcing, XModel, XModel_g);
//if XModel.cf[0]
// XModel.zb
T maxz = T(-1.0) * std::numeric_limits<float>::max();
T minz = std::numeric_limits<float>::max();
T maxr = T(-1.0) * std::numeric_limits<float>::max();
T minr = std::numeric_limits<float>::max();
printf("min float=%f\n", std::numeric_limits<float>::min());
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XModel.blocks.active[ibl];
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < XParam.blkwidth; ix++)
{
int i = memloc(XParam.halowidth, XParam.blkmemwidth, ix, iy, ib);
maxz = max(maxz, abs(XModel.zb[i]));
minz = min(minz, abs(XModel.zb[i]));
maxr = max(maxr, abs(XModel.cf[i]));
minr = min(minr, abs(XModel.cf[i]));
}
}
}
bool result = false;
eps = 0.0000001;
if ((abs(maxz - Z1) < eps) && (abs(maxr - R1) < eps) && (abs(minz - Z0) < eps) && (abs(minr - R0) < eps))
{
result = true;
}
printf("\t\n");
printf("\t\tZ max forced : %f, Z max obs : %f\n ", Z1, maxz);
printf("\t\tR max forced : %f, R max obs: %f\n", R1, maxr);
printf("\t\tZ min forced : %f, Z min obs: %f\n", Z0, minz);
printf("\t\tR min forced : %f, R min obs : %f\n ", R0, minr);
if (scenario > 2.5)
{
T maxil = T(-1.0) * std::numeric_limits<float>::max();
T minil = std::numeric_limits<float>::max();
T maxcl = T(-1.0) * std::numeric_limits<float>::max();
T mincl = std::numeric_limits<float>::max();
//printf("min float=%f\n", std::numeric_limits<float>::min());
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XModel.blocks.active[ibl];
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < XParam.blkwidth; ix++)
{
int i = memloc(XParam.halowidth, XParam.blkmemwidth, ix, iy, ib);
maxil = max(maxil, abs(XModel.il[i]));
minil = min(minil, abs(XModel.il[i]));
maxcl = max(maxcl, abs(XModel.cl[i]));
mincl = min(mincl, abs(XModel.cl[i]));
}
}
}
bool result = false;
eps = 0.0000001;
// IL is expected here to be value when dry and 0 where wet at the begining of the computation
if ((abs(maxil - IL) < eps) && (abs(maxcl - CL) < eps) && (abs(minil - T(0.0)) < eps) && (abs(mincl - CL) < eps))
{
result = true;
}
}
return result;
}
//TestFlexibleOutputTimes(int gpu, T ref, int scenario)
template <class T> bool TestFlexibleOutputTimes(int gpu, T ref, int scenario)
{
T Z0 = ref + 0.0;
T Z1 = ref + 2.0;
T eps;
int NX = 21;
int NY = 21;
double* xz;
double* yz;
double* map;
Param XParam;
Forcing<float> XForcing;
Model<float> XModel;
Model<float> XModel_g;
char* name_file_R1;
//Creation of a Bathy file
xz = (double*)malloc(sizeof(double) * NX);
yz = (double*)malloc(sizeof(double) * NY);
for (int i = 0; i < NX; i++) { xz[i] = -1.0 + 0.1 * i; }
for (int j = 0; j < NY; j++) { yz[j] = -1.0 + 0.1 * j; }
map = (double*)malloc(sizeof(double) * NY * NX);
for (int j = 0; j < NY; j++)
{
for (int i = 0; i < NX; i++)
{
map[j * NX + i] = Z0; //+ (yz[j] + 1) * 0.5;
}
}
create2dnc("Z0_map.nc", NX, NY, xz, yz, map, "z");
// Creation of BG_param_test13.txt file
std::ofstream param_file(
"BG_param_test15.txt", std::ios_base::out | std::ios_base::trunc);
//Add Bathymetries to the file
param_file << "bathy = Z0_map.nc?z ;" << std::endl;
//Add endtime and outputvar
param_file << "endtime = 11.0 ;" << std::endl;
param_file << "reftime = 2020-01-01T00:00:00 ;" << std::endl;
param_file << "outvars = zs,h,u,v,zb;" << std::endl;
param_file << "dx = 0.05;" << std::endl;
param_file << "zsinit = 0.1;" << std::endl;
param_file << "smallnc = 0;" << std::endl;
param_file << "doubleprecision = 1;" << std::endl;
param_file << "Toutput = 1|2|5, 2020-01-01T00:00:08, 9.5;" << std::endl;
param_file << "outzone = Test15_zoom1.nc,0.2,0.6,-0.2,0.2, 2020-01-01T00:00:02|0.008min|2020-01-01T00:00:03, 5.6,6.9;" << std::endl;
param_file << "outzone = Test15_zoom2.nc,0.2,0.6,-0.2,0.2, 8.1|0.7|, 5.6;" << std::endl;
param_file << "outzone = Test15_zoom3.nc,0.2,0.6,-0.2,0.2, |0.8|2;" << std::endl;
param_file << "outzone = Test15_zoom4.nc,0.2,0.6,-0.2,0.2, 8.2||9;" << std::endl; // Here the step in not given so assumed infinite
param_file << "outzone = Test15_zoom5.nc,0.2,0.6,-0.2,0.2;" << std::endl;
param_file.close();
//read param file
Readparamfile(XParam, XForcing, "BG_param_test15.txt"); // "BG_param_test13.txt");
//readforcing
readforcing(XParam, XForcing);
checkparamsanity(XParam, XForcing);
InitMesh(XParam, XForcing, XModel);
InitialConditions(XParam, XForcing, XModel);
InitialAdaptation(XParam, XForcing, XModel);
SetupGPU(XParam, XModel, XForcing, XModel_g);
// Run first full step (i.e. 2 half steps)
//Loop<T> XLoop = InitLoop(XParam, XModel);
//MainLoop(XParam, XForcing, XModel, XModel_g);
//TEST 1: reading and default values check:
bool result = false;
if (XModel.OutputT.size()==20)
{
result = true;
}
/*
if (!XParam.Toutput.end == 11.0)
result = false;
if (!XParam.Toutput.val[1] == 9.5)
result = false;
if (!XParam.outzone[2].Toutput.init == 0.0)
result = false;
if (!XParam.outzone[3].Toutput.tstep == 11.0)
result = false;
if (!XParam.outzone[4].Toutput.tstep == 1.0)
result = false;
*/
return result;
}
template <class T> void TestFirsthalfstep(Param XParam, Forcing<float> XForcing, Model<T> XModel, Model<T> XModel_g)
{
// Setup Model(s)
XParam.outvars = { "zb","h","zs","u","v","Fqux","Fqvx","Fquy","Fqvy", "Fhu", "Fhv", "dh", "dhu", "dhv", "Su", "Sv","dhdx", "dhdy", "dzsdx", "dzsdy", "dzbdx", "dzbdy" };
checkparamsanity(XParam, XForcing);
InitMesh(XParam, XForcing, XModel);
InitialConditions(XParam, XForcing, XModel);
InitialAdaptation(XParam, XForcing, XModel);
SetupGPU(XParam, XModel, XForcing, XModel_g);
// Run first full step (i.e. 2 half steps)
Loop<T> XLoop = InitLoop(XParam, XModel);
if (XParam.GPUDEVICE < 0)
{
//FlowCPU(XParam, XLoop, XForcing, XModel);
HalfStepCPU(XParam, XLoop, XForcing, XModel);
}
else
{
HalfStepGPU(XParam, XLoop, XForcing, XModel_g);
for (int ivar = 0; ivar < XParam.outvars.size(); ivar++)
{
CUDA_CHECK(cudaMemcpy(XModel.OutputVarMap[XParam.outvars[ivar]], XModel_g.OutputVarMap[XParam.outvars[ivar]], XParam.nblkmem * XParam.blksize * sizeof(T), cudaMemcpyDeviceToHost));
}
}
T maxu = std::numeric_limits<float>::min();
T maxv = std::numeric_limits<float>::min();
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XModel.blocks.active[ibl];
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < XParam.blkwidth; ix++)
{
int i = memloc(XParam.halowidth, XParam.blkmemwidth, ix, iy, ib);
maxu = max(maxu, abs(XModel.evolv.u[i]));
maxv = max(maxv, abs(XModel.evolv.v[i]));
}
}
}
bool test = false;
//test = true;
InitSave2Netcdf(XParam, XModel);
}
template <class T> void Testzbinit(Param XParam, Forcing<float> XForcing, Model<T> XModel, Model<T> XModel_g)
{
// Setup Model(s)
XParam.outvars = { "zb","u","v","Fqux","Fqvx","Fquy","Fqvy", "Fhu", "Fhv", "dh", "dhu", "dhv", "Su", "Sv","dhdx", "dhdy", "dzsdx", "dzsdy", "dzbdx", "dzbdy" };
checkparamsanity(XParam, XForcing);
InitMesh(XParam, XForcing, XModel);
InitialConditions(XParam, XForcing, XModel);
InitialAdaptation(XParam, XForcing, XModel);
SetupGPU(XParam, XModel, XForcing, XModel_g);
// Run first full step (i.e. 2 half steps)
Loop<T> XLoop = InitLoop(XParam, XModel);
//FlowCPU(XParam, XLoop, XForcing, XModel);
//HalfStepCPU(XParam, XLoop, XForcing, XModel);
if (XParam.conserveElevation)
{
refine_linear(XParam, XModel.blocks, XModel.zb, XModel.grad.dzbdx, XModel.grad.dzbdy);
}
//HalfStepGPU(XParam, XLoop, XForcing, XModel_g);
if (XParam.conserveElevation)
{
refine_linearGPU(XParam, XModel_g.blocks, XModel_g.zb, XModel_g.grad.dzbdx, XModel_g.grad.dzbdy);
}
//for (int ivar = 0; ivar < XParam.outvars.size(); ivar++)
//{
// CUDA_CHECK(cudaMemcpy(XModel.OutputVarMap[XParam.outvars[ivar]], XModel_g.OutputVarMap[XParam.outvars[ivar]], XParam.nblkmem * XParam.blksize * sizeof(T), cudaMemcpyDeviceToHost));
//}
CompareCPUvsGPU(XParam, XModel, XModel_g, XParam.outvars, true);
//T maxu = std::numeric_limits<float>::min();
//T maxv = std::numeric_limits<float>::min();
/*
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XModel.blocks.active[ibl];
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < XParam.blkwidth; ix++)
{
int i = memloc(XParam.halowidth, XParam.blkmemwidth, ix, iy, ib);
maxu = max(maxu, abs(XModel.evolv.u[i]));
maxv = max(maxv, abs(XModel.evolv.v[i]));
}
}
}
*/
bool test = false;
//test = true;
//InitSave2Netcdf(XParam, XModel);
}
template <class T> int TestAIObnd(Param XParam, Model<T> XModel, Model<T> XModel_g, bool bottop, bool flip, bool withaoi)
{
Forcing<float> XForcing;
XForcing = MakValleyBathy(XParam, T(0.4), bottop, flip);
XParam.conserveElevation = true;
float maxtopo = std::numeric_limits<float>::min();
float mintopo = std::numeric_limits<float>::max();
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
for (int i = 0; i < XForcing.Bathy[0].nx; i++)
{
maxtopo = max(XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx], maxtopo);
mintopo = min(XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx], mintopo);
}
}
// Overrule whatever may be set in the param file
XParam.xmax = XForcing.Bathy[0].xmax;
XParam.ymax = XForcing.Bathy[0].ymax;
XParam.xo = XForcing.Bathy[0].xo;
XParam.yo = XForcing.Bathy[0].yo;
XParam.dx = XForcing.Bathy[0].dx;
//XParam.zsinit = mintopo - 6.9;// Had a water level so that the wet and dry affects the
XParam.zsinit = mintopo - 9.9;// Had a water level so that the wet and dry affects the
XParam.endtime = 20.0;
XParam.dtmin = 0.00000001;
XParam.aoibnd = 0;
XParam.outputtimestep = XParam.endtime;
std::ofstream aoi_file(
"testaoi.tmp", std::ios_base::out | std::ios_base::trunc);
aoi_file << "5.0 3.0" << std::endl;
aoi_file << "27.0 3.0" << std::endl;
aoi_file << "27.0 27.0" << std::endl;
aoi_file << "5.0 27.0" << std::endl;
aoi_file << "5.0 3.0" << std::endl;
aoi_file.close(); //destructor implicitly does it
/*
std::ofstream aoi_file(
"testaoi.tmp", std::ios_base::out | std::ios_base::trunc);
aoi_file << "-5.0 -3.0" << std::endl;
aoi_file << "27.0 -3.0" << std::endl;
aoi_file << "27.0 270.0" << std::endl;
aoi_file << "-5.0 270.0" << std::endl;
aoi_file << "-5.0 -3.0" << std::endl;
aoi_file.close(); //destructor implicitly does it
*/
if (withaoi)
{
XForcing.AOI.file = "testaoi.tmp";
XForcing.AOI.active = true;
XForcing.AOI.poly = readPolygon(XForcing.AOI.file);
}
/*
if (bottop==false && flip==false)
{
XForcing.left.type = 0;
}
if (bottop == false && flip == true)
{
XForcing.right.type = 0;
}
if (bottop == true && flip == false)
{
XForcing.bot.type = 0;
}
if (bottop == true && flip == true)
{
XForcing.top.type = 0;
}
*/
XParam.minlevel = 3;
XParam.maxlevel = 3;
XParam.initlevel = 3;
/*
// coarse to fine
// Change arg 1 and 2 if the slope is changed
XParam.AdaptCrit = "Targetlevel";
XParam.Adapt_arg1 = "";
XParam.Adapt_arg2 = "";
XParam.Adapt_arg3 = "";
StaticForcingP<int> targetlevel;
XForcing.targetadapt.push_back(targetlevel);
XForcing.targetadapt[0].xo = 0.0;
XForcing.targetadapt[0].yo = 0.0;
XForcing.targetadapt[0].xmax = 31.0;
XForcing.targetadapt[0].ymax = 31.0;
XForcing.targetadapt[0].nx = 32;
XForcing.targetadapt[0].ny = 32;
XForcing.targetadapt[0].dx = 1.0;
AllocateCPU(XForcing.Bathy[0].nx, XForcing.Bathy[0].ny, XForcing.targetadapt[0].val);
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
for (int i = 0; i < XForcing.Bathy[0].nx; i++)
{
XForcing.targetadapt[0].val[i + j * XForcing.Bathy[0].nx] = 1;
}
}
XForcing.targetadapt[0].val[12 + 12 * XForcing.Bathy[0].nx] = 2;
*/
// Add rain forcing
//Create a temporary file with river fluxes
float Q = 1;
std::ofstream river_file(
"testriver.tmp", std::ios_base::out | std::ios_base::trunc);
river_file << "0.0 " + std::to_string(Q) << std::endl;
river_file << "3600.0 " + std::to_string(Q) << std::endl;
river_file.close(); //destructor implicitly does it
River thisriver;
thisriver.Riverflowfile = "testriver.tmp";
thisriver.xstart = 10;
thisriver.xend = 12;
thisriver.ystart = 10;
thisriver.yend = 12;
XForcing.rivers.push_back(thisriver);
XForcing.rivers[0].flowinput = readFlowfile(XForcing.rivers[0].Riverflowfile, XParam.reftime);
// Setup Model(s)
checkparamsanity(XParam, XForcing);
InitMesh(XParam, XForcing, XModel);
InitialConditions(XParam, XForcing, XModel);
InitialAdaptation(XParam, XForcing, XModel);
SetupGPU(XParam, XModel, XForcing, XModel_g);
T initVol = T(0.0);
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XModel.blocks.active[ibl];
T delta = calcres(XParam.delta, XModel.blocks.level[ib]);
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < (XParam.blkwidth); ix++)
{
int i = memloc(XParam, ix, iy, ib);
initVol = initVol + XModel.evolv.h[i] * delta * delta;
}
}
}
MainLoop(XParam, XForcing, XModel, XModel_g);
T TheoryInput = Q * XParam.endtime;
T SimulatedVolume = T(0.0);
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XModel.blocks.active[ibl];
T delta = calcres(XParam.delta, XModel.blocks.level[ib]);
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < (XParam.blkwidth); ix++)
{
int i = memloc(XParam, ix, iy, ib);
SimulatedVolume = SimulatedVolume + XModel.evolv.h[i] * delta * delta;
}
}
}
SimulatedVolume = SimulatedVolume - initVol;
T error = abs(SimulatedVolume - TheoryInput);
int modelgood = error / TheoryInput < 0.001;
printf("\nSim Vol = %f, theory=%f, Error = %f, (%f %%) \n", SimulatedVolume, TheoryInput, error, (error / TheoryInput) * 100);
//log("#####");
return modelgood;
}
template <class T> __global__ void vectoroffsetGPU(int nx, T offset, T* z)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < nx)
{
z[idx] = z[idx] + offset;
}
}
template <class T> int TestPinMem(Param XParam, Model<T> XModel, Model<T> XModel_g)
{
T* zf, *zf_g, * zf_recov;
int nx = 32;
int ny = 1;
int nelem = nx * ny;
AllocateMappedMemCPU(nx, ny, XParam.GPUDEVICE, zf);
AllocateCPU(nx, ny, zf_recov);
for (int i = 0; i < nx; i++)
{
for (int j = 0; j < ny; j++)
{
zf[i + j * nx] = i + j * nx +T(0.25);
}
}
T checkrem = T(0.0);
if (XParam.GPUDEVICE >= 0)
{
AllocateMappedMemGPU(nx, ny, XParam.GPUDEVICE, zf_g, zf);
dim3 block(16);
dim3 grid((unsigned int)ceil(nelem / (float)block.x));
vectoroffsetGPU <<<grid, block >>> (nelem, T(1.0), zf_g);
CUDA_CHECK(cudaDeviceSynchronize());
CUDA_CHECK(cudaMemcpy(zf_recov, zf_g, nx * ny * sizeof(T), cudaMemcpyDeviceToHost));
for (int i = 0; i < nx; i++)
{
for (int j = 0; j < ny; j++)
{
checkrem = checkrem + abs(zf[i + j * nx] - zf_recov[i + j * nx]);
}
}
}
int modelgood = checkrem < 1.e-6f;
if (checkrem > 1.e-6f)
{
printf("\n Test Failed error = %e \n", checkrem);
return modelgood;
}
else
{
printf("\n Test Success error = %e \n", checkrem);
}
return modelgood;
}
template int TestPinMem<float>(Param XParam, Model<float> XModel, Model<float> XModel_g);
template int TestPinMem<double>(Param XParam, Model<double> XModel, Model<double> XModel_g);
template <class T> Forcing<float> MakValleyBathy(Param XParam, T slope, bool bottop, bool flip)
{
//
Forcing<float> XForcing;
StaticForcingP<float> bathy;
float* dummybathy;
XForcing.Bathy.push_back(bathy);
XForcing.Bathy[0].xo = 0.0;
XForcing.Bathy[0].yo = 0.0;
XForcing.Bathy[0].xmax = 31.0;
XForcing.Bathy[0].ymax = 31.0;
XForcing.Bathy[0].nx = 32;
XForcing.Bathy[0].ny = 32;
XForcing.Bathy[0].dx = 1.0;
XForcing.Bathy[0].dy = XForcing.Bathy[0].dx;
T x, y;
T center = T(10.5);
AllocateCPU(1, 1, XForcing.left.blks, XForcing.right.blks, XForcing.top.blks, XForcing.bot.blks);
AllocateCPU(XForcing.Bathy[0].nx, XForcing.Bathy[0].ny, XForcing.Bathy[0].val);
AllocateCPU(XForcing.Bathy[0].nx, XForcing.Bathy[0].ny, dummybathy);
float maxtopo = std::numeric_limits<float>::min();
float mintopo = std::numeric_limits<float>::max();
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
for (int i = 0; i < XForcing.Bathy[0].nx; i++)
{
x = T(XForcing.Bathy[0].xo + i * XForcing.Bathy[0].dx);
y = T(XForcing.Bathy[0].yo + j * XForcing.Bathy[0].dx);
dummybathy[i + j * XForcing.Bathy[0].nx] = float(ValleyBathy(y, x, slope, center));
maxtopo = max(dummybathy[i + j * XForcing.Bathy[0].nx], maxtopo);
}
}
// Make surrounding wall
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
dummybathy[0 + j * XForcing.Bathy[0].nx] = maxtopo + 5.0f;
dummybathy[1 + j * XForcing.Bathy[0].nx] = maxtopo + 5.0f;
dummybathy[j + 0 * XForcing.Bathy[0].nx] = maxtopo + 5.0f;
dummybathy[j + 1 * XForcing.Bathy[0].nx] = maxtopo + 5.0f;
dummybathy[(XForcing.Bathy[0].nx - 1) + j * XForcing.Bathy[0].nx] = maxtopo + 5.0f;
dummybathy[(XForcing.Bathy[0].nx - 2) + j * XForcing.Bathy[0].nx] = maxtopo + 5.0f;
dummybathy[j + (XForcing.Bathy[0].ny - 1) * XForcing.Bathy[0].nx] = maxtopo + 5.0f;
dummybathy[j + (XForcing.Bathy[0].ny - 2) * XForcing.Bathy[0].nx] = maxtopo + 5.0f;
}
// make a specially elevated spot
dummybathy[(XForcing.Bathy[0].nx - 1) + 0 * XForcing.Bathy[0].nx] = maxtopo + 10.0f;
dummybathy[(XForcing.Bathy[0].nx - 2) + 0 * XForcing.Bathy[0].nx] = maxtopo + 10.0f;
dummybathy[(XForcing.Bathy[0].nx - 1) + 1 * XForcing.Bathy[0].nx] = maxtopo + 10.0f;
dummybathy[(XForcing.Bathy[0].nx - 2) + 1 * XForcing.Bathy[0].nx] = maxtopo + 10.0f;
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
for (int i = 0; i < XForcing.Bathy[0].nx; i++)
{
mintopo = min(dummybathy[i + j * XForcing.Bathy[0].nx], mintopo);
}
}
// Flip or rotate the bathy according to what is requested
for (int j = 0; j < XForcing.Bathy[0].ny; j++)
{
for (int i = 0; i < XForcing.Bathy[0].nx; i++)
{
if (!flip && !bottop)
{
XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx] = dummybathy[i + j * XForcing.Bathy[0].nx];
}
else if (flip && !bottop)
{
XForcing.Bathy[0].val[(XForcing.Bathy[0].nx - 1 - i) + j * XForcing.Bathy[0].nx] = dummybathy[i + j * XForcing.Bathy[0].nx];
}
else if (!flip && bottop)
{
XForcing.Bathy[0].val[i + j * XForcing.Bathy[0].nx] = dummybathy[j + i * XForcing.Bathy[0].nx];
}
else if (flip && bottop)
{
XForcing.Bathy[0].val[i + (XForcing.Bathy[0].ny - 1 - j) * XForcing.Bathy[0].nx] = dummybathy[j + i * XForcing.Bathy[0].nx];
}
}
}
free(dummybathy);
return XForcing;
}
void alloc_init2Darray(float** arr, int NX, int NY)
{
int i, j;
//Allocation
arr = (float**)malloc(sizeof(float*) * NX);
for (i = 0; i < NX; i++) {
arr[i] = (float*)malloc(sizeof(float) * NY);
}
//arr = (int **)malloc(sizeof(int *) * NX);
//for (i = 0; i < NX; i++) {
// arr[i] = (int *)malloc(sizeof(int) * NY);
//}
//Initialisation
for (i = 0; i < NX; i++) {
for (j = 0; j < NY; j++) {
arr[i][j] = 0;
}
}
}
void init3Darray(float*** arr, int rows, int cols, int depths)
{
int i, j, k;
for (i = 0; i < rows; i++) {
for (j = 0; j < cols; j++) {
for (k = 0; k < depths; k++)
{
arr[i][j][k] = 0;
}
}
}
}
template <class T> void fillrandom(Param XParam, BlockP<T> XBlock, T* z)
{
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XBlock.active[ibl];
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < XParam.blkwidth; ix++)
{
//
int n = memloc(XParam, ix, iy, ib);
z[n] = T(rand()) / T(RAND_MAX);
}
}
}
}
template void fillrandom<float>(Param XParam, BlockP<float> XBlock, float* z);
template void fillrandom<double>(Param XParam, BlockP<double> XBlock, double* z);
template <class T> void fillgauss(Param XParam, BlockP<T> XBlock, T amp, T* z)
{
T delta, x, y;
T cc = T(0.05) * (XParam.xmax - XParam.xo);
T xorigin = XParam.xo + T(0.5) * (XParam.xmax - XParam.xo);
T yorigin = XParam.yo + T(0.5) * (XParam.ymax - XParam.yo);
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
//printf("bl=%d\tblockxo[bl]=%f\tblockyo[bl]=%f\n", bl, blockxo[bl], blockyo[bl]);
int ib = XBlock.active[ibl];
delta = calcres(T(XParam.dx), XBlock.level[ib]);
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < XParam.blkwidth; ix++)
{
//
int n = memloc(XParam, ix, iy, ib);
x = T(XParam.xo + XBlock.xo[ib] + ix * delta);
y = T(XParam.yo + XBlock.yo[ib] + iy * delta);
z[n] = z[n] + amp * exp(T(-1.0) * T(((x - xorigin) * (x - xorigin) + (y - yorigin) * (y - yorigin)) / (2.0 * cc * cc)));
}
}
}
}
template void fillgauss<float>(Param XParam, BlockP<float> XBlock, float amp, float* z);
template void fillgauss<double>(Param XParam, BlockP<double> XBlock, double amp, double* z);
template <class T>
void TestingOutput(Param XParam, Model<T> XModel)
{
std::string outvar;
Loop<T> XLoop;
// GPU stuff
XLoop.hugenegval = std::numeric_limits<T>::min();
XLoop.hugeposval = std::numeric_limits<T>::max();
XLoop.epsilon = std::numeric_limits<T>::epsilon();
XLoop.totaltime = 0.0;
XLoop.nextoutputtime = 0.2;
Forcing<float> XForcing;
//FlowCPU(XParam, XLoop, XModel);
//log(std::to_string(XForcing.Bathy.val[50]));
creatncfileBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, XModel.blocks.outZone[0]);
outvar = "h";
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, outvar, 3, XModel.OutputVarMap[outvar], XModel.blocks.outZone[0]);
outvar = "u";
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, outvar, 3, XModel.OutputVarMap[outvar], XModel.blocks.outZone[0]);
outvar = "v";
//copyID2var(XParam, XModel.blocks, XModel.OutputVarMap[outvar]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, outvar, 3, XModel.OutputVarMap[outvar], XModel.blocks.outZone[0]);
outvar = "zb";
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, outvar, 3, XModel.OutputVarMap[outvar], XModel.blocks.outZone[0]);
outvar = "zs";
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, outvar, 3, XModel.OutputVarMap[outvar], XModel.blocks.outZone[0]);
FlowCPU(XParam, XLoop, XForcing, XModel);
//outvar = "cf";
//defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, outvar, 3, XModel.cf);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "dhdx", 3, XModel.grad.dhdx, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "dhdy", 3, XModel.grad.dhdy, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "Fhv", 3, XModel.flux.Fhv, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "Fhu", 3, XModel.flux.Fhu, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "Fqux", 3, XModel.flux.Fqux, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "Fquy", 3, XModel.flux.Fquy, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "Fqvx", 3, XModel.flux.Fqvx, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "Fqvy", 3, XModel.flux.Fqvy, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "Su", 3, XModel.flux.Su, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "Sv", 3, XModel.flux.Sv, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "dh", 3, XModel.adv.dh, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "dhu", 3, XModel.adv.dhu, XModel.blocks.outZone[0]);
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, "dhv", 3, XModel.adv.dhv, XModel.blocks.outZone[0]);
writenctimestep(XParam.outfile, XLoop.totaltime + XLoop.dt);
outvar = "h";
writencvarstepBUQ(XParam, 3, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, outvar, XModel.OutputVarMap[outvar], XModel.blocks.outZone[0]);
outvar = "zs";
writencvarstepBUQ(XParam, 3, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, outvar, XModel.OutputVarMap[outvar], XModel.blocks.outZone[0]);
outvar = "u";
writencvarstepBUQ(XParam, 3, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, outvar, XModel.OutputVarMap[outvar], XModel.blocks.outZone[0]);
outvar = "v";
writencvarstepBUQ(XParam, 3, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, outvar, XModel.OutputVarMap[outvar], XModel.blocks.outZone[0]);
}
template void TestingOutput<float>(Param XParam, Model<float> XModel);
template void TestingOutput<double>(Param XParam, Model<double> XModel);
template <class T> void copyID2var(Param XParam, BlockP<T> XBlock, T* z)
{
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XBlock.active[ibl];
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < XParam.blkwidth; ix++)
{
int n = memloc(XParam, ix, iy, ib);
z[n] = T(ib);
}
}
}
}
template void copyID2var<float>(Param XParam, BlockP<float> XBlock, float* z);
template void copyID2var<double>(Param XParam, BlockP<double> XBlock, double* z);
template <class T> void copyBlockinfo2var(Param XParam, BlockP<T> XBlock, int* blkinfo, T* z)
{
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
int ib = XBlock.active[ibl];
int info = blkinfo[ib];
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < XParam.blkwidth; ix++)
{
int n = memloc(XParam, ix, iy, ib);
z[n] = T(info);
}
}
}
}
template void copyBlockinfo2var<float>(Param XParam, BlockP<float> XBlock, int* blkinfo, float* z);
template void copyBlockinfo2var<double>(Param XParam, BlockP<double> XBlock, int* blkinfo, double* z);
template <class T> void CompareCPUvsGPU(Param XParam, Model<T> XModel, Model<T> XModel_g, std::vector<std::string> varlist, bool checkhalo)
{
Loop<T> XLoop;
// GPU stuff
XLoop.hugenegval = std::numeric_limits<T>::min();
XLoop.hugeposval = std::numeric_limits<T>::max();
XLoop.epsilon = std::numeric_limits<T>::epsilon();
XLoop.totaltime = 0.0;
XLoop.nextoutputtime = 3600.0;
T* gpureceive;
T* diff;
//Forcing<float> XForcing;
AllocateCPU(XParam.nblkmem, XParam.blksize, gpureceive);
AllocateCPU(XParam.nblkmem, XParam.blksize, diff);
//============================================
// Compare gradients for evolving parameters
// calculate difference
//diffArray(XParam, XLoop, XModel.blocks, XModel.evolv.h, XModel_g.evolv.h, XModel.evolv_o.u);
/*
creatncfileBUQ(XParam, XModel.blocks);
for (int ivar = 0; ivar < varlist.size(); ivar++)
{
defncvarBUQ(XParam, XModel.blocks.active, XModel.blocks.level, XModel.blocks.xo, XModel.blocks.yo, varlist[ivar], 3, XModel.OutputVarMap[varlist[ivar]], XModel.blocks.outZone[0]);
}
*/
/*
std::string varname = "dt";
if (abs(dtgpu - dtcpu) < (XLoop.epsilon * 2))
{
log(varname + " PASS");
}
else
{
log(varname + " FAIL: " + " GPU(" + std::to_string(dtgpu) + ") - CPU("+std::to_string(dtcpu) +") = difference: "+ std::to_string(abs(dtgpu - dtcpu)) + " Eps: " + std::to_string(XLoop.epsilon));
}
*/
//Check variable
for (int ivar = 0; ivar < varlist.size(); ivar++)
{
diffArray(XParam, XModel.blocks, varlist[ivar], checkhalo, XModel.OutputVarMap[varlist[ivar]], XModel_g.OutputVarMap[varlist[ivar]], gpureceive, diff);
}
free(gpureceive);
free(diff);
}
template void CompareCPUvsGPU<float>(Param XParam, Model<float> XModel, Model<float> XModel_g, std::vector<std::string> varlist, bool checkhalo);
template void CompareCPUvsGPU<double>(Param XParam, Model<double> XModel, Model<double> XModel_g, std::vector<std::string> varlist, bool checkhalo);
template <class T> void diffdh(Param XParam, BlockP<T> XBlock, T* input, T* output, T* shuffle)
{
int iright, itop;
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
//printf("bl=%d\tblockxo[bl]=%f\tblockyo[bl]=%f\n", bl, blockxo[bl], blockyo[bl]);
int ib = XBlock.active[ibl];
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < XParam.blkwidth; ix++)
{
int i = memloc(XParam.halowidth, XParam.blkmemwidth, ix, iy, ib);
iright = memloc(XParam.halowidth, XParam.blkmemwidth, ix + 1, iy, ib);
itop = memloc(XParam.halowidth, XParam.blkmemwidth, ix, iy + 1, ib);
output[i] = input[iright] - input[i];
shuffle[i] = input[iright];
}
}
}
}
template <class T> void diffSource(Param XParam, BlockP<T> XBlock, T* Fqux, T* Su, T* output)
{
int iright, itop;
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
//printf("bl=%d\tblockxo[bl]=%f\tblockyo[bl]=%f\n", bl, blockxo[bl], blockyo[bl]);
int ib = XBlock.active[ibl];
for (int iy = 0; iy < XParam.blkwidth; iy++)
{
for (int ix = 0; ix < XParam.blkwidth; ix++)
{
int i = memloc(XParam.halowidth, XParam.blkmemwidth, ix, iy, ib);
iright = memloc(XParam.halowidth, XParam.blkmemwidth, ix + 1, iy, ib);
itop = memloc(XParam.halowidth, XParam.blkmemwidth, ix, iy + 1, ib);
output[i] = Fqux[i] - Su[iright];
//shuffle[i] = input[iright];
}
}
}
}
template <class T> void diffArray(Param XParam, BlockP<T> XBlock, std::string varname, bool checkhalo, T* cpu, T* gpu, T* dummy, T* out)
{
T diff, maxdiff, rmsdiff;
unsigned int nit = 0;
int ixmd, iymd, ibmd;
//copy GPU back to the CPU (store in dummy)
CopyGPUtoCPU(XParam.nblkmem, XParam.blksize, dummy, gpu);
T hugeposval = std::numeric_limits<T>::max();
T hugenegval = T(-1.0) * hugeposval;
T epsilon = std::numeric_limits<T>::epsilon();
rmsdiff = T(0.0);
maxdiff = hugenegval;
ixmd = 0;
iymd = 0;
ibmd = 0;
// calculate difference
for (int ibl = 0; ibl < XParam.nblk; ibl++)
{
//printf("bl=%d\tblockxo[bl]=%f\tblockyo[bl]=%f\n", bl, blockxo[bl], blockyo[bl]);
int ib = XBlock.active[ibl];
int yst = checkhalo ? -1 : 0;
int ynd = checkhalo ? XParam.blkwidth + 1 : XParam.blkwidth;
int xst = checkhalo ? -1 : 0;
int xnd = checkhalo ? XParam.blkwidth + 1 : XParam.blkwidth;
for (int iy = yst; iy < ynd; iy++)
{
for (int ix = xst; ix < xnd; ix++)
{
int n = memloc(XParam, ix, iy, ib);
diff = dummy[n] - cpu[n];
if (abs(diff) >= maxdiff)
{
maxdiff = utils::max(abs(diff), maxdiff);
ixmd = ix;
iymd = iy;
ibmd = ib;
}
rmsdiff = rmsdiff + utils::sq(diff);
nit++;
out[n] = diff;
}
}
}
rmsdiff = rmsdiff / nit;
if (maxdiff <= T(10000.0) * (epsilon))
{
log(varname + " PASS");
}
else
{
creatncfileBUQ(XParam, XBlock);
log(varname + " FAIL: " + " Max difference: " + std::to_string(maxdiff) + " (at: ix = " + std::to_string(ixmd) + " iy = " + std::to_string(iymd) + " ib = " + std::to_string(ibmd) + ") RMS difference: " + std::to_string(rmsdiff) + " Eps: " + std::to_string(epsilon));
defncvarBUQ(XParam, XBlock.active, XBlock.level, XBlock.xo, XBlock.yo, varname + "_CPU", 3, cpu, XBlock.outZone[0]);
defncvarBUQ(XParam, XBlock.active, XBlock.level, XBlock.xo, XBlock.yo, varname + "_GPU", 3, dummy, XBlock.outZone[0]);
defncvarBUQ(XParam, XBlock.active, XBlock.level, XBlock.xo, XBlock.yo, varname + "_diff", 3, out, XBlock.outZone[0]);
}
}