00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <string.h>
00004 #include <math.h>
00005 #include <mpi.h>
00006
00007
00013 #ifdef PMGRID
00014 #if !defined (PERIODIC) || defined (PLACEHIGHRESREGION)
00015
00016 #ifdef NOTYPEPREFIX_FFTW
00017 #include <rfftw_mpi.h>
00018 #else
00019 #ifdef DOUBLEPRECISION_FFTW
00020 #include <drfftw_mpi.h>
00021 #else
00022 #include <srfftw_mpi.h>
00023 #endif
00024 #endif
00025
00026 #include "allvars.h"
00027 #include "proto.h"
00028
00029 #define GRID (2*PMGRID)
00030 #define GRID2 (2*(GRID/2 + 1))
00031
00032
00033
00034 static rfftwnd_mpi_plan fft_forward_plan, fft_inverse_plan;
00035
00036 static int slab_to_task[GRID];
00037 static int *slabs_per_task;
00038 static int *first_slab_of_task;
00039
00040 static int *meshmin_list, *meshmax_list;
00041
00042 static int slabstart_x, nslab_x, slabstart_y, nslab_y;
00043
00044 static int fftsize, maxfftsize;
00045
00046 static fftw_real *kernel[2], *rhogrid, *forcegrid, *workspace;
00047 static fftw_complex *fft_of_kernel[2], *fft_of_rhogrid;
00048
00058 void pm_init_regionsize(void)
00059 {
00060 double meshinner[2], xmin[2][3], xmax[2][3];
00061 int i, j, t;
00062
00063
00064
00065 for(j = 0; j < 3; j++)
00066 {
00067 xmin[0][j] = xmin[1][j] = 1.0e36;
00068 xmax[0][j] = xmax[1][j] = -1.0e36;
00069 }
00070
00071 for(i = 0; i < NumPart; i++)
00072 for(j = 0; j < 3; j++)
00073 {
00074 t = 0;
00075 #ifdef PLACEHIGHRESREGION
00076 if(((1 << P[i].Type) & (PLACEHIGHRESREGION)))
00077 t = 1;
00078 #endif
00079 if(P[i].Pos[j] > xmax[t][j])
00080 xmax[t][j] = P[i].Pos[j];
00081 if(P[i].Pos[j] < xmin[t][j])
00082 xmin[t][j] = P[i].Pos[j];
00083 }
00084
00085 MPI_Allreduce(xmin, All.Xmintot, 6, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
00086 MPI_Allreduce(xmax, All.Xmaxtot, 6, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
00087
00088 for(j = 0; j < 2; j++)
00089 {
00090 All.TotalMeshSize[j] = All.Xmaxtot[j][0] - All.Xmintot[j][0];
00091 All.TotalMeshSize[j] = dmax(All.TotalMeshSize[j], All.Xmaxtot[j][1] - All.Xmintot[j][1]);
00092 All.TotalMeshSize[j] = dmax(All.TotalMeshSize[j], All.Xmaxtot[j][2] - All.Xmintot[j][2]);
00093 #ifdef ENLARGEREGION
00094 All.TotalMeshSize[j] *= ENLARGEREGION;
00095 #endif
00096
00097
00098 for(i = 0; i < 3; i++)
00099 {
00100 All.Xmintot[j][i] = (All.Xmintot[j][i] + All.Xmaxtot[j][i]) / 2 - All.TotalMeshSize[j] / 2;
00101 All.Xmaxtot[j][i] = All.Xmintot[j][i] + All.TotalMeshSize[j];
00102 }
00103 }
00104
00105
00106
00107
00108 for(j = 0; j < 2; j++)
00109 {
00110 meshinner[j] = All.TotalMeshSize[j];
00111 All.TotalMeshSize[j] *= 2.001 * (GRID) / ((double) (GRID - 2 - 8));
00112 }
00113
00114
00115
00116 for(j = 0; j < 2; j++)
00117 for(i = 0; i < 3; i++)
00118 {
00119 All.Corner[j][i] = All.Xmintot[j][i] - 2.0005 * All.TotalMeshSize[j] / GRID;
00120 All.UpperCorner[j][i] = All.Corner[j][i] + (GRID / 2 - 1) * (All.TotalMeshSize[j] / GRID);
00121 }
00122
00123
00124 #ifndef PERIODIC
00125 All.Asmth[0] = ASMTH * All.TotalMeshSize[0] / GRID;
00126 All.Rcut[0] = RCUT * All.Asmth[0];
00127 #endif
00128
00129 #ifdef PLACEHIGHRESREGION
00130 All.Asmth[1] = ASMTH * All.TotalMeshSize[1] / GRID;
00131 All.Rcut[1] = RCUT * All.Asmth[1];
00132 #endif
00133
00134 #ifdef PLACEHIGHRESREGION
00135 if(2 * All.TotalMeshSize[1] / GRID < All.Rcut[0])
00136 {
00137 All.TotalMeshSize[1] = 2 * (meshinner[1] + 2 * All.Rcut[0]) * (GRID) / ((double) (GRID - 2));
00138
00139 for(i = 0; i < 3; i++)
00140 {
00141 All.Corner[1][i] = All.Xmintot[1][i] - 1.0001 * All.Rcut[0];
00142 All.UpperCorner[1][i] = All.Corner[1][i] + (GRID / 2 - 1) * (All.TotalMeshSize[1] / GRID);
00143 }
00144
00145 if(2 * All.TotalMeshSize[1] / GRID > All.Rcut[0])
00146 {
00147 All.TotalMeshSize[1] = 2 * (meshinner[1] + 2 * All.Rcut[0]) * (GRID) / ((double) (GRID - 10));
00148
00149 for(i = 0; i < 3; i++)
00150 {
00151 All.Corner[1][i] = All.Xmintot[1][i] - 1.0001 * (All.Rcut[0] + 2 * All.TotalMeshSize[j] / GRID);
00152 All.UpperCorner[1][i] = All.Corner[1][i] + (GRID / 2 - 1) * (All.TotalMeshSize[1] / GRID);
00153 }
00154 }
00155
00156 All.Asmth[1] = ASMTH * All.TotalMeshSize[1] / GRID;
00157 All.Rcut[1] = RCUT * All.Asmth[1];
00158 }
00159 #endif
00160
00161 if(ThisTask == 0)
00162 {
00163 #ifndef PERIODIC
00164 printf("\nAllowed region for isolated PM mesh (coarse):\n");
00165 printf("(%g|%g|%g) -> (%g|%g|%g) ext=%g totmeshsize=%g meshsize=%g\n\n",
00166 All.Xmintot[0][0], All.Xmintot[0][1], All.Xmintot[0][2],
00167 All.Xmaxtot[0][0], All.Xmaxtot[0][1], All.Xmaxtot[0][2], meshinner[0], All.TotalMeshSize[0],
00168 All.TotalMeshSize[0] / GRID);
00169 #endif
00170 #ifdef PLACEHIGHRESREGION
00171 printf("\nAllowed region for isolated PM mesh (high-res):\n");
00172 printf("(%g|%g|%g) -> (%g|%g|%g) ext=%g totmeshsize=%g meshsize=%g\n\n",
00173 All.Xmintot[1][0], All.Xmintot[1][1], All.Xmintot[1][2],
00174 All.Xmaxtot[1][0], All.Xmaxtot[1][1], All.Xmaxtot[1][2],
00175 meshinner[1], All.TotalMeshSize[1], All.TotalMeshSize[1] / GRID);
00176 #endif
00177 }
00178
00179 }
00180
00185 void pm_init_nonperiodic(void)
00186 {
00187 int i, slab_to_task_local[GRID];
00188 double bytes_tot = 0;
00189 size_t bytes;
00190
00191
00192
00193 fft_forward_plan = rfftw3d_mpi_create_plan(MPI_COMM_WORLD, GRID, GRID, GRID,
00194 FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
00195 fft_inverse_plan = rfftw3d_mpi_create_plan(MPI_COMM_WORLD, GRID, GRID, GRID,
00196 FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
00197
00198
00199
00200 rfftwnd_mpi_local_sizes(fft_forward_plan, &nslab_x, &slabstart_x, &nslab_y, &slabstart_y, &fftsize);
00201
00202
00203 for(i = 0; i < GRID; i++)
00204 slab_to_task_local[i] = 0;
00205
00206 for(i = 0; i < nslab_x; i++)
00207 slab_to_task_local[slabstart_x + i] = ThisTask;
00208
00209 MPI_Allreduce(slab_to_task_local, slab_to_task, GRID, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
00210
00211 slabs_per_task = malloc(NTask * sizeof(int));
00212 MPI_Allgather(&nslab_x, 1, MPI_INT, slabs_per_task, 1, MPI_INT, MPI_COMM_WORLD);
00213
00214 #ifndef PERIODIC
00215 if(ThisTask == 0)
00216 {
00217 for(i = 0; i < NTask; i++)
00218 printf("Task=%d FFT-Slabs=%d\n", i, slabs_per_task[i]);
00219 }
00220 #endif
00221
00222 first_slab_of_task = malloc(NTask * sizeof(int));
00223 MPI_Allgather(&slabstart_x, 1, MPI_INT, first_slab_of_task, 1, MPI_INT, MPI_COMM_WORLD);
00224
00225 meshmin_list = malloc(3 * NTask * sizeof(int));
00226 meshmax_list = malloc(3 * NTask * sizeof(int));
00227
00228 MPI_Allreduce(&fftsize, &maxfftsize, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
00229
00230
00231
00232 #if !defined(PERIODIC)
00233 if(!(kernel[0] = (fftw_real *) malloc(bytes = fftsize * sizeof(fftw_real))))
00234 {
00235 printf("failed to allocate memory for `FFT-kernel[0]' (%g MB).\n", bytes / (1024.0 * 1024.0));
00236 endrun(1);
00237 }
00238 bytes_tot += bytes;
00239 fft_of_kernel[0] = (fftw_complex *) kernel[0];
00240 #endif
00241
00242 #if defined(PLACEHIGHRESREGION)
00243 if(!(kernel[1] = (fftw_real *) malloc(bytes = fftsize * sizeof(fftw_real))))
00244 {
00245 printf("failed to allocate memory for `FFT-kernel[1]' (%g MB).\n", bytes / (1024.0 * 1024.0));
00246 endrun(1);
00247 }
00248 bytes_tot += bytes;
00249 fft_of_kernel[1] = (fftw_complex *) kernel[1];
00250 #endif
00251
00252 if(ThisTask == 0)
00253 printf("\nAllocated %g MByte for FFT kernel(s).\n\n", bytes_tot / (1024.0 * 1024.0));
00254
00255 }
00256
00257
00264 void pm_init_nonperiodic_allocate(int dimprod)
00265 {
00266 static int first_alloc = 1;
00267 int dimprodmax;
00268 double bytes_tot = 0;
00269 size_t bytes;
00270
00271 MPI_Allreduce(&dimprod, &dimprodmax, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
00272
00273 if(!(rhogrid = (fftw_real *) malloc(bytes = fftsize * sizeof(fftw_real))))
00274 {
00275 printf("failed to allocate memory for `FFT-rhogrid' (%g MB).\n", bytes / (1024.0 * 1024.0));
00276 endrun(1);
00277 }
00278 bytes_tot += bytes;
00279
00280 fft_of_rhogrid = (fftw_complex *) rhogrid;
00281
00282 if(!(forcegrid = (fftw_real *) malloc(bytes = imax(fftsize, dimprodmax) * sizeof(fftw_real))))
00283 {
00284 printf("failed to allocate memory for `FFT-forcegrid' (%g MB).\n", bytes / (1024.0 * 1024.0));
00285 endrun(1);
00286 }
00287 bytes_tot += bytes;
00288
00289 if(!(workspace = (fftw_real *) malloc(bytes = imax(maxfftsize, dimprodmax) * sizeof(fftw_real))))
00290 {
00291 printf("failed to allocate memory for `FFT-workspace' (%g MB).\n", bytes / (1024.0 * 1024.0));
00292 endrun(1);
00293 }
00294 bytes_tot += bytes;
00295
00296 if(first_alloc == 1)
00297 {
00298 first_alloc = 0;
00299 if(ThisTask == 0)
00300 printf("\nUsing %g MByte for non-periodic FFT computation.\n\n", bytes_tot / (1024.0 * 1024.0));
00301 }
00302 }
00303
00304
00309 void pm_init_nonperiodic_free(void)
00310 {
00311
00312 free(workspace);
00313 free(forcegrid);
00314 free(rhogrid);
00315 }
00316
00317
00321 void pm_setup_nonperiodic_kernel(void)
00322 {
00323 int i, j, k;
00324 double x, y, z, r, u, fac;
00325 double kx, ky, kz, k2, fx, fy, fz, ff;
00326 int ip;
00327
00328
00329
00330 pm_init_nonperiodic_allocate(0);
00331
00332 #if !defined(PERIODIC)
00333 for(i = 0; i < fftsize; i++)
00334 kernel[0][i] = 0;
00335
00336 for(i = slabstart_x; i < (slabstart_x + nslab_x); i++)
00337 for(j = 0; j < GRID; j++)
00338 for(k = 0; k < GRID; k++)
00339 {
00340 x = ((double) i) / GRID;
00341 y = ((double) j) / GRID;
00342 z = ((double) k) / GRID;
00343
00344 if(x >= 0.5)
00345 x -= 1.0;
00346 if(y >= 0.5)
00347 y -= 1.0;
00348 if(z >= 0.5)
00349 z -= 1.0;
00350
00351 r = sqrt(x * x + y * y + z * z);
00352
00353 u = 0.5 * r / (((double) ASMTH) / GRID);
00354
00355 fac = 1 - erfc(u);
00356
00357 if(r > 0)
00358 kernel[0][GRID * GRID2 * (i - slabstart_x) + GRID2 * j + k] = -fac / r;
00359 else
00360 kernel[0][GRID * GRID2 * (i - slabstart_x) + GRID2 * j + k] =
00361 -1 / (sqrt(M_PI) * (((double) ASMTH) / GRID));
00362 }
00363
00364
00365
00366 rfftwnd_mpi(fft_forward_plan, 1, kernel[0], workspace, FFTW_TRANSPOSED_ORDER);
00367 #endif
00368
00369
00370 #if defined(PLACEHIGHRESREGION)
00371 for(i = 0; i < fftsize; i++)
00372 kernel[1][i] = 0;
00373
00374 for(i = slabstart_x; i < (slabstart_x + nslab_x); i++)
00375 for(j = 0; j < GRID; j++)
00376 for(k = 0; k < GRID; k++)
00377 {
00378 x = ((double) i) / GRID;
00379 y = ((double) j) / GRID;
00380 z = ((double) k) / GRID;
00381
00382 if(x >= 0.5)
00383 x -= 1.0;
00384 if(y >= 0.5)
00385 y -= 1.0;
00386 if(z >= 0.5)
00387 z -= 1.0;
00388
00389 r = sqrt(x * x + y * y + z * z);
00390
00391 u = 0.5 * r / (((double) ASMTH) / GRID);
00392
00393 fac = erfc(u * All.Asmth[1] / All.Asmth[0]) - erfc(u);
00394
00395 if(r > 0)
00396 kernel[1][GRID * GRID2 * (i - slabstart_x) + GRID2 * j + k] = -fac / r;
00397 else
00398 {
00399 fac = 1 - All.Asmth[1] / All.Asmth[0];
00400 kernel[1][GRID * GRID2 * (i - slabstart_x) + GRID2 * j + k] =
00401 -fac / (sqrt(M_PI) * (((double) ASMTH) / GRID));
00402 }
00403 }
00404
00405
00406
00407 rfftwnd_mpi(fft_forward_plan, 1, kernel[1], workspace, FFTW_TRANSPOSED_ORDER);
00408 #endif
00409
00410
00411
00412 for(y = slabstart_y; y < slabstart_y + nslab_y; y++)
00413 for(x = 0; x < GRID; x++)
00414 for(z = 0; z < GRID / 2 + 1; z++)
00415 {
00416 if(x > GRID / 2)
00417 kx = x - GRID;
00418 else
00419 kx = x;
00420 if(y > GRID / 2)
00421 ky = y - GRID;
00422 else
00423 ky = y;
00424 if(z > GRID / 2)
00425 kz = z - GRID;
00426 else
00427 kz = z;
00428
00429 k2 = kx * kx + ky * ky + kz * kz;
00430
00431 if(k2 > 0)
00432 {
00433 fx = fy = fz = 1;
00434 if(kx != 0)
00435 {
00436 fx = (M_PI * kx) / GRID;
00437 fx = sin(fx) / fx;
00438 }
00439 if(ky != 0)
00440 {
00441 fy = (M_PI * ky) / GRID;
00442 fy = sin(fy) / fy;
00443 }
00444 if(kz != 0)
00445 {
00446 fz = (M_PI * kz) / GRID;
00447 fz = sin(fz) / fz;
00448 }
00449 ff = 1 / (fx * fy * fz);
00450 ff = ff * ff * ff * ff;
00451
00452 ip = GRID * (GRID / 2 + 1) * (y - slabstart_y) + (GRID / 2 + 1) * x + z;
00453 #if !defined(PERIODIC)
00454 fft_of_kernel[0][ip].re *= ff;
00455 fft_of_kernel[0][ip].im *= ff;
00456 #endif
00457 #if defined(PLACEHIGHRESREGION)
00458 fft_of_kernel[1][ip].re *= ff;
00459 fft_of_kernel[1][ip].im *= ff;
00460 #endif
00461 }
00462 }
00463
00464
00465 pm_init_nonperiodic_free();
00466 }
00467
00468
00469
00477 int pmforce_nonperiodic(int grnr)
00478 {
00479 double dx, dy, dz;
00480 double fac, to_slab_fac;
00481 double re, im, acc_dim;
00482 int i, j, slab, level, sendTask, recvTask, flag, flagsum;
00483 int x, y, z, xl, yl, zl, xr, yr, zr, xll, yll, zll, xrr, yrr, zrr, ip, dim;
00484 int slab_x, slab_y, slab_z;
00485 int slab_xx, slab_yy, slab_zz;
00486 int meshmin[3], meshmax[3], sendmin, sendmax, recvmin, recvmax;
00487 int dimx, dimy, dimz, recv_dimx, recv_dimy, recv_dimz;
00488 MPI_Status status;
00489
00490 if(ThisTask == 0)
00491 printf("Starting non-periodic PM calculation (grid=%d).\n", grnr);
00492
00493 fac = All.G / pow(All.TotalMeshSize[grnr], 4) * pow(All.TotalMeshSize[grnr] / GRID, 3);
00494 fac *= 1 / (2 * All.TotalMeshSize[grnr] / GRID);
00495
00496 to_slab_fac = GRID / All.TotalMeshSize[grnr];
00497
00498
00499
00500
00501 for(j = 0; j < 3; j++)
00502 {
00503 meshmin[j] = GRID;
00504 meshmax[j] = 0;
00505 }
00506
00507 for(i = 0, flag = 0; i < NumPart; i++)
00508 {
00509 #ifdef PLACEHIGHRESREGION
00510 if(grnr == 0 || (grnr == 1 && ((1 << P[i].Type) & (PLACEHIGHRESREGION))))
00511 #endif
00512 {
00513 for(j = 0; j < 3; j++)
00514 {
00515 if(P[i].Pos[j] < All.Xmintot[grnr][j] || P[i].Pos[j] > All.Xmaxtot[grnr][j])
00516 {
00517 if(flag == 0)
00518 {
00519 printf
00520 ("Particle Id=%d on task=%d with coordinates (%g|%g|%g) lies outside PM mesh.\nStopping\n",
00521 (int)P[i].ID, ThisTask, P[i].Pos[0], P[i].Pos[1], P[i].Pos[2]);
00522 fflush(stdout);
00523 }
00524 flag++;
00525 break;
00526 }
00527 }
00528 }
00529
00530 if(flag > 0)
00531 continue;
00532
00533 if(P[i].Pos[0] >= All.Corner[grnr][0] && P[i].Pos[0] < All.UpperCorner[grnr][0])
00534 if(P[i].Pos[1] >= All.Corner[grnr][1] && P[i].Pos[1] < All.UpperCorner[grnr][1])
00535 if(P[i].Pos[2] >= All.Corner[grnr][2] && P[i].Pos[2] < All.UpperCorner[grnr][2])
00536 {
00537 for(j = 0; j < 3; j++)
00538 {
00539 slab = to_slab_fac * (P[i].Pos[j] - All.Corner[grnr][j]);
00540
00541 if(slab < meshmin[j])
00542 meshmin[j] = slab;
00543
00544 if(slab > meshmax[j])
00545 meshmax[j] = slab;
00546 }
00547 }
00548 }
00549
00550
00551 MPI_Allreduce(&flag, &flagsum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
00552 if(flagsum > 0)
00553 {
00554 if(ThisTask == 0)
00555 {
00556 printf("In total %d particles were outside allowed range.\n", flagsum);
00557 fflush(stdout);
00558 }
00559 return 1;
00560 }
00561
00562 MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
00563 MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);
00564
00565 dimx = meshmax[0] - meshmin[0] + 2;
00566 dimy = meshmax[1] - meshmin[1] + 2;
00567 dimz = meshmax[2] - meshmin[2] + 2;
00568
00569
00570 force_treefree();
00571
00572 pm_init_nonperiodic_allocate((dimx + 4) * (dimy + 4) * (dimz + 4));
00573
00574 for(i = 0; i < dimx * dimy * dimz; i++)
00575 workspace[i] = 0;
00576
00577 for(i = 0; i < NumPart; i++)
00578 {
00579 if(P[i].Pos[0] < All.Corner[grnr][0] || P[i].Pos[0] >= All.UpperCorner[grnr][0])
00580 continue;
00581 if(P[i].Pos[1] < All.Corner[grnr][1] || P[i].Pos[1] >= All.UpperCorner[grnr][1])
00582 continue;
00583 if(P[i].Pos[2] < All.Corner[grnr][2] || P[i].Pos[2] >= All.UpperCorner[grnr][2])
00584 continue;
00585
00586 slab_x = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]);
00587 dx = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
00588 slab_x -= meshmin[0];
00589 slab_xx = slab_x + 1;
00590
00591 slab_y = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]);
00592 dy = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
00593 slab_y -= meshmin[1];
00594 slab_yy = slab_y + 1;
00595
00596 slab_z = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]);
00597 dz = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
00598 slab_z -= meshmin[2];
00599 slab_zz = slab_z + 1;
00600
00601 workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
00602 workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * dy * (1.0 - dz);
00603 workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * dz;
00604 workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * dy * dz;
00605
00606 workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (dx) * (1.0 - dy) * (1.0 - dz);
00607 workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (dx) * dy * (1.0 - dz);
00608 workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (dx) * (1.0 - dy) * dz;
00609 workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (dx) * dy * dz;
00610 }
00611
00612
00613 for(i = 0; i < fftsize; i++)
00614 rhogrid[i] = 0;
00615
00616 for(level = 0; level < (1 << PTask); level++)
00617 {
00618 sendTask = ThisTask;
00619 recvTask = ThisTask ^ level;
00620 if(recvTask < NTask)
00621 {
00622
00623 sendmin = 2 * GRID;
00624 sendmax = -1;
00625 for(slab_x = meshmin[0]; slab_x < meshmax[0] + 2; slab_x++)
00626 if(slab_to_task[slab_x] == recvTask)
00627 {
00628 if(slab_x < sendmin)
00629 sendmin = slab_x;
00630 if(slab_x > sendmax)
00631 sendmax = slab_x;
00632 }
00633 if(sendmax == -1)
00634 sendmin = 0;
00635
00636
00637 recvmin = 2 * GRID;
00638 recvmax = -1;
00639 for(slab_x = meshmin_list[3 * recvTask]; slab_x < meshmax_list[3 * recvTask] + 2; slab_x++)
00640 if(slab_to_task[slab_x] == sendTask)
00641 {
00642 if(slab_x < recvmin)
00643 recvmin = slab_x;
00644 if(slab_x > recvmax)
00645 recvmax = slab_x;
00646 }
00647 if(recvmax == -1)
00648 recvmin = 0;
00649
00650 if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)
00651 {
00652 recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 2;
00653 recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 2;
00654 recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 2;
00655
00656 if(level > 0)
00657 {
00658 MPI_Sendrecv(workspace + (sendmin - meshmin[0]) * dimy * dimz,
00659 (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask,
00660 TAG_NONPERIOD_A, forcegrid,
00661 (recvmax - recvmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE,
00662 recvTask, TAG_NONPERIOD_A, MPI_COMM_WORLD, &status);
00663 }
00664 else
00665 {
00666 memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz,
00667 (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real));
00668 }
00669
00670 for(slab_x = recvmin; slab_x <= recvmax; slab_x++)
00671 {
00672 slab_xx = slab_x - first_slab_of_task[ThisTask];
00673
00674 if(slab_xx >= 0 && slab_xx < slabs_per_task[ThisTask])
00675 {
00676 for(slab_y = meshmin_list[3 * recvTask + 1];
00677 slab_y <= meshmax_list[3 * recvTask + 1] + 1; slab_y++)
00678 {
00679 slab_yy = slab_y;
00680
00681 for(slab_z = meshmin_list[3 * recvTask + 2];
00682 slab_z <= meshmax_list[3 * recvTask + 2] + 1; slab_z++)
00683 {
00684 slab_zz = slab_z;
00685
00686 rhogrid[GRID * GRID2 * slab_xx + GRID2 * slab_yy + slab_zz] +=
00687 forcegrid[((slab_x - recvmin) * recv_dimy +
00688 (slab_y - meshmin_list[3 * recvTask + 1])) * recv_dimz +
00689 (slab_z - meshmin_list[3 * recvTask + 2])];
00690 }
00691 }
00692 }
00693 }
00694 }
00695 }
00696 }
00697
00698
00699
00700
00701 rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
00702
00703
00704
00705
00706 for(y = 0; y < nslab_y; y++)
00707 for(x = 0; x < GRID; x++)
00708 for(z = 0; z < GRID / 2 + 1; z++)
00709 {
00710 ip = GRID * (GRID / 2 + 1) * y + (GRID / 2 + 1) * x + z;
00711
00712 re =
00713 fft_of_rhogrid[ip].re * fft_of_kernel[grnr][ip].re -
00714 fft_of_rhogrid[ip].im * fft_of_kernel[grnr][ip].im;
00715
00716 im =
00717 fft_of_rhogrid[ip].re * fft_of_kernel[grnr][ip].im +
00718 fft_of_rhogrid[ip].im * fft_of_kernel[grnr][ip].re;
00719
00720 fft_of_rhogrid[ip].re = re;
00721 fft_of_rhogrid[ip].im = im;
00722 }
00723
00724
00725
00726 rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
00727
00728
00729
00730
00731
00732
00733
00734
00735
00736 #ifdef PLACEHIGHRESREGION
00737 if(grnr == 1)
00738 {
00739 for(j = 0; j < 3; j++)
00740 {
00741 meshmin[j] = GRID;
00742 meshmax[j] = 0;
00743 }
00744
00745 for(i = 0; i < NumPart; i++)
00746 {
00747 if(!((1 << P[i].Type) & (PLACEHIGHRESREGION)))
00748 continue;
00749
00750
00751 if(P[i].Pos[0] >= All.Corner[grnr][0] && P[i].Pos[0] < All.UpperCorner[grnr][0])
00752 if(P[i].Pos[1] >= All.Corner[grnr][1] && P[i].Pos[1] < All.UpperCorner[grnr][1])
00753 if(P[i].Pos[2] >= All.Corner[grnr][2] && P[i].Pos[2] < All.UpperCorner[grnr][2])
00754 {
00755 for(j = 0; j < 3; j++)
00756 {
00757 slab = to_slab_fac * (P[i].Pos[j] - All.Corner[grnr][j]);
00758
00759 if(slab < meshmin[j])
00760 meshmin[j] = slab;
00761
00762 if(slab > meshmax[j])
00763 meshmax[j] = slab;
00764 }
00765 }
00766 }
00767
00768 MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
00769 MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);
00770 }
00771 #endif
00772
00773 dimx = meshmax[0] - meshmin[0] + 6;
00774 dimy = meshmax[1] - meshmin[1] + 6;
00775 dimz = meshmax[2] - meshmin[2] + 6;
00776
00777 for(j = 0; j < 3; j++)
00778 {
00779 if(meshmin[j] < 2)
00780 endrun(131231);
00781 if(meshmax[j] > GRID / 2 - 3)
00782 endrun(131288);
00783 }
00784
00785 for(level = 0; level < (1 << PTask); level++)
00786 {
00787 sendTask = ThisTask;
00788 recvTask = ThisTask ^ level;
00789
00790 if(recvTask < NTask)
00791 {
00792
00793 sendmin = 2 * GRID;
00794 sendmax = -GRID;
00795 for(slab_x = meshmin_list[3 * recvTask] - 2; slab_x < meshmax_list[3 * recvTask] + 4; slab_x++)
00796 if(slab_to_task[slab_x] == sendTask)
00797 {
00798 if(slab_x < sendmin)
00799 sendmin = slab_x;
00800 if(slab_x > sendmax)
00801 sendmax = slab_x;
00802 }
00803 if(sendmax == -GRID)
00804 sendmin = sendmax + 1;
00805
00806
00807
00808 recvmin = 2 * GRID;
00809 recvmax = -GRID;
00810 for(slab_x = meshmin[0] - 2; slab_x < meshmax[0] + 4; slab_x++)
00811 if(slab_to_task[slab_x] == recvTask)
00812 {
00813 if(slab_x < recvmin)
00814 recvmin = slab_x;
00815 if(slab_x > recvmax)
00816 recvmax = slab_x;
00817 }
00818 if(recvmax == -GRID)
00819 recvmin = recvmax + 1;
00820
00821 if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)
00822 {
00823 recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 6;
00824 recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 6;
00825 recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 6;
00826
00827
00828 if(sendmax - sendmin >= 0)
00829 {
00830 for(slab_x = sendmin; slab_x <= sendmax; slab_x++)
00831 {
00832 slab_xx = slab_x - first_slab_of_task[ThisTask];
00833
00834 for(slab_y = meshmin_list[3 * recvTask + 1] - 2;
00835 slab_y < meshmax_list[3 * recvTask + 1] + 4; slab_y++)
00836 {
00837 slab_yy = slab_y;
00838
00839 for(slab_z = meshmin_list[3 * recvTask + 2] - 2;
00840 slab_z < meshmax_list[3 * recvTask + 2] + 4; slab_z++)
00841 {
00842 slab_zz = slab_z;
00843
00844 forcegrid[((slab_x - sendmin) * recv_dimy +
00845 (slab_y - (meshmin_list[3 * recvTask + 1] - 2))) * recv_dimz +
00846 slab_z - (meshmin_list[3 * recvTask + 2] - 2)] =
00847 rhogrid[GRID * GRID2 * slab_xx + GRID2 * slab_yy + slab_zz];
00848 }
00849 }
00850 }
00851 }
00852
00853 if(level > 0)
00854 {
00855 MPI_Sendrecv(forcegrid,
00856 (sendmax - sendmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real),
00857 MPI_BYTE, recvTask, TAG_NONPERIOD_B,
00858 workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
00859 (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE,
00860 recvTask, TAG_NONPERIOD_B, MPI_COMM_WORLD, &status);
00861 }
00862 else
00863 {
00864 memcpy(workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
00865 forcegrid, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real));
00866 }
00867 }
00868 }
00869 }
00870
00871 dimx = meshmax[0] - meshmin[0] + 2;
00872 dimy = meshmax[1] - meshmin[1] + 2;
00873 dimz = meshmax[2] - meshmin[2] + 2;
00874
00875 recv_dimx = meshmax[0] - meshmin[0] + 6;
00876 recv_dimy = meshmax[1] - meshmin[1] + 6;
00877 recv_dimz = meshmax[2] - meshmin[2] + 6;
00878
00879
00880 for(dim = 0; dim < 3; dim++)
00881 {
00882
00883
00884
00885 for(x = 0; x < meshmax[0] - meshmin[0] + 2; x++)
00886 for(y = 0; y < meshmax[1] - meshmin[1] + 2; y++)
00887 for(z = 0; z < meshmax[2] - meshmin[2] + 2; z++)
00888 {
00889 xrr = xll = xr = xl = x;
00890 yrr = yll = yr = yl = y;
00891 zrr = zll = zr = zl = z;
00892
00893 switch (dim)
00894 {
00895 case 0:
00896 xr = x + 1;
00897 xrr = x + 2;
00898 xl = x - 1;
00899 xll = x - 2;
00900 break;
00901 case 1:
00902 yr = y + 1;
00903 yl = y - 1;
00904 yrr = y + 2;
00905 yll = y - 2;
00906 break;
00907 case 2:
00908 zr = z + 1;
00909 zl = z - 1;
00910 zrr = z + 2;
00911 zll = z - 2;
00912 break;
00913 }
00914
00915 forcegrid[(x * dimy + y) * dimz + z]
00916 =
00917 fac * ((4.0 / 3) *
00918 (workspace[((xl + 2) * recv_dimy + (yl + 2)) * recv_dimz + (zl + 2)]
00919 - workspace[((xr + 2) * recv_dimy + (yr + 2)) * recv_dimz + (zr + 2)]) -
00920 (1.0 / 6) *
00921 (workspace[((xll + 2) * recv_dimy + (yll + 2)) * recv_dimz + (zll + 2)] -
00922 workspace[((xrr + 2) * recv_dimy + (yrr + 2)) * recv_dimz + (zrr + 2)]));
00923 }
00924
00925
00926
00927
00928 for(i = 0; i < NumPart; i++)
00929 {
00930 #ifdef PLACEHIGHRESREGION
00931 if(grnr == 1)
00932 if(!((1 << P[i].Type) & (PLACEHIGHRESREGION)))
00933 continue;
00934 #endif
00935 slab_x = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]);
00936 dx = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
00937 slab_x -= meshmin[0];
00938 slab_xx = slab_x + 1;
00939
00940 slab_y = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]);
00941 dy = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
00942 slab_y -= meshmin[1];
00943 slab_yy = slab_y + 1;
00944
00945 slab_z = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]);
00946 dz = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
00947 slab_z -= meshmin[2];
00948 slab_zz = slab_z + 1;
00949
00950 acc_dim =
00951 forcegrid[(slab_x * dimy + slab_y) * dimz + slab_z] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
00952 acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_z] * (1.0 - dx) * dy * (1.0 - dz);
00953 acc_dim += forcegrid[(slab_x * dimy + slab_y) * dimz + slab_zz] * (1.0 - dx) * (1.0 - dy) * dz;
00954 acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_zz] * (1.0 - dx) * dy * dz;
00955
00956 acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_z] * (dx) * (1.0 - dy) * (1.0 - dz);
00957 acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_z] * (dx) * dy * (1.0 - dz);
00958 acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_zz] * (dx) * (1.0 - dy) * dz;
00959 acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_zz] * (dx) * dy * dz;
00960
00961 P[i].GravPM[dim] += acc_dim;
00962 }
00963 }
00964
00965 pm_init_nonperiodic_free();
00966 force_treeallocate(All.TreeAllocFactor * All.MaxPart, All.MaxPart);
00967 All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency;
00968
00969 if(ThisTask == 0)
00970 printf("done PM.\n");
00971
00972 return 0;
00973 }
00974
00975
00976
00977
00983 int pmpotential_nonperiodic(int grnr)
00984 {
00985 double dx, dy, dz;
00986 double fac, to_slab_fac;
00987 double re, im, pot;
00988 int i, j, slab, level, sendTask, recvTask, flag, flagsum;
00989 int x, y, z, ip;
00990 int slab_x, slab_y, slab_z;
00991 int slab_xx, slab_yy, slab_zz;
00992 int meshmin[3], meshmax[3], sendmin, sendmax, recvmin, recvmax;
00993 int dimx, dimy, dimz, recv_dimx, recv_dimy, recv_dimz;
00994 MPI_Status status;
00995
00996
00997 if(ThisTask == 0)
00998 printf("Starting non-periodic PM-potential calculation.\n");
00999
01000 fac = All.G / pow(All.TotalMeshSize[grnr], 4) * pow(All.TotalMeshSize[grnr] / GRID, 3);
01001
01002 to_slab_fac = GRID / All.TotalMeshSize[grnr];
01003
01004
01005
01006 for(j = 0; j < 3; j++)
01007 {
01008 meshmin[j] = GRID;
01009 meshmax[j] = 0;
01010 }
01011
01012 for(i = 0, flag = 0; i < NumPart; i++)
01013 {
01014 #ifdef PLACEHIGHRESREGION
01015 if(grnr == 0 || (grnr == 1 && ((1 << P[i].Type) & (PLACEHIGHRESREGION))))
01016 #endif
01017 {
01018 for(j = 0; j < 3; j++)
01019 {
01020 if(P[i].Pos[j] < All.Xmintot[grnr][j] || P[i].Pos[j] > All.Xmaxtot[grnr][j])
01021 {
01022 if(flag == 0)
01023 {
01024 printf
01025 ("Particle Id=%d on task=%d with coordinates (%g|%g|%g) lies outside PM mesh.\nStopping\n",
01026 (int)P[i].ID, ThisTask, P[i].Pos[0], P[i].Pos[1], P[i].Pos[2]);
01027 fflush(stdout);
01028 }
01029 flag++;
01030 break;
01031 }
01032 }
01033 }
01034
01035 if(flag > 0)
01036 continue;
01037
01038 if(P[i].Pos[0] >= All.Corner[grnr][0] && P[i].Pos[0] < All.UpperCorner[grnr][0])
01039 if(P[i].Pos[1] >= All.Corner[grnr][1] && P[i].Pos[1] < All.UpperCorner[grnr][1])
01040 if(P[i].Pos[2] >= All.Corner[grnr][2] && P[i].Pos[2] < All.UpperCorner[grnr][2])
01041 {
01042 for(j = 0; j < 3; j++)
01043 {
01044 slab = to_slab_fac * (P[i].Pos[j] - All.Corner[grnr][j]);
01045
01046 if(slab < meshmin[j])
01047 meshmin[j] = slab;
01048
01049 if(slab > meshmax[j])
01050 meshmax[j] = slab;
01051 }
01052 }
01053 }
01054
01055
01056 MPI_Allreduce(&flag, &flagsum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
01057 if(flagsum > 0)
01058 {
01059 if(ThisTask == 0)
01060 {
01061 printf("In total %d particles were outside allowed range.\n", flagsum);
01062 fflush(stdout);
01063 }
01064 return 1;
01065 }
01066
01067
01068
01069 MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
01070 MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);
01071
01072 dimx = meshmax[0] - meshmin[0] + 2;
01073 dimy = meshmax[1] - meshmin[1] + 2;
01074 dimz = meshmax[2] - meshmin[2] + 2;
01075
01076
01077 force_treefree();
01078
01079 pm_init_nonperiodic_allocate((dimx + 4) * (dimy + 4) * (dimz + 4));
01080
01081 for(i = 0; i < dimx * dimy * dimz; i++)
01082 workspace[i] = 0;
01083
01084 for(i = 0; i < NumPart; i++)
01085 {
01086 if(P[i].Pos[0] < All.Corner[grnr][0] || P[i].Pos[0] >= All.UpperCorner[grnr][0])
01087 continue;
01088 if(P[i].Pos[1] < All.Corner[grnr][1] || P[i].Pos[1] >= All.UpperCorner[grnr][1])
01089 continue;
01090 if(P[i].Pos[2] < All.Corner[grnr][2] || P[i].Pos[2] >= All.UpperCorner[grnr][2])
01091 continue;
01092
01093 slab_x = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]);
01094 dx = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
01095 slab_x -= meshmin[0];
01096 slab_xx = slab_x + 1;
01097
01098 slab_y = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]);
01099 dy = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
01100 slab_y -= meshmin[1];
01101 slab_yy = slab_y + 1;
01102
01103 slab_z = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]);
01104 dz = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
01105 slab_z -= meshmin[2];
01106 slab_zz = slab_z + 1;
01107
01108 workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
01109 workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * dy * (1.0 - dz);
01110 workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * dz;
01111 workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * dy * dz;
01112
01113 workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (dx) * (1.0 - dy) * (1.0 - dz);
01114 workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (dx) * dy * (1.0 - dz);
01115 workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (dx) * (1.0 - dy) * dz;
01116 workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (dx) * dy * dz;
01117 }
01118
01119
01120 for(i = 0; i < fftsize; i++)
01121 rhogrid[i] = 0;
01122
01123 for(level = 0; level < (1 << PTask); level++)
01124 {
01125 sendTask = ThisTask;
01126 recvTask = ThisTask ^ level;
01127 if(recvTask < NTask)
01128 {
01129
01130 sendmin = 2 * GRID;
01131 sendmax = -1;
01132 for(slab_x = meshmin[0]; slab_x < meshmax[0] + 2; slab_x++)
01133 if(slab_to_task[slab_x] == recvTask)
01134 {
01135 if(slab_x < sendmin)
01136 sendmin = slab_x;
01137 if(slab_x > sendmax)
01138 sendmax = slab_x;
01139 }
01140 if(sendmax == -1)
01141 sendmin = 0;
01142
01143
01144 recvmin = 2 * GRID;
01145 recvmax = -1;
01146 for(slab_x = meshmin_list[3 * recvTask]; slab_x < meshmax_list[3 * recvTask] + 2; slab_x++)
01147 if(slab_to_task[slab_x] == sendTask)
01148 {
01149 if(slab_x < recvmin)
01150 recvmin = slab_x;
01151 if(slab_x > recvmax)
01152 recvmax = slab_x;
01153 }
01154 if(recvmax == -1)
01155 recvmin = 0;
01156
01157 if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)
01158 {
01159 recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 2;
01160 recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 2;
01161 recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 2;
01162
01163 if(level > 0)
01164 {
01165 MPI_Sendrecv(workspace + (sendmin - meshmin[0]) * dimy * dimz,
01166 (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask,
01167 TAG_NONPERIOD_C, forcegrid,
01168 (recvmax - recvmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE,
01169 recvTask, TAG_NONPERIOD_C, MPI_COMM_WORLD, &status);
01170 }
01171 else
01172 {
01173 memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz,
01174 (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real));
01175 }
01176
01177 for(slab_x = recvmin; slab_x <= recvmax; slab_x++)
01178 {
01179 slab_xx = slab_x - first_slab_of_task[ThisTask];
01180
01181 if(slab_xx >= 0 && slab_xx < slabs_per_task[ThisTask])
01182 {
01183 for(slab_y = meshmin_list[3 * recvTask + 1];
01184 slab_y <= meshmax_list[3 * recvTask + 1] + 1; slab_y++)
01185 {
01186 slab_yy = slab_y;
01187
01188 for(slab_z = meshmin_list[3 * recvTask + 2];
01189 slab_z <= meshmax_list[3 * recvTask + 2] + 1; slab_z++)
01190 {
01191 slab_zz = slab_z;
01192
01193 rhogrid[GRID * GRID2 * slab_xx + GRID2 * slab_yy + slab_zz] +=
01194 forcegrid[((slab_x - recvmin) * recv_dimy +
01195 (slab_y - meshmin_list[3 * recvTask + 1])) * recv_dimz +
01196 (slab_z - meshmin_list[3 * recvTask + 2])];
01197 }
01198 }
01199 }
01200 }
01201 }
01202 }
01203 }
01204
01205
01206
01207
01208 rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
01209
01210
01211
01212
01213 for(y = 0; y < nslab_y; y++)
01214 for(x = 0; x < GRID; x++)
01215 for(z = 0; z < GRID / 2 + 1; z++)
01216 {
01217 ip = GRID * (GRID / 2 + 1) * y + (GRID / 2 + 1) * x + z;
01218
01219 re =
01220 fft_of_rhogrid[ip].re * fft_of_kernel[grnr][ip].re -
01221 fft_of_rhogrid[ip].im * fft_of_kernel[grnr][ip].im;
01222
01223 im =
01224 fft_of_rhogrid[ip].re * fft_of_kernel[grnr][ip].im +
01225 fft_of_rhogrid[ip].im * fft_of_kernel[grnr][ip].re;
01226
01227 fft_of_rhogrid[ip].re = fac * re;
01228 fft_of_rhogrid[ip].im = fac * im;
01229 }
01230
01231
01232
01233 rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
01234
01235
01236
01237
01238
01239
01240
01241
01242
01243 #ifdef PLACEHIGHRESREGION
01244 if(grnr == 1)
01245 {
01246 for(j = 0; j < 3; j++)
01247 {
01248 meshmin[j] = GRID;
01249 meshmax[j] = 0;
01250 }
01251
01252 for(i = 0; i < NumPart; i++)
01253 {
01254 if(!((1 << P[i].Type) & (PLACEHIGHRESREGION)))
01255 continue;
01256
01257
01258 if(P[i].Pos[0] >= All.Corner[grnr][0] && P[i].Pos[0] < All.UpperCorner[grnr][0])
01259 if(P[i].Pos[1] >= All.Corner[grnr][1] && P[i].Pos[1] < All.UpperCorner[grnr][1])
01260 if(P[i].Pos[2] >= All.Corner[grnr][2] && P[i].Pos[2] < All.UpperCorner[grnr][2])
01261 {
01262 for(j = 0; j < 3; j++)
01263 {
01264 slab = to_slab_fac * (P[i].Pos[j] - All.Corner[grnr][j]);
01265
01266 if(slab < meshmin[j])
01267 meshmin[j] = slab;
01268
01269 if(slab > meshmax[j])
01270 meshmax[j] = slab;
01271 }
01272 }
01273 }
01274
01275 MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
01276 MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);
01277 }
01278 #endif
01279
01280 dimx = meshmax[0] - meshmin[0] + 6;
01281 dimy = meshmax[1] - meshmin[1] + 6;
01282 dimz = meshmax[2] - meshmin[2] + 6;
01283
01284 for(j = 0; j < 3; j++)
01285 {
01286 if(meshmin[j] < 2)
01287 endrun(131231);
01288 if(meshmax[j] > GRID / 2 - 3)
01289 endrun(131288);
01290 }
01291
01292 for(level = 0; level < (1 << PTask); level++)
01293 {
01294 sendTask = ThisTask;
01295 recvTask = ThisTask ^ level;
01296
01297 if(recvTask < NTask)
01298 {
01299
01300 sendmin = 2 * GRID;
01301 sendmax = -GRID;
01302 for(slab_x = meshmin_list[3 * recvTask] - 2; slab_x < meshmax_list[3 * recvTask] + 4; slab_x++)
01303 if(slab_to_task[slab_x] == sendTask)
01304 {
01305 if(slab_x < sendmin)
01306 sendmin = slab_x;
01307 if(slab_x > sendmax)
01308 sendmax = slab_x;
01309 }
01310 if(sendmax == -GRID)
01311 sendmin = sendmax + 1;
01312
01313
01314
01315 recvmin = 2 * GRID;
01316 recvmax = -GRID;
01317 for(slab_x = meshmin[0] - 2; slab_x < meshmax[0] + 4; slab_x++)
01318 if(slab_to_task[slab_x] == recvTask)
01319 {
01320 if(slab_x < recvmin)
01321 recvmin = slab_x;
01322 if(slab_x > recvmax)
01323 recvmax = slab_x;
01324 }
01325 if(recvmax == -GRID)
01326 recvmin = recvmax + 1;
01327
01328 if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)
01329 {
01330 recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 6;
01331 recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 6;
01332 recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 6;
01333
01334
01335 if(sendmax - sendmin >= 0)
01336 {
01337 for(slab_x = sendmin; slab_x <= sendmax; slab_x++)
01338 {
01339 slab_xx = slab_x - first_slab_of_task[ThisTask];
01340
01341 for(slab_y = meshmin_list[3 * recvTask + 1] - 2;
01342 slab_y < meshmax_list[3 * recvTask + 1] + 4; slab_y++)
01343 {
01344 slab_yy = slab_y;
01345
01346 for(slab_z = meshmin_list[3 * recvTask + 2] - 2;
01347 slab_z < meshmax_list[3 * recvTask + 2] + 4; slab_z++)
01348 {
01349 slab_zz = slab_z;
01350
01351 forcegrid[((slab_x - sendmin) * recv_dimy +
01352 (slab_y - (meshmin_list[3 * recvTask + 1] - 2))) * recv_dimz +
01353 slab_z - (meshmin_list[3 * recvTask + 2] - 2)] =
01354 rhogrid[GRID * GRID2 * slab_xx + GRID2 * slab_yy + slab_zz];
01355 }
01356 }
01357 }
01358 }
01359
01360 if(level > 0)
01361 {
01362 MPI_Sendrecv(forcegrid,
01363 (sendmax - sendmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real),
01364 MPI_BYTE, recvTask, TAG_NONPERIOD_D,
01365 workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
01366 (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE,
01367 recvTask, TAG_NONPERIOD_D, MPI_COMM_WORLD, &status);
01368 }
01369 else
01370 {
01371 memcpy(workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
01372 forcegrid, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real));
01373 }
01374 }
01375 }
01376 }
01377
01378 dimx = meshmax[0] - meshmin[0] + 2;
01379 dimy = meshmax[1] - meshmin[1] + 2;
01380 dimz = meshmax[2] - meshmin[2] + 2;
01381
01382 recv_dimx = meshmax[0] - meshmin[0] + 6;
01383 recv_dimy = meshmax[1] - meshmin[1] + 6;
01384 recv_dimz = meshmax[2] - meshmin[2] + 6;
01385
01386
01387 for(x = 0; x < meshmax[0] - meshmin[0] + 2; x++)
01388 for(y = 0; y < meshmax[1] - meshmin[1] + 2; y++)
01389 for(z = 0; z < meshmax[2] - meshmin[2] + 2; z++)
01390 {
01391 forcegrid[(x * dimy + y) * dimz + z]
01392 = workspace[((x + 2) * recv_dimy + (y + 2)) * recv_dimz + (z + 2)];
01393 }
01394
01395
01396
01397
01398 for(i = 0; i < NumPart; i++)
01399 {
01400 #ifdef PLACEHIGHRESREGION
01401 if(grnr == 1)
01402 if(!((1 << P[i].Type) & (PLACEHIGHRESREGION)))
01403 continue;
01404 #endif
01405 slab_x = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]);
01406 dx = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
01407 slab_x -= meshmin[0];
01408 slab_xx = slab_x + 1;
01409
01410 slab_y = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]);
01411 dy = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
01412 slab_y -= meshmin[1];
01413 slab_yy = slab_y + 1;
01414
01415 slab_z = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]);
01416 dz = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
01417 slab_z -= meshmin[2];
01418 slab_zz = slab_z + 1;
01419
01420 pot = forcegrid[(slab_x * dimy + slab_y) * dimz + slab_z] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
01421 pot += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_z] * (1.0 - dx) * dy * (1.0 - dz);
01422 pot += forcegrid[(slab_x * dimy + slab_y) * dimz + slab_zz] * (1.0 - dx) * (1.0 - dy) * dz;
01423 pot += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_zz] * (1.0 - dx) * dy * dz;
01424
01425 pot += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_z] * (dx) * (1.0 - dy) * (1.0 - dz);
01426 pot += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_z] * (dx) * dy * (1.0 - dz);
01427 pot += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_zz] * (dx) * (1.0 - dy) * dz;
01428 pot += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_zz] * (dx) * dy * dz;
01429
01430 P[i].Potential += pot;
01431 }
01432
01433 pm_init_nonperiodic_free();
01434 force_treeallocate(All.TreeAllocFactor * All.MaxPart, All.MaxPart);
01435 All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency;
01436
01437 if(ThisTask == 0)
01438 printf("done PM-potential.\n");
01439
01440 return 0;
01441 }
01442
01443
01444 #endif
01445 #endif