Main Page | Alphabetical List | Data Structures | File List | Data Fields | Globals | Related Pages

pm_nonperiodic.c

Go to the documentation of this file.
00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <string.h>
00004 #include <math.h>
00005 #include <mpi.h>
00006 
00007 
00013 #ifdef PMGRID
00014 #if !defined (PERIODIC) || defined (PLACEHIGHRESREGION)
00015 
00016 #ifdef NOTYPEPREFIX_FFTW
00017 #include        <rfftw_mpi.h>
00018 #else
00019 #ifdef DOUBLEPRECISION_FFTW
00020 #include     <drfftw_mpi.h>     /* double precision FFTW */
00021 #else
00022 #include     <srfftw_mpi.h>
00023 #endif
00024 #endif
00025 
00026 #include "allvars.h"
00027 #include "proto.h"
00028 
00029 #define  GRID  (2*PMGRID)
00030 #define  GRID2 (2*(GRID/2 + 1))
00031 
00032 
00033 
00034 static rfftwnd_mpi_plan fft_forward_plan, fft_inverse_plan;
00035 
00036 static int slab_to_task[GRID];
00037 static int *slabs_per_task;
00038 static int *first_slab_of_task;
00039 
00040 static int *meshmin_list, *meshmax_list;
00041 
00042 static int slabstart_x, nslab_x, slabstart_y, nslab_y;
00043 
00044 static int fftsize, maxfftsize;
00045 
00046 static fftw_real *kernel[2], *rhogrid, *forcegrid, *workspace;
00047 static fftw_complex *fft_of_kernel[2], *fft_of_rhogrid;
00048 
00058 void pm_init_regionsize(void)
00059 {
00060   double meshinner[2], xmin[2][3], xmax[2][3];
00061   int i, j, t;
00062 
00063   /* find enclosing rectangle */
00064 
00065   for(j = 0; j < 3; j++)
00066     {
00067       xmin[0][j] = xmin[1][j] = 1.0e36;
00068       xmax[0][j] = xmax[1][j] = -1.0e36;
00069     }
00070 
00071   for(i = 0; i < NumPart; i++)
00072     for(j = 0; j < 3; j++)
00073       {
00074         t = 0;
00075 #ifdef PLACEHIGHRESREGION
00076         if(((1 << P[i].Type) & (PLACEHIGHRESREGION)))
00077           t = 1;
00078 #endif
00079         if(P[i].Pos[j] > xmax[t][j])
00080           xmax[t][j] = P[i].Pos[j];
00081         if(P[i].Pos[j] < xmin[t][j])
00082           xmin[t][j] = P[i].Pos[j];
00083       }
00084 
00085   MPI_Allreduce(xmin, All.Xmintot, 6, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
00086   MPI_Allreduce(xmax, All.Xmaxtot, 6, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
00087 
00088   for(j = 0; j < 2; j++)
00089     {
00090       All.TotalMeshSize[j] = All.Xmaxtot[j][0] - All.Xmintot[j][0];
00091       All.TotalMeshSize[j] = dmax(All.TotalMeshSize[j], All.Xmaxtot[j][1] - All.Xmintot[j][1]);
00092       All.TotalMeshSize[j] = dmax(All.TotalMeshSize[j], All.Xmaxtot[j][2] - All.Xmintot[j][2]);
00093 #ifdef ENLARGEREGION
00094       All.TotalMeshSize[j] *= ENLARGEREGION;
00095 #endif
00096 
00097       /* symmetrize the box onto the center */
00098       for(i = 0; i < 3; i++)
00099         {
00100           All.Xmintot[j][i] = (All.Xmintot[j][i] + All.Xmaxtot[j][i]) / 2 - All.TotalMeshSize[j] / 2;
00101           All.Xmaxtot[j][i] = All.Xmintot[j][i] + All.TotalMeshSize[j];
00102         }
00103     }
00104 
00105   /* this will produce enough room for zero-padding and buffer region to
00106      allow finite differencing of the potential  */
00107 
00108   for(j = 0; j < 2; j++)
00109     {
00110       meshinner[j] = All.TotalMeshSize[j];
00111       All.TotalMeshSize[j] *= 2.001 * (GRID) / ((double) (GRID - 2 - 8));
00112     }
00113 
00114   /* move lower left corner by two cells to allow finite differencing of the potential by a 4-point function */
00115 
00116   for(j = 0; j < 2; j++)
00117     for(i = 0; i < 3; i++)
00118       {
00119         All.Corner[j][i] = All.Xmintot[j][i] - 2.0005 * All.TotalMeshSize[j] / GRID;
00120         All.UpperCorner[j][i] = All.Corner[j][i] + (GRID / 2 - 1) * (All.TotalMeshSize[j] / GRID);
00121       }
00122 
00123 
00124 #ifndef PERIODIC
00125   All.Asmth[0] = ASMTH * All.TotalMeshSize[0] / GRID;
00126   All.Rcut[0] = RCUT * All.Asmth[0];
00127 #endif
00128 
00129 #ifdef PLACEHIGHRESREGION
00130   All.Asmth[1] = ASMTH * All.TotalMeshSize[1] / GRID;
00131   All.Rcut[1] = RCUT * All.Asmth[1];
00132 #endif
00133 
00134 #ifdef PLACEHIGHRESREGION
00135   if(2 * All.TotalMeshSize[1] / GRID < All.Rcut[0])
00136     {
00137       All.TotalMeshSize[1] = 2 * (meshinner[1] + 2 * All.Rcut[0]) * (GRID) / ((double) (GRID - 2));
00138 
00139       for(i = 0; i < 3; i++)
00140         {
00141           All.Corner[1][i] = All.Xmintot[1][i] - 1.0001 * All.Rcut[0];
00142           All.UpperCorner[1][i] = All.Corner[1][i] + (GRID / 2 - 1) * (All.TotalMeshSize[1] / GRID);
00143         }
00144 
00145       if(2 * All.TotalMeshSize[1] / GRID > All.Rcut[0])
00146         {
00147           All.TotalMeshSize[1] = 2 * (meshinner[1] + 2 * All.Rcut[0]) * (GRID) / ((double) (GRID - 10));
00148 
00149           for(i = 0; i < 3; i++)
00150             {
00151               All.Corner[1][i] = All.Xmintot[1][i] - 1.0001 * (All.Rcut[0] + 2 * All.TotalMeshSize[j] / GRID);
00152               All.UpperCorner[1][i] = All.Corner[1][i] + (GRID / 2 - 1) * (All.TotalMeshSize[1] / GRID);
00153             }
00154         }
00155 
00156       All.Asmth[1] = ASMTH * All.TotalMeshSize[1] / GRID;
00157       All.Rcut[1] = RCUT * All.Asmth[1];
00158     }
00159 #endif
00160 
00161   if(ThisTask == 0)
00162     {
00163 #ifndef PERIODIC
00164       printf("\nAllowed region for isolated PM mesh (coarse):\n");
00165       printf("(%g|%g|%g)  -> (%g|%g|%g)   ext=%g  totmeshsize=%g  meshsize=%g\n\n",
00166              All.Xmintot[0][0], All.Xmintot[0][1], All.Xmintot[0][2],
00167              All.Xmaxtot[0][0], All.Xmaxtot[0][1], All.Xmaxtot[0][2], meshinner[0], All.TotalMeshSize[0],
00168              All.TotalMeshSize[0] / GRID);
00169 #endif
00170 #ifdef PLACEHIGHRESREGION
00171       printf("\nAllowed region for isolated PM mesh (high-res):\n");
00172       printf("(%g|%g|%g)  -> (%g|%g|%g)   ext=%g  totmeshsize=%g  meshsize=%g\n\n",
00173              All.Xmintot[1][0], All.Xmintot[1][1], All.Xmintot[1][2],
00174              All.Xmaxtot[1][0], All.Xmaxtot[1][1], All.Xmaxtot[1][2],
00175              meshinner[1], All.TotalMeshSize[1], All.TotalMeshSize[1] / GRID);
00176 #endif
00177     }
00178 
00179 }
00180 
00185 void pm_init_nonperiodic(void)
00186 {
00187   int i, slab_to_task_local[GRID];
00188   double bytes_tot = 0;
00189   size_t bytes;
00190 
00191   /* Set up the FFTW plan files. */
00192 
00193   fft_forward_plan = rfftw3d_mpi_create_plan(MPI_COMM_WORLD, GRID, GRID, GRID,
00194                                              FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
00195   fft_inverse_plan = rfftw3d_mpi_create_plan(MPI_COMM_WORLD, GRID, GRID, GRID,
00196                                              FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
00197 
00198   /* Workspace out the ranges on each processor. */
00199 
00200   rfftwnd_mpi_local_sizes(fft_forward_plan, &nslab_x, &slabstart_x, &nslab_y, &slabstart_y, &fftsize);
00201 
00202 
00203   for(i = 0; i < GRID; i++)
00204     slab_to_task_local[i] = 0;
00205 
00206   for(i = 0; i < nslab_x; i++)
00207     slab_to_task_local[slabstart_x + i] = ThisTask;
00208 
00209   MPI_Allreduce(slab_to_task_local, slab_to_task, GRID, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
00210 
00211   slabs_per_task = malloc(NTask * sizeof(int));
00212   MPI_Allgather(&nslab_x, 1, MPI_INT, slabs_per_task, 1, MPI_INT, MPI_COMM_WORLD);
00213 
00214 #ifndef PERIODIC
00215   if(ThisTask == 0)
00216     {
00217       for(i = 0; i < NTask; i++)
00218         printf("Task=%d  FFT-Slabs=%d\n", i, slabs_per_task[i]);
00219     }
00220 #endif
00221 
00222   first_slab_of_task = malloc(NTask * sizeof(int));
00223   MPI_Allgather(&slabstart_x, 1, MPI_INT, first_slab_of_task, 1, MPI_INT, MPI_COMM_WORLD);
00224 
00225   meshmin_list = malloc(3 * NTask * sizeof(int));
00226   meshmax_list = malloc(3 * NTask * sizeof(int));
00227 
00228   MPI_Allreduce(&fftsize, &maxfftsize, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
00229 
00230   /* now allocate memory to hold the FFT fields */
00231 
00232 #if !defined(PERIODIC)
00233   if(!(kernel[0] = (fftw_real *) malloc(bytes = fftsize * sizeof(fftw_real))))
00234     {
00235       printf("failed to allocate memory for `FFT-kernel[0]' (%g MB).\n", bytes / (1024.0 * 1024.0));
00236       endrun(1);
00237     }
00238   bytes_tot += bytes;
00239   fft_of_kernel[0] = (fftw_complex *) kernel[0];
00240 #endif
00241 
00242 #if defined(PLACEHIGHRESREGION)
00243   if(!(kernel[1] = (fftw_real *) malloc(bytes = fftsize * sizeof(fftw_real))))
00244     {
00245       printf("failed to allocate memory for `FFT-kernel[1]' (%g MB).\n", bytes / (1024.0 * 1024.0));
00246       endrun(1);
00247     }
00248   bytes_tot += bytes;
00249   fft_of_kernel[1] = (fftw_complex *) kernel[1];
00250 #endif
00251 
00252   if(ThisTask == 0)
00253     printf("\nAllocated %g MByte for FFT kernel(s).\n\n", bytes_tot / (1024.0 * 1024.0));
00254 
00255 }
00256 
00257 
00264 void pm_init_nonperiodic_allocate(int dimprod)
00265 {
00266   static int first_alloc = 1;
00267   int dimprodmax;
00268   double bytes_tot = 0;
00269   size_t bytes;
00270 
00271   MPI_Allreduce(&dimprod, &dimprodmax, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
00272 
00273   if(!(rhogrid = (fftw_real *) malloc(bytes = fftsize * sizeof(fftw_real))))
00274     {
00275       printf("failed to allocate memory for `FFT-rhogrid' (%g MB).\n", bytes / (1024.0 * 1024.0));
00276       endrun(1);
00277     }
00278   bytes_tot += bytes;
00279 
00280   fft_of_rhogrid = (fftw_complex *) rhogrid;
00281 
00282   if(!(forcegrid = (fftw_real *) malloc(bytes = imax(fftsize, dimprodmax) * sizeof(fftw_real))))
00283     {
00284       printf("failed to allocate memory for `FFT-forcegrid' (%g MB).\n", bytes / (1024.0 * 1024.0));
00285       endrun(1);
00286     }
00287   bytes_tot += bytes;
00288 
00289   if(!(workspace = (fftw_real *) malloc(bytes = imax(maxfftsize, dimprodmax) * sizeof(fftw_real))))
00290     {
00291       printf("failed to allocate memory for `FFT-workspace' (%g MB).\n", bytes / (1024.0 * 1024.0));
00292       endrun(1);
00293     }
00294   bytes_tot += bytes;
00295 
00296   if(first_alloc == 1)
00297     {
00298       first_alloc = 0;
00299       if(ThisTask == 0)
00300         printf("\nUsing %g MByte for non-periodic FFT computation.\n\n", bytes_tot / (1024.0 * 1024.0));
00301     }
00302 }
00303 
00304 
00309 void pm_init_nonperiodic_free(void)
00310 {
00311   /* deallocate memory */
00312   free(workspace);
00313   free(forcegrid);
00314   free(rhogrid);
00315 }
00316 
00317 
00321 void pm_setup_nonperiodic_kernel(void)
00322 {
00323   int i, j, k;
00324   double x, y, z, r, u, fac;
00325   double kx, ky, kz, k2, fx, fy, fz, ff;
00326   int ip;
00327 
00328   /* now set up kernel and its Fourier transform */
00329 
00330   pm_init_nonperiodic_allocate(0);
00331 
00332 #if !defined(PERIODIC)
00333   for(i = 0; i < fftsize; i++)  /* clear local density field */
00334     kernel[0][i] = 0;
00335 
00336   for(i = slabstart_x; i < (slabstart_x + nslab_x); i++)
00337     for(j = 0; j < GRID; j++)
00338       for(k = 0; k < GRID; k++)
00339         {
00340           x = ((double) i) / GRID;
00341           y = ((double) j) / GRID;
00342           z = ((double) k) / GRID;
00343 
00344           if(x >= 0.5)
00345             x -= 1.0;
00346           if(y >= 0.5)
00347             y -= 1.0;
00348           if(z >= 0.5)
00349             z -= 1.0;
00350 
00351           r = sqrt(x * x + y * y + z * z);
00352 
00353           u = 0.5 * r / (((double) ASMTH) / GRID);
00354 
00355           fac = 1 - erfc(u);
00356 
00357           if(r > 0)
00358             kernel[0][GRID * GRID2 * (i - slabstart_x) + GRID2 * j + k] = -fac / r;
00359           else
00360             kernel[0][GRID * GRID2 * (i - slabstart_x) + GRID2 * j + k] =
00361               -1 / (sqrt(M_PI) * (((double) ASMTH) / GRID));
00362         }
00363 
00364   /* do the forward transform of the kernel */
00365 
00366   rfftwnd_mpi(fft_forward_plan, 1, kernel[0], workspace, FFTW_TRANSPOSED_ORDER);
00367 #endif
00368 
00369 
00370 #if defined(PLACEHIGHRESREGION)
00371   for(i = 0; i < fftsize; i++)  /* clear local density field */
00372     kernel[1][i] = 0;
00373 
00374   for(i = slabstart_x; i < (slabstart_x + nslab_x); i++)
00375     for(j = 0; j < GRID; j++)
00376       for(k = 0; k < GRID; k++)
00377         {
00378           x = ((double) i) / GRID;
00379           y = ((double) j) / GRID;
00380           z = ((double) k) / GRID;
00381 
00382           if(x >= 0.5)
00383             x -= 1.0;
00384           if(y >= 0.5)
00385             y -= 1.0;
00386           if(z >= 0.5)
00387             z -= 1.0;
00388 
00389           r = sqrt(x * x + y * y + z * z);
00390 
00391           u = 0.5 * r / (((double) ASMTH) / GRID);
00392 
00393           fac = erfc(u * All.Asmth[1] / All.Asmth[0]) - erfc(u);
00394 
00395           if(r > 0)
00396             kernel[1][GRID * GRID2 * (i - slabstart_x) + GRID2 * j + k] = -fac / r;
00397           else
00398             {
00399               fac = 1 - All.Asmth[1] / All.Asmth[0];
00400               kernel[1][GRID * GRID2 * (i - slabstart_x) + GRID2 * j + k] =
00401                 -fac / (sqrt(M_PI) * (((double) ASMTH) / GRID));
00402             }
00403         }
00404 
00405   /* do the forward transform of the kernel */
00406 
00407   rfftwnd_mpi(fft_forward_plan, 1, kernel[1], workspace, FFTW_TRANSPOSED_ORDER);
00408 #endif
00409 
00410   /* deconvolve the Greens function twice with the CIC kernel */
00411 
00412   for(y = slabstart_y; y < slabstart_y + nslab_y; y++)
00413     for(x = 0; x < GRID; x++)
00414       for(z = 0; z < GRID / 2 + 1; z++)
00415         {
00416           if(x > GRID / 2)
00417             kx = x - GRID;
00418           else
00419             kx = x;
00420           if(y > GRID / 2)
00421             ky = y - GRID;
00422           else
00423             ky = y;
00424           if(z > GRID / 2)
00425             kz = z - GRID;
00426           else
00427             kz = z;
00428 
00429           k2 = kx * kx + ky * ky + kz * kz;
00430 
00431           if(k2 > 0)
00432             {
00433               fx = fy = fz = 1;
00434               if(kx != 0)
00435                 {
00436                   fx = (M_PI * kx) / GRID;
00437                   fx = sin(fx) / fx;
00438                 }
00439               if(ky != 0)
00440                 {
00441                   fy = (M_PI * ky) / GRID;
00442                   fy = sin(fy) / fy;
00443                 }
00444               if(kz != 0)
00445                 {
00446                   fz = (M_PI * kz) / GRID;
00447                   fz = sin(fz) / fz;
00448                 }
00449               ff = 1 / (fx * fy * fz);
00450               ff = ff * ff * ff * ff;
00451 
00452               ip = GRID * (GRID / 2 + 1) * (y - slabstart_y) + (GRID / 2 + 1) * x + z;
00453 #if !defined(PERIODIC)
00454               fft_of_kernel[0][ip].re *= ff;
00455               fft_of_kernel[0][ip].im *= ff;
00456 #endif
00457 #if defined(PLACEHIGHRESREGION)
00458               fft_of_kernel[1][ip].re *= ff;
00459               fft_of_kernel[1][ip].im *= ff;
00460 #endif
00461             }
00462         }
00463   /* end deconvolution */
00464 
00465   pm_init_nonperiodic_free();
00466 }
00467 
00468 
00469 
00477 int pmforce_nonperiodic(int grnr)
00478 {
00479   double dx, dy, dz;
00480   double fac, to_slab_fac;
00481   double re, im, acc_dim;
00482   int i, j, slab, level, sendTask, recvTask, flag, flagsum;
00483   int x, y, z, xl, yl, zl, xr, yr, zr, xll, yll, zll, xrr, yrr, zrr, ip, dim;
00484   int slab_x, slab_y, slab_z;
00485   int slab_xx, slab_yy, slab_zz;
00486   int meshmin[3], meshmax[3], sendmin, sendmax, recvmin, recvmax;
00487   int dimx, dimy, dimz, recv_dimx, recv_dimy, recv_dimz;
00488   MPI_Status status;
00489 
00490   if(ThisTask == 0)
00491     printf("Starting non-periodic PM calculation (grid=%d).\n", grnr);
00492 
00493   fac = All.G / pow(All.TotalMeshSize[grnr], 4) * pow(All.TotalMeshSize[grnr] / GRID, 3);       /* to get potential */
00494   fac *= 1 / (2 * All.TotalMeshSize[grnr] / GRID);      /* for finite differencing */
00495 
00496   to_slab_fac = GRID / All.TotalMeshSize[grnr];
00497 
00498 
00499   /* first, establish the extension of the local patch in GRID (for binning) */
00500 
00501   for(j = 0; j < 3; j++)
00502     {
00503       meshmin[j] = GRID;
00504       meshmax[j] = 0;
00505     }
00506 
00507   for(i = 0, flag = 0; i < NumPart; i++)
00508     {
00509 #ifdef PLACEHIGHRESREGION
00510       if(grnr == 0 || (grnr == 1 && ((1 << P[i].Type) & (PLACEHIGHRESREGION))))
00511 #endif
00512         {
00513           for(j = 0; j < 3; j++)
00514             {
00515               if(P[i].Pos[j] < All.Xmintot[grnr][j] || P[i].Pos[j] > All.Xmaxtot[grnr][j])
00516                 {
00517                   if(flag == 0)
00518                     {
00519                       printf
00520                         ("Particle Id=%d on task=%d with coordinates (%g|%g|%g) lies outside PM mesh.\nStopping\n",
00521                          (int)P[i].ID, ThisTask, P[i].Pos[0], P[i].Pos[1], P[i].Pos[2]);
00522                       fflush(stdout);
00523                     }
00524                   flag++;
00525                   break;
00526                 }
00527             }
00528         }
00529 
00530       if(flag > 0)
00531         continue;
00532 
00533       if(P[i].Pos[0] >= All.Corner[grnr][0] && P[i].Pos[0] < All.UpperCorner[grnr][0])
00534         if(P[i].Pos[1] >= All.Corner[grnr][1] && P[i].Pos[1] < All.UpperCorner[grnr][1])
00535           if(P[i].Pos[2] >= All.Corner[grnr][2] && P[i].Pos[2] < All.UpperCorner[grnr][2])
00536             {
00537               for(j = 0; j < 3; j++)
00538                 {
00539                   slab = to_slab_fac * (P[i].Pos[j] - All.Corner[grnr][j]);
00540 
00541                   if(slab < meshmin[j])
00542                     meshmin[j] = slab;
00543 
00544                   if(slab > meshmax[j])
00545                     meshmax[j] = slab;
00546                 }
00547             }
00548     }
00549 
00550 
00551   MPI_Allreduce(&flag, &flagsum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
00552   if(flagsum > 0)
00553     {
00554       if(ThisTask == 0)
00555         {
00556           printf("In total %d particles were outside allowed range.\n", flagsum);
00557           fflush(stdout);
00558         }
00559       return 1;                 /* error - need to return because particle were outside allowed range */
00560     }
00561 
00562   MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
00563   MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);
00564 
00565   dimx = meshmax[0] - meshmin[0] + 2;
00566   dimy = meshmax[1] - meshmin[1] + 2;
00567   dimz = meshmax[2] - meshmin[2] + 2;
00568 
00569 
00570   force_treefree();
00571 
00572   pm_init_nonperiodic_allocate((dimx + 4) * (dimy + 4) * (dimz + 4));
00573 
00574   for(i = 0; i < dimx * dimy * dimz; i++)
00575     workspace[i] = 0;
00576 
00577   for(i = 0; i < NumPart; i++)
00578     {
00579       if(P[i].Pos[0] < All.Corner[grnr][0] || P[i].Pos[0] >= All.UpperCorner[grnr][0])
00580         continue;
00581       if(P[i].Pos[1] < All.Corner[grnr][1] || P[i].Pos[1] >= All.UpperCorner[grnr][1])
00582         continue;
00583       if(P[i].Pos[2] < All.Corner[grnr][2] || P[i].Pos[2] >= All.UpperCorner[grnr][2])
00584         continue;
00585 
00586       slab_x = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]);
00587       dx = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
00588       slab_x -= meshmin[0];
00589       slab_xx = slab_x + 1;
00590 
00591       slab_y = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]);
00592       dy = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
00593       slab_y -= meshmin[1];
00594       slab_yy = slab_y + 1;
00595 
00596       slab_z = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]);
00597       dz = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
00598       slab_z -= meshmin[2];
00599       slab_zz = slab_z + 1;
00600 
00601       workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
00602       workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * dy * (1.0 - dz);
00603       workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * dz;
00604       workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * dy * dz;
00605 
00606       workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (dx) * (1.0 - dy) * (1.0 - dz);
00607       workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (dx) * dy * (1.0 - dz);
00608       workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (dx) * (1.0 - dy) * dz;
00609       workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (dx) * dy * dz;
00610     }
00611 
00612 
00613   for(i = 0; i < fftsize; i++)  /* clear local density field */
00614     rhogrid[i] = 0;
00615 
00616   for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
00617     {
00618       sendTask = ThisTask;
00619       recvTask = ThisTask ^ level;
00620       if(recvTask < NTask)
00621         {
00622           /* check how much we have to send */
00623           sendmin = 2 * GRID;
00624           sendmax = -1;
00625           for(slab_x = meshmin[0]; slab_x < meshmax[0] + 2; slab_x++)
00626             if(slab_to_task[slab_x] == recvTask)
00627               {
00628                 if(slab_x < sendmin)
00629                   sendmin = slab_x;
00630                 if(slab_x > sendmax)
00631                   sendmax = slab_x;
00632               }
00633           if(sendmax == -1)
00634             sendmin = 0;
00635 
00636           /* check how much we have to receive */
00637           recvmin = 2 * GRID;
00638           recvmax = -1;
00639           for(slab_x = meshmin_list[3 * recvTask]; slab_x < meshmax_list[3 * recvTask] + 2; slab_x++)
00640             if(slab_to_task[slab_x] == sendTask)
00641               {
00642                 if(slab_x < recvmin)
00643                   recvmin = slab_x;
00644                 if(slab_x > recvmax)
00645                   recvmax = slab_x;
00646               }
00647           if(recvmax == -1)
00648             recvmin = 0;
00649 
00650           if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)      /* ok, we have a contribution to the slab */
00651             {
00652               recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 2;
00653               recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 2;
00654               recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 2;
00655 
00656               if(level > 0)
00657                 {
00658                   MPI_Sendrecv(workspace + (sendmin - meshmin[0]) * dimy * dimz,
00659                                (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask,
00660                                TAG_NONPERIOD_A, forcegrid,
00661                                (recvmax - recvmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE,
00662                                recvTask, TAG_NONPERIOD_A, MPI_COMM_WORLD, &status);
00663                 }
00664               else
00665                 {
00666                   memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz,
00667                          (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real));
00668                 }
00669 
00670               for(slab_x = recvmin; slab_x <= recvmax; slab_x++)
00671                 {
00672                   slab_xx = slab_x - first_slab_of_task[ThisTask];
00673 
00674                   if(slab_xx >= 0 && slab_xx < slabs_per_task[ThisTask])
00675                     {
00676                       for(slab_y = meshmin_list[3 * recvTask + 1];
00677                           slab_y <= meshmax_list[3 * recvTask + 1] + 1; slab_y++)
00678                         {
00679                           slab_yy = slab_y;
00680 
00681                           for(slab_z = meshmin_list[3 * recvTask + 2];
00682                               slab_z <= meshmax_list[3 * recvTask + 2] + 1; slab_z++)
00683                             {
00684                               slab_zz = slab_z;
00685 
00686                               rhogrid[GRID * GRID2 * slab_xx + GRID2 * slab_yy + slab_zz] +=
00687                                 forcegrid[((slab_x - recvmin) * recv_dimy +
00688                                            (slab_y - meshmin_list[3 * recvTask + 1])) * recv_dimz +
00689                                           (slab_z - meshmin_list[3 * recvTask + 2])];
00690                             }
00691                         }
00692                     }
00693                 }
00694             }
00695         }
00696     }
00697 
00698 
00699   /* Do the FFT of the density field */
00700 
00701   rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
00702 
00703 
00704   /* multiply with the Fourier transform of the Green's function (kernel) */
00705 
00706   for(y = 0; y < nslab_y; y++)
00707     for(x = 0; x < GRID; x++)
00708       for(z = 0; z < GRID / 2 + 1; z++)
00709         {
00710           ip = GRID * (GRID / 2 + 1) * y + (GRID / 2 + 1) * x + z;
00711 
00712           re =
00713             fft_of_rhogrid[ip].re * fft_of_kernel[grnr][ip].re -
00714             fft_of_rhogrid[ip].im * fft_of_kernel[grnr][ip].im;
00715 
00716           im =
00717             fft_of_rhogrid[ip].re * fft_of_kernel[grnr][ip].im +
00718             fft_of_rhogrid[ip].im * fft_of_kernel[grnr][ip].re;
00719 
00720           fft_of_rhogrid[ip].re = re;
00721           fft_of_rhogrid[ip].im = im;
00722         }
00723 
00724   /* get the potential by inverse FFT */
00725 
00726   rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
00727 
00728   /* Now rhogrid holds the potential */
00729   /* construct the potential for the local patch */
00730 
00731 
00732   /* if we have a high-res mesh, establish the extension of the local patch in GRID (for reading out the
00733    * forces) 
00734    */
00735 
00736 #ifdef PLACEHIGHRESREGION
00737   if(grnr == 1)
00738     {
00739       for(j = 0; j < 3; j++)
00740         {
00741           meshmin[j] = GRID;
00742           meshmax[j] = 0;
00743         }
00744 
00745       for(i = 0; i < NumPart; i++)
00746         {
00747           if(!((1 << P[i].Type) & (PLACEHIGHRESREGION)))
00748             continue;
00749 
00750 
00751           if(P[i].Pos[0] >= All.Corner[grnr][0] && P[i].Pos[0] < All.UpperCorner[grnr][0])
00752             if(P[i].Pos[1] >= All.Corner[grnr][1] && P[i].Pos[1] < All.UpperCorner[grnr][1])
00753               if(P[i].Pos[2] >= All.Corner[grnr][2] && P[i].Pos[2] < All.UpperCorner[grnr][2])
00754                 {
00755                   for(j = 0; j < 3; j++)
00756                     {
00757                       slab = to_slab_fac * (P[i].Pos[j] - All.Corner[grnr][j]);
00758 
00759                       if(slab < meshmin[j])
00760                         meshmin[j] = slab;
00761 
00762                       if(slab > meshmax[j])
00763                         meshmax[j] = slab;
00764                     }
00765                 }
00766         }
00767 
00768       MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
00769       MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);
00770     }
00771 #endif
00772 
00773   dimx = meshmax[0] - meshmin[0] + 6;
00774   dimy = meshmax[1] - meshmin[1] + 6;
00775   dimz = meshmax[2] - meshmin[2] + 6;
00776 
00777   for(j = 0; j < 3; j++)
00778     {
00779       if(meshmin[j] < 2)
00780         endrun(131231);
00781       if(meshmax[j] > GRID / 2 - 3)
00782         endrun(131288);
00783     }
00784 
00785   for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
00786     {
00787       sendTask = ThisTask;
00788       recvTask = ThisTask ^ level;
00789 
00790       if(recvTask < NTask)
00791         {
00792           /* check how much we have to send */
00793           sendmin = 2 * GRID;
00794           sendmax = -GRID;
00795           for(slab_x = meshmin_list[3 * recvTask] - 2; slab_x < meshmax_list[3 * recvTask] + 4; slab_x++)
00796             if(slab_to_task[slab_x] == sendTask)
00797               {
00798                 if(slab_x < sendmin)
00799                   sendmin = slab_x;
00800                 if(slab_x > sendmax)
00801                   sendmax = slab_x;
00802               }
00803           if(sendmax == -GRID)
00804             sendmin = sendmax + 1;
00805 
00806 
00807           /* check how much we have to receive */
00808           recvmin = 2 * GRID;
00809           recvmax = -GRID;
00810           for(slab_x = meshmin[0] - 2; slab_x < meshmax[0] + 4; slab_x++)
00811             if(slab_to_task[slab_x] == recvTask)
00812               {
00813                 if(slab_x < recvmin)
00814                   recvmin = slab_x;
00815                 if(slab_x > recvmax)
00816                   recvmax = slab_x;
00817               }
00818           if(recvmax == -GRID)
00819             recvmin = recvmax + 1;
00820 
00821           if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)      /* ok, we have a contribution to the slab */
00822             {
00823               recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 6;
00824               recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 6;
00825               recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 6;
00826 
00827               /* prepare what we want to send */
00828               if(sendmax - sendmin >= 0)
00829                 {
00830                   for(slab_x = sendmin; slab_x <= sendmax; slab_x++)
00831                     {
00832                       slab_xx = slab_x - first_slab_of_task[ThisTask];
00833 
00834                       for(slab_y = meshmin_list[3 * recvTask + 1] - 2;
00835                           slab_y < meshmax_list[3 * recvTask + 1] + 4; slab_y++)
00836                         {
00837                           slab_yy = slab_y;
00838 
00839                           for(slab_z = meshmin_list[3 * recvTask + 2] - 2;
00840                               slab_z < meshmax_list[3 * recvTask + 2] + 4; slab_z++)
00841                             {
00842                               slab_zz = slab_z;
00843 
00844                               forcegrid[((slab_x - sendmin) * recv_dimy +
00845                                          (slab_y - (meshmin_list[3 * recvTask + 1] - 2))) * recv_dimz +
00846                                         slab_z - (meshmin_list[3 * recvTask + 2] - 2)] =
00847                                 rhogrid[GRID * GRID2 * slab_xx + GRID2 * slab_yy + slab_zz];
00848                             }
00849                         }
00850                     }
00851                 }
00852 
00853               if(level > 0)
00854                 {
00855                   MPI_Sendrecv(forcegrid,
00856                                (sendmax - sendmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real),
00857                                MPI_BYTE, recvTask, TAG_NONPERIOD_B,
00858                                workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
00859                                (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE,
00860                                recvTask, TAG_NONPERIOD_B, MPI_COMM_WORLD, &status);
00861                 }
00862               else
00863                 {
00864                   memcpy(workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
00865                          forcegrid, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real));
00866                 }
00867             }
00868         }
00869     }
00870 
00871   dimx = meshmax[0] - meshmin[0] + 2;
00872   dimy = meshmax[1] - meshmin[1] + 2;
00873   dimz = meshmax[2] - meshmin[2] + 2;
00874 
00875   recv_dimx = meshmax[0] - meshmin[0] + 6;
00876   recv_dimy = meshmax[1] - meshmin[1] + 6;
00877   recv_dimz = meshmax[2] - meshmin[2] + 6;
00878 
00879 
00880   for(dim = 0; dim < 3; dim++)  /* Calculate each component of the force. */
00881     {
00882       /* get the force component by finite differencing the potential */
00883       /* note: "workspace" now contains the potential for the local patch, plus a suffiently large buffer region */
00884 
00885       for(x = 0; x < meshmax[0] - meshmin[0] + 2; x++)
00886         for(y = 0; y < meshmax[1] - meshmin[1] + 2; y++)
00887           for(z = 0; z < meshmax[2] - meshmin[2] + 2; z++)
00888             {
00889               xrr = xll = xr = xl = x;
00890               yrr = yll = yr = yl = y;
00891               zrr = zll = zr = zl = z;
00892 
00893               switch (dim)
00894                 {
00895                 case 0:
00896                   xr = x + 1;
00897                   xrr = x + 2;
00898                   xl = x - 1;
00899                   xll = x - 2;
00900                   break;
00901                 case 1:
00902                   yr = y + 1;
00903                   yl = y - 1;
00904                   yrr = y + 2;
00905                   yll = y - 2;
00906                   break;
00907                 case 2:
00908                   zr = z + 1;
00909                   zl = z - 1;
00910                   zrr = z + 2;
00911                   zll = z - 2;
00912                   break;
00913                 }
00914 
00915               forcegrid[(x * dimy + y) * dimz + z]
00916                 =
00917                 fac * ((4.0 / 3) *
00918                        (workspace[((xl + 2) * recv_dimy + (yl + 2)) * recv_dimz + (zl + 2)]
00919                         - workspace[((xr + 2) * recv_dimy + (yr + 2)) * recv_dimz + (zr + 2)]) -
00920                        (1.0 / 6) *
00921                        (workspace[((xll + 2) * recv_dimy + (yll + 2)) * recv_dimz + (zll + 2)] -
00922                         workspace[((xrr + 2) * recv_dimy + (yrr + 2)) * recv_dimz + (zrr + 2)]));
00923             }
00924 
00925 
00926       /* read out the forces */
00927 
00928       for(i = 0; i < NumPart; i++)
00929         {
00930 #ifdef PLACEHIGHRESREGION
00931           if(grnr == 1)
00932             if(!((1 << P[i].Type) & (PLACEHIGHRESREGION)))
00933               continue;
00934 #endif
00935           slab_x = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]);
00936           dx = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
00937           slab_x -= meshmin[0];
00938           slab_xx = slab_x + 1;
00939 
00940           slab_y = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]);
00941           dy = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
00942           slab_y -= meshmin[1];
00943           slab_yy = slab_y + 1;
00944 
00945           slab_z = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]);
00946           dz = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
00947           slab_z -= meshmin[2];
00948           slab_zz = slab_z + 1;
00949 
00950           acc_dim =
00951             forcegrid[(slab_x * dimy + slab_y) * dimz + slab_z] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
00952           acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_z] * (1.0 - dx) * dy * (1.0 - dz);
00953           acc_dim += forcegrid[(slab_x * dimy + slab_y) * dimz + slab_zz] * (1.0 - dx) * (1.0 - dy) * dz;
00954           acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_zz] * (1.0 - dx) * dy * dz;
00955 
00956           acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_z] * (dx) * (1.0 - dy) * (1.0 - dz);
00957           acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_z] * (dx) * dy * (1.0 - dz);
00958           acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_zz] * (dx) * (1.0 - dy) * dz;
00959           acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_zz] * (dx) * dy * dz;
00960 
00961           P[i].GravPM[dim] += acc_dim;
00962         }
00963     }
00964 
00965   pm_init_nonperiodic_free();
00966   force_treeallocate(All.TreeAllocFactor * All.MaxPart, All.MaxPart);
00967   All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency;
00968 
00969   if(ThisTask == 0)
00970     printf("done PM.\n");
00971 
00972   return 0;
00973 }
00974 
00975 
00976 
00977 
00983 int pmpotential_nonperiodic(int grnr)
00984 {
00985   double dx, dy, dz;
00986   double fac, to_slab_fac;
00987   double re, im, pot;
00988   int i, j, slab, level, sendTask, recvTask, flag, flagsum;
00989   int x, y, z, ip;
00990   int slab_x, slab_y, slab_z;
00991   int slab_xx, slab_yy, slab_zz;
00992   int meshmin[3], meshmax[3], sendmin, sendmax, recvmin, recvmax;
00993   int dimx, dimy, dimz, recv_dimx, recv_dimy, recv_dimz;
00994   MPI_Status status;
00995 
00996 
00997   if(ThisTask == 0)
00998     printf("Starting non-periodic PM-potential calculation.\n");
00999 
01000   fac = All.G / pow(All.TotalMeshSize[grnr], 4) * pow(All.TotalMeshSize[grnr] / GRID, 3);       /* to get potential */
01001 
01002   to_slab_fac = GRID / All.TotalMeshSize[grnr];
01003 
01004   /* first, establish the extension of the local patch in GRID (for binning) */
01005 
01006   for(j = 0; j < 3; j++)
01007     {
01008       meshmin[j] = GRID;
01009       meshmax[j] = 0;
01010     }
01011 
01012   for(i = 0, flag = 0; i < NumPart; i++)
01013     {
01014 #ifdef PLACEHIGHRESREGION
01015       if(grnr == 0 || (grnr == 1 && ((1 << P[i].Type) & (PLACEHIGHRESREGION))))
01016 #endif
01017         {
01018           for(j = 0; j < 3; j++)
01019             {
01020               if(P[i].Pos[j] < All.Xmintot[grnr][j] || P[i].Pos[j] > All.Xmaxtot[grnr][j])
01021                 {
01022                   if(flag == 0)
01023                     {
01024                       printf
01025                         ("Particle Id=%d on task=%d with coordinates (%g|%g|%g) lies outside PM mesh.\nStopping\n",
01026                          (int)P[i].ID, ThisTask, P[i].Pos[0], P[i].Pos[1], P[i].Pos[2]);
01027                       fflush(stdout);
01028                     }
01029                   flag++;
01030                   break;
01031                 }
01032             }
01033         }
01034 
01035       if(flag > 0)
01036         continue;
01037 
01038       if(P[i].Pos[0] >= All.Corner[grnr][0] && P[i].Pos[0] < All.UpperCorner[grnr][0])
01039         if(P[i].Pos[1] >= All.Corner[grnr][1] && P[i].Pos[1] < All.UpperCorner[grnr][1])
01040           if(P[i].Pos[2] >= All.Corner[grnr][2] && P[i].Pos[2] < All.UpperCorner[grnr][2])
01041             {
01042               for(j = 0; j < 3; j++)
01043                 {
01044                   slab = to_slab_fac * (P[i].Pos[j] - All.Corner[grnr][j]);
01045 
01046                   if(slab < meshmin[j])
01047                     meshmin[j] = slab;
01048 
01049                   if(slab > meshmax[j])
01050                     meshmax[j] = slab;
01051                 }
01052             }
01053     }
01054 
01055 
01056   MPI_Allreduce(&flag, &flagsum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
01057   if(flagsum > 0) 
01058     {
01059       if(ThisTask == 0)
01060         {
01061           printf("In total %d particles were outside allowed range.\n", flagsum);
01062           fflush(stdout);
01063         }
01064       return 1;                 /* error - need to return because particle were outside allowed range */
01065     }
01066 
01067 
01068 
01069   MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
01070   MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);
01071 
01072   dimx = meshmax[0] - meshmin[0] + 2;
01073   dimy = meshmax[1] - meshmin[1] + 2;
01074   dimz = meshmax[2] - meshmin[2] + 2;
01075 
01076 
01077   force_treefree();
01078 
01079   pm_init_nonperiodic_allocate((dimx + 4) * (dimy + 4) * (dimz + 4));
01080 
01081   for(i = 0; i < dimx * dimy * dimz; i++)
01082     workspace[i] = 0;
01083 
01084   for(i = 0; i < NumPart; i++)
01085     {
01086       if(P[i].Pos[0] < All.Corner[grnr][0] || P[i].Pos[0] >= All.UpperCorner[grnr][0])
01087         continue;
01088       if(P[i].Pos[1] < All.Corner[grnr][1] || P[i].Pos[1] >= All.UpperCorner[grnr][1])
01089         continue;
01090       if(P[i].Pos[2] < All.Corner[grnr][2] || P[i].Pos[2] >= All.UpperCorner[grnr][2])
01091         continue;
01092 
01093       slab_x = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]);
01094       dx = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
01095       slab_x -= meshmin[0];
01096       slab_xx = slab_x + 1;
01097 
01098       slab_y = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]);
01099       dy = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
01100       slab_y -= meshmin[1];
01101       slab_yy = slab_y + 1;
01102 
01103       slab_z = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]);
01104       dz = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
01105       slab_z -= meshmin[2];
01106       slab_zz = slab_z + 1;
01107 
01108       workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
01109       workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * dy * (1.0 - dz);
01110       workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * dz;
01111       workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * dy * dz;
01112 
01113       workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (dx) * (1.0 - dy) * (1.0 - dz);
01114       workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (dx) * dy * (1.0 - dz);
01115       workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (dx) * (1.0 - dy) * dz;
01116       workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (dx) * dy * dz;
01117     }
01118 
01119 
01120   for(i = 0; i < fftsize; i++)  /* clear local density field */
01121     rhogrid[i] = 0;
01122 
01123   for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
01124     {
01125       sendTask = ThisTask;
01126       recvTask = ThisTask ^ level;
01127       if(recvTask < NTask)
01128         {
01129           /* check how much we have to send */
01130           sendmin = 2 * GRID;
01131           sendmax = -1;
01132           for(slab_x = meshmin[0]; slab_x < meshmax[0] + 2; slab_x++)
01133             if(slab_to_task[slab_x] == recvTask)
01134               {
01135                 if(slab_x < sendmin)
01136                   sendmin = slab_x;
01137                 if(slab_x > sendmax)
01138                   sendmax = slab_x;
01139               }
01140           if(sendmax == -1)
01141             sendmin = 0;
01142 
01143           /* check how much we have to receive */
01144           recvmin = 2 * GRID;
01145           recvmax = -1;
01146           for(slab_x = meshmin_list[3 * recvTask]; slab_x < meshmax_list[3 * recvTask] + 2; slab_x++)
01147             if(slab_to_task[slab_x] == sendTask)
01148               {
01149                 if(slab_x < recvmin)
01150                   recvmin = slab_x;
01151                 if(slab_x > recvmax)
01152                   recvmax = slab_x;
01153               }
01154           if(recvmax == -1)
01155             recvmin = 0;
01156 
01157           if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)      /* ok, we have a contribution to the slab */
01158             {
01159               recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 2;
01160               recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 2;
01161               recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 2;
01162 
01163               if(level > 0)
01164                 {
01165                   MPI_Sendrecv(workspace + (sendmin - meshmin[0]) * dimy * dimz,
01166                                (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask,
01167                                TAG_NONPERIOD_C, forcegrid,
01168                                (recvmax - recvmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE,
01169                                recvTask, TAG_NONPERIOD_C, MPI_COMM_WORLD, &status);
01170                 }
01171               else
01172                 {
01173                   memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz,
01174                          (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real));
01175                 }
01176 
01177               for(slab_x = recvmin; slab_x <= recvmax; slab_x++)
01178                 {
01179                   slab_xx = slab_x - first_slab_of_task[ThisTask];
01180 
01181                   if(slab_xx >= 0 && slab_xx < slabs_per_task[ThisTask])
01182                     {
01183                       for(slab_y = meshmin_list[3 * recvTask + 1];
01184                           slab_y <= meshmax_list[3 * recvTask + 1] + 1; slab_y++)
01185                         {
01186                           slab_yy = slab_y;
01187 
01188                           for(slab_z = meshmin_list[3 * recvTask + 2];
01189                               slab_z <= meshmax_list[3 * recvTask + 2] + 1; slab_z++)
01190                             {
01191                               slab_zz = slab_z;
01192 
01193                               rhogrid[GRID * GRID2 * slab_xx + GRID2 * slab_yy + slab_zz] +=
01194                                 forcegrid[((slab_x - recvmin) * recv_dimy +
01195                                            (slab_y - meshmin_list[3 * recvTask + 1])) * recv_dimz +
01196                                           (slab_z - meshmin_list[3 * recvTask + 2])];
01197                             }
01198                         }
01199                     }
01200                 }
01201             }
01202         }
01203     }
01204 
01205 
01206   /* Do the FFT of the density field */
01207 
01208   rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
01209 
01210 
01211   /* multiply with the Fourier transform of the Green's function (kernel) */
01212 
01213   for(y = 0; y < nslab_y; y++)
01214     for(x = 0; x < GRID; x++)
01215       for(z = 0; z < GRID / 2 + 1; z++)
01216         {
01217           ip = GRID * (GRID / 2 + 1) * y + (GRID / 2 + 1) * x + z;
01218 
01219           re =
01220             fft_of_rhogrid[ip].re * fft_of_kernel[grnr][ip].re -
01221             fft_of_rhogrid[ip].im * fft_of_kernel[grnr][ip].im;
01222 
01223           im =
01224             fft_of_rhogrid[ip].re * fft_of_kernel[grnr][ip].im +
01225             fft_of_rhogrid[ip].im * fft_of_kernel[grnr][ip].re;
01226 
01227           fft_of_rhogrid[ip].re = fac * re;
01228           fft_of_rhogrid[ip].im = fac * im;
01229         }
01230 
01231   /* get the potential by inverse FFT */
01232 
01233   rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
01234 
01235   /* Now rhogrid holds the potential */
01236   /* construct the potential for the local patch */
01237 
01238 
01239   /* if we have a high-res mesh, establish the extension of the local patch in GRID (for reading out the
01240    * forces) 
01241    */
01242 
01243 #ifdef PLACEHIGHRESREGION
01244   if(grnr == 1)
01245     {
01246       for(j = 0; j < 3; j++)
01247         {
01248           meshmin[j] = GRID;
01249           meshmax[j] = 0;
01250         }
01251 
01252       for(i = 0; i < NumPart; i++)
01253         {
01254           if(!((1 << P[i].Type) & (PLACEHIGHRESREGION)))
01255             continue;
01256 
01257 
01258           if(P[i].Pos[0] >= All.Corner[grnr][0] && P[i].Pos[0] < All.UpperCorner[grnr][0])
01259             if(P[i].Pos[1] >= All.Corner[grnr][1] && P[i].Pos[1] < All.UpperCorner[grnr][1])
01260               if(P[i].Pos[2] >= All.Corner[grnr][2] && P[i].Pos[2] < All.UpperCorner[grnr][2])
01261                 {
01262                   for(j = 0; j < 3; j++)
01263                     {
01264                       slab = to_slab_fac * (P[i].Pos[j] - All.Corner[grnr][j]);
01265 
01266                       if(slab < meshmin[j])
01267                         meshmin[j] = slab;
01268 
01269                       if(slab > meshmax[j])
01270                         meshmax[j] = slab;
01271                     }
01272                 }
01273         }
01274 
01275       MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
01276       MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);
01277     }
01278 #endif
01279 
01280   dimx = meshmax[0] - meshmin[0] + 6;
01281   dimy = meshmax[1] - meshmin[1] + 6;
01282   dimz = meshmax[2] - meshmin[2] + 6;
01283 
01284   for(j = 0; j < 3; j++)
01285     {
01286       if(meshmin[j] < 2)
01287         endrun(131231);
01288       if(meshmax[j] > GRID / 2 - 3)
01289         endrun(131288);
01290     }
01291 
01292   for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
01293     {
01294       sendTask = ThisTask;
01295       recvTask = ThisTask ^ level;
01296 
01297       if(recvTask < NTask)
01298         {
01299           /* check how much we have to send */
01300           sendmin = 2 * GRID;
01301           sendmax = -GRID;
01302           for(slab_x = meshmin_list[3 * recvTask] - 2; slab_x < meshmax_list[3 * recvTask] + 4; slab_x++)
01303             if(slab_to_task[slab_x] == sendTask)
01304               {
01305                 if(slab_x < sendmin)
01306                   sendmin = slab_x;
01307                 if(slab_x > sendmax)
01308                   sendmax = slab_x;
01309               }
01310           if(sendmax == -GRID)
01311             sendmin = sendmax + 1;
01312 
01313 
01314           /* check how much we have to receive */
01315           recvmin = 2 * GRID;
01316           recvmax = -GRID;
01317           for(slab_x = meshmin[0] - 2; slab_x < meshmax[0] + 4; slab_x++)
01318             if(slab_to_task[slab_x] == recvTask)
01319               {
01320                 if(slab_x < recvmin)
01321                   recvmin = slab_x;
01322                 if(slab_x > recvmax)
01323                   recvmax = slab_x;
01324               }
01325           if(recvmax == -GRID)
01326             recvmin = recvmax + 1;
01327 
01328           if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)      /* ok, we have a contribution to the slab */
01329             {
01330               recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 6;
01331               recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 6;
01332               recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 6;
01333 
01334               /* prepare what we want to send */
01335               if(sendmax - sendmin >= 0)
01336                 {
01337                   for(slab_x = sendmin; slab_x <= sendmax; slab_x++)
01338                     {
01339                       slab_xx = slab_x - first_slab_of_task[ThisTask];
01340 
01341                       for(slab_y = meshmin_list[3 * recvTask + 1] - 2;
01342                           slab_y < meshmax_list[3 * recvTask + 1] + 4; slab_y++)
01343                         {
01344                           slab_yy = slab_y;
01345 
01346                           for(slab_z = meshmin_list[3 * recvTask + 2] - 2;
01347                               slab_z < meshmax_list[3 * recvTask + 2] + 4; slab_z++)
01348                             {
01349                               slab_zz = slab_z;
01350 
01351                               forcegrid[((slab_x - sendmin) * recv_dimy +
01352                                          (slab_y - (meshmin_list[3 * recvTask + 1] - 2))) * recv_dimz +
01353                                         slab_z - (meshmin_list[3 * recvTask + 2] - 2)] =
01354                                 rhogrid[GRID * GRID2 * slab_xx + GRID2 * slab_yy + slab_zz];
01355                             }
01356                         }
01357                     }
01358                 }
01359 
01360               if(level > 0)
01361                 {
01362                   MPI_Sendrecv(forcegrid,
01363                                (sendmax - sendmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real),
01364                                MPI_BYTE, recvTask, TAG_NONPERIOD_D,
01365                                workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
01366                                (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE,
01367                                recvTask, TAG_NONPERIOD_D, MPI_COMM_WORLD, &status);
01368                 }
01369               else
01370                 {
01371                   memcpy(workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
01372                          forcegrid, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real));
01373                 }
01374             }
01375         }
01376     }
01377 
01378   dimx = meshmax[0] - meshmin[0] + 2;
01379   dimy = meshmax[1] - meshmin[1] + 2;
01380   dimz = meshmax[2] - meshmin[2] + 2;
01381 
01382   recv_dimx = meshmax[0] - meshmin[0] + 6;
01383   recv_dimy = meshmax[1] - meshmin[1] + 6;
01384   recv_dimz = meshmax[2] - meshmin[2] + 6;
01385 
01386 
01387   for(x = 0; x < meshmax[0] - meshmin[0] + 2; x++)
01388     for(y = 0; y < meshmax[1] - meshmin[1] + 2; y++)
01389       for(z = 0; z < meshmax[2] - meshmin[2] + 2; z++)
01390         {
01391           forcegrid[(x * dimy + y) * dimz + z]
01392             = workspace[((x + 2) * recv_dimy + (y + 2)) * recv_dimz + (z + 2)];
01393         }
01394 
01395 
01396   /* read out the potential */
01397 
01398   for(i = 0; i < NumPart; i++)
01399     {
01400 #ifdef PLACEHIGHRESREGION
01401       if(grnr == 1)
01402         if(!((1 << P[i].Type) & (PLACEHIGHRESREGION)))
01403           continue;
01404 #endif
01405       slab_x = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]);
01406       dx = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
01407       slab_x -= meshmin[0];
01408       slab_xx = slab_x + 1;
01409 
01410       slab_y = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]);
01411       dy = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
01412       slab_y -= meshmin[1];
01413       slab_yy = slab_y + 1;
01414 
01415       slab_z = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]);
01416       dz = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
01417       slab_z -= meshmin[2];
01418       slab_zz = slab_z + 1;
01419 
01420       pot = forcegrid[(slab_x * dimy + slab_y) * dimz + slab_z] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
01421       pot += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_z] * (1.0 - dx) * dy * (1.0 - dz);
01422       pot += forcegrid[(slab_x * dimy + slab_y) * dimz + slab_zz] * (1.0 - dx) * (1.0 - dy) * dz;
01423       pot += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_zz] * (1.0 - dx) * dy * dz;
01424 
01425       pot += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_z] * (dx) * (1.0 - dy) * (1.0 - dz);
01426       pot += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_z] * (dx) * dy * (1.0 - dz);
01427       pot += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_zz] * (dx) * (1.0 - dy) * dz;
01428       pot += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_zz] * (dx) * dy * dz;
01429 
01430       P[i].Potential += pot;
01431     }
01432 
01433   pm_init_nonperiodic_free();
01434   force_treeallocate(All.TreeAllocFactor * All.MaxPart, All.MaxPart);
01435   All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency;
01436 
01437   if(ThisTask == 0)
01438     printf("done PM-potential.\n");
01439 
01440   return 0;
01441 }
01442 
01443 
01444 #endif
01445 #endif

Generated on Sat Apr 22 15:29:41 2006 for GADGET-2 by  doxygen 1.3.9.1