Actual source code: stag2d.c
  1: /* Functions specific to the 2-dimensional implementation of DMStag */
  2: #include <petsc/private/dmstagimpl.h>
  4: /*@C
  5:   DMStagCreate2d - Create an object to manage data living on the elements, faces, and vertices of a parallelized regular 2D grid.
  7:   Collective
  9:   Input Parameters:
 10: + comm - MPI communicator
 11: . bndx,bndy - boundary type: `DM_BOUNDARY_NONE`, `DM_BOUNDARY_PERIODIC`, or `DM_BOUNDARY_GHOSTED`
 12: . M,N - global number of elements in x,y directions
 13: . m,n - number of ranks in the x,y directions (may be `PETSC_DECIDE`)
 14: . dof0 - number of degrees of freedom per vertex/0-cell
 15: . dof1 - number of degrees of freedom per face/1-cell
 16: . dof2 - number of degrees of freedom per element/2-cell
 17: . stencilType - ghost/halo region type: `DMSTAG_STENCIL_NONE`, `DMSTAG_STENCIL_BOX`, or `DMSTAG_STENCIL_STAR`
 18: . stencilWidth - width, in elements, of halo/ghost region
 19: - lx,ly - arrays of local x,y element counts, of length equal to m,n, summing to M,N
 21:   Output Parameter:
 22: . dm - the new `DMSTAG` object
 24:   Options Database Keys:
 25: + -dm_view - calls `DMViewFromOptions()` at the conclusion of `DMSetUp()`
 26: . -stag_grid_x <nx> - number of elements in the x direction
 27: . -stag_grid_y <ny> - number of elements in the y direction
 28: . -stag_ranks_x <rx> - number of ranks in the x direction
 29: . -stag_ranks_y <ry> - number of ranks in the y direction
 30: . -stag_ghost_stencil_width - width of ghost region, in elements
 31: . -stag_boundary_type_x <none,ghosted,periodic> - `DMBoundaryType` value
 32: - -stag_boundary_type_y <none,ghosted,periodic> - `DMBoundaryType` value
 34:   Level: beginner
 36:   Notes:
 37:   You must call `DMSetUp()` after this call, before using the `DM`.
 38:   If you wish to use the options database (see the keys above) to change values in the `DMSTAG`, you must call
 39:   `DMSetFromOptions()` after this function but before `DMSetUp()`.
 41: .seealso: [](chapter_stag), `DMSTAG`, `DMStagCreate1d()`, `DMStagCreate3d()`, `DMDestroy()`, `DMView()`, `DMCreateGlobalVector()`, `DMCreateLocalVector()`, `DMLocalToGlobalBegin()`, `DMDACreate2d()`
 42: @*/
 43: PETSC_EXTERN PetscErrorCode DMStagCreate2d(MPI_Comm comm, DMBoundaryType bndx, DMBoundaryType bndy, PetscInt M, PetscInt N, PetscInt m, PetscInt n, PetscInt dof0, PetscInt dof1, PetscInt dof2, DMStagStencilType stencilType, PetscInt stencilWidth, const PetscInt lx[], const PetscInt ly[], DM *dm)
 44: {
 45:   DMCreate(comm, dm);
 46:   DMSetDimension(*dm, 2);
 47:   DMStagInitialize(bndx, bndy, DM_BOUNDARY_NONE, M, N, 0, m, n, 0, dof0, dof1, dof2, 0, stencilType, stencilWidth, lx, ly, NULL, *dm);
 48:   return 0;
 49: }
 51: PETSC_INTERN PetscErrorCode DMStagRestrictSimple_2d(DM dmf, Vec xf_local, DM dmc, Vec xc_local)
 52: {
 53:   PetscScalar ***LA_xf, ***LA_xc;
 54:   PetscInt       i, j, start[2], n[2], nextra[2], N[2];
 55:   PetscInt       d, dof[3];
 56:   PetscInt       slot_down_left_coarse, slot_down_left_fine;
 57:   PetscInt       slot_element_fine, slot_element_coarse;
 58:   PetscInt       slot_left_coarse, slot_down_coarse, slot_left_fine, slot_down_fine;
 60:   DMStagGetDOF(dmc, &dof[0], &dof[1], &dof[2], NULL);
 61:   DMStagGetCorners(dmc, &start[0], &start[1], NULL, &n[0], &n[1], NULL, &nextra[0], &nextra[1], NULL);
 62:   DMStagGetGlobalSizes(dmc, &N[0], &N[1], NULL);
 63:   if (PetscDefined(USE_DEBUG)) {
 64:     PetscInt dof_check[3], n_fine[2], start_fine[2];
 66:     DMStagGetDOF(dmf, &dof_check[0], &dof_check[1], &dof_check[2], NULL);
 67:     DMStagGetCorners(dmf, &start_fine[0], &start_fine[1], NULL, &n_fine[0], &n_fine[1], NULL, NULL, NULL, NULL);
 71:     {
 72:       PetscInt size_local, entries_local;
 74:       DMStagGetEntriesLocal(dmf, &entries_local);
 75:       VecGetLocalSize(xf_local, &size_local);
 77:     }
 78:     {
 79:       PetscInt size_local, entries_local;
 81:       DMStagGetEntriesLocal(dmc, &entries_local);
 82:       VecGetLocalSize(xc_local, &size_local);
 84:     }
 85:   }
 86:   VecZeroEntries(xc_local);
 87:   DMStagVecGetArray(dmf, xf_local, &LA_xf);
 88:   DMStagVecGetArray(dmc, xc_local, &LA_xc);
 89:   DMStagGetLocationSlot(dmf, DMSTAG_DOWN_LEFT, 0, &slot_down_left_fine);
 90:   DMStagGetLocationSlot(dmf, DMSTAG_LEFT, 0, &slot_left_fine);
 91:   DMStagGetLocationSlot(dmf, DMSTAG_DOWN, 0, &slot_down_fine);
 92:   DMStagGetLocationSlot(dmf, DMSTAG_ELEMENT, 0, &slot_element_fine);
 93:   DMStagGetLocationSlot(dmc, DMSTAG_DOWN_LEFT, 0, &slot_down_left_coarse);
 94:   DMStagGetLocationSlot(dmc, DMSTAG_LEFT, 0, &slot_left_coarse);
 95:   DMStagGetLocationSlot(dmc, DMSTAG_DOWN, 0, &slot_down_coarse);
 96:   DMStagGetLocationSlot(dmc, DMSTAG_ELEMENT, 0, &slot_element_coarse);
 97:   for (j = start[1]; j < start[1] + n[1] + nextra[1]; j++) {
 98:     for (i = start[0]; i < start[0] + n[0] + nextra[0]; i++) {
 99:       for (d = 0; d < dof[0]; ++d) LA_xc[j][i][slot_down_left_coarse + d] = LA_xf[2 * j][2 * i][slot_down_left_fine + d];
100:       for (d = 0; d < dof[1]; ++d) {
101:         if (j < N[1]) LA_xc[j][i][slot_left_coarse + d] = 0.5 * (LA_xf[2 * j][2 * i][slot_left_fine + d] + LA_xf[2 * j + 1][2 * i][slot_left_fine + d]);
102:         if (i < N[0]) LA_xc[j][i][slot_down_coarse + d] = 0.5 * (LA_xf[2 * j][2 * i][slot_down_fine + d] + LA_xf[2 * j][2 * i + 1][slot_down_fine + d]);
103:       }
104:       for (d = 0; d < dof[2]; ++d) {
105:         if (i < N[0] && j < N[1]) {
106:           LA_xc[j][i][slot_element_coarse + d] = 0.25 * (LA_xf[2 * j][2 * i][slot_element_fine + d] + LA_xf[2 * j + 1][2 * i][slot_element_fine + d] + LA_xf[2 * j][2 * i + 1][slot_element_fine + d] + LA_xf[2 * j + 1][2 * i + 1][slot_element_fine + d]);
107:         }
108:       }
109:     }
110:   }
111:   DMStagVecRestoreArray(dmf, xf_local, &LA_xf);
112:   DMStagVecRestoreArray(dmc, xc_local, &LA_xc);
113:   return 0;
114: }
116: PETSC_INTERN PetscErrorCode DMStagSetUniformCoordinatesExplicit_2d(DM dm, PetscReal xmin, PetscReal xmax, PetscReal ymin, PetscReal ymax)
117: {
118:   DM_Stag       *stagCoord;
119:   DM             dmCoord;
120:   Vec            coordLocal;
121:   PetscReal      h[2], min[2];
122:   PetscScalar ***arr;
123:   PetscInt       ind[2], start_ghost[2], n_ghost[2], s, c;
124:   PetscInt       idownleft, idown, ileft, ielement;
126:   DMGetCoordinateDM(dm, &dmCoord);
127:   stagCoord = (DM_Stag *)dmCoord->data;
128:   for (s = 0; s < 3; ++s) {
130:                stagCoord->dof[s]);
131:   }
132:   DMCreateLocalVector(dmCoord, &coordLocal);
134:   DMStagVecGetArray(dmCoord, coordLocal, &arr);
135:   if (stagCoord->dof[0]) DMStagGetLocationSlot(dmCoord, DMSTAG_DOWN_LEFT, 0, &idownleft);
136:   if (stagCoord->dof[1]) {
137:     DMStagGetLocationSlot(dmCoord, DMSTAG_DOWN, 0, &idown);
138:     DMStagGetLocationSlot(dmCoord, DMSTAG_LEFT, 0, &ileft);
139:   }
140:   if (stagCoord->dof[2]) DMStagGetLocationSlot(dmCoord, DMSTAG_ELEMENT, 0, &ielement);
141:   DMStagGetGhostCorners(dmCoord, &start_ghost[0], &start_ghost[1], NULL, &n_ghost[0], &n_ghost[1], NULL);
143:   min[0] = xmin;
144:   min[1] = ymin;
145:   h[0]   = (xmax - xmin) / stagCoord->N[0];
146:   h[1]   = (ymax - ymin) / stagCoord->N[1];
148:   for (ind[1] = start_ghost[1]; ind[1] < start_ghost[1] + n_ghost[1]; ++ind[1]) {
149:     for (ind[0] = start_ghost[0]; ind[0] < start_ghost[0] + n_ghost[0]; ++ind[0]) {
150:       if (stagCoord->dof[0]) {
151:         const PetscReal offs[2] = {0.0, 0.0};
152:         for (c = 0; c < 2; ++c) arr[ind[1]][ind[0]][idownleft + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
153:       }
154:       if (stagCoord->dof[1]) {
155:         const PetscReal offs[2] = {0.5, 0.0};
156:         for (c = 0; c < 2; ++c) arr[ind[1]][ind[0]][idown + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
157:       }
158:       if (stagCoord->dof[1]) {
159:         const PetscReal offs[2] = {0.0, 0.5};
160:         for (c = 0; c < 2; ++c) arr[ind[1]][ind[0]][ileft + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
161:       }
162:       if (stagCoord->dof[2]) {
163:         const PetscReal offs[2] = {0.5, 0.5};
164:         for (c = 0; c < 2; ++c) arr[ind[1]][ind[0]][ielement + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
165:       }
166:     }
167:   }
168:   DMStagVecRestoreArray(dmCoord, coordLocal, &arr);
169:   DMSetCoordinatesLocal(dm, coordLocal);
170:   VecDestroy(&coordLocal);
171:   return 0;
172: }
174: /* Helper functions used in DMSetUp_Stag() */
175: static PetscErrorCode DMStagSetUpBuildRankGrid_2d(DM);
176: static PetscErrorCode DMStagSetUpBuildNeighbors_2d(DM);
177: static PetscErrorCode DMStagSetUpBuildGlobalOffsets_2d(DM, PetscInt **);
178: static PetscErrorCode DMStagComputeLocationOffsets_2d(DM);
180: PETSC_INTERN PetscErrorCode DMSetUp_Stag_2d(DM dm)
181: {
182:   DM_Stag *const stag = (DM_Stag *)dm->data;
183:   PetscMPIInt    size, rank;
184:   PetscInt       i, j, d, entriesPerElementRowGhost, entriesPerCorner, entriesPerFace, entriesPerElementRow;
185:   MPI_Comm       comm;
186:   PetscInt      *globalOffsets;
187:   PetscBool      star, dummyStart[2], dummyEnd[2];
188:   const PetscInt dim = 2;
190:   PetscObjectGetComm((PetscObject)dm, &comm);
191:   MPI_Comm_size(comm, &size);
192:   MPI_Comm_rank(comm, &rank);
194:   /* Rank grid sizes (populates stag->nRanks) */
195:   DMStagSetUpBuildRankGrid_2d(dm);
197:   /* Determine location of rank in grid (these get extra boundary points on the last element)
198:      Order is x-fast, as usual */
199:   stag->rank[0] = rank % stag->nRanks[0];
200:   stag->rank[1] = rank / stag->nRanks[0];
201:   for (i = 0; i < dim; ++i) {
202:     stag->firstRank[i] = PetscNot(stag->rank[i]);
203:     stag->lastRank[i]  = (PetscBool)(stag->rank[i] == stag->nRanks[i] - 1);
204:   }
206:   /* Determine Locally owned region
208:    Divide equally, giving lower ranks in each dimension and extra element if needbe.
210:    Note that this uses O(P) storage. If this ever becomes an issue, this could
211:    be refactored to not keep this data around.  */
212:   for (i = 0; i < dim; ++i) {
213:     if (!stag->l[i]) {
214:       const PetscInt Ni = stag->N[i], nRanksi = stag->nRanks[i];
215:       PetscMalloc1(stag->nRanks[i], &stag->l[i]);
216:       for (j = 0; j < stag->nRanks[i]; ++j) stag->l[i][j] = Ni / nRanksi + ((Ni % nRanksi) > j);
217:     }
218:   }
220:   /* Retrieve local size in stag->n */
221:   for (i = 0; i < dim; ++i) stag->n[i] = stag->l[i][stag->rank[i]];
222:   if (PetscDefined(USE_DEBUG)) {
223:     for (i = 0; i < dim; ++i) {
224:       PetscInt Ncheck, j;
225:       Ncheck = 0;
226:       for (j = 0; j < stag->nRanks[i]; ++j) Ncheck += stag->l[i][j];
228:     }
229:   }
231:   /* Compute starting elements */
232:   for (i = 0; i < dim; ++i) {
233:     stag->start[i] = 0;
234:     for (j = 0; j < stag->rank[i]; ++j) stag->start[i] += stag->l[i][j];
235:   }
237:   /* Determine ranks of neighbors, using DMDA's convention
239:      n6 n7 n8
240:      n3    n5
241:      n0 n1 n2                                               */
242:   DMStagSetUpBuildNeighbors_2d(dm);
244:   /* Determine whether the ghost region includes dummies or not. This is currently
245:        equivalent to having a non-periodic boundary. If not, then
246:        ghostOffset{Start,End}[d] elements correspond to elements on the neighbor.
247:        If true, then
248:        - at the start, there are ghostOffsetStart[d] ghost elements
249:        - at the end, there is a layer of extra "physical" points inside a layer of
250:          ghostOffsetEnd[d] ghost elements
251:        Note that this computation should be updated if any boundary types besides
252:        NONE, GHOSTED, and PERIODIC are supported.  */
253:   for (d = 0; d < 2; ++d) dummyStart[d] = (PetscBool)(stag->firstRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
254:   for (d = 0; d < 2; ++d) dummyEnd[d] = (PetscBool)(stag->lastRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
256:   /* Define useful sizes */
257:   stag->entriesPerElement = stag->dof[0] + 2 * stag->dof[1] + stag->dof[2];
258:   entriesPerFace          = stag->dof[0] + stag->dof[1];
259:   entriesPerCorner        = stag->dof[0];
260:   entriesPerElementRow    = stag->n[0] * stag->entriesPerElement + (dummyEnd[0] ? entriesPerFace : 0);
261:   stag->entries           = stag->n[1] * entriesPerElementRow + (dummyEnd[1] ? stag->n[0] * entriesPerFace : 0) + (dummyEnd[0] && dummyEnd[1] ? entriesPerCorner : 0);
263:   /* Compute offsets for each rank into global vectors
264:      This again requires O(P) storage, which could be replaced with some global
265:      communication.  */
266:   DMStagSetUpBuildGlobalOffsets_2d(dm, &globalOffsets);
268:   for (d = 0; d < dim; ++d)
271:   /* Define ghosted/local sizes */
272:   if (stag->stencilType != DMSTAG_STENCIL_NONE && (stag->n[0] < stag->stencilWidth || stag->n[1] < stag->stencilWidth)) {
273:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "DMStag 2d setup does not support local sizes (%" PetscInt_FMT " x %" PetscInt_FMT ") smaller than the elementwise stencil width (%" PetscInt_FMT ")", stag->n[0], stag->n[1], stag->stencilWidth);
274:   }
275:   for (d = 0; d < dim; ++d) {
276:     switch (stag->boundaryType[d]) {
277:     case DM_BOUNDARY_NONE:
278:       /* Note: for a elements-only DMStag, the extra elements on the faces aren't necessary but we include them anyway */
279:       switch (stag->stencilType) {
280:       case DMSTAG_STENCIL_NONE: /* only the extra one on the right/top faces */
281:         stag->nGhost[d]     = stag->n[d];
282:         stag->startGhost[d] = stag->start[d];
283:         if (stag->lastRank[d]) stag->nGhost[d] += 1;
284:         break;
285:       case DMSTAG_STENCIL_STAR: /* allocate the corners but don't use them */
286:       case DMSTAG_STENCIL_BOX:
287:         stag->nGhost[d]     = stag->n[d];
288:         stag->startGhost[d] = stag->start[d];
289:         if (!stag->firstRank[d]) {
290:           stag->nGhost[d] += stag->stencilWidth; /* add interior ghost elements */
291:           stag->startGhost[d] -= stag->stencilWidth;
292:         }
293:         if (!stag->lastRank[d]) {
294:           stag->nGhost[d] += stag->stencilWidth; /* add interior ghost elements */
295:         } else {
296:           stag->nGhost[d] += 1; /* one element on the boundary to complete blocking */
297:         }
298:         break;
299:       default:
300:         SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unrecognized ghost stencil type %d", stag->stencilType);
301:       }
302:       break;
303:     case DM_BOUNDARY_GHOSTED:
304:       switch (stag->stencilType) {
305:       case DMSTAG_STENCIL_NONE:
306:         stag->startGhost[d] = stag->start[d];
307:         stag->nGhost[d]     = stag->n[d] + (stag->lastRank[d] ? 1 : 0);
308:         break;
309:       case DMSTAG_STENCIL_STAR:
310:       case DMSTAG_STENCIL_BOX:
311:         stag->startGhost[d] = stag->start[d] - stag->stencilWidth; /* This value may be negative */
312:         stag->nGhost[d]     = stag->n[d] + 2 * stag->stencilWidth + (stag->lastRank[d] && stag->stencilWidth == 0 ? 1 : 0);
313:         break;
314:       default:
315:         SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unrecognized ghost stencil type %d", stag->stencilType);
316:       }
317:       break;
318:     case DM_BOUNDARY_PERIODIC:
319:       switch (stag->stencilType) {
320:       case DMSTAG_STENCIL_NONE: /* only the extra one on the right/top faces */
321:         stag->nGhost[d]     = stag->n[d];
322:         stag->startGhost[d] = stag->start[d];
323:         break;
324:       case DMSTAG_STENCIL_STAR:
325:       case DMSTAG_STENCIL_BOX:
326:         stag->nGhost[d]     = stag->n[d] + 2 * stag->stencilWidth;
327:         stag->startGhost[d] = stag->start[d] - stag->stencilWidth;
328:         break;
329:       default:
330:         SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unrecognized ghost stencil type %d", stag->stencilType);
331:       }
332:       break;
333:     default:
334:       SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unsupported boundary type in dimension %" PetscInt_FMT, d);
335:     }
336:   }
337:   stag->entriesGhost        = stag->nGhost[0] * stag->nGhost[1] * stag->entriesPerElement;
338:   entriesPerElementRowGhost = stag->nGhost[0] * stag->entriesPerElement;
340:   /* Create global-->local VecScatter and local->global ISLocalToGlobalMapping
342:      We iterate over all local points twice. First, we iterate over each neighbor, populating
343:      1. idxLocal[] : the subset of points, in local numbering ("S" from 0 on all points including ghosts), which correspond to global points. That is, the set of all non-dummy points in the ghosted representation
344:      2. idxGlobal[]: the corresponding global points, in global numbering (Nested "S"s - ranks then non-ghost points in each rank)
346:      Next, we iterate over all points in the local ordering, populating
347:      3. idxGlobalAll[] : entry i is the global point corresponding to local point i, or -1 if local point i is a dummy.
349:      Note further here that the local/ghosted vectors:
350:      - Are always an integral number of elements-worth of points, in all directions.
351:      - Contain three flavors of points:
352:      1. Points which "live here" in the global representation
353:      2. Ghost points which correspond to points on other ranks in the global representation
354:      3. Ghost points, which we call "dummy points," which do not correspond to any point in the global representation
356:      Dummy ghost points arise in at least three ways:
357:      1. As padding for the right, top, and front physical boundaries, to complete partial elements
358:      2. As unused space in the "corners" on interior ranks when using a star stencil
359:      3. As additional work space on all physical boundaries, when DM_BOUNDARY_GHOSTED is used
361:      Note that, because of the boundary dummies,
362:      with a stencil width of zero, on 1 rank, local and global vectors
363:      are still different!
365:      We assume that the size on each rank is greater than or equal to the
366:      stencil width.
367:      */
369:   /* Check stencil type */
371:   star = (PetscBool)(stag->stencilType == DMSTAG_STENCIL_STAR || stag->stencilType == DMSTAG_STENCIL_NONE);
373:   {
374:     PetscInt *idxLocal, *idxGlobal, *idxGlobalAll;
375:     PetscInt  count, countAll, entriesToTransferTotal, i, j, d, ghostOffsetStart[2], ghostOffsetEnd[2];
376:     IS        isLocal, isGlobal;
377:     PetscInt  jghost, ighost;
378:     PetscInt  nNeighbors[9][2];
379:     PetscBool nextToDummyEnd[2];
381:     /* Compute numbers of elements on each neighbor */
382:     for (i = 0; i < 9; ++i) {
383:       const PetscInt neighborRank = stag->neighbors[i];
384:       if (neighborRank >= 0) { /* note we copy the values for our own rank (neighbor 4) */
385:         nNeighbors[i][0] = stag->l[0][neighborRank % stag->nRanks[0]];
386:         nNeighbors[i][1] = stag->l[1][neighborRank / stag->nRanks[0]];
387:       } /* else leave uninitialized - error if accessed */
388:     }
390:     /* These offsets should always be non-negative, and describe how many
391:        ghost elements exist at each boundary. These are not always equal to the stencil width,
392:        because we may have different numbers of ghost elements at the boundaries. In particular,
393:        we always have at least one ghost (dummy) element at the right/top/front. */
394:     for (d = 0; d < 2; ++d) ghostOffsetStart[d] = stag->start[d] - stag->startGhost[d];
395:     for (d = 0; d < 2; ++d) ghostOffsetEnd[d] = stag->startGhost[d] + stag->nGhost[d] - (stag->start[d] + stag->n[d]);
397:     /* Compute whether the next rank has an extra point (only used in x direction) */
398:     for (d = 0; d < 2; ++d) nextToDummyEnd[d] = (PetscBool)(stag->boundaryType[d] != DM_BOUNDARY_PERIODIC && stag->rank[d] == stag->nRanks[d] - 2);
400:     /* Compute the number of local entries which correspond to any global entry */
401:     {
402:       PetscInt nNonDummyGhost[2];
403:       for (d = 0; d < 2; ++d) nNonDummyGhost[d] = stag->nGhost[d] - (dummyStart[d] ? ghostOffsetStart[d] : 0) - (dummyEnd[d] ? ghostOffsetEnd[d] : 0);
404:       if (star) {
405:         entriesToTransferTotal = (nNonDummyGhost[0] * stag->n[1] + stag->n[0] * nNonDummyGhost[1] - stag->n[0] * stag->n[1]) * stag->entriesPerElement + (dummyEnd[0] ? nNonDummyGhost[1] * entriesPerFace : 0) + (dummyEnd[1] ? nNonDummyGhost[0] * entriesPerFace : 0) + (dummyEnd[0] && dummyEnd[1] ? entriesPerCorner : 0);
406:       } else {
407:         entriesToTransferTotal = nNonDummyGhost[0] * nNonDummyGhost[1] * stag->entriesPerElement + (dummyEnd[0] ? nNonDummyGhost[1] * entriesPerFace : 0) + (dummyEnd[1] ? nNonDummyGhost[0] * entriesPerFace : 0) + (dummyEnd[0] && dummyEnd[1] ? entriesPerCorner : 0);
408:       }
409:     }
411:     /* Allocate arrays to populate */
412:     PetscMalloc1(entriesToTransferTotal, &idxLocal);
413:     PetscMalloc1(entriesToTransferTotal, &idxGlobal);
415:     /* Counts into idxLocal/idxGlobal */
416:     count = 0;
418:     /* Here and below, we work with (i,j) describing element numbers within a neighboring rank's global ordering,
419:        to be offset by that rank's global offset,
420:        and (ighost,jghost) referring to element numbers within this ranks local (ghosted) ordering */
422:     /* Neighbor 0 (down left) */
423:     if (!star && !dummyStart[0] && !dummyStart[1]) {
424:       const PetscInt        neighbor                     = 0;
425:       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
426:       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
427:       const PetscInt        entriesPerElementRowNeighbor = stag->entriesPerElement * nNeighbor[0];
428:       for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
429:         const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost;
430:         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
431:           const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
432:           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
433:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
434:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
435:           }
436:         }
437:       }
438:     }
440:     /* Neighbor 1 (down) */
441:     if (!dummyStart[1]) {
442:       /* We may be a ghosted boundary in x, in which case the neighbor is also */
443:       const PetscInt        neighbor                     = 1;
444:       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
445:       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
446:       const PetscInt        entriesPerElementRowNeighbor = entriesPerElementRow; /* same as here */
447:       for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
448:         const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost;
449:         for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
450:           const PetscInt i = ighost - ghostOffsetStart[0];
451:           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
452:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
453:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
454:           }
455:         }
456:         if (dummyEnd[0]) {
457:           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0];
458:           const PetscInt i      = stag->n[0];
459:           for (d = 0; d < stag->dof[0]; ++d, ++count) { /* Vertex */
460:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
461:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
462:           }
463:           for (d = 0; d < stag->dof[1]; ++d, ++count) { /* Face */
464:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + stag->dof[0] + d;
465:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
466:           }
467:         }
468:       }
469:     }
471:     /* Neighbor 2 (down right) */
472:     if (!star && !dummyEnd[0] && !dummyStart[1]) {
473:       const PetscInt        neighbor                     = 2;
474:       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
475:       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
476:       const PetscInt        entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
477:       for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
478:         const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost;
479:         for (i = 0; i < ghostOffsetEnd[0]; ++i) {
480:           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
481:           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
482:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
483:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
484:           }
485:         }
486:       }
487:     }
489:     /* Neighbor 3 (left) */
490:     if (!dummyStart[0]) {
491:       /* Our neighbor is never a ghosted boundary in x, but we may be
492:          Here, we may be a ghosted boundary in y and thus so will our neighbor be */
493:       const PetscInt        neighbor                     = 3;
494:       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
495:       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
496:       const PetscInt        entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement;
497:       for (jghost = ghostOffsetStart[1]; jghost < stag->nGhost[1] - ghostOffsetEnd[1]; ++jghost) {
498:         const PetscInt j = jghost - ghostOffsetStart[1];
499:         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
500:           const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
501:           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
502:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
503:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
504:           }
505:         }
506:       }
507:       if (dummyEnd[1]) {
508:         const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1];
509:         const PetscInt j      = stag->n[1];
510:         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
511:           const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
512:           for (d = 0; d < entriesPerFace; ++d, ++count) {                                                /* only vertices and horizontal face (which are the first dof) */
513:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * entriesPerFace + d; /* i moves by face here */
514:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
515:           }
516:         }
517:       }
518:     }
520:     /* Interior/Resident-here-in-global elements ("Neighbor 4" - same rank)
521:        *including* entries from boundary dummy elements */
522:     {
523:       const PetscInt neighbor     = 4;
524:       const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
525:       for (j = 0; j < stag->n[1]; ++j) {
526:         const PetscInt jghost = j + ghostOffsetStart[1];
527:         for (i = 0; i < stag->n[0]; ++i) {
528:           const PetscInt ighost = i + ghostOffsetStart[0];
529:           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
530:             idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + d;
531:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
532:           }
533:         }
534:         if (dummyEnd[0]) {
535:           const PetscInt ighost = i + ghostOffsetStart[0];
536:           i                     = stag->n[0];
537:           for (d = 0; d < stag->dof[0]; ++d, ++count) { /* vertex first */
538:             idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + d;
539:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
540:           }
541:           for (d = 0; d < stag->dof[1]; ++d, ++count) { /* then left edge (skipping bottom face) */
542:             idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + stag->dof[0] + d;
543:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
544:           }
545:         }
546:       }
547:       if (dummyEnd[1]) {
548:         const PetscInt jghost = j + ghostOffsetStart[1];
549:         j                     = stag->n[1];
550:         for (i = 0; i < stag->n[0]; ++i) {
551:           const PetscInt ighost = i + ghostOffsetStart[0];
552:           for (d = 0; d < entriesPerFace; ++d, ++count) {                                        /* vertex and bottom face (which are the first entries) */
553:             idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * entriesPerFace + d; /* note i increment by entriesPerFace */
554:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
555:           }
556:         }
557:         if (dummyEnd[0]) {
558:           const PetscInt ighost = i + ghostOffsetStart[0];
559:           i                     = stag->n[0];
560:           for (d = 0; d < entriesPerCorner; ++d, ++count) {                                      /* vertex only */
561:             idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * entriesPerFace + d; /* note i increment by entriesPerFace */
562:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
563:           }
564:         }
565:       }
566:     }
568:     /* Neighbor 5 (right) */
569:     if (!dummyEnd[0]) {
570:       /* We can never be right boundary, but we may be a top boundary, along with the right neighbor */
571:       const PetscInt        neighbor                     = 5;
572:       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
573:       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
574:       const PetscInt        entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
575:       for (jghost = ghostOffsetStart[1]; jghost < stag->nGhost[1] - ghostOffsetEnd[1]; ++jghost) {
576:         const PetscInt j = jghost - ghostOffsetStart[1];
577:         for (i = 0; i < ghostOffsetEnd[0]; ++i) {
578:           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
579:           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
580:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
581:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
582:           }
583:         }
584:       }
585:       if (dummyEnd[1]) {
586:         const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1];
587:         const PetscInt j      = nNeighbor[1];
588:         for (i = 0; i < ghostOffsetEnd[0]; ++i) {
589:           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
590:           for (d = 0; d < entriesPerFace; ++d, ++count) {                                                /* only vertices and horizontal face (which are the first dof) */
591:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * entriesPerFace + d; /* Note i increment by entriesPerFace */
592:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
593:           }
594:         }
595:       }
596:     }
598:     /* Neighbor 6 (up left) */
599:     if (!star && !dummyStart[0] && !dummyEnd[1]) {
600:       /* We can never be a top boundary, but our neighbor may be
601:        We may be a right boundary, but our neighbor cannot be */
602:       const PetscInt        neighbor                     = 6;
603:       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
604:       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
605:       const PetscInt        entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement;
606:       for (j = 0; j < ghostOffsetEnd[1]; ++j) {
607:         const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1] + j;
608:         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
609:           const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
610:           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
611:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
612:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
613:           }
614:         }
615:       }
616:     }
618:     /* Neighbor 7 (up) */
619:     if (!dummyEnd[1]) {
620:       /* We cannot be the last rank in y, though our neighbor may be
621:        We may be the last rank in x, in which case our neighbor is also */
622:       const PetscInt        neighbor                     = 7;
623:       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
624:       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
625:       const PetscInt        entriesPerElementRowNeighbor = entriesPerElementRow; /* same as here */
626:       for (j = 0; j < ghostOffsetEnd[1]; ++j) {
627:         const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1] + j;
628:         for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
629:           const PetscInt i = ighost - ghostOffsetStart[0];
630:           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
631:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
632:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
633:           }
634:         }
635:         if (dummyEnd[0]) {
636:           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0];
637:           const PetscInt i      = nNeighbor[0];
638:           for (d = 0; d < stag->dof[0]; ++d, ++count) { /* Vertex */
639:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
640:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
641:           }
642:           for (d = 0; d < stag->dof[1]; ++d, ++count) { /* Face */
643:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + stag->dof[0] + d;
644:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
645:           }
646:         }
647:       }
648:     }
650:     /* Neighbor 8 (up right) */
651:     if (!star && !dummyEnd[0] && !dummyEnd[1]) {
652:       /* We can never be a ghosted boundary
653:          Our neighbor may be a top boundary, a right boundary, or both */
654:       const PetscInt        neighbor                     = 8;
655:       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
656:       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
657:       const PetscInt        entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
658:       for (j = 0; j < ghostOffsetEnd[1]; ++j) {
659:         const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1] + j;
660:         for (i = 0; i < ghostOffsetEnd[0]; ++i) {
661:           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
662:           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
663:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
664:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
665:           }
666:         }
667:       }
668:     }
672:     /* Create Local and Global ISs (transferring pointer ownership) */
673:     ISCreateGeneral(PetscObjectComm((PetscObject)dm), entriesToTransferTotal, idxLocal, PETSC_OWN_POINTER, &isLocal);
674:     ISCreateGeneral(PetscObjectComm((PetscObject)dm), entriesToTransferTotal, idxGlobal, PETSC_OWN_POINTER, &isGlobal);
676:     /* Create stag->gtol. The order is computed as PETSc ordering, and doesn't include dummy entries */
677:     {
678:       Vec local, global;
679:       VecCreateMPIWithArray(PetscObjectComm((PetscObject)dm), 1, stag->entries, PETSC_DECIDE, NULL, &global);
680:       VecCreateSeqWithArray(PETSC_COMM_SELF, stag->entriesPerElement, stag->entriesGhost, NULL, &local);
681:       VecScatterCreate(global, isGlobal, local, isLocal, &stag->gtol);
682:       VecDestroy(&global);
683:       VecDestroy(&local);
684:     }
686:     /* Destroy ISs */
687:     ISDestroy(&isLocal);
688:     ISDestroy(&isGlobal);
690:     /* Next, we iterate over the local entries  again, in local order, recording the global entry to which each maps,
691:        or -1 if there is none */
692:     PetscMalloc1(stag->entriesGhost, &idxGlobalAll);
694:     countAll = 0;
696:     /* Loop over rows 1/3 : down */
697:     if (!dummyStart[1]) {
698:       for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
699:         /* Loop over columns 1/3 : down left */
700:         if (!star && !dummyStart[0]) {
701:           const PetscInt        neighbor     = 0;
702:           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
703:           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
704:           const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost; /* Note: this is actually the same value for the whole row of ranks below, so recomputing it for the next two ranks is redundant, and one could even get rid of jghost entirely if desired */
705:           const PetscInt eprNeighbor = nNeighbor[0] * stag->entriesPerElement;
706:           for (i = nNeighbor[0] - ghostOffsetStart[0]; i < nNeighbor[0]; ++i) {
707:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
708:           }
709:         } else {
710:           /* Down Left dummies */
711:           for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
712:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
713:           }
714:         }
716:         /* Loop over columns 2/3 : down middle */
717:         {
718:           const PetscInt        neighbor     = 1;
719:           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
720:           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
721:           const PetscInt        j            = nNeighbor[1] - ghostOffsetStart[1] + jghost;
722:           const PetscInt        eprNeighbor  = entriesPerElementRow; /* same as here */
723:           for (i = 0; i < nNeighbor[0]; ++i) {
724:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
725:           }
726:         }
728:         /* Loop over columns 3/3 : down right */
729:         if (!star && !dummyEnd[0]) {
730:           const PetscInt        neighbor     = 2;
731:           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
732:           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
733:           const PetscInt        j            = nNeighbor[1] - ghostOffsetStart[1] + jghost;
734:           const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
735:           for (i = 0; i < ghostOffsetEnd[0]; ++i) {
736:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
737:           }
738:         } else if (dummyEnd[0]) {
739:           /* Down right partial dummy elements, living on the *down* rank */
740:           const PetscInt        neighbor     = 1;
741:           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
742:           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
743:           const PetscInt        j            = nNeighbor[1] - ghostOffsetStart[1] + jghost;
744:           const PetscInt        eprNeighbor  = entriesPerElementRow; /* same as here */
745:           PetscInt              dGlobal;
746:           i = nNeighbor[0];
747:           for (d = 0, dGlobal = 0; d < stag->dof[0]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
748:           for (; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy down face point */ }
749:           for (; d < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
750:           for (; d < stag->entriesPerElement; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy element point */ }
751:           ++i;
752:           for (; i < nNeighbor[0] + ghostOffsetEnd[0]; ++i) {
753:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
754:           }
755:         } else {
756:           /* Down Right dummies */
757:           for (ighost = 0; ighost < ghostOffsetEnd[0]; ++ighost) {
758:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
759:           }
760:         }
761:       }
762:     } else {
763:       /* Down dummies row */
764:       for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
765:         for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
766:           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
767:         }
768:       }
769:     }
771:     /* Loop over rows 2/3 : center */
772:     for (j = 0; j < stag->n[1]; ++j) {
773:       /* Loop over columns 1/3 : left */
774:       if (!dummyStart[0]) {
775:         const PetscInt        neighbor     = 3;
776:         const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
777:         const PetscInt *const nNeighbor    = nNeighbors[neighbor];
778:         const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement;
779:         for (i = nNeighbor[0] - ghostOffsetStart[0]; i < nNeighbor[0]; ++i) {
780:           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
781:         }
782:       } else {
783:         /* (Middle) Left dummies */
784:         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
785:           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
786:         }
787:       }
789:       /* Loop over columns 2/3 : here (the "neighbor" is ourselves, here) */
790:       {
791:         const PetscInt neighbor     = 4;
792:         const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
793:         const PetscInt eprNeighbor  = entriesPerElementRow; /* same as here (obviously) */
794:         for (i = 0; i < stag->n[0]; ++i) {
795:           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
796:         }
797:       }
799:       /* Loop over columns 3/3 : right */
800:       if (!dummyEnd[0]) {
801:         const PetscInt        neighbor     = 5;
802:         const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
803:         const PetscInt *const nNeighbor    = nNeighbors[neighbor];
804:         const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
805:         for (i = 0; i < ghostOffsetEnd[0]; ++i) {
806:           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
807:         }
808:       } else {
809:         /* -1's for right layer of partial dummies, living on *this* rank */
810:         const PetscInt        neighbor     = 4;
811:         const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
812:         const PetscInt *const nNeighbor    = nNeighbors[neighbor];
813:         const PetscInt        eprNeighbor  = entriesPerElementRow; /* same as here (obviously) */
814:         PetscInt              dGlobal;
815:         i = nNeighbor[0];
816:         for (d = 0, dGlobal = 0; d < stag->dof[0]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
817:         for (; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy down face point */ }
818:         for (; d < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
819:         for (; d < stag->entriesPerElement; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy element point */ }
820:         ++i;
821:         for (; i < nNeighbor[0] + ghostOffsetEnd[0]; ++i) {
822:           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
823:         }
824:       }
825:     }
827:     /* Loop over rows 3/3 : up */
828:     if (!dummyEnd[1]) {
829:       for (j = 0; j < ghostOffsetEnd[1]; ++j) {
830:         /* Loop over columns 1/3 : up left */
831:         if (!star && !dummyStart[0]) {
832:           const PetscInt        neighbor     = 6;
833:           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
834:           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
835:           const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement;
836:           for (i = nNeighbor[0] - ghostOffsetStart[0]; i < nNeighbor[0]; ++i) {
837:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
838:           }
839:         } else {
840:           /* Up Left dummies */
841:           for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
842:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
843:           }
844:         }
846:         /* Loop over columns 2/3 : up */
847:         {
848:           const PetscInt        neighbor     = 7;
849:           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
850:           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
851:           const PetscInt        eprNeighbor  = entriesPerElementRow; /* Same as here */
852:           for (i = 0; i < nNeighbor[0]; ++i) {
853:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
854:           }
855:         }
857:         /* Loop over columns 3/3 : up right */
858:         if (!star && !dummyEnd[0]) {
859:           const PetscInt        neighbor     = 8;
860:           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
861:           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
862:           const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
863:           for (i = 0; i < ghostOffsetEnd[0]; ++i) {
864:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
865:           }
866:         } else if (dummyEnd[0]) {
867:           /* -1's for right layer of partial dummies, living on rank above */
868:           const PetscInt        neighbor     = 7;
869:           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
870:           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
871:           const PetscInt        eprNeighbor  = entriesPerElementRow; /* Same as here */
872:           PetscInt              dGlobal;
873:           i = nNeighbor[0];
874:           for (d = 0, dGlobal = 0; d < stag->dof[0]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
875:           for (; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy down face point */ }
876:           for (; d < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
877:           for (; d < stag->entriesPerElement; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy element point */ }
878:           ++i;
879:           for (; i < nNeighbor[0] + ghostOffsetEnd[0]; ++i) {
880:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
881:           }
882:         } else {
883:           /* Up Right dummies */
884:           for (ighost = 0; ighost < ghostOffsetEnd[0]; ++ighost) {
885:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
886:           }
887:         }
888:       }
889:     } else {
890:       j = stag->n[1];
891:       /* Top layer of partial dummies */
893:       /* up left partial dummies layer : Loop over columns 1/3 : living on *left* neighbor */
894:       if (!dummyStart[0]) {
895:         const PetscInt        neighbor     = 3;
896:         const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
897:         const PetscInt *const nNeighbor    = nNeighbors[neighbor];
898:         const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement;
899:         for (i = nNeighbor[0] - ghostOffsetStart[0]; i < nNeighbor[0]; ++i) {
900:           for (d = 0; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) { idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * entriesPerFace + d; /* Note entriesPerFace here */ }
901:           for (; d < stag->entriesPerElement; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy left face and element points */ }
902:         }
903:       } else {
904:         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
905:           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
906:         }
907:       }
909:       /* up partial dummies layer : Loop over columns 2/3 : living on *this* rank */
910:       {
911:         const PetscInt neighbor     = 4;
912:         const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
913:         const PetscInt eprNeighbor  = entriesPerElementRow; /* same as here (obviously) */
914:         for (i = 0; i < stag->n[0]; ++i) {
915:           for (d = 0; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) { idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * entriesPerFace + d; /* Note entriesPerFace here */ }
916:           for (; d < stag->entriesPerElement; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy left face and element points */ }
917:         }
918:       }
920:       if (!dummyEnd[0]) {
921:         /* up right partial dummies layer : Loop over columns 3/3 :  living on *right* neighbor */
922:         const PetscInt        neighbor     = 5;
923:         const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
924:         const PetscInt *const nNeighbor    = nNeighbors[neighbor];
925:         const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
926:         for (i = 0; i < ghostOffsetEnd[0]; ++i) {
927:           for (d = 0; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) { idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * entriesPerFace + d; /* Note entriesPerFace here */ }
928:           for (; d < stag->entriesPerElement; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy left face and element points */ }
929:         }
930:       } else {
931:         /* Top partial dummies layer : Loop over columns 3/3 : right, living *here* */
932:         const PetscInt neighbor     = 4;
933:         const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
934:         const PetscInt eprNeighbor  = entriesPerElementRow; /* same as here (obviously) */
935:         i                           = stag->n[0];
936:         for (d = 0; d < stag->dof[0]; ++d, ++countAll) {                                    /* Note just the vertex here */
937:           idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * entriesPerFace + d; /* Note entriesPerFace here */
938:         }
939:         for (; d < stag->entriesPerElement; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy bottom face, left face and element points */ }
940:         ++i;
941:         for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
942:           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
943:         }
944:       }
945:       ++j;
946:       /* Additional top dummy layers */
947:       for (; j < stag->n[1] + ghostOffsetEnd[1]; ++j) {
948:         for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
949:           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
950:         }
951:       }
952:     }
954:     /* Create local-to-global map (in local ordering, includes maps to -1 for dummy points) */
955:     ISLocalToGlobalMappingCreate(comm, 1, stag->entriesGhost, idxGlobalAll, PETSC_OWN_POINTER, &dm->ltogmap);
956:   }
958:   /* In special cases, create a dedicated injective local-to-global map */
959:   if ((stag->boundaryType[0] == DM_BOUNDARY_PERIODIC && stag->nRanks[0] == 1) || (stag->boundaryType[1] == DM_BOUNDARY_PERIODIC && stag->nRanks[1] == 1)) DMStagPopulateLocalToGlobalInjective(dm);
961:   /* Free global offsets */
962:   PetscFree(globalOffsets);
964:   /* Precompute location offsets */
965:   DMStagComputeLocationOffsets_2d(dm);
967:   /* View from Options */
968:   DMViewFromOptions(dm, NULL, "-dm_view");
970:   return 0;
971: }
973: /* adapted from da2.c */
974: static PetscErrorCode DMStagSetUpBuildRankGrid_2d(DM dm)
975: {
976:   DM_Stag *const stag = (DM_Stag *)dm->data;
977:   PetscInt       m, n;
978:   PetscMPIInt    rank, size;
979:   const PetscInt M = stag->N[0];
980:   const PetscInt N = stag->N[1];
982:   MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size);
983:   MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank);
984:   m = stag->nRanks[0];
985:   n = stag->nRanks[1];
986:   if (m != PETSC_DECIDE) {
989:   }
990:   if (n != PETSC_DECIDE) {
993:   }
994:   if (m == PETSC_DECIDE || n == PETSC_DECIDE) {
995:     if (n != PETSC_DECIDE) {
996:       m = size / n;
997:     } else if (m != PETSC_DECIDE) {
998:       n = size / m;
999:     } else {
1000:       /* try for squarish distribution */
1001:       m = (PetscInt)(0.5 + PetscSqrtReal(((PetscReal)M) * ((PetscReal)size) / ((PetscReal)N)));
1002:       if (!m) m = 1;
1003:       while (m > 0) {
1004:         n = size / m;
1005:         if (m * n == size) break;
1006:         m--;
1007:       }
1008:       if (M > N && m < n) {
1009:         PetscInt _m = m;
1010:         m           = n;
1011:         n           = _m;
1012:       }
1013:     }
1018:   stag->nRanks[0] = m;
1019:   stag->nRanks[1] = n;
1020:   return 0;
1021: }
1023: static PetscErrorCode DMStagSetUpBuildNeighbors_2d(DM dm)
1024: {
1025:   DM_Stag *const stag = (DM_Stag *)dm->data;
1026:   PetscInt       d, i;
1027:   PetscBool      per[2], first[2], last[2];
1028:   PetscInt       neighborRank[9][2], r[2], n[2];
1029:   const PetscInt dim = 2;
1031:   for (d = 0; d < dim; ++d)
1033:                DMBoundaryTypes[stag->boundaryType[d]]);
1035:   /* Assemble some convenience variables */
1036:   for (d = 0; d < dim; ++d) {
1037:     per[d]   = (PetscBool)(stag->boundaryType[d] == DM_BOUNDARY_PERIODIC);
1038:     first[d] = stag->firstRank[d];
1039:     last[d]  = stag->lastRank[d];
1040:     r[d]     = stag->rank[d];
1041:     n[d]     = stag->nRanks[d];
1042:   }
1044:   /* First, compute the position in the rank grid for all neighbors */
1045:   neighborRank[0][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left  down */
1046:   neighborRank[0][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;
1048:   neighborRank[1][0] = r[0]; /*       down */
1049:   neighborRank[1][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;
1051:   neighborRank[2][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right down */
1052:   neighborRank[2][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;
1054:   neighborRank[3][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left       */
1055:   neighborRank[3][1] = r[1];
1057:   neighborRank[4][0] = r[0]; /*            */
1058:   neighborRank[4][1] = r[1];
1060:   neighborRank[5][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right      */
1061:   neighborRank[5][1] = r[1];
1063:   neighborRank[6][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left  up   */
1064:   neighborRank[6][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;
1066:   neighborRank[7][0] = r[0]; /*       up   */
1067:   neighborRank[7][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;
1069:   neighborRank[8][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right up   */
1070:   neighborRank[8][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;
1072:   /* Then, compute the rank of each in the linear ordering */
1073:   PetscMalloc1(9, &stag->neighbors);
1074:   for (i = 0; i < 9; ++i) {
1075:     if (neighborRank[i][0] >= 0 && neighborRank[i][1] >= 0) {
1076:       stag->neighbors[i] = neighborRank[i][0] + n[0] * neighborRank[i][1];
1077:     } else {
1078:       stag->neighbors[i] = -1;
1079:     }
1080:   }
1082:   return 0;
1083: }
1085: static PetscErrorCode DMStagSetUpBuildGlobalOffsets_2d(DM dm, PetscInt **pGlobalOffsets)
1086: {
1087:   const DM_Stag *const stag = (DM_Stag *)dm->data;
1088:   PetscInt            *globalOffsets;
1089:   PetscInt             i, j, d, entriesPerFace, count;
1090:   PetscMPIInt          size;
1091:   PetscBool            extra[2];
1093:   MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size);
1094:   for (d = 0; d < 2; ++d) extra[d] = (PetscBool)(stag->boundaryType[d] != DM_BOUNDARY_PERIODIC); /* Extra points in global rep */
1095:   entriesPerFace = stag->dof[0] + stag->dof[1];
1096:   PetscMalloc1(size, pGlobalOffsets);
1097:   globalOffsets    = *pGlobalOffsets;
1098:   globalOffsets[0] = 0;
1099:   count            = 1; /* note the count is offset by 1 here. We add the size of the previous rank */
1100:   for (j = 0; j < stag->nRanks[1] - 1; ++j) {
1101:     const PetscInt nnj = stag->l[1][j];
1102:     for (i = 0; i < stag->nRanks[0] - 1; ++i) {
1103:       const PetscInt nni   = stag->l[0][i];
1104:       globalOffsets[count] = globalOffsets[count - 1] + nnj * nni * stag->entriesPerElement; /* No right/top/front boundaries */
1105:       ++count;
1106:     }
1107:     {
1108:       /* i = stag->nRanks[0]-1; */
1109:       const PetscInt nni   = stag->l[0][i];
1110:       globalOffsets[count] = globalOffsets[count - 1] + nnj * nni * stag->entriesPerElement + (extra[0] ? nnj * entriesPerFace : 0); /* Extra faces on the right */
1111:       ++count;
1112:     }
1113:   }
1114:   {
1115:     /* j = stag->nRanks[1]-1; */
1116:     const PetscInt nnj = stag->l[1][j];
1117:     for (i = 0; i < stag->nRanks[0] - 1; ++i) {
1118:       const PetscInt nni   = stag->l[0][i];
1119:       globalOffsets[count] = globalOffsets[count - 1] + nni * nnj * stag->entriesPerElement + (extra[1] ? nni * entriesPerFace : 0); /* Extra faces on the top */
1120:       ++count;
1121:     }
1122:     /* Don't need to compute entries in last element */
1123:   }
1124:   return 0;
1125: }
1127: static PetscErrorCode DMStagComputeLocationOffsets_2d(DM dm)
1128: {
1129:   DM_Stag *const stag = (DM_Stag *)dm->data;
1130:   const PetscInt epe  = stag->entriesPerElement;
1131:   const PetscInt epr  = stag->nGhost[0] * epe;
1133:   PetscMalloc1(DMSTAG_NUMBER_LOCATIONS, &stag->locationOffsets);
1134:   stag->locationOffsets[DMSTAG_DOWN_LEFT]  = 0;
1135:   stag->locationOffsets[DMSTAG_DOWN]       = stag->locationOffsets[DMSTAG_DOWN_LEFT] + stag->dof[0];
1136:   stag->locationOffsets[DMSTAG_DOWN_RIGHT] = stag->locationOffsets[DMSTAG_DOWN_LEFT] + epe;
1137:   stag->locationOffsets[DMSTAG_LEFT]       = stag->locationOffsets[DMSTAG_DOWN] + stag->dof[1];
1138:   stag->locationOffsets[DMSTAG_ELEMENT]    = stag->locationOffsets[DMSTAG_LEFT] + stag->dof[1];
1139:   stag->locationOffsets[DMSTAG_RIGHT]      = stag->locationOffsets[DMSTAG_LEFT] + epe;
1140:   stag->locationOffsets[DMSTAG_UP_LEFT]    = stag->locationOffsets[DMSTAG_DOWN_LEFT] + epr;
1141:   stag->locationOffsets[DMSTAG_UP]         = stag->locationOffsets[DMSTAG_DOWN] + epr;
1142:   stag->locationOffsets[DMSTAG_UP_RIGHT]   = stag->locationOffsets[DMSTAG_UP_LEFT] + epe;
1143:   return 0;
1144: }
1146: PETSC_INTERN PetscErrorCode DMStagPopulateLocalToGlobalInjective_2d(DM dm)
1147: {
1148:   DM_Stag *const  stag = (DM_Stag *)dm->data;
1149:   PetscInt       *idxLocal, *idxGlobal, *globalOffsetsRecomputed;
1150:   const PetscInt *globalOffsets;
1151:   PetscInt        i, j, d, count, entriesPerCorner, entriesPerFace, entriesPerElementRowGhost, entriesPerElementRow, ghostOffsetStart[2];
1152:   IS              isLocal, isGlobal;
1153:   PetscBool       dummyEnd[2];
1155:   DMStagSetUpBuildGlobalOffsets_2d(dm, &globalOffsetsRecomputed); /* note that we don't actually use all of these. An available optimization is to pass them, when available */
1156:   globalOffsets = globalOffsetsRecomputed;
1157:   PetscMalloc1(stag->entries, &idxLocal);
1158:   PetscMalloc1(stag->entries, &idxGlobal);
1159:   for (d = 0; d < 2; ++d) dummyEnd[d] = (PetscBool)(stag->lastRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
1160:   entriesPerCorner          = stag->dof[0];
1161:   entriesPerFace            = stag->dof[0] + stag->dof[1];
1162:   entriesPerElementRow      = stag->n[0] * stag->entriesPerElement + (dummyEnd[0] ? entriesPerFace : 0);
1163:   entriesPerElementRowGhost = stag->nGhost[0] * stag->entriesPerElement;
1164:   count                     = 0;
1165:   for (d = 0; d < 2; ++d) ghostOffsetStart[d] = stag->start[d] - stag->startGhost[d];
1166:   {
1167:     const PetscInt neighbor     = 4;
1168:     const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
1169:     for (j = 0; j < stag->n[1]; ++j) {
1170:       const PetscInt jghost = j + ghostOffsetStart[1];
1171:       for (i = 0; i < stag->n[0]; ++i) {
1172:         const PetscInt ighost = i + ghostOffsetStart[0];
1173:         for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
1174:           idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + d;
1175:           idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
1176:         }
1177:       }
1178:       if (dummyEnd[0]) {
1179:         const PetscInt ighost = i + ghostOffsetStart[0];
1180:         i                     = stag->n[0];
1181:         for (d = 0; d < stag->dof[0]; ++d, ++count) { /* vertex first */
1182:           idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + d;
1183:           idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
1184:         }
1185:         for (d = 0; d < stag->dof[1]; ++d, ++count) { /* then left edge (skipping bottom face) */
1186:           idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + stag->dof[0] + d;
1187:           idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
1188:         }
1189:       }
1190:     }
1191:     if (dummyEnd[1]) {
1192:       const PetscInt jghost = j + ghostOffsetStart[1];
1193:       j                     = stag->n[1];
1194:       for (i = 0; i < stag->n[0]; ++i) {
1195:         const PetscInt ighost = i + ghostOffsetStart[0];
1196:         for (d = 0; d < entriesPerFace; ++d, ++count) {                                        /* vertex and bottom face (which are the first entries) */
1197:           idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * entriesPerFace + d; /* note i increment by entriesPerFace */
1198:           idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
1199:         }
1200:       }
1201:       if (dummyEnd[0]) {
1202:         const PetscInt ighost = i + ghostOffsetStart[0];
1203:         i                     = stag->n[0];
1204:         for (d = 0; d < entriesPerCorner; ++d, ++count) {                                      /* vertex only */
1205:           idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * entriesPerFace + d; /* note i increment by entriesPerFace */
1206:           idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
1207:         }
1208:       }
1209:     }
1210:   }
1211:   ISCreateGeneral(PetscObjectComm((PetscObject)dm), stag->entries, idxLocal, PETSC_OWN_POINTER, &isLocal);
1212:   ISCreateGeneral(PetscObjectComm((PetscObject)dm), stag->entries, idxGlobal, PETSC_OWN_POINTER, &isGlobal);
1213:   {
1214:     Vec local, global;
1215:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)dm), 1, stag->entries, PETSC_DECIDE, NULL, &global);
1216:     VecCreateSeqWithArray(PETSC_COMM_SELF, stag->entriesPerElement, stag->entriesGhost, NULL, &local);
1217:     VecScatterCreate(local, isLocal, global, isGlobal, &stag->ltog_injective);
1218:     VecDestroy(&global);
1219:     VecDestroy(&local);
1220:   }
1221:   ISDestroy(&isLocal);
1222:   ISDestroy(&isGlobal);
1223:   if (globalOffsetsRecomputed) PetscFree(globalOffsetsRecomputed);
1224:   return 0;
1225: }
1227: PETSC_INTERN PetscErrorCode DMCreateMatrix_Stag_2D_AIJ_Assemble(DM dm, Mat A)
1228: {
1229:   PetscInt          entries, dof[DMSTAG_MAX_STRATA], epe, stencil_width, N[2], start[2], n[2], n_extra[2];
1230:   DMStagStencilType stencil_type;
1231:   DMBoundaryType    boundary_type[2];
1233:   DMStagGetDOF(dm, &dof[0], &dof[1], &dof[2], NULL);
1234:   DMStagGetStencilType(dm, &stencil_type);
1235:   DMStagGetStencilWidth(dm, &stencil_width);
1236:   DMStagGetEntries(dm, &entries);
1237:   DMStagGetEntriesPerElement(dm, &epe);
1238:   DMStagGetCorners(dm, &start[0], &start[1], NULL, &n[0], &n[1], NULL, &n_extra[0], &n_extra[1], NULL);
1239:   DMStagGetGlobalSizes(dm, &N[0], &N[1], NULL);
1240:   DMStagGetBoundaryTypes(dm, &boundary_type[0], &boundary_type[1], NULL);
1242:   if (stencil_type == DMSTAG_STENCIL_NONE) {
1243:     /* Couple all DOF at each location to each other */
1244:     DMStagStencil *row_vertex, *row_face_down, *row_face_left, *row_element;
1246:     PetscMalloc1(dof[0], &row_vertex);
1247:     for (PetscInt c = 0; c < dof[0]; ++c) {
1248:       row_vertex[c].loc = DMSTAG_DOWN_LEFT;
1249:       row_vertex[c].c   = c;
1250:     }
1252:     PetscMalloc1(dof[1], &row_face_down);
1253:     for (PetscInt c = 0; c < dof[1]; ++c) {
1254:       row_face_down[c].loc = DMSTAG_DOWN;
1255:       row_face_down[c].c   = c;
1256:     }
1258:     PetscMalloc1(dof[1], &row_face_left);
1259:     for (PetscInt c = 0; c < dof[1]; ++c) {
1260:       row_face_left[c].loc = DMSTAG_LEFT;
1261:       row_face_left[c].c   = c;
1262:     }
1264:     PetscMalloc1(dof[2], &row_element);
1265:     for (PetscInt c = 0; c < dof[2]; ++c) {
1266:       row_element[c].loc = DMSTAG_ELEMENT;
1267:       row_element[c].c   = c;
1268:     }
1270:     for (PetscInt ey = start[1]; ey < start[1] + n[1] + n_extra[1]; ++ey) {
1271:       for (PetscInt ex = start[0]; ex < start[0] + n[0] + n_extra[0]; ++ex) {
1272:         {
1273:           for (PetscInt c = 0; c < dof[0]; ++c) {
1274:             row_vertex[c].i = ex;
1275:             row_vertex[c].j = ey;
1276:           }
1277:           DMStagMatSetValuesStencil(dm, A, dof[0], row_vertex, dof[0], row_vertex, NULL, INSERT_VALUES);
1278:         }
1279:         if (ex < N[0]) {
1280:           for (PetscInt c = 0; c < dof[1]; ++c) {
1281:             row_face_down[c].i = ex;
1282:             row_face_down[c].j = ey;
1283:           }
1284:           DMStagMatSetValuesStencil(dm, A, dof[1], row_face_down, dof[1], row_face_down, NULL, INSERT_VALUES);
1285:         }
1286:         if (ey < N[1]) {
1287:           for (PetscInt c = 0; c < dof[1]; ++c) {
1288:             row_face_left[c].i = ex;
1289:             row_face_left[c].j = ey;
1290:           }
1291:           DMStagMatSetValuesStencil(dm, A, dof[1], row_face_left, dof[1], row_face_left, NULL, INSERT_VALUES);
1292:         }
1293:         if (ex < N[0] && ey < N[1]) {
1294:           for (PetscInt c = 0; c < dof[2]; ++c) {
1295:             row_element[c].i = ex;
1296:             row_element[c].j = ey;
1297:           }
1298:           DMStagMatSetValuesStencil(dm, A, dof[2], row_element, dof[2], row_element, NULL, INSERT_VALUES);
1299:         }
1300:       }
1301:     }
1302:     PetscFree(row_vertex);
1303:     PetscFree(row_face_left);
1304:     PetscFree(row_face_down);
1305:     PetscFree(row_element);
1306:   } else if (stencil_type == DMSTAG_STENCIL_STAR || stencil_type == DMSTAG_STENCIL_BOX) {
1307:     DMStagStencil *col, *row;
1309:     PetscMalloc1(epe, &row);
1310:     {
1311:       PetscInt nrows = 0;
1313:       for (PetscInt c = 0; c < dof[0]; ++c) {
1314:         row[nrows].c   = c;
1315:         row[nrows].loc = DMSTAG_DOWN_LEFT;
1316:         ++nrows;
1317:       }
1318:       for (PetscInt c = 0; c < dof[1]; ++c) {
1319:         row[nrows].c   = c;
1320:         row[nrows].loc = DMSTAG_LEFT;
1321:         ++nrows;
1322:       }
1323:       for (PetscInt c = 0; c < dof[1]; ++c) {
1324:         row[nrows].c   = c;
1325:         row[nrows].loc = DMSTAG_DOWN;
1326:         ++nrows;
1327:       }
1328:       for (PetscInt c = 0; c < dof[2]; ++c) {
1329:         row[nrows].c   = c;
1330:         row[nrows].loc = DMSTAG_ELEMENT;
1331:         ++nrows;
1332:       }
1333:     }
1335:     PetscMalloc1(epe, &col);
1336:     {
1337:       PetscInt ncols = 0;
1339:       for (PetscInt c = 0; c < dof[0]; ++c) {
1340:         col[ncols].c   = c;
1341:         col[ncols].loc = DMSTAG_DOWN_LEFT;
1342:         ++ncols;
1343:       }
1344:       for (PetscInt c = 0; c < dof[1]; ++c) {
1345:         col[ncols].c   = c;
1346:         col[ncols].loc = DMSTAG_LEFT;
1347:         ++ncols;
1348:       }
1349:       for (PetscInt c = 0; c < dof[1]; ++c) {
1350:         col[ncols].c   = c;
1351:         col[ncols].loc = DMSTAG_DOWN;
1352:         ++ncols;
1353:       }
1354:       for (PetscInt c = 0; c < dof[2]; ++c) {
1355:         col[ncols].c   = c;
1356:         col[ncols].loc = DMSTAG_ELEMENT;
1357:         ++ncols;
1358:       }
1359:     }
1361:     for (PetscInt ey = start[1]; ey < start[1] + n[1] + n_extra[1]; ++ey) {
1362:       for (PetscInt ex = start[0]; ex < start[0] + n[0] + n_extra[0]; ++ex) {
1363:         for (PetscInt i = 0; i < epe; ++i) {
1364:           row[i].i = ex;
1365:           row[i].j = ey;
1366:         }
1367:         for (PetscInt offset_y = -stencil_width; offset_y <= stencil_width; ++offset_y) {
1368:           const PetscInt ey_offset = ey + offset_y;
1369:           for (PetscInt offset_x = -stencil_width; offset_x <= stencil_width; ++offset_x) {
1370:             const PetscInt ex_offset = ex + offset_x;
1371:             /* Only set values corresponding to elements which can have non-dummy entries,
1372:                meaning those that map to unknowns in the global representation. In the periodic
1373:                case, this is the entire stencil, but in all other cases, only includes a single
1374:                "extra" element which is partially outside the physical domain (those points in the
1375:                global representation */
1376:             if ((stencil_type == DMSTAG_STENCIL_BOX || offset_x == 0 || offset_y == 0) && (boundary_type[0] == DM_BOUNDARY_PERIODIC || (ex_offset < N[0] + 1 && ex_offset >= 0)) && (boundary_type[1] == DM_BOUNDARY_PERIODIC || (ey_offset < N[1] + 1 && ey_offset >= 0))) {
1377:               for (PetscInt i = 0; i < epe; ++i) {
1378:                 col[i].i = ex_offset;
1379:                 col[i].j = ey_offset;
1380:               }
1381:               DMStagMatSetValuesStencil(dm, A, epe, row, epe, col, NULL, INSERT_VALUES);
1382:             }
1383:           }
1384:         }
1385:       }
1386:     }
1387:     PetscFree(row);
1388:     PetscFree(col);
1389:   } else SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Unsupported stencil type %s", DMStagStencilTypes[stencil_type]);
1390:   MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
1391:   MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
1392:   return 0;
1393: }