Actual source code: ex51.c
  2: static char help[] = "This example solves a linear system in parallel with KSP.  The matrix\n\
  3: uses arbitrary order polynomials for finite elements on the unit square.  To test the parallel\n\
  4: matrix assembly, the matrix is intentionally laid out across processors\n\
  5: differently from the way it is assembled.  Input arguments are:\n\
  6:   -m <size> -p <order> : mesh size and polynomial order\n\n";
  8: /* Contributed by Travis Austin <austin@txcorp.com>, 2010.
  9:    based on src/ksp/ksp/tutorials/ex3.c
 10:  */
 12: #include <petscksp.h>
 14: /* Declare user-defined routines */
 15: static PetscReal      src(PetscReal, PetscReal);
 16: static PetscReal      ubdy(PetscReal, PetscReal);
 17: static PetscReal      polyBasisFunc(PetscInt, PetscInt, PetscReal *, PetscReal);
 18: static PetscReal      derivPolyBasisFunc(PetscInt, PetscInt, PetscReal *, PetscReal);
 19: static PetscErrorCode Form1DElementMass(PetscReal, PetscInt, PetscReal *, PetscReal *, PetscScalar *);
 20: static PetscErrorCode Form1DElementStiffness(PetscReal, PetscInt, PetscReal *, PetscReal *, PetscScalar *);
 21: static PetscErrorCode Form2DElementMass(PetscInt, PetscScalar *, PetscScalar *);
 22: static PetscErrorCode Form2DElementStiffness(PetscInt, PetscScalar *, PetscScalar *, PetscScalar *);
 23: static PetscErrorCode FormNodalRhs(PetscInt, PetscReal, PetscReal, PetscReal, PetscReal *, PetscScalar *);
 24: static PetscErrorCode FormNodalSoln(PetscInt, PetscReal, PetscReal, PetscReal, PetscReal *, PetscScalar *);
 25: static void           leggaulob(PetscReal, PetscReal, PetscReal[], PetscReal[], int);
 26: static void           qAndLEvaluation(int, PetscReal, PetscReal *, PetscReal *, PetscReal *);
 28: int main(int argc, char **args)
 29: {
 30:   PetscInt     p = 2, m = 5;
 31:   PetscInt     num1Dnodes, num2Dnodes;
 32:   PetscScalar *Ke1D, *Ke2D, *Me1D, *Me2D;
 33:   PetscScalar *r, *ue, val;
 34:   Vec          u, ustar, b, q;
 35:   Mat          A, Mass;
 36:   KSP          ksp;
 37:   PetscInt     M, N;
 38:   PetscMPIInt  rank, size;
 39:   PetscReal    x, y, h, norm;
 40:   PetscInt    *idx, indx, count, *rows, i, j, k, start, end, its;
 41:   PetscReal   *rowsx, *rowsy;
 42:   PetscReal   *gllNode, *gllWgts;
 45:   PetscInitialize(&argc, &args, (char *)0, help);
 46:   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Options for p-FEM", "");
 47:   PetscOptionsInt("-m", "Number of elements in each direction", "None", m, &m, NULL);
 48:   PetscOptionsInt("-p", "Order of each element (tensor product basis)", "None", p, &p, NULL);
 49:   PetscOptionsEnd();
 51:   N = (p * m + 1) * (p * m + 1); /* dimension of matrix */
 52:   M = m * m;                     /* number of elements */
 53:   h = 1.0 / m;                   /* mesh width */
 54:   MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
 55:   MPI_Comm_size(PETSC_COMM_WORLD, &size);
 57:   /* Create stiffness matrix */
 58:   MatCreate(PETSC_COMM_WORLD, &A);
 59:   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N);
 60:   MatSetFromOptions(A);
 61:   MatSetUp(A);
 63:   /* Create matrix  */
 64:   MatCreate(PETSC_COMM_WORLD, &Mass);
 65:   MatSetSizes(Mass, PETSC_DECIDE, PETSC_DECIDE, N, N);
 66:   MatSetFromOptions(Mass);
 67:   MatSetUp(Mass);
 68:   start = rank * (M / size) + ((M % size) < rank ? (M % size) : rank);
 69:   end   = start + M / size + ((M % size) > rank);
 71:   /* Allocate element stiffness matrices */
 72:   num1Dnodes = (p + 1);
 73:   num2Dnodes = num1Dnodes * num1Dnodes;
 75:   PetscMalloc1(num1Dnodes * num1Dnodes, &Me1D);
 76:   PetscMalloc1(num1Dnodes * num1Dnodes, &Ke1D);
 77:   PetscMalloc1(num2Dnodes * num2Dnodes, &Me2D);
 78:   PetscMalloc1(num2Dnodes * num2Dnodes, &Ke2D);
 79:   PetscMalloc1(num2Dnodes, &idx);
 80:   PetscMalloc1(num2Dnodes, &r);
 81:   PetscMalloc1(num2Dnodes, &ue);
 83:   /* Allocate quadrature and create stiffness matrices */
 84:   PetscMalloc1(p + 1, &gllNode);
 85:   PetscMalloc1(p + 1, &gllWgts);
 86:   leggaulob(0.0, 1.0, gllNode, gllWgts, p); /* Get GLL nodes and weights */
 87:   Form1DElementMass(h, p, gllNode, gllWgts, Me1D);
 88:   Form1DElementStiffness(h, p, gllNode, gllWgts, Ke1D);
 89:   Form2DElementMass(p, Me1D, Me2D);
 90:   Form2DElementStiffness(p, Ke1D, Me1D, Ke2D);
 92:   /* Assemble matrix */
 93:   for (i = start; i < end; i++) {
 94:     indx = 0;
 95:     for (k = 0; k < (p + 1); ++k) {
 96:       for (j = 0; j < (p + 1); ++j) idx[indx++] = p * (p * m + 1) * (i / m) + p * (i % m) + k * (p * m + 1) + j;
 97:     }
 98:     MatSetValues(A, num2Dnodes, idx, num2Dnodes, idx, Ke2D, ADD_VALUES);
 99:     MatSetValues(Mass, num2Dnodes, idx, num2Dnodes, idx, Me2D, ADD_VALUES);
100:   }
101:   MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
102:   MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
103:   MatAssemblyBegin(Mass, MAT_FINAL_ASSEMBLY);
104:   MatAssemblyEnd(Mass, MAT_FINAL_ASSEMBLY);
106:   PetscFree(Me1D);
107:   PetscFree(Ke1D);
108:   PetscFree(Me2D);
109:   PetscFree(Ke2D);
111:   /* Create right-hand-side and solution vectors */
112:   VecCreate(PETSC_COMM_WORLD, &u);
113:   VecSetSizes(u, PETSC_DECIDE, N);
114:   VecSetFromOptions(u);
115:   PetscObjectSetName((PetscObject)u, "Approx. Solution");
116:   VecDuplicate(u, &b);
117:   PetscObjectSetName((PetscObject)b, "Right hand side");
118:   VecDuplicate(u, &q);
119:   PetscObjectSetName((PetscObject)q, "Right hand side 2");
120:   VecDuplicate(b, &ustar);
121:   VecSet(u, 0.0);
122:   VecSet(b, 0.0);
123:   VecSet(q, 0.0);
125:   /* Assemble nodal right-hand-side and soln vector  */
126:   for (i = start; i < end; i++) {
127:     x    = h * (i % m);
128:     y    = h * (i / m);
129:     indx = 0;
130:     for (k = 0; k < (p + 1); ++k) {
131:       for (j = 0; j < (p + 1); ++j) idx[indx++] = p * (p * m + 1) * (i / m) + p * (i % m) + k * (p * m + 1) + j;
132:     }
133:     FormNodalRhs(p, x, y, h, gllNode, r);
134:     FormNodalSoln(p, x, y, h, gllNode, ue);
135:     VecSetValues(q, num2Dnodes, idx, r, INSERT_VALUES);
136:     VecSetValues(ustar, num2Dnodes, idx, ue, INSERT_VALUES);
137:   }
138:   VecAssemblyBegin(q);
139:   VecAssemblyEnd(q);
140:   VecAssemblyBegin(ustar);
141:   VecAssemblyEnd(ustar);
143:   PetscFree(idx);
144:   PetscFree(r);
145:   PetscFree(ue);
147:   /* Get FE right-hand side vector */
148:   MatMult(Mass, q, b);
150:   /* Modify matrix and right-hand-side for Dirichlet boundary conditions */
151:   PetscMalloc1(4 * p * m, &rows);
152:   PetscMalloc1(4 * p * m, &rowsx);
153:   PetscMalloc1(4 * p * m, &rowsy);
154:   for (i = 0; i < p * m + 1; i++) {
155:     rows[i]                  = i; /* bottom */
156:     rowsx[i]                 = (i / p) * h + gllNode[i % p] * h;
157:     rowsy[i]                 = 0.0;
158:     rows[3 * p * m - 1 + i]  = (p * m) * (p * m + 1) + i; /* top */
159:     rowsx[3 * p * m - 1 + i] = (i / p) * h + gllNode[i % p] * h;
160:     rowsy[3 * p * m - 1 + i] = 1.0;
161:   }
162:   count = p * m + 1; /* left side */
163:   indx  = 1;
164:   for (i = p * m + 1; i < (p * m) * (p * m + 1); i += (p * m + 1)) {
165:     rows[count]    = i;
166:     rowsx[count]   = 0.0;
167:     rowsy[count++] = (indx / p) * h + gllNode[indx % p] * h;
168:     indx++;
169:   }
170:   count = 2 * p * m; /* right side */
171:   indx  = 1;
172:   for (i = 2 * p * m + 1; i < (p * m) * (p * m + 1); i += (p * m + 1)) {
173:     rows[count]    = i;
174:     rowsx[count]   = 1.0;
175:     rowsy[count++] = (indx / p) * h + gllNode[indx % p] * h;
176:     indx++;
177:   }
178:   for (i = 0; i < 4 * p * m; i++) {
179:     x   = rowsx[i];
180:     y   = rowsy[i];
181:     val = ubdy(x, y);
182:     VecSetValues(b, 1, &rows[i], &val, INSERT_VALUES);
183:     VecSetValues(u, 1, &rows[i], &val, INSERT_VALUES);
184:   }
185:   MatZeroRows(A, 4 * p * m, rows, 1.0, 0, 0);
186:   PetscFree(rows);
187:   PetscFree(rowsx);
188:   PetscFree(rowsy);
190:   VecAssemblyBegin(u);
191:   VecAssemblyEnd(u);
192:   VecAssemblyBegin(b);
193:   VecAssemblyEnd(b);
195:   /* Solve linear system */
196:   KSPCreate(PETSC_COMM_WORLD, &ksp);
197:   KSPSetOperators(ksp, A, A);
198:   KSPSetInitialGuessNonzero(ksp, PETSC_TRUE);
199:   KSPSetFromOptions(ksp);
200:   KSPSolve(ksp, b, u);
202:   /* Check error */
203:   VecAXPY(u, -1.0, ustar);
204:   VecNorm(u, NORM_2, &norm);
205:   KSPGetIterationNumber(ksp, &its);
206:   PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g Iterations %" PetscInt_FMT "\n", (double)(norm * h), its);
208:   PetscFree(gllNode);
209:   PetscFree(gllWgts);
211:   KSPDestroy(&ksp);
212:   VecDestroy(&u);
213:   VecDestroy(&b);
214:   VecDestroy(&q);
215:   VecDestroy(&ustar);
216:   MatDestroy(&A);
217:   MatDestroy(&Mass);
219:   PetscFinalize();
220:   return 0;
221: }
223: /* --------------------------------------------------------------------- */
225: /* 1d element stiffness mass matrix  */
226: static PetscErrorCode Form1DElementMass(PetscReal H, PetscInt P, PetscReal *gqn, PetscReal *gqw, PetscScalar *Me1D)
227: {
228:   PetscInt i, j, k;
229:   PetscInt indx;
232:   for (j = 0; j < (P + 1); ++j) {
233:     for (i = 0; i < (P + 1); ++i) {
234:       indx       = j * (P + 1) + i;
235:       Me1D[indx] = 0.0;
236:       for (k = 0; k < (P + 1); ++k) Me1D[indx] += H * gqw[k] * polyBasisFunc(P, i, gqn, gqn[k]) * polyBasisFunc(P, j, gqn, gqn[k]);
237:     }
238:   }
239:   return 0;
240: }
242: /* --------------------------------------------------------------------- */
244: /* 1d element stiffness matrix for derivative */
245: static PetscErrorCode Form1DElementStiffness(PetscReal H, PetscInt P, PetscReal *gqn, PetscReal *gqw, PetscScalar *Ke1D)
246: {
247:   PetscInt i, j, k;
248:   PetscInt indx;
251:   for (j = 0; j < (P + 1); ++j) {
252:     for (i = 0; i < (P + 1); ++i) {
253:       indx       = j * (P + 1) + i;
254:       Ke1D[indx] = 0.0;
255:       for (k = 0; k < (P + 1); ++k) Ke1D[indx] += (1. / H) * gqw[k] * derivPolyBasisFunc(P, i, gqn, gqn[k]) * derivPolyBasisFunc(P, j, gqn, gqn[k]);
256:     }
257:   }
258:   return 0;
259: }
261: /* --------------------------------------------------------------------- */
263: /* element mass matrix */
264: static PetscErrorCode Form2DElementMass(PetscInt P, PetscScalar *Me1D, PetscScalar *Me2D)
265: {
266:   PetscInt i1, j1, i2, j2;
267:   PetscInt indx1, indx2, indx3;
270:   for (j2 = 0; j2 < (P + 1); ++j2) {
271:     for (i2 = 0; i2 < (P + 1); ++i2) {
272:       for (j1 = 0; j1 < (P + 1); ++j1) {
273:         for (i1 = 0; i1 < (P + 1); ++i1) {
274:           indx1       = j1 * (P + 1) + i1;
275:           indx2       = j2 * (P + 1) + i2;
276:           indx3       = (j2 * (P + 1) + j1) * (P + 1) * (P + 1) + (i2 * (P + 1) + i1);
277:           Me2D[indx3] = Me1D[indx1] * Me1D[indx2];
278:         }
279:       }
280:     }
281:   }
282:   return 0;
283: }
285: /* --------------------------------------------------------------------- */
287: /* element stiffness for Laplacian */
288: static PetscErrorCode Form2DElementStiffness(PetscInt P, PetscScalar *Ke1D, PetscScalar *Me1D, PetscScalar *Ke2D)
289: {
290:   PetscInt i1, j1, i2, j2;
291:   PetscInt indx1, indx2, indx3;
294:   for (j2 = 0; j2 < (P + 1); ++j2) {
295:     for (i2 = 0; i2 < (P + 1); ++i2) {
296:       for (j1 = 0; j1 < (P + 1); ++j1) {
297:         for (i1 = 0; i1 < (P + 1); ++i1) {
298:           indx1       = j1 * (P + 1) + i1;
299:           indx2       = j2 * (P + 1) + i2;
300:           indx3       = (j2 * (P + 1) + j1) * (P + 1) * (P + 1) + (i2 * (P + 1) + i1);
301:           Ke2D[indx3] = Ke1D[indx1] * Me1D[indx2] + Me1D[indx1] * Ke1D[indx2];
302:         }
303:       }
304:     }
305:   }
306:   return 0;
307: }
309: /* --------------------------------------------------------------------- */
311: static PetscErrorCode FormNodalRhs(PetscInt P, PetscReal x, PetscReal y, PetscReal H, PetscReal *nds, PetscScalar *r)
312: {
313:   PetscInt i, j, indx;
316:   indx = 0;
317:   for (j = 0; j < (P + 1); ++j) {
318:     for (i = 0; i < (P + 1); ++i) {
319:       r[indx] = src(x + H * nds[i], y + H * nds[j]);
320:       indx++;
321:     }
322:   }
323:   return 0;
324: }
326: /* --------------------------------------------------------------------- */
328: static PetscErrorCode FormNodalSoln(PetscInt P, PetscReal x, PetscReal y, PetscReal H, PetscReal *nds, PetscScalar *u)
329: {
330:   PetscInt i, j, indx;
333:   indx = 0;
334:   for (j = 0; j < (P + 1); ++j) {
335:     for (i = 0; i < (P + 1); ++i) {
336:       u[indx] = ubdy(x + H * nds[i], y + H * nds[j]);
337:       indx++;
338:     }
339:   }
340:   return 0;
341: }
343: /* --------------------------------------------------------------------- */
345: static PetscReal polyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval)
346: {
347:   PetscReal denominator = 1.;
348:   PetscReal numerator   = 1.;
349:   PetscInt  i           = 0;
351:   for (i = 0; i < (order + 1); i++) {
352:     if (i != basis) {
353:       numerator *= (xval - xLocVal[i]);
354:       denominator *= (xLocVal[basis] - xLocVal[i]);
355:     }
356:   }
357:   return (numerator / denominator);
358: }
360: /* --------------------------------------------------------------------- */
362: static PetscReal derivPolyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval)
363: {
364:   PetscReal denominator;
365:   PetscReal numerator;
366:   PetscReal numtmp;
367:   PetscInt  i = 0, j = 0;
369:   denominator = 1.;
370:   for (i = 0; i < (order + 1); i++) {
371:     if (i != basis) denominator *= (xLocVal[basis] - xLocVal[i]);
372:   }
373:   numerator = 0.;
374:   for (j = 0; j < (order + 1); ++j) {
375:     if (j != basis) {
376:       numtmp = 1.;
377:       for (i = 0; i < (order + 1); ++i) {
378:         if (i != basis && j != i) numtmp *= (xval - xLocVal[i]);
379:       }
380:       numerator += numtmp;
381:     }
382:   }
384:   return (numerator / denominator);
385: }
387: /* --------------------------------------------------------------------- */
389: static PetscReal ubdy(PetscReal x, PetscReal y)
390: {
391:   return x * x * y * y;
392: }
394: static PetscReal src(PetscReal x, PetscReal y)
395: {
396:   return -2. * y * y - 2. * x * x;
397: }
398: /* --------------------------------------------------------------------- */
400: static void leggaulob(PetscReal x1, PetscReal x2, PetscReal x[], PetscReal w[], int n)
401: /*******************************************************************************
402: Given the lower and upper limits of integration x1 and x2, and given n, this
403: routine returns arrays x[0..n-1] and w[0..n-1] of length n, containing the abscissas
404: and weights of the Gauss-Lobatto-Legendre n-point quadrature formula.
405: *******************************************************************************/
406: {
407:   PetscInt  j, m;
408:   PetscReal z1, z, xm, xl, q, qp, Ln, scale;
409:   if (n == 1) {
410:     x[0] = x1; /* Scale the root to the desired interval, */
411:     x[1] = x2; /* and put in its symmetric counterpart.   */
412:     w[0] = 1.; /* Compute the weight */
413:     w[1] = 1.; /* and its symmetric counterpart. */
414:   } else {
415:     x[0] = x1;                 /* Scale the root to the desired interval, */
416:     x[n] = x2;                 /* and put in its symmetric counterpart.   */
417:     w[0] = 2. / (n * (n + 1)); /* Compute the weight */
418:     w[n] = 2. / (n * (n + 1)); /* and its symmetric counterpart. */
419:     m    = (n + 1) / 2;        /* The roots are symmetric, so we only find half of them. */
420:     xm   = 0.5 * (x2 + x1);
421:     xl   = 0.5 * (x2 - x1);
422:     for (j = 1; j <= (m - 1); j++) { /* Loop over the desired roots. */
423:       z = -1.0 * PetscCosReal((PETSC_PI * (j + 0.25) / (n)) - (3.0 / (8.0 * n * PETSC_PI)) * (1.0 / (j + 0.25)));
424:       /* Starting with the above approximation to the ith root, we enter */
425:       /* the main loop of refinement by Newton's method.                 */
426:       do {
427:         qAndLEvaluation(n, z, &q, &qp, &Ln);
428:         z1 = z;
429:         z  = z1 - q / qp; /* Newton's method. */
430:       } while (PetscAbsReal(z - z1) > 3.0e-11);
431:       qAndLEvaluation(n, z, &q, &qp, &Ln);
432:       x[j]     = xm + xl * z;                   /* Scale the root to the desired interval, */
433:       x[n - j] = xm - xl * z;                   /* and put in its symmetric counterpart.   */
434:       w[j]     = 2.0 / (n * (n + 1) * Ln * Ln); /* Compute the weight */
435:       w[n - j] = w[j];                          /* and its symmetric counterpart. */
436:     }
437:   }
438:   if (n % 2 == 0) {
439:     qAndLEvaluation(n, 0.0, &q, &qp, &Ln);
440:     x[n / 2] = (x2 - x1) / 2.0;
441:     w[n / 2] = 2.0 / (n * (n + 1) * Ln * Ln);
442:   }
443:   /* scale the weights according to mapping from [-1,1] to [0,1] */
444:   scale = (x2 - x1) / 2.0;
445:   for (j = 0; j <= n; ++j) w[j] = w[j] * scale;
446: }
448: /******************************************************************************/
449: static void qAndLEvaluation(int n, PetscReal x, PetscReal *q, PetscReal *qp, PetscReal *Ln)
450: /*******************************************************************************
451: Compute the polynomial qn(x) = L_{N+1}(x) - L_{n-1}(x) and its derivative in
452: addition to L_N(x) as these are needed for the GLL points.  See the book titled
453: "Implementing Spectral Methods for Partial Differential Equations: Algorithms
454: for Scientists and Engineers" by David A. Kopriva.
455: *******************************************************************************/
456: {
457:   PetscInt k;
459:   PetscReal Lnp;
460:   PetscReal Lnp1, Lnp1p;
461:   PetscReal Lnm1, Lnm1p;
462:   PetscReal Lnm2, Lnm2p;
464:   Lnm1  = 1.0;
465:   *Ln   = x;
466:   Lnm1p = 0.0;
467:   Lnp   = 1.0;
469:   for (k = 2; k <= n; ++k) {
470:     Lnm2  = Lnm1;
471:     Lnm1  = *Ln;
472:     Lnm2p = Lnm1p;
473:     Lnm1p = Lnp;
474:     *Ln   = (2. * k - 1.) / (1.0 * k) * x * Lnm1 - (k - 1.) / (1.0 * k) * Lnm2;
475:     Lnp   = Lnm2p + (2.0 * k - 1) * Lnm1;
476:   }
477:   k     = n + 1;
478:   Lnp1  = (2. * k - 1.) / (1.0 * k) * x * (*Ln) - (k - 1.) / (1.0 * k) * Lnm1;
479:   Lnp1p = Lnm1p + (2.0 * k - 1) * (*Ln);
480:   *q    = Lnp1 - Lnm1;
481:   *qp   = Lnp1p - Lnm1p;
482: }
484: /*TEST
486:    test:
487:       nsize: 2
488:       args: -ksp_monitor_short
490: TEST*/