Actual source code: nepdefl.c
slepc-3.14.2 2021-02-01
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2020, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/nepimpl.h>
12: #include <slepcblaslapack.h>
13: #include "nepdefl.h"
15: PetscErrorCode NEPDeflationGetInvariantPair(NEP_EXT_OP extop,BV *X,Mat *H)
16: {
20: if (X) *X = extop->X;
21: if (H) {
22: MatCreateSeqDense(PETSC_COMM_SELF,extop->szd+1,extop->szd+1,extop->H,H);
23: }
24: return(0);
25: }
27: PetscErrorCode NEPDeflationExtendInvariantPair(NEP_EXT_OP extop,Vec u,PetscScalar lambda,PetscInt k)
28: {
30: Vec uu;
31: PetscInt ld,i;
32: PetscMPIInt np;
33: PetscReal norm;
36: BVGetColumn(extop->X,k,&uu);
37: ld = extop->szd+1;
38: NEPDeflationCopyToExtendedVec(extop,uu,extop->H+k*ld,u,PETSC_TRUE);
39: BVRestoreColumn(extop->X,k,&uu);
40: BVNormColumn(extop->X,k,NORM_2,&norm);
41: BVScaleColumn(extop->X,k,1.0/norm);
42: MPI_Comm_size(PetscObjectComm((PetscObject)u),&np);
43: for (i=0;i<k;i++) extop->H[k*ld+i] *= PetscSqrtReal(np)/norm;
44: extop->H[k*(ld+1)] = lambda;
45: return(0);
46: }
48: PetscErrorCode NEPDeflationExtractEigenpair(NEP_EXT_OP extop,PetscInt k,Vec u,PetscScalar lambda,DS ds)
49: {
51: PetscScalar *Ap;
52: PetscInt ldh=extop->szd+1,ldds,i,j,k1=k+1;
53: PetscScalar *eigr,*eigi,*t,*Z;
54: Vec x;
57: NEPDeflationExtendInvariantPair(extop,u,lambda,k);
58: PetscCalloc3(k1,&eigr,k1,&eigi,extop->szd,&t);
59: DSReset(ds);
60: DSSetType(ds,DSNHEP);
61: DSAllocate(ds,ldh);
62: DSGetLeadingDimension(ds,&ldds);
63: DSGetArray(ds,DS_MAT_A,&Ap);
64: for (j=0;j<k1;j++)
65: for (i=0;i<k1;i++) Ap[j*ldds+i] = extop->H[j*ldh+i];
66: DSRestoreArray(ds,DS_MAT_A,&Ap);
67: DSSetDimensions(ds,k1,0,0,k1);
68: DSSolve(ds,eigr,eigi);
69: DSVectors(ds,DS_MAT_X,&k,NULL);
70: DSGetArray(ds,DS_MAT_X,&Z);
71: BVMultColumn(extop->X,1.0,Z[k*ldds+k],k,Z+k*ldds);
72: DSRestoreArray(ds,DS_MAT_X,&Z);
73: BVGetColumn(extop->X,k,&x);
74: NEPDeflationCopyToExtendedVec(extop,x,t,u,PETSC_FALSE);
75: BVRestoreColumn(extop->X,k,&x);
76: PetscFree3(eigr,eigi,t);
77: return(0);
78: }
80: PetscErrorCode NEPDeflationCopyToExtendedVec(NEP_EXT_OP extop,Vec v,PetscScalar *a,Vec vex,PetscBool back)
81: {
83: PetscMPIInt np,rk,count;
84: PetscScalar *array1,*array2;
85: PetscInt nloc;
88: if (extop->szd) {
89: MPI_Comm_rank(PetscObjectComm((PetscObject)vex),&rk);
90: MPI_Comm_size(PetscObjectComm((PetscObject)vex),&np);
91: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
92: if (v) {
93: VecGetArray(v,&array1);
94: VecGetArray(vex,&array2);
95: if (back) {
96: PetscArraycpy(array1,array2,nloc);
97: } else {
98: PetscArraycpy(array2,array1,nloc);
99: }
100: VecRestoreArray(v,&array1);
101: VecRestoreArray(vex,&array2);
102: }
103: if (a) {
104: VecGetArray(vex,&array2);
105: if (back) {
106: PetscArraycpy(a,array2+nloc,extop->szd);
107: PetscMPIIntCast(extop->szd,&count);
108: MPI_Bcast(a,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex));
109: } else {
110: PetscArraycpy(array2+nloc,a,extop->szd);
111: PetscMPIIntCast(extop->szd,&count);
112: MPI_Bcast(array2+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex));
113: }
114: VecRestoreArray(vex,&array2);
115: }
116: } else {
117: if (back) {VecCopy(vex,v);}
118: else {VecCopy(v,vex);}
119: }
120: return(0);
121: }
123: PetscErrorCode NEPDeflationCreateVec(NEP_EXT_OP extop,Vec *v)
124: {
126: PetscInt nloc;
127: Vec u;
128: VecType type;
131: if (extop->szd) {
132: BVGetColumn(extop->nep->V,0,&u);
133: VecGetType(u,&type);
134: BVRestoreColumn(extop->nep->V,0,&u);
135: VecCreate(PetscObjectComm((PetscObject)extop->nep),v);
136: VecSetType(*v,type);
137: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
138: nloc += extop->szd;
139: VecSetSizes(*v,nloc,PETSC_DECIDE);
140: } else {
141: BVCreateVec(extop->nep->V,v);
142: }
143: return(0);
144: }
146: PetscErrorCode NEPDeflationCreateBV(NEP_EXT_OP extop,PetscInt sz,BV *V)
147: {
148: PetscErrorCode ierr;
149: PetscInt nloc;
150: BVType type;
151: BVOrthogType otype;
152: BVOrthogRefineType oref;
153: PetscReal oeta;
154: BVOrthogBlockType oblock;
155: NEP nep=extop->nep;
158: if (extop->szd) {
159: BVGetSizes(nep->V,&nloc,NULL,NULL);
160: BVCreate(PetscObjectComm((PetscObject)nep),V);
161: BVSetSizes(*V,nloc+extop->szd,PETSC_DECIDE,sz);
162: BVGetType(nep->V,&type);
163: BVSetType(*V,type);
164: BVGetOrthogonalization(nep->V,&otype,&oref,&oeta,&oblock);
165: BVSetOrthogonalization(*V,otype,oref,oeta,oblock);
166: PetscObjectStateIncrease((PetscObject)*V);
167: PetscLogObjectParent((PetscObject)nep,(PetscObject)*V);
168: } else {
169: BVDuplicateResize(nep->V,sz,V);
170: }
171: return(0);
172: }
174: PetscErrorCode NEPDeflationSetRandomVec(NEP_EXT_OP extop,Vec v)
175: {
177: PetscInt n,next,i;
178: PetscRandom rand;
179: PetscScalar *array;
180: PetscMPIInt nn,np;
183: BVGetRandomContext(extop->nep->V,&rand);
184: VecSetRandom(v,rand);
185: if (extop->szd) {
186: MPI_Comm_size(PetscObjectComm((PetscObject)v),&np);
187: BVGetSizes(extop->nep->V,&n,NULL,NULL);
188: VecGetLocalSize(v,&next);
189: VecGetArray(v,&array);
190: for (i=n+extop->n;i<next;i++) array[i] = 0.0;
191: for (i=n;i<n+extop->n;i++) array[i] /= PetscSqrtReal(np);
192: PetscMPIIntCast(extop->n,&nn);
193: MPI_Bcast(array+n,nn,MPIU_SCALAR,0,PetscObjectComm((PetscObject)v));
194: VecRestoreArray(v,&array);
195: }
196: return(0);
197: }
199: static PetscErrorCode NEPDeflationEvaluateBasisMat(NEP_EXT_OP extop,PetscInt idx,PetscBool hat,PetscScalar *bval,PetscScalar *Hj,PetscScalar *Hjprev)
200: {
202: PetscInt i,k,n=extop->n,ldhj=extop->szd,ldh=extop->szd+1;
203: PetscScalar sone=1.0,zero=0.0;
204: PetscBLASInt ldh_,ldhj_,n_;
207: i = (idx<0)?extop->szd*extop->szd*(-idx):extop->szd*extop->szd;
208: PetscArrayzero(Hj,i);
209: PetscBLASIntCast(ldhj+1,&ldh_);
210: PetscBLASIntCast(ldhj,&ldhj_);
211: PetscBLASIntCast(n,&n_);
212: if (idx<1) {
213: if (!hat) for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 1.0;
214: else for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 0.0;
215: } else {
216: for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[idx-1];
217: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hjprev,&ldhj_,&zero,Hj,&ldhj_));
218: for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[idx-1];
219: if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[idx-1];
220: }
221: if (idx<0) {
222: idx = -idx;
223: for (k=1;k<idx;k++) {
224: for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[k-1];
225: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hj+(k-1)*ldhj*ldhj,&ldhj_,&zero,Hj+k*ldhj*ldhj,&ldhj_));
226: for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[k-1];
227: if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[k-1];
228: }
229: }
230: return(0);
231: }
233: PetscErrorCode NEPDeflationLocking(NEP_EXT_OP extop,Vec u,PetscScalar lambda)
234: {
236: PetscInt i;
239: NEPDeflationExtendInvariantPair(extop,u,lambda,extop->n);
240: extop->n++;
241: BVSetActiveColumns(extop->X,0,extop->n);
242: if (extop->n <= extop->szd) {
243: /* update XpX */
244: BVDotColumn(extop->X,extop->n-1,extop->XpX+(extop->n-1)*extop->szd);
245: extop->XpX[(extop->n-1)*(1+extop->szd)] = 1.0;
246: for (i=0;i<extop->n-1;i++) extop->XpX[i*extop->szd+extop->n-1] = PetscConj(extop->XpX[(extop->n-1)*extop->szd+i]);
247: /* determine minimality index */
248: extop->midx = PetscMin(extop->max_midx,extop->n);
249: /* polynominal basis coeficients */
250: for (i=0;i<extop->midx;i++) extop->bc[i] = extop->nep->target;
251: /* evaluate the polynomial basis in H */
252: NEPDeflationEvaluateBasisMat(extop,-extop->midx,PETSC_FALSE,NULL,extop->Hj,NULL);
253: }
254: return(0);
255: }
257: static PetscErrorCode NEPDeflationEvaluateHatFunction(NEP_EXT_OP extop, PetscInt idx,PetscScalar lambda,PetscScalar *y,PetscScalar *hfj,PetscScalar *hfjp,PetscInt ld)
258: {
260: PetscInt i,j,k,off,ini,fin,sz,ldh,n=extop->n;
261: Mat A,B;
262: PetscScalar *array;
265: if (idx<0) {ini = 0; fin = extop->nep->nt;}
266: else {ini = idx; fin = idx+1;}
267: if (y) sz = hfjp?n+2:n+1;
268: else sz = hfjp?3*n:2*n;
269: ldh = extop->szd+1;
270: MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&A);
271: MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&B);
272: MatDenseGetArray(A,&array);
273: for (j=0;j<n;j++)
274: for (i=0;i<n;i++) array[j*sz+i] = extop->H[j*ldh+i];
275: MatDenseRestoreArray(A,&array);
276: if (y) {
277: MatDenseGetArray(A,&array);
278: array[extop->n*(sz+1)] = lambda;
279: if (hfjp) { array[(n+1)*sz+n] = 1.0; array[(n+1)*sz+n+1] = lambda;}
280: for (i=0;i<n;i++) array[n*sz+i] = y[i];
281: MatDenseRestoreArray(A,&array);
282: for (j=ini;j<fin;j++) {
283: FNEvaluateFunctionMat(extop->nep->f[j],A,B);
284: MatDenseGetArray(B,&array);
285: for (i=0;i<n;i++) hfj[j*ld+i] = array[n*sz+i];
286: if (hfjp) for (i=0;i<n;i++) hfjp[j*ld+i] = array[(n+1)*sz+i];
287: MatDenseRestoreArray(B,&array);
288: }
289: } else {
290: off = idx<0?ld*n:0;
291: MatDenseGetArray(A,&array);
292: for (k=0;k<n;k++) {
293: array[(n+k)*sz+k] = 1.0;
294: array[(n+k)*sz+n+k] = lambda;
295: }
296: if (hfjp) for (k=0;k<n;k++) {
297: array[(2*n+k)*sz+n+k] = 1.0;
298: array[(2*n+k)*sz+2*n+k] = lambda;
299: }
300: MatDenseRestoreArray(A,&array);
301: for (j=ini;j<fin;j++) {
302: FNEvaluateFunctionMat(extop->nep->f[j],A,B);
303: MatDenseGetArray(B,&array);
304: for (i=0;i<n;i++) for (k=0;k<n;k++) hfj[j*off+i*ld+k] = array[n*sz+i*sz+k];
305: if (hfjp) for (k=0;k<n;k++) for (i=0;i<n;i++) hfjp[j*off+i*ld+k] = array[2*n*sz+i*sz+k];
306: MatDenseRestoreArray(B,&array);
307: }
308: }
309: MatDestroy(&A);
310: MatDestroy(&B);
311: return(0);
312: }
314: static PetscErrorCode MatMult_NEPDeflation(Mat M,Vec x,Vec y)
315: {
316: NEP_DEF_MATSHELL *matctx;
317: PetscErrorCode ierr;
318: NEP_EXT_OP extop;
319: Vec x1,y1;
320: PetscScalar *yy,sone=1.0,zero=0.0;
321: const PetscScalar *xx;
322: PetscInt nloc,i;
323: PetscMPIInt np;
324: PetscBLASInt n_,one=1,szd_;
327: MPI_Comm_size(PetscObjectComm((PetscObject)M),&np);
328: MatShellGetContext(M,(void**)&matctx);
329: extop = matctx->extop;
330: if (extop->ref) {
331: VecZeroEntries(y);
332: }
333: if (extop->szd) {
334: x1 = matctx->w[0]; y1 = matctx->w[1];
335: VecGetArrayRead(x,&xx);
336: VecPlaceArray(x1,xx);
337: VecGetArray(y,&yy);
338: VecPlaceArray(y1,yy);
339: MatMult(matctx->T,x1,y1);
340: if (!extop->ref && extop->n) {
341: VecGetLocalSize(x1,&nloc);
342: /* copy for avoiding warning of constant array xx */
343: for (i=0;i<extop->n;i++) matctx->work[i] = xx[nloc+i]*PetscSqrtReal(np);
344: BVMultVec(matctx->U,1.0,1.0,y1,matctx->work);
345: BVDotVec(extop->X,x1,matctx->work);
346: PetscBLASIntCast(extop->n,&n_);
347: PetscBLASIntCast(extop->szd,&szd_);
348: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->A,&szd_,matctx->work,&one,&zero,yy+nloc,&one));
349: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->B,&szd_,xx+nloc,&one,&sone,yy+nloc,&one));
350: for (i=0;i<extop->n;i++) yy[nloc+i] /= PetscSqrtReal(np);
351: }
352: VecResetArray(x1);
353: VecRestoreArrayRead(x,&xx);
354: VecResetArray(y1);
355: VecRestoreArray(y,&yy);
356: } else {
357: MatMult(matctx->T,x,y);
358: }
359: return(0);
360: }
362: static PetscErrorCode MatCreateVecs_NEPDeflation(Mat M,Vec *right,Vec *left)
363: {
364: PetscErrorCode ierr;
365: NEP_DEF_MATSHELL *matctx;
368: MatShellGetContext(M,(void**)&matctx);
369: if (right) {
370: VecDuplicate(matctx->w[0],right);
371: }
372: if (left) {
373: VecDuplicate(matctx->w[0],left);
374: }
375: return(0);
376: }
378: static PetscErrorCode MatDestroy_NEPDeflation(Mat M)
379: {
380: PetscErrorCode ierr;
381: NEP_DEF_MATSHELL *matctx;
384: MatShellGetContext(M,(void**)&matctx);
385: if (matctx->extop->szd) {
386: BVDestroy(&matctx->U);
387: PetscFree2(matctx->hfj,matctx->work);
388: PetscFree2(matctx->A,matctx->B);
389: VecDestroy(&matctx->w[0]);
390: VecDestroy(&matctx->w[1]);
391: }
392: MatDestroy(&matctx->T);
393: PetscFree(matctx);
394: return(0);
395: }
397: static PetscErrorCode NEPDeflationEvaluateBasis(NEP_EXT_OP extop,PetscScalar lambda,PetscInt n,PetscScalar *val,PetscBool jacobian)
398: {
399: PetscScalar p;
400: PetscInt i;
403: if (!jacobian) {
404: val[0] = 1.0;
405: for (i=1;i<extop->n;i++) val[i] = val[i-1]*(lambda-extop->bc[i-1]);
406: } else {
407: val[0] = 0.0;
408: p = 1.0;
409: for (i=1;i<extop->n;i++) {
410: val[i] = val[i-1]*(lambda-extop->bc[i-1])+p;
411: p *= (lambda-extop->bc[i-1]);
412: }
413: }
414: return(0);
415: }
417: static PetscErrorCode NEPDeflationComputeShellMat(NEP_EXT_OP extop,PetscScalar lambda,PetscBool jacobian,Mat *M)
418: {
419: PetscErrorCode ierr;
420: NEP_DEF_MATSHELL *matctx,*matctxC;
421: PetscInt nloc,mloc,n=extop->n,j,i,szd=extop->szd,ldh=szd+1,k;
422: Mat F,Mshell,Mcomp;
423: PetscBool ini=PETSC_FALSE;
424: PetscScalar *hf,*hfj,*hfjp,sone=1.0,*hH,*hHprev,*pts,*B,*A,*Hj=extop->Hj,*basisv,zero=0.0;
425: PetscBLASInt n_,info,szd_;
428: if (!M) Mshell = jacobian?extop->MJ:extop->MF;
429: else Mshell = *M;
430: Mcomp = jacobian?extop->MF:extop->MJ;
431: if (!Mshell) {
432: ini = PETSC_TRUE;
433: PetscNew(&matctx);
434: MatGetLocalSize(extop->nep->function,&mloc,&nloc);
435: nloc += szd; mloc += szd;
436: MatCreateShell(PetscObjectComm((PetscObject)extop->nep),nloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,matctx,&Mshell);
437: MatShellSetOperation(Mshell,MATOP_MULT,(void(*)(void))MatMult_NEPDeflation);
438: MatShellSetOperation(Mshell,MATOP_CREATE_VECS,(void(*)(void))MatCreateVecs_NEPDeflation);
439: MatShellSetOperation(Mshell,MATOP_DESTROY,(void(*)(void))MatDestroy_NEPDeflation);
440: matctx->nep = extop->nep;
441: matctx->extop = extop;
442: if (!M) {
443: if (jacobian) { matctx->jacob = PETSC_TRUE; matctx->T = extop->nep->jacobian; extop->MJ = Mshell; }
444: else { matctx->jacob = PETSC_FALSE; matctx->T = extop->nep->function; extop->MF = Mshell; }
445: PetscObjectReference((PetscObject)matctx->T);
446: } else {
447: matctx->jacob = jacobian;
448: MatDuplicate(jacobian?extop->nep->jacobian:extop->nep->function,MAT_DO_NOT_COPY_VALUES, &matctx->T);
449: *M = Mshell;
450: }
451: if (szd) {
452: BVCreateVec(extop->nep->V,matctx->w);
453: VecDuplicate(matctx->w[0],matctx->w+1);
454: BVDuplicateResize(extop->nep->V,szd,&matctx->U);
455: PetscMalloc2(extop->simpU?2*(szd)*(szd):2*(szd)*(szd)*extop->nep->nt,&matctx->hfj,szd,&matctx->work);
456: PetscMalloc2(szd*szd,&matctx->A,szd*szd,&matctx->B);
457: }
458: } else {
459: MatShellGetContext(Mshell,(void**)&matctx);
460: }
461: if (ini || matctx->theta != lambda || matctx->n != extop->n) {
462: if (ini || matctx->theta != lambda) {
463: if (jacobian) {
464: NEPComputeJacobian(extop->nep,lambda,matctx->T);
465: } else {
466: NEPComputeFunction(extop->nep,lambda,matctx->T,matctx->T);
467: }
468: }
469: if (n) {
470: matctx->hfjset = PETSC_FALSE;
471: if (!extop->simpU) {
472: /* likely hfjp has been already computed */
473: if (Mcomp) {
474: MatShellGetContext(Mcomp,(void**)&matctxC);
475: if (matctxC->hfjset && matctxC->theta == lambda && matctxC->n == extop->n) {
476: PetscArraycpy(matctx->hfj,matctxC->hfj,2*extop->szd*extop->szd*extop->nep->nt);
477: matctx->hfjset = PETSC_TRUE;
478: }
479: }
480: hfj = matctx->hfj; hfjp = matctx->hfj+extop->szd*extop->szd*extop->nep->nt;
481: if (!matctx->hfjset) {
482: NEPDeflationEvaluateHatFunction(extop,-1,lambda,NULL,hfj,hfjp,n);
483: matctx->hfjset = PETSC_TRUE;
484: }
485: BVSetActiveColumns(matctx->U,0,n);
486: hf = jacobian?hfjp:hfj;
487: MatCreateSeqDense(PETSC_COMM_SELF,n,n,hf,&F);
488: BVMatMult(extop->X,extop->nep->A[0],matctx->U);
489: BVMultInPlace(matctx->U,F,0,n);
490: BVSetActiveColumns(extop->W,0,extop->n);
491: for (j=1;j<extop->nep->nt;j++) {
492: BVMatMult(extop->X,extop->nep->A[j],extop->W);
493: MatDensePlaceArray(F,hf+j*n*n);
494: BVMult(matctx->U,1.0,1.0,extop->W,F);
495: MatDenseResetArray(F);
496: }
497: MatDestroy(&F);
498: } else {
499: hfj = matctx->hfj;
500: BVSetActiveColumns(matctx->U,0,n);
501: BVMatMult(extop->X,matctx->T,matctx->U);
502: for (j=0;j<n;j++) {
503: for (i=0;i<n;i++) hfj[j*n+i] = -extop->H[j*ldh+i];
504: hfj[j*(n+1)] += lambda;
505: }
506: PetscBLASIntCast(n,&n_);
507: PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,hfj,&n_,&info));
508: SlepcCheckLapackInfo("trtri",info);
509: MatCreateSeqDense(PETSC_COMM_SELF,n,n,hfj,&F);
510: BVMultInPlace(matctx->U,F,0,n);
511: if (jacobian) {
512: NEPDeflationComputeFunction(extop,lambda,NULL);
513: MatShellGetContext(extop->MF,(void**)&matctxC);
514: BVMult(matctx->U,-1.0,1.0,matctxC->U,F);
515: }
516: MatDestroy(&F);
517: }
518: PetscCalloc3(n,&basisv,szd*szd,&hH,szd*szd,&hHprev);
519: NEPDeflationEvaluateBasis(extop,lambda,n,basisv,jacobian);
520: A = matctx->A;
521: PetscArrayzero(A,szd*szd);
522: if (!jacobian) for (i=0;i<n;i++) A[i*(szd+1)] = 1.0;
523: for (j=0;j<n;j++)
524: for (i=0;i<n;i++)
525: for (k=1;k<extop->midx;k++) A[j*szd+i] += basisv[k]*PetscConj(Hj[k*szd*szd+i*szd+j]);
526: PetscBLASIntCast(n,&n_);
527: PetscBLASIntCast(szd,&szd_);
528: B = matctx->B;
529: PetscArrayzero(B,szd*szd);
530: for (i=1;i<extop->midx;i++) {
531: NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev);
532: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
533: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,B,&szd_));
534: pts = hHprev; hHprev = hH; hH = pts;
535: }
536: PetscFree3(basisv,hH,hHprev);
537: }
538: matctx->theta = lambda;
539: matctx->n = extop->n;
540: }
541: return(0);
542: }
544: PetscErrorCode NEPDeflationComputeFunction(NEP_EXT_OP extop,PetscScalar lambda,Mat *F)
545: {
549: NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,NULL);
550: if (F) *F = extop->MF;
551: return(0);
552: }
554: PetscErrorCode NEPDeflationComputeJacobian(NEP_EXT_OP extop,PetscScalar lambda,Mat *J)
555: {
559: NEPDeflationComputeShellMat(extop,lambda,PETSC_TRUE,NULL);
560: if (J) *J = extop->MJ;
561: return(0);
562: }
564: PetscErrorCode NEPDeflationSolveSetUp(NEP_EXT_OP extop,PetscScalar lambda)
565: {
566: PetscErrorCode ierr;
567: NEP_DEF_MATSHELL *matctx;
568: NEP_DEF_FUN_SOLVE solve;
569: PetscInt i,j,n=extop->n;
570: Vec u,tu;
571: Mat F;
572: PetscScalar snone=-1.0,sone=1.0;
573: PetscBLASInt n_,szd_,ldh_,*p,info;
574: Mat Mshell;
577: solve = extop->solve;
578: if (lambda!=solve->theta || n!=solve->n) {
579: NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,solve->sincf?NULL:&solve->T);
580: Mshell = (solve->sincf)?extop->MF:solve->T;
581: MatShellGetContext(Mshell,(void**)&matctx);
582: KSPSetOperators(solve->ksp,matctx->T,matctx->T);
583: if (!extop->ref && n) {
584: PetscBLASIntCast(n,&n_);
585: PetscBLASIntCast(extop->szd,&szd_);
586: PetscBLASIntCast(extop->szd+1,&ldh_);
587: if (!extop->simpU) {
588: BVSetActiveColumns(solve->T_1U,0,n);
589: for (i=0;i<n;i++) {
590: BVGetColumn(matctx->U,i,&u);
591: BVGetColumn(solve->T_1U,i,&tu);
592: KSPSolve(solve->ksp,u,tu);
593: BVRestoreColumn(solve->T_1U,i,&tu);
594: BVRestoreColumn(matctx->U,i,&u);
595: }
596: MatCreateSeqDense(PETSC_COMM_SELF,n,n,solve->work,&F);
597: BVDot(solve->T_1U,extop->X,F);
598: MatDestroy(&F);
599: } else {
600: for (j=0;j<n;j++)
601: for (i=0;i<n;i++) solve->work[j*n+i] = extop->XpX[j*extop->szd+i];
602: for (i=0;i<n;i++) extop->H[i*ldh_+i] -= lambda;
603: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n_,&n_,&snone,extop->H,&ldh_,solve->work,&n_));
604: for (i=0;i<n;i++) extop->H[i*ldh_+i] += lambda;
605: }
606: PetscArraycpy(solve->M,matctx->B,extop->szd*extop->szd);
607: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&snone,matctx->A,&szd_,solve->work,&n_,&sone,solve->M,&szd_));
608: PetscMalloc1(n,&p);
609: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,solve->M,&szd_,p,&info));
610: SlepcCheckLapackInfo("getrf",info);
611: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n_,solve->M,&szd_,p,solve->work,&n_,&info));
612: SlepcCheckLapackInfo("getri",info);
613: PetscFree(p);
614: }
615: solve->theta = lambda;
616: solve->n = n;
617: }
618: return(0);
619: }
621: PetscErrorCode NEPDeflationFunctionSolve(NEP_EXT_OP extop,Vec b,Vec x)
622: {
623: PetscErrorCode ierr;
624: Vec b1,x1;
625: PetscScalar *xx,*bb,*x2,*b2,*w,*w2,snone=-1.0,sone=1.0,zero=0.0;
626: NEP_DEF_MATSHELL *matctx;
627: NEP_DEF_FUN_SOLVE solve=extop->solve;
628: PetscBLASInt one=1,szd_,n_,ldh_;
629: PetscInt nloc,i;
630: PetscMPIInt np,count;
633: if (extop->ref) {
634: VecZeroEntries(x);
635: }
636: if (extop->szd) {
637: x1 = solve->w[0]; b1 = solve->w[1];
638: VecGetArray(x,&xx);
639: VecPlaceArray(x1,xx);
640: VecGetArray(b,&bb);
641: VecPlaceArray(b1,bb);
642: } else {
643: b1 = b; x1 = x;
644: }
645: KSPSolve(extop->solve->ksp,b1,x1);
646: if (!extop->ref && extop->n) {
647: PetscBLASIntCast(extop->szd,&szd_);
648: PetscBLASIntCast(extop->n,&n_);
649: PetscBLASIntCast(extop->szd+1,&ldh_);
650: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
651: PetscMalloc2(extop->n,&b2,extop->n,&x2);
652: MPI_Comm_size(PetscObjectComm((PetscObject)b),&np);
653: for (i=0;i<extop->n;i++) b2[i] = bb[nloc+i]*PetscSqrtReal(np);
654: w = solve->work; w2 = solve->work+extop->n;
655: MatShellGetContext(solve->sincf?extop->MF:solve->T,(void**)&matctx);
656: PetscArraycpy(w2,b2,extop->n);
657: BVDotVec(extop->X,x1,w);
658: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&snone,matctx->A,&szd_,w,&one,&sone,w2,&one));
659: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,solve->M,&szd_,w2,&one,&zero,x2,&one));
660: if (extop->simpU) {
661: for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] -= solve->theta;
662: for (i=0;i<extop->n;i++) w[i] = x2[i];
663: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&one,&snone,extop->H,&ldh_,w,&n_));
664: for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] += solve->theta;
665: BVMultVec(extop->X,-1.0,1.0,x1,w);
666: } else {
667: BVMultVec(solve->T_1U,-1.0,1.0,x1,x2);
668: }
669: for (i=0;i<extop->n;i++) xx[i+nloc] = x2[i]/PetscSqrtReal(np);
670: PetscMPIIntCast(extop->n,&count);
671: MPI_Bcast(xx+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)b));
672: }
673: if (extop->szd) {
674: VecResetArray(x1);
675: VecRestoreArray(x,&xx);
676: VecResetArray(b1);
677: VecRestoreArray(b,&bb);
678: if (!extop->ref && extop->n) { PetscFree2(b2,x2);}
679: }
680: return(0);
681: }
683: PetscErrorCode NEPDeflationSetRefine(NEP_EXT_OP extop,PetscBool ref)
684: {
686: extop->ref = ref;
687: return(0);
688: }
690: PetscErrorCode NEPDeflationReset(NEP_EXT_OP extop)
691: {
692: PetscErrorCode ierr;
693: PetscInt j;
694: NEP_DEF_FUN_SOLVE solve;
697: if (!extop) return(0);
698: PetscFree(extop->H);
699: BVDestroy(&extop->X);
700: if (extop->szd) {
701: VecDestroy(&extop->w);
702: PetscFree3(extop->Hj,extop->XpX,extop->bc);
703: BVDestroy(&extop->W);
704: }
705: MatDestroy(&extop->MF);
706: MatDestroy(&extop->MJ);
707: if (extop->solve) {
708: solve = extop->solve;
709: if (extop->szd) {
710: if (!extop->simpU) {BVDestroy(&solve->T_1U);}
711: PetscFree2(solve->M,solve->work);
712: VecDestroy(&solve->w[0]);
713: VecDestroy(&solve->w[1]);
714: }
715: if (!solve->sincf) {
716: MatDestroy(&solve->T);
717: }
718: PetscFree(extop->solve);
719: }
720: if (extop->proj) {
721: if (extop->szd) {
722: for (j=0;j<extop->nep->nt;j++) {MatDestroy(&extop->proj->V1pApX[j]);}
723: MatDestroy(&extop->proj->XpV1);
724: PetscFree3(extop->proj->V2,extop->proj->V1pApX,extop->proj->work);
725: VecDestroy(&extop->proj->w);
726: BVDestroy(&extop->proj->V1);
727: }
728: PetscFree(extop->proj);
729: }
730: PetscFree(extop);
731: return(0);
732: }
734: PetscErrorCode NEPDeflationInitialize(NEP nep,BV X,KSP ksp,PetscBool sincfun,PetscInt sz,NEP_EXT_OP *extop)
735: {
736: PetscErrorCode ierr;
737: NEP_EXT_OP op;
738: NEP_DEF_FUN_SOLVE solve;
739: PetscInt szd;
740: Vec x;
743: NEPDeflationReset(*extop);
744: PetscNew(&op);
745: *extop = op;
746: op->nep = nep;
747: op->n = 0;
748: op->szd = szd = sz-1;
749: op->max_midx = PetscMin(MAX_MINIDX,szd);
750: op->X = X;
751: if (!X) { BVDuplicateResize(nep->V,sz,&op->X); }
752: else { PetscObjectReference((PetscObject)X); }
753: PetscCalloc1(sz*sz,&(op)->H);
754: if (op->szd) {
755: BVGetColumn(op->X,0,&x);
756: VecDuplicate(x,&op->w);
757: BVRestoreColumn(op->X,0,&x);
758: op->simpU = PETSC_FALSE;
759: if (nep->fui==NEP_USER_INTERFACE_SPLIT) {
760: /* undocumented option to use the simple expression for U = T*X*inv(lambda-H) */
761: PetscOptionsGetBool(NULL,NULL,"-nep_deflation_simpleu",&op->simpU,NULL);
762: } else {
763: op->simpU = PETSC_TRUE;
764: }
765: PetscCalloc3(szd*szd*op->max_midx,&(op)->Hj,szd*szd,&(op)->XpX,szd,&op->bc);
766: BVDuplicateResize(op->X,op->szd,&op->W);
767: }
768: if (ksp) {
769: PetscNew(&solve);
770: op->solve = solve;
771: solve->ksp = ksp;
772: solve->sincf = sincfun;
773: solve->n = -1;
774: if (op->szd) {
775: if (!op->simpU) {
776: BVDuplicateResize(nep->V,szd,&solve->T_1U);
777: }
778: PetscMalloc2(szd*szd,&solve->M,2*szd*szd,&solve->work);
779: BVCreateVec(nep->V,&solve->w[0]);
780: VecDuplicate(solve->w[0],&solve->w[1]);
781: }
782: }
783: return(0);
784: }
786: PetscErrorCode NEPDeflationDSNEPComputeMatrix(DS ds,PetscScalar lambda,PetscBool deriv,DSMatType mat,void *ctx)
787: {
788: PetscScalar *T,*E,*w1,*w2,*w=NULL,*ww,*hH,*hHprev,*pts;
789: PetscScalar alpha,alpha2,*AB,sone=1.0,zero=0.0,*basisv,s;
790: PetscInt i,ldds,nwork=0,szd,nv,j,k,n;
791: PetscBLASInt inc=1,nv_,ldds_,dim_,dim2,szdk,szd_,n_,ldh_;
792: PetscMPIInt np;
793: NEP_DEF_PROJECT proj=(NEP_DEF_PROJECT)ctx;
794: NEP_EXT_OP extop=proj->extop;
795: NEP nep=extop->nep;
796: PetscErrorCode ierr;
799: DSGetDimensions(ds,&nv,NULL,NULL,NULL,NULL);
800: DSGetLeadingDimension(ds,&ldds);
801: DSGetArray(ds,mat,&T);
802: PetscArrayzero(T,ldds*nv);
803: PetscBLASIntCast(ldds*nv,&dim2);
804: /* mat = V1^*T(lambda)V1 */
805: for (i=0;i<nep->nt;i++) {
806: if (deriv) {
807: FNEvaluateDerivative(nep->f[i],lambda,&alpha);
808: } else {
809: FNEvaluateFunction(nep->f[i],lambda,&alpha);
810: }
811: DSGetArray(ds,DSMatExtra[i],&E);
812: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dim2,&alpha,E,&inc,T,&inc));
813: DSRestoreArray(ds,DSMatExtra[i],&E);
814: }
815: if (!extop->ref && extop->n) {
816: n = extop->n;
817: szd = extop->szd;
818: PetscArrayzero(proj->work,proj->lwork);
819: PetscBLASIntCast(nv,&nv_);
820: PetscBLASIntCast(n,&n_);
821: PetscBLASIntCast(ldds,&ldds_);
822: PetscBLASIntCast(szd,&szd_);
823: PetscBLASIntCast(proj->dim,&dim_);
824: PetscBLASIntCast(extop->szd+1,&ldh_);
825: w1 = proj->work; w2 = proj->work+proj->dim*proj->dim;
826: nwork += 2*proj->dim*proj->dim;
828: /* mat = mat + V1^*U(lambda)V2 */
829: for (i=0;i<nep->nt;i++) {
830: MatDenseGetArray(proj->V1pApX[i],&E);
831: if (extop->simpU) {
832: if (deriv) {
833: FNEvaluateDerivative(nep->f[i],lambda,&alpha);
834: } else {
835: FNEvaluateFunction(nep->f[i],lambda,&alpha);
836: }
837: ww = w1; w = w2;
838: PetscArraycpy(ww,proj->V2,szd*nv);
839: MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np);
840: for (j=0;j<szd*nv;j++) ww[j] *= PetscSqrtReal(np);
841: for (j=0;j<n;j++) extop->H[j*ldh_+j] -= lambda;
842: alpha = -alpha;
843: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha,extop->H,&ldh_,ww,&szd_));
844: if (deriv) {
845: PetscBLASIntCast(szd*nv,&szdk);
846: FNEvaluateFunction(nep->f[i],lambda,&alpha2);
847: PetscArraycpy(w,proj->V2,szd*nv);
848: for (j=0;j<szd*nv;j++) w[j] *= PetscSqrtReal(np);
849: alpha2 = -alpha2;
850: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
851: alpha2 = 1.0;
852: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
853: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&szdk,&sone,w,&inc,ww,&inc));
854: }
855: for (j=0;j<n;j++) extop->H[j*ldh_+j] += lambda;
856: } else {
857: NEPDeflationEvaluateHatFunction(extop,i,lambda,NULL,w1,w2,szd);
858: w = deriv?w2:w1; ww = deriv?w1:w2;
859: MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np);
860: s = PetscSqrtReal(np);
861: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&s,w,&szd_,proj->V2,&szd_,&zero,ww,&szd_));
862: }
863: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nv_,&nv_,&n_,&sone,E,&dim_,ww,&szd_,&sone,T,&ldds_));
864: MatDenseRestoreArray(proj->V1pApX[i],&E);
865: }
867: /* mat = mat + V2^*A(lambda)V1 */
868: basisv = proj->work+nwork; nwork += szd;
869: hH = proj->work+nwork; nwork += szd*szd;
870: hHprev = proj->work+nwork; nwork += szd*szd;
871: AB = proj->work+nwork;
872: NEPDeflationEvaluateBasis(extop,lambda,n,basisv,deriv);
873: if (!deriv) for (i=0;i<n;i++) AB[i*(szd+1)] = 1.0;
874: for (j=0;j<n;j++)
875: for (i=0;i<n;i++)
876: for (k=1;k<extop->midx;k++) AB[j*szd+i] += basisv[k]*PetscConj(extop->Hj[k*szd*szd+i*szd+j]);
877: MatDenseGetArray(proj->XpV1,&E);
878: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,E,&szd_,&zero,w,&szd_));
879: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
880: MatDenseRestoreArray(proj->XpV1,&E);
882: /* mat = mat + V2^*B(lambda)V2 */
883: PetscArrayzero(AB,szd*szd);
884: for (i=1;i<extop->midx;i++) {
885: NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev);
886: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
887: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,AB,&szd_));
888: pts = hHprev; hHprev = hH; hH = pts;
889: }
890: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,proj->V2,&szd_,&zero,w,&szd_));
891: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
892: }
893: DSRestoreArray(ds,mat,&T);
894: return(0);
895: }
897: PetscErrorCode NEPDeflationProjectOperator(NEP_EXT_OP extop,BV Vext,DS ds,PetscInt j0,PetscInt j1)
898: {
899: PetscErrorCode ierr;
900: PetscInt k,j,n=extop->n,dim;
901: Vec v,ve;
902: BV V1;
903: Mat G;
904: NEP nep=extop->nep;
905: NEP_DEF_PROJECT proj;
908: NEPCheckSplit(extop->nep,1);
909: proj = extop->proj;
910: if (!proj) {
911: /* Initialize the projection data structure */
912: PetscNew(&proj);
913: extop->proj = proj;
914: proj->extop = extop;
915: BVGetSizes(Vext,NULL,NULL,&dim);
916: proj->dim = dim;
917: if (extop->szd) {
918: proj->lwork = 3*dim*dim+2*extop->szd*extop->szd+extop->szd;
919: PetscMalloc3(dim*extop->szd,&proj->V2,nep->nt,&proj->V1pApX,proj->lwork,&proj->work);
920: for (j=0;j<nep->nt;j++) {
921: MatCreateSeqDense(PETSC_COMM_SELF,proj->dim,extop->szd,NULL,&proj->V1pApX[j]);
922: }
923: MatCreateSeqDense(PETSC_COMM_SELF,extop->szd,proj->dim,NULL,&proj->XpV1);
924: BVCreateVec(extop->X,&proj->w);
925: BVDuplicateResize(extop->X,proj->dim,&proj->V1);
926: }
927: DSNEPSetComputeMatrixFunction(ds,NEPDeflationDSNEPComputeMatrix,(void*)proj);
928: }
930: /* Split Vext in V1 and V2 */
931: if (extop->szd) {
932: for (j=j0;j<j1;j++) {
933: BVGetColumn(Vext,j,&ve);
934: BVGetColumn(proj->V1,j,&v);
935: NEPDeflationCopyToExtendedVec(extop,v,proj->V2+j*extop->szd,ve,PETSC_TRUE);
936: BVRestoreColumn(proj->V1,j,&v);
937: BVRestoreColumn(Vext,j,&ve);
938: }
939: V1 = proj->V1;
940: } else V1 = Vext;
942: /* Compute matrices V1^* A_i V1 */
943: BVSetActiveColumns(V1,j0,j1);
944: for (k=0;k<nep->nt;k++) {
945: DSGetMat(ds,DSMatExtra[k],&G);
946: BVMatProject(V1,nep->A[k],V1,G);
947: DSRestoreMat(ds,DSMatExtra[k],&G);
948: }
950: if (extop->n) {
951: if (extop->szd) {
952: /* Compute matrices V1^* A_i X and V1^* X */
953: BVSetActiveColumns(extop->W,0,n);
954: for (k=0;k<nep->nt;k++) {
955: BVMatMult(extop->X,nep->A[k],extop->W);
956: BVDot(extop->W,V1,proj->V1pApX[k]);
957: }
958: BVDot(V1,extop->X,proj->XpV1);
959: }
960: }
961: return(0);
962: }