/* Use PARPACK + SuperLU_DIST to solve generalized eigenvalue arising from Accelerator Design (SLAC problem) This version uses the nonsymmetric solver of ARPACK: pdnaupd. NOTE: Aztec is used for sparse matrix-vector multiplication. */ #include #include "superlu_ddefs.h" #define workd(i) workd[(i)-1] #define z(i,j) z[nloc*( (j) -1 )+ (i) - 1] #define zglb(i,j) zglb[na*( (j) -1 )+ (i) - 1] #define asub(i) asub[(i) - 1] #define amtx(i) amtx[(i) - 1] #define USECRS #define USEAZTEC #ifdef USEAZTEC #include "az_aztec.h" #endif #ifdef USECRS /* Convert csr format to msr format */ void csr2msr(int n,int nnz,int *row_start, int *col_index, double *val, int *bindex,double *newval,int *update) { int row,j,count,offdiags; /* Copy val and col_index to second part of bindex and newval */ bindex[0]=n+1; count = n+1; offdiags=0; for(row=0; row < n;row++) { if(row > 0) { bindex[row]=bindex[row-1]+offdiags; offdiags=0; } for(j=row_start[row];j < row_start[row+1];j++) { if(col_index[j] == update[row]) { newval[row]=val[j]; /* Diagonal Element */ } else { newval[count]=val[j]; bindex[count]=col_index[j]; count++; offdiags++; } } } bindex[n]=count; } /* Convert a global compressed column matrix to local compressed row matrix Input matrix is in ncols, col_start, row_index, col_val Local row range is given by my_starting_row <= row < my_finishing_row Matrix is output in p_row_start, p_col_index, and p_val Returns number of local entries */ int gccs2lcrs(int ncols,int *col_start, int *row_index, double *col_val, int my_starting_row, int my_finishing_row, int **p_row_start, int **p_col_index, double **p_val) { int *row_start; int *col_index; double *val; int n_local_rows = my_finishing_row - my_starting_row; int *row_length; int *seen_in_row; int total_entries, gcol,i,current_row,lrow; total_entries = 0; row_length = (int *) calloc(n_local_rows,sizeof(int)); for(gcol=0;gcol < ncols;gcol++) { for(i=col_start[gcol];i < col_start[gcol+1];i++) { current_row = row_index[i]; if((current_row >= my_starting_row) && (current_row < my_finishing_row)) { row_length[current_row-my_starting_row]++; total_entries++; } } } printf("ncols: %d\n",ncols); printf("n_local_rows: %d\n",n_local_rows); printf("Total entries: %d\n",total_entries); *p_row_start = row_start = (int *) calloc(n_local_rows+1,sizeof(int)); *p_col_index = col_index = (int *) calloc(total_entries,sizeof(int)); *p_val = val = (double *) calloc(total_entries,sizeof(double)); row_start[0]=0; for(lrow=1;lrow <= n_local_rows;lrow++) { row_start[lrow]=row_start[lrow-1]+row_length[lrow-1]; } seen_in_row = (int *) calloc(n_local_rows,sizeof(int)); for(lrow=0;lrow < n_local_rows;lrow++) { seen_in_row[lrow]=row_start[lrow]; } for(gcol=0;gcol < ncols;gcol++) { for(i=col_start[gcol];i < col_start[gcol+1];i++) { current_row = row_index[i]; if((current_row >= my_starting_row) && (current_row < my_finishing_row)) { lrow = current_row - my_starting_row; col_index[seen_in_row[lrow]]=gcol; val[seen_in_row[lrow]]=col_val[i]; seen_in_row[lrow]++; } } } free(row_length); free(seen_in_row); return(total_entries); } void crsmvm(int n,int *row_start, int *col_index, double *val, double *x,double *y) { int i,j; for(i=0;i < n;i++) { y[i]=0.0; for(j=row_start[i];j < row_start[i+1];j++) { y[i]+=val[j]*x[col_index[j]]; } } } #endif main(int argc, char *argv[]) /* * Purpose * ======= * * The driver program PEIGEN * * This example illustrates how to use SuperLU_DIST and PARPACK * to solve a large-scale sparse standard eigenvalue problem * in shift-invert mode. * In this case, we factorize A only once in the first call to * pdgssvx_ABglobal, and reuse the following data structures * in the subsequent call to pdgssvx_ABglobal: * ScalePermstruct : DiagScale, R, C, perm_r, perm_c * LUstruct : Glu_persist, Llu * * On the Cray T3E, the program may be run by typing: * mpprun -n pddrive1 -r -c * */ { superlu_options_t options; SuperLUStat_t stat; SuperMatrix A; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; gridinfo_t grid; double *berr; double *amtx, *bmtx, *b, *b1, *b2; int_t *asub, *xa, *bsub, *xb; int_t i, j, ma, na, nnza, mb, nb, nnzb; int_t nprow, npcol; int iam, info, ldb, ldx, nrhs; char trans[1]; char **cpp, c; FILE *fp1, *fp2, *fopen(); double t0, t1; /* ---------------- ARPACK variables ----------------*/ MPI_Comm arcomm; MPI_Status mpistat; int nprocs, ibeg; int ido, nloc, nrem, nev , ncv , lworkl, mypid, sid, arflg, pin, pout, maxitr, rvec , nconv, iopt; double tol, sigmar =2000.0, sigmai=0.0; char bmat[1] = "G", which[2]="LR", howmny[1]="A"; int ipntr[14], iparam[11], *select, *psize, *nbase; double *v , *z , *workd, *workl, *workev, *resid, *recvbuf, *dr, *di, *zglb , *res; int ione = 1, stiff = 1, irow, ibegin, iend; double done = 1.0, dzero = 0.0; /* make a copy of the matrix for residual checking */ int *colptr, *rowind; double *nzvals; #ifdef USECRS int *row_start; int *col_index; double *val; #ifdef USEAZTEC int proc_config[AZ_PROC_SIZE]; AZ_MATRIX *Bmat; double *dsrc; double *ddest,*ddest2; int N_update; int *external, *update_index, *update, *extern_index, *data_org; int aztotal; int *bindx; double *msrval; int local_nnz; #endif #endif nprow = 1; /* Default process rows. */ npcol = 1; /* Default process columns. */ nrhs = 1; /* Number of right-hand side. */ /* Parse command line argv[]. */ if (argc != 7) ABORT("Number of arguements incorrect"); for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default %d)\n", nprow); printf("\t-c : process columns (default %d)\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last two args are considered a filename */ if (stiff) { /* open the stiffness matrix */ if ( !(fp1 = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } stiff = 0; } else { if ( !(fp2 = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } } /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); t0 = MPI_Wtime(); #ifdef USEAZTEC AZ_set_proc_config(proc_config,MPI_COMM_WORLD); #endif /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); /* Bail out if I do not belong in the grid. */ iam = grid.iam; if ( iam >= nprow * npcol ) goto out; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif /* ------------------------------------------------------------ PROCESS 0 READS THE MATRIX A, AND THEN BROADCASTS IT TO ALL THE OTHER PROCESSES. ------------------------------------------------------------*/ if ( !iam ) { /* Print the CPP definitions. */ cpp_defs(); /* Read the stiffness matrix stored on disk in binary format. */ fread(&ma , 4, 1, fp1); na = ma; fread(&nnza, 4, 1, fp1); xa = (int*)calloc(ma+1, sizeof(int)); asub = (int*)calloc(nnza, sizeof(int)); amtx = (double*)calloc(nnza, sizeof(double)); fread((int*)xa , 4, na+1, fp1); fread((int*)asub , 4, nnza, fp1); fread((double*)amtx, 8, nnza, fp1); fclose(fp1); printf("\tA: Dimension\t%dx%d\t # nonzeros %d\n", ma, na, nnza); /* Change to 0-based index */ for (i=0;imatvec(dsrc,ddest,Bmat,proc_config); AZ_invorder_vec(ddest,data_org,update_index,NULL,ddest2); MPI_Allgatherv(ddest2,nloc,MPI_DOUBLE,b2,psize,nbase, MPI_DOUBLE,arcomm); #else crsmvm(nb,row_start,col_index,val,b1,b2); #endif #else iopt = 1; dmvm( &nb, bmtx, bsub, xb, b1, b2, &iopt ); #endif nrhs = 1; pdgssvx_ABglobal(&options, &A , &ScalePermstruct, b2 , ldb , nrhs , &grid , &LUstruct, berr , &stat , &info); /* put the solution into workd */ ibeg = nbase[mypid]; dcopy(&nloc, &b2[ibeg], &ione, &workd(pout), &ione); } else if (ido == 1) { /* collect the local pieces from all processors and put it in b1 */ pin = ipntr[2]; pout = ipntr[1]; MPI_Allgatherv(&workd(pin),nloc,MPI_DOUBLE,b1,psize,nbase, MPI_DOUBLE,arcomm); nrhs = 1; pdgssvx_ABglobal(&options, &A , &ScalePermstruct, b1 , ldb , nrhs , &grid , &LUstruct, berr , &stat , &info); /* put the solution into workd */ ibeg = nbase[mypid]; dcopy(&nloc, &b1[ibeg], &ione, &workd(pout), &ione); } else if (ido == 2) { /* collect the local pieces from all processors and put it in b1 */ pin = ipntr[0]; pout = ipntr[1]; MPI_Allgatherv(&workd(pin),nloc,MPI_DOUBLE,b1,psize,nbase, MPI_DOUBLE,arcomm); #ifdef USECRS #ifdef USEAZTEC bzero(dsrc,aztotal*sizeof(double)); dcopy(&nloc, &workd(pin), &ione, dsrc, &ione); AZ_reorder_vec(dsrc,data_org,update_index,NULL); Bmat->matvec(dsrc,ddest,Bmat,proc_config); AZ_invorder_vec(ddest,data_org,update_index,NULL,ddest2); MPI_Allgatherv(ddest2,nloc,MPI_DOUBLE,b2,psize,nbase, MPI_DOUBLE,arcomm); #else crsmvm(nb,row_start,col_index,val,b1,b2); #endif #else iopt = 1; dmvm( &nb, bmtx, bsub, xb, b1, b2, &iopt); #endif /* put the product into workd */ ibeg = nbase[mypid]; dcopy(&nloc, &b2[ibeg], &ione, &workd(pout), &ione); } } /* DEALLOCATE SOME STORAGE */ Destroy_CompCol_Matrix_dist(&A); Destroy_LU(na, &grid, &LUstruct); ScalePermstructFree(&ScalePermstruct); LUstructFree(&LUstruct); if (arflg == 0) { rvec = 1; nconv = iparam[4]; pdneupd(&arcomm , &rvec , howmny , select , dr , di , z , &nloc , &sigmar, &sigmai, workev, bmat , &nloc , which , &nev , &tol , resid , &ncv , v , &nloc , iparam , ipntr , workd , workl, &lworkl, &arflg); if (arflg==0) { /* compute residual */ if (!iam ) printf(" nconv = %d\n", nconv); /* Check the accuracy of the solution. */ if (mypid == 0) { zglb = (double*) malloc(na*nconv*sizeof(double)); res = (double*) malloc(nconv*sizeof(double)); } for (i=0;i=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } int cpp_defs() { printf(".. CPP definitions:\n"); #if ( PRNTlevel>=1 ) printf("\tPRNTlevel = %d\n", PRNTlevel); #endif #if ( DEBUGlevel>=1 ) printf("\tDEBUGlevel = %d\n", DEBUGlevel); #endif #if ( PROFlevel>=1 ) printf("\tPROFlevel = %d\n", PROFlevel); #endif #if ( StaticPivot>=1 ) printf("\tStaticPivot = %d\n", StaticPivot); #endif #ifdef USECRS #ifdef USEAZTEC printf("\tAztec Matrix Multiplication\n"); #else printf("\tCRS Matrix Multiplication\n"); #endif #endif printf("....\n"); }