DMRGKron.cpp
1 #include <vector>
2 #include <iostream>
3 #include <set>
4 #include <map>
5 #include <unordered_map>
6 #include <stdio.h>
7 
8 #include "DMRGKron.hpp"
9 #include <../src/mat/impls/aij/seq/aij.h> /* Mat_SeqAIJ */
10 #include <../src/mat/impls/aij/mpi/mpiaij.h> /* Mat_MPIAIJ */
11 
12 // #define DMRG_KRON_DEBUG
13 
14 #if defined(DMRG_KRON_DEBUG)
15  #ifndef PRINT_RANK_BEGIN
16  #define PRINT_RANK_BEGIN() \
17  for(PetscMPIInt irank = 0; irank < mpi_size; ++irank){\
18  if(irank==mpi_rank){std::cout << "[" << mpi_rank << "]<<" << std::endl;
19  #endif
20 
21  #ifndef PRINT_RANK_END
22  #define PRINT_RANK_END() \
23  std::cout << ">>[" << mpi_rank << "]" << std::endl;}\
24  ierr = MPI_Barrier(mpi_comm); CHKERRQ(ierr);}
25  #endif
26 #else
27  #ifndef PRINT_RANK_BEGIN
28  #define PRINT_RANK_BEGIN()
29  #endif
30 
31  #ifndef PRINT_RANK_END
32  #define PRINT_RANK_END()
33  #endif
34 #endif
35 
36 // PETSC_EXTERN PetscErrorCode PreSplitOwnership(const MPI_Comm comm, const PetscInt N, PetscInt& locrows, PetscInt& Istart);
37 PETSC_EXTERN PetscErrorCode MatEnsureAssembled(const Mat& matin);
38 PETSC_EXTERN PetscErrorCode MatSetOption_MultipleMats(
39  const std::vector<Mat>& matrices,
40  const std::vector<MatOption>& options,
41  const std::vector<PetscBool>& flgs);
42 PETSC_EXTERN PetscErrorCode MatSetOption_MultipleMatGroups(
43  const std::vector<std::vector<Mat>>& matgroups,
44  const std::vector<MatOption>& options,
45  const std::vector<PetscBool>& flgs);
46 PETSC_EXTERN PetscErrorCode MatEnsureAssembled_MultipleMatGroups(const std::vector<std::vector<Mat>>& matgroups);
47 PETSC_EXTERN PetscErrorCode InitSingleSiteOperator(const MPI_Comm& comm, const PetscInt dim, Mat* mat);
48 
49 static const PetscScalar one = 1.0;
50 
52 PetscErrorCode MatKronEyeConstruct(
53  Block::SpinBase& LeftBlock,
54  Block::SpinBase& RightBlock,
55  const KronBlocks_t& KronBlocks,
56  Block::SpinBase& BlockOut
57  )
58 {
59  PetscErrorCode ierr = 0;
60 
61  MPI_Comm mpi_comm = LeftBlock.MPIComm();
62  PetscMPIInt mpi_rank, mpi_size;
63  ierr = MPI_Comm_rank(mpi_comm, &mpi_rank); CHKERRQ(ierr);
64  ierr = MPI_Comm_size(mpi_comm, &mpi_size); CHKERRQ(ierr);
65 
66  /* Determine the ownership range from the Sz matrix of the 0th site */
67  if(!BlockOut.NumSites()) SETERRQ(PETSC_COMM_SELF,1,"BlockOut must have at least one site.");
68  Mat matin = BlockOut.Sz(0);
69 
70  const PetscInt rstart = matin->rmap->rstart;
71  const PetscInt lrows = matin->rmap->n;
72  const PetscInt cstart = matin->cmap->rstart;
73  const PetscInt cend = matin->cmap->rend;
74 
75  /* Verify that the row and column mappings match what is expected */
76  {
77  PetscInt M,N, locrows, loccols, Istart, Cstart, lcols=cend-cstart;
78  ierr = MatGetSize(matin, &M, &N); CHKERRQ(ierr);
79  ierr = PreSplitOwnership(mpi_comm, M, locrows, Istart); CHKERRQ(ierr);
80  if(locrows!=lrows) SETERRQ4(PETSC_COMM_SELF, 1,
81  "Incorrect guess for locrows. Expected %d. Got %d. Size: %d x %d.", locrows, lrows, M, N);
82  if(Istart!=rstart) SETERRQ4(PETSC_COMM_SELF, 1,
83  "Incorrect guess for Istart. Expected %d. Got %d. Size: %d x %d.", Istart, rstart, M, N);
84  ierr = PreSplitOwnership(mpi_comm, N, loccols, Cstart); CHKERRQ(ierr);
85  if(loccols!=lcols) SETERRQ4(PETSC_COMM_SELF, 1,
86  "Incorrect guess for loccols. Expected %d. Got %d. Size: %d x %d.", loccols, lcols, M, N);
87  if(Cstart!=cstart) SETERRQ4(PETSC_COMM_SELF, 1,
88  "Incorrect guess for Cstart. Expected %d. Got %d. Size: %d x %d.", Cstart, cstart, M, N);
89  }
90 
91  const PetscInt TotSites = BlockOut.NumSites();
92  const std::vector<PetscInt> NumSites_LR = {LeftBlock.NumSites(), RightBlock.NumSites()};
93 
94  /* Maps the global indices of the rows of L and R to their local indices in the corresponding submatrices */
95  std::unordered_map<PetscInt,PetscInt> MapRowsL, MapRowsR;
96 
97  /* Vectors storing the global indices of the rows that will be gathered into the local submatrices */
98  std::vector<PetscInt> ReqRowsL, ReqRowsR;
99  PetscInt NReqRowsL, NReqRowsR;
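/* Illustrative sketch (hypothetical values): if the local output rows need global left-block rows {3, 7, 10},
   then ReqRowsL = [3, 7, 10] (local -> global lookup) and MapRowsL = {3:0, 7:1, 10:2} (global -> local lookup),
   so row 7 of the full left-block operator becomes row 1 of the gathered submatrix. */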
100 
101  /**************************
102  SUBMATRIX COLLECTION
103  **************************/
104  {
105  KronBlocksIterator KIter(KronBlocks, rstart, rstart+lrows);
106 
107  /* Temporarily stores the global rows of L and R needed for the local rows of O */
108  std::set<PetscInt> SetRowsL, SetRowsR;
109 
110  for( ; KIter.Loop(); ++KIter)
111  {
112  SetRowsL.insert(KIter.GlobalIdxLeft());
113  SetRowsR.insert(KIter.GlobalIdxRight());
114  }
115 
116  /* Store the results from the sets into the corresponding map where the key represents the global row while
117  the value represents the sequential index where that global row will be stored */
118  NReqRowsL = SetRowsL.size();
119  NReqRowsR = SetRowsR.size();
120 
121  /* Allocate enough space and store the required rows into a standard array for generating an index set */
122  ReqRowsL.resize(NReqRowsL);
123  ReqRowsR.resize(NReqRowsR);
124  /* Dump the set values into the array for local->global lookup and into the map for global->local lookup */
125  {
126  size_t idx = 0;
127  for(PetscInt row: SetRowsL){
128  ReqRowsL[idx] = row;
129  MapRowsL[row] = idx++;
130  }
131  idx = 0;
132  for(PetscInt row: SetRowsR){
133  ReqRowsR[idx] = row;
134  MapRowsR[row] = idx++;
135  }
136  }
137  }
138 
139  /* Submatrix array containing local rows of all operators of both the left and right sides */
140  Mat **SubMatArray;
141  ierr = PetscCalloc1(2*TotSites, &SubMatArray); CHKERRQ(ierr);
142  const std::vector<PetscInt> SiteShifts_LR = {0, LeftBlock.NumSites()};
143  #define p_SubMat(OPTYPE, SIDETYPE, ISITE) (SubMatArray[ (ISITE + (SiteShifts_LR [SIDETYPE]) )*2+(OPTYPE) ])
144  #define SubMat(OPTYPE, SIDETYPE, ISITE) (*p_SubMat((OPTYPE), (SIDETYPE), (ISITE)))
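/* The 2*TotSites submatrix pointers are laid out flat as (ISITE + side shift)*2 + OPTYPE. For example, assuming
   OpSz and OpSp enumerate to 0 and 1 and a left block with 4 sites, SubMat(OpSp, SideRight, 2) dereferences
   SubMatArray[(2 + 4)*2 + 1] = SubMatArray[13]. The enum values are an assumption here; only the relative
   layout is fixed by the macro. */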
145 
146  /* Generate the index sets needed to get the rows and columns */
147  IS isrow_L, isrow_R, iscol_L, iscol_R;
148  /* Get only some required rows */
149  ierr = ISCreateGeneral(mpi_comm, NReqRowsL, ReqRowsL.data(), PETSC_USE_POINTER, &isrow_L); CHKERRQ(ierr);
150  ierr = ISCreateGeneral(mpi_comm, NReqRowsR, ReqRowsR.data(), PETSC_USE_POINTER, &isrow_R); CHKERRQ(ierr);
151  /* Get all columns in each required row */
152  ierr = ISCreateStride(mpi_comm, LeftBlock.NumStates(), 0, 1, &iscol_L); CHKERRQ(ierr);
153  ierr = ISCreateStride(mpi_comm, RightBlock.NumStates(), 0, 1, &iscol_R); CHKERRQ(ierr);
154 
155  /* Looping through all sites and matrices, get the submatrices containing the required rows */
156  for(PetscInt isite=0; isite<LeftBlock.NumSites(); ++isite){
157  ierr = MatCreateSubMatrices(LeftBlock.Sz(isite), 1, &isrow_L, &iscol_L,
158  MAT_INITIAL_MATRIX, &p_SubMat(OpSz, SideLeft, isite)); CHKERRQ(ierr);
159  ierr = MatCreateSubMatrices(LeftBlock.Sp(isite), 1, &isrow_L, &iscol_L,
160  MAT_INITIAL_MATRIX, &p_SubMat(OpSp, SideLeft, isite)); CHKERRQ(ierr);
161  }
162  for(PetscInt isite=0; isite<RightBlock.NumSites(); ++isite){
163  ierr = MatCreateSubMatrices(RightBlock.Sz(isite), 1, &isrow_R, &iscol_R,
164  MAT_INITIAL_MATRIX, &p_SubMat(OpSz, SideRight, isite)); CHKERRQ(ierr);
165  ierr = MatCreateSubMatrices(RightBlock.Sp(isite), 1, &isrow_R, &iscol_R,
166  MAT_INITIAL_MATRIX, &p_SubMat(OpSp, SideRight, isite)); CHKERRQ(ierr);
167  }
168 
169  /*******************
170  PREALLOCATION
171  *******************/
172  /* Flat arrays containing, for each local row, the number of nonzeros in the diagonal and
173  off-diagonal blocks of the output Sz and Sp matrices of each site */
174  std::vector<PetscInt> D_NNZ_all(2*TotSites*lrows), O_NNZ_all(2*TotSites*lrows);
175  #define Dnnz(OPTYPE,SIDETYPE,ISITE,LROW) (D_NNZ_all[ ((ISITE + (SiteShifts_LR [SIDETYPE]) )*2+(OPTYPE))*lrows + LROW ])
176  #define Onnz(OPTYPE,SIDETYPE,ISITE,LROW) (O_NNZ_all[ ((ISITE + (SiteShifts_LR [SIDETYPE]) )*2+(OPTYPE))*lrows + LROW ])
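/* Same flattened layout as SubMatArray: each (operator type, side, site) triple owns a contiguous block of
   lrows counters, one per local row of the corresponding output matrix. */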
177 
178  /* Require the output block matrices to take only process-local entries and to error out on any nonzero outside the preallocated pattern */
179  ierr = MatSetOption_MultipleMatGroups({ BlockOut.Sz(), BlockOut.Sp() },
180  { MAT_NO_OFF_PROC_ENTRIES, MAT_NEW_NONZERO_LOCATION_ERR }, { PETSC_TRUE, PETSC_TRUE }); CHKERRQ(ierr);
181 
182  const std::vector<std::vector<Mat>>& MatOut_ZP = {BlockOut.Sz(), BlockOut.Sp()};
183  PetscInt MaxElementsPerRow = 0;
184  {
185  std::vector<PetscInt> fws_O_Sp_LR, col_NStatesR_LR;
186 
187  KronBlocksIterator KIter(KronBlocks, rstart, rstart+lrows);
188  for( ; KIter.Loop(); ++KIter)
189  {
190  const PetscInt lrow = KIter.Steps();
191  const PetscInt Row_BlockIdx_L = KIter.BlockIdxLeft();
192  const PetscInt Row_BlockIdx_R = KIter.BlockIdxRight();
193  const PetscInt Row_NumStates_R = KIter.NumStatesRight();
194  const PetscInt Row_LocIdx_L = KIter.LocIdxLeft();
195  const PetscInt Row_LocIdx_R = KIter.LocIdxRight();
196  const PetscInt Row_L = KIter.GlobalIdxLeft();
197  const PetscInt Row_R = KIter.GlobalIdxRight();
198  const PetscInt LocRow_L = MapRowsL[Row_L];
199  const PetscInt LocRow_R = MapRowsR[Row_R];
200 
201  PetscBool flg[2];
202  PetscInt nz_L, nz_R, col_NStatesR;
203  const PetscInt *idx_L, *idx_R;
204  const PetscScalar *v_L, *v_R;
205  // const PetscScalar *v_O;
206 
207  /* Precalculate the post-shift for Sz operators */
208  const PetscInt fws_O_Sz = KIter.BlockStartIdx(OpSz);
209  /* Reduce redundant map lookups by pre-calculating all possible post-shifts and right-block numstates
210  for S+ operators in the left and right blocks, updating them only when Row_BlockIdx changes */
211  if(KIter.UpdatedBlock()){
212  fws_O_Sp_LR = {
213  KronBlocks.Offsets(Row_BlockIdx_L + 1, Row_BlockIdx_R),
214  KronBlocks.Offsets(Row_BlockIdx_L, Row_BlockIdx_R + 1)
215  };
216  col_NStatesR_LR = {
217  RightBlock.Magnetization.Sizes(Row_BlockIdx_R),
218  RightBlock.Magnetization.Sizes(Row_BlockIdx_R + 1)
219  };
220  }
221 
222  /* Operator-dependent scope */
223  for(Op_t OpType: BasicOpTypes)
224  {
225  /* Calculate the backward pre-shift associated with taking only the non-zero quantum number block */
226  const std::vector<PetscInt> shift_L = {
227  LeftBlock.Magnetization.OpBlockToGlobalRangeStart(Row_BlockIdx_L, OpType, flg[SideLeft]), 0 };
228  const std::vector<PetscInt> shift_R = {
229  0, RightBlock.Magnetization.OpBlockToGlobalRangeStart(Row_BlockIdx_R, OpType, flg[SideRight])};
230 
231  for (Side_t SideType: SideTypes)
232  {
233  /* Calculate the forward shift for the final elements
234  corresponding to the first element of the non-zero block */
235  PetscInt fws_O;
236  if(OpType == OpSz)
237  {
238  col_NStatesR = Row_NumStates_R;
239  fws_O = fws_O_Sz; /* The row and column indices of the block are the same */
240  if(fws_O == -1) continue;
241  }
242  else if(OpType == OpSp)
243  {
244  /* +1 on block index that corresponds to the side */
245  col_NStatesR = col_NStatesR_LR[SideType];
246  fws_O = fws_O_Sp_LR[SideType];
247  if(fws_O == -1) continue;
248  }
249  else
250  {
251  SETERRQ(mpi_comm, 1, "Invalid operator type.");
252  }
253 
254  /* Site-dependent scope */
255  for(PetscInt isite=0; isite < NumSites_LR[SideType]; ++isite)
256  {
257  if(!flg[SideType]) continue;
258 
259  /* Get the pre-shift value of the operator for this side [L,R] and type [Sz,Sp]; the shift of the other side is set to 0 */
260  const PetscInt bks_L = shift_L[SideType];
261  const PetscInt bks_R = shift_R[SideType];
262  const Mat mat = SubMat(OpType, SideType, isite);
263 
264  /* Fill one side (L/R) with operator values and fill the other (R/L) with the identity */
265  if(SideType) /* Right */
266  {
267  nz_L = 1;
268  idx_L = &Row_LocIdx_L;
269  v_L = &one;
270  ierr = (*mat->ops->getrow)(mat, LocRow_R, &nz_R, (PetscInt**)&idx_R, (PetscScalar**)&v_R); CHKERRQ(ierr);
271  // v_O = v_R;
272  }
273  else /* Left */
274  {
275  ierr = (*mat->ops->getrow)(mat, LocRow_L, &nz_L, (PetscInt**)&idx_L, (PetscScalar**)&v_L); CHKERRQ(ierr);
276  nz_R = 1;
277  idx_R = &Row_LocIdx_R;
278  v_R = &one;
279  // v_O = v_L;
280  }
281 
282  /* Calculate the resulting indices */
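/* The output column index follows the Kronecker-product convention used throughout this routine:
   idx = (idx_L - bks_L) * col_NStatesR + (idx_R - bks_R) + fws_O,
   i.e. the left and right column indices are first made local to their quantum-number blocks, combined
   row-major within the target block, and then offset by the starting column fws_O of that block.
   Worked example with hypothetical values bks_L = 4, bks_R = 0, col_NStatesR = 3, fws_O = 10: a nonzero at
   (idx_L, idx_R) = (5, 2) lands in output column (5-4)*3 + (2-0) + 10 = 15. */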
283  PetscInt idx;
284  PetscInt& diag = Dnnz(OpType, SideType, isite,lrow);
285  PetscInt& odiag = Onnz(OpType, SideType, isite,lrow);
286  for(PetscInt l=0; l<nz_L; ++l){
287  for(PetscInt r=0; r<nz_R; ++r)
288  {
289  idx = (idx_L[l] - bks_L) * col_NStatesR + (idx_R[r] - bks_R) + fws_O;
290  if ( cstart <= idx && idx < cend ) ++diag;
291  else ++odiag;
292  }
293  }
294  PetscInt nelts = nz_L * nz_R;
295  if (nelts > MaxElementsPerRow) MaxElementsPerRow = nelts;
296  }
297  }
298  }
299  }
300  }
301 
302  /* Call the preallocation for all matrices */
303  for(Side_t SideType: SideTypes){
304  for(Op_t OpType: BasicOpTypes){
305  for(PetscInt isite = 0; isite < NumSites_LR[SideType]; ++isite){
306  ierr = MatMPIAIJSetPreallocation(
307  MatOut_ZP[OpType][isite+SiteShifts_LR[SideType]],
308  0, &Dnnz(OpType,SideType,isite,0),
309  0, &Onnz(OpType,SideType,isite,0)); CHKERRQ(ierr);
310  /* Note: Preallocation for seq not required as long as mpiaij(mkl) matrices are specified */
311  }
312  }
313  }
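/* In PETSc's MPIAIJ format the "diagonal" block holds the locally owned columns and the "off-diagonal" block
   holds all remaining columns, so passing the per-row arrays computed above (with the scalar d_nz/o_nz
   arguments set to 0) preallocates the exact nonzero pattern of each output matrix. */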
314 
315  /*************************
316  MATRIX CONSTRUCTION
317  *************************/
318 
319  /* Allocate a reusable workspace for the computed column indices */
320  PetscInt *idx;
321  ierr = PetscCalloc1(MaxElementsPerRow+1, &idx); CHKERRQ(ierr);
322 
323  {
324  KronBlocksIterator KIter(KronBlocks, rstart, rstart+lrows); /* Iterates through component subspaces and final block */
325  std::vector<PetscInt> fws_O_Sp_LR, col_NStatesR_LR;
326  /* Iterate through all basis states belonging to local rows */
327  for( ; KIter.Loop(); ++KIter)
328  {
329  const PetscInt lrow = KIter.Steps(); /* output local row index */
330  const PetscInt Irow = lrow + rstart; /* output global row index */
331  /* Block indices of this row within the Kronecker-product structure */
332  const PetscInt Row_BlockIdx_L = KIter.BlockIdxLeft();
333  const PetscInt Row_BlockIdx_R = KIter.BlockIdxRight();
334  const PetscInt Row_NumStates_R = KIter.NumStatesRight();
335  /* Local index of the row in the Kronecker-product block */
336  const PetscInt Row_LocIdx_L = KIter.LocIdxLeft();
337  const PetscInt Row_LocIdx_R = KIter.LocIdxRight();
338  /* Corresponding local row indices in the sequential submatrices gathered for this process */
339  const PetscInt LocRow_L = MapRowsL[KIter.GlobalIdxLeft()];
340  const PetscInt LocRow_R = MapRowsR[KIter.GlobalIdxRight()];
341 
342  PetscBool flg[2];
343  PetscInt nz_L, nz_R, col_NStatesR;
344  const PetscInt *idx_L, *idx_R;
345  const PetscScalar *v_L, *v_R, *v_O;
346 
347  /* Precalculate the post-shift for Sz operators */
348  const PetscInt fws_O_Sz = KIter.BlockStartIdx(OpSz);
349  /* Reduce redundant map lookups by pre-calculating all possible post-shifts and right-block numstates
350  for S+ operators in the left and right blocks, updating them only when Row_BlockIdx changes */
351  if(KIter.UpdatedBlock()){
352  fws_O_Sp_LR = {
353  KronBlocks.Offsets(Row_BlockIdx_L + 1, Row_BlockIdx_R),
354  KronBlocks.Offsets(Row_BlockIdx_L, Row_BlockIdx_R + 1)
355  };
356  col_NStatesR_LR = {
357  RightBlock.Magnetization.Sizes(Row_BlockIdx_R),
358  RightBlock.Magnetization.Sizes(Row_BlockIdx_R + 1)
359  };
360  }
361 
362  /* Operator-dependent scope */
363  const std::vector<std::vector<Mat>>& MatOut = {BlockOut.Sz(), BlockOut.Sp()};
364  for(Op_t OpType: BasicOpTypes)
365  {
366  /* Calculate the backward pre-shift associated with taking only the non-zero quantum number block */
367  const std::vector<PetscInt> shift_L = {
368  LeftBlock.Magnetization.OpBlockToGlobalRangeStart(Row_BlockIdx_L, OpType, flg[SideLeft]), 0 };
369  const std::vector<PetscInt> shift_R = {
370  0, RightBlock.Magnetization.OpBlockToGlobalRangeStart(Row_BlockIdx_R, OpType, flg[SideRight])};
371 
372  for (Side_t SideType: SideTypes)
373  {
374  /* Calculate the forward shift for the final elements
375  corresponding to the first element of the non-zero block */
376  PetscInt fws_O;
377  if(OpType == OpSz)
378  {
379  col_NStatesR = Row_NumStates_R;
380  fws_O = fws_O_Sz; /* The row and column indices of the block are the same */
381  if(fws_O == -1) continue;
382  }
383  else if(OpType == OpSp)
384  {
385  /* +1 on block index that corresponds to the side */
386  col_NStatesR = col_NStatesR_LR[SideType];
387  fws_O = fws_O_Sp_LR[SideType];
388  if(fws_O == -1) continue;
389  }
390  else
391  {
392  SETERRQ(mpi_comm, 1, "Invalid operator type.");
393  }
394 
395  /* Corresponding shift in position of the final site */
396  const PetscInt ishift = SiteShifts_LR[SideType];
397 
398  /* Site-dependent scope */
399  for(PetscInt isite=0; isite < NumSites_LR[SideType]; ++isite)
400  {
401  if(!flg[SideType]) continue;
402 
403  /* Get the pre-shift value of the operator for this side [L,R] and type [Sz,Sp]; the shift of the other side is set to 0 */
404  const PetscInt bks_L = shift_L[SideType];
405  const PetscInt bks_R = shift_R[SideType];
406  const Mat mat = SubMat(OpType, SideType, isite);
407 
408  /* Fill one side (L/R) with operator values and fill the other (R/L) with the identity */
409  if(SideType) /* Right */
410  {
411  nz_L = 1;
412  idx_L = &Row_LocIdx_L;
413  v_L = &one;
414  ierr = (*mat->ops->getrow)(mat, LocRow_R, &nz_R, (PetscInt**)&idx_R, (PetscScalar**)&v_R); CHKERRQ(ierr);
415  v_O = v_R;
416  }
417  else /* Left */
418  {
419  ierr = (*mat->ops->getrow)(mat, LocRow_L, &nz_L, (PetscInt**)&idx_L, (PetscScalar**)&v_L); CHKERRQ(ierr);
420  nz_R = 1;
421  idx_R = &Row_LocIdx_R;
422  v_R = &one;
423  v_O = v_L;
424  }
425 
426  /* Calculate the resulting indices */
427  for(PetscInt l=0; l<nz_L; ++l)
428  for(PetscInt r=0; r<nz_R; ++r)
429  idx[l*nz_R+r] = (idx_L[l] - bks_L) * col_NStatesR + (idx_R[r] - bks_R) + fws_O;
430 
431  /* Set the matrix elements for this row in the output matrix */
432  ierr = MatSetValues(MatOut[OpType][isite+ishift], 1, &Irow, nz_L*nz_R, &idx[0], v_O, INSERT_VALUES); CHKERRQ(ierr);
433  }
434  }
435  }
436  }
437  }
438 
439  /* Assemble all output block matrices */
440  ierr = MatEnsureAssembled_MultipleMatGroups({BlockOut.Sz(), BlockOut.Sp()}); CHKERRQ(ierr);
441 
442  for(PetscInt i=0; i<2*TotSites; ++i){
443  ierr = MatDestroySubMatrices(1, &SubMatArray[i]); CHKERRQ(ierr);
444  }
445  ierr = PetscFree(idx); CHKERRQ(ierr);
446  ierr = PetscFree(SubMatArray); CHKERRQ(ierr);
447  ierr = ISDestroy(&isrow_L); CHKERRQ(ierr);
448  ierr = ISDestroy(&isrow_R); CHKERRQ(ierr);
449  ierr = ISDestroy(&iscol_L); CHKERRQ(ierr);
450  ierr = ISDestroy(&iscol_R); CHKERRQ(ierr);
451  #undef p_SubMat
452  #undef SubMat
453  #undef Dnnz
454  #undef Onnz
455  return ierr;
456 }
457 
458 
459 PetscErrorCode KronEye_Explicit(
460  Block::SpinBase& LeftBlock,
461  Block::SpinBase& RightBlock,
462  const std::vector< Hamiltonians::Term >& Terms,
463  Block::SpinBase& BlockOut
464  )
465 {
466  PetscErrorCode ierr = 0;
467 
468  /* Require input blocks to be initialized */
469  if(!LeftBlock.Initialized()) SETERRQ(PETSC_COMM_SELF,1,"Left input block not initialized.");
470  if(!RightBlock.Initialized()) SETERRQ(PETSC_COMM_SELF,1,"Right input block not initialized.");
471 
472  MPI_Comm mpi_comm = LeftBlock.MPIComm();
473  if(mpi_comm != RightBlock.MPIComm()) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Input blocks must have the same communicator.");
474  PetscMPIInt mpi_rank, mpi_size;
475  ierr = MPI_Comm_rank(mpi_comm, &mpi_rank); CHKERRQ(ierr);
476  ierr = MPI_Comm_size(mpi_comm, &mpi_size); CHKERRQ(ierr);
477 
478  /* For checking the accuracy of the routine. TODO: Remove later */
479  #if defined(DMRG_KRON_DEBUG)
480  PRINT_RANK_BEGIN()
481  std::cout << "***** Kron_Explicit *****" << std::endl;
482  std::cout << "LeftBlock qn_list: ";
483  for(auto i: LeftBlock.Magnetization.List()) std::cout << i << " ";
484  std::cout << std::endl;
485 
486  std::cout << "LeftBlock qn_size: ";
487  for(auto i: LeftBlock.Magnetization.Sizes()) std::cout << i << " ";
488  std::cout << std::endl;
489 
490  std::cout << "LeftBlock qn_offset: ";
491  for(auto i: LeftBlock.Magnetization.Offsets()) std::cout << i << " ";
492  std::cout << std::endl;
493 
494  std::cout << std::endl;
495 
496  std::cout << "RightBlock qn_list: ";
497  for(auto i: RightBlock.Magnetization.List()) std::cout << i << " ";
498  std::cout << std::endl;
499 
500  std::cout << "RightBlock qn_size: ";
501  for(auto i: RightBlock.Magnetization.Sizes()) std::cout << i << " ";
502  std::cout << std::endl;
503 
504  std::cout << "RightBlock qn_offset: ";
505  for(auto i: RightBlock.Magnetization.Offsets()) std::cout << i << " ";
506  std::cout << std::endl;
507  PRINT_RANK_END()
508  #endif
509 
510  /* Check the validity of input blocks */
511  ierr = LeftBlock.CheckOperators(); CHKERRQ(ierr);
512  ierr = LeftBlock.CheckSectors(); CHKERRQ(ierr);
513  ierr = LeftBlock.CheckOperatorBlocks(); CHKERRQ(ierr); /* NOTE: Possibly costly operation */
514  ierr = RightBlock.CheckOperators(); CHKERRQ(ierr);
515  ierr = RightBlock.CheckSectors(); CHKERRQ(ierr);
516  ierr = RightBlock.CheckOperatorBlocks(); CHKERRQ(ierr); /* NOTE: Possibly costly operation */
517 
518  /* Create a list of tuples of quantum numbers following the Kronecker product structure */
519  KronBlocks_t KronBlocks(LeftBlock, RightBlock, {}, NULL, -1);
520 
521  #if defined(DMRG_KRON_DEBUG)
522  PRINT_RANK_BEGIN()
523  {
524  PetscInt i = 0;
525  std::cout << "KronBlocks: \n";
526  for(KronBlock_t kb: KronBlocks.data())
527  {
528  std::cout << "( "
529  << std::get<0>(kb) << ", "
530  << std::get<1>(kb) << ", "
531  << std::get<2>(kb) << ", "
532  << std::get<3>(kb) << ", "
533  << KronBlocks.Offsets()[i++] <<" )\n";
534  }
535  std::cout << "*************************" << std::endl;
536  }
537  PRINT_RANK_END()
538  #endif
539 
540  /* Count the input and output number of sites */
541  PetscInt nsites_left = LeftBlock.NumSites();
542  PetscInt nsites_right = RightBlock.NumSites();
543  PetscInt nsites_out = nsites_left + nsites_right;
544 
545  /* Count the input and output number of sectors */
546  PetscInt nsectors_left = LeftBlock.Magnetization.NumSectors();
547  PetscInt nsectors_right = RightBlock.Magnetization.NumSectors();
548  PetscInt nsectors_out = nsectors_left * nsectors_right;
549  if(PetscUnlikely(KronBlocks.size() != nsectors_out ))
550  SETERRQ2(mpi_comm, 1, "Mismatch in number of sectors. Expected %lu. Got %d.", KronBlocks.size(), nsectors_out);
551 
552  /* Count the input and output number of states */
553  PetscInt nstates_left = LeftBlock.Magnetization.NumStates();
554  PetscInt nstates_right = RightBlock.Magnetization.NumStates();
555  PetscInt nstates_out = nstates_left * nstates_right;
556  PetscInt KronBlocks_nstates = KronBlocks.NumStates();
557  if(PetscUnlikely(KronBlocks_nstates != nstates_out))
558  SETERRQ2(mpi_comm, 1, "Mismatch in number of states. Expected %lu. Got %d.", KronBlocks_nstates, nstates_out);
559 
560  /* Consecutive duplicate quantum numbers in the sorted KronBlocks list must be merged into a single quantum number block */
561  std::vector<PetscReal> QN_List;
562  std::vector<PetscInt> QN_Size;
563  PetscReal QN_last = 0;
564  for (auto tup: KronBlocks.data()){
565  const PetscReal& qn = std::get<0>(tup);
566  const PetscInt& size = std::get<3>(tup);
567  if(qn < QN_last || QN_List.size()==0){
568  QN_List.push_back(qn);
569  QN_Size.push_back(size);
570  } else {
571  QN_Size.back() += size;
572  }
573  QN_last = qn;
574  }
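/* Illustrative sketch (hypothetical values): if the sorted KronBlocks carry quantum numbers (1, 0, 0, -1)
   with sizes (1, 2, 2, 1), the loop above yields QN_List = [1, 0, -1] and QN_Size = [1, 4, 1], i.e. the two
   qn = 0 sectors are merged into a single block of the output basis. */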
575 
576  PetscInt QN_Size_total = 0;
577  for(PetscInt size: QN_Size) QN_Size_total += size;
578  if(PetscUnlikely(nstates_out != QN_Size_total))
579  SETERRQ2(mpi_comm, 1, "Mismatch in number of states. Expected %d. Got %d.", nstates_out, QN_Size_total);
580 
581  #if defined(DMRG_KRON_DEBUG)
582  PRINT_RANK_BEGIN()
583  std::cout << "QN_List: "; for(auto q: QN_List) std::cout << q << " "; std::cout << std::endl;
584  std::cout << "Total Sites: " << nsites_out << std::endl;
585  std::cout << "Total Sectors: " << nsectors_out << std::endl;
586  std::cout << "Total States: " << nstates_out << std::endl;
587  PRINT_RANK_END()
588  #endif
589 
590  /* Initialize the new block using the quantum number blocks */
591  ierr = BlockOut.Initialize(mpi_comm, nsites_out, QN_List, QN_Size); CHKERRQ(ierr);
592 
593  #if defined(DMRG_KRON_DEBUG)
594  PRINT_RANK_BEGIN()
595  std::cout << "Mag: QN_List: "; for(auto q: BlockOut.Magnetization.List()) std::cout << q << " "; std::cout << std::endl;
596  std::cout << "Mag: QN_Size: "; for(auto q: BlockOut.Magnetization.Sizes()) std::cout << q << " "; std::cout << std::endl;
597  PRINT_RANK_END()
598  #endif
599 
600  /* Combine sites from the old blocks to form the new block */
601  /* Expand the left-block states explicitly by padding identities to the right */
602  /* Expand the right-block states explicitly by padding identities to the left */
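/* Schematically, for every site i of the left block and every site j of the right block,
   Sz_out(i) = Sz_L(i) (x) I_R, Sp_out(i) = Sp_L(i) (x) I_R,
   Sz_out(NL + j) = I_L (x) Sz_R(j), Sp_out(NL + j) = I_L (x) Sp_R(j),
   where NL = LeftBlock.NumSites() and the Kronecker products are expressed in the quantum-number-sorted
   basis defined by KronBlocks. */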
603  ierr = MatKronEyeConstruct(LeftBlock, RightBlock, KronBlocks, BlockOut); CHKERRQ(ierr);
604 
605  /* Fill in the Hamiltonian of the output block
606  This part assumes that the input terms include all terms of all sites involved */
607  for(const Hamiltonians::Term& term: Terms){
608  if(term.Isite >= nsites_out || term.Jsite >= nsites_out)
609  SETERRQ3(mpi_comm,1,"Term site indices must be less than %d. Got %d and %d.",nsites_out,term.Isite,term.Jsite);
610  }
611 
612  ierr = KronBlocks.KronSumConstruct(Terms, BlockOut.H); CHKERRQ(ierr);
613 
614  return ierr;
615 }
616 
617 
619  const std::vector< Mat >& Matrices,
620  const Side_t& SideType
621  )
622 {
623  PetscErrorCode ierr = 0;
624  for(const Mat& mat: Matrices){
625  if (SideType == SideLeft){
626  ierr = LeftBlock.MatCheckOperatorBlocks(OpSz, mat); CHKERRQ(ierr);
627  } else if (SideType == SideRight){
628  ierr = RightBlock.MatCheckOperatorBlocks(OpSz, mat); CHKERRQ(ierr);
629  }
630  }
631  return(0);
632 }
633 
634 
636  const Mat& Mat_L,
637  const Op_t& OpType_L,
638  const Mat& Mat_R,
639  const Op_t& OpType_R,
640  Mat& MatOut
641  )
642 {
643  PetscErrorCode ierr;
644  FUNCTION_TIMINGS_BEGIN()
645 
646  /* Verify whether the input matrices conform to the given operator types */
647  ierr = LeftBlock.MatCheckOperatorBlocks(OpType_L, Mat_L); CHKERRQ(ierr);
648  ierr = RightBlock.MatCheckOperatorBlocks(OpType_R, Mat_R); CHKERRQ(ierr);
649 
650  if(do_shell)
651  {
652  KronSumShellCtx *shellctx;
653  ierr = PetscNew(&shellctx); CHKERRQ(ierr);
654  shellctx->p_ctx = new KronSumCtx();
655  KronSumCtx& ctx = *(shellctx->p_ctx);
656 
657  /* Assumes that output matrix is square */
658  ctx.Nrows = ctx.Ncols = num_states;
659  /* Split the ownership of rows depending on a previous call to KronSumShellSplitOwnership */
661  {
662  ctx.lrows = prev_lrows;
663  ctx.rstart = prev_rstart;
664  }
665  else
666  {
667  /* Split the ownership of rows using the default way in petsc */
668  ierr = PreSplitOwnership(mpi_comm, ctx.Nrows, ctx.lrows, ctx.rstart); CHKERRQ(ierr);
669  }
670  ctx.cstart = ctx.rstart;
671  ctx.lcols = ctx.lrows;
672  ctx.rend = ctx.cend = ctx.rstart + ctx.lrows;
673 
674  ierr = KronGetSubmatrices(Mat_L, OpType_L, Mat_R, OpType_R, ctx); CHKERRQ(ierr);
675  ierr = KronSumSetUpShellTerms(shellctx); CHKERRQ(ierr);
676  /* Create vector scatter object */
677  Vec x_mpi;
678  ierr = VecCreateMPI(mpi_comm, ctx.lrows, PETSC_DETERMINE, &x_mpi); CHKERRQ(ierr);
679  ierr = VecScatterCreateToAll(x_mpi, &shellctx->vsctx, &shellctx->x_seq); CHKERRQ(ierr);
680  ierr = VecDestroy(&x_mpi); CHKERRQ(ierr);
681 
682  /* Create the matrix */
683  ierr = MatCreateShell(mpi_comm, ctx.lrows, ctx.lcols, ctx.Nrows, ctx.Ncols, (void*)shellctx, &MatOut); CHKERRQ(ierr);
684  ierr = MatShellSetOperation(MatOut, MATOP_MULT, (void(*)())MatMult_KronSumShell); CHKERRQ(ierr);
685  }
686  else
687  {
688  SETERRQ(mpi_comm,1,"Not implemented for non-shell matrices");
689  }
690 
691  FUNCTION_TIMINGS_END()
692  return(0);
693 }
694 
695 
696 PetscErrorCode KronBlocks_t::KronGetSubmatrices(
697  const Mat& Mat_L,
698  const Op_t& OpType_L,
699  const Mat& Mat_R,
700  const Op_t& OpType_R,
701  KronSumCtx& ctx
702  )
703 {
704  PetscErrorCode ierr;
705  FUNCTION_TIMINGS_BEGIN()
706 
707  /* Determine the local rows to be collected from each of the left and right blocks */
708  {
709  KronBlocksIterator KIter(*this, ctx.rstart, ctx.rend);
710  std::set<PetscInt> SetRowsL, SetRowsR;
711  for( ; KIter.Loop(); ++KIter)
712  {
713  SetRowsL.insert(KIter.GlobalIdxLeft());
714  SetRowsR.insert(KIter.GlobalIdxRight());
715  }
716  ctx.NReqRowsL = SetRowsL.size();
717  ctx.NReqRowsR = SetRowsR.size();
718  ctx.ReqRowsL.resize(ctx.NReqRowsL);
719  ctx.ReqRowsR.resize(ctx.NReqRowsR);
720  size_t idx = 0;
721  for(PetscInt row: SetRowsL){
722  ctx.ReqRowsL[idx] = row;
723  ctx.MapRowsL[row] = idx++;
724  }
725  idx = 0;
726  for(PetscInt row: SetRowsR){
727  ctx.ReqRowsR[idx] = row;
728  ctx.MapRowsR[row] = idx++;
729  }
730  }
731  /* Generate the index sets needed to get the rows and columns */
732  IS isrow_L, isrow_R, iscol_L, iscol_R;
733  /* Get only some required rows */
734  ierr = ISCreateGeneral(mpi_comm, ctx.NReqRowsL, ctx.ReqRowsL.data(), PETSC_USE_POINTER, &isrow_L); CHKERRQ(ierr);
735  ierr = ISCreateGeneral(mpi_comm, ctx.NReqRowsR, ctx.ReqRowsR.data(), PETSC_USE_POINTER, &isrow_R); CHKERRQ(ierr);
736  /* Get all columns in each required row */
737  ierr = ISCreateStride(mpi_comm, LeftBlock.NumStates(), 0, 1, &iscol_L); CHKERRQ(ierr);
738  ierr = ISCreateStride(mpi_comm, RightBlock.NumStates(), 0, 1, &iscol_R); CHKERRQ(ierr);
739 
740  {
741  Mat *submat_L, *submat_R;
742  ierr = MatCreateSubMatrices(Mat_L, 1, &isrow_L, &iscol_L, MAT_INITIAL_MATRIX, &submat_L); CHKERRQ(ierr);
743  ierr = MatCreateSubMatrices(Mat_R, 1, &isrow_R, &iscol_R, MAT_INITIAL_MATRIX, &submat_R); CHKERRQ(ierr);
744  ctx.Terms.push_back({1.0, OpType_L, submat_L[0], OpType_R, submat_R[0]});
745  ctx.LocalSubMats.push_back(submat_L);
746  ctx.LocalSubMats.push_back(submat_R);
747  }
748 
749  ierr = ISDestroy(&isrow_L); CHKERRQ(ierr);
750  ierr = ISDestroy(&isrow_R); CHKERRQ(ierr);
751  ierr = ISDestroy(&iscol_L); CHKERRQ(ierr);
752  ierr = ISDestroy(&iscol_R); CHKERRQ(ierr);
753 
754  FUNCTION_TIMINGS_END()
755  return(0);
756 }
757 
758 
760  const std::vector< Hamiltonians::Term >& Terms,
761  Mat& MatOut
762  )
763 {
764  PetscErrorCode ierr = 0;
765 
766  /* Count the input and total number of sites */
767  PetscInt nsites_left = LeftBlock.NumSites();
768  PetscInt nsites_right = RightBlock.NumSites();
769  PetscInt nsites_out = nsites_left + nsites_right;
770 
771  /* Check that the maximum site index in Terms is less than the number of sites in the blocks */
772  PetscInt Max_Isite = 0;
773  for(const Hamiltonians::Term& term: Terms){
774  Max_Isite = ( term.Isite > Max_Isite ) ? term.Isite : Max_Isite;
775  Max_Isite = ( term.Jsite > Max_Isite ) ? term.Jsite : Max_Isite;
776  }
777  if(Max_Isite >= nsites_out) SETERRQ2(mpi_comm, 1, "Maximum site index from Terms (%d) has to be "
778  "less than the total number of sites in the blocks (%d).", Max_Isite, nsites_out);
779 
780  /* Check the validity of input blocks */
781  ierr = LeftBlock.CheckOperators(); CHKERRQ(ierr);
782  ierr = LeftBlock.CheckSectors(); CHKERRQ(ierr);
783  ierr = LeftBlock.CheckOperatorBlocks(); CHKERRQ(ierr); /* NOTE: Possibly costly operation */
784  ierr = RightBlock.CheckOperators(); CHKERRQ(ierr);
785  ierr = RightBlock.CheckSectors(); CHKERRQ(ierr);
786  ierr = RightBlock.CheckOperatorBlocks(); CHKERRQ(ierr); /* NOTE: Possibly costly operation */
787 
788  /* Classify the terms according to whether an intra-block or inter-block product will be performed */
789  std::vector< Hamiltonians::Term > TermsLR; /* Inter-block */
790  for( const Hamiltonians::Term& term: Terms ){
791  if ((0 <= term.Isite && term.Isite < nsites_left) && (nsites_left <= term.Jsite && term.Jsite < nsites_out)){
792  if(term.a == PetscScalar(0.0)) continue;
793  TermsLR.push_back(term);
794  }
795  else if ((0 <= term.Isite && term.Isite < nsites_left) && (0 <= term.Jsite && term.Jsite < nsites_left)){}
796  else if ((nsites_left <= term.Isite && term.Isite < nsites_out) && (nsites_left <= term.Jsite && term.Jsite < nsites_out)){}
797  else {
798  SETERRQ4(mpi_comm, 1, "Invalid term: Isite=%d Jsite=%d for nsites_left=%d and nsites_right=%d.",
799  term.Isite, term.Jsite, nsites_left, nsites_right);
800  }
801  }
802 
803  /* REFLECTION SYMMETRY: Reverse the sequence of sites in the right block so that
804  new sites are always located at the interface */
805  for (Hamiltonians::Term& term: TermsLR){
806  term.Jsite = nsites_out - 1 - term.Jsite;
807  }
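/* For example, with nsites_left = nsites_right = 4 (nsites_out = 8), the interface site of the right block,
   global index 4, is relabeled 8 - 1 - 4 = 3, while the outermost site, global index 7, becomes 0. The right
   block thus numbers its sites from the outer edge inward, and the relabeled value is later used as the site
   index when the right block's operator matrices are looked up. */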
808 
809  /* Check whether there is any need to create the Sm matrices
810  TODO: Implement interface to selectively create only the needed Sm matrices */
811  PetscBool CreateSmL = PETSC_FALSE, CreateSmR = PETSC_FALSE;
812  for(const Hamiltonians::Term& term: TermsLR){
813  if(term.Iop == OpSm){
814  CreateSmL = PETSC_TRUE;
815  ierr = LeftBlock.CreateSm(); CHKERRQ(ierr);
816  break;
817  }
818  }
819  for(const Hamiltonians::Term& term: TermsLR){
820  if(term.Jop == OpSm){
821  CreateSmR = PETSC_TRUE;
822  ierr = RightBlock.CreateSm(); CHKERRQ(ierr);
823  break;
824  }
825  }
826 
827  if(do_shell){
828  ierr = KronSumConstructShell(TermsLR, MatOut); CHKERRQ(ierr);
829  } else {
830  ierr = KronSumConstructExplicit(TermsLR, MatOut); CHKERRQ(ierr);
831  }
832 
833  /* Destroy Sm in advance to avoid clashes with modifications in Sp */
834  if(CreateSmL){
835  ierr = LeftBlock.DestroySm(); CHKERRQ(ierr);
836  }
837  if(CreateSmR){
838  ierr = RightBlock.DestroySm(); CHKERRQ(ierr);
839  }
840  return(0);
841 }
842 
843 
844 PetscErrorCode KronBlocks_t::KronSumConstructExplicit(
845  const std::vector< Hamiltonians::Term >& TermsLR,
846  Mat& MatOut
847  )
848 {
849  PetscErrorCode ierr = 0;
850  /* Assumes that output matrix is square */
851  KronSumCtx ctx;
852  ctx.Nrows = ctx.Ncols = num_states;
853  /* Split the ownership of rows using the default way in petsc */
854  ierr = PreSplitOwnership(mpi_comm, ctx.Nrows, ctx.lrows, ctx.rstart); CHKERRQ(ierr);
855  ctx.cstart = ctx.rstart;
856  ctx.lcols = ctx.lrows;
857  ctx.rend = ctx.cend = ctx.rstart + ctx.lrows;
858 
859  TIMINGS_NEWLINE()
860  ierr = KronSumGetSubmatrices(LeftBlock.H, RightBlock.H, TermsLR, ctx); CHKERRQ(ierr);
861  ierr = KronSumCalcPreallocation(ctx); CHKERRQ(ierr);
862  if(do_redistribute){
863  PetscBool flg;
864  ierr = KronSumRedistribute(ctx,flg); CHKERRQ(ierr);
865  if(flg){
866  ierr = KronSumGetSubmatrices(LeftBlock.H, RightBlock.H, TermsLR, ctx); CHKERRQ(ierr);
867  ierr = KronSumCalcPreallocation(ctx); CHKERRQ(ierr);
868  }
869  }
870  ierr = SavePreallocData(ctx); CHKERRQ(ierr);
871  ierr = LeftBlock.EnsureSaved(); CHKERRQ(ierr);
872  ierr = RightBlock.EnsureSaved(); CHKERRQ(ierr);
873  ierr = KronSumPreallocate(ctx, MatOut); CHKERRQ(ierr);
874  ierr = KronSumFillMatrix(ctx, MatOut); CHKERRQ(ierr);
875 
876  /* Destroy local submatrices and temporary matrices */
877  for(Mat *mat: ctx.LocalSubMats){
878  ierr = MatDestroySubMatrices(1,&mat); CHKERRQ(ierr);
879  }
880  return(0);
881 }
882 
883 
884 #define GetBlockMat(BLOCK,OP,ISITE)\
885  ((OP)==OpSp ? (BLOCK).Sp(ISITE) : ((OP)==OpSm ? (BLOCK).Sm(ISITE) : ((OP)==OpSz ? (BLOCK).Sz(ISITE) : NULL)))
886 
887 #define GetBlockMatFromTuple(BLOCK,TUPLE)\
888  GetBlockMat((BLOCK), std::get<0>(TUPLE), std::get<1>(TUPLE))
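/* GetBlockMat maps an operator type and a site index to the corresponding operator matrix of a block
   (Sp, Sm or Sz), yielding NULL for any other operator type; GetBlockMatFromTuple unpacks an
   (operator type, site index) tuple and forwards it to GetBlockMat. */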
889 
890 
891 PetscErrorCode KronBlocks_t::KronSumGetSubmatrices(
892  const Mat& OpProdSumLL,
893  const Mat& OpProdSumRR,
894  const std::vector< Hamiltonians::Term >& TermsLR,
895  KronSumCtx& ctx
896  )
897 {
898  PetscErrorCode ierr = 0;
899  FUNCTION_TIMINGS_BEGIN()
900 
901  /* Determine the local rows to be collected from each of the left and right blocks */
902  {
903  KronBlocksIterator KIter(*this, ctx.rstart, ctx.rend);
904  std::set<PetscInt> SetRowsL, SetRowsR;
905  for( ; KIter.Loop(); ++KIter)
906  {
907  SetRowsL.insert(KIter.GlobalIdxLeft());
908  SetRowsR.insert(KIter.GlobalIdxRight());
909  }
910  ctx.NReqRowsL = SetRowsL.size();
911  ctx.NReqRowsR = SetRowsR.size();
912  ctx.ReqRowsL.resize(ctx.NReqRowsL);
913  ctx.ReqRowsR.resize(ctx.NReqRowsR);
914  size_t idx = 0;
915  for(PetscInt row: SetRowsL){
916  ctx.ReqRowsL[idx] = row;
917  ctx.MapRowsL[row] = idx++;
918  }
919  idx = 0;
920  for(PetscInt row: SetRowsR){
921  ctx.ReqRowsR[idx] = row;
922  ctx.MapRowsR[row] = idx++;
923  }
924  }
925  /* Generate the index sets needed to get the rows and columns */
926  IS isrow_L, isrow_R, iscol_L, iscol_R;
927  /* Get only some required rows */
928  ierr = ISCreateGeneral(mpi_comm, ctx.NReqRowsL, ctx.ReqRowsL.data(), PETSC_USE_POINTER, &isrow_L); CHKERRQ(ierr);
929  ierr = ISCreateGeneral(mpi_comm, ctx.NReqRowsR, ctx.ReqRowsR.data(), PETSC_USE_POINTER, &isrow_R); CHKERRQ(ierr);
930  /* Get all columns in each required row */
931  ierr = ISCreateStride(mpi_comm, LeftBlock.NumStates(), 0, 1, &iscol_L); CHKERRQ(ierr);
932  ierr = ISCreateStride(mpi_comm, RightBlock.NumStates(), 0, 1, &iscol_R); CHKERRQ(ierr);
933 
934  /* Collect the submatrices, append them to ctx.Terms, and record them in LocalSubMats so that the
935  local submatrices are tracked and destroyed after use */
936  const PetscInt NumTerms = 2 + TermsLR.size();
937  ctx.Terms.reserve(NumTerms);
938  /* LL terms */
939  {
940  Mat *A;
941  ierr = MatCreateSubMatrices(OpProdSumLL, 1, &isrow_L, &iscol_L, MAT_INITIAL_MATRIX, &A); CHKERRQ(ierr);
942  ctx.Terms.push_back({1.0,OpSz,*A,OpEye,nullptr});
943  ctx.LocalSubMats.push_back(A);
944  }
945  /* RR terms */
946  {
947  Mat *B;
948  ierr = MatCreateSubMatrices(OpProdSumRR, 1, &isrow_R, &iscol_R, MAT_INITIAL_MATRIX, &B); CHKERRQ(ierr);
949  ctx.Terms.push_back({1.0,OpEye,nullptr,OpSz,*B});
950  ctx.LocalSubMats.push_back(B);
951  }
952  /* LR terms
953  Create a mapping from each (operator type, site index) pair appearing in the L-R terms
954  to its local submatrix */
955  std::map< std::tuple< PetscInt, PetscInt >, Mat> OpLeft;
956  std::map< std::tuple< PetscInt, PetscInt >, Mat> OpRight;
957  for (const Hamiltonians::Term& term: TermsLR){
958  OpLeft[ std::make_tuple(term.Iop, term.Isite) ] = nullptr;
959  OpRight[ std::make_tuple(term.Jop, term.Jsite) ] = nullptr;
960  }
961  /* For each of the operators in OpLeft and OpRight, get the submatrix of locally needed rows */
962  for (auto& Op: OpLeft){
963  const Mat mat = GetBlockMatFromTuple(LeftBlock, Op.first);
964  Mat *submat;
965  ierr = MatCreateSubMatrices(mat, 1, &isrow_L, &iscol_L, MAT_INITIAL_MATRIX, &submat); CHKERRQ(ierr);
966  Op.second = submat[0];
967  ctx.LocalSubMats.push_back(submat);
968  }
969  for (auto& Op: OpRight){
970  const Mat mat = GetBlockMatFromTuple(RightBlock, Op.first);
971  Mat *submat;
972  ierr = MatCreateSubMatrices(mat, 1, &isrow_R, &iscol_R, MAT_INITIAL_MATRIX, &submat); CHKERRQ(ierr);
973  Op.second = submat[0];
974  ctx.LocalSubMats.push_back(submat);
975  }
976  /* Generate the terms */
977  for (const Hamiltonians::Term& term: TermsLR){
978  const Mat A = OpLeft.at(std::make_tuple(term.Iop, term.Isite));
979  const Mat B = OpRight.at(std::make_tuple(term.Jop, term.Jsite));
980  ctx.Terms.push_back({term.a, term.Iop, A, term.Jop, B});
981  }
982  ierr = ISDestroy(&isrow_L); CHKERRQ(ierr);
983  ierr = ISDestroy(&isrow_R); CHKERRQ(ierr);
984  ierr = ISDestroy(&iscol_L); CHKERRQ(ierr);
985  ierr = ISDestroy(&iscol_R); CHKERRQ(ierr);
986 
987  FUNCTION_TIMINGS_END()
988  return(0);
989 }
990 
991 #undef GetBlockMat
992 #undef GetBlockMatFromTuple
993 
994 PetscErrorCode KronBlocks_t::KronSumCalcPreallocation(
995  KronSumCtx& ctx
996  )
997 {
998  PetscErrorCode ierr = 0;
999  FUNCTION_TIMINGS_BEGIN()
1000 
1001  /* Prepare a full dense row */
1002  PetscInt Nvals = ctx.lrows > 0 ? ctx.Ncols : 1;
1003  PetscScalar *val_arr;
1004 
1005  ierr = PetscCalloc1(Nvals, &val_arr); CHKERRQ(ierr);
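/* Preallocation strategy: instead of counting the nonzeros of each Kronecker term separately, every term's
   contribution to the current row is accumulated into this dense length-Ncols buffer, and entries are counted
   only where the accumulated magnitude reaches ks_tol. Cancellations between different Hamiltonian terms
   therefore do not inflate the preallocated pattern. */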
1006 
1007  /* Go through each local row, then go through each term in ctx
1008  and determine the number of entries that go into each row */
1009  ctx.MinIdx = ctx.Ncols-1;
1010  ctx.MaxIdx = 0;
1011  ctx.Nnz = 0;
1012  ctx.Nelts = 0;
1013  ierr = PetscCalloc2(ctx.lrows, &ctx.Dnnz, ctx.lrows, &ctx.Onnz); CHKERRQ(ierr);
1014 
1015  /* Lookup tables for the forward shift, keyed by the operator type of the left block. This assumes that
1016  the operator on the right block is of a compatible type, so that each resulting matrix term is
1017  block-diagonal in the total quantum number */
1018  std::map< Op_t, PetscInt > fws_LOP = {};
1019  std::map< Op_t, PetscInt > Row_NumStates_ROP = {};
1020  if(ctx.lrows)
1021  {
1022  KronBlocksIterator KIter(*this, ctx.rstart, ctx.rend);
1023  for( ; KIter.Loop(); ++KIter)
1024  {
1025  const PetscInt lrow = KIter.Steps();
1026  const PetscInt Row_BlockIdx_L = KIter.BlockIdxLeft();
1027  const PetscInt Row_BlockIdx_R = KIter.BlockIdxRight();
1028  const PetscInt Row_L = KIter.GlobalIdxLeft();
1029  const PetscInt Row_R = KIter.GlobalIdxRight();
1030  const PetscInt LocRow_L = ctx.MapRowsL[Row_L];
1031  const PetscInt LocRow_R = ctx.MapRowsR[Row_R];
1032 
1033  PetscBool flg[2];
1034  PetscInt nz_L, nz_R, bks_L, bks_R, col_NStatesR, fws_O, MinIdx, MaxIdx;
1035  const PetscInt *idx_L, *idx_R;
1036  const PetscScalar *v_L, *v_R;
1037 
1038  if(KIter.UpdatedBlock())
1039  {
1040  /* Searchable by the operator type of the left block */
1041  fws_LOP = {
1042  {OpEye, KIter.BlockStartIdx(OpSz)},
1043  {OpSz , KIter.BlockStartIdx(OpSz)},
1044  {OpSp , Offsets(Row_BlockIdx_L + 1, Row_BlockIdx_R - 1)},
1045  {OpSm , Offsets(Row_BlockIdx_L - 1, Row_BlockIdx_R + 1)}
1046  };
1047  /* Searchable by the operator type on the right block */
1048  Row_NumStates_ROP = {
1049  {OpEye, KIter.NumStatesRight()},
1050  {OpSz, KIter.NumStatesRight()},
1051  {OpSp, RightBlock.Magnetization.Sizes(Row_BlockIdx_R+1)},
1052  {OpSm, RightBlock.Magnetization.Sizes(Row_BlockIdx_R-1)}
1053  };
1054  }
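/* Rationale for the lookups above: a left-block S+ connects row block L to column block L+1 and, since every
   term conserves the total quantum number, its partner operator on the right connects R to R-1, hence
   Offsets(Row_BlockIdx_L + 1, Row_BlockIdx_R - 1); S- is the mirror image, while identity and Sz leave both
   block indices unchanged. Row_NumStates_ROP likewise stores the number of right-block column states of the
   target block for each right-side operator type. */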
1055 
1056  /* Loop through each term for this row. Identity operators are handled by supplying a single unit entry directly */
1057  ierr = PetscMemzero(val_arr, Nvals*sizeof(val_arr[0])); CHKERRQ(ierr);
1058  for(const KronSumTerm& term: ctx.Terms)
1059  {
1060  if(term.a == PetscScalar(0.0)) continue;
1061 
1062  if(term.OpTypeA != OpEye){
1063  ierr = (*term.A->ops->getrow)(term.A, LocRow_L, &nz_L, (PetscInt**)&idx_L, (PetscScalar**)&v_L); CHKERRQ(ierr);
1064  bks_L = LeftBlock.Magnetization.OpBlockToGlobalRangeStart(Row_BlockIdx_L, term.OpTypeA, flg[SideLeft]);
1065  } else {
1066  nz_L = 1;
1067  idx_L = &Row_L;
1068  v_L = &one;
1069  bks_L = LeftBlock.Magnetization.OpBlockToGlobalRangeStart(Row_BlockIdx_L, OpSz, flg[SideLeft]);
1070  }
1071 
1072  if(term.OpTypeB != OpEye){
1073  ierr = (*term.B->ops->getrow)(term.B, LocRow_R, &nz_R, (PetscInt**)&idx_R, (PetscScalar**)&v_R); CHKERRQ(ierr);
1074  bks_R = RightBlock.Magnetization.OpBlockToGlobalRangeStart(Row_BlockIdx_R, term.OpTypeB, flg[SideRight]);
1075  } else {
1076  nz_R = 1;
1077  idx_R = &Row_R;
1078  v_R = &one;
1079  bks_R = RightBlock.Magnetization.OpBlockToGlobalRangeStart(Row_BlockIdx_R, OpSz, flg[SideRight]);
1080  }
1081 
1082  if(!(flg[SideLeft] && flg[SideRight])) continue;
1083  if(nz_L*nz_R == 0) continue;
1084 
1085  fws_O = fws_LOP.at(term.OpTypeA);
1086  col_NStatesR = Row_NumStates_ROP.at(term.OpTypeB);
1087  if(col_NStatesR==-1) SETERRQ(PETSC_COMM_SELF,1,"Accessed incorrect value.");
1088 
1089  for(PetscInt l=0; l<nz_L; ++l)
1090  {
1091  for(PetscInt r=0; r<nz_R; ++r)
1092  {
1093  val_arr[( (idx_L[l] - bks_L) * col_NStatesR + (idx_R[r] - bks_R) + fws_O )] += term.a * v_L[l] * v_R[r];
1094  }
1095  }
1096 
1097  /* Determine the smallest and largest indices for this row */
1098  if (nz_L*nz_R > 0){
1099  MinIdx = ( (idx_L[0] - bks_L) * col_NStatesR + (idx_R[0] - bks_R) + fws_O );
1100  MaxIdx = ( (idx_L[nz_L-1] - bks_L) * col_NStatesR + (idx_R[nz_R-1] - bks_R) + fws_O );
1101  if(MinIdx < ctx.MinIdx) ctx.MinIdx = MinIdx;
1102  if(MaxIdx > ctx.MaxIdx) ctx.MaxIdx = MaxIdx;
1103  }
1104  ctx.Nelts += nz_L*nz_R;
1105  }
1106 
1107  /* Count the nonzero columns that fall in the diagonal and off-diagonal blocks */
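/* Note that one slot is always reserved for the diagonal entry of the row (the +1 below), even when its
   accumulated value falls below ks_tol, which keeps the preallocation conservative for the diagonal element. */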
1108  PetscInt dnnz = 0, onnz=0, i;
1109  for (i=0; i<ctx.cstart; i++) onnz += !(PetscAbsScalar(val_arr[i]) < ks_tol);
1110  for (i=ctx.cstart; i<ctx.cend; i++) dnnz += !(PetscAbsScalar(val_arr[i]) < ks_tol);
1111  for (i=ctx.cend; i<ctx.Ncols; i++) onnz += !(PetscAbsScalar(val_arr[i]) < ks_tol);
1112  ctx.Dnnz[lrow] = dnnz + (PetscAbsScalar(val_arr[lrow+ctx.rstart]) < ks_tol ? 1 : 0);
1113  ctx.Onnz[lrow] = onnz;
1114  ctx.Nnz += ctx.Dnnz[lrow] + ctx.Onnz[lrow];
1115  }
1116  }
1117  ierr = PetscFree(val_arr); CHKERRQ(ierr);
1118 
1119  FUNCTION_TIMINGS_END()
1120  return(0);
1121 }
1122 
1123 PetscErrorCode KronBlocks_t::KronSumRedistribute(
1124  KronSumCtx& ctx,
1125  PetscBool& flg
1126  )
1127 {
1128  PetscErrorCode ierr = 0;
1129  FUNCTION_TIMINGS_BEGIN()
1130 
1131  /* Gather all lrows and rstart to the root process */
1132  PetscInt *lrows_arr, *rstart_arr;
1133  const PetscInt arr_size = mpi_rank ? 0 : mpi_size;
1134  ierr = PetscCalloc2(arr_size, &lrows_arr, arr_size, &rstart_arr); CHKERRQ(ierr);
1135  ierr = MPI_Gather(&ctx.lrows, 1, MPIU_INT, lrows_arr, 1, MPIU_INT, 0, PETSC_COMM_WORLD); CHKERRQ(ierr);
1136  ierr = MPI_Gather(&ctx.rstart, 1, MPIU_INT, rstart_arr, 1, MPIU_INT, 0, PETSC_COMM_WORLD); CHKERRQ(ierr);
1137 
1138  PetscMPIInt *lrows_arr_mpi, *rstart_arr_mpi;
1139  #if defined(PETSC_USE_64BIT_INDICES)
1140  {
1141  /* Convert lrows_arr and rstart_arr into type PetscMPIInt */
1142  ierr = PetscCalloc2(arr_size, &lrows_arr_mpi, arr_size, &rstart_arr_mpi); CHKERRQ(ierr);
1143  for(PetscInt p=0; p<arr_size; ++p){
1144  ierr = PetscMPIIntCast(lrows_arr[p],&lrows_arr_mpi[p]); CHKERRQ(ierr);
1145  }
1146  for(PetscInt p=0; p<arr_size; ++p){
1147  ierr = PetscMPIIntCast(rstart_arr[p],&rstart_arr_mpi[p]); CHKERRQ(ierr);
1148  }
1149  }
1150  #else
1151  {
1152  lrows_arr_mpi = lrows_arr;
1153  rstart_arr_mpi = rstart_arr;
1154  }
1155  #endif
1156 
1157  /* Gather preallocation data to the root process */
1158  PetscInt *Tnnz_arr, *Onnz_arr;
1159  const PetscInt tot_size = mpi_rank ? 0 : ctx.Nrows;
1160  ierr = PetscCalloc2(tot_size, &Tnnz_arr, tot_size, &Onnz_arr); CHKERRQ(ierr);
1161  ierr = MPI_Gatherv(ctx.Dnnz, ctx.lrows, MPIU_INT, Tnnz_arr, lrows_arr_mpi, rstart_arr_mpi, MPIU_INT, 0, mpi_comm); CHKERRQ(ierr);
1162  ierr = MPI_Gatherv(ctx.Onnz, ctx.lrows, MPIU_INT, Onnz_arr, lrows_arr_mpi, rstart_arr_mpi, MPIU_INT, 0, mpi_comm); CHKERRQ(ierr);
1163  for(PetscInt irow = 0; irow < tot_size; ++irow) Tnnz_arr[irow] += Onnz_arr[irow];
1164 
1165  // #define DEBUG_REDIST
1166  #if defined(DEBUG_REDIST)
1167  if(!mpi_rank){
1168  printf("-------\n");
1169  for(PetscInt p=0; p<mpi_size; ++p){
1170  PetscInt tot_nnz = 0;
1171  for(PetscInt irow=rstart_arr[p]; irow<rstart_arr[p]+lrows_arr[p]; ++irow) tot_nnz += Tnnz_arr[irow];
1172  printf("[%d] lrows: %3d rstart: %3d nnz: %d\n", p, lrows_arr[p], rstart_arr[p], tot_nnz);
1173  }
1174  printf("excess: %d\n", ctx.Nrows % mpi_size);
1175  printf("-------\n");
1176  }
1177  #endif
1178 
1179  PetscInt tot_nnz = 0;
1180  for(PetscInt irow = 0; irow < tot_size; ++irow) tot_nnz += Tnnz_arr[irow];
1181  const PetscInt avg_nnz_proc = tot_nnz / mpi_size;
1182 
1183  ierr = PetscMemzero(lrows_arr, sizeof(PetscInt) * arr_size); CHKERRQ(ierr);
1184  ierr = PetscMemzero(rstart_arr, sizeof(PetscInt) * arr_size); CHKERRQ(ierr);
1185 
1186  /* Recalculate boundaries */
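/* Greedy rebalancing sketch: walk the rows in order, assigning them to the current process while accumulating
   their nonzero counts; once the running total reaches the average nonzeros per process, advance to the next
   process. The resulting lrows/rstart arrays are scattered back to all processes afterwards. */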
1187  flg = PETSC_TRUE;
1188  if(!mpi_rank){
1189 
1190  #if defined(DEBUG_REDIST)
1191  printf("tot_nnz: %d\n", tot_nnz);
1192  printf("avg_nnz_proc: %d\n", avg_nnz_proc);
1193  printf("-------\n");
1194  #endif
1195 
1196  std::vector< PetscInt > nnz_proc(mpi_size);
1197  for(PetscInt irow=0, iproc=0; irow<ctx.Nrows; ++irow){
1198  if(iproc>=mpi_size) break;
1199 
1200  nnz_proc[iproc] += Tnnz_arr[irow];
1201  ++lrows_arr[iproc];
1202 
1203  #if defined(DEBUG_REDIST) && 0
1204  printf("irow: %-5d iproc: %-5d nrows_proc: %-5d Tnnz: %-5d nnz_proc: %-5d\n", irow, iproc, lrows_arr[iproc], Tnnz_arr[irow], nnz_proc[iproc]);
1205  #endif
1206 
1207  if( nnz_proc[iproc] >= avg_nnz_proc ){
1208 
1209  #if defined(DEBUG_REDIST) && 0
1210  printf("\ntriggered\n");
1211  printf("irow: %-5d iproc: %-5d nrows_proc: %-5d Tnnz: %-5d nnz_proc: %-5d\n", irow, iproc, lrows_arr[iproc], Tnnz_arr[irow], nnz_proc[iproc]);
1212  printf("lrows_arr[%d] = %d\n", iproc, lrows_arr[iproc]);
1213  printf("\n");
1214  #endif
1215 
1216  ++iproc;
1217  }
1218  }
1219 
1220  PetscInt tot_lrows = 0;
1221  for(PetscInt p=0; p<mpi_size; ++p){
1222  rstart_arr[p] = tot_lrows;
1223  tot_lrows += lrows_arr[p];
1224  }
1225  if(ctx.Nrows!=tot_lrows){
1226 
1227  printf("--------------------------------------------------\n");
1228  printf("[0] Redistribution failed at GlobIdx: %lld\n", LLD(GlobIdx));
1229  printf("[0] >>> tot_lrows: %lld\n", LLD(tot_lrows));
1230  printf("[0] >>> ctx.Nrows: %lld\n", LLD(ctx.Nrows));
1231  printf("[0] >>> tot_nnz: %lld\n", LLD(tot_nnz));
1232  printf("[0] >>> avg_nnz_proc: %lld\n", LLD(avg_nnz_proc));
1233  printf("[0] >>> nnz_proc: [ %lld", LLD(nnz_proc[0]));
1234  for(PetscInt p=1; p<mpi_size; ++p) printf(", %lld", LLD(nnz_proc[p]));
1235  printf(" ]\n");
1236  #if 0 && defined(DEBUG_REDIST)
1237  printf("[0] >>> Tnnz_arr: [ %d", Tnnz_arr[0]);
1238  for(PetscInt i=1; i<ctx.Nrows; ++i) printf(", %d", Tnnz_arr[i]);
1239  printf(" ]\n");
1240  printf("--------------------------------------------------\n");
1241  #endif
1242 
1243  flg = PETSC_FALSE;
1244 
1245  #if 0
1246  SETERRQ2(PETSC_COMM_SELF,1,"Incorrect total number of rows. "
1247  "Expected %d. Got %d.", ctx.Nrows, tot_lrows);
1248  #endif
1249  }
1250 
1251  #if defined(DEBUG_REDIST)
1252  for(PetscInt p=0; p<mpi_size; ++p){
1253  printf("[%d] lrows: %3d rstart: %3d nnz: %d\n", p, lrows_arr[p], rstart_arr[p], nnz_proc[p]);
1254  }
1255  #endif
1256  }
1257 
1258  ierr = MPI_Bcast(&flg, 1, MPI_INT, 0, PETSC_COMM_WORLD); CHKERRQ(ierr);
1259  if(flg){
1260  /* Scatter data on lrows and rstart */
1261  ierr = MPI_Scatter(lrows_arr, 1, MPIU_INT, &ctx.lrows, 1, MPIU_INT, 0, PETSC_COMM_WORLD); CHKERRQ(ierr);
1262  ierr = MPI_Scatter(rstart_arr, 1, MPIU_INT, &ctx.rstart, 1, MPIU_INT, 0, PETSC_COMM_WORLD); CHKERRQ(ierr);
1263  ctx.cstart = ctx.rstart;
1264  ctx.lcols = ctx.lrows;
1265  ctx.rend = ctx.cend = ctx.rstart + ctx.lrows;
1266 
1267  /* Destroy ctx data */
1268  ierr = PetscFree2(ctx.Dnnz, ctx.Onnz); CHKERRQ(ierr);
1269  ctx.ReqRowsL.clear();
1270  ctx.ReqRowsR.clear();
1271  ctx.MapRowsL.clear();
1272  ctx.MapRowsR.clear();
1273  ctx.Terms.clear();
1274  ctx.Maxnnz.clear();
1275  for(Mat *mat: ctx.LocalSubMats){
1276  ierr = MatDestroySubMatrices(1,&mat); CHKERRQ(ierr);
1277  }
1278  ctx.LocalSubMats.clear();
1279  }
1280  ierr = PetscFree2(Tnnz_arr, Onnz_arr); CHKERRQ(ierr);
1281  ierr = PetscFree2(lrows_arr, rstart_arr); CHKERRQ(ierr);
1282  #if defined(PETSC_USE_64BIT_INDICES)
1283  {
1284  ierr = PetscFree2(lrows_arr_mpi, rstart_arr_mpi); CHKERRQ(ierr);
1285  }
1286  #endif
1287 
1288  FUNCTION_TIMINGS_END()
1289  return(0);
1290 }
1291 
1292 PetscErrorCode KronBlocks_t::KronSumPreallocate(
1293  KronSumCtx& ctx,
1294  Mat& MatOut
1295  )
1296 {
1297  PetscErrorCode ierr = 0;
1298  FUNCTION_TIMINGS_BEGIN()
1299 
1300  ierr = MatCreate(mpi_comm, &MatOut); CHKERRQ(ierr);
1301  ierr = MatSetSizes(MatOut, ctx.lrows, ctx.lcols, ctx.Nrows, ctx.Ncols); CHKERRQ(ierr);
1302  ierr = MatSetType(MatOut,MATMPIAIJ); CHKERRQ(ierr);
1303  ierr = MatSetOptionsPrefix(MatOut, "H_"); CHKERRQ(ierr);
1304  ierr = MatSetFromOptions(MatOut); CHKERRQ(ierr);
1305  ierr = MatMPIAIJSetPreallocation(MatOut, 0, ctx.Dnnz, 0, ctx.Onnz); CHKERRQ(ierr);
1306  ierr = PetscFree2(ctx.Dnnz, ctx.Onnz); CHKERRQ(ierr);
1307  /* Note: Preallocation for seq not required as long as mpiaij(mkl) matrices are specified */
1308 
1309  ierr = MatSetOption(MatOut, MAT_HERMITIAN, PETSC_TRUE); CHKERRQ(ierr);
1310  ierr = MatSetOption(MatOut, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE); CHKERRQ(ierr);
1311  ierr = MatSetOption(MatOut, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE); CHKERRQ(ierr);
1312  ierr = MatSetOption(MatOut, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE); CHKERRQ(ierr);
1313  ierr = MatSetOption(MatOut, MAT_IGNORE_OFF_PROC_ENTRIES, PETSC_TRUE); CHKERRQ(ierr);
1314  ierr = MatSetOption(MatOut, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE); CHKERRQ(ierr);
1315  ierr = MatSetOption(MatOut, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE); CHKERRQ(ierr);
1316  ierr = MatSetOption(MatOut, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE); CHKERRQ(ierr);
1317 
1318  /* Verify that the row and column mappings match what is expected */
1319  {
1320  const PetscInt M = MatOut->rmap->N;
1321  const PetscInt N = MatOut->cmap->N;
1322  const PetscInt Istart = MatOut->rmap->rstart;
1323  const PetscInt locrows = MatOut->rmap->rend - Istart;
1324  if(locrows!=ctx.lrows) SETERRQ4(PETSC_COMM_SELF, 1,
1325  "Incorrect guess for locrows. Expected %d. Got %d. Size: %d x %d.", locrows, ctx.lrows, M, N);
1326  if(Istart!=ctx.rstart) SETERRQ4(PETSC_COMM_SELF, 1,
1327  "Incorrect guess for Istart. Expected %d. Got %d. Size: %d x %d.", Istart, ctx.rstart, M, N);
1328  const PetscInt Cstart = MatOut->cmap->rstart;
1329  const PetscInt loccols = MatOut->cmap->rend - Cstart;
1330  if(loccols!=ctx.lcols) SETERRQ4(PETSC_COMM_SELF, 1,
1331  "Incorrect guess for loccols. Expected %d. Got %d. Size: %d x %d.", loccols, ctx.lcols, M, N);
1332  if(Cstart!=ctx.cstart) SETERRQ4(PETSC_COMM_SELF, 1,
1333  "Incorrect guess for Cstart. Expected %d. Got %d. Size: %d x %d.", Cstart, ctx.cstart, M, N);
1334  }
1335 
1336  FUNCTION_TIMINGS_END()
1337  return(0);
1338 }
1339 
1340 PetscErrorCode KronBlocks_t::KronSumFillMatrix(
1341  KronSumCtx& ctx,
1342  Mat& MatOut
1343  )
1344 {
1345  PetscErrorCode ierr = 0;
1346  INTERVAL_TIMINGS_SETUP()
1347  FUNCTION_TIMINGS_BEGIN()
1348 
1349  /* Preallocate the largest needed workspace */
1350  PetscInt *idx_arr;
1351  PetscScalar *val_arr;
1352  PetscInt Nvals = ((ctx.lrows > 0) && (ctx.MaxIdx > ctx.MinIdx)) ? ((ctx.MaxIdx+1)-ctx.MinIdx) : 1;
1353  if(Nvals <= 0) SETERRQ1(PETSC_COMM_SELF,1,"Incorrect value of Nvals. Must be positive. Got %lld.", LLD(Nvals));
1354  ierr = PetscCalloc1(Nvals, &idx_arr); CHKERRQ(ierr);
1355  ierr = PetscCalloc1(Nvals, &val_arr); CHKERRQ(ierr);
1356 
1357  /* Fill in all indices */
1358  for(PetscInt i=0; i < Nvals; ++i) idx_arr[i] = ctx.MinIdx + i;
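/* Each local row is assembled as a dense slice over the contiguous column window [MinIdx, MaxIdx] determined
   during preallocation: the terms are accumulated into val_arr, entries below ks_tol are zeroed (and counted
   in Nfiltered), and the whole slice is inserted with a single MatSetValues call per row. */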
1359  ctx.Nfiltered = 0;
1360 
1361  /* Lookup tables for the forward shift, keyed by the operator type of the left block. This assumes that
1362  the operator on the right block is of a compatible type, so that each resulting matrix term is
1363  block-diagonal in the total quantum number */
1364  std::map< Op_t, PetscInt > fws_LOP = {};
1365  std::map< Op_t, PetscInt > Row_NumStates_ROP = {};
1366 
1367  ACCUM_TIMINGS_SETUP(KronIterSetup)
1368  ACCUM_TIMINGS_SETUP(MatSetValues)
1369  ACCUM_TIMINGS_SETUP(MatLoop)
1370  INTERVAL_TIMINGS_BEGIN()
1371  if(ctx.lrows > 0)
1372  {
1373  KronBlocksIterator KIter(*this, ctx.rstart, ctx.rend);
1374  for( ; KIter.Loop(); ++KIter)
1375  {
1376  ACCUM_TIMINGS_BEGIN(KronIterSetup)
1377  const PetscInt Irow = KIter.Steps() + ctx.rstart;
1378  const PetscInt Row_BlockIdx_L = KIter.BlockIdxLeft();
1379  const PetscInt Row_BlockIdx_R = KIter.BlockIdxRight();
1380  const PetscInt Row_L = KIter.GlobalIdxLeft();
1381  const PetscInt Row_R = KIter.GlobalIdxRight();
1382  const PetscInt LocRow_L = ctx.MapRowsL.at(Row_L);
1383  const PetscInt LocRow_R = ctx.MapRowsR.at(Row_R);
1384 
1385  PetscBool flg[2];
1386  PetscInt nz_L, nz_R, bks_L, bks_R, col_NStatesR, fws_O;
1387  const PetscInt *idx_L, *idx_R;
1388  const PetscScalar *v_L, *v_R;
1389 
1390  if(KIter.UpdatedBlock())
1391  {
1392  /* Searchable by the operator type of the left block */
1393  fws_LOP = {
1394  {OpEye, KIter.BlockStartIdx(OpSz)},
1395  {OpSz , KIter.BlockStartIdx(OpSz)},
1396  {OpSp , Offsets(Row_BlockIdx_L + 1, Row_BlockIdx_R - 1)},
1397  {OpSm , Offsets(Row_BlockIdx_L - 1, Row_BlockIdx_R + 1)}
1398  };
1399  /* Searchable by the operator type on the right block */
1400  Row_NumStates_ROP = {
1401  {OpEye, KIter.NumStatesRight()},
1402  {OpSz, KIter.NumStatesRight()},
1403  {OpSp, RightBlock.Magnetization.Sizes(Row_BlockIdx_R+1)},
1404  {OpSm, RightBlock.Magnetization.Sizes(Row_BlockIdx_R-1)}
1405  };
1406  }
1407 
1408  /* Loop through each term for this row. Identity operators are handled by supplying a single unit entry directly */
1409  ierr = PetscMemzero(val_arr, Nvals*sizeof(val_arr[0])); CHKERRQ(ierr);
1410  ACCUM_TIMINGS_END(KronIterSetup)
1411  ACCUM_TIMINGS_BEGIN(MatLoop)
1412  for(const KronSumTerm& term: ctx.Terms)
1413  {
1414  if(term.OpTypeA != OpEye){
1415  ierr = (*term.A->ops->getrow)(term.A, LocRow_L, &nz_L, (PetscInt**)&idx_L, (PetscScalar**)&v_L); CHKERRQ(ierr);
1416  bks_L = LeftBlock.Magnetization.OpBlockToGlobalRangeStart(Row_BlockIdx_L, term.OpTypeA, flg[SideLeft]);
1417  } else {
1418  nz_L = 1;
1419  idx_L = &Row_L;
1420  v_L = &one;
1421  bks_L = LeftBlock.Magnetization.OpBlockToGlobalRangeStart(Row_BlockIdx_L, OpSz, flg[SideLeft]);
1422  }
1423 
1424  if(term.OpTypeB != OpEye){
1425  ierr = (*term.B->ops->getrow)(term.B, LocRow_R, &nz_R, (PetscInt**)&idx_R, (PetscScalar**)&v_R); CHKERRQ(ierr);
1426  bks_R = RightBlock.Magnetization.OpBlockToGlobalRangeStart(Row_BlockIdx_R, term.OpTypeB, flg[SideRight]);
1427  } else {
1428  nz_R = 1;
1429  idx_R = &Row_R;
1430  v_R = &one;
1431  bks_R = RightBlock.Magnetization.OpBlockToGlobalRangeStart(Row_BlockIdx_R, OpSz, flg[SideRight]);
1432  }
1433 
1434  if(!(flg[SideLeft] && flg[SideRight])) continue;
1435  if(nz_L*nz_R == 0) continue;
1436 
1437  fws_O = fws_LOP.at(term.OpTypeA) - ctx.MinIdx;
1438  col_NStatesR = Row_NumStates_ROP.at(term.OpTypeB);
1439  if(col_NStatesR==-1) SETERRQ(PETSC_COMM_SELF,1,"Accessed incorrect value.");
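          /* The global column of each (l,r) pair is the offset of the destination quantum-number block
             (fws_LOP) plus its row-major position (idx_L-bks_L)*col_NStatesR + (idx_R-bks_R) inside that
             block; fws_O already has ctx.MinIdx subtracted, so the sum indexes val_arr directly */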
1440  for(PetscInt l=0; l<nz_L; ++l)
1441  {
1442  for(PetscInt r=0; r<nz_R; ++r)
1443  {
1444  val_arr[( (idx_L[l] - bks_L) * col_NStatesR + (idx_R[r] - bks_R) + fws_O )] += term.a * v_L[l] * v_R[r];
1445  }
1446  }
1447  }
1448 
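  /* Drop entries with magnitude below the tolerance ks_tol: they stay in the dense row buffer as
     explicit zeros and are tallied in ctx.Nfiltered */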
1449  for(PetscInt i=0; i<Nvals; i++){
1450  if(PetscAbsScalar(val_arr[i]) < ks_tol){
1451  val_arr[i] = 0.0;
1452  ++ctx.Nfiltered;
1453  }
1454  }
1455 
1456  ACCUM_TIMINGS_END(MatLoop)
1457  ACCUM_TIMINGS_BEGIN(MatSetValues)
1458  ierr = MatSetValues(MatOut, 1, &Irow, Nvals, idx_arr, val_arr, INSERT_VALUES); CHKERRQ(ierr);
1459  ACCUM_TIMINGS_END(MatSetValues)
1460  }
1461  }
1462  ierr = PetscFree(val_arr); CHKERRQ(ierr);
1463  ierr = PetscFree(idx_arr); CHKERRQ(ierr);
1464 
1465  ACCUM_TIMINGS_PRINT(KronIterSetup, " KronIterSetup")
1466  ACCUM_TIMINGS_PRINT(MatLoop, " MatLoop")
1467  ACCUM_TIMINGS_PRINT(MatSetValues, " MatSetValues")
1468  INTERVAL_TIMINGS_END("KronSumFillMatrixLoop")
1469 
1470  INTERVAL_TIMINGS_BEGIN()
1471  ierr = MatEnsureAssembled(MatOut); CHKERRQ(ierr);
1472  INTERVAL_TIMINGS_END("MatEnsureAssembled")
1473 
1474  FUNCTION_TIMINGS_END()
1475  FUNCTION_TIMINGS_PRINT_SPACE()
1476  return(0);
1477 }
1478 
1479 PetscErrorCode KronBlocks_t::SavePreallocData(const KronSumCtx& ctx)
1480 {
1481  if(!do_saveprealloc) return(0);
1482 
 1483  /* Sum the local preallocation counts, then gather them to the root process */
1484  PetscInt Dnnz=0, Onnz=0;
1485  for(PetscInt irow=0; irow<ctx.lrows; ++irow) Dnnz += ctx.Dnnz[irow];
1486  for(PetscInt irow=0; irow<ctx.lrows; ++irow) Onnz += ctx.Onnz[irow];
1487 
1488  PetscErrorCode ierr = 0;
1489  PetscInt *Dnnz_arr, *Onnz_arr;
1490  PetscInt arr_size = mpi_rank ? 0 : mpi_size;
1491  ierr = PetscCalloc2(arr_size, &Dnnz_arr, arr_size, &Onnz_arr); CHKERRQ(ierr);
1492  ierr = MPI_Gather(&Dnnz, 1, MPIU_INT, Dnnz_arr, 1, MPIU_INT, 0, PETSC_COMM_WORLD); CHKERRQ(ierr);
1493  ierr = MPI_Gather(&Onnz, 1, MPIU_INT, Onnz_arr, 1, MPIU_INT, 0, PETSC_COMM_WORLD); CHKERRQ(ierr);
1494 
 1495  if(mpi_rank){ ierr = PetscFree2(Dnnz_arr, Onnz_arr); CHKERRQ(ierr); return(0); }
1496 
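  /* Root appends one JSON record per call, e.g.
     { "GlobIdx" : 3, "Dnnz" : [ 120, 118 ], "Onnz" : [ 45, 47 ] },
     listing the per-process totals of diagonal and off-diagonal non-zeros (values here are illustrative) */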
1497  if(GlobIdx) fprintf(fp_prealloc,",\n");
1498  fprintf(fp_prealloc," {\n"
1499  " \"GlobIdx\" : %lld,\n"
1500  " \"Dnnz\" : [", LLD(GlobIdx));
1501  fprintf(fp_prealloc," %lld", LLD(Dnnz_arr[0]));
1502  for(PetscInt iproc=1; iproc<mpi_size; ++iproc){
1503  fprintf(fp_prealloc,", %lld", LLD(Dnnz_arr[iproc]));
1504  }
1505  fprintf(fp_prealloc,"],\n"
1506  " \"Onnz\" : [");
1507  fprintf(fp_prealloc," %lld", LLD(Onnz_arr[0]));
1508  for(PetscInt iproc=1; iproc<mpi_size; ++iproc){
1509  fprintf(fp_prealloc,", %lld", LLD(Onnz_arr[iproc]));
1510  }
1511  fprintf(fp_prealloc,"]\n }");
1512  fflush(fp_prealloc);
1513  ierr = PetscFree2(Dnnz_arr, Onnz_arr); CHKERRQ(ierr);
1514  return(0);
1515 }
1516 
1517 /*--- Definitions for shell matrices ---*/
1518 
1519 PetscErrorCode KronBlocks_t::KronSumShellSplitOwnership(
1520  const Mat& OpProdSumLL,
1521  const Mat& OpProdSumRR,
1522  const std::vector< Hamiltonians::Term >& TermsLR,
1523  const PetscInt Nrows,
1524  PetscInt& lrows,
1525  PetscInt& rstart
1526  )
1527 {
1528  FUNCTION_TIMINGS_BEGIN();
1529  PetscErrorCode ierr;
1530  if(Nrows!=num_states) SETERRQ2(mpi_comm,1,"Invalid input Nrows. Expected %lld. Got %lld.", LLD(num_states), LLD(Nrows));
1531 
 1532  /* Map each unique operator to its per-row non-zero counts;
 1533  key tuple: (Side_t, Op_t, SiteIdx) */
1534  std::map< std::tuple< Side_t, Op_t, PetscInt >, std::vector<PetscInt> > nnzs_loc, nnzs;
1535  std::tuple< Side_t, Op_t, PetscInt > key;
1536  PetscMPIInt lrows_L, lrows_R;
1537 
1538  /* LR terms */
1539  for(const Hamiltonians::Term& term: TermsLR)
1540  {
1541  /* L-block */
1542  key = std::make_tuple(SideLeft, term.Iop, term.Isite);
1543  if(nnzs_loc.find(key)==nnzs_loc.end())
1544  {
1545  ierr = LeftBlock.MatOpGetNNZs(term.Iop, term.Isite, nnzs_loc[key]); CHKERRQ(ierr);
1546  }
1547 
1548  /* R-block */
1549  key = std::make_tuple(SideRight, term.Jop, term.Jsite);
1550  if(nnzs_loc.find(key)==nnzs_loc.end())
1551  {
1552  ierr = RightBlock.MatOpGetNNZs(term.Jop, term.Jsite, nnzs_loc[key]); CHKERRQ(ierr);
1553  }
1554  }
1555 
1556  /* LL term */
1557  key = std::make_tuple(SideLeft, OpSz, -1);
1558  if(nnzs_loc.find(key)==nnzs_loc.end())
1559  {
1560  ierr = LeftBlock.MatGetNNZs(OpProdSumLL, nnzs_loc[key]); CHKERRQ(ierr);
 1561  ierr = PetscMPIIntCast(PetscInt(nnzs_loc[key].size()), &lrows_L); CHKERRQ(ierr);
1562  }
1563  else SETERRQ(mpi_comm,1,"Key error: (SideLeft, OpSz, -1).");
1564 
1565  /* RR term */
1566  key = std::make_tuple(SideRight, OpSz, -1);
1567  if(nnzs_loc.find(key)==nnzs_loc.end())
1568  {
 1569  ierr = RightBlock.MatGetNNZs(OpProdSumRR, nnzs_loc[key]); CHKERRQ(ierr);
 1570  ierr = PetscMPIIntCast(PetscInt(nnzs_loc[key].size()), &lrows_R); CHKERRQ(ierr);
1571  }
1572  else SETERRQ(mpi_comm,1,"Key error: (SideRight, OpSz, -1).");
1573 
1574  /* Gather the number of local rows in each process to root */
1575  std::vector< std::vector< PetscMPIInt > > side_lrows(2);
1576  side_lrows[SideLeft ].resize(mpi_rank ? 1 : mpi_size);
1577  side_lrows[SideRight].resize(mpi_rank ? 1 : mpi_size);
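  /* Non-root processes keep length-1 placeholders so that &vec.at(0) below stays valid even though
     only root actually receives the gathered data */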
1578 
1579  ierr = MPI_Gather(&lrows_L, 1, MPI_INT, &side_lrows[SideLeft ].at(0), 1, MPI_INT, 0, mpi_comm); CHKERRQ(ierr);
1580  ierr = MPI_Gather(&lrows_R, 1, MPI_INT, &side_lrows[SideRight].at(0), 1, MPI_INT, 0, mpi_comm); CHKERRQ(ierr);
1581 
1582  /* Calculate the offsets on root */
1583  std::vector< std::vector< PetscMPIInt > > side_offset(2);
1584  std::vector< PetscMPIInt > side_nrows(2);
1585  side_offset[SideLeft ].resize(mpi_rank ? 1 : mpi_size);
1586  side_offset[SideRight].resize(mpi_rank ? 1 : mpi_size);
1587 
1588  for(const auto& side_key: SideTypes)
1589  {
1590  {
1591  const std::vector< PetscMPIInt >& lrows = side_lrows.at(side_key);
1592  std::vector< PetscMPIInt >& offset = side_offset.at(side_key);
1593  PetscInt sum_lrows = 0;
1594  for(size_t i=0; i<lrows.size(); ++i)
1595  {
1596  offset[i] = sum_lrows;
1597  sum_lrows += lrows.at(i);
1598  }
1599  ierr = PetscMPIIntCast(PetscInt(mpi_rank ? 1 : sum_lrows), &side_nrows.at(side_key)); CHKERRQ(ierr);
1600  }
1601  }
1602 
1603  /* Gather the arrays containing nnzs for each operator */
1604  for(const auto& pair_loc: nnzs_loc)
1605  {
1606  const std::tuple< Side_t, Op_t, PetscInt >& key = pair_loc.first;
1607  const std::vector< PetscInt >& nnz_loc = pair_loc.second;
1608  const Side_t side = std::get<0>(key);
1609  PetscMPIInt nnz_loc_size;
1610  ierr = PetscMPIIntCast(PetscInt(nnz_loc.size()),&nnz_loc_size); CHKERRQ(ierr);
1611  nnzs[key].resize(side_nrows.at(side));
 1612  /* Pass NULL when the local vector is empty, since &nnz_loc.at(0) would throw on an empty vector */
1613  const PetscInt *nnz_loc_val = nnz_loc.empty() ? NULL : &nnz_loc.at(0);
1614  ierr = MPI_Gatherv(nnz_loc_val, nnz_loc_size, MPIU_INT,
1615  &nnzs.at(key).at(0), &side_lrows.at(side).at(0), &side_offset.at(side).at(0),
1616  MPIU_INT, 0, mpi_comm); CHKERRQ(ierr);
1617  }
1618 
1619  PetscBool flg = PETSC_TRUE;
1620  std::vector< PetscInt > lrows_arr(mpi_rank ? 1: mpi_size), rstart_arr(mpi_rank ? 1: mpi_size);
1621  if(!mpi_rank)
1622  {
 1623  /* Estimate the number of non-zeros in each row of the resulting matrix */
1624  std::vector< PetscInt > ks_nnz(num_states);
1625  std::tuple< Side_t, Op_t, PetscInt > key_L, key_R;
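        /* Each LR term contributes nnz_L(Row_L) * nnz_R(Row_R) columns to its row of the Kronecker sum,
           while the LL and RR Hamiltonian blocks (Kronecker products with the identity) contribute their
           own per-row counts; the sum gives an estimate of the non-zeros per row */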
1626  KronBlocksIterator KIter(*this, 0, num_states);
1627  for( ; KIter.Loop(); ++KIter)
1628  {
1629  const PetscInt lrow = KIter.Steps();
1630  const PetscInt Row_L = KIter.GlobalIdxLeft();
1631  const PetscInt Row_R = KIter.GlobalIdxRight();
1632 
1633  for(const Hamiltonians::Term& term: TermsLR)
1634  {
1635  key_L = std::make_tuple(SideLeft, term.Iop, term.Isite);
1636  key_R = std::make_tuple(SideRight, term.Jop, term.Jsite);
1637  ks_nnz.at(lrow) += nnzs.at(key_L).at(Row_L) * nnzs.at(key_R).at(Row_R);
1638  }
1639  key_L = std::make_tuple(SideLeft, OpSz, -1);
1640  ks_nnz.at(lrow) += nnzs.at(key_L).at(Row_L);
1641  key_R = std::make_tuple(SideRight, OpSz, -1);
1642  ks_nnz.at(lrow) += nnzs.at(key_R).at(Row_R);
1643  }
1644 
1645  PetscInt tot_nnz=0;
1646  for(const PetscInt& nnz: ks_nnz) tot_nnz+=nnz;
1647 
 1648  /* Assign contiguous rows to each process until it holds roughly the average number of non-zeros per process */
1649  PetscInt avg_nnz_proc = tot_nnz / mpi_size;
1650  std::vector< PetscInt > nnz_proc(mpi_size);
1651  for(PetscInt irow=0, iproc=0; irow<Nrows; ++irow)
1652  {
1653  if(iproc>=mpi_size) break;
1654  nnz_proc.at(iproc) += ks_nnz.at(irow);
1655  ++lrows_arr.at(iproc);
1656  if( nnz_proc.at(iproc) >= avg_nnz_proc ) ++iproc;
1657  }
1658 
1659  PetscInt tot_lrows = 0;
1660  for(PetscInt p=0; p<mpi_size; ++p)
1661  {
1662  rstart_arr[p] = tot_lrows;
1663  tot_lrows += lrows_arr[p];
1664  }
1665  if(Nrows!=tot_lrows)
1666  {
1667  printf("--------------------------------------------------\n");
1668  printf("[0] Redistribution failed at GlobIdx: %lld\n", LLD(GlobIdx));
1669  printf("[0] >>> tot_lrows: %lld\n", LLD(tot_lrows));
1670  printf("[0] >>> ctx.Nrows: %lld\n", LLD(Nrows));
1671  printf("[0] >>> tot_nnz: %lld\n", LLD(tot_nnz));
1672  printf("[0] >>> avg_nnz_proc: %lld\n", LLD(avg_nnz_proc));
1673  printf("[0] >>> nnz_proc: [ %lld", LLD(nnz_proc[0]));
1674  for(PetscInt p=1; p<mpi_size; ++p) printf(", %lld", LLD(nnz_proc[p]));
1675  printf(" ]\n");
1676 
1677  #if 0
1678  SETERRQ2(PETSC_COMM_SELF,1,"Incorrect total number of rows. "
1679  "Expected %d. Got %d.", Nrows, tot_lrows);
1680  #else
1681  flg = PETSC_FALSE;
1682  #endif
1683  }
1684  }
1685 
1686  ierr = MPI_Bcast(&flg, 1, MPI_INT, 0, PETSC_COMM_WORLD); CHKERRQ(ierr);
1687  if(flg)
1688  {
1689  /* Scatter data on lrows and rstart */
1690  ierr = MPI_Scatter(&lrows_arr.at(0), 1, MPIU_INT, &lrows, 1, MPIU_INT, 0, PETSC_COMM_WORLD); CHKERRQ(ierr);
1691  ierr = MPI_Scatter(&rstart_arr.at(0), 1, MPIU_INT, &rstart, 1, MPIU_INT, 0, PETSC_COMM_WORLD); CHKERRQ(ierr);
1692  }
1693  else
1694  {
1695  ierr = PreSplitOwnership(mpi_comm, Nrows, lrows, rstart); CHKERRQ(ierr);
1696  }
1697 
1698  prev_lrows = lrows;
1699  prev_rstart = rstart;
1700  called_KronSumShellSplitOwnership = PETSC_TRUE;
1701 
1702  FUNCTION_TIMINGS_END();
1703  return (0);
1704 }
1705 
1706 PetscErrorCode KronBlocks_t::KronSumSetUpShellTerms(KronSumShellCtx *shellctx)
1707 {
1708  PetscErrorCode ierr;
1709  FUNCTION_TIMINGS_BEGIN()
1710 
1711  KronSumCtx& ctx = *(shellctx->p_ctx);
1712 
1713  /* Fill in the context and allocate space */
1714  shellctx->one = 1.00;
1715  shellctx->Nterms = ctx.Terms.size();
1716  shellctx->Nrowterms = (shellctx->Nterms) * (ctx.lrows);
1717  ierr = PetscCalloc1(ctx.lrows, &(shellctx->Rows_L)); CHKERRQ(ierr);
1718  ierr = PetscCalloc1(ctx.lrows, &(shellctx->Rows_R)); CHKERRQ(ierr);
1719  ierr = PetscCalloc1(shellctx->Nrowterms, &(shellctx->kstr)); CHKERRQ(ierr);
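  /* kstr caches, for every (local row, term) pair, the non-zero pattern and value pointers of the
     corresponding rows of the left and right operators, so MatMult_KronSumShell can stream through
     them without calling getrow again */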
1720 
1721  /* Fill in the coefficients of the terms */
1722  ierr = PetscCalloc1(shellctx->Nterms, &(shellctx->term_a)); CHKERRQ(ierr);
1723  for(PetscInt it = 0; it < shellctx->Nterms; ++it){
1724  (shellctx->term_a)[it] = ctx.Terms[it].a;
1725  }
1726 
 1727  /* Lookup table for the forward shift, keyed by the operator type of the left block. This assumes that
 1728  the operator on the right block has the complementary type, so that the resulting matrix term is
 1729  block-diagonal in the quantum numbers */
1730  std::map< Op_t, PetscInt > fws_LOP = {};
1731  std::map< Op_t, PetscInt > Row_NumStates_ROP = {};
1732  PetscInt irt = 0;
1733  ACCUM_TIMINGS_SETUP(KronIterSetup)
1734  ACCUM_TIMINGS_SETUP(MatLoop)
1735  if(ctx.lrows > 0)
1736  {
1737  KronBlocksIterator KIter(*this, ctx.rstart, ctx.rend);
1738  for( ; KIter.Loop(); ++KIter)
1739  {
1740  ACCUM_TIMINGS_BEGIN(KronIterSetup)
1741  const PetscInt lrow = KIter.Steps();
1742  const PetscInt Row_BlockIdx_L = KIter.BlockIdxLeft();
1743  const PetscInt Row_BlockIdx_R = KIter.BlockIdxRight();
1744  const PetscInt Row_L = shellctx->Rows_L[lrow] = KIter.GlobalIdxLeft();
1745  const PetscInt Row_R = shellctx->Rows_R[lrow] = KIter.GlobalIdxRight();
1746  const PetscInt LocRow_L = ctx.MapRowsL.at(Row_L);
1747  const PetscInt LocRow_R = ctx.MapRowsR.at(Row_R);
1748 
1749  PetscBool flg[2];
1750 
1751  if(KIter.UpdatedBlock())
1752  {
1753  /* Searchable by the operator type of the left block */
1754  fws_LOP = {
1755  {OpEye, KIter.BlockStartIdx(OpSz)},
1756  {OpSz , KIter.BlockStartIdx(OpSz)},
 1757  {OpSp , Offsets(Row_BlockIdx_L + 1, Row_BlockIdx_R - 1)},
1758  {OpSm , Offsets(Row_BlockIdx_L - 1, Row_BlockIdx_R + 1)}
1759  };
1760  /* Searchable by the operator type on the right block */
1761  Row_NumStates_ROP = {
1762  {OpEye, KIter.NumStatesRight()},
1763  {OpSz, KIter.NumStatesRight()},
1764  {OpSp, RightBlock.Magnetization.Sizes(Row_BlockIdx_R+1)},
1765  {OpSm, RightBlock.Magnetization.Sizes(Row_BlockIdx_R-1)}
1766  };
1767  }
1768  ACCUM_TIMINGS_END(KronIterSetup)
1769 
 1770  /* Loop through each term in this row; the identity operator is handled separately by supplying its single unit entry directly */
1771  ACCUM_TIMINGS_BEGIN(MatLoop)
1772  for(const KronSumTerm& term: ctx.Terms)
1773  {
1774  KronSumTermRow& kstr = shellctx->kstr[irt];
1775  PetscInt bks_R = 0;
1776 
1777  if(term.OpTypeA != OpEye){
1778  ierr = (*term.A->ops->getrow)(term.A, LocRow_L,
1779  &(kstr.nz_L), (PetscInt**)&(kstr.idx_L), (PetscScalar**)&(kstr.v_L)); CHKERRQ(ierr);
1780  kstr.bks_L = LeftBlock.Magnetization.OpBlockToGlobalRangeStart(Row_BlockIdx_L, term.OpTypeA, flg[SideLeft]);
1781  } else {
1782  kstr.nz_L = 1;
1783  kstr.idx_L = &(shellctx->Rows_L[lrow]);
1784  kstr.v_L = &(shellctx->one);
1785  kstr.bks_L = LeftBlock.Magnetization.OpBlockToGlobalRangeStart(Row_BlockIdx_L, OpSz, flg[SideLeft]);
1786  }
1787 
1788  if(term.OpTypeB != OpEye){
1789  ierr = (*term.B->ops->getrow)(term.B, LocRow_R,
1790  &(kstr.nz_R), (PetscInt**)&(kstr.idx_R), (PetscScalar**)&(kstr.v_R)); CHKERRQ(ierr);
1791  bks_R = RightBlock.Magnetization.OpBlockToGlobalRangeStart(Row_BlockIdx_R, term.OpTypeB, flg[SideRight]);
1792  } else {
1793  kstr.nz_R = 1;
1794  kstr.idx_R = &(shellctx->Rows_R[lrow]);
1795  kstr.v_R = &(shellctx->one);
1796  bks_R = RightBlock.Magnetization.OpBlockToGlobalRangeStart(Row_BlockIdx_R, OpSz, flg[SideRight]);
1797  }
1798 
1799  if( (!flg[SideLeft]) || (!flg[SideRight]) || (kstr.nz_L*kstr.nz_R == 0)){
1800  kstr.nz_L = 0;
1801  kstr.nz_R = 0;
1802  } else {
1803  kstr.fws_O = fws_LOP.at(term.OpTypeA) - bks_R;
1804  kstr.col_NStatesR = Row_NumStates_ROP.at(term.OpTypeB);
1805  if(kstr.col_NStatesR==-1) SETERRQ(PETSC_COMM_SELF,1,"Accessed incorrect value.");
1806  }
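        /* fws_O folds the destination block offset and -bks_R into a single shift, so MatMult only needs
           to add the raw right index idx_R[r] to obtain the global column */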
1807 
1808  ++irt;
1809  }
1810  ACCUM_TIMINGS_END(MatLoop)
1811  }
1812 
1813  if(irt!=(shellctx->Nrowterms)) SETERRQ2(PETSC_COMM_SELF,1,
1814  "Wrong index irt. Expected %lld. Got %lld.", LLD(shellctx->Nrowterms), LLD(irt));
1815 
1816  }
1817 
1818  ACCUM_TIMINGS_PRINT(KronIterSetup, " KronIterSetup")
1819  ACCUM_TIMINGS_PRINT(MatLoop, " MatLoop")
1820 
1821  FUNCTION_TIMINGS_END()
1822  FUNCTION_TIMINGS_PRINT_SPACE()
1823  return(0);
1824 }
1825 
1826 
1827 PETSC_EXTERN PetscErrorCode MatMult_KronSumShell(Mat A, Vec x, Vec y)
1828 {
1829  PetscErrorCode ierr;
1830  KronSumShellCtx *shellctx;
1831  ierr = MatShellGetContext(A,(void**)&shellctx); CHKERRQ(ierr);
1832  KronSumCtx& ctx = *(shellctx->p_ctx);
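  /* A row may reference arbitrary columns of x, so the distributed input vector is first gathered into
     the sequential copy x_seq on every process */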
1833  ierr = VecScatterBegin(shellctx->vsctx, x, shellctx->x_seq, INSERT_VALUES, SCATTER_FORWARD); CHKERRQ(ierr);
1834  ierr = VecScatterEnd(shellctx->vsctx, x, shellctx->x_seq, INSERT_VALUES, SCATTER_FORWARD); CHKERRQ(ierr);
1835 
1836  const PetscScalar *xvals;
1837  PetscScalar *yvals;
1838  ierr = VecGetArrayRead(shellctx->x_seq, &xvals); CHKERRQ(ierr);
1839  ierr = VecGetArray(y, &yvals); CHKERRQ(ierr);
1840  ierr = PetscMemzero(yvals, (ctx.lrows)*sizeof(PetscScalar)); CHKERRQ(ierr);
1841 
1842  PetscInt irt=-1,idx;
1843  PetscScalar yval, temp;
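  /* For each local row: y[row] = sum over terms t of a_t * sum_{l,r} v_L[l] * v_R[r] * x[col(l,r)],
     with col(l,r) reconstructed from the offsets cached in each KronSumTermRow */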
1844  for(PetscInt ir = 0; ir < ctx.lrows; ++ir)
1845  {
1846  yval = 0.0;
1847  for(PetscInt it = 0; it < shellctx->Nterms; ++it)
1848  {
1849  ++irt;
1850  const KronSumTermRow& kstr = shellctx->kstr[irt];
1851 
1852  for(PetscInt l=0; l<kstr.nz_L; ++l)
1853  {
1854  /* TODO: fuse fws_O-bks_R */
1855  idx = (kstr.idx_L[l] - kstr.bks_L) * kstr.col_NStatesR + kstr.fws_O;
1856  temp = shellctx->term_a[it] * kstr.v_L[l];
1857  for(PetscInt r=0; r<kstr.nz_R; ++r)
1858  {
1859  yval += temp * kstr.v_R[r] * xvals[idx + kstr.idx_R[r]];
1860  }
1861  }
1862  }
1863  yvals[ir] = yval;
1864  }
1865 
 1866  ierr = VecRestoreArrayRead(shellctx->x_seq, &xvals); CHKERRQ(ierr);
 1867  ierr = VecRestoreArray(y, &yvals); CHKERRQ(ierr);
1868  return(0);
1869 }
1870 
1871 PetscErrorCode KronBlocks_t::KronSumConstructShell(
1872  const std::vector< Hamiltonians::Term >& TermsLR,
1873  Mat& MatOut
1874  )
1875 {
1876  PetscErrorCode ierr = 0;
1877 
1878  KronSumShellCtx *shellctx;
1879  ierr = PetscNew(&shellctx); CHKERRQ(ierr);
1880 
1881  shellctx->p_ctx = new KronSumCtx();
1882  KronSumCtx& ctx = *(shellctx->p_ctx);
1883 
 1884  /* Assumes that the output matrix is square */
1885  ctx.Nrows = ctx.Ncols = num_states;
1886 
1887  if(do_redistribute)
1888  {
1889  ierr = KronSumShellSplitOwnership(LeftBlock.H, RightBlock.H, TermsLR, ctx.Nrows, ctx.lrows, ctx.rstart); CHKERRQ(ierr);
1890  }
1891  else
1892  {
 1893  /* Split the ownership of rows using PETSc's default layout */
1894  ierr = PreSplitOwnership(mpi_comm, ctx.Nrows, ctx.lrows, ctx.rstart); CHKERRQ(ierr);
1895  }
1896  ctx.cstart = ctx.rstart;
1897  ctx.lcols = ctx.lrows;
1898  ctx.rend = ctx.cend = ctx.rstart + ctx.lrows;
1899 
1900  TIMINGS_NEWLINE()
1901  ierr = KronSumGetSubmatrices(LeftBlock.H, RightBlock.H, TermsLR, ctx); CHKERRQ(ierr);
1902  ierr = LeftBlock.EnsureSaved(); CHKERRQ(ierr);
1903  ierr = RightBlock.EnsureSaved(); CHKERRQ(ierr);
1904  ierr = KronSumSetUpShellTerms(shellctx); CHKERRQ(ierr);
1905  /* Create vector scatter object */
1906  Vec x_mpi;
1907  ierr = VecCreateMPI(mpi_comm, ctx.lrows, PETSC_DETERMINE, &x_mpi); CHKERRQ(ierr);
1908  ierr = VecScatterCreateToAll(x_mpi, &shellctx->vsctx, &shellctx->x_seq); CHKERRQ(ierr);
1909  ierr = VecDestroy(&x_mpi); CHKERRQ(ierr);
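  /* x_mpi served only as a layout template for the scatter context and the sequential work vector x_seq */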
1910 
1911  /* Create the matrix */
1912  ierr = MatCreateShell(mpi_comm, ctx.lrows, ctx.lcols, ctx.Nrows, ctx.Ncols, (void*)shellctx, &MatOut); CHKERRQ(ierr);
1913 
1914  ierr = MatShellSetOperation(MatOut, MATOP_MULT, (void(*)())MatMult_KronSumShell); CHKERRQ(ierr);
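  /* Note: the shell context allocated here is released by MatDestroy_KronSumShell below, which is
     expected to be called before the matrix itself is destroyed */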
1915 
1916  return(0);
1917 }
1918 
1919 PetscErrorCode MatDestroy_KronSumShell(Mat *p_mat)
1920 {
1921  PetscErrorCode ierr;
1922  KronSumShellCtx *shellctx;
1923  ierr = MatShellGetContext(*p_mat,(void**)&shellctx); CHKERRQ(ierr);
1924  KronSumCtx& ctx = *(shellctx->p_ctx);
1925 
1926  ierr = PetscFree(shellctx->term_a); CHKERRQ(ierr);
1927  ierr = PetscFree(shellctx->kstr); CHKERRQ(ierr);
1928  ierr = PetscFree(shellctx->Rows_L); CHKERRQ(ierr);
1929  ierr = PetscFree(shellctx->Rows_R); CHKERRQ(ierr);
1930  ierr = VecDestroy(&(shellctx->x_seq)); CHKERRQ(ierr);
1931  ierr = VecScatterDestroy(&(shellctx->vsctx)); CHKERRQ(ierr);
1932  /* Destroy local submatrices and temporary matrices */
1933  for(Mat *mat: ctx.LocalSubMats){
1934  ierr = MatDestroySubMatrices(1,&mat); CHKERRQ(ierr);
1935  }
1936 
1937  delete shellctx->p_ctx;
1938  shellctx->p_ctx = NULL;
1939  ierr = PetscFree(shellctx); CHKERRQ(ierr);
1940  shellctx = NULL;
1941  return(0);
1942 }