#if !defined(SPM_WITH_MPI)
#error "This file should not be compiled if MPI support is not enabled (SPM_WITH_MPI)"
for ( c=spm->clustnbr-1; c>0; c-- ) {
    allcounts[ 3 * c ] = allcounts[c];
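/*
 * Note: allcounts stores three integers per rank; judging from the
 * assertions at the end of spmScatter(), the layout appears to be
 * { n, nnz, nnzexp } for rank c at allcounts[3*c .. 3*c+2]. The loop
 * above spreads the gathered n values out to that stride-3 layout.
 */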
assert( newspm->gN > 0 );

assert( root != -1 );
            root, newspm->comm );

assert( newspm->n == counters[0] );
newspm->nnz    = counters[1];
newspm->nnzexp = counters[2];
dofs = oldspm->dofs - baseval;

for ( jg=baseval; jg<oldspm->n+baseval; jg++, oldcol++ )

    if ( newspm->dof > 0 ) {

        dofj = dofs[ jg+1 ] - dofs[ jg ];

    for ( kg=oldcol[0]; kg<oldcol[1]; kg++, oldrow++ )

        if ( newspm->dof <= 0 ) {
            nnzexp += dofs[ ig+1 ] - dofs[ ig ];

            nnzexp += newspm->dof;

    allcounts[ c * 3 + 1 ] += nnz;
    allcounts[ c * 3 + 2 ] += nnzexp * dofj;
            root, newspm->comm );

newspm->nnz = allcounts[ newspm->clustnum * 3 + 1 ];
assert( root != -1 );
            root, newspm->comm );

assert( newspm->n == counters[0] );
newspm->nnz    = counters[1];
newspm->nnzexp = counters[2];
dofs   = oldspm->dofs - baseval;
oldcol = distByColumn ? oldspm->colptr : oldspm->rowptr;
oldrow = distByColumn ? oldspm->rowptr : oldspm->colptr;

dof2 = newspm->dof * newspm->dof;

for ( kg=0; kg<oldspm->nnz; kg++, oldcol++, oldrow++ )

    if ( newspm->dof > 0 ) {

        dofi = dofs[ ig+1 ] - dofs[ ig ];
        dofj = dofs[ jg+1 ] - dofs[ jg ];

    allcounts[ c * 3 + 1 ]++;
    allcounts[ c * 3 + 2 ] += nnz;
            root, newspm->comm );

newspm->nnz = allcounts[ newspm->clustnum * 3 + 1 ];
int alloc = (root == -1) || (root == clustnum);

MPI_Comm_size( comm, &clustnbr );

if ( root == clustnum ) {

MPI_Bcast( newspm, sizeof(spmatrix_t), MPI_BYTE, root, comm );
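/*
 * The structure itself is broadcast as a raw byte copy: only the scalar
 * fields are meaningful on the receiving ranks, since pointer fields
 * (colptr, rowptr, values, loc2glob, ...) reference memory on the root
 * and are presumably reset and reallocated locally afterwards.
 */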
if ( loc2glob != NULL ) {

if ( newspm->dof < 1 ) {
const char *oldval = oldspm->values;

char *newval = newspm->values;

spm_int_t i, il, ig, jl, jg, nnz, nnzexp;

for ( il=0; il<newspm->n; il++, loc2glob++, newcol++ )

    *newcol = jl + baseval;

    ig  = *loc2glob - baseval;
    jg  = oldcol[ ig ]   - baseval;
    nnz = oldcol[ ig+1 ] - oldcol[ ig ];

    dofj = (dof > 0) ? dof : dofs[ig+1] - dofs[ig];

    for ( i=0; i<nnz; i++ )

        row   = oldrow[jg + i] - baseval;
        dofi += (dof > 0) ? dof : dofs[row + 1] - dofs[row];

    memcpy( newrow, oldrow + jg, nnz * sizeof(spm_int_t) );

    nnzexp = dofi * dofj;

    memcpy( newval, oldval + vg * typesize, nnzexp * typesize );

    newval += nnzexp * typesize;

*newcol = jl + baseval;

assert( jl == newspm->nnz );
assert( vl == (size_t)(newspm->nnzexp) );
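/*
 * Sketch of the generic send path above: for each local column listed in
 * loc2glob, the slice [ oldcol[ig], oldcol[ig+1] ) of the gathered index
 * array and the matching expanded values are packed into contiguous send
 * buffers, while the new compressed pointer array is rebuilt from the
 * running counters jl (nnz) and vl (nnzexp) checked by the asserts.
 */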
const char *oldval = oldspm->values;

char *newval = newspm->values;

for ( c=0; c<newspm->clustnum; c++ ) {
    ig += allcounts[3 * c];
    jg += allcounts[3 * c + 1];
    vg += allcounts[3 * c + 2];

assert( jg == (oldcol[ig] - newspm->baseval) );

memcpy( newcol, oldcol + ig, (newspm->n + 1) * sizeof(spm_int_t) );
for ( c=0; c<=newspm->n; c++ ) {

memcpy( newrow, oldrow + jg, newspm->nnz * sizeof(spm_int_t) );

memcpy( newval, oldval + vg * typesize, newspm->nnzexp * typesize );
MPI_Datatype valtype = spm_get_datatype( oldspm );

for ( dst=0; dst<newspm->clustnbr; dst++, counts+=3 ) {

    maxnnzexp = spm_imax( maxnnzexp, nnzexp );

memcpy( &dstspm, newspm, sizeof(spmatrix_t) );
newcol   = malloc( (maxn+1) * sizeof(spm_int_t) );
newrow   = malloc( maxnnz   * sizeof(spm_int_t) );
loc2glob = malloc( maxn     * sizeof(spm_int_t) );

newval = malloc( maxnnzexp * typesize );
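/*
 * The scratch buffers are allocated once, at the maxima (maxn, maxnnz,
 * maxnnzexp) computed over all destination ranks in the first loop, so
 * the same space can be reused for every remote rank in the send loop
 * below.
 */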
for ( dst=0; dst<newspm->clustnbr; dst++, counts+=3 ) {

    if ( dstspm.n == 0 ) {

    MPI_Send( newval, nnzexp, valtype, dst, 2, newspm->comm );
static inline MPI_Request *

    MPI_Request *allreqs = malloc( (newspm->clustnbr-1) * 3 * sizeof(MPI_Request) );
    MPI_Request *requests;

    const char  *oldval  = oldspm->values;
    MPI_Datatype valtype = spm_get_datatype( oldspm );

    for ( dst=0; dst<newspm->clustnbr; dst++, allcounts+=3 ) {

        nnzexp = allcounts[2];

        requests[0] = MPI_REQUEST_NULL;
        requests[1] = MPI_REQUEST_NULL;
        requests[2] = MPI_REQUEST_NULL;

        MPI_Isend( oldcol + ig, n+1, SPM_MPI_INT, dst, 0, newspm->comm, requests );

        MPI_Isend( oldrow + jg, nnz, SPM_MPI_INT, dst, 1, newspm->comm, requests + 1 );

        MPI_Isend( oldval + vg * typesize, nnzexp, valtype, dst, 2, newspm->comm, requests + 2 );

        requests[2] = MPI_REQUEST_NULL;
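/*
 * Message tags are used consistently on both sides of the exchange:
 * tag 0 carries the compressed pointer array, tag 1 the row/column
 * indices, and tag 2 the values. The matching MPI_Irecv calls in
 * spm_scatter_csx_recv() below use the same tags.
 */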
MPI_Request *allreqs;
MPI_Status  *allstatus = malloc( (newspm->clustnbr-1) * 3 * sizeof(MPI_Status) );

MPI_Waitall( (newspm->clustnbr-1) * 3, allreqs, allstatus );
MPI_Request allrequests[3] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL, MPI_REQUEST_NULL };
MPI_Status  allstatuses[3];

char *newval = newspm->values;
MPI_Datatype valtype = spm_get_datatype( newspm );

if ( newspm->n == 0 ) {

MPI_Irecv( newcol, newspm->n+1,    SPM_MPI_INT, root, 0, newspm->comm, allrequests );

MPI_Irecv( newrow, newspm->nnz,    SPM_MPI_INT, root, 1, newspm->comm, allrequests + 1 );

MPI_Irecv( newval, newspm->nnzexp, valtype,     root, 2, newspm->comm, allrequests + 2 );

MPI_Waitall( 3, allrequests, allstatuses );

if ( continuous && (newspm->n > 0) ) {

    for ( i=0; i<=newspm->n; i++, newcol++ ) {
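/*
 * The body of this loop is not shown here; in the continuous case the
 * received pointer array is a verbatim slice of the global one, so the
 * loop presumably rebases each entry so that the local compressed array
 * starts back at baseval.
 */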
if ( newspm->n == 0 ) {
const char *oldval = oldspm->values;

char *newval = newspm->values;

glob2loc -= baseval;

dof2 = newspm->dof * newspm->dof;

for ( kg=0; kg<oldspm->nnz; kg++, oldcol++, oldrow++ )

    if ( newspm->dof > 0 ) {

        dofi = dofs[ ig+1 ] - dofs[ ig ];
        dofj = dofs[ jg+1 ] - dofs[ jg ];

    if ( glob2loc[ jg ] < 0 ) {
        oldval += typesize * nnz;

    memcpy( newval, oldval, nnz * typesize );
    newval += nnz * typesize;
    oldval += nnz * typesize;

assert( kl == newspm->nnz );
assert( vl == newspm->nnzexp );
const char *oldval = oldspm->values;

char *newval = newspm->values;

glob2loc -= baseval;

dof2 = newspm->dof * newspm->dof;

for ( kg=0; kg<oldspm->nnz; kg++, oldcol++, oldrow++ )

    if ( newspm->dof > 0 ) {

        dofi = dofs[ ig+1 ] - dofs[ ig ];
        dofj = dofs[ jg+1 ] - dofs[ jg ];

    if ( glob2loc[ jg ] != (-newspm->clustnum-1) ) {
        oldval += typesize * nnz;

    memcpy( newval, oldval, nnz * typesize );
    newval += nnz * typesize;
    oldval += nnz * typesize;

assert( kl == newspm->nnz );
assert( vl == newspm->nnzexp );
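/*
 * The two filters above rely on the usual spm glob2loc encoding (see
 * spm_getandset_glob2loc() in the index below): a non-negative entry is
 * the local index of an unknown owned by this rank, while a remote
 * unknown owned by rank r is stored as -(r+1). The local pass therefore
 * skips negative entries, and the remote pass keeps only the entries
 * encoded as -(clustnum+1) for the destination rank.
 */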
char *newval = NULL;
MPI_Datatype valtype = spm_get_datatype( oldspm );

for ( dst=0; dst<newspm->clustnbr; dst++, counts+=3 ) {

    if ( dst == root ) {

    maxnnzexp = spm_imax( maxnnzexp, nnzexp );

memcpy( &dstspm, newspm, sizeof(spmatrix_t) );
newcol = malloc( maxnnz * sizeof(spm_int_t) );
newrow = malloc( maxnnz * sizeof(spm_int_t) );

newval = malloc( maxnnzexp * typesize );

for ( dst=0; dst<newspm->clustnbr; dst++, counts+=3 ) {

    if ( dst == root ) {

    MPI_Send( newval, nnzexp, valtype, dst, 2, newspm->comm );
MPI_Request allrequests[3] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL, MPI_REQUEST_NULL };
MPI_Status  allstatuses[3];

char *newval = newspm->values;
MPI_Datatype valtype = spm_get_datatype( newspm );

MPI_Irecv( newrow, newspm->nnz,    SPM_MPI_INT, root, 1, newspm->comm, allrequests + 1 );

MPI_Irecv( newval, newspm->nnzexp, valtype,     root, 2, newspm->comm, allrequests + 2 );

MPI_Waitall( 3, allrequests, allstatuses );
                 distByColumn, root );
int clustnum, clustnbr;

MPI_Comm_rank( comm, &clustnum );
local = ( (root == -1) || (root == clustnum) );

if ( oldspm == NULL ) {
    spm_print_warning( "[%02d] spmScatter: Missing input matrix\n", clustnum );

    spm_print_warning( "[%02d] spmScatter: The spm is already distributed\n", clustnum );

if ( loc2glob && (gN != oldspm->gN) ) {
    spm_print_warning( "[%02d] spmScatter: Incorrect n sum (%ld != %ld)\n",
                       clustnum, (long)(oldspm->gN), (long)gN );

    spm_print_warning( "[%02d] spmScatter: Does not support to scatter along the non compressed array in CSC/CSR formats\n",

MPI_Allreduce( MPI_IN_PLACE, &rc, 1, MPI_INT,

MPI_Allreduce( MPI_IN_PLACE, &rc, 1, MPI_INT,

MPI_Comm_size( comm, &clustnbr );
if ( clustnbr == 1 ) {

    newspm->comm = comm;

                  n, loc2glob, distByColumn,
                  root, clustnum, comm );

                 allcounts, (loc2glob == NULL), root );

                 allcounts, distByColumn, root );

    fprintf( stderr, "spmScatter (Unexpected error)\n" );

assert( (allcounts == NULL) || (allcounts[ newspm->clustnum * 3 + 0 ] == newspm->n     ) );
assert( (allcounts == NULL) || (allcounts[ newspm->clustnum * 3 + 1 ] == newspm->nnz   ) );
assert( (allcounts == NULL) || (allcounts[ newspm->clustnum * 3 + 2 ] == newspm->nnzexp) );

if ( allcounts != NULL ) {
static size_t spm_size_of(spm_coeftype_t type)
Return the size, in bytes, of one coefficient of the given arithmetic type.
static spm_int_t spm_imax(spm_int_t a, spm_int_t b)
Internal function to compute max(a,b)
spm_int_t * spm_get_value_idx_by_elt(const spmatrix_t *spm)
Create an array that represents the shift for each sub-element of the original multidof value array.
void spmAlloc(spmatrix_t *spm)
Allocate the arrays of an spm structure.
#define SPM_MPI_INT
The MPI type associated to spm_int_t.
void spmUpdateComputedFields(spmatrix_t *spm)
Update all the computed fields based on the static values stored.
int spmScatter(spmatrix_t *spm_scattered, int root, const spmatrix_t *opt_spm_gathered, spm_int_t opt_n, const spm_int_t *opt_loc2glob, int opt_distByColumn, SPM_Comm opt_comm)
Scatter the SPM thanks to loc2glob (a usage sketch follows this index).
int spm_int_t
The main integer datatype used in spm arrays.
void spmCopy(const spmatrix_t *spm_in, spmatrix_t *spm_out)
Create a copy of the spm.
void spmInit(spmatrix_t *spm)
Init the spm structure.
spmatrix_t
The sparse matrix data structure.
spm_int_t * spm_getandset_glob2loc(spmatrix_t *spm)
Computes the glob2loc array if needed, and returns it.
spm_int_t spm_create_loc2glob_continuous(const spmatrix_t *spm, spm_int_t **l2g_ptr)
Generate a continuous loc2glob array on each node.
static void spm_scatter_ijv_remote(const spmatrix_t *oldspm, spmatrix_t *newspm, int distByColumn)
Initialize a temporary remote spm in IJV format to send it.
static void spm_scatter_csx_send_generic(const spmatrix_t *oldspm, const spmatrix_t *newspm, const spm_int_t *allcounts, int root)
Send function to scatter an SPM in CSC or CSR format from a single node when the loc2glob array is ge...
static void spm_scatter_ijv(const spmatrix_t *oldspm, spmatrix_t *newspm, const spm_int_t *allcounts, int distByColumn, int root)
Scatter the SPM in the IJV format.
static void spm_scatter_csx_local_generic(const spmatrix_t *oldspm, spmatrix_t *newspm)
Local copy of a scattered SPM in CSC or CSR format when everyone holds the original (Generic loc2glob...
static MPI_Request * spm_scatter_csx_send_continuous(const spmatrix_t *oldspm, const spmatrix_t *newspm, const spm_int_t *allcounts, int root)
Send function to scatter an SPM in CSC or CSR format from a single node when the loc2glob array is sp...
static void spm_scatter_csx(const spmatrix_t *oldspm, spmatrix_t *newspm, const spm_int_t *allcounts, int continuous, int root)
Scatter the SPM in the CSC/CSR formats.
static void spm_scatter_ijv_get_locals(const spmatrix_t *oldspm, spmatrix_t *newspm, int distByColumn, spm_int_t *allcounts, int root)
Compute the allcounts array with IJV format.
static void spm_scatter_init(spmatrix_t *newspm, const spmatrix_t *oldspm, int n, const spm_int_t *loc2glob, int distByColumn, spm_int_t **allcounts, int root, int clustnum, SPM_Comm comm)
Generic function to initialize a scattered spm on each node.
static void spm_scatter_csx_local_continuous(const spmatrix_t *oldspm, spmatrix_t *newspm, const spm_int_t *allcounts)
Local copy of a scattered SPM in CSC or CSR format when everyone holds the original (Continuous loc2gl...
static void spm_scatter_getn(const spmatrix_t *spm, spm_int_t *allcounts, int root)
Gather the n values from all nodes.
static void spm_scatter_csx_recv(const spmatrix_t *newspm, int continuous, int root)
Reception of a scattered SPM in the CSC/CSR formats.
static void spm_scatter_csx_send(const spmatrix_t *oldspm, spmatrix_t *newspm, const spm_int_t *allcounts, int continuous, int root)
Send wrapper function to scatter an SPM in CSC or CSR format from a single node.
static void spm_scatter_ijv_local(const spmatrix_t *oldspm, spmatrix_t *newspm, int distByColumn)
Initialize a local spm in IJV format.
static void spm_scatter_ijv_send(const spmatrix_t *oldspm, spmatrix_t *newspm, const spm_int_t *allcounts, int distByColumn, int root)
Send function to scatter an IJV SPM from a single node.
static void spm_scatter_csx_get_locals(const spmatrix_t *oldspm, spmatrix_t *newspm, spm_int_t *allcounts, int root)
Compute the allcounts array with CSC/CSR formats.
static void spm_scatter_ijv_recv(const spmatrix_t *newspm, int root)
Reception of a scattered SPM in the IJV format.
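Usage sketch for spmScatter(), based only on the signature listed above: a minimal MPI program that scatters a matrix held by rank 0 with the library-generated continuous distribution (opt_loc2glob == NULL). The Laplacian driver parameter string and the opt_n convention are assumptions, not taken from this file.

#include <mpi.h>
#include <spm.h>

int main( int argc, char **argv )
{
    spmatrix_t original, scattered;
    int        rank;
    int        root = 0;

    MPI_Init( &argc, &argv );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );

    /* Only the root rank needs to hold the gathered matrix. */
    if ( rank == root ) {
        /* Assumption: the Laplacian generator accepts a "dim1:dim2:dim3"
         * parameter string; check the spmReadDriver() documentation. */
        spmReadDriver( SpmDriverLaplacian, "10:10:10", &original );
    }

    /* opt_loc2glob == NULL requests the continuous block distribution
     * (see spm_create_loc2glob_continuous above); opt_n is assumed to be
     * ignored in that case and is passed as -1. */
    spmScatter( &scattered, root,
                ( rank == root ) ? &original : NULL,
                -1, NULL,           /* opt_n, opt_loc2glob */
                1,                  /* opt_distByColumn    */
                MPI_COMM_WORLD );

    /* ... work on the distributed matrix ... */

    spmExit( &scattered );
    if ( rank == root ) {
        spmExit( &original );
    }
    MPI_Finalize();
    return 0;
}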