30struct __spm_smatvec_s;
66#if defined(PRECISION_c) || defined(PRECISION_z)
82__fct_conj(
float val )
92struct __spm_smatvec_s {
175 for(jj=0; jj<dofj; jj++)
177 for(ii=0; ii<dofi; ii++, values++)
179 y[ row + (ii * incy) ] += alpha * conjA_fct( *values ) * x[ col +(jj * incx) ];
245 for(jj=0; jj<dofj; jj++)
247 for(ii=0; ii<dofi; ii++, values++)
249 y[ row + (ii * incy) ] += alpha * conjA_fct( *values ) * x[ col +(jj * incx) ];
250 y[ col + (jj * incy) ] += alpha * conjAt_fct( *values ) * x[ row +(ii * incx) ];
302__spm_smatvec_dof_loop_sy_csr(
spm_int_t row,
315 __spm_smatvec_dof_loop_sy( row, dofi, col, dofj, y, incy, x, incx, values, conjAt_fct, conjA_fct, alpha );
340 float alpha = args->alpha;
343 const float *values = args->values;
344 const spm_int_t *loc2glob = args->loc2glob;
347 const float *x = args->x;
361 = ( args->follow_x ) ? __spm_smatvec_dof_loop_sy_csr : __spm_smatvec_dof_loop_sy;
363 for( j=0; j<n; j++, colptr++ )
365 jg = (loc2glob == NULL) ? j : loc2glob[j] - baseval ;
366 dofj = ( dof > 0 ) ? dof : dofs[jg+1] - dofs[jg];
367 col = ( dof > 0 ) ? dof * jg : dofs[jg] - baseval;
368 for( i=colptr[0]; i<colptr[1]; i++, rowptr++ )
370 ig = *rowptr - baseval;
371 dofi = ( dof > 0 ) ? dof : dofs[ig+1] - dofs[ig];
372 row = ( dof > 0 ) ? dof * ig : dofs[ig] - baseval;
374 dof_loop_sy( row, dofi, col, dofj, y, incy, x, incx, values, conjA_fct, conjAt_fct, alpha );
377 __spm_smatvec_dof_loop( row, dofi, col, dofj, y, incy, x, incx, values, conjA_fct, alpha );
407 float alpha = args->alpha;
410 const float *values = args->values;
411 const spm_int_t *loc2glob = args->loc2glob;
414 const float *x = args->x;
422 if ( args->follow_x ) {
423 for( j = 0; j < n; j++, colptr++ )
425 jg = (loc2glob == NULL) ? j : loc2glob[j] - baseval;
426 dofj = ( dof > 0 ) ? dof : dofs[jg+1] - dofs[jg];
427 for( i=colptr[0]; i<colptr[1]; i++, rowptr++ )
429 ig = *rowptr - baseval;
430 dofi = ( dof > 0 ) ? dof : dofs[ig+1] - dofs[ig];
431 row = ( dof > 0 ) ? dof * ig : dofs[ig] - baseval;
432 __spm_smatvec_dof_loop( row, dofi, 0, dofj, y, incy, x, 1, values, conjA_fct, alpha );
433 values += dofi * dofj;
439 for( j=0; j<n; j++, colptr++ )
441 jg = (loc2glob == NULL) ? j : loc2glob[j] - baseval;
442 dofj = ( dof > 0 ) ? dof : dofs[jg+1] - dofs[jg];
443 for( i=colptr[0]; i<colptr[1]; i++, rowptr++ )
445 ig = *rowptr - baseval;
446 dofi = ( dof > 0 ) ? dof : dofs[ig+1] - dofs[ig];
447 row = ( dof > 0 ) ? dof * ig : dofs[ig] - baseval;
448 __spm_smatvec_dof_loop( 0, dofj, row, dofi, y, 1, x, incx, values, conjA_fct, alpha );
449 values += dofi * dofj;
479 float alpha = args->alpha;
482 const float *values = args->values;
485 const float *x = args->x;
494 for( i=0; i<nnz; i++, colptr++, rowptr++ )
496 ig = *rowptr - baseval;
497 jg = *colptr - baseval;
499 dofj = ( dof > 0 ) ? dof : dofs[jg+1] - dofs[jg];
500 dofi = ( dof > 0 ) ? dof : dofs[ig+1] - dofs[ig];
502 row = ( dof > 0 ) ? dof * ig : dofs[ig] - baseval;
503 col = ( dof > 0 ) ? dof * jg : dofs[jg] - baseval;
506 __spm_smatvec_dof_loop_sy( row, dofi, col, dofj, y, incy, x, incx, values, conjA_fct, conjAt_fct, alpha );
509 __spm_smatvec_dof_loop( row, dofi, col, dofj, y, incy, x, incx, values, conjA_fct, alpha );
541__spm_smatvec_dofs_local(
const spm_int_t *dofs,
548 result = calloc( gN ,
sizeof(
spm_int_t) );
550 for ( i = 0; i < gN; i++, glob2loc++, resptr++, dofs++ )
552 if( *glob2loc >= 0 ) {
554 acc += dofs[1] - dofs[0];
582 float alpha = args->alpha;
585 const float *values = args->values;
586 const spm_int_t *glob2loc = args->glob2loc;
589 const float *x = args->x;
599 assert( ((dof > 0) && (dofs == NULL)) ||
600 ((dof <= 0) && (dofs != NULL)) );
602 if( (dofs != NULL) && (glob2loc != NULL) ) {
603 dof_local = __spm_smatvec_dofs_local( dofs, glob2loc, args->gN );
604 assert( dof_local != NULL );
607 if( args->follow_x ) {
608 for( i=0; i<nnz; i++, colptr++, rowptr++ )
610 ig = *rowptr - baseval;
611 jg = *colptr - baseval;
613 dofj = ( dof > 0 ) ? dof : dofs[jg+1] - dofs[jg];
614 dofi = ( dof > 0 ) ? dof : dofs[ig+1] - dofs[ig];
616 row = ( dof > 0 ) ? dof * ig : dofs[ig] - baseval;
617 if ( glob2loc == NULL ) {
618 col = ( dof > 0 ) ? dof * jg : dofs[jg] - baseval;
621 assert( glob2loc[jg] >= 0 );
622 col = ( dof > 0 ) ? dof * glob2loc[jg] : dof_local[jg];
624 __spm_smatvec_dof_loop( row, dofi, col, dofj, y, incy, x, incx, values, conjA_fct, alpha );
629 for( i=0; i<nnz; i++, colptr++, rowptr++ )
631 ig = *rowptr - baseval;
632 jg = *colptr - baseval;
634 dofj = ( dof > 0 ) ? dof : dofs[jg+1] - dofs[jg];
635 dofi = ( dof > 0 ) ? dof : dofs[ig+1] - dofs[ig];
637 col = ( dof > 0 ) ? dof * jg : dofs[jg] - baseval;
638 if ( glob2loc == NULL ) {
639 row = ( dof > 0 ) ? dof * ig : dofs[ig] - baseval;
642 assert( glob2loc[ig] >= 0 );
643 row = ( dof > 0 ) ? dof * glob2loc[ig] : dof_local[ig];
645 __spm_smatvec_dof_loop( row, dofi, col, dofj, y, incy, x, incx, values, conjA_fct, alpha );
650 if(dof_local != NULL) {
657#if !defined(LAPACKE_WITH_LASCL)
683__spm_slascl(
float alpha,
691 for( j=0; j<n; j++ ) {
692 for( i=0; i<m; i++, A++ ) {
737#define LAPACKE_slascl_work( _dir_, _uplo_, _kl_, _ku_, _cfrom_, _cto_, _m_, _n_, _A_, _lda_ ) \
738 __spm_slascl( (_cto_), (_m_), (_n_), (_A_), (_lda_) )
815 args->glob2loc = NULL;
817 args->dofs = A->
dofs;
825#if defined(PRECISION_c) || defined(PRECISION_z)
828 args->conjA_fct = __fct_conj;
829 args->conjAt_fct = __fct_conj;
834 args->conjA_fct = __fct_conj;
839 args->conjAt_fct = __fct_conj;
844 args->loop_fct = NULL;
860 args->loop_fct = (A->
mtxtype ==
SpmGeneral) ? __spm_smatvec_ge_csx : __spm_smatvec_sy_csx;
882 args->loop_fct = (A->
mtxtype ==
SpmGeneral) ? __spm_smatvec_ge_csx : __spm_smatvec_sy_csx;
891 args->conjA_fct = args->conjAt_fct;
892 args->conjAt_fct = tmp_fct;
910 args->loop_fct = (A->
mtxtype ==
SpmGeneral) ? __spm_smatvec_ge_ijv : __spm_smatvec_sy_ijv;
960 C = calloc(spm->
gNexp * nrhs,
sizeof(
float));
964 for ( j=0; j<nrhs; j++ )
966 Cptr = Cloc + j * ldcl;
968 for ( i=0; i<spm->
n; i++, loc2glob++ )
970 ig = *loc2glob - baseval;
971 dof = (spm->
dof > 0) ? spm->
dof : spm->
dofs[ig+1] - spm->
dofs[ig];
972 idx = (spm->
dof > 0) ? spm->
dof * ig : spm->
dofs[ig] - baseval;
973 memcpy( (C + j * ldc + idx),
975 dof *
sizeof(
float) );
1020 *Bglb = malloc( *ldbg * nrhs *
sizeof(
float) );
1130 fprintf(stderr,
"transB != SpmNoTrans not supported yet in spmv computations\n");
1145 rc = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'A', M, N, 0., 0., C, ldc );
1154 if ( alpha == 0. ) {
1182 __spm_smatvec_args_init( &args, side, transA, distribution,
1183 alpha, A, Btmp, ldbtmp, Ctmp, ldctmp );
1186 args.x = Btmp + r * ldbtmp;
1187 args.y = Ctmp + r * ldctmp;
1188 rc = args.loop_fct( &args );
1261 memset( y, 0, A->
nexp *
sizeof(
float) );
1264 cblas_sscal( A->
nexp, (beta), y, incy );
1267 if ( alpha == 0. ) {
1271 assert( (incx == 1) && (incy == 1) );
1274 ldx = A->
nexp * incx;
1275 ldy = A->
nexp * incy;
1298 __spm_smatvec_args_init( &args,
SpmLeft, trans, distribution,
1299 alpha, A, xtmp, ldxtmp, ytmp, ldytmp );
1300 rc = args.loop_fct( &args );
#define SpmDistByColumn
Distribution of the matrix storage.
enum spm_trans_e spm_trans_t
Transposition.
enum spm_side_e spm_side_t
Side of the operation.
int spm_sspmv(spm_trans_t trans, float alpha, const spmatrix_t *A, const float *x, spm_int_t incx, float beta, float *y, spm_int_t incy)
Compute the matrix-vector product: $y = \alpha \, \mathrm{op}(A) \, x + \beta \, y$.
int spm_sspmm(spm_side_t side, spm_trans_t transA, spm_trans_t transB, spm_int_t K, float alpha, const spmatrix_t *A, const float *B, spm_int_t ldb, float beta, float *C, spm_int_t ldc)
Compute a matrix-matrix product.
void s_spmGatherRHS(int nrhs, const spmatrix_t *spm, const float *x, spm_int_t ldx, int root, float *gx, spm_int_t ldgx)
Gather all the global C coefficients and store the good ones in local.
void s_spmReduceRHS(int nrhs, const spmatrix_t *spm, float *bglob, spm_int_t ldbg, float *bloc, spm_int_t ldbl)
Reduce all the global coefficients of a rhs and store the local ones.
int spm_int_t
The main integer datatype used in spm arrays.
The sparse matrix data structure.
static void s_spmm_build_Ctmp(int nrhs, const spmatrix_t *spm, const float *Cloc, spm_int_t ldcl, float **Cglb, spm_int_t *ldcg)
Build a global C RHS, set to 0 for remote data.
#define LAPACKE_slascl_work(_dir_, _uplo_, _kl_, _ku_, _cfrom_, _cto_, _m_, _n_, _A_, _lda_)
Alias if Lapacke slascl is not available.
float(* __conj_fct_t)(float)
Typedef to define the op function applied to the element (id or conj)
static float __fct_id(float val)
Identity function.
static void s_spmm_build_Btmp(int nrhs, const spmatrix_t *spm, const float *Bloc, spm_int_t ldbl, float **Bglb, spm_int_t *ldbg)
Build a global B vector by gathering data from all nodes.
struct __spm_smatvec_s __spm_smatvec_t
Typedef associated to structure.
int(* __loop_fct_t)(const __spm_smatvec_t *)
Typedef to the main loop function performing the matvec operation.
int spm_get_distribution(const spmatrix_t *spm)
Search the distribution pattern used in the spm structure.