26#include "frobeniusupdate.h"
28#if !defined(LAPACKE_WITH_LASSQ)
64 spm_int_t incx,
float *scale,
float *sumsq )
68 for( i=0; i<n; i++, x+=incx ) {
69#if defined(PRECISION_z) || defined(PRECISION_c)
72 frobenius_update( 1, scale, sumsq, &val );
74 frobenius_update( 1, scale, sumsq, &val );
76 frobenius_update( 1, scale, sumsq, x );
88#define LAPACKE_classq_work( _n_, _X_, _incx_, _scale_, _sumsq_ ) \
89 __spm_classq( (_n_), (_X_), (_incx_), (_scale_), (_sumsq_) )
92#if defined(SPM_WITH_MPI)
114c_spm_frobenius_merge(
float *dist,
117 MPI_Datatype *dtype )
120 frobenius_merge( dist[0], dist[1], loc, loc+1 );
158 for(jj=0; jj<dofs; jj++)
161 for(ii=0; ii<jj; ii++) {
163#if defined(PRECISION_z) || defined(PRECISION_c)
169 frobenius_update( 1, data, data + 1, valptr );
170#if defined(PRECISION_z) || defined(PRECISION_c)
172 frobenius_update( 1, data, data + 1, valptr );
176 for(ii=jj+1; ii<dofs; ii++, valptr++)
178 frobenius_update( 2, data, data + 1, valptr );
180#if defined(PRECISION_z) || defined(PRECISION_c)
182 frobenius_update( 2, data, data + 1, valptr );
212 for(ii=0; ii<nbelts; ii++, valptr++)
214 frobenius_update( 2, data, data + 1, valptr );
216#if defined(PRECISION_z) || defined(PRECISION_c)
218 frobenius_update( 2, data, data + 1, valptr );
260 assert( dofi == dofj );
305 for(j=0; j<spm->
n; j++, colptr++, loc2glob++)
307 jg = spm->
replicated ? j : (*loc2glob) - baseval;
308 if ( spm->
dof > 0 ) {
313 dofj = dofs[jg+1] - dofs[jg];
314 col = dofs[jg] - baseval;
317 for(k=colptr[0]; k<colptr[1]; k++, rowptr++)
319 ig = (*rowptr - baseval);
320 if ( spm->
dof > 0 ) {
325 dofi = dofs[ig+1] - dofs[ig];
326 row = dofs[ig] - baseval;
330 valptr += dofi * dofj;
373 for(i=0; i<spm->
n; i++, rowptr++, loc2glob++)
375 ig = spm->
replicated ? i : (*loc2glob) - baseval;
376 if ( spm->
dof > 0 ) {
381 dofi = dofs[ig+1] - dofs[ig];
382 row = dofs[ig] - baseval;
385 for(k=rowptr[0]; k<rowptr[1]; k++, colptr++)
387 jg = (*colptr - baseval);
388 if ( spm->
dof > 0 ) {
393 dofj = dofs[jg+1] - dofs[jg];
394 col = dofs[jg] - baseval;
398 valptr += dofi * dofj;
439 for(k=0; k<spm->
nnz; k++, rowptr++, colptr++)
441 i = *rowptr - baseval;
442 j = *colptr - baseval;
444 if ( spm->
dof > 0 ) {
451 dofi = dofs[i+1] - dofs[i];
452 row = dofs[i] - baseval;
453 dofj = dofs[j+1] - dofs[j];
454 col = dofs[j] - baseval;
458 valptr += dofi * dofj;
482 float data[] = { 0., 1. };
485 const float *valptr = (
float*)spm->
values;
488 for(i=0; i <spm->
nnzexp; i++, valptr++) {
489 frobenius_update( 1, data, data + 1, valptr );
491#if defined(PRECISION_z) || defined(PRECISION_c)
493 frobenius_update( 1, data, data + 1, valptr );
513#if defined(SPM_WITH_MPI)
516 MPI_Op_create( (MPI_User_function *)c_spm_frobenius_merge, 1, &merge );
517 MPI_Allreduce( MPI_IN_PLACE, data, 2, SPM_MPI_FLOAT, merge, spm->
comm );
518 MPI_Op_free( &merge );
522 return data[0] * sqrtf( data[1] );
547 float tmp, norm = 0.;
549 for(i=0; i <spm->
nnzexp; i++, valptr++) {
550 tmp = cabsf( *valptr );
551 norm = (norm > tmp) ? norm : tmp;
554#if defined(SPM_WITH_MPI)
556 MPI_Allreduce( MPI_IN_PLACE, &norm, 1, MPI_FLOAT, MPI_MAX, spm->
comm );
600 for(jj=0; jj<dofi; jj++)
603 for(ii=0; ii<jj; ii++) {
608 sumtab[jj] += cabsf( *valptr );
611 for(ii=jj+1; ii<dofi; ii++, valptr++)
614 sumtab[ii] += cabsf( *valptr );
616 sumtab[jj] += cabsf( *valptr );
666 for(jj=0; jj<dofj; jj++)
668 for(ii=0; ii<dofi; ii++, valptr++)
670 sumtab[ii] += cabsf( *valptr );
705 for(jj=0; jj<dofj; jj++, sumtab++)
707 for(ii=0; ii<dofi; ii++, valptr++)
709 *sumtab += cabsf( *valptr );
751 float *sumrow = sumtab + row;
752 float *sumcol = sumtab + col;
755 for(jj=0; jj<dofj; jj++, sumcol++)
757 for(ii=0; ii<dofi; ii++, valptr++)
759 float v = cabsf( *valptr );
959 const spm_int_t *colptr, *rowptr, *loc2glob, *dofs;
969 for(j=0; j<spm->
n; j++, colptr++, loc2glob++)
971 jg = spm->
replicated ? j : (*loc2glob) - baseval;
977 dofj = dofs[jg+1] - dofs[jg];
978 col = dofs[jg] - baseval;
981 for(i=colptr[0]; i<colptr[1]; i++, rowptr++)
983 ig = (*rowptr - baseval);
989 dofi = dofs[ig+1] - dofs[ig];
990 row = dofs[ig] - baseval;
994 row, dofi, col, dofj, valptr,
996 valptr += dofi * dofj;
1025 const spm_int_t *colptr, *rowptr, *loc2glob, *dofs;
1035 for(i=0; i<spm->
n; i++, rowptr++, loc2glob++)
1037 ig = spm->
replicated ? i : (*loc2glob) - baseval;
1043 dofi = dofs[ig+1] - dofs[ig];
1044 row = dofs[ig] - baseval;
1047 for(j=rowptr[0]; j<rowptr[1]; j++, colptr++)
1049 jg = (*colptr - baseval);
1055 dofj = dofs[jg+1] - dofs[jg];
1056 col = dofs[jg] - baseval;
1060 row, dofi, col, dofj, valptr,
1062 valptr += dofi * dofj;
1101 for(k=0; k<spm->
nnz; k++, rowptr++, colptr++)
1103 ig = *rowptr - baseval;
1104 jg = *colptr - baseval;
1113 dofi = dofs[ig+1] - dofs[ig];
1114 row = dofs[ig] - baseval;
1115 dofj = dofs[jg+1] - dofs[jg];
1116 col = dofs[jg] - baseval;
1120 row, dofi, col, dofj, valptr,
1122 valptr += dofi * dofj;
1153 float *sumtab = calloc( spm->
gNexp,
sizeof(
float) );
1170#if defined(SPM_WITH_MPI)
1172 MPI_Allreduce( MPI_IN_PLACE, sumtab, spm->
gNexp, MPI_FLOAT, MPI_SUM, spm->
comm );
1178 const float *sumtmp = sumtab;
1179 for( k=0; k<spm->
gNexp; k++, sumtmp++ )
1181 if( norm < *sumtmp ) {
1219 if ( spm == NULL ) {
1238 fprintf(stderr,
"c_spmNorm: invalid norm type\n");
1287 if ( spm == NULL ) {
1294 norm = LAPACKE_clange( LAPACK_COL_MAJOR,
1296 spm->
nexp, n, A, lda );
1297#if defined(SPM_WITH_MPI)
1299 MPI_Allreduce( MPI_IN_PLACE, &norm, 1, MPI_FLOAT,
1300 MPI_MAX, spm->
comm );
1308 float *sumtab = calloc( n,
sizeof(
float) );
1311 for( j=0; j<n; j++, sumtmp++ )
1313 *sumtmp = cblas_scasum( spm->
nexp, A + j * lda, 1 );
1316#if defined(SPM_WITH_MPI)
1318 MPI_Allreduce( MPI_IN_PLACE, sumtab, n, MPI_FLOAT,
1319 MPI_SUM, spm->
comm );
1325 for( j=0; j<n; j++, sumtmp++ )
1327 if( norm < *sumtmp ) {
1337 float data[] = { 0., 1. };
1339 for ( j=0; j<n; j++ ) {
1344#if defined(SPM_WITH_MPI)
1347 MPI_Op_create( (MPI_User_function *)c_spm_frobenius_merge, 1, &merge );
1348 MPI_Allreduce( MPI_IN_PLACE, data, 2, SPM_MPI_FLOAT, merge, spm->
comm );
1349 MPI_Op_free( &merge );
1353 norm = data[0] * sqrtf( data[1] );
1358 fprintf(stderr,
"c_spmNorm: invalid norm type\n");
float c_spmMaxNorm(const spmatrix_t *spm)
Compute the Max norm of the given spm structure.
static void c_spmFrobeniusNorm_csc(const spmatrix_t *spm, float *data)
Compute the Frobenius norm of a symmetrix/hermitian CSC matrix.
static void c_spm_oneinf_elt_gen(spm_layout_t layout, spm_int_t row, spm_int_t dofi, spm_int_t col, spm_int_t dofj, const spm_complex32_t *valptr, spm_normtype_t ntype, float *sumtab)
Compute the sum array for the one and inf norms of a general element.
static float c_spmOneInfNorm(spm_normtype_t ntype, const spmatrix_t *spm)
Compute the one/inf norm of the given spm structure given by the maximum row sum.
static void c_spm_frobenius_elt_sym_diag(spm_int_t dofs, const float *valptr, float *data)
Compute the Frobenius norm of a diagonal element within a symmetric/hermitian matrix with column/row ...
static void c_spmOneInfNorm_csc(spm_normtype_t ntype, const spmatrix_t *spm, float *sumtab)
Compute the one/inf norm of an spm CSC structure.
static void c_spm_frobenius_elt_sym(spm_int_t row, spm_int_t dofi, spm_int_t col, spm_int_t dofj, const spm_complex32_t *valptr, float *data)
Compute the Frobenius norm of any element matrix in the symmetric/hermitian case.
#define LAPACKE_classq_work(_n_, _X_, _incx_, _scale_, _sumsq_)
TODO.
float c_spmFrobeniusNorm(const spmatrix_t *spm)
Compute the Frobenius norm of the given spm structure.
static void c_spm_oneinf_elt_gen_AB(spm_int_t row, spm_int_t dofi, spm_int_t col, spm_int_t dofj, const spm_complex32_t *valptr, float *sumtab)
Compute the sum array for both the one and inf norms for the off-diagonal elements of symmetric/hermi...
static void c_spm_frobenius_elt_sym_offd(spm_int_t nbelts, const float *valptr, float *data)
Compute the Frobenius norm of an off-diagonal element matrix in the symmetric/hermitian case.
static void c_spmFrobeniusNorm_ijv(const spmatrix_t *spm, float *data)
Compute the Frobenius norm of a symmetrix/hermitian IJV matrix.
static void c_spmOneInfNorm_ijv(spm_normtype_t ntype, const spmatrix_t *spm, float *sumtab)
Compute the one/inf norm of an spm IJV structure.
static int __spm_classq(spm_int_t n, spm_complex32_t *x, spm_int_t incx, float *scale, float *sumsq)
Updates the values scale and sumsq such that.
static void c_spmFrobeniusNorm_csr(const spmatrix_t *spm, float *data)
Compute the Frobenius norm of a symmetrix/hermitian CSR matrix.
static void c_spmOneInfNorm_csr(spm_normtype_t ntype, const spmatrix_t *spm, float *sumtab)
Compute the one/inf norm of an spm CSR structure.
static void c_spm_oneinf_elt_gen_A(spm_int_t dofi, spm_int_t dofj, const spm_complex32_t *valptr, float *sumtab)
Compute the sum array for the one/inf norms of a general element.
static void c_spm_oneinf_elt_gen_B(spm_int_t dofi, spm_int_t dofj, const spm_complex32_t *valptr, float *sumtab)
Compute the sum array for the one/inf norms of a general element.
static void c_spm_oneinf_elt_sym_diag(spm_int_t row, spm_int_t dofi, const spm_complex32_t *valptr, float *sumtab)
Compute the sum array for the one/inf norms of a diagonal element within a symmetric/hermitian matrix...
static void c_spm_oneinf_elt(spm_mtxtype_t mtxtype, spm_layout_t layout, spm_int_t row, spm_int_t dofi, spm_int_t col, spm_int_t dofj, const spm_complex32_t *valptr, spm_normtype_t ntype, float *sumtab)
Compute the sum array for the one/inf norm for an element matrix.
static void c_spm_oneinf_elt_sym_offd(spm_layout_t layout, spm_int_t row, spm_int_t dofi, spm_int_t col, spm_int_t dofj, const spm_complex32_t *valptr, float *sumtab)
Compute the sum array for both the one and inf norms for the off-diagonal elements of symmetric/hermi...
float _Complex spm_complex32_t
enum spm_layout_e spm_layout_t
Direction of the matrix storage.
enum spm_normtype_e spm_normtype_t
Norms.
enum spm_mtxtype_e spm_mtxtype_t
Matrix symmetry type property.
float c_spmNormMat(spm_normtype_t ntype, const spmatrix_t *spm, spm_int_t n, const spm_complex32_t *A, spm_int_t lda)
Compute the norm of a dense matrix that follows the distribution of an spm matrix.
float c_spmNorm(spm_normtype_t ntype, const spmatrix_t *spm)
Compute the norm of an spm matrix.
int spm_int_t
The main integer datatype used in spm arrays.
The sparse matrix data structure.