SpM Handbook: build/src/s_spm

Searching...
No Matches
/**
 * @file s_spm_norm.c
 *
 * SParse Matrix package norm routine.
 *
 * @copyright 2016-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 * @version 1.2.4
 * @author Mathieu Faverge
 * @author Pierre Ramet
 * @author Tony Delarue
 * @author Matias Hastaran
 * @author Alycia Lisito
 * @date 2024-06-25
 *
 * @generated from /builds/2mk6rsew/0/fpruvost/spm/src/z_spm_norm.c, normal z -> s, Fri Nov 29 11:34:30 2024
 *
 * @ingroup spm_dev_norm
 * @{
 *
 **/
#include "common.h"
#include <lapacke.h>
#include <cblas.h>
#include "frobeniusupdate.h"
 
#if !defined(LAPACKE_WITH_LASSQ)
/**
 *******************************************************************************
 *
 * @brief Fold the entries of a strided vector into a (scale, sumsq) pair so that
 *
 *    ( scale**2 )*sumsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq,
 *
 * This routine is inspired from the LAPACK slassq function. Each visited entry
 * is handed to frobenius_update() with a weight of 1.
 *
 *******************************************************************************
 *
 * @param[in] n
 *          The number of elements to visit in the vector. Nothing is done when
 *          n is not strictly positive.
 *
 * @param[in] x
 *          The vector of size abs(n * incx).
 *
 * @param[in] incx
 *          The increment between two consecutive elements of x.
 *
 * @param[inout] scale
 *           On entry, the former scale.
 *           On exit, the scale updated by frobenius_update().
 *
 * @param[inout] sumsq
 *           On entry, the former sumsq.
 *           On exit, the sumsq updated by frobenius_update().
 *
 *******************************************************************************
 *
 * @return 0 in all cases; the integer return only exists to match the
 *         prototype of LAPACKE_slassq_work.
 *
 *******************************************************************************/
static inline int
__spm_slassq( spm_int_t n, float *x,
              spm_int_t incx, float *scale, float *sumsq )
{
    spm_int_t remaining;

    for ( remaining = n; remaining > 0; remaining-- ) {
#if defined(PRECISION_z) || defined(PRECISION_c)
        float part = ( *x );
        frobenius_update( 1, scale, sumsq, &part );
        part = ( *x );
        frobenius_update( 1, scale, sumsq, &part );
#else
        frobenius_update( 1, scale, sumsq, x );
#endif
        /* Move to the next strided entry */
        x += incx;
    }

    return 0;
}
 
/**
 *******************************************************************************
 *
 * @brief Fallback definition of LAPACKE_slassq_work().
 *
 * Only defined when LAPACKE_WITH_LASSQ is not set: the five arguments are
 * forwarded, unchanged and in the same order, to the local __spm_slassq()
 * replacement.
 *
 *******************************************************************************/
#define LAPACKE_slassq_work( _n_, _X_, _incx_, _scale_, _sumsq_ ) \
    __spm_slassq( (_n_), (_X_), (_incx_), (_scale_), (_sumsq_) )
#endif
 
#if defined(SPM_WITH_MPI)
/**
 *******************************************************************************
 *
 * @brief MPI reduce operator to merge frobenius partial results together.
 *
 * The function has the shape of an MPI user reduction function and is
 * registered through MPI_Op_create() by s_spmFrobeniusNorm(). Each buffer is
 * read as one (scale, sumsq) pair.
 *
 *******************************************************************************
 *
 * @param[in] dist
 *          Incoming partial result: dist[0] and dist[1] are passed by value to
 *          frobenius_merge().
 *
 * @param[inout] loc
 *          Local partial result: pointers to loc[0] and loc[1] are given to
 *          frobenius_merge(), which presumably updates them in place.
 *
 * @param[in] len
 *          Number of elements in the buffers. Must point to 2 (asserted).
 *
 * @param[in] dtype
 *          MPI datatype of the elements. Unused.
 *
 *******************************************************************************/
void
s_spm_frobenius_merge( float       *dist,
                       float       *loc,
                       int          *len,
                       MPI_Datatype *dtype )
{
    float *loc_scale = loc;
    float *loc_sumsq = loc + 1;

    /* Silence unused-parameter warnings (len is only used by the assert) */
    (void)dtype;
    (void)len;

    assert( *len == 2 );
    frobenius_merge( dist[0], dist[1], loc_scale, loc_sumsq );
}
#endif
 
/**
 *******************************************************************************
 *
 * @brief Accumulate the Frobenius norm contribution of a diagonal element
 * block of a symmetric matrix with column/row major storage.
 *
 * Note that column major is using the low triangular part only of the diagonal
 * element matrices, and row major, by symmetry, is using only the upper
 * triangular part.
 *
 * The comments in the code are made for column major storage.
 *
 *******************************************************************************
 *
 * @param[in] dofs
 *          Order of the square element block (number of rows and of columns
 *          walked by the loops).
 *
 * @param[in] valptr
 *          Pointer to the first value of the element block; dofs * dofs
 *          values are traversed.
 *
 * @param[inout] data
 *          Accumulator: data and data + 1 are passed to frobenius_update()
 *          as its scale and sumsq arguments.
 *
 *******************************************************************************/
static inline void
s_spm_frobenius_elt_sym_diag( spm_int_t     dofs,
                              const float *valptr,
                              float       *data )
{
    float    *scale = data;
    float    *sumsq = data + 1;
    spm_int_t col, row;

    for ( col = 0; col < dofs; col++ ) {
        /* Jump over the unused upper triangular part of this column */
#if defined(PRECISION_z) || defined(PRECISION_c)
        valptr += 2 * col;
#else
        valptr += col;
#endif

        /* Diagonal element: accumulated with a weight of 1 */
        frobenius_update( 1, scale, sumsq, valptr );
#if defined(PRECISION_z) || defined(PRECISION_c)
        valptr++;
        frobenius_update( 1, scale, sumsq, valptr );
#endif
        valptr++;

        /* Strictly lower part: accumulated with a weight of 2 */
        row = col + 1;
        while ( row < dofs ) {
            frobenius_update( 2, scale, sumsq, valptr );
#if defined(PRECISION_z) || defined(PRECISION_c)
            valptr++;
            frobenius_update( 2, scale, sumsq, valptr );
#endif
            valptr++;
            row++;
        }
    }
}
 
/**
 *******************************************************************************
 *
 * @brief Accumulate the Frobenius norm contribution of an off-diagonal element
 * block in the symmetric case.
 *
 * Every value of the block is accumulated with a weight of 2.
 *
 *******************************************************************************
 *
 * @param[in] nbelts
 *          Number of values in the element block.
 *
 * @param[in] valptr
 *          Pointer to the first value of the element block.
 *
 * @param[inout] data
 *          Accumulator: data and data + 1 are passed to frobenius_update()
 *          as its scale and sumsq arguments.
 *
 *******************************************************************************/
static inline void
s_spm_frobenius_elt_sym_offd( spm_int_t     nbelts,
                              const float *valptr,
                              float       *data )
{
    float    *scale = data;
    float    *sumsq = data + 1;
    spm_int_t left;

    for ( left = nbelts; left > 0; left-- ) {
        frobenius_update( 2, scale, sumsq, valptr );
        valptr++;

#if defined(PRECISION_z) || defined(PRECISION_c)
        frobenius_update( 2, scale, sumsq, valptr );
        valptr++;
#endif
    }
}
 
/**
 *******************************************************************************
 *
 * @brief Accumulate the Frobenius norm contribution of any element block in
 * the symmetric case.
 *
 * Diagonal blocks (row == col) are forwarded to
 * s_spm_frobenius_elt_sym_diag(), all the others to
 * s_spm_frobenius_elt_sym_offd().
 *
 *******************************************************************************
 *
 * @param[in] row
 *          Index of the first row of the element block.
 *
 * @param[in] dofi
 *          Number of rows of the element block.
 *
 * @param[in] col
 *          Index of the first column of the element block.
 *
 * @param[in] dofj
 *          Number of columns of the element block. Must be equal to dofi for
 *          a diagonal block (asserted).
 *
 * @param[in] valptr
 *          Pointer to the first value of the element block.
 *
 * @param[inout] data
 *          Accumulator forwarded unchanged to the selected kernel.
 *
 *******************************************************************************/
static inline void
s_spm_frobenius_elt_sym( spm_int_t              row,
                         spm_int_t              dofi,
                         spm_int_t              col,
                         spm_int_t              dofj,
                         const float *valptr,
                         float                *data )
{
    if ( row != col ) {
        s_spm_frobenius_elt_sym_offd( dofi * dofj, valptr, data );
        return;
    }

    /* Diagonal blocks are square */
    assert( dofi == dofj );
    s_spm_frobenius_elt_sym_diag( dofi, valptr, data );
}
 
/**
 *******************************************************************************
 *
 * @brief Accumulate the Frobenius norm contribution of a symmetric CSC matrix.
 *
 * Each stored element block is forwarded to s_spm_frobenius_elt_sym().
 *
 *******************************************************************************
 *
 * @param[in] spm
 *           The spm from which the norm need to be computed. Its fmttype must
 *           be SpmCSC and its flttype SpmFloat (both asserted).
 *
 * @param[in,out] data
 *           Accumulator forwarded unchanged to s_spm_frobenius_elt_sym() for
 *           every element block.
 *
 *******************************************************************************/
static inline void
s_spmFrobeniusNorm_csc( const spmatrix_t *spm,
                        float           *data )
{
    const spm_int_t  baseval  = spm->baseval;
    const spm_int_t *colptr   = spm->colptr;
    const spm_int_t *rowptr   = spm->rowptr;
    const spm_int_t *dofs     = spm->dofs;
    const spm_int_t *loc2glob = spm->loc2glob;
    const float     *valptr   = (const float *)(spm->values);
    spm_int_t        j, k;
    spm_int_t        pos = 0; /* Position of the current entry in rowptr */

    assert( spm->fmttype == SpmCSC );
    assert( spm->flttype == SpmFloat );

    for ( j = 0; j < spm->n; j++ ) {
        spm_int_t jg, dofj, col;

        /* Global column index: loc2glob is only read when not replicated */
        if ( spm->replicated ) {
            jg = j;
        }
        else {
            jg = loc2glob[j] - baseval;
        }

        /* Constant dof when spm->dof > 0, otherwise read from the dofs array */
        if ( spm->dof > 0 ) {
            dofj = spm->dof;
            col  = spm->dof * jg;
        }
        else {
            dofj = dofs[jg + 1] - dofs[jg];
            col  = dofs[jg] - baseval;
        }

        for ( k = colptr[j]; k < colptr[j + 1]; k++, pos++ ) {
            const spm_int_t ig = rowptr[pos] - baseval;
            spm_int_t       dofi, row;

            if ( spm->dof > 0 ) {
                dofi = spm->dof;
                row  = spm->dof * ig;
            }
            else {
                dofi = dofs[ig + 1] - dofs[ig];
                row  = dofs[ig] - baseval;
            }

            s_spm_frobenius_elt_sym( row, dofi, col, dofj, valptr, data );

            /* Values of consecutive element blocks are stored back to back */
            valptr += dofi * dofj;
        }
    }
}
 
/**
 *******************************************************************************
 *
 * @brief Accumulate the Frobenius norm contribution of a symmetric CSR matrix.
 *
 * Each stored element block is forwarded to s_spm_frobenius_elt_sym().
 *
 *******************************************************************************
 *
 * @param[in] spm
 *           The spm from which the norm need to be computed. Its fmttype must
 *           be SpmCSR and its flttype SpmFloat (both asserted).
 *
 * @param[in,out] data
 *           Accumulator forwarded unchanged to s_spm_frobenius_elt_sym() for
 *           every element block.
 *
 *******************************************************************************/
static inline void
s_spmFrobeniusNorm_csr( const spmatrix_t *spm,
                        float           *data )
{
    const spm_int_t  baseval  = spm->baseval;
    const spm_int_t *colptr   = spm->colptr;
    const spm_int_t *rowptr   = spm->rowptr;
    const spm_int_t *dofs     = spm->dofs;
    const spm_int_t *loc2glob = spm->loc2glob;
    const float     *valptr   = (const float *)(spm->values);
    spm_int_t        i, k;
    spm_int_t        pos = 0; /* Position of the current entry in colptr */

    assert( spm->fmttype == SpmCSR );
    assert( spm->flttype == SpmFloat );

    for ( i = 0; i < spm->n; i++ ) {
        spm_int_t ig, dofi, row;

        /* Global row index: loc2glob is only read when not replicated */
        if ( spm->replicated ) {
            ig = i;
        }
        else {
            ig = loc2glob[i] - baseval;
        }

        /* Constant dof when spm->dof > 0, otherwise read from the dofs array */
        if ( spm->dof > 0 ) {
            dofi = spm->dof;
            row  = spm->dof * ig;
        }
        else {
            dofi = dofs[ig + 1] - dofs[ig];
            row  = dofs[ig] - baseval;
        }

        for ( k = rowptr[i]; k < rowptr[i + 1]; k++, pos++ ) {
            const spm_int_t jg = colptr[pos] - baseval;
            spm_int_t       dofj, col;

            if ( spm->dof > 0 ) {
                dofj = spm->dof;
                col  = spm->dof * jg;
            }
            else {
                dofj = dofs[jg + 1] - dofs[jg];
                col  = dofs[jg] - baseval;
            }

            s_spm_frobenius_elt_sym( row, dofi, col, dofj, valptr, data );

            /* Values of consecutive element blocks are stored back to back */
            valptr += dofi * dofj;
        }
    }
}
 
/**
 *******************************************************************************
 *
 * @brief Accumulate the Frobenius norm contribution of a symmetric IJV matrix.
 *
 * Each of the spm->nnz stored element blocks is forwarded to
 * s_spm_frobenius_elt_sym().
 *
 *******************************************************************************
 *
 * @param[in] spm
 *           The spm from which the norm need to be computed. Its fmttype must
 *           be SpmIJV and its flttype SpmFloat (both asserted).
 *
 * @param[in,out] data
 *           Accumulator forwarded unchanged to s_spm_frobenius_elt_sym() for
 *           every element block.
 *
 *******************************************************************************/
static inline void
s_spmFrobeniusNorm_ijv( const spmatrix_t *spm,
                        float           *data )
{
    const spm_int_t  baseval = spm->baseval;
    const spm_int_t *colptr  = spm->colptr;
    const spm_int_t *rowptr  = spm->rowptr;
    const spm_int_t *dofs    = spm->dofs;
    const float     *valptr  = (const float *)(spm->values);
    spm_int_t        k;

    assert( spm->fmttype == SpmIJV );
    assert( spm->flttype == SpmFloat );

    for ( k = 0; k < spm->nnz; k++ ) {
        const spm_int_t i = rowptr[k] - baseval;
        const spm_int_t j = colptr[k] - baseval;
        spm_int_t       dofi, dofj, row, col;

        /* Constant dof when spm->dof > 0, otherwise read from the dofs array */
        if ( spm->dof > 0 ) {
            dofi = spm->dof;
            dofj = spm->dof;
            row  = spm->dof * i;
            col  = spm->dof * j;
        }
        else {
            dofi = dofs[i + 1] - dofs[i];
            dofj = dofs[j + 1] - dofs[j];
            row  = dofs[i] - baseval;
            col  = dofs[j] - baseval;
        }

        s_spm_frobenius_elt_sym( row, dofi, col, dofj, valptr, data );

        /* Values of consecutive element blocks are stored back to back */
        valptr += dofi * dofj;
    }
}
 
/**
 *******************************************************************************
 *
 * @brief Compute the Frobenius norm of the given spm structure.
 *
 *  ||A|| = sqrtf( sum( a_ij ^ 2 ) )
 *
 * The norm is accumulated as a (scale, sumsq) pair. General matrices are
 * handled by a flat sweep over the spm->nnzexp stored values; the other matrix
 * types are dispatched on the storage format. With MPI, and when the matrix is
 * not replicated over more than one process, the partial pairs are merged with
 * an MPI_Allreduce() using s_spm_frobenius_merge() as the reduction operator.
 *
 *******************************************************************************
 *
 * @param[in] spm
 *           The spm from which the norm need to be computed.
 *
 *******************************************************************************
 *
 * @return The computed frobenius norm, i.e. scale * sqrtf( sumsq ).
 *
 *******************************************************************************/
float
s_spmFrobeniusNorm( const spmatrix_t *spm )
{
    /* data[0] holds the scale, data[1] the sum of squares */
    float data[2] = { 0.f, 1.f };

    if ( spm->mtxtype != SpmGeneral ) {
        switch( spm->fmttype ) {
        case SpmCSC:
            s_spmFrobeniusNorm_csc( spm, data );
            break;

        case SpmCSR:
            s_spmFrobeniusNorm_csr( spm, data );
            break;

        case SpmIJV:
        default:
            s_spmFrobeniusNorm_ijv( spm, data );
        }
    }
    else {
        /* All values are stored: each one counts exactly once */
        const float *valptr = (const float *)spm->values;
        spm_int_t    left;

        for ( left = spm->nnzexp; left > 0; left-- ) {
            frobenius_update( 1, data, data + 1, valptr );
            valptr++;

#if defined(PRECISION_z) || defined(PRECISION_c)
            frobenius_update( 1, data, data + 1, valptr );
            valptr++;
#endif
        }
    }

#if defined(SPM_WITH_MPI)
    if ( ( spm->clustnbr > 1 ) && !( spm->replicated ) ) {
        MPI_Op merge;

        MPI_Op_create( (MPI_User_function *)s_spm_frobenius_merge, 1, &merge );
        MPI_Allreduce( MPI_IN_PLACE, data, 2, SPM_MPI_FLOAT, merge, spm->comm );
        MPI_Op_free( &merge );
    }
#endif

    return data[0] * sqrtf( data[1] );
}
 
/**
 *******************************************************************************
 *
 * @brief Compute the Max norm of the given spm structure.
 *
 *  ||A|| = max( abs(a_ij) )
 *
 * The maximum is taken over the spm->nnzexp stored values. With MPI, and when
 * the matrix is not replicated over more than one process, the local maxima
 * are combined with an MPI_MAX reduction.
 *
 *******************************************************************************
 *
 * @param[in] spm
 *           The spm from which the norm need to be computed.
 *
 *******************************************************************************
 *
 * @return The computed max norm (0 when no value is stored).
 *
 *******************************************************************************/
float
s_spmMaxNorm( const spmatrix_t *spm )
{
    const float *valptr = (const float *)spm->values;
    float        norm   = 0.f;
    spm_int_t    i;

    for ( i = 0; i < spm->nnzexp; i++ ) {
        const float absval = fabsf( valptr[i] );

        /* Keep the running maximum unless it is strictly larger */
        if ( !( norm > absval ) ) {
            norm = absval;
        }
    }

#if defined(SPM_WITH_MPI)
    if ( ( spm->clustnbr > 1 ) && !( spm->replicated ) ) {
        MPI_Allreduce( MPI_IN_PLACE, &norm, 1, MPI_FLOAT, MPI_MAX, spm->comm );
    }
#endif

    return norm;
}
 
/**
 *******************************************************************************
 *
 * @brief Compute the sum array for the one/inf norms of a diagonal element
 * block of a symmetric matrix with column/row major storage.
 *
 * Note that column major is using the low triangular part only of the diagonal
 * element matrices, and row major, by symmetry, is using only the upper
 * triangular part.
 *
 * The comments in the code are made for column major storage.
 *
 *******************************************************************************
 *
 * @param[in] row
 *          Offset in sumtab of the first entry touched by this element block.
 *
 * @param[in] dofi
 *          Order of the square element block.
 *
 * @param[in] valptr
 *          Pointer to the first value of the element block; dofi * dofi
 *          values are traversed.
 *
 * @param[inout] sumtab
 *          Array of sums. Entries row to row + dofi - 1 are incremented with
 *          the absolute values of the block.
 *
 *******************************************************************************/
static inline void
s_spm_oneinf_elt_sym_diag( spm_int_t              row,
                           spm_int_t              dofi,
                           const float *valptr,
                           float                *sumtab )
{
    float    *sums = sumtab + row;
    spm_int_t ii, jj;

    for ( jj = 0; jj < dofi; jj++ ) {
        float absval;

        /* Jump over the unused upper triangular part of this column */
        valptr += jj;

        /* Diagonal element: counted once */
        absval = fabsf( *valptr );
        valptr++;
        sums[jj] += absval;

        for ( ii = jj + 1; ii < dofi; ii++ ) {
            absval = fabsf( *valptr );
            valptr++;

            /* Lower part */
            sums[ii] += absval;
            /* Upper part, by symmetry */
            sums[jj] += absval;
        }
    }
}
 
/**
 *******************************************************************************
 *
 * @brief Compute the sum array for the one/inf norms of a general element.
 *
 * We can observe two cases A and B;
 *  ________    _          ________    _
 * |  |  |  |  | |        |________|  | |
 * |  |  |  |  |A|   OR   |________|  |B|
 * |__|__|__|  |_|        |________|  |_|
 *  ________               ________
 * |___B____|             |___A____|
 *
 *               | One Norm | Inf norm |
 *  -------------+----------+----------+
 *  Column Major |    B     |     A    |
 *  -------------+----------+----------+
 *  Row Major    |    A     |     B    |
 *  -------------+----------+----------+
 *
 * Case A: the sums are indexed by the fast (inner) index of the storage, so
 * each of the dofj consecutive runs of dofi values is added entry-wise to
 * sumtab[0..dofi-1].
 *
 * @warning: The sumtab must be shifted at the right place on input
 *
 *******************************************************************************
 *
 * @param[in] dofi
 *          Length of one contiguous run of values (inner loop bound).
 *
 * @param[in] dofj
 *          Number of runs (outer loop bound).
 *
 * @param[in] valptr
 *          Pointer to the first value of the element block; dofi * dofj
 *          values are read.
 *
 * @param[inout] sumtab
 *          Array of sums, already shifted. Entries 0 to dofi - 1 are
 *          incremented.
 *
 *******************************************************************************/
static inline void
s_spm_oneinf_elt_gen_A( spm_int_t              dofi,
                        spm_int_t              dofj,
                        const float *valptr,
                        float                *sumtab )
{
    spm_int_t ii, jj;

    for ( jj = 0; jj < dofj; jj++ ) {
        /* Restart from the first sum for every run */
        float *sum = sumtab;

        for ( ii = dofi; ii > 0; ii-- ) {
            *sum += fabsf( *valptr );
            sum++;
            valptr++;
        }
    }
}
 
/**
 *******************************************************************************
 *
 * @brief Compute the sum array for the one/inf norms of a general element.
 *
 * Case B of s_spm_oneinf_elt_gen_A(): the sums are indexed by the slow (outer)
 * index of the storage, so each of the dofj consecutive runs of dofi values is
 * added to a single entry of sumtab.
 *
 * @warning: The sumtab must be shifted at the right place on input
 *
 *******************************************************************************
 *
 * @param[in] dofi
 *          Length of one contiguous run of values (inner loop bound).
 *
 * @param[in] dofj
 *          Number of runs (outer loop bound).
 *
 * @param[in] valptr
 *          Pointer to the first value of the element block; dofi * dofj
 *          values are read.
 *
 * @param[inout] sumtab
 *          Array of sums, already shifted. Entries 0 to dofj - 1 are
 *          incremented.
 *
 *******************************************************************************/
static inline void
s_spm_oneinf_elt_gen_B( spm_int_t              dofi,
                        spm_int_t              dofj,
                        const float *valptr,
                        float                *sumtab )
{
    spm_int_t ii, jj;

    for ( jj = 0; jj < dofj; jj++ ) {
        for ( ii = dofi; ii > 0; ii-- ) {
            /* Added value by value to keep the accumulation order */
            sumtab[jj] += fabsf( *valptr );
            valptr++;
        }
    }
}
 
/**
 *******************************************************************************
 *
 * @brief Compute the sum array for both the one and inf norms for the
 * off-diagonal elements of symmetric element matrices.
 *
 * Each value contributes to two sums at once: the one indexed by its fast
 * index (starting at row) and the one indexed by its slow index (starting at
 * col). See s_spm_oneinf_elt_gen_A() for the two cases.
 *
 *******************************************************************************
 *
 * @param[in] row
 *          Offset in sumtab of the sums indexed by the inner loop.
 *
 * @param[in] dofi
 *          Length of one contiguous run of values (inner loop bound).
 *
 * @param[in] col
 *          Offset in sumtab of the sums indexed by the outer loop.
 *
 * @param[in] dofj
 *          Number of runs (outer loop bound).
 *
 * @param[in] valptr
 *          Pointer to the first value of the element block; dofi * dofj
 *          values are read.
 *
 * @param[inout] sumtab
 *          Array of sums (not shifted). Entries row to row + dofi - 1 and col
 *          to col + dofj - 1 are incremented.
 *
 *******************************************************************************/
static inline void
s_spm_oneinf_elt_gen_AB( spm_int_t              row,
                         spm_int_t              dofi,
                         spm_int_t              col,
                         spm_int_t              dofj,
                         const float *valptr,
                         float                *sumtab )
{
    spm_int_t ii, jj;

    for ( jj = 0; jj < dofj; jj++ ) {
        for ( ii = 0; ii < dofi; ii++ ) {
            const float absval = fabsf( *valptr );

            valptr++;
            sumtab[row + ii] += absval;
            sumtab[col + jj] += absval;
        }
    }
}
 
/**
 *******************************************************************************
 *
 * @brief Compute the sum array for the one and inf norms of a general element.
 *
 * Selects between s_spm_oneinf_elt_gen_A() and s_spm_oneinf_elt_gen_B()
 * according to the layout and the norm type, and shifts sumtab by row (inf
 * norm) or by col (one norm).
 *
 *******************************************************************************
 *
 * @param[in] layout
 *          Storage layout of the element block. SpmColMajor selects the
 *          column major path; any other value selects the row major path.
 *
 * @param[in] row
 *          Offset in sumtab used for the inf norm.
 *
 * @param[in] dofi
 *          Number of rows of the element block.
 *
 * @param[in] col
 *          Offset in sumtab used for the one norm.
 *
 * @param[in] dofj
 *          Number of columns of the element block.
 *
 * @param[in] valptr
 *          Pointer to the first value of the element block.
 *
 * @param[in] ntype
 *          SpmInfNorm or SpmOneNorm (asserted when not SpmInfNorm).
 *
 * @param[inout] sumtab
 *          Array of sums (not shifted).
 *
 *******************************************************************************/
static inline void
s_spm_oneinf_elt_gen( spm_layout_t           layout,
                      spm_int_t              row,
                      spm_int_t              dofi,
                      spm_int_t              col,
                      spm_int_t              dofj,
                      const float *valptr,
                      spm_normtype_t         ntype,
                      float                *sumtab )
{
    const int colmajor = ( layout == SpmColMajor );

    if ( ntype == SpmInfNorm ) {
        /* Row sums */
        if ( colmajor ) {
            s_spm_oneinf_elt_gen_A( dofi, dofj, valptr, sumtab + row );
        }
        else {
            s_spm_oneinf_elt_gen_B( dofj, dofi, valptr, sumtab + row );
        }
        return;
    }

    /* Column sums */
    assert( ntype == SpmOneNorm );
    if ( colmajor ) {
        s_spm_oneinf_elt_gen_B( dofi, dofj, valptr, sumtab + col );
    }
    else {
        s_spm_oneinf_elt_gen_A( dofj, dofi, valptr, sumtab + col );
    }
}
 
/**
 *******************************************************************************
 *
 * @brief Compute the sum array for both the one and inf norms for the
 * off-diagonal elements of symmetric element matrices in either column or row
 * major layout.
 *
 * Thin wrapper around s_spm_oneinf_elt_gen_AB(): the (row, dofi) and
 * (col, dofj) pairs are swapped when the layout is not SpmColMajor.
 *
 *******************************************************************************
 *
 * @param[in] layout
 *          Storage layout of the element block.
 *
 * @param[in] row
 *          Offset in sumtab associated with the rows of the block.
 *
 * @param[in] dofi
 *          Number of rows of the element block.
 *
 * @param[in] col
 *          Offset in sumtab associated with the columns of the block.
 *
 * @param[in] dofj
 *          Number of columns of the element block.
 *
 * @param[in] valptr
 *          Pointer to the first value of the element block.
 *
 * @param[inout] sumtab
 *          Array of sums (not shifted).
 *
 *******************************************************************************/
static inline void
s_spm_oneinf_elt_sym_offd( spm_layout_t           layout,
                           spm_int_t              row,
                           spm_int_t              dofi,
                           spm_int_t              col,
                           spm_int_t              dofj,
                           const float *valptr,
                           float                *sumtab )
{
    if ( layout != SpmColMajor ) {
        /* Row major: the fast index runs over the columns */
        s_spm_oneinf_elt_gen_AB( col, dofj, row, dofi, valptr, sumtab );
        return;
    }

    s_spm_oneinf_elt_gen_AB( row, dofi, col, dofj, valptr, sumtab );
}
 
/**
 *******************************************************************************
 *
 * @brief Compute the sum array for the one/inf norm for an element matrix.
 *
 * Dispatches to the general kernel when mtxtype is SpmGeneral. Otherwise the
 * block goes to the symmetric diagonal kernel when row == col, and to the
 * symmetric off-diagonal kernel in all other cases.
 *
 *******************************************************************************
 *
 * @param[in] mtxtype
 *          Matrix type; only the comparison with SpmGeneral matters here.
 *
 * @param[in] layout
 *          Storage layout of the element block. Not used for symmetric
 *          diagonal blocks.
 *
 * @param[in] row
 *          Offset in sumtab associated with the rows of the block.
 *
 * @param[in] dofi
 *          Number of rows of the element block.
 *
 * @param[in] col
 *          Offset in sumtab associated with the columns of the block.
 *
 * @param[in] dofj
 *          Number of columns of the element block. Not used for symmetric
 *          diagonal blocks.
 *
 * @param[in] valptr
 *          Pointer to the first value of the element block.
 *
 * @param[in] ntype
 *          Norm type; only used for general matrices.
 *
 * @param[inout] sumtab
 *          Array of sums (not shifted).
 *
 *******************************************************************************/
static inline void
s_spm_oneinf_elt( spm_mtxtype_t          mtxtype,
                  spm_layout_t           layout,
                  spm_int_t              row,
                  spm_int_t              dofi,
                  spm_int_t              col,
                  spm_int_t              dofj,
                  const float *valptr,
                  spm_normtype_t         ntype,
                  float                *sumtab )
{
    if ( mtxtype == SpmGeneral ) {
        s_spm_oneinf_elt_gen( layout, row, dofi, col, dofj, valptr, ntype, sumtab );
        return;
    }

    if ( row != col ) {
        s_spm_oneinf_elt_sym_offd( layout, row, dofi, col, dofj, valptr, sumtab );
        return;
    }

    s_spm_oneinf_elt_sym_diag( row, dofi, valptr, sumtab );
}
 
/**
 *******************************************************************************
 *
 * @brief Accumulate the one/inf norm sums of an spm CSC structure.
 *
 * Each stored element block is forwarded to s_spm_oneinf_elt() together with
 * the matrix type and layout of the spm.
 *
 *******************************************************************************
 *
 * @param[in] ntype
 *          The norm type, forwarded to s_spm_oneinf_elt().
 *
 * @param[in] spm
 *          The sparse matrix, walked column by column.
 *
 * @param[inout] sumtab
 *          Array of sums, forwarded to s_spm_oneinf_elt().
 *
 *******************************************************************************/
static inline void
s_spmOneInfNorm_csc( spm_normtype_t    ntype,
                     const spmatrix_t *spm,
                     float           *sumtab )
{
    const spm_int_t  baseval  = spm->baseval;
    const spm_int_t  dof      = spm->dof;
    const spm_int_t *colptr   = spm->colptr;
    const spm_int_t *rowptr   = spm->rowptr;
    const spm_int_t *loc2glob = spm->loc2glob;
    const spm_int_t *dofs     = spm->dofs;
    const float     *valptr   = (const float *)(spm->values);
    spm_int_t        i, j;
    spm_int_t        pos = 0; /* Position of the current entry in rowptr */

    for ( j = 0; j < spm->n; j++ ) {
        spm_int_t jg, dofj, col;

        /* Global column index: loc2glob is only read when not replicated */
        if ( spm->replicated ) {
            jg = j;
        }
        else {
            jg = loc2glob[j] - baseval;
        }

        /* Constant dof when dof > 0, otherwise read from the dofs array */
        if ( dof > 0 ) {
            dofj = dof;
            col  = dof * jg;
        }
        else {
            dofj = dofs[jg + 1] - dofs[jg];
            col  = dofs[jg] - baseval;
        }

        for ( i = colptr[j]; i < colptr[j + 1]; i++, pos++ ) {
            const spm_int_t ig = rowptr[pos] - baseval;
            spm_int_t       dofi, row;

            if ( dof > 0 ) {
                dofi = dof;
                row  = dof * ig;
            }
            else {
                dofi = dofs[ig + 1] - dofs[ig];
                row  = dofs[ig] - baseval;
            }

            s_spm_oneinf_elt( spm->mtxtype, spm->layout,
                              row, dofi, col, dofj, valptr,
                              ntype, sumtab );

            /* Values of consecutive element blocks are stored back to back */
            valptr += dofi * dofj;
        }
    }
}
 
/**
 *******************************************************************************
 *
 * @brief Accumulate the one/inf norm sums of an spm CSR structure.
 *
 * Each stored element block is forwarded to s_spm_oneinf_elt() together with
 * the matrix type and layout of the spm.
 *
 *******************************************************************************
 *
 * @param[in] ntype
 *          The norm type, forwarded to s_spm_oneinf_elt().
 *
 * @param[in] spm
 *          The sparse matrix, walked row by row.
 *
 * @param[inout] sumtab
 *          Array of sums, forwarded to s_spm_oneinf_elt().
 *
 *******************************************************************************/
static inline void
s_spmOneInfNorm_csr( spm_normtype_t    ntype,
                     const spmatrix_t *spm,
                     float           *sumtab )
{
    const spm_int_t  baseval  = spm->baseval;
    const spm_int_t  dof      = spm->dof;
    const spm_int_t *colptr   = spm->colptr;
    const spm_int_t *rowptr   = spm->rowptr;
    const spm_int_t *loc2glob = spm->loc2glob;
    const spm_int_t *dofs     = spm->dofs;
    const float     *valptr   = (const float *)(spm->values);
    spm_int_t        i, j;
    spm_int_t        pos = 0; /* Position of the current entry in colptr */

    for ( i = 0; i < spm->n; i++ ) {
        spm_int_t ig, dofi, row;

        /* Global row index: loc2glob is only read when not replicated */
        if ( spm->replicated ) {
            ig = i;
        }
        else {
            ig = loc2glob[i] - baseval;
        }

        /* Constant dof when dof > 0, otherwise read from the dofs array */
        if ( dof > 0 ) {
            dofi = dof;
            row  = dof * ig;
        }
        else {
            dofi = dofs[ig + 1] - dofs[ig];
            row  = dofs[ig] - baseval;
        }

        for ( j = rowptr[i]; j < rowptr[i + 1]; j++, pos++ ) {
            const spm_int_t jg = colptr[pos] - baseval;
            spm_int_t       dofj, col;

            if ( dof > 0 ) {
                dofj = dof;
                col  = dof * jg;
            }
            else {
                dofj = dofs[jg + 1] - dofs[jg];
                col  = dofs[jg] - baseval;
            }

            s_spm_oneinf_elt( spm->mtxtype, spm->layout,
                              row, dofi, col, dofj, valptr,
                              ntype, sumtab );

            /* Values of consecutive element blocks are stored back to back */
            valptr += dofi * dofj;
        }
    }
}
 
/**
 *******************************************************************************
 *
 * @brief Accumulate the one/inf norm sums of an spm IJV structure.
 *
 * Each of the spm->nnz stored element blocks is forwarded to
 * s_spm_oneinf_elt() together with the matrix type and layout of the spm.
 *
 *******************************************************************************
 *
 * @param[in] ntype
 *          The norm type, forwarded to s_spm_oneinf_elt().
 *
 * @param[in] spm
 *          The sparse matrix, walked entry by entry.
 *
 * @param[inout] sumtab
 *          Array of sums, forwarded to s_spm_oneinf_elt().
 *
 *******************************************************************************/
static inline void
s_spmOneInfNorm_ijv( spm_normtype_t    ntype,
                     const spmatrix_t *spm,
                     float           *sumtab )
{
    const spm_int_t  baseval = spm->baseval;
    const spm_int_t  dof     = spm->dof;
    const spm_int_t *colptr  = spm->colptr;
    const spm_int_t *rowptr  = spm->rowptr;
    const spm_int_t *dofs    = spm->dofs;
    const float     *valptr  = (const float *)(spm->values);
    spm_int_t        k;

    for ( k = 0; k < spm->nnz; k++ ) {
        const spm_int_t ig = rowptr[k] - baseval;
        const spm_int_t jg = colptr[k] - baseval;
        spm_int_t       dofi, dofj, row, col;

        /* Constant dof when dof > 0, otherwise read from the dofs array */
        if ( dof > 0 ) {
            dofi = dof;
            dofj = dof;
            row  = dof * ig;
            col  = dof * jg;
        }
        else {
            dofi = dofs[ig + 1] - dofs[ig];
            dofj = dofs[jg + 1] - dofs[jg];
            row  = dofs[ig] - baseval;
            col  = dofs[jg] - baseval;
        }

        s_spm_oneinf_elt( spm->mtxtype, spm->layout,
                          row, dofi, col, dofj, valptr,
                          ntype, sumtab );

        /* Values of consecutive element blocks are stored back to back */
        valptr += dofi * dofj;
    }
}
 
/**
 *******************************************************************************
 *
 * @brief Compute the one or the inf norm of the given spm structure, i.e. the
 * maximum column sum or the maximum row sum of the absolute values.
 *
 *  * SpmOneNorm: ||A|| = max_j( sum_i(|a_ij|) )
 *  * SpmInfNorm: ||A|| = max_i( sum_j(|a_ij|) )
 *
 * A zero-initialized array of spm->gNexp sums is filled by the kernel matching
 * the storage format. With MPI, and when the matrix is not replicated over
 * more than one process, the arrays are summed with an MPI_Allreduce(). The
 * norm is the largest entry of the resulting array.
 *
 *******************************************************************************
 *
 * @param[in] ntype
 *           The type of norm to compute.
 *
 * @param[in] spm
 *           The spm from which the norm need to be computed.
 *
 *******************************************************************************
 *
 * @return The computed one or inf norm (0 when no sum is positive), or -1 when
 *         the temporary sum array cannot be allocated.
 *
 *******************************************************************************/
static inline float
s_spmOneInfNorm( spm_normtype_t    ntype,
                 const spmatrix_t *spm )
{
    spm_int_t k;
    float   *sumtab;
    float    norm = 0.;

    sumtab = calloc( spm->gNexp, sizeof(float) );

    /*
     * calloc() may legitimately return NULL for a zero-sized request, so only
     * a NULL result with a positive size is an allocation failure.
     *
     * NOTE(review): in a distributed run, a failure local to one process makes
     * it leave before the MPI_Allreduce() below while the others still enter
     * it -- confirm how the callers want to handle that case.
     */
    if ( ( sumtab == NULL ) && ( spm->gNexp > 0 ) ) {
        fprintf( stderr, "s_spmOneInfNorm: failed to allocate the sum array\n" );
        return -1.;
    }

    switch( spm->fmttype ) {
    case SpmCSC:
        s_spmOneInfNorm_csc( ntype, spm, sumtab );
        break;

    case SpmCSR:
        s_spmOneInfNorm_csr( ntype, spm, sumtab );
        break;

    case SpmIJV:
    default:
         s_spmOneInfNorm_ijv( ntype, spm, sumtab );
    }

#if defined(SPM_WITH_MPI)
    if ( !(spm->replicated) && (spm->clustnbr > 1) ) {
        MPI_Allreduce( MPI_IN_PLACE, sumtab, spm->gNexp, MPI_FLOAT, MPI_SUM, spm->comm );
    }
#endif

    /* Look for the maximum */
    {
        const float *sumtmp = sumtab;
        for( k=0; k<spm->gNexp; k++, sumtmp++ )
        {
            if( norm < *sumtmp ) {
                norm = *sumtmp;
            }
        }
    }

    free( sumtab );
    return norm;
}
 
/**
 *******************************************************************************
 *
 * @brief Compute the norm of an spm matrix
 *
 * Thin dispatcher: the requested norm type selects the routine that performs
 * the actual computation.
 *
 *******************************************************************************
 *
 * @param[in] ntype
 *          = SpmMaxNorm: Max norm
 *          = SpmOneNorm: One norm
 *          = SpmInfNorm: Infinity norm
 *          = SpmFrobeniusNorm: Frobenius norm
 *
 * @param[in] spm
 *          The spm structure describing the matrix.
 *
 *******************************************************************************
 *
 * @return The value returned by the selected norm routine,
 *         -1 when spm is NULL or when ntype is not one of the values above.
 *
 *******************************************************************************/
float
s_spmNorm( spm_normtype_t    ntype,
           const spmatrix_t *spm )
{
    /* Nothing can be computed without a matrix */
    if ( spm == NULL ) {
        return -1.;
    }

    if ( ntype == SpmMaxNorm ) {
        return s_spmMaxNorm( spm );
    }

    /* One and infinity norms share the same implementation */
    if ( (ntype == SpmInfNorm) || (ntype == SpmOneNorm) ) {
        return s_spmOneInfNorm( ntype, spm );
    }

    if ( ntype == SpmFrobeniusNorm ) {
        return s_spmFrobeniusNorm( spm );
    }

    /* Any other value is not a supported norm type */
    fprintf(stderr, "s_spmNorm: invalid norm type\n");
    return -1.;
}
 
/**
 *******************************************************************************
 *
 * @brief Compute the norm of a dense matrix that follows the distribution of an
 * spm matrix
 *
 * The norm is first evaluated on the local spm->nexp-by-n block of A. When the
 * spm is distributed over several processes, the local results are then
 * combined: maximum for the max and infinity norms, sum of the column sums for
 * the one norm, and merge of the (scale, sumsq) pairs for the Frobenius norm.
 *
 *******************************************************************************
 *
 * @param[in] ntype
 *          = SpmMaxNorm: Max norm
 *          = SpmOneNorm: One norm
 *          = SpmInfNorm: Infinity norm
 *          = SpmFrobeniusNorm: Frobenius norm
 *
 * @param[in] spm
 *          The spm structure describing the matrix.
 *
 * @param[in] n
 *          The number of columns of the matrix A.
 *
 * @param[in] A
 *          The matrix A of size lda-by-n.
 *
 * @param[in] lda
 *          The leading dimension of the matrix A. Must be >= max(1, spm->nexp).
 *
 *******************************************************************************
 *
 * @return The norm of the dense matrix A,
 *         -1 when spm is NULL, when ntype is invalid, or when the work array
 *         of the one norm cannot be allocated.
 *
 *******************************************************************************/
float
s_spmNormMat( spm_normtype_t         ntype,
              const spmatrix_t      *spm,
              spm_int_t              n,
              const float *A,
              spm_int_t              lda )
{
    float     norm = 0.;
    spm_int_t j; /* Same type as n, so the column index cannot be narrower than its bound */

    if ( spm == NULL ) {
        return -1.;
    }

    switch( ntype ) {
    case SpmMaxNorm:
    case SpmInfNorm:
        /* Local norm of the spm->nexp-by-n block */
        norm  = LAPACKE_slange( LAPACK_COL_MAJOR,
                                ntype == SpmMaxNorm ? 'M' : 'I',
                                spm->nexp, n, A, lda );
#if defined(SPM_WITH_MPI)
        if ( !(spm->replicated) && (spm->clustnbr > 1) ) {
            MPI_Allreduce( MPI_IN_PLACE, &norm, 1, MPI_FLOAT,
                           MPI_MAX, spm->comm );
        }
#endif
        break;

    case SpmOneNorm:
    {
        float *sumtmp;
        float *sumtab = calloc( n, sizeof(*sumtab) );

        /*
         * calloc may legitimately return NULL for an empty request, so only a
         * non-empty request is reported as a failure.
         * NOTE(review): in a distributed run this return happens before the
         * collective reduction below; callers should treat -1 as fatal.
         */
        if ( (sumtab == NULL) && (n > 0) ) {
            fprintf(stderr, "s_spmNormMat: failed to allocate the column sums\n");
            return -1.;
        }

        /* Local sum of the absolute values of each column */
        sumtmp = sumtab;
        for( j=0; j<n; j++, sumtmp++ )
        {
            *sumtmp = cblas_sasum( spm->nexp, A + j * lda, 1 );
        }

#if defined(SPM_WITH_MPI)
        if ( !(spm->replicated) && (spm->clustnbr > 1) ) {
            MPI_Allreduce( MPI_IN_PLACE, sumtab, n, MPI_FLOAT,
                           MPI_SUM, spm->comm );
        }
#endif

        /* Look for the maximum */
        sumtmp = sumtab;
        for( j=0; j<n; j++, sumtmp++ )
        {
            if( norm < *sumtmp ) {
                norm = *sumtmp;
            }
        }
        free( sumtab );
    }
    break;

    case SpmFrobeniusNorm:
    {
        float data[] = { 0., 1. }; /* Scale, Sum */

        /* Accumulate the scaled sum of squares column by column */
        for ( j=0; j<n; j++ ) {
            /* The LAPACKE prototype does not take a const vector yet, hence the cast */
            LAPACKE_slassq_work( spm->nexp, (float*)(A + j * lda), 1, data, data + 1 );
        }

#if defined(SPM_WITH_MPI)
        if ( !(spm->replicated) && (spm->clustnbr > 1) ) {
            MPI_Op merge;
            MPI_Op_create( (MPI_User_function *)s_spm_frobenius_merge, 1, &merge );
            MPI_Allreduce( MPI_IN_PLACE, data, 2, SPM_MPI_FLOAT, merge, spm->comm );
            MPI_Op_free( &merge );
        }
#endif

        norm = data[0] * sqrtf( data[1] );
    }
    break;

    default:
        fprintf(stderr, "s_spmNormMat: invalid norm type\n");
        return -1.;
    }

    return norm;
}
/**
 * @}
 */