SpM Handbook 1.2.4
Loading...
Searching...
No Matches
d_spm_matrixvector.c
Go to the documentation of this file.
1/**
2 *
3 * @file d_spm_matrixvector.c
4 *
5 * SParse Matrix package matrix-vector multiplication routines.
6 *
7 * @copyright 2016-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 1.2.4
11 * @author Matthieu Kuhn
12 * @author Mathieu Faverge
13 * @author Tony Delarue
14 * @author Alycia Lisito
15 * @date 2024-05-29
16 *
17 * @generated from /builds/2mk6rsew/0/fpruvost/spm/src/z_spm_matrixvector.c, normal z -> d, Fri Nov 29 11:34:29 2024
18 *
19 * @ingroup spm_dev_matvec
20 * @{
21 *
22 **/
23#include "common.h"
24#include <lapacke.h>
25#include <cblas.h>
26
27/**
28 * @brief Structure to hold the parameters of the matvec operation
29 */
30struct __spm_dmatvec_s;
31
32/**
33 * @brief Typedef associated to structure
34 */
35typedef struct __spm_dmatvec_s __spm_dmatvec_t;
36
37/**
38 * @brief Typedef to define the op function applied to the element (id or )
39 */
40typedef double (*__conj_fct_t)( double );
41
42/**
43 * @brief Typedef to the main loop function performing the matvec operation
44 */
45typedef int (*__loop_fct_t)( const __spm_dmatvec_t * );
46
47/**
48 *******************************************************************************
49 *
50 * @brief Identity function
51 *
52 *******************************************************************************
53 * @param[in] val
54 * TODO
55 *
56 *******************************************************************************
57 *
58 * @return id(val)
59 *
60 *******************************************************************************/
61static inline double
62__fct_id( double val ) {
63 return val;
64}
65
66#if defined(PRECISION_c) || defined(PRECISION_z)
67/**
68 *******************************************************************************
69 *
70 * @brief Conjuguate function
71 *
72 *******************************************************************************
73 * @param[in] val
74 * TODO
75 *
76 *******************************************************************************
77 *
78 * @return (val)
79 *
80 *******************************************************************************/
81static inline double
82__fct_conj( double val )
83{
84 return ( val );
85}
86#endif
87
88/**
89 * @brief Store all the data necessary to do a matrix-matrix product
90 * for all cases.
91 */
92struct __spm_dmatvec_s {
93 int follow_x;
94
95 spm_int_t baseval, n, nnz, gN;
96
97 double alpha;
98 const spm_int_t *rowptr;
99 const spm_int_t *colptr;
100 const double *values;
101 const spm_int_t *loc2glob;
102 const spm_int_t *glob2loc;
103
104 spm_int_t dof;
105 const spm_int_t *dofs;
106
107 const double *x;
108 spm_int_t incx;
109
110 double *y;
111 spm_int_t incy;
112
113 __conj_fct_t conjA_fct;
114 __conj_fct_t conjAt_fct;
115 __loop_fct_t loop_fct;
116};
117
118/**
119 *******************************************************************************
120 *
121 * @private
122 * @brief Compute the dof loop for a general block
123 *
124 *******************************************************************************
125 *
126 * @param[in] row
127 * TODO
128 *
129 * @param[in] dofi
130 * TODO
131 *
132 * @param[in] col
133 * TODO
134 *
135 * @param[in] dofj
136 * TODO
137 *
138 * @param[in] y
139 * TODO
140 *
141 * @param[in] incy
142 * TODO
143 *
144 * @param[in] x
145 * TODO
146 *
147 * @param[in] incx
148 * TODO
149 *
150 * @param[in] values
151 * TODO
152 *
153 * @param[in] conjA_fct
154 * TODO
155 *
156 * @param[in] alpha
157 * TODO
158 *
159 *******************************************************************************/
160static inline void
161__spm_dmatvec_dof_loop( spm_int_t row,
162 spm_int_t dofi,
163 spm_int_t col,
164 spm_int_t dofj,
165 double *y,
166 spm_int_t incy,
167 const double *x,
168 spm_int_t incx,
169 const double *values,
170 const __conj_fct_t conjA_fct,
171 double alpha )
172{
173 spm_int_t ii, jj;
174
175 for(jj=0; jj<dofj; jj++)
176 {
177 for(ii=0; ii<dofi; ii++, values++)
178 {
179 y[ row + (ii * incy) ] += alpha * conjA_fct( *values ) * x[ col +(jj * incx) ];
180 }
181 }
182}
183
184/**
185 *******************************************************************************
186 *
187 * @private
188 * @brief Compute the dof loop for a symmetric off diagonal block
189 *
190 *******************************************************************************
191 *
192 * @param[in] row
193 * TODO
194 *
195 * @param[in] dofi
196 * TODO
197 *
198 * @param[in] col
199 * TODO
200 *
201 * @param[in] dofj
202 * TODO
203 *
204 * @param[in] y
205 * TODO
206 *
207 * @param[in] incy
208 * TODO
209 *
210 * @param[in] x
211 * TODO
212 *
213 * @param[in] incx
214 * TODO
215 *
216 * @param[in] values
217 * TODO
218 *
219 * @param[in] conjA_fct
220 * TODO
221 *
222 * @param[in] conjAt_fct
223 * TODO
224 *
225 * @param[in] alpha
226 * TODO
227 *
228 *******************************************************************************/
229static inline void
230__spm_dmatvec_dof_loop_sy( spm_int_t row,
231 spm_int_t dofi,
232 spm_int_t col,
233 spm_int_t dofj,
234 double *y,
235 spm_int_t incy,
236 const double *x,
237 spm_int_t incx,
238 const double *values,
239 const __conj_fct_t conjA_fct,
240 const __conj_fct_t conjAt_fct,
241 double alpha )
242{
243 spm_int_t ii, jj;
244
245 for(jj=0; jj<dofj; jj++)
246 {
247 for(ii=0; ii<dofi; ii++, values++)
248 {
249 y[ row + (ii * incy) ] += alpha * conjA_fct( *values ) * x[ col +(jj * incx) ];
250 y[ col + (jj * incy) ] += alpha * conjAt_fct( *values ) * x[ row +(ii * incx) ];
251 }
252 }
253}
254
255/**
256 *******************************************************************************
257 *
258 * @private
259 * @brief Compute the dof loop for a symmetric CSR matrix
260 * Allow code factorization.
261 *
262 *******************************************************************************
263 *
264 * @param[in] row
265 * TODO
266 *
267 * @param[in] dofi
268 * TODO
269 *
270 * @param[in] col
271 * TODO
272 *
273 * @param[in] dofj
274 * TODO
275 *
276 * @param[in] y
277 * TODO
278 *
279 * @param[in] incy
280 * TODO
281 *
282 * @param[in] x
283 * TODO
284 *
285 * @param[in] incx
286 * TODO
287 *
288 * @param[in] values
289 * TODO
290 *
291 * @param[in] conjA_fct
292 * TODO
293 *
294 * @param[in] conjAt_fct
295 * TODO
296 *
297 * @param[in] alpha
298 * TODO
299 *
300 *******************************************************************************/
301static inline void
302__spm_dmatvec_dof_loop_sy_csr( spm_int_t row,
303 spm_int_t dofi,
304 spm_int_t col,
305 spm_int_t dofj,
306 double *y,
307 spm_int_t incy,
308 const double *x,
309 spm_int_t incx,
310 const double *values,
311 const __conj_fct_t conjA_fct,
312 const __conj_fct_t conjAt_fct,
313 double alpha )
314{
315 __spm_dmatvec_dof_loop_sy( row, dofi, col, dofj, y, incy, x, incx, values, conjAt_fct, conjA_fct, alpha );
316}
317
318/**
319 *******************************************************************************
320 *
321 * @private
322 * @brief Compute A*x[i:, j] = y[i:, j]
323 * for a CSX symmetric matrix
324 *
325 *******************************************************************************
326 *
327 * @param[in] args
328 * TODO
329 *
330 *******************************************************************************
331 *
332 * @retval TODO
333 *
334 *******************************************************************************/
335static inline int
336__spm_dmatvec_sy_csx( const __spm_dmatvec_t *args )
337{
338 spm_int_t baseval = args->baseval;
339 spm_int_t n = args->n;
340 double alpha = args->alpha;
341 const spm_int_t *rowptr = args->rowptr;
342 const spm_int_t *colptr = args->colptr;
343 const double *values = args->values;
344 const spm_int_t *loc2glob = args->loc2glob;
345 const spm_int_t *dofs = args->dofs;
346 spm_int_t dof = args->dof;
347 const double *x = args->x;
348 spm_int_t incx = args->incx;
349 double *y = args->y;
350 spm_int_t incy = args->incy;
351 const __conj_fct_t conjA_fct = args->conjA_fct;
352 const __conj_fct_t conjAt_fct = args->conjAt_fct;
353 spm_int_t row, col, dofj, dofi;
354 spm_int_t i, ig, j, jg;
355
356 /* If(args->follow_x) -> CSR. We need to change exchange the functions in the symmetric dof loop */
357 void (*dof_loop_sy)( spm_int_t, spm_int_t, spm_int_t, spm_int_t,
358 double *, spm_int_t,
359 const double *, spm_int_t, const double *,
360 const __conj_fct_t, const __conj_fct_t, double )
361 = ( args->follow_x ) ? __spm_dmatvec_dof_loop_sy_csr : __spm_dmatvec_dof_loop_sy;
362
363 for( j=0; j<n; j++, colptr++ )
364 {
365 jg = (loc2glob == NULL) ? j : loc2glob[j] - baseval ;
366 dofj = ( dof > 0 ) ? dof : dofs[jg+1] - dofs[jg];
367 col = ( dof > 0 ) ? dof * jg : dofs[jg] - baseval;
368 for( i=colptr[0]; i<colptr[1]; i++, rowptr++ )
369 {
370 ig = *rowptr - baseval;
371 dofi = ( dof > 0 ) ? dof : dofs[ig+1] - dofs[ig];
372 row = ( dof > 0 ) ? dof * ig : dofs[ig] - baseval;
373 if ( row != col ) {
374 dof_loop_sy( row, dofi, col, dofj, y, incy, x, incx, values, conjA_fct, conjAt_fct, alpha );
375 }
376 else {
377 __spm_dmatvec_dof_loop( row, dofi, col, dofj, y, incy, x, incx, values, conjA_fct, alpha );
378 }
379 values += dofi*dofj;
380 }
381 }
382 return SPM_SUCCESS;
383}
384
385/**
386 *******************************************************************************
387 *
388 * @private
389 * @brief Compute A*x[i:, j] = y[i:, j]
390 * for a CSC/CSR general matrix
391 *
392 *******************************************************************************
393 *
394 * @param[in] args
395 * TODO
396 *
397 *******************************************************************************
398 *
399 * @retval TODO
400 *
401 *******************************************************************************/
402static inline int
403__spm_dmatvec_ge_csx( const __spm_dmatvec_t *args )
404{
405 spm_int_t baseval = args->baseval;
406 spm_int_t n = args->n;
407 double alpha = args->alpha;
408 const spm_int_t *rowptr = args->rowptr;
409 const spm_int_t *colptr = args->colptr;
410 const double *values = args->values;
411 const spm_int_t *loc2glob = args->loc2glob;
412 const spm_int_t *dofs = args->dofs;
413 spm_int_t dof = args->dof;
414 const double *x = args->x;
415 spm_int_t incx = args->incx;
416 double *y = args->y;
417 spm_int_t incy = args->incy;
418 const __conj_fct_t conjA_fct = args->conjA_fct;
419 spm_int_t row, dofj, dofi;
420 spm_int_t i, ig, j, jg;
421
422 if ( args->follow_x ) {
423 for( j = 0; j < n; j++, colptr++ )
424 {
425 jg = (loc2glob == NULL) ? j : loc2glob[j] - baseval;
426 dofj = ( dof > 0 ) ? dof : dofs[jg+1] - dofs[jg];
427 for( i=colptr[0]; i<colptr[1]; i++, rowptr++ )
428 {
429 ig = *rowptr - baseval;
430 dofi = ( dof > 0 ) ? dof : dofs[ig+1] - dofs[ig];
431 row = ( dof > 0 ) ? dof * ig : dofs[ig] - baseval;
432 __spm_dmatvec_dof_loop( row, dofi, 0, dofj, y, incy, x, 1, values, conjA_fct, alpha );
433 values += dofi * dofj;
434 }
435 x += dofj * incx;
436 }
437 }
438 else {
439 for( j=0; j<n; j++, colptr++ )
440 {
441 jg = (loc2glob == NULL) ? j : loc2glob[j] - baseval;
442 dofj = ( dof > 0 ) ? dof : dofs[jg+1] - dofs[jg];
443 for( i=colptr[0]; i<colptr[1]; i++, rowptr++ )
444 {
445 ig = *rowptr - baseval;
446 dofi = ( dof > 0 ) ? dof : dofs[ig+1] - dofs[ig];
447 row = ( dof > 0 ) ? dof * ig : dofs[ig] - baseval;
448 __spm_dmatvec_dof_loop( 0, dofj, row, dofi, y, 1, x, incx, values, conjA_fct, alpha );
449 values += dofi * dofj;
450 }
451 y += dofj * incy;
452 }
453 }
454 return SPM_SUCCESS;
455}
456
457/**
458 *******************************************************************************
459 *
460 * @private
461 * @brief Compute A*x[i:, j] = y[i:, j]
462 * for a IJV symmetric matrix
463 *
464 *******************************************************************************
465 *
466 * @param[in] args
467 * TODO
468 *
469 *******************************************************************************
470 *
471 * @retval TODO
472 *
473 *******************************************************************************/
474static inline int
475__spm_dmatvec_sy_ijv( const __spm_dmatvec_t *args )
476{
477 spm_int_t baseval = args->baseval;
478 spm_int_t nnz = args->nnz;
479 double alpha = args->alpha;
480 const spm_int_t *rowptr = args->rowptr;
481 const spm_int_t *colptr = args->colptr;
482 const double *values = args->values;
483 const spm_int_t *dofs = args->dofs;
484 spm_int_t dof = args->dof;
485 const double *x = args->x;
486 spm_int_t incx = args->incx;
487 double *y = args->y;
488 spm_int_t incy = args->incy;
489 const __conj_fct_t conjA_fct = args->conjA_fct;
490 const __conj_fct_t conjAt_fct = args->conjAt_fct;
491 spm_int_t row, col, dofj, dofi;
492 spm_int_t i, ig, jg;
493
494 for( i=0; i<nnz; i++, colptr++, rowptr++ )
495 {
496 ig = *rowptr - baseval;
497 jg = *colptr - baseval;
498
499 dofj = ( dof > 0 ) ? dof : dofs[jg+1] - dofs[jg];
500 dofi = ( dof > 0 ) ? dof : dofs[ig+1] - dofs[ig];
501
502 row = ( dof > 0 ) ? dof * ig : dofs[ig] - baseval;
503 col = ( dof > 0 ) ? dof * jg : dofs[jg] - baseval;
504
505 if ( row != col ) {
506 __spm_dmatvec_dof_loop_sy( row, dofi, col, dofj, y, incy, x, incx, values, conjA_fct, conjAt_fct, alpha );
507 }
508 else {
509 __spm_dmatvec_dof_loop( row, dofi, col, dofj, y, incy, x, incx, values, conjA_fct, alpha );
510 }
511 values += dofi*dofj;
512 }
513 return SPM_SUCCESS;
514}
515
516/**
517 *******************************************************************************
518 *
519 * @private
520 * @brief Build a local dofs array which corresponds
521 * to the local numerotation of a global index for
522 * variadic dofs.
523 *
524 *******************************************************************************
525 *
526 * @param[in] dofs
527 * TODO
528 *
529 * @param[in] glob2loc
530 * TODO
531 *
532 * @param[in] gN
533 * TODO
534 *
535 *******************************************************************************
536 *
537 * @retval TODO
538 *
539 *******************************************************************************/
540static inline spm_int_t *
541__spm_dmatvec_dofs_local( const spm_int_t *dofs,
542 const spm_int_t *glob2loc,
543 spm_int_t gN )
544{
545 spm_int_t i, acc = 0;
546 spm_int_t *result, *resptr;
547
548 result = calloc( gN , sizeof(spm_int_t) );
549 resptr = result;
550 for ( i = 0; i < gN; i++, glob2loc++, resptr++, dofs++ )
551 {
552 if( *glob2loc >= 0 ) {
553 *resptr = acc;
554 acc += dofs[1] - dofs[0];
555 }
556 }
557 return result;
558}
559
560/**
561 *******************************************************************************
562 *
563 * @private
564 * @brief Compute A*x[i:, j] = y[i:, j]
565 * for a IJV general matrix
566 *
567 *******************************************************************************
568 *
569 * @param[in] args
570 * TODO
571 *
572 *******************************************************************************
573 *
574 * @retval TODO
575 *
576 *******************************************************************************/
577static inline int
578__spm_dmatvec_ge_ijv( const __spm_dmatvec_t *args )
579{
580 spm_int_t baseval = args->baseval;
581 spm_int_t nnz = args->nnz;
582 double alpha = args->alpha;
583 const spm_int_t *rowptr = args->rowptr;
584 const spm_int_t *colptr = args->colptr;
585 const double *values = args->values;
586 const spm_int_t *glob2loc = args->glob2loc;
587 const spm_int_t *dofs = args->dofs;
588 spm_int_t dof = args->dof;
589 const double *x = args->x;
590 spm_int_t incx = args->incx;
591 double *y = args->y;
592 spm_int_t incy = args->incy;
593 const __conj_fct_t conjA_fct = args->conjA_fct;
594 spm_int_t row, col, dofj, dofi;
595 spm_int_t i, ig, jg;
596
597 spm_int_t *dof_local = NULL;
598
599 assert( ((dof > 0) && (dofs == NULL)) ||
600 ((dof <= 0) && (dofs != NULL)) );
601
602 if( (dofs != NULL) && (glob2loc != NULL) ) {
603 dof_local = __spm_dmatvec_dofs_local( dofs, glob2loc, args->gN );
604 assert( dof_local != NULL );
605 }
606
607 if( args->follow_x ) {
608 for( i=0; i<nnz; i++, colptr++, rowptr++ )
609 {
610 ig = *rowptr - baseval;
611 jg = *colptr - baseval;
612
613 dofj = ( dof > 0 ) ? dof : dofs[jg+1] - dofs[jg];
614 dofi = ( dof > 0 ) ? dof : dofs[ig+1] - dofs[ig];
615
616 row = ( dof > 0 ) ? dof * ig : dofs[ig] - baseval;
617 if ( glob2loc == NULL ) {
618 col = ( dof > 0 ) ? dof * jg : dofs[jg] - baseval;
619 }
620 else {
621 assert( glob2loc[jg] >= 0 );
622 col = ( dof > 0 ) ? dof * glob2loc[jg] : dof_local[jg];
623 }
624 __spm_dmatvec_dof_loop( row, dofi, col, dofj, y, incy, x, incx, values, conjA_fct, alpha );
625 values += dofi*dofj;
626 }
627 }
628 else {
629 for( i=0; i<nnz; i++, colptr++, rowptr++ )
630 {
631 ig = *rowptr - baseval;
632 jg = *colptr - baseval;
633
634 dofj = ( dof > 0 ) ? dof : dofs[jg+1] - dofs[jg];
635 dofi = ( dof > 0 ) ? dof : dofs[ig+1] - dofs[ig];
636
637 col = ( dof > 0 ) ? dof * jg : dofs[jg] - baseval;
638 if ( glob2loc == NULL ) {
639 row = ( dof > 0 ) ? dof * ig : dofs[ig] - baseval;
640 }
641 else {
642 assert( glob2loc[ig] >= 0 );
643 row = ( dof > 0 ) ? dof * glob2loc[ig] : dof_local[ig];
644 }
645 __spm_dmatvec_dof_loop( row, dofi, col, dofj, y, incy, x, incx, values, conjA_fct, alpha );
646 values += dofi*dofj;
647 }
648 }
649
650 if(dof_local != NULL) {
651 free(dof_local);
652 }
653
654 return SPM_SUCCESS;
655}
656
657#if !defined(LAPACKE_WITH_LASCL)
658/**
659 *******************************************************************************
660 *
661 * @private
662 * @brief Scale a matrix A by the scalara alpha
663 *
664 *******************************************************************************
665 *
666 * @param[in] alpha
667 * TODO
668 *
669 * @param[in] m
670 * TODO
671 *
672 * @param[in] n
673 * TODO
674 *
675 * @param[in] A
676 * TODO
677 *
678 * @param[in] lda
679 * TODO
680 *
681 *******************************************************************************/
682static inline void
683__spm_dlascl( double alpha,
684 spm_int_t m,
685 spm_int_t n,
686 double *A,
687 spm_int_t lda )
688{
689 spm_int_t i, j;
690
691 for( j=0; j<n; j++ ) {
692 for( i=0; i<m; i++, A++ ) {
693 *A *= alpha;
694 }
695 A += m - lda;
696 }
697}
698
699/**
700 *******************************************************************************
701 *
702 * @brief Alias if Lapacke zlscl is not available
703 *
704 *******************************************************************************
705 *
706 * @param[in] \_dir\_
707 * Unused parameter
708 *
709 * @param[in] \_uplo\_
710 * Unused parameter
711 *
712 * @param[in] \_kl\_
713 * Unused parameter
714 *
715 * @param[in] \_ku\_
716 * Unused parameter
717 *
718 * @param[in] \_cfrom\_
719 * Unused parameter
720 *
721 * @param[in] \_cto\_
722 * The scaling factor of the matrix A
723 *
724 * @param[in] \_m\_
725 * The number of rows of the matrix A
726 *
727 * @param[in] \_n\_
728 * The number of columns of the matrix A
729 *
730 * @param[inout] \_A\_
731 * On entry the lda-by-n matrix to scale. On exit, the matrix is multiplied by cto.
732 *
733 * @param[in] \_lda\_
734 * The leading dimension of the matrix A. lda >= max(1, m)
735 *
736 *******************************************************************************/
737#define LAPACKE_dlascl_work( _dir_, _uplo_, _kl_, _ku_, _cfrom_, _cto_, _m_, _n_, _A_, _lda_ ) \
738 __spm_dlascl( (_cto_), (_m_), (_n_), (_A_), (_lda_) )
739
740#endif
741
742/**
743 *******************************************************************************
744 *
745 * @private
746 * @brief Initialized the matvec argument structure
747 *
748 *******************************************************************************
749 *
750 * @param[in] args
751 * TODO
752 *
753 * @param[in] side
754 * TODO
755 *
756 * @param[in] transA
757 * TODO
758 *
759 * @param[in] alpha
760 * TODO
761 *
762 * @param[in] A
763 * TODO
764 *
765 * @param[in] B
766 * TODO
767 *
768 * @param[in] ldb
769 * TODO
770 *
771 * @param[in] C
772 * TODO
773 *
774 * @param[in] ldc
775 * TODO
776 *
777 *******************************************************************************
778 *
779 * @retval TODO
780 *
781 *******************************************************************************/
782static inline int
783__spm_dmatvec_args_init( __spm_dmatvec_t *args,
784 spm_side_t side,
785 spm_trans_t transA,
786 int distribution,
787 double alpha,
788 const spmatrix_t *A,
789 const double *B,
790 spm_int_t ldb,
791 double *C,
792 spm_int_t ldc )
793{
794 spm_int_t incx, incy;
795
796 if ( side == SpmLeft ) {
797 incx = 1;
798 incy = 1;
799 }
800 else {
801 incx = ldb;
802 incy = ldc;
803 }
804
805 args->follow_x = 0;
806 args->baseval = A->baseval;
807 args->n = A->n;
808 args->nnz = A->nnz;
809 args->gN = A->gN;
810 args->alpha = alpha;
811 args->rowptr = A->rowptr;
812 args->colptr = A->colptr;
813 args->values = A->values;
814 args->loc2glob = A->loc2glob;
815 args->glob2loc = NULL;
816 args->dof = A->dof;
817 args->dofs = A->dofs;
818 args->x = B;
819 args->incx = incx;
820 args->y = C;
821 args->incy = incy;
822 args->conjA_fct = __fct_id;
823 args->conjAt_fct = __fct_id;
824
825#if defined(PRECISION_c) || defined(PRECISION_z)
826 if ( A->mtxtype != SpmSymmetric ) {
827 if ( transA == SpmTrans ) {
828 args->conjA_fct = __fct_conj;
829 args->conjAt_fct = __fct_conj;
830 }
831 }
832 else {
833 if ( transA == SpmTrans ) {
834 args->conjA_fct = __fct_conj;
835 args->conjAt_fct = __fct_id;
836 }
837 else {
838 args->conjA_fct = __fct_id;
839 args->conjAt_fct = __fct_conj;
840 }
841 }
842#endif
843
844 args->loop_fct = NULL;
845
846 switch( A->fmttype ) {
847 case SpmCSC:
848 {
849 /* Switch pointers and side to get the correct behaviour */
850 if( A->mtxtype == SpmGeneral ) {
851 if ( ((side == SpmLeft) && (transA == SpmNoTrans)) ||
852 ((side == SpmRight) && (transA != SpmNoTrans)) )
853 {
854 args->follow_x = 1;
855 }
856 else {
857 args->follow_x = 0;
858 }
859 }
860 args->loop_fct = (A->mtxtype == SpmGeneral) ? __spm_dmatvec_ge_csx : __spm_dmatvec_sy_csx;
861 }
862 break;
863 case SpmCSR:
864 {
865 /* Switch pointers and side to get the correct behaviour */
866 if( A->mtxtype == SpmGeneral ) {
867 if ( ((side == SpmLeft) && (transA != SpmNoTrans)) ||
868 ((side == SpmRight) && (transA == SpmNoTrans)) )
869 {
870 args->follow_x = 1;
871 }
872 else {
873 args->follow_x = 0;
874 }
875 }
876 else {
877 args->follow_x = 1;
878 }
879
880 args->colptr = A->rowptr;
881 args->rowptr = A->colptr;
882 args->loop_fct = (A->mtxtype == SpmGeneral) ? __spm_dmatvec_ge_csx : __spm_dmatvec_sy_csx;
883 }
884 break;
885 case SpmIJV:
886 {
887 if ( ((side == SpmLeft) && (transA != SpmNoTrans)) ||
888 ((side == SpmRight) && (transA == SpmNoTrans)) )
889 {
890 const __conj_fct_t tmp_fct = args->conjA_fct;
891 args->conjA_fct = args->conjAt_fct;
892 args->conjAt_fct = tmp_fct;
893 args->colptr = A->rowptr;
894 args->rowptr = A->colptr;
895 if ( distribution == SpmDistByColumn ) {
896 args->follow_x = 0;
897 }
898 else {
899 args->follow_x = 1;
900 }
901 } else {
902 if ( distribution == SpmDistByColumn ) {
903 args->follow_x = 1;
904 }
905 else {
906 args->follow_x = 0;
907 }
908 }
909 args->glob2loc = A->glob2loc;
910 args->loop_fct = (A->mtxtype == SpmGeneral) ? __spm_dmatvec_ge_ijv : __spm_dmatvec_sy_ijv;
911 }
912 break;
913 default:
915 }
916
917 return SPM_SUCCESS;
918}
919
920/**
921 *******************************************************************************
922 *
923 * @brief Build a global C RHS, set to 0 for remote datas.
924 *
925 *******************************************************************************
926 *
927 * @param[in] nrhs
928 * The number of RHS.
929 *
930 * @param[in] spm
931 * The pointer to the sparse matrix structure.
932 *
933 * @param[in] Cloc
934 * The local matrix of size ldcl -by- nrhs
935 *
936 * @param[in] ldcl
937 * The leading dimension of Cloc. ldcl >= max( 1, spm->nexp )
938 *
939 * @param[out] Cglb
940 * On exit, contains the centralized version of the Cloc matrix.
941 *
942 * @param[out] ldcg
943 * On exit, contains the leading dimension of the Cglb matrix.
944 * ldcg >= max(1, spm->gNexp )
945 *
946 *******************************************************************************/
947static inline void
949 const spmatrix_t *spm,
950 const double *Cloc,
951 spm_int_t ldcl,
952 double **Cglb,
953 spm_int_t *ldcg )
954{
955 double *C;
956 const double *Cptr;
957 spm_int_t i, j, idx, ldc;
958 spm_int_t ig, dof, baseval, *loc2glob;
959
960 C = calloc(spm->gNexp * nrhs, sizeof(double));
961 ldc = spm->gNexp;
962
963 baseval = spm->baseval;
964 for ( j=0; j<nrhs; j++ )
965 {
966 Cptr = Cloc + j * ldcl;
967 loc2glob = spm->loc2glob;
968 for ( i=0; i<spm->n; i++, loc2glob++ )
969 {
970 ig = *loc2glob - baseval;
971 dof = (spm->dof > 0) ? spm->dof : spm->dofs[ig+1] - spm->dofs[ig];
972 idx = (spm->dof > 0) ? spm->dof * ig : spm->dofs[ig] - baseval;
973 memcpy( (C + j * ldc + idx),
974 Cptr,
975 dof * sizeof(double) );
976 Cptr += dof;
977 }
978 }
979
980 *Cglb = C;
981 *ldcg = ldc;
982 return;
983}
984
985/**
986 *******************************************************************************
987 *
988 * @brief Build a global B vector by gathering datas from all nodes.
989 *
990 *******************************************************************************
991 *
992 * @param[in] spm
993 * The pointer to the sparse matrix structure.
994 *
995 * @param[in] nrhs
996 * The number of RHS.
997 *
998 * @param[in] Bloc
999 * The local B vector.
1000 *
1001 * @param[inout] ldbl
1002 * The leading dimension of the local B vector.
1003 *
1004 * @param[out] Bglb
1005 * The global B vector.
1006 *
1007 * @param[out] ldbg
1008 * The leading dimension of the global B vector.
1009 *
1010 *******************************************************************************/
1011static inline void
1013 const spmatrix_t *spm,
1014 const double *Bloc,
1015 spm_int_t ldbl,
1016 double **Bglb,
1017 spm_int_t *ldbg )
1018{
1019 *ldbg = spm->gNexp;
1020 *Bglb = malloc( *ldbg * nrhs * sizeof(double) );
1021 d_spmGatherRHS( nrhs, spm, Bloc, ldbl, -1, *Bglb, *ldbg );
1022 return;
1023}
1024
1025/**
1026 *@}
1027 */
1028
1029/**
1030 *******************************************************************************
1031 *
1032 * @brief Compute a matrix-matrix product.
1033 *
1034 * C = alpha * op(A) * op(B) + beta * C
1035 * or C = alpha * op(B) * op(A) + beta * C
1036 *
1037 * where A is a sparse matrix, B and C two dense matrices. And op(A), op(B) are one of:
1038 *
1039 * op( A ) = A or op( A ) = A' or op( A ) = conjg( A' )
1040 *
1041 * alpha and beta are scalars.
1042 *
1043 *******************************************************************************
1044 *
1045 * @param[in] side
1046 * Specifies whether A * B is computed or B * A
1047 * - SpmLeft: C = alpha * op(A) * op(B) + beta * C
1048 * - SpmRight: C = alpha * op(B) * op(A) + beta * C
1049 *
1050 * @param[in] transA
1051 * Specifies whether the sparse matrix A is not transposed, transposed
1052 * or conjugate transposed:
1053 * - SpmNoTrans
1054 * - SpmTrans
1055 * - SpmTrans
1056 *
1057 * @param[in] transB
1058 * Specifies whether the dense matrix B is not transposed, transposed
1059 * or conjugate transposed:
1060 * - SpmNoTrans
1061 * - SpmTrans
1062 * - SpmTrans
1063 *
1064 * @param[in] K
1065 * If side == SpmLeft, specifies the number of columns of the matrices op(B) and C.
1066 * If side == SpmRight, specifies the number of rows of the matrices op(B) and C.
1067 *
1068 * @param[in] alpha
1069 * alpha specifies the scalar alpha.
1070 *
1071 * @param[in] A
1072 * The sparse matrix A
1073 *
1074 * @param[in] B
1075 * The matrix B of size: ldb-by-Bn, with Bn = (K, A->m or A->n) based
1076 * on the configuration of side, transA and transB.
1077 *
1078 * @param[in] ldb
1079 * The leading dimension of the matrix B. ldb >= (A->m, A->n or K) based
1080 * on the configuration of side, transA, and transB
1081 *
1082 * @param[in] beta
1083 * beta specifies the scalar beta.
1084 *
1085 * @param[inout] C
1086 * The matrix C of size ldc-by-Cn with Bn = (K, A->m or A->n) based
1087 * on the configuration of side, transA and transB.
1088 *
1089 * @param[in] ldc
1090 * The leading dimension of the matrix C. ldc >= (A->m, A->n or K) based
1091 * on the configuration of side, transA, and transB
1092 *
1093 * side | transA | transB | B | C |
1094 * -----------------------------------------------------------
1095 * Left | NoTrans | NoTrans | A->n by K | A->m by K |
1096 * Left | NoTrans | [Conj]Trans | K by A->n | A->m by K |
1097 * Left | [Conj]Trans | NoTrans | A->m by K | A->n by K |
1098 * Left | [Conj]Trans | [Conj]Trans | K by A->m | A->n by K |
1099 * Right | NoTrans | NoTrans | K by A->m | K by A->n |
1100 * Right | NoTrans | [Conj]Trans | A->m by K | K by A->n |
1101 * Right | [Conj]Trans | NoTrans | K by A->n | K by A->m |
1102 * Right | [Conj]Trans | [Conj]Trans | A->n by K | K by A->m |
1103 *
1104 *******************************************************************************
1105 *
1106 * @retval SPM_SUCCESS if the y vector has been computed successfully,
1107 * @retval SPM_ERR_BADPARAMETER otherwise.
1108 *
1109 *******************************************************************************/
1110int
1112 spm_trans_t transA,
1113 spm_trans_t transB,
1114 spm_int_t K,
1115 double alpha,
1116 const spmatrix_t *A,
1117 const double *B,
1118 spm_int_t ldb,
1119 double beta,
1120 double *C,
1121 spm_int_t ldc )
1122{
1123 int rc = SPM_SUCCESS;
1124 int distribution;
1125 spm_int_t M, N, r, ldctmp, ldbtmp;
1126 __spm_dmatvec_t args;
1127 double *Ctmp, *Btmp;
1128
1129 if ( transB != SpmNoTrans ) {
1130 fprintf(stderr, "transB != SpmNoTrans not supported yet in spmv computations\n");
1131 assert( transB == SpmNoTrans );
1132 return SPM_ERR_BADPARAMETER;
1133 }
1134
1135 if ( side == SpmLeft ) {
1136 M = A->nexp;
1137 N = K;
1138 }
1139 else {
1140 M = K;
1141 N = A->nexp;
1142 }
1143
1144 if ( beta == 0. ) {
1145 rc = LAPACKE_dlaset_work( LAPACK_COL_MAJOR, 'A', M, N, 0., 0., C, ldc );
1146 }
1147 else {
1148 rc = LAPACKE_dlascl_work( LAPACK_COL_MAJOR, 'G', -1, -1, 1., beta, M, N, C, ldc );
1149 }
1150 if ( rc != 0 ) {
1151 return SPM_ERR_INTERNAL;
1152 }
1153
1154 if ( alpha == 0. ) {
1155 return SPM_SUCCESS;
1156 }
1157
1158 Btmp = (double*)B;
1159 Ctmp = C;
1160 ldbtmp = ldb;
1161 ldctmp = ldc;
1162
1163 distribution = spm_get_distribution(A);
1164 if ( distribution != ( SpmDistByColumn | SpmDistByRow ) )
1165 {
1166 if ( A->mtxtype != SpmGeneral ) {
1167 d_spmm_build_Btmp( N, A, B, ldb, &Btmp, &ldbtmp );
1168 d_spmm_build_Ctmp( N, A, C, ldc, &Ctmp, &ldctmp );
1169 }
1170 else {
1171 if( ( (transA != SpmNoTrans) && (distribution == SpmDistByColumn) ) ||
1172 ( (transA == SpmNoTrans) && (distribution == SpmDistByRow) ) ) {
1173 d_spmm_build_Btmp( N, A, B, ldb, &Btmp, &ldbtmp );
1174 }
1175 if( ( (transA == SpmNoTrans) && (distribution == SpmDistByColumn) ) ||
1176 ( (transA != SpmNoTrans) && (distribution == SpmDistByRow) ) ) {
1177 d_spmm_build_Ctmp( N, A, C, ldc, &Ctmp, &ldctmp );
1178 }
1179 }
1180 }
1181
1182 __spm_dmatvec_args_init( &args, side, transA, distribution,
1183 alpha, A, Btmp, ldbtmp, Ctmp, ldctmp );
1184
1185 for( r=0; (r < N) && (rc == SPM_SUCCESS); r++ ) {
1186 args.x = Btmp + r * ldbtmp;
1187 args.y = Ctmp + r * ldctmp;
1188 rc = args.loop_fct( &args );
1189 }
1190
1191 if ( Ctmp != C ) {
1192 d_spmReduceRHS( N, A, Ctmp, ldctmp, C, ldc );
1193 free( Ctmp );
1194 }
1195
1196 if ( Btmp != B ) {
1197 free( Btmp );
1198 }
1199
1200 return rc;
1201}
1202
1203/**
1204 *******************************************************************************
1205 *
1206 * @brief compute the matrix-vector product:
1207 * \f[ y = alpha * A * x + beta * y \f]
1208 *
1209 * A is a SpmSymmetric spm, alpha and beta are scalars, and x and y are
1210 * vectors, and A a symm.
1211 *
1212 *******************************************************************************
1213 *
1214 * @param[in] trans
1215 * TODO
1216 *
1217 * @param[in] alpha
1218 * alpha specifies the scalar alpha
1219 *
1220 * @param[in] A
1221 * The SpmSymmetric spm.
1222 *
1223 * @param[in] x
1224 * The vector x.
1225 *
1226 * @param[in] incx
1227 * The vector x.
1228 *
1229 * @param[in] beta
1230 * beta specifies the scalar beta
1231 *
1232 * @param[inout] y
1233 * The vector y.
1234 *
1235 * @param[in] incy
1236 * The vector y.
1237 *
1238 *******************************************************************************
1239 *
1240 * @retval SPM_SUCCESS if the y vector has been computed succesfully,
1241 * @retval SPM_ERR_BADPARAMETER otherwise.
1242 *
1243 *******************************************************************************/
1244int
1246 double alpha,
1247 const spmatrix_t *A,
1248 const double *x,
1249 spm_int_t incx,
1250 double beta,
1251 double *y,
1252 spm_int_t incy )
1253{
1254 int rc = SPM_SUCCESS;
1255 int distribution;
1256 __spm_dmatvec_t args;
1257 double *ytmp, *xtmp;
1258 spm_int_t ldx, ldxtmp, ldy, ldytmp;
1259
1260 if ( beta == 0. ) {
1261 memset( y, 0, A->nexp * sizeof(double) );
1262 }
1263 else {
1264 cblas_dscal( A->nexp, (beta), y, incy );
1265 }
1266
1267 if ( alpha == 0. ) {
1268 return SPM_SUCCESS;
1269 }
1270
1271 assert( (incx == 1) && (incy == 1) );
1272 xtmp = (double*)x;
1273 ytmp = y;
1274 ldx = A->nexp * incx;
1275 ldy = A->nexp * incy;
1276 ldxtmp = ldx;
1277 ldytmp = ldy;
1278
1279 distribution = spm_get_distribution(A);
1280 if ( distribution != ( SpmDistByColumn | SpmDistByRow ) ){
1281
1282 if ( A->mtxtype != SpmGeneral ) {
1283 d_spmm_build_Btmp( 1, A, x, ldx, &xtmp, &ldxtmp );
1284 d_spmm_build_Ctmp( 1, A, y, ldy, &ytmp, &ldytmp );
1285 }
1286 else {
1287 if( ( (trans != SpmNoTrans) && (distribution == SpmDistByColumn) ) ||
1288 ( (trans == SpmNoTrans) && (distribution == SpmDistByRow) ) ) {
1289 d_spmm_build_Btmp( 1, A, x, ldx, &xtmp, &ldxtmp );
1290 }
1291 if( ( (trans == SpmNoTrans) && (distribution == SpmDistByColumn) ) ||
1292 ( (trans != SpmNoTrans) && (distribution == SpmDistByRow) ) ) {
1293 d_spmm_build_Ctmp( 1, A, y, ldy, &ytmp, &ldytmp );
1294 }
1295 }
1296 }
1297
1298 __spm_dmatvec_args_init( &args, SpmLeft, trans, distribution,
1299 alpha, A, xtmp, ldxtmp, ytmp, ldytmp );
1300 rc = args.loop_fct( &args );
1301
1302 if ( ytmp != y ) {
1303 d_spmReduceRHS( 1, A, ytmp, ldytmp, y, ldytmp );
1304 free( ytmp );
1305 }
1306
1307 if ( xtmp != x ) {
1308 free( xtmp );
1309 }
1310
1311 return rc;
1312}
double(* __conj_fct_t)(double)
Typedef to define the op function applied to the element (id or )
static void d_spmm_build_Btmp(int nrhs, const spmatrix_t *spm, const double *Bloc, spm_int_t ldbl, double **Bglb, spm_int_t *ldbg)
Build a global B vector by gathering datas from all nodes.
#define LAPACKE_dlascl_work(_dir_, _uplo_, _kl_, _ku_, _cfrom_, _cto_, _m_, _n_, _A_, _lda_)
Alias if Lapacke zlscl is not available.
int(* __loop_fct_t)(const __spm_dmatvec_t *)
Typedef to the main loop function performing the matvec operation.
struct __spm_dmatvec_s __spm_dmatvec_t
Typedef associated to structure.
static void d_spmm_build_Ctmp(int nrhs, const spmatrix_t *spm, const double *Cloc, spm_int_t ldcl, double **Cglb, spm_int_t *ldcg)
Build a global C RHS, set to 0 for remote datas.
static double __fct_id(double val)
Identity function.
#define SpmDistByRow
Definition const.h:43
#define SpmDistByColumn
Distribution of the matrix storage.
Definition const.h:42
enum spm_trans_e spm_trans_t
Transpostion.
enum spm_side_e spm_side_t
Side of the operation.
@ SPM_ERR_INTERNAL
Definition const.h:88
@ SPM_ERR_BADPARAMETER
Definition const.h:89
@ SPM_SUCCESS
Definition const.h:82
@ SpmNoTrans
Definition const.h:155
@ SpmTrans
Definition const.h:156
@ SpmGeneral
Definition const.h:165
@ SpmSymmetric
Definition const.h:166
@ SpmCSC
Definition const.h:73
@ SpmCSR
Definition const.h:74
@ SpmIJV
Definition const.h:75
@ SpmRight
Definition const.h:192
@ SpmLeft
Definition const.h:191
int spm_dspmm(spm_side_t side, spm_trans_t transA, spm_trans_t transB, spm_int_t K, double alpha, const spmatrix_t *A, const double *B, spm_int_t ldb, double beta, double *C, spm_int_t ldc)
Compute a matrix-matrix product.
int spm_dspmv(spm_trans_t trans, double alpha, const spmatrix_t *A, const double *x, spm_int_t incx, double beta, double *y, spm_int_t incy)
compute the matrix-vector product:
void d_spmReduceRHS(int nrhs, const spmatrix_t *spm, double *bglob, spm_int_t ldbg, double *bloc, spm_int_t ldbl)
Reduce all the global coefficients of a rhs and store the local ones.
Definition d_spm_rhs.c:109
void d_spmGatherRHS(int nrhs, const spmatrix_t *spm, const double *x, spm_int_t ldx, int root, double *gx, spm_int_t ldgx)
Gather all the global C coefficients and store the good ones in local.
Definition d_spm_rhs.c:165
spm_int_t * dofs
Definition spm.h:85
spm_int_t nexp
Definition spm.h:78
void * values
Definition spm.h:92
spm_int_t nnz
Definition spm.h:75
spm_int_t * rowptr
Definition spm.h:90
spm_fmttype_t fmttype
Definition spm.h:69
spm_int_t dof
Definition spm.h:82
spm_int_t n
Definition spm.h:73
spm_mtxtype_t mtxtype
Definition spm.h:65
spm_int_t * loc2glob
Definition spm.h:91
spm_int_t baseval
Definition spm.h:71
spm_int_t * glob2loc
Definition spm.h:94
spm_int_t * colptr
Definition spm.h:89
spm_int_t gNexp
Definition spm.h:77
spm_int_t gN
Definition spm.h:72
int spm_int_t
The main integer datatype used in spm arrays.
Definition datatypes.h:70
The sparse matrix data structure.
Definition spm.h:64
int spm_get_distribution(const spmatrix_t *spm)
Search the distribution pattern used in the spm structure.
Definition spm.c:2049