5#ifndef GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
6#define GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
9#include <ginkgo/core/base/array.hpp>
10#include <ginkgo/core/base/index_set.hpp>
11#include <ginkgo/core/base/lin_op.hpp>
12#include <ginkgo/core/base/math.hpp>
13#include <ginkgo/core/matrix/permutation.hpp>
14#include <ginkgo/core/matrix/scaled_permutation.hpp>
21template <
typename ValueType>
24template <
typename ValueType>
27template <
typename ValueType,
typename IndexType>
30template <
typename ValueType,
typename IndexType>
33template <
typename ValueType,
typename IndexType>
36template <
typename ValueType,
typename IndexType>
39template <
typename ValueType,
typename IndexType>
42template <
typename ValueType,
typename IndexType>
45template <
typename ValueType,
typename IndexType>
48template <
typename ValueType,
typename IndexType>
51template <
typename IndexType>
58template <
typename ValueType = default_precision,
typename IndexType =
int32>
103template <
typename ValueType = default_precision,
typename IndexType =
int32>
105 public ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>,
106#if GINKGO_ENABLE_HALF || GINKGO_ENABLE_BFLOAT16
107 public ConvertibleTo<Csr<next_precision<ValueType, 2>, IndexType>>,
109#if GINKGO_ENABLE_HALF && GINKGO_ENABLE_BFLOAT16
110 public ConvertibleTo<Csr<next_precision<ValueType, 3>, IndexType>>,
112 public ConvertibleTo<Dense<ValueType>>,
113 public ConvertibleTo<Coo<ValueType, IndexType>>,
114 public ConvertibleTo<Ell<ValueType, IndexType>>,
115 public ConvertibleTo<Fbcsr<ValueType, IndexType>>,
116 public ConvertibleTo<Hybrid<ValueType, IndexType>>,
117 public ConvertibleTo<Sellp<ValueType, IndexType>>,
118 public ConvertibleTo<SparsityCsr<ValueType, IndexType>>,
125 remove_complex<Csr<ValueType, IndexType>>>,
128 friend class Coo<ValueType, IndexType>;
129 friend class Dense<ValueType>;
131 friend class Ell<ValueType, IndexType>;
132 friend class Hybrid<ValueType, IndexType>;
133 friend class Sellp<ValueType, IndexType>;
135 friend class Fbcsr<ValueType, IndexType>;
136 friend class CsrBuilder<ValueType, IndexType>;
138 GKO_ASSERT_SUPPORTED_VALUE_AND_INDEX_TYPE;
161 using value_type = ValueType;
162 using index_type = IndexType;
177 friend class automatical;
218 virtual std::shared_ptr<strategy_type>
copy() = 0;
221 void set_name(std::string name) { name_ = name; }
243 auto host_mtx_exec = mtx_row_ptrs.
get_executor()->get_master();
245 const bool is_mtx_on_host{host_mtx_exec ==
247 const index_type* row_ptrs{};
248 if (is_mtx_on_host) {
251 row_ptrs_host = mtx_row_ptrs;
254 auto num_rows = mtx_row_ptrs.
get_size() - 1;
255 max_length_per_row_ = 0;
256 for (
size_type i = 0; i < num_rows; i++) {
257 max_length_per_row_ = std::max(max_length_per_row_,
258 row_ptrs[i + 1] - row_ptrs[i]);
262 int64_t
clac_size(
const int64_t nnz)
override {
return 0; }
264 index_type get_max_length_per_row() const noexcept
266 return max_length_per_row_;
269 std::shared_ptr<strategy_type>
copy()
override
271 return std::make_shared<classical>();
275 index_type max_length_per_row_;
294 int64_t
clac_size(
const int64_t nnz)
override {
return 0; }
296 std::shared_ptr<strategy_type>
copy()
override
298 return std::make_shared<merge_path>();
319 int64_t
clac_size(
const int64_t nnz)
override {
return 0; }
321 std::shared_ptr<strategy_type>
copy()
override
323 return std::make_shared<cusparse>();
343 int64_t
clac_size(
const int64_t nnz)
override {
return 0; }
345 std::shared_ptr<strategy_type>
copy()
override
347 return std::make_shared<sparselib>();
373 :
load_balance(exec->get_num_warps(), exec->get_warp_size())
382 :
load_balance(exec->get_num_warps(), exec->get_warp_size(), false)
393 :
load_balance(exec->get_num_subgroups(), 32, false,
"intel")
408 bool cuda_strategy =
true,
409 std::string strategy_name =
"none")
412 warp_size_(warp_size),
413 cuda_strategy_(cuda_strategy),
414 strategy_name_(strategy_name)
423 auto host_srow_exec = mtx_srow->
get_executor()->get_master();
424 auto host_mtx_exec = mtx_row_ptrs.
get_executor()->get_master();
425 const bool is_srow_on_host{host_srow_exec ==
427 const bool is_mtx_on_host{host_mtx_exec ==
431 const index_type* row_ptrs{};
433 if (is_srow_on_host) {
436 srow_host = *mtx_srow;
439 if (is_mtx_on_host) {
442 row_ptrs_host = mtx_row_ptrs;
448 const auto num_rows = mtx_row_ptrs.
get_size() - 1;
449 const auto num_elems = row_ptrs[num_rows];
450 const auto bucket_divider =
451 num_elems > 0 ?
ceildiv(num_elems, warp_size_) : 1;
452 for (
size_type i = 0; i < num_rows; i++) {
456 if (bucket < nwarps) {
462 srow[i] += srow[i - 1];
464 if (!is_srow_on_host) {
465 *mtx_srow = srow_host;
472 if (warp_size_ > 0) {
474 if (nnz >=
static_cast<int64_t
>(2e8)) {
476 }
else if (nnz >=
static_cast<int64_t
>(2e7)) {
478 }
else if (nnz >=
static_cast<int64_t
>(2e6)) {
480 }
else if (nnz >=
static_cast<int64_t
>(2e5)) {
483 if (strategy_name_ ==
"intel") {
485 if (nnz >=
static_cast<int64_t
>(2e8)) {
487 }
else if (nnz >=
static_cast<int64_t
>(2e7)) {
491#if GINKGO_HIP_PLATFORM_HCC
492 if (!cuda_strategy_) {
494 if (nnz >=
static_cast<int64_t
>(1e7)) {
496 }
else if (nnz >=
static_cast<int64_t
>(1e6)) {
502 auto nwarps = nwarps_ * multiple;
509 std::shared_ptr<strategy_type>
copy()
override
511 return std::make_shared<load_balance>(
512 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
519 std::string strategy_name_;
526 const index_type nvidia_row_len_limit = 1024;
529 const index_type nvidia_nnz_limit{
static_cast<index_type
>(1e6)};
532 const index_type amd_row_len_limit = 768;
535 const index_type amd_nnz_limit{
static_cast<index_type
>(1e8)};
538 const index_type intel_row_len_limit = 25600;
541 const index_type intel_nnz_limit{
static_cast<index_type
>(3e8)};
561 :
automatical(exec->get_num_warps(), exec->get_warp_size())
570 :
automatical(exec->get_num_warps(), exec->get_warp_size(), false)
581 :
automatical(exec->get_num_subgroups(), 32, false,
"intel")
596 bool cuda_strategy =
true,
597 std::string strategy_name =
"none")
600 warp_size_(warp_size),
601 cuda_strategy_(cuda_strategy),
602 strategy_name_(strategy_name),
603 max_length_per_row_(0)
612 index_type nnz_limit = nvidia_nnz_limit;
613 index_type row_len_limit = nvidia_row_len_limit;
614 if (strategy_name_ ==
"intel") {
615 nnz_limit = intel_nnz_limit;
616 row_len_limit = intel_row_len_limit;
618#if GINKGO_HIP_PLATFORM_HCC
619 if (!cuda_strategy_) {
620 nnz_limit = amd_nnz_limit;
621 row_len_limit = amd_row_len_limit;
624 auto host_mtx_exec = mtx_row_ptrs.
get_executor()->get_master();
625 const bool is_mtx_on_host{host_mtx_exec ==
628 const index_type* row_ptrs{};
629 if (is_mtx_on_host) {
632 row_ptrs_host = mtx_row_ptrs;
635 const auto num_rows = mtx_row_ptrs.
get_size() - 1;
636 if (row_ptrs[num_rows] > nnz_limit) {
638 cuda_strategy_, strategy_name_);
639 if (is_mtx_on_host) {
640 actual_strategy.
process(mtx_row_ptrs, mtx_srow);
642 actual_strategy.
process(row_ptrs_host, mtx_srow);
644 this->set_name(actual_strategy.
get_name());
646 index_type maxnum = 0;
647 for (
size_type i = 0; i < num_rows; i++) {
648 maxnum = std::max(maxnum, row_ptrs[i + 1] - row_ptrs[i]);
650 if (maxnum > row_len_limit) {
652 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
653 if (is_mtx_on_host) {
654 actual_strategy.
process(mtx_row_ptrs, mtx_srow);
656 actual_strategy.
process(row_ptrs_host, mtx_srow);
658 this->set_name(actual_strategy.
get_name());
661 if (is_mtx_on_host) {
662 actual_strategy.
process(mtx_row_ptrs, mtx_srow);
663 max_length_per_row_ =
664 actual_strategy.get_max_length_per_row();
666 actual_strategy.
process(row_ptrs_host, mtx_srow);
667 max_length_per_row_ =
668 actual_strategy.get_max_length_per_row();
670 this->set_name(actual_strategy.
get_name());
677 return std::make_shared<load_balance>(
678 nwarps_, warp_size_, cuda_strategy_, strategy_name_)
682 index_type get_max_length_per_row() const noexcept
684 return max_length_per_row_;
687 std::shared_ptr<strategy_type>
copy()
override
689 return std::make_shared<automatical>(
690 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
697 std::string strategy_name_;
698 index_type max_length_per_row_;
708#if GINKGO_ENABLE_HALF || GINKGO_ENABLE_BFLOAT16
720#if GINKGO_ENABLE_HALF && GINKGO_ENABLE_BFLOAT16
760 void read(
const mat_data& data)
override;
762 void read(
const device_mat_data& data)
override;
764 void read(device_mat_data&& data)
override;
766 void write(mat_data& data)
const override;
777 class multiply_reuse_info {
781 explicit multiply_reuse_info();
783 ~multiply_reuse_info();
785 multiply_reuse_info(
const multiply_reuse_info&) =
delete;
787 multiply_reuse_info(multiply_reuse_info&&)
noexcept;
789 multiply_reuse_info& operator=(
const multiply_reuse_info&) =
delete;
791 multiply_reuse_info& operator=(multiply_reuse_info&&)
noexcept;
804 explicit multiply_reuse_info(std::unique_ptr<lookup_data> data);
806 std::unique_ptr<lookup_data> internal;
845 class multiply_add_reuse_info {
849 explicit multiply_add_reuse_info();
851 ~multiply_add_reuse_info();
853 multiply_add_reuse_info(
const multiply_add_reuse_info&) =
delete;
855 multiply_add_reuse_info(multiply_add_reuse_info&&)
noexcept;
857 multiply_add_reuse_info& operator=(
const multiply_add_reuse_info&) =
860 multiply_add_reuse_info& operator=(multiply_add_reuse_info&&)
noexcept;
879 explicit multiply_add_reuse_info(std::unique_ptr<lookup_data> data);
881 std::unique_ptr<lookup_data> internal;
937 class scale_add_reuse_info {
941 explicit scale_add_reuse_info();
943 ~scale_add_reuse_info();
945 scale_add_reuse_info(
const scale_add_reuse_info&) =
delete;
947 scale_add_reuse_info(scale_add_reuse_info&&)
noexcept;
949 scale_add_reuse_info& operator=(
const scale_add_reuse_info&) =
delete;
951 scale_add_reuse_info& operator=(scale_add_reuse_info&&)
noexcept;
967 explicit scale_add_reuse_info(std::unique_ptr<lookup_data> data);
969 std::unique_ptr<lookup_data> internal;
1039 std::unique_ptr<Permutation<IndexType>> value_permutation;
1091 bool invert =
false)
const;
1138 bool invert =
false)
const;
1170 bool invert =
false)
const;
1172 std::unique_ptr<LinOp>
permute(
1175 std::unique_ptr<LinOp> inverse_permute(
1178 std::unique_ptr<LinOp> row_permute(
1181 std::unique_ptr<LinOp> column_permute(
1184 std::unique_ptr<LinOp> inverse_row_permute(
1187 std::unique_ptr<LinOp> inverse_column_permute(
1207 bool is_sorted_by_column_index()
const;
1225 return values_.get_const_data();
1256 return col_idxs_.get_const_data();
1275 return row_ptrs_.get_const_data();
1283 index_type*
get_srow() noexcept {
return srow_.get_data(); }
1294 return srow_.get_const_data();
1304 return srow_.get_size();
1314 return values_.get_size();
1333 strategy_ = std::move(strategy->copy());
1345 auto exec = this->get_executor();
1346 GKO_ASSERT_EQUAL_DIMENSIONS(alpha,
dim<2>(1, 1));
1358 auto exec = this->get_executor();
1359 GKO_ASSERT_EQUAL_DIMENSIONS(alpha,
dim<2>(1, 1));
1371 static std::unique_ptr<Csr>
create(std::shared_ptr<const Executor> exec,
1372 std::shared_ptr<strategy_type> strategy);
1386 std::shared_ptr<const Executor> exec,
const dim<2>& size = {},
1388 std::shared_ptr<strategy_type> strategy =
nullptr);
1410 std::shared_ptr<const Executor> exec,
const dim<2>& size,
1413 std::shared_ptr<strategy_type> strategy =
nullptr);
1419 template <
typename InputValueType,
typename InputColumnIndexType,
1420 typename InputRowPtrType>
1422 "explicitly construct the gko::array argument instead of passing "
1423 "initializer lists")
1426 std::initializer_list<InputValueType> values,
1427 std::initializer_list<InputColumnIndexType> col_idxs,
1428 std::initializer_list<InputRowPtrType> row_ptrs)
1451 std::shared_ptr<const Executor> exec,
const dim<2>& size,
1452 gko::detail::const_array_view<ValueType>&& values,
1453 gko::detail::const_array_view<IndexType>&& col_idxs,
1454 gko::detail::const_array_view<IndexType>&& row_ptrs,
1455 std::shared_ptr<strategy_type> strategy =
nullptr);
1485 const span& row_span,
const span& column_span)
const;
1512 Csr(std::shared_ptr<const Executor> exec,
const dim<2>& size = {},
1514 std::shared_ptr<strategy_type> strategy =
nullptr);
1516 Csr(std::shared_ptr<const Executor> exec,
const dim<2>& size,
1519 std::shared_ptr<strategy_type> strategy =
nullptr);
1521 void apply_impl(
const LinOp* b,
LinOp* x)
const override;
1523 void apply_impl(
const LinOp* alpha,
const LinOp* b,
const LinOp* beta,
1524 LinOp* x)
const override;
1527 static std::shared_ptr<strategy_type> make_default_strategy(
1528 std::shared_ptr<const Executor> exec)
1530 auto cuda_exec = std::dynamic_pointer_cast<const CudaExecutor>(exec);
1531 auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(exec);
1532 auto dpcpp_exec = std::dynamic_pointer_cast<const DpcppExecutor>(exec);
1533 std::shared_ptr<strategy_type> new_strategy;
1535 new_strategy = std::make_shared<automatical>(cuda_exec);
1536 }
else if (hip_exec) {
1537 new_strategy = std::make_shared<automatical>(hip_exec);
1538 }
else if (dpcpp_exec) {
1539 new_strategy = std::make_shared<automatical>(dpcpp_exec);
1541 new_strategy = std::make_shared<classical>();
1543 return new_strategy;
1547 template <
typename CsrType>
1548 void convert_strategy_helper(CsrType* result)
const
1551 std::shared_ptr<typename CsrType::strategy_type> new_strat;
1553 new_strat = std::make_shared<typename CsrType::classical>();
1554 }
else if (
dynamic_cast<merge_path*
>(strat)) {
1555 new_strat = std::make_shared<typename CsrType::merge_path>();
1556 }
else if (
dynamic_cast<cusparse*
>(strat)) {
1557 new_strat = std::make_shared<typename CsrType::cusparse>();
1558 }
else if (
dynamic_cast<sparselib*
>(strat)) {
1559 new_strat = std::make_shared<typename CsrType::sparselib>();
1561 auto rexec = result->get_executor();
1563 std::dynamic_pointer_cast<const CudaExecutor>(rexec);
1564 auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(rexec);
1566 std::dynamic_pointer_cast<const DpcppExecutor>(rexec);
1571 std::make_shared<typename CsrType::load_balance>(
1574 new_strat = std::make_shared<typename CsrType::automatical>(
1577 }
else if (hip_exec) {
1580 std::make_shared<typename CsrType::load_balance>(
1583 new_strat = std::make_shared<typename CsrType::automatical>(
1586 }
else if (dpcpp_exec) {
1589 std::make_shared<typename CsrType::load_balance>(
1592 new_strat = std::make_shared<typename CsrType::automatical>(
1597 auto this_cuda_exec =
1598 std::dynamic_pointer_cast<const CudaExecutor>(
1599 this->get_executor());
1600 auto this_hip_exec =
1601 std::dynamic_pointer_cast<const HipExecutor>(
1602 this->get_executor());
1603 auto this_dpcpp_exec =
1604 std::dynamic_pointer_cast<const DpcppExecutor>(
1605 this->get_executor());
1606 if (this_cuda_exec) {
1609 std::make_shared<typename CsrType::load_balance>(
1613 std::make_shared<typename CsrType::automatical>(
1616 }
else if (this_hip_exec) {
1619 std::make_shared<typename CsrType::load_balance>(
1623 std::make_shared<typename CsrType::automatical>(
1626 }
else if (this_dpcpp_exec) {
1629 std::make_shared<typename CsrType::load_balance>(
1633 std::make_shared<typename CsrType::automatical>(
1641 new_strat = std::make_shared<typename CsrType::classical>();
1645 result->set_strategy(new_strat);
1653 srow_.resize_and_reset(strategy_->clac_size(values_.get_size()));
1654 strategy_->process(row_ptrs_, &srow_);
1663 virtual void scale_impl(
const LinOp* alpha);
1671 virtual void inv_scale_impl(
const LinOp* alpha);
1674 std::shared_ptr<strategy_type> strategy_;
1675 array<value_type> values_;
1676 array<index_type> col_idxs_;
1677 array<index_type> row_ptrs_;
1678 array<index_type> srow_;
1680 void add_scaled_identity_impl(
const LinOp* a,
const LinOp* b)
override;
1693template <
typename ValueType,
typename IndexType>
1694void strategy_rebuild_helper(Csr<ValueType, IndexType>* result)
1696 using load_balance =
typename Csr<ValueType, IndexType>::load_balance;
1697 using automatical =
typename Csr<ValueType, IndexType>::automatical;
1698 auto strategy = result->get_strategy();
1699 auto executor = result->get_executor();
1700 if (std::dynamic_pointer_cast<load_balance>(strategy)) {
1702 std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1703 result->set_strategy(std::make_shared<load_balance>(exec));
1704 }
else if (
auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1706 result->set_strategy(std::make_shared<load_balance>(exec));
1708 }
else if (std::dynamic_pointer_cast<automatical>(strategy)) {
1710 std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1711 result->set_strategy(std::make_shared<automatical>(exec));
1712 }
else if (
auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1714 result->set_strategy(std::make_shared<automatical>(exec));
This is the Executor subclass which represents the CUDA device.
Definition executor.hpp:1542
The EnableAbsoluteComputation mixin provides the default implementations of compute_absolute_linop an...
Definition lin_op.hpp:794
The EnableLinOp mixin can be used to provide sensible default implementations of the majority of the ...
Definition lin_op.hpp:879
This mixin inherits from (a subclass of) PolymorphicObject and provides a base implementation of a ne...
Definition polymorphic_object.hpp:668
The first step in using the Ginkgo library consists of creating an executor.
Definition executor.hpp:615
Definition lin_op.hpp:117
This is the Executor subclass which represents the OpenMP device (typically CPU).
Definition executor.hpp:1387
Linear operators which support permutation should implement the Permutable interface.
Definition lin_op.hpp:484
A LinOp implementing this interface can read its data from a matrix_data structure.
Definition lin_op.hpp:605
Adds the operation M <- a I + b M for matrix M, identity operator I and scalars a and b,...
Definition lin_op.hpp:818
Linear operators which support transposition should implement the Transposable interface.
Definition lin_op.hpp:433
A LinOp implementing this interface can write its data to a matrix_data structure.
Definition lin_op.hpp:660
An array is a container which encapsulates fixed-sized arrays, stored on the Executor tied to the arr...
Definition array.hpp:166
value_type * get_data() noexcept
Returns a pointer to the block of memory used to store the elements of the array.
Definition array.hpp:687
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor associated with the array.
Definition array.hpp:703
const value_type * get_const_data() const noexcept
Returns a constant pointer to the block of memory used to store the elements of the array.
Definition array.hpp:696
size_type get_size() const noexcept
Returns the number of elements in the array.
Definition array.hpp:670
This type is a device-side equivalent to matrix_data.
Definition device_matrix_data.hpp:36
An index set class represents an ordered set of intervals.
Definition index_set.hpp:56
COO stores a matrix in the coordinate matrix format.
Definition coo.hpp:65
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:687
automatical(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates an automatical strategy with specified parameters.
Definition csr.hpp:595
automatical()
Creates an automatical strategy.
Definition csr.hpp:550
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:675
automatical(std::shared_ptr< const CudaExecutor > exec)
Creates an automatical strategy with CUDA executor.
Definition csr.hpp:560
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:606
automatical(std::shared_ptr< const DpcppExecutor > exec)
Creates an automatical strategy with Dpcpp executor.
Definition csr.hpp:580
automatical(std::shared_ptr< const HipExecutor > exec)
Creates an automatical strategy with HIP executor.
Definition csr.hpp:569
classical is a strategy_type which uses the same number of threads on each row.
Definition csr.hpp:233
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:240
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:269
classical()
Creates a classical strategy.
Definition csr.hpp:238
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:262
cusparse is a strategy_type which uses the sparselib csr.
Definition csr.hpp:308
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:319
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:321
cusparse()
Creates a cusparse strategy.
Definition csr.hpp:313
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:315
load_balance is a strategy_type which uses the load balance algorithm.
Definition csr.hpp:354
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:417
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:509
load_balance(std::shared_ptr< const HipExecutor > exec)
Creates a load_balance strategy with HIP executor.
Definition csr.hpp:381
load_balance()
Creates a load_balance strategy.
Definition csr.hpp:362
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:470
load_balance(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates a load_balance strategy with specified parameters.
Definition csr.hpp:407
load_balance(std::shared_ptr< const CudaExecutor > exec)
Creates a load_balance strategy with CUDA executor.
Definition csr.hpp:372
load_balance(std::shared_ptr< const DpcppExecutor > exec)
Creates a load_balance strategy with DPCPP executor.
Definition csr.hpp:392
merge_path is a strategy_type which uses the merge_path algorithm.
Definition csr.hpp:283
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:294
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:296
merge_path()
Creates a merge_path strategy.
Definition csr.hpp:288
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:290
void update_values(ptr_param< const Csr > mtx, ptr_param< const Dense< value_type > > scale_mult, ptr_param< const Csr > mtx_mult, ptr_param< const Dense< value_type > > scale_add, ptr_param< const Csr > mtx_add, ptr_param< Csr > out) const
Recomputes the sparse matrix-matrix product out = scale_mult * mtx * mtx_mult + scale_add * mtx_add w...
void update_values(ptr_param< const Csr > mtx1, ptr_param< const Csr > mtx2, ptr_param< Csr > out) const
Recomputes the sparse matrix-matrix product out = mtx1 * mtx2 when only the values of mtx1 and mtx2 c...
void update_values(ptr_param< const Dense< value_type > > scale1, ptr_param< const Csr > mtx1, ptr_param< const Dense< value_type > > scale2, ptr_param< const Csr > mtx2, ptr_param< Csr > out) const
Recomputes the sparse matrix-matrix sum out = scale1 * mtx1 + scale2 * mtx2 when only the values of m...
sparselib is a strategy_type which uses the sparselib csr.
Definition csr.hpp:332
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:343
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:339
sparselib()
Creates a sparselib strategy.
Definition csr.hpp:337
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:345
strategy_type is to decide how to set the csr algorithm.
Definition csr.hpp:176
virtual int64_t clac_size(const int64_t nnz)=0
Computes the srow size according to the number of nonzeros.
std::string get_name()
Returns the name of strategy.
Definition csr.hpp:194
virtual std::shared_ptr< strategy_type > copy()=0
Copy a strategy.
virtual void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow)=0
Computes srow according to row pointers.
strategy_type(std::string name)
Creates a strategy_type.
Definition csr.hpp:185
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition csr.hpp:126
std::pair< std::unique_ptr< Csr >, multiply_add_reuse_info > multiply_add_reuse(ptr_param< const Dense< value_type > > scale_mult, ptr_param< const Csr > mtx_mult, ptr_param< const Dense< value_type > > scale_add, ptr_param< const Csr > mtx_add) const
Computes the sparse matrix product scale_mult * this * mtx_mult + scale_add * mtx_add on the executor...
std::pair< std::unique_ptr< Csr >, permuting_reuse_info > permute_reuse(ptr_param< const Permutation< index_type > > permutation, permute_mode mode=permute_mode::symmetric) const
Computes the operations necessary to propagate changed values from a matrix A to a permuted matrix.
std::pair< std::unique_ptr< Csr >, multiply_reuse_info > multiply_reuse(ptr_param< const Csr > other) const
Computes the sparse matrix product this * other on the executor of this matrix, and necessary data fo...
Csr & operator=(const Csr &)
Copy-assigns a Csr matrix.
std::unique_ptr< Csr > scale_permute(ptr_param< const ScaledPermutation< value_type, index_type > > permutation, permute_mode=permute_mode::symmetric) const
Creates a scaled and permuted copy of this matrix.
std::unique_ptr< absolute_type > compute_absolute() const override
Gets the AbsoluteLinOp.
const index_type * get_const_row_ptrs() const noexcept
Returns the row pointers of the matrix.
Definition csr.hpp:1273
std::unique_ptr< Csr< ValueType, IndexType > > create_submatrix(const span &row_span, const span &column_span) const
Creates a submatrix from this Csr matrix given row and column spans.
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size={}, size_type num_nonzeros={}, std::shared_ptr< strategy_type > strategy=nullptr)
Creates an uninitialized CSR matrix of the specified size.
const index_type * get_const_srow() const noexcept
Returns the starting rows.
Definition csr.hpp:1292
std::unique_ptr< Csr > multiply_add(ptr_param< const Dense< value_type > > scale_mult, ptr_param< const Csr > mtx_mult, ptr_param< const Dense< value_type > > scale_add, ptr_param< const Csr > mtx_add) const
Computes the sparse matrix product scale_mult * this * mtx_mult + scale_add * mtx_add on the executor...
void set_strategy(std::shared_ptr< strategy_type > strategy)
Set the strategy.
Definition csr.hpp:1331
void inv_scale(ptr_param< const LinOp > alpha)
Scales the matrix with the inverse of a scalar.
Definition csr.hpp:1356
index_type * get_srow() noexcept
Returns the starting rows.
Definition csr.hpp:1283
std::unique_ptr< Csr > scale_add(ptr_param< const Dense< value_type > > scale_this, ptr_param< const Dense< value_type > > scale_other, ptr_param< const Csr > mtx_other) const
Computes the sparse matrix sum scale_this * this + scale_other * mtx_add on the executor of this matr...
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, std::shared_ptr< strategy_type > strategy)
Creates an uninitialized CSR matrix of the specified size.
size_type get_num_srow_elements() const noexcept
Returns the number of the srow stored elements (involved warps).
Definition csr.hpp:1302
std::pair< std::unique_ptr< Csr >, permuting_reuse_info > permute_reuse(ptr_param< const Permutation< index_type > > row_permutation, ptr_param< const Permutation< index_type > > column_permutation, bool invert=false) const
Computes the operations necessary to propagate changed values from a matrix A to a permuted matrix.
std::unique_ptr< Csr< ValueType, IndexType > > create_submatrix(const index_set< IndexType > &row_index_set, const index_set< IndexType > &column_index_set) const
Creates a submatrix from this Csr matrix given row and column index_set objects.
std::unique_ptr< Diagonal< ValueType > > extract_diagonal() const override
Extracts the diagonal entries of the matrix into a vector.
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size, array< value_type > values, array< index_type > col_idxs, array< index_type > row_ptrs, std::shared_ptr< strategy_type > strategy=nullptr)
Creates a CSR matrix from already allocated (and initialized) row pointer, column index and value arr...
index_type * get_row_ptrs() noexcept
Returns the row pointers of the matrix.
Definition csr.hpp:1264
std::unique_ptr< Csr > permute(ptr_param< const Permutation< index_type > > permutation, permute_mode mode=permute_mode::symmetric) const
Creates a permuted copy of this matrix with the given permutation .
std::unique_ptr< const Dense< ValueType > > create_const_value_view() const
Creates a const Dense view of the value array of this matrix as a column vector of dimensions nnz x 1...
static std::unique_ptr< const Csr > create_const(std::shared_ptr< const Executor > exec, const dim< 2 > &size, gko::detail::const_array_view< ValueType > &&values, gko::detail::const_array_view< IndexType > &&col_idxs, gko::detail::const_array_view< IndexType > &&row_ptrs, std::shared_ptr< strategy_type > strategy=nullptr)
Creates a constant (immutable) Csr matrix from a set of constant arrays.
Csr(const Csr &)
Copy-constructs a Csr matrix.
Csr & operator=(Csr &&)
Move-assigns a Csr matrix.
std::unique_ptr< LinOp > transpose() const override
Returns a LinOp representing the transpose of the Transposable object.
std::unique_ptr< Csr > multiply(ptr_param< const Csr > other) const
Computes the sparse matrix product this * other on the executor of this matrix.
const value_type * get_const_values() const noexcept
Returns the values of the matrix.
Definition csr.hpp:1223
void compute_absolute_inplace() override
Compute absolute inplace on each element.
size_type get_num_stored_elements() const noexcept
Returns the number of elements explicitly stored in the matrix.
Definition csr.hpp:1312
std::shared_ptr< strategy_type > get_strategy() const noexcept
Returns the strategy.
Definition csr.hpp:1321
const index_type * get_const_col_idxs() const noexcept
Returns the column indexes of the matrix.
Definition csr.hpp:1254
void sort_by_column_index()
Sorts all (value, col_idx) pairs in each row by column index.
std::pair< std::unique_ptr< Csr >, permuting_reuse_info > transpose_reuse() const
Computes the necessary data to update a transposed matrix from its original matrix.
std::pair< std::unique_ptr< Csr >, scale_add_reuse_info > add_scale_reuse(ptr_param< const Dense< value_type > > scale_this, ptr_param< const Dense< value_type > > scale_other, ptr_param< const Csr > mtx_other) const
Computes the sparse matrix sum scale_this * this + scale_other * mtx_add on the executor of this matr...
std::unique_ptr< Csr > scale_permute(ptr_param< const ScaledPermutation< value_type, index_type > > row_permutation, ptr_param< const ScaledPermutation< value_type, index_type > > column_permutation, bool invert=false) const
Creates a scaled and permuted copy of this matrix.
std::unique_ptr< Dense< ValueType > > create_value_view()
Creates a Dense view of the value array of this matrix as a column vector of dimensions nnz x 1.
void scale(ptr_param< const LinOp > alpha)
Scales the matrix with a scalar.
Definition csr.hpp:1343
value_type * get_values() noexcept
Returns the values of the matrix.
Definition csr.hpp:1214
index_type * get_col_idxs() noexcept
Returns the column indexes of the matrix.
Definition csr.hpp:1245
Csr(Csr &&)
Move-constructs a Csr matrix.
std::unique_ptr< Csr > permute(ptr_param< const Permutation< index_type > > row_permutation, ptr_param< const Permutation< index_type > > column_permutation, bool invert=false) const
Creates a non-symmetrically permuted copy of this matrix with the given row and column permutations.
std::unique_ptr< LinOp > conj_transpose() const override
Returns a LinOp representing the conjugate transpose of the Transposable object.
Dense is a matrix format which explicitly stores all values of the matrix.
Definition dense.hpp:120
This class is a utility which efficiently implements the diagonal matrix (a linear operator which scales a vector row-wise).
Definition diagonal.hpp:56
ELL is a matrix format where stride with explicit zeros is used such that all rows have the same number of stored elements.
Definition ell.hpp:66
Fixed-block compressed sparse row storage matrix format.
Definition fbcsr.hpp:116
HYBRID is a matrix format which splits the matrix into ELLPACK and COO format.
Definition hybrid.hpp:57
Permutation is a matrix format that represents a permutation matrix, i.e.
Definition permutation.hpp:112
ScaledPermutation is a matrix combining a permutation with scaling factors.
Definition scaled_permutation.hpp:38
SELL-P is a matrix format similar to ELL format.
Definition sellp.hpp:58
SparsityCsr is a matrix format which stores only the sparsity pattern of a sparse matrix by compressing each row of the matrix (compressed sparse row format).
Definition sparsity_csr.hpp:56
This class is used for function parameters in the place of raw pointers.
Definition utils_helper.hpp:41
The matrix namespace.
Definition dense_cache.hpp:24
permute_mode
Specifies how a permutation will be applied to a matrix.
Definition permutation.hpp:42
@ symmetric
The rows and columns will be permuted.
Definition permutation.hpp:53
The Ginkgo namespace.
Definition abstract_factory.hpp:20
typename detail::remove_complex_s< T >::type remove_complex
Obtain the type which removed the complex of complex/scalar type or the template parameter of class b...
Definition math.hpp:264
typename detail::to_complex_s< T >::type to_complex
Obtain the type which adds the complex of complex/scalar type or the template parameter of class by a...
Definition math.hpp:283
virtual void move_to(result_type *result)=0
Converts the implementer to an object of type result_type by moving data from this object.
virtual void convert_to(result_type *result) const =0
ConvertibleTo interface is used to mark that the implementer can be converted to the object of ResultType.
constexpr int64 ceildiv(int64 num, int64 den)
Performs integer division with rounding up.
Definition math.hpp:614
std::size_t size_type
Integral type used for allocation quantities.
Definition types.hpp:90
constexpr T min(const T &x, const T &y)
Returns the smaller of the arguments.
Definition math.hpp:750
std::unique_ptr< MatrixType > read(StreamType &&is, MatrixArgs &&... args)
Reads a matrix stored in matrix market format from an input stream.
Definition mtx_io.hpp:160
typename detail::find_precision_impl< T, -step >::type previous_precision
Obtains the previous type of T in the singly-linked precision list corresponding to bfloat16/half.
Definition math.hpp:473
detail::temporary_clone< detail::pointee< Ptr > > make_temporary_clone(std::shared_ptr< const Executor > exec, Ptr &&ptr)
Creates a temporary_clone.
Definition temporary_clone.hpp:208
typename detail::find_precision_impl< T, step >::type next_precision
Obtains the next type of T in the singly-linked precision list corresponding to bfloat16/half.
Definition math.hpp:466
void write(StreamType &&os, MatrixPtrType &&matrix, layout_type layout=detail::mtx_io_traits< std::remove_cv_t< detail::pointee< MatrixPtrType > > >::default_layout)
Writes a matrix into an output stream in matrix market format.
Definition mtx_io.hpp:299
A type representing the dimensions of a multidimensional object.
Definition dim.hpp:26
permuting_reuse_info()
Creates an empty reuse info.
void update_values(ptr_param< const Csr > input, ptr_param< Csr > output) const
Propagates the values from an input matrix to the transformed matrix.
permuting_reuse_info(std::unique_ptr< Permutation< index_type > > value_permutation)
Creates a reuse info structure from its value permutation.
This structure is used as an intermediate data type to store a sparse matrix.
Definition matrix_data.hpp:126
A span is a lightweight structure used to create sub-ranges from other ranges.
Definition range.hpp:46