#include "coloquinte/solvers.hxx" #include #include namespace coloquinte{ namespace gp{ linear_system linear_system::operator+(linear_system const & o) const{ if(o.internal_size() != internal_size()){ throw std::runtime_error("Mismatched system sizes"); } linear_system ret(target_.size() + o.target_.size() - internal_size(), internal_size()); ret.matrix_ = matrix_; std::vector omatrix = o.matrix_; for(matrix_triplet & t : omatrix){ if(t.c_ >= internal_size()){ t.c_ += (target_.size() - internal_size()); } if(t.r_ >= internal_size()){ t.r_ += (target_.size() - internal_size()); } } ret.matrix_.insert(ret.matrix_.end(), omatrix.begin(), omatrix.end()); // ret.target_.resize(target_.size() + o.target_.size() - internal_size); for(index_t i=0; i row_limits, col_indexes; std::vector values, diag; std::vector mul(std::vector const & x) const; std::vector solve_CG(std::vector const & goal, std::vector guess, std::uint32_t min_iter, std::uint32_t max_iter, float tol) const; csr_matrix(std::vector const & row_l, std::vector const & col_i, std::vector const & vals, std::vector const D) : row_limits(row_l), col_indexes(col_i), values(vals), diag(D){ assert(values.size() == col_indexes.size()); assert(diag.size()+1 == row_limits.size()); } }; // A matrix with successive rows padded to the same length and accessed column-major; hopefully a little better template struct ellpack_matrix{ std::vector row_limits, col_indexes; std::vector values, diag; std::vector mul(std::vector const & x) const; std::vector solve_CG(std::vector goal, std::vector guess, std::uint32_t min_iter, std::uint32_t max_iter, float tol) const; ellpack_matrix(std::vector const & row_l, std::vector const & col_i, std::vector const & vals, std::vector const D) : row_limits(row_l), col_indexes(col_i), values(vals), diag(D){ assert(values.size() == col_indexes.size()); assert(diag.size() % unroll_len == 0); assert((row_limits.size()-1) * unroll_len == diag.size() ); assert(row_limits.back() * unroll_len == values.size()); assert(values.size() % unroll_len == 0); assert(col_indexes.size() % unroll_len == 0); } }; // The proxy matrix for compressed sparse storage class doublet_matrix{ std::vector row_limits; std::vector doublets; std::uint32_t size; void get_compressed(std::vector & limits, std::vector & elements, std::vector & diag) const; public: doublet_matrix(std::vector const & triplets, std::uint32_t size); csr_matrix get_compressed_matrix() const; template ellpack_matrix get_ellpack_matrix() const; }; doublet_matrix::doublet_matrix(std::vector const & triplets, std::uint32_t n) : size(n){ row_limits.resize(size+1, 0); // First store the row sizes in the array for(uint32_t i=0; i & sizes, std::vector & elements, std::vector & diag) const{ assert(size+1 == row_limits.size()); sizes.resize(size); diag.resize(size, 0.0); std::vector tmp_doublets = doublets; for(uint32_t i=0; i tmp_doublets; std::vector sizes; std::vector diag; get_compressed(sizes, tmp_doublets, diag); // Get the limits of each row std::vector new_row_limits(row_limits.size()); new_row_limits[0] = 0; for(std::uint32_t i=0; i col_indices(tmp_doublets.size()); std::vector values(tmp_doublets.size()); for(std::uint32_t i=0; i ellpack_matrix doublet_matrix::get_ellpack_matrix() const{ std::vector tmp_doublets; std::vector sizes; std::vector diag; get_compressed(sizes, tmp_doublets, diag); std::uint32_t unrolled_size = (diag.size() % unroll_len == 0)? diag.size()/unroll_len : diag.size() / unroll_len + 1; sizes.resize(unroll_len * unrolled_size, 0); diag.resize(unroll_len * unrolled_size, 1.0); // Store the maximum size of a group of rows std::vector new_row_limits(unrolled_size+1); new_row_limits[0] = 0; for(std::uint32_t i=0; i col_indices(unroll_len * new_row_limits.back()); std::vector values(unroll_len * new_row_limits.back()); std::uint32_t d = 0; for(std::uint32_t i=0; i(new_row_limits, col_indices, values, diag); } std::vector csr_matrix::mul(std::vector const & x) const{ std::vector res(x.size()); assert(x.size() == diag.size()); for(std::uint32_t i=0; i std::vector ellpack_matrix::mul(std::vector const & x) const{ std::vector res(x.size()); assert(x.size() % unroll_len == 0); assert(x.size() == diag.size()); for(std::uint32_t i=0; i+1 float dot_prod(std::vector const & a, std::vector const & b){ assert(a.size() == b.size()); float vals[unroll_len]; for(int j=0; j csr_matrix::solve_CG(std::vector const & goal, std::vector x, std::uint32_t min_iter, std::uint32_t max_iter, float tol_ratio) const{ std::uint32_t n = diag.size(); assert(goal.size() == n); assert(x.size() == n); std::vector r, p(n), z(n), mul_res, preconditioner(n); r = mul(x); for(uint32_t i=0; i(r, z); assert(std::isfinite(cross_norm)); float_t const epsilon = std::numeric_limits::min(); float start_norm = cross_norm; for(uint32_t k=0; k < max_iter; ++k){ mul_res = mul(p); float_t pr_prod = dot_prod<16>(p, mul_res); float_t alpha = cross_norm / pr_prod; if( not std::isfinite(cross_norm) or not std::isfinite(alpha) or not std::isfinite(pr_prod) or cross_norm <= epsilon or alpha <= epsilon or pr_prod <= epsilon ){ break; } // Update the result for(uint32_t i=0; i(r, z); // Update the scaled residual and the search direction if(k >= min_iter && new_cross_norm <= tol_ratio * start_norm){ break; } float beta = new_cross_norm / cross_norm; cross_norm = new_cross_norm; for(uint32_t i=0; i std::vector ellpack_matrix::solve_CG(std::vector goal, std::vector x, std::uint32_t min_iter, std::uint32_t max_iter, float tol_ratio) const{ std::uint32_t n = diag.size(); std::uint32_t old_n = x.size(); assert(goal.size() == x.size()); x.resize(diag.size(), 0.0); goal.resize(diag.size(), 0.0); std::vector r, p(n), z(n), mul_res, preconditioner(n); r = mul(x); for(uint32_t i=0; i(r, z); float start_norm = cross_norm; for(uint32_t k=0; k < max_iter; ++k){ mul_res = mul(p); float alpha = cross_norm / dot_prod(p, mul_res); // Update the result for(uint32_t i=0; i(r, z); // Update the scaled residual and the search direction if(k >= min_iter && new_cross_norm <= tol_ratio * start_norm){ break; } float beta = new_cross_norm / cross_norm; cross_norm = new_cross_norm; for(uint32_t i=0; i linear_system::solve_CG(std::vector guess, index_t nbr_iter){ doublet_matrix tmp(matrix_, size()); csr_matrix mat = tmp.get_compressed_matrix(); //ellpack_matrix<16> mat = tmp.get_ellpack_matrix<16>(); guess.resize(target_.size(), 0.0); auto ret = mat.solve_CG(target_, guess, nbr_iter, nbr_iter, 0.0); ret.resize(internal_size()); return ret; } } }