ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
viennacl/device_specific/templates/matrix_axpy_template.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_DEVICE_SPECIFIC_TEMPLATES_MATRIX_AXPY_HPP
00002 #define VIENNACL_DEVICE_SPECIFIC_TEMPLATES_MATRIX_AXPY_HPP
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2015, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00021 
00027 #include <vector>
00028 
00029 #include "viennacl/scheduler/forwards.h"
00030 
00031 #include "viennacl/device_specific/mapped_objects.hpp"
00032 #include "viennacl/device_specific/tree_parsing.hpp"
00033 #include "viennacl/device_specific/utils.hpp"
00034 
00035 #include "viennacl/device_specific/templates/template_base.hpp"
00036 
00037 #include "viennacl/tools/tools.hpp"
00038 
00039 namespace viennacl
00040 {
00041 namespace device_specific
00042 {
00043 
00044 class matrix_axpy_parameters_type : public template_base::parameters_type
00045 {
00046 public:
00047   matrix_axpy_parameters_type(unsigned int _simd_width,
00048                               unsigned int _local_size_0, unsigned int _local_size_1,
00049                               unsigned int _num_groups_0, unsigned int _num_groups_1,
00050                               fetching_policy_type _fetching_policy) : template_base::parameters_type(_simd_width, _local_size_0, _local_size_1, 1), num_groups_0(_num_groups_0), num_groups_1(_num_groups_1), fetching_policy(_fetching_policy){ }
00051 
00052   unsigned int num_groups_0;
00053   unsigned int num_groups_1;
00054   fetching_policy_type fetching_policy;
00055 };
00056 
00057 class matrix_axpy_template : public template_base_impl<matrix_axpy_template, matrix_axpy_parameters_type>
00058 {
00059 private:
00060   int check_invalid_impl(viennacl::ocl::device const & /*dev*/) const
00061   {
00062     if (p_.simd_width>1)
00063       return TEMPLATE_INVALID_SIMD_WIDTH;
00064     return TEMPLATE_VALID;
00065   }
00066 
00067   std::string generate_impl(std::string const & kernel_prefix, statements_container const & statements, std::vector<mapping_type> const & mappings, unsigned int simd_width) const
00068   {
00069     std::string process_str;
00070     utils::kernel_generation_stream stream;
00071 
00072     std::string init0, upper_bound0, inc0, init1, upper_bound1, inc1;
00073 
00074     stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl;
00075     generate_prototype(stream, kernel_prefix, "unsigned int M, unsigned int N,", mappings, statements);
00076     stream << "{" << std::endl;
00077     stream.inc_tab();
00078 
00079     tree_parsing::process(stream, PARENT_NODE_TYPE, "scalar", "#scalartype #namereg = *#pointer;", statements, mappings);
00080     tree_parsing::process(stream, PARENT_NODE_TYPE, "matrix", "#pointer += $OFFSET{#start1, #start2};", statements, mappings);
00081     tree_parsing::process(stream, PARENT_NODE_TYPE, "vector", "#pointer += #start;", statements, mappings);
00082 
00083     fetching_loop_info(p_.fetching_policy, "M", stream, init0, upper_bound0, inc0, "get_global_id(0)", "get_global_size(0)");
00084     stream << "for(unsigned int i = " << init0 << "; i < " << upper_bound0 << "; i += " << inc0 << ")" << std::endl;
00085     stream << "{" << std::endl;
00086     stream.inc_tab();
00087     fetching_loop_info(p_.fetching_policy, "N", stream, init1, upper_bound1, inc1, "get_global_id(1)", "get_global_size(1)");
00088     stream << "for(unsigned int j = " << init1 << "; j < " << upper_bound1 << "; j += " << inc1 << ")" << std::endl;
00089     stream << "{" << std::endl;
00090     stream.inc_tab();
00091 
00092     process_str = utils::append_width("#scalartype",simd_width) + " #namereg = " + vload(simd_width, "$OFFSET{i*#stride1,j*#stride2}", "#pointer")+ ";";
00093     tree_parsing::process(stream, PARENT_NODE_TYPE, "matrix", process_str, statements, mappings);
00094     tree_parsing::process(stream, PARENT_NODE_TYPE, "vector_diag", "#scalartype #namereg = ((i + ((#diag_offset<0)?#diag_offset:0))!=(j-((#diag_offset>0)?#diag_offset:0)))?0:#pointer[min(i*#stride, j*#stride)];", statements, mappings);
00095 
00096 
00097     std::map<std::string, std::string> accessors;
00098     accessors["matrix"] = "#namereg";
00099     accessors["vector_diag"] = "#namereg";
00100     accessors["scalar"] = "#namereg";
00101     tree_parsing::evaluate(stream, PARENT_NODE_TYPE, accessors, statements, mappings);
00102 
00103     process_str = vstore(simd_width, "#namereg", "$OFFSET{i*#stride1,j*#stride2}", "#pointer")+";";
00104     tree_parsing::process(stream, LHS_NODE_TYPE, "matrix", process_str, statements, mappings);
00105 
00106     stream.dec_tab();
00107     stream << "}" << std::endl;
00108     stream.dec_tab();
00109     stream << "}" << std::endl;
00110 
00111     stream.dec_tab();
00112     stream << "}" << std::endl;
00113 
00114     return stream.str();
00115   }
00116 
00117   std::vector<std::string> generate_impl(std::string const & kernel_prefix, statements_container const & statements, std::vector<mapping_type> const & mappings) const
00118   {
00119     std::vector<std::string> res;
00120     res.push_back(generate_impl(kernel_prefix, statements, mappings, 1));
00121     return res;
00122   }
00123 
00124 public:
00125   matrix_axpy_template(parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE) : template_base_impl<matrix_axpy_template, matrix_axpy_parameters_type>(parameters, binding_policy), up_to_internal_size_(false){ }
00126 
00127   void up_to_internal_size(bool v)
00128   {
00129     up_to_internal_size_ = v;
00130   }
00131 
00132   void enqueue(std::string const & kernel_prefix, std::vector<lazy_program_compiler> & programs, statements_container const & statements)
00133   {
00134     viennacl::ocl::kernel & kernel = programs[0].program().get_kernel(kernel_prefix);
00135 
00136     kernel.local_work_size(0, p_.local_size_0);
00137     kernel.local_work_size(1, p_.local_size_1);
00138     kernel.global_work_size(0,p_.local_size_0*p_.num_groups_0);
00139     kernel.global_work_size(1,p_.local_size_1*p_.num_groups_1);
00140 
00141     scheduler::statement_node const & root = statements.data().front().array()[statements.data().front().root()];
00142     unsigned int current_arg = 0;
00143     if (up_to_internal_size_)
00144     {
00145       kernel.arg(current_arg++, cl_uint(utils::call_on_matrix(root.lhs, utils::internal_size1_fun())));
00146       kernel.arg(current_arg++, cl_uint(utils::call_on_matrix(root.lhs, utils::internal_size2_fun())));
00147     }
00148     else
00149     {
00150       kernel.arg(current_arg++, cl_uint(utils::call_on_matrix(root.lhs, utils::size1_fun())));
00151       kernel.arg(current_arg++, cl_uint(utils::call_on_matrix(root.lhs, utils::size2_fun())));
00152     }
00153 
00154     set_arguments(statements, kernel, current_arg);
00155 
00156     viennacl::ocl::enqueue(kernel);
00157   }
00158 
00159 
00160 private:
00161   bool up_to_internal_size_;
00162 };
00163 
00164 }
00165 }
00166 
00167 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines