// Include guard. Renamed from __TENSOR2_HPP__: identifiers containing a
// double underscore are reserved to the implementation ([lex.name]/3 in the
// C++ standard), so user code must not define them. The matching #endif does
// not reference the macro name, so this rename is self-contained.
38 #ifndef CUV_TENSOR2_HPP
39 # define CUV_TENSOR2_HPP
45 #include <boost/shared_ptr.hpp>
46 #include <boost/multi_array/extent_gen.hpp>
47 #include <boost/multi_array/index_gen.hpp>
48 #include <cuv/tools/cuv_general.hpp>
49 #include <cuv/tools/meta_programming.hpp>
50 #include "reference.hpp"
52 namespace boost {
namespace serialization {
// Pull Boost.MultiArray's extent/index generators into this namespace so cuv
// tensors can be constructed with the familiar extents[a][b] / indices[range]
// syntax (mirrors boost::extents / boost::indices).
114 using boost::detail::multi_array::extent_gen;
115 using boost::detail::multi_array::index_gen;
// Convenience alias for the index range of a single dimension.
125 typedef boost::detail::multi_array::index_range<boost::detail::multi_array::index,boost::detail::multi_array::size_type>
index_range;
// Global generator objects, analogous to boost::extents / boost::indices.
// NOTE(review): non-inline objects at namespace scope in a header risk ODR
// violations when included from several translation units -- verify how the
// full source guards this (the CUV_DONT_CREATE_EXTENTS_OBJ switch suggests
// the authors were aware of the issue).
130 #ifndef CUV_DONT_CREATE_EXTENTS_OBJ
142 extent_gen<0> extents;
159 index_gen<0,0> indices;
// Forward declaration: the central n-dimensional array type
// (value type V, memory space M, memory layout L).
164 template<
class V,
class M,
class L>
class tensor;
// Forward declaration: contiguous memory container for memory space M.
165 template<
class V,
class M>
class linear_memory;
// Forward declaration of fill(): sets every element of v to p.
// NOTE(review): template parameter S does not appear in the visible
// signature and therefore cannot be deduced at the call site -- confirm
// against the definition in the full source.
168 template<
class V,
class M,
class L,
class S>
169 void fill(tensor<V, M, L>& v,
const V& p);
178 template<
class V,
class M>
192 friend class boost::serialization::access;
230 m_allocator.dealloc(&this->
m_ptr);
238 template<
class V,
class M>
252 friend class boost::serialization::access;
284 assert(this->
m_ptr == NULL);
294 m_allocator.dealloc(&this->
m_ptr);
// (fragment) row-major stride computation: walk dimensions from the last
// (fastest varying) to the first.
393 for (
int i = shape.
size()-1; i >= 0; --i)
// Extent-1 dimensions get stride 0 (broadcasting convention); otherwise the
// stride is the element count accumulated so far. `size` is maintained on
// lines elided from this excerpt -- verify in full source.
395 strides[i] = (shape[i] == 1) ? 0 : size;
// (fragment) column-major variant: the first dimension varies fastest, so
// iteration runs front-to-back.
404 for (
unsigned int i = 0; i < shape.
size(); ++i)
406 strides[i] = (shape[i] == 1) ? 0 : size;
// (fragment) reversing device-side linear memory is unsupported.
417 throw std::runtime_error(
"reverse of dev linear memory not implemented");
// (fragment) host-side reverse: classic bidirectional swap loop, stopping
// when the two iterators meet.
// NOTE(review): __first/__last contain double underscores, which are names
// reserved to the implementation -- presumably copied from libstdc++ style;
// consider renaming when touching the full source.
420 if (__first == __last || __first == --__last)
424 std::iter_swap(__first, __last);
// Returns true iff shape/stride describe a dense row-major (C-contiguous)
// layout: walking from the last dimension to the first, each stride must
// equal the product of the extents of all faster-varying dimensions.
// NOTE(review): extent-1 dimensions may be special-cased on lines elided
// from this excerpt -- verify in full source.
437 inline bool is_c_contiguous(row_major,
const linear_memory<unsigned int,host_memory_space>& shape,
const linear_memory<int,host_memory_space>& stride){
438 bool c_contiguous =
true;
// NOTE(review): `size` is declared on a line elided from this excerpt;
// presumably initialised to 1 before the loop -- verify in full source.
440 for (
int i = shape.size()-1; (i >= 0) && c_contiguous; --i)
444 if (stride[i] != size)
445 c_contiguous =
false;
446 size = size * shape[i];
// Column-major counterpart of is_c_contiguous: iterate the dimensions
// front-to-back (the first dimension varies fastest in column-major order)
// and require each stride to equal the running extent product.
454 inline bool is_c_contiguous(column_major,
const linear_memory<unsigned int,host_memory_space>& shape,
const linear_memory<int,host_memory_space>& stride){
455 bool c_contiguous =
true;
// NOTE(review): `size` is declared/initialised on a line elided from this
// excerpt -- verify in full source.
457 for (
unsigned int i = 0; i<shape.size() && c_contiguous; ++i)
461 if (stride[i] != size)
462 c_contiguous =
false;
463 size = size * shape[i];
// Returns true iff the memory can be transferred with a single 2D (pitched)
// copy: every dimension except one designated "pitched" dimension must be
// densely packed (row-major flavour).
469 inline bool is_2dcopyable(row_major,
const linear_memory<unsigned int,host_memory_space>& shape,
const linear_memory<int,host_memory_space>& stride){
// At least two dimensions are required for a 2D copy.
470 bool c_contiguous = shape.size()>1;
// The pitched dimension starts at the last axis; the while-loop body
// (elided from this excerpt) presumably steps it past extent-1 axes --
// verify in full source.
471 int pitched_dim = shape.size()-1;
472 while(shape[pitched_dim]==1)
// Walk dimensions last-to-first; the branch bodies for the extent-1 and
// pitched-dimension cases are elided here.
475 for (
int i = shape.size()-1; (i >= 0) && c_contiguous; --i)
479 }
else if(i == pitched_dim){
481 }
else if(stride[i] != size) {
482 c_contiguous =
false;
// Column-major counterpart of is_2dcopyable: here the pitched dimension is
// searched from the front, and the dense-packing check walks dimensions
// front-to-back.
491 inline bool is_2dcopyable(column_major,
const linear_memory<unsigned int,host_memory_space>& shape,
const linear_memory<int,host_memory_space>& stride){
// At least two dimensions are required for a 2D copy.
492 bool c_contiguous = shape.size()>1;
// The while-loop body (elided from this excerpt) presumably advances
// pitched_dim past extent-1 axes -- verify in full source.
493 unsigned int pitched_dim = 0;
494 while(shape[pitched_dim]==1)
497 for (
unsigned int i = 0; (i < shape.size()) && c_contiguous; ++i)
501 }
else if(i == pitched_dim){
503 }
else if(stride[i] != size) {
504 c_contiguous =
false;
// Derives the (rows, cols, pitch) triple used by 2D copy routines from an
// n-dimensional shape/stride pair. This overload flattens all leading
// dimensions into rows, takes the last dimension as cols, and the stride of
// the second-to-last dimension as the pitch.
// NOTE(review): the final (layout tag) parameter of the signature sits on a
// line elided from this excerpt -- presumably row_major; verify.
518 template<
class index_type,
class size_type>
519 void get_pitched_params(size_type& rows, size_type& cols, size_type& pitch,
520 const linear_memory<size_type,host_memory_space>& shape,
521 const linear_memory<index_type,host_memory_space>& stride,
// rows = product of every extent except the last.
524 rows = std::accumulate(shape[0].ptr,
525 shape[0].ptr+shape.size()-1,
526 1, std::multiplies<index_type>());
527 cols = shape[shape.size()-1];
528 pitch = stride[shape.size()-2];
// Column-major flavour of get_pitched_params: the first dimension is the
// pitched one, so rows is the product of all extents except the first.
// (The layout tag parameter and the cols/pitch assignments are on lines
// elided from this excerpt -- verify in full source.)
533 template<
class index_type,
class size_type>
534 void get_pitched_params(size_type& rows, size_type& cols, size_type& pitch,
535 const linear_memory<size_type,host_memory_space>& shape,
536 const linear_memory<index_type,host_memory_space>& stride,
539 rows = std::accumulate(shape[0].ptr+1,
540 shape[0].ptr+shape.size(),
541 1, std::multiplies<index_type>());
555 template<
class V,
class M>
569 friend class boost::serialization::access;
602 :m_rows(i),m_cols(j),m_pitch(0){
alloc();}
608 assert(this->
m_ptr == NULL);
609 m_allocator.alloc2d(&this->
m_ptr,m_pitch,m_rows,m_cols);
620 m_allocator.dealloc(&this->
m_ptr);
658 if(
this==&o)
return *
this;
660 if( m_pitch < o.m_cols
688 if( m_pitch < o.m_cols
765 assert(shape.
size()>=2);
766 const int pitched_dim = shape.
size()-1;
767 for (
int i = shape.
size()-1; i >= 0; --i)
771 }
else if(i == pitched_dim){
789 assert(shape.
size()>=2);
791 for (
unsigned int i = 0; i < shape.
size(); ++i)
795 }
else if(i == pitched_dim){
809 template<
class M,
class L>
860 template<
class V,
class M,
class L>
862 template<
class V,
class M,
class L>
870 template<
class V,
class M,
class L=row_major>
898 template <
class _V,
class _M,
class _L>
916 for(
int i=0; i<D; i++){
963 template<std::
size_t D>
967 for(
int i=0; i<D; i++){
1035 return std::vector<size_type>();
1045 std::vector<size_type>
shape;
1046 shape.reserve(
ndim());
1049 std::remove_copy_if(
1052 std::back_inserter(shape),
1053 std::bind2nd(std::equal_to<size_type>(),1));
// (fragment) linear-index decomposition: build "virtual" strides for a dense
// layout of the current shape, then peel one coordinate per dimension off
// idx. Row-major path: strides are accumulated last-to-first.
1090 for(
int i=ndim-1;i>=0;--i){
1091 virtualstride[i] = virt_size;
1097 idx -= (idx/virtualstride[i])*virtualstride[i];
// Column-major path: strides are accumulated front-to-back ...
1102 for(
unsigned int i=0;i<
ndim;++i){
1103 virtualstride[i] = virt_size;
// ... while the coordinate extraction still walks last-to-first.
1107 for(
int i=ndim-1; i>=0; --i){
1109 idx -= (idx/virtualstride[i])*virtualstride[i];
// NOTE(review): delete[] implies virtualstride was allocated with new[] on an
// elided line; a stack buffer or std::vector would avoid the manual delete --
// consider when touching the full source.
1112 delete[] virtualstride;
// const overload delegates to the non-const operator[] via const_cast.
1118 return const_cast<tensor&
>(*this)[idx];
1326 template<std::
size_t D>
1331 for(std::size_t i=0;i<D;i++)
1341 explicit tensor(
const std::vector<size_type>& eg)
1345 for(std::size_t i=0;i<eg.size();i++)
1359 for(std::size_t i=0;i<eg.size();i++)
1367 template<std::
size_t D>
1372 for(std::size_t i=0;i<D;i++)
1386 template<std::
size_t D>
1393 for(
int i=D-1;i>=0;i--){
1396 size *= eg.ranges_[i].finish();
1399 for(std::size_t i=0;i<D;i++){
1402 size *= eg.ranges_[i].finish();
1408 unsigned int D = shape.size();
1412 for(
int i=D-1;i>=0;i--){
1418 for(std::size_t i=0;i<D;i++){
1430 template<
int D,
int E>
1437 for(
int i=D-1;i>=0;i--){
1440 size *= idx.ranges_[i].finish();
1443 for(std::size_t i=0;i<D;i++){
1446 size *= idx.ranges_[i].finish();
1463 template<
class _M,
class _L>
1475 if(
this==&o)
return *
this;
1490 typename boost::enable_if_c<boost::is_convertible<_V,value_type>::value,
tensor&>::type
1492 fill(*
this, scalar);
1579 template<
int D,
int E>
1588 std::vector<int> shapes;
1589 std::vector<int> strides;
1590 shapes.reserve(o.
ndim());
1591 strides.reserve(o.
ndim());
1592 for(std::size_t i=0;i<D;i++){
1593 int start = idx.ranges_[i].get_start(0);
1594 int finish = idx.ranges_[i].get_finish(o.
shape(i));
1595 int stride = idx.ranges_[i].stride();
1596 if (start <0) start += o.
shape(i);
1597 if (finish<0) finish += o.
shape(i);
1602 if(idx.ranges_[i].is_degenerate()){
1605 shapes.push_back((finish-start)/stride);
1612 for(
int i = D; i < o.
ndim();i++){
1613 shapes.push_back(o.
shape(i));
1614 strides.push_back(o.
stride(i));
1631 template<std::
size_t D>
1633 std::vector<size_type>
shape(D);
1634 for(std::size_t i=0;i<D;i++)
1635 shape[i] = eg.ranges_[i].finish();
// Reshapes the tensor in place to `shape`. Only legal when the tensor is
// C-contiguous and the total element count is unchanged; strides are then
// recomputed for the new shape (stride-assignment bodies elided here).
1645 void reshape(
const std::vector<size_type>& shape){
1646 size_type new_size = std::accumulate(shape.begin(),shape.end(),1,std::multiplies<size_type>());
// Thrown when the tensor is not C-contiguous (the guard condition sits on a
// line elided from this excerpt) -- reshaping strided views is unsupported.
1648 throw std::runtime_error(
"cannot reshape: tensor is not c_contiguous");
1649 if(
size() != new_size)
1650 throw std::runtime_error(
"cannot reshape: products do not match");
// Row-major: rebuild strides from the last dimension backwards ...
1654 for(
int i=shape.size()-1;i>=0;i--){
// ... column-major: from the first dimension forwards.
1660 for(std::size_t i=0;i<shape.size();i++){
// (fragment) resize: computes the requested total element count; the
// reallocation logic is on lines elided from this excerpt -- presumably
// memory is only reallocated when new_size differs; verify in full source.
1680 void resize(
const std::vector<size_type>& shape){
1682 size_type new_size = std::accumulate(shape.begin(),shape.end(),1,std::multiplies<size_type>());
1698 template<std::
size_t D>
1700 std::vector<size_type>
shape(D);
1701 for(std::size_t i=0;i<D;i++)
1702 shape[i] = eg.ranges_[i].finish();
1720 template<
class V,
class M,
class L=row_major>
1738 throw std::runtime_error(
"copying tensor to tensor_view did not succeed. Maybe a shape mismatch?");
1746 throw std::runtime_error(
"copying tensor to tensor_view did not succeed. Maybe a shape mismatch?");
1755 typename boost::enable_if_c<boost::is_convertible<_V,V>::value,
tensor_view&>::type
1768 throw std::runtime_error(
"copying tensor to tensor_view did not succeed. Maybe a shape mismatch?");
1780 throw std::runtime_error(
"copying tensor to tensor_view did not succeed. Maybe a shape mismatch?");
1816 template<
int D,
int E>
1821 std::vector<int> shapes;
1822 std::vector<int> strides;
1823 shapes.reserve(o.
ndim());
1824 strides.reserve(o.
ndim());
1825 for(std::size_t i=0;i<D;i++){
1826 int start = idx.ranges_[i].get_start(0);
1827 int finish = idx.ranges_[i].get_finish(o.
shape(i));
1828 int stride = idx.ranges_[i].stride();
1829 if (start <0) start += o.
shape(i);
1830 if (finish<0) finish += o.
shape(i);
1835 if(idx.ranges_[i].is_degenerate()){
1838 shapes.push_back((finish-start)/stride);
1844 for(
int i = D; i < o.
ndim();i++){
1845 shapes.push_back(o.
shape(i));
1846 strides.push_back(o.
stride(i));
1861 template<
int D,
int E>
1866 std::vector<int> shapes;
1867 std::vector<int> strides;
1868 shapes.reserve(o.
ndim());
1869 strides.reserve(o.
ndim());
1870 for(std::size_t i=0;i<D;i++){
1871 int start = idx.ranges_[i].get_start(0);
1872 int finish = idx.ranges_[i].get_finish(o.
shape(i));
1873 int stride = idx.ranges_[i].stride();
1874 if (start <0) start += o.
shape(i);
1875 if (finish<0) finish += o.
shape(i);
1880 if(idx.ranges_[i].is_degenerate()){
1883 shapes.push_back((finish-start)/stride);
1889 for(
int i = D; i < o.
ndim();i++){
1890 shapes.push_back(o.
shape(i));
1891 strides.push_back(o.
stride(i));
1903 template<
class V,
class M0,
class M1,
class L0,
class L1>
1912 size_type row,col,pitch;
1913 detail::get_pitched_params(row,col,pitch,src.
info().host_shape, src.
info().host_stride,L1());
1914 a.
copy2d(dst.
ptr(), src.
ptr(), col*
sizeof(V),pitch*
sizeof(V),row,col,M1());
1916 size_type row,col,pitch;
1917 detail::get_pitched_params(row,col,pitch,dst.
info().host_shape, dst.
info().host_stride,L0());
1918 a.
copy2d(dst.
ptr(), src.
ptr(), pitch*
sizeof(V),col*
sizeof(V),row,col,M1());
1920 size_type srow,scol,spitch;
1921 size_type drow,dcol,dpitch;
1922 detail::get_pitched_params(drow,dcol,dpitch,dst.
info().host_shape, dst.
info().host_stride,L0());
1923 detail::get_pitched_params(srow,scol,spitch,src.
info().host_shape, src.
info().host_stride,L1());
1926 a.
copy2d(dst.
ptr(), src.
ptr(), dpitch*
sizeof(V),spitch*
sizeof(V),srow,scol,M1());
1928 throw std::runtime_error(
"copying of generic strides not implemented yet");
1931 dst.
info().host_stride.reverse();
1932 dst.
info().host_shape.reverse();
1940 template<
class V,
class M0,
class M1,
class L0,
class L1>
1946 dst.
info().host_shape = src.
info().host_shape;
1956 size_type row,col,pitch;
1957 detail::get_pitched_params(row,col,pitch,src.
info().host_shape, src.
info().host_stride,L1());
1958 a.
copy2d(d.ptr(), src.
ptr(), col*
sizeof(V),pitch*
sizeof(V),row,col,M1());
1960 throw std::runtime_error(
"copying arbitrarily strided memory not implemented");
1964 dst.
info().host_stride.reverse();
1965 dst.
info().host_shape.reverse();
1970 template<
class V,
class M0,
class M1,
class L0,
class L1>
1973 assert(src.
ndim()>=2);
1977 dst.
info().host_shape = src.
info().host_shape;
1978 size_type row,col,pitch;
1979 detail::get_pitched_params(row,col,pitch,src.
info().host_shape, src.
info().host_stride,L1());
1982 d->set_strides(dst.
info().host_stride,dst.
info().host_shape, L0());
1986 detail::get_pitched_params(row,col,pitch,src.
info().host_shape, src.
info().host_stride,L1());
1987 a.
copy2d(d.ptr(), src.
m_ptr, d.pitch()*
sizeof(V),pitch*
sizeof(V),row,col,M1());
1989 throw std::runtime_error(
"copying arbitrarily strided memory not implemented");
1994 dst.
info().host_stride.reverse();
1995 dst.
info().host_shape.reverse();
2009 template<
class V,
class V2,
class M,
class M2,
class L>
2018 template<
class Mat,
class NewVT>
2023 template<
class Mat,
class NewML>
2028 template<
class Mat,
class NewMS>
// Streams a host-side linear_memory element-by-element (loop body elided
// from this excerpt).
2050 ostream& operator<<(ostream& o, const cuv::linear_memory<V, cuv::host_memory_space>& t){
2052 for(
unsigned int i=0;i<t.size();i++)
// Streams a device-side linear_memory. The loop reads `t`, not the parameter
// `t_`: presumably a host-side copy of t_ is made on a line elided from this
// excerpt -- verify in full source.
2063 ostream& operator<<(ostream& o, const cuv::linear_memory<V, cuv::dev_memory_space>& t_){
2066 for(
unsigned int i=0;i<t.
size();i++)
// Streams host-side pitched_memory as a 2D table.
2078 ostream& operator<<(ostream& o, const cuv::pitched_memory<V, cuv::host_memory_space>& t){
2080 for(
unsigned int i=0;i<t.rows();i++){
// NOTE(review): the inner loop bound is t.rows(); for a rows-by-cols dump one
// would expect t.cols() here -- looks like a copy/paste bug; verify against
// the full source before changing.
2081 for(
unsigned int j=0;j<t.rows();j++){
// Streams device-side pitched_memory. The loops read `t`, not the parameter
// `t_`: presumably a host-side copy is made on a line elided from this
// excerpt -- verify in full source.
2096 ostream& operator<<(ostream& o, const cuv::pitched_memory<V, cuv::dev_memory_space>& t_){
2099 for(
unsigned int i=0;i<t.
rows();i++){
// NOTE(review): same as the host overload -- the inner loop bound is rows()
// where cols() is expected; looks like a copy/paste bug, verify against the
// full source before changing.
2100 for(
unsigned int j=0;j<t.
rows();j++){
// Streams a device tensor by first converting it to a host tensor and
// delegating to the host operator<<.
2116 template<
class V,
class L>
2117 ostream& operator<<(ostream& o, const cuv::tensor<V, cuv::dev_memory_space, L>& t){
2118 return o << cuv::tensor<V,cuv::host_memory_space,L>(t);
// Streams a host tensor; the output format depends on the rank (the rank
// dispatch itself sits on lines elided from this excerpt): 1D prints one
// space-separated row, 2D one line per row, 3D one 2D block per leading
// index. Ranks above 3 are rejected.
2126 template<
class V,
class L>
2127 ostream& operator<<(ostream& o, const cuv::tensor<V, cuv::host_memory_space, L>& t){
// 1D case: flat element list.
2133 for(
unsigned int i=0;i<t.shape(0);i++) o<< t[i]<<
" ";
// 2D case: row by row via the (i,j) accessor.
2138 for(
unsigned int i=0;i<t.shape(0);++i){
2142 for(
unsigned int j=0;j<t.shape(1);j++) o<< t(i,j)<<
" ";
// No trailing separator after the last row.
2144 if(i != t.shape(0)-1)
// 3D case: one 2D slice per leading index l; note the explicit flat-index
// arithmetic l*shape(1)*shape(2) + i*shape(2) + j into operator[].
2151 for(
unsigned int l=0;l<t.shape(0);l++){
2153 for(
unsigned int i=0;i<t.shape(1);++i){
2158 for(
unsigned int j=0;j<t.shape(2);j++) o<< t[l*t.shape(1)*t.shape(2) + i*t.shape(2) + j]<<
" ";
2160 if(i != t.shape(1)-1)
2169 throw std::runtime_error(
"printing of tensors with >3 dimensions not implemented");