last minute fix to get naked pointer iterators to work, and remove de…

…p to estd:: library for test/ progs
BaderLab · Jan 28, 2016 · bc00b12 · bc00b12
1 parent df353b4
commit bc00b12
Show file tree

Hide file tree

Showing 7 changed files with 110 additions and 107 deletions.
diff --git a/include/ecuda/algo/copy.hpp b/include/ecuda/algo/copy.hpp
@@ -667,7 +667,7 @@ __HOST__ __DEVICE__ inline OutputIterator copy(
 	{
 		// memory is now guaranteed to be regularly aligned so we can use cudaMemcpy2D
 		typedef typename ecuda::add_pointer<value_type>::type pointer;
-		pointer dest = naked_cast<pointer>( result.operator->() );
+		pointer dest = get_iterator_pointer( result );
 		typedef typename ecuda::add_pointer<const value_type>::type const_pointer;
 		const_pointer src = naked_cast<const_pointer>( first.operator->() );
 
@@ -805,6 +805,7 @@ __HOST__ __DEVICE__ inline OutputIterator copy( InputIterator first, InputIterat
 	return impl::copy( first, last, result, ecuda::pair<input_memory_type,output_memory_type>() );
 }
 
+
 } // namespace ecuda
 
 #endif
diff --git a/include/ecuda/algo/find_if.hpp b/include/ecuda/algo/find_if.hpp
@@ -52,6 +52,7 @@ namespace ecuda {
 /// \cond DEVELOPER_DOCUMENTATION
 namespace impl {
 
+ECUDA_SUPPRESS_HD_WARNINGS
 template<class InputIterator,class UnaryPredicate>
 __HOST__ __DEVICE__ InputIterator
 find_if( InputIterator first, InputIterator last, UnaryPredicate p, ecuda::true_type ) // device memory

diff --git a/include/ecuda/global.hpp b/include/ecuda/global.hpp
@@ -70,16 +70,17 @@ either expressed or implied, of the FreeBSD Project.
 #define ECUDA_CPP11_AVAILABLE
 #endif
 
+#ifdef __CUDACC__
+// Macro function currently throws an ecuda::cuda_error exception containing a
+// description of the problem error code.
+#define CUDA_CALL(x) do { if((x)!=cudaSuccess) { std::ostringstream oss; oss << __FILE__; oss << ":"; oss << __LINE__; oss << " "; oss << cudaGetErrorString(cudaGetLastError()); throw ::ecuda::cuda_error(x,oss.str()); /*std::runtime_error(oss.str());*/ }} while(0);
+#else
 ///
 /// Macro function that captures a CUDA error code and then does something
 /// with it.  All calls to functions in the CUDA API that return an error code
 /// should use this.
 ///
 #define CUDA_CALL(x) x // cannot do CUDA calls when emulating with host only
-#ifdef __CUDACC__
-// Macro function currently throws an ecuda::cuda_error exception containing a
-// description of the problem error code.
-#define CUDA_CALL(x) do { if((x)!=cudaSuccess) { std::ostringstream oss; oss << __FILE__; oss << ":"; oss << __LINE__; oss << " "; oss << cudaGetErrorString(cudaGetLastError()); throw ::ecuda::cuda_error(x,oss.str()); /*std::runtime_error(oss.str());*/ }} while(0);
 #endif
 
 #define S(x) #x
@@ -90,31 +91,33 @@ either expressed or implied, of the FreeBSD Project.
 ///
 #define EXCEPTION_MSG(x) "" __FILE__ ":" S__LINE__ " " x
 
+#ifdef __CUDACC__
+#define CUDA_CHECK_ERRORS() do { cudaError_t error = cudaGetLastError(); if( error != cudaSuccess ) throw ::ecuda::cuda_error(error,std::string(cudaGetErrorString(error))); } while(0);
+#else
 ///
 /// Macro that performs a check for any outstanding CUDA errors.  This macro
 /// should be declared after any CUDA API calls that do not return an error code
 /// (e.g. after calling kernel functions). Calling this when a CUDA API call
 /// has not been made is safe.
 ///
 #define CUDA_CHECK_ERRORS() do {} while(0); // cannot check CUDA errors when emulating with host only
-#ifdef __CUDACC__
-#define CUDA_CHECK_ERRORS() do { cudaError_t error = cudaGetLastError(); if( error != cudaSuccess ) throw ::ecuda::cuda_error(error,std::string(cudaGetErrorString(error))); } while(0);
 #endif
 
-///
-/// Macro that calls a CUDA kernel function, waits for completion, and throws
-/// an ecuda::cuda_error exception if any errors are reported by cudaGetLastError().
-///
-#define CUDA_CALL_KERNEL_AND_WAIT(...) do {\
-		__VA_ARGS__;\
-	} while( 0 ); // cannot do CUDA calls when emulating with host only
 #ifdef __CUDACC__
 #define CUDA_CALL_KERNEL_AND_WAIT(...) do {\
 		__VA_ARGS__;\
 		{ cudaError_t error = cudaGetLastError(); if( error != cudaSuccess ) throw ::ecuda::cuda_error(error,std::string(cudaGetErrorString(error))); }\
 		cudaDeviceSynchronize();\
 		{ cudaError_t error = cudaGetLastError(); if( error != cudaSuccess ) throw ::ecuda::cuda_error(error,std::string(cudaGetErrorString(error))); }\
 	} while(0);
+#else
+///
+/// Macro that calls a CUDA kernel function, waits for completion, and throws
+/// an ecuda::cuda_error exception if any errors are reported by cudaGetLastError().
+///
+#define CUDA_CALL_KERNEL_AND_WAIT(...) do {\
+		__VA_ARGS__;\
+	} while( 0 ); // cannot do CUDA calls when emulating with host only
 #endif
 
 /** Replace nullptr with NULL if nvcc still doesn't support C++11. */

diff --git a/include/ecuda/iterator.hpp b/include/ecuda/iterator.hpp
@@ -174,13 +174,13 @@ class device_contiguous_iterator : public device_iterator<T,typename ecuda::add_
 	}
 	#endif
 
-	__HOST__ __DEVICE__ inline device_contiguous_iterator operator+( int x ) const { return device_contiguous_iterator( base_type::ptr + x ); }
-	__HOST__ __DEVICE__ inline device_contiguous_iterator operator-( int x ) const { return device_contiguous_iterator( base_type::ptr - x ); }
+	__HOST__ __DEVICE__ inline device_contiguous_iterator operator+( difference_type x ) const { return device_contiguous_iterator( base_type::ptr + x ); }
+	__HOST__ __DEVICE__ inline device_contiguous_iterator operator-( difference_type x ) const { return device_contiguous_iterator( base_type::ptr - x ); }
 
-	__HOST__ __DEVICE__ inline device_contiguous_iterator& operator+=( int x ) { base_type::ptr += x; return *this; }
-	__HOST__ __DEVICE__ inline device_contiguous_iterator& operator-=( int x ) { base_type::ptr -= x; return *this; }
+	__HOST__ __DEVICE__ inline device_contiguous_iterator& operator+=( difference_type x ) { base_type::ptr += x; return *this; }
+	__HOST__ __DEVICE__ inline device_contiguous_iterator& operator-=( difference_type x ) { base_type::ptr -= x; return *this; }
 
-	__DEVICE__ inline reference operator[]( int x ) const { return *(base_type::ptr+x); }
+	__DEVICE__ inline reference operator[]( difference_type x ) const { return *(base_type::ptr+x); }
 
 	__HOST__ __DEVICE__ inline difference_type operator-( const device_contiguous_iterator& other ) { return base_type::ptr - other.ptr; }
 
@@ -278,16 +278,16 @@ class device_contiguous_block_iterator : public device_iterator<T,padded_ptr<T,P
 		return tmp;
 	}
 
-	__HOST__ __DEVICE__ device_contiguous_block_iterator operator+( int x ) const
+	__HOST__ __DEVICE__ device_contiguous_block_iterator operator+( difference_type x ) const
 	{
 		device_contiguous_block_iterator tmp( *this );
 		tmp += x;
 		return tmp;
 	}
 
-	__HOST__ __DEVICE__ inline device_contiguous_block_iterator operator-( int x ) const { return operator+(-x); }
+	__HOST__ __DEVICE__ inline device_contiguous_block_iterator operator-( difference_type x ) const { return operator+(-x); }
 
-	__HOST__ __DEVICE__ device_contiguous_block_iterator& operator+=( int x )
+	__HOST__ __DEVICE__ device_contiguous_block_iterator& operator+=( difference_type x )
 	{
 		const int rows = x / width;
 		base_type::ptr.skip_bytes( rows * base_type::ptr.get_pitch() );
@@ -298,9 +298,9 @@ class device_contiguous_block_iterator : public device_iterator<T,padded_ptr<T,P
 		if( offset < 0     ) { base_type::ptr.skip_bytes( width*sizeof(value_type) - base_type::ptr.get_pitch() ); offset += width; }
 		return *this;
 	}
-	__HOST__ __DEVICE__ inline device_contiguous_block_iterator& operator-=( int x ) { operator+=(-x); return *this; }
+	__HOST__ __DEVICE__ inline device_contiguous_block_iterator& operator-=( difference_type x ) { operator+=(-x); return *this; }
 
-	__DEVICE__ inline reference operator[]( int x ) const { return *operator+(x); }
+	__DEVICE__ inline reference operator[]( difference_type x ) const { return *operator+(x); }
 
 	__HOST__ __DEVICE__ inline difference_type operator-( const device_contiguous_block_iterator& other ) const
 	{
@@ -399,18 +399,18 @@ class reverse_device_iterator //: public std::iterator<device_iterator_tag,typen
 
 	__HOST__ __DEVICE__ inline difference_type operator-( const reverse_device_iterator& other ) { return parentIterator - other.parentIterator; }
 
-	__HOST__ __DEVICE__ inline reverse_device_iterator operator+( int x ) const { return reverse_device_iterator( parentIterator-x ); }
-	__HOST__ __DEVICE__ inline reverse_device_iterator operator-( int x ) const { return reverse_device_iterator( parentIterator+x ); }
+	__HOST__ __DEVICE__ inline reverse_device_iterator operator+( difference_type x ) const { return reverse_device_iterator( parentIterator-x ); }
+	__HOST__ __DEVICE__ inline reverse_device_iterator operator-( difference_type x ) const { return reverse_device_iterator( parentIterator+x ); }
 
 	__HOST__ __DEVICE__ inline bool operator<( const reverse_device_iterator& other ) const { return parentIterator < other.parentIterator; }
 	__HOST__ __DEVICE__ inline bool operator>( const reverse_device_iterator& other ) const { return parentIterator > other.parentIterator; }
 	__HOST__ __DEVICE__ inline bool operator<=( const reverse_device_iterator& other ) const { return operator<(other) || operator==(other); }
 	__HOST__ __DEVICE__ inline bool operator>=( const reverse_device_iterator& other ) const { return operator>(other) || operator==(other); }
 
-	__HOST__ __DEVICE__ inline reverse_device_iterator& operator+=( int x ) { parentIterator -= x; return *this; }
-	__HOST__ __DEVICE__ inline reverse_device_iterator& operator-=( int x ) { parentIterator += x; return *this; }
+	__HOST__ __DEVICE__ inline reverse_device_iterator& operator+=( difference_type x ) { parentIterator -= x; return *this; }
+	__HOST__ __DEVICE__ inline reverse_device_iterator& operator-=( difference_type x ) { parentIterator += x; return *this; }
 
-	__DEVICE__ reference operator[]( int x ) const { return parentIterator.operator[]( -x-1 ); }
+	__DEVICE__ reference operator[]( difference_type x ) const { return parentIterator.operator[]( -x-1 ); }
 
 	__HOST__ __DEVICE__ reverse_device_iterator& operator=( const reverse_device_iterator& other )
 	{

diff --git a/include/ecuda/type_traits.hpp b/include/ecuda/type_traits.hpp
@@ -199,6 +199,7 @@ template<typename T,typename P> class striding_padded_ptr; // forward declaratio
 /// so that a cast to a naked pointer T* is achieved by reinterpret_cast<T*>(ptr.get().get()).
 ///
 template<typename T,typename U>            __HOST__ __DEVICE__ T naked_cast( U* ptr )                       { return reinterpret_cast<T>(ptr); }
+template<typename T>                       __HOST__ __DEVICE__ T naked_cast( T* ptr ) { return ptr; }
 //template<typename T,typename U>            __HOST__ __DEVICE__ T naked_cast( const naked_ptr<U>& ptr )      { return naked_cast<T>(ptr.get()); }
 template<typename T,typename U,typename V> __HOST__ __DEVICE__ T naked_cast( const unique_ptr<U,V>& ptr )   { return naked_cast<T>(ptr.get()); }
 template<typename T,typename U>            __HOST__ __DEVICE__ T naked_cast( const shared_ptr<U>& ptr )     { return naked_cast<T>(ptr.get()); }