diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index fac115f866..0479e820a0 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -65,6 +65,17 @@ check: tags: - ${NODE} +check_mpich: + extends: .check_template + tags: + - starpu + - ubuntu1804 + parallel: + matrix: + - SCRIPT: [./contrib/gitlab/mpich.sh, ./contrib/gitlab/mpich_struct.sh] + script: + - ${SCRIPT} + check_simgrid: extends: .check_template tags: diff --git a/ChangeLog b/ChangeLog index 9d41c21574..39581bedcb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -19,6 +19,8 @@ StarPU 1.5.0 Changes: * Rename hierarchical tasks in recursive tasks * Fix asynchronous partitioning with data without home node + * Allow large sizes for vector, matrix, block, tensor and ndim data + interfaces, and use proper MPI datatypes to exchange them. Small changes: * Fix build system for StarPU Python interface diff --git a/configure.ac b/configure.ac index b7e7b2eca4..7a0e9838fe 100644 --- a/configure.ac +++ b/configure.ac @@ -610,6 +610,9 @@ if test x$enable_mpi = xmaybe ; then fi fi +AC_ARG_ENABLE(mpi-type-vector-c, AC_HELP_STRING([--disable-mpi-type-vector-c], [Disable usage of function MPI_Type_vector_c]), + [enable_mpi_type_vector_c=$enableval], [enable_mpi_type_vector_c=yes]) + # in case MPI was explicitly required, but mpicc is not available, this is an error if test x$enable_mpi = xyes ; then if test ! 
-x "$mpicc_path"; then @@ -626,6 +629,9 @@ if test x$enable_mpi = xyes ; then [AC_DEFINE(STARPU_HAVE_MPI_EXT, [1], [ is available])]) AC_CHECK_FUNC([MPI_Comm_create_group], [AC_DEFINE([STARPU_HAVE_MPI_COMM_CREATE_GROUP], [1], [Define to 1 if the function MPI_Comm_create_group is available.])]) + if test x$enable_mpi_type_vector_c = xyes ; then + AC_CHECK_FUNC([MPI_Type_vector_c], [AC_DEFINE([STARPU_HAVE_MPI_TYPE_VECTOR_C], [1], [Define to 1 if the function MPI_Type_vector_c is available.])]) + fi CC=$OLD_CC fi diff --git a/contrib/gitlab/mpich.sh b/contrib/gitlab/mpich.sh new file mode 100755 index 0000000000..a70c4851be --- /dev/null +++ b/contrib/gitlab/mpich.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# StarPU --- Runtime system for heterogeneous multicore architectures. +# +# Copyright (C) 2020-2024 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +export STARPU_MICROBENCHS_DISABLED=1 +export STARPU_CHECK_DIRS=mpi +export STARPU_USER_CONFIGURE_OPTIONS="--with-mpicc=/usr/bin/mpicc.mpich --with-mpiexec=/usr/bin/mpiexec.mpich --with-mpicxx=/usr/bin/mpicxx.mpich --with-mpifort=/usr/bin/mpifort.mpich" +./contrib/ci.inria.fr/job-1-check.sh diff --git a/contrib/gitlab/mpich_struct.sh b/contrib/gitlab/mpich_struct.sh new file mode 100755 index 0000000000..6fd699bfba --- /dev/null +++ b/contrib/gitlab/mpich_struct.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# StarPU --- Runtime system for heterogeneous multicore architectures. 
+# +# Copyright (C) 2020-2024 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria +# +# StarPU is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# StarPU is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See the GNU Lesser General Public License in COPYING.LGPL for more details. +# + +export STARPU_MICROBENCHS_DISABLED=1 +export STARPU_CHECK_DIRS=mpi +export STARPU_USER_CONFIGURE_OPTIONS="--with-mpicc=/usr/bin/mpicc.mpich --with-mpiexec=/usr/bin/mpiexec.mpich --with-mpicxx=/usr/bin/mpicxx.mpich --with-mpifort=/usr/bin/mpifort.mpich --disable-mpi-type-vector-c" +./contrib/ci.inria.fr/job-1-check.sh diff --git a/doc/doxygen/chapters/starpu_applications/code/vector_scal_cpu.c b/doc/doxygen/chapters/starpu_applications/code/vector_scal_cpu.c index 1efd8f2b41..beacee3432 100644 --- a/doc/doxygen/chapters/starpu_applications/code/vector_scal_cpu.c +++ b/doc/doxygen/chapters/starpu_applications/code/vector_scal_cpu.c @@ -24,7 +24,7 @@ void vector_scal_cpu(void *buffers[], void *cl_arg) //! [Extract To be included. You should update doxygen if you see this text.] struct starpu_vector_interface *vector = buffers[0]; float *val = (float *)STARPU_VECTOR_GET_PTR(vector); - unsigned n = STARPU_VECTOR_GET_NX(vector); + size_t n = STARPU_VECTOR_GET_NX(vector); //! [Extract To be included. You should update doxygen if you see this text.] //! [Unpack To be included. You should update doxygen if you see this text.] @@ -33,7 +33,7 @@ void vector_scal_cpu(void *buffers[], void *cl_arg) //! [Unpack To be included. You should update doxygen if you see this text.] //! [Compute To be included. You should update doxygen if you see this text.] 
- unsigned i; + size_t i; for (i = 0; i < n; i++) val[i] *= factor; //! [Compute To be included. You should update doxygen if you see this text.] diff --git a/doc/doxygen/chapters/starpu_basics/basic_examples.doxy b/doc/doxygen/chapters/starpu_basics/basic_examples.doxy index edd8337221..591a6c4992 100644 --- a/doc/doxygen/chapters/starpu_basics/basic_examples.doxy +++ b/doc/doxygen/chapters/starpu_basics/basic_examples.doxy @@ -349,11 +349,11 @@ The definition of the codelet can be written as follows: \code{.c} void scal_cpu_func(void *buffers[], void *cl_arg) { - unsigned i; + size_t i; float *factor = cl_arg; /* length of the vector */ - unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + size_t n = STARPU_VECTOR_GET_NX(buffers[0]); /* CPU copy of the vector pointer */ float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]); diff --git a/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_cpu.c b/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_cpu.c index 2e0cf4e8eb..d21ea300a8 100644 --- a/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_cpu.c +++ b/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_cpu.c @@ -22,7 +22,7 @@ /* This kernel takes a buffer and scales it by a constant factor */ void scal_cpu_func(void *buffers[], void *cl_arg) { - unsigned i; + size_t i; float *factor = cl_arg; /* @@ -38,7 +38,7 @@ void scal_cpu_func(void *buffers[], void *cl_arg) struct starpu_vector_interface *vector = buffers[0]; /* length of the vector */ - unsigned n = STARPU_VECTOR_GET_NX(vector); + size_t n = STARPU_VECTOR_GET_NX(vector); /* get a pointer to the local copy of the vector: note that we have to * cast it in (float *) since a vector could contain any type of @@ -53,22 +53,22 @@ void scal_cpu_func(void *buffers[], void *cl_arg) void scal_sse_func(void *buffers[], void *cl_arg) { float *vector = (float *) STARPU_VECTOR_GET_PTR(buffers[0]); - unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]); - unsigned int n_iterations = n/4; + 
size_t n = STARPU_VECTOR_GET_NX(buffers[0]); + size_t n_iterations = n/4; __m128 *VECTOR = (__m128*) vector; __m128 FACTOR STARPU_ATTRIBUTE_ALIGNED(16); float factor = *(float *) cl_arg; FACTOR = _mm_set1_ps(factor); - unsigned int i; + size_t i; for (i = 0; i < n_iterations; i++) VECTOR[i] = _mm_mul_ps(FACTOR, VECTOR[i]); unsigned int remainder = n%4; if (remainder != 0) { - unsigned int start = 4 * n_iterations; + size_t start = 4 * n_iterations; for (i = start; i < start+remainder; ++i) { vector[i] = factor * vector[i]; diff --git a/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_cuda.c b/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_cuda.c index 39227a008d..5b586adb85 100644 --- a/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_cuda.c +++ b/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_cuda.c @@ -16,7 +16,7 @@ //! [To be included. You should update doxygen if you see this text.] #include -static __global__ void vector_mult_cuda(unsigned n, float *val, float factor) +static __global__ void vector_mult_cuda(size_t n, float *val, float factor) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i < n) @@ -28,7 +28,7 @@ extern "C" void scal_cuda_func(void *buffers[], void *_args) float *factor = (float *)_args; /* length of the vector */ - unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + size_t n = STARPU_VECTOR_GET_NX(buffers[0]); /* local copy of the vector pointer */ float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]); unsigned threads_per_block = 64; diff --git a/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl.c b/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl.c index 8b33fc399e..130d7fe719 100644 --- a/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl.c +++ b/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl.c @@ -28,7 +28,7 @@ void scal_opencl_func(void *buffers[], void *_args) cl_event event; /* OpenCL specific code */ /* length of the 
vector */ - unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + size_t n = STARPU_VECTOR_GET_NX(buffers[0]); /* OpenCL copy of the vector pointer */ cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); diff --git a/doc/doxygen/chapters/starpu_extensions/advanced_tasks.doxy b/doc/doxygen/chapters/starpu_extensions/advanced_tasks.doxy index 094b3bd16e..495f85237b 100644 --- a/doc/doxygen/chapters/starpu_extensions/advanced_tasks.doxy +++ b/doc/doxygen/chapters/starpu_extensions/advanced_tasks.doxy @@ -140,8 +140,8 @@ to use SSE to scale a vector. The codelet can be written as follows: void scal_sse_func(void *buffers[], void *cl_arg) { float *vector = (float *) STARPU_VECTOR_GET_PTR(buffers[0]); - unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]); - unsigned int n_iterations = n/4; + size_t n = STARPU_VECTOR_GET_NX(buffers[0]); + size_t n_iterations = n/4; if (n % 4 != 0) n_iterations++; @@ -149,7 +149,7 @@ void scal_sse_func(void *buffers[], void *cl_arg) __m128 factor __attribute__((aligned(16))); factor = _mm_set1_ps(*(float *) cl_arg); - unsigned int i; + size_t i; for (i = 0; i < n_iterations; i++) VECTOR[i] = _mm_mul_ps(factor, VECTOR[i]); } @@ -352,10 +352,10 @@ the rank of the current CPU within the combined worker. For instance: \code{.c} static void func(void *buffers[], void *args) { - unsigned i; + size_t i; float *factor = _args; struct starpu_vector_interface *vector = buffers[0]; - unsigned n = STARPU_VECTOR_GET_NX(vector); + size_t n = STARPU_VECTOR_GET_NX(vector); float *val = (float *)STARPU_VECTOR_GET_PTR(vector); /* Compute slice to compute */ diff --git a/doc/doxygen/chapters/starpu_extensions/code/forkmode.c b/doc/doxygen/chapters/starpu_extensions/code/forkmode.c index c364bcf28d..96c8ee3494 100644 --- a/doc/doxygen/chapters/starpu_extensions/code/forkmode.c +++ b/doc/doxygen/chapters/starpu_extensions/code/forkmode.c @@ -17,10 +17,10 @@ //! [To be included. You should update doxygen if you see this text.] 
void scal_cpu_func(void *buffers[], void *_args) { - unsigned i; + size_t i; float *factor = _args; struct starpu_vector_interface *vector = buffers[0]; - unsigned n = STARPU_VECTOR_GET_NX(vector); + size_t n = STARPU_VECTOR_GET_NX(vector); float *val = (float *)STARPU_VECTOR_GET_PTR(vector); #pragma omp parallel for num_threads(starpu_combined_worker_get_size()) diff --git a/doc/doxygen/chapters/starpu_extensions/max_fpga_support.doxy b/doc/doxygen/chapters/starpu_extensions/max_fpga_support.doxy index 7f57e39569..7b675e38a5 100644 --- a/doc/doxygen/chapters/starpu_extensions/max_fpga_support.doxy +++ b/doc/doxygen/chapters/starpu_extensions/max_fpga_support.doxy @@ -183,7 +183,7 @@ void fpga_add(void *buffers[], void *cl_arg) int *b = (int*) STARPU_VECTOR_GET_PTR(buffers[1]); int *c = (int*) STARPU_VECTOR_GET_PTR(buffers[2]); - int size = STARPU_VECTOR_GET_NX(buffers[0]); + size_t size = STARPU_VECTOR_GET_NX(buffers[0]); /* actions to run on an engine */ max_actions_t *act = max_actions_init(maxfile, NULL); diff --git a/doc/doxygen/chapters/starpu_extensions/recursive_tasks.doxy b/doc/doxygen/chapters/starpu_extensions/recursive_tasks.doxy index b406c0249a..96446dea98 100644 --- a/doc/doxygen/chapters/starpu_extensions/recursive_tasks.doxy +++ b/doc/doxygen/chapters/starpu_extensions/recursive_tasks.doxy @@ -56,8 +56,8 @@ perform the computation. 
void func_cpu(void *descr[], void *_args) { (void) _args; - int x; - int nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t x; + size_t nx = STARPU_VECTOR_GET_NX(descr[0]); TYPE *v = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); for(x=0 ; x -static __global__ void vector_mult_cuda(float *val, unsigned int n, float factor) +static __global__ void vector_mult_cuda(float *val, size_t n, float factor) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i < n) @@ -29,7 +29,7 @@ extern "C" void vector_scal_cuda(void *buffers[], void *cl_arg) starpu_codelet_unpack_args(cl_arg, &factor); /* length of the vector */ - unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]); + size_t n = STARPU_VECTOR_GET_NX(buffers[0]); /* local copy of the vector pointer */ float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]); unsigned threads_per_block = 64; diff --git a/doc/tutorial/vector_scal_opencl.c b/doc/tutorial/vector_scal_opencl.c index 40d216a54a..41a22f81b5 100644 --- a/doc/tutorial/vector_scal_opencl.c +++ b/doc/tutorial/vector_scal_opencl.c @@ -29,7 +29,7 @@ void vector_scal_opencl(void *buffers[], void *cl_arg) starpu_codelet_unpack_args(cl_arg, &factor); /* length of the vector */ - unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]); + cl_ulong n = STARPU_VECTOR_GET_NX(buffers[0]); /* OpenCL copy of the vector pointer */ cl_mem val = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); diff --git a/doc/tutorial/vector_scal_opencl_kernel.cl b/doc/tutorial/vector_scal_opencl_kernel.cl index baddd509bb..cf2bbd6725 100644 --- a/doc/tutorial/vector_scal_opencl_kernel.cl +++ b/doc/tutorial/vector_scal_opencl_kernel.cl @@ -14,7 +14,7 @@ * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
*/ -__kernel void vector_mult_opencl(unsigned int nx, __global float* val, float factor) +__kernel void vector_mult_opencl(ulong nx, __global float* val, float factor) { const int i = get_global_id(0); if (i < nx) diff --git a/eclipse-plugin/examples/hello/hello.c b/eclipse-plugin/examples/hello/hello.c index 9092b1afca..a6e02fb082 100644 --- a/eclipse-plugin/examples/hello/hello.c +++ b/eclipse-plugin/examples/hello/hello.c @@ -21,7 +21,7 @@ void display_cpu_func(void *buffers[], void *cl_arg) { (void)cl_arg; - int nx, i; + size_t nx, i; struct starpu_vector_interface *vector; int *val; @@ -35,7 +35,8 @@ void display_cpu_func(void *buffers[], void *cl_arg) void scal_cpu_func(void *buffers[], void *cl_arg) { - int factor, nx, i; + int factor; + size_t nx, i; struct starpu_vector_interface *vector; int *val; diff --git a/examples/axpy/axpy.c b/examples/axpy/axpy.c index ebb840c73e..34340af848 100644 --- a/examples/axpy/axpy.c +++ b/examples/axpy/axpy.c @@ -55,7 +55,7 @@ void axpy_cpu(void *descr[], void *arg) { TYPE alpha = *((TYPE *)arg); - unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + size_t n = STARPU_VECTOR_GET_NX(descr[0]); TYPE *block_x = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); TYPE *block_y = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); @@ -68,7 +68,7 @@ void axpy_gpu(void *descr[], void *arg) { TYPE alpha = *((TYPE *)arg); - unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + size_t n = STARPU_VECTOR_GET_NX(descr[0]); TYPE *block_x = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); TYPE *block_y = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); diff --git a/examples/axpy/axpy_opencl.c b/examples/axpy/axpy_opencl.c index 21127bc0ca..a68dd44f29 100644 --- a/examples/axpy/axpy_opencl.c +++ b/examples/axpy/axpy_opencl.c @@ -29,11 +29,11 @@ void axpy_opencl(void *buffers[], void *_args) cl_kernel kernel; cl_command_queue queue; - unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + cl_ulong n = STARPU_VECTOR_GET_NX(buffers[0]); cl_mem x = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); 
- unsigned x_offset = STARPU_VECTOR_GET_OFFSET(buffers[0]); + cl_ulong x_offset = STARPU_VECTOR_GET_OFFSET(buffers[0]); cl_mem y = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[1]); - unsigned y_offset = STARPU_VECTOR_GET_OFFSET(buffers[1]); + cl_ulong y_offset = STARPU_VECTOR_GET_OFFSET(buffers[1]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); diff --git a/examples/axpy/axpy_opencl_kernel.cl b/examples/axpy/axpy_opencl_kernel.cl index 766a92a4cc..6fefc1d722 100644 --- a/examples/axpy/axpy_opencl_kernel.cl +++ b/examples/axpy/axpy_opencl_kernel.cl @@ -19,10 +19,10 @@ #include "axpy.h" __kernel void _axpy_opencl(__global TYPE *x, - unsigned x_offset, + ulong x_offset, __global TYPE *y, - unsigned y_offset, - unsigned nx, + ulong y_offset, + ulong nx, TYPE alpha) { const int i = get_global_id(0); diff --git a/examples/basic_examples/block_cpu.c b/examples/basic_examples/block_cpu.c index 1d4a7c06a1..708e8f35cd 100644 --- a/examples/basic_examples/block_cpu.c +++ b/examples/basic_examples/block_cpu.c @@ -19,13 +19,13 @@ void cpu_codelet(void *descr[], void *_args) { float *block = (float *)STARPU_BLOCK_GET_PTR(descr[0]); - int nx = (int)STARPU_BLOCK_GET_NX(descr[0]); - int ny = (int)STARPU_BLOCK_GET_NY(descr[0]); - int nz = (int)STARPU_BLOCK_GET_NZ(descr[0]); - unsigned ldy = STARPU_BLOCK_GET_LDY(descr[0]); - unsigned ldz = STARPU_BLOCK_GET_LDZ(descr[0]); + size_t nx = STARPU_BLOCK_GET_NX(descr[0]); + size_t ny = STARPU_BLOCK_GET_NY(descr[0]); + size_t nz = STARPU_BLOCK_GET_NZ(descr[0]); + size_t ldy = STARPU_BLOCK_GET_LDY(descr[0]); + size_t ldz = STARPU_BLOCK_GET_LDZ(descr[0]); float *multiplier = (float *)_args; - int i, j, k; + size_t i, j, k; for(k=0; k -static __global__ void cuda_block(float *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float multiplier) +static __global__ void cuda_block(float *block, size_t nx, size_t ny, size_t nz, size_t ldy, size_t ldz, float multiplier) { - int i, j, k; + size_t i, j, k; for(k=0; 
k>>(block, nx, ny, nz, ldy, ldz, *multiplier); diff --git a/examples/basic_examples/block_hip.hip b/examples/basic_examples/block_hip.hip index cecb19dd44..63276736b2 100644 --- a/examples/basic_examples/block_hip.hip +++ b/examples/basic_examples/block_hip.hip @@ -1,6 +1,6 @@ /* StarPU --- Runtime system for heterogeneous multicore architectures. * - * Copyright (C) 2010-2022 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2024 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -16,9 +16,9 @@ #include -static __global__ void hip_block(float *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float multiplier) +static __global__ void hip_block(float *block, size_t nx, size_t ny, size_t nz, size_t ldy, size_t ldz, float multiplier) { - int i, j, k; + size_t i, j, k; for(k=0; k 1e-6*C_ref[j + i*ldC]) { - printf("| Cref[%u,%u]=%f - Cgpu[%u,%u]=%f | Error in the computation of C: the difference between the two is bigger than 1e-6 * the reference" + printf("| Cref[%zu,%zu]=%f - Cgpu[%zu,%zu]=%f | Error in the computation of C: the difference between the two is bigger than 1e-6 * the reference" , i, j, C_ref[j + i*ldC], i, j, C_gpu[j + i*ldC]); exit(1); } @@ -407,11 +407,11 @@ int main(void) /* cpu compution to check */ /* ============================================= */ - uint32_t ldA = ydim; - uint32_t ldB = zdim; - uint32_t ldC = ydim; + size_t ldA = ydim; + size_t ldB = zdim; + size_t ldC = ydim; - unsigned i,j,k; + size_t i,j,k; for (i = 0; i < ydim; i++) { for (j = 0; j < xdim; j++) diff --git a/examples/basic_examples/mult_cuda.cu b/examples/basic_examples/mult_cuda.cu index 9b53751933..1c4b04bcce 100644 --- a/examples/basic_examples/mult_cuda.cu +++ b/examples/basic_examples/mult_cuda.cu @@ -66,11 +66,11 @@ * in the case of CPU, GPU frame buffer in the case of GPU etc.). 
Since we have * registered data with the "matrix" data interface, we use the matrix macros. */ -static __global__ void cuda_mult_kernel(uint32_t nxC, uint32_t nyC, uint32_t nyA, - uint32_t ldA, uint32_t ldB, uint32_t ldC, +static __global__ void cuda_mult_kernel(size_t nxC, size_t nyC, size_t nyA, + size_t ldA, size_t ldB, size_t ldC, float * subA, float * subB, float * subC ) { - uint32_t id, i, j, k; + size_t id, i, j, k; float sum; id = blockIdx.x * blockDim.x + threadIdx.x; i = id % nxC; @@ -91,9 +91,9 @@ extern "C" void cuda_mult(void *descr[], void *arg) { (void)arg; float *d_subA, *d_subB, *d_subC; - uint32_t nxC, nyC, nyA; - uint32_t ldA, ldB, ldC; - uint32_t nblocks; + size_t nxC, nyC, nyA; + size_t ldA, ldB, ldC; + size_t nblocks; /* ptr gives a pointer to the first element of the local copy */ d_subA = (float *)STARPU_MATRIX_GET_PTR(descr[0]); diff --git a/examples/basic_examples/mult_hip.hip b/examples/basic_examples/mult_hip.hip index 032b8dfd26..5b86bf48c7 100644 --- a/examples/basic_examples/mult_hip.hip +++ b/examples/basic_examples/mult_hip.hip @@ -1,6 +1,6 @@ /* StarPU --- Runtime system for heterogeneous multicore architectures. 
* - * Copyright (C) 2009-2022 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2009-2024 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify @@ -91,8 +91,8 @@ extern "C" void hip_mult(void *descr[], void *arg) { (void)arg; float *subA, *subB, *subC; - uint32_t nxC, nyC, nyA; - uint32_t ldA, ldB, ldC; + size_t nxC, nyC, nyA; + size_t ldA, ldB, ldC; uint32_t nblocks; /* ptr gives a pointer to the first element of the local copy */ diff --git a/examples/basic_examples/ndim.c b/examples/basic_examples/ndim.c index 476ba11347..1b3af9be18 100644 --- a/examples/basic_examples/ndim.c +++ b/examples/basic_examples/ndim.c @@ -25,16 +25,16 @@ void arr4d_cpu_func(void *buffers[], void *args) { (void)args; int *arr4d = (int *) STARPU_NDIM_GET_PTR(buffers[0]); - int *nn = (int *)STARPU_NDIM_GET_NN(buffers[0]); - unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); - int nx = nn[0]; - int ny = nn[1]; - int nz = nn[2]; - int nt = nn[3]; - unsigned ldy = ldn[1]; - unsigned ldz = ldn[2]; - unsigned ldt = ldn[3]; - int i, j, k, l; + size_t *nn = STARPU_NDIM_GET_NN(buffers[0]); + size_t *ldn = STARPU_NDIM_GET_LDN(buffers[0]); + size_t nx = nn[0]; + size_t ny = nn[1]; + size_t nz = nn[2]; + size_t nt = nn[3]; + size_t ldy = ldn[1]; + size_t ldz = ldn[2]; + size_t ldt = ldn[3]; + size_t i, j, k, l; for (l = 0; l < nt; l++) { for (k = 0; k < nz; k++) @@ -66,8 +66,8 @@ int main(void) int val = 0; int i, j, k, l; starpu_data_handle_t arr4d_handle; - unsigned nn[4] = {NX, NY, NZ, NT}; - unsigned ldn[4] = {1, NX, NX*NY, NX*NY*NZ}; + size_t nn[4] = {NX, NY, NZ, NT}; + size_t ldn[4] = {1, NX, NX*NY, NX*NY*NZ}; ret = starpu_init(NULL); if (ret == -ENODEV) diff --git a/examples/basic_examples/vector_scal_cpu_template.h b/examples/basic_examples/vector_scal_cpu_template.h index 146c4774d5..f5d8ae25ed 100644 --- a/examples/basic_examples/vector_scal_cpu_template.h +++ 
b/examples/basic_examples/vector_scal_cpu_template.h @@ -30,7 +30,7 @@ #define VECTOR_SCAL_CPU_FUNC(func_name) \ void func_name(void *buffers[], void *cl_arg) \ { \ - unsigned i; \ + size_t i; \ float *factor = (float *) cl_arg; \ \ /* \ @@ -47,7 +47,7 @@ void func_name(void *buffers[], void *cl_arg) \ struct starpu_vector_interface *vector = (struct starpu_vector_interface *) buffers[0]; \ \ /* length of the vector */ \ - unsigned n = STARPU_VECTOR_GET_NX(vector); \ + size_t n = STARPU_VECTOR_GET_NX(vector); \ \ /* get a pointer to the local copy of the vector : note that we have to\ * cast it in (float *) since a vector could contain any type of \ @@ -64,22 +64,22 @@ void func_name(void *buffers[], void *cl_arg) \ void func_name(void *buffers[], void *cl_arg) \ { \ float *vector = (float *) STARPU_VECTOR_GET_PTR(buffers[0]); \ - unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]); \ - unsigned int n_iterations = n/4; \ + size_t n = STARPU_VECTOR_GET_NX(buffers[0]); \ + size_t n_iterations = n/4; \ \ __m128 *VECTOR = (__m128*) vector; \ __m128 FACTOR STARPU_ATTRIBUTE_ALIGNED(16); \ float factor = *(float *) cl_arg; \ FACTOR = _mm_set1_ps(factor); \ \ - unsigned int i; \ + size_t i; \ for (i = 0; i < n_iterations; i++) \ VECTOR[i] = _mm_mul_ps(FACTOR, VECTOR[i]); \ \ unsigned int remainder = n%4; \ if (remainder != 0) \ { \ - unsigned int start = 4 * n_iterations; \ + size_t start = 4 * n_iterations; \ for (i = start; i < start+remainder; ++i) \ { \ vector[i] = factor * vector[i]; \ diff --git a/examples/basic_examples/vector_scal_cuda.cu b/examples/basic_examples/vector_scal_cuda.cu index 907bcf7fac..9f8c48de05 100644 --- a/examples/basic_examples/vector_scal_cuda.cu +++ b/examples/basic_examples/vector_scal_cuda.cu @@ -20,8 +20,7 @@ #include -static __global__ void vector_mult_cuda(unsigned n, float *val, - float factor) +static __global__ void vector_mult_cuda(size_t n, float *val, float factor) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; @@ -34,7 +33,7 @@ 
extern "C" void scal_cuda_func(void *buffers[], void *_args) float *factor = (float *)_args; /* length of the vector */ - unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + size_t n = STARPU_VECTOR_GET_NX(buffers[0]); /* local copy of the vector pointer */ float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]); unsigned threads_per_block = 64; diff --git a/examples/basic_examples/vector_scal_hip.hip b/examples/basic_examples/vector_scal_hip.hip index d617d17f9e..f4498415a2 100644 --- a/examples/basic_examples/vector_scal_hip.hip +++ b/examples/basic_examples/vector_scal_hip.hip @@ -1,6 +1,6 @@ /* StarPU --- Runtime system for heterogeneous multicore architectures. * - * Copyright (C) 2010-2022 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2024 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -21,7 +21,7 @@ #include -static __global__ void vector_mult_hip(unsigned n, float *val, float factor) +static __global__ void vector_mult_hip(size_t n, float *val, float factor) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; @@ -33,7 +33,7 @@ extern "C" void scal_hip_func(void *buffers[], void *_args) { float *factor = (float *)_args; /* length of the vector */ - unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + size_t n = STARPU_VECTOR_GET_NX(buffers[0]); /* local copy of the vector pointer */ float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]); unsigned threads_per_block = 64; diff --git a/examples/basic_examples/vector_scal_opencl.c b/examples/basic_examples/vector_scal_opencl.c index ba4ecb2105..dda80f92a9 100644 --- a/examples/basic_examples/vector_scal_opencl.c +++ b/examples/basic_examples/vector_scal_opencl.c @@ -31,7 +31,7 @@ void scal_opencl_func(void *buffers[], void *_args) cl_command_queue queue; /* length of the vector */ - unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]); + 
cl_ulong n = STARPU_VECTOR_GET_NX(buffers[0]); /* OpenCL copy of the vector pointer */ cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); diff --git a/examples/basic_examples/vector_scal_opencl_kernel.cl b/examples/basic_examples/vector_scal_opencl_kernel.cl index baddd509bb..cf2bbd6725 100644 --- a/examples/basic_examples/vector_scal_opencl_kernel.cl +++ b/examples/basic_examples/vector_scal_opencl_kernel.cl @@ -14,7 +14,7 @@ * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ -__kernel void vector_mult_opencl(unsigned int nx, __global float* val, float factor) +__kernel void vector_mult_opencl(ulong nx, __global float* val, float factor) { const int i = get_global_id(0); if (i < nx) diff --git a/examples/cg/cg_kernels.c b/examples/cg/cg_kernels.c index 08267ed9f0..d5fd918e62 100644 --- a/examples/cg/cg_kernels.c +++ b/examples/cg/cg_kernels.c @@ -61,9 +61,9 @@ starpu_data_handle_t rtr_handle; TYPE dtq, rtr; #if 0 -static void print_vector_from_descr(unsigned nx, TYPE *v) +static void print_vector_from_descr(size_t nx, TYPE *v) { - unsigned i; + size_t i; for (i = 0; i < nx; i++) { fprintf(stderr, "%2.2e ", v[i]); @@ -72,9 +72,9 @@ static void print_vector_from_descr(unsigned nx, TYPE *v) } -static void print_matrix_from_descr(unsigned nx, unsigned ny, unsigned ld, TYPE *mat) +static void print_matrix_from_descr(size_t nx, size_t ny, size_t ld, TYPE *mat) { - unsigned i, j; + size_t i, j; for (j = 0; j < nx; j++) { for (i = 0; i < ny; i++) @@ -164,7 +164,7 @@ static void accumulate_vector_cuda(void *descr[], void *cl_arg) (void)cl_arg; TYPE *v_dst = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); TYPE *v_src = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); - unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t nx = STARPU_VECTOR_GET_NX(descr[0]); cublasStatus_t status = cublasaxpy(starpu_cublas_get_local_handle(), nx, &gp1, v_src, 1, v_dst, 1); if (status != CUBLAS_STATUS_SUCCESS) @@ -177,7 +177,7 @@ void accumulate_vector_cpu(void 
*descr[], void *cl_arg) (void)cl_arg; TYPE *v_dst = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); TYPE *v_src = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); - unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t nx = STARPU_VECTOR_GET_NX(descr[0]); AXPY(nx, (TYPE)1.0, v_src, 1, v_dst, 1); } @@ -208,7 +208,7 @@ struct starpu_codelet accumulate_vector_cl = */ #ifdef STARPU_USE_CUDA -extern void zero_vector(TYPE *x, unsigned nelems); +extern void zero_vector(TYPE *x, size_t nelems); static void bzero_variable_cuda(void *descr[], void *cl_arg) { @@ -253,7 +253,7 @@ static void bzero_vector_cuda(void *descr[], void *cl_arg) { (void)cl_arg; TYPE *v = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); - unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t nx = STARPU_VECTOR_GET_NX(descr[0]); size_t elemsize = STARPU_VECTOR_GET_ELEMSIZE(descr[0]); cudaMemsetAsync(v, 0, nx * elemsize, starpu_cuda_get_local_stream()); @@ -264,7 +264,7 @@ void bzero_vector_cpu(void *descr[], void *cl_arg) { (void)cl_arg; TYPE *v = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); - unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t nx = STARPU_VECTOR_GET_NX(descr[0]); memset(v, 0, nx*sizeof(TYPE)); } @@ -302,7 +302,7 @@ static void dot_kernel_cuda(void *descr[], void *cl_arg) TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[2]); - unsigned nx = STARPU_VECTOR_GET_NX(descr[1]); + size_t nx = STARPU_VECTOR_GET_NX(descr[1]); cublasHandle_t handle = starpu_cublas_get_local_handle(); cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); @@ -321,7 +321,7 @@ void dot_kernel_cpu(void *descr[], void *cl_arg) TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[2]); - unsigned nx = STARPU_VECTOR_GET_NX(descr[1]); + size_t nx = STARPU_VECTOR_GET_NX(descr[1]); TYPE local_dot; /* Note that we explicitly cast the result of the DOT kernel because @@ -354,7 +354,7 @@ static struct starpu_codelet dot_kernel_cl = int 
dot_kernel(HANDLE_TYPE_VECTOR v1, HANDLE_TYPE_VECTOR v2, starpu_data_handle_t s, - unsigned nb) + size_t nb) { int ret; @@ -368,7 +368,7 @@ int dot_kernel(HANDLE_TYPE_VECTOR v1, STARPU_CHECK_RETURN_VALUE(ret, "TASK_INSERT"); } - unsigned block; + size_t block; for (block = 0; block < nb; block++) { ret = TASK_INSERT(&dot_kernel_cl, @@ -393,7 +393,7 @@ static void scal_kernel_cuda(void *descr[], void *cl_arg) starpu_codelet_unpack_args(cl_arg, &p1); TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); - unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t nx = STARPU_VECTOR_GET_NX(descr[0]); /* v1 = p1 v1 */ TYPE alpha = p1; @@ -409,7 +409,7 @@ void scal_kernel_cpu(void *descr[], void *cl_arg) starpu_codelet_unpack_args(cl_arg, &alpha); TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); - unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t nx = STARPU_VECTOR_GET_NX(descr[0]); /* v1 = alpha v1 */ SCAL(nx, alpha, v1, 1); @@ -446,9 +446,9 @@ static void gemv_kernel_cuda(void *descr[], void *cl_arg) TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[2]); TYPE *M = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); - unsigned ld = STARPU_MATRIX_GET_LD(descr[1]); - unsigned nx = STARPU_MATRIX_GET_NX(descr[1]); - unsigned ny = STARPU_MATRIX_GET_NY(descr[1]); + size_t ld = STARPU_MATRIX_GET_LD(descr[1]); + size_t nx = STARPU_MATRIX_GET_NX(descr[1]); + size_t ny = STARPU_MATRIX_GET_NY(descr[1]); TYPE alpha, beta; starpu_codelet_unpack_args(cl_arg, &beta, &alpha); @@ -467,9 +467,9 @@ void gemv_kernel_cpu(void *descr[], void *cl_arg) TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[2]); TYPE *M = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); - unsigned ld = STARPU_MATRIX_GET_LD(descr[1]); - unsigned nx = STARPU_MATRIX_GET_NX(descr[1]); - unsigned ny = STARPU_MATRIX_GET_NY(descr[1]); + size_t ld = STARPU_MATRIX_GET_LD(descr[1]); + size_t nx = STARPU_MATRIX_GET_NX(descr[1]); + size_t ny = STARPU_MATRIX_GET_NY(descr[1]); TYPE alpha, beta; starpu_codelet_unpack_args(cl_arg, &beta, &alpha); @@ 
-519,9 +519,9 @@ int gemv_kernel(HANDLE_TYPE_VECTOR v1, HANDLE_TYPE_MATRIX matrix, HANDLE_TYPE_VECTOR v2, TYPE p1, TYPE p2, - unsigned nb) + size_t nb) { - unsigned b1, b2; + size_t b1, b2; int ret; for (b2 = 0; b2 < nb; b2++) @@ -566,7 +566,7 @@ static void scal_axpy_kernel_cuda(void *descr[], void *cl_arg) TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); - unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t nx = STARPU_VECTOR_GET_NX(descr[0]); /* Compute v1 = p1 * v1 + p2 * v2. * v1 = p1 v1 @@ -590,7 +590,7 @@ void scal_axpy_kernel_cpu(void *descr[], void *cl_arg) TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); - unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t nx = STARPU_VECTOR_GET_NX(descr[0]); /* Compute v1 = p1 * v1 + p2 * v2. * v1 = p1 v1 @@ -622,9 +622,9 @@ static struct starpu_codelet scal_axpy_kernel_cl = int scal_axpy_kernel(HANDLE_TYPE_VECTOR v1, TYPE p1, HANDLE_TYPE_VECTOR v2, TYPE p2, - unsigned nb) + size_t nb) { - unsigned block; + size_t block; for (block = 0; block < nb; block++) { int ret; @@ -654,7 +654,7 @@ static void axpy_kernel_cuda(void *descr[], void *cl_arg) TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); - unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t nx = STARPU_VECTOR_GET_NX(descr[0]); /* Compute v1 = v1 + p1 * v2. */ @@ -673,7 +673,7 @@ void axpy_kernel_cpu(void *descr[], void *cl_arg) TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); - unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t nx = STARPU_VECTOR_GET_NX(descr[0]); /* Compute v1 = p1 * v1 + p2 * v2. 
*/ @@ -702,9 +702,9 @@ static struct starpu_codelet axpy_kernel_cl = int axpy_kernel(HANDLE_TYPE_VECTOR v1, HANDLE_TYPE_VECTOR v2, TYPE p1, - unsigned nb) + size_t nb) { - unsigned block; + size_t block; for (block = 0; block < nb; block++) { int ret; diff --git a/examples/cholesky/cholesky_kernels.c b/examples/cholesky/cholesky_kernels.c index df61bfd3f0..3b6e825e1b 100644 --- a/examples/cholesky/cholesky_kernels.c +++ b/examples/cholesky/cholesky_kernels.c @@ -49,13 +49,13 @@ static inline void chol_common_cpu_codelet_update_gemm(void *descr[], int s, voi float *right = (float *)STARPU_MATRIX_GET_PTR(descr[1]); float *center = (float *)STARPU_MATRIX_GET_PTR(descr[2]); - unsigned dx = STARPU_MATRIX_GET_NY(descr[2]); - unsigned dy = STARPU_MATRIX_GET_NX(descr[2]); - unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); + size_t dx = STARPU_MATRIX_GET_NY(descr[2]); + size_t dy = STARPU_MATRIX_GET_NX(descr[2]); + size_t dz = STARPU_MATRIX_GET_NY(descr[0]); - unsigned ld21 = STARPU_MATRIX_GET_LD(descr[0]); - unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]); - unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]); + size_t ld21 = STARPU_MATRIX_GET_LD(descr[0]); + size_t ld12 = STARPU_MATRIX_GET_LD(descr[1]); + size_t ld22 = STARPU_MATRIX_GET_LD(descr[2]); switch (s) { @@ -129,11 +129,11 @@ static inline void chol_common_cpu_codelet_update_syrk(void *descr[], int s, voi float *left = (float *)STARPU_MATRIX_GET_PTR(descr[0]); float *center = (float *)STARPU_MATRIX_GET_PTR(descr[1]); - unsigned dx = STARPU_MATRIX_GET_NY(descr[1]); - unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); + size_t dx = STARPU_MATRIX_GET_NY(descr[1]); + size_t dz = STARPU_MATRIX_GET_NY(descr[0]); - unsigned ld21 = STARPU_MATRIX_GET_LD(descr[0]); - unsigned ld22 = STARPU_MATRIX_GET_LD(descr[1]); + size_t ld21 = STARPU_MATRIX_GET_LD(descr[0]); + size_t ld22 = STARPU_MATRIX_GET_LD(descr[1]); switch (s) { @@ -189,11 +189,11 @@ static inline void chol_common_codelet_update_trsm(void *descr[], int s, void *_ sub11 = (float 
*)STARPU_MATRIX_GET_PTR(descr[0]); sub21 = (float *)STARPU_MATRIX_GET_PTR(descr[1]); - unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); - unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); + size_t ld11 = STARPU_MATRIX_GET_LD(descr[0]); + size_t ld21 = STARPU_MATRIX_GET_LD(descr[1]); - unsigned nx21 = STARPU_MATRIX_GET_NY(descr[1]); - unsigned ny21 = STARPU_MATRIX_GET_NX(descr[1]); + size_t nx21 = STARPU_MATRIX_GET_NY(descr[1]); + size_t ny21 = STARPU_MATRIX_GET_NX(descr[1]); #ifdef STARPU_USE_CUDA cublasStatus_t status; @@ -243,10 +243,10 @@ static inline void chol_common_codelet_update_potrf(void *descr[], int s, void * sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); - unsigned nx = STARPU_MATRIX_GET_NY(descr[0]); - unsigned ld = STARPU_MATRIX_GET_LD(descr[0]); + size_t nx = STARPU_MATRIX_GET_NY(descr[0]); + size_t ld = STARPU_MATRIX_GET_LD(descr[0]); - unsigned z; + size_t z; switch (s) { diff --git a/examples/cholesky/cholesky_tag.h b/examples/cholesky/cholesky_tag.h index efb88628a0..2e85a19f38 100644 --- a/examples/cholesky/cholesky_tag.h +++ b/examples/cholesky/cholesky_tag.h @@ -41,9 +41,9 @@ static struct starpu_task *create_task(starpu_tag_t id) { struct starpu_task *task = starpu_task_create(); - task->cl_arg = NULL; - task->use_tag = 1; - task->tag_id = id; + task->cl_arg = NULL; + task->use_tag = 1; + task->tag_id = id; return task; } @@ -247,23 +247,23 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks) double timing = end - start; - unsigned nx = starpu_matrix_get_nx(dataA); + size_t nx = starpu_matrix_get_nx(dataA); double flop = (1.0f*nx*nx*nx)/3.0f; PRINTF("# size\tms\tGFlop/s\n"); - PRINTF("%u\t%.0f\t%.1f\n", nx, timing/1000, (flop/timing/1000.0f)); + PRINTF("%zu\t%.0f\t%.1f\n", nx, timing/1000, (flop/timing/1000.0f)); return 0; } -static int cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks) +static int cholesky(float *matA, size_t size, size_t ld, size_t nblocks) { starpu_data_handle_t dataA; int ret; /* monitor 
and partition the A matrix into blocks : - * one block is now determined by 2 unsigned (m,n) */ + * one block is now determined by 2 size_t (m,n) */ starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float)); starpu_data_set_sequential_consistency_flag(dataA, 0); diff --git a/examples/cpp/add_vectors_interface.cpp b/examples/cpp/add_vectors_interface.cpp index 7a18a17e61..d019a6fdf4 100644 --- a/examples/cpp/add_vectors_interface.cpp +++ b/examples/cpp/add_vectors_interface.cpp @@ -148,7 +148,7 @@ struct vector_cpp_interface enum starpu_data_interface_id id; uintptr_t ptr; - uint32_t nx; + size_t nx; size_t elemsize; std::vector* vec; @@ -368,7 +368,7 @@ static void register_vector_cpp_handle(starpu_data_handle_t handle, int home_nod /* declare a new data with the vector interface */ void vector_cpp_data_register(starpu_data_handle_t *handleptr, int home_node, - std::vector* vec, uint32_t nx, size_t elemsize) + std::vector* vec, size_t nx, size_t elemsize) { #if __cplusplus >= 201103L struct vector_cpp_interface vector = @@ -398,15 +398,13 @@ void vector_cpp_data_register(starpu_data_handle_t *handleptr, int home_node, } /* offer an access to the data parameters */ -uint32_t vector_cpp_get_nx(starpu_data_handle_t handle) +size_t vector_cpp_get_nx(starpu_data_handle_t handle) { - struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) - starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return vector_interface->nx; } - static uint32_t footprint_vector_cpp_interface_crc32(starpu_data_handle_t handle) { return starpu_hash_crc32c_be(vector_cpp_get_nx(handle), 0); @@ -424,18 +422,16 @@ static int vector_cpp_compare(void *data_interface_a, void *data_interface_b) static void display_vector_cpp_interface(starpu_data_handle_t handle, FILE *f) { - struct 
vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) - starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); - fprintf(f, "%u\t", vector_interface->nx); + fprintf(f, "%zu\t", vector_interface->nx); } static int pack_vector_cpp_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); - struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) - starpu_data_get_interface_on_node(handle, node); + struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) starpu_data_get_interface_on_node(handle, node); *count = vector_interface->nx*vector_interface->elemsize; @@ -452,8 +448,7 @@ static int peek_vector_cpp_handle(starpu_data_handle_t handle, unsigned node, vo { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); - struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) - starpu_data_get_interface_on_node(handle, node); + struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) starpu_data_get_interface_on_node(handle, node); STARPU_ASSERT(count == vector_interface->elemsize * vector_interface->nx); memcpy((void*)vector_interface->ptr, ptr, count); @@ -496,7 +491,7 @@ static starpu_ssize_t allocate_vector_cpp_buffer_on_node(void *data_interface_, { struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) data_interface_; - uint32_t nx = vector_interface->nx; + size_t nx = vector_interface->nx; size_t elemsize = vector_interface->elemsize; starpu_ssize_t allocated_memory; diff --git a/examples/filters/alloc.c b/examples/filters/alloc.c index bacfab664a..a9b792d467 100644 --- a/examples/filters/alloc.c +++ b/examples/filters/alloc.c @@ -22,8 +22,8 @@ void init_cpu(void* buffers[], 
void *args) { double *v = (double*)STARPU_VECTOR_GET_PTR(buffers[0]); - unsigned nx = STARPU_VECTOR_GET_NX(buffers[0]); - unsigned i; + size_t nx = STARPU_VECTOR_GET_NX(buffers[0]); + size_t i; for (i=0; inx); + fprintf(f, "Custom interface of size %zu", ci->nx); } -static uint32_t +static size_t custom_get_nx(starpu_data_handle_t handle) { struct custom_data_interface *data_interface; @@ -261,7 +261,7 @@ custom_get_nx(starpu_data_handle_t handle) void custom_data_register(starpu_data_handle_t *handle, int home_node, void *ptr, - uint32_t nx, + size_t nx, struct starpu_multiformat_data_interface_ops *format_ops) { struct custom_data_interface custom = diff --git a/examples/filters/custom_mf/custom_interface.h b/examples/filters/custom_mf/custom_interface.h index 53ab1b6f0e..173125ba5f 100644 --- a/examples/filters/custom_mf/custom_interface.h +++ b/examples/filters/custom_mf/custom_interface.h @@ -22,13 +22,13 @@ struct custom_data_interface void *cuda_ptr; void *opencl_ptr; struct starpu_multiformat_data_interface_ops *ops; - uint32_t nx; + size_t nx; }; void custom_data_register(starpu_data_handle_t *handle, int home_node, void *ptr, - uint32_t nx, + size_t nx, struct starpu_multiformat_data_interface_ops* ops); #define CUSTOM_GET_NX(interface) (((struct custom_data_interface*)(interface))->nx) diff --git a/examples/filters/f3d_cpu.c b/examples/filters/f3d_cpu.c index 58b4d1a7f6..dd042186a0 100644 --- a/examples/filters/f3d_cpu.c +++ b/examples/filters/f3d_cpu.c @@ -18,16 +18,16 @@ void f3d_cpu_func(void *buffers[], void *cl_arg) { - int i, j, k; + size_t i, j, k; int *factor = (int *) cl_arg; int *arr3d = (int *)STARPU_NDIM_GET_PTR(buffers[0]); - int *nn = (int *)STARPU_NDIM_GET_NN(buffers[0]); - unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); - int nx = nn[0]; - int ny = nn[1]; - int nz = nn[2]; - unsigned ldy = ldn[1]; - unsigned ldz = ldn[2]; + size_t *nn = STARPU_NDIM_GET_NN(buffers[0]); + size_t *ldn = STARPU_NDIM_GET_LDN(buffers[0]); + size_t nx = nn[0]; 
+ size_t ny = nn[1]; + size_t nz = nn[2]; + size_t ldy = ldn[1]; + size_t ldz = ldn[2]; for(k=0; k -static __global__ void f3d_cuda(int *arr3d, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor) +static __global__ void f3d_cuda(int *arr3d, size_t nx, size_t ny, size_t nz, size_t ldy, size_t ldz, float factor) { - int i, j, k; + size_t i, j, k; for(k=0; k>>(arr3d, nx, ny, nz, ldy, ldz, *factor); cudaError_t status = cudaGetLastError(); diff --git a/examples/filters/f3d_hip.hip b/examples/filters/f3d_hip.hip index 72a41aac2c..184363aa2c 100644 --- a/examples/filters/f3d_hip.hip +++ b/examples/filters/f3d_hip.hip @@ -1,6 +1,6 @@ /* StarPU --- Runtime system for heterogeneous multicore architectures. * - * Copyright (C) 2010-2022 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2024 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -18,9 +18,9 @@ #include -static __global__ void f3d_hip(int *arr3d, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor) +static __global__ void f3d_hip(int *arr3d, size_t nx, size_t ny, size_t nz, size_t ldy, size_t ldz, float factor) { - int i, j, k; + size_t i, j, k; for(k=0; k -static __global__ void f4d_cuda(int *arr4d, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt, float factor) +static __global__ void f4d_cuda(int *arr4d, size_t nx, size_t ny, size_t nz, size_t nt, size_t ldy, size_t ldz, size_t ldt, float factor) { - int i, j, k, l; + size_t i, j, k, l; for(l=0; l>>(arr4d, nx, ny, nz, nt, ldy, ldz, ldt, *factor); cudaError_t status = cudaGetLastError(); diff --git a/examples/filters/f4d_hip.hip b/examples/filters/f4d_hip.hip index b04be8b3e9..94fa2676e6 100644 --- a/examples/filters/f4d_hip.hip +++ b/examples/filters/f4d_hip.hip @@ -1,6 +1,6 @@ /* StarPU --- Runtime system for heterogeneous multicore 
architectures. * - * Copyright (C) 2010-2022 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2024 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -18,9 +18,9 @@ #include -static __global__ void f4d_hip(int *arr4d, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt, float factor) +static __global__ void f4d_hip(int *arr4d, size_t nx, size_t ny, size_t nz, size_t nt, size_t ldy, size_t ldz, size_t ldt, float factor) { - int i, j, k, l; + size_t i, j, k, l; for(l=0; l -static __global__ void fblock_cuda(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor) +static __global__ void fblock_cuda(int *block, size_t nx, size_t ny, size_t nz, size_t ldy, size_t ldz, float factor) { - int i, j, k; + size_t i, j, k; for(k=0; k>>(block, nx, ny, nz, ldy, ldz, *factor); diff --git a/examples/filters/fblock_hip.hip b/examples/filters/fblock_hip.hip index 9f19316885..dfbb8529cc 100644 --- a/examples/filters/fblock_hip.hip +++ b/examples/filters/fblock_hip.hip @@ -1,6 +1,6 @@ /* StarPU --- Runtime system for heterogeneous multicore architectures. 
* - * Copyright (C) 2010-2022 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2024 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -18,9 +18,9 @@ #include -static __global__ void fblock_hip(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor) +static __global__ void fblock_hip(int *block, size_t nx, size_t ny, size_t nz, size_t ldy, size_t ldz, float factor) { - int i, j, k; + size_t i, j, k; for(k=0; k -static __global__ void fmatrix_cuda(int *matrix, int nx, int ny, unsigned ld, float factor) +static __global__ void fmatrix_cuda(int *matrix, size_t nx, size_t ny, size_t ld, float factor) { - int i, j; + size_t i, j; for(j=0; j>>(matrix, nx, ny, ld, *factor); cudaError_t status = cudaGetLastError(); diff --git a/examples/filters/fmatrix_hip.hip b/examples/filters/fmatrix_hip.hip index a95a043551..aaf7b862ef 100644 --- a/examples/filters/fmatrix_hip.hip +++ b/examples/filters/fmatrix_hip.hip @@ -1,6 +1,6 @@ /* StarPU --- Runtime system for heterogeneous multicore architectures. 
* - * Copyright (C) 2010-2022 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2024 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -18,9 +18,9 @@ #include -static __global__ void fmatrix_hip(int *matrix, int nx, int ny, unsigned ld, float factor) +static __global__ void fmatrix_hip(int *matrix, size_t nx, size_t ny, size_t ld, float factor) { - int i, j; + size_t i, j; for(j=0; j -static __global__ void _fmultiple_check_scale_cuda(int *val, int nx, int ny, unsigned ld, int start, int factor) +static __global__ void _fmultiple_check_scale_cuda(int *val, size_t nx, size_t ny, size_t ld, int start, int factor) { - int i, j; + size_t i, j; for(j=0; j -static __global__ void _fmultiple_check_scale_hip(int *val, int nx, int ny, unsigned ld, int start, int factor) +static __global__ void _fmultiple_check_scale_hip(int *val, size_t nx, size_t ny, size_t ld, int start, int factor) { - int i, j; + size_t i, j; for(j=0; j -static __global__ void ftensor_cuda(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt, float factor) +static __global__ void ftensor_cuda(int *tensor, size_t nx, size_t ny, size_t nz, size_t nt, size_t ldy, size_t ldz, size_t ldt, float factor) { - int i, j, k, l; + size_t i, j, k, l; for(l=0; l>>(tensor, nx, ny, nz, nt, ldy, ldz, ldt, *factor); cudaError_t status = cudaGetLastError(); diff --git a/examples/filters/ftensor_hip.hip b/examples/filters/ftensor_hip.hip index 46a44bf5a3..3ddacb0272 100644 --- a/examples/filters/ftensor_hip.hip +++ b/examples/filters/ftensor_hip.hip @@ -1,6 +1,6 @@ /* StarPU --- Runtime system for heterogeneous multicore architectures. 
* - * Copyright (C) 2010-2022 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2024 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -18,9 +18,9 @@ #include -static __global__ void ftensor_hip(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt, float factor) +static __global__ void ftensor_hip(int *tensor, size_t nx, size_t ny, size_t nz, size_t nt, size_t ldy, size_t ldz, size_t ldt, float factor) { - int i, j, k, l; + size_t i, j, k, l; for(l=0; l -static __global__ void fvector_cuda(int *vector, int n, float factor) +static __global__ void fvector_cuda(int *vector, size_t n, float factor) { - int i; + size_t i; for (i = 0; i < n; i++) vector[i] *= factor; } @@ -29,7 +29,7 @@ extern "C" void vector_cuda_func(void *buffers[], void *_args) { int *factor = (int *)_args; int *vector = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); - int n = (int)STARPU_VECTOR_GET_NX(buffers[0]); + size_t n = STARPU_VECTOR_GET_NX(buffers[0]); fvector_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>>(vector, n, *factor); cudaError_t status = cudaGetLastError(); diff --git a/examples/filters/fvector_hip.hip b/examples/filters/fvector_hip.hip index 742931cf54..4ff6d58352 100644 --- a/examples/filters/fvector_hip.hip +++ b/examples/filters/fvector_hip.hip @@ -1,6 +1,6 @@ /* StarPU --- Runtime system for heterogeneous multicore architectures. 
* - * Copyright (C) 2010-2022 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2010-2024 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -18,9 +18,9 @@ #include -static __global__ void fvector_hip(int *vector, int n, float factor) +static __global__ void fvector_hip(int *vector, size_t n, float factor) { - int i; + size_t i; for (i = 0; i < n; i++) vector[i] *= factor; } @@ -29,7 +29,7 @@ extern "C" void vector_hip_func(void *buffers[], void *_args) { int *factor = (int *)_args; int *vector = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); - int n = (int)STARPU_VECTOR_GET_NX(buffers[0]); + size_t n = STARPU_VECTOR_GET_NX(buffers[0]); hipLaunchKernelGGL(fvector_hip, 1, 1, 0, starpu_hip_get_local_stream(), vector, n, *factor); hipError_t status = hipGetLastError(); diff --git a/examples/filters/shadow.c b/examples/filters/shadow.c index ab3560edfc..b5a86b437f 100644 --- a/examples/filters/shadow.c +++ b/examples/filters/shadow.c @@ -52,15 +52,15 @@ void cpu_func(void *buffers[], void *cl_arg) { (void)cl_arg; - unsigned i; + size_t i; /* length of the shadowed source vector */ - unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + size_t n = STARPU_VECTOR_GET_NX(buffers[0]); /* local copy of the shadowed source vector pointer */ int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); /* length of the destination vector */ - unsigned n2 = STARPU_VECTOR_GET_NX(buffers[1]); + size_t n2 = STARPU_VECTOR_GET_NX(buffers[1]); /* local copy of the destination vector pointer */ int *val2 = (int *)STARPU_VECTOR_GET_PTR(buffers[1]); @@ -75,12 +75,12 @@ void cuda_func(void *buffers[], void *cl_arg) { (void)cl_arg; /* length of the shadowed source vector */ - unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + size_t n = STARPU_VECTOR_GET_NX(buffers[0]); /* local copy of the shadowed source vector pointer */ int *val = (int 
*)STARPU_VECTOR_GET_PTR(buffers[0]); /* length of the destination vector */ - unsigned n2 = STARPU_VECTOR_GET_NX(buffers[1]); + size_t n2 = STARPU_VECTOR_GET_NX(buffers[1]); /* local copy of the destination vector pointer */ int *val2 = (int *)STARPU_VECTOR_GET_PTR(buffers[1]); @@ -92,7 +92,7 @@ void cuda_func(void *buffers[], void *cl_arg) int main(void) { - unsigned j; + size_t j; int vector[NX + 2*SHADOW]; int vector2[NX + PARTS*2*SHADOW]; starpu_data_handle_t handle, handle2; diff --git a/examples/filters/shadow2d.c b/examples/filters/shadow2d.c index c3ad500872..77cb8ddceb 100644 --- a/examples/filters/shadow2d.c +++ b/examples/filters/shadow2d.c @@ -96,20 +96,20 @@ void cpu_func(void *buffers[], void *cl_arg) { (void)cl_arg; /* length of the shadowed source matrix */ - unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); - unsigned n = STARPU_MATRIX_GET_NX(buffers[0]); - unsigned m = STARPU_MATRIX_GET_NY(buffers[0]); + size_t ld = STARPU_MATRIX_GET_LD(buffers[0]); + size_t n = STARPU_MATRIX_GET_NX(buffers[0]); + size_t m = STARPU_MATRIX_GET_NY(buffers[0]); /* local copy of the shadowed source matrix pointer */ int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); /* length of the destination matrix */ - unsigned ld2 = STARPU_MATRIX_GET_LD(buffers[1]); - unsigned n2 = STARPU_MATRIX_GET_NX(buffers[1]); - unsigned m2 = STARPU_MATRIX_GET_NY(buffers[1]); + size_t ld2 = STARPU_MATRIX_GET_LD(buffers[1]); + size_t n2 = STARPU_MATRIX_GET_NX(buffers[1]); + size_t m2 = STARPU_MATRIX_GET_NY(buffers[1]); /* local copy of the destination matrix pointer */ int *val2 = (int *)STARPU_MATRIX_GET_PTR(buffers[1]); - unsigned i, j; + size_t i, j; /* If things go right, sizes should match */ STARPU_ASSERT(n == n2); @@ -126,16 +126,16 @@ void cuda_func(void *buffers[], void *cl_arg) cudaError_t cures; /* length of the shadowed source matrix */ - unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); - unsigned n = STARPU_MATRIX_GET_NX(buffers[0]); - unsigned m = 
STARPU_MATRIX_GET_NY(buffers[0]); + size_t ld = STARPU_MATRIX_GET_LD(buffers[0]); + size_t n = STARPU_MATRIX_GET_NX(buffers[0]); + size_t m = STARPU_MATRIX_GET_NY(buffers[0]); /* local copy of the shadowed source matrix pointer */ int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); /* length of the destination matrix */ - unsigned ld2 = STARPU_MATRIX_GET_LD(buffers[1]); - unsigned n2 = STARPU_MATRIX_GET_NX(buffers[1]); - unsigned m2 = STARPU_MATRIX_GET_NY(buffers[1]); + size_t ld2 = STARPU_MATRIX_GET_LD(buffers[1]); + size_t n2 = STARPU_MATRIX_GET_NX(buffers[1]); + size_t m2 = STARPU_MATRIX_GET_NY(buffers[1]); /* local copy of the destination matrix pointer */ int *val2 = (int *)STARPU_MATRIX_GET_PTR(buffers[1]); @@ -149,7 +149,7 @@ void cuda_func(void *buffers[], void *cl_arg) int main(void) { - unsigned i, j, k, l; + size_t i, j, k, l; int matrix[NY + 2*SHADOWY][NX + 2*SHADOWX]; int matrix2[NY + PARTSY*2*SHADOWY][NX + PARTSX*2*SHADOWX]; starpu_data_handle_t handle, handle2; diff --git a/examples/filters/shadow3d.c b/examples/filters/shadow3d.c index 2ac62c61c4..70c3a1c8cf 100644 --- a/examples/filters/shadow3d.c +++ b/examples/filters/shadow3d.c @@ -44,24 +44,24 @@ void cpu_func(void *buffers[], void *cl_arg) { (void)cl_arg; /* length of the shadowed source matrix */ - unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]); - unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); - unsigned x = STARPU_BLOCK_GET_NX(buffers[0]); - unsigned y = STARPU_BLOCK_GET_NY(buffers[0]); - unsigned z = STARPU_BLOCK_GET_NZ(buffers[0]); + size_t ldy = STARPU_BLOCK_GET_LDY(buffers[0]); + size_t ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); + size_t x = STARPU_BLOCK_GET_NX(buffers[0]); + size_t y = STARPU_BLOCK_GET_NY(buffers[0]); + size_t z = STARPU_BLOCK_GET_NZ(buffers[0]); /* local copy of the shadowed source matrix pointer */ int *val = (int *)STARPU_BLOCK_GET_PTR(buffers[0]); /* length of the destination matrix */ - unsigned ldy2 = STARPU_BLOCK_GET_LDY(buffers[1]); - unsigned ldz2 = 
STARPU_BLOCK_GET_LDZ(buffers[1]); - unsigned x2 = STARPU_BLOCK_GET_NX(buffers[1]); - unsigned y2 = STARPU_BLOCK_GET_NY(buffers[1]); - unsigned z2 = STARPU_BLOCK_GET_NZ(buffers[1]); + size_t ldy2 = STARPU_BLOCK_GET_LDY(buffers[1]); + size_t ldz2 = STARPU_BLOCK_GET_LDZ(buffers[1]); + size_t x2 = STARPU_BLOCK_GET_NX(buffers[1]); + size_t y2 = STARPU_BLOCK_GET_NY(buffers[1]); + size_t z2 = STARPU_BLOCK_GET_NZ(buffers[1]); /* local copy of the destination matrix pointer */ int *val2 = (int *)STARPU_BLOCK_GET_PTR(buffers[1]); - unsigned i, j, k; + size_t i, j, k; /* If things go right, sizes should match */ STARPU_ASSERT(x == x2); @@ -78,24 +78,24 @@ void cuda_func(void *buffers[], void *cl_arg) { (void)cl_arg; /* length of the shadowed source matrix */ - unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]); - unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); - unsigned x = STARPU_BLOCK_GET_NX(buffers[0]); - unsigned y = STARPU_BLOCK_GET_NY(buffers[0]); - unsigned z = STARPU_BLOCK_GET_NZ(buffers[0]); + size_t ldy = STARPU_BLOCK_GET_LDY(buffers[0]); + size_t ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); + size_t x = STARPU_BLOCK_GET_NX(buffers[0]); + size_t y = STARPU_BLOCK_GET_NY(buffers[0]); + size_t z = STARPU_BLOCK_GET_NZ(buffers[0]); /* local copy of the shadowed source matrix pointer */ int *val = (int *)STARPU_BLOCK_GET_PTR(buffers[0]); /* length of the destination matrix */ - unsigned ldy2 = STARPU_BLOCK_GET_LDY(buffers[1]); - unsigned ldz2 = STARPU_BLOCK_GET_LDZ(buffers[1]); - unsigned x2 = STARPU_BLOCK_GET_NX(buffers[1]); - unsigned y2 = STARPU_BLOCK_GET_NY(buffers[1]); - unsigned z2 = STARPU_BLOCK_GET_NZ(buffers[1]); + size_t ldy2 = STARPU_BLOCK_GET_LDY(buffers[1]); + size_t ldz2 = STARPU_BLOCK_GET_LDZ(buffers[1]); + size_t x2 = STARPU_BLOCK_GET_NX(buffers[1]); + size_t y2 = STARPU_BLOCK_GET_NY(buffers[1]); + size_t z2 = STARPU_BLOCK_GET_NZ(buffers[1]); /* local copy of the destination matrix pointer */ int *val2 = (int *)STARPU_BLOCK_GET_PTR(buffers[1]); - unsigned k; + 
size_t k; cudaError_t cures; /* If things go right, sizes should match */ @@ -113,7 +113,7 @@ void cuda_func(void *buffers[], void *cl_arg) int main(void) { - unsigned i, j, k, l, m, n; + size_t i, j, k, l, m, n; int matrix[NZ + 2*SHADOWZ][NY + 2*SHADOWY][NX + 2*SHADOWX]; int matrix2[NZ + PARTSZ*2*SHADOWZ][NY + PARTSY*2*SHADOWY][NX + PARTSX*2*SHADOWX]; starpu_data_handle_t handle, handle2; diff --git a/examples/filters/shadow4d.c b/examples/filters/shadow4d.c index 9c5178b0b5..cd8e0d6311 100644 --- a/examples/filters/shadow4d.c +++ b/examples/filters/shadow4d.c @@ -47,28 +47,28 @@ void cpu_func(void *buffers[], void *cl_arg) { (void)cl_arg; /* length of the shadowed source matrix */ - unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]); - unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]); - unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]); - unsigned x = STARPU_TENSOR_GET_NX(buffers[0]); - unsigned y = STARPU_TENSOR_GET_NY(buffers[0]); - unsigned z = STARPU_TENSOR_GET_NZ(buffers[0]); - unsigned t = STARPU_TENSOR_GET_NT(buffers[0]); + size_t ldy = STARPU_TENSOR_GET_LDY(buffers[0]); + size_t ldz = STARPU_TENSOR_GET_LDZ(buffers[0]); + size_t ldt = STARPU_TENSOR_GET_LDT(buffers[0]); + size_t x = STARPU_TENSOR_GET_NX(buffers[0]); + size_t y = STARPU_TENSOR_GET_NY(buffers[0]); + size_t z = STARPU_TENSOR_GET_NZ(buffers[0]); + size_t t = STARPU_TENSOR_GET_NT(buffers[0]); /* local copy of the shadowed source matrix pointer */ int *val = (int *)STARPU_TENSOR_GET_PTR(buffers[0]); /* length of the destination matrix */ - unsigned ldy2 = STARPU_TENSOR_GET_LDY(buffers[1]); - unsigned ldz2 = STARPU_TENSOR_GET_LDZ(buffers[1]); - unsigned ldt2 = STARPU_TENSOR_GET_LDT(buffers[1]); - unsigned x2 = STARPU_TENSOR_GET_NX(buffers[1]); - unsigned y2 = STARPU_TENSOR_GET_NY(buffers[1]); - unsigned z2 = STARPU_TENSOR_GET_NZ(buffers[1]); - unsigned t2 = STARPU_TENSOR_GET_NT(buffers[1]); + size_t ldy2 = STARPU_TENSOR_GET_LDY(buffers[1]); + size_t ldz2 = STARPU_TENSOR_GET_LDZ(buffers[1]); + size_t 
ldt2 = STARPU_TENSOR_GET_LDT(buffers[1]); + size_t x2 = STARPU_TENSOR_GET_NX(buffers[1]); + size_t y2 = STARPU_TENSOR_GET_NY(buffers[1]); + size_t z2 = STARPU_TENSOR_GET_NZ(buffers[1]); + size_t t2 = STARPU_TENSOR_GET_NT(buffers[1]); /* local copy of the destination matrix pointer */ int *val2 = (int *)STARPU_TENSOR_GET_PTR(buffers[1]); - unsigned i, j, k, l; + size_t i, j, k, l; /* If things go right, sizes should match */ STARPU_ASSERT(x == x2); @@ -87,28 +87,28 @@ void cuda_func(void *buffers[], void *cl_arg) { (void)cl_arg; /* length of the shadowed source matrix*/ - unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]); - unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]); - unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]); - unsigned x = STARPU_TENSOR_GET_NX(buffers[0]); - unsigned y = STARPU_TENSOR_GET_NY(buffers[0]); - unsigned z = STARPU_TENSOR_GET_NZ(buffers[0]); - unsigned t = STARPU_TENSOR_GET_NT(buffers[0]); + size_t ldy = STARPU_TENSOR_GET_LDY(buffers[0]); + size_t ldz = STARPU_TENSOR_GET_LDZ(buffers[0]); + size_t ldt = STARPU_TENSOR_GET_LDT(buffers[0]); + size_t x = STARPU_TENSOR_GET_NX(buffers[0]); + size_t y = STARPU_TENSOR_GET_NY(buffers[0]); + size_t z = STARPU_TENSOR_GET_NZ(buffers[0]); + size_t t = STARPU_TENSOR_GET_NT(buffers[0]); /* local copy of the shadowed source matrix pointer */ int *val = (int *)STARPU_TENSOR_GET_PTR(buffers[0]); /* length of the destination matrix */ - unsigned ldy2 = STARPU_TENSOR_GET_LDY(buffers[1]); - unsigned ldz2 = STARPU_TENSOR_GET_LDZ(buffers[1]); - unsigned ldt2 = STARPU_TENSOR_GET_LDT(buffers[1]); - unsigned x2 = STARPU_TENSOR_GET_NX(buffers[1]); - unsigned y2 = STARPU_TENSOR_GET_NY(buffers[1]); - unsigned z2 = STARPU_TENSOR_GET_NZ(buffers[1]); - unsigned t2 = STARPU_TENSOR_GET_NT(buffers[1]); + size_t ldy2 = STARPU_TENSOR_GET_LDY(buffers[1]); + size_t ldz2 = STARPU_TENSOR_GET_LDZ(buffers[1]); + size_t ldt2 = STARPU_TENSOR_GET_LDT(buffers[1]); + size_t x2 = STARPU_TENSOR_GET_NX(buffers[1]); + size_t y2 = 
STARPU_TENSOR_GET_NY(buffers[1]); + size_t z2 = STARPU_TENSOR_GET_NZ(buffers[1]); + size_t t2 = STARPU_TENSOR_GET_NT(buffers[1]); /* local copy of the destination matrix pointer */ int *val2 = (int *)STARPU_TENSOR_GET_PTR(buffers[1]); - unsigned k, l; + size_t k, l; cudaError_t cures; /* If things go right, sizes should match */ @@ -130,7 +130,7 @@ void cuda_func(void *buffers[], void *cl_arg) int main(void) { - unsigned i, j, k, l, m, n, p, q; + size_t i, j, k, l, m, n, p, q; int matrix[NT + 2*SHADOWT][NZ + 2*SHADOWZ][NY + 2*SHADOWY][NX + 2*SHADOWX]; int matrix2[NT + PARTST*2*SHADOWT][NZ + PARTSZ*2*SHADOWZ][NY + PARTSY*2*SHADOWY][NX + PARTSX*2*SHADOWX]; starpu_data_handle_t handle, handle2; diff --git a/examples/filters/shadownd.c b/examples/filters/shadownd.c index ba3c43f525..7a0a751f0f 100644 --- a/examples/filters/shadownd.c +++ b/examples/filters/shadownd.c @@ -50,36 +50,36 @@ void cpu_func(void *buffers[], void *cl_arg) { (void)cl_arg; /* length of the shadowed source matrix */ - unsigned *nn = STARPU_NDIM_GET_NN(buffers[0]); - unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); - unsigned x = nn[0]; - unsigned y = nn[1]; - unsigned z = nn[2]; - unsigned t = nn[3]; - unsigned g = nn[4]; - unsigned ldy = ldn[1]; - unsigned ldz = ldn[2]; - unsigned ldt = ldn[3]; - unsigned ldg = ldn[4]; + size_t *nn = STARPU_NDIM_GET_NN(buffers[0]); + size_t *ldn = STARPU_NDIM_GET_LDN(buffers[0]); + size_t x = nn[0]; + size_t y = nn[1]; + size_t z = nn[2]; + size_t t = nn[3]; + size_t g = nn[4]; + size_t ldy = ldn[1]; + size_t ldz = ldn[2]; + size_t ldt = ldn[3]; + size_t ldg = ldn[4]; /* local copy of the shadowed source matrix pointer */ int *val = (int *)STARPU_NDIM_GET_PTR(buffers[0]); /* length of the destination matrix */ - unsigned *nn2 = STARPU_NDIM_GET_NN(buffers[1]); - unsigned *ldn2 = STARPU_NDIM_GET_LDN(buffers[1]); - unsigned x2 = nn2[0]; - unsigned y2 = nn2[1]; - unsigned z2 = nn2[2]; - unsigned t2 = nn2[3]; - unsigned g2 = nn2[4]; - unsigned ldy2 = ldn2[1]; - 
unsigned ldz2 = ldn2[2]; - unsigned ldt2 = ldn2[3]; - unsigned ldg2 = ldn2[4]; + size_t *nn2 = STARPU_NDIM_GET_NN(buffers[1]); + size_t *ldn2 = STARPU_NDIM_GET_LDN(buffers[1]); + size_t x2 = nn2[0]; + size_t y2 = nn2[1]; + size_t z2 = nn2[2]; + size_t t2 = nn2[3]; + size_t g2 = nn2[4]; + size_t ldy2 = ldn2[1]; + size_t ldz2 = ldn2[2]; + size_t ldt2 = ldn2[3]; + size_t ldg2 = ldn2[4]; /* local copy of the destination matrix pointer */ int *val2 = (int *)STARPU_NDIM_GET_PTR(buffers[1]); - unsigned i, j, k, l, m; + size_t i, j, k, l, m; /* If things go right, sizes should match */ STARPU_ASSERT(x == x2); @@ -100,36 +100,36 @@ void cuda_func(void *buffers[], void *cl_arg) { (void)cl_arg; /* length of the shadowed source matrix */ - unsigned *nn = STARPU_NDIM_GET_NN(buffers[0]); - unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); - unsigned x = nn[0]; - unsigned y = nn[1]; - unsigned z = nn[2]; - unsigned t = nn[3]; - unsigned g = nn[4]; - unsigned ldy = ldn[1]; - unsigned ldz = ldn[2]; - unsigned ldt = ldn[3]; - unsigned ldg = ldn[4]; + size_t *nn = STARPU_NDIM_GET_NN(buffers[0]); + size_t *ldn = STARPU_NDIM_GET_LDN(buffers[0]); + size_t x = nn[0]; + size_t y = nn[1]; + size_t z = nn[2]; + size_t t = nn[3]; + size_t g = nn[4]; + size_t ldy = ldn[1]; + size_t ldz = ldn[2]; + size_t ldt = ldn[3]; + size_t ldg = ldn[4]; /* local copy of the shadowed source matrix pointer */ int *val = (int *)STARPU_NDIM_GET_PTR(buffers[0]); /* length of the destination matrix */ - unsigned *nn2 = STARPU_NDIM_GET_NN(buffers[1]); - unsigned *ldn2 = STARPU_NDIM_GET_LDN(buffers[1]); - unsigned x2 = nn2[0]; - unsigned y2 = nn2[1]; - unsigned z2 = nn2[2]; - unsigned t2 = nn2[3]; - unsigned g2 = nn2[4]; - unsigned ldy2 = ldn2[1]; - unsigned ldz2 = ldn2[2]; - unsigned ldt2 = ldn2[3]; - unsigned ldg2 = ldn2[4]; + size_t *nn2 = STARPU_NDIM_GET_NN(buffers[1]); + size_t *ldn2 = STARPU_NDIM_GET_LDN(buffers[1]); + size_t x2 = nn2[0]; + size_t y2 = nn2[1]; + size_t z2 = nn2[2]; + size_t t2 = nn2[3]; + 
size_t g2 = nn2[4]; + size_t ldy2 = ldn2[1]; + size_t ldz2 = ldn2[2]; + size_t ldt2 = ldn2[3]; + size_t ldg2 = ldn2[4]; /* local copy of the destination matrix pointer */ int *val2 = (int *)STARPU_NDIM_GET_PTR(buffers[1]); - unsigned k, l, m; + size_t k, l, m; cudaError_t cures; /* If things go right, sizes should match */ @@ -155,7 +155,7 @@ void cuda_func(void *buffers[], void *cl_arg) int main(void) { - unsigned i, j, k, l, m, n, p, q, r, s; + size_t i, j, k, l, m, n, p, q, r, s; int matrix[NG + 2*SHADOWG][NT + 2*SHADOWT][NZ + 2*SHADOWZ][NY + 2*SHADOWY][NX + 2*SHADOWX]; int matrix2[NG + PARTSG*2*SHADOWG][NT + PARTST*2*SHADOWT][NZ + PARTSZ*2*SHADOWZ][NY + PARTSY*2*SHADOWY][NX + PARTSX*2*SHADOWX]; starpu_data_handle_t handle, handle2; @@ -707,11 +707,11 @@ int main(void) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); - unsigned nn1[5] = {NX + 2*SHADOWX, NY + 2*SHADOWY, NZ + 2*SHADOWZ, NT + 2*SHADOWT, NG + 2*SHADOWG}; - unsigned ldn1[5] = {1, NX + 2*SHADOWX, (NX + 2*SHADOWX) * (NY + 2*SHADOWY), (NX + 2*SHADOWX) * (NY + 2*SHADOWY) * (NZ + 2*SHADOWZ), (NX + 2*SHADOWX) * (NY + 2*SHADOWY) * (NZ + 2*SHADOWZ) * (NT + 2*SHADOWT)}; + size_t nn1[5] = {NX + 2*SHADOWX, NY + 2*SHADOWY, NZ + 2*SHADOWZ, NT + 2*SHADOWT, NG + 2*SHADOWG}; + size_t ldn1[5] = {1, NX + 2*SHADOWX, (NX + 2*SHADOWX) * (NY + 2*SHADOWY), (NX + 2*SHADOWX) * (NY + 2*SHADOWY) * (NZ + 2*SHADOWZ), (NX + 2*SHADOWX) * (NY + 2*SHADOWY) * (NZ + 2*SHADOWZ) * (NT + 2*SHADOWT)}; - unsigned nn2[5] = {NX + PARTSX*2*SHADOWX, NY + PARTSY*2*SHADOWY, NZ + PARTSZ*2*SHADOWZ, NT + PARTST*2*SHADOWT, NG + PARTSG*2*SHADOWG}; - unsigned ldn2[5] = {1, NX + PARTSX*2*SHADOWX, (NX + PARTSX*2*SHADOWX) * (NY + PARTSY*2*SHADOWY), (NX + PARTSX*2*SHADOWX) * (NY + PARTSY*2*SHADOWY) * (NZ + PARTSZ*2*SHADOWZ), (NX + PARTSX*2*SHADOWX) * (NY + PARTSY*2*SHADOWY) * (NZ + PARTSZ*2*SHADOWZ) * (NT + PARTST*2*SHADOWT)}; + size_t nn2[5] = {NX + PARTSX*2*SHADOWX, NY + PARTSY*2*SHADOWY, NZ + PARTSZ*2*SHADOWZ, NT + PARTST*2*SHADOWT, NG + 
PARTSG*2*SHADOWG}; + size_t ldn2[5] = {1, NX + PARTSX*2*SHADOWX, (NX + PARTSX*2*SHADOWX) * (NY + PARTSY*2*SHADOWY), (NX + PARTSX*2*SHADOWX) * (NY + PARTSY*2*SHADOWY) * (NZ + PARTSZ*2*SHADOWZ), (NX + PARTSX*2*SHADOWX) * (NY + PARTSY*2*SHADOWY) * (NZ + PARTSZ*2*SHADOWZ) * (NT + PARTST*2*SHADOWT)}; /* Declare source matrix to StarPU */ starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, ldn1, nn1, 5, sizeof(matrix[0][0][0][0][0])); diff --git a/examples/fortran90/marshalling.c b/examples/fortran90/marshalling.c index eccc32c478..a20c27a381 100644 --- a/examples/fortran90/marshalling.c +++ b/examples/fortran90/marshalling.c @@ -68,13 +68,13 @@ void loop_element_cpu_func(void *buffers[], void *cl_arg) double coeff; double **ro = (double **) STARPU_MATRIX_GET_PTR(buffers[0]); - int Neq_max = STARPU_MATRIX_GET_NX(buffers[0]); + size_t Neq_max = STARPU_MATRIX_GET_NX(buffers[0]); double **dro = (double **) STARPU_MATRIX_GET_PTR(buffers[1]); double **basis = (double **) STARPU_MATRIX_GET_PTR(buffers[2]); - int Np = STARPU_MATRIX_GET_NX(buffers[2]); - int Ng = STARPU_MATRIX_GET_NY(buffers[2]); + size_t Np = STARPU_MATRIX_GET_NX(buffers[2]); + size_t Ng = STARPU_MATRIX_GET_NY(buffers[2]); starpu_codelet_unpack_args(cl_arg, &coeff); @@ -123,8 +123,8 @@ void copy_element_cpu_func(void *buffers[], void *cl_arg) { (void)cl_arg; double **ro = (double **) STARPU_MATRIX_GET_PTR(buffers[0]); - int Neq_max = STARPU_MATRIX_GET_NX(buffers[0]); - int Np = STARPU_MATRIX_GET_NY(buffers[0]); + size_t Neq_max = STARPU_MATRIX_GET_NX(buffers[0]); + size_t Np = STARPU_MATRIX_GET_NY(buffers[0]); double **dro = (double **) STARPU_MATRIX_GET_PTR(buffers[1]); diff --git a/examples/heat/dw_factolu.c b/examples/heat/dw_factolu.c index c3a0f231d2..4082fb0b12 100644 --- a/examples/heat/dw_factolu.c +++ b/examples/heat/dw_factolu.c @@ -209,11 +209,11 @@ void dw_callback_v2_codelet_update_gemm(void *argcb) cl_args *utrsmlla = malloc(sizeof(cl_args)); struct starpu_task *task_trsm_ll = 
starpu_task_create(); - task_trsm_ll->callback_func = dw_callback_v2_codelet_update_trsm_ll; - task_trsm_ll->callback_arg = utrsmlla; - task_trsm_ll->cl = &cl_trsm_ll; - task_trsm_ll->cl_arg = utrsmlla; - task_trsm_ll->cl_arg_size = sizeof(*utrsmlla); + task_trsm_ll->callback_func = dw_callback_v2_codelet_update_trsm_ll; + task_trsm_ll->callback_arg = utrsmlla; + task_trsm_ll->cl = &cl_trsm_ll; + task_trsm_ll->cl_arg = utrsmlla; + task_trsm_ll->cl_arg_size = sizeof(*utrsmlla); utrsmlla->i = k+1; utrsmlla->k = i; diff --git a/examples/heat/dw_factolu_kernels.c b/examples/heat/dw_factolu_kernels.c index c519ad3adf..6e740762d0 100644 --- a/examples/heat/dw_factolu_kernels.c +++ b/examples/heat/dw_factolu_kernels.c @@ -122,13 +122,13 @@ static inline void dw_common_cpu_codelet_update_gemm(void *descr[], int s, void float *right = (float *)STARPU_MATRIX_GET_PTR(descr[1]); float *center = (float *)STARPU_MATRIX_GET_PTR(descr[2]); - unsigned dx = STARPU_MATRIX_GET_NX(descr[2]); - unsigned dy = STARPU_MATRIX_GET_NY(descr[2]); - unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); + size_t dx = STARPU_MATRIX_GET_NX(descr[2]); + size_t dy = STARPU_MATRIX_GET_NY(descr[2]); + size_t dz = STARPU_MATRIX_GET_NY(descr[0]); - unsigned ld12 = STARPU_MATRIX_GET_LD(descr[0]); - unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); - unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]); + size_t ld12 = STARPU_MATRIX_GET_LD(descr[0]); + size_t ld21 = STARPU_MATRIX_GET_LD(descr[1]); + size_t ld22 = STARPU_MATRIX_GET_LD(descr[2]); #ifdef STARPU_USE_CUDA cublasStatus_t status; @@ -190,11 +190,11 @@ static inline void dw_common_codelet_update_trsm_ll(void *descr[], int s, void * sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); sub12 = (float *)STARPU_MATRIX_GET_PTR(descr[1]); - unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); - unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]); + size_t ld11 = STARPU_MATRIX_GET_LD(descr[0]); + size_t ld12 = STARPU_MATRIX_GET_LD(descr[1]); - unsigned nx12 = 
STARPU_MATRIX_GET_NX(descr[1]); - unsigned ny12 = STARPU_MATRIX_GET_NY(descr[1]); + size_t nx12 = STARPU_MATRIX_GET_NX(descr[1]); + size_t ny12 = STARPU_MATRIX_GET_NY(descr[1]); #ifdef STARPU_USE_CUDA cublasStatus_t status; @@ -255,11 +255,11 @@ static inline void dw_common_codelet_update_trsm_ru(void *descr[], int s, void * sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); sub21 = (float *)STARPU_MATRIX_GET_PTR(descr[1]); - unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); - unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); + size_t ld11 = STARPU_MATRIX_GET_LD(descr[0]); + size_t ld21 = STARPU_MATRIX_GET_LD(descr[1]); - unsigned nx21 = STARPU_MATRIX_GET_NX(descr[1]); - unsigned ny21 = STARPU_MATRIX_GET_NY(descr[1]); + size_t nx21 = STARPU_MATRIX_GET_NX(descr[1]); + size_t ny21 = STARPU_MATRIX_GET_NY(descr[1]); #ifdef STARPU_USE_CUDA cublasStatus_t status; @@ -308,9 +308,9 @@ void dw_cublas_codelet_update_trsm_ru(void *descr[], void *_args) * GETRF */ -static inline void debug_print(float *tab, unsigned ld, unsigned n) +static inline void debug_print(float *tab, size_t ld, size_t n) { - unsigned j,i; + size_t j,i; for (j = 0; j < n; j++) { for (i = 0; i < n; i++) @@ -330,10 +330,10 @@ static inline void dw_common_codelet_update_getrf(void *descr[], int s, void *_a sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); - unsigned long nx = STARPU_MATRIX_GET_NX(descr[0]); - unsigned long ld = STARPU_MATRIX_GET_LD(descr[0]); + size_t nx = STARPU_MATRIX_GET_NX(descr[0]); + size_t ld = STARPU_MATRIX_GET_LD(descr[0]); - unsigned long z; + size_t z; #ifdef STARPU_USE_CUDA cudaStream_t stream; diff --git a/examples/heat/dw_sparse_cg_kernels.c b/examples/heat/dw_sparse_cg_kernels.c index 8fb7f9bef4..9471b5f503 100644 --- a/examples/heat/dw_sparse_cg_kernels.c +++ b/examples/heat/dw_sparse_cg_kernels.c @@ -101,7 +101,7 @@ void cpu_codelet_func_2(void *descr[], void *arg) { (void)arg; /* simply copy r into d */ - uint32_t nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t nx = 
STARPU_VECTOR_GET_NX(descr[0]); size_t elemsize = STARPU_VECTOR_GET_ELEMSIZE(descr[0]); STARPU_ASSERT(STARPU_VECTOR_GET_NX(descr[0]) == STARPU_VECTOR_GET_NX(descr[1])); @@ -125,11 +125,11 @@ void cpu_codelet_func_3(void *descr[], void *arg) struct cg_problem *pb = arg; float dot; float *vec; - int size; + size_t size; /* get the vector */ vec = (float *)STARPU_VECTOR_GET_PTR(descr[0]); - size = (int)STARPU_VECTOR_GET_NX(descr[0]); + size = STARPU_VECTOR_GET_NX(descr[0]); dot = STARPU_SDOT(size, vec, 1, vec, 1); @@ -145,7 +145,7 @@ void cublas_codelet_func_3(void *descr[], void *arg) struct cg_problem *pb = arg; float dot; float *vec; - uint32_t size; + size_t size; /* get the vector */ vec = (float *)STARPU_VECTOR_GET_PTR(descr[0]); @@ -218,7 +218,7 @@ void cpu_codelet_func_5(void *descr[], void *arg) float dot; struct cg_problem *pb = arg; float *vecd, *vecq; - uint32_t size; + size_t size; /* get the vector */ vecd = (float *)STARPU_VECTOR_GET_PTR(descr[0]); @@ -238,7 +238,7 @@ void cublas_codelet_func_5(void *descr[], void *arg) float dot; struct cg_problem *pb = arg; float *vecd, *vecq; - uint32_t size; + size_t size; /* get the vector */ vecd = (float *)STARPU_VECTOR_GET_PTR(descr[0]); @@ -269,7 +269,7 @@ void cpu_codelet_func_6(void *descr[], void *arg) { struct cg_problem *pb = arg; float *vecx, *vecd; - uint32_t size; + size_t size; /* get the vector */ vecx = (float *)STARPU_VECTOR_GET_PTR(descr[0]); @@ -285,7 +285,7 @@ void cublas_codelet_func_6(void *descr[], void *arg) { struct cg_problem *pb = arg; float *vecx, *vecd; - uint32_t size; + size_t size; /* get the vector */ vecx = (float *)STARPU_VECTOR_GET_PTR(descr[0]); @@ -310,7 +310,7 @@ void cpu_codelet_func_7(void *descr[], void *arg) { struct cg_problem *pb = arg; float *vecr, *vecq; - uint32_t size; + size_t size; /* get the vector */ vecr = (float *)STARPU_VECTOR_GET_PTR(descr[0]); @@ -326,7 +326,7 @@ void cublas_codelet_func_7(void *descr[], void *arg) { struct cg_problem *pb = arg; float *vecr, 
*vecq; - uint32_t size; + size_t size; /* get the vector */ vecr = (float *)STARPU_VECTOR_GET_PTR(descr[0]); @@ -356,7 +356,7 @@ void cpu_codelet_func_8(void *descr[], void *arg) float dot; struct cg_problem *pb = arg; float *vecr; - uint32_t size; + size_t size; /* get the vector */ vecr = (float *)STARPU_VECTOR_GET_PTR(descr[0]); @@ -375,7 +375,7 @@ void cublas_codelet_func_8(void *descr[], void *arg) float dot; struct cg_problem *pb = arg; float *vecr; - uint32_t size; + size_t size; /* get the vector */ vecr = (float *)STARPU_VECTOR_GET_PTR(descr[0]); @@ -403,7 +403,7 @@ void cpu_codelet_func_9(void *descr[], void *arg) { struct cg_problem *pb = arg; float *vecd, *vecr; - uint32_t size; + size_t size; /* get the vector */ vecd = (float *)STARPU_VECTOR_GET_PTR(descr[0]); @@ -423,7 +423,7 @@ void cublas_codelet_func_9(void *descr[], void *arg) { struct cg_problem *pb = arg; float *vecd, *vecr; - uint32_t size; + size_t size; /* get the vector */ vecd = (float *)STARPU_VECTOR_GET_PTR(descr[0]); diff --git a/examples/heat/heat.h b/examples/heat/heat.h index d4c4f2aeae..d4084b0481 100644 --- a/examples/heat/heat.h +++ b/examples/heat/heat.h @@ -57,11 +57,11 @@ typedef struct point_t float y; } point; -extern void dw_factoLU(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned version, unsigned no_prio); -extern void dw_factoLU_tag(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio); -extern void dw_factoLU_grain(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned nbigblocks); -extern void initialize_system(float **A, float **B, unsigned dim, unsigned pinned); -extern void free_system(float *A, float *B, unsigned dim, unsigned pinned); +extern void dw_factoLU(float *matA, size_t size, size_t ld, size_t nblocks, unsigned version, unsigned no_prio); +extern void dw_factoLU_tag(float *matA, size_t size, size_t ld, size_t nblocks, unsigned no_prio); +extern void dw_factoLU_grain(float *matA, size_t size, size_t ld, 
size_t nblocks, size_t nbigblocks); +extern void initialize_system(float **A, float **B, size_t dim, size_t pinned); +extern void free_system(float *A, float *B, size_t dim, size_t pinned); void display_stat_heat(void); diff --git a/examples/interface/complex_dev_handle/complex_dev_handle_filters.c b/examples/interface/complex_dev_handle/complex_dev_handle_filters.c index cfb5754deb..e02cba71bb 100644 --- a/examples/interface/complex_dev_handle/complex_dev_handle_filters.c +++ b/examples/interface/complex_dev_handle/complex_dev_handle_filters.c @@ -23,16 +23,15 @@ void starpu_complex_dev_handle_filter_block(void *parent_interface, void *child_ struct starpu_complex_dev_handle_interface *complex_dev_handle_parent = parent_interface; struct starpu_complex_dev_handle_interface *complex_dev_handle_child = child_interface; - uint32_t nx = complex_dev_handle_parent->nx; + size_t nx = complex_dev_handle_parent->nx; size_t elemsize = sizeof(double); - STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %u elements", nchunks, nx); + STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %zu elements", nchunks, nx); - uint32_t child_nx; + size_t child_nx; size_t offset; /* Compute the split */ - starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1, - &child_nx, &offset); + starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1, &child_nx, &offset); complex_dev_handle_child->nx = child_nx; diff --git a/examples/interface/complex_filters.c b/examples/interface/complex_filters.c index f89519cb37..73bb64f2bf 100644 --- a/examples/interface/complex_filters.c +++ b/examples/interface/complex_filters.c @@ -23,16 +23,15 @@ void starpu_complex_filter_block(void *parent_interface, void *child_interface, struct starpu_complex_interface *complex_parent = parent_interface; struct starpu_complex_interface *complex_child = child_interface; - uint32_t nx = complex_parent->nx; + size_t nx = complex_parent->nx; size_t elemsize = sizeof(double); - 
STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %u elements", nchunks, nx); + STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %zu elements", nchunks, nx); - uint32_t child_nx; + size_t child_nx; size_t offset; /* Compute the split */ - starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1, - &child_nx, &offset); + starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1, &child_nx, &offset); complex_child->nx = child_nx; diff --git a/examples/lu/xlu_kernels.c b/examples/lu/xlu_kernels.c index 4adf88836d..01b3d06b8b 100644 --- a/examples/lu/xlu_kernels.c +++ b/examples/lu/xlu_kernels.c @@ -47,13 +47,13 @@ static inline void STARPU_LU(common_gemm)(void *descr[], int s, void *_args) TYPE *left = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); TYPE *center = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); - unsigned dx = STARPU_MATRIX_GET_NX(descr[2]); - unsigned dy = STARPU_MATRIX_GET_NY(descr[2]); - unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); + size_t dx = STARPU_MATRIX_GET_NX(descr[2]); + size_t dy = STARPU_MATRIX_GET_NY(descr[2]); + size_t dz = STARPU_MATRIX_GET_NY(descr[0]); - unsigned ld12 = STARPU_MATRIX_GET_LD(descr[0]); - unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); - unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]); + size_t ld12 = STARPU_MATRIX_GET_LD(descr[0]); + size_t ld21 = STARPU_MATRIX_GET_LD(descr[1]); + size_t ld22 = STARPU_MATRIX_GET_LD(descr[2]); #ifdef STARPU_USE_CUDA cublasStatus_t status; @@ -174,11 +174,11 @@ static inline void STARPU_LU(common_trsmll)(void *descr[], int s, void *_args) sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); sub12 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); - unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); - unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]); + size_t ld11 = STARPU_MATRIX_GET_LD(descr[0]); + size_t ld12 = STARPU_MATRIX_GET_LD(descr[1]); - unsigned nx12 = STARPU_MATRIX_GET_NX(descr[1]); - unsigned ny12 = STARPU_MATRIX_GET_NY(descr[1]); + size_t nx12 = 
STARPU_MATRIX_GET_NX(descr[1]); + size_t ny12 = STARPU_MATRIX_GET_NY(descr[1]); #ifdef STARPU_USE_CUDA cublasStatus_t status; @@ -264,11 +264,11 @@ static inline void STARPU_LU(common_trsmru)(void *descr[], int s, void *_args) sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); sub21 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); - unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); - unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); + size_t ld11 = STARPU_MATRIX_GET_LD(descr[0]); + size_t ld21 = STARPU_MATRIX_GET_LD(descr[1]); - unsigned nx21 = STARPU_MATRIX_GET_NX(descr[1]); - unsigned ny21 = STARPU_MATRIX_GET_NY(descr[1]); + size_t nx21 = STARPU_MATRIX_GET_NX(descr[1]); + size_t ny21 = STARPU_MATRIX_GET_NY(descr[1]); #ifdef STARPU_USE_CUDA cublasStatus_t status; @@ -351,10 +351,10 @@ static inline void STARPU_LU(common_getrf)(void *descr[], int s, void *_args) sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); - unsigned long nx = STARPU_MATRIX_GET_NX(descr[0]); - unsigned long ld = STARPU_MATRIX_GET_LD(descr[0]); + size_t nx = STARPU_MATRIX_GET_NX(descr[0]); + size_t ld = STARPU_MATRIX_GET_LD(descr[0]); - unsigned long z; + size_t z; #ifdef STARPU_USE_CUDA cublasStatus_t status; @@ -492,10 +492,10 @@ static inline void STARPU_LU(common_getrf_pivot)(void *descr[], sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); - unsigned long nx = STARPU_MATRIX_GET_NX(descr[0]); - unsigned long ld = STARPU_MATRIX_GET_LD(descr[0]); + size_t nx = STARPU_MATRIX_GET_NX(descr[0]); + size_t ld = STARPU_MATRIX_GET_LD(descr[0]); - unsigned long z; + size_t z; struct piv_s *piv = _args; unsigned *ipiv = piv->piv; @@ -663,10 +663,10 @@ static inline void STARPU_LU(common_pivot)(void *descr[], TYPE *matrix; matrix = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); - unsigned long nx = STARPU_MATRIX_GET_NX(descr[0]); - unsigned long ld = STARPU_MATRIX_GET_LD(descr[0]); + size_t nx = STARPU_MATRIX_GET_NX(descr[0]); + size_t ld = STARPU_MATRIX_GET_LD(descr[0]); - unsigned row; + size_t row; struct piv_s *piv = 
_args; unsigned *ipiv = piv->piv; diff --git a/examples/matvecmult/matvecmult.c b/examples/matvecmult/matvecmult.c index 35542ce19f..6c34e4cf9f 100644 --- a/examples/matvecmult/matvecmult.c +++ b/examples/matvecmult/matvecmult.c @@ -30,9 +30,9 @@ void opencl_codelet(void *descr[], void *_args) cl_mem matrix = (cl_mem)STARPU_MATRIX_GET_DEV_HANDLE(descr[0]); cl_mem vector = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[1]); cl_mem mult = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[2]); - int nx = STARPU_MATRIX_GET_NX(descr[0]); - int ny = STARPU_MATRIX_GET_NY(descr[0]); - int ld = STARPU_MATRIX_GET_LD(descr[0]); + cl_ulong nx = STARPU_MATRIX_GET_NX(descr[0]); + cl_ulong ny = STARPU_MATRIX_GET_NY(descr[0]); + cl_ulong ld = STARPU_MATRIX_GET_LD(descr[0]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); diff --git a/examples/matvecmult/matvecmult_kernel.cl b/examples/matvecmult/matvecmult_kernel.cl index 15ff8bf4b0..deadf38d2d 100644 --- a/examples/matvecmult/matvecmult_kernel.cl +++ b/examples/matvecmult/matvecmult_kernel.cl @@ -14,13 +14,13 @@ * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
*/ -__kernel void matVecMult(const __global float *A, const __global float *X, int n, int m, __global float *Y, int ld) +__kernel void matVecMult(const __global float *A, const __global float *X, ulong n, ulong m, __global float *Y, ulong ld) { const int i = get_global_id(0); if (i < m) { float val = 0; - int j; + ulong j; for (j = 0; j < n; j++) val += A[i*ld+j] * X[j]; diff --git a/examples/mult/xgemm.c b/examples/mult/xgemm.c index dd2c93e8c4..43a043ced0 100644 --- a/examples/mult/xgemm.c +++ b/examples/mult/xgemm.c @@ -26,7 +26,7 @@ static void init_problem_data(void) { #ifndef STARPU_SIMGRID - unsigned i,j; + size_t i,j; #endif starpu_malloc_flags((void **)&A, zdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); @@ -63,7 +63,7 @@ static void init_problem_data(void) static void partition_mult_data(void) { - unsigned x, y, z; + size_t x, y, z; starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, ydim, ydim, zdim, sizeof(TYPE)); starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B, zdim, zdim, xdim, sizeof(TYPE)); @@ -130,13 +130,13 @@ static void cublas_mult(void *descr[], void *arg, const TYPE *beta) TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); - unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]); - unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]); - unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); + size_t nxC = STARPU_MATRIX_GET_NX(descr[2]); + size_t nyC = STARPU_MATRIX_GET_NY(descr[2]); + size_t nyA = STARPU_MATRIX_GET_NY(descr[0]); - unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); - unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); - unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]); + size_t ldA = STARPU_MATRIX_GET_LD(descr[0]); + size_t ldB = STARPU_MATRIX_GET_LD(descr[1]); + size_t ldC = STARPU_MATRIX_GET_LD(descr[2]); cublasStatus_t status = CUBLAS_GEMM(starpu_cublas_get_local_handle(), CUBLAS_OP_N, CUBLAS_OP_N, @@ -156,13 +156,13 @@ void 
cpu_mult(void *descr[], void *arg, TYPE beta) TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); - unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]); - unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]); - unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); + size_t nxC = STARPU_MATRIX_GET_NX(descr[2]); + size_t nyC = STARPU_MATRIX_GET_NY(descr[2]); + size_t nyA = STARPU_MATRIX_GET_NY(descr[0]); - unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); - unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); - unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]); + size_t ldA = STARPU_MATRIX_GET_LD(descr[0]); + size_t ldB = STARPU_MATRIX_GET_LD(descr[1]); + size_t ldC = STARPU_MATRIX_GET_LD(descr[2]); int worker_size = starpu_combined_worker_get_size(); @@ -399,9 +399,9 @@ static void parse_args(int argc, char **argv) { fprintf(stderr,"Usage: %s [-3d] [-nblocks n] [-nblocksx x] [-nblocksy y] [-nblocksz z] [-x x] [-y y] [-xy n] [-z z] [-xyz n] [-size size] [-iter iter] [-bound] [-check] [-spmd] [-hostname] [-nsleeps nsleeps]\n", argv[0]); if (tiled) - fprintf(stderr,"Currently selected: %ux%u * %ux%u and %ux%ux%u blocks (size %ux%u length %u), %u iterations, %u sleeps\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, nslicesz, xdim / nslicesx, ydim / nslicesy, zdim / nslicesz, niter, nsleeps); + fprintf(stderr,"Currently selected: %zux%zu * %zux%zu and %zux%zux%zu blocks (size %zux%zu length %zu), %u iterations, %u sleeps\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, nslicesz, xdim / nslicesx, ydim / nslicesy, zdim / nslicesz, niter, nsleeps); else - fprintf(stderr,"Currently selected: %ux%u * %ux%u and %ux%u blocks (size %ux%u length %u), %u iterations, %u sleeps\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, xdim / nslicesx, ydim / nslicesy, zdim, niter, nsleeps); + fprintf(stderr,"Currently selected: %zux%zu * %zux%zu and %zux%zu blocks (size %zux%zu length %zu), %u iterations, %u sleeps\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, xdim / 
nslicesx, ydim / nslicesy, zdim, niter, nsleeps); exit(EXIT_SUCCESS); } else @@ -446,7 +446,7 @@ static int run_data(void) starpu_fxt_start_profiling(); double start = starpu_timing_now(); - unsigned x, y, z, iter; + size_t x, y, z, iter; for (iter = 0; iter < niter; iter++) { if (tiled) @@ -518,7 +518,7 @@ static int run_data(void) gethostname(hostname, 255); PRINTF("%s\t", hostname); } - PRINTF("%u\t%u\t%u\t%.0f\t%.1f", xdim, ydim, zdim, timing/(niter)/1000.0, flops/timing/1000.0); + PRINTF("%zu\t%zu\t%zu\t%.0f\t%.1f", xdim, ydim, zdim, timing/(niter)/1000.0, flops/timing/1000.0); if (bound) PRINTF("\t%.0f\t%.1f\t%.0f\t%.1f", min, flops/min/1000000.0, min_int, flops/min_int/1000000.0); PRINTF("\n"); diff --git a/examples/mult/xgemm.h b/examples/mult/xgemm.h index bc3565c868..d740f3bd15 100644 --- a/examples/mult/xgemm.h +++ b/examples/mult/xgemm.h @@ -52,17 +52,17 @@ static unsigned niter = 2; static unsigned niter = 10; #endif static unsigned nsleeps = 1; -static unsigned nslicesx = 4; -static unsigned nslicesy = 4; -static unsigned nslicesz = 4; +static size_t nslicesx = 4; +static size_t nslicesy = 4; +static size_t nslicesz = 4; #if defined(STARPU_QUICK_CHECK) && !defined(STARPU_SIMGRID) -static unsigned xdim = 256; -static unsigned ydim = 256; -static unsigned zdim = 64; +static size_t xdim = 256; +static size_t ydim = 256; +static size_t zdim = 64; #else -static unsigned xdim = 960*4; -static unsigned ydim = 960*4; -static unsigned zdim = 960*4; +static size_t xdim = 960*4; +static size_t ydim = 960*4; +static size_t zdim = 960*4; #endif static unsigned check = 0; static unsigned bound = 0; @@ -150,13 +150,13 @@ static void hipblas_mult(void *descr[], void *arg, const TYPE *beta) TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); - unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]); - unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]); - unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); + size_t nxC = 
STARPU_MATRIX_GET_NX(descr[2]); + size_t nyC = STARPU_MATRIX_GET_NY(descr[2]); + size_t nyA = STARPU_MATRIX_GET_NY(descr[0]); - unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); - unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); - unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]); + size_t ldA = STARPU_MATRIX_GET_LD(descr[0]); + size_t ldB = STARPU_MATRIX_GET_LD(descr[1]); + size_t ldC = STARPU_MATRIX_GET_LD(descr[2]); hipblasStatus_t status = HIPBLAS_GEMM(starpu_hipblas_get_local_handle(), HIPBLAS_OP_N, HIPBLAS_OP_N, diff --git a/examples/mult/xgemm_layout.c b/examples/mult/xgemm_layout.c index e521d15952..188d6557e8 100644 --- a/examples/mult/xgemm_layout.c +++ b/examples/mult/xgemm_layout.c @@ -186,13 +186,13 @@ static void cublas_mult2d(void *descr[], void *arg, const TYPE *beta) unsigned devid = starpu_worker_get_devid(worker); TYPE *subC = Cscratch[devid]; - unsigned nxC = STARPU_MATRIX_GET_NY(descr[1]); - unsigned nyC = STARPU_MATRIX_GET_NX(descr[0]); - unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); + size_t nxC = STARPU_MATRIX_GET_NY(descr[1]); + size_t nyC = STARPU_MATRIX_GET_NX(descr[0]); + size_t nyA = STARPU_MATRIX_GET_NY(descr[0]); - unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); - unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); - unsigned ldC = nxC; + size_t ldA = STARPU_MATRIX_GET_LD(descr[0]); + size_t ldB = STARPU_MATRIX_GET_LD(descr[1]); + size_t ldC = nxC; cudaStream_t stream = starpu_cuda_get_local_stream(); @@ -214,13 +214,13 @@ static void cublas_mult(void *descr[], void *arg, const TYPE *beta) TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); - unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]); - unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]); - unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); + size_t nxC = STARPU_MATRIX_GET_NX(descr[2]); + size_t nyC = STARPU_MATRIX_GET_NY(descr[2]); + size_t nyA = STARPU_MATRIX_GET_NY(descr[0]); - unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); - unsigned ldB = 
STARPU_MATRIX_GET_LD(descr[1]); - unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]); + size_t ldA = STARPU_MATRIX_GET_LD(descr[0]); + size_t ldB = STARPU_MATRIX_GET_LD(descr[1]); + size_t ldC = STARPU_MATRIX_GET_LD(descr[2]); cudaStream_t stream = starpu_cuda_get_local_stream(); @@ -228,7 +228,7 @@ static void cublas_mult(void *descr[], void *arg, const TYPE *beta) cudaMemsetAsync(subC, 0, sizeof(*subC) * nxC * nyC, stream); else { - unsigned i; + size_t i; for (i = 0; i < nyC; i++) cudaMemsetAsync(subC + i*ldC, 0, sizeof(*subC) * nxC, stream); } @@ -257,14 +257,14 @@ void cpu_mult2d(void *descr[], void *arg, TYPE beta) TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); - unsigned nxC = STARPU_MATRIX_GET_NY(descr[1]); - unsigned nyC = STARPU_MATRIX_GET_NX(descr[0]); - unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); + size_t nxC = STARPU_MATRIX_GET_NY(descr[1]); + size_t nyC = STARPU_MATRIX_GET_NX(descr[0]); + size_t nyA = STARPU_MATRIX_GET_NY(descr[0]); - unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); - unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); + size_t ldA = STARPU_MATRIX_GET_LD(descr[0]); + size_t ldB = STARPU_MATRIX_GET_LD(descr[1]); - unsigned ldC = nxC; + size_t ldC = nxC; TYPE subC[nxC*nyC]; @@ -301,13 +301,13 @@ void cpu_mult(void *descr[], void *arg, TYPE beta) TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); - unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]); - unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]); - unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); + size_t nxC = STARPU_MATRIX_GET_NX(descr[2]); + size_t nyC = STARPU_MATRIX_GET_NY(descr[2]); + size_t nyA = STARPU_MATRIX_GET_NY(descr[0]); - unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); - unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); - unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]); + size_t ldA = STARPU_MATRIX_GET_LD(descr[0]); + size_t ldB = STARPU_MATRIX_GET_LD(descr[1]); + size_t ldC = 
STARPU_MATRIX_GET_LD(descr[2]); int worker_size = starpu_combined_worker_get_size(); @@ -315,7 +315,7 @@ void cpu_mult(void *descr[], void *arg, TYPE beta) memset(subC, 0, sizeof(*subC) * nxC * nyC); else { - unsigned i; + size_t i; for (i = 0; i < nyC; i++) memset(subC + i*ldC, 0, sizeof(*subC) * nxC); } @@ -636,9 +636,9 @@ static void parse_args(int argc, char **argv) { fprintf(stderr,"Usage: %s [-3d] [-nblocks n] [-nblocksx x] [-nblocksy y] [-nblocksz z] [-x x] [-y y] [-xy n] [-z z] [-xyz n] [-size size] [-iter iter] [-bound] [-check] [-spmd] [-hostname] [-nsleeps nsleeps]\n", argv[0]); if (tiled) - fprintf(stderr,"Currently selected: %ux%u * %ux%u and %ux%ux%u blocks (size %ux%u length %u), %u iterations, %u sleeps\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, nslicesz, xdim / nslicesx, ydim / nslicesy, zdim / nslicesz, niter, nsleeps); + fprintf(stderr,"Currently selected: %zux%zu * %zux%zu and %zux%zux%zu blocks (size %zux%zu length %zu), %u iterations, %u sleeps\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, nslicesz, xdim / nslicesx, ydim / nslicesy, zdim / nslicesz, niter, nsleeps); else - fprintf(stderr,"Currently selected: %ux%u * %ux%u and %ux%u blocks (size %ux%u length %u), %u iterations, %u sleeps\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, xdim / nslicesx, ydim / nslicesy, zdim, niter, nsleeps); + fprintf(stderr,"Currently selected: %zux%zu * %zux%zu and %zux%zu blocks (size %zux%zu length %zu), %u iterations, %u sleeps\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, xdim / nslicesx, ydim / nslicesy, zdim, niter, nsleeps); exit(EXIT_SUCCESS); } else @@ -1178,7 +1178,7 @@ static int run_data(void) } double average = timing/niter; double deviation = sqrt(fabs(timing_square / niter - average*average)); - PRINTF("%u\t%u\t%u\t%.0f\t%.1f\t%f", xdim, ydim, zdim, timing/niter/1000.0, flops/timing/1000.0, flops/niter/(average*average)*deviation/1000.0); + PRINTF("%zu\t%zu\t%zu\t%.0f\t%.1f\t%f", xdim, ydim, zdim, timing/niter/1000.0, 
flops/timing/1000.0, flops/niter/(average*average)*deviation/1000.0); if (bound) PRINTF("\t%.0f\t%.1f\t%.0f\t%.1f\t%f", min, flops/min/1000000.0, min_int, flops/min_int/1000000.0, flops/niter/(average*average)*deviation/1000.0); PRINTF("\n"); @@ -1186,7 +1186,7 @@ static int run_data(void) else /* We don't */ { double flops = 2.0 * ((unsigned long long)(niter+1)) * ((unsigned long long)xdim) * ((unsigned long long)ydim) * ((unsigned long long)zdim); - PRINTF("%u\t%u\t%u\t%.0f\t%.1f\t%f", xdim, ydim, zdim, timing/(niter+1)/1000.0, flops/timing/1000.0, 0.0); + PRINTF("%zu\t%zu\t%zu\t%.0f\t%.1f\t%f", xdim, ydim, zdim, timing/(niter+1)/1000.0, flops/timing/1000.0, 0.0); if (bound) PRINTF("\t%.0f\t%.1f\t%.0f\t%.1f\t%f", min, flops/min/1000000.0, min_int, flops/min_int/1000000.0, 0.0); PRINTF("\n"); diff --git a/examples/openmp/vector_scal_omp.c b/examples/openmp/vector_scal_omp.c index e6f78f5ec9..da71bef1e6 100644 --- a/examples/openmp/vector_scal_omp.c +++ b/examples/openmp/vector_scal_omp.c @@ -40,10 +40,10 @@ void scal_cpu_func(void *buffers[], void *_args) { - unsigned i; + size_t i; float *factor = _args, f = *factor; struct starpu_vector_interface *vector = buffers[0]; - unsigned n = STARPU_VECTOR_GET_NX(vector); + size_t n = STARPU_VECTOR_GET_NX(vector); float *val = (float *)STARPU_VECTOR_GET_PTR(vector); FPRINTF(stderr, "running task with %d CPUs.\n", starpu_combined_worker_get_size()); diff --git a/examples/perf_monitoring/perf_counters_02.c b/examples/perf_monitoring/perf_counters_02.c index f58bc3c979..78350dac51 100644 --- a/examples/perf_monitoring/perf_counters_02.c +++ b/examples/perf_monitoring/perf_counters_02.c @@ -79,7 +79,7 @@ void c_listener_cb(struct starpu_perf_counter_listener *listener, struct starpu_ void func(void *buffers[], void *cl_args) { int *int_vector = (int*)STARPU_VECTOR_GET_PTR(buffers[0]); - int NX = (int)STARPU_VECTOR_GET_NX(buffers[0]); + size_t NX = (int)STARPU_VECTOR_GET_NX(buffers[0]); const int niters; 
starpu_codelet_unpack_args(cl_args, &niters); int i; diff --git a/examples/pipeline/pipeline.c b/examples/pipeline/pipeline.c index c4bddc6cec..556ccb008a 100644 --- a/examples/pipeline/pipeline.c +++ b/examples/pipeline/pipeline.c @@ -60,8 +60,8 @@ void pipeline_cpu_x(void *descr[], void *args) { float x; float *val = (float *) STARPU_VECTOR_GET_PTR(descr[0]); - int n = STARPU_VECTOR_GET_NX(descr[0]); - int i; + size_t n = STARPU_VECTOR_GET_NX(descr[0]); + size_t i; starpu_codelet_unpack_args(args, &x); for (i = 0; i < n ; i++) @@ -89,7 +89,7 @@ void pipeline_cpu_axpy(void *descr[], void *arg) (void)arg; float *x = (float *) STARPU_VECTOR_GET_PTR(descr[0]); float *y = (float *) STARPU_VECTOR_GET_PTR(descr[1]); - int n = STARPU_VECTOR_GET_NX(descr[0]); + size_t n = STARPU_VECTOR_GET_NX(descr[0]); STARPU_SAXPY(n, 1., x, 1, y, 1); } @@ -100,7 +100,7 @@ void pipeline_cublas_axpy(void *descr[], void *arg) (void)arg; float *x = (float *) STARPU_VECTOR_GET_PTR(descr[0]); float *y = (float *) STARPU_VECTOR_GET_PTR(descr[1]); - int n = STARPU_VECTOR_GET_NX(descr[0]); + size_t n = STARPU_VECTOR_GET_NX(descr[0]); float alpha = 1.; cublasStatus_t status = cublasSaxpy(starpu_cublas_get_local_handle(), n, &alpha, x, 1, y, 1); @@ -133,7 +133,7 @@ void pipeline_cpu_sum(void *descr[], void *arg) { (void)arg; float *x = (float *) STARPU_VECTOR_GET_PTR(descr[0]); - int n = STARPU_VECTOR_GET_NX(descr[0]); + size_t n = STARPU_VECTOR_GET_NX(descr[0]); float y; y = STARPU_SASUM(n, x, 1); @@ -146,7 +146,7 @@ void pipeline_cublas_sum(void *descr[], void *arg) { (void)arg; float *x = (float *) STARPU_VECTOR_GET_PTR(descr[0]); - int n = STARPU_VECTOR_GET_NX(descr[0]); + size_t n = STARPU_VECTOR_GET_NX(descr[0]); float y; cublasStatus_t status = cublasSasum(starpu_cublas_get_local_handle(), n, x, 1, &y); diff --git a/examples/ppm_downscaler/yuv_downscaler.c b/examples/ppm_downscaler/yuv_downscaler.c index b37db04570..6f535222c9 100644 --- a/examples/ppm_downscaler/yuv_downscaler.c +++ 
b/examples/ppm_downscaler/yuv_downscaler.c @@ -59,30 +59,30 @@ void ds_kernel_cpu(void *descr[], void *arg) { (void)arg; uint8_t *input = (uint8_t *)STARPU_MATRIX_GET_PTR(descr[0]); - const unsigned input_ld = STARPU_MATRIX_GET_LD(descr[0]); + const size_t input_ld = STARPU_MATRIX_GET_LD(descr[0]); uint8_t *output = (uint8_t *)STARPU_MATRIX_GET_PTR(descr[1]); - const unsigned output_ld = STARPU_MATRIX_GET_LD(descr[1]); + const size_t output_ld = STARPU_MATRIX_GET_LD(descr[1]); - const unsigned ncols = STARPU_MATRIX_GET_NX(descr[0]); - const unsigned nlines = STARPU_MATRIX_GET_NY(descr[0]); + const size_t ncols = STARPU_MATRIX_GET_NX(descr[0]); + const size_t nlines = STARPU_MATRIX_GET_NY(descr[0]); - unsigned line, col; + size_t line, col; for (line = 0; line < nlines; line+=FACTOR) for (col = 0; col < ncols; col+=FACTOR) { unsigned sum = 0; - unsigned lline, lcol; + size_t lline, lcol; for (lline = 0; lline < FACTOR; lline++) for (lcol = 0; lcol < FACTOR; lcol++) { - unsigned in_index = (lcol + col) + (lline + line)*input_ld; + size_t in_index = (lcol + col) + (lline + line)*input_ld; sum += input[in_index]; } - unsigned out_index = (col / FACTOR) + (line / FACTOR)*output_ld; + size_t out_index = (col / FACTOR) + (line / FACTOR)*output_ld; output[out_index] = (uint8_t)(sum/(FACTOR*FACTOR)); } } diff --git a/examples/reductions/dot_product.c b/examples/reductions/dot_product.c index c7dd4d9349..0fef12e49f 100644 --- a/examples/reductions/dot_product.c +++ b/examples/reductions/dot_product.c @@ -223,11 +223,11 @@ void dot_cpu_func(void *descr[], void *cl_arg) float *local_y = (float *)STARPU_VECTOR_GET_PTR(descr[1]); DOT_TYPE *dot = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[2]); - unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + size_t n = STARPU_VECTOR_GET_NX(descr[0]); DOT_TYPE local_dot = 0.0; - unsigned i; + size_t i; for (i = 0; i < n; i++) { local_dot += (DOT_TYPE)local_x[i]*(DOT_TYPE)local_y[i]; @@ -247,7 +247,7 @@ void dot_cuda_func(void *descr[], void 
*cl_arg) float *local_y = (float *)STARPU_VECTOR_GET_PTR(descr[1]); DOT_TYPE *dot = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[2]); - unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + size_t n = STARPU_VECTOR_GET_NX(descr[0]); cudaMemcpyAsync(¤t_dot, dot, sizeof(DOT_TYPE), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); cudaStreamSynchronize(starpu_cuda_get_local_stream()); @@ -277,7 +277,7 @@ void dot_opencl_func(void *buffers[], void *cl_arg) cl_mem x = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); cl_mem y = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[1]); cl_mem dot = (cl_mem) STARPU_VARIABLE_GET_PTR(buffers[2]); - unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + cl_ulong n = STARPU_VECTOR_GET_NX(buffers[0]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); diff --git a/examples/reductions/dot_product_opencl_kernels.cl b/examples/reductions/dot_product_opencl_kernels.cl index 89159017f3..ac8e36c4df 100644 --- a/examples/reductions/dot_product_opencl_kernels.cl +++ b/examples/reductions/dot_product_opencl_kernels.cl @@ -29,10 +29,10 @@ __kernel void _redux_opencl(__global DOT_TYPE *dota, __kernel void _dot_opencl(__global float *x, __global float *y, __global DOT_TYPE *dot, - unsigned n) + ulong n) { /* FIXME: real parallel implementation */ - unsigned i; + ulong i; __local double tmp; tmp = 0.0; for (i = 0; i < n ; i++) diff --git a/examples/reductions/minmax_reduction.c b/examples/reductions/minmax_reduction.c index 9996d05cb0..57b9ab98d1 100644 --- a/examples/reductions/minmax_reduction.c +++ b/examples/reductions/minmax_reduction.c @@ -109,7 +109,7 @@ void minmax_cpu_func(void *descr[], void *cl_arg) (void)cl_arg; /* The array containing the values */ TYPE *local_array = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); - unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + size_t n = STARPU_VECTOR_GET_NX(descr[0]); TYPE *minmax = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]); @@ -117,7 +117,7 @@ void minmax_cpu_func(void *descr[], void 
*cl_arg) TYPE local_max = minmax[1]; /* Compute the min and the max elements in the array */ - unsigned i; + size_t i; for (i = 0; i < n; i++) { TYPE val = local_array[i]; diff --git a/examples/sched_ctx/axpy_partition_gpu.cu b/examples/sched_ctx/axpy_partition_gpu.cu index 226227a1ed..ab0bd74c5f 100644 --- a/examples/sched_ctx/axpy_partition_gpu.cu +++ b/examples/sched_ctx/axpy_partition_gpu.cu @@ -46,30 +46,30 @@ __global__ void saxpy_partitioned(__P_KARGS, int n, float a, float *x, float *y) if (i>>(__P_HKARGS,n,a,x,y); diff --git a/examples/sched_ctx/gpu_partition.c b/examples/sched_ctx/gpu_partition.c index b35d0b1a4f..0e4e1ed2ee 100644 --- a/examples/sched_ctx/gpu_partition.c +++ b/examples/sched_ctx/gpu_partition.c @@ -46,12 +46,12 @@ void axpy_cpu(void *descr[], void *arg) { float alpha = *((float *)arg); - unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + size_t n = STARPU_VECTOR_GET_NX(descr[0]); float *block_x = (float *)STARPU_VECTOR_GET_PTR(descr[0]); float *block_y = (float *)STARPU_VECTOR_GET_PTR(descr[1]); - unsigned i; + size_t i; for(i = 0; i < n; i++) block_y[i] = alpha * block_x[i] + block_y[i]; } diff --git a/examples/sched_ctx/parallel_tasks_reuse_handle.c b/examples/sched_ctx/parallel_tasks_reuse_handle.c index c4bd77f9e0..74cd4cee7d 100644 --- a/examples/sched_ctx/parallel_tasks_reuse_handle.c +++ b/examples/sched_ctx/parallel_tasks_reuse_handle.c @@ -149,9 +149,10 @@ static void sum_cpu(void * descr[], void *cl_arg) double *v_dst = (double *) STARPU_VECTOR_GET_PTR(descr[0]); double *v_src0 = (double *) STARPU_VECTOR_GET_PTR(descr[1]); double *v_src1 = (double *) STARPU_VECTOR_GET_PTR(descr[2]); - int size = STARPU_VECTOR_GET_NX(descr[0]); + size_t size = STARPU_VECTOR_GET_NX(descr[0]); - int i, k; + size_t i; + int k; for (k=0;kmpi_node; - unsigned size_bz = block_sizes_z[bz]; + size_t size_bz = block_sizes_z[bz]; if (node == rank) { @@ -373,7 +373,7 @@ void allocate_memory_on_node(int rank) void free_memory_on_node(int rank) { - unsigned bz; 
+ size_t bz; for (bz = 0; bz < nbz; bz++) { struct block_description *block = get_block_description(bz); @@ -421,7 +421,7 @@ void free_memory_on_node(int rank) /* check how many cells are alive */ void check(int rank) { - unsigned bz; + size_t bz; for (bz = 0; bz < nbz; bz++) { struct block_description *block = get_block_description(bz); @@ -431,15 +431,15 @@ void check(int rank) /* Main blocks */ if (node == rank) { - unsigned size_bz = block_sizes_z[bz]; + size_t size_bz = block_sizes_z[bz]; #ifdef LIFE - unsigned x, y, z; - unsigned sum = 0; + size_t x, y, z; + size_t sum = 0; for (x = 0; x < sizex; x++) for (y = 0; y < sizey; y++) for (z = 0; z < size_bz; z++) sum += block->layers[0][(K+x)+(K+y)*(sizex + 2*K)+(K+z)*(sizex+2*K)*(sizey+2*K)]; - printf("block %u got %u/%u alive\n", bz, sum, sizex*sizey*size_bz); + printf("block %zu got %zu/%zu alive\n", bz, sum, sizex*sizey*size_bz); #endif } } diff --git a/examples/stencil/implicit-stencil-kernels.c b/examples/stencil/implicit-stencil-kernels.c index b19c7cadf3..c840417512 100644 --- a/examples/stencil/implicit-stencil-kernels.c +++ b/examples/stencil/implicit-stencil-kernels.c @@ -100,7 +100,7 @@ double *last_tick; static int achieved_iter; /* Record how many updates each worker performed */ -unsigned update_per_worker[STARPU_NMAXWORKERS]; +size_t update_per_worker[STARPU_NMAXWORKERS]; static void record_who_runs_what(struct block_description *block) { @@ -140,7 +140,7 @@ static void check_load(struct starpu_block_interface *block, struct starpu_block */ static void load_subblock_from_buffer_cpu(void *_block, void *_boundary, - unsigned firstz) + size_t firstz) { struct starpu_block_interface *block = (struct starpu_block_interface *)_block; struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary; @@ -149,7 +149,7 @@ static void load_subblock_from_buffer_cpu(void *_block, /* We do a contiguous memory transfer */ size_t boundary_size = K*block->ldz*block->elemsize; - unsigned offset = 
firstz*block->ldz; + size_t offset = firstz*block->ldz; TYPE *block_data = (TYPE *)block->ptr; TYPE *boundary_data = (TYPE *)boundary->ptr; memcpy(&block_data[offset], boundary_data, boundary_size); @@ -161,7 +161,7 @@ static void load_subblock_from_buffer_cpu(void *_block, #ifdef STARPU_USE_CUDA static void load_subblock_from_buffer_cuda(void *_block, void *_boundary, - unsigned firstz) + size_t firstz) { struct starpu_block_interface *block = (struct starpu_block_interface *)_block; struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary; @@ -170,7 +170,7 @@ static void load_subblock_from_buffer_cuda(void *_block, /* We do a contiguous memory transfer */ size_t boundary_size = K*block->ldz*block->elemsize; - unsigned offset = firstz*block->ldz; + size_t offset = firstz*block->ldz; TYPE *block_data = (TYPE *)block->ptr; TYPE *boundary_data = (TYPE *)boundary->ptr; cudaMemcpyAsync(&block_data[offset], boundary_data, boundary_size, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); @@ -181,16 +181,16 @@ static void load_subblock_from_buffer_cuda(void *_block, */ static void update_func_cuda(void *descr[], void *arg) { - unsigned z; + size_t z; starpu_codelet_unpack_args(arg, &z); struct block_description *block = get_block_description(z); int workerid = starpu_worker_get_id_check(); DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); if (block->bz == 0) - FPRINTF(stderr,"!!! DO update_func_cuda z %u CUDA%d !!!\n", block->bz, workerid); + FPRINTF(stderr,"!!! DO update_func_cuda z %zu CUDA%d !!!\n", block->bz, workerid); else - DEBUG("!!! DO update_func_cuda z %u CUDA%d !!!\n", block->bz, workerid); + DEBUG("!!! 
DO update_func_cuda z %zu CUDA%d !!!\n", block->bz, workerid); #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) int rank = 0; MPI_Comm_rank(MPI_COMM_WORLD, &rank); @@ -198,8 +198,8 @@ static void update_func_cuda(void *descr[], void *arg) #endif DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); - unsigned block_size_z = get_block_size(block->bz); - unsigned i; + size_t block_size_z = get_block_size(block->bz); + size_t i; update_per_worker[workerid]++; record_who_runs_what(block); @@ -246,14 +246,14 @@ static void update_func_cuda(void *descr[], void *arg) #ifdef STARPU_USE_OPENCL static void load_subblock_from_buffer_opencl(struct starpu_block_interface *block, struct starpu_block_interface *boundary, - unsigned firstz) + size_t firstz) { check_load(block, boundary); /* We do a contiguous memory transfer */ size_t boundary_size = K*block->ldz*block->elemsize; - unsigned offset = firstz*block->ldz; + size_t offset = firstz*block->ldz; cl_mem block_data = (cl_mem)block->dev_handle; cl_mem boundary_data = (cl_mem)boundary->dev_handle; @@ -268,16 +268,16 @@ static void load_subblock_from_buffer_opencl(struct starpu_block_interface *bloc */ static void update_func_opencl(void *descr[], void *arg) { - unsigned z; + size_t z; starpu_codelet_unpack_args(arg, &z); struct block_description *block = get_block_description(z); int workerid = starpu_worker_get_id_check(); DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); if (block->bz == 0) - FPRINTF(stderr,"!!! DO update_func_opencl z %u OPENCL%d !!!\n", block->bz, workerid); + FPRINTF(stderr,"!!! DO update_func_opencl z %zu OPENCL%d !!!\n", block->bz, workerid); else - DEBUG("!!! DO update_func_opencl z %u OPENCL%d !!!\n", block->bz, workerid); + DEBUG("!!! 
DO update_func_opencl z %zu OPENCL%d !!!\n", block->bz, workerid); #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) int rank = 0; MPI_Comm_rank(MPI_COMM_WORLD, &rank); @@ -285,8 +285,8 @@ static void update_func_opencl(void *descr[], void *arg) #endif DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); - unsigned block_size_z = get_block_size(block->bz); - unsigned i; + size_t block_size_z = get_block_size(block->bz); + size_t i; update_per_worker[workerid]++; record_who_runs_what(block); @@ -338,16 +338,16 @@ static void update_func_opencl(void *descr[], void *arg) */ void update_func_cpu(void *descr[], void *arg) { - unsigned zz; + size_t zz; starpu_codelet_unpack_args(arg, &zz); struct block_description *block = get_block_description(zz); int workerid = starpu_worker_get_id_check(); DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); if (block->bz == 0) - DEBUG("!!! DO update_func_cpu z %u worker%d !!!\n", block->bz, workerid); + DEBUG("!!! DO update_func_cpu z %zu worker%d !!!\n", block->bz, workerid); else - DEBUG("!!! DO update_func_cpu z %u worker%d !!!\n", block->bz, workerid); + DEBUG("!!! 
DO update_func_cpu z %zu worker%d !!!\n", block->bz, workerid); #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) int rank = 0; MPI_Comm_rank(MPI_COMM_WORLD, &rank); @@ -355,8 +355,8 @@ void update_func_cpu(void *descr[], void *arg) #endif DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); - unsigned block_size_z = get_block_size(block->bz); - unsigned i; + size_t block_size_z = get_block_size(block->bz); + size_t i; update_per_worker[workerid]++; record_who_runs_what(block); @@ -385,15 +385,15 @@ void update_func_cpu(void *descr[], void *arg) TYPE *old = (TYPE*) oldb->ptr, *newer = (TYPE*) newb->ptr; /* Shadow data */ - unsigned ldy = oldb->ldy, ldz = oldb->ldz; - unsigned nx = oldb->nx, ny = oldb->ny, nz = oldb->nz; - unsigned x, y, z; - unsigned stepx = 1; - unsigned stepy = 1; - unsigned stepz = 1; - unsigned idx = 0; - unsigned idy = 0; - unsigned idz = 0; + size_t ldy = oldb->ldy, ldz = oldb->ldz; + size_t nx = oldb->nx, ny = oldb->ny, nz = oldb->nz; + size_t x, y, z; + size_t stepx = 1; + size_t stepy = 1; + size_t stepz = 1; + size_t idx = 0; + size_t idy = 0; + size_t idz = 0; TYPE *ptr = old; # include "shadow.h" @@ -437,7 +437,7 @@ struct starpu_codelet cl_update = /* CPU version */ static void load_subblock_into_buffer_cpu(void *_block, void *_boundary, - unsigned firstz) + size_t firstz) { struct starpu_block_interface *block = (struct starpu_block_interface *)_block; struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary; @@ -446,7 +446,7 @@ static void load_subblock_into_buffer_cpu(void *_block, /* We do a contiguous memory transfer */ size_t boundary_size = K*block->ldz*block->elemsize; - unsigned offset = firstz*block->ldz; + size_t offset = firstz*block->ldz; TYPE *block_data = (TYPE *)block->ptr; TYPE *boundary_data = (TYPE *)boundary->ptr; memcpy(boundary_data, &block_data[offset], boundary_size); @@ -456,7 +456,7 @@ static void load_subblock_into_buffer_cpu(void 
*_block, #ifdef STARPU_USE_CUDA static void load_subblock_into_buffer_cuda(void *_block, void *_boundary, - unsigned firstz) + size_t firstz) { struct starpu_block_interface *block = (struct starpu_block_interface *)_block; struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary; @@ -465,7 +465,7 @@ static void load_subblock_into_buffer_cuda(void *_block, /* We do a contiguous memory transfer */ size_t boundary_size = K*block->ldz*block->elemsize; - unsigned offset = firstz*block->ldz; + size_t offset = firstz*block->ldz; TYPE *block_data = (TYPE *)block->ptr; TYPE *boundary_data = (TYPE *)boundary->ptr; cudaMemcpyAsync(boundary_data, &block_data[offset], boundary_size, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); @@ -476,14 +476,14 @@ static void load_subblock_into_buffer_cuda(void *_block, #ifdef STARPU_USE_OPENCL static void load_subblock_into_buffer_opencl(struct starpu_block_interface *block, struct starpu_block_interface *boundary, - unsigned firstz) + size_t firstz) { check_load(block, boundary); /* We do a contiguous memory transfer */ size_t boundary_size = K*block->ldz*block->elemsize; - unsigned offset = firstz*block->ldz; + size_t offset = firstz*block->ldz; cl_mem block_data = (cl_mem)block->dev_handle; cl_mem boundary_data = (cl_mem)boundary->dev_handle; @@ -496,13 +496,13 @@ static void load_subblock_into_buffer_opencl(struct starpu_block_interface *bloc #endif /* STARPU_USE_OPENCL */ /* Record how many top/bottom saves each worker performed */ -unsigned top_per_worker[STARPU_NMAXWORKERS]; -unsigned bottom_per_worker[STARPU_NMAXWORKERS]; +size_t top_per_worker[STARPU_NMAXWORKERS]; +size_t bottom_per_worker[STARPU_NMAXWORKERS]; /* top save, CPU version */ void dummy_func_top_cpu(void *descr[], void *arg) { - unsigned z; + size_t z; starpu_codelet_unpack_args(arg, &z); struct block_description *block = get_block_description(z); @@ -512,7 +512,7 @@ void dummy_func_top_cpu(void *descr[], void *arg) DEBUG("DO 
SAVE Bottom block %d\n", block->bz); /* The offset along the z axis is (block_size_z + K)- K */ - unsigned block_size_z = get_block_size(block->bz); + size_t block_size_z = get_block_size(block->bz); load_subblock_into_buffer_cpu(descr[0], descr[2], block_size_z); load_subblock_into_buffer_cpu(descr[1], descr[3], block_size_z); @@ -521,7 +521,7 @@ void dummy_func_top_cpu(void *descr[], void *arg) /* bottom save, CPU version */ void dummy_func_bottom_cpu(void *descr[], void *arg) { - unsigned z; + size_t z; starpu_codelet_unpack_args(arg, &z); struct block_description *block = get_block_description(z); STARPU_ASSERT(block); @@ -539,7 +539,7 @@ void dummy_func_bottom_cpu(void *descr[], void *arg) #ifdef STARPU_USE_CUDA static void dummy_func_top_cuda(void *descr[], void *arg) { - unsigned z; + size_t z; starpu_codelet_unpack_args(arg, &z); struct block_description *block = get_block_description(z); @@ -549,7 +549,7 @@ static void dummy_func_top_cuda(void *descr[], void *arg) DEBUG("DO SAVE Top block %d\n", block->bz); /* The offset along the z axis is (block_size_z + K)- K */ - unsigned block_size_z = get_block_size(block->bz); + size_t block_size_z = get_block_size(block->bz); load_subblock_into_buffer_cuda(descr[0], descr[2], block_size_z); load_subblock_into_buffer_cuda(descr[1], descr[3], block_size_z); @@ -558,7 +558,7 @@ static void dummy_func_top_cuda(void *descr[], void *arg) /* bottom save, CUDA version */ static void dummy_func_bottom_cuda(void *descr[], void *arg) { - unsigned z; + size_t z; starpu_codelet_unpack_args(arg, &z); struct block_description *block = get_block_description(z); (void) block; @@ -577,7 +577,7 @@ static void dummy_func_bottom_cuda(void *descr[], void *arg) #ifdef STARPU_USE_OPENCL static void dummy_func_top_opencl(void *descr[], void *arg) { - unsigned z; + size_t z; starpu_codelet_unpack_args(arg, &z); struct block_description *block = get_block_description(z); @@ -587,7 +587,7 @@ static void dummy_func_top_opencl(void *descr[], 
void *arg) DEBUG("DO SAVE Top block %d\n", block->bz); /* The offset along the z axis is (block_size_z + K)- K */ - unsigned block_size_z = get_block_size(block->bz); + size_t block_size_z = get_block_size(block->bz); load_subblock_into_buffer_opencl(descr[0], descr[2], block_size_z); load_subblock_into_buffer_opencl(descr[1], descr[3], block_size_z); @@ -596,7 +596,7 @@ static void dummy_func_top_opencl(void *descr[], void *arg) /* bottom save, OPENCL version */ static void dummy_func_bottom_opencl(void *descr[], void *arg) { - unsigned z; + size_t z; starpu_codelet_unpack_args(arg, &z); struct block_description *block = get_block_description(z); (void) block; @@ -660,12 +660,12 @@ struct starpu_codelet save_cl_top = void memset_func(void *descr[], void *arg) { (void)descr; - unsigned sizex, sizey, bz; + size_t sizex, sizey, bz; starpu_codelet_unpack_args(arg, &sizex, &sizey, &bz); struct block_description *block = get_block_description(bz); - unsigned size_bz = get_block_size(bz); + size_t size_bz = get_block_size(bz); - unsigned x,y,z; + size_t x,y,z; for (x = 0; x < sizex + 2*K; x++) { for (y = 0; y < sizey + 2*K; y++) @@ -725,14 +725,14 @@ struct starpu_codelet cl_memset = static void initlayer_func(void *descr[], void *arg) { (void)descr; - unsigned sizex, sizey, bz; + size_t sizex, sizey, bz; starpu_codelet_unpack_args(arg, &sizex, &sizey, &bz); struct block_description *block = get_block_description(bz); - unsigned size_bz = get_block_size(bz); + size_t size_bz = get_block_size(bz); /* Initialize layer with some random data */ - unsigned x, y, z; - unsigned sum = 0; + size_t x, y, z; + size_t sum = 0; for (x = 0; x < sizex; x++) for (y = 0; y < sizey; y++) for (z = 0; z < size_bz; z++) diff --git a/examples/stencil/implicit-stencil-tasks.c b/examples/stencil/implicit-stencil-tasks.c index 3afaca5438..f12dc639a7 100644 --- a/examples/stencil/implicit-stencil-tasks.c +++ b/examples/stencil/implicit-stencil-tasks.c @@ -44,14 +44,14 @@ * Schedule initialization 
tasks */ -void create_task_memset(unsigned sizex, unsigned sizey, unsigned z) +void create_task_memset(size_t sizex, size_t sizey, size_t z) { struct block_description *descr = get_block_description(z); int ret = starpu_task_insert(&cl_memset, - STARPU_VALUE, &sizex, sizeof(unsigned), - STARPU_VALUE, &sizey, sizeof(unsigned), - STARPU_VALUE, &z, sizeof(unsigned), + STARPU_VALUE, &sizex, sizeof(size_t), + STARPU_VALUE, &sizey, sizeof(size_t), + STARPU_VALUE, &z, sizeof(size_t), STARPU_W, descr->layers_handle[0], STARPU_W, descr->layers_handle[1], STARPU_W, descr->boundaries_handle[T][0], @@ -69,14 +69,14 @@ void create_task_memset(unsigned sizex, unsigned sizey, unsigned z) } } -void create_task_initlayer(unsigned sizex, unsigned sizey, unsigned z) +void create_task_initlayer(size_t sizex, size_t sizey, size_t z) { struct block_description *descr = get_block_description(z); int ret = starpu_task_insert(&cl_initlayer, - STARPU_VALUE, &sizex, sizeof(unsigned), - STARPU_VALUE, &sizey, sizeof(unsigned), - STARPU_VALUE, &z, sizeof(unsigned), + STARPU_VALUE, &sizex, sizeof(size_t), + STARPU_VALUE, &sizey, sizeof(size_t), + STARPU_VALUE, &z, sizeof(size_t), STARPU_W, descr->layers_handle[0], 0); @@ -93,7 +93,7 @@ void create_task_initlayer(unsigned sizex, unsigned sizey, unsigned z) * Schedule saving boundaries of blocks to communication buffers */ -static void create_task_save_local(unsigned z, int dir) +static void create_task_save_local(size_t z, int dir) { struct block_description *descr = get_block_description(z); struct starpu_codelet *codelet; @@ -101,7 +101,7 @@ static void create_task_save_local(unsigned z, int dir) codelet = (dir == -1)?&save_cl_bottom:&save_cl_top; ret = starpu_task_insert(codelet, - STARPU_VALUE, &z, sizeof(unsigned), + STARPU_VALUE, &z, sizeof(size_t), STARPU_R, descr->layers_handle[0], STARPU_R, descr->layers_handle[1], STARPU_W, descr->boundaries_handle[(1-dir)/2][0], @@ -122,12 +122,12 @@ static void create_task_save_local(unsigned z, int 
dir) * Schedule update computation in computation buffer */ -void create_task_update(unsigned iter, unsigned z, int local_rank) +void create_task_update(size_t iter, size_t z, int local_rank) { STARPU_ASSERT(iter != 0); - unsigned old_layer = (K*(iter-1)) % 2; - unsigned new_layer = (old_layer + 1) % 2; + size_t old_layer = (K*(iter-1)) % 2; + size_t new_layer = (old_layer + 1) % 2; struct block_description *descr = get_block_description(z); struct block_description *bottom_neighbour = descr->boundary_blocks[B]; @@ -143,7 +143,7 @@ void create_task_update(unsigned iter, unsigned z, int local_rank) ((bottom_neighbour->boundary_blocks[B]->mpi_node != local_rank) || (top_neighbour->boundary_blocks[T]->mpi_node != local_rank)) ? STARPU_MAX_PRIO-1 : STARPU_DEFAULT_PRIO; int ret = starpu_task_insert(codelet, - STARPU_VALUE, &z, sizeof(unsigned), + STARPU_VALUE, &z, sizeof(size_t), STARPU_RW, descr->layers_handle[old_layer], STARPU_RW, descr->layers_handle[new_layer], STARPU_R, bottom_neighbour->boundaries_handle[T][old_layer], diff --git a/examples/stencil/implicit-stencil.c b/examples/stencil/implicit-stencil.c index 781525f7d4..8fe80d01c8 100644 --- a/examples/stencil/implicit-stencil.c +++ b/examples/stencil/implicit-stencil.c @@ -25,25 +25,25 @@ /* default parameter values */ static unsigned bind_tasks = 0; -static unsigned ticks = 1000; +static size_t ticks = 1000; #ifdef STARPU_QUICK_CHECK -static unsigned niter = 4; +static size_t niter = 4; #define SIZE 16 #define NBZ 8 #else -static unsigned niter = 32; +static size_t niter = 32; #define SIZE 128 #define NBZ 64 #endif /* Problem size */ -static unsigned sizex = SIZE; -static unsigned sizey = SIZE; -static unsigned sizez = NBZ*SIZE; +static size_t sizex = SIZE; +static size_t sizey = SIZE; +static size_t sizez = NBZ*SIZE; /* Number of blocks (scattered over the different MPI processes) */ -unsigned nbz = NBZ; +size_t nbz = NBZ; double start; double begin, end; @@ -53,22 +53,22 @@ double timing; * Initialization 
*/ -unsigned get_bind_tasks(void) +size_t get_bind_tasks(void) { return bind_tasks; } -unsigned get_nbz(void) +size_t get_nbz(void) { return nbz; } -unsigned get_niter(void) +size_t get_niter(void) { return niter; } -unsigned get_ticks(void) +size_t get_ticks(void) { return ticks; } @@ -119,10 +119,10 @@ static void parse_args(int argc, char **argv) fprintf(stderr, "\n"); fprintf(stderr, "Options:\n"); fprintf(stderr, "-b bind tasks on CPUs/GPUs\n"); - fprintf(stderr, "-nbz Number of blocks on Z axis (%u by default)\n", nbz); - fprintf(stderr, "-size[xyz] Domain size on x/y/z axis (%ux%ux%u by default)\n", sizex, sizey, sizez); - fprintf(stderr, "-niter Number of iterations (%u by default)\n", niter); - fprintf(stderr, "-ticks How often to put ticks in the output (ms, %u by default)\n", ticks); + fprintf(stderr, "-nbz Number of blocks on Z axis (%zu by default)\n", nbz); + fprintf(stderr, "-size[xyz] Domain size on x/y/z axis (%zux%zux%zu by default)\n", sizex, sizey, sizez); + fprintf(stderr, "-niter Number of iterations (%zu by default)\n", niter); + fprintf(stderr, "-ticks How often to put ticks in the output (ms, %zu by default)\n", ticks); exit(0); } } @@ -177,9 +177,9 @@ static void free_problem(int rank) * Main body */ -void func(unsigned task_per_worker[STARPU_NMAXWORKERS]) +void func(size_t task_per_worker[STARPU_NMAXWORKERS]) { - unsigned total = 0; + size_t total = 0; int worker; for (worker = 0; worker < STARPU_NMAXWORKERS; worker++) @@ -190,7 +190,7 @@ void func(unsigned task_per_worker[STARPU_NMAXWORKERS]) { char name[64]; starpu_worker_get_name(worker, name, sizeof(name)); - FPRINTF(stderr,"\t%s -> %u (%2.2f%%)\n", name, task_per_worker[worker], (100.0*task_per_worker[worker])/total); + FPRINTF(stderr,"\t%s -> %zu (%2.2f%%)\n", name, task_per_worker[worker], (100.0*task_per_worker[worker])/total); } } } @@ -339,12 +339,12 @@ int main(int argc, char **argv) func(bottom_per_worker); #endif #if 1 - unsigned nzblocks_per_process = (nbz + world_size - 1) 
/ world_size; + size_t nzblocks_per_process = (nbz + world_size - 1) / world_size; int iter; for (iter = 0; iter < who_runs_what_len; iter++) { - unsigned last, bz; + size_t last, bz; last = 1; for (bz = 0; bz < nbz; bz++) { diff --git a/examples/stencil/implicit-stencil.h b/examples/stencil/implicit-stencil.h index 5c682a1eaf..93b35f7884 100644 --- a/examples/stencil/implicit-stencil.h +++ b/examples/stencil/implicit-stencil.h @@ -58,7 +58,7 @@ struct block_description unsigned preferred_worker; - unsigned bz; + size_t bz; /* For each of the following buffers, there are two (0/1) buffers to @@ -88,7 +88,7 @@ int MPI_TAG1(int z, int iter, int dir); #define MIN(a,b) ((a)<(b)?(a):(b)) -void create_blocks_array(unsigned sizex, unsigned sizey, unsigned sizez, unsigned nbz); +void create_blocks_array(size_t sizex, size_t sizey, size_t sizez, size_t nbz); void free_blocks_array(); struct block_description *get_block_description(int z); void assign_blocks_to_mpi_nodes(int world_size); @@ -102,19 +102,19 @@ void free_memory_on_node(int rank); void display_memory_consumption(int rank, double time); int get_block_mpi_node(int z); -unsigned get_block_size(int z); -unsigned get_bind_tasks(void); +size_t get_block_size(int z); +size_t get_bind_tasks(void); -unsigned get_nbz(void); -unsigned get_niter(void); -unsigned get_ticks(void); +size_t get_nbz(void); +size_t get_niter(void); +size_t get_ticks(void); unsigned global_workerid(unsigned local_workerid); -void create_task_memset(unsigned sizex, unsigned sizey, unsigned z); -void create_task_initlayer(unsigned sizex, unsigned sizey, unsigned z); -void create_task_update(unsigned iter, unsigned z, int local_rank); -void create_task_save(unsigned iter, unsigned z, int dir, int local_rank); +void create_task_memset(size_t sizex, size_t sizey, size_t z); +void create_task_initlayer(size_t sizex, size_t sizey, size_t z); +void create_task_update(size_t iter, size_t z, int local_rank); +void create_task_save(size_t iter, size_t z, 
int dir, int local_rank); extern int starpu_mpi_initialize(void); extern int starpu_mpi_shutdown(void); @@ -126,9 +126,9 @@ extern struct starpu_codelet save_cl_top; extern struct starpu_codelet cl_memset; extern struct starpu_codelet cl_initlayer; -extern unsigned update_per_worker[STARPU_NMAXWORKERS]; -extern unsigned top_per_worker[STARPU_NMAXWORKERS]; -extern unsigned bottom_per_worker[STARPU_NMAXWORKERS]; +extern size_t update_per_worker[STARPU_NMAXWORKERS]; +extern size_t top_per_worker[STARPU_NMAXWORKERS]; +extern size_t bottom_per_worker[STARPU_NMAXWORKERS]; extern double start; extern int who_runs_what_len; diff --git a/examples/subgraphs/codelets.c b/examples/subgraphs/codelets.c index abbf5d02d9..ad39d6030a 100644 --- a/examples/subgraphs/codelets.c +++ b/examples/subgraphs/codelets.c @@ -20,13 +20,13 @@ void matrix_fill(void *buffers[], void *cl_arg) { - unsigned i, j; + size_t i, j; (void)cl_arg; /* length of the matrix */ - unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]); - unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]); - unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); + size_t nx = STARPU_MATRIX_GET_NX(buffers[0]); + size_t ny = STARPU_MATRIX_GET_NY(buffers[0]); + size_t ld = STARPU_MATRIX_GET_LD(buffers[0]); int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); for(j=0; j #include - - - __global__ void gpuMultKernel ( - uint32_t nxC, uint32_t nyC, uint32_t nyA, - uint32_t ldA, uint32_t ldB, uint32_t ldC, - float * subA, float * subB, float * subC + size_t nxC, size_t nyC, size_t nyA, + size_t ldA, size_t ldB, size_t ldC, + float * subA, float * subB, float * subC ) { - uint32_t id, i, j, k; + size_t id, i, j, k; float sum; id = blockIdx.x * blockDim.x + threadIdx.x; @@ -46,20 +43,16 @@ __global__ void gpuMultKernel } subC[i + j*ldC] = sum; - } - - #define THREADS_PER_BLOCK 64 extern "C" void gpu_mult(void * descr[], void * args) { - float * d_subA, * d_subB, * d_subC; - uint32_t nxC, nyC, nyA; - uint32_t ldA, ldB, ldC; - uint32_t nblocks; + size_t 
nxC, nyC, nyA; + size_t ldA, ldB, ldC; + size_t nblocks; d_subA = (float *) STARPU_MATRIX_GET_PTR(descr[0]); d_subB = (float *) STARPU_MATRIX_GET_PTR(descr[1]); @@ -82,5 +75,4 @@ extern "C" void gpu_mult(void * descr[], void * args) if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); cudaStreamSynchronize(starpu_cuda_get_local_stream()); - } diff --git a/julia/examples/old_examples/mandelbrot/cpu_mandelbrot.c b/julia/examples/old_examples/mandelbrot/cpu_mandelbrot.c index 4fd2c2f7e7..2d3626b2ce 100644 --- a/julia/examples/old_examples/mandelbrot/cpu_mandelbrot.c +++ b/julia/examples/old_examples/mandelbrot/cpu_mandelbrot.c @@ -31,18 +31,17 @@ struct Params void cpu_mandelbrot(void *descr[], void *cl_arg) { - struct Params *params = cl_arg; int *subP; - uint32_t nxP, nyP; - uint32_t ldP; + size_t nxP, nyP; + size_t ldP; subP = (int *)STARPU_MATRIX_GET_PTR(descr[0]); nxP = STARPU_MATRIX_GET_NX(descr[0]); nyP = STARPU_MATRIX_GET_NY(descr[0]); - + ldP = STARPU_MATRIX_GET_LD(descr[0]); float centerr = params->cr; @@ -69,7 +68,7 @@ void cpu_mandelbrot(void *descr[], void *cl_arg) float zr = cr; float zi = ci; float m = zr * zr + zi * zi; - + for (n = 0; n <= max_iter && m < conv_limit * conv_limit; n++) { float tmp = zr*zr - zi*zi + cr; diff --git a/julia/examples/old_examples/mandelbrot/cpu_mandelbrot_between.c b/julia/examples/old_examples/mandelbrot/cpu_mandelbrot_between.c index 4168dc166b..1a250da038 100644 --- a/julia/examples/old_examples/mandelbrot/cpu_mandelbrot_between.c +++ b/julia/examples/old_examples/mandelbrot/cpu_mandelbrot_between.c @@ -44,22 +44,22 @@ static inline long long jlstarpu_interval_size(long long start, long long step, void mandelbrot(void** buffers_86BwRM71, void* cl_arg_86BwRM71) { - uint32_t ld_o2BQqRir = (uint32_t) (STARPU_MATRIX_GET_LD(buffers_86BwRM71[(1) - (1)])); + size_t ld_o2BQqRir = STARPU_MATRIX_GET_LD(buffers_86BwRM71[(1) - (1)]); int64_t* ptr_o2BQqRir = (int64_t*) (STARPU_MATRIX_GET_PTR(buffers_86BwRM71[(1) - (1)])); - 
+ //ARRAY PAR double* ptr_Ul4Ys0Mt = (double*) (STARPU_VECTOR_GET_PTR(buffers_86BwRM71[(2) - (1)])); int64_t* ptr_cE3zj60d = (int64_t*) (STARPU_VECTOR_GET_PTR(buffers_86BwRM71[(3) - (1)])); // - int64_t local_width = (int64_t) (STARPU_MATRIX_GET_NY(buffers_86BwRM71[(1) - (1)])); - int64_t local_height = (int64_t) (STARPU_MATRIX_GET_NX(buffers_86BwRM71[(1) - (1)])); + size_t local_width = STARPU_MATRIX_GET_NY(buffers_86BwRM71[(1) - (1)]); + size_t local_height = STARPU_MATRIX_GET_NX(buffers_86BwRM71[(1) - (1)]); double conv_limit = (double) (2.0); //STRUCT PAR - + /* struct Params *params = cl_arg_86BwRM71; */ - + /* double centerr = params->cr; */ /* double centeri = params->ci; */ @@ -84,7 +84,7 @@ void mandelbrot(void** buffers_86BwRM71, void* cl_arg_86BwRM71) for (x = start_qxJwMzwA ; x <= stop_qxJwMzwA ; x += 1) { - + int64_t start_ekV9GHK1 = (int64_t) (1); int64_t stop_ekV9GHK1 = (int64_t) (local_height); int64_t y; @@ -121,7 +121,7 @@ void mandelbrot(void** buffers_86BwRM71, void* cl_arg_86BwRM71) float m = zr * zr + zi * zi; /* int64_t b1 = (int64_t) (((n) < (max_iterations)) + ((((zr) * (zr)) + ((zi) * (zi))) < ((conv_limit) * (conv_limit)))); */ - + /* while ((b1) >= (2)) */ /* printf("%d\n", max_iterations); */ @@ -135,7 +135,7 @@ void mandelbrot(void** buffers_86BwRM71, void* cl_arg_86BwRM71) /* b1 = ((n) <= (max_iterations)) + ((((zr) * (zr)) + ((zi) * (zi))) <= ((conv_limit) * (conv_limit))); */ } ; - + /* printf("n: %d\n max_iter: %d\n", n, max_iterations); */ if ((n) < (max_iterations)) { @@ -150,5 +150,3 @@ void mandelbrot(void** buffers_86BwRM71, void* cl_arg_86BwRM71) } ; } - - diff --git a/julia/examples/old_examples/mandelbrot/gpu_mandelbrot.cu b/julia/examples/old_examples/mandelbrot/gpu_mandelbrot.cu index 201b2e2c3e..5df0b23277 100644 --- a/julia/examples/old_examples/mandelbrot/gpu_mandelbrot.cu +++ b/julia/examples/old_examples/mandelbrot/gpu_mandelbrot.cu @@ -91,8 +91,8 @@ __global__ void gpuMandelbrotKernel extern "C" void 
gpu_mandelbrot(void *descr[], void *args) { int *d_subP; - uint32_t nxP, nyP; - uint32_t ldP; + size_t nxP, nyP; + size_t ldP; uint32_t nblocks; struct Params *params = (struct Params *) args; diff --git a/julia/examples/old_examples/mandelbrot/gpu_mandelbrot_between.cu b/julia/examples/old_examples/mandelbrot/gpu_mandelbrot_between.cu index 4394c332df..b4e23bfcc4 100644 --- a/julia/examples/old_examples/mandelbrot/gpu_mandelbrot_between.cu +++ b/julia/examples/old_examples/mandelbrot/gpu_mandelbrot_between.cu @@ -50,13 +50,13 @@ __device__ static inline long long jlstarpu_interval_size__device(long long star } -__global__ void mandelbrot(int64_t kernel_ids__start_1, int64_t kernel_ids__step_1, int64_t kernel_ids__dim_1, int64_t kernel_ids__start_2, - int64_t kernel_ids__step_2, int64_t kernel_ids__dim_2, double* ptr_hF6lCYyJ, int64_t local_width, - int64_t* ptr_qoUGBRtY, int64_t local_height, double conv_limit, int64_t* ptr_A5zD9sJZ, +__global__ void mandelbrot(int64_t kernel_ids__start_1, int64_t kernel_ids__step_1, int64_t kernel_ids__dim_1, int64_t kernel_ids__start_2, + int64_t kernel_ids__step_2, int64_t kernel_ids__dim_2, double* ptr_hF6lCYyJ, int64_t local_width, + int64_t* ptr_qoUGBRtY, int64_t local_height, double conv_limit, int64_t* ptr_A5zD9sJZ, uint32_t ld_A5zD9sJZ) { int64_t THREAD_ID = (int64_t) ((((blockIdx).x) * ((blockDim).x)) + ((threadIdx).x)); - + if ((THREAD_ID) >= (((1) * (kernel_ids__dim_2)) * (kernel_ids__dim_1))) { return ; @@ -75,7 +75,7 @@ __global__ void mandelbrot(int64_t kernel_ids__start_1, int64_t kernel_ids__step double zi = (double) (ci); int64_t n = (int64_t) (0); int64_t b1 = (int64_t) (((n) < (max_iterations)) + ((((zr) * (zr)) + ((zi) * (zi))) < ((conv_limit) * (conv_limit)))); - + while ((b1) >= (2)) { double tmp = (double) ((((zr) * (zr)) - ((zi) * (zi))) + (cr)); @@ -85,7 +85,7 @@ __global__ void mandelbrot(int64_t kernel_ids__start_1, int64_t kernel_ids__step b1 = ((n) <= (max_iterations)) + ((((zr) * (zr)) + ((zi) * 
(zi))) <= ((conv_limit) * (conv_limit))); } ; - + if ((n) < (max_iterations)) { ptr_A5zD9sJZ[((y) + (((x) - (1)) * (ld_A5zD9sJZ))) - (1)] = ((255) * (n)) / (max_iterations); @@ -100,12 +100,12 @@ __global__ void mandelbrot(int64_t kernel_ids__start_1, int64_t kernel_ids__step extern "C" void CUDA_mandelbrot(void** buffers_uwrYFDVe, void* cl_arg_uwrYFDVe) { - uint32_t ld_A5zD9sJZ = (uint32_t) (STARPU_MATRIX_GET_LD(buffers_uwrYFDVe[(1) - (1)])); + size_t ld_A5zD9sJZ = STARPU_MATRIX_GET_LD(buffers_uwrYFDVe[(1) - (1)]); int64_t* ptr_A5zD9sJZ = (int64_t*) (STARPU_MATRIX_GET_PTR(buffers_uwrYFDVe[(1) - (1)])); double* ptr_hF6lCYyJ = (double*) (STARPU_VECTOR_GET_PTR(buffers_uwrYFDVe[(2) - (1)])); int64_t* ptr_qoUGBRtY = (int64_t*) (STARPU_VECTOR_GET_PTR(buffers_uwrYFDVe[(3) - (1)])); - int64_t local_width = (int64_t) (STARPU_MATRIX_GET_NY(buffers_uwrYFDVe[(1) - (1)])); - int64_t local_height = (int64_t) (STARPU_MATRIX_GET_NX(buffers_uwrYFDVe[(1) - (1)])); + size_t local_width = STARPU_MATRIX_GET_NY(buffers_uwrYFDVe[(1) - (1)]); + size_t local_height = STARPU_MATRIX_GET_NX(buffers_uwrYFDVe[(1) - (1)]); double conv_limit = (double) (2.0); int64_t kernel_ids__start_1 = (int64_t) (1); int64_t kernel_ids__step_1 = (int64_t) (1); @@ -115,17 +115,15 @@ extern "C" void CUDA_mandelbrot(void** buffers_uwrYFDVe, void* cl_arg_uwrYFDVe) int64_t kernel_ids__dim_2 = (int64_t) (jlstarpu_interval_size(kernel_ids__start_2, kernel_ids__step_2, local_height)); int64_t nthreads = (int64_t) (((1) * (kernel_ids__dim_1)) * (kernel_ids__dim_2)); int64_t nblocks = (int64_t) ((((nthreads) + (THREADS_PER_BLOCK)) - (1)) / (THREADS_PER_BLOCK)); - + mandelbrot <<< nblocks, THREADS_PER_BLOCK, 0, starpu_cuda_get_local_stream() - >>> (kernel_ids__start_1, kernel_ids__step_1, kernel_ids__dim_1, kernel_ids__start_2, - kernel_ids__step_2, kernel_ids__dim_2, ptr_hF6lCYyJ, local_width, - ptr_qoUGBRtY, local_height, conv_limit, ptr_A5zD9sJZ, + >>> (kernel_ids__start_1, kernel_ids__step_1, kernel_ids__dim_1, 
kernel_ids__start_2, + kernel_ids__step_2, kernel_ids__dim_2, ptr_hF6lCYyJ, local_width, + ptr_qoUGBRtY, local_height, conv_limit, ptr_A5zD9sJZ, ld_A5zD9sJZ); ; cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } - - diff --git a/julia/examples/old_examples/mult/cpu_mult.c b/julia/examples/old_examples/mult/cpu_mult.c index 7fa06c5050..4402874ebc 100644 --- a/julia/examples/old_examples/mult/cpu_mult.c +++ b/julia/examples/old_examples/mult/cpu_mult.c @@ -1,6 +1,6 @@ /* StarPU --- Runtime system for heterogeneous multicore architectures. * - * Copyright (C) 2020 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2020-2024 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2019 Mael Keryell * * StarPU is free software; you can redistribute it and/or modify @@ -28,8 +28,8 @@ void cpu_mult(void *descr[], void *arg) { (void)arg; float *subA, *subB, *subC; - uint32_t nxC, nyC, nyA; - uint32_t ldA, ldB, ldC; + size_t nxC, nyC, nyA; + size_t ldA, ldB, ldC; /* .blas.ptr gives a pointer to the first element of the local copy */ subA = (float *)STARPU_MATRIX_GET_PTR(descr[0]); diff --git a/julia/examples/old_examples/mult/gpu_mult.cu b/julia/examples/old_examples/mult/gpu_mult.cu index 64fb80c5ba..530e4f640a 100644 --- a/julia/examples/old_examples/mult/gpu_mult.cu +++ b/julia/examples/old_examples/mult/gpu_mult.cu @@ -1,6 +1,6 @@ /* StarPU --- Runtime system for heterogeneous multicore architectures. 
* - * Copyright (C) 2020 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2020-2024 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2019 Mael Keryell * * StarPU is free software; you can redistribute it and/or modify @@ -57,8 +57,8 @@ extern "C" void gpu_mult(void * descr[], void * args) { float * d_subA, * d_subB, * d_subC; - uint32_t nxC, nyC, nyA; - uint32_t ldA, ldB, ldC; + size_t nxC, nyC, nyA; + size_t ldA, ldB, ldC; uint32_t nblocks; d_subA = (float *) STARPU_MATRIX_GET_PTR(descr[0]); diff --git a/julia/examples/old_examples/nbody/cpu_nbody.c b/julia/examples/old_examples/nbody/cpu_nbody.c index db6b5b0328..ad77bd3aec 100644 --- a/julia/examples/old_examples/nbody/cpu_nbody.c +++ b/julia/examples/old_examples/nbody/cpu_nbody.c @@ -30,8 +30,8 @@ void cpu_nbody(void *descr[], void *arg) double *subA; double *M; - uint32_t nxP, nxA, nxM; - uint32_t ldP, ldA, ldM; + size_t nxP, nxA, nxM; + size_t ldP, ldA, ldM; P = (double *)STARPU_MATRIX_GET_PTR(descr[0]); subA = (double *)STARPU_MATRIX_GET_PTR(descr[1]); @@ -49,16 +49,16 @@ void cpu_nbody(void *descr[], void *arg) unsigned id = nxA * params->taskx; - uint32_t i,j; - + size_t i,j; + for (i = 0; i < nxA; i++){ double sumaccx = 0; double sumaccy = 0; - + for (j = 0; j < nxP; j++){ - + if (j != i + id){ - + double dx = P[j] - P[i + id]; double dy = P[j + ldP] - P[i + id + ldP]; @@ -80,8 +80,8 @@ void cpu_nbody2(void *descr[], void *arg) double *subV; double *subA; - uint32_t nxP, nxV, nxA; - uint32_t ldP, ldV, ldA; + size_t nxP, nxV, nxA; + size_t ldP, ldV, ldA; subP = (double *)STARPU_MATRIX_GET_PTR(descr[0]); subV = (double *)STARPU_MATRIX_GET_PTR(descr[1]); @@ -90,16 +90,16 @@ void cpu_nbody2(void *descr[], void *arg) nxP = STARPU_MATRIX_GET_NX(descr[0]); nxV = STARPU_MATRIX_GET_NX(descr[1]); nxA = STARPU_MATRIX_GET_NX(descr[2]); - + ldP = STARPU_MATRIX_GET_LD(descr[0]); ldV = STARPU_MATRIX_GET_LD(descr[1]); ldA = STARPU_MATRIX_GET_LD(descr[2]); - - + + unsigned i,dt; 
dt = 3600; for (i = 0; i < nxP; i++){ - + subV[i] = subV[i] + dt*subA[i]; subV[i + ldV] = subV[i + ldV] + dt*subA[i + ldA]; @@ -107,4 +107,3 @@ void cpu_nbody2(void *descr[], void *arg) subP[i + ldP] = subP[i + ldP] + dt*subV[i + ldV]; } } - diff --git a/julia/examples/old_examples/nbody/gpu_nbody.cu b/julia/examples/old_examples/nbody/gpu_nbody.cu index 85186b1854..8b1381fc49 100644 --- a/julia/examples/old_examples/nbody/gpu_nbody.cu +++ b/julia/examples/old_examples/nbody/gpu_nbody.cu @@ -26,11 +26,11 @@ struct Params }; __global__ void gpuNbodyKernel(double *P, double *subA, double *M, - uint32_t nxP, uint32_t nxA, uint32_t nxM, - uint32_t ldP, uint32_t ldA, + size_t nxP, size_t nxA, size_t nxM, + size_t ldP, size_t ldA, struct Params params) { - uint32_t id, i, j, k; + size_t id, i, j, k; double dx, dy, modul; id = blockIdx.x * blockDim.x + threadIdx.x; @@ -72,9 +72,9 @@ extern "C" void gpu_nbody(void * descr[], void * args) { double *d_P, *d_subA, *d_M; - uint32_t nxP, nxA, nxM; - uint32_t ldA, ldP; - uint32_t nblocks; + size_t nxP, nxA, nxM; + size_t ldA, ldP; + size_t nblocks; struct Params *params = (struct Params *) args; @@ -108,12 +108,12 @@ extern "C" void gpu_nbody(void * descr[], void * args) __global__ void gpuNbody2Kernel(double *d_subP, double *d_subV, double *d_subA, - uint32_t nxP, uint32_t nxV, uint32_t nxA, - uint32_t ldP, uint32_t ldV, uint32_t ldA, + size_t nxP, size_t nxV, size_t nxA, + size_t ldP, size_t ldV, size_t ldA, struct Params params) { - uint32_t id, i, j; + size_t id, i, j; id = blockIdx.x * blockDim.x + threadIdx.x; @@ -135,9 +135,9 @@ __global__ void gpuNbody2Kernel(double *d_subP, double *d_subV, double *d_subA, extern "C" void gpu_nbody2(void * descr[], void *args) { double *d_subP, *d_subV, *d_subA; - uint32_t nxP, nxV, nxA; - uint32_t ldP, ldV, ldA; - uint32_t nblocks; + size_t nxP, nxV, nxA; + size_t ldP, ldV, ldA; + size_t nblocks; struct Params *params = (struct Params *) args; diff --git 
a/julia/examples/vector_scal/cpu_vector_scal.c b/julia/examples/vector_scal/cpu_vector_scal.c index c164aa9078..4d7b35cc5a 100644 --- a/julia/examples/vector_scal/cpu_vector_scal.c +++ b/julia/examples/vector_scal/cpu_vector_scal.c @@ -35,7 +35,7 @@ float cpu_vector_scal(void *buffers[], void *cl_arg) struct starpu_vector_interface *vector = (struct starpu_vector_interface *) buffers[0]; /* length of the vector */ - unsigned n = STARPU_VECTOR_GET_NX(vector); + size_t n = STARPU_VECTOR_GET_NX(vector); /* get a pointer to the local copy of the vector : note that we have to * cast it in (float *) since a vector could contain any type of diff --git a/julia/src/blas_wrapper.c b/julia/src/blas_wrapper.c index 3c3631dee5..0b3f9662d7 100644 --- a/julia/src/blas_wrapper.c +++ b/julia/src/blas_wrapper.c @@ -21,7 +21,7 @@ void julia_saxpy_cpu_codelet(void *descr[], void *arg) { float alpha = *((float *)arg); - unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + size_t n = STARPU_VECTOR_GET_NX(descr[0]); float *block_x = (float *)STARPU_VECTOR_GET_PTR(descr[0]); float *block_y = (float *)STARPU_VECTOR_GET_PTR(descr[1]); @@ -38,7 +38,7 @@ void julia_saxpy_cuda_codelet(void *descr[], void *arg) { float alpha = *((float *)arg); - unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + size_t n = STARPU_VECTOR_GET_NX(descr[0]); float *block_x = (float *)STARPU_VECTOR_GET_PTR(descr[0]); float *block_y = (float *)STARPU_VECTOR_GET_PTR(descr[1]); diff --git a/mpi/examples/benchs/abstract_sendrecv_bench.c b/mpi/examples/benchs/abstract_sendrecv_bench.c index 9b3ebe07f0..85f3f8e5b4 100644 --- a/mpi/examples/benchs/abstract_sendrecv_bench.c +++ b/mpi/examples/benchs/abstract_sendrecv_bench.c @@ -27,7 +27,7 @@ static void cuda_memset_codelet(void *descr[], void *arg) (void)arg; char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); - unsigned length = STARPU_VECTOR_GET_NX(descr[0]); + size_t length = STARPU_VECTOR_GET_NX(descr[0]); cudaMemsetAsync(buf, 0, length, starpu_cuda_get_local_stream()); } @@ 
-38,7 +38,7 @@ static void hip_memset_codelet(void *descr[], void *arg) (void)arg; char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); - unsigned length = STARPU_VECTOR_GET_NX(descr[0]); + size_t length = STARPU_VECTOR_GET_NX(descr[0]); hipMemsetAsync(buf, 0, length, starpu_hip_get_local_stream()); } @@ -49,7 +49,7 @@ void cpu_memset_codelet(void *descr[], void *arg) (void)arg; char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); - unsigned length = STARPU_VECTOR_GET_NX(descr[0]); + size_t length = STARPU_VECTOR_GET_NX(descr[0]); memset(buf, 0, length * sizeof(*buf)); } diff --git a/mpi/examples/benchs/gemm_helper.c b/mpi/examples/benchs/gemm_helper.c index dddc45a5c6..e9b86fef27 100644 --- a/mpi/examples/benchs/gemm_helper.c +++ b/mpi/examples/benchs/gemm_helper.c @@ -97,9 +97,9 @@ static void cpu_init_matrix_random(void *descr[], void *arg) (void)arg; TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); - unsigned nx = STARPU_MATRIX_GET_NX(descr[0]); - unsigned ny = STARPU_MATRIX_GET_NY(descr[0]); - unsigned i = 0; + size_t nx = STARPU_MATRIX_GET_NX(descr[0]); + size_t ny = STARPU_MATRIX_GET_NY(descr[0]); + size_t i; for (i = 0; i < nx *ny; i++) { @@ -113,9 +113,9 @@ static void cpu_init_matrix_zero(void *descr[], void *arg) { (void)arg; TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); - unsigned nx = STARPU_MATRIX_GET_NX(descr[0]); - unsigned ny = STARPU_MATRIX_GET_NY(descr[0]); - unsigned i = 0; + size_t nx = STARPU_MATRIX_GET_NX(descr[0]); + size_t ny = STARPU_MATRIX_GET_NY(descr[0]); + size_t i; for (i = 0; i < nx *ny; i++) { @@ -131,13 +131,13 @@ static void cpu_mult(void *descr[], void *arg) TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); - unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]); - unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]); - unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); + size_t nxC = STARPU_MATRIX_GET_NX(descr[2]); + size_t 
nyC = STARPU_MATRIX_GET_NY(descr[2]); + size_t nyA = STARPU_MATRIX_GET_NY(descr[0]); - unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); - unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); - unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]); + size_t ldA = STARPU_MATRIX_GET_LD(descr[0]); + size_t ldB = STARPU_MATRIX_GET_LD(descr[1]); + size_t ldC = STARPU_MATRIX_GET_LD(descr[2]); int worker_size = starpu_combined_worker_get_size(); diff --git a/mpi/examples/filters/filter.c b/mpi/examples/filters/filter.c index 94c49d16fe..0357c050be 100644 --- a/mpi/examples/filters/filter.c +++ b/mpi/examples/filters/filter.c @@ -26,13 +26,13 @@ void cpu_func(void *buffers[], void *cl_arg) { - unsigned i; + size_t i; int factor; int rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); fprintf(stderr, "computing on rank %d\n", rank); - unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + size_t n = STARPU_VECTOR_GET_NX(buffers[0]); int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); starpu_codelet_unpack_args(cl_arg, &factor); @@ -54,12 +54,12 @@ void vector_filter(void *parent_interface, void *child_interface, struct starpu_ struct starpu_vector_interface *vector_parent = (struct starpu_vector_interface *) parent_interface; struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface; - uint32_t nx = vector_parent->nx; + size_t nx = vector_parent->nx; size_t elemsize = vector_parent->elemsize; - STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %u elements", nchunks, nx); + STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %zu elements", nchunks, nx); STARPU_ASSERT(nchunks == 2); - STARPU_ASSERT_MSG((nx % nchunks) == 0, "nx=%u is not a multiple of nchunks %u\n", nx, nchunks); + STARPU_ASSERT_MSG((nx % nchunks) == 0, "nx=%zu is not a multiple of nchunks %u\n", nx, nchunks); vector_child->id = vector_parent->id; vector_child->nx = nx/2; diff --git a/mpi/examples/matrix_decomposition/mpi_cholesky_kernels.c b/mpi/examples/matrix_decomposition/mpi_cholesky_kernels.c 
index 2d209ee202..f59fdde5ce 100644 --- a/mpi/examples/matrix_decomposition/mpi_cholesky_kernels.c +++ b/mpi/examples/matrix_decomposition/mpi_cholesky_kernels.c @@ -44,29 +44,29 @@ static inline void chol_common_cpu_codelet_update_gemm(void *descr[], int s, voi float *right = (float *)STARPU_MATRIX_GET_PTR(descr[1]); float *center = (float *)STARPU_MATRIX_GET_PTR(descr[2]); - unsigned dx = STARPU_MATRIX_GET_NY(descr[2]); - unsigned dy = STARPU_MATRIX_GET_NX(descr[2]); - unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); + size_t dx = STARPU_MATRIX_GET_NY(descr[2]); + size_t dy = STARPU_MATRIX_GET_NX(descr[2]); + size_t dz = STARPU_MATRIX_GET_NY(descr[0]); - unsigned ld21 = STARPU_MATRIX_GET_LD(descr[0]); - unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]); - unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]); + size_t ld21 = STARPU_MATRIX_GET_LD(descr[0]); + size_t ld12 = STARPU_MATRIX_GET_LD(descr[1]); + size_t ld22 = STARPU_MATRIX_GET_LD(descr[2]); switch (s) { case 0: /* CPU kernel */ STARPU_SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21, - right, ld12, 1.0f, center, ld22); + right, ld12, 1.0f, center, ld22); break; #ifdef STARPU_USE_CUDA case 1: { /* CUDA kernel */ cublasStatus_t status = cublasSgemm(starpu_cublas_get_local_handle(), - CUBLAS_OP_N, CUBLAS_OP_T, dy, dx, dz, - &m1, left, ld21, right, ld12, - &p1, center, ld22); + CUBLAS_OP_N, CUBLAS_OP_T, dy, dx, dz, + &m1, left, ld21, right, ld12, + &p1, center, ld22); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); @@ -162,11 +162,11 @@ static inline void chol_common_codelet_update_trsm(void *descr[], int s, void *_ sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); sub21 = (float *)STARPU_MATRIX_GET_PTR(descr[1]); - unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); - unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); + size_t ld11 = STARPU_MATRIX_GET_LD(descr[0]); + size_t ld21 = STARPU_MATRIX_GET_LD(descr[1]); - unsigned nx21 = STARPU_MATRIX_GET_NY(descr[1]); - unsigned ny21 = 
STARPU_MATRIX_GET_NX(descr[1]); + size_t nx21 = STARPU_MATRIX_GET_NY(descr[1]); + size_t ny21 = STARPU_MATRIX_GET_NX(descr[1]); #ifdef STARPU_USE_CUDA cublasStatus_t status; @@ -180,8 +180,8 @@ static inline void chol_common_codelet_update_trsm(void *descr[], int s, void *_ #ifdef STARPU_USE_CUDA case 1: status = cublasStrsm(starpu_cublas_get_local_handle(), - CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_T, CUBLAS_DIAG_NON_UNIT, - nx21, ny21, &p1, sub11, ld11, sub21, ld21); + CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_T, CUBLAS_DIAG_NON_UNIT, + nx21, ny21, &p1, sub11, ld11, sub21, ld21); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); break; @@ -216,10 +216,10 @@ static inline void chol_common_codelet_update_potrf(void *descr[], int s, void * sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); - unsigned nx = STARPU_MATRIX_GET_NY(descr[0]); - unsigned ld = STARPU_MATRIX_GET_LD(descr[0]); + size_t nx = STARPU_MATRIX_GET_NY(descr[0]); + size_t ld = STARPU_MATRIX_GET_LD(descr[0]); - unsigned z; + size_t z; switch (s) { diff --git a/mpi/examples/matrix_mult/mm.c b/mpi/examples/matrix_mult/mm.c index e9f9a10d06..e152664dd4 100644 --- a/mpi/examples/matrix_mult/mm.c +++ b/mpi/examples/matrix_mult/mm.c @@ -246,17 +246,17 @@ static void cpu_mult(void *handles[], void *arg) double *block_B = (double *)STARPU_MATRIX_GET_PTR(handles[1]); double *block_C = (double *)STARPU_MATRIX_GET_PTR(handles[2]); - unsigned n_col_A = STARPU_MATRIX_GET_NX(handles[0]); - unsigned n_col_B = STARPU_MATRIX_GET_NX(handles[1]); - unsigned n_col_C = STARPU_MATRIX_GET_NX(handles[2]); + size_t n_col_A = STARPU_MATRIX_GET_NX(handles[0]); + size_t n_col_B = STARPU_MATRIX_GET_NX(handles[1]); + size_t n_col_C = STARPU_MATRIX_GET_NX(handles[2]); - unsigned n_row_A = STARPU_MATRIX_GET_NY(handles[0]); - unsigned n_row_B = STARPU_MATRIX_GET_NY(handles[1]); - unsigned n_row_C = STARPU_MATRIX_GET_NY(handles[2]); + size_t n_row_A = STARPU_MATRIX_GET_NY(handles[0]); + 
size_t n_row_B = STARPU_MATRIX_GET_NY(handles[1]); + size_t n_row_C = STARPU_MATRIX_GET_NY(handles[2]); - unsigned ld_A = STARPU_MATRIX_GET_LD(handles[0]); - unsigned ld_B = STARPU_MATRIX_GET_LD(handles[1]); - unsigned ld_C = STARPU_MATRIX_GET_LD(handles[2]); + size_t ld_A = STARPU_MATRIX_GET_LD(handles[0]); + size_t ld_B = STARPU_MATRIX_GET_LD(handles[1]); + size_t ld_C = STARPU_MATRIX_GET_LD(handles[2]); /* Sanity check, not needed in real life case */ assert(n_col_C == n_col_B); diff --git a/mpi/examples/matrix_mult/mm_2dbc.c b/mpi/examples/matrix_mult/mm_2dbc.c index 7110a9f6e9..828c8ca6b8 100644 --- a/mpi/examples/matrix_mult/mm_2dbc.c +++ b/mpi/examples/matrix_mult/mm_2dbc.c @@ -192,13 +192,13 @@ static void cpu_mult(void *handles[], void *arg) double *block_B = (double *)STARPU_MATRIX_GET_PTR(handles[1]); double *block_C = (double *)STARPU_MATRIX_GET_PTR(handles[2]); - unsigned n_col_A = STARPU_MATRIX_GET_NX(handles[0]); - unsigned n_col_C = STARPU_MATRIX_GET_NX(handles[2]); - unsigned n_row_C = STARPU_MATRIX_GET_NY(handles[2]); + size_t n_col_A = STARPU_MATRIX_GET_NX(handles[0]); + size_t n_col_C = STARPU_MATRIX_GET_NX(handles[2]); + size_t n_row_C = STARPU_MATRIX_GET_NY(handles[2]); - unsigned ld_A = STARPU_MATRIX_GET_LD(handles[0]); - unsigned ld_B = STARPU_MATRIX_GET_LD(handles[1]); - unsigned ld_C = STARPU_MATRIX_GET_LD(handles[2]); + size_t ld_A = STARPU_MATRIX_GET_LD(handles[0]); + size_t ld_B = STARPU_MATRIX_GET_LD(handles[1]); + size_t ld_C = STARPU_MATRIX_GET_LD(handles[2]); if (VERBOSE) printf("gemm_task\n"); STARPU_DGEMM("N", "N", n_row_C,n_col_C,n_col_A, @@ -211,10 +211,10 @@ static void cpu_fill(void *handles[], void *arg) (void)arg; double *block_A = (double *)STARPU_MATRIX_GET_PTR(handles[0]); - unsigned n_col_A = STARPU_MATRIX_GET_NX(handles[0]); - unsigned n_row_A = STARPU_MATRIX_GET_NY(handles[0]); + size_t n_col_A = STARPU_MATRIX_GET_NX(handles[0]); + size_t n_row_A = STARPU_MATRIX_GET_NY(handles[0]); - unsigned i,j; + size_t i,j; if 
(VERBOSE) printf("fill_task\n"); for (i=0;i end_line) block_end = end_line; + if (*block_count > 0 && (begin == (size_t)block_lengths[*block_count - 1]+displacements[*block_count-1]) && (block_lengths[*block_count - 1] + block_end-begin < block_size)) + { + // the new block is directly after the previous one and both do not exceed maximum size, merge them + block_lengths[*block_count - 1] += (block_end-begin); + _STARPU_MPI_DEBUG(1200, " updating previous block %d with length %u from %zu to %zu with displacement %zu\n", *block_count-1, block_lengths[*block_count-1], displacements[*block_count-1], block_end, displacements[*block_count]); + } + else + { + block_lengths[*block_count] = (block_end-begin); + block_types[*block_count] = MPI_BYTE; + displacements[*block_count] = begin; + _STARPU_MPI_DEBUG(1200, " creating block %d with length %u from %zu to %zu with displacement %zu\n", *block_count, block_lengths[*block_count], begin, block_end, displacements[*block_count]); + *block_count = *block_count + 1; + } + STARPU_ASSERT_MSG(*block_count < nb_blocks, "MPI Datatype creation failed"); + begin = block_end; + } while(begin < end_line); +} + +static void _make_recursive_datatype(size_t *layers, size_t *steps, int nb_dims, int current_dim, size_t elemsize, size_t block_size, size_t begin, int *block_lengths, MPI_Datatype *block_types, MPI_Aint *displacements, int *block_count, int nb_blocks) +{ + size_t i; + for(i=0 ; i 1) - { - MPI_Datatype datatype_ndlayer; - ret = MPI_Type_vector(nn[1], nn[0]*elemsize, ldn[1]*elemsize, MPI_BYTE, &datatype_ndlayer); - STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_vector failed"); - - MPI_Datatype oldtype = datatype_ndlayer, newtype; - unsigned i; - for (i = 2; i < ndim; i++) - { - ret = MPI_Type_create_hvector(nn[i], 1, ldn[i]*elemsize, oldtype, &newtype); - STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_hvector failed"); - - ret = MPI_Type_free(&oldtype); - STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_free failed"); - - 
oldtype = newtype; - } - *datatype = oldtype; - } - else if (ndim == 1) - { - ret = MPI_Type_contiguous(nn[0]*elemsize, MPI_BYTE, datatype); - STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed"); - } - - ret = MPI_Type_commit(datatype); - STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); + _make_datatype(datatype, nn, ldn, ndim, elemsize); return 0; } @@ -192,16 +230,21 @@ static int handle_to_datatype_vector(starpu_data_handle_t data_handle, unsigned { struct starpu_vector_interface *vector_interface = starpu_data_get_interface_on_node(data_handle, node); - int ret; - - unsigned nx = STARPU_VECTOR_GET_NX(vector_interface); + size_t nx = STARPU_VECTOR_GET_NX(vector_interface); size_t elemsize = STARPU_VECTOR_GET_ELEMSIZE(vector_interface); - ret = MPI_Type_contiguous(nx*elemsize, MPI_BYTE, datatype); - STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed"); - +#ifdef STARPU_HAVE_MPI_TYPE_VECTOR_C + int ret; + _STARPU_MPI_DEBUG(1200, "creating datatype for vector using MPI_Type_vector_c\n"); + ret = MPI_Type_vector_c(1, nx*elemsize, 0, MPI_BYTE, datatype); + STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_vector_c failed"); ret = MPI_Type_commit(datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); +#else + _STARPU_MPI_DEBUG(1200, "creating datatype for vector using MPI_Type_create_struct\n"); + size_t layers[1] = {nx}; + _make_datatype(datatype, layers, NULL, 1, elemsize); +#endif return 0; } @@ -341,7 +384,7 @@ void _starpu_mpi_datatype_allocate(starpu_data_handle_t data_handle, struct _sta req->registered_datatype = 0; } } -#ifdef STARPU_VERBOSE +#ifdef STARPU_MPI_VERBOSE { char datatype_name[MPI_MAX_OBJECT_NAME]; int datatype_name_len; diff --git a/mpi/tests/Makefile.am b/mpi/tests/Makefile.am index 4b3abb71a3..44f64097ca 100644 --- a/mpi/tests/Makefile.am +++ b/mpi/tests/Makefile.am @@ -79,7 +79,8 @@ starpu_mpi_TESTS += \ stats \ user_defined_datatype \ wait_for_all \ - pack + pack \ + large_set 
if !STARPU_SIMGRID starpu_mpi_TESTS += \ @@ -172,6 +173,7 @@ endif noinst_PROGRAMS += \ datatypes \ + large_set \ pingpong \ mpi_test \ mpi_isend \ diff --git a/mpi/tests/coop_chained_sends.c b/mpi/tests/coop_chained_sends.c index db60b5c8e6..aba9d1c787 100644 --- a/mpi/tests/coop_chained_sends.c +++ b/mpi/tests/coop_chained_sends.c @@ -21,9 +21,9 @@ void scal_cpu_func(void *buffers[], void *cl_arg) { - unsigned i; + size_t i; struct starpu_vector_interface *vector = buffers[0]; - unsigned n = STARPU_VECTOR_GET_NX(vector); + size_t n = STARPU_VECTOR_GET_NX(vector); float *val = (float *) STARPU_VECTOR_GET_PTR(vector); /* scale the vector */ diff --git a/mpi/tests/coop_large.c b/mpi/tests/coop_large.c index 4ebb01be52..113978b716 100644 --- a/mpi/tests/coop_large.c +++ b/mpi/tests/coop_large.c @@ -21,10 +21,10 @@ void scal_cpu_func(void *buffers[], void *cl_arg) { - unsigned i; + size_t i; float factor; struct starpu_vector_interface *vector = buffers[0]; - unsigned n = STARPU_VECTOR_GET_NX(vector); + size_t n = STARPU_VECTOR_GET_NX(vector); float *val = (float *) STARPU_VECTOR_GET_PTR(vector); starpu_codelet_unpack_args(cl_arg, &factor); diff --git a/mpi/tests/early_request.c b/mpi/tests/early_request.c index 9f048c1faf..cefcfe376f 100644 --- a/mpi/tests/early_request.c +++ b/mpi/tests/early_request.c @@ -57,8 +57,8 @@ void fill_tmp_buffer(void *buffers[], void *cl_arg) { (void)cl_arg; int *tmp = (int *) STARPU_VECTOR_GET_PTR(buffers[0]); - int nx = STARPU_VECTOR_GET_NX(buffers[0]); - int i; + size_t nx = STARPU_VECTOR_GET_NX(buffers[0]); + size_t i; for (i=0; i +#include +#include "helper.h" +#include + +static FILE *_f_buffer; +static char _f_buffer_name[40]; +static int check=1, display=1, silent=0; +#define vector 0 +#define matrix 1 +#define block 2 +#define tensor 3 +#define ndim 4 +#define nb_tests 5 +static int tests_data[nb_tests] = {1, 1, 1, 1, 1}; +static int test_small=1, test_large=1; +static int rank_comm, size_comm; + +static void dump() +{ + 
fclose(_f_buffer); + char *buffer = 0; + long length; + FILE *f = fopen(_f_buffer_name, "rb"); + + assert(f); + if (f) + { + fseek(f, 0, SEEK_END); + length = ftell(f); + if (length) + { + fseek(f, 0, SEEK_SET); + buffer = malloc(length); + assert(fread(buffer, 1, length, f) > 0); + } + fclose(f); + unlink(_f_buffer_name); + } + if (rank_comm != 0) + { + MPI_Send(&length, 1, MPI_LONG, 0, rank_comm, MPI_COMM_WORLD); + MPI_Send(buffer, length, MPI_CHAR, 0, rank_comm, MPI_COMM_WORLD); + } + else + { + int x; + for(x=0 ; x= 0, "Error when creating temp file"); +#endif + _f_buffer = fopen(_f_buffer_name, "w"); +} + +#define FPRINTF_BUFFER_INIT() do { create_file(); } while(0) +#define FPRINTF_BUFFER(fmt, ...) do { if (!silent) fprintf(_f_buffer, fmt, ## __VA_ARGS__); } while(0) +#define FPRINTF_BUFFER_DUMP() do { dump(); } while(0) + +struct type_function +{ + void (*init_func)(void *); + void (*next_func)(void *); + void (*empty_func)(void *); + int (*compare_func)(void *buffer, size_t x, size_t y, size_t z, size_t t, size_t n, size_t ldy, size_t ldz, size_t ldt, size_t ldn, void *value); + void (*print_func)(FILE *f, void *buffer, size_t x, size_t y, size_t z, size_t t, size_t n, size_t ldy, size_t ldz, size_t ldt, size_t ldn); + void (*print_val_func)(FILE *f, void *value); + void (*set_func)(void *buffer, void *value, size_t x, size_t y, size_t z, size_t t, size_t n, size_t ldy, size_t ldz, size_t ldt, size_t ldn); + size_t elem_size; + void *value; + void *empty_value; +}; + +void init_func_char(void *x) +{ + char *_x = (char *)x; + *_x = 'a'; +} + +void next_func_char(void *x) +{ + char *_x = (char *)x; + *_x = *_x + 1; + if (*_x > 'z') + init_func_char(x); +} + +void set_func_char(void *buffer, void *value, size_t x, size_t y, size_t z, size_t t, size_t n, size_t ldy, size_t ldz, size_t ldt, size_t ldn) +{ + char *_buffer = (char *)buffer; + char *_value = (char *)value; + //FPRINTF_BUFFER("setting %zu %zu %zu %zu %zu at pos %zu to %c\n", x, y, z, t, n, 
n*ldn*ldt*ldz*ldy+t*ldt*ldz*ldy+z*ldz*ldy+y*ldy+x, *_value); + _buffer[n*ldn*ldt*ldz*ldy+t*ldt*ldz*ldy+z*ldz*ldy+y*ldy+x] = *_value; +} + +void empty_func_char(void *x) +{ + char *_x = (char *)x; + *_x = 0; +} + +int compare_func_char(void *buffer, size_t x, size_t y, size_t z, size_t t, size_t n, size_t ldy, size_t ldz, size_t ldt, size_t ldn, void *value) +{ + char *_buffer = (char *)buffer; + char *_value = (char *)value; + return (_buffer[n*ldn*ldt*ldz*ldy+t*ldt*ldz*ldy+z*ldz*ldy+y*ldy+x] == *_value); +} + +void print_func_char(FILE *f, void *buffer, size_t x, size_t y, size_t z, size_t t, size_t n, size_t ldy, size_t ldz, size_t ldt, size_t ldn) +{ + char *_buffer = (char *)buffer; + fprintf(f, "'%c' ", _buffer[n*ldn*ldt*ldz*ldy+t*ldt*ldz*ldy+z*ldz*ldy+y*ldy+x]); +} + +void print_val_func_char(FILE *f, void *value) +{ + char *_value = (char *)value; + fprintf(f, "'%c' ", *_value); +} + +void init_func_int(void *x) +{ + int *_x = (int *)x; + *_x = 1; +} + +void next_func_int(void *x) +{ + int *_x = (int *)x; + *_x = *_x + 1; + if (*_x > 1000) + init_func_int(x); +} + +void set_func_int(void *buffer, void *value, size_t x, size_t y, size_t z, size_t t, size_t n, size_t ldy, size_t ldz, size_t ldt, size_t ldn) +{ + int *_buffer = (int *)buffer; + int *_value = (int *)value; + // FPRINTF_BUFFER("setting %zu %zu %zu at pos %zu to %d\n", x, y, z, t, n*ldn*ldt*ldz*ldy+t*ldt*ldz*ldy+z*ldz*ldy+y*ldy+x, *_value); + _buffer[n*ldn*ldt*ldz*ldy+t*ldt*ldz*ldy+z*ldz*ldy+y*ldy+x] = *_value; +} + +void empty_func_int(void *x) +{ + int *_x = (int *)x; + *_x = 0; +} + +int compare_func_int(void *buffer, size_t x, size_t y, size_t z, size_t t, size_t n, size_t ldy, size_t ldz, size_t ldt, size_t ldn, void *value) +{ + int *_buffer = (int *)buffer; + int *_value = (int *)value; + return (_buffer[n*ldn*ldt*ldz*ldy+t*ldt*ldz*ldy+z*ldz*ldy+y*ldy+x] == *_value); +} + +void print_func_int(FILE *f, void *buffer, size_t x, size_t y, size_t z, size_t t, size_t n, size_t ldy, size_t ldz, 
size_t ldt, size_t ldn) +{ + int *_buffer = (int *)buffer; + fprintf(f, "%4d ", _buffer[n*ldn*ldt*ldz*ldy+t*ldt*ldz*ldy+z*ldz*ldy+y*ldy+x]); +} + +void print_val_func_int(FILE *f, void *value) +{ + int *_value = (int *)value; + fprintf(f, "'%4d' ", *_value); +} + +struct type_function funcs_int = +{ + .init_func = init_func_int, + .next_func = next_func_int, + .empty_func = empty_func_int, + .compare_func = compare_func_int, + .print_func = print_func_int, + .print_val_func = print_val_func_int, + .set_func = set_func_int, + .elem_size = sizeof(int) +}; + +struct type_function funcs_char = +{ + .init_func = init_func_char, + .next_func = next_func_char, + .empty_func = empty_func_char, + .compare_func = compare_func_char, + .print_func = print_func_char, + .set_func = set_func_char, + .elem_size = sizeof(char) +}; + +void print_buffer(char *buffer, size_t nx, size_t ny, size_t nz, size_t nt, size_t nn, size_t ldy, size_t ldz, size_t ldt, size_t ldn, struct type_function funcs) +{ + if (silent || !display) return; + + size_t n; + for(n=0 ; n 5 && y < ny-5) + { + if (y == 6) + FPRINTF_BUFFER("...\n"); + } + else + { + size_t x; + for(x = 0; x < nx; x++) + { + if (x > 10 && x < nx-10) + { + if (x == 11) + FPRINTF_BUFFER(" ... 
"); + } + else + { + funcs.print_func(_f_buffer, buffer, x, y, z, t, n, ldy, ldz, ldt, ldn); + } + } + FPRINTF_BUFFER("\n"); + } + } + FPRINTF_BUFFER("\n"); + } + FPRINTF_BUFFER("\n"); + } + } +} + +void init_buffer(char *buffer, size_t nx, size_t ny, size_t nz, size_t nt, size_t nn, size_t ldy, size_t ldz, size_t ldt, size_t ldn, size_t buffer_size, struct type_function funcs) +{ + if (rank_comm == 0) + { + funcs.init_func(funcs.value); + + size_t n; + for(n=0 ; n whole size %zu (INT_MAX %d)\n", data_funcs.data_name, nx, ldy, ny, ldz, nz, ldt, nt, ldn, nn, type_funcs.elem_size, size, INT_MAX); + + buffer = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0); + if (buffer == MAP_FAILED) + { + FPRINTF_BUFFER("cannot allocate memory for %zu bytes ...\n", size); + perror("mmap"); + return -1; + } + memset(buffer, 0, size); + type_funcs.value = calloc(1, type_funcs.elem_size); + type_funcs.empty_value = calloc(1, type_funcs.elem_size); + init_buffer(buffer, nx, ny, nz, nt, nn, ldy, ldz, ldt, ldn, size, type_funcs); + + data_funcs.data_register_func(&data_handle, buffer, nx, ny, nz, nt, nn, ldy, ldz, ldt, ldn, type_funcs); + + ret = 0; + if (rank_comm == 0) + { + ret = starpu_mpi_send(data_handle, 1, 42, MPI_COMM_WORLD); + } + else if (rank_comm == 1) + { + ret = starpu_mpi_recv(data_handle, 0, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send/recv"); + + starpu_data_unregister(data_handle); + ret = check_buffer(buffer, nx, ny, nz, nt, nn, ldy, ldz, ldt, ldn, type_funcs); + munmap(buffer, size); + free(type_funcs.value); + free(type_funcs.empty_value); + FPRINTF_BUFFER("Check with %s [nx=%zu(ldy=%zu),ny=%zu(ldz=%zu),nz=%zu(ldt=%zu),nt=%zu(ldn=%zu),nn=%zu] elements of size %zu ... 
DONE with %s\n", data_funcs.data_name, nx, ldy, ny, ldz, nz, ldt, nt, ldn, nn, type_funcs.elem_size, ret==0?"SUCCESS":"FAILURE"); + return ret; +} + +int main(int argc, char **argv) +{ + int ret; + struct starpu_conf conf; + int mpi_init, i; + + for(i=1 ; i 1) + goto end; + +#ifndef STARPU_HAVE_MPI_TYPE_VECTOR_C + { + // As the function MPI_Type_vector_c is not defined, + // StarPU will use MPI_Type_create_struct. Before + // running the test, let's check if this function can + // properly handle large types + int block_count=3; + int block_lengths[3]={INT_MAX,INT_MAX,12}; + MPI_Aint displacements[3]={0,INT_MAX,INT_MAX*(size_t)2}; + MPI_Datatype block_types[3]={MPI_BYTE,MPI_BYTE,MPI_BYTE}; + MPI_Datatype datatype; + + ret = MPI_Type_create_struct(block_count, block_lengths, displacements, block_types, &datatype); + if (ret == MPI_SUCCESS) + { + MPI_Aint lb, extent; + MPI_Type_get_extent(datatype, &lb, &extent); + ret = (extent-displacements[2]-block_lengths[2] == 0) ? MPI_SUCCESS : MPI_ERR_TYPE; + } + if (ret != MPI_SUCCESS) + { + FPRINTF(stderr, "Function MPI_Type_create_struct fails with large types.\n"); + starpu_mpi_shutdown(); + if (!mpi_init) + MPI_Finalize(); + return rank_comm == 0 ? 
STARPU_TEST_SKIPPED : 0; + } + MPI_Type_commit(&datatype); + MPI_Type_free(&datatype); + } +#endif + +#if defined(STARPU_MPI_MINIMAL_TESTS) || defined(STARPU_QUICK_CHECK) + ret = check_dataset(funcs_vector, funcs_char, (size_t)INT_MAX+12, 1, 1, 1, 1, (size_t)INT_MAX+12, 1, 1, 1); + goto end; +#endif + + if (tests_data[vector]) + { + if (test_small) + { + ret = check_dataset(funcs_vector, funcs_char, 26, 1, 1, 1, 1, 26, 1, 1, 1); + if (ret == -1) goto end; + } + if (test_large) + { + ret = check_dataset(funcs_vector, funcs_char, (size_t)INT_MAX, 1, 1, 1, 1, (size_t)INT_MAX, 1, 1, 1); + if (ret == -1) goto end; + ret = check_dataset(funcs_vector, funcs_int, (size_t)INT_MAX, 1, 1, 1, 1, (size_t)INT_MAX, 1, 1, 1); + if (ret == -1) goto end; + ret = check_dataset(funcs_vector, funcs_char, (size_t)INT_MAX+12, 1, 1, 1, 1, (size_t)INT_MAX+12, 1, 1, 1); + if (ret == -1) goto end; + } + } + + if (tests_data[matrix]) + { + if (test_small) + { + ret = check_dataset(funcs_matrix, funcs_char, 4, 3, 1, 1, 1, 4, 3, 1, 1); + if (ret == -1) goto end; + ret = check_dataset(funcs_matrix, funcs_char, 3, 5, 1, 1, 1, 10, 5, 1, 1); + if (ret == -1) goto end; + ret = check_dataset(funcs_matrix, funcs_int, 3, 5, 1, 1, 1, 10, 5, 1, 1); + if (ret == -1) goto end; + } + if (test_large) + { + ret = check_dataset(funcs_matrix, funcs_char, (size_t)INT_MAX, 1, 1, 1, 1, (size_t)INT_MAX, 1, 1, 1); + if (ret == -1) goto end; + ret = check_dataset(funcs_matrix, funcs_int, (size_t)INT_MAX+100, 1, 1, 1, 1, (size_t)INT_MAX+100, 1, 1, 1); + if (ret == -1) goto end; + } + } + + if (tests_data[block]) + { + if (test_small) + { + ret = check_dataset(funcs_block, funcs_int, 6, 2, 4, 1, 1, 6, 2, 4, 1); + if (ret == -1) goto end; + ret = check_dataset(funcs_block, funcs_char, 5, 2, 7, 1, 1, 6, 3, 7, 1); + if (ret == -1) goto end; + } + if (test_large) + { + ret = check_dataset(funcs_block, funcs_char, (size_t)INT_MAX, 1, 1, 1, 1, (size_t)INT_MAX, 1, 1, 1); + if (ret == -1) goto end; + ret = 
check_dataset(funcs_block, funcs_char, (size_t)INT_MAX+10, 1, 1, 1, 1, (size_t)INT_MAX+10, 1, 1, 1); + if (ret == -1) goto end; + } + } + + if (tests_data[tensor]) + { + if (test_small) + { + ret = check_dataset(funcs_tensor, funcs_int, 6, 4, 2, 4, 1, 6, 4, 2, 4); + if (ret == -1) goto end; + ret = check_dataset(funcs_tensor, funcs_char, 6, 4, 2, 4, 1, 6, 5, 3, 4); + if (ret == -1) goto end; + } + if (test_large) + { + ret = check_dataset(funcs_tensor, funcs_char, ((size_t)INT_MAX+10)/20, 2, 4, 3, 1, ((size_t)INT_MAX+10)/20, 2, 4, 1); + if (ret == -1) goto end; + } + } + + if (tests_data[ndim]) + { + if (test_small) + { + ret = check_dataset(funcs_ndim, funcs_char, 3, 2, 3, 1, 2, 3, 2, 3, 1); + if (ret == -1) goto end; + ret = check_dataset(funcs_ndim, funcs_char, 2, 2, 3, 1, 2, 3, 2, 3, 1); + if (ret == -1) goto end; + } + if (test_large) + { + ret = check_dataset(funcs_ndim, funcs_char, ((size_t)INT_MAX+10)/20, 2, 4, 3, 2, ((size_t)INT_MAX+10)/20, 2, 4, 3); + if (ret == -1) goto end; + } + } + +end: + FPRINTF_BUFFER_DUMP(); + starpu_mpi_shutdown(); + + if (!mpi_init) + MPI_Finalize(); + + return rank_comm == 0 ? 
ret : 0; +} diff --git a/mpi/tests/mpi_reduction_kernels.c b/mpi/tests/mpi_reduction_kernels.c index 5a3075ff25..5d02a3732c 100644 --- a/mpi/tests/mpi_reduction_kernels.c +++ b/mpi/tests/mpi_reduction_kernels.c @@ -50,12 +50,12 @@ void dot_cpu_func(void *descr[], void *cl_arg) { (void)cl_arg; long int *local_x = (long int *)STARPU_VECTOR_GET_PTR(descr[0]); - unsigned n = STARPU_VECTOR_GET_NX(descr[0]); + size_t n = STARPU_VECTOR_GET_NX(descr[0]); long int *dot = (long int *)STARPU_VARIABLE_GET_PTR(descr[1]); //FPRINTF_MPI(stderr, "Before dot=%ld (adding %d elements...)\n", *dot, n); - unsigned i; + size_t i; for (i = 0; i < n; i++) { //FPRINTF_MPI(stderr, "Adding %ld\n", local_x[i]); diff --git a/mpi/tests/mpi_scatter_gather.c b/mpi/tests/mpi_scatter_gather.c index af524bdc4a..c87a1bc891 100644 --- a/mpi/tests/mpi_scatter_gather.c +++ b/mpi/tests/mpi_scatter_gather.c @@ -26,8 +26,8 @@ int my_distrib(int x, int nb_nodes) void cpu_codelet(void *descr[], void *_args) { int *vector = (int *)STARPU_VECTOR_GET_PTR(descr[0]); - unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); - unsigned i; + size_t nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t i; int rank; starpu_codelet_unpack_args(_args, &rank); diff --git a/mpi/tests/ndim_interface.c b/mpi/tests/ndim_interface.c index 5f1331b9de..bbb13694b6 100644 --- a/mpi/tests/ndim_interface.c +++ b/mpi/tests/ndim_interface.c @@ -65,7 +65,7 @@ int main(int argc, char **argv) assert(arr4d); /* fill the inner 4-dim array */ - unsigned i, j, k, l; + size_t i, j, k, l; int n = 0; for (l = 0; l < SIZE; l++) { @@ -81,8 +81,8 @@ int main(int argc, char **argv) } } - unsigned nn[4] = {SIZE, SIZE, SIZE, SIZE}; - unsigned ldn[4] = {1, BIGSIZE, BIGSIZE*BIGSIZE, BIGSIZE*BIGSIZE*BIGSIZE}; + size_t nn[4] = {SIZE, SIZE, SIZE, SIZE}; + size_t ldn[4] = {1, BIGSIZE, BIGSIZE*BIGSIZE, BIGSIZE*BIGSIZE*BIGSIZE}; starpu_ndim_data_register(&arr4d_handle, STARPU_MAIN_RAM, (uintptr_t)arr4d, ldn, nn, 4, sizeof(int)); } @@ -91,8 +91,8 @@ int main(int argc, char 
**argv) arr4d = calloc(SIZE*SIZE*SIZE*SIZE, sizeof(int)); assert(arr4d); - unsigned nn[4] = {SIZE, SIZE, SIZE, SIZE}; - unsigned ldn[4] = {1, SIZE, SIZE*SIZE, SIZE*SIZE*SIZE}; + size_t nn[4] = {SIZE, SIZE, SIZE, SIZE}; + size_t ldn[4] = {1, SIZE, SIZE*SIZE, SIZE*SIZE*SIZE}; starpu_ndim_data_register(&arr4d_handle, STARPU_MAIN_RAM, (uintptr_t)arr4d, ldn, nn, 4, sizeof(int)); } @@ -111,7 +111,7 @@ int main(int argc, char **argv) STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); int m = 10; - unsigned i, j, k, l; + size_t i, j, k, l; for (l = 0; l < SIZE; l++) { for (k = 0; k < SIZE; k++) @@ -140,7 +140,7 @@ int main(int argc, char **argv) STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); int n = 0, m = 10; - unsigned i, j, k, l; + size_t i, j, k, l; for (l = 0; l < SIZE; l++) { for (k = 0; k < SIZE; k++) diff --git a/recursive_tasks/tests/basic/b2t.c b/recursive_tasks/tests/basic/b2t.c index d85ebcbb0f..8cc325bb22 100644 --- a/recursive_tasks/tests/basic/b2t.c +++ b/recursive_tasks/tests/basic/b2t.c @@ -56,8 +56,8 @@ void binary_task_func(void *buffers[], void *arg) { int *vA = (int*)STARPU_VECTOR_GET_PTR(buffers[0]); int *vB = (int*)STARPU_VECTOR_GET_PTR(buffers[1]); - int nx = STARPU_VECTOR_GET_NX(buffers[0]); - int i; + size_t nx = STARPU_VECTOR_GET_NX(buffers[0]); + size_t i; print_vector(vA, nx, "task vA"); for(i=0 ; i>>(nx, v, 3); @@ -84,7 +84,7 @@ static __global__ void add_cuda(unsigned n, int *v, int term) extern "C" void task_cuda_func(void *buffers[], void *arg) { int *v = (int*)STARPU_VECTOR_GET_PTR(buffers[0]); - int nx = STARPU_VECTOR_GET_NX(buffers[0]); + size_t nx = STARPU_VECTOR_GET_NX(buffers[0]); if (!getenv("STARPU_SSILENT")) { print_vector_cuda<<<1, 1, 0, starpu_cuda_get_local_stream()>>>(nx, v, 0); @@ -104,7 +104,7 @@ extern "C" void task_cuda_func(void *buffers[], void *arg) extern "C" void task_RO_cuda_func(void *buffers[], void *arg) { int *v = (int*)STARPU_VECTOR_GET_PTR(buffers[0]); - int nx = STARPU_VECTOR_GET_NX(buffers[0]); + 
size_t nx = STARPU_VECTOR_GET_NX(buffers[0]); if (!getenv("STARPU_SSILENT")) { diff --git a/recursive_tasks/tests/basic/basic.h b/recursive_tasks/tests/basic/basic.h index ac8761d71e..31f35bc80f 100644 --- a/recursive_tasks/tests/basic/basic.h +++ b/recursive_tasks/tests/basic/basic.h @@ -33,11 +33,11 @@ struct starpu_data_filter f = .nchildren = PARTS }; -void print_vector(int *v, int nx, const char *label) +void print_vector(int *v, size_t nx, const char *label) { char message[100000]; int cur=0; - int i; + size_t i; cur += snprintf(&message[cur], 100000 - cur, "%s : ", label); for (i=0; iram_rowptr; uint32_t *rowptr = bcsr_parent->rowptr; - unsigned child_nrow; + size_t child_nrow; size_t child_rowoffset; STARPU_ASSERT_MSG(bcsr_parent->id == STARPU_BCSR_INTERFACE_ID, "%s can only be applied on a bcsr data", __func__); diff --git a/src/datawizard/interfaces/block_filters.c b/src/datawizard/interfaces/block_filters.c index 41859dc54f..04d9c15fae 100644 --- a/src/datawizard/interfaces/block_filters.c +++ b/src/datawizard/interfaces/block_filters.c @@ -26,10 +26,10 @@ static void _starpu_block_filter_block(int dim, void *parent_interface, void *ch unsigned blocksize; /* the element will be split, in case horizontal, it's nx, in case vertical, it's ny, in case depth, it's nz*/ - uint32_t nn; - uint32_t nx; - uint32_t ny; - uint32_t nz; + size_t nn; + size_t nx; + size_t ny; + size_t nz; switch(dim) { @@ -66,9 +66,9 @@ static void _starpu_block_filter_block(int dim, void *parent_interface, void *ch size_t elemsize = block_parent->elemsize; - STARPU_ASSERT_MSG(nparts <= nn, "cannot split %u elements in %u parts", nn, nparts); + STARPU_ASSERT_MSG(nparts <= nn, "cannot split %zu elements in %u parts", nn, nparts); - uint32_t child_nn; + size_t child_nn; size_t offset; starpu_filter_nparts_compute_chunk_size_and_offset(nn, nparts, elemsize, id, blocksize, &child_nn, &offset); @@ -159,10 +159,10 @@ static void _starpu_block_filter_pick_matrix(int dim, void 
*parent_interface, vo unsigned blocksize; - uint32_t nn; - uint32_t nx = block_parent->nx; - uint32_t ny = block_parent->ny; - uint32_t nz = block_parent->nz; + size_t nn; + size_t nx = block_parent->nx; + size_t ny = block_parent->ny; + size_t nz = block_parent->nz; switch(dim) { @@ -257,13 +257,12 @@ void starpu_block_filter_pick_variable(void *parent_interface, void *child_inter /* each chunk becomes a variable */ struct starpu_variable_interface *variable_child = (struct starpu_variable_interface *) child_interface; - uint32_t nx = block_parent->nx; - uint32_t ny = block_parent->ny; - uint32_t nz = block_parent->nz; - - unsigned ldy = block_parent->ldy; - unsigned ldz = block_parent->ldz; + size_t nx = block_parent->nx; + size_t ny = block_parent->ny; + size_t nz = block_parent->nz; + size_t ldy = block_parent->ldy; + size_t ldz = block_parent->ldz; size_t elemsize = block_parent->elemsize; uint32_t* chunk_pos = (uint32_t*)f->filter_arg_ptr; diff --git a/src/datawizard/interfaces/block_interface.c b/src/datawizard/interfaces/block_interface.c index 15ec6bbb9e..0e5dd18bf7 100644 --- a/src/datawizard/interfaces/block_interface.c +++ b/src/datawizard/interfaces/block_interface.c @@ -114,11 +114,11 @@ static void register_block_handle(starpu_data_handle_t handle, int home_node, vo /* declare a new data with the BLAS interface */ void starpu_block_data_register(starpu_data_handle_t *handleptr, int home_node, - uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx, - uint32_t ny, uint32_t nz, size_t elemsize) + uintptr_t ptr, size_t ldy, size_t ldz, size_t nx, + size_t ny, size_t nz, size_t elemsize) { - STARPU_ASSERT_MSG(ldy >= nx, "ldy = %u should not be less than nx = %u.", ldy, nx); - STARPU_ASSERT_MSG(ldz/ldy >= ny, "ldz/ldy = %u/%u = %u should not be less than ny = %u.", ldz, ldy, ldz/ldy, ny); + STARPU_ASSERT_MSG(ldy >= nx, "ldy = %zu should not be less than nx = %zu.", ldy, nx); + STARPU_ASSERT_MSG(ldz/ldy >= ny, "ldz/ldy = %zu/%zu = %zu should not be less 
than ny = %zu.", ldz, ldy, ldz/ldy, ny); struct starpu_block_interface block_interface = { .id = STARPU_BLOCK_INTERFACE_ID, @@ -147,7 +147,7 @@ void starpu_block_data_register(starpu_data_handle_t *handleptr, int home_node, } void starpu_block_ptr_register(starpu_data_handle_t handle, unsigned node, - uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz) + uintptr_t ptr, uintptr_t dev_handle, size_t offset, size_t ldy, size_t ldz) { struct starpu_block_interface *block_interface = starpu_data_get_interface_on_node(handle, node); starpu_data_ptr_register(handle, node); @@ -187,7 +187,7 @@ static void display_block_interface(starpu_data_handle_t handle, FILE *f) block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); - fprintf(f, "%u\t%u\t%u\t", block_interface->nx, block_interface->ny, block_interface->nz); + fprintf(f, "%zu\t%zu\t%zu\t", block_interface->nx, block_interface->ny, block_interface->nz); } #define IS_CONTIGUOUS_MATRIX(nx, ny, ldy) ((nx) == (ldy)) @@ -200,18 +200,18 @@ static int pack_block_handle(starpu_data_handle_t handle, unsigned node, void ** struct starpu_block_interface *block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, node); - uint32_t ldy = block_interface->ldy; - uint32_t ldz = block_interface->ldz; - uint32_t nx = block_interface->nx; - uint32_t ny = block_interface->ny; - uint32_t nz = block_interface->nz; + size_t ldy = block_interface->ldy; + size_t ldz = block_interface->ldz; + size_t nx = block_interface->nx; + size_t ny = block_interface->ny; + size_t nz = block_interface->nz; size_t elemsize = block_interface->elemsize; *count = nx*ny*nz*elemsize; if (ptr != NULL) { - uint32_t z, y; + size_t z, y; char *block = (void *)block_interface->ptr; *ptr = (void *)starpu_malloc_on_node_flags(node, *count, 0); @@ -255,16 +255,16 @@ static int peek_block_handle(starpu_data_handle_t handle, unsigned node, void *p struct 
starpu_block_interface *block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, node); - uint32_t ldy = block_interface->ldy; - uint32_t ldz = block_interface->ldz; - uint32_t nx = block_interface->nx; - uint32_t ny = block_interface->ny; - uint32_t nz = block_interface->nz; + size_t ldy = block_interface->ldy; + size_t ldz = block_interface->ldz; + size_t nx = block_interface->nx; + size_t ny = block_interface->ny; + size_t nz = block_interface->nz; size_t elemsize = block_interface->elemsize; STARPU_ASSERT(count == elemsize * nx * ny * nz); - uint32_t z, y; + size_t z, y; char *cur = ptr; char *block = (void *)block_interface->ptr; @@ -323,7 +323,7 @@ static size_t block_interface_get_size(starpu_data_handle_t handle) } /* offer an access to the data parameters */ -uint32_t starpu_block_get_nx(starpu_data_handle_t handle) +size_t starpu_block_get_nx(starpu_data_handle_t handle) { struct starpu_block_interface *block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); @@ -335,7 +335,7 @@ uint32_t starpu_block_get_nx(starpu_data_handle_t handle) return block_interface->nx; } -uint32_t starpu_block_get_ny(starpu_data_handle_t handle) +size_t starpu_block_get_ny(starpu_data_handle_t handle) { struct starpu_block_interface *block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); @@ -347,7 +347,7 @@ uint32_t starpu_block_get_ny(starpu_data_handle_t handle) return block_interface->ny; } -uint32_t starpu_block_get_nz(starpu_data_handle_t handle) +size_t starpu_block_get_nz(starpu_data_handle_t handle) { struct starpu_block_interface *block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); @@ -359,7 +359,7 @@ uint32_t starpu_block_get_nz(starpu_data_handle_t handle) return block_interface->nz; } -uint32_t starpu_block_get_local_ldy(starpu_data_handle_t handle) +size_t 
starpu_block_get_local_ldy(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); @@ -376,7 +376,7 @@ uint32_t starpu_block_get_local_ldy(starpu_data_handle_t handle) return block_interface->ldy; } -uint32_t starpu_block_get_local_ldz(starpu_data_handle_t handle) +size_t starpu_block_get_local_ldz(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); @@ -432,9 +432,9 @@ static starpu_ssize_t allocate_block_buffer_on_node(void *data_interface_, unsig struct starpu_block_interface *dst_block = (struct starpu_block_interface *) data_interface_; - uint32_t nx = dst_block->nx; - uint32_t ny = dst_block->ny; - uint32_t nz = dst_block->nz; + size_t nx = dst_block->nx; + size_t ny = dst_block->ny; + size_t nz = dst_block->nz; size_t elemsize = dst_block->elemsize; starpu_ssize_t allocated_memory; @@ -462,9 +462,9 @@ static starpu_ssize_t allocate_block_buffer_on_node(void *data_interface_, unsig static void free_block_buffer_on_node(void *data_interface, unsigned node) { struct starpu_block_interface *block_interface = (struct starpu_block_interface *) data_interface; - uint32_t nx = block_interface->nx; - uint32_t ny = block_interface->ny; - uint32_t nz = block_interface->nz; + size_t nx = block_interface->nx; + size_t ny = block_interface->ny; + size_t nz = block_interface->nz; size_t elemsize = block_interface->elemsize; starpu_free_on_node(node, block_interface->dev_handle, nx*ny*nz*elemsize); @@ -521,15 +521,15 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int struct starpu_block_interface *dst_block = (struct starpu_block_interface *) dst_interface; int ret = 0; - uint32_t nx = dst_block->nx; - uint32_t ny = dst_block->ny; - uint32_t nz = dst_block->nz; + size_t nx = dst_block->nx; + size_t ny = dst_block->ny; + size_t nz = dst_block->nz; size_t elemsize = dst_block->elemsize; - uint32_t ldy_src = src_block->ldy; - uint32_t ldz_src = src_block->ldz; - uint32_t 
ldy_dst = dst_block->ldy; - uint32_t ldz_dst = dst_block->ldz; + size_t ldy_src = src_block->ldy; + size_t ldz_src = src_block->ldz; + size_t ldy_dst = dst_block->ldy; + size_t ldz_dst = dst_block->ldz; if (starpu_interface_copy3d(src_block->dev_handle, src_block->offset, src_node, dst_block->dev_handle, dst_block->offset, dst_node, @@ -547,9 +547,9 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) { struct starpu_block_interface *block = (struct starpu_block_interface *) data_interface; - return snprintf(buf, size, "B%ux%ux%ux%u", - (unsigned) block->nx, - (unsigned) block->ny, - (unsigned) block->nz, - (unsigned) block->elemsize); + return snprintf(buf, size, "B%zux%zux%zux%zu", + block->nx, + block->ny, + block->nz, + block->elemsize); } diff --git a/src/datawizard/interfaces/csr_filters.c b/src/datawizard/interfaces/csr_filters.c index 34ba57327d..580114b3d2 100644 --- a/src/datawizard/interfaces/csr_filters.c +++ b/src/datawizard/interfaces/csr_filters.c @@ -31,7 +31,7 @@ void starpu_csr_filter_vertical_block(void *parent_interface, void *child_interf uint32_t *ram_rowptr = csr_parent->ram_rowptr; size_t first_index; - unsigned child_nrow; + size_t child_nrow; starpu_filter_nparts_compute_chunk_size_and_offset(nrow, nchunks, 1, id, 1, &child_nrow, &first_index); diff --git a/src/datawizard/interfaces/matrix_filters.c b/src/datawizard/interfaces/matrix_filters.c index d4efcdd34e..b15d5a8c0d 100644 --- a/src/datawizard/interfaces/matrix_filters.c +++ b/src/datawizard/interfaces/matrix_filters.c @@ -30,9 +30,9 @@ static void _starpu_matrix_filter_block(int dim, void *parent_interface, void *c unsigned blocksize; /* the element will be split, in case horizontal, it's nx, in case vertical, it's ny*/ - uint32_t nn; - uint32_t nx; - uint32_t ny; + size_t nn; + size_t nx; + size_t ny; switch(dim) { @@ -58,9 +58,9 @@ static void _starpu_matrix_filter_block(int 
dim, void *parent_interface, void *c size_t elemsize = matrix_parent->elemsize; - STARPU_ASSERT_MSG(nchunks <= nn, "cannot split %u elements in %u parts", nn, nchunks); + STARPU_ASSERT_MSG(nchunks <= nn, "cannot split %zu elements in %u parts", nn, nchunks); - uint32_t child_nn; + size_t child_nn; size_t offset; starpu_filter_nparts_compute_chunk_size_and_offset(nn, nchunks, elemsize, id, blocksize, &child_nn, &offset); @@ -137,8 +137,8 @@ void starpu_matrix_filter_pick_vector_y(void *parent_interface, void *child_inte unsigned blocksize; - uint32_t nx; - uint32_t ny; + size_t nx; + size_t ny; /* actual number of elements */ nx = matrix_parent->nx; @@ -185,9 +185,9 @@ void starpu_matrix_filter_pick_variable(void *parent_interface, void *child_inte unsigned blocksize; - uint32_t nx; - uint32_t ld; - uint32_t ny; + size_t nx; + size_t ld; + size_t ny; /* actual number of elements */ nx = matrix_parent->nx; diff --git a/src/datawizard/interfaces/matrix_interface.c b/src/datawizard/interfaces/matrix_interface.c index 868eb43349..aa69a3c228 100644 --- a/src/datawizard/interfaces/matrix_interface.c +++ b/src/datawizard/interfaces/matrix_interface.c @@ -129,10 +129,10 @@ static void *matrix_to_pointer(void *data_interface, unsigned node) /* declare a new data with the matrix interface */ void starpu_matrix_data_register_allocsize(starpu_data_handle_t *handleptr, int home_node, - uintptr_t ptr, uint32_t ld, uint32_t nx, - uint32_t ny, size_t elemsize, size_t allocsize) + uintptr_t ptr, size_t ld, size_t nx, + size_t ny, size_t elemsize, size_t allocsize) { - STARPU_ASSERT_MSG(ld >= nx, "ld = %u should not be less than nx = %u.", ld, nx); + STARPU_ASSERT_MSG(ld >= nx, "ld = %zu should not be less than nx = %zu.", ld, nx); struct starpu_matrix_interface matrix_interface = { .id = STARPU_MATRIX_INTERFACE_ID, @@ -160,14 +160,14 @@ void starpu_matrix_data_register_allocsize(starpu_data_handle_t *handleptr, int } void starpu_matrix_data_register(starpu_data_handle_t *handleptr, 
int home_node, - uintptr_t ptr, uint32_t ld, uint32_t nx, - uint32_t ny, size_t elemsize) + uintptr_t ptr, size_t ld, size_t nx, + size_t ny, size_t elemsize) { starpu_matrix_data_register_allocsize(handleptr, home_node, ptr, ld, nx, ny, elemsize, nx * ny * elemsize); } void starpu_matrix_ptr_register(starpu_data_handle_t handle, unsigned node, - uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ld) + uintptr_t ptr, uintptr_t dev_handle, size_t offset, size_t ld) { struct starpu_matrix_interface *matrix_interface = starpu_data_get_interface_on_node(handle, node); starpu_data_ptr_register(handle, node); @@ -212,7 +212,7 @@ static void display_matrix_interface(starpu_data_handle_t handle, FILE *f) struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); - fprintf(f, "%u\t%u\t", matrix_interface->nx, matrix_interface->ny); + fprintf(f, "%zu\t%zu\t", matrix_interface->nx, matrix_interface->ny); } #define IS_CONTIGUOUS_MATRIX(nx, ny, ld) ((nx) == (ld)) @@ -226,8 +226,8 @@ struct pack_matrix_header /* FIXME: that would break alignment for O_DIRECT disk access... * while in the disk case, we do know the matrix size anyway */ /* FIXME: rather make MPI pack the data interface in the envelope for us? 
*/ - uint32_t nx; - uint32_t ny; + size_t nx; + size_t ny; size_t elemsize; #endif }; @@ -239,9 +239,9 @@ static int pack_matrix_handle(starpu_data_handle_t handle, unsigned node, void * struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) starpu_data_get_interface_on_node(handle, node); - uint32_t ld = matrix_interface->ld; - uint32_t nx = matrix_interface->nx; - uint32_t ny = matrix_interface->ny; + size_t ld = matrix_interface->ld; + size_t nx = matrix_interface->nx; + size_t ny = matrix_interface->ny; size_t elemsize = matrix_interface->elemsize; *count = nx*ny*elemsize + sizeof(struct pack_matrix_header); @@ -265,7 +265,7 @@ static int pack_matrix_handle(starpu_data_handle_t handle, unsigned node, void * memcpy(cur, matrix, nx*ny*elemsize); else { - uint32_t y; + size_t y; for(y=0 ; yld; - uint32_t nx = matrix_interface->nx; - uint32_t ny = matrix_interface->ny; + size_t ld = matrix_interface->ld; + size_t nx = matrix_interface->nx; + size_t ny = matrix_interface->ny; size_t elemsize = matrix_interface->elemsize; struct pack_matrix_header *header = ptr; @@ -300,14 +300,14 @@ static int peek_matrix_handle(starpu_data_handle_t handle, unsigned node, void * /* We can store whatever can fit */ STARPU_ASSERT_MSG(header->elemsize == elemsize, - "Data element size %u needs to be same as the received data element size %u", - (unsigned) elemsize, (unsigned) header->elemsize); + "Data element size %zu needs to be same as the received data element size %zu", + elemsize, header->elemsize); STARPU_ASSERT_MSG(header->nx * header->ny * header->elemsize <= matrix_interface->allocsize, - "Initial size of data %lu needs to be big enough for received data %ux%ux%u", - (unsigned long) matrix_interface->allocsize, - (unsigned) header->nx, (unsigned) header->ny, - (unsigned) header->elemsize); + "Initial size of data %zu needs to be big enough for received data %zux%zux%zu", + matrix_interface->allocsize, + header->nx, header->ny, + 
header->elemsize); /* Better keep it contiguous */ matrix_interface->ld = ld = header->nx; @@ -315,10 +315,10 @@ static int peek_matrix_handle(starpu_data_handle_t handle, unsigned node, void * else { STARPU_ASSERT_MSG(header->nx <= nx, - "Initial nx %u of data needs to be big enough for received data nx %u\n", + "Initial nx %zu of data needs to be big enough for received data nx %zu\n", nx, header->nx); STARPU_ASSERT_MSG(header->ny <= ny, - "Initial ny %u of data needs to be big enough for received data ny %u\n", + "Initial ny %zu of data needs to be big enough for received data ny %zu\n", ny, header->ny); } @@ -336,7 +336,7 @@ static int peek_matrix_handle(starpu_data_handle_t handle, unsigned node, void * memcpy(matrix, ptr, nx*ny*elemsize); else { - uint32_t y; + size_t y; for(y=0 ; ynx; } -uint32_t starpu_matrix_get_ny(starpu_data_handle_t handle) +size_t starpu_matrix_get_ny(starpu_data_handle_t handle) { struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); @@ -407,7 +407,7 @@ uint32_t starpu_matrix_get_ny(starpu_data_handle_t handle) return matrix_interface->ny; } -uint32_t starpu_matrix_get_local_ld(starpu_data_handle_t handle) +size_t starpu_matrix_get_local_ld(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); @@ -474,7 +474,7 @@ static starpu_ssize_t allocate_matrix_buffer_on_node(void *data_interface_, unsi struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) data_interface_; - uint32_t ld = matrix_interface->nx; // by default + size_t ld = matrix_interface->nx; // by default starpu_ssize_t allocated_memory = matrix_interface->allocsize; handle = starpu_malloc_on_node(dst_node, allocated_memory); @@ -576,12 +576,12 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int struct starpu_matrix_interface *dst_matrix = (struct starpu_matrix_interface *) 
dst_interface; int ret = 0; - uint32_t nx = dst_matrix->nx; - uint32_t ny = dst_matrix->ny; + size_t nx = dst_matrix->nx; + size_t ny = dst_matrix->ny; size_t elemsize = dst_matrix->elemsize; - uint32_t ld_src = src_matrix->ld; - uint32_t ld_dst = dst_matrix->ld; + size_t ld_src = src_matrix->ld; + size_t ld_dst = dst_matrix->ld; if (starpu_interface_copy2d(src_matrix->dev_handle, src_matrix->offset, src_node, dst_matrix->dev_handle, dst_matrix->offset, dst_node, @@ -598,8 +598,5 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) { struct starpu_matrix_interface *matrix = (struct starpu_matrix_interface *) data_interface; - return snprintf(buf, size, "M%ux%ux%u", - (unsigned) matrix->nx, - (unsigned) matrix->ny, - (unsigned) matrix->elemsize); + return snprintf(buf, size, "M%zux%zux%zu", matrix->nx, matrix->ny, matrix->elemsize); } diff --git a/src/datawizard/interfaces/multiformat_interface.c b/src/datawizard/interfaces/multiformat_interface.c index f4e9c3f3e4..e953f5d49d 100644 --- a/src/datawizard/interfaces/multiformat_interface.c +++ b/src/datawizard/interfaces/multiformat_interface.c @@ -73,7 +73,7 @@ static size_t multiformat_interface_get_size(starpu_data_handle_t handle); static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle); static int multiformat_compare(void *data_interface_a, void *data_interface_b); static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f); -static uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle); +static size_t starpu_multiformat_get_nx(starpu_data_handle_t handle); static struct starpu_multiformat_data_interface_ops* get_mf_ops(void *data_interface) @@ -163,7 +163,7 @@ static void register_multiformat_handle(starpu_data_handle_t handle, int home_no void starpu_multiformat_data_register(starpu_data_handle_t *handleptr, int home_node, void *ptr, - uint32_t 
nobjects, + size_t nx, struct starpu_multiformat_data_interface_ops *format_ops) { struct starpu_multiformat_interface multiformat = @@ -172,7 +172,7 @@ void starpu_multiformat_data_register(starpu_data_handle_t *handleptr, .cpu_ptr = ptr, .cuda_ptr = NULL, .opencl_ptr = NULL, - .nx = nobjects, + .nx = nx, .ops = format_ops }; @@ -206,7 +206,7 @@ static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f) multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); - fprintf(f, "%u\t", multiformat_interface->nx); + fprintf(f, "%zu\t", multiformat_interface->nx); } /* XXX : returns CPU size */ @@ -219,7 +219,7 @@ static size_t multiformat_interface_get_size(starpu_data_handle_t handle) return size; } -uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle) +size_t starpu_multiformat_get_nx(starpu_data_handle_t handle) { struct starpu_multiformat_interface *multiformat_interface; multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); diff --git a/src/datawizard/interfaces/ndim_filters.c b/src/datawizard/interfaces/ndim_filters.c index dd5fb6f4a3..d9f61ead24 100644 --- a/src/datawizard/interfaces/ndim_filters.c +++ b/src/datawizard/interfaces/ndim_filters.c @@ -27,8 +27,7 @@ static void _interface_assignment_ndim_to_variable(void *ndim_interface, void *c static void _interface_deallocate(void * ndim_interface); -static void _starpu_ndim_filter_block(void *parent_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, - unsigned id, unsigned nparts, uintptr_t shadow_size) +static void _starpu_ndim_filter_block(void *parent_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts, uintptr_t shadow_size) { struct starpu_ndim_interface *ndim_parent = (struct starpu_ndim_interface *) parent_interface; struct 
starpu_ndim_interface *ndim_child = (struct starpu_ndim_interface *) child_interface; @@ -36,16 +35,16 @@ static void _starpu_ndim_filter_block(void *parent_interface, void *child_interf STARPU_ASSERT_MSG(ndim_parent->id == STARPU_NDIM_INTERFACE_ID, "%s can only be applied on a ndim array data", __func__); size_t ndim = ndim_parent->ndim; - STARPU_ASSERT_MSG(ndim > 0, "ndim %u must be greater than 0!\n", (unsigned) ndim); + STARPU_ASSERT_MSG(ndim > 0, "ndim %zu must be greater than 0!\n", ndim); unsigned dim = 0; if (ndim > 1) dim = f->filter_arg; - STARPU_ASSERT_MSG(dim < ndim, "dim %u must be less than %u!\n", dim, (unsigned) ndim); + STARPU_ASSERT_MSG(dim < ndim, "dim %u must be less than %zu!\n", dim, ndim); uint32_t parent_nn = 0; - uint32_t ni[ndim]; + size_t ni[ndim]; unsigned i; for (i=0; ildn[dim]; size_t elemsize = ndim_parent->elemsize; - uint32_t child_nn; + size_t child_nn; size_t offset; starpu_filter_nparts_compute_chunk_size_and_offset(parent_nn, nparts, elemsize, id, blocksize, &child_nn, &offset); child_nn += 2 * shadow_size; ndim_child->id = ndim_parent->id; - _STARPU_MALLOC(ndim_child->nn, ndim*sizeof(uint32_t)); + _STARPU_MALLOC(ndim_child->nn, ndim*sizeof(size_t)); for (i=0; ildn, ndim*sizeof(uint32_t)); + _STARPU_MALLOC(ndim_child->ldn, ndim*sizeof(size_t)); ndim_child->ndim = ndim; ndim_child->elemsize = elemsize; ndim_child->allocsize = elemsize; @@ -111,22 +110,19 @@ static void _starpu_ndim_filter_block(void *parent_interface, void *child_interf } } -void starpu_ndim_filter_block(void *parent_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, - unsigned id, unsigned nparts) +void starpu_ndim_filter_block(void *parent_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { _starpu_ndim_filter_block(parent_interface, child_interface, f, id, nparts, 0); } -void starpu_ndim_filter_block_shadow(void *parent_interface, void *child_interface, 
STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, - unsigned id, unsigned nparts) +void starpu_ndim_filter_block_shadow(void *parent_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; _starpu_ndim_filter_block(parent_interface, child_interface, f, id, nparts, shadow_size); } -void starpu_ndim_filter_to_tensor(void *parent_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, - unsigned id, unsigned nparts) +void starpu_ndim_filter_to_tensor(void *parent_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { struct starpu_ndim_interface *ndim_parent = (struct starpu_ndim_interface *) parent_interface; STARPU_ASSERT_MSG(ndim_parent->ndim == 4, "can only be applied on a 4-dim array"); @@ -142,8 +138,7 @@ void starpu_ndim_filter_to_tensor(void *parent_interface, void *child_interface, _interface_deallocate(&ndim_child); } -void starpu_ndim_filter_to_block(void *parent_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, - unsigned id, unsigned nparts) +void starpu_ndim_filter_to_block(void *parent_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { struct starpu_ndim_interface *ndim_parent = (struct starpu_ndim_interface *) parent_interface; STARPU_ASSERT_MSG(ndim_parent->ndim == 3, "can only be applied on a 3-dim array"); @@ -159,8 +154,7 @@ void starpu_ndim_filter_to_block(void *parent_interface, void *child_interface, _interface_deallocate(&ndim_child); } -void starpu_ndim_filter_to_matrix(void *parent_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, - unsigned id, unsigned nparts) +void starpu_ndim_filter_to_matrix(void *parent_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct 
starpu_data_filter *f, unsigned id, unsigned nparts) { struct starpu_ndim_interface *ndim_parent = (struct starpu_ndim_interface *) parent_interface; STARPU_ASSERT_MSG(ndim_parent->ndim == 2, "can only be applied on a 2-dim array"); @@ -176,8 +170,7 @@ void starpu_ndim_filter_to_matrix(void *parent_interface, void *child_interface, _interface_deallocate(&ndim_child); } -void starpu_ndim_filter_to_vector(void *parent_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, - unsigned id, unsigned nparts) +void starpu_ndim_filter_to_vector(void *parent_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { struct starpu_ndim_interface *ndim_parent = (struct starpu_ndim_interface *) parent_interface; STARPU_ASSERT_MSG(ndim_parent->ndim == 1, "can only be applied on a 1-dim array"); @@ -193,8 +186,7 @@ void starpu_ndim_filter_to_vector(void *parent_interface, void *child_interface, _interface_deallocate(&ndim_child); } -void starpu_ndim_filter_to_variable(void *parent_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, - unsigned id, unsigned nparts) +void starpu_ndim_filter_to_variable(void *parent_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { struct starpu_ndim_interface *ndim_parent = (struct starpu_ndim_interface *) parent_interface; STARPU_ASSERT_MSG(ndim_parent->ndim == 0, "can only be applied on a 0-dim array (a variable)"); @@ -203,8 +195,7 @@ void starpu_ndim_filter_to_variable(void *parent_interface, void *child_interfac _interface_assignment_ndim_to_variable(parent_interface, child_interface); } -void starpu_ndim_filter_pick_ndim(void *parent_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, - unsigned id, unsigned nparts) +void starpu_ndim_filter_pick_ndim(void *parent_interface, void *child_interface, 
STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { struct starpu_ndim_interface *ndim_parent = (struct starpu_ndim_interface *) parent_interface; struct starpu_ndim_interface *ndim_child = (struct starpu_ndim_interface *) child_interface; @@ -213,16 +204,16 @@ void starpu_ndim_filter_pick_ndim(void *parent_interface, void *child_interface, ndim_child->id = STARPU_NDIM_INTERFACE_ID; size_t ndim = ndim_parent->ndim; - STARPU_ASSERT_MSG(ndim > 0, "ndim %u must be greater than 0!\n", (unsigned) ndim); + STARPU_ASSERT_MSG(ndim > 0, "ndim %zu must be greater than 0!\n", ndim); unsigned dim = 0; if (ndim > 1) dim = f->filter_arg; - STARPU_ASSERT_MSG(dim < ndim, "dim %u must be less than %u!\n", dim, (unsigned) ndim); + STARPU_ASSERT_MSG(dim < ndim, "dim %u must be less than %zu!\n", dim, ndim); uint32_t parent_nn = 0; - uint32_t ni[ndim]; + size_t ni[ndim]; unsigned i; for (i=0; inn, (ndim-1)*sizeof(uint32_t)); + _STARPU_MALLOC(ndim_child->nn, (ndim-1)*sizeof(size_t)); if (ndim > 1) { j = 0; @@ -255,7 +246,7 @@ void starpu_ndim_filter_pick_ndim(void *parent_interface, void *child_interface, } } - _STARPU_MALLOC(ndim_child->ldn, (ndim-1)*sizeof(uint32_t)); + _STARPU_MALLOC(ndim_child->ldn, (ndim-1)*sizeof(size_t)); ndim_child->ndim = ndim-1; ndim_child->elemsize = elemsize; ndim_child->allocsize = elemsize; @@ -521,11 +512,11 @@ void starpu_ndim_filter_pick_variable(void *parent_interface, void *child_interf STARPU_ASSERT_MSG(ndim_parent->id == STARPU_NDIM_INTERFACE_ID, "%s can only be applied on a ndim array data", __func__); size_t ndim = ndim_parent->ndim; - STARPU_ASSERT_MSG(ndim > 0, "ndim %u must be greater than 0!\n", (unsigned) ndim); + STARPU_ASSERT_MSG(ndim > 0, "ndim %zu must be greater than 0!\n", ndim); - uint32_t nn[ndim]; - unsigned ldn[ndim]; - unsigned i; + size_t nn[ndim]; + size_t ldn[ndim]; + size_t i; for (i=0; inn[i]; @@ -533,7 +524,7 @@ void starpu_ndim_filter_pick_variable(void *parent_interface, void *child_interf 
} size_t elemsize = ndim_parent->elemsize; - uint32_t* chunk_pos = (uint32_t*)f->filter_arg_ptr; + size_t* chunk_pos = (size_t*)f->filter_arg_ptr; int b = 1; size_t offset = 0; for (i = 0; i < ndim; i++) diff --git a/src/datawizard/interfaces/ndim_interface.c b/src/datawizard/interfaces/ndim_interface.c index f40251e199..9b1943c524 100644 --- a/src/datawizard/interfaces/ndim_interface.c +++ b/src/datawizard/interfaces/ndim_interface.c @@ -24,7 +24,7 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int static int map_ndim(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int unmap_ndim(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int update_map_ndim(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); -static size_t _get_size(uint32_t* nn, size_t ndim, size_t elemsize); +static size_t _get_size(size_t* nn, size_t ndim, size_t elemsize); static const struct starpu_data_copy_methods ndim_copy_data_methods_s = { @@ -99,19 +99,18 @@ static void register_ndim_handle(starpu_data_handle_t handle, int home_node, voi int node; for (node = 0; node < STARPU_MAXNODES; node++) { - struct starpu_ndim_interface *local_interface = (struct starpu_ndim_interface *) - starpu_data_get_interface_on_node(handle, node); + struct starpu_ndim_interface *local_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, node); if (node == home_node) { local_interface->ptr = ndim_interface->ptr; local_interface->dev_handle = ndim_interface->dev_handle; local_interface->offset = ndim_interface->offset; - uint32_t* ldn_org = ndim_interface->ldn; - uint32_t* ldn_cpy; - _STARPU_MALLOC(ldn_cpy, ndim*sizeof(uint32_t)); + size_t* ldn_org = ndim_interface->ldn; + size_t* ldn_cpy; + _STARPU_MALLOC(ldn_cpy, ndim*sizeof(size_t)); if (ndim) - memcpy(ldn_cpy, ldn_org, ndim*sizeof(uint32_t)); + memcpy(ldn_cpy, ldn_org, ndim*sizeof(size_t)); 
local_interface->ldn = ldn_cpy; } else @@ -119,17 +118,17 @@ static void register_ndim_handle(starpu_data_handle_t handle, int home_node, voi local_interface->ptr = 0; local_interface->dev_handle = 0; local_interface->offset = 0; - uint32_t* ldn_zero; - _STARPU_CALLOC(ldn_zero, ndim, sizeof(uint32_t)); + size_t* ldn_zero; + _STARPU_CALLOC(ldn_zero, ndim, sizeof(size_t)); local_interface->ldn = ldn_zero; } local_interface->id = ndim_interface->id; - uint32_t* nn_org = ndim_interface->nn; - uint32_t* nn_cpy; - _STARPU_MALLOC(nn_cpy, ndim*sizeof(uint32_t)); + size_t* nn_org = ndim_interface->nn; + size_t* nn_cpy; + _STARPU_MALLOC(nn_cpy, ndim*sizeof(size_t)); if (ndim) - memcpy(nn_cpy, nn_org, ndim*sizeof(uint32_t)); + memcpy(nn_cpy, nn_org, ndim*sizeof(size_t)); local_interface->nn = nn_cpy; local_interface->ndim = ndim_interface->ndim; local_interface->elemsize = ndim_interface->elemsize; @@ -164,15 +163,14 @@ static void unregister_ndim_handle(starpu_data_handle_t handle) } /* declare a new data with the BLAS interface */ -void starpu_ndim_data_register(starpu_data_handle_t *handleptr, int home_node, - uintptr_t ptr, uint32_t* ldn, uint32_t* nn, size_t ndim, size_t elemsize) +void starpu_ndim_data_register(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, size_t* ldn, size_t* nn, size_t ndim, size_t elemsize) { unsigned i; size_t allocsize = _get_size(nn, ndim, elemsize); for (i=1; i= nn[i-1], "ldn[%u]/ldn[%u] = %u/%u = %u should not be less than nn[%u] = %u.", i, i-1, ldn[i], ldn[i-1], ldn[i]/ldn[i-1], i-1, nn[i-1]); + STARPU_ASSERT_MSG(ldn[i]/ldn[i-1] >= nn[i-1], "ldn[%u]/ldn[%u] = %zu/%zu = %zu should not be less than nn[%u] = %zu.", i, i-1, ldn[i], ldn[i-1], ldn[i]/ldn[i-1], i-1, nn[i-1]); } struct starpu_ndim_interface ndim_interface = @@ -190,7 +188,7 @@ void starpu_ndim_data_register(starpu_data_handle_t *handleptr, int home_node, #ifndef STARPU_SIMGRID if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) { - uint32_t nn0 = 
ndim?nn[0]:1; + size_t nn0 = ndim?nn[0]:1; int b = 1; size_t buffersize = 0; for (i = 1; i < ndim; i++) @@ -218,8 +216,7 @@ void starpu_ndim_data_register(starpu_data_handle_t *handleptr, int home_node, starpu_data_register(handleptr, home_node, &ndim_interface, &starpu_interface_ndim_ops); } -void starpu_ndim_ptr_register(starpu_data_handle_t handle, unsigned node, - uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t* ldn) +void starpu_ndim_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, size_t* ldn) { struct starpu_ndim_interface *ndim_interface = starpu_data_get_interface_on_node(handle, node); starpu_data_ptr_register(handle, node); @@ -227,7 +224,7 @@ void starpu_ndim_ptr_register(starpu_data_handle_t handle, unsigned node, ndim_interface->dev_handle = dev_handle; ndim_interface->offset = offset; if (ndim_interface->ndim) - memcpy(ndim_interface->ldn, ldn, ndim_interface->ndim*sizeof(uint32_t)); + memcpy(ndim_interface->ldn, ldn, ndim_interface->ndim*sizeof(size_t)); } static uint32_t footprint_ndim_interface_crc32(starpu_data_handle_t handle) @@ -279,21 +276,21 @@ static void display_ndim_interface(starpu_data_handle_t handle, FILE *f) unsigned i; for (i=0; indim; i++) - fprintf(f, "%u\t", ndim_interface->nn[i]); + fprintf(f, "%zu\t", ndim_interface->nn[i]); if (ndim_interface->ndim == 0) { - fprintf(f, "%lu\t", (unsigned long)ndim_interface->elemsize); + fprintf(f, "%zu\t", ndim_interface->elemsize); } } -static int _is_contiguous_ndim(uint32_t* nn, uint32_t* ldn, size_t ndim) +static int _is_contiguous_ndim(size_t* nn, size_t* ldn, size_t ndim) { if (ndim == 0) return 1; unsigned i; - uint32_t ldi = 1; + size_t ldi = 1; for (i = 0; ildn; - uint32_t* nn = ndim_interface->nn; + size_t* ldn = ndim_interface->ldn; + size_t* nn = ndim_interface->nn; size_t ndim = ndim_interface->ndim; size_t elemsize = ndim_interface->elemsize; @@ -390,11 +386,10 @@ static int 
peek_ndim_handle(starpu_data_handle_t handle, unsigned node, void *pt { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); - struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) - starpu_data_get_interface_on_node(handle, node); + struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, node); - uint32_t* ldn = ndim_interface->ldn; - uint32_t* nn = ndim_interface->nn; + size_t* ldn = ndim_interface->ldn; + size_t* nn = ndim_interface->nn; size_t ndim = ndim_interface->ndim; size_t elemsize = ndim_interface->elemsize; @@ -430,10 +425,9 @@ static size_t ndim_interface_get_size(starpu_data_handle_t handle) } /* offer an access to the data parameters */ -uint32_t* starpu_ndim_get_nn(starpu_data_handle_t handle) +size_t* starpu_ndim_get_nn(starpu_data_handle_t handle) { - struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) - starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. 
The given data is not a ndim array."); @@ -442,10 +436,9 @@ uint32_t* starpu_ndim_get_nn(starpu_data_handle_t handle) return ndim_interface->nn; } -uint32_t starpu_ndim_get_ni(starpu_data_handle_t handle, size_t i) +size_t starpu_ndim_get_ni(starpu_data_handle_t handle, size_t i) { - struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) - starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); STARPU_ASSERT_MSG(ndim_interface->ndim > 0, "The function can only be called when array dimension is greater than 0."); @@ -456,15 +449,14 @@ uint32_t starpu_ndim_get_ni(starpu_data_handle_t handle, size_t i) return ndim_interface->nn[i]; } -uint32_t* starpu_ndim_get_local_ldn(starpu_data_handle_t handle) +size_t* starpu_ndim_get_local_ldn(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); - struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) - starpu_data_get_interface_on_node(handle, node); + struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. 
The given data is not a ndim array."); @@ -473,15 +465,14 @@ uint32_t* starpu_ndim_get_local_ldn(starpu_data_handle_t handle) return ndim_interface->ldn; } -uint32_t starpu_ndim_get_local_ldi(starpu_data_handle_t handle, size_t i) +size_t starpu_ndim_get_local_ldi(starpu_data_handle_t handle, size_t i) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); - struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) - starpu_data_get_interface_on_node(handle, node); + struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, node); STARPU_ASSERT_MSG(ndim_interface->ndim > 0, "The function can only be called when array dimension is greater than 0."); @@ -499,8 +490,7 @@ uintptr_t starpu_ndim_get_local_ptr(starpu_data_handle_t handle) STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); - struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) - starpu_data_get_interface_on_node(handle, node); + struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. The given data is not a ndim array."); @@ -511,8 +501,7 @@ uintptr_t starpu_ndim_get_local_ptr(starpu_data_handle_t handle) size_t starpu_ndim_get_ndim(starpu_data_handle_t handle) { - struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) - starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. 
The given data is not a ndim array."); @@ -523,8 +512,7 @@ size_t starpu_ndim_get_ndim(starpu_data_handle_t handle) size_t starpu_ndim_get_elemsize(starpu_data_handle_t handle) { - struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) - starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. The given data is not a ndim array."); @@ -539,11 +527,11 @@ size_t starpu_ndim_get_elemsize(starpu_data_handle_t handle) static void set_trivial_ndim_ld(struct starpu_ndim_interface *dst_ndarr) { size_t ndim = dst_ndarr->ndim; - uint32_t* nn = dst_ndarr->nn; + size_t* nn = dst_ndarr->nn; if (ndim > 0) { - uint32_t ntmp = 1; + size_t ntmp = 1; dst_ndarr->ldn[0] = 1; size_t i; for (i=1; inn; + size_t* nn = dst_ndarr->nn; size_t ndim = dst_ndarr->ndim; size_t elemsize = dst_ndarr->elemsize; - uint32_t* ldn_src = src_ndarr->ldn; - uint32_t* ldn_dst = dst_ndarr->ldn; + size_t* ldn_src = src_ndarr->ldn; + size_t* ldn_dst = dst_ndarr->ldn; if (starpu_interface_copynd(src_ndarr->dev_handle, src_ndarr->offset, src_node, dst_ndarr->dev_handle, dst_ndarr->offset, dst_node, diff --git a/src/datawizard/interfaces/tensor_filters.c b/src/datawizard/interfaces/tensor_filters.c index ab5bcc7fbb..d91617e89d 100644 --- a/src/datawizard/interfaces/tensor_filters.c +++ b/src/datawizard/interfaces/tensor_filters.c @@ -26,11 +26,11 @@ static void _starpu_tensor_filter_block(int dim, void *parent_interface, void *c unsigned blocksize; /* the element will be split, in case horizontal, it's nx, in case vertical, it's ny, in case depth, it's nz, in case time, it's nt*/ - uint32_t nn; - uint32_t nx; - uint32_t ny; - uint32_t nz; - uint32_t nt; + size_t nn; + size_t nx; + size_t ny; + size_t nz; + size_t nt; switch(dim) { @@ -79,9 +79,9 @@ 
static void _starpu_tensor_filter_block(int dim, void *parent_interface, void *c size_t elemsize = tensor_parent->elemsize; - STARPU_ASSERT_MSG(nparts <= nn, "cannot split %u elements in %u parts", nn, nparts); + STARPU_ASSERT_MSG(nparts <= nn, "cannot split %zu elements in %u parts", nn, nparts); - uint32_t child_nn; + size_t child_nn; size_t offset; starpu_filter_nparts_compute_chunk_size_and_offset(nn, nparts, elemsize, id, blocksize, &child_nn, &offset); @@ -197,11 +197,11 @@ static void _starpu_tensor_filter_pick_block(int dim, void *parent_interface, vo struct starpu_block_interface *block_child = (struct starpu_block_interface *) child_interface; unsigned blocksize; - uint32_t nn; - uint32_t nx = tensor_parent->nx; - uint32_t ny = tensor_parent->ny; - uint32_t nz = tensor_parent->nz; - uint32_t nt = tensor_parent->nt; + size_t nn; + size_t nx = tensor_parent->nx; + size_t ny = tensor_parent->ny; + size_t nz = tensor_parent->nz; + size_t nt = tensor_parent->nt; switch(dim) { @@ -319,14 +319,14 @@ void starpu_tensor_filter_pick_variable(void *parent_interface, void *child_inte /* each chunk becomes a variable */ struct starpu_variable_interface *variable_child = (struct starpu_variable_interface *) child_interface; - uint32_t nx = tensor_parent->nx; - uint32_t ny = tensor_parent->ny; - uint32_t nz = tensor_parent->nz; - uint32_t nt = tensor_parent->nt; + size_t nx = tensor_parent->nx; + size_t ny = tensor_parent->ny; + size_t nz = tensor_parent->nz; + size_t nt = tensor_parent->nt; - unsigned ldy = tensor_parent->ldy; - unsigned ldz = tensor_parent->ldz; - unsigned ldt = tensor_parent->ldt; + size_t ldy = tensor_parent->ldy; + size_t ldz = tensor_parent->ldz; + size_t ldt = tensor_parent->ldt; size_t elemsize = tensor_parent->elemsize; diff --git a/src/datawizard/interfaces/tensor_interface.c b/src/datawizard/interfaces/tensor_interface.c index 24b499f642..24c343c039 100644 --- a/src/datawizard/interfaces/tensor_interface.c +++ 
b/src/datawizard/interfaces/tensor_interface.c @@ -117,12 +117,12 @@ static void register_tensor_handle(starpu_data_handle_t handle, int home_node, v /* declare a new data with the BLAS interface */ void starpu_tensor_data_register(starpu_data_handle_t *handleptr, int home_node, - uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t ldt, uint32_t nx, - uint32_t ny, uint32_t nz, uint32_t nt, size_t elemsize) + uintptr_t ptr, size_t ldy, size_t ldz, size_t ldt, size_t nx, + size_t ny, size_t nz, size_t nt, size_t elemsize) { - STARPU_ASSERT_MSG(ldy >= nx, "ldy = %u should not be less than nx = %u.", ldy, nx); - STARPU_ASSERT_MSG(ldz/ldy >= ny, "ldz/ldy = %u/%u = %u should not be less than ny = %u.", ldz, ldy, ldz/ldy, ny); - STARPU_ASSERT_MSG(ldt/ldz >= nz, "ldt/ldz = %u/%u = %u should not be less than nz = %u.", ldt, ldz, ldt/ldz, nz); + STARPU_ASSERT_MSG(ldy >= nx, "ldy = %zu should not be less than nx = %zu.", ldy, nx); + STARPU_ASSERT_MSG(ldz/ldy >= ny, "ldz/ldy = %zu/%zu = %zu should not be less than ny = %zu.", ldz, ldy, ldz/ldy, ny); + STARPU_ASSERT_MSG(ldt/ldz >= nz, "ldt/ldz = %zu/%zu = %zu should not be less than nz = %zu.", ldt, ldz, ldt/ldz, nz); struct starpu_tensor_interface tensor_interface = { .id = STARPU_TENSOR_INTERFACE_ID, @@ -153,7 +153,7 @@ void starpu_tensor_data_register(starpu_data_handle_t *handleptr, int home_node, } void starpu_tensor_ptr_register(starpu_data_handle_t handle, unsigned node, - uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz, uint32_t ldt) + uintptr_t ptr, uintptr_t dev_handle, size_t offset, size_t ldy, size_t ldz, size_t ldt) { struct starpu_tensor_interface *tensor_interface = starpu_data_get_interface_on_node(handle, node); starpu_data_ptr_register(handle, node); @@ -196,7 +196,7 @@ static void display_tensor_interface(starpu_data_handle_t handle, FILE *f) tensor_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); - fprintf(f, 
"%u\t%u\t%u\t%u\t", tensor_interface->nx, tensor_interface->ny, tensor_interface->nz, tensor_interface->nt); + fprintf(f, "%zu\t%zu\t%zu\t%zu\t", tensor_interface->nx, tensor_interface->ny, tensor_interface->nz, tensor_interface->nt); } #define IS_CONTIGUOUS_MATRIX(nx, ny, ldy) ((nx) == (ldy)) @@ -210,20 +210,20 @@ static int pack_tensor_handle(starpu_data_handle_t handle, unsigned node, void * struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, node); - uint32_t ldy = tensor_interface->ldy; - uint32_t ldz = tensor_interface->ldz; - uint32_t ldt = tensor_interface->ldt; - uint32_t nx = tensor_interface->nx; - uint32_t ny = tensor_interface->ny; - uint32_t nz = tensor_interface->nz; - uint32_t nt = tensor_interface->nt; + size_t ldy = tensor_interface->ldy; + size_t ldz = tensor_interface->ldz; + size_t ldt = tensor_interface->ldt; + size_t nx = tensor_interface->nx; + size_t ny = tensor_interface->ny; + size_t nz = tensor_interface->nz; + size_t nt = tensor_interface->nt; size_t elemsize = tensor_interface->elemsize; *count = nx*ny*nz*nt*elemsize; if (ptr != NULL) { - uint32_t t, z, y; + size_t t, z, y; char *block = (void *)tensor_interface->ptr; *ptr = (void *)starpu_malloc_on_node_flags(node, *count, 0); @@ -279,18 +279,18 @@ static int peek_tensor_handle(starpu_data_handle_t handle, unsigned node, void * struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, node); - uint32_t ldy = tensor_interface->ldy; - uint32_t ldz = tensor_interface->ldz; - uint32_t ldt = tensor_interface->ldt; - uint32_t nx = tensor_interface->nx; - uint32_t ny = tensor_interface->ny; - uint32_t nz = tensor_interface->nz; - uint32_t nt = tensor_interface->nt; + size_t ldy = tensor_interface->ldy; + size_t ldz = tensor_interface->ldz; + size_t ldt = tensor_interface->ldt; + size_t nx = tensor_interface->nx; + size_t ny = tensor_interface->ny; 
+ size_t nz = tensor_interface->nz; + size_t nt = tensor_interface->nt; size_t elemsize = tensor_interface->elemsize; STARPU_ASSERT(count == elemsize * nx * ny * nz * nt); - uint32_t t, z, y; + size_t t, z, y; char *cur = ptr; char *block = (void *)tensor_interface->ptr; @@ -362,7 +362,7 @@ static size_t tensor_interface_get_size(starpu_data_handle_t handle) } /* offer an access to the data parameters */ -uint32_t starpu_tensor_get_nx(starpu_data_handle_t handle) +size_t starpu_tensor_get_nx(starpu_data_handle_t handle) { struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); @@ -374,7 +374,7 @@ uint32_t starpu_tensor_get_nx(starpu_data_handle_t handle) return tensor_interface->nx; } -uint32_t starpu_tensor_get_ny(starpu_data_handle_t handle) +size_t starpu_tensor_get_ny(starpu_data_handle_t handle) { struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); @@ -386,7 +386,7 @@ uint32_t starpu_tensor_get_ny(starpu_data_handle_t handle) return tensor_interface->ny; } -uint32_t starpu_tensor_get_nz(starpu_data_handle_t handle) +size_t starpu_tensor_get_nz(starpu_data_handle_t handle) { struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); @@ -398,7 +398,7 @@ uint32_t starpu_tensor_get_nz(starpu_data_handle_t handle) return tensor_interface->nz; } -uint32_t starpu_tensor_get_nt(starpu_data_handle_t handle) +size_t starpu_tensor_get_nt(starpu_data_handle_t handle) { struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); @@ -410,7 +410,7 @@ uint32_t starpu_tensor_get_nt(starpu_data_handle_t handle) return tensor_interface->nt; } -uint32_t starpu_tensor_get_local_ldy(starpu_data_handle_t handle) +size_t 
starpu_tensor_get_local_ldy(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); @@ -427,7 +427,7 @@ uint32_t starpu_tensor_get_local_ldy(starpu_data_handle_t handle) return tensor_interface->ldy; } -uint32_t starpu_tensor_get_local_ldz(starpu_data_handle_t handle) +size_t starpu_tensor_get_local_ldz(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); @@ -444,7 +444,7 @@ uint32_t starpu_tensor_get_local_ldz(starpu_data_handle_t handle) return tensor_interface->ldz; } -uint32_t starpu_tensor_get_local_ldt(starpu_data_handle_t handle) +size_t starpu_tensor_get_local_ldt(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); @@ -500,10 +500,10 @@ static starpu_ssize_t allocate_tensor_buffer_on_node(void *data_interface_, unsi struct starpu_tensor_interface *dst_block = (struct starpu_tensor_interface *) data_interface_; - uint32_t nx = dst_block->nx; - uint32_t ny = dst_block->ny; - uint32_t nz = dst_block->nz; - uint32_t nt = dst_block->nt; + size_t nx = dst_block->nx; + size_t ny = dst_block->ny; + size_t nz = dst_block->nz; + size_t nt = dst_block->nt; size_t elemsize = dst_block->elemsize; starpu_ssize_t allocated_memory; @@ -532,10 +532,10 @@ static starpu_ssize_t allocate_tensor_buffer_on_node(void *data_interface_, unsi static void free_tensor_buffer_on_node(void *data_interface, unsigned node) { struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) data_interface; - uint32_t nx = tensor_interface->nx; - uint32_t ny = tensor_interface->ny; - uint32_t nz = tensor_interface->nz; - uint32_t nt = tensor_interface->nt; + size_t nx = tensor_interface->nx; + size_t ny = tensor_interface->ny; + size_t nz = tensor_interface->nz; + size_t nt = tensor_interface->nt; size_t elemsize = tensor_interface->elemsize; starpu_free_on_node(node, tensor_interface->dev_handle, nx*ny*nz*nt*elemsize); @@ -594,18 +594,18 @@ static int 
copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int struct starpu_tensor_interface *dst_block = (struct starpu_tensor_interface *) dst_interface; int ret = 0; - uint32_t nx = dst_block->nx; - uint32_t ny = dst_block->ny; - uint32_t nz = dst_block->nz; - uint32_t nt = dst_block->nt; + size_t nx = dst_block->nx; + size_t ny = dst_block->ny; + size_t nz = dst_block->nz; + size_t nt = dst_block->nt; size_t elemsize = dst_block->elemsize; - uint32_t ldy_src = src_block->ldy; - uint32_t ldz_src = src_block->ldz; - uint32_t ldt_src = src_block->ldt; - uint32_t ldy_dst = dst_block->ldy; - uint32_t ldz_dst = dst_block->ldz; - uint32_t ldt_dst = dst_block->ldt; + size_t ldy_src = src_block->ldy; + size_t ldz_src = src_block->ldz; + size_t ldt_src = src_block->ldt; + size_t ldy_dst = dst_block->ldy; + size_t ldz_dst = dst_block->ldz; + size_t ldt_dst = dst_block->ldt; if (starpu_interface_copy4d(src_block->dev_handle, src_block->offset, src_node, dst_block->dev_handle, dst_block->offset, dst_node, @@ -624,10 +624,10 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) { struct starpu_tensor_interface *block = (struct starpu_tensor_interface *) data_interface; - return snprintf(buf, size, "T%ux%ux%ux%ux%u", - (unsigned) block->nx, - (unsigned) block->ny, - (unsigned) block->nz, - (unsigned) block->nt, - (unsigned) block->elemsize); + return snprintf(buf, size, "T%zux%zux%zux%zux%zu", + block->nx, + block->ny, + block->nz, + block->nt, + block->elemsize); } diff --git a/src/datawizard/interfaces/vector_filters.c b/src/datawizard/interfaces/vector_filters.c index ec6edc36f1..6c9cbb6009 100644 --- a/src/datawizard/interfaces/vector_filters.c +++ b/src/datawizard/interfaces/vector_filters.c @@ -25,12 +25,12 @@ static void _starpu_vector_filter_block(void *parent_interface, void *child_inte struct starpu_vector_interface *vector_child = (struct 
starpu_vector_interface *) child_interface; /* actual number of elements */ - uint32_t nx = vector_parent->nx - 2 * shadow_size; + size_t nx = vector_parent->nx - 2 * shadow_size; size_t elemsize = vector_parent->elemsize; - STARPU_ASSERT_MSG(nchunks <= nx, "cannot split %u elements in %u parts", nx, nchunks); + STARPU_ASSERT_MSG(nchunks <= nx, "cannot split %zu elements in %u parts", nx, nchunks); - uint32_t child_nx; + size_t child_nx; size_t offset; starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1, &child_nx, &offset); child_nx += 2*shadow_size; @@ -72,12 +72,12 @@ void starpu_vector_filter_divide_in_2(void *parent_interface, void *child_interf struct starpu_vector_interface *vector_parent = (struct starpu_vector_interface *) parent_interface; struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface; - uint32_t length_first = f->filter_arg; + size_t length_first = f->filter_arg; - uint32_t nx = vector_parent->nx; + size_t nx = vector_parent->nx; size_t elemsize = vector_parent->elemsize; - STARPU_ASSERT_MSG(length_first < nx, "First part is too long: %u vs %u", length_first, nx); + STARPU_ASSERT_MSG(length_first < nx, "First part is too long: %zu vs %zu", length_first, nx); STARPU_ASSERT_MSG(vector_parent->id == STARPU_VECTOR_INTERFACE_ID, "%s can only be applied on a vector data", __func__); vector_child->id = vector_parent->id; @@ -185,7 +185,7 @@ void starpu_vector_filter_pick_variable(void *parent_interface, void *child_inte struct starpu_variable_interface *variable_child = (struct starpu_variable_interface *) child_interface; /* actual number of elements */ - uint32_t nx = vector_parent->nx; + size_t nx = vector_parent->nx; size_t elemsize = vector_parent->elemsize; size_t chunk_pos = (size_t)f->filter_arg_ptr; diff --git a/src/datawizard/interfaces/vector_interface.c b/src/datawizard/interfaces/vector_interface.c index ae84e5a1bc..2754a0aca5 100644 --- 
a/src/datawizard/interfaces/vector_interface.c +++ b/src/datawizard/interfaces/vector_interface.c @@ -101,8 +101,7 @@ static void register_vector_handle(starpu_data_handle_t handle, int home_node, v int node; for (node = 0; node < STARPU_MAXNODES; node++) { - struct starpu_vector_interface *local_interface = (struct starpu_vector_interface *) - starpu_data_get_interface_on_node(handle, node); + struct starpu_vector_interface *local_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, node); if (node == home_node) { @@ -126,8 +125,7 @@ static void register_vector_handle(starpu_data_handle_t handle, int home_node, v } /* declare a new data with the vector interface */ -void starpu_vector_data_register_allocsize(starpu_data_handle_t *handleptr, int home_node, - uintptr_t ptr, uint32_t nx, size_t elemsize, size_t allocsize) +void starpu_vector_data_register_allocsize(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, size_t nx, size_t elemsize, size_t allocsize) { struct starpu_vector_interface vector = { @@ -154,8 +152,7 @@ void starpu_vector_data_register_allocsize(starpu_data_handle_t *handleptr, int starpu_data_register(handleptr, home_node, &vector, &starpu_interface_vector_ops); } -void starpu_vector_data_register(starpu_data_handle_t *handleptr, int home_node, - uintptr_t ptr, uint32_t nx, size_t elemsize) +void starpu_vector_data_register(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, size_t nx, size_t elemsize) { starpu_vector_data_register_allocsize(handleptr, home_node, ptr, nx, elemsize, nx * elemsize); } @@ -202,18 +199,16 @@ static int vector_alloc_compare(void *data_interface_a, void *data_interface_b) static void display_vector_interface(starpu_data_handle_t handle, FILE *f) { - struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) - starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + struct starpu_vector_interface *vector_interface = (struct 
starpu_vector_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); - fprintf(f, "%u\t", vector_interface->nx); + fprintf(f, "%zu\t", vector_interface->nx); } static int pack_vector_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); - struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) - starpu_data_get_interface_on_node(handle, node); + struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, node); *count = vector_interface->nx*vector_interface->elemsize; @@ -230,8 +225,7 @@ static int peek_vector_handle(starpu_data_handle_t handle, unsigned node, void * { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); - struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) - starpu_data_get_interface_on_node(handle, node); + struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, node); STARPU_ASSERT(count == vector_interface->elemsize * vector_interface->nx); memcpy((void*)vector_interface->ptr, ptr, count); @@ -250,8 +244,7 @@ static int unpack_vector_handle(starpu_data_handle_t handle, unsigned node, void static size_t vector_interface_get_size(starpu_data_handle_t handle) { size_t size; - struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) - starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(vector_interface->id == STARPU_VECTOR_INTERFACE_ID, "Error. 
The given data is not a vector."); @@ -265,8 +258,7 @@ static size_t vector_interface_get_size(starpu_data_handle_t handle) static size_t vector_interface_get_alloc_size(starpu_data_handle_t handle) { size_t size; - struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) - starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(vector_interface->id == STARPU_VECTOR_INTERFACE_ID, "Error. The given data is not a vector."); @@ -279,10 +271,9 @@ static size_t vector_interface_get_alloc_size(starpu_data_handle_t handle) } /* offer an access to the data parameters */ -uint32_t starpu_vector_get_nx(starpu_data_handle_t handle) +size_t starpu_vector_get_nx(starpu_data_handle_t handle) { - struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) - starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(vector_interface->id == STARPU_VECTOR_INTERFACE_ID, "Error. 
The given data is not a vector."); @@ -293,13 +284,11 @@ uint32_t starpu_vector_get_nx(starpu_data_handle_t handle) uintptr_t starpu_vector_get_local_ptr(starpu_data_handle_t handle) { - unsigned node; - node = starpu_worker_get_local_memory_node(); + unsigned node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); - struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) - starpu_data_get_interface_on_node(handle, node); + struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(vector_interface->id == STARPU_VECTOR_INTERFACE_ID, "Error. The given data is not a vector."); @@ -310,8 +299,7 @@ uintptr_t starpu_vector_get_local_ptr(starpu_data_handle_t handle) size_t starpu_vector_get_elemsize(starpu_data_handle_t handle) { - struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) - starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(vector_interface->id == STARPU_VECTOR_INTERFACE_ID, "Error. The given data is not a vector."); @@ -322,8 +310,7 @@ size_t starpu_vector_get_elemsize(starpu_data_handle_t handle) size_t starpu_vector_get_allocsize(starpu_data_handle_t handle) { - struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) - starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); + struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(vector_interface->id == STARPU_VECTOR_INTERFACE_ID, "Error. 
The given data is not a vector."); @@ -448,7 +435,5 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) { struct starpu_vector_interface *vector = (struct starpu_vector_interface *) data_interface; - return snprintf(buf, size, "V%ux%u", - (unsigned) vector->nx, - (unsigned) vector->elemsize); + return snprintf(buf, size, "V%zux%zu", vector->nx, vector->elemsize); } diff --git a/starpujni/src/main/native/starpujni-jobject-vector.c b/starpujni/src/main/native/starpujni-jobject-vector.c index d0882c46d5..2df882d561 100644 --- a/starpujni/src/main/native/starpujni-jobject-vector.c +++ b/starpujni/src/main/native/starpujni-jobject-vector.c @@ -102,7 +102,7 @@ static unsigned s_memory_node(void) return result; } -void starpujni_jobject_vector_data_register(starpu_data_handle_t *handleptr, int home_node, uint32_t size) +void starpujni_jobject_vector_data_register(starpu_data_handle_t *handleptr, int home_node, size_t size) { struct jobject_vector_interface vector = { @@ -114,7 +114,7 @@ void starpujni_jobject_vector_data_register(starpu_data_handle_t *handleptr, int starpu_data_register(handleptr, home_node, &vector, &JOBJECT_VECTOR_INTERFACE_OPS); } -uint32_t starpujni_jobject_vector_get_nx(starpu_data_handle_t handle) +size_t starpujni_jobject_vector_get_nx(starpu_data_handle_t handle) { struct jobject_vector_interface *vector = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return vector->nx; @@ -206,13 +206,13 @@ static int s_jobject_vector_compare(void *data_interface_a, void *data_interface static void s_jobject_vector_display(starpu_data_handle_t handle, FILE *f) { struct jobject_vector_interface *vector = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); - fprintf(f, "%u\t", vector->nx); + fprintf(f, "%zu\t", vector->nx); } static starpu_ssize_t s_jobject_vector_describe(void *data_interface, char *buf, size_t size) { struct 
jobject_vector_interface *vector = data_interface; - return snprintf(buf, size, "JV%u",(unsigned) vector->nx); + return snprintf(buf, size, "JV%zu", vector->nx); } static int s_jobject_vector_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) @@ -378,13 +378,13 @@ static void s_compute_chunk_size_and_offset(unsigned n, unsigned nparts, size_t static void s_jobject_vector_filter_block(void *parent_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) { - uint32_t child_nx; + size_t child_nx; size_t offset; struct jobject_vector_interface *vector_parent = parent_interface; struct jobject_vector_interface *vector_child = child_interface; - uint32_t nx = vector_parent->nx; + size_t nx = vector_parent->nx; - STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %u elements", nchunks, nx); + STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %zu elements", nchunks, nx); s_compute_chunk_size_and_offset(nx, nchunks, sizeof(jobject), id, 1, &child_nx, &offset); diff --git a/starpujni/src/main/native/starpujni-jobject-vector.h b/starpujni/src/main/native/starpujni-jobject-vector.h index a172d5d9fd..a6e0332f57 100644 --- a/starpujni/src/main/native/starpujni-jobject-vector.h +++ b/starpujni/src/main/native/starpujni-jobject-vector.h @@ -22,7 +22,7 @@ struct jobject_vector_interface { int id; - uint32_t nx; + size_t nx; uintptr_t ptr; }; @@ -30,9 +30,9 @@ struct jobject_vector_interface #define STARPUJNI_JOBJECT_VECTOR_GET_LOCAL_PTR(_i_) ((struct jobject_vector_interface *)PTR_GET_ADDR(_i_))->ptr -EXTERN void starpujni_jobject_vector_data_register(starpu_data_handle_t *handleptr, int home_node, uint32_t size); +EXTERN void starpujni_jobject_vector_data_register(starpu_data_handle_t *handleptr, int home_node, size_t size); -EXTERN uint32_t starpujni_jobject_vector_get_nx(starpu_data_handle_t handle); +EXTERN size_t starpujni_jobject_vector_get_nx(starpu_data_handle_t handle); 
EXTERN uintptr_t starpujni_jobject_vector_get_local_ptr(starpu_data_handle_t handle); diff --git a/starpupy/src/starpupy_numpy_filters.c b/starpupy/src/starpupy_numpy_filters.c index 26d252d724..922911821d 100644 --- a/starpupy/src/starpupy_numpy_filters.c +++ b/starpupy/src/starpupy_numpy_filters.c @@ -76,7 +76,7 @@ static void starpupy_numpy_filter(void *parent_interface, void *child_interface, } /*we will do the partition on ni*/ - unsigned child_nn; + size_t child_nn; unsigned* chunks_list = (unsigned*) f->filter_arg_ptr; if (chunks_list != NULL) diff --git a/starpurm/examples/async_spawn.c b/starpurm/examples/async_spawn.c index 09e169694b..324473aa94 100644 --- a/starpurm/examples/async_spawn.c +++ b/starpurm/examples/async_spawn.c @@ -73,9 +73,9 @@ static void spawn_callback(void *_arg) static void vector_scale_func(void *cl_buffers[], void *cl_arg) { double scalar = -1.0; - int n = STARPU_VECTOR_GET_NX(cl_buffers[0]); + size_t n = STARPU_VECTOR_GET_NX(cl_buffers[0]); double *vector = (double *)STARPU_VECTOR_GET_PTR(cl_buffers[0]); - int i; + size_t i; starpu_codelet_unpack_args(cl_arg, &scalar); int workerid = starpu_worker_get_id(); @@ -84,7 +84,7 @@ static void vector_scale_func(void *cl_buffers[], void *cl_arg) int strl1 = hwloc_bitmap_snprintf(NULL, 0, worker_cpuset); char str1[strl1+1]; hwloc_bitmap_snprintf(str1, strl1+1, worker_cpuset); - printf("worker[%03d] - task: vector=%p, n=%d, scalar=%lf, worker cpuset = %s\n", workerid, vector, n, scalar, str1); + printf("worker[%03d] - task: vector=%p, n=%zu, scalar=%lf, worker cpuset = %s\n", workerid, vector, n, scalar, str1); } hwloc_bitmap_free(worker_cpuset); diff --git a/starpurm/examples/cuda_vector_scale/vector_scale.c b/starpurm/examples/cuda_vector_scale/vector_scale.c index 19b649038d..dfc3b42ffe 100644 --- a/starpurm/examples/cuda_vector_scale/vector_scale.c +++ b/starpurm/examples/cuda_vector_scale/vector_scale.c @@ -36,7 +36,7 @@ static void init_rm_infos(void); static void 
vector_scale_func(void *cl_buffers[], void *cl_arg) { float scalar = -1.0; - int n = STARPU_VECTOR_GET_NX(cl_buffers[0]); + size_t n = STARPU_VECTOR_GET_NX(cl_buffers[0]); float *vector = (float *)STARPU_VECTOR_GET_PTR(cl_buffers[0]); int i; starpu_codelet_unpack_args(cl_arg, &scalar); diff --git a/starpurm/examples/cuda_vector_scale/vs_cuda_kernel.cu b/starpurm/examples/cuda_vector_scale/vs_cuda_kernel.cu index ddd57a3b2b..43aedf4ff7 100644 --- a/starpurm/examples/cuda_vector_scale/vs_cuda_kernel.cu +++ b/starpurm/examples/cuda_vector_scale/vs_cuda_kernel.cu @@ -31,7 +31,7 @@ static __global__ void vector_scale_cuda_kernel(float *vector, unsigned n, float extern "C" void vector_scale_cuda_func(void *cl_buffers[], void *cl_arg) { float scalar = -1.0; - unsigned n = STARPU_VECTOR_GET_NX(cl_buffers[0]); + size_t n = STARPU_VECTOR_GET_NX(cl_buffers[0]); float *vector = (float *)STARPU_VECTOR_GET_PTR(cl_buffers[0]); starpu_codelet_unpack_args(cl_arg, &scalar); diff --git a/starpurm/examples/spawn.c b/starpurm/examples/spawn.c index 61cc889db5..ec1710b001 100644 --- a/starpurm/examples/spawn.c +++ b/starpurm/examples/spawn.c @@ -35,9 +35,9 @@ static void init_rm_infos(void); static void vector_scale_func(void *cl_buffers[], void *cl_arg) { double scalar = -1.0; - int n = STARPU_VECTOR_GET_NX(cl_buffers[0]); + size_t n = STARPU_VECTOR_GET_NX(cl_buffers[0]); double *vector = (double *)STARPU_VECTOR_GET_PTR(cl_buffers[0]); - int i; + size_t i; starpu_codelet_unpack_args(cl_arg, &scalar); int workerid = starpu_worker_get_id(); @@ -46,7 +46,7 @@ static void vector_scale_func(void *cl_buffers[], void *cl_arg) int strl1 = hwloc_bitmap_snprintf(NULL, 0, worker_cpuset); char str1[strl1+1]; hwloc_bitmap_snprintf(str1, strl1+1, worker_cpuset); - printf("worker[%03d] - task: vector=%p, n=%d, scalar=%lf, worker cpuset = %s\n", workerid, vector, n, scalar, str1); + printf("worker[%03d] - task: vector=%p, n=%zu, scalar=%lf, worker cpuset = %s\n", workerid, vector, n, scalar, str1); } 
hwloc_bitmap_free(worker_cpuset); diff --git a/starpurm/examples/vector_scale.c b/starpurm/examples/vector_scale.c index 65c60824ef..1e259d8929 100644 --- a/starpurm/examples/vector_scale.c +++ b/starpurm/examples/vector_scale.c @@ -34,9 +34,9 @@ static void init_rm_infos(void); static void vector_scale_func(void *cl_buffers[], void *cl_arg) { double scalar = -1.0; - int n = STARPU_VECTOR_GET_NX(cl_buffers[0]); + size_t n = STARPU_VECTOR_GET_NX(cl_buffers[0]); double *vector = (double *)STARPU_VECTOR_GET_PTR(cl_buffers[0]); - int i; + size_t i; starpu_codelet_unpack_args(cl_arg, &scalar); int workerid = starpu_worker_get_id(); @@ -49,7 +49,7 @@ static void vector_scale_func(void *cl_buffers[], void *cl_arg) int strl2 = hwloc_bitmap_snprintf(NULL, 0, check_cpuset); char str2[strl2+1]; hwloc_bitmap_snprintf(str2, strl2+1, check_cpuset); - printf("worker[%03d] - task: vector=%p, n=%d, scalar=%lf, worker cpuset = %s, selected cpuset = %s\n", workerid, vector, n, scalar, str1, str2); + printf("worker[%03d] - task: vector=%p, n=%zu, scalar=%lf, worker cpuset = %s, selected cpuset = %s\n", workerid, vector, n, scalar, str1, str2); } hwloc_bitmap_and(check_cpuset, check_cpuset, worker_cpuset); assert(!hwloc_bitmap_iszero(check_cpuset)); diff --git a/tests/datawizard/acquire_cb_insert.c b/tests/datawizard/acquire_cb_insert.c index f38bd28187..47f0b63a0d 100644 --- a/tests/datawizard/acquire_cb_insert.c +++ b/tests/datawizard/acquire_cb_insert.c @@ -47,7 +47,7 @@ struct starpu_codelet which_index = void work_cpu(void *descr[], void *_args) { - int i, n = STARPU_VECTOR_GET_NX(descr[0]); + size_t i, n = STARPU_VECTOR_GET_NX(descr[0]); float *x0 = (float *)STARPU_VECTOR_GET_PTR(descr[0]); (void)_args; diff --git a/tests/datawizard/deps.c b/tests/datawizard/deps.c index 7cd3d1ffd7..f2f32642d9 100644 --- a/tests/datawizard/deps.c +++ b/tests/datawizard/deps.c @@ -29,8 +29,8 @@ void null_cpu_func(void *buffers[], void *arg) void prod_cpu_func(void *buffers[], void *arg) { int 
*data = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); - int n = STARPU_VECTOR_GET_NX(buffers[0]); - int i; + size_t n = STARPU_VECTOR_GET_NX(buffers[0]); + size_t i; int factor; starpu_codelet_unpack_args(arg, &factor); diff --git a/tests/datawizard/handle_to_pointer.c b/tests/datawizard/handle_to_pointer.c index 9e1bc853cb..b7fd1ff410 100644 --- a/tests/datawizard/handle_to_pointer.c +++ b/tests/datawizard/handle_to_pointer.c @@ -67,9 +67,9 @@ static void opencl_task(void *buffers[], void *args) starpu_opencl_get_queue(devid, &queue); cl_mem numbers = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); - unsigned size = STARPU_VECTOR_GET_NX(buffers[0]); + cl_ulong size = STARPU_VECTOR_GET_NX(buffers[0]); - unsigned i; + cl_ulong i; for (i = 0; i < size; i++) { cl_int err; diff --git a/tests/datawizard/interfaces/block/block_cuda.cu b/tests/datawizard/interfaces/block/block_cuda.cu index 6b936f0353..cfe53fb99f 100644 --- a/tests/datawizard/interfaces/block/block_cuda.cu +++ b/tests/datawizard/interfaces/block/block_cuda.cu @@ -19,11 +19,11 @@ extern struct test_config block_config; static __global__ void block_cuda(int *block, - int nx, int ny, int nz, - unsigned ldy, unsigned ldz, + size_t nx, size_t ny, size_t nz, + size_t ldy, size_t ldz, float factor, int *err) { - int i, j, k; + size_t i, j, k; int val = 0; for (k = 0; k < nz ;k++) @@ -60,11 +60,11 @@ extern "C" void test_block_cuda_func(void *buffers[], void *args) if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); - int nx = STARPU_BLOCK_GET_NX(buffers[0]); - int ny = STARPU_BLOCK_GET_NY(buffers[0]); - int nz = STARPU_BLOCK_GET_NZ(buffers[0]); - unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]); - unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); + size_t nx = STARPU_BLOCK_GET_NX(buffers[0]); + size_t ny = STARPU_BLOCK_GET_NY(buffers[0]); + size_t nz = STARPU_BLOCK_GET_NZ(buffers[0]); + size_t ldy = STARPU_BLOCK_GET_LDY(buffers[0]); + size_t ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); int *block = (int *) 
STARPU_BLOCK_GET_PTR(buffers[0]); int factor = *(int*) args; diff --git a/tests/datawizard/interfaces/block/block_interface.c b/tests/datawizard/interfaces/block/block_interface.c index a39857fca5..182aa4bd1f 100644 --- a/tests/datawizard/interfaces/block/block_interface.c +++ b/tests/datawizard/interfaces/block/block_interface.c @@ -100,13 +100,13 @@ void test_block_cpu_func(void *buffers[], void *args) STARPU_SKIP_IF_VALGRIND; int factor = *(int*)args; - int nx = STARPU_BLOCK_GET_NX(buffers[0]); - int ny = STARPU_BLOCK_GET_NY(buffers[0]); - int nz = STARPU_BLOCK_GET_NZ(buffers[0]); - unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]); - unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); + size_t nx = STARPU_BLOCK_GET_NX(buffers[0]); + size_t ny = STARPU_BLOCK_GET_NY(buffers[0]); + size_t nz = STARPU_BLOCK_GET_NZ(buffers[0]); + size_t ldy = STARPU_BLOCK_GET_LDY(buffers[0]); + size_t ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); int *block = (int *) STARPU_BLOCK_GET_PTR(buffers[0]); - int i, j, k; + size_t i, j, k; int val = 0; block_config.copy_failed = SUCCESS; for (k = 0; k < nz; k++) diff --git a/tests/datawizard/interfaces/block/block_opencl.c b/tests/datawizard/interfaces/block/block_opencl.c index d0371819fd..dcfbf36b0d 100644 --- a/tests/datawizard/interfaces/block/block_opencl.c +++ b/tests/datawizard/interfaces/block/block_opencl.c @@ -37,11 +37,11 @@ test_block_opencl_func(void *buffers[], void *args) ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); - int nx = STARPU_BLOCK_GET_NX(buffers[0]); - int ny = STARPU_BLOCK_GET_NY(buffers[0]); - int nz = STARPU_BLOCK_GET_NZ(buffers[0]); - unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]); - unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); + cl_ulong nx = STARPU_BLOCK_GET_NX(buffers[0]); + cl_ulong ny = STARPU_BLOCK_GET_NY(buffers[0]); + cl_ulong nz = STARPU_BLOCK_GET_NZ(buffers[0]); + cl_ulong ldy = 
STARPU_BLOCK_GET_LDY(buffers[0]); + cl_ulong ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); cl_mem block = (cl_mem) STARPU_BLOCK_GET_DEV_HANDLE(buffers[0]); cl_context context; @@ -49,8 +49,7 @@ test_block_opencl_func(void *buffers[], void *args) devid = starpu_worker_get_devid(id); starpu_opencl_get_context(devid, &context); - cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(int), &block_config.copy_failed, &err); + cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &block_config.copy_failed, &err); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); diff --git a/tests/datawizard/interfaces/block/block_opencl_kernel.cl b/tests/datawizard/interfaces/block/block_opencl_kernel.cl index 7752db9408..20e335292d 100644 --- a/tests/datawizard/interfaces/block/block_opencl_kernel.cl +++ b/tests/datawizard/interfaces/block/block_opencl_kernel.cl @@ -14,8 +14,7 @@ * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void block_opencl(__global int *block, - int nx, int ny, int nz, - int ldy, int ldz, + ulong nx, ulong ny, ulong nz, ulong ldy, ulong ldz, int factor, __global int *err) { const int idx = get_global_id(0); diff --git a/tests/datawizard/interfaces/copy_interfaces.c b/tests/datawizard/interfaces/copy_interfaces.c index 5633c6d9b4..ee43bb480f 100644 --- a/tests/datawizard/interfaces/copy_interfaces.c +++ b/tests/datawizard/interfaces/copy_interfaces.c @@ -133,8 +133,8 @@ int main(int argc, char **argv) int NX=3; int NY=2; int array2d[NX*NY]; - unsigned nn[2] = {NX, NY}; - unsigned ldn[2] = {1, NX}; + size_t nn[2] = {NX, NY}; + size_t ldn[2] = {1, NX}; starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)array2d, ldn, nn, 2, sizeof(int)); ret = check_copy(handle, "ndim"); } diff --git a/tests/datawizard/interfaces/matrix/matrix_cuda.cu b/tests/datawizard/interfaces/matrix/matrix_cuda.cu index 6a4b9e7299..d2fa04e132 100644 --- a/tests/datawizard/interfaces/matrix/matrix_cuda.cu +++ 
b/tests/datawizard/interfaces/matrix/matrix_cuda.cu @@ -18,7 +18,7 @@ extern struct test_config matrix_config; -__global__ void matrix_cuda(int *val, unsigned n, int *err, int factor) +__global__ void matrix_cuda(int *val, size_t n, int *err, int factor) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; @@ -37,7 +37,7 @@ extern "C" void test_matrix_cuda_func(void *buffers[], void *args) int *ret; int *val; cudaError_t error; - unsigned int nx, ny, n; + size_t nx, ny, n; nx = STARPU_MATRIX_GET_NX(buffers[0]); ny = STARPU_MATRIX_GET_NY(buffers[0]); @@ -52,9 +52,9 @@ extern "C" void test_matrix_cuda_func(void *buffers[], void *args) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(ret, - &matrix_config.copy_failed, - sizeof(int), - cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); + &matrix_config.copy_failed, + sizeof(int), + cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); @@ -63,9 +63,9 @@ extern "C" void test_matrix_cuda_func(void *buffers[], void *args) if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(&matrix_config.copy_failed, - ret, - sizeof(int), - cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); + ret, + sizeof(int), + cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); diff --git a/tests/datawizard/interfaces/matrix/matrix_interface.c b/tests/datawizard/interfaces/matrix/matrix_interface.c index 9311861f27..5a81cff5ca 100644 --- a/tests/datawizard/interfaces/matrix/matrix_interface.c +++ b/tests/datawizard/interfaces/matrix/matrix_interface.c @@ -31,7 +31,6 @@ extern void test_matrix_cuda_func(void *buffers[], void *_args); extern void test_matrix_opencl_func(void *buffers[], void *args); #endif - static starpu_data_handle_t matrix_handle; static starpu_data_handle_t matrix2_handle; @@ -60,8 +59,8 @@ struct test_config matrix_config = static void register_data(void) { - 
int i; - int size = WIDTH * HEIGHT; + size_t i; + size_t size = WIDTH * HEIGHT; for (i = 0; i < size; i++) matrix[i] = i; @@ -95,17 +94,17 @@ test_matrix_cpu_func(void *buffers[], void *args) int *val; int factor; - int i; - int nx, ny; + size_t i; + size_t nx, ny; nx = STARPU_MATRIX_GET_NX(buffers[0]); ny = STARPU_MATRIX_GET_NY(buffers[0]); - val = (int *) STARPU_MATRIX_GET_PTR(buffers[0]); + val = (int*) STARPU_MATRIX_GET_PTR(buffers[0]); factor = *(int *) args; for (i = 0; i < nx*ny; i++) { - if (val[i] != i * factor) + if (val[i] != (int)i * factor) { matrix_config.copy_failed = FAILURE; return; diff --git a/tests/datawizard/interfaces/matrix/matrix_opencl.c b/tests/datawizard/interfaces/matrix/matrix_opencl.c index e6b8345471..464ece45d0 100644 --- a/tests/datawizard/interfaces/matrix/matrix_opencl.c +++ b/tests/datawizard/interfaces/matrix/matrix_opencl.c @@ -27,7 +27,7 @@ void test_matrix_opencl_func(void *buffers[], void *args) STARPU_SKIP_IF_VALGRIND; int id, devid, factor, ret; - unsigned int n; + cl_ulong n; cl_int err; cl_kernel kernel; @@ -58,19 +58,18 @@ void test_matrix_opencl_func(void *buffers[], void *args) if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); - fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(int), &matrix_config.copy_failed, &err); + fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &matrix_config.copy_failed, &err); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); /* Setting args */ int nargs; nargs = starpu_opencl_set_kernel_args(&err, &kernel, - sizeof(val), &val, - sizeof(n), &n, - sizeof(fail), &fail, - sizeof(factor), &factor, - 0); + sizeof(val), &val, + sizeof(n), &n, + sizeof(fail), &fail, + sizeof(factor), &factor, + 0); if (nargs != 4) STARPU_OPENCL_REPORT_ERROR(err); { @@ -96,14 +95,14 @@ void test_matrix_opencl_func(void *buffers[], void *args) global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel(queue, - kernel, - 1, - NULL, - &global, - &local, - 0, - 
NULL, - &event); + kernel, + 1, + NULL, + &global, + &local, + 0, + NULL, + &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); diff --git a/tests/datawizard/interfaces/matrix/matrix_opencl_kernel.cl b/tests/datawizard/interfaces/matrix/matrix_opencl_kernel.cl index e5cf002647..b126c3134f 100644 --- a/tests/datawizard/interfaces/matrix/matrix_opencl_kernel.cl +++ b/tests/datawizard/interfaces/matrix/matrix_opencl_kernel.cl @@ -15,9 +15,9 @@ */ __kernel void matrix_opencl(__global int *val, - unsigned int nx, - __global int *err, - int factor) + ulong nx, + __global int *err, + int factor) { const int i = get_global_id(0); if (i >= nx) diff --git a/tests/datawizard/interfaces/ndim/ndim_cuda.cu b/tests/datawizard/interfaces/ndim/ndim_cuda.cu index 30eec8031a..899e3391b0 100644 --- a/tests/datawizard/interfaces/ndim/ndim_cuda.cu +++ b/tests/datawizard/interfaces/ndim/ndim_cuda.cu @@ -19,8 +19,8 @@ extern struct test_config arr4d_config; static __global__ void arr4d_cuda(int *arr4d, - int nx, int ny, int nz, int nt, - unsigned ldy, unsigned ldz, unsigned ldt, + size_t nx, size_t ny, size_t nz, size_t nt, + size_t ldy, size_t ldz, size_t ldt, int factor, int *err) { int i, j, k, l; @@ -63,20 +63,19 @@ extern "C" void test_arr4d_cuda_func(void *buffers[], void *args) if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); - int *nn = (int *)STARPU_NDIM_GET_NN(buffers[0]); - unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); - int nx = nn[0]; - int ny = nn[1]; - int nz = nn[2]; - int nt = nn[3]; - unsigned ldy = ldn[1]; - unsigned ldz = ldn[2]; - unsigned ldt = ldn[3]; + size_t *nn = STARPU_NDIM_GET_NN(buffers[0]); + size_t *ldn = STARPU_NDIM_GET_LDN(buffers[0]); + size_t nx = nn[0]; + size_t ny = nn[1]; + size_t nz = nn[2]; + size_t nt = nn[3]; + size_t ldy = ldn[1]; + size_t ldz = ldn[2]; + size_t ldt = ldn[3]; int *arr4d = (int *) STARPU_NDIM_GET_PTR(buffers[0]); int factor = *(int*) args; - arr4d_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>> - (arr4d, 
nx, ny, nz, nt, ldy, ldz, ldt, factor, ret); + arr4d_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>> (arr4d, nx, ny, nz, nt, ldy, ldz, ldt, factor, ret); error = cudaGetLastError(); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(&arr4d_config.copy_failed, ret, sizeof(int), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); diff --git a/tests/datawizard/interfaces/ndim/ndim_interface.c b/tests/datawizard/interfaces/ndim/ndim_interface.c index 753d0e1b1f..7edaa82fe1 100644 --- a/tests/datawizard/interfaces/ndim/ndim_interface.c +++ b/tests/datawizard/interfaces/ndim/ndim_interface.c @@ -69,8 +69,8 @@ static void register_data(void) _arr4d[(l*NX*NY*NZ)+(k*NX*NY)+(j*NX)+i] = val++; /* Registering data */ - unsigned nn[4] = {NX, NY, NZ, NT}; - unsigned ldn[4] = {1, NX, NX*NY, NX*NY*NZ}; + size_t nn[4] = {NX, NY, NZ, NT}; + size_t ldn[4] = {1, NX, NX*NY, NX*NY*NZ}; starpu_ndim_data_register(&_arr4d_handle, STARPU_MAIN_RAM, (uintptr_t)_arr4d, ldn, nn, 4, sizeof(_arr4d[0])); starpu_ndim_data_register(&_arr4d2_handle, STARPU_MAIN_RAM, (uintptr_t)_arr4d2, ldn, nn, 4, sizeof(_arr4d2[0])); @@ -87,17 +87,17 @@ void test_arr4d_cpu_func(void *buffers[], void *args) STARPU_SKIP_IF_VALGRIND; int factor = *(int*)args; - int *nn = (int *)STARPU_NDIM_GET_NN(buffers[0]); - unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); - int nx = nn[0]; - int ny = nn[1]; - int nz = nn[2]; - int nt = nn[3]; - unsigned ldy = ldn[1]; - unsigned ldz = ldn[2]; - unsigned ldt = ldn[3]; + size_t *nn = STARPU_NDIM_GET_NN(buffers[0]); + size_t *ldn = STARPU_NDIM_GET_LDN(buffers[0]); + size_t nx = nn[0]; + size_t ny = nn[1]; + size_t nz = nn[2]; + size_t nt = nn[3]; + size_t ldy = ldn[1]; + size_t ldz = ldn[2]; + size_t ldt = ldn[3]; int *arr4d = (int *) STARPU_NDIM_GET_PTR(buffers[0]); - int i, j, k, l; + size_t i, j, k, l; int val = 0; arr4d_config.copy_failed = SUCCESS; for (l = 0; l < nt; l++) @@ -108,7 +108,7 @@ void test_arr4d_cpu_func(void *buffers[], void 
*args) { for (i = 0; i < nx; i++) { - if (arr4d[(l*ldt)+(k*ldz)+(j*ldy)+i] != factor * val) + if (arr4d[(l*ldt)+(k*ldz)+(j*ldy)+i] != val * factor) { arr4d_config.copy_failed = FAILURE; return; diff --git a/tests/datawizard/interfaces/ndim/ndim_opencl.c b/tests/datawizard/interfaces/ndim/ndim_opencl.c index f94ce155b0..3b6edd5e1f 100644 --- a/tests/datawizard/interfaces/ndim/ndim_opencl.c +++ b/tests/datawizard/interfaces/ndim/ndim_opencl.c @@ -29,7 +29,7 @@ test_arr4d_opencl_func(void *buffers[], void *args) int id, devid, ret; int factor = *(int *) args; - cl_int err; + cl_int err; cl_kernel kernel; cl_command_queue queue; cl_event event; @@ -37,15 +37,15 @@ test_arr4d_opencl_func(void *buffers[], void *args) ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); - int *nn = (int *)STARPU_NDIM_GET_NN(buffers[0]); - unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); - int nx = nn[0]; - int ny = nn[1]; - int nz = nn[2]; - int nt = nn[3]; - unsigned ldy = ldn[1]; - unsigned ldz = ldn[2]; - unsigned ldt = ldn[3]; + cl_ulong *nn = STARPU_NDIM_GET_NN(buffers[0]); + cl_ulong *ldn = STARPU_NDIM_GET_LDN(buffers[0]); + cl_ulong nx = nn[0]; + cl_ulong ny = nn[1]; + cl_ulong nz = nn[2]; + cl_ulong nt = nn[3]; + cl_ulong ldy = ldn[1]; + cl_ulong ldz = ldn[2]; + cl_ulong ldt = ldn[3]; cl_mem arr4d = (cl_mem) STARPU_NDIM_GET_DEV_HANDLE(buffers[0]); cl_context context; @@ -53,13 +53,11 @@ test_arr4d_opencl_func(void *buffers[], void *args) devid = starpu_worker_get_devid(id); starpu_opencl_get_context(devid, &context); - cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(int), &arr4d_config.copy_failed, &err); + cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &arr4d_config.copy_failed, &err); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); - err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, diff --git 
a/tests/datawizard/interfaces/ndim/ndim_opencl_kernel.cl b/tests/datawizard/interfaces/ndim/ndim_opencl_kernel.cl index eea2de2e88..9f43250f61 100644 --- a/tests/datawizard/interfaces/ndim/ndim_opencl_kernel.cl +++ b/tests/datawizard/interfaces/ndim/ndim_opencl_kernel.cl @@ -14,8 +14,8 @@ * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void arr4d_opencl(__global int *arr4d, - int nx, int ny, int nz, int nt, - int ldy, int ldz, int ldt, + ulong nx, ulong ny, ulong nz, ulong nt, + ulong ldy, ulong ldz, ulong ldt, int factor, __global int *err) { const int idx = get_global_id(0); diff --git a/tests/datawizard/interfaces/tensor/tensor_cuda.cu b/tests/datawizard/interfaces/tensor/tensor_cuda.cu index 55131d6f0d..c8152473fd 100644 --- a/tests/datawizard/interfaces/tensor/tensor_cuda.cu +++ b/tests/datawizard/interfaces/tensor/tensor_cuda.cu @@ -19,8 +19,8 @@ extern struct test_config tensor_config; static __global__ void tensor_cuda(int *tensor, - int nx, int ny, int nz, int nt, - unsigned ldy, unsigned ldz, unsigned ldt, + size_t nx, size_t ny, size_t nz, size_t nt, + size_t ldy, size_t ldz, size_t ldt, int factor, int *err) { int i, j, k, l; @@ -63,13 +63,13 @@ extern "C" void test_tensor_cuda_func(void *buffers[], void *args) if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); - int nx = STARPU_TENSOR_GET_NX(buffers[0]); - int ny = STARPU_TENSOR_GET_NY(buffers[0]); - int nz = STARPU_TENSOR_GET_NZ(buffers[0]); - int nt = STARPU_TENSOR_GET_NT(buffers[0]); - unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]); - unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]); - unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]); + size_t nx = STARPU_TENSOR_GET_NX(buffers[0]); + size_t ny = STARPU_TENSOR_GET_NY(buffers[0]); + size_t nz = STARPU_TENSOR_GET_NZ(buffers[0]); + size_t nt = STARPU_TENSOR_GET_NT(buffers[0]); + size_t ldy = STARPU_TENSOR_GET_LDY(buffers[0]); + size_t ldz = STARPU_TENSOR_GET_LDZ(buffers[0]); + size_t ldt = 
STARPU_TENSOR_GET_LDT(buffers[0]); int *tensor = (int *) STARPU_TENSOR_GET_PTR(buffers[0]); int factor = *(int*) args; diff --git a/tests/datawizard/interfaces/tensor/tensor_interface.c b/tests/datawizard/interfaces/tensor/tensor_interface.c index 576eb563c2..b43c0966b8 100644 --- a/tests/datawizard/interfaces/tensor/tensor_interface.c +++ b/tests/datawizard/interfaces/tensor/tensor_interface.c @@ -106,15 +106,15 @@ void test_tensor_cpu_func(void *buffers[], void *args) STARPU_SKIP_IF_VALGRIND; int factor = *(int*)args; - int nx = STARPU_TENSOR_GET_NX(buffers[0]); - int ny = STARPU_TENSOR_GET_NY(buffers[0]); - int nz = STARPU_TENSOR_GET_NZ(buffers[0]); - int nt = STARPU_TENSOR_GET_NT(buffers[0]); - unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]); - unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]); - unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]); + size_t nx = STARPU_TENSOR_GET_NX(buffers[0]); + size_t ny = STARPU_TENSOR_GET_NY(buffers[0]); + size_t nz = STARPU_TENSOR_GET_NZ(buffers[0]); + size_t nt = STARPU_TENSOR_GET_NT(buffers[0]); + size_t ldy = STARPU_TENSOR_GET_LDY(buffers[0]); + size_t ldz = STARPU_TENSOR_GET_LDZ(buffers[0]); + size_t ldt = STARPU_TENSOR_GET_LDT(buffers[0]); int *tensor = (int *) STARPU_TENSOR_GET_PTR(buffers[0]); - int i, j, k, l; + size_t i, j, k, l; int val = 0; tensor_config.copy_failed = SUCCESS; for (l = 0; l < nt; l++) diff --git a/tests/datawizard/interfaces/tensor/tensor_opencl.c b/tests/datawizard/interfaces/tensor/tensor_opencl.c index 4cb3895b2b..dd88087798 100644 --- a/tests/datawizard/interfaces/tensor/tensor_opencl.c +++ b/tests/datawizard/interfaces/tensor/tensor_opencl.c @@ -37,13 +37,13 @@ test_tensor_opencl_func(void *buffers[], void *args) ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); - int nx = STARPU_TENSOR_GET_NX(buffers[0]); - int ny = STARPU_TENSOR_GET_NY(buffers[0]); - int nz = 
STARPU_TENSOR_GET_NZ(buffers[0]); - int nt = STARPU_TENSOR_GET_NT(buffers[0]); - unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]); - unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]); - unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]); + cl_ulong nx = STARPU_TENSOR_GET_NX(buffers[0]); + cl_ulong ny = STARPU_TENSOR_GET_NY(buffers[0]); + cl_ulong nz = STARPU_TENSOR_GET_NZ(buffers[0]); + cl_ulong nt = STARPU_TENSOR_GET_NT(buffers[0]); + cl_ulong ldy = STARPU_TENSOR_GET_LDY(buffers[0]); + cl_ulong ldz = STARPU_TENSOR_GET_LDZ(buffers[0]); + cl_ulong ldt = STARPU_TENSOR_GET_LDT(buffers[0]); cl_mem tensor = (cl_mem) STARPU_TENSOR_GET_DEV_HANDLE(buffers[0]); cl_context context; @@ -51,8 +51,7 @@ test_tensor_opencl_func(void *buffers[], void *args) devid = starpu_worker_get_devid(id); starpu_opencl_get_context(devid, &context); - cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(int), &tensor_config.copy_failed, &err); + cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &tensor_config.copy_failed, &err); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); diff --git a/tests/datawizard/interfaces/tensor/tensor_opencl_kernel.cl b/tests/datawizard/interfaces/tensor/tensor_opencl_kernel.cl index a711610960..3c9a663ee4 100644 --- a/tests/datawizard/interfaces/tensor/tensor_opencl_kernel.cl +++ b/tests/datawizard/interfaces/tensor/tensor_opencl_kernel.cl @@ -14,8 +14,8 @@ * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
*/ __kernel void tensor_opencl(__global int *tensor, - int nx, int ny, int nz, int nt, - int ldy, int ldz, int ldt, + ulong nx, ulong ny, ulong nz, ulong nt, + ulong ldy, ulong ldz, ulong ldt, int factor, __global int *err) { const int idx = get_global_id(0); diff --git a/tests/datawizard/interfaces/vector/vector_cuda.cu b/tests/datawizard/interfaces/vector/vector_cuda.cu index 07a84006e0..96fb45fc65 100644 --- a/tests/datawizard/interfaces/vector/vector_cuda.cu +++ b/tests/datawizard/interfaces/vector/vector_cuda.cu @@ -18,7 +18,7 @@ extern struct test_config vector_config; -__global__ void framework_cuda(int *val, unsigned n, int *err, int factor) +__global__ void framework_cuda(int *val, size_t n, int *err, int factor) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; @@ -47,7 +47,7 @@ extern "C" void test_vector_cuda_func(void *buffers[], void *args) if (error != cudaSuccess) return; - unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + size_t n = STARPU_VECTOR_GET_NX(buffers[0]); int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); int factor = *(int*) args; diff --git a/tests/datawizard/interfaces/vector/vector_interface.c b/tests/datawizard/interfaces/vector/vector_interface.c index e8e8573a1f..a739348e52 100644 --- a/tests/datawizard/interfaces/vector/vector_interface.c +++ b/tests/datawizard/interfaces/vector/vector_interface.c @@ -83,10 +83,10 @@ void test_vector_cpu_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; - unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + size_t n = STARPU_VECTOR_GET_NX(buffers[0]); int *val = (int *) STARPU_VECTOR_GET_PTR(buffers[0]); int factor = *(int*)args; - unsigned int i; + size_t i; for (i = 0; i < n; i++) { if (val[i] != (int)i*factor) diff --git a/tests/datawizard/interfaces/vector/vector_opencl.c b/tests/datawizard/interfaces/vector/vector_opencl.c index e5d1491659..2f0fc810fc 100644 --- a/tests/datawizard/interfaces/vector/vector_opencl.c +++ b/tests/datawizard/interfaces/vector/vector_opencl.c @@ -37,7 
+37,7 @@ test_vector_opencl_func(void *buffers[], void *args) ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); - unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + cl_ulong n = STARPU_VECTOR_GET_NX(buffers[0]); cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); cl_context context; diff --git a/tests/datawizard/interfaces/vector/vector_opencl_kernel.cl b/tests/datawizard/interfaces/vector/vector_opencl_kernel.cl index ee7c4dff50..51db6758bd 100644 --- a/tests/datawizard/interfaces/vector/vector_opencl_kernel.cl +++ b/tests/datawizard/interfaces/vector/vector_opencl_kernel.cl @@ -14,7 +14,7 @@ * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void test_vector_opencl(__global int *val, - unsigned int nx, + ulong nx, __global int *err, int factor) { diff --git a/tests/datawizard/partition_init.c b/tests/datawizard/partition_init.c index 78b130711c..5c49b231c5 100644 --- a/tests/datawizard/partition_init.c +++ b/tests/datawizard/partition_init.c @@ -21,14 +21,14 @@ void my_func(void *buffers[], void *cl_arg) { (void)cl_arg; - unsigned nb = STARPU_VECTOR_GET_NX(buffers[0]); + size_t nb = STARPU_VECTOR_GET_NX(buffers[0]); int *v = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); - unsigned i; + size_t i; for(i=0 ; i -static __global__ void vector_mult_cuda(unsigned *val, unsigned n) +static __global__ void vector_mult_cuda(unsigned *val, size_t n) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; @@ -26,7 +26,7 @@ static __global__ void vector_mult_cuda(unsigned *val, unsigned n) extern "C" void scal_func_cuda(void *buffers[], void *_args) { - unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); + size_t n = STARPU_VECTOR_GET_NX(buffers[0]); unsigned *val = (unsigned *)STARPU_VECTOR_GET_PTR(buffers[0]); unsigned threads_per_block = 64; unsigned nblocks = (n + threads_per_block-1) / threads_per_block; diff --git 
a/tests/datawizard/scal_opencl.cl b/tests/datawizard/scal_opencl.cl index f45a61571a..5c403f0c4d 100644 --- a/tests/datawizard/scal_opencl.cl +++ b/tests/datawizard/scal_opencl.cl @@ -14,7 +14,7 @@ * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ -__kernel void vector_mult_opencl(__global unsigned* val, unsigned offset, unsigned nx) +__kernel void vector_mult_opencl(__global unsigned* val, ulong offset, ulong nx) { const int i = get_global_id(0); val = (__global char*) val + offset; diff --git a/tests/datawizard/scratch.c b/tests/datawizard/scratch.c index 556ce64453..19f44b1e94 100644 --- a/tests/datawizard/scratch.c +++ b/tests/datawizard/scratch.c @@ -53,12 +53,12 @@ void cpu_f(void *descr[], void *arg) unsigned *v = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned *tmp = (unsigned *)STARPU_VECTOR_GET_PTR(descr[1]); - unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t nx = STARPU_VECTOR_GET_NX(descr[0]); size_t elemsize = STARPU_VECTOR_GET_ELEMSIZE(descr[0]); memcpy(tmp, v, nx*elemsize); - unsigned i; + size_t i; for (i = 0; i < nx; i++) { v[i] = tmp[i] + 1; diff --git a/tests/datawizard/scratch_cuda.cu b/tests/datawizard/scratch_cuda.cu index 0d94b823cf..1177720677 100644 --- a/tests/datawizard/scratch_cuda.cu +++ b/tests/datawizard/scratch_cuda.cu @@ -20,7 +20,7 @@ #define MAXNBLOCKS 32 #define MAXTHREADSPERBLOCK 128 -static __global__ void increment_vector(unsigned *v, unsigned *tmp, int nx) +static __global__ void increment_vector(unsigned *v, unsigned *tmp, size_t nx) { const int tid = threadIdx.x + blockIdx.x*blockDim.x; const int nthreads = gridDim.x * blockDim.x; @@ -37,7 +37,7 @@ extern "C" void cuda_f(void *descr[], void *_args) unsigned *v = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned *tmp = (unsigned *)STARPU_VECTOR_GET_PTR(descr[1]); - unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t nx = STARPU_VECTOR_GET_NX(descr[0]); size_t elemsize = STARPU_VECTOR_GET_ELEMSIZE(descr[0]); 
cudaMemcpyAsync(tmp, v, nx*elemsize, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); diff --git a/tests/datawizard/scratch_opencl.c b/tests/datawizard/scratch_opencl.c index 6df4cffdba..fe5ec94a5e 100644 --- a/tests/datawizard/scratch_opencl.c +++ b/tests/datawizard/scratch_opencl.c @@ -30,8 +30,8 @@ void opencl_f(void *buffers[], void *args) cl_kernel kernel; cl_command_queue queue; - unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); - unsigned elemsize = STARPU_VECTOR_GET_ELEMSIZE(buffers[0]); + cl_ulong n = STARPU_VECTOR_GET_NX(buffers[0]); + cl_ulong elemsize = STARPU_VECTOR_GET_ELEMSIZE(buffers[0]); cl_mem val = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); cl_mem tmp = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[1]); @@ -43,14 +43,14 @@ void opencl_f(void *buffers[], void *args) STARPU_OPENCL_REPORT_ERROR(err); err = clEnqueueCopyBuffer(queue, - val, - tmp, - 0, /* offset in val */ - 0, /* offset in tmp */ - n * elemsize, - 0, /* num_events_in_wait_list */ - NULL, /* event_wait_list */ - NULL); /* event */ + val, + tmp, + 0, /* offset in val */ + 0, /* offset in tmp */ + n * elemsize, + 0, /* num_events_in_wait_list */ + NULL, /* event_wait_list */ + NULL); /* event */ if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); diff --git a/tests/datawizard/scratch_opencl_kernel.cl b/tests/datawizard/scratch_opencl_kernel.cl index 29423b6371..f6f61a627b 100644 --- a/tests/datawizard/scratch_opencl_kernel.cl +++ b/tests/datawizard/scratch_opencl_kernel.cl @@ -16,7 +16,7 @@ __kernel void increment_vector_opencl(__global unsigned *val, __global unsigned *tmp, - unsigned nx) + ulong nx) { const int id = get_global_id(0); diff --git a/tests/disk/mem_reclaim.c b/tests/disk/mem_reclaim.c index 69ad128252..7d3818e7c1 100644 --- a/tests/disk/mem_reclaim.c +++ b/tests/disk/mem_reclaim.c @@ -79,8 +79,7 @@ const struct starpu_data_copy_methods my_vector_copy_data_methods_s = }; struct starpu_data_interface_ops starpu_interface_my_vector_ops; -void 
starpu_my_vector_data_register(starpu_data_handle_t *handleptr, int home_node, - uintptr_t ptr, uint32_t nx, size_t elemsize) +void starpu_my_vector_data_register(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, size_t nx, size_t elemsize) { struct starpu_vector_interface vector = { @@ -148,7 +147,7 @@ static struct starpu_codelet check_cl = .modes = { STARPU_R }, }; -int dotest(struct starpu_disk_ops *ops, char *base, void (*vector_data_register)(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, uint32_t nx, size_t elemsize), const char *text) +int dotest(struct starpu_disk_ops *ops, char *base, void (*vector_data_register)(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, size_t nx, size_t elemsize), const char *text) { starpu_data_handle_t handles[NDATA]; diff --git a/tests/main/combined_workers/bfs/bfs_func/bfs_omp_func.cpp b/tests/main/combined_workers/bfs/bfs_func/bfs_omp_func.cpp index ab7872de50..ae973c1321 100644 --- a/tests/main/combined_workers/bfs/bfs_func/bfs_omp_func.cpp +++ b/tests/main/combined_workers/bfs/bfs_func/bfs_omp_func.cpp @@ -23,14 +23,14 @@ void omp_bfs_func(void *buffers[], void *_args) { Node* graph_nodes = (Node *) STARPU_VECTOR_GET_PTR(buffers[0]); - int no_of_nodes = STARPU_VECTOR_GET_NX(buffers[0]); + size_t no_of_nodes = STARPU_VECTOR_GET_NX(buffers[0]); int* graph_edges = (int *) STARPU_VECTOR_GET_PTR(buffers[1]); bool *graph_mask = (bool *) STARPU_VECTOR_GET_PTR(buffers[2]); bool *updating_graph_mask = (bool *) STARPU_VECTOR_GET_PTR(buffers[3]); bool *graph_visited = (bool *) STARPU_VECTOR_GET_PTR(buffers[4]); int* cost = (int *) STARPU_VECTOR_GET_PTR(buffers[5]); int k=0; - + bool stop; do { @@ -39,11 +39,11 @@ void omp_bfs_func(void *buffers[], void *_args) #ifdef OPEN #pragma omp parallel for num_threads(starpu_combined_worker_get_size()) -#endif - for(int tid = 0; tid < no_of_nodes; tid++ ) +#endif + for(size_t tid = 0; tid < no_of_nodes; tid++ ) { if (graph_mask[tid] == true) - { + { 
graph_mask[tid]=false; for(int i=graph_nodes[tid].starting; i<(graph_nodes[tid].no_of_edges + graph_nodes[tid].starting); i++) { @@ -69,7 +69,7 @@ void omp_bfs_func(void *buffers[], void *_args) k++; } while(stop); - + printf("Kernel Executed %d times, threads: %d\n",k, starpu_combined_worker_get_size()); //printf("graph_edges = %d, %d, %d\n",graph_edges[0], graph_edges[1], graph_edges[2]); //printf("graph_mask = %d, %d, %d\n",graph_mask[0], graph_mask[1], graph_mask[2]); diff --git a/tests/maxfpga/max_fpga_advanced_static.c b/tests/maxfpga/max_fpga_advanced_static.c index ec96dd783f..f4ad3f8678 100644 --- a/tests/maxfpga/max_fpga_advanced_static.c +++ b/tests/maxfpga/max_fpga_advanced_static.c @@ -34,7 +34,7 @@ void fpga_impl(void *buffers[], void *cl_arg) int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); - int size = STARPU_VECTOR_GET_NX(buffers[0]); + size_t size = STARPU_VECTOR_GET_NX(buffers[0]); // XXX: would rather use a scratch buffer size_t ptrCT1 = 0x00000000000000c0; @@ -103,7 +103,7 @@ void fpga_impl1(void *buffers[], void *cl_arg) int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */ - int size = STARPU_VECTOR_GET_NX(buffers[0]); + size_t size = STARPU_VECTOR_GET_NX(buffers[0]); max_engine_t *engine = starpu_max_fpga_get_local_engine();; @@ -137,7 +137,7 @@ void fpga_impl2(void *buffers[], void *cl_arg) size_t ptrB = (size_t) STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */ size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */ - int size = STARPU_VECTOR_GET_NX(buffers[0]); + size_t size = STARPU_VECTOR_GET_NX(buffers[0]); max_engine_t *engine = starpu_max_fpga_get_local_engine();; @@ -170,7 +170,7 @@ void fpga_impl3(void *buffers[], void *cl_arg) size_t ptrB = (size_t) STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */ int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); - int size = 
STARPU_VECTOR_GET_NX(buffers[0]); + size_t size = STARPU_VECTOR_GET_NX(buffers[0]); max_engine_t *engine = starpu_max_fpga_get_local_engine();; diff --git a/tests/maxfpga/max_fpga_basic_static.c b/tests/maxfpga/max_fpga_basic_static.c index e4300ab12b..c11cb05000 100644 --- a/tests/maxfpga/max_fpga_basic_static.c +++ b/tests/maxfpga/max_fpga_basic_static.c @@ -34,7 +34,7 @@ void fpga_impl(void *buffers[], void *cl_arg) int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); - int size = STARPU_VECTOR_GET_NX(buffers[0]); + size_t size = STARPU_VECTOR_GET_NX(buffers[0]); // XXX: would rather use a scratch buffer size_t ptrCT1 = 0x00000000000000c0; @@ -80,7 +80,7 @@ void fpga_impl1(void *buffers[], void *cl_arg) int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */ - int size = STARPU_VECTOR_GET_NX(buffers[0]); + size_t size = STARPU_VECTOR_GET_NX(buffers[0]); printf("T1 with %p %p %zu\n", ptrA, ptrB, ptrC); /* C = A+B */ diff --git a/tests/maxfpga/max_fpga_dynamic.c b/tests/maxfpga/max_fpga_dynamic.c index 4eadfcd02a..827bcb36ec 100644 --- a/tests/maxfpga/max_fpga_dynamic.c +++ b/tests/maxfpga/max_fpga_dynamic.c @@ -36,7 +36,7 @@ void fpga_impl1(void *buffers[], void *cl_arg) int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */ - int size = STARPU_VECTOR_GET_NX(buffers[0]); + size_t size = STARPU_VECTOR_GET_NX(buffers[0]); max_engine_t *engine = starpu_max_fpga_get_local_engine();; @@ -83,7 +83,7 @@ void fpga_impl2(void *buffers[], void *cl_arg) size_t ptrB = (size_t) STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */ size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */ - int size = STARPU_VECTOR_GET_NX(buffers[0]); + size_t size = STARPU_VECTOR_GET_NX(buffers[0]); max_engine_t *engine = starpu_max_fpga_get_local_engine();; @@ -129,7 
+129,7 @@ void fpga_impl3(void *buffers[], void *cl_arg) size_t ptrB = (size_t) STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */ int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); - int size = STARPU_VECTOR_GET_NX(buffers[0]); + size_t size = STARPU_VECTOR_GET_NX(buffers[0]); max_engine_t *engine = starpu_max_fpga_get_local_engine();; diff --git a/tests/maxfpga/max_fpga_mux.c b/tests/maxfpga/max_fpga_mux.c index c973c05c2a..8ceff892a9 100644 --- a/tests/maxfpga/max_fpga_mux.c +++ b/tests/maxfpga/max_fpga_mux.c @@ -88,7 +88,7 @@ void fpga_impl1(void *buffers[], void *cl_arg) enum starpu_node_kind kindBT1 = starpu_node_get_kind(starpu_task_get_current_data_node(1)); enum starpu_node_kind kindCT1 = starpu_node_get_kind(starpu_task_get_current_data_node(2)); - int size = STARPU_VECTOR_GET_NX(buffers[0]); + size_t size = STARPU_VECTOR_GET_NX(buffers[0]); max_engine_t *engine = starpu_max_fpga_get_local_engine();; @@ -138,7 +138,7 @@ void fpga_impl2(void *buffers[], void *cl_arg) enum starpu_node_kind kindBT2 = starpu_node_get_kind(starpu_task_get_current_data_node(1)); enum starpu_node_kind kindCT2 = starpu_node_get_kind(starpu_task_get_current_data_node(2)); - int size = STARPU_VECTOR_GET_NX(buffers[0]); + size_t size = STARPU_VECTOR_GET_NX(buffers[0]); max_engine_t *engine = starpu_max_fpga_get_local_engine();; @@ -188,7 +188,7 @@ void fpga_impl3(void *buffers[], void *cl_arg) enum starpu_node_kind kindBT3 = starpu_node_get_kind(starpu_task_get_current_data_node(1)); enum starpu_node_kind kindCT3 = starpu_node_get_kind(starpu_task_get_current_data_node(2)); - int size = STARPU_VECTOR_GET_NX(buffers[0]); + size_t size = STARPU_VECTOR_GET_NX(buffers[0]); max_engine_t *engine = starpu_max_fpga_get_local_engine();; diff --git a/tests/microbenchs/matrix_as_vector.c b/tests/microbenchs/matrix_as_vector.c index 57e630e3c9..b2c193722d 100644 --- a/tests/microbenchs/matrix_as_vector.c +++ b/tests/microbenchs/matrix_as_vector.c @@ -39,8 +39,8 @@ void 
vector_cpu_func(void *descr[], void *cl_arg) STARPU_SKIP_IF_VALGRIND; float *matrix = (float *)STARPU_VECTOR_GET_PTR(descr[0]); - int nx = STARPU_VECTOR_GET_NX(descr[0]); - int i; + size_t nx = STARPU_VECTOR_GET_NX(descr[0]); + size_t i; float sum=0; for(i=0 ; i