diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f7bc2814..6e354846 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -4,11 +4,19 @@ stages: make:generic: stage: build script: + - git clone https://xgitlab.cels.anl.gov/argo/excit.git + - cd excit - ./autogen.sh - mkdir build - ./configure --prefix=`pwd`/build - make - make install + - cd .. + - ./autogen.sh + - mkdir build + - PKG_CONFIG_PATH=excit/build/lib/pkgconfig ./configure --prefix=`pwd`/build + - make + - make install - make check artifacts: when: on_failure @@ -22,9 +30,17 @@ make:knl: stage: build script: - source /opt/intel/compilers_and_libraries/linux/bin/compilervars.sh intel64 + - git clone https://xgitlab.cels.anl.gov/argo/excit.git + - cd excit + - ./autogen.sh + - mkdir build + - ./configure --prefix=`pwd`/build + - make + - make install + - cd .. - ./autogen.sh - mkdir build - - CC=icc CFLAGS="-mkl -xhost" ./configure --prefix=`pwd`/build --enable-benchmarks + - CC=icc CFLAGS="-mkl -xhost" PKG_CONFIG_PATH=excit/build/lib/pkgconfig ./configure --prefix=`pwd`/build --enable-benchmarks - make -j64 - make install - make check diff --git a/configure.ac b/configure.ac index bf7aeec9..06260db0 100644 --- a/configure.ac +++ b/configure.ac @@ -52,6 +52,9 @@ AM_CONDITIONAL([ADD_BENCHMARKS],[test "x$benchmarks" = xtrue]) AC_CHECK_HEADERS(numa.h) AC_CHECK_LIB(numa, move_pages) +# excit iterators +PKG_CHECK_MODULES([EXCIT],[libexcit]) + # internal jemalloc ac_configure_args="$ac_configure_args \ '--with-jemalloc-prefix=jemk_aml_' \ diff --git a/src/Makefile.am b/src/Makefile.am index f8fca5cf..b945d5c0 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,4 @@ -AM_CPPFLAGS = -I$(top_srcdir)/jemalloc/include +AM_CPPFLAGS = -I$(top_srcdir)/jemalloc/include @EXCIT_CFLAGS@ lib_LTLIBRARIES = libaml.la ARENA_JEMALLOC_CSOURCES = arena_jemalloc.c @@ -10,21 +10,33 @@ AREA_LINUX_CSOURCES = area_linux.c \ AREA_POSIX_CSOURCES = area_posix.c +LAYOUT_CSOURCES = layout.c \ + layout_dense.c \ + layout_pad.c \ + 
layout_reshape.c + TILING_CSOURCES = tiling.c \ tiling_1d.c \ tiling_2d.c +TILING_ND_CSOURCES = tiling_nd.c \ + tiling_nd_resize.c \ + tiling_nd_pad.c \ + tiling_nd_collapse.c + BINDING_CSOURCES = binding.c \ binding_single.c \ binding_interleave.c DMA_CSOURCES = dma.c \ dma_linux_par.c \ - dma_linux_seq.c + dma_linux_seq.c \ + dma_layout.c SCRATCH_CSOURCES = scratch.c \ scratch_seq.c \ - scratch_par.c + scratch_par.c \ + scratch_double.c UTILS_CSOURCES = vector.c @@ -34,12 +46,26 @@ LIBCSOURCES = aml.c area.c arena.c \ $(AREA_LINUX_CSOURCES) \ $(AREA_POSIX_CSOURCES) \ $(TILING_CSOURCES) \ + $(TILING_ND_CSOURCES) \ $(BINDING_CSOURCES) \ $(DMA_CSOURCES) \ - $(SCRATCH_CSOURCES) + $(SCRATCH_CSOURCES) \ + $(LAYOUT_CSOURCES) \ + copy.c -LIBHSOURCES = aml.h +LIBHSOURCES = aml.h \ + aml-layout.h \ + aml-layout-dense.h \ + aml-layout-pad.h \ + aml-layout-reshape.h \ + aml-tiling.h \ + aml-tiling-resize.h \ + aml-tiling-pad.h \ + aml-tiling-collapse.h \ + aml-copy.h \ + aml-dma-layout.h \ + aml-scratch-double.h libaml_la_SOURCES = $(LIBCSOURCES) $(LIBHSOURCES) -libaml_la_LIBADD = -L$(top_srcdir)/jemalloc/lib/ -ljemalloc-aml +libaml_la_LIBADD = -L$(top_srcdir)/jemalloc/lib/ -ljemalloc-aml @EXCIT_LIBS@ include_HEADERS = $(LIBHSOURCES) diff --git a/src/aml-copy.h b/src/aml-copy.h new file mode 100644 index 00000000..22dd8933 --- /dev/null +++ b/src/aml-copy.h @@ -0,0 +1,192 @@ +#ifndef AML_COPY_H +#define AML_COPY_H 1 + + /******************************************************************************* + * Hypervolume copy and transpose functions. + ******************************************************************************/ + +/* + * Copies a (sub-)hypervolume to another (sub-)hypervolume. + * "d": number of dimensions. + * "dst": pointer to the destination hypervolume. + * "dst_pitch": pointer to d-1 pitch values representing the pitch + * in each dimension of the destination hypervolume. + * "src": pointer to the source hypervolume.
+ * "src_pitch": pointer to d-1 pitch values representing the pitch + * in each dimension of the source hypervolume. + * "elem_number": pointer to d values representing the number of elements + * in each dimension of the (sub-)hypervolume to copy. + * "elem_size": size of memory elements. + * Returns 0 if successful; an error code otherwise. + */ +int aml_copy_nd(size_t d, void *dst, const size_t *dst_pitch, + const void *src, const size_t *src_pitch, + const size_t *elem_number, const size_t elem_size); +/* + * Copies a (sub-)hypervolume to another (sub-)hypervolume while transposing. + * Reverse of aml_copy_rtnd. + * Example a[3][4][5] -> b[5][3][4] (C notation). + * "d": number of dimensions. + * "dst": pointer to the destination hypervolume. + * "dst_pitch": pointer to d-1 pitch values representing the pitch + * in each dimension of the destination hypervolume. + * "src": pointer to the source hypervolume. + * "src_pitch": pointer to d-1 pitch values representing the pitch + * in each dimension of the source hypervolume. + * "elem_number": pointer to d values representing the number of elements + * in each dimension of the (sub-)hypervolume to copy. + * "elem_size": size of memory elements in the src hypervolume order. + * Returns 0 if successful; an error code otherwise. + */ +int aml_copy_tnd(size_t d, void *dst, const size_t *dst_pitch, + const void *src, const size_t *src_pitch, + const size_t *elem_number, const size_t elem_size); +/* + * Copies a (sub-)hypervolume to another (sub-)hypervolume while transposing. + * Reverse of aml_copy_tnd. + * Example a[3][4][5] -> b[4][5][3] (C notation). + * "d": number of dimensions. + * "dst": pointer to the destination hypervolume. + * "dst_pitch": pointer to d-1 pitch values representing the pitch + * in each dimension of the destination hypervolume. + * "src": pointer to the source hypervolume. + * "src_pitch": pointer to d-1 pitch values representing the pitch + * in each dimension of the source hypervolume. 
+ * "elem_number": pointer to d values representing the number of elements + * in each dimension of the (sub-)hypervolume to copy. + * "elem_size": size of memory elements in the src hypervolume order. + * Returns 0 if successful; an error code otherwise. + */ +int aml_copy_rtnd(size_t d, void *dst, const size_t *dst_pitch, + const void *src, const size_t *src_pitch, + const size_t *elem_number, const size_t elem_size); + +/* + * Copies a (sub-)hypervolume to another (sub-)hypervolume while shuffling + * dimensions. Example a[4][2][3][5] -> b[5][4][3][2] (C notation). + * "d": number of dimensions. + * "target_dims": array of d dimension index representing the mapping + * between the source dimensions and the target dimensions. + * Example [3, 1, 0, 2] + * "dst": pointer to the destination hypervolume. + * "dst_pitch": pointer to d-1 pitch values representing the pitch + * in each dimension of the destination hypervolume. + * "src": pointer to the source hypervolume. + * "src_pitch": pointer to d-1 pitch values representing the pitch + * in each dimension of the source hypervolume. + * "elem_number": pointer to d values representing the number of elements + * in each dimension of the (sub-)hypervolume to copy. + * "elem_size": size of memory elements in the src hypervolume order. + * Returns 0 if successful; an error code otherwise. + */ +int aml_copy_shnd(size_t d, const size_t *target_dims, void *dst, + const size_t *dst_pitch, const void *src, + const size_t *src_pitch, const size_t *elem_number, + const size_t elem_size); +/* + * Strided version of aml_copy_nd. + */ +int aml_copy_ndstr(size_t d, void *dst, const size_t *dst_pitch, + const size_t *dst_stride, const void *src, + const size_t *src_pitch, const size_t *src_stride, + const size_t *elem_number, const size_t elem_size); +/* + * Strided version of aml_copy_tnd. 
+ */ +int aml_copy_tndstr(size_t d, void *dst, const size_t *dst_pitch, + const size_t *dst_stride, const void *src, + const size_t *src_pitch, const size_t *src_stride, + const size_t *elem_number, const size_t elem_size); +/* + * Strided version of aml_copy_rtnd. + */ +int aml_copy_rtndstr(size_t d, void *dst, const size_t *dst_pitch, + const size_t *dst_stride, const void *src, + const size_t *src_pitch, const size_t *src_stride, + const size_t *elem_number, const size_t elem_size); +/* + * Strided version of aml_copy_shnd. + */ +int aml_copy_shndstr(size_t d, const size_t *target_dims, void *dst, + const size_t *dst_pitch, const size_t *dst_stride, + const void *src, const size_t *src_pitch, + const size_t *src_stride, const size_t *elem_number, + const size_t elem_size); +/* + * Version of aml_copy_nd using cumulative pitch. + */ +int aml_copy_nd_c(size_t d, void *dst, const size_t *cumul_dst_pitch, + const void *src, const size_t *cumul_src_pitch, + const size_t *elem_number, const size_t elem_size); +/* + * Version of aml_copy_ndstr using cumulative pitch. + */ +int aml_copy_ndstr_c(size_t d, void *dst, const size_t *cumul_dst_pitch, + const size_t *dst_stride, const void *src, + const size_t *cumul_src_pitch, const size_t *src_stride, + const size_t *elem_number, const size_t elem_size); +/* + * Version of aml_copy_tnd using cumulative pitch. + */ +int aml_copy_tnd_c(size_t d, void *dst, const size_t *cumul_dst_pitch, + const void *src, const size_t *cumul_src_pitch, + const size_t *elem_number, const size_t elem_size); +/* + * Version of aml_copy_rtnd using cumulative pitch. + */ +int aml_copy_rtnd_c(size_t d, void *dst, const size_t *cumul_dst_pitch, + const void *src, const size_t *cumul_src_pitch, + const size_t *elem_number, const size_t elem_size); +/* + * Version of aml_copy_shnd using cumulative pitch.
+ */ +int aml_copy_shnd_c(size_t d, const size_t *target_dims, void *dst, + const size_t *cumul_dst_pitch, const void *src, + const size_t *cumul_src_pitch, const size_t *elem_number, + const size_t elem_size); +/* + * Version of aml_copy_tndstr using cumulative pitch. + */ +int aml_copy_tndstr_c(size_t d, void *dst, const size_t *cumul_dst_pitch, + const size_t *dst_stride, const void *src, + const size_t *cumul_src_pitch, const size_t *src_stride, + const size_t *elem_number, const size_t elem_size); +/* + * Version of aml_copy_rtndstr using cumulative pitch. + */ +int aml_copy_rtndstr_c(size_t d, void *dst, const size_t *cumul_dst_pitch, + const size_t *dst_stride, const void *src, + const size_t *cumul_src_pitch, const size_t *src_stride, + const size_t *elem_number, const size_t elem_size); +/* + * Version of aml_copy_shndstr using cumulative pitch. + */ +int aml_copy_shndstr_c(size_t d, const size_t *target_dims, void *dst, + const size_t *cumul_dst_pitch, const size_t *dst_stride, + const void *src, const size_t *cumul_src_pitch, + const size_t *src_stride, const size_t *elem_number, + const size_t elem_size); + + /******************************************************************************* + * Generic building block API: Native version + * Native means using AML-internal layouts. 
+ ******************************************************************************/ + +int aml_copy_layout_native(struct aml_layout *dst, + const struct aml_layout *src); +int aml_copy_layout_transform_native(struct aml_layout *dst, + const struct aml_layout *src, + const size_t *target_dims); +int aml_copy_layout_generic(struct aml_layout *dst, + const struct aml_layout *src); +int aml_copy_layout_transform_generic(struct aml_layout *dst, + const struct aml_layout *src, + const size_t *target_dims); +int aml_copy_layout_transpose_native(struct aml_layout *dst, const struct aml_layout *src); +int aml_copy_layout_reverse_transpose_native(struct aml_layout *dst, + const struct aml_layout *src); +int aml_copy_layout_transpose_generic(struct aml_layout *dst, const struct aml_layout *src); +int aml_copy_layout_reverse_transpose_generic(struct aml_layout *dst, + const struct aml_layout *src); + +#endif diff --git a/src/aml-dma-layout.h b/src/aml-dma-layout.h new file mode 100644 index 00000000..4e3e68a3 --- /dev/null +++ b/src/aml-dma-layout.h @@ -0,0 +1,41 @@ +#ifndef AML_DMA_LAYOUT_H +#define AML_DMA_LAYOUT_H 1 + +/******************************************************************************* + * Layout aware DMA + * DMA using layouts as source and destination. 
+ ******************************************************************************/ + +extern struct aml_dma_ops aml_dma_ops_layout; + +struct aml_dma_request_layout { + int type; + struct aml_layout *dest; + struct aml_layout *src; +}; + +typedef int (*aml_dma_operator)(struct aml_layout *, struct aml_layout *, void*); +struct aml_dma_layout { + struct aml_vector requests; + pthread_mutex_t lock; + aml_dma_operator do_work; + void *work_arg; +}; + +#define AML_DMA_LAYOUT_DECL(name) \ + struct aml_dma_layout __ ##name## _inner_data; \ + struct aml_dma name = { \ + &aml_dma_ops_layout, \ + (struct aml_dma_data *)&__ ## name ## _inner_data, \ + }; + +#define AML_DMA_LAYOUT_ALLOCSIZE \ + (sizeof(struct aml_dma_layout) + \ + sizeof(struct aml_dma)) + +int aml_dma_layout_create(struct aml_dma **dma, ...); +int aml_dma_layout_init(struct aml_dma *dma, ...); +int aml_dma_layout_vinit(struct aml_dma *dma, va_list args); +int aml_dma_layout_destroy(struct aml_dma *dma); + +#endif diff --git a/src/aml-layout-dense.h b/src/aml-layout-dense.h new file mode 100644 index 00000000..d7782cdd --- /dev/null +++ b/src/aml-layout-dense.h @@ -0,0 +1,75 @@ +#ifndef AML_LAYOUT_DENSE_H +#define AML_LAYOUT_DENSE_H 1 + +#include + +/******************************************************************************* + * Native Layout Operators. + ******************************************************************************/ + +/* Layout: describes how a multi-dimensional dense data structure is collapsed + * into a linear (and contiguous) virtual address range. + * "ptr": base pointer of the address range + * "ndims": number of dimensions + * "dims": dimensions, in element size, of the data structure, by order of + * appearance in memory. + * "stride": offset between elements of the same dimension. + * "pitch": distances between two elements of the next dimension (or total + dimension of the layout in this dimension). 
+ * "cpitch": cumulative distances between two elements in the same dimension + * (cpitch[0] is the element size in bytes). + */ +struct aml_layout_data_native { + void *ptr; + size_t ndims; + size_t *dims; + size_t *stride; + size_t *pitch; + size_t *cpitch; +}; +/* Byte count: sizeof both structs plus (4 * ndims + 1) size_t slots. */ +#define AML_LAYOUT_NATIVE_ALLOCSIZE(ndims) (sizeof(struct aml_layout) +\ + sizeof(struct aml_layout_data_native) +\ + ((ndims) * 4 + 1) * sizeof(size_t)) +/* Declares layout "name"; dims, stride, pitch and cpitch point into one size_t array. */ +#define AML_LAYOUT_NATIVE_DECL(name, ndims) \ + size_t __ ##name## _inner_data[(ndims) * 4 + 1]; \ + struct aml_layout_data_native __ ##name## _inner_struct = { \ + NULL, \ + (ndims), \ + __ ##name## _inner_data, \ + __ ##name## _inner_data + (ndims), \ + __ ##name## _inner_data + 2 * (ndims), \ + __ ##name## _inner_data + 3 * (ndims), \ + }; \ + struct aml_layout name = { \ + 0, \ + NULL, \ + (struct aml_layout_data *)& __ ##name## _inner_struct, \ + }; + +int aml_layout_native_struct_init(struct aml_layout *l, size_t ndims, + void *data); +int aml_layout_native_ainit(struct aml_layout *l, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, + const size_t *dims, const size_t *stride, + const size_t *pitch); +int aml_layout_native_vinit(struct aml_layout *l, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, + va_list data); +int aml_layout_native_init(struct aml_layout *l, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, ...); +int aml_layout_native_acreate(struct aml_layout **l, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, + const size_t *dims, const size_t *stride, + const size_t *pitch); +int aml_layout_native_vcreate(struct aml_layout **l, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, + va_list data); +int aml_layout_native_create(struct aml_layout **l, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, ...); + +extern struct aml_layout_ops aml_layout_column_ops; +extern struct aml_layout_ops aml_layout_row_ops; + +#endif diff --git
a/src/aml-layout-pad.h b/src/aml-layout-pad.h new file mode 100644 index 00000000..6705375f --- /dev/null +++ b/src/aml-layout-pad.h @@ -0,0 +1,58 @@ +#ifndef AML_LAYOUT_PAD_H +#define AML_LAYOUT_PAD_H 1 + +#include + +struct aml_layout_data_pad { + struct aml_layout *target; + size_t ndims; + size_t element_size; + size_t *dims; + size_t *target_dims; + void *neutral; +}; + +/* Byte count: sizeof both structs, 2 * ndims size_t slots and neutral_size bytes. */ +#define AML_LAYOUT_PAD_ALLOCSIZE(ndims, neutral_size) ( \ + sizeof(struct aml_layout) + \ + sizeof(struct aml_layout_data_pad) + \ + 2 * (ndims) * sizeof(size_t) + \ + (neutral_size) ) +/* Declares layout "name"; dims, target_dims and neutral point into one byte array. */ +#define AML_LAYOUT_PAD_DECL(name, ndims, neutral_size) \ + uint8_t __ ##name## _inner_data[2 * (ndims) * sizeof(size_t) + \ + (neutral_size) ]; \ + struct aml_layout_data_pad __ ##name## _inner_struct = { \ + NULL, \ + (ndims), \ + (neutral_size), \ + (size_t *) __ ##name## _inner_data, \ + (size_t *) (__ ##name## _inner_data + (ndims) * sizeof(size_t)), \ + (void *) (__ ##name## _inner_data + 2 * (ndims) * sizeof(size_t)) \ + }; \ + struct aml_layout name = { \ + 0, \ + NULL, \ + (struct aml_layout_data *)& __ ##name## _inner_struct \ + }; + +int aml_layout_pad_struct_init(struct aml_layout *l, size_t ndims, + size_t element_size, void *data); +int aml_layout_pad_ainit(struct aml_layout *l, uint64_t tags, + struct aml_layout *target, const size_t *dims, + void *neutral); +int aml_layout_pad_vinit(struct aml_layout *l, uint64_t tags, + struct aml_layout *target, va_list data); +int aml_layout_pad_init(struct aml_layout *l, uint64_t tags, + struct aml_layout *target, ...); +int aml_layout_pad_acreate(struct aml_layout **l, uint64_t tags, + struct aml_layout *target, const size_t *dims, + void *neutral); +int aml_layout_pad_vcreate(struct aml_layout **l, uint64_t tags, + struct aml_layout *target, va_list data); +int aml_layout_pad_create(struct aml_layout **l, uint64_t tags, + struct aml_layout *target, ...); + +extern struct aml_layout_ops aml_layout_pad_column_ops; +extern struct aml_layout_ops aml_layout_pad_row_ops;
+#endif diff --git a/src/aml-layout-reshape.h b/src/aml-layout-reshape.h new file mode 100644 index 00000000..c8207c0c --- /dev/null +++ b/src/aml-layout-reshape.h @@ -0,0 +1,60 @@ +#ifndef AML_LAYOUT_RESHAPE_H +#define AML_LAYOUT_RESHAPE_H + +#include + +struct aml_layout_data_reshape { + struct aml_layout *target; + size_t ndims; + size_t target_ndims; + size_t *dims; + size_t *coffsets; + size_t *target_dims; + size_t *target_coffsets; +}; +/* Byte count: sizeof both structs plus (2 * ndims + target_ndims) size_t slots. */ +#define AML_LAYOUT_RESHAPE_ALLOCSIZE(ndims, target_ndims) ( \ + sizeof(struct aml_layout) + \ + sizeof(struct aml_layout_data_reshape) + \ + 2 * (ndims) * sizeof(size_t) + \ + (target_ndims) * sizeof(size_t) ) +/* NOTE(review): target_coffsets gets no initializer (zeroed) and no storage here - confirm intended. */ +#define AML_LAYOUT_RESHAPE_DECL(name, ndims, target_ndims) \ + size_t __ ##name## _inner_data[ 2 * (ndims) + (target_ndims)]; \ + struct aml_layout_data_reshape __ ##name## _inner_struct = { \ + NULL, \ + (ndims), \ + (target_ndims), \ + __ ##name## _inner_data, \ + __ ##name## _inner_data + (ndims), \ + __ ##name## _inner_data + 2 * (ndims) \ + }; \ + struct aml_layout name = { \ + 0, \ + NULL, \ + (struct aml_layout_data *)& __ ##name## _inner_struct \ + }; + +int aml_layout_reshape_struct_init(struct aml_layout *l, size_t ndims, + void *data); +int aml_layout_reshape_ainit(struct aml_layout *l, uint64_t tags, + struct aml_layout *target, size_t ndims, + const size_t *dims); +int aml_layout_reshape_vinit(struct aml_layout *l, uint64_t tags, + struct aml_layout *target, size_t ndims, + va_list data); +int aml_layout_reshape_init(struct aml_layout *l, uint64_t tags, + struct aml_layout *target, size_t ndims, ...); +int aml_layout_reshape_acreate(struct aml_layout **l, uint64_t tags, + struct aml_layout *target, size_t ndims, + const size_t *dims); +int aml_layout_reshape_vcreate(struct aml_layout **l, uint64_t tags, + struct aml_layout *target, size_t ndims, + va_list data); +int aml_layout_reshape_create(struct aml_layout **l, uint64_t tags, + struct aml_layout *target, size_t ndims, ...); + +extern struct aml_layout_ops
aml_layout_reshape_column_ops; +extern struct aml_layout_ops aml_layout_reshape_row_ops; + +#endif diff --git a/src/aml-layout.h b/src/aml-layout.h new file mode 100644 index 00000000..44327472 --- /dev/null +++ b/src/aml-layout.h @@ -0,0 +1,86 @@ +#ifndef AML_LAYOUT_H +#define AML_LAYOUT_H 1 + +#include + +/******************************************************************************* + * Data Layout Management: + ******************************************************************************/ + +struct aml_layout; +struct aml_layout_data; + +/******************************************************************************* + * Generic layout, with support for sparsity and strides. + ******************************************************************************/ + +/* Layout type tags. Each is defined as a mask with a single bit set to one. */ +#define AML_TYPE_LAYOUT_ORDER (1 << 0) +#define AML_TYPE_LAYOUT_MAX (1 << 1) + +#define AML_TYPE_LAYOUT_ROW_ORDER 1 +#define AML_TYPE_LAYOUT_COLUMN_ORDER 0 +/* Tag accessors; "tags" must be a modifiable lvalue for CLEAR and SET. */ +#define AML_TYPE_GET(tags, bit) ((tags) & (bit)) +#define AML_TYPE_CLEAR(tags, bit) ((tags) &= ~(bit)) +#define AML_TYPE_SET(tags, bit, value) do { \ + AML_TYPE_CLEAR(tags, bit); \ + if(value) (tags) |= (bit);} while(0) + + +struct aml_layout_ops { + void *(*deref)(const struct aml_layout_data *, va_list coords); + void *(*aderef)(const struct aml_layout_data *, const size_t *coords); + void *(*aderef_column)(const struct aml_layout_data *, + const size_t *coords); + int (*order)(const struct aml_layout_data *); + int (*dims)(const struct aml_layout_data *, va_list dim_ptrs); + int (*adims)(const struct aml_layout_data *, size_t *dims); + int (*adims_column)(const struct aml_layout_data *, size_t *dims); + size_t (*ndims)(const struct aml_layout_data *); + size_t (*element_size)(const struct aml_layout_data *); + struct aml_layout * (*reshape)(const struct aml_layout_data *, + size_t ndims, va_list dims); + struct aml_layout * (*areshape)(const struct aml_layout_data *, + size_t ndims, const
size_t *dims); + struct aml_layout * (*slice)(const struct aml_layout_data *, + va_list dims); + struct aml_layout * (*aslice)(const struct aml_layout_data *, + const size_t *offsets, const size_t *dims, + const size_t *strides); + struct aml_layout * (*aslice_column)(const struct aml_layout_data *, + const size_t *offsets, + const size_t *dims, + const size_t *strides); +}; + +struct aml_layout { + uint64_t tags; + struct aml_layout_ops *ops; + struct aml_layout_data *data; +}; + +void *aml_layout_deref(const struct aml_layout *l, ...); +void *aml_layout_aderef(const struct aml_layout *l, const size_t *coords); +void *aml_layout_aderef_column(const struct aml_layout *l, + const size_t *coords); +int aml_layout_order(const struct aml_layout *l); +int aml_layout_dims(const struct aml_layout *l, ...); +int aml_layout_adims(const struct aml_layout *l, size_t *dims); +int aml_layout_adims_column(const struct aml_layout *l, size_t *dims); +size_t aml_layout_ndims(const struct aml_layout *l); +size_t aml_layout_element_size(const struct aml_layout *l); +struct aml_layout * aml_layout_areshape(const struct aml_layout *l, + size_t ndims, const size_t *dims); +struct aml_layout * aml_layout_reshape(const struct aml_layout *l, + size_t ndims, ...); +struct aml_layout * aml_layout_slice(const struct aml_layout *l, ...); +struct aml_layout * aml_layout_aslice(const struct aml_layout *l, + const size_t *offsets, const size_t *dims, + const size_t *strides); +struct aml_layout * aml_layout_aslice_column(const struct aml_layout *l, + const size_t *offsets, + const size_t *dims, + const size_t *strides); + +#endif diff --git a/src/aml-scratch-double.h b/src/aml-scratch-double.h new file mode 100644 index 00000000..791dcecf --- /dev/null +++ b/src/aml-scratch-double.h @@ -0,0 +1,56 @@ +#ifndef AML_SCRATCH_DOUBLE_H +#define AML_SCRATCH_DOUBLE_H 1 + +/******************************************************************************* + * Sequential scratchpad API: + * Scratchpad uses 
calling thread to trigger asynchronous dma movements. + ******************************************************************************/ + +extern struct aml_scratch_ops aml_scratch_double_ops; + +struct aml_scratch_request_double { + int type; + struct aml_dma *dma; + struct aml_layout *src; + int srcid; + struct aml_layout *dest; + int dstid; + pthread_t thread; +}; + +struct aml_scratch_double_data { + struct aml_tiling_nd *src_tiling; + struct aml_tiling_nd *dest_tiling; + struct aml_dma *push_dma; + struct aml_dma *pull_dma; + struct aml_vector tilemap; + struct aml_vector requests; + pthread_mutex_t lock; +}; + +struct aml_scratch_double_ops { + void *(*do_thread)(void *); +}; + +struct aml_scratch_double { + struct aml_scratch_double_ops ops; + struct aml_scratch_double_data data; +}; + +#define AML_SCRATCH_DOUBLE_DECL(name) \ + struct aml_scratch_double __ ##name## _inner_data; \ + struct aml_scratch name = { \ + &aml_scratch_double_ops, \ + (struct aml_scratch_data *)&__ ## name ## _inner_data, \ + }; + +#define AML_SCRATCH_DOUBLE_ALLOCSIZE \ + (sizeof(struct aml_scratch_double) + \ + sizeof(struct aml_scratch)) + +int aml_scratch_double_create(struct aml_scratch **scratch, ...); +int aml_scratch_double_init(struct aml_scratch *scratch, ...); +int aml_scratch_double_vinit(struct aml_scratch *scratch, va_list args); +int aml_scratch_double_destroy(struct aml_scratch *scratch); + +#endif diff --git a/src/aml-tiling-collapse.h b/src/aml-tiling-collapse.h new file mode 100644 index 00000000..348ea0b6 --- /dev/null +++ b/src/aml-tiling-collapse.h @@ -0,0 +1,40 @@ +#ifndef AML_TILING_COLLAPSE_H +#define AML_TILING_COLLAPSE_H + +#include + +struct aml_tiling_nd_data_collapse { + const struct aml_layout *l; + size_t ndims; + size_t *tile_dims; + size_t *dims; + size_t *border_tile_dims; +}; +/* Byte count: sizeof both structs plus 3 * ndims size_t slots. */ +#define AML_TILING_COLLAPSE_ALLOCSIZE(ndims) (sizeof(struct aml_tiling_nd) +\ + sizeof(struct aml_tiling_nd_data_collapse) +\ + ((ndims) * 3) * sizeof(size_t)) + +int
aml_tiling_nd_collapse_struct_init(struct aml_tiling_nd *t, size_t ndims, + void *data); +int aml_tiling_nd_collapse_ainit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims); +int aml_tiling_nd_collapse_vinit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data); +int aml_tiling_nd_collapse_init(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...); +int aml_tiling_nd_collapse_acreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims); +int aml_tiling_nd_collapse_vcreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data); +int aml_tiling_nd_collapse_create(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...); + +extern struct aml_tiling_nd_ops aml_tiling_nd_collapse_column_ops; +extern struct aml_tiling_nd_ops aml_tiling_nd_collapse_row_ops; + +#endif diff --git a/src/aml-tiling-pad.h b/src/aml-tiling-pad.h new file mode 100644 index 00000000..46f23d86 --- /dev/null +++ b/src/aml-tiling-pad.h @@ -0,0 +1,44 @@ +#ifndef AML_TILING_PAD_H +#define AML_TILING_PAD_H + +#include + +struct aml_tiling_nd_data_pad { + const struct aml_layout *l; + size_t ndims; + size_t *tile_dims; + size_t *dims; + size_t *border_tile_dims; + size_t *pad; + void *neutral; +}; +/* Byte count: sizeof both structs, 4 * ndims size_t slots and neutral_size bytes. */ +#define AML_TILING_PAD_ALLOCSIZE(ndims, neutral_size) ( \ + sizeof(struct aml_tiling_nd) + \ + sizeof(struct aml_tiling_nd_data_pad) + \ + ((ndims) * 4) * sizeof(size_t) + \ + (neutral_size) ) + +int aml_tiling_nd_pad_struct_init(struct aml_tiling_nd *t, size_t ndims, + void *data); +int aml_tiling_nd_pad_ainit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims, void *neutral); +int aml_tiling_nd_pad_vinit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l,
size_t ndims, + va_list data); +int aml_tiling_nd_pad_init(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...); +int aml_tiling_nd_pad_acreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims, void *neutral); +int aml_tiling_nd_pad_vcreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data); +int aml_tiling_nd_pad_create(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...); + +extern struct aml_tiling_nd_ops aml_tiling_nd_pad_column_ops; +extern struct aml_tiling_nd_ops aml_tiling_nd_pad_row_ops; + +#endif diff --git a/src/aml-tiling-resize.h b/src/aml-tiling-resize.h new file mode 100644 index 00000000..eb7fee8e --- /dev/null +++ b/src/aml-tiling-resize.h @@ -0,0 +1,40 @@ +#ifndef AML_TILING_RESIZE_H +#define AML_TILING_RESIZE_H + +#include + +struct aml_tiling_nd_data_resize { + const struct aml_layout *l; + size_t ndims; + size_t *tile_dims; + size_t *dims; + size_t *border_tile_dims; +}; +/* Byte count: sizeof both structs plus 3 * ndims size_t slots. */ +#define AML_TILING_RESIZE_ALLOCSIZE(ndims) (sizeof(struct aml_tiling_nd) +\ + sizeof(struct aml_tiling_nd_data_resize) +\ + ((ndims) * 3) * sizeof(size_t)) + +int aml_tiling_nd_resize_struct_init(struct aml_tiling_nd *t, size_t ndims, + void *data); +int aml_tiling_nd_resize_ainit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims); +int aml_tiling_nd_resize_vinit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data); +int aml_tiling_nd_resize_init(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...); +int aml_tiling_nd_resize_acreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims); +int aml_tiling_nd_resize_vcreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t
ndims, + va_list data); +int aml_tiling_nd_resize_create(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...); + +extern struct aml_tiling_nd_ops aml_tiling_nd_resize_column_ops; +extern struct aml_tiling_nd_ops aml_tiling_nd_resize_row_ops; + +#endif diff --git a/src/aml-tiling.h b/src/aml-tiling.h new file mode 100644 index 00000000..7bf2293d --- /dev/null +++ b/src/aml-tiling.h @@ -0,0 +1,44 @@ +#ifndef AML_TILING_H +#define AML_TILING_H 1 + +#include + +struct aml_tiling_nd; +struct aml_tiling_nd_data; + +#define AML_TYPE_TILING_ORDER (1 << 0) +#define AML_TYPE_TILING_MAX (1 << 1) + +#define AML_TYPE_TILING_ROW_ORDER 1 +#define AML_TYPE_TILING_COLUMN_ORDER 0 + +struct aml_tiling_nd_ops { + struct aml_layout* (*index)(const struct aml_tiling_nd_data *, + va_list coords); + struct aml_layout* (*aindex)(const struct aml_tiling_nd_data *, + const size_t *coords); + int (*order)(const struct aml_tiling_nd_data *); + int (*tile_dims)(const struct aml_tiling_nd_data *, va_list dim_ptrs); + int (*tile_adims)(const struct aml_tiling_nd_data *, size_t *dims); + int (*dims)(const struct aml_tiling_nd_data *, va_list dim_ptrs); + int (*adims)(const struct aml_tiling_nd_data *, size_t *dims); + size_t (*ndims)(const struct aml_tiling_nd_data *); +}; + +struct aml_tiling_nd { + uint64_t tags; + struct aml_tiling_nd_ops *ops; + struct aml_tiling_nd_data *data; +}; + +struct aml_layout *aml_tiling_nd_index(const struct aml_tiling_nd *t, ...); +struct aml_layout *aml_tiling_nd_aindex(const struct aml_tiling_nd *t, + const size_t *coords); +int aml_tiling_nd_order(const struct aml_tiling_nd *t); +int aml_tiling_nd_tile_dims(const struct aml_tiling_nd *t, ...); +int aml_tiling_nd_tile_adims(const struct aml_tiling_nd *t, size_t *dims); +int aml_tiling_nd_dims(const struct aml_tiling_nd *t, ...); +int aml_tiling_nd_adims(const struct aml_tiling_nd *t, size_t *dims); +size_t aml_tiling_nd_ndims(const struct aml_tiling_nd *t); + +#endif diff 
--git a/src/aml.h b/src/aml.h index 73f23992..8b976419 100644 --- a/src/aml.h +++ b/src/aml.h @@ -1,6 +1,7 @@ #ifndef AML_H #define AML_H 1 +#include #include #include #include @@ -18,7 +19,15 @@ #define PAGE_SIZE 4096 #endif - +#include "aml-layout.h" +#include "aml-layout-dense.h" +#include "aml-layout-pad.h" +#include "aml-layout-reshape.h" +#include "aml-tiling.h" +#include "aml-tiling-resize.h" +#include "aml-tiling-pad.h" +#include "aml-tiling-collapse.h" +#include "aml-copy.h" /******************************************************************************* * Forward Declarations: ******************************************************************************/ @@ -1297,6 +1306,7 @@ int aml_dma_wait(struct aml_dma *dma, struct aml_dma_request *req); */ int aml_dma_cancel(struct aml_dma *dma, struct aml_dma_request *req); +#include "aml-dma-layout.h" /******************************************************************************* * Linux Sequential DMA API: * DMA logic implemented based on general linux API, with the caller thread @@ -1483,6 +1493,8 @@ struct aml_scratch_data; #define AML_SCRATCH_REQUEST_TYPE_PUSH 0 /* Pull from regular memory to the scratchpad. */ #define AML_SCRATCH_REQUEST_TYPE_PULL 1 +/* No-op/empty request */ +#define AML_SCRATCH_REQUEST_TYPE_NOOP 2 struct aml_scratch_ops { int (*create_request)(struct aml_scratch_data *scratch, @@ -1584,6 +1596,7 @@ void* aml_scratch_baseptr(const struct aml_scratch *scratch); */ int aml_scratch_release(struct aml_scratch *scratch, int scratchid); +#include "aml-scratch-double.h" /******************************************************************************* * Sequential scratchpad API: * Scratchpad uses calling thread to trigger asynchronous dma movements. 
diff --git a/src/copy.c b/src/copy.c new file mode 100644 index 00000000..0f3f37bf --- /dev/null +++ b/src/copy.c @@ -0,0 +1,665 @@ +#include +#include +#include +#include +#include +/* Fill both d-entry cumulative pitch arrays: [0] = elem_size, [i + 1] = pitch[i] * [i]. Requires d >= 1. */ +static inline void aml_compute_cumulative_pitch(size_t d, + size_t * cumul_dst_pitch, + size_t * cumul_src_pitch, + const size_t * dst_pitch, + const size_t * src_pitch, + size_t elem_size) +{ + cumul_dst_pitch[0] = elem_size; + cumul_src_pitch[0] = elem_size; + for (size_t i = 0; i < d - 1; i += 1) { + cumul_dst_pitch[i + 1] = dst_pitch[i] * cumul_dst_pitch[i]; + cumul_src_pitch[i + 1] = src_pitch[i] * cumul_src_pitch[i]; + } +} +/* Recursive copy: d == 1 copies elem_number[0] elements (one memcpy when both pitches equal elem_size); else loops over dimension d - 1. */ +static inline void aml_copy_nd_helper(size_t d, void *dst, + const size_t * cumul_dst_pitch, + const void *src, + const size_t * cumul_src_pitch, + const size_t * elem_number, + size_t elem_size) +{ + if (d == 1) + if (cumul_dst_pitch[0] == elem_size + && cumul_src_pitch[0] == elem_size) + memcpy(dst, src, elem_number[0] * elem_size); + else + for (size_t i = 0; i < elem_number[0]; i += 1) + memcpy((void *)((intptr_t) dst + + i * cumul_dst_pitch[0]), + (void *)((intptr_t) src + + i * cumul_src_pitch[0]), + elem_size); + else + for (size_t i = 0; i < elem_number[d - 1]; i += 1) { + aml_copy_nd_helper(d - 1, dst, cumul_dst_pitch, src, + cumul_src_pitch, elem_number, + elem_size); + dst = (void *)((intptr_t) dst + cumul_dst_pitch[d - 1]); + src = (void *)((intptr_t) src + cumul_src_pitch[d - 1]); + } +} +/* Asserts d > 0 and that each cumulative pitch covers the previous dimension, then copies; always returns 0. */ +int aml_copy_nd_c(size_t d, void *dst, const size_t * cumul_dst_pitch, + const void *src, const size_t * cumul_src_pitch, + const size_t * elem_number, size_t elem_size) +{ + assert(d > 0); + for (size_t i = 0; i < d - 1; i += 1) { + assert(cumul_dst_pitch[i + 1] >= + cumul_dst_pitch[i] * elem_number[i]); + assert(cumul_src_pitch[i + 1] >= + cumul_src_pitch[i] * elem_number[i]); + } + aml_copy_nd_helper(d, dst, cumul_dst_pitch, src, cumul_src_pitch, + elem_number, elem_size); + return 0; +} +/* Converts per-dimension pitches to cumulative pitches in alloca() buffers, then calls aml_copy_nd_c; always returns 0. */ +int aml_copy_nd(size_t d, void *dst, const size_t * dst_pitch,
const void *src, + const size_t * src_pitch, const size_t * elem_number, + size_t elem_size) +{ + assert(d > 0); + size_t *cumul_dst_pitch; + size_t *cumul_src_pitch; + cumul_dst_pitch = (size_t *) alloca(d * sizeof(size_t)); + cumul_src_pitch = (size_t *) alloca(d * sizeof(size_t)); + aml_compute_cumulative_pitch(d, cumul_dst_pitch, cumul_src_pitch, + dst_pitch, src_pitch, elem_size); + aml_copy_nd_c(d, dst, cumul_dst_pitch, src, cumul_src_pitch, + elem_number, elem_size); + return 0; +} + +static inline void aml_copy_ndstr_helper(size_t d, void *dst, + const size_t * cumul_dst_pitch, + const size_t * dst_stride, + const void *src, + const size_t * cumul_src_pitch, + const size_t * src_stride, + const size_t * elem_number, + size_t elem_size) +{ + if (d == 1) + if (dst_stride[0] * cumul_dst_pitch[0] == elem_size + && src_stride[0] * cumul_src_pitch[0] == elem_size) + memcpy(dst, src, elem_number[0] * elem_size); + else + for (size_t i = 0; i < elem_number[0]; i += 1) + memcpy((void *)((intptr_t) dst + + i * (dst_stride[0] * + cumul_dst_pitch[0])), + (void *)((intptr_t) src + + i * (src_stride[0] * + cumul_src_pitch[0])), + elem_size); + else + for (size_t i = 0; i < elem_number[d - 1]; i += 1) { + aml_copy_ndstr_helper(d - 1, dst, cumul_dst_pitch, + dst_stride, src, cumul_src_pitch, + src_stride, elem_number, + elem_size); + dst = + (void *)((intptr_t) dst + + dst_stride[d - 1] * cumul_dst_pitch[d - + 1]); + src = + (void *)((intptr_t) src + + src_stride[d - 1] * cumul_src_pitch[d - + 1]); + } +} + +int aml_copy_ndstr_c(size_t d, void *dst, const size_t * cumul_dst_pitch, + const size_t * dst_stride, const void *src, + const size_t * cumul_src_pitch, const size_t * src_stride, + const size_t * elem_number, size_t elem_size) +{ + assert(d > 0); + for (size_t i = 0; i < d - 1; i += 1) { + assert(cumul_dst_pitch[i + 1] >= + dst_stride[i] * cumul_dst_pitch[i] * elem_number[i]); + assert(cumul_src_pitch[i + 1] >= + src_stride[i] * cumul_src_pitch[i] * elem_number[i]); 
+ } + aml_copy_ndstr_helper(d, dst, cumul_dst_pitch, dst_stride, src, + cumul_src_pitch, src_stride, elem_number, + elem_size); + return 0; +} + +int aml_copy_ndstr(size_t d, void *dst, const size_t * dst_pitch, + const size_t * dst_stride, const void *src, + const size_t * src_pitch, const size_t * src_stride, + const size_t * elem_number, size_t elem_size) +{ + assert(d > 0); + size_t *cumul_dst_pitch; + size_t *cumul_src_pitch; + cumul_dst_pitch = (size_t *) alloca(d * sizeof(size_t)); + cumul_src_pitch = (size_t *) alloca(d * sizeof(size_t)); + aml_compute_cumulative_pitch(d, cumul_dst_pitch, cumul_src_pitch, + dst_pitch, src_pitch, elem_size); + aml_copy_ndstr_c(d, dst, cumul_dst_pitch, dst_stride, src, + cumul_src_pitch, src_stride, elem_number, elem_size); + return 0; +} + +static inline void aml_copy_shnd_helper(size_t d, const size_t * target_dims, + void *dst, + const size_t * cumul_dst_pitch, + const void *src, + const size_t * cumul_src_pitch, + const size_t * elem_number, + size_t elem_size) +{ + if (d == 1) + if (cumul_dst_pitch[0] == elem_size + && cumul_src_pitch[target_dims[0]] == elem_size) + memcpy(dst, src, + elem_number[target_dims[0]] * elem_size); + else + for (size_t i = 0; i < elem_number[target_dims[0]]; + i += 1) + memcpy((void *)((intptr_t) dst + + i * cumul_dst_pitch[0]), + (void *)((intptr_t) src + + i * + cumul_src_pitch[target_dims + [0]]), + elem_size); + else + for (size_t i = 0; i < elem_number[target_dims[d - 1]]; i += 1) { + aml_copy_shnd_helper(d - 1, target_dims, dst, + cumul_dst_pitch, src, + cumul_src_pitch, elem_number, + elem_size); + dst = (void *)((intptr_t) dst + cumul_dst_pitch[d - 1]); + src = + (void *)((intptr_t) src + + cumul_src_pitch[target_dims[d - 1]]); + } +} + +int aml_copy_shnd_c(size_t d, const size_t * target_dims, void *dst, + const size_t * cumul_dst_pitch, const void *src, + const size_t * cumul_src_pitch, const size_t * elem_number, + size_t elem_size) +{ + assert(d > 0); + size_t present_dims; + 
present_dims = 0; + for (size_t i = 0; i < d; i += 1) { + assert(target_dims[i] < d); + present_dims |= 1 << target_dims[i]; + } + for (size_t i = 0; i < d; i += 1) + assert(present_dims & 1 << i); + for (size_t i = 0; i < d - 1; i += 1) { + assert(cumul_dst_pitch[i + 1] >= + cumul_dst_pitch[i] * elem_number[target_dims[i]]); + assert(cumul_src_pitch[i + 1] >= + cumul_src_pitch[i] * elem_number[i]); + } + aml_copy_shnd_helper(d, target_dims, dst, cumul_dst_pitch, src, + cumul_src_pitch, elem_number, elem_size); + return 0; +} + +int aml_copy_shnd(size_t d, const size_t * target_dims, void *dst, + const size_t * dst_pitch, const void *src, + const size_t * src_pitch, const size_t * elem_number, + size_t elem_size) +{ + assert(d > 0); + size_t *cumul_dst_pitch; + size_t *cumul_src_pitch; + cumul_dst_pitch = (size_t *) alloca(d * sizeof(size_t)); + cumul_src_pitch = (size_t *) alloca(d * sizeof(size_t)); + aml_compute_cumulative_pitch(d, cumul_dst_pitch, cumul_src_pitch, + dst_pitch, src_pitch, elem_size); + aml_copy_shnd_c(d, target_dims, dst, cumul_dst_pitch, src, + cumul_src_pitch, elem_number, elem_size); + return 0; +} + +static inline void aml_copy_shndstr_helper(size_t d, const size_t * target_dims, + void *dst, + const size_t * cumul_dst_pitch, + const size_t * dst_stride, + const void *src, + const size_t * cumul_src_pitch, + const size_t * src_stride, + const size_t * elem_number, + size_t elem_size) +{ + if (d == 1) + if (dst_stride[0] * cumul_dst_pitch[0] == elem_size + && src_stride[target_dims[0]] * + cumul_src_pitch[target_dims[0]] == elem_size) + memcpy(dst, src, + elem_number[target_dims[0]] * elem_size); + else + for (size_t i = 0; i < elem_number[target_dims[0]]; + i += 1) + memcpy((void *)((intptr_t) dst + + i * (dst_stride[0] * + cumul_dst_pitch[0])), + (void *)((intptr_t) src + + i * + (src_stride[target_dims[0]] * + cumul_src_pitch[target_dims + [0]])), + elem_size); + else + for (size_t i = 0; i < elem_number[target_dims[d - 1]]; i += 1) { + 
aml_copy_shndstr_helper(d - 1, target_dims, dst, + cumul_dst_pitch, dst_stride, + src, cumul_src_pitch, + src_stride, elem_number, + elem_size); + dst = + (void *)((intptr_t) dst + + dst_stride[d - 1] * cumul_dst_pitch[d - + 1]); + src = + (void *)((intptr_t) src + + src_stride[target_dims[d - 1]] * + cumul_src_pitch[target_dims[d - 1]]); + } +} + +int aml_copy_shndstr_c(size_t d, const size_t * target_dims, void *dst, + const size_t * cumul_dst_pitch, + const size_t * dst_stride, const void *src, + const size_t * cumul_src_pitch, + const size_t * src_stride, const size_t * elem_number, + size_t elem_size) +{ + assert(d > 0); + size_t present_dims; + present_dims = 0; + for (size_t i = 0; i < d; i += 1) { + assert(target_dims[i] < d); + present_dims |= 1 << target_dims[i]; + } + for (size_t i = 0; i < d; i += 1) + assert(present_dims & 1 << i); + for (size_t i = 0; i < d - 1; i += 1) { + assert(cumul_dst_pitch[i + 1] >= + dst_stride[i] * cumul_dst_pitch[i] * + elem_number[target_dims[i]]); + assert(cumul_src_pitch[i + 1] >= + src_stride[i] * cumul_src_pitch[i] * elem_number[i]); + } + aml_copy_shndstr_helper(d, target_dims, dst, cumul_dst_pitch, + dst_stride, src, cumul_src_pitch, src_stride, + elem_number, elem_size); + return 0; +} + +int aml_copy_shndstr(size_t d, const size_t * target_dims, void *dst, + const size_t * dst_pitch, const size_t * dst_stride, + const void *src, const size_t * src_pitch, + const size_t * src_stride, const size_t * elem_number, + size_t elem_size) +{ + assert(d > 0); + size_t *cumul_dst_pitch; + size_t *cumul_src_pitch; + cumul_dst_pitch = (size_t *) alloca(d * sizeof(size_t)); + cumul_src_pitch = (size_t *) alloca(d * sizeof(size_t)); + aml_compute_cumulative_pitch(d, cumul_dst_pitch, cumul_src_pitch, + dst_pitch, src_pitch, elem_size); + aml_copy_shndstr_c(d, target_dims, dst, cumul_dst_pitch, dst_stride, + src, cumul_src_pitch, src_stride, elem_number, + elem_size); + return 0; +} + +int aml_copy_tnd(size_t d, void *dst, const 
size_t * dst_pitch, const void *src, + const size_t * src_pitch, const size_t * elem_number, + size_t elem_size) +{ + assert(d > 0); + size_t *target_dims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[d - 1] = 0; + for (size_t i = 0; i < d - 1; i += 1) + target_dims[i] = i + 1; + aml_copy_shnd(d, target_dims, dst, dst_pitch, src, src_pitch, + elem_number, elem_size); + return 0; +} + +int aml_copy_tnd_c(size_t d, void *dst, const size_t * cumul_dst_pitch, + const void *src, const size_t * cumul_src_pitch, + const size_t * elem_number, size_t elem_size) +{ + assert(d > 0); + size_t *target_dims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[d - 1] = 0; + for (size_t i = 0; i < d - 1; i += 1) + target_dims[i] = i + 1; + aml_copy_shnd_c(d, target_dims, dst, cumul_dst_pitch, src, + cumul_src_pitch, elem_number, elem_size); + return 0; +} + +int aml_copy_rtnd(size_t d, void *dst, const size_t * dst_pitch, + const void *src, const size_t * src_pitch, + const size_t * elem_number, size_t elem_size) +{ + assert(d > 0); + size_t *target_dims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[0] = d - 1; + for (size_t i = 1; i < d; i += 1) + target_dims[i] = i - 1; + aml_copy_shnd(d, target_dims, dst, dst_pitch, src, src_pitch, + elem_number, elem_size); + return 0; +} + +int aml_copy_rtnd_c(size_t d, void *dst, const size_t * cumul_dst_pitch, + const void *src, const size_t * cumul_src_pitch, + const size_t * elem_number, size_t elem_size) +{ + assert(d > 0); + size_t *target_dims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[0] = d - 1; + for (size_t i = 1; i < d; i += 1) + target_dims[i] = i - 1; + aml_copy_shnd_c(d, target_dims, dst, cumul_dst_pitch, src, + cumul_src_pitch, elem_number, elem_size); + return 0; +} + +int aml_copy_tndstr(size_t d, void *dst, const size_t * dst_pitch, + const size_t * dst_stride, const void *src, + const size_t * src_pitch, const size_t * src_stride, + 
const size_t * elem_number, size_t elem_size) +{ + assert(d > 0); + size_t *target_dims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[d - 1] = 0; + for (size_t i = 0; i < d - 1; i += 1) + target_dims[i] = i + 1; + aml_copy_shndstr(d, target_dims, dst, dst_pitch, dst_stride, src, + src_pitch, src_stride, elem_number, elem_size); + return 0; +} + +int aml_copy_tndstr_c(size_t d, void *dst, const size_t * cumul_dst_pitch, + const size_t * dst_stride, const void *src, + const size_t * cumul_src_pitch, const size_t * src_stride, + const size_t * elem_number, size_t elem_size) +{ + assert(d > 0); + size_t *target_dims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[d - 1] = 0; + for (size_t i = 0; i < d - 1; i += 1) + target_dims[i] = i + 1; + aml_copy_shndstr_c(d, target_dims, dst, cumul_dst_pitch, dst_stride, + src, cumul_src_pitch, src_stride, elem_number, + elem_size); + return 0; +} + +int aml_copy_rtndstr(size_t d, void *dst, const size_t * dst_pitch, + const size_t * dst_stride, const void *src, + const size_t * src_pitch, const size_t * src_stride, + const size_t * elem_number, size_t elem_size) +{ + assert(d > 0); + size_t *target_dims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[0] = d - 1; + for (size_t i = 1; i < d; i += 1) + target_dims[i] = i - 1; + aml_copy_shndstr(d, target_dims, dst, dst_pitch, dst_stride, src, + src_pitch, src_stride, elem_number, elem_size); + return 0; +} + +int aml_copy_rtndstr_c(size_t d, void *dst, const size_t * cumul_dst_pitch, + const size_t * dst_stride, const void *src, + const size_t * cumul_src_pitch, + const size_t * src_stride, const size_t * elem_number, + size_t elem_size) +{ + assert(d > 0); + size_t *target_dims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[0] = d - 1; + for (size_t i = 1; i < d; i += 1) + target_dims[i] = i - 1; + aml_copy_shndstr_c(d, target_dims, dst, cumul_dst_pitch, dst_stride, + src, cumul_src_pitch, 
src_stride, elem_number, + elem_size); + return 0; +} + +int aml_copy_layout_native(struct aml_layout *dst, const struct aml_layout *src) +{ + size_t d; + size_t elem_size; + struct aml_layout_data_native *ddst; + struct aml_layout_data_native *dsrc; + ddst = (struct aml_layout_data_native *)dst->data; + dsrc = (struct aml_layout_data_native *)src->data; + d = dsrc->ndims; + assert(d > 0); + elem_size = dsrc->cpitch[0]; + assert(d == ddst->ndims); + assert(elem_size == ddst->cpitch[0]); + for (size_t i = 0; i < d; i += 1) + assert(dsrc->dims[i] == ddst->dims[i]); + return aml_copy_ndstr_c(d, ddst->ptr, ddst->cpitch, ddst->stride, + dsrc->ptr, dsrc->cpitch, dsrc->stride, + dsrc->dims, elem_size); +} + +int aml_copy_layout_transform_native(struct aml_layout *dst, + const struct aml_layout *src, + const size_t * target_dims) +{ + size_t d; + size_t elem_size; + struct aml_layout_data_native *ddst; + struct aml_layout_data_native *dsrc; + ddst = (struct aml_layout_data_native *)dst->data; + dsrc = (struct aml_layout_data_native *)src->data; + d = dsrc->ndims; + assert(d > 0); + elem_size = dsrc->cpitch[0]; + assert(d == ddst->ndims); + assert(elem_size == ddst->cpitch[0]); + for (size_t i = 0; i < d; i += 1) + assert(dsrc->dims[target_dims[i]] == ddst->dims[i]); + return aml_copy_shndstr_c(d, target_dims, ddst->ptr, ddst->cpitch, + ddst->stride, dsrc->ptr, dsrc->cpitch, + dsrc->stride, dsrc->dims, elem_size); +} + +int aml_copy_layout_transpose_native(struct aml_layout *dst, + const struct aml_layout *src) +{ + size_t d; + size_t *target_dims; + struct aml_layout_data_native *dsrc; + dsrc = (struct aml_layout_data_native *)src->data; + d = dsrc->ndims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[d - 1] = 0; + for (size_t i = 0; i < d - 1; i += 1) + target_dims[i] = i + 1; + return aml_copy_layout_transform_native(dst, src, target_dims); +} + +int aml_copy_layout_reverse_transpose_native(struct aml_layout *dst, + const struct aml_layout *src) +{ 
+ size_t d; + size_t *target_dims; + struct aml_layout_data_native *dsrc; + dsrc = (struct aml_layout_data_native *)src->data; + d = dsrc->ndims; + target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[0] = d - 1; + for (size_t i = 1; i < d; i += 1) + target_dims[i] = i - 1; + return aml_copy_layout_transform_native(dst, src, target_dims); +} + +static inline void aml_copy_layout_generic_helper(size_t d, + struct aml_layout *dst, + const struct aml_layout *src, + const size_t * elem_number, + size_t elem_size, + size_t * coords) +{ + if (d == 1) + for (size_t i = 0; i < elem_number[0]; i += 1) { + coords[0] = i; + coords[0] = i; + memcpy(aml_layout_aderef_column(dst, coords), + aml_layout_aderef_column(src, coords), + elem_size); + } else + for (size_t i = 0; i < elem_number[d - 1]; i += 1) { + coords[d - 1] = i; + coords[d - 1] = i; + aml_copy_layout_generic_helper(d - 1, dst, src, + elem_number, elem_size, + coords); + } +} + +static inline void aml_copy_layout_transform_generic_helper(size_t d, + struct aml_layout + *dst, + const struct + aml_layout *src, + const size_t * + elem_number, + size_t elem_size, + size_t * coords, + size_t * coords_out, + const size_t * + target_dims) +{ + if (d == 1) + for (size_t i = 0; i < elem_number[target_dims[0]]; i += 1) { + coords_out[0] = i; + coords[target_dims[0]] = i; + memcpy(aml_layout_aderef_column(dst, coords_out), + aml_layout_aderef_column(src, coords), + elem_size); + } else + for (size_t i = 0; i < elem_number[target_dims[d - 1]]; i += 1) { + coords_out[d - 1] = i; + coords[target_dims[d - 1]] = i; + aml_copy_layout_transform_generic_helper(d - 1, dst, + src, + elem_number, + elem_size, + coords, + coords_out, + target_dims); + } +} + +int aml_copy_layout_generic(struct aml_layout *dst, + const struct aml_layout *src) +{ + size_t d; + size_t elem_size; + size_t *coords; + size_t *elem_number; + size_t *elem_number2; + assert(aml_layout_ndims(dst) == aml_layout_ndims(src)); + d = aml_layout_ndims(dst); 
+ assert(aml_layout_element_size(dst) == aml_layout_element_size(src)); + elem_size = aml_layout_element_size(dst); + coords = (size_t *) alloca(d * sizeof(size_t)); + elem_number = (size_t *) alloca(d * sizeof(size_t)); + elem_number2 = (size_t *) alloca(d * sizeof(size_t)); + aml_layout_adims_column(src, elem_number); + aml_layout_adims_column(dst, elem_number2); + for (size_t i = 0; i < d; i += 1) + assert(elem_number[i] == elem_number2[i]); + aml_copy_layout_generic_helper(d, dst, src, elem_number, elem_size, + coords); + return 0; +} + +int aml_copy_layout_transform_generic(struct aml_layout *dst, + const struct aml_layout *src, + const size_t * target_dims) +{ + size_t d; + size_t elem_size; + size_t *coords; + size_t *coords_out; + size_t *elem_number; + size_t *elem_number2; + assert(aml_layout_ndims(dst) == aml_layout_ndims(src)); + d = aml_layout_ndims(dst); + assert(aml_layout_element_size(dst) == aml_layout_element_size(src)); + elem_size = aml_layout_element_size(dst); + coords = (size_t *) alloca(d * sizeof(size_t)); + coords_out = (size_t *) alloca(d * sizeof(size_t)); + elem_number = (size_t *) alloca(d * sizeof(size_t)); + elem_number2 = (size_t *) alloca(d * sizeof(size_t)); + aml_layout_adims_column(src, elem_number); + aml_layout_adims_column(dst, elem_number2); + for (size_t i = 0; i < d; i += 1) + assert(elem_number[target_dims[i]] == elem_number2[i]); + aml_copy_layout_transform_generic_helper(d, dst, src, elem_number, + elem_size, coords, coords_out, + target_dims); + return 0; +} + +int aml_copy_layout_transpose_generic(struct aml_layout *dst, + const struct aml_layout *src) +{ + size_t d; + size_t *target_dims; + /* Transpose copy: builds target_dims = {1, ..., d-1, 0} and delegates. The rank is read through the layout API, as aml_copy_layout_transform_generic does, so the generic path does not assume src->data is an aml_layout_data_native. */ + d = aml_layout_ndims(src); + assert(d > 0); /* target_dims[d - 1] below needs at least one dimension */ + target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[d - 1] = 0; + for (size_t i = 0; i < d - 1; i += 1) + target_dims[i] = i + 1; + return aml_copy_layout_transform_generic(dst, src, target_dims); +} + +int 
aml_copy_layout_reverse_transpose_generic(struct aml_layout *dst, + const struct aml_layout *src) +{ + size_t d; + size_t *target_dims; + /* Reverse transpose copy: builds target_dims = {d-1, 0, ..., d-2} and delegates. The rank is read through the layout API, as aml_copy_layout_transform_generic does, so the generic path does not assume src->data is an aml_layout_data_native. */ + d = aml_layout_ndims(src); + assert(d > 0); /* target_dims[0] below needs at least one dimension */ + target_dims = (size_t *) alloca(d * sizeof(size_t)); + target_dims[0] = d - 1; + for (size_t i = 1; i < d; i += 1) + target_dims[i] = i - 1; + return aml_copy_layout_transform_generic(dst, src, target_dims); +} diff --git a/src/copy.rb b/src/copy.rb new file mode 100644 index 00000000..e276ed54 --- /dev/null +++ b/src/copy.rb @@ -0,0 +1,576 @@ +stdin1, stdout0 = IO.pipe +stdin2, stdout1 = IO.pipe + +pid1 = Process.fork { + stdout0.close + stdin2.close + require 'cast' + + parser = C::Parser::new + parser.type_names << '__builtin_va_list' + cpp = C::Preprocessor::new + cpp.macros['__attribute__(a)'] = '' + cpp.macros['__restrict'] = 'restrict' + cpp.macros['__extension__'] = '' + cpp.macros['__asm__(a)'] = '' + cpp.include_path << './' + + + + preprocessed_sources = cpp.preprocess(< +#include +#include +#include +#include +#include +EOF + + parser.parse(preprocessed_sources) + + ast = parser.parse(stdin1.read) + stdin1.close + + ast.postorder { |n| + n.stmt = n.stmt.stmts.first if n.For? && n.stmt.Block? && n.stmt.stmts.size == 1 + n.then = n.then.stmts.first if n.If? && n.then.Block? && n.then.stmts.size == 1 + n.else = n.else.stmts.first if n.If? && n.else && n.else.Block? 
&& n.else.stmts.size == 1 + } + + stdout1.puts < +#include +#include +#include +#include + +EOF + stdout1.puts ast + stdout1.close +} + +pid2 = Process.fork { + stdin1.close + stdout0.close + stdout1.close + require 'open3' + Open3.popen3('indent -nbad -bap -nbc -bbo -hnl -br -brs -c33 -cd33 -ncdb -ce -ci4 -cli0 -d0 -di1 -nfc1 -i8 -ip0 -l80 -lp -npcs -nprs -npsl -sai -saf -saw -ncs -nsc -sob -nfca -cp33 -ss -ts8 -il1') do |i, o, t| + i.write stdin2.read + stdin2.close + i.close + puts o.read + end +} + +stdin1.close +stdout1.close +stdin2.close + +require 'BOAST' +include BOAST + +set_array_start(0) +set_lang(C) +set_default_int_size(nil) +set_output(stdout0) + +register_funccall( :alloca ) +register_funccall( :memcpy ) +register_funccall( :assert ) +register_funccall( :sizeof ) + +def name_prefix + "aml_copy_" +end + +def name(suffix = nil, stride: false, shuffle: false) + name = name_prefix + name << "sh" if shuffle + name << "nd" + name << "str" if stride + name << "_#{suffix}" if suffix + name +end + +def transpose_name(reverse: false, stride: false, cumulative: false) + name = name_prefix + name << "r" if reverse + name << "tnd" + name << "str" if stride + name << "_c" if cumulative + name +end + +def aml_compute_cumulative_pitch + d = Sizet :d + cumul_dst_pitch = Sizet :cumul_dst_pitch, dim: Dim(d), dir: :out + cumul_src_pitch = Sizet :cumul_src_pitch, dim: Dim(d), dir: :out + dst_pitch = Sizet :dst_pitch, dim: Dim(d), dir: :in + src_pitch = Sizet :src_pitch, dim: Dim(d), dir: :in + elem_size = Sizet :elem_size + i = Sizet :i + p = Procedure( :aml_compute_cumulative_pitch, + [ d, + cumul_dst_pitch, cumul_src_pitch, + dst_pitch, src_pitch, + elem_size ], + local: true, + inline: true ) { + pr cumul_dst_pitch[0] === elem_size; + pr cumul_src_pitch[0] === elem_size; + + pr For(i, 0, d - 1, operator: '<', declit: true) { + pr cumul_dst_pitch[i + 1] === dst_pitch[i] * cumul_dst_pitch[i] + pr cumul_src_pitch[i + 1] === src_pitch[i] * cumul_src_pitch[i] + } + } +end 
+ +def aml_copy_nd_helper(stride: false, shuffle: false) + d = Sizet :d + target_dims = Sizet :target_dims, dim: Dim(), dir: :in + dst = Pointer :dst, dir: :out + cumul_dst_pitch = Sizet :cumul_dst_pitch, dim: Dim(), dir: :in + dst_stride = Sizet :dst_stride, dim: Dim(), dir: :in + src = Pointer :src, dir: :in + cumul_src_pitch = Sizet :cumul_src_pitch, dim: Dim(), dir: :in + src_stride = Sizet :src_stride, dim: Dim(), dir: :in + elem_number = Sizet :elem_number, dim: Dim(), dir: :in + elem_size = Sizet :elem_size + i = Sizet :i + + args = [] + args += [ d ] + args += [ target_dims ] if shuffle + args += [ dst, cumul_dst_pitch ] + args += [ dst_stride ] if stride + args += [ src, cumul_src_pitch ] + args += [ src_stride ] if stride + args += [ elem_number, elem_size ] + + effective_dst_pitch = lambda { |d| cumul_dst_pitch[d] } + effective_src_pitch = lambda { |d| cumul_src_pitch[d] } + if stride + tmp_dst = effective_dst_pitch + effective_dst_pitch = lambda { |d| dst_stride[d] * tmp_dst[d] } + tmp_src = effective_src_pitch + effective_src_pitch = lambda { |d| src_stride[d] * tmp_src[d] } + end + + src_index = lambda { |d| d } + dst_index = lambda { |d| d } + elem_index = lambda { |d| d } + if shuffle + elem_index = lambda { |d| target_dims[d] } + src_index = lambda { |d| target_dims[d] } + end + + name = name(:helper, stride: stride, shuffle: shuffle) + + p = Procedure( name, + args, + local: true, + inline: true ) { + pr If( d == 1 => lambda { + pr If( And(effective_dst_pitch[dst_index[0]] == elem_size, + effective_src_pitch[src_index[0]] == elem_size) => lambda { + pr memcpy(dst, src, elem_number[elem_index[0]] * elem_size) + }, else: lambda { + pr For( i, 0, elem_number[elem_index[0]], operator: '<', declit: true ) { + pr memcpy( (dst.cast(Intptrt) + i * effective_dst_pitch[dst_index[0]]).cast(dst), + (src.cast(Intptrt) + i * effective_src_pitch[src_index[0]]).cast(src), + elem_size) + } + }) + }, else: lambda { + pr For( i, 0, elem_number[elem_index[d - 1]], 
operator: '<', declit: true ) { + args[0] = d - 1 + pr p.call(*args) + pr dst === (dst.cast(Intptrt) + effective_dst_pitch[dst_index[d - 1]]).cast(dst) + pr src === (src.cast(Intptrt) + effective_src_pitch[src_index[d - 1]]).cast(src) + } + }) + + } +end + +def aml_copy_nd_c(stride: false, shuffle: false) + d = Sizet :d + target_dims = Sizet :target_dims, dim: Dim(), dir: :in + dst = Pointer :dst, dir: :out + cumul_dst_pitch = Sizet :cumul_dst_pitch, dim: Dim(d), dir: :in + dst_stride = Sizet :dst_stride, dim: Dim(d), dir: :in + src = Pointer :src, dir: :in + cumul_src_pitch = Sizet :cumul_src_pitch, dim: Dim(d), dir: :in + src_stride = Sizet :src_stride, dim: Dim(d), dir: :in + elem_number = Sizet :elem_number, dim: Dim(d), dir: :in + elem_size = Sizet :elem_size + i = Sizet :i + present_dims = Sizet :present_dims + + args = [] + args += [ d ] + args += [ target_dims ] if shuffle + args += [ dst, cumul_dst_pitch ] + args += [ dst_stride ] if stride + args += [ src, cumul_src_pitch] + args += [ src_stride ] if stride + args += [ elem_number, elem_size] + + effective_dst_pitch = lambda { |d| cumul_dst_pitch[d] } + effective_src_pitch = lambda { |d| cumul_src_pitch[d] } + if stride + tmp_dst = effective_dst_pitch + effective_dst_pitch = lambda { |d| dst_stride[d] * tmp_dst[d] } + tmp_src = effective_src_pitch + effective_src_pitch = lambda { |d| src_stride[d] * tmp_src[d] } + end + + elem_index = lambda { |d| d } + if shuffle + elem_index = lambda { |d| target_dims[d] } + end + + name = name(:c, stride: stride, shuffle: shuffle) + + p = Procedure( name, + args, + return_type: Int ) { + pr assert(d > 0) + if shuffle + decl present_dims + pr present_dims === 0 + pr For(i, 0, d, operator: '<', declit: true ) { + pr assert(target_dims[i] < d) + get_output.puts "#{present_dims} |= 1 << #{target_dims[i]};" + } + pr For(i, 0, d, operator: '<', declit: true ) { + pr assert("#{present_dims} & (1 << #{i})") + } + end + pr For(i, 0, d - 1, operator: '<', declit: true ) { + pr 
assert(cumul_dst_pitch[i + 1] >= effective_dst_pitch[i] * elem_number[elem_index[i]]); + pr assert(cumul_src_pitch[i + 1] >= effective_src_pitch[i] * elem_number[i]); + } + pr aml_copy_nd_helper(stride: stride, shuffle: shuffle).call( *args ) + pr Return(0) + } +end + +def aml_copy_nd(stride: false, shuffle: false) + d = Sizet :d + target_dims = Sizet :target_dims, dim: Dim(d), dir: :in + dst = Pointer :dst, dir: :out + dst_pitch = Sizet :dst_pitch, dim: Dim(d), dir: :in + dst_stride = Sizet :dst_stride, dim: Dim(d), dir: :in + src = Pointer :src, dir: :in + src_pitch = Sizet :src_pitch, dim: Dim(d), dir: :in + src_stride = Sizet :src_stride, dim: Dim(d), dir: :in + elem_number = Sizet :elem_number, dim: Dim(d), dir: :in + elem_size = Sizet :elem_size + cumul_dst_pitch = Pointer :cumul_dst_pitch, type: Sizet + cumul_src_pitch = Pointer :cumul_src_pitch, type: Sizet + + args = [] + args += [ d ] + args += [ target_dims ] if shuffle + args += [ dst, dst_pitch ] + args += [ dst_stride ] if stride + args += [ src, src_pitch] + args += [ src_stride ] if stride + args += [ elem_number, elem_size] + + name = name(stride: stride, shuffle: shuffle) + + p = Procedure( name, + args, + return_type: Int ) { + pr assert(d > 0); + decl cumul_dst_pitch, cumul_src_pitch + pr cumul_dst_pitch === alloca(d * sizeof("size_t")).cast(cumul_dst_pitch) + pr cumul_src_pitch === alloca(d * sizeof("size_t")).cast(cumul_src_pitch) + pr $aml_compute_cumulative_pitch.call(d, cumul_dst_pitch, cumul_src_pitch, + dst_pitch, src_pitch, elem_size); + args = [] + args += [ d ] + args += [ target_dims ] if shuffle + args += [ dst, cumul_dst_pitch ] + args += [ dst_stride ] if stride + args += [ src, cumul_src_pitch] + args += [ src_stride ] if stride + args += [ elem_number, elem_size] + + pr aml_copy_nd_c(stride: stride, shuffle: shuffle).call( *args ) + pr Return(0) + } +end + +def aml_copy_tnd(reverse: false, stride: false, cumulative: false) + d = Sizet :d + dst = Pointer :dst, dir: :out + 
dst_pitch = Sizet :dst_pitch, dim: Dim(d), dir: :in + cumul_dst_pitch = Sizet :cumul_dst_pitch, dim: Dim(d), dir: :in + dst_stride = Sizet :dst_stride, dim: Dim(d), dir: :in + src = Pointer :src, dir: :in + src_pitch = Sizet :src_pitch, dim: Dim(d), dir: :in + src_stride = Sizet :src_stride, dim: Dim(d), dir: :in + cumul_src_pitch = Sizet :cumul_src_pitch, dim: Dim(d), dir: :in + elem_number = Sizet :elem_number, dim: Dim(d), dir: :in + elem_size = Sizet :elem_size + + args = [] + args += [ d, dst ] + args += cumulative ? [ cumul_dst_pitch ] : [ dst_pitch ] + args += [ dst_stride ] if stride + args += [ src ] + args += cumulative ? [ cumul_src_pitch ] : [ src_pitch ] + args += [ src_stride ] if stride + args += [ elem_number, elem_size] + + target_dims = Sizet :target_dims, dim: Dim(d) + i = Sizet :i + + name = transpose_name(reverse: reverse, stride: stride, cumulative: cumulative) + + p = Procedure( name, + args, + return_type: Int ) { + pr assert(d > 0); + decl target_dims + pr target_dims === alloca(d * sizeof("size_t")).cast(target_dims) + if reverse + pr target_dims[0] === d - 1 + pr For(i, 1, d, operator: '<', declit: true) { + pr target_dims[i] === i - 1 + } + else + pr target_dims[d - 1] === 0 + pr For(i, 0, d - 1, operator: '<', declit: true) { + pr target_dims[i] === i + 1 + } + end + + args.insert(1, target_dims) + + if cumulative + pr aml_copy_nd_c(stride: stride, shuffle: true).call(*args) + else + pr aml_copy_nd(stride: stride, shuffle: true).call(*args) + end + pr Return(0) + } +end + +def aml_copy_layout_generic_helper(shuffle: false) + d = Sizet :d + dst = Pointer :dst, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :inout + src = Pointer :src, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :in + elem_number = Sizet :elem_number, dim: Dim(), dir: :in + elem_size = Sizet :elem_size + coords = Sizet :coords, dim: Dim(), dir: :inout + coords_out = Sizet :coords_out, dim: Dim(), dir: :inout + target_dims = Sizet 
:target_dims, dim: Dim(), dir: :in + + i = Sizet :i + + name = name_prefix + "layout_" + name << "transform_" if shuffle + name << "generic_helper" + + args = [d, dst, src, elem_number, elem_size, coords] + args << coords_out << target_dims if shuffle + + src_index = lambda { |d| d } + dst_index = lambda { |d| d } + elem_index = lambda { |d| d } + if shuffle + elem_index = lambda { |d| target_dims[d] } + src_index = lambda { |d| target_dims[d] } + end + + coord_src = coords + coord_dst = coords + if shuffle + coord_dst = coords_out + end + + p = Procedure( name, args, local: true, inline: true ) { + pr If( d == 1 => lambda { + pr For( i, 0, elem_number[elem_index[0]], operator: '<', declit: true ) { + pr coord_dst[dst_index[0]] === i + pr coord_src[src_index[0]] === i + pr memcpy( FuncCall(:aml_layout_aderef_column, dst, coord_dst), FuncCall(:aml_layout_aderef_column, src, coord_src), elem_size ) + } + }, else: lambda { + pr For( i, 0, elem_number[elem_index[d - 1]], operator: '<', declit: true ) { + args[0] = d - 1 + pr coord_dst[dst_index[d - 1]] === i + pr coord_src[src_index[d - 1]] === i + pr p.call(*args) + } + }) + } +end + +def aml_copy_layout(native: true, shuffle: false) + dst = Pointer :dst, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :inout + src = Pointer :src, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :in + target_dims = Sizet :target_dims, dim: Dim(), dir: :in + + ddst = Pointer :ddst, type: CStruct::new(type_name: :aml_layout_data_native, members: {}) + dsrc = Pointer :dsrc, type: CStruct::new(type_name: :aml_layout_data_native, members: {}) + d = Sizet :d + elem_size = Sizet :elem_size + i = Sizet :i + + src_index = lambda { |d| d } + dst_index = lambda { |d| d } + if shuffle + src_index = lambda { |d| target_dims[d] } + end + + name = name_prefix + "layout_" + name << "transform_" if shuffle + name << (native ? 
"native" : "generic") + + args = [dst, src] + args << target_dims if shuffle + + p = Procedure( name, args, return_type: Int ) { + decl d, elem_size + + if native + decl ddst, dsrc + + pr ddst === "(struct aml_layout_data_native *)#{dst}->data" + pr dsrc === "(struct aml_layout_data_native *)#{src}->data" + pr d === "#{dsrc}->ndims" + pr assert(d > 0); + + pr elem_size === "#{dsrc}->cpitch[0]" + pr assert(d == "#{ddst}->ndims") + pr assert(elem_size == "#{ddst}->cpitch[0]") + pr For(i, 0, d, operator: '<', declit: true) { + pr assert( "#{dsrc}->dims[#{src_index[i]}] == #{ddst}->dims[#{dst_index[i]}]" ) + } + + args = [] + args += [ d ] + args += [ target_dims ] if shuffle + args += [ "#{ddst}->ptr", "#{ddst}->cpitch", "#{ddst}->stride", + "#{dsrc}->ptr", "#{dsrc}->cpitch", "#{dsrc}->stride", + "#{dsrc}->dims", elem_size ] + pr Return(aml_copy_nd_c(stride: true, shuffle: shuffle).call(*args)) + else + coords = Sizet :coords, dim: Dim() + coords_out = Sizet :coords_out, dim: Dim() + elem_number = Sizet :elem_number, dim: Dim() + elem_number2 = Sizet :elem_number2, dim: Dim() + decl coords + decl coords_out if shuffle + decl elem_number + decl elem_number2 + + pr assert( FuncCall( :aml_layout_ndims, dst ) == FuncCall( :aml_layout_ndims, src ) ) + pr d === FuncCall( :aml_layout_ndims, dst ) + pr assert( FuncCall( :aml_layout_element_size, dst ) == FuncCall( :aml_layout_element_size, src ) ) + pr elem_size === FuncCall( :aml_layout_element_size, dst ) + pr coords === alloca(d * sizeof("size_t")).cast(coords) + pr coords_out === alloca(d * sizeof("size_t")).cast(coords_out) if shuffle + pr elem_number === alloca(d * sizeof("size_t")).cast(elem_number) + pr elem_number2 === alloca(d * sizeof("size_t")).cast(elem_number2) + pr FuncCall( :aml_layout_adims_column, src, elem_number ) + pr FuncCall( :aml_layout_adims_column, dst, elem_number2 ) + pr For(i, 0, d, operator: '<', declit: true) { + pr assert( "#{elem_number}[#{src_index[i]}] == #{elem_number2}[#{dst_index[i]}]" ) 
+ } + + new_args = [d, dst, src, elem_number, elem_size, coords] + new_args << coords_out << target_dims if shuffle + + pr aml_copy_layout_generic_helper(shuffle: shuffle).call(*new_args) + pr Return(0) + end + } +end + +def aml_copy_layout_transpose(native: true, reverse: false) + dst = Pointer :dst, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :inout + src = Pointer :src, type: CStruct::new(type_name: :aml_layout, members: {}), dir: :in + + dsrc = Pointer :dsrc, type: CStruct::new(type_name: :aml_layout_data_native, members: {}) + target_dims = Sizet :target_dims, dim: Dim() + d = Sizet :d + i = Sizet :i + + name = name_prefix + "layout_" + name << "reverse_" if reverse + name << "transpose_" + name << (native ? "native" : "generic") + p = Procedure( name, [ dst, src ], return_type: Int ) { + decl d + decl target_dims + decl dsrc + + pr dsrc === "(struct aml_layout_data_native *)#{src}->data" + pr d === "#{dsrc}->ndims" + pr target_dims === alloca(d * sizeof("size_t")).cast(target_dims) + if reverse + pr target_dims[0] === d - 1 + pr For(i, 1, d, operator: '<', declit: true) { + pr target_dims[i] === i - 1 + } + else + pr target_dims[d - 1] === 0 + pr For(i, 0, d - 1, operator: '<', declit: true) { + pr target_dims[i] === i + 1 + } + end + pr Return( aml_copy_layout(native: native, shuffle: true).call( dst, src, target_dims) ) + } +end + +pr $aml_compute_cumulative_pitch = aml_compute_cumulative_pitch + +generation_space = BruteForceOptimizer::new( + OptimizationSpace::new( + shuffle: [false, true], + stride: [false, true] + ) +) + +transpose_generation_space = BruteForceOptimizer::new( + OptimizationSpace::new( + stride: [false, true], + reverse: [false, true], + cumulative: [false, true] + ) +) + +generation_space.each { |params| + pr aml_copy_nd_helper(**params) + pr aml_copy_nd_c(**params) + pr aml_copy_nd(**params) +} + +transpose_generation_space.each { |params| + pr aml_copy_tnd(**params) +} + +pr aml_copy_layout +pr 
aml_copy_layout(shuffle: true) +pr aml_copy_layout_transpose +pr aml_copy_layout_transpose(reverse: true) + +pr aml_copy_layout_generic_helper(shuffle: false) +pr aml_copy_layout_generic_helper(shuffle: true) +pr aml_copy_layout(native: false) +pr aml_copy_layout(native: false, shuffle: true) +pr aml_copy_layout_transpose(native: false) +pr aml_copy_layout_transpose(native: false, reverse: true) + +stdout0.close + +Process.wait(pid1) +Process.wait(pid2) + diff --git a/src/dma_layout.c b/src/dma_layout.c new file mode 100644 index 00000000..609cab96 --- /dev/null +++ b/src/dma_layout.c @@ -0,0 +1,160 @@ +#include +#include +#include +#include + +/******************************************************************************* + * Requests: + ******************************************************************************/ + +int aml_dma_request_layout_init(struct aml_dma_request_layout *req, + struct aml_layout *dl, + struct aml_layout *sl) +{ + assert(req != NULL); + req->type = AML_DMA_REQUEST_TYPE_COPY; + /* figure out pointers */ + req->dest = dl; + req->src = sl; + return 0; +} + +int aml_dma_request_layout_destroy(struct aml_dma_request_layout *r) +{ + assert(r != NULL); + return 0; +} + +/******************************************************************************* + * Public API + ******************************************************************************/ + +int aml_dma_layout_create_request(struct aml_dma_data *d, + struct aml_dma_request **r, + int type, va_list ap) +{ + assert(d != NULL); + assert(r != NULL); + struct aml_dma_layout *dma = + (struct aml_dma_layout *)d; + + struct aml_dma_request_layout *req; + + pthread_mutex_lock(&dma->lock); + req = aml_vector_add(&dma->requests); + + /* we don't support move at this time */ + assert(type == AML_DMA_REQUEST_TYPE_COPY); + struct aml_layout *dl, *sl; + void *arg; + dl = va_arg(ap, struct aml_layout *); + sl = va_arg(ap, struct aml_layout *); + aml_dma_request_layout_init(req, dl, sl); + + 
pthread_mutex_unlock(&dma->lock); + *r = (struct aml_dma_request *)req; + return 0; +} + +int aml_dma_layout_destroy_request(struct aml_dma_data *d, + struct aml_dma_request *r) +{ + assert(d != NULL); + assert(r != NULL); + struct aml_dma_layout *dma = + (struct aml_dma_layout *)d; + + struct aml_dma_request_layout *req = + (struct aml_dma_request_layout *)r; + + assert(req->type == AML_DMA_REQUEST_TYPE_COPY); + aml_dma_request_layout_destroy(req); + + /* enough to remove from request vector */ + pthread_mutex_lock(&dma->lock); + aml_vector_remove(&dma->requests, req); + pthread_mutex_unlock(&dma->lock); + return 0; +} + +int aml_dma_layout_wait_request(struct aml_dma_data *d, + struct aml_dma_request *r) +{ + assert(d != NULL); + assert(r != NULL); + struct aml_dma_layout *dma = (struct aml_dma_layout *)d; + struct aml_dma_request_layout *req = + (struct aml_dma_request_layout *)r; + + /* execute */ + assert(req->type == AML_DMA_REQUEST_TYPE_COPY); + dma->do_work(req->dest, req->src, dma->work_arg); + + /* destroy a completed request */ + aml_dma_layout_destroy_request(d, r); + return 0; +} + +struct aml_dma_ops aml_dma_ops_layout = { + aml_dma_layout_create_request, + aml_dma_layout_destroy_request, + aml_dma_layout_wait_request, +}; + +/******************************************************************************* + * Init functions: + ******************************************************************************/ + +int aml_dma_layout_create(struct aml_dma **d, ...) 
+{ + va_list ap; + struct aml_dma *ret = NULL; + intptr_t baseptr, dataptr; + va_start(ap, d); + + /* alloc */ + baseptr = (intptr_t) calloc(1, AML_DMA_LAYOUT_ALLOCSIZE); + dataptr = baseptr + sizeof(struct aml_dma); + + ret = (struct aml_dma *)baseptr; + ret->data = (struct aml_dma_data *)dataptr; + + aml_dma_layout_vinit(ret, ap); + + va_end(ap); + *d = ret; + return 0; +} +int aml_dma_layout_vinit(struct aml_dma *d, va_list ap) +{ + d->ops = &aml_dma_ops_layout; + struct aml_dma_layout *dma = (struct aml_dma_layout *)d->data; + + /* request vector */ + size_t nbreqs = va_arg(ap, size_t); + dma->do_work = va_arg(ap, aml_dma_operator); + dma->work_arg = va_arg(ap, void *); + aml_vector_init(&dma->requests, nbreqs, + sizeof(struct aml_dma_request_layout), + offsetof(struct aml_dma_request_layout, type), + AML_DMA_REQUEST_TYPE_INVALID); + pthread_mutex_init(&dma->lock, NULL); + return 0; +} +int aml_dma_layout_init(struct aml_dma *d, ...) +{ + int err; + va_list ap; + va_start(ap, d); + err = aml_dma_layout_vinit(d, ap); + va_end(ap); + return err; +} + +int aml_dma_layout_destroy(struct aml_dma *d) +{ + struct aml_dma_layout *dma = (struct aml_dma_layout *)d->data; + aml_vector_destroy(&dma->requests); + pthread_mutex_destroy(&dma->lock); + return 0; +} diff --git a/src/layout.c b/src/layout.c new file mode 100644 index 00000000..0d550fa2 --- /dev/null +++ b/src/layout.c @@ -0,0 +1,138 @@ +#include + +/******************************************************************************* + * General API: common operators: + ******************************************************************************/ + +void *aml_layout_deref(const struct aml_layout *layout, ...) 
+{ + assert(layout != NULL); + assert(layout->ops != NULL); + va_list ap; + void *ret; + va_start(ap, layout); + ret = layout->ops->deref(layout->data, ap); + va_end(ap); + return ret; +} + +void *aml_layout_aderef(const struct aml_layout *layout, const size_t *coords) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + return layout->ops->aderef(layout->data, coords); +} + +void *aml_layout_aderef_column(const struct aml_layout *layout, + const size_t *coords) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + return layout->ops->aderef_column(layout->data, coords); +} + +int aml_layout_order(const struct aml_layout *layout) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + return layout->ops->order(layout->data); +} + +int aml_layout_dims(const struct aml_layout *layout, ...) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + va_list ap; + int ret; + va_start(ap, layout); + ret = layout->ops->dims(layout->data, ap); + va_end(ap); + return ret; +} + +int aml_layout_adims(const struct aml_layout *layout, size_t *dims) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + return layout->ops->adims(layout->data, dims); +} + +int aml_layout_adims_column(const struct aml_layout *layout, size_t *dims) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + return layout->ops->adims_column(layout->data, dims); +} + +size_t aml_layout_ndims(const struct aml_layout *layout) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + return layout->ops->ndims(layout->data); +} + +size_t aml_layout_element_size(const struct aml_layout *layout) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + return layout->ops->element_size(layout->data); +} + +struct aml_layout * aml_layout_areshape(const struct aml_layout *layout, + size_t ndims, const size_t *dims) +{ + assert(ndims != 0); + assert(layout != NULL); + assert(layout->ops != NULL); + assert(layout->ops->areshape != NULL); + return 
layout->ops->areshape(layout->data, ndims, dims); +} + +struct aml_layout * aml_layout_reshape(const struct aml_layout *layout, + size_t ndims, ...) +{ + assert(ndims != 0); + assert(layout != NULL); + assert(layout->ops != NULL); + assert(layout->ops->reshape != NULL); + va_list ap; + struct aml_layout *ret; + va_start(ap, ndims); + ret = layout->ops->reshape(layout->data, ndims, ap); + va_end(ap); + return ret; +} + +struct aml_layout * aml_layout_slice(const struct aml_layout *layout, ...) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + assert(layout->ops->slice != NULL); + va_list ap; + struct aml_layout *ret; + va_start(ap, layout); + ret = layout->ops->slice(layout->data, ap); + va_end(ap); + return ret; +} + +struct aml_layout * aml_layout_aslice(const struct aml_layout *layout, + const size_t *offsets, const size_t *dims, + const size_t *strides) +{ + assert(layout != NULL); + assert(layout->ops != NULL); + assert(layout->ops->aslice != NULL); + return layout->ops->aslice(layout->data, offsets, dims, strides); +} + +struct aml_layout * aml_layout_aslice_column(const struct aml_layout *layout, + const size_t *offsets, + const size_t *dims, + const size_t *strides) +{ /* Forwards offsets, dims and strides to the layout's aslice_column operator and returns its result. */ + assert(layout != NULL); + assert(layout->ops != NULL); + assert(layout->ops->aslice_column != NULL); /* guard the operator that is actually invoked below, not aslice */ + return layout->ops->aslice_column(layout->data, offsets, dims, strides); +} diff --git a/src/layout_dense.c b/src/layout_dense.c new file mode 100644 index 00000000..2c7ea9ee --- /dev/null +++ b/src/layout_dense.c @@ -0,0 +1,638 @@ +#include + +/******************************************************************************* + * Native layout initialization: + ******************************************************************************/ + +int aml_layout_native_struct_init(struct aml_layout *layout, size_t ndims, + void *memory) +{ + struct aml_layout_data_native *dataptr; + + assert(layout == (struct aml_layout *)memory); + memory = (void *)((uintptr_t)memory + + sizeof(struct
aml_layout)); + dataptr = memory; + layout->data = memory; + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_layout_data_native)); + dataptr->ndims = ndims; + dataptr->dims = (size_t *)memory; + dataptr->stride = dataptr->dims + ndims; + dataptr->pitch = dataptr->stride + ndims; + dataptr->cpitch = dataptr->pitch + ndims; + return 0; +} + +static +int aml_layout_native_ainit_cpitch(struct aml_layout *layout, + uint64_t tags, void *ptr, size_t ndims, + const size_t *dims, const size_t *stride, + const size_t *cpitch) +{ + struct aml_layout_data_native *data = + (struct aml_layout_data_native *)layout->data; + layout->tags = tags; + data->ptr = ptr; + memcpy(data->dims, dims, ndims * sizeof(size_t)); + memcpy(data->stride, stride, ndims * sizeof(size_t)); + memset(data->pitch, 0, ndims * sizeof(size_t)); + memcpy(data->cpitch, cpitch, (ndims + 1) * sizeof(size_t)); + return 0; +} + +int aml_layout_native_ainit(struct aml_layout *layout, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, + const size_t *dims, const size_t *stride, + const size_t *pitch) +{ + assert(layout != NULL); + assert(layout->data != NULL); + struct aml_layout_data_native *data = + (struct aml_layout_data_native *)layout->data; + assert(data->ndims == ndims); + assert(data->dims); + assert(data->stride); + assert(data->pitch); + assert(data->cpitch); + data->ptr = ptr; + int type = AML_TYPE_GET(tags, AML_TYPE_LAYOUT_ORDER); + if(type == AML_TYPE_LAYOUT_ROW_ORDER) + { + AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_ORDER, + AML_TYPE_LAYOUT_ROW_ORDER); + layout->ops = &aml_layout_row_ops; + for(size_t i = 0; i < ndims; i++) + { + data->dims[i] = dims[ndims-i-1]; + data->stride[i] = stride[ndims-i-1]; + data->pitch[i] = pitch[ndims-i-1]; + } + data->cpitch[0] = element_size; + for(size_t i = 1; i <= ndims; i++) + data->cpitch[i] = data->cpitch[i-1]*pitch[ndims-i]; + } + else if(type == AML_TYPE_LAYOUT_COLUMN_ORDER) + { + AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_ORDER, + 
AML_TYPE_LAYOUT_COLUMN_ORDER); + layout->ops = &aml_layout_column_ops; + memcpy(data->dims, dims, ndims * sizeof(size_t)); + memcpy(data->stride, stride, ndims * sizeof(size_t)); + memcpy(data->pitch, pitch, ndims * sizeof(size_t)); + data->cpitch[0] = element_size; + for(size_t i = 1; i <= ndims; i++) + data->cpitch[i] = data->cpitch[i-1]*pitch[i-1]; + } + return 0; +} + +int aml_layout_native_vinit(struct aml_layout *p, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, va_list ap) +{ /* Reads ndims dims, then ndims strides, then ndims pitches (each a size_t) from ap and forwards them to aml_layout_native_ainit, returning its result. */ + size_t dims[ndims]; + size_t stride[ndims]; + size_t pitch[ndims]; /* ndims entries: the loop below stores pitch[0..ndims-1] and ainit reads the same range */ + for(size_t i = 0; i < ndims; i++) + dims[i] = va_arg(ap, size_t); + for(size_t i = 0; i < ndims; i++) + stride[i] = va_arg(ap, size_t); + for(size_t i = 0; i < ndims; i++) + pitch[i] = va_arg(ap, size_t); + return aml_layout_native_ainit(p, tags, ptr, element_size, ndims, dims, + stride, pitch); +} + +int aml_layout_native_init(struct aml_layout *p, uint64_t tags, void *ptr, + const size_t element_size, size_t ndims, ...)
+{ + int err; + va_list ap; + va_start(ap, ndims); + err = aml_layout_native_vinit(p, tags, ptr, element_size, ndims, ap); + va_end(ap); + return err; +} + +int aml_layout_native_acreate(struct aml_layout **layout, uint64_t tags, + void *ptr, const size_t element_size, + size_t ndims, const size_t *dims, + const size_t *stride, const size_t *pitch) +{ + assert(ndims > 0); + void *baseptr = calloc(1, AML_LAYOUT_NATIVE_ALLOCSIZE(ndims)); + *layout = (struct aml_layout *)baseptr; + aml_layout_native_struct_init(*layout, ndims, baseptr); + return aml_layout_native_ainit(*layout, tags, ptr, element_size, ndims, + dims, stride, pitch); +} + +int aml_layout_native_vcreate(struct aml_layout **layout, uint64_t tags, + void *ptr, const size_t element_size, + size_t ndims, va_list ap) +{ + assert(ndims > 0); + void *baseptr = calloc(1, AML_LAYOUT_NATIVE_ALLOCSIZE(ndims)); + *layout = (struct aml_layout *)baseptr; + aml_layout_native_struct_init(*layout, ndims, baseptr); + return aml_layout_native_vinit(*layout, tags, ptr, element_size, ndims, + ap); +} + +int aml_layout_native_create(struct aml_layout **layout, uint64_t tags, + void *ptr, const size_t element_size, size_t ndims, + ...) 
+{ + int err; + va_list ap; + assert(ndims > 0); + void *baseptr = calloc(1, AML_LAYOUT_NATIVE_ALLOCSIZE(ndims)); + *layout = (struct aml_layout *)baseptr; + aml_layout_native_struct_init(*layout, ndims, baseptr); + va_start(ap, ndims); + err = aml_layout_native_vinit(*layout, tags, ptr, element_size, ndims, + ap); + va_end(ap); + return err; +} + +/******************************************************************************* + * COLUMN OPERATORS: + ******************************************************************************/ + +void *aml_layout_column_deref(const struct aml_layout_data *data, + va_list coords) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + void *ptr; + assert(d != NULL); + assert(d->ptr != NULL); + ptr = d->ptr; + for(size_t i = 0; i < d->ndims; i++) + { + size_t c = va_arg(coords, size_t); + assert(c < d->dims[i]); + ptr += c*d->cpitch[i]*d->stride[i]; + } + return ptr; +} + +void *aml_layout_column_aderef(const struct aml_layout_data *data, + const size_t *coords) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + void *ptr; + assert(d != NULL); + assert(d->ptr != NULL); + ptr = d->ptr; + for(size_t i = 0; i < d->ndims; i++) + { + assert(coords[i] < d->dims[i]); + ptr += coords[i]*d->cpitch[i]*d->stride[i]; + } + return ptr; +} + +int aml_layout_column_order(const struct aml_layout_data *data) +{ + return AML_TYPE_LAYOUT_COLUMN_ORDER; +} + +int aml_layout_column_dims(const struct aml_layout_data *data, va_list dims) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) + { + size_t *dim = va_arg(dims, size_t*); + assert(dim != NULL); + *dim = d->dims[i]; + } + return 0; +} + +int aml_layout_column_adims(const struct aml_layout_data *data, size_t *dims) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + assert(d 
!= NULL); + assert(dims != NULL); + memcpy((void*)dims, (void*)d->dims, sizeof(size_t)*d->ndims); + return 0; +} + +size_t aml_layout_column_ndims(const struct aml_layout_data *data) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + return d->ndims; +} + +size_t aml_layout_column_element_size(const struct aml_layout_data *data) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + return d->cpitch[0]; +} + +static void merge_dims(size_t ndims, + const size_t *dims, const size_t *stride, + const size_t *cpitch, size_t *new_ndims, + size_t *new_dims, size_t *new_stride, + size_t *new_cpitch) +{ + size_t dim_index = 0; + size_t new_dim_index = 0; + new_dims[new_dim_index] = dims[dim_index]; + new_cpitch[new_dim_index] = cpitch[dim_index]; + new_stride[new_dim_index] = stride[dim_index]; + for (; dim_index < ndims - 1; dim_index++) { + if (dims[dim_index] * stride[dim_index] * cpitch[dim_index] == + cpitch[dim_index + 1] && stride[dim_index + 1] == 1) { + new_dims[new_dim_index] *= dims[dim_index + 1]; + } else { + new_dim_index++; + new_dims[new_dim_index] = dims[dim_index + 1]; + new_cpitch[new_dim_index] = cpitch[dim_index + 1]; + new_stride[new_dim_index] = stride[dim_index + 1]; + } + } + new_cpitch[new_dim_index + 1] = cpitch[dim_index + 1]; + *new_ndims = new_dim_index + 1; +} + +static void +reshape_dims(const struct aml_layout_data_native *d, size_t ndims, + const size_t *dims, size_t *n_stride, size_t *n_cpitch) +{ + size_t m_ndims; + size_t m_dims[d->ndims]; + size_t m_stride[d->ndims]; + size_t m_cpitch[d->ndims + 1]; + + merge_dims(d->ndims, d->dims, d->stride, d->cpitch, + &m_ndims, m_dims, m_stride, m_cpitch); + + size_t m_dim_index = 0; + + n_cpitch[0] = m_cpitch[m_dim_index]; + for (size_t i = 0; i < ndims; i++) { + if (m_dims[m_dim_index] == dims[i]) { + n_stride[i] = m_stride[m_dim_index]; + n_cpitch[i + 1] = m_cpitch[m_dim_index + 1]; + m_dim_index++; + } else if 
(m_dims[m_dim_index] % dims[i] == 0) { + m_dims[m_dim_index] /= dims[i]; + n_stride[i] = m_stride[m_dim_index]; + n_cpitch[i + 1] = + n_cpitch[i] * dims[i] * m_stride[m_dim_index]; + m_stride[m_dim_index] = 1; + } else { + assert(0); + } + } +} + +struct aml_layout * +aml_layout_column_areshape(const struct aml_layout_data *data, size_t ndims, + const size_t *dims) +{ /* Returns a newly calloc'ed native layout over d->ptr with the requested ndims/dims; strides and cumulative pitches are derived by reshape_dims. */ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + size_t total_size, new_total_size; + total_size = d->dims[0]; + for (size_t i = 1; i < d->ndims; i++) + total_size *= d->dims[i]; + new_total_size = dims[0]; + for (size_t i = 1; i < ndims; i++) + new_total_size *= dims[i]; + assert(total_size == new_total_size); /* old and new shapes must hold the same number of elements */ + + size_t stride[ndims]; + size_t cpitch[ndims + 1]; + reshape_dims(d, ndims, dims, stride, cpitch); + + void *baseptr = calloc(1, AML_LAYOUT_NATIVE_ALLOCSIZE(ndims)); + struct aml_layout *layout = (struct aml_layout *)baseptr; + aml_layout_native_struct_init(layout, ndims, baseptr); + + aml_layout_native_ainit_cpitch(layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + d->ptr, ndims, dims, stride, cpitch); + layout->ops = &aml_layout_column_ops; + + return layout; +} + +struct aml_layout * +aml_layout_column_reshape(const struct aml_layout_data *data, size_t ndims, + va_list dims) +{ + size_t n_dims[ndims]; + for (int i = 0; i < ndims; i++) { + n_dims[i] = va_arg(dims, size_t); + } + return aml_layout_column_areshape(data, ndims, n_dims); +} + +struct aml_layout * +aml_layout_column_aslice(const struct aml_layout_data *data, + const size_t *offsets, const size_t *dims, + const size_t *strides) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + size_t ndims = d->ndims; + for (size_t i = 0; i < ndims; i++) + assert(offsets[i] + (dims[i] - 1) * strides[i] < d->dims[i]); + void * ptr = aml_layout_column_aderef(data, offsets); + size_t cpitch[ndims + 1]; + size_t new_strides[ndims]; + cpitch[ndims] = d->cpitch[ndims]; + for
(size_t i = 0; i < ndims; i++) { + cpitch[i] = d->cpitch[i]; + new_strides[i] = strides[i] * d->stride[i]; + cpitch[ndims] -= cpitch[i] * offsets[i] * d->stride[i]; + } + void *baseptr = calloc(1, AML_LAYOUT_NATIVE_ALLOCSIZE(ndims)); + struct aml_layout *layout = (struct aml_layout *)baseptr; + aml_layout_native_struct_init(layout, ndims, baseptr); + + aml_layout_native_ainit_cpitch(layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + ptr, ndims, dims, new_strides, cpitch); + layout->ops = &aml_layout_column_ops; + + return layout; +} + +struct aml_layout * +aml_layout_column_slice(const struct aml_layout_data *data, va_list args) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + size_t ndims = d->ndims; + size_t offsets[ndims]; + size_t dims[ndims]; + size_t strides[ndims]; + for (int i = 0; i < ndims; i++) + offsets[i] = va_arg(args, size_t); + for (int i = 0; i < ndims; i++) + dims[i] = va_arg(args, size_t); + for (int i = 0; i < ndims; i++) + strides[i] = va_arg(args, size_t); + return aml_layout_column_aslice(data, offsets, dims, strides); +} + +struct aml_layout_ops aml_layout_column_ops = { + aml_layout_column_deref, + aml_layout_column_aderef, + aml_layout_column_aderef, + aml_layout_column_order, + aml_layout_column_dims, + aml_layout_column_adims, + aml_layout_column_adims, + aml_layout_column_ndims, + aml_layout_column_element_size, + aml_layout_column_reshape, + aml_layout_column_areshape, + aml_layout_column_slice, + aml_layout_column_aslice, + aml_layout_column_aslice +}; + +/******************************************************************************* + * ROW OPERATORS: + ******************************************************************************/ + +void *aml_layout_row_deref(const struct aml_layout_data *data, va_list coords) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + void *ptr; + assert(d != NULL); + assert(d->ptr != NULL); + ptr = d->ptr; + for(size_t i = 0; 
i < d->ndims; i++) + { + size_t c = va_arg(coords, size_t); + assert(c < d->dims[d->ndims - i - 1]); + ptr += c * d->cpitch[d->ndims - i - 1] * + d->stride[d->ndims - i - 1]; + } + return ptr; +} + +void *aml_layout_row_aderef(const struct aml_layout_data *data, + const size_t *coords) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + void *ptr; + assert(d != NULL); + assert(d->ptr != NULL); + ptr = d->ptr; + for(size_t i = 0; i < d->ndims; i++) + { + size_t c = coords[i]; + assert(c < d->dims[d->ndims - i - 1]); + ptr += c * d->cpitch[d->ndims - i - 1] * + d->stride[d->ndims - i - 1]; + } + return ptr; +} + +int aml_layout_row_order(const struct aml_layout_data *data) +{ + return AML_TYPE_LAYOUT_ROW_ORDER; +} + +int aml_layout_row_dims(const struct aml_layout_data *data, va_list dims) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) + { + size_t *dim = va_arg(dims, size_t*); + assert(dim != NULL); + *dim = d->dims[d->ndims - i - 1]; + } + return 0; +} + +int aml_layout_row_adims(const struct aml_layout_data *data, size_t *dims) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) + { + dims[i] = d->dims[d->ndims - i - 1]; + } + return 0; +} + +size_t aml_layout_row_ndims(const struct aml_layout_data *data) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + return d->ndims; +} + +size_t aml_layout_row_element_size(const struct aml_layout_data *data) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + return d->cpitch[0]; +} + +struct aml_layout * +aml_layout_row_areshape(const struct aml_layout_data *data, size_t ndims, + const size_t *dims) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + 
size_t total_size, new_total_size; + total_size = d->dims[0]; + for (size_t i = 1; i < d->ndims; i++) + total_size *= d->dims[i]; + new_total_size = dims[0]; + for (size_t i = 1; i < ndims; i++) + new_total_size *= dims[i]; + assert(total_size == new_total_size); /* old and new shapes must hold the same number of elements */ + + size_t n_dims[ndims]; /* requested dims in reversed order, as passed to reshape_dims and stored in the new layout */ + for (int i = 0; i < ndims; i++) + n_dims[ndims - i - 1] = dims[i]; + + size_t stride[ndims]; + size_t cpitch[ndims + 1]; + reshape_dims(d, ndims, n_dims, stride, cpitch); + + void *baseptr = calloc(1, AML_LAYOUT_NATIVE_ALLOCSIZE(ndims)); + struct aml_layout *layout = (struct aml_layout *)baseptr; + aml_layout_native_struct_init(layout, ndims, baseptr); + + aml_layout_native_ainit_cpitch(layout, AML_TYPE_LAYOUT_ROW_ORDER, + d->ptr, ndims, n_dims, stride, cpitch); + layout->ops = &aml_layout_row_ops; + + return layout; +} + +struct aml_layout * +aml_layout_row_reshape(const struct aml_layout_data *data, size_t ndims, + va_list dims) +{ + size_t n_dims[ndims]; + for (int i = 0; i < ndims; i++) + n_dims[i] = va_arg(dims, size_t); + return aml_layout_row_areshape(data, ndims, n_dims); +} + + +struct aml_layout * +aml_layout_row_aslice(const struct aml_layout_data *data, + const size_t *offsets, const size_t *dims, + const size_t *strides) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + size_t ndims = d->ndims; + size_t n_offsets[ndims]; + size_t n_dims[ndims]; + size_t n_strides[ndims]; + for (size_t i = 0; i < ndims; i++) { + n_offsets[i] = offsets[ndims - i - 1]; + n_dims[i] = dims[ndims - i - 1]; + n_strides[i] = strides[ndims - i - 1]; + } + for (size_t i = 0; i < ndims; i++) + assert(n_offsets[i] + (n_dims[i] - 1) * n_strides[i] < + d->dims[i]); + void * ptr = aml_layout_column_aderef(data, n_offsets); + size_t cpitch[ndims + 1]; + cpitch[ndims] = d->cpitch[ndims]; + for (size_t i = 0; i < ndims; i++) { + cpitch[i] = d->cpitch[i]; + n_strides[i] *= d->stride[i]; + cpitch[ndims] -= cpitch[i] * n_offsets[i] * d->stride[i]; + } + void
*baseptr = calloc(1, AML_LAYOUT_NATIVE_ALLOCSIZE(ndims)); + struct aml_layout *layout = (struct aml_layout *)baseptr; + aml_layout_native_struct_init(layout, ndims, baseptr); + + aml_layout_native_ainit_cpitch(layout, AML_TYPE_LAYOUT_ROW_ORDER, + ptr, ndims, n_dims, n_strides, cpitch); + layout->ops = &aml_layout_row_ops; + + return layout; +} + +struct aml_layout * +aml_layout_row_slice(const struct aml_layout_data *data, va_list args) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + size_t ndims = d->ndims; + size_t offsets[ndims]; + size_t dims[ndims]; + size_t strides[ndims]; + for (int i = 0; i < ndims; i++) + offsets[i] = va_arg(args, size_t); + for (int i = 0; i < ndims; i++) + dims[i] = va_arg(args, size_t); + for (int i = 0; i < ndims; i++) + strides[i] = va_arg(args, size_t); + return aml_layout_row_aslice(data, offsets, dims, strides); +} + +struct aml_layout * +aml_layout_row_aslice_column(const struct aml_layout_data *data, + const size_t *offsets, const size_t *dims, + const size_t *strides) +{ + const struct aml_layout_data_native *d = + (const struct aml_layout_data_native *)data; + size_t ndims = d->ndims; + for (size_t i = 0; i < ndims; i++) + assert(offsets[i] + (dims[i] - 1) * strides[i] < d->dims[i]); + void * ptr = aml_layout_column_aderef(data, offsets); + size_t cpitch[ndims + 1]; + size_t new_strides[ndims]; + cpitch[ndims] = d->cpitch[ndims]; + for (size_t i = 0; i < ndims; i++) { + cpitch[i] = d->cpitch[i]; + new_strides[i] = strides[i] * d->stride[i]; + cpitch[ndims] -= cpitch[i] * offsets[i] * d->stride[i]; + } + void *baseptr = calloc(1, AML_LAYOUT_NATIVE_ALLOCSIZE(ndims)); + struct aml_layout *layout = (struct aml_layout *)baseptr; + aml_layout_native_struct_init(layout, ndims, baseptr); + + aml_layout_native_ainit_cpitch(layout, AML_TYPE_LAYOUT_ROW_ORDER, + ptr, ndims, dims, new_strides, cpitch); + layout->ops = &aml_layout_row_ops; + + return layout; +} + +struct aml_layout_ops 
aml_layout_row_ops = { + aml_layout_row_deref, + aml_layout_row_aderef, + aml_layout_column_aderef, + aml_layout_row_order, + aml_layout_row_dims, + aml_layout_row_adims, + aml_layout_column_adims, + aml_layout_row_ndims, + aml_layout_row_element_size, + aml_layout_row_reshape, + aml_layout_row_areshape, + aml_layout_row_slice, + aml_layout_row_aslice, + aml_layout_row_aslice_column +}; + diff --git a/src/layout_pad.c b/src/layout_pad.c new file mode 100644 index 00000000..3e1564bd --- /dev/null +++ b/src/layout_pad.c @@ -0,0 +1,319 @@ +#include + +int aml_layout_pad_struct_init(struct aml_layout *layout, size_t ndims, + size_t element_size, void *memory) +{ + struct aml_layout_data_pad *dataptr; + + assert(layout == (struct aml_layout *)memory); + memory = (void *)((uintptr_t)memory + sizeof(struct aml_layout)); + dataptr = memory; + layout->data = memory; + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_layout_data_pad)); + dataptr->target = NULL; + dataptr->ndims = ndims; + dataptr->element_size = element_size; + dataptr->dims = (size_t *)memory; + dataptr->target_dims = dataptr->dims + ndims; + dataptr->neutral = (void *)(dataptr->target_dims + ndims); + return 0; +} + +int aml_layout_pad_ainit(struct aml_layout *layout, uint64_t tags, + struct aml_layout *target, const size_t *dims, + void *neutral) +{ + assert(layout != NULL); + assert(layout->data != NULL); + struct aml_layout_data_pad *data = + (struct aml_layout_data_pad *)layout->data; + size_t ndims = aml_layout_ndims(target); + size_t element_size = aml_layout_element_size(target); + assert(data->ndims == ndims); + assert(data->element_size == element_size); + assert(data->dims); + assert(data->target_dims); + assert(data->neutral); + int type = AML_TYPE_GET(tags, AML_TYPE_LAYOUT_ORDER); + if (type == AML_TYPE_LAYOUT_ROW_ORDER) { + AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_ORDER, + AML_TYPE_LAYOUT_ROW_ORDER); + layout->ops = &aml_layout_pad_row_ops; + for(size_t i = 0; i < ndims; i++) + 
data->dims[i] = dims[ndims-i-1]; + } else if (type == AML_TYPE_LAYOUT_COLUMN_ORDER) { + AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_ORDER, + AML_TYPE_LAYOUT_COLUMN_ORDER); + layout->ops = &aml_layout_pad_column_ops; + memcpy(data->dims, dims, ndims * sizeof(size_t)); + } + type = aml_layout_order(target); + if(type == AML_TYPE_LAYOUT_ROW_ORDER) { + size_t target_dims[ndims]; + aml_layout_adims(target, target_dims); + for(size_t i = 0; i < ndims; i++) + data->target_dims[i] = target_dims[ndims-i-1]; + } else if (type == AML_TYPE_LAYOUT_COLUMN_ORDER) { + aml_layout_adims(target, data->target_dims); + } + for(size_t i = 0; i < ndims; i++) + assert(data->dims[i] >= data->target_dims[i]); + memcpy(data->neutral, neutral, element_size); + data->target = target; + return 0; +} + +int aml_layout_pad_vinit(struct aml_layout *layout, uint64_t tags, + struct aml_layout *target, va_list ap) +{ + size_t ndims = aml_layout_ndims(target); + size_t dims[ndims]; + for(size_t i = 0; i < ndims; i++) + dims[i] = va_arg(ap, size_t); + void *neutral = va_arg(ap, void *); + return aml_layout_pad_ainit(layout, tags, target, dims, neutral); +} + +int aml_layout_pad_init(struct aml_layout *layout, uint64_t tags, + struct aml_layout *target, ...) 
+{ + int err; + va_list ap; + va_start(ap, target); + err = aml_layout_pad_vinit(layout, tags, target, ap); + va_end(ap); + return err; +} + +int aml_layout_pad_acreate(struct aml_layout **layout, uint64_t tags, + struct aml_layout *target, const size_t *dims, + void *neutral) +{ + assert(target != NULL); + assert(target->ops != NULL); + size_t ndims = aml_layout_ndims(target); + size_t element_size = aml_layout_element_size(target); + void *baseptr = calloc(1, AML_LAYOUT_PAD_ALLOCSIZE(ndims, + element_size)); + *layout = (struct aml_layout *)baseptr; + aml_layout_pad_struct_init(*layout, ndims, element_size, baseptr); + return aml_layout_pad_ainit(*layout, tags, target, dims, neutral); +} + +int aml_layout_pad_vcreate(struct aml_layout **layout, uint64_t tags, + struct aml_layout *target, va_list ap) +{ + assert(target != NULL); + assert(target->ops != NULL); + size_t ndims = aml_layout_ndims(target); + size_t element_size = aml_layout_element_size(target); + void *baseptr = calloc(1, AML_LAYOUT_PAD_ALLOCSIZE(ndims, + element_size)); + *layout = (struct aml_layout *)baseptr; + aml_layout_pad_struct_init(*layout, ndims, element_size, baseptr); + return aml_layout_pad_vinit(*layout, tags, target, ap); +} + +int aml_layout_pad_create(struct aml_layout **layout, uint64_t tags, + struct aml_layout *target, ...) 
+{ + int err; + va_list ap; + assert(target != NULL); + assert(target->ops != NULL); + size_t ndims = aml_layout_ndims(target); + size_t element_size = aml_layout_element_size(target); + void *baseptr = calloc(1, AML_LAYOUT_PAD_ALLOCSIZE(ndims, + element_size)); + *layout = (struct aml_layout *)baseptr; + aml_layout_pad_struct_init(*layout, ndims, element_size, baseptr); + va_start(ap, target); + err = aml_layout_pad_vinit(*layout, tags, target, ap); + va_end(ap); + return err; +} + +/******************************************************************************* + * COLUMN OPERATORS: + ******************************************************************************/ + +void *aml_layout_pad_column_aderef(const struct aml_layout_data *data, + const size_t *coords) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data; + assert(d !=NULL); + size_t ndims = d->ndims; + for (int i = 0; i < ndims; i++) + assert(coords[i] < d->dims[i]); + for (int i = 0; i < ndims; i++) { + if(coords[i] >= d->target_dims[i]) + return d->neutral; + } + return d->target->ops->aderef_column(d->target->data, coords); +} + +void *aml_layout_pad_column_deref(const struct aml_layout_data *data, + va_list coords) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data; + assert(d !=NULL); + size_t ndims = d->ndims; + size_t target_coords[d->ndims]; + for (int i = 0; i < ndims; i++) + target_coords[i] = va_arg(coords, size_t); + return aml_layout_pad_column_aderef(data, target_coords); +} + +int aml_layout_pad_column_order(const struct aml_layout_data *data) +{ + return AML_TYPE_LAYOUT_COLUMN_ORDER; +} + +int aml_layout_pad_column_dims(const struct aml_layout_data *data, va_list dims) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) + { + size_t *dim = va_arg(dims, size_t*); + assert(dim != NULL); + *dim = d->dims[i]; + } + return 0; +} + 
+int aml_layout_pad_column_adims(const struct aml_layout_data *data, + size_t *dims) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data; + assert(d != NULL); + assert(dims != NULL); + memcpy((void*)dims, (void*)d->dims, sizeof(size_t)*d->ndims); + return 0; +} + +size_t aml_layout_pad_ndims(const struct aml_layout_data *data) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data; + return d->ndims; +} + +size_t aml_layout_pad_element_size(const struct aml_layout_data *data) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data; + return d->element_size; +} + +struct aml_layout_ops aml_layout_pad_column_ops = { + aml_layout_pad_column_deref, + aml_layout_pad_column_aderef, + aml_layout_pad_column_aderef, + aml_layout_pad_column_order, + aml_layout_pad_column_dims, + aml_layout_pad_column_adims, + aml_layout_pad_column_adims, + aml_layout_pad_ndims, + aml_layout_pad_element_size, + NULL, + NULL, + NULL, + NULL, + NULL +}; + +/******************************************************************************* + * ROW OPERATORS: + ******************************************************************************/ + +void *aml_layout_pad_row_aderef(const struct aml_layout_data *data, + const size_t *coords) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data; + assert(d !=NULL); + size_t ndims = d->ndims; + for (int i = 0; i < ndims; i++) + assert(coords[ndims - i - 1] < d->dims[i]); + for (int i = 0; i < ndims; i++) { + if(coords[ndims - i - 1] >= d->target_dims[i]) + return d->neutral; + } + int type = aml_layout_order(d->target); + if (type == AML_TYPE_LAYOUT_ROW_ORDER) + return aml_layout_aderef(d->target, coords); + else { + size_t target_coords[ndims]; + for (int i = 0; i < ndims; i++) + target_coords[i] = coords[ndims - i - 1]; + return aml_layout_aderef(d->target, coords); + } +} + +void *aml_layout_pad_row_deref(const struct 
aml_layout_data *data, + va_list coords) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data; + assert(d !=NULL); + size_t ndims = d->ndims; + size_t target_coords[d->ndims]; + for (int i = 0; i < ndims; i++) + target_coords[i] = va_arg(coords, size_t); + return aml_layout_pad_row_aderef(data, target_coords); +} + +int aml_layout_pad_row_order(const struct aml_layout_data *data) +{ + return AML_TYPE_LAYOUT_ROW_ORDER; +} + +int aml_layout_pad_row_dims(const struct aml_layout_data *data, va_list dims) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) + { + size_t *dim = va_arg(dims, size_t*); + assert(dim != NULL); + *dim = d->dims[d->ndims - i - 1]; + } + return 0; +} + +int aml_layout_pad_row_adims(const struct aml_layout_data *data, size_t *dims) +{ + const struct aml_layout_data_pad *d = + (const struct aml_layout_data_pad *)data; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) + { + dims[i] = d->dims[d->ndims - i - 1]; + } + return 0; +} + +struct aml_layout_ops aml_layout_pad_row_ops = { + aml_layout_pad_row_deref, + aml_layout_pad_row_aderef, + aml_layout_pad_column_aderef, + aml_layout_pad_row_order, + aml_layout_pad_row_dims, + aml_layout_pad_row_adims, + aml_layout_pad_column_adims, + aml_layout_pad_ndims, + aml_layout_pad_element_size, + NULL, + NULL, + NULL, + NULL, + NULL +}; + diff --git a/src/layout_reshape.c b/src/layout_reshape.c new file mode 100644 index 00000000..9cef96d5 --- /dev/null +++ b/src/layout_reshape.c @@ -0,0 +1,341 @@ +#include + +int aml_layout_reshape_struct_init(struct aml_layout *layout, size_t ndims, + void *memory) +{ + struct aml_layout_data_reshape *dataptr; + + assert(layout == (struct aml_layout *)memory); + memory = (void *)((uintptr_t)memory + sizeof(struct aml_layout)); + dataptr = memory; + layout->data = memory; + memory = (void *)((uintptr_t)memory + + sizeof(struct 
aml_layout_data_reshape)); + dataptr->target = NULL; + dataptr->ndims = ndims; + dataptr->dims = (size_t *)memory; + dataptr->coffsets = dataptr->dims + ndims; + dataptr->target_dims = dataptr->dims + 2 * ndims; + return 0; +} + +int aml_layout_reshape_ainit(struct aml_layout *layout, uint64_t tags, + struct aml_layout *target, size_t ndims, + const size_t *dims) +{ + assert(layout != NULL); + assert(layout->data != NULL); + struct aml_layout_data_reshape *data = + (struct aml_layout_data_reshape *)layout->data; + size_t target_ndims = aml_layout_ndims(target); + assert(ndims != 0); + assert(data->ndims == ndims); + assert(data->dims); + assert(data->coffsets); + assert(data->target_dims); + data->target_ndims = target_ndims; + data->target = target; + assert(data->target_ndims != 0); + int type = AML_TYPE_GET(tags, AML_TYPE_LAYOUT_ORDER); + if (type == AML_TYPE_LAYOUT_ROW_ORDER) { + AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_ORDER, + AML_TYPE_LAYOUT_ROW_ORDER); + layout->ops = &aml_layout_reshape_row_ops; + for(size_t i = 0; i < ndims; i++) + data->dims[i] = dims[ndims-i-1]; + } else { + AML_TYPE_SET(layout->tags, AML_TYPE_LAYOUT_ORDER, + AML_TYPE_LAYOUT_COLUMN_ORDER); + layout->ops = &aml_layout_reshape_column_ops; + memcpy(data->dims, dims, ndims * sizeof(size_t)); + } + type = aml_layout_order(target); + if(type == AML_TYPE_LAYOUT_ROW_ORDER) { + size_t target_dims[target_ndims]; + aml_layout_adims(target, target_dims); + for(size_t i = 0; i < target_ndims; i++) + data->target_dims[i] = target_dims[target_ndims-i-1]; + } else { + aml_layout_adims(target, data->target_dims); + } + size_t prod, target_prod; + prod = 1; + for(size_t i = 0; i < ndims; i++) { + data->coffsets[i] = prod; + prod *= data->dims[i]; + } + target_prod = 1; + for(size_t i = 0; i < data->target_ndims; i++) + target_prod *= data->target_dims[i]; + assert(target_prod == prod); + return 0; +} + +int aml_layout_reshape_vinit(struct aml_layout *layout, uint64_t tags, + struct aml_layout *target, 
size_t ndims, + va_list data) +{ + size_t dims[ndims]; + for(size_t i = 0; i < ndims; i++) + dims[i] = va_arg(data, size_t); + return aml_layout_reshape_ainit(layout, tags, target, ndims, dims); +} + +int aml_layout_reshape_init(struct aml_layout *layout, uint64_t tags, + struct aml_layout *target, size_t ndims, ...) +{ + int err; + va_list ap; + va_start(ap, ndims); + err = aml_layout_reshape_vinit(layout, tags, target, ndims, ap); + va_end(ap); + return err; +} + +int aml_layout_reshape_acreate(struct aml_layout **layout, uint64_t tags, + struct aml_layout *target, size_t ndims, + const size_t *dims) +{ + assert(target != NULL); + assert(target->ops != NULL); + size_t target_ndims = aml_layout_ndims(target); + void *baseptr = calloc(1, AML_LAYOUT_RESHAPE_ALLOCSIZE(ndims, + target_ndims)); + assert(baseptr != NULL); + *layout = (struct aml_layout *)baseptr; + aml_layout_reshape_struct_init(*layout, ndims, baseptr); + return aml_layout_reshape_ainit(*layout, tags, target, ndims, dims); +} + +int aml_layout_reshape_vcreate(struct aml_layout **layout, uint64_t tags, + struct aml_layout *target, size_t ndims, + va_list data) +{ + assert(target != NULL); + assert(target->ops != NULL); + size_t target_ndims = aml_layout_ndims(target); + void *baseptr = calloc(1, AML_LAYOUT_RESHAPE_ALLOCSIZE(ndims, + target_ndims)); + assert(baseptr != NULL); + *layout = (struct aml_layout *)baseptr; + aml_layout_reshape_struct_init(*layout, ndims, baseptr); + return aml_layout_reshape_vinit(*layout, tags, target, ndims, data); +} + +int aml_layout_reshape_create(struct aml_layout **layout, uint64_t tags, + struct aml_layout *target, size_t ndims, ...) 
+{ + int err; + va_list data; + assert(target != NULL); + assert(target->ops != NULL); + size_t target_ndims = aml_layout_ndims(target); + void *baseptr = calloc(1, AML_LAYOUT_RESHAPE_ALLOCSIZE(ndims, + target_ndims)); + assert(baseptr != NULL); + *layout = (struct aml_layout *)baseptr; + aml_layout_reshape_struct_init(*layout, ndims, baseptr); + va_start(data, ndims); + err = aml_layout_reshape_vinit(*layout, tags, target, ndims, data); + va_end(data); + return err; +} + +/******************************************************************************* + * COLUMN OPERATORS: + ******************************************************************************/ + +void *aml_layout_reshape_column_aderef(const struct aml_layout_data *data, + const size_t *coords) +{ + const struct aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + assert(d !=NULL); + + size_t ndims = d->ndims; + + for (int i = 0; i < ndims; i++) + assert(coords[i] < d->dims[i]); + + size_t target_ndims = d->target_ndims; + size_t offset = 0; + size_t remainder; + size_t target_coords[target_ndims]; + + for (int i = 0; i < ndims; i++) + offset += coords[i] * d->coffsets[i]; + + for (int i = 0; i < target_ndims; i++) { + target_coords[i] = offset % d->target_dims[i]; + offset /= d->target_dims[i]; + } + return d->target->ops->aderef_column(d->target->data, target_coords); +} + +void *aml_layout_reshape_column_deref(const struct aml_layout_data *data, + va_list coords) +{ + const struct aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + assert(d !=NULL); + size_t target_coords[d->ndims]; + for (int i = 0; i < d->ndims; i++) + target_coords[i] = va_arg(coords, size_t); + return aml_layout_reshape_column_aderef(data, target_coords); +} + +int aml_layout_reshape_column_order(const struct aml_layout_data *data) +{ + return AML_TYPE_LAYOUT_COLUMN_ORDER; +} + +int aml_layout_reshape_column_dims(const struct aml_layout_data *data, va_list dims) +{ + const struct 
aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) + { + size_t *dim = va_arg(dims, size_t*); + assert(dim != NULL); + *dim = d->dims[i]; + } + return 0; +} + +int aml_layout_reshape_column_adims(const struct aml_layout_data *data, + size_t *dims) +{ + const struct aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + assert(d != NULL); + assert(dims != NULL); + memcpy((void*)dims, (void*)d->dims, sizeof(size_t)*d->ndims); + return 0; +} + +size_t aml_layout_reshape_ndims(const struct aml_layout_data *data) +{ + const struct aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + return d->ndims; +} + +size_t aml_layout_reshape_element_size(const struct aml_layout_data *data) +{ + const struct aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + return aml_layout_element_size(d->target); +} + +struct aml_layout_ops aml_layout_reshape_column_ops = { + aml_layout_reshape_column_deref, + aml_layout_reshape_column_aderef, + aml_layout_reshape_column_aderef, + aml_layout_reshape_column_order, + aml_layout_reshape_column_dims, + aml_layout_reshape_column_adims, + aml_layout_reshape_column_adims, + aml_layout_reshape_ndims, + aml_layout_reshape_element_size, + NULL, + NULL, + NULL, + NULL, + NULL +}; + +/******************************************************************************* + * ROW OPERATORS: + ******************************************************************************/ + +void *aml_layout_reshape_row_aderef(const struct aml_layout_data *data, + const size_t *coords) +{ + const struct aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + assert(d !=NULL); + + size_t ndims = d->ndims; + + for (int i = 0; i < ndims; i++) + assert(coords[ndims - i - 1] < d->dims[i]); + + size_t target_ndims = d->target_ndims; + size_t offset = 0; + size_t remainder; + size_t 
target_coords[target_ndims]; + + for (int i = 0; i < ndims; i++) + offset += coords[ndims - i - 1] * d->coffsets[i]; + + for (int i = 0; i < target_ndims; i++) { + target_coords[i] = offset % d->target_dims[i]; + offset /= d->target_dims[i]; + } + return d->target->ops->aderef_column(d->target->data, target_coords); +} + +void *aml_layout_reshape_row_deref(const struct aml_layout_data *data, + va_list coords) +{ + const struct aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + assert(d !=NULL); + size_t target_coords[d->ndims]; + for (int i = 0; i < d->ndims; i++) + target_coords[i] = va_arg(coords, size_t); + return aml_layout_reshape_row_aderef(data, target_coords); +} + +int aml_layout_reshape_row_order(const struct aml_layout_data *data) +{ + return AML_TYPE_LAYOUT_ROW_ORDER; +} + +int aml_layout_reshape_row_dims(const struct aml_layout_data *data, + va_list dims) +{ + const struct aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) + { + size_t *dim = va_arg(dims, size_t*); + assert(dim != NULL); + *dim = d->dims[d->ndims - i - 1]; + } + return 0; +} + +int aml_layout_reshape_row_adims(const struct aml_layout_data *data, + size_t *dims) +{ + const struct aml_layout_data_reshape *d = + (const struct aml_layout_data_reshape *)data; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) + { + dims[i] = d->dims[d->ndims - i - 1]; + } + return 0; +} + +struct aml_layout_ops aml_layout_reshape_row_ops = { + aml_layout_reshape_row_deref, + aml_layout_reshape_row_aderef, + aml_layout_reshape_column_aderef, + aml_layout_reshape_row_order, + aml_layout_reshape_row_dims, + aml_layout_reshape_row_adims, + aml_layout_reshape_column_adims, + aml_layout_reshape_ndims, + aml_layout_reshape_element_size, + NULL, + NULL, + NULL, + NULL, + NULL +}; + + diff --git a/src/scratch_double.c b/src/scratch_double.c new file mode 100644 index 00000000..8806e7cf --- 
/dev/null +++ b/src/scratch_double.c @@ -0,0 +1,287 @@ +#include +#include + +/******************************************************************************* + * Requests: + ******************************************************************************/ + +int aml_scratch_request_double_init(struct aml_scratch_request_double *req, + int type, struct aml_dma *dma, + struct aml_layout *dl, int dstid, + struct aml_layout *sl, int srcid) + +{ + assert(req != NULL); + req->type = type; + req->dma = dma; + req->dest = dl; + req->dstid = dstid; + req->src = sl; + req->srcid = srcid; + return 0; +} + +int aml_scratch_request_double_destroy(struct aml_scratch_request_double *r) +{ + assert(r != NULL); + return 0; +} + +/******************************************************************************* + * Internal functions + ******************************************************************************/ +void *aml_scratch_double_do_thread(void *arg) +{ + struct aml_scratch_request_double *req = + (struct aml_scratch_request_double *)arg; + + aml_dma_copy(req->dma, req->dest, req->src); +} + +struct aml_scratch_double_ops aml_scratch_double_inner_ops = { + aml_scratch_double_do_thread, +}; + +/******************************************************************************* + * Public API + ******************************************************************************/ + +int aml_scratch_double_create_request(struct aml_scratch_data *d, + struct aml_scratch_request **r, + int type, va_list ap) +{ + assert(d != NULL); + assert(r != NULL); + struct aml_scratch_double *scratch = + (struct aml_scratch_double *)d; + + struct aml_scratch_request_double *req; + + pthread_mutex_lock(&scratch->data.lock); + req = aml_vector_add(&scratch->data.requests); + /* init the request */ + if(type == AML_SCRATCH_REQUEST_TYPE_PUSH) + { + struct aml_layout *scratch_layout; + struct aml_layout *src_layout; + int *src_uid; + int scratch_uid; + + src_layout = va_arg(ap, struct aml_layout *); + src_uid 
= va_arg(ap, int *); + scratch_layout = va_arg(ap, struct aml_layout *); + scratch_uid = va_arg(ap, int); + + /* find destination tile */ + int *slot = aml_vector_get(&scratch->data.tilemap, scratch_uid); + assert(slot != NULL); + *src_uid = *slot; + + /* init request */ + aml_scratch_request_double_init(req, type, + scratch->data.push_dma, + src_layout, *src_uid, + scratch_layout, scratch_uid); + } + else if(type == AML_SCRATCH_REQUEST_TYPE_PULL) + { + struct aml_layout **scratch_layout; + struct aml_layout *src_layout; + int *scratch_uid; + int src_uid; + + scratch_layout = va_arg(ap, struct aml_layout **); + scratch_uid = va_arg(ap, int *); + src_layout = va_arg(ap, struct aml_layout *); + src_uid = va_arg(ap, int); + + /* find scratchination tile + * We don't use add here because adding a tile means allocating + * new tiles on the sch_area too. */ + int slot = aml_vector_find(&scratch->data.tilemap, src_uid); + if(slot == -1) + { + /* create a new request */ + slot = aml_vector_find(&scratch->data.tilemap, -1); + assert(slot != -1); + int *tile = aml_vector_get(&scratch->data.tilemap, slot); + *tile = src_uid; + } + else + type = AML_SCRATCH_REQUEST_TYPE_NOOP; + + /* save the key */ + *scratch_uid = slot; + // *scratch_layout = aml_tiling_nd_get(scratch->data.scratch_tiling) + + /* init request */ + aml_scratch_request_double_init(req, type, + scratch->data.pull_dma, + *scratch_layout, slot, + src_layout, src_uid); + } + pthread_mutex_unlock(&scratch->data.lock); + /* thread creation */ + if(req->type != AML_SCRATCH_REQUEST_TYPE_NOOP) + { + pthread_create(&req->thread, NULL, scratch->ops.do_thread, req); + } + *r = (struct aml_scratch_request *)req; + return 0; +} + +int aml_scratch_double_destroy_request(struct aml_scratch_data *d, + struct aml_scratch_request *r) +{ + assert(d != NULL); + assert(r != NULL); + struct aml_scratch_double *scratch = + (struct aml_scratch_double *)d; + + struct aml_scratch_request_double *req = + (struct aml_scratch_request_double 
*)r; + int *tile; + + if(req->type != AML_SCRATCH_REQUEST_TYPE_NOOP) + { + pthread_cancel(req->thread); + pthread_join(req->thread, NULL); + } + + aml_scratch_request_double_destroy(req); + + /* destroy removes the tile from the scratch */ + pthread_mutex_lock(&scratch->data.lock); + if(req->type == AML_SCRATCH_REQUEST_TYPE_PUSH) + tile = aml_vector_get(&scratch->data.tilemap,req->srcid); + else if(req->type == AML_SCRATCH_REQUEST_TYPE_PULL) + tile = aml_vector_get(&scratch->data.tilemap,req->dstid); + aml_vector_remove(&scratch->data.tilemap, tile); + aml_vector_remove(&scratch->data.requests, req); + pthread_mutex_unlock(&scratch->data.lock); + return 0; +} + +int aml_scratch_double_wait_request(struct aml_scratch_data *d, + struct aml_scratch_request *r) +{ + assert(d != NULL); + assert(r != NULL); + struct aml_scratch_double *scratch = (struct aml_scratch_double *)d; + struct aml_scratch_request_double *req = + (struct aml_scratch_request_double *)r; + int *tile; + + /* wait for completion of the request */ + if(req->type != AML_SCRATCH_REQUEST_TYPE_NOOP) + pthread_join(req->thread, NULL); + + /* cleanup a completed request. In case of push, free up the tile */ + aml_scratch_request_double_destroy(req); + pthread_mutex_lock(&scratch->data.lock); + if(req->type == AML_SCRATCH_REQUEST_TYPE_PUSH) + { + tile = aml_vector_get(&scratch->data.tilemap,req->srcid); + aml_vector_remove(&scratch->data.tilemap, tile); + } + aml_vector_remove(&scratch->data.requests, req); + pthread_mutex_unlock(&scratch->data.lock); + return 0; +} + +void *aml_scratch_double_baseptr(const struct aml_scratch_data *d) +{ + assert(d != NULL); + // don't think this function makes sense for this implementation. 
+ return NULL; +} + +int aml_scratch_double_release(struct aml_scratch_data *d, int scratchid) +{ + assert(d != NULL); + struct aml_scratch_double *scratch = (struct aml_scratch_double *)d; + int *tile; + + pthread_mutex_lock(&scratch->data.lock); + tile = aml_vector_get(&scratch->data.tilemap, scratchid); + if(tile != NULL) + aml_vector_remove(&scratch->data.tilemap, tile); + pthread_mutex_unlock(&scratch->data.lock); + return 0; +} + +struct aml_scratch_ops aml_scratch_double_ops = { + aml_scratch_double_create_request, + aml_scratch_double_destroy_request, + aml_scratch_double_wait_request, + aml_scratch_double_baseptr, + aml_scratch_double_release, +}; + +/******************************************************************************* + * Init functions: + ******************************************************************************/ + +int aml_scratch_double_create(struct aml_scratch **d, ...) +{ + va_list ap; + struct aml_scratch *ret = NULL; + intptr_t baseptr, dataptr; + va_start(ap, d); + + /* alloc */ + baseptr = (intptr_t) calloc(1, AML_SCRATCH_DOUBLE_ALLOCSIZE); + dataptr = baseptr + sizeof(struct aml_scratch); + + ret = (struct aml_scratch *)baseptr; + ret->data = (struct aml_scratch_data *)dataptr; + + aml_scratch_double_vinit(ret, ap); + + va_end(ap); + *d = ret; + return 0; +} +int aml_scratch_double_vinit(struct aml_scratch *d, va_list ap) +{ + d->ops = &aml_scratch_double_ops; + struct aml_scratch_double *scratch = (struct aml_scratch_double *)d->data; + + scratch->ops = aml_scratch_double_inner_ops; + + scratch->data.dest_tiling = va_arg(ap, struct aml_tiling_nd *); + scratch->data.src_tiling = va_arg(ap, struct aml_tiling_nd *); + scratch->data.push_dma = va_arg(ap, struct aml_dma *); + scratch->data.pull_dma = va_arg(ap, struct aml_dma *); + size_t nbtiles = va_arg(ap, size_t); + size_t nbreqs = va_arg(ap, size_t); + + /* allocate request array */ + aml_vector_init(&scratch->data.requests, nbreqs, + sizeof(struct aml_scratch_request_double), + 
offsetof(struct aml_scratch_request_double, type), + AML_SCRATCH_REQUEST_TYPE_INVALID); + + /* scratch init */ + aml_vector_init(&scratch->data.tilemap, nbtiles, sizeof(int), 0, -1); + pthread_mutex_init(&scratch->data.lock, NULL); + return 0; +} +int aml_scratch_double_init(struct aml_scratch *d, ...) +{ + int err; + va_list ap; + va_start(ap, d); + err = aml_scratch_double_vinit(d, ap); + va_end(ap); + return err; +} + +int aml_scratch_double_destroy(struct aml_scratch *d) +{ + struct aml_scratch_double *scratch = (struct aml_scratch_double *)d->data; + aml_vector_destroy(&scratch->data.requests); + aml_vector_destroy(&scratch->data.tilemap); + pthread_mutex_destroy(&scratch->data.lock); + return 0; +} diff --git a/src/tiling_nd.c b/src/tiling_nd.c new file mode 100644 index 00000000..b2051884 --- /dev/null +++ b/src/tiling_nd.c @@ -0,0 +1,72 @@ +#include + +struct aml_layout *aml_tiling_nd_index(const struct aml_tiling_nd *t, ...) +{ + assert(t != NULL); + assert(t->ops != NULL); + va_list ap; + struct aml_layout *ret; + va_start(ap, t); + ret = t->ops->index(t->data, ap); + va_end(ap); + return ret; +} + +struct aml_layout *aml_tiling_nd_aindex(const struct aml_tiling_nd *t, const size_t *coords) +{ + assert(t != NULL); + assert(t->ops != NULL); + return t->ops->aindex(t->data, coords); +} + +int aml_tiling_nd_order(const struct aml_tiling_nd *t) +{ + assert(t != NULL); + assert(t->ops != NULL); + return t->ops->order(t->data); +} + +int aml_tiling_nd_tile_dims(const struct aml_tiling_nd *t, ...) +{ + assert(t != NULL); + assert(t->ops != NULL); + va_list ap; + int ret; + va_start(ap, t); + ret = t->ops->tile_dims(t->data, ap); + va_end(ap); + return ret; +} + +int aml_tiling_nd_tile_adims(const struct aml_tiling_nd *t, size_t *dims) +{ + assert(t != NULL); + assert(t->ops != NULL); + return t->ops->tile_adims(t->data, dims); +} + +int aml_tiling_nd_dims(const struct aml_tiling_nd *t, ...) 
+{ + assert(t != NULL); + assert(t->ops != NULL); + va_list ap; + int ret; + va_start(ap, t); + ret = t->ops->dims(t->data, ap); + va_end(ap); + return ret; +} + +int aml_tiling_nd_adims(const struct aml_tiling_nd *t, size_t *dims) +{ + assert(t != NULL); + assert(t->ops != NULL); + return t->ops->adims(t->data, dims); +} + +size_t aml_tiling_nd_ndims(const struct aml_tiling_nd *t) +{ + assert(t != NULL); + assert(t->ops != NULL); + return t->ops->ndims(t->data); +} diff --git a/src/tiling_nd_collapse.c b/src/tiling_nd_collapse.c new file mode 100644 index 00000000..d169f0c9 --- /dev/null +++ b/src/tiling_nd_collapse.c @@ -0,0 +1,385 @@ +#include + +int aml_tiling_nd_collapse_struct_init(struct aml_tiling_nd *t, size_t ndims, + void *memory) +{ + struct aml_tiling_nd_data_collapse *dataptr; + + assert(t == (struct aml_tiling_nd *)memory); + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_tiling_nd)); + dataptr = memory; + t->data = memory; + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_tiling_nd_data_collapse)); + dataptr->l = NULL; + dataptr->ndims = ndims; + dataptr->tile_dims = (size_t *)memory; + dataptr->dims = dataptr->tile_dims + ndims; + dataptr->border_tile_dims = dataptr->dims + ndims; + return 0; +} + +int aml_tiling_nd_collapse_ainit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims) +{ + assert(t != NULL); + assert(t->data != NULL); + struct aml_tiling_nd_data_collapse *data = + (struct aml_tiling_nd_data_collapse *)t->data; + assert(data->ndims == ndims); + assert(data->tile_dims); + assert(data->dims); + assert(data->border_tile_dims); + data->l = l; + int type = AML_TYPE_GET(tags, AML_TYPE_TILING_ORDER); + if (type == AML_TYPE_TILING_ROW_ORDER) { + AML_TYPE_SET(t->tags, AML_TYPE_TILING_ORDER, + AML_TYPE_TILING_ROW_ORDER); + t->ops = &aml_tiling_nd_collapse_row_ops; + for (size_t i = 0; i < ndims; i++) + data->tile_dims[i] = tile_dims[ndims-i-1]; + } else { + 
AML_TYPE_SET(t->tags, AML_TYPE_TILING_ORDER, + AML_TYPE_TILING_COLUMN_ORDER); + t->ops = &aml_tiling_nd_collapse_column_ops; + for (size_t i = 0; i < ndims; i++) + data->tile_dims[i] = tile_dims[i]; + } + size_t target_dims[ndims]; + l->ops->adims_column(l->data, target_dims); + for (size_t i = 0; i < ndims; i++) { + data->border_tile_dims[i] = target_dims[i] % data->tile_dims[i]; + data->dims[i] = target_dims[i] / data->tile_dims[i]; + if (data->border_tile_dims[i] == 0) + data->border_tile_dims[i] = data->tile_dims[i]; + else + data->dims[i] += 1; + } + return 0; +} + +int aml_tiling_nd_collapse_vinit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data) +{ + size_t tile_dims[ndims]; + for(size_t i = 0; i < ndims; i++) + tile_dims[i] = va_arg(data, size_t); + return aml_tiling_nd_collapse_ainit(t, tags, l, ndims, tile_dims); +} + +int aml_tiling_nd_collapse_init(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...) 
+{ + int err; + va_list ap; + va_start(ap, ndims); + err = aml_tiling_nd_collapse_vinit(t, tags, l, ndims, ap); + va_end(ap); + return err; +} + +int aml_tiling_nd_collapse_acreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims) +{ + assert(ndims > 0); + void *baseptr = calloc(1, AML_TILING_COLLAPSE_ALLOCSIZE(ndims)); + *t = (struct aml_tiling_nd *)baseptr; + aml_tiling_nd_collapse_struct_init(*t, ndims, baseptr); + return aml_tiling_nd_collapse_ainit(*t, tags, l, ndims, tile_dims); +} + +int aml_tiling_nd_collapse_vcreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data) +{ + assert(ndims > 0); + void *baseptr = calloc(1, AML_TILING_COLLAPSE_ALLOCSIZE(ndims)); + *t = (struct aml_tiling_nd *)baseptr; + aml_tiling_nd_collapse_struct_init(*t, ndims, baseptr); + return aml_tiling_nd_collapse_vinit(*t, tags, l, ndims, data); +} + +int aml_tiling_nd_collapse_create(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...) 
+{ + int err; + va_list ap; + assert(ndims > 0); + void *baseptr = calloc(1, AML_TILING_COLLAPSE_ALLOCSIZE(ndims)); + *t = (struct aml_tiling_nd *)baseptr; + aml_tiling_nd_collapse_struct_init(*t, ndims, baseptr); + va_start(ap, ndims); + err = aml_tiling_nd_collapse_vinit(*t, tags, l, ndims, ap); + va_end(ap); + return err; +} + +/*----------------------------------------------------------------------------*/ + +struct aml_layout* +aml_tiling_nd_collapse_column_aindex(const struct aml_tiling_nd_data *l, + const size_t *coords) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + size_t ndims = d->ndims; + size_t new_coords[ndims]; + size_t offsets[ndims]; + size_t dims[ndims]; + size_t strides[ndims]; + for(size_t i = 0, j = 0; i < ndims; i++) + if (d->dims[i] > 1) { + assert(coords[j] < d->dims[i]); + new_coords[i] = coords[j]; + j++; + } else + new_coords[i] = 0; + for(size_t i = 0; i < ndims; i++) { + offsets[i] = new_coords[i] * d->tile_dims[i]; + strides[i] = 1; + } + for(size_t i = 0; i < ndims; i++) + dims[i] = (new_coords[i] == d->dims[i] - 1 ? 
+ d->border_tile_dims[i] : + d->tile_dims[i] ); + return d->l->ops->aslice_column(d->l->data, offsets, dims, strides); +} + +struct aml_layout* +aml_tiling_nd_collapse_column_index(const struct aml_tiling_nd_data *l, + va_list coords) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + size_t n_coords[d->ndims]; + for(size_t i = 0, j = 0; i < d->ndims; i++) + if (d->dims[i] > 1) + n_coords[j++] = va_arg(coords, size_t); + return aml_tiling_nd_collapse_column_aindex(l, n_coords); +} + +int +aml_tiling_nd_collapse_column_order(const struct aml_tiling_nd_data * l) +{ + return AML_TYPE_TILING_COLUMN_ORDER; +} + +int +aml_tiling_nd_collapse_column_tile_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->tile_dims[i]; + } + return 0; +} + +int +aml_tiling_nd_collapse_column_tile_adims(const struct aml_tiling_nd_data *l, + size_t *tile_dims) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + memcpy((void*)tile_dims, (void*)d->tile_dims, sizeof(size_t)*d->ndims); + return 0; +} + +int +aml_tiling_nd_collapse_column_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + if (d->dims[i] > 1) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->dims[i]; + } + } + return 0; +} + +int +aml_tiling_nd_collapse_column_adims(const struct aml_tiling_nd_data *l, + size_t *dims) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + for(size_t i = 0, j = 0; 
i < d->ndims; i++) + if (d->dims[i] > 1) + dims[j++] = d->dims[i]; + return 0; +} + +size_t +aml_tiling_nd_collapse_column_ndims(const struct aml_tiling_nd_data *l) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + size_t ndims = 0; + for(size_t i = 0; i < d->ndims; i++) + if (d->dims[i] > 1) + ndims++; + return ndims; +} + +struct aml_tiling_nd_ops aml_tiling_nd_collapse_column_ops = { + aml_tiling_nd_collapse_column_index, + aml_tiling_nd_collapse_column_aindex, + aml_tiling_nd_collapse_column_order, + aml_tiling_nd_collapse_column_tile_dims, + aml_tiling_nd_collapse_column_tile_adims, + aml_tiling_nd_collapse_column_dims, + aml_tiling_nd_collapse_column_adims, + aml_tiling_nd_collapse_column_ndims +}; + +/*----------------------------------------------------------------------------*/ + +struct aml_layout* +aml_tiling_nd_collapse_row_aindex(const struct aml_tiling_nd_data *l, + const size_t *coords) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + size_t ndims = d->ndims; + size_t new_coords[ndims]; + size_t offsets[ndims]; + size_t dims[ndims]; + size_t strides[ndims]; + + for(size_t i = 0, j = 0; i < ndims; i++) + if (d->dims[ndims - i - 1] > 1) { + assert(coords[j] < d->dims[ndims - i - 1]); + new_coords[ndims - i - 1] = coords[j]; + j++; + } else + new_coords[ndims - i - 1] = 0; + for(size_t i = 0; i < ndims; i++) { + + offsets[i] = new_coords[i] * d->tile_dims[i]; + strides[i] = 1; + } + for(size_t i = 0; i < ndims; i++) + dims[i] = (new_coords[i] == d->dims[i] - 1 ? 
+ d->border_tile_dims[i] : + d->tile_dims[i] ); + return d->l->ops->aslice_column(d->l->data, offsets, dims, strides); +} + +struct aml_layout* +aml_tiling_nd_collapse_row_index(const struct aml_tiling_nd_data *l, + va_list coords) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + size_t n_coords[d->ndims]; + for(size_t i = 0, j = 0; i < d->ndims; i++) + if (d->dims[i] > 1) + n_coords[j++] = va_arg(coords, size_t); + return aml_tiling_nd_collapse_row_aindex(l, n_coords); +} + +int +aml_tiling_nd_collapse_row_order(const struct aml_tiling_nd_data * l) +{ + return AML_TYPE_TILING_ROW_ORDER; +} + +int +aml_tiling_nd_collapse_row_tile_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->tile_dims[d->ndims - i - 1]; + } + return 0; +} + +int +aml_tiling_nd_collapse_row_tile_adims(const struct aml_tiling_nd_data *l, + size_t *tile_dims) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + tile_dims[i] = d->tile_dims[d->ndims - i - 1]; + } + return 0; +} + +int +aml_tiling_nd_collapse_row_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + if (d->dims[d->ndims - i - 1] > 1) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->dims[d->ndims - i - 1]; + } + } + return 0; +} + +int +aml_tiling_nd_collapse_row_adims(const struct aml_tiling_nd_data *l, + size_t *dims) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + 
assert(d != NULL); + for(size_t i = 0, j = 0; i < d->ndims; i++) + if (d->dims[d->ndims - i - 1] > 1) + dims[j++] = d->dims[d->ndims - i - 1]; + return 0; +} + +size_t +aml_tiling_nd_collapse_row_ndims(const struct aml_tiling_nd_data *l) +{ + const struct aml_tiling_nd_data_collapse *d = + (const struct aml_tiling_nd_data_collapse *)l; + assert(d != NULL); + size_t ndims = 0; + for(size_t i = 0; i < d->ndims; i++) + if (d->dims[i] > 1) + ndims++; + return ndims; +} + +struct aml_tiling_nd_ops aml_tiling_nd_collapse_row_ops = { + aml_tiling_nd_collapse_row_index, + aml_tiling_nd_collapse_row_aindex, + aml_tiling_nd_collapse_row_order, + aml_tiling_nd_collapse_row_tile_dims, + aml_tiling_nd_collapse_row_tile_adims, + aml_tiling_nd_collapse_row_dims, + aml_tiling_nd_collapse_row_adims, + aml_tiling_nd_collapse_row_ndims +}; diff --git a/src/tiling_nd_pad.c b/src/tiling_nd_pad.c new file mode 100644 index 00000000..8445ddc2 --- /dev/null +++ b/src/tiling_nd_pad.c @@ -0,0 +1,426 @@ +#include + +int aml_tiling_nd_pad_struct_init(struct aml_tiling_nd *t, size_t ndims, + void *memory) +{ + struct aml_tiling_nd_data_pad *dataptr; + + assert(t == (struct aml_tiling_nd *)memory); + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_tiling_nd)); + dataptr = memory; + t->data = memory; + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_tiling_nd_data_pad)); + dataptr->l = NULL; + dataptr->ndims = ndims; + dataptr->tile_dims = (size_t *)memory; + dataptr->dims = dataptr->tile_dims + ndims; + dataptr->border_tile_dims = dataptr->dims + ndims; + dataptr->pad = dataptr->border_tile_dims + ndims; + dataptr->neutral = (void *)(dataptr->pad + ndims); + return 0; +} + +int aml_tiling_nd_pad_ainit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims, void *neutral) +{ + assert(t != NULL); + assert(t->data != NULL); + struct aml_tiling_nd_data_pad *data = + (struct aml_tiling_nd_data_pad *)t->data; + size_t 
element_size = aml_layout_element_size(l); + assert(data->ndims == ndims); + assert(data->tile_dims); + assert(data->dims); + assert(data->border_tile_dims); + assert(data->pad); + assert(data->neutral); + data->l = l; + int type = AML_TYPE_GET(tags, AML_TYPE_TILING_ORDER); + if (type == AML_TYPE_TILING_ROW_ORDER) { + AML_TYPE_SET(t->tags, AML_TYPE_TILING_ORDER, + AML_TYPE_TILING_ROW_ORDER); + t->ops = &aml_tiling_nd_pad_row_ops; + for (size_t i = 0; i < ndims; i++) + data->tile_dims[i] = tile_dims[ndims-i-1]; + } else { + AML_TYPE_SET(t->tags, AML_TYPE_TILING_ORDER, + AML_TYPE_TILING_COLUMN_ORDER); + t->ops = &aml_tiling_nd_pad_column_ops; + for (size_t i = 0; i < ndims; i++) + data->tile_dims[i] = tile_dims[i]; + } + size_t target_dims[ndims]; + l->ops->adims_column(l->data, target_dims); + for (size_t i = 0; i < ndims; i++) { + data->border_tile_dims[i] = target_dims[i] % data->tile_dims[i]; + data->dims[i] = target_dims[i] / data->tile_dims[i]; + if (data->border_tile_dims[i] == 0) + data->border_tile_dims[i] = data->tile_dims[i]; + else { + data->dims[i] += 1; + data->pad[i] = 1; + } + } + memcpy(data->neutral, neutral, element_size); + return 0; +} + +int aml_tiling_nd_pad_vinit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data) +{ + size_t tile_dims[ndims]; + void *neutral; + for(size_t i = 0; i < ndims; i++) + tile_dims[i] = va_arg(data, size_t); + neutral = va_arg(data, void*); + return aml_tiling_nd_pad_ainit(t, tags, l, ndims, tile_dims, neutral); +} + +int aml_tiling_nd_pad_init(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...) 
+{ + int err; + va_list ap; + va_start(ap, ndims); + err = aml_tiling_nd_pad_vinit(t, tags, l, ndims, ap); + va_end(ap); + return err; +} + +int aml_tiling_nd_pad_acreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims, void *neutral) +{ + assert(ndims > 0); + size_t element_size = aml_layout_element_size(l); + void *baseptr = calloc(1, AML_TILING_PAD_ALLOCSIZE(ndims, + element_size)); + *t = (struct aml_tiling_nd *)baseptr; + aml_tiling_nd_pad_struct_init(*t, ndims, baseptr); + return aml_tiling_nd_pad_ainit(*t, tags, l, ndims, tile_dims, neutral); +} + +int aml_tiling_nd_pad_vcreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data) +{ + assert(ndims > 0); + size_t element_size = aml_layout_element_size(l); + void *baseptr = calloc(1, AML_TILING_PAD_ALLOCSIZE(ndims, + element_size)); + *t = (struct aml_tiling_nd *)baseptr; + aml_tiling_nd_pad_struct_init(*t, ndims, baseptr); + return aml_tiling_nd_pad_vinit(*t, tags, l, ndims, data); +} + +int aml_tiling_nd_pad_create(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...) 
+{ + int err; + va_list ap; + assert(ndims > 0); + size_t element_size = aml_layout_element_size(l); + void *baseptr = calloc(1, AML_TILING_PAD_ALLOCSIZE(ndims, + element_size)); + *t = (struct aml_tiling_nd *)baseptr; + aml_tiling_nd_pad_struct_init(*t, ndims, baseptr); + va_start(ap, ndims); + err = aml_tiling_nd_pad_vinit(*t, tags, l, ndims, ap); + va_end(ap); + return err; +} + +/*----------------------------------------------------------------------------*/ + +struct aml_layout* +aml_tiling_nd_pad_column_aindex(const struct aml_tiling_nd_data *l, + const size_t *coords) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + size_t ndims = d->ndims; + size_t offsets[ndims]; + size_t dims[ndims]; + size_t strides[ndims]; + for (size_t i = 0; i < ndims; i++) + assert(coords[i] < d->dims[i]); + for (size_t i = 0; i < ndims; i++) { + offsets[i] = coords[i] * d->tile_dims[i]; + strides[i] = 1; + } + + int pad = 0; + for (size_t i = 0; i < ndims; i++) { + if (coords[i] == d->dims[i] - 1) { + dims[i] = d->border_tile_dims[i]; + if (d->pad[i]) + pad = 1; + } else + dims[i] = d->tile_dims[i]; + } + struct aml_layout *res = d->l->ops->aslice_column(d->l->data, offsets, + dims, strides); + if (pad) { + struct aml_layout *p_layout; + int order = aml_layout_order(d->l); + if (order == AML_TYPE_LAYOUT_COLUMN_ORDER) { + /* WARNING: OWNERSHIP!!! */ + aml_layout_pad_acreate(&p_layout, + AML_TYPE_LAYOUT_COLUMN_ORDER, + res, d->tile_dims, d->neutral); + } else { + size_t row_dims[ndims]; + for (size_t i = 0; i < ndims; i++) + row_dims[i] = d->tile_dims[i]; + /* WARNING: OWNERSHIP!!! 
*/ + aml_layout_pad_acreate(&p_layout, + AML_TYPE_LAYOUT_ROW_ORDER, + res, row_dims, d->neutral); + } + return p_layout; + } else + return res; +} + +struct aml_layout* +aml_tiling_nd_pad_column_index(const struct aml_tiling_nd_data *l, + va_list coords) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + size_t n_coords[d->ndims]; + for (size_t i = 0; i < d->ndims; i++) + n_coords[i] = va_arg(coords, size_t); + return aml_tiling_nd_pad_column_aindex(l, n_coords); +} + +int +aml_tiling_nd_pad_column_order(const struct aml_tiling_nd_data * l) +{ + return AML_TYPE_TILING_COLUMN_ORDER; +} + +int +aml_tiling_nd_pad_column_tile_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->tile_dims[i]; + } + return 0; +} + +int +aml_tiling_nd_pad_column_tile_adims(const struct aml_tiling_nd_data *l, + size_t *tile_dims) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + memcpy((void*)tile_dims, (void*)d->tile_dims, sizeof(size_t)*d->ndims); + return 0; +} + +int +aml_tiling_nd_pad_column_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->dims[i]; + } + return 0; +} + +int +aml_tiling_nd_pad_column_adims(const struct aml_tiling_nd_data *l, + size_t *dims) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + memcpy((void*)dims, (void*)d->dims, sizeof(size_t)*d->ndims); + return 0; +} + +size_t +aml_tiling_nd_pad_column_ndims(const struct 
aml_tiling_nd_data *l) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + return d->ndims; +} + +struct aml_tiling_nd_ops aml_tiling_nd_pad_column_ops = { + aml_tiling_nd_pad_column_index, + aml_tiling_nd_pad_column_aindex, + aml_tiling_nd_pad_column_order, + aml_tiling_nd_pad_column_tile_dims, + aml_tiling_nd_pad_column_tile_adims, + aml_tiling_nd_pad_column_dims, + aml_tiling_nd_pad_column_adims, + aml_tiling_nd_pad_column_ndims +}; + +/*----------------------------------------------------------------------------*/ + +struct aml_layout* +aml_tiling_nd_pad_row_aindex(const struct aml_tiling_nd_data *l, + const size_t *coords) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + size_t ndims = d->ndims; + size_t offsets[ndims]; + size_t dims[ndims]; + size_t strides[ndims]; + + for(size_t i = 0; i < ndims; i++) + assert(coords[ndims - i - 1] < d->dims[i]); + for(size_t i = 0; i < ndims; i++) { + offsets[i] = coords[ndims - i - 1] * d->tile_dims[i]; + strides[i] = 1; + } + + int pad = 0; + for (size_t i = 0; i < ndims; i++) { + if (coords[ndims - i - 1] == d->dims[i] - 1) { + dims[i] = d->border_tile_dims[i]; + if (d->pad[i]) + pad = 1; + } else + dims[i] = d->tile_dims[i]; + } + struct aml_layout *res = d->l->ops->aslice_column(d->l->data, offsets, + dims, strides); + if (pad) { + struct aml_layout *p_layout; + int order = aml_layout_order(d->l); + if (order == AML_TYPE_LAYOUT_COLUMN_ORDER) { + /* WARNING: OWNERSHIP!!! */ + aml_layout_pad_acreate(&p_layout, + AML_TYPE_LAYOUT_COLUMN_ORDER, + res, d->tile_dims, d->neutral); + } else { + size_t row_dims[ndims]; + for (size_t i = 0; i < ndims; i++) + row_dims[i] = d->tile_dims[ndims - i - 1]; + /* WARNING: OWNERSHIP!!! 
*/ + aml_layout_pad_acreate(&p_layout, + AML_TYPE_LAYOUT_ROW_ORDER, + res, row_dims, d->neutral); + } + return p_layout; + } else + return res; +} + +struct aml_layout* +aml_tiling_nd_pad_row_index(const struct aml_tiling_nd_data *l, + va_list coords) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + size_t n_coords[d->ndims]; + for(size_t i = 0; i < d->ndims; i++) + n_coords[i] = va_arg(coords, size_t); + return aml_tiling_nd_pad_row_aindex(l, n_coords); +} + +int +aml_tiling_nd_pad_row_order(const struct aml_tiling_nd_data * l) +{ + return AML_TYPE_TILING_ROW_ORDER; +} + +int +aml_tiling_nd_pad_row_tile_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->tile_dims[d->ndims - i - 1]; + } + return 0; +} + +int +aml_tiling_nd_pad_row_tile_adims(const struct aml_tiling_nd_data *l, + size_t *tile_dims) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + tile_dims[i] = d->tile_dims[d->ndims - i - 1]; + } + return 0; +} + +int +aml_tiling_nd_pad_row_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->dims[d->ndims - i - 1]; + } + return 0; +} + +int +aml_tiling_nd_pad_row_adims(const struct aml_tiling_nd_data *l, + size_t *dims) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + dims[i] = d->dims[d->ndims - i - 1]; + } + return 0; +} + +size_t 
+aml_tiling_nd_pad_row_ndims(const struct aml_tiling_nd_data *l) +{ + const struct aml_tiling_nd_data_pad *d = + (const struct aml_tiling_nd_data_pad *)l; + assert(d != NULL); + return d->ndims; +} + +struct aml_tiling_nd_ops aml_tiling_nd_pad_row_ops = { + aml_tiling_nd_pad_row_index, + aml_tiling_nd_pad_row_aindex, + aml_tiling_nd_pad_row_order, + aml_tiling_nd_pad_row_tile_dims, + aml_tiling_nd_pad_row_tile_adims, + aml_tiling_nd_pad_row_dims, + aml_tiling_nd_pad_row_adims, + aml_tiling_nd_pad_row_ndims +}; diff --git a/src/tiling_nd_resize.c b/src/tiling_nd_resize.c new file mode 100644 index 00000000..0352a737 --- /dev/null +++ b/src/tiling_nd_resize.c @@ -0,0 +1,356 @@ +#include + +int aml_tiling_nd_resize_struct_init(struct aml_tiling_nd *t, size_t ndims, + void *memory) +{ + struct aml_tiling_nd_data_resize *dataptr; + + assert(t == (struct aml_tiling_nd *)memory); + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_tiling_nd)); + dataptr = memory; + t->data = memory; + memory = (void *)((uintptr_t)memory + + sizeof(struct aml_tiling_nd_data_resize)); + dataptr->l = NULL; + dataptr->ndims = ndims; + dataptr->tile_dims = (size_t *)memory; + dataptr->dims = dataptr->tile_dims + ndims; + dataptr->border_tile_dims = dataptr->dims + ndims; + return 0; +} + +int aml_tiling_nd_resize_ainit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims) +{ + assert(t != NULL); + assert(t->data != NULL); + struct aml_tiling_nd_data_resize *data = + (struct aml_tiling_nd_data_resize *)t->data; + assert(data->ndims == ndims); + assert(data->tile_dims); + assert(data->dims); + assert(data->border_tile_dims); + data->l = l; + int type = AML_TYPE_GET(tags, AML_TYPE_TILING_ORDER); + if (type == AML_TYPE_TILING_ROW_ORDER) { + AML_TYPE_SET(t->tags, AML_TYPE_TILING_ORDER, + AML_TYPE_TILING_ROW_ORDER); + t->ops = &aml_tiling_nd_resize_row_ops; + for (size_t i = 0; i < ndims; i++) + data->tile_dims[i] = 
tile_dims[ndims-i-1]; + } else { + AML_TYPE_SET(t->tags, AML_TYPE_TILING_ORDER, + AML_TYPE_TILING_COLUMN_ORDER); + t->ops = &aml_tiling_nd_resize_column_ops; + for (size_t i = 0; i < ndims; i++) + data->tile_dims[i] = tile_dims[i]; + } + size_t target_dims[ndims]; + l->ops->adims_column(l->data, target_dims); + for (size_t i = 0; i < ndims; i++) { + data->border_tile_dims[i] = target_dims[i] % data->tile_dims[i]; + data->dims[i] = target_dims[i] / data->tile_dims[i]; + if (data->border_tile_dims[i] == 0) + data->border_tile_dims[i] = data->tile_dims[i]; + else + data->dims[i] += 1; + } + return 0; +} + +int aml_tiling_nd_resize_vinit(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data) +{ + size_t tile_dims[ndims]; + for(size_t i = 0; i < ndims; i++) + tile_dims[i] = va_arg(data, size_t); + return aml_tiling_nd_resize_ainit(t, tags, l, ndims, tile_dims); +} + +int aml_tiling_nd_resize_init(struct aml_tiling_nd *t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...) 
+{ + int err; + va_list ap; + va_start(ap, ndims); + err = aml_tiling_nd_resize_vinit(t, tags, l, ndims, ap); + va_end(ap); + return err; +} + +int aml_tiling_nd_resize_acreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + const size_t *tile_dims) +{ + assert(ndims > 0); + void *baseptr = calloc(1, AML_TILING_RESIZE_ALLOCSIZE(ndims)); + *t = (struct aml_tiling_nd *)baseptr; + aml_tiling_nd_resize_struct_init(*t, ndims, baseptr); + return aml_tiling_nd_resize_ainit(*t, tags, l, ndims, tile_dims); +} + +int aml_tiling_nd_resize_vcreate(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, + va_list data) +{ + assert(ndims > 0); + void *baseptr = calloc(1, AML_TILING_RESIZE_ALLOCSIZE(ndims)); + *t = (struct aml_tiling_nd *)baseptr; + aml_tiling_nd_resize_struct_init(*t, ndims, baseptr); + return aml_tiling_nd_resize_vinit(*t, tags, l, ndims, data); +} + +int aml_tiling_nd_resize_create(struct aml_tiling_nd **t, uint64_t tags, + const struct aml_layout *l, size_t ndims, ...) 
+{ + int err; + va_list ap; + assert(ndims > 0); + void *baseptr = calloc(1, AML_TILING_RESIZE_ALLOCSIZE(ndims)); + *t = (struct aml_tiling_nd *)baseptr; + aml_tiling_nd_resize_struct_init(*t, ndims, baseptr); + va_start(ap, ndims); + err = aml_tiling_nd_resize_vinit(*t, tags, l, ndims, ap); + va_end(ap); + return err; +} + +/*----------------------------------------------------------------------------*/ + +struct aml_layout* +aml_tiling_nd_resize_column_aindex(const struct aml_tiling_nd_data *l, + const size_t *coords) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + size_t ndims = d->ndims; + size_t offsets[ndims]; + size_t dims[ndims]; + size_t strides[ndims]; + for(size_t i = 0; i < ndims; i++) + assert(coords[i] < d->dims[i]); + for(size_t i = 0; i < ndims; i++) { + offsets[i] = coords[i] * d->tile_dims[i]; + strides[i] = 1; + } + for(size_t i = 0; i < ndims; i++) + dims[i] = (coords[i] == d->dims[i] - 1 ? + d->border_tile_dims[i] : + d->tile_dims[i] ); + return d->l->ops->aslice_column(d->l->data, offsets, dims, strides); +} + +struct aml_layout* +aml_tiling_nd_resize_column_index(const struct aml_tiling_nd_data *l, + va_list coords) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + size_t n_coords[d->ndims]; + for(size_t i = 0; i < d->ndims; i++) + n_coords[i] = va_arg(coords, size_t); + return aml_tiling_nd_resize_column_aindex(l, n_coords); +} + +int +aml_tiling_nd_resize_column_order(const struct aml_tiling_nd_data * l) +{ + return AML_TYPE_TILING_COLUMN_ORDER; +} + +int +aml_tiling_nd_resize_column_tile_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->tile_dims[i]; + } + return 0; +} + +int 
+aml_tiling_nd_resize_column_tile_adims(const struct aml_tiling_nd_data *l, + size_t *tile_dims) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + memcpy((void*)tile_dims, (void*)d->tile_dims, sizeof(size_t)*d->ndims); + return 0; +} + +int +aml_tiling_nd_resize_column_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->dims[i]; + } + return 0; +} + +int +aml_tiling_nd_resize_column_adims(const struct aml_tiling_nd_data *l, + size_t *dims) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + memcpy((void*)dims, (void*)d->dims, sizeof(size_t)*d->ndims); + return 0; +} + +size_t +aml_tiling_nd_resize_column_ndims(const struct aml_tiling_nd_data *l) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + return d->ndims; +} + +struct aml_tiling_nd_ops aml_tiling_nd_resize_column_ops = { + aml_tiling_nd_resize_column_index, + aml_tiling_nd_resize_column_aindex, + aml_tiling_nd_resize_column_order, + aml_tiling_nd_resize_column_tile_dims, + aml_tiling_nd_resize_column_tile_adims, + aml_tiling_nd_resize_column_dims, + aml_tiling_nd_resize_column_adims, + aml_tiling_nd_resize_column_ndims +}; + +/*----------------------------------------------------------------------------*/ + +struct aml_layout* +aml_tiling_nd_resize_row_aindex(const struct aml_tiling_nd_data *l, + const size_t *coords) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + size_t ndims = d->ndims; + size_t offsets[ndims]; + size_t dims[ndims]; + size_t strides[ndims]; + + for(size_t i = 0; i < ndims; 
i++) + assert(coords[ndims - i - 1] < d->dims[i]); + for(size_t i = 0; i < ndims; i++) { + offsets[i] = coords[ndims - i - 1] * d->tile_dims[i]; + strides[i] = 1; + } + for(size_t i = 0; i < ndims; i++) + dims[i] = (coords[ndims - i - 1] == d->dims[i] - 1 ? + d->border_tile_dims[i] : + d->tile_dims[i] ); + return d->l->ops->aslice_column(d->l->data, offsets, dims, strides); +} + +struct aml_layout* +aml_tiling_nd_resize_row_index(const struct aml_tiling_nd_data *l, + va_list coords) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + size_t n_coords[d->ndims]; + for(size_t i = 0; i < d->ndims; i++) + n_coords[i] = va_arg(coords, size_t); + return aml_tiling_nd_resize_row_aindex(l, n_coords); +} + +int +aml_tiling_nd_resize_row_order(const struct aml_tiling_nd_data * l) +{ + return AML_TYPE_TILING_ROW_ORDER; +} + +int +aml_tiling_nd_resize_row_tile_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->tile_dims[d->ndims - i - 1]; + } + return 0; +} + +int +aml_tiling_nd_resize_row_tile_adims(const struct aml_tiling_nd_data *l, + size_t *tile_dims) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + tile_dims[i] = d->tile_dims[d->ndims - i - 1]; + } + return 0; +} + +int +aml_tiling_nd_resize_row_dims(const struct aml_tiling_nd_data *l, + va_list dims_ptrs) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + size_t *dim = va_arg(dims_ptrs, size_t*); + assert(dim != NULL); + *dim = d->dims[d->ndims - i - 1]; + } + return 0; +} + +int +aml_tiling_nd_resize_row_adims(const 
struct aml_tiling_nd_data *l, + size_t *dims) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + for(size_t i = 0; i < d->ndims; i++) { + dims[i] = d->dims[d->ndims - i - 1]; + } + return 0; +} + +size_t +aml_tiling_nd_resize_row_ndims(const struct aml_tiling_nd_data *l) +{ + const struct aml_tiling_nd_data_resize *d = + (const struct aml_tiling_nd_data_resize *)l; + assert(d != NULL); + return d->ndims; +} + +struct aml_tiling_nd_ops aml_tiling_nd_resize_row_ops = { + aml_tiling_nd_resize_row_index, + aml_tiling_nd_resize_row_aindex, + aml_tiling_nd_resize_row_order, + aml_tiling_nd_resize_row_tile_dims, + aml_tiling_nd_resize_row_tile_adims, + aml_tiling_nd_resize_row_dims, + aml_tiling_nd_resize_row_adims, + aml_tiling_nd_resize_row_ndims +}; diff --git a/tests/Makefile.am b/tests/Makefile.am index 7a053236..35ccff90 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -39,7 +39,8 @@ UNIT_TESTS = $(ARENA_JEMALLOC_TESTS) \ $(AREA_LINUX_TESTS) \ $(AREA_POSIX_TESTS) \ $(DMA_LINUX_TESTS) \ - $(SCRATCH_TESTS) + $(SCRATCH_TESTS) \ + layout copy tiling_nd dma_layout # all tests TST_PROGS = $(UNIT_TESTS) diff --git a/tests/copy.c b/tests/copy.c new file mode 100644 index 00000000..854719fa --- /dev/null +++ b/tests/copy.c @@ -0,0 +1,1178 @@ +#include +#include + +void test_copy_2d(void) +{ + size_t elem_number[2] = { 5, 3 }; + size_t src_pitch[2] = { 10, 6 }; + size_t dst_pitch[2] = { 5, 3 }; + + double src[6][10]; + double dst[3][5]; + double dst2[6][10]; + + double ref_dst2[6][10]; + double ref_dst[3][5]; + + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[j][i] = (double)(i + j * 10); + ref_dst2[j][i] = 0.0; + dst2[j][i] = 0.0; + } + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[j][i] = 0.0; + ref_dst[j][i] = src[j][i]; + ref_dst2[j][i] = src[j][i]; + } + + aml_copy_nd(2, dst, dst_pitch, src, src_pitch, elem_number, + sizeof(double)); + for (int j = 0; j < 
3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[j][i] == dst[j][i]); + + aml_copy_nd(2, dst2, src_pitch, dst, dst_pitch, elem_number, + sizeof(double)); + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[j][i] == dst2[j][i]); + +} + +void test_copy_t2d(void) +{ + size_t elem_number[2] = { 5, 3 }; + size_t elem_number2[2] = { 3, 5 }; + size_t src_pitch[2] = { 10, 6 }; + size_t dst_pitch[2] = { 3, 5 }; + + double src[6][10]; + double dst[5][3]; + double dst2[6][10]; + + double ref_dst2[6][10]; + double ref_dst[5][3]; + + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[j][i] = (double)(i + j * 10); + ref_dst2[j][i] = 0.0; + dst2[j][i] = 0.0; + } + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][j] = 0.0; + ref_dst[i][j] = src[j][i]; + ref_dst2[j][i] = src[j][i]; + } + + aml_copy_tnd(2, dst, dst_pitch, src, src_pitch, elem_number, + sizeof(double)); + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][j] == dst[i][j]); + + aml_copy_tnd(2, dst2, src_pitch, dst, dst_pitch, elem_number2, + sizeof(double)); + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[j][i] == dst2[j][i]); + +} + +void test_copy_3d(void) +{ + size_t elem_number[3] = { 5, 3, 2 }; + size_t src_pitch[3] = { 10, 6, 4 }; + size_t dst_pitch[3] = { 5, 3, 2 }; + + double src[4][6][10]; + double dst[2][3][5]; + double dst2[4][6][10]; + + double ref_dst2[4][6][10]; + double ref_dst[2][3][5]; + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[k][j][i] = + (double)(i + j * 10 + k * 10 * 6); + ref_dst2[k][j][i] = 0.0; + dst2[k][j][i] = 0.0; + } + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[k][j][i] = 0.0; + ref_dst[k][j][i] = src[k][j][i]; + ref_dst2[k][j][i] = src[k][j][i]; + } + + aml_copy_nd(3, dst, dst_pitch, src, src_pitch, elem_number, + sizeof(double)); + for (int k = 0; k < 2; 
k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[k][j][i] == dst[k][j][i]); + + aml_copy_nd(3, dst2, src_pitch, dst, dst_pitch, elem_number, + sizeof(double)); + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[k][j][i] == dst2[k][j][i]); + +} + +void test_copy_3d_c(void) +{ + size_t elem_number[3] = { 5, 3, 2 }; + size_t c_src_pitch[4] = { 8, 8 * 10, 8 * 10 * 6, 8 * 10 * 6 * 4 }; + size_t c_dst_pitch[4] = { 8, 8 * 5, 8 * 5 * 3, 8 * 5 * 3 * 2 }; + + double src[4][6][10]; + double dst[2][3][5]; + double dst2[4][6][10]; + + double ref_dst2[4][6][10]; + double ref_dst[2][3][5]; + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[k][j][i] = + (double)(i + j * 10 + k * 10 * 6); + ref_dst2[k][j][i] = 0.0; + dst2[k][j][i] = 0.0; + } + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[k][j][i] = 0.0; + ref_dst[k][j][i] = src[k][j][i]; + ref_dst2[k][j][i] = src[k][j][i]; + } + + aml_copy_nd_c(3, dst, c_dst_pitch, src, c_src_pitch, elem_number, + sizeof(double)); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[k][j][i] == dst[k][j][i]); + + aml_copy_nd_c(3, dst2, c_src_pitch, dst, c_dst_pitch, elem_number, + sizeof(double)); + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[k][j][i] == dst2[k][j][i]); + +} + +void test_copy_3dstr(void) +{ + size_t elem_number[3] = { 5, 3, 2 }; + size_t src_pitch[3] = { 10, 6, 4 }; + size_t src_stride[3] = { 2, 2, 2 }; + size_t dst_pitch[3] = { 5, 3, 2 }; + size_t dst_stride[3] = { 1, 1, 1 }; + + double src[4][6][10]; + double dst[2][3][5]; + double dst2[4][6][10]; + + double ref_dst2[4][6][10]; + double ref_dst[2][3][5]; + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[k][j][i] = + (double)(i 
+ j * 10 + k * 10 * 6); + ref_dst2[k][j][i] = 0.0; + dst2[k][j][i] = 0.0; + } + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[k][j][i] = 0.0; + ref_dst[k][j][i] = src[2 * k][2 * j][2 * i]; + ref_dst2[2 * k][2 * j][2 * i] = + src[2 * k][2 * j][2 * i]; + } + + aml_copy_ndstr(3, dst, dst_pitch, dst_stride, src, src_pitch, + src_stride, elem_number, sizeof(double)); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[k][j][i] == dst[k][j][i]); + + aml_copy_ndstr(3, dst2, src_pitch, src_stride, dst, dst_pitch, + dst_stride, elem_number, sizeof(double)); + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[k][j][i] == dst2[k][j][i]); +} + +void test_copy_3dstr_c(void) +{ + size_t elem_number[3] = { 5, 3, 2 }; + size_t c_src_pitch[4] = { 8, 8 * 10, 8 * 10 * 6, 8 * 10 * 6 * 4 }; + size_t src_stride[3] = { 2, 2, 2 }; + size_t c_dst_pitch[4] = { 8, 8 * 5, 8 * 5 * 3, 8 * 5 * 3 * 2 }; + size_t dst_stride[3] = { 1, 1, 1 }; + + double src[4][6][10]; + double dst[2][3][5]; + double dst2[4][6][10]; + + double ref_dst2[4][6][10]; + double ref_dst[2][3][5]; + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[k][j][i] = + (double)(i + j * 10 + k * 10 * 6); + ref_dst2[k][j][i] = 0.0; + dst2[k][j][i] = 0.0; + } + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[k][j][i] = 0.0; + ref_dst[k][j][i] = src[2 * k][2 * j][2 * i]; + ref_dst2[2 * k][2 * j][2 * i] = + src[2 * k][2 * j][2 * i]; + } + + aml_copy_ndstr_c(3, dst, c_dst_pitch, dst_stride, src, c_src_pitch, + src_stride, elem_number, sizeof(double)); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[k][j][i] == dst[k][j][i]); + + aml_copy_ndstr_c(3, dst2, c_src_pitch, src_stride, dst, c_dst_pitch, + dst_stride, elem_number, 
sizeof(double)); + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[k][j][i] == dst2[k][j][i]); +} + +void test_copy_t3d(void) +{ + size_t elem_number[3] = { 5, 3, 2 }; + size_t elem_number2[3] = { 3, 2, 5 }; + size_t elem_number3[3] = { 2, 5, 3 }; + size_t src_pitch[3] = { 10, 6, 4 }; + size_t dst_pitch[3] = { 3, 2, 5 }; + size_t dst_pitch2[3] = { 2, 5, 3 }; + + double src[4][6][10]; + double dst[5][2][3]; + double dst2[3][5][2]; + double dst3[4][6][10]; + + double ref_dst[5][2][3]; + double ref_dst2[3][5][2]; + double ref_dst3[4][6][10]; + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[k][j][i] = + (double)(i + j * 10 + k * 10 * 6); + ref_dst3[k][j][i] = 0.0; + dst3[k][j][i] = 0.0; + } + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][k][j] = 0.0; + dst2[j][i][k] = 0.0; + ref_dst[i][k][j] = src[k][j][i]; + ref_dst2[j][i][k] = src[k][j][i]; + ref_dst3[k][j][i] = src[k][j][i]; + } + + aml_copy_tnd(3, dst, dst_pitch, src, src_pitch, elem_number, + sizeof(double)); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][k][j] == dst[i][k][j]); + + aml_copy_tnd(3, dst2, dst_pitch2, dst, dst_pitch, elem_number2, + sizeof(double)); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst2[j][i][k] == dst2[j][i][k]); + + aml_copy_tnd(3, dst3, src_pitch, dst2, dst_pitch2, elem_number3, + sizeof(double)); + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst3[k][j][i] == dst3[k][j][i]); +} + +void test_copy_rt3d(void) +{ + size_t elem_number[3] = { 5, 3, 2 }; + size_t elem_number2[3] = { 2, 5, 3 }; + size_t elem_number3[3] = { 3, 2, 5 }; + size_t src_pitch[3] = { 10, 6, 4 }; + size_t dst_pitch[3] = { 2, 5, 3 }; + size_t dst_pitch2[3] = { 3, 2, 5 }; + + double 
src[4][6][10]; + double dst[3][5][2]; + double dst2[5][2][3]; + double dst3[4][6][10]; + + double ref_dst[3][5][2]; + double ref_dst2[5][2][3]; + double ref_dst3[4][6][10]; + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[k][j][i] = + (double)(i + j * 10 + k * 10 * 6); + ref_dst3[k][j][i] = 0.0; + dst3[k][j][i] = 0.0; + } + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[j][i][k] = 0.0; + dst2[i][k][j] = 0.0; + ref_dst[j][i][k] = src[k][j][i]; + ref_dst2[i][k][j] = src[k][j][i]; + ref_dst3[k][j][i] = src[k][j][i]; + } + + aml_copy_rtnd(3, dst, dst_pitch, src, src_pitch, elem_number, + sizeof(double)); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[j][i][k] == dst[j][i][k]); + + aml_copy_rtnd(3, dst2, dst_pitch2, dst, dst_pitch, elem_number2, + sizeof(double)); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst2[i][k][j] == dst2[i][k][j]); + + aml_copy_rtnd(3, dst3, src_pitch, dst2, dst_pitch2, elem_number3, + sizeof(double)); + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst3[k][j][i] == dst3[k][j][i]); +} + +void test_copy_t4d(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 3, 2, 4, 5 }; + size_t src_pitch[4] = { 10, 6, 4, 8 }; + size_t dst_pitch[4] = { 3, 2, 4, 5 }; + + double src[8][4][6][10]; + double dst[5][4][2][3]; + double dst2[8][4][6][10]; + + double ref_dst[5][4][2][3]; + double ref_dst2[8][4][6][10]; + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for 
(int i = 0; i < 5; i++) { + dst[i][l][k][j] = 0.0; + ref_dst[i][l][k][j] = src[l][k][j][i]; + ref_dst2[l][k][j][i] = src[l][k][j][i]; + } + + aml_copy_tnd(4, dst, dst_pitch, src, src_pitch, elem_number, + sizeof(double)); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][k][j] == + dst[i][l][k][j]); + + aml_copy_rtnd(4, dst2, src_pitch, dst, dst_pitch, elem_number2, + sizeof(double)); + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + +void test_copy_t4d_c(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 3, 2, 4, 5 }; + size_t c_src_pitch[5] = { 8, 8 * 10, 8 * 10 * 6, 8 * 10 * 6 * 4, + 8 * 10 * 6 * 4 * 8 }; + size_t c_dst_pitch[5] = { 8, 8 * 3, 8 * 3 * 2, 8 * 3 * 2 * 4, + 8 * 3 * 2 * 4 * 5 }; + + double src[8][4][6][10]; + double dst[5][4][2][3]; + double dst2[8][4][6][10]; + + double ref_dst[5][4][2][3]; + double ref_dst2[8][4][6][10]; + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][k][j] = 0.0; + ref_dst[i][l][k][j] = src[l][k][j][i]; + ref_dst2[l][k][j][i] = src[l][k][j][i]; + } + + aml_copy_tnd_c(4, dst, c_dst_pitch, src, c_src_pitch, elem_number, + sizeof(double)); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][k][j] == + dst[i][l][k][j]); + + aml_copy_rtnd_c(4, dst2, c_src_pitch, dst, c_dst_pitch, elem_number2, + sizeof(double)); + for (int l = 0; l < 8; l++) + for (int k = 
0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + +void test_copy_t4dstr(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 3, 2, 4, 5 }; + size_t src_pitch[4] = { 10, 6, 4, 8 }; + size_t src_stride[4] = { 2, 2, 2, 2 }; + size_t dst_pitch[4] = { 3, 2, 4, 5 }; + size_t dst_stride[4] = { 1, 1, 1, 1 }; + + double src[8][4][6][10]; + double dst[5][4][2][3]; + double dst2[8][4][6][10]; + + double ref_dst[5][4][2][3]; + double ref_dst2[8][4][6][10]; + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][k][j] = 0.0; + ref_dst[i][l][k][j] = + src[2 * l][2 * k][2 * j][2 * i]; + ref_dst2[2 * l][2 * k][2 * j][2 * i] = + src[2 * l][2 * k][2 * j][2 * i]; + } + + aml_copy_tndstr(4, dst, dst_pitch, dst_stride, src, src_pitch, + src_stride, elem_number, sizeof(double)); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][k][j] == + dst[i][l][k][j]); + + aml_copy_rtndstr(4, dst2, src_pitch, src_stride, dst, dst_pitch, + dst_stride, elem_number2, sizeof(double)); + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + +void test_copy_t4dstr_c(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 3, 2, 4, 5 }; + size_t c_src_pitch[5] = { 8, 8 * 10, 8 * 10 * 6, 8 * 10 * 6 * 4, + 8 * 10 * 6 * 4 * 8 }; + size_t src_stride[4] = { 2, 2, 2, 2 }; + size_t c_dst_pitch[5] = { 8, 8 * 3, 8 * 3 * 2, 8 * 3 * 2 
* 4, + 8 * 3 * 2 * 4 * 5 }; + size_t dst_stride[4] = { 1, 1, 1, 1 }; + + double src[8][4][6][10]; + double dst[5][4][2][3]; + double dst2[8][4][6][10]; + + double ref_dst[5][4][2][3]; + double ref_dst2[8][4][6][10]; + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][k][j] = 0.0; + ref_dst[i][l][k][j] = + src[2 * l][2 * k][2 * j][2 * i]; + ref_dst2[2 * l][2 * k][2 * j][2 * i] = + src[2 * l][2 * k][2 * j][2 * i]; + } + + aml_copy_tndstr_c(4, dst, c_dst_pitch, dst_stride, src, c_src_pitch, + src_stride, elem_number, sizeof(double)); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][k][j] == + dst[i][l][k][j]); + + aml_copy_rtndstr_c(4, dst2, c_src_pitch, src_stride, dst, c_dst_pitch, + dst_stride, elem_number2, sizeof(double)); + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + +void test_copy_sh4d(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 2, 3, 4, 5 }; + size_t target_dims[4] = { 2, 1, 3, 0 }; + size_t target_dims2[4] = { 3, 1, 0, 2 }; + size_t src_pitch[4] = { 10, 6, 4, 8 }; + size_t dst_pitch[4] = { 2, 3, 4, 5 }; + + double src[8][4][6][10]; + double dst[5][4][3][2]; + double dst2[8][4][6][10]; + + double ref_dst[5][4][3][2]; + double ref_dst2[8][4][6][10]; + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + 
ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][j][k] = 0.0; + ref_dst[i][l][j][k] = src[l][k][j][i]; + ref_dst2[l][k][j][i] = src[l][k][j][i]; + } + + aml_copy_shnd(4, target_dims, dst, dst_pitch, src, src_pitch, + elem_number, sizeof(double)); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][j][k] == + dst[i][l][j][k]); + + aml_copy_shnd(4, target_dims2, dst2, src_pitch, dst, dst_pitch, + elem_number2, sizeof(double)); + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + +void test_copy_sh4d_c(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 2, 3, 4, 5 }; + size_t target_dims[4] = { 2, 1, 3, 0 }; + size_t target_dims2[4] = { 3, 1, 0, 2 }; + size_t c_src_pitch[5] = { 8, 8 * 10, 8 * 10 * 6, 8 * 10 * 6 * 4, + 8 * 10 * 6 * 4 * 8 }; + size_t c_dst_pitch[5] = { 8, 8 * 2, 8 * 2 * 3, 8 * 2 * 3 * 4, + 8 * 2 * 3 * 4 * 5 }; + + double src[8][4][6][10]; + double dst[5][4][3][2]; + double dst2[8][4][6][10]; + + double ref_dst[5][4][3][2]; + double ref_dst2[8][4][6][10]; + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][j][k] = 0.0; + ref_dst[i][l][j][k] = src[l][k][j][i]; + ref_dst2[l][k][j][i] = src[l][k][j][i]; + } + + aml_copy_shnd_c(4, target_dims, dst, c_dst_pitch, src, c_src_pitch, + elem_number, sizeof(double)); + for (int l = 0; l < 4; 
l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][j][k] == + dst[i][l][j][k]); + + aml_copy_shnd_c(4, target_dims2, dst2, c_src_pitch, dst, c_dst_pitch, + elem_number2, sizeof(double)); + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + +void test_copy_sh4dstr(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 2, 3, 4, 5 }; + size_t target_dims[4] = { 2, 1, 3, 0 }; + size_t target_dims2[4] = { 3, 1, 0, 2 }; + size_t src_pitch[4] = { 10, 6, 4, 8 }; + size_t src_stride[4] = { 2, 2, 2, 2 }; + size_t dst_pitch[4] = { 2, 3, 4, 5 }; + size_t dst_stride[4] = { 1, 1, 1, 1 }; + + double src[8][4][6][10]; + double dst[5][4][3][2]; + double dst2[8][4][6][10]; + + double ref_dst[5][4][3][2]; + double ref_dst2[8][4][6][10]; + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][j][k] = 0.0; + ref_dst[i][l][j][k] = + src[2 * l][2 * k][2 * j][2 * i]; + ref_dst2[2 * l][2 * k][2 * j][2 * i] = + src[2 * l][2 * k][2 * j][2 * i]; + } + + aml_copy_shndstr(4, target_dims, dst, dst_pitch, dst_stride, src, + src_pitch, src_stride, elem_number, sizeof(double)); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][j][k] == + dst[i][l][j][k]); + + aml_copy_shndstr(4, target_dims2, dst2, src_pitch, src_stride, dst, + dst_pitch, dst_stride, elem_number2, sizeof(double)); + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 
0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + +void test_copy_sh4dstr_c(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 2, 3, 4, 5 }; + size_t target_dims[4] = { 2, 1, 3, 0 }; + size_t target_dims2[4] = { 3, 1, 0, 2 }; + size_t c_src_pitch[5] = { 8, 8 * 10, 8 * 10 * 6, 8 * 10 * 6 * 4, + 8 * 10 * 6 * 4 * 8 }; + size_t src_stride[4] = { 2, 2, 2, 2 }; + size_t c_dst_pitch[5] = { 8, 8 * 2, 8 * 2 * 3, 8 * 2 * 3 * 4, + 8 * 2 * 3 * 4 * 5 }; + size_t dst_stride[4] = { 1, 1, 1, 1 }; + + double src[8][4][6][10]; + double dst[5][4][3][2]; + double dst2[8][4][6][10]; + + double ref_dst[5][4][3][2]; + double ref_dst2[8][4][6][10]; + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][j][k] = 0.0; + ref_dst[i][l][j][k] = + src[2 * l][2 * k][2 * j][2 * i]; + ref_dst2[2 * l][2 * k][2 * j][2 * i] = + src[2 * l][2 * k][2 * j][2 * i]; + } + + aml_copy_shndstr_c(4, target_dims, dst, c_dst_pitch, dst_stride, src, + c_src_pitch, src_stride, elem_number, + sizeof(double)); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][j][k] == + dst[i][l][j][k]); + + aml_copy_shndstr_c(4, target_dims2, dst2, c_src_pitch, src_stride, dst, + c_dst_pitch, dst_stride, elem_number2, + sizeof(double)); + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + +void test_copy_layout(void) +{ + size_t elem_number[3] = { 5, 3, 2 }; + size_t c_src_pitch[3] = { 10, 6, 4 
}; + size_t src_stride[3] = { 1, 1, 1}; + size_t c_dst_pitch[3] = { 5, 3, 2 }; + size_t dst_stride[3] = { 1, 1, 1}; + + double src[4][6][10]; + double dst[2][3][5]; + double dst2[4][6][10]; + + double ref_dst2[4][6][10]; + double ref_dst[2][3][5]; + + AML_LAYOUT_NATIVE_DECL(src_layout, 3); + AML_LAYOUT_NATIVE_DECL(dst_layout, 3); + AML_LAYOUT_NATIVE_DECL(dst2_layout, 3); + + aml_layout_native_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)src, sizeof(double), 3, elem_number, + src_stride, c_src_pitch); + aml_layout_native_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst, sizeof(double), 3, elem_number, + dst_stride, c_dst_pitch); + aml_layout_native_ainit(&dst2_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst2, sizeof(double), 3, elem_number, + src_stride, c_src_pitch); + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[k][j][i] = + (double)(i + j * 10 + k * 10 * 6); + ref_dst2[k][j][i] = 0.0; + dst2[k][j][i] = 0.0; + } + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[k][j][i] = 0.0; + ref_dst[k][j][i] = src[k][j][i]; + ref_dst2[k][j][i] = src[k][j][i]; + } + + aml_copy_layout_native(&dst_layout, &src_layout); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[k][j][i] == dst[k][j][i]); + + aml_copy_layout_native(&dst2_layout, &dst_layout); + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[k][j][i] == dst2[k][j][i]); + +} + +void test_copy_layout_generic(void) +{ + size_t elem_number[3] = { 5, 3, 2 }; + size_t c_src_pitch[3] = { 10, 6, 4 }; + size_t src_stride[3] = { 1, 1, 1}; + size_t c_dst_pitch[3] = { 5, 3, 2 }; + size_t dst_stride[3] = { 1, 1, 1}; + + double src[4][6][10]; + double dst[2][3][5]; + double dst2[4][6][10]; + + double ref_dst2[4][6][10]; + double ref_dst[2][3][5]; + + AML_LAYOUT_NATIVE_DECL(src_layout, 3); + 
AML_LAYOUT_NATIVE_DECL(dst_layout, 3); + AML_LAYOUT_NATIVE_DECL(dst2_layout, 3); + + aml_layout_native_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)src, sizeof(double), 3, elem_number, + src_stride, c_src_pitch); + aml_layout_native_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst, sizeof(double), 3, elem_number, + dst_stride, c_dst_pitch); + aml_layout_native_ainit(&dst2_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst2, sizeof(double), 3, elem_number, + src_stride, c_src_pitch); + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[k][j][i] = + (double)(i + j * 10 + k * 10 * 6); + ref_dst2[k][j][i] = 0.0; + dst2[k][j][i] = 0.0; + } + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[k][j][i] = 0.0; + ref_dst[k][j][i] = src[k][j][i]; + ref_dst2[k][j][i] = src[k][j][i]; + } + + aml_copy_layout_generic(&dst_layout, &src_layout); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[k][j][i] == dst[k][j][i]); + + aml_copy_layout_generic(&dst2_layout, &dst_layout); + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[k][j][i] == dst2[k][j][i]); + +} + +void test_copy_layout_pad_generic(void) +{ + size_t elem_number[3] = { 5, 3, 2 }; + size_t src_pitch[3] = { 10, 6, 4 }; + size_t src_stride[3] = { 1, 1, 1}; + + size_t elem_number2[3] = { 7, 3, 4 }; + + double src[4][6][10]; + double dst[4][6][10]; + double dst_ref[4][6][10]; + + + + AML_LAYOUT_NATIVE_DECL(src_layout, 3); + AML_LAYOUT_NATIVE_DECL(dst_layout, 3); + AML_LAYOUT_PAD_DECL(src_pad, 3, sizeof(double)); + + aml_layout_native_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)src, sizeof(double), 3, elem_number, + src_stride, src_pitch); + aml_layout_native_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst, sizeof(double), 3, elem_number2, + src_stride, 
src_pitch); + + double neutral = 1337.0; + aml_layout_pad_ainit(&src_pad, AML_TYPE_LAYOUT_COLUMN_ORDER, + &src_layout, elem_number2, (void*)&neutral); + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[k][j][i] = + (double)(i + j * 10 + k * 10 * 6); + dst[k][j][i] = 0.0; + dst_ref[k][j][i] = 0.0; + } + for (int k = 0; k < 4; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 7; i++) + dst_ref[k][j][i] = 1337.0; + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + dst_ref[k][j][i] = src[k][j][i]; + aml_copy_layout_generic(&dst_layout, &src_pad); + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(dst_ref[k][j][i] == dst[k][j][i]); +} +void test_transpose_layout(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 3, 2, 4, 5 }; + size_t c_src_pitch[4] = { 10, 6, 4, 8 }; + size_t src_stride[4] = { 2, 2, 2, 2 }; + size_t c_dst_pitch[4] = { 3, 2, 4, 5 }; + size_t dst_stride[4] = { 1, 1, 1, 1 }; + + double src[8][4][6][10]; + double dst[5][4][2][3]; + double dst2[8][4][6][10]; + + double ref_dst[5][4][2][3]; + double ref_dst2[8][4][6][10]; + + AML_LAYOUT_NATIVE_DECL(src_layout, 4); + AML_LAYOUT_NATIVE_DECL(dst_layout, 4); + AML_LAYOUT_NATIVE_DECL(dst2_layout, 4); + + aml_layout_native_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)src, sizeof(double), 4, elem_number, + src_stride, c_src_pitch); + aml_layout_native_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst, sizeof(double), 4, elem_number2, + dst_stride, c_dst_pitch); + aml_layout_native_ainit(&dst2_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst2, sizeof(double), 4, elem_number, + src_stride, c_src_pitch); + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + 
ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][k][j] = 0.0; + ref_dst[i][l][k][j] = + src[2 * l][2 * k][2 * j][2 * i]; + ref_dst2[2 * l][2 * k][2 * j][2 * i] = + src[2 * l][2 * k][2 * j][2 * i]; + } + + aml_copy_layout_transpose_native(&dst_layout, &src_layout); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][k][j] == + dst[i][l][k][j]); + + aml_copy_layout_reverse_transpose_native(&dst2_layout, &dst_layout); + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + +void test_transpose_layout_generic(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 3, 2, 4, 5 }; + size_t c_src_pitch[4] = { 10, 6, 4, 8 }; + size_t src_stride[4] = { 2, 2, 2, 2 }; + size_t c_dst_pitch[4] = { 3, 2, 4, 5 }; + size_t dst_stride[4] = { 1, 1, 1, 1 }; + + double src[8][4][6][10]; + double dst[5][4][2][3]; + double dst2[8][4][6][10]; + + double ref_dst[5][4][2][3]; + double ref_dst2[8][4][6][10]; + + AML_LAYOUT_NATIVE_DECL(src_layout, 4); + AML_LAYOUT_NATIVE_DECL(dst_layout, 4); + AML_LAYOUT_NATIVE_DECL(dst2_layout, 4); + + aml_layout_native_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)src, sizeof(double), 4, elem_number, + src_stride, c_src_pitch); + aml_layout_native_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst, sizeof(double), 4, elem_number2, + dst_stride, c_dst_pitch); + aml_layout_native_ainit(&dst2_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst2, sizeof(double), 4, elem_number, + src_stride, c_src_pitch); + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i 
+ j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); + ref_dst2[l][k][j][i] = 0.0; + dst2[l][k][j][i] = 0.0; + } + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][k][j] = 0.0; + ref_dst[i][l][k][j] = + src[2 * l][2 * k][2 * j][2 * i]; + ref_dst2[2 * l][2 * k][2 * j][2 * i] = + src[2 * l][2 * k][2 * j][2 * i]; + } + + aml_copy_layout_transpose_generic(&dst_layout, &src_layout); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][k][j] == + dst[i][l][k][j]); + + aml_copy_layout_reverse_transpose_generic(&dst2_layout, &dst_layout); + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) + assert(ref_dst2[l][k][j][i] == + dst2[l][k][j][i]); + +} + +int main(int argc, char *argv[]) +{ + test_copy_2d(); + test_copy_t2d(); + test_copy_3d(); + test_copy_3d_c(); + test_copy_3dstr(); + test_copy_3dstr_c(); + test_copy_t3d(); + test_copy_rt3d(); + test_copy_t4d(); + test_copy_t4d_c(); + test_copy_t4dstr(); + test_copy_t4dstr_c(); + test_copy_sh4d(); + test_copy_sh4d_c(); + test_copy_sh4dstr(); + test_copy_sh4dstr_c(); + test_copy_layout(); + test_copy_layout_generic(); + test_copy_layout_pad_generic(); + test_transpose_layout(); + test_transpose_layout_generic(); + return 0; +} diff --git a/tests/dma_layout.c b/tests/dma_layout.c new file mode 100644 index 00000000..3469b2e3 --- /dev/null +++ b/tests/dma_layout.c @@ -0,0 +1,111 @@ +#include +#include + +void test_dma_copy_generic() +{ + size_t elem_number[3] = { 5, 3, 2 }; + size_t c_src_pitch[3] = { 10, 6, 4 }; + size_t src_stride[3] = { 1, 1, 1}; + size_t c_dst_pitch[3] = { 5, 3, 2 }; + size_t dst_stride[3] = { 1, 1, 1}; + + double src[4][6][10]; + double dst[2][3][5]; + + double ref_dst[2][3][5]; + + AML_LAYOUT_NATIVE_DECL(src_layout, 3); + AML_LAYOUT_NATIVE_DECL(dst_layout, 3); + 
AML_DMA_LAYOUT_DECL(dma); + + /* library initialization */ + + aml_layout_native_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)src, sizeof(double), 3, elem_number, + src_stride, c_src_pitch); + aml_layout_native_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst, sizeof(double), 3, elem_number, + dst_stride, c_dst_pitch); + aml_dma_layout_init(&dma, 1, aml_copy_layout_generic, NULL); + + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[k][j][i] = + (double)(i + j * 10 + k * 10 * 6); + } + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[k][j][i] = 0.0; + ref_dst[k][j][i] = src[k][j][i]; + } + + aml_dma_copy(&dma, &dst_layout, &src_layout); + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[k][j][i] == dst[k][j][i]); + + aml_dma_layout_destroy(&dma); +} + +void test_dma_transpose_generic(void) +{ + size_t elem_number[4] = { 5, 3, 2, 4 }; + size_t elem_number2[4] = { 3, 2, 4, 5 }; + size_t c_src_pitch[4] = { 10, 6, 4, 8 }; + size_t src_stride[4] = { 2, 2, 2, 2 }; + size_t c_dst_pitch[4] = { 3, 2, 4, 5 }; + size_t dst_stride[4] = { 1, 1, 1, 1 }; + + double src[8][4][6][10]; + double dst[5][4][2][3]; + + double ref_dst[5][4][2][3]; + + AML_LAYOUT_NATIVE_DECL(src_layout, 4); + AML_LAYOUT_NATIVE_DECL(dst_layout, 4); + AML_DMA_LAYOUT_DECL(dma); + + aml_layout_native_ainit(&src_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)src, sizeof(double), 4, elem_number, + src_stride, c_src_pitch); + aml_layout_native_ainit(&dst_layout, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)dst, sizeof(double), 4, elem_number2, + dst_stride, c_dst_pitch); + aml_dma_layout_init(&dma, 1, aml_copy_layout_transpose_generic, NULL); + + for (int l = 0; l < 8; l++) + for (int k = 0; k < 4; k++) + for (int j = 0; j < 6; j++) + for (int i = 0; i < 10; i++) { + src[l][k][j][i] = + (double)(i + j * 10 + k * 10 * 6 + + l * 10 * 6 * 4); 
+ } + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) { + dst[i][l][k][j] = 0.0; + ref_dst[i][l][k][j] = + src[2 * l][2 * k][2 * j][2 * i]; + } + aml_dma_copy(&dma, &dst_layout, &src_layout); + for (int l = 0; l < 4; l++) + for (int k = 0; k < 2; k++) + for (int j = 0; j < 3; j++) + for (int i = 0; i < 5; i++) + assert(ref_dst[i][l][k][j] == + dst[i][l][k][j]); + aml_dma_layout_destroy(&dma); +} + +/* Test driver: initialize the library, run every DMA layout test of this file, finalize. */ +int main(int argc, char *argv[]) +{ + aml_init(&argc, &argv); + test_dma_copy_generic(); + /* the transpose test was defined but never invoked; run it as well */ + test_dma_transpose_generic(); + aml_finalize(); + return 0; +} diff --git a/tests/layout.c b/tests/layout.c new file mode 100644 index 00000000..83400259 --- /dev/null +++ b/tests/layout.c @@ -0,0 +1,456 @@ +#include +#include + +void test_slice_contiguous(void) +{ + int memory[6][5][4]; + size_t dims_col[3] = {4, 5, 6}; + size_t dims_row[3] = {6, 5, 4}; + + size_t stride[3] = {1, 1, 1}; + + size_t offsets_col[3] = {2, 2, 3}; + size_t offsets_row[3] = {3, 2, 2}; + + size_t new_dims_col[3] = {2, 3, 3}; + size_t new_dims_row[3] = {3, 3, 2}; + + + int l = 0; + for(size_t i = 0; i < 6; i++) + for(size_t j = 0; j < 5; j++) + for(size_t k = 0; k < 4; k++, l++) + memory[i][j][k] = l; + + struct aml_layout *a; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 3, dims_col, + stride, dims_col); + struct aml_layout *b = aml_layout_aslice(a, offsets_col, new_dims_col, stride); + assert(AML_TYPE_LAYOUT_COLUMN_ORDER == aml_layout_order(b)); + + for(size_t i = 0; i < 3; i++) + for(size_t j = 0; j < 3; j++) + for(size_t k = 0; k < 2; k++) + { + assert( memory[i+3][j+2][k+2] == *(int *)aml_layout_deref(b, k, j, i)); + fprintf(stderr, "%d == %d\n", memory[i+3][j+2][k+2], *(int *)aml_layout_deref(b, k, j, i)); + } + free(a); + free(b); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memory, sizeof(int), 3, dims_row, + stride, dims_row); + b = aml_layout_aslice(a, offsets_row, new_dims_row, stride); 
+ assert(AML_TYPE_LAYOUT_ROW_ORDER == aml_layout_order(b)); + + for(size_t i = 0; i < 3; i++) + for(size_t j = 0; j < 3; j++) + for(size_t k = 0; k < 2; k++) + { + assert( memory[i+3][j+2][k+2] == *(int *)aml_layout_deref(b, i, j, k)); + fprintf(stderr, "%d == %d\n", memory[i+3][j+2][k+2], *(int *)aml_layout_deref(b, i, j, k)); + } + free(a); + free(b); + +} + +void test_slice_strided(void) +{ + int memory[12][5][8]; + + size_t dims_col[3] = {4, 5, 6}; + size_t dims_row[3] = {6, 5, 4}; + + size_t stride[3] = {2, 1, 2}; + + size_t pitch_col[3] = {8, 5, 12}; + size_t pitch_row[3] = {12, 5, 8}; + + size_t offsets_col[3] = {1, 2, 0}; + size_t offsets_row[3] = {0, 2, 1}; + + size_t new_dims_col[3] = {2, 3, 3}; + size_t new_dims_row[3] = {3, 3, 2}; + + size_t new_stride_col[3] = {2, 1, 1}; + size_t new_stride_row[3] = {1, 1, 2}; + + int l = 0; + for(size_t i = 0; i < 12; i++) + for(size_t j = 0; j < 5; j++) + for(size_t k = 0; k < 8; k++, l++) + memory[i][j][k] = l; + + struct aml_layout *a; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 3, dims_col, + stride, pitch_col); + struct aml_layout *b = aml_layout_aslice(a, offsets_col, new_dims_col, new_stride_col); + + for(size_t i = 0; i < 3; i++) + for(size_t j = 0; j < 3; j++) + for(size_t k = 0; k < 2; k++) + assert( memory[stride[2] * (offsets_col[2] + new_stride_col[2] * i)][ + stride[1] * (offsets_col[1] + new_stride_col[1] * j)][ + stride[0] * (offsets_col[0] + new_stride_col[0] * k)] == *(int *)aml_layout_deref(b, k, j, i)); + + free(a); + free(b); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memory, sizeof(int), 3, dims_row, + stride, pitch_row); + b = aml_layout_aslice(a, offsets_row, new_dims_row, new_stride_row); + + for(size_t i = 0; i < 3; i++) + for(size_t j = 0; j < 3; j++) + for(size_t k = 0; k < 2; k++) + assert( memory[stride[2] * (offsets_col[2] + new_stride_col[2] * i)][ + stride[1] * (offsets_col[1] + new_stride_col[1] * j)][ + 
stride[0] * (offsets_col[0] + new_stride_col[0] * k)] == *(int *)aml_layout_deref(b, i, j, k)); + + free(a); + free(b); + +} + +void test_reshape_contiguous(void) +{ + int memory[4*5*6]; + + size_t dims_col[3] = {4, 5, 6}; + size_t dims_row[3] = {6, 5, 4}; + + size_t stride[3] = {1, 1, 1}; + + size_t new_dims_col[2] = {24, 5}; + size_t new_dims_row[2] = {5, 24}; + + int i; + for(i = 0; i < 4*5*6; i++) + memory[i] = i; + + struct aml_layout *a; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 3, dims_col, + stride, dims_col); + struct aml_layout *b = aml_layout_areshape(a, 2, new_dims_col); + assert(AML_TYPE_LAYOUT_COLUMN_ORDER == aml_layout_order(b)); + struct aml_layout *c; + aml_layout_reshape_acreate(&c, AML_TYPE_LAYOUT_COLUMN_ORDER, + a, 2, new_dims_col); + assert(AML_TYPE_LAYOUT_COLUMN_ORDER == aml_layout_order(c)); + + i = 0; + for(size_t j = 0; j < 5; j++) + for(size_t k = 0; k < 24; k++, i++) { + assert(i == *(int *)aml_layout_deref(b, k, j)); + assert(i == *(int *)aml_layout_deref(c, k, j)); + } + + free(a); + free(b); + free(c); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memory, sizeof(int), 3, dims_row, + stride, dims_row); + b = aml_layout_areshape(a, 2, new_dims_row); + assert(AML_TYPE_LAYOUT_ROW_ORDER == aml_layout_order(b)); + aml_layout_reshape_acreate(&c, AML_TYPE_LAYOUT_ROW_ORDER, + a, 2, new_dims_row); + assert(AML_TYPE_LAYOUT_ROW_ORDER == aml_layout_order(c)); + + i = 0; + for(size_t j = 0; j < 5; j++) + for(size_t k = 0; k < 24; k++, i++) { + assert(i == *(int *)aml_layout_deref(b, j, k)); + assert(i == *(int *)aml_layout_deref(c, j, k)); + } + + free(a); + free(b); + free(c); +} + +void test_reshape_discontiguous(void) +{ + int memory[7][6][5]; + + size_t dims_col[3] = {4, 5, 6}; + size_t dims_row[3] = {6, 5, 4}; + + size_t stride[3] = {1, 1, 1}; + + size_t pitch_col[3] = {5, 6, 7}; + size_t pitch_row[3] = {7, 6, 5}; + + size_t new_dims_col[5] = {2, 2, 5, 2, 3}; + size_t 
new_dims_row[5] = {3, 2, 5, 2, 2}; + + int i = 0; + for(int j = 0; j < 6; j++) + for(int k = 0; k < 5; k++) + for(int l = 0; l < 4; l++, i++) + memory[j][k][l] = i; + + struct aml_layout *a; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 3, dims_col, + stride, pitch_col); + struct aml_layout *b = aml_layout_areshape(a, 5, new_dims_col); + struct aml_layout *c; + aml_layout_reshape_acreate(&c, AML_TYPE_LAYOUT_COLUMN_ORDER, + a, 5, new_dims_col); + + i = 0; + for(size_t j = 0; j < 3; j++) + for(size_t k = 0; k < 2; k++) + for(size_t l = 0; l < 5; l++) + for(size_t m = 0; m < 2; m++) + for(size_t n = 0; n < 2; n++, i++) { + assert(i == *(int *)aml_layout_deref(b, n, m, l, k, j)); + assert(i == *(int *)aml_layout_deref(c, n, m, l, k, j)); + } + + free(a); + free(b); + free(c); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memory, sizeof(int), 3, dims_row, + stride, pitch_row); + b = aml_layout_areshape(a, 5, new_dims_row); + aml_layout_reshape_acreate(&c, AML_TYPE_LAYOUT_ROW_ORDER, + a, 5, new_dims_row); + + + i = 0; + for(size_t j = 0; j < 3; j++) + for(size_t k = 0; k < 2; k++) + for(size_t l = 0; l < 5; l++) + for(size_t m = 0; m < 2; m++) + for(size_t n = 0; n < 2; n++, i++) { + assert(i == *(int *)aml_layout_deref(b, j, k, l, m, n)); + assert(i == *(int *)aml_layout_deref(c, j, k, l, m, n)); + } + + free(a); + free(b); + free(c); +} + +void test_reshape_strided(void) +{ + int memory[12][5][8]; + + size_t dims_col[3] = {4, 5, 6}; + size_t dims_row[3] = {6, 5, 4}; + + size_t stride[3] = {2, 1, 2}; + + size_t pitch_col[3] = {8, 5, 12}; + size_t pitch_row[3] = {12, 5, 8}; + + size_t new_dims_col[4] = {2, 10, 2, 3}; + size_t new_dims_row[4] = {3, 2, 10, 2}; + + int i = 0; + for(int j = 0; j < 6; j++) + for(int k = 0; k < 5; k++) + for(int l = 0; l < 4; l++, i++) + memory[2*j][1*k][2*l] = i; + + struct aml_layout *a; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, 
sizeof(int), 3, dims_col, + stride, pitch_col); + struct aml_layout *b = aml_layout_areshape(a, 4, new_dims_col); + struct aml_layout *c; + aml_layout_reshape_acreate(&c, AML_TYPE_LAYOUT_COLUMN_ORDER, + a, 4, new_dims_col); + + i = 0; + for(size_t j = 0; j < 3; j++) + for(size_t k = 0; k < 2; k++) + for(size_t l = 0; l < 10; l++) + for(size_t m = 0; m < 2; m++, i++) { + assert(i == *(int *)aml_layout_deref(b, m, l, k, j)); + assert(i == *(int *)aml_layout_deref(c, m, l, k, j)); + } + + free(a); + free(b); + free(c); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memory, sizeof(int), 3, dims_row, + stride, pitch_row); + b = aml_layout_areshape(a, 4, new_dims_row); + aml_layout_reshape_acreate(&c, AML_TYPE_LAYOUT_ROW_ORDER, + a, 4, new_dims_row); + + i = 0; + for(size_t j = 0; j < 3; j++) + for(size_t k = 0; k < 2; k++) + for(size_t l = 0; l < 10; l++) + for(size_t m = 0; m < 2; m++, i++) { + assert(i == *(int *)aml_layout_deref(b, j, k, l, m)); + assert(i == *(int *)aml_layout_deref(c, j, k, l, m)); + } + + free(a); + free(b); + free(c); +} + +void test_base(void) +{ + struct aml_layout *a; + AML_LAYOUT_NATIVE_DECL(b, 5); + + /* padd the dims to the closest multiple of 2 */ + float memory[16][12][8][8][4]; + size_t pitch[5] = {4, 8, 8, 12, 16}; + size_t cpitch[6] = {4, 4*4, 4*4*8, 4*4*8*8, 4*4*8*8*12, 4*4*8*8*12*16}; + size_t dims[5] = {2, 3, 7, 11, 13}; + size_t stride[5] = {1, 2, 1, 1, 1}; + + size_t dims_col[5] = {2, 3, 7, 11, 13}; + size_t dims_row[5] = {13, 11, 7, 3, 2}; + + size_t pitch_col[5] = {4, 8, 8, 12, 16}; + size_t pitch_row[5] = {16, 12, 8, 8, 4}; + + size_t stride_col[5] = {1, 2, 1, 1, 1}; + size_t stride_row[5] = {1, 1, 1, 2, 1}; + + for(size_t i = 0; i < 4*8*8*12*16; i++) + ((float*)(&memory[0][0][0][0][0]))[i] = (float)i; + + + /* initialize column order layouts */ + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(float), 5, dims_col, + stride_col, pitch_col); + 
aml_layout_native_ainit(&b, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(float), 5, dims_col, + stride_col, pitch_col); + + struct aml_layout_data_native *adataptr; + struct aml_layout_data_native *bdataptr; + + adataptr = (struct aml_layout_data_native *)a->data; + bdataptr = (struct aml_layout_data_native *)b.data; + assert( (intptr_t)(adataptr->stride) - (intptr_t)(adataptr->dims) + == 5*sizeof(size_t) ); + assert( (intptr_t)(adataptr->pitch) - (intptr_t)(adataptr->dims) + == 10*sizeof(size_t) ); + assert( (intptr_t)(adataptr->cpitch) - (intptr_t)(adataptr->dims) + == 15*sizeof(size_t) ); + + /* some simple checks */ + assert(!memcmp(adataptr->dims, dims, sizeof(size_t)*5)); + assert(!memcmp(adataptr->stride, stride, sizeof(size_t)*5)); + assert(!memcmp(adataptr->pitch, pitch, sizeof(size_t)*5)); + assert(!memcmp(adataptr->cpitch, cpitch, sizeof(size_t)*6)); + assert(!memcmp(bdataptr->dims, dims, sizeof(size_t)*5)); + assert(!memcmp(bdataptr->stride, stride, sizeof(size_t)*5)); + assert(!memcmp(bdataptr->pitch, pitch, sizeof(size_t)*5)); + assert(!memcmp(bdataptr->cpitch, cpitch, sizeof(size_t)*6)); + + /* test column major subroutines */ + size_t dims_res[5]; + size_t coords_test_col[5] = { 1, 2, 3, 4, 5 }; + void *test_addr; + void *res_addr = (void *)&memory[5][4][3][2*2][1]; + + aml_layout_adims(a, dims_res); + assert(!memcmp(dims_res, dims_col, sizeof(size_t)*5)); + aml_layout_dims(a, dims_res, + dims_res + 1, + dims_res + 2, + dims_res + 3, + dims_res + 4); + assert(!memcmp(dims_res, dims_col, sizeof(size_t)*5)); + test_addr = aml_layout_aderef(a, coords_test_col); + assert(res_addr == test_addr); + test_addr = aml_layout_deref(a, coords_test_col[0], + coords_test_col[1], + coords_test_col[2], + coords_test_col[3], + coords_test_col[4]); + assert(res_addr == test_addr); + assert(AML_TYPE_LAYOUT_COLUMN_ORDER == aml_layout_order(a)); + + free(a); + + /* initialize row order layouts */ + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, (void 
*)memory, + sizeof(float), 5, dims_row, stride_row, + pitch_row); + aml_layout_native_ainit(&b, AML_TYPE_LAYOUT_ROW_ORDER, (void *)memory, + sizeof(float), 5, dims_row, stride_row, + pitch_row); + + adataptr = (struct aml_layout_data_native *)a->data; + bdataptr = (struct aml_layout_data_native *)b.data; + assert( (intptr_t)(adataptr->stride) - (intptr_t)(adataptr->dims) + == 5*sizeof(size_t) ); + assert( (intptr_t)(adataptr->pitch) - (intptr_t)(adataptr->dims) + == 10*sizeof(size_t) ); + assert( (intptr_t)(adataptr->cpitch) - (intptr_t)(adataptr->dims) + == 15*sizeof(size_t) ); + + /* some simple checks */ + assert(!memcmp(adataptr->dims, dims, sizeof(size_t)*5)); + assert(!memcmp(adataptr->stride, stride, sizeof(size_t)*5)); + assert(!memcmp(adataptr->pitch, pitch, sizeof(size_t)*5)); + assert(!memcmp(adataptr->cpitch, cpitch, sizeof(size_t)*6)); + assert(!memcmp(bdataptr->dims, dims, sizeof(size_t)*5)); + assert(!memcmp(bdataptr->stride, stride, sizeof(size_t)*5)); + assert(!memcmp(bdataptr->pitch, pitch, sizeof(size_t)*5)); + assert(!memcmp(bdataptr->cpitch, cpitch, sizeof(size_t)*6)); + + /* test row major subroutines */ + size_t coords_test_row[5] = { 5, 4, 3, 2, 1 }; + aml_layout_adims(a, dims_res); + assert(!memcmp(dims_res, dims_row, sizeof(size_t)*5)); + aml_layout_dims(a, dims_res, + dims_res + 1, + dims_res + 2, + dims_res + 3, + dims_res + 4); + assert(!memcmp(dims_res, dims_row, sizeof(size_t)*5)); + test_addr = aml_layout_aderef(a, coords_test_row); + assert(res_addr == test_addr); + test_addr = aml_layout_deref(a, coords_test_row[0], + coords_test_row[1], + coords_test_row[2], + coords_test_row[3], + coords_test_row[4]); + assert(res_addr == test_addr); + assert(AML_TYPE_LAYOUT_ROW_ORDER == aml_layout_order(a)); + + free(a); +} +int main(int argc, char *argv[]) +{ + /* library initialization */ + aml_init(&argc, &argv); + + test_base(); + test_reshape_contiguous(); + test_reshape_discontiguous(); + test_reshape_strided(); + + 
test_slice_contiguous(); + test_slice_strided(); + + aml_finalize(); + return 0; +} + diff --git a/tests/tiling_nd.c b/tests/tiling_nd.c new file mode 100644 index 00000000..6ad02e05 --- /dev/null +++ b/tests/tiling_nd.c @@ -0,0 +1,631 @@ +#include +#include + +/* Collapse-tiling check: "memory" is wrapped in a column-order layout and "memoryres" in a row-order layout; both are tiled with tile dims that span dimensions entirely, the tilings are expected to expose 2 dimensions ({3, 8} / {8, 3}), and copying every tile must make "memoryres" equal to "memory". */ +void test_tiling_collapse(void) +{ + int memory[9][8][4][3]; + int memoryres[9][8][4][3]; + size_t dims_col[4] = {3, 4, 8, 9}; + size_t dims_row[4] = {9, 8, 4, 3}; + + size_t stride[4] = {1, 1, 1, 1}; + size_t dims_tile_col[4] = {1, 4, 1, 9}; + size_t dims_tile_row[4] = {9, 1, 4, 1}; + + size_t expected_dims_col[2] = {3, 8}; + size_t expected_dims_row[2] = {8, 3}; + + int n = 0; + for(size_t i = 0; i < 9; i++) + for(size_t k = 0; k < 8; k++) + for(size_t l = 0; l < 4; l++) + for(size_t m = 0; m < 3; m++, n++) { + memory[i][k][l][m] = n; + memoryres[i][k][l][m] = 0; + } + struct aml_layout *a, *ares; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 4, dims_col, + stride, dims_col); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memoryres, sizeof(int), 4, dims_row, + stride, dims_row); + + struct aml_tiling_nd *t, *tres; + aml_tiling_nd_collapse_acreate(&t, AML_TYPE_TILING_COLUMN_ORDER, + a, 4, dims_tile_col); + aml_tiling_nd_collapse_acreate(&tres, AML_TYPE_TILING_ROW_ORDER, + ares, 4, dims_tile_row); + + assert(aml_tiling_nd_ndims(t) == 2); + assert(aml_tiling_nd_ndims(tres) == 2); + + size_t dims[2] = { 0, 0}; + aml_tiling_nd_adims(t, dims); + /* dims holds size_t values: size whole-array operations with sizeof(dims) so both entries are compared and cleared */ + assert(memcmp(dims, expected_dims_col, sizeof(dims)) == 0); + memset(dims, 0, sizeof(dims)); + aml_tiling_nd_adims(tres, dims); + assert(memcmp(dims, expected_dims_row, sizeof(dims)) == 0); + + for(size_t i = 0; i < expected_dims_col[1]; i++) + for(size_t j = 0; j < expected_dims_col[0]; j++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, j, i); + bres = aml_tiling_nd_index(tres, i, j); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + assert(memcmp(memory, 
memoryres, 8 * 9 * 4 * 3 * sizeof(int)) == 0); + + free(a); + free(ares); + free(t); + free(tres); +} + +void test_tiling_even_mixed(void) +{ + int memory[9][10][8]; + int memoryres[9][10][8]; + size_t dims_col[3] = {8, 10, 9}; + size_t dims_row[3] = {9, 10, 8}; + + size_t stride[3] = {1, 1, 1}; + + size_t dims_tile_col[3] = {4, 10, 3}; + size_t dims_tile_row[3] = {3, 10, 4}; + + size_t expected_dims_col[3] = {2, 1, 3}; + size_t expected_dims_row[3] = {3, 1, 2}; + + int l = 0; + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++, l++) { + memory[i][j][k] = l; + memoryres[i][j][k] = 0; + } + + struct aml_layout *a, *ares; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 3, dims_col, + stride, dims_col); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memoryres, sizeof(int), 3, dims_row, + stride, dims_row); + + + struct aml_tiling_nd *t, *tres; + aml_tiling_nd_resize_acreate(&t, AML_TYPE_TILING_COLUMN_ORDER, + a, 3, dims_tile_col); + aml_tiling_nd_resize_acreate(&tres, AML_TYPE_TILING_ROW_ORDER, + ares, 3, dims_tile_row); + + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, k, j, i); + bres = aml_tiling_nd_index(tres, i, j, k); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + assert(memcmp(memory, memoryres, 8 * 10 * 9 *sizeof(int)) == 0); + + free(a); + free(ares); + free(t); + free(tres); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 3, dims_col, + stride, dims_col); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memoryres, sizeof(int), 3, dims_row, + stride, dims_row); + + + aml_tiling_nd_resize_acreate(&t, AML_TYPE_TILING_ROW_ORDER, + a, 3, dims_tile_row); + aml_tiling_nd_resize_acreate(&tres, 
AML_TYPE_TILING_COLUMN_ORDER, + ares, 3, dims_tile_col); + + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++, l++) + memoryres[i][j][k] = 0.0; + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, i, j, k); + bres = aml_tiling_nd_index(tres, k, j, i); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + assert(memcmp(memory, memoryres, 8 * 10 * 9 *sizeof(int)) == 0); + + free(a); + free(ares); + free(t); + free(tres); + +} + +void test_tiling_even(void) +{ + int memory[9][10][8]; + int memoryres[9][10][8]; + size_t dims_col[3] = {8, 10, 9}; + size_t dims_row[3] = {9, 10, 8}; + + size_t stride[3] = {1, 1, 1}; + + size_t dims_tile_col[3] = {4, 10, 3}; + size_t dims_tile_row[3] = {3, 10, 4}; + + size_t expected_dims_col[3] = {2, 1, 3}; + size_t expected_dims_row[3] = {3, 1, 2}; + + int l = 0; + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++, l++) { + memory[i][j][k] = l; + memoryres[i][j][k] = 0.0; + } + + struct aml_layout *a, *ares; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 3, dims_col, + stride, dims_col); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memoryres, sizeof(int), 3, dims_col, + stride, dims_col); + + + struct aml_tiling_nd *t, *tres; + aml_tiling_nd_resize_acreate(&t, AML_TYPE_TILING_COLUMN_ORDER, + a, 3, dims_tile_col); + aml_tiling_nd_resize_acreate(&tres, AML_TYPE_TILING_COLUMN_ORDER, + ares, 3, dims_tile_col); + + + assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_COLUMN_ORDER); + assert(aml_tiling_nd_ndims(t) == 3); + + size_t dims[3]; + aml_tiling_nd_tile_adims(t, dims); + assert(memcmp(dims, dims_tile_col, 3*sizeof(size_t)) == 0); + aml_tiling_nd_adims(t, dims); + assert(memcmp(dims, expected_dims_col, 
3*sizeof(size_t)) == 0); + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, k, j, i); + bres = aml_tiling_nd_index(tres, k, j, i); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + assert(memcmp(memory, memoryres, 8 * 10 * 9 *sizeof(int)) == 0); + + free(a); + free(ares); + free(t); + free(tres); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memory, sizeof(int), 3, dims_row, + stride, dims_row); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memoryres, sizeof(int), 3, dims_row, + stride, dims_row); + + + aml_tiling_nd_resize_acreate(&t, AML_TYPE_TILING_ROW_ORDER, + a, 3, dims_tile_row); + aml_tiling_nd_resize_acreate(&tres, AML_TYPE_TILING_ROW_ORDER, + ares, 3, dims_tile_row); + + assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_ROW_ORDER); + assert(aml_tiling_nd_ndims(t) == 3); + + aml_tiling_nd_tile_adims(t, dims); + assert(memcmp(dims, dims_tile_row, 3*sizeof(size_t)) == 0); + aml_tiling_nd_adims(t, dims); + assert(memcmp(dims, expected_dims_row, 3*sizeof(size_t)) == 0); + + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++, l++) + memoryres[i][j][k] = 0.0; + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, i, j, k); + bres = aml_tiling_nd_index(tres, i, j, k); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + assert(memcmp(memory, memoryres, 8 * 10 * 9 *sizeof(int)) == 0); + + free(a); + free(ares); + free(t); + free(tres); + +} + +void test_tiling_uneven(void) +{ + + int memory[8][10][7]; + int memoryres[8][10][7]; + size_t dims_col[3] = {7, 10, 8}; + size_t dims_row[3] = {8, 10, 7}; + + size_t stride[3] = 
{1, 1, 1}; + + size_t dims_tile_col[3] = {4, 10, 3}; + size_t dims_tile_row[3] = {3, 10, 4}; + + size_t expected_dims_col[3] = {2, 1, 3}; + size_t expected_dims_row[3] = {3, 1, 2}; + + int l = 0; + for(size_t i = 0; i < 8; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 7; k++, l++) { + memory[i][j][k] = l; + memoryres[i][j][k] = 0.0; + } + + struct aml_layout *a, *ares; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 3, dims_col, + stride, dims_col); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memoryres, sizeof(int), 3, dims_col, + stride, dims_col); + + + struct aml_tiling_nd *t, *tres; + aml_tiling_nd_resize_acreate(&t, AML_TYPE_TILING_COLUMN_ORDER, + a, 3, dims_tile_col); + aml_tiling_nd_resize_acreate(&tres, AML_TYPE_TILING_COLUMN_ORDER, + ares, 3, dims_tile_col); + + + assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_COLUMN_ORDER); + assert(aml_tiling_nd_ndims(t) == 3); + + size_t dims[3]; + aml_tiling_nd_tile_adims(t, dims); + assert(memcmp(dims, dims_tile_col, 3*sizeof(size_t)) == 0); + aml_tiling_nd_adims(t, dims); + assert(memcmp(dims, expected_dims_col, 3*sizeof(size_t)) == 0); + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, k, j, i); + bres = aml_tiling_nd_index(tres, k, j, i); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + assert(memcmp(memory, memoryres, 7 * 10 * 8 *sizeof(int)) == 0); + + free(a); + free(ares); + free(t); + free(tres); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memory, sizeof(int), 3, dims_row, + stride, dims_row); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memoryres, sizeof(int), 3, dims_row, + stride, dims_row); + + aml_tiling_nd_resize_acreate(&t, AML_TYPE_TILING_ROW_ORDER, + a, 3, dims_tile_row); + 
aml_tiling_nd_resize_acreate(&tres, AML_TYPE_TILING_ROW_ORDER, + ares, 3, dims_tile_row); + + assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_ROW_ORDER); + assert(aml_tiling_nd_ndims(t) == 3); + + aml_tiling_nd_tile_adims(t, dims); + assert(memcmp(dims, dims_tile_row, 3*sizeof(size_t)) == 0); + aml_tiling_nd_adims(t, dims); + assert(memcmp(dims, expected_dims_row, 3*sizeof(size_t)) == 0); + + for(size_t i = 0; i < 8; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 7; k++, l++) + memoryres[i][j][k] = 0.0; + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, i, j, k); + bres = aml_tiling_nd_index(tres, i, j, k); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + assert(memcmp(memory, memoryres, 7 * 10 * 8 *sizeof(int)) == 0); + + free(a); + free(ares); + free(t); + free(tres); + +} + +void test_tiling_pad_even(void) +{ + int memory[9][10][8]; + int memoryres[9][10][8]; + size_t dims_col[3] = {8, 10, 9}; + size_t dims_row[3] = {9, 10, 8}; + + size_t stride[3] = {1, 1, 1}; + + size_t dims_tile_col[3] = {4, 10, 3}; + size_t dims_tile_row[3] = {3, 10, 4}; + + size_t expected_dims_col[3] = {2, 1, 3}; + size_t expected_dims_row[3] = {3, 1, 2}; + + int l = 0; + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++, l++) { + memory[i][j][k] = l; + memoryres[i][j][k] = 0.0; + } + + struct aml_layout *a, *ares; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 3, dims_col, + stride, dims_col); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memoryres, sizeof(int), 3, dims_col, + stride, dims_col); + + + struct aml_tiling_nd *t, *tres; + int neutral = 0xdeadbeef; + aml_tiling_nd_pad_acreate(&t, AML_TYPE_TILING_COLUMN_ORDER, + a, 3, dims_tile_col, &neutral); + 
aml_tiling_nd_pad_acreate(&tres, AML_TYPE_TILING_COLUMN_ORDER, + ares, 3, dims_tile_col, &neutral); + + + assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_COLUMN_ORDER); + assert(aml_tiling_nd_ndims(t) == 3); + + size_t dims[3]; + aml_tiling_nd_tile_adims(t, dims); + assert(memcmp(dims, dims_tile_col, 3*sizeof(size_t)) == 0); + aml_tiling_nd_adims(t, dims); + assert(memcmp(dims, expected_dims_col, 3*sizeof(size_t)) == 0); + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, k, j, i); + bres = aml_tiling_nd_index(tres, k, j, i); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + assert(memcmp(memory, memoryres, 8 * 10 * 9 *sizeof(int)) == 0); + + free(a); + free(ares); + free(t); + free(tres); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memory, sizeof(int), 3, dims_row, + stride, dims_row); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memoryres, sizeof(int), 3, dims_row, + stride, dims_row); + + aml_tiling_nd_pad_acreate(&t, AML_TYPE_TILING_ROW_ORDER, + a, 3, dims_tile_row, &neutral); + aml_tiling_nd_pad_acreate(&tres, AML_TYPE_TILING_ROW_ORDER, + ares, 3, dims_tile_row, &neutral); + + assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_ROW_ORDER); + assert(aml_tiling_nd_ndims(t) == 3); + + aml_tiling_nd_tile_adims(t, dims); + assert(memcmp(dims, dims_tile_row, 3*sizeof(size_t)) == 0); + aml_tiling_nd_adims(t, dims); + assert(memcmp(dims, expected_dims_row, 3*sizeof(size_t)) == 0); + + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++, l++) + memoryres[i][j][k] = 0.0; + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, i, j, k); + 
bres = aml_tiling_nd_index(tres, i, j, k); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + assert(memcmp(memory, memoryres, 8 * 10 * 9 *sizeof(int)) == 0); + + free(a); + free(ares); + free(t); + free(tres); + +} + +void test_tiling_pad_uneven(void) +{ + + int memory[8][10][7]; + int memoryres[9][10][8]; + size_t dims_col[3] = {7, 10, 8}; + size_t dims_row[3] = {8, 10, 7}; + size_t dims_col_res[3] = {8, 10, 9}; + size_t dims_row_res[3] = {9, 10, 8}; + + size_t stride[3] = {1, 1, 1}; + + size_t dims_tile_col[3] = {4, 10, 3}; + size_t dims_tile_row[3] = {3, 10, 4}; + + size_t expected_dims_col[3] = {2, 1, 3}; + size_t expected_dims_row[3] = {3, 1, 2}; + + int l = 0; + for(size_t i = 0; i < 8; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 7; k++, l++) + memory[i][j][k] = l; + + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++, l++) + memoryres[i][j][k] = 0.0; + + + struct aml_layout *a, *ares; + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memory, sizeof(int), 3, dims_col, + stride, dims_col); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_COLUMN_ORDER, + (void *)memoryres, sizeof(int), 3, dims_col_res, + stride, dims_col_res); + + + struct aml_tiling_nd *t, *tres; + int neutral = 0xdeadbeef; + aml_tiling_nd_pad_acreate(&t, AML_TYPE_TILING_COLUMN_ORDER, + a, 3, dims_tile_col, &neutral); + aml_tiling_nd_pad_acreate(&tres, AML_TYPE_TILING_COLUMN_ORDER, + ares, 3, dims_tile_col, &neutral); + + + assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_COLUMN_ORDER); + assert(aml_tiling_nd_ndims(t) == 3); + + size_t dims[3]; + aml_tiling_nd_tile_adims(t, dims); + assert(memcmp(dims, dims_tile_col, 3*sizeof(size_t)) == 0); + aml_tiling_nd_adims(t, dims); + assert(memcmp(dims, expected_dims_col, 3*sizeof(size_t)) == 0); + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { 
+ struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, k, j, i); + bres = aml_tiling_nd_index(tres, k, j, i); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + /* cells outside the 8x10x7 source must hold the padding value, the others the source data */ + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++) + if( k >= 7 || i >= 8) + assert(memoryres[i][j][k] == 0xdeadbeef); + else + assert(memoryres[i][j][k] == memory[i][j][k]); + + /* release all four column-order objects: they are recreated in row order below */ + free(a); + free(ares); + free(t); + free(tres); + + aml_layout_native_acreate(&a, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memory, sizeof(int), 3, dims_row, + stride, dims_row); + aml_layout_native_acreate(&ares, AML_TYPE_LAYOUT_ROW_ORDER, + (void *)memoryres, sizeof(int), 3, dims_row_res, + stride, dims_row_res); + + aml_tiling_nd_pad_acreate(&t, AML_TYPE_TILING_ROW_ORDER, + a, 3, dims_tile_row, &neutral); + aml_tiling_nd_pad_acreate(&tres, AML_TYPE_TILING_ROW_ORDER, + ares, 3, dims_tile_row, &neutral); + + assert(aml_tiling_nd_order(t) == AML_TYPE_TILING_ROW_ORDER); + assert(aml_tiling_nd_ndims(t) == 3); + + aml_tiling_nd_tile_adims(t, dims); + assert(memcmp(dims, dims_tile_row, 3*sizeof(size_t)) == 0); + aml_tiling_nd_adims(t, dims); + assert(memcmp(dims, expected_dims_row, 3*sizeof(size_t)) == 0); + + /* clear the result buffer before the row-order pass */ + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++) + memoryres[i][j][k] = 0; + + for(size_t i = 0; i < expected_dims_col[2]; i++) + for(size_t j = 0; j < expected_dims_col[1]; j++) + for(size_t k = 0; k < expected_dims_col[0]; k++) { + struct aml_layout *b, *bres; + b = aml_tiling_nd_index(t, i, j, k); + bres = aml_tiling_nd_index(tres, i, j, k); + aml_copy_layout_generic(bres, b); + free(b); + free(bres); + } + for(size_t i = 0; i < 9; i++) + for(size_t j = 0; j < 10; j++) + for(size_t k = 0; k < 8; k++) + if( k >= 7 || i >= 8) + assert(memoryres[i][j][k] == 0xdeadbeef); + else + assert(memoryres[i][j][k] == memory[i][j][k]); + + free(a); + free(ares); + free(t); + free(tres); + +} + +/* Test driver: initialize the library, run every nd-tiling test, then finalize. */ +int main(int argc, char *argv[]) +{ + /* library initialization */ + aml_init(&argc, &argv); + + test_tiling_even(); + test_tiling_uneven(); + test_tiling_even_mixed(); + test_tiling_pad_even(); + test_tiling_pad_uneven(); + test_tiling_collapse(); + + /* library teardown, as done by the other test drivers of this patch */ + aml_finalize(); + return 0; +} + 