Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
177 changes: 177 additions & 0 deletions opencl/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
# Install prefixes of the optional third-party libraries.  They are only
# consulted further down when the matching feature is enabled (cv=1 for
# SUNDIALS, tchem=1 for TChem).
SUNDIALS_ROOT=$(HOME)/kaust/sundials/2.5.0
TCHEM_ROOT=$(HOME)/kaust/tchem/TChem_v1.0
#SUNDIALS_ROOT=$(HOME)/sundials/

# Location of the OpenCL headers (CLINCPATH) and libraries (CLLIBPATH); only
# used when cl=1.  The commented-out lines are alternative settings kept for
# other installations.
#CLINCPATH= $(AMDAPPSDKROOT)/include
#CLLIBPATH= $(AMDAPPSDKROOT)/lib/x86_64
CLINCPATH= /usr/include
#CLINCPATH= $(TACC_CUDA_INC)
CLLIBPATH= /usr/lib64
##CLLIBPATH= /opt/apps/intel/opencl/libmic

# Feature switches.  Each one defaults to 0 and becomes 1 when the matching
# lower-case variable is defined as anything other than empty or 0,
# e.g. `make cl=1 omp=1`.

# tchem -> USE_TCHEM: compile against and link the TChem library.
USE_TCHEM= 0
ifneq ($(tchem),)
ifneq ($(tchem),0)
USE_TCHEM = 1
endif
endif

# cv -> CV: compile against and link SUNDIALS (defines USE_SUNDIALS).
CV= 0
ifneq ($(cv),)
ifneq ($(cv),0)
CV= 1
endif
endif

# cl -> CL: enable the OpenCL driver (defines __ENABLE_OPENCL, adds cl_driver.o).
CL= 0
ifneq ($(cl),)
ifneq ($(cl),0)
CL= 1
endif
endif

# omp -> USE_OMP: add the compiler's OpenMP switches to the compile flags.
USE_OMP= 0
ifneq ($(omp),)
ifneq ($(omp),0)
USE_OMP = 1
endif
endif

# Base compile flags shared by the C and C++ rules (CXXFLAGS below expands
# CFLAGS).  The C99 switch is passed through CC rather than CFLAGS.
CFLAGS = -I. #-std=c99

# intel=1 selects the Intel compilers; any other value uses the GNU toolchain.
# OMP records the OpenMP switches of whichever compiler was chosen.
ifeq ($(intel),1)
CC = icc -std=c99
CXX = icpc
OMP = -openmp -openmp-report
#CFLAGS += -vec-report2
else
CC = gcc -std=c99
CXX = g++
# CFLAGS += -Wno-enum-compare
CFLAGS += -mtune=native
#CFLAGS += -ftree-vectorizer-verbose=2
OMP = -fopenmp
endif

# -O3 is always on.  opt=3 additionally defines FAST_MATH and adds -ffast-math
# (and repeats -O3, which is harmless).
CFLAGS += -O3
# CFLAGS += -no-vec
ifeq ($(opt),3)
CFLAGS += -DFAST_MATH
# ifeq ($(intel),1)
# CFLAGS += -xHost
# endif
CFLAGS += -O3
CFLAGS += -ffast-math
endif
# opt=0 is recognised but currently changes nothing.
ifeq ($(opt),0)
endif

# debug=1 defines DEBUG and adds debug symbols.
ifeq ($(debug),1)
CFLAGS += -DDEBUG
CFLAGS += -g
endif

# Apply the OpenMP switches selected above when omp was requested.
ifneq ($(USE_OMP),0)
CFLAGS += $(OMP)
endif

# Recursively expanded (`=`), so CXXFLAGS also picks up every CFLAGS addition
# made later in this file.
CXXFLAGS= ${CFLAGS} -std=c++0x -Wno-enum-compare

# Fortran compiler and flags used by the %.f / %.F pattern rules.
FC = ifort
FFLAGS = -O2 -r8
FFLAGS += -g

# Link flags and libraries; the C++ runtime is always linked.
LDFLAGS = -lstdc++
#ifeq ($(intel),1)
#LDFLAGS+= -mkl=sequential
#endif

# OpenCL: add the header path and the __ENABLE_OPENCL define, link libOpenCL.
ifneq ($(CL),0)
CFLAGS += -I$(CLINCPATH) -D__ENABLE_OPENCL
LDFLAGS += -L$(CLLIBPATH) -lOpenCL
endif

# SUNDIALS: define USE_SUNDIALS and link the cvodes / serial nvector libraries.
ifneq ($(CV),0)
CFLAGS += -I${SUNDIALS_ROOT}/include -DUSE_SUNDIALS
LDFLAGS+= -lsundials_cvodes -lsundials_nvecserial -L${SUNDIALS_ROOT}/lib
endif
# TChem: define __ENABLE_TCHEM=1 and link libtchem.
ifneq ($(USE_TCHEM),0)
CFLAGS += -I$(TCHEM_ROOT)/include -D__ENABLE_TCHEM=1
LDFLAGS += -ltchem -L$(TCHEM_ROOT)/lib
endif

#LDFLAGS+= -llapack
#LDFLAGS+= -lfatode -L${SUNDIALS_ROOT}/lib

#LD_PRELOAD=libtbbmalloc_proxy.so.2
#LDFLAGS += -ltbbmalloc_proxy -ltbbmalloc -L/opt/apps/intel/13/composer_xe_2013.2.146/tbb/lib/mic

#CFLAGS += -D__LEVEL1_DCACHE_LINESIZE=`getconf LEVEL1_DCACHE_LINESIZE`
#CFLAGS += -D__ALIGNMENT=`getconf LEVEL1_DCACHE_LINESIZE`

# Value handed to the sources as the __ALIGNMENT macro: 32 unless overridden
# with `make align=<n>`.
ifeq ($(align),)
CFLAGS += -D__ALIGNMENT=32
else
CFLAGS += -D__ALIGNMENT=$(align)
endif

# Extra flags that only apply to the Intel compilers.
ifeq ($(intel),1)
# verbose=<n> is forwarded as the vectorization report level.
ifneq ($(verbose),)
CFLAGS += -vec-report=$(verbose)
endif

# Hard-wired to 0 here.  MIC_ gates -xHost below; OFFLOAD_ is not referenced
# anywhere else in the visible part of this Makefile.
MIC_ = 0
OFFLOAD_ = 0

# Accept the `restrict` keyword and silence Intel diagnostics #161 and #181.
CFLAGS += -restrict
CFLAGS += -wd161 -wd181

#CFLAGS += -simd

#ifeq ($(omp),1)
# CFLAGS += -openmp -openmp-report
#endif

# Not building for MIC: tune for the instruction set of the build host.
ifeq ($(MIC_),0)
CFLAGS += -xHost
endif
endif

# Archive command; no rule in the visible part of this Makefile uses it.
AR = ar rv

# Final executable and the objects linked into it.  The OpenCL driver object
# is only built when cl=1.
EXEC = test_ck.exe
OBJS = cklib.o clock.o rk.o ros.o sdirk.o
ifneq ($(CL),0)
OBJS += cl_driver.o
endif

# These targets are commands, not files: declare them phony so a stray file
# named e.g. `clean` or `all` can never make them appear up to date.
.PHONY: all exec objs clean veryclean

# Default target: build the test executable.
all: exec

# Object rules

# Fortran (fixed-form .f and preprocessed .F), C and C++ sources each compile
# to an object of the same base name with the matching compiler and flags.
%.o: %.f
	${FC} ${FFLAGS} -c $<
%.o: %.F
	${FC} ${FFLAGS} -c $<
%.o: %.c
	${CC} ${CFLAGS} -c $<
%.o: %.cxx
	${CXX} ${CXXFLAGS} -c $<

# Link step: test_ck.cxx is compiled and linked in one command ($< is the
# first prerequisite) together with the prebuilt objects.
$(EXEC) : test_ck.cxx ${OBJS}
#%.exe : test_ck.cxx ${OBJS}
	@echo "Building executable $@"
	${CXX} ${CXXFLAGS} -o $@ $< ${OBJS} ${LDFLAGS}

# Convenience wrappers.  $(MAKE) (rather than a literal `make`) keeps the
# sub-make on the same make binary, lets it share the -j jobserver and makes
# `make -n` recurse correctly.
exec:
	$(MAKE) ${EXEC}
objs:
	$(MAKE) ${OBJS}

# Remove objects and the generated OpenCL kernel source.
clean:
	@echo "Cleaning objects ..."
	@/bin/rm -fv ${OBJS} *.o __kernel_source.cl

# Everything `clean` removes, plus the executable.
veryclean: clean
	@echo "Cleaning executables ..."
	@/bin/rm -fv ${EXEC}
45 changes: 45 additions & 0 deletions opencl/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
The test_ck.cxx program is a driver for the typical kinetics rhs
function and several ODE solvers that integrate the pTy systems
for constant pressure, homogeneous reactors.

The code has several parallel processing capabilities:
1) OpenMP for multi-core devices (CPUs and Intel Xeon Phi in native mode)
2) OpenCL for multi-core and many-core systems such as CPUs and GPUs.
3) OpenCL-SIMD for data-parallel systems such as CPUs and Xeon Phis.

To build with OpenMP, simply enable it on the `make` command line:

prompt> make omp=1

To build with OpenCL, add `cl=1` to the `make` command. This requires that the
variable CLINCPATH in the Makefile be set to point to the location of the OpenCL
include files and CLLIBPATH to point to the system libraries.

The code must load a binary database with the mechanism information. Three are stored in the inputs/
directory. The mechanism file is specified at run-time on the CLI with the `-ck <filename>` syntax.

Other options include:

-read <filename>: load predefined problem profiles for temperature, pressure, and mass fraction.
-ck <binfile> : load prebuilt mechanism file
-ros : enable the Rosenbrock4 integrator (ROS4)
-rk : enable the RungeKuttaFehlberg integrator (exclusive with -ros)
-tstop <value> : specify the integration time for the ODE systems (1e-6)
-np <num> : number of problems to solve.
-nohost : disable the host calculations. This is only useful when using OpenCL.
-cl_iters <num> : number of iterations over the problem set for the OpenCL driver.

Several OpenCL options are controlled by environmental variables, not the CLI.

VECTOR=<num> : Controls the SIMD vector width. Must be a power-of-2 and <= 16
DEVICE=ACC|CPU : Controls the destination of the OpenCL kernels: either the available accelerator (e.g., GPU) or the host CPU
BLOCKSIZE=<num> : Controls the block-size (in CUDA terms); i.e., the # of threads per workgroup.
NUMBLOCKS=<num> : Controls the number of thread-blocks
NP=<num> : Controls the RHS test case size for OpenCL. The CLI option -np overrides.

For example, to run a set of 1600 premixed problems with the GRI Mech (v3) mechanism with the ROS4 integrator
and an integration time of 1e-6 on OpenCL with a SIMD width of 8 on one host CPU core and skip the baseline OpenMP host
reference, execute this:


VECTOR=8 DEVICE=CPU BLOCKSIZE=1 NP=0 NUMBLOCKS=1 ./test_ck.exe -read inputs/gri_premix_profile.bin -ck inputs/grimech.bin -ros -tstop 1e-6 -np 1600 -nohost
143 changes: 143 additions & 0 deletions opencl/Vector.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#ifndef __Vector_h
#define __Vector_h

#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>

// ---------------------------------------------------------------------------
// Default byte alignment of VectorType storage (__default_alignment).
//
// Selection order:
//   1. an externally supplied __ALIGNMENT macro (e.g. -D__ALIGNMENT=<n>),
//   2. 64 when compiling for MIC or with OpenMP enabled,
//   3. 16 otherwise.
// The #warning lines make the fallback choice visible in the build log.
// ---------------------------------------------------------------------------
#ifdef __ALIGNMENT
#define __default_alignment __ALIGNMENT
#endif

#ifndef __default_alignment
#if defined(__MIC__) || defined(_OPENMP)
#warning 'defining alignment = 64 for MIC || OpenMP'
#define __default_alignment (64)
#endif
#endif

#ifndef __default_alignment
//#warning 'defining default alignment = sizeof(double) for HOST'
//#define __default_alignment (sizeof(double))
#warning 'defining default alignment = 16'
#define __default_alignment (16)
#endif

/**
 * Minimal contiguous array that either owns its storage or is a non-owning
 * view ("reference", is_ref == true) onto storage owned by someone else.
 *
 * Ownership rules:
 *   - VectorType() and VectorType(len) create owners; the destructor frees
 *     their storage.
 *   - VectorType(len, ptr), VectorType(ptr), the copy constructor and copy
 *     assignment create views; they never free anything and must not outlive
 *     the storage they point at.
 *
 * Allocation:
 *   - alignment != 0: storage comes from posix_memalign() and is released with
 *     free().  The alignment must satisfy posix_memalign's requirements (a
 *     power of two that is a multiple of sizeof(void*)); otherwise allocation
 *     fails and the process exits.  The memory is raw: no element constructors
 *     or destructors run.
 *   - alignment == 0: storage comes from new[] / delete[].
 *
 * @tparam ValueType       element type.
 * @tparam AlignmentBytes  byte alignment of owned storage, 0 to use new[].
 */
template <typename ValueType, int AlignmentBytes = __default_alignment>
struct VectorType
{
   enum { alignment = AlignmentBytes };

   typedef ValueType value_type;

   bool        is_ref;   ///< true: non-owning view, storage is never freed here
   value_type *ptr;      ///< first element (NULL when empty)
   int         len;      ///< number of elements; INT_MAX for unbounded views

   // Initializers below are listed in declaration order (is_ref, ptr, len),
   // which is the order the compiler actually uses.

   /// Empty owner.
   VectorType (void) : is_ref(false), ptr(NULL), len(0) {}

   /// Owner of `len` uninitialized elements.  Allocation failure terminates
   /// the process (aligned path) or throws from new[] (alignment == 0).
   explicit VectorType (const int len) : is_ref(false), ptr(NULL), len(len)
   {
      // ptr is still NULL here, so resize() allocates without copying.
      this->resize(this->len);
   }

   /// View onto `len` elements starting at `ptr`.
   explicit VectorType (int len, value_type *ptr) : is_ref(true), ptr(ptr), len(len) {}

   /// View of unknown extent starting at `ptr`; size() reports INT_MAX.
   explicit VectorType (value_type *ptr) : is_ref(true), ptr(ptr), len(INT_MAX) {}

   /// Copy construction never duplicates data: the new object is a view onto
   /// x's storage.
   explicit VectorType (const VectorType& x) : is_ref(true), ptr(x.ptr), len(x.len) {}

   /**
    * Copy assignment with the same semantics as the copy constructor: *this
    * becomes a view onto x's storage.
    *
    * Without this operator the compiler-generated member-wise assignment
    * would copy is_ref/ptr verbatim, so assigning from an owner produced two
    * owners of one buffer (double free) and assigning to an owner leaked its
    * buffer.
    *
    * Storage owned by *this is released first, unless x already points at
    * that very buffer, in which case *this is left untouched so the buffer
    * stays alive and owned.
    */
   VectorType& operator= (const VectorType& x)
   {
      if (this == &x)
         return *this;

      if (!this->is_ref)
      {
         if (x.ptr == this->ptr)
            return *this;   // x views our own buffer (or both are empty)
         this->clear();
      }

      this->is_ref = true;
      this->ptr    = x.ptr;
      this->len    = x.len;
      return *this;
   }

   /// Owners release their storage; views leave it alone.
   ~VectorType()
   {
      if (!this->is_ref)
         this->clear();
   }

   /**
    * Release owned storage and reset to the empty state (ptr == NULL,
    * len == 0).  Must only be called on owners (asserted).
    */
   void clear (void)
   {
      assert (!this->is_ref);

      // The is_ref test is repeated so a view is never freed even when
      // assertions are compiled out.
      if (!this->is_ref && this->ptr)
      {
         if (alignment)
            free(this->ptr);      // pairs with posix_memalign in resize()
         else
            delete [] this->ptr;  // pairs with new[] in resize()
      }

      this->ptr = NULL;
      this->len = 0;
   }

   /**
    * Reallocate to `n` elements, preserving the first min(n, old size)
    * elements.  n == 0 releases the storage.  Must only be called on owners
    * (asserted).  New elements are left uninitialized.
    *
    * On aligned-allocation failure an error is printed to stderr and the
    * process exits with status 1; with alignment == 0, new[] throws instead.
    */
   void resize (const int n)
   {
      assert (!this->is_ref);

      if (n == 0)
      {
         this->clear();   // also sets len = 0
         return;
      }

      value_type *fresh = NULL;

      if (alignment)
      {
         void *raw = NULL;
         const int ierr = posix_memalign(&raw, (size_t)alignment, sizeof(value_type)*n);
         if (ierr) {
            fprintf(stderr,"Aligned allocation error %s %d\n", __FILE__, __LINE__);
            exit(1);
         }
         fresh = static_cast<value_type*>(raw);
      }
      else
      {
         // A plain new[] reports failure by throwing, so no NULL check is
         // needed here.
         fresh = new value_type [n];
      }

      if (this->len && this->ptr)
      {
         // Carry over as much of the old contents as fits, then drop the
         // old buffer.
         const int ncopy = std::min(n, this->len);
         std::copy(this->ptr, this->ptr + ncopy, fresh);
         this->clear();
      }

      this->ptr = fresh;
      this->len = n;
   }

   /// Unchecked element access.
   inline const value_type & operator[] (const int i) const { return *(this->ptr + i); }
   inline       value_type & operator[] (const int i)       { return *(this->ptr + i); }

   /// Shift the start pointer by `offset` elements; len is NOT adjusted.
   /// Only meaningful on views: an owner whose ptr has been shifted would
   /// later pass the shifted pointer to free()/delete[].
   void operator += (const size_t offset) { this->ptr += offset; }
   void operator -= (const size_t offset) { this->ptr -= offset; }

   /// Raw pointer to the first element.
   inline       value_type* getPointer (void)       { return this->ptr; }
   inline const value_type* getPointer (void) const { return this->ptr; }

   /// Raw pointer to element `i` (unchecked).
   inline       value_type* getPointer (const int i)       { return this->ptr + i; }
   inline const value_type* getPointer (const int i) const { return this->ptr + i; }

   /// Number of elements (INT_MAX for unbounded views).
   int size(void) const { return (len); }
   //value_type* begin(void) { return (ptr); }
   //value_type* end(void) { return (ptr + len); }
};

#endif
Loading