# Targets that name actions rather than files.
.PHONY: all install clean

# ---- Tools -------------------------------------------------------------
# CC compiles the PIC C objects and links the shared libraries;
# GPUCC compiles the .cu sources.
CC          = mpicc
GPUCC       = nvcc
AR          = ar
ARFLAGS     = cr
RANLIB      = ranlib
INSTALL     = install
MKDIR_P     = install -m 755 -d

# ---- Locations ---------------------------------------------------------
# CUDA_DIR and OPENCL_DIR are empty here; they are only consulted when the
# matching back end is enabled further down.
CUDA_DIR    =
OPENCL_DIR  =
OMNI_DIR    = /usr/local
ACC_INC_DIR = ../include
LIBDIR      = /usr/local/lib/
# Directory that receives the position-independent objects.
TMP_DIR     = .libs

# ---- Compiler / linker flags ------------------------------------------
CFLAGS      = -I$(ACC_INC_DIR) -O2 -std=gnu99 -Wall -Wpointer-arith -fopenmp
GPUCFLAGS   = -arch=sm_20 -O3 -Wno-deprecated-gpu-targets -I$(ACC_INC_DIR)
PICFLAGS    = -fPIC -DPIC
SOFLAGS     = -shared

# ---- Base runtime library ---------------------------------------------
LIBACC      = libacc.a
# Headers whose modification forces the base objects to be rebuilt.
HEADERS     = $(ACC_INC_DIR)/acc_internal.h \
              $(ACC_INC_DIR)/acc_gpu_internal.h \
              $(ACC_INC_DIR)/acc_gpu_constant.h \
              $(ACC_INC_DIR)/acc_gpu_data_struct.h
OBJECTS     = acc.o \
              acc_memory_table.o \
              acc_runtime.o \
              acc_util.o \
              acc_data.o \
              acc_queue_map.o
# Everything `all` builds; the back-end sections below append to this list.
LIBS        = $(LIBACC)

# Boolean convention for the switches below: a switch is "on" when its value
# equals $(TRUE), i.e. 0 means enabled and 1 means disabled.
TRUE            = 0
FALSE           = 1

# Back-end selection and build mode.
IS_CUDA         = 1
IS_OPENCL       = 1
ENABLE_SHARED   = 1
# NOTE(review): @TCA@ looks like a configure-time placeholder that was not
# substituted in this copy; as written it never compares equal to $(TRUE).
IS_TCA          = @TCA@

# CUDA back end: these variables are only defined when IS_CUDA equals $(TRUE).
ifeq ($(IS_CUDA),$(TRUE))
  LIBACC_CUDA  = libacc_cuda.a
  HEADERS_CUDA =
  OBJECTS_CUDA = acc_cuda.o \
                 acc_memory_cuda.o \
                 acc_queue_cuda.o \
                 acc_mpool_cuda.o \
                 acc_cuda_util.o \
                 acc_cuda_pack.o
  # Add the CUDA include directory to the C compile flags and register the
  # CUDA archive as something `all` must build.
  CFLAGS      += -I$(CUDA_DIR)/include
  LIBS        += $(LIBACC_CUDA)
endif

# OpenCL back end: these variables are only defined when IS_OPENCL equals
# $(TRUE).
ifeq ($(IS_OPENCL),$(TRUE))
  LIBACC_CL   = libacc_cl.a
  # HEADERS_CL is not referenced by any rule in the visible part of this file.
  HEADERS_CL  = acc_internal_cl.h
  OBJECTS_CL  = acc_cl.o \
                acc_memory_cl.o \
                acc_queue_cl.o \
                acc_kernel_cl.o \
                acc_mpool_cl.o
  CFLAGS     += -I$(OPENCL_DIR)/include
  # NOTE(review): the double quotes are consumed by the shell when CFLAGS is
  # used in a recipe, so the compiler receives the path as a bare token, not
  # as a C string literal -- confirm that this is what the sources expect.
  CFLAGS     += -DOMNI_INCLUDE_DIR="$(OMNI_DIR)/include"
  LIBS       += $(LIBACC_CL)
endif

# When shared libraries are enabled, pair every static archive in LIBS with a
# .so of the same base name.  The assignment must be immediate (:=) because
# the new value is computed from the current value of LIBS itself.
ifeq ($(ENABLE_SHARED),$(TRUE))
  LIBS := $(LIBS) $(patsubst %.a,%.so,$(LIBS))
endif

# TCA support: define _XMP_TCA and add the TCA include directory to the C
# compile flags when IS_TCA equals $(TRUE).
ifeq ($(IS_TCA),$(TRUE))
  CFLAGS += -D_XMP_TCA
  CFLAGS += -I@TCA_PREFIX@/include
endif

# Default goal: build every library collected in $(LIBS).
all: $(LIBS)

# Rebuild the base-library objects whenever one of the shared runtime headers
# changes.  The position-independent copies under $(TMP_DIR) are listed too:
# their pattern rules name only the source file as a prerequisite, so without
# this line a header edit would leave the shared library stale.
$(OBJECTS) $(addprefix $(TMP_DIR)/, $(OBJECTS)): $(HEADERS)

# Recipe shared by every static-library rule: discard any previous archive,
# create a fresh one from all prerequisites, then index it.
define create_static_lib
rm -f $@
$(AR) $(ARFLAGS) $@ $^
$(RANLIB) $@
endef

# Base runtime archive.
$(LIBACC): $(OBJECTS)
	$(create_static_lib)

# Back-end archives.  LIBACC_CUDA / LIBACC_CL are only defined when the
# corresponding back end is enabled; otherwise the target list expands to
# nothing and make ignores the rule.
$(LIBACC_CUDA): $(OBJECTS_CUDA)
	$(create_static_lib)

$(LIBACC_CL): $(OBJECTS_CL)
	$(create_static_lib)

# Recipe shared by every shared-library rule: remove any previous output and
# link all prerequisites into the target with $(CC).
define link_shared_lib
rm -f $@
$(CC) $(SOFLAGS) $(PICFLAGS) $^ -o $@
endef

# Each .so is linked from the position-independent objects kept under
# $(TMP_DIR), not from the objects that go into the static archive.
$(LIBACC:.a=.so): $(addprefix $(TMP_DIR)/, $(OBJECTS))
	$(link_shared_lib)

# Back-end shared libraries; the target lists are empty (and the rules
# ignored by make) when the corresponding back end is disabled.
$(LIBACC_CUDA:.a=.so): $(addprefix $(TMP_DIR)/, $(OBJECTS_CUDA))
	$(link_shared_lib)

$(LIBACC_CL:.a=.so): $(addprefix $(TMP_DIR)/, $(OBJECTS_CL))
	$(link_shared_lib)

# Old-style suffix rule that builds foo.o from foo.cu with $(GPUCC).
# The .SUFFIXES line adds .o and .cu to make's list of known suffixes, which
# is what lets ".cu.o" be recognised as a suffix rule.  PICFLAGS is not
# passed here; the position-independent variant is a separate pattern rule.
.SUFFIXES : .o .cu
.cu.o:
	$(GPUCC) $(GPUCFLAGS) -c -o $@ $<

# Position-independent objects for the shared libraries.  They live under
# $(TMP_DIR) so they do not collide with the objects that go into the static
# archives.  Each recipe first makes sure the output directory exists.

# C sources: PICFLAGS is passed straight to $(CC).
$(TMP_DIR)/%.o: %.c
	@$(MKDIR_P) $(@D)
	$(CC) $(CFLAGS) -c $< $(PICFLAGS) -o $@

# CUDA sources: PICFLAGS is forwarded through nvcc's -Xcompiler option.
$(TMP_DIR)/%.o: %.cu
	@$(MKDIR_P) $(@D)
	$(GPUCC) $(GPUCFLAGS) -c $< -Xcompiler "$(PICFLAGS)" -o $@

# Install every built library into $(LIBDIR), optionally staged beneath
# $(DESTDIR).  The destination is quoted so that a staging path containing
# spaces is passed to the shell as a single argument.
install: $(LIBS)
	$(MKDIR_P) "$(DESTDIR)$(LIBDIR)"
	$(INSTALL) $^ "$(DESTDIR)$(LIBDIR)"

# Remove build products from this directory.
clean:
	rm -rf *~ *.core core core.* *.o
	rm -rf $(LIBS) $(TMP_DIR)

