CULA compile problem with gpu function

Hello all:
I find that CULA is a very powerful tool for solving matrix problems, and I am trying to implement a GPU-based LU solver on the Mac platform. I am trying to combine a GPU matrix transpose function (which is a GPU kernel function) with CULA's culaDeviceSgetrs, but I get an error while compiling.
Below is my makefile:
--------------------------------------
# Build rules for a CULA example compiled with nvcc.
# CULA_INC_PATH, CUDA_BIN_PATH, CULA_LIB_PATH_32 and CULA_LIB_PATH_64 are not
# defined in this file; they must come from the environment or the make
# command line.
CC= nvcc
CFLAGS=-DNDEBUG -O3
# NOTE(review): in the compile log the second -I expands to
# -I/usr/local/cuda/bin: (a bin directory with a trailing colon) -- confirm
# that CUDA_BIN_PATH is really meant to be used as an include path.
INCLUDES=-I${CULA_INC_PATH} -I${CUDA_BIN_PATH}
LIBPATH32=-L${CULA_LIB_PATH_32}
LIBPATH64=-L${CULA_LIB_PATH_64}
LIBS= -lcula -lcublas -lcudart

# These targets are commands, not files named after the target, so mark them
# phony; otherwise a stray file called e.g. "clean" would disable the rule.
.PHONY: usage build32 build64 clean

# Default goal (first rule in the file): print the available build targets.
usage:
	@echo "To build this example, type one of:"
	@echo ""
	@echo " make build32"
	@echo " make build64"
	@echo ""
	@echo "where '32' and '64' represent the platform you wish to build for"
	@echo ""
	@echo "Note: this example requires the CUDA toolkit to compile"

# 32-bit build of getrf.cu into ./getrf, linked against the 32-bit CULA
# library path. -v makes nvcc print every sub-command it runs.
build32:
	${CC} -m32 -v -o getrf getrf.cu $(CFLAGS) $(INCLUDES) $(LIBPATH32) $(LIBS)

# 64-bit build of geqrf_device.c into ./geqrf_device. Runs
# ../checkenvironment.sh first (presumably an environment sanity check --
# TODO confirm against that script).
build64:
	sh ../checkenvironment.sh
	${CC} -m64 -o geqrf_device geqrf_device.c $(CFLAGS) $(INCLUDES) $(LIBPATH64) $(LIBS)

# Remove the binary produced by build32.
clean:
	rm -f getrf
-----------------------------------
Here are the error message and compile log:
nvcc -m32 -v -o getrf getrf.cu -DNDEBUG -O3 -I/usr/local/cula/include -I/usr/local/cuda/bin: -L/usr/local/cula/lib -lcula -lcublas -lcudart
#$ _SPACE_=
#$ _CUDART_=cudart
#$ _HERE_=/usr/local/cuda/bin
#$ _THERE_=/usr/local/cuda/bin
#$ _TARGET_SIZE_=
#$ TOP=/usr/local/cuda/bin/..
#$ PATH=/usr/local/cuda/bin/../open64/bin:/usr/local/cuda/bin:/usr/local/cuda/bin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin:/usr/X11/bin
#$ INCLUDES="-I/usr/local/cuda/bin/../include"
#$ LIBRARIES= "-L/usr/local/cuda/bin/../lib" -lcudart
#$ CUDAFE_FLAGS=
#$ OPENCC_FLAGS=
#$ PTXAS_FLAGS=
#$ gcc -D__CUDA_ARCH__=100 -E -x c++ -DCUDA_FLOAT_MATH_FUNCTIONS -DCUDA_NO_SM_12_ATOMIC_INTRINSICS -DCUDA_NO_SM_13_DOUBLE_INTRINSICS -DCUDA_NO_SM_11_ATOMIC_INTRINSICS "-I/usr/local/cuda/bin/../include" -I. -D__CUDACC__ -C -O3 -I"/usr/local/cula/include" -I"/usr/local/cuda/bin:" -D"NDEBUG" -include "cuda_runtime.h" -m32 -malign-double -o "/tmp/tmpxft_00002f97_00000000-4_getrf.cpp1.ii" "getrf.cu"
#$ cudafe --m32 --gnu_version=40201 -tused --no_remove_unneeded_entities --gen_c_file_name "/tmp/tmpxft_00002f97_00000000-1_getrf.cudafe1.c" --stub_file_name "/tmp/tmpxft_00002f97_00000000-1_getrf.cudafe1.stub.c" --gen_device_file_name "/tmp/tmpxft_00002f97_00000000-1_getrf.cudafe1.gpu" --include_file_name "/tmp/tmpxft_00002f97_00000000-3_getrf.fatbin.c" "/tmp/tmpxft_00002f97_00000000-4_getrf.cpp1.ii"
getrf.cu(248): error: identifier "culaDeviceSgetrf" is undefined
getrf.cu(198): warning: variable "t_A" was declared but never referenced
1 error detected in the compilation of "/tmp/tmpxft_00002f97_00000000-4_getrf.cpp1.ii".
# --error 0x2 --
make: *** [build32] Error 2
Please help! Thank you very much!
I find that CULA is a very powerful tool for solving matrix problems, and I am trying to implement a GPU-based LU solver on the Mac platform. I am trying to combine a GPU matrix transpose function (which is a GPU kernel function) with CULA's culaDeviceSgetrs, but I get an error while compiling.
Below is my makefile:
--------------------------------------
# Build rules for a CULA example compiled with nvcc.
# CULA_INC_PATH, CUDA_BIN_PATH, CULA_LIB_PATH_32 and CULA_LIB_PATH_64 are not
# defined in this file; they must come from the environment or the make
# command line.
CC= nvcc
CFLAGS=-DNDEBUG -O3
# NOTE(review): in the compile log the second -I expands to
# -I/usr/local/cuda/bin: (a bin directory with a trailing colon) -- confirm
# that CUDA_BIN_PATH is really meant to be used as an include path.
INCLUDES=-I${CULA_INC_PATH} -I${CUDA_BIN_PATH}
LIBPATH32=-L${CULA_LIB_PATH_32}
LIBPATH64=-L${CULA_LIB_PATH_64}
LIBS= -lcula -lcublas -lcudart

# These targets are commands, not files named after the target, so mark them
# phony; otherwise a stray file called e.g. "clean" would disable the rule.
.PHONY: usage build32 build64 clean

# Default goal (first rule in the file): print the available build targets.
usage:
	@echo "To build this example, type one of:"
	@echo ""
	@echo " make build32"
	@echo " make build64"
	@echo ""
	@echo "where '32' and '64' represent the platform you wish to build for"
	@echo ""
	@echo "Note: this example requires the CUDA toolkit to compile"

# 32-bit build of getrf.cu into ./getrf, linked against the 32-bit CULA
# library path. -v makes nvcc print every sub-command it runs.
build32:
	${CC} -m32 -v -o getrf getrf.cu $(CFLAGS) $(INCLUDES) $(LIBPATH32) $(LIBS)

# 64-bit build of geqrf_device.c into ./geqrf_device. Runs
# ../checkenvironment.sh first (presumably an environment sanity check --
# TODO confirm against that script).
build64:
	sh ../checkenvironment.sh
	${CC} -m64 -o geqrf_device geqrf_device.c $(CFLAGS) $(INCLUDES) $(LIBPATH64) $(LIBS)

# Remove the binary produced by build32.
clean:
	rm -f getrf
-----------------------------------
Here are the error message and compile log:
nvcc -m32 -v -o getrf getrf.cu -DNDEBUG -O3 -I/usr/local/cula/include -I/usr/local/cuda/bin: -L/usr/local/cula/lib -lcula -lcublas -lcudart
#$ _SPACE_=
#$ _CUDART_=cudart
#$ _HERE_=/usr/local/cuda/bin
#$ _THERE_=/usr/local/cuda/bin
#$ _TARGET_SIZE_=
#$ TOP=/usr/local/cuda/bin/..
#$ PATH=/usr/local/cuda/bin/../open64/bin:/usr/local/cuda/bin:/usr/local/cuda/bin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin:/usr/X11/bin
#$ INCLUDES="-I/usr/local/cuda/bin/../include"
#$ LIBRARIES= "-L/usr/local/cuda/bin/../lib" -lcudart
#$ CUDAFE_FLAGS=
#$ OPENCC_FLAGS=
#$ PTXAS_FLAGS=
#$ gcc -D__CUDA_ARCH__=100 -E -x c++ -DCUDA_FLOAT_MATH_FUNCTIONS -DCUDA_NO_SM_12_ATOMIC_INTRINSICS -DCUDA_NO_SM_13_DOUBLE_INTRINSICS -DCUDA_NO_SM_11_ATOMIC_INTRINSICS "-I/usr/local/cuda/bin/../include" -I. -D__CUDACC__ -C -O3 -I"/usr/local/cula/include" -I"/usr/local/cuda/bin:" -D"NDEBUG" -include "cuda_runtime.h" -m32 -malign-double -o "/tmp/tmpxft_00002f97_00000000-4_getrf.cpp1.ii" "getrf.cu"
#$ cudafe --m32 --gnu_version=40201 -tused --no_remove_unneeded_entities --gen_c_file_name "/tmp/tmpxft_00002f97_00000000-1_getrf.cudafe1.c" --stub_file_name "/tmp/tmpxft_00002f97_00000000-1_getrf.cudafe1.stub.c" --gen_device_file_name "/tmp/tmpxft_00002f97_00000000-1_getrf.cudafe1.gpu" --include_file_name "/tmp/tmpxft_00002f97_00000000-3_getrf.fatbin.c" "/tmp/tmpxft_00002f97_00000000-4_getrf.cpp1.ii"
getrf.cu(248): error: identifier "culaDeviceSgetrf" is undefined
getrf.cu(198): warning: variable "t_A" was declared but never referenced
1 error detected in the compilation of "/tmp/tmpxft_00002f97_00000000-4_getrf.cpp1.ii".
# --error 0x2 --
make: *** [build32] Error 2
Please help! Thank you very much!