Compiling NAMD for CUDA

From: Thomas Albers (talbers_at_binghamton.edu)
Date: Sat Jun 11 2011 - 16:23:01 CDT

Hello!

I am trying to compile NAMD with CUDA support, so far unsuccessfully.

The system is Gentoo Linux with a recent kernel, gcc 4.4.5, and the
NVIDIA CUDA toolkit 2.3:

ta_at_porsche ~ $ uname -a
Linux porsche 2.6.37-gentoo-r4 #10 SMP Wed May 25 11:10:17 EDT 2011
x86_64 AMD Phenom(tm) 9850 Quad-Core Processor AuthenticAMD GNU/Linux
ta_at_porsche ~ $ gcc --version
gcc (Gentoo 4.4.5 p1.2, pie-0.4.5) 4.4.5
ta_at_porsche ~ $ nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2009 NVIDIA Corporation
Built on Tue_Jul_21_09:08:51_PDT_2009
Cuda compilation tools, release 2.3, V0.2.1221

As seen below, the make command fails. If anything is missing here, I'm
just not seeing it.

Thomas

ta_at_porsche /home/ta/NAMD_2.8_Source $ ./config Linux-x86_64-g++
--charm-arch mpi-linux-x86_64 --with-cuda
ta_at_porsche /home/ta/NAMD_2.8_Source $ cd Linux-x86_64-g++
ta_at_porsche /home/ta/NAMD_2.8_Source/Linux-x86_64-g++ $ make
/opt/cuda/bin/nvcc -O3 --maxrregcount 32 -arch sm_11 -Xcompiler "-m64"
-DNAMD_CUDA -I. -I/opt/cuda/include
-ptx "`echo src/`ComputeNonbondedCUDAKernel.cu"
src/ComputeNonbondedCUDAKernel.cu(110): warning: variable
"max_atoms_per_patch" was set but never
used

grep global ComputeNonbondedCUDAKernel.ptx
        ld.global.u32 %r5, [%rd4+0];
        ld.global.v4.f32 {%f27,%f28,%f29,%f30}, [%rd24+0];
        ld.global.v2.u32 {%r17,%r18}, [%rd26+0];
        ld.global.u32 %r26, [%rd30+0];
        ld.global.u32 %r44, [%rd38+0];
        ld.global.s32 %r45, [%rd41+0];
        ld.global.s32 %r79, [%rd54+0];
        ld.global.s32 %r104, [%rd64+0];
        ld.global.s32 %r130, [%rd74+0];
        ld.global.s32 %r155, [%rd84+0];
        st.global.v4.f32 [%rd90+0], {%f47,%f46,%f45,%f44};
        st.global.v4.f32 [%rd92+0], {%f43,%f42,%f41,%f188};
        st.global.u32 [%rd103+0], %r191;
        st.global.f32 [%rd111+0], %f255;
        st.global.f32 [%rd117+0], %f274;
        atom.global.inc.u32 %rv1, [%rd121], %r204;
        ld.global.u32 %r215, [%rd126+0];
        ld.global.v4.f32 {%f289,%f290,%f291,%f292}, [%rd143+0];
        st.global.v4.f32 [%rd146+0], {%f288,%f287,%f286,%f285};
        ld.global.v4.f32 {%f293,%f294,%f295,_}, [%rd149+0];
        ld.global.f32 %f368, [%rd168+0];
        st.global.f32 [%rd181+0], %f381;
        ld.global.u32 %r277, [%rd185+0];
        ld.global.v4.f32 {%f383,%f384,%f385,%f386}, [%rd198+0];
        st.global.v4.f32 [%rd201+0], {%f288,%f287,%f286,%f285};
        ld.global.v4.f32 {%f387,%f388,%f389,_}, [%rd204+0];
        ld.global.f32 %f462, [%rd221+0];
        st.global.f32 [%rd234+0], %f474;
/opt/cuda/bin/nvcc -O3 --maxrregcount 32 -arch sm_11 -Xcompiler "-m64"
-DNAMD_CUDA -I. -I/opt/cuda/include
-Xptxas -v -o "`echo obj/`ComputeNonbondedCUDAKernel.o"
-c "`echo src/`ComputeNonbondedCUDAKernel.cu"
src/ComputeNonbondedCUDAKernel.cu(110): warning: variable
"max_atoms_per_patch" was set but never
used

ptxas info : Compiling entry function
'_Z13dev_nonbondedPK10patch_pairPK4atomPK10atom_paramP6float4S9_PjPfSB_PKjSA_PK10force_listS9_SB_S9_SB_i6float3SH_SH_ffi'
ptxas info : Used 32 registers, 4+0 bytes lmem, 3744+16 bytes smem,
8192 bytes cmem[0], 76 bytes cmem[1]
src/ComputeNonbondedCUDAKernel.cu(110): warning: variable
"max_atoms_per_patch" was set but never
used

/usr/include/string.h:44: error: inline function 'void* memcpy(void*,
const void*, size_t)' cannot be
declared weak
/usr/include/string.h:65: error: inline function 'void* memset(void*,
int, size_t)' cannot be declared weak
/usr/include/bits/string3.h:49: error: inline function 'void*
memcpy(void*, const void*, size_t)'
cannot be declared weak
/usr/include/bits/string3.h:77: error: inline function 'void*
memset(void*, int, size_t)' cannot be
declared weak
/opt/cuda/bin/../include/common_functions.h:59: error: inline function
'void* memset(void*, int, size_t)'
cannot be declared weak
/opt/cuda/bin/../include/common_functions.h:62: error: inline function
'void* memcpy(void*, const void*,
size_t)' cannot be declared weak
/opt/cuda/bin/../include/math_functions.h:412: error: inline function
'int __signbit(double)' cannot be
declared weak
/opt/cuda/bin/../include/math_functions.h:417: error: inline function
'int __signbitf(float)' cannot be
declared weak
/usr/include/bits/mathcalls.h:350: error: inline function 'int
__signbit(double)' cannot be declared weak
/usr/include/bits/mathcalls.h:350: error: inline function 'int
__signbitf(float)' cannot be declared weak
/usr/include/bits/mathcalls.h:350: error: inline function 'int
__signbitl(long double)' cannot be
declared weak
/usr/include/bits/mathinline.h:38: error: inline function 'int
__signbitf(float)' cannot be declared weak
/usr/include/bits/mathinline.h:50: error: inline function 'int
__signbit(double)' cannot be declared weak
/usr/include/bits/mathinline.h:62: error: inline function 'int
__signbitl(long double)' cannot be
declared weak
/opt/cuda/bin/../include/math_functions.h:442: error: inline function
'int __signbitl(long double)' cannot be
declared weak
make: *** [obj/ComputeNonbondedCUDAKernel.o] Error 255
porsche Linux-x86_64-g++ # make
/opt/cuda/bin/nvcc -O3 --maxrregcount 32 -arch sm_11 -Xcompiler "-m64"
-DNAMD_CUDA -I. -I/opt/cuda/include -ptx "`echo
src/`ComputeNonbondedCUDAKernel.cu"
src/ComputeNonbondedCUDAKernel.cu(110): warning: variable
"max_atoms_per_patch" was set but never used

grep global ComputeNonbondedCUDAKernel.ptx
        ld.global.u32 %r5, [%rd4+0];
        ld.global.v4.f32 {%f27,%f28,%f29,%f30}, [%rd24+0];
        ld.global.v2.u32 {%r17,%r18}, [%rd26+0];
        ld.global.u32 %r26, [%rd30+0];
        ld.global.u32 %r44, [%rd38+0];
        ld.global.s32 %r45, [%rd41+0];
        ld.global.s32 %r79, [%rd54+0];
        ld.global.s32 %r104, [%rd64+0];
        ld.global.s32 %r130, [%rd74+0];
        ld.global.s32 %r155, [%rd84+0];
        st.global.v4.f32 [%rd90+0], {%f47,%f46,%f45,%f44};
        st.global.v4.f32 [%rd92+0], {%f43,%f42,%f41,%f188};
        st.global.u32 [%rd103+0], %r191;
        st.global.f32 [%rd111+0], %f255;
        st.global.f32 [%rd117+0], %f274;
        atom.global.inc.u32 %rv1, [%rd121], %r204;
        ld.global.u32 %r215, [%rd126+0];
        ld.global.v4.f32 {%f289,%f290,%f291,%f292}, [%rd143+0];
        st.global.v4.f32 [%rd146+0], {%f288,%f287,%f286,%f285};
        ld.global.v4.f32 {%f293,%f294,%f295,_}, [%rd149+0];
        ld.global.f32 %f368, [%rd168+0];
        st.global.f32 [%rd181+0], %f381;
        ld.global.u32 %r277, [%rd185+0];
        ld.global.v4.f32 {%f383,%f384,%f385,%f386}, [%rd198+0];
        st.global.v4.f32 [%rd201+0], {%f288,%f287,%f286,%f285};
        ld.global.v4.f32 {%f387,%f388,%f389,_}, [%rd204+0];
        ld.global.f32 %f462, [%rd221+0];
        st.global.f32 [%rd234+0], %f474;
/opt/cuda/bin/nvcc -O3 --maxrregcount 32 -arch sm_11 -Xcompiler "-m64"
-DNAMD_CUDA -I. -I/opt/cuda/include -Xptxas -v -o "`echo
obj/`ComputeNonbondedCUDAKernel.o" -c "`echo
src/`ComputeNonbondedCUDAKernel.cu"
src/ComputeNonbondedCUDAKernel.cu(110): warning: variable
"max_atoms_per_patch" was set but never used

ptxas info : Compiling entry function
'_Z13dev_nonbondedPK10patch_pairPK4atomPK10atom_paramP6float4S9_PjPfSB_PKjSA_PK10force_listS9_SB_S9_SB_i6float3SH_SH_ffi'
ptxas info : Used 32 registers, 4+0 bytes lmem, 3744+16 bytes smem,
8192 bytes cmem[0], 76 bytes cmem[1]
src/ComputeNonbondedCUDAKernel.cu(110): warning: variable
"max_atoms_per_patch" was set but never used

/usr/include/string.h:44: error: inline function 'void* memcpy(void*,
const void*, size_t)' cannot be declared weak
/usr/include/string.h:65: error: inline function 'void* memset(void*,
int, size_t)' cannot be declared weak
/usr/include/bits/string3.h:49: error: inline function 'void*
memcpy(void*, const void*, size_t)' cannot be declared weak
/usr/include/bits/string3.h:77: error: inline function 'void*
memset(void*, int, size_t)' cannot be declared weak
/opt/cuda/bin/../include/common_functions.h:59: error: inline function
'void* memset(void*, int, size_t)' cannot be declared weak
/opt/cuda/bin/../include/common_functions.h:62: error: inline function
'void* memcpy(void*, const void*, size_t)' cannot be declared weak
/opt/cuda/bin/../include/math_functions.h:412: error: inline function
'int __signbit(double)' cannot be declared weak
/opt/cuda/bin/../include/math_functions.h:417: error: inline function
'int __signbitf(float)' cannot be declared weak
/usr/include/bits/mathcalls.h:350: error: inline function 'int
__signbit(double)' cannot be declared weak
/usr/include/bits/mathcalls.h:350: error: inline function 'int
__signbitf(float)' cannot be declared weak
/usr/include/bits/mathcalls.h:350: error: inline function 'int
__signbitl(long double)' cannot be declared weak
/usr/include/bits/mathinline.h:38: error: inline function 'int
__signbitf(float)' cannot be declared weak
/usr/include/bits/mathinline.h:50: error: inline function 'int
__signbit(double)' cannot be declared weak
/usr/include/bits/mathinline.h:62: error: inline function 'int
__signbitl(long double)' cannot be declared weak
/opt/cuda/bin/../include/math_functions.h:442: error: inline function
'int __signbitl(long double)' cannot be declared weak
make: *** [obj/ComputeNonbondedCUDAKernel.o] Error 255

This archive was generated by hypermail 2.1.6 : Mon Dec 31 2012 - 23:20:25 CST