Re: Compiling NAMD for CUDA

From: Axel Kohlmeyer (akohlmey_at_gmail.com)
Date: Sat Jun 11 2011 - 16:43:28 CDT

thomas,

cuda 2.x is not compatible with gcc-4.x, so
you have to install gcc-3.x and trick nvcc
into using that instead of gcc-4.x.

on my fedora box i had the following set up.

[akohlmey_at_fermi ~]$ ll -l !$
ll -l /opt/gcc-34/bin/
total 0
lrwxrwxrwx 1 root root 22 2010-03-26 17:48 g++ -> ../../../usr/bin/g++34
lrwxrwxrwx 1 root root 22 2010-03-26 17:48 gcc -> ../../../usr/bin/gcc34

and then used:

nvcc -ccbin /opt/gcc-34/bin

or temporarily reset PATH so that /opt/gcc-34/bin comes first.

cheers,
    axel.
On Sat, Jun 11, 2011 at 5:23 PM, Thomas Albers <talbers_at_binghamton.edu> wrote:
> Hello!
>
> I am trying to compile NAMD with CUDA support, so far unsuccessfully.
>
> The system is a Gentoo Linux with recent kernel, gcc 4.4.5, NVIDIA
> toolkit 2.3:
>
> ta_at_porsche ~ $ uname -a
> Linux porsche 2.6.37-gentoo-r4 #10 SMP Wed May 25 11:10:17 EDT 2011
> x86_64 AMD Phenom(tm) 9850 Quad-Core Processor AuthenticAMD GNU/Linux
> ta_at_porsche ~ $ gcc --version
> gcc (Gentoo 4.4.5 p1.2, pie-0.4.5) 4.4.5
> ta_at_porsche ~ $ nvcc --version
> nvcc: NVIDIA (R) Cuda compiler driver
> Copyright (c) 2005-2009 NVIDIA Corporation
> Built on Tue_Jul_21_09:08:51_PDT_2009
> Cuda compilation tools, release 2.3, V0.2.1221
>
> As seen below, the make command fails. If anything is missing here, I'm
> just not seeing it.
>
> Thomas
>
> ta_at_porsche /home/ta/NAMD_2.8_Source $ ./config Linux-x86_64-g++
> --charm-arch mpi-linux-x86_64 --with-cuda
> ta_at_porsche /home/ta/NAMD_2.8_Source $ cd Linux-x86_64-g++
> ta_at_porsche /home/ta/NAMD_2.8_Source/Linux-x86_64-g++ $ make
> /opt/cuda/bin/nvcc -O3 --maxrregcount 32 -arch sm_11 -Xcompiler "-m64"
> -DNAMD_CUDA -I. -I/opt/cuda/include
> -ptx "`echo src/`ComputeNonbondedCUDAKernel.cu"
> src/ComputeNonbondedCUDAKernel.cu(110): warning: variable
> "max_atoms_per_patch" was set but never used
>
> grep global ComputeNonbondedCUDAKernel.ptx
>        ld.global.u32   %r5, [%rd4+0];
>        ld.global.v4.f32        {%f27,%f28,%f29,%f30}, [%rd24+0];
>        ld.global.v2.u32        {%r17,%r18}, [%rd26+0];
>        ld.global.u32   %r26, [%rd30+0];
>        ld.global.u32   %r44, [%rd38+0];
>        ld.global.s32   %r45, [%rd41+0];
>        ld.global.s32   %r79, [%rd54+0];
>        ld.global.s32   %r104, [%rd64+0];
>        ld.global.s32   %r130, [%rd74+0];
>        ld.global.s32   %r155, [%rd84+0];
>        st.global.v4.f32        [%rd90+0], {%f47,%f46,%f45,%f44};
>        st.global.v4.f32        [%rd92+0], {%f43,%f42,%f41,%f188};
>        st.global.u32   [%rd103+0], %r191;
>        st.global.f32   [%rd111+0], %f255;
>        st.global.f32   [%rd117+0], %f274;
>        atom.global.inc.u32     %rv1, [%rd121], %r204;
>        ld.global.u32   %r215, [%rd126+0];
>        ld.global.v4.f32        {%f289,%f290,%f291,%f292}, [%rd143+0];
>        st.global.v4.f32        [%rd146+0], {%f288,%f287,%f286,%f285};
>        ld.global.v4.f32        {%f293,%f294,%f295,_}, [%rd149+0];
>        ld.global.f32   %f368, [%rd168+0];
>        st.global.f32   [%rd181+0], %f381;
>        ld.global.u32   %r277, [%rd185+0];
>        ld.global.v4.f32        {%f383,%f384,%f385,%f386}, [%rd198+0];
>        st.global.v4.f32        [%rd201+0], {%f288,%f287,%f286,%f285};
>        ld.global.v4.f32        {%f387,%f388,%f389,_}, [%rd204+0];
>        ld.global.f32   %f462, [%rd221+0];
>        st.global.f32   [%rd234+0], %f474;
> /opt/cuda/bin/nvcc -O3 --maxrregcount 32 -arch sm_11 -Xcompiler "-m64"
> -DNAMD_CUDA -I. -I/opt/cuda/include
> -Xptxas -v -o  "`echo obj/`ComputeNonbondedCUDAKernel.o"
> -c "`echo src/`ComputeNonbondedCUDAKernel.cu"
> src/ComputeNonbondedCUDAKernel.cu(110): warning: variable
> "max_atoms_per_patch" was set but never used
>
> ptxas info    : Compiling entry function
> '_Z13dev_nonbondedPK10patch_pairPK4atomPK10atom_paramP6float4S9_PjPfSB_PKjSA_PK10force_listS9_SB_S9_SB_i6float3SH_SH_ffi'
> ptxas info    : Used 32 registers, 4+0 bytes lmem, 3744+16 bytes smem,
> 8192 bytes cmem[0], 76 bytes cmem[1]
> src/ComputeNonbondedCUDAKernel.cu(110): warning: variable
> "max_atoms_per_patch" was set but never used
>
> /usr/include/string.h:44: error: inline function 'void* memcpy(void*,
> const void*, size_t)' cannot be declared weak
> /usr/include/string.h:65: error: inline function 'void* memset(void*,
> int, size_t)' cannot be declared weak
> /usr/include/bits/string3.h:49: error: inline function 'void*
> memcpy(void*, const void*, size_t)' cannot be declared weak
> /usr/include/bits/string3.h:77: error: inline function 'void*
> memset(void*, int, size_t)' cannot be declared weak
> /opt/cuda/bin/../include/common_functions.h:59: error: inline function
> 'void* memset(void*, int, size_t)' cannot be declared weak
> /opt/cuda/bin/../include/common_functions.h:62: error: inline function
> 'void* memcpy(void*, const void*, size_t)' cannot be declared weak
> /opt/cuda/bin/../include/math_functions.h:412: error: inline function
> 'int __signbit(double)' cannot be declared weak
> /opt/cuda/bin/../include/math_functions.h:417: error: inline function
> 'int __signbitf(float)' cannot be declared weak
> /usr/include/bits/mathcalls.h:350: error: inline function 'int
> __signbit(double)' cannot be declared weak
> /usr/include/bits/mathcalls.h:350: error: inline function 'int
> __signbitf(float)' cannot be declared weak
> /usr/include/bits/mathcalls.h:350: error: inline function 'int
> __signbitl(long double)' cannot be declared weak
> /usr/include/bits/mathinline.h:38: error: inline function 'int
> __signbitf(float)' cannot be declared weak
> /usr/include/bits/mathinline.h:50: error: inline function 'int
> __signbit(double)' cannot be declared weak
> /usr/include/bits/mathinline.h:62: error: inline function 'int
> __signbitl(long double)' cannot be declared weak
> /opt/cuda/bin/../include/math_functions.h:442: error: inline function
> 'int __signbitl(long double)' cannot be declared weak
> make: *** [obj/ComputeNonbondedCUDAKernel.o] Error 255
> porsche Linux-x86_64-g++ # make
> /opt/cuda/bin/nvcc -O3 --maxrregcount 32 -arch sm_11 -Xcompiler "-m64"
> -DNAMD_CUDA -I. -I/opt/cuda/include -ptx "`echo
> src/`ComputeNonbondedCUDAKernel.cu"
> src/ComputeNonbondedCUDAKernel.cu(110): warning: variable
> "max_atoms_per_patch" was set but never used
>
> grep global ComputeNonbondedCUDAKernel.ptx
>        ld.global.u32   %r5, [%rd4+0];
>        ld.global.v4.f32        {%f27,%f28,%f29,%f30}, [%rd24+0];
>        ld.global.v2.u32        {%r17,%r18}, [%rd26+0];
>        ld.global.u32   %r26, [%rd30+0];
>        ld.global.u32   %r44, [%rd38+0];
>        ld.global.s32   %r45, [%rd41+0];
>        ld.global.s32   %r79, [%rd54+0];
>        ld.global.s32   %r104, [%rd64+0];
>        ld.global.s32   %r130, [%rd74+0];
>        ld.global.s32   %r155, [%rd84+0];
>        st.global.v4.f32        [%rd90+0], {%f47,%f46,%f45,%f44};
>        st.global.v4.f32        [%rd92+0], {%f43,%f42,%f41,%f188};
>        st.global.u32   [%rd103+0], %r191;
>        st.global.f32   [%rd111+0], %f255;
>        st.global.f32   [%rd117+0], %f274;
>        atom.global.inc.u32     %rv1, [%rd121], %r204;
>        ld.global.u32   %r215, [%rd126+0];
>        ld.global.v4.f32        {%f289,%f290,%f291,%f292}, [%rd143+0];
>        st.global.v4.f32        [%rd146+0], {%f288,%f287,%f286,%f285};
>        ld.global.v4.f32        {%f293,%f294,%f295,_}, [%rd149+0];
>        ld.global.f32   %f368, [%rd168+0];
>        st.global.f32   [%rd181+0], %f381;
>        ld.global.u32   %r277, [%rd185+0];
>        ld.global.v4.f32        {%f383,%f384,%f385,%f386}, [%rd198+0];
>        st.global.v4.f32        [%rd201+0], {%f288,%f287,%f286,%f285};
>        ld.global.v4.f32        {%f387,%f388,%f389,_}, [%rd204+0];
>        ld.global.f32   %f462, [%rd221+0];
>        st.global.f32   [%rd234+0], %f474;
> /opt/cuda/bin/nvcc -O3 --maxrregcount 32 -arch sm_11 -Xcompiler "-m64"
> -DNAMD_CUDA -I. -I/opt/cuda/include -Xptxas -v -o  "`echo
> obj/`ComputeNonbondedCUDAKernel.o" -c "`echo
> src/`ComputeNonbondedCUDAKernel.cu"
> src/ComputeNonbondedCUDAKernel.cu(110): warning: variable
> "max_atoms_per_patch" was set but never used
>
> ptxas info    : Compiling entry function
> '_Z13dev_nonbondedPK10patch_pairPK4atomPK10atom_paramP6float4S9_PjPfSB_PKjSA_PK10force_listS9_SB_S9_SB_i6float3SH_SH_ffi'
> ptxas info    : Used 32 registers, 4+0 bytes lmem, 3744+16 bytes smem,
> 8192 bytes cmem[0], 76 bytes cmem[1]
> src/ComputeNonbondedCUDAKernel.cu(110): warning: variable
> "max_atoms_per_patch" was set but never used
>
> /usr/include/string.h:44: error: inline function 'void* memcpy(void*,
> const void*, size_t)' cannot be declared weak
> /usr/include/string.h:65: error: inline function 'void* memset(void*,
> int, size_t)' cannot be declared weak
> /usr/include/bits/string3.h:49: error: inline function 'void*
> memcpy(void*, const void*, size_t)' cannot be declared weak
> /usr/include/bits/string3.h:77: error: inline function 'void*
> memset(void*, int, size_t)' cannot be declared weak
> /opt/cuda/bin/../include/common_functions.h:59: error: inline function
> 'void* memset(void*, int, size_t)' cannot be declared weak
> /opt/cuda/bin/../include/common_functions.h:62: error: inline function
> 'void* memcpy(void*, const void*, size_t)' cannot be declared weak
> /opt/cuda/bin/../include/math_functions.h:412: error: inline function
> 'int __signbit(double)' cannot be declared weak
> /opt/cuda/bin/../include/math_functions.h:417: error: inline function
> 'int __signbitf(float)' cannot be declared weak
> /usr/include/bits/mathcalls.h:350: error: inline function 'int
> __signbit(double)' cannot be declared weak
> /usr/include/bits/mathcalls.h:350: error: inline function 'int
> __signbitf(float)' cannot be declared weak
> /usr/include/bits/mathcalls.h:350: error: inline function 'int
> __signbitl(long double)' cannot be declared weak
> /usr/include/bits/mathinline.h:38: error: inline function 'int
> __signbitf(float)' cannot be declared weak
> /usr/include/bits/mathinline.h:50: error: inline function 'int
> __signbit(double)' cannot be declared weak
> /usr/include/bits/mathinline.h:62: error: inline function 'int
> __signbitl(long double)' cannot be declared weak
> /opt/cuda/bin/../include/math_functions.h:442: error: inline function
> 'int __signbitl(long double)' cannot be declared weak
> make: *** [obj/ComputeNonbondedCUDAKernel.o] Error 255
>
>

-- 
Dr. Axel Kohlmeyer
akohlmey_at_gmail.com  http://goo.gl/1wk0
Institute for Computational Molecular Science
Temple University, Philadelphia PA, USA.

This archive was generated by hypermail 2.1.6 : Wed Feb 29 2012 - 15:57:16 CST