C1Matrix Struct Reference

#include <MsmMap.h>

List of all members.

Public Member Functions

 C1Matrix (Float r=0)
void set (Float r)

Public Attributes

Float melem [C1_MATRIX_SIZE]

Friends

C1Vector operator * (const C1Matrix &m, const C1Vector &u)


Detailed Description

Definition at line 110 of file MsmMap.h.


Constructor & Destructor Documentation

C1Matrix::C1Matrix ( Float  r = 0  )  [inline]

Definition at line 112 of file MsmMap.h.

References set().

00112 { set(r); }


Member Function Documentation

void C1Matrix::set ( Float  r  )  [inline]

Definition at line 113 of file MsmMap.h.

References C1_MATRIX_SIZE, and melem.

Referenced by C1Matrix().

00113                       {
00114       for (int n=0;  n < C1_MATRIX_SIZE;  n++)  melem[n] = 0;
00115     }


Friends And Related Function Documentation

C1Vector operator * ( const C1Matrix & m,
const C1Vector & u 
) [friend]

Definition at line 116 of file MsmMap.h.

00116                                                                     {
00117       C1Vector v;
00118 
00119       // XXX not tested yet
00120 #if 1 && (defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE))
00121       // Hand-coded SSE2 vectorization
00122       // This loop requires that the single-precision input arrays be 
00123       // aligned on 16-byte boundaries, such that array[index % 4 == 0] 
00124       // can be safely accessed with aligned load/store operations
00125       for (int k=0, j=0;  j < C1_VECTOR_SIZE;  j++) {
00126         __m128 melem4 = _mm_load_ps(&m.melem[k]);
00127         __m128 uelem4 = _mm_load_ps(&u.velem[0]);
00128         __m128 tmp4 = _mm_mul_ps(melem4, uelem4); 
00129         melem4 = _mm_load_ps(&m.melem[k+4]);
00130         uelem4 = _mm_load_ps(&u.velem[4]);
00131         tmp4 = _mm_add_ps(tmp4, _mm_mul_ps(melem4, uelem4)); 
00132 
00133         // do a 4-element reduction and accumulate result
00134         __m128 sum4 = tmp4;
00135         sum4 = _mm_shuffle_ps(sum4, sum4, _MM_SHUFFLE(2, 3, 0, 1));
00136         sum4 = _mm_add_ps(sum4, tmp4);
00137         tmp4 = sum4;
00138         sum4 = _mm_shuffle_ps(sum4, sum4, _MM_SHUFFLE(1, 0, 3, 2));
00139         sum4 = _mm_add_ps(sum4, tmp4);
00140 
00141         // all 4 elements are now set to the sum
00142         float sum;
00143         _mm_store_ss(&sum, sum4); // store lowest element
00144         v.velem[j] += sum;
00145         k+=8;
00146       }
00147 #elif 0 && (defined(__AVX__) && ! defined(NAMD_DISABLE_SSE))
00148       // Hand-coded AVX vectorization
00149       // This loop requires that the single-precision input arrays be 
00150       // aligned on 32-byte boundaries, such that array[index % 8 == 0] 
00151       // can be safely accessed with aligned load/store operations
00152       for (int k=0, j=0;  j < C1_VECTOR_SIZE;  j++) {
00153         __m256 melem8 = _mm256_load_ps(&m.melem[k]);
00154         __m256 uelem8 = _mm256_load_ps(&u.velem[0]);
00155         __m256 tmp8 = _mm256_mul_ps(melem8, uelem8); 
00156 
00157         // XXX this still needs to be rewritten a bit for AVX
00158         // do an 8-element reduction and accumulate result
00159         __m256 sum8 = tmp8;
00160         sum8 = _mm256_hadd_ps(sum8, sum8);
00161         sum8 = _mm256_hadd_ps(sum8, sum8);
00162         tmp8 = sum8;
00163         tmp8 = _mm256_permute2f128_ps(tmp8, tmp8, 1);
00164         sum8 = _mm256_hadd_ps(tmp8, sum8);
00165 
00166         // all 8 elements are now set to the sum
00167         float sum;
00168         _mm_store_ss(&sum, sum8); // store lowest element
00169         v.velem[j] += sum;
00170         k+=8;
00171       }
00172 #else
00173 #if defined(__INTEL_COMPILER)
00174 #pragma vector always
00175 #endif
00176       for (int k=0, j=0;  j < C1_VECTOR_SIZE;  j++) {
00177         for (int i = 0;  i < C1_VECTOR_SIZE;  i++, k++) {
00178           v.velem[j] += m.melem[k] * u.velem[i];
00179         }
00180       }
00181 #endif
00182       return v;
00183     }


Member Data Documentation

Float C1Matrix::melem[C1_MATRIX_SIZE]

Definition at line 111 of file MsmMap.h.

Referenced by ComputeMsmMgr::gc_c1hermite_elem_accum(), and set().


The documentation for this struct was generated from the following file:
MsmMap.h
Generated on Tue Nov 21 01:17:17 2017 for NAMD by  doxygen 1.4.7