NAMD
Public Member Functions | Public Attributes | Friends | List of all members
C1Matrix Struct Reference

#include <MsmMap.h>

Public Member Functions

 C1Matrix (Float r=0)
 
void set (Float r)
 

Public Attributes

Float melem [C1_MATRIX_SIZE]
 

Friends

C1Vector operator* (const C1Matrix &m, const C1Vector &u)
 

Detailed Description

Definition at line 110 of file MsmMap.h.

Constructor & Destructor Documentation

C1Matrix::C1Matrix ( Float  r = 0)
inline

Definition at line 112 of file MsmMap.h.

References set().

112 { set(r); } // constructor body: forwards r (default 0) to set(), which does all element initialization
void set(Float r)
Definition: MsmMap.h:113

Member Function Documentation

void C1Matrix::set ( Float  r)
inline

Definition at line 113 of file MsmMap.h.

References C1_MATRIX_SIZE, and melem.

Referenced by C1Matrix(), and ComputeMsmMgr::initialize().

// Sets all C1_MATRIX_SIZE entries of melem to the value r.
113  {
114  for (int n=0; n < C1_MATRIX_SIZE; n++) melem[n] = r;  // use the argument, not a fixed 0, so C1Matrix(r) honours r
115  }
Float melem[C1_MATRIX_SIZE]
Definition: MsmMap.h:111

Friends And Related Function Documentation

C1Vector operator* ( const C1Matrix & m,
const C1Vector & u 
)
friend

Definition at line 116 of file MsmMap.h.

// Matrix-vector product: returns v = m * u.
// The scalar fallback at the bottom shows the intended result: v.velem[j]
// accumulates m.melem[k] * u.velem[i] with k advancing linearly over i and j,
// so melem is read as consecutive rows of C1_VECTOR_SIZE elements.
// (The leading number on each line is the MsmMap.h line number of the listing.)
116  {
     // NOTE(review): v.velem is only ever updated with +=, so every branch
     // relies on C1Vector's default constructor initializing its elements
     // (constructor not visible here) -- confirm.
117  C1Vector v;
118 
119  // XXX not tested yet
120 #if 1 && (defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE))
121  // Hand-coded SSE2 vectorization
122  // This loop requires that the single-precision input arrays be
123  // aligned on 16-byte boundaries, such that array[index % 4 == 0]
124  // can be safely accessed with aligned load/store operations
     // Each iteration consumes exactly 8 matrix elements (k..k+7) and
     // u.velem[0..7], i.e. the row length is hard-coded to 8 here;
     // presumably C1_VECTOR_SIZE == 8 -- confirm.
125  for (int k=0, j=0; j < C1_VECTOR_SIZE; j++) {
126  __m128 melem4 = _mm_load_ps(&m.melem[k]);
127  __m128 uelem4 = _mm_load_ps(&u.velem[0]);
128  __m128 tmp4 = _mm_mul_ps(melem4, uelem4);
129  melem4 = _mm_load_ps(&m.melem[k+4]);
130  uelem4 = _mm_load_ps(&u.velem[4]);
131  tmp4 = _mm_add_ps(tmp4, _mm_mul_ps(melem4, uelem4));
132 
133  // do a 4-element reduction and accumulate result
     // First shuffle swaps neighbours within each pair, so the add leaves
     // pair sums in every lane; second shuffle swaps the two halves, so
     // the final add leaves the full 4-lane total in every lane.
134  __m128 sum4 = tmp4;
135  sum4 = _mm_shuffle_ps(sum4, sum4, _MM_SHUFFLE(2, 3, 0, 1));
136  sum4 = _mm_add_ps(sum4, tmp4);
137  tmp4 = sum4;
138  sum4 = _mm_shuffle_ps(sum4, sum4, _MM_SHUFFLE(1, 0, 3, 2));
139  sum4 = _mm_add_ps(sum4, tmp4);
140 
141  // all 4 elements are now set to the sum
142  float sum;
143  _mm_store_ss(&sum, sum4); // store lowest element
144  v.velem[j] += sum;
145  k+=8; // advance to the start of the next 8-element row
146  }
147 #elif 0 && (defined(__AVX__) && ! defined(NAMD_DISABLE_SSE))
     // This branch is compiled out by the literal 0 in the condition above.
148  // Hand-coded AVX vectorization
149  // This loop requires that the single-precision input arrays be
150  // aligned on 32-byte boundaries, such that array[index % 8 == 0]
151  // can be safely accessed with aligned load/store operations
152  for (int k=0, j=0; j < C1_VECTOR_SIZE; j++) {
153  __m256 melem8 = _mm256_load_ps(&m.melem[k]);
154  __m256 uelem8 = _mm256_load_ps(&u.velem[0]);
155  __m256 tmp8 = _mm256_mul_ps(melem8, uelem8);
156 
157  // XXX this still needs to be rewritten a bit for AVX
158  // do an 8-element reduction and accumulate result
     // NOTE(review): after the two hadds each 128-bit half holds its own
     // 4-element sum; the final hadd of the half-swapped copy appears to
     // double one half's sum rather than add the two halves together --
     // verify (an _mm256_add_ps may be what is intended) before enabling.
159  __m256 sum8 = tmp8;
160  sum8 = _mm256_hadd_ps(sum8, sum8);
161  sum8 = _mm256_hadd_ps(sum8, sum8);
162  tmp8 = sum8;
163  tmp8 = _mm256_permute2f128_ps(tmp8, tmp8, 1);
164  sum8 = _mm256_hadd_ps(tmp8, sum8);
165 
166  // all 8 elements are now set to the sum
167  float sum;
     // NOTE(review): _mm_store_ss takes a __m128, but sum8 is a __m256;
     // this would need a cast/extract before the branch can compile.
168  _mm_store_ss(&sum, sum8); // store lowest element
169  v.velem[j] += sum;
170  k+=8;
171  }
172 #else
     // Portable fallback: plain nested loops, k walks melem linearly.
173 #if defined(__INTEL_COMPILER)
174 #pragma vector always
175 #endif
176  for (int k=0, j=0; j < C1_VECTOR_SIZE; j++) {
177  for (int i = 0; i < C1_VECTOR_SIZE; i++, k++) {
178  v.velem[j] += m.melem[k] * u.velem[i];
179  }
180  }
181 #endif
182  return v;
183  }
Float melem[C1_MATRIX_SIZE]
Definition: MsmMap.h:111
Float velem[C1_VECTOR_SIZE]
Definition: MsmMap.h:87

Member Data Documentation

Float C1Matrix::melem[C1_MATRIX_SIZE]

Definition at line 111 of file MsmMap.h.

Referenced by ComputeMsmMgr::gc_c1hermite_elem_accum(), and set().


The documentation for this struct was generated from the following file: