NAMD
AVXTileLists.h
Go to the documentation of this file.
1 #ifndef AVXTILELISTS_H
2 #define AVXTILELISTS_H
3 
4 #include "AVXTiles.h"
5 #include "Lattice.h"
6 
7 #ifdef NAMD_AVXTILES
8 #include <immintrin.h>
9 
10 // --------------------------------------------------------------------------
11 // Preprocessor defines for hybrid tiles/pair list
12 // - Disable with NAMD_AVXTILES_PAIR_THRESHOLD=0 and no ORDER_PATCHES
13 // --------------------------------------------------------------------------
14 // Threshold in number of atoms in i-tile to switch to pair list
15 #define NAMD_AVXTILES_PAIR_THRESHOLD 4
16 // Initial allocation size for number of neighbors per atom in pair list
17 #define NAMD_AVXTILES_IPAIRCOUNT 300
18 // Order patch pair so first patch is one with smallest # atoms in last tile
19 #define NAMD_AVXTILES_ORDER_PATCHES
20 // --------------------------------------------------------------------------
21 
22 // Class for storing data for neighbor "j" tiles packed for all atoms in patch
23 class AVXJTiles {
24  public:
25  AVXJTiles();
26  ~AVXJTiles();
27 
28  inline int numTiles() const { return _numTiles; }
29  inline int maxTiles() const { return _numTilesAlloc; }
30  inline bool realloc(const int n) {
31  _numTiles = n;
32  if (n>_numTilesAlloc) {
33  _realloc();
34  return true;
35  } else
36  return false;
37  }
38 
39  // Bitwise exclusion data for neighbor tiles
40  unsigned int *excl;
41  // Starting index for atoms in neighbor tile
42  int *atomStart;
43  // Used for deleting empty tile lists on build steps
44  int *status;
45 
46  private:
47  int _numTiles, _numTilesAlloc;
48  void _realloc();
49 };
50 
51 
52 // Data and routines for storing/building tile lists and computing forces,
53 // virials, and energies from this data for patch pairs (including self).
54 // - Modified and excluded pairs are also processed here with separate loops
55 class AVXTileLists {
56  public:
57  struct List {
58  int atomStart_i;
59  int jtileStart;
60  };
61 
62  AVXTileLists();
63  ~AVXTileLists();
64 
65  // Simulation parameters are passed to be explicit about data used for
66  // tiles algorithm. Interpolation mode is set and documented in
67  // ComputeNonbondedUtil.
68  void setSimParams(const float scale, const float scale14, const float c1,
69  const float c3, const float switchOn2, float *fastTable,
70  float *fastEnergyTable, float *slowTable,
71  float *slowEnergyTable, float *eps4sigma,
72  float *eps4sigma14, float *ljTable,
73  const float ljTableWidth, float *modifiedTable,
74  float *modifiedEnergyTable, float *excludedTable,
75  float *excludedEnergyTable, const int interpolationMode);
76 
77  inline void atomUpdate(AVXTiles *patch0tiles, AVXTiles *patch1tiles) {
78  tiles_p0 = patch0tiles;
79  tiles_p1 = patch1tiles;
80 
81  // Patch reordering currently doesn't help perf unless using hybrid pairs
82  #ifdef NAMD_AVXTILES_ORDER_PATCHES
83  _patchOrder0 = 0;
84  _patchOrder1 = 1;
85  bool reorder = false;
86  const int rem0 = patch0tiles->numAtoms() & 15;
87  const int rem1 = patch1tiles->numAtoms() & 15;
88  if (rem1 && rem1 <= NAMD_AVXTILES_PAIR_THRESHOLD && rem1 < rem0)
89  reorder = true;
90  else if ((rem0 > NAMD_AVXTILES_PAIR_THRESHOLD || rem0 == 0) &&
91  patch1tiles->numAtoms() < patch0tiles->numAtoms())
92  reorder = true;
93  if (reorder) {
94  tiles_p0 = patch1tiles;
95  tiles_p1 = patch0tiles;
96  _patchOrder0 = 1;
97  _patchOrder1 = 0;
98  }
99  #endif
100 
101  realloc(tiles_p0->numTiles());
102  }
103 
104  inline void updateParams(const Lattice &lattice, const Vector &offset,
105  const double cutoff) {
106  _cutoff2 = cutoff * cutoff;
107  _paramMinvCut3 = -1.0 / (_cutoff2 * sqrt(_cutoff2));
108  _paramCutUnder3 = 3.0 / sqrt(_cutoff2);
109  _shx = offset.x*lattice.a().x + offset.y*lattice.b().x +
110  offset.z*lattice.c().x;
111  _shy = offset.x*lattice.a().y + offset.y*lattice.b().y +
112  offset.z*lattice.c().y;
113  _shz = offset.x*lattice.a().z + offset.y*lattice.b().z +
114  offset.z*lattice.c().z;
115  }
116 
117  inline void updateBuildInfo(const int step, const int minPart,
118  const int maxPart, const int numParts,
119  const double plcutoff) {
120  _lastBuild = step;
121  _minPart = minPart;
122  _maxPart = maxPart;
123  _numParts = numParts;
124  _plcutoff2 = plcutoff * plcutoff;
125  }
126 
127  inline int numLists() const { return _numLists; }
128  // Reallocate data for storing tile lists
129  inline void realloc(const int numLists) {
130  _numLists = numLists;
131  if (numLists > _numListsAlloc) _realloc();
132  }
133  // Reallocate data for storing modified pairs
134  inline void reallocModified(const int numModified) {
135  _numModified = numModified;
136  if (numModified > _numModifiedAlloc) _reallocModified();
137  }
138  // Reallocate data for storing excluded pairs
139  inline void reallocExcluded(const int numExcluded) {
140  _numExcluded = numExcluded;
141  if (numExcluded > _numExcludedAlloc) _reallocExcluded();
142  }
143  // Reallocate data for pair lists when using hybrid tile / pairlists
144  inline void reallocPairLists(const int numPairLists, const int maxPairs) {
145  if (numPairLists > _maxPairLists || maxPairs > _maxPairs)
146  _reallocPairLists(numPairLists, maxPairs);
147  }
148 
149  inline int exclusionChecksum() const { return _exclusionChecksum; }
150  inline float energyVdw() const { return _energyVdw; }
151  inline float energyElec() const { return _energyElec; }
152  inline float energySlow() const { return _energySlow; }
153  inline float virialXX() const { return _fNet_x * _shx; }
154  inline float virialXY() const { return _fNet_x * _shy; }
155  inline float virialXZ() const { return _fNet_x * _shz; }
156  inline float virialYY() const { return _fNet_y * _shy; }
157  inline float virialYZ() const { return _fNet_y * _shz; }
158  inline float virialZZ() const { return _fNet_z * _shz; }
159  inline float virialSlowXX() const { return _fNetSlow_x * _shx; }
160  inline float virialSlowXY() const { return _fNetSlow_x * _shy; }
161  inline float virialSlowXZ() const { return _fNetSlow_x * _shz; }
162  inline float virialSlowYY() const { return _fNetSlow_y * _shy; }
163  inline float virialSlowYZ() const { return _fNetSlow_y * _shz; }
164  inline float virialSlowZZ() const { return _fNetSlow_z * _shz; }
165 
166  #ifdef NAMD_AVXTILES_ORDER_PATCHES
167  inline int patchOrder0() const { return _patchOrder0; }
168  inline int patchOrder1() const { return _patchOrder1; }
169  #else
170  inline int patchOrder0() const { return 0; }
171  inline int patchOrder1() const { return 1; }
172  #endif
173 
174  // Build bounding boxes for tiles on both patches and build initial tile
175  // lists based on bounding boxes
176  // -- Paritioning for LB is based on number of neighbor tiles
177  void build();
178  // On build steps, delete any empty tile lists after refinement in force
179  // calculation based on atom distances.
180  void delEmptyLists();
181  // Calculate forces, virials, energies for tile lists, pair lists, and
182  // modified/excluded pairs
183  void nbForceAVX512(const int doEnergy, const int doVirial, const int doList,
184  const int doSlow);
185 
186  List *lists;
187  // Number of tile neighbors
188  unsigned int *listDepth;
189 
190  // Tiles data for each patch in pair
191  AVXTiles *tiles_p0, *tiles_p1;
192  // Neighbor tile data
193  AVXJTiles jTiles;
194 
195  private:
196  template <bool count, bool partitionMode>
197  int _buildBB();
198 
199  template <bool doEnergy, bool doVirial, bool doSlow,
200  bool doList, int interpMode>
201  __forceinline void nbAVX512Tiles(__m512 &energyVdw, __m512 &energyElec,
202  __m512 &energySlow, __m512 &fNet_x,
203  __m512 &fNet_y, __m512 &fNet_z,
204  __m512 &fNetSlow_x, __m512 &fNetSlow_y,
205  __m512 &fNetSlow_z);
206  template <bool doEnergy, bool doVirial, bool doSlow, int interpMode>
207  __forceinline void nbAVX512Pairs(__m512 &energyVdw, __m512 &energyElec,
208  __m512 &energySlow, __m512 &fNet_x,
209  __m512 &fNet_y, __m512 &fNet_z,
210  __m512 &fNetSlow_x, __m512 &fNetSlow_y,
211  __m512 &fNetSlow_z);
212  template <bool doEnergy, bool doVirial, bool doSlow, int interpMode>
213  inline void nbAVX512Modified(__m512 &energyVdw, __m512 &energyElec,
214  __m512 &energySlow, __m512 &fNet_x,
215  __m512 &fNet_y, __m512 &fNet_z,
216  __m512 &fNetSlow_x, __m512 &fNetSlow_y,
217  __m512 &fNetSlow_z);
218  template <bool doEnergy, bool doVirial>
219  inline void nbAVX512Excluded(__m512 &energySlow, __m512 &fNetSlow_x,
220  __m512 &fNetSlow_y, __m512 &fNetSlow_z);
221 
222  template <bool doEnergy, bool doVirial, bool doSlow,
223  bool doList, int interpMode>
224  void doAll();
225 
226  void _realloc();
227  void _reallocModified();
228  void _reallocExcluded();
229  void _reallocPairLists(const int numPairLists, const int maxPairs);
230 
231  float _cutoff2, _plcutoff2;
232 
233  float *_paramSlowTable, *_paramSlowEnergyTable;
234  // -------------- NOT USED WITH INTERPOLATION MODES 2 and 3
235  float *_paramEps4Sigma, *_paramEps4Sigma14;
236  // -------------- NOT USED WITH INTERPOLATION MODE 3
237  float _paramMinvCut3, _paramCutUnder3;
238  // --------------
239  float *_paramModifiedTable, *_paramModifiedEnergyTable;
240  float *_paramExcludedTable, *_paramExcludedEnergyTable;
241 
242  // -------------- NOT USED WITH INTERPOLATION MODES 1 and 2
243  float *_paramFastTable, *_paramFastEnergyTable;
244  const float *_paramLjTable;
245  int _paramLjWidth;
246  // --------------
247 
248  float _shx, _shy, _shz;
249  float _paramScale, _paramScale14, _paramC1, _paramC3, _paramSwitchOn2;
250  int _numLists, _numListsAlloc;
251  int _numModified, _numModifiedAlloc, _numExcluded, _numExcludedAlloc;
252  int *_modified_i, *_modified_j, *_excluded_i, *_excluded_j;
253 
254  int _numPairLists, _maxPairLists, _maxPairs;
255  int *_pair_i, *_numPairs, *_pairStart, *_pairLists;
256 
257  float _fNet_x, _fNet_y, _fNet_z, _fNetSlow_x, _fNetSlow_y, _fNetSlow_z;
258  int _exclusionChecksum;
259  float _energyVdw, _energyElec, _energySlow;
260 
261  int _interpolationMode, _minPart, _maxPart, _numParts, _lastBuild;
262 
263  #ifdef NAMD_AVXTILES_ORDER_PATCHES
264  int _patchOrder0, _patchOrder1;
265  #endif
266 
267  #ifndef MEM_OPT_VERSION
268  char * _exclFlyListBuffer;
269  char * _exclFlyLists[16];
270  const int32 * _fullExcl[16], * _modExcl[16];
271  int _lastFlyListTile;
272  const char * buildExclFlyList(const int itileList, const int z,
273  const __m512i &atomIndex_i, const int n,
274  void *mol);
275  #endif
276 };
277 
278 #endif // NAMD_AVXTILES
279 #endif // AVXTILELISTS_H
short int32
Definition: dumpdcd.c:24
Definition: Vector.h:64
BigReal z
Definition: Vector.h:66
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int unsigned int *__restrict__ const CudaPatchRecord *__restrict__ float4 *__restrict__ float4 *__restrict__ int *__restrict__ int *__restrict__ TileListVirialEnergy *__restrict__ virialEnergy int itileList
gridSize z
BigReal x
Definition: Vector.h:66
BigReal y
Definition: Vector.h:66
Vector b() const
Definition: Lattice.h:253
Vector a() const
Definition: Lattice.h:252
Vector c() const
Definition: Lattice.h:254