NAMD
TupleString.h
Go to the documentation of this file.
1 #ifndef TUPLESTRING_H
2 #define TUPLESTRING_H
3  /*
4  * Classes to manage the parameters that are built from tuples of strings.
5  *
6  * To avoid ever calling strcasecmp, we upcase on input.
7  *
8  * For performance reasons, we make a hash key of the tuple for faster
9  * search times in structures like unordered_map
10  *
11  */
12 
13 #include <string>
14 #include <unordered_map>
15 #include <vector>
16 #include <algorithm>
17 #include "xxh3.h"
18 
// Width, in bytes, of one name slot.  The value is carried over from
// Parameter.h; it may be more than strictly needed, but the names follow
// de facto standards.
static const short ParamNameMaxLen = 11;
59 
60 // this gives us a fixed length base
63 };
64 
65 
66 // these make the compiler do our indexing for us
67 template <short NumStrings>
68 struct TupleStrider {
69  union{
70  MStringStrider stride[NumStrings];
71  char mString[ParamNameMaxLen*NumStrings];
72  };
73 };
74 
75 // We build up our MultiString Template here
76 template <short NumStrings>
78 {
79  public:
82 
84  memcpy(multiString.mString, rhs.getMString(), getMStringLen());
85  }
86  // we only bother with constructors for 1,2,3,4,8 as those are what we use
87 
88  MultiString(const char *t1)
89  {
90  memset(multiString.mString, 0, getMStringLen());
91  strncpy(multiString.mString, t1, getTupleMaxLen());
92  }
93 
94  MultiString(const char *t1, const char *t2)
95  {
96  memset(multiString.mString, 0, getMStringLen());
97  strncpy(multiString.mString, t1, getTupleMaxLen());
98  strncpy(getTuplePtr(1), t2, getTupleMaxLen());
99  }
100 
101  MultiString(const char *t1, const char *t2, const char *t3)
102  {
103  memset(multiString.mString, 0, getMStringLen());
104  strncpy(multiString.mString, t1, getTupleMaxLen());
105  strncpy(getTuplePtr(1), t2, getTupleMaxLen());
106  strncpy(getTuplePtr(2), t3, getTupleMaxLen());
107  }
108  MultiString(const char *t1, const char *t2, const char *t3, const char *t4)
109  {
110  memset(multiString.mString, 0, getMStringLen());
111  strncpy(multiString.mString, t1, getTupleMaxLen());
112  strncpy(getTuplePtr(1), t2, getTupleMaxLen());
113  strncpy(getTuplePtr(2), t3, getTupleMaxLen());
114  strncpy(getTuplePtr(3), t4, getTupleMaxLen());
115  }
116 
117  MultiString(const char *t1, const char *t2, const char *t3, const char *t4,
118  const char *t5, const char *t6, const char *t7, const char *t8
119  )
120  {
121  memset(multiString.mString, 0, getMStringLen());
122  strncpy(multiString.mString, t1, getTupleMaxLen());
123  strncpy(getTuplePtr(1), t2, getTupleMaxLen());
124  strncpy(getTuplePtr(2), t3, getTupleMaxLen());
125  strncpy(getTuplePtr(3), t4, getTupleMaxLen());
126  strncpy(getTuplePtr(4), t5, getTupleMaxLen());
127  strncpy(getTuplePtr(5), t6, getTupleMaxLen());
128  strncpy(getTuplePtr(6), t7, getTupleMaxLen());
129  strncpy(getTuplePtr(7), t8, getTupleMaxLen());
130  }
131 
132  inline char *getTuplePtr(short index) {
133  return (char *) &(multiString.stride[index]); }
134 
135  inline void upcase()
136  {
137  for(int i=0; i < mStringLen ;i++)
138  {
139  multiString.mString[i] = std::toupper(multiString.mString[i]);
140  }
141  }
142 
143  inline short length() const { return mStringLen;}
144 
145  inline bool operator== (const MultiString& rhs) const
146  {
147  return memcmp(multiString.mString, rhs.getMString(), mStringCmpLen)==0;
148  }
149  inline bool operator< (const MultiString& rhs) const
150  {
151  return memcmp(multiString.mString, rhs.getMString(), mStringCmpLen) < 0 ;
152  }
153  inline const char *getMString() const {return multiString.mString;}
154  inline const short getMStringLen() const {return mStringLen;}
155  inline const short getTupleMaxLen() const {return ParamNameMaxLen;}
156 
157  private:
158  static const short mStringLen=NumStrings*ParamNameMaxLen;
159  static const short mStringCmpLen=NumStrings*ParamNameMaxLen-1; // last one will always be null
160 };
161 
162 // TupleString is just the MultiString with hashkeys built in
163 template <short NumStrings>
165 {
166  public:
169  {}
170  TupleString(const char* key1)
171  {
173  tupleCat.upcase();
174  makeHash();
175  }
176 
177  TupleString(const char* key1, const char* key2)
178  {
179  tupleCat = MultiString <NumStrings>(key1, key2);
180  tupleCat.upcase();
181  makeHash();
182  }
183 
184  TupleString(const char* key1, const char* key2, const char* key3)
185  {
186  tupleCat = MultiString <NumStrings>(key1, key2, key3);
187  tupleCat.upcase();
188  makeHash();
189  }
190 
191  TupleString(const char* key1, const char* key2, const char* key3,
192  const char* key4)
193  {
194  tupleCat = MultiString <NumStrings>(key1, key2, key3, key4);
195  tupleCat.upcase();
196  makeHash();
197  }
198 
199  TupleString(const char* key1, const char* key2, const char* key3,
200  const char* key4, const char* key5, const char* key6,
201  const char* key7, const char* key8)
202  {
203  tupleCat = MultiString <NumStrings>(key1, key2, key3, key4,
204  key5, key6, key7, key8);
205  tupleCat.upcase();
206  makeHash();
207  }
208 
209  inline void makeHash()
210  {
211  hashKey = XXH3_64bits(tupleCat.getMString(), tupleCat.length());
212  }
213 
214  // if the hashkeys are equal, check the string
215  inline bool operator== (const TupleString& rhs) const
216  {
217  if(hashKey == rhs.getHashKey())
218  {
219  return tupleCat == rhs.tupleCat;
220  }
221  return false;
222  }
223 
224  // if the hashkeys are equal, check the string
225  inline bool operator<(const TupleString& rhs) const
226  {
227  if(hashKey == rhs.getHashKey())
228  return(tupleCat < rhs.tupleCat);
229  return hashKey < rhs.getHashKey();
230  }
231 
232  inline const uint64_t getHashKey() const {return hashKey;}
233  inline const char* getCatKey() const {return tupleCat.getMString();}
234 
235  inline char *getTuplePtr(short index) { return tupleCat.getTuplePtr(index); }
236  private:
// Hash of the packed key, set by makeHash().  Zero-initialised so a key on
// which makeHash() has not run never carries an indeterminate value.
uint64_t hashKey = 0;
238 };
239 
240 
241 template <short NumStrings>
243 {
244  size_t operator() (const TupleString <NumStrings>& k) const
245  {
246  return k.getHashKey();
247  }
248  static_assert((NumStrings==1) || (NumStrings==2) || (NumStrings==3)
249  || (NumStrings==4) || (NumStrings==8),
250  "invalid NumStrings must be: [1,2,3,4,8]");
251 };
252 
253 
254 /* convenience typedefs */
260 
261 
262 /*
263  combine a map with key(tuple) and value(index) with a vector of
264  record types for the map to index into. We enforce uniqueness on the
265  map and only append a new value for a unique key.
266 */
267 
268 
269 //templating a template is tricky with only c++11
270 //this looks a little weird, but avoids code replication
271 
272 template <short NumStrings, class ParamValue>
274 {
275  public:
277  std::vector<ParamValue> paramVector;
278  std::unordered_map<KeyType, size_t, TupleStringHash<NumStrings>> tupleMap;
279  inline TwoLevelParam()
280  {
281  }
282  inline ParamValue* get_param(const size_t &index)
283  {
284  return &(paramVector[index]);
285  }
286 
287  inline bool get_key_by_index(size_t index, KeyType& key) const {
288  for (auto it = tupleMap.begin(); it != tupleMap.end(); ++it) {
289  if (it->second == index) {
290  key = it->first;
291  return true;
292  }
293  }
294  return false;
295  }
296 
297  inline ParamValue* get_param_by_key(const KeyType &findKey)
298  {
299  auto ret = tupleMap.find(findKey);
300  if(ret!=tupleMap.end())
301  {
302  return &paramVector[ret->second];
303  }
304  else
305  {
306  return NULL;
307  }
308  }
309 
310  inline bool insert(const KeyType &tKey,
311  const ParamValue &mValue)
312  {
313  size_t newPlace = paramVector.size();
314  const auto ret = tupleMap.emplace(std::make_pair(tKey,newPlace));
315  if(ret.second)
316  {
317 
318  paramVector.push_back(mValue);
319  }
320  return ret.second;
321  }
322 
323  // return whether it was newly inserted and the pointer to the value
324  inline std::pair<bool, ParamValue*> insert_check(const KeyType &tKey,
325  const ParamValue &mValue)
326  {
327  size_t newPlace = paramVector.size();
328  auto ret = tupleMap.emplace(std::make_pair(tKey,newPlace));
329  if(ret.second)
330  {
331  paramVector.push_back(mValue);
332  }
333  return std::make_pair(ret.second, &paramVector[ret.first->second]);
334  }
335 
336  // underscore naming inherited from original Parameter.[hC]
337  inline std::pair<bool, size_t> const index_param(const KeyType &findKey) const
338  {
339  const auto ret = tupleMap.find(findKey);
340  if(ret!=tupleMap.end())
341  {
342  return std::make_pair(true, ret->second);
343  }
344  else
345  {
346  return std::make_pair(false, paramVector.size());
347  }
348  }
349 
350  // possibly faster
351  inline int64_t const index(const KeyType &findKey) const
352  {
353  const auto ret = tupleMap.find(findKey);
354  if(ret!=tupleMap.end())
355  {
356  return ret->second;
357  }
358  return -1;
359  }
360 
361  void clear()
362  {
363  tupleMap.clear();
364  paramVector.clear();
365  }
366 
367  // mem_opt relies on strict ordering by name for indices to match up.
368  // Sort the key strings alphanumerically and reset the indices
369  // to have an in order param vector.
370  // This is not within an #ifdef MEM_OPT_VERSION so it can be used
371  // during genCompressedPsf
372  void sort()
373  {
374  typedef std::pair<TupleString<NumStrings>, size_t> TSKeypair;
375  std::vector <TSKeypair> sortVector;
376  std::vector <ParamValue> sortValues;
377  for(auto apair : tupleMap)
378  {
379  sortVector.push_back(apair);
380  }
381  std::sort(sortVector.begin(), sortVector.end(),
382  [] (TSKeypair a, TSKeypair b)
383  {
384  return a.first.tupleCat<b.first.tupleCat;
385  });
386  for(int newIndex=0; newIndex < sortVector.size(); ++newIndex)
387  {
388  sortValues.push_back(paramVector[sortVector[newIndex].second]);
389  tupleMap[sortVector[newIndex].first]=newIndex;
390  }
391  paramVector=sortValues;
392  }
393 
394 };
395 
396 #endif
397 
ParamValue * get_param(const size_t &index)
Definition: TupleString.h:282
bool operator<(const MultiString &rhs) const
Definition: TupleString.h:149
void makeHash()
Definition: TupleString.h:209
TupleString< 1 > TupleString1
Definition: TupleString.h:255
ParamValue * get_param_by_key(const KeyType &findKey)
Definition: TupleString.h:297
MultiString(const MultiString< NumStrings > &rhs)
Definition: TupleString.h:83
MultiString(const char *t1, const char *t2, const char *t3, const char *t4)
Definition: TupleString.h:108
TupleString(const char *key1, const char *key2, const char *key3, const char *key4)
Definition: TupleString.h:191
bool operator==(const TupleString &rhs) const
Definition: TupleString.h:215
size_t operator()(const TupleString< NumStrings > &k) const
Definition: TupleString.h:244
TupleString(const char *key1, const char *key2, const char *key3, const char *key4, const char *key5, const char *key6, const char *key7, const char *key8)
Definition: TupleString.h:199
TupleString< 2 > TupleString2
Definition: TupleString.h:256
std::pair< bool, size_t > const index_param(const KeyType &findKey) const
Definition: TupleString.h:337
char * getTuplePtr(short index)
Definition: TupleString.h:235
bool insert(const KeyType &tKey, const ParamValue &mValue)
Definition: TupleString.h:310
TupleString< 8 > TupleString8
Definition: TupleString.h:259
int64_t const index(const KeyType &findKey) const
Definition: TupleString.h:351
std::unordered_map< KeyType, size_t, TupleStringHash< NumStrings > > tupleMap
Definition: TupleString.h:278
const char * getCatKey() const
Definition: TupleString.h:233
std::pair< bool, ParamValue * > insert_check(const KeyType &tKey, const ParamValue &mValue)
Definition: TupleString.h:324
char mStride[ParamNameMaxLen]
Definition: TupleString.h:62
MultiString(const char *t1, const char *t2, const char *t3, const char *t4, const char *t5, const char *t6, const char *t7, const char *t8)
Definition: TupleString.h:117
TupleString(const char *key1, const char *key2, const char *key3)
Definition: TupleString.h:184
const short getMStringLen() const
Definition: TupleString.h:154
MStringStrider stride[NumStrings]
Definition: TupleString.h:70
char * getTuplePtr(short index)
Definition: TupleString.h:132
MultiString(const char *t1)
Definition: TupleString.h:88
MultiString(const char *t1, const char *t2, const char *t3)
Definition: TupleString.h:101
MultiString< NumStrings > tupleCat
Definition: TupleString.h:167
const short getTupleMaxLen() const
Definition: TupleString.h:155
static const short ParamNameMaxLen
Definition: TupleString.h:58
bool operator<(const TupleString &rhs) const
Definition: TupleString.h:225
TupleString< 3 > TupleString3
Definition: TupleString.h:257
TupleString(const char *key1)
Definition: TupleString.h:170
std::vector< ParamValue > paramVector
Definition: TupleString.h:277
TupleStrider< NumStrings > multiString
Definition: TupleString.h:80
short length() const
Definition: TupleString.h:143
const char * getMString() const
Definition: TupleString.h:153
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void *input, size_t len)
Definition: xxh3.h:1430
const uint64_t getHashKey() const
Definition: TupleString.h:232
bool operator==(const MultiString &rhs) const
Definition: TupleString.h:145
char mString[ParamNameMaxLen *NumStrings]
Definition: TupleString.h:71
TupleString< 4 > TupleString4
Definition: TupleString.h:258
bool get_key_by_index(size_t index, KeyType &key) const
Definition: TupleString.h:287
MultiString(const char *t1, const char *t2)
Definition: TupleString.h:94
TupleString(const char *key1, const char *key2)
Definition: TupleString.h:177
void upcase()
Definition: TupleString.h:135