Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

util_simd_SVE.C

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2019 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: util_simd_SVE.C,v $
00013  *      $Author: johns $        $Locker:  $             $State: Exp $
00014  *      $Revision: 1.6 $        $Date: 2022/04/09 17:58:39 $
00015  *
00016  ***************************************************************************
00017  * DESCRIPTION:
00018  *
00019  * ARM SVE vector helper routines and vectorized loops for use via 
00020  * runtime CPU dispatch.
00021  *
00022  * Arm C Language Extensions for SVE documentation used to develop 
00023  * the kernels in this source file (document Arm_100987_0000_06_en,
00024  * version 00bet6, Copyright dates 2015-2020):
00025  *   https://developer.arm.com/docs/100987/latest
00026  *   https://developer.arm.com/documentation/100987/0000
00027  *
00028  * Earlier SVE docs:
00029  *   https://developer.arm.com/solutions/hpc/resources/hpc-white-papers/a-sneak-peek-into-sve-and-vla-programming 
00030  *
00031  ***************************************************************************/
00032 
00033 #if defined(VMDCPUDISPATCH) && defined(__ARM_FEATURE_SVE)
00034 #include <arm_sve.h>
00035 
00036 #include "WKFThreads.h" // CPU capability flags
00037 // #include <string.h>
00038 // #include <ctype.h>
00039 #include <math.h>
00040 #include <stdio.h>
00041 #include <stdlib.h>
00042 #include <stddef.h>
00043 
// Report the number of 32-bit lanes in one SVE vector, as counted by
// svcntw() on the running hardware.
int arm_sve_vecsize_32bits(void) {
  const int lanes32 = svcntw();
  return lanes32;
}
00047 
// Report the number of 64-bit lanes in one SVE vector, as counted by
// svcntd() on the running hardware.
int arm_sve_vecsize_64bits(void) {
  const int lanes64 = svcntd();
  return lanes64;
}
00051 
00052 
// Compute min/max/mean values for an arbitrary array of floats.
//   f      input array of n floats
//   n      number of elements; when n < 1 all three outputs are set to 0.0f
//   fmin   receives the smallest element
//   fmax   receives the largest element
//   fmean  receives the arithmetic mean, accumulated in double precision
void minmaxmean_1fv_aligned_sve(const float *f, ptrdiff_t n,
                                float *fmin, float *fmax, float *fmean) {
  if (n < 1) {
    *fmin = 0.0f;
    *fmax = 0.0f;
    *fmean = 0.0f;
    return;
  }

  const ptrdiff_t vlen32 = (ptrdiff_t) svcntw(); // floats per vector
  const ptrdiff_t vlen64 = (ptrdiff_t) svcntd(); // doubles per vector

  // seed every lane with f[0] so lanes that never receive data cannot
  // influence the final horizontal min/max reductions
  svfloat32_t minv = svdup_f32(f[0]);
  svfloat32_t maxv = minv;
  svfloat64_t sumv = svdup_f64(0.0);

  for (ptrdiff_t i=0; i<n; i+=vlen32) {
    // enable only lanes whose index is still inside the array (tail safe)
    svbool_t pg = svwhilelt_b32(i, n);
    svfloat32_t tmp = svld1(pg, (const float32_t *) &f[i]);

    minv = svmin_m(pg, minv, tmp);
    maxv = svmax_m(pg, maxv, tmp);

    // A float->double convert only reads the low 32 bits of each 64-bit
    // container, i.e. the even-numbered float lanes.  Interleaving the
    // vector with itself places elements [0, vlen64) (zip1) and
    // [vlen64, vlen32) (zip2) into those even lanes so that every input
    // element is widened exactly once.
    svbool_t pglo = svwhilelt_b64(i, n);
    svbool_t pghi = svwhilelt_b64(i + vlen64, n);
    svfloat64_t lo = svcvt_f64_x(pglo, svzip1(tmp, tmp));
    svfloat64_t hi = svcvt_f64_x(pghi, svzip2(tmp, tmp));

    // merging (_m) adds keep the running sums held in inactive lanes;
    // a zeroing (_z) add would discard them on the tail iteration
    sumv = svadd_m(pglo, sumv, lo);
    sumv = svadd_m(pghi, sumv, hi);
  }

  svbool_t all32 = svptrue_b32();
  *fmin = svminv(all32, minv);
  *fmax = svmaxv(all32, maxv);
  *fmean = (float) (svaddv(svptrue_b64(), sumv) / n);
}
00082 
00083 
// Find the smallest and largest values in an array of n floats.
//   f     input array of n floats
//   n     number of elements; when n < 1 the outputs are left untouched
//   fmin  receives the smallest element
//   fmax  receives the largest element
void minmax_1fv_aligned_sve(const float *f, ptrdiff_t n,
                            float *fmin, float *fmax) {
  if (n < 1)
    return;

  // every lane starts at f[0], so lanes that never see data stay neutral
  svfloat32_t lowest = svdup_f32(f[0]);
  svfloat32_t highest = lowest;

  ptrdiff_t idx = 0;
  while (idx < n) {
    // predicate switches off lanes that would read past element n-1
    svbool_t active = svwhilelt_b32(idx, n);
    svfloat32_t chunk = svld1(active, (float32_t *) &f[idx]);

    // merging forms leave the inactive lanes of the accumulators as-is
    lowest = svmin_m(active, lowest, chunk);
    highest = svmax_m(active, highest, chunk);

    idx += svcntw();
  }

  // horizontal reductions across all lanes yield the scalar results
  svbool_t every = svptrue_b32();
  *fmin = svminv(every, lowest);
  *fmax = svmaxv(every, highest);
}
00104 
00105 
// Compute per-component min/max values for an arbitrary array of float3s.
//   f     input array of n3 packed (x,y,z) triples, i.e. 3*n3 floats
//   n3    number of 3-element vectors to process; when n3 < 1 the outputs
//         are left untouched
//   fmin  receives the minimum x, y, z in fmin[0], fmin[1], fmin[2]
//   fmax  receives the maximum x, y, z in fmax[0], fmax[1], fmax[2]
void minmax_3fv_aligned_sve(const float *f, const ptrdiff_t n3,
                            float *fmin, float *fmax) {
  if (n3 < 1)
    return;

  // Seed each component's accumulators from the matching component of the
  // first vector, so that x never leaks into the y or z results.
  // Plain per-component vectors are used instead of an svfloat32x3_t tuple:
  // svset3() returns a modified copy rather than updating its argument,
  // which makes in-place tuple accumulation easy to get wrong.
  svfloat32_t minx = svdup_f32(f[0]);
  svfloat32_t miny = svdup_f32(f[1]);
  svfloat32_t minz = svdup_f32(f[2]);
  svfloat32_t maxx = minx;
  svfloat32_t maxy = miny;
  svfloat32_t maxz = minz;

  const ptrdiff_t vlen = (ptrdiff_t) svcntw(); // float3s per iteration
  const ptrdiff_t vlen3 = vlen*3;              // floats per iteration
  ptrdiff_t cnt, i;
  for (cnt=0,i=0; cnt<n3; cnt+=vlen,i+=vlen3) {
    // predicate counts whole float3 structures, masking off the tail
    svbool_t pg = svwhilelt_b32(cnt, n3);

    // de-interleaving load: x, y and z land in separate vectors
    svfloat32x3_t tmp = svld3(pg, (const float32_t *) &f[i]);
    svfloat32_t x = svget3(tmp, 0);
    svfloat32_t y = svget3(tmp, 1);
    svfloat32_t z = svget3(tmp, 2);

    // merging forms keep accumulator lanes that are inactive this pass
    minx = svmin_m(pg, minx, x);
    maxx = svmax_m(pg, maxx, x);
    miny = svmin_m(pg, miny, y);
    maxy = svmax_m(pg, maxy, y);
    minz = svmin_m(pg, minz, z);
    maxz = svmax_m(pg, maxz, z);
  }

  svbool_t all = svptrue_b32();
  fmin[0] = svminv(all, minx);
  fmax[0] = svmaxv(all, maxx);
  fmin[1] = svminv(all, miny);
  fmax[1] = svmaxv(all, maxy);
  fmin[2] = svminv(all, minz);
  fmax[2] = svmaxv(all, maxz);
}
00141 
00142 
00143 #else // CPUDISPATCH+SVE
00144 
// Stub compiled when the VMDCPUDISPATCH + SVE build condition is not met:
// returns -1 instead of a lane count (presumably so callers can tell SVE
// is unavailable -- confirm against callers).
int arm_sve_vecsize_32bits(void) {
  enum { SVE_NOT_COMPILED_IN = -1 };
  return SVE_NOT_COMPILED_IN;
}
00148 
// Stub compiled when the VMDCPUDISPATCH + SVE build condition is not met:
// returns -1 instead of a lane count (presumably so callers can tell SVE
// is unavailable -- confirm against callers).
int arm_sve_vecsize_64bits(void) {
  enum { SVE_NOT_COMPILED_IN = -1 };
  return SVE_NOT_COMPILED_IN;
}
00152 
00153 #endif
00154 

Generated on Thu Apr 18 02:45:44 2024 for VMD (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002