tesseract  4.1.1
simddetect.cpp
Go to the documentation of this file.
1 // File: simddetect.cpp
3 // Description: Architecture detector.
4 // Author: Stefan Weil (based on code from Ray Smith)
5 //
6 // (C) Copyright 2014, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
17 
18 #include <numeric> // for std::inner_product
19 #include "simddetect.h"
20 #include "dotproduct.h"
21 #include "intsimdmatrix.h" // for IntSimdMatrix
22 #include "params.h" // for STRING_VAR
23 #include "tprintf.h" // for tprintf
24 
25 #if defined(AVX) || defined(AVX2) || defined(FMA) || defined(SSE4_1)
26 # define HAS_CPUID
27 #endif
28 
29 #if defined(HAS_CPUID)
30 #if defined(__GNUC__)
31 # include <cpuid.h>
32 #elif defined(_WIN32)
33 # include <intrin.h>
34 #endif
35 #endif
36 
37 namespace tesseract {
38 
39 // Computes and returns the dot product of the two n-vectors u and v.
40 // Note: because the order of addition is different among the different dot
41 // product functions, the results can (and do) vary slightly (although they
42 // agree to within about 4e-15). This produces different results when running
43 // training, despite all random inputs being precisely equal.
44 // To get consistent results, use just one of these dot product functions.
45 // On a test multi-layer network, serial is 57% slower than SSE, and AVX
46 // is about 8% faster than SSE. This suggests that the time is memory
47 // bandwidth constrained and could benefit from holding the reused vector
48 // in AVX registers.
50 
// Config variable naming the dot product implementation to use. It defaults
// to "auto"; this file compares it against "generic", "native", "avx2",
// "avx", "fma", "sse" and "std::inner_product".
51 static STRING_VAR(dotproduct, "auto",
52  "Function used for calculation of dot product");
53 
// The static detector instance. Its constructor (defined in this file) fills
// in the availability flags below.
54 SIMDDetect SIMDDetect::detector;
55 
56 // If true, then AVX has been detected.
57 bool SIMDDetect::avx_available_;
// If true, then AVX2 has been detected (CPUID leaf 7, EBX mask 0x00000020).
58 bool SIMDDetect::avx2_available_;
// If true, then AVX-512F has been detected (CPUID leaf 7, EBX mask 0x00010000).
59 bool SIMDDetect::avx512F_available_;
// If true, then AVX-512BW has been detected (CPUID leaf 7, EBX mask 0x40000000).
60 bool SIMDDetect::avx512BW_available_;
61 // If true, then FMA has been detected.
62 bool SIMDDetect::fma_available_;
63 // If true, then SSE4.1 has been detected.
64 bool SIMDDetect::sse_available_;
65 
// Portable reference dot product: sums u[i] * v[i] for i = 0 .. n-1 in
// ascending index order and returns the sum. Returns 0.0 when n <= 0.
static double DotProductGeneric(const double* u, const double* v, int n) {
  double sum = 0.0;
  int i = 0;
  while (i < n) {
    sum += u[i] * v[i];
    ++i;
  }
  return sum;
}
72 
// Computes the dot product of the two n-vectors u and v using
// std::inner_product, starting from 0.0.
// Returns 0.0 when n <= 0, matching DotProductGeneric; without this guard a
// negative n would hand std::inner_product an invalid range (u + n < u).
static double DotProductStdInnerProduct(const double* u, const double* v, int n) {
  if (n <= 0) {
    return 0.0;
  }
  return std::inner_product(u, u + n, v, 0.0);
}
77 
78 static void SetDotProduct(DotProductFunction f, const IntSimdMatrix* m = nullptr) {
79  DotProduct = f;
81 }
82 
83 // Constructor.
84 // Tests the architecture in a system-dependent way to detect AVX, SSE and
85 // any other available SIMD equipment.
86 // __GNUC__ is also defined by compilers that include GNU extensions such as
87 // clang.
88 SIMDDetect::SIMDDetect() {
89  // The fallback is a generic dot product calculation.
90  SetDotProduct(DotProductGeneric);
91 
92 #if defined(HAS_CPUID)
93 #if defined(__GNUC__)
94  unsigned int eax, ebx, ecx, edx;
95  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
96  // Note that these tests all use hex because the older compilers don't have
97  // the newer flags.
98 #if defined(SSE4_1)
99  sse_available_ = (ecx & 0x00080000) != 0;
100 #endif
101 #if defined(FMA)
102  fma_available_ = (ecx & 0x00001000) != 0;
103 #endif
104 #if defined(AVX)
105  avx_available_ = (ecx & 0x10000000) != 0;
106  if (avx_available_) {
107  // There is supposed to be a __get_cpuid_count function, but this is all
108  // there is in my cpuid.h. It is a macro for an asm statement and cannot
109  // be used inside an if.
110  __cpuid_count(7, 0, eax, ebx, ecx, edx);
111  avx2_available_ = (ebx & 0x00000020) != 0;
112  avx512F_available_ = (ebx & 0x00010000) != 0;
113  avx512BW_available_ = (ebx & 0x40000000) != 0;
114  }
115 #endif
116  }
117 # elif defined(_WIN32)
118  int cpuInfo[4];
119  int max_function_id;
120  __cpuid(cpuInfo, 0);
121  max_function_id = cpuInfo[0];
122  if (max_function_id >= 1) {
123  __cpuid(cpuInfo, 1);
124 #if defined(SSE4_1)
125  sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
126 #endif
127 #if defined(AVX) || defined(AVX2) || defined(FMA)
128  if ((cpuInfo[2] & 0x08000000) && ((_xgetbv(0) & 6) == 6)) {
129  // OSXSAVE bit is set, XMM state and YMM state are fine.
130 #if defined(FMA)
131  fma_available_ = (cpuInfo[2] & 0x00001000) != 0;
132 #endif
133 #if defined(AVX)
134  avx_available_ = (cpuInfo[2] & 0x10000000) != 0;
135 #endif
136 #if defined(AVX2)
137  if (max_function_id >= 7) {
138  __cpuid(cpuInfo, 7);
139  avx2_available_ = (cpuInfo[1] & 0x00000020) != 0;
140  avx512F_available_ = (cpuInfo[1] & 0x00010000) != 0;
141  avx512BW_available_ = (cpuInfo[1] & 0x40000000) != 0;
142  }
143 #endif
144  }
145 #endif
146  }
147 #else
148 #error "I don't know how to test for SIMD with this compiler"
149 #endif
150 #endif
151 
152  // Select code for calculation of dot product based on autodetection.
153  if (false) {
154  // This is a dummy to support conditional compilation.
155 #if defined(AVX2)
156  } else if (avx2_available_) {
157  // AVX2 detected.
159 #endif
160 #if defined(AVX)
161  } else if (avx_available_) {
162  // AVX detected.
164 #endif
165 #if defined(SSE4_1)
166  } else if (sse_available_) {
167  // SSE detected.
169 #endif
170  }
171 }
172 
174  // Select code for calculation of dot product based on the
175  // value of the config variable if that value is not empty.
176  const char* dotproduct_method = "generic";
177  if (!strcmp(dotproduct.string(), "auto")) {
178  // Automatic detection. Nothing to be done.
179  } else if (!strcmp(dotproduct.string(), "generic")) {
180  // Generic code selected by config variable.
181  SetDotProduct(DotProductGeneric);
182  dotproduct_method = "generic";
183  } else if (!strcmp(dotproduct.string(), "native")) {
184  // Native optimized code selected by config variable.
185  SetDotProduct(DotProductNative);
186  dotproduct_method = "native";
187 #if defined(AVX2)
188  } else if (!strcmp(dotproduct.string(), "avx2")) {
189  // AVX2 selected by config variable.
191  dotproduct_method = "avx2";
192 #endif
193 #if defined(AVX)
194  } else if (!strcmp(dotproduct.string(), "avx")) {
195  // AVX selected by config variable.
197  dotproduct_method = "avx";
198 #endif
199 #if defined(FMA)
200  } else if (!strcmp(dotproduct.string(), "fma")) {
201  // FMA selected by config variable.
203  dotproduct_method = "fma";
204 #endif
205 #if defined(SSE4_1)
206  } else if (!strcmp(dotproduct.string(), "sse")) {
207  // SSE selected by config variable.
209  dotproduct_method = "sse";
210 #endif
211  } else if (!strcmp(dotproduct.string(), "std::inner_product")) {
212  // std::inner_product selected by config variable.
213  SetDotProduct(DotProductStdInnerProduct);
214  dotproduct_method = "std::inner_product";
215  } else {
216  // Unsupported value of config variable.
217  tprintf("Warning, ignoring unsupported config variable value: dotproduct=%s\n",
218  dotproduct.string());
219  tprintf("Support values for dotproduct: auto generic native"
220 #if defined(AVX)
221  " avx"
222 #endif
223 #if defined(SSE4_1)
224  " sse"
225 #endif
226  " std::inner_product.\n");
227  }
228 
229  dotproduct.set_value(dotproduct_method);
230 }
231 
232 } // namespace tesseract
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
double DotProductAVX(const double *u, const double *v, int n)
static const IntSimdMatrix * intSimdMatrix
double DotProductFMA(const double *u, const double *v, int n)
double(*)(const double *, const double *, int) DotProductFunction
Definition: simddetect.h:25
#define STRING_VAR(name, val, comment)
Definition: params.h:309
static const IntSimdMatrix intSimdMatrixAVX2
static TESS_API void Update()
Definition: simddetect.cpp:173
double DotProductNative(const double *u, const double *v, int n)
Definition: dotproduct.cpp:22
double DotProductSSE(const double *u, const double *v, int n)
DotProductFunction DotProduct
Definition: simddetect.cpp:49
static const IntSimdMatrix intSimdMatrixSSE