diff -ruN tesseract-4.0.0-beta.4/src/lstm/lstm.cpp tesseract-4.0.0-beta.4-openmp/src/lstm/lstm.cpp --- tesseract-4.0.0-beta.4/src/lstm/lstm.cpp 2018-07-27 15:10:37.000000000 +0900 +++ tesseract-4.0.0-beta.4-openmp/src/lstm/lstm.cpp 2018-10-07 08:15:23.139366142 +0900 @@ -36,9 +36,7 @@ // Macros for openmp code if it is available, otherwise empty macros. #ifdef _OPENMP #define PARALLEL_IF_OPENMP(__num_threads) \ - PRAGMA(omp parallel if (__num_threads > 1) num_threads(__num_threads)) { \ - PRAGMA(omp sections nowait) { \ - PRAGMA(omp section) { + PRAGMA(omp parallel if (__num_threads > 1) num_threads(__num_threads)) { #define SECTION_IF_OPENMP \ } \ PRAGMA(omp section) \ @@ -320,6 +318,10 @@ if (!source_.int_mode()) source_.ReadTimeStep(t, curr_input); // Matrix multiply the inputs with the source. PARALLEL_IF_OPENMP(GFS) +#ifdef _OPENMP + PRAGMA(omp sections nowait) { + PRAGMA(omp section) { +#endif // It looks inefficient to create the threads on each t iteration, but the // alternative of putting the parallel outside the t loop, a single around // the t-loop and then tasks in place of the sections is a *lot* slower. @@ -575,7 +577,10 @@ #endif // Matrix multiply to get the source errors. PARALLEL_IF_OPENMP(GFS) - +#ifdef _OPENMP + PRAGMA(omp sections nowait) { + PRAGMA(omp section) { +#endif // Cell inputs. node_values_[CI].FuncMultiply3(t, node_values_[GI], t, curr_stateerr, gate_errors[CI]);