rongzha1 commented on a change in pull request #12866: Optimization for embedding OP for CPU URL: https://github.com/apache/incubator-mxnet/pull/12866#discussion_r228833630
########## File path: src/operator/tensor/indexing_op.cc ########## @@ -39,10 +39,13 @@ template<typename DType> bool CheckIndexOutOfBound(const DType* data_ptr, size_t data_size, const DType min, const DType max) { bool is_valid = true; - for (size_t i = 0; i < data_size; i++) { + // to avoid Jenkins omp check error + int64_t size = data_size; + int omp_threads = engine::OpenMP::Get()->GetRecommendedOMPThreadCount(); + #pragma omp parallel for num_threads(omp_threads) if (data_size > 2000) Review comment: Thanks for your advice. I have run some tests to determine the right magic number for the size threshold. size | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | max time -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- 2000 | 0.017 | 0.052 | 0.072 | 0.103 | 0.104 | 0.042 | 0.097 | 0.099 | 0.097 | 0.128 | 0.079 | 0.099 | 0.08 | 0.113 | 0.081 | 0.095 | 0.081 | 0.085 | 0.084 | 0.081 | 0.116 | 0.082 | 0.085 | 0.083 | 0.041 | 0.082 | 0.082 | 0.083 | 0.128 4000 | 0.01 | 0.006 | 0.015 | 0.015 | 0.007 | 0.007 | 0.007 | 0.007 | 0.01 | 0.006 | 0.01 | 0.007 | 0.007 | 0.007 | 0.007 | 0.007 | 0.007 | 0.009 | 0.007 | 0.008 | 0.009 | 0.008 | 0.009 | 0.008 | 0.009 | 0.009 | 0.009 | 0.013 | 0.015 6000 | 0.014 | 0.008 | 0.02 | 0.008 | 0.008 | 0.009 | 0.008 | 0.007 | 0.007 | 0.01 | 0.008 | 0.011 | 0.009 | 0.01 | 0.008 | 0.009 | 0.009 | 0.009 | 0.008 | 0.008 | 0.009 | 0.008 | 0.01 | 0.01 | 0.011 | 0.009 | 0.009 | 0.011 | 0.02 8000 | 0.018 | 0.01 | 0.025 | 0.01 | 0.007 | 0.008 | 0.008 | 0.011 | 0.01 | 0.013 | 0.009 | 0.012 | 0.009 | 0.013 | 0.009 | 0.01 | 0.009 | 0.01 | 0.008 | 0.009 | 0.01 | 0.009 | 0.009 | 0.009 | 0.011 | 0.01 | 0.01 | 0.011 | 0.025 10000 | 0.02 | 0.012 | 0.029 | 0.01 | 0.011 | 0.008 | 0.021 | 0.011 | 0.008 | 0.015 | 0.009 | 0.01 | 0.009 | 0.01 | 0.012 | 0.011 | 0.009 | 0.011 | 0.01 | 0.008 | 0.01 | 0.01 | 0.01 
| 0.013 | 0.01 | 0.011 | 0.011 | 0.02 | 0.029 12000 | 0.024 | 0.015 | 0.035 | 0.011 | 0.01 | 0.01 | 0.008 | 0.009 | 0.012 | 0.012 | 0.015 | 0.012 | 0.009 | 0.01 | 0.009 | 0.009 | 0.01 | 0.011 | 0.009 | 0.01 | 0.012 | 0.012 | 0.011 | 0.011 | 0.014 | 0.011 | 0.011 | 0.012 | 0.035 14000 | 0.028 | 0.017 | 0.04 | 0.013 | 0.011 | 0.01 | 0.01 | 0.014 | 0.013 | 0.012 | 0.012 | 0.009 | 0.011 | 0.011 | 0.009 | 0.011 | 0.012 | 0.011 | 0.012 | 0.01 | 0.014 | 0.015 | 0.013 | 0.012 | 0.013 | 0.011 | 0.011 | 0.013 | 0.04 16000 | 0.032 | 0.018 | 0.018 | 0.014 | 0.012 | 0.011 | 0.01 | 0.011 | 0.01 | 0.01 | 0.011 | 0.011 | 0.011 | 0.01 | 0.011 | 0.011 | 0.011 | 0.016 | 0.014 | 0.01 | 0.018 | 0.015 | 0.011 | 0.011 | 0.018 | 0.013 | 0.013 | 0.014 | 0.032 18000 | 0.038 | 0.022 | 0.017 | 0.013 | 0.013 | 0.013 | 0.019 | 0.014 | 0.013 | 0.017 | 0.01 | 0.012 | 0.01 | 0.011 | 0.011 | 0.011 | 0.017 | 0.017 | 0.01 | 0.012 | 0.013 | 0.011 | 0.029 | 0.011 | 0.013 | 0.014 | 0.014 | 0.015 | 0.038 20000 | 0.042 | 0.023 | 0.019 | 0.015 | 0.019 | 0.013 | 0.014 | 0.012 | 0.011 | 0.012 | 0.011 | 0.012 | 0.011 | 0.01 | 0.011 | 0.01 | 0.01 | 0.011 | 0.012 | 0.012 | 0.012 | 0.013 | 0.013 | 0.016 | 0.013 | 0.017 | 0.017 | 0.014 | 0.042 We can see that after size=14000, the function CheckIndexOutOfBound benefits from OpenMP threads. So could you please help review it? @szha ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services