This is an automated email from the ASF dual-hosted git repository.

wkcn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 49932fa  #14199: catch subprocess.CalledProcessError in get_gpus() (#14212)
49932fa is described below

commit 49932faf4c0ba96534f3994914fb875ef721c17d
Author: Frank Liu <frankfliu2...@gmail.com>
AuthorDate: Wed Mar 6 19:16:12 2019 -0800

    #14199: catch subprocess.CalledProcessError in get_gpus() (#14212)
    
    * Fixes #14199: use the proper API to get the number of GPUs.
    
    1. Added get_gpu_count() and get_gpu_memory() APIs to the Python binding.
    2. Updated example scripts to use the proper API for getting the number of GPUs.
    
    * retrigger CI
---
 benchmark/python/control_flow/rnn.py        |  7 +------
 example/image-classification/common/util.py |  8 +++-----
 python/mxnet/test_utils.py                  | 11 ++---------
 python/mxnet/util.py                        | 16 ++++++++++++++++
 tools/bandwidth/test_measure.py             | 10 ++++------
 5 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/benchmark/python/control_flow/rnn.py 
b/benchmark/python/control_flow/rnn.py
index 0849872..24e326c 100644
--- a/benchmark/python/control_flow/rnn.py
+++ b/benchmark/python/control_flow/rnn.py
@@ -79,12 +79,7 @@ def _array(shape, ctx):
 
 
 def _get_gpus():
-    try:
-        re = subprocess.check_output(["nvidia-smi", "-L"], 
universal_newlines=True)
-    except OSError:
-        return []
-    return range(len([i for i in re.split('\n') if 'GPU' in i]))
-
+    return range(mx.util.get_gpu_count())
 
 def run_benchmark(cell_type, ctx, seq_len, batch_size, hidden_dim):
     obj = {"foreach": ForeachRNN, "while_loop": WhileRNN}[args.benchmark]
diff --git a/example/image-classification/common/util.py 
b/example/image-classification/common/util.py
index 5f70411..8737b69 100644
--- a/example/image-classification/common/util.py
+++ b/example/image-classification/common/util.py
@@ -19,6 +19,8 @@ import subprocess
 import os
 import errno
 
+import mxnet as mx
+
 def download_file(url, local_fname=None, force_write=False):
     # requests is not default installed
     import requests
@@ -49,8 +51,4 @@ def get_gpus():
     """
     return a list of GPUs
     """
-    try:
-        re = subprocess.check_output(["nvidia-smi", "-L"], 
universal_newlines=True)
-    except OSError:
-        return []
-    return range(len([i for i in re.split('\n') if 'GPU' in i]))
+    return range(mx.util.get_gpu_count())
diff --git a/python/mxnet/test_utils.py b/python/mxnet/test_utils.py
index 4138e4d..6d1749b 100644
--- a/python/mxnet/test_utils.py
+++ b/python/mxnet/test_utils.py
@@ -23,7 +23,6 @@ import gzip
 import struct
 import traceback
 import numbers
-import subprocess
 import sys
 import os
 import errno
@@ -213,6 +212,7 @@ def _get_powerlaw_dataset_csr(num_rows, num_cols, 
density=0.1, dtype=None):
     else:
         return mx.nd.array(output_arr).tostype("csr")
 
+
 def assign_each(the_input, function):
     """Return ndarray composed of passing each array value through some 
function"""
     if function is None:
@@ -1391,14 +1391,7 @@ def list_gpus():
         If there are n GPUs, then return a list [0,1,...,n-1]. Otherwise 
returns
         [].
     """
-    re = ''
-    nvidia_smi = ['nvidia-smi', '/usr/bin/nvidia-smi', 
'/usr/local/nvidia/bin/nvidia-smi']
-    for cmd in nvidia_smi:
-        try:
-            re = subprocess.check_output([cmd, "-L"], universal_newlines=True)
-        except (subprocess.CalledProcessError, OSError):
-            pass
-    return range(len([i for i in re.split('\n') if 'GPU' in i]))
+    return range(mx.util.get_gpu_count())
 
 def download(url, fname=None, dirname=None, overwrite=False, retries=5):
     """Download an given URL
diff --git a/python/mxnet/util.py b/python/mxnet/util.py
index 62c05d2..fc8d985 100644
--- a/python/mxnet/util.py
+++ b/python/mxnet/util.py
@@ -16,9 +16,12 @@
 # under the License.
 """general utility functions"""
 
+import ctypes
 import os
 import sys
 
+from .base import _LIB, check_call
+
 
 def makedirs(d):
     """Create directories recursively if they don't exist. 
os.makedirs(exist_ok=True) is not
@@ -28,3 +31,16 @@ def makedirs(d):
         mkpath(d)
     else:
         os.makedirs(d, exist_ok=True)  # pylint: disable=unexpected-keyword-arg
+
+
+def get_gpu_count():
+    size = ctypes.c_int()
+    check_call(_LIB.MXGetGPUCount(ctypes.byref(size)))
+    return size.value
+
+
+def get_gpu_memory(gpu_dev_id):
+    free_mem = ctypes.c_uint64(0)
+    total_mem = ctypes.c_uint64(0)
+    check_call(_LIB.MXGetGPUMemoryInformation64(gpu_dev_id, 
ctypes.byref(free_mem), ctypes.byref(total_mem)))
+    return free_mem.value, total_mem.value
diff --git a/tools/bandwidth/test_measure.py b/tools/bandwidth/test_measure.py
index 375290f..d14a7aa 100644
--- a/tools/bandwidth/test_measure.py
+++ b/tools/bandwidth/test_measure.py
@@ -21,13 +21,11 @@ test measure.py
 from measure import run
 import subprocess
 import logging
+
+import mxnet as mx
+
 def get_gpus():
-    try:
-        re = subprocess.check_output(["nvidia-smi", "-L"], 
universal_newlines=True)
-    except OSError:
-        return ''
-    gpus = [i for i in re.split('\n') if 'GPU' in i]
-    return ','.join([str(i) for i in range(len(gpus))])
+    return ','.join([str(i) for i in range(mx.util.get_gpu_count())])
 
 def test_measure(**kwargs):
     logging.info(kwargs)

Reply via email to