Add checks to validate the currently active MRVL layers and increase the maximum number of layers supported to 512 for LLVM-only models.
Signed-off-by: Srikanth Yalavarthi <[email protected]> --- drivers/ml/cnxk/cn10k_ml_ops.c | 6 +++--- drivers/ml/cnxk/cnxk_ml_dev.h | 4 ++-- drivers/ml/cnxk/cnxk_ml_io.h | 2 +- drivers/ml/cnxk/cnxk_ml_ops.c | 14 +++++++------- drivers/ml/cnxk/tvmrt_ml_model.c | 17 +++++++++++++++++ 5 files changed, 30 insertions(+), 13 deletions(-) diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index b30af7c7a44..00c0d87a6d6 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -624,15 +624,15 @@ cn10k_ml_layer_load(void *device, uint16_t model_id, const char *layer_name, uin return ret; /* Get index */ - for (idx = 0; idx < cnxk_mldev->max_nb_layers; idx++) { + for (idx = 0; idx < cnxk_mldev->max_mrvl_layers; idx++) { if (!cnxk_mldev->index_map[idx].active) { layer->index = idx; break; } } - if (idx >= cnxk_mldev->max_nb_layers) { - plt_err("No slots available for model layers, model_id = %u, layer_id = %u", + if (idx >= cnxk_mldev->max_mrvl_layers) { + plt_err("No slots available for model MRVL layers, model_id = %u, layer_id = %u", model->model_id, layer_id); return -1; } diff --git a/drivers/ml/cnxk/cnxk_ml_dev.h b/drivers/ml/cnxk/cnxk_ml_dev.h index e93d76d1af8..181a04737d7 100644 --- a/drivers/ml/cnxk/cnxk_ml_dev.h +++ b/drivers/ml/cnxk/cnxk_ml_dev.h @@ -93,9 +93,9 @@ struct cnxk_ml_dev { #endif /* Maximum number of layers */ - uint64_t max_nb_layers; + uint64_t max_mrvl_layers; - /* Index map */ + /* Index map for MRVL layers */ struct cnxk_ml_index_map *index_map; }; diff --git a/drivers/ml/cnxk/cnxk_ml_io.h b/drivers/ml/cnxk/cnxk_ml_io.h index 3ca852706f5..e6efd588f75 100644 --- a/drivers/ml/cnxk/cnxk_ml_io.h +++ b/drivers/ml/cnxk/cnxk_ml_io.h @@ -16,7 +16,7 @@ /* Maximum number of layers per model */ #ifdef RTE_MLDEV_CNXK_ENABLE_TVMRT -#define ML_CNXK_MODEL_MAX_LAYERS 128 +#define ML_CNXK_MODEL_MAX_LAYERS 512 #else #define ML_CNXK_MODEL_MAX_LAYERS 1 #endif diff --git a/drivers/ml/cnxk/cnxk_ml_ops.c 
b/drivers/ml/cnxk/cnxk_ml_ops.c index 675cdaae2bd..dc057643068 100644 --- a/drivers/ml/cnxk/cnxk_ml_ops.c +++ b/drivers/ml/cnxk/cnxk_ml_ops.c @@ -636,30 +636,30 @@ cnxk_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *co /* Set device capabilities */ if (cnxk_mldev->type == CNXK_ML_DEV_TYPE_PCI) - cnxk_mldev->max_nb_layers = + cnxk_mldev->max_mrvl_layers = cnxk_mldev->cn10k_mldev.fw.req->cn10k_req.jd.fw_load.cap.s.max_models; else - cnxk_mldev->max_nb_layers = ML_CNXK_MAX_MODELS; + cnxk_mldev->max_mrvl_layers = 0; cnxk_mldev->mldev->enqueue_burst = cnxk_ml_enqueue_burst; cnxk_mldev->mldev->dequeue_burst = cnxk_ml_dequeue_burst; cnxk_mldev->mldev->op_error_get = cnxk_ml_op_error_get; /* Allocate and initialize index_map */ - if (cnxk_mldev->index_map == NULL) { + if (cnxk_mldev->type == CNXK_ML_DEV_TYPE_PCI && cnxk_mldev->index_map == NULL) { cnxk_mldev->index_map = rte_zmalloc("cnxk_ml_index_map", - sizeof(struct cnxk_ml_index_map) * cnxk_mldev->max_nb_layers, + sizeof(struct cnxk_ml_index_map) * cnxk_mldev->max_mrvl_layers, RTE_CACHE_LINE_SIZE); if (cnxk_mldev->index_map == NULL) { - plt_err("Failed to get memory for index_map, nb_layers %" PRIu64, - cnxk_mldev->max_nb_layers); + plt_err("Failed to get memory for index_map, nb_mrvl_layers %" PRIu64, + cnxk_mldev->max_mrvl_layers); ret = -ENOMEM; goto error; } } - for (i = 0; i < cnxk_mldev->max_nb_layers; i++) + for (i = 0; i < cnxk_mldev->max_mrvl_layers; i++) cnxk_mldev->index_map[i].active = false; /* Initialize xstats */ diff --git a/drivers/ml/cnxk/tvmrt_ml_model.c b/drivers/ml/cnxk/tvmrt_ml_model.c index 9bff424f0d8..319c53a05b5 100644 --- a/drivers/ml/cnxk/tvmrt_ml_model.c +++ b/drivers/ml/cnxk/tvmrt_ml_model.c @@ -398,6 +398,7 @@ tvmrt_ml_json_graph_get_arrays(json_t *json_parsed, json_t **nodes, json_t **arg int tvmrt_ml_model_json_parse(struct cnxk_ml_model *model) { + struct cnxk_ml_dev *cnxk_mldev = model->cnxk_mldev; struct tvmrt_ml_param_names param_names; json_error_t 
json_error; json_t *json_parsed; @@ -407,6 +408,7 @@ tvmrt_ml_model_json_parse(struct cnxk_ml_model *model) json_t *json_node_row_ptr; json_t *json_shape_values; json_t *json_dtype_values; + uint16_t nb_active_mrvl_layers; uint16_t nb_mrvl_layers; uint16_t nb_llvm_layers; DLDevice device; @@ -511,6 +513,21 @@ tvmrt_ml_model_json_parse(struct cnxk_ml_model *model) goto error; } + nb_active_mrvl_layers = 0; + for (i = 0; i < cnxk_mldev->max_mrvl_layers; i++) { + if (cnxk_mldev->index_map[i].active) + nb_active_mrvl_layers++; + } + + if (nb_active_mrvl_layers + nb_mrvl_layers > cnxk_mldev->max_mrvl_layers) { + ret = -ENOSPC; + plt_err("TVM runtime: Total MRVL layers (%u) exceeds maximum supported " + "MRVL layers (%" PRIu64 "), model_id = %u, error = %d", + nb_active_mrvl_layers + nb_mrvl_layers, cnxk_mldev->max_mrvl_layers, + model->model_id, ret); + goto error; + } + /* Set model subtype */ if ((nb_llvm_layers == 0) && (nb_mrvl_layers == 1)) model->subtype = ML_CNXK_MODEL_SUBTYPE_TVM_MRVL; -- 2.34.1

