[dpdk-dev] [PATCH v5 3/3] mempool: allow for user-owned mempool caches

2016-06-29 Thread Lazaros Koromilas
The mempool cache is only available to EAL threads as a per-lcore
resource. Change this so that the user can create and provide their own
cache on mempool get and put operations. This works with non-EAL threads
too. This commit introduces the new API calls:

rte_mempool_cache_create(size, socket_id)
rte_mempool_cache_free(cache)
rte_mempool_cache_flush(cache, mp)
rte_mempool_default_cache(mp, lcore_id)

Changes the API calls:

rte_mempool_generic_put(mp, obj_table, n, cache, flags)
rte_mempool_generic_get(mp, obj_table, n, cache, flags)

The cache-oblivious API calls use the per-lcore default local cache.
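
A minimal usage sketch (not part of the patch) from a non-EAL thread,
based on how the updated test_mempool.c exercises the new API; "mp" is
assumed to be an already-created mempool and error handling is
abbreviated:

	#include <rte_mempool.h>

	static int
	worker(struct rte_mempool *mp)
	{
		struct rte_mempool_cache *cache;
		void *obj;

		/* Create a private cache, not tied to any lcore. */
		cache = rte_mempool_cache_create(RTE_MEMPOOL_CACHE_MAX_SIZE,
						 SOCKET_ID_ANY);
		if (cache == NULL)
			return -1;

		/* Gets and puts go through the user-owned cache. */
		if (rte_mempool_generic_get(mp, &obj, 1, cache, 0) == 0)
			rte_mempool_generic_put(mp, &obj, 1, cache, 0);

		/* Return cached objects to the pool before freeing. */
		rte_mempool_cache_flush(cache, mp);
		rte_mempool_cache_free(cache);
		return 0;
	}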

Signed-off-by: Lazaros Koromilas 
Acked-by: Olivier Matz 
---
 app/test/test_mempool.c |  73 ---
 app/test/test_mempool_perf.c|  73 +--
 doc/guides/prog_guide/env_abstraction_layer.rst |   4 +-
 doc/guides/prog_guide/mempool_lib.rst   |   6 +-
 lib/librte_mempool/rte_mempool.c|  66 +-
 lib/librte_mempool/rte_mempool.h| 164 +---
 lib/librte_mempool/rte_mempool_version.map  |   4 +
 7 files changed, 308 insertions(+), 82 deletions(-)

diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index 55c2cbc..63c61f3 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -75,10 +75,16 @@
 #define MAX_KEEP 16
 #define MEMPOOL_SIZE ((rte_lcore_count()*(MAX_KEEP+RTE_MEMPOOL_CACHE_MAX_SIZE))-1)

+#define LOG_ERR() printf("test failed at %s():%d\n", __func__, __LINE__)
 #define RET_ERR() do { \
-   printf("test failed at %s():%d\n", __func__, __LINE__); \
+   LOG_ERR();  \
return -1;  \
} while (0)
+#define GOTO_ERR(var, label) do {  \
+   LOG_ERR();  \
+   var = -1;   \
+   goto label; \
+   } while (0)

 static rte_atomic32_t synchro;

@@ -191,7 +197,7 @@ my_obj_init(struct rte_mempool *mp, __attribute__((unused)) 
void *arg,

 /* basic tests (done on one core) */
 static int
-test_mempool_basic(struct rte_mempool *mp)
+test_mempool_basic(struct rte_mempool *mp, int use_external_cache)
 {
uint32_t *objnum;
void **objtable;
@@ -199,47 +205,62 @@ test_mempool_basic(struct rte_mempool *mp)
char *obj_data;
int ret = 0;
unsigned i, j;
+   int offset;
+   struct rte_mempool_cache *cache;
+
+   if (use_external_cache) {
+   /* Create a user-owned mempool cache. */
+   cache = rte_mempool_cache_create(RTE_MEMPOOL_CACHE_MAX_SIZE,
+SOCKET_ID_ANY);
+   if (cache == NULL)
+   RET_ERR();
+   } else {
+   /* May be NULL if cache is disabled. */
+   cache = rte_mempool_default_cache(mp, rte_lcore_id());
+   }

/* dump the mempool status */
rte_mempool_dump(stdout, mp);

printf("get an object\n");
-   if (rte_mempool_get(mp, &obj) < 0)
-   RET_ERR();
+   if (rte_mempool_generic_get(mp, &obj, 1, cache, 0) < 0)
+   GOTO_ERR(ret, out);
rte_mempool_dump(stdout, mp);

/* tests that improve coverage */
printf("get object count\n");
-   if (rte_mempool_count(mp) != MEMPOOL_SIZE - 1)
-   RET_ERR();
+   /* We have to count the extra caches, one in this case. */
+   offset = use_external_cache ? 1 * cache->len : 0;
+   if (rte_mempool_count(mp) + offset != MEMPOOL_SIZE - 1)
+   GOTO_ERR(ret, out);

printf("get private data\n");
if (rte_mempool_get_priv(mp) != (char *)mp +
MEMPOOL_HEADER_SIZE(mp, mp->cache_size))
-   RET_ERR();
+   GOTO_ERR(ret, out);

 #ifndef RTE_EXEC_ENV_BSDAPP /* rte_mem_virt2phy() not supported on bsd */
printf("get physical address of an object\n");
if (rte_mempool_virt2phy(mp, obj) != rte_mem_virt2phy(obj))
-   RET_ERR();
+   GOTO_ERR(ret, out);
 #endif

printf("put the object back\n");
-   rte_mempool_put(mp, obj);
+   rte_mempool_generic_put(mp, &obj, 1, cache, 0);
rte_mempool_dump(stdout, mp);

printf("get 2 objects\n");
-   if (rte_mempool_get(mp, &obj) < 0)
-   RET_ERR();
-   if (rte_mempool_get(mp, &obj2) < 0) {
-   rte_mempool_put(mp, obj);
-   RET_ERR();
+   if (rte_mempool_generic_get(mp, &obj, 1, cache, 0) < 0)
+   GOTO_ERR(ret, out);

[dpdk-dev] [PATCH v5 2/3] mempool: use bit flags to set multi consumers and producers

2016-06-29 Thread Lazaros Koromilas
Pass the same flags as in rte_mempool_create(). Changes API calls:

rte_mempool_generic_put(mp, obj_table, n, flags)
rte_mempool_generic_get(mp, obj_table, n, flags)
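
An illustrative sketch (not from the patch) of the new calling
convention: the last argument is now the mempool creation flags rather
than an is_mp/is_mc boolean. Note that patch 3/3 later adds a cache
parameter to these same calls.

	void *obj;

	/* Multi-consumer get: pass 0 (MEMPOOL_F_SC_GET not set). */
	if (rte_mempool_generic_get(mp, &obj, 1, 0) < 0)
		return;

	/* Single-producer put: pass MEMPOOL_F_SP_PUT explicitly, or
	 * simply forward the pool's own creation flags, as
	 * rte_mempool_put_bulk() now does internally. */
	rte_mempool_generic_put(mp, &obj, 1, MEMPOOL_F_SP_PUT);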

Signed-off-by: Lazaros Koromilas 
Acked-by: Olivier Matz 
---
 lib/librte_mempool/rte_mempool.h | 58 +---
 1 file changed, 30 insertions(+), 28 deletions(-)

diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index a48f46d..971b1ba 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -953,12 +953,13 @@ void rte_mempool_dump(FILE *f, struct rte_mempool *mp);
  * @param n
  *   The number of objects to store back in the mempool, must be strictly
  *   positive.
- * @param is_mp
- *   Mono-producer (0) or multi-producers (1).
+ * @param flags
+ *   The flags used for the mempool creation.
+ *   Single-producer (MEMPOOL_F_SP_PUT flag) or multi-producers.
  */
 static inline void __attribute__((always_inline))
 __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
- unsigned n, int is_mp)
+ unsigned n, int flags)
 {
struct rte_mempool_cache *cache;
uint32_t index;
@@ -971,7 +972,7 @@ __mempool_generic_put(struct rte_mempool *mp, void * const 
*obj_table,
__MEMPOOL_STAT_ADD(mp, put, n);

/* cache is not enabled or single producer or non-EAL thread */
-   if (unlikely(cache_size == 0 || is_mp == 0 ||
+   if (unlikely(cache_size == 0 || flags & MEMPOOL_F_SP_PUT ||
 lcore_id >= RTE_MAX_LCORE))
goto ring_enqueue;

@@ -1024,15 +1025,16 @@ ring_enqueue:
  *   A pointer to a table of void * pointers (objects).
  * @param n
  *   The number of objects to add in the mempool from the obj_table.
- * @param is_mp
- *   Mono-producer (0) or multi-producers (1).
+ * @param flags
+ *   The flags used for the mempool creation.
+ *   Single-producer (MEMPOOL_F_SP_PUT flag) or multi-producers.
  */
 static inline void __attribute__((always_inline))
 rte_mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
-   unsigned n, int is_mp)
+   unsigned n, int flags)
 {
__mempool_check_cookies(mp, obj_table, n, 0);
-   __mempool_generic_put(mp, obj_table, n, is_mp);
+   __mempool_generic_put(mp, obj_table, n, flags);
 }

 /**
@@ -1050,7 +1052,7 @@ __rte_deprecated static inline void 
__attribute__((always_inline))
 rte_mempool_mp_put_bulk(struct rte_mempool *mp, void * const *obj_table,
unsigned n)
 {
-   rte_mempool_generic_put(mp, obj_table, n, 1);
+   rte_mempool_generic_put(mp, obj_table, n, 0);
 }

 /**
@@ -1068,7 +1070,7 @@ __rte_deprecated static inline void 
__attribute__((always_inline))
 rte_mempool_sp_put_bulk(struct rte_mempool *mp, void * const *obj_table,
unsigned n)
 {
-   rte_mempool_generic_put(mp, obj_table, n, 0);
+   rte_mempool_generic_put(mp, obj_table, n, MEMPOOL_F_SP_PUT);
 }

 /**
@@ -1089,8 +1091,7 @@ static inline void __attribute__((always_inline))
 rte_mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 unsigned n)
 {
-   rte_mempool_generic_put(mp, obj_table, n,
-   !(mp->flags & MEMPOOL_F_SP_PUT));
+   rte_mempool_generic_put(mp, obj_table, n, mp->flags);
 }

 /**
@@ -1105,7 +1106,7 @@ rte_mempool_put_bulk(struct rte_mempool *mp, void * const 
*obj_table,
 __rte_deprecated static inline void __attribute__((always_inline))
 rte_mempool_mp_put(struct rte_mempool *mp, void *obj)
 {
-   rte_mempool_generic_put(mp, &obj, 1, 1);
+   rte_mempool_generic_put(mp, &obj, 1, 0);
 }

 /**
@@ -1120,7 +1121,7 @@ rte_mempool_mp_put(struct rte_mempool *mp, void *obj)
 __rte_deprecated static inline void __attribute__((always_inline))
 rte_mempool_sp_put(struct rte_mempool *mp, void *obj)
 {
-   rte_mempool_generic_put(mp, &obj, 1, 0);
+   rte_mempool_generic_put(mp, &obj, 1, MEMPOOL_F_SP_PUT);
 }

 /**
@@ -1149,15 +1150,16 @@ rte_mempool_put(struct rte_mempool *mp, void *obj)
  *   A pointer to a table of void * pointers (objects).
  * @param n
  *   The number of objects to get, must be strictly positive.
- * @param is_mc
- *   Mono-consumer (0) or multi-consumers (1).
+ * @param flags
+ *   The flags used for the mempool creation.
+ *   Single-consumer (MEMPOOL_F_SC_GET flag) or multi-consumers.
  * @return
  *   - >=0: Success; number of objects supplied.
  *   - <0: Error; code of ring dequeue function.
  */
 static inline int __attribute__((always_inline))
 __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
- unsigned n, int is_mc)
+ unsigned n, int flags)
 {
int ret;
struct rte_mempool_cache *cache;
@@ -1167,7 +1169,7 @@ __mempool_generic_get(struct rte_mempool *mp, void 
**obj_table,
 

[dpdk-dev] [PATCH v5 1/3] mempool: deprecate specific get and put functions

2016-06-29 Thread Lazaros Koromilas
This commit introduces the API calls:

rte_mempool_generic_put(mp, obj_table, n, is_mp)
rte_mempool_generic_get(mp, obj_table, n, is_mc)

Deprecates the API calls:

rte_mempool_mp_put_bulk(mp, obj_table, n)
rte_mempool_sp_put_bulk(mp, obj_table, n)
rte_mempool_mp_put(mp, obj)
rte_mempool_sp_put(mp, obj)
rte_mempool_mc_get_bulk(mp, obj_table, n)
rte_mempool_sc_get_bulk(mp, obj_table, n)
rte_mempool_mc_get(mp, obj_p)
rte_mempool_sc_get(mp, obj_p)

We also check cookies in one place now.
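
A hypothetical migration sketch (not from the patch), showing a caller
moving from a deprecated specific function to the generic equivalent
by passing the producer/consumer mode explicitly:

	void *objs[32];

	/* before (now deprecated):
	 *	rte_mempool_mc_get_bulk(mp, objs, 32);
	 * after, with is_mc = 1 to keep multi-consumer behavior:
	 */
	if (rte_mempool_generic_get(mp, objs, 32, 1) < 0)
		return;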

Signed-off-by: Lazaros Koromilas 
Acked-by: Olivier Matz 
---
 app/test/test_mempool.c|  10 +--
 lib/librte_mempool/rte_mempool.h   | 115 -
 lib/librte_mempool/rte_mempool_version.map |   2 +
 3 files changed, 87 insertions(+), 40 deletions(-)

diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index 31582d8..55c2cbc 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -338,7 +338,7 @@ static int test_mempool_single_producer(void)
printf("obj not owned by this mempool\n");
RET_ERR();
}
-   rte_mempool_sp_put(mp_spsc, obj);
+   rte_mempool_put(mp_spsc, obj);
rte_spinlock_lock(&scsp_spinlock);
scsp_obj_table[i] = NULL;
rte_spinlock_unlock(&scsp_spinlock);
@@ -371,7 +371,7 @@ static int test_mempool_single_consumer(void)
rte_spinlock_unlock(&scsp_spinlock);
if (i >= MAX_KEEP)
continue;
-   if (rte_mempool_sc_get(mp_spsc, &obj) < 0)
+   if (rte_mempool_get(mp_spsc, &obj) < 0)
break;
rte_spinlock_lock(&scsp_spinlock);
scsp_obj_table[i] = obj;
@@ -477,13 +477,13 @@ test_mempool_basic_ex(struct rte_mempool *mp)
}

for (i = 0; i < MEMPOOL_SIZE; i ++) {
-   if (rte_mempool_mc_get(mp, &obj[i]) < 0) {
+   if (rte_mempool_get(mp, &obj[i]) < 0) {
printf("test_mp_basic_ex fail to get object for [%u]\n",
i);
goto fail_mp_basic_ex;
}
}
-   if (rte_mempool_mc_get(mp, &err_obj) == 0) {
+   if (rte_mempool_get(mp, &err_obj) == 0) {
printf("test_mempool_basic_ex get an impossible obj\n");
goto fail_mp_basic_ex;
}
@@ -494,7 +494,7 @@ test_mempool_basic_ex(struct rte_mempool *mp)
}

for (i = 0; i < MEMPOOL_SIZE; i++)
-   rte_mempool_mp_put(mp, obj[i]);
+   rte_mempool_put(mp, obj[i]);

if (rte_mempool_full(mp) != 1) {
printf("test_mempool_basic_ex the mempool should be full\n");
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 0a1777c..a48f46d 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -957,8 +957,8 @@ void rte_mempool_dump(FILE *f, struct rte_mempool *mp);
  *   Mono-producer (0) or multi-producers (1).
  */
 static inline void __attribute__((always_inline))
-__mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
-   unsigned n, int is_mp)
+__mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
+ unsigned n, int is_mp)
 {
struct rte_mempool_cache *cache;
uint32_t index;
@@ -1016,7 +1016,7 @@ ring_enqueue:


 /**
- * Put several objects back in the mempool (multi-producers safe).
+ * Put several objects back in the mempool.
  *
  * @param mp
  *   A pointer to the mempool structure.
@@ -1024,16 +1024,37 @@ ring_enqueue:
  *   A pointer to a table of void * pointers (objects).
  * @param n
  *   The number of objects to add in the mempool from the obj_table.
+ * @param is_mp
+ *   Mono-producer (0) or multi-producers (1).
  */
 static inline void __attribute__((always_inline))
+rte_mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
+   unsigned n, int is_mp)
+{
+   __mempool_check_cookies(mp, obj_table, n, 0);
+   __mempool_generic_put(mp, obj_table, n, is_mp);
+}
+
+/**
+ * @deprecated
+ * Put several objects back in the mempool (multi-producers safe).
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the mempool from the obj_table.
+ */
+__rte_deprecated static inline void __attribute__((always_inline))
 rte_mempool_mp_put_bulk(struct rte_mempool *mp, void * const *obj_table,
unsigned n)
 {
-   __mempool_check_cookies(mp, obj_table, n, 0);
-   __mempool_put_bulk(mp, obj_table, n, 1);
+   rte_mempool_generic_put(mp, obj_table, n, 1);

[dpdk-dev] [PATCH v5 0/3] mempool: user-owned mempool caches

2016-06-29 Thread Lazaros Koromilas
Updated version of the user-owned cache patchset.  It applies on top of
the latest v15 external mempool manager patches from David Hunt [1].

[1] http://dpdk.org/ml/archives/dev/2016-June/042004.html

v5 changes:

 * Rework error path macros in tests.
 * Update documentation.
 * Style fixes.

v4 changes:

 * Fix compilation with shared libraries
 * Add a GOTO_ERR() macro to factorize code in test_mempool.c
 * Change title of patch 2 to conform to check-git-log.sh

v3 changes:

 * Deprecate specific mempool API calls instead of removing them.
 * Split deprecation into a separate commit to limit noise.
 * Fix cache flush by setting cache->len = 0 and make it inline (sketched below).
 * Remove cache->size == 0 checks and ensure size != 0 at creation.
 * Fix tests to check if cache creation succeeded.
 * Fix tests to free allocated resources on error.
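
As a rough sketch of the inlined flush mentioned in the list above
(assumed shape, using the rte_mempool_ops_enqueue_bulk() call from the
external mempool manager series this set applies on top of; see patch
3/3 for the real code):

	static inline void
	rte_mempool_cache_flush(struct rte_mempool_cache *cache,
				struct rte_mempool *mp)
	{
		/* Push locally cached objects back to the pool. */
		rte_mempool_ops_enqueue_bulk(mp, cache->objs, cache->len);
		cache->len = 0;
	}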

The mempool cache is only available to EAL threads as a per-lcore
resource. Change this so that the user can create and provide their own
cache on mempool get and put operations. This works with non-EAL threads
too.

Also, deprecate the explicit {mp,sp}_put and {mc,sc}_get calls and
re-route them through the new generic calls. Minor cleanup to pass the
mempool bit flags instead of using specific is_mp and is_mc. The old
cache-oblivious API calls use the per-lcore default local cache. The
mempool and mempool_perf tests are also updated to handle the
user-owned cache case.

Introduced API calls:

rte_mempool_cache_create(size, socket_id)
rte_mempool_cache_free(cache)
rte_mempool_cache_flush(cache, mp)
rte_mempool_default_cache(mp, lcore_id)

rte_mempool_generic_put(mp, obj_table, n, cache, flags)
rte_mempool_generic_get(mp, obj_table, n, cache, flags)

Deprecated API calls:

rte_mempool_mp_put_bulk(mp, obj_table, n)
rte_mempool_sp_put_bulk(mp, obj_table, n)
rte_mempool_mp_put(mp, obj)
rte_mempool_sp_put(mp, obj)
rte_mempool_mc_get_bulk(mp, obj_table, n)
rte_mempool_sc_get_bulk(mp, obj_table, n)
rte_mempool_mc_get(mp, obj_p)
rte_mempool_sc_get(mp, obj_p)

Lazaros Koromilas (3):
  mempool: deprecate specific get and put functions
  mempool: use bit flags to set multi consumers and producers
  mempool: allow for user-owned mempool caches

 app/test/test_mempool.c |  83 +---
 app/test/test_mempool_perf.c|  73 ++-
 doc/guides/prog_guide/env_abstraction_layer.rst |   4 +-
 doc/guides/prog_guide/mempool_lib.rst   |   6 +-
 lib/librte_mempool/rte_mempool.c|  66 +-
 lib/librte_mempool/rte_mempool.h| 257 ++--
 lib/librte_mempool/rte_mempool_version.map  |   6 +
 7 files changed, 385 insertions(+), 110 deletions(-)

-- 
1.9.1



[dpdk-dev] [PATCH v4 3/3] mempool: allow for user-owned mempool caches

2016-06-28 Thread Lazaros Koromilas
Hi Olivier, thanks for fixing those, just one comment below

On Mon, Jun 27, 2016 at 4:50 PM, Olivier Matz  wrote:
> From: Lazaros Koromilas 
>
> The mempool cache is only available to EAL threads as a per-lcore
> resource. Change this so that the user can create and provide their own
> cache on mempool get and put operations. This works with non-EAL threads
> too. This commit introduces the new API calls:
>
> rte_mempool_cache_create(size, socket_id)
> rte_mempool_cache_free(cache)
> rte_mempool_cache_flush(cache, mp)
> rte_mempool_default_cache(mp, lcore_id)
>
> Changes the API calls:
>
> rte_mempool_generic_put(mp, obj_table, n, cache, flags)
> rte_mempool_generic_get(mp, obj_table, n, cache, flags)
>
> The cache-oblivious API calls use the per-lcore default local cache.
>
> Signed-off-by: Lazaros Koromilas 
> Acked-by: Olivier Matz 
> ---
>  app/test/test_mempool.c|  75 +
>  app/test/test_mempool_perf.c   |  70 ++---
>  lib/librte_mempool/rte_mempool.c   |  66 +++-
>  lib/librte_mempool/rte_mempool.h   | 163 
> +
>  lib/librte_mempool/rte_mempool_version.map |   4 +
>  5 files changed, 296 insertions(+), 82 deletions(-)
>
> diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
> index 55c2cbc..5b3c754 100644
> --- a/app/test/test_mempool.c
> +++ b/app/test/test_mempool.c
> @@ -75,10 +75,18 @@
>  #define MAX_KEEP 16
>  #define MEMPOOL_SIZE ((rte_lcore_count()*(MAX_KEEP+RTE_MEMPOOL_CACHE_MAX_SIZE))-1)
>
> -#define RET_ERR() do { \
> +#define LOG_ERR() do { \
> printf("test failed at %s():%d\n", __func__, __LINE__); \
> +   } while (0)
> +#define RET_ERR() do { \
> +   LOG_ERR();  \
> return -1;  \
> } while (0)
> +#define GOTO_ERR(err, label) do {  \
> +   LOG_ERR();  \
> +   ret = err;  \
> +   goto label; \
> +   } while (0)

Here, GOTO_ERR still assumes a variable named ret in the function, and
takes the error value as an argument, while RET_ERR always returns -1.
I'd change it to always use -1:

#define GOTO_ERR(retvar, label) do { LOG_ERR(); retvar = -1; goto
label; } while (0)

Should I do it like that and also quickly add the documentation in a v5?

Thanks,
Lazaros.


[dpdk-dev] [PATCH v3 3/3] mempool: allow for user-owned mempool caches

2016-06-18 Thread Lazaros Koromilas
On Fri, Jun 17, 2016 at 11:37 AM, Olivier Matz  
wrote:
>
>
> On 06/16/2016 01:02 PM, Lazaros Koromilas wrote:
>> The mempool cache is only available to EAL threads as a per-lcore
>> resource. Change this so that the user can create and provide their own
>> cache on mempool get and put operations. This works with non-EAL threads
>> too. This commit introduces the new API calls:
>>
>> rte_mempool_cache_create(size, socket_id)
>> rte_mempool_cache_free(cache)
>> rte_mempool_cache_flush(cache, mp)
>> rte_mempool_default_cache(mp, lcore_id)
>
> These new functions should be added in the .map file, else it will
> break the compilation in with shared_lib=y.

Oops, thanks!

>> Changes the API calls:
>>
>> rte_mempool_generic_put(mp, obj_table, n, cache, flags)
>> rte_mempool_generic_get(mp, obj_table, n, cache, flags)
>>
>> The cache-oblivious API calls use the per-lcore default local cache.
>>
>> Signed-off-by: Lazaros Koromilas 
>> ---
>>  app/test/test_mempool.c  |  94 --
>>  app/test/test_mempool_perf.c |  70 ++---
>>  lib/librte_mempool/rte_mempool.c |  66 +++-
>>  lib/librte_mempool/rte_mempool.h | 163 
>> ---
>>  4 files changed, 310 insertions(+), 83 deletions(-)
>>
>> diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
>> index 10d706f..723cd39 100644
>> --- a/app/test/test_mempool.c
>> +++ b/app/test/test_mempool.c
>> @@ -79,6 +79,9 @@
>>   printf("test failed at %s():%d\n", __func__, __LINE__); \
>>   return -1;  \
>>   } while (0)
>> +#define LOG_ERR() do {  
>>  \
>> + printf("test failed at %s():%d\n", __func__, __LINE__); \
>> + } while (0)
>>
>
> I see that the usage of this macro is always like this:
>
> LOG_ERR();
> ret = -1;
> goto out;
>
> What do you think of having:
>
> #define LOG_ERR() do {  \
> printf("test failed at %s():%d\n", __func__, __LINE__); \
> } while (0)
> #define RET_ERR() do { LOG_ERR(); return -1; } while (0)
> #define GOTO_ERR() do { LOG_ERR(); ret = -1; goto out; } while (0)
>
> Then use GOTO_ERR() when appropriate. It would also factorize
> the printf.

The downside of GOTO_ERR() is that it assumes a variable and a label
name. And you may need to have multiple labels 'out0', 'out1', etc. for
the error path. How about:

#define GOTO_ERR(ret, out) do { LOG_ERR(); ret = -1; goto out; } while (0)

Lazaros.


[dpdk-dev] [PATCH v3 3/3] mempool: allow for user-owned mempool caches

2016-06-16 Thread Lazaros Koromilas
The mempool cache is only available to EAL threads as a per-lcore
resource. Change this so that the user can create and provide their own
cache on mempool get and put operations. This works with non-EAL threads
too. This commit introduces the new API calls:

rte_mempool_cache_create(size, socket_id)
rte_mempool_cache_free(cache)
rte_mempool_cache_flush(cache, mp)
rte_mempool_default_cache(mp, lcore_id)

Changes the API calls:

rte_mempool_generic_put(mp, obj_table, n, cache, flags)
rte_mempool_generic_get(mp, obj_table, n, cache, flags)

The cache-oblivious API calls use the per-lcore default local cache.

Signed-off-by: Lazaros Koromilas 
---
 app/test/test_mempool.c  |  94 --
 app/test/test_mempool_perf.c |  70 ++---
 lib/librte_mempool/rte_mempool.c |  66 +++-
 lib/librte_mempool/rte_mempool.h | 163 ---
 4 files changed, 310 insertions(+), 83 deletions(-)

diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index 10d706f..723cd39 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -79,6 +79,9 @@
printf("test failed at %s():%d\n", __func__, __LINE__); \
return -1;  \
} while (0)
+#define LOG_ERR() do { \
+   printf("test failed at %s():%d\n", __func__, __LINE__); \
+   } while (0)

 static rte_atomic32_t synchro;

@@ -191,7 +194,7 @@ my_obj_init(struct rte_mempool *mp, __attribute__((unused)) 
void *arg,

 /* basic tests (done on one core) */
 static int
-test_mempool_basic(struct rte_mempool *mp)
+test_mempool_basic(struct rte_mempool *mp, int use_external_cache)
 {
uint32_t *objnum;
void **objtable;
@@ -199,47 +202,79 @@ test_mempool_basic(struct rte_mempool *mp)
char *obj_data;
int ret = 0;
unsigned i, j;
+   int offset;
+   struct rte_mempool_cache *cache;
+
+   if (use_external_cache) {
+   /* Create a user-owned mempool cache. */
+   cache = rte_mempool_cache_create(RTE_MEMPOOL_CACHE_MAX_SIZE,
+SOCKET_ID_ANY);
+   if (cache == NULL)
+   RET_ERR();
+   } else {
+   /* May be NULL if cache is disabled. */
+   cache = rte_mempool_default_cache(mp, rte_lcore_id());
+   }

/* dump the mempool status */
rte_mempool_dump(stdout, mp);

printf("get an object\n");
-   if (rte_mempool_get(mp, &obj) < 0)
-   RET_ERR();
+   if (rte_mempool_generic_get(mp, &obj, 1, cache, 0) < 0) {
+   LOG_ERR();
+   ret = -1;
+   goto out;
+   }
rte_mempool_dump(stdout, mp);

/* tests that improve coverage */
printf("get object count\n");
-   if (rte_mempool_count(mp) != MEMPOOL_SIZE - 1)
-   RET_ERR();
+   /* We have to count the extra caches, one in this case. */
+   offset = use_external_cache ? 1 * cache->len : 0;
+   if (rte_mempool_count(mp) + offset != MEMPOOL_SIZE - 1) {
+   LOG_ERR();
+   ret = -1;
+   goto out;
+   }

printf("get private data\n");
if (rte_mempool_get_priv(mp) != (char *)mp +
-   MEMPOOL_HEADER_SIZE(mp, mp->cache_size))
-   RET_ERR();
+   MEMPOOL_HEADER_SIZE(mp, mp->cache_size)) {
+   LOG_ERR();
+   ret = -1;
+   goto out;
+   }

 #ifndef RTE_EXEC_ENV_BSDAPP /* rte_mem_virt2phy() not supported on bsd */
printf("get physical address of an object\n");
-   if (rte_mempool_virt2phy(mp, obj) != rte_mem_virt2phy(obj))
-   RET_ERR();
+   if (rte_mempool_virt2phy(mp, obj) != rte_mem_virt2phy(obj)) {
+   LOG_ERR();
+   ret = -1;
+   goto out;
+   }
 #endif

printf("put the object back\n");
-   rte_mempool_put(mp, obj);
+   rte_mempool_generic_put(mp, &obj, 1, cache, 0);
rte_mempool_dump(stdout, mp);

printf("get 2 objects\n");
-   if (rte_mempool_get(mp, &obj) < 0)
-   RET_ERR();
-   if (rte_mempool_get(mp, &obj2) < 0) {
-   rte_mempool_put(mp, obj);
-   RET_ERR();
+   if (rte_mempool_generic_get(mp, &obj, 1, cache, 0) < 0) {
+   LOG_ERR();
+   ret = -1;
+   goto out;
+   }
+   if (rte_mempool_generic_get(mp, &obj2, 1, cache, 0) < 0) {
+   rte_mempool_generic_put(mp, &obj, 1, cache, 0);
+   LOG_ERR();
+   ret = -1;
+   goto out;
}
rte_mempool_dump(stdout, mp);

  

[dpdk-dev] [PATCH v3 2/3] mempool: use bit flags instead of is_mp and is_mc

2016-06-16 Thread Lazaros Koromilas
Pass the same flags as in rte_mempool_create().  Changes API calls:

rte_mempool_generic_put(mp, obj_table, n, flags)
rte_mempool_generic_get(mp, obj_table, n, flags)

Signed-off-by: Lazaros Koromilas 
---
 lib/librte_mempool/rte_mempool.h | 58 +---
 1 file changed, 30 insertions(+), 28 deletions(-)

diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 7446843..191edba 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -949,12 +949,13 @@ void rte_mempool_dump(FILE *f, struct rte_mempool *mp);
  * @param n
  *   The number of objects to store back in the mempool, must be strictly
  *   positive.
- * @param is_mp
- *   Mono-producer (0) or multi-producers (1).
+ * @param flags
+ *   The flags used for the mempool creation.
+ *   Single-producer (MEMPOOL_F_SP_PUT flag) or multi-producers.
  */
 static inline void __attribute__((always_inline))
 __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
- unsigned n, int is_mp)
+ unsigned n, int flags)
 {
struct rte_mempool_cache *cache;
uint32_t index;
@@ -967,7 +968,7 @@ __mempool_generic_put(struct rte_mempool *mp, void * const 
*obj_table,
__MEMPOOL_STAT_ADD(mp, put, n);

/* cache is not enabled or single producer or non-EAL thread */
-   if (unlikely(cache_size == 0 || is_mp == 0 ||
+   if (unlikely(cache_size == 0 || flags & MEMPOOL_F_SP_PUT ||
 lcore_id >= RTE_MAX_LCORE))
goto ring_enqueue;

@@ -1020,15 +1021,16 @@ ring_enqueue:
  *   A pointer to a table of void * pointers (objects).
  * @param n
  *   The number of objects to add in the mempool from the obj_table.
- * @param is_mp
- *   Mono-producer (0) or multi-producers (1).
+ * @param flags
+ *   The flags used for the mempool creation.
+ *   Single-producer (MEMPOOL_F_SP_PUT flag) or multi-producers.
  */
 static inline void __attribute__((always_inline))
 rte_mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
-   unsigned n, int is_mp)
+   unsigned n, int flags)
 {
__mempool_check_cookies(mp, obj_table, n, 0);
-   __mempool_generic_put(mp, obj_table, n, is_mp);
+   __mempool_generic_put(mp, obj_table, n, flags);
 }

 /**
@@ -1046,7 +1048,7 @@ __rte_deprecated static inline void 
__attribute__((always_inline))
 rte_mempool_mp_put_bulk(struct rte_mempool *mp, void * const *obj_table,
unsigned n)
 {
-   rte_mempool_generic_put(mp, obj_table, n, 1);
+   rte_mempool_generic_put(mp, obj_table, n, 0);
 }

 /**
@@ -1064,7 +1066,7 @@ __rte_deprecated static inline void 
__attribute__((always_inline))
 rte_mempool_sp_put_bulk(struct rte_mempool *mp, void * const *obj_table,
unsigned n)
 {
-   rte_mempool_generic_put(mp, obj_table, n, 0);
+   rte_mempool_generic_put(mp, obj_table, n, MEMPOOL_F_SP_PUT);
 }

 /**
@@ -1085,8 +1087,7 @@ static inline void __attribute__((always_inline))
 rte_mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 unsigned n)
 {
-   rte_mempool_generic_put(mp, obj_table, n,
-   !(mp->flags & MEMPOOL_F_SP_PUT));
+   rte_mempool_generic_put(mp, obj_table, n, mp->flags);
 }

 /**
@@ -1101,7 +1102,7 @@ rte_mempool_put_bulk(struct rte_mempool *mp, void * const 
*obj_table,
 __rte_deprecated static inline void __attribute__((always_inline))
 rte_mempool_mp_put(struct rte_mempool *mp, void *obj)
 {
-   rte_mempool_generic_put(mp, &obj, 1, 1);
+   rte_mempool_generic_put(mp, &obj, 1, 0);
 }

 /**
@@ -1116,7 +1117,7 @@ rte_mempool_mp_put(struct rte_mempool *mp, void *obj)
 __rte_deprecated static inline void __attribute__((always_inline))
 rte_mempool_sp_put(struct rte_mempool *mp, void *obj)
 {
-   rte_mempool_generic_put(mp, &obj, 1, 0);
+   rte_mempool_generic_put(mp, &obj, 1, MEMPOOL_F_SP_PUT);
 }

 /**
@@ -1145,15 +1146,16 @@ rte_mempool_put(struct rte_mempool *mp, void *obj)
  *   A pointer to a table of void * pointers (objects).
  * @param n
  *   The number of objects to get, must be strictly positive.
- * @param is_mc
- *   Mono-consumer (0) or multi-consumers (1).
+ * @param flags
+ *   The flags used for the mempool creation.
+ *   Single-consumer (MEMPOOL_F_SC_GET flag) or multi-consumers.
  * @return
  *   - >=0: Success; number of objects supplied.
  *   - <0: Error; code of ring dequeue function.
  */
 static inline int __attribute__((always_inline))
 __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
- unsigned n, int is_mc)
+ unsigned n, int flags)
 {
int ret;
struct rte_mempool_cache *cache;
@@ -1163,7 +1165,7 @@ __mempool_generic_get(struct rte_mempool *mp, void 
**obj_table,
	uint32_t cache_size = mp->cache_size;

[dpdk-dev] [PATCH v3 1/3] mempool: deprecate specific get/put functions

2016-06-16 Thread Lazaros Koromilas
This commit introduces the API calls:

rte_mempool_generic_put(mp, obj_table, n, is_mp)
rte_mempool_generic_get(mp, obj_table, n, is_mc)

Deprecates the API calls:

rte_mempool_mp_put_bulk(mp, obj_table, n)
rte_mempool_sp_put_bulk(mp, obj_table, n)
rte_mempool_mp_put(mp, obj)
rte_mempool_sp_put(mp, obj)
rte_mempool_mc_get_bulk(mp, obj_table, n)
rte_mempool_sc_get_bulk(mp, obj_table, n)
rte_mempool_mc_get(mp, obj_p)
rte_mempool_sc_get(mp, obj_p)

We also check cookies in one place now.

Signed-off-by: Lazaros Koromilas 
---
 app/test/test_mempool.c  |  10 ++--
 lib/librte_mempool/rte_mempool.h | 115 +++
 2 files changed, 85 insertions(+), 40 deletions(-)

diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index bcf379b..10d706f 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -338,7 +338,7 @@ static int test_mempool_single_producer(void)
printf("obj not owned by this mempool\n");
RET_ERR();
}
-   rte_mempool_sp_put(mp_spsc, obj);
+   rte_mempool_put(mp_spsc, obj);
rte_spinlock_lock(&scsp_spinlock);
scsp_obj_table[i] = NULL;
rte_spinlock_unlock(&scsp_spinlock);
@@ -371,7 +371,7 @@ static int test_mempool_single_consumer(void)
rte_spinlock_unlock(&scsp_spinlock);
if (i >= MAX_KEEP)
continue;
-   if (rte_mempool_sc_get(mp_spsc, &obj) < 0)
+   if (rte_mempool_get(mp_spsc, &obj) < 0)
break;
rte_spinlock_lock(&scsp_spinlock);
scsp_obj_table[i] = obj;
@@ -477,13 +477,13 @@ test_mempool_basic_ex(struct rte_mempool *mp)
}

for (i = 0; i < MEMPOOL_SIZE; i ++) {
-   if (rte_mempool_mc_get(mp, &obj[i]) < 0) {
+   if (rte_mempool_get(mp, &obj[i]) < 0) {
printf("test_mp_basic_ex fail to get object for [%u]\n",
i);
goto fail_mp_basic_ex;
}
}
-   if (rte_mempool_mc_get(mp, &err_obj) == 0) {
+   if (rte_mempool_get(mp, &err_obj) == 0) {
printf("test_mempool_basic_ex get an impossible obj\n");
goto fail_mp_basic_ex;
}
@@ -494,7 +494,7 @@ test_mempool_basic_ex(struct rte_mempool *mp)
}

for (i = 0; i < MEMPOOL_SIZE; i++)
-   rte_mempool_mp_put(mp, obj[i]);
+   rte_mempool_put(mp, obj[i]);

if (rte_mempool_full(mp) != 1) {
printf("test_mempool_basic_ex the mempool should be full\n");
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 92deb42..7446843 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -953,8 +953,8 @@ void rte_mempool_dump(FILE *f, struct rte_mempool *mp);
  *   Mono-producer (0) or multi-producers (1).
  */
 static inline void __attribute__((always_inline))
-__mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
-   unsigned n, int is_mp)
+__mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
+ unsigned n, int is_mp)
 {
struct rte_mempool_cache *cache;
uint32_t index;
@@ -1012,7 +1012,7 @@ ring_enqueue:


 /**
- * Put several objects back in the mempool (multi-producers safe).
+ * Put several objects back in the mempool.
  *
  * @param mp
  *   A pointer to the mempool structure.
@@ -1020,16 +1020,37 @@ ring_enqueue:
  *   A pointer to a table of void * pointers (objects).
  * @param n
  *   The number of objects to add in the mempool from the obj_table.
+ * @param is_mp
+ *   Mono-producer (0) or multi-producers (1).
  */
 static inline void __attribute__((always_inline))
+rte_mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
+   unsigned n, int is_mp)
+{
+   __mempool_check_cookies(mp, obj_table, n, 0);
+   __mempool_generic_put(mp, obj_table, n, is_mp);
+}
+
+/**
+ * @deprecated
+ * Put several objects back in the mempool (multi-producers safe).
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the mempool from the obj_table.
+ */
+__rte_deprecated static inline void __attribute__((always_inline))
 rte_mempool_mp_put_bulk(struct rte_mempool *mp, void * const *obj_table,
unsigned n)
 {
-   __mempool_check_cookies(mp, obj_table, n, 0);
-   __mempool_put_bulk(mp, obj_table, n, 1);
+   rte_mempool_generic_put(mp, obj_table, n, 1);
 }

 /**
+ * @deprecated
  * Put several objects back in the mempool

[dpdk-dev] [PATCH v3 0/3] mempool: user-owned mempool caches

2016-06-16 Thread Lazaros Koromilas
Updated version of the user-owned cache patchset.  It applies on top of
the latest external mempool manager patches from David Hunt [1].

[1] http://dpdk.org/ml/archives/dev/2016-June/041479.html

v3 changes:

 * Deprecate specific mempool API calls instead of removing them.
 * Split deprecation into a separate commit to limit noise.
 * Fix cache flush by setting cache->len = 0 and make it inline.
 * Remove cache->size == 0 checks and ensure size != 0 at creation.
 * Fix tests to check if cache creation succeeded.
 * Fix tests to free allocated resources on error.

The mempool cache is only available to EAL threads as a per-lcore
resource. Change this so that the user can create and provide their own
cache on mempool get and put operations. This works with non-EAL threads
too.

Also, deprecate the explicit {mp,sp}_put and {mc,sc}_get calls and
re-route them through the new generic calls. Minor cleanup to pass the
mempool bit flags instead of using specific is_mp and is_mc. The old
cache-oblivious API calls use the per-lcore default local cache. The
mempool and mempool_perf tests are also updated to handle the
user-owned cache case.

Introduced API calls:

rte_mempool_cache_create(size, socket_id)
rte_mempool_cache_free(cache)
rte_mempool_cache_flush(cache, mp)
rte_mempool_default_cache(mp, lcore_id)

rte_mempool_generic_put(mp, obj_table, n, cache, flags)
rte_mempool_generic_get(mp, obj_table, n, cache, flags)

Deprecated API calls:

rte_mempool_mp_put_bulk(mp, obj_table, n)
rte_mempool_sp_put_bulk(mp, obj_table, n)
rte_mempool_mp_put(mp, obj)
rte_mempool_sp_put(mp, obj)
rte_mempool_mc_get_bulk(mp, obj_table, n)
rte_mempool_sc_get_bulk(mp, obj_table, n)
rte_mempool_mc_get(mp, obj_p)
rte_mempool_sc_get(mp, obj_p)

Lazaros Koromilas (3):
  mempool: deprecate specific get/put functions
  mempool: use bit flags instead of is_mp and is_mc
  mempool: allow for user-owned mempool caches

 app/test/test_mempool.c  | 104 +++-
 app/test/test_mempool_perf.c |  70 +--
 lib/librte_mempool/rte_mempool.c |  66 +-
 lib/librte_mempool/rte_mempool.h | 256 +--
 4 files changed, 385 insertions(+), 111 deletions(-)

-- 
1.9.1



[dpdk-dev] [PATCH v2 1/2] mempool: allow for user-owned mempool caches

2016-06-14 Thread Lazaros Koromilas
Hi Olivier,

I have it in my queue, I'll do my best to have it before the deadline.

Thanks!
Lazaros.

On Mon, Jun 13, 2016 at 1:21 PM, Olivier Matz  wrote:
> Hi Lazaros,
>
> On 05/11/2016 11:56 AM, Olivier MATZ wrote:
>> Hi Lazaros,
>>
>> Sorry for the late review. Please find some comments,
>> in addition to what Konstantin already said.
>>
>
> Will you have the time to send a v3 before the end of the
> integration deadline at the end of the week?
>
> I think it should be rebased on top of latest mempool series
> from David Hunt:
> http://dpdk.org/ml/archives/dev/2016-June/040897.html
>
> Regards,
> Olivier


[dpdk-dev] [PATCH v2 1/2] mempool: allow for user-owned mempool caches

2016-04-19 Thread Lazaros Koromilas
Hi Konstantin,
Thanks for the review.

Regards,
Lazaros.

On Mon, Apr 18, 2016 at 4:17 PM, Ananyev, Konstantin
 wrote:
> Hi Lazaros,
>
> Looks ok to me in general, few comments below.
> One more generic question - did you observe any performance impact
> caused by these changes?
> Konstantin

I didn't observe any notable difference compared to the default
per-lcore cache case. Here is an excerpt from the mempool_perf test:

$ egrep '(^start|n_get_bulk=32 n_put_bulk=32 n_keep=128)' x86_64-native-linuxapp-gcc.log
start performance test (without cache)
mempool_autotest cache=0 cores=1 n_get_bulk=32 n_put_bulk=32 n_keep=128 rate_persec=714958438
mempool_autotest cache=0 cores=2 n_get_bulk=32 n_put_bulk=32 n_keep=128 rate_persec=795738111
mempool_autotest cache=0 cores=4 n_get_bulk=32 n_put_bulk=32 n_keep=128 rate_persec=313655295
start performance test (with cache)
mempool_autotest cache=512 cores=1 n_get_bulk=32 n_put_bulk=32 n_keep=128 rate_persec=780455116
mempool_autotest cache=512 cores=2 n_get_bulk=32 n_put_bulk=32 n_keep=128 rate_persec=1046937599
mempool_autotest cache=512 cores=4 n_get_bulk=32 n_put_bulk=32 n_keep=128 rate_persec=1988362238
start performance test (with user-owned cache)
mempool_autotest cache=512 cores=1 n_get_bulk=32 n_put_bulk=32 n_keep=128 rate_persec=787519897
mempool_autotest cache=512 cores=2 n_get_bulk=32 n_put_bulk=32 n_keep=128 rate_persec=1047029350
mempool_autotest cache=512 cores=4 n_get_bulk=32 n_put_bulk=32 n_keep=128 rate_persec=1965896498

>
>> -Original Message-
>> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Lazaros Koromilas
>> Sent: Monday, April 04, 2016 4:43 PM
>> To: dev at dpdk.org
>> Subject: [dpdk-dev] [PATCH v2 1/2] mempool: allow for user-owned mempool 
>> caches
>>
>> The mempool cache is only available to EAL threads as a per-lcore
>> resource. Change this so that the user can create and provide their own
>> cache on mempool get and put operations. This works with non-EAL threads
>> too. This commit introduces the new API calls:
>>
>> rte_mempool_cache_create(size, socket_id)
>> rte_mempool_cache_flush(cache, mp)
>> rte_mempool_cache_free(cache)
>> rte_mempool_default_cache(mp, lcore_id)
>> rte_mempool_generic_put(mp, obj_table, n, cache, is_mp)
>> rte_mempool_generic_get(mp, obj_table, n, cache, is_mc)
>>
>> Removes the API calls:
>>
>> rte_mempool_sp_put_bulk(mp, obj_table, n)
>> rte_mempool_sc_get_bulk(mp, obj_table, n)
>> rte_mempool_sp_put(mp, obj)
>> rte_mempool_sc_get(mp, obj)
>
>
> Hmm, shouldn't we deprecate it first for a release before removing completely?
> Let's say for now you can just make them macros that call the remaining
> functions, or so.

How do we mark the calls as deprecated? The librte_compat stuff
doesn't apply here, as we don't have a different version of the same
symbol or anything like that. Do I need to add them to a deprecation
notice?
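
(For reference, the approach eventually taken in v3 and later keeps
the old wrappers but tags them with __rte_deprecated, so callers get a
compile-time warning while the symbols themselves remain, e.g.:

	__rte_deprecated static inline void __attribute__((always_inline))
	rte_mempool_sp_put(struct rte_mempool *mp, void *obj)
	{
		rte_mempool_generic_put(mp, &obj, 1, MEMPOOL_F_SP_PUT);
	}

No librte_compat symbol versioning is needed for this.)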

>
>>
>> And the remaining API calls use the per-lcore default local cache:
>>
>> rte_mempool_put_bulk(mp, obj_table, n)
>> rte_mempool_get_bulk(mp, obj_table, n)
>> rte_mempool_put(mp, obj)
>> rte_mempool_get(mp, obj)
>>
>> Signed-off-by: Lazaros Koromilas 
>> ---
>>  app/test/test_mempool.c|  58 +--
>>  app/test/test_mempool_perf.c   |  46 +-
>>  lib/librte_eal/common/eal_common_log.c |   8 +-
>>  lib/librte_mempool/rte_mempool.c   |  76 -
>>  lib/librte_mempool/rte_mempool.h   | 291 
>> +
>>  5 files changed, 275 insertions(+), 204 deletions(-)
>>
>>
>> diff --git a/lib/librte_mempool/rte_mempool.c 
>> b/lib/librte_mempool/rte_mempool.c
>> index 73ca770..4d977c1 100644
>> --- a/lib/librte_mempool/rte_mempool.c
>> +++ b/lib/librte_mempool/rte_mempool.c
>> @@ -375,6 +375,63 @@ rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, 
>> size_t elt_sz,
>>   return usz;
>>  }
>>
>> +static void
>> +mempool_cache_init(struct rte_mempool_cache *cache, uint32_t size)
>> +{
>> + cache->size = size;
>> + cache->flushthresh = CALC_CACHE_FLUSHTHRESH(size);
>> + cache->len = 0;
>> +}
>> +
>> +/*
>> + * Create and initialize a cache for objects that are retrieved from and
>> + * returned to an underlying mempool. This structure is identical to the
>> + * local_cache[lcore_id] pointed to by the mempool structure.
>> + */
>> +struct rte_mempool_cache *
>> +rte_mempool_cache_create(uint32_t size, int socket_id)
>> +{
>> + struct rte_mempool_cache *cache;

[dpdk-dev] [PATCH v2 1/2] mempool: allow for user-owned mempool caches

2016-04-05 Thread Lazaros Koromilas
Hi all,

I forgot to mention that this series applies on top of:

http://www.dpdk.org/dev/patchwork/patch/10492/

Thanks,
Lazaros.

On Mon, Apr 4, 2016 at 6:43 PM, Lazaros Koromilas  
wrote:
> The mempool cache is only available to EAL threads as a per-lcore
> resource. Change this so that the user can create and provide their own
> cache on mempool get and put operations. This works with non-EAL threads
> too. This commit introduces the new API calls:
>
> rte_mempool_cache_create(size, socket_id)
> rte_mempool_cache_flush(cache, mp)
> rte_mempool_cache_free(cache)
> rte_mempool_default_cache(mp, lcore_id)
> rte_mempool_generic_put(mp, obj_table, n, cache, is_mp)
> rte_mempool_generic_get(mp, obj_table, n, cache, is_mc)
>
> Removes the API calls:
>
> rte_mempool_sp_put_bulk(mp, obj_table, n)
> rte_mempool_sc_get_bulk(mp, obj_table, n)
> rte_mempool_sp_put(mp, obj)
> rte_mempool_sc_get(mp, obj)
>
> And the remaining API calls use the per-lcore default local cache:
>
> rte_mempool_put_bulk(mp, obj_table, n)
> rte_mempool_get_bulk(mp, obj_table, n)
> rte_mempool_put(mp, obj)
> rte_mempool_get(mp, obj)
>
> Signed-off-by: Lazaros Koromilas 
> ---
>  app/test/test_mempool.c|  58 +--
>  app/test/test_mempool_perf.c   |  46 +-
>  lib/librte_eal/common/eal_common_log.c |   8 +-
>  lib/librte_mempool/rte_mempool.c   |  76 -
>  lib/librte_mempool/rte_mempool.h   | 291 
> +
>  5 files changed, 275 insertions(+), 204 deletions(-)
>
> diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
> index 10e1fa4..2dc0cf2 100644
> --- a/app/test/test_mempool.c
> +++ b/app/test/test_mempool.c
> @@ -79,6 +79,7 @@
>
>  static struct rte_mempool *mp;
>  static struct rte_mempool *mp_cache, *mp_nocache;
> +static int use_external_cache;
>
>  static rte_atomic32_t synchro;
>
> @@ -107,19 +108,33 @@ test_mempool_basic(void)
> char *obj_data;
> int ret = 0;
> unsigned i, j;
> +   struct rte_mempool_cache *cache;
> +
> +   if (use_external_cache)
> +   /* Create a user-owned mempool cache. */
> +   cache = rte_mempool_cache_create(RTE_MEMPOOL_CACHE_MAX_SIZE,
> +SOCKET_ID_ANY);
> +   else
> +   cache = rte_mempool_default_cache(mp, rte_lcore_id());
>
> /* dump the mempool status */
> rte_mempool_dump(stdout, mp);
>
> printf("get an object\n");
> -   if (rte_mempool_get(mp, &obj) < 0)
> +   if (rte_mempool_generic_get(mp, &obj, 1, cache, 1) < 0)
> return -1;
> rte_mempool_dump(stdout, mp);
>
> /* tests that improve coverage */
> printf("get object count\n");
> -   if (rte_mempool_count(mp) != MEMPOOL_SIZE - 1)
> -   return -1;
> +   if (use_external_cache) {
> +   /* We have to count the extra caches, one in this case. */
> +   if (rte_mempool_count(mp) + cache->len != MEMPOOL_SIZE - 1)
> +   return -1;
> +   } else {
> +   if (rte_mempool_count(mp) != MEMPOOL_SIZE - 1)
> +   return -1;
> +   }
>
> printf("get private data\n");
> if (rte_mempool_get_priv(mp) != (char *)mp +
> @@ -134,21 +149,21 @@ test_mempool_basic(void)
> return -1;
>
> printf("put the object back\n");
> -   rte_mempool_put(mp, obj);
> +   rte_mempool_generic_put(mp, &obj, 1, cache, 1);
> rte_mempool_dump(stdout, mp);
>
> printf("get 2 objects\n");
> -   if (rte_mempool_get(mp, &obj) < 0)
> +   if (rte_mempool_generic_get(mp, &obj, 1, cache, 1) < 0)
> return -1;
> -   if (rte_mempool_get(mp, &obj2) < 0) {
> -   rte_mempool_put(mp, obj);
> +   if (rte_mempool_generic_get(mp, &obj2, 1, cache, 1) < 0) {
> +   rte_mempool_generic_put(mp, &obj, 1, cache, 1);
> return -1;
> }
> rte_mempool_dump(stdout, mp);
>
> printf("put the objects back\n");
> -   rte_mempool_put(mp, obj);
> -   rte_mempool_put(mp, obj2);
> +   rte_mempool_generic_put(mp, &obj, 1, cache, 1);
> +   rte_mempool_generic_put(mp, &obj2, 1, cache, 1);
> rte_mempool_dump(stdout, mp);
>
> /*
> @@ -161,7 +176,7 @@ test_mempool_basic(void)
> }
>
> for (i=0; i<MEMPOOL_SIZE; i++) {
> -   if (rte_mempool_get(mp, &objtable[i]) 

[dpdk-dev] [PATCH] doc: announce ABI changes for user-owned mempool caches

2016-04-05 Thread Lazaros Koromilas
Deprecation notice for 16.04 for changes targeting release 16.07.
The changes affect struct rte_mempool, rte_mempool_cache and the
mempool API.

Signed-off-by: Lazaros Koromilas 
---
 doc/guides/rel_notes/deprecation.rst | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index ad31355..6ccabcb 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -40,3 +40,10 @@ Deprecation Notices
   The existing API will be backward compatible, but there will be new API
   functions added to facilitate the creation of mempools using an external
   handler. The 16.07 release will contain these changes.
+
+* ABI change for rte_mempool struct to move the cache-related fields
+  to the more appropriate rte_mempool_cache struct. The mempool API is
+  also changed to enable external cache management that is not tied to EAL
+  threads. Some mempool get and put calls are removed in favor of a more
+  compact API. The ones that remain are backwards compatible and use the
+  per-lcore default cache if available. This change targets release 16.07.
-- 
1.9.1



[dpdk-dev] [PATCH v2 2/2] mempool: use bit flags instead of is_mp and is_mc

2016-04-04 Thread Lazaros Koromilas
Pass the same flags as in rte_mempool_create().

Signed-off-by: Lazaros Koromilas 
---
 app/test/test_mempool.c  | 18 +--
 app/test/test_mempool_perf.c |  4 +--
 lib/librte_mempool/rte_mempool.h | 66 +---
 3 files changed, 45 insertions(+), 43 deletions(-)

diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index 2dc0cf2..445f450 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -121,7 +121,7 @@ test_mempool_basic(void)
rte_mempool_dump(stdout, mp);

printf("get an object\n");
-   if (rte_mempool_generic_get(mp, &obj, 1, cache, 1) < 0)
+   if (rte_mempool_generic_get(mp, &obj, 1, cache, 0) < 0)
return -1;
rte_mempool_dump(stdout, mp);

@@ -149,21 +149,21 @@ test_mempool_basic(void)
return -1;

printf("put the object back\n");
-   rte_mempool_generic_put(mp, &obj, 1, cache, 1);
+   rte_mempool_generic_put(mp, &obj, 1, cache, 0);
rte_mempool_dump(stdout, mp);

printf("get 2 objects\n");
-   if (rte_mempool_generic_get(mp, &obj, 1, cache, 1) < 0)
+   if (rte_mempool_generic_get(mp, &obj, 1, cache, 0) < 0)
return -1;
-   if (rte_mempool_generic_get(mp, &obj2, 1, cache, 1) < 0) {
-   rte_mempool_generic_put(mp, &obj, 1, cache, 1);
+   if (rte_mempool_generic_get(mp, &obj2, 1, cache, 0) < 0) {
+   rte_mempool_generic_put(mp, &obj, 1, cache, 0);
return -1;
}
rte_mempool_dump(stdout, mp);

printf("put the objects back\n");
-   rte_mempool_generic_put(mp, &obj, 1, cache, 1);
-   rte_mempool_generic_put(mp, &obj2, 1, cache, 1);
+   rte_mempool_generic_put(mp, &obj, 1, cache, 0);
+   rte_mempool_generic_put(mp, &obj2, 1, cache, 0);
rte_mempool_dump(stdout, mp);

/*
@@ -176,7 +176,7 @@ test_mempool_basic(void)
}

	for (i=0; i< [...] mp->ring);
@@ -187,7 +187,7 @@ per_lcore_mempool_test(__attribute__((unused)) void *arg)
while (idx < n_keep) {
rte_mempool_generic_put(mp, &obj_table[idx],
n_put_bulk,
-   cache, 1);
+   cache, 0);
idx += n_put_bulk;
}
}
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 21d43e2..fe4fed9 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -812,12 +812,13 @@ rte_mempool_default_cache(struct rte_mempool *mp, 
unsigned lcore_id)
  *   positive.
  * @param cache
  *   A pointer to a mempool cache structure. May be NULL if not needed.
- * @param is_mp
- *   Mono-producer (0) or multi-producers (1).
+ * @param flags
+ *   The flags used for the mempool creation.
+ *   Single-producer (MEMPOOL_F_SP_PUT flag) or multi-producers.
  */
 static inline void __attribute__((always_inline))
 __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
- unsigned n, struct rte_mempool_cache *cache, int is_mp)
+ unsigned n, struct rte_mempool_cache *cache, int flags)
 {
uint32_t index;
void **cache_objs;
@@ -826,7 +827,8 @@ __mempool_generic_put(struct rte_mempool *mp, void * const 
*obj_table,
__MEMPOOL_STAT_ADD(mp, put, n);

/* No cache provided or cache is not enabled or single producer */
-   if (unlikely(cache == NULL || cache->size == 0 || is_mp == 0))
+   if (unlikely(cache == NULL || cache->size == 0 ||
+flags & MEMPOOL_F_SP_PUT))
goto ring_enqueue;

/* Go straight to ring if put would overflow mem allocated for cache */
@@ -860,19 +862,18 @@ ring_enqueue:

/* push remaining objects in ring */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
-   if (is_mp) {
-   if (rte_ring_mp_enqueue_bulk(mp->ring, obj_table, n) < 0)
-   rte_panic("cannot put objects in mempool\n");
-   }
-   else {
+   if (flags & MEMPOOL_F_SP_PUT) {
if (rte_ring_sp_enqueue_bulk(mp->ring, obj_table, n) < 0)
rte_panic("cannot put objects in mempool\n");
+   } else {
+   if (rte_ring_mp_enqueue_bulk(mp->ring, obj_table, n) < 0)
+   rte_panic("cannot put objects in mempool\n");
}
 #else
-   if (is_mp)
-   rte_ring_mp_enqueue_bulk(mp->ring, obj_table, n);
-   else
+   if (flags & MEMPOOL_F_SP_PUT)
rte_ring_sp_enqueue_bulk(mp->ring, obj_table, n);
+   else
+   rte_ring_mp_enqueue_bulk(mp->ring, obj_table, n);

[dpdk-dev] [PATCH v2 1/2] mempool: allow for user-owned mempool caches

2016-04-04 Thread Lazaros Koromilas
The mempool cache is only available to EAL threads as a per-lcore
resource. Change this so that the user can create and provide their own
cache on mempool get and put operations. This works with non-EAL threads
too. This commit introduces the new API calls:

rte_mempool_cache_create(size, socket_id)
rte_mempool_cache_flush(cache, mp)
rte_mempool_cache_free(cache)
rte_mempool_default_cache(mp, lcore_id)
rte_mempool_generic_put(mp, obj_table, n, cache, is_mp)
rte_mempool_generic_get(mp, obj_table, n, cache, is_mc)

Removes the API calls:

rte_mempool_sp_put_bulk(mp, obj_table, n)
rte_mempool_sc_get_bulk(mp, obj_table, n)
rte_mempool_sp_put(mp, obj)
rte_mempool_sc_get(mp, obj)

And the remaining API calls use the per-lcore default local cache:

rte_mempool_put_bulk(mp, obj_table, n)
rte_mempool_get_bulk(mp, obj_table, n)
rte_mempool_put(mp, obj)
rte_mempool_get(mp, obj)

Signed-off-by: Lazaros Koromilas 
---
 app/test/test_mempool.c|  58 +--
 app/test/test_mempool_perf.c   |  46 +-
 lib/librte_eal/common/eal_common_log.c |   8 +-
 lib/librte_mempool/rte_mempool.c   |  76 -
 lib/librte_mempool/rte_mempool.h   | 291 +
 5 files changed, 275 insertions(+), 204 deletions(-)

diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index 10e1fa4..2dc0cf2 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -79,6 +79,7 @@

 static struct rte_mempool *mp;
 static struct rte_mempool *mp_cache, *mp_nocache;
+static int use_external_cache;

 static rte_atomic32_t synchro;

@@ -107,19 +108,33 @@ test_mempool_basic(void)
char *obj_data;
int ret = 0;
unsigned i, j;
+   struct rte_mempool_cache *cache;
+
+   if (use_external_cache)
+   /* Create a user-owned mempool cache. */
+   cache = rte_mempool_cache_create(RTE_MEMPOOL_CACHE_MAX_SIZE,
+SOCKET_ID_ANY);
+   else
+   cache = rte_mempool_default_cache(mp, rte_lcore_id());

/* dump the mempool status */
rte_mempool_dump(stdout, mp);

printf("get an object\n");
-   if (rte_mempool_get(mp, &obj) < 0)
+   if (rte_mempool_generic_get(mp, &obj, 1, cache, 1) < 0)
return -1;
rte_mempool_dump(stdout, mp);

/* tests that improve coverage */
printf("get object count\n");
-   if (rte_mempool_count(mp) != MEMPOOL_SIZE - 1)
-   return -1;
+   if (use_external_cache) {
+   /* We have to count the extra caches, one in this case. */
+   if (rte_mempool_count(mp) + cache->len != MEMPOOL_SIZE - 1)
+   return -1;
+   } else {
+   if (rte_mempool_count(mp) != MEMPOOL_SIZE - 1)
+   return -1;
+   }

printf("get private data\n");
if (rte_mempool_get_priv(mp) != (char *)mp +
@@ -134,21 +149,21 @@ test_mempool_basic(void)
return -1;

printf("put the object back\n");
-   rte_mempool_put(mp, obj);
+   rte_mempool_generic_put(mp, &obj, 1, cache, 1);
rte_mempool_dump(stdout, mp);

printf("get 2 objects\n");
-   if (rte_mempool_get(mp, &obj) < 0)
+   if (rte_mempool_generic_get(mp, &obj, 1, cache, 1) < 0)
return -1;
-   if (rte_mempool_get(mp, &obj2) < 0) {
-   rte_mempool_put(mp, obj);
+   if (rte_mempool_generic_get(mp, &obj2, 1, cache, 1) < 0) {
+   rte_mempool_generic_put(mp, &obj, 1, cache, 1);
return -1;
}
rte_mempool_dump(stdout, mp);

printf("put the objects back\n");
-   rte_mempool_put(mp, obj);
-   rte_mempool_put(mp, obj2);
+   rte_mempool_generic_put(mp, &obj, 1, cache, 1);
+   rte_mempool_generic_put(mp, &obj2, 1, cache, 1);
rte_mempool_dump(stdout, mp);

/*
@@ -161,7 +176,7 @@ test_mempool_basic(void)
}

	for (i=0; i< [...]
		if (i >= MAX_KEEP)
continue;
-   if (rte_mempool_sc_get(mp_spsc, &obj) < 0)
+   if (rte_mempool_get(mp_spsc, &obj) < 0)
break;
rte_spinlock_lock(&scsp_spinlock);
scsp_obj_table[i] = obj;
@@ -371,12 +391,12 @@ test_mempool_basic_ex(struct rte_mempool * mp)
}

for (i = 0; i < MEMPOOL_SIZE; i ++) {
-   if (rte_mempool_mc_get(mp, &obj[i]) < 0) {
+   if (rte_mempool_get(mp, &obj[i]) < 0) {
printf("fail_mp_basic_ex fail to get mempool object for 
[%u]\n", i);
goto fail_mp_basic_ex;
}
}
-   if (rte_mempool_mc_get(mp, &err_obj) == 0) {
+   if (rte_mempool_get(mp, &err_obj) == 0) {

[dpdk-dev] [PATCH v2] ring: check for zero objects mc dequeue / mp enqueue

2016-03-29 Thread Lazaros Koromilas
On Tue, Mar 29, 2016 at 7:04 PM, Bruce Richardson
 wrote:
>
> On Tue, Mar 29, 2016 at 05:29:12PM +0200, Olivier MATZ wrote:
> > Hi,
> >
> >
> > On 03/29/2016 10:54 AM, Bruce Richardson wrote:
> > >On Mon, Mar 28, 2016 at 06:48:07PM +0300, Lazaros Koromilas wrote:
> > >>Hi Olivier,
> > >>
> > >>We could have two threads (running on different cores in the general
> > >>case) that both succeed the cmpset operation. In the dequeue path,
> > >>when n == 0, then cons_next == cons_head, and cmpset will always
> > >>succeed. Now, if they both see an old r->cons.tail value from a
> > >>previous dequeue, they can get stuck in the while
> > >
> > >Hi,
> > >
> > >I don't see how threads reading an "old r->cons.tail" value is even 
> > >possible.
> > >The head and tail pointers on the ring are marked in the code as volatile, 
> > >so
> > >all reads and writes to those values are always done from memory and not 
> > >cached
> > >in registers. No deadlock should be possible on that while loop, unless a
> > >process crashes in the middle of a ring operation. Each thread which 
> > >updates
> > >the head pointer from x to y, is responsible for updating the tail pointer 
> > >in
> > >a similar manner. The loop ensures the tail updates are in the same order 
> > >as the
> > >head updates.
> > >
> > >If you believe deadlock is possible, can you outline the sequence of 
> > >operations
> > >which would lead to such a state, because I cannot see how it could occur 
> > >without
> > >a crash inside one of the threads.
> >
> > I think the deadlock Lazaros describes could occur in the following
> > condition:
> >
> > current ring state
> >  r->prod.head = 0
> >  r->prod.tail = 0
> >
> > core 0                                  core 1
> > 
> > enqueue 0 object
> >  cmpset(&r->prod.head, 0, 0)
> >  core 0 is interrupted here
> >                                          enqueue 1 object
> >                                          cmpset(&r->prod.head, 0, 1)
> >                                          copy the objects in box 0
> >                                          while (r->prod.tail != prod_head)
> >                                          r->prod.tail = prod_next
> >  copy 0 object (-> nothing to do)
> >  while (r->prod.tail != prod_head)
> >  ... core 0 now spins forever: r->prod.tail is 1, but its prod_head is 0.
> >
> >
> > I think this issue is indeed fixed by Lazaros' patch (I missed it
> > in previous review). However, I don't think this deadlock could
> > happen once we avoided the (n == 0) case.
> >
> Yes, I agree with your scenario. However, I thought the issue was occuring 
> with
> non-zero updates, but I may well be mistaken.
> If it's fixed now, all good... :-)
>
> /Bruce

Hi all,

Bruce, I thought it could still be possible because my threads were
stuck inside two dequeue(32) calls, but I haven't been able to
reproduce it with non-zero dequeues. And in trying to find a scenario
of my own, it seems that at least one dequeue(0) is needed, similar
to Olivier's example on the enqueue path.

Thanks,
Lazaros.


[dpdk-dev] [PATCH v2] ring: check for zero objects mc dequeue / mp enqueue

2016-03-28 Thread Lazaros Koromilas
Hi Olivier,

We could have two threads (running on different cores in the general
case) that both succeed the cmpset operation. In the dequeue path,
when n == 0, then cons_next == cons_head, and cmpset will always
succeed. Now, if they both see an old r->cons.tail value from a
previous dequeue, they can get stuck in the while
(unlikely(r->cons.tail != cons_head)) loop. I tried, however, to
reproduce (without the patch) and it seems that there is still a
window for deadlock.

I'm pasting some debug output below that shows two processes' state.
It's two receivers doing interleaved mc_dequeue(32)/mc_dequeue(0), and
one sender doing mp_enqueue(32) on the same ring.

gdb --args ./ring-test -l0 --proc-type=primary
gdb --args ./ring-test -l1 --proc-type=secondary
gdb --args ./ring-test -l2 --proc-type=secondary -- tx

This is what I would usually see, process 0 and 1 both stuck at the same state:

663 while (unlikely(r->cons.tail != cons_head)) {
(gdb) p n
$1 = 0
(gdb) p r->cons.tail
$2 = 576416
(gdb) p cons_head
$3 = 576448
(gdb) p cons_next
$4 = 576448

But now I managed to get the two processes stuck at this state too.

process 0:
663 while (unlikely(r->cons.tail != cons_head)) {
(gdb) p n
$1 = 32
(gdb) p r->cons.tail
$2 = 254348832
(gdb) p cons_head
$3 = 254348864
(gdb) p cons_next
$4 = 254348896

process 1:
663 while (unlikely(r->cons.tail != cons_head)) {
(gdb) p n
$1 = 32
(gdb) p r->cons.tail
$2 = 254348832
(gdb) p cons_head
$3 = 254348896
(gdb) p cons_next
$4 = 254348928

I haven't been able to trigger this second state with the patch so
far, but it should still be possible.

Lazaros.

On Fri, Mar 25, 2016 at 1:15 PM, Olivier Matz  wrote:
> Hi Lazaros,
>
> On 03/17/2016 04:49 PM, Lazaros Koromilas wrote:
>> Issuing a zero-object dequeue with a single consumer has no effect.
>> Doing so with multiple consumers can get more than one thread to succeed
>> the compare-and-set operation and observe starvation or even deadlock in
>> the while loop that checks for preceding dequeues. The problematic piece
>> of code when n = 0:
>>
>> cons_next = cons_head + n;
>> success = rte_atomic32_cmpset(&r->cons.head, cons_head, cons_next);
>>
>> The same is possible on the enqueue path.
>
> Just a question about this patch (that has been applied). Thomas
> retitled the commit from your log message:
>
>   ring: fix deadlock in zero object multi enqueue or dequeue
>   http://dpdk.org/browse/dpdk/commit/?id=d0979646166e
>
> I think this patch does not fix a deadlock, or did I miss something?
>
> As explained in the following links, the ring may not perform well
> if several threads running on the same cpu use it:
>
>   http://dpdk.org/ml/archives/dev/2013-November/000714.html
>   http://www.dpdk.org/ml/archives/dev/2014-January/001070.html
>   http://www.dpdk.org/ml/archives/dev/2014-January/001162.html
>   http://dpdk.org/ml/archives/dev/2015-July/020659.html
>
> A deadlock could occur if the threads running on the same core
> have different priority.
>
> Regards,
> Olivier


[dpdk-dev] [PATCH] mempool: allow for user-owned mempool caches

2016-03-24 Thread Lazaros Koromilas
On Mon, Mar 21, 2016 at 3:49 PM, Wiles, Keith  wrote:
>>Hi Lazaros,
>>
>>Thanks for this patch. To me, this is a valuable enhancement.
>>Please find some comments inline.
>>
>>On 03/10/2016 03:44 PM, Lazaros Koromilas wrote:
>>> The mempool cache is only available to EAL threads as a per-lcore
>>> resource. Change this so that the user can create and provide their own
>>> cache on mempool get and put operations. This works with non-EAL threads
>>> too. This commit introduces new API calls with the 'with_cache' suffix,
>>> while the current ones default to the per-lcore local cache.
>>>
>>> Signed-off-by: Lazaros Koromilas 
>>> ---
>>>  lib/librte_mempool/rte_mempool.c |  65 +-
>>>  lib/librte_mempool/rte_mempool.h | 442 ---
>>>  2 files changed, 467 insertions(+), 40 deletions(-)
>>>
>>> diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
>>> index f8781e1..cebc2b7 100644
>>> --- a/lib/librte_mempool/rte_mempool.c
>>> +++ b/lib/librte_mempool/rte_mempool.c
>>> @@ -375,6 +375,43 @@ rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, size_t elt_sz,
>>>  return usz;
>>>  }
>>>
>>> +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>
>>I wonder if this wouldn't cause a conflict with Keith's patch
>>that removes some #ifdefs RTE_MEMPOOL_CACHE_MAX_SIZE.
>>See: http://www.dpdk.org/dev/patchwork/patch/10492/
>
> Hi Lazaros,
>
> The patch I submitted keeps the mempool cache structure (pointers and
> variables) and only allocates the cache if the caller asks for one. This
> means the caller could fill the cache pointer and values into the mempool
> structure to get a cache without a lot of extra code. If we added a set of
> APIs to fill in these structure variables, would that not give you the
> external cache support? I have not really looked at the patch to verify
> this will work, but it sure seems like it.
>
> So my suggestion is that the caller just creates a mempool without a cache
> and then calls a set of APIs to fill in his cache values; does that not
> work?
>
> If we can do this, it reduces the API and possibly the ABI changes to
> mempool, as the new cache create routines and APIs could live in a new
> file that just updates the mempool structure correctly.

Hi Keith,

The main benefit of having an external cache is to allow mempool users
(threads) to maintain a local cache even though they don't have a
valid lcore_id (non-EAL threads). The fact that cache access is done
by indexing with the lcore_id is what makes it difficult...

One option would be to only have external caches, but that hurts the
common case where you want an automatic per-lcore cache. Another would
be a cache registration mechanism (overkill?).
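
For illustration, with the API this series eventually settled on (the
v5 calls), usage from a non-EAL thread would look roughly like the
sketch below (sketch only; error handling is trimmed, mp is assumed to
point to an existing mempool, and the burst size and flags value are
just examples):

    struct rte_mempool_cache *cache;
    void *objs[32];

    /* One cache per (thread, mempool) pair. */
    cache = rte_mempool_cache_create(RTE_MEMPOOL_CACHE_MAX_SIZE,
                                     SOCKET_ID_ANY);
    if (cache == NULL)
            rte_exit(EXIT_FAILURE, "cannot create mempool cache\n");

    if (rte_mempool_generic_get(mp, objs, 32, cache, 0) == 0) {
            /* ... use the objects ... */
            rte_mempool_generic_put(mp, objs, 32, cache, 0);
    }

    /* On thread exit: return cached objects, then free the cache. */
    rte_mempool_cache_flush(cache, mp);
    rte_mempool_cache_free(cache);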

So, I'm going to work on the comments and send out a v2 asap. Thanks everyone!

Lazaros.

>
>>
>>As this patch is already acked for 16.07, I think that your v2
>>could be rebased on top of it to avoid conflicts when Thomas will apply
>>it.
>>
>>By the way, I also encourage you to have a look at other works in
>>progress in mempool:
>>http://www.dpdk.org/ml/archives/dev/2016-March/035107.html
>>http://www.dpdk.org/ml/archives/dev/2016-March/035201.html
>>
>>
>
> Regards,
> Keith
>
>
>
>


[dpdk-dev] [PATCH] mempool: allow for user-owned mempool caches

2016-03-24 Thread Lazaros Koromilas
Hi Konstantin,

On Mon, Mar 21, 2016 at 3:15 PM, Ananyev, Konstantin
 wrote:
>
> Hi lads,
>
>>
>> Hi Lazaros,
>>
>> Thanks for this patch. To me, this is a valuable enhancement.
>> Please find some comments inline.
>
> Yep, patch seems interesting.
> One question - what would be the usage model for get/put_with_cache functions 
> for non-EAL threads?
> I meant that each non-EAL thread user will need to maintain an array (or
> list or some other structure) of <mempool, cache> pairs to make sure that
> the cache always matches the mempool, correct?

Yes, the user would also have to keep track of the caches. As for the
use case, the original idea was to enable caches for threads that run
on the same core. There are cases where this can work, as described in
the programming guide's section on multiple pthreads. Even with two EAL
threads, we currently cannot have multiple caches on a single core,
unless we use the --lcores long option to separate the lcore id (which
really is a thread id here) from the actual core affinity.

So the cache should be a member of the thread and not the mempool, in
my opinion. It has to be consistently used with the same mempool
though.
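
Concretely, each such thread could keep something like this
(hypothetical struct, named here only to show the pairing):

    /* Hypothetical per-thread bookkeeping: keep the cache together
     * with the mempool it fronts, so the two are always used as a
     * pair. */
    struct thread_mempool_ctx {
            struct rte_mempool *mp;
            struct rte_mempool_cache *cache;
    };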

> Again, for non-EAL threads don't we need some sort of invalidate_cache()
> that would put all mbufs in the cache back to the pool?
> For thread termination case or so?

I think EAL threads would also benefit from an invalidate() function
call. This could be part of the free() that Olivier proposed; see the
sketch below.
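
For example (hypothetical helper, not a committed API; it is written
in terms of the flush and free calls that the later v5 series
provides):

    /* Hypothetical combined teardown: return cached objects to the
     * pool, then release the cache itself. */
    static void
    mempool_cache_destroy(struct rte_mempool_cache *cache,
                          struct rte_mempool *mp)
    {
            rte_mempool_cache_flush(cache, mp);
            rte_mempool_cache_free(cache);
    }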

Thanks!
Lazaros.

>
>>
>> On 03/10/2016 03:44 PM, Lazaros Koromilas wrote:
>> > The mempool cache is only available to EAL threads as a per-lcore
>> > resource. Change this so that the user can create and provide their own
>> > cache on mempool get and put operations. This works with non-EAL threads
>> > too. This commit introduces new API calls with the 'with_cache' suffix,
>> > while the current ones default to the per-lcore local cache.
>> >
>> > Signed-off-by: Lazaros Koromilas 
>> > ---
>> >  lib/librte_mempool/rte_mempool.c |  65 +-
>> >  lib/librte_mempool/rte_mempool.h | 442 ---
>> >  2 files changed, 467 insertions(+), 40 deletions(-)
>> >
>> > diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
>> > index f8781e1..cebc2b7 100644
>> > --- a/lib/librte_mempool/rte_mempool.c
>> > +++ b/lib/librte_mempool/rte_mempool.c
>> > @@ -375,6 +375,43 @@ rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, size_t elt_sz,
>> > return usz;
>> >  }
>> >
>> > +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>
>> I wonder if this wouldn't cause a conflict with Keith's patch
>> that removes some #ifdefs RTE_MEMPOOL_CACHE_MAX_SIZE.
>> See: http://www.dpdk.org/dev/patchwork/patch/10492/
>>
>> As this patch is already acked for 16.07, I think that your v2
>> could be rebased on top of it to avoid conflicts when Thomas will apply
>> it.
>>
>> By the way, I also encourage you to have a look at other works in
>> progress in mempool:
>> http://www.dpdk.org/ml/archives/dev/2016-March/035107.html
>> http://www.dpdk.org/ml/archives/dev/2016-March/035201.html
>>
>>
>> > +static void
>> > +mempool_cache_init(struct rte_mempool_cache *cache, uint32_t size)
>> > +{
>> > +   cache->size = size;
>> > +   cache->flushthresh = CALC_CACHE_FLUSHTHRESH(size);
>> > +   cache->len = 0;
>> > +}
>> > +
>> > +/*
>> > + * Creates and initializes a cache for objects that are retrieved from and
>> > + * returned to an underlying mempool. This structure is identical to the
>> > + * structure included inside struct rte_mempool.
>> > + */
>>
>> On top of Keith's patch, this comment may be reworked as the cache
>> structure is not included in the mempool structure anymore.
>>
>> nit: I think the imperative form is preferred
>>
>>
>> > +struct rte_mempool_cache *
>> > +rte_mempool_cache_create(uint32_t size)
>
> I suppose extra socket_id parameter is needed, so you can call
> zmalloc_socket() beneath.
>
>> > +{
>> > +   struct rte_mempool_cache *cache;
>> > +
>> > +   if (size > RTE_MEMPOOL_CACHE_MAX_SIZE) {
>> > +   rte_errno = EINVAL;
>> > +   return NULL;
>> > +   }
>> > +
>> > +   cache = rte_zmalloc("MEMPOOL_CACHE", sizeof(*cache), 
>> > RTE_CACHE_LINE_SIZE);
>> > +   if (cache == NULL) {
>>

[dpdk-dev] [PATCH v2] ring: check for zero objects mc dequeue / mp enqueue

2016-03-17 Thread Lazaros Koromilas
Issuing a zero-object dequeue with a single consumer has no effect.
Doing so with multiple consumers can get more than one thread to succeed
the compare-and-set operation and observe starvation or even deadlock in
the while loop that checks for preceding dequeues. The problematic piece
of code when n = 0:

cons_next = cons_head + n;
success = rte_atomic32_cmpset(&r->cons.head, cons_head, cons_next);

The same is possible on the enqueue path.

Signed-off-by: Lazaros Koromilas 
---
 lib/librte_ring/rte_ring.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h
index 943c97c..eb45e41 100644
--- a/lib/librte_ring/rte_ring.h
+++ b/lib/librte_ring/rte_ring.h
@@ -431,6 +431,11 @@ __rte_ring_mp_do_enqueue(struct rte_ring *r, void * const *obj_table,
uint32_t mask = r->prod.mask;
int ret;

+   /* Avoid the unnecessary cmpset operation below, which is also
+* potentially harmful when n equals 0. */
+   if (n == 0)
+   return 0;
+
/* move prod.head atomically */
do {
/* Reset n to the initial burst count */
@@ -618,6 +623,11 @@ __rte_ring_mc_do_dequeue(struct rte_ring *r, void **obj_table,
unsigned i, rep = 0;
uint32_t mask = r->prod.mask;

+   /* Avoid the unnecessary cmpset operation below, which is also
+* potentially harmful when n equals 0. */
+   if (n == 0)
+   return 0;
+
/* move cons.head atomically */
do {
/* Restore n as it may change every loop */
-- 
1.9.1



[dpdk-dev] [PATCH] ring: assert on zero objects dequeue/enqueue

2016-03-17 Thread Lazaros Koromilas
Sure, I'm sending it again with your suggestions.

Lazaros.

On Wed, Mar 16, 2016 at 1:21 PM, Bruce Richardson
 wrote:
> On Tue, Mar 15, 2016 at 06:58:45PM +0200, Lazaros Koromilas wrote:
>> Issuing a zero-object dequeue with a single consumer has no effect.
>> Doing so with multiple consumers can get more than one thread to succeed
>> the compare-and-set operation and observe starvation or even deadlock in
>> the while loop that checks for preceding dequeues. The problematic piece
>> of code when n = 0:
>>
>> cons_next = cons_head + n;
>> success = rte_atomic32_cmpset(&r->cons.head, cons_head, cons_next);
>>
>> The same is possible on the enqueue path.
>>
>> Signed-off-by: Lazaros Koromilas 
>
> I'm not sure how serious a problem this really is, and I really suspect that
> just calling rte_panic is rather an overreaction here. At worst, this should
> be a check only when RTE_RING_DEBUG is on.
>
> However, probably my preferred solution to this issue would be to just add
>    if (n == 0)
>            return 0
>
> to the mp and mc enqueue/dequeue functions. That way there is no performance
> penalty for the higher-performing sp/sc paths, and you avoid any unnecessary
> cmpset operations for the mp/mc cases.
>
> /Bruce
>
>> ---
>>  lib/librte_ring/rte_ring.h | 26 ++
>>  1 file changed, 26 insertions(+)
>>
>> diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h
>> index 943c97c..2bf9ce3 100644
>> --- a/lib/librte_ring/rte_ring.h
>> +++ b/lib/librte_ring/rte_ring.h
>> @@ -100,6 +100,7 @@ extern "C" {
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>
>>  #define RTE_TAILQ_RING_NAME "RTE_RING"
>>
>> @@ -211,6 +212,19 @@ struct rte_ring {
>>  #endif
>>
>>  /**
>> + * @internal Assert macro.
>> + * @param exp
>> + *   The expression to evaluate.
>> + */
>> +#define RTE_RING_ASSERT(exp) do { \
>> + if (!(exp)) { \
>> + rte_panic("line%d\t"  \
>> +   "assert \"" #exp "\" failed\n", \
>> +   __LINE__);  \
>> + } \
>> + } while (0)
>> +
>> +/**
>>   * Calculate the memory size needed for a ring
>>   *
>>   * This function returns the number of bytes needed for a ring, given
>> @@ -406,6 +420,7 @@ void rte_ring_dump(FILE *f, const struct rte_ring *r);
>>   *   A pointer to a table of void * pointers (objects).
>>   * @param n
>>   *   The number of objects to add in the ring from the obj_table.
>> + *   Must be greater than zero.
>>   * @param behavior
>>   *   RTE_RING_QUEUE_FIXED:Enqueue a fixed number of items from a ring
>>   *   RTE_RING_QUEUE_VARIABLE: Enqueue as many items a possible from ring
>> @@ -431,6 +446,8 @@ __rte_ring_mp_do_enqueue(struct rte_ring *r, void * const *obj_table,
>>   uint32_t mask = r->prod.mask;
>>   int ret;
>>
>> + RTE_RING_ASSERT(n > 0);
>> +
>>   /* move prod.head atomically */
>>   do {
>>   /* Reset n to the initial burst count */
>> @@ -510,6 +527,7 @@ __rte_ring_mp_do_enqueue(struct rte_ring *r, void * const *obj_table,
>>   *   A pointer to a table of void * pointers (objects).
>>   * @param n
>>   *   The number of objects to add in the ring from the obj_table.
>> + *   Must be greater than zero.
>>   * @param behavior
>>   *   RTE_RING_QUEUE_FIXED:Enqueue a fixed number of items from a ring
>>   *   RTE_RING_QUEUE_VARIABLE: Enqueue as many items a possible from ring
>> @@ -533,6 +551,8 @@ __rte_ring_sp_do_enqueue(struct rte_ring *r, void * const *obj_table,
>>   uint32_t mask = r->prod.mask;
>>   int ret;
>>
>> + RTE_RING_ASSERT(n > 0);
>> +
>>   prod_head = r->prod.head;
>>   cons_tail = r->cons.tail;
>>   /* The subtraction is done between two unsigned 32bits value
>> @@ -594,6 +614,7 @@ __rte_ring_sp_do_enqueue(struct rte_ring *r, void * const *obj_table,
>>   *   A pointer to a table of void * pointers (objects) that will be filled.
>>   * @param n
>>   *   The number of objects to dequeue from the ring to the obj_table.
>> + *   Must be greater than zero.
>>   * @param behavior
>>   *   RTE_R

[dpdk-dev] [PATCH] ring: assert on zero objects dequeue/enqueue

2016-03-15 Thread Lazaros Koromilas
Issuing a zero-object dequeue with a single consumer has no effect.
Doing so with multiple consumers can get more than one thread to succeed
the compare-and-set operation and observe starvation or even deadlock in
the while loop that checks for preceding dequeues. The problematic piece
of code when n = 0:

cons_next = cons_head + n;
success = rte_atomic32_cmpset(&r->cons.head, cons_head, cons_next);

The same is possible on the enqueue path.

Signed-off-by: Lazaros Koromilas 
---
 lib/librte_ring/rte_ring.h | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h
index 943c97c..2bf9ce3 100644
--- a/lib/librte_ring/rte_ring.h
+++ b/lib/librte_ring/rte_ring.h
@@ -100,6 +100,7 @@ extern "C" {
 #include 
 #include 
 #include 
+#include 

 #define RTE_TAILQ_RING_NAME "RTE_RING"

@@ -211,6 +212,19 @@ struct rte_ring {
 #endif

 /**
+ * @internal Assert macro.
+ * @param exp
+ *   The expression to evaluate.
+ */
+#define RTE_RING_ASSERT(exp) do { \
+   if (!(exp)) { \
+   rte_panic("line%d\t"  \
+ "assert \"" #exp "\" failed\n", \
+ __LINE__);  \
+   } \
+   } while (0)
+
+/**
  * Calculate the memory size needed for a ring
  *
  * This function returns the number of bytes needed for a ring, given
@@ -406,6 +420,7 @@ void rte_ring_dump(FILE *f, const struct rte_ring *r);
  *   A pointer to a table of void * pointers (objects).
  * @param n
  *   The number of objects to add in the ring from the obj_table.
+ *   Must be greater than zero.
  * @param behavior
  *   RTE_RING_QUEUE_FIXED:Enqueue a fixed number of items from a ring
  *   RTE_RING_QUEUE_VARIABLE: Enqueue as many items a possible from ring
@@ -431,6 +446,8 @@ __rte_ring_mp_do_enqueue(struct rte_ring *r, void * const *obj_table,
uint32_t mask = r->prod.mask;
int ret;

+   RTE_RING_ASSERT(n > 0);
+
/* move prod.head atomically */
do {
/* Reset n to the initial burst count */
@@ -510,6 +527,7 @@ __rte_ring_mp_do_enqueue(struct rte_ring *r, void * const *obj_table,
  *   A pointer to a table of void * pointers (objects).
  * @param n
  *   The number of objects to add in the ring from the obj_table.
+ *   Must be greater than zero.
  * @param behavior
  *   RTE_RING_QUEUE_FIXED:Enqueue a fixed number of items from a ring
  *   RTE_RING_QUEUE_VARIABLE: Enqueue as many items a possible from ring
@@ -533,6 +551,8 @@ __rte_ring_sp_do_enqueue(struct rte_ring *r, void * const *obj_table,
uint32_t mask = r->prod.mask;
int ret;

+   RTE_RING_ASSERT(n > 0);
+
prod_head = r->prod.head;
cons_tail = r->cons.tail;
/* The subtraction is done between two unsigned 32bits value
@@ -594,6 +614,7 @@ __rte_ring_sp_do_enqueue(struct rte_ring *r, void * const *obj_table,
  *   A pointer to a table of void * pointers (objects) that will be filled.
  * @param n
  *   The number of objects to dequeue from the ring to the obj_table.
+ *   Must be greater than zero.
  * @param behavior
  *   RTE_RING_QUEUE_FIXED:Dequeue a fixed number of items from a ring
  *   RTE_RING_QUEUE_VARIABLE: Dequeue as many items a possible from ring
@@ -618,6 +639,8 @@ __rte_ring_mc_do_dequeue(struct rte_ring *r, void **obj_table,
unsigned i, rep = 0;
uint32_t mask = r->prod.mask;

+   RTE_RING_ASSERT(n > 0);
+
/* move cons.head atomically */
do {
/* Restore n as it may change every loop */
@@ -689,6 +712,7 @@ __rte_ring_mc_do_dequeue(struct rte_ring *r, void **obj_table,
  *   A pointer to a table of void * pointers (objects) that will be filled.
  * @param n
  *   The number of objects to dequeue from the ring to the obj_table.
+ *   Must be greater than zero.
  * @param behavior
  *   RTE_RING_QUEUE_FIXED:Dequeue a fixed number of items from a ring
  *   RTE_RING_QUEUE_VARIABLE: Dequeue as many items a possible from ring
@@ -710,6 +734,8 @@ __rte_ring_sc_do_dequeue(struct rte_ring *r, void **obj_table,
unsigned i;
uint32_t mask = r->prod.mask;

+   RTE_RING_ASSERT(n > 0);
+
cons_head = r->cons.head;
prod_tail = r->prod.tail;
/* The subtraction is done between two unsigned 32bits value
-- 
1.9.1



[dpdk-dev] [PATCH] mempool: allow for user-owned mempool caches

2016-03-10 Thread Lazaros Koromilas
The mempool cache is only available to EAL threads as a per-lcore
resource. Change this so that the user can create and provide their own
cache on mempool get and put operations. This works with non-EAL threads
too. This commit introduces new API calls with the 'with_cache' suffix,
while the current ones default to the per-lcore local cache.

Signed-off-by: Lazaros Koromilas 
---
 lib/librte_mempool/rte_mempool.c |  65 +-
 lib/librte_mempool/rte_mempool.h | 442 ---
 2 files changed, 467 insertions(+), 40 deletions(-)

diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index f8781e1..cebc2b7 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -375,6 +375,43 @@ rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, size_t elt_sz,
return usz;
 }

+#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+static void
+mempool_cache_init(struct rte_mempool_cache *cache, uint32_t size)
+{
+   cache->size = size;
+   cache->flushthresh = CALC_CACHE_FLUSHTHRESH(size);
+   cache->len = 0;
+}
+
+/*
+ * Creates and initializes a cache for objects that are retrieved from and
+ * returned to an underlying mempool. This structure is identical to the
+ * structure included inside struct rte_mempool.
+ */
+struct rte_mempool_cache *
+rte_mempool_cache_create(uint32_t size)
+{
+   struct rte_mempool_cache *cache;
+
+   if (size > RTE_MEMPOOL_CACHE_MAX_SIZE) {
+   rte_errno = EINVAL;
+   return NULL;
+   }
+
+   cache = rte_zmalloc("MEMPOOL_CACHE", sizeof(*cache), 
RTE_CACHE_LINE_SIZE);
+   if (cache == NULL) {
+   RTE_LOG(ERR, MEMPOOL, "Cannot allocate mempool cache!\n");
+   rte_errno = ENOMEM;
+   return NULL;
+   }
+
+   mempool_cache_init(cache, size);
+
+   return cache;
+}
+#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+
 #ifndef RTE_LIBRTE_XEN_DOM0
 /* stub if DOM0 support not configured */
 struct rte_mempool *
@@ -587,10 +624,18 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
mp->elt_size = objsz.elt_size;
mp->header_size = objsz.header_size;
mp->trailer_size = objsz.trailer_size;
-   mp->cache_size = cache_size;
-   mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
+   mp->cache_size = cache_size; /* Keep this for backwards compat. */
mp->private_data_size = private_data_size;

+#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+   {
+   unsigned lcore_id;
+   for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+   mempool_cache_init(&mp->local_cache[lcore_id],
+  cache_size);
+   }
+#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+
/* calculate address of the first element for continuous mempool. */
obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
private_data_size;
@@ -648,8 +693,8 @@ rte_mempool_count(const struct rte_mempool *mp)

 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
{
-   unsigned lcore_id;
-   if (mp->cache_size == 0)
+   unsigned lcore_id = rte_lcore_id();
+   if (mp->local_cache[lcore_id].size == 0)
return count;

for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
@@ -673,13 +718,17 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
unsigned lcore_id;
unsigned count = 0;
+   unsigned cache_size;
unsigned cache_count;

fprintf(f, "  cache infos:\n");
-   fprintf(f, "cache_size=%"PRIu32"\n", mp->cache_size);
for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+   cache_size = mp->local_cache[lcore_id].size;
+   fprintf(f, "cache_size[%u]=%"PRIu32"\n",
+   lcore_id, cache_size);
cache_count = mp->local_cache[lcore_id].len;
-   fprintf(f, "cache_count[%u]=%u\n", lcore_id, cache_count);
+   fprintf(f, "cache_count[%u]=%"PRIu32"\n",
+   lcore_id, cache_count);
count += cache_count;
}
fprintf(f, "total_cache_count=%u\n", count);
@@ -761,7 +810,9 @@ mempool_audit_cache(const struct rte_mempool *mp)
/* check cache size consistency */
unsigned lcore_id;
for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
-   if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
+   const struct rte_mempool_cache *cache;
+   cache = &mp->local_cache[lcore_id];
+   if (cache->len