[Nouveau] [PATCH 2/3] drm/nouveau: add lockless dynamic semaphore allocator (v2)
Changes in v2: - Addressed review comments - Fixed lockless algorithm (must not dec if negative in addition to if 0) - Made spinlock irqsave (fences are completed in IRQs) This patch adds code to allocate semaphores in a dynamic way using a lockless algorithm. 1. Semaphore BOs Semaphore BOs are BOs containing semaphores. Each is 4KB large and contains 1024 4-byte semaphores. They are pinned and mapped. Semaphore BOs are allocated on-demand and freed at device takedown. Those that are not fully allocated are kept on a free list. Each is assigned a handle. DMA objects and references are created on demand for each channel that needs to use a semaphore BO. Those objects and references are automatically destroyed at channel destruction time. Typically only a single semaphore BO will be used. 2. Semaphore allocation Each semaphore BO contains a bitmask of free semaphores within the BO. Allocation is done in a lockless fashion using a count of free semaphores and the bitmask. Semaphores are released once the fence on the waiting side has passed. This is done by adding fields to nouveau_fence. Semaphore values are zeroed when the semaphore BO is allocated, and are afterwards only modified by the GPU. This is performed by storing a bitmask that allows alternating between using the values 0 and 1 for a given semaphore. 
Signed-off-by: Luca Barbieri l...@luca-barbieri.com --- drivers/gpu/drm/nouveau/nouveau_drv.h |9 + drivers/gpu/drm/nouveau/nouveau_fence.c | 265 +++ drivers/gpu/drm/nouveau/nouveau_state.c |4 + 3 files changed, 278 insertions(+), 0 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index bb9024c..93e5427 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -621,6 +621,13 @@ struct drm_nouveau_private { struct { struct dentry *channel_root; } debugfs; + + struct { + spinlock_t free_list_lock; + struct nouveau_sem_bo *free_list; + uint32_t handles; + uint32_t max_handles; + } sem; }; static inline struct drm_nouveau_private * @@ -1142,6 +1149,8 @@ extern int nouveau_fence_flush(void *obj, void *arg); extern void nouveau_fence_unref(void **obj); extern void *nouveau_fence_ref(void *obj); extern void nouveau_fence_handler(struct drm_device *dev, int channel); +extern void nouveau_fence_device_init(struct drm_device *dev); +extern void nouveau_fence_device_takedown(struct drm_device *dev); /* nouveau_gem.c */ extern int nouveau_gem_new(struct drm_device *, struct nouveau_channel *, diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 9b1c2c3..7157148 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -32,6 +32,13 @@ #define USE_REFCNT (dev_priv-card_type = NV_10) +#define NOUVEAU_SEM_BO_SIZE PAGE_SIZE + +/* reading fences can be very expensive + * use a threshold that would only use up half a single sem_bo + */ +#define NOUVEAU_SEM_MIN_THRESHOLD (NOUVEAU_SEM_BO_SIZE / (NOUVEAU_MAX_CHANNEL_NR * 2)) + struct nouveau_fence { struct nouveau_channel *channel; struct kref refcount; @@ -47,6 +54,240 @@ nouveau_fence(void *sync_obj) return (struct nouveau_fence *)sync_obj; } +struct nouveau_sem_bo { + struct nouveau_sem_bo *next; + struct nouveau_bo *bo; + uint32_t handle; + + /* = 0: 
num_free + 1 slots are free, sem_bo is or is about to be on free_list + -1: all allocated, sem_bo is NOT on free_list + */ + atomic_t num_free; + + DECLARE_BITMAP(free_slots, NOUVEAU_SEM_BO_SIZE / sizeof(uint32_t)); + DECLARE_BITMAP(values, NOUVEAU_SEM_BO_SIZE / sizeof(uint32_t)); + DECLARE_BITMAP(channels, NOUVEAU_MAX_CHANNEL_NR); +}; + +struct nouveau_sem { + struct nouveau_sem_bo *sem_bo; + unsigned num; + uint32_t value; +}; + +static struct nouveau_sem_bo* +nouveau_sem_bo_alloc(struct drm_device *dev) +{ + struct drm_nouveau_private *dev_priv = dev-dev_private; + struct nouveau_sem_bo *sem_bo; + struct nouveau_bo *bo; + int flags = TTM_PL_FLAG_VRAM; + int ret; + bool is_iomem; + void *mem; + unsigned handle; + + do { + handle = dev_priv-sem.handles; + if (handle = dev_priv-sem.max_handles) + return NULL; + } while (cmpxchg(dev_priv-sem.handles, handle, handle + 1) != handle); + + sem_bo = kmalloc(sizeof(*sem_bo), GFP_KERNEL); + if (!sem_bo) + return NULL; + + sem_bo-handle = NvSem + handle; + + ret = nouveau_bo_new(dev, NULL, NOUVEAU_SEM_BO_SIZE, 0, flags, + 0, 0x, true, true, bo); + if (ret) + goto out_free; + + sem_bo-bo = bo; + + ret =
[Nouveau] [PATCH 2/3] drm/nouveau: add lockless dynamic semaphore allocator
This patch adds code to allocate semaphores in a dynamic way using an algorithm with a lockless fast path. 1. Semaphore BOs Semaphore BOs are BOs containing semaphores. Each is 4KB large and contains 1024 4-byte semaphores. They are pinned. Semaphore BOs are allocated on-demand and freed at device takedown. Those that are not fully allocated are kept on a free list. Each is assigned a handle. DMA objects and references are created on demand for each channel that needs to use a semaphore BO. Those objects and references are automatically destroyed at channel destruction time. Typically only a single semaphore BO will be needed. 2. Semaphore allocation Each semaphore BO contains a bitmask of free semaphores within the BO. Allocation is done in a lockless fashion using a count of free semaphores and the bitmask. Semaphores are released once the fence on the waiting side has passed. This is done by adding fields to nouveau_fence. Semaphore values are zeroed when the semaphore BO is allocated, and are afterwards only modified by the GPU. This is performed by storing a bitmask that allows alternating between using the values 0 and 1 for a given semaphore. 
Signed-off-by: Luca Barbieri l...@luca-barbieri.com --- drivers/gpu/drm/nouveau/nouveau_dma.h |1 + drivers/gpu/drm/nouveau/nouveau_drv.h |7 + drivers/gpu/drm/nouveau/nouveau_fence.c | 243 +++ drivers/gpu/drm/nouveau/nouveau_state.c |4 + 4 files changed, 255 insertions(+), 0 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h index dabfd65..0658979 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.h +++ b/drivers/gpu/drm/nouveau/nouveau_dma.h @@ -69,6 +69,7 @@ enum { NvGdiRect = 0x800c, NvImageBlit = 0x800d, NvSw= 0x800e, + NvSem = 0x8100, /* range of 16M handles */ /* G80+ display objects */ NvEvoVRAM = 0x0100, diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 2b78ee6..0a7abc7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -620,6 +620,11 @@ struct drm_nouveau_private { struct { struct dentry *channel_root; } debugfs; + + spinlock_t sem_bo_free_list_lock; + struct nouveau_sem_bo *sem_bo_free_list; + atomic_t sem_handles; + uint32_t sem_max_handles; }; static inline struct drm_nouveau_private * @@ -1141,6 +1146,8 @@ extern int nouveau_fence_flush(void *obj, void *arg); extern void nouveau_fence_unref(void **obj); extern void *nouveau_fence_ref(void *obj); extern void nouveau_fence_handler(struct drm_device *dev, int channel); +extern void nouveau_fence_device_init(struct drm_device *dev); +extern void nouveau_fence_device_takedown(struct drm_device *dev); /* nouveau_gem.c */ extern int nouveau_gem_new(struct drm_device *, struct nouveau_channel *, diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 9b1c2c3..01152f3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -32,6 +32,13 @@ #define USE_REFCNT (dev_priv-card_type = NV_10) +#define NOUVEAU_SEM_BO_SIZE PAGE_SIZE + +/* reading fences can be very expensive + * use a 
threshold that would only use up half a single sem_bo + */ +#define NOUVEAU_SEM_MIN_THRESHOLD (NOUVEAU_SEM_BO_SIZE / (NOUVEAU_MAX_CHANNEL_NR * 2)) + struct nouveau_fence { struct nouveau_channel *channel; struct kref refcount; @@ -47,6 +54,218 @@ nouveau_fence(void *sync_obj) return (struct nouveau_fence *)sync_obj; } +struct nouveau_sem_bo { + struct nouveau_sem_bo *next; + struct nouveau_bo *bo; + uint32_t handle; + + /* = 0: num_free + 1 slots are free, sem_bo is or is about to be on free_list + -1: all allocated, sem_bo is NOT on free_list + */ + atomic_t num_free; + + DECLARE_BITMAP(free_slots, NOUVEAU_SEM_BO_SIZE / sizeof(uint32_t)); + DECLARE_BITMAP(values, NOUVEAU_SEM_BO_SIZE / sizeof(uint32_t)); + DECLARE_BITMAP(channels, NOUVEAU_MAX_CHANNEL_NR); +}; + +struct nouveau_sem { + struct nouveau_sem_bo *sem_bo; + unsigned num; + uint32_t value; +}; + +struct nouveau_sem_bo* +nouveau_sem_bo_alloc(struct drm_device *dev) +{ + struct nouveau_sem_bo *sem_bo = kmalloc(sizeof(*sem_bo), GFP_KERNEL); + struct nouveau_bo *bo; + int flags = TTM_PL_FLAG_VRAM; + int ret; + bool is_iomem; + void *mem; + + sem_bo = kmalloc(sizeof(*sem_bo), GFP_KERNEL); + + if (!sem_bo) + return 0; + + ret = nouveau_bo_new(dev, NULL, NOUVEAU_SEM_BO_SIZE, 0, flags, + 0, 0x, true, true, bo); + if (ret) + goto out_free; + + sem_bo-bo = bo; + + ret =
Re: [Nouveau] [PATCH 2/3] drm/nouveau: add lockless dynamic semaphore allocator
Luca Barbieri l...@luca-barbieri.com writes: This patch adds code to allocate semaphores in a dynamic way using an algorithm with a lockless fast path. 1. Semaphore BOs Semaphore BOs are BOs containing semaphores. Each is 4KB large and contains 1024 4-byte semaphores. They are pinned. Semaphore BOs are allocated on-demand and freed at device takedown. Those that are not fully allocated are kept on a free list. Each is assigned an handle. DMA objects and references are created on demand for each channel that needs to use a semaphore BO. Those objects and references are automatically destroyed at channel destruction time. Typically only a single semaphore BO will be needed. 2. Semaphore allocation Each semaphore BO contains a bitmask of free semaphores within the BO. Allocation is done in a lockless fashion using a count of free semaphores and the bitmask. Semaphores are released once the fence on the waiting side passed. This is done by adding fields to nouveau_fence. Semaphore values are zeroed when the semaphore BO is allocated, and are afterwards only modified by the GPU. This is performed by storing a bitmask that allows to alternate between using the values 0 and 1 for a given semaphore. 
Signed-off-by: Luca Barbieri l...@luca-barbieri.com --- drivers/gpu/drm/nouveau/nouveau_dma.h |1 + drivers/gpu/drm/nouveau/nouveau_drv.h |7 + drivers/gpu/drm/nouveau/nouveau_fence.c | 243 +++ drivers/gpu/drm/nouveau/nouveau_state.c |4 + 4 files changed, 255 insertions(+), 0 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h index dabfd65..0658979 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.h +++ b/drivers/gpu/drm/nouveau/nouveau_dma.h @@ -69,6 +69,7 @@ enum { NvGdiRect = 0x800c, NvImageBlit = 0x800d, NvSw= 0x800e, + NvSem = 0x8100, /* range of 16M handles */ /* G80+ display objects */ NvEvoVRAM = 0x0100, diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 2b78ee6..0a7abc7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -620,6 +620,11 @@ struct drm_nouveau_private { struct { struct dentry *channel_root; } debugfs; + + spinlock_t sem_bo_free_list_lock; + struct nouveau_sem_bo *sem_bo_free_list; + atomic_t sem_handles; + uint32_t sem_max_handles; }; static inline struct drm_nouveau_private * @@ -1141,6 +1146,8 @@ extern int nouveau_fence_flush(void *obj, void *arg); extern void nouveau_fence_unref(void **obj); extern void *nouveau_fence_ref(void *obj); extern void nouveau_fence_handler(struct drm_device *dev, int channel); +extern void nouveau_fence_device_init(struct drm_device *dev); +extern void nouveau_fence_device_takedown(struct drm_device *dev); /* nouveau_gem.c */ extern int nouveau_gem_new(struct drm_device *, struct nouveau_channel *, diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 9b1c2c3..01152f3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -32,6 +32,13 @@ #define USE_REFCNT (dev_priv-card_type = NV_10) +#define NOUVEAU_SEM_BO_SIZE PAGE_SIZE + +/* reading fences can be very expensive + * use a 
threshold that would only use up half a single sem_bo + */ +#define NOUVEAU_SEM_MIN_THRESHOLD (NOUVEAU_SEM_BO_SIZE / (NOUVEAU_MAX_CHANNEL_NR * 2)) + struct nouveau_fence { struct nouveau_channel *channel; struct kref refcount; @@ -47,6 +54,218 @@ nouveau_fence(void *sync_obj) return (struct nouveau_fence *)sync_obj; } +struct nouveau_sem_bo { + struct nouveau_sem_bo *next; + struct nouveau_bo *bo; + uint32_t handle; + + /* = 0: num_free + 1 slots are free, sem_bo is or is about to be on free_list + -1: all allocated, sem_bo is NOT on free_list + */ + atomic_t num_free; + + DECLARE_BITMAP(free_slots, NOUVEAU_SEM_BO_SIZE / sizeof(uint32_t)); + DECLARE_BITMAP(values, NOUVEAU_SEM_BO_SIZE / sizeof(uint32_t)); + DECLARE_BITMAP(channels, NOUVEAU_MAX_CHANNEL_NR); +}; + +struct nouveau_sem { + struct nouveau_sem_bo *sem_bo; + unsigned num; + uint32_t value; +}; + +struct nouveau_sem_bo* +nouveau_sem_bo_alloc(struct drm_device *dev) +{ + struct nouveau_sem_bo *sem_bo = kmalloc(sizeof(*sem_bo), GFP_KERNEL); + struct nouveau_bo *bo; + int flags = TTM_PL_FLAG_VRAM; + int ret; + bool is_iomem; + void *mem; + + sem_bo = kmalloc(sizeof(*sem_bo), GFP_KERNEL); + + if (!sem_bo) + return 0; + + ret = nouveau_bo_new(dev, NULL, NOUVEAU_SEM_BO_SIZE, 0, flags, + 0, 0x, true, true, bo); +
Re: [Nouveau] [PATCH 2/3] drm/nouveau: add lockless dynamic semaphore allocator
How often do we expect cross-channel sync to kick in? Maybe 2-3 times per frame? I suspect contentions will be rare enough to make spinlocks as fast as atomics for all real-life cases, and they don't have such a high maintainability cost. What do you guys think? For the case of a single (or a few) GL application the requirements are indeed modest. I'm not sure that spinlocks or an otherwise reduced solution would be much simpler. You basically would just avoid the retrying code. Also if you have a multithreaded/multiprocess GPGPU application on large SMP machine things may change, as you may have a lot of commands and semaphores in flight, as well as high contention for anything global. Of course, currently we hold both the BKL and struct_mutex around things, which makes it all moot, but hopefully we'll switch to per-channel mutexes soon (I'm looking into that). ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH 2/3] drm/nouveau: add lockless dynamic semaphore allocator
Luca Barbieri l...@luca-barbieri.com writes: How often do we expect cross-channel sync to kick in? Maybe 2-3 times per frame? I suspect contentions will be rare enough to make spinlocks as fast as atomics for all real-life cases, and they don't have such a high maintainability cost. What do you guys think? For the case of a single (or a few) GL application the requirements are indeed modest. I'm not sure that spinlocks or an otherwise reduced solution would be much simpler. You basically would just avoid the retrying code. Also if you have a multithreaded/multiprocess GPGPU application on large SMP machine things may change, as you may have a lot of commands and semaphores in flight, as well as high contention for anything global. Sounds like premature optimization to me. I'm just stating my personal view here, but I have a feeling a patch with 60% of lines could do very well the same for most realistic cases. Maarten, Ben, what do you think about this? Of course, currently we hold both the BKL and struct_mutex around things, which makes it all moot, but hopefully we'll switch to per-channel mutexes soon (I'm looking into that). BTW, the kernel has some linked list helpers you might want to use for sem_bo_free_list, and probably the best place for the sem stuff to live is dev_priv-fence instead of the root of drm_nouveau_private. pgpECYGXD4Dlu.pgp Description: PGP signature ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH 2/3] drm/nouveau: add lockless dynamic semaphore allocator
Sounds like premature optimization to me. I'm just stating my personal view here, but I have a feeling a patch with 60% of lines could do very well the same for most realistic cases. Perhaps, but really, the only thing you would probably save by using spinlocks in the fast path is retrying in nouveau_sem_alloc, which should be at most 10 lines saved. You could save much more by supporting only a single static semaphore BO, and still retain almost all flexibility by making it large. However, it's somewhat inelegant, and why not just keep the functionality so we never need to revisit this again? BTW, the kernel has some linked list helpers you might want to use for sem_bo_free_list It is a singly linked list, and slist.h never got merged. I could possibly make it doubly linked, even though it's a bit useless. and probably the best place for the sem stuff to live is dev_priv-fence instead of the root of drm_nouveau_private. There is no fence currently in drm_nouveau_private. Adding a sem or fence substructure could make sense though. ___ Nouveau mailing list Nouveau@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau
Re: [Nouveau] [PATCH 2/3] drm/nouveau: add lockless dynamic semaphore allocator
On Mon, Feb 01, 2010 at 10:50:09AM +0100, Luca Barbieri wrote: This patch adds code to allocate semaphores in a dynamic way using an algorithm with a lockless fast path. some minor comments below 1. Semaphore BOs Semaphore BOs are BOs containing semaphores. Each is 4KB large and contains 1024 4-byte semaphores. They are pinned. Semaphore BOs are allocated on-demand and freed at device takedown. Those that are not fully allocated are kept on a free list. Each is assigned an handle. DMA objects and references are created on demand for each channel that needs to use a semaphore BO. Those objects and references are automatically destroyed at channel destruction time. Typically only a single semaphore BO will be needed. 2. Semaphore allocation Each semaphore BO contains a bitmask of free semaphores within the BO. Allocation is done in a lockless fashion using a count of free semaphores and the bitmask. Semaphores are released once the fence on the waiting side passed. This is done by adding fields to nouveau_fence. Semaphore values are zeroed when the semaphore BO is allocated, and are afterwards only modified by the GPU. This is performed by storing a bitmask that allows to alternate between using the values 0 and 1 for a given semaphore. 
Signed-off-by: Luca Barbieri l...@luca-barbieri.com --- drivers/gpu/drm/nouveau/nouveau_dma.h |1 + drivers/gpu/drm/nouveau/nouveau_drv.h |7 + drivers/gpu/drm/nouveau/nouveau_fence.c | 243 +++ drivers/gpu/drm/nouveau/nouveau_state.c |4 + 4 files changed, 255 insertions(+), 0 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h index dabfd65..0658979 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.h +++ b/drivers/gpu/drm/nouveau/nouveau_dma.h @@ -69,6 +69,7 @@ enum { NvGdiRect = 0x800c, NvImageBlit = 0x800d, NvSw= 0x800e, + NvSem = 0x8100, /* range of 16M handles */ /* G80+ display objects */ NvEvoVRAM = 0x0100, diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 2b78ee6..0a7abc7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -620,6 +620,11 @@ struct drm_nouveau_private { struct { struct dentry *channel_root; } debugfs; + + spinlock_t sem_bo_free_list_lock; + struct nouveau_sem_bo *sem_bo_free_list; + atomic_t sem_handles; + uint32_t sem_max_handles; }; static inline struct drm_nouveau_private * @@ -1141,6 +1146,8 @@ extern int nouveau_fence_flush(void *obj, void *arg); extern void nouveau_fence_unref(void **obj); extern void *nouveau_fence_ref(void *obj); extern void nouveau_fence_handler(struct drm_device *dev, int channel); +extern void nouveau_fence_device_init(struct drm_device *dev); +extern void nouveau_fence_device_takedown(struct drm_device *dev); /* nouveau_gem.c */ extern int nouveau_gem_new(struct drm_device *, struct nouveau_channel *, diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 9b1c2c3..01152f3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -32,6 +32,13 @@ #define USE_REFCNT (dev_priv-card_type = NV_10) +#define NOUVEAU_SEM_BO_SIZE PAGE_SIZE + +/* reading fences can be very expensive + * use a 
threshold that would only use up half a single sem_bo + */ +#define NOUVEAU_SEM_MIN_THRESHOLD (NOUVEAU_SEM_BO_SIZE / (NOUVEAU_MAX_CHANNEL_NR * 2)) + struct nouveau_fence { struct nouveau_channel *channel; struct kref refcount; @@ -47,6 +54,218 @@ nouveau_fence(void *sync_obj) return (struct nouveau_fence *)sync_obj; } +struct nouveau_sem_bo { + struct nouveau_sem_bo *next; + struct nouveau_bo *bo; + uint32_t handle; + + /* = 0: num_free + 1 slots are free, sem_bo is or is about to be on free_list + -1: all allocated, sem_bo is NOT on free_list + */ + atomic_t num_free; + + DECLARE_BITMAP(free_slots, NOUVEAU_SEM_BO_SIZE / sizeof(uint32_t)); + DECLARE_BITMAP(values, NOUVEAU_SEM_BO_SIZE / sizeof(uint32_t)); + DECLARE_BITMAP(channels, NOUVEAU_MAX_CHANNEL_NR); +}; + +struct nouveau_sem { + struct nouveau_sem_bo *sem_bo; + unsigned num; + uint32_t value; +}; + +struct nouveau_sem_bo* +nouveau_sem_bo_alloc(struct drm_device *dev) +{ + struct nouveau_sem_bo *sem_bo = kmalloc(sizeof(*sem_bo), GFP_KERNEL); + struct nouveau_bo *bo; + int flags = TTM_PL_FLAG_VRAM; + int ret; + bool is_iomem; + void *mem; + + sem_bo = kmalloc(sizeof(*sem_bo), GFP_KERNEL); double allocation + + if (!sem_bo) + return 0; sparse would probably complain about 0