On Friday, October 24, 2008 2:57 pm Jesse Barnes wrote:
> Ok this one doesn't crash and doesn't leave the flushing list full at
> leavevt time, so I think it's ready for some actual review.
>
> I'm using the patch I posted to intel-gfx@ to do tiled EXA pixmaps, but I
> think my approach of faulting in fence registers may not be the best one
> (though I haven't tried making the fence register allocator use LRU yet);
> it seems like we may want a big contiguous chunk of GTT space where pixmaps
> sit so we can re-use a single fence register to cover the needs of most
> pixmaps. Suggestions appreciated.
>
> This patch should be pretty safe to push upstream I think since the new
> code won't be used unless applications actually call the GTT mapping ioctl.

And with untested 915/830 fence register support.
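
For anyone who wants to poke at this from userspace, the expected flow is
roughly the sketch below (error handling trimmed, and the helper name and
object size are made up; the ioctl and struct come straight from this patch):

#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include "drm.h"
#include "i915_drm.h"

/* Map a GEM object through the GTT: ask the kernel for the object's fake
 * mmap offset, then mmap that offset on the DRM fd.  Faults on the
 * returned pointer end up in i915_gem_fault() below.
 */
static void *gtt_map_bo(int fd, uint32_t handle, uint64_t size)
{
        struct drm_i915_gem_mmap_gtt args = { 0 };

        args.handle = handle;
        if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &args) != 0)
                return NULL;

        /* addr_ptr holds the fake offset, not a pointer */
        return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    fd, args.addr_ptr);
}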

Jesse

diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
index bde64b8..9916366 100644
--- a/drivers/gpu/drm/drm_bufs.c
+++ b/drivers/gpu/drm/drm_bufs.c
@@ -262,6 +262,9 @@ static int drm_addmap_core(struct drm_device * dev, unsigned int offset,
                DRM_DEBUG("AGP offset = 0x%08lx, size = 0x%08lx\n", map->offset, map->size);
 
                break;
+       case _DRM_GEM:
+               DRM_ERROR("tried to addmap GEM object\n");
+               break;
        }
        case _DRM_SCATTER_GATHER:
                if (!dev->sg) {
@@ -419,6 +422,9 @@ int drm_rmmap_locked(struct drm_device *dev, drm_local_map_t *map)
                dmah.size = map->size;
                __drm_pci_free(dev, &dmah);
                break;
+       case _DRM_GEM:
+               DRM_ERROR("tried to rmmap GEM object\n");
+               break;
        }
        drm_free(map, sizeof(*map), DRM_MEM_MAPS);
 
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 96f416a..aad8d76 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -236,6 +236,7 @@ int drm_lastclose(struct drm_device * dev)
                dev->lock.file_priv = NULL;
                wake_up_interruptible(&dev->lock.lock_queue);
        }
+       dev->dev_mapping = NULL;
        mutex_unlock(&dev->struct_mutex);
 
        DRM_DEBUG("lastclose completed\n");
@@ -290,6 +291,8 @@ EXPORT_SYMBOL(drm_init);
  */
 static void drm_cleanup(struct drm_device * dev)
 {
+       struct drm_driver *driver = dev->driver;
+
        DRM_DEBUG("\n");
 
        if (!dev) {
@@ -319,6 +322,9 @@ static void drm_cleanup(struct drm_device * dev)
        drm_ht_remove(&dev->map_hash);
        drm_ctxbitmap_cleanup(dev);
 
+       if (driver->driver_features & DRIVER_GEM)
+               drm_gem_destroy(dev);
+
        drm_put_minor(&dev->primary);
        if (drm_put_dev(dev))
                DRM_ERROR("Cannot unload module\n");
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index 0d46627..0958cf6 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -147,11 +147,21 @@ int drm_open(struct inode *inode, struct file *filp)
                spin_lock(&dev->count_lock);
                if (!dev->open_count++) {
                        spin_unlock(&dev->count_lock);
-                       return drm_setup(dev);
+                       retcode = drm_setup(dev);
+                       goto out;
                }
                spin_unlock(&dev->count_lock);
        }
 
+out:
+       mutex_lock(&dev->struct_mutex);
+       if (dev->dev_mapping == NULL)
+               dev->dev_mapping = inode->i_mapping;
+       else if (dev->dev_mapping != inode->i_mapping)
+               WARN(1, "dev->dev_mapping not inode mapping (%p expected %p)\n",
+                    dev->dev_mapping, inode->i_mapping);
+       mutex_unlock(&dev->struct_mutex);
+
        return retcode;
 }
 EXPORT_SYMBOL(drm_open);
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index ccd1afd..431dc3c 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -64,6 +64,13 @@
  * up at a later date, and as our interface with shmfs for memory allocation.
  */
 
+/*
+ * We make up offsets for buffer objects so we can recognize them at
+ * mmap time.
+ */
+#define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFFUL >> PAGE_SHIFT) + 1)
+#define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFFUL >> PAGE_SHIFT) * 16)
+
 /**
  * Initialize the GEM device fields
  */
@@ -71,6 +78,8 @@
 int
 drm_gem_init(struct drm_device *dev)
 {
+       struct drm_gem_mm *mm;
+
        spin_lock_init(&dev->object_name_lock);
        idr_init(&dev->object_name_idr);
        atomic_set(&dev->object_count, 0);
@@ -79,9 +88,41 @@ drm_gem_init(struct drm_device *dev)
        atomic_set(&dev->pin_memory, 0);
        atomic_set(&dev->gtt_count, 0);
        atomic_set(&dev->gtt_memory, 0);
+
+       mm = drm_calloc(1, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+       if (!mm) {
+               DRM_ERROR("out of memory\n");
+               return -ENOMEM;
+       }
+
+       if (drm_ht_create(&mm->offset_hash, 19)) {
+               drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+               return -ENOMEM;
+       }
+
+       if (drm_mm_init(&mm->offset_manager, DRM_FILE_PAGE_OFFSET_START,
+                       DRM_FILE_PAGE_OFFSET_SIZE)) {
+               drm_ht_remove(&mm->offset_hash);
+               drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+               return -ENOMEM;
+       }
+
+       dev->mm_private = mm;
+
        return 0;
 }
 
+void
+drm_gem_destroy(struct drm_device *dev)
+{
+       struct drm_gem_mm *mm = dev->mm_private;
+
+       drm_mm_takedown(&mm->offset_manager);
+       drm_ht_remove(&mm->offset_hash);
+       drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+       dev->mm_private = NULL;
+}
+
 /**
  * Allocate a GEM object of the specified size with shmfs backing store
  */
@@ -419,3 +460,58 @@ drm_gem_object_handle_free(struct kref *kref)
 }
 EXPORT_SYMBOL(drm_gem_object_handle_free);
 
+int
+drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+       struct drm_file *priv = filp->private_data;
+       struct drm_device *dev = priv->minor->dev;
+       struct drm_gem_mm *mm = dev->mm_private;
+       struct drm_map *map = NULL;
+       struct drm_gem_object *obj;
+       struct drm_hash_item *hash;
+       unsigned long prot;
+       int ret = 0;
+
+       mutex_lock(&dev->struct_mutex);
+
+       if (drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash)) {
+               mutex_unlock(&dev->struct_mutex);
+               return drm_mmap(filp, vma);
+       }
+
+       map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
+       if (!map ||
+           ((map->flags & _DRM_RESTRICTED) && !capable(CAP_SYS_ADMIN))) {
+               ret = -EPERM;
+               goto out_unlock;
+       }
+
+       /* Check for valid size. */
+       if (map->size < vma->vm_end - vma->vm_start) {
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+
+       obj = map->handle;
+       if (!obj->dev->driver->gem_vm_ops) {
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+
+       vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
+       vma->vm_ops = obj->dev->driver->gem_vm_ops;
+       vma->vm_private_data = map->handle;
+       /* FIXME: use pgprot_writecombine when available */
+       prot = pgprot_val(vma->vm_page_prot);
+       prot |= _PAGE_CACHE_WC;
+       vma->vm_page_prot = __pgprot(prot);
+
+       vma->vm_file = filp;    /* Needed for drm_vm_open() */
+       drm_vm_open_locked(vma);
+
+out_unlock:
+       mutex_unlock(&dev->struct_mutex);
+
+       return ret;
+}
+EXPORT_SYMBOL(drm_gem_mmap);
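
Aside on the fake offsets above: with 4K pages, DRM_FILE_PAGE_OFFSET_START
works out to page 0x100000, i.e. GEM offsets start right above the 4GB mark
and get a roughly 64GB window, so any vm_pgoff below that range still falls
through to the legacy drm_mmap() path.  A standalone sanity check of the
arithmetic (not part of the patch):

#include <stdio.h>

int main(void)
{
        /* mirrors DRM_FILE_PAGE_OFFSET_START/_SIZE with PAGE_SHIFT == 12 */
        unsigned long long start = (0xFFFFFFFFULL >> 12) + 1;
        unsigned long long size = (0xFFFFFFFFULL >> 12) * 16;

        printf("start: %#llx pages (%llu GB into the file)\n",
               start, start * 4096 >> 30);
        printf("size:  %#llx pages (~%llu GB of offset space)\n",
               size, size * 4096 >> 30);
        return 0;
}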
diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
index 3316067..af539f7 100644
--- a/drivers/gpu/drm/drm_hashtab.c
+++ b/drivers/gpu/drm/drm_hashtab.c
@@ -127,6 +127,7 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item)
        }
        return 0;
 }
+EXPORT_SYMBOL(drm_ht_insert_item);
 
 /*
  * Just insert an item and return any "bits" bit key that hasn't been
@@ -188,6 +189,7 @@ int drm_ht_remove_item(struct drm_open_hash *ht, struct drm_hash_item *item)
        ht->fill--;
        return 0;
 }
+EXPORT_SYMBOL(drm_ht_remove_item);
 
 void drm_ht_remove(struct drm_open_hash *ht)
 {
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index c234c6f..3ffae02 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -267,6 +267,9 @@ static void drm_vm_shm_close(struct vm_area_struct *vma)
                                dmah.size = map->size;
                                __drm_pci_free(dev, &dmah);
                                break;
+                       case _DRM_GEM:
+                               DRM_ERROR("tried to rmmap GEM object\n");
+                               break;
                        }
                        drm_free(map, sizeof(*map), DRM_MEM_MAPS);
                }
@@ -399,7 +402,7 @@ static struct vm_operations_struct drm_vm_sg_ops = {
  * Create a new drm_vma_entry structure as the \p vma private data entry and
  * add it to drm_device::vmalist.
  */
-static void drm_vm_open_locked(struct vm_area_struct *vma)
+void drm_vm_open_locked(struct vm_area_struct *vma)
 {
        struct drm_file *priv = vma->vm_file->private_data;
        struct drm_device *dev = priv->minor->dev;
@@ -540,7 +543,7 @@ EXPORT_SYMBOL(drm_core_get_reg_ofs);
  * according to the mapping type and remaps the pages. Finally sets the file
  * pointer and calls vm_open().
  */
-static int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma)
+int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma)
 {
        struct drm_file *priv = filp->private_data;
        struct drm_device *dev = priv->minor->dev;
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index db34780..9e9b76b 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -953,6 +953,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
        DRM_IOCTL_DEF(DRM_I915_GEM_PREAD, i915_gem_pread_ioctl, 0),
        DRM_IOCTL_DEF(DRM_I915_GEM_PWRITE, i915_gem_pwrite_ioctl, 0),
        DRM_IOCTL_DEF(DRM_I915_GEM_MMAP, i915_gem_mmap_ioctl, 0),
+       DRM_IOCTL_DEF(DRM_I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, 0),
        DRM_IOCTL_DEF(DRM_I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, 0),
        DRM_IOCTL_DEF(DRM_I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, 0),
        DRM_IOCTL_DEF(DRM_I915_GEM_SET_TILING, i915_gem_set_tiling, 0),
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a80ead2..1c87509 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -81,6 +81,10 @@ static int i915_resume(struct drm_device *dev)
        return 0;
 }
 
+static struct vm_operations_struct i915_gem_vm_ops = {
+       .fault = i915_gem_fault,
+};
+
 static struct drm_driver driver = {
        /* don't use mtrr's here, the Xserver or user space app should
         * deal with them for intel hardware.
@@ -111,13 +115,14 @@ static struct drm_driver driver = {
        .proc_cleanup = i915_gem_proc_cleanup,
        .gem_init_object = i915_gem_init_object,
        .gem_free_object = i915_gem_free_object,
+       .gem_vm_ops = &i915_gem_vm_ops,
        .ioctls = i915_ioctls,
        .fops = {
                 .owner = THIS_MODULE,
                 .open = drm_open,
                 .release = drm_release,
                 .ioctl = drm_ioctl,
-                .mmap = drm_mmap,
+                .mmap = drm_gem_mmap,
                 .poll = drm_poll,
                 .fasync = drm_fasync,
 #ifdef CONFIG_COMPAT
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index eae4ed3..19fa769 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -107,6 +107,12 @@ struct intel_opregion {
        int enabled;
 };
 
+#define I915_FENCE_REG_NONE -1
+
+struct drm_i915_fence_reg {
+       struct drm_gem_object *obj;
+};
+
 typedef struct drm_i915_private {
        struct drm_device *dev;
 
@@ -151,6 +157,10 @@ typedef struct drm_i915_private {
 
        struct intel_opregion opregion;
 
+       struct drm_i915_fence_reg fence_regs[16]; /* assume 965 */
+       int fence_reg_start; /* 3 if userland hasn't ioctl'd us yet */
+       int num_fence_regs; /* 8 on pre-965, 16 otherwise */
+
        /* Register state */
        u8 saveLBB;
        u32 saveDSPACNTR;
@@ -360,6 +370,21 @@ struct drm_i915_gem_object {
         * This is the same as gtt_space->start
         */
        uint32_t gtt_offset;
+       /**
+        * Required alignment for the object
+        */
+       uint32_t gtt_alignment;
+       /**
+        * Fake offset for use by mmap(2)
+        */
+       uint64_t mmap_offset;
+
+       /**
+        * Fence register bits (if any) for this object.  Will be set
+        * as needed when mapped into the GTT.
+        * Protected by dev->struct_mutex.
+        */
+       int fence_reg;
 
        /** Boolean whether this object has a valid gtt offset. */
        int gtt_bound;
@@ -372,6 +397,7 @@ struct drm_i915_gem_object {
 
        /** Current tiling mode for the object. */
        uint32_t tiling_mode;
+       uint32_t stride;
 
        /** AGP mapping type (AGP_USER_MEMORY or AGP_USER_CACHED_MEMORY */
        uint32_t agp_type;
@@ -480,6 +506,8 @@ int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
                          struct drm_file *file_priv);
 int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
                        struct drm_file *file_priv);
+int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+                       struct drm_file *file_priv);
 int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
                              struct drm_file *file_priv);
 int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
@@ -514,6 +542,7 @@ uint32_t i915_get_gem_seqno(struct drm_device *dev);
 void i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_retire_work_handler(struct work_struct *work);
 void i915_gem_clflush_object(struct drm_gem_object *obj);
+int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 
 /* i915_gem_tiling.c */
 void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
@@ -558,6 +587,7 @@ extern void opregion_enable_asle(struct drm_device *dev);
 
 #define I915_READ(reg)          readl(dev_priv->regs + (reg))
 #define I915_WRITE(reg, val)     writel(val, dev_priv->regs + (reg))
+#define I915_WRITE64(reg, val) writeq(val, dev_priv->regs + (reg))
 #define I915_READ16(reg)       readw(dev_priv->regs + (reg))
 #define I915_WRITE16(reg, val) writel(val, dev_priv->regs + (reg))
 #define I915_READ8(reg)                readb(dev_priv->regs + (reg))
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9ac73dd..4102853 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -49,6 +49,10 @@ i915_gem_set_domain(struct drm_gem_object *obj,
 static int i915_gem_object_get_page_list(struct drm_gem_object *obj);
 static void i915_gem_object_free_page_list(struct drm_gem_object *obj);
 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
+static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
+                                          unsigned alignment);
+static void i915_gem_object_get_fence_reg(struct drm_gem_object *obj);
+static int i915_gem_evict_something(struct drm_device *dev);
 
 static void
 i915_gem_cleanup_ringbuffer(struct drm_device *dev);
@@ -476,6 +480,133 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
        return 0;
 }
 
+/**
+ * i915_gem_fault - fault a page into the GTT
+ * vma: VMA in question
+ * vmf: fault info
+ *
+ * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
+ * from userspace.  The fault handler takes care of binding the object to
+ * the GTT (if needed), allocating and programming a fence register (again,
+ * only if needed based on whether the old reg is still valid or the object
+ * is tiled) and inserting a new PTE into the faulting process.
+ *
+ * Note that the faulting process may involve evicting existing objects
+ * from the GTT and/or fence registers to make room.  So performance may
+ * suffer if the GTT working set is large or there are few fence registers
+ * left.
+ */
+int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+       struct drm_gem_object *obj = vma->vm_private_data;
+       struct drm_device *dev = obj->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_i915_gem_object *obj_priv = obj->driver_private;
+       pgoff_t page_offset;
+       unsigned long pfn;
+       int ret = 0;
+
+       /* We don't use vmf->pgoff since that has the fake offset */
+       page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
+               PAGE_SHIFT;
+
+       /* Now bind it into the GTT if needed */
+       mutex_lock(&dev->struct_mutex);
+       if (!obj_priv->gtt_space) {
+               ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
+               if (ret) {
+                       mutex_unlock(&dev->struct_mutex);
+                       return VM_FAULT_SIGBUS;
+               }
+               list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
+       }
+
+       /* Need a new fence register? */
+       if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
+           obj_priv->tiling_mode != I915_TILING_NONE)
+               i915_gem_object_get_fence_reg(obj);
+
+       pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
+               page_offset;
+
+       /* Finally, remap it using the new GTT offset */
+       ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
+
+       mutex_unlock(&dev->struct_mutex);
+
+       switch (ret) {
+       case -ENOMEM:
+       case -EAGAIN:
+               return VM_FAULT_OOM;
+       case -EFAULT:
+       case -EBUSY:
+               DRM_ERROR("can't insert pfn??  fault or busy...\n");
+               return VM_FAULT_SIGBUS;
+       default:
+               return VM_FAULT_NOPAGE;
+       }
+}
+
+/**
+ * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
+ * @dev: DRM device
+ * @data: GTT mapping ioctl data
+ * @file_priv: GEM object info
+ *
+ * Simply returns the fake offset to userspace so it can mmap it.
+ * The mmap call will end up in drm_gem_mmap(), which will set things
+ * up so we can get faults in the handler above.
+ *
+ * The fault handler will take care of binding the object into the GTT
+ * (since it may have been evicted to make room for something), allocating
+ * a fence register, and mapping the appropriate aperture address into
+ * userspace.
+ */
+int
+i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+                       struct drm_file *file_priv)
+{
+       struct drm_i915_gem_mmap_gtt *args = data;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_gem_object *obj;
+       struct drm_i915_gem_object *obj_priv;
+       int ret;
+
+       if (!(dev->driver->driver_features & DRIVER_GEM))
+               return -ENODEV;
+
+       mutex_lock(&dev->struct_mutex);
+       obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+       if (obj == NULL) {
+               mutex_unlock(&dev->struct_mutex);
+               return -EBADF;
+       }
+
+       obj_priv = obj->driver_private;
+       obj_priv->gtt_alignment = args->alignment;
+       args->addr_ptr = obj_priv->mmap_offset;
+
+       /*
+        * Pull it into the GTT so that we have a page list (makes the
+        * initial fault faster and any subsequent flushing possible).
+        */
+       if (!obj_priv->agp_mem) {
+               ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
+               if (ret) {
+                       drm_gem_object_unreference(obj);
+                       mutex_unlock(&dev->struct_mutex);
+                       return ret;
+               }
+               list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
+       }
+
+       drm_gem_object_unreference(obj);
+       mutex_unlock(&dev->struct_mutex);
+
+       return 0;
+}
+
 static void
 i915_gem_object_free_page_list(struct drm_gem_object *obj)
 {
@@ -642,10 +773,6 @@ i915_gem_retire_request(struct drm_device *dev,
                 */
                if (obj_priv->last_rendering_seqno != request->seqno)
                        return;
-#if WATCH_LRU
-               DRM_INFO("%s: retire %d moves to inactive list %p\n",
-                        __func__, request->seqno, obj);
-#endif
 
                if (obj->write_domain != 0) {
                        list_move_tail(&obj_priv->list,
@@ -774,7 +901,7 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
         * buffer to have made it to the inactive list, and we would need
         * a separate wait queue to handle that.
         */
-       if (ret == 0)
+       if (ret == 0 || ret == -ERESTARTSYS)
                i915_gem_retire_requests(dev);
 
        return ret;
@@ -906,7 +1033,9 @@ static int
 i915_gem_object_unbind(struct drm_gem_object *obj)
 {
        struct drm_device *dev = obj->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_gem_object *obj_priv = obj->driver_private;
+       loff_t offset;
        int ret = 0;
 
 #if WATCH_BUF
@@ -950,6 +1079,22 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 
        BUG_ON(obj_priv->active);
 
+       /* blow away mappings if mapped through GTT */
+       offset = ((loff_t) obj->map_list.hash.key) << PAGE_SHIFT;
+       unmap_mapping_range(dev->dev_mapping, offset, obj->size, 1);
+
+       if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
+               if (IS_I965G(dev)) {
+                       I915_WRITE64(FENCE_REG_965_0 +
+                                    (obj_priv->fence_reg * 8), 0);
+               } else {
+                       I915_WRITE(FENCE_REG_830_0 +
+                                  (obj_priv->fence_reg * 4), 0);
+               }
+               dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL;
+               obj_priv->fence_reg = I915_FENCE_REG_NONE;
+       }
+
        i915_gem_object_free_page_list(obj);
 
        if (obj_priv->gtt_space) {
@@ -1093,6 +1238,180 @@ i915_gem_object_get_page_list(struct drm_gem_object *obj)
        return 0;
 }
 
+static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
+{
+       struct drm_gem_object *obj = reg->obj;
+       struct drm_device *dev = obj->dev;
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       struct drm_i915_gem_object *obj_priv = obj->driver_private;
+       int regnum = obj_priv->fence_reg;
+       uint64_t val;
+
+       val = ((obj_priv->gtt_offset + obj->size - 4096) &
+                   0xfffff000) << 32;
+       val |= obj_priv->gtt_offset & 0xfffff000;
+       val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
+       if (obj_priv->tiling_mode == I915_TILING_Y)
+               val |= 1 << I965_FENCE_TILING_Y_SHIFT;
+       val |= I965_FENCE_REG_VALID;
+
+       I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
+}
+
+static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
+{
+       struct drm_gem_object *obj = reg->obj;
+       struct drm_device *dev = obj->dev;
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       struct drm_i915_gem_object *obj_priv = obj->driver_private;
+       int regnum = obj_priv->fence_reg;
+       uint32_t val;
+       uint32_t pitch_val;
+
+       if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
+           (obj_priv->gtt_offset & (obj->size - 1))) {
+               WARN(1, "%s: object not 1M or size aligned\n", __func__);
+               return;
+       }
+
+       if (obj_priv->tiling_mode == I915_TILING_Y && (IS_I945G(dev) ||
+                                                      IS_I945GM(dev) ||
+                                                      IS_G33(dev)))
+               pitch_val = (obj_priv->stride / 128) - 1;
+       else
+               pitch_val = (obj_priv->stride / 512) - 1;
+
+       val = obj_priv->gtt_offset;
+       if (obj_priv->tiling_mode == I915_TILING_Y)
+               val |= 1 << I830_FENCE_TILING_Y_SHIFT;
+       val |= I915_FENCE_SIZE_BITS(obj->size);
+       val |= pitch_val << I830_FENCE_PITCH_SHIFT;
+       val |= I830_FENCE_REG_VALID;
+
+       I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
+}
+
+static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
+{
+       struct drm_gem_object *obj = reg->obj;
+       struct drm_device *dev = obj->dev;
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       struct drm_i915_gem_object *obj_priv = obj->driver_private;
+       int regnum = obj_priv->fence_reg;
+       uint32_t val;
+       uint32_t pitch_val;
+
+       if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) ||
+           (obj_priv->gtt_offset & (obj->size - 1))) {
+               WARN(1, "%s: object not 512K or size aligned\n", __func__);
+               return;
+       }
+
+       pitch_val = (obj_priv->stride / 128) - 1;
+
+       val = obj_priv->gtt_offset;
+       if (obj_priv->tiling_mode == I915_TILING_Y)
+               val |= 1 << I830_FENCE_TILING_Y_SHIFT;
+       val |= I830_FENCE_SIZE_BITS(obj->size);
+       val |= pitch_val << I830_FENCE_PITCH_SHIFT;
+       val |= I830_FENCE_REG_VALID;
+
+       I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
+}
+
+/**
+ * i915_gem_object_get_fence_reg - set up a fence reg for an object
+ * @obj: object to map through a fence reg
+ *
+ * When mapping objects through the GTT, userspace wants to be able to write
+ * to them without having to worry about swizzling if the object is tiled.
+ *
+ * This function walks the fence regs looking for a free one for @obj,
+ * stealing one if it can't find any.
+ *
+ * It then sets up the reg based on the object's properties: address, pitch
+ * and tiling format.
+ */
+static void
+i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
+{
+       struct drm_device *dev = obj->dev;
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       struct drm_i915_gem_object *obj_priv = obj->driver_private;
+       struct drm_i915_fence_reg *reg = NULL;
+       int i, ret;
+
+       switch (obj_priv->tiling_mode) {
+       case I915_TILING_NONE:
+               WARN(1, "allocating a fence for non-tiled object?\n");
+               break;
+       case I915_TILING_X:
+               WARN(obj_priv->stride & (512 - 1),
+                    "object is X tiled but has non-512B pitch\n");
+               break;
+       case I915_TILING_Y:
+               WARN(obj_priv->stride & (128 - 1),
+                    "object is Y tiled but has non-128B pitch\n");
+               break;
+       }
+
+       /* First try to find a free reg */
+       for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
+               reg = &dev_priv->fence_regs[i];
+               if (!reg->obj)
+                       break;
+       }
+
+       /* None available, try to steal one or wait for a user to finish */
+       if (i == dev_priv->num_fence_regs) {
+               struct drm_i915_gem_object *old_obj_priv = NULL;
+               loff_t offset;
+
+try_again:
+               /* Could try to use LRU here instead... */
+               for (i = dev_priv->fence_reg_start;
+                    i < dev_priv->num_fence_regs; i++) {
+                       reg = &dev_priv->fence_regs[i];
+                       old_obj_priv = reg->obj->driver_private;
+                       if (!old_obj_priv->pin_count)
+                               break;
+               }
+
+               /*
+                * Now things get ugly... we have to wait for one of the
+                * objects to finish before trying again.
+                */
+               if (i == dev_priv->num_fence_regs) {
+                       ret = i915_gem_object_wait_rendering(reg->obj);
+                       if (ret) {
+                               WARN(ret, "wait_rendering failed: %d\n", ret);
+                               return;
+                       }
+                       goto try_again;
+               }
+
+               /*
+                * Zap this virtual mapping so we can set up a fence again
+                * for this object next time we need it.
+                */
+               offset = ((loff_t) reg->obj->map_list.hash.key) << PAGE_SHIFT;
+               unmap_mapping_range(dev->dev_mapping, offset,
+                                   reg->obj->size, 1);
+               old_obj_priv->fence_reg = I915_FENCE_REG_NONE;
+       }
+
+       obj_priv->fence_reg = i;
+       reg->obj = obj;
+
+       if (IS_I965G(dev))
+               i965_write_fence_reg(reg);
+       else if (IS_I9XX(dev))
+               i915_write_fence_reg(reg);
+       else
+               i830_write_fence_reg(reg);
+}
+
 /**
  * Finds free space in the GTT aperture and binds the object there.
  */
@@ -2132,7 +2451,12 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
 
 int i915_gem_init_object(struct drm_gem_object *obj)
 {
+       struct drm_device *dev = obj->dev;
+       struct drm_gem_mm *mm = dev->mm_private;
        struct drm_i915_gem_object *obj_priv;
+       struct drm_map_list *list;
+       struct drm_map *map;
+       int ret = 0;
 
        obj_priv = drm_calloc(1, sizeof(*obj_priv), DRM_MEM_DRIVER);
        if (obj_priv == NULL)
@@ -2151,12 +2475,63 @@ int i915_gem_init_object(struct drm_gem_object *obj)
 
        obj->driver_private = obj_priv;
        obj_priv->obj = obj;
+       obj_priv->fence_reg = I915_FENCE_REG_NONE;
        INIT_LIST_HEAD(&obj_priv->list);
+
+       /* Set the object up for mmap'ing */
+       list = &obj->map_list;
+       list->map = drm_calloc(1, sizeof(struct drm_map_list),
+                              DRM_MEM_DRIVER);
+       if (!list->map)
+               return -ENOMEM;
+
+       map = list->map;
+       map->type = _DRM_GEM;
+       map->size = obj->size;
+       map->handle = obj;
+
+       /* Get a DRM GEM mmap offset allocated... */
+       list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
+                                                   obj->size / PAGE_SIZE, 0, 0);
+       if (!list->file_offset_node) {
+               DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
+               ret = -ENOMEM;
+               goto out_free_list;
+       }
+
+       list->file_offset_node = drm_mm_get_block(list->file_offset_node,
+                                                 obj->size / PAGE_SIZE, 0);
+       if (!list->file_offset_node) {
+               ret = -ENOMEM;
+               goto out_free_list;
+       }
+
+       list->hash.key = list->file_offset_node->start;
+       ret = drm_ht_insert_item(&mm->offset_hash, &list->hash);
+       if (ret) {
+               DRM_ERROR("failed to add to map hash\n");
+               goto out_free_mm;
+       }
+
+       /* By now we should be all set, any drm_mmap request on the offset
+        * below will get to our mmap & fault handler */
+       obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
+
        return 0;
+
+out_free_mm:
+       drm_mm_put_block(list->file_offset_node);
+out_free_list:
+       drm_free(list->map, sizeof(struct drm_map_list), DRM_MEM_DRIVER);
+
+       return ret;
 }
 
 void i915_gem_free_object(struct drm_gem_object *obj)
 {
+       struct drm_device *dev = obj->dev;
+       struct drm_gem_mm *mm = dev->mm_private;
+       struct drm_map_list *list;
+       struct drm_map *map;
        struct drm_i915_gem_object *obj_priv = obj->driver_private;
 
        while (obj_priv->pin_count > 0)
@@ -2164,6 +2539,20 @@ void i915_gem_free_object(struct drm_gem_object *obj)
 
        i915_gem_object_unbind(obj);
 
+       list = &obj->map_list;
+       drm_ht_remove_item(&mm->offset_hash, &list->hash);
+
+       if (list->file_offset_node) {
+               drm_mm_put_block(list->file_offset_node);
+               list->file_offset_node = NULL;
+       }
+
+       map = list->map;
+       if (map) {
+               drm_free(map, sizeof(*map), DRM_MEM_DRIVER);
+               list->map = NULL;
+       }
+
        drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER);
        drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
 }
@@ -2255,8 +2644,7 @@ i915_gem_idle(struct drm_device *dev)
         */
        i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT),
                       ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
-       seqno = i915_add_request(dev, ~(I915_GEM_DOMAIN_CPU |
-                                       I915_GEM_DOMAIN_GTT));
+       seqno = i915_add_request(dev, ~I915_GEM_DOMAIN_CPU);
 
        if (seqno == 0) {
                mutex_unlock(&dev->struct_mutex);
@@ -2289,7 +2677,15 @@ i915_gem_idle(struct drm_device *dev)
         * waited for a sequence higher than any pending execbuffer
         */
        BUG_ON(!list_empty(&dev_priv->mm.active_list));
-       BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
+       if (!list_empty(&dev_priv->mm.flushing_list)) {
+               struct drm_i915_gem_object *obj_priv;
+               DRM_ERROR("flushing list not empty, still has:\n");
+               list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list,
+                                   list) {
+                       DRM_ERROR("    %p: %d\n", obj_priv,
+                                 obj_priv->last_rendering_seqno);
+               }
+       }
 
        /* Request should now be empty as we've also waited
         * for the last request in the list
@@ -2304,7 +2700,6 @@ i915_gem_idle(struct drm_device *dev)
        }
 
        BUG_ON(!list_empty(&dev_priv->mm.active_list));
-       BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
        BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
        BUG_ON(!list_empty(&dev_priv->mm.request_list));
 
@@ -2554,5 +2949,13 @@ i915_gem_load(struct drm_device *dev)
                  i915_gem_vblank_work_handler);
        dev_priv->mm.next_gem_seqno = 1;
 
+       /* Old X drivers will take 0-2 for front, back, depth buffers */
+       dev_priv->fence_reg_start = 3;
+
+       if (IS_I965G(dev))
+               dev_priv->num_fence_regs = 16;
+       else
+               dev_priv->num_fence_regs = 8;
+
        i915_gem_detect_bit_6_swizzle(dev);
 }
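
As a sanity check on the 965 fence encoding, this is the value
i965_write_fence_reg() above computes for a hypothetical X-tiled object,
2MB at GTT offset 0xa00000 with a 4096 byte stride (the numbers are
invented; the arithmetic mirrors the function exactly):

#include <stdint.h>
#include <stdio.h>

#define I965_FENCE_PITCH_SHIFT 2
#define I965_FENCE_REG_VALID   (1 << 0)

int main(void)
{
        uint64_t gtt_offset = 0x00a00000, size = 0x00200000, stride = 4096;
        uint64_t val;

        val = ((gtt_offset + size - 4096) & 0xfffff000) << 32; /* last page */
        val |= gtt_offset & 0xfffff000;                        /* first page */
        val |= ((stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; /* pitch */
        val |= I965_FENCE_REG_VALID;    /* X tiled, so no TILING_Y bit */

        printf("FENCE_REG_965: %#018llx\n", (unsigned long long)val);
        /* prints 0x00bff00000a0007d */
        return 0;
}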
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index e8b85ac..1101aed 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -207,6 +207,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
                }
        }
        obj_priv->tiling_mode = args->tiling_mode;
+       obj_priv->stride = args->stride;
 
        mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 5c2d9f2..36619d0 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -175,9 +175,26 @@
 #define   DISPLAY_PLANE_B           (1<<20)
 
 /*
- * Instruction and interrupt control regs
+ * Fence registers
  */
+#define FENCE_REG_830_0                        0x2000
+#define   I830_FENCE_START_MASK                0x07f80000
+#define   I830_FENCE_TILING_Y_SHIFT    12
+#define   I830_FENCE_SIZE_BITS(size)   ((get_order(size >> 19) - 1) << 8)
+#define   I830_FENCE_PITCH_SHIFT       4
+#define   I830_FENCE_REG_VALID         (1<<0)
+
+#define   I915_FENCE_START_MASK                0x0ff00000
+#define   I915_FENCE_SIZE_BITS(size)   ((get_order(size >> 20) - 1) << 8)
 
+#define FENCE_REG_965_0                        0x03000
+#define   I965_FENCE_PITCH_SHIFT       2
+#define   I965_FENCE_TILING_Y_SHIFT    1
+#define   I965_FENCE_REG_VALID         (1<<0)
+
+/*
+ * Instruction and interrupt control regs
+ */
 #define PRB0_TAIL      0x02030
 #define PRB0_HEAD      0x02034
 #define PRB0_START     0x02038
diff --git a/include/drm/drm.h b/include/drm/drm.h
index f46ba4b..ec5daa6 100644
--- a/include/drm/drm.h
+++ b/include/drm/drm.h
@@ -173,6 +173,7 @@ enum drm_map_type {
        _DRM_AGP = 3,             /**< AGP/GART */
        _DRM_SCATTER_GATHER = 4,  /**< Scatter/gather memory for PCI DMA */
        _DRM_CONSISTENT = 5,      /**< Consistent memory for PCI DMA */
+       _DRM_GEM = 6,             /**< GEM object */
 };
 
 /**
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 59c796b..aa29b5a 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -523,6 +523,7 @@ struct drm_map_list {
        struct drm_hash_item hash;
        struct drm_map *map;                    /**< mapping */
        uint64_t user_token;
+       struct drm_mm_node *file_offset_node;   /**< fake offset */
 };
 
 typedef struct drm_map drm_local_map_t;
@@ -563,6 +564,14 @@ struct drm_ati_pcigart_info {
 };
 
 /**
+ * GEM specific mm private for tracking GEM objects
+ */
+struct drm_gem_mm {
+       struct drm_mm offset_manager;   /**< Offset mgmt for buffer objects */
+       struct drm_open_hash offset_hash; /**< User token hash table for maps */
+};
+
+/**
  * This structure defines the drm_mm memory object, which will be used by the
  * DRM for its buffer objects.
  */
@@ -579,6 +588,9 @@ struct drm_gem_object {
        /** File representing the shmem storage */
        struct file *filp;
 
+       /* Mapping info for this object */
+       struct drm_map_list map_list;
+
        /**
         * Size of the object, in bytes.  Immutable over the object's
         * lifetime.
@@ -724,6 +736,9 @@ struct drm_driver {
        int (*gem_init_object) (struct drm_gem_object *obj);
        void (*gem_free_object) (struct drm_gem_object *obj);
 
+       /* Driver private ops for this object */
+       struct vm_operations_struct *gem_vm_ops;
+
        int major;
        int minor;
        int patchlevel;
@@ -883,6 +898,8 @@ struct drm_device {
        struct drm_sg_mem *sg;  /**< Scatter gather memory */
        int num_crtcs;                  /**< Number of CRTCs on this device */
        void *dev_private;              /**< device private data */
+       void *mm_private;
+       struct address_space *dev_mapping;
        struct drm_sigdata sigdata;        /**< For block_all_signals */
        sigset_t sigmask;
 
@@ -999,6 +1016,8 @@ extern int drm_release(struct inode *inode, struct file *filp);
 
                                /* Mapping support (drm_vm.h) */
 extern int drm_mmap(struct file *filp, struct vm_area_struct *vma);
+extern int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma);
+extern void drm_vm_open_locked(struct vm_area_struct *vma);
 extern unsigned long drm_core_get_map_ofs(struct drm_map * map);
 extern unsigned long drm_core_get_reg_ofs(struct drm_device *dev);
 extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait);
@@ -1255,10 +1274,12 @@ extern int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size);
 
 /* Graphics Execution Manager library functions (drm_gem.c) */
 int drm_gem_init(struct drm_device *dev);
+void drm_gem_destroy(struct drm_device *dev);
 void drm_gem_object_free(struct kref *kref);
 struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev,
                                            size_t size);
 void drm_gem_object_handle_free(struct kref *kref);
+int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
 
 static inline void
 drm_gem_object_reference(struct drm_gem_object *obj)
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index eb4b350..fed1d7c 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -159,6 +159,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_SW_FINISH 0x20
 #define DRM_I915_GEM_SET_TILING        0x21
 #define DRM_I915_GEM_GET_TILING        0x22
+#define DRM_I915_GEM_MMAP_GTT  0x23
 
 #define DRM_IOCTL_I915_INIT            DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH           DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -186,6 +187,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_PREAD       DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread)
 #define DRM_IOCTL_I915_GEM_PWRITE      DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite)
 #define DRM_IOCTL_I915_GEM_MMAP                DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap)
+#define DRM_IOCTL_I915_GEM_MMAP_GTT    DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_gtt)
 #define DRM_IOCTL_I915_GEM_SET_DOMAIN  DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain)
 #define DRM_IOCTL_I915_GEM_SW_FINISH   DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SW_FINISH, struct drm_i915_gem_sw_finish)
 #define DRM_IOCTL_I915_GEM_SET_TILING  DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling)
@@ -380,6 +382,24 @@ struct drm_i915_gem_mmap {
        uint64_t addr_ptr;
 };
 
+struct drm_i915_gem_mmap_gtt {
+       /** Handle for the object being mapped. */
+       uint32_t handle;
+       uint32_t pad;
+       /** Offset in the object to map. */
+       uint64_t offset;
+       /**
+        * Length of data to map.
+        *
+        * The value will be page-aligned.
+        */
+       uint64_t size;
+       /** Fake offset, returned by the kernel, for use as the mmap(2)
+        *  offset argument (it's an offset, not a pointer, despite the name) */
+       uint64_t addr_ptr;
+       uint32_t flags;
+       uint32_t alignment;
+};
+
 struct drm_i915_gem_set_domain {
        /** Handle for the object */
        uint32_t handle;

