Re: (ugly) GTT mapping patches

Jesse Barnes Thu, 21 Aug 2008 20:21:34 -0700

On Thursday, August 21, 2008 2:27 pm Jesse Barnes wrote:
> Here's what I've been hacking on wrt GTT mapping.  The kernel's mmap_region
> function was almost exactly what we needed, but it doesn't give us a way to
> avoid the backing store for the attached file, thus the new function.  It
> would obviously be much better to pull out the common functionality for
> both functions into a helper and provide mmap_io_region and mmap_region as
> thin wrappers around it, but even that's uglier than just using fops->mmap.
>  The latter would let us just check what kind of mapping we wanted to
> create and do it; we'd already have the VMA and address space taken care of
> by the parent do_mmap_pgoff function.  I think that would mean messing with
> shmem.c though, since it won't pass down an mmap call for us...
>
> On the plus side, these patches seem to work and bring performance on
> modesetting-gem back to reasonable levels.


Ok, as discussed on IRC this also needs either full pin/map/unmap/unpin on the 
client side, or real fault support like TTM, or we could fragment and/or run 
out of GTT space, which would be bad.

And for reference, here's the whole diff I was using to build with 
modesetting-gem of mesa/drm (which doesn't have Eric's latest changes yet).

-- 
Jesse Barnes, Intel Open Source Technology Center

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index a04498d..bf6f731 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -203,13 +203,6 @@ static u64 pci_size(u64 base, u64 maxbase, u64 mask)
 	return size;
 }
 
-enum pci_bar_type {
-	pci_bar_unknown,	/* Standard PCI BAR probe */
-	pci_bar_io,		/* An io port BAR */
-	pci_bar_mem32,		/* A 32-bit memory BAR */
-	pci_bar_mem64,		/* A 64-bit memory BAR */
-};
-
 static inline enum pci_bar_type decode_bar(struct resource *res, u32 bar)
 {
 	if ((bar & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) {
@@ -322,6 +315,46 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
 	goto out;
 }
 
+/**
+ * pci_read_base - Read a BAR from a specified location
+ * @dev: The PCI device to read
+ * @type: The type of BAR to read
+ * @res: A struct resource to be filled in
+ * @reg: The address in PCI config space to read the BAR from.
+ *
+ * Some devices have BARs in unusual places.  This function lets a driver ask
+ * the PCI subsystem to read it and place it in the resource tree.  If it is
+ * like a ROM BAR with an enable in bit 0, the caller should specify a @type
+ * of io, mem32 or mem64.  If it's like a normal BAR with memory type in the
+ * low bits, specify unknown, even if the caller knows what kind of BAR it is.
+ *
+ * Returns -ENXIO if the BAR was not successfully read.  If the BAR is read,
+ * but no suitable parent resource can be found for the BAR, this function
+ * returns -ENODEV.  If the resource cannot be inserted into the resource tree,
+ * it will return -EBUSY.  Note that the resource is still 'live' for these
+ * last two cases; the caller should set res->flags to 0 if this is not wanted.
+ */
+int pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
+					struct resource *res, unsigned int reg)
+{
+	struct pci_bus_region region;
+	struct resource *parent;
+
+	__pci_read_base(dev, type, res, reg);
+	if (!res->flags)
+		return -ENXIO;
+
+	region.start = res->start;
+	region.end = res->end;
+	pcibios_bus_to_resource(dev, res, &region);
+
+	parent = pci_find_parent_resource(dev, res);
+	if (!parent)
+		return -ENODEV;
+	return request_resource(parent, res);
+}
+EXPORT_SYMBOL_GPL(pci_read_base);
+
 static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
 {
 	unsigned int pos, reg;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 335288b..dca69f9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -715,6 +715,17 @@ static inline int shmem_lock(struct file *file, int lock,
 #endif
 struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags);
 
+/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
+enum sgp_type {
+	SGP_READ,	/* don't exceed i_size, don't allocate page */
+	SGP_CACHE,	/* don't exceed i_size, may allocate page */
+	SGP_DIRTY,	/* like SGP_CACHE, but set new page dirty */
+	SGP_WRITE,	/* may exceed i_size, may allocate page */
+};
+
+int shmem_getpage(struct inode *inode, unsigned long idx,
+		  struct page **pagep, enum sgp_type sgp, int *type);
+
 int shmem_zero_setup(struct vm_area_struct *);
 
 #ifndef CONFIG_MMU
diff --git a/include/linux/pci.h b/include/linux/pci.h
index b026949..e2c3c0b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -312,6 +312,16 @@ struct pci_bus {
 #define pci_bus_b(n)	list_entry(n, struct pci_bus, node)
 #define to_pci_bus(n)	container_of(n, struct pci_bus, dev)
 
+enum pci_bar_type {
+	pci_bar_unknown,	/* Standard PCI BAR probe */
+	pci_bar_io,		/* An io port BAR */
+	pci_bar_mem32,		/* A 32-bit memory BAR */
+	pci_bar_mem64,		/* A 64-bit memory BAR */
+};
+
+int pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
+					struct resource *res, unsigned int reg);
+
 /*
  * Error values that may be returned by PCI functions.
  */
diff --git a/mm/mmap.c b/mm/mmap.c
index 971d0ed..e94afde 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1244,6 +1244,171 @@ unacct_error:
 	return error;
 }
 
+/**
+ * mmap_io_region - map an I/O region, creating a new VMA if necessary
+ * @file: file to account mapping against
+ * @addr: user address to map
+ * @len: size of mapping
+ * @flags: mmap flags
+ * @vm_flags: VM protection bits
+ * @pgoff: pfn of backing pages
+ * @accountable: account for these pages?
+ *
+ * Normally drivers can simply override ->mmap and use remap_pfn_range
+ * themselves, but if remapping needs to be done in other functions (say ioctl)
+ * some function has to provide VMA allocation & linking services, thus
+ * this function.
+ */
+unsigned long mmap_io_region(struct file *file, unsigned long addr,
+			     unsigned long len, unsigned long flags,
+			     unsigned int vm_flags, unsigned long pgoff,
+			     int accountable)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma, *prev;
+	int correct_wcount = 0;
+	int error;
+	struct rb_node **rb_link, *rb_parent;
+	unsigned long charged = 0;
+	struct inode *inode =  file ? file->f_path.dentry->d_inode : NULL;
+
+	/* Clear old maps */
+	error = -ENOMEM;
+munmap_back:
+	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
+	if (vma && vma->vm_start < addr + len) {
+		if (do_munmap(mm, addr, len))
+			return -ENOMEM;
+		goto munmap_back;
+	}
+
+	/* Check against address space limit. */
+	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
+		return -ENOMEM;
+
+	if (flags & MAP_NORESERVE)
+		vm_flags |= VM_NORESERVE;
+
+	if (accountable && (!(flags & MAP_NORESERVE) ||
+			    sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
+		if (vm_flags & VM_SHARED) {
+			/* Check memory availability in shmem_file_setup? */
+			vm_flags |= VM_ACCOUNT;
+		} else if (vm_flags & VM_WRITE) {
+			/*
+			 * Private writable mapping: check memory availability
+			 */
+			charged = len >> PAGE_SHIFT;
+			if (security_vm_enough_memory(charged))
+				return -ENOMEM;
+			vm_flags |= VM_ACCOUNT;
+		}
+	}
+
+	printk(KERN_ERR "%s: using vma %p\n", __FUNCTION__, vma);
+
+	/*
+	 * Determine the object being mapped and call the appropriate
+	 * specific mapper. the address has already been validated, but
+	 * not unmapped, but the maps are removed from the list.
+	 */
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (!vma) {
+		error = -ENOMEM;
+		goto unacct_error;
+	}
+
+	vma->vm_mm = mm;
+	vma->vm_start = addr;
+	vma->vm_end = addr + len;
+	vma->vm_flags = vm_flags;
+	vma->vm_page_prot = vm_get_page_prot(vm_flags);
+	vma->vm_pgoff = pgoff;
+
+	error = -EINVAL;
+	if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
+		goto free_vma;
+	if (vm_flags & VM_DENYWRITE) {
+		error = deny_write_access(file);
+		if (error)
+			goto free_vma;
+		correct_wcount = 1;
+	}
+
+	get_file(file);
+	printk(KERN_ERR "remap_pfn_range(%p, 0x%lx, 0x%lx, 0x%lx, 0x%lx)\n",
+	       vma, vma->vm_start, vma->vm_pgoff, len, vma->vm_page_prot);
+	error = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len,
+				vma->vm_page_prot);
+	if (error)
+		goto unmap_and_free_vma;
+	if (vm_flags & VM_EXECUTABLE)
+		added_exe_file_vma(mm);
+
+	/* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform
+	 * shmem_zero_setup (perhaps called through /dev/zero's ->mmap)
+	 * that memory reservation must be checked; but that reservation
+	 * belongs to shared memory object, not to vma: so now clear it.
+	 */
+	if ((vm_flags & (VM_SHARED|VM_ACCOUNT)) == (VM_SHARED|VM_ACCOUNT))
+		vma->vm_flags &= ~VM_ACCOUNT;
+
+	/* Can addr have changed??
+	 *
+	 * Answer: Yes, several device drivers can do it in their
+	 *         f_op->mmap method. -DaveM
+	 */
+	addr = vma->vm_start;
+	pgoff = vma->vm_pgoff;
+	vm_flags = vma->vm_flags;
+
+	if (vma_wants_writenotify(vma))
+		vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
+
+	if (file && vma_merge(mm, prev, addr, vma->vm_end,
+			vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
+		mpol_put(vma_policy(vma));
+		kmem_cache_free(vm_area_cachep, vma);
+		fput(file);
+		if (vm_flags & VM_EXECUTABLE)
+			removed_exe_file_vma(mm);
+	} else {
+		vma_link(mm, vma, prev, rb_link, rb_parent);
+		file = vma->vm_file;
+	}
+
+	/* Once vma denies write, undo our temporary denial count */
+	if (correct_wcount)
+		atomic_inc(&inode->i_writecount);
+
+	mm->total_vm += len >> PAGE_SHIFT;
+	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
+	if (vm_flags & VM_LOCKED) {
+		mm->locked_vm += len >> PAGE_SHIFT;
+		make_pages_present(addr, addr + len);
+	}
+	if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
+		make_pages_present(addr, addr + len);
+	return addr;
+
+unmap_and_free_vma:
+	if (correct_wcount)
+		atomic_inc(&inode->i_writecount);
+	vma->vm_file = NULL;
+	fput(file);
+
+	/* Undo any partial mapping done by a device driver. */
+	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
+	charged = 0;
+free_vma:
+	kmem_cache_free(vm_area_cachep, vma);
+unacct_error:
+	if (charged)
+		vm_unacct_memory(charged);
+	return error;
+}
+EXPORT_SYMBOL(mmap_io_region);
+
 /* Get an address range which is currently unmapped.
  * For shmat() with addr=0.
  *
diff --git a/mm/shmem.c b/mm/shmem.c
index 04fb4f1..174d99c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -77,14 +77,6 @@
 /* Pretend that each entry is of this size in directory's i_size */
 #define BOGO_DIRENT_SIZE 20
 
-/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
-enum sgp_type {
-	SGP_READ,	/* don't exceed i_size, don't allocate page */
-	SGP_CACHE,	/* don't exceed i_size, may allocate page */
-	SGP_DIRTY,	/* like SGP_CACHE, but set new page dirty */
-	SGP_WRITE,	/* may exceed i_size, may allocate page */
-};
-
 #ifdef CONFIG_TMPFS
 static unsigned long shmem_default_max_blocks(void)
 {
@@ -97,9 +89,6 @@ static unsigned long shmem_default_max_inodes(void)
 }
 #endif
 
-static int shmem_getpage(struct inode *inode, unsigned long idx,
-			 struct page **pagep, enum sgp_type sgp, int *type);
-
 static inline struct page *shmem_dir_alloc(gfp_t gfp_mask)
 {
 	/*
@@ -1177,8 +1166,8 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
  * vm. If we swap it in we mark it dirty since we also free the swap
  * entry since a page cannot live in both the swap and page cache
  */
-static int shmem_getpage(struct inode *inode, unsigned long idx,
-			struct page **pagep, enum sgp_type sgp, int *type)
+int shmem_getpage(struct inode *inode, unsigned long idx,
+		  struct page **pagep, enum sgp_type sgp, int *type)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
@@ -1431,6 +1420,7 @@ failed:
 	}
 	return error;
 }
+EXPORT_SYMBOL(shmem_getpage);
 
 static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
@@ -2582,6 +2572,7 @@ put_memory:
 	shmem_unacct_size(flags, size);
 	return ERR_PTR(error);
 }
+EXPORT_SYMBOL(shmem_file_setup);
 
 /**
  * shmem_zero_setup - setup a shared anonymous mapping

-------------------------------------------------------------------------
This SF.Net email is sponsored by the Moblin Your Move Developer's challenge
Build the coolest Linux based applications with Moblin SDK & win great prizes
Grand prize is a trip for two to an Open Source event anywhere in the world
http://moblin-contest.org/redirect.php?banner_id=100&url=/

--
_______________________________________________
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel

Re: (ugly) GTT mapping patches

Reply via email to