On Thursday, August 21, 2008 2:27 pm Jesse Barnes wrote:
> Here's what I've been hacking on wrt GTT mapping. The kernel's mmap_region
> function was almost exactly what we needed, but it doesn't give us a way to
> avoid the backing store for the attached file, thus the new function. It
> would obviously be much better to pull out the common functionality for
> both functions into a helper and provide mmap_io_region and mmap_region as
> thin wrappers around it, but even that's uglier than just using fops->mmap.
> The latter would let us just check what kind of mapping we wanted to
> create and do it; we'd already have the VMA and address space taken care of
> by the parent do_mmap_pgoff function. I think that would mean messing with
> shmem.c though, since it won't pass down an mmap call for us...
>
> On the plus side, these patches seem to work and bring performance on
> modesetting-gem back to reasonable levels.
OK, as discussed on IRC, this also needs either full pin/map/unmap/unpin on the
client side or real fault support like TTM; otherwise we could fragment and/or
run out of GTT space, which would be bad.
And for reference, here's the whole diff I was using to build with
modesetting-gem of mesa/drm (which doesn't have Eric's latest changes yet).
--
Jesse Barnes, Intel Open Source Technology Center
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index a04498d..bf6f731 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -203,13 +203,6 @@ static u64 pci_size(u64 base, u64 maxbase, u64 mask)
return size;
}
-enum pci_bar_type {
- pci_bar_unknown, /* Standard PCI BAR probe */
- pci_bar_io, /* An io port BAR */
- pci_bar_mem32, /* A 32-bit memory BAR */
- pci_bar_mem64, /* A 64-bit memory BAR */
-};
-
static inline enum pci_bar_type decode_bar(struct resource *res, u32 bar)
{
if ((bar & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) {
@@ -322,6 +315,46 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
goto out;
}
+/**
+ * pci_read_base - Read a BAR from a specified location
+ * @dev: The PCI device to read
+ * @type: The type of BAR to read
+ * @res: A struct resource to be filled in
+ * @reg: The address in PCI config space to read the BAR from.
+ *
+ * Some devices have BARs in unusual places.  This function lets a driver ask
+ * the PCI subsystem to read it and place it in the resource tree.  If it is
+ * like a ROM BAR with an enable in bit 0, the caller should specify a @type
+ * of io, mem32 or mem64.  If it's like a normal BAR with memory type in the
+ * low bits, specify unknown, even if the caller knows what kind of BAR it is.
+ *
+ * Returns -ENXIO if the BAR was not successfully read.  If the BAR is read,
+ * but no suitable parent resource can be found for the BAR, this function
+ * returns -ENODEV.  If the resource cannot be inserted into the resource tree,
+ * it will return -EBUSY.  Note that the resource is still 'live' for these
+ * last two cases; the caller should set res->flags to 0 if this is not wanted.
+ */
+int pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
+		  struct resource *res, unsigned int reg)
+{
+	struct pci_bus_region region;
+	struct resource *parent;
+
+	/* A BAR that could not be read leaves res->flags cleared. */
+	__pci_read_base(dev, type, res, reg);
+	if (!res->flags)
+		return -ENXIO;
+
+	/* Hand the range just read to pcibios_bus_to_resource() via @region. */
+	region.start = res->start;
+	region.end = res->end;
+	pcibios_bus_to_resource(dev, res, &region);
+
+	parent = pci_find_parent_resource(dev, res);
+	if (!parent)
+		return -ENODEV;
+	return request_resource(parent, res);
+}
+EXPORT_SYMBOL_GPL(pci_read_base);
+
+
static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
{
unsigned int pos, reg;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 335288b..dca69f9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -715,6 +715,17 @@ static inline int shmem_lock(struct file *file, int lock,
#endif
struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags);
+/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
+enum sgp_type {
+ SGP_READ, /* don't exceed i_size, don't allocate page */
+ SGP_CACHE, /* don't exceed i_size, may allocate page */
+ SGP_DIRTY, /* like SGP_CACHE, but set new page dirty */
+ SGP_WRITE, /* may exceed i_size, may allocate page */
+};
+
+int shmem_getpage(struct inode *inode, unsigned long idx,
+ struct page **pagep, enum sgp_type sgp, int *type);
+
int shmem_zero_setup(struct vm_area_struct *);
#ifndef CONFIG_MMU
diff --git a/include/linux/pci.h b/include/linux/pci.h
index b026949..e2c3c0b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -312,6 +312,16 @@ struct pci_bus {
#define pci_bus_b(n) list_entry(n, struct pci_bus, node)
#define to_pci_bus(n) container_of(n, struct pci_bus, dev)
+enum pci_bar_type {
+ pci_bar_unknown, /* Standard PCI BAR probe */
+ pci_bar_io, /* An io port BAR */
+ pci_bar_mem32, /* A 32-bit memory BAR */
+ pci_bar_mem64, /* A 64-bit memory BAR */
+};
+
+int pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
+ struct resource *res, unsigned int reg);
+
/*
* Error values that may be returned by PCI functions.
*/
diff --git a/mm/mmap.c b/mm/mmap.c
index 971d0ed..e94afde 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1244,6 +1244,171 @@ unacct_error:
return error;
}
+/**
+ * mmap_io_region - map an I/O region, creating a new VMA if necessary
+ * @file: file to account mapping against (may be NULL)
+ * @addr: user address to map
+ * @len: size of mapping
+ * @flags: mmap flags
+ * @vm_flags: VM protection bits
+ * @pgoff: pfn of backing pages
+ * @accountable: account for these pages?
+ *
+ * Normally drivers can simply override ->mmap and use remap_pfn_range
+ * themselves, but if remapping needs to be done in other functions (say ioctl)
+ * some function has to provide VMA allocation & linking services, thus
+ * this function.
+ *
+ * Any existing mappings overlapping [@addr, @addr + @len) are removed with
+ * do_munmap().  A new VMA is then allocated, populated with
+ * remap_pfn_range() starting at pfn @pgoff, and either merged through
+ * vma_merge() or linked into current->mm with vma_link().
+ *
+ * Returns the mapped user address on success, or a negative errno value
+ * (carried in the unsigned long return) on failure.
+ */
+unsigned long mmap_io_region(struct file *file, unsigned long addr,
+			     unsigned long len, unsigned long flags,
+			     unsigned int vm_flags, unsigned long pgoff,
+			     int accountable)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma, *prev;
+	int correct_wcount = 0;
+	int error;
+	struct rb_node **rb_link, *rb_parent;
+	unsigned long charged = 0;
+	struct inode *inode = file ? file->f_path.dentry->d_inode : NULL;
+
+	/* Clear old maps */
+munmap_back:
+	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
+	if (vma && vma->vm_start < addr + len) {
+		if (do_munmap(mm, addr, len))
+			return -ENOMEM;
+		goto munmap_back;
+	}
+
+	/* Check against address space limit. */
+	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
+		return -ENOMEM;
+
+	if (flags & MAP_NORESERVE)
+		vm_flags |= VM_NORESERVE;
+
+	if (accountable && (!(flags & MAP_NORESERVE) ||
+			    sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
+		if (vm_flags & VM_SHARED) {
+			/* Check memory availability in shmem_file_setup? */
+			vm_flags |= VM_ACCOUNT;
+		} else if (vm_flags & VM_WRITE) {
+			/*
+			 * Private writable mapping: check memory availability
+			 */
+			charged = len >> PAGE_SHIFT;
+			if (security_vm_enough_memory(charged))
+				return -ENOMEM;
+			vm_flags |= VM_ACCOUNT;
+		}
+	}
+
+	/* Debug: at this point vma is still the find_vma_prepare() result. */
+	printk(KERN_ERR "%s: using vma %p\n", __FUNCTION__, vma);
+
+	/*
+	 * Determine the object being mapped and call the appropriate
+	 * specific mapper. the address has already been validated, but
+	 * not unmapped, but the maps are removed from the list.
+	 */
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (!vma) {
+		error = -ENOMEM;
+		goto unacct_error;
+	}
+
+	vma->vm_mm = mm;
+	vma->vm_start = addr;
+	vma->vm_end = addr + len;
+	vma->vm_flags = vm_flags;
+	vma->vm_page_prot = vm_get_page_prot(vm_flags);
+	vma->vm_pgoff = pgoff;
+
+	error = -EINVAL;
+	if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
+		goto free_vma;
+	/* Write denial only makes sense when there is a file to deny on. */
+	if (file && (vm_flags & VM_DENYWRITE)) {
+		error = deny_write_access(file);
+		if (error)
+			goto free_vma;
+		correct_wcount = 1;
+	}
+
+	/* @file is optional (see the inode initialiser above). */
+	if (file)
+		get_file(file);
+	/*
+	 * pgprot_t is not guaranteed to be a plain integer, so print its
+	 * value through pgprot_val() rather than passing it to printk as-is.
+	 */
+	printk(KERN_ERR "remap_pfn_range(%p, 0x%lx, 0x%lx, 0x%lx, 0x%llx)\n",
+	       vma, vma->vm_start, vma->vm_pgoff, len,
+	       (unsigned long long)pgprot_val(vma->vm_page_prot));
+	error = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len,
+				vma->vm_page_prot);
+	if (error)
+		goto unmap_and_free_vma;
+	if (vm_flags & VM_EXECUTABLE)
+		added_exe_file_vma(mm);
+
+	/* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform
+	 * shmem_zero_setup (perhaps called through /dev/zero's ->mmap)
+	 * that memory reservation must be checked; but that reservation
+	 * belongs to shared memory object, not to vma: so now clear it.
+	 */
+	if ((vm_flags & (VM_SHARED|VM_ACCOUNT)) == (VM_SHARED|VM_ACCOUNT))
+		vma->vm_flags &= ~VM_ACCOUNT;
+
+	/* Can addr have changed??
+	 *
+	 * Answer: Yes, several device drivers can do it in their
+	 * f_op->mmap method. -DaveM
+	 */
+	addr = vma->vm_start;
+	pgoff = vma->vm_pgoff;
+	vm_flags = vma->vm_flags;
+
+	if (vma_wants_writenotify(vma))
+		vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
+
+	if (file && vma_merge(mm, prev, addr, vma->vm_end,
+			vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
+		/* Merged into an existing VMA: drop ours and its file ref. */
+		mpol_put(vma_policy(vma));
+		kmem_cache_free(vm_area_cachep, vma);
+		fput(file);
+		if (vm_flags & VM_EXECUTABLE)
+			removed_exe_file_vma(mm);
+	} else {
+		vma_link(mm, vma, prev, rb_link, rb_parent);
+		/*
+		 * NOTE(review): vma->vm_file is never assigned in this
+		 * function (the VMA comes zeroed from kmem_cache_zalloc), so
+		 * file becomes NULL here and the reference taken by
+		 * get_file() above does not appear to be dropped on this
+		 * path.  Confirm whether leaving vm_file unset is intended.
+		 */
+		file = vma->vm_file;
+	}
+
+	/* Once vma denies write, undo our temporary denial count */
+	if (correct_wcount)
+		atomic_inc(&inode->i_writecount);
+
+	mm->total_vm += len >> PAGE_SHIFT;
+	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
+	if (vm_flags & VM_LOCKED) {
+		mm->locked_vm += len >> PAGE_SHIFT;
+		make_pages_present(addr, addr + len);
+	}
+	if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
+		make_pages_present(addr, addr + len);
+	return addr;
+
+unmap_and_free_vma:
+	if (correct_wcount)
+		atomic_inc(&inode->i_writecount);
+	vma->vm_file = NULL;
+	/* Balance the conditional get_file() above. */
+	if (file)
+		fput(file);
+
+	/* Undo any partial mapping done by a device driver. */
+	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
+	charged = 0;
+free_vma:
+	kmem_cache_free(vm_area_cachep, vma);
+unacct_error:
+	if (charged)
+		vm_unacct_memory(charged);
+	return error;
+}
+EXPORT_SYMBOL(mmap_io_region);
+
+
/* Get an address range which is currently unmapped.
* For shmat() with addr=0.
*
diff --git a/mm/shmem.c b/mm/shmem.c
index 04fb4f1..174d99c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -77,14 +77,6 @@
/* Pretend that each entry is of this size in directory's i_size */
#define BOGO_DIRENT_SIZE 20
-/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
-enum sgp_type {
- SGP_READ, /* don't exceed i_size, don't allocate page */
- SGP_CACHE, /* don't exceed i_size, may allocate page */
- SGP_DIRTY, /* like SGP_CACHE, but set new page dirty */
- SGP_WRITE, /* may exceed i_size, may allocate page */
-};
-
#ifdef CONFIG_TMPFS
static unsigned long shmem_default_max_blocks(void)
{
@@ -97,9 +89,6 @@ static unsigned long shmem_default_max_inodes(void)
}
#endif
-static int shmem_getpage(struct inode *inode, unsigned long idx,
- struct page **pagep, enum sgp_type sgp, int *type);
-
static inline struct page *shmem_dir_alloc(gfp_t gfp_mask)
{
/*
@@ -1177,8 +1166,8 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
* vm. If we swap it in we mark it dirty since we also free the swap
* entry since a page cannot live in both the swap and page cache
*/
-static int shmem_getpage(struct inode *inode, unsigned long idx,
- struct page **pagep, enum sgp_type sgp, int *type)
+int shmem_getpage(struct inode *inode, unsigned long idx,
+ struct page **pagep, enum sgp_type sgp, int *type)
{
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode);
@@ -1431,6 +1420,7 @@ failed:
}
return error;
}
+EXPORT_SYMBOL(shmem_getpage);
static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
@@ -2582,6 +2572,7 @@ put_memory:
shmem_unacct_size(flags, size);
return ERR_PTR(error);
}
+EXPORT_SYMBOL(shmem_file_setup);
/**
* shmem_zero_setup - setup a shared anonymous mapping
-------------------------------------------------------------------------
This SF.Net email is sponsored by the Moblin Your Move Developer's challenge
Build the coolest Linux based applications with Moblin SDK & win great prizes
Grand prize is a trip for two to an Open Source event anywhere in the world
http://moblin-contest.org/redirect.php?banner_id=100&url=/
--
_______________________________________________
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel