Re: [PATCH v5 4/5] cramfs: add mmap support
On Fri, 6 Oct 2017, Christoph Hellwig wrote: > > + /* Don't map the last page if it contains some other data */ > > + if (unlikely(pgoff + pages == max_pages)) { > > + unsigned int partial = offset_in_page(inode->i_size); > > + if (partial) { > > + char *data = sbi->linear_virt_addr + offset; > > + data += (max_pages - 1) * PAGE_SIZE + partial; > > + if (memchr_inv(data, 0, PAGE_SIZE - partial) != NULL) { > > + pr_debug("mmap: %s: last page is shared\n", > > +file_dentry(file)->d_name.name); > > + pages--; > > + } > > + } > > + } > > Why is pgoff + pages == max_pages marked unlikely? Mapping the whole > file seems like a perfectly normal and likely case to me.. In practice it is not. This is typically used for executables where a mmap() call covers the executable and read-only segments located at the beginning of a file. The end of the file is covered by a r/w mapping where .data is normally located and we can't direct-mmap that. Whatever. I have no strong opinion here as this is hardly performance critical so it is removed. > Also if this was my code I'd really prefer to move this into a helper: > > static bool cramfs_mmap_last_page_is_shared(struct inode *inode, int offset) > { > unsigned int partial = offset_in_page(inode->i_size); > char *data = CRAMFS_SB(inode->i_sb)->linear_virt_addr + offset + > (inode->i_size & PAGE_MASK); > > return memchr_inv(data + partial, 0, PAGE_SIZE - partial); > } > > if (pgoff + pages == max_pages && offset_in_page(inode->i_size) && > cramfs_mmap_last_page_is_shared(inode, offset)) > pages--; > > as that's much more readable and the function name provides a good > documentation of what is going on. OK. The above got some details wrong but the idea is clear. Please see the new patch below. > > + if (pages != vma_pages(vma)) { > here is how I would turn this around: > > if (!pages) > goto done; > > if (pages == vma_pages(vma)) { > remap_pfn_range(); > goto done; > } > > ... > for (i = 0; i < pages; i++) { > ... 
> vm_insert_mixed(); > nr_mapped++; > } > > > done: > pr_debug("mapped %d out of %d\n", ..); > if (pages != vma_pages(vma)) > vma->vm_ops = &generic_file_vm_ops; > return 0; > } > > In fact we probably could just set the vm_ops unconditionally, they > just wouldn't be called, but that might be more confusing than helpful. Well... For one thing generic_file_vm_ops is not exported to modules so now I simply call generic_file_readonly_mmap() at the beginning and let it validate the vma flags. That allowed for some simplifications at the end. Personally I think the if-else form is clearer over an additional goto label, but I moved the more common case first as you did above. At this point I hope you'll indulge me. Here's the latest version of patch 4/5: diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 6aa1d94ed8..e1b9192f23 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -15,7 +15,10 @@ #include #include +#include #include +#include +#include #include #include #include @@ -49,6 +52,7 @@ static inline struct cramfs_sb_info *CRAMFS_SB(struct super_block *sb) static const struct super_operations cramfs_ops; static const struct inode_operations cramfs_dir_inode_operations; static const struct file_operations cramfs_directory_operations; +static const struct file_operations cramfs_physmem_fops; static const struct address_space_operations cramfs_aops; static DEFINE_MUTEX(read_mutex); @@ -96,6 +100,10 @@ static struct inode *get_cramfs_inode(struct super_block *sb, case S_IFREG: inode->i_fop = &generic_ro_fops; inode->i_data.a_ops = &cramfs_aops; + if (IS_ENABLED(CONFIG_CRAMFS_PHYSMEM) && + CRAMFS_SB(sb)->flags & CRAMFS_FLAG_EXT_BLOCK_POINTERS && + CRAMFS_SB(sb)->linear_phys_addr) + inode->i_fop = &cramfs_physmem_fops; break; case S_IFDIR: inode->i_op = &cramfs_dir_inode_operations; @@ -277,6 +285,207 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, return NULL; } +/* + * For a mapping to be possible, we need a range of uncompressed 
and + * contiguous blocks. Return the offset for the first block and number of + * valid blocks for which that is true, or zero otherwise. + */ +static u32 cramfs_get_block_range(struct inode *inode, u32 pgoff, u32 *pages) +{ + struct cramfs_sb_info *sbi = CRAMFS_SB(inode->i_sb); + int i; + u32 *blockptrs, first_block_addr; + + /* +*
Re: [PATCH v5 4/5] cramfs: add mmap support
> + /* Don't map the last page if it contains some other data */ > + if (unlikely(pgoff + pages == max_pages)) { > + unsigned int partial = offset_in_page(inode->i_size); > + if (partial) { > + char *data = sbi->linear_virt_addr + offset; > + data += (max_pages - 1) * PAGE_SIZE + partial; > + if (memchr_inv(data, 0, PAGE_SIZE - partial) != NULL) { > + pr_debug("mmap: %s: last page is shared\n", > + file_dentry(file)->d_name.name); > + pages--; > + } > + } > + } Why is pgoff + pages == max_pages marked unlikely? Mapping the whole file seems like a perfectly normal and likely case to me.. Also if this was my code I'd really prefer to move this into a helper: static bool cramfs_mmap_last_page_is_shared(struct inode *inode, int offset) { unsigned int partial = offset_in_page(inode->i_size); char *data = CRAMFS_SB(inode->i_sb)->linear_virt_addr + offset + (inode->i_size & PAGE_MASK); return memchr_inv(data + partial, 0, PAGE_SIZE - partial); } if (pgoff + pages == max_pages && offset_in_page(inode->i_size) && cramfs_mmap_last_page_is_shared(inode, offset)) pages--; as that's much more readable and the function name provides a good documentation of what is going on. > + if (pages != vma_pages(vma)) { here is how I would turn this around: if (!pages) goto done; if (pages == vma_pages(vma)) { remap_pfn_range(); goto done; } ... for (i = 0; i < pages; i++) { ... vm_insert_mixed(); nr_mapped++; } done: pr_debug("mapped %d out of %d\n", ..); if (pages != vma_pages(vma)) vma->vm_ops = &generic_file_vm_ops; return 0; } In fact we probably could just set the vm_ops unconditionally, they just wouldn't be called, but that might be more confusing then helpful.
[PATCH v5 4/5] cramfs: add mmap support
When cramfs_physmem is used then we have the opportunity to map files directly from ROM, directly into user space, saving on RAM usage. This gives us Execute-In-Place (XIP) support. For a file to be mmap()-able, the map area has to correspond to a range of uncompressed and contiguous blocks, and in the MMU case it also has to be page aligned. A version of mkcramfs with appropriate support is necessary to create such a filesystem image. In the MMU case it may happen for a vma structure to extend beyond the actual file size. This is notably the case in binfmt_elf.c:elf_map(). Or the file's last block is shared with other files and cannot be mapped as is. Rather than refusing to mmap it, we do a "mixed" map and let the regular fault handler populate the unmapped area with RAM-backed pages. In practice the unmapped area is seldom accessed so page faults might never occur before this area is discarded. In the non-MMU case it is the get_unmapped_area method that is responsible for providing the address where the actual data can be found. No mapping is necessary of course. 
Signed-off-by: Nicolas Pitre Tested-by: Chris Brandt --- fs/cramfs/inode.c | 194 ++ 1 file changed, 194 insertions(+) diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 6aa1d94ed8..071ce1eb58 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -15,7 +15,10 @@ #include #include +#include #include +#include +#include #include #include #include @@ -49,6 +52,7 @@ static inline struct cramfs_sb_info *CRAMFS_SB(struct super_block *sb) static const struct super_operations cramfs_ops; static const struct inode_operations cramfs_dir_inode_operations; static const struct file_operations cramfs_directory_operations; +static const struct file_operations cramfs_physmem_fops; static const struct address_space_operations cramfs_aops; static DEFINE_MUTEX(read_mutex); @@ -96,6 +100,10 @@ static struct inode *get_cramfs_inode(struct super_block *sb, case S_IFREG: inode->i_fop = &generic_ro_fops; inode->i_data.a_ops = &cramfs_aops; + if (IS_ENABLED(CONFIG_CRAMFS_PHYSMEM) && + CRAMFS_SB(sb)->flags & CRAMFS_FLAG_EXT_BLOCK_POINTERS && + CRAMFS_SB(sb)->linear_phys_addr) + inode->i_fop = &cramfs_physmem_fops; break; case S_IFDIR: inode->i_op = &cramfs_dir_inode_operations; @@ -277,6 +285,192 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, return NULL; } +/* + * For a mapping to be possible, we need a range of uncompressed and + * contiguous blocks. Return the offset for the first block and number of + * valid blocks for which that is true, or zero otherwise. + */ +static u32 cramfs_get_block_range(struct inode *inode, u32 pgoff, u32 *pages) +{ + struct super_block *sb = inode->i_sb; + struct cramfs_sb_info *sbi = CRAMFS_SB(sb); + int i; + u32 *blockptrs, first_block_addr; + + /* +* We can dereference memory directly here as this code may be +* reached only when there is a direct filesystem image mapping +* available in memory. 
+*/ + blockptrs = (u32 *)(sbi->linear_virt_addr + OFFSET(inode) + pgoff * 4); + first_block_addr = blockptrs[0] & ~CRAMFS_BLK_FLAGS; + i = 0; + do { + u32 block_off = i * (PAGE_SIZE >> CRAMFS_BLK_DIRECT_PTR_SHIFT); + u32 expect = (first_block_addr + block_off) | +CRAMFS_BLK_FLAG_DIRECT_PTR | +CRAMFS_BLK_FLAG_UNCOMPRESSED; + if (blockptrs[i] != expect) { + pr_debug("range: block %d/%d got %#x expects %#x\n", +pgoff+i, pgoff + *pages - 1, +blockptrs[i], expect); + if (i == 0) + return 0; + break; + } + } while (++i < *pages); + + *pages = i; + return first_block_addr << CRAMFS_BLK_DIRECT_PTR_SHIFT; +} + +#ifdef CONFIG_MMU + +static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct inode *inode = file_inode(file); + struct super_block *sb = inode->i_sb; + struct cramfs_sb_info *sbi = CRAMFS_SB(sb); + unsigned int pages, max_pages, offset; + unsigned long address, pgoff = vma->vm_pgoff; + char *bailout_reason; + int ret; + + if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) + return -EINVAL; + + /* Could COW work here? */ + bailout_reason = "vma is writable"; + if (vma->vm_flags & VM_WRITE) + goto bailout; + + max_pages = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; + bailout_reason = "beyond file limit"; + if (pgoff >= max_pages)