date:20130326

On Tue, 2013-03-26 at 09:18 -0700, Greg KH wrote:
> On Tue, Mar 26, 2013 at 08:56:20AM -0700, Kees Cook wrote:
> > On Sun, Mar 24, 2013 at 12:31 PM, Jiri Slaby  wrote:
> > > On 03/20/2013 11:25 PM, Sebastian Gottschall wrote:
> > >
> > >> Am 20.03.2013 21:15, schrieb Greg KH:
> > >>> I'm announcing the release of the 3.8.4 kernel.
> > > ...
> > >>>signal: always clear sa_restorer on execve
> > >> consider that this kernel is not compileable on mips targets due the
> > >> patch in kernel/signal.c
> > >>
> > >> SA_RESTORER is defined in mips, but sa_restorer does not exist on that
> > >> platform
> > >
> > > The same for ia64...
> > >
> > > kernel/signal.c: In function 'flush_signal_handlers':
> > > kernel/signal.c:441: error: 'struct sigaction' has no member named
> > > 'sa_restorer'
> > >
> > > Do we need this in 3.0, 3.4, 3.8 too?
> > > commit 522cff142d7d2f9230839c9e1f21a4d8bcc22a4a
> > > Author: Andrew Morton 
> > > Date:   Wed Mar 13 14:59:34 2013 -0700
> > >
> > > kernel/signal.c: use __ARCH_HAS_SA_RESTORER instead of SA_RESTORER
> > >
> > > __ARCH_HAS_SA_RESTORER is the preferred conditional for use in 3.9 and
> > > later kernels, per Kees.
> > 
> > Hrm, if so, we'll need to also backport the changes that added
> > __ARCH_HAS_SA_RESTORER...
> 
> Ok, well, can someone provide me the needed patches?  Otherwise I will
> have to revert the offending change from the stable releases.

For the third time... use the attached patch followed by Andrew's commit
522cff142d7d2f9230839c9e1f21a4d8bcc22a4a.

Ben.

-- 
Ben Hutchings
I'm not a reverse psychological virus.  Please don't copy me into your sig.
From: Ben Hutchings 
Date: Sun, 25 Nov 2012 22:24:19 -0500
Subject: signal: Define __ARCH_HAS_SA_RESTORER so we know whether to clear sa_restorer

flush_signal_handlers() needs to know whether sigaction::sa_restorer
is defined, not whether SA_RESTORER is defined.  Define the
__ARCH_HAS_SA_RESTORER macro to indicate this.

Vaguely based on upstream commit 574c4866e33d 'consolidate kernel-side
struct sigaction declarations'.

Signed-off-by: Ben Hutchings 
Cc: Al Viro 
---
--- a/arch/arm/include/asm/signal.h
+++ b/arch/arm/include/asm/signal.h
@@ -29,6 +29,7 @@ struct sigaction {
 	__sigrestore_t sa_restorer;
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
+#define __ARCH_HAS_SA_RESTORER
 
 struct k_sigaction {
 	struct sigaction sa;
--- a/arch/avr32/include/asm/signal.h
+++ b/arch/avr32/include/asm/signal.h
@@ -29,6 +29,7 @@ struct sigaction {
 	__sigrestore_t sa_restorer;
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
+#define __ARCH_HAS_SA_RESTORER
 
 struct k_sigaction {
 	struct sigaction sa;
--- a/arch/cris/include/asm/signal.h
+++ b/arch/cris/include/asm/signal.h
@@ -29,6 +29,7 @@ struct sigaction {
 	void (*sa_restorer)(void);
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
+#define __ARCH_HAS_SA_RESTORER
 
 struct k_sigaction {
 	struct sigaction sa;
--- a/arch/h8300/include/asm/signal.h
+++ b/arch/h8300/include/asm/signal.h
@@ -29,6 +29,7 @@ struct sigaction {
 	void (*sa_restorer)(void);
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
+#define __ARCH_HAS_SA_RESTORER
 
 struct k_sigaction {
 	struct sigaction sa;
--- a/arch/m32r/include/asm/signal.h
+++ b/arch/m32r/include/asm/signal.h
@@ -22,6 +22,7 @@ struct sigaction {
 	__sigrestore_t sa_restorer;
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
+#define __ARCH_HAS_SA_RESTORER
 
 struct k_sigaction {
 	struct sigaction sa;
--- a/arch/m68k/include/asm/signal.h
+++ b/arch/m68k/include/asm/signal.h
@@ -29,6 +29,7 @@ struct sigaction {
 	__sigrestore_t sa_restorer;
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
+#define __ARCH_HAS_SA_RESTORER
 
 struct k_sigaction {
 	struct sigaction sa;
--- a/arch/mn10300/include/asm/signal.h
+++ b/arch/mn10300/include/asm/signal.h
@@ -39,6 +39,7 @@ struct sigaction {
 	__sigrestore_t sa_restorer;
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
+#define __ARCH_HAS_SA_RESTORER
 
 struct k_sigaction {
 	struct sigaction sa;
--- a/arch/powerpc/include/asm/signal.h
+++ b/arch/powerpc/include/asm/signal.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_POWERPC_SIGNAL_H
 #define _ASM_POWERPC_SIGNAL_H
 
+#define __ARCH_HAS_SA_RESTORER
 #include 
 
 #endif /* _ASM_POWERPC_SIGNAL_H */
--- a/arch/s390/include/asm/signal.h
+++ b/arch/s390/include/asm/signal.h
@@ -34,6 +34,7 @@ struct sigaction {
 void (*sa_restorer)(void);
 sigset_t sa_mask;   /* mask last for extensibility */
 };
+#define __ARCH_HAS_SA_RESTORER
 
 struct k_sigaction {
 struct sigaction sa;
--- a/arch/sparc/include/asm/signal.h
+++ b/arch/sparc/include/asm/signal.h
@@ -26,5 +26,7 @@ struct k_sigaction {
 	void			__user *ka_restorer;
 };
 
+#define __ARCH_HAS_SA_RESTORER
+
 #endif /* !(__ASSEMBLY__) */
 #endif /* !(__SPARC_SIGNAL_H) */
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -31,6 +31,9 @@ typedef sigset_t compat_sigset

Re: [PATCH 116/150] vfs,proc: guarantee unique inodes in /proc

On Tue, 2013-03-26 at 15:20 +0000, Luis Henriques wrote:
> 3.5.7.9 -stable review patch.  If anyone has any objections, please let me 
> know.
> 
> --
> 
> From: Linus Torvalds 
> 
> commit 51f0885e5415b4cc6535e9cdcc5145bfbc134353 upstream.
> 
> Dave Jones found another /proc issue with his Trinity tool: thanks to
> the namespace model, we can have multiple /proc dentries that point to
> the same inode, aliasing directories in /proc/<pid>/net/ for example.
> 
> This ends up being a total disaster, because it acts like hardlinked
> directories, and causes locking problems.  We rely on the topological
> sort of the inodes pointed to by dentries, and if we have aliased
> directories, that ordering becomes unreliable.
> 
> In short: don't do this.  Multiple dentries with the same (directory)
> inode is just a bad idea, and the namespace code should never have
> exposed things this way.  But we're kind of stuck with it.
> 
> This solves things by just always allocating a new inode during /proc
> dentry lookup, instead of using "iget_locked()" to look up existing
> inodes by superblock and number.  That actually simplifies the code a bit,
> at the cost of potentially doing more inode [de]allocations.
> 
> That said, the inode lookup wasn't free either (and did a lot of locking
> of inodes), so it is probably not that noticeable.  We could easily keep
> the old lookup model for non-directory entries, but rather than try to
> be excessively clever this just implements the minimal and simplest
> workaround for the problem.
> 
> Reported-and-tested-by: Dave Jones 
> Analyzed-by: Al Viro 
> Signed-off-by: Linus Torvalds 
> [ luis: backported to 3.5; adjust context ]

Prior to commit d3d009cb965eae7e002ea5badf603ea8f4c34915, callers of
proc_get_inode() don't expect it to call pde_put() before returning NULL
- only when returning an existing inode, which it will never do after
this.  So I think you must either cherry-pick that first, or delete
'else pde_put(de);' as part of this fix.

Ben.

> Signed-off-by: Luis Henriques 
> ---
>  fs/proc/inode.c | 9 +++--
>  1 file changed, 3 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/proc/inode.c b/fs/proc/inode.c
> index 7ac817b..b02ddd0 100644
> --- a/fs/proc/inode.c
> +++ b/fs/proc/inode.c
> @@ -443,12 +443,10 @@ static const struct file_operations 
> proc_reg_file_ops_no_compat = {
>  
>  struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry 
> *de)
>  {
> - struct inode * inode;
> + struct inode *inode = new_inode_pseudo(sb);
>  
> - inode = iget_locked(sb, de->low_ino);
> - if (!inode)
> - return NULL;
> - if (inode->i_state & I_NEW) {
> + if (inode) {
> + inode->i_ino = de->low_ino;
>   inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
>   PROC_I(inode)->fd = 0;
>   PROC_I(inode)->pde = de;
> @@ -477,7 +475,6 @@ struct inode *proc_get_inode(struct super_block *sb, 
> struct proc_dir_entry *de)
>   inode->i_fop = de->proc_fops;
>   }
>   }
> - unlock_new_inode(inode);
>   } else
>  pde_put(de);
>   return inode;

-- 
Ben Hutchings
I'm not a reverse psychological virus.  Please don't copy me into your sig.


signature.asc
Description: This is a digitally signed message part

[PATCHv2] usb: ehci: unlink_empty_async_suspended() only used with CONFIG_PM

2013-03-26 Thread Tony Prisk

Compiling with !CONFIG_PM generates an unused function warning on
unlink_empty_async_suspended().

Enclose the function in a #ifdef CONFIG_PM

Signed-off-by: Tony Prisk 
Acked-by: Alan Stern 
---
v2:
Tidy up blank line as requested by Alan Stern
 drivers/usb/host/ehci-q.c |2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c
index 23d1369..1ea4de1 100644
--- a/drivers/usb/host/ehci-q.c
+++ b/drivers/usb/host/ehci-q.c
@@ -1316,6 +1316,7 @@ static void unlink_empty_async(struct ehci_hcd *ehci)
}
 }
 
+#ifdef CONFIG_PM
 /* The root hub is suspended; unlink all the async QHs */
 static void unlink_empty_async_suspended(struct ehci_hcd *ehci)
 {
@@ -1328,6 +1329,7 @@ static void unlink_empty_async_suspended(struct ehci_hcd 
*ehci)
}
start_iaa_cycle(ehci, false);
 }
+#endif
 
 /* makes sure the async qh will become idle */
 /* caller must own ehci->lock */
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC] mm: remove swapcache page early

2013-03-26 Thread Kamezawa Hiroyuki

(2013/03/27 11:22), Minchan Kim wrote:
> Swap subsystem does lazy swap slot free with expecting the page
> would be swapped out again so we can't avoid unnecessary write.
> 
> But the problem in in-memory swap is that it consumes memory space
> until vm_swap_full(ie, used half of all of swap device) condition
> meet. It could be bad if we use multiple swap device, small in-memory swap
> and big storage swap or in-memory swap alone.
> 
> This patch changes vm_swap_full logic slightly so it could free
> swap slot early if the backed device is really fast.
> For it, I used SWP_SOLIDSTATE but It might be controversial.
> So let's add Ccing Shaohua and Hugh.
> If it's a problem for SSD, I'd like to create new type SWP_INMEMORY
> or something for z* family.
> 
> Other problem is zram is block device so that it can set SWP_INMEMORY
> or SWP_SOLIDSTATE easily(ie, actually, zram is already done) but
> I have no idea to use it for frontswap.
> 
> Any idea?
> 
Another thought: in what case, in what system configuration,
should vm_swap_full() return false and delay swp_entry freeing?

Thanks,
-Kame


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

linux-next: Tree for Mar 27

Hi all,

Changes since 20130326:

The v4l-dvb tree still had its build failure for which I disabled a
staging driver.

The net-next tree gained conflicts against Linus' tree.

The block tree still had its build failure for which I applied a patch.

The usb tree gained a build failure for which I reverted a commit.

The char-misc tree lost its build failure.

The akpm tree lost a patch that turned up elsewhere and gained a build
failure for which I applied a patch.



I have created today's linux-next tree at
git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
(patches at http://www.kernel.org/pub/linux/kernel/next/ ).  If you
are tracking the linux-next tree using git, you should not use "git pull"
to do so as that will try to merge the new linux-next release with the
old one.  You should use "git fetch" as mentioned in the FAQ on the wiki
(see below).

You can see which trees have been included by looking in the Next/Trees
file in the source.  There are also quilt-import.log and merge.log files
in the Next directory.  Between each merge, the tree was built with
a ppc64_defconfig for powerpc and an allmodconfig for x86_64. After the
final fixups (if any), it is also built with powerpc allnoconfig (32 and
64 bit), ppc44x_defconfig and allyesconfig (minus
CONFIG_PROFILE_ALL_BRANCHES - this fails its final link) and i386, sparc,
sparc64 and arm defconfig. These builds also have
CONFIG_ENABLE_WARN_DEPRECATED, CONFIG_ENABLE_MUST_CHECK and
CONFIG_DEBUG_INFO disabled when necessary.

Below is a summary of the state of the merge.

We are up to 222 trees (counting Linus' and 31 trees of patches pending
for Linus' tree), more are welcome (even if they are currently empty).
Thanks to those who have contributed, and to those who haven't, please do.

Status of my local build tests will be at
http://kisskb.ellerman.id.au/linux-next .  If maintainers want to give
advice about cross compilers/configs that work, we are always open to add
more builds.

Thanks to Randy Dunlap for doing many randconfig builds.  And to Paul
Gortmaker for triage and bug fixes.

There is a wiki covering stuff to do with linux-next at
http://linux.f-seidel.de/linux-next/pmwiki/ .  Thanks to Frank Seidel.

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au

$ git checkout master
$ git reset --hard stable
Merging origin/master (b175293 Merge 
git://git.kernel.org/pub/scm/linux/kernel/git/davem/net)
Merging fixes/master (f9294e9 powerpc: define the conditions where the ePAPR 
idle hcall can be supported)
Merging kbuild-current/rc-fixes (6dbe51c Linux 3.9-rc1)
Merging arc-current/for-curr (367f3fc ARC: Fix the typo in event identifier 
flags used by ptrace)
Merging arm-current/fixes (68a154f ARM: 7681/1: hw_breakpoint: use warn_once to 
avoid spam from reset_ctrl_regs())
Merging m68k-current/for-linus (5618395 m68k: Sort out !CONFIG_MMU_SUN3 vs. 
CONFIG_HAS_DMA)
Merging powerpc-merge/merge (af81d78 powerpc: Rename USER_ESID_BITS* to 
ESID_BITS*)
Merging sparc/master (53b6809 Merge tag 'rdma-for-linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband)
Merging net/master (330305c ipv4: Fix ip-header identification for gso packets.)
Merging ipsec/master (799ef90 xfrm: Fix esn sequence number diff calculation in 
xfrm_replay_notify_esn())
Merging sound-current/for-linus (55a63d4 ALSA: hda - Fix DAC assignment for 
independent HP)
Merging pci-current/for-linus (249bfb8 PCI/PM: Clean up PME state when removing 
a device)
Merging wireless/master (0f49d64 mwifiex: reset skb->data after processing PCIe 
sleep confirm cmd respose)
Merging driver-core.current/driver-core-linus (e5110f4 sysfs: handle failure 
path correctly for readdir())
Merging tty.current/tty-linus (855f6fd Xilinx: ARM: UART: clear pending irqs 
before enabling irqs)
Merging usb.current/usb-linus (d78658d Merge tag 'for-usb-linus-2013-03-26' of 
git://git.kernel.org/pub/scm/linux/kernel/git/sarah/xhci into usb-linus)
Merging staging.current/staging-linus (e4317ce8 staging: comedi: s626: fix 
continuous acquisition)
Merging char-misc.current/char-misc-linus (347e089 VMCI: Fix process-to-process 
DRGAMs.)
Merging input-current/for-linus (4b7d293 Input: mms114 - Fix regulator enable 
and disable paths)
Merging md-current/for-linus (238f590 md: remove CONFIG_MULTICORE_RAID456 
entirely)
Merging audit-current/for-linus (c158a35 audit: no leading space in 
audit_log_d_path prefix)
Merging crypto-current/master (246bbed Revert "crypto: caam - add IPsec ESN 
support")
Merging ide/master (bf6b438 ide: gayle: use module_platform_driver_probe())
Merging dwmw2/master (63662139 params: Fix potential memory leak in 
add_sysfs_param())
Merging sh-current/sh-fixes-for-linus (4403310 SH: Convert out[bwl] macros to 
inline functions)
Merging irqdomain-current/irqdomain/merge (a0d271c Linux 3.6)
Mer

Re: [PATCH] intel-iommu: Synchronize gcmd value with global command register

2013-03-26 Thread Takao Indoh

(2013/03/26 23:46), Joerg Roedel wrote:
> On Thu, Mar 21, 2013 at 10:32:36AM +0900, Takao Indoh wrote:
>> In this function, clearing IRE bit in iommu->gcmd and writing it to
>> global command register. But initial value of iommu->gcmd is zero, so
>> this writel means clearing all bits in global command register.
> 
> Seems weird. Why is the value of gcmd zero in your case? The usage of
> this register is well encapsulated by the different parts of the VT-d
> driver. There are other places which enable/disable translation and qpi
> the same way it is done with interrupt remapping. So it looks to me that
> it is unlikely that gcmd is really zero in your case.
> 
> Can you explain that more and also describe what the actual misbehavior
> is you are trying to fix here?

Sure.
At first, please see the debug patch below.

diff --git a/drivers/iommu/intel_irq_remapping.c 
b/drivers/iommu/intel_irq_remapping.c
index af8904d..3ffb029 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -484,12 +484,15 @@ static void iommu_disable_irq_remapping(struct 
intel_iommu *iommu)
if (!(sts & DMA_GSTS_IRES))
goto end;

+   printk("DEBUG1: %08x\n", sts);
+
iommu->gcmd &= ~DMA_GCMD_IRE;
writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
  readl, !(sts & DMA_GSTS_IRES), sts);

+   printk("DEBUG2: %08x\n", sts);
 end:
raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
 }

This is the result in *kdump* kernel(second kernel).

DEBUG1: c700
DEBUG2: 4100

After writel, TES/QIES/IRES is disabled. I think only IRES should be
disabled here because this function is "iommu_disable_irq_remapping".
TES and QIES should be disabled by iommu_disable_translation() and
dmar_disable_qi() respectively.

This is what I found and what I am trying to fix. Next, let's see what
happened at boot time. Again, I'm talking about *kdump* kernel boot
time.

1. dmar_table_init() is called, and intel_iommu structure is allocated in
   alloc_iommu().

int alloc_iommu(struct dmar_drhd_unit *drhd)
{
struct intel_iommu *iommu;
(snip)
iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);

iommu->gcmd is zero here.

2. intel_enable_irq_remapping() is called, and interrupt remapping is
   initialized.

static int __init intel_enable_irq_remapping(void)
{
(snip)
for_each_drhd_unit(drhd) {
struct intel_iommu *iommu = drhd->iommu;
(snip)
iommu_disable_irq_remapping(iommu);

iommu_disable_irq_remapping is called here. Note that iommu->gcmd is
still zero because nothing has touched it yet.

static void iommu_disable_irq_remapping(struct intel_iommu *iommu)
{
(snip)
sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
if (!(sts & DMA_GSTS_IRES))
goto end;

iommu->gcmd &= ~DMA_GCMD_IRE;
writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

The purpose of this code is clearing IRE bit of global command
register to disable interrupt remapping, right?

But as I wrote above, iommu->gcmd is always zero here at boot time. So
this code means clearing *all* bits of global command register. As the
result of this, both of TE and QIE are also disabled.

The root cause of this problem is mismatch between iommu->gcmd and
global command register in the case of kdump. At boot time, initial
value of iommu->gcmd is zero as I wrote above, but actual global command
register is *not* zero because some bits like IRE/TE/QIE are already set
in *first* kernel. Therefore this patch synchronizes them to fix this
problem.

Did I answer your question?

Thanks,
Takao Indoh

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC] mm: remove swapcache page early

2013-03-26 Thread Kyungmin Park

Hi,

On Wed, Mar 27, 2013 at 11:22 AM, Minchan Kim  wrote:
> Swap subsystem does lazy swap slot free with expecting the page
> would be swapped out again so we can't avoid unnecessary write.
>
> But the problem in in-memory swap is that it consumes memory space
> until vm_swap_full(ie, used half of all of swap device) condition
> meet. It could be bad if we use multiple swap device, small in-memory swap
> and big storage swap or in-memory swap alone.
>
> This patch changes vm_swap_full logic slightly so it could free
> swap slot early if the backed device is really fast.
> For it, I used SWP_SOLIDSTATE but It might be controversial.
> So let's add Ccing Shaohua and Hugh.
> If it's a problem for SSD, I'd like to create new type SWP_INMEMORY
> or something for z* family.
I prefer to add a new SWP_INMEMORY for the z* family. As you know, SSD and
memory have different characteristics,
and if a new type is added, it doesn't need to modify lots of code.

Do you have any data for it? do you get meaningful performance gain or
efficiency of z* family? If yes, please share it.

Thank you,
Kyungmin Park

>
> Other problem is zram is block device so that it can set SWP_INMEMORY
> or SWP_SOLIDSTATE easily(ie, actually, zram is already done) but
> I have no idea to use it for frontswap.
>
> Any idea?
>
> Other optimize point is we remove it unconditionally when we
> found it's exclusive when swap in happen.
> It could help frontswap family, too.
> What do you think about it?
>
> Cc: Hugh Dickins 
> Cc: Dan Magenheimer 
> Cc: Seth Jennings 
> Cc: Nitin Gupta 
> Cc: Konrad Rzeszutek Wilk 
> Cc: Shaohua Li 
> Signed-off-by: Minchan Kim 
> ---
>  include/linux/swap.h | 11 ---
>  mm/memory.c  |  3 ++-
>  mm/swapfile.c| 11 +++
>  mm/vmscan.c  |  2 +-
>  4 files changed, 18 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 2818a12..1f4df66 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -359,9 +359,14 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t,
>  extern atomic_long_t nr_swap_pages;
>  extern long total_swap_pages;
>
> -/* Swap 50% full? Release swapcache more aggressively.. */
> -static inline bool vm_swap_full(void)
> +/*
> + * Swap 50% full or fast backed device?
> + * Release swapcache more aggressively.
> + */
> +static inline bool vm_swap_full(struct swap_info_struct *si)
>  {
> +   if (si->flags & SWP_SOLIDSTATE)
> +   return true;
> return atomic_long_read(&nr_swap_pages) * 2 < total_swap_pages;
>  }
>
> @@ -405,7 +410,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, 
> swp_entry_t ent, bool swapout)
>  #define get_nr_swap_pages()0L
>  #define total_swap_pages   0L
>  #define total_swapcache_pages()0UL
> -#define vm_swap_full() 0
> +#define vm_swap_full(si)   0
>
>  #define si_swapinfo(val) \
> do { (val)->freeswap = (val)->totalswap = 0; } while (0)
> diff --git a/mm/memory.c b/mm/memory.c
> index 705473a..1ca21a9 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3084,7 +3084,8 @@ static int do_swap_page(struct mm_struct *mm, struct 
> vm_area_struct *vma,
> mem_cgroup_commit_charge_swapin(page, ptr);
>
> swap_free(entry);
> -   if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || 
> PageMlocked(page))
> +   if (likely(PageSwapCache(page)) && (vm_swap_full(page_swap_info(page))
> +   || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)))
> try_to_free_swap(page);
> unlock_page(page);
> if (page != swapcache) {
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index 1bee6fa..f9cc701 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -293,7 +293,7 @@ checks:
> scan_base = offset = si->lowest_bit;
>
> /* reuse swap entry of cache-only swap if not busy. */
> -   if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
> +   if (vm_swap_full(si) && si->swap_map[offset] == SWAP_HAS_CACHE) {
> int swap_was_freed;
> spin_unlock(&si->lock);
> swap_was_freed = __try_to_reclaim_swap(si, offset);
> @@ -382,7 +382,8 @@ scan:
> spin_lock(&si->lock);
> goto checks;
> }
> -   if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) 
> {
> +   if (vm_swap_full(si) &&
> +   si->swap_map[offset] == SWAP_HAS_CACHE) {
> spin_lock(&si->lock);
> goto checks;
> }
> @@ -397,7 +398,8 @@ scan:
> spin_lock(&si->lock);
> goto checks;
> }
> -   if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) 
> {
> +   if (vm_swap_full(si) &&
> +   s

Re: [RFC PATCH v3 5/6] sched: pack the idle load balance

2013-03-26 Thread Alex Shi

On 03/26/2013 11:55 PM, Vincent Guittot wrote:
>> > So extrapolating that to a 4+4 big-little you'd get something like:
>> >
>> >   |   little  A9  ||   big A15 |
>> >   | 0 | 1 | 2 | 3 || 4 | 5 | 6 | 7 |
>> > --+---+---+---+---++---+---+---+---+
>> > buddy | 0 | 0 | 0 | 0 || 0 | 4 | 4 | 4 |
>> >
>> > Right?
> yes
> 
>> >
>> > So supposing the current ILB is 6, we'll only check 4, not 0-3, even
>> > though there might be a perfectly idle cpu in there.
> We will check 4,5,7 at MC level in order to pack in the group of A15
> (because they are not sharing the same power domain). If none of them
> are idle, we will look at CPU level and will check CPUs 0-3.

So you increase a fixed step here.
> 
>> >
>> > Also, your scheme fails to pack when cpus 0,4 are filled, even when
>> > there's idle cores around.
> The primary target is to pack the tasks only when we are in a not busy
> system so you will have a power improvement without performance
> decrease. is_light_task function returns false and  is_buddy_busy
> function true before the buddy is fully loaded and the scheduler will
> fall back into the default behavior which spreads tasks and races to
> idle.
> 
> We can extend the buddy CPU and the packing mechanism to fill one CPU
> before filling another buddy but it's not always the best choice for
> performance and/or power and thus it will imply to have a knob to
> select this full packing mode.

Just one buddy to pack tasks for whole level cpus definitely has
scalability problem. That is not good for powersaving in most of scenarios.


-- 
Thanks Alex
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: linux-next: manual merge of the net-next tree with Linus' tree

2013-03-26 Thread David Miller

From: Stephen Rothwell 
Date: Wed, 27 Mar 2013 11:57:43 +1100

> Today's linux-next merge of the net-next tree got a conflict in
> include/net/ipip.h between commit 330305cc4a6b ("ipv4: Fix ip-header
> identification for gso packets") from Linus' tree and commit c54419321455
> ("GRE: Refactor GRE tunneling code") from the net-next tree.
> 
> I just dropped the file (as the latter change did) and can carry the fix
> as necessary (no action is required).

Thanks, I'll take care of this when I next merge.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH v3 3/6] sched: pack small tasks

2013-03-26 Thread Alex Shi

On 03/27/2013 12:33 PM, Preeti U Murthy wrote:
> Hi Peter,
> 
> On 03/26/2013 06:07 PM, Peter Zijlstra wrote:
>> On Fri, 2013-03-22 at 13:25 +0100, Vincent Guittot wrote:
>>> +static bool is_light_task(struct task_struct *p)
>>> +{
>>> +   /* A light task runs less than 20% in average */
>>> +   return ((p->se.avg.runnable_avg_sum  * 5) <
>>> +   (p->se.avg.runnable_avg_period));
>>> +}
>>
>> OK, so we have a 'problem' here, we initialize runnable_avg_* to 0, but
>> we want to 'assume' a fresh task is fully 'loaded'. IIRC Alex ran into
>> this as well.
>>
>> PJT, do you have any sane solution for this, I forgot what the result
>> of the last discussion was -- was there any?
> 
> The conclusion after last discussion between PJT and Alex was that the
> load contribution of a fresh task be set to "full" during "__sched_fork()".
> 
> task->se.avg.load_avg_contrib = task->se.load.weight during
> __sched_fork() is reflected in the latest power aware scheduler patchset
> by Alex.

Yes, the new forked runnable load was set as full utilisation in V5
power aware scheduling. PJT, Mike and I all agree on this. PJT was just
discussing how to give the full load to a newly forked task, and we reached
agreement on it for my coming V6 power aware scheduling patchset.

> 
> Thanks
> 
> Regards
> Preeti U Murthy
>>
> 


-- 
Thanks Alex
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

linux-next: build warnings after merge of the regmap tree

Hi Mark,

After merging the regmap tree, today's linux-next build (x86_64
allmodconfig) produced these warnings:

In file included from drivers/base/regmap/regmap.c:24:0:
drivers/base/regmap/internal.h: In function 'regcache_get_val_addr':
drivers/base/regmap/internal.h:196:2: warning: return discards 'const' 
qualifier from pointer target type [enabled by default]
In file included from drivers/base/regmap/regcache.c:20:0:
drivers/base/regmap/internal.h: In function 'regcache_get_val_addr':
drivers/base/regmap/internal.h:196:2: warning: return discards 'const' 
qualifier from pointer target type [enabled by default]
In file included from drivers/base/regmap/regcache-rbtree.c:19:0:
drivers/base/regmap/internal.h: In function 'regcache_get_val_addr':
drivers/base/regmap/internal.h:196:2: warning: return discards 'const' 
qualifier from pointer target type [enabled by default]
In file included from drivers/base/regmap/regcache-lzo.c:17:0:
drivers/base/regmap/internal.h: In function 'regcache_get_val_addr':
drivers/base/regmap/internal.h:196:2: warning: return discards 'const' 
qualifier from pointer target type [enabled by default]
In file included from drivers/base/regmap/regcache-flat.c:17:0:
drivers/base/regmap/internal.h: In function 'regcache_get_val_addr':
drivers/base/regmap/internal.h:196:2: warning: return discards 'const' 
qualifier from pointer target type [enabled by default]
In file included from drivers/base/regmap/regmap-debugfs.c:19:0:
drivers/base/regmap/internal.h: In function 'regcache_get_val_addr':
drivers/base/regmap/internal.h:196:2: warning: return discards 'const' 
qualifier from pointer target type [enabled by default]
In file included from drivers/base/regmap/regmap-spi.c:18:0:
drivers/base/regmap/internal.h: In function 'regcache_get_val_addr':
drivers/base/regmap/internal.h:196:2: warning: return discards 'const' 
qualifier from pointer target type [enabled by default]
In file included from drivers/base/regmap/regmap-irq.c:22:0:
drivers/base/regmap/internal.h: In function 'regcache_get_val_addr':
drivers/base/regmap/internal.h:196:2: warning: return discards 'const' 
qualifier from pointer target type [enabled by default]

Introduced by commit 45c6a3e449ea ("regmap: cache: Provide a get address
of value operation").

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au


pgpKyIbYU6VW0.pgp
Description: PGP signature

linux-next: build failure after merge of the akpm tree

Hi Andrew,

After merging the akpm tree, today's linux-next build (powerpc
ppc64_defconfig) failed like this:

arch/powerpc/mm/numa.c: In function 'setup_node_to_cpumask_map':
arch/powerpc/mm/numa.c:72:2: error: expected ';' before 'for'
arch/powerpc/mm/numa.c:65:15: error: unused variable 'node' 
[-Werror=unused-variable]

Caused by commit de9a59a3ebd4 ("powerpc/mm/numa: use setup_nr_node_ids()
instead of opencoding").

I added this fix patch:

From: Stephen Rothwell 
Date: Wed, 27 Mar 2013 15:39:56 +1100
Subject: [PATCH] powerpc/mm/numa: use setup_nr_node_ids() fix

Signed-off-by: Stephen Rothwell 
---
 arch/powerpc/mm/numa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 7574ae3..b8020dc 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -66,7 +66,7 @@ static void __init setup_node_to_cpumask_map(void)
 
/* setup nr_node_ids if not done yet */
if (nr_node_ids == MAX_NUMNODES)
-   setup_nr_node_ids()
+   setup_nr_node_ids();
 
/* allocate the map */
for (node = 0; node < nr_node_ids; node++)
-- 
1.8.1

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au


pgpudGyrUdn6H.pgp
Description: PGP signature

Re: [PATCH 64/86] IP_GRE: Fix kernel panic in IP_GRE with GRE csum.

2013-03-26 Thread Steven Rostedt

On Wed, 2013-03-27 at 04:28 +, Ben Hutchings wrote:
> On Tue, 2013-03-26 at 13:22 -0400, Steven Rostedt wrote:
> 
> > 3.6.11.1 stable review patch.
> > If anyone has any objections, please let me know.
> > 
> > --
> > 
> > From: Pravin B Shelar 
> > 
> > [ Upstream commit d0a7cc630a337b0f56dc145e7eb6232852b14dd4 ]
> > 
> > Due to IP_GRE GSO support, GRE can receive non linear skb which
> > results in panic in case of GRE_CSUM.  Following patch fixes it by
> > using correct csum API.
> > 
> > Bug introduced in commit 6b78f16e4bdde3936b (gre: add GSO support)
> 
> That commit went into 3.7, so I don't think this fix is needed for 3.6.

Ah, you're saying that because 6b78f16e4bdde3936b wasn't in 3.6, this
isn't needed for 3.6 either.

I created scripts to find the commits that made it into 3.7 and 3.8 that
were not added to 3.6 and just cherry-picked them. Unless they failed
because of a conflict, I really didn't look hard at them.

Thanks, I guess I can remove this change then.

-- Steve

> 
> Ben.
> 
> > Signed-off-by: Pravin B Shelar 
> > Acked-by: Eric Dumazet 
> > Signed-off-by: David S. Miller 
> > ---
> >  net/ipv4/ip_gre.c |6 +-
> >  1 file changed, 5 insertions(+), 1 deletion(-)
> > 
> > diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
> > index b062a98..3ee08ce 100644
> > --- a/net/ipv4/ip_gre.c
> > +++ b/net/ipv4/ip_gre.c
> > @@ -946,8 +946,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff 
> > *skb, struct net_device *dev
> > ptr--;
> > }
> > if (tunnel->parms.o_flags&GRE_CSUM) {
> > +   int offset = skb_transport_offset(skb);
> > +
> > *ptr = 0;
> > -   *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), 
> > skb->len - sizeof(struct iphdr));
> > +   *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset,
> > +skb->len - 
> > offset,
> > +0));
> > }
> > }
> >  
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH v3 3/6] sched: pack small tasks

2013-03-26 Thread Preeti U Murthy

Hi Peter,

On 03/26/2013 06:07 PM, Peter Zijlstra wrote:
> On Fri, 2013-03-22 at 13:25 +0100, Vincent Guittot wrote:
>> +static bool is_light_task(struct task_struct *p)
>> +{
>> +   /* A light task runs less than 20% in average */
>> +   return ((p->se.avg.runnable_avg_sum  * 5) <
>> +   (p->se.avg.runnable_avg_period));
>> +}
> 
> OK, so we have a 'problem' here, we initialize runnable_avg_* to 0, but
> we want to 'assume' a fresh task is fully 'loaded'. IIRC Alex ran into
> this as well.
> 
> PJT, do you have any sane solution for this, I forgot what the result
> of the last discussion was -- was there any?

The conclusion after last discussion between PJT and Alex was that the
load contribution of a fresh task be set to "full" during "__sched_fork()".

task->se.avg.load_avg_contrib = task->se.load.weight during
__sched_fork() is reflected in the latest power aware scheduler patchset
by Alex.

Thanks

Regards
Preeti U Murthy
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH V3 2/4] cpufreq: governor: Implement per policy instances of governors

2013-03-26 Thread Viresh Kumar

On 27 March 2013 01:18, Jacob Shin  wrote:
> On Wed, Mar 27, 2013 at 01:02:15AM +0530, Viresh Kumar wrote:
>> +struct dbs_data *gdbs_data;
>> +
>
> Hmm .. I don't think this works for both ondemand and conservative
> governors running at the same time .

Yes, this should fix it (untested for now, i will provide a complete fix
today):

diff --git a/drivers/cpufreq/cpufreq_governor.c
b/drivers/cpufreq/cpufreq_governor.c
index f29feb4..54ca5fc 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -29,9 +29,6 @@

 #include "cpufreq_governor.h"

-/* Common data for platforms that don't need governor instance per policy */
-struct dbs_data *gdbs_data;
-
 static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
 {
u64 idle_time;
@@ -233,7 +230,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
if (have_multiple_policies())
dbs_data = policy->governor_data;
else
-   dbs_data = gdbs_data;
+   dbs_data = cdata->gdbs_data;

WARN_ON(!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT));

@@ -289,7 +286,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
}

if (!have_multiple_policies())
-   gdbs_data = dbs_data;
+   cdata->gdbs_data = dbs_data;

return 0;
case CPUFREQ_GOV_POLICY_EXIT:
@@ -307,7 +304,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,

cdata->exit(dbs_data);
kfree(dbs_data);
-   gdbs_data = NULL;
+   cdata->gdbs_data = NULL;
}

policy->governor_data = NULL;
diff --git a/drivers/cpufreq/cpufreq_governor.h
b/drivers/cpufreq/cpufreq_governor.h
index 1f7de13..cc4a189 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -133,6 +133,9 @@ struct common_dbs_data {
int governor;
struct attribute_group *attr_group;

+   /* Common data for platforms that don't set have_multiple_policies */
+   struct dbs_data *gdbs_data;
+
struct cpu_dbs_common_info *(*get_cpu_cdbs)(int cpu);
void *(*get_cpu_dbs_info_s)(int cpu);
void (*gov_dbs_timer)(struct work_struct *work);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 64/86] IP_GRE: Fix kernel panic in IP_GRE with GRE csum.

On Tue, 2013-03-26 at 13:22 -0400, Steven Rostedt wrote:

> 3.6.11.1 stable review patch.
> If anyone has any objections, please let me know.
> 
> --
> 
> From: Pravin B Shelar 
> 
> [ Upstream commit d0a7cc630a337b0f56dc145e7eb6232852b14dd4 ]
> 
> Due to IP_GRE GSO support, GRE can receive non linear skb which
> results in panic in case of GRE_CSUM.  Following patch fixes it by
> using correct csum API.
> 
> Bug introduced in commit 6b78f16e4bdde3936b (gre: add GSO support)

That commit went into 3.7, so I don't think this fix is needed for 3.6.

Ben.

> Signed-off-by: Pravin B Shelar 
> Acked-by: Eric Dumazet 
> Signed-off-by: David S. Miller 
> ---
>  net/ipv4/ip_gre.c |6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
> index b062a98..3ee08ce 100644
> --- a/net/ipv4/ip_gre.c
> +++ b/net/ipv4/ip_gre.c
> @@ -946,8 +946,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff 
> *skb, struct net_device *dev
>   ptr--;
>   }
>   if (tunnel->parms.o_flags&GRE_CSUM) {
> + int offset = skb_transport_offset(skb);
> +
>   *ptr = 0;
> - *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), 
> skb->len - sizeof(struct iphdr));
> + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset,
> +  skb->len - 
> offset,
> +  0));
>   }
>   }
>  

-- 
Ben Hutchings
I'm not a reverse psychological virus.  Please don't copy me into your sig.


signature.asc
Description: This is a digitally signed message part

Re: linux-next: build failure after merge of the usb tree

2013-03-26 Thread Greg KH

On Wed, Mar 27, 2013 at 11:51:41AM +0800, Ming Lei wrote:
> Hi,
> 
> On Wed, Mar 27, 2013 at 11:41 AM, Stephen Rothwell  
> wrote:
> >
> >
> > I have reverted the commit from usb.current on the assumption that the
> > problem it solves has been, or will be, solved some other way.
> 
> The commit is needed in 3.9 linus tree and stable tree,  but for next
> tree or 3.10,
> a revert commit is required.
> 
> We know it is a problem, sorry, :-(

Yeah, sorry about that, I'll fix this up tomorrow when I merge the trees
together and resolve the conflict.

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: PROBLEM: All CPUs in soft lockup

2013-03-26 Thread li guang

seems tasks are hogging your cpu/memory resource,
did you check the status of your servicing processes?

在 2013-03-27三的 12:55 +1100，Robert Norris写道：
> In the last two weeks we've had three servers (identical hardware,
> software and load) hang. The details in this report are from one that
> hung last night.
> 
> They're all IMAP servers servicing many hundreds of users, so several
> thousand processes and active connections. There's been two major
> application level changes in the last couple of weeks, corresponding to
> the time where these hangs started. One is that we now do mail event
> notifications directly to user clients, so more TCP connections. The
> other is that we're now maintaining live search indexes, so a lot more
> disk and tmpfs IO.
> 
> All that said, we're not under what we'd consider to be heavy load. When
> they're running, the servers are fast and responsive.
> 
> During the hang itself, the machine responds to pings, and TCP
> connections can be established, but the servicing processes never
> respond. The console shows a new "BUG: soft lockup" line every few
> seconds, and will not respond to keyboard input. It is a virtual console
> though, which may or may not make a difference, I'm not sure.
> 
> The kernel is 3.4.33 with AUFS patches applied. However there are no
> AUFS mounts on this machine; we use this elsewhere. If you think that's
> a problem I can rebuild for this machine without it.
> 
> Attached are various bits of information requested in REPORTING-BUGS.
> I'm not entirely sure what else is relevant. I'm happy to supply any
> other information and test things, just let me know.
> 
> Thanks,
> Rob.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: linux-next: build failure after merge of the usb tree

2013-03-26 Thread Ming Lei

Hi,

On Wed, Mar 27, 2013 at 11:41 AM, Stephen Rothwell  
wrote:
>
>
> I have reverted the commit from usb.current on the assumption that the
> problem it solves has been, or will be, solved some other way.

The commit is needed in 3.9 linus tree and stable tree,  but for next
tree or 3.10,
a revert commit is required.

We know it is a problem, sorry, :-(


Thanks,
--
Ming Lei
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v3] watchdog: Add Broadcom BCM2835 watchdog timer driver

On 03/26/2013 11:50 AM, Lubomir Rintel wrote:
> This adds a driver for watchdog timer hardware present on Broadcom BCM2835 
> SoC,
> used in Raspberry Pi and Roku 2 devices.

Tested-by: Stephen Warren 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

linux-next: build failure after merge of the usb tree

Hi Greg,

After merging the usb tree, today's linux-next build (x86_64 allmodconfig)
failed like this:

drivers/usb/serial/usb-serial.c: In function 'usb_serial_probe':
drivers/usb/serial/usb-serial.c:887:3: error: 'struct usb_serial_port' has no 
member named 'delta_msr_wait'

Caused by commit eba0e3c3a0ba ("USB: serial: fix hang when opening port")
from the usb.current tree interacting with commit 53ab34dc50ad ("USB:
serial: remove unused MSR-wait queue") from the usb tree.

I have reverted the commit from usb.current on the assumption that the
problem it solves has been, or will be, solved some other way.
-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au


pgpVSZRJdj9RH.pgp
Description: PGP signature

Re: [PATCH v3] watchdog: Add Broadcom BCM2835 watchdog timer driver

On 03/26/2013 11:50 AM, Lubomir Rintel wrote:
> This adds a driver for watchdog timer hardware present on Broadcom BCM2835 
> SoC,
> used in Raspberry Pi and Roku 2 devices.

Since this patch defines a new DT binding, you should send it to
devicetree-disc...@lists.ozlabs.org too.

> diff --git a/drivers/watchdog/bcm2835_wdt.c b/drivers/watchdog/bcm2835_wdt.c

> +/*
> + * Watchdog driver for Broadcom BCM2835
> + *
> + * Interface to the Broadcom BCM2835 watchdog timer hardware is based on
> + * "bcm2708_wdog" driver written by Luke Diamand that was obtained from 
> branch
> + * "rpi-3.6.y" of git://github.com/raspberrypi/linux.git

I see that the patch isn't S-o-b Luke in the downstream kernel. However,
it is S-o-b Dom Cobley (popcornmix), and they both work for Broadcom, so
I think that's OK.

> +static int bcm2835_wdt_probe(struct platform_device *pdev)
> +{
> + struct device *dev = &pdev->dev;
> + struct device_node *np = dev->of_node;
> + struct bcm2835_wdt *wdt;
> + int err;
> +
> + wdt = devm_kzalloc(dev, sizeof(struct bcm2835_wdt), GFP_KERNEL);
> + if (!wdt) {
> + dev_err(dev, "Failed to allocate memory for watchdog device");
> + return -ENOMEM;
> + }
> +
> + spin_lock_init(&wdt->lock);
> +
> + wdt->base = of_iomap(np, 0);
> + if (!wdt->base) {
> + dev_err(dev, "Failed to remap watchdog regs");
> + return -ENODEV;
> + }
> +
> + platform_set_drvdata(pdev, wdt);
> + watchdog_set_drvdata(&bcm2835_wdt_wdd, wdt);

Do you really need both of those? I would have thought just one would
have been enough.

I'd be tempted to put the platform_set_drvdata() call right after the
devm_kzalloc() of wdt, but it's not a big deal either way.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] watchdog: Add Broadcom BCM2708 watchdog timer driver

On 03/24/2013 08:12 AM, Lubomir Rintel wrote:
> On Fri, 2013-03-22 at 20:24 -0600, Stephen Warren wrote:
> 
> Thank you for your response!
> 
>> On 03/22/2013 06:55 AM, Lubomir Rintel wrote:
>>> Signed-off-by: Lubomir Rintel 

>> A couple of general comments:
>>
>> 1)
>>
>> This driver touches the same registers that
>> arch/arm/mach-bcm2835/bcm2835.c uses to implement reboot and "power
>> off". Some co-ordination might be necessary.
>>
>> The implementation of bcm2835_power_off() could easily be moved into
>> this driver, to avoid some of the need for co-ordination.
>>
>> Moving bcm2835_restart() would be more tricky, since the ARM machine
>> descriptor needs a pointer to that function. I guess the kernel probably
>> ensures that none of the code in this watchdog driver is running by the
>> time bcm2835_restart() is called, although perhaps it'd be better to
>> have mach-bcm2835/bcm2835.c and this driver share a lock?
> 
> I need help here, I'm not sure what's the proper way to address this
> (whether to include the actual reboot code in the wdt driver or the
> platform driver).

I assume by "platform driver" you mean the code in
arch/arm/mach-bcm2835? The phrase "platform driver" usually refers to a
struct platform_driver, so that usage is a little unusual. I think you
would usually say "arch code" to refer to mach-bcm2835/, or something
like that!

> Is it okay to have the platform driver depend on watchdog timer?
> Is it okay for the platform driver not to reboot properly if the kernel
> is running without the wdt driver loaded?
> 
> (For now, I'll send a revised patch addressing the other issues so that
> it can be reviewed without addressing this yet.)

I guess what we should do here is merge the driver as you've posted it,
then later we can migrate any code from arch/arm/mach-bcm2835 into the
WDT driver.

IIRC, there certainly are some existing WDT drivers that implement the
reboot hook for their platforms, so it's probably OK to migrate that way
sometime, although indeed the issues you raise do deserve some thought.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

makedumpfile: benchmark on mmap() with /proc/vmcore on 2TB memory system

2013-03-26 Thread HATAYAMA Daisuke

Hello,

I finally did benchmark makedumpfile with mmap() on /proc/vmcore on
*2TB memory system*.

In summary, it took about 35 seconds to filter 2TB memory. This can be
compared to the two kernel-space filtering works:

- Cliff Wickman's 4 minutes on 8 TB memory system:
  http://lists.infradead.org/pipermail/kexec/2012-November/007177.html

- Jingbai Ma's 17.50 seconds on 1TB memory system:
  https://lkml.org/lkml/2013/3/7/275

= Machine spec

- System: PRIMEQUEST 1800E2
- CPU: Intel(R) Xeon(R) CPU E7- 8870  @ 2.40GHz (8 sockets, 10 cores, 2 threads)
  (*) only 1 lcpu is used in the 2nd kernel now.
- memory: 2TB
- kernel: 3.9-rc3 with the patch set in: https://lkml.org/lkml/2013/3/18/878
- kexec tools: v2.0.4
- makedumpfile
  - v1.5.2-map: git map branch
  - git://git.code.sf.net/p/makedumpfile/code
  - To use mmap, specify --map-size  option.

= Performance of filtering processing

== How to measure

I measured performance of filtering processing by reading time
contained in makedumpfile's report message. For example:

$ makedumpfile --message-level 31 -p -d 31 /proc/vmcore vmcore-pd31
...
STEP [Checking for memory holes  ] : 0.163673 seconds
STEP [Excluding unnecessary pages] : 1.321702 seconds
STEP [Excluding free pages   ] : 0.489022 seconds
STEP [Copying data   ] : 26.221380 seconds

The message starting with "STEP [Excluding" corresponds to the message
of filtering processing.

- STEP [Excluding unnecessary pages] corresponds to the time for
  mem_map array logic.

- STEP [Excluding free pages ] corresponds to the time for free list
  logic.

The message is displayed multiple times in cyclic mode, exactly the
same number of cycles.

== Result

mmap

| map_size | unnecessary | unnecessary |  free list |
| [KB] | cyclic |  non-cyclic | non-cyclic |
|--++-+|
|4 |  66.212|   59.087|  75.165|
|8 |  51.594|   44.863|  75.657|
|   16 |  43.761|   36.338|  75.508|
|   32 |  39.235|   32.911|  76.061|
|   64 |  37.201|   30.201|  76.116|
|  128 |  35.901|   29.238|  76.261|
|  256 |  35.152|   28.506|  76.700|
|  512 |  34.711|   27.956|  77.660|
| 1024 |  34.432|   27.746|  79.319|
| 2048 |  34.361|   27.594|  84.331|
| 4096 |  34.236|   27.474|  91.517|
| 8192 |  34.173|   27.450| 105.648|
|16384 |  34.240|   27.448| 133.099|
|32768 |  34.291|   27.479| 184.488|

read

| unnecessary | unnecessary | free list  |
| cyclic  | non-cyclic  | non-cyclic |
|-+-+|
| 100.859588  | 93.881849   | 80.367015  |

== Discussion

- The best case shows the performance close to the ones in the
  kernel-space works by Cliff and Ma as mentioned first.

- The reason why times consumed for filtering unnecessary pages are
  different between cyclic mode and non-cyclic mode is that the former
  does free pages filtering while the latter does not; in the latter,
  page filtering is done in free list logic.

= Performance degradation in cyclic mode

Next benchmark case is to measure how performance is changed in
cyclic-mode if the number of cycles is increased.

== How to measure

Similarly to the above, but in this benchmark I also added
--cyclic-buffer as parameter.

The command I executed was like:

  for buf_size in 4 8 16 ... 32768 ; do
time makedumpfile --cyclic-buffer ${buf_size} /proc/vmcore vmcore
rm -f ./vmcore
  done

I chose buffer sizes so that the number of cycles ranged from 1 to 8
because current existing huge system memory size is up to 16TB and if
crashkernel=512MB, the number of cycles would be at most 8.

== Result

mmap

| buf size | nr cycles |  1 |  2 |  3 | 4 | 5 | 6 | 7   
  | 8 |  total |
| [KB] |   ||||   |   |   | 
  |   ||
|--+---++++---+---+---+---+---+|
| 8747 | 8 |  4.695 |  4.470 |  4.582 | 4.512 | 4.935 | 4.790 | 
4.824 | 2.345 | 35.153 |
| 9371 | 8 |  5.010 |  4.782 |  4.891 | 4.996 | 5.280 | 5.108 | 
4.986 | 0.007 | 35.059 |
|10092 | 7 |  5.371 |  5.145 |  5.001 | 5.316 | 5.500 | 5.405 | 
2.593 | - | 34.330 |
|10933 | 7 |  5.816 |  5.581 |  5.533 | 6.169 | 6.163 | 5.882 | 
0.007 | - | 35.152 |
|11927 | 6 |  6.308 |  6.078 |  6.174 | 6.734 | 6.667 | 3.049 | -   
  | - | 35.010 |
|13120 | 5 |  6.967 |  6.641 |  6.973 | 7.427 | 6.899 | - | -   
  | - | 34.907 |
|14578 | 5 |  7.678 |  7.536 |  7.948 | 8.161 | 3.845 | - | -   
  | - | 35.167 |
|16400 | 4 |  8.942 |  8.697 |  9.529 | 9.276 | - | - | -   
  | - | 36.445 |
|18743 | 4 |  9.822 |  9.718 | 10.452 | 5

Re: [RT LATENCY] 249 microsecond latency caused by slub's unfreeze_partials() code.

2013-03-26 Thread Steven Rostedt

On Wed, 2013-03-27 at 11:59 +0900, Joonsoo Kim wrote:

> How about using spin_try_lock() in unfreeze_partials() and
> using spin_lock_contented() in get_partial_node() to reduce latency?
> IMHO, this doesn't make code more deterministic, but can maintain
> a benefit of cpu partial page with tolerable latency.

And what do you do when you fail the try lock? Try again, or just break
out?

We can run benchmarks, but I don't like playing games in -rt. It either
is deterministic, or it isn't.

-- Steve


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/5] clk: allow reentrant calls into the clk framework

2013-03-26 Thread Bill Huang

On Thu, 2013-02-28 at 12:49 +0800, Mike Turquette wrote:
> Reentrancy into the clock framework from the clk.h api is highly
> desirable.  This feature is necessary for clocks that are prepared and
> unprepared via i2c_transfer (which includes many PMICs and discrete
> audio chips) and it is also necessary for performing dynamic voltage &
> frequency scaling via clock rate-change notifiers.
> 
> This patch implements reentrancy by adding a global atomic_t which
> tracks the context of the current caller.  Context in this case is the
> return value from get_current().  The clk.h api implementations are
> modified to first see if the relevant global lock is already held and if
> so compare the global context (set by whoever is holding the lock)
> against their own context (via a call to get_current()).  If the two
> match then this function is a nested call from the one already holding
> the lock and we proceed.  If the context does not match then proceed to
> call mutex_lock and busy-wait for the existing task to complete.
> 
> Thus this patch set does not increase concurrency for unrelated calls
> into the clock framework.  Instead it simply allows reentrancy by the
> single task which is currently holding the global clock framework lock.
> 
> Thanks to Rajagopal Venkat for the original idea to use get_current()
> and to David Brown for the suggestion to replace my previous rwlock
> scheme with atomic operations during code review at ELC 2013.
> 
> Signed-off-by: Mike Turquette 
> Cc: Rajagopal Venkat 
> Cc: David Brown 
> ---
Hi Mike,

Will this single patch be accepted? I guess you might not merge the
whole series but I think this one is useful, is it possible that you can
send out this single patch (or just merge this one) as an improvement of
CCF? Or you think otherwise?

Thanks,
Bill


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] cpufreq: cpu0: Fix mistake in Documentation example

2013-03-26 Thread Viresh Kumar

On 27 March 2013 04:20, Rafael J. Wysocki  wrote:
> On Tuesday, March 26, 2013 08:10:46 PM Viresh Kumar wrote:
>> "clock-latency" is incorrectly written as "transition-latency" in an example
>> present in Documentation of cpufreq-cpu0. Fix it.
>>
>> Signed-off-by: Viresh Kumar 
>
> Am I supposed to take this?

Yes.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] watchdog: Add Broadcom BCM2708 watchdog timer driver

On 03/24/2013 08:12 AM, Lubomir Rintel wrote:
> On Fri, 2013-03-22 at 20:24 -0600, Stephen Warren wrote:
> 
> Thank you for your response!
> 
>> On 03/22/2013 06:55 AM, Lubomir Rintel wrote:
>>> Signed-off-by: Lubomir Rintel 

>> I'm curious where you got the documentation to write this driver; this
>> HW module isn't described in BCM2835-ARM-Peripherals.pdf. I assume this
>> is based on the downstream kernel driver? If so, at least some credit in
>> the commit description might be appropriate. At least the relevant
>> commit downstream already has an appropriate Signed-off-by line:-)
> 
> Your guess is right, used bcm2708_wdog driver from rpi-3.6.y as a reference. 
> I'll add that information to the commit message.
> 
> The Signed-off-by line is indeed present, but unfortunately does not seem to 
> be 
> particularly appropriate:
> 
> Signed-off-by: popcornmix 

That s-o-b line maps to Dom Cobley. In a previous message on the
linux-rpi-kernel mailing list, he gave his permission to re-write the
name part of that to "Dom Cobley". That would make the s-o-b useful.

http://lists.infradead.org/pipermail/linux-rpi-kernel/2012-September/000154.html

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: ptracing a task from core_pattern pipe

2013-03-26 Thread Daniel Walker

On Mon, Mar 25, 2013 at 10:48:07AM +0100, Denys Vlasenko wrote:
> On 03/19/2013 09:19 PM, Oleg Nesterov wrote:
> >> The above is regarding the situation which I'm running my corepipe_app ,
> >> i.e. my system doesn't have a disk to save a core file for parsing.
> > 
> > Can't you process the data inplace? You do not need to save it to disk.
> 
> Daniel said:
> 
> >> I'm trying to get the "dumpers" registers and stack out when it fails.
> 
> Registers would be easy'ish to get from coredump:
> they are contained in note sections which are at the beginning
> of the coredump. You can implement necessary parsing without
> too much pain.
> 
> Getting at stack would be harder.

There exists /proc/<pid>/mem and /proc/<pid>/maps on these tasks. If
those don't work then that's a straight up defect..

> But by asking kernel to allow you to poke around dead task's
> address space with ptrace() calls you just shift difficulty away from you
> (today you need to implement in-memory ELF parsing) to kernel people
> (they will need to implement *and support* ptracing of coredumping
> tasks).
> 
> This is somewhat unfair, considering that coredumping code in kernel
> is already a source of many complications, and that kernel-side coding
> is harder than userspace.
> 
> I think you are lucky that ptrace attach even *works* on coredumping task.
> No documentation ever guaranteed such a thing.

There's not much difference from userspace between a task running, and one
dumping.. I think it should be the inverse, ptrace either needs to work
or the special case needs to be documented that it doesn't work.

However, I do agree that you can parse the core file..

Daniel
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/2] vfio-pci: Use byte granularity in config map

2013-03-26 Thread Alex Williamson

On Wed, 2013-03-27 at 10:35 +0800, Gavin Shan wrote:
> On Tue, Mar 26, 2013 at 12:17:03PM -0600, Alex Williamson wrote:
> >The config map previously used a byte per dword to map regions of
> >config space to capabilities.  Modulo a bug where we round the length
> >of capabilities down instead of up, this theoretically works well and
> >saves space so long as devices don't try to hide registers in the gaps
> >between capabilities.  Unfortunately they do exactly that so we need
> >byte granularity on our config space map.  Increase the allocation of
> >the config map and split accesses at capability region boundaries.
> >
> 
> Alex, one question that isn't related to the patch: With current 
> implementation,
> (pdev->cfg_size) bytes are used for capability bits. That's really waste of
> memory because the memory for specific capability will be reserved even though
> that capability can't be supported by the device. I'm not sure the following 
> scheme
> is workable in order to save more memory for that?
> 
> Organize the config space using RB-tree by following struct. The benefit 
> would be
> we won't reserve memory for those unsupported capabilities.
> 
> struct pci_cap_map {
> struct rb_node rb_node;
>   unsigned short start;   /* Start position */
>   unsigned short end; /* End position   */
>   unsigned short cap_id;  /* Capability ID  */
> };

I would definitely like to see some kind of data structure make this
more efficient.  The map obviously gives us direct indexing and if we
just look at legacy config space, it's often tightly packed.  256 bytes
vs (30 bytes * num_capability_regions) is nearly even on overall size.
The extended config space is where space efficiency falls apart.  We
don't actually have that many elements to track either, maybe up to a
dozen.  A sorted list would make the structure size half that of the
tree.  We could even use two lists to split legacy vs extended.
Interested in working on it?  Thanks,

Alex

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] cpufreq: cpu0: Fix mistake in Documentation example

2013-03-26 Thread Shawn Guo

On Tue, Mar 26, 2013 at 08:10:46PM +0530, Viresh Kumar wrote:
> "clock-latency" is incorrectly written as "transition-latency" in an example
> present in Documentation of cpufreq-cpu0. Fix it.
> 
> Signed-off-by: Viresh Kumar 

Acked-by: Shawn Guo 

> ---
>  Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt 
> b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> index 4416ccc..051f764 100644
> --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> @@ -32,7 +32,7 @@ cpus {
>   396000  95
>   198000  85
>   >;
> - transition-latency = <61036>; /* two CLK32 periods */
> + clock-latency = <61036>; /* two CLK32 periods */
>   };
>  
>   cpu@1 {
> -- 
> 1.7.12.rc2.18.g61b472e
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2] hw_random: Add Broadcom BCM2835 RNG driver

On 03/24/2013 08:39 AM, Lubomir Rintel wrote:
> This adds a driver for random number generator present on Broadcom BCM2835 
> SoC,
> used in Raspberry Pi and Roku 2 devices.

This, coupled with the other two patches,
Tested-by: Stephen Warren 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [net-next 1/5] macvtap: set transport header before passing skb to lower device

2013-03-26 Thread Jason Wang

On 03/26/2013 11:06 PM, Eric Dumazet wrote:
> On Tue, 2013-03-26 at 14:19 +0800, Jason Wang wrote:
>> Set the transport header for 1) some drivers (e.g ixgbe) needs l4 header 2)
>> precise packet length estimation (introduced in 1def9238) needs l4 header to
>> compute header length.
>>
>> For the packets with partial checksum, the patch just set the transport 
>> header
>> to csum_start. Otherwise tries to use skb_flow_dissect() to get l4 offset, 
>> if it
>> fails, just pretend no l4 header.
>>
>> Cc: Eric Dumazet 
>> Signed-off-by: Jason Wang 
>> ---
>>  drivers/net/macvtap.c |9 +
>>  1 files changed, 9 insertions(+), 0 deletions(-)
>>
>> diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
>> index a449439..acf6450 100644
>> --- a/drivers/net/macvtap.c
>> +++ b/drivers/net/macvtap.c
>> @@ -21,6 +21,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  
>>  /*
>>   * A macvtap queue is the central object of this driver, it connects
>> @@ -645,6 +646,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, 
>> struct msghdr *m,
>>  int vnet_hdr_len = 0;
>>  int copylen = 0;
>>  bool zerocopy = false;
>> +struct flow_keys keys;
>>  
>>  if (q->flags & IFF_VNET_HDR) {
>>  vnet_hdr_len = q->vnet_hdr_sz;
>> @@ -725,6 +727,13 @@ static ssize_t macvtap_get_user(struct macvtap_queue 
>> *q, struct msghdr *m,
>>  goto err_kfree;
>>  }
>>  
>> +if (skb->ip_summed == CHECKSUM_PARTIAL)
> where ip_summed is set to this value ?
>
>> +skb_set_transport_header(skb, skb_checksum_start_offset(skb));
>> +else if (skb_flow_dissect(skb, &keys))
>> +skb_set_transport_header(skb, keys.thoff);
>> +else
>> +skb_set_transport_header(skb, ETH_HLEN);
>> +
>>  rcu_read_lock_bh();
>>  vlan = rcu_dereference_bh(q->vlan);
>>  /* copy skb_ubuf_info for callback when skb has no error */
> This driver has nice helpers.

Yes, skb_set_partial_csum()
> You should add this code in a helper as well.

Ok
> Because its not clear at this point if csum_start/csum_offset/ip_summed
> are consistent.

Since David has applied the series, I will send patches on top.

Thanks
>
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2] hw_random: Add Broadcom BCM2835 RNG driver

On 03/24/2013 08:39 AM, Lubomir Rintel wrote:
> This adds a driver for random number generator present on Broadcom BCM2835 
> SoC,
> used in Raspberry Pi and Roku 2 devices.

I think this looks OK from a quick glance aside from the authorship issue.

This email wasn't Cc'd to Herbert Xu, who is listed as a co-maintainer
of drivers/char/hw_random/, and looks to be more active in applying
hw_random patches than Matt. (Or, I can take this patch through the
bcm2835 ARM tree if it's ack'd by Matt or Herbert)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RT LATENCY] 249 microsecond latency caused by slub's unfreeze_partials() code.

2013-03-26 Thread Joonsoo Kim

On Mon, Mar 25, 2013 at 02:32:35PM -0400, Steven Rostedt wrote:
> On Mon, 2013-03-25 at 18:27 +0000, Christoph Lameter wrote:
> > On Mon, 25 Mar 2013, Steven Rostedt wrote:
> > 
> > > If this makes it more deterministic, and lower worse case latencies,
> > > then it's definitely worth the price.
> > 
> > Yes that would make it more deterministic. Maybe I should add an option
> > to be able to compile the allocator without cpu partial page support?
> 
> I agree that would be useful.

Hello, Steven and Christoph.

How about using spin_trylock() in unfreeze_partials() and
using spin_is_contended() in get_partial_node() to reduce latency?
IMHO, this doesn't make code more deterministic, but can maintain
a benefit of cpu partial page with tolerable latency.

Thanks.

> 
> -- Steve
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2] bcm2835: Add Broadcom BCM2835 RNG to the device tree

On 03/24/2013 08:31 AM, Lubomir Rintel wrote:
> This adds a device tree binding for random number generator present on 
> Broadcom
> BCM2835 SoC, used in Raspberry Pi and Roku 2 devices.
> 
> Signed-off-by: Lubomir Rintel 
> Cc: Stephen Warren 
> Cc: linux-rpi-ker...@lists.infradead.org
> ---
> Changes for v2:
> - Split out from the driver changeset
> - Added documentation
> 
>  .../devicetree/bindings/rng/brcm,bcm2835.txt   |   13 +
>  arch/arm/boot/dts/bcm2835.dtsi |5 +
>  2 files changed, 18 insertions(+), 0 deletions(-)
>  create mode 100644 Documentation/devicetree/bindings/rng/brcm,bcm2835.txt

I think it's more typical to include the binding document with the
driver patch, so that one patch both defines and implements the binding.
A separate patch then instantiates the binding (i.e. adds entries to *.dts).

Patches that create new DT bindings should be posted to
devicetree-disc...@lists.ozlabs.org, although this one is so simple that
I doubt it will generate any discussion.

Patches that affect ARM files should be posted to
linux-arm-ker...@lists.infradead.org. I typically post there instead of
the main linux-kernel@ mailing list.

I assume you're going to repost this anyway once you've addressed the
authorship issue, so you'll have an opportunity to correct all this.

The actual content of this patch seems fine.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v8 2/9] mfd: Add the main bulk of core driver for SI476x code

From: Andrey Smirnov 

This patch adds the main part (out of three) of the I2C driver for the
"core" of the MFD device.

Acked-by: Hans Verkuil 
Signed-off-by: Andrey Smirnov 
---
 drivers/mfd/si476x-i2c.c |  886 ++
 1 file changed, 886 insertions(+)
 create mode 100644 drivers/mfd/si476x-i2c.c

diff --git a/drivers/mfd/si476x-i2c.c b/drivers/mfd/si476x-i2c.c
new file mode 100644
index 0000000..118c6b1
--- /dev/null
+++ b/drivers/mfd/si476x-i2c.c
@@ -0,0 +1,886 @@
+/*
+ * drivers/mfd/si476x-i2c.c -- Core device driver for si476x MFD
+ * device
+ *
+ * Copyright (C) 2012 Innovative Converged Devices(ICD)
+ * Copyright (C) 2013 Andrey Smirnov
+ *
+ * Author: Andrey Smirnov 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ */
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#define SI476X_MAX_IO_ERRORS   10
+#define SI476X_DRIVER_RDS_FIFO_DEPTH   128
+
+/**
+ * si476x_core_config_pinmux() - pin function configuration function
+ *
+ * @core: Core device structure
+ *
+ * Configure the functions of the pins of the radio chip.
+ *
+ * The function returns zero in case of success or negative error code
+ * otherwise.
+ */
+static int si476x_core_config_pinmux(struct si476x_core *core)
+{
+   int err;
+   dev_dbg(&core->client->dev, "Configuring pinmux\n");
+   err = si476x_core_cmd_dig_audio_pin_cfg(core,
+   core->pinmux.dclk,
+   core->pinmux.dfs,
+   core->pinmux.dout,
+   core->pinmux.xout);
+   if (err < 0) {
+   dev_err(&core->client->dev,
+   "Failed to configure digital audio pins(err = %d)\n",
+   err);
+   return err;
+   }
+
+   err = si476x_core_cmd_zif_pin_cfg(core,
+ core->pinmux.iqclk,
+ core->pinmux.iqfs,
+ core->pinmux.iout,
+ core->pinmux.qout);
+   if (err < 0) {
+   dev_err(&core->client->dev,
+   "Failed to configure ZIF pins(err = %d)\n",
+   err);
+   return err;
+   }
+
+   err = si476x_core_cmd_ic_link_gpo_ctl_pin_cfg(core,
+ core->pinmux.icin,
+ core->pinmux.icip,
+ core->pinmux.icon,
+ core->pinmux.icop);
+   if (err < 0) {
+   dev_err(&core->client->dev,
+   "Failed to configure IC-Link/GPO pins(err = %d)\n",
+   err);
+   return err;
+   }
+
+   err = si476x_core_cmd_ana_audio_pin_cfg(core,
+   core->pinmux.lrout);
+   if (err < 0) {
+   dev_err(&core->client->dev,
+   "Failed to configure analog audio pins(err = %d)\n",
+   err);
+   return err;
+   }
+
+   err = si476x_core_cmd_intb_pin_cfg(core,
+  core->pinmux.intb,
+  core->pinmux.a1);
+   if (err < 0) {
+   dev_err(&core->client->dev,
+   "Failed to configure interrupt pins(err = %d)\n",
+   err);
+   return err;
+   }
+
+   return 0;
+}
+
+static inline void si476x_core_schedule_polling_work(struct si476x_core *core)
+{
+   schedule_delayed_work(&core->status_monitor,
+ usecs_to_jiffies(SI476X_STATUS_POLL_US));
+}
+
+/**
+ * si476x_core_start() - early chip startup function
+ * @core: Core device structure
+ * @soft: When set, this flag forces "soft" startup, where "soft"
+ * power down is the one done by sending appropriate command instead
+ * of using reset pin of the tuner
+ *
+ * Perform required startup sequence to correctly power
+ * up the chip and perform initial configuration. It does the
+ * following sequence of actions:
+ *   1. Claims and enables the power supplies VD and VIO1 required
+ *  for I2C interface of the chip operation.
+ *   2. Waits for 100us, pulls the reset line up, enables irq,
+ *  waits for an

[PATCH v8 4/9] mfd: Add header files and Kbuild plumbing for SI476x MFD core

From: Andrey Smirnov 

This patch adds all necessary header files and Kbuild plumbing for the
core driver for Silicon Laboratories Si476x series of AM/FM tuner
chips.

The driver as a whole is implemented as an MFD device and this patch
adds a core portion of it that provides all the necessary
functionality to the two other drivers that represent radio and audio
codec subsystems of the chip.

Acked-by: Hans Verkuil 
Acked-by: Sam Ravnborg 
Signed-off-by: Andrey Smirnov 
---
 drivers/mfd/Kconfig |   12 +
 drivers/mfd/Makefile|4 +
 include/linux/mfd/si476x-core.h |  525 +++
 3 files changed, 541 insertions(+)
 create mode 100644 include/linux/mfd/si476x-core.h

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 1c0abd4..3cd8f21 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -970,6 +970,18 @@ config MFD_WL1273_CORE
  driver connects the radio-wl1273 V4L2 module and the wl1273
  audio codec.
 
+config MFD_SI476X_CORE
+   tristate "Support for Silicon Laboratories 4761/64/68 AM/FM radio."
+   depends on I2C
+   select MFD_CORE
+   help
+ This is the core driver for the SI476x series of AM/FM
+ radio. This MFD driver connects the radio-si476x V4L2 module
+ and the si476x audio codec.
+
+ To compile this driver as a module, choose M here: the
+ module will be called si476x-core.
+
 config MFD_OMAP_USB_HOST
bool "Support OMAP USBHS core and TLL driver"
depends on USB_EHCI_HCD_OMAP || USB_OHCI_HCD_OMAP3
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 8b977f8..ca87ae8 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -131,6 +131,10 @@ obj-$(CONFIG_MFD_JZ4740_ADC)   += jz4740-adc.o
 obj-$(CONFIG_MFD_TPS6586X) += tps6586x.o
 obj-$(CONFIG_MFD_VX855)+= vx855.o
 obj-$(CONFIG_MFD_WL1273_CORE)  += wl1273-core.o
+
+si476x-core-y := si476x-cmd.o si476x-prop.o si476x-i2c.o
+obj-$(CONFIG_MFD_SI476X_CORE)  += si476x-core.o
+
 obj-$(CONFIG_MFD_CS5535)   += cs5535-mfd.o
 obj-$(CONFIG_MFD_OMAP_USB_HOST)+= omap-usb-host.o omap-usb-tll.o
 obj-$(CONFIG_MFD_PM8921_CORE)  += pm8921-core.o
diff --git a/include/linux/mfd/si476x-core.h b/include/linux/mfd/si476x-core.h
new file mode 100644
index 0000000..2136b26
--- /dev/null
+++ b/include/linux/mfd/si476x-core.h
@@ -0,0 +1,525 @@
+/*
+ * include/linux/mfd/si476x-core.h -- Common definitions for si476x core
+ * device
+ *
+ * Copyright (C) 2012 Innovative Converged Devices(ICD)
+ *
+ * Author: Andrey Smirnov 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#ifndef SI476X_CORE_H
+#define SI476X_CORE_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+/* Command Timeouts */
+#define SI476X_DEFAULT_TIMEOUT 10
+#define SI476X_TIMEOUT_TUNE70
+#define SI476X_TIMEOUT_POWER_UP33
+#define SI476X_STATUS_POLL_US  0
+
+/*  si476x-i2c.c --- */
+
+enum si476x_freq_supported_chips {
+   SI476X_CHIP_SI4761 = 1,
+   SI476X_CHIP_SI4764,
+   SI476X_CHIP_SI4768,
+};
+
+enum si476x_mfd_cells {
+   SI476X_RADIO_CELL = 0,
+   SI476X_CODEC_CELL,
+   SI476X_MFD_CELLS,
+};
+
+/**
+ * enum si476x_power_state - possible power state of the si476x
+ * device.
+ *
+ * @SI476X_POWER_DOWN: In this state all regulators are turned off
+ * and the reset line is pulled low. The device is completely
+ * inactive.
+ * @SI476X_POWER_UP_FULL: In this state all the power regulators are
+ * turned on, reset line pulled high, IRQ line is enabled(polling is
+ * active for polling use scenario) and device is turned on with
+ * POWER_UP command. The device is ready to be used.
+ * @SI476X_POWER_INCONSISTENT: This state indicates that previous
+ * power down was inconsistent, meaning some of the regulators were
+ * not turned down and thus use of the device, without power-cycling
+ * is impossible.
+ */
+enum si476x_power_state {
+   SI476X_POWER_DOWN   = 0,
+   SI476X_POWER_UP_FULL= 1,
+   SI476X_POWER_INCONSISTENT   = 2,
+};
+
+/**
+ * struct si476x_core - internal data structure representing the
+ * underlying "core" device which all the MFD cell-devices use.
+ *
+ * @client: Actual I2C client used to transfer commands to the chip.
+ * @chip_id: Last digit of the chip model(E.g. "1" for SI4761)
+ * @cells: MFD cell devices created by this driver.
+ * @cmd_lock: Mutex used to serialize all the reques

Re: [PATCH] hw_random: Add Broadcom BCM2835 RNG Driver

On 03/24/2013 08:37 AM, Lubomir Rintel wrote:
> On Fri, 2013-03-22 at 20:44 -0600, Stephen Warren wrote:
> 
> Thank you for your response!
> 
>> On 03/22/2013 06:55 AM, Lubomir Rintel wrote:
>>> Signed-off-by: Lubomir Rintel 
>>
>> A commit description would be useful.
>>
>>>  arch/arm/boot/dts/bcm2835.dtsi   |5 +
>>>  arch/arm/configs/bcm2835_defconfig   |3 +-
>>>  drivers/char/hw_random/Kconfig   |   12 +++
>>>  drivers/char/hw_random/Makefile  |1 +
>>>  drivers/char/hw_random/bcm2835-rng.c |  137 
>>> ++
>>
>> This should be split into 3 separate patches: (1) The driver itself, (2)
>> the change to bcm2835.dtsi, and (3) the change to bcm2835_defconfig.
>>
>> Since you're adding a new device to device tree for the first time, you
>> should write a binding document for it; most likely
>> Documentation/devicetree/bindings/rng/brcm,bcm2835.txt (or perhaps
>> /random/ rather than /rng/?)
> 
> Okay. I'm tempted to stick to "rng" instead of "random" as it seems more 
> consistent to me, but I don't have a strong reason to back it with. What 
> would 
> be a good reason for using "random"?

I figured that "random" might be more meaningful/understandable for some
people. It's not a big deal either way though.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v8 7/9] v4l2: Add documentation for the FM RX controls

Add appropriate documentation for all the newly added standard
controls.

Based on the patch by Manjunatha Halli [1]

[1] 
http://lists-archives.com/linux-kernel/27641303-media-update-docs-for-v4l2-fm-new-features.html

Acked-by: Hans Verkuil 
Signed-off-by: Andrey Smirnov 
---
 Documentation/DocBook/media/v4l/compat.xml |3 +
 Documentation/DocBook/media/v4l/controls.xml   |   72 
 .../DocBook/media/v4l/vidioc-g-ext-ctrls.xml   |9 +++
 3 files changed, 84 insertions(+)

diff --git a/Documentation/DocBook/media/v4l/compat.xml 
b/Documentation/DocBook/media/v4l/compat.xml
index 104a1a2..f418bc3 100644
--- a/Documentation/DocBook/media/v4l/compat.xml
+++ b/Documentation/DocBook/media/v4l/compat.xml
@@ -2310,6 +2310,9 @@ more information.

  Added FM Modulator (FM TX) Extended Control Class: 
V4L2_CTRL_CLASS_FM_TX and their Control IDs.

+
+ Added FM Receiver (FM RX) Extended Control Class: 
V4L2_CTRL_CLASS_FM_RX and their Control IDs.
+   

  Added Remote Controller chapter, describing the default Remote 
Controller mapping for media devices.

diff --git a/Documentation/DocBook/media/v4l/controls.xml 
b/Documentation/DocBook/media/v4l/controls.xml
index 1ad20cc..6aa647a 100644
--- a/Documentation/DocBook/media/v4l/controls.xml
+++ b/Documentation/DocBook/media/v4l/controls.xml
@@ -4687,4 +4687,76 @@ interface and may change in the future.
   
 
 
+
+
+  FM Receiver Control Reference
+
+  The FM Receiver (FM_RX) class includes controls for common 
features of
+  FM Reception capable devices.
+
+  
+  FM_RX Control IDs
+
+  
+
+
+
+
+
+
+
+  
+ID
+Type
+  Description
+  
+
+
+  
+  
+V4L2_CID_FM_RX_CLASS 
+class
+  The FM_RX class
+descriptor. Calling &VIDIOC-QUERYCTRL; for this control will return a
+description of this control class.
+  
+  
+V4L2_CID_RDS_RECEPTION 
+boolean
+  Enables/disables RDS
+ reception by the radio tuner
+  
+  
+   V4L2_CID_TUNE_DEEMPHASIS 
+   enum v4l2_deemphasis
+ 
+ Configures the 
de-emphasis value for reception.
+A de-emphasis filter is applied to the broadcast to accentuate the high audio 
frequencies.
+Depending on the region, a time constant of either 50 or 75 useconds is used. 
The enum v4l2_deemphasis
+defines possible values for de-emphasis. Here they are:
+   
+   
+ 
+   
+ 
V4L2_DEEMPHASIS_DISABLED 
+ No de-emphasis is applied.
+   
+   
+ 
V4L2_DEEMPHASIS_50_uS 
+ A de-emphasis of 50 uS is used.
+   
+   
+ 
V4L2_DEEMPHASIS_75_uS 
+ A de-emphasis of 75 uS is used.
+   
+ 
+   
+
+ 
+  
+
+  
+  
+
+  
 
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-ext-ctrls.xml 
b/Documentation/DocBook/media/v4l/vidioc-g-ext-ctrls.xml
index 4e16112..b3bb957 100644
--- a/Documentation/DocBook/media/v4l/vidioc-g-ext-ctrls.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-g-ext-ctrls.xml
@@ -319,6 +319,15 @@ These controls are described in .
  
+
+ 
+   V4L2_CTRL_CLASS_FM_RX
+   0xa1
+   The class containing FM Receiver (FM RX) controls.
+These controls are described in .
+ 
+

   
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v8 6/9] v4l2: Add standard controls for FM receivers

This commit introduces new class of standard controls
V4L2_CTRL_CLASS_FM_RX. This class is intended for all controls
pertaining to FM receiver chips. Also, two controls belonging to said
class are added as a part of this commit: V4L2_CID_TUNE_DEEMPHASIS and
V4L2_CID_RDS_RECEPTION.

This patch is based on the code found in the patch by Manjunatha Halli [1]

[1] 
http://lists-archives.com/linux-kernel/27641307-new-control-class-and-features-for-fm-rx.html

Acked-by: Hans Verkuil 
Signed-off-by: Andrey Smirnov 
---
 drivers/media/v4l2-core/v4l2-ctrls.c |   14 +++---
 include/uapi/linux/v4l2-controls.h   |   13 +
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c 
b/drivers/media/v4l2-core/v4l2-ctrls.c
index 6b28b58..8b89fb8 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -297,8 +297,8 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
"Text",
NULL
};
-   static const char * const tune_preemphasis[] = {
-   "No Preemphasis",
+   static const char * const tune_emphasis[] = {
+   "None",
"50 Microseconds",
"75 Microseconds",
NULL,
@@ -508,7 +508,9 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
case V4L2_CID_SCENE_MODE:
return scene_mode;
case V4L2_CID_TUNE_PREEMPHASIS:
-   return tune_preemphasis;
+   return tune_emphasis;
+   case V4L2_CID_TUNE_DEEMPHASIS:
+   return tune_emphasis;
case V4L2_CID_FLASH_LED_MODE:
return flash_led_mode;
case V4L2_CID_FLASH_STROBE_SOURCE:
@@ -799,6 +801,9 @@ const char *v4l2_ctrl_get_name(u32 id)
case V4L2_CID_DV_RX_POWER_PRESENT:  return "Power Present";
case V4L2_CID_DV_RX_RGB_RANGE:  return "Rx RGB Quantization 
Range";
 
+   case V4L2_CID_FM_RX_CLASS:  return "FM Radio Receiver 
Controls";
+   case V4L2_CID_TUNE_DEEMPHASIS:  return "De-Emphasis";
+   case V4L2_CID_RDS_RECEPTION:return "RDS Reception";
default:
return NULL;
}
@@ -846,6 +851,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum 
v4l2_ctrl_type *type,
case V4L2_CID_MPEG_VIDEO_MPEG4_QPEL:
case V4L2_CID_WIDE_DYNAMIC_RANGE:
case V4L2_CID_IMAGE_STABILIZATION:
+   case V4L2_CID_RDS_RECEPTION:
*type = V4L2_CTRL_TYPE_BOOLEAN;
*min = 0;
*max = *step = 1;
@@ -904,6 +910,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum 
v4l2_ctrl_type *type,
case V4L2_CID_DV_TX_RGB_RANGE:
case V4L2_CID_DV_RX_RGB_RANGE:
case V4L2_CID_TEST_PATTERN:
+   case V4L2_CID_TUNE_DEEMPHASIS:
*type = V4L2_CTRL_TYPE_MENU;
break;
case V4L2_CID_LINK_FREQ:
@@ -926,6 +933,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum 
v4l2_ctrl_type *type,
case V4L2_CID_IMAGE_SOURCE_CLASS:
case V4L2_CID_IMAGE_PROC_CLASS:
case V4L2_CID_DV_CLASS:
+   case V4L2_CID_FM_RX_CLASS:
*type = V4L2_CTRL_TYPE_CTRL_CLASS;
/* You can neither read not write these */
*flags |= V4L2_CTRL_FLAG_READ_ONLY | V4L2_CTRL_FLAG_WRITE_ONLY;
diff --git a/include/uapi/linux/v4l2-controls.h 
b/include/uapi/linux/v4l2-controls.h
index dcd6374..3e985be 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -59,6 +59,7 @@
 #define V4L2_CTRL_CLASS_IMAGE_SOURCE   0x009e0000  /* Image source 
controls */
 #define V4L2_CTRL_CLASS_IMAGE_PROC 0x009f0000  /* Image processing 
controls */
 #define V4L2_CTRL_CLASS_DV 0x00a00000  /* Digital Video 
controls */
+#define V4L2_CTRL_CLASS_FM_RX  0x00a10000  /* FM Receiver 
controls */
 
 /* User-class control IDs */
 
@@ -825,4 +826,16 @@ enum v4l2_dv_rgb_range {
 #defineV4L2_CID_DV_RX_POWER_PRESENT(V4L2_CID_DV_CLASS_BASE 
+ 100)
 #define V4L2_CID_DV_RX_RGB_RANGE   (V4L2_CID_DV_CLASS_BASE + 101)
 
+#define V4L2_CID_FM_RX_CLASS_BASE  (V4L2_CTRL_CLASS_FM_RX | 0x900)
+#define V4L2_CID_FM_RX_CLASS   (V4L2_CTRL_CLASS_FM_RX | 1)
+
+#define V4L2_CID_TUNE_DEEMPHASIS   (V4L2_CID_FM_RX_CLASS_BASE + 1)
+enum v4l2_deemphasis {
+   V4L2_DEEMPHASIS_DISABLED= V4L2_PREEMPHASIS_DISABLED,
+   V4L2_DEEMPHASIS_50_uS   = V4L2_PREEMPHASIS_50_uS,
+   V4L2_DEEMPHASIS_75_uS   = V4L2_PREEMPHASIS_75_uS,
+};
+
+#define V4L2_CID_RDS_RECEPTION (V4L2_CID_FM_RX_CLASS_BASE + 2)
+
 #endif
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www

[PATCH v8 3/9] mfd: Add chip properties handling code for SI476X MFD

From: Andrey Smirnov 

This patch adds code related to manipulation of the properties of
SI476X chips.

Acked-by: Hans Verkuil 
Signed-off-by: Andrey Smirnov 
---
 drivers/mfd/si476x-prop.c |  242 +
 1 file changed, 242 insertions(+)
 create mode 100644 drivers/mfd/si476x-prop.c

diff --git a/drivers/mfd/si476x-prop.c b/drivers/mfd/si476x-prop.c
new file mode 100644
index 0000000..d1f548a
--- /dev/null
+++ b/drivers/mfd/si476x-prop.c
@@ -0,0 +1,242 @@
+/*
+ * drivers/mfd/si476x-prop.c -- Subroutines to access
+ * properties of si476x chips
+ *
+ * Copyright (C) 2012 Innovative Converged Devices(ICD)
+ * Copyright (C) 2013 Andrey Smirnov
+ *
+ * Author: Andrey Smirnov 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include 
+
+#include 
+#include 
+
+struct si476x_property_range {
+   u16 low, high;
+};
+
+static bool si476x_core_element_is_in_array(u16 element,
+   const u16 array[],
+   size_t size)
+{
+   int i;
+
+   for (i = 0; i < size; i++)
+   if (element == array[i])
+   return true;
+
+   return false;
+}
+
+static bool si476x_core_element_is_in_range(u16 element,
+   const struct si476x_property_range 
range[],
+   size_t size)
+{
+   int i;
+
+   for (i = 0; i < size; i++)
+   if (element <= range[i].high && element >= range[i].low)
+   return true;
+
+   return false;
+}
+
+static bool si476x_core_is_valid_property_a10(struct si476x_core *core,
+ u16 property)
+{
+   static const u16 valid_properties[] = {
+   0x0000,
+   0x0500, 0x0501,
+   0x0600,
+   0x0709, 0x070C, 0x070D, 0x70E, 0x710,
+   0x0718,
+   0x1207, 0x1208,
+   0x2007,
+   0x2300,
+   };
+
+   static const struct si476x_property_range valid_ranges[] = {
+   { 0x0200, 0x0203 },
+   { 0x0300, 0x0303 },
+   { 0x0400, 0x0404 },
+   { 0x0700, 0x0707 },
+   { 0x1100, 0x1102 },
+   { 0x1200, 0x1204 },
+   { 0x1300, 0x1306 },
+   { 0x2000, 0x2005 },
+   { 0x2100, 0x2104 },
+   { 0x2106, 0x2106 },
+   { 0x2200, 0x220E },
+   { 0x3100, 0x3104 },
+   { 0x3207, 0x320F },
+   { 0x3300, 0x3304 },
+   { 0x3500, 0x3517 },
+   { 0x3600, 0x3617 },
+   { 0x3700, 0x3717 },
+   { 0x4000, 0x4003 },
+   };
+
+   return  si476x_core_element_is_in_range(property, valid_ranges,
+   ARRAY_SIZE(valid_ranges)) ||
+   si476x_core_element_is_in_array(property, valid_properties,
+   ARRAY_SIZE(valid_properties));
+}
+
+static bool si476x_core_is_valid_property_a20(struct si476x_core *core,
+ u16 property)
+{
+   static const u16 valid_properties[] = {
+   0x071B,
+   0x1006,
+   0x2210,
+   0x3401,
+   };
+
+   static const struct si476x_property_range valid_ranges[] = {
+   { 0x2215, 0x2219 },
+   };
+
+   return  si476x_core_is_valid_property_a10(core, property) ||
+   si476x_core_element_is_in_range(property, valid_ranges,
+   ARRAY_SIZE(valid_ranges))  ||
+   si476x_core_element_is_in_array(property, valid_properties,
+   ARRAY_SIZE(valid_properties));
+}
+
+static bool si476x_core_is_valid_property_a30(struct si476x_core *core,
+ u16 property)
+{
+   static const u16 valid_properties[] = {
+   0x071C, 0x071D,
+   0x1007, 0x1008,
+   0x220F, 0x2214,
+   0x2301,
+   0x3105, 0x3106,
+   0x3402,
+   };
+
+   static const struct si476x_property_range valid_ranges[] = {
+   { 0x0405, 0x0411 },
+   { 0x2008, 0x200B },
+   { 0x2220, 0x2223 },
+   { 0x3100, 0x3106 },
+   };
+
+   return  si476x_core_is_valid_property_a20(core, property) ||
+   si476x_core_element_is_in

[PATCH v8 8/9] v4l2: Add private controls base for SI476X

Add a base to be used for allocation of all the SI476X specific
controls in the corresponding driver.

Acked-by: Hans Verkuil 
Signed-off-by: Andrey Smirnov 
---
 include/uapi/linux/v4l2-controls.h |4 
 1 file changed, 4 insertions(+)

diff --git a/include/uapi/linux/v4l2-controls.h 
b/include/uapi/linux/v4l2-controls.h
index 3e985be..22e5170 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -147,6 +147,10 @@ enum v4l2_colorfx {
  * of controls. We reserve 16 controls for this driver. */
 #define V4L2_CID_USER_MEYE_BASE(V4L2_CID_USER_BASE + 
0x1000)
 
+/* The base for the si476x driver controls. See include/media/si476x.h for the 
list
+ * of controls. Total of 16 controls is reserved for that driver */
+#define V4L2_CID_USER_SI476X_BASE  (V4L2_CID_USER_BASE + 0x1010)
+
 /* MPEG-class control IDs */
 
 #define V4L2_CID_MPEG_BASE (V4L2_CTRL_CLASS_MPEG | 0x900)
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v8 5/9] v4l2: Fix the type of V4L2_CID_TUNE_PREEMPHASIS in the documentation

Change the type of V4L2_CID_TUNE_PREEMPHASIS from 'integer' to 'enum
v4l2_preemphasis'

Acked-by: Hans Verkuil 
Signed-off-by: Andrey Smirnov 
---
 Documentation/DocBook/media/v4l/controls.xml |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/DocBook/media/v4l/controls.xml 
b/Documentation/DocBook/media/v4l/controls.xml
index 9e8f854..1ad20cc 100644
--- a/Documentation/DocBook/media/v4l/controls.xml
+++ b/Documentation/DocBook/media/v4l/controls.xml
@@ -3848,7 +3848,7 @@ in Hz. The range and step are driver-specific.
  
  
V4L2_CID_TUNE_PREEMPHASIS 
-   integer
+   enum v4l2_preemphasis
  
  Configures the 
pre-emphasis value for broadcasting.
 A pre-emphasis filter is applied to the broadcast to accentuate the high audio 
frequencies.
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v8 1/9] mfd: Add commands abstraction layer for SI476X MFD

From: Andrey Smirnov 

This patch adds all the functions used for exchanging commands with
the chip.

Acked-by: Hans Verkuil 
Signed-off-by: Andrey Smirnov 
---
 drivers/mfd/si476x-cmd.c | 1554 ++
 1 file changed, 1554 insertions(+)
 create mode 100644 drivers/mfd/si476x-cmd.c

diff --git a/drivers/mfd/si476x-cmd.c b/drivers/mfd/si476x-cmd.c
new file mode 100644
index 0000000..71ac2e8
--- /dev/null
+++ b/drivers/mfd/si476x-cmd.c
@@ -0,0 +1,1554 @@
+/*
+ * drivers/mfd/si476x-cmd.c -- Subroutines implementing command
+ * protocol of si476x series of chips
+ *
+ * Copyright (C) 2012 Innovative Converged Devices(ICD)
+ * Copyright (C) 2013 Andrey Smirnov
+ *
+ * Author: Andrey Smirnov 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#define msb(x)  ((u8)((u16) x >> 8))
+#define lsb(x)  ((u8)((u16) x &  0x00FF))
+
+
+
+#define CMD_POWER_UP   0x01
+#define CMD_POWER_UP_A10_NRESP 1
+#define CMD_POWER_UP_A10_NARGS 5
+
+#define CMD_POWER_UP_A20_NRESP 1
+#define CMD_POWER_UP_A20_NARGS 5
+
+#define POWER_UP_DELAY_MS  110
+
+#define CMD_POWER_DOWN 0x11
+#define CMD_POWER_DOWN_A10_NRESP   1
+
+#define CMD_POWER_DOWN_A20_NRESP   1
+#define CMD_POWER_DOWN_A20_NARGS   1
+
+#define CMD_FUNC_INFO  0x12
+#define CMD_FUNC_INFO_NRESP7
+
+#define CMD_SET_PROPERTY   0x13
+#define CMD_SET_PROPERTY_NARGS 5
+#define CMD_SET_PROPERTY_NRESP 1
+
+#define CMD_GET_PROPERTY   0x14
+#define CMD_GET_PROPERTY_NARGS 3
+#define CMD_GET_PROPERTY_NRESP 4
+
+#define CMD_AGC_STATUS 0x17
+#define CMD_AGC_STATUS_NRESP_A10   2
+#define CMD_AGC_STATUS_NRESP_A206
+
+#define PIN_CFG_BYTE(x) (0x7F & (x))
+#define CMD_DIG_AUDIO_PIN_CFG  0x18
+#define CMD_DIG_AUDIO_PIN_CFG_NARGS4
+#define CMD_DIG_AUDIO_PIN_CFG_NRESP5
+
+#define CMD_ZIF_PIN_CFG0x19
+#define CMD_ZIF_PIN_CFG_NARGS  4
+#define CMD_ZIF_PIN_CFG_NRESP  5
+
+#define CMD_IC_LINK_GPO_CTL_PIN_CFG0x1A
+#define CMD_IC_LINK_GPO_CTL_PIN_CFG_NARGS  4
+#define CMD_IC_LINK_GPO_CTL_PIN_CFG_NRESP  5
+
+#define CMD_ANA_AUDIO_PIN_CFG  0x1B
+#define CMD_ANA_AUDIO_PIN_CFG_NARGS1
+#define CMD_ANA_AUDIO_PIN_CFG_NRESP2
+
+#define CMD_INTB_PIN_CFG   0x1C
+#define CMD_INTB_PIN_CFG_NARGS 2
+#define CMD_INTB_PIN_CFG_A10_NRESP 6
+#define CMD_INTB_PIN_CFG_A20_NRESP 3
+
+#define CMD_FM_TUNE_FREQ   0x30
+#define CMD_FM_TUNE_FREQ_A10_NARGS 5
+#define CMD_FM_TUNE_FREQ_A20_NARGS 3
+#define CMD_FM_TUNE_FREQ_NRESP 1
+
+#define CMD_FM_RSQ_STATUS  0x32
+
+#define CMD_FM_RSQ_STATUS_A10_NARGS1
+#define CMD_FM_RSQ_STATUS_A10_NRESP17
+#define CMD_FM_RSQ_STATUS_A30_NARGS1
+#define CMD_FM_RSQ_STATUS_A30_NRESP23
+
+
+#define CMD_FM_SEEK_START  0x31
+#define CMD_FM_SEEK_START_NARGS1
+#define CMD_FM_SEEK_START_NRESP1
+
+#define CMD_FM_RDS_STATUS  0x36
+#define CMD_FM_RDS_STATUS_NARGS1
+#define CMD_FM_RDS_STATUS_NRESP16
+
+#define CMD_FM_RDS_BLOCKCOUNT  0x37
+#define CMD_FM_RDS_BLOCKCOUNT_NARGS1
+#define CMD_FM_RDS_BLOCKCOUNT_NRESP8
+
+#define CMD_FM_PHASE_DIVERSITY 0x38
+#define CMD_FM_PHASE_DIVERSITY_NARGS   1
+#define CMD_FM_PHASE_DIVERSITY_NRESP   1
+
+#define CMD_FM_PHASE_DIV_STATUS0x39
+#define CMD_FM_PHASE_DIV_STATUS_NRESP  2
+
+#define CMD_AM_TUNE_FREQ   0x40
+#define CMD_AM_TUNE_FREQ_NARGS 3
+#define CMD_AM_TUNE_FREQ_NRESP 1
+
+#define CMD_AM_RSQ_STATUS  0x42
+#define CMD_AM_RSQ_STATUS_NARGS1
+#define CMD_AM_RSQ_STATUS_NRESP13
+
+#define CMD_AM_SEEK_START

[PATCH v8 0/9] Driver for Si476x series of chips

Driver for Si476x series of chips

This is the eighth version of the patchset originally posted here:
https://lkml.org/lkml/2012/9/13/590

Second version of the patch was posted here:
https://lkml.org/lkml/2012/10/5/598

Third version of the patch was posted here:
https://lkml.org/lkml/2012/10/23/510

Fourth version of the patch was posted here:
https://lkml.org/lkml/2013/2/18/572

Fifth version of the patch was posted here:
https://lkml.org/lkml/2013/2/26/45

Sixth version of the patch was posted here:
https://lkml.org/lkml/2013/2/26/257

Seventh version of the patch was posted here:
https://lkml.org/lkml/2013/2/27/22


To save everyone's time I'll repost the original description of it:

This patchset contains a driver for a Silicon Laboratories 476x series
of radio tuners. The driver itself is implemented as an MFD device
comprised of three parts: 
 1. Core device that provides all the other devices with basic
functionality and locking scheme.
 2. Radio device that translates between V4L2 subsystem requests into
Core device commands.
 3. Codec device that does similar to the earlier described task, but
for ALSA SoC subsystem.

v8 of this driver has following changes:
   - checkpatch.pl fixes

Please note that the patches are not completely warning-free, as far as
checkpatch.pl is concerned, because I skipped all the places where
80-character compliance can be achieved only by means of weird
indentation.

Andrey Smirnov (9):
  mfd: Add commands abstraction layer for SI476X MFD
  mfd: Add the main bulk of core driver for SI476x code
  mfd: Add chip properties handling code for SI476X MFD
  mfd: Add header files and Kbuild plumbing for SI476x MFD core
  v4l2: Fix the type of V4L2_CID_TUNE_PREEMPHASIS in the documentation
  v4l2: Add standard controls for FM receivers
  v4l2: Add documentation for the FM RX controls
  v4l2: Add private controls base for SI476X
  v4l2: Add a V4L2 driver for SI476X MFD

 Documentation/DocBook/media/v4l/compat.xml |3 +
 Documentation/DocBook/media/v4l/controls.xml   |   74 +-
 .../DocBook/media/v4l/vidioc-g-ext-ctrls.xml   |9 +
 Documentation/video4linux/si476x.txt   |  187 +++
 drivers/media/radio/Kconfig|   17 +
 drivers/media/radio/Makefile   |1 +
 drivers/media/radio/radio-si476x.c | 1599 
 drivers/media/v4l2-core/v4l2-ctrls.c   |   14 +-
 drivers/mfd/Kconfig|   12 +
 drivers/mfd/Makefile   |4 +
 drivers/mfd/si476x-cmd.c   | 1554 +++
 drivers/mfd/si476x-i2c.c   |  886 +++
 drivers/mfd/si476x-prop.c  |  242 +++
 include/linux/mfd/si476x-core.h|  525 +++
 include/media/si476x.h |  426 ++
 include/uapi/linux/v4l2-controls.h |   17 +
 16 files changed, 5566 insertions(+), 4 deletions(-)
 create mode 100644 Documentation/video4linux/si476x.txt
 create mode 100644 drivers/media/radio/radio-si476x.c
 create mode 100644 drivers/mfd/si476x-cmd.c
 create mode 100644 drivers/mfd/si476x-i2c.c
 create mode 100644 drivers/mfd/si476x-prop.c
 create mode 100644 include/linux/mfd/si476x-core.h
 create mode 100644 include/media/si476x.h

-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [ 084/104] USB: serial: add modem-status-change wait queue

On Tue, 2013-03-26 at 16:36 +0100, Johan Hovold wrote:
> On Mon, Mar 25, 2013 at 2:06 AM, Ben Hutchings  wrote:
> > 3.2-stable review patch.  If anyone has any objections, please let me know.
> 
> This patch is incorrect as the wait-queue initialisation is missing. A
> fix has been posted to linux-usb:
> 
> http://marc.info/?l=linux-usb&m=136428758202815&w=2
> 
> and should show up in 3.9-rc5. This patch and the following
> use-after-free patches should not be applied without that fix.
[...]

OK, I've dropped these for now.

Ben.

-- 
Ben Hutchings
I'm not a reverse psychological virus.  Please don't copy me into your sig.


signature.asc
Description: This is a digitally signed message part

[RFC] mm: remove swapcache page early

2013-03-26 Thread Minchan Kim

The swap subsystem frees swap slots lazily, expecting that the page
will be swapped out again, so that we can avoid an unnecessary write.

But the problem with in-memory swap is that it consumes memory space
until the vm_swap_full() condition (i.e., half of all swap space is used)
is met. It could be bad if we use multiple swap devices (a small in-memory
swap and a big storage swap) or in-memory swap alone.

This patch changes the vm_swap_full() logic slightly so it can free
swap slots early if the backing device is really fast.
For this, I used SWP_SOLIDSTATE, but it might be controversial,
so let's Cc Shaohua and Hugh.
If it's a problem for SSDs, I'd like to create a new type, SWP_INMEMORY
or something, for the z* family.

Other problem is zram is block device so that it can set SWP_INMEMORY
or SWP_SOLIDSTATE easily(ie, actually, zram is already done) but
I have no idea to use it for frontswap.

Any idea?

Other optimize point is we remove it unconditionally when we
found it's exclusive when swap in happen.
It could help frontswap family, too.
What do you think about it?

Cc: Hugh Dickins 
Cc: Dan Magenheimer 
Cc: Seth Jennings 
Cc: Nitin Gupta 
Cc: Konrad Rzeszutek Wilk 
Cc: Shaohua Li 
Signed-off-by: Minchan Kim 
---
 include/linux/swap.h | 11 ---
 mm/memory.c  |  3 ++-
 mm/swapfile.c| 11 +++
 mm/vmscan.c  |  2 +-
 4 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 2818a12..1f4df66 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -359,9 +359,14 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t,
 extern atomic_long_t nr_swap_pages;
 extern long total_swap_pages;
 
-/* Swap 50% full? Release swapcache more aggressively.. */
-static inline bool vm_swap_full(void)
+/*
+ * Swap 50% full or fast backed device?
+ * Release swapcache more aggressively.
+ */
+static inline bool vm_swap_full(struct swap_info_struct *si)
 {
+   if (si->flags & SWP_SOLIDSTATE)
+   return true;
return atomic_long_read(&nr_swap_pages) * 2 < total_swap_pages;
 }
 
@@ -405,7 +410,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, 
swp_entry_t ent, bool swapout)
 #define get_nr_swap_pages()0L
 #define total_swap_pages   0L
 #define total_swapcache_pages()0UL
-#define vm_swap_full() 0
+#define vm_swap_full(si)   0
 
 #define si_swapinfo(val) \
do { (val)->freeswap = (val)->totalswap = 0; } while (0)
diff --git a/mm/memory.c b/mm/memory.c
index 705473a..1ca21a9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3084,7 +3084,8 @@ static int do_swap_page(struct mm_struct *mm, struct 
vm_area_struct *vma,
mem_cgroup_commit_charge_swapin(page, ptr);
 
swap_free(entry);
-   if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
+   if (likely(PageSwapCache(page)) && (vm_swap_full(page_swap_info(page))
+   || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)))
try_to_free_swap(page);
unlock_page(page);
if (page != swapcache) {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1bee6fa..f9cc701 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -293,7 +293,7 @@ checks:
scan_base = offset = si->lowest_bit;
 
/* reuse swap entry of cache-only swap if not busy. */
-   if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
+   if (vm_swap_full(si) && si->swap_map[offset] == SWAP_HAS_CACHE) {
int swap_was_freed;
spin_unlock(&si->lock);
swap_was_freed = __try_to_reclaim_swap(si, offset);
@@ -382,7 +382,8 @@ scan:
spin_lock(&si->lock);
goto checks;
}
-   if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
+   if (vm_swap_full(si) &&
+   si->swap_map[offset] == SWAP_HAS_CACHE) {
spin_lock(&si->lock);
goto checks;
}
@@ -397,7 +398,8 @@ scan:
spin_lock(&si->lock);
goto checks;
}
-   if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
+   if (vm_swap_full(si) &&
+   si->swap_map[offset] == SWAP_HAS_CACHE) {
spin_lock(&si->lock);
goto checks;
}
@@ -763,7 +765,8 @@ int free_swap_and_cache(swp_entry_t entry)
 * Also recheck PageSwapCache now page is locked (above).
 */
if (PageSwapCache(page) && !PageWriteback(page) &&
-   (!page_mapped(page) || vm_swap_full())) {
+   (!page_mapped(page) ||
+ vm_swap_full(page_swap_info(page {
delete_from_

Re: RFC v2: Zynq Clock Controller

2013-03-26 Thread Mike Turquette

Quoting Sören Brinkmann (2013-03-25 17:03:24)
> On Mon, Mar 25, 2013 at 05:33:10PM -0600, Stephen Warren wrote:
> > On 03/25/2013 12:27 PM, Sören Brinkmann wrote:
> > > Hi Stephen,
> > > 
> > > On Mon, Mar 25, 2013 at 12:13:08PM -0600, Stephen Warren wrote:
> > >> On 03/20/2013 05:56 PM, Sören Brinkmann wrote:
> > >>> Hi,
> > >>>
> > >>> I spent some time working on this and incorporating feedback. Here's an 
> > >>> updated proposal for a clock controller for Zynq:
> > >>>
> > >>> Required properties:
> > >>>  - #clock-cells : Must be 1
> > >>>  - compatible : "xlnx,ps7-clkc"  (this may become 'xlnx,zynq-clkc' 
> > >>> terminology differs a bit between Xilinx internal and mainline)
> > >>>  - ps-clk-frequency : Frequency of the oscillator providing ps_clk in HZ
> > >>>  (usually 33 MHz oscillators are used for Zynq 
> > >>> platforms)
> > >>
> > >> This may have been mentioned before, but shouldn't the input clock be
> > >> represented as an actual clock in DT, and hence as an entry in this
> > >> node's clocks property? The crystal/... itself can be represented in DT
> > >> as a fixed-clock.
> > > Lars-Peter brought that up, too. Please refer to my answer to him.
> > > 
> > >>
> > >>>  - clock-output-names : List of strings used to name the clock outputs. 
> > >>> Shall be a list of the outputs given below.
> > >>
> > >> That shouldn't be required.
> > >
> > > When I want to support of_clk_get_parent_name() for my clocks, I think
> > > it is. And I'm inclined to not break this functionality.
> > 
> > The solution here is to make clock parent names irrelevant.
> > 
> > Also note that device tree is supposed to describe HW. As such, this
> > kind of internal implementation detail of the Linux clock driver should
> > have basically zero effect on the DT binding definition.
> > 
> > >>> Optional properties:
> > >>>  - clocks : as described in the clock bindings
> > >>>  - clock-names : as described in the clock bindings
> > >>
> > >> I think clocks should be required, with at least the main crystal clock
> > >> input always present, but perhaps having some optional entries for the
> > >> (E)MIO feature you mention.
> > >
> > > This is why I have the xtal separate. This way these props are purely
> > > optional and the xtal frequency is obtained separately. It also makes it
> > > a little easier internally, because I don't have to cope with a variable
> > > name for the xtal this way.
> > > 
> > > Describing the xtal as fixed clock in DT means a mandatory entry for
> > > 'clocks' and 'clock-names' and a variable name for the xtal clock. I
> > > wanted to avoid this.
> > 
> > I don't see any benefit with some properties being purely optional.
> > Having optional entries in a property seems just fine to me.
> > 
> > The name of the crystal clock should be irrelevant; that issue simply
> > needs to be fixed. It's driving too much of this discussion, and it will
> > be irrelevant once it's fixed.
> > 
> > >>> Example:
> > >>> clkc: clkc {
> > >>> #clock-cells = <1>;
> > >>> compatible = "xlnx,ps7-clkc";
> > >>> ps-clk-frequency = <>;
> > >>> clock-output-names = "armpll", "ddrpll", "iopll", 
> > >>> "cpu_6or4x", "cpu_3or2x", "cpu_2x", "cpu_1x", "ddr2x", "ddr3x", "dci", 
> > >>> "lqspi", "smc", "pcap", "gem0", "gem1", "fclk0", "fclk1", "fclk2", 
> > >>> "fclk3", "can0", "can1", "sdio0", "sdio1", "uart0", "uart1", "spi0", 
> > >>> "spi1", "dma", "usb0_aper", "usb1_aper", "gem0_aper", "gem1_aper", 
> > >>> "sdio0_aper", "sdio1_aper", "spi0_aper", "spi1_aper", "can0_aper", 
> > >>> "can1_aper", "i2c0_aper", "i2c1_aper", "uart0_aper", "uart1_aper", 
> > >>> "gpio_aper", "lqspi_aper", "smc_aper", "swdt", "dbg_trc", "dbg_apb";  
> > >>> /* long list... explanation below */
> > >>> /* optional props */
> > >>> clocks = <&clkc 16>, <&clk_foo>;
> > >>> clock-names = "gem1_emio_clk", "can_mio_clk_23";
> > >>> };
> > >>>
> > >>> The downside of supporting this is, that I don't see a way around 
> > >>> explicitly listing the clock output names in the DT.
> > >>
> > >> (Please wrap your emails to ~74 characters or so)
> > > I changed my settings.
> > > 
> > >>
> > >> As Mike mentioned off-list, one can create a new clk registration API
> > >> that takes a struct clk* as parent rather than a char *clk_name.
> > >
> > > Then we also have to make sure clocks are probed in a specific order. To
> > > obtain a 'struct clk *' through clk_get() the requested clock has to be
> > > already been probed. Currently clock probing relies purely on data present
> > > in DT. This makes this proposal not that trivial, IMHO.
> > 
> > Simply use deferred probe.
> This would require major changes to the whole clock probing mechanism.

Which mechanism are you referring to?

> Currently, clocks can not defer probing. And in case of circular
> dependencies in the clock tree, it would rather requi

Re: [PATCH v6 5/5] hwmon: add ST-Ericsson ABX500 hwmon driver

2013-03-26 Thread Hongbo Zhang

On 26 March 2013 23:23, Guenter Roeck  wrote:
> On Tue, Mar 26, 2013 at 03:06:59PM +0800, Hongbo Zhang wrote:
>> Each of ST-Ericsson X500 chip set series consists of both ABX500 and DBX500
>> chips. This is ABX500 hwmon driver, where the abx500.c is a common layer for
>> all ABX500s, and the ab8500.c is specific for AB8500 chip. Under this 
>> designed
>> structure, other chip specific files can be added simply using the same 
>> common
>> layer abx500.c.
>>
>> Signed-off-by: Hongbo Zhang 
>
> Looks good except ...
>
>> diff --git a/drivers/power/ab8500_bmdata.c b/drivers/power/ab8500_bmdata.c
>> index d9f1f25..05a1077 100644
>> --- a/drivers/power/ab8500_bmdata.c
>> +++ b/drivers/power/ab8500_bmdata.c
>> @@ -28,8 +28,10 @@ const struct abx500_res_to_temp 
>> ab8500_temp_tbl_a_thermistor[] = {
>>   {60, 13437},
>>   {65, 12500},
>>  };
>> +EXPORT_SYMBOL(ab8500_temp_tbl_a_thermistor);
>>
> Those exports should be in the previous patch where you start exporting the 
> symbols,
> not in this patch.
>
Sorry, This was really my carelessness, will update it.

> Thanks,
> Guenter
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] fuse: Consider the ->big_writes for allocing fuse_req when do writing

2013-03-26 Thread majianpeng

If fc->big_writes == 0, a write operation only handles 4k at a time in
fuse_fill_write_pages().
So when allocating the fuse_req, we should take this restriction into account
in order to avoid additional operations.
The additional operations are the memset in fuse_request_alloc() and
allocating more space if nr_pages is larger than FUSE_REQ_MIN_PAGES.

Signed-off-by: Jianpeng Ma 
---
fs/fuse/file.c | 7 ++-
1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 34b80ba..c584a31 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -912,8 +912,13 @@ static ssize_t fuse_perform_write(struct file *file,
do {
struct fuse_req *req;
ssize_t count;
- unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii));
+ unsigned nr_pages;

+ if (!fc->big_writes)
+ nr_pages = 1;
+ else
+ nr_pages = fuse_wr_pages(pos, iov_iter_count(ii));
+
req = fuse_get_req(fc, nr_pages);
if (IS_ERR(req)) {
err = PTR_ERR(req);
-- 
1.8.2.rc2.4.g7799588

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v3] memcg: Add memory.pressure_level events

2013-03-26 Thread Kamezawa Hiroyuki


(2013/03/22 16:13), Anton Vorontsov wrote:

With this patch userland applications that want to maintain the
interactivity/memory allocation cost can use the pressure level
notifications. The levels are defined like this:

The "low" level means that the system is reclaiming memory for new
allocations. Monitoring this reclaiming activity might be useful for
maintaining cache level. Upon notification, the program (typically
"Activity Manager") might analyze vmstat and act in advance (i.e.
prematurely shutdown unimportant services).

The "medium" level means that the system is experiencing medium memory
pressure, the system might be making swap, paging out active file caches,
etc. Upon this event applications may decide to further analyze
vmstat/zoneinfo/memcg or internal memory usage statistics and free any
resources that can be easily reconstructed or re-read from a disk.

The "critical" level means that the system is actively thrashing, it is
about to run out of memory (OOM) or even the in-kernel OOM killer is on its
way to trigger. Applications should do whatever they can to help the
system. It might be too late to consult with vmstat or any other
statistics, so it's advisable to take an immediate action.

The events are propagated upward until the event is handled, i.e. the
events are not pass-through. Here is what this means: for example you have
three cgroups: A->B->C. Now you set up an event listener on cgroups A, B
and C, and suppose group C experiences some pressure. In this situation,
only group C will receive the notification, i.e. groups A and B will not
receive it. This is done to avoid excessive "broadcasting" of messages,
which disturbs the system and which is especially bad if we are low on
memory or thrashing. So, organize the cgroups wisely, or propagate the
events manually (or, ask us to implement the pass-through events,
explaining why would you need them.)

Performance wise, the memory pressure notifications feature itself is
lightweight and does not require much of bookkeeping, in contrast to the
rest of memcg features. Unfortunately, as of current memcg implementation,
pages accounting is an inseparable part and cannot be turned off. The good
news is that there are some efforts[1] to improve the situation; plus,
implementing the same, fully API-compatible[2] interface for
CONFIG_MEMCG=n case (e.g. embedded) is also a viable option, so it will
not require any changes on the userland side.

[1] http://permalink.gmane.org/gmane.linux.kernel.cgroups/6291
[2] http://lkml.org/lkml/2013/2/21/454

Signed-off-by: Anton Vorontsov 
Acked-by: Kirill A. Shutemov 
---

Hi all,

Here is a shiny new v3!

In v3:

- No changes in the code, just updated commit message to incorporate the
   answer to Minchan Kim's comment regarding applicability to embedded use
   cases in the light of memcg performance overhead, plus gave some
   references to Glauber Costa's memcg work.

- Rebased onto 3.9.0-rc3-next-20130321.

In v2:

- Addressed Glauber Costa's comments:
   o Use parent_mem_cgroup() instead of own parent function (also suggested
 by Kamezawa). This change also affected events distribution logic, so
 it became more like memory thresholds notifications, i.e. we deliver
 the event to the cgroup where the event originated, not to the parent
 cgroup; (This also addresses Kamezawa's remark regarding which cgroup
 receives which event.)
   o Register vmpressure cgroup file directly in memcontrol.c.

   - Addressed Greg Thelen's comments:
 o Fixed bool/int inconsistency in the code;
 o Fixed nr_scanned accounting;
 o Don't use cryptic 's', 'r' abbreviations; get rid of confusing
   'window' argument.

- Addressed Kamezawa Hiroyuki's comments:
   o Moved declarations from mm/internal.h into linux/vmpressure.h;
   o Removed Kconfig symbol. Vmpressure is pretty lightweight (especially
 comparing to the memcg accounting). If it ever causes any measurable
 performance effect, we want to fix it, not paper it over with a
 Kconfig option. :-)
   o Removed read operation on pressure_level cgroup file. In apps, we only
 use notifications, we don't need the content of the file, so let's
 keep things simple for now. Plus this resolves questions like what
 should we return there when the system is not reclaiming;
   o Reworded documentation;
   o Improved comments for vmpressure_prio().

Old changelogs/submissions:
   v2: http://lkml.org/lkml/2013/2/18/577
   v1: http://lkml.org/lkml/2013/2/10/140
   mempressure cgroup: http://lkml.org/lkml/2013/1/4/55

  Documentation/cgroups/memory.txt |  61 +-
  include/linux/vmpressure.h   |  47 
  mm/Makefile  |   2 +-
  mm/memcontrol.c  |  28 +
  mm/vmpressure.c  | 252 +++
  mm/vmscan.c  |   8 ++
  6 files changed, 396 insertions(+), 2 deletions(-)
  create mode 100644 include/linux/vmpressure.h
  cr

Re: [PATCH 4/4] f2fs: fix to give correct parent inode number for roll forward

2013-03-26 Thread Namjae Jeon

>   */
> -static inline void set_cold_file(struct f2fs_sb_info *sbi, struct inode
> *inode,
> +static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode
> *inode,
>   const unsigned char *name)
>  {
>   int i;
> @@ -108,7 +109,7 @@ static inline void set_cold_file(struct f2fs_sb_info
> *sbi, struct inode *inode,
>   int count = le32_to_cpu(sbi->raw_super->extension_count);
>   for (i = 0; i < count; i++) {
>   if (!is_multimedia_file(name, extlist[i])) {
> - F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT;
> + set_cold_file(inode);
>   break;
>   }
>   }
It is just my private opinion.
How about using the name "set_cold_file_from_list" instead of "set_cold_files"?

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH part2 0/4] Allow allocating pagetable on local node in movablemem_map.

2013-03-26 Thread Tang Chen


Hi Yinghai,

Would you please help to review this patch-set ?

And how do you think of the memblock flag idea ?

FYI, Liu Jiang has proposed a similar idea before.
https://lkml.org/lkml/2012/12/6/422

But we may have the following difference:
1) It is a flag, not a tag, which means a range may have several
   different attributes.
2) Mark node-life-cycle data, and put it on local node, and free
   it when hot-removing.
3) Mark and reserve movable memory, as you did.

Thanks. :)

On 03/21/2013 05:21 PM, Tang Chen wrote:

Hi Yinghai, all,

This patch-set is based on Yinghai's tree:
git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git 
for-x86-mm

For main line, we need to apply Yinghai's
"x86, ACPI, numa: Parse numa info early" patch-set first.
Please refer to:
v1: https://lkml.org/lkml/2013/3/7/642
v2: https://lkml.org/lkml/2013/3/10/47


In this part2 patch-set, we did the following things:
1) Introduce a "bool hotpluggable" member into struct numa_memblk so that we are
able to know which memory ranges in numa_meminfo are hotpluggable.
All the related apis have been changed.
2) Introduce a new global variable "numa_meminfo_all" to store all the memory 
ranges
recorded in SRAT, because numa_cleanup_meminfo() will remove ranges higher 
than
max_pfn.
We need full numa memory info to limit zone_movable_pfn[].
3) Move movablemem_map sanitization after memory mapping is initialized so that
pagetable allocation will not be limited by movablemem_map.


On the other hand, we may have another way to solve this problem:

Not only pagetable and vmemmap pages, but also all the data whose life cycle is 
the
same as a node, could be put on local node.

1) Introduce a flag into memblock, such as "LOCAL_NODE_DATA", to mark out which
ranges have the same life cycle with node.
2) Only keep existing memory ranges in movablemem_map (no need to introduce
numa_meminfo_all), and exclude these LOCAL_NODE_DATA ranges.
3) When hot-removing, we are able to find out these ranges, and free them first.
This is very important.

Also, hot-add logic needs to be modified, too. As Yinghai mentioned before, I 
think
we can make memblock alive when memory is hot-added. And go with the same logic
as it is when booting.

What do you think?


Tang Chen (4):
   x86, mm, numa, acpi: Introduce numa_meminfo_all to store all the numa
 meminfo.
   x86, mm, numa, acpi: Introduce hotplug info into struct numa_meminfo.
   x86, mm, numa, acpi: Consider hotplug info when cleanup numa_meminfo.
   x86, mm, numa, acpi: Sanitize movablemem_map after memory mapping
 initialized.

  arch/x86/include/asm/numa.h |3 +-
  arch/x86/kernel/apic/numaq_32.c |2 +-
  arch/x86/mm/amdtopology.c   |3 +-
  arch/x86/mm/numa.c  |  161 +--
  arch/x86/mm/numa_internal.h |1 +
  arch/x86/mm/srat.c  |  141 +-
  6 files changed, 178 insertions(+), 133 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] Revert "mm: introduce VM_POPULATE flag to better deal with racy userspace programs"

2013-03-26 Thread Michel Lespinasse

This reverts commit 1869305009857cdeaabe6283bcdc2359c5784543.

VM_POPULATE only has any effect when userspace plays racy games with
vmas by trying to unmap and remap memory regions that mmap or mlock
are operating on.

Also, the only effect of VM_POPULATE when userspace plays such games
is that it avoids populating new memory regions that get remapped into
the address range that was being operated on by the original mmap or
mlock calls.

Let's remove VM_POPULATE as there isn't any strong argument to mandate
a new vm_flag.

Proposed-by: Hugh Dickins 
Signed-off-by: Michel Lespinasse 

---
 include/linux/mm.h   |  1 -
 include/linux/mman.h |  4 +---
 mm/fremap.c  | 12 ++--
 mm/mlock.c   | 11 +--
 mm/mmap.c|  4 +++-
 5 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7acc9dc73c9f..e19ff30ad0a2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -87,7 +87,6 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_PFNMAP  0x0400  /* Page-ranges managed without "struct 
page", just pure PFN */
 #define VM_DENYWRITE   0x0800  /* ETXTBSY on write attempts.. */
 
-#define VM_POPULATE 0x1000
 #define VM_LOCKED  0x2000
 #define VM_IO   0x4000 /* Memory mapped I/O or similar */
 
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 61c7a87e5d2b..9aa863da287f 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -79,8 +79,6 @@ calc_vm_flag_bits(unsigned long flags)
 {
return _calc_vm_trans(flags, MAP_GROWSDOWN,  VM_GROWSDOWN ) |
   _calc_vm_trans(flags, MAP_DENYWRITE,  VM_DENYWRITE ) |
-  ((flags & MAP_LOCKED) ? (VM_LOCKED | VM_POPULATE) : 0) |
-  (((flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE) ?
-   VM_POPULATE : 0);
+  _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED);
 }
 #endif /* _LINUX_MMAN_H */
diff --git a/mm/fremap.c b/mm/fremap.c
index 4723ac8d2fc2..87da3590c61e 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -204,10 +204,8 @@ get_write_lock:
unsigned long addr;
struct file *file = get_file(vma->vm_file);
 
-   vm_flags = vma->vm_flags;
-   if (!(flags & MAP_NONBLOCK))
-   vm_flags |= VM_POPULATE;
-   addr = mmap_region(file, start, size, vm_flags, pgoff);
+   addr = mmap_region(file, start, size,
+   vma->vm_flags, pgoff);
fput(file);
if (IS_ERR_VALUE(addr)) {
err = addr;
@@ -226,12 +224,6 @@ get_write_lock:
mutex_unlock(&mapping->i_mmap_mutex);
}
 
-   if (!(flags & MAP_NONBLOCK) && !(vma->vm_flags & VM_POPULATE)) {
-   if (!has_write_lock)
-   goto get_write_lock;
-   vma->vm_flags |= VM_POPULATE;
-   }
-
if (vma->vm_flags & VM_LOCKED) {
/*
 * drop PG_Mlocked flag for over-mapped range
diff --git a/mm/mlock.c b/mm/mlock.c
index 1c5e33fce639..79b7cf7d1bca 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -358,7 +358,7 @@ static int do_mlock(unsigned long start, size_t len, int on)
 
newflags = vma->vm_flags & ~VM_LOCKED;
if (on)
-   newflags |= VM_LOCKED | VM_POPULATE;
+   newflags |= VM_LOCKED;
 
tmp = vma->vm_end;
if (tmp > end)
@@ -418,8 +418,7 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
 * range with the first VMA. Also, skip undesirable VMA types.
 */
nend = min(end, vma->vm_end);
-   if ((vma->vm_flags & (VM_IO | VM_PFNMAP | VM_POPULATE)) !=
-   VM_POPULATE)
+   if (vma->vm_flags & (VM_IO | VM_PFNMAP))
continue;
if (nstart < vma->vm_start)
nstart = vma->vm_start;
@@ -492,9 +491,9 @@ static int do_mlockall(int flags)
struct vm_area_struct * vma, * prev = NULL;
 
if (flags & MCL_FUTURE)
-   current->mm->def_flags |= VM_LOCKED | VM_POPULATE;
+   current->mm->def_flags |= VM_LOCKED;
else
-   current->mm->def_flags &= ~(VM_LOCKED | VM_POPULATE);
+   current->mm->def_flags &= ~VM_LOCKED;
if (flags == MCL_FUTURE)
goto out;
 
@@ -503,7 +502,7 @@ static int do_mlockall(int flags)
 
newflags = vma->vm_flags & ~VM_LOCKED;
if (flags & MCL_CURRENT)
-   newflags |= VM_LOCKED | VM_POPULATE;
+   newflags |= VM_LOCKED;
 
/* Ignore errors */
mlock_fixup(vma

Re: [PATCH] clk: divider: Use DIV_ROUND_CLOSEST

2013-03-26 Thread Mike Turquette

Quoting Sören Brinkmann (2013-03-26 15:45:22)
> On Thu, Mar 21, 2013 at 10:15:31AM +0100, Uwe Kleine-König wrote:
> > Hello,
> > 
> > On Wed, Mar 20, 2013 at 07:50:51PM +0100, Sascha Hauer wrote:
> > > On Wed, Mar 20, 2013 at 09:32:51AM -0700, Sören Brinkmann wrote:
> > > > If the caller
> > > > doesn't like the returned frequency he can request a different one.
> > > > And he's eventually happy with the return value he calls
> > > > clk_set_rate() requesting the frequency clk_round_rate() returned.
> > > > Always rounding down seems a bit odd to me.
> > > > 
> > > > Another issue with the current implmentation:
> > > > clk_divider_round_rate() calls clk_divider_bestdiv(), which uses the 
> > > > ROUND_UP macro, returning a rather low frequency.
> > > 
> > > And that is correct. clk_divider_bestdiv is used to calculate the
> > > maximum parent frequency for which a given divider value does not
> > > exceed the desired rate.
> > The reason for that is that the (more?) usual constraint is like: This
> > mmc card can handle up to 100 MHz. Or this i2c device can handle up to
> > this and that frequency. Of course there are different constraints, e.g.
> > for a UART if the target baud speed is 38400 you better run at 38402
> > than at 19201.
> > 
> > I wonder if it depends on the clock if you want "best approximation <=
> > requested value" or "best approximation" or on the caller. In the former
> > case a flag for the clock would be the right thing (as suggested in this
> > thread). If however it's the caller of round_rate who knows better which
> > rounding is preferred than better extend the clk API.
> > 
> > Extending the API could just be a convenience function that doesn't
> > affect the implementations of the clk API. E.g.:
> > 
> >   long clk_round_rate_nearest(struct clk *clk, unsigned long rate)
> >   {
> >   long lower_limit = clk_round_rate(clk, rate);
> >   long upper_limit = clk_round_rate(clk, rate + (rate - 
> > lower_limit));
> > 
> >   if (rate - lower_limit < upper_limit - rate)
> >   return lower_limit;
> >   else
> >   return upper_limit;
> >   }
> > 
> I guess both approaches may work. Anybody has a preference?
> 

A dedicated function like the one Uwe defined is better than adding
subtlety to the existing clk_round_rate via a flag in a clock driver.

Regards,
Mike

> Sören
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 4/6] pci: Add PCIe driver for Samsung Exynos

2013-03-26 Thread Jingoo Han

On Wednesday, March 27, 2013 6:33 AM, Rob Herring wrote:
> 
> On 03/22/2013 11:07 PM, Jingoo Han wrote:
> > Exynos5440 has a PCIe controller which can be used as Root Complex.
> > This driver supports a PCIe controller as Root Complex mode.
> >
> > Signed-off-by: Surendranath Gurivireddy Balla 
> > Signed-off-by: Siva Reddy Kallam 
> > Signed-off-by: Jingoo Han 
> > ---
> >  .../devicetree/bindings/pci/exynos-pcie.txt|   56 +
> >  drivers/pci/host/Kconfig   |5 +
> >  drivers/pci/host/Makefile  |1 +
> >  drivers/pci/host/pci-exynos.c  | 1139 
> > 
> >  4 files changed, 1201 insertions(+), 0 deletions(-)
> >  create mode 100644 Documentation/devicetree/bindings/pci/exynos-pcie.txt
> >  create mode 100644 drivers/pci/host/Makefile
> >  create mode 100644 drivers/pci/host/pci-exynos.c
> 
> [...]
> 
> > +
> > +/* synopsis specific PCIE configuration registers*/
> 
> If this is a standard IP block, then the driver naming should reflect
> that. I suspect there are several others with the same IP block.

Sorry, I don't think so.
Only the core block is a standard IP block; the other parts are Exynos-specific.
So it is hard to share this driver with other PCIe IPs that use the Synopsys core.

Best regards,
Jingoo Han

> 
> Rob

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/4] f2fs: do not skip writing file meta during fsync

2013-03-26 Thread Namjae Jeon

2013/3/27, Jaegeuk Kim :
> 2013-03-27 (수), 09:57 +0900, Namjae Jeon:
>> 2013/3/27, Jaegeuk Kim :
>> > 2013-03-26 (화), 09:48 +0900, Namjae Jeon:
>> >> 2013/3/25, Jaegeuk Kim :
>> >> > This patch removes data_version check flow during the fsync call.
>> >> > The original purpose for the use of data_version was to avoid writng
>> >> > inode
>> >> > pages redundantly by the fsync calls repeatedly.
>> >> Hi Jaegeuk.
>> >> > However, when user can modify file meta and then call fsync, we
>> >> > should
>> >> > not
>> >> > skip fsync procedure.
>> >> I have a question.
>> >> Which case does user can directly modify meta ? Recovery tool ?
>> >
>> > The meta means the inode information like atime, mtime, size, and so
>> > on,
>> > which can be modified by setattr() or something other vfs apis.
>> > Thanks,
>> I understood. Thanks for your explanation :)
>> One more,,
>> When inode state is  !(inode->i_state & I_DIRTY)), We don't need to skip
>> ?
>
> Even though fsync writes no data and the inode is clean, we should mark
> the inode to recover after power-off-recovery.
> Any data and its inode can be written to the disk clearly before fsync
> was called.
Okay, Clear.
Thanks Jaegeuk!

> Thanks,
>
>>
>> Thanks.
>> >
>> >>
>> >> Thanks.
>> >>
>> >> > So, let's remove this condition check and hope that user triggers in
>> >> > right
>> >> > manner.
>> >> >
>> >> > Signed-off-by: Jaegeuk Kim 
>> >> --
>> >> To unsubscribe from this list: send the line "unsubscribe
>> >> linux-kernel"
>> >> in
>> >> the body of a message to majord...@vger.kernel.org
>> >> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> >> Please read the FAQ at  http://www.tux.org/lkml/
>> >
>> > --
>> > Jaegeuk Kim
>> > Samsung
>> >
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel"
>> in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
>
> --
> Jaegeuk Kim
> Samsung
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [GIT PULL] nohz: Full dynticks base interface

2013-03-26 Thread Paul E. McKenney

On Wed, Mar 27, 2013 at 12:48:20AM +0100, Frederic Weisbecker wrote:
> 2013/3/25 Paul E. McKenney :
> > On Mon, Mar 25, 2013 at 06:12:12PM +0100, Frederic Weisbecker wrote:
> >> 2013/3/25 Paul E. McKenney :
> >> > On Sun, Mar 24, 2013 at 03:46:40PM +0100, Frederic Weisbecker wrote:
> >> >> 2013/3/24 Ingo Molnar :
> >> >> >
> >> >> > * Frederic Weisbecker  wrote:
> >> >> >
> >> >> >> Hi Ingo,
> >> >> >>
> >> >> >> This settles the initial ground to start a special full dynticks 
> >> >> >> tree in -tip
> >> >> >> that we can iterate incrementally to accelerate the development.
> >> >> >> It is based on tip:sched/core.
> >> >> >>
> >> >> >> I tried to rearrange a bit the naming. We are probably not yet done 
> >> >> >> with
> >> >> >> that but I guess we can fix it along with the rest.
> >> >> >>
> >> >> >> Please pull from:
> >> >> >>
> >> >> >> git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks.git
> >> >> >>   full-dynticks-for-mingo
> >> >> >>
> >> >> >> Changes on these commits since they were part of 3.9-rc1-nohz1:
> >> >> >>
> >> >> >> * Force a timekeeping CPU over the full dynticks range
> >> >> >> * Rename CONFIG_NO_HZ_FULL to CONFIG_NO_HZ_EXTENDED
> >> >> >> * Following *_nohz_extended_* APIs renames
> >> >> >> * Handle CPU hotplug for timekeeping
> >> >> >> * Rename full_nohz= kernel parameter to nohz_extended=
> >> >> >
> >> >> > Note that boot parameters suck for pretty much any purpose but quirks 
> >> >> > -
> >> >> > please also add a (default off!) Kconfig option to easily enable
> >> >> > nohz_extended for all CPUs.
> >> >> >
> >> >> > That way I will be able to test it automatically via randconfig and 
> >> >> > such.
> >> >>
> >> >> Sure, I'm adding such an option.
> >> >
> >> > Hmmm...  This would be an option to make all but one CPU an 
> >> > adaptive-ticks
> >> > CPU, right?  If so, this leads to the question of whether I should add a
> >> > matching no-CBs Kconfig option.  My guess is "no", because the existing
> >> > CONFIG_RCU_NOCB_CPU_ALL should work just fine -- there would be a CPU 
> >> > that
> >> > was not an adaptive-ticks CPU, but does have its RCU callbacks offloaded.
> >> >
> >> > Or am I missing something here?
> >>
> >> No that looks right. Now I wonder if I should select
> >> CONFIG_RCU_NOCB_CPU_ALL at the same time. Probably.
> >
> > Sounds like a good initial position to me.  If it somehow causes problems,
> > we can always change it later.
> 
> Ah "rcu: Provide compile-time control for no-CBs CPUs" is not yet in
> -tip so I can't do that yet. Ok for now I'm going to add
> CONFIG_NO_HZ_EXTENDED_ALL and will select the matching RCU config once
> it's visible upstream.

Good point...  I expect to be sending a pull request in a day or two.

Thanx, Paul

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/4] f2fs: do not skip writing file meta during fsync

2013-03-26 Thread Jaegeuk Kim

2013-03-27 (수), 09:57 +0900, Namjae Jeon:
> 2013/3/27, Jaegeuk Kim :
> > 2013-03-26 (화), 09:48 +0900, Namjae Jeon:
> >> 2013/3/25, Jaegeuk Kim :
> >> > This patch removes data_version check flow during the fsync call.
> >> > The original purpose for the use of data_version was to avoid writng
> >> > inode
> >> > pages redundantly by the fsync calls repeatedly.
> >> Hi Jaegeuk.
> >> > However, when user can modify file meta and then call fsync, we should
> >> > not
> >> > skip fsync procedure.
> >> I have a question.
> >> Which case does user can directly modify meta ? Recovery tool ?
> >
> > The meta means the inode information like atime, mtime, size, and so on,
> > which can be modified by setattr() or something other vfs apis.
> > Thanks,
> I understood. Thanks for your explanation :)
> One more,,
> When inode state is  !(inode->i_state & I_DIRTY)), We don't need to skip ?

Even though fsync writes no data and the inode is clean, we should mark
the inode to recover after power-off-recovery.
Any data and its inode can be written to the disk clearly before fsync
was called.
Thanks,

> 
> Thanks.
> >
> >>
> >> Thanks.
> >>
> >> > So, let's remove this condition check and hope that user triggers in
> >> > right
> >> > manner.
> >> >
> >> > Signed-off-by: Jaegeuk Kim 
> >> --
> >> To unsubscribe from this list: send the line "unsubscribe linux-kernel"
> >> in
> >> the body of a message to majord...@vger.kernel.org
> >> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >> Please read the FAQ at  http://www.tux.org/lkml/
> >
> > --
> > Jaegeuk Kim
> > Samsung
> >
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

-- 
Jaegeuk Kim
Samsung


signature.asc
Description: This is a digitally signed message part

Re: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

2013-03-26 Thread Li Zefan

> Although correct, it is a bit misleading. It is static in the sense it
> is held by a static variable. But it is acquired by kmalloc...
> 
> In any way, this is a tiny detail.
> 
> FWIW, I am fine with the patch you provided:
> 
> Acked-by: Glauber Costa 
> 

Michal, could you resend your final patch to Tejun in a new mail thread?
There are quite a few different patches inlined in this thread.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] staging: zsmalloc: Fix link error on ARM

2013-03-26 Thread Minchan Kim

On Wed, Mar 27, 2013 at 01:43:14AM +0100, Joerg Roedel wrote:
> On Wed, Mar 27, 2013 at 09:05:52AM +0900, Minchan Kim wrote:
> > And please Cc stable.
> 
> Okay, here it is. The result is compile-tested.
> 
> Changes since v1:
> 
> * Remove the module-export for unmap_kernel_range and make zsmalloc
>   built-in instead
> 
> Here is the patch:
> 
> >From 2b70502720b36909f9f39bdf27be21321a219c31 Mon Sep 17 00:00:00 2001
> From: Joerg Roedel 
> Date: Tue, 26 Mar 2013 23:24:22 +0100
> Subject: [PATCH v2] staging: zsmalloc: Fix link error on ARM
> 
> Testing the arm chromebook config against the upstream
> kernel produces a linker error for the zsmalloc module from
> staging. The symbol flush_tlb_kernel_range is not available
> there. Fix this by removing the reimplementation of
> unmap_kernel_range in the zsmalloc module and using the
> function directly. The unmap_kernel_range function is not
> usable by modules, so also disallow building the driver as a
> module for now.
> 
> Cc: sta...@vger.kernel.org
> Signed-off-by: Joerg Roedel 
Acked-by: Minchan Kim 

-- 
Kind regards,
Minchan Kim
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

linux-next: manual merge of the net-next tree with Linus' tree

Hi all,

Today's linux-next merge of the net-next tree got a conflict in
include/net/ipip.h between commit 330305cc4a6b ("ipv4: Fix ip-header
identification for gso packets") from Linus' tree and commit c54419321455
("GRE: Refactor GRE tunneling code") from the net-next tree.

I just dropped the file (as the latter change did) and can carry the fix
as necessary (no action is required).

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au


pgph08zeq0zTp.pgp
Description: PGP signature

Re: [PATCH 2/4] f2fs: do not skip writing file meta during fsync

2013-03-26 Thread Namjae Jeon

2013/3/27, Jaegeuk Kim :
> 2013-03-26 (화), 09:48 +0900, Namjae Jeon:
>> 2013/3/25, Jaegeuk Kim :
>> > This patch removes data_version check flow during the fsync call.
>> > The original purpose for the use of data_version was to avoid writng
>> > inode
>> > pages redundantly by the fsync calls repeatedly.
>> Hi Jaegeuk.
>> > However, when user can modify file meta and then call fsync, we should
>> > not
>> > skip fsync procedure.
>> I have a question.
>> Which case does user can directly modify meta ? Recovery tool ?
>
> The meta means the inode information like atime, mtime, size, and so on,
> which can be modified by setattr() or something other vfs apis.
> Thanks,
I understood. Thanks for your explanation :)
One more,,
When inode state is  !(inode->i_state & I_DIRTY)), We don't need to skip ?

Thanks.
>
>>
>> Thanks.
>>
>> > So, let's remove this condition check and hope that user triggers in
>> > right
>> > manner.
>> >
>> > Signed-off-by: Jaegeuk Kim 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel"
>> in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
>
> --
> Jaegeuk Kim
> Samsung
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/2] timer_list: convert timer list to be a proper seq_file

2013-03-26 Thread Nathan Zimmer

When running with 4096 cores, attempting to read /proc/timer_list will fail
with an ENOMEM condition.  On a sufficiently large system the total amount
of data is more than 4 MB, so it won't fit into a single buffer.  The
failure can also occur on smaller systems when memory fragmentation is
high, as reported by Dave Jones.

Convert /proc/timer_list to a proper seq_file with its own iterator.  This
is a little more complex given that we have to make two passes with two
separate headers.

sysrq_timer_list_show also needed to be updated to reflect the fact that
timer_list_show now only handles one cpu at a time.

Signed-off-by: Nathan Zimmer 
Reported-by: Dave Jones 
Cc: John Stultz 
Cc: Thomas Gleixner 
Cc: Stephen Boyd 

v2: Added comments on the iteration and other fixups pointed to by Andrew.
v3: Corrected the case where max_cpus != nr_cpu_ids by exiting early.
v5: Use seq_open_private and supply a proper iterator rather than a big mess.
---
 kernel/time/timer_list.c | 89 +---
 1 file changed, 77 insertions(+), 12 deletions(-)

diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 380a589..3bdf283 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -20,6 +20,13 @@
 
 #include 
 
+
+struct timer_list_iter {
+   int cpu;
+   bool second_pass;
+   u64 now;
+};
+
 typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes);
 
 DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
@@ -247,43 +254,101 @@ static void timer_list_show_tickdevices_header(struct seq_file *m)
 }
 #endif
 
-static int timer_list_show(struct seq_file *m, void *v)
+static inline void timer_list_header(struct seq_file *m, u64 now)
 {
-   u64 now = ktime_to_ns(ktime_get());
-   int cpu;
-
SEQ_printf(m, "Timer List Version: v0.7\n");
SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
SEQ_printf(m, "\n");
+}
+
+static int timer_list_show(struct seq_file *m, void *v)
+{
+   struct timer_list_iter *iter = v;
+   u64 now = ktime_to_ns(ktime_get());
+
+   if (iter->cpu == -1 && !iter->second_pass)
+   timer_list_header(m, now);
+   else if (!iter->second_pass)
+   print_cpu(m, iter->cpu, iter->now);
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+   else if (iter->cpu == -1 && iter->second_pass)
+   timer_list_show_tickdevices_header(m);
+   else
+   print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu);
+#endif
+   return 0;
+}
+
+void sysrq_timer_list_show(void)
+{
+   u64 now = ktime_to_ns(ktime_get());
+   int cpu;
+
+   timer_list_header(NULL, now);
 
for_each_online_cpu(cpu)
-   print_cpu(m, cpu, now);
+   print_cpu(NULL, cpu, now);
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
-   timer_list_show_tickdevices_header(m);
+   timer_list_show_tickdevices_header(NULL);
for_each_online_cpu(cpu)
-   print_tickdevice(m, tick_get_device(cpu), cpu);
+   print_tickdevice(NULL, tick_get_device(cpu), cpu);
 #endif
+   return;
+}
 
-   return 0;
+static void *timer_list_start(struct seq_file *file, loff_t *offset)
+{
+   struct timer_list_iter *iter = file->private;
+
+   if (!*offset) {
+   iter->cpu = -1;
+   iter->now = ktime_to_ns(ktime_get());
+   } else if (iter->cpu >= nr_cpu_ids) {
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+   if (!iter->second_pass) {
+   iter->cpu = -1;
+   iter->second_pass = true;
+   } else
+   return NULL;
+#else
+   return NULL;
+#endif
+   }
+   return iter;
 }
 
-void sysrq_timer_list_show(void)
+static void *timer_list_next(struct seq_file *file, void *v, loff_t *offset)
 {
-   timer_list_show(NULL, NULL);
+   struct timer_list_iter *iter = file->private;
+   iter->cpu = cpumask_next(iter->cpu, cpu_online_mask);
+   ++*offset;
+   return timer_list_start(file, offset);
 }
 
+static void timer_list_stop(struct seq_file *seq, void *v)
+{
+}
+
+static const struct seq_operations timer_list_sops = {
+   .start = timer_list_start,
+   .next = timer_list_next,
+   .stop = timer_list_stop,
+   .show = timer_list_show,
+};
+
 static int timer_list_open(struct inode *inode, struct file *filp)
 {
-   return single_open(filp, timer_list_show, NULL);
+   return seq_open_private(filp, &timer_list_sops,
+   sizeof(struct timer_list_iter));
 }
 
 static const struct file_operations timer_list_fops = {
.open   = timer_list_open,
.read   = seq_read,
.llseek = seq_lseek,
-   .release= single_release,
+   .release= seq_release_private,
 };
 
 static int __init init_timer_list_procfs(void)
-- 
1.8.1.2

[PATCH 1/2] timer_list: split timer_list_show_tickdevices

2013-03-26 Thread Nathan Zimmer

Split the header out of timer_list_show_tickdevices() and pull the rest up
into timer_list_show().  Also tweak the location of the whitespace.  This is
all to prep for the fix.

Signed-off-by: Nathan Zimmer 
Reported-by: Dave Jones 
Cc: John Stultz 
Cc: Thomas Gleixner 
Cc: Stephen Boyd 

v4: correct extra whitespace
---
 kernel/time/timer_list.c | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index af5a7e9..380a589 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -133,7 +133,6 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
int i;
 
-   SEQ_printf(m, "\n");
SEQ_printf(m, "cpu: %d\n", cpu);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
SEQ_printf(m, " clock %d:\n", i);
@@ -187,6 +186,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
 
 #undef P
 #undef P_ns
+   SEQ_printf(m, "\n");
 }
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
@@ -195,7 +195,6 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
 {
struct clock_event_device *dev = td->evtdev;
 
-   SEQ_printf(m, "\n");
SEQ_printf(m, "Tick Device: mode: %d\n", td->mode);
if (cpu < 0)
SEQ_printf(m, "Broadcast device\n");
@@ -230,12 +229,11 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
print_name_offset(m, dev->event_handler);
SEQ_printf(m, "\n");
SEQ_printf(m, " retries:%lu\n", dev->retries);
+   SEQ_printf(m, "\n");
 }
 
-static void timer_list_show_tickdevices(struct seq_file *m)
+static void timer_list_show_tickdevices_header(struct seq_file *m)
 {
-   int cpu;
-
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
print_tickdevice(m, tick_get_broadcast_device(), -1);
SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
@@ -246,12 +244,7 @@ static void timer_list_show_tickdevices(struct seq_file *m)
 #endif
SEQ_printf(m, "\n");
 #endif
-   for_each_online_cpu(cpu)
-   print_tickdevice(m, tick_get_device(cpu), cpu);
-   SEQ_printf(m, "\n");
 }
-#else
-static void timer_list_show_tickdevices(struct seq_file *m) { }
 #endif
 
 static int timer_list_show(struct seq_file *m, void *v)
@@ -262,12 +255,16 @@ static int timer_list_show(struct seq_file *m, void *v)
SEQ_printf(m, "Timer List Version: v0.7\n");
SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
+   SEQ_printf(m, "\n");
 
for_each_online_cpu(cpu)
print_cpu(m, cpu, now);
 
-   SEQ_printf(m, "\n");
-   timer_list_show_tickdevices(m);
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+   timer_list_show_tickdevices_header(m);
+   for_each_online_cpu(cpu)
+   print_tickdevice(m, tick_get_device(cpu), cpu);
+#endif
 
return 0;
 }
-- 
1.8.1.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5 0/2] timer_list: Fix /proc/timer_list failure on 4096 cpus

2013-03-26 Thread Nathan Zimmer

On systems with 4096 cores, attempting to read /proc/timer_list
fails because we are trying to push all the data into a single
kmalloc buffer.

A better solution is to not use the single_open mechanism but to
provide our own seq_operations and treat each cpu as an
individual record.

The output should be identical to the previous version.

v2: Added comments on the iteration and other fixups pointed to by Andrew.
v3: Corrected the case where max_cpus != nr_cpu_ids by exiting early.
v5: Use seq_open_private and supply a proper iterator rather than a big mess.

Nathan Zimmer (2):
  timer_list: split timer_list_show_tickdevices
  timer_list: convert timer list to be a proper seq_file

 kernel/time/timer_list.c | 104 +--
 1 file changed, 83 insertions(+), 21 deletions(-)

-- 
1.8.1.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Revert VM_POPULATE?

2013-03-26 Thread Michel Lespinasse

On Tue, Mar 26, 2013 at 5:26 PM, Hugh Dickins  wrote:
> Michel, I propose that we revert 3.9-rc1's VM_POPULATE flag - 186930500985
> "mm: introduce VM_POPULATE flag to better deal with racy userspace programs".
>
> Konstantin's 3.7 cleanup of VM_flags has left several bits below 32
> free, but sooner or later someone will want to come through again and
> free some more, and I think VM_POPULATE will be among the first to go.
>
> It just doesn't add much value, and flags a transient condition which
> then sticks around indefinitely.  Better we remove it now than later.
>
> You said yourself in the 0/8 or 1/8:
> - Patch 8 is optional to this entire series. It only helps to deal more
>   nicely with racy userspace programs that might modify their mappings
>   while we're trying to populate them. It adds a new VM_POPULATE flag
>   on the mappings we do want to populate, so that if userspace replaces
>   them with mappings it doesn't want populated, mm_populate() won't
>   populate those replacement mappings.
> when you were just testing the waters with 8/8 to see if it was wanted.
>
> I don't see any serious problem with it.  We can probably contrive
> a case in which someone mlocks-then-munlocks scattered segments of a
> large vma, and the VM_POPULATE flag left behind prevents the segments
> from being merged back into a single vma; but that can happen in other
> ways, so it doesn't count for much.
>
> (I presume VM_POPULATE is left uncleared, because there could always be
> races when it's cleared too soon - if userspace is racing with itself.)

Yes, VM_POPULATE is never cleared.

> I just don't see VM_POPULATE solving any real problem: the kernel code
> appears to be safe enough without it, and if userspace wishes to play
> racing mmap games, oh, just let it.

All right. I have no major objections - the kernel will be fine
without VM_POPULATE, and the only downside of removing it is that we
might do more work to populate new mappings if userspace plays games,
as you say, unmapping and remapping vmas before the original mmap call
that created it returns (or while an mlock call that operates on it is
running). I don't care strongly about kernel behavior in such cases as
long as it doesn't affect other processes, so I'm OK with reverting
VM_POPULATE as long as others agree.

I'll send out a code review to do that.

-- 
Michel "Walken" Lespinasse
A program is never fully debugged until the last user dies.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] staging: zsmalloc: Fix link error on ARM

2013-03-26 Thread Joerg Roedel

On Wed, Mar 27, 2013 at 09:05:52AM +0900, Minchan Kim wrote:
> And please Cc stable.

Okay, here it is. The result is compile-tested.

Changes since v1:

* Remove the module-export for unmap_kernel_range and make zsmalloc
  built-in instead

Here is the patch:

>From 2b70502720b36909f9f39bdf27be21321a219c31 Mon Sep 17 00:00:00 2001
From: Joerg Roedel 
Date: Tue, 26 Mar 2013 23:24:22 +0100
Subject: [PATCH v2] staging: zsmalloc: Fix link error on ARM

Testing the arm chromebook config against the upstream
kernel produces a linker error for the zsmalloc module from
staging. The symbol flush_tlb_kernel_range is not available
there. Fix this by removing the reimplementation of
unmap_kernel_range in the zsmalloc module and using the
function directly. The unmap_kernel_range function is not
usable by modules, so also disallow building the driver as a
module for now.

Cc: sta...@vger.kernel.org
Signed-off-by: Joerg Roedel 
---
 drivers/staging/zsmalloc/Kconfig |2 +-
 drivers/staging/zsmalloc/zsmalloc-main.c |5 +
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/zsmalloc/Kconfig b/drivers/staging/zsmalloc/Kconfig
index 9084565..7fab032 100644
--- a/drivers/staging/zsmalloc/Kconfig
+++ b/drivers/staging/zsmalloc/Kconfig
@@ -1,5 +1,5 @@
 config ZSMALLOC
-   tristate "Memory allocator for compressed pages"
+   bool "Memory allocator for compressed pages"
default n
help
  zsmalloc is a slab-based memory allocator designed to store
diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/drivers/staging/zsmalloc/zsmalloc-main.c
index e78d262..324e123 100644
--- a/drivers/staging/zsmalloc/zsmalloc-main.c
+++ b/drivers/staging/zsmalloc/zsmalloc-main.c
@@ -656,11 +656,8 @@ static inline void __zs_unmap_object(struct mapping_area *area,
struct page *pages[2], int off, int size)
 {
unsigned long addr = (unsigned long)area->vm_addr;
-   unsigned long end = addr + (PAGE_SIZE * 2);
 
-   flush_cache_vunmap(addr, end);
-   unmap_kernel_range_noflush(addr, PAGE_SIZE * 2);
-   flush_tlb_kernel_range(addr, end);
+   unmap_kernel_range(addr, PAGE_SIZE * 2);
 }
 
 #else /* USE_PGTABLE_MAPPING */
-- 
1.7.9.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 09/10] ioatdma: Adding write back descriptor error status support for ioatdma 3.3

2013-03-26 Thread Dan Williams



On 3/26/13 3:43 PM, "Dave Jiang"  wrote:

>v3.3 provides support for write back descriptor error status. This allows
>reporting of errors in a descriptor field. In supporting this, certain
>errors such as P/Q validation errors no longer halts the channel. The DMA
>engine can continue to execute until the end of the chain and allow
>software
>to report the "errors" up the stack. We are also going to mask those error
>interrupts and handle them when the "chain" has completed at the end.
>
>Signed-off-by: Dave Jiang 
>---
> drivers/dma/ioat/dma_v3.c|   87
>--
> drivers/dma/ioat/hw.h|   17 +++-
> drivers/dma/ioat/registers.h |1
> 3 files changed, 90 insertions(+), 15 deletions(-)
>
>diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
>index 230a8bc..83d44f3 100644
>--- a/drivers/dma/ioat/dma_v3.c
>+++ b/drivers/dma/ioat/dma_v3.c
>@@ -498,6 +498,32 @@ static bool ioat3_cleanup_preamble(struct
>ioat_chan_common *chan,
>   return true;
> }
> 
>+static void desc_get_errstat(struct ioat_ring_ent *desc)
>+{
>+  struct ioat_dma_descriptor *hw = desc->hw;
>+
>+  switch (hw->ctl_f.op) {
>+  case IOAT_OP_PQ_VAL:
>+  case IOAT_OP_PQ_VAL_16S:
>+  {
>+  struct ioat_pq_descriptor *pq = desc->pq;
>+
>+  /* check if there's error written */
>+  if (!pq->dwbes_f.wbes)
>+  return;
>+
>+  if (pq->dwbes_f.p_val_err)
>+  *desc->result |= SUM_CHECK_P_RESULT;
>+
>+  if (pq->dwbes_f.q_val_err)
>+  *desc->result |= SUM_CHECK_Q_RESULT;
>+  return;
>+  }
>+  default:
>+  return;
>+  }
>+}
>+
> /**
>  * __cleanup - reclaim used descriptors
>  * @ioat: channel (ring) to clean
>@@ -535,6 +561,10 @@ static void __cleanup(struct ioat2_dma_chan *ioat,
>dma_addr_t phys_complete)
>   prefetch(ioat2_get_ring_ent(ioat, idx + i + 1));
>   desc = ioat2_get_ring_ent(ioat, idx + i);
>   dump_desc_dbg(ioat, desc);
>+
>+  /* set err stat if we are using dwbes */
>+  desc_get_errstat(desc);
>+
>   tx = &desc->txd;
>   if (tx->cookie) {
>   dma_cookie_complete(tx);
>@@ -580,14 +610,15 @@ static void ioat3_cleanup(struct ioat2_dma_chan
>*ioat)
> {
>   struct ioat_chan_common *chan = &ioat->base;
>   u64 phys_complete;
>+  u32 chanerr;
> 
>   spin_lock_bh(&chan->cleanup_lock);
> 
>   if (ioat3_cleanup_preamble(chan, &phys_complete))
>   __cleanup(ioat, phys_complete);
> 
>+  chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
>   if (is_ioat_halted(*chan->completion)) {
>-  u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
> 
>   if (chanerr & IOAT_CHANERR_HANDLE_MASK) {
>   mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);

This is now incurring a mmio read for every cleanup which somewhat defeats
the point of posting the completion status to memory.  Should be able to
get away with just writel(IOAT_CHANERR_HANDLE_MASK) iff cleanup came
across an error-writeback.


>@@ -595,6 +626,15 @@ static void ioat3_cleanup(struct ioat2_dma_chan
>*ioat)
>   }
>   }
> 
>+  /*
>+   * with DWBES we must clear the chanerr register at the end of the
>+   * chain in order to be able to issue the next command.
>+   */
>+  if (chanerr) {
>+  writel(chanerr & IOAT_CHANERR_HANDLE_MASK,
>+ chan->reg_base + IOAT_CHANERR_OFFSET);
>+  }
>+

 Does this also mean we need to re-write dmacount?  I.e. are writes to
dmacount ignored while chanerr is non-zero?

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[git pull] vfs fixes

2013-03-26 Thread Al Viro

-stable fodder; assorted deadlock fixes.  Please, pull from
git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git for-linus

Shortlog:
Al Viro (3):
  Don't bother with redoing rw_verify_area() from default_file_splice_from()
  Nest rename_lock inside vfsmount_lock
  vt: synchronize_rcu() under spinlock is not nice...

Diffstat:
 drivers/tty/vt/vc_screen.c |6 --
 fs/dcache.c|   16 +++-
 fs/internal.h  |5 +
 fs/read_write.c|   25 +
 fs/splice.c|4 +++-
 5 files changed, 48 insertions(+), 8 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 138/150] sfc: Disable soft interrupt handling during efx_device_detach_sync()

On Tue, 2013-03-26 at 15:20 +, Luis Henriques wrote:
> 3.5.7.9 -stable review patch.  If anyone has any objections, please let me 
> know.
[...]

I applied all 16 of the sfc patches on top of 3.5.7.8 and the result
passed our standard 'overnight' test suite for in-tree drivers.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC v7 00/11] Support vrange for anonymous page

2013-03-26 Thread John Stultz

On 03/25/2013 01:42 AM, Minchan Kim wrote:

On Fri, Mar 22, 2013 at 10:06:56AM -0700, John Stultz wrote:
So, if I understand you properly, it's more an issue of the added
cost of making the purged range non-volatile, and re-faulting in the
pages if we purge them all, when we didn't actually have the memory
pressure to warrant purging the entire range? Hrm. Ok, I can sort of
see that. So if we do partial-purging, all the data in the range is
invalid - since we don't know which pages in particular were purged,
but the costs when marking the range non-volatile and the costs of
over-writing the pages with the re-created data will be slightly
cheaper.

It could be heavily cheaper with my experiment in this patchset.
Allocator could avoid minor fault from 105799867 to 9401.

I guess the other benefit is if you're using the SIGBUS semantics,
you might luck out and not actually touch a purged page. Where as if
the entire range is purged, the process will definitely hit the
SIGBUS if its accessing the volatile data.

Yes. I guess that's why Taras liked it.
Quote from old version
"
4) Having a new system call makes it easier for userspace apps to
detect kernels without this functionality.

I really like the proposed interface. I like the suggestion of having
explicit FULL|PARTIAL_VOLATILE. Why not include PARTIAL_VOLATILE as a
required 3rd param in first version with expectation that
FULL_VOLATILE will be added later(and returning some not-supported error
in meantime)?
"

Thanks again for the clarifications on your thought process here!

I'm currently trying to rework your patches so we can reuse this for
file data as well as pure anonymous memory. The idea being that we add
one level of indirection: a vrange_root structure, which manages the
root of the rb interval tree as well as the lock. This vrange_root can
then be included in the mm_struct as well as address_space structures
depending on which type of memory we're dealing with. That way most of
the same infrastructure can be used to manage per-mm volatile ranges as
well as per-inode volatile ranges.

Sorting out how to handle vrange() calls that cross both anonymous and
file vmas will be interesting, and may have some of the drawbacks of the
vma based approach, but I think it will still be simpler. To start we
may just be able to require that any vrange() calls don't cross vma
types (possibly using separate syscalls for file and anonymous vranges).

Anyway, that's my current thinking. You can preview my current attempt here:
http://git.linaro.org/gitweb?p=people/jstultz/android-dev.git;a=shortlog;h=refs/heads/dev/vrange-minchan

Thanks so much again for your moving this work forward!
-john
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Revert VM_POPULATE?

2013-03-26 Thread Hugh Dickins

Michel, I propose that we revert 3.9-rc1's VM_POPULATE flag - 186930500985
"mm: introduce VM_POPULATE flag to better deal with racy userspace programs".

Konstantin's 3.7 cleanup of VM_flags has left several bits below 32
free, but sooner or later someone will want to come through again and
free some more, and I think VM_POPULATE will be among the first to go.

It just doesn't add much value, and flags a transient condition which
then sticks around indefinitely.  Better we remove it now than later.

You said yourself in the 0/8 or 1/8:
- Patch 8 is optional to this entire series. It only helps to deal more
  nicely with racy userspace programs that might modify their mappings
  while we're trying to populate them. It adds a new VM_POPULATE flag
  on the mappings we do want to populate, so that if userspace replaces
  them with mappings it doesn't want populated, mm_populate() won't
  populate those replacement mappings.
when you were just testing the waters with 8/8 to see if it was wanted.

I don't see any serious problem with it.  We can probably contrive
a case in which someone mlocks-then-munlocks scattered segments of a
large vma, and the VM_POPULATE flag left behind prevents the segments
from being merged back into a single vma; but that can happen in other
ways, so it doesn't count for much.

(I presume VM_POPULATE is left uncleared, because there could always be
races when it's cleared too soon - if userspace is racing with itself.)

I just don't see VM_POPULATE solving any real problem: the kernel code
appears to be safe enough without it, and if userspace wishes to play
racing mmap games, oh, just let it.

The original patch appears to revert cleanly, except in mm/mmap.c
where "*populate = true;" has since become "*populate = len;".

Hugh
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] staging: zsmalloc: Fix link error on ARM

2013-03-26 Thread Joerg Roedel

On Wed, Mar 27, 2013 at 09:05:52AM +0900, Minchan Kim wrote:
> Oops, it was my fault. When I tested [1] on CONFIG_SMP machine on ARM,
> it worked well. It means it's not always problem on every CONFIG_SMP
> on ARM machine but some SMP machine define flush_tlb_kernel_range,
> others don't.
> 
> At that time, Russell King already suggested same thing with your patch
> and I meant to clean it up because the patch was already merged but I didn't.
> Because we didn't catch up that it breaks build on some configuration
> so I thought it's just clean up patch and Greg didn't want to accept
> NOT-BUG patch of any z* family.
> 
> Now, it's BUG patch.
> 
> Remained problem is that Greg doesn't want to export core function for
> staging driver and it's reasonable for me.

Okay, I see. So that is probably also the reason for the
reimplementation of unmap_kernel_range in the zsmalloc module :)

> So my opinion is remove zsmalloc module build and could recover it with
> making unmap_kernel_range exported function after we merged it into
> mainline.

Sounds reasonable, I update the patch to only allow zsmalloc to be
built-in. The benefit is that this still allows to use
unmap_kernel_range() in the driver.

Thanks,

Joerg


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/4] f2fs: do not skip writing file meta during fsync

2013-03-26 Thread Jaegeuk Kim

2013-03-26 (화), 09:48 +0900, Namjae Jeon:
> 2013/3/25, Jaegeuk Kim :
> > This patch removes data_version check flow during the fsync call.
> > The original purpose for the use of data_version was to avoid writing inode
> > pages redundantly by the fsync calls repeatedly.
> Hi Jaegeuk.
> > However, when user can modify file meta and then call fsync, we should not
> > skip fsync procedure.
> I have a question.
> Which case does user can directly modify meta ? Recovery tool ?

The meta means the inode information like atime, mtime, size, and so on,
which can be modified by setattr() or some other VFS APIs.
Thanks,

> 
> Thanks.
> 
> > So, let's remove this condition check and hope that user triggers in right
> > manner.
> >
> > Signed-off-by: Jaegeuk Kim 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

-- 
Jaegeuk Kim
Samsung


signature.asc
Description: This is a digitally signed message part

Re: [PATCH 0/2] lib/scatterlist: sg_page_iter: support for memory w/o backing pages

2013-03-26 Thread Daniel Vetter

On Tue, Mar 26, 2013 at 12:57:42PM -0700, Andrew Morton wrote:
> On Tue, 26 Mar 2013 15:50:20 +0100 Daniel Vetter  wrote:
> 
> > On Tue, Mar 26, 2013 at 03:14:17PM +0200, Imre Deak wrote:
> > > When adding sg_page_iter I haven't thought properly through the use case
> > > for sg lists w/o backing pages - which is specific to the i915 driver -
> > > so this patchset adds support for this.
> > > 
> > > It applies on the i915 tree [1], where the iterator is in use already.
> > > 
> > > [1] git://people.freedesktop.org/~danvet/drm-intel [nightly branch]
> > 
> > i915 patches are already included in linux-next, so should apply on top of
> > that, too. So can this go in through -mm for 3.10 or should I slurp it in
> > through drm-intel trees (once it passes review)? I'd like to ditch the
> > dummy page hack we're currently using (i.e. patch 2).
> 
> Please slurp it - there's little benefit in spreading it across two trees.

Done, patches should show up in linux-next soon.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [Intel-gfx] [PATCH 0/2] lib/scatterlist: sg_page_iter: support for memory w/o backing pages

2013-03-26 Thread Daniel Vetter

On Tue, Mar 26, 2013 at 10:50:57PM +, Damien Lespiau wrote:
> On Tue, Mar 26, 2013 at 03:14:17PM +0200, Imre Deak wrote:
> > When adding sg_page_iter I haven't thought properly through the use case
> > for sg lists w/o backing pages - which is specific to the i915 driver -
> > so this patchset adds support for this.
> > 
> > It applies on the i915 tree [1], where the iterator is in use already.
> > 
> > [1] git://people.freedesktop.org/~danvet/drm-intel [nightly branch]
> > 
> > Imre Deak (2):
> >   lib/scatterlist: sg_page_iter: support sg lists w/o backing pages
> >   Revert "drm/i915: set dummy page for stolen objects"
> 
> Both patches are Reviewed-by: Damien Lespiau 

Both patches merged, with Imre's missing sob line rectified on the 2nd
one.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

regulator: ab8500: ambiguous meaning of delay setting in struct ab8500_regulator_info

2013-03-26 Thread Axel Lin

Hi,

The comment of struct ab8500_regulator_info says the delay means
"startup/set voltage delay in us".
I'm confused by the meaning, does it mean enable_time or set_voltage_time_sel
time or both?

 * @enable_time: Time taken for the regulator voltage output voltage to
 *   stabilise after being enabled, in microseconds.

 * @set_voltage_time_sel: Time taken for the regulator voltage output voltage
 *   to stabilise after being set to a new value, in microseconds.
 *   The function provides the from and to voltage selector, the
 *   function should return the worst case.

Current code only has delay setting for AB8500_LDO_TVOUT, it looks like it
means enable_time rather than the delay for set_voltage_time_sel.
(well, AB8500_LDO_TVOUT is fixed voltage, it does not implement 
set_voltage_time_sel)

ab8500_regulator_set_voltage_time_sel() returns info->delay, but all its users
(AB8500_LDO_AUX1, AB8500_LDO_AUX2, AB8500_LDO_AUX3, AB8500_LDO_INTCORE)
do not have a delay setting. ( So now ab8500_regulator_set_voltage_time_sel
returns 0. )

Axel

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] staging: zsmalloc: Fix link error on ARM

2013-03-26 Thread Minchan Kim

On Tue, Mar 26, 2013 at 11:33:52PM +0100, Joerg Roedel wrote:
> Testing the arm chromebook config against the upstream
> kernel produces a linker error for the zsmalloc module from
> staging. The symbol flush_tlb_kernel_range is not available
> there. Fix this by removing the reimplementation of
> unmap_kernel_range in the zsmalloc module and using the
> function directly.
> 
> Signed-off-by: Joerg Roedel 


Oops, it was my fault. When I tested [1] on CONFIG_SMP machine on ARM,
it worked well. It means it's not always problem on every CONFIG_SMP
on ARM machine but some SMP machine define flush_tlb_kernel_range,
others don't.

At that time, Russell King already suggested same thing with your patch
and I meant to clean it up because the patch was already merged but I didn't.
Because we didn't catch up that it breaks build on some configuration
so I thought it's just clean up patch and Greg didn't want to accept
NOT-BUG patch of any z* family.

Now, it's BUG patch.

Remained problem is that Greg doesn't want to export core function for
staging driver and it's reasonable for me.
So my opinion is remove zsmalloc module build and could recover it with
making unmap_kernel_range exported function after we merged it into
mainline.

And please Cc stable.

[1] [99155188, zsmalloc: Fix TLB coherency and build problem]

> ---
>  drivers/staging/zsmalloc/zsmalloc-main.c |5 +
>  mm/vmalloc.c |1 +
>  2 files changed, 2 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c 
> b/drivers/staging/zsmalloc/zsmalloc-main.c
> index e78d262..324e123 100644
> --- a/drivers/staging/zsmalloc/zsmalloc-main.c
> +++ b/drivers/staging/zsmalloc/zsmalloc-main.c
> @@ -656,11 +656,8 @@ static inline void __zs_unmap_object(struct mapping_area 
> *area,
>   struct page *pages[2], int off, int size)
>  {
>   unsigned long addr = (unsigned long)area->vm_addr;
> - unsigned long end = addr + (PAGE_SIZE * 2);
>  
> - flush_cache_vunmap(addr, end);
> - unmap_kernel_range_noflush(addr, PAGE_SIZE * 2);
> - flush_tlb_kernel_range(addr, end);
> + unmap_kernel_range(addr, PAGE_SIZE * 2);
>  }
>  
>  #else /* USE_PGTABLE_MAPPING */
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 0f751f2..f7cba11 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -1266,6 +1266,7 @@ void unmap_kernel_range(unsigned long addr, unsigned 
> long size)
>   vunmap_page_range(addr, end);
>   flush_tlb_kernel_range(addr, end);
>  }
> +EXPORT_SYMBOL_GPL(unmap_kernel_range);
>  
>  int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
>  {
> -- 
> 1.7.9.5
> 
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majord...@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: mailto:"d...@kvack.org";> em...@kvack.org 

-- 
Kind regards,
Minchan Kim
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 09/10] ioatdma: Adding write back descriptor error status support for ioatdma 3.3


On 03/26/2013 04:47 PM, Dan Williams wrote:


On 3/26/13 3:43 PM, "Dave Jiang"  wrote:


v3.3 provides support for write back descriptor error status. This allows
reporting of errors in a descriptor field. In supporting this, certain
errors such as P/Q validation errors no longer halts the channel. The DMA
engine can continue to execute until the end of the chain and allow
software
to report the "errors" up the stack. We are also going to mask those error
interrupts and handle them when the "chain" has completed at the end.

Signed-off-by: Dave Jiang 
---
drivers/dma/ioat/dma_v3.c|   87
--
drivers/dma/ioat/hw.h|   17 +++-
drivers/dma/ioat/registers.h |1
3 files changed, 90 insertions(+), 15 deletions(-)

diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index 230a8bc..83d44f3 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -498,6 +498,32 @@ static bool ioat3_cleanup_preamble(struct
ioat_chan_common *chan,
return true;
}

+static void desc_get_errstat(struct ioat_ring_ent *desc)
+{
+   struct ioat_dma_descriptor *hw = desc->hw;
+
+   switch (hw->ctl_f.op) {
+   case IOAT_OP_PQ_VAL:
+   case IOAT_OP_PQ_VAL_16S:
+   {
+   struct ioat_pq_descriptor *pq = desc->pq;
+
+   /* check if there's error written */
+   if (!pq->dwbes_f.wbes)
+   return;
+
+   if (pq->dwbes_f.p_val_err)
+   *desc->result |= SUM_CHECK_P_RESULT;
+
+   if (pq->dwbes_f.q_val_err)
+   *desc->result |= SUM_CHECK_Q_RESULT;
+   return;
+   }
+   default:
+   return;
+   }
+}
+
/**
  * __cleanup - reclaim used descriptors
  * @ioat: channel (ring) to clean
@@ -535,6 +561,10 @@ static void __cleanup(struct ioat2_dma_chan *ioat,
dma_addr_t phys_complete)
prefetch(ioat2_get_ring_ent(ioat, idx + i + 1));
desc = ioat2_get_ring_ent(ioat, idx + i);
dump_desc_dbg(ioat, desc);
+
+   /* set err stat if we are using dwbes */
+   desc_get_errstat(desc);
+
tx = &desc->txd;
if (tx->cookie) {
dma_cookie_complete(tx);
@@ -580,14 +610,15 @@ static void ioat3_cleanup(struct ioat2_dma_chan
*ioat)
{
struct ioat_chan_common *chan = &ioat->base;
u64 phys_complete;
+   u32 chanerr;

spin_lock_bh(&chan->cleanup_lock);

if (ioat3_cleanup_preamble(chan, &phys_complete))
__cleanup(ioat, phys_complete);

+   chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
if (is_ioat_halted(*chan->completion)) {
-   u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);

if (chanerr & IOAT_CHANERR_HANDLE_MASK) {
mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);

This is now incurring a mmio read for every cleanup which somewhat defeats
the point of posting the completion status to memory.  Should be able to
get away with just writel(IOAT_CHANERR_HANDLE_MASK) iff cleanup came
across an error-writeback.

Ok I'll fix that.





@@ -595,6 +626,15 @@ static void ioat3_cleanup(struct ioat2_dma_chan
*ioat)
}
}

+   /*
+* with DWBES we must clear the chanerr register at the end of the
+* chain in order to be able to issue the next command.
+*/
+   if (chanerr) {
+   writel(chanerr & IOAT_CHANERR_HANDLE_MASK,
+  chan->reg_base + IOAT_CHANERR_OFFSET);
+   }
+

  Does this also mean we need to re-write dmacount?  I.e. are writes to
dmacount ignored while chanerr is non-zero?

I don't believe so. At least that hasn't been an issue with testing. I 
have asked them to remove that "feature" with future silicon.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 01/10] ioatdma: Adding PCI IDs for Intel Atom S1200 product family ioatdma devices

These should be good for the IOAT DMA devices on the Intel Atom S1269,
S1279, and S1289 platforms. We are also adding IOAT v3.3 definition for
the new DMA engine.

Signed-off-by: Dave Jiang 
---
 drivers/dma/ioat/hw.h  |6 ++
 drivers/dma/ioat/pci.c |6 ++
 2 files changed, 12 insertions(+)

diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
index 8cfa077..ce431f5 100644
--- a/drivers/dma/ioat/hw.h
+++ b/drivers/dma/ioat/hw.h
@@ -52,10 +52,16 @@
 #define PCI_DEVICE_ID_INTEL_IOAT_HSW8  0x2f2e
 #define PCI_DEVICE_ID_INTEL_IOAT_HSW9  0x2f2f
 
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD0  0x0C50
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD1  0x0C51
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD2  0x0C52
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD3  0x0C53
+
 #define IOAT_VER_1_2            0x12    /* Version 1.2 */
 #define IOAT_VER_2_0            0x20    /* Version 2.0 */
 #define IOAT_VER_3_0            0x30    /* Version 3.0 */
 #define IOAT_VER_3_2            0x32    /* Version 3.2 */
+#define IOAT_VER_3_3            0x33    /* Version 3.3 */
 
 
 int system_has_dca_enabled(struct pci_dev *pdev);
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
index 67c8e83..1f63296 100644
--- a/drivers/dma/ioat/pci.c
+++ b/drivers/dma/ioat/pci.c
@@ -105,6 +105,12 @@ static struct pci_device_id ioat_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW8) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW9) },
 
+   /* I/OAT v3.3 platforms */
+   { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) },
+   { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) },
+   { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD2) },
+   { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD3) },
+
{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: linux-next: build failure after merge of the final tree (v4l-dvb tree related)

Hi Mauro,

On Tue, 26 Mar 2013 09:04:52 -0300 Mauro Carvalho Chehab  
wrote:
>
> Em Tue, 26 Mar 2013 17:18:47 +1100
> Stephen Rothwell  escreveu:
> 
> > After merging the final tree, today's linux-next build (powerpc
> > allyesconfig) failed like this:
> > 
> > drivers/staging/media/solo6x10/solo6x10-v4l2-enc.c: In function 
> > 'solo_enc_default':
> > drivers/staging/media/solo6x10/solo6x10-v4l2-enc.c:1031:7: error: case 
> > label does not reduce to an integer constant
> > drivers/staging/media/solo6x10/solo6x10-v4l2-enc.c:1035:7: error: case 
> > label does not reduce to an integer constant
> > 
> > I am not sure why this has suddenly appeared, but I have disabled the
> > driver for now using this patch:
> 
> Well, solo6x10 didn't use to have its own private ioctl's until now.
> Still, that's strange, as there are other drivers also using vidioc_default.
> 
> I suspect that those _IO* have its highest bit equal to 1 on powerpc
> being too big for int. If so, the enclosed patch should fix. 
> I'm still wondering why it didn't cause any compilation problems here.
> 
> Ok, I didn't try to compile it on powerpc, but still integers have 32
> bits on ppc, right?

yes.

> Anyway, could you please try the enclosed patch?

Doesn't help.

on powerpc, SOLO_IOC_G_MOTION_THRESHOLDS expands to

(((2U) << (((0 +8)+8)+13)) | ((('V')) << (0 +8)) | (((192 +0)) << 0) | 
(sizeof(struct solo_motion_thresholds) == sizeof(struct 
solo_motion_thresholds[1]) && sizeof(struct solo_motion_thresholds) < (1 << 
13)) ? sizeof(struct solo_motion_thresholds) : 
__invalid_size_argument_for_IOC))) << ((0 +8)+8)))

sizeof(struct solo_motion_thresholds) is 64*64*2 == 8192 which is not
less than 1 << 13

:-(
-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au


pgprmD0_X2qie.pgp
Description: PGP signature

[ 00/98] 3.8.5-stable review

2013-03-26 Thread Greg Kroah-Hartman

This is the start of the stable review cycle for the 3.8.5 release.
There are 98 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Thu Mar 28 22:41:38 UTC 2013.
Anything received after that time might be too late.

The whole patch series can be found in one patch at:
kernel.org/pub/linux/kernel/v3.0/stable-review/patch-3.8.5-rc1.gz
and the diffstat can be found below.

thanks,

greg k-h

-
Pseudo-Shortlog of commits:

Greg Kroah-Hartman 
Linux 3.8.5-rc1

Felix Fietkau 
rt2x00: error in configurations with mesh support disabled

Marek Szyprowski 
ARM: DMA-mapping: add missing GFP_DMA flag for atomic buffer allocation

Mikhail Kshevetskiy 
usb: musb: da8xx: Fix build breakage due to typo

Johan Hovold 
USB: io_ti: fix get_icount for two port adapters

Johan Hovold 
USB: garmin_gps: fix memory leak on disconnect

Jan Kara 
udf: Fix bitmap overflow on large filesystems with small block size

Rafael J. Wysocki 
ACPI: Rework acpi_get_child() to be more efficient

Ben Hutchings 
efivars: Fix check for CONFIG_EFI_VARS_PSTORE_DEFAULT_DISABLE

Seth Forshee 
efivars: Add module parameter to disable use as a pstore backend

Seth Forshee 
efivars: Allow disabling use as a pstore backend

Johan Hovold 
USB: serial: fix interface refcounting

Andrzej Pietrasiewicz 
usb: gadget: ffs: fix enable multiple instances

Alan Stern 
USB: EHCI: fix regression in QH unlinking

Alan Stern 
USB: EHCI: fix regression during bus resume

Johan Hovold 
USB: cdc-acm: fix device unregistration

Hannes Reinecke 
USB: xhci: correctly enable interrupts

Dmitry Torokhov 
USB: xhci - fix bit definitions for IMAN register

CQ Tang 
x86-64: Fix the failure case in copy_user_handle_tail()

Mark Rutland 
clockevents: Don't allow dummy broadcast timers

Kent Overstreet 
nfsd: fix bad offset use

NeilBrown 
md/raid5: ensure sync and DISCARD don't happen at the same time.

Jonathan Brassow 
MD RAID5: Avoid accessing gendisk or queue structs when not available

NeilBrown 
md/raid5: schedule_construction should abort if nothing to do.

Takahisa Tanaka 
watchdog: sp5100_tco: Remove code that may cause a boot failure

Takahisa Tanaka 
watchdog: sp5100_tco: Set the AcpiMmioSel bitmask value to 1 instead of 2

Mike Marciniszyn 
IPoIB: Fix send lockup due to missed TX completion

Theodore Ts'o 
ext4: fix data=journal fast mount/umount hang

Lukas Czerner 
ext4: use s_extent_max_zeroout_kb value as number of kb

Theodore Ts'o 
ext4: use atomic64_t for the per-flexbg free_clusters count

Dmitry Artamonow 
usb-storage: add unusual_devs entry for Samsung YP-Z3 mp3 player

Zheng Liu 
ext4: fix the wrong number of the allocated blocks in ext4_split_extent()

Jan Kara 
jbd2: fix use after free in jbd2_journal_dirty_metadata()

Jeff Layton 
cifs: ignore everything in SPNEGO blob after mechTypes

Mateusz Guzik 
cifs: delay super block destruction until all cifsFileInfo objects are gone

Alex Deucher 
drm/radeon/benchmark: make sure bo blit copy exists before using it

Alex Deucher 
drm/radeon: fix backend map setup on 1 RB trinity boards

Alex Deucher 
drm/radeon: fix S/R on VM systems (cayman/TN/SI)

Alex Deucher 
drm/radeon: add support for Richland APUs

Alex Deucher 
drm/radeon: add Richland pci ids

Julia Lemire 
drm/mgag200: Bug fix: Modified pll algorithm for EH project

Mikulas Patocka 
dm verity: avoid deadlock

Joe Thornber 
dm thin: fix discard corruption

Laxman Dewangan 
ARM: tegra: fix register address of slink controller

Nicholas Bellinger 
target/file: Bump FD_MAX_SECTORS to 2048 to handle 1M sized I/Os

Andy Grover 
target/iscsi: Fix mutual CHAP auth on big-endian arches

Vladimir Davydov 
mqueue: sys_mq_open: do not call mnt_drop_write() if read-only

H Hartley Sweeten 
drivers/video/ep93xx-fb.c: include  for devm_ioremap()

Wanpeng Li 
mm/hugetlb: fix total hugetlbfs pages count when using memory overcommit 
accouting

Nicolas Ferre 
drivers/rtc/rtc-at91rm9200.c: use a variable for storing IMR

Torsten Duwe 
KMS: fix EDID detailed timing frame rate

Torsten Duwe 
KMS: fix EDID detailed timing vsync parsing

Laxman Dewangan 
i2c: tegra: check the clk_prepare_enable() return value

Daniel Vetter 
Revert "drm/i915: write backlight harder"

Kees Cook 
drm/i915: bounds check execbuffer relocation count

Bing Zhao 
mwifiex: fix potential out-of-boundary access to ibss rate table

Larry Finger 
rtlwifi: rtl8192cu: Fix problem that prevents reassociation

Larry Finger 
rtlwifi: rtl8192cu: Fix schedule while atomic bug splat

Steven Rostedt (Red Hat) 
tracing: Keep overwrite in sync between regular and snapshot buffers

Steven Rostedt (Red Hat) 
tracing: Protect tracer flags with t

[ 01/98] USB: EHCI: work around silicon bug in Intels EHCI controllers

2013-03-26 Thread Greg Kroah-Hartman

3.8-stable review patch.  If anyone has any objections, please let me know.

--

From: Alan Stern 

commit 6402c796d3b4205d3d7296157956c5100a05d7d6 upstream.

This patch (as1660) works around a hardware problem present in some
(if not all) Intel EHCI controllers.  After a QH has been unlinked
from the async schedule and the corresponding IAA interrupt has
occurred, the controller is not supposed to access the QH and its qTDs.
There certainly shouldn't be any more DMA writes to those structures.
Nevertheless, Intel's controllers have been observed to perform a
final writeback to the QH's overlay region and to the most recent qTD.
For more information and a test program to determine whether this
problem is present in a particular controller, see

http://marc.info/?l=linux-usb&m=135492071812265&w=2
http://marc.info/?l=linux-usb&m=136182570800963&w=2

This patch works around the problem by always waiting for two IAA
cycles when unlinking an async QH.  The extra IAA delay gives the
controller time to perform its final writeback.

Surprisingly enough, the effects of this silicon bug have gone
undetected until quite recently.  More through luck than anything
else, it hasn't caused any apparent problems.  However, it does
interact badly with the patch that follows this one, so it needs to be
addressed.

This is the first part of a fix for the regression reported at:

https://bugs.launchpad.net/bugs/1088733

Signed-off-by: Alan Stern 
Tested-by: Stephen Thirlwall 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/usb/host/ehci-hcd.c |6 ++
 drivers/usb/host/ehci-q.c   |   18 ++
 2 files changed, 16 insertions(+), 8 deletions(-)

--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -748,11 +748,9 @@ static irqreturn_t ehci_irq (struct usb_
/* guard against (alleged) silicon errata */
if (cmd & CMD_IAAD)
ehci_dbg(ehci, "IAA with IAAD still set?\n");
-   if (ehci->async_iaa) {
+   if (ehci->async_iaa)
COUNT(ehci->stats.iaa);
-   end_unlink_async(ehci);
-   } else
-   ehci_dbg(ehci, "IAA with nothing unlinked?\n");
+   end_unlink_async(ehci);
}
 
/* remote wakeup [4.3.1] */
--- a/drivers/usb/host/ehci-q.c
+++ b/drivers/usb/host/ehci-q.c
@@ -1178,7 +1178,7 @@ static void single_unlink_async(struct e
struct ehci_qh  *prev;
 
/* Add to the end of the list of QHs waiting for the next IAAD */
-   qh->qh_state = QH_STATE_UNLINK;
+   qh->qh_state = QH_STATE_UNLINK_WAIT;
if (ehci->async_unlink)
ehci->async_unlink_last->unlink_next = qh;
else
@@ -1221,9 +1221,19 @@ static void start_iaa_cycle(struct ehci_
 
/* Do only the first waiting QH (nVidia bug?) */
qh = ehci->async_unlink;
-   ehci->async_iaa = qh;
-   ehci->async_unlink = qh->unlink_next;
-   qh->unlink_next = NULL;
+
+   /*
+* Intel (?) bug: The HC can write back the overlay region
+* even after the IAA interrupt occurs.  In self-defense,
+* always go through two IAA cycles for each QH.
+*/
+   if (qh->qh_state == QH_STATE_UNLINK_WAIT) {
+   qh->qh_state = QH_STATE_UNLINK;
+   } else {
+   ehci->async_iaa = qh;
+   ehci->async_unlink = qh->unlink_next;
+   qh->unlink_next = NULL;
+   }
 
/* Make sure the unlinks are all visible to the hardware */
wmb();


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[ 03/98] net/ipv4: Ensure that location of timestamp option is stored

2013-03-26 Thread Greg Kroah-Hartman

3.8-stable review patch.  If anyone has any objections, please let me know.

--


From: David Ward 

[ Upstream commit 4660c7f498c07c43173142ea95145e9dac5a6d14 ]

This is needed in order to detect if the timestamp option appears
more than once in a packet, to remove the option if the packet is
fragmented, etc. My previous change neglected to store the option
location when the router addresses were prespecified and Pointer >
Length. But now the option location is also stored when Flag is an
unrecognized value, to ensure these option handling behaviors are
still performed.

Signed-off-by: David Ward 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/ipv4/ip_options.c |5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -370,7 +370,6 @@ int ip_options_compile(struct net *net,
}
switch (optptr[3]&0xF) {
  case IPOPT_TS_TSONLY:
-   opt->ts = optptr - iph;
if (skb)
timeptr = &optptr[optptr[2]-1];
opt->ts_needtime = 1;
@@ -381,7 +380,6 @@ int ip_options_compile(struct net *net,
pp_ptr = optptr + 2;
goto error;
}
-   opt->ts = optptr - iph;
if (rt)  {
spec_dst_fill(&spec_dst, skb);
memcpy(&optptr[optptr[2]-1], 
&spec_dst, 4);
@@ -396,7 +394,6 @@ int ip_options_compile(struct net *net,
pp_ptr = optptr + 2;
goto error;
}
-   opt->ts = optptr - iph;
{
__be32 addr;
memcpy(&addr, 
&optptr[optptr[2]-1], 4);
@@ -429,12 +426,12 @@ int ip_options_compile(struct net *net,
pp_ptr = optptr + 3;
goto error;
}
-   opt->ts = optptr - iph;
if (skb) {
optptr[3] = 
(optptr[3]&0xF)|((overflow+1)<<4);
opt->is_changed = 1;
}
}
+   opt->ts = optptr - iph;
break;
  case IPOPT_RA:
if (optlen < 4) {


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 08/10] ioatdma: Adding support for 16 src PQ ops and super extended descriptors

ioatdma v3.3 introduced PQ operations with 16 sources, along with super
extended descriptors to support those operations. This patch adds support
for the 16-source ops and, in turn, adds the super extended descriptors
that those ops require.

Five SED pools are created, depending on the descriptor sizes. An SED is a
descriptor of 64 bytes or larger and must be physically contiguous. A kmem
cache pool is created for allocating the software descriptor that manages the
hardware descriptor. The super extended descriptor takes the place of the
extended descriptor for certain operations and is "attached" to the op
descriptor during the operation. This is a new feature for ioatdma v3.3.

Signed-off-by: Dave Jiang 
---
 drivers/dma/ioat/dma.h   |   18 ++
 drivers/dma/ioat/dma_v2.h|2 
 drivers/dma/ioat/dma_v3.c|  394 --
 drivers/dma/ioat/hw.h|   43 -
 drivers/dma/ioat/pci.c   |3 
 drivers/dma/ioat/registers.h |1 
 6 files changed, 439 insertions(+), 22 deletions(-)

diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index 4fbf5c7..8c88724 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -81,6 +81,9 @@ struct ioatdma_device {
void __iomem *reg_base;
struct pci_pool *dma_pool;
struct pci_pool *completion_pool;
+#define MAX_SED_POOLS  5
+   struct dma_pool *sed_hw_pool[MAX_SED_POOLS];
+   struct kmem_cache *sed_pool;
struct dma_device common;
u8 version;
struct msix_entry msix_entries[4];
@@ -96,6 +99,7 @@ struct ioatdma_device {
int (*init_device)(struct ioatdma_device *device);
 };
 
+
 enum ioat_hwbugs {
IOAT_LEGACY_COMPLETION_REQUIRED = (1 << 0),
 };
@@ -149,6 +153,20 @@ struct ioat_dma_chan {
u16 active;
 };
 
+/**
+ * struct ioat_sed_ent - wrapper around super extended hardware descriptor
+ * @hw: hardware SED
+ * @dma: dma address for the SED
+ * @parent: point to the dma descriptor that's the parent
+ * @hw_pool: hardware descriptor pool index
+ */
+struct ioat_sed_ent {
+   struct ioat_sed_raw_descriptor *hw;
+   dma_addr_t dma;
+   struct ioat_ring_ent *parent;
+   unsigned int hw_pool;
+};
+
 static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c)
 {
return container_of(c, struct ioat_chan_common, common);
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h
index e100f64..29bf944 100644
--- a/drivers/dma/ioat/dma_v2.h
+++ b/drivers/dma/ioat/dma_v2.h
@@ -137,6 +137,7 @@ struct ioat_ring_ent {
#ifdef DEBUG
int id;
#endif
+   struct ioat_sed_ent *sed;
 };
 
 static inline struct ioat_ring_ent *
@@ -157,6 +158,7 @@ static inline void ioat2_set_chainaddr(struct 
ioat2_dma_chan *ioat, u64 addr)
 
 int ioat2_dma_probe(struct ioatdma_device *dev, int dca);
 int ioat3_dma_probe(struct ioatdma_device *dev, int dca);
+void ioat3_dma_remove(struct ioatdma_device *dev);
 struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem 
*iobase);
 struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem 
*iobase);
 int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs);
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index a55c346..230a8bc 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -55,7 +55,7 @@
 /*
  * Support routines for v3+ hardware
  */
-
+#include 
 #include 
 #include 
 #include 
@@ -70,6 +70,10 @@
 /* ioat hardware assumes at least two sources for raid operations */
 #define src_cnt_to_sw(x) ((x) + 2)
 #define src_cnt_to_hw(x) ((x) - 2)
+#define ndest_to_sw(x) ((x) + 1)
+#define ndest_to_hw(x) ((x) - 1)
+#define src16_cnt_to_sw(x) ((x) + 9)
+#define src16_cnt_to_hw(x) ((x) - 9)
 
 /* provide a lookup table for setting the source address in the base or
  * extended descriptor of an xor or pq descriptor
@@ -77,7 +81,18 @@
 static const u8 xor_idx_to_desc = 0xe0;
 static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
 static const u8 pq_idx_to_desc = 0xf8;
+static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1,
+  2, 2, 2, 2, 2, 2, 2 };
 static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
+static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
+   0, 1, 2, 3, 4, 5, 6 };
+
+/*
+ * technically sources 1 and 2 do not require SED, but the op will have
+ * at least 9 descriptors so that's irrelevant.
+ */
+static const u8 pq16_idx_to_sed[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1 };
 
 static void ioat3_eh(struct ioat2_dma_chan *ioat);
 
@@ -103,6 +118,13 @@ static dma_addr_t pq_get_src(struct ioat_raw_descriptor 
*descs[2], int idx)
return raw->field[pq_idx_to_field[idx]];
 }
 
+static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx)
+{
+   struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[id

[PATCH 06/10] ioatdma: Removing PQ val disable for cb3.3