date:20190212

[RFC PATCH v1 08/25] printk: add ring buffer and kthread

2019-02-12 Thread John Ogness

The printk ring buffer provides an NMI-safe interface for writing
messages to a ring buffer. Using such a buffer relieves printk
callers of the current burden of disabled preemption while calling
the console drivers (and possibly printing out many messages that
another task put into the log buffer).

Create a ring buffer to be used for storing messages to be
printed to the consoles.

Create a dedicated printk kthread to block on the ring buffer
and call the console drivers for the read messages.

NOTE: The printk_delay is relocated to _after_ the message is
  printed, where it makes more sense.

Signed-off-by: John Ogness 
---
 kernel/printk/printk.c | 105 +
 1 file changed, 105 insertions(+)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index d3d170374ceb..08e079b95652 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -44,6 +44,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -397,7 +399,12 @@ DEFINE_RAW_SPINLOCK(logbuf_lock);
printk_safe_exit_irqrestore(flags); \
} while (0)
 
+DECLARE_STATIC_PRINTKRB_CPULOCK(printk_cpulock);
+
 #ifdef CONFIG_PRINTK
+/* record buffer */
+DECLARE_STATIC_PRINTKRB(printk_rb, CONFIG_LOG_BUF_SHIFT, _cpulock);
+
 DECLARE_WAIT_QUEUE_HEAD(log_wait);
 /* the next printk record to read by syslog(READ) or /proc/kmsg */
 static u64 syslog_seq;
@@ -744,6 +751,10 @@ static ssize_t msg_print_ext_body(char *buf, size_t size,
return p - buf;
 }
 
+#define PRINTK_SPRINT_MAX (LOG_LINE_MAX + PREFIX_MAX)
+#define PRINTK_RECORD_MAX (sizeof(struct printk_log) + \
+   CONSOLE_EXT_LOG_MAX + PRINTK_SPRINT_MAX)
+
 /* /dev/kmsg - userspace message inject/listen interface */
 struct devkmsg_user {
u64 seq;
@@ -1566,6 +1577,34 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, 
int, len)
return do_syslog(type, buf, len, SYSLOG_FROM_READER);
 }
 
+static void format_text(struct printk_log *msg, u64 seq,
+   char *ext_text, size_t *ext_len,
+   char *text, size_t *len, bool time)
+{
+   if (suppress_message_printing(msg->level)) {
+   /*
+* Skip record that has level above the console
+* loglevel and update each console's local seq.
+*/
+   *len = 0;
+   *ext_len = 0;
+   return;
+   }
+
+   *len = msg_print_text(msg, console_msg_format & MSG_FORMAT_SYSLOG,
+ time, text, PRINTK_SPRINT_MAX);
+   if (nr_ext_console_drivers) {
+   *ext_len = msg_print_ext_header(ext_text, CONSOLE_EXT_LOG_MAX,
+   msg, seq);
+   *ext_len += msg_print_ext_body(ext_text + *ext_len,
+  CONSOLE_EXT_LOG_MAX - *ext_len,
+  log_dict(msg), msg->dict_len,
+  log_text(msg), msg->text_len);
+   } else {
+   *ext_len = 0;
+   }
+}
+
 /*
  * Special console_lock variants that help to reduce the risk of soft-lockups.
  * They allow to pass console_lock to another printk() call using a busy wait.
@@ -2899,6 +2938,72 @@ void wake_up_klogd(void)
preempt_enable();
 }
 
+static int printk_kthread_func(void *data)
+{
+   struct prb_iterator iter;
+   struct printk_log *msg;
+   size_t ext_len;
+   char *ext_text;
+   u64 master_seq;
+   size_t len;
+   char *text;
+   char *buf;
+   int ret;
+
+   ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL);
+   text = kmalloc(PRINTK_SPRINT_MAX, GFP_KERNEL);
+   buf = kmalloc(PRINTK_RECORD_MAX, GFP_KERNEL);
+   if (!ext_text || !text || !buf)
+   return -1;
+
+   prb_iter_init(, _rb, NULL);
+
+   /* the printk kthread never exits */
+   for (;;) {
+   ret = prb_iter_wait_next(, buf,
+PRINTK_RECORD_MAX, _seq);
+   if (ret == -ERESTARTSYS) {
+   continue;
+   } else if (ret < 0) {
+   /* iterator invalid, start over */
+   prb_iter_init(, _rb, NULL);
+   continue;
+   }
+
+   msg = (struct printk_log *)buf;
+   format_text(msg, master_seq, ext_text, _len, text,
+   , printk_time);
+
+   console_lock();
+   if (len > 0 || ext_len > 0) {
+   call_console_drivers(ext_text, ext_len, text, len);
+   boot_delay_msec(msg->level);
+   printk_delay();
+   }
+   console_unlock();
+   }
+
+   kfree(ext_text);
+   kfree(text);
+   kfree(buf);
+
+   return 0;
+}
+
+static int

[RFC PATCH v1 12/25] printk: minimize console locking implementation

2019-02-12 Thread John Ogness

Since printing of the printk buffer is now handled by the printk
kthread, minimize the console locking functions to just handle
locking of the console.

NOTE: With this change, console_flush_on_panic() will no longer flush.

Signed-off-by: John Ogness 
---
 kernel/printk/printk.c | 255 +
 1 file changed, 1 insertion(+), 254 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 073ff9fd6872..ece54c24ea0d 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -209,19 +209,7 @@ static int nr_ext_console_drivers;
 
 static int __down_trylock_console_sem(unsigned long ip)
 {
-   int lock_failed;
-   unsigned long flags;
-
-   /*
-* Here and in __up_console_sem() we need to be in safe mode,
-* because spindump/WARN/etc from under console ->lock will
-* deadlock in printk()->down_trylock_console_sem() otherwise.
-*/
-   printk_safe_enter_irqsave(flags);
-   lock_failed = down_trylock(_sem);
-   printk_safe_exit_irqrestore(flags);
-
-   if (lock_failed)
+   if (down_trylock(_sem))
return 1;
mutex_acquire(_lock_dep_map, 0, 1, ip);
return 0;
@@ -230,13 +218,9 @@ static int __down_trylock_console_sem(unsigned long ip)
 
 static void __up_console_sem(unsigned long ip)
 {
-   unsigned long flags;
-
mutex_release(_lock_dep_map, 1, ip);
 
-   printk_safe_enter_irqsave(flags);
up(_sem);
-   printk_safe_exit_irqrestore(flags);
 }
 #define up_console_sem() __up_console_sem(_RET_IP_)
 
@@ -1498,82 +1482,6 @@ static void format_text(struct printk_log *msg, u64 seq,
 }
 
 /*
- * Special console_lock variants that help to reduce the risk of soft-lockups.
- * They allow to pass console_lock to another printk() call using a busy wait.
- */
-
-#ifdef CONFIG_LOCKDEP
-static struct lockdep_map console_owner_dep_map = {
-   .name = "console_owner"
-};
-#endif
-
-static DEFINE_RAW_SPINLOCK(console_owner_lock);
-static struct task_struct *console_owner;
-static bool console_waiter;
-
-/**
- * console_lock_spinning_enable - mark beginning of code where another
- * thread might safely busy wait
- *
- * This basically converts console_lock into a spinlock. This marks
- * the section where the console_lock owner can not sleep, because
- * there may be a waiter spinning (like a spinlock). Also it must be
- * ready to hand over the lock at the end of the section.
- */
-static void console_lock_spinning_enable(void)
-{
-   raw_spin_lock(_owner_lock);
-   console_owner = current;
-   raw_spin_unlock(_owner_lock);
-
-   /* The waiter may spin on us after setting console_owner */
-   spin_acquire(_owner_dep_map, 0, 0, _THIS_IP_);
-}
-
-/**
- * console_lock_spinning_disable_and_check - mark end of code where another
- * thread was able to busy wait and check if there is a waiter
- *
- * This is called at the end of the section where spinning is allowed.
- * It has two functions. First, it is a signal that it is no longer
- * safe to start busy waiting for the lock. Second, it checks if
- * there is a busy waiter and passes the lock rights to her.
- *
- * Important: Callers lose the lock if there was a busy waiter.
- * They must not touch items synchronized by console_lock
- * in this case.
- *
- * Return: 1 if the lock rights were passed, 0 otherwise.
- */
-static int console_lock_spinning_disable_and_check(void)
-{
-   int waiter;
-
-   raw_spin_lock(_owner_lock);
-   waiter = READ_ONCE(console_waiter);
-   console_owner = NULL;
-   raw_spin_unlock(_owner_lock);
-
-   if (!waiter) {
-   spin_release(_owner_dep_map, 1, _THIS_IP_);
-   return 0;
-   }
-
-   /* The waiter is now free to continue */
-   WRITE_ONCE(console_waiter, false);
-
-   spin_release(_owner_dep_map, 1, _THIS_IP_);
-
-   /*
-* Hand off console_lock to waiter. The waiter will perform
-* the up(). After this, the waiter is the console_lock owner.
-*/
-   mutex_release(_lock_dep_map, 1, _THIS_IP_);
-   return 1;
-}
-
-/*
  * Call the console drivers, asking them to write out
  * log_buf[start] to log_buf[end - 1].
  * The console_lock must be held.
@@ -1830,8 +1738,6 @@ static ssize_t msg_print_ext_header(char *buf, size_t 
size,
 static ssize_t msg_print_ext_body(char *buf, size_t size,
  char *dict, size_t dict_len,
  char *text, size_t text_len) { return 0; }
-static void console_lock_spinning_enable(void) { }
-static int console_lock_spinning_disable_and_check(void) { return 0; }
 static void call_console_drivers(const char *ext_text, size_t ext_len,
 const char *text, size_t len) {}
 static size_t msg_print_text(const struct printk_log *msg, bool syslog,
@@ -2066,35 +1972,6 @@ int is_console_locked(void)
 {
return console_locked;
 }

Re: Applied "spi: Add Renesas R-Car Gen3 RPC-IF SPI controller driver" to the spi tree

2019-02-12 Thread Marek Vasut

On 2/12/19 3:22 PM, Mark Brown wrote:
> The patch
> 
>spi: Add Renesas R-Car Gen3 RPC-IF SPI controller driver
> 
> has been applied to the spi tree at
> 
>https://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git 
> 
> All being well this means that it will be integrated into the linux-next
> tree (usually sometime in the next 24 hours) and sent to Linus during
> the next merge window (or sooner if it is a bug fix), however if
> problems are discovered then the patch may be dropped or reverted.  
> 
> You may get further e-mails resulting from automated or manual testing
> and review of the tree, please engage with people reporting problems and
> send followup patches addressing any issues that are reported if needed.
> 
> If any updates are required or you are submitting further changes they
> should be sent as incremental updates against current git, existing
> patches will not be replaced.
> 
> Please add any relevant lists and maintainers to the CCs when replying
> to this mail.

How did that happen when there were still comments and open topics ?

-- 
Best regards,
Marek Vasut

[PATCH] tools/power turbostat: return the exit status of a command

2019-02-12 Thread David Arcari

turbostat failed to return a non-zero exit status even though the
supplied command (turbostat <command>) failed.  Currently when turbostat
forks a command it returns zero instead of the actual exit status of the
command.  Modify the code to return the exit status.

Signed-off-by: David Arcari 
Cc: Len Brown 
Cc: Jirka Hladky 
Cc: linux-kernel@vger.kernel.org
---
 tools/power/x86/turbostat/turbostat.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/power/x86/turbostat/turbostat.c 
b/tools/power/x86/turbostat/turbostat.c
index 9327c0d..c3fad06 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -5077,6 +5077,9 @@ int fork_it(char **argv)
signal(SIGQUIT, SIG_IGN);
if (waitpid(child_pid, , 0) == -1)
err(status, "waitpid");
+
+   if (WIFEXITED(status))
+   status = WEXITSTATUS(status);
}
/*
 * n.b. fork_it() does not check for errors from for_all_cpus()
-- 
1.8.3.1

Re: [PATCH] drm/msm/a6xx: Add support for an interconnect path

2019-02-12 Thread Greg KH

On Tue, Feb 12, 2019 at 04:07:35PM +0200, Georgi Djakov wrote:
> Hi Greg,
> 
> On 2/12/19 12:16, Greg KH wrote:
> > On Tue, Feb 12, 2019 at 11:52:38AM +0200, Georgi Djakov wrote:
> >> From: Jordan Crouse 
> >>
> >> Try to get the interconnect path for the GPU and vote for the maximum
> >> bandwidth to support all frequencies. This is needed for performance.
> >> Later we will want to scale the bandwidth based on the frequency to
> >> also optimize for power but that will require some device tree
> >> infrastructure that does not yet exist.
> >>
> >> v6: use icc_set_bw() instead of icc_set()
> >> v5: Remove hardcoded interconnect name and just use the default
> >> v4: Don't use a port string at all to skip the need for names in the DT
> >> v3: Use macros and change port string per Georgi Djakov
> >>
> >> Signed-off-by: Jordan Crouse 
> >> Acked-by: Rob Clark 
> >> Reviewed-by: Evan Green 
> >> Signed-off-by: Georgi Djakov 
> >> ---
> >>
> >> Hi Greg,
> >>
> >> If not too late, could you please take this patch into char-misc-next.
> >> It is adding the first consumer of the interconnect API. We are just
> >> getting the code in place, without making it functional yet, as some
> >> DT bits are still needed to actually enable it. We have Rob's Ack to
> >> merge this together with the interconnect code. This patch has already
> >> spent some time in linux-next without any issues.
> > 
> > I have a question about the interconnect code.  Last week I saw a
> > presentation about the resctrl/RDT code from ARM that is coming (MPAM),
> > and it really looks like the same functionality as this interconnect
> > code.  In fact, this code looks like the existing resctrl stuff, right?
> 
> Thanks for the question! It's nice that MPAM is moving forward. When I
> looked into the MPAM draft spec a year ago, it was an optional
> extension mentioning mostly use-cases with VMs on server systems.
> 
> But anyway, MPAM is only available for ARMv8.2+ cores as an optional
> extension and aarch32 is not supported. In contrast to that, the
> interconnect code is generic and does not put any limitations on the
> platform/architecture that can use it - just the platform specific
> implementation would be different. We have discussed in the past that
> it can be used even on x86 platforms to provide hints to firmware.

Yes, but resctrl is arch independent.  It's not the "backend" that I'm
concerned about, it's the userspace and in-kernel api that I worry
about.

> > So why shouldn't we just drop the interconnect code and use resctrl
> > instead as it's already merged?
> 
> I haven't seen any MPAM code so far, but I assume that we can have an
> interconnect provider that implements this MPAM extension for systems
> that support it (and want to use it). Currently there are people working
> on various interconnect platform drivers from 5 different SoC vendors
> and we have agreed to use common DT bindings (and API). I doubt that
> even a single one of these platforms is based on v8.2+. Probably such
> SoCs would be coming in the future and then I expect people making use
> of MPAM in some interconnect provider driver.

Again, don't focus on MPAM as-is, it's the resctrl api that I would like
to see explained why interconnect can't use.

thanks,

greg k-h

Re: [GIT PULL] csky fixes for v5.0-rc6

2019-02-12 Thread Arnd Bergmann

On Mon, Feb 11, 2019 at 7:28 PM Linus Torvalds
 wrote:
>
> On Sun, Feb 10, 2019 at 9:08 PM  wrote:
> >
> > arch/csky patches for 5.0-rc6
>
> The bulk of this seems to be new hardware support code.
>
> Please send it during the 5.1 merge window, not as fixes at post-rc6 time.

Aside from this, I also noticed that the new code is for drivers/irqchip,
which is a separate subsystem with its own maintainers, who were
neither on Cc nor did they Ack the patches.

Generally, those should get merged through the respective subsystem
trees as listed in the linux/MAINTAINERS file.

  Arnd

Re: Oops in rpc_clnt_debugfs_register() from debugfs change

2019-02-12 Thread Greg Kroah-Hartman

On Tue, Feb 12, 2019 at 02:31:14PM +, David Howells wrote:
> I've bisected an oops that occurs in rpc_clnt_debugfs_register() trying to
> dereference a pointer with -EACCES in it.  This is the causing commit, though
> I suspect the bug is in sunrpc expecting to see NULL rather than an error.
> 
> ff9fb72bc07705c00795ca48631f7fffe24d2c6b is the first bad commit
> commit ff9fb72bc07705c00795ca48631f7fffe24d2c6b
> Author: Greg Kroah-Hartman 
> Date:   Wed Jan 23 11:28:14 2019 +0100
> 
> debugfs: return error values, not NULL
> 
> When an error happens, debugfs should return an error pointer value, not
> NULL.  This will prevent the totally theoretical error where a debugfs
> call fails due to lack of memory, returning NULL, and that dentry value
> is then passed to another debugfs call, which would end up succeeding,
> creating a file at the root of the debugfs tree, but would then be
> impossible to remove (because you can not remove the directory NULL).
> 
> So, to make everyone happy, always return errors, this makes the users
> of debugfs much simpler (they do not have to ever check the return
> value), and everyone can rest easy.
> ...
> 
> The attached oops occurs during boot from the gssproxy process in
> rpc_clnt_debugfs_register().  The code at this point is:
> 
>0x8195cbdd <+450>:   mov0x50(%rax),%rcx   <--- oopsing
>0x8195cbe1 <+454>:   mov$0x821cc8ba,%rdx
>0x8195cbe8 <+461>:   mov$0x18,%esi
>0x8195cbed <+466>:   lea-0x30(%rbp),%rdi
>0x8195cbf1 <+470>:   callq  0x819db773 
> 
> RAX is -EACCES.
> 
> Looking in the source:
> 
>   len = snprintf(name, sizeof(name), "../../rpc_xprt/%s",
>   xprt->debugfs->d_name.name);
> 
> I think xprt->debugfs is the value in RAX.
> 
>   (gdb) p &((struct dentry *)0)->d_name.name
>   $5 = (const unsigned char **) 0x50 
> 
> which matches the offset on the oopsing MOV instruction.
> 
> This is with linus/master (aa0c38cf39de73bf7360a3da8f1707601261e518).

Ugh, yeah, I see the problem, sorry about that.

I wonder why the debugfs call is always failing, that's not good...

let me dig and see if I already have a patch for this...

greg k-h

[GIT PULL] sound fixes for 5.0-rc7

2019-02-12 Thread Takashi Iwai

Linus,

please pull sound fixes for v5.0-rc7 from:

  git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git 
tags/sound-5.0-rc7

The topmost commit is 00a399cad1a063e7665f06b6497a807db20441fd



sound fixes for 5.0-rc7

It's a bit surprising that we've got more changes than hoped
at this late stage, but none of them look too scary; they are
all small fixes.

One change on the ALSA core side is again the PCM regression fix that
was partially addressed for OSS, but now the whole relevant change
is reverted instead.  Also, a few ASoC core fixes for UAF and OOB
are included, while the rest are usual random device-specific
fixes.



Charles Keepax (1):
  ASoC: core: Allow soc_find_component lookups to match parent of_node

Fabio Estevam (1):
  ASoC: MAINTAINERS: fsl: Change Fabio's email address

Guennadi Liakhovetski (1):
  ASoC: topology: fix oops/use-after-free case with dai driver

Jiada Wang (1):
  ASoC: rsnd: ssiu: correct shift bit for ssiu9

Jurica Vukadin (1):
  ALSA: hda - Add quirk for HP EliteBook 840 G5

Kuninori Morimoto (2):
  ASoC: rsnd: fixup MIX kctrl registration
  ASoC: rsnd: fixup rsnd_ssi_master_clk_start() user count check

Manuel Reinhardt (1):
  ALSA: usb-audio: Fix implicit fb endpoint setup by quirk

Pierre-Louis Bossart (1):
  ASoC: dapm: fix out-of-bounds accesses to DAPM lookup tables

Russell King (1):
  ASoC: hdmi-codec: fix oops on re-probe

Shuming Fan (1):
  ASoC: rt5682: Correct the setting while select ASRC clk for AD/DA filter

Sylwester Nawrocki (1):
  ASoC: samsung: Prevent clk_get_rate() calls in atomic context

Takashi Iwai (1):
  ALSA: pcm: Revert capture stream behavior change in blocking mode

---
 MAINTAINERS|  4 ++--
 sound/core/pcm_lib.c   | 20 
 sound/pci/hda/patch_conexant.c |  1 +
 sound/soc/codecs/hdmi-codec.c  |  4 ++--
 sound/soc/codecs/rt5682.c  |  2 ++
 sound/soc/samsung/i2s.c| 10 +-
 sound/soc/sh/rcar/core.c   |  8 
 sound/soc/sh/rcar/ssi.c|  2 +-
 sound/soc/sh/rcar/ssiu.c   |  2 +-
 sound/soc/soc-core.c   |  9 +++--
 sound/soc/soc-dapm.c   | 24 
 sound/soc/soc-topology.c   |  5 +
 sound/usb/pcm.c|  9 -
 13 files changed, 66 insertions(+), 34 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9919840d54cd..41ce5f4ad838 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6151,7 +6151,7 @@ FREESCALE SOC SOUND DRIVERS
 M: Timur Tabi 
 M: Nicolin Chen 
 M: Xiubo Li 
-R: Fabio Estevam 
+R: Fabio Estevam 
 L: alsa-de...@alsa-project.org (moderated for non-subscribers)
 L: linuxppc-...@lists.ozlabs.org
 S: Maintained
@@ -10898,7 +10898,7 @@ F:  include/linux/nvmem-consumer.h
 F: include/linux/nvmem-provider.h
 
 NXP SGTL5000 DRIVER
-M: Fabio Estevam 
+M: Fabio Estevam 
 L: alsa-de...@alsa-project.org (moderated for non-subscribers)
 S: Maintained
 F: Documentation/devicetree/bindings/sound/sgtl5000.txt
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index 6c99fa8ac5fa..6c0b30391ba9 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -2112,13 +2112,6 @@ int pcm_lib_apply_appl_ptr(struct snd_pcm_substream 
*substream,
return 0;
 }
 
-/* allow waiting for a capture stream that hasn't been started */
-#if IS_ENABLED(CONFIG_SND_PCM_OSS)
-#define wait_capture_start(substream)  ((substream)->oss.oss)
-#else
-#define wait_capture_start(substream)  false
-#endif
-
 /* the common loop for read/write data */
 snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream,
 void *data, bool interleaved,
@@ -2184,16 +2177,11 @@ snd_pcm_sframes_t __snd_pcm_lib_xfer(struct 
snd_pcm_substream *substream,
snd_pcm_update_hw_ptr(substream);
 
if (!is_playback &&
-   runtime->status->state == SNDRV_PCM_STATE_PREPARED) {
-   if (size >= runtime->start_threshold) {
-   err = snd_pcm_start(substream);
-   if (err < 0)
-   goto _end_unlock;
-   } else if (!wait_capture_start(substream)) {
-   /* nothing to do */
-   err = 0;
+   runtime->status->state == SNDRV_PCM_STATE_PREPARED &&
+   size >= runtime->start_threshold) {
+   err = snd_pcm_start(substream);
+   if (err < 0)
goto _end_unlock;
-   }
}
 
avail = snd_pcm_avail(substream);
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 152f54137082..a4ee7656d9ee 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -924,6 +924,7 @@ static const

Re: [PATCH] mm,memory_hotplug: Explicitly pass the head to isolate_huge_page

2019-02-12 Thread Michal Hocko

On Tue 12-02-19 14:45:49, Oscar Salvador wrote:
> On Tue, Feb 12, 2019 at 09:33:29AM +0100, Michal Hocko wrote:
> > >  
> > >   if (PageHuge(page)) {
> > >   struct page *head = compound_head(page);
> > > - pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
> > >   if (compound_order(head) > PFN_SECTION_SHIFT) {
> > >   ret = -EBUSY;
> > >   break;
> > >   }
> > 
> > Why are we doing this, btw? 
> 
> I assume you are referring to:
> 
> > > if (compound_order(head) > PFN_SECTION_SHIFT) {
> > > ret = -EBUSY;
> > > break;
> > > }

yes.

> I thought it was in case we stumble upon a gigantic page, and commit
> (c8721bbbdd36 mm: memory-hotplug: enable memory hotplug to handle hugepage)
> confirms it.
> 
> But I am not really sure if the above condition would still hold on powerpc,
> I wanted to check it but it is a bit more tricky than it is in x86_64 because
> of the different hugetlb sizes.
> Could it be that the above condition is not true, but still the order of that
> hugetlb page goes beyond MAX_ORDER? It is something I have to check.

This check doesn't make much sense in principle. Why should we bail out
based on a section size? We are offlining a pfn range. All that we care
about is whether the hugetlb is migrateable.
-- 
Michal Hocko
SUSE Labs

Re: [PATCH] parisc: use memblock_alloc() instead of custom get_memblock()

2019-02-12 Thread Mike Rapoport

On Tue, Feb 12, 2019 at 06:14:18AM -0800, Matthew Wilcox wrote:
> On Tue, Feb 12, 2019 at 03:59:50PM +0200, Mike Rapoport wrote:
> > -static void * __init get_memblock(unsigned long size)
> > -{
> > -   static phys_addr_t search_addr __initdata;
> > -   phys_addr_t phys;
> > -
> > -   if (!search_addr)
> > -   search_addr = PAGE_ALIGN(__pa((unsigned long) &_end));
> > -   search_addr = ALIGN(search_addr, size);
> > -   while (!memblock_is_region_memory(search_addr, size) ||
> > -   memblock_is_region_reserved(search_addr, size)) {
> > -   search_addr += size;
> > -   }
> > -   phys = search_addr;
> 
> This implies to me that the allocation will be 'size' aligned.
> 
> > if (!pmd) {
> > -   pmd = (pmd_t *) get_memblock(PAGE_SIZE << PMD_ORDER);
> > +   pmd = memblock_alloc(PAGE_SIZE << PMD_ORDER,
> > +SMP_CACHE_BYTES);
> 
> So why would this only need to be cacheline aligned?  It's pretty common
> for hardware to require that pgd/pud/pmd/pte tables be naturally aligned.
> 
> > @@ -700,7 +683,10 @@ static void __init pagetable_init(void)
> > }
> >  #endif
> >  
> > -   empty_zero_page = get_memblock(PAGE_SIZE);
> > +   empty_zero_page = memblock_alloc(PAGE_SIZE, SMP_CACHE_BYTES);
> 
> ... and surely the zero page also needs to be page aligned, by definition.
 
Right, I've completely missed the alignment. Will fix.

-- 
Sincerely yours,
Mike.

[PATCH] Documentation: change linux-4.x references to 5.x

2019-02-12 Thread Arnd Bergmann

As linux-5.0.x is coming up soon, the documentation should match,
in particular the README.rst file, so change all 4.x references
accordingly. There was a mix of lowercase and uppercase X here,
which I changed to using lowercase consistently.

Signed-off-by: Arnd Bergmann 
---
 Documentation/admin-guide/README.rst  |  32 ++---
 Documentation/process/applying-patches.rst| 117 +-
 .../translations/it_IT/admin-guide/README.rst |   2 +-
 3 files changed, 78 insertions(+), 73 deletions(-)

diff --git a/Documentation/admin-guide/README.rst 
b/Documentation/admin-guide/README.rst
index 0797eec76be1..47e577264198 100644
--- a/Documentation/admin-guide/README.rst
+++ b/Documentation/admin-guide/README.rst
@@ -1,9 +1,9 @@
 .. _readme:
 
-Linux kernel release 4.x 
+Linux kernel release 5.x 
 =
 
-These are the release notes for Linux version 4.  Read them carefully,
+These are the release notes for Linux version 5.  Read them carefully,
 as they tell you what this is all about, explain how to install the
 kernel, and what to do if something goes wrong.
 
@@ -63,7 +63,7 @@ Installing the kernel source
directory where you have permissions (e.g. your home directory) and
unpack it::
 
- xz -cd linux-4.X.tar.xz | tar xvf -
+ xz -cd linux-5.x.tar.xz | tar xvf -
 
Replace "X" with the version number of the latest kernel.
 
@@ -72,26 +72,26 @@ Installing the kernel source
files.  They should match the library, and not get messed up by
whatever the kernel-du-jour happens to be.
 
- - You can also upgrade between 4.x releases by patching.  Patches are
+ - You can also upgrade between 5.x releases by patching.  Patches are
distributed in the xz format.  To install by patching, get all the
newer patch files, enter the top level directory of the kernel source
-   (linux-4.X) and execute::
+   (linux-5.x) and execute::
 
- xz -cd ../patch-4.x.xz | patch -p1
+ xz -cd ../patch-5.x.xz | patch -p1
 
-   Replace "x" for all versions bigger than the version "X" of your current
+   Replace "x" for all versions bigger than the version "x" of your current
source tree, **in_order**, and you should be ok.  You may want to remove
the backup files (some-file-name~ or some-file-name.orig), and make sure
that there are no failed patches (some-file-name# or some-file-name.rej).
If there are, either you or I have made a mistake.
 
-   Unlike patches for the 4.x kernels, patches for the 4.x.y kernels
+   Unlike patches for the 5.x kernels, patches for the 5.x.y kernels
(also known as the -stable kernels) are not incremental but instead apply
-   directly to the base 4.x kernel.  For example, if your base kernel is 4.0
-   and you want to apply the 4.0.3 patch, you must not first apply the 4.0.1
-   and 4.0.2 patches. Similarly, if you are running kernel version 4.0.2 and
-   want to jump to 4.0.3, you must first reverse the 4.0.2 patch (that is,
-   patch -R) **before** applying the 4.0.3 patch. You can read more on this in
+   directly to the base 5.x kernel.  For example, if your base kernel is 5.0
+   and you want to apply the 5.0.3 patch, you must not first apply the 5.0.1
+   and 5.0.2 patches. Similarly, if you are running kernel version 5.0.2 and
+   want to jump to 5.0.3, you must first reverse the 5.0.2 patch (that is,
+   patch -R) **before** applying the 5.0.3 patch. You can read more on this in
:ref:`Documentation/process/applying-patches.rst `.
 
Alternatively, the script patch-kernel can be used to automate this
@@ -114,7 +114,7 @@ Installing the kernel source
 Software requirements
 -
 
-   Compiling and running the 4.x kernels requires up-to-date
+   Compiling and running the 5.x kernels requires up-to-date
versions of various software packages.  Consult
:ref:`Documentation/process/changes.rst ` for the minimum version 
numbers
required and how to get updates for these packages.  Beware that using
@@ -132,12 +132,12 @@ Build directory for the kernel
place for the output files (including .config).
Example::
 
- kernel source code: /usr/src/linux-4.X
+ kernel source code: /usr/src/linux-5.x
  build directory:/home/name/build/kernel
 
To configure and build the kernel, use::
 
- cd /usr/src/linux-4.X
+ cd /usr/src/linux-5.x
  make O=/home/name/build/kernel menuconfig
  make O=/home/name/build/kernel
  sudo make O=/home/name/build/kernel modules_install install
diff --git a/Documentation/process/applying-patches.rst 
b/Documentation/process/applying-patches.rst
index dc2ddc345044..fbb9297e6360 100644
--- a/Documentation/process/applying-patches.rst
+++ b/Documentation/process/applying-patches.rst
@@ -216,14 +216,14 @@ You can use the ``interdiff`` program 
(http://cyberelk.net/tim/patchutils/) to
 generate a patch representing the differences between two patches and then

Re: Oops in rpc_clnt_debugfs_register() from debugfs change

2019-02-12 Thread Greg Kroah-Hartman

On Tue, Feb 12, 2019 at 03:37:20PM +0100, Greg Kroah-Hartman wrote:
> On Tue, Feb 12, 2019 at 02:31:14PM +, David Howells wrote:
> > I've bisected an oops that occurs in rpc_clnt_debugfs_register() trying to
> > dereference a pointer with -EACCES in it.  This is the causing commit, 
> > though
> > I suspect the bug is in sunrpc expecting to see NULL rather than an error.
> > 
> > ff9fb72bc07705c00795ca48631f7fffe24d2c6b is the first bad commit
> > commit ff9fb72bc07705c00795ca48631f7fffe24d2c6b
> > Author: Greg Kroah-Hartman 
> > Date:   Wed Jan 23 11:28:14 2019 +0100
> > 
> > debugfs: return error values, not NULL
> > 
> > When an error happens, debugfs should return an error pointer value, not
> > NULL.  This will prevent the totally theoretical error where a debugfs
> > call fails due to lack of memory, returning NULL, and that dentry value
> > is then passed to another debugfs call, which would end up succeeding,
> > creating a file at the root of the debugfs tree, but would then be
> > impossible to remove (because you can not remove the directory NULL).
> > 
> > So, to make everyone happy, always return errors, this makes the users
> > of debugfs much simpler (they do not have to ever check the return
> > value), and everyone can rest easy.
> > ...
> > 
> > The attached oops occurs during boot from the gssproxy process in
> > rpc_clnt_debugfs_register().  The code at this point is:
> > 
> >0x8195cbdd <+450>:   mov0x50(%rax),%rcx   <--- oopsing
> >0x8195cbe1 <+454>:   mov$0x821cc8ba,%rdx
> >0x8195cbe8 <+461>:   mov$0x18,%esi
> >0x8195cbed <+466>:   lea-0x30(%rbp),%rdi
> >0x8195cbf1 <+470>:   callq  0x819db773 
> > 
> > RAX is -EACCES.
> > 
> > Looking in the source:
> > 
> > len = snprintf(name, sizeof(name), "../../rpc_xprt/%s",
> > xprt->debugfs->d_name.name);
> > 
> > I think xprt->debugfs is the value in RAX.
> > 
> > (gdb) p &((struct dentry *)0)->d_name.name
> > $5 = (const unsigned char **) 0x50 
> > 
> > which matches the offset on the oopsing MOV instruction.
> > 
> > This is with linus/master (aa0c38cf39de73bf7360a3da8f1707601261e518).
> 
> Ugh, yeah, I see the problem, sorry about that.
> 
> I wonder why the debugfs call is always failing, that's not good...
> 
> let me dig and see if I already have a patch for this...

I have a much larger cleanup patch for this code, but this single line
change should solve the issue for now.  Can you test it to verify?

thanks,

greg k-h

--

diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 45a033329cd4..19bb356230ed 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -146,7 +146,7 @@ rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
/* no "debugfs" dentry? Don't bother with the symlink. */
-   if (!xprt->debugfs) {
+   if (IS_ERR_OR_NULL(xprt->debugfs)) {
rcu_read_unlock();
return;
}

Re: [PATCH 5/5] kasan, slub: fix conflicts with CONFIG_SLAB_FREELIST_HARDENED

2019-02-12 Thread Andrey Konovalov

On Tue, Feb 12, 2019 at 2:43 PM Qian Cai  wrote:
>
>
>
> On 2/12/19 8:26 AM, Andrey Konovalov wrote:
> > Hm, did you apply all 6 patches (the one that you sent and these five)
> Yes.

I'm failing to reproduce this in QEMU. You're still using the same
config, right? Could you share whole dmesg until the first BUG?

Re: [RFC PATCH v2 0/4] mm, memory_hotplug: allocate memmap from hotadded memory

2019-02-12 Thread Michal Hocko

On Tue 12-02-19 14:56:58, Oscar Salvador wrote:
> On Tue, Feb 12, 2019 at 01:21:38PM +, Shameerali Kolothum Thodi wrote:
> > > Hi Oscar,
> > > 
> > > I ran tests on one of our arm64 machines. Particular machine doesn't
> > > actually have the mechanics for hotplug, so was all 'faked', but
> > > software wise it's all the same.
> > > 
> > > Upshot, seems to work as expected on arm64 as well.
> > > Tested-by: Jonathan Cameron 
> 
> Thanks Jonathan for having given it a spin, much appreciated!
> I was short of arm64 machines.
> 
> > (qemu) object_add memory-backend-ram,id=mem1,size=1G
> > (qemu) device_add pc-dimm,id=dimm1,memdev=mem1,node=1
> > root@ubuntu:~# 
> > root@ubuntu:~# numactl -H
> ...
> > node 1 cpus:
> > node 1 size: 1008 MB
> > node 1 free: 1008 MB
> > node distances:
> > node   0   1 
> >   0:  10  20 
> >   1:  20  10 
> > root@ubuntu:~#  
> 
> Ok, this is what I wanted to see.
> When you hotplugged 1GB, 16MB out of 1024MB  were spent
> for the memmap array, that is why you only see 1008MB there.
> 
> I am not sure what the default section size for arm64 is, but assuming
> it is 128MB, that would make sense, as 1GB would mean 8 sections,
> and each section takes 2MB.
> 
> That means that at least the mechanism works.

Please make sure to test on a larger machine which has multi section
memblocks. This is where I was hitting on bugs hard.
-- 
Michal Hocko
SUSE Labs

Re: [PATCH 2/3] soc: amlogic: gx-socinfo: Add new SoC IDs and Packages IDs

2019-02-12 Thread Jerome Brunet

On Tue, 2019-02-12 at 15:23 +0100, Neil Armstrong wrote:
> This adds the G12A and G12B SoC ids and the S90X2 package ID.
^   
Small typo here |

> 
> Signed-off-by: Neil Armstrong 
> ---
>  drivers/soc/amlogic/meson-gx-socinfo.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/drivers/soc/amlogic/meson-gx-socinfo.c
> b/drivers/soc/amlogic/meson-gx-socinfo.c
> index 1ae339f5eadb..c1a9c34d861b 100644
> --- a/drivers/soc/amlogic/meson-gx-socinfo.c
> +++ b/drivers/soc/amlogic/meson-gx-socinfo.c
> @@ -37,6 +37,8 @@ static const struct meson_gx_soc_id {
>   { "AXG", 0x25 },
>   { "GXLX", 0x26 },
>   { "TXHD", 0x27 },
> + { "G12A", 0x28 },
> + { "G12B", 0x29 },
>  };
>  
>  static const struct meson_gx_package_id {
> @@ -53,11 +55,14 @@ static const struct meson_gx_package_id {
>   { "S905W", 0x21, 0xa0, 0xf0 },
>   { "S905L", 0x21, 0xc0, 0xf0 },
>   { "S905M2", 0x21, 0xe0, 0xf0 },
> + { "S805X", 0x21, 0x30, 0xf0 },
> + { "S805Y", 0x21, 0xb0, 0xf0 },
>   { "S912", 0x22, 0, 0x0 }, /* Only S912 is known for GXM */
>   { "962X", 0x24, 0x10, 0xf0 },
>   { "962E", 0x24, 0x20, 0xf0 },
>   { "A113X", 0x25, 0x37, 0xff },
>   { "A113D", 0x25, 0x22, 0xff },
> + { "S905X2", 0x28, 0x40, 0xf0 },
>  };
>  
>  static inline unsigned int socinfo_to_major(u32 socinfo)

Re: Applied "spi: Add Renesas R-Car Gen3 RPC-IF SPI controller driver" to the spi tree

2019-02-12 Thread Mark Brown

On Tue, Feb 12, 2019 at 03:33:12PM +0100, Marek Vasut wrote:
> On 2/12/19 3:22 PM, Mark Brown wrote:

> > Please add any relevant lists and maintainers to the CCs when replying
> > to this mail.

> How did that happen when there were still comments and open topics ?

Mason had submitted a new version a couple of weeks ago and there'd been
no replies on the new version so I'd thought that all the prior issues
had been addressed.  I guess that's not the case?  The thread on the
version before was a bit long but seemed to peter out, though looking at
the archives it seems the main issue was that Mason stopped replying.

signature.asc
Description: PGP signature

Re: [RFC PATCH v1 01/25] printk-rb: add printk ring buffer documentation

2019-02-12 Thread Greg Kroah-Hartman

On Tue, Feb 12, 2019 at 03:29:39PM +0100, John Ogness wrote:
> The full documentation file for the printk ring buffer.
> 
> Signed-off-by: John Ogness 
> ---
>  Documentation/printk-ringbuffer.txt | 377 
> 

Nit, shouldn't this be in .rst format and tied into the "build the
kernel documentation" process somehow?

thanks,

greg k-h

Re: [RFC PATCH v1 03/25] printk-rb: define ring buffer struct and initializer

2019-02-12 Thread Greg Kroah-Hartman

On Tue, Feb 12, 2019 at 03:29:41PM +0100, John Ogness wrote:
> See Documentation/printk-ringbuffer.txt for details about the
> initializer arguments.

You can put that documentation here in the .h file and have it pulled
out automatically into the documentation files when they are created.
That way you always keep everything in sync properly.

thanks,

greg k-h

Re: [RFC PATCH v2 0/4] mm, memory_hotplug: allocate memmap from hotadded memory

2019-02-12 Thread Oscar Salvador

On Tue, Feb 12, 2019 at 03:42:42PM +0100, Michal Hocko wrote:
> Please make sure to test on a larger machine which has multi section
> memblocks. This is where I was hitting on bugs hard.

I tested the patchset with large memblocks (2GB) on x86_64, and worked
fine as well.
On powerpc I was only able to test it on normal memblocks, but I will check
if I can boost the memory there to get large memblocks.

And about arm64, I will talk to Jonathan off-list to see if we can do the same.

Btw, in the meantime, we could get some parts reviewed perhaps.
-- 
Oscar Salvador
SUSE L3

Re: [PATCH 0/2] driver core: Fixes related to device links

2019-02-12 Thread Ulf Hansson

On Tue, 12 Feb 2019 at 15:09, Greg Kroah-Hartman
 wrote:
>
> On Tue, Feb 12, 2019 at 01:01:13PM +0100, Rafael J. Wysocki wrote:
> > Hi Greg et al,
> >
> > These fix two issues on top of the recent device links material in
> > driver-core/driver-core-next.
> >
> > The first one fixes a race condition that may trigger when
> > __pm_runtime_set_status() is used incorrectly (that is, when it is
> > called with PM-runtime enabled for the target device and working).
> >
> > The second one fixes a supplier PM-runtime usage counter imbalance
> > resulting from adding and removing (e.g. in the error code path) a
> > stateless device link to it from within the consumer driver's probe
> > callback.
> >
> > Please refer to the patch changelogs for details.
>
> Looks good, all now queued up, thanks.

Greg, please don't get me wrong, but ~1.5 hours isn't sufficient for
me to review/test submitted patches.

I have been trying to collaborate (review/test) device links related
code with Rafael, but what's the point if you queue up the patches,
before I even got the chance to look at them. Shall I interpret it as
you don't care about me reviewing this, then just tell me so I don't
have to waste my time.

Kind regards
Uffe

[PATCH-tip] genirq: Add missing documentation for tot_count

2019-02-12 Thread Waiman Long

Commit 1136b0728969 ("genirq: Avoid summation loops for /proc/stat") adds
a new tot_count field to the irq_desc structure without documenting it.
This patch adds the missing piece of documentation.

Signed-off-by: Waiman Long 
---
 include/linux/irqdesc.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 875c41b..1d679fe 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -28,6 +28,7 @@
  * @core_internal_state__do_not_mess_with_it: core internal status information
  * @depth: disable-depth, for nested irq_disable() calls
  * @wake_depth:enable depth, for multiple irq_set_irq_wake() 
callers
+ * @tot_count: stats field for non-percpu irqs
  * @irq_count: stats field to detect stalled irqs
  * @last_unhandled:aging timer for unhandled count
  * @irqs_unhandled:stats field for spurious unhandled interrupts
-- 
1.8.3.1

Re: [PATCH v2] mm/memory-hotplug: Add sysfs hot-remove trigger

2019-02-12 Thread Robin Murphy


On 12/02/2019 08:33, Michal Hocko wrote:

On Mon 11-02-19 17:50:46, Robin Murphy wrote:

ARCH_MEMORY_PROBE is a useful thing for testing and debugging hotplug,
but being able to exercise the (arguably trickier) hot-remove path would
be even more useful. Extend the feature to allow removal of offline
sections to be triggered manually to aid development.

Since process dictates the new sysfs entry be documented, let's also
document the existing probe entry to match - better 13-and-a-half years
late than never, as they say...


The probe sysfs is quite dubious already TBH. Apart from testing, is
anybody using it for something real? Do we need to keep an API for
something testing only? Why isn't a custom testing module enough for
such a purpose?


From the arm64 angle, beyond "conventional" servers where we can 
hopefully assume ACPI, I can imagine there being embedded/HPC setups 
(not all as esoteric as that distributed-memory dRedBox thing), as well 
as virtual machines, that are DT-based with minimal runtime firmware. 
I'm none too keen on the idea either, but if such systems want to 
support physical hotplug then driving it from userspace might be the 
only reasonable approach. I'm just loath to actually document it as 
anything other than a developer feature so as not to give the impression 
that I consider it anything other than a last resort for production use. 
I do note that my x86 distro kernel has ARCH_MEMORY_PROBE enabled 
despite it being "for testing".



In other words, why do we have to add an API that has to be maintained
for ever for a testing only purpose?


There's already half the API being maintained, though, so adding the 
corresponding other half alongside it doesn't seem like that great an 
overhead, regardless of how it ends up getting used. Ultimately, though, 
it's a patch I wrote because I needed it, and if everyone else is 
adamant that it's not useful enough then fair enough - it's at least in 
the list archives now so I can sleep happy that I've done my 
"contributing back" bit as best I could :)



Besides that, what is the reason to use __remove_memory rather than the
exported remove_memory, which does additional locking?


For the same reason that probe uses __add_memory() rather than 
add_memory() - I can't claim to understand *exactly* why 
lock_device_hotplug_sysfs() does what it does compared to 
lock_device_hotplug() (even after reading 5e33bc4165f3), but I can only 
assume it's safest to be consistent with the other attributes here.



Also, we do
trust root to do sane things but are we sure that the current BUG-land
mines in the hotplug code are ready enough to be exported for playing?


Well, the point of this particular implementation as opposed to other 
approaches is that it's impossible by construction to even attempt to 
remove something which isn't an exact, valid memory_block. AFAICS that 
should make it at least as robust as any other hot-remove caller.


Robin.


Signed-off-by: Robin Murphy 
---

v2: Use is_memblock_offlined() helper, write up documentation

  .../ABI/testing/sysfs-devices-memory  | 25 +++
  drivers/base/memory.c | 42 ++-
  2 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-devices-memory 
b/Documentation/ABI/testing/sysfs-devices-memory
index deef3b5723cf..02a4250964e0 100644
--- a/Documentation/ABI/testing/sysfs-devices-memory
+++ b/Documentation/ABI/testing/sysfs-devices-memory
@@ -91,3 +91,28 @@ Description:
memory section directory.  For example, the following symbolic
link is created for memory section 9 on node0.
/sys/devices/system/node/node0/memory9 -> ../../memory/memory9
+
+What:  /sys/devices/system/memory/probe
+Date:  October 2005
+Contact:   Linux Memory Management list 
+Description:
+   The file /sys/devices/system/memory/probe is write-only, and
+   when written will simulate a physical hot-add of a memory
+   section at the given address. For example, assuming a section
+   of unused memory exists at physical address 0x8000, it can
+   be introduced to the kernel with the following command:
+   # echo 0x8000 > /sys/devices/system/memory/probe
+Users: Memory hotplug testing and development
+
+What:  /sys/devices/system/memory/memoryX/remove
+Date:  February 2019
+Contact:   Linux Memory Management list 
+Description:
+   The file /sys/devices/system/memory/memoryX/remove is
+   write-only, and when written with a boolean 'true' value will
+   simulate a physical hot-remove of that memory section. For
+   example, assuming a 1GB section size, the section added by the
+   above "probe" example could be removed again with the following
+   command:
+   # echo 1 >

[PATCH 2/4] Expose O_PATHSTATIC to userspace

2019-02-12 Thread demiobenour

From: "Demi M. Obenour" 

This adds the file open flag O_PATHSTATIC, which ensures that symbolic
links are *never* followed, even in path components other than the last.
This is distinct from O_NOFOLLOW, which only prevents symlinks in the
*last* component from being followed.

This is useful for avoiding race conditions in userspace code that
should expose only a subset of the filesystem to clients.  This includes
FTP and SFTP servers, QEMU, and others.

Currently, O_NOFOLLOW must be set if O_PATHSTATIC is set.  Otherwise,
open() fails with -EINVAL.
---
 fs/fcntl.c   |  2 +-
 fs/namei.c   |  6 ++
 fs/open.c| 24 ++--
 include/linux/fcntl.h|  2 +-
 include/uapi/asm-generic/fcntl.h |  4 
 5 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 083185174c6d..6c85c4d0c006 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -1031,7 +1031,7 @@ static int __init fcntl_init(void)
 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
 * is defined as O_NONBLOCK on some platforms and not on others.
 */
-   BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
+   BUILD_BUG_ON(22 - 1 /* for O_RDONLY being 0 */ !=
HWEIGHT32(
(VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
__FMODE_EXEC | __FMODE_NONOTIFY));
diff --git a/fs/namei.c b/fs/namei.c
index 54fbd2c7ba82..4c90f265c103 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3282,6 +3282,12 @@ static int do_last(struct nameidata *nd,
if (!(open_flag & O_CREAT)) {
if (nd->last.name[nd->last.len])
nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
+
+   if (open_flag & O_PATHSTATIC) {
+   nd->flags |= LOOKUP_NEVER_FOLLOW;
+   nd->flags &= ~LOOKUP_FOLLOW;
+   }
+
/* we _can_ be in RCU mode here */
error = lookup_fast(nd, , , );
if (likely(error > 0))
diff --git a/fs/open.c b/fs/open.c
index 0285ce7dbd51..717afa8179c0 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -940,6 +940,24 @@ static inline int build_open_flags(int flags, umode_t 
mode, struct open_flags *o
/* Must never be set by userspace */
flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC;
 
+   /*
+* If nonzero, setting O_PATHSTATIC but not O_NOFOLLOW fails with
+* -EINVAL.  Otherwise, setting O_PATHSTATIC automatically sets
+* O_NOFOLLOW.
+*/
+#define REQUIRE_NOFOLLOW_FOR_PATHSTATIC 1
+
+#if REQUIRE_NOFOLLOW_FOR_PATHSTATIC
+   /* O_PATHSTATIC doesn't make sense without O_NOFOLLOW */
+   if (unlikely((flags & O_PATHSTATIC) && !(flags & O_NOFOLLOW)))
+   return -EINVAL;
+#elif defined REQUIRE_NOFOLLOW_FOR_PATHSTATIC
+   if (flags & O_PATHSTATIC)
+   flags &= O_NOFOLLOW;
+#else
+#error REQUIRE_NOFOLLOW_FOR_PATHSTATIC must be defined
+#endif
+
/*
 * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
 * check for O_DSYNC if the need any syncing at all we enforce it's
@@ -959,7 +977,7 @@ static inline int build_open_flags(int flags, umode_t mode, 
struct open_flags *o
 * If we have O_PATH in the open flag. Then we
 * cannot have anything other than the below set of flags
 */
-   flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
+   flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH | O_PATHSTATIC;
acc_mode = 0;
}
 
@@ -986,7 +1004,9 @@ static inline int build_open_flags(int flags, umode_t 
mode, struct open_flags *o
 
if (flags & O_DIRECTORY)
lookup_flags |= LOOKUP_DIRECTORY;
-   if (!(flags & O_NOFOLLOW))
+   if (flags & O_PATHSTATIC)
+   lookup_flags |= LOOKUP_NEVER_FOLLOW;
+   else if (!(flags & O_NOFOLLOW))
lookup_flags |= LOOKUP_FOLLOW;
op->lookup_flags = lookup_flags;
return 0;
diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index 27dc7a60693e..6f91e1490592 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -9,7 +9,7 @@
(O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | 
\
 O_APPEND | O_NDELAY | O_NONBLOCK | O_NDELAY | __O_SYNC | O_DSYNC | \
 FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | \
-O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE)
+O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE | O_PATHSTATIC)
 
 #ifndef force_o_largefile
 #define force_o_largefile() (BITS_PER_LONG != 32)
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index 9dc0bf0c5a6e..314ea1cecf44 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -89,6 +89,10 @@
 #define __O_TMPFILE02000
 #endif
 
+#ifndef O_PATHSTATIC
+#define

[PATCH 3/4] Add AT_PATHSTATIC to linkat()

2019-02-12 Thread demiobenour

From: "Demi M. Obenour" 

This has the same meaning as O_PATHSTATIC does in openat(), and has the
same uses.
---
 fs/namei.c | 8 +++-
 include/uapi/linux/fcntl.h | 1 +
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/namei.c b/fs/namei.c
index 4c90f265c103..b47f89af00f2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4291,8 +4291,14 @@ int do_linkat(int olddfd, const char __user *oldname, 
int newdfd,
how = LOOKUP_EMPTY;
}
 
-   if (flags & AT_SYMLINK_FOLLOW)
+   if (flags & AT_SYMLINK_FOLLOW) {
+   if (flags & AT_PATHSTATIC)
+   return -EINVAL;
how |= LOOKUP_FOLLOW;
+   }
+
+   if (flags & AT_PATHSTATIC)
+   how |= LOOKUP_NEVER_FOLLOW;
 retry:
error = user_path_at(olddfd, oldname, how, _path);
if (error)
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 6448cdd9a350..a2f65635c8fc 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -89,6 +89,7 @@
 #define AT_STATX_SYNC_AS_STAT  0x  /* - Do whatever stat() does */
 #define AT_STATX_FORCE_SYNC0x2000  /* - Force the attributes to be sync'd 
with the server */
 #define AT_STATX_DONT_SYNC 0x4000  /* - Don't sync attributes with the 
server */
+#define AT_PATHSTATIC  0x8000  /* Do not follow symbolic links 
anywhere. */
 
 
 #endif /* _UAPI_LINUX_FCNTL_H */
-- 
2.20.1

[PATCH 4/4] Return -EINVAL if userspace passes bogus flags to open()

2019-02-12 Thread demiobenour

From: "Demi M. Obenour" 

While testing the O_PATHSTATIC patch, I discovered that Linux does not
return any error if an invalid flag is passed to open(2).  This prevents
adding new flags without a (minor) risk of breaking userspace.
Therefore, add a check for invalid flags, and return -EINVAL if any are
found.
---
 fs/open.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/fs/open.c b/fs/open.c
index 717afa8179c0..eeaa2eeb342a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1074,6 +1074,13 @@ long do_sys_open(int dfd, const char __user *filename, 
int flags, umode_t mode)
if (fd)
return fd;
 
+   /*
+* Enforce that open flags are valid, to ensure that new flags can be
+* added later.
+*/
+   if (unlikely(flags & ~VALID_OPEN_FLAGS))
+   return -EINVAL;
+
tmp = getname(filename);
if (IS_ERR(tmp))
return PTR_ERR(tmp);
-- 
2.20.1

Re: [PATCH] mei: expand minor range when registering chrdev region

2019-02-12 Thread cgxu519


On 2/12/19 5:29 PM, Greg KH wrote:

On Tue, Feb 12, 2019 at 02:02:52PM +0800, Chengguang Xu wrote:

Actually, the total number of available minor numbers
for a single major is MINORMASK + 1. So expand the
minor range when registering the chrdev region.

Signed-off-by: Chengguang Xu 
---
  drivers/misc/mei/main.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
index 87281b3695e6..3df54f1e1a8b 100644
--- a/drivers/misc/mei/main.c
+++ b/drivers/misc/mei/main.c
@@ -869,7 +869,7 @@ static const struct file_operations mei_fops = {
  
  static struct class *mei_class;

  static dev_t mei_devt;
-#define MEI_MAX_DEVS  MINORMASK
+#define MEI_MAX_DEVS  (MINORMASK + 1)

Why is this needed?  Have you really run out of that many minor nodes
for this driver?


Not really; in practice we probably cannot reach the limit.
I was just curious why only one minor number was left out, and assumed
that it was a mistake (since I've seen a similar mistake in another driver).
However, if it is explicitly set to MINORMASK for some reason, then it's
better to keep it as is.

Thanks

[PATCH 1/4] Add path resolution flag LOOKUP_NEVER_FOLLOW

2019-02-12 Thread demiobenour

From: "Demi M. Obenour" 

This adds the flag LOOKUP_NEVER_FOLLOW to path resolution, which tells
the code in fs/namei.c to never follow symlinks.  This flag overrides
LOOKUP_FOLLOW, since this makes internal APIs simpler: code can set the
flag without needing to also clear LOOKUP_FOLLOW, which is often set by
default.

This is a prerequisite to adding O_PATHSTATIC, but is also useful for
kernel-internal use.
---
 fs/namei.c| 4 +++-
 include/linux/namei.h | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/namei.c b/fs/namei.c
index 914178cdbe94..54fbd2c7ba82 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1720,7 +1720,9 @@ static int pick_link(struct nameidata *nd, struct path 
*link,
 {
int error;
struct saved *last;
-   if (unlikely(nd->total_link_count++ >= MAXSYMLINKS)) {
+   const int max_symlinks = (nd->flags & LOOKUP_NEVER_FOLLOW) ?
+   0 : MAXSYMLINKS;
+   if (unlikely(nd->total_link_count++ >= max_symlinks)) {
path_to_nameidata(link, nd);
return -ELOOP;
}
diff --git a/include/linux/namei.h b/include/linux/namei.h
index a78606e8e3df..f065502a653d 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -24,10 +24,12 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, 
LAST_BIND};
  *  - internal "there are more path components" flag
  *  - dentry cache is untrusted; force a real lookup
  *  - suppress terminal automount
+ *  - never follow symbolic links, even internally
  */
 #define LOOKUP_FOLLOW  0x0001
 #define LOOKUP_DIRECTORY   0x0002
 #define LOOKUP_AUTOMOUNT   0x0004
+#define LOOKUP_NEVER_FOLLOW0x0008
 
 #define LOOKUP_PARENT  0x0010
 #define LOOKUP_REVAL   0x0020
-- 
2.20.1

Re: Oops in rpc_clnt_debugfs_register() from debugfs change

2019-02-12 Thread David Howells

Greg Kroah-Hartman  wrote:

> - if (!xprt->debugfs) {
> + if (IS_ERR_OR_NULL(xprt->debugfs)) {

That works, though I don't much like the idea of there being an error there.

Looking in rpc_xprt_debugfs_register() there are two now-dodgy looking checks
on the result of debugfs calls.

David

Re: [PATCH] ser_gigaset: mark expected switch fall-through

2019-02-12 Thread Gustavo A. R. Silva




On 2/12/19 2:45 AM, Paul Bolle wrote:
> Gustavo A. R. Silva schreef op ma 11-02-2019 om 16:34 [-0600]:
>> In preparation to enabling -Wimplicit-fallthrough, mark switch
>> cases where we are expecting to fall through.
>>
>> This patch fixes the following warning:
>>
>> drivers/isdn/gigaset/ser-gigaset.c: In function ‘gigaset_tty_ioctl’:
>> drivers/isdn/gigaset/ser-gigaset.c:627:3: warning: this statement may fall 
>> through [-Wimplicit-fallthrough=]
>>switch (arg) {
>>^~
>> drivers/isdn/gigaset/ser-gigaset.c:638:2: note: here
>>   default:
>>   ^~~
>>
>> Warning level 3 was used: -Wimplicit-fallthrough=3
>>
>> Notice that, in this particular case, the code comment is modified
>> in accordance with what GCC is expecting to find.
>>
>> This patch is part of the ongoing efforts to enable
>> -Wimplicit-fallthrough.
>>
>> Signed-off-by: Gustavo A. R. Silva 
> 
> Acked-by: Paul Bolle 
> 

Thanks, Paul.

--
Gustavo

Re: Oops in rpc_clnt_debugfs_register() from debugfs change

2019-02-12 Thread Greg Kroah-Hartman

On Tue, Feb 12, 2019 at 03:42:14PM +0100, Greg Kroah-Hartman wrote:
> On Tue, Feb 12, 2019 at 03:37:20PM +0100, Greg Kroah-Hartman wrote:
> > On Tue, Feb 12, 2019 at 02:31:14PM +, David Howells wrote:
> > > I've bisected an oops that occurs in rpc_clnt_debugfs_register() trying to
> > > dereference a pointer with -EACCES in it.  This is the causing commit,
> > > though I suspect the bug is in sunrpc expecting to see NULL rather than
> > > an error.
> > > 
> > > ff9fb72bc07705c00795ca48631f7fffe24d2c6b is the first bad commit
> > > commit ff9fb72bc07705c00795ca48631f7fffe24d2c6b
> > > Author: Greg Kroah-Hartman 
> > > Date:   Wed Jan 23 11:28:14 2019 +0100
> > > 
> > > debugfs: return error values, not NULL
> > > 
> > > When an error happens, debugfs should return an error pointer value, 
> > > not
> > > NULL.  This will prevent the totally theoretical error where a debugfs
> > > call fails due to lack of memory, returning NULL, and that dentry 
> > > value
> > > is then passed to another debugfs call, which would end up succeeding,
> > > creating a file at the root of the debugfs tree, but would then be
> > > impossible to remove (because you can not remove the directory NULL).
> > > 
> > > So, to make everyone happy, always return errors, this makes the users
> > > of debugfs much simpler (they do not have to ever check the return
> > > value), and everyone can rest easy.
> > > ...
> > > 
> > > The attached oops occurs during boot from the gssproxy process in
> > > rpc_clnt_debugfs_register().  The code at this point is:
> > > 
> > >0x8195cbdd <+450>:   mov0x50(%rax),%rcx   <--- oopsing
> > >0x8195cbe1 <+454>:   mov$0x821cc8ba,%rdx
> > >0x8195cbe8 <+461>:   mov$0x18,%esi
> > >0x8195cbed <+466>:   lea-0x30(%rbp),%rdi
> > >0x8195cbf1 <+470>:   callq  0x819db773 
> > > 
> > > RAX is -EACCES.
> > > 
> > > Looking in the source:
> > > 
> > >   len = snprintf(name, sizeof(name), "../../rpc_xprt/%s",
> > >   xprt->debugfs->d_name.name);
> > > 
> > > I think xprt->debugfs is the value in RAX.
> > > 
> > >   (gdb) p &((struct dentry *)0)->d_name.name
> > >   $5 = (const unsigned char **) 0x50 
> > > 
> > > which matches the offset on the oopsing MOV instruction.
> > > 
> > > This is with linus/master (aa0c38cf39de73bf7360a3da8f1707601261e518).
> > 
> > Ugh, yeah, I see the problem, sorry about that.
> > 
> > I wonder why the debugfs call is always failing, that's not good...
> > 
> > let me dig and see if I already have a patch for this...
> 
> I have a much larger cleanup patch for this code, but this single line
> change should solve the issue for now.  Can you test it to verify?
> 
> thanks,
> 
> greg k-h
> 
> --
> 
> diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
> index 45a033329cd4..19bb356230ed 100644
> --- a/net/sunrpc/debugfs.c
> +++ b/net/sunrpc/debugfs.c
> @@ -146,7 +146,7 @@ rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
>   rcu_read_lock();
>   xprt = rcu_dereference(clnt->cl_xprt);
>   /* no "debugfs" dentry? Don't bother with the symlink. */
> - if (!xprt->debugfs) {
> + if (IS_ERR_OR_NULL(xprt->debugfs)) {
>   rcu_read_unlock();
>   return;
>   }


And, if you want my larger fix that I will be sending to netdev one of
these days, here's that one.  It includes the above patch as part of it.

thanks,

greg k-h

---

commit 8d885c486153d1731c14a6a435774a4e9ccd1ebc
Author: Greg Kroah-Hartman 
Date:   Fri Jan 4 13:40:56 2019 +0100

sunrpc: fix changelog

diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 45a033329cd4..ca63f6ed873f 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -135,18 +135,15 @@ rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
 
/* make the per-client dir */
clnt->cl_debugfs = debugfs_create_dir(name, rpc_clnt_dir);
-   if (!clnt->cl_debugfs)
-   return;
 
/* make tasks file */
-   if (!debugfs_create_file("tasks", S_IFREG | 0400, clnt->cl_debugfs,
-clnt, _fops))
-   goto out_err;
+   debugfs_create_file("tasks", S_IFREG | 0400, clnt->cl_debugfs, clnt,
+   _fops);
 
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
/* no "debugfs" dentry? Don't bother with the symlink. */
-   if (!xprt->debugfs) {
+   if (IS_ERR_OR_NULL(xprt->debugfs)) {
rcu_read_unlock();
return;
}
@@ -157,8 +154,7 @@ rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
if (len >= sizeof(name))
goto out_err;
 
-   if (!debugfs_create_symlink("xprt", clnt->cl_debugfs, name))
-   goto out_err;
+   debugfs_create_symlink("xprt", clnt->cl_debugfs, name);
 
return;
 out_err:
@@ -237,15 +233,10 @@

Re: [PATCH 0/2] driver core: Fixes related to device links

2019-02-12 Thread Rafael J. Wysocki

On Tue, Feb 12, 2019 at 3:53 PM Ulf Hansson  wrote:
>
> On Tue, 12 Feb 2019 at 15:09, Greg Kroah-Hartman
>  wrote:
> >
> > On Tue, Feb 12, 2019 at 01:01:13PM +0100, Rafael J. Wysocki wrote:
> > > Hi Greg et al,
> > >
> > > These fix two issues on top of the recent device links material in
> > > driver-core/driver-core-next.
> > >
> > > The first one fixes a race condition that may trigger when
> > > __pm_runtime_set_status() is used incorrectly (that is, when it is
> > > called with PM-runtime enabled for the target device and working).
> > >
> > > The second one fixes a supplier PM-runtime usage counter imbalance
> > > resulting from adding and removing (e.g. in the error code path) a
> > > stateless device link to it from within the consumer driver's probe
> > > callback.
> > >
> > > Please refer to the patch changelogs for details.
> >
> > Looks good, all now queued up, thanks.
>
> Greg, please don't get me wrong, but ~1.5 hours isn't sufficient for
> me to review/test submitted patches.
>
> I have been trying to collaborate (review/test) device links related
> code with Rafael, but what's the point if you queue up the patches,
> before I even got the chance to look at them. Shall I interpret it as
> you don't care about me reviewing this, then just tell me so I don't
> have to waste my time.

I certainly do care about that.

Moreover, if you find any issues in the patches, they still can be
dropped or incremental fixes on top of them can be done.

Your work and feedback here is much appreciated, please don't drop the ball. :-)

Re: Oops in rpc_clnt_debugfs_register() from debugfs change

2019-02-12 Thread Greg Kroah-Hartman

On Tue, Feb 12, 2019 at 02:57:34PM +, David Howells wrote:
> Greg Kroah-Hartman  wrote:
> 
> > -   if (!xprt->debugfs) {
> > +   if (IS_ERR_OR_NULL(xprt->debugfs)) {
> 
> That works, though I don't much like the idea of there being an error there.
> 
> Looking in rpc_xprt_debugfs_register() there are two now-dodgy looking checks
> on the result of debugfs calls.

now-dodgy checks are fine.  Well, they shouldn't matter, I've sent a
patch that just gets rid of those checks.

Ideally no one should need to check if debugfs is ok or not, the fact
that these functions keep getting called is a bit odd, I can look into
that some more, it shouldn't be needed...

thanks,

greg k-h

Re: [PATCH 0/2] driver core: Fixes related to device links

2019-02-12 Thread Greg Kroah-Hartman

On Tue, Feb 12, 2019 at 03:52:53PM +0100, Ulf Hansson wrote:
> On Tue, 12 Feb 2019 at 15:09, Greg Kroah-Hartman
>  wrote:
> >
> > On Tue, Feb 12, 2019 at 01:01:13PM +0100, Rafael J. Wysocki wrote:
> > > Hi Greg et al,
> > >
> > > These fix two issues on top of the recent device links material in
> > > driver-core/driver-core-next.
> > >
> > > The first one fixes a race condition that may trigger when
> > > __pm_runtime_set_status() is used incorrectly (that is, when it is
> > > called with PM-runtime enabled for the target device and working).
> > >
> > > The second one fixes a supplier PM-runtime usage counter imbalance
> > > resulting from adding and removing (e.g. in the error code path) a
> > > stateless device link to it from within the consumer driver's probe
> > > callback.
> > >
> > > Please refer to the patch changelogs for details.
> >
> > Looks good, all now queued up, thanks.
> 
> Greg, please don't get me wrong, but ~1.5 hours isn't sufficient for
> me to review/test submitted patches.
> 
> I have been trying to collaborate (review/test) device links related
> code with Rafael, but what's the point if you queue up the patches,
> before I even got the chance to look at them. Shall I interpret it as
> you don't care about me reviewing this, then just tell me so I don't
> have to waste my time.

As they are just in my -testing branch, I can easily drop them now if
you find problems.  I didn't realize that Rafael was wanting you to
review this as they were marked as "fixes:" for previous patches.

thanks,

greg k-h

Re: [bpf-next 1/2] tcp: replace SOCK_DEBUG() with tcp_stats()

2019-02-12 Thread Eric Dumazet




On 02/12/2019 03:31 AM, Yafang Shao wrote:
> SOCK_DEBUG is a very ancient debugging interface, and it's not very useful
> for debugging.
> So this patch removes the SOCK_DEBUG() and introduces a new function
> tcp_stats() to trace this kind of events.
> Some MIBs are added for these events.
> 
> Regarding the SO_DEBUG in sock_{s,g}etsockopt, I think it is better to
> keep as-is, because if we return an errno to tell the application that
> this optname isn't supported for TCP, it may break the application.
> The application still can use this option but it doesn't take any effect for
> TCP.
> 
> Signed-off-by: Yafang Shao 
> ---
>  include/uapi/linux/snmp.h |  3 +++
>  net/ipv4/proc.c   |  3 +++
>  net/ipv4/tcp_input.c  | 26 +++---
>  net/ipv6/tcp_ipv6.c   |  2 --
>  4 files changed, 17 insertions(+), 17 deletions(-)
> 
> diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
> index 86dc24a..fd5c09c 100644
> --- a/include/uapi/linux/snmp.h
> +++ b/include/uapi/linux/snmp.h
> @@ -283,6 +283,9 @@ enum
>   LINUX_MIB_TCPACKCOMPRESSED, /* TCPAckCompressed */
>   LINUX_MIB_TCPZEROWINDOWDROP,/* TCPZeroWindowDrop */
>   LINUX_MIB_TCPRCVQDROP,  /* TCPRcvQDrop */
> + LINUX_MIB_TCPINVALIDACK,/* TCPInvalidAck */
> + LINUX_MIB_TCPOLDACK,/* TCPOldAck */
> + LINUX_MIB_TCPPARTIALPACKET, /* TCPPartialPacket */
>   __LINUX_MIB_MAX
>  };
>  
> diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
> index c3610b3..1b0320a 100644
> --- a/net/ipv4/proc.c
> +++ b/net/ipv4/proc.c
> @@ -291,6 +291,9 @@ static int sockstat_seq_show(struct seq_file *seq, void 
> *v)
>   SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
>   SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
>   SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
> + SNMP_MIB_ITEM("TCPInvalidAck", LINUX_MIB_TCPINVALIDACK),
> + SNMP_MIB_ITEM("TCPOldAck", LINUX_MIB_TCPOLDACK),
> + SNMP_MIB_ITEM("TCPPartialPacket", LINUX_MIB_TCPPARTIALPACKET),
>   SNMP_MIB_SENTINEL
>  };
>  
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 7a027dec..88deb1f 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -3554,6 +3554,11 @@ static u32 tcp_newly_delivered(struct sock *sk, u32 
> prior_delivered, int flag)
>   return delivered;
>  }
>  
> +static void tcp_stats(struct sock *sk, int mib_idx)
> +{
> + NET_INC_STATS(sock_net(sk), mib_idx);
> +}

This is not a very descriptive name.

Why is it static, and in net/ipv4/tcp_input.c ???

> +
>  /* This routine deals with incoming acks, but not outgoing ones. */
>  static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
>  {
> @@ -3715,7 +3720,7 @@ static int tcp_ack(struct sock *sk, const struct 
> sk_buff *skb, int flag)
>   return 1;
>  
>  invalid_ack:
> - SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
> + tcp_stats(sk, LINUX_MIB_TCPINVALIDACK);
>   return -1;
>  
>  old_ack:
> @@ -3731,7 +3736,7 @@ static int tcp_ack(struct sock *sk, const struct 
> sk_buff *skb, int flag)
>   tcp_xmit_recovery(sk, rexmit);
>   }
>  
> - SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
> + tcp_stats(sk, LINUX_MIB_TCPOLDACK);
>   return 0;
>  }
>


These counters will add noise to an already crowded MIB space.

What bug do you expect to track and fix with these ?

I see many TCP patches coming adding icache pressure, enabling companies to 
build their own modified
TCP stack, but no real meat.

Re: [PATCH 11/15] PCI: pci-epf-test: Use pci_epc_get_features to get EPC features

2019-02-12 Thread Lorenzo Pieralisi

On Mon, Jan 07, 2019 at 12:11:44PM +0530, Kishon Vijay Abraham I wrote:

[...]

>  static int pci_epf_test_bind(struct pci_epf *epf)
>  {
>   int ret;
>   struct pci_epf_test *epf_test = epf_get_drvdata(epf);
>   struct pci_epf_header *header = epf->header;
> + const struct pci_epc_features *epc_features;
> + enum pci_barno test_reg_bar = BAR_0;
>   struct pci_epc *epc = epf->epc;
>   struct device *dev = >dev;
> + bool linkup_notifier = false;
> + bool msix_capable = false;
> + bool msi_capable = true;
>  
>   if (WARN_ON_ONCE(!epc))
>   return -EINVAL;
>  
> - if (epc->features & EPC_FEATURE_NO_LINKUP_NOTIFIER)
> - epf_test->linkup_notifier = false;
> - else
> - epf_test->linkup_notifier = true;
> -
> - epf_test->msix_available = epc->features & EPC_FEATURE_MSIX_AVAILABLE;
> + epc_features = pci_epc_get_features(epc, epf->func_no);

I think it would work out better if struct pci_epc_features was
allocated in the caller (stack) and pci_epc_get_features() take a
pointer parameter to it rather than the callee and the callee would just
have to fill it out, this also removes data in the driver that is not
really useful.

Is there any other reason behind the current design choice ?

Thanks,
Lorenzo

> + if (!epc_features) {
> + linkup_notifier = epc_features->linkup_notifier;
> + msix_capable = epc_features->msix_capable;
> + msi_capable = epc_features->msi_capable;
> + test_reg_bar = pci_epc_get_first_free_bar(epc_features);
> + pci_epf_configure_bar(epf, epc_features);
> + }
>  
> - epf_test->test_reg_bar = EPC_FEATURE_GET_BAR(epc->features);
> + epf_test->test_reg_bar = test_reg_bar;
>  
>   ret = pci_epc_write_header(epc, epf->func_no, header);
>   if (ret) {
> @@ -492,13 +509,15 @@ static int pci_epf_test_bind(struct pci_epf *epf)
>   if (ret)
>   return ret;
>  
> - ret = pci_epc_set_msi(epc, epf->func_no, epf->msi_interrupts);
> - if (ret) {
> - dev_err(dev, "MSI configuration failed\n");
> - return ret;
> + if (msi_capable) {
> + ret = pci_epc_set_msi(epc, epf->func_no, epf->msi_interrupts);
> + if (ret) {
> + dev_err(dev, "MSI configuration failed\n");
> + return ret;
> + }
>   }
>  
> - if (epf_test->msix_available) {
> + if (msix_capable) {
>   ret = pci_epc_set_msix(epc, epf->func_no, epf->msix_interrupts);
>   if (ret) {
>   dev_err(dev, "MSI-X configuration failed\n");
> @@ -506,7 +525,7 @@ static int pci_epf_test_bind(struct pci_epf *epf)
>   }
>   }
>  
> - if (!epf_test->linkup_notifier)
> + if (!linkup_notifier)
>   queue_work(kpcitest_workqueue, _test->cmd_handler.work);
>  
>   return 0;
> @@ -523,17 +542,6 @@ static int pci_epf_test_probe(struct pci_epf *epf)
>  {
>   struct pci_epf_test *epf_test;
>   struct device *dev = >dev;
> - const struct pci_epf_device_id *match;
> - struct pci_epf_test_data *data;
> - enum pci_barno test_reg_bar = BAR_0;
> - bool linkup_notifier = true;
> -
> - match = pci_epf_match_device(pci_epf_test_ids, epf);
> - data = (struct pci_epf_test_data *)match->driver_data;
> - if (data) {
> - test_reg_bar = data->test_reg_bar;
> - linkup_notifier = data->linkup_notifier;
> - }
>  
>   epf_test = devm_kzalloc(dev, sizeof(*epf_test), GFP_KERNEL);
>   if (!epf_test)
> @@ -541,8 +549,6 @@ static int pci_epf_test_probe(struct pci_epf *epf)
>  
>   epf->header = _header;
>   epf_test->epf = epf;
> - epf_test->test_reg_bar = test_reg_bar;
> - epf_test->linkup_notifier = linkup_notifier;
>  
>   INIT_DELAYED_WORK(_test->cmd_handler, pci_epf_test_cmd_handler);
>  
> -- 
> 2.17.1
>

Re: [PATCH] staging: vt6656: Use the correct style for SPDX license Identifier

2019-02-12 Thread Nishad Kamdar

On Tue, Feb 05, 2019 at 07:44:31PM +0100, Greg Kroah-Hartman wrote:
> On Tue, Feb 05, 2019 at 08:36:24PM +0530, Nishad Kamdar wrote:
> > This patch corrects the style for SPDX license Identifier in mac.h
> > by using "/* */" in place of "//" as per Linux kernel licensing rules.
> > Issue found by checkpatch.
> > 
> > Signed-off-by: Nishad Kamdar 
> > ---
> >  drivers/staging/vt6656/mac.h | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/staging/vt6656/mac.h b/drivers/staging/vt6656/mac.h
> > index 94e700fcd0b6..75166020f7c6 100644
> > --- a/drivers/staging/vt6656/mac.h
> > +++ b/drivers/staging/vt6656/mac.h
> > @@ -1,5 +1,5 @@
> > -// SPDX-License-Identifier: GPL-2.0+
> > -/*
> > +/* SPDX-License-Identifier: GPL-2.0+
> 
> Should really be:
> 
> /* SPDX-License-Identifier: GPL-2.0+ */
> 
> thanks,
> 
> greg k-h

Ok, i'll modify it.

Thanks for the review.

Regards,
Nishad

Re: [bpf-next 2/2] bpf: add BPF_SOCK_OPS_STATS_CB for tcp_stats()

2019-02-12 Thread Eric Dumazet




On 02/12/2019 03:31 AM, Yafang Shao wrote:
> Introduce this new op BPF_SOCK_OPS_STATS_CB for tcp_stats() such that it
> can be traced via BPF on a per socket basis.
> There's one argument in BPF_SOCK_OPS_STATS_CB, which is Linux MIB index
> LINUX_MIB_* to indicate the TCP event.
> All these Linux MIBs are defined in include/uapi/linux/snmp.h.
> 
> Signed-off-by: Yafang Shao 
> ---
>  include/uapi/linux/bpf.h | 5 +
>  net/ipv4/tcp_input.c | 1 +
>  2 files changed, 6 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 1777fa0..0314ddd 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2894,6 +2894,11 @@ enum {
>   BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after
>* socket transition to LISTEN state.
>*/
> + BPF_SOCK_OPS_STATS_CB,  /*
> +  * Called on tcp_stats().
> +  * Arg1: Linux MIB index
> +  *   LINUX_MIB_*
> +  */
>  };
>  
>  /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 88deb1f..4acf458 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -3557,6 +3557,7 @@ static u32 tcp_newly_delivered(struct sock *sk, u32 
> prior_delivered, int flag)
>  static void tcp_stats(struct sock *sk, int mib_idx)
>  {
>   NET_INC_STATS(sock_net(sk), mib_idx);
> + tcp_call_bpf(sk, BPF_SOCK_OPS_STATS_CB, 1, _idx);
>  }
>  
>  /* This routine deals with incoming acks, but not outgoing ones. */
> 

If the plan is to add to all NET_INC_STATS() calls in TCP an additional 
tcp_call_bpf()
I will say no.

So far, tcp_call_bpf() has not been used in fast path.

Re: [PATCH v2] mm/memory-hotplug: Add sysfs hot-remove trigger

2019-02-12 Thread Michal Hocko

On Tue 12-02-19 14:54:36, Robin Murphy wrote:
> On 12/02/2019 08:33, Michal Hocko wrote:
> > On Mon 11-02-19 17:50:46, Robin Murphy wrote:
> > > ARCH_MEMORY_PROBE is a useful thing for testing and debugging hotplug,
> > > but being able to exercise the (arguably trickier) hot-remove path would
> > > be even more useful. Extend the feature to allow removal of offline
> > > sections to be triggered manually to aid development.
> > > 
> > > Since process dictates the new sysfs entry be documented, let's also
> > > document the existing probe entry to match - better 13-and-a-half years
> > > late than never, as they say...
> > 
> > The probe sysfs is quite dubious already TBH. Apart from testing, is
> > anybody using it for something real? Do we need to keep an API for
> > something testing only? Why isn't a customer testing module enough for
> > such a purpose?
> 
> From the arm64 angle, beyond "conventional" servers where we can hopefully
> assume ACPI, I can imagine there being embedded/HPC setups (not all as
> esoteric as that distributed-memory dRedBox thing), as well as virtual
> machines, that are DT-based with minimal runtime firmware. I'm none too keen
> on the idea either, but if such systems want to support physical hotplug
> then driving it from userspace might be the only reasonable approach. I'm
> just loath to actually document it as anything other than a developer
> feature so as not to give the impression that I consider it anything other
> than a last resort for production use.

This doesn't sound convincing enough to add a user API.

> I do note that my x86 distro kernel
> has ARCH_MEMORY_PROBE enabled despite it being "for testing".

Yeah, there have been mistakes done in the API land & hotplug in the
past.

> > In other words, why do we have to add an API that has to be maintained
> > for ever for a testing only purpose?
> 
> There's already half the API being maintained, though, so adding the
> corresponding other half alongside it doesn't seem like that great an
> overhead, regardless of how it ends up getting used.

As already said above. The hotplug user API is not something to follow
for the future development. So no, "we are half broken, let's continue" is
not a reasonable argument.

> Ultimately, though,
> it's a patch I wrote because I needed it, and if everyone else is adamant
> that it's not useful enough then fair enough - it's at least in the list
> archives now so I can sleep happy that I've done my "contributing back" bit
> as best I could :)

I am not saying this is not useful. It is. But I do not think we want to
make it an official api without a strong usecase. And then we should
think twice to make the api both useable and reasonable. A kernel module
for playing around sounds like more than sufficient.
-- 
Michal Hocko
SUSE Labs

Re: Oops in rpc_clnt_debugfs_register() from debugfs change

2019-02-12 Thread Greg Kroah-Hartman

On Tue, Feb 12, 2019 at 04:04:59PM +0100, Greg Kroah-Hartman wrote:
> On Tue, Feb 12, 2019 at 02:57:34PM +, David Howells wrote:
> > Greg Kroah-Hartman  wrote:
> > 
> > > - if (!xprt->debugfs) {
> > > + if (IS_ERR_OR_NULL(xprt->debugfs)) {
> > 
> > That works, though I don't much like the idea of there being an error there.
> > 
> > Looking in rpc_xprt_debugfs_register() there are two now-dodgy looking 
> > checks
> > on the result of debugfs calls.
> 
> now-dodgy checks are fine.  Well, they shouldn't matter, I've sent a
> patch that just gets rid of those checks.
> 
> Ideally no one should need to check if debugfs is ok or not, the fact
> that these functions keep getting called is a bit odd, I can look into
> that some more, it shouldn't be needed...

And here's a "final" version of this, that removes all of the "dodgy"
checks, with the exception of the "is this actually a dentry" check that
my first patch had, which is still required.

Overall it makes the code smaller and simpler, but for 5.0-final, I
think my original patch should be all that is needed.

thanks,

greg k-h

-

>From e6114e66bb7921b1e83e5ca0083893afa7816b45 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman 
Date: Fri, 4 Jan 2019 13:40:56 +0100
Subject: [PATCH] sunrpc: no need to check return value of debugfs_create 
functions

When calling debugfs functions, there is no need to ever check the
return value.  The function can work or not, but the code logic should
never do something different based on this.

Signed-off-by: Greg Kroah-Hartman 

---
 net/sunrpc/debugfs.c | 68 
 1 file changed, 12 insertions(+), 56 deletions(-)

diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 45a033329cd4..c7ad5772f5d9 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -11,7 +11,6 @@
 #include "netns.h"
 
 static struct dentry *topdir;
-static struct dentry *rpc_fault_dir;
 static struct dentry *rpc_clnt_dir;
 static struct dentry *rpc_xprt_dir;
 
@@ -125,28 +124,21 @@ rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
char name[24]; /* enough for "../../rpc_xprt/ + 8 hex digits + NULL */
struct rpc_xprt *xprt;
 
-   /* Already registered? */
-   if (clnt->cl_debugfs || !rpc_clnt_dir)
-   return;
-
len = snprintf(name, sizeof(name), "%x", clnt->cl_clid);
if (len >= sizeof(name))
return;
 
/* make the per-client dir */
clnt->cl_debugfs = debugfs_create_dir(name, rpc_clnt_dir);
-   if (!clnt->cl_debugfs)
-   return;
 
/* make tasks file */
-   if (!debugfs_create_file("tasks", S_IFREG | 0400, clnt->cl_debugfs,
-clnt, _fops))
-   goto out_err;
+   debugfs_create_file("tasks", S_IFREG | 0400, clnt->cl_debugfs, clnt,
+   _fops);
 
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
/* no "debugfs" dentry? Don't bother with the symlink. */
-   if (!xprt->debugfs) {
+   if (IS_ERR_OR_NULL(xprt->debugfs)) {
rcu_read_unlock();
return;
}
@@ -157,8 +149,7 @@ rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
if (len >= sizeof(name))
goto out_err;
 
-   if (!debugfs_create_symlink("xprt", clnt->cl_debugfs, name))
-   goto out_err;
+   debugfs_create_symlink("xprt", clnt->cl_debugfs, name);
 
return;
 out_err:
@@ -226,9 +217,6 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
static atomic_t cur_id;
charname[9]; /* 8 hex digits + NULL term */
 
-   if (!rpc_xprt_dir)
-   return;
-
id = (unsigned int)atomic_inc_return(_id);
 
len = snprintf(name, sizeof(name), "%x", id);
@@ -237,15 +225,10 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
 
/* make the per-client dir */
xprt->debugfs = debugfs_create_dir(name, rpc_xprt_dir);
-   if (!xprt->debugfs)
-   return;
 
/* make tasks file */
-   if (!debugfs_create_file("info", S_IFREG | 0400, xprt->debugfs,
-xprt, _info_fops)) {
-   debugfs_remove_recursive(xprt->debugfs);
-   xprt->debugfs = NULL;
-   }
+   debugfs_create_file("info", S_IFREG | 0400, xprt->debugfs, xprt,
+   _info_fops);
 
atomic_set(>inject_disconnect, rpc_inject_disconnect);
 }
@@ -308,28 +291,11 @@ static const struct file_operations fault_disconnect_fops 
= {
.release= fault_release,
 };
 
-static struct dentry *
-inject_fault_dir(struct dentry *topdir)
-{
-   struct dentry *faultdir;
-
-   faultdir = debugfs_create_dir("inject_fault", topdir);
-   if (!faultdir)
-   return NULL;
-
-   if (!debugfs_create_file("disconnect", S_IFREG | 0400, faultdir,
-NULL, _disconnect_fops))
-

[PATCH 2/4] mm: Move nr_deactivate accounting to shrink_active_list()

2019-02-12 Thread Kirill Tkhai

We know which LRU is not active.

Signed-off-by: Kirill Tkhai 
---
 mm/vmscan.c |   10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 84542004a277..8d7d55e71511 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2040,12 +2040,6 @@ static unsigned move_active_pages_to_lru(struct lruvec 
*lruvec,
}
}
 
-   if (!is_active_lru(lru)) {
-   __count_vm_events(PGDEACTIVATE, nr_moved);
-   count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE,
-  nr_moved);
-   }
-
return nr_moved;
 }
 
@@ -2137,6 +2131,10 @@ static void shrink_active_list(unsigned long nr_to_scan,
 
nr_activate = move_active_pages_to_lru(lruvec, _active, _hold, lru);
nr_deactivate = move_active_pages_to_lru(lruvec, _inactive, _hold, 
lru - LRU_ACTIVE);
+
+   __count_vm_events(PGDEACTIVATE, nr_deactivate);
+   __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_deactivate);
+
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(>lru_lock);

[PATCH 0/8] arm64: meson: Add support for USB on Amlogic G12A

2019-02-12 Thread Neil Armstrong

This patchset adds support for USB on Amlogic G12A SoCs.

This patchset is composed with :
- bindings of the PHYs
- bindings of the USB Control Glue
- PHY Drivers
- USB Control Glue driver

Device Tree nodes will be added in a separate patchset.

The Amlogic G12A USB Complex is composed of :
- 2 USB Controllers :
 * DWC3 for USB2 and USB3 Host functionality
 * DWC2 for USB2 Peripheral functionality
- 2 USB2 OTG PHYs, only a single one will be routed to either DWC2 or DWC3
- 1 USB3 PHY shared with PCIE functionality
- A Glue to control PHY routing, setup and OTG detection

The Glue configures the UTMI 8bit interfaces for the USB2 PHYs, including
routing of the OTG PHY between the DWC3 and DWC2 controllers, and
sets up the on-chip OTG mode selection for this PHY.

The PHYs are children of the Glue node since the Glue controls the interface
with the PHY, not the DWC3 controller.

The PHY interconnect is handled into ports subnodes, which eases describing
which PHY is enabled (like the USB3 shared PHY) and future layouts on
derivatives of the G12A Family.

This driver supports the on-probe setup of the OTG mode, and manually
via a debugfs interface. The IRQ mode change detect is yet to be added
in a future patchset, mainly due to lack of hardware to validate on.

Neil Armstrong (8):
  dt-bindings: phy: Add Amlogic G12A USB2 PHY Bindings
  dt-bindings: phy: Add Amlogic G12A USB3+PCIE Combo PHY Bindings
  dt-bindings: usb: dwc2: Add Amlogic G12A DWC2 Compatible
  dt-bindings: usb: dwc3: Add Amlogic G12A DWC3 Glue Bindings
  phy: amlogic: add Amlogic G12A USB2 PHY Driver
  phy: amlogic: Add Amlogic G12A USB3 + PCIE Combo PHY Driver
  usb: dwc2: Add Amlogic G12A DWC2 Params
  usb: dwc3: Add Amlogic G12A DWC3 glue

 .../bindings/phy/meson-g12a-usb2-phy.txt  |  22 +
 .../bindings/phy/meson-g12a-usb3-pcie-phy.txt |  25 +
 .../devicetree/bindings/usb/amlogic,dwc3.txt  | 109 +++
 .../devicetree/bindings/usb/dwc2.txt  |   1 +
 drivers/phy/amlogic/Kconfig   |  24 +
 drivers/phy/amlogic/Makefile  |   2 +
 drivers/phy/amlogic/phy-meson-g12a-usb2.c | 191 +
 .../phy/amlogic/phy-meson-g12a-usb3-pcie.c| 414 +++
 drivers/usb/dwc2/params.c |  12 +
 drivers/usb/dwc3/Kconfig  |   9 +
 drivers/usb/dwc3/Makefile |   1 +
 drivers/usb/dwc3/dwc3-meson-g12a.c| 650 ++
 12 files changed, 1460 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/phy/meson-g12a-usb2-phy.txt
 create mode 100644 
Documentation/devicetree/bindings/phy/meson-g12a-usb3-pcie-phy.txt
 create mode 100644 drivers/phy/amlogic/phy-meson-g12a-usb2.c
 create mode 100644 drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c
 create mode 100644 drivers/usb/dwc3/dwc3-meson-g12a.c

-- 
2.20.1

[PATCH 4/4] mm: Generalize putback scan functions

2019-02-12 Thread Kirill Tkhai

This combines two similar functions move_active_pages_to_lru()
and putback_inactive_pages() into single move_pages_to_lru().
This removes duplicate code and makes object file size smaller.

Before:
   textdata bss dec hex filename
  570824732 128   61942f1f6 mm/vmscan.o
After:
   textdata bss dec hex filename
  551124600 128   59840e9c0 mm/vmscan.o

Signed-off-by: Kirill Tkhai 
---
 mm/vmscan.c |  124 ---
 1 file changed, 41 insertions(+), 83 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 88fa71e4c28f..66e70cf1dd94 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1807,33 +1807,53 @@ static int too_many_isolated(struct pglist_data *pgdat, 
int file,
return isolated > inactive;
 }
 
-static noinline_for_stack void
-putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
+/*
+ * This moves pages from @list to corresponding LRU list.
+ *
+ * We move them the other way if the page is referenced by one or more
+ * processes, from rmap.
+ *
+ * If the pages are mostly unmapped, the processing is fast and it is
+ * appropriate to hold zone_lru_lock across the whole operation.  But if
+ * the pages are mapped, the processing is slow (page_referenced()) so we
+ * should drop zone_lru_lock around each page.  It's impossible to balance
+ * this, so instead we remove the pages from the LRU while processing them.
+ * It is safe to rely on PG_active against the non-LRU pages in here because
+ * nobody will play with that bit on a non-LRU page.
+ *
+ * The downside is that we have to touch page->_refcount against each page.
+ * But we had to alter page->flags anyway.
+ *
+ * Returns the number of pages moved to the given lruvec.
+ */
+
+static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
+struct list_head *list)
 {
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+   int nr_pages, nr_moved = 0;
LIST_HEAD(pages_to_free);
+   struct page *page;
+   enum lru_list lru;
 
-   /*
-* Put back any unfreeable pages.
-*/
-   while (!list_empty(page_list)) {
-   struct page *page = lru_to_page(page_list);
-   int lru;
-
-   VM_BUG_ON_PAGE(PageLRU(page), page);
-   list_del(>lru);
+   while (!list_empty(list)) {
+   page = lru_to_page(list);
if (unlikely(!page_evictable(page))) {
+   list_del_init(>lru);
spin_unlock_irq(>lru_lock);
putback_lru_page(page);
spin_lock_irq(>lru_lock);
continue;
}
-
lruvec = mem_cgroup_page_lruvec(page, pgdat);
 
+   VM_BUG_ON_PAGE(PageLRU(page), page);
SetPageLRU(page);
lru = page_lru(page);
-   add_page_to_lru_list(page, lruvec, lru);
+
+   nr_pages = hpage_nr_pages(page);
+   update_lru_size(lruvec, lru, page_zonenum(page), nr_pages);
+   list_move(>lru, >lists[lru]);
 
if (put_page_testzero(page)) {
__ClearPageLRU(page);
@@ -1847,13 +1867,17 @@ putback_inactive_pages(struct lruvec *lruvec, struct 
list_head *page_list)
spin_lock_irq(>lru_lock);
} else
list_add(>lru, _to_free);
+   } else {
+   nr_moved += nr_pages;
}
}
 
/*
 * To save our caller's stack, now use input list for pages to free.
 */
-   list_splice(_to_free, page_list);
+   list_splice(_to_free, list);
+
+   return nr_moved;
 }
 
 /*
@@ -1945,7 +1969,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct 
lruvec *lruvec,
reclaim_stat->recent_rotated[0] = stat.nr_activate[0];
reclaim_stat->recent_rotated[1] = stat.nr_activate[1];
 
-   putback_inactive_pages(lruvec, _list);
+   move_pages_to_lru(lruvec, _list);
 
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
 
@@ -1982,72 +2006,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct 
lruvec *lruvec,
return nr_reclaimed;
 }
 
-/*
- * This moves pages from the active list to the inactive list.
- *
- * We move them the other way if the page is referenced by one or more
- * processes, from rmap.
- *
- * If the pages are mostly unmapped, the processing is fast and it is
- * appropriate to hold zone_lru_lock across the whole operation.  But if
- * the pages are mapped, the processing is slow (page_referenced()) so we
- * should drop zone_lru_lock around each page.  It's impossible to balance
- * this, so instead we remove the pages from the LRU while processing them.
- * It is safe to rely on PG_active against the non-LRU pages

[PATCH 3/4] mm: Remove pages_to_free argument of move_active_pages_to_lru()

2019-02-12 Thread Kirill Tkhai

We may use input argument list as output argument too.
This makes the function more similar to putback_inactive_pages().

Signed-off-by: Kirill Tkhai 
---
 mm/vmscan.c |   19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8d7d55e71511..88fa71e4c28f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2004,10 +2004,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct 
lruvec *lruvec,
 
 static unsigned move_active_pages_to_lru(struct lruvec *lruvec,
 struct list_head *list,
-struct list_head *pages_to_free,
 enum lru_list lru)
 {
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+   LIST_HEAD(pages_to_free);
struct page *page;
int nr_pages;
int nr_moved = 0;
@@ -2034,12 +2034,17 @@ static unsigned move_active_pages_to_lru(struct lruvec 
*lruvec,
(*get_compound_page_dtor(page))(page);
spin_lock_irq(>lru_lock);
} else
-   list_add(>lru, pages_to_free);
+   list_add(>lru, _to_free);
} else {
nr_moved += nr_pages;
}
}
 
+   /*
+* To save our caller's stack, now use input list for pages to free.
+*/
+   list_splice(_to_free, list);
+
return nr_moved;
 }
 
@@ -2129,8 +2134,10 @@ static void shrink_active_list(unsigned long nr_to_scan,
 */
reclaim_stat->recent_rotated[file] += nr_rotated;
 
-   nr_activate = move_active_pages_to_lru(lruvec, _active, _hold, lru);
-   nr_deactivate = move_active_pages_to_lru(lruvec, _inactive, _hold, 
lru - LRU_ACTIVE);
+   nr_activate = move_active_pages_to_lru(lruvec, _active, lru);
+   nr_deactivate = move_active_pages_to_lru(lruvec, _inactive, lru - 
LRU_ACTIVE);
+   /* Keep all free pages are in l_active list */
+   list_splice(_inactive, _active);
 
__count_vm_events(PGDEACTIVATE, nr_deactivate);
__count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_deactivate);
@@ -2138,8 +2145,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(>lru_lock);
 
-   mem_cgroup_uncharge_list(_hold);
-   free_unref_page_list(_hold);
+   mem_cgroup_uncharge_list(_active);
+   free_unref_page_list(_active);
trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate,
nr_deactivate, nr_rotated, sc->priority, file);
 }

[PATCH 0/4] mm: Generalize putback functions

2019-02-12 Thread Kirill Tkhai

Functions putback_inactive_pages() and move_active_pages_to_lru()
are almost identical, so this patchset merges them into a single function.

---

Kirill Tkhai (4):
  mm: Move recent_rotated pages calculation to shrink_inactive_list()
  mm: Move nr_deactivate accounting to shrink_active_list()
  mm: Remove pages_to_free argument of move_active_pages_to_lru()
  mm: Generalize putback scan functions


 include/linux/vmstat.h |2 -
 mm/vmscan.c|  150 ++--
 2 files changed, 57 insertions(+), 95 deletions(-)

--
Signed-off-by: Kirill Tkhai

[PATCH 3/8] dt-bindings: usb: dwc2: Add Amlogic G12A DWC2 Compatible

2019-02-12 Thread Neil Armstrong

Adds the specific compatible string for the DWC2 IP found in the
Amlogic G12A SoC Family.

Signed-off-by: Neil Armstrong 
---
 Documentation/devicetree/bindings/usb/dwc2.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/usb/dwc2.txt 
b/Documentation/devicetree/bindings/usb/dwc2.txt
index 6dc3c4a34483..e150b7b227c9 100644
--- a/Documentation/devicetree/bindings/usb/dwc2.txt
+++ b/Documentation/devicetree/bindings/usb/dwc2.txt
@@ -14,6 +14,7 @@ Required properties:
   - "amlogic,meson8-usb": The DWC2 USB controller instance in Amlogic Meson8 
SoCs;
   - "amlogic,meson8b-usb": The DWC2 USB controller instance in Amlogic Meson8b 
SoCs;
   - "amlogic,meson-gxbb-usb": The DWC2 USB controller instance in Amlogic S905 
SoCs;
+  - "amlogic,meson-g12a-usb": The DWC2 USB controller instance in Amlogic G12A 
SoCs;
   - "amcc,dwc-otg": The DWC2 USB controller instance in AMCC Canyonlands 460EX 
SoCs;
   - snps,dwc2: A generic DWC2 USB controller with default parameters.
   - "st,stm32f4x9-fsotg": The DWC2 USB FS/HS controller instance in STM32F4x9 
SoCs
-- 
2.20.1

[PATCH 1/4] mm: Move recent_rotated pages calculation to shrink_inactive_list()

2019-02-12 Thread Kirill Tkhai

Currently, struct reclaim_stat::nr_activate is a local variable,
used only in shrink_page_list(). This patch introduces another
local variable pgactivate to use instead of it, and reuses
nr_activate to account number of active pages.

Note, that we need nr_activate to be an array, since type of page
may change during shrink_page_list() (see ClearPageSwapBacked()).

Signed-off-by: Kirill Tkhai 
---
 include/linux/vmstat.h |2 +-
 mm/vmscan.c|   15 +++
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 2db8d60981fe..bdeda4b079fe 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -26,7 +26,7 @@ struct reclaim_stat {
unsigned nr_congested;
unsigned nr_writeback;
unsigned nr_immediate;
-   unsigned nr_activate;
+   unsigned nr_activate[2];
unsigned nr_ref_keep;
unsigned nr_unmap_fail;
 };
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ac4806f0f332..84542004a277 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1107,6 +1107,7 @@ static unsigned long shrink_page_list(struct list_head 
*page_list,
LIST_HEAD(ret_pages);
LIST_HEAD(free_pages);
unsigned nr_reclaimed = 0;
+   unsigned pgactivate = 0;
 
memset(stat, 0, sizeof(*stat));
cond_resched();
@@ -1466,8 +1467,10 @@ static unsigned long shrink_page_list(struct list_head 
*page_list,
try_to_free_swap(page);
VM_BUG_ON_PAGE(PageActive(page), page);
if (!PageMlocked(page)) {
+   int type = page_is_file_cache(page);
SetPageActive(page);
-   stat->nr_activate++;
+   pgactivate++;
+   stat->nr_activate[type] += hpage_nr_pages(page);
count_memcg_page_event(page, PGACTIVATE);
}
 keep_locked:
@@ -1482,7 +1485,7 @@ static unsigned long shrink_page_list(struct list_head 
*page_list,
free_unref_page_list(_pages);
 
list_splice(_pages, page_list);
-   count_vm_events(PGACTIVATE, stat->nr_activate);
+   count_vm_events(PGACTIVATE, pgactivate);
 
return nr_reclaimed;
 }
@@ -1807,7 +1810,6 @@ static int too_many_isolated(struct pglist_data *pgdat, 
int file,
 static noinline_for_stack void
 putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
 {
-   struct zone_reclaim_stat *reclaim_stat = >reclaim_stat;
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
LIST_HEAD(pages_to_free);
 
@@ -1833,11 +1835,6 @@ putback_inactive_pages(struct lruvec *lruvec, struct 
list_head *page_list)
lru = page_lru(page);
add_page_to_lru_list(page, lruvec, lru);
 
-   if (is_active_lru(lru)) {
-   int file = is_file_lru(lru);
-   int numpages = hpage_nr_pages(page);
-   reclaim_stat->recent_rotated[file] += numpages;
-   }
if (put_page_testzero(page)) {
__ClearPageLRU(page);
__ClearPageActive(page);
@@ -1945,6 +1942,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct 
lruvec *lruvec,
count_memcg_events(lruvec_memcg(lruvec), PGSTEAL_DIRECT,
   nr_reclaimed);
}
+   reclaim_stat->recent_rotated[0] = stat.nr_activate[0];
+   reclaim_stat->recent_rotated[1] = stat.nr_activate[1];
 
putback_inactive_pages(lruvec, _list);

[PATCH 7/8] usb: dwc2: Add Amlogic G12A DWC2 Params

2019-02-12 Thread Neil Armstrong

This patch sets the params for the DWC2 Controller found in the
Amlogic G12A SoC family.

It mainly sets the parameters that are reported incorrectly by the driver,
leaving the remaining ones to be detected automatically by the driver and
provided by the DT node.

Signed-off-by: Neil Armstrong 
---
 drivers/usb/dwc2/params.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/usb/dwc2/params.c b/drivers/usb/dwc2/params.c
index 24ff5f21cb25..442113246cba 100644
--- a/drivers/usb/dwc2/params.c
+++ b/drivers/usb/dwc2/params.c
@@ -121,6 +121,16 @@ static void dwc2_set_amlogic_params(struct dwc2_hsotg 
*hsotg)
p->power_down = DWC2_POWER_DOWN_PARAM_NONE;
 }
 
+static void dwc2_set_amlogic_g12a_params(struct dwc2_hsotg *hsotg)
+{
+   struct dwc2_core_params *p = >params;
+
+   p->lpm = false;
+   p->lpm_clock_gating = false;
+   p->besl = false;
+   p->hird_threshold_en = false;
+}
+
 static void dwc2_set_amcc_params(struct dwc2_hsotg *hsotg)
 {
struct dwc2_core_params *p = >params;
@@ -167,6 +177,8 @@ const struct of_device_id dwc2_of_match_table[] = {
  .data = dwc2_set_amlogic_params },
{ .compatible = "amlogic,meson-gxbb-usb",
  .data = dwc2_set_amlogic_params },
+   { .compatible = "amlogic,meson-g12a-usb",
+ .data = dwc2_set_amlogic_g12a_params },
{ .compatible = "amcc,dwc-otg", .data = dwc2_set_amcc_params },
{ .compatible = "st,stm32f4x9-fsotg",
  .data = dwc2_set_stm32f4x9_fsotg_params },
-- 
2.20.1

[PATCH 8/8] usb: dwc3: Add Amlogic G12A DWC3 glue

2019-02-12 Thread Neil Armstrong

Adds support for Amlogic G12A USB Control Glue HW.

The Amlogic G12A SoC Family embeds 2 USB Controllers :
- a DWC3 IP configured as Host for USB2 and USB3
- a DWC2 IP configured as Peripheral USB2 Only

A glue connects both of these controllers to 2 USB2 PHYs, and optionally
to a USB3+PCIE Combo PHY shared with the PCIE controller.

The Glue configures the UTMI 8bit interfaces for the USB2 PHYs, including
routing of the OTG PHY between the DWC3 and DWC2 controllers, and
sets up the on-chip OTG mode selection for this PHY.

The PHYs are children of the Glue node since the Glue controls the interface
with the PHY, not the DWC3 controller.
The driver collects the mode of each PHY and determines which PHY
is to be routed between the DWC2 and DWC3 controllers.

This driver supports setting up the OTG mode at probe time, and manually
via a debugfs interface. The IRQ mode change detection is yet to be added
in a future patchset, mainly due to lack of hardware to validate on.

Signed-off-by: Neil Armstrong 
---
 drivers/usb/dwc3/Kconfig   |   9 +
 drivers/usb/dwc3/Makefile  |   1 +
 drivers/usb/dwc3/dwc3-meson-g12a.c | 650 +
 3 files changed, 660 insertions(+)
 create mode 100644 drivers/usb/dwc3/dwc3-meson-g12a.c

diff --git a/drivers/usb/dwc3/Kconfig b/drivers/usb/dwc3/Kconfig
index 1a0404fda596..4335e5e76bbb 100644
--- a/drivers/usb/dwc3/Kconfig
+++ b/drivers/usb/dwc3/Kconfig
@@ -93,6 +93,15 @@ config USB_DWC3_KEYSTONE
  Support of USB2/3 functionality in TI Keystone2 platforms.
  Say 'Y' or 'M' here if you have one such device
 
+config USB_DWC3_MESON_G12A
+   tristate "Amlogic Meson G12A Platforms"
+   depends on OF && COMMON_CLK
+   depends on ARCH_MESON || COMPILE_TEST
+   default USB_DWC3
+   help
+ Support USB2/3 functionality in Amlogic G12A platforms.
+Say 'Y' or 'M' if you have one such device.
+
 config USB_DWC3_OF_SIMPLE
tristate "Generic OF Simple Glue Layer"
depends on OF && COMMON_CLK
diff --git a/drivers/usb/dwc3/Makefile b/drivers/usb/dwc3/Makefile
index 6e3ef6144e5d..ae86da0dc5bd 100644
--- a/drivers/usb/dwc3/Makefile
+++ b/drivers/usb/dwc3/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_USB_DWC3_EXYNOS) += dwc3-exynos.o
 obj-$(CONFIG_USB_DWC3_PCI) += dwc3-pci.o
 obj-$(CONFIG_USB_DWC3_HAPS)+= dwc3-haps.o
 obj-$(CONFIG_USB_DWC3_KEYSTONE)+= dwc3-keystone.o
+obj-$(CONFIG_USB_DWC3_MESON_G12A)  += dwc3-meson-g12a.o
 obj-$(CONFIG_USB_DWC3_OF_SIMPLE)   += dwc3-of-simple.o
 obj-$(CONFIG_USB_DWC3_ST)  += dwc3-st.o
 obj-$(CONFIG_USB_DWC3_QCOM)+= dwc3-qcom.o
diff --git a/drivers/usb/dwc3/dwc3-meson-g12a.c 
b/drivers/usb/dwc3/dwc3-meson-g12a.c
new file mode 100644
index ..abeff2d56b1d
--- /dev/null
+++ b/drivers/usb/dwc3/dwc3-meson-g12a.c
@@ -0,0 +1,650 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * USB Glue for Amlogic G12A SoCs
+ *
+ * Copyright (c) 2019 BayLibre, SAS
+ * Author: Neil Armstrong 
+ */
+
+/*
+ * The USB is organized with a glue around the DWC3 Controller IP as :
+ * - Control registers for each USB2 Ports
+ * - Control registers for the USB PHY layer
+ * - SuperSpeed PHY can be enabled only if port is used
+ *
+ * TOFIX:
+ * - Add dynamic OTG switching with ID change interrupt
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* USB Glue Control Registers */
+
+#define USB_R0 0x00
+   #define USB_R0_P30_LANE0_TX2RX_LOOPBACK BIT(17)
+   #define USB_R0_P30_LANE0_EXT_PCLK_REQ   BIT(18)
+   #define USB_R0_P30_PCS_RX_LOS_MASK_VAL_MASK GENMASK(28, 19)
+   #define USB_R0_U2D_SS_SCALEDOWN_MODE_MASK   GENMASK(30, 29)
+   #define USB_R0_U2D_ACT  BIT(31)
+
+#define USB_R1 0x04
+   #define USB_R1_U3H_BIGENDIAN_GS BIT(0)
+   #define USB_R1_U3H_PME_ENABLE   BIT(1)
+   #define USB_R1_U3H_HUB_PORT_OVERCURRENT_MASKGENMASK(4, 2)
+   #define USB_R1_U3H_HUB_PORT_PERM_ATTACH_MASKGENMASK(9, 7)
+   #define USB_R1_U3H_HOST_U2_PORT_DISABLE_MASKGENMASK(13, 12)
+   #define USB_R1_U3H_HOST_U3_PORT_DISABLE BIT(16)
+   #define USB_R1_U3H_HOST_PORT_POWER_CONTROL_PRESENT  BIT(17)
+   #define USB_R1_U3H_HOST_MSI_ENABLE  BIT(18)
+   #define USB_R1_U3H_FLADJ_30MHZ_REG_MASK GENMASK(24, 19)
+   #define USB_R1_P30_PCS_TX_SWING_FULL_MASK   GENMASK(31, 25)
+
+#define USB_R2 0x08
+   #define USB_R2_P30_PCS_TX_DEEMPH_3P5DB_MASK GENMASK(25, 20)
+

[PATCH 6/8] phy: amlogic: Add Amlogic G12A USB3 + PCIE Combo PHY Driver

2019-02-12 Thread Neil Armstrong

This adds support for the shared USB3 + PCIE PHY found in the
Amlogic G12A SoC Family.

It supports USB3 Host mode or PCIE 2.0 mode, depending on the layout of
the board.

Selection is done by the #phy-cells, making the mode static and exclusive.

Signed-off-by: Neil Armstrong 
---
 drivers/phy/amlogic/Kconfig   |  12 +
 drivers/phy/amlogic/Makefile  |   1 +
 .../phy/amlogic/phy-meson-g12a-usb3-pcie.c| 414 ++
 3 files changed, 427 insertions(+)
 create mode 100644 drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c

diff --git a/drivers/phy/amlogic/Kconfig b/drivers/phy/amlogic/Kconfig
index 78d6e194dce9..7ccb9a756aba 100644
--- a/drivers/phy/amlogic/Kconfig
+++ b/drivers/phy/amlogic/Kconfig
@@ -48,3 +48,15 @@ config PHY_MESON_G12A_USB2
  Enable this to support the Meson USB2 PHYs found in Meson
  G12A SoCs.
  If unsure, say N.
+
+config PHY_MESON_G12A_USB3_PCIE
+   tristate "Meson G12A USB3+PCIE Combo PHY drivers"
+   default ARCH_MESON
+   depends on OF && (ARCH_MESON || COMPILE_TEST)
+   depends on USB_SUPPORT
+   select GENERIC_PHY
+   select REGMAP_MMIO
+   help
+ Enable this to support the Meson USB3 + PCIE Combi PHY found
+ in Meson G12A SoCs.
+ If unsure, say N.
diff --git a/drivers/phy/amlogic/Makefile b/drivers/phy/amlogic/Makefile
index 7d4d10f5a6b3..fdd008e1b19b 100644
--- a/drivers/phy/amlogic/Makefile
+++ b/drivers/phy/amlogic/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_PHY_MESON8B_USB2)  += phy-meson8b-usb2.o
 obj-$(CONFIG_PHY_MESON_GXL_USB2)   += phy-meson-gxl-usb2.o
 obj-$(CONFIG_PHY_MESON_G12A_USB2)  += phy-meson-g12a-usb2.o
 obj-$(CONFIG_PHY_MESON_GXL_USB3)   += phy-meson-gxl-usb3.o
+obj-$(CONFIG_PHY_MESON_G12A_USB3_PCIE) += phy-meson-g12a-usb3-pcie.o
diff --git a/drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c 
b/drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c
new file mode 100644
index ..59eae98928e9
--- /dev/null
+++ b/drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c
@@ -0,0 +1,414 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Amlogic G12A USB3 + PCIE Combo PHY driver
+ *
+ * Copyright (C) 2017 Amlogic, Inc. All rights reserved
+ * Copyright (C) 2019 BayLibre, SAS
+ * Author: Neil Armstrong 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define PHY_R0 0x00
+   #define PHY_R0_PCIE_POWER_STATE GENMASK(4, 0)
+   #define PHY_R0_PCIE_USB3_SWITCH GENMASK(6, 5)
+
+#define PHY_R1 0x04
+   #define PHY_R1_PHY_TX1_TERM_OFFSET  GENMASK(4, 0)
+   #define PHY_R1_PHY_TX0_TERM_OFFSET  GENMASK(9, 5)
+   #define PHY_R1_PHY_RX1_EQ   GENMASK(12, 10)
+   #define PHY_R1_PHY_RX0_EQ   GENMASK(15, 13)
+   #define PHY_R1_PHY_LOS_LEVELGENMASK(20, 16)
+   #define PHY_R1_PHY_LOS_BIAS GENMASK(23, 21)
+   #define PHY_R1_PHY_REF_CLKDIV2  BIT(24)
+   #define PHY_R1_PHY_MPLL_MULTIPLIER  GENMASK(31, 25)
+
+#define PHY_R2 0x08
+   #define PHY_R2_PCS_TX_DEEMPH_GEN2_6DB   GENMASK(5, 0)
+   #define PHY_R2_PCS_TX_DEEMPH_GEN2_3P5DB GENMASK(11, 6)
+   #define PHY_R2_PCS_TX_DEEMPH_GEN1   GENMASK(17, 12)
+   #define PHY_R2_PHY_TX_VBOOST_LVLGENMASK(20, 18)
+
+#define PHY_R4 0x10
+   #define PHY_R4_PHY_CR_WRITE BIT(0)
+   #define PHY_R4_PHY_CR_READ  BIT(1)
+   #define PHY_R4_PHY_CR_DATA_IN   GENMASK(17, 2)
+   #define PHY_R4_PHY_CR_CAP_DATA  BIT(18)
+   #define PHY_R4_PHY_CR_CAP_ADDR  BIT(19)
+
+#define PHY_R5 0x14
+   #define PHY_R5_PHY_CR_DATA_OUT  GENMASK(15, 0)
+   #define PHY_R5_PHY_CR_ACK   BIT(16)
+   #define PHY_R5_PHY_BS_OUT   BIT(17)
+
+struct phy_g12a_usb3_pcie_priv {
+   struct regmap   *regmap;
+   struct regmap   *regmap_cr;
+   struct clk  *clk_ref;
+   struct reset_control*reset;
+   struct phy  *phy;
+   unsigned intmode;
+};
+
+static const struct regmap_config phy_g12a_usb3_pcie_regmap_conf = {
+   .reg_bits = 8,
+   .val_bits = 32,
+   .reg_stride = 4,
+   .max_register = PHY_R5,
+};
+
+static int phy_g12a_usb3_pcie_cr_bus_addr(struct phy_g12a_usb3_pcie_priv

[PATCH 5/8] phy: amlogic: add Amlogic G12A USB2 PHY Driver

2019-02-12 Thread Neil Armstrong

This adds support for the USB2 PHY found in the Amlogic G12A SoC Family.

It supports Host and/or Peripheral mode, depending on its position.
The first PHY is only used as Host, but the second supports Dual modes
defined by the USB Control Glue HW in front of the USB Controllers.

Signed-off-by: Neil Armstrong 
---
 drivers/phy/amlogic/Kconfig   |  12 ++
 drivers/phy/amlogic/Makefile  |   1 +
 drivers/phy/amlogic/phy-meson-g12a-usb2.c | 191 ++
 3 files changed, 204 insertions(+)
 create mode 100644 drivers/phy/amlogic/phy-meson-g12a-usb2.c

diff --git a/drivers/phy/amlogic/Kconfig b/drivers/phy/amlogic/Kconfig
index 23fe1cda2f70..78d6e194dce9 100644
--- a/drivers/phy/amlogic/Kconfig
+++ b/drivers/phy/amlogic/Kconfig
@@ -36,3 +36,15 @@ config PHY_MESON_GXL_USB3
  Enable this to support the Meson USB3 PHY and OTG detection
  IP block found in Meson GXL and GXM SoCs.
  If unsure, say N.
+
+config PHY_MESON_G12A_USB2
+   tristate "Meson G12A USB2 PHY drivers"
+   default ARCH_MESON
+   depends on OF && (ARCH_MESON || COMPILE_TEST)
+   depends on USB_SUPPORT
+   select GENERIC_PHY
+   select REGMAP_MMIO
+   help
+ Enable this to support the Meson USB2 PHYs found in Meson
+ G12A SoCs.
+ If unsure, say N.
diff --git a/drivers/phy/amlogic/Makefile b/drivers/phy/amlogic/Makefile
index 4fd8848c194d..7d4d10f5a6b3 100644
--- a/drivers/phy/amlogic/Makefile
+++ b/drivers/phy/amlogic/Makefile
@@ -1,3 +1,4 @@
 obj-$(CONFIG_PHY_MESON8B_USB2) += phy-meson8b-usb2.o
 obj-$(CONFIG_PHY_MESON_GXL_USB2)   += phy-meson-gxl-usb2.o
+obj-$(CONFIG_PHY_MESON_G12A_USB2)  += phy-meson-g12a-usb2.o
 obj-$(CONFIG_PHY_MESON_GXL_USB3)   += phy-meson-gxl-usb3.o
diff --git a/drivers/phy/amlogic/phy-meson-g12a-usb2.c 
b/drivers/phy/amlogic/phy-meson-g12a-usb2.c
new file mode 100644
index ..3b6271a8be02
--- /dev/null
+++ b/drivers/phy/amlogic/phy-meson-g12a-usb2.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Meson G12A USB2 PHY driver
+ *
+ * Copyright (C) 2017 Martin Blumenstingl 
+ * Copyright (C) 2017 Amlogic, Inc. All rights reserved
+ * Copyright (C) 2019 BayLibre, SAS
+ * Author: Neil Armstrong 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define PHY_CTRL_R00x0
+#define PHY_CTRL_R10x4
+#define PHY_CTRL_R20x8
+#define PHY_CTRL_R30xc
+#define PHY_CTRL_R40x10
+#define PHY_CTRL_R50x14
+#define PHY_CTRL_R60x18
+#define PHY_CTRL_R70x1c
+#define PHY_CTRL_R80x20
+#define PHY_CTRL_R90x24
+#define PHY_CTRL_R10   0x28
+#define PHY_CTRL_R11   0x2c
+#define PHY_CTRL_R12   0x30
+#define PHY_CTRL_R13   0x34
+#define PHY_CTRL_R14   0x38
+#define PHY_CTRL_R15   0x3c
+#define PHY_CTRL_R16   0x40
+#define PHY_CTRL_R17   0x44
+#define PHY_CTRL_R18   0x48
+#define PHY_CTRL_R19   0x4c
+#define PHY_CTRL_R20   0x50
+#define PHY_CTRL_R21   0x54
+#define PHY_CTRL_R22   0x58
+#define PHY_CTRL_R23   0x5c
+
+#define RESET_COMPLETE_TIME1000
+#define PLL_RESET_COMPLETE_TIME100
+
+struct phy_meson_g12a_usb2_priv {
+   struct device   *dev;
+   struct regmap   *regmap;
+   struct clk  *clk;
+   struct reset_control*reset;
+};
+
+static const struct regmap_config phy_meson_g12a_usb2_regmap_conf = {
+   .reg_bits = 8,
+   .val_bits = 32,
+   .reg_stride = 4,
+   .max_register = PHY_CTRL_R23,
+};
+
+static int phy_meson_g12a_usb2_init(struct phy *phy)
+{
+   struct phy_meson_g12a_usb2_priv *priv = phy_get_drvdata(phy);
+   int ret;
+
+   ret = reset_control_reset(priv->reset);
+   if (ret)
+   return ret;
+
+   udelay(RESET_COMPLETE_TIME);
+
+   /* usb2_otg_aca_en == 0 */
+   regmap_update_bits(priv->regmap, PHY_CTRL_R21, BIT(2), 0);
+
+   /* PLL Setup : 24MHz

[PATCH 4/8] dt-bindings: usb: dwc3: Add Amlogic G12A DWC3 Glue Bindings

2019-02-12 Thread Neil Armstrong

Adds the bindings for the Amlogic G12A USB Glue HW.

The Amlogic G12A SoC Family embeds 2 USB Controllers :
- a DWC3 IP configured as Host for USB2 and USB3
- a DWC2 IP configured as Peripheral USB2 Only

A glue connects both of these controllers to 2 USB2 PHYs,
and optionally to a USB3+PCIE Combo PHY shared with the PCIE controller.

The Glue configures the UTMI 8bit interfaces for the USB2 PHYs, including
routing of the OTG PHY between the DWC3 and DWC2 controllers, and
sets up the on-chip OTG mode selection for this PHY.

The PHYs are children of the Glue node since the Glue controls the interface
with the PHY, not the DWC3 controller.

The PHY interconnect is handled in port subnodes, which eases describing
which PHY is enabled (like the USB3 shared PHY) and future layouts on
derivatives of the G12A Family.

Signed-off-by: Neil Armstrong 
---
 .../devicetree/bindings/usb/amlogic,dwc3.txt  | 109 ++
 1 file changed, 109 insertions(+)

diff --git a/Documentation/devicetree/bindings/usb/amlogic,dwc3.txt 
b/Documentation/devicetree/bindings/usb/amlogic,dwc3.txt
index 9a8b631904fd..c7c4726ef10d 100644
--- a/Documentation/devicetree/bindings/usb/amlogic,dwc3.txt
+++ b/Documentation/devicetree/bindings/usb/amlogic,dwc3.txt
@@ -40,3 +40,112 @@ Example device nodes:
phy-names = "usb2-phy", "usb3-phy";
};
};
+
+Amlogic Meson G12A DWC3 USB SoC Controller Glue
+
+The Amlogic G12A embeds a DWC3 USB IP Core configured for USB2 and USB3
+in host-only mode, and a DWC2 IP Core configured for USB2 peripheral mode
+only.
+
+A glue connects the DWC3 core to USB2 PHYs and optionally to a USB3 PHY.
+
+One of the USB2 PHY can be re-routed in peripheral mode to a DWC2 USB IP.
+
+The DWC3 Glue controls the PHY routing and power, an interrupt line is
+connected to the Glue to serve as OTG ID change detection.
+
+Required properties:
+- compatible:  Should be "amlogic,meson-g12a-usb-ctrl"
+- clocks:  a handle for the "USB" clock
+- clock-names: must be "usb"
+- resets:  a handle for the shared "USB" reset line
+- reset-names: must be "usb"
+- reg: The base address and length of the registers
+- interrupts:  the interrupt specifier for the OTG detection
+
+Required child nodes:
+
+USB Ports are described as child 'port' nodes grouped under a 'ports' node,
+with #address-cells, #size-cells specified.
+
+Each 'port' sub-node identifies a possible USB Port served by a USB PHY
+identified by the 'phy' property as described in ../phy/phy-bindings.txt
+
+Each 'port' is identified by a reg property to number the port.
+
+The following table lists for each supported model the port number
+corresponding to each PHY serving a physical USB Port.
+
+ Family   Port 0 Port 1Port 2Port 3Port 4
+---
+ G12A USBHOST_A  USBOTG_B  Reserved  Reserved  USB3_0
+
+A child node must exist to represent the core DWC3 IP block. The name of
+the node is not important. The content of the node is defined in dwc3.txt.
+
+A child node must exist to represent the core DWC2 IP block. The name of
+the node is not important. The content of the node is defined in dwc2.txt.
+
+PHY documentation is provided in the following places:
+- Documentation/devicetree/bindings/phy/meson-g12a-usb2-phy.txt
+- Documentation/devicetree/bindings/phy/meson-g12a-usb3-pcie-phy.txt
+
+
+Example device nodes:
+   usb: usb@ffe09000 {
+   compatible = "amlogic,meson-g12a-usb-ctrl";
+   reg = <0x0 0xffe09000 0x0 0xa0>;
+   interrupts = ;
+   #address-cells = <2>;
+   #size-cells = <2>;
+   ranges;
+
+   clocks = < CLKID_USB>;
+   clock-names = "usb";
+   resets = < RESET_USB>;
+   reset-names = "usb";
+
+   ports {
+   #address-cells = <1>;
+   #size-cells = <0>;
+
+   /* USB2 Port 0 */
+   usb20: port@0 {
+   reg = <0>;
+   phys = <_phy0>;
+   };
+
+   /* USB2 Port 1 */
+   usb21: port@1 {
+   reg = <1>;
+   phys = <_phy1>;
+   };
+
+   /* USB3 Port 0 */
+   usb3: port@4 {
+   reg = <4>;
+   phys = <_pcie_phy PHY_TYPE_USB3>;
+   };
+   };
+
+   dwc2: usb@ff40 {
+   compatible =

[PATCH 1/8] dt-bindings: phy: Add Amlogic G12A USB2 PHY Bindings

2019-02-12 Thread Neil Armstrong

Add the Amlogic G12A Family USB2 OTG PHY Bindings

Signed-off-by: Neil Armstrong 
---
 .../bindings/phy/meson-g12a-usb2-phy.txt  | 22 +++
 1 file changed, 22 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/phy/meson-g12a-usb2-phy.txt

diff --git a/Documentation/devicetree/bindings/phy/meson-g12a-usb2-phy.txt 
b/Documentation/devicetree/bindings/phy/meson-g12a-usb2-phy.txt
new file mode 100644
index ..a6ebc3dea159
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/meson-g12a-usb2-phy.txt
@@ -0,0 +1,22 @@
+* Amlogic G12A USB2 PHY binding
+
+Required properties:
+- compatible:  Should be "amlogic,meson-g12a-usb2-phy"
+- reg: The base address and length of the registers
+- #phy-cells: must be 0 (see phy-bindings.txt in this directory)
+- clocks:  a phandle to the clock of this PHY
+- clock-names: must be "xtal"
+- resets:  a phandle to the reset line of this PHY
+- reset-names: must be "phy"
+- phy-supply:  see phy-bindings.txt in this directory
+
+Example:
+   usb2_phy0: phy@36000 {
+   compatible = "amlogic,g12a-usb2-phy";
+   reg = <0x0 0x36000 0x0 0x2000>;
+   clocks = <>;
+   clock-names = "xtal";
+   resets = < RESET_USB_PHY21>;
+   reset-names = "phy";
+   #phy-cells = <0>;
+   };
-- 
2.20.1

[PATCH 2/8] dt-bindings: phy: Add Amlogic G12A USB3+PCIE Combo PHY Bindings

2019-02-12 Thread Neil Armstrong

Add the Amlogic G12A Family USB3 + PCIE Combo PHY Bindings.

This PHY can provide exclusively USB3 or PCIE support on shared I/Os.

Signed-off-by: Neil Armstrong 
---
 .../bindings/phy/meson-g12a-usb3-pcie-phy.txt | 25 +++
 1 file changed, 25 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/phy/meson-g12a-usb3-pcie-phy.txt

diff --git a/Documentation/devicetree/bindings/phy/meson-g12a-usb3-pcie-phy.txt 
b/Documentation/devicetree/bindings/phy/meson-g12a-usb3-pcie-phy.txt
new file mode 100644
index ..714d751091f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/meson-g12a-usb3-pcie-phy.txt
@@ -0,0 +1,25 @@
+* Amlogic G12A USB3 + PCIE Combo PHY binding
+
+Required properties:
+- compatible:  Should be "amlogic,meson-g12a-usb3-pcie-phy"
+- #phy-cells: must be 1. The cell number is used to select the phy mode
+  as defined in <dt-bindings/phy/phy.h> between PHY_TYPE_USB3 and PHY_TYPE_PCIE
+- reg: The base address and length of the registers
+- clocks:  a phandle to the 100MHz reference clock of this PHY
+- clock-names: must be "ref_clk"
+- resets:  phandle to the reset lines for:
+   - the PHY control
+   - the USB3+PCIE PHY
+   - the PHY registers
+
+Example:
+   usb3_pcie_phy: phy@46000 {
+   compatible = "amlogic,g12a-usb3-pcie-phy";
+   reg = <0x0 0x46000 0x0 0x2000>;
+   clocks = < CLKID_PCIE_PLL>;
+   clock-names = "ref_clk";
+   resets = < RESET_PCIE_CTRL_A>,
+< RESET_PCIE_PHY>,
+< RESET_PCIE_APB>;
+   #phy-cells = <1>;
+   };
-- 
2.20.1

[PATCH v2] parisc: use memblock_alloc() instead of custom get_memblock()

2019-02-12 Thread Mike Rapoport

The get_memblock() function implements custom bottom-up memblock allocator.
Setting 'memblock_bottom_up = true' before any memblock allocation is done
allows replacing get_memblock() calls with memblock_alloc().

Signed-off-by: Mike Rapoport 
---
v2: fix allocation alignment

 arch/parisc/mm/init.c | 52 +++
 1 file changed, 19 insertions(+), 33 deletions(-)

diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 059187a..d0b1662 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -79,36 +79,6 @@ static struct resource sysram_resources[MAX_PHYSMEM_RANGES] 
__read_mostly;
 physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __read_mostly;
 int npmem_ranges __read_mostly;
 
-/*
- * get_memblock() allocates pages via memblock.
- * We can't use memblock_find_in_range(0, KERNEL_INITIAL_SIZE) here since it
- * doesn't allocate from bottom to top which is needed because we only created
- * the initial mapping up to KERNEL_INITIAL_SIZE in the assembly bootup code.
- */
-static void * __init get_memblock(unsigned long size)
-{
-   static phys_addr_t search_addr __initdata;
-   phys_addr_t phys;
-
-   if (!search_addr)
-   search_addr = PAGE_ALIGN(__pa((unsigned long) &_end));
-   search_addr = ALIGN(search_addr, size);
-   while (!memblock_is_region_memory(search_addr, size) ||
-   memblock_is_region_reserved(search_addr, size)) {
-   search_addr += size;
-   }
-   phys = search_addr;
-
-   if (phys)
-   memblock_reserve(phys, size);
-   else
-   panic("get_memblock() failed.\n");
-
-   memset(__va(phys), 0, size);
-
-   return __va(phys);
-}
-
 #ifdef CONFIG_64BIT
 #define MAX_MEM (~0UL)
 #else /* !CONFIG_64BIT */
@@ -321,6 +291,13 @@ static void __init setup_bootmem(void)
max_pfn = start_pfn + npages;
}
 
+   /*
+* We can't use memblock top-down allocations because we only
+* created the initial mapping up to KERNEL_INITIAL_SIZE in
+* the assembly bootup code.
+*/
+   memblock_set_bottom_up(true);
+
/* IOMMU is always used to access "high mem" on those boxes
 * that can support enough mem that a PCI device couldn't
 * directly DMA to any physical addresses.
@@ -442,7 +419,10 @@ static void __init map_pages(unsigned long start_vaddr,
 */
 
if (!pmd) {
-   pmd = (pmd_t *) get_memblock(PAGE_SIZE << PMD_ORDER);
+   pmd = memblock_alloc(PAGE_SIZE << PMD_ORDER,
+PAGE_SIZE << PMD_ORDER);
+   if (!pmd)
+   panic("pmd allocation failed.\n");
pmd = (pmd_t *) __pa(pmd);
}
 
@@ -461,7 +441,10 @@ static void __init map_pages(unsigned long start_vaddr,
 
pg_table = (pte_t *)pmd_address(*pmd);
if (!pg_table) {
-   pg_table = (pte_t *) get_memblock(PAGE_SIZE);
+   pg_table = memblock_alloc(PAGE_SIZE,
+ PAGE_SIZE);
+   if (!pg_table)
+   panic("page table allocation failed\n");
pg_table = (pte_t *) __pa(pg_table);
}
 
@@ -700,7 +683,10 @@ static void __init pagetable_init(void)
}
 #endif
 
-   empty_zero_page = get_memblock(PAGE_SIZE);
+   empty_zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+   if (!empty_zero_page)
+   panic("zero page allocation failed.\n");
+
 }
 
 static void __init gateway_init(void)
-- 
2.7.4

Re: [PATCH 2/2] chardev: showing minor range for chardev in the output of /proc/devices

2019-02-12 Thread cgxu519


On 2/12/19 5:02 PM, Greg KH wrote:

On Tue, Feb 12, 2019 at 04:47:39PM +0800, Chengguang Xu wrote:

Currently chardev allows to share major, showing
major with minor range for chardev will be more
helpful.

Signed-off-by: Chengguang Xu 
---
  fs/char_dev.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/char_dev.c b/fs/char_dev.c
index b25b1da097d5..6f00acdeb308 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -55,7 +55,9 @@ void chrdev_show(struct seq_file *f, off_t offset)
mutex_lock(_lock);
for (cd = chrdevs[major_to_index(offset)]; cd; cd = cd->next) {
if (cd->major == offset)
-   seq_printf(f, "%3d %s\n", cd->major, cd->name);
+   seq_printf(f, "%3d %s (%u-%u)\n", cd->major, cd->name,
+  cd->baseminor,
+  cd->baseminor + cd->minorct - 1);

You are changing the format of a userspace file, what tools are going to
break when you do this?


I'll remove this part in V2. Do you have any idea how to get the minor
range info for a particular major? Or would adding a similar file somewhere
under /sys be acceptable?

Thanks

Re: [PATCH 2/2] chardev: showing minor range for chardev in the output of /proc/devices

2019-02-12 Thread Greg KH

On Tue, Feb 12, 2019 at 11:18:22PM +0800, cgxu519 wrote:
> On 2/12/19 5:02 PM, Greg KH wrote:
> > On Tue, Feb 12, 2019 at 04:47:39PM +0800, Chengguang Xu wrote:
> > > Currently chardev allows to share major, showing
> > > major with minor range for chardev will be more
> > > helpful.
> > > 
> > > Signed-off-by: Chengguang Xu 
> > > ---
> > >   fs/char_dev.c | 4 +++-
> > >   1 file changed, 3 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/fs/char_dev.c b/fs/char_dev.c
> > > index b25b1da097d5..6f00acdeb308 100644
> > > --- a/fs/char_dev.c
> > > +++ b/fs/char_dev.c
> > > @@ -55,7 +55,9 @@ void chrdev_show(struct seq_file *f, off_t offset)
> > >   mutex_lock(_lock);
> > >   for (cd = chrdevs[major_to_index(offset)]; cd; cd = cd->next) {
> > >   if (cd->major == offset)
> > > - seq_printf(f, "%3d %s\n", cd->major, cd->name);
> > > + seq_printf(f, "%3d %s (%u-%u)\n", cd->major, cd->name,
> > > +cd->baseminor,
> > > +cd->baseminor + cd->minorct - 1);
> > You are changing the format of a userspace file, what tools are going to
> > break when you do this?
> 
> I'll remove this part in V2. Do you have any idea how to get the minor
> range info for particular major? Or adding a similar file to somewhere
> under /sys is acceptable?

Why do you need to know the minor range?  What can userspace do with
this that actually matters?

thanks,

greg k-h

RE: [PATCH] mei: expand minor range when registering chrdev region

2019-02-12 Thread Winkler, Tomas


> 
> On 2/12/19 5:29 PM, Greg KH wrote:
> > On Tue, Feb 12, 2019 at 02:02:52PM +0800, Chengguang Xu wrote:
> >> Actually, the total number of available minor numbers for a single major
> >> is MINORMASK + 1. So expand the minor range when registering the chrdev
> >> region.
> >>
> >> Signed-off-by: Chengguang Xu 
> >> ---
> >>   drivers/misc/mei/main.c | 2 +-
> >>   1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index
> >> 87281b3695e6..3df54f1e1a8b 100644
> >> --- a/drivers/misc/mei/main.c
> >> +++ b/drivers/misc/mei/main.c
> >> @@ -869,7 +869,7 @@ static const struct file_operations mei_fops = {
> >>
> >>   static struct class *mei_class;
> >>   static dev_t mei_devt;
> >> -#define MEI_MAX_DEVS  MINORMASK
> >> +#define MEI_MAX_DEVS  (MINORMASK + 1)
> > Why is this needed?  Have you really run out of that many minor nodes
> > for this driver?
> 
> Not really, practically maybe we cannot reach to the limit.
> I was just curious why only one minor number left there and assumed that was
> from a mistake(since I've seen similar mistake in other driver).
> However, if it explicitly sets to MINORMASK for some reasons, then it's better
> to keep as is.

I guess this was a boilerplate code, but I'm sure we don't need so many devices.

Thanks
Tomas

Re: [5.0-rc5 regression] "scsi: kill off the legacy IO path" causes 5 minute delay during boot on Sun Blade 2500

2019-02-12 Thread James Bottomley

On Mon, 2019-02-11 at 19:50 -0700, Jens Axboe wrote:
> On 2/11/19 7:13 PM, James Bottomley wrote:
> > On Mon, 2019-02-11 at 09:31 -0700, Jens Axboe wrote:
> > > On 2/11/19 9:28 AM, James Bottomley wrote:
> > > > On Mon, 2019-02-11 at 08:46 -0700, Jens Axboe wrote:
> > > > > On 2/11/19 8:42 AM, James Bottomley wrote:
> > > > > > On Mon, 2019-02-11 at 08:28 -0700, Jens Axboe wrote:
> > > > > > > On 2/11/19 8:25 AM, James Bottomley wrote:
> > > > > > > > On Sun, 2019-02-10 at 09:35 -0700, Jens Axboe wrote:
> > > > > > > > > On 2/10/19 9:25 AM, James Bottomley wrote:
> > > > 
> > > > [...]
> > > > > > > > > > That check wasn't changed by the code removal.
> > > > > > > > > 
> > > > > > > > > As I said above, for sd. This isn't true for non-
> > > > > > > > > disks.
> > > > > > > > 
> > > > > > > > Yes, but the behaviour above doesn't change across a
> > > > > > > > switch
> > > > > > > > to MQ, so I don't quite understand how it bisects back
> > > > > > > > to
> > > > > > > > that change.  If we're not gathering entropy for the
> > > > > > > > device
> > > > > > > > now, we wouldn't have been before the switch, so the
> > > > > > > > entropy characteristics shouldn't have changed.
> > > > > > > 
> > > > > > > But it does, as I also wrote in that first email. The
> > > > > > > legacy
> > > > > > > queue flags had QUEUE_FLAG_ADD_RANDOM set by default, the
> > > > > > > MQ
> > > > > > > ones do not. Hence any non-sd device would previously
> > > > > > > ALWAYS
> > > > > > > have ADD_RANDOM set, now none of them do. Also see the
> > > > > > > patch
> > > > > > > I sent.
> > > > > > 
> > > > > > So your theory is that the disk in question never gets to
> > > > > > the
> > > > > > rotational check?  because the check will clear the flag if
> > > > > > it's non-rotational and set it if it's not, so the default
> > > > > > state of the flag shouldn't matter.
> > > > > 
> > > > > No, my point is about non-disks, devices that aren't driven
> > > > > by
> > > > > sd. The behavior for sd hasn't changed, as it sets/clears it
> > > > > unconditionally. 
> > > > 
> > > > I agree, but I don't think any of them were significant entropy
> > > > contributors before: things like nvme have always been outside
> > > > of
> > > > this and sr and st don't really contribute much to the seek
> > > > load
> > > > during boot because they're probed but not used by the boot
> > > > sequence, so I can't see how they would cause this
> > > > behaviour.  I
> > > > suppose it could be target probing, but even that seems
> > > > unlikely
> > > > because it should be dwarfed by the number of root disk reads
> > > > during boot.
> > > > 
> > > > For the rng to take an additional 5 minutes to initialize, we
> > > > must
> > > > have lost a significant entropy source somewhere.
> > > 
> > > I agree it's not a significant amount of entropy, but even just
> > > one
> > > bit could mean a long stall if that put us over the edge of just
> > > not
> > > having enough for whatever is blocking on /dev/random. Mikael's
> > > boot
> > > did have a CDROM, it's not impossible that the handful of
> > > commands we
> > > end up doing to that device would have contributed enough entropy
> > > to
> > > get the boot done without stalling for minutes.
> > > 
> > > One way to know for sure, and that's if Mikael tests the patch.
> > 
> > I think I've got the root cause.  I have one system in my test bed
> > exhibiting this behaviour.  It turns out the disk in it has no
> > characteristics VPD page.  The 0xB1 VPD was a SBC-3 addition, so
> > that's
> > not surprising.  However, the characteristics check bails before
> > setting the flags, so it takes the default flag which has flipped.
> > 
> > We can either fix this by setting the QUEUE_FLAG_ADD_RANDOM if
> > there's
> > no 0xB1 page or by setting the default as Jens proposed.
> 
> I'd recommend just doing my patch, since that'll be the same behavior
> that SCSI had before.

I've got the history now, it's this patch

Author: Xuewei Zhang 
Date:   Thu Sep 6 13:37:19 2018 -0700

scsi: sd: Contribute to randomness when running rotational device

It added the else branch to the if (rot == 1).  It's the position of
that else branch which is wrong because not all disks have a SBC-3
characteristics VPD page, so they're the ones under MQ which stop
contributing entropy.  Whichever patch we go with will need a fixes:
for this.

James

[PATCH] Input: st-keyscan - fix potential zalloc NULL dereference

2019-02-12 Thread gabriel.fernandez

From: Gabriel Fernandez 

This patch fixes the following static checker warning:

drivers/input/keyboard/st-keyscan.c:156 keyscan_probe()
error: potential zalloc NULL dereference: 'keypad_data->input_dev'

Reported-by: Dan Carpenter 
Signed-off-by: Gabriel Fernandez 
---
 drivers/input/keyboard/st-keyscan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/input/keyboard/st-keyscan.c 
b/drivers/input/keyboard/st-keyscan.c
index babcfb165e4f..3b85631fde91 100644
--- a/drivers/input/keyboard/st-keyscan.c
+++ b/drivers/input/keyboard/st-keyscan.c
@@ -153,6 +153,8 @@ static int keyscan_probe(struct platform_device *pdev)
 
input_dev->id.bustype = BUS_HOST;
 
+   keypad_data->input_dev = input_dev;
+
error = keypad_matrix_key_parse_dt(keypad_data);
if (error)
return error;
@@ -168,8 +170,6 @@ static int keyscan_probe(struct platform_device *pdev)
 
input_set_drvdata(input_dev, keypad_data);
 
-   keypad_data->input_dev = input_dev;
-
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
keypad_data->base = devm_ioremap_resource(>dev, res);
if (IS_ERR(keypad_data->base))
-- 
2.17.0

Re: use generic DMA mapping code in powerpc V4

2019-02-12 Thread Christoph Hellwig

On Tue, Feb 12, 2019 at 01:42:56PM +0100, Christian Zigotzky wrote:
> On 11 February 2019 at 08:38AM, Christoph Hellwig wrote:
>> On Sun, Feb 10, 2019 at 01:00:20PM +0100, Christian Zigotzky wrote:
>>> I tested the whole series today. The kernels boot and the P.A. Semi
>>> Ethernet works! :-) Thanks a lot!
>>>
>>> I also tested it in a virtual e5500 QEMU machine today. Unfortunately the
>>> kernel crashes.
>> This looks like a patch I fixed in mainline a while ago, but which
>> the powerpc tree didn't have yet.
>>
>> I've cherry picked this commit
>> ("swiotlb: clear io_tlb_start and io_tlb_end in swiotlb_exit")
>>
>> and added it to the powerpc-dma.6 tree, please retry with that one.
>>
> Hello Christoph,
>
> Have you added it to the powerpc-dma.6 tree yet? The last commit was 4 days 
> ago.

I added it, but forgot to push it out.  It is there now, sorry:

http://git.infradead.org/users/hch/misc.git/commitdiff/2cf0745b7420af4a3e871d5a970a45662dfae69c

[PATCH] mmc: sdhci-xenon: Mark expected switch fall-through

2019-02-12 Thread Gustavo A. R. Silva

In preparation for enabling -Wimplicit-fallthrough, mark switch
cases where we are expecting to fall through.

This patch fixes the following warning:

drivers/mmc/host/sdhci-xenon-phy.c: In function ‘xenon_emmc_phy_slow_mode’:
drivers/mmc/host/sdhci-xenon-phy.c:527:47: warning: this statement may fall 
through [-Wimplicit-fallthrough=]
   if ((priv->init_card_type == MMC_TYPE_SDIO) ||
   ^~
   params->slow_mode) {
   ~
drivers/mmc/host/sdhci-xenon-phy.c:534:2: note: here
  default:
  ^~~

Warning level 3 was used: -Wimplicit-fallthrough=3

Notice that, in this particular case, the code comment is modified
in accordance with what GCC is expecting to find.

This patch is part of the ongoing efforts to enable
-Wimplicit-fallthrough.

Signed-off-by: Gustavo A. R. Silva 
---
 drivers/mmc/host/sdhci-xenon-phy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-xenon-phy.c 
b/drivers/mmc/host/sdhci-xenon-phy.c
index 5b5eb53a63d2..8d07ee1b8f08 100644
--- a/drivers/mmc/host/sdhci-xenon-phy.c
+++ b/drivers/mmc/host/sdhci-xenon-phy.c
@@ -530,7 +530,7 @@ static bool xenon_emmc_phy_slow_mode(struct sdhci_host 
*host,
ret = true;
break;
}
-   /* else: fall through */
+   /* fall through */
default:
reg &= ~XENON_TIMING_ADJUST_SLOW_MODE;
ret = false;
-- 
2.20.1

Re: Oops in rpc_clnt_debugfs_register() from debugfs change

2019-02-12 Thread David Howells

Greg Kroah-Hartman  wrote:

> And, if you want my larger fix that I will be sending to netdev one of
> these days, here's that one.  It includes the above patch as part of it.

That works.

Tested-by: David Howells

Re: [5.0-rc5 regression] "scsi: kill off the legacy IO path" causes 5 minute delay during boot on Sun Blade 2500

2019-02-12 Thread Jens Axboe

On 2/12/19 8:24 AM, James Bottomley wrote:
> On Mon, 2019-02-11 at 19:50 -0700, Jens Axboe wrote:
>> On 2/11/19 7:13 PM, James Bottomley wrote:
>>> On Mon, 2019-02-11 at 09:31 -0700, Jens Axboe wrote:
>>>> On 2/11/19 9:28 AM, James Bottomley wrote:
>>>>> On Mon, 2019-02-11 at 08:46 -0700, Jens Axboe wrote:
>>>>>> On 2/11/19 8:42 AM, James Bottomley wrote:
>>>>>>> On Mon, 2019-02-11 at 08:28 -0700, Jens Axboe wrote:
>>>>>>>> On 2/11/19 8:25 AM, James Bottomley wrote:
>>>>>>>>> On Sun, 2019-02-10 at 09:35 -0700, Jens Axboe wrote:
>>>>>>>>>> On 2/10/19 9:25 AM, James Bottomley wrote:
>>>>>
>>>>> [...]
>>>>>>>>>>> That check wasn't changed by the code removal.
>>>>>>>>>>
>>>>>>>>>> As I said above, for sd. This isn't true for non-
>>>>>>>>>> disks.
>>>>>>>>>
>>>>>>>>> Yes, but the behaviour above doesn't change across a
>>>>>>>>> switch
>>>>>>>>> to MQ, so I don't quite understand how it bisects back
>>>>>>>>> to
>>>>>>>>> that change.  If we're not gathering entropy for the
>>>>>>>>> device
>>>>>>>>> now, we wouldn't have been before the switch, so the
>>>>>>>>> entropy characteristics shouldn't have changed.
>>>>>>>>
>>>>>>>> But it does, as I also wrote in that first email. The
>>>>>>>> legacy
>>>>>>>> queue flags had QUEUE_FLAG_ADD_RANDOM set by default, the
>>>>>>>> MQ
>>>>>>>> ones do not. Hence any non-sd device would previously
>>>>>>>> ALWAYS
>>>>>>>> have ADD_RANDOM set, now none of them do. Also see the
>>>>>>>> patch
>>>>>>>> I sent.
>>>>>>>
>>>>>>> So your theory is that the disk in question never gets to
>>>>>>> the
>>>>>>> rotational check?  because the check will clear the flag if
>>>>>>> it's non-rotational and set it if it's not, so the default
>>>>>>> state of the flag shouldn't matter.
>>>>>>
>>>>>> No, my point is about non-disks, devices that aren't driven
>>>>>> by
>>>>>> sd. The behavior for sd hasn't changed, as it sets/clears it
>>>>>> unconditionally. 
>>>>>
>>>>> I agree, but I don't think any of them were significant entropy
>>>>> contributors before: things like nvme have always been outside
>>>>> of
>>>>> this and sr and st don't really contribute much to the seek
>>>>> load
>>>>> during boot because they're probed but not used by the boot
>>>>> sequence, so I can't see how they would cause this
>>>>> behaviour.  I
>>>>> suppose it could be target probing, but even that seems
>>>>> unlikely
>>>>> because it should be dwarfed by the number of root disk reads
>>>>> during boot.
>>>>>
>>>>> For the rng to take an additional 5 minutes to initialize, we
>>>>> must
>>>>> have lost a significant entropy source somewhere.
>>>>
>>>> I agree it's not a significant amount of entropy, but even just
>>>> one
>>>> bit could mean a long stall if that put us over the edge of just
>>>> not
>>>> having enough for whatever is blocking on /dev/random. Mikael's
>>>> boot
>>>> did have a CDROM, it's not impossible that the handful of
>>>> commands we
>>>> end up doing to that device would have contributed enough entropy
>>>> to
>>>> get the boot done without stalling for minutes.
>>>>
>>>> One way to know for sure, and that's if Mikael tests the patch.
>>>
>>> I think I've got the root cause.  I have one system in my test bed
>>> exhibiting this behaviour.  It turns out the disk in it has no
>>> characteristics VPD page.  The 0xB1 VPD was a SBC-3 addition, so
>>> that's
>>> not surprising.  However, the characteristics check bails before
>>> setting the flags, so it takes the default flag which has flipped.
>>>
>>> We can either fix this by setting the QUEUE_FLAG_ADD_RANDOM if
>>> there's
>>> no 0xB1 page or by setting the default as Jens proposed.
>>
>> I'd recommend just doing my patch, since that'll be the same behavior
>> that SCSI had before.
> 
> I've got the history now, it's this patch
> 
> Author: Xuewei Zhang 
> Date:   Thu Sep 6 13:37:19 2018 -0700
> 
> scsi: sd: Contribute to randomness when running rotational device
> 
> It added the else branch to the if (rot == 1).  It's the position of
> that else branch which is wrong because not all disks have a SBC-3
> characteristics VPD page, so they're the ones under MQ which stop
> contributing entropy.  Whichever patch we go with will need a fixes:
> for this.

Ah, makes sense. I'd say we're _probably_ fine just fixing that then,
or at least it should be two separate patches.

-- 
Jens Axboe

Re: [PATCH v2 4/4] arm64: kprobes: Use arch_populate_kprobe_blacklist()

2019-02-12 Thread Masami Hiramatsu

On Mon, 11 Feb 2019 16:05:17 +
Marc Zyngier  wrote:

> On 11/02/2019 15:58, Will Deacon wrote:
> > [+Marc]
> > 
> > On Mon, Feb 11, 2019 at 10:10:23PM +0900, Masami Hiramatsu wrote:
> >> On Fri, 8 Feb 2019 09:15:19 +
> >> Will Deacon  wrote:
> >>> Did you send a new version of this series? I can't seem to spot it in my
> >>> inbox.
> >>
> >> Ah, OK. I just waited for James' patch series,
> >>
> >> https://patchwork.kernel.org/cover/10779489/
> >>
> >> Are those merged? I'd like to move this series on that.
> > 
> > Patches 2-4 are in mainline:
> > 
> > f7daa9c8fd19 arm64: hibernate: Clean the __hyp_text to PoC after resume
> > 8fac5cbdfe0f arm64: hyp-stub: Forbid kprobing of the hyp-stub
> > f2b3d8566d81 arm64: kprobe: Always blacklist the KVM world-switch code
> > 
> > Patch 1 is queued via kvm-arm (also for 5.0) but it doesn't seem to have
> > landed yet.
> 
> It was part of the pull request sent on Thursday[1], but Paolo hasn't
> pulled it yet.
> 
> Hopefully soon...

OK, then I'll send updated series since Patch1 is independent from
this series.

Thank you,

-- 
Masami Hiramatsu

Re: [PATCH] Input: st-keyscan - fix potential zalloc NULL dereference

2019-02-12 Thread Gabriel FERNANDEZ

Sorry, please ignore this patch (wrong mailing list).

Best Regards

Gabriel

On 2/12/19 4:24 PM, gabriel.fernan...@st.com wrote:
> From: Gabriel Fernandez 
>
> This patch fixes the following static checker warning:
>
> drivers/input/keyboard/st-keyscan.c:156 keyscan_probe()
> error: potential zalloc NULL dereference: 'keypad_data->input_dev'
>
> Reported-by: Dan Carpenter 
> Signed-off-by: Gabriel Fernandez 
> ---
>   drivers/input/keyboard/st-keyscan.c | 4 ++--
>   1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/input/keyboard/st-keyscan.c 
> b/drivers/input/keyboard/st-keyscan.c
> index babcfb165e4f..3b85631fde91 100644
> --- a/drivers/input/keyboard/st-keyscan.c
> +++ b/drivers/input/keyboard/st-keyscan.c
> @@ -153,6 +153,8 @@ static int keyscan_probe(struct platform_device *pdev)
>   
>   input_dev->id.bustype = BUS_HOST;
>   
> + keypad_data->input_dev = input_dev;
> +
>   error = keypad_matrix_key_parse_dt(keypad_data);
>   if (error)
>   return error;
> @@ -168,8 +170,6 @@ static int keyscan_probe(struct platform_device *pdev)
>   
>   input_set_drvdata(input_dev, keypad_data);
>   
> - keypad_data->input_dev = input_dev;
> -
>   res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
>   keypad_data->base = devm_ioremap_resource(>dev, res);
>   if (IS_ERR(keypad_data->base))

Re: [PATCH 1/3] clocksource: timer-ti-dm: Fix pwm dmtimer usage of fck reparenting

2019-02-12 Thread Tony Lindgren

Hi,

* Daniel Lezcano  [190212 09:08]:
> 
> Do you want me to take it through my tree (1 et 3)?

No need to, thanks. I already sent a pull request for them
as "[GIT PULL] omap soc regression fixes for v5.0-rc cycle"
with you in Cc.

Regards,

Tony

[PATCH] scsi: lpfc: fix a handful of indentation issues

2019-02-12 Thread Colin King

From: Colin Ian King 

There are a handful of statements that are indented incorrectly. Fix these.

Signed-off-by: Colin Ian King 
---
 drivers/scsi/lpfc/lpfc_bsg.c | 4 ++--
 drivers/scsi/lpfc/lpfc_debugfs.c | 4 ++--
 drivers/scsi/lpfc/lpfc_init.c| 2 +-
 drivers/scsi/lpfc/lpfc_mbox.c| 4 ++--
 drivers/scsi/lpfc/lpfc_sli.c | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index 2dc564e59430..f2494d3b365c 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -2947,7 +2947,7 @@ static int lpfcdiag_loop_post_rxbufs(struct lpfc_hba 
*phba, uint16_t rxxri,
cmd->un.cont64[i].addrLow = putPaddrLow(mp[i]->phys);
cmd->un.cont64[i].tus.f.bdeSize =
((struct lpfc_dmabufext *)mp[i])->size;
-   cmd->ulpBdeCount = ++i;
+   cmd->ulpBdeCount = ++i;
 
if ((--num_bde > 0) && (i < 2))
continue;
@@ -4682,7 +4682,7 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct bsg_job 
*job,
 * Don't allow mailbox commands to be sent when blocked or when in
 * the middle of discovery
 */
-if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO) {
+   if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO) {
rc = -EAGAIN;
goto job_done;
}
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index ee98ea1c68f9..1215eaa530db 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -1833,7 +1833,7 @@ lpfc_debugfs_disc_trc_open(struct inode *inode, struct 
file *file)
int rc = -ENOMEM;
 
if (!lpfc_debugfs_max_disc_trc) {
-rc = -ENOSPC;
+   rc = -ENOSPC;
goto out;
}
 
@@ -1883,7 +1883,7 @@ lpfc_debugfs_slow_ring_trc_open(struct inode *inode, 
struct file *file)
int rc = -ENOMEM;
 
if (!lpfc_debugfs_max_slow_ring_trc) {
-rc = -ENOSPC;
+   rc = -ENOSPC;
goto out;
}
 
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 900edae7ccc4..18424020ae40 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -10118,7 +10118,7 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
dev_printk(KERN_ERR, >dev,
"ioremap failed for SLI4 PCI config "
"registers.\n");
-   goto out;
+   goto out;
}
lpfc_sli4_bar0_register_memmap(phba, if_type);
}
diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c
index 4d3b94317515..8abe933bad09 100644
--- a/drivers/scsi/lpfc/lpfc_mbox.c
+++ b/drivers/scsi/lpfc/lpfc_mbox.c
@@ -2095,8 +2095,8 @@ lpfc_request_features(struct lpfc_hba *phba, struct 
lpfcMboxq *mboxq)
if (phba->nvmet_support) {
bf_set(lpfc_mbx_rq_ftr_rq_mrqp, >u.mqe.un.req_ftrs, 1);
/* iaab/iaar NOT set for now */
-bf_set(lpfc_mbx_rq_ftr_rq_iaab, >u.mqe.un.req_ftrs, 0);
-bf_set(lpfc_mbx_rq_ftr_rq_iaar, >u.mqe.un.req_ftrs, 0);
+   bf_set(lpfc_mbx_rq_ftr_rq_iaab, >u.mqe.un.req_ftrs, 0);
+   bf_set(lpfc_mbx_rq_ftr_rq_iaar, >u.mqe.un.req_ftrs, 0);
}
return;
 }
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 3596822605c3..d0817facdae3 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1008,7 +1008,7 @@ lpfc_test_rrq_active(struct lpfc_hba *phba, struct 
lpfc_nodelist *ndlp,
if (!ndlp->active_rrqs_xri_bitmap)
return 0;
if (test_bit(xritag, ndlp->active_rrqs_xri_bitmap))
-   return 1;
+   return 1;
else
return 0;
 }
-- 
2.20.1

[PATCH] mtd: lpddr_cmds: Mark expected switch fall-through

2019-02-12 Thread Gustavo A. R. Silva

In preparation for enabling -Wimplicit-fallthrough, mark switch
cases where we are expecting to fall through.

This patch fixes the following warning:

drivers/mtd/lpddr/lpddr_cmds.c: In function ‘chip_ready’:
drivers/mtd/lpddr/lpddr_cmds.c:319:6: warning: this statement may fall through 
[-Wimplicit-fallthrough=]
   if (mode == FL_READY && chip->oldstate == FL_READY)
  ^
drivers/mtd/lpddr/lpddr_cmds.c:322:2: note: here
  default:
  ^~~

Warning level 3 was used: -Wimplicit-fallthrough=3

This patch is part of the ongoing efforts to enable
-Wimplicit-fallthrough.

Signed-off-by: Gustavo A. R. Silva 
---
 drivers/mtd/lpddr/lpddr_cmds.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/lpddr/lpddr_cmds.c b/drivers/mtd/lpddr/lpddr_cmds.c
index b13557fe52bd..76a4c73e100e 100644
--- a/drivers/mtd/lpddr/lpddr_cmds.c
+++ b/drivers/mtd/lpddr/lpddr_cmds.c
@@ -318,6 +318,7 @@ static int chip_ready(struct map_info *map, struct flchip 
*chip, int mode)
/* Only if there's no operation suspended... */
if (mode == FL_READY && chip->oldstate == FL_READY)
return 0;
+   /* fall through */
 
default:
 sleep:
-- 
2.20.1

[PATCH v1] Input: st-keyscan - fix potential zalloc NULL dereference

2019-02-12 Thread gabriel.fernandez

From: Gabriel Fernandez 

This patch fixes the following static checker warning:

drivers/input/keyboard/st-keyscan.c:156 keyscan_probe()
error: potential zalloc NULL dereference: 'keypad_data->input_dev'

Reported-by: Dan Carpenter 
Signed-off-by: Gabriel Fernandez 
---
 drivers/input/keyboard/st-keyscan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/input/keyboard/st-keyscan.c 
b/drivers/input/keyboard/st-keyscan.c
index babcfb165e4f..3b85631fde91 100644
--- a/drivers/input/keyboard/st-keyscan.c
+++ b/drivers/input/keyboard/st-keyscan.c
@@ -153,6 +153,8 @@ static int keyscan_probe(struct platform_device *pdev)
 
input_dev->id.bustype = BUS_HOST;
 
+   keypad_data->input_dev = input_dev;
+
error = keypad_matrix_key_parse_dt(keypad_data);
if (error)
return error;
@@ -168,8 +170,6 @@ static int keyscan_probe(struct platform_device *pdev)
 
input_set_drvdata(input_dev, keypad_data);
 
-   keypad_data->input_dev = input_dev;
-
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
keypad_data->base = devm_ioremap_resource(>dev, res);
if (IS_ERR(keypad_data->base))
-- 
2.17.0

Re: [RFC PATCH v1 2/3] regulator: add regulator_desc_list_voltage_linear_range

2019-02-12 Thread Mark Brown

On Tue, Feb 12, 2019 at 04:18:46PM +0200, Matti Vaittinen wrote:

> Add regulator_desc_list_voltage_linear_range which can be used
> by drivers for getting the voltages before regulator is registered.
> This may be useful for drivers which need to fetch the voltage
> selectors at device-tree parsing callback.

This seems fine.


signature.asc
Description: PGP signature

Re: [PATCH] mtd: cfi_cmdset_0002: Mark expected switch fall-throughs

2019-02-12 Thread Gustavo A. R. Silva




On 2/9/19 2:29 AM, Tokunori Ikegami wrote:
> Reviewed-by: Tokunori Ikegami 
> 

Thanks, Tokunori.

--
Gustavo

>> -Original Message-
>> From: linux-mtd [mailto:linux-mtd-boun...@lists.infradead.org] On Behalf
>> Of Gustavo A. R. Silva
>> Sent: Saturday, February 9, 2019 3:06 AM
>> To: David Woodhouse; Brian Norris; Boris Brezillon; Marek Vasut; Richard
>> Weinberger
>> Cc: linux-...@lists.infradead.org; linux-kernel@vger.kernel.org; Gustavo
>> A. R. Silva
>> Subject: [PATCH] mtd: cfi_cmdset_0002: Mark expected switch fall-throughs
>>
>> In preparation to enabling -Wimplicit-fallthrough, mark switch
>> cases where we are expecting to fall through.
>>
>> This patch fixes the following warnings:
>>
>> drivers/mtd/chips/cfi_cmdset_0002.c: In function ‘get_chip’:
>> drivers/mtd/chips/cfi_cmdset_0002.c:870:6: warning: this statement may
>> fall through [-Wimplicit-fallthrough=]
>>if (mode == FL_READY && chip->oldstate == FL_READY)
>>   ^
>> drivers/mtd/chips/cfi_cmdset_0002.c:873:2: note: here
>>   default:
>>   ^~~
>> drivers/mtd/chips/cfi_cmdset_0002.c: In function ‘cfi_amdstd_sync’:
>> drivers/mtd/chips/cfi_cmdset_0002.c:2745:16: warning: this statement may
>> fall through [-Wimplicit-fallthrough=]
>> chip->state = FL_SYNCING;
>> ^~~~
>> drivers/mtd/chips/cfi_cmdset_0002.c:2750:3: note: here
>>case FL_SYNCING:
>>^~~~
>>
>> Warning level 3 was used: -Wimplicit-fallthrough=3
>>
>> This patch is part of the ongoing efforts to enabling
>> -Wimplicit-fallthrough.
>>
>> Signed-off-by: Gustavo A. R. Silva 
>> ---
>>  drivers/mtd/chips/cfi_cmdset_0002.c | 2 ++
>>  1 file changed, 2 insertions(+)
>>
>> diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c
>> b/drivers/mtd/chips/cfi_cmdset_0002.c
>> index 72428b6bfc47..0b0a1874d3bf 100644
>> --- a/drivers/mtd/chips/cfi_cmdset_0002.c
>> +++ b/drivers/mtd/chips/cfi_cmdset_0002.c
>> @@ -869,6 +869,7 @@ static int get_chip(struct map_info *map, struct flchip
>> *chip, unsigned long adr
>>  /* Only if there's no operation suspended... */
>>  if (mode == FL_READY && chip->oldstate == FL_READY)
>>  return 0;
>> +/* fall through */
>>
>>  default:
>>  sleep:
>> @@ -2747,6 +2748,7 @@ static void cfi_amdstd_sync (struct mtd_info *mtd)
>>   * as the whole point is that nobody can do anything
>>   * with the chip now anyway.
>>   */
>> +/* fall through */
>>  case FL_SYNCING:
>>  mutex_unlock(>mutex);
>>  break;
>> --
>> 2.20.1
>>
>>
>> __
>> Linux MTD discussion mailing list
>> http://lists.infradead.org/mailman/listinfo/linux-mtd/
>

[PATCH 2/2] ARC: enable uboot support unconditionally

2019-02-12 Thread Eugeniy Paltsev

After reworking the U-boot args handling code and adding a paranoid
argument check, we can eliminate CONFIG_ARC_UBOOT_SUPPORT and
enable U-boot support unconditionally.

For the JTAG case we can assume that core registers will come up with a
reset value of 0, or in the worst case we rely on the user passing
'-on=clear_regs' to the Metaware debugger.

Signed-off-by: Eugeniy Paltsev 
---
 arch/arc/Kconfig| 12 
 arch/arc/configs/nps_defconfig  |  1 -
 arch/arc/configs/vdk_hs38_defconfig |  1 -
 arch/arc/configs/vdk_hs38_smp_defconfig |  2 --
 arch/arc/kernel/head.S  |  2 --
 arch/arc/kernel/setup.c |  2 --
 6 files changed, 20 deletions(-)

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 376366a7db81..f9534417b201 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -191,7 +191,6 @@ config NR_CPUS
 
 config ARC_SMP_HALT_ON_RESET
bool "Enable Halt-on-reset boot mode"
-   default y if ARC_UBOOT_SUPPORT
help
  In SMP configuration cores can be configured as Halt-on-reset
  or they could all start at same time. For Halt-on-reset, non
@@ -515,17 +514,6 @@ config ARC_DBG_TLB_PARANOIA
 
 endif
 
-config ARC_UBOOT_SUPPORT
-   bool "Support uboot arg Handling"
-   help
- ARC Linux by default checks for uboot provided args as pointers to
- external cmdline or DTB. This however breaks in absence of uboot,
- when booting from Metaware debugger directly, as the registers are
- not zeroed out on reset by mdb and/or ARCv2 based cores. The bogus
- registers look like uboot args to kernel which then chokes.
- So only enable the uboot arg checking/processing if users are sure
- of uboot being in play.
-
 config ARC_BUILTIN_DTB_NAME
string "Built in DTB"
help
diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig
index 6e84060e7c90..621f59407d76 100644
--- a/arch/arc/configs/nps_defconfig
+++ b/arch/arc/configs/nps_defconfig
@@ -31,7 +31,6 @@ CONFIG_ARC_CACHE_LINE_SHIFT=5
 # CONFIG_ARC_HAS_LLSC is not set
 CONFIG_ARC_KVADDR_SIZE=402
 CONFIG_ARC_EMUL_UNALIGNED=y
-CONFIG_ARC_UBOOT_SUPPORT=y
 CONFIG_PREEMPT=y
 CONFIG_NET=y
 CONFIG_UNIX=y
diff --git a/arch/arc/configs/vdk_hs38_defconfig 
b/arch/arc/configs/vdk_hs38_defconfig
index 1e59a2e9c602..e447ace6fa1c 100644
--- a/arch/arc/configs/vdk_hs38_defconfig
+++ b/arch/arc/configs/vdk_hs38_defconfig
@@ -13,7 +13,6 @@ CONFIG_PARTITION_ADVANCED=y
 CONFIG_ARC_PLAT_AXS10X=y
 CONFIG_AXS103=y
 CONFIG_ISA_ARCV2=y
-CONFIG_ARC_UBOOT_SUPPORT=y
 CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38"
 CONFIG_PREEMPT=y
 CONFIG_NET=y
diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig 
b/arch/arc/configs/vdk_hs38_smp_defconfig
index b5c3f6c54b03..c82cdb10aaf4 100644
--- a/arch/arc/configs/vdk_hs38_smp_defconfig
+++ b/arch/arc/configs/vdk_hs38_smp_defconfig
@@ -15,8 +15,6 @@ CONFIG_AXS103=y
 CONFIG_ISA_ARCV2=y
 CONFIG_SMP=y
 # CONFIG_ARC_TIMERS_64BIT is not set
-# CONFIG_ARC_SMP_HALT_ON_RESET is not set
-CONFIG_ARC_UBOOT_SUPPORT=y
 CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38_smp"
 CONFIG_PREEMPT=y
 CONFIG_NET=y
diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S
index fccea361e896..4b0deaff001c 100644
--- a/arch/arc/kernel/head.S
+++ b/arch/arc/kernel/head.S
@@ -90,7 +90,6 @@ ENTRY(stext)
st.ab   0, [r5, 4]
 1:
 
-#ifdef CONFIG_ARC_UBOOT_SUPPORT
; Uboot - kernel ABI
;r0 = [0] No uboot interaction, [1] cmdline in r2, [2] DTB in r2
;r1 = magic number (always zero as of now)
@@ -99,7 +98,6 @@ ENTRY(stext)
st  r0, [@uboot_tag]
st  r1, [@uboot_magic]
st  r2, [@uboot_arg]
-#endif
 
; setup "current" tsk and optionally cache it in dedicated r25
mov r9, @init_task
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 84d394a37e79..fff946b0ab4f 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -490,7 +490,6 @@ void __init handle_uboot_args(void)
bool use_embedded_dtb = true;
bool append_cmdline = false;
 
-#ifdef CONFIG_ARC_UBOOT_SUPPORT
/* check that we know this tag */
if (uboot_tag != UBOOT_TAG_NONE &&
uboot_tag != UBOOT_TAG_CMDLINE &&
@@ -521,7 +520,6 @@ void __init handle_uboot_args(void)
append_cmdline = true;
 
 ignore_uboot_args:
-#endif
 
if (use_embedded_dtb) {
machine_desc = setup_machine_fdt(__dtb_start);
-- 
2.14.5

[PATCH 0/2] RC: rework U-boot arguments handling

2019-02-12 Thread Eugeniy Paltsev

Reworking U-boot args handling and enable uboot support
unconditionally.

Changes RFC->v1:
 * Don't add new ABI contract between kernel and uboot
 * Eliminate CONFIG_ARC_UBOOT_SUPPORT Kconfig option and
   enable uboot support unconditionally
 * Skip invalid U-boot args instead of panic
 * Check existing U-boot magic value
 * Improve uboot_arg validating
 * Minor code changes

Eugeniy Paltsev (2):
  ARC: U-boot: check arguments paranoidly
  ARC: enable uboot support unconditionally

 arch/arc/Kconfig| 12 -
 arch/arc/configs/nps_defconfig  |  1 -
 arch/arc/configs/vdk_hs38_defconfig |  1 -
 arch/arc/configs/vdk_hs38_smp_defconfig |  2 -
 arch/arc/kernel/head.S  |  7 ++-
 arch/arc/kernel/setup.c | 96 +++--
 6 files changed, 70 insertions(+), 49 deletions(-)

-- 
2.14.5

[PATCH 1/2] ARC: U-boot: check arguments paranoidly

2019-02-12 Thread Eugeniy Paltsev

Handle U-boot arguments paranoidly:
 * don't allow to pass unknown tag.
 * try to use external device tree blob only if corresponding tag
   (TAG_DTB) is set.
 * check that magic number is correct.
 * don't check uboot_tag if the kernel is built without ARC_UBOOT_SUPPORT.

NOTE:
If U-boot args are invalid we skip them and try to use the embedded device
tree blob. We can't panic on invalid U-boot args, as we really do pass
invalid args due to a bug in the U-boot code.
This happens if we don't provide an external DTB to U-boot and
don't set the 'bootargs' U-boot environment variable (which is the default
case, at least for the HSDK board). In that case we will pass
{r0 = 1 (bootargs in r2); r1 = 0; r2 = 0;} to linux, which is invalid.

NOTE:
We can safely check U-boot magic value (0x0) in linux passed via
r1 register as U-boot pass it from the beginning.

While I'm at it refactor U-boot arguments handling code.

Signed-off-by: Eugeniy Paltsev 
---
 arch/arc/kernel/head.S  |  5 +--
 arch/arc/kernel/setup.c | 92 +++--
 2 files changed, 69 insertions(+), 28 deletions(-)

diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S
index 8b90d25a15cc..fccea361e896 100644
--- a/arch/arc/kernel/head.S
+++ b/arch/arc/kernel/head.S
@@ -93,10 +93,11 @@ ENTRY(stext)
 #ifdef CONFIG_ARC_UBOOT_SUPPORT
; Uboot - kernel ABI
;r0 = [0] No uboot interaction, [1] cmdline in r2, [2] DTB in r2
-   ;r1 = magic number (board identity, unused as of now
+   ;r1 = magic number (always zero as of now)
;r2 = pointer to uboot provided cmdline or external DTB in mem
-   ; These are handled later in setup_arch()
+   ; These are handled later in handle_uboot_args()
st  r0, [@uboot_tag]
+   st  r1, [@uboot_magic]
st  r2, [@uboot_arg]
 #endif
 
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index feb90093e6b1..84d394a37e79 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -36,7 +36,8 @@ unsigned int intr_to_DE_cnt;
 
 /* Part of U-boot ABI: see head.S */
 int __initdata uboot_tag;
-char __initdata *uboot_arg;
+int __initdata uboot_magic;
+unsigned int __initdata uboot_arg;
 
 const struct machine_desc *machine_desc;
 
@@ -462,43 +463,82 @@ void setup_processor(void)
arc_chk_core_config();
 }
 
-static inline int is_kernel(unsigned long addr)
+static inline bool uboot_arg_invalid(unsigned int addr)
 {
-   if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end)
-   return 1;
-   return 0;
+   /*
+* Check that it is a untranslated address (although MMU is not enabled
+* yet, it being a high address ensures this is not by fluke)
+*/
+   if (addr < PAGE_OFFSET)
+   return true;
+
+   /* Check that address doesn't clobber resident kernel image */
+   return addr >= (unsigned int)_stext && addr <= (unsigned int)_end;
 }
 
-void __init setup_arch(char **cmdline_p)
+#define IGNORE_ARGS"Ignore U-boot args: "
+
+/* uboot_{tag, magic} values for U-boot - kernel ABI revision 0; see head.S */
+#define UBOOT_TAG_NONE 0
+#define UBOOT_TAG_CMDLINE  1
+#define UBOOT_TAG_DTB  2
+/* We always pass 0 as magic from U-boot */
+#define UBOOT_MAGIC_VAL0
+
+void __init handle_uboot_args(void)
 {
+   bool use_embedded_dtb = true;
+   bool append_cmdline = false;
+
 #ifdef CONFIG_ARC_UBOOT_SUPPORT
-   /* make sure that uboot passed pointer to cmdline/dtb is valid */
-   if (uboot_tag && is_kernel((unsigned long)uboot_arg))
-   panic("Invalid uboot arg\n");
+   /* check that we know this tag */
+   if (uboot_tag != UBOOT_TAG_NONE &&
+   uboot_tag != UBOOT_TAG_CMDLINE &&
+   uboot_tag != UBOOT_TAG_DTB) {
+   pr_warn(IGNORE_ARGS "invalid uboot tag: '%08x'\n", uboot_tag);
+   goto ignore_uboot_args;
+   }
+
+   if (uboot_magic != UBOOT_MAGIC_VAL) {
+   pr_warn(IGNORE_ARGS "non zero uboot magic\n");
+   goto ignore_uboot_args;
+   }
+
+   if (uboot_tag != UBOOT_TAG_NONE && uboot_arg_invalid(uboot_arg)) {
+   pr_warn(IGNORE_ARGS "invalid uboot arg: '%08x'\n", uboot_arg);
+   goto ignore_uboot_args;
+   }
+
+   /* see if U-boot passed an external Device Tree blob */
+   if (uboot_tag == UBOOT_TAG_DTB) {
+   machine_desc = setup_machine_fdt((void *)uboot_arg);
+
+   /* external Device Tree blob is invalid - use embedded one */
+   use_embedded_dtb = !machine_desc;
+   }
+
+   if (uboot_tag == UBOOT_TAG_CMDLINE)
+   append_cmdline = true;
 
-   /* See if u-boot passed an external Device Tree blob */
-   machine_desc = setup_machine_fdt(uboot_arg);/* uboot_tag == 2 */
-   if (!machine_desc)
+ignore_uboot_args:
 #endif
-   {
-   /* No, so try the embedded one */
+
+   if

Re: [PATCH 4.20 000/352] 4.20.8-stable review

2019-02-12 Thread shuah


On 2/11/19 7:13 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.20.8 release.
There are 352 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Wed Feb 13 14:17:03 UTC 2019.
Anything received after that time might be too late.

The whole patch series can be found in one patch at:

https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.20.8-rc1.gz
or in the git tree and branch at:

git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
linux-4.20.y
and the diffstat can be found below.

thanks,

greg k-h



Compiled and booted on my test system. No dmesg regressions.

thanks,
-- Shuah

Re: [PATCH 4.19 000/313] 4.19.21-stable review

2019-02-12 Thread shuah


On 2/11/19 7:14 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.19.21 release.
There are 313 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Wed Feb 13 14:17:25 UTC 2019.
Anything received after that time might be too late.

The whole patch series can be found in one patch at:

https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.19.21-rc1.gz
or in the git tree and branch at:

git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
linux-4.19.y
and the diffstat can be found below.

thanks,

greg k-h



Compiled and booted on my test system. No dmesg regressions.

thanks,
-- Shuah

Re: [PATCH 2/2] chardev: showing minor range for chardev in the output of /proc/devices

2019-02-12 Thread cgxu519


On 2/12/19 11:20 PM, Greg KH wrote:

On Tue, Feb 12, 2019 at 11:18:22PM +0800, cgxu519 wrote:

On 2/12/19 5:02 PM, Greg KH wrote:

On Tue, Feb 12, 2019 at 04:47:39PM +0800, Chengguang Xu wrote:

Currently chardev allows to share major, showing
major with minor range for chardev will be more
helpful.

Signed-off-by: Chengguang Xu 
---
   fs/char_dev.c | 4 +++-
   1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/char_dev.c b/fs/char_dev.c
index b25b1da097d5..6f00acdeb308 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -55,7 +55,9 @@ void chrdev_show(struct seq_file *f, off_t offset)
mutex_lock(_lock);
for (cd = chrdevs[major_to_index(offset)]; cd; cd = cd->next) {
if (cd->major == offset)
-   seq_printf(f, "%3d %s\n", cd->major, cd->name);
+   seq_printf(f, "%3d %s (%u-%u)\n", cd->major, cd->name,
+  cd->baseminor,
+  cd->baseminor + cd->minorct - 1);

You are changing the format of a userspace file, what tools are going to
break when you do this?

I'll remove this part in V2. Do you have any idea how to get the minor
range info for particular major? Or adding a similar file to somewhere
under /sys is acceptable?

Why do you need to know the minor range?  What can userspace do with
this that actually matters?


Assume that we try to load a driver module and it fails with -EBUSY
because of overlapping minor ranges; what can we do in this case?
We don't even know which ranges are occupied and which are available.

Also, I think we can obviously notice range overlapping bugs by showing
all registered minor ranges.

Thanks,
Chengguang.

Re: [PATCH 4.14 000/205] 4.14.99-stable review

2019-02-12 Thread shuah


On 2/11/19 7:16 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.14.99 release.
There are 205 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Wed Feb 13 14:17:19 UTC 2019.
Anything received after that time might be too late.

The whole patch series can be found in one patch at:

https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.14.99-rc1.gz
or in the git tree and branch at:

git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
linux-4.14.y
and the diffstat can be found below.

thanks,

greg k-h



Compiled and booted on my test system. No dmesg regressions.

thanks,
-- Shuah

Re: [PATCH 4.9 000/137] 4.9.156-stable review

2019-02-12 Thread shuah


On 2/11/19 7:18 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.9.156 release.
There are 137 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Wed Feb 13 14:17:22 UTC 2019.
Anything received after that time might be too late.

The whole patch series can be found in one patch at:

https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.156-rc1.gz
or in the git tree and branch at:

git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
linux-4.9.y
and the diffstat can be found below.

thanks,

greg k-h



Compiled and booted on my test system. No dmesg regressions.

thanks,
-- Shuah

[PATCH v3 1/4] arm64: kprobes: Move extable address check into arch_prepare_kprobe()

2019-02-12 Thread Masami Hiramatsu

Move extable address check into arch_prepare_kprobe() from
arch_within_kprobe_blacklist().
The blacklist is exposed via debugfs as a list of symbols.
The extable entries are smaller, so must be filtered out
by arch_prepare_kprobe().

Signed-off-by: Masami Hiramatsu 
Reviewed-by: James Morse 
---
 Update in v2:
  - Update commit message.
  - Add Reviewed-by from James.
---
 arch/arm64/kernel/probes/kprobes.c |7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/probes/kprobes.c 
b/arch/arm64/kernel/probes/kprobes.c
index f17afb99890c..9989ec9baa11 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -102,6 +102,10 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 
if (in_exception_text(probe_addr))
return -EINVAL;
+
+   if (search_exception_tables(probe_addr))
+   return -EINVAL;
+
if (probe_addr >= (unsigned long) __start_rodata &&
probe_addr <= (unsigned long) __end_rodata)
return -EINVAL;
@@ -479,8 +483,7 @@ bool arch_within_kprobe_blacklist(unsigned long addr)
(addr >= (unsigned long)__idmap_text_start &&
addr < (unsigned long)__idmap_text_end) ||
(addr >= (unsigned long)__hyp_text_start &&
-   addr < (unsigned long)__hyp_text_end) ||
-   !!search_exception_tables(addr))
+   addr < (unsigned long)__hyp_text_end))
return true;
 
if (!is_kernel_in_hyp_mode()) {

[PATCH v3 0/4] arm64: kprobes: Update blacklist checking on arm64

2019-02-12 Thread Masami Hiramatsu

Hello,

Here is the v3 series of update of the kprobe blacklist
checking on arm64.

I found that some blacklist checking code was misplaced in
arch_prepare_kprobe() and arch_within_kprobe_blacklist().
Since the blacklist just filters by symbol, anything smaller than a
symbol, like an extable entry, must be checked in arch_prepare_kprobe().
Also, all function (symbol) level checks must be done by the blacklist.

For arm64, it checks the extable entry address in blacklist
and exception/irqentry function in arch_prepare_kprobe().
And, RODATA check is unneeded since kernel/kprobes.c
already ensures the probe address is in kernel-text area.

In v3, I rebased on the latest arm64 kernel, which includes
James' KVM/HYP fixes for kprobes, and fixed the reported bugs
in [4/4].

Changes in v3:
 - [4/4] Fixes to remove redundant blacklist of kprobe_text
   and add blacklist on exception_text.

Thank you,

---

Masami Hiramatsu (4):
  arm64: kprobes: Move extable address check into arch_prepare_kprobe()
  arm64: kprobes: Remove unneeded RODATA check
  arm64: kprobes: Move exception_text check in blacklist
  arm64: kprobes: Use arch_populate_kprobe_blacklist()


 arch/arm64/kernel/probes/kprobes.c |   52 +++-
 1 file changed, 27 insertions(+), 25 deletions(-)

--
Masami Hiramatsu (Linaro)

[PATCH v3 2/4] arm64: kprobes: Remove unneeded RODATA check

2019-02-12 Thread Masami Hiramatsu

Remove unneeded RODATA check from arch_prepare_kprobe().

Since check_kprobe_address_safe() already ensured that
the probe address is in kernel text, we don't need to
check whether the address in RODATA or not. That must
be always false.

Signed-off-by: Masami Hiramatsu 
---
 arch/arm64/kernel/probes/kprobes.c |6 --
 1 file changed, 6 deletions(-)

diff --git a/arch/arm64/kernel/probes/kprobes.c 
b/arch/arm64/kernel/probes/kprobes.c
index 9989ec9baa11..bd06b4b13fa9 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -91,8 +91,6 @@ static void __kprobes arch_simulate_insn(struct kprobe *p, 
struct pt_regs *regs)
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
unsigned long probe_addr = (unsigned long)p->addr;
-   extern char __start_rodata[];
-   extern char __end_rodata[];
 
if (probe_addr & 0x3)
return -EINVAL;
@@ -106,10 +104,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
if (search_exception_tables(probe_addr))
return -EINVAL;
 
-   if (probe_addr >= (unsigned long) __start_rodata &&
-   probe_addr <= (unsigned long) __end_rodata)
-   return -EINVAL;
-
/* decode instruction */
switch (arm_kprobe_decode_insn(p->addr, >ainsn)) {
case INSN_REJECTED: /* insn not supported */

[PATCH v3 3/4] arm64: kprobes: Move exception_text check in blacklist

2019-02-12 Thread Masami Hiramatsu

Move exception/irqentry text address check in blacklist,
since those are symbol based rejection.

If we prohibit probing on the symbols in exception_text,
those should be blacklisted.

Signed-off-by: Masami Hiramatsu 
---
 arch/arm64/kernel/probes/kprobes.c |6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kernel/probes/kprobes.c 
b/arch/arm64/kernel/probes/kprobes.c
index bd06b4b13fa9..194262fca5cd 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -98,9 +98,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
/* copy instruction */
p->opcode = le32_to_cpu(*p->addr);
 
-   if (in_exception_text(probe_addr))
-   return -EINVAL;
-
if (search_exception_tables(probe_addr))
return -EINVAL;
 
@@ -477,7 +474,8 @@ bool arch_within_kprobe_blacklist(unsigned long addr)
(addr >= (unsigned long)__idmap_text_start &&
addr < (unsigned long)__idmap_text_end) ||
(addr >= (unsigned long)__hyp_text_start &&
-   addr < (unsigned long)__hyp_text_end))
+   addr < (unsigned long)__hyp_text_end) ||
+   in_exception_text(addr))
return true;
 
if (!is_kernel_in_hyp_mode()) {

[PATCH v3 4/4] arm64: kprobes: Use arch_populate_kprobe_blacklist()

2019-02-12 Thread Masami Hiramatsu

Use arch_populate_kprobe_blacklist() instead of
arch_within_kprobe_blacklist() so that we can see the full
blacklisted symbols under the debugfs.

Signed-off-by: Masami Hiramatsu 
---
  Changes in v3
   - Do not populate blacklist in __kprobe_text in
 arch_populate_kprobe_blacklist(), since it is already
 populated in populate_kprobe_blacklist().
   - Add exception entry text blacklist since those are rejected
 by in_exception_text().
---
 arch/arm64/kernel/probes/kprobes.c |   45 +---
 1 file changed, 26 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/kernel/probes/kprobes.c 
b/arch/arm64/kernel/probes/kprobes.c
index 194262fca5cd..37d913f33a89 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -465,26 +465,33 @@ kprobe_breakpoint_handler(struct pt_regs *regs, unsigned 
int esr)
return DBG_HOOK_HANDLED;
 }
 
-bool arch_within_kprobe_blacklist(unsigned long addr)
+int __init arch_populate_kprobe_blacklist(void)
 {
-   if ((addr >= (unsigned long)__kprobes_text_start &&
-   addr < (unsigned long)__kprobes_text_end) ||
-   (addr >= (unsigned long)__entry_text_start &&
-   addr < (unsigned long)__entry_text_end) ||
-   (addr >= (unsigned long)__idmap_text_start &&
-   addr < (unsigned long)__idmap_text_end) ||
-   (addr >= (unsigned long)__hyp_text_start &&
-   addr < (unsigned long)__hyp_text_end) ||
-   in_exception_text(addr))
-   return true;
-
-   if (!is_kernel_in_hyp_mode()) {
-   if ((addr >= (unsigned long)__hyp_idmap_text_start &&
-   addr < (unsigned long)__hyp_idmap_text_end))
-   return true;
-   }
-
-   return false;
+   int ret;
+
+   ret = kprobe_add_area_blacklist((unsigned long)__entry_text_start,
+   (unsigned long)__entry_text_end);
+   if (ret)
+   return ret;
+   ret = kprobe_add_area_blacklist((unsigned long)__irqentry_text_start,
+   (unsigned long)__irqentry_text_end);
+   if (ret)
+   return ret;
+   ret = kprobe_add_area_blacklist((unsigned long)__exception_text_start,
+   (unsigned long)__exception_text_end);
+   if (ret)
+   return ret;
+   ret = kprobe_add_area_blacklist((unsigned long)__idmap_text_start,
+   (unsigned long)__idmap_text_end);
+   if (ret)
+   return ret;
+   ret = kprobe_add_area_blacklist((unsigned long)__hyp_text_start,
+   (unsigned long)__hyp_text_end);
+   if (ret || is_kernel_in_hyp_mode())
+   return ret;
+   ret = kprobe_add_area_blacklist((unsigned long)__hyp_idmap_text_start,
+   (unsigned long)__hyp_idmap_text_end);
+   return ret;
 }
 
 void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)

Re: [PATCH v6 9/9] vsprintf: Avoid confusion between invalid address and value

2019-02-12 Thread Petr Mladek

On Fri 2019-02-08 19:27:17, Andy Shevchenko wrote:
> On Fri, Feb 08, 2019 at 04:23:10PM +0100, Petr Mladek wrote:
> > We are able to detect invalid values handled by %p[iI] printk specifier.
> > The current error message is "invalid address". It might cause confusion
> > against "(efault)" reported by the generic valid_pointer_address() check.
> > 
> > Let's unify the style and use the more appropriate error code description
> > "(einval)".
> 
> The proper one should be "invalid address family". The proposed change
> increases confusion.

I am confused. Is there any error code for "invalid address family"?

EINVAL is standard error code used when a wrong value is passed
as a parameter. In this case, the code is not able to handle
the given address family.

IMHO, the original message "invalid address" has been even more
confusing. Oops would happen if it was invalid. In fact, the value
was invalid.

Best Regards,
Petr

Re: [PATCH net] sctp: call gso_reset_checksum when computing checksum in sctp_gso_segment

2019-02-12 Thread Neil Horman

On Tue, Feb 12, 2019 at 06:47:30PM +0800, Xin Long wrote:
> Jianlin reported a panic when running sctp gso over gre over vlan device:
> 
>   [   84.772930] RIP: 0010:do_csum+0x6d/0x170
>   [   84.790605] Call Trace:
>   [   84.791054]  csum_partial+0xd/0x20
>   [   84.791657]  gre_gso_segment+0x2c3/0x390
>   [   84.792364]  inet_gso_segment+0x161/0x3e0
>   [   84.793071]  skb_mac_gso_segment+0xb8/0x120
>   [   84.793846]  __skb_gso_segment+0x7e/0x180
>   [   84.794581]  validate_xmit_skb+0x141/0x2e0
>   [   84.795297]  __dev_queue_xmit+0x258/0x8f0
>   [   84.795949]  ? eth_header+0x26/0xc0
>   [   84.796581]  ip_finish_output2+0x196/0x430
>   [   84.797295]  ? skb_gso_validate_network_len+0x11/0x80
>   [   84.798183]  ? ip_finish_output+0x169/0x270
>   [   84.798875]  ip_output+0x6c/0xe0
>   [   84.799413]  ? ip_append_data.part.50+0xc0/0xc0
>   [   84.800145]  iptunnel_xmit+0x144/0x1c0
>   [   84.800814]  ip_tunnel_xmit+0x62d/0x930 [ip_tunnel]
>   [   84.801699]  gre_tap_xmit+0xac/0xf0 [ip_gre]
>   [   84.802395]  dev_hard_start_xmit+0xa5/0x210
>   [   84.803086]  sch_direct_xmit+0x14f/0x340
>   [   84.803733]  __dev_queue_xmit+0x799/0x8f0
>   [   84.804472]  ip_finish_output2+0x2e0/0x430
>   [   84.805255]  ? skb_gso_validate_network_len+0x11/0x80
>   [   84.806154]  ip_output+0x6c/0xe0
>   [   84.806721]  ? ip_append_data.part.50+0xc0/0xc0
>   [   84.807516]  sctp_packet_transmit+0x716/0xa10 [sctp]
>   [   84.808337]  sctp_outq_flush+0xd7/0x880 [sctp]
> 
> It was caused by SKB_GSO_CB(skb)->csum_start not set in sctp_gso_segment.
> sctp_gso_segment() calls skb_segment() with 'feature | NETIF_F_HW_CSUM',
> which causes SKB_GSO_CB(skb)->csum_start not to be set in skb_segment().
> 
> For TCP/UDP, when feature supports HW_CSUM, CHECKSUM_PARTIAL will be set
> and gso_reset_checksum will be called to set SKB_GSO_CB(skb)->csum_start.
> 
> So SCTP should do the same as TCP/UDP, to call gso_reset_checksum() when
> computing checksum in sctp_gso_segment.
> 
> Reported-by: Jianlin Shi 
> Signed-off-by: Xin Long 
> ---
>  net/sctp/offload.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/net/sctp/offload.c b/net/sctp/offload.c
> index 123e9f2..edfcf16 100644
> --- a/net/sctp/offload.c
> +++ b/net/sctp/offload.c
> @@ -36,6 +36,7 @@ static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
>  {
>   skb->ip_summed = CHECKSUM_NONE;
>   skb->csum_not_inet = 0;
> + gso_reset_checksum(skb, ~0);
>   return sctp_compute_cksum(skb, skb_transport_offset(skb));
>  }
>  
> -- 
> 2.1.0
> 
> 
> 
Acked-by: Neil Horman

Re: [RFC PATCH v7 05/16] arm64/mm: Add support for XPFO

2019-02-12 Thread Khalid Aziz

On 1/23/19 7:20 AM, Konrad Rzeszutek Wilk wrote:
> On Thu, Jan 10, 2019 at 02:09:37PM -0700, Khalid Aziz wrote:
>> From: Juerg Haefliger 
>>
>> Enable support for eXclusive Page Frame Ownership (XPFO) for arm64 and
>> provide a hook for updating a single kernel page table entry (which is
>> required by the generic XPFO code).
>>
>> v6: use flush_tlb_kernel_range() instead of __flush_tlb_one()
>>
>> CC: linux-arm-ker...@lists.infradead.org
>> Signed-off-by: Juerg Haefliger 
>> Signed-off-by: Tycho Andersen 
>> Signed-off-by: Khalid Aziz 
>> ---
>>  arch/arm64/Kconfig |  1 +
>>  arch/arm64/mm/Makefile |  2 ++
>>  arch/arm64/mm/xpfo.c   | 58 ++
>>  3 files changed, 61 insertions(+)
>>  create mode 100644 arch/arm64/mm/xpfo.c
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index ea2ab0330e3a..f0a9c0007d23 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -171,6 +171,7 @@ config ARM64
>>  select SWIOTLB
>>  select SYSCTL_EXCEPTION_TRACE
>>  select THREAD_INFO_IN_TASK
>> +select ARCH_SUPPORTS_XPFO
>>  help
>>ARM 64-bit (AArch64) Linux support.
>>  
>> diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
>> index 849c1df3d214..cca3808d9776 100644
>> --- a/arch/arm64/mm/Makefile
>> +++ b/arch/arm64/mm/Makefile
>> @@ -12,3 +12,5 @@ KASAN_SANITIZE_physaddr.o  += n
>>  
>>  obj-$(CONFIG_KASAN) += kasan_init.o
>>  KASAN_SANITIZE_kasan_init.o := n
>> +
>> +obj-$(CONFIG_XPFO)  += xpfo.o
>> diff --git a/arch/arm64/mm/xpfo.c b/arch/arm64/mm/xpfo.c
>> new file mode 100644
>> index ..678e2be848eb
>> --- /dev/null
>> +++ b/arch/arm64/mm/xpfo.c
>> @@ -0,0 +1,58 @@
>> +/*
>> + * Copyright (C) 2017 Hewlett Packard Enterprise Development, L.P.
>> + * Copyright (C) 2016 Brown University. All rights reserved.
>> + *
>> + * Authors:
>> + *   Juerg Haefliger 
>> + *   Vasileios P. Kemerlis 
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms of the GNU General Public License version 2 as published 
>> by
>> + * the Free Software Foundation.
>> + */
>> +
>> +#include 
>> +#include 
>> +
>> +#include 
>> +
>> +/*
>> + * Lookup the page table entry for a virtual address and return a pointer to
>> + * the entry. Based on x86 tree.
>> + */
>> +static pte_t *lookup_address(unsigned long addr)
> 
> The x86 also has level. Would it make sense to include that in here?
> 

Possibly. ARM64 does not define page levels (as in the enum for page
levels) at this time but that can be added easily. Adding level to
lookup_address() for arm will make it uniform with x86 but is there any
other rationale besides that? Do you see a future use for this
information? The only other architecture I could see that defines
lookup_address() is sh but it uses it for trapped io only.

Thanks,
Khalid


pEpkey.asc
Description: application/pgp-keys

Re: [RFC PATCH v1 08/25] printk: add ring buffer and kthread

2019-02-12 Thread Sergey Senozhatsky

On (02/12/19 15:29), John Ogness wrote:
[..]
> +static int printk_kthread_func(void *data)
> +{
> + struct prb_iterator iter;
> + struct printk_log *msg;
> + size_t ext_len;
> + char *ext_text;
> + u64 master_seq;
> + size_t len;
> + char *text;
> + char *buf;
> + int ret;
> +
> + ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL);
> + text = kmalloc(PRINTK_SPRINT_MAX, GFP_KERNEL);
> + buf = kmalloc(PRINTK_RECORD_MAX, GFP_KERNEL);
> + if (!ext_text || !text || !buf)
> + return -1;
> +
> + prb_iter_init(, _rb, NULL);
> +
> + /* the printk kthread never exits */
> + for (;;) {
> + ret = prb_iter_wait_next(, buf,
> +  PRINTK_RECORD_MAX, _seq);
> + if (ret == -ERESTARTSYS) {
> + continue;
> + } else if (ret < 0) {
> + /* iterator invalid, start over */
> + prb_iter_init(, _rb, NULL);
> + continue;
> + }
> +
> + msg = (struct printk_log *)buf;
> + format_text(msg, master_seq, ext_text, _len, text,
> + , printk_time);
> +
> + console_lock();
> + if (len > 0 || ext_len > 0) {
> + call_console_drivers(ext_text, ext_len, text, len);
> + boot_delay_msec(msg->level);
> + printk_delay();
> + }
> + console_unlock();
> + }

One thing that I have learned is that preemptible printk does not work
as expected; it wants to be 'atomic' and just stay busy as long as it can.
We tried preemptible printk at Samsung and the result was just bad:
   preempted printk kthread + slow serial console = lots of lost messages

We also had preemptible printk in the upstream kernel and reverted the
patch (see fd5f7cde1b85d4c8e09); same reasons - we had reports that
preemptible printk could "stall" for minutes.

-ss

Re: [PATCH net] sctp: set stream ext to NULL after freeing it in sctp_stream_outq_migrate

2019-02-12 Thread Neil Horman

On Tue, Feb 12, 2019 at 06:51:01PM +0800, Xin Long wrote:
> In sctp_stream_init(), after sctp_stream_outq_migrate() freed the
> surplus streams' ext, but sctp_stream_alloc_out() returns -ENOMEM,
> stream->outcnt will not be set to 'outcnt'.
> 
> With the bigger value on stream->outcnt, when closing the assoc and
> freeing its streams, the ext of those surplus streams will be freed
> again since those stream exts were not set to NULL after freeing in
> sctp_stream_outq_migrate(). Then the invalid-free issue reported by
> syzbot would be triggered.
> 
> We fix it by simply setting them to NULL after freeing.
> 
> Fixes: 5e32a431 ("sctp: introduce stream scheduler foundations")
> Reported-by: syzbot+58e480e7b28f2d890...@syzkaller.appspotmail.com
> Signed-off-by: Xin Long 
> ---
>  net/sctp/stream.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/net/sctp/stream.c b/net/sctp/stream.c
> index f246331..2936ed1 100644
> --- a/net/sctp/stream.c
> +++ b/net/sctp/stream.c
> @@ -144,8 +144,10 @@ static void sctp_stream_outq_migrate(struct sctp_stream 
> *stream,
>   }
>   }
>  
> - for (i = outcnt; i < stream->outcnt; i++)
> + for (i = outcnt; i < stream->outcnt; i++) {
>   kfree(SCTP_SO(stream, i)->ext);
> + SCTP_SO(stream, i)->ext = NULL;
> + }
>  }
>  
>  static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
> -- 
> 2.1.0
> 
> 
Acked-by: Neil Horman

Re: [PATCH 2/3] mm/filemap: initiate readahead even if IOCB_NOWAIT is set for the I/O

2019-02-12 Thread Jiri Kosina

On Fri, 1 Feb 2019, Dave Chinner wrote:

> So, I'll invite the incoherent, incandescent O_DIRECT rage flames of
> Linus to be unleashed again and point out the /other reference/ to
> IOCB_NOWAIT in mm/filemap.c. That is, in generic_file_read_iter(),
> in the *generic O_DIRECT read path*:
> 
>   if (iocb->ki_flags & IOCB_DIRECT) {
> .
>   if (iocb->ki_flags & IOCB_NOWAIT) {
>   if (filemap_range_has_page(mapping, iocb->ki_pos,
>  iocb->ki_pos + count - 1))
>   return -EAGAIN;
>   } else {
> .

OK, thanks Dave, this is a good point I've missed in this mail before
(probably as I focused only on the aspect of disagreement over what NONBLOCK
actually means :) ). I will look into fixing it for next iteration.

> It's effectively useless as a workaround because you can avoid the
> readahead IO being issued relatively easily:
> 
> void page_cache_sync_readahead(struct address_space *mapping,
>struct file_ra_state *ra, struct file *filp,
>pgoff_t offset, unsigned long req_size)
> {
> /* no read-ahead */
> if (!ra->ra_pages)
> return;
> 
> if (blk_cgroup_congested())
> return;
> 
> 
> IOWs, we just have to issue enough IO to congest the block device (or,
> even easier, a rate-limited cgroup), and we can still use RWF_NOWAIT
> to probe the page cache. Or if we can convince ra->ra_pages to be
> zero (e.g. it's on bdi device with no readahead configured because
> it's real fast) then it doesn't work there, either.

It's though questionable whether the noise level here wouldn't be too high 
already for any sidechannel to work reliably. So I'd suggest to operate 
under the assumption that it would be too noisy, unless anyone is able to 
prove otherwise.

Thanks,

-- 
Jiri Kosina
SUSE Labs

Re: [PATCH V2 3/4] nvme-pci: avoid irq allocation retrying via .calc_sets

2019-02-12 Thread Keith Busch

On Tue, Feb 12, 2019 at 05:04:38AM -0800, Ming Lei wrote:
> Currently pre-caculate each set vectors, and this way requires same
> 'max_vecs' and 'min_vecs' passed to pci_alloc_irq_vectors_affinity(),
> then nvme_setup_irqs() has to retry in case of allocation failure.
> 
> This usage & interface is a bit awkward because the retry should have
> been avoided by providing one reasonable 'min_vecs'.
> 
> Implement the callback of .calc_sets, so that pci_alloc_irq_vectors_affinity()
> can calculate each set's vector after IRQ vectors is allocated and
> before spread IRQ, then NVMe's retry in case of irq allocation failure
> can be removed.
> 
> Signed-off-by: Ming Lei 

Thanks, Ming, this whole series looks like a great improvement for
drivers using irq sets.

Minor nit below. Otherwise you may add my review for the whole series
if you spin a v3 for the other minor comments.

Reviewed-by: Keith Busch 

> +static void nvme_calc_irq_sets(struct irq_affinity *affd, int nvecs)
> +{
> + struct nvme_dev *dev = affd->priv;
> +
> + nvme_calc_io_queues(dev, nvecs);
> +
> + affd->set_vectors[HCTX_TYPE_DEFAULT] = 
> dev->io_queues[HCTX_TYPE_DEFAULT];
> + affd->set_vectors[HCTX_TYPE_READ] = dev->io_queues[HCTX_TYPE_READ];
> + affd->nr_sets = HCTX_TYPE_POLL;
> +}

The value of HCTX_TYPE_POLL happens to be 2, but that seems more of a
coincidence right now. Can we hard code 2 just in case the value changes?

Re: [PATCH 2/4] elf: use list_for_each_entry()

2019-02-12 Thread kbuild test robot

Hi Alexey,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.0-rc4]
[cannot apply to next-20190212]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Alexey-Dobriyan/elf-don-t-be-afraid-of-overflow/20190205-225931
config: nds32-defconfig (attached as .config)
compiler: nds32le-linux-gcc (GCC) 6.4.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=6.4.0 make.cross ARCH=nds32 

All errors (new ones prefixed by >>):

   fs/binfmt_elf.c: In function 'write_note_info':
>> fs/binfmt_elf.c:2124:19: error: 'tmp' undeclared (first use in this function)
  for (i = 0; i < tmp->num_notes; i++)
  ^~~
   fs/binfmt_elf.c:2124:19: note: each undeclared identifier is reported only 
once for each function it appears in

vim +/tmp +2124 fs/binfmt_elf.c

3aba481f Roland McGrath  2008-01-30  2111  
3aba481f Roland McGrath  2008-01-30  2112  static int write_note_info(struct 
elf_note_info *info,
ecc8c772 Al Viro 2013-10-05  2113  struct 
coredump_params *cprm)
3aba481f Roland McGrath  2008-01-30  2114  {
1e0d184d Alexey Dobriyan 2019-02-04  2115   struct elf_thread_status *ets;
3aba481f Roland McGrath  2008-01-30  2116   int i;
3aba481f Roland McGrath  2008-01-30  2117  
3aba481f Roland McGrath  2008-01-30  2118   for (i = 0; i < info->numnote; 
i++)
ecc8c772 Al Viro 2013-10-05  2119   if 
(!writenote(info->notes + i, cprm))
3aba481f Roland McGrath  2008-01-30  2120   return 0;
3aba481f Roland McGrath  2008-01-30  2121  
3aba481f Roland McGrath  2008-01-30  2122   /* write out the thread status 
notes section */
1e0d184d Alexey Dobriyan 2019-02-04  2123   list_for_each_entry(ets, 
&info->thread_list, list) {
3aba481f Roland McGrath  2008-01-30 @2124   for (i = 0; i < 
tmp->num_notes; i++)
ecc8c772 Al Viro 2013-10-05  2125   if 
(!writenote(>notes[i], cprm))
3aba481f Roland McGrath  2008-01-30  2126   return 
0;
3aba481f Roland McGrath  2008-01-30  2127   }
3aba481f Roland McGrath  2008-01-30  2128  
3aba481f Roland McGrath  2008-01-30  2129   return 1;
3aba481f Roland McGrath  2008-01-30  2130  }
3aba481f Roland McGrath  2008-01-30  2131  

:: The code at line 2124 was first introduced by commit
:: 3aba481fc94d83ff630d4b7cd2f7447010c4c6df elf core dump: notes reorg

:: TO: Roland McGrath 
:: CC: Ingo Molnar 

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip

Re: [RFC PATCH v7 05/16] arm64/mm: Add support for XPFO

2019-02-12 Thread Khalid Aziz

On 1/23/19 7:24 AM, Konrad Rzeszutek Wilk wrote:
> On Thu, Jan 10, 2019 at 02:09:37PM -0700, Khalid Aziz wrote:
>> From: Juerg Haefliger 
>>
>> Enable support for eXclusive Page Frame Ownership (XPFO) for arm64 and
>> provide a hook for updating a single kernel page table entry (which is
>> required by the generic XPFO code).
>>
>> v6: use flush_tlb_kernel_range() instead of __flush_tlb_one()
>>
>> CC: linux-arm-ker...@lists.infradead.org
>> Signed-off-by: Juerg Haefliger 
>> Signed-off-by: Tycho Andersen 
>> Signed-off-by: Khalid Aziz 
>> ---
>>  arch/arm64/Kconfig |  1 +
>>  arch/arm64/mm/Makefile |  2 ++
>>  arch/arm64/mm/xpfo.c   | 58 ++
>>  3 files changed, 61 insertions(+)
>>  create mode 100644 arch/arm64/mm/xpfo.c
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index ea2ab0330e3a..f0a9c0007d23 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -171,6 +171,7 @@ config ARM64
>>  select SWIOTLB
>>  select SYSCTL_EXCEPTION_TRACE
>>  select THREAD_INFO_IN_TASK
>> +select ARCH_SUPPORTS_XPFO
>>  help
>>ARM 64-bit (AArch64) Linux support.
>>  
>> diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
>> index 849c1df3d214..cca3808d9776 100644
>> --- a/arch/arm64/mm/Makefile
>> +++ b/arch/arm64/mm/Makefile
>> @@ -12,3 +12,5 @@ KASAN_SANITIZE_physaddr.o  += n
>>  
>>  obj-$(CONFIG_KASAN) += kasan_init.o
>>  KASAN_SANITIZE_kasan_init.o := n
>> +
>> +obj-$(CONFIG_XPFO)  += xpfo.o
>> diff --git a/arch/arm64/mm/xpfo.c b/arch/arm64/mm/xpfo.c
>> new file mode 100644
>> index ..678e2be848eb
>> --- /dev/null
>> +++ b/arch/arm64/mm/xpfo.c
>> @@ -0,0 +1,58 @@
>> +/*
>> + * Copyright (C) 2017 Hewlett Packard Enterprise Development, L.P.
>> + * Copyright (C) 2016 Brown University. All rights reserved.
>> + *
>> + * Authors:
>> + *   Juerg Haefliger 
>> + *   Vasileios P. Kemerlis 
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms of the GNU General Public License version 2 as published 
>> by
>> + * the Free Software Foundation.
>> + */
>> +
>> +#include 
>> +#include 
>> +
>> +#include 
>> +
>> +/*
>> + * Lookup the page table entry for a virtual address and return a pointer to
>> + * the entry. Based on x86 tree.
>> + */
>> +static pte_t *lookup_address(unsigned long addr)
>> +{
>> +pgd_t *pgd;
>> +pud_t *pud;
>> +pmd_t *pmd;
>> +
>> +pgd = pgd_offset_k(addr);
>> +if (pgd_none(*pgd))
>> +return NULL;
>> +
>> +pud = pud_offset(pgd, addr);
>> +if (pud_none(*pud))
>> +return NULL;
>> +
>> +pmd = pmd_offset(pud, addr);
>> +if (pmd_none(*pmd))
>> +return NULL;
>> +
>> +return pte_offset_kernel(pmd, addr);
>> +}
>> +
>> +/* Update a single kernel page table entry */
>> +inline void set_kpte(void *kaddr, struct page *page, pgprot_t prot)
>> +{
>> +pte_t *pte = lookup_address((unsigned long)kaddr);
>> +
>> +set_pte(pte, pfn_pte(page_to_pfn(page), prot));
> 
> Though on the other hand.. what if the page is PMD? Do you really want
> to do this?
> 
> What if 'pte' is NULL?
>> +}
>> +
>> +inline void xpfo_flush_kernel_tlb(struct page *page, int order)
>> +{
>> +unsigned long kaddr = (unsigned long)page_address(page);
>> +unsigned long size = PAGE_SIZE;
>> +
>> +flush_tlb_kernel_range(kaddr, kaddr + (1 << order) * size);
> 
> Ditto here. You are assuming it is PTE, but it may be PMD or such.
> Or worse - the lookup_address could be NULL.
> 
>> +}
>> -- 
>> 2.17.1
>>

Hi Konrad,

This makes sense. x86 version of set_kpte() checks pte for NULL and also
checks if the page is PMD. Now what you said about adding level to
lookup_address() for arm makes more sense.

Can someone with knowledge of arm64 mmu make recommendations here?

Thanks,
Khalid


pEpkey.asc
Description: application/pgp-keys

Re: [PATCH 2/5] kasan, kmemleak: pass tagged pointers to kmemleak

2019-02-12 Thread Vincenzo Frascino

On 11/02/2019 21:59, Andrey Konovalov wrote:
> Right now we call kmemleak hooks before assigning tags to pointers in
> KASAN hooks. As a result, when an object gets allocated, kmemleak sees
> a differently tagged pointer, compared to the one it sees when the object
> gets freed. Fix it by calling KASAN hooks before kmemleak's ones.
>

Nit: Could you please add comments to the code? That should prevent the
code from being refactored in the future in a way that reintroduces the same issue.

> Reported-by: Qian Cai 
> Signed-off-by: Andrey Konovalov 
> ---
>  mm/slab.h| 6 ++
>  mm/slab_common.c | 2 +-
>  mm/slub.c| 3 ++-
>  3 files changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/mm/slab.h b/mm/slab.h
> index 4190c24ef0e9..638ea1b25d39 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -437,11 +437,9 @@ static inline void slab_post_alloc_hook(struct 
> kmem_cache *s, gfp_t flags,
>  
>   flags &= gfp_allowed_mask;
>   for (i = 0; i < size; i++) {
> - void *object = p[i];
> -
> - kmemleak_alloc_recursive(object, s->object_size, 1,
> + p[i] = kasan_slab_alloc(s, p[i], flags);
> + kmemleak_alloc_recursive(p[i], s->object_size, 1,
>s->flags, flags);
> - p[i] = kasan_slab_alloc(s, object, flags);
>   }
>  
>   if (memcg_kmem_enabled())
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 81732d05e74a..fe524c8d0246 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -1228,8 +1228,8 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned 
> int order)
>   flags |= __GFP_COMP;
>   page = alloc_pages(flags, order);
>   ret = page ? page_address(page) : NULL;
> - kmemleak_alloc(ret, size, 1, flags);
>   ret = kasan_kmalloc_large(ret, size, flags);
> + kmemleak_alloc(ret, size, 1, flags);
>   return ret;
>  }
>  EXPORT_SYMBOL(kmalloc_order);
> diff --git a/mm/slub.c b/mm/slub.c
> index 1e3d0ec4e200..4a3d7686902f 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1374,8 +1374,9 @@ static inline void dec_slabs_node(struct kmem_cache *s, 
> int node,
>   */
>  static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t 
> flags)
>  {
> + ptr = kasan_kmalloc_large(ptr, size, flags);
>   kmemleak_alloc(ptr, size, 1, flags);
> - return kasan_kmalloc_large(ptr, size, flags);
> + return ptr;
>  }
>  
>  static __always_inline void kfree_hook(void *x)
> 

-- 
Regards,
Vincenzo

Re: [PATCH 00/52] [RFC] virtio-fs: shared file system for virtual machines

2019-02-12 Thread Aneesh Kumar K.V

Vivek Goyal  writes:

> Hi,
>
> Here are RFC patches for virtio-fs. Looking for feedback on this approach.
>
> These patches should apply on top of 4.20-rc5. We have also put code for
> various components here.
>
> https://gitlab.com/virtio-fs
>
> Problem Description
> ===
> We want to be able to take a directory tree on the host and share it with
> guest[s]. Our goal is to be able to do it in a fast, consistent and secure
> manner. Our primary use case is kata containers, but it should be usable in
> other scenarios as well.
>
> Containers may rely on local file system semantics for shared volumes,
> read-write mounts that multiple containers access simultaneously.  File
> system changes must be visible to other containers with the same consistency
> expected of a local file system, including mmap MAP_SHARED.
>
> Existing Solutions
> ==
> We looked at existing solutions and virtio-9p already provides basic shared
> file system functionality although does not offer local file system semantics,
> causing some workloads and test suites to fail.

Can you elaborate on this? Is this with 9p2000.L? We did quite a lot of
work to make sure the POSIX test suite passes on the 9p file system. Also,
was the mount option cache=loose used?

-aneesh

[PATCH] usb: typec: tcpm: Remove unused functions

2019-02-12 Thread Guenter Roeck

tcpm_update_source_capabilities() and tcpm_update_sink_capabilities()
are not used anywhere, and I don't recall why I introduced those functions
in the first place. Effectively that means that we don't know if they even
work, or ever did. Let's remove them.

Reported-by: Kyle Tso 
Cc: Kyle Tso 
Signed-off-by: Guenter Roeck 
---
 drivers/usb/typec/tcpm/tcpm.c | 60 ---
 include/linux/usb/tcpm.h  |  6 -
 2 files changed, 66 deletions(-)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index f1c39a3c7534..a6b2413c59a6 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -4435,66 +4435,6 @@ static int tcpm_fw_get_caps(struct tcpm_port *port,
return 0;
 }
 
-int tcpm_update_source_capabilities(struct tcpm_port *port, const u32 *pdo,
-   unsigned int nr_pdo)
-{
-   if (tcpm_validate_caps(port, pdo, nr_pdo))
-   return -EINVAL;
-
-   mutex_lock(&port->lock);
-   port->nr_src_pdo = tcpm_copy_pdos(port->src_pdo, pdo, nr_pdo);
-   switch (port->state) {
-   case SRC_UNATTACHED:
-   case SRC_ATTACH_WAIT:
-   case SRC_TRYWAIT:
-   tcpm_set_cc(port, tcpm_rp_cc(port));
-   break;
-   case SRC_SEND_CAPABILITIES:
-   case SRC_NEGOTIATE_CAPABILITIES:
-   case SRC_READY:
-   case SRC_WAIT_NEW_CAPABILITIES:
-   tcpm_set_cc(port, tcpm_rp_cc(port));
-   tcpm_set_state(port, SRC_SEND_CAPABILITIES, 0);
-   break;
-   default:
-   break;
-   }
-   mutex_unlock(&port->lock);
-   return 0;
-}
-EXPORT_SYMBOL_GPL(tcpm_update_source_capabilities);
-
-int tcpm_update_sink_capabilities(struct tcpm_port *port, const u32 *pdo,
- unsigned int nr_pdo,
- unsigned int operating_snk_mw)
-{
-   if (tcpm_validate_caps(port, pdo, nr_pdo))
-   return -EINVAL;
-
-   mutex_lock(&port->lock);
-   port->nr_snk_pdo = tcpm_copy_pdos(port->snk_pdo, pdo, nr_pdo);
-   port->operating_snk_mw = operating_snk_mw;
-   port->update_sink_caps = true;
-
-   switch (port->state) {
-   case SNK_NEGOTIATE_CAPABILITIES:
-   case SNK_NEGOTIATE_PPS_CAPABILITIES:
-   case SNK_READY:
-   case SNK_TRANSITION_SINK:
-   case SNK_TRANSITION_SINK_VBUS:
-   if (port->pps_data.active)
-   tcpm_set_state(port, SNK_NEGOTIATE_PPS_CAPABILITIES, 0);
-   else
-   tcpm_set_state(port, SNK_NEGOTIATE_CAPABILITIES, 0);
-   break;
-   default:
-   break;
-   }
-   mutex_unlock(&port->lock);
-   return 0;
-}
-EXPORT_SYMBOL_GPL(tcpm_update_sink_capabilities);
-
 /* Power Supply access to expose source power information */
 enum tcpm_psy_online_states {
TCPM_PSY_OFFLINE = 0,
diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h
index 50c74a77db55..0c532ca3f079 100644
--- a/include/linux/usb/tcpm.h
+++ b/include/linux/usb/tcpm.h
@@ -159,12 +159,6 @@ struct tcpm_port;
 struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev 
*tcpc);
 void tcpm_unregister_port(struct tcpm_port *port);
 
-int tcpm_update_source_capabilities(struct tcpm_port *port, const u32 *pdo,
-   unsigned int nr_pdo);
-int tcpm_update_sink_capabilities(struct tcpm_port *port, const u32 *pdo,
- unsigned int nr_pdo,
- unsigned int operating_snk_mw);
-
 void tcpm_vbus_change(struct tcpm_port *port);
 void tcpm_cc_change(struct tcpm_port *port);
 void tcpm_pd_receive(struct tcpm_port *port,
-- 
2.7.4

[PATCH] qed: fix indentation issue with statements in an if-block

2019-02-12 Thread Colin King

From: Colin Ian King 

There are some statements in an if-block that are not correctly
indented. Fix these.

Signed-off-by: Colin Ian King 
---
 drivers/net/ethernet/qlogic/qed/qed_cxt.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c 
b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index 35c9f484eb9f..e61d1d905415 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -2135,12 +2135,12 @@ int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn, u32 
rdma_tasks)
struct qed_eth_pf_params *p_params =
&p_hwfn->pf_params.eth_pf_params;
 
-   if (!p_params->num_vf_cons)
-   p_params->num_vf_cons =
-   ETH_PF_PARAMS_VF_CONS_DEFAULT;
-   qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_ETH,
-   p_params->num_cons,
-   p_params->num_vf_cons);
+   if (!p_params->num_vf_cons)
+   p_params->num_vf_cons =
+   ETH_PF_PARAMS_VF_CONS_DEFAULT;
+   qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_ETH,
+   p_params->num_cons,
+   p_params->num_vf_cons);
p_hwfn->p_cxt_mngr->arfs_count = p_params->num_arfs_filters;
break;
}
-- 
2.20.1

Re: [RESEND PATCH v2] of: fix kmemleak crash caused by imbalance in early memory reservation

2019-02-12 Thread Rob Herring

On Mon, Feb 11, 2019 at 10:47 AM Marc Gonzalez  wrote:
>
> On 04/02/2019 15:37, Marc Gonzalez wrote:
>
> > Cc: sta...@vger.kernel.org # 3.15+
> > Fixes: 3f0c820664483 ("drivers: of: add initialization code for dynamic 
> > reserved memory")
> > Acked-by: Marek Szyprowski 
> > Acked-by: Prateek Patel 
> > Tested-by: Marc Gonzalez 
> > Signed-off-by: Mike Rapoport 
> > ---
> > Resend with DT CCed to reach robh's patch queue
> > I added CC: stable, Fixes, and Prateek's ack
> > Trim recipients list to minimize inconvenience
>
> Mike, Stephen,
>
> I'm confused over commit 3532b3b554a216f30edb841d29eef48521bdc592 in 
> linux-next
> "memblock: drop __memblock_alloc_base()"
>
> It's definitely going to conflict with the proposed patch
> over drivers/of/of_reserved_mem.c
>
> Rob, what's the next step then?

Rebase it on top of what's in linux-next and apply it to the tree
which has the above dependency. I'm guessing that is Andrew Morton's
tree.

Rob

Re: [PATCH 1/2] PM-runtime: Take suppliers into account in __pm_runtime_set_status()

2019-02-12 Thread Ulf Hansson

On Thu, 7 Feb 2019 at 19:46, Rafael J. Wysocki  wrote:
>
> From: Rafael J. Wysocki 
>
> If the target device has any suppliers, as reflected by device links
> to them, __pm_runtime_set_status() does not take them into account,
> which is not consistent with the other parts of the PM-runtime
> framework and may lead to programming mistakes.
>
> Modify __pm_runtime_set_status() to take suppliers into account by
> activating them upfront if the new status is RPM_ACTIVE and
> deactivating them on exit if the new status is RPM_SUSPENDED.
>
> If the activation of one of the suppliers fails, the new status
> will be RPM_SUSPENDED and the (remaining) suppliers will be
> deactivated on exit (the child count of the device's parent
> will be dropped too then).
>
> Of course, adding device links locking to __pm_runtime_set_status()
> means that it cannot be run from interrupt context, so make it use
> spin_lock_irq() and spin_unlock_irq() instead of spin_lock_irqsave()
> and spin_unlock_irqrestore(), respectively.
>
> Signed-off-by: Rafael J. Wysocki 

Reviewed-by: Ulf Hansson 
Tested-by: Ulf Hansson 

Kind regards
Uffe


> ---
>  drivers/base/power/runtime.c |   45 
> ++-
>  1 file changed, 40 insertions(+), 5 deletions(-)
>
> Index: linux-pm/drivers/base/power/runtime.c
> ===
> --- linux-pm.orig/drivers/base/power/runtime.c
> +++ linux-pm/drivers/base/power/runtime.c
> @@ -1102,20 +1102,43 @@ EXPORT_SYMBOL_GPL(pm_runtime_get_if_in_u
>   * and the device parent's counter of unsuspended children is modified to
>   * reflect the new status.  If the new status is RPM_SUSPENDED, an idle
>   * notification request for the parent is submitted.
> + *
> + * If @dev has any suppliers (as reflected by device links to them), and 
> @status
> + * is RPM_ACTIVE, they will be activated upfront and if the activation of one
> + * of them fails, the status of @dev will be changed to RPM_SUSPENDED 
> (instead
> + * of the @status value) and the suppliers will be deactivated on exit.  The
> + * error returned by the failing supplier activation will be returned in that
> + * case.
>   */
>  int __pm_runtime_set_status(struct device *dev, unsigned int status)
>  {
> struct device *parent = dev->parent;
> -   unsigned long flags;
> bool notify_parent = false;
> int error = 0;
>
> if (status != RPM_ACTIVE && status != RPM_SUSPENDED)
> return -EINVAL;
>
> -   spin_lock_irqsave(&dev->power.lock, flags);
> +   /*
> +* If the new status is RPM_ACTIVE, the suppliers can be activated
> +* upfront regardless of the current status, because next time
> +* rpm_put_suppliers() runs, the rpm_active refcounts of the links
> +* involved will be dropped down to one anyway.
> +*/
> +   if (status == RPM_ACTIVE) {
> +   int idx = device_links_read_lock();
> +
> +   error = rpm_get_suppliers(dev);
> +   if (error)
> +   status = RPM_SUSPENDED;
> +
> +   device_links_read_unlock(idx);
> +   }
> +
> +   spin_lock_irq(&dev->power.lock);
>
> if (!dev->power.runtime_error && !dev->power.disable_depth) {
> +   status = dev->power.runtime_status;
> error = -EAGAIN;
> goto out;
> }
> @@ -1147,19 +1170,31 @@ int __pm_runtime_set_status(struct devic
>
> spin_unlock(>power.lock);
>
> -   if (error)
> +   if (error) {
> +   status = RPM_SUSPENDED;
> goto out;
> +   }
> }
>
>   out_set:
> __update_runtime_status(dev, status);
> -   dev->power.runtime_error = 0;
> +   if (!error)
> +   dev->power.runtime_error = 0;
> +
>   out:
> -   spin_unlock_irqrestore(&dev->power.lock, flags);
> +   spin_unlock_irq(&dev->power.lock);
>
> if (notify_parent)
> pm_request_idle(parent);
>
> +   if (status == RPM_SUSPENDED) {
> +   int idx = device_links_read_lock();
> +
> +   rpm_put_suppliers(dev);
> +
> +   device_links_read_unlock(idx);
> +   }
> +
> return error;
>  }
>  EXPORT_SYMBOL_GPL(__pm_runtime_set_status);
>

[PATCH] scsi: sd: fix entropy gathering for most rotational disks

2019-02-12 Thread James Bottomley

The problem is that the default for MQ is not to gather entropy,
whereas the default for the legacy queue was always to gather it.  The
original attempt to fix entropy gathering for rotational disks under MQ
added an else branch in sd_read_block_characteristics(). 
Unfortunately, the entire check isn't reached if the device has no
characteristics VPD page.  Since this page was only introduced in SBC-3 
and it's optional anyway, most less expensive rotational disks don't
have one, meaning they all stopped gathering entropy when we made MQ
the default.  In a wholly unrelated change, openssl and openssh won't
function until the random number generator is initialised, meaning lots
of people have been seeing large delays before they could log into
systems with default MQ kernels due to this lack of entropy, because it
now can take tens of minutes to initialise the kernel random number
generator.

The fix is to set the non-rotational and add-randomness flags
unconditionally early on in the disk initialization path, so they can
be reset only if the device actually reports being non-rotational via
the VPD page.

Reported-by: Mikael Pettersson 
Fixes: 83e32a591077 ("scsi: sd: Contribute to randomness when running 
rotational device")
Cc: sta...@vger.kernel.org
Signed-off-by: James Bottomley 

---
I updated this slightly over the original proposal so we set the flags
even if the device doesn't have any VPD pages, so it should work for
very old disks.

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index d0a980915801..3b8093c48eba 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2971,9 +2971,6 @@ static void sd_read_block_characteristics(struct 
scsi_disk *sdkp)
if (rot == 1) {
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
-   } else {
-   blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
-   blk_queue_flag_set(QUEUE_FLAG_ADD_RANDOM, q);
}
 
if (sdkp->device->type == TYPE_ZBC) {
@@ -3110,6 +3107,15 @@ static int sd_revalidate_disk(struct gendisk *disk)
if (sdkp->media_present) {
sd_read_capacity(sdkp, buffer);
 
+   /*
+* set the default to rotational.  All non-rotational devices
+* support the block characteristics VPD page, which will
+* cause this to be updated correctly and any device which
+* doesn't support it should be treated as rotational.
+*/
+   blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
+   blk_queue_flag_set(QUEUE_FLAG_ADD_RANDOM, q);
+
if (scsi_device_supports_vpd(sdp)) {
sd_read_block_provisioning(sdkp);
sd_read_block_limits(sdkp);

< 1 2 3 4 5 6 7 8 9 10 >

501 - 600 of 1653 matches

Mail list logo