date:20161210

The driver was checking for non-NULL address of struct's members:
 - s3c_audio_pdata->type (union),
 - s3c_audio_pdata->type.i2s (embedded struct).

This is pointless as these will be always non-NULL.  The 's3c_audio_pdata'
is always initialized in static memory so it will be zeroed.
Additionally the 'type' member was a union with only one member.

It is safe to reorganize the structures to get rid of useless union and
checks for addresses to fix the coccinelle warning:
>> sound/soc/samsung/i2s.c:1270:2-4: ERROR: test of a variable/field 
address

Reported-by: kbuild test robot 
Signed-off-by: Krzysztof Kozlowski 

---

Not tested on the hardware.
---
 arch/arm/mach-s3c64xx/dev-audio.c  |  4 +---
 include/linux/platform_data/asoc-s3c.h |  6 ++----
 sound/soc/samsung/i2s.c                | 10 ++--------
 3 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/arch/arm/mach-s3c64xx/dev-audio.c 
b/arch/arm/mach-s3c64xx/dev-audio.c
index b57783371d52..247dcc0b691e 100644
--- a/arch/arm/mach-s3c64xx/dev-audio.c
+++ b/arch/arm/mach-s3c64xx/dev-audio.c
@@ -106,9 +106,7 @@ static struct s3c_audio_pdata i2sv4_pdata = {
.dma_playback = DMACH_HSI_I2SV40_TX,
.dma_capture = DMACH_HSI_I2SV40_RX,
.type = {
-   .i2s = {
-   .quirks = QUIRK_PRI_6CHAN,
-   },
+   .quirks = QUIRK_PRI_6CHAN,
},
 };
 
diff --git a/include/linux/platform_data/asoc-s3c.h 
b/include/linux/platform_data/asoc-s3c.h
index 15bf56ee8af7..90641a5daaf0 100644
--- a/include/linux/platform_data/asoc-s3c.h
+++ b/include/linux/platform_data/asoc-s3c.h
@@ -18,7 +18,7 @@
 
 extern void s3c64xx_ac97_setup_gpio(int);
 
-struct samsung_i2s {
+struct samsung_i2s_type {
 /* If the Primary DAI has 5.1 Channels */
 #define QUIRK_PRI_6CHAN	(1 << 0)
 /* If the I2S block has a Stereo Overlay Channel */
@@ -47,7 +47,5 @@ struct s3c_audio_pdata {
void *dma_capture;
void *dma_play_sec;
void *dma_capture_mic;
-   union {
-   struct samsung_i2s i2s;
-   } type;
+   struct samsung_i2s_type type;
 };
diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c
index e00974bc5616..d55326289a4a 100644
--- a/sound/soc/samsung/i2s.c
+++ b/sound/soc/samsung/i2s.c
@@ -1218,7 +1218,6 @@ static int samsung_i2s_probe(struct platform_device *pdev)
 {
struct i2s_dai *pri_dai, *sec_dai = NULL;
struct s3c_audio_pdata *i2s_pdata = pdev->dev.platform_data;
-   struct samsung_i2s *i2s_cfg = NULL;
struct resource *res;
u32 regs_base, quirks = 0, idma_addr = 0;
struct device_node *np = pdev->dev.of_node;
@@ -1267,13 +1266,8 @@ static int samsung_i2s_probe(struct platform_device 
*pdev)
pri_dai->dma_capture.filter_data = i2s_pdata->dma_capture;
pri_dai->filter = i2s_pdata->dma_filter;
 
-   if (&i2s_pdata->type)
-   i2s_cfg = &i2s_pdata->type.i2s;
-
-   if (i2s_cfg) {
-   quirks = i2s_cfg->quirks;
-   idma_addr = i2s_cfg->idma_addr;
-   }
+   quirks = i2s_pdata->type.quirks;
+   idma_addr = i2s_pdata->type.idma_addr;
} else {
quirks = i2s_dai_data->quirks;
if (of_property_read_u32(np, "samsung,idma-addr",
-- 
2.7.4

Re: [PATCH 1/1 linux-next] fs: add BLOCKSIZE(inode)

2016-12-10 Thread kbuild test robot

Hi Fabian,

[auto build test WARNING on next-20161209]

url:
https://github.com/0day-ci/linux/commits/Fabian-Frederick/fs-add-BLOCKSIZE-inode/20161210-171013
config: x86_64-acpi-redef (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

All warnings (new ones prefixed by >>):

>> drivers/media/dvb-frontends/sp887x.c:131:0: warning: "BLOCKSIZE" redefined
#define BLOCKSIZE 30

   In file included from include/linux/poll.h:9:0,
from drivers/media/dvb-core/dvbdev.h:27,
from drivers/media/dvb-core/dvb_frontend.h:47,
from drivers/media/dvb-frontends/sp887x.c:20:
   include/linux/fs.h:546:0: note: this is the location of the previous 
definition
#define BLOCKSIZE(node) (1 << inode->i_blkbits)


vim +/BLOCKSIZE +131 drivers/media/dvb-frontends/sp887x.c

^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
115   sp887x_writereg(state, 0x33c, 0x054);
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
116   sp887x_writereg(state, 0x33b, 0x04c);
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
117   sp887x_writereg(state, 0x328, 0x000);
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
118   sp887x_writereg(state, 0x327, 0x005);
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
119   sp887x_writereg(state, 0x326, 0x001);
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
120   sp887x_writereg(state, 0x325, 0x001);
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
121   sp887x_writereg(state, 0x324, 0x001);
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
122   sp887x_writereg(state, 0x318, 0x050);
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
123   sp887x_writereg(state, 0x317, 0x3fe);
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
124   sp887x_writereg(state, 0x316, 0x001);
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
125   sp887x_writereg(state, 0x313, 0x005);
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
126   sp887x_writereg(state, 0x312, 0x002);
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
127   sp887x_writereg(state, 0x306, 0x000);
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
128   sp887x_writereg(state, 0x303, 0x000);
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
129  }
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
130  
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16 
@131  #define BLOCKSIZE 30
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
132  #define FW_SIZE 0x4000
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
133  /**
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
134   *  load firmware and setup MPEG interface...
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
135   */
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
136  static int sp887x_initial_setup (struct dvb_frontend* fe, const struct 
firmware *fw)
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
137  {
b8742700 drivers/media/dvb/frontends/sp887x.c Johannes Stezenbach 2005-05-16  
138   struct sp887x_state* state = fe->demodulator_priv;
^1da177e drivers/media/dvb/frontends/sp887x.c Linus Torvalds  2005-04-16  
139   u8 buf [BLOCKSIZE+2];

:: The code at line 131 was first introduced by commit
:: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 Linux-2.6.12-rc2

:: TO: Linus Torvalds 
:: CC: Linus Torvalds 

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip

Re: [PATCH v4 2/2] perf probe: add sdt probes arguments into the uprobe cmd string

2016-12-10 Thread Masami Hiramatsu

On Fri, 9 Dec 2016 12:14:30 -0300
Arnaldo Carvalho de Melo  wrote:

> Em Wed, Dec 07, 2016 at 12:26:10PM +0900, Masami Hiramatsu escreveu:
> > Hello Alexis,
> > 
> > On Sat, 26 Nov 2016 01:58:03 +0100
> > Alexis Berlemont  wrote:
> > 
> > > An sdt probe can be associated with arguments but they were not passed
> > > to the user probe tracing interface (uprobe_events); this patch adapts
> > > the sdt argument descriptors according to the uprobe input format.
> > 
> > Great!
> 
> Yeah, good to see work in this area!
> 
> I applied the first patch, with Masami's ack, waiting for his concerns
> on this one to be addressed, ok?

Yes, I'm OK. Alexis, I'm happy to review/ack your next version! :)

Thank you,

> 
> - Arnaldo
>  
> > > 
> > > As the uprobe parser does not support scaled address mode, perf will
> > > skip arguments which cannot be adapted to the uprobe format.
> > 
> > OK, it seems that skipping argument is a good idea :)
> > I just tried to support fixed-number arguments in probe events,
> > but skipping it is better with older kernel.
> > 
> > I have some comments.
> > 
> > > Here are the results:
> > > 
> > > $ perf buildid-cache -v --add test_sdt
> > > $ perf probe -x test_sdt sdt_libfoo:table_frob
> > > $ perf probe -x test_sdt sdt_libfoo:table_diddle
> > > $ perf record -e sdt_libfoo:table_frob -e sdt_libfoo:table_diddle test_sdt
> > > $ perf script
> > > test_sdt  ...   666.255678:   sdt_libfoo:table_frob: (4004d7) arg0=0 
> > > arg1=0
> > > test_sdt  ...   666.255683: sdt_libfoo:table_diddle: (40051a) arg0=0 
> > > arg1=0
> > > test_sdt  ...   666.255686:   sdt_libfoo:table_frob: (4004d7) arg0=1 
> > > arg1=2
> > > test_sdt  ...   666.255689: sdt_libfoo:table_diddle: (40051a) arg0=3 
> > > arg1=4
> > > test_sdt  ...   666.255692:   sdt_libfoo:table_frob: (4004d7) arg0=2 
> > > arg1=4
> > > test_sdt  ...   666.255694: sdt_libfoo:table_diddle: (40051a) arg0=6 
> > > arg1=8
> > 
> > We'd better start with arg1, since sdt.h and original Dtrace SDT starts
> > arguments from arg1 (I'm not sure why) and dtrace/systemtap scripts
> > call it "arg1".
> > 
> > > 
> > > Signed-off-by: Alexis Berlemont 
> > > ---
> > >  tools/perf/arch/x86/util/perf_regs.c |  18 
> > >  tools/perf/util/perf_regs.c  |   4 +
> > >  tools/perf/util/perf_regs.h  |  13 +++
> > >  tools/perf/util/probe-file.c | 169 
> > > ++-
> > >  4 files changed, 200 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/tools/perf/arch/x86/util/perf_regs.c 
> > > b/tools/perf/arch/x86/util/perf_regs.c
> > > index c5db14f..52a1e65 100644
> > > --- a/tools/perf/arch/x86/util/perf_regs.c
> > > +++ b/tools/perf/arch/x86/util/perf_regs.c
> > > @@ -26,3 +26,21 @@ const struct sample_reg sample_reg_masks[] = {
> > >  #endif
> > >   SMPL_REG_END
> > >  };
> > > +
> > > +const struct sdt_name_reg sdt_reg_renamings[] = {
> > > + SDT_NAME_REG(eax, ax),
> > > + SDT_NAME_REG(rax, ax),
> > > + SDT_NAME_REG(ebx, bx),
> > > + SDT_NAME_REG(rbx, bx),
> > > + SDT_NAME_REG(ecx, cx),
> > > + SDT_NAME_REG(rcx, cx),
> > > + SDT_NAME_REG(edx, dx),
> > > + SDT_NAME_REG(rdx, dx),
> > > + SDT_NAME_REG(esi, si),
> > > + SDT_NAME_REG(rsi, si),
> > > + SDT_NAME_REG(edi, di),
> > > + SDT_NAME_REG(rdi, di),
> > > + SDT_NAME_REG(ebp, bp),
> > > + SDT_NAME_REG(rbp, bp),
> > > + SDT_NAME_REG_END,
> > > +};
> > 
> > > It is not enough: the rNN registers also have to be taken care of, since
> > > gcc adds 'd', 'w' or 'b' suffixes to those registers to indicate
> > > their size, e.g. r15d means the lower 32 bits of the r15 register.
> > > What we need is just to cut them off, since probe events use
> > > length modifiers (like :u32).
> > 
> > > diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
> > > index c4023f2..1c21150 100644
> > > --- a/tools/perf/util/perf_regs.c
> > > +++ b/tools/perf/util/perf_regs.c
> > > @@ -6,6 +6,10 @@ const struct sample_reg __weak sample_reg_masks[] = {
> > >   SMPL_REG_END
> > >  };
> > >  
> > > +const struct sdt_name_reg __weak sdt_reg_renamings[] = {
> > > + SDT_NAME_REG_END,
> > > +};
> > > +
> > >  #ifdef HAVE_PERF_REGS_SUPPORT
> > >  int perf_reg_value(u64 *valp, struct regs_dump *regs, int id)
> > >  {
> > > diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
> > > index 679d6e4..41815ca 100644
> > > --- a/tools/perf/util/perf_regs.h
> > > +++ b/tools/perf/util/perf_regs.h
> > > @@ -15,6 +15,19 @@ struct sample_reg {
> > >  
> > >  extern const struct sample_reg sample_reg_masks[];
> > >  
> > > +struct sdt_name_reg {
> > > + const char *sdt_name;
> > > + const char *uprobe_name;
> > > +};
> > > +#define SDT_NAME_REG(n, m) {.sdt_name = "%" #n, .uprobe_name = "%" #m}
> > > +#define SDT_NAME_REG_END {.sdt_name = NULL, .uprobe_name = NULL}
> > > +
> > > +/*
> > > + * The table sdt_reg_renamings is used for adjusting gcc/gas-generated
> > > + * registers before filling the uprobe tracer interface.
> > > + */
> > > +extern const struct sdt_name_reg sdt_reg_renamings[];
> > > +

Assalamu`Alaikum.

2016-12-10 Thread mohammad ouattala




Dear Sir/Madam.

Assalamu`Alaikum.

I am Dr mohammad ouattara, I have  ($10.6 Million us dollars) to transfer into 
your account,

I will send you more details about this deal and the procedures to follow when 
I receive a positive response from you, 

Have a great day,
Dr mohammad ouattara.

[PATCH] HID: asus: Fix keyboard support

2016-12-10 Thread Brendan McGrath

The previous submission which added Touchpad support broke the
Keyboard support of this driver. This patch:
1. fixes the Keyboard support (by assigning drvdata->input);
2. renames NOTEBOOK_QUIRKS to KEYBOARD_QUIRKS;
3. adds the NO_INIT_REPORT quirk to the KEYBOARD_QUIRKS; and
4. sets the input->name to 'Asus Keyboard' for the keyboard 

Signed-off-by: Brendan McGrath 
---
 drivers/hid/hid-asus.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
index 96179b2..34a703c 100644
--- a/drivers/hid/hid-asus.c
+++ b/drivers/hid/hid-asus.c
@@ -64,7 +64,8 @@ MODULE_DESCRIPTION("Asus HID Keyboard and TouchPad");
 #define QUIRK_SKIP_INPUT_MAPPING   BIT(2)
 #define QUIRK_IS_MULTITOUCH	BIT(3)
 
-#define NOTEBOOK_QUIRKS	QUIRK_FIX_NOTEBOOK_REPORT
+#define KEYBOARD_QUIRKS	(QUIRK_FIX_NOTEBOOK_REPORT | \
+	 QUIRK_NO_INIT_REPORTS)
 #define TOUCHPAD_QUIRKS	(QUIRK_NO_INIT_REPORTS | \
 QUIRK_SKIP_INPUT_MAPPING | \
 QUIRK_IS_MULTITOUCH)
@@ -170,11 +171,11 @@ static int asus_raw_event(struct hid_device *hdev,
 
 static int asus_input_configured(struct hid_device *hdev, struct hid_input *hi)
 {
+   struct input_dev *input = hi->input;
struct asus_drvdata *drvdata = hid_get_drvdata(hdev);
 
if (drvdata->quirks & QUIRK_IS_MULTITOUCH) {
int ret;
-   struct input_dev *input = hi->input;
 
input_set_abs_params(input, ABS_MT_POSITION_X, 0, MAX_X, 0, 0);
input_set_abs_params(input, ABS_MT_POSITION_Y, 0, MAX_Y, 0, 0);
@@ -191,10 +192,10 @@ static int asus_input_configured(struct hid_device *hdev, 
struct hid_input *hi)
hid_err(hdev, "Asus input mt init slots failed: %d\n", 
ret);
return ret;
}
-
-   drvdata->input = input;
}
 
+   drvdata->input = input;
+
return 0;
 }
 
@@ -286,7 +287,11 @@ static int asus_probe(struct hid_device *hdev, const 
struct hid_device_id *id)
goto err_stop_hw;
}
 
-   drvdata->input->name = "Asus TouchPad";
+   if (drvdata->quirks & QUIRK_IS_MULTITOUCH) {
+   drvdata->input->name = "Asus TouchPad";
+   } else {
+   drvdata->input->name = "Asus Keyboard";
+   }
 
if (drvdata->quirks & QUIRK_IS_MULTITOUCH) {
ret = asus_start_multitouch(hdev);
@@ -315,7 +320,7 @@ static __u8 *asus_report_fixup(struct hid_device *hdev, 
__u8 *rdesc,
 
 static const struct hid_device_id asus_devices[] = {
{ HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK,
-USB_DEVICE_ID_ASUSTEK_NOTEBOOK_KEYBOARD), NOTEBOOK_QUIRKS},
+USB_DEVICE_ID_ASUSTEK_NOTEBOOK_KEYBOARD), KEYBOARD_QUIRKS},
{ HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK,
 USB_DEVICE_ID_ASUSTEK_TOUCHPAD), TOUCHPAD_QUIRKS },
{ }
-- 
2.7.4

Re: [PATCH v2 2/2] x86/KASLR/64: Determine kernel text mapping size at runtime

On Fri, Dec 09, 2016 at 10:41:58PM +0800, Baoquan He wrote:
> The x86-64 kernel takes KERNEL_IMAGE_SIZE as the kernel text mapping size,
> and it is fixed at compile time, changing from 512M to 1G whenever
> CONFIG_RANDOMIZE_BASE is enabled, even if people specify the kernel option
> "nokaslr" explicitly.
> 
> This could be a wrong behaviour.

A bunch of changes just because "this could be a wrong behavior". I'm
not really persuaded.

-- 
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 
(AG Nürnberg)
--

Re: [PATCH] llist: Clarify comments about when locking is needed

2016-12-10 Thread Mathieu Desnoyers

- On Dec 10, 2016, at 5:13 AM, Joel Fernandes joe...@google.com wrote:

> llist.h comments are a bit confusing about when locking is needed versus when
> it isn't. Clarify these comments a bit more and be a bit more descriptive 
> about
> why locking is needed for llist_del_first.

Could rephrase the last sentence as:

Clarify these comments by being more descriptive about why locking
is needed for llist_del_first.

> 
> Cc: Huang Ying 
> Cc: Ingo Molnar 
> Cc: Will Deacon 
> Cc: Paul McKenney 
> Cc: Mathieu Desnoyers 
> Signed-off-by: Joel Fernandes 
> ---
> include/linux/llist.h | 37 +
> 1 file changed, 21 insertions(+), 16 deletions(-)
> 
> diff --git a/include/linux/llist.h b/include/linux/llist.h
> index fd4ca0b..31822bb 100644
> --- a/include/linux/llist.h
> +++ b/include/linux/llist.h
> @@ -3,28 +3,33 @@
> /*
>  * Lock-less NULL terminated single linked list
>  *
> - * If there are multiple producers and multiple consumers, llist_add
> - * can be used in producers and llist_del_all can be used in
> - * consumers.  They can work simultaneously without lock.  But
> - * llist_del_first can not be used here.  Because llist_del_first
> - * depends on list->first->next does not changed if list->first is not
> - * changed during its operation, but llist_del_first, llist_add,
> - * llist_add (or llist_del_all, llist_add, llist_add) sequence in
> - * another consumer may violate that.
> - *
> - * If there are multiple producers and one consumer, llist_add can be
> - * used in producers and llist_del_all or llist_del_first can be used
> - * in the consumer.
> - *
> - * This can be summarized as follow:
> + * Cases where locking is not needed:
> + * If there are multiple producers and multiple consumers, llist_add can be
> + * used in producers and llist_del_all can be used in consumers 
> simultaneously
> + * without locking. Also a single consumer can use llist_del_first while
> multiple
> + * producers simultaneously use llist_add, without any locking.
> + *
> + * Cases where locking is needed:
> + * If we have multiple consumers with llist_del_first used in one consumer, 
> and
> + * llist_del_first or llist_del_all used in other consumers, then a lock is
> + * needed.  This is because llist_del_first depends on list->first->next not
> + * changing, but without lock protection, there's no way to be sure about 
> that
> + * if a preemption happens in the middle of the delete operation and on being
> + * preempted back, the list->first is the same as before causing the cmpxchg 
> in
> + * llist_del_first to succeed. For example, while a llist_del_first operation
> + * is in progress in one consumer, then - a llist_del_first, llist_add,

Is the "-" expected in this sentence ?

Other than that,

Acked-by: Mathieu Desnoyers 

> + * llist_add (or llist_del_all, llist_add, llist_add) sequence in another
> + * consumer may cause violations.
> + *
> + * This can be summarized as follows:
>  *
>  *   |   add| del_first |  del_all
>  * add   |- | - | -
>  * del_first |  | L | L
>  * del_all   |  |   | -
>  *
> - * Where "-" stands for no lock is needed, while "L" stands for lock
> - * is needed.
> + * Where, a particular row's operation can happen concurrently with a 
> column's
> + * operation, with "-" being no lock needed, while "L" being lock is needed.
>  *
>  * The list entries deleted via llist_del_all can be traversed with
>  * traversing function such as llist_for_each etc.  But the list
> --
> 2.8.0.rc3.226.g39d4020

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

[PATCH] ARM: add cmpxchg64 helper for ARMv7-M

2016-12-10 Thread Arnd Bergmann

A change to the netfilter code in net-next introduced the first caller of
cmpxchg64 that can get built on ARMv7-M, leading to an error from the
assembler that points out the lack of 64-bit atomics on this architecture:

/tmp/ccMe7djj.s: Assembler messages:
/tmp/ccMe7djj.s:367: Error: selected processor does not support `ldrexd 
r0,r1,[lr]' in Thumb mode
/tmp/ccMe7djj.s:371: Error: selected processor does not support `strexd 
ip,r2,r3,[lr]' in Thumb mode
/tmp/ccMe7djj.s:389: Error: selected processor does not support `ldrexd 
r8,r9,[r7]' in Thumb mode
/tmp/ccMe7djj.s:393: Error: selected processor does not support `strexd 
lr,r0,r1,[r7]' in Thumb mode
scripts/Makefile.build:299: recipe for target 'net/netfilter/nft_counter.o' 
failed

This makes ARMv7-M use the same emulation from asm-generic/cmpxchg-local.h
that we use on architectures earlier than ARMv6K, to fix the build. The
32-bit atomics are available on ARMv7-M and we keep using them there.
This ARM specific change is probably something we should do regardless
of the netfilter code.

However, looking at the new nft_counter_reset() function in nft_counter.c,
this looks incorrect to me not just on ARMv7-M but also on other
architectures, with at least the following possible race:

CPU A   CPU B
u64_stats_fetch_begin_irq
u64_stats_update_begin
fetch(upper 32 bits)
fetch(old)
cmpxchg64(counter, old, 0);
fetch(lower 32 bits)
u64_stats_fetch_retry_irq == true
store(upper 32 bits)
fetch(old)
cmpxchg64(counter, old, 0);
store(lower 32 bits)
u64_stats_update_end
u64_stats_fetch_retry_irq == true
fetch(old)
cmpxchg64(counter, old, 0);
u64_stats_fetch_retry_irq == false

In this example, the data returned by __nft_counter_reset() is zero
as we overwrite the per-cpu counter value during the retries.

Fixes: 43da04a593d8 ("netfilter: nf_tables: atomic dump and reset for stateful 
objects")
Signed-off-by: Arnd Bergmann 
---
 arch/arm/include/asm/cmpxchg.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h
index 97882f9bad12..12215515ba02 100644
--- a/arch/arm/include/asm/cmpxchg.h
+++ b/arch/arm/include/asm/cmpxchg.h
@@ -240,6 +240,7 @@ static inline unsigned long __cmpxchg_local(volatile void 
*ptr,
sizeof(*(ptr)));\
 })
 
+#ifndef CONFIG_CPU_V7M
 static inline unsigned long long __cmpxchg64(unsigned long long *ptr,
 unsigned long long old,
 unsigned long long new)
@@ -273,6 +274,18 @@ static inline unsigned long long __cmpxchg64(unsigned long 
long *ptr,
 
 #define cmpxchg64_local(ptr, o, n) cmpxchg64_relaxed((ptr), (o), (n))
 
+#else
+
+/* ARMv7-M has 32-bit ldrex/strex but no ldrexd/strexd */
+
+#define cmpxchg64(ptr, o, n)   __cmpxchg64_local_generic((ptr), (o), 
(n))
+#define cmpxchg64_relaxed(ptr, o, n)   __cmpxchg64_local_generic((ptr), (o), 
(n))
+#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), 
(n))
+
+#include <asm-generic/cmpxchg-local.h>
+
+#endif
+
 #endif /* __LINUX_ARM_ARCH__ >= 6 */
 
 #endif /* __ASM_ARM_CMPXCHG_H */
-- 
2.9.0

[PATCH net-next] net: mvneta: select GENERIC_ALLOCATOR

2016-12-10 Thread Arnd Bergmann

We previously relied on GENERIC_ALLOCATOR to be selected by CONFIG_ARM,
but now we can compile-test the driver on other architectures that
don't select it:

drivers/net/built-in.o: In function `mvneta_bm_remove':
mvneta_bm.c:(.text+0x4ee35): undefined reference to `gen_pool_free'

This adds an explicit select for the part of the driver that has
the dependency.

Fixes: a0627f776a45 ("net: marvell: Allow drivers to be built with 
COMPILE_TEST")
Signed-off-by: Arnd Bergmann 
---
 drivers/net/ethernet/marvell/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/marvell/Kconfig 
b/drivers/net/ethernet/marvell/Kconfig
index 3b8f11fe5e13..f4b7cf18fb0f 100644
--- a/drivers/net/ethernet/marvell/Kconfig
+++ b/drivers/net/ethernet/marvell/Kconfig
@@ -76,6 +76,7 @@ config MVNETA_BM
default y if MVNETA=y && MVNETA_BM_ENABLE!=n
default MVNETA_BM_ENABLE
select HWBM
+   select GENERIC_ALLOCATOR
help
  MVNETA_BM must not be 'm' if MVNETA=y, so this symbol ensures
  that all dependencies are met.
-- 
2.9.0

Re: [patch] nvme-fabrics: correct some printk information

2016-12-10 Thread Joe Perches

On Sat, 2016-12-10 at 12:06 +0300, Dan Carpenter wrote:
> We really don't care where "ctrl" is on the stack since we're just
> returning soon; what we want is the actual ctrl pointer itself.
> 
> Signed-off-by: Dan Carpenter 
> 
> diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
[]
> @@ -2402,7 +2402,7 @@ enum blk_eh_timer_return
>  
>   dev_info(ctrl->ctrl.device,
>   "NVME-FC{%d}: new ctrl: NQN \"%s\" (%p)\n",
> - ctrl->cnum, ctrl->ctrl.opts->subsysnqn, &ctrl);
> + ctrl->cnum, ctrl->ctrl.opts->subsysnqn, ctrl);

Found by script or inspection?

If by script, it seems unlikely there's only 1 instance
where an address of an automatic pointer type is used
incorrectly.

[RFC PATCH] arm64: make WANT_HUGE_PMD_SHARE depends on HUGETLB_PAGE

2016-12-10 Thread zhongjiang

From: zhong jiang 

When HUGETLB_PAGE is disabled, the functions that WANT_HUGE_PMD_SHARE
covers should not be used. Therefore, we add the dependency.

Signed-off-by: zhong jiang 
---
 arch/arm64/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 969ef88..694ca73 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -640,6 +640,7 @@ config SYS_SUPPORTS_HUGETLBFS
 
 config ARCH_WANT_HUGE_PMD_SHARE
def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
+   depends on HUGETLB_PAGE
 
 config ARCH_HAS_CACHE_LINE_SIZE
def_bool y
-- 
1.8.3.1

Re: [PATCH 4.8 00/45] 4.8.14-stable review

On Fri, Dec 09, 2016 at 11:24:22AM -0700, Shuah Khan wrote:
> On 12/09/2016 09:20 AM, Greg Kroah-Hartman wrote:
> > This is the start of the stable review cycle for the 4.8.14 release.
> > There are 45 patches in this series, all will be posted as a response
> > to this one.  If anyone has any issues with these being applied, please
> > let me know.
> > 
> > Responses should be made by Sun Dec 11 16:17:38 UTC 2016.
> > Anything received after that time might be too late.
> > 
> > The whole patch series can be found in one patch at:
> > kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.8.14-rc1.gz
> > or in the git tree and branch at:
> >   git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> > linux-4.8.y
> > and the diffstat can be found below.
> > 
> > thanks,
> > 
> > greg k-h
> > 
> 
> Compiled and booted on my test system. No dmesg regressions.

Thanks for testing both of these and letting me know.

greg k-h

Re: [PATCH 4.8 00/45] 4.8.14-stable review

On Fri, Dec 09, 2016 at 02:36:38PM -0800, Guenter Roeck wrote:
> On Fri, Dec 09, 2016 at 05:20:29PM +0100, Greg Kroah-Hartman wrote:
> > This is the start of the stable review cycle for the 4.8.14 release.
> > There are 45 patches in this series, all will be posted as a response
> > to this one.  If anyone has any issues with these being applied, please
> > let me know.
> > 
> > Responses should be made by Sun Dec 11 16:17:38 UTC 2016.
> > Anything received after that time might be too late.
> > 
> Build results:
>   total: 149 pass: 149 fail: 0
> Qemu test results:
>   total: 122 pass: 122 fail: 0
> 
> Details are available at http://kerneltests.org/builders.

Thanks for testing all of these and letting me know.

greg k-h

Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers

2016-12-10 Thread Greg KH

On Fri, Dec 09, 2016 at 04:21:48PM -0800, Stephen Hemminger wrote:
> On Fri, 9 Dec 2016 22:35:05 +
> Haiyang Zhang  wrote:
> 
> > > > >
> > > > > Emulated NIC is already excluded in start of netvc notifier handler.
> > > > >
> > > > > static int netvsc_netdev_event(struct notifier_block *this,
> > > > >  unsigned long event, void *ptr)
> > > > > {
> > > > >   struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
> > > > >
> > > > >   /* Skip our own events */
> > > > >   if (event_dev->netdev_ops == &device_ops)
> > > > >   return NOTIFY_DONE;
> > > > >  
> > > >
> > > > Emulated device is not based on netvsc. It's the native Linux  
> > > (dec100M?)  
> > > > Driver. So this line doesn't exclude it. And how about other NIC type
> > > > may be added in the future?  
> > > 
> > > Sorry, forgot about that haven't used emulated device in years.
> > > The emulated device should appear to be on a PCI bus, but the serial
> > > would not match??  
> > 
> > It's not a vmbus device, not a hv_pci device either. Hv_PCI is a subset
> > of vmbus devices. So emulated NIC won't have hv_pci serial number.
> > 
> > In my patch, the following code ensure, we only try to get serial number
> > after confirming it's vmbus and hv_pci device:
> > 
> > +   if (!dev_is_vmbus(dev))
> > +   continue;
> > +
> > +   hdev = device_to_hv_device(dev);
> > +   if (hdev->device_id != HV_PCIE)
> > +   continue;
> 
> Ok, the walk back up the device tree is logically ok, but I don't
> know enough about PCI device tree to be assured that it is safe.
> Also, you could short circuit away most of the unwanted devices
> by making sure the vf_netdev->dev.parent is a PCI device.

Ugh, this seems really really messy.  Can't we just have the
netdev_event interface pass back a pointer to something that we "know"
what it is?  This walking the device tree is a mess, and not good.

I'd even argue that dev_is_pci() needs to be removed from the tree too,
as it shouldn't be needed either.  We did a lot of work on the driver
model to prevent the need for having to declare the "type" of 'struct
device' at all, and by doing this type of thing it goes against the
basic design of the model.

Yes, it makes things a bit "tougher" in places, but you don't do crazy
things like walk device trees to try to find random devices and then
think it's safe to actually use them :(

thanks,

greg k-h

[PATCH 1/5] net: ethernet: ti: cpsw: use same macros to get active slave

Use the same, more convenient macros, to get active slave.

Signed-off-by: Ivan Khoronzhuk 
---
 drivers/net/ethernet/ti/cpsw.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index b62d958..c45f7d2 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1624,10 +1624,7 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv)
struct cpsw_common *cpsw = priv->cpsw;
u32 ctrl, mtype;
 
-   if (cpsw->data.dual_emac)
-   slave = &cpsw->slaves[priv->emac_port];
-   else
-   slave = &cpsw->slaves[cpsw->data.active_slave];
+   slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
 
ctrl = slave_read(slave, CPSW2_CONTROL);
switch (cpsw->version) {
-- 
2.7.4

[PATCH 5/5] net: ethernet: ti: cpsw: sync rates for channels in dual emac mode

The channels are common for both ndevs in dual emac mode. Hence, keep
in sync their rates.

Signed-off-by: Ivan Khoronzhuk 
---
 drivers/net/ethernet/ti/cpsw.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 7ccfa63..b203143 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2020,9 +2020,10 @@ static int cpsw_ndo_set_tx_maxrate(struct net_device 
*ndev, int queue, u32 rate)
 {
struct cpsw_priv *priv = netdev_priv(ndev);
struct cpsw_common *cpsw = priv->cpsw;
+   struct cpsw_slave *slave;
u32 min_rate;
u32 ch_rate;
-   int ret;
+   int i, ret;
 
ch_rate = netdev_get_tx_queue(ndev, queue)->tx_maxrate;
if (ch_rate == rate)
@@ -2053,6 +2054,15 @@ static int cpsw_ndo_set_tx_maxrate(struct net_device 
*ndev, int queue, u32 rate)
if (ret)
return ret;
 
+   /* update rates for slaves tx queues */
+   for (i = 0; i < cpsw->data.slaves; i++) {
+   slave = &cpsw->slaves[i];
+   if (!slave->ndev)
+   continue;
+
+   netdev_get_tx_queue(slave->ndev, queue)->tx_maxrate = rate;
+   }
+
cpsw_split_res(ndev);
return ret;
 }
-- 
2.7.4

[PATCH 4/5] net: ethernet: ti: cpsw: re-split res only when speed is changed

Don't re-split res in the following cases:
- speed of phys is not changed
- speed of phys is changed and no rate limited channels
- speed of phys is changed and all channels are rate limited
- phy is unlinked while dev is open
- phy is linked back but speed is not changed

The maximum speed is the sum of the speeds of the "linked" phys, thus res
are split taking into account both interfaces, both for dual emac mode and
for switch mode.

Signed-off-by: Ivan Khoronzhuk 
---
 drivers/net/ethernet/ti/cpsw.c | 64 ++
 1 file changed, 59 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index a2c2c06..7ccfa63 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -394,6 +394,7 @@ struct cpsw_common {
u32 irqs_table[IRQ_NUM];
struct cpts *cpts;
int rx_ch_num, tx_ch_num;
+   int speed;
 };
 
 struct cpsw_priv {
@@ -761,7 +762,6 @@ static void cpsw_split_res(struct net_device *ndev)
struct cpsw_vector *txv = cpsw->txv;
int i, ch_weight, rlim_ch_num = 0;
int budget, bigest_rate_ch = 0;
-   struct cpsw_slave *slave;
u32 ch_rate, max_rate;
int ch_budget = 0;
 
@@ -781,8 +781,16 @@ static void cpsw_split_res(struct net_device *ndev)
bigest_rate = 0;
max_rate = consumed_rate;
} else {
-   slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
-   max_rate = slave->phy->speed * 1000;
+   max_rate = cpsw->speed * 1000;
+
+   /* if max_rate is less then expected due to reduced link speed,
+* split proportionally according next potential max speed
+*/
+   if (max_rate < consumed_rate)
+   max_rate *= 10;
+
+   if (max_rate < consumed_rate)
+   max_rate *= 10;
 
ch_budget = (consumed_rate * CPSW_POLL_WEIGHT) / max_rate;
ch_budget = (CPSW_POLL_WEIGHT - ch_budget) /
@@ -1013,15 +1021,56 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave,
slave->mac_control = mac_control;
 }
 
+static int cpsw_get_common_speed(struct cpsw_common *cpsw)
+{
+   int i, speed;
+
+   for (i = 0, speed = 0; i < cpsw->data.slaves; i++)
+   if (cpsw->slaves[i].phy && cpsw->slaves[i].phy->link)
+   speed += cpsw->slaves[i].phy->speed;
+
+   return speed;
+}
+
+static int cpsw_need_resplit(struct cpsw_common *cpsw)
+{
+   int i, rlim_ch_num;
+   int speed, ch_rate;
+
+   /* re-split resources only in case speed was changed */
+   speed = cpsw_get_common_speed(cpsw);
+   if (speed == cpsw->speed || !speed)
+   return 0;
+
+   cpsw->speed = speed;
+
+   for (i = 0, rlim_ch_num = 0; i < cpsw->tx_ch_num; i++) {
+   ch_rate = cpdma_chan_get_rate(cpsw->txv[i].ch);
+   if (!ch_rate)
+   break;
+
+   rlim_ch_num++;
+   }
+
+   /* cases not dependent on speed */
+   if (!rlim_ch_num || rlim_ch_num == cpsw->tx_ch_num)
+   return 0;
+
+   return 1;
+}
+
 static void cpsw_adjust_link(struct net_device *ndev)
 {
struct cpsw_priv*priv = netdev_priv(ndev);
+   struct cpsw_common  *cpsw = priv->cpsw;
boollink = false;
 
for_each_slave(priv, _cpsw_adjust_link, priv, &link);
 
if (link) {
-   cpsw_split_res(priv->ndev);
+   if (cpsw_need_resplit(cpsw))
+   cpsw_split_res(ndev);
+
netif_carrier_on(ndev);
if (netif_running(ndev))
netif_tx_wake_all_queues(ndev);
@@ -1538,6 +1587,10 @@ static int cpsw_ndo_stop(struct net_device *ndev)
cpsw_ale_stop(cpsw->ale);
}
for_each_slave(priv, cpsw_slave_stop, cpsw);
+
+   if (cpsw_need_resplit(cpsw))
+   cpsw_split_res(ndev);
+
pm_runtime_put_sync(cpsw->dev);
if (cpsw->data.dual_emac)
cpsw->slaves[priv->emac_port].open_stat = false;
@@ -1983,7 +2036,7 @@ static int cpsw_ndo_set_tx_maxrate(struct net_device 
*ndev, int queue, u32 rate)
return -EINVAL;
}
 
-   if (rate > 2000) {
+   if (rate > cpsw->speed) {
dev_err(priv->dev, "The channel rate cannot be more than 
2Gbps");
return -EINVAL;
}
@@ -2998,6 +3051,7 @@ static int cpsw_probe(struct platform_device *pdev)
ndev->ethtool_ops = &cpsw_ethtool_ops;
netif_napi_add(ndev, &cpsw->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT);
netif_tx_napi_add(ndev, &cpsw->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT);
+   cpsw_split_res(ndev);
 
/* register the network device */
SET_NETDEV_DEV(ndev, &pdev->dev);
-- 
2.7.4

[PATCH 3/5] net: ethernet: ti: cpsw: combine budget and weight split and check

Re-split weight along with budget. It simplifies the code a little
and updates the state after every rate change. It is also necessary
to move the argument checks into this combined function. Replace the
per-interface maximum rate check with a check against the maximum
possible rate.

Signed-off-by: Ivan Khoronzhuk 
---
 drivers/net/ethernet/ti/cpsw.c | 107 +
 1 file changed, 34 insertions(+), 73 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 23213a3..a2c2c06 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -753,27 +753,18 @@ static void cpsw_rx_handler(void *token, int len, int 
status)
dev_kfree_skb_any(new_skb);
 }
 
-/* split budget depending on channel rates */
-static void cpsw_split_budget(struct net_device *ndev)
+static void cpsw_split_res(struct net_device *ndev)
 {
struct cpsw_priv *priv = netdev_priv(ndev);
+   u32 consumed_rate = 0, bigest_rate = 0;
struct cpsw_common *cpsw = priv->cpsw;
struct cpsw_vector *txv = cpsw->txv;
-   u32 consumed_rate, bigest_rate = 0;
+   int i, ch_weight, rlim_ch_num = 0;
int budget, bigest_rate_ch = 0;
struct cpsw_slave *slave;
-   int i, rlim_ch_num = 0;
u32 ch_rate, max_rate;
int ch_budget = 0;
 
-   if (cpsw->data.dual_emac)
-   slave = &cpsw->slaves[priv->emac_port];
-   else
-   slave = &cpsw->slaves[cpsw->data.active_slave];
-
-   max_rate = slave->phy->speed * 1000;
-
-   consumed_rate = 0;
for (i = 0; i < cpsw->tx_ch_num; i++) {
ch_rate = cpdma_chan_get_rate(txv[i].ch);
if (!ch_rate)
@@ -785,7 +776,14 @@ static void cpsw_split_budget(struct net_device *ndev)
 
if (cpsw->tx_ch_num == rlim_ch_num) {
max_rate = consumed_rate;
+   } else if (!rlim_ch_num) {
+   ch_budget = CPSW_POLL_WEIGHT / cpsw->tx_ch_num;
+   bigest_rate = 0;
+   max_rate = consumed_rate;
} else {
+   slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+   max_rate = slave->phy->speed * 1000;
+
ch_budget = (consumed_rate * CPSW_POLL_WEIGHT) / max_rate;
ch_budget = (CPSW_POLL_WEIGHT - ch_budget) /
(cpsw->tx_ch_num - rlim_ch_num);
@@ -793,22 +791,28 @@ static void cpsw_split_budget(struct net_device *ndev)
  (cpsw->tx_ch_num - rlim_ch_num);
}
 
-   /* split tx budget */
+   /* split tx weight/budget */
budget = CPSW_POLL_WEIGHT;
for (i = 0; i < cpsw->tx_ch_num; i++) {
ch_rate = cpdma_chan_get_rate(txv[i].ch);
if (ch_rate) {
txv[i].budget = (ch_rate * CPSW_POLL_WEIGHT) / max_rate;
if (!txv[i].budget)
-   txv[i].budget = 1;
+   txv[i].budget++;
if (ch_rate > bigest_rate) {
bigest_rate_ch = i;
bigest_rate = ch_rate;
}
+
+   ch_weight = (ch_rate * 100) / max_rate;
+   if (!ch_weight)
+   ch_weight++;
+   cpdma_chan_set_weight(cpsw->txv[i].ch, ch_weight);
} else {
txv[i].budget = ch_budget;
if (!bigest_rate_ch)
bigest_rate_ch = i;
+   cpdma_chan_set_weight(cpsw->txv[i].ch, 0);
}
 
budget -= txv[i].budget;
@@ -1017,7 +1021,7 @@ static void cpsw_adjust_link(struct net_device *ndev)
for_each_slave(priv, _cpsw_adjust_link, priv, &link);
 
if (link) {
-   cpsw_split_budget(priv->ndev);
+   cpsw_split_res(priv->ndev);
netif_carrier_on(ndev);
if (netif_running(ndev))
netif_tx_wake_all_queues(ndev);
@@ -1962,64 +1966,25 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device 
*ndev,
 static int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 
rate)
 {
struct cpsw_priv *priv = netdev_priv(ndev);
-   int tx_ch_num = ndev->real_num_tx_queues;
-   u32 consumed_rate, min_rate, max_rate;
struct cpsw_common *cpsw = priv->cpsw;
-   struct cpsw_slave *slave;
-   int ret, i, weight;
-   int rlim_num = 0;
+   u32 min_rate;
u32 ch_rate;
+   int ret;
 
ch_rate = netdev_get_tx_queue(ndev, queue)->tx_maxrate;
if (ch_rate == rate)
return 0;
 
-   if (cpsw->data.dual_emac)
-   slave = &cpsw->slaves[priv->emac_port];
-   else
-   slave = &cpsw->slaves[cpsw->data.active_slave];
-   max_rate = slave->phy->speed;
-
-   consumed_rate = 0;
-   for (i =

[PATCH 1/5] net: ethernet: ti: cpsw: improve re-split policy

This patch series adds several simplifications and improvements to setting
the maximum rate for channels, taking into account switch and dual emac mode.

Don't re-split res in the following cases:
- speed of phys is not changed
- speed of phys is changed and no rate limited channels
- speed of phys is changed and all channels are rate limited
- phy is unlinked while dev is open
- phy is linked back but speed is not changed

The maximum speed is the sum of the speeds of the "linked" phys, thus
resources are split taking into account both interfaces, for dual emac
mode as well as for switch mode.

Tested on am572x

Based on net-next/master

Ivan Khoronzhuk (5):
  net: ethernet: ti: cpsw: use same macros to get active slave
  net: ethernet: ti: cpsw: don't start queue twice
  net: ethernet: ti: cpsw: combine budget and weight split and check
  net: ethernet: ti: cpsw: re-split res only when speed is changed
  net: ethernet: ti: cpsw: sync rates for channels in dual emac mode

 drivers/net/ethernet/ti/cpsw.c | 178 +++--
 1 file changed, 99 insertions(+), 79 deletions(-)

-- 
2.7.4

Re: Misalignment, MIPS, and ip_hdr(skb)->version

2016-12-10 Thread Felix Fietkau

On 2016-12-07 19:54, Jason A. Donenfeld wrote:
> On Wed, Dec 7, 2016 at 7:51 PM, David Miller  wrote:
>> It's so much better to analyze properly where the misalignment comes from
>> and address it at the source, as we have for various cases that trip up
>> Sparc too.
> 
> That's sort of my attitude too, hence starting this thread. Any
> pointers you have about this would be most welcome, so as not to
> perpetuate what already seems like an issue in other parts of the
> stack.
Hi Jason,

I'm the author of that hackish LEDE/OpenWrt patch that works around the
misalignment issues. Here's some context regarding that patch:

I intentionally put it in the target specific patches for only one of
our MIPS targets. There are a few ar71xx devices where the misalignment
cannot be fixed, because the Ethernet MAC has a 4-byte DMA alignment
requirement, and does not support inserting 2 bytes of padding to
correct the IP header misalignment.

With these limitations the choice was between this ugly network stack
patch or inserting a very expensive memmove in the data path (which is
better than taking the mis-alignment traps, but still hurts routing
performance significantly).

There are a lot of places in the network stack that assume full 32 bit
alignment, and you only get to see those once you start using more of
netfilter, play with various tunnel encapsulations, etc.

I think you have 3 options to deal with this properly:
1. add 3 bytes of padding
2. allocate a separate skb for decryption (might be more expensive)
3. save the header and decrypt to the start of the packet data
(overwriting the misaligned header).

I'm not sure what the performance impact of 2 and 3 is, so it's probably
best to stick with the padding.

I've taken a quick look at the wireguard message headers, and my
recommendation would be to insert the 3-byte padding in struct
message_header and remove __packed from your structs.
This will also remove misalignment of your own protocol fields.

- Felix

[PATCH 2/5] net: ethernet: ti: cpsw: don't start queue twice

No need to start queues after cpsw is started as it will be done
while cpsw_adjust_link(), after phy connection.

Signed-off-by: Ivan Khoronzhuk 
---
 drivers/net/ethernet/ti/cpsw.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index c45f7d2..23213a3 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1506,8 +1506,6 @@ static int cpsw_ndo_open(struct net_device *ndev)
if (cpsw->data.dual_emac)
cpsw->slaves[priv->emac_port].open_stat = true;
 
-   netif_tx_start_all_queues(ndev);
-
return 0;
 
 err_cleanup:
-- 
2.7.4

Re: [PATCH v2 2/2] x86/KASLR/64: Determine kernel text mapping size at runtime

2016-12-10 Thread Baoquan He

On 12/10/16 at 11:31am, Borislav Petkov wrote:
> On Fri, Dec 09, 2016 at 10:41:58PM +0800, Baoquan He wrote:
> > X86 64 kernel takes KERNEL_IMAGE_SIZE as the kernel text mapping size,
> > and it's fixed as compiling time, changing from 512M to 1G as long as
> > CONFIG_RANDOMIZE_BASE is enabled, though people specify kernel option
> > "nokaslr" explicitly.
> > 
> > This could be a wrong behaviour.
> 
> A bunch of changes just because "this could be a wrong behavior". I'm
> not really persuaded.

Well, then I apologize for the wrong wording. It should be "This is
a wrong behaviour."

Whether CONFIG_RANDOMIZE_BASE is yes or not, with 'nokaslr' specified,
Kernel text mapping size should be 512M, just the same as no kaslr code
compiled in.

Thanks
Baoquan

Re: [PATCH] ARM: add cmpxchg64 helper for ARMv7-M

2016-12-10 Thread Pablo Neira Ayuso

Hi Arnd,

On Sat, Dec 10, 2016 at 11:36:34AM +0100, Arnd Bergmann wrote:
> A change to the netfilter code in net-next introduced the first caller of
> cmpxchg64 that can get built on ARMv7-M, leading to an error from the
> assembler that points out the lack of 64-bit atomics on this architecture:
> 
> /tmp/ccMe7djj.s: Assembler messages:
> /tmp/ccMe7djj.s:367: Error: selected processor does not support `ldrexd 
> r0,r1,[lr]' in Thumb mode
> /tmp/ccMe7djj.s:371: Error: selected processor does not support `strexd 
> ip,r2,r3,[lr]' in Thumb mode
> /tmp/ccMe7djj.s:389: Error: selected processor does not support `ldrexd 
> r8,r9,[r7]' in Thumb mode
> /tmp/ccMe7djj.s:393: Error: selected processor does not support `strexd 
> lr,r0,r1,[r7]' in Thumb mode
> scripts/Makefile.build:299: recipe for target 'net/netfilter/nft_counter.o' 
> failed
> 
> This makes ARMv7-M use the same emulation from asm-generic/cmpxchg-local.h
> that we use on architectures earlier than ARMv6K, to fix the build. The
> 32-bit atomics are available on ARMv7-M and we keep using them there.
> This ARM specific change is probably something we should do regardless
> of the netfilter code.
> 
> However, looking at the new nft_counter_reset() function in nft_counter.c,
> this looks incorrect to me not just on ARMv7-M but also on other
> architectures, with at least the following possible race:

Right, Eric Dumazet already spotted this problem. I'm preparing a
patch that doesn't require cmpxchg64(). Will keep you on Cc. Thanks.

Re: [PATCH v2 2/2] x86/KASLR/64: Determine kernel text mapping size at runtime

On Sat, Dec 10, 2016 at 08:27:57PM +0800, Baoquan He wrote:
> Whether CONFIG_RANDOMIZE_BASE is yes or not, with 'nokaslr' specified,
> Kernel text mapping size should be 512M, just the same as no kaslr code
> compiled in.

"should be" still doesn't really explain what the problem is. What's
wrong with it remaining 1G?

IOW, something like "The problem is X and the issues it causes are Y.
That's why we need to do Z."

Now please replace X,Y and Z with the real content.

-- 
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 
(AG Nürnberg)
--

Re: [kernel-hardening] [PATCH] siphash: add cryptographically secure hashtable function

2016-12-10 Thread Greg KH

On Fri, Dec 09, 2016 at 07:36:59PM +0100, Jason A. Donenfeld wrote:
> SipHash is a 64-bit keyed hash function that is actually a
> cryptographically secure PRF, like HMAC. Except SipHash is super fast,
> and is meant to be used as a hashtable keyed lookup function.
> 
> SipHash isn't just some new trendy hash function. It's been around for a
> while, and there really isn't anything that comes remotely close to
> being useful in the way SipHash is. With that said, why do we need this?
> 
> There are a variety of attacks known as "hashtable poisoning" in which an
> attacker forms some data such that the hash of that data will be the
> same, and then proceeds to fill up all entries of a hashbucket. This is
> a realistic and well-known denial-of-service vector.
> 
> Linux developers already seem to be aware that this is an issue, and
> various places that use hash tables in, say, a network context, use a
> non-cryptographically secure function (usually jhash) and then try to
> twiddle with the key on a time basis (or in many cases just do nothing
> and hope that nobody notices). While this is an admirable attempt at
> solving the problem, it doesn't actually fix it. SipHash fixes it.
> 
> (It fixes it in such a sound way that you could even build a stream
> cipher out of SipHash that would resist the modern cryptanalysis.)
> 
> There are a modicum of places in the kernel that are vulnerable to
> hashtable poisoning attacks, either via userspace vectors or network
> vectors, and there's not a reliable mechanism inside the kernel at the
> moment to fix it. The first step toward fixing these issues is actually
> getting a secure primitive into the kernel for developers to use. Then
> we can, bit by bit, port things over to it as deemed appropriate.
> 
> Dozens of languages are already using this internally for their hash
> tables. Some of the BSDs already use this in their kernels. SipHash is
> a widely known high-speed solution to a widely known problem, and it's
> time we catch-up.
> 
> Signed-off-by: Jason A. Donenfeld 
> Cc: Jean-Philippe Aumasson 
> Cc: Daniel J. Bernstein 
> ---
>  include/linux/siphash.h |  18 ++
>  lib/Makefile|   3 +-
>  lib/siphash.c   | 163 
> 
>  3 files changed, 183 insertions(+), 1 deletion(-)
>  create mode 100644 include/linux/siphash.h
>  create mode 100644 lib/siphash.c

This looks really nice, but we don't usually add stuff into lib/ unless
there is an actual user of the code :)

Have you tried converting any of the existing hash users to use this
instead?  If you did that, and it shows a solution for the known
problems with our existing hashes (as you point out above), I doubt
there would be any objection for this patch at all.

Minor coding style nits below:

> @@ -0,0 +1,18 @@
> +/* Copyright (C) 2016 Jason A. Donenfeld 
> + *
> + * SipHash: a fast short-input PRF
> + * https://131002.net/siphash/
> + */
> +
> +#ifndef _LINUX_SIPHASH_H
> +#define _LINUX_SIPHASH_H
> +
> +#include 
> +
> +enum siphash24_lengths {
> + SIPHASH24_KEY_LEN = 16
> +};
> +
> +uint64_t siphash24(const uint8_t *data, size_t len, const uint8_t 
> key[SIPHASH24_KEY_LEN]);

Please use u64 and u8 instead of the userspace uint64_t and uint8_t
types for kernel code.  Yes, the ship has probably sailed for trying to
strictly enforce it, but it's a good idea to do where ever possible.

> +
> +#endif /* _LINUX_SIPHASH_H */
> diff --git a/lib/Makefile b/lib/Makefile
> index 50144a3aeebd..d224337b0d01 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -22,7 +22,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
>sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
>flex_proportions.o ratelimit.o show_mem.o \
>is_single_threaded.o plist.o decompress.o kobject_uevent.o \
> -  earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o
> +  earlycpio.o seq_buf.o siphash.o \
> +  nmi_backtrace.o nodemask.o win_minmax.o
>  
>  lib-$(CONFIG_MMU) += ioremap.o
>  lib-$(CONFIG_SMP) += cpumask.o
> diff --git a/lib/siphash.c b/lib/siphash.c
> new file mode 100644
> index ..022d86f04b9b
> --- /dev/null
> +++ b/lib/siphash.c
> @@ -0,0 +1,163 @@
> +/* Copyright (C) 2015-2016 Jason A. Donenfeld 
> + * Copyright (C) 2012-2014 Jean-Philippe Aumasson 
> 
> + * Copyright (C) 2012-2014 Daniel J. Bernstein 
> + *
> + * SipHash: a fast short-input PRF
> + * https://131002.net/siphash/
> + */

Any specific license for this code?  It's good to at the least say what
it is.  Yes, we know it will default to GPLv2 only as part of the whole
kernel tree, but it's good to be explicit for when someone wants to copy
this code for their own projects...

> +
> +#include 
> +#include 
> +#include 
> +
> +#define ROTL(x,b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b

Don't we have this in kernel.h somewhere?  Ah, yeah, it's rol64() in
bitops.h, no need to define it again please.

> +#define U8TO64(p) le64_to_cpu(*(__le64

Re: [PATCH] driver core: flush async calls before testing driver removal

2016-12-10 Thread Vladimir Zapolskiy

Hello Greg,

On 12/10/2016 09:32 AM, Greg Kroah-Hartman wrote:
> On Sat, Dec 10, 2016 at 02:15:19AM +0200, Vladimir Zapolskiy wrote:
>> If CONFIG_DEBUG_TEST_DRIVER_REMOVE option is enabled a number of false
>> positives are reported for ATA controller drivers, because ATA port
>> probes are done asynchronously, and the same problem may also touch
>> other asynchronously probed drivers.
>>
>> To reduce the rate of false reports on boot call async_synchronize_full()
>> before attempting to remove a driver, the same is done in delete_module()
>> syscall for all possible drivers and in __device_release_driver() function
>> for asynchronously probed drivers.
> 
> __device_release_driver() already calls this function, why call it
> again?
> 

__device_release_driver() is not called on test removal of drivers, if
CONFIG_DEBUG_TEST_DRIVER_REMOVE option is enabled.

This opens a possibility to races like one I've discovered:

  https://www.spinics.net/lists/linux-ide/msg53473.html

Next, async_synchronize_full() from __device_release_driver() is not
called in case of removal of ATA controller drivers, because
driver_allows_async_probing(drv) return value is false.

--
With best wishes,
Vladimir

Re: [PATCH] x86/kbuild: enable modversions for symbols exported from asm

On Fri, Dec 09, 2016 at 11:46:54PM +0100, Dodji Seketeli wrote:
> Hello,
> 
> Nicholas Piggin  a écrit:
> 
> [...]
> 
> > That said, a dwarf based checker tool should be able to do as good a job
> > (maybe a bit better because report is very informative and it may pick up
> > compiler alignments or padding options).
> 
> So, Nicholas was kind enough to send me the two Linux Kernel binaries
> that he built with the tiny little interface change that we were
> discussing earlier.  Here is what the abidiff[1] tools says about that
> interface change:
> 
> $ time ~/git/libabigail/kabidiff/build/tools/abidiff vmlinux.abi1.abi 
> vmlinux.abi2.abi
> Functions changes summary: 0 Removed, 1 Changed, 0 Added function
> Variables changes summary: 0 Removed, 0 Changed, 0 Added variable
> 
> 1 function with some indirect sub-type change:
> 
>   [C]'function int foo(blah*)' at memory.c:82:1 has some indirect 
> sub-type changes:
> parameter 1 of type 'blah*' has sub-type changes:
>   in pointed to type 'struct blah' at memory.c:78:1:
> type size changed from 32 to 64 bits
> 1 data member insertion:
>   'int blah::y', at offset 0 (in bits) at memory.c:79:1
> 1 data member change:
>  'int blah::x' offset changed from 0 to 32 (in bits) (by +32 bits)
> 
> 
> 
> real  0m2.595s
> user  0m2.489s
> sys   0m0.108s
> $ 
> 
> I kept the timing information to give you an idea of the time it takes
> on a non-optimized build of abidiff.
> 
> One could for instance want that types that are not defined in header
> files be kept out of the change report.  In that case it's possible to
> write a little suppression specification file like this one:
> 
> $ cat vmlinux.abignore 
> [suppress_type]
>   source_location_not_regexp = .*\\.h
> $
> 
> You can then pass that suppression file to the tool:
> 
> $ ~/git/libabigail/kabidiff/build/tools/abidiff --suppr vmlinux.abignore 
> vmlinux.abi1.abi vmlinux.abi2.abi
> Functions changes summary: 0 Removed, 0 Changed (1 filtered out), 0 Added 
> function
> Variables changes summary: 0 Removed, 0 Changed, 0 Added variable
> 
> 
> real  0m2.574s
> user  0m2.473s
> sys   0m0.102s
> $
> 
> So this is the kind of interface change analysis tool we are working on
> at the moment.
> 
> One could also imagine a tool that would compute a CRC that takes the
> very same suppression specification files into account, letting people
> to decide that some interface changes are OK.  That CRC would thus be
> added to the special ELF sections we already have today.  We could keep
> the modversion machinery, but with a greater dose of flexibility.
> Whenever modversion detects a change, abidiff would tell people what the
> change is exactly.
> 
> What do you guys think?

YES YES YES!!!

Now I don't work on a distro anymore, but I would think that something
like this would be really useful, pointing out exactly what changed is
very important for distro maintainers to determine what they want to do
(either fix up the abi change with strange hacks, or ignore it due to
the change being in an area they don't care at all about, i.e. a random
driver subsystem.)

So yes, I think this is really good stuff.  But if the distro
maintainers correct me and think it's useless, then I need to revisit my
view of exactly what they do for their customers :)

thanks,

greg k-h

Re: [PATCH] driver core: flush async calls before testing driver removal

2016-12-10 Thread Vladimir Zapolskiy

Hello Dmitry,

On 12/10/2016 03:59 AM, Dmitry Torokhov wrote:
> On Fri, Dec 9, 2016 at 4:15 PM, Vladimir Zapolskiy  wrote:
>> If CONFIG_DEBUG_TEST_DRIVER_REMOVE option is enabled a number of false
>> positives are reported for ATA controller drivers, because ATA port
>> probes are done asynchronously, and the same problem may also touch
>> other asynchronously probed drivers.
>>
>> To reduce the rate of false reports on boot call async_synchronize_full()
>> before attempting to remove a driver, the same is done in delete_module()
>> syscall for all possible drivers and in __device_release_driver() function
>> for asynchronously probed drivers.
> 
> I'd say CONFIG_DEBUG_TEST_DRIVER_REMOVE did what it was supposed to do
> and uncovered a bug in ATA drivers. Since driver core did not
> asynchronously schedule those actions it should not wait for their
> completion either, but either ATA core or drivers should wait for
> probing to complete before allowing remove() methods to run.
> 

can you please share the idea why?

I haven't managed to find any problems with the ATA subsystem and drivers;
my fuzz testing, which inserts delays to postpone the scheduled execution
of async_port_probe(), doesn't show any problems either.

So I believe the problem is with the test itself.

--
With best wishes,
Vladimir

Re: [PATCH] driver core: flush async calls before testing driver removal

On Sat, Dec 10, 2016 at 02:38:41PM +0200, Vladimir Zapolskiy wrote:
> Hello Greg,
> 
> On 12/10/2016 09:32 AM, Greg Kroah-Hartman wrote:
> > On Sat, Dec 10, 2016 at 02:15:19AM +0200, Vladimir Zapolskiy wrote:
> >> If CONFIG_DEBUG_TEST_DRIVER_REMOVE option is enabled a number of false
> >> positives are reported for ATA controller drivers, because ATA port
> >> probes are done asynchronously, and the same problem may also touch
> >> other asynchronously probed drivers.
> >>
> >> To reduce the rate of false reports on boot call async_synchronize_full()
> >> before attempting to remove a driver, the same is done in delete_module()
> >> syscall for all possible drivers and in __device_release_driver() function
> >> for asynchronously probed drivers.
> > 
> > __device_release_driver() already calls this function, why call it
> > again?
> > 
> 
> __device_release_driver() is not called on test removal of drivers, if
> CONFIG_DEBUG_TEST_DRIVER_REMOVE option is enabled.
> 
> This opens a possibility to races like one I've discovered:
> 
>   https://www.spinics.net/lists/linux-ide/msg53473.html
> 
> Next, async_synchronize_full() from __device_release_driver() is not
> called in case of removal of ATA controller drivers, because
> driver_allows_async_probing(drv) return value is false.

Hm, how does this not also get hit if you unbind/bind/unbind/bind/etc.
from userspace as well?  I don't think this is a
CONFIG_DEBUG_TEST_DRIVER_REMOVE issue, but just that this option finds
the problem corner cases as you are finding out :)

thanks,

greg k-h

Re: Misalignment, MIPS, and ip_hdr(skb)->version

2016-12-10 Thread Måns Rullgård

Felix Fietkau  writes:

> On 2016-12-07 19:54, Jason A. Donenfeld wrote:
>> On Wed, Dec 7, 2016 at 7:51 PM, David Miller  wrote:
>>> It's so much better to analyze properly where the misalignment comes from
>>> and address it at the source, as we have for various cases that trip up
>>> Sparc too.
>> 
>> That's sort of my attitude too, hence starting this thread. Any
>> pointers you have about this would be most welcome, so as not to
>> perpetuate what already seems like an issue in other parts of the
>> stack.
> Hi Jason,
>
> I'm the author of that hackish LEDE/OpenWrt patch that works around the
> misalignment issues. Here's some context regarding that patch:
>
> I intentionally put it in the target specific patches for only one of
> our MIPS targets. There are a few ar71xx devices where the misalignment
> cannot be fixed, because the Ethernet MAC has a 4-byte DMA alignment
> requirement, and does not support inserting 2 bytes of padding to
> correct the IP header misalignment.
>
> With these limitations the choice was between this ugly network stack
> patch or inserting a very expensive memmove in the data path (which is
> better than taking the mis-alignment traps, but still hurts routing
> performance significantly).

I solved this problem in an Ethernet driver by copying the initial part
of the packet to an aligned skb and appending the remainder using
skb_add_rx_frag().  The kernel network stack only cares about the
headers, so the alignment of the packet payload doesn't matter.

-- 
Måns Rullgård

Re: [RFC 0/5] rcu: Introduce leaf_node_for_each_mask_possible_cpu() and its friend

2016-12-10 Thread Boqun Feng

On Fri, Dec 09, 2016 at 08:28:05PM -0800, Paul E. McKenney wrote:
> On Sat, Dec 10, 2016 at 08:45:38AM +0800, Boqun Feng wrote:
> > On Fri, Dec 09, 2016 at 03:49:45PM -0800, Paul E. McKenney wrote:
> > > On Fri, Dec 09, 2016 at 04:48:22PM +0800, Boqun Feng wrote:
> > > > Hi Paul,
> > > > 
> > > > While reading the discussion at:
> > > > 
> > > > https://marc.info/?l=linux-kernel&m=148044253400769
> > > 
> > > This discussion was for stalls specifically, rather than for routine
> > > scans of the bitmasks.
> > > 
> > > But it does look to save some code, so worth looking into.
> > > 
> > > > I figured we might use this fact to save some extra checks in RCU core 
> > > > code,
> > > > currently we iterate over all the possible CPUs on a leaf node, check 
> > > > whether
> > > > they were masked in a certain mask and do something. However, given the 
> > > > fact
> > > > that the masks on a leaf node should always be sparser than the 
> > > > corresponding
> > > > part of cpu_possible_mask, we'd better iterate over all bits in a mask 
> > > > and
> > > > check whether the corresponding CPU is possible or not.
> > > > 
> > > > So I made this RFC, I did a simple build/boot/rcutorture test on my box 
> > > > with
> > > > SMP=4, nothing bad happens. Currently I'm waiting for the 0day and 
> > > > trying to
> > > > test this one a bigger system, in the meanwhile, looking forwards to any
> > > > comment and suggestion.
> > > > 
> > > > So thoughts?
> > > 
> > > By analogy with for_each_cpu() and for_each_possible_cpu(), the name
> > > should instead be for_each_leaf_node_cpu(), the tradition of excessively
> > > long names in RCU notwithstanding.  ;-)
> > > 
> > 
> > Make sense ;-)
> > 
> > I think it's more appropriate to call it for_each_leaf_node_mask_cpu(),
> > because we don't iterate all cpus of a leaf node. The word "possible"
> > could be dropped because obviously we won't iterate over "impossible"
> > cpus in a leaf node ;-)
> 
> C'mon, Boqun!  The for_each_leaf_node_cpu() is not only consistent
> with the for_each_cpu() family, it is shorter!  ;-)
> 

Sure ;-) But for_each_leaf_node_cpu() seems like an operation that
iterates over _all_ cpus in a leaf node, but I actually implement it as
an operation that iterates only the _masked_ cpus. So I feel like word
"mask" better be added in the name.

If we call it for_each_leaf_node_cpu(rnp, mask,...), we will rely on the
hope that readers could figure it out what the primitive actually does
by the indication of the parameter @mask.

I like shorter names too, but not sure whether putting "mask" in the
name is better. After all, naming is one of the most difficult
challenges in programming ;-)

Regards,
Boqun

>   Thanx, Paul
> 
> > Will modify that in next version.
> > 
> > Regards,
> > Boqun
> > 
> > >   Thanx, Paul
> > > 
> 
> 


signature.asc
Description: PGP signature

Re: [PATCH v2 2/2] x86/KASLR/64: Determine kernel text mapping size at runtime

2016-12-10 Thread Baoquan He

On 12/10/16 at 01:33pm, Borislav Petkov wrote:
> On Sat, Dec 10, 2016 at 08:27:57PM +0800, Baoquan He wrote:
> > Whether CONFIG_RANDOMIZE_BASE is yes or not, with 'nokaslr' specified,
> > Kernel text mapping size should be 512M, just the same as no kaslr code
> > compiled in.
> 
> "should be" still doesn't really explain what the problem is. What's
> wrong with it remaining 1G?
> 
> IOW, something like "The problem is X and the issues it causes are Y.
> That's why we need to do Z."
> 
> Now please replace X,Y and Z with the real content.

I made this patchset because of two things:

1) Fedora 25 enables CONFIG_RANDOMIZE_BASE by default, and this worries
maintainers of several Fedora components. People have asked me how to
judge whether it's a kaslr kernel. I told them I usually read the elf
header of kcore - "readelf -l /proc/kcore" - to check it. If the
'VirtAddr' of segments like kernel text, modules, direct mapping is
changed, it should be a kaslr kernel. Then they asked why, even though
they had specified 'nokaslr', the virtual address of modules is not
'0xffffffffa0000000', but '0xffffffffc0000000'. OK, I realized this is
not right, it needs to be fixed.

2) The second thing I remember Dave said he judged the kaslr kernel by
the value of KERNEL_IMAGE_SIZE. Then I decide this is a wrong behaviour
and I should change it. But in v1 post, Dave denied this. Checking Crash
code, what he has done is like below:

if ((kt->flags2 & KASLR) && (THIS_KERNEL_VERSION >= LINUX(4,7,0)))  

machdep->machspec->modules_vaddr = __START_KERNEL_map + 
(machdep->machspec->kernel_image_size ?
machdep->machspec->kernel_image_size : GIGABYTES(1));

You can see that for a kaslr kernel, Dave gets modules_vaddr from
KERNEL_IMAGE_SIZE, or uses 1G directly if that value is not exported.
However KERNEL_IMAGE_SIZE is always 1G as long as CONFIG_RANDOMIZE_BASE
is set to 'y', whether kaslr is enabled or not. As you said, in this case,
remaining 1G doesn't impact things.

So in v2 I didn't mention problem about Crash. But case 1) need be
cared, whether kaslr code is compiled or not, it should not confuse
people, should not make difference between kaslr code not compiled in
and kaslr code compiled in with 'nokaslr' specified. Now the thing is
I am wondering if confusing people is a problem. Except of this I
haven't get report that it impacts things and caused problem.

Usually in redhat we have a convention that when we fix a bug, we
should write patch log like this:
what: what problem you have met.
why:  why does it happen, what is the root cause you got from analysis.
how   How do you fix it in this patch.

I personally think the 'what' is the 'Y' you mentioned, and the 'why' is 'X'.
Whatever it is, it's good if people can easily understand what you say.
In this patch log, since the problem is so obvious, I mean the
confusing difference, when I described the problem, the root cause had
been told too.

So I would like to adjust the log as you suggested, does it please you?

X86 64 kernel takes KERNEL_IMAGE_SIZE as the kernel text mapping size,
and it's fixed at compile time, changing from 512M to 1G as long as
CONFIG_RANDOMIZE_BASE is enabled, even if people specify the kernel option
"nokaslr" explicitly. This really confuses people when they check whether
it's a kaslr kernel, e.g. by checking the output of 'readelf -l /proc/kcore'.

This is obviously a wrong behaviour. CONFIG_RANDOMIZE_BASE should only
decide if the KASLR code need be compiled in. If user specify "nokaslr",
the kernel should behave as no KASLR code compiled in at all.

So in this patch, define a new MACRO KERNEL_MAPPING_SIZE to represent
the size of kernel text mapping area, and let KERNEL_IMAGE_SIZE limit
the size of kernel runtime space. And change to determine the size of
kernel text mapping area at runtime. Though KASLR code compiled in, if
"nokaslr" specified, still set kernel mapping size to be 512M.

Thanks
Baoquan

[PATCH 1/7] ARM: EXYNOS: Constify list of retention registers

The list of retention registers (release_ret_regs field of struct
exynos_pm_data and arrays with values) are not modified and can be made
const to improve the const safeness.

Signed-off-by: Krzysztof Kozlowski 
---
 arch/arm/mach-exynos/suspend.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c
index 73df9f3ffbf7..d0e6ac7938f3 100644
--- a/arch/arm/mach-exynos/suspend.c
+++ b/arch/arm/mach-exynos/suspend.c
@@ -55,7 +55,7 @@ struct exynos_wkup_irq {
 struct exynos_pm_data {
const struct exynos_wkup_irq *wkup_irq;
unsigned int wake_disable_mask;
-   unsigned int *release_ret_regs;
+   const unsigned int *release_ret_regs;
 
void (*pm_prepare)(void);
void (*pm_resume_prepare)(void);
@@ -93,7 +93,7 @@ static const struct exynos_wkup_irq exynos5250_wkup_irq[] = {
{ /* sentinel */ },
 };
 
-static unsigned int exynos_release_ret_regs[] = {
+static const unsigned int exynos_release_ret_regs[] = {
S5P_PAD_RET_MAUDIO_OPTION,
S5P_PAD_RET_GPIO_OPTION,
S5P_PAD_RET_UART_OPTION,
@@ -104,7 +104,7 @@ static unsigned int exynos_release_ret_regs[] = {
REG_TABLE_END,
 };
 
-static unsigned int exynos3250_release_ret_regs[] = {
+static const unsigned int exynos3250_release_ret_regs[] = {
S5P_PAD_RET_MAUDIO_OPTION,
S5P_PAD_RET_GPIO_OPTION,
S5P_PAD_RET_UART_OPTION,
@@ -117,7 +117,7 @@ static unsigned int exynos3250_release_ret_regs[] = {
REG_TABLE_END,
 };
 
-static unsigned int exynos5420_release_ret_regs[] = {
+static const unsigned int exynos5420_release_ret_regs[] = {
EXYNOS_PAD_RET_DRAM_OPTION,
EXYNOS_PAD_RET_MAUDIO_OPTION,
EXYNOS_PAD_RET_JTAG_OPTION,
-- 
2.7.4

[RFC 4/7] ARM: s3c64xx: Annotate external clock frequencies __ro_after_init

The xtal_f and xusbxti_f static variables are modified only through
__init accessors (like s3c64xx_set_xtal_freq()).  Later these variables
are used only in read-only way so we can mark them __ro_after_init to
increase code safeness.

Signed-off-by: Krzysztof Kozlowski 

---

Looks safe, but not tested.
---
 arch/arm/mach-s3c64xx/common.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-s3c64xx/common.c b/arch/arm/mach-s3c64xx/common.c
index 7c66ce1a6bb6..9843eb4dd04e 100644
--- a/arch/arm/mach-s3c64xx/common.c
+++ b/arch/arm/mach-s3c64xx/common.c
@@ -56,7 +56,8 @@
 #include "watchdog-reset.h"
 
 /* External clock frequency */
-static unsigned long xtal_f = 1200, xusbxti_f = 4800;
+static unsigned long xtal_f __ro_after_init = 1200;
+static unsigned long xusbxti_f __ro_after_init = 4800;
 
 void __init s3c64xx_set_xtal_freq(unsigned long freq)
 {
-- 
2.7.4

[PATCH 2/7] ARM: EXYNOS: Annotate iomem and pm_data pointers __ro_after_init

The pointers to __iomem sysram and exynos_pm_data are set only during
initcalls.  Later the pointers themselves are used only in a read-only way so
we can mark them __ro_after_init to increase code safeness.

Signed-off-by: Krzysztof Kozlowski 
---
 arch/arm/mach-exynos/exynos.c  | 4 ++--
 arch/arm/mach-exynos/mcpm-exynos.c | 2 +-
 arch/arm/mach-exynos/suspend.c | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c
index fa08ef99b4ad..a863f8ec0f7a 100644
--- a/arch/arm/mach-exynos/exynos.c
+++ b/arch/arm/mach-exynos/exynos.c
@@ -36,8 +36,8 @@ static struct platform_device exynos_cpuidle = {
.id= -1,
 };
 
-void __iomem *sysram_base_addr;
-void __iomem *sysram_ns_base_addr;
+void __iomem *sysram_base_addr __ro_after_init;
+void __iomem *sysram_ns_base_addr __ro_after_init;
 
 void __init exynos_sysram_init(void)
 {
diff --git a/arch/arm/mach-exynos/mcpm-exynos.c 
b/arch/arm/mach-exynos/mcpm-exynos.c
index f086bf615b29..038fd8c993d0 100644
--- a/arch/arm/mach-exynos/mcpm-exynos.c
+++ b/arch/arm/mach-exynos/mcpm-exynos.c
@@ -32,7 +32,7 @@
 #define EXYNOS5420_USE_ARM_CORE_DOWN_STATE BIT(29)
 #define EXYNOS5420_USE_L2_COMMON_UP_STATE  BIT(30)
 
-static void __iomem *ns_sram_base_addr;
+static void __iomem *ns_sram_base_addr __ro_after_init;
 
 /*
  * The common v7_exit_coherency_flush API could not be used because of the
diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c
index d0e6ac7938f3..339fe011d658 100644
--- a/arch/arm/mach-exynos/suspend.c
+++ b/arch/arm/mach-exynos/suspend.c
@@ -64,7 +64,7 @@ struct exynos_pm_data {
int (*cpu_suspend)(unsigned long);
 };
 
-static const struct exynos_pm_data *pm_data;
+static const struct exynos_pm_data *pm_data __ro_after_init;
 
 static int exynos5420_cpu_state;
 static unsigned int exynos_pmu_spare3;
-- 
2.7.4

[PATCH 5/7] ARM: SAMSUNG: Constify array of wake irqs passed to samsung_sync_wakemask

The samsung_sync_wakemask() iterates over passed array of wake irqs but
does not modify it.

Signed-off-by: Krzysztof Kozlowski 
---
 arch/arm/plat-samsung/include/plat/wakeup-mask.h | 2 +-
 arch/arm/plat-samsung/wakeup-mask.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/plat-samsung/include/plat/wakeup-mask.h 
b/arch/arm/plat-samsung/include/plat/wakeup-mask.h
index 43e4acd2e1c6..bbfa84b0505a 100644
--- a/arch/arm/plat-samsung/include/plat/wakeup-mask.h
+++ b/arch/arm/plat-samsung/include/plat/wakeup-mask.h
@@ -38,7 +38,7 @@ struct samsung_wakeup_mask {
  * required to be correct before we enter sleep.
  */
 extern void samsung_sync_wakemask(void __iomem *reg,
- struct samsung_wakeup_mask *masks,
+ const struct samsung_wakeup_mask *masks,
  int nr_masks);
 
 #endif /* __PLAT_WAKEUP_MASK_H */
diff --git a/arch/arm/plat-samsung/wakeup-mask.c 
b/arch/arm/plat-samsung/wakeup-mask.c
index 20c3d9117cc2..b9de6b543330 100644
--- a/arch/arm/plat-samsung/wakeup-mask.c
+++ b/arch/arm/plat-samsung/wakeup-mask.c
@@ -20,7 +20,7 @@
 #include 
 
 void samsung_sync_wakemask(void __iomem *reg,
-  struct samsung_wakeup_mask *mask, int nr_mask)
+  const struct samsung_wakeup_mask *mask, int nr_mask)
 {
struct irq_data *data;
u32 val;
-- 
2.7.4

[PATCH 7/7] ARM: s3c64xx: Constify wake_irqs

samsung_sync_wakemask() accepts pointer to const data so wake_irqs can
be made const to increase safeness.

Signed-off-by: Krzysztof Kozlowski 
---
 arch/arm/mach-s3c64xx/pm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-s3c64xx/pm.c b/arch/arm/mach-s3c64xx/pm.c
index 59d91b83b03d..b0be382ff6bb 100644
--- a/arch/arm/mach-s3c64xx/pm.c
+++ b/arch/arm/mach-s3c64xx/pm.c
@@ -285,7 +285,7 @@ static int s3c64xx_cpu_suspend(unsigned long arg)
 }
 
 /* mapping of interrupts to parts of the wakeup mask */
-static struct samsung_wakeup_mask wake_irqs[] = {
+static const struct samsung_wakeup_mask wake_irqs[] = {
{ .irq = IRQ_RTC_ALARM, .bit = S3C64XX_PWRCFG_RTC_ALARM_DISABLE, },
{ .irq = IRQ_RTC_TIC,   .bit = S3C64XX_PWRCFG_RTC_TICK_DISABLE, },
{ .irq = IRQ_PENDN, .bit = S3C64XX_PWRCFG_TS_DISABLE, },
-- 
2.7.4

[PATCH 6/7] ARM: s3c24xx: Constify wake_irqs

samsung_sync_wakemask() accepts pointer to const data so wake_irqs can
be made const to increase safeness.

Signed-off-by: Krzysztof Kozlowski 
---
 arch/arm/mach-s3c24xx/pm-s3c2412.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-s3c24xx/pm-s3c2412.c 
b/arch/arm/mach-s3c24xx/pm-s3c2412.c
index d75f95e487ee..0ae4d47a4663 100644
--- a/arch/arm/mach-s3c24xx/pm-s3c2412.c
+++ b/arch/arm/mach-s3c24xx/pm-s3c2412.c
@@ -53,7 +53,7 @@ static int s3c2412_cpu_suspend(unsigned long arg)
 }
 
 /* mapping of interrupts to parts of the wakeup mask */
-static struct samsung_wakeup_mask wake_irqs[] = {
+static const struct samsung_wakeup_mask wake_irqs[] = {
{ .irq = IRQ_RTC,   .bit = S3C2412_PWRCFG_RTC_MASKIRQ, },
 };
 
-- 
2.7.4

[PATCH 3/7] ARM: s3c24xx: Constify few integer tables

These arrays are not modified so they can be made const.

Signed-off-by: Krzysztof Kozlowski 
---
 arch/arm/mach-s3c24xx/bast-irq.c | 4 ++--
 arch/arm/mach-s3c24xx/iotiming-s3c2410.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/mach-s3c24xx/bast-irq.c b/arch/arm/mach-s3c24xx/bast-irq.c
index 2bb08961e934..ad8f4cd7c327 100644
--- a/arch/arm/mach-s3c24xx/bast-irq.c
+++ b/arch/arm/mach-s3c24xx/bast-irq.c
@@ -44,7 +44,7 @@
 /* table of ISA irq nos to the relevant mask... zero means
  * the irq is not implemented
 */
-static unsigned char bast_pc104_irqmasks[] = {
+static const unsigned char bast_pc104_irqmasks[] = {
0,   /* 0 */
0,   /* 1 */
0,   /* 2 */
@@ -63,7 +63,7 @@ static unsigned char bast_pc104_irqmasks[] = {
0,   /* 15 */
 };
 
-static unsigned char bast_pc104_irqs[] = { 3, 5, 7, 10 };
+static const unsigned char bast_pc104_irqs[] = { 3, 5, 7, 10 };
 
 static void
 bast_pc104_mask(struct irq_data *data)
diff --git a/arch/arm/mach-s3c24xx/iotiming-s3c2410.c 
b/arch/arm/mach-s3c24xx/iotiming-s3c2410.c
index 65e5f9cb650f..b7970f1fa3d5 100644
--- a/arch/arm/mach-s3c24xx/iotiming-s3c2410.c
+++ b/arch/arm/mach-s3c24xx/iotiming-s3c2410.c
@@ -249,7 +249,7 @@ static int s3c2410_calc_bank(struct s3c_cpufreq_config *cfg,
return 0;
 }
 
-static unsigned int tacc_tab[] = {
+static const unsigned int tacc_tab[] = {
[0] = 1,
[1] = 2,
[2] = 3,
-- 
2.7.4

Re: Still OOM problems with 4.9er kernels

2016-12-10 Thread Gerhard Wiesinger

On 09.12.2016 22:42, Vlastimil Babka wrote:

On 12/09/2016 07:01 PM, Gerhard Wiesinger wrote:

On 09.12.2016 18:30, Michal Hocko wrote:

On Fri 09-12-16 17:58:14, Gerhard Wiesinger wrote:

On 09.12.2016 17:09, Michal Hocko wrote:

[...]

[97883.882611] Mem-Info:
[97883.883747] active_anon:2915 inactive_anon:3376 isolated_anon:0
active_file:3902 inactive_file:3639 isolated_file:0
unevictable:0 dirty:205 writeback:0 unstable:0
slab_reclaimable:9856 slab_unreclaimable:9682
mapped:3722 shmem:59 pagetables:2080 bounce:0
free:748 free_pcp:15 free_cma:0

there is still some page cache which seems to be neither dirty
nor under writeback. So it should be theoretically reclaimable but for
some reason we cannot seem to reclaim that memory.
There is still some anonymous memory and free swap so we could reclaim
it as well but it all seems pretty down and the memory pressure is
really large

Yes, it might be large on the update situation, but that should be handled
by a virtual memory system by the kernel, right?

Well this is what we try and call it memory reclaim. But if we are not
able to reclaim anything then we eventually have to give up and trigger
the OOM killer.

I'm not familiar with the Linux implementation of the VM system in
detail. But can't you reserve as much memory for the kernel (non
pageable) at least that you can swap everything out (even without
killing a process at least as long there is enough swap available, which
should be in all of my cases)?

We don't have such bulletproof reserves. In this case the amount of
anonymous memory that can be swapped out is relatively low, and either
something is pinning it in memory, or it's being swapped back in quickly.

Now the information that 4.4 made a difference is
interesting. I do not really see any major differences in the reclaim
between 4.3 and 4.4 kernels. The reason might be somewhere else as well.
E.g. some of the subsystem consumes much more memory than before.

Just curious, what kind of filesystem are you using?

I'm using ext4 only with virt-* drivers (storage, network). But it is
definitely a virtual memory allocation/swap usage issue.

Could you try some
additional debugging. Enabling reclaim related tracepoints might tell us
more. The following should tell us more
mount -t tracefs none /trace
echo 1 > /trace/events/vmscan/enable
echo 1 > /trace/events/writeback/writeback_congestion_wait/enable
cat /trace/trace_pipe > trace.log

Collecting /proc/vmstat over time might be helpful as well
mkdir logs
while true
do
cp /proc/vmstat vmstat.$(date +%s)
sleep 1s
done

Activated it. But I think it should be very easy to trigger also on your
side. A very small configured VM with a program running RAM
allocations/writes (I guess you have some testing programs already)
should be sufficient to trigger it. You can also use the attached
program which I used to trigger such situations some years ago. If it
doesn't help try to reduce the available CPU for the VM and also I/O
(e.g. use all CPU/IO on the host or other VMs).

Well it's not really a surprise that if the VM is small enough and
workload large enough, OOM killer will kick in. The exact threshold
might have changed between kernel versions for a number of possible reasons.

IMHO: The OOM killer should NOT kick in even on the highest workloads if
there is swap available.

https://www.spinics.net/lists/linux-mm/msg113665.html

Yeah, but I do think that "oom when you have 156MB free and 7GB
reclaimable, and haven't even tried swapping" counts as obviously
wrong.

So Linus also thinks that trying swapping is a must have. And there always was
enough swap available in my cases. Then it should swap out/swapin all the time
(which worked well in kernel 2.4/2.6 times).

Another topic: Why does the kernel prefer to swap in/swap out instead of
use cache pages/buffers (see vmstat 1 output below)?

BTW: Don't know if you have seen also my original message on the kernel
mailinglist only:

Linus had also OOM problems with 1kB RAM requests and a lot of free RAM
(use a translation service for the german page):
https://lkml.org/lkml/2016/11/30/64
https://marius.bloggt-in-braunschweig.de/2016/11/17/linuxkernel-4-74-8-und-der-oom-killer/
https://www.spinics.net/lists/linux-mm/msg113661.html

Yeah we were involved in the last one. The regressions were about
high-order allocations
though (the 1kB premise turned out to be misinterpretation) and there
were regressions
for those in 4.7/4.8. But yours are order-0.

With kernel 4.7./4.8 it was really reproducible at every dnf update.
With 4.9rc8 it has been much much better. So something must have
changed, too.

As far as I understood it the order is 2^order kB pagesize. I don't
think it makes a difference when swap is not used which order the memory
allocation request is.

BTW: What were the commit that introduced the regression anf fi

[PATCH] net: nicvf: use new api ethtool_{get|set}_link_ksettings

2016-12-10 Thread Philippe Reynes

The ethtool api {get|set}_settings is deprecated.
We move this driver to new api {get|set}_link_ksettings.

Signed-off-by: Philippe Reynes 
---
 .../net/ethernet/cavium/thunder/nicvf_ethtool.c|   56 +++-
 1 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c 
b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
index b048241..2e74bba 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
@@ -116,33 +116,34 @@ struct nicvf_stat {
 static const unsigned int nicvf_n_drv_stats = ARRAY_SIZE(nicvf_drv_stats);
 static const unsigned int nicvf_n_queue_stats = ARRAY_SIZE(nicvf_queue_stats);
 
-static int nicvf_get_settings(struct net_device *netdev,
- struct ethtool_cmd *cmd)
+static int nicvf_get_link_ksettings(struct net_device *netdev,
+   struct ethtool_link_ksettings *cmd)
 {
struct nicvf *nic = netdev_priv(netdev);
+   u32 supported, advertising;
 
-   cmd->supported = 0;
-   cmd->transceiver = XCVR_EXTERNAL;
+   supported = 0;
+   advertising = 0;
 
if (!nic->link_up) {
-   cmd->duplex = DUPLEX_UNKNOWN;
-   ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
+   cmd->base.duplex = DUPLEX_UNKNOWN;
+   cmd->base.speed = SPEED_UNKNOWN;
return 0;
}
 
switch (nic->speed) {
case SPEED_1000:
-   cmd->port = PORT_MII | PORT_TP;
-   cmd->autoneg = AUTONEG_ENABLE;
-   cmd->supported |= SUPPORTED_MII | SUPPORTED_TP;
-   cmd->supported |= SUPPORTED_1000baseT_Full |
+   cmd->base.port = PORT_MII | PORT_TP;
+   cmd->base.autoneg = AUTONEG_ENABLE;
+   supported |= SUPPORTED_MII | SUPPORTED_TP;
+   supported |= SUPPORTED_1000baseT_Full |
  SUPPORTED_1000baseT_Half |
  SUPPORTED_100baseT_Full  |
  SUPPORTED_100baseT_Half  |
  SUPPORTED_10baseT_Full   |
  SUPPORTED_10baseT_Half;
-   cmd->supported |= SUPPORTED_Autoneg;
-   cmd->advertising |= ADVERTISED_1000baseT_Full |
+   supported |= SUPPORTED_Autoneg;
+   advertising |= ADVERTISED_1000baseT_Full |
ADVERTISED_1000baseT_Half |
ADVERTISED_100baseT_Full  |
ADVERTISED_100baseT_Half  |
@@ -151,24 +152,29 @@ static int nicvf_get_settings(struct net_device *netdev,
break;
case SPEED_1:
if (nic->mac_type == BGX_MODE_RXAUI) {
-   cmd->port = PORT_TP;
-   cmd->supported |= SUPPORTED_TP;
+   cmd->base.port = PORT_TP;
+   supported |= SUPPORTED_TP;
} else {
-   cmd->port = PORT_FIBRE;
-   cmd->supported |= SUPPORTED_FIBRE;
+   cmd->base.port = PORT_FIBRE;
+   supported |= SUPPORTED_FIBRE;
}
-   cmd->autoneg = AUTONEG_DISABLE;
-   cmd->supported |= SUPPORTED_1baseT_Full;
+   cmd->base.autoneg = AUTONEG_DISABLE;
+   supported |= SUPPORTED_1baseT_Full;
break;
case SPEED_4:
-   cmd->port = PORT_FIBRE;
-   cmd->autoneg = AUTONEG_DISABLE;
-   cmd->supported |= SUPPORTED_FIBRE;
-   cmd->supported |= SUPPORTED_4baseCR4_Full;
+   cmd->base.port = PORT_FIBRE;
+   cmd->base.autoneg = AUTONEG_DISABLE;
+   supported |= SUPPORTED_FIBRE;
+   supported |= SUPPORTED_4baseCR4_Full;
break;
}
-   cmd->duplex = nic->duplex;
-   ethtool_cmd_speed_set(cmd, nic->speed);
+   cmd->base.duplex = nic->duplex;
+   cmd->base.speed = nic->speed;
+
+   ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+   supported);
+   ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+   advertising);
 
return 0;
 }
@@ -770,7 +776,6 @@ static int nicvf_set_pauseparam(struct net_device *dev,
 }
 
 static const struct ethtool_ops nicvf_ethtool_ops = {
-   .get_settings   = nicvf_get_settings,
.get_link   = nicvf_get_link,
.get_drvinfo= nicvf_get_drvinfo,
.get_msglevel   = nicvf_get_msglevel,
@@ -793,6 +798,7 @@ static int nicvf_set_pauseparam(struct net_device *dev,
.get_pauseparam = nicvf_get_pauseparam,
.set_pauseparam

Re: [PATCH v2] USB: EHCI: spear: fix code warnings

On Thu, Dec 08, 2016 at 10:40:11PM +, csmanjuvi...@gmail.com wrote:
> From: Manjunath Goudar 
> 
> This patch will fix the checkpatch.pl following warnings
> WARNING: Block comments should align the * on each line
> WARNING: space prohibited before semicolon

I've said this before, only do one "type" of fix per patch, and not "fix
all coding style issues".

The subject is also a bit "harsh"; these are not "code warnings", which
normally means a build warning.  These are just minor coding _style_
warnings.

So please break up all of your patches like this and resend them as a
single series of patches.  I'll drop all of your pending ones from my
queue right now.

thanks,

greg k-h

Re: [PATCH 2/2] USB: OHCI: pxa27x:fix code errors

On Sat, Dec 10, 2016 at 01:13:28AM +, csmanjuvi...@gmail.com wrote:
> From: Manjunath Goudar 
> 
> This patch will fix the checkpatch.pl following errors:
> 
> ERROR: space prohibited after that open parenthesis '('
> ERROR: space prohibited before that close parenthesis ')'
> 
> Signed-off-by: Manjunath Goudar 
> Cc: Alan Stern 
> Cc: Greg Kroah-Hartman 
> Cc: linux-...@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> ---
>  drivers/usb/host/ohci-pxa27x.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/usb/host/ohci-pxa27x.c b/drivers/usb/host/ohci-pxa27x.c
> index c73e1ae..0f33583 100644
> --- a/drivers/usb/host/ohci-pxa27x.c
> +++ b/drivers/usb/host/ohci-pxa27x.c
> @@ -158,9 +158,9 @@ static int pxa27x_ohci_select_pmm(struct pxa27x_ohci 
> *pxa_ohci, int mode)
>   uhcrhdb |= (0x7<<17);
>   break;
>   default:
> - printk( KERN_ERR
> + printk(KERN_ERR
>   "Invalid mode %d, set to non-power switch mode.\n",
> - mode );
> + mode);

Fix this properly and use dev_err() in a way that actually builds
properly this time.

thanks,

greg k-h

Re: [PATCH] tty: serial: fsl_lpuart: potential NULL dereference

On Sat, Dec 10, 2016 at 01:30:36AM +0300, Alexey Khoroshilov wrote:
> tty_port_tty_get() might return a tty which is NULL
> if the port is not associated with a tty
> (e.g. due to close or hangup).
> But lpuart_start_rx_dma() dereferences tty without any check.

Are you sure that tty could ever be NULL here?  This function is only
called in places that seem to have a valid tty, with the maybe exception
of the resume call path.  Can you audit this a bit better to be sure one
way or the other please?

thanks,

greg k-h

Re: [PATCH] siphash: add cryptographically secure hashtable function

2016-12-10 Thread Vegard Nossum

On 9 December 2016 at 19:36, Jason A. Donenfeld  wrote:
> SipHash is a 64-bit keyed hash function that is actually a
> cryptographically secure PRF, like HMAC. Except SipHash is super fast,
> and is meant to be used as a hashtable keyed lookup function.
>
> SipHash isn't just some new trendy hash function. It's been around for a
> while, and there really isn't anything that comes remotely close to
> being useful in the way SipHash is. With that said, why do we need this?
>
> There are a variety of attacks known as "hashtable poisoning" in which an
> attacker forms some data such that the hash of that data will be the
> same, and then proceeds to fill up all entries of a hashbucket. This is
> a realistic and well-known denial-of-service vector.
>
> Linux developers already seem to be aware that this is an issue, and
> various places that use hash tables in, say, a network context, use a
> non-cryptographically secure function (usually jhash) and then try to
> twiddle with the key on a time basis (or in many cases just do nothing
> and hope that nobody notices). While this is an admirable attempt at
> solving the problem, it doesn't actually fix it. SipHash fixes it.

Could you give some more concrete details/examples? Here's the IPv4
hash table from include/net/inet_sock.h / net/ipv4/inet_hashtables.c:

static inline unsigned int __inet_ehashfn(const __be32 laddr,
 const __u16 lport,
 const __be32 faddr,
 const __be16 fport,
 u32 initval)
{
   return jhash_3words((__force __u32) laddr,
   (__force __u32) faddr,
   ((__u32) lport) << 16 | (__force __u32)fport,
   initval);
}

static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
   const __u16 lport, const __be32 faddr,
   const __be16 fport)
{
   static u32 inet_ehash_secret __read_mostly;

   net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret));

   return __inet_ehashfn(laddr, lport, faddr, fport,
 inet_ehash_secret + net_hash_mix(net));
}

There's a 32-bit secret random salt (inet_ehash_secret) which means
that in practice, inet_ehashfn() will select 1 out of 2^32 different
hash functions at random each time you boot the kernel; without
knowing which one it selected, how can a local or remote attacker
force IPv4 connections/whatever to go into a single hash bucket?

It is not possible to obtain the secret salt directly (except by
reading from kernel memory, in which case you've lost already), nor is
it possible to obtain the result of inet_ehashfn() other than (maybe)
by a timing attack where you somehow need to detect that two
connections went into the same hash bucket and work backwards from
that to figure out how to land more connections into the same
bucket -- but if they can do that, you've also already lost.

The same pattern is used for IPv6 hashtables and the dentry cache.

I suppose that using a hash function proven to be cryptographically
secure gives a hard guarantee (under some assumptions) that the
salt/key will give enough diversity between the (in the example above)
2^32 different hash functions that you cannot improve your chances of
guessing that two values will map to the same bucket regardless of the
salt/key. However, I am a bit doubtful that using a cryptographically
secure hash function will make much of a difference as long as the
attacker doesn't actually have any way to get the output/result of the
hash function (and given that the hash function isn't completely
trivial, of course).

I am happy to be proven wrong, but you make it sound very easy to
exploit the current situation, so I would just like to ask whether you
have a concrete way to do that?

Vegard

> There are a modicum of places in the kernel that are vulnerable to
> hashtable poisoning attacks, either via userspace vectors or network
> vectors, and there's not a reliable mechanism inside the kernel at the
> moment to fix it. The first step toward fixing these issues is actually
> getting a secure primitive into the kernel for developers to use. Then
> we can, bit by bit, port things over to it as deemed appropriate.
>
> Dozens of languages are already using this internally for their hash
> tables. Some of the BSDs already use this in their kernels. SipHash is
> a widely known high-speed solution to a widely known problem, and it's
> time we catch-up.

Re: [RFC PATCH] ARM: dts: Add support for Turris Omnia

2016-12-10 Thread Pavel Machek

Hi!

> >Yes. That is fine. It is just unusual. Most boards have gpio-led and
> >gpio-keys, which are easy to add. That is why i asked. Adding an LED
> >driver which talks to this M0 can be added later.
> 
> Actually the WiP driver for MCU LED interface, that we use in our
> kernel is here: 
> https://github.com/tmshlvck/omnia-linux/commit/2121afd8fbd2e4c720edcdd472b11b5303bc0dfb
> 
> It definitely needs some cleanup and it adds non-standard features
> (main PWM for all LEDs, autonomous blink mode, colors) via custom
> /sys files, which I suspect that is not going to be acceptable for
> upstream. Let's keep it for the next iteration.

Actually, LEDs that can do PWM intensity on their own are common and
supported.

LEDs that can do PWM pretty advanced patterns are common in the cellphones,
as is the color. Unfortunately, good support in kernel is missing. It
would be good to change that.

In n900, I have a LED that can compute prime numbers then blink them,
autonomously. We probably don't need to support _that_, but common
support for patterns would be good.

Best regards,
Pavel

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) 
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

RE: ATH9 driver issues on ARM64

2016-12-10 Thread Bharat Kumar Gogada

Hi,

After taking some more lecroy traces, we see that after the 2nd ASSERT from the EP on 
ARM64 there is continuous data movement of 32 dwords or 12 dwords and never a sign 
of DEASSERT.
By comparison, on working traces (x86) after the 2nd assert there are only BAR 
register reads and writes and then DEASSERT, for almost all of the interrupts, 
and we haven't seen 12 or 32 dwords data movement on this trace.

I have not worked on EP wifi/network drivers; can anyone explain why the EP needs 
to move that much data at scan time?

Regards,
Bharat

 
> Hello there,
> 
> as this is a thread about ath9k and ARM64, i'm not sure if i should answer 
> here
> or not, but i have similar "stalls" with ath9k on x86_64 (starting with 
> 4.9rc), stack
> trace is posted down below where the original ARM64 stall traces are.
> 
> Greetings,
> 
> Tobias
> 
> 
> On 08.12.2016 18:36, Kalle Valo wrote:
> > Bharat Kumar Gogada  writes:
> >
> >>   > [+cc Kalle, ath9k list]
> > Thanks, but please also CC linux-wireless. Full thread below for the
> > folks there.
> >
> >>> On Thu, Dec 08, 2016 at 01:49:42PM +, Bharat Kumar Gogada wrote:
>  Hi,
> 
>  Did anyone test Atheros ATH9
>  driver(drivers/net/wireless/ath/ath9k/)
>  on ARM64.  The end point is TP link wifi card with which supports
>  only legacy interrupts.
> >>> If it works on other arches and the arm64 PCI enumeration works, my
> >>> first guess would be an INTx issue, e.g., maybe the driver is
> >>> waiting for an interrupt that never arrives.
> >> We are not sure for now.
>  We are trying to test it on ARM64 with
>  (drivers/pci/host/pcie-xilinx-nwl.c) as root port.
> 
>  EP is getting enumerated and able to link up.
> 
>  But when we start scan system gets hanged.
> >>> When you say the system hangs when you start a scan, I assume you
> >>> mean a wifi scan, not the PCI enumeration.  A problem with a wifi
> >>> scan might cause a *process* to hang, but it shouldn't hang the
> >>> entire system.
> >>>
> >> Yes wifi scan.
>  When we took trace we see that after we start scan assert message
>  is sent but there is no de assert from end point.
> >>> Are you talking about a trace from a PCIe analyzer?  Do you see an
> >>> Assert_INTx PCIe message on the link?
> >>>
> >> Yes lecroy trace, yes we do see Assert_INTx and Deassert_INTx happening
> when we do interface link up.
> >> When we have less debug prints in Atheros driver, and do wifi scan we
> >> see Assert_INTx but never Deassert_INTx,
>  What might cause end point not sending de assert ?
> >>> If the endpoint doesn't send a Deassert_INTx message, I expect that
> >>> would mean the driver didn't service the interrupt and remove the
> >>> condition that caused the device to assert the interrupt in the
> >>> first place.
> >>>
> >>> If the driver didn't receive the interrupt, it couldn't service it,
> >>> of course.  You could add a printk in the ath9k interrupt service
> >>> routine to see if you ever get there.
> >>>
> >> The interrupt behavior is changing w.r.t amount of debug prints we
> >> add. (I kept many prints to aid debug) root@Xilinx-ZCU102-2016_3:~# iw dev
> wlan0 scan
> >> [   83.064675] ath9k: ath9k_iowrite32 ff800a400024
> >> [   83.069486] ath9k: ath9k_ioread32 ff800a400024
> >> [   83.074257] ath9k_hw_kill_interrupts 793
> >> [   83.078260] ath9k: ath9k_iowrite32 ff800a400024
> >> [   83.083107] ath9k: ath9k_ioread32 ff800a400024
> >> [   83.087882] ath9k_hw_kill_interrupts 793
> >> [   83.095450] ath9k_hw_enable_interrupts   821
> >> [   83.099557] ath9k_hw_enable_interrupts   825
> >> [   83.103721] ath9k_hw_enable_interrupts   832
> >> [   83.107887] ath9k: ath9k_iowrite32 ff800a400024
> >> [   83.112748] AR_SREV_9100 0
> >> [   83.115438] ath9k_hw_enable_interrupts   848
> >> [   83.119607] ath9k: ath9k_ioread32 ff800a400024
> >> [   83.124389] ath9k_hw_intrpend762
> >> [   83.127761] (AR_SREV_9340(ah) val 0
> >> [   83.131234] ath9k_hw_intrpend767
> >> [   83.134628] ath_isr  603
> >> [   83.137134] ath9k: ath9k_iowrite32 ff800a400024
> >> [   83.141995] ath9k: ath9k_ioread32 ff800a400024
> >> [   83.146771] ath9k_hw_kill_interrupts 793
> >> [   83.150864] ath9k_hw_enable_interrupts   821
> >> [   83.154971] ath9k_hw_enable_interrupts   825
> >> [   83.159135] ath9k_hw_enable_interrupts   832
> >> [   83.163300] ath9k: ath9k_iowrite32 ff800a400024
> >> [   83.168161] AR_SREV_9100 0
> >> [   83.170852] ath9k_hw_enable_interrupts   848
> >> [   83.170855] ath9k_hw_intrpend762
> >> [   83.178398] (AR_SREV_9340(ah) val 0
> >> [   83.181873] ath9k_hw_intrpend767
> >> [   83.185265] ath_isr  603
> >> [   83.187773] ath9k: ath9k_iowrite32 ff800a400024
> >> [   83.192635] ath9k: ath9k_ioread32 ff800a400024
> >> [   83.197411] ath9k_hw_kill_interrupts 793
> >> [   83.201414] ath9k: ath9k_ioread32 ff800a400024
> >> [   83.206258] ath9k_hw_enable_interrupts   821
> >> [   83.210368] ath9k_

Re: [kernel-hardening] Re: Remaining crypto API regressions with CONFIG_VMAP_STACK

2016-12-10 Thread Jason A. Donenfeld

Hi Herbert,

On Sat, Dec 10, 2016 at 6:37 AM, Herbert Xu  wrote:
> As for AEAD we never had a sync interface to begin with and I
> don't think I'm going to add one.

That's too bad to hear. I hope you'll reconsider. Modern cryptographic
design is heading more and more in the direction of using AEADs for
interesting things, and having a sync interface would be a lot easier
for implementing these protocols. In the same way many protocols need
a hash of some data, now protocols often want some particular data
encrypted with an AEAD using a particular key and nonce and AD. One
protocol that comes to mind is Noise [1].

I know that in my own [currently external to the tree] kernel code, I
just forego the use of the crypto API altogether, and one of the
primary reasons for that is lack of a sync interface for AEADs. When I
eventually send this upstream, presumably everyone will want me to use
the crypto API, and having a sync AEAD interface would be personally
helpful for that. I guess I could always write the sync interface
myself, but I imagine you'd prefer having the design control etc.

Jason

[1] http://noiseprotocol.org/

Re: [Question] New mmap64 syscall?

2016-12-10 Thread Pavel Machek

Hi!

> > Most of these advantages should eventually go away, when struct-reorg makes
> > it way into the compiler. That said, it’s a marginal (but real) improvement 
> > for a
> > subset of SPEC.
> > 
> > In the real world, the importance of ILP32 as an aid to transition legacy 
> > code
> > that is not 64bit clean… and this should drive the ILP32 discussion. That we
> > get a boost in our SPEC scores is just a nice extra that we get from it 
> 
> To bring this back from the philosophical questions of ABI design
> to the specific point of what file offset width you want for mmap()
> on 32-bit architectures.
> 
> For all I can tell, using mmap() to access a file that is many thousand
> times larger than your virtual address space is completely crazy.

Dunno. Wanting to mmap part of a partition does not seem too crazy... I'm pretty
sure there's some tool out there that uses mmap(), just because mmap() was nicer
to use than read(). And when the partition is big, the offset may be big.

Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) 
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

Re: [PATCH 1/2] mailbox: mailbox-test: add support for fasync/poll

2016-12-10 Thread Jassi Brar

On Fri, Dec 9, 2016 at 12:26 AM, Sudeep Holla  wrote:
>
>
> On 29/11/16 14:37, Sudeep Holla wrote:
>> Currently the read operation on the message debug file returns error if
>> there's no data ready to be read. It expects the userspace to retry if
>> it fails. Since the mailbox response could be asynchronous, it would be
>> good to add support to block the read until the data is available.
>>
>> We can also implement poll file operations so that the userspace can
>> wait to become ready to perform any I/O.
>>
>> This patch implements the poll and fasync file operation callback for
>> the test mailbox device.
>>
>> Cc: Lee Jones 
>> Cc: Jassi Brar 
>> Signed-off-by: Sudeep Holla 
>
> Gentle Ping!
>
Would be good to have Lee's ack.

Re: [Question] New mmap64 syscall?

2016-12-10 Thread Pavel Machek

On Sat 2016-12-10 10:10:01, Pavel Machek wrote:
> Hi!
> 
> > > Most of these advantages should eventually go away, when struct-reorg 
> > > makes
> > > it way into the compiler. That said, it’s a marginal (but real) 
> > > improvement for a
> > > subset of SPEC.
> > > 
> > > In the real world, the importance of ILP32 as an aid to transition legacy 
> > > code
> > > that is not 64bit clean… and this should drive the ILP32 discussion. That 
> > > we
> > > get a boost in our SPEC scores is just a nice extra that we get from it 
> > 
> > To bring this back from the philosophical questions of ABI design
> > to the specific point of what file offset width you want for mmap()
> > on 32-bit architectures.
> > 
> > For all I can tell, using mmap() to access a file that is many thousand
> > times larger than your virtual address space is completely crazy.
> 
> Dunno. Wanting to mmap part of a partition does not seem too crazy... I'm 
> pretty
> sure there's some tool out there that uses mmap(), just because mmap() was 
> nicer
> to use then read(). And when the partition is big, the offset may be big.

Actually, if I wrote something like jpegrecover, I'd use mmap() for that
(because otherwise I'd be keeping a copy of the disk in anonymous memory,
increasing memory pressure).

jpegrecover definitely makes sense on partitions...

Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) 
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

Re: [PATCH] siphash: add cryptographically secure hashtable function

2016-12-10 Thread George Spelvin

> There's a 32-bit secret random salt (inet_ehash_secret) which means
> that in practice, inet_ehashfn() will select 1 out of 2^32 different
> hash functions at random each time you boot the kernel; without
> knowing which one it selected, how can a local or remote attacker can
> force IPv4 connections/whatever to go into a single hash bucket?

By figuring out the salt.  The thing is, the timing of hash table lookups
*is externally visible*.  If I create connections to the target, then
see which ones make responses on previous connections slightly slower,
I gain information about the salt.

I don't know *where* in the hash table the collisions occur, but I
know *which* inputs collide, and that's enough for me to learn something.

(I need more connections than the size of the hash table, but even
with just one IP source I can use 64K ports on my end times however
many the target has open on its end.)

With enough information (google "unicity distance") I can recover the
entire salt.  It's not like I care about the cryptographic strength of
the hash; simply trying all 4 billion possible seeds is pretty fast on
a 4 GHz processor.

Once that happens, I can choose a target connection whose timing I can't
observe directly and pack its hash chain without being obvious about it.

> I am happy to be proven wrong, but you make it sound very easy to
> exploit the current situation, so I would just like to ask whether you
> have a concrete way to do that?

I don't think anyone's implemented an attack on this particular hash
table yet, and the reason it hasn't been urgent is that it's just a mild
DoS attack; it makes the computer noticeably slower without disabling
it completely.

But the general style of attack is well known and has been repeatedly
demonstrated.  Its practicality is not in question.  The only question is
whether it's *more* practical than simpler techniques that don't depend
on any such algorithmic subtlety, like brute-force flooding.

But if the history of Internet security has taught us one thing, it's
that naively hoping something won't be a problem is doomed.


The main issue is performance.  IPv6 addresses are big, and although
SipHash is fast by the standard of cryptographic hashes, it's far slower
than jhash or any other non-cryptographic hash.

Re: [PATCH] ASoC: topology: avoid uninitialized kcontrol_type

2016-12-10 Thread Takashi Sakamoto


Hi Arnd,

On Dec 9 2016 20:51, Arnd Bergmann wrote:

When num_kcontrols is zero, widget->dobj.widget.kcontrol_type
gets set to an uninitialized local variable:

sound/soc/soc-topology.c: In function 'soc_tplg_dapm_widget_create':
sound/soc/soc-topology.c:1566:36: error: 'kcontrol_type' may be used 
uninitialized in this function [-Werror=maybe-uninitialized]

I could not figure out which of the valid types would be appropriate
here, so this sets it to '0', which is invalid but at least well-defined
here. There is probably a better way to address the issue.

Fixes: eea3dd4f1247 ("ASoC: topology: Only free TLV for volume mixers of a 
widget")
Signed-off-by: Arnd Bergmann 
---
 sound/soc/soc-topology.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index 11feb19e9730..65670b2b408c 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -1485,6 +1485,7 @@ static int soc_tplg_dapm_widget_create(struct soc_tplg 
*tplg,
tplg->pos +=
(sizeof(struct snd_soc_tplg_dapm_widget) + w->priv.size);
if (w->num_kcontrols == 0) {
+   kcontrol_type = 0;
template.num_kcontrols = 0;
goto widget;
}


Reported-by: Dan Carpenter 
Reviewed-by: Takashi Sakamoto 

This bug was already reported but not fixed yet:
[alsa-devel] [bug report] ASoC: topology: Only free TLV for volume 
mixers of a widget

http://mailman.alsa-project.org/pipermail/alsa-devel/2016-December/115577.html

As you said, this solution looks like a band-aid; however, we have no
alternatives to fix the warning...



Regards

Takashi Sakamoto

Could we have request_firmware_nowait with FW_OPT_NO_WARN?

2016-12-10 Thread Rafał Miłecki

Hi,

In brcmfmac we use request_firmware_nowait and if fetching firmware
with NVRAM variables fails then we try to fallback to the platform one
(see brcmf_fw_request_code_done & brcmf_fw_request_nvram_done).

One problem for us is that on devices with platform NVRAM we get this error:
Direct firmware load for brcm/brcmfmac43602-pcie.txt failed with error -2
(which is harmless if getting platform NVRAM succeeds). This error is
quite confusing for users. They think something went wrong, they
expect problems & they report it back to us. Obviously I don't want
ugly hacks like:
pr_info("Got platform NVRAM, ignore above error\n");

So it would be nice to have a version of request_firmware_nowait with
FW_OPT_NO_WARN. If requesting firmware NVRAM fails *and* getting
platform NVRAM fails, then I could print the error on my own.
Does it make sense? Can you see the point of my request?

Do you have any suggestion for this? If and how I could proceed with
implementation?

request_firmware_nowait already has a "bool uevent" argument; I don't
want it to have an argument for every available option. I was thinking
about moving FW_OPT_* defines to the include/linux/firmware.h but I'm
not sure if it's OK as they depend on:
CONFIG_FW_LOADER_USER_HELPER
and
CONFIG_FW_LOADER_USER_HELPER_FALLBACK
With defines placed in firmware.h I could replace "bool uevent" with
"unsigned int opt_flags".
Does it sound like a good plan? Or do you have any better idea?

-- 
Rafał

WARNING: kernel stack frame pointer at ffffffff82e03f40 in swapper:0 has bad value (null)

Hey,

I see the line in $Subject on rc8+tip/master.

Fixed already?

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

Re: [PATCH v2 2/2] x86/KASLR/64: Determine kernel text mapping size at runtime

On Sat, Dec 10, 2016 at 09:41:56PM +0800, Baoquan He wrote:
> 1) Fedora 25 defaults to enable CONFIG_RANDOMIZE_BASE. And this worries
> maintainers of several Fedora component. People ever asked me how to
> judge whether it's a kaslr kernel. I told them I usually read elf header
> of kcore - "readelf -l /proc/kcore" to check it. If the 'VirtAddr' of
> segments like kernel text, modules, direct mapping is changed, it should
> be kaslr kernel. Then they said why I have specified 'nokaslr', the
> virtual address of modules is not '0xa000', but
> '0xc000'. OK, I realized this is not right, it need be
> fixed.

So people want to know whether the kernel they're running has KASLR
enabled or not.

Clearly they can grep their config. And then check whether "nokaslr" has
been added to the kernel command line or not. Done.

> So in v2 I didn't mention problem about Crash. But case 1) need be
> cared, whether kaslr code is compiled or not, it should not confuse
> people, should not make difference between kaslr code not compiled in
> and kaslr code compiled in with 'nokaslr' specified.

That's exactly the point - people should *not* care whether it is a
kernel with KASLR enabled or not - stuff should just work. So what
you're trying to "fix" here is an exercise of pointlessness, IMO. Unless
you give me a real, valid reason why people need a *defined* interface
to ask whether the kernel has KASLR enabled or not.

And even then, looking at KERNEL_IMAGE_SIZE is still the wrong way to do
it.

-- 
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 
(AG Nürnberg)
--

Re: [PATCHSET 0/6] perf sched timehist: Introduce --idle-hist option (v2)

2016-12-10 Thread David Ahern

On 12/8/16 7:47 AM, Namhyung Kim wrote:
> Hi,
> 
> This patchset implements the idle hist feature which analyze reason of system
> idle.  Sometimes I need to investigate what makes CPUs to go idle even though
> I have jobs to do.  It may be due to I/O, waiting on lock or whatever.
> 

...

> Namhyung Kim (6):
>   perf sched timehist: Split is_idle_sample()
>   perf sched timehist: Introduce struct idle_time_data
>   perf sched timehist: Save callchain when entering idle
>   perf sched timehist: Skip non-idle events when necessary
>   perf sched timehist: Add -I/--idle-hist option
>   perf sched timehist: Show callchains for idle stat
> 
>  tools/perf/Documentation/perf-sched.txt |   4 +
>  tools/perf/builtin-sched.c  | 252 
> +++-
>  2 files changed, 222 insertions(+), 34 deletions(-)
> 

LGTM

Acked-by: David Ahern 

Suggested improvement: add the length of the time window, i.e., data collected
(or analyzed over if --time is used) for N.M seconds. This puts the amount of
idle time into perspective.

Re: enabling COMPILE_TEST support for GCC plugins in v4.11

2016-12-10 Thread Emese Revfy

On Fri, 9 Dec 2016 11:12:18 -0800
Kees Cook  wrote:

> On Fri, Dec 9, 2016 at 2:40 AM, Arnd Bergmann  wrote:
> > On Thursday, December 8, 2016 11:00:42 AM CET Kees Cook wrote:
> >
> >> If you have a moment, applying 215e2aa6c024[1] and reverting
> >> a519167e753e for an allyesconfig/allmodconfig build should let you
> >> know if things are working correctly with headers installed. If anyone
> >> sees any problems, please let me know and I can queue up fixes.
> >
> > Using gcc-4.9.3 or gcc-5.3.1 for an ARM allmodconfig build, I get tons of
> > errors such as this one:
> >
> > /git/arm-soc/init/initramfs.c: In function 'error':
> > /git/arm-soc/init/initramfs.c:50:1: error: unrecognizable insn:
> >  }
> >  ^
> > (insn 26 25 27 5 (set (reg:SI 111 [ local_entropy.243 ])
> > (rotatert:SI (reg:SI 116 [ local_entropy.243 ])
> > (const_int -30 [0xffe2]))) -1
> >  (nil))
> > *** WARNING *** there are active plugins, do not report this as a bug 
> > unless you can reproduce it without enabling any plugins.
> > Event| Plugins
> > PLUGIN_ATTRIBUTES| latent_entropy_plugin
> > PLUGIN_START_UNIT| latent_entropy_plugin
> > /git/arm-soc/init/initramfs.c:50:1: internal compiler error: in 
> > extract_insn, at recog.c:2202
> > /git/arm-soc/arch/arm/vfp/vfpmodule.c: In function 'vfp_init':
> > /git/arm-soc/arch/arm/vfp/vfpmodule.c:824:1: error: unrecognizable insn:
> >  }
> >  ^
> > (insn 138 137 139 17 (set (reg:SI 165 [ local_entropy.93 ])
> > (rotatert:SI (reg:SI 150 [ local_entropy.93 ])
> > (const_int -9 [0xfff7]))) -1
> >  (nil))
> > *** WARNING *** there are active plugins, do not report this as a bug 
> > unless you can reproduce it without enabling any plugins.
> 
> Well that's exciting! :P

Hi,

You can find the fix here:
https://github.com/ephox-gcc-plugins/latent_entropy/commit/c91275a1bfcebbcfc0ca1af03396e06039f04db8

-- 
Emese

[PATCH] remove unused parameter

2016-12-10 Thread Weiwei Jia

Remove unused parameter in wakeup_gran function for Completely
Fair Scheduling (CFS).

Signed-off-by: Weiwei Jia 
---
 kernel/sched/fair.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c242944..f4375d4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5763,7 +5763,7 @@ static void task_dead_fair(struct task_struct *p)
 #endif /* CONFIG_SMP */
 
 static unsigned long
-wakeup_gran(struct sched_entity *curr, struct sched_entity *se)
+wakeup_gran(struct sched_entity *se)
 {
unsigned long gran = sysctl_sched_wakeup_granularity;
 
@@ -5805,7 +5805,7 @@ static void task_dead_fair(struct task_struct *p)
if (vdiff <= 0)
return -1;
 
-   gran = wakeup_gran(curr, se);
+   gran = wakeup_gran(se);
if (vdiff > gran)
return 1;
 
-- 
1.9.1

Re: [PATCH/RFC net-next] net: fec: allow "mini jumbo" frames

Hi Nikita,

Nikita Yushchenko  writes:

> This adds support for MTU slightly larger than default, on modern
> FEC flavours.
>
> Currently FEC driver uses single hardware Rx buffer per frame. On most
> FEC flavours, size of single buffer is limited by 11-bit field, and
> has to be multiple of 64 (in the worst case). Thus maximum usable Rx
> buffer size is 1984 bytes.
>
> Of those:
> - 2 bytes are used for IP header alignment,
> - 14 bytes are used by ethhdr,
> - up to 8 bytes are needed for VLAN and/or DSA tags,
> - 4 bytes are needed for CRC.
>
> Thus maximum MTU possible within current RX architecture is 1956.
>
> This patch allows exactly that. For further increase, Rx architecture
> change is needed.
>
> Use of MTU=1956 gives about 1.5% throughput improvement between two Vybrid
> boards, compared to default MTU=1500.
>
> Signed-off-by: Nikita Yushchenko 

For what it's worth, I have tested your patch on my ZII Rev B boards
(see vf610-zii-dev-rev-b.dts) which have a FEC as the master net device
of their DSA trees. They still work as expected.

Tested-by: Vivien Didelot 

Thanks,

Vivien

Re: WARNING: kernel stack frame pointer at ffffffff82e03f40 in swapper:0 has bad value (null)

2016-12-10 Thread Josh Poimboeuf

On Sat, Dec 10, 2016 at 05:17:49PM +0100, Borislav Petkov wrote:
> Hey,
> 
> I see the line in $Subject on rc8+tip/master.
> 
> Fixed already?

That's a new one.  Was there anything else printed?  Were you doing
anything special when it happened?  Do you see it reliably?  I should
probably figure out a way to dump more data for that warning.

-- 
Josh

Re: [PATCH v3 00/15] livepatch: hybrid consistency model

2016-12-10 Thread Josh Poimboeuf

On Sat, Dec 10, 2016 at 04:46:17PM +1100, Balbir Singh wrote:
> On Thu, 2016-12-08 at 12:08 -0600, Josh Poimboeuf wrote:
> > Dusting the cobwebs off the consistency model again.  This is based on
> > linux-next/master.
> > 
> > v1 was posted on 2015-02-09:
> > 
> >   https://lkml.kernel.org/r/cover.1423499826.git.jpoim...@redhat.com
> > 
> > v2 was posted on 2016-04-28:
> > 
> >   https://lkml.kernel.org/r/cover.1461875890.git.jpoim...@redhat.com
> > 
> > The biggest issue from v2 was finding a decent way to detect preemption
> > and page faults on the stack of a sleeping task.  
> 
> Could you please elaborate on this? Preemption of a sleeping task and
> faults as in the future (time) preemption and faults?

The normal way for a task to go to sleep is to call schedule().  objtool
ensures the stack trace is reliable in that case, by making sure that
all functions save the frame pointer on the stack before calling out to
another function.

But a task can also go to sleep in a few other ways.  One way is by
preemption, where an interrupt handler interrupts the task and calls
preempt_schedule_irq().  Another way is by a page fault exception.  In
both cases, there's no guarantee that the interrupted function saved the
frame pointer on the stack beforehand.  So the stack trace might be
unreliable.  Fortunately, interrupts and exceptions leave evidence
behind on the stack.  So when walking the stack of a sleeping task, we
can detect when an IRQ or exception occurred, and consider such a stack
unreliable.

-- 
Josh

[PATCH] mm: fadvise: avoid expensive remote LRU cache draining after FADV_DONTNEED

2016-12-10 Thread Johannes Weiner

When FADV_DONTNEED cannot drop all pages in the range, it observes
that some pages might still be on per-cpu LRU caches after recent
instantiation and so initiates remote calls to all CPUs to flush their
local caches. However, in most cases, the fadvise happens from the
same context that instantiated the pages, and any pre-LRU pages in the
specified range are most likely sitting on the local CPU's LRU cache,
and so in many cases this results in unnecessary remote calls, which,
in a loaded system, can hold up the fadvise() call significantly.

Try to avoid the remote call by flushing the local LRU cache before
even attempting to invalidate anything. It's a cheap operation, and
the local LRU cache is the most likely to hold any pre-LRU pages in
the specified fadvise range.

Signed-off-by: Johannes Weiner 
---
 mm/fadvise.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/mm/fadvise.c b/mm/fadvise.c
index 6c707bfe02fd..a43013112581 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -139,7 +139,20 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, 
loff_t, len, int, advice)
}
 
if (end_index >= start_index) {
-   unsigned long count = invalidate_mapping_pages(mapping,
+   unsigned long count;
+
+   /*
+* It's common to FADV_DONTNEED right after
+* the read or write that instantiates the
+* pages, in which case there will be some
+* sitting on the local LRU cache. Try to
+* avoid the expensive remote drain and the
+* second cache tree walk below by flushing
+* them out right away.
+*/
+   lru_add_drain();
+
+   count = invalidate_mapping_pages(mapping,
start_index, end_index);
 
/*
-- 
2.10.2

Re: WARNING: kernel stack frame pointer at ffffffff82e03f40 in swapper:0 has bad value (null)

On Sat, Dec 10, 2016 at 11:04:44AM -0600, Josh Poimboeuf wrote:
> That's a new one.  Was there anything else printed?

It is the first line that appears in dmesg when I boot:

[0.00] WARNING: kernel stack frame pointer at b5e03f40 in 
swapper:0 has bad value   (null)
[0.00] Linux version 4.9.0-rc8+ (boris@gondor) (gcc version 6.2.0 
20161109 (Debian 6.2.0-13) ) #1 SMP PREEMPT Sat Dec 10 13:25:29 CET 2016
[0.00] Command line: BOOT_IMAGE=/boot/vmlinuz-4.9.0-rc8+ root=/dev/sda7 
ro earlyprintk=ttyS0,115200 console=ttyS0,115200 console=tty0 root=/dev/sda7 
log_buf_len=10M resume=/dev/sda5 no_console_suspend ignore_loglevel
[0.00] KERNEL supported cpus:
[0.00]   Intel GenuineIntel
[0.00]   AMD AuthenticAMD
[0.00]   Centaur CentaurHauls
[0.00] x86/fpu: Supporting XSAVE feature 0x001: 'x87 floating point 
registers'
...

> Were you doing anything special when it happened?

Not really - just booting ;-)

> Do you see it reliably?

2 of 2 boots.

> I should probably figure out a way to dump more data for that warning.

Sure, I can test patches.

Thanks.

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

Re: [PATCH] iio: adc: max1027: allocate DMA-safe buffer

On 09/12/16 10:24, Marcus Folkesson wrote:
> The buffer needs to be DMA-safe when used with spi_read()
> 
> Signed-off-by: Marcus Folkesson 
Please read the documentation in include/linux/gfp.h about GFP_DMA.

Specifically:
220  * GFP_DMA exists for historical reasons and should be avoided where 
possible.
221  *   The flags indicates that the caller requires that the lowest zone be
222  *   used (ZONE_DMA or 16M on x86-64). Ideally, this would be removed but
223  *   it would require careful auditing as some users really require it and
224  *   others use the flag to avoid lowmem reserves in ZONE_DMA and treat the
225  *   lowest zone as a type of emergency reserve.

Seems unlikely this applies!  This caught me by surprise as I didn't even know
that flag existed - hence I went digging.

Jonathan
> ---
>  drivers/iio/adc/max1027.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/iio/adc/max1027.c b/drivers/iio/adc/max1027.c
> index 712fbd2b1f16..ff1f1f15a873 100644
> --- a/drivers/iio/adc/max1027.c
> +++ b/drivers/iio/adc/max1027.c
> @@ -435,7 +435,7 @@ static int max1027_probe(struct spi_device *spi)
>  
>   st->buffer = devm_kmalloc(&indio_dev->dev,
> indio_dev->num_channels * 2,
> -   GFP_KERNEL);
> +   GFP_KERNEL | GFP_DMA);
>   if (st->buffer == NULL) {
>   dev_err(&indio_dev->dev, "Can't allocate buffer\n");
>   return -ENOMEM;
>

Re: [RFC 0/5] rcu: Introduce leaf_node_for_each_mask_possible_cpu() and its friend

2016-12-10 Thread Paul E. McKenney

On Sat, Dec 10, 2016 at 09:36:29PM +0800, Boqun Feng wrote:
> On Fri, Dec 09, 2016 at 08:28:05PM -0800, Paul E. McKenney wrote:
> > On Sat, Dec 10, 2016 at 08:45:38AM +0800, Boqun Feng wrote:
> > > On Fri, Dec 09, 2016 at 03:49:45PM -0800, Paul E. McKenney wrote:
> > > > On Fri, Dec 09, 2016 at 04:48:22PM +0800, Boqun Feng wrote:
> > > > > Hi Paul,
> > > > > 
> > > > > While reading the discussion at:
> > > > > 
> > > > > https://marc.info/?l=linux-kernel&m=148044253400769
> > > > 
> > > > This discussion was for stalls specifically, rather than for routine
> > > > scans of the bitmasks.
> > > > 
> > > > But it does look to save some code, so worth looking into.
> > > > 
> > > > > I figured we might use this fact to save some extra checks in RCU 
> > > > > core code,
> > > > > currently we iterate over all the possible CPUs on a leaf node, check 
> > > > > whether
> > > > > they were masked in a certain mask and do something. However, given 
> > > > > the fact
> > > > > that the masks on a leaf node should always be sparse than the 
> > > > > corresponding
> > > > > part of cpu_possible_mask, we'd better iterate over all bits in a 
> > > > > mask and
> > > > > check whether the corresponding CPU is possible or not.
> > > > > 
> > > > > So I made this RFC, I did a simple build/boot/rcutorture test on my 
> > > > > box with
> > > > > SMP=4, nothing bad happens. Currently I'm waiting for the 0day and 
> > > > > trying to
> > > > > test this one a bigger system, in the meanwhile, looking forwards to 
> > > > > any
> > > > > comment and suggestion.
> > > > > 
> > > > > So thoughts?
> > > > 
> > > > By analogy with for_each_cpu() and for_each_possible_cpu(), the name
> > > > should instead be for_each_leaf_node_cpu(), the tradition of excessively
> > > > long names in RCU notwithstanding.  ;-)
> > > > 
> > > 
> > > Make sense ;-)
> > > 
> > > I think it's more appropriate to call it for_each_leaf_node_mask_cpu(),
> > > because we don't iterate all cpus of a leaf node. The word "possible"
> > > could be dropped because obviously we won't iterate over "impossible"
> > > cpus in a leaf node ;-)
> > 
> > C'mon, Boqun!  The for_each_leaf_node_cpu() is not only consistent
> > with the for_each_cpu() family, it is shorter!  ;-)
> 
> Sure ;-) But for_each_leaf_node_cpu() seems like an operation that
> iterates over _all_ cpus in a leaf node, but I actually implement it as
> an operation that iterates only the _masked_ cpus. So I feel like word
> "mask" better be added in the name.

Although that is a fair point, the same can be said of for_each_cpu().
Which people seem to be able to use without undue pain.

> If we call it for_each_leaf_node_cpu(rnp, mask,...), we will rely on the
> hope that readers could figure it out what the primitive actually does
> by the indication of the parameter @mask.
> 
> I like shorter names too, but not sure whether putting "mask" in the
> name is better. After all, naming is one of the most difficult
> challenges in programming ;-)

The two most difficult challenges in programming are the last two hard
things that the person speaking worked on.  ;-)

Consistency is more important than the stand-alone understanding of
this particular name.  You can always add a comment pointing out that
it follows for_each_cpu().

Thanx, Paul

> Regards,
> Boqun
> 
> > Thanx, Paul
> > 
> > > Will modify that in next version.
> > > 
> > > Regards,
> > > Boqun
> > > 
> > > > Thanx, Paul
> > > > 
> > 
> >

Re: [PATCH v12 0/5] mxs-lradc: Split driver into MFD

On 09/12/16 21:08, Ksenija Stanojevic wrote:
> Split existing driver mxs-lradc into MFD with touchscreen and
> IIO part.
> 
> Tested on I.MX28
Hi All,

At first glance it looks like this is over to Dmitry and Lee.
If they are both happy...

Lee, I'm assuming this whole lot will go through MFD.  Please work your
magic with an immutable branch as I guess Dmitry and I may well want
to pull this in at some point over the coming cycle as well.

If for whatever reason you would prefer I pick it up in the first instance
and do a branch for you and Dmitry, give me a shout.

Coming together very nicely and will be good to finally get this sorted!

Thanks,

Jonathan
> 
> Ksenija Stanojevic (5):
>   mfd: mxs-lradc: Add support for mxs-lradc MFD
>   iio: adc: mxs-lradc: Add support for adc driver
>   input: touchscreen: mxs-lradc: Add support for touchscreen
>   iio: adc: mxs-lradc: Remove driver
>   mfd: Move binding document
> 
>  .../bindings/{iio/adc => mfd}/mxs-lradc.txt|0
>  drivers/iio/adc/Kconfig|   27 +-
>  drivers/iio/adc/Makefile   |2 +-
>  drivers/iio/adc/mxs-lradc-adc.c|  843 ++
>  drivers/iio/adc/mxs-lradc.c| 1750 
> 
>  drivers/input/touchscreen/Kconfig  |   10 +
>  drivers/input/touchscreen/Makefile |1 +
>  drivers/input/touchscreen/mxs-lradc-ts.c   |  739 +
>  drivers/mfd/Kconfig|   17 +
>  drivers/mfd/Makefile   |1 +
>  drivers/mfd/mxs-lradc.c|  264 +++
>  include/linux/mfd/mxs-lradc.h  |  187 +++
>  12 files changed, 2076 insertions(+), 1765 deletions(-)
>  rename Documentation/devicetree/bindings/{iio/adc => mfd}/mxs-lradc.txt 
> (100%)
>  create mode 100644 drivers/iio/adc/mxs-lradc-adc.c
>  delete mode 100644 drivers/iio/adc/mxs-lradc.c
>  create mode 100644 drivers/input/touchscreen/mxs-lradc-ts.c
>  create mode 100644 drivers/mfd/mxs-lradc.c
>  create mode 100644 include/linux/mfd/mxs-lradc.h
>

Re: [kernel-hardening] Re: Remaining crypto API regressions with CONFIG_VMAP_STACK

2016-12-10 Thread Andy Lutomirski

cc: Viro because I'm talking about iov_iter.

On Sat, Dec 10, 2016 at 6:45 AM, Jason A. Donenfeld  wrote:
> Hi Herbert,
>
> On Sat, Dec 10, 2016 at 6:37 AM, Herbert Xu  
> wrote:
>> As for AEAD we never had a sync interface to begin with and I
>> don't think I'm going to add one.
>
> That's too bad to hear. I hope you'll reconsider. Modern cryptographic
> design is heading more and more in the direction of using AEADs for
> interesting things, and having a sync interface would be a lot easier
> for implementing these protocols. In the same way many protocols need
> a hash of some data, now protocols often want some particular data
> encrypted with an AEAD using a particular key and nonce and AD. One
> protocol that comes to mind is Noise [1].
>

I think that sync vs async has gotten conflated with
vectored-vs-nonvectored and the results are unfortunate.

There are a lot of users in the tree that are trying to do crypto on
very small pieces of data and want to have that data consist of the
concatenation of two small buffers and/or want to use primitives that
don't have "sync" interfaces.  These users are stuck using async
interfaces even though using async implementations makes no sense for
them.

I'd love to see the API restructured a bit to decouple all of these
considerations.  One approach might be to teach iov_iter about
scatterlists.  Then, for each primitive, there could be two entry
points:

1. A simplified and lower-overhead entry.  You pass it an iov_iter
(and, depending on what the operation is, an output iov_iter), it does
the crypto synchronously, and returns.  Operating in-place might be
permitted for some primitives.

2. A full-featured async entry.  You pass it iov_iters and it requires
that the iov_iters be backed by scatterlists in order to operate
asynchronously.

I see no reason that the decisions to use virtual vs physical
addressing or to do vectored vs non-vectored IO should be tied up with
asynchronicity.

--Andy

Re: [PATCH] Staging: iio: impedance-analyzer: ad5933: fix wrong comments

On 07/12/16 17:44, Nizam Haider wrote:
> according to datasheet complete control register is of 2 bytes.
> http://www.analog.com/media/en/technical-documentation/data-sheets/AD5933.pdf
> 
> Signed-off-by: Nizam Haider 
Applied to the togreg branch of iio.git - initially pushed out as testing so the
autobuilders can completely ignore this patch ;)

Jonathan
> ---
>  drivers/staging/iio/impedance-analyzer/ad5933.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c 
> b/drivers/staging/iio/impedance-analyzer/ad5933.c
> index 3892a74..f39e03a 100644
> --- a/drivers/staging/iio/impedance-analyzer/ad5933.c
> +++ b/drivers/staging/iio/impedance-analyzer/ad5933.c
> @@ -23,8 +23,8 @@
>  #include 
>  
>  /* AD5933/AD5934 Registers */
> -#define AD5933_REG_CONTROL_HB0x80/* R/W, 2 bytes */
> -#define AD5933_REG_CONTROL_LB0x81/* R/W, 2 bytes */
> +#define AD5933_REG_CONTROL_HB0x80/* R/W, 1 byte */
> +#define AD5933_REG_CONTROL_LB0x81/* R/W, 1 byte */
>  #define AD5933_REG_FREQ_START0x82/* R/W, 3 bytes */
>  #define AD5933_REG_FREQ_INC  0x85/* R/W, 3 bytes */
>  #define AD5933_REG_INC_NUM   0x88/* R/W, 2 bytes, 9 bit */
>

core.c:undefined reference to `fpu_save'

2016-12-10 Thread kbuild test robot

Hi Andrew,

It's probably a bug fix that unveils the link errors.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   810ac7b7558d7830e72d8dbf34b851fce39e08b0
commit: c60f169202c7643991a8b4bfeea60e06843d5b5a 
arch/mn10300/kernel/fpu-nofpu.c: needs asm/elf.h
date:   9 months ago
config: mn10300-allnoconfig (attached as .config)
compiler: am33_2.0-linux-gcc (GCC) 6.2.0
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout c60f169202c7643991a8b4bfeea60e06843d5b5a
# save the attached .config to linux build tree
make.cross ARCH=mn10300 

All errors (new ones prefixed by >>):

   kernel/built-in.o: In function `.L410':
>> core.c:(.sched.text+0x28a): undefined reference to `fpu_save'

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip

[PATCH v2] llist: Clarify comments about when locking is needed

2016-12-10 Thread Joel Fernandes

llist.h comments are a bit confusing about when locking is needed versus when
it isn't. Clarify these comments a bit more by being a bit more descriptive
about why locking is needed for llist_del_first.

Cc: Huang Ying 
Cc: Ingo Molnar 
Cc: Will Deacon 
Cc: Paul McKenney 
Acked-by: Mathieu Desnoyers 
Signed-off-by: Joel Fernandes 
---
v2 changes:
Minor changes to comment and commit message based on Mathieu's suggestions
(https://lkml.org/lkml/2016/12/10/39)

 include/linux/llist.h | 37 +
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/include/linux/llist.h b/include/linux/llist.h
index fd4ca0b..31822bb 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -3,28 +3,33 @@
 /*
  * Lock-less NULL terminated single linked list
  *
- * If there are multiple producers and multiple consumers, llist_add
- * can be used in producers and llist_del_all can be used in
- * consumers.  They can work simultaneously without lock.  But
- * llist_del_first can not be used here.  Because llist_del_first
- * depends on list->first->next does not changed if list->first is not
- * changed during its operation, but llist_del_first, llist_add,
- * llist_add (or llist_del_all, llist_add, llist_add) sequence in
- * another consumer may violate that.
- *
- * If there are multiple producers and one consumer, llist_add can be
- * used in producers and llist_del_all or llist_del_first can be used
- * in the consumer.
- *
- * This can be summarized as follow:
+ * Cases where locking is not needed:
+ * If there are multiple producers and multiple consumers, llist_add can be
+ * used in producers and llist_del_all can be used in consumers simultaneously
+ * without locking. Also a single consumer can use llist_del_first while
+ * multiple producers simultaneously use llist_add, without any locking.
+ *
+ * Cases where locking is needed:
+ * If we have multiple consumers with llist_del_first used in one consumer, and
+ * llist_del_first or llist_del_all used in other consumers, then a lock is
+ * needed.  This is because llist_del_first depends on list->first->next not
+ * changing, but without lock protection, there's no way to be sure about that
+ * if a preemption happens in the middle of the delete operation and on being
+ * preempted back, the list->first is the same as before causing the cmpxchg in
+ * llist_del_first to succeed. For example, while a llist_del_first operation
+ * is in progress in one consumer, then a llist_del_first, llist_add,
+ * llist_add (or llist_del_all, llist_add, llist_add) sequence in another
+ * consumer may cause violations.
+ *
+ * This can be summarized as follows:
  *
  *   |   add| del_first |  del_all
  * add   |- | - | -
  * del_first |  | L | L
  * del_all   |  |   | -
  *
- * Where "-" stands for no lock is needed, while "L" stands for lock
- * is needed.
+ * Where, a particular row's operation can happen concurrently with a column's
+ * operation, with "-" being no lock needed, while "L" being lock is needed.
  *
  * The list entries deleted via llist_del_all can be traversed with
  * traversing function such as llist_for_each etc.  But the list
-- 
2.8.0.rc3.226.g39d4020

[PATCH v2 0/5] Fix ups to make lustre_idl.h a proper UAPI header

From: James Simmons James Simmons 

Fix ups to make lustre_idl.h a proper UAPI header

The header lustre_idl.h is a UAPI header which contains extras
that don't belong. This patch set moves a bunch of very kernel
specific material out of the header. Lastly proper byteorder
functions are used so this file can build in user space as well.

Ben Evans (4):
  staging: lustre: obdclass: Create a header for obdo related functions
  staging: lustre: obdclass: style cleanup for obdo related functions
  staging: lustre: headers: sort headers affected by obdo move
  staging: lustre: headers: Move functions out of lustre_idl.h

James Simmons (1):
  staging: lustre: headers: use proper byteorder functions in lustre_idl.h

 .../lustre/lustre/include/lustre/lustre_idl.h  | 174 -
 drivers/staging/lustre/lustre/include/lustre_dlm.h |  13 ++
 .../staging/lustre/lustre/include/lustre_obdo.h|  54 +++
 .../staging/lustre/lustre/include/lustre_swab.h|   6 +
 drivers/staging/lustre/lustre/ldlm/ldlm_internal.h |   6 +
 drivers/staging/lustre/lustre/ldlm/ldlm_request.c  |  27 
 drivers/staging/lustre/lustre/mdc/mdc_lib.c|   6 +
 drivers/staging/lustre/lustre/obdclass/obdo.c  |  54 +++
 drivers/staging/lustre/lustre/osc/osc_io.c |   2 +
 drivers/staging/lustre/lustre/osc/osc_request.c|  16 +-
 10 files changed, 206 insertions(+), 152 deletions(-)
 create mode 100644 drivers/staging/lustre/lustre/include/lustre_obdo.h

--
1.8.3.1

[PATCH v2 1/5] staging: lustre: obdclass: Create a header for obdo related functions

From: Ben Evans 

Remove all obdo related functions from lustre_idl.h
Create lustre_obdo.h. Include where appropriate.
Make the functions lustre_get_wire_obdo and
lustre_set_wire_obdo to not be inlined functions.

Signed-off-by: Ben Evans 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-6401
Reviewed-on: http://review.whamcloud.com/16917
Reviewed-on: http://review.whamcloud.com/19266
Reviewed-by: James Simmons 
Reviewed-by: Andreas Dilger 
Reviewed-by: John L. Hammond 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---

Changelog:

v1) Initial patch from earlier version that broke build

v2) Include lustre_obdo.h to osc_request.c so it can
build. Next move style changes from moved functions
to separate patch.

 .../lustre/lustre/include/lustre/lustre_idl.h  | 46 --
 .../staging/lustre/lustre/include/lustre_obdo.h| 54 ++
 drivers/staging/lustre/lustre/obdclass/obdo.c  | 53 +
 drivers/staging/lustre/lustre/osc/osc_io.c |  2 +
 drivers/staging/lustre/lustre/osc/osc_request.c|  1 +
 5 files changed, 110 insertions(+), 46 deletions(-)
 create mode 100644 drivers/staging/lustre/lustre/include/lustre_obdo.h

diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h 
b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
index 65ce503..b0eb80d 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
@@ -3130,52 +3130,6 @@ struct obdo {
 #define o_cksum   o_nlink
 #define o_grant_used o_data_version
 
-static inline void lustre_set_wire_obdo(const struct obd_connect_data *ocd,
-   struct obdo *wobdo,
-   const struct obdo *lobdo)
-{
-   *wobdo = *lobdo;
-   wobdo->o_flags &= ~OBD_FL_LOCAL_MASK;
-   if (!ocd)
-   return;
-
-   if (unlikely(!(ocd->ocd_connect_flags & OBD_CONNECT_FID)) &&
-   fid_seq_is_echo(ostid_seq(&lobdo->o_oi))) {
-   /* Currently OBD_FL_OSTID will only be used when 2.4 echo
-* client communicate with pre-2.4 server
-*/
-   wobdo->o_oi.oi.oi_id = fid_oid(&lobdo->o_oi.oi_fid);
-   wobdo->o_oi.oi.oi_seq = fid_seq(&lobdo->o_oi.oi_fid);
-   }
-}
-
-static inline void lustre_get_wire_obdo(const struct obd_connect_data *ocd,
-   struct obdo *lobdo,
-   const struct obdo *wobdo)
-{
-   __u32 local_flags = 0;
-
-   if (lobdo->o_valid & OBD_MD_FLFLAGS)
-   local_flags = lobdo->o_flags & OBD_FL_LOCAL_MASK;
-
-   *lobdo = *wobdo;
-   if (local_flags != 0) {
-   lobdo->o_valid |= OBD_MD_FLFLAGS;
-   lobdo->o_flags &= ~OBD_FL_LOCAL_MASK;
-   lobdo->o_flags |= local_flags;
-   }
-   if (!ocd)
-   return;
-
-   if (unlikely(!(ocd->ocd_connect_flags & OBD_CONNECT_FID)) &&
-   fid_seq_is_echo(wobdo->o_oi.oi.oi_seq)) {
-   /* see above */
-   lobdo->o_oi.oi_fid.f_seq = wobdo->o_oi.oi.oi_seq;
-   lobdo->o_oi.oi_fid.f_oid = wobdo->o_oi.oi.oi_id;
-   lobdo->o_oi.oi_fid.f_ver = 0;
-   }
-}
-
 /* request structure for OST's */
 struct ost_body {
struct  obdo oa;
diff --git a/drivers/staging/lustre/lustre/include/lustre_obdo.h 
b/drivers/staging/lustre/lustre/include/lustre_obdo.h
new file mode 100644
index 000..1e12f8c
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_obdo.h
@@ -0,0 +1,54 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2014, Intel Corporation.
+ *
+ * Copyright 2015 Cray Inc, all rights reserved.
+ * Author: Ben Evans.
+ *
+ * Define obdo associated functions
+ *   obdo:  OBject Device o...
+ */
+
+#ifndef _LUSTRE_OBDO_H_
+#define _LUSTRE_OBDO_H_
+
+#include "lustre/lustre_idl.h"
+
+/**
+ * Create an obdo to send over the wire
+ */
+void lustre_set_wire_obdo(const struct obd

[PATCH v2 5/5] staging: lustre: headers: use proper byteorder functions in lustre_idl.h

In order for lustre_idl.h to be usable for both user
land and kernel space it has to use the proper
byteorder functions.

Signed-off-by: James Simmons 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-6245
Reviewed-on: http://review.whamcloud.com/16916
Reviewed-by: Frank Zago 
Reviewed-by: Dmitry Eremin 
Reviewed-by: Oleg Drokin 
Reviewed-by: John L. Hammond 
Signed-off-by: James Simmons 
---
 .../lustre/lustre/include/lustre/lustre_idl.h  | 55 --
 1 file changed, 29 insertions(+), 26 deletions(-)

diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h 
b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
index cd2dbfb..3d74d56 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
@@ -69,6 +69,9 @@
 #ifndef _LUSTRE_IDL_H_
 #define _LUSTRE_IDL_H_
 
+#include 
+#include 
+
 #include "../../../include/linux/libcfs/libcfs.h"
 #include "../../../include/linux/lnet/types.h"
 
@@ -687,30 +690,30 @@ static inline void lu_igif_build(struct lu_fid *fid, 
__u32 ino, __u32 gen)
  */
 static inline void fid_cpu_to_le(struct lu_fid *dst, const struct lu_fid *src)
 {
-   dst->f_seq = cpu_to_le64(fid_seq(src));
-   dst->f_oid = cpu_to_le32(fid_oid(src));
-   dst->f_ver = cpu_to_le32(fid_ver(src));
+   dst->f_seq = __cpu_to_le64(fid_seq(src));
+   dst->f_oid = __cpu_to_le32(fid_oid(src));
+   dst->f_ver = __cpu_to_le32(fid_ver(src));
 }
 
 static inline void fid_le_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
 {
-   dst->f_seq = le64_to_cpu(fid_seq(src));
-   dst->f_oid = le32_to_cpu(fid_oid(src));
-   dst->f_ver = le32_to_cpu(fid_ver(src));
+   dst->f_seq = __le64_to_cpu(fid_seq(src));
+   dst->f_oid = __le32_to_cpu(fid_oid(src));
+   dst->f_ver = __le32_to_cpu(fid_ver(src));
 }
 
 static inline void fid_cpu_to_be(struct lu_fid *dst, const struct lu_fid *src)
 {
-   dst->f_seq = cpu_to_be64(fid_seq(src));
-   dst->f_oid = cpu_to_be32(fid_oid(src));
-   dst->f_ver = cpu_to_be32(fid_ver(src));
+   dst->f_seq = __cpu_to_be64(fid_seq(src));
+   dst->f_oid = __cpu_to_be32(fid_oid(src));
+   dst->f_ver = __cpu_to_be32(fid_ver(src));
 }
 
 static inline void fid_be_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
 {
-   dst->f_seq = be64_to_cpu(fid_seq(src));
-   dst->f_oid = be32_to_cpu(fid_oid(src));
-   dst->f_ver = be32_to_cpu(fid_ver(src));
+   dst->f_seq = __be64_to_cpu(fid_seq(src));
+   dst->f_oid = __be32_to_cpu(fid_oid(src));
+   dst->f_ver = __be32_to_cpu(fid_ver(src));
 }
 
 static inline bool fid_is_sane(const struct lu_fid *fid)
@@ -747,8 +750,8 @@ static inline void ostid_cpu_to_le(const struct ost_id 
*src_oi,
   struct ost_id *dst_oi)
 {
if (fid_seq_is_mdt0(ostid_seq(src_oi))) {
-   dst_oi->oi.oi_id = cpu_to_le64(src_oi->oi.oi_id);
-   dst_oi->oi.oi_seq = cpu_to_le64(src_oi->oi.oi_seq);
+   dst_oi->oi.oi_id = __cpu_to_le64(src_oi->oi.oi_id);
+   dst_oi->oi.oi_seq = __cpu_to_le64(src_oi->oi.oi_seq);
} else {
fid_cpu_to_le(&dst_oi->oi_fid, &src_oi->oi_fid);
}
@@ -758,8 +761,8 @@ static inline void ostid_le_to_cpu(const struct ost_id 
*src_oi,
   struct ost_id *dst_oi)
 {
if (fid_seq_is_mdt0(ostid_seq(src_oi))) {
-   dst_oi->oi.oi_id = le64_to_cpu(src_oi->oi.oi_id);
-   dst_oi->oi.oi_seq = le64_to_cpu(src_oi->oi.oi_seq);
+   dst_oi->oi.oi_id = __le64_to_cpu(src_oi->oi.oi_id);
+   dst_oi->oi.oi_seq = __le64_to_cpu(src_oi->oi.oi_seq);
} else {
fid_le_to_cpu(&dst_oi->oi_fid, &src_oi->oi_fid);
}
@@ -866,7 +869,7 @@ enum lu_dirpage_flags {
 
 static inline struct lu_dirent *lu_dirent_start(struct lu_dirpage *dp)
 {
-   if (le32_to_cpu(dp->ldp_flags) & LDF_EMPTY)
+   if (__le32_to_cpu(dp->ldp_flags) & LDF_EMPTY)
return NULL;
else
return dp->ldp_entries;
@@ -876,8 +879,8 @@ static inline struct lu_dirent *lu_dirent_next(struct 
lu_dirent *ent)
 {
struct lu_dirent *next;
 
-   if (le16_to_cpu(ent->lde_reclen) != 0)
-   next = ((void *)ent) + le16_to_cpu(ent->lde_reclen);
+   if (__le16_to_cpu(ent->lde_reclen) != 0)
+   next = ((void *)ent) + __le16_to_cpu(ent->lde_reclen);
else
next = NULL;
 
@@ -1438,15 +1441,15 @@ static inline __u64 lmm_oi_seq(const struct ost_id *oi)
 static inline void lmm_oi_le_to_cpu(struct ost_id *dst_oi,
const struct ost_id *src_oi)
 {
-   dst_oi->oi.oi_id = le64_to_cpu(src_oi->oi.oi_id);
-   dst_oi->oi.oi_seq = le64_to_cpu(src_oi->oi.oi_seq);
+   dst_oi->oi.oi_id = __le64_to_cpu(src_oi->oi.oi_id);
+   dst_oi->oi.oi_seq = __le64_to_cpu(src_oi->oi.oi_seq);
 }
 
 stati

[PATCH v2 4/5] staging: lustre: headers: Move functions out of lustre_idl.h

From: Ben Evans 

Migrate functions set/get_mrc_cr_flags, ldlm_res_eq,
ldlm_extent_overlap, ldlm_extent_contain,
ldlm_request_bufsize, and all the PTLRPC dump_*
functions out of lustre_idl.h which is a UAPI header
to the places in the kernel code they are actually used.
Delete unused lmv_mds_md_stripe_count and
agent_req_in_final_state.

Signed-off-by: Ben Evans 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-6401
Reviewed-on: http://review.whamcloud.com/21484
Reviewed-by: Frank Zago 
Reviewed-by: James Simmons 
Reviewed-by: Andreas Dilger 
Reviewed-by: John L. Hammond 
Signed-off-by: James Simmons 
---
 .../lustre/lustre/include/lustre/lustre_idl.h  | 73 --
 drivers/staging/lustre/lustre/include/lustre_dlm.h | 13 
 .../staging/lustre/lustre/include/lustre_swab.h|  6 ++
 drivers/staging/lustre/lustre/ldlm/ldlm_internal.h |  6 ++
 drivers/staging/lustre/lustre/ldlm/ldlm_request.c  | 27 
 drivers/staging/lustre/lustre/mdc/mdc_lib.c|  6 ++
 6 files changed, 58 insertions(+), 73 deletions(-)

diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h 
b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
index b0eb80d..cd2dbfb 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
@@ -2130,17 +2130,6 @@ struct mdt_rec_create {
__u32  cr_padding_4;   /* rr_padding_4 */
 };
 
-static inline void set_mrc_cr_flags(struct mdt_rec_create *mrc, __u64 flags)
-{
-   mrc->cr_flags_l = (__u32)(flags & 0xFFFFFFFFUll);
-   mrc->cr_flags_h = (__u32)(flags >> 32);
-}
-
-static inline __u64 get_mrc_cr_flags(struct mdt_rec_create *mrc)
-{
-   return ((__u64)(mrc->cr_flags_l) | ((__u64)mrc->cr_flags_h << 32));
-}
-
 /* instance of mdt_reint_rec */
 struct mdt_rec_link {
__u32  lk_opcode;
@@ -2403,25 +2392,6 @@ static inline int lmv_mds_md_stripe_count_get(const 
union lmv_mds_md *lmm)
}
 }
 
-static inline int lmv_mds_md_stripe_count_set(union lmv_mds_md *lmm,
- unsigned int stripe_count)
-{
-   int rc = 0;
-
-   switch (le32_to_cpu(lmm->lmv_magic)) {
-   case LMV_MAGIC_V1:
-   lmm->lmv_md_v1.lmv_stripe_count = cpu_to_le32(stripe_count);
-   break;
-   case LMV_USER_MAGIC:
-   lmm->lmv_user_md.lum_stripe_count = cpu_to_le32(stripe_count);
-   break;
-   default:
-   rc = -EINVAL;
-   break;
-   }
-   return rc;
-}
-
 enum fld_rpc_opc {
FLD_QUERY   = 900,
FLD_READ= 901,
@@ -2502,12 +2472,6 @@ struct ldlm_res_id {
 #define PLDLMRES(res)  (res)->lr_name.name[0], (res)->lr_name.name[1], \
(res)->lr_name.name[2], (res)->lr_name.name[3]
 
-static inline bool ldlm_res_eq(const struct ldlm_res_id *res0,
-  const struct ldlm_res_id *res1)
-{
-   return !memcmp(res0, res1, sizeof(*res0));
-}
-
 /* lock types */
 enum ldlm_mode {
LCK_MINMODE = 0,
@@ -2540,19 +2504,6 @@ struct ldlm_extent {
__u64 gid;
 };
 
-static inline int ldlm_extent_overlap(const struct ldlm_extent *ex1,
- const struct ldlm_extent *ex2)
-{
-   return (ex1->start <= ex2->end) && (ex2->start <= ex1->end);
-}
-
-/* check if @ex1 contains @ex2 */
-static inline int ldlm_extent_contain(const struct ldlm_extent *ex1,
- const struct ldlm_extent *ex2)
-{
-   return (ex1->start <= ex2->start) && (ex1->end >= ex2->end);
-}
-
 struct ldlm_inodebits {
__u64 bits;
 };
@@ -2627,18 +2578,6 @@ struct ldlm_request {
struct lustre_handle lock_handle[LDLM_LOCKREQ_HANDLES];
 };
 
-/* If LDLM_ENQUEUE, 1 slot is already occupied, 1 is available.
- * Otherwise, 2 are available.
- */
-#define ldlm_request_bufsize(count, type)  \
-({   \
-   int _avail = LDLM_LOCKREQ_HANDLES;\
-   _avail -= (type == LDLM_ENQUEUE ? LDLM_ENQUEUE_CANCEL_OFF : 0); \
-   sizeof(struct ldlm_request) +  \
-   (count > _avail ? count - _avail : 0) *  \
-   sizeof(struct lustre_handle);  \
-})
-
 struct ldlm_reply {
__u32 lock_flags;
__u32 lock_padding; /* also fix lustre_swab_ldlm_reply */
@@ -2942,12 +2881,6 @@ static inline const char *agent_req_status2name(const 
enum agent_req_status ars)
}
 }
 
-static inline bool agent_req_in_final_state(enum agent_req_status ars)
-{
-   return ((ars == ARS_SUCCEED) || (ars == ARS_FAILED) ||
-   (ars == ARS_CANCELED));
-}
-
 struct llog_agent_req_rec {
struct llog_rec_hdr arr_hdr;/**< record header */
__u32   arr_status; /**< status of the request */
@@ -

[PATCH v2 3/5] staging: lustre: headers: sort headers affected by obdo move

From: Ben Evans 

It was found if you sort the headers alphabetically
that it reduced patch conflicts. This patch sorts
the headers alphabetically and also place linux
header first, then uapi header and finally the
lustre kernel headers.

Signed-off-by: Ben Evans 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-6401
Reviewed-on: http://review.whamcloud.com/16917
Reviewed-on: http://review.whamcloud.com/19266
Reviewed-by: James Simmons 
Reviewed-by: Andreas Dilger 
Reviewed-by: John L. Hammond 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---

Changelog:

v1) Initial patch
v2) rebase patch against newer base patch that now
includes lustre_obdo.h in osc_request.c.

 drivers/staging/lustre/lustre/osc/osc_request.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c 
b/drivers/staging/lustre/lustre/osc/osc_request.c
index 99aefa5..0273ccd 100644
--- a/drivers/staging/lustre/lustre/osc/osc_request.c
+++ b/drivers/staging/lustre/lustre/osc/osc_request.c
@@ -34,20 +34,21 @@
 
 #include "../../include/linux/libcfs/libcfs.h"
 
-#include "../include/lustre_dlm.h"
-#include "../include/lustre_net.h"
+#include "../include/lustre/lustre_ioctl.h"
 #include "../include/lustre/lustre_user.h"
-#include "../include/obd_cksum.h"
 
-#include "../include/lustre_ha.h"
 #include "../include/lprocfs_status.h"
-#include "../include/lustre/lustre_ioctl.h"
 #include "../include/lustre_debug.h"
+#include "../include/lustre_dlm.h"
+#include "../include/lustre_fid.h"
+#include "../include/lustre_ha.h"
+#include "../include/lustre_net.h"
 #include "../include/lustre_obdo.h"
 #include "../include/lustre_param.h"
-#include "../include/lustre_fid.h"
-#include "../include/obd_class.h"
 #include "../include/obd.h"
+#include "../include/obd_cksum.h"
+#include "../include/obd_class.h"
+
 #include "osc_internal.h"
 #include "osc_cl_internal.h"
 
-- 
1.8.3.1

[PATCH v2 2/5] staging: lustre: obdclass: style cleanup for obdo related functions

From: Ben Evans 

Change the style of lustre_get_wire_obdo and
lustre_set_wire_obdo to conform to linux kernel
standard.

Signed-off-by: Ben Evans 
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-6401
Reviewed-on: http://review.whamcloud.com/16917
Reviewed-on: http://review.whamcloud.com/19266
Reviewed-by: James Simmons 
Reviewed-by: Andreas Dilger 
Reviewed-by: John L. Hammond 
Reviewed-by: Oleg Drokin 
Signed-off-by: James Simmons 
---
 drivers/staging/lustre/lustre/obdclass/obdo.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/lustre/lustre/obdclass/obdo.c 
b/drivers/staging/lustre/lustre/obdclass/obdo.c
index 241e60b..b1dfa16 100644
--- a/drivers/staging/lustre/lustre/obdclass/obdo.c
+++ b/drivers/staging/lustre/lustre/obdclass/obdo.c
@@ -139,7 +139,8 @@ void lustre_set_wire_obdo(const struct obd_connect_data 
*ocd,
 
if (unlikely(!(ocd->ocd_connect_flags & OBD_CONNECT_FID)) &&
fid_seq_is_echo(ostid_seq(&lobdo->o_oi))) {
-   /* Currently OBD_FL_OSTID will only be used when 2.4 echo
+   /*
+* Currently OBD_FL_OSTID will only be used when 2.4 echo
 * client communicate with pre-2.4 server
 */
wobdo->o_oi.oi.oi_id = fid_oid(&lobdo->o_oi.oi_fid);
@@ -154,13 +155,13 @@ void lustre_set_wire_obdo(const struct obd_connect_data 
*ocd,
 void lustre_get_wire_obdo(const struct obd_connect_data *ocd,
  struct obdo *lobdo, const struct obdo *wobdo)
 {
-   __u32 local_flags = 0;
+   u32 local_flags = 0;
 
if (lobdo->o_valid & OBD_MD_FLFLAGS)
local_flags = lobdo->o_flags & OBD_FL_LOCAL_MASK;
 
*lobdo = *wobdo;
-   if (local_flags != 0) {
+   if (local_flags) {
lobdo->o_valid |= OBD_MD_FLFLAGS;
lobdo->o_flags &= ~OBD_FL_LOCAL_MASK;
lobdo->o_flags |= local_flags;
-- 
1.8.3.1

Re: [PATCH v2] llist: Clarify comments about when locking is needed

2016-12-10 Thread Mathieu Desnoyers

- On Dec 10, 2016, at 7:03 PM, Joel Fernandes joe...@google.com wrote:

> llist.h comments are a bit confusing about when locking is needed versus when
> it isn't. Clarify these comments a bit more by being a bit more descriptive
> about why locking is needed for llist_del_first.

As I stated in my earlier review, please remove a couple of "a bit"
from the changelog.

Thanks,

Mathieu

> 
> Cc: Huang Ying 
> Cc: Ingo Molnar 
> Cc: Will Deacon 
> Cc: Paul McKenney 
> Acked-by: Mathieu Desnoyers 
> Signed-off-by: Joel Fernandes 
> ---
> v2 changes:
> Minor changes to comment and commit message based on Mathieu's suggestions
> (https://lkml.org/lkml/2016/12/10/39)
> 
> include/linux/llist.h | 37 +
> 1 file changed, 21 insertions(+), 16 deletions(-)
> 
> diff --git a/include/linux/llist.h b/include/linux/llist.h
> index fd4ca0b..31822bb 100644
> --- a/include/linux/llist.h
> +++ b/include/linux/llist.h
> @@ -3,28 +3,33 @@
> /*
>  * Lock-less NULL terminated single linked list
>  *
> - * If there are multiple producers and multiple consumers, llist_add
> - * can be used in producers and llist_del_all can be used in
> - * consumers.  They can work simultaneously without lock.  But
> - * llist_del_first can not be used here.  Because llist_del_first
> - * depends on list->first->next does not changed if list->first is not
> - * changed during its operation, but llist_del_first, llist_add,
> - * llist_add (or llist_del_all, llist_add, llist_add) sequence in
> - * another consumer may violate that.
> - *
> - * If there are multiple producers and one consumer, llist_add can be
> - * used in producers and llist_del_all or llist_del_first can be used
> - * in the consumer.
> - *
> - * This can be summarized as follow:
> + * Cases where locking is not needed:
> + * If there are multiple producers and multiple consumers, llist_add can be
> + * used in producers and llist_del_all can be used in consumers 
> simultaneously
> + * without locking. Also a single consumer can use llist_del_first while
> + * multiple producers simultaneously use llist_add, without any locking.
> + *
> + * Cases where locking is needed:
> + * If we have multiple consumers with llist_del_first used in one consumer, 
> and
> + * llist_del_first or llist_del_all used in other consumers, then a lock is
> + * needed.  This is because llist_del_first depends on list->first->next not
> + * changing, but without lock protection, there's no way to be sure about 
> that
> + * if a preemption happens in the middle of the delete operation and on being
> + * preempted back, the list->first is the same as before causing the cmpxchg 
> in
> + * llist_del_first to succeed. For example, while a llist_del_first operation
> + * is in progress in one consumer, then a llist_del_first, llist_add,
> + * llist_add (or llist_del_all, llist_add, llist_add) sequence in another
> + * consumer may cause violations.
> + *
> + * This can be summarized as follows:
>  *
>  *   |   add| del_first |  del_all
>  * add   |- | - | -
>  * del_first |  | L | L
>  * del_all   |  |   | -
>  *
> - * Where "-" stands for no lock is needed, while "L" stands for lock
> - * is needed.
> + * Where, a particular row's operation can happen concurrently with a 
> column's
> + * operation, with "-" being no lock needed, while "L" being lock is needed.
>  *
>  * The list entries deleted via llist_del_all can be traversed with
>  * traversing function such as llist_for_each etc.  But the list
> --
> 2.8.0.rc3.226.g39d4020

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

Re: [PATCH v2 3/5] staging: lustre: headers: sort headers affected by obdo move

On Sat, Dec 10, 2016 at 01:05:59PM -0500, James Simmons wrote:
> From: Ben Evans 
> 
> It was found if you sort the headers alphabetically
> that it reduced patch conflicts. This patch sorts
> the headers alphabetically and also place linux
> header first, then uapi header and finally the
> lustre kernel headers.

I still don't agree, when did you last have a patch conflict with this
file in the .h section?  And exactly how hard was it to fix it up?

I'm all for cleanups, but really, this is useless.  And I said so the
last time you sent it...

greg k-h

Re: [PATCH v2 5/5] staging: lustre: headers: use proper byteorder functions in lustre_idl.h

On Sat, Dec 10, 2016 at 01:06:01PM -0500, James Simmons wrote:
> In order for lustre_idl.h to be usable for both user
> land and kernel space it has to use the proper
> byteorder functions.

Why would userspace need/want all of these inline functions?  A uapi
header file should just have the structures that are passed
user/kernel and any needed ioctls.  Why would they ever care about
strange byte flip functions and a ton of inline functions?

I don't think this is needed, or if it is, I really don't want to see
your crazy userspace code...

thanks,

greg k-h

Re: [PATCH] iio: misc: add a generic regulator driver

On 06/12/16 11:12, Bartosz Golaszewski wrote:
> 2016-12-03 10:11 GMT+01:00 Jonathan Cameron :
>> On 30/11/16 10:10, Lars-Peter Clausen wrote:
>>> On 11/29/2016 04:35 PM, Bartosz Golaszewski wrote:
 2016-11-29 16:30 GMT+01:00 Lars-Peter Clausen :
> On 11/29/2016 04:22 PM, Bartosz Golaszewski wrote:
> [...]
>> diff --git 
>> a/Documentation/devicetree/bindings/iio/misc/iio-regulator.txt 
>> b/Documentation/devicetree/bindings/iio/misc/iio-regulator.txt
>> new file mode 100644
>> index 000..147458f
>> --- /dev/null
>> +++ b/Documentation/devicetree/bindings/iio/misc/iio-regulator.txt
>> @@ -0,0 +1,18 @@
>> +Industrial IO regulator device driver
>> +-
>> +
>> +This document describes the bindings for the iio-regulator - a dummy 
>> device
>> +driver representing a physical regulator within the iio framework.
>
> No bindings for drivers, only for hardware. So this wont work.
>

 What about exporting regulator attributes analogous to the one in this
 patch from the iio-core when a *-supply property is specified for a
 node?
>>>
>>> The problem with exposing direct control to the regulator is that it allows
>>> to modify the hardware state without the drivers knowledge. If you
>>> power-cycle a device all previous configuration that has been written to the
>>> device is reset. The device driver needs to be aware of this otherwise its
>>> assumed state and the actual device state can divert which will result in
>>> undefined behavior. Also access to the device will fail unexpectedly when
>>> the regulator is turned off. So I think generally the driver should
>>> explicitly control the regulator, power-up when needed, power-down when not.
>> I agree with what Lars has said.
>>
>> There 'may' be some argument to ultimately have a bridge driver from
>> regulators to IIO.  That would be for cases where the divide between a 
>> regulator
>> and a DAC is blurred.  However it would still have to play nicely with the
>> regulator framework and any other devices registered on that regulator.
>> Ultimately the ideal in that case would then be to describe what the DAC is
>> actually being used to do but that's a more complex issue!
>>
>> That doesn't seem to be what you are targeting here.
>>
>> What it sounds like you need is to have the hardware well enough described 
>> that
>> the standard runtime power management can disable the regulator just fine 
>> when
>> it is not in use.  This may mean improving the power management in the 
>> relevant
>> drivers.
>>
>> Jonathan
>>
>> p.s. If ever proposing to do something 'unusual' with a regulator you should
>> bring in the regulator framework maintainers in the cc list.
>>>
>>> - Lars
>>>
>>
> 
> I wrote the initial patch quickly and didn't give it much of a
> thought. Now I realized I completely missed the point and managed to
> confuse everybody - myself included.
> 
> So the problem we have is not power-cycling the adc - it's
> power-cycling the device connected to a probe on which there's an adc.
> What I was trying to do was adding support for the power-switch on
> baylibre-acme[1] probes.
> 
> For example: we have a USB probe on which the VBUS signal goes through
> a power load switch and then through the adc. The adc (in this case
> ina226) is always powered on, while the fixed regulator I wanted to
> enable/disable actually drives the power switch to cut/restore power
> to the connected USB device i.e. there's no real regulator - just a
> GPIO driving the power switch.
> 
> A typical use case is measuring the power consumption of development
> boards[2]. Rebooting them remotely using acme probes is already done,
> but we're using the obsolete /sys/class/gpio interface.
> 
> We're already using libiio to read the measured data from the power
> monitor, that's why we'd like to use the iio framework for
> power-cycling the devices as well. My question is: would bridging the
> regulator framework be the right solution? Should we look for
> something else? Bridge the GPIO framework instead?
Definitely doesn't fit inside standard scope of IIO - though I can see
why you were thinking along these lines.

Mark Brown, any thoughts?

Effectively we are looking at something that (in general form) might
be the equivalent of controlling a lab bench supply... So regulators
at the edge of the known world, with no visibility of what lies beyond.
> 
> Best regards,
> Bartosz Golaszewski
> 
> [1] http://baylibre.com/acme/
> [2] https://github.com/BayLibre/POWERCI
> --
> To unsubscribe from this list: send the line "unsubscribe linux-iio" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

Re: [PATCH v2] llist: Clarify comments about when locking is needed

2016-12-10 Thread Joel Fernandes

On Sat, Dec 10, 2016 at 10:15 AM, Mathieu Desnoyers
 wrote:
> - On Dec 10, 2016, at 7:03 PM, Joel Fernandes joe...@google.com wrote:
>
>> llist.h comments are a bit confusing about when locking is needed versus when
>> it isn't. Clarify these comments a bit more by being a bit more descriptive
>> about why locking is needed for llist_del_first.
>
> As I stated in my earlier review, please remove a couple of "a bit"
> from the changelog.
>

I'm sorry I missed that. I will update it correctly in the next rev.

Regards,
Joel



> Thanks,
>
> Mathieu
>
>>
>> Cc: Huang Ying 
>> Cc: Ingo Molnar 
>> Cc: Will Deacon 
>> Cc: Paul McKenney 
>> Acked-by: Mathieu Desnoyers 
>> Signed-off-by: Joel Fernandes 
>> ---
>> v2 changes:
>> Minor changes to comment and commit message based on Mathieu's suggestions
>> (https://lkml.org/lkml/2016/12/10/39)
>>
>> include/linux/llist.h | 37 +
>> 1 file changed, 21 insertions(+), 16 deletions(-)
>>
>> diff --git a/include/linux/llist.h b/include/linux/llist.h
>> index fd4ca0b..31822bb 100644
>> --- a/include/linux/llist.h
>> +++ b/include/linux/llist.h
>> @@ -3,28 +3,33 @@
>> /*
>>  * Lock-less NULL terminated single linked list
>>  *
>> - * If there are multiple producers and multiple consumers, llist_add
>> - * can be used in producers and llist_del_all can be used in
>> - * consumers.  They can work simultaneously without lock.  But
>> - * llist_del_first can not be used here.  Because llist_del_first
>> - * depends on list->first->next does not changed if list->first is not
>> - * changed during its operation, but llist_del_first, llist_add,
>> - * llist_add (or llist_del_all, llist_add, llist_add) sequence in
>> - * another consumer may violate that.
>> - *
>> - * If there are multiple producers and one consumer, llist_add can be
>> - * used in producers and llist_del_all or llist_del_first can be used
>> - * in the consumer.
>> - *
>> - * This can be summarized as follow:
>> + * Cases where locking is not needed:
>> + * If there are multiple producers and multiple consumers, llist_add can be
>> + * used in producers and llist_del_all can be used in consumers 
>> simultaneously
>> + * without locking. Also a single consumer can use llist_del_first while
>> + * multiple producers simultaneously use llist_add, without any locking.
>> + *
>> + * Cases where locking is needed:
>> + * If we have multiple consumers with llist_del_first used in one consumer, 
>> and
>> + * llist_del_first or llist_del_all used in other consumers, then a lock is
>> + * needed.  This is because llist_del_first depends on list->first->next not
>> + * changing, but without lock protection, there's no way to be sure about 
>> that
>> + * if a preemption happens in the middle of the delete operation and on 
>> being
>> + * preempted back, the list->first is the same as before causing the 
>> cmpxchg in
>> + * llist_del_first to succeed. For example, while a llist_del_first 
>> operation
>> + * is in progress in one consumer, then a llist_del_first, llist_add,
>> + * llist_add (or llist_del_all, llist_add, llist_add) sequence in another
>> + * consumer may cause violations.
>> + *
>> + * This can be summarized as follows:
>>  *
>>  *   |   add| del_first |  del_all
>>  * add   |- | - | -
>>  * del_first |  | L | L
>>  * del_all   |  |   | -
>>  *
>> - * Where "-" stands for no lock is needed, while "L" stands for lock
>> - * is needed.
>> + * Where, a particular row's operation can happen concurrently with a 
>> column's
>> + * operation, with "-" being no lock needed, while "L" being lock is needed.
>>  *
>>  * The list entries deleted via llist_del_all can be traversed with
>>  * traversing function such as llist_for_each etc.  But the list
>> --
>> 2.8.0.rc3.226.g39d4020
>
> --
> Mathieu Desnoyers
> EfficiOS Inc.
> http://www.efficios.com

Re: [PATCH v6 4/9] dt-bindings: iio: iio-mux: document iio-mux bindings

On 06/12/16 09:18, Peter Rosin wrote:
> On 2016-12-06 00:26, Rob Herring wrote:
>> On Wed, Nov 30, 2016 at 09:16:58AM +0100, Peter Rosin wrote:
>>> Signed-off-by: Peter Rosin 
>>> ---
>>>  .../bindings/iio/multiplexer/iio-mux.txt   | 40 
>>> ++
>>>  MAINTAINERS|  6 
>>>  2 files changed, 46 insertions(+)
>>>  create mode 100644 
>>> Documentation/devicetree/bindings/iio/multiplexer/iio-mux.txt
>>
>> I'm still not convinced about this binding, but don't really have more 
>> comments ATM. Sending 6 versions in 2 weeks or so doesn't really help 
>> either.
> 
> Sorry about the noise, I'll try to be more careful going forward. On
> the flip side, I haven't touched the code since v6.
> 
> I don't see how bindings that are as flexible as the current (and
> original) phandle link between the mux consumer and the mux controller
> would look, and at the same time be simpler to understand. You need
> to be able to refer to a mux controller from several mux consumers, and
> you need to support several mux controllers in one node (the ADG792A
> case). And, AFAICT, the complex case wasn't really the problem, it was
> that it is overly complex to describe the simple case of one mux
> consumer and one mux controller. But in your comment for v2 [1] you
> said that I was working around limitations with shared GPIO pins. But
> solving that in the GPIO subsystem would not solve all that the
> phandle approach is solving, since you would not have support for
> ADG792A (or other non-GPIO controlled muxes). So, I think listing
> the gpio pins inside the mux consumer node is a non-starter, the mux
> controller has to live in its own node with its own compatible.
> 
> Would you be happier if I managed to marry the phandle approach with
> the option of having the mux controller as a child node of the mux
> consumer for the simple case?
> 
> I added an example at the end of this message (the same as the first
> example in v4 [2], at least in principle) for easy comparison between
> the phandle and the controller-in-child-node approaches. I can't say
> that I personally find the difference all that significant, and do not
> think it is worth it. As I see it, the "simple option" would just muddy
> the waters...
> 
> [1] http://marc.info/?l=linux-kernel&m=147948334204795&w=2
> [2] http://marc.info/?l=linux-kernel&m=148001364904240&w=2
> 
>>> diff --git a/Documentation/devicetree/bindings/iio/multiplexer/iio-mux.txt 
>>> b/Documentation/devicetree/bindings/iio/multiplexer/iio-mux.txt
>>> new file mode 100644
>>> index ..8080cf790d82
>>> --- /dev/null
>>> +++ b/Documentation/devicetree/bindings/iio/multiplexer/iio-mux.txt
>>> @@ -0,0 +1,40 @@
>>> +IIO multiplexer bindings
>>> +
>>> +If a multiplexer is used to select which hardware signal is fed to
>>> +e.g. an ADC channel, these bindings describe that situation.
>>> +
>>> +Required properties:
>>> +- compatible : "iio-mux"
>>
>> This is a Linuxism. perhaps "adc-mux".
> 
> No, that's not general enough, it could just as well be used to mux a
> temperature sensor. Or whatever. Hmmm, given that "iio-mux" is bad, perhaps
> "io-channel-mux" is better? That matches the io-channels property used to
> refer to the parent channel.
analog-mux maybe? Makes more sense out of context (though with io-channels
defined on the next line you have plenty of context here ;)
> 
>>> +- io-channels : Channel node of the parent channel that has multiplexed
>>> +   input.
>>> +- io-channel-names : Should be "parent".
>>> +- #address-cells = <1>;
>>> +- #size-cells = <0>;
>>> +- mux-controls : Mux controller node to use for operating the mux
>>> +- channels : List of strings, labeling the mux controller states.
>>> +
>>> +The multiplexer state as described in ../misc/mux-controller.txt
>>> +
>>> +For each non-empty string in the channels property, an iio channel will
>>> +be created. The number of this iio channel is the same as the index into
>>> +the list of strings in the channels property, and also matches the mux
>>> +controller state.
>>> +
>>> +Example:
>>> +   mux: mux-controller {
>>> +   compatible = "mux-gpio";
>>> +   #mux-control-cells = <0>;
>>> +
>>> +   mux-gpios = <&pioA 0 GPIO_ACTIVE_HIGH>,
>>> +   <&pioA 1 GPIO_ACTIVE_HIGH>;
>>> +   };
>>> +
>>> +   adc-mux {
>>> +   compatible = "iio-mux";
>>> +   io-channels = <&adc 0>;
>>> +   io-channel-names = "parent";
>>> +
>>> +   mux-controls = <&mux>;
>>> +
>>> +   channels = "sync", "in", system-regulator";
>>> +   };
> 
> Describing the same as above, but with the mux controller as a child
> node.
> 
>   adc-mux {
>   compatible = "iio-mux";
>   io-channels = <&adc 0>;
>   io-channel-names = "parent";
> 
>   channels = "sync", "in", system-regulator";
> 
>   mux-controller {
>   compatible =

[PATCH net 2/3] net: bridge: add helper to set topology change

Add a __br_set_topology_change helper to set the topology change value.

This can be later extended to add actions when the topology change flag
is set or cleared.

Signed-off-by: Vivien Didelot 
---
 net/bridge/br_private_stp.h |  1 +
 net/bridge/br_stp.c | 10 --
 net/bridge/br_stp_if.c  |  2 +-
 net/bridge/br_stp_timer.c   |  2 +-
 4 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h
index 2fe910c..3f7543a 100644
--- a/net/bridge/br_private_stp.h
+++ b/net/bridge/br_private_stp.h
@@ -61,6 +61,7 @@ void br_received_tcn_bpdu(struct net_bridge_port *p);
 void br_transmit_config(struct net_bridge_port *p);
 void br_transmit_tcn(struct net_bridge *br);
 void br_topology_change_detection(struct net_bridge *br);
+void __br_set_topology_change(struct net_bridge *br, unsigned char val);
 
 /* br_stp_bpdu.c */
 void br_send_config_bpdu(struct net_bridge_port *, struct br_config_bpdu *);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 6ebe2a0..8d7b4c7 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -234,7 +234,7 @@ static void br_record_config_timeout_values(struct 
net_bridge *br,
br->max_age = bpdu->max_age;
br->hello_time = bpdu->hello_time;
br->forward_delay = bpdu->forward_delay;
-   br->topology_change = bpdu->topology_change;
+   __br_set_topology_change(br, bpdu->topology_change);
 }
 
 /* called under bridge lock */
@@ -344,7 +344,7 @@ void br_topology_change_detection(struct net_bridge *br)
isroot ? "propagating" : "sending tcn bpdu");
 
if (isroot) {
-   br->topology_change = 1;
+   __br_set_topology_change(br, 1);
mod_timer(&br->topology_change_timer, jiffies
  + br->bridge_forward_delay + br->bridge_max_age);
} else if (!br->topology_change_detected) {
@@ -603,6 +603,12 @@ int br_set_ageing_time(struct net_bridge *br, clock_t 
ageing_time)
return 0;
 }
 
+/* called under bridge lock */
+void __br_set_topology_change(struct net_bridge *br, unsigned char val)
+{
+   br->topology_change = val;
+}
+
 void __br_set_forward_delay(struct net_bridge *br, unsigned long t)
 {
br->bridge_forward_delay = t;
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 2efbba5..6c1e214 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -81,7 +81,7 @@ void br_stp_disable_bridge(struct net_bridge *br)
 
}
 
-   br->topology_change = 0;
+   __br_set_topology_change(br, 0);
br->topology_change_detected = 0;
spin_unlock_bh(&br->lock);
 
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index da058b8..7ddb38e 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -125,7 +125,7 @@ static void br_topology_change_timer_expired(unsigned long 
arg)
br_debug(br, "topo change timer expired\n");
spin_lock(&br->lock);
br->topology_change_detected = 0;
-   br->topology_change = 0;
+   __br_set_topology_change(br, 0);
spin_unlock(&br->lock);
 }
 
-- 
2.10.2

[PATCH net 0/3] net: bridge: fast ageing on topology change

802.1D [1] specifies that the bridges in a network must use a short
value to age out dynamic entries in the Filtering Database for a period,
once a topology change has been communicated by the root bridge.

This patchset fixes this for the in-kernel STP implementation.

Once the topology change flag is set in a net_bridge instance, the
ageing time value is shortened to twice the forward delay used by the
topology.

When the topology change flag is cleared, the ageing time configured for
the bridge is restored.

To accomplish that, a new bridge_ageing_time member is added to the
net_bridge structure, to store the user configured bridge ageing time.

Two helpers are added to offload the ageing time and set the topology
change flag in the net_bridge instance. Then the required logic is added
in the topology change helper if in-kernel STP is used.

This has been tested on the following topology:

+--+
| root bridge  |
|  1  2  3  4  |
+--+--+--+--+--+
   |  |  |  |  ++
   |  |  |  +--| laptop |
   |  |  | ++
+--+--+--+-+
|  1  2  3 |
| slave bridge |
+--+

When unplugging/replugging the laptop, the slave bridge (under test)
gets the topology change flag sent by the root bridge, and fast ageing
is triggered on the bridges. Once the topology change timer of the root
bridge expires, the topology change flag is cleared and the configured
ageing time is restored on the bridges.

A similar test has been done between two bridges under test.
When changing the forward delay of the root bridge with:

# echo 3000 > /sys/class/net/br0/bridge/forward_delay

the ageing time correctly changes on both bridges from 300s to 60s while
the TOPOLOGY_CHANGE flag is present.

[1] "8.3.5 Notifying topology changes",
http://profesores.elo.utfsm.cl/~agv/elo309/doc/802.1D-1998.pdf

No change since RFC: https://lkml.org/lkml/2016/10/19/828

Vivien Didelot (3):
  net: bridge: add helper to offload ageing time
  net: bridge: add helper to set topology change
  net: bridge: shorten ageing time on topology change

 net/bridge/br_device.c  |  2 +-
 net/bridge/br_private.h |  4 ++-
 net/bridge/br_private_stp.h |  1 +
 net/bridge/br_stp.c | 65 ++---
 net/bridge/br_stp_if.c  | 14 +++---
 net/bridge/br_stp_timer.c   |  2 +-
 6 files changed, 65 insertions(+), 23 deletions(-)

-- 
2.10.2

[PATCH net 3/3] net: bridge: shorten ageing time on topology change

802.1D [1] specifies that the bridges must use a short value to age out
dynamic entries in the Filtering Database for a period, once a topology
change has been communicated by the root bridge.

Add a bridge_ageing_time member in the net_bridge structure to store the
bridge ageing time value configured by the user (ioctl/netlink/sysfs).

If we are using in-kernel STP, shorten the ageing time value to twice
the forward delay used by the topology when the topology change flag is
set. When the flag is cleared, restore the configured ageing time.

[1] "8.3.5 Notifying topology changes ",
http://profesores.elo.utfsm.cl/~agv/elo309/doc/802.1D-1998.pdf

Signed-off-by: Vivien Didelot 
---
 net/bridge/br_device.c  |  2 +-
 net/bridge/br_private.h |  3 ++-
 net/bridge/br_stp.c | 27 +++
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 89a687f..207318a 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -409,7 +409,7 @@ void br_dev_setup(struct net_device *dev)
br->bridge_max_age = br->max_age = 20 * HZ;
br->bridge_hello_time = br->hello_time = 2 * HZ;
br->bridge_forward_delay = br->forward_delay = 15 * HZ;
-   br->ageing_time = BR_DEFAULT_AGEING_TIME;
+   br->bridge_ageing_time = br->ageing_time = BR_DEFAULT_AGEING_TIME;
 
br_netfilter_rtable_init(br);
br_stp_timer_init(br);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 3c294b4..43efeb9 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -300,10 +300,11 @@ struct net_bridge
unsigned long   max_age;
unsigned long   hello_time;
unsigned long   forward_delay;
-   unsigned long   bridge_max_age;
unsigned long   ageing_time;
+   unsigned long   bridge_max_age;
unsigned long   bridge_hello_time;
unsigned long   bridge_forward_delay;
+   unsigned long   bridge_ageing_time;
 
u8  group_addr[ETH_ALEN];
boolgroup_addr_set;
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 8d7b4c7..71fd1a4 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -597,7 +597,11 @@ int br_set_ageing_time(struct net_bridge *br, clock_t 
ageing_time)
if (err)
return err;
 
+   spin_lock_bh(&br->lock);
+   br->bridge_ageing_time = t;
br->ageing_time = t;
+   spin_unlock_bh(&br->lock);
+
mod_timer(&br->gc_timer, jiffies);
 
return 0;
@@ -606,6 +610,29 @@ int br_set_ageing_time(struct net_bridge *br, clock_t 
ageing_time)
 /* called under bridge lock */
 void __br_set_topology_change(struct net_bridge *br, unsigned char val)
 {
+   unsigned long t;
+   int err;
+
+   if (br->stp_enabled == BR_KERNEL_STP && br->topology_change != val) {
+   /* On topology change, set the bridge ageing time to twice the
+* forward delay. Otherwise, restore its default ageing time.
+*/
+
+   if (val) {
+   t = 2 * br->forward_delay;
+   br_debug(br, "decreasing ageing time to %lu\n", t);
+   } else {
+   t = br->bridge_ageing_time;
+   br_debug(br, "restoring ageing time to %lu\n", t);
+   }
+
+   err = __set_ageing_time(br->dev, t);
+   if (err)
+   br_warn(br, "error offloading ageing time\n");
+   else
+   br->ageing_time = t;
+   }
+
br->topology_change = val;
 }
 
-- 
2.10.2

[PATCH net 1/3] net: bridge: add helper to offload ageing time