[PATCH net] net/ncsi: Don't limit vids based on hot_channel

2017-10-10 Thread Samuel Mendoza-Jonas
Currently we drop any new VLAN ids if there are more than the current
(or last used) channel can support. Most importantly this is a problem
if no channel has been selected yet, resulting in a segfault.

Secondly this does not necessarily reflect the capabilities of any other
channels. Instead only drop a new VLAN id if we are already tracking the
maximum allowed by the NCSI specification. Per-channel limits are
already handled by ncsi_add_filter(), but add a message to set_one_vid()
to make it obvious that the channel can not support any more VLAN ids.

Signed-off-by: Samuel Mendoza-Jonas 
---
 net/ncsi/internal.h|  1 +
 net/ncsi/ncsi-manage.c | 17 +
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index af3d636534ef..d30f7bd741d0 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -286,6 +286,7 @@ struct ncsi_dev_priv {
struct work_struct  work;/* For channel management */
struct packet_type  ptype;   /* NCSI packet Rx handler */
struct list_headnode;/* Form NCSI device list  */
+#define NCSI_MAX_VLAN_VIDS 15
struct list_headvlan_vids;   /* List of active VLAN IDs */
 };
 
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 3fd3c39e6278..b6a449aa9d4b 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -732,6 +732,10 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct 
ncsi_channel *nc,
if (index < 0) {
netdev_err(ndp->ndev.dev,
   "Failed to add new VLAN tag, error %d\n", index);
+   if (index == -ENOSPC)
+   netdev_err(ndp->ndev.dev,
+  "Channel %u already has all VLAN filters 
set\n",
+  nc->id);
return -1;
}
 
@@ -1403,7 +1407,6 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
 
 int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 {
-   struct ncsi_channel_filter *ncf;
struct ncsi_dev_priv *ndp;
unsigned int n_vids = 0;
struct vlan_vid *vlan;
@@ -1420,7 +1423,6 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 
proto, u16 vid)
}
 
ndp = TO_NCSI_DEV_PRIV(nd);
-   ncf = ndp->hot_channel->filters[NCSI_FILTER_VLAN];
 
/* Add the VLAN id to our internal list */
list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) {
@@ -1431,12 +1433,11 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 
proto, u16 vid)
return 0;
}
}
-
-   if (n_vids >= ncf->total) {
-   netdev_info(dev,
-   "NCSI Channel supports up to %u VLAN tags but %u 
are already set\n",
-   ncf->total, n_vids);
-   return -EINVAL;
+   if (n_vids >= NCSI_MAX_VLAN_VIDS) {
+   netdev_warn(dev,
+   "tried to add vlan id %u but NCSI max already 
registered (%u)\n",
+   vid, NCSI_MAX_VLAN_VIDS);
+   return -ENOSPC;
}
 
vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
-- 
2.14.2



[PATCH net] net/ncsi: Don't limit vids based on hot_channel

2017-10-10 Thread Samuel Mendoza-Jonas
Currently we drop any new VLAN ids if there are more than the current
(or last used) channel can support. Most importantly this is a problem
if no channel has been selected yet, resulting in a segfault.

Secondly this does not necessarily reflect the capabilities of any other
channels. Instead only drop a new VLAN id if we are already tracking the
maximum allowed by the NCSI specification. Per-channel limits are
already handled by ncsi_add_filter(), but add a message to set_one_vid()
to make it obvious that the channel can not support any more VLAN ids.

Signed-off-by: Samuel Mendoza-Jonas 
---
 net/ncsi/internal.h|  1 +
 net/ncsi/ncsi-manage.c | 17 +
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index af3d636534ef..d30f7bd741d0 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -286,6 +286,7 @@ struct ncsi_dev_priv {
struct work_struct  work;/* For channel management */
struct packet_type  ptype;   /* NCSI packet Rx handler */
struct list_headnode;/* Form NCSI device list  */
+#define NCSI_MAX_VLAN_VIDS 15
struct list_headvlan_vids;   /* List of active VLAN IDs */
 };
 
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 3fd3c39e6278..b6a449aa9d4b 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -732,6 +732,10 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct 
ncsi_channel *nc,
if (index < 0) {
netdev_err(ndp->ndev.dev,
   "Failed to add new VLAN tag, error %d\n", index);
+   if (index == -ENOSPC)
+   netdev_err(ndp->ndev.dev,
+  "Channel %u already has all VLAN filters 
set\n",
+  nc->id);
return -1;
}
 
@@ -1403,7 +1407,6 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
 
 int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 {
-   struct ncsi_channel_filter *ncf;
struct ncsi_dev_priv *ndp;
unsigned int n_vids = 0;
struct vlan_vid *vlan;
@@ -1420,7 +1423,6 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 
proto, u16 vid)
}
 
ndp = TO_NCSI_DEV_PRIV(nd);
-   ncf = ndp->hot_channel->filters[NCSI_FILTER_VLAN];
 
/* Add the VLAN id to our internal list */
list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) {
@@ -1431,12 +1433,11 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 
proto, u16 vid)
return 0;
}
}
-
-   if (n_vids >= ncf->total) {
-   netdev_info(dev,
-   "NCSI Channel supports up to %u VLAN tags but %u 
are already set\n",
-   ncf->total, n_vids);
-   return -EINVAL;
+   if (n_vids >= NCSI_MAX_VLAN_VIDS) {
+   netdev_warn(dev,
+   "tried to add vlan id %u but NCSI max already 
registered (%u)\n",
+   vid, NCSI_MAX_VLAN_VIDS);
+   return -ENOSPC;
}
 
vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
-- 
2.14.2



Re: [PATCH V1] pinctrl: qcom: spmi-gpio: Update GPIO EN_CTL when setting pin config

2017-10-10 Thread Bjorn Andersson
On Mon 09 Oct 17:17 PDT 2017, Fenglin Wu wrote:

> On 10/9/2017 1:56 PM, Bjorn Andersson wrote:
> > On Sun 08 Oct 22:34 PDT 2017, Fenglin Wu wrote:
> > 
> > > On 10/6/2017 12:27 AM, Bjorn Andersson wrote:
[..]
> > > > But I spotted another issue while reviewing this; currently the initial
> > > > state of is_enabled is unconditionally set to enabled in
> > > > pmic_gpio_populate(), so reading the initial pinconf or configuring a
> > > > pinmux before setting a pinconf will operate on the potentially wrong
> > > > information.
> > > > 
> > > > So I think the initial value should be read out from REG_EN_CTL rather
> > > > than being just "true".
> > > > 
> > > > Can you please either submit another patch for this?
> > > 
> > > Hmm, considering a GPIO which is disabled by default in hardware
> > > setting, what's its expected state if we only define "function" for it?
> > > I was thinking we need to enable it once it has any setting in pinmux or
> > > pinconf. If you think that we need to keep its original state until we
> > > set pinconf for it, yes, I can submit a change to address this.
> > > 
> > 
> > Are there valid cases where only function should be selected and no
> > other configuration is used? If so it makes sense to make
> > pmic_gpio_set_mux() enable the block.
> > 
> > 
> > Regardless of this, if there are disabled pins that are not mentioned in
> > DT they will still appear as enabled in the debugfs interface; and this
> > I consider an error worth fixing.
> How about we do both: read the HW initial state in pmic_gpio_populate(),
> and also enable the GPIO block in pmic_gpio_set_mux()?
> 

That sounds good.

Please do this as two separate patches, with the commit message clearly
describing a case where the pinconf does not affect the function of the
pin, so a pinmux is the only thing needed.

Regards,
Bjorn


Re: [PATCH V1] pinctrl: qcom: spmi-gpio: Update GPIO EN_CTL when setting pin config

2017-10-10 Thread Bjorn Andersson
On Mon 09 Oct 17:17 PDT 2017, Fenglin Wu wrote:

> On 10/9/2017 1:56 PM, Bjorn Andersson wrote:
> > On Sun 08 Oct 22:34 PDT 2017, Fenglin Wu wrote:
> > 
> > > On 10/6/2017 12:27 AM, Bjorn Andersson wrote:
[..]
> > > > But I spotted another issue while reviewing this; currently the initial
> > > > state of is_enabled is unconditionally set to enabled in
> > > > pmic_gpio_populate(), so reading the initial pinconf or configuring a
> > > > pinmux before setting a pinconf will operate on the potentially wrong
> > > > information.
> > > > 
> > > > So I think the initial value should be read out from REG_EN_CTL rather
> > > > than being just "true".
> > > > 
> > > > Can you please either submit another patch for this?
> > > 
> > > Hmm, considering a GPIO which is disabled by default in hardware
> > > setting, what's its expected state if we only define "function" for it?
> > > I was thinking we need to enable it once it has any setting in pinmux or
> > > pinconf. If you think that we need to keep its original state until we
> > > set pinconf for it, yes, I can submit a change to address this.
> > > 
> > 
> > Are there valid cases where only function should be selected and no
> > other configuration is used? If so it makes sense to make
> > pmic_gpio_set_mux() enable the block.
> > 
> > 
> > Regardless of this, if there are disabled pins that are not mentioned in
> > DT they will still appear as enabled in the debugfs interface; and this
> > I consider an error worth fixing.
> How about we do both: read the HW initial state in pmic_gpio_populate(),
> and also enable the GPIO block in pmic_gpio_set_mux()?
> 

That sounds good.

Please do this as two separate patches, with the commit message clearly
describing a case where the pinconf does not affect the function of the
pin, so a pinmux is the only thing needed.

Regards,
Bjorn


Re: [RFC 1/2] ARM: dts: exynos: update the usbdrd phy and ref clk

2017-10-10 Thread Anand Moon
Hi Vivek,

On 10 October 2017 at 11:57, Vivek Gautam  wrote:
>
>
> On 10/08/2017 06:06 PM, Anand Moon wrote:
>>
>> Hi Krzysztof,
>>
>> On 6 October 2017 at 12:08, Krzysztof Kozlowski  wrote:
>>>
>>> On Fri, Oct 6, 2017 at 6:36 AM, Anand Moon  wrote:

 update the usbdrd link control and phy control clks.
>>>
>>> The commit title and especially commit message should explain why you
>>> are doing this and what are you doing. "Update" is not enough.
>>> Everything could be called update.
>>>
>>> Therefore I do not understand the reason behind the patch.
>>>
>>> BR,
>>> Krzysztof
>>
>> so as per the driver.
>> @clk: phy clock for register access
>> @ref_clk: reference clock to PHY block from which PHY's operational
>> clocks are derived
>>
>> Both CLK_SCLK_USBPHY300 and CLK_SCLK_USBD300 belong to FSYS Clock
>> and CLK_USBD300 clk is being used by the usbdrd dwc3 module.
>
>
> From what i vaguely remember, the CLK_SCLK* are the parent clocks going to
> the
> FSYS block. In this FSYS block the two clocks - CLK_USBD300, and
> CLK_SCLK_USBPHY300
> are coming.
>
> "phy" - represents the AHB clock used only for the register writes, and is
> required only
> during register access. Since we don't need this clock for phy operation,
> your next change
> that removes the clk_disable() sounds incorrect to me.
> Just to double check, this AHB clock should be 200MHz (from what i remember)
> "ref_clk" - the phy reference that clocks the phy PLL. This is a 24MHz
> clock.
>
> Clubbing the changes in two patches:
> - You change the "phy" clock from CLK_USBD300 to CLK_SCLK_USBPHY300, and
> then
>   you _had_ to remove the clk_disable().
>   I think you needed the second patch just because you introduced this
> change in the clocks.
>
> - Like Krzysztof mentioned in the thread, if there's a performance
> improvement you may
> want to double check the clock rates.
>

Yes, there is a slight improvement with these changes.
I will share my test results once I add a few more changes to this driver.

>
> Best regards
> Vivek
>

Thanks for your explanation of the clk internals.
I have read a few details on the initial mainline list.

[0] https://lkml.org/lkml/2014/4/8/247

CLK_GATE_TOP_SCLK_FSYS

SCLK_USBDRD301   Gating SUSPEND_CLK for USBDRD30_1
SCLK_USBDRD300   Gating SUSPEND_CLK for USBDRD30_0
SCLK_USBPHY300Gating USB30_SCLK_100M for USBDRD30_PHY_0
Gating USB20_PICO_CLKCORE
for PICO PHY
SCLK_USBPHY300Gating USB30_SCLK_100M for USBDRD30_PHY_1

So we did not consider the SUSPEND_CLK for the phy.

Below is the clk structure diagram for usb drd phy.

[1] https://lkml.org/lkml/2014/4/10/240

Here is how it is shown in the manual.

   ___
   |  |
SUSPEND_CLK |  |
 - |  | PHY ||
   |  | controller
|--|
   |  |___||
   |
   |
   |
   |
   |
   |
  |
   | USB 3.0
|  USB30_SCLK_100M- |---|
   |   DRD
 |   |
|>vbus
   -
|
   |
   |Controller
|   |
 |-||
   |
   |Pipe interface  |   |
USB 3.0 DRD  ||
   |  
  |---|
|||
   | |   PHY   |
  |UTMI+ Interface   |
|
   | | Link cont.|
|---|
   |
   | |-|
 |
|__|
   |
   |
   |__|


So how can we support SUSPEND_CLK?
Do we need to keep this SUSPEND_CLK enabled?

As of now my dts changes are wrong.
How about the changes below?

 _phy0 {
-   clocks = < CLK_USBD300>, < CLK_SCLK_USBPHY300>;
+   clocks = < CLK_SCLK_USBD300>, < CLK_SCLK_USBPHY300>;
clock-names = 

Re: [RFC 1/2] ARM: dts: exynos: update the usbdrd phy and ref clk

2017-10-10 Thread Anand Moon
Hi Vivek,

On 10 October 2017 at 11:57, Vivek Gautam  wrote:
>
>
> On 10/08/2017 06:06 PM, Anand Moon wrote:
>>
>> Hi Krzysztof,
>>
>> On 6 October 2017 at 12:08, Krzysztof Kozlowski  wrote:
>>>
>>> On Fri, Oct 6, 2017 at 6:36 AM, Anand Moon  wrote:

 update the usbdrd link control and phy control clks.
>>>
>>> The commit title and especially commit message should explain why you
>>> are doing this and what are you doing. "Update" is not enough.
>>> Everything could be called update.
>>>
>>> Therefore I do not understand the reason behind the patch.
>>>
>>> BR,
>>> Krzysztof
>>
>> so as per the driver.
>> @clk: phy clock for register access
>> @ref_clk: reference clock to PHY block from which PHY's operational
>> clocks are derived
>>
>> Both CLK_SCLK_USBPHY300 and CLK_SCLK_USBD300 belong to FSYS Clock
>> and CLK_USBD300 clk is being used by the usbdrd dwc3 module.
>
>
> From what i vaguely remember, the CLK_SCLK* are the parent clocks going to
> the
> FSYS block. In this FSYS block the two clocks - CLK_USBD300, and
> CLK_SCLK_USBPHY300
> are coming.
>
> "phy" - represents the AHB clock used only for the register writes, and is
> required only
> during register access. Since we don't need this clock for phy operation,
> your next change
> that removes the clk_disable() sounds incorrect to me.
> Just to double check, this AHB clock should be 200MHz (from what i remember)
> "ref_clk" - the phy reference that clocks the phy PLL. This is a 24MHz
> clock.
>
> Clubbing the changes in two patches:
> - You change the "phy" clock from CLK_USBD300 to CLK_SCLK_USBPHY300, and
> then
>   you _had_ to remove the clk_disable().
>   I think you needed the second patch just because you introduced this
> change in the clocks.
>
> - Like Krzysztof mentioned in the thread, if there's a performance
> improvement you may
> want to double check the clock rates.
>

Yes, there is a slight improvement with these changes.
I will share my test results once I add a few more changes to this driver.

>
> Best regards
> Vivek
>

Thanks for your explanation of the clk internals.
I have read a few details on the initial mainline list.

[0] https://lkml.org/lkml/2014/4/8/247

CLK_GATE_TOP_SCLK_FSYS

SCLK_USBDRD301   Gating SUSPEND_CLK for USBDRD30_1
SCLK_USBDRD300   Gating SUSPEND_CLK for USBDRD30_0
SCLK_USBPHY300Gating USB30_SCLK_100M for USBDRD30_PHY_0
Gating USB20_PICO_CLKCORE
for PICO PHY
SCLK_USBPHY300Gating USB30_SCLK_100M for USBDRD30_PHY_1

So we did not consider the SUSPEND_CLK for the phy.

Below is the clk structure diagram for usb drd phy.

[1] https://lkml.org/lkml/2014/4/10/240

Here is how it is shown in the manual.

   ___
   |  |
SUSPEND_CLK |  |
 - |  | PHY ||
   |  | controller
|--|
   |  |___||
   |
   |
   |
   |
   |
   |
  |
   | USB 3.0
|  USB30_SCLK_100M- |---|
   |   DRD
 |   |
|>vbus
   -
|
   |
   |Controller
|   |
 |-||
   |
   |Pipe interface  |   |
USB 3.0 DRD  ||
   |  
  |---|
|||
   | |   PHY   |
  |UTMI+ Interface   |
|
   | | Link cont.|
|---|
   |
   | |-|
 |
|__|
   |
   |
   |__|


So how can we support SUSPEND_CLK?
Do we need to keep this SUSPEND_CLK enabled?

As of now my dts changes are wrong.
How about the changes below?

 _phy0 {
-   clocks = < CLK_USBD300>, < CLK_SCLK_USBPHY300>;
+   clocks = < CLK_SCLK_USBD300>, < CLK_SCLK_USBPHY300>;
clock-names = "phy", "ref";
samsung,pmu-syscon = <_system_controller>;
 };

[PATCH 1/1] vmbus: hvsock: add proper sync for vmbus_hvsock_device_unregister()

2017-10-10 Thread kys
From: Dexuan Cui 

Without the patch, vmbus_hvsock_device_unregister() can destroy the device
prematurely when close() is called, and can cause NULL dereferencing or
potential data loss (the last portion of the data stream may be dropped
prematurely).

Please consider this for 4.14.

Signed-off-by: Dexuan Cui 
Cc: Haiyang Zhang 
Cc: Stephen Hemminger 
Signed-off-by: K. Y. Srinivasan 
---
 drivers/hv/channel_mgmt.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 018d2e0f8ec5..379b0df123be 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -937,7 +937,10 @@ void vmbus_hvsock_device_unregister(struct vmbus_channel 
*channel)
 {
BUG_ON(!is_hvsock_channel(channel));
 
-   channel->rescind = true;
+   /* We always get a rescind msg when a connection is closed. */
+   while (!READ_ONCE(channel->probe_done) || !READ_ONCE(channel->rescind))
+   msleep(1);
+
vmbus_device_unregister(channel->device_obj);
 }
 EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);
-- 
2.14.1



[PATCH 1/1] vmbus: hvsock: add proper sync for vmbus_hvsock_device_unregister()

2017-10-10 Thread kys
From: Dexuan Cui 

Without the patch, vmbus_hvsock_device_unregister() can destroy the device
prematurely when close() is called, and can cause NULL dereferencing or
potential data loss (the last portion of the data stream may be dropped
prematurely).

Please consider this for 4.14.

Signed-off-by: Dexuan Cui 
Cc: Haiyang Zhang 
Cc: Stephen Hemminger 
Signed-off-by: K. Y. Srinivasan 
---
 drivers/hv/channel_mgmt.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 018d2e0f8ec5..379b0df123be 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -937,7 +937,10 @@ void vmbus_hvsock_device_unregister(struct vmbus_channel 
*channel)
 {
BUG_ON(!is_hvsock_channel(channel));
 
-   channel->rescind = true;
+   /* We always get a rescind msg when a connection is closed. */
+   while (!READ_ONCE(channel->probe_done) || !READ_ONCE(channel->rescind))
+   msleep(1);
+
vmbus_device_unregister(channel->device_obj);
 }
 EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);
-- 
2.14.1



[PATCH] ahci: Add support for Cavium's fifth generation SATA controller

2017-10-10 Thread Radha Mohan Chintakuntla
From: Radha Mohan Chintakuntla 

This patch adds support for Cavium's fifth generation SATA controller.
It is an on-chip controller and complies with AHCI 1.3.1. As the
controller uses 64-bit addresses it cannot use the standard AHCI BAR5
and so uses BAR4.

Signed-off-by: Radha Mohan Chintakuntla 
---
 drivers/ata/ahci.c |9 +++--
 1 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 9f78bb0..5443cb7 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -57,6 +57,7 @@ enum {
AHCI_PCI_BAR_STA2X11= 0,
AHCI_PCI_BAR_CAVIUM = 0,
AHCI_PCI_BAR_ENMOTUS= 2,
+   AHCI_PCI_BAR_CAVIUM_GEN5= 4,
AHCI_PCI_BAR_STANDARD   = 5,
 };
 
@@ -1570,8 +1571,12 @@ static int ahci_init_one(struct pci_dev *pdev, const 
struct pci_device_id *ent)
ahci_pci_bar = AHCI_PCI_BAR_STA2X11;
else if (pdev->vendor == 0x1c44 && pdev->device == 0x8000)
ahci_pci_bar = AHCI_PCI_BAR_ENMOTUS;
-   else if (pdev->vendor == 0x177d && pdev->device == 0xa01c)
-   ahci_pci_bar = AHCI_PCI_BAR_CAVIUM;
+   else if (pdev->vendor == PCI_VENDOR_ID_CAVIUM) {
+   if (pdev->device == 0xa01c)
+   ahci_pci_bar = AHCI_PCI_BAR_CAVIUM;
+   if (pdev->device == 0xa084)
+   ahci_pci_bar = AHCI_PCI_BAR_CAVIUM_GEN5;
+   }
 
/* acquire resources */
rc = pcim_enable_device(pdev);
-- 
1.7.1



[PATCH] ahci: Add support for Cavium's fifth generation SATA controller

2017-10-10 Thread Radha Mohan Chintakuntla
From: Radha Mohan Chintakuntla 

This patch adds support for Cavium's fifth generation SATA controller.
It is an on-chip controller and complies with AHCI 1.3.1. As the
controller uses 64-bit addresses it cannot use the standard AHCI BAR5
and so uses BAR4.

Signed-off-by: Radha Mohan Chintakuntla 
---
 drivers/ata/ahci.c |9 +++--
 1 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 9f78bb0..5443cb7 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -57,6 +57,7 @@ enum {
AHCI_PCI_BAR_STA2X11= 0,
AHCI_PCI_BAR_CAVIUM = 0,
AHCI_PCI_BAR_ENMOTUS= 2,
+   AHCI_PCI_BAR_CAVIUM_GEN5= 4,
AHCI_PCI_BAR_STANDARD   = 5,
 };
 
@@ -1570,8 +1571,12 @@ static int ahci_init_one(struct pci_dev *pdev, const 
struct pci_device_id *ent)
ahci_pci_bar = AHCI_PCI_BAR_STA2X11;
else if (pdev->vendor == 0x1c44 && pdev->device == 0x8000)
ahci_pci_bar = AHCI_PCI_BAR_ENMOTUS;
-   else if (pdev->vendor == 0x177d && pdev->device == 0xa01c)
-   ahci_pci_bar = AHCI_PCI_BAR_CAVIUM;
+   else if (pdev->vendor == PCI_VENDOR_ID_CAVIUM) {
+   if (pdev->device == 0xa01c)
+   ahci_pci_bar = AHCI_PCI_BAR_CAVIUM;
+   if (pdev->device == 0xa084)
+   ahci_pci_bar = AHCI_PCI_BAR_CAVIUM_GEN5;
+   }
 
/* acquire resources */
rc = pcim_enable_device(pdev);
-- 
1.7.1



Re: [PATCH 1/2] mm, memory_hotplug: do not fail offlining too early

2017-10-10 Thread Michael Ellerman
Michael Ellerman  writes:
> Michal Hocko  writes:
>> On Tue 10-10-17 23:05:08, Michael Ellerman wrote:
>>> Michal Hocko  writes:
>>> > From: Michal Hocko 
>>> > Memory offlining can fail just too eagerly under a heavy memory pressure.
>>> >
>>> > [ 5410.336792] page:ea22a646bd00 count:255 mapcount:252 
>>> > mapping:88ff926c9f38 index:0x3
>>> > [ 5410.336809] flags: 0x9855fe40010048(uptodate|active|mappedtodisk)
>>> > [ 5410.336811] page dumped because: isolation failed
>>> > [ 5410.336813] page->mem_cgroup:8801cd662000
>>> > [ 5420.655030] memory offlining [mem 0x18b58000-0x18b5] failed
>>> >
>>> > Isolation has failed here because the page is not on LRU. Most probably
>>> > because it was on the pcp LRU cache or it has been removed from the LRU
>>> > already but it hasn't been freed yet. In both cases the page doesn't look
>>> > non-migrable so retrying more makes sense.
>>> 
>>> This breaks offline for me.
>>> 
>>> Prior to this commit:
>>>   /sys/devices/system/memory/memory0# time echo 0 > online
>>>   -bash: echo: write error: Device or resource busy
>>>   
>>>   real  0m0.001s
>>>   user  0m0.000s
>>>   sys   0m0.001s
>>> 
>>> After:
>>>   /sys/devices/system/memory/memory0# time echo 0 > online
>>>   -bash: echo: write error: Device or resource busy
>>>   
>>>   real  2m0.009s
>>>   user  0m0.000s
>>>   sys   1m25.035s
>>> 
>>> There's no way that block can be removed, it contains the kernel text,
>>> so it should instantly fail - which it used to.
>>
>> OK, that means that start_isolate_page_range should have failed but it
>> hasn't for some reason. I strongly suspect has_unmovable_pages is doing
>> something wrong. Is the kernel text marked somehow? E.g. PageReserved?
>
> I'm not sure how the text is marked, will have to dig into that.

Yeah it's reserved:

  $ grep __init_begin /proc/kallsyms
  c0d7 T __init_begin
  $ ./page-types -r -a 0x0,0xd7
   flagspage-count   MB  symbolic-flags 
long-symbolic-flags
  0x0001   215   13  
__r___ reserved
   total   215   13


I added some printks, we're getting EBUSY from do_migrate_range(pfn, end_pfn).

So we seem to just have an infinite loop:

  repeat:
/* start memory hot removal */
ret = -EINTR;
if (signal_pending(current))
goto failed_removal;
  
cond_resched();
lru_add_drain_all_cpuslocked();
drain_all_pages(zone);
  
pfn = scan_movable_pages(start_pfn, end_pfn);
if (pfn) { /* We have movable pages */
ret = do_migrate_range(pfn, end_pfn);
printk_ratelimited("memory-hotplug: migrate range returned 
%ld\n", ret);
goto repeat;
}


eg:

  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  __offline_pages: 354031 callbacks suppressed
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  __offline_pages: 355794 callbacks suppressed


cheers


Re: [PATCH 1/2] mm, memory_hotplug: do not fail offlining too early

2017-10-10 Thread Michael Ellerman
Michael Ellerman  writes:
> Michal Hocko  writes:
>> On Tue 10-10-17 23:05:08, Michael Ellerman wrote:
>>> Michal Hocko  writes:
>>> > From: Michal Hocko 
>>> > Memory offlining can fail just too eagerly under a heavy memory pressure.
>>> >
>>> > [ 5410.336792] page:ea22a646bd00 count:255 mapcount:252 
>>> > mapping:88ff926c9f38 index:0x3
>>> > [ 5410.336809] flags: 0x9855fe40010048(uptodate|active|mappedtodisk)
>>> > [ 5410.336811] page dumped because: isolation failed
>>> > [ 5410.336813] page->mem_cgroup:8801cd662000
>>> > [ 5420.655030] memory offlining [mem 0x18b58000-0x18b5] failed
>>> >
>>> > Isolation has failed here because the page is not on LRU. Most probably
>>> > because it was on the pcp LRU cache or it has been removed from the LRU
>>> > already but it hasn't been freed yet. In both cases the page doesn't look
>>> > non-migrable so retrying more makes sense.
>>> 
>>> This breaks offline for me.
>>> 
>>> Prior to this commit:
>>>   /sys/devices/system/memory/memory0# time echo 0 > online
>>>   -bash: echo: write error: Device or resource busy
>>>   
>>>   real  0m0.001s
>>>   user  0m0.000s
>>>   sys   0m0.001s
>>> 
>>> After:
>>>   /sys/devices/system/memory/memory0# time echo 0 > online
>>>   -bash: echo: write error: Device or resource busy
>>>   
>>>   real  2m0.009s
>>>   user  0m0.000s
>>>   sys   1m25.035s
>>> 
>>> There's no way that block can be removed, it contains the kernel text,
>>> so it should instantly fail - which it used to.
>>
>> OK, that means that start_isolate_page_range should have failed but it
>> hasn't for some reason. I strongly suspect has_unmovable_pages is doing
>> something wrong. Is the kernel text marked somehow? E.g. PageReserved?
>
> I'm not sure how the text is marked, will have to dig into that.

Yeah it's reserved:

  $ grep __init_begin /proc/kallsyms
  c0d7 T __init_begin
  $ ./page-types -r -a 0x0,0xd7
   flagspage-count   MB  symbolic-flags 
long-symbolic-flags
  0x0001   215   13  
__r___ reserved
   total   215   13


I added some printks, we're getting EBUSY from do_migrate_range(pfn, end_pfn).

So we seem to just have an infinite loop:

  repeat:
/* start memory hot removal */
ret = -EINTR;
if (signal_pending(current))
goto failed_removal;
  
cond_resched();
lru_add_drain_all_cpuslocked();
drain_all_pages(zone);
  
pfn = scan_movable_pages(start_pfn, end_pfn);
if (pfn) { /* We have movable pages */
ret = do_migrate_range(pfn, end_pfn);
printk_ratelimited("memory-hotplug: migrate range returned 
%ld\n", ret);
goto repeat;
}


eg:

  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  __offline_pages: 354031 callbacks suppressed
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  memory-hotplug: migrate range returned -16
  __offline_pages: 355794 callbacks suppressed


cheers


[PATCH v4 1/5] of/platform: Generalize /reserved-memory handling

2017-10-10 Thread Bjorn Andersson
By iterating over all /reserved-memory child nodes and matching each one to
a list of compatibles that we want to treat specially, we can easily
extend the list of compatibles to handle - without having to resort to
of_platform_populate() that would create unnecessary platform_devices.

Reviewed-by: Rob Herring 
Signed-off-by: Bjorn Andersson 
---

Changes since v3:
- Picked up Rob's R-b

Changes since v2:
- Simplify logic per Rob's suggestion.

Changes since v1:
- New patch
 drivers/of/platform.c | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index b19524623498..ee89f096f0f3 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -497,6 +497,11 @@ int of_platform_default_populate(struct device_node *root,
 EXPORT_SYMBOL_GPL(of_platform_default_populate);
 
 #ifndef CONFIG_PPC
+static const struct of_device_id reserved_mem_matches[] = {
+   { .compatible = "ramoops" },
+   {}
+};
+
 static int __init of_platform_default_populate_init(void)
 {
struct device_node *node;
@@ -505,15 +510,12 @@ static int __init of_platform_default_populate_init(void)
return -ENODEV;
 
/*
-* Handle ramoops explicitly, since it is inside /reserved-memory,
-* which lacks a "compatible" property.
+* Handle certain compatibles explicitly, since we don't want to create
+* platform_devices for every node in /reserved-memory with a
+* "compatible",
 */
-   node = of_find_node_by_path("/reserved-memory");
-   if (node) {
-   node = of_find_compatible_node(node, NULL, "ramoops");
-   if (node)
-   of_platform_device_create(node, NULL, NULL);
-   }
+   for_each_matching_node(node, reserved_mem_matches)
+   of_platform_device_create(node, NULL, NULL);
 
/* Populate everything else. */
of_platform_default_populate(NULL, NULL, NULL);
-- 
2.12.0



[PATCH v4 1/5] of/platform: Generalize /reserved-memory handling

2017-10-10 Thread Bjorn Andersson
By iterating over all /reserved-memory child nodes and matching each one to
a list of compatibles that we want to treat specially, we can easily
extend the list of compatibles to handle - without having to resort to
of_platform_populate() that would create unnecessary platform_devices.

Reviewed-by: Rob Herring 
Signed-off-by: Bjorn Andersson 
---

Changes since v3:
- Picked up Rob's R-b

Changes since v2:
- Simplify logic per Rob's suggestion.

Changes since v1:
- New patch
 drivers/of/platform.c | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index b19524623498..ee89f096f0f3 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -497,6 +497,11 @@ int of_platform_default_populate(struct device_node *root,
 EXPORT_SYMBOL_GPL(of_platform_default_populate);
 
 #ifndef CONFIG_PPC
+static const struct of_device_id reserved_mem_matches[] = {
+   { .compatible = "ramoops" },
+   {}
+};
+
 static int __init of_platform_default_populate_init(void)
 {
struct device_node *node;
@@ -505,15 +510,12 @@ static int __init of_platform_default_populate_init(void)
return -ENODEV;
 
/*
-* Handle ramoops explicitly, since it is inside /reserved-memory,
-* which lacks a "compatible" property.
+* Handle certain compatibles explicitly, since we don't want to create
+* platform_devices for every node in /reserved-memory with a
+* "compatible",
 */
-   node = of_find_node_by_path("/reserved-memory");
-   if (node) {
-   node = of_find_compatible_node(node, NULL, "ramoops");
-   if (node)
-   of_platform_device_create(node, NULL, NULL);
-   }
+   for_each_matching_node(node, reserved_mem_matches)
+   of_platform_device_create(node, NULL, NULL);
 
/* Populate everything else. */
of_platform_default_populate(NULL, NULL, NULL);
-- 
2.12.0



[PATCH v4 2/5] of: reserved_mem: Accessor for acquiring reserved_mem

2017-10-10 Thread Bjorn Andersson
In some cases drivers referencing a reserved-memory region might want to
remap the entire region, but when defining the reserved-memory by "size"
the client driver has no means to know the associated base address of
the reserved memory region.

This patch adds an accessor for such drivers to acquire a handle to
their associated reserved-memory for this purpose.

A complicating factor for the implementation is that the reserved_mem
objects are created from the flattened DeviceTree, as such we can't
use the device_node address for comparison. Fortunately the name of the
node will be used as "name" of the reserved_mem and will be used when
building the full_name, so we can compare the "name" with the basename
of the full_name to find the match.

Reviewed-by: Rob Herring 
Signed-off-by: Bjorn Andersson 
---

Changes since v3:
- Renamed function from _get() to _lookup()
- Picked up Rob's R-b

Changes since v2:
- None

Changes since v1:
- Previous patch provided interface to resolve memory-region reference, instead
  of direct lookup by device_node

 drivers/of/of_reserved_mem.c| 26 ++
 include/linux/of_reserved_mem.h |  5 +
 2 files changed, 31 insertions(+)

diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index d507c3569a88..b108c7a1f74c 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -397,3 +397,29 @@ void of_reserved_mem_device_release(struct device *dev)
rmem->ops->device_release(rmem, dev);
 }
 EXPORT_SYMBOL_GPL(of_reserved_mem_device_release);
+
+/**
+ * of_reserved_mem_lookup() - acquire reserved_mem from a device node
+ * @np:node pointer of the desired reserved-memory region
+ *
+ * This function allows drivers to acquire a reference to the reserved_mem
+ * struct based on a device node handle.
+ *
+ * Returns a reserved_mem reference, or NULL on error.
+ */
+struct reserved_mem *of_reserved_mem_lookup(struct device_node *np)
+{
+   const char *name;
+   int i;
+
+   if (!np->full_name)
+   return NULL;
+
+   name = kbasename(np->full_name);
+   for (i = 0; i < reserved_mem_count; i++)
+   if (!strcmp(reserved_mem[i].name, name))
+   return &reserved_mem[i];
+
+   return NULL;
+}
+EXPORT_SYMBOL_GPL(of_reserved_mem_lookup);
diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
index f8e1992d6423..c58f780104f9 100644
--- a/include/linux/of_reserved_mem.h
+++ b/include/linux/of_reserved_mem.h
@@ -44,6 +44,7 @@ int early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
 void fdt_init_reserved_mem(void);
 void fdt_reserved_mem_save_node(unsigned long node, const char *uname,
   phys_addr_t base, phys_addr_t size);
+struct reserved_mem *of_reserved_mem_lookup(struct device_node *np);
 #else
 static inline int of_reserved_mem_device_init_by_idx(struct device *dev,
struct device_node *np, int idx)
@@ -55,6 +56,10 @@ static inline void of_reserved_mem_device_release(struct 
device *pdev) { }
 static inline void fdt_init_reserved_mem(void) { }
 static inline void fdt_reserved_mem_save_node(unsigned long node,
const char *uname, phys_addr_t base, phys_addr_t size) { }
+static inline struct reserved_mem *of_reserved_mem_lookup(struct device_node 
*np)
+{
+   return NULL;
+}
 #endif
 
 /**
-- 
2.12.0



[PATCH v4 2/5] of: reserved_mem: Accessor for acquiring reserved_mem

2017-10-10 Thread Bjorn Andersson
In some cases drivers referencing a reserved-memory region might want to
remap the entire region, but when defining the reserved-memory by "size"
the client driver has no means to know the associated base address of
the reserved memory region.

This patch adds an accessor for such drivers to acquire a handle to
their associated reserved-memory for this purpose.

A complicating factor for the implementation is that the reserved_mem
objects are created from the flattened DeviceTree, as such we can't
use the device_node address for comparison. Fortunately the name of the
node will be used as "name" of the reserved_mem and will be used when
building the full_name, so we can compare the "name" with the basename
of the full_name to find the match.

Reviewed-by: Rob Herring 
Signed-off-by: Bjorn Andersson 
---

Changes since v3:
- Renamed function from _get() to _lookup()
- Picked up Rob's R-b

Changes since v2:
- None

Changes since v1:
- Previous patch provided interface to resolve memory-region reference, instead
  of direct lookup by device_node

 drivers/of/of_reserved_mem.c| 26 ++
 include/linux/of_reserved_mem.h |  5 +
 2 files changed, 31 insertions(+)

diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index d507c3569a88..b108c7a1f74c 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -397,3 +397,29 @@ void of_reserved_mem_device_release(struct device *dev)
rmem->ops->device_release(rmem, dev);
 }
 EXPORT_SYMBOL_GPL(of_reserved_mem_device_release);
+
+/**
+ * of_reserved_mem_lookup() - acquire reserved_mem from a device node
+ * @np:node pointer of the desired reserved-memory region
+ *
+ * This function allows drivers to acquire a reference to the reserved_mem
+ * struct based on a device node handle.
+ *
+ * Returns a reserved_mem reference, or NULL on error.
+ */
+struct reserved_mem *of_reserved_mem_lookup(struct device_node *np)
+{
+   const char *name;
+   int i;
+
+   if (!np->full_name)
+   return NULL;
+
+   name = kbasename(np->full_name);
+   for (i = 0; i < reserved_mem_count; i++)
+   if (!strcmp(reserved_mem[i].name, name))
+   return &reserved_mem[i];
+
+   return NULL;
+}
+EXPORT_SYMBOL_GPL(of_reserved_mem_lookup);
diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
index f8e1992d6423..c58f780104f9 100644
--- a/include/linux/of_reserved_mem.h
+++ b/include/linux/of_reserved_mem.h
@@ -44,6 +44,7 @@ int early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
 void fdt_init_reserved_mem(void);
 void fdt_reserved_mem_save_node(unsigned long node, const char *uname,
   phys_addr_t base, phys_addr_t size);
+struct reserved_mem *of_reserved_mem_lookup(struct device_node *np);
 #else
 static inline int of_reserved_mem_device_init_by_idx(struct device *dev,
struct device_node *np, int idx)
@@ -55,6 +56,10 @@ static inline void of_reserved_mem_device_release(struct 
device *pdev) { }
 static inline void fdt_init_reserved_mem(void) { }
 static inline void fdt_reserved_mem_save_node(unsigned long node,
const char *uname, phys_addr_t base, phys_addr_t size) { }
+static inline struct reserved_mem *of_reserved_mem_lookup(struct device_node 
*np)
+{
+   return NULL;
+}
 #endif
 
 /**
-- 
2.12.0



[PATCH v4 5/5] arm64: dts: msm8916: Mark rmtfs node as qcom,rmtfs-mem compatible

2017-10-10 Thread Bjorn Andersson
Now that we have a binding defined for the shared file system memory use
this to describe the rmtfs memory region.

Signed-off-by: Bjorn Andersson 
---

Changes since v3:
- None

Changes since v2:
- Update compatible

Changes since v1:
- New patch

 arch/arm64/boot/dts/qcom/msm8916.dtsi | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi 
b/arch/arm64/boot/dts/qcom/msm8916.dtsi
index 039991f80831..f6ae6f9b27e1 100644
--- a/arch/arm64/boot/dts/qcom/msm8916.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi
@@ -69,8 +69,11 @@
};
 
rmtfs@8670 {
+   compatible = "qcom,rmtfs-mem";
reg = <0x0 0x8670 0x0 0xe>;
no-map;
+
+   qcom,client-id = <1>;
};
 
rfsa@867e0 {
-- 
2.12.0



[PATCH v4 5/5] arm64: dts: msm8916: Mark rmtfs node as qcom,rmtfs-mem compatible

2017-10-10 Thread Bjorn Andersson
Now that we have a binding defined for the shared file system memory use
this to describe the rmtfs memory region.

Signed-off-by: Bjorn Andersson 
---

Changes since v3:
- None

Changes since v2:
- Update compatible

Changes since v1:
- New patch

 arch/arm64/boot/dts/qcom/msm8916.dtsi | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi 
b/arch/arm64/boot/dts/qcom/msm8916.dtsi
index 039991f80831..f6ae6f9b27e1 100644
--- a/arch/arm64/boot/dts/qcom/msm8916.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi
@@ -69,8 +69,11 @@
};
 
rmtfs@8670 {
+   compatible = "qcom,rmtfs-mem";
reg = <0x0 0x8670 0x0 0xe>;
no-map;
+
+   qcom,client-id = <1>;
};
 
rfsa@867e0 {
-- 
2.12.0



[PATCH v4 4/5] soc: qcom: Remote filesystem memory driver

2017-10-10 Thread Bjorn Andersson
The Qualcomm remote file system protocol is used by certain remoteprocs,
in particular the modem, to read and write persistent storage in
platforms where only the application CPU has physical storage access.

The protocol is based on a set of QMI-encoded control-messages and a
shared memory buffer for exchanging the data. This driver implements the
latter, providing the user space service access to the carved out chunk
of memory.

Signed-off-by: Bjorn Andersson 
---

Changes since v3:
- Dropped linux/of_fdt.h include
- Update to follow name change of of_reserved_mem_lookup()

Changes since v2:
- Renamed driver to "rmtfs_mem" in attempt to clarify that this is not a
  file system, but some chunk of memory.

Changes since v1:
- RFSA device represented directly by the reserved-memory node

 drivers/of/platform.c|   1 +
 drivers/soc/qcom/Kconfig |  11 ++
 drivers/soc/qcom/Makefile|   1 +
 drivers/soc/qcom/rmtfs_mem.c | 271 +++
 4 files changed, 284 insertions(+)
 create mode 100644 drivers/soc/qcom/rmtfs_mem.c

diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index ee89f096f0f3..e7548c9a9915 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -498,6 +498,7 @@ EXPORT_SYMBOL_GPL(of_platform_default_populate);
 
 #ifndef CONFIG_PPC
 static const struct of_device_id reserved_mem_matches[] = {
+   { .compatible = "qcom,rmtfs-mem" },
{ .compatible = "ramoops" },
{}
 };
diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 9fca977ef18d..6dff89eaf3f8 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -24,6 +24,17 @@ config QCOM_PM
  modes. It interface with various system drivers to put the cores in
  low power modes.
 
+config QCOM_RMTFS_MEM
+   tristate "Qualcomm Remote Filesystem memory driver"
+   depends on ARCH_QCOM
+   help
+ The Qualcomm remote filesystem memory driver is used for allocating
+ and exposing regions of shared memory with remote processors for the
+ purpose of exchanging sector-data between the remote filesystem
+ service and its clients.
+
+ Say y here if you intend to boot the modem remoteproc.
+
 config QCOM_SMEM
tristate "Qualcomm Shared Memory Manager (SMEM)"
depends on ARCH_QCOM
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index 414f0de274fa..541c1f40d126 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -1,6 +1,7 @@
 obj-$(CONFIG_QCOM_GSBI)+=  qcom_gsbi.o
 obj-$(CONFIG_QCOM_MDT_LOADER)  += mdt_loader.o
 obj-$(CONFIG_QCOM_PM)  +=  spm.o
+obj-$(CONFIG_QCOM_RMTFS_MEM)   += rmtfs_mem.o
 obj-$(CONFIG_QCOM_SMD_RPM) += smd-rpm.o
 obj-$(CONFIG_QCOM_SMEM) += smem.o
 obj-$(CONFIG_QCOM_SMEM_STATE) += smem_state.o
diff --git a/drivers/soc/qcom/rmtfs_mem.c b/drivers/soc/qcom/rmtfs_mem.c
new file mode 100644
index ..f6f2f0cb3b3a
--- /dev/null
+++ b/drivers/soc/qcom/rmtfs_mem.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 2017 Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define QCOM_RMTFS_MEM_DEV_MAX (MINORMASK + 1)
+
+static dev_t qcom_rmtfs_mem_major;
+
+struct qcom_rmtfs_mem {
+   struct device dev;
+   struct cdev cdev;
+
+   void *base;
+   phys_addr_t addr;
+   phys_addr_t size;
+
+   unsigned int client_id;
+};
+
+static ssize_t qcom_rmtfs_mem_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf);
+
+static DEVICE_ATTR(phys_addr, 0400, qcom_rmtfs_mem_show, NULL);
+static DEVICE_ATTR(size, 0400, qcom_rmtfs_mem_show, NULL);
+static DEVICE_ATTR(client_id, 0400, qcom_rmtfs_mem_show, NULL);
+
+static ssize_t qcom_rmtfs_mem_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+   struct qcom_rmtfs_mem *rmtfs_mem = container_of(dev,
+   struct qcom_rmtfs_mem,
+   dev);
+
+   if (attr == &dev_attr_phys_addr)
+   return sprintf(buf, "%pa\n", &rmtfs_mem->addr);
+   if (attr == &dev_attr_size)
+   return sprintf(buf, "%pa\n", &rmtfs_mem->size);
+   if (attr == &dev_attr_client_id)
+   return sprintf(buf, "%d\n", 

[PATCH v4 0/5] Qualcomm remote filesystem shared memory driver

2017-10-10 Thread Bjorn Andersson
Some remote processors (in particular the modem) found in Qualcomm platforms
stores configuration parameters and other data in a file system. As the remotes
does not have direct storage access it needs to relay block accesses through a
service running on the application CPU.

The memory is described in DeviceTree by a new reserved-memory compatible and
the implementation provides the user space service a read/write interface to
this chunk of memory.

Bjorn Andersson (5):
  of/platform: Generalize /reserved-memory handling
  of: reserved_mem: Accessor for acquiring reserved_mem
  dt-binding: soc: qcom: Add binding for rmtfs memory
  soc: qcom: Remote filesystem memory driver
  arm64: dts: msm8916: Mark rmtfs node as qcom,rmtfs-mem compatible

 .../bindings/reserved-memory/qcom,rmtfs-mem.txt|  51 
 arch/arm64/boot/dts/qcom/msm8916.dtsi  |   3 +
 drivers/of/of_reserved_mem.c   |  26 ++
 drivers/of/platform.c  |  19 +-
 drivers/soc/qcom/Kconfig   |  11 +
 drivers/soc/qcom/Makefile  |   1 +
 drivers/soc/qcom/rmtfs_mem.c   | 271 +
 include/linux/of_reserved_mem.h|   5 +
 8 files changed, 379 insertions(+), 8 deletions(-)
 create mode 100644 
Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.txt
 create mode 100644 drivers/soc/qcom/rmtfs_mem.c

-- 
2.12.0



[PATCH v4 0/5] Qualcomm remote filesystem shared memory driver

2017-10-10 Thread Bjorn Andersson
Some remote processors (in particular the modem) found in Qualcomm platforms
stores configuration parameters and other data in a file system. As the remotes
does not have direct storage access it needs to relay block accesses through a
service running on the application CPU.

The memory is described in DeviceTree by a new reserved-memory compatible and
the implementation provides the user space service a read/write interface to
this chunk of memory.

Bjorn Andersson (5):
  of/platform: Generalize /reserved-memory handling
  of: reserved_mem: Accessor for acquiring reserved_mem
  dt-binding: soc: qcom: Add binding for rmtfs memory
  soc: qcom: Remote filesystem memory driver
  arm64: dts: msm8916: Mark rmtfs node as qcom,rmtfs-mem compatible

 .../bindings/reserved-memory/qcom,rmtfs-mem.txt|  51 
 arch/arm64/boot/dts/qcom/msm8916.dtsi  |   3 +
 drivers/of/of_reserved_mem.c   |  26 ++
 drivers/of/platform.c  |  19 +-
 drivers/soc/qcom/Kconfig   |  11 +
 drivers/soc/qcom/Makefile  |   1 +
 drivers/soc/qcom/rmtfs_mem.c   | 271 +
 include/linux/of_reserved_mem.h|   5 +
 8 files changed, 379 insertions(+), 8 deletions(-)
 create mode 100644 
Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.txt
 create mode 100644 drivers/soc/qcom/rmtfs_mem.c

-- 
2.12.0



[PATCH v4 4/5] soc: qcom: Remote filesystem memory driver

2017-10-10 Thread Bjorn Andersson
The Qualcomm remote file system protocol is used by certain remoteprocs,
in particular the modem, to read and write persistent storage in
platforms where only the application CPU has physical storage access.

The protocol is based on a set of QMI-encoded control-messages and a
shared memory buffer for exchanging the data. This driver implements the
latter, providing the user space service access to the carved out chunk
of memory.

Signed-off-by: Bjorn Andersson 
---

Changes since v3:
- Dropped linux/of_fdt.h include
- Update to follow name change of of_reserved_mem_lookup()

Changes since v2:
- Renamed driver to "rmtfs_mem" in attempt to clarify that this is not a
  file system, but some chunk of memory.

Changes since v1:
- RFSA device represented directly by the reserved-memory node

 drivers/of/platform.c|   1 +
 drivers/soc/qcom/Kconfig |  11 ++
 drivers/soc/qcom/Makefile|   1 +
 drivers/soc/qcom/rmtfs_mem.c | 271 +++
 4 files changed, 284 insertions(+)
 create mode 100644 drivers/soc/qcom/rmtfs_mem.c

diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index ee89f096f0f3..e7548c9a9915 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -498,6 +498,7 @@ EXPORT_SYMBOL_GPL(of_platform_default_populate);
 
 #ifndef CONFIG_PPC
 static const struct of_device_id reserved_mem_matches[] = {
+   { .compatible = "qcom,rmtfs-mem" },
{ .compatible = "ramoops" },
{}
 };
diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 9fca977ef18d..6dff89eaf3f8 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -24,6 +24,17 @@ config QCOM_PM
  modes. It interface with various system drivers to put the cores in
  low power modes.
 
+config QCOM_RMTFS_MEM
+   tristate "Qualcomm Remote Filesystem memory driver"
+   depends on ARCH_QCOM
+   help
+ The Qualcomm remote filesystem memory driver is used for allocating
+ and exposing regions of shared memory with remote processors for the
+ purpose of exchanging sector-data between the remote filesystem
+ service and its clients.
+
+ Say y here if you intend to boot the modem remoteproc.
+
 config QCOM_SMEM
tristate "Qualcomm Shared Memory Manager (SMEM)"
depends on ARCH_QCOM
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index 414f0de274fa..541c1f40d126 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -1,6 +1,7 @@
 obj-$(CONFIG_QCOM_GSBI)+=  qcom_gsbi.o
 obj-$(CONFIG_QCOM_MDT_LOADER)  += mdt_loader.o
 obj-$(CONFIG_QCOM_PM)  +=  spm.o
+obj-$(CONFIG_QCOM_RMTFS_MEM)   += rmtfs_mem.o
 obj-$(CONFIG_QCOM_SMD_RPM) += smd-rpm.o
 obj-$(CONFIG_QCOM_SMEM) += smem.o
 obj-$(CONFIG_QCOM_SMEM_STATE) += smem_state.o
diff --git a/drivers/soc/qcom/rmtfs_mem.c b/drivers/soc/qcom/rmtfs_mem.c
new file mode 100644
index ..f6f2f0cb3b3a
--- /dev/null
+++ b/drivers/soc/qcom/rmtfs_mem.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 2017 Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define QCOM_RMTFS_MEM_DEV_MAX (MINORMASK + 1)
+
+static dev_t qcom_rmtfs_mem_major;
+
+struct qcom_rmtfs_mem {
+   struct device dev;
+   struct cdev cdev;
+
+   void *base;
+   phys_addr_t addr;
+   phys_addr_t size;
+
+   unsigned int client_id;
+};
+
+static ssize_t qcom_rmtfs_mem_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf);
+
+static DEVICE_ATTR(phys_addr, 0400, qcom_rmtfs_mem_show, NULL);
+static DEVICE_ATTR(size, 0400, qcom_rmtfs_mem_show, NULL);
+static DEVICE_ATTR(client_id, 0400, qcom_rmtfs_mem_show, NULL);
+
+static ssize_t qcom_rmtfs_mem_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+   struct qcom_rmtfs_mem *rmtfs_mem = container_of(dev,
+   struct qcom_rmtfs_mem,
+   dev);
+
+   if (attr == &dev_attr_phys_addr)
+   return sprintf(buf, "%pa\n", &rmtfs_mem->addr);
+   if (attr == &dev_attr_size)
+   return sprintf(buf, "%pa\n", &rmtfs_mem->size);
+   if (attr == &dev_attr_client_id)
+   return sprintf(buf, "%d\n", rmtfs_mem->client_id);
+
+   

[PATCH v4 3/5] dt-binding: soc: qcom: Add binding for rmtfs memory

2017-10-10 Thread Bjorn Andersson
This adds the binding for describing shared memory used to exchange file
system blocks between the RMTFS client and service. A client for this is
generally found in the modem firmware and is used for accessing
persistent storage for things such as radio calibration.

Acked-by: Rob Herring 
Signed-off-by: Bjorn Andersson 
---

Changes since v3:
- None 

Changes since v2:
- Renamed compatible to "rmtfs-mem" in attempt to clarify that this is not a
  file system, but some chunk of memory.

Changed since v1:
- Memory described in a single reserved-memory node, rather than by reference
  from a "dummy" node
- qcom,vmid added

 .../bindings/reserved-memory/qcom,rmtfs-mem.txt| 51 ++
 1 file changed, 51 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.txt

diff --git 
a/Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.txt 
b/Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.txt
new file mode 100644
index ..8562ba1dce69
--- /dev/null
+++ b/Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.txt
@@ -0,0 +1,51 @@
+Qualcomm Remote File System Memory binding
+
+This binding describes the Qualcomm remote filesystem memory, which serves the
+purpose of describing the shared memory region used for remote processors to
+access block device data using the Remote Filesystem protocol.
+
+- compatible:
+   Usage: required
+   Value type: 
+   Definition: must be:
+   "qcom,rmtfs-mem"
+
+- reg:
+   Usage: required for static allocation
+   Value type: 
+   Definition: must specify base address and size of the memory region,
+   as described in reserved-memory.txt
+
+- size:
+   Usage: required for dynamic allocation
+   Value type: 
+   Definition: must specify a size of the memory region, as described in
+   reserved-memory.txt
+
+- qcom,client-id:
+   Usage: required
+   Value type: 
+   Definition: identifier of the client to use this region for buffers.
+
+- qcom,vmid:
+   Usage: optional
+   Value type: 
+   Definition: vmid of the remote processor, to set up memory protection.
+
+= EXAMPLE
+The following example shows the remote filesystem memory setup for APQ8016,
+with the rmtfs region for the Hexagon DSP (id #1) located at 0x8670.
+
+   reserved-memory {
+   #address-cells = <2>;
+   #size-cells = <2>;
+   ranges;
+
+   rmtfs@8670 {
+   compatible = "qcom,rmtfs-mem";
+   reg = <0x0 0x8670 0x0 0xe>;
+   no-map;
+
+   qcom,client-id = <1>;
+   };
+   };
-- 
2.12.0



[PATCH v4 3/5] dt-binding: soc: qcom: Add binding for rmtfs memory

2017-10-10 Thread Bjorn Andersson
This adds the binding for describing shared memory used to exchange file
system blocks between the RMTFS client and service. A client for this is
generally found in the modem firmware and is used for accessing
persistent storage for things such as radio calibration.

Acked-by: Rob Herring 
Signed-off-by: Bjorn Andersson 
---

Changes since v3:
- None 

Changes since v2:
- Renamed compatible to "rmtfs-mem" in attempt to clarify that this is not a
  file system, but some chunk of memory.

Changed since v1:
- Memory described in a single reserved-memory node, rather than by reference
  from a "dummy" node
- qcom,vmid added

 .../bindings/reserved-memory/qcom,rmtfs-mem.txt| 51 ++
 1 file changed, 51 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.txt

diff --git 
a/Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.txt 
b/Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.txt
new file mode 100644
index ..8562ba1dce69
--- /dev/null
+++ b/Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.txt
@@ -0,0 +1,51 @@
+Qualcomm Remote File System Memory binding
+
+This binding describes the Qualcomm remote filesystem memory, which serves the
+purpose of describing the shared memory region used for remote processors to
+access block device data using the Remote Filesystem protocol.
+
+- compatible:
+   Usage: required
+   Value type: 
+   Definition: must be:
+   "qcom,rmtfs-mem"
+
+- reg:
+   Usage: required for static allocation
+   Value type: 
+   Definition: must specify base address and size of the memory region,
+   as described in reserved-memory.txt
+
+- size:
+   Usage: required for dynamic allocation
+   Value type: 
+   Definition: must specify a size of the memory region, as described in
+   reserved-memory.txt
+
+- qcom,client-id:
+   Usage: required
+   Value type: 
+   Definition: identifier of the client to use this region for buffers.
+
+- qcom,vmid:
+   Usage: optional
+   Value type: 
+   Definition: vmid of the remote processor, to set up memory protection.
+
+= EXAMPLE
+The following example shows the remote filesystem memory setup for APQ8016,
+with the rmtfs region for the Hexagon DSP (id #1) located at 0x8670.
+
+   reserved-memory {
+   #address-cells = <2>;
+   #size-cells = <2>;
+   ranges;
+
+   rmtfs@8670 {
+   compatible = "qcom,rmtfs-mem";
+   reg = <0x0 0x8670 0x0 0xe>;
+   no-map;
+
+   qcom,client-id = <1>;
+   };
+   };
-- 
2.12.0



[Intel-gfx] [GVT-g] [ANNOUNCE] 2017-Q3 release of XenGT (Intel GVT-g for Xen)

2017-10-10 Thread Xu, Terrence
Hi all,

We are pleased to announce an update of Intel GVT-g for Xen.

Intel GVT-g is a full GPU virtualization solution with mediated pass-through, 
starting from 4th generation Intel Core(TM) processors with Intel processor 
graphics. A virtual GPU instance is maintained for each VM, with part of 
performance critical resources directly assigned. The capability of running 
native graphics driver inside a VM, without hypervisor intervention in 
performance critical paths, achieves a good balance among performance, feature, 
and sharing capability. GVT-g for Xen hypervisor is XenGT.


Repositories
-    Xen :  https://github.com/01org/igvtg-xen (tag: 2017-q3-xengt-stable-4.9)
-    Kernel: https://github.com/01org/gvt-linux/ (tag: 2017-q3-gvt-stable-4.12)
-    Qemu: https://github.com/01org/igvtg-qemu (tag: 2017-q3-stable-2.9.0)


This update consists of:
-    Kernel version upgraded to 4.12 from 4.11.
-    Live migration feature preliminary supported.
-    QoS feature preliminary supported.
-    IOMMU feature supported.
-    OVMF feature supported.
-    VGPU reset feature optimization, with related issues be fixed.
-    Supported server platforms: Intel(r) Xeon(r) E3_v4, E3_v5 and E3_v6 with 
Intel Graphics processor, E3_v6 is new supported platform.
-    Supported client platforms: Intel(r) Core(tm) 5th generation (code name: 
Broadwell), 6th generation (code name: Skylake) and 7th generation (code name: 
Kabylake), 7th generation is new supported platform.
-    Validated Guest OS: Windows7 32bit, Window7 64bit, Windows8.1 64bit, 
Windows10 64bit and Linux.
-    GVT-g only supports remote display not local display by this release. 
-    Remote protocol: only guest-side remoting protocol is supported, host-side 
remoting connection like SPICE is working in progress. For example, user can 
use X11VNC for Guest Linux VM or TightVNC for Guest Windows VM.


Limitation or known issues:
-    GVT-g can support maximum 7 Guest VMs due to host graphics resource 
limitation. When user runs 7 VMs simultaneously, host OS can only run in text 
mode.
-    In order to support Guest Windows7 32bit VM, user is recommended to 
configure vgt_low_gm_sz=128 / 256 / 512 in HVM file because Guest Windows7 
32bit VM needs more graphics resource than other Guest VM.
-    In order to support Guest VM high resolution and screen resolution 
adjustable in Guest Windows8.1 64bit VM and Guest Windows10 64bit VM, user is 
recommended to configure vgt_low_gm_sz=64 / 128 / 256 / 512 in HVM file to get 
larger VM aperture size.
-    Some 3rd party applications/tools like 3DMark which including special 
DirectX12 feature test, it will trigger Guest VM GPU reset.
-    In corner case, Guest Windows 7 32bit VM may be killed automatically by 
Xen when Guest VM runs into TDR. This issue happens only on Broadwell 
platform. The workaround is to disable part of viridian feature in Guest VM hvm 
file by adding viridian=["all", "!apic_assist"].
-    In corner case, Linux Guest VM may GPU hang while running special 
Intel-GPU-Tools test case on it.
-    For live migration feature, we cannot migrate Guest Windows VM when Guest 
VM memory is 2048M or 4096M, user is recommended to configure Guest VM memory 
to 1024MB.


Setup guide:
https://github.com/01org/gvt-linux/wiki/GVTg_Setup_Guide


This is the first GVT-g community release based on new Upstream architecture 
design, refer to the following document for new architecture introduction:
https://01.org/igvt-g/documentation/intel-gvt-g-new-architecture-introduction


Please subscribe to join the mailing list if you want to learn more about GVT-g 
project: 
https://lists.01.org/mailman/listinfo/igvt-g
Please subscribe to join the mailing list if you want to contribute/review 
latest GVT-g upstream patches:
https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev


Official GVT-g portal: 
https://01.org/igvt-g


More information about background, architecture and others about Intel GVT-g, 
can be found at:
https://01.org/igvt-g
https://www.usenix.org/conference/atc14/technical-sessions/presentation/tian
http://events.linuxfoundation.org/sites/events/files/slides/XenGT-Xen%20Summit-v7_0.pdf
http://events.linuxfoundation.org/sites/events/files/slides/XenGT-Xen%20Summit-REWRITE%203RD%20v4.pdf
https://01.org/xen/blogs/srclarkx/2013/graphics-virtualization-xengt


Note: 
The XenGT project should be considered a work in progress. As such it is not a 
complete product nor should it be considered one. Extra care should be taken 
when testing and configuring a system to use the XenGT project.



Thanks
Terrence
Tel: +86-21-6116 5390
MP: +86-1356 4367 024
Mail: terrence...@intel.com

___
GVT-g mailing list
igv...@lists.01.org
https://lists.01.org/mailman/listinfo/igvt-g

___
Intel-gfx mailing list
intel-...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [GVT-g] [ANNOUNCE] 2017-Q3 release of XenGT (Intel GVT-g for Xen)

2017-10-10 Thread Xu, Terrence
Hi all,

We are pleased to announce an update of Intel GVT-g for Xen.

Intel GVT-g is a full GPU virtualization solution with mediated pass-through, 
starting from 4th generation Intel Core(TM) processors with Intel processor 
graphics. A virtual GPU instance is maintained for each VM, with part of 
performance critical resources directly assigned. The capability of running 
native graphics driver inside a VM, without hypervisor intervention in 
performance critical paths, achieves a good balance among performance, feature, 
and sharing capability. GVT-g for Xen hypervisor is XenGT.


Repositories
-    Xen :  https://github.com/01org/igvtg-xen (tag: 2017-q3-xengt-stable-4.9)
-    Kernel: https://github.com/01org/gvt-linux/ (tag: 2017-q3-gvt-stable-4.12)
-    Qemu: https://github.com/01org/igvtg-qemu (tag: 2017-q3-stable-2.9.0)


This update consists of:
-    Kernel version upgraded to 4.12 from 4.11.
-    Live migration feature preliminary supported.
-    QoS feature preliminary supported.
-    IOMMU feature supported.
-    OVMF feature supported.
-    VGPU reset feature optimization, with related issues be fixed.
-    Supported server platforms: Intel(r) Xeon(r) E3_v4, E3_v5 and E3_v6 with 
Intel Graphics processor, E3_v6 is new supported platform.
-    Supported client platforms: Intel(r) Core(tm) 5th generation (code name: 
Broadwell), 6th generation (code name: Skylake) and 7th generation (code name: 
Kabylake), 7th generation is new supported platform.
-    Validated Guest OS: Windows7 32bit, Window7 64bit, Windows8.1 64bit, 
Windows10 64bit and Linux.
-    GVT-g only supports remote display not local display by this release. 
-    Remote protocol: only guest-side remoting protocol is supported, host-side 
remoting connection like SPICE is working in progress. For example, user can 
use X11VNC for Guest Linux VM or TightVNC for Guest Windows VM.


Limitation or known issues:
-    GVT-g can support maximum 7 Guest VMs due to host graphics resource 
limitation. When user runs 7 VMs simultaneously, host OS can only run in text 
mode.
-    In order to support Guest Windows7 32bit VM, user is recommended to 
configure vgt_low_gm_sz=128 / 256 / 512 in HVM file because Guest Windows7 
32bit VM needs more graphics resource than other Guest VM.
-    In order to support Guest VM high resolution and screen resolution 
adjustable in Guest Windows8.1 64bit VM and Guest Windows10 64bit VM, user is 
recommended to configure vgt_low_gm_sz=64 / 128 / 256 / 512 in HVM file to get 
larger VM aperture size.
-    Some 3rd party applications/tools like 3DMark, which include a special 
DirectX12 feature test, will trigger Guest VM GPU reset.
-    In corner case, Guest Windows 7 32bit VM may be killed automatically by 
Xen when Guest VM runs into TDR. This issue happens only on Broadwell 
platform. The workaround is to disable part of viridian feature in Guest VM hvm 
file by adding viridian=["all", "!apic_assist"].
-    In corner case, Linux Guest VM may GPU hang while running special 
Intel-GPU-Tools test case on it.
-    For live migration feature, we cannot migrate Guest Windows VM when Guest 
VM memory is 2048M or 4096M, user is recommended to configure Guest VM memory 
to 1024MB.


Setup guide:
https://github.com/01org/gvt-linux/wiki/GVTg_Setup_Guide


This is the first GVT-g community release based on new Upstream architecture 
design, refer to the following document for new architecture introduction:
https://01.org/igvt-g/documentation/intel-gvt-g-new-architecture-introduction


Please subscribe to join the mailing list if you want to learn more about GVT-g 
project: 
https://lists.01.org/mailman/listinfo/igvt-g
Please subscribe to join the mailing list if you want to contribute/review 
latest GVT-g upstream patches:
https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev


Official GVT-g portal: 
https://01.org/igvt-g


More information about background, architecture and others about Intel GVT-g, 
can be found at:
https://01.org/igvt-g
https://www.usenix.org/conference/atc14/technical-sessions/presentation/tian
http://events.linuxfoundation.org/sites/events/files/slides/XenGT-Xen%20Summit-v7_0.pdf
http://events.linuxfoundation.org/sites/events/files/slides/XenGT-Xen%20Summit-REWRITE%203RD%20v4.pdf
https://01.org/xen/blogs/srclarkx/2013/graphics-virtualization-xengt


Note: 
The XenGT project should be considered a work in progress. As such it is not a 
complete product nor should it be considered one. Extra care should be taken 
when testing and configuring a system to use the XenGT project.



Thanks
Terrence
Tel: +86-21-6116 5390
MP: +86-1356 4367 024
Mail: terrence...@intel.com

___
GVT-g mailing list
igv...@lists.01.org
https://lists.01.org/mailman/listinfo/igvt-g

___
Intel-gfx mailing list
intel-...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [PATCH v5 03/10] kexec_file: factor out arch_kexec_kernel_*() from x86, powerpc

2017-10-10 Thread AKASHI Takahiro
On Tue, Oct 10, 2017 at 12:02:01PM +0100, Julien Thierry wrote:

[snip]

> >--- a/kernel/kexec_file.c
> >+++ b/kernel/kexec_file.c
> >@@ -26,30 +26,79 @@
> >  #include 
> >  #include "kexec_internal.h"
> >
> >+const __weak struct kexec_file_ops * const kexec_file_loaders[] = {NULL};
> >+
> >  static int kexec_calculate_store_digests(struct kimage *image);
> >
> >+int _kexec_kernel_image_probe(struct kimage *image, void *buf,
> >+  unsigned long buf_len)
> >+{
> >+ const struct kexec_file_ops *fops;
> >+ int ret = -ENOEXEC;
> >+
> >+ for (fops = kexec_file_loaders[0]; fops && fops->probe; ++fops) {
> 
> Hmm, that's not gonna work (and I see that what I said in the previous
> patch was not 100% correct either).

Can you elaborate this a bit more?

I'm sure that, with my code, any member of fops, cannot be changed;
"const struct kexec_file_ops *fops" means that fops is a pointer to
"constant struct kexec_file_ops," while "struct kexec_file_ops *
const kexec_file_loaders[]" means that kexec_file_loaders is a "constant
array" of pointers to "constant struct kexec_file_ops."

Thanks,
-Takahiro AKASHI


> 'fops' should be of type 'const struct kexec_file_ops **', and the loop
> should be:
> 
> for (fops = &kexec_file_loaders[0]; *fops && (*fops)->probe; ++fops)
> 
> With some additional dereferences in the body of the loop.
> 
> Unless you prefer the previous state of the loop (with i and the break
> inside), but I still think this looks better.
> 
> Cheers,
> 
> --
> Julien Thierry
> IMPORTANT NOTICE: The contents of this email and any attachments are 
> confidential and may also be privileged. If you are not the intended 
> recipient, please notify the sender immediately and do not disclose the 
> contents to any other person, use it for any purpose, or store or copy the 
> information in any medium. Thank you.


Re: [PATCH v5 03/10] kexec_file: factor out arch_kexec_kernel_*() from x86, powerpc

2017-10-10 Thread AKASHI Takahiro
On Tue, Oct 10, 2017 at 12:02:01PM +0100, Julien Thierry wrote:

[snip]

> >--- a/kernel/kexec_file.c
> >+++ b/kernel/kexec_file.c
> >@@ -26,30 +26,79 @@
> >  #include 
> >  #include "kexec_internal.h"
> >
> >+const __weak struct kexec_file_ops * const kexec_file_loaders[] = {NULL};
> >+
> >  static int kexec_calculate_store_digests(struct kimage *image);
> >
> >+int _kexec_kernel_image_probe(struct kimage *image, void *buf,
> >+  unsigned long buf_len)
> >+{
> >+ const struct kexec_file_ops *fops;
> >+ int ret = -ENOEXEC;
> >+
> >+ for (fops = kexec_file_loaders[0]; fops && fops->probe; ++fops) {
> 
> Hmm, that's not gonna work (and I see that what I said in the previous
> patch was not 100% correct either).

Can you elaborate this a bit more?

I'm sure that, with my code, any member of fops, cannot be changed;
"const struct kexec_file_ops *fops" means that fops is a pointer to
"constant struct kexec_file_ops," while "struct kexec_file_ops *
const kexec_file_loaders[]" means that kexec_file_loaders is a "constant
array" of pointers to "constant struct kexec_file_ops."

Thanks,
-Takahiro AKASHI


> 'fops' should be of type 'const struct kexec_file_ops **', and the loop
> should be:
> 
> for (fops = &kexec_file_loaders[0]; *fops && (*fops)->probe; ++fops)
> 
> With some additional dereferences in the body of the loop.
> 
> Unless you prefer the previous state of the loop (with i and the break
> inside), but I still think this looks better.
> 
> Cheers,
> 
> --
> Julien Thierry
> IMPORTANT NOTICE: The contents of this email and any attachments are 
> confidential and may also be privileged. If you are not the intended 
> recipient, please notify the sender immediately and do not disclose the 
> contents to any other person, use it for any purpose, or store or copy the 
> information in any medium. Thank you.


[PATCH] VFS: Handle lazytime in do_mount()

2017-10-10 Thread Markus Trippelsdorf
Since commit e462ec50cb5fa ("VFS: Differentiate mount flags (MS_*) from
internal superblock flags") the lazytime mount option doesn't get passed
on anymore.

Fix the issue by handling the option in do_mount().

Reviewed-by: Lukas Czerner 
Signed-off-by: Markus Trippelsdorf 
---
 fs/namespace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 54059b142d6b..b633838b8f02 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2823,7 +2823,8 @@ long do_mount(const char *dev_name, const char __user 
*dir_name,
SB_MANDLOCK |
SB_DIRSYNC |
SB_SILENT |
-   SB_POSIXACL);
+   SB_POSIXACL |
+   SB_LAZYTIME);
 
if (flags & MS_REMOUNT)
retval = do_remount(&path, flags, sb_flags, mnt_flags,
-- 
Markus


[PATCH] VFS: Handle lazytime in do_mount()

2017-10-10 Thread Markus Trippelsdorf
Since commit e462ec50cb5fa ("VFS: Differentiate mount flags (MS_*) from
internal superblock flags") the lazytime mount option doesn't get passed
on anymore.

Fix the issue by handling the option in do_mount().

Reviewed-by: Lukas Czerner 
Signed-off-by: Markus Trippelsdorf 
---
 fs/namespace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 54059b142d6b..b633838b8f02 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2823,7 +2823,8 @@ long do_mount(const char *dev_name, const char __user 
*dir_name,
SB_MANDLOCK |
SB_DIRSYNC |
SB_SILENT |
-   SB_POSIXACL);
+   SB_POSIXACL |
+   SB_LAZYTIME);
 
if (flags & MS_REMOUNT)
retval = do_remount(&path, flags, sb_flags, mnt_flags,
-- 
Markus


[Intel-gfx] [GVT-g] [ANNOUNCE] 2017-Q3 release of KVMGT (Intel GVT-g for KVM)

2017-10-10 Thread Xu, Terrence
Hi all,

We are pleased to announce an update of Intel GVT-g for KVM.

Intel GVT-g for KVM (a.k.a. KVMGT) is a full GPU virtualization solution with 
mediated pass-through, starting from 5th generation Intel Core(TM) processors 
with Intel processor graphics.  A virtual GPU instance is maintained for each 
VM, with part of performance critical resources directly assigned. The 
capability of running native graphics driver inside a VM, without hypervisor 
intervention in performance critical paths, achieves a good balance among 
performance, feature, and sharing capability.
    

Repositories:
-    Kernel: https://github.com/01org/gvt-linux/  (tag: 2017-q3-gvt-stable-4.12)
-    Qemu: https://github.com/01org/igvtg-qemu  (tag: 2017-q3-stable-2.9.0)


This update consists of:
-    Kernel version upgraded to 4.12 from 4.11.
-    Live migration feature preliminary supported.
-    QoS feature preliminary supported.
-    IOMMU feature supported.
-    OVMF feature supported.
-    VGPU reset feature optimization, with related issues fixed.
-    Supported server platforms: Intel(r) Xeon(r) E3_v4, E3_v5 and E3_v6 with 
Intel Graphics processor, the E3_v6 is new supported platform.
-    Supported client platforms: Intel(r) Core(tm) 5th generation (code name: 
Broadwell), 6th generation (code name: Skylake) and 7th generation (code name: 
Kabylake), the 7th generation is new supported platform.
-    Validated Guest OS: Windows7 32bit, Windows7 64bit, Windows8.1 64bit, 
Windows10 64bit and Linux.
-    GVT-g only supports remote display not local display by this release. 
-    Remote protocol: only guest-side remoting protocol is supported, host-side 
remoting connection like SPICE is working in progress. For example, user can 
use X11VNC for Guest Linux VM or TightVNC for Guest Windows VM.


Limitation or known issues:
-    GVT-g can support maximum 7 Guest VMs due to host graphics resource 
limitation. When user runs 7 VMs simultaneously, host OS can only run in text 
mode.
-    In order to support Guest Windows7 32bit VM, user can only use vGPU 
type1, type2, type4 not type8 because Guest Windows7 32bit VM needs more 
graphics resource than other Guest VM.
-    Some 3rd party applications/tools like GPU_Z, Passmark 9.0 may read/write 
GPU MSR directly, it will trigger Guest VM BSOD since those MSRs are unhandled 
registers in KVMGT. The workaround is to set MSR read /write ignore flag to 1 
in host grub file by adding "kvm.ignore_msrs=1".
-    Some 3rd party applications/tools like 3DMark, which include a special 
DirectX12 feature test, will trigger Guest VM GPU reset.
-    In corner case, Linux Guest VM may GPU hang while running special 
Intel-GPU-Tools test case on it.
-    In corner case, for live migration feature, the fake GPU reset happening 
while migrating Linux Guest VM which Guest VM running 3D workload.  
-    Guest Windows VM often GPU hangs while the IOMMU feature is enabled. This 
issue happens only on a few Broadwell platforms due to a hardware problem. 
The workaround is to turn off the integrated graphics engine on IOMMU in host 
grub file by adding "intel_iommu=igfx_off".


Setup guide:
https://github.com/01org/gvt-linux/wiki/GVTg_Setup_Guide


This is the first GVT-g community release based on new Upstream architecture 
design, refer to the following document for new architecture introduction:
https://01.org/igvt-g/documentation/intel-gvt-g-new-architecture-introduction 


Please subscribe to join the mailing list if you want to learn more about GVT-g 
project: 
https://lists.01.org/mailman/listinfo/igvt-g
Please subscribe to join the mailing list if you want to contribute/review 
latest GVT-g upstream patches:
https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev


Official GVT-g portal:
https://01.org/igvt-g


More information about background, architecture and others about Intel GVT-g, 
can be found at:
http://www.linux-kvm.org/images/f/f3/01x08b-KVMGT-a.pdf
https://www.usenix.org/conference/atc14/technical-sessions/presentation/tian


Note:
The KVMGT project should be considered a work in progress. As such it is not a 
complete product nor should it be considered one. Extra care should be taken 
when testing and configuring a system to use the KVMGT project.



Thanks
Terrence
Tel: +86-21-6116 5390
MP: +86-1356 4367 024
Mail: terrence...@intel.com

___
GVT-g mailing list
igv...@lists.01.org
https://lists.01.org/mailman/listinfo/igvt-g

___
Intel-gfx mailing list
intel-...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [GVT-g] [ANNOUNCE] 2017-Q3 release of KVMGT (Intel GVT-g for KVM)

2017-10-10 Thread Xu, Terrence
Hi all,

We are pleased to announce an update of Intel GVT-g for KVM.

Intel GVT-g for KVM (a.k.a. KVMGT) is a full GPU virtualization solution with 
mediated pass-through, starting from 5th generation Intel Core(TM) processors 
with Intel processor graphics.  A virtual GPU instance is maintained for each 
VM, with part of performance critical resources directly assigned. The 
capability of running native graphics driver inside a VM, without hypervisor 
intervention in performance critical paths, achieves a good balance among 
performance, feature, and sharing capability.
    

Repositories:
-    Kernel: https://github.com/01org/gvt-linux/  (tag: 2017-q3-gvt-stable-4.12)
-    Qemu: https://github.com/01org/igvtg-qemu  (tag: 2017-q3-stable-2.9.0)


This update consists of:
-    Kernel version upgraded to 4.12 from 4.11.
-    Live migration feature preliminary supported.
-    QoS feature preliminary supported.
-    IOMMU feature supported.
-    OVMF feature supported.
-    VGPU reset feature optimization, with related issues fixed.
-    Supported server platforms: Intel(r) Xeon(r) E3_v4, E3_v5 and E3_v6 with 
Intel Graphics processor, the E3_v6 is new supported platform.
-    Supported client platforms: Intel(r) Core(tm) 5th generation (code name: 
Broadwell), 6th generation (code name: Skylake) and 7th generation (code name: 
Kabylake), the 7th generation is new supported platform.
-    Validated Guest OS: Windows7 32bit, Windows7 64bit, Windows8.1 64bit, 
Windows10 64bit and Linux.
-    GVT-g only supports remote display not local display by this release. 
-    Remote protocol: only guest-side remoting protocol is supported, host-side 
remoting connection like SPICE is working in progress. For example, user can 
use X11VNC for Guest Linux VM or TightVNC for Guest Windows VM.


Limitation or known issues:
-    GVT-g can support maximum 7 Guest VMs due to host graphics resource 
limitation. When user runs 7 VMs simultaneously, host OS can only run in text 
mode.
-    In order to support Guest Windows7 32bit VM, user can only use vGPU 
type1, type2, type4 not type8 because Guest Windows7 32bit VM needs more 
graphics resource than other Guest VM.
-    Some 3rd party applications/tools like GPU_Z, Passmark 9.0 may read/write 
GPU MSR directly, it will trigger Guest VM BSOD since those MSRs are unhandled 
registers in KVMGT. The workaround is to set MSR read /write ignore flag to 1 
in host grub file by adding "kvm.ignore_msrs=1".
-    Some 3rd party applications/tools like 3DMark, which include a special 
DirectX12 feature test, will trigger Guest VM GPU reset.
-    In corner case, Linux Guest VM may GPU hang while running special 
Intel-GPU-Tools test case on it.
-    In corner case, for live migration feature, the fake GPU reset happening 
while migrating Linux Guest VM which Guest VM running 3D workload.  
-    Guest Windows VM often GPU hangs while the IOMMU feature is enabled. This 
issue happens only on a few Broadwell platforms due to a hardware problem. 
The workaround is to turn off the integrated graphics engine on IOMMU in host 
grub file by adding "intel_iommu=igfx_off".


Setup guide:
https://github.com/01org/gvt-linux/wiki/GVTg_Setup_Guide


This is the first GVT-g community release based on new Upstream architecture 
design, refer to the following document for new architecture introduction:
https://01.org/igvt-g/documentation/intel-gvt-g-new-architecture-introduction 


Please subscribe to join the mailing list if you want to learn more about GVT-g 
project: 
https://lists.01.org/mailman/listinfo/igvt-g
Please subscribe to join the mailing list if you want to contribute/review 
latest GVT-g upstream patches:
https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev


Official GVT-g portal:
https://01.org/igvt-g


More information about background, architecture and others about Intel GVT-g, 
can be found at:
http://www.linux-kvm.org/images/f/f3/01x08b-KVMGT-a.pdf
https://www.usenix.org/conference/atc14/technical-sessions/presentation/tian


Note:
The KVMGT project should be considered a work in progress. As such it is not a 
complete product nor should it be considered one. Extra care should be taken 
when testing and configuring a system to use the KVMGT project.



Thanks
Terrence
Tel: +86-21-6116 5390
MP: +86-1356 4367 024
Mail: terrence...@intel.com

___
GVT-g mailing list
igv...@lists.01.org
https://lists.01.org/mailman/listinfo/igvt-g

___
Intel-gfx mailing list
intel-...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[git pull] vfs.git fixes

2017-10-10 Thread Al Viro
Fairly old DIO bug caught by Andreas (3.10+) and several slightly
younger blk_rq_map_user_iov() bugs, both on map and copy codepaths (Vitaly
and me).

The following changes since commit 8a5776a5f49812d29fe4b2d0a2d71675c3facf3f:

  Linux 4.14-rc4 (2017-10-08 20:53:29 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git for-linus

for you to fetch changes up to 1cfd0ddd82232804e03f3023f6a58b50dfef0574:

  bio_copy_user_iov(): don't ignore ->iov_offset (2017-10-10 23:55:14 -0400)


Al Viro (2):
  more bio_map_user_iov() leak fixes
  bio_copy_user_iov(): don't ignore ->iov_offset

Andreas Gruenbacher (1):
  direct-io: Prevent NULL pointer access in submit_page_section

Vitaly Mayatskikh (1):
  fix unbalanced page refcounting in bio_map_user_iov

 block/bio.c| 26 +++---
 fs/direct-io.c |  3 ++-
 2 files changed, 21 insertions(+), 8 deletions(-)


[git pull] vfs.git fixes

2017-10-10 Thread Al Viro
Fairly old DIO bug caught by Andreas (3.10+) and several slightly
younger blk_rq_map_user_iov() bugs, both on map and copy codepaths (Vitaly
and me).

The following changes since commit 8a5776a5f49812d29fe4b2d0a2d71675c3facf3f:

  Linux 4.14-rc4 (2017-10-08 20:53:29 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git for-linus

for you to fetch changes up to 1cfd0ddd82232804e03f3023f6a58b50dfef0574:

  bio_copy_user_iov(): don't ignore ->iov_offset (2017-10-10 23:55:14 -0400)


Al Viro (2):
  more bio_map_user_iov() leak fixes
  bio_copy_user_iov(): don't ignore ->iov_offset

Andreas Gruenbacher (1):
  direct-io: Prevent NULL pointer access in submit_page_section

Vitaly Mayatskikh (1):
  fix unbalanced page refcounting in bio_map_user_iov

 block/bio.c| 26 +++---
 fs/direct-io.c |  3 ++-
 2 files changed, 21 insertions(+), 8 deletions(-)


Re: [PATCH] powerpc/perf: Fix IMC initialization crash

2017-10-10 Thread Madhavan Srinivasan



On Wednesday 11 October 2017 09:41 AM, Michael Ellerman wrote:

Anju T Sudhakar  writes:


Call trace observed with latest firmware, and upstream kernel.

[   14.499938] NIP [c00f318c] init_imc_pmu+0x8c/0xcf0
[   14.499973] LR [c00f33f8] init_imc_pmu+0x2f8/0xcf0
[   14.57] Call Trace:
[   14.500027] [c03fed18f710] [c00f33c8] init_imc_pmu+0x2c8/0xcf0 
(unreliable)
[   14.500080] [c03fed18f800] [c00b5ec0] 
opal_imc_counters_probe+0x300/0x400
[   14.500132] [c03fed18f900] [c0807ef4] 
platform_drv_probe+0x64/0x110
[   14.500185] [c03fed18f980] [c0804b58] 
driver_probe_device+0x3d8/0x580
[   14.500236] [c03fed18fa10] [c0804e4c] __driver_attach+0x14c/0x1a0
[   14.500302] [c03fed18fa90] [c080156c] bus_for_each_dev+0x8c/0xf0
[   14.500353] [c03fed18fae0] [c0803fa4] driver_attach+0x34/0x50
[   14.500397] [c03fed18fb00] [c0803688] bus_add_driver+0x298/0x350
[   14.500449] [c03fed18fb90] [c080605c] driver_register+0x9c/0x180
[   14.500500] [c03fed18fc00] [c0807dec] 
__platform_driver_register+0x5c/0x70
[   14.500552] [c03fed18fc20] [c101cee0] 
opal_imc_driver_init+0x2c/0x40
[   14.500603] [c03fed18fc40] [c000d084] do_one_initcall+0x64/0x1d0
[   14.500654] [c03fed18fd00] [c100434c] 
kernel_init_freeable+0x280/0x374
[   14.500705] [c03fed18fdc0] [c000d314] kernel_init+0x24/0x160
[   14.500750] [c03fed18fe30] [c000b4e8] 
ret_from_kernel_thread+0x5c/0x74
[   14.500799] Instruction dump:
[   14.500827] 4082024c 2f890002 419e054c 2e890003 41960094 2e890001 3ba0ffea 
419602d8
[   14.500884] 419e0290 2f890003 419e02a8 e93e0118  2fa3 419e0010 
4827ba41
[   14.500945] ---[ end trace 27b734ad26f1add4 ]---
[   15.908719]
[   16.908869] Kernel panic - not syncing: Attempted to kill init! 
exitcode=0x0007
[   16.908869]
[   18.125813] ---[ end Kernel panic - not syncing: Attempted to kill init! 
exitcode=0x0007]

While registering nest imc at init, cpu-hotplug callback 
`nest_pmu_cpumask_init()`
makes an opal call to stop the engine. And if the OPAL call fails,
imc_common_cpuhp_mem_free() is invoked to cleanup memory and cpuhotplug setup.

But when cleaning up the attribute group, we were dereferencing the attribute
element array without checking whether the backing element is not NULL. This
causes the kernel panic.

Factor out the memory freeing part from imc_common_cpuhp_mem_free() to handle
the failing case gracefully.

Signed-off-by: Anju T Sudhakar 
Reported-by: Pridhiviraj Paidipeddi 
---
  arch/powerpc/perf/imc-pmu.c | 23 ---
  1 file changed, 16 insertions(+), 7 deletions(-)

It's the week before rc5, so I'd really like just the absolute minimal
fix. There's sufficient code movement here that I can't even immediately
see where the bug fix is.

mpe,

We have just re-factored the code to handle the memory freeing and fixed 
a leak.

This is minimal fix. And there are no risks in taking this in.

Reviewed-by: Madhavan Srinivasan 

Maddy



cheers





Re: [PATCH] powerpc/perf: Fix IMC initialization crash

2017-10-10 Thread Madhavan Srinivasan



On Wednesday 11 October 2017 09:41 AM, Michael Ellerman wrote:

Anju T Sudhakar  writes:


Call trace observed with latest firmware, and upstream kernel.

[   14.499938] NIP [c00f318c] init_imc_pmu+0x8c/0xcf0
[   14.499973] LR [c00f33f8] init_imc_pmu+0x2f8/0xcf0
[   14.57] Call Trace:
[   14.500027] [c03fed18f710] [c00f33c8] init_imc_pmu+0x2c8/0xcf0 
(unreliable)
[   14.500080] [c03fed18f800] [c00b5ec0] 
opal_imc_counters_probe+0x300/0x400
[   14.500132] [c03fed18f900] [c0807ef4] 
platform_drv_probe+0x64/0x110
[   14.500185] [c03fed18f980] [c0804b58] 
driver_probe_device+0x3d8/0x580
[   14.500236] [c03fed18fa10] [c0804e4c] __driver_attach+0x14c/0x1a0
[   14.500302] [c03fed18fa90] [c080156c] bus_for_each_dev+0x8c/0xf0
[   14.500353] [c03fed18fae0] [c0803fa4] driver_attach+0x34/0x50
[   14.500397] [c03fed18fb00] [c0803688] bus_add_driver+0x298/0x350
[   14.500449] [c03fed18fb90] [c080605c] driver_register+0x9c/0x180
[   14.500500] [c03fed18fc00] [c0807dec] 
__platform_driver_register+0x5c/0x70
[   14.500552] [c03fed18fc20] [c101cee0] 
opal_imc_driver_init+0x2c/0x40
[   14.500603] [c03fed18fc40] [c000d084] do_one_initcall+0x64/0x1d0
[   14.500654] [c03fed18fd00] [c100434c] 
kernel_init_freeable+0x280/0x374
[   14.500705] [c03fed18fdc0] [c000d314] kernel_init+0x24/0x160
[   14.500750] [c03fed18fe30] [c000b4e8] 
ret_from_kernel_thread+0x5c/0x74
[   14.500799] Instruction dump:
[   14.500827] 4082024c 2f890002 419e054c 2e890003 41960094 2e890001 3ba0ffea 
419602d8
[   14.500884] 419e0290 2f890003 419e02a8 e93e0118  2fa3 419e0010 
4827ba41
[   14.500945] ---[ end trace 27b734ad26f1add4 ]---
[   15.908719]
[   16.908869] Kernel panic - not syncing: Attempted to kill init! 
exitcode=0x0007
[   16.908869]
[   18.125813] ---[ end Kernel panic - not syncing: Attempted to kill init! 
exitcode=0x0007]

While registering nest imc at init, cpu-hotplug callback 
`nest_pmu_cpumask_init()`
makes an opal call to stop the engine. And if the OPAL call fails,
imc_common_cpuhp_mem_free() is invoked to cleanup memory and cpuhotplug setup.

But when cleaning up the attribute group, we were dereferencing the attribute
element array without checking whether the backing element is not NULL. This
causes the kernel panic.

Factor out the memory freeing part from imc_common_cpuhp_mem_free() to handle
the failing case gracefully.

Signed-off-by: Anju T Sudhakar 
Reported-by: Pridhiviraj Paidipeddi 
---
  arch/powerpc/perf/imc-pmu.c | 23 ---
  1 file changed, 16 insertions(+), 7 deletions(-)

It's the week before rc5, so I'd really like just the absolute minimal
fix. There's sufficient code movement here that I can't even immediately
see where the bug fix is.

mpe,

We have just re-factored the code to handle the memory freeing and fixed 
a leak.

This is minimal fix. And there are no risks in taking this in.

Reviewed-by: Madhavan Srinivasan 

Maddy



cheers





Re: BUG: KASAN: global-out-of-bounds in strscpy+0x807/0x970

2017-10-10 Thread Tyler Hicks
On 10/10/2017 10:32 PM, Simon Brewer wrote:
> Hint start looking at this thread. https://lkml.org/lkml/2017/7/18/874
> 
> Summary: strscpy and KASAN are currently incompatible.  strscpy does a
> 64 bit speculative fetch on a char pointer (for efficiency reasons). 
> KASAN spots this and flags an error.

Thanks, Simon. I had already reviewed the loop in
seccomp_names_from_actions_logged() and couldn't spot an issue so my
next step was to take a look at strscpy() itself. Your reply was well
timed. :)

@Kees, this is a false positive. I picked strscpy() because of its sane
return codes for easy error handling but its word-at-a-time complexity
is overkill for this sysctl. Are you alright with this KASAN false
positive or would you like me to change over to strlcpy()?

Tyler

> 
> On 11 October 2017 at 12:46, Jakub Kicinski  > wrote:
> 
> On Tue, 10 Oct 2017 21:44:01 -0400, Tyler Hicks wrote:
> > On 10/10/2017 09:28 PM, Jakub Kicinski wrote:
> > > I'm hitting this on sysctl -a with net-next (4.14-rc4).
> >
> > Hey Jakub - thanks for the bug report!
> >
> > >
> > > I saw that seccomp_actions_logged_handler was introduced
> > > not-so-long-ago by Tyler, is there a fix for this?
> >
> > No, this is the first I've heard of it. I'll have a look.
> 
> Thanks! :)
> 
> 




signature.asc
Description: OpenPGP digital signature


Re: BUG: KASAN: global-out-of-bounds in strscpy+0x807/0x970

2017-10-10 Thread Tyler Hicks
On 10/10/2017 10:32 PM, Simon Brewer wrote:
> Hint start looking at this thread. https://lkml.org/lkml/2017/7/18/874
> 
> Summary: strscpy and KASAN are currently incompatible.  strscpy does a
> 64 bit speculative fetch on a char pointer (for efficiency reasons). 
> KASAN spots this and flags an error.

Thanks, Simon. I had already reviewed the loop in
seccomp_names_from_actions_logged() and couldn't spot an issue so my
next step was to take a look at strscpy() itself. Your reply was well
timed. :)

@Kees, this is a false positive. I picked strscpy() because of its sane
return codes for easy error handling but its word-at-a-time complexity
is overkill for this sysctl. Are you alright with this KASAN false
positive or would you like me to change over to strlcpy()?

Tyler

> 
> On 11 October 2017 at 12:46, Jakub Kicinski  > wrote:
> 
> On Tue, 10 Oct 2017 21:44:01 -0400, Tyler Hicks wrote:
> > On 10/10/2017 09:28 PM, Jakub Kicinski wrote:
> > > I'm hitting this on sysctl -a with net-next (4.14-rc4).
> >
> > Hey Jakub - thanks for the bug report!
> >
> > >
> > > I saw that seccomp_actions_logged_handler was introduced
> > > not-so-long-ago by Tyler, is there a fix for this?
> >
> > No, this is the first I've heard of it. I'll have a look.
> 
> Thanks! :)
> 
> 




signature.asc
Description: OpenPGP digital signature


Re: [PATCH v15 4/7] drm/i915/gvt: Add opregion support

2017-10-10 Thread Du, Changbin
On Tue, Oct 10, 2017 at 05:50:04PM +0800, Tina Zhang wrote:
> Windows guest driver needs vbt in opregion, to configure the setting
> for display. Without opregion support, the display registers won't
> be set and this blocks display model to get the correct information
> of the guest display plane.
> 
> This patch is to provide a virtual opregion for guest. Current
> implementation is to fill the virtual opregion with the content in
> host's opregion. The original author of this patch is Xiaoguang Chen.
> 
> Signed-off-by: Bing Niu 
> Signed-off-by: Tina Zhang 
> ---
>  drivers/gpu/drm/i915/gvt/hypercall.h |   1 +
>  drivers/gpu/drm/i915/gvt/kvmgt.c | 109 
> ++-
>  drivers/gpu/drm/i915/gvt/mpt.h   |  15 +
>  drivers/gpu/drm/i915/gvt/opregion.c  |  26 +++--
>  drivers/gpu/drm/i915/gvt/vgpu.c  |   4 ++
>  5 files changed, 146 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h 
> b/drivers/gpu/drm/i915/gvt/hypercall.h
> index df7f33a..32c345c 100644
> --- a/drivers/gpu/drm/i915/gvt/hypercall.h
> +++ b/drivers/gpu/drm/i915/gvt/hypercall.h
> @@ -55,6 +55,7 @@ struct intel_gvt_mpt {
> unsigned long mfn, unsigned int nr, bool map);
>   int (*set_trap_area)(unsigned long handle, u64 start, u64 end,
>bool map);
> + int (*set_opregion)(void *vgpu);
Seems we try to hide struct intel_vgpu for kvmgt, but actually kvmgt already
use it. So set type as struct intel_vgpu directly? I am not sure about xengt,
but the code shows that 'handle' is correct thing?

>  };
>  
>  extern struct intel_gvt_mpt xengt_mpt;
> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c 
> b/drivers/gpu/drm/i915/gvt/kvmgt.c
> index fd0c85f..6b0a330 100644
> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> @@ -53,11 +53,23 @@ static const struct intel_gvt_ops *intel_gvt_ops;
>  #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << 
> VFIO_PCI_OFFSET_SHIFT)
>  #define VFIO_PCI_OFFSET_MASK(((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
>  
> +#define OPREGION_SIGNATURE "IntelGraphicsMem"
> +
> +struct vfio_region;
> +struct intel_vgpu_regops {
> + size_t (*rw)(struct intel_vgpu *vgpu, char *buf,
> + size_t count, loff_t *ppos, bool iswrite);
> + void (*release)(struct intel_vgpu *vgpu,
> + struct vfio_region *region);
> +};
> +
>  struct vfio_region {
>   u32 type;
>   u32 subtype;
>   size_t  size;
>   u32 flags;
> + const struct intel_vgpu_regops  *ops;
> + void*data;
>  };
>  
>  struct kvmgt_pgfn {
> @@ -430,6 +442,91 @@ static void kvmgt_protect_table_del(struct 
> kvmgt_guest_info *info,
>   }
>  }
>  
> +static size_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf,
> + size_t count, loff_t *ppos, bool iswrite)
Personally I think intel_vgpu_rw_opregion() is better. :)

> +{
> + unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
> + VFIO_PCI_NUM_REGIONS;
> + void *base = vgpu->vdev.region[i].data;
> + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
> +
> + if (pos >= vgpu->vdev.region[i].size || iswrite) {
> + gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n");
> + return -EINVAL;
> + }
> + count = min(count, (size_t)(vgpu->vdev.region[i].size - pos));
> + memcpy(buf, base + pos, count);
> +
> + return count;
> +}
> +
> +static void intel_vgpu_reg_release_opregion(struct intel_vgpu *vgpu,
> + struct vfio_region *region)
> +{
> + memunmap(region->data);
> +}
> +
> +static const struct intel_vgpu_regops intel_vgpu_regops_opregion = {
> + .rw = intel_vgpu_reg_rw_opregion,
> + .release = intel_vgpu_reg_release_opregion,
> +};
> +
> +static int intel_vgpu_register_reg(struct intel_vgpu *vgpu,
Maybe use the full name xx_register_region? 'reg' is confusing: register or region?

> + unsigned int type, unsigned int subtype,
> + const struct intel_vgpu_regops *ops,
> + size_t size, u32 flags, void *data)
> +{
> + struct vfio_region *region;
> +
> + region = krealloc(vgpu->vdev.region,
> + (vgpu->vdev.num_regions + 1) * sizeof(*region),
> + GFP_KERNEL);
> + if (!region)
> + return -ENOMEM;
> +
> + vgpu->vdev.region = region;
> + vgpu->vdev.region[vgpu->vdev.num_regions].type = type;
> + vgpu->vdev.region[vgpu->vdev.num_regions].subtype = subtype;
> + vgpu->vdev.region[vgpu->vdev.num_regions].ops = ops;
> + vgpu->vdev.region[vgpu->vdev.num_regions].size = size;
> + vgpu->vdev.region[vgpu->vdev.num_regions].flags = flags;
> + vgpu->vdev.region[vgpu->vdev.num_regions].data = data;
> 

Re: [PATCH v15 4/7] drm/i915/gvt: Add opregion support

2017-10-10 Thread Du, Changbin
On Tue, Oct 10, 2017 at 05:50:04PM +0800, Tina Zhang wrote:
> Windows guest driver needs vbt in opregion, to configure the setting
> for display. Without opregion support, the display registers won't
> be set and this blocks display model to get the correct information
> of the guest display plane.
> 
> This patch is to provide a virtual opregion for guest. Current
> implementation is to fill the virtual opregion with the content in
> host's opregion. The original author of this patch is Xiaoguang Chen.
> 
> Signed-off-by: Bing Niu 
> Signed-off-by: Tina Zhang 
> ---
>  drivers/gpu/drm/i915/gvt/hypercall.h |   1 +
>  drivers/gpu/drm/i915/gvt/kvmgt.c | 109 
> ++-
>  drivers/gpu/drm/i915/gvt/mpt.h   |  15 +
>  drivers/gpu/drm/i915/gvt/opregion.c  |  26 +++--
>  drivers/gpu/drm/i915/gvt/vgpu.c  |   4 ++
>  5 files changed, 146 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h 
> b/drivers/gpu/drm/i915/gvt/hypercall.h
> index df7f33a..32c345c 100644
> --- a/drivers/gpu/drm/i915/gvt/hypercall.h
> +++ b/drivers/gpu/drm/i915/gvt/hypercall.h
> @@ -55,6 +55,7 @@ struct intel_gvt_mpt {
> unsigned long mfn, unsigned int nr, bool map);
>   int (*set_trap_area)(unsigned long handle, u64 start, u64 end,
>bool map);
> + int (*set_opregion)(void *vgpu);
It seems we try to hide struct intel_vgpu from kvmgt, but actually kvmgt already
uses it. So set the type as struct intel_vgpu directly? I am not sure about xengt,
but the code shows that 'handle' is the correct thing?

>  };
>  
>  extern struct intel_gvt_mpt xengt_mpt;
> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c 
> b/drivers/gpu/drm/i915/gvt/kvmgt.c
> index fd0c85f..6b0a330 100644
> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> @@ -53,11 +53,23 @@ static const struct intel_gvt_ops *intel_gvt_ops;
>  #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << 
> VFIO_PCI_OFFSET_SHIFT)
>  #define VFIO_PCI_OFFSET_MASK(((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
>  
> +#define OPREGION_SIGNATURE "IntelGraphicsMem"
> +
> +struct vfio_region;
> +struct intel_vgpu_regops {
> + size_t (*rw)(struct intel_vgpu *vgpu, char *buf,
> + size_t count, loff_t *ppos, bool iswrite);
> + void (*release)(struct intel_vgpu *vgpu,
> + struct vfio_region *region);
> +};
> +
>  struct vfio_region {
>   u32 type;
>   u32 subtype;
>   size_t  size;
>   u32 flags;
> + const struct intel_vgpu_regops  *ops;
> + void*data;
>  };
>  
>  struct kvmgt_pgfn {
> @@ -430,6 +442,91 @@ static void kvmgt_protect_table_del(struct 
> kvmgt_guest_info *info,
>   }
>  }
>  
> +static size_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf,
> + size_t count, loff_t *ppos, bool iswrite)
Personally I think intel_vgpu_rw_opregion() is better. :)

> +{
> + unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
> + VFIO_PCI_NUM_REGIONS;
> + void *base = vgpu->vdev.region[i].data;
> + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
> +
> + if (pos >= vgpu->vdev.region[i].size || iswrite) {
> + gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n");
> + return -EINVAL;
> + }
> + count = min(count, (size_t)(vgpu->vdev.region[i].size - pos));
> + memcpy(buf, base + pos, count);
> +
> + return count;
> +}
> +
> +static void intel_vgpu_reg_release_opregion(struct intel_vgpu *vgpu,
> + struct vfio_region *region)
> +{
> + memunmap(region->data);
> +}
> +
> +static const struct intel_vgpu_regops intel_vgpu_regops_opregion = {
> + .rw = intel_vgpu_reg_rw_opregion,
> + .release = intel_vgpu_reg_release_opregion,
> +};
> +
> +static int intel_vgpu_register_reg(struct intel_vgpu *vgpu,
Maybe use the full name xx_register_region? 'reg' is confusing: register or region?

> + unsigned int type, unsigned int subtype,
> + const struct intel_vgpu_regops *ops,
> + size_t size, u32 flags, void *data)
> +{
> + struct vfio_region *region;
> +
> + region = krealloc(vgpu->vdev.region,
> + (vgpu->vdev.num_regions + 1) * sizeof(*region),
> + GFP_KERNEL);
> + if (!region)
> + return -ENOMEM;
> +
> + vgpu->vdev.region = region;
> + vgpu->vdev.region[vgpu->vdev.num_regions].type = type;
> + vgpu->vdev.region[vgpu->vdev.num_regions].subtype = subtype;
> + vgpu->vdev.region[vgpu->vdev.num_regions].ops = ops;
> + vgpu->vdev.region[vgpu->vdev.num_regions].size = size;
> + vgpu->vdev.region[vgpu->vdev.num_regions].flags = flags;
> + vgpu->vdev.region[vgpu->vdev.num_regions].data = data;
> + vgpu->vdev.num_regions++;
> +
> +

Re: [alsa-devel] [Patch v6 2/7] slimbus: Add messaging APIs to slimbus framework

2017-10-10 Thread Vinod Koul
On Fri, Oct 06, 2017 at 05:51:31PM +0200, srinivas.kandaga...@linaro.org wrote:

>   mutex_init(>m_ctrl);
> + spin_lock_init(>tx.lock);
> + spin_lock_init(>rx.lock);

locks galore :) My assumption is that you want to optimize these? But given
that audio user is going to be serialized do we practically need two locks?

> +
> + ctrl->pending_wr = kcalloc((ctrl->tx.n - 1),
> +sizeof(struct slim_pending),
> +GFP_KERNEL);
> + if (!ctrl->pending_wr) {
> + ret = -ENOMEM;
> + goto wr_alloc_failed;
> + }
> +
> + sema_init(>tx_sem, (ctrl->tx.n - 1));

I thought the v5 comment from Arnd was not to use semaphores..

> +/* Copyright (c) 2011-2016, The Linux Foundation. All rights reserved.

2017?

> +int slim_processtxn(struct slim_controller *ctrl,

slim_process_txn seems more readable to me

> + struct slim_msg_txn *txn)
> +{
> + int ret, i = 0;
> + unsigned long flags;
> + u8 *buf;
> + bool async = false;
> + struct slim_cb_data cbd;
> + DECLARE_COMPLETION_ONSTACK(done);
> + bool need_tid = slim_tid_txn(txn->mt, txn->mc);
> +
> + if (!txn->msg->comp_cb) {
> + txn->msg->comp_cb = slim_sync_default_cb;
> + cbd.comp = 
> + txn->msg->ctx = 
> + } else {
> + async = true;
> + }
> +
> + buf = slim_get_tx(ctrl, txn, need_tid);
> + if (!buf)
> + return -ENOMEM;
> +
> + if (need_tid) {
> + spin_lock_irqsave(>txn_lock, flags);
> + for (i = 0; i < ctrl->last_tid; i++) {
> + if (ctrl->tid_tbl[i] == NULL)
> + break;
> + }
> + if (i >= ctrl->last_tid) {
> + if (ctrl->last_tid == (SLIM_MAX_TIDS - 1)) {
> + spin_unlock_irqrestore(>txn_lock, flags);
> + slim_return_tx(ctrl, -ENOMEM);
> + return -ENOMEM;
> + }
> + ctrl->last_tid++;
> + }
> + ctrl->tid_tbl[i] = txn->msg;
> + txn->tid = i;
> + spin_unlock_irqrestore(>txn_lock, flags);
> + }
> +
> + ret = ctrl->xfer_msg(ctrl, txn, buf);
> +
> + if (!ret && !async) { /* sync transaction */
> + /* Fine-tune calculation after bandwidth management */
> + unsigned long ms = txn->rl + 100;
> +
> + ret = wait_for_completion_timeout(,
> +   msecs_to_jiffies(ms));
> + if (!ret)
> + slim_return_tx(ctrl, -ETIMEDOUT);
> +
> + ret = cbd.ret;
> + }
> +
> + if (ret && need_tid) {
> + spin_lock_irqsave(>txn_lock, flags);
> + /* Invalidate the transaction */
> + ctrl->tid_tbl[txn->tid] = NULL;
> + spin_unlock_irqrestore(>txn_lock, flags);
> + }
> + if (ret)
> + dev_err(>dev, "Tx:MT:0x%x, MC:0x%x, LA:0x%x failed:%d\n",
> + txn->mt, txn->mc, txn->la, ret);
> + if (!async) {
> + txn->msg->comp_cb = NULL;
> + txn->msg->ctx = NULL;
> + }
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(slim_processtxn);

that is interesting, I was expecting this to be internal API. So users are
expected to use this which is not very convenient IMO. Can we hide the gory
details and give users simple tx/rx or read/write APIs. FWIW most of the
usage would be thru regmap where people would call regmap_read/write()

> +
> +static int slim_val_inf_sanity(struct slim_controller *ctrl,
> +struct slim_val_inf *msg, u8 mc)
> +{
> + if (!msg || msg->num_bytes > 16 ||
> + (msg->start_offset + msg->num_bytes) > 0xC00)
> + goto reterr;

line break here

> + switch (mc) {
> + case SLIM_MSG_MC_REQUEST_VALUE:
> + case SLIM_MSG_MC_REQUEST_INFORMATION:

what does MC refer to?

> + if (msg->rbuf != NULL)
> + return 0;
> + break;

after each break too

> +static u16 slim_slicecodefromsize(u16 req)

hmmm, Linux code doesn't prefer names like this :)

> +EXPORT_SYMBOL_GPL(slim_request_inf_element);
> +
> +

unnecessary double space

> +struct slim_val_inf {
> + u16 start_offset;
> + u8  num_bytes;
> + u8  *rbuf;
> + const u8*wbuf;

can we do both read and write? If not, it can be a single buf which may be rbuf
or wbuf based on type

-- 
~Vinod


Re: [alsa-devel] [Patch v6 2/7] slimbus: Add messaging APIs to slimbus framework

2017-10-10 Thread Vinod Koul
On Fri, Oct 06, 2017 at 05:51:31PM +0200, srinivas.kandaga...@linaro.org wrote:

>   mutex_init(>m_ctrl);
> + spin_lock_init(>tx.lock);
> + spin_lock_init(>rx.lock);

locks galore :) My assumption is that you want to optimize these? But given
that audio user is going to be serialized do we practically need two locks?

> +
> + ctrl->pending_wr = kcalloc((ctrl->tx.n - 1),
> +sizeof(struct slim_pending),
> +GFP_KERNEL);
> + if (!ctrl->pending_wr) {
> + ret = -ENOMEM;
> + goto wr_alloc_failed;
> + }
> +
> + sema_init(>tx_sem, (ctrl->tx.n - 1));

I thought the v5 comment from Arnd was not to use semaphores..

> +/* Copyright (c) 2011-2016, The Linux Foundation. All rights reserved.

2017?

> +int slim_processtxn(struct slim_controller *ctrl,

slim_process_txn seems more readable to me

> + struct slim_msg_txn *txn)
> +{
> + int ret, i = 0;
> + unsigned long flags;
> + u8 *buf;
> + bool async = false;
> + struct slim_cb_data cbd;
> + DECLARE_COMPLETION_ONSTACK(done);
> + bool need_tid = slim_tid_txn(txn->mt, txn->mc);
> +
> + if (!txn->msg->comp_cb) {
> + txn->msg->comp_cb = slim_sync_default_cb;
> + cbd.comp = 
> + txn->msg->ctx = 
> + } else {
> + async = true;
> + }
> +
> + buf = slim_get_tx(ctrl, txn, need_tid);
> + if (!buf)
> + return -ENOMEM;
> +
> + if (need_tid) {
> + spin_lock_irqsave(>txn_lock, flags);
> + for (i = 0; i < ctrl->last_tid; i++) {
> + if (ctrl->tid_tbl[i] == NULL)
> + break;
> + }
> + if (i >= ctrl->last_tid) {
> + if (ctrl->last_tid == (SLIM_MAX_TIDS - 1)) {
> + spin_unlock_irqrestore(>txn_lock, flags);
> + slim_return_tx(ctrl, -ENOMEM);
> + return -ENOMEM;
> + }
> + ctrl->last_tid++;
> + }
> + ctrl->tid_tbl[i] = txn->msg;
> + txn->tid = i;
> + spin_unlock_irqrestore(>txn_lock, flags);
> + }
> +
> + ret = ctrl->xfer_msg(ctrl, txn, buf);
> +
> + if (!ret && !async) { /* sync transaction */
> + /* Fine-tune calculation after bandwidth management */
> + unsigned long ms = txn->rl + 100;
> +
> + ret = wait_for_completion_timeout(,
> +   msecs_to_jiffies(ms));
> + if (!ret)
> + slim_return_tx(ctrl, -ETIMEDOUT);
> +
> + ret = cbd.ret;
> + }
> +
> + if (ret && need_tid) {
> + spin_lock_irqsave(>txn_lock, flags);
> + /* Invalidate the transaction */
> + ctrl->tid_tbl[txn->tid] = NULL;
> + spin_unlock_irqrestore(>txn_lock, flags);
> + }
> + if (ret)
> + dev_err(>dev, "Tx:MT:0x%x, MC:0x%x, LA:0x%x failed:%d\n",
> + txn->mt, txn->mc, txn->la, ret);
> + if (!async) {
> + txn->msg->comp_cb = NULL;
> + txn->msg->ctx = NULL;
> + }
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(slim_processtxn);

that is interesting, I was expecting this to be internal API. So users are
expected to use this which is not very convenient IMO. Can we hide the gory
details and give users simple tx/rx or read/write APIs. FWIW most of the
usage would be thru regmap where people would call regmap_read/write()

> +
> +static int slim_val_inf_sanity(struct slim_controller *ctrl,
> +struct slim_val_inf *msg, u8 mc)
> +{
> + if (!msg || msg->num_bytes > 16 ||
> + (msg->start_offset + msg->num_bytes) > 0xC00)
> + goto reterr;

line break here

> + switch (mc) {
> + case SLIM_MSG_MC_REQUEST_VALUE:
> + case SLIM_MSG_MC_REQUEST_INFORMATION:

what does MC refer to?

> + if (msg->rbuf != NULL)
> + return 0;
> + break;

after each break too

> +static u16 slim_slicecodefromsize(u16 req)

hmmm, Linux code doesn't prefer names like this :)

> +EXPORT_SYMBOL_GPL(slim_request_inf_element);
> +
> +

unnecessary double space

> +struct slim_val_inf {
> + u16 start_offset;
> + u8  num_bytes;
> + u8  *rbuf;
> + const u8*wbuf;

can we do both read and write? If not, it can be a single buf which may be rbuf
or wbuf based on type

-- 
~Vinod


Re: [PATCH 2/3] watchdog: orion: don't enable rstout if an interrupt is configured

2017-10-10 Thread Chris Packham
On 11/10/17 16:42, Guenter Roeck wrote:
> On 10/10/2017 07:29 PM, Chris Packham wrote:
>> The orion_wdt_irq invokes panic() so we are going to reset the CPU
>> regardless.  By not setting this bit we get a chance to gather debug
>> from the panic output before the system is reset.
>>
>> Signed-off-by: Chris Packham 
> 
> Unless I am missing something, this assumes that the interrupt is
> handled, ie that the system is not stuck with interrupts disabled.
> This makes the watchdog less reliable. This added verbosity comes
> at a significant cost. I'd like to get input from others if this
> is acceptable.
> 
> That would be different if there was a means to configure a pretimeout,
> ie a means to tell the system to generate an irq first, followed by a
> hard reset if the interrupt is not served. that does not seem to be
> the case here, though.
> 

As far as I can tell there is no pretimeout capability in the orion 
/armada WDT. I have got a work-in-progress patch that enables the RSTOUT 
in the interrupt handler which somewhat mitigates your concern but 
still requires the interrupt to be handled at least once.

Another option would be to use one of the spare global timers to provide 
the interrupt-driven aspect.

Of course if the irq isn't specified then the existing behaviour is 
retained which would make the 3/3 patch of this series the debatable part.


> Guenter
> 
>> ---
>>drivers/watchdog/orion_wdt.c | 25 +
>>1 file changed, 17 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c
>> index ea676d233e1e..ce88f339ef7f 100644
>> --- a/drivers/watchdog/orion_wdt.c
>> +++ b/drivers/watchdog/orion_wdt.c
>> @@ -71,6 +71,7 @@ struct orion_watchdog {
>>  unsigned long clk_rate;
>>  struct clk *clk;
>>  const struct orion_watchdog_data *data;
>> +int irq;
>>};
>>
>>static int orion_wdt_clock_init(struct platform_device *pdev,
>> @@ -203,9 +204,11 @@ static int armada375_start(struct watchdog_device 
>> *wdt_dev)
>>  dev->data->wdt_enable_bit);
>>
>>  /* Enable reset on watchdog */
>> -reg = readl(dev->rstout);
>> -reg |= dev->data->rstout_enable_bit;
>> -writel(reg, dev->rstout);
>> +if (!dev->irq) {
>> +reg = readl(dev->rstout);
>> +reg |= dev->data->rstout_enable_bit;
>> +writel(reg, dev->rstout);
>> +}
>>
>>  atomic_io_modify(dev->rstout_mask, dev->data->rstout_mask_bit, 0);
>>  return 0;
>> @@ -228,9 +231,12 @@ static int armada370_start(struct watchdog_device 
>> *wdt_dev)
>>  dev->data->wdt_enable_bit);
>>
>>  /* Enable reset on watchdog */
>> -reg = readl(dev->rstout);
>> -reg |= dev->data->rstout_enable_bit;
>> -writel(reg, dev->rstout);
>> +if (!dev->irq) {
>> +reg = readl(dev->rstout);
>> +reg |= dev->data->rstout_enable_bit;
>> +writel(reg, dev->rstout);
>> +}
>> +
>>  return 0;
>>}
>>
>> @@ -247,8 +253,9 @@ static int orion_start(struct watchdog_device *wdt_dev)
>>  dev->data->wdt_enable_bit);
>>
>>  /* Enable reset on watchdog */
>> -atomic_io_modify(dev->rstout, dev->data->rstout_enable_bit,
>> -  dev->data->rstout_enable_bit);
>> +if (!dev->irq)
>> +atomic_io_modify(dev->rstout, dev->data->rstout_enable_bit,
>> +  dev->data->rstout_enable_bit);
>>
>>  return 0;
>>}
>> @@ -595,6 +602,8 @@ static int orion_wdt_probe(struct platform_device *pdev)
>>  dev_err(>dev, "failed to request IRQ\n");
>>  goto disable_clk;
>>  }
>> +
>> +dev->irq = irq;
>>  }
>>
>>  watchdog_set_nowayout(>wdt, nowayout);
>>
> 
> 



Re: [PATCH 2/3] watchdog: orion: don't enable rstout if an interrupt is configured

2017-10-10 Thread Chris Packham
On 11/10/17 16:42, Guenter Roeck wrote:
> On 10/10/2017 07:29 PM, Chris Packham wrote:
>> The orion_wdt_irq invokes panic() so we are going to reset the CPU
>> regardless.  By not setting this bit we get a chance to gather debug
>> from the panic output before the system is reset.
>>
>> Signed-off-by: Chris Packham 
> 
> Unless I am missing something, this assumes that the interrupt is
> handled, ie that the system is not stuck with interrupts disabled.
> This makes the watchdog less reliable. This added verbosity comes
> at a significant cost. I'd like to get input from others if this
> is acceptable.
> 
> That would be different if there was a means to configure a pretimeout,
> ie a means to tell the system to generate an irq first, followed by a
> hard reset if the interrupt is not served. that does not seem to be
> the case here, though.
> 

As far as I can tell there is no pretimeout capability in the orion 
/armada WDT. I have got a work-in-progress patch that enables the RSTOUT 
in the interrupt handler which somewhat mitigates your concern but 
still requires the interrupt to be handled at least once.

Another option would be to use one of the spare global timers to provide 
the interrupt-driven aspect.

Of course if the irq isn't specified then the existing behaviour is 
retained which would make the 3/3 patch of this series the debatable part.


> Guenter
> 
>> ---
>>drivers/watchdog/orion_wdt.c | 25 +
>>1 file changed, 17 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c
>> index ea676d233e1e..ce88f339ef7f 100644
>> --- a/drivers/watchdog/orion_wdt.c
>> +++ b/drivers/watchdog/orion_wdt.c
>> @@ -71,6 +71,7 @@ struct orion_watchdog {
>>  unsigned long clk_rate;
>>  struct clk *clk;
>>  const struct orion_watchdog_data *data;
>> +int irq;
>>};
>>
>>static int orion_wdt_clock_init(struct platform_device *pdev,
>> @@ -203,9 +204,11 @@ static int armada375_start(struct watchdog_device 
>> *wdt_dev)
>>  dev->data->wdt_enable_bit);
>>
>>  /* Enable reset on watchdog */
>> -reg = readl(dev->rstout);
>> -reg |= dev->data->rstout_enable_bit;
>> -writel(reg, dev->rstout);
>> +if (!dev->irq) {
>> +reg = readl(dev->rstout);
>> +reg |= dev->data->rstout_enable_bit;
>> +writel(reg, dev->rstout);
>> +}
>>
>>  atomic_io_modify(dev->rstout_mask, dev->data->rstout_mask_bit, 0);
>>  return 0;
>> @@ -228,9 +231,12 @@ static int armada370_start(struct watchdog_device 
>> *wdt_dev)
>>  dev->data->wdt_enable_bit);
>>
>>  /* Enable reset on watchdog */
>> -reg = readl(dev->rstout);
>> -reg |= dev->data->rstout_enable_bit;
>> -writel(reg, dev->rstout);
>> +if (!dev->irq) {
>> +reg = readl(dev->rstout);
>> +reg |= dev->data->rstout_enable_bit;
>> +writel(reg, dev->rstout);
>> +}
>> +
>>  return 0;
>>}
>>
>> @@ -247,8 +253,9 @@ static int orion_start(struct watchdog_device *wdt_dev)
>>  dev->data->wdt_enable_bit);
>>
>>  /* Enable reset on watchdog */
>> -atomic_io_modify(dev->rstout, dev->data->rstout_enable_bit,
>> -  dev->data->rstout_enable_bit);
>> +if (!dev->irq)
>> +atomic_io_modify(dev->rstout, dev->data->rstout_enable_bit,
>> +  dev->data->rstout_enable_bit);
>>
>>  return 0;
>>}
>> @@ -595,6 +602,8 @@ static int orion_wdt_probe(struct platform_device *pdev)
>>  dev_err(>dev, "failed to request IRQ\n");
>>  goto disable_clk;
>>  }
>> +
>> +dev->irq = irq;
>>  }
>>
>>  watchdog_set_nowayout(>wdt, nowayout);
>>
> 
> 



Re: [PATCH 4.4 00/47] 4.4.92-stable review

2017-10-10 Thread Sumit Semwal
Hi Greg,

On 11 October 2017 at 09:16, Tom Gall  wrote:
>
>> On Oct 10, 2017, at 2:50 PM, Greg Kroah-Hartman  
>> wrote:
>>
>> This is the start of the stable review cycle for the 4.4.92 release.
>> There are 47 patches in this series, all will be posted as a response
>> to this one.  If anyone has any issues with these being applied, please
>> let me know.
>>
>> Responses should be made by Thu Oct 12 19:50:01 UTC 2017.
>> Anything received after that time might be too late.
>>
>> The whole patch series can be found in one patch at:
>>   kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.4.92-rc1.gz
>> or in the git tree and branch at:
>>  git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
>> linux-4.4.y
>> and the diffstat can be found below.
>>
>> thanks,
>>
>> greg k-h
>>
>
>
> On HiKey (arm64) when running ltp-sched with this rc we’re seeing some sort
> of scheduler issue or  maybe some kind of memory corruption.
>
> Raw output of interest :
>
> https://lkft.validation.linaro.org/scheduler/job/46192#L5291
>
> ltp-sched-tests__url: git://github.com/linux-test-project/ltp.git
> ltp-sched-tests__version: “20170929"
> kernel-config: 
> http://snapshots.linaro.org/openembedded/lkft/morty/hikey/rpb/linaro-hikey-stable-rc-4.4/31/defconfig
> build-location: 
> http://snapshots.linaro.org/openembedded/lkft/morty/hikey/rpb/linaro-hikey-stable-rc-4.4/31

Comparing with other boards in the test setup, we don't see this issue
with x15 (arm), juno (arm64) or x86.

We will investigate to check if any conflicting hikey-specific
(non-lts) patches are causing this, or need adaptation to the
sched/cpuset patch added in this LTS.

This, IMHO, shouldn't cause us to delay this stable release.

Best,
Sumit.


Re: [PATCH 4.4 00/47] 4.4.92-stable review

2017-10-10 Thread Sumit Semwal
Hi Greg,

On 11 October 2017 at 09:16, Tom Gall  wrote:
>
>> On Oct 10, 2017, at 2:50 PM, Greg Kroah-Hartman  
>> wrote:
>>
>> This is the start of the stable review cycle for the 4.4.92 release.
>> There are 47 patches in this series, all will be posted as a response
>> to this one.  If anyone has any issues with these being applied, please
>> let me know.
>>
>> Responses should be made by Thu Oct 12 19:50:01 UTC 2017.
>> Anything received after that time might be too late.
>>
>> The whole patch series can be found in one patch at:
>>   kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.4.92-rc1.gz
>> or in the git tree and branch at:
>>  git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
>> linux-4.4.y
>> and the diffstat can be found below.
>>
>> thanks,
>>
>> greg k-h
>>
>
>
> On HiKey (arm64) when running ltp-sched with this rc we’re seeing some sort
> of scheduler issue or  maybe some kind of memory corruption.
>
> Raw output of interest :
>
> https://lkft.validation.linaro.org/scheduler/job/46192#L5291
>
> ltp-sched-tests__url: git://github.com/linux-test-project/ltp.git
> ltp-sched-tests__version: “20170929"
> kernel-config: 
> http://snapshots.linaro.org/openembedded/lkft/morty/hikey/rpb/linaro-hikey-stable-rc-4.4/31/defconfig
> build-location: 
> http://snapshots.linaro.org/openembedded/lkft/morty/hikey/rpb/linaro-hikey-stable-rc-4.4/31

Comparing with other boards in the test setup, we don't see this issue
with x15 (arm), juno (arm64) or x86.

We will investigate to check if any conflicting hikey-specific
(non-lts) patches are causing this, or need adaptation to the
sched/cpuset patch added in this LTS.

This, IMHO, shouldn't cause us to delay this stable release.

Best,
Sumit.


RE: [PATCH 01/10] perf record: new interfaces to read ring buffer to file

2017-10-10 Thread Liang, Kan
> >  /* When check_messup is true, 'end' must points to a good entry */
> > static union perf_event *  perf_mmap__read(struct perf_mmap *md, bool
> > check_messup, u64 start, diff --git a/tools/perf/util/evlist.h
> > b/tools/perf/util/evlist.h index b1c14f1..1ce4857 100644
> > --- a/tools/perf/util/evlist.h
> > +++ b/tools/perf/util/evlist.h
> > @@ -39,6 +39,16 @@ struct perf_mmap {
> > char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
> >  };
> >
> > +struct perf_mmap_read {
> > +   struct perf_mmap*md;
> > +   u64 head;
> > +   u64 start;
> > +   u64 end;
> 
> So there will be always a one-on-one association of 'struct perf_mmap_read'
> and 'struct perf_mmap', why not go on adding more fields to 'struct
> perf_mmap' as we need

The fields in 'struct perf_mmap' need to be recalculated before each reading.
So I put them in a new struct.  

> but not doing it all at once (backward, snapshotting,
> overwrite, etc) but first the simple part, make the most basic mode:
> 
>   perf record -a
> 
>   perf top
> 
> work, multithreaded, leaving the other more complicated modes fallbacking
> to the old format, then when we have it solid, go on getting the other
> features.

Agree. 
When I did perf top optimization, I also tried Namhyung's perf top multi-thread 
patch.
https://lwn.net/Articles/667469/
I think it may be a good start point.

I didn't work on his patch, because the root cause of bad perf top performance
is non-overwrite mode, which generates lots of samples in a short time. That
exceeds KNL's computational capability. Multi-threading doesn't help much in
this case, so I tried to use overwrite mode instead.

> 
> In the end, having the two formats supported will be needed anyway, and
> we can as well ask for processing with both perf.data file formats to compare
> results, while we strenghten out the new code.
>
> I just think we should do this in a more fine grained way to avoid too much
> code churn as well as having a fallback to the old code, that albeit non
> scalable, is what we have been using and can help in certifying that the new
> one works well, by comparing its outputs.

I already extended the multithreading support for event synthesization in perf
record. 
https://github.com/kliang2/perf.git perf_record_opt
I will send it out for review shortly after rebasing on the latest perf/core.

In the patch series, I realloc buffer for each thread to temporarily keep the
processing result, and write them to the perf.data at the end of event
synthesization. The number of synthesized event is not big (hundreds of
Kilobyte). So I think it should be OK to do that.

Thanks,
Kan
> 
> - Arnaldo
> 
> > +   booloverwrite;
> > +   boolbackward;
> > +   unsigned long   size;
> > +};
> > +
> >  static inline size_t
> >  perf_mmap__mmap_len(struct perf_mmap *map)  { @@ -193,6 +203,11
> @@
> > void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int
> > idx);
> >
> >  void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx);
> >
> > +int perf_mmap__read_init(struct perf_mmap *md, struct
> perf_mmap_read *read,
> > +bool overwrite, bool backward);
> > +int perf_mmap__read_to_file(struct perf_mmap_read *read,
> > +   struct perf_data_file *file);
> > +
> >  int perf_evlist__open(struct perf_evlist *evlist);  void
> > perf_evlist__close(struct perf_evlist *evlist);
> >
> > --
> > 2.5.5


RE: [PATCH 01/10] perf record: new interfaces to read ring buffer to file

2017-10-10 Thread Liang, Kan
> >  /* When check_messup is true, 'end' must points to a good entry */
> > static union perf_event *  perf_mmap__read(struct perf_mmap *md, bool
> > check_messup, u64 start, diff --git a/tools/perf/util/evlist.h
> > b/tools/perf/util/evlist.h index b1c14f1..1ce4857 100644
> > --- a/tools/perf/util/evlist.h
> > +++ b/tools/perf/util/evlist.h
> > @@ -39,6 +39,16 @@ struct perf_mmap {
> > char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
> >  };
> >
> > +struct perf_mmap_read {
> > +   struct perf_mmap*md;
> > +   u64 head;
> > +   u64 start;
> > +   u64 end;
> 
> So there will be always a one-on-one association of 'struct perf_mmap_read'
> and 'struct perf_mmap', why not go on adding more fields to 'struct
> perf_mmap' as we need

The fields in 'struct perf_mmap' need to be recalculated before each reading.
So I put them in a new struct.  

> but not doing it all at once (backward, snapshotting,
> overwrite, etc) but first the simple part, make the most basic mode:
> 
>   perf record -a
> 
>   perf top
> 
> work, multithreaded, leaving the other more complicated modes fallbacking
> to the old format, then when we have it solid, go on getting the other
> features.

Agree. 
When I did perf top optimization, I also tried Namhyung's perf top multi-thread 
patch.
https://lwn.net/Articles/667469/
I think it may be a good start point.

I didn't work on his patch, because the root cause of bad perf top performance
is non-overwrite mode, which generates lots of samples in a short time. That
exceeds KNL's computational capability. Multi-threading doesn't help much in
this case, so I tried to use overwrite mode instead.

> 
> In the end, having the two formats supported will be needed anyway, and
> we can as well ask for processing with both perf.data file formats to compare
> results, while we strenghten out the new code.
>
> I just think we should do this in a more fine grained way to avoid too much
> code churn as well as having a fallback to the old code, that albeit non
> scalable, is what we have been using and can help in certifying that the new
> one works well, by comparing its outputs.

I already extended the multithreading support for event synthesization in perf
record. 
https://github.com/kliang2/perf.git perf_record_opt
I will send it out for review shortly after rebasing on the latest perf/core.

In the patch series, I realloc buffer for each thread to temporarily keep the
processing result, and write them to the perf.data at the end of event
synthesization. The number of synthesized event is not big (hundreds of
Kilobyte). So I think it should be OK to do that.

Thanks,
Kan
> 
> - Arnaldo
> 
> > +   booloverwrite;
> > +   boolbackward;
> > +   unsigned long   size;
> > +};
> > +
> >  static inline size_t
> >  perf_mmap__mmap_len(struct perf_mmap *map)  { @@ -193,6 +203,11
> @@
> > void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int
> > idx);
> >
> >  void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx);
> >
> > +int perf_mmap__read_init(struct perf_mmap *md, struct
> perf_mmap_read *read,
> > +bool overwrite, bool backward);
> > +int perf_mmap__read_to_file(struct perf_mmap_read *read,
> > +   struct perf_data_file *file);
> > +
> >  int perf_evlist__open(struct perf_evlist *evlist);  void
> > perf_evlist__close(struct perf_evlist *evlist);
> >
> > --
> > 2.5.5


Re: [PATCH] powerpc/perf: Fix IMC initialization crash

2017-10-10 Thread Michael Ellerman
Anju T Sudhakar  writes:

> Call trace observed with latest firmware, and upstream kernel.
>
> [   14.499938] NIP [c00f318c] init_imc_pmu+0x8c/0xcf0
> [   14.499973] LR [c00f33f8] init_imc_pmu+0x2f8/0xcf0
> [   14.57] Call Trace:
> [   14.500027] [c03fed18f710] [c00f33c8] init_imc_pmu+0x2c8/0xcf0 
> (unreliable)
> [   14.500080] [c03fed18f800] [c00b5ec0] 
> opal_imc_counters_probe+0x300/0x400
> [   14.500132] [c03fed18f900] [c0807ef4] 
> platform_drv_probe+0x64/0x110
> [   14.500185] [c03fed18f980] [c0804b58] 
> driver_probe_device+0x3d8/0x580
> [   14.500236] [c03fed18fa10] [c0804e4c] 
> __driver_attach+0x14c/0x1a0
> [   14.500302] [c03fed18fa90] [c080156c] 
> bus_for_each_dev+0x8c/0xf0
> [   14.500353] [c03fed18fae0] [c0803fa4] driver_attach+0x34/0x50
> [   14.500397] [c03fed18fb00] [c0803688] 
> bus_add_driver+0x298/0x350
> [   14.500449] [c03fed18fb90] [c080605c] 
> driver_register+0x9c/0x180
> [   14.500500] [c03fed18fc00] [c0807dec] 
> __platform_driver_register+0x5c/0x70
> [   14.500552] [c03fed18fc20] [c101cee0] 
> opal_imc_driver_init+0x2c/0x40
> [   14.500603] [c03fed18fc40] [c000d084] 
> do_one_initcall+0x64/0x1d0
> [   14.500654] [c03fed18fd00] [c100434c] 
> kernel_init_freeable+0x280/0x374
> [   14.500705] [c03fed18fdc0] [c000d314] kernel_init+0x24/0x160
> [   14.500750] [c03fed18fe30] [c000b4e8] 
> ret_from_kernel_thread+0x5c/0x74
> [   14.500799] Instruction dump:
> [   14.500827] 4082024c 2f890002 419e054c 2e890003 41960094 2e890001 3ba0ffea 
> 419602d8 
> [   14.500884] 419e0290 2f890003 419e02a8 e93e0118  2fa3 
> 419e0010 4827ba41 
> [   14.500945] ---[ end trace 27b734ad26f1add4 ]---
> [   15.908719] 
> [   16.908869] Kernel panic - not syncing: Attempted to kill init! 
> exitcode=0x0007
> [   16.908869] 
> [   18.125813] ---[ end Kernel panic - not syncing: Attempted to kill init! 
> exitcode=0x0007]
>
> While registering nest imc at init, cpu-hotplug callback 
> `nest_pmu_cpumask_init()`
> makes an opal call to stop the engine. And if the OPAL call fails, 
> imc_common_cpuhp_mem_free() is invoked to cleanup memory and cpuhotplug setup.
>
> But when cleaning up the attribute group, we were dereferencing the attribute
> element array without checking whether the backing element is not NULL. This
> causes the kernel panic.
>
> Factor out the memory freeing part from imc_common_cpuhp_mem_free() to handle
> the failing case gracefully.
>
> Signed-off-by: Anju T Sudhakar 
> Reported-by: Pridhiviraj Paidipeddi 
> ---
>  arch/powerpc/perf/imc-pmu.c | 23 ---
>  1 file changed, 16 insertions(+), 7 deletions(-)

It's the week before rc5, so I'd really like just the absolute minimal
fix. There's sufficient code movement here that I can't even immediately
see where the bug fix is.

cheers


Re: [PATCH] powerpc/perf: Fix IMC initialization crash

2017-10-10 Thread Michael Ellerman
Anju T Sudhakar  writes:

> Call trace observed with latest firmware, and upstream kernel.
>
> [   14.499938] NIP [c00f318c] init_imc_pmu+0x8c/0xcf0
> [   14.499973] LR [c00f33f8] init_imc_pmu+0x2f8/0xcf0
> [   14.57] Call Trace:
> [   14.500027] [c03fed18f710] [c00f33c8] init_imc_pmu+0x2c8/0xcf0 
> (unreliable)
> [   14.500080] [c03fed18f800] [c00b5ec0] 
> opal_imc_counters_probe+0x300/0x400
> [   14.500132] [c03fed18f900] [c0807ef4] 
> platform_drv_probe+0x64/0x110
> [   14.500185] [c03fed18f980] [c0804b58] 
> driver_probe_device+0x3d8/0x580
> [   14.500236] [c03fed18fa10] [c0804e4c] 
> __driver_attach+0x14c/0x1a0
> [   14.500302] [c03fed18fa90] [c080156c] 
> bus_for_each_dev+0x8c/0xf0
> [   14.500353] [c03fed18fae0] [c0803fa4] driver_attach+0x34/0x50
> [   14.500397] [c03fed18fb00] [c0803688] 
> bus_add_driver+0x298/0x350
> [   14.500449] [c03fed18fb90] [c080605c] 
> driver_register+0x9c/0x180
> [   14.500500] [c03fed18fc00] [c0807dec] 
> __platform_driver_register+0x5c/0x70
> [   14.500552] [c03fed18fc20] [c101cee0] 
> opal_imc_driver_init+0x2c/0x40
> [   14.500603] [c03fed18fc40] [c000d084] 
> do_one_initcall+0x64/0x1d0
> [   14.500654] [c03fed18fd00] [c100434c] 
> kernel_init_freeable+0x280/0x374
> [   14.500705] [c03fed18fdc0] [c000d314] kernel_init+0x24/0x160
> [   14.500750] [c03fed18fe30] [c000b4e8] 
> ret_from_kernel_thread+0x5c/0x74
> [   14.500799] Instruction dump:
> [   14.500827] 4082024c 2f890002 419e054c 2e890003 41960094 2e890001 3ba0ffea 
> 419602d8 
> [   14.500884] 419e0290 2f890003 419e02a8 e93e0118  2fa3 
> 419e0010 4827ba41 
> [   14.500945] ---[ end trace 27b734ad26f1add4 ]---
> [   15.908719] 
> [   16.908869] Kernel panic - not syncing: Attempted to kill init! 
> exitcode=0x0007
> [   16.908869] 
> [   18.125813] ---[ end Kernel panic - not syncing: Attempted to kill init! 
> exitcode=0x0007]
>
> While registering nest imc at init, cpu-hotplug callback 
> `nest_pmu_cpumask_init()`
> makes an opal call to stop the engine. And if the OPAL call fails, 
> imc_common_cpuhp_mem_free() is invoked to cleanup memory and cpuhotplug setup.
>
> But when cleaning up the attribute group, we were dereferencing the attribute
> element array without checking whether the backing element is not NULL. This
> causes the kernel panic.
>
> Factor out the memory freeing part from imc_common_cpuhp_mem_free() to handle
> the failing case gracefully.
>
> Signed-off-by: Anju T Sudhakar 
> Reported-by: Pridhiviraj Paidipeddi 
> ---
>  arch/powerpc/perf/imc-pmu.c | 23 ---
>  1 file changed, 16 insertions(+), 7 deletions(-)

It's the week before rc5, so I'd really like just the absolute minimal
fix. There's sufficient code movement here that I can't even immediately
see where the bug fix is.

cheers


Re: [PATCH] printk: hash addresses printed with %p

2017-10-10 Thread Joe Perches
On Wed, 2017-10-11 at 14:48 +1100, Tobin C. Harding wrote:
> Currently there are many places in the kernel where addresses are being
> printed using an unadorned %p. Kernel pointers should be printed using
> %pK allowing some control via the kptr_restrict sysctl. Exposing addresses
> gives attackers sensitive information about the kernel layout in memory.
[]
> diff --git a/lib/vsprintf.c b/lib/vsprintf.c
[]
> @@ -1591,6 +1591,35 @@ char *device_node_string(char *buf, char *end, struct 
> device_node *dn,
>   return widen_string(buf, buf - buf_start, end, spec);
>  }
>  
> +static long get_random_odd_long(void)
> +{
> + long val = 0;
> +
> + while((val & 1) == 0) {
> + val = get_random_long();
> + }
> +
> + return val;
> +}

Perhaps

static long get_random_odd_long(void)
{
return get_random_long() | 1L;
}



Re: [PATCH] mm/page-writeback.c: fix bug caused by disable periodic writeback

2017-10-10 Thread Yafang Shao
2017-10-10 17:33 GMT+08:00 Jan Kara :
> On Tue 10-10-17 17:14:48, Yafang Shao wrote:
>> 2017-10-10 16:48 GMT+08:00 Jan Kara :
>> > On Tue 10-10-17 16:00:29, Yafang Shao wrote:
>> >> 2017-10-10 6:42 GMT+08:00 Andrew Morton :
>> >> > On Sat,  7 Oct 2017 06:58:04 +0800 Yafang Shao  
>> >> > wrote:
>> >> >
>> >> >> After disable periodic writeback by writing 0 to
>> >> >> dirty_writeback_centisecs, the handler wb_workfn() will not be
>> >> >> entered again until the dirty background limit reaches or
>> >> >> sync syscall is executed or no enough free memory available or
>> >> >> vmscan is triggered.
>> >> >> So the periodic writeback can't be enabled by writing a non-zero
>> >> >> value to dirty_writeback_centisecs
>> >> >> As it can be disabled by sysctl, it should be able to enable by
>> >> >> sysctl as well.
>> >> >>
>> >> >> ...
>> >> >>
>> >> >> --- a/mm/page-writeback.c
>> >> >> +++ b/mm/page-writeback.c
>> >> >> @@ -1972,7 +1972,13 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
>> >> >>  int dirty_writeback_centisecs_handler(struct ctl_table *table, int 
>> >> >> write,
>> >> >>   void __user *buffer, size_t *length, loff_t *ppos)
>> >> >>  {
>> >> >> - proc_dointvec(table, write, buffer, length, ppos);
>> >> >> + unsigned int old_interval = dirty_writeback_interval;
>> >> >> + int ret;
>> >> >> +
>> >> >> + ret = proc_dointvec(table, write, buffer, length, ppos);
>> >> >> + if (!ret && !old_interval && dirty_writeback_interval)
>> >> >> + wakeup_flusher_threads(0, WB_REASON_PERIODIC);
>> >> >> +
>> >> >>   return 0;
>> >> >
>> >> > We could do with a code comment here, explaining why this code exists.
>> >> >
>> >>
>> >> OK. I will comment here.
>> >>
>> >> > And...  I'm not sure it works correctly?  For example, if a device
>> >> > doesn't presently have bdi_has_dirty_io() then wakeup_flusher_threads()
>> >> > will skip it and the periodic writeback still won't be started?
>> >> >
>> >>
>> >> That's an issue.
>> >> The periodic writeback won't be started.
>> >>
>> >> Maybe we'd better call  wb_wakeup_delayed(wb) here to bypass the
>> >> bdi_has_dirty_io() check ?
>> >
>> > Well, wb_wakeup_delayed() would be more appropriate but you'd then have to
>> > iterate over all bdis and wbs to be able to call it which IMO isn't worth
>> > the pain for a special case like this. But the decision is worth mentioning
>> > in the comment. Also wakeup_flusher_threads() does in principle what you
>> > need - see my reply to Andrew for details.
>> >
>> > Honza
>>
>> Thanks for your explanation. I understand.
>> I will mention it in the comment.
>>
>> Should we do the wakeup whenever dirty_writeback_interval changes ?
>> If we still use wakeup_flusher_threads(), it will wakeup the flusher
>> threads immediately after we make the change.
>
> Yes, I think we should wakeup for every change of dirty_writeback_interval.
> And immediate wakeup is not a problem IMO.
>

Got it!


Thanks
Yafang


Re: [PATCH] mm/page-writeback.c: fix bug caused by disable periodic writeback

2017-10-10 Thread Yafang Shao
2017-10-10 17:33 GMT+08:00 Jan Kara :
> On Tue 10-10-17 17:14:48, Yafang Shao wrote:
>> 2017-10-10 16:48 GMT+08:00 Jan Kara :
>> > On Tue 10-10-17 16:00:29, Yafang Shao wrote:
>> >> 2017-10-10 6:42 GMT+08:00 Andrew Morton :
>> >> > On Sat,  7 Oct 2017 06:58:04 +0800 Yafang Shao  
>> >> > wrote:
>> >> >
>> >> >> After disable periodic writeback by writing 0 to
>> >> >> dirty_writeback_centisecs, the handler wb_workfn() will not be
>> >> >> entered again until the dirty background limit reaches or
>> >> >> sync syscall is executed or no enough free memory available or
>> >> >> vmscan is triggered.
>> >> >> So the periodic writeback can't be enabled by writing a non-zero
>> >> >> value to dirty_writeback_centisecs
>> >> >> As it can be disabled by sysctl, it should be able to enable by
>> >> >> sysctl as well.
>> >> >>
>> >> >> ...
>> >> >>
>> >> >> --- a/mm/page-writeback.c
>> >> >> +++ b/mm/page-writeback.c
>> >> >> @@ -1972,7 +1972,13 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
>> >> >>  int dirty_writeback_centisecs_handler(struct ctl_table *table, int 
>> >> >> write,
>> >> >>   void __user *buffer, size_t *length, loff_t *ppos)
>> >> >>  {
>> >> >> - proc_dointvec(table, write, buffer, length, ppos);
>> >> >> + unsigned int old_interval = dirty_writeback_interval;
>> >> >> + int ret;
>> >> >> +
>> >> >> + ret = proc_dointvec(table, write, buffer, length, ppos);
>> >> >> + if (!ret && !old_interval && dirty_writeback_interval)
>> >> >> + wakeup_flusher_threads(0, WB_REASON_PERIODIC);
>> >> >> +
>> >> >>   return 0;
>> >> >
>> >> > We could do with a code comment here, explaining why this code exists.
>> >> >
>> >>
>> >> OK. I will comment here.
>> >>
>> >> > And...  I'm not sure it works correctly?  For example, if a device
>> >> > doesn't presently have bdi_has_dirty_io() then wakeup_flusher_threads()
>> >> > will skip it and the periodic writeback still won't be started?
>> >> >
>> >>
>> >> That's an issue.
>> >> The periodic writeback won't be started.
>> >>
>> >> Maybe we'd better call  wb_wakeup_delayed(wb) here to bypass the
>> >> bdi_has_dirty_io() check ?
>> >
>> > Well, wb_wakeup_delayed() would be more appropriate but you'd then have to
>> > iterate over all bdis and wbs to be able to call it which IMO isn't worth
>> > the pain for a special case like this. But the decision is worth mentioning
>> > in the comment. Also wakeup_flusher_threads() does in principle what you
>> > need - see my reply to Andrew for details.
>> >
>> > Honza
>>
>> Thanks for your explanation. I understand.
>> I will mention it in the comment.
>>
>> Should we do the wakeup whenever dirty_writeback_interval changes ?
>> If we still use wakeup_flusher_threads(), it will wakeup the flusher
>> threads immediately after we make the change.
>
> Yes, I think we should wakeup for every change of dirty_writeback_interval.
> And immediate wakeup is not a problem IMO.
>

Got it!


Thanks
Yafang


Re: [PATCH] printk: hash addresses printed with %p

2017-10-10 Thread Joe Perches
On Wed, 2017-10-11 at 14:48 +1100, Tobin C. Harding wrote:
> Currently there are many places in the kernel where addresses are being
> printed using an unadorned %p. Kernel pointers should be printed using
> %pK allowing some control via the kptr_restrict sysctl. Exposing addresses
> gives attackers sensitive information about the kernel layout in memory.
[]
> diff --git a/lib/vsprintf.c b/lib/vsprintf.c
[]
> @@ -1591,6 +1591,35 @@ char *device_node_string(char *buf, char *end, struct 
> device_node *dn,
>   return widen_string(buf, buf - buf_start, end, spec);
>  }
>  
> +static long get_random_odd_long(void)
> +{
> + long val = 0;
> +
> + while((val & 1) == 0) {
> + val = get_random_long();
> + }
> +
> + return val;
> +}

Perhaps

static long get_random_odd_long(void)
{
return get_random_long() | 1L;
}



Re: [alsa-devel] [Patch v6 1/7] slimbus: Device management on SLIMbus

2017-10-10 Thread Vinod Koul
On Tue, Oct 10, 2017 at 06:21:34PM +0100, Srinivas Kandagatla wrote:
> On 10/10/17 17:49, Vinod Koul wrote:

> +static int slim_device_probe(struct device *dev)
> +{
> + struct slim_device  *sbdev;
> + struct slim_driver  *sbdrv;
> + int status = 0;
> +
> + sbdev = to_slim_device(dev);
> + sbdrv = to_slim_driver(dev->driver);
> +
> + sbdev->driver = sbdrv;
> +
> + if (sbdrv->probe)
> + status = sbdrv->probe(sbdev);
> +
> + if (status)
> + sbdev->driver = NULL;
> + else if (sbdrv->device_up)
> + schedule_slim_report(sbdev->ctrl, sbdev, true);
> >>>
> >>>can you please explain what this is trying to do?
> >>
> >>It is scheduling a device_up() callback in workqueue for reporting
> >>discovered device.
> >
> >any reason for that? Would the device not announce itself on the bus and
> >then you can synchronously update the device.
> You are correct,  Device should announce itself in all cases. core should
> only call this callback only when device is announced, it does not make
> sense for this call in slim_device_probe(). Will remove it from here in next
> version.

Okay great. Btw, do you need the notification to be scheduled in those cases?
I guess your controller would get an interrupt and you would handle that in the
bottom half and then call this, so why not call it directly in the bottom half?

> +/**
> + * slim_register_controller: Controller bring-up and registration.
> ...
> +
> + mutex_init(&ctrl->m_ctrl);
> + ret = device_register(&ctrl->dev);
> >>>
> >>>one more device_register?? Can you explain why
> >>>
> >>
> >>This is a device for each controller.
> >
> >wont the controller have its own platform_device?
> 
> Reason could be that slim_register controller can be called from any code
> not just platform devices..

ah which cases would those be. I was expecting that you would have a
platform_device as a slimbus controller which would call slim_register?

-- 
~Vinod


Re: [alsa-devel] [Patch v6 1/7] slimbus: Device management on SLIMbus

2017-10-10 Thread Vinod Koul
On Tue, Oct 10, 2017 at 06:21:34PM +0100, Srinivas Kandagatla wrote:
> On 10/10/17 17:49, Vinod Koul wrote:

> +static int slim_device_probe(struct device *dev)
> +{
> + struct slim_device  *sbdev;
> + struct slim_driver  *sbdrv;
> + int status = 0;
> +
> + sbdev = to_slim_device(dev);
> + sbdrv = to_slim_driver(dev->driver);
> +
> + sbdev->driver = sbdrv;
> +
> + if (sbdrv->probe)
> + status = sbdrv->probe(sbdev);
> +
> + if (status)
> + sbdev->driver = NULL;
> + else if (sbdrv->device_up)
> + schedule_slim_report(sbdev->ctrl, sbdev, true);
> >>>
> >>>can you please explain what this is trying to do?
> >>
> >>It is scheduling a device_up() callback in workqueue for reporting
> >>discovered device.
> >
> >any reason for that? Would the device not announce itself on the bus and
> >then you can synchronously update the device.
> You are correct,  Device should announce itself in all cases. core should
> only call this callback only when device is announced, it does not make
> sense for this call in slim_device_probe(). Will remove it from here in next
> version.

Okay great. Btw, do you need the notification to be scheduled in those cases?
I guess your controller would get an interrupt and you would handle that in the
bottom half and then call this, so why not call it directly in the bottom half?

> +/**
> + * slim_register_controller: Controller bring-up and registration.
> ...
> +
> + mutex_init(&ctrl->m_ctrl);
> + ret = device_register(&ctrl->dev);
> >>>
> >>>one more device_register?? Can you explain why
> >>>
> >>
> >>This is a device for each controller.
> >
> >wont the controller have its own platform_device?
> 
> Reason could be that slim_register controller can be called from any code
> not just platform devices..

ah which cases would those be. I was expecting that you would have a
platform_device as a slimbus controller which would call slim_register?

-- 
~Vinod


Re: [PATCH 0/4] RCU: introduce noref debug

2017-10-10 Thread Paul E. McKenney
On Mon, Oct 09, 2017 at 06:53:12PM +0200, Paolo Abeni wrote:
> On Fri, 2017-10-06 at 09:34 -0700, Paul E. McKenney wrote:
> > On Fri, Oct 06, 2017 at 05:10:09PM +0200, Paolo Abeni wrote:
> > > Hi,
> > > 
> > > On Fri, 2017-10-06 at 06:34 -0700, Paul E. McKenney wrote:
> > > > On Fri, Oct 06, 2017 at 02:57:45PM +0200, Paolo Abeni wrote:
> > > > > The networking subsystem is currently using some kind of long-lived
> > > > > RCU-protected, references to avoid the overhead of full book-keeping.
> > > > > 
> > > > > Such references - skb_dst() noref - are stored inside the skbs and 
> > > > > can be
> > > > > moved across relevant slices of the network stack, with the users
> > > > > being in charge of properly clearing the relevant skb - or properly 
> > > > > refcount
> > > > > the related dst references - before the skb escapes the RCU section.
> > > > > 
> > > > > We currently don't have any deterministic debug infrastructure to 
> > > > > check
> > > > > the dst noref usages - and the introduction of others noref artifact 
> > > > > is
> > > > > currently under discussion.
> > > > > 
> > > > > This series tries to tackle the above introducing an RCU debug 
> > > > > infrastructure
> > > > > aimed at spotting incorrect noref pointer usage, in patch one. The
> > > > > infrastructure is small and must be explicitly enabled via a newly 
> > > > > introduced
> > > > > build option.
> > > > > 
> > > > > Patch two uses such infrastructure to track dst noref usage in the 
> > > > > networking
> > > > > stack.
> > > > > 
> > > > > Patch 3 and 4 are bugfixes for small buglet found running this 
> > > > > infrastructure
> > > > > on basic scenarios.
> > > 
> > > Thank you for the prompt reply!
> > > > 
> > > > This patchset does not look like it handles rcu_read_lock() nesting.
> > > > For example, given code like this:
> > > > 
> > > > void foo(void)
> > > > {
> > > > rcu_read_lock();
> > > > rcu_track_noref(, , true);
> > > > do_something();
> > > > rcu_track_noref(, , false);
> > > > rcu_read_unlock();
> > > > }
> > > > 
> > > > void bar(void)
> > > > {
> > > > rcu_read_lock();
> > > > rcu_track_noref(, , true);
> > > > do_something_more();
> > > > foo();
> > > > do_something_else();
> > > > rcu_track_noref(, , false);
> > > > rcu_read_unlock();
> > > > }
> > > > 
> > > > void grill(void)
> > > > {
> > > > foo();
> > > > }
> > > > 
> > > > It looks like foo()'s rcu_read_unlock() will complain about key1.
> > > > You could remove foo()'s rcu_read_lock() and rcu_read_unlock(), but
> > > > that will break the call from grill().
> > > 
> > > Actually the code should cope correctly with your example; when foo()'s
> > > rcu_read_unlock() is called, 'cache' contains:
> > > 
> > > { { , , 1},  // ...
> > > 
> > > and when the related __rcu_check_noref() is invoked preempt_count() is
> > > 2 - because the check is called before decreasing the preempt counter.
> > > 
> > > In the main loop inside __rcu_check_noref() we will hit always the
> > > 'continue' statement because 'cache->store[i].nesting != nesting', so
> > > no warn will be triggered.
> > 
> > You are right, it was too early, and my example wasn't correct.  How
> > about this one?
> > 
> > void foo(void (*f)(struct s *sp), struct s **spp)
> > {
> > rcu_read_lock();
> > rcu_track_noref(, , true);
> > f(spp);
> > rcu_track_noref(, , false);
> > rcu_read_unlock();
> > }
> > 
> > void barcb(struct s **spp)
> > {
> > *spp = 
> > rcu_track_noref(, *spp, true);
> > }
> > 
> > void bar(void)
> > {
> > struct s *sp;
> > 
> > rcu_read_lock();
> > rcu_track_noref(, , true);
> > do_something_more();
> > foo(barcb, );
> > do_something_else(sp);
> > rcu_track_noref(, sp, false);
> > rcu_track_noref(, , false);
> > rcu_read_unlock();
> > }
> > 
> > void grillcb(struct s **spp)
> > {
> > *spp
> > }
> > 
> > void grill(void)
> > {
> > foo();
> > }
> 
> You are right: this will generate a splat, even if the code is safe.
> The false positive can be avoided by looking for leaked references only in
> the outermost rcu unlock. I did a previous implementation performing
> such check, but it emitted very generic splat so I tried to be more
> strict. The latter choice allowed to find/do 3/4.
> 
> What about using save_stack_trace() in rcu_track_noref(, true) and
> reporting such stack trace when the check in the outer most rcu fails?
> 
> The current strict/false-positive-prone check could be enabled under an
> additional build flag.

Linus and 

Re: [PATCH 0/4] RCU: introduce noref debug

2017-10-10 Thread Paul E. McKenney
On Mon, Oct 09, 2017 at 06:53:12PM +0200, Paolo Abeni wrote:
> On Fri, 2017-10-06 at 09:34 -0700, Paul E. McKenney wrote:
> > On Fri, Oct 06, 2017 at 05:10:09PM +0200, Paolo Abeni wrote:
> > > Hi,
> > > 
> > > On Fri, 2017-10-06 at 06:34 -0700, Paul E. McKenney wrote:
> > > > On Fri, Oct 06, 2017 at 02:57:45PM +0200, Paolo Abeni wrote:
> > > > > The networking subsystem is currently using some kind of long-lived
> > > > > RCU-protected, references to avoid the overhead of full book-keeping.
> > > > > 
> > > > > Such references - skb_dst() noref - are stored inside the skbs and 
> > > > > can be
> > > > > moved across relevant slices of the network stack, with the users
> > > > > being in charge of properly clearing the relevant skb - or properly 
> > > > > refcount
> > > > > the related dst references - before the skb escapes the RCU section.
> > > > > 
> > > > > We currently don't have any deterministic debug infrastructure to 
> > > > > check
> > > > > the dst noref usages - and the introduction of others noref artifact 
> > > > > is
> > > > > currently under discussion.
> > > > > 
> > > > > This series tries to tackle the above introducing an RCU debug 
> > > > > infrastructure
> > > > > aimed at spotting incorrect noref pointer usage, in patch one. The
> > > > > infrastructure is small and must be explicitly enabled via a newly 
> > > > > introduced
> > > > > build option.
> > > > > 
> > > > > Patch two uses such infrastructure to track dst noref usage in the 
> > > > > networking
> > > > > stack.
> > > > > 
> > > > > Patch 3 and 4 are bugfixes for small buglet found running this 
> > > > > infrastructure
> > > > > on basic scenarios.
> > > 
> > > Thank you for the prompt reply!
> > > > 
> > > > This patchset does not look like it handles rcu_read_lock() nesting.
> > > > For example, given code like this:
> > > > 
> > > > void foo(void)
> > > > {
> > > > rcu_read_lock();
> > > > rcu_track_noref(, , true);
> > > > do_something();
> > > > rcu_track_noref(, , false);
> > > > rcu_read_unlock();
> > > > }
> > > > 
> > > > void bar(void)
> > > > {
> > > > rcu_read_lock();
> > > > rcu_track_noref(, , true);
> > > > do_something_more();
> > > > foo();
> > > > do_something_else();
> > > > rcu_track_noref(, , false);
> > > > rcu_read_unlock();
> > > > }
> > > > 
> > > > void grill(void)
> > > > {
> > > > foo();
> > > > }
> > > > 
> > > > It looks like foo()'s rcu_read_unlock() will complain about key1.
> > > > You could remove foo()'s rcu_read_lock() and rcu_read_unlock(), but
> > > > that will break the call from grill().
> > > 
> > > Actually the code should cope correctly with your example; when foo()'s
> > > rcu_read_unlock() is called, 'cache' contains:
> > > 
> > > { { , , 1},  // ...
> > > 
> > > and when the related __rcu_check_noref() is invoked preempt_count() is
> > > 2 - because the check is called before decreasing the preempt counter.
> > > 
> > > In the main loop inside __rcu_check_noref() we will hit always the
> > > 'continue' statement because 'cache->store[i].nesting != nesting', so
> > > no warn will be triggered.
> > 
> > You are right, it was too early, and my example wasn't correct.  How
> > about this one?
> > 
> > void foo(void (*f)(struct s *sp), struct s **spp)
> > {
> > rcu_read_lock();
> > rcu_track_noref(, , true);
> > f(spp);
> > rcu_track_noref(, , false);
> > rcu_read_unlock();
> > }
> > 
> > void barcb(struct s **spp)
> > {
> > *spp = 
> > rcu_track_noref(, *spp, true);
> > }
> > 
> > void bar(void)
> > {
> > struct s *sp;
> > 
> > rcu_read_lock();
> > rcu_track_noref(, , true);
> > do_something_more();
> > foo(barcb, );
> > do_something_else(sp);
> > rcu_track_noref(, sp, false);
> > rcu_track_noref(, , false);
> > rcu_read_unlock();
> > }
> > 
> > void grillcb(struct s **spp)
> > {
> > *spp
> > }
> > 
> > void grill(void)
> > {
> > foo();
> > }
> 
> You are right: this will generate a splat, even if the code is safe.
> The false positive can be avoided by looking for leaked references only in
> the outermost rcu unlock. I did a previous implementation performing
> such check, but it emitted very generic splat so I tried to be more
> strict. The latter choice allowed to find/do 3/4.
> 
> What about using save_stack_trace() in rcu_track_noref(, true) and
> reporting such stack trace when the check in the outer most rcu fails?
> 
> The current strict/false-positive-prone check could be enabled under an
> additional build flag.

Linus and 

Re: [PATCH v2 1/2] kbuild: Add a cache for generated variables

2017-10-10 Thread Masahiro Yamada
Hi Douglas,


2017-10-05 7:37 GMT+09:00 Douglas Anderson :
> While timing a "no-op" build of the kernel (incrementally building the
> kernel even though nothing changed) in the Chrome OS build system I
> found that it was much slower than I expected.
>
> Digging into things a bit, I found that quite a bit of the time was
> spent invoking the C compiler even though we weren't actually building
> anything.  Currently in the Chrome OS build system the C compiler is
> called through a number of wrappers (one of which is written in
> python!) and can take upwards of 100 ms to invoke even if we're not
> doing anything difficult, so these invocations of the compiler were
> taking a lot of time.  Worse the invocations couldn't seem to take
> advantage of the multiple cores on my system.
>
> Certainly it seems like we could make the compiler invocations in the
> Chrome OS build system faster, but only to a point.  Inherently
> invoking a program as big as a C compiler is a fairly heavy
> operation.  Thus even if we can speed the compiler calls it made sense
> to track down what was happening.
>
> It turned out that all the compiler invocations were coming from
> usages like this in the kernel's Makefile:
>
> KBUILD_CFLAGS += $(call cc-option,-fno-delete-null-pointer-checks,)
>
> Due to the way cc-option and similar statements work the above
> contains an implicit call to the C compiler.  ...and due to the fact
> that we're storing the result in KBUILD_CFLAGS, a simply expanded
> variable, the call will happen every time the Makefile is parsed, even
> if there are no users of KBUILD_CFLAGS.
>
> Rather than redoing this computation every time, it makes a lot of
> sense to cache the result of all of the Makefile's compiler calls just
> like we do when we compile a ".c" file to a ".o" file.  Conceptually
> this is quite a simple idea.  ...and since the calls to invoke the
> compiler and similar tools are centrally located in the Kbuild.include
> file this doesn't even need to be super invasive.
>
> Implementing the cache in a simple-to-use and efficient way is not
> quite as simple as it first sounds, though.  To get maximum speed we
> really want the cache in a format that make can natively understand
> and make doesn't really have an ability to load/parse files. ...but
> make _can_ import other Makefiles, so the solution is to store the
> cache in Makefile format.  This requires coming up with a valid/unique
> Makefile variable name for each value to be cached, but that's
> solvable with some cleverness.
>
> After this change, we'll automatically create a ".cache.mk" file that
> will contain our cached variables.  We'll load this on each invocation
> of make and will avoid recomputing anything that's already in our
> cache.  The cache is stored in a format that it shouldn't need any
> invalidation since anything that might change should affect the "key"
> and any old cached value won't be used.
>
> Signed-off-by: Douglas Anderson 


I reviewed and tested this patch more closely.

V2 is almost good, but
I see some problems and things that should be improved
(including some bike-shedding).


[1]

If you apply this patch and run "make clean"
on a machine without "sphinx-build" installed,
you will see a mysterious error message as follows:


$ make clean
Documentation/Makefile:24: The 'sphinx-build' command was not found.
Make sure you have Sphinx installed and in PATH, or set the
SPHINXBUILD make variable to point to the full path of the
'sphinx-build' executable.

Detected OS: Ubuntu 16.04.2 LTS.
Warning: better to also install "dot".
Warning: better to also install "rsvg-convert".
ERROR: please install "virtualenv", otherwise, build won't work.
You should run:

sudo apt-get install graphviz librsvg2-bin virtualenv
virtualenv sphinx_1.4
. sphinx_1.4/bin/activate
pip install -r Documentation/sphinx/requirements.txt

Can't build as 2 mandatory dependencies are missing at
./scripts/sphinx-pre-install line 566.



This comes from the ".DEFAULT" target
when "make clean" descends into Documentation/ directory.


You can fix it by adding

$(make-cache): ;

to scripts/Kbuild.include


This will prevent Make from searching for
a target that would generate $(make-cache).


(Of course, we can fix Documentation/Makefile
to not use '.DEFAULT',
but canceling $(make-cache) rule is a good thing.)


You will need this
https://patchwork.kernel.org/patch/9998651/

before adding the target to Kbuild.include




[2] Please clean up .cache.mk

Adding .cache.mk pattern around line 1540 will be good.



A few more comments below.



> +--- 3.14 $(LD) support function cache
> +
> +One thing to realize about all the calls to the above support functions
> +is that each use of them requires a full invocation of an external tool, like
> +the C compiler, assembler, or linker.  If nothing else that invocation will
> +cause a fork/exec/shared library link.  In some build environments, however, 
> it
> +could also 

Re: [PATCH v2 1/2] kbuild: Add a cache for generated variables

2017-10-10 Thread Masahiro Yamada
Hi Douglas,


2017-10-05 7:37 GMT+09:00 Douglas Anderson :
> While timing a "no-op" build of the kernel (incrementally building the
> kernel even though nothing changed) in the Chrome OS build system I
> found that it was much slower than I expected.
>
> Digging into things a bit, I found that quite a bit of the time was
> spent invoking the C compiler even though we weren't actually building
> anything.  Currently in the Chrome OS build system the C compiler is
> called through a number of wrappers (one of which is written in
> python!) and can take upwards of 100 ms to invoke even if we're not
> doing anything difficult, so these invocations of the compiler were
> taking a lot of time.  Worse the invocations couldn't seem to take
> advantage of the multiple cores on my system.
>
> Certainly it seems like we could make the compiler invocations in the
> Chrome OS build system faster, but only to a point.  Inherently
> invoking a program as big as a C compiler is a fairly heavy
> operation.  Thus even if we can speed the compiler calls it made sense
> to track down what was happening.
>
> It turned out that all the compiler invocations were coming from
> usages like this in the kernel's Makefile:
>
> KBUILD_CFLAGS += $(call cc-option,-fno-delete-null-pointer-checks,)
>
> Due to the way cc-option and similar statements work the above
> contains an implicit call to the C compiler.  ...and due to the fact
> that we're storing the result in KBUILD_CFLAGS, a simply expanded
> variable, the call will happen every time the Makefile is parsed, even
> if there are no users of KBUILD_CFLAGS.
>
> Rather than redoing this computation every time, it makes a lot of
> sense to cache the result of all of the Makefile's compiler calls just
> like we do when we compile a ".c" file to a ".o" file.  Conceptually
> this is quite a simple idea.  ...and since the calls to invoke the
> compiler and similar tools are centrally located in the Kbuild.include
> file this doesn't even need to be super invasive.
>
> Implementing the cache in a simple-to-use and efficient way is not
> quite as simple as it first sounds, though.  To get maximum speed we
> really want the cache in a format that make can natively understand
> and make doesn't really have an ability to load/parse files. ...but
> make _can_ import other Makefiles, so the solution is to store the
> cache in Makefile format.  This requires coming up with a valid/unique
> Makefile variable name for each value to be cached, but that's
> solvable with some cleverness.
>
> After this change, we'll automatically create a ".cache.mk" file that
> will contain our cached variables.  We'll load this on each invocation
> of make and will avoid recomputing anything that's already in our
> cache.  The cache is stored in a format that it shouldn't need any
> invalidation since anything that might change should affect the "key"
> and any old cached value won't be used.
>
> Signed-off-by: Douglas Anderson 


I reviewed and tested this patch more closely.

V2 is almost good, but
I see some problems and things that should be improved.
(including bike-shed)


[1]

If you apply this patch and run "make clean"
on a machine without "sphinx-build" installed,
you will see a mysterious error message like the following:


$ make clean
Documentation/Makefile:24: The 'sphinx-build' command was not found.
Make sure you have Sphinx installed and in PATH, or set the
SPHINXBUILD make variable to point to the full path of the
'sphinx-build' executable.

Detected OS: Ubuntu 16.04.2 LTS.
Warning: better to also install "dot".
Warning: better to also install "rsvg-convert".
ERROR: please install "virtualenv", otherwise, build won't work.
You should run:

sudo apt-get install graphviz librsvg2-bin virtualenv
virtualenv sphinx_1.4
. sphinx_1.4/bin/activate
pip install -r Documentation/sphinx/requirements.txt

Can't build as 2 mandatory dependencies are missing at
./scripts/sphinx-pre-install line 566.



This comes from the ".DEFAULT" target
when "make clean" descends into Documentation/ directory.


You can fix it by adding

$(make-cache): ;

to scripts/Kbuild.include


This will prevent Make from searching for
a target that would generate $(make-cache).


(Of course, we can fix Documentation/Makefile
to not use '.DEFAULT',
but canceling $(make-cache) rule is a good thing.)


You will need this
https://patchwork.kernel.org/patch/9998651/

before adding the target to Kbuild.include




[2] Please clean up .cache.mk

Adding .cache.mk pattern around line 1540 will be good.



A few more comments below.



> +--- 3.14 $(LD) support function cache
> +
> +One thing to realize about all the calls to the above support functions
> +is that each use of them requires a full invocation of an external tool, like
> +the C compiler, assembler, or linker.  If nothing else that invocation will
> +cause a fork/exec/shared library link.  In some build environments, however, 
> it
> +could also involve traversing through one or more 

RE: [RFC v5 4/8] platform: x86: Add generic Intel IPC driver

2017-10-10 Thread Chakravarty, Souvik K
On October 11, 2017 3:39 AM, Kuppuswamy Sathyanarayanan wrote:
> Hi,
> 
> 
> On 10/08/2017 09:53 PM, Chakravarty, Souvik K wrote:
> >> From: sathyanarayanan.kuppusw...@linux.intel.com
> >> [mailto:sathyanarayanan.kuppusw...@linux.intel.com]
> >> Sent: Sunday, October 8, 2017 3:50 AM
> >> To: a.zu...@towertech.it; x...@kernel.org; w...@iguana.be;
> >> mi...@redhat.com; alexandre.bell...@free-electrons.com; Zha, Qipeng
> >> ; h...@zytor.com; dvh...@infradead.org;
> >> t...@linutronix.de; lee.jo...@linaro.org; a...@infradead.org;
> >> Chakravarty, Souvik K 
> >> Cc: linux-...@vger.kernel.org; linux-watch...@vger.kernel.org; linux-
> >> ker...@vger.kernel.org; platform-driver-...@vger.kernel.org;
> >> sathyao...@gmail.com; Kuppuswamy Sathyanarayanan
> >> 
> >> Subject: [RFC v5 4/8] platform: x86: Add generic Intel IPC driver
> >>
> >> From: Kuppuswamy Sathyanarayanan
> >> 
> >>
> >> Currently intel_scu_ipc.c, intel_pmc_ipc.c and intel_punit_ipc.c
> >> redundantly implements the same IPC features and has lot of code
> >> duplication between them. This driver addresses this issue by
> >> grouping the common IPC functionalities under the same driver.
> >>
> >> Signed-off-by: Kuppuswamy Sathyanarayanan
> >> 
> >> ---
> >>   drivers/platform/x86/Kconfig|   8 +
> >>   drivers/platform/x86/Makefile   |   1 +
> >>   drivers/platform/x86/intel_ipc_dev.c| 576
> >> 
> >>   include/linux/platform_data/x86/intel_ipc_dev.h | 206 +
> >>   4 files changed, 791 insertions(+)
> >>   create mode 100644 drivers/platform/x86/intel_ipc_dev.c
> >>   create mode 100644 include/linux/platform_data/x86/intel_ipc_dev.h
> >>
> >> Changes since v4:
> >>   * None
> >>
> >> Changes since v3:
> >>   * Fixed NULL pointer exception in intel_ipc_dev_get().
> >>   * Fixed error in check for duplicate intel_ipc_dev.
> >>   * Added custom interrupt handler support.
> >>   * Used char array for error string conversion.
> >>   * Added put dev support.
> >>   * Added devm_* variant of intel_ipc_dev_get().
> >>
> >> Changes since v2:
> >>   * Added ipc_dev_cmd API support.
> >>
> >> diff --git a/drivers/platform/x86/Kconfig
> >> b/drivers/platform/x86/Kconfig index da2d9ba..724ee696 100644
> >> --- a/drivers/platform/x86/Kconfig
> >> +++ b/drivers/platform/x86/Kconfig
> >> @@ -1153,6 +1153,14 @@ config SILEAD_DMI
> >>  with the OS-image for the device. This option supplies the missing
> >>  information. Enable this for x86 tablets with Silead touchscreens.
> >>
> >> +config INTEL_IPC_DEV
> >> +  bool "Intel IPC Device Driver"
> >> +  depends on X86_64
> >> +  ---help---
> >> +This driver implements core features of Intel IPC device. Devices
> >> +like PMC, SCU, PUNIT, etc can use interfaces provided by this
> >> +driver to implement IPC protocol of their respective device.
> >> +
> >>   endif # X86_PLATFORM_DEVICES
> >>
> >>   config PMC_ATOM
> >> diff --git a/drivers/platform/x86/Makefile
> >> b/drivers/platform/x86/Makefile index 2b315d0..99a1af1 100644
> >> --- a/drivers/platform/x86/Makefile
> >> +++ b/drivers/platform/x86/Makefile
> >> @@ -84,3 +84,4 @@ obj-$(CONFIG_PMC_ATOM)   +=
> >> pmc_atom.o
> >>   obj-$(CONFIG_MLX_PLATFORM)   += mlx-platform.o
> >>   obj-$(CONFIG_MLX_CPLD_PLATFORM)  += mlxcpld-hotplug.o
> >>   obj-$(CONFIG_INTEL_TURBO_MAX_3) += intel_turbo_max_3.o
> >> +obj-$(CONFIG_INTEL_IPC_DEV)   += intel_ipc_dev.o
> >> diff --git a/drivers/platform/x86/intel_ipc_dev.c
> >> b/drivers/platform/x86/intel_ipc_dev.c
> >> new file mode 100644
> >> index 000..f55ddec
> >> --- /dev/null
> >> +++ b/drivers/platform/x86/intel_ipc_dev.c
> >> @@ -0,0 +1,576 @@
> >> +/*
> >> + * intel_ipc_dev.c: Intel IPC device class driver
> >> + *
> >> + * (C) Copyright 2017 Intel Corporation
> >> + *
> >> + * This program is free software; you can redistribute it and/or
> >> + * modify it under the terms of the GNU General Public License
> >> + * as published by the Free Software Foundation; version 2
> >> + * of the License.
> >> + *
> >> + */
> >> +
> >> +#include 
> >> +#include 
> >> +#include 
> >> +#include 
> >> +#include 
> >> +#include 
> >> +#include 
> >> +#include 
> >> +#include 
> >> +#include 
> >> +#include 
> >> +
> >> +/* mutex to sync different ipc devices in same channel */
> >> +static struct mutex channel_lock[IPC_CHANNEL_MAX];
> >> +
> >> +static char *ipc_err_sources[] = {
> >> +  [IPC_DEV_ERR_NONE] =
> >> +  "No error",
> >> +  [IPC_DEV_ERR_CMD_NOT_SUPPORTED] =
> >> +  "Command not-supported/Invalid",
> >> +  [IPC_DEV_ERR_CMD_NOT_SERVICED] =
> >> +  "Command not-serviced/Invalid param",
> >> +  [IPC_DEV_ERR_UNABLE_TO_SERVICE] =
> >> +  "Unable-to-service/Cmd-timeout",
> >> +  [IPC_DEV_ERR_CMD_INVALID] =
> 

RE: [RFC v5 4/8] platform: x86: Add generic Intel IPC driver

2017-10-10 Thread Chakravarty, Souvik K
On October 11, 2017 3:39 AM, Kuppuswamy Sathyanarayanan wrote:
> Hi,
> 
> 
> On 10/08/2017 09:53 PM, Chakravarty, Souvik K wrote:
> >> From: sathyanarayanan.kuppusw...@linux.intel.com
> >> [mailto:sathyanarayanan.kuppusw...@linux.intel.com]
> >> Sent: Sunday, October 8, 2017 3:50 AM
> >> To: a.zu...@towertech.it; x...@kernel.org; w...@iguana.be;
> >> mi...@redhat.com; alexandre.bell...@free-electrons.com; Zha, Qipeng
> >> ; h...@zytor.com; dvh...@infradead.org;
> >> t...@linutronix.de; lee.jo...@linaro.org; a...@infradead.org;
> >> Chakravarty, Souvik K 
> >> Cc: linux-...@vger.kernel.org; linux-watch...@vger.kernel.org; linux-
> >> ker...@vger.kernel.org; platform-driver-...@vger.kernel.org;
> >> sathyao...@gmail.com; Kuppuswamy Sathyanarayanan
> >> 
> >> Subject: [RFC v5 4/8] platform: x86: Add generic Intel IPC driver
> >>
> >> From: Kuppuswamy Sathyanarayanan
> >> 
> >>
> >> Currently intel_scu_ipc.c, intel_pmc_ipc.c and intel_punit_ipc.c
> >> redundantly implements the same IPC features and has lot of code
> >> duplication between them. This driver addresses this issue by
> >> grouping the common IPC functionalities under the same driver.
> >>
> >> Signed-off-by: Kuppuswamy Sathyanarayanan
> >> 
> >> ---
> >>   drivers/platform/x86/Kconfig|   8 +
> >>   drivers/platform/x86/Makefile   |   1 +
> >>   drivers/platform/x86/intel_ipc_dev.c| 576
> >> 
> >>   include/linux/platform_data/x86/intel_ipc_dev.h | 206 +
> >>   4 files changed, 791 insertions(+)
> >>   create mode 100644 drivers/platform/x86/intel_ipc_dev.c
> >>   create mode 100644 include/linux/platform_data/x86/intel_ipc_dev.h
> >>
> >> Changes since v4:
> >>   * None
> >>
> >> Changes since v3:
> >>   * Fixed NULL pointer exception in intel_ipc_dev_get().
> >>   * Fixed error in check for duplicate intel_ipc_dev.
> >>   * Added custom interrupt handler support.
> >>   * Used char array for error string conversion.
> >>   * Added put dev support.
> >>   * Added devm_* variant of intel_ipc_dev_get().
> >>
> >> Changes since v2:
> >>   * Added ipc_dev_cmd API support.
> >>
> >> diff --git a/drivers/platform/x86/Kconfig
> >> b/drivers/platform/x86/Kconfig index da2d9ba..724ee696 100644
> >> --- a/drivers/platform/x86/Kconfig
> >> +++ b/drivers/platform/x86/Kconfig
> >> @@ -1153,6 +1153,14 @@ config SILEAD_DMI
> >>  with the OS-image for the device. This option supplies the missing
> >>  information. Enable this for x86 tablets with Silead touchscreens.
> >>
> >> +config INTEL_IPC_DEV
> >> +  bool "Intel IPC Device Driver"
> >> +  depends on X86_64
> >> +  ---help---
> >> +This driver implements core features of Intel IPC device. Devices
> >> +like PMC, SCU, PUNIT, etc can use interfaces provided by this
> >> +driver to implement IPC protocol of their respective device.
> >> +
> >>   endif # X86_PLATFORM_DEVICES
> >>
> >>   config PMC_ATOM
> >> diff --git a/drivers/platform/x86/Makefile
> >> b/drivers/platform/x86/Makefile index 2b315d0..99a1af1 100644
> >> --- a/drivers/platform/x86/Makefile
> >> +++ b/drivers/platform/x86/Makefile
> >> @@ -84,3 +84,4 @@ obj-$(CONFIG_PMC_ATOM)   +=
> >> pmc_atom.o
> >>   obj-$(CONFIG_MLX_PLATFORM)   += mlx-platform.o
> >>   obj-$(CONFIG_MLX_CPLD_PLATFORM)  += mlxcpld-hotplug.o
> >>   obj-$(CONFIG_INTEL_TURBO_MAX_3) += intel_turbo_max_3.o
> >> +obj-$(CONFIG_INTEL_IPC_DEV)   += intel_ipc_dev.o
> >> diff --git a/drivers/platform/x86/intel_ipc_dev.c
> >> b/drivers/platform/x86/intel_ipc_dev.c
> >> new file mode 100644
> >> index 000..f55ddec
> >> --- /dev/null
> >> +++ b/drivers/platform/x86/intel_ipc_dev.c
> >> @@ -0,0 +1,576 @@
> >> +/*
> >> + * intel_ipc_dev.c: Intel IPC device class driver
> >> + *
> >> + * (C) Copyright 2017 Intel Corporation
> >> + *
> >> + * This program is free software; you can redistribute it and/or
> >> + * modify it under the terms of the GNU General Public License
> >> + * as published by the Free Software Foundation; version 2
> >> + * of the License.
> >> + *
> >> + */
> >> +
> >> +#include 
> >> +#include 
> >> +#include 
> >> +#include 
> >> +#include 
> >> +#include 
> >> +#include 
> >> +#include 
> >> +#include 
> >> +#include 
> >> +#include 
> >> +
> >> +/* mutex to sync different ipc devices in same channel */
> >> +static struct mutex channel_lock[IPC_CHANNEL_MAX];
> >> +
> >> +static char *ipc_err_sources[] = {
> >> +  [IPC_DEV_ERR_NONE] =
> >> +  "No error",
> >> +  [IPC_DEV_ERR_CMD_NOT_SUPPORTED] =
> >> +  "Command not-supported/Invalid",
> >> +  [IPC_DEV_ERR_CMD_NOT_SERVICED] =
> >> +  "Command not-serviced/Invalid param",
> >> +  [IPC_DEV_ERR_UNABLE_TO_SERVICE] =
> >> +  "Unable-to-service/Cmd-timeout",
> >> +  [IPC_DEV_ERR_CMD_INVALID] =
> >> +  "Command-invalid/Cmd-locked",
> >> +  [IPC_DEV_ERR_CMD_FAILED] =
> >> +  "Command-failed/Invalid-VR-id",
> >> +  [IPC_DEV_ERR_EMSECURITY] =
> >> +  

Re: [PATCH v16 09/10] x86/arch_prctl: Selftest for ARCH_[GET|SET]_CPUID

2017-10-10 Thread Kyle Huey
On Tue, Oct 10, 2017 at 8:35 PM, Wanpeng Li  wrote:
> Hi Kyle,
> 2017-03-20 16:16 GMT+08:00 Kyle Huey :
>> Test disabling and reenabling the cpuid instruction via the new arch_prctl
>> ARCH_SET_CPUID, retrieving the current state via ARCH_GET_CPUID, and the
>> expected behaviors across fork() and exec().
>>
>> Signed-off-by: Kyle Huey 
>> ---
>>  tools/testing/selftests/x86/Makefile  |   2 +-
>>  tools/testing/selftests/x86/cpuid_fault.c | 251 
>> ++
>
> I'm not sure why this commit is not merged to upstream. I test
> 4.14-rc3 w/ this testcase on a haswell client, however I encounter the
> below splat, any idea?

Thanks for pointing out that this never got merged.  That's quite
disappointing, especially after reviewers insisted I write this test.

The failure you're seeing is because the values of ARCH_GET_CPUID and
ARCH_SET_CPUID changed and the values hardcoded in the test are no
longer accurate.  If you set them to the correct values (0x1011 and
0x1012 respectively) the test should pass.

- Kyle

> # ./cpuid_fault_64
> cpuid() == {d, 756e6547, 6c65746e, 49656e69}
> arch_prctl(ARCH_GET_CPUID); ARCH_GET_CPUID is unsupported on this kernel.
>
> Regards,
> Wanpeng Li
>
>>  2 files changed, 252 insertions(+), 1 deletion(-)
>>  create mode 100644 tools/testing/selftests/x86/cpuid_fault.c
>>
>> diff --git a/tools/testing/selftests/x86/Makefile 
>> b/tools/testing/selftests/x86/Makefile
>> index 38e0a9ca5d71..acda4e5fcf25 100644
>> --- a/tools/testing/selftests/x86/Makefile
>> +++ b/tools/testing/selftests/x86/Makefile
>> @@ -6,7 +6,7 @@ include ../lib.mk
>>
>>  TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt 
>> ptrace_syscall test_mremap_vdso \
>> check_initial_reg_state sigreturn ldt_gdt iopl 
>> mpx-mini-test ioperm \
>> -   protection_keys test_vdso
>> +   protection_keys test_vdso cpuid_fault
>>  TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso 
>> unwind_vdso \
>> test_FCMOV test_FCOMI test_FISTTP \
>> vdso_restorer
>> diff --git a/tools/testing/selftests/x86/cpuid_fault.c 
>> b/tools/testing/selftests/x86/cpuid_fault.c
>> new file mode 100644
>> index ..e3b93c28c655
>> --- /dev/null
>> +++ b/tools/testing/selftests/x86/cpuid_fault.c
>> @@ -0,0 +1,251 @@
>> +
>> +/*
>> + * Tests for arch_prctl(ARCH_GET_CPUID, ...) / arch_prctl(ARCH_SET_CPUID, 
>> ...)
>> + *
>> + * Basic test to test behaviour of ARCH_GET_CPUID and ARCH_SET_CPUID
>> + */
>> +
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +
>> +#include 
>> +#include 
>> +
>> +/*
>> +#define ARCH_GET_CPUID 0x1005
>> +#define ARCH_SET_CPUID 0x1006
>> +#ifdef __x86_64__
>> +#define SYS_arch_prctl 158
>> +#else
>> +#define SYS_arch_prctl 384
>> +#endif
>> +*/
>> +
>> +const char *cpuid_names[] = {
>> +   [0] = "[cpuid disabled]",
>> +   [1] = "[cpuid enabled]",
>> +};
>> +
>> +int arch_prctl(int option, unsigned long arg2)
>> +{
>> +   return syscall(SYS_arch_prctl, option, arg2);
>> +}
>> +
>> +int cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx,
>> + unsigned int *edx)
>> +{
>> +   return __get_cpuid(0, eax, ebx, ecx, edx);
>> +}
>> +
>> +int do_child_exec_test(int eax, int ebx, int ecx, int edx)
>> +{
>> +   int cpuid_val = 0, child = 0, status = 0;
>> +
>> +   printf("arch_prctl(ARCH_GET_CPUID); ");
>> +
>> +   cpuid_val = arch_prctl(ARCH_GET_CPUID, 0);
>> +   if (cpuid_val < 0)
>> +   errx(1, "ARCH_GET_CPUID fails now, but not before?");
>> +
>> +   printf("cpuid_val == %s\n", cpuid_names[cpuid_val]);
>> +   if (cpuid_val != 0)
>> +   errx(1, "How did cpuid get re-enabled on fork?");
>> +
>> +   child = fork();
>> +   if (child == 0) {
>> +   cpuid_val = arch_prctl(ARCH_GET_CPUID, 0);
>> +   if (cpuid_val < 0)
>> +   errx(1, "ARCH_GET_CPUID fails now, but not before?");
>> +
>> +   printf("cpuid_val == %s\n", cpuid_names[cpuid_val]);
>> +   if (cpuid_val != 0)
>> +   errx(1, "How did cpuid get re-enabled on fork?");
>> +
>> +   printf("exec\n");
>> +   execl("/proc/self/exe", "cpuid-fault", "-early-return", 
>> NULL);
>> +   }
>> +
>> +   if (child != waitpid(child, &status, 0))
>> +   errx(1, "waitpid failed!?");
>> +
>> +   if (WEXITSTATUS(status) != 0)
>> +   errx(1, "Execed child exited abnormally");
>> +
>> +   return 0;
>> +}
>> +
>> +int child_received_signal;
>> +
>> +void child_sigsegv_cb(int sig)
>> +{
>> +   int cpuid_val = 0;
>> +
>> +   child_received_signal = 1;
>> +   printf("[ SIG_SEGV ]\n");
>> +   printf("arch_prctl(ARCH_GET_CPUID); ");
>> +
>> 

Re: [PATCH v16 09/10] x86/arch_prctl: Selftest for ARCH_[GET|SET]_CPUID

2017-10-10 Thread Kyle Huey
On Tue, Oct 10, 2017 at 8:35 PM, Wanpeng Li  wrote:
> Hi Kyle,
> 2017-03-20 16:16 GMT+08:00 Kyle Huey :
>> Test disabling and reenabling the cpuid instruction via the new arch_prctl
>> ARCH_SET_CPUID, retrieving the current state via ARCH_GET_CPUID, and the
>> expected behaviors across fork() and exec().
>>
>> Signed-off-by: Kyle Huey 
>> ---
>>  tools/testing/selftests/x86/Makefile  |   2 +-
>>  tools/testing/selftests/x86/cpuid_fault.c | 251 
>> ++
>
> I'm not sure why this commit is not merged to upstream. I test
> 4.14-rc3 w/ this testcase on a haswell client, however I encounter the
> below splat, any idea?

Thanks for pointing out that this never got merged.  That's quite
disappointing, especially after reviewers insisted I write this test.

The failure you're seeing is because the values of ARCH_GET_CPUID and
ARCH_SET_CPUID changed and the values hardcoded in the test are no
longer accurate.  If you set them to the correct values (0x1011 and
0x1012 respectively) the test should pass.

- Kyle

> # ./cpuid_fault_64
> cpuid() == {d, 756e6547, 6c65746e, 49656e69}
> arch_prctl(ARCH_GET_CPUID); ARCH_GET_CPUID is unsupported on this kernel.
>
> Regards,
> Wanpeng Li
>
>>  2 files changed, 252 insertions(+), 1 deletion(-)
>>  create mode 100644 tools/testing/selftests/x86/cpuid_fault.c
>>
>> diff --git a/tools/testing/selftests/x86/Makefile 
>> b/tools/testing/selftests/x86/Makefile
>> index 38e0a9ca5d71..acda4e5fcf25 100644
>> --- a/tools/testing/selftests/x86/Makefile
>> +++ b/tools/testing/selftests/x86/Makefile
>> @@ -6,7 +6,7 @@ include ../lib.mk
>>
>>  TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt 
>> ptrace_syscall test_mremap_vdso \
>> check_initial_reg_state sigreturn ldt_gdt iopl 
>> mpx-mini-test ioperm \
>> -   protection_keys test_vdso
>> +   protection_keys test_vdso cpuid_fault
>>  TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso 
>> unwind_vdso \
>> test_FCMOV test_FCOMI test_FISTTP \
>> vdso_restorer
>> diff --git a/tools/testing/selftests/x86/cpuid_fault.c 
>> b/tools/testing/selftests/x86/cpuid_fault.c
>> new file mode 100644
>> index ..e3b93c28c655
>> --- /dev/null
>> +++ b/tools/testing/selftests/x86/cpuid_fault.c
>> @@ -0,0 +1,251 @@
>> +
>> +/*
>> + * Tests for arch_prctl(ARCH_GET_CPUID, ...) / arch_prctl(ARCH_SET_CPUID, 
>> ...)
>> + *
>> + * Basic test to test behaviour of ARCH_GET_CPUID and ARCH_SET_CPUID
>> + */
>> +
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +
>> +#include 
>> +#include 
>> +
>> +/*
>> +#define ARCH_GET_CPUID 0x1005
>> +#define ARCH_SET_CPUID 0x1006
>> +#ifdef __x86_64__
>> +#define SYS_arch_prctl 158
>> +#else
>> +#define SYS_arch_prctl 384
>> +#endif
>> +*/
>> +
>> +const char *cpuid_names[] = {
>> +   [0] = "[cpuid disabled]",
>> +   [1] = "[cpuid enabled]",
>> +};
>> +
>> +int arch_prctl(int option, unsigned long arg2)
>> +{
>> +   return syscall(SYS_arch_prctl, option, arg2);
>> +}
>> +
>> +int cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx,
>> + unsigned int *edx)
>> +{
>> +   return __get_cpuid(0, eax, ebx, ecx, edx);
>> +}
>> +
>> +int do_child_exec_test(int eax, int ebx, int ecx, int edx)
>> +{
>> +   int cpuid_val = 0, child = 0, status = 0;
>> +
>> +   printf("arch_prctl(ARCH_GET_CPUID); ");
>> +
>> +   cpuid_val = arch_prctl(ARCH_GET_CPUID, 0);
>> +   if (cpuid_val < 0)
>> +   errx(1, "ARCH_GET_CPUID fails now, but not before?");
>> +
>> +   printf("cpuid_val == %s\n", cpuid_names[cpuid_val]);
>> +   if (cpuid_val != 0)
>> +   errx(1, "How did cpuid get re-enabled on fork?");
>> +
>> +   child = fork();
>> +   if (child == 0) {
>> +   cpuid_val = arch_prctl(ARCH_GET_CPUID, 0);
>> +   if (cpuid_val < 0)
>> +   errx(1, "ARCH_GET_CPUID fails now, but not before?");
>> +
>> +   printf("cpuid_val == %s\n", cpuid_names[cpuid_val]);
>> +   if (cpuid_val != 0)
>> +   errx(1, "How did cpuid get re-enabled on fork?");
>> +
>> +   printf("exec\n");
>> +   execl("/proc/self/exe", "cpuid-fault", "-early-return", 
>> NULL);
>> +   }
>> +
>> +   if (child != waitpid(child, &status, 0))
>> +   errx(1, "waitpid failed!?");
>> +
>> +   if (WEXITSTATUS(status) != 0)
>> +   errx(1, "Execed child exited abnormally");
>> +
>> +   return 0;
>> +}
>> +
>> +int child_received_signal;
>> +
>> +void child_sigsegv_cb(int sig)
>> +{
>> +   int cpuid_val = 0;
>> +
>> +   child_received_signal = 1;
>> +   printf("[ SIG_SEGV ]\n");
>> +   printf("arch_prctl(ARCH_GET_CPUID); ");
>> +
>> +   cpuid_val = arch_prctl(ARCH_GET_CPUID, 0);
>> + 

Re: [PATCH v2] hwmon: xgene: Support hwmon v2

2017-10-10 Thread Guenter Roeck

On 10/10/2017 05:10 PM, Hoan Tran wrote:

This patch supports xgene-hwmon v2 which uses the non-cachable memory
as the PCC shared memory.

Signed-off-by: Hoan Tran 
---

v2
  - Map PCC shared mem by ioremap() in case hwmon is v2



So I assume you expect me to replace the (already accepted) v1
of this patch with this one ?

Assuming the change is needed, I really have to ask: Has this version
of the patch been tested ?


  drivers/hwmon/xgene-hwmon.c | 52 +++--
  1 file changed, 41 insertions(+), 11 deletions(-)

diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c
index 9c0dbb8..52be7cd 100644
--- a/drivers/hwmon/xgene-hwmon.c
+++ b/drivers/hwmon/xgene-hwmon.c
@@ -91,6 +91,11 @@
  #define to_xgene_hwmon_dev(cl)\
container_of(cl, struct xgene_hwmon_dev, mbox_client)
  
+enum xgene_hwmon_version {

+   XGENE_HWMON_V1 = 0,
+   XGENE_HWMON_V2 = 1,
+};
+
  struct slimpro_resp_msg {
u32 msg;
u32 param1;
@@ -99,6 +104,7 @@ struct slimpro_resp_msg {
  
  struct xgene_hwmon_dev {

struct device   *dev;
+   int version;
struct mbox_chan*mbox_chan;
struct mbox_client  mbox_client;
int mbox_idx;
@@ -135,6 +141,15 @@ static u16 xgene_word_tst_and_clr(u16 *addr, u16 mask)
return ret;
  }
  
+static void *xgene_pcc_ioremap(struct xgene_hwmon_dev *ctx,

+  phys_addr_t phys, size_t size)
+{
+   if (ctx->version == XGENE_HWMON_V2)
+   return (void __force *)ioremap(phys, size);
+

Is that typecast really necessary ?


+   return memremap(phys, size, MEMREMAP_WB);
+}
+
  static int xgene_hwmon_pcc_rd(struct xgene_hwmon_dev *ctx, u32 *msg)
  {
struct acpi_pcct_shared_memory *generic_comm_base = ctx->pcc_comm_addr;
@@ -609,6 +624,15 @@ static void xgene_hwmon_tx_done(struct mbox_client *cl, 
void *msg, int ret)
}
  }
  
+#ifdef CONFIG_ACPI

+static const struct acpi_device_id xgene_hwmon_acpi_match[] = {
+   {"APMC0D29", XGENE_HWMON_V1},
+   {"APMC0D8A", XGENE_HWMON_V2},
+   {},
+};
+MODULE_DEVICE_TABLE(acpi, xgene_hwmon_acpi_match);
+#endif
+
  static int xgene_hwmon_probe(struct platform_device *pdev)
  {
struct xgene_hwmon_dev *ctx;
@@ -623,6 +647,20 @@ static int xgene_hwmon_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, ctx);
cl = >mbox_client;
  
+#ifdef CONFIG_ACPI

+   ctx->version = -EINVAL;
+   if (ACPI_COMPANION(>dev)) {
+   const struct acpi_device_id *acpi_id;
+
+   acpi_id = acpi_match_device(xgene_hwmon_acpi_match, >dev);
+   if (acpi_id)
+   ctx->version = (int)acpi_id->driver_data;
+   }
+
+   if (ctx->version < 0)
+   return -ENODEV;


This doesn't make sense. Just return EINVAL if acpi_id is 0 above. There
is no need to assign a negative value to ctx->version.

I also don't see why ctx->version is necessary in the first place. In reality
it is just a parameter to xgene_pcc_ioremap(). Which I think should be inline
and not a separate function.


+#endif


What if ACPI is enabled in the build but the system is running on HW which
does not support ACPI ? Is that guaranteed to never happen ? Why not use
acpi_disabled instead ?


+
spin_lock_init(>kfifo_lock);
mutex_init(>rd_mutex);
  
@@ -690,9 +728,9 @@ static int xgene_hwmon_probe(struct platform_device *pdev)

 */
ctx->comm_base_addr = cppc_ss->base_address;
if (ctx->comm_base_addr) {
-   ctx->pcc_comm_addr = memremap(ctx->comm_base_addr,
-   cppc_ss->length,
-   MEMREMAP_WB);
+   ctx->pcc_comm_addr = xgene_pcc_ioremap(ctx,
+   ctx->comm_base_addr,
+   cppc_ss->length);


Inline, please. The extra function adds more complexity than it is worth.


} else {
dev_err(>dev, "Failed to get PCC comm region\n");
rc = -ENODEV;
@@ -758,14 +796,6 @@ static int xgene_hwmon_remove(struct platform_device *pdev)
return 0;
  }
  
-#ifdef CONFIG_ACPI

-static const struct acpi_device_id xgene_hwmon_acpi_match[] = {
-   {"APMC0D29", 0},
-   {},
-};
-MODULE_DEVICE_TABLE(acpi, xgene_hwmon_acpi_match);
-#endif
-
  static const struct of_device_id xgene_hwmon_of_match[] = {
{.compatible = "apm,xgene-slimpro-hwmon"},
{}





Re: [PATCH v2] hwmon: xgene: Support hwmon v2

2017-10-10 Thread Guenter Roeck

On 10/10/2017 05:10 PM, Hoan Tran wrote:

This patch supports xgene-hwmon v2 which uses the non-cachable memory
as the PCC shared memory.

Signed-off-by: Hoan Tran 
---

v2
  - Map PCC shared mem by ioremap() in case hwmon is v2



So I assume you expect me to replace the (already accepted) v1
of this patch with this one ?

Assuming the change is needed, I really have to ask: Has this version
of the patch been tested ?


  drivers/hwmon/xgene-hwmon.c | 52 +++--
  1 file changed, 41 insertions(+), 11 deletions(-)

diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c
index 9c0dbb8..52be7cd 100644
--- a/drivers/hwmon/xgene-hwmon.c
+++ b/drivers/hwmon/xgene-hwmon.c
@@ -91,6 +91,11 @@
  #define to_xgene_hwmon_dev(cl)\
container_of(cl, struct xgene_hwmon_dev, mbox_client)
  
+enum xgene_hwmon_version {

+   XGENE_HWMON_V1 = 0,
+   XGENE_HWMON_V2 = 1,
+};
+
  struct slimpro_resp_msg {
u32 msg;
u32 param1;
@@ -99,6 +104,7 @@ struct slimpro_resp_msg {
  
  struct xgene_hwmon_dev {

struct device   *dev;
+   int version;
struct mbox_chan*mbox_chan;
struct mbox_client  mbox_client;
int mbox_idx;
@@ -135,6 +141,15 @@ static u16 xgene_word_tst_and_clr(u16 *addr, u16 mask)
return ret;
  }
  
+static void *xgene_pcc_ioremap(struct xgene_hwmon_dev *ctx,

+  phys_addr_t phys, size_t size)
+{
+   if (ctx->version == XGENE_HWMON_V2)
+   return (void __force *)ioremap(phys, size);
+

Is that typecast really necessary ?


+   return memremap(phys, size, MEMREMAP_WB);
+}
+
  static int xgene_hwmon_pcc_rd(struct xgene_hwmon_dev *ctx, u32 *msg)
  {
struct acpi_pcct_shared_memory *generic_comm_base = ctx->pcc_comm_addr;
@@ -609,6 +624,15 @@ static void xgene_hwmon_tx_done(struct mbox_client *cl, 
void *msg, int ret)
}
  }
  
+#ifdef CONFIG_ACPI

+static const struct acpi_device_id xgene_hwmon_acpi_match[] = {
+   {"APMC0D29", XGENE_HWMON_V1},
+   {"APMC0D8A", XGENE_HWMON_V2},
+   {},
+};
+MODULE_DEVICE_TABLE(acpi, xgene_hwmon_acpi_match);
+#endif
+
  static int xgene_hwmon_probe(struct platform_device *pdev)
  {
struct xgene_hwmon_dev *ctx;
@@ -623,6 +647,20 @@ static int xgene_hwmon_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, ctx);
cl = >mbox_client;
  
+#ifdef CONFIG_ACPI

+   ctx->version = -EINVAL;
+   if (ACPI_COMPANION(>dev)) {
+   const struct acpi_device_id *acpi_id;
+
+   acpi_id = acpi_match_device(xgene_hwmon_acpi_match, >dev);
+   if (acpi_id)
+   ctx->version = (int)acpi_id->driver_data;
+   }
+
+   if (ctx->version < 0)
+   return -ENODEV;


This doesn't make sense. Just return EINVAL if acpi_id is 0 above. There
is no need to assign a negative value to ctx->version.

I also don't see why ctx->version is necessary in the first place. In reality
it is just a parameter to xgene_pcc_ioremap(). Which I think should be inline
and not a separate function.


+#endif


What if ACPI is enabled in the build but the system is running on HW which
does not support ACPI ? Is that guaranteed to never happen ? Why not use
acpi_disabled instead ?


+
spin_lock_init(>kfifo_lock);
mutex_init(>rd_mutex);
  
@@ -690,9 +728,9 @@ static int xgene_hwmon_probe(struct platform_device *pdev)

 */
ctx->comm_base_addr = cppc_ss->base_address;
if (ctx->comm_base_addr) {
-   ctx->pcc_comm_addr = memremap(ctx->comm_base_addr,
-   cppc_ss->length,
-   MEMREMAP_WB);
+   ctx->pcc_comm_addr = xgene_pcc_ioremap(ctx,
+   ctx->comm_base_addr,
+   cppc_ss->length);


Inline, please. The extra function adds more complexity than it is worth.


} else {
dev_err(>dev, "Failed to get PCC comm region\n");
rc = -ENODEV;
@@ -758,14 +796,6 @@ static int xgene_hwmon_remove(struct platform_device *pdev)
return 0;
  }
  
-#ifdef CONFIG_ACPI

-static const struct acpi_device_id xgene_hwmon_acpi_match[] = {
-   {"APMC0D29", 0},
-   {},
-};
-MODULE_DEVICE_TABLE(acpi, xgene_hwmon_acpi_match);
-#endif
-
  static const struct of_device_id xgene_hwmon_of_match[] = {
{.compatible = "apm,xgene-slimpro-hwmon"},
{}





[PATCH] kbuild: add forward declaration of default target to Makefile.asm-generic

2017-10-10 Thread Masahiro Yamada
$(kbuild-file) and Kbuild.include are included before the default
target "all".

We will add a target into Kbuild.include.  In advance, add a forward
declaration of the default target.

Signed-off-by: Masahiro Yamada 
---

 scripts/Makefile.asm-generic | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/scripts/Makefile.asm-generic b/scripts/Makefile.asm-generic
index a6c8c17..982fb7e 100644
--- a/scripts/Makefile.asm-generic
+++ b/scripts/Makefile.asm-generic
@@ -5,6 +5,9 @@
 # and for each file listed in this file with generic-y creates
 # a small wrapper file in $(obj) (arch/$(SRCARCH)/include/generated/$(src))
 
+PHONY += all
+all:
+
 kbuild-file := $(srctree)/arch/$(SRCARCH)/include/$(src)/Kbuild
 -include $(kbuild-file)
 
-- 
2.7.4



[PATCH] kbuild: add forward declaration of default target to Makefile.asm-generic

2017-10-10 Thread Masahiro Yamada
$(kbuild-file) and Kbuild.include are included before the default
target "all".

We will add a target into Kbuild.include.  In advance, add a forward
declaration of the default target.

Signed-off-by: Masahiro Yamada 
---

 scripts/Makefile.asm-generic | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/scripts/Makefile.asm-generic b/scripts/Makefile.asm-generic
index a6c8c17..982fb7e 100644
--- a/scripts/Makefile.asm-generic
+++ b/scripts/Makefile.asm-generic
@@ -5,6 +5,9 @@
 # and for each file listed in this file with generic-y creates
 # a small wrapper file in $(obj) (arch/$(SRCARCH)/include/generated/$(src))
 
+PHONY += all
+all:
+
 kbuild-file := $(srctree)/arch/$(SRCARCH)/include/$(src)/Kbuild
 -include $(kbuild-file)
 
-- 
2.7.4



Re: [RFC] yamldt v0.5, now a DTS compiler too

2017-10-10 Thread David Gibson
On Tue, Oct 10, 2017 at 06:19:03PM +0300, Pantelis Antoniou wrote:
> Hi David,
> 
> > On Oct 10, 2017, at 04:50 , David Gibson  
> > wrote:
> > 
> > On Mon, Oct 09, 2017 at 06:07:28PM +0300, Pantelis Antoniou wrote:
> >> Hi David,
> >> 
> >>> On Oct 9, 2017, at 03:00 , David Gibson  
> >>> wrote:
> >>> 
> >>> On Sun, Oct 08, 2017 at 04:08:03PM -0700, Frank Rowand wrote:
>  On 10/07/17 03:23, Pantelis Antoniou wrote:
> > Hi Rob,
> > 
> >> On Oct 6, 2017, at 16:55 , Rob Herring  wrote:
> >> 
> >> On Tue, Oct 3, 2017 at 12:39 PM, Pantelis Antoniou
> >>  wrote:
> >>> Hi Rob,
>  
>  < snip >
>  
> >>> eBPF is portable, can be serialized after compiling in the schema file
> >>> and can be executed in the kernel.
> >> 
> >> Executing in the kernel is a non-goal for me.
>  
>  Executing in the kernel is an anti-goal for me.
>  
>  We are trying to reduce the device tree footprint inside the kernel,
>  not increase it.
>  
>  99.99% of the validation should be possible statically, in the compile
>  phase.
>  
>  
> >>> By stripping out all documentation related properties and nodes 
> >>> keeping
> >>> only the compiled filters you can generate a dtb blob that passed to
> >>> kernel can be used for verification of all runtime changes in the
> >>> kernel's live tree. eBPF is enforcing an execution model that is 
> >>> 'safe'
> >>> so we can be sure that no foul play is possible.
>  
>  Run time changes can be assumed correct (short of bugs in the overlay
>  application code), if the base tree is validated, the overlay is 
>  validated,
>  and the interface between the live tree and the overlay is a
>  connector.
> >>> 
> >>> In addition, no amount of schema validation can really protect the
> >>> kernel from a bad DT.  Even if the schemas can 100% verify that the DT
> >>> is "syntactically" correct, which is ambitious, it can't protect
> >>> against a DT which is in the right form, but contains information that
> >>> is simply wrong for the hardware in question.  That can stuff the
> >>> kernel at least as easily as an incorrectly formatted DT.
> >>> 
> >> 
> >> I disagree.
> >> 
> >> There are multiple levels of validation. For now we’re only talking about
> >> binding validation. There can be SoC level validation, board level 
> >> validation,
> >> revision level validation and finally application specific validation.
> >> 
> >> Binding validation is making sure properties/nodes follow the binding 
> >> document.
> >> For instance that for a foo device there’s a mandatory interrupt property.
> >> 
> >> Simplified
> >> 
> >> interrupt = ;
> >> 
> >> Binding validation would ‘catch’ errors like assigning a string or not 
> >> having the
> >> interrupt property available.
> >> 
> >> SoC level validation would list the available interrupt number that a given
> >> SoC would support for that device.
> >> 
> >> For example that interrupt can only take the values 10 or 99 in a given 
> >> SoC.
> >> 
> >> Board level validation would narrow this down even further to a value of 
> >> 10 for
> >> a given board model.
> > 
> >> Similar revision level validation would place further restriction on the 
> >> allowed
> >> configuration.
> >> 
> >> Finally application specific validation could place restriction based on 
> >> the intended
> >> application that piece of hardware is used for. For instance devices that 
> >> should not
> >> exceed a given power budget would have restrictions on the clock frequency 
> >> of the processor
> >> or bus frequencies etc.
> > 
> > This doesn't help.  In order to do this, the validator would need
> > information that's essentially equivalent to the content of DT, at
> > which point there's no point to the DT at all - and you're left with
> > the problem of validating the information that the validator has.
> 
> That would be the case if hardware IP only has a single way to be configured.

Right, and if there's more than one way, then the validator can't
possibly tell whether the DT has the right one.

DTs must always come from a trusted source, because if they don't,
then you don't need the DT in the first place (you could build your
own).

> The industry standard nowadays is picking reusable IP blocks and integrating 
> them
> together in an SoC. The driver and the binding is common for every platform 
> that uses
> that block, but the allowed configuration varies according to what the 
> hardware
> people uses in a given instance.

> > Fundamentally a validator that's useful *cannot* tell the difference
> > between a correct tree and one which _could_ be correct for some
> > theoretical hardware, but isn't for this particular hardware.
> 
> That’s why there’s reason for a nested hierarchy of bindings IMO.

Nothing about how 

Re: [RFC] yamldt v0.5, now a DTS compiler too

2017-10-10 Thread David Gibson
On Tue, Oct 10, 2017 at 06:19:03PM +0300, Pantelis Antoniou wrote:
> Hi David,
> 
> > On Oct 10, 2017, at 04:50 , David Gibson  
> > wrote:
> > 
> > On Mon, Oct 09, 2017 at 06:07:28PM +0300, Pantelis Antoniou wrote:
> >> Hi David,
> >> 
> >>> On Oct 9, 2017, at 03:00 , David Gibson  
> >>> wrote:
> >>> 
> >>> On Sun, Oct 08, 2017 at 04:08:03PM -0700, Frank Rowand wrote:
>  On 10/07/17 03:23, Pantelis Antoniou wrote:
> > Hi Rob,
> > 
> >> On Oct 6, 2017, at 16:55 , Rob Herring  wrote:
> >> 
> >> On Tue, Oct 3, 2017 at 12:39 PM, Pantelis Antoniou
> >>  wrote:
> >>> Hi Rob,
>  
>  < snip >
>  
> >>> eBPF is portable, can be serialized after compiling in the schema file
> >>> and can be executed in the kernel.
> >> 
> >> Executing in the kernel is a non-goal for me.
>  
>  Executing in the kernel is an anti-goal for me.
>  
>  We are trying to reduce the device tree footprint inside the kernel,
>  not increase it.
>  
>  99.99% of the validation should be possible statically, in the compile
>  phase.
>  
>  
> >>> By stripping out all documentation related properties and nodes 
> >>> keeping
> >>> only the compiled filters you can generate a dtb blob that passed to
> >>> kernel can be used for verification of all runtime changes in the
> >>> kernel's live tree. eBPF is enforcing an execution model that is 
> >>> 'safe'
> >>> so we can be sure that no foul play is possible.
>  
>  Run time changes can be assumed correct (short of bugs in the overlay
>  application code), if the base tree is validated, the overlay is 
>  validated,
>  and the interface between the live tree and the overlay is a
>  connector.
> >>> 
> >>> In addition, no amount of schema validation can really protect the
> >>> kernel from a bad DT.  Even if the schemas can 100% verify that the DT
> >>> is "syntactically" correct, which is ambitious, it can't protect
> >>> against a DT which is in the right form, but contains information that
> >>> is simply wrong for the hardware in question.  That can stuff the
> >>> kernel at least as easily as an incorrectly formatted DT.
> >>> 
> >> 
> >> I disagree.
> >> 
> >> There are multiple levels of validation. For now we’re only talking about
> >> binding validation. There can be SoC level validation, board level 
> >> validation,
> >> revision level validation and finally application specific validation.
> >> 
> >> Binding validation is making sure properties/nodes follow the binding 
> >> document.
> >> For instance that for a foo device there’s a mandatory interrupt property.
> >> 
> >> Simplified
> >> 
> >> interrupt = ;
> >> 
> >> Binding validation would ‘catch’ errors like assigning a string or not 
> >> having the
> >> interrupt property available.
> >> 
> >> SoC level validation would list the available interrupt number that a given
> >> SoC would support for that device.
> >> 
> >> For example that interrupt can only take the values 10 or 99 in a given 
> >> SoC.
> >> 
> >> Board level validation would narrow this down even further to a value of 
> >> 10 for
> >> a given board model.
> > 
> >> Similar revision level validation would place further restriction on the 
> >> allowed
> >> configuration.
> >> 
> >> Finally application specific validation could place restriction based on 
> >> the intended
> >> application that piece of hardware is used for. For instance devices that 
> >> should not
> >> exceed a given power budget would have restrictions on the clock frequency 
> >> of the processor
> >> or bus frequencies etc.
> > 
> > This doesn't help.  In order to do this, the validator would need
> > information that's essentially equivalent to the content of DT, at
> > which point there's no point to the DT at all - and you're left with
> > the problem of validating the information that the validator has.
> 
> That would be the case if hardware IP only has a single way to be configured.

Right, and if there's more than one way, then the validator can't
possibly tell whether the DT has the right one.

DTs must always come from a trusted source, because if they don't,
then you don't need the DT in the first place (you could build your
own).

> The industry standard nowadays is picking reusable IP blocks and integrating 
> them
> together in an SoC. The driver and the binding is common for every platform 
> that uses
> that block, but the allowed configuration varies according to what the 
> hardware
> people uses in a given instance.

> > Fundamentally a validator that's useful *cannot* tell the difference
> > between a correct tree and one which _could_ be correct for some
> > theoretical hardware, but isn't for this particular hardware.
> 
> That’s why there’s reason for a nested hierarchy of bindings IMO.

Nothing about how you structure the validation can change the basic
fact that there are only two possibilities.  Either:

a) You 

[PATCH] printk: hash addresses printed with %p

2017-10-10 Thread Tobin C. Harding
Currently there are many places in the kernel where addresses are being
printed using an unadorned %p. Kernel pointers should be printed using
%pK allowing some control via the kptr_restrict sysctl. Exposing addresses
gives attackers sensitive information about the kernel layout in memory.

We can reduce the attack surface by hashing all addresses printed with
%p. This will of course break some users, forcing code printing needed
addresses to be updated.

For what it's worth, usage of unadorned %p can be broken down as follows

git grep '%p[^KFfSsBRrbMmIiEUVKNhdDgCGO]' | wc -l

arch: 2512
block: 20
crypto: 12
fs: 1221
include: 147
kernel: 109
lib: 77
mm: 120
net: 1516
security: 11
sound: 168
virt: 2
drivers: 8420

Add function ptr_to_id() to map an address to a unique identifier. This
mapping is created by calling ptr_obfuscate() to hash the address. The
hashing algorithm is carried out in two stages. First the address is
xor'd with a random value, then we multiply the result of the xor by a
second random value.

Signed-off-by: Tobin C. Harding 
---

This is version 2 of the series (of which I sent only the cover letter,
failing to send the actual patches)

[PATCH 0/3] add %pX specifier

Implementing changes as suggested by Linus (in response to the cover
letter). Patch 2 and 3 of the original series dropped.

 include/linux/printk.h | 17 +
 lib/vsprintf.c | 35 +--
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/include/linux/printk.h b/include/linux/printk.h
index e10f27468322..60c3d018efcf 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -41,6 +41,23 @@ static inline const char *printk_skip_headers(const char 
*buffer)
return buffer;
 }
 
+/*
+ * Obfuscates pointer (algorithm taken from kptr_obfuscate(). See 
kernel/kcmp.c)
+ * v is the pointer value, randval is some random value, oddval is some random
+ * odd value.
+ *
+ * The obfuscation is done in two steps. First we xor the kernel pointer with
+ * a random value, which puts pointer into a new position in a reordered space.
+ * Secondly we multiply the xor production with a large odd random number to
+ * permute its bits even more (the odd multiplier guarantees that the product
+ * is unique ever after the high bits are truncated, since any odd number is
+ * relative prime to 2^n).
+ */
+static inline long ptr_obfuscate(long v, long randval, long oddval)
+{
+   return (v ^ randval) * oddval;
+}
+
 #define CONSOLE_EXT_LOG_MAX8192
 
 /* printk's without a loglevel use this.. */
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 86c3385b9eb3..399cc090be75 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1591,6 +1591,35 @@ char *device_node_string(char *buf, char *end, struct 
device_node *dn,
return widen_string(buf, buf - buf_start, end, spec);
 }
 
+static long get_random_odd_long(void)
+{
+   long val = 0;
+
+   while((val & 1) == 0) {
+   val = get_random_long();
+   }
+
+   return val;
+}
+
+/* Maps a pointer to a unique identifier. */
+static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec 
spec)
+{
+   long hashval;
+   static long randval = 0;
+   static long oddval = 0;
+
+   if (oddval == 0 && randval == 0) {
+   randval = get_random_long();
+   oddval = get_random_odd_long();
+   }
+
+   hashval = ptr_obfuscate((unsigned long)ptr, randval, oddval);
+   spec.base = 16;
+
+   return number(buf, end, hashval, spec);
+}
+
 int kptr_restrict __read_mostly;
 
 /*
@@ -1703,6 +1732,9 @@ int kptr_restrict __read_mostly;
  * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
  * function pointers are really function descriptors, which contain a
  * pointer to the real address.
+ *
+ * Default behaviour (unadorned %p) is to hash the address, rendering it useful
+ * as a unique identifier.
  */
 static noinline_for_stack
 char *pointer(const char *fmt, char *buf, char *end, void *ptr,
@@ -1858,14 +1890,13 @@ char *pointer(const char *fmt, char *buf, char *end, 
void *ptr,
return device_node_string(buf, end, ptr, spec, fmt + 1);
}
}
-   spec.flags |= SMALL;
if (spec.field_width == -1) {
spec.field_width = default_width;
spec.flags |= ZEROPAD;
}
spec.base = 16;
 
-   return number(buf, end, (unsigned long) ptr, spec);
+   return ptr_to_id(buf, end, ptr, spec);
 }
 
 /*
-- 
2.7.4



[PATCH] printk: hash addresses printed with %p

2017-10-10 Thread Tobin C. Harding
Currently there are many places in the kernel where addresses are being
printed using an unadorned %p. Kernel pointers should be printed using
%pK allowing some control via the kptr_restrict sysctl. Exposing addresses
gives attackers sensitive information about the kernel layout in memory.

We can reduce the attack surface by hashing all addresses printed with
%p. This will of course break some users, forcing code printing needed
addresses to be updated.

For what it's worth, usage of unadorned %p can be broken down as follows

git grep '%p[^KFfSsBRrbMmIiEUVKNhdDgCGO]' | wc -l

arch: 2512
block: 20
crypto: 12
fs: 1221
include: 147
kernel: 109
lib: 77
mm: 120
net: 1516
security: 11
sound: 168
virt: 2
drivers: 8420

Add function ptr_to_id() to map an address to a unique identifier. This
mapping is created by calling ptr_obfuscate() to hash the address. The
hashing algorithm is carried out in two stages. First the address is
xor'd with a random value, then we multiply the result of the xor by a
second random value.

Signed-off-by: Tobin C. Harding 
---

This is version 2 of the series (of which I sent only the cover letter,
failing to send the actual patches)

[PATCH 0/3] add %pX specifier

Implementing changes as suggested by Linus (in response to the cover
letter). Patch 2 and 3 of the original series dropped.

 include/linux/printk.h | 17 +
 lib/vsprintf.c | 35 +--
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/include/linux/printk.h b/include/linux/printk.h
index e10f27468322..60c3d018efcf 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -41,6 +41,23 @@ static inline const char *printk_skip_headers(const char 
*buffer)
return buffer;
 }
 
+/*
+ * Obfuscates pointer (algorithm taken from kptr_obfuscate(). See 
kernel/kcmp.c)
+ * v is the pointer value, randval is some random value, oddval is some random
+ * odd value.
+ *
+ * The obfuscation is done in two steps. First we xor the kernel pointer with
+ * a random value, which puts pointer into a new position in a reordered space.
+ * Secondly we multiply the xor production with a large odd random number to
+ * permute its bits even more (the odd multiplier guarantees that the product
+ * is unique ever after the high bits are truncated, since any odd number is
+ * relative prime to 2^n).
+ */
+static inline long ptr_obfuscate(long v, long randval, long oddval)
+{
+   return (v ^ randval) * oddval;
+}
+
 #define CONSOLE_EXT_LOG_MAX8192
 
 /* printk's without a loglevel use this.. */
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 86c3385b9eb3..399cc090be75 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1591,6 +1591,35 @@ char *device_node_string(char *buf, char *end, struct 
device_node *dn,
return widen_string(buf, buf - buf_start, end, spec);
 }
 
+static long get_random_odd_long(void)
+{
+   long val = 0;
+
+   while((val & 1) == 0) {
+   val = get_random_long();
+   }
+
+   return val;
+}
+
+/* Maps a pointer to a unique identifier. */
+static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec 
spec)
+{
+   long hashval;
+   static long randval = 0;
+   static long oddval = 0;
+
+   if (oddval == 0 && randval == 0) {
+   randval = get_random_long();
+   oddval = get_random_odd_long();
+   }
+
+   hashval = ptr_obfuscate((unsigned long)ptr, randval, oddval);
+   spec.base = 16;
+
+   return number(buf, end, hashval, spec);
+}
+
 int kptr_restrict __read_mostly;
 
 /*
@@ -1703,6 +1732,9 @@ int kptr_restrict __read_mostly;
  * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
  * function pointers are really function descriptors, which contain a
  * pointer to the real address.
+ *
+ * Default behaviour (unadorned %p) is to hash the address, rendering it useful
+ * as a unique identifier.
  */
 static noinline_for_stack
 char *pointer(const char *fmt, char *buf, char *end, void *ptr,
@@ -1858,14 +1890,13 @@ char *pointer(const char *fmt, char *buf, char *end, 
void *ptr,
return device_node_string(buf, end, ptr, spec, fmt + 1);
}
}
-   spec.flags |= SMALL;
if (spec.field_width == -1) {
spec.field_width = default_width;
spec.flags |= ZEROPAD;
}
spec.base = 16;
 
-   return number(buf, end, (unsigned long) ptr, spec);
+   return ptr_to_id(buf, end, ptr, spec);
 }
 
 /*
-- 
2.7.4



Re: [PATCH] f2fs/crypto: drop crypto key at evict_inode only

2017-10-10 Thread Chao Yu
On 2017/10/11 1:56, Jaegeuk Kim wrote:
> This patch avoids dropping crypto key in f2fs_drop_inode, so we can guarantee
> it happens only at evict_inode.
> 
> Signed-off-by: Jaegeuk Kim 

Reviewed-by: Chao Yu 

Thanks,

> ---
>  fs/f2fs/super.c | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index dc05b16df514..f10a1b1380c2 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -717,7 +717,6 @@ static int f2fs_drop_inode(struct inode *inode)
>  
>   sb_end_intwrite(inode->i_sb);
>  
> - fscrypt_put_encryption_info(inode, NULL);
>   spin_lock(>i_lock);
>   atomic_dec(>i_count);
>   }
> 



Re: [PATCH] f2fs/crypto: drop crypto key at evict_inode only

2017-10-10 Thread Chao Yu
On 2017/10/11 1:56, Jaegeuk Kim wrote:
> This patch avoids dropping crypto key in f2fs_drop_inode, so we can guarantee
> it happens only at evict_inode.
> 
> Signed-off-by: Jaegeuk Kim 

Reviewed-by: Chao Yu 

Thanks,

> ---
>  fs/f2fs/super.c | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index dc05b16df514..f10a1b1380c2 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -717,7 +717,6 @@ static int f2fs_drop_inode(struct inode *inode)
>  
>   sb_end_intwrite(inode->i_sb);
>  
> - fscrypt_put_encryption_info(inode, NULL);
>   spin_lock(>i_lock);
>   atomic_dec(>i_count);
>   }
> 



Re: [PATCH 4.4 00/47] 4.4.92-stable review

2017-10-10 Thread Tom Gall

> On Oct 10, 2017, at 2:50 PM, Greg Kroah-Hartman  
> wrote:
> 
> This is the start of the stable review cycle for the 4.4.92 release.
> There are 47 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Thu Oct 12 19:50:01 UTC 2017.
> Anything received after that time might be too late.
> 
> The whole patch series can be found in one patch at:
>   kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.4.92-rc1.gz
> or in the git tree and branch at:
>  git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.4.y
> and the diffstat can be found below.
> 
> thanks,
> 
> greg k-h
> 


On HiKey (arm64), when running ltp-sched with this rc, we’re seeing some sort
of scheduler issue, or maybe some kind of memory corruption.

Raw output of interest : 

https://lkft.validation.linaro.org/scheduler/job/46192#L5291 

ltp-sched-tests__url: git://github.com/linux-test-project/ltp.git
ltp-sched-tests__version: "20170929"
kernel-config: 
http://snapshots.linaro.org/openembedded/lkft/morty/hikey/rpb/linaro-hikey-stable-rc-4.4/31/defconfig
build-location: 
http://snapshots.linaro.org/openembedded/lkft/morty/hikey/rpb/linaro-hikey-stable-rc-4.4/31

Re: [PATCH 4.4 00/47] 4.4.92-stable review

2017-10-10 Thread Tom Gall

> On Oct 10, 2017, at 2:50 PM, Greg Kroah-Hartman  
> wrote:
> 
> This is the start of the stable review cycle for the 4.4.92 release.
> There are 47 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Thu Oct 12 19:50:01 UTC 2017.
> Anything received after that time might be too late.
> 
> The whole patch series can be found in one patch at:
>   kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.4.92-rc1.gz
> or in the git tree and branch at:
>  git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.4.y
> and the diffstat can be found below.
> 
> thanks,
> 
> greg k-h
> 


On HiKey (arm64), when running ltp-sched with this rc, we’re seeing some sort
of scheduler issue, or maybe some kind of memory corruption.

Raw output of interest : 

https://lkft.validation.linaro.org/scheduler/job/46192#L5291 

ltp-sched-tests__url: git://github.com/linux-test-project/ltp.git
ltp-sched-tests__version: "20170929"
kernel-config: 
http://snapshots.linaro.org/openembedded/lkft/morty/hikey/rpb/linaro-hikey-stable-rc-4.4/31/defconfig
build-location: 
http://snapshots.linaro.org/openembedded/lkft/morty/hikey/rpb/linaro-hikey-stable-rc-4.4/31

[PATCH] checkpatch: statement should end at a close brace at the outermost level.

2017-10-10 Thread Jiang Biao
Statement should end at a close brace at the outermost level in
ctx_statement_block.

The way to reproduce the bug,
1, Add two external function declarations into line 505 of
kernel/stop_machine.c, such as,
int foo1(void);
int foo2(void);
2, Format a patch for that, and use the checkpatch.pl to check.
3, No warning is issued for the first declaration (foo1()), because the
statement does not end at the '}' before it.

Signed-off-by: Jiang Biao 
---
 scripts/checkpatch.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index dd2c262..f220cfc 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1330,7 +1330,7 @@ sub ctx_statement_block {
 
# Statement ends at the ';' or a close '}' at the
# outermost level.
-   if ($level == 0 && $c eq ';') {
+   if ($level == 0 && ($c eq ';' || $c eq '}')) {
last;
}
 
-- 
2.7.4



[PATCH] checkpatch: statement should end at a close brace at the outermost level.

2017-10-10 Thread Jiang Biao
Statement should end at a close brace at the outermost level in
ctx_statement_block.

The way to reproduce the bug,
1, Add two external function declarations into line 505 of
kernel/stop_machine.c, such as,
int foo1(void);
int foo2(void);
2, Format a patch for that, and use the checkpatch.pl to check.
3, No warning is issued for the first declaration (foo1()), because the
statement does not end at the '}' before it.

Signed-off-by: Jiang Biao 
---
 scripts/checkpatch.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index dd2c262..f220cfc 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1330,7 +1330,7 @@ sub ctx_statement_block {
 
# Statement ends at the ';' or a close '}' at the
# outermost level.
-   if ($level == 0 && $c eq ';') {
+   if ($level == 0 && ($c eq ';' || $c eq '}')) {
last;
}
 
-- 
2.7.4



Re: [PATCH 2/3] watchdog: orion: don't enable rstout if an interrupt is configured

2017-10-10 Thread Guenter Roeck

On 10/10/2017 07:29 PM, Chris Packham wrote:

The orion_wdt_irq invokes panic() so we are going to reset the CPU
regardless.  By not setting this bit we get a chance to gather debug
from the panic output before the system is reset.

Signed-off-by: Chris Packham 


Unless I am missing something, this assumes that the interrupt is
handled, ie that the system is not stuck with interrupts disabled.
This makes the watchdog less reliable. This added verbosity comes
at a significant cost. I'd like to get input from others if this
is acceptable.

That would be different if there was a means to configure a pretimeout,
ie a means to tell the system to generate an irq first, followed by a
hard reset if the interrupt is not served. That does not seem to be
the case here, though.

Guenter


---
  drivers/watchdog/orion_wdt.c | 25 +
  1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c
index ea676d233e1e..ce88f339ef7f 100644
--- a/drivers/watchdog/orion_wdt.c
+++ b/drivers/watchdog/orion_wdt.c
@@ -71,6 +71,7 @@ struct orion_watchdog {
unsigned long clk_rate;
struct clk *clk;
const struct orion_watchdog_data *data;
+   int irq;
  };
  
  static int orion_wdt_clock_init(struct platform_device *pdev,

@@ -203,9 +204,11 @@ static int armada375_start(struct watchdog_device *wdt_dev)
dev->data->wdt_enable_bit);
  
  	/* Enable reset on watchdog */

-   reg = readl(dev->rstout);
-   reg |= dev->data->rstout_enable_bit;
-   writel(reg, dev->rstout);
+   if (!dev->irq) {
+   reg = readl(dev->rstout);
+   reg |= dev->data->rstout_enable_bit;
+   writel(reg, dev->rstout);
+   }
  
  	atomic_io_modify(dev->rstout_mask, dev->data->rstout_mask_bit, 0);

return 0;
@@ -228,9 +231,12 @@ static int armada370_start(struct watchdog_device *wdt_dev)
dev->data->wdt_enable_bit);
  
  	/* Enable reset on watchdog */

-   reg = readl(dev->rstout);
-   reg |= dev->data->rstout_enable_bit;
-   writel(reg, dev->rstout);
+   if (!dev->irq) {
+   reg = readl(dev->rstout);
+   reg |= dev->data->rstout_enable_bit;
+   writel(reg, dev->rstout);
+   }
+
return 0;
  }
  
@@ -247,8 +253,9 @@ static int orion_start(struct watchdog_device *wdt_dev)

dev->data->wdt_enable_bit);
  
  	/* Enable reset on watchdog */

-   atomic_io_modify(dev->rstout, dev->data->rstout_enable_bit,
- dev->data->rstout_enable_bit);
+   if (!dev->irq)
+   atomic_io_modify(dev->rstout, dev->data->rstout_enable_bit,
+ dev->data->rstout_enable_bit);
  
  	return 0;

  }
@@ -595,6 +602,8 @@ static int orion_wdt_probe(struct platform_device *pdev)
dev_err(>dev, "failed to request IRQ\n");
goto disable_clk;
}
+
+   dev->irq = irq;
}
  
  	watchdog_set_nowayout(>wdt, nowayout);






Re: [PATCH 2/3] watchdog: orion: don't enable rstout if an interrupt is configured

2017-10-10 Thread Guenter Roeck

On 10/10/2017 07:29 PM, Chris Packham wrote:

The orion_wdt_irq invokes panic() so we are going to reset the CPU
regardless.  By not setting this bit we get a chance to gather debug
from the panic output before the system is reset.

Signed-off-by: Chris Packham 


Unless I am missing something, this assumes that the interrupt is
handled, ie that the system is not stuck with interrupts disabled.
This makes the watchdog less reliable. This added verbosity comes
at a significant cost. I'd like to get input from others if this
is acceptable.

That would be different if there was a means to configure a pretimeout,
ie a means to tell the system to generate an irq first, followed by a
hard reset if the interrupt is not served. That does not seem to be
the case here, though.

Guenter


---
  drivers/watchdog/orion_wdt.c | 25 +
  1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c
index ea676d233e1e..ce88f339ef7f 100644
--- a/drivers/watchdog/orion_wdt.c
+++ b/drivers/watchdog/orion_wdt.c
@@ -71,6 +71,7 @@ struct orion_watchdog {
unsigned long clk_rate;
struct clk *clk;
const struct orion_watchdog_data *data;
+   int irq;
  };
  
  static int orion_wdt_clock_init(struct platform_device *pdev,

@@ -203,9 +204,11 @@ static int armada375_start(struct watchdog_device *wdt_dev)
dev->data->wdt_enable_bit);
  
  	/* Enable reset on watchdog */

-   reg = readl(dev->rstout);
-   reg |= dev->data->rstout_enable_bit;
-   writel(reg, dev->rstout);
+   if (!dev->irq) {
+   reg = readl(dev->rstout);
+   reg |= dev->data->rstout_enable_bit;
+   writel(reg, dev->rstout);
+   }
  
  	atomic_io_modify(dev->rstout_mask, dev->data->rstout_mask_bit, 0);

return 0;
@@ -228,9 +231,12 @@ static int armada370_start(struct watchdog_device *wdt_dev)
dev->data->wdt_enable_bit);
  
  	/* Enable reset on watchdog */

-   reg = readl(dev->rstout);
-   reg |= dev->data->rstout_enable_bit;
-   writel(reg, dev->rstout);
+   if (!dev->irq) {
+   reg = readl(dev->rstout);
+   reg |= dev->data->rstout_enable_bit;
+   writel(reg, dev->rstout);
+   }
+
return 0;
  }
  
@@ -247,8 +253,9 @@ static int orion_start(struct watchdog_device *wdt_dev)

dev->data->wdt_enable_bit);
  
  	/* Enable reset on watchdog */

-   atomic_io_modify(dev->rstout, dev->data->rstout_enable_bit,
- dev->data->rstout_enable_bit);
+   if (!dev->irq)
+   atomic_io_modify(dev->rstout, dev->data->rstout_enable_bit,
+ dev->data->rstout_enable_bit);
  
  	return 0;

  }
@@ -595,6 +602,8 @@ static int orion_wdt_probe(struct platform_device *pdev)
dev_err(&pdev->dev, "failed to request IRQ\n");
goto disable_clk;
}
+
+   dev->irq = irq;
}
  
  	watchdog_set_nowayout(&dev->wdt, nowayout);






Re: [PATCH 1/3] watchdog: orion: fix typo

2017-10-10 Thread Guenter Roeck

On 10/10/2017 07:29 PM, Chris Packham wrote:

Correct typo in comment "insterted" -> "inserted".

Signed-off-by: Chris Packham 


Reviewed-by: Guenter Roeck 


---
  drivers/watchdog/orion_wdt.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c
index 83af7d6cc37c..ea676d233e1e 100644
--- a/drivers/watchdog/orion_wdt.c
+++ b/drivers/watchdog/orion_wdt.c
@@ -576,7 +576,7 @@ static int orion_wdt_probe(struct platform_device *pdev)
/*
 * Let's make sure the watchdog is fully stopped, unless it's
 * explicitly enabled. This may be the case if the module was
-* removed and re-insterted, or if the bootloader explicitly
+* removed and re-inserted, or if the bootloader explicitly
 * set a running watchdog before booting the kernel.
 */
if (!orion_wdt_enabled(&dev->wdt))





Re: [PATCH 1/3] watchdog: orion: fix typo

2017-10-10 Thread Guenter Roeck

On 10/10/2017 07:29 PM, Chris Packham wrote:

Correct typo in comment "insterted" -> "inserted".

Signed-off-by: Chris Packham 


Reviewed-by: Guenter Roeck 


---
  drivers/watchdog/orion_wdt.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c
index 83af7d6cc37c..ea676d233e1e 100644
--- a/drivers/watchdog/orion_wdt.c
+++ b/drivers/watchdog/orion_wdt.c
@@ -576,7 +576,7 @@ static int orion_wdt_probe(struct platform_device *pdev)
/*
 * Let's make sure the watchdog is fully stopped, unless it's
 * explicitly enabled. This may be the case if the module was
-* removed and re-insterted, or if the bootloader explicitly
+* removed and re-inserted, or if the bootloader explicitly
 * set a running watchdog before booting the kernel.
 */
if (!orion_wdt_enabled(&dev->wdt))





Re: [PATCH v16 09/10] x86/arch_prctl: Selftest for ARCH_[GET|SET]_CPUID

2017-10-10 Thread Wanpeng Li
Hi Kyle,
2017-03-20 16:16 GMT+08:00 Kyle Huey :
> Test disabling and reenabling the cpuid instruction via the new arch_prctl
> ARCH_SET_CPUID, retrieving the current state via ARCH_GET_CPUID, and the
> expected behaviors across fork() and exec().
>
> Signed-off-by: Kyle Huey 
> ---
>  tools/testing/selftests/x86/Makefile  |   2 +-
>  tools/testing/selftests/x86/cpuid_fault.c | 251 
> ++

I'm not sure why this commit was not merged upstream. I tested
4.14-rc3 with this testcase on a Haswell client; however, I encountered
the splat below. Any idea?

# ./cpuid_fault_64
cpuid() == {d, 756e6547, 6c65746e, 49656e69}
arch_prctl(ARCH_GET_CPUID); ARCH_GET_CPUID is unsupported on this kernel.

Regards,
Wanpeng Li

>  2 files changed, 252 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/x86/cpuid_fault.c
>
> diff --git a/tools/testing/selftests/x86/Makefile 
> b/tools/testing/selftests/x86/Makefile
> index 38e0a9ca5d71..acda4e5fcf25 100644
> --- a/tools/testing/selftests/x86/Makefile
> +++ b/tools/testing/selftests/x86/Makefile
> @@ -6,7 +6,7 @@ include ../lib.mk
>
>  TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt 
> ptrace_syscall test_mremap_vdso \
> check_initial_reg_state sigreturn ldt_gdt iopl 
> mpx-mini-test ioperm \
> -   protection_keys test_vdso
> +   protection_keys test_vdso cpuid_fault
>  TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso 
> unwind_vdso \
> test_FCMOV test_FCOMI test_FISTTP \
> vdso_restorer
> diff --git a/tools/testing/selftests/x86/cpuid_fault.c 
> b/tools/testing/selftests/x86/cpuid_fault.c
> new file mode 100644
> index ..e3b93c28c655
> --- /dev/null
> +++ b/tools/testing/selftests/x86/cpuid_fault.c
> @@ -0,0 +1,251 @@
> +
> +/*
> + * Tests for arch_prctl(ARCH_GET_CPUID, ...) / arch_prctl(ARCH_SET_CPUID, 
> ...)
> + *
> + * Basic test to test behaviour of ARCH_GET_CPUID and ARCH_SET_CPUID
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +#include 
> +
> +/*
> +#define ARCH_GET_CPUID 0x1005
> +#define ARCH_SET_CPUID 0x1006
> +#ifdef __x86_64__
> +#define SYS_arch_prctl 158
> +#else
> +#define SYS_arch_prctl 384
> +#endif
> +*/
> +
> +const char *cpuid_names[] = {
> +   [0] = "[cpuid disabled]",
> +   [1] = "[cpuid enabled]",
> +};
> +
> +int arch_prctl(int option, unsigned long arg2)
> +{
> +   return syscall(SYS_arch_prctl, option, arg2);
> +}
> +
> +int cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx,
> + unsigned int *edx)
> +{
> +   return __get_cpuid(0, eax, ebx, ecx, edx);
> +}
> +
> +int do_child_exec_test(int eax, int ebx, int ecx, int edx)
> +{
> +   int cpuid_val = 0, child = 0, status = 0;
> +
> +   printf("arch_prctl(ARCH_GET_CPUID); ");
> +
> +   cpuid_val = arch_prctl(ARCH_GET_CPUID, 0);
> +   if (cpuid_val < 0)
> +   errx(1, "ARCH_GET_CPUID fails now, but not before?");
> +
> +   printf("cpuid_val == %s\n", cpuid_names[cpuid_val]);
> +   if (cpuid_val != 0)
> +   errx(1, "How did cpuid get re-enabled on fork?");
> +
> +   child = fork();
> +   if (child == 0) {
> +   cpuid_val = arch_prctl(ARCH_GET_CPUID, 0);
> +   if (cpuid_val < 0)
> +   errx(1, "ARCH_GET_CPUID fails now, but not before?");
> +
> +   printf("cpuid_val == %s\n", cpuid_names[cpuid_val]);
> +   if (cpuid_val != 0)
> +   errx(1, "How did cpuid get re-enabled on fork?");
> +
> +   printf("exec\n");
> +   execl("/proc/self/exe", "cpuid-fault", "-early-return", NULL);
> +   }
> +
> +   if (child != waitpid(child, &status, 0))
> +   errx(1, "waitpid failed!?");
> +
> +   if (WEXITSTATUS(status) != 0)
> +   errx(1, "Execed child exited abnormally");
> +
> +   return 0;
> +}
> +
> +int child_received_signal;
> +
> +void child_sigsegv_cb(int sig)
> +{
> +   int cpuid_val = 0;
> +
> +   child_received_signal = 1;
> +   printf("[ SIG_SEGV ]\n");
> +   printf("arch_prctl(ARCH_GET_CPUID); ");
> +
> +   cpuid_val = arch_prctl(ARCH_GET_CPUID, 0);
> +   if (cpuid_val < 0)
> +   errx(1, "ARCH_GET_CPUID fails now, but not before?");
> +
> +   printf("cpuid_val == %s\n", cpuid_names[cpuid_val]);
> +   printf("arch_prctl(ARCH_SET_CPUID, 1)\n");
> +   if (arch_prctl(ARCH_SET_CPUID, 1) != 0)
> +   exit(errno);
> +
> +   printf("cpuid() == ");
> +}
> +
> +int do_child_test(void)
> +{
> +   unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
> +
> +   signal(SIGSEGV, child_sigsegv_cb);
> +
> +   /* the child starts out with cpuid disabled, the signal handler
> + 

Re: [PATCH v16 09/10] x86/arch_prctl: Selftest for ARCH_[GET|SET]_CPUID

2017-10-10 Thread Wanpeng Li
Hi Kyle,
2017-03-20 16:16 GMT+08:00 Kyle Huey :
> Test disabling and reenabling the cpuid instruction via the new arch_prctl
> ARCH_SET_CPUID, retrieving the current state via ARCH_GET_CPUID, and the
> expected behaviors across fork() and exec().
>
> Signed-off-by: Kyle Huey 
> ---
>  tools/testing/selftests/x86/Makefile  |   2 +-
>  tools/testing/selftests/x86/cpuid_fault.c | 251 
> ++

I'm not sure why this commit was not merged upstream. I tested
4.14-rc3 with this testcase on a Haswell client; however, I encountered
the splat below. Any idea?

# ./cpuid_fault_64
cpuid() == {d, 756e6547, 6c65746e, 49656e69}
arch_prctl(ARCH_GET_CPUID); ARCH_GET_CPUID is unsupported on this kernel.

Regards,
Wanpeng Li

>  2 files changed, 252 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/x86/cpuid_fault.c
>
> diff --git a/tools/testing/selftests/x86/Makefile 
> b/tools/testing/selftests/x86/Makefile
> index 38e0a9ca5d71..acda4e5fcf25 100644
> --- a/tools/testing/selftests/x86/Makefile
> +++ b/tools/testing/selftests/x86/Makefile
> @@ -6,7 +6,7 @@ include ../lib.mk
>
>  TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt 
> ptrace_syscall test_mremap_vdso \
> check_initial_reg_state sigreturn ldt_gdt iopl 
> mpx-mini-test ioperm \
> -   protection_keys test_vdso
> +   protection_keys test_vdso cpuid_fault
>  TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso 
> unwind_vdso \
> test_FCMOV test_FCOMI test_FISTTP \
> vdso_restorer
> diff --git a/tools/testing/selftests/x86/cpuid_fault.c 
> b/tools/testing/selftests/x86/cpuid_fault.c
> new file mode 100644
> index ..e3b93c28c655
> --- /dev/null
> +++ b/tools/testing/selftests/x86/cpuid_fault.c
> @@ -0,0 +1,251 @@
> +
> +/*
> + * Tests for arch_prctl(ARCH_GET_CPUID, ...) / arch_prctl(ARCH_SET_CPUID, 
> ...)
> + *
> + * Basic test to test behaviour of ARCH_GET_CPUID and ARCH_SET_CPUID
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +#include 
> +
> +/*
> +#define ARCH_GET_CPUID 0x1005
> +#define ARCH_SET_CPUID 0x1006
> +#ifdef __x86_64__
> +#define SYS_arch_prctl 158
> +#else
> +#define SYS_arch_prctl 384
> +#endif
> +*/
> +
> +const char *cpuid_names[] = {
> +   [0] = "[cpuid disabled]",
> +   [1] = "[cpuid enabled]",
> +};
> +
> +int arch_prctl(int option, unsigned long arg2)
> +{
> +   return syscall(SYS_arch_prctl, option, arg2);
> +}
> +
> +int cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx,
> + unsigned int *edx)
> +{
> +   return __get_cpuid(0, eax, ebx, ecx, edx);
> +}
> +
> +int do_child_exec_test(int eax, int ebx, int ecx, int edx)
> +{
> +   int cpuid_val = 0, child = 0, status = 0;
> +
> +   printf("arch_prctl(ARCH_GET_CPUID); ");
> +
> +   cpuid_val = arch_prctl(ARCH_GET_CPUID, 0);
> +   if (cpuid_val < 0)
> +   errx(1, "ARCH_GET_CPUID fails now, but not before?");
> +
> +   printf("cpuid_val == %s\n", cpuid_names[cpuid_val]);
> +   if (cpuid_val != 0)
> +   errx(1, "How did cpuid get re-enabled on fork?");
> +
> +   child = fork();
> +   if (child == 0) {
> +   cpuid_val = arch_prctl(ARCH_GET_CPUID, 0);
> +   if (cpuid_val < 0)
> +   errx(1, "ARCH_GET_CPUID fails now, but not before?");
> +
> +   printf("cpuid_val == %s\n", cpuid_names[cpuid_val]);
> +   if (cpuid_val != 0)
> +   errx(1, "How did cpuid get re-enabled on fork?");
> +
> +   printf("exec\n");
> +   execl("/proc/self/exe", "cpuid-fault", "-early-return", NULL);
> +   }
> +
> +   if (child != waitpid(child, &status, 0))
> +   errx(1, "waitpid failed!?");
> +
> +   if (WEXITSTATUS(status) != 0)
> +   errx(1, "Execed child exited abnormally");
> +
> +   return 0;
> +}
> +
> +int child_received_signal;
> +
> +void child_sigsegv_cb(int sig)
> +{
> +   int cpuid_val = 0;
> +
> +   child_received_signal = 1;
> +   printf("[ SIG_SEGV ]\n");
> +   printf("arch_prctl(ARCH_GET_CPUID); ");
> +
> +   cpuid_val = arch_prctl(ARCH_GET_CPUID, 0);
> +   if (cpuid_val < 0)
> +   errx(1, "ARCH_GET_CPUID fails now, but not before?");
> +
> +   printf("cpuid_val == %s\n", cpuid_names[cpuid_val]);
> +   printf("arch_prctl(ARCH_SET_CPUID, 1)\n");
> +   if (arch_prctl(ARCH_SET_CPUID, 1) != 0)
> +   exit(errno);
> +
> +   printf("cpuid() == ");
> +}
> +
> +int do_child_test(void)
> +{
> +   unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
> +
> +   signal(SIGSEGV, child_sigsegv_cb);
> +
> +   /* the child starts out with cpuid disabled, the signal handler
> +* attempts to enable and retry
> +  

RE: [RFC v5 6/8] platform/x86: intel_punit_ipc: Use generic intel ipc device calls

2017-10-10 Thread Chakravarty, Souvik K


> -Original Message-
> From: platform-driver-x86-ow...@vger.kernel.org [mailto:platform-driver-
> x86-ow...@vger.kernel.org] On Behalf Of sathyanarayanan kuppuswamy
> Sent: Wednesday, October 11, 2017 3:59 AM
> To: Chakravarty, Souvik K ;
> a.zu...@towertech.it; x...@kernel.org; w...@iguana.be;
> mi...@redhat.com; alexandre.bell...@free-electrons.com; Zha, Qipeng
> ; h...@zytor.com; dvh...@infradead.org;
> t...@linutronix.de; lee.jo...@linaro.org; a...@infradead.org
> Cc: linux-...@vger.kernel.org; linux-watch...@vger.kernel.org; linux-
> ker...@vger.kernel.org; platform-driver-...@vger.kernel.org;
> sathyao...@gmail.com
> Subject: Re: [RFC v5 6/8] platform/x86: intel_punit_ipc: Use generic intel ipc
> device calls
> 
> 
> 
> On 10/08/2017 10:07 PM, Chakravarty, Souvik K wrote:
> >> From: sathyanarayanan.kuppusw...@linux.intel.com
> >> [mailto:sathyanarayanan.kuppusw...@linux.intel.com]
> >> Sent: Sunday, October 8, 2017 3:50 AM
> >> To: a.zu...@towertech.it; x...@kernel.org; w...@iguana.be;
> >> mi...@redhat.com; alexandre.bell...@free-electrons.com; Zha, Qipeng
> >> ; h...@zytor.com; dvh...@infradead.org;
> >> t...@linutronix.de; lee.jo...@linaro.org; a...@infradead.org;
> >> Chakravarty, Souvik K 
> >> Cc: linux-...@vger.kernel.org; linux-watch...@vger.kernel.org; linux-
> >> ker...@vger.kernel.org; platform-driver-...@vger.kernel.org;
> >> sathyao...@gmail.com; Kuppuswamy Sathyanarayanan
> >> 
> >> Subject: [RFC v5 6/8] platform/x86: intel_punit_ipc: Use generic
> >> intel ipc device calls
> >>
> >> From: Kuppuswamy Sathyanarayanan
> >> 
> >>
> >> Removed redundant IPC helper functions and refactored the driver to
> >> use APIs provided by generic IPC driver. This patch also cleans-up
> >> PUNIT IPC user
> >> drivers(intel_telemetry_pltdrv.c) to use APIs provided by generic IPC
> driver.
> >>
> >> Signed-off-by: Kuppuswamy Sathyanarayanan
> >> 
> >> ---
> >>   arch/x86/include/asm/intel_punit_ipc.h| 125 +--
> >>   drivers/platform/x86/Kconfig  |   1 +
> >>   drivers/platform/x86/intel_punit_ipc.c| 303 
> >> ++
> >>   drivers/platform/x86/intel_telemetry_pltdrv.c |  97 +
> >>   4 files changed, 223 insertions(+), 303 deletions(-)
> >>
> >> Changes since v4:
> >>   * None
> >>
> >> Changes since v2:
> >>   * Added unique name to PUNIT BIOS, GTD, & ISP regmaps.
> >>   * Added intel_ipc_dev_put() support.
> >>
> >> Changes since v1:
> >>   * Removed custom APIs.
> >>   * Cleaned up PUNIT IPC user drivers to use APIs provided by generic
> >> IPC driver.
> >>
> >> diff --git a/arch/x86/include/asm/intel_punit_ipc.h
> >> b/arch/x86/include/asm/intel_punit_ipc.h
> >> index 201eb9d..cf1630c 100644
> >> --- a/arch/x86/include/asm/intel_punit_ipc.h
> >> +++ b/arch/x86/include/asm/intel_punit_ipc.h
> >> @@ -1,10 +1,8 @@
> >>   #ifndef _ASM_X86_INTEL_PUNIT_IPC_H_
> >>   #define  _ASM_X86_INTEL_PUNIT_IPC_H_
> >>
> >> -/*
> >> - * Three types of 8bit P-Unit IPC commands are supported,
> >> - * bit[7:6]: [00]: BIOS; [01]: GTD; [10]: ISPD.
> >> - */
> >> +#include 
> >> +
> >>   typedef enum {
> >>BIOS_IPC = 0,
> >>GTDRIVER_IPC,
> >> @@ -12,61 +10,60 @@ typedef enum {
> >>RESERVED_IPC,
> >>   } IPC_TYPE;
> >>
> >> -#define IPC_TYPE_OFFSET   6
> >> -#define IPC_PUNIT_BIOS_CMD_BASE   (BIOS_IPC <<
> >> IPC_TYPE_OFFSET)
> >> -#define IPC_PUNIT_GTD_CMD_BASE(GTDDRIVER_IPC <<
> >> IPC_TYPE_OFFSET)
> >> -#define IPC_PUNIT_ISPD_CMD_BASE   (ISPDRIVER_IPC <<
> >> IPC_TYPE_OFFSET)
> >> -#define IPC_PUNIT_CMD_TYPE_MASK   (RESERVED_IPC <<
> >> IPC_TYPE_OFFSET)
> >> +#define PUNIT_BIOS_IPC_DEV"punit_bios_ipc"
> >> +#define PUNIT_GTD_IPC_DEV "punit_gtd_ipc"
> >> +#define PUNIT_ISP_IPC_DEV "punit_isp_ipc"
> >> +#define PUNIT_PARAM_LEN   3
> >>
> >>   /* BIOS => Pcode commands */
> >> -#define IPC_PUNIT_BIOS_ZERO
>   (IPC_PUNIT_BIOS_CMD_BASE
> >> | 0x00)
> >> -#define IPC_PUNIT_BIOS_VR_INTERFACE
> >>(IPC_PUNIT_BIOS_CMD_BASE | 0x01)
> >> -#define IPC_PUNIT_BIOS_READ_PCS
> >>(IPC_PUNIT_BIOS_CMD_BASE | 0x02)
> >> -#define IPC_PUNIT_BIOS_WRITE_PCS
>   (IPC_PUNIT_BIOS_CMD_BASE
> >> | 0x03)
> >> -#define IPC_PUNIT_BIOS_READ_PCU_CONFIG
> >>(IPC_PUNIT_BIOS_CMD_BASE | 0x04)
> >> -#define IPC_PUNIT_BIOS_WRITE_PCU_CONFIG
> >>(IPC_PUNIT_BIOS_CMD_BASE | 0x05)
> >> -#define IPC_PUNIT_BIOS_READ_PL1_SETTING
> >>(IPC_PUNIT_BIOS_CMD_BASE | 0x06)
> >> -#define IPC_PUNIT_BIOS_WRITE_PL1_SETTING
>   (IPC_PUNIT_BIOS_CMD_BASE
> >> | 0x07)
> >> -#define IPC_PUNIT_BIOS_TRIGGER_VDD_RAM
> >>(IPC_PUNIT_BIOS_CMD_BASE | 0x08)
> >> -#define IPC_PUNIT_BIOS_READ_TELE_INFO

RE: [RFC v5 6/8] platform/x86: intel_punit_ipc: Use generic intel ipc device calls

2017-10-10 Thread Chakravarty, Souvik K


> -Original Message-
> From: platform-driver-x86-ow...@vger.kernel.org [mailto:platform-driver-
> x86-ow...@vger.kernel.org] On Behalf Of sathyanarayanan kuppuswamy
> Sent: Wednesday, October 11, 2017 3:59 AM
> To: Chakravarty, Souvik K ;
> a.zu...@towertech.it; x...@kernel.org; w...@iguana.be;
> mi...@redhat.com; alexandre.bell...@free-electrons.com; Zha, Qipeng
> ; h...@zytor.com; dvh...@infradead.org;
> t...@linutronix.de; lee.jo...@linaro.org; a...@infradead.org
> Cc: linux-...@vger.kernel.org; linux-watch...@vger.kernel.org; linux-
> ker...@vger.kernel.org; platform-driver-...@vger.kernel.org;
> sathyao...@gmail.com
> Subject: Re: [RFC v5 6/8] platform/x86: intel_punit_ipc: Use generic intel ipc
> device calls
> 
> 
> 
> On 10/08/2017 10:07 PM, Chakravarty, Souvik K wrote:
> >> From: sathyanarayanan.kuppusw...@linux.intel.com
> >> [mailto:sathyanarayanan.kuppusw...@linux.intel.com]
> >> Sent: Sunday, October 8, 2017 3:50 AM
> >> To: a.zu...@towertech.it; x...@kernel.org; w...@iguana.be;
> >> mi...@redhat.com; alexandre.bell...@free-electrons.com; Zha, Qipeng
> >> ; h...@zytor.com; dvh...@infradead.org;
> >> t...@linutronix.de; lee.jo...@linaro.org; a...@infradead.org;
> >> Chakravarty, Souvik K 
> >> Cc: linux-...@vger.kernel.org; linux-watch...@vger.kernel.org; linux-
> >> ker...@vger.kernel.org; platform-driver-...@vger.kernel.org;
> >> sathyao...@gmail.com; Kuppuswamy Sathyanarayanan
> >> 
> >> Subject: [RFC v5 6/8] platform/x86: intel_punit_ipc: Use generic
> >> intel ipc device calls
> >>
> >> From: Kuppuswamy Sathyanarayanan
> >> 
> >>
> >> Removed redundant IPC helper functions and refactored the driver to
> >> use APIs provided by generic IPC driver. This patch also cleans-up
> >> PUNIT IPC user
> >> drivers(intel_telemetry_pltdrv.c) to use APIs provided by generic IPC
> driver.
> >>
> >> Signed-off-by: Kuppuswamy Sathyanarayanan
> >> 
> >> ---
> >>   arch/x86/include/asm/intel_punit_ipc.h| 125 +--
> >>   drivers/platform/x86/Kconfig  |   1 +
> >>   drivers/platform/x86/intel_punit_ipc.c| 303 
> >> ++
> >>   drivers/platform/x86/intel_telemetry_pltdrv.c |  97 +
> >>   4 files changed, 223 insertions(+), 303 deletions(-)
> >>
> >> Changes since v4:
> >>   * None
> >>
> >> Changes since v2:
> >>   * Added unique name to PUNIT BIOS, GTD, & ISP regmaps.
> >>   * Added intel_ipc_dev_put() support.
> >>
> >> Changes since v1:
> >>   * Removed custom APIs.
> >>   * Cleaned up PUNIT IPC user drivers to use APIs provided by generic
> >> IPC driver.
> >>
> >> diff --git a/arch/x86/include/asm/intel_punit_ipc.h
> >> b/arch/x86/include/asm/intel_punit_ipc.h
> >> index 201eb9d..cf1630c 100644
> >> --- a/arch/x86/include/asm/intel_punit_ipc.h
> >> +++ b/arch/x86/include/asm/intel_punit_ipc.h
> >> @@ -1,10 +1,8 @@
> >>   #ifndef _ASM_X86_INTEL_PUNIT_IPC_H_
> >>   #define  _ASM_X86_INTEL_PUNIT_IPC_H_
> >>
> >> -/*
> >> - * Three types of 8bit P-Unit IPC commands are supported,
> >> - * bit[7:6]: [00]: BIOS; [01]: GTD; [10]: ISPD.
> >> - */
> >> +#include 
> >> +
> >>   typedef enum {
> >>BIOS_IPC = 0,
> >>GTDRIVER_IPC,
> >> @@ -12,61 +10,60 @@ typedef enum {
> >>RESERVED_IPC,
> >>   } IPC_TYPE;
> >>
> >> -#define IPC_TYPE_OFFSET   6
> >> -#define IPC_PUNIT_BIOS_CMD_BASE   (BIOS_IPC <<
> >> IPC_TYPE_OFFSET)
> >> -#define IPC_PUNIT_GTD_CMD_BASE(GTDDRIVER_IPC <<
> >> IPC_TYPE_OFFSET)
> >> -#define IPC_PUNIT_ISPD_CMD_BASE   (ISPDRIVER_IPC <<
> >> IPC_TYPE_OFFSET)
> >> -#define IPC_PUNIT_CMD_TYPE_MASK   (RESERVED_IPC <<
> >> IPC_TYPE_OFFSET)
> >> +#define PUNIT_BIOS_IPC_DEV"punit_bios_ipc"
> >> +#define PUNIT_GTD_IPC_DEV "punit_gtd_ipc"
> >> +#define PUNIT_ISP_IPC_DEV "punit_isp_ipc"
> >> +#define PUNIT_PARAM_LEN   3
> >>
> >>   /* BIOS => Pcode commands */
> >> -#define IPC_PUNIT_BIOS_ZERO
>   (IPC_PUNIT_BIOS_CMD_BASE
> >> | 0x00)
> >> -#define IPC_PUNIT_BIOS_VR_INTERFACE
> >>(IPC_PUNIT_BIOS_CMD_BASE | 0x01)
> >> -#define IPC_PUNIT_BIOS_READ_PCS
> >>(IPC_PUNIT_BIOS_CMD_BASE | 0x02)
> >> -#define IPC_PUNIT_BIOS_WRITE_PCS
>   (IPC_PUNIT_BIOS_CMD_BASE
> >> | 0x03)
> >> -#define IPC_PUNIT_BIOS_READ_PCU_CONFIG
> >>(IPC_PUNIT_BIOS_CMD_BASE | 0x04)
> >> -#define IPC_PUNIT_BIOS_WRITE_PCU_CONFIG
> >>(IPC_PUNIT_BIOS_CMD_BASE | 0x05)
> >> -#define IPC_PUNIT_BIOS_READ_PL1_SETTING
> >>(IPC_PUNIT_BIOS_CMD_BASE | 0x06)
> >> -#define IPC_PUNIT_BIOS_WRITE_PL1_SETTING
>   (IPC_PUNIT_BIOS_CMD_BASE
> >> | 0x07)
> >> -#define IPC_PUNIT_BIOS_TRIGGER_VDD_RAM
> >>(IPC_PUNIT_BIOS_CMD_BASE | 0x08)
> >> -#define IPC_PUNIT_BIOS_READ_TELE_INFO
> >>(IPC_PUNIT_BIOS_CMD_BASE | 0x09)
> >> -#define IPC_PUNIT_BIOS_READ_TELE_TRACE_CTRL
> >>(IPC_PUNIT_BIOS_CMD_BASE | 0x0a)
> >> -#define IPC_PUNIT_BIOS_WRITE_TELE_TRACE_CTRL
> >>(IPC_PUNIT_BIOS_CMD_BASE | 0x0b)
> >> -#define 

Re: [PATCH] udf: Fix 64-bit sign extension issues affecting blocks > 0x7FFFFFFF

2017-10-10 Thread Steve Magnani

Jan -

On 10/10/2017 02:33 AM, Jan Kara wrote:

On Mon 09-10-17 10:04:52, Steve Magnani wrote:

...the patch seems to be mixing two changes into one which I'd prefer to be
  separate patches:

1) Changes so that physical block numbers are stored in uint32_t (and
accompanying format string changes). Also when doing this, could you please
create a dedicated type like

typedef uint32_t udf_pblk_t;

and use it instead of uint32_t? That way it would be cleaner what's going
on. Thanks!
I agree with this in principle and in fact do something like it in my 
application code for just that reason. But, doing a complete job of this 
in the driver would increase the scope far beyond what is needed to fix 
the bugs I see and beyond what I am able to support. Would it be 
acceptable to limit usage of this type to a subset of the places it 
could ultimately be used? (Example: use it in udf_readdir(), which has a 
bug requiring a type change, but not necessarily in udf_read_tagged(), 
which doesn't).



2) Changes fixing signedness in various format strings for various types -
put these in a separate patch please.
Sure - but would you be opposed to putting _all_ of the format string 
changes in that patch? There are some format string changes (i.e., in 
unicode.c) that obviously don't have anything to do with block numbers, 
but I think almost all of the rest do. It gets a little murky when block 
numbers or counts are used in calculations. The unifying idea behind all 
the format string changes is preventing sign extension from causing 
unsigned values to be printed as negative, so on that basis I think an 
argument can be made that they all "go" together.

--- a/fs/udf/balloc.c   (revision 26779)
+++ b/fs/udf/balloc.c   (working copy)

...

@@ -151,7 +151,7 @@
bh = bitmap->s_block_bitmap[bitmap_nr];
for (i = 0; i < count; i++) {
if (udf_set_bit(bit + i, bh->b_data)) {
-   udf_debug("bit %ld already set\n", bit + i);
+   udf_debug("bit %lu already set\n", bit + i);

This change looks wrong - bit and i are signed. However they are ints, not
longs, so that should indeed be fixed.
'bit' and 'i' are ints in the function _below_ this change, but unsigned 
long within this function. So I think this is correct.


Regards,


 Steven J. Magnani   "I claim this network for MARS!
 www.digidescorp.com  Earthling, return my space modulator!"

 #include 



Re: [PATCH] udf: Fix 64-bit sign extension issues affecting blocks > 0x7FFFFFFF

2017-10-10 Thread Steve Magnani

Jan -

On 10/10/2017 02:33 AM, Jan Kara wrote:

On Mon 09-10-17 10:04:52, Steve Magnani wrote:

...the patch seems to be mixing two changes into one which I'd prefer to be
  separate patches:

1) Changes so that physical block numbers are stored in uint32_t (and
accompanying format string changes). Also when doing this, could you please
create a dedicated type like

typedef uint32_t udf_pblk_t;

and use it instead of uint32_t? That way it would be cleaner what's going
on. Thanks!
I agree with this in principle and in fact do something like it in my 
application code for just that reason. But, doing a complete job of this 
in the driver would increase the scope far beyond what is needed to fix 
the bugs I see and beyond what I am able to support. Would it be 
acceptable to limit usage of this type to a subset of the places it 
could ultimately be used? (Example: use it in udf_readdir(), which has a 
bug requiring a type change, but not necessarily in udf_read_tagged(), 
which doesn't).



2) Changes fixing signedness in various format strings for various types -
put these in a separate patch please.
Sure - but would you be opposed to putting _all_ of the format string 
changes in that patch? There are some format string changes (i.e., in 
unicode.c) that obviously don't have anything to do with block numbers, 
but I think almost all of the rest do. It gets a little murky when block 
numbers or counts are used in calculations. The unifying idea behind all 
the format string changes is preventing sign extension from causing 
unsigned values to be printed as negative, so on that basis I think an 
argument can be made that they all "go" together.

--- a/fs/udf/balloc.c   (revision 26779)
+++ b/fs/udf/balloc.c   (working copy)

...

@@ -151,7 +151,7 @@
bh = bitmap->s_block_bitmap[bitmap_nr];
for (i = 0; i < count; i++) {
if (udf_set_bit(bit + i, bh->b_data)) {
-   udf_debug("bit %ld already set\n", bit + i);
+   udf_debug("bit %lu already set\n", bit + i);

This change looks wrong - bit and i are signed. However they are ints, not
longs, so that should indeed be fixed.
'bit' and 'i' are ints in the function _below_ this change, but unsigned 
long within this function. So I think this is correct.


Regards,


 Steven J. Magnani   "I claim this network for MARS!
 www.digidescorp.com  Earthling, return my space modulator!"

 #include 



Re: [PATCH] scripts/checkpatch.pl: fix false warning of externschecking.

2017-10-10 Thread Joe Perches
On Wed, 2017-10-11 at 11:21 +0800, jiang.bi...@zte.com.cn wrote:
> > On Tue, 2017-10-10 at 16:42 +0800, Jiang Biao wrote:
> > > When adding a function declaration in a .c file without an extern
> > > keyword decoration, checkpatch.pl will complain *externs should be
> > > avoided in .c files* false warning. This patch fixes the bug.
> > nack.
> > The point is that external function declarations should be
> > done via #include.
> Hi Joe,

Hello Jiang

> Understood. But there are already some external function declarations in 
> .c files in current kernel, for instance line 505 in kernel/stop_machine.c,
> 
> extern void sched_set_stop_task(int cpu, struct task_struct *stop);
> 
> Should these be fixed? But maybe in some rare cases, external function 
> declarations in .c files are necessary.

Perhaps, but in those cases checkpatch messages should be ignored.

cheers, Joe


Re: [PATCH] scripts/checkpatch.pl: fix false warning of externschecking.

2017-10-10 Thread Joe Perches
On Wed, 2017-10-11 at 11:21 +0800, jiang.bi...@zte.com.cn wrote:
> > On Tue, 2017-10-10 at 16:42 +0800, Jiang Biao wrote:
> > > When adding a function declaration in a .c file without an extern
> > > keyword decoration, checkpatch.pl will complain *externs should be
> > > avoided in .c files* false warning. This patch fixes the bug.
> > nack.
> > The point is that external function declarations should be
> > done via #include.
> Hi Joe,

Hello Jiang

> Understood. But there are already some external function declarations in 
> .c files in current kernel, for instance line 505 in kernel/stop_machine.c,
> 
> extern void sched_set_stop_task(int cpu, struct task_struct *stop);
> 
> Should these be fixed? But maybe in some rare cases, external function 
> declarations in .c files are necessary.

Perhaps, but in those cases checkpatch messages should be ignored.

cheers, Joe


Re: [PATCH 0/3] add %pX specifier

2017-10-10 Thread Joe Perches
On Wed, 2017-10-11 at 10:32 +1100, Tobin C. Harding wrote:
> On Tue, Oct 10, 2017 at 04:15:01PM -0700, Linus Torvalds wrote:
> > On Tue, Oct 10, 2017 at 4:09 PM, Tobin C. Harding  wrote:
> > > 
> > > I did not understand the code (specifically why the right shift of 16 
> > > twice?)
> > 
> > It's a traditional trick to get the upper 32 bits.
> > 
> > So it basically splits the (possibly 64-bit) pointer into the lower 32
> > bits and the upper 32 bits for a hash such as "jhash()" that takes
> > data that is "unsigned int".
> > 
> > (NOTE! Using jhash here is not acceptable, since it's not
> > cryptographically safe, but think of it as an example of a hash that
> > takes 32-bit input).
> > 
> > Doing ">> 32" is undefined on 32-bit architectures, and wouldn't work.
> > 
> > But doing >> 16 >> 16 is a fine way to say "shift right by 32 on a
> > 64-bit architecture" while also being well-defined on a 32-bit one.
> > 
> >Linus
> 
> Awesome, thanks.

Another way is using the upper_32_bits() macro.
It's perhaps a bit more readable.


Re: [PATCH 0/3] add %pX specifier

2017-10-10 Thread Joe Perches
On Wed, 2017-10-11 at 10:32 +1100, Tobin C. Harding wrote:
> On Tue, Oct 10, 2017 at 04:15:01PM -0700, Linus Torvalds wrote:
> > On Tue, Oct 10, 2017 at 4:09 PM, Tobin C. Harding  wrote:
> > > 
> > > I did not understand the code (specifically why the right shift of 16 
> > > twice?)
> > 
> > It's a traditional trick to get the upper 32 bits.
> > 
> > So it basically splits the (possibly 64-bit) pointer into the lower 32
> > bits and the upper 32 bits for a hash such as "jhash()" that takes
> > data that is "unsigned int".
> > 
> > (NOTE! Using jhash here is not acceptable, since it's not
> > cryptographically safe, but think of it as an example of a hash that
> > takes 32-bit input).
> > 
> > Doing ">> 32" is undefined on 32-bit architectures, and wouldn't work.
> > 
> > But doing >> 16 >> 16 is a fine way to say "shift right by 32 on a
> > 64-bit architecture" while also being well-defined on a 32-bit one.
> > 
> >Linus
> 
> Awesome, thanks.

Another way is using the upper_32_bits() macro.
It's perhaps a bit more readable.


Re: [PATCH v1 2/2] mtd: mtk-nor: add suspend/resume support

2017-10-10 Thread Guochun Mao
gentle ping...

On Thu, 2017-09-21 at 20:45 +0800, Guochun Mao wrote:
> Abstract functions of clock setting, to avoid duplicated code,
> these functions been used in new feature.
> Implement suspend/resume functions.
> 
> Signed-off-by: Guochun Mao 
> ---
>  drivers/mtd/spi-nor/mtk-quadspi.c |   70 
> ++---
>  1 file changed, 58 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/mtd/spi-nor/mtk-quadspi.c 
> b/drivers/mtd/spi-nor/mtk-quadspi.c
> index c258c7a..abe455c 100644
> --- a/drivers/mtd/spi-nor/mtk-quadspi.c
> +++ b/drivers/mtd/spi-nor/mtk-quadspi.c
> @@ -404,6 +404,29 @@ static int mt8173_nor_write_reg(struct spi_nor *nor, u8 
> opcode, u8 *buf,
>   return ret;
>  }
>  
> +static void mt8173_nor_disable_clk(struct mt8173_nor *mt8173_nor)
> +{
> + clk_disable_unprepare(mt8173_nor->spi_clk);
> + clk_disable_unprepare(mt8173_nor->nor_clk);
> +}
> +
> +static int mt8173_nor_enable_clk(struct mt8173_nor *mt8173_nor)
> +{
> + int ret;
> +
> + ret = clk_prepare_enable(mt8173_nor->spi_clk);
> + if (ret)
> + return ret;
> +
> + ret = clk_prepare_enable(mt8173_nor->nor_clk);
> + if (ret) {
> + clk_disable_unprepare(mt8173_nor->spi_clk);
> + return ret;
> + }
> +
> + return 0;
> +}
> +
>  static int mtk_nor_init(struct mt8173_nor *mt8173_nor,
>   struct device_node *flash_node)
>  {
> @@ -468,15 +491,11 @@ static int mtk_nor_drv_probe(struct platform_device 
> *pdev)
>   return PTR_ERR(mt8173_nor->nor_clk);
>  
>   mt8173_nor->dev = &pdev->dev;
> - ret = clk_prepare_enable(mt8173_nor->spi_clk);
> +
> + ret = mt8173_nor_enable_clk(mt8173_nor);
>   if (ret)
>   return ret;
>  
> - ret = clk_prepare_enable(mt8173_nor->nor_clk);
> - if (ret) {
> - clk_disable_unprepare(mt8173_nor->spi_clk);
> - return ret;
> - }
>   /* only support one attached flash */
>   flash_np = of_get_next_available_child(pdev->dev.of_node, NULL);
>   if (!flash_np) {
> @@ -487,10 +506,9 @@ static int mtk_nor_drv_probe(struct platform_device 
> *pdev)
>   ret = mtk_nor_init(mt8173_nor, flash_np);
>  
>  nor_free:
> - if (ret) {
> - clk_disable_unprepare(mt8173_nor->spi_clk);
> - clk_disable_unprepare(mt8173_nor->nor_clk);
> - }
> + if (ret)
> + mt8173_nor_disable_clk(mt8173_nor);
> +
>   return ret;
>  }
>  
> @@ -498,11 +516,38 @@ static int mtk_nor_drv_remove(struct platform_device 
> *pdev)
>  {
>   struct mt8173_nor *mt8173_nor = platform_get_drvdata(pdev);
>  
> - clk_disable_unprepare(mt8173_nor->spi_clk);
> - clk_disable_unprepare(mt8173_nor->nor_clk);
> + mt8173_nor_disable_clk(mt8173_nor);
> +
> + return 0;
> +}
> +
> +#ifdef CONFIG_PM_SLEEP
> +static int mtk_nor_suspend(struct device *dev)
> +{
> + struct mt8173_nor *mt8173_nor = dev_get_drvdata(dev);
> +
> + mt8173_nor_disable_clk(mt8173_nor);
> +
>   return 0;
>  }
>  
> +static int mtk_nor_resume(struct device *dev)
> +{
> + struct mt8173_nor *mt8173_nor = dev_get_drvdata(dev);
> +
> + return mt8173_nor_enable_clk(mt8173_nor);
> +}
> +
> +static const struct dev_pm_ops mtk_nor_dev_pm_ops = {
> + .suspend = mtk_nor_suspend,
> + .resume = mtk_nor_resume,
> +};
> +
> +#define MTK_NOR_DEV_PM_OPS   (&mtk_nor_dev_pm_ops)
> +#else
> +#define MTK_NOR_DEV_PM_OPS   NULL
> +#endif
> +
>  static const struct of_device_id mtk_nor_of_ids[] = {
>   { .compatible = "mediatek,mt8173-nor"},
>   { /* sentinel */ }
> @@ -514,6 +559,7 @@ static int mtk_nor_drv_remove(struct platform_device 
> *pdev)
>   .remove = mtk_nor_drv_remove,
>   .driver = {
>   .name = "mtk-nor",
> + .pm = MTK_NOR_DEV_PM_OPS,
>   .of_match_table = mtk_nor_of_ids,
>   },
>  };




Re: [PATCH v1 2/2] mtd: mtk-nor: add suspend/resume support

2017-10-10 Thread Guochun Mao
gentle ping...

On Thu, 2017-09-21 at 20:45 +0800, Guochun Mao wrote:
> Abstract the clock-setting functions to avoid duplicated code;
> these functions are also used by the new feature.
> Implement suspend/resume functions.
> 
> Signed-off-by: Guochun Mao 
> ---
>  drivers/mtd/spi-nor/mtk-quadspi.c |   70 
> ++---
>  1 file changed, 58 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/mtd/spi-nor/mtk-quadspi.c 
> b/drivers/mtd/spi-nor/mtk-quadspi.c
> index c258c7a..abe455c 100644
> --- a/drivers/mtd/spi-nor/mtk-quadspi.c
> +++ b/drivers/mtd/spi-nor/mtk-quadspi.c
> @@ -404,6 +404,29 @@ static int mt8173_nor_write_reg(struct spi_nor *nor, u8 
> opcode, u8 *buf,
>   return ret;
>  }
>  
> +static void mt8173_nor_disable_clk(struct mt8173_nor *mt8173_nor)
> +{
> + clk_disable_unprepare(mt8173_nor->spi_clk);
> + clk_disable_unprepare(mt8173_nor->nor_clk);
> +}
> +
> +static int mt8173_nor_enable_clk(struct mt8173_nor *mt8173_nor)
> +{
> + int ret;
> +
> + ret = clk_prepare_enable(mt8173_nor->spi_clk);
> + if (ret)
> + return ret;
> +
> + ret = clk_prepare_enable(mt8173_nor->nor_clk);
> + if (ret) {
> + clk_disable_unprepare(mt8173_nor->spi_clk);
> + return ret;
> + }
> +
> + return 0;
> +}
> +
>  static int mtk_nor_init(struct mt8173_nor *mt8173_nor,
>   struct device_node *flash_node)
>  {
> @@ -468,15 +491,11 @@ static int mtk_nor_drv_probe(struct platform_device 
> *pdev)
>   return PTR_ERR(mt8173_nor->nor_clk);
>  
>   mt8173_nor->dev = &pdev->dev;
> - ret = clk_prepare_enable(mt8173_nor->spi_clk);
> +
> + ret = mt8173_nor_enable_clk(mt8173_nor);
>   if (ret)
>   return ret;
>  
> - ret = clk_prepare_enable(mt8173_nor->nor_clk);
> - if (ret) {
> - clk_disable_unprepare(mt8173_nor->spi_clk);
> - return ret;
> - }
>   /* only support one attached flash */
>   flash_np = of_get_next_available_child(pdev->dev.of_node, NULL);
>   if (!flash_np) {
> @@ -487,10 +506,9 @@ static int mtk_nor_drv_probe(struct platform_device 
> *pdev)
>   ret = mtk_nor_init(mt8173_nor, flash_np);
>  
>  nor_free:
> - if (ret) {
> - clk_disable_unprepare(mt8173_nor->spi_clk);
> - clk_disable_unprepare(mt8173_nor->nor_clk);
> - }
> + if (ret)
> + mt8173_nor_disable_clk(mt8173_nor);
> +
>   return ret;
>  }
>  
> @@ -498,11 +516,38 @@ static int mtk_nor_drv_remove(struct platform_device 
> *pdev)
>  {
>   struct mt8173_nor *mt8173_nor = platform_get_drvdata(pdev);
>  
> - clk_disable_unprepare(mt8173_nor->spi_clk);
> - clk_disable_unprepare(mt8173_nor->nor_clk);
> + mt8173_nor_disable_clk(mt8173_nor);
> +
> + return 0;
> +}
> +
> +#ifdef CONFIG_PM_SLEEP
> +static int mtk_nor_suspend(struct device *dev)
> +{
> + struct mt8173_nor *mt8173_nor = dev_get_drvdata(dev);
> +
> + mt8173_nor_disable_clk(mt8173_nor);
> +
>   return 0;
>  }
>  
> +static int mtk_nor_resume(struct device *dev)
> +{
> + struct mt8173_nor *mt8173_nor = dev_get_drvdata(dev);
> +
> + return mt8173_nor_enable_clk(mt8173_nor);
> +}
> +
> +static const struct dev_pm_ops mtk_nor_dev_pm_ops = {
> + .suspend = mtk_nor_suspend,
> + .resume = mtk_nor_resume,
> +};
> +
> +#define MTK_NOR_DEV_PM_OPS   (&mtk_nor_dev_pm_ops)
> +#else
> +#define MTK_NOR_DEV_PM_OPS   NULL
> +#endif
> +
>  static const struct of_device_id mtk_nor_of_ids[] = {
>   { .compatible = "mediatek,mt8173-nor"},
>   { /* sentinel */ }
> @@ -514,6 +559,7 @@ static int mtk_nor_drv_remove(struct platform_device 
> *pdev)
>   .remove = mtk_nor_drv_remove,
>   .driver = {
>   .name = "mtk-nor",
> + .pm = MTK_NOR_DEV_PM_OPS,
>   .of_match_table = mtk_nor_of_ids,
>   },
>  };




Re: [PATCH v1 1/2] dt-bindings: mtd: add new compatible strings and improve description

2017-10-10 Thread Guochun Mao
gentle ping...

On Thu, 2017-09-21 at 20:45 +0800, Guochun Mao wrote:
> Add "mediatek,mt2712-nor" and "mediatek,mt7622-nor"
> for nor flash node's compatible strings.
> Explicate the fallback compatible.
> 
> Acked-by: Rob Herring 
> Signed-off-by: Guochun Mao 
> ---
>  .../devicetree/bindings/mtd/mtk-quadspi.txt|   15 +--
>  1 file changed, 9 insertions(+), 6 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/mtd/mtk-quadspi.txt 
> b/Documentation/devicetree/bindings/mtd/mtk-quadspi.txt
> index 840f940..56d3668 100644
> --- a/Documentation/devicetree/bindings/mtd/mtk-quadspi.txt
> +++ b/Documentation/devicetree/bindings/mtd/mtk-quadspi.txt
> @@ -1,13 +1,16 @@
>  * Serial NOR flash controller for MTK MT81xx (and similar)
>  
>  Required properties:
> -- compatible:  The possible values are:
> -   "mediatek,mt2701-nor"
> -   "mediatek,mt7623-nor"
> +- compatible:  For mt8173, compatible should be 
> "mediatek,mt8173-nor",
> +   and it's the fallback compatible for other Soc.
> +   For every other SoC, should contain both the SoC-specific 
> compatible
> +   string and "mediatek,mt8173-nor".
> +   The possible values are:
> +   "mediatek,mt2701-nor", "mediatek,mt8173-nor"
> +   "mediatek,mt2712-nor", "mediatek,mt8173-nor"
> +   "mediatek,mt7622-nor", "mediatek,mt8173-nor"
> +   "mediatek,mt7623-nor", "mediatek,mt8173-nor"
> "mediatek,mt8173-nor"
> -   For mt8173, compatible should be "mediatek,mt8173-nor".
> -   For every other SoC, should contain both the SoC-specific 
> compatible string
> -   and "mediatek,mt8173-nor".
>  - reg: physical base address and length of the controller's 
> register
>  - clocks:  the phandle of the clocks needed by the nor controller
>  - clock-names: the names of the clocks




Re: [PATCH v1 1/2] dt-bindings: mtd: add new compatible strings and improve description

2017-10-10 Thread Guochun Mao
gentle ping...

On Thu, 2017-09-21 at 20:45 +0800, Guochun Mao wrote:
> Add "mediatek,mt2712-nor" and "mediatek,mt7622-nor"
> for nor flash node's compatible strings.
> Explicate the fallback compatible.
> 
> Acked-by: Rob Herring 
> Signed-off-by: Guochun Mao 
> ---
>  .../devicetree/bindings/mtd/mtk-quadspi.txt|   15 +--
>  1 file changed, 9 insertions(+), 6 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/mtd/mtk-quadspi.txt 
> b/Documentation/devicetree/bindings/mtd/mtk-quadspi.txt
> index 840f940..56d3668 100644
> --- a/Documentation/devicetree/bindings/mtd/mtk-quadspi.txt
> +++ b/Documentation/devicetree/bindings/mtd/mtk-quadspi.txt
> @@ -1,13 +1,16 @@
>  * Serial NOR flash controller for MTK MT81xx (and similar)
>  
>  Required properties:
> -- compatible:  The possible values are:
> -   "mediatek,mt2701-nor"
> -   "mediatek,mt7623-nor"
> +- compatible:  For mt8173, compatible should be 
> "mediatek,mt8173-nor",
> +   and it's the fallback compatible for other Soc.
> +   For every other SoC, should contain both the SoC-specific 
> compatible
> +   string and "mediatek,mt8173-nor".
> +   The possible values are:
> +   "mediatek,mt2701-nor", "mediatek,mt8173-nor"
> +   "mediatek,mt2712-nor", "mediatek,mt8173-nor"
> +   "mediatek,mt7622-nor", "mediatek,mt8173-nor"
> +   "mediatek,mt7623-nor", "mediatek,mt8173-nor"
> "mediatek,mt8173-nor"
> -   For mt8173, compatible should be "mediatek,mt8173-nor".
> -   For every other SoC, should contain both the SoC-specific 
> compatible string
> -   and "mediatek,mt8173-nor".
>  - reg: physical base address and length of the controller's 
> register
>  - clocks:  the phandle of the clocks needed by the nor controller
>  - clock-names: the names of the clocks




Re: [PATCH v16 3/5] virtio-balloon: VIRTIO_BALLOON_F_SG

2017-10-10 Thread Wei Wang
On 10/11/2017 10:26 AM, Tetsuo Handa wrote:
> Wei Wang wrote:
>> On 10/10/2017 09:09 PM, Tetsuo Handa wrote:
>>> Wei Wang wrote:
> And even if we could remove balloon_lock, you still cannot use
> __GFP_DIRECT_RECLAIM at xb_set_page(). I think you will need to use
> "whether it is safe to wait" flag from
> "[PATCH] virtio: avoid possible OOM lockup at virtballoon_oom_notify()" .
 Without the lock being held, why couldn't we use __GFP_DIRECT_RECLAIM at
 xb_set_page()?
>>> Because of dependency shown below.
>>>
>>> leak_balloon()
>>>xb_set_page()
>>>  xb_preload(GFP_KERNEL)
>>>kmalloc(GFP_KERNEL)
>>>  __alloc_pages_may_oom()
>>>Takes oom_lock
>>>out_of_memory()
>>>  blocking_notifier_call_chain()
>>>leak_balloon()
>>>  xb_set_page()
>>>xb_preload(GFP_KERNEL)
>>>  kmalloc(GFP_KERNEL)
>>>__alloc_pages_may_oom()
>>>  Fails to take oom_lock and loop forever
>> __alloc_pages_may_oom() uses mutex_trylock(&oom_lock).
> Yes. But this mutex_trylock(&oom_lock) is semantically mutex_lock(&oom_lock)
> because __alloc_pages_slowpath() will continue looping until
> mutex_trylock(&oom_lock) succeeds (or somebody releases memory).
>
>> I think the second __alloc_pages_may_oom() will not continue since the
>> first one is in progress.
> The second __alloc_pages_may_oom() will be called repeatedly because
> __alloc_pages_slowpath() will continue looping (unless somebody releases
> memory).
>

OK, I see, thanks. So, the point is that the OOM code path should not
allocate memory, and the old leak_balloon() (without the F_SG feature)
doesn't need xb_preload(). I think one solution would be to let the OOM
path use the old leak_balloon() code path, and we can add one more
parameter to leak_balloon() to control that:

leak_balloon(struct virtio_balloon *vb, size_t num, bool oom)



>>> By the way, is xb_set_page() safe?
>>> Sleeping in the kernel with preemption disabled is a bug, isn't it?
>>> __radix_tree_preload() returns 0 with preemption disabled upon success.
>>> xb_preload() disables preemption if __radix_tree_preload() fails.
>>> Then, kmalloc() is called with preemption disabled, isn't it?
>>> But xb_set_page() calls xb_preload(GFP_KERNEL) which might sleep with
>>> preemption disabled.
>> Yes, I think that should not be expected, thanks.
>>
>> I plan to change it like this:
>>
>> bool xb_preload(gfp_t gfp)
>> {
>> if (!this_cpu_read(ida_bitmap)) {
>> struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp);
>>
>> if (!bitmap)
>> return false;
>> bitmap = this_cpu_cmpxchg(ida_bitmap, NULL, bitmap);
>> kfree(bitmap);
>> }
> Excuse me, but you are allocating per-CPU memory when running CPU might
> change at this line? What happens if running CPU has changed at this line?
> Will it work even with new CPU's ida_bitmap == NULL ?
>


Yes, it will be detected in xb_set_bit(): when ida_bitmap == NULL on the
new CPU, xb_set_bit() will return -EAGAIN to the caller, and the caller
should restart from xb_preload().

Best,
Wei





Re: [PATCH v16 3/5] virtio-balloon: VIRTIO_BALLOON_F_SG

2017-10-10 Thread Wei Wang
On 10/11/2017 10:26 AM, Tetsuo Handa wrote:
> Wei Wang wrote:
>> On 10/10/2017 09:09 PM, Tetsuo Handa wrote:
>>> Wei Wang wrote:
> And even if we could remove balloon_lock, you still cannot use
> __GFP_DIRECT_RECLAIM at xb_set_page(). I think you will need to use
> "whether it is safe to wait" flag from
> "[PATCH] virtio: avoid possible OOM lockup at virtballoon_oom_notify()" .
 Without the lock being held, why couldn't we use __GFP_DIRECT_RECLAIM at
 xb_set_page()?
>>> Because of dependency shown below.
>>>
>>> leak_balloon()
>>>xb_set_page()
>>>  xb_preload(GFP_KERNEL)
>>>kmalloc(GFP_KERNEL)
>>>  __alloc_pages_may_oom()
>>>Takes oom_lock
>>>out_of_memory()
>>>  blocking_notifier_call_chain()
>>>leak_balloon()
>>>  xb_set_page()
>>>xb_preload(GFP_KERNEL)
>>>  kmalloc(GFP_KERNEL)
>>>__alloc_pages_may_oom()
>>>  Fails to take oom_lock and loop forever
>> __alloc_pages_may_oom() uses mutex_trylock(&oom_lock).
> Yes. But this mutex_trylock(&oom_lock) is semantically mutex_lock(&oom_lock)
> because __alloc_pages_slowpath() will continue looping until
> mutex_trylock(&oom_lock) succeeds (or somebody releases memory).
>
>> I think the second __alloc_pages_may_oom() will not continue since the
>> first one is in progress.
> The second __alloc_pages_may_oom() will be called repeatedly because
> __alloc_pages_slowpath() will continue looping (unless somebody releases
> memory).
>

OK, I see, thanks. So, the point is that the OOM code path should not
allocate memory, and the old leak_balloon() (without the F_SG feature)
doesn't need xb_preload(). I think one solution would be to let the OOM
path use the old leak_balloon() code path, and we can add one more
parameter to leak_balloon() to control that:

leak_balloon(struct virtio_balloon *vb, size_t num, bool oom)



>>> By the way, is xb_set_page() safe?
>>> Sleeping in the kernel with preemption disabled is a bug, isn't it?
>>> __radix_tree_preload() returns 0 with preemption disabled upon success.
>>> xb_preload() disables preemption if __radix_tree_preload() fails.
>>> Then, kmalloc() is called with preemption disabled, isn't it?
>>> But xb_set_page() calls xb_preload(GFP_KERNEL) which might sleep with
>>> preemption disabled.
>> Yes, I think that should not be expected, thanks.
>>
>> I plan to change it like this:
>>
>> bool xb_preload(gfp_t gfp)
>> {
>> if (!this_cpu_read(ida_bitmap)) {
>> struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp);
>>
>> if (!bitmap)
>> return false;
>> bitmap = this_cpu_cmpxchg(ida_bitmap, NULL, bitmap);
>> kfree(bitmap);
>> }
> Excuse me, but you are allocating per-CPU memory when running CPU might
> change at this line? What happens if running CPU has changed at this line?
> Will it work even with new CPU's ida_bitmap == NULL ?
>


Yes, it will be detected in xb_set_bit(): when ida_bitmap == NULL on the
new CPU, xb_set_bit() will return -EAGAIN to the caller, and the caller
should restart from xb_preload().

Best,
Wei





Re: [PATCH 0/2] Add support for ZSTD-compressed kernel

2017-10-10 Thread Adam Borowski
On Wed, Oct 11, 2017 at 02:01:41AM +0000, Nick Terrell wrote:
> On 10/10/17, 5:08 PM, "Adam Borowski"  wrote:
> > On Tue, Oct 10, 2017 at 10:40:13PM +0000, Nick Terrell wrote:
> > > On 10/10/17, 2:56 PM, "h...@zytor.com"  wrote:
> > > >On October 10, 2017 2:22:42 PM PDT, Nick Terrell  wrote:
> > > >>This patch set adds support for a ZSTD-compressed kernel and ramdisk
> > > >>images in the kernel boot process. It only integrates the support with
> > > >>x86, though the first patch is generic to all architectures.
> 
> > > Comparing the command line tools on a kernel image that is 68970616 B 
> > > large:
> > >
> > > | Algorithm | Compression Ratio | Decompression MB/s |
> > > |-----------|-------------------|--------------------|
> > > | zstd      |              4.42 |              436.5 |
> > > | gzip      |              3.72 |              134.1 |
> > > | xz        |              4.83 |               53.1 |
> > > | lz4       |              3.18 |             1682.2 |
> > > | lzo       |              3.36 |              389.6 |
> > > | bzip2     |              4.03 |               33.3 |
> > Perhaps it'd be a good idea to cull some of bad algorithms?  I don't know
> > the memory used by those, but envelope of the table you just shown suggests
> > using bzip2 and lzo is pointless.  So is gzip, but it's widespread as the
> > default for initramfs producers, thus it'd be unsafe to kill it.
> 
> I'm not sure there is a great use case for bzip2. It requires more memory
> than xz, compresses worse, and decompresses slower. lzo in the kernel might
> decompress a bit faster than zstd (looking back at the BtrFS benchmarks, it
> did). More importantly, it uses less memory than zstd. When decompressing
> the kernel zstd only needs 192 KB, but for initramfs, it will need more.
> Still, unless you really need 5% more compression, lz4 is probably a better
> option than lzo for speed.

The main reason I'm raising this question is, bzip2 has no other users in
the kernel, thus removing support for it would allow us to delete its code.

As for lzo, even if there are cases when it's a bit faster (overcoming the
9% lead zstd has in your userspace benchmark), it wouldn't be faster by
much -- certainly nowhere near enough to make anyone want the massive
compression loss.

Thus, let's enumerate use cases:
* a fast modern machine: zstd boots a second faster at the cost of 1.3MB
  image size; doesn't matter much either way.  No one wants weaker options.
* a weaker machine: zstd rapidly gains as boot times rise.
* a very slow machine with fast I/O that boots often: lz4.
* disk space at extreme premium: xz.

In no case I can think of are the other algorithms the rational choice.
There's little gain in removing the rest, though: the code is used for other
purposes in the kernel, and doesn't get compiled in unless manually
selected.

Zstd looks like a good default, but it's nowhere near mature enough: this
patch is x86-only, tools which people may use to analyze compressed kernels
don't know about zstd yet, etc.

Thus, I'd propose the following plan:
* add zstd
* remove bzip2
* update recommendations (Kconfig text)
* in a year or two, consider making zstd the default

> > > I know that this isn't a real benchmark of the kernel decompression. I
> > > still need to figure out how to time the kernel decompression. If you have
> > > any suggestions let me know. Otherwise, I'll get back to you when I've
> > > figured out how to run the benchmark.
> 
> I've found a way to benchmark the kernel decompression time during boot
> with QEMU. I add timestamps to every line of the output. I also had to
> print 100 lines before the decompression starts to get consistent results.
> 
> I've found that zstd is decompressing 2x slower than it should. I narrowed
> down the problem to ZSTD_wildcopy() and ZSTD_copy8() in
> lib/zstd/zstd_internal.h. ZSTD_wildcopy() calls memcpy(src, dst, 8) in
> a loop and doesn't handle the freestanding memcpy() well. Replacing it with
> __builtin_memcpy(src, dst, 8) doubles the speed.

... and by maturing I meant issues like this.

> I'm not an expert in freestanding gcc compilation, but I believe it is okay
> to call __builtin_memcpy() in freestanding mode, and gcc will either
> inline it, or add the right function call. The difference being that gcc
> will be able to apply its memcpy() analysis. I also see that
> arch/x86/boot/string.h defines memcpy() to __builtin_memcpy. Is it safe to
> directly use __builtin_memcpy() in lib/zstd/zstd_internal.h?

Try it, and see if it builds and fails to crash. :)


Meow!
-- 
⢀⣴⠾⠻⢶⣦⠀ We domesticated dogs 36000 years ago; together we chased
⣾⠁⢰⠒⠀⣿⡁ animals, hung out and licked or scratched our private parts.
⢿⡄⠘⠷⠚⠋⠀ Cats domesticated us 9500 years ago, and immediately we got
⠈⠳⣄ agriculture, towns then cities. -- whitroth on /.


Re: [PATCH 0/2] Add support for ZSTD-compressed kernel

2017-10-10 Thread Adam Borowski
On Wed, Oct 11, 2017 at 02:01:41AM +0000, Nick Terrell wrote:
> On 10/10/17, 5:08 PM, "Adam Borowski"  wrote:
> > On Tue, Oct 10, 2017 at 10:40:13PM +0000, Nick Terrell wrote:
> > > On 10/10/17, 2:56 PM, "h...@zytor.com"  wrote:
> > > >On October 10, 2017 2:22:42 PM PDT, Nick Terrell  wrote:
> > > >>This patch set adds support for a ZSTD-compressed kernel and ramdisk
> > > >>images in the kernel boot process. It only integrates the support with
> > > >>x86, though the first patch is generic to all architectures.
> 
> > > Comparing the command line tools on a kernel image that is 68970616 B 
> > > large:
> > >
> > > | Algorithm | Compression Ratio | Decompression MB/s |
> > > |-----------|-------------------|--------------------|
> > > | zstd      |              4.42 |              436.5 |
> > > | gzip      |              3.72 |              134.1 |
> > > | xz        |              4.83 |               53.1 |
> > > | lz4       |              3.18 |             1682.2 |
> > > | lzo       |              3.36 |              389.6 |
> > > | bzip2     |              4.03 |               33.3 |
> > Perhaps it'd be a good idea to cull some of bad algorithms?  I don't know
> > the memory used by those, but envelope of the table you just shown suggests
> > using bzip2 and lzo is pointless.  So is gzip, but it's widespread as the
> > default for initramfs producers, thus it'd be unsafe to kill it.
> 
> I'm not sure there is a great use case for bzip2. It requires more memory
> than xz, compresses worse, and decompresses slower. lzo in the kernel might
> decompress a bit faster than zstd (looking back at the BtrFS benchmarks, it
> did). More importantly, it uses less memory than zstd. When decompressing
> the kernel zstd only needs 192 KB, but for initramfs, it will need more.
> Still, unless you really need 5% more compression, lz4 is probably a better
> option than lzo for speed.

The main reason I'm raising this question is, bzip2 has no other users in
the kernel, thus removing support for it would allow us to delete its code.

As for lzo, even if there are cases when it's a bit faster (overcoming the
9% lead zstd has in your userspace benchmark), it wouldn't be faster by
much -- certainly nowhere near enough to make anyone want the massive
compression loss.

Thus, let's enumerate use cases:
* a fast modern machine: zstd boots a second faster at the cost of 1.3MB
  image size; doesn't matter much either way.  No one wants weaker options.
* a weaker machine: zstd rapidly gains as boot times rise.
* a very slow machine with fast I/O that boots often: lz4.
* disk space at extreme premium: xz.

In no case I can think of are the other algorithms the rational choice.
There's little gain in removing the rest, though: the code is used for other
purposes in the kernel, and doesn't get compiled in unless manually
selected.

Zstd looks like a good default, but it's nowhere near mature enough: this
patch is x86-only, tools which people may use to analyze compressed kernels
don't know about zstd yet, etc.

Thus, I'd propose the following plan:
* add zstd
* remove bzip2
* update recommendations (Kconfig text)
* in a year or two, consider making zstd the default

> > > I know that this isn't a real benchmark of the kernel decompression. I
> > > still need to figure out how to time the kernel decompression. If you have
> > > any suggestions let me know. Otherwise, I'll get back to you when I've
> > > figured out how to run the benchmark.
> 
> I've found a way to benchmark the kernel decompression time during boot
> with QEMU. I add timestamps to every line of the output. I also had to
> print 100 lines before the decompression starts to get consistent results.
> 
> I've found that zstd is decompressing 2x slower than it should. I narrowed
> down the problem to ZSTD_wildcopy() and ZSTD_copy8() in
> lib/zstd/zstd_internal.h. ZSTD_wildcopy() calls memcpy(src, dst, 8) in
> a loop and doesn't handle the freestanding memcpy() well. Replacing it with
> __builtin_memcpy(src, dst, 8) doubles the speed.

... and by maturing I meant issues like this.

> I'm not an expert in freestanding gcc compilation, but I believe it is okay
> to call __builtin_memcpy() in freestanding mode, and gcc will either
> inline it, or add the right function call. The difference being that gcc
> will be able to apply its memcpy() analysis. I also see that
> arch/x86/boot/string.h defines memcpy() to __builtin_memcpy. Is it safe to
> directly use __builtin_memcpy() in lib/zstd/zstd_internal.h?

Try it, and see if it builds and fails to crash. :)


Meow!
-- 
⢀⣴⠾⠻⢶⣦⠀ We domesticated dogs 36000 years ago; together we chased
⣾⠁⢰⠒⠀⣿⡁ animals, hung out and licked or scratched our private parts.
⢿⡄⠘⠷⠚⠋⠀ Cats domesticated us 9500 years ago, and immediately we got
⠈⠳⣄ agriculture, towns then cities. -- whitroth on /.


  1   2   3   4   5   6   7   8   9   10   >