Re: [PATCH] Input: gpio_keys_polled - use struct_size() in devm_kzalloc()

2019-06-22 Thread Dmitry Torokhov
On Wed, Jun 19, 2019 at 09:26:55AM -0500, Gustavo A. R. Silva wrote:
> One of the more common cases of allocation size calculations is finding
> the size of a structure that has a zero-sized array at the end, along
> with memory for some number of elements for that array. For example:
> 
> struct gpio_keys_polled_dev {
>   ...
> struct gpio_keys_button_data data[0];
> };
> 
> size = sizeof(struct gpio_keys_polled_dev) + count * sizeof(struct 
> gpio_keys_button_data);
> instance = devm_kzalloc(dev, size, GFP_KERNEL);
> 
> Instead of leaving these open-coded and prone to type mistakes, we can
> now use the new struct_size() helper:
> 
> instance = devm_kzalloc(dev, struct_size(instance, data, count), GFP_KERNEL);
> 
> Notice that, in this case, variable size is not necessary, hence it
> is removed.
> 
> This code was detected with the help of Coccinelle.
> 
> Signed-off-by: Gustavo A. R. Silva 

Applied, thank you.

> ---
>  drivers/input/keyboard/gpio_keys_polled.c | 6 ++
>  1 file changed, 2 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/input/keyboard/gpio_keys_polled.c 
> b/drivers/input/keyboard/gpio_keys_polled.c
> index edc7262103b9..c4087be0c2e0 100644
> --- a/drivers/input/keyboard/gpio_keys_polled.c
> +++ b/drivers/input/keyboard/gpio_keys_polled.c
> @@ -235,7 +235,6 @@ static int gpio_keys_polled_probe(struct platform_device 
> *pdev)
>   struct gpio_keys_polled_dev *bdev;
>   struct input_polled_dev *poll_dev;
>   struct input_dev *input;
> - size_t size;
>   int error;
>   int i;
>  
> @@ -250,9 +249,8 @@ static int gpio_keys_polled_probe(struct platform_device 
> *pdev)
>   return -EINVAL;
>   }
>  
> - size = sizeof(struct gpio_keys_polled_dev) +
> - pdata->nbuttons * sizeof(struct gpio_keys_button_data);
> - bdev = devm_kzalloc(dev, size, GFP_KERNEL);
> + bdev = devm_kzalloc(dev, struct_size(bdev, data, pdata->nbuttons),
> + GFP_KERNEL);
>   if (!bdev) {
>   dev_err(dev, "no memory for private data\n");
>   return -ENOMEM;
> -- 
> 2.21.0
> 

-- 
Dmitry


[PATCH 1/2] Input: edt-ft5x06 - use get_unaligned_be16()

2019-06-22 Thread Dmitry Torokhov
Instead of doing conversion by hand, let's use the proper accessors.

Signed-off-by: Dmitry Torokhov 
---
 drivers/input/touchscreen/edt-ft5x06.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/input/touchscreen/edt-ft5x06.c 
b/drivers/input/touchscreen/edt-ft5x06.c
index c639ebce914c..ec770226e119 100644
--- a/drivers/input/touchscreen/edt-ft5x06.c
+++ b/drivers/input/touchscreen/edt-ft5x06.c
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define WORK_REGISTER_THRESHOLD0x00
 #define WORK_REGISTER_REPORT_RATE  0x08
@@ -239,8 +240,8 @@ static irqreturn_t edt_ft5x06_ts_isr(int irq, void *dev_id)
if (tsdata->version == EDT_M06 && type == TOUCH_EVENT_DOWN)
continue;
 
-   x = ((buf[0] << 8) | buf[1]) & 0x0fff;
-   y = ((buf[2] << 8) | buf[3]) & 0x0fff;
+   x = get_unaligned_be16(buf) & 0x0fff;
+   y = get_unaligned_be16(buf + 2) & 0x0fff;
/* The FT5x26 send the y coordinate first */
if (tsdata->version == EV_FT)
swap(x, y);
-- 
2.22.0.410.gd8fdbe21b5-goog



[PATCH 2/2] Input: edt-ft5x06 - simplify event reporting code

2019-06-22 Thread Dmitry Torokhov
Now that input_mt_report_slot_state() returns true if slot is active we no
longer need a temporary for the slot state.

Signed-off-by: Dmitry Torokhov 
---
 drivers/input/touchscreen/edt-ft5x06.c | 13 -
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/input/touchscreen/edt-ft5x06.c 
b/drivers/input/touchscreen/edt-ft5x06.c
index ec770226e119..3cc4341bbdff 100644
--- a/drivers/input/touchscreen/edt-ft5x06.c
+++ b/drivers/input/touchscreen/edt-ft5x06.c
@@ -229,7 +229,6 @@ static irqreturn_t edt_ft5x06_ts_isr(int irq, void *dev_id)
 
for (i = 0; i < tsdata->max_support_points; i++) {
u8 *buf = &rdbuf[i * tplen + offset];
-   bool down;
 
type = buf[0] >> 6;
/* ignore Reserved events */
@@ -247,16 +246,12 @@ static irqreturn_t edt_ft5x06_ts_isr(int irq, void 
*dev_id)
swap(x, y);
 
id = (buf[2] >> 4) & 0x0f;
-   down = type != TOUCH_EVENT_UP;
 
input_mt_slot(tsdata->input, id);
-   input_mt_report_slot_state(tsdata->input, MT_TOOL_FINGER, down);
-
-   if (!down)
-   continue;
-
-   touchscreen_report_pos(tsdata->input, &tsdata->prop, x, y,
-  true);
+   if (input_mt_report_slot_state(tsdata->input, MT_TOOL_FINGER,
+  type != TOUCH_EVENT_UP))
+   touchscreen_report_pos(tsdata->input, &tsdata->prop,
+  x, y, true);
}
 
input_mt_report_pointer_emulation(tsdata->input, true);
-- 
2.22.0.410.gd8fdbe21b5-goog



Re: [PATCH v8 1/5] Input: elan_i2c: Export the device id whitelist

2019-06-22 Thread Dmitry Torokhov
On Fri, Jun 21, 2019 at 07:50:42AM -0700, Jeffrey Hugo wrote:
> Elan_i2c and hid-quirks work in conjunction to decide which devices each
> driver will handle.  Elan_i2c has a whitelist of devices that should be
> consumed by hid-quirks so that there is one master list of devices to
> handoff between the drivers.  Put the ids in a header file so that
> hid-quirks can consume it instead of duplicating the list.
> 
> Signed-off-by: Jeffrey Hugo 

Benjamin, are you happy with this version?

Thanks.

-- 
Dmitry


Re: [PATCH v1 2/3] OPP: Add function to look up required OPP's for a given OPP

2019-06-22 Thread Saravana Kannan
On Sat, Jun 22, 2019 at 9:28 PM Chanwoo Choi  wrote:
>
> Hi,
>
> 2019년 6월 23일 (일) 오전 6:42, Saravana Kannan 님이 작성:
> >
> > On Sat, Jun 22, 2019 at 4:50 AM Chanwoo Choi  wrote:
> > >
> > > Hi,
> > >
> > > Absolutely, I like this approach. I think that it is necessary to make
> > > the connection
> > > between frequencies of devices.
> >
> > Happy to hear that.
> >
> > > But, I have a question on below.
> > >
> > > 2019년 6월 22일 (토) 오전 9:35, Saravana Kannan 님이 작성:
> > > >
> > > > Add a function that allows looking up required OPPs given a source OPP
> > > > table, destination OPP table and the source OPP.
> > > >
> > > > Signed-off-by: Saravana Kannan 
> > > > ---
> > > >  drivers/opp/core.c | 54 ++
> > > >  include/linux/pm_opp.h | 11 +
> > > >  2 files changed, 65 insertions(+)
> > > >
> > > > diff --git a/drivers/opp/core.c b/drivers/opp/core.c
> > > > index 74c7bdc6f463..4f7870bffbf8 100644
> > > > --- a/drivers/opp/core.c
> > > > +++ b/drivers/opp/core.c
> > > > @@ -1830,6 +1830,60 @@ void dev_pm_opp_put_genpd_virt_dev(struct 
> > > > opp_table *opp_table,
> > > > dev_err(virt_dev, "Failed to find required device 
> > > > entry\n");
> > > >  }
> > > >
> > > > +/**
> > > > + * dev_pm_opp_xlate_opp() - Find required OPP for src_table OPP.
> > > > + * @src_table: OPP table which has dst_table as one of its required 
> > > > OPP table.
> > > > + * @dst_table: Required OPP table of the src_table.
> > > > + * @pstate: OPP of the src_table.
> > > > + *
> > > > + * This function returns the OPP (present in @dst_table) pointed out 
> > > > by the
> > > > + * "required-opps" property of the OPP (present in @src_table).
> > > > + *
> > > > + * The callers are required to call dev_pm_opp_put() for the returned 
> > > > OPP after
> > > > + * use.
> > > > + *
> > > > + * Return: destination table OPP on success, otherwise NULL on errors.
> > > > + */
> > > > +struct dev_pm_opp *dev_pm_opp_xlate_opp(struct opp_table *src_table,
> > > > +   struct opp_table *dst_table,
> > > > +   struct dev_pm_opp *src_opp)
> > > > +{
> > > > +   struct dev_pm_opp *opp, *dest_opp = NULL;
> > > > +   int i;
> > > > +
> > > > +   if (!src_table || !dst_table || !src_opp)
> > > > +   return NULL;
> > > > +
> > > > +   for (i = 0; i < src_table->required_opp_count; i++) {
> > > > +   if (src_table->required_opp_tables[i]->np == 
> > > > dst_table->np)
> > > > +   break;
> > > > +   }
> > > > +
> > > > +   if (unlikely(i == src_table->required_opp_count)) {
> > > > +   pr_err("%s: Couldn't find matching OPP table (%p: 
> > > > %p)\n",
> > > > +  __func__, src_table, dst_table);
> > > > +   return NULL;
> > > > +   }
> > > > +
> > > > +   mutex_lock(&src_table->lock);
> > > > +
> > > > +   list_for_each_entry(opp, &src_table->opp_list, node) {
> > > > +   if (opp == src_opp) {
> > > > +   dest_opp = opp->required_opps[i];
> > >
> > > Correct me if I am wrong. This patch assumes that the 'i' index is the
> > > same between
> > > [1] and [2]. But in order to guarantee this assumption, all OPP entries
> > > in the same opp_table have to have the same number of 'required-opps' 
> > > properties
> > > and keep the sequence among 'required-opps' entries.
> > >
> > > [1] src_table->required_opp_tables[i]->np
> > > [2] opp->required_opps[i];
> > >
> > > For example, three OPP entries in the 'parent_bus_opp'
> > > have the different sequence of 'required-opps' and the different
> > > number of 'required-opps'. Is it no problem?
> > >
> > > parent_bus_opp: opp_table {
> > > compatible = "operating-points-v2";
> > >
> > > opp2 {
> > > opp-hz = /bits/ 64 <20>;
> > > required-opps = <&child_bus_a_opp2>, <&child_bus_b_opp2>,
> > > <&child_bus_c_opp2>;
> > > };
> > >
> > > opp1 {
> > > opp-hz = /bits/ 64 <20>;
> > > // change the sequence between child_bus_b_opp2  and 
> > > child_bus_c_opp2
> > > required-opps = <&child_bus_a_opp2>, <&child_bus_c_opp2>,
> > > <&child_bus_b_opp2>
> > > };
> > >
> > > opp0 {
> > > opp-hz = /bits/ 64 <20>;
> > > // missing 'child_bus_a_opp2'
> > > required-opps = <&child_bus_c_opp2>, <&child_bus_b_opp2>
> > > };
> > >
> > > }
> > >
> >
> > I get your question. If I'm not mistaken the OPP framework DT parsing
> > code makes the assumption that the required-opps list has the phandles
> > in the same order for each "row" in the OPP table. It actually only
> > looks at the first OPP entry to figure out the list of required OPP
> > tables.
>
> Thanks for description. It is the limitation of 'required-opps' until now.
>
> >
> > Technically one can write code to deal with random order of the
> > required-opp list, but doesn't seem like that's wo

Re: [Patch 1/1] Input: edt-ft5x06 - disable irq handling during suspend

2019-06-22 Thread Dmitry Torokhov
On Sat, Jun 22, 2019 at 01:37:10PM +0300, Andy Shevchenko wrote:
> On Fri, Jun 21, 2019 at 9:53 PM Benoit Parrot  wrote:
> >
> > As a wakeup source when the system is in suspend there is little point
> > trying to access a register across the i2c bus as it is probably still
> > inactive. We need to prevent the irq handler from being called during
> > suspend.
> >
> 
> Hmm... But how OS will know what the event to handle afterwards?
> I mean shouldn't we guarantee somehow the delivery of the event to the
> input, in this case, subsystem followed by corresponding user space?

If we are using level interrupts then it will work OK, however it is
really easy to lose edge here, as replaying disabled edge triggered
interrupts is not really reliable.

Benoit, what kind of interrupt do you use in your system?

Thanks.

-- 
Dmitry


[PATCH v6 6/6] uprobe: collapse THP pmd after removing all uprobes

2019-06-22 Thread Song Liu
After all uprobes are removed from the huge page (with PTE pgtable), it
is possible to collapse the pmd and benefit from THP again. This patch
does the collapse by setting AS_COLLAPSE_PMD. khugepaged would retract
the page table.

A check for vma->anon_vma is removed from retract_page_tables(). The
check was initially marked as "probably overkill". The code works well
without the check.

An issue on earlier version was discovered by kbuild test robot.

Reported-by: kbuild test robot 
Signed-off-by: Song Liu 
---
 kernel/events/uprobes.c | 6 +-
 mm/khugepaged.c | 3 ---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index a20d7b43a056..418382259f61 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -474,6 +474,7 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct 
mm_struct *mm,
struct page *old_page, *new_page;
struct vm_area_struct *vma;
int ret, is_register, ref_ctr_updated = 0;
+   struct page *orig_page = NULL;
 
is_register = is_swbp_insn(&opcode);
uprobe = container_of(auprobe, struct uprobe, arch);
@@ -512,7 +513,6 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct 
mm_struct *mm,
copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
if (!is_register) {
-   struct page *orig_page;
pgoff_t index;
 
index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT;
@@ -540,6 +540,10 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, 
struct mm_struct *mm,
if (ret && is_register && ref_ctr_updated)
update_ref_ctr(uprobe, mm, -1);
 
+   if (!ret && orig_page && PageTransCompound(orig_page))
+   set_bit(AS_COLLAPSE_PMD,
+   &compound_head(orig_page)->mapping->flags);
+
return ret;
 }
 
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 9b980327fd9b..2e277a2d731f 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1302,9 +1302,6 @@ static void retract_page_tables(struct address_space 
*mapping, pgoff_t pgoff,
 
i_mmap_lock_write(mapping);
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
-   /* probably overkill */
-   if (vma->anon_vma)
-   continue;
addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
if (addr & ~HPAGE_PMD_MASK)
continue;
-- 
2.17.1



[PATCH v6 3/6] mm, thp: introduce FOLL_SPLIT_PMD

2019-06-22 Thread Song Liu
This patch introduces a new foll_flag: FOLL_SPLIT_PMD. As the name says,
FOLL_SPLIT_PMD splits the huge pmd for the given mm_struct; the underlying huge
page stays as-is.

FOLL_SPLIT_PMD is useful for cases where we need to use regular pages,
but would like to switch back to huge page and huge pmd later on. One such
example is uprobe. The following patches use FOLL_SPLIT_PMD in uprobe.

Signed-off-by: Song Liu 
---
 include/linux/mm.h | 1 +
 mm/gup.c   | 8 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0ab8c7d84cd0..e605acc4fc81 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2642,6 +2642,7 @@ struct page *follow_page(struct vm_area_struct *vma, 
unsigned long address,
 #define FOLL_COW   0x4000  /* internal GUP flag */
 #define FOLL_ANON  0x8000  /* don't do file mappings */
 #define FOLL_LONGTERM  0x1 /* mapping lifetime is indefinite: see below */
+#define FOLL_SPLIT_PMD 0x2 /* split huge pmd before returning */
 
 /*
  * NOTE on FOLL_LONGTERM:
diff --git a/mm/gup.c b/mm/gup.c
index ddde097cf9e4..41f2a1fcc6f0 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -398,7 +398,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct 
*vma,
spin_unlock(ptl);
return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
}
-   if (flags & FOLL_SPLIT) {
+   if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) {
int ret;
page = pmd_page(*pmd);
if (is_huge_zero_page(page)) {
@@ -407,7 +407,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct 
*vma,
split_huge_pmd(vma, pmd, address);
if (pmd_trans_unstable(pmd))
ret = -EBUSY;
-   } else {
+   } else if (flags & FOLL_SPLIT) {
if (unlikely(!try_get_page(page))) {
spin_unlock(ptl);
return ERR_PTR(-ENOMEM);
@@ -419,6 +419,10 @@ static struct page *follow_pmd_mask(struct vm_area_struct 
*vma,
put_page(page);
if (pmd_none(*pmd))
return no_page_table(vma, flags);
+   } else {  /* flags & FOLL_SPLIT_PMD */
+   spin_unlock(ptl);
+   split_huge_pmd(vma, pmd, address);
+   ret = pte_alloc(mm, pmd);
}
 
return ret ? ERR_PTR(ret) :
-- 
2.17.1



[PATCH v6 5/6] khugepaged: enable collapse pmd for pte-mapped THP

2019-06-22 Thread Song Liu
khugepaged needs exclusive mmap_sem to access page table. When it fails
to lock mmap_sem, the page will fault in as pte-mapped THP. As the page
is already a THP, khugepaged will not handle this pmd again.

This patch enables the khugepaged to retry retract_page_tables().

A new flag AS_COLLAPSE_PMD is introduced to show the address_space may
contain pte-mapped THPs. When khugepaged fails to trylock the mmap_sem,
it sets AS_COLLAPSE_PMD. Then, at a later time, khugepaged will retry
compound pages in this address_space.

Since collapse may happen at a later time, some pages may already fault
in. To handle these pages properly, it is necessary to prepare the pmd
before collapsing. prepare_pmd_for_collapse() is introduced to prepare
the pmd by removing rmap, adjusting refcount and mm_counter.

prepare_pmd_for_collapse() also double checks whether all ptes in this
pmd are mapping to the same THP. This is necessary because some subpage
of the THP may be replaced, for example by uprobe. In such cases, it
is not possible to collapse the pmd, so we fall back.

Signed-off-by: Song Liu 
---
 include/linux/pagemap.h |  1 +
 mm/khugepaged.c | 69 +++--
 2 files changed, 60 insertions(+), 10 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 9ec3544baee2..eac881de2a46 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -29,6 +29,7 @@ enum mapping_flags {
AS_EXITING  = 4,/* final truncate in progress */
/* writeback related tags are not used */
AS_NO_WRITEBACK_TAGS = 5,
+   AS_COLLAPSE_PMD = 6,/* try collapse pmd for THP */
 };
 
 /**
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index a4f90a1b06f5..9b980327fd9b 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1254,7 +1254,47 @@ static void collect_mm_slot(struct mm_slot *mm_slot)
 }
 
 #if defined(CONFIG_SHMEM) && defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE)
-static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
+
+/* return whether the pmd is ready for collapse */
+bool prepare_pmd_for_collapse(struct vm_area_struct *vma, pgoff_t pgoff,
+ struct page *hpage, pmd_t *pmd)
+{
+   unsigned long haddr = page_address_in_vma(hpage, vma);
+   unsigned long addr;
+   int i, count = 0;
+
+   /* step 1: check all mapped PTEs are to this huge page */
+   for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
+   pte_t *pte = pte_offset_map(pmd, addr);
+
+   if (pte_none(*pte))
+   continue;
+
+   if (hpage + i != vm_normal_page(vma, addr, *pte))
+   return false;
+   count++;
+   }
+
+   /* step 2: adjust rmap */
+   for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
+   pte_t *pte = pte_offset_map(pmd, addr);
+   struct page *page;
+
+   if (pte_none(*pte))
+   continue;
+   page = vm_normal_page(vma, addr, *pte);
+   page_remove_rmap(page, false);
+   }
+
+   /* step 3: set proper refcount and mm_counters. */
+   page_ref_sub(hpage, count);
+   add_mm_counter(vma->vm_mm, mm_counter_file(hpage), -count);
+   return true;
+}
+
+extern pid_t sysctl_dump_pt_pid;
+static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
+   struct page *hpage)
 {
struct vm_area_struct *vma;
unsigned long addr;
@@ -1273,21 +1313,21 @@ static void retract_page_tables(struct address_space 
*mapping, pgoff_t pgoff)
pmd = mm_find_pmd(vma->vm_mm, addr);
if (!pmd)
continue;
-   /*
-* We need exclusive mmap_sem to retract page table.
-* If trylock fails we would end up with pte-mapped THP after
-* re-fault. Not ideal, but it's more important to not disturb
-* the system too much.
-*/
if (down_write_trylock(&vma->vm_mm->mmap_sem)) {
spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);
-   /* assume page table is clear */
+
+   if (!prepare_pmd_for_collapse(vma, pgoff, hpage, pmd)) {
+   spin_unlock(ptl);
+   up_write(&vma->vm_mm->mmap_sem);
+   continue;
+   }
_pmd = pmdp_collapse_flush(vma, addr, pmd);
spin_unlock(ptl);
up_write(&vma->vm_mm->mmap_sem);
mm_dec_nr_ptes(vma->vm_mm);
pte_free(vma->vm_mm, pmd_pgtable(_pmd));
-   }
+   } else
+   set_bit(AS_COLLAPSE_PMD, &mapping->flags);
}
i_mmap_unlock_write(mapping);

[PATCH v6 2/6] uprobe: use original page when all uprobes are removed

2019-06-22 Thread Song Liu
Currently, uprobe swaps the target page with an anonymous page in both
install_breakpoint() and remove_breakpoint(). When all uprobes on a page
are removed, the given mm is still using an anonymous page (not the
original page).

This patch allows uprobe to use original page when possible (all uprobes
on the page are already removed).

Signed-off-by: Song Liu 
---
 kernel/events/uprobes.c | 45 +
 1 file changed, 37 insertions(+), 8 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 78f61bfc6b79..f7c61a1ef720 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -160,16 +160,19 @@ static int __replace_page(struct vm_area_struct *vma, 
unsigned long addr,
int err;
struct mmu_notifier_range range;
struct mem_cgroup *memcg;
+   bool orig = new_page->mapping != NULL;  /* new_page == orig_page */
 
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
addr + PAGE_SIZE);
 
VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
 
-   err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
-   false);
-   if (err)
-   return err;
+   if (!orig) {
+   err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL,
+   &memcg, false);
+   if (err)
+   return err;
+   }
 
/* For try_to_free_swap() and munlock_vma_page() below */
lock_page(old_page);
@@ -177,15 +180,24 @@ static int __replace_page(struct vm_area_struct *vma, 
unsigned long addr,
mmu_notifier_invalidate_range_start(&range);
err = -EAGAIN;
if (!page_vma_mapped_walk(&pvmw)) {
-   mem_cgroup_cancel_charge(new_page, memcg, false);
+   if (!orig)
+   mem_cgroup_cancel_charge(new_page, memcg, false);
goto unlock;
}
VM_BUG_ON_PAGE(addr != pvmw.address, old_page);
 
get_page(new_page);
-   page_add_new_anon_rmap(new_page, vma, addr, false);
-   mem_cgroup_commit_charge(new_page, memcg, false, false);
-   lru_cache_add_active_or_unevictable(new_page, vma);
+   if (orig) {
+   lock_page(new_page);  /* for page_add_file_rmap() */
+   page_add_file_rmap(new_page, false);
+   unlock_page(new_page);
+   inc_mm_counter(mm, mm_counter_file(new_page));
+   dec_mm_counter(mm, MM_ANONPAGES);
+   } else {
+   page_add_new_anon_rmap(new_page, vma, addr, false);
+   mem_cgroup_commit_charge(new_page, memcg, false, false);
+   lru_cache_add_active_or_unevictable(new_page, vma);
+   }
 
if (!PageAnon(old_page)) {
dec_mm_counter(mm, mm_counter_file(old_page));
@@ -501,6 +513,23 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, 
struct mm_struct *mm,
copy_highpage(new_page, old_page);
copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
+   if (!is_register) {
+   struct page *orig_page;
+   pgoff_t index;
+
+   index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT;
+   orig_page = find_get_page(vma->vm_file->f_inode->i_mapping,
+ index);
+
+   if (orig_page) {
+   if (pages_identical(new_page, orig_page)) {
+   put_page(new_page);
+   new_page = orig_page;
+   } else
+   put_page(orig_page);
+   }
+   }
+
ret = __replace_page(vma, vaddr, old_page, new_page);
put_page(new_page);
 put_old:
-- 
2.17.1



[PATCH v6 1/6] mm: move memcmp_pages() and pages_identical()

2019-06-22 Thread Song Liu
This patch moves memcmp_pages() to mm/util.c and pages_identical() to
mm.h, so that we can use them in other files.

Signed-off-by: Song Liu 
---
 include/linux/mm.h |  7 +++
 mm/ksm.c   | 18 --
 mm/util.c  | 13 +
 3 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index dd0b5f4e1e45..0ab8c7d84cd0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2891,5 +2891,12 @@ void __init setup_nr_node_ids(void);
 static inline void setup_nr_node_ids(void) {}
 #endif
 
+extern int memcmp_pages(struct page *page1, struct page *page2);
+
+static inline int pages_identical(struct page *page1, struct page *page2)
+{
+   return !memcmp_pages(page1, page2);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/mm/ksm.c b/mm/ksm.c
index 81c20ed57bf6..6f153f976c4c 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1030,24 +1030,6 @@ static u32 calc_checksum(struct page *page)
return checksum;
 }
 
-static int memcmp_pages(struct page *page1, struct page *page2)
-{
-   char *addr1, *addr2;
-   int ret;
-
-   addr1 = kmap_atomic(page1);
-   addr2 = kmap_atomic(page2);
-   ret = memcmp(addr1, addr2, PAGE_SIZE);
-   kunmap_atomic(addr2);
-   kunmap_atomic(addr1);
-   return ret;
-}
-
-static inline int pages_identical(struct page *page1, struct page *page2)
-{
-   return !memcmp_pages(page1, page2);
-}
-
 static int write_protect_page(struct vm_area_struct *vma, struct page *page,
  pte_t *orig_pte)
 {
diff --git a/mm/util.c b/mm/util.c
index 9834c4ab7d8e..750e586d50bc 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -755,3 +755,16 @@ int get_cmdline(struct task_struct *task, char *buffer, 
int buflen)
 out:
return res;
 }
+
+int memcmp_pages(struct page *page1, struct page *page2)
+{
+   char *addr1, *addr2;
+   int ret;
+
+   addr1 = kmap_atomic(page1);
+   addr2 = kmap_atomic(page2);
+   ret = memcmp(addr1, addr2, PAGE_SIZE);
+   kunmap_atomic(addr2);
+   kunmap_atomic(addr1);
+   return ret;
+}
-- 
2.17.1



[PATCH v6 4/6] uprobe: use FOLL_SPLIT_PMD instead of FOLL_SPLIT

2019-06-22 Thread Song Liu
This patch uses the newly added FOLL_SPLIT_PMD in uprobe. This enables easy
regroup of huge pmd after the uprobe is disabled (in next patch).

Signed-off-by: Song Liu 
---
 kernel/events/uprobes.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index f7c61a1ef720..a20d7b43a056 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -153,7 +153,7 @@ static int __replace_page(struct vm_area_struct *vma, 
unsigned long addr,
 {
struct mm_struct *mm = vma->vm_mm;
struct page_vma_mapped_walk pvmw = {
-   .page = old_page,
+   .page = compound_head(old_page),
.vma = vma,
.address = addr,
};
@@ -165,8 +165,6 @@ static int __replace_page(struct vm_area_struct *vma, 
unsigned long addr,
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
addr + PAGE_SIZE);
 
-   VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
-
if (!orig) {
err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL,
&memcg, false);
@@ -483,7 +481,7 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct 
mm_struct *mm,
 retry:
/* Read the page with vaddr into memory */
ret = get_user_pages_remote(NULL, mm, vaddr, 1,
-   FOLL_FORCE | FOLL_SPLIT, &old_page, &vma, NULL);
+   FOLL_FORCE | FOLL_SPLIT_PMD, &old_page, &vma, NULL);
if (ret <= 0)
return ret;
 
-- 
2.17.1



[PATCH v6 0/6] THP aware uprobe

2019-06-22 Thread Song Liu
This set makes uprobe aware of THPs.

Currently, when uprobe is attached to text on THP, the page is split by
FOLL_SPLIT. As a result, uprobe eliminates the performance benefit of THP.

This set makes uprobe THP-aware. Instead of FOLL_SPLIT, we introduce
FOLL_SPLIT_PMD, which only splits the PMD for uprobe. After all uprobes within
the THP are removed, the PTEs are regrouped into huge PMD.

Note that, with uprobes attached, the process runs with PTEs for the huge
page. The performance benefit of THP is recovered _after_ all uprobes on
the huge page are detached.

This set (plus a few THP patches) is also available at

   https://github.com/liu-song-6/linux/tree/uprobe-thp

Changes v5 => v6:
1. Enable khugepaged to collapse pmd for pte-mapped THP
   (Kirill A. Shutemov).
2. uprobe asks khugepaged to collapse pmd. (Kirill A. Shutemov)

Note: The last two patches in v6 of the set apply _after_ v7 of set "Enable THP
  for text section of non-shmem files"

Changes v4 => v5:
1. Propagate pte_alloc() error out of follow_pmd_mask().

Changes since v3:
1. Simplify FOLL_SPLIT_PMD case in follow_pmd_mask(), (Kirill A. Shutemov)
2. Fix try_collapse_huge_pmd() to match change in follow_pmd_mask().

Changes since v2:
1. For FOLL_SPLIT_PMD, populated the page table in follow_pmd_mask().
2. Simplify logic in uprobe_write_opcode. (Oleg Nesterov)
3. Fix page refcount handling with FOLL_SPLIT_PMD.
4. Much more testing, together with THP on ext4 and btrfs (sending in
   separate set).
5. Rebased.

Changes since v1:
1. introduces FOLL_SPLIT_PMD, instead of modifying split_huge_pmd*();
2. reuse pages_identical() from ksm.c;
3. rewrite most of try_collapse_huge_pmd().

Song Liu (6):
  mm: move memcmp_pages() and pages_identical()
  uprobe: use original page when all uprobes are removed
  mm, thp: introduce FOLL_SPLIT_PMD
  uprobe: use FOLL_SPLIT_PMD instead of FOLL_SPLIT
  khugepaged: enable collapse pmd for pte-mapped THP
  uprobe: collapse THP pmd after removing all uprobes

 include/linux/mm.h  |  8 +
 include/linux/pagemap.h |  1 +
 kernel/events/uprobes.c | 55 ---
 mm/gup.c|  8 +++--
 mm/khugepaged.c | 72 +
 mm/ksm.c| 18 ---
 mm/util.c   | 13 
 7 files changed, 130 insertions(+), 45 deletions(-)

--
2.17.1


[PATCH v7 0/6] Enable THP for text section of non-shmem files

2019-06-22 Thread Song Liu
Changes v6 => v7:
1. Avoid accessing vma without holding mmap_sem (Hillf Danton)
2. In collapse_file() use readahead API instead of gup API. This matches
   better with existing logic for shmem.
3. Add inline documentation for @nr_thps (kbuild test robot)

Changes v5 => v6:
1. Improve THP stats in 3/6, (Kirill).

Changes v4 => v5:
1. Move the logic to drop THP from pagecache to open() path (Rik).
2. Revise description of CONFIG_READ_ONLY_THP_FOR_FS.

Changes v3 => v4:
1. Put the logic to drop THP from pagecache in a separate function (Rik).
2. Move the function to drop THP from pagecache to exit_mmap().
3. Revise confusing commit log 6/6.

Changes v2 => v3:
1. Removed the limitation (cannot write to file with THP) by truncating
   whole file during sys_open (see 6/6);
2. Fixed a VM_BUG_ON_PAGE() in filemap_fault() (see 2/6);
3. Split function rename to a separate patch (Rik);
4. Updated condition in hugepage_vma_check() (Rik).

Changes v1 => v2:
1. Fixed a missing mem_cgroup_commit_charge() for non-shmem case.

This set follows up discussion at LSF/MM 2019. The motivation is to put
text section of an application in THP, and thus reduces iTLB miss rate and
improves performance. Both Facebook and Oracle showed strong interest in
this feature.

To make reviews easier, this set aims for a minimal valid product. Current
version of the work does not have any changes to file system specific
code. This comes with some limitations (discussed later).

This set enables an application to "hugify" its text section by simply
running something like:

  madvise(0x600000, 0x800000, MADV_HUGEPAGE);

Before this call, the /proc/<pid>/maps looks like:

00400000-074d0000 r-xp 00000000 00:27 2006927 app

After this call, part of the text section is split out and mapped to
THP:

00400000-00425000 r-xp 00000000 00:27 2006927 app
00600000-00e00000 r-xp 00200000 00:27 2006927 app   <<< on THP
00e00000-074d0000 r-xp 00a00000 00:27 2006927 app

Limitations:

1. This only works for text section (vma with VM_DENYWRITE).
2. Original limitation #2 is removed in v3.

We gated this feature with an experimental config, READ_ONLY_THP_FOR_FS.
Once we get better support on the write path, we can remove the config and
enable it by default.

Tested cases:
1. Tested with btrfs and ext4.
2. Tested with real work application (memcache like caching service).
3. Tested with "THP aware uprobe":
   https://patchwork.kernel.org/project/linux-mm/list/?series=131339

This set (plus a few uprobe patches) is also available at

   https://github.com/liu-song-6/linux/tree/uprobe-thp

Please share your comments and suggestions on this.

Thanks!

Song Liu (6):
  filemap: check compound_head(page)->mapping in filemap_fault()
  filemap: update offset check in filemap_fault()
  mm,thp: stats for file backed THP
  khugepaged: rename collapse_shmem() and khugepaged_scan_shmem()
  mm,thp: add read-only THP support for (non-shmem) FS
  mm,thp: avoid writes to file with THP in pagecache

 drivers/base/node.c|   6 +++
 fs/inode.c |   3 ++
 fs/namei.c |  22 +++-
 fs/proc/meminfo.c  |   4 ++
 fs/proc/task_mmu.c |   4 +-
 include/linux/fs.h |  32 
 include/linux/mmzone.h |   2 +
 mm/Kconfig |  11 
 mm/filemap.c   |   9 ++--
 mm/khugepaged.c| 113 +++--
 mm/rmap.c  |  12 +++--
 mm/vmstat.c|   2 +
 12 files changed, 184 insertions(+), 36 deletions(-)

--
2.17.1


[PATCH v7 1/6] filemap: check compound_head(page)->mapping in filemap_fault()

2019-06-22 Thread Song Liu
Currently, filemap_fault() avoids a race condition with truncate by
checking page->mapping == mapping. This does not work for compound
pages. This patch lets it check compound_head(page)->mapping instead.

Acked-by: Rik van Riel 
Signed-off-by: Song Liu 
---
 mm/filemap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index df2006ba0cfa..f5b79a43946d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2517,7 +2517,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
goto out_retry;
 
/* Did it get truncated? */
-   if (unlikely(page->mapping != mapping)) {
+   if (unlikely(compound_head(page)->mapping != mapping)) {
unlock_page(page);
put_page(page);
goto retry_find;
-- 
2.17.1



[PATCH v7 5/6] mm,thp: add read-only THP support for (non-shmem) FS

2019-06-22 Thread Song Liu
This patch is (hopefully) the first step to enable THP for non-shmem
filesystems.

This patch enables an application to put part of its text sections to THP
via madvise, for example:

madvise((void *)0x600000, 0x200000, MADV_HUGEPAGE);

We tried to reuse the logic for THP on tmpfs.

Currently, write is not supported for non-shmem THP. khugepaged will only
process vma with VM_DENYWRITE. The next patch will handle writes, which
would only happen when the vma with VM_DENYWRITE is unmapped.

An EXPERIMENTAL config, READ_ONLY_THP_FOR_FS, is added to gate this
feature.

Acked-by: Rik van Riel 
Signed-off-by: Song Liu 
---
 mm/Kconfig  | 11 ++
 mm/filemap.c|  4 +--
 mm/khugepaged.c | 90 -
 mm/rmap.c   | 12 ---
 4 files changed, 96 insertions(+), 21 deletions(-)

diff --git a/mm/Kconfig b/mm/Kconfig
index f0c76ba47695..0a8fd589406d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -762,6 +762,17 @@ config GUP_BENCHMARK
 
  See tools/testing/selftests/vm/gup_benchmark.c
 
+config READ_ONLY_THP_FOR_FS
+   bool "Read-only THP for filesystems (EXPERIMENTAL)"
+   depends on TRANSPARENT_HUGE_PAGECACHE && SHMEM
+
+   help
+ Allow khugepaged to put read-only file-backed pages in THP.
+
+ This is marked experimental because it is a new feature. Write
+ support of file THPs will be developed in the next few release
+ cycles.
+
 config ARCH_HAS_PTE_SPECIAL
bool
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 5f072a113535..e79ceccdc6df 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -203,8 +203,8 @@ static void unaccount_page_cache_page(struct address_space 
*mapping,
__mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr);
if (PageTransHuge(page))
__dec_node_page_state(page, NR_SHMEM_THPS);
-   } else {
-   VM_BUG_ON_PAGE(PageTransHuge(page), page);
+   } else if (PageTransHuge(page)) {
+   __dec_node_page_state(page, NR_FILE_THPS);
}
 
/*
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 158cad542627..090127e4e185 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -48,6 +48,7 @@ enum scan_result {
SCAN_CGROUP_CHARGE_FAIL,
SCAN_EXCEED_SWAP_PTE,
SCAN_TRUNCATED,
+   SCAN_PAGE_HAS_PRIVATE,
 };
 
 #define CREATE_TRACE_POINTS
@@ -404,7 +405,11 @@ static bool hugepage_vma_check(struct vm_area_struct *vma,
(vm_flags & VM_NOHUGEPAGE) ||
test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
return false;
-   if (shmem_file(vma->vm_file)) {
+
+   if (shmem_file(vma->vm_file) ||
+   (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+vma->vm_file &&
+(vm_flags & VM_DENYWRITE))) {
if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
return false;
return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
@@ -456,8 +461,9 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
unsigned long hstart, hend;
 
/*
-* khugepaged does not yet work on non-shmem files or special
-* mappings. And file-private shmem THP is not supported.
+* khugepaged only supports read-only files for non-shmem files.
+* khugepaged does not yet work on special mappings. And
+* file-private shmem THP is not supported.
 */
if (!hugepage_vma_check(vma, vm_flags))
return 0;
@@ -1287,12 +1293,12 @@ static void retract_page_tables(struct address_space 
*mapping, pgoff_t pgoff)
 }
 
 /**
- * collapse_file - collapse small tmpfs/shmem pages into huge one.
+ * collapse_file - collapse filemap/tmpfs/shmem pages into huge one.
  *
  * Basic scheme is simple, details are more complex:
  *  - allocate and lock a new huge page;
  *  - scan page cache replacing old pages with the new one
- *+ swap in pages if necessary;
+ *+ swap/gup in pages if necessary;
  *+ fill in gaps;
  *+ keep old pages around in case rollback is required;
  *  - if replacing succeeds:
@@ -1316,7 +1322,11 @@ static void collapse_file(struct mm_struct *mm,
LIST_HEAD(pagelist);
XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
int nr_none = 0, result = SCAN_SUCCEED;
+   bool is_shmem = shmem_file(file);
 
+#ifndef CONFIG_READ_ONLY_THP_FOR_FS
+   VM_BUG_ON(!is_shmem);
+#endif
VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
 
/* Only allocate from the target node */
@@ -1348,7 +1358,8 @@ static void collapse_file(struct mm_struct *mm,
} while (1);
 
__SetPageLocked(new_page);
-   __SetPageSwapBacked(new_page);
+   if (is_shmem)
+   __SetPageSwapBacked(new_page);
new_page->index = start;
new_page->mapping = mapping;
 
@@ -1363,7 +1374,7 @@ static void collapse_file(struct mm_struct *mm,
struct page *

[PATCH v7 3/6] mm,thp: stats for file backed THP

2019-06-22 Thread Song Liu
In preparation for non-shmem THP, this patch adds a few stats and exposes
them in /proc/meminfo, /sys/bus/node/devices/<node>/meminfo, and
/proc/<pid>/task/<tid>/smaps.

This patch is mostly a rewrite of Kirill A. Shutemov's earlier version:
https://lkml.org/lkml/2017/1/26/284.

Acked-by: Rik van Riel 
Signed-off-by: Song Liu 
---
 drivers/base/node.c| 6 ++
 fs/proc/meminfo.c  | 4 
 fs/proc/task_mmu.c | 4 +++-
 include/linux/mmzone.h | 2 ++
 mm/vmstat.c| 2 ++
 5 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 8598fcbd2a17..71ae2dc93489 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -426,6 +426,8 @@ static ssize_t node_read_meminfo(struct device *dev,
   "Node %d AnonHugePages:  %8lu kB\n"
   "Node %d ShmemHugePages: %8lu kB\n"
   "Node %d ShmemPmdMapped: %8lu kB\n"
+  "Node %d FileHugePages: %8lu kB\n"
+  "Node %d FilePmdMapped: %8lu kB\n"
 #endif
,
   nid, K(node_page_state(pgdat, NR_FILE_DIRTY)),
@@ -451,6 +453,10 @@ static ssize_t node_read_meminfo(struct device *dev,
   nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
   HPAGE_PMD_NR),
   nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
+  HPAGE_PMD_NR),
+  nid, K(node_page_state(pgdat, NR_FILE_THPS) *
+  HPAGE_PMD_NR),
+  nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED) *
   HPAGE_PMD_NR)
 #endif
   );
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 568d90e17c17..bac395fc11f9 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -136,6 +136,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR);
show_val_kb(m, "ShmemPmdMapped: ",
global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR);
+   show_val_kb(m, "FileHugePages: ",
+   global_node_page_state(NR_FILE_THPS) * HPAGE_PMD_NR);
+   show_val_kb(m, "FilePmdMapped: ",
+   global_node_page_state(NR_FILE_PMDMAPPED) * HPAGE_PMD_NR);
 #endif
 
 #ifdef CONFIG_CMA
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 01d4eb0e6bd1..0360e3b2ba89 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -413,6 +413,7 @@ struct mem_size_stats {
unsigned long lazyfree;
unsigned long anonymous_thp;
unsigned long shmem_thp;
+   unsigned long file_thp;
unsigned long swap;
unsigned long shared_hugetlb;
unsigned long private_hugetlb;
@@ -563,7 +564,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
else if (is_zone_device_page(page))
/* pass */;
else
-   VM_BUG_ON_PAGE(1, page);
+   mss->file_thp += HPAGE_PMD_SIZE;
smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), 
locked);
 }
 #else
@@ -767,6 +768,7 @@ static void __show_smap(struct seq_file *m, const struct 
mem_size_stats *mss)
SEQ_PUT_DEC(" kB\nLazyFree:   ", mss->lazyfree);
SEQ_PUT_DEC(" kB\nAnonHugePages:  ", mss->anonymous_thp);
SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
+   SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp);
SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
  mss->private_hugetlb >> 10, 7);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 70394cabaf4e..827f9b777938 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -234,6 +234,8 @@ enum node_stat_item {
NR_SHMEM,   /* shmem pages (included tmpfs/GEM pages) */
NR_SHMEM_THPS,
NR_SHMEM_PMDMAPPED,
+   NR_FILE_THPS,
+   NR_FILE_PMDMAPPED,
NR_ANON_THPS,
NR_UNSTABLE_NFS,/* NFS unstable pages */
NR_VMSCAN_WRITE,
diff --git a/mm/vmstat.c b/mm/vmstat.c
index fd7e16ca6996..6afc892a148a 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1158,6 +1158,8 @@ const char * const vmstat_text[] = {
"nr_shmem",
"nr_shmem_hugepages",
"nr_shmem_pmdmapped",
+   "nr_file_hugepages",
+   "nr_file_pmdmapped",
"nr_anon_transparent_hugepages",
"nr_unstable",
"nr_vmscan_write",
-- 
2.17.1



[PATCH v7 6/6] mm,thp: avoid writes to file with THP in pagecache

2019-06-22 Thread Song Liu
In the previous patch, an application could put part of its text section in
THP via madvise(). These THPs will be protected from writes while the
application is still running (TXTBSY). However, after the application
exits, the file is available for writes.

This patch avoids writes to file THP by dropping page cache for the file
when the file is open for write. A new counter nr_thps is added to struct
address_space. In do_last(), if the file is open for write and nr_thps
is non-zero, we drop page cache for the whole file.

Reported-by: kbuild test robot 
Signed-off-by: Song Liu 
---
 fs/inode.c |  3 +++
 fs/namei.c | 22 +-
 include/linux/fs.h | 32 
 mm/filemap.c   |  1 +
 mm/khugepaged.c|  4 +++-
 5 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index df6542ec3b88..518113a4e219 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -181,6 +181,9 @@ int inode_init_always(struct super_block *sb, struct inode 
*inode)
mapping->flags = 0;
mapping->wb_err = 0;
atomic_set(&mapping->i_mmap_writable, 0);
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+   atomic_set(&mapping->nr_thps, 0);
+#endif
mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
mapping->private_data = NULL;
mapping->writeback_index = 0;
diff --git a/fs/namei.c b/fs/namei.c
index 20831c2fbb34..de64f24b58e9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3249,6 +3249,22 @@ static int lookup_open(struct nameidata *nd, struct path 
*path,
return error;
 }
 
+/*
+ * The file is open for write, so it is not mmapped with VM_DENYWRITE. If
+ * it still has THP in page cache, drop the whole file from pagecache
+ * before processing writes. This helps us avoid handling write back of
+ * THP for now.
+ */
+static inline void release_file_thp(struct file *file)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+   struct inode *inode = file_inode(file);
+
+   if (inode_is_open_for_write(inode) && filemap_nr_thps(inode->i_mapping))
+   truncate_pagecache(inode, 0);
+#endif
+}
+
 /*
  * Handle the last step of open()
  */
@@ -3418,7 +3434,11 @@ static int do_last(struct nameidata *nd,
goto out;
 opened:
error = ima_file_check(file, op->acc_mode);
-   if (!error && will_truncate)
+   if (error)
+   goto out;
+
+   release_file_thp(file);
+   if (will_truncate)
error = handle_truncate(file);
 out:
if (unlikely(error > 0)) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f7fdfe93e25d..082fc581c7fc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -427,6 +427,7 @@ int pagecache_write_end(struct file *, struct address_space 
*mapping,
  * @i_pages: Cached pages.
  * @gfp_mask: Memory allocation flags to use for allocating pages.
  * @i_mmap_writable: Number of VM_SHARED mappings.
+ * @nr_thps: Number of THPs in the pagecache (non-shmem only).
  * @i_mmap: Tree of private and shared mappings.
  * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
  * @nrpages: Number of page entries, protected by the i_pages lock.
@@ -444,6 +445,10 @@ struct address_space {
struct xarray   i_pages;
gfp_t   gfp_mask;
atomic_t        i_mmap_writable;
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+   /* number of thp, only for non-shmem files */
+   atomic_t        nr_thps;
+#endif
struct rb_root_cached   i_mmap;
struct rw_semaphore i_mmap_rwsem;
unsigned long   nrpages;
@@ -2790,6 +2795,33 @@ static inline errseq_t filemap_sample_wb_err(struct 
address_space *mapping)
return errseq_sample(&mapping->wb_err);
 }
 
+static inline int filemap_nr_thps(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+   return atomic_read(&mapping->nr_thps);
+#else
+   return 0;
+#endif
+}
+
+static inline void filemap_nr_thps_inc(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+   atomic_inc(&mapping->nr_thps);
+#else
+   WARN_ON_ONCE(1);
+#endif
+}
+
+static inline void filemap_nr_thps_dec(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+   atomic_dec(&mapping->nr_thps);
+#else
+   WARN_ON_ONCE(1);
+#endif
+}
+
 extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
   int datasync);
 extern int vfs_fsync(struct file *file, int datasync);
diff --git a/mm/filemap.c b/mm/filemap.c
index e79ceccdc6df..a8e86c136381 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -205,6 +205,7 @@ static void unaccount_page_cache_page(struct address_space 
*mapping,
__dec_node_page_state(page, NR_SHMEM_THPS);
} else if (PageTransHuge(page)) {
__dec_node_page_state(page, NR_FILE_THPS);
+   filemap_nr_thps_dec(mapping);
}
 
/*
diff --git a/mm/khugepaged.c b/mm/khugepaged.

[PATCH v7 2/6] filemap: update offset check in filemap_fault()

2019-06-22 Thread Song Liu
With THP, current check of offset:

VM_BUG_ON_PAGE(page->index != offset, page);

is no longer accurate. Update it to:

VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);

Acked-by: Rik van Riel 
Signed-off-by: Song Liu 
---
 mm/filemap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index f5b79a43946d..5f072a113535 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2522,7 +2522,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
put_page(page);
goto retry_find;
}
-   VM_BUG_ON_PAGE(page->index != offset, page);
+   VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
 
/*
 * We have a locked page in the page cache, now we need to check
-- 
2.17.1



[PATCH v7 4/6] khugepaged: rename collapse_shmem() and khugepaged_scan_shmem()

2019-06-22 Thread Song Liu
Next patch will add khugepaged support of non-shmem files. This patch
renames these two functions to reflect the new functionality:

collapse_shmem()=>  collapse_file()
khugepaged_scan_shmem() =>  khugepaged_scan_file()

Signed-off-by: Song Liu 
---
 mm/khugepaged.c | 23 +++
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 0f7419938008..158cad542627 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1287,7 +1287,7 @@ static void retract_page_tables(struct address_space 
*mapping, pgoff_t pgoff)
 }
 
 /**
- * collapse_shmem - collapse small tmpfs/shmem pages into huge one.
+ * collapse_file - collapse small tmpfs/shmem pages into huge one.
  *
  * Basic scheme is simple, details are more complex:
  *  - allocate and lock a new huge page;
@@ -1304,10 +1304,11 @@ static void retract_page_tables(struct address_space 
*mapping, pgoff_t pgoff)
  *+ restore gaps in the page cache;
  *+ unlock and free huge page;
  */
-static void collapse_shmem(struct mm_struct *mm,
-   struct address_space *mapping, pgoff_t start,
+static void collapse_file(struct mm_struct *mm,
+   struct file *file, pgoff_t start,
struct page **hpage, int node)
 {
+   struct address_space *mapping = file->f_mapping;
gfp_t gfp;
struct page *new_page;
struct mem_cgroup *memcg;
@@ -1563,11 +1564,11 @@ static void collapse_shmem(struct mm_struct *mm,
/* TODO: tracepoints */
 }
 
-static void khugepaged_scan_shmem(struct mm_struct *mm,
-   struct address_space *mapping,
-   pgoff_t start, struct page **hpage)
+static void khugepaged_scan_file(struct mm_struct *mm,
+   struct file *file, pgoff_t start, struct page **hpage)
 {
struct page *page = NULL;
+   struct address_space *mapping = file->f_mapping;
XA_STATE(xas, &mapping->i_pages, start);
int present, swap;
int node = NUMA_NO_NODE;
@@ -1631,16 +1632,15 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
result = SCAN_EXCEED_NONE_PTE;
} else {
node = khugepaged_find_target_node();
-   collapse_shmem(mm, mapping, start, hpage, node);
+   collapse_file(mm, file, start, hpage, node);
}
}
 
/* TODO: tracepoints */
 }
 #else
-static void khugepaged_scan_shmem(struct mm_struct *mm,
-   struct address_space *mapping,
-   pgoff_t start, struct page **hpage)
+static void khugepaged_scan_file(struct mm_struct *mm,
+   struct file *file, pgoff_t start, struct page **hpage)
 {
BUILD_BUG();
 }
@@ -1722,8 +1722,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int 
pages,
file = get_file(vma->vm_file);
up_read(&mm->mmap_sem);
ret = 1;
-   khugepaged_scan_shmem(mm, file->f_mapping,
-   pgoff, hpage);
+   khugepaged_scan_file(mm, file, pgoff, hpage);
fput(file);
} else {
ret = khugepaged_scan_pmd(mm, vma,
-- 
2.17.1



Re: [PATCHv4 26/28] x86/vdso: Align VDSO functions by CPU L1 cache line

2019-06-22 Thread Andrei Vagin
On Fri, Jun 14, 2019 at 04:13:31PM +0200, Thomas Gleixner wrote:
> On Wed, 12 Jun 2019, Dmitry Safonov wrote:
> 
> > From: Andrei Vagin 
> > 
> > After performance testing VDSO patches a noticeable 20% regression was
> > found on gettime_perf selftest with a cold cache.
> > As it turns to be, before time namespaces introduction, VDSO functions
> > were quite aligned to cache lines, but adding a new code to adjust
> > timens offset inside namespace created a small shift and vdso functions
> > become unaligned on cache lines.
> > 
> > Add align to vdso functions with gcc option to fix performance drop.
> > 
> > Coping the resulting numbers from cover letter:
> > 
> > Hot CPU cache (more gettime_perf.c cycles - the better):
> > | before | CONFIG_TIME_NS=n | host| inside timens
> > ||--|-|-
> > cycles  | 139887013  | 139453003| 139899785   | 128792458
> > diff (%)| 100| 99.7 | 100 | 92
> 
> Why is CONFIG_TIME_NS=n behaving worse than current mainline and
> worse than 'host' mode?

We should have specified the precision of these numbers: it is more than
this 0.3%, so at that time I decided that there was nothing to worry about.
I did these measurements a few months ago for the second version of this
series. I repeated the measurements for this set of patches:

| before| CONFIG_TIME_NS=n | host  | inside timens
--
| 144645498 | 142916801| 140364862 | 132378440
| 143440633 | 141545739| 140540053 | 132714190
| 144876395 | 144650599| 140026814 | 131843318
| 143984551 | 144595770| 140359260 | 131683544
| 144875682 | 143799788| 140692618 | 131300332
--
avg | 144364551 | 143501739| 140396721 | 131983964
diff %  | 100   | 99.4 | 97.2  | 91.4
-
stdev % | 0.4   | 0.9  | 0.1   | 0.4

> 
> > Cold cache (lesser tsc per gettime_perf_cold.c cycle - the better):
> > | before | CONFIG_TIME_NS=n | host| inside timens
> > ||--|-|-
> > tsc | 6748   | 6718 | 6862| 12682
> > diff (%)| 100| 99.6 | 101.7   | 188
> 
> Weird, now CONFIG_TIME_NS=n is better than current mainline and 'host' mode
> drops.

The precision of these numbers is much lower than that of the previous set.
These numbers are for the second version of this series, so I decided to
repeat the measurements for this version. When I ran the test, I found that
there is some degradation in comparison with v5.0. I bisected and found
that the problem is in 2b539aefe9e4 ("mm/resource: Let
walk_system_ram_range() search child resources"). At this point, I
realized that my test isn't quite right. On each iteration, the test
starts a new process, then does start=rdtsc();clock_gettime();end=rdtsc()
and prints (end-start). The problem here is that when clock_gettime() is
called the first time, vdso pages are not mapped into a process address
space, so the test measures how fast vdso pages are mapped into the
process address space. I modified this test, now it uses the clflush
instruction to drop cpu caches.  Here are the results:

   | before| CONFIG_TIME_NS=n | host  | inside timens
--
tsc| 434   | 433  | 437   | 477
stdev(tsc) | 5 | 5| 5 | 3
diff (%)   | 1 | 1| 100.1 | 109

Here is the source code for the modified test:
https://github.com/avagin/linux-task-diag/blob/wip/timens-rfc-v4/tools/testing/selftests/timens/gettime_perf_cold.c

This test does 10K iterations. At first glance, the numbers look
noisy, so I sort them and take only the 8K numbers in the middle:

$ ./gettime_perf_cold > raw
$ cat raw | sort -n | tail -n 9000 | head -n 8000 > results

> 
> Either I'm misreading the numbers or missing something or I'm just confused
> as usual :)
> 
> Thanks,
>   
>   
>   
>   
>   
>   
>   
>   
>   
> 

Re: [PATCH][next] lkdtm: remove redundant initialization of ret

2019-06-22 Thread Greg Kroah-Hartman
On Fri, Jun 21, 2019 at 09:26:05AM -0700, Kees Cook wrote:
> On Fri, Jun 21, 2019 at 04:05:09PM +0200, Greg Kroah-Hartman wrote:
> > On Fri, Jun 21, 2019 at 04:03:47PM +0200, Greg Kroah-Hartman wrote:
> > > On Fri, Jun 14, 2019 at 10:43:11AM +0100, Colin King wrote:
> > > > From: Colin Ian King 
> > > > 
> > > > The variable ret is being initialized with the value -EINVAL however
> > > > this value is never read and ret is being re-assigned later on. Hence
> > > > the initialization is redundant and can be removed.
> > > > 
> > > > Addresses-Coverity: ("Unused value")
> > > > Signed-off-by: Colin Ian King 
> > > > Acked-by: Kees Cook 
> > > > ---
> > > >  drivers/misc/lkdtm/core.c | 2 +-
> > > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > > 
> > > > diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
> > > > index bba093224813..92df35fdeab0 100644
> > > > --- a/drivers/misc/lkdtm/core.c
> > > > +++ b/drivers/misc/lkdtm/core.c
> > > > @@ -390,7 +390,7 @@ static int __init lkdtm_module_init(void)
> > > >  {
> > > > struct crashpoint *crashpoint = NULL;
> > > > const struct crashtype *crashtype = NULL;
> > > > -   int ret = -EINVAL;
> > > > +   int ret;
> > > > int i;
> > > >  
> > > > /* Neither or both of these need to be set */
> > > > -- 
> > > > 2.20.1
> > > > 
> > > 
> > > With this patch now applied, I get this build warning:
> > > drivers/misc/lkdtm/core.c: In function lkdtm_module_init:
> > > drivers/misc/lkdtm/core.c:467:9: warning: ret may be used uninitialized 
> > > in this function [-Wmaybe-uninitialized]
> > >   return ret;
> > >  ^~~
> > > 
> > > So are you _sure_ it shouldn't be initialized?
> > 
> > In looking at the code in my tree, ret is used uninitialized with this
> > patch, so maybe coverity is wrong, or I don't have all of the needed
> > patches?
> 
> The path went away when the check for debugfs_create_file() was removed.
> I thought that patch was in your tree already?

Ah, other tree, sorry, my fault.  I'll go queue this up to the place
that patch is...

thanks,

greg k-h


Re: [PATCH v5.2-rc5] Bluetooth: Fix regression with minimum encryption key size alignment

2019-06-22 Thread Greg KH
On Sat, Jun 22, 2019 at 08:21:07PM +0200, Marcel Holtmann wrote:
> Hi Sasha,
> 
> > [This is an automated email]
> > 
> > This commit has been processed because it contains a "Fixes:" tag,
> > fixing commit: d5bb334a8e17 Bluetooth: Align minimum encryption key size 
> > for LE and BR/EDR connections.
> > 
> > The bot has tested the following trees: v5.1.12, v4.19.53, v4.14.128, 
> > v4.9.182, v4.4.182.
> > 
> > v5.1.12: Build failed! Errors:
> >net/bluetooth/l2cap_core.c:1356:24: error: ‘HCI_MIN_ENC_KEY_SIZE’ 
> > undeclared (first use in this function); did you mean 
> > ‘SMP_MIN_ENC_KEY_SIZE’?
> > 
> > v4.19.53: Build failed! Errors:
> >net/bluetooth/l2cap_core.c:1355:24: error: ‘HCI_MIN_ENC_KEY_SIZE’ 
> > undeclared (first use in this function); did you mean 
> > ‘SMP_MIN_ENC_KEY_SIZE’?
> > 
> > v4.14.128: Build failed! Errors:
> >net/bluetooth/l2cap_core.c:1355:24: error: ‘HCI_MIN_ENC_KEY_SIZE’ 
> > undeclared (first use in this function); did you mean 
> > ‘SMP_MIN_ENC_KEY_SIZE’?
> > 
> > v4.9.182: Build OK!
> > v4.4.182: Build OK!
> > 
> > How should we proceed with this patch?
> 
> either you reapply commit d5bb334a8e17 first or I have to send a version that 
> combines both into a single commit for easy applying.

I can reapply it...

thanks,

greg k-h


[PATCH v3 9/9] ARM: dts: sun8i: s3: add devicetree for Lichee zero plus w/ S3

2019-06-22 Thread Icenowy Zheng
Lichee zero plus is a core board made by Sipeed, which includes on-board
TF slot or SMT SD NAND, and optional SPI NOR or eMMC, a UART debug
header, a microUSB slot and a gold finger connector for expansion. It
can use either Sochip S3 or Allwinner S3L SoC.

Add the basic device tree for the core board, w/o optional onboard
storage, and with S3 SoC.

Signed-off-by: Icenowy Zheng 
---
Changes in v3:
- Drop common regulator DTSI usage and added vcc3v3 regulator.

 arch/arm/boot/dts/Makefile|  1 +
 .../boot/dts/sun8i-s3-lichee-zero-plus.dts|  8 
 .../dts/sun8i-s3-s3l-lichee-zero-plus.dtsi| 44 +++
 3 files changed, 53 insertions(+)
 create mode 100644 arch/arm/boot/dts/sun8i-s3-lichee-zero-plus.dts
 create mode 100644 arch/arm/boot/dts/sun8i-s3-s3l-lichee-zero-plus.dtsi

diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index c4742afe41a7..d24dec29245e 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -1113,6 +1113,7 @@ dtb-$(CONFIG_MACH_SUN8I) += \
sun8i-r16-nintendo-super-nes-classic.dtb \
sun8i-r16-parrot.dtb \
sun8i-r40-bananapi-m2-ultra.dtb \
+   sun8i-s3-lichee-zero-plus.dtb \
sun8i-t3-cqa3t-bv3.dtb \
sun8i-v3s-licheepi-zero.dtb \
sun8i-v3s-licheepi-zero-dock.dtb \
diff --git a/arch/arm/boot/dts/sun8i-s3-lichee-zero-plus.dts 
b/arch/arm/boot/dts/sun8i-s3-lichee-zero-plus.dts
new file mode 100644
index ..7d2f6b145190
--- /dev/null
+++ b/arch/arm/boot/dts/sun8i-s3-lichee-zero-plus.dts
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) 2019 Icenowy Zheng 
+ */
+
+/dts-v1/;
+#include "sun8i-s3.dtsi"
+#include "sun8i-s3-s3l-lichee-zero-plus.dtsi"
diff --git a/arch/arm/boot/dts/sun8i-s3-s3l-lichee-zero-plus.dtsi 
b/arch/arm/boot/dts/sun8i-s3-s3l-lichee-zero-plus.dtsi
new file mode 100644
index ..e68f738c3046
--- /dev/null
+++ b/arch/arm/boot/dts/sun8i-s3-s3l-lichee-zero-plus.dtsi
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) 2019 Icenowy Zheng 
+ */
+
+#include <dt-bindings/gpio/gpio.h>
+
+/ {
+   aliases {
+   serial0 = &uart0;
+   };
+
+   chosen {
+   stdout-path = "serial0:115200n8";
+   };
+
+   reg_vcc3v3: vcc3v3 {
+   compatible = "regulator-fixed";
+   regulator-name = "vcc3v3";
+   regulator-min-microvolt = <3300000>;
+   regulator-max-microvolt = <3300000>;
+   };
+};
+
+&mmc0 {
+   broken-cd;
+   bus-width = <4>;
+   vmmc-supply = <&reg_vcc3v3>;
+   status = "okay";
+};
+
+&uart0 {
+   pinctrl-0 = <&uart0_pb_pins>;
+   pinctrl-names = "default";
+   status = "okay";
+};
+
+&usb_otg {
+   dr_mode = "otg";
+   status = "okay";
+};
+
+&usbphy {
+   usb0_id_det-gpios = <&pio 5 6 GPIO_ACTIVE_HIGH>;
+   status = "okay";
+};
-- 
2.21.0



[PATCH v3 3/9] dt-bindings: clk: sunxi-ccu: add compatible string for V3 CCU

2019-06-22 Thread Icenowy Zheng
Although Allwinner V3 and V3s share the same die, one peripheral (I2S)
is only available on V3, and thus its clocks are not declared for the V3s
CCU.

Add a V3 CCU compatible string to the binding to prepare for a CCU
driver that provides the I2S clock on V3, but not on V3s.

Signed-off-by: Icenowy Zheng 
Reviewed-by: Rob Herring 
---
No changes in v3.

Changes in v2:
- Adapt to YAML binding document in linux-next.

 .../devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml   | 1 +
 1 file changed, 1 insertion(+)

diff --git 
a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml 
b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml
index c935405458fe..1bde87fc94c5 100644
--- a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml
@@ -31,6 +31,7 @@ properties:
   - allwinner,sun8i-h3-ccu
   - allwinner,sun8i-h3-r-ccu
   - allwinner,sun8i-r40-ccu
+  - allwinner,sun8i-v3-ccu
   - allwinner,sun8i-v3s-ccu
   - allwinner,sun9i-a80-ccu
   - allwinner,sun50i-a64-ccu
-- 
2.21.0



[PATCH v3 6/9] ARM: sunxi: dts: s3/s3l/v3: add DTSI files for S3/S3L/V3 SoCs

2019-06-22 Thread Icenowy Zheng
The Allwinner S3/S3L/V3 SoCs all share the same die with the V3s SoC,
but with more GPIO wired out of the package.

Add DTSI files for these SoCs. The DTSI file for V3 just replaces the
pinctrl compatible string, and the S3/S3L DTSI files just include the V3
DTSI file.

Signed-off-by: Icenowy Zheng 
---
No changes in v3/v2.

 arch/arm/boot/dts/sun8i-s3.dtsi  |  6 ++
 arch/arm/boot/dts/sun8i-s3l.dtsi |  6 ++
 arch/arm/boot/dts/sun8i-v3.dtsi  | 14 ++
 3 files changed, 26 insertions(+)
 create mode 100644 arch/arm/boot/dts/sun8i-s3.dtsi
 create mode 100644 arch/arm/boot/dts/sun8i-s3l.dtsi
 create mode 100644 arch/arm/boot/dts/sun8i-v3.dtsi

diff --git a/arch/arm/boot/dts/sun8i-s3.dtsi b/arch/arm/boot/dts/sun8i-s3.dtsi
new file mode 100644
index ..0f41a25ecb30
--- /dev/null
+++ b/arch/arm/boot/dts/sun8i-s3.dtsi
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) 2019 Icenowy Zheng 
+ */
+
+#include "sun8i-v3.dtsi"
diff --git a/arch/arm/boot/dts/sun8i-s3l.dtsi b/arch/arm/boot/dts/sun8i-s3l.dtsi
new file mode 100644
index ..0f41a25ecb30
--- /dev/null
+++ b/arch/arm/boot/dts/sun8i-s3l.dtsi
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) 2019 Icenowy Zheng 
+ */
+
+#include "sun8i-v3.dtsi"
diff --git a/arch/arm/boot/dts/sun8i-v3.dtsi b/arch/arm/boot/dts/sun8i-v3.dtsi
new file mode 100644
index ..6ae8645ade50
--- /dev/null
+++ b/arch/arm/boot/dts/sun8i-v3.dtsi
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) 2019 Icenowy Zheng 
+ */
+
+#include "sun8i-v3s.dtsi"
+
+&ccu {
+   compatible = "allwinner,sun8i-v3-ccu";
+};
+
+&pio {
+   compatible = "allwinner,sun8i-v3-pinctrl";
+};
-- 
2.21.0



[PATCH v3 5/9] dt-bindings: vendor-prefixes: add SoChip

2019-06-22 Thread Icenowy Zheng
Shenzhen SoChip Technology Co., Ltd. is a hardware vendor that produces
EVBs with Allwinner chips. There's also a SoC named S3 that is developed
by Allwinner (based on Allwinner V3/V3s) but branded SoChip.

Add the vendor prefix for SoChip.

Signed-off-by: Icenowy Zheng 
Reviewed-by: Rob Herring 
---
No changes in v3.

Changes in v2:
- Add the review tag by Rob.

 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml 
b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index d14604e58d96..92f50cac1055 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -859,6 +859,8 @@ patternProperties:
 description: Standard Microsystems Corporation
   "^snps,.*":
 description: Synopsys, Inc.
+  "^sochip,.*":
+description: Shenzhen SoChip Technology Co., Ltd.
   "^socionext,.*":
 description: Socionext Inc.
   "^solidrun,.*":
-- 
2.21.0



[PATCH v3 4/9] clk: sunxi-ng: v3s: add Allwinner V3 support

2019-06-22 Thread Icenowy Zheng
Allwinner V3 has the same main die as V3s, but with more pins wired.
There's an I2S bus on V3 that is not available on V3s.

Add the V3-only peripheral's clocks and reset to the V3s CCU driver,
bound to a new V3 compatible string. The driver name is not changed
because it's part of the device tree binding (the header file name).

Signed-off-by: Icenowy Zheng 
---
No changes in v3/v2.

 drivers/clk/sunxi-ng/ccu-sun8i-v3s.c  | 225 +-
 drivers/clk/sunxi-ng/ccu-sun8i-v3s.h  |   2 +-
 include/dt-bindings/clock/sun8i-v3s-ccu.h |   4 +
 include/dt-bindings/reset/sun8i-v3s-ccu.h |   3 +
 4 files changed, 231 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c 
b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
index 90a9ca5fac80..fb04e564d55d 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
@@ -235,6 +235,8 @@ static SUNXI_CCU_GATE(bus_codec_clk,"bus-codec",
"apb1",
  0x068, BIT(0), 0);
 static SUNXI_CCU_GATE(bus_pio_clk, "bus-pio",  "apb1",
  0x068, BIT(5), 0);
+static SUNXI_CCU_GATE(bus_i2s0_clk,"bus-i2s0", "apb1",
+ 0x068, BIT(12), 0);
 
 static SUNXI_CCU_GATE(bus_i2c0_clk,"bus-i2c0", "apb2",
  0x06c, BIT(0), 0);
@@ -306,6 +308,11 @@ static SUNXI_CCU_MP_WITH_MUX_GATE(spi0_clk, "spi0", 
mod0_default_parents, 0x0a0,
  BIT(31),  /* gate */
  0);
 
+static const char * const i2s_parents[] = { "pll-audio-8x", "pll-audio-4x",
+   "pll-audio-2x", "pll-audio" };
+static SUNXI_CCU_MUX_WITH_GATE(i2s0_clk, "i2s0", i2s_parents,
+  0x0b0, 16, 2, BIT(31), CLK_SET_RATE_PARENT);
+
 static SUNXI_CCU_GATE(usb_phy0_clk,"usb-phy0", "osc24M",
  0x0cc, BIT(8), 0);
 static SUNXI_CCU_GATE(usb_ohci0_clk,   "usb-ohci0","osc24M",
@@ -439,6 +446,80 @@ static struct ccu_common *sun8i_v3s_ccu_clks[] = {
&mipi_csi_clk.common,
 };
 
+static struct ccu_common *sun8i_v3_ccu_clks[] = {
+   &pll_cpu_clk.common,
+   &pll_audio_base_clk.common,
+   &pll_video_clk.common,
+   &pll_ve_clk.common,
+   &pll_ddr0_clk.common,
+   &pll_periph0_clk.common,
+   &pll_isp_clk.common,
+   &pll_periph1_clk.common,
+   &pll_ddr1_clk.common,
+   &cpu_clk.common,
+   &axi_clk.common,
+   &ahb1_clk.common,
+   &apb1_clk.common,
+   &apb2_clk.common,
+   &ahb2_clk.common,
+   &bus_ce_clk.common,
+   &bus_dma_clk.common,
+   &bus_mmc0_clk.common,
+   &bus_mmc1_clk.common,
+   &bus_mmc2_clk.common,
+   &bus_dram_clk.common,
+   &bus_emac_clk.common,
+   &bus_hstimer_clk.common,
+   &bus_spi0_clk.common,
+   &bus_otg_clk.common,
+   &bus_ehci0_clk.common,
+   &bus_ohci0_clk.common,
+   &bus_ve_clk.common,
+   &bus_tcon0_clk.common,
+   &bus_csi_clk.common,
+   &bus_de_clk.common,
+   &bus_codec_clk.common,
+   &bus_pio_clk.common,
+   &bus_i2s0_clk.common,
+   &bus_i2c0_clk.common,
+   &bus_i2c1_clk.common,
+   &bus_uart0_clk.common,
+   &bus_uart1_clk.common,
+   &bus_uart2_clk.common,
+   &bus_ephy_clk.common,
+   &bus_dbg_clk.common,
+   &mmc0_clk.common,
+   &mmc0_sample_clk.common,
+   &mmc0_output_clk.common,
+   &mmc1_clk.common,
+   &mmc1_sample_clk.common,
+   &mmc1_output_clk.common,
+   &mmc2_clk.common,
+   &mmc2_sample_clk.common,
+   &mmc2_output_clk.common,
+   &ce_clk.common,
+   &spi0_clk.common,
+   &i2s0_clk.common,
+   &usb_phy0_clk.common,
+   &usb_ohci0_clk.common,
+   &dram_clk.common,
+   &dram_ve_clk.common,
+   &dram_csi_clk.common,
+   &dram_ohci_clk.common,
+   &dram_ehci_clk.common,
+   &de_clk.common,
+   &tcon_clk.common,
+   &csi_misc_clk.common,
+   &csi0_mclk_clk.common,
+   &csi1_sclk_clk.common,
+   &csi1_mclk_clk.common,
+   &ve_clk.common,
+   &ac_dig_clk.common,
+   &avs_clk.common,
+   &mbus_clk.common,
+   &mipi_csi_clk.common,
+};
+
 /* We hardcode the divider to 4 for now */
 static CLK_FIXED_FACTOR(pll_audio_clk, "pll-audio",
"pll-audio-base", 4, 1, CLK_SET_RATE_PARENT);
@@ -528,6 +609,85 @@ static struct clk_hw_onecell_data sun8i_v3s_hw_clks = {
.num= CLK_NUMBER,
 };
 
+static struct clk_hw_onecell_data sun8i_v3_hw_clks = {
+   .hws= {
+   [CLK_PLL_CPU]   = &pll_cpu_clk.common.hw,
+   [CLK_PLL_AUDIO_BASE]= &pll_audio_base_clk.common.hw,
+   [CLK_PLL_AUDIO] = &pll_audio_clk.hw,
+   [CLK_PLL_AUDIO_2X]  = &pll_audio_2x_clk.hw,
+   [CLK_PLL_AUDIO_4X]  = &pll_audio_4x_clk.hw,
+   [CLK_PLL_AUDIO_8X]  = &pll_audio_8x_clk.hw,
+   

[PATCH v3 8/9] dt-bindings: arm: sunxi: add binding for Lichee Zero Plus core board

2019-06-22 Thread Icenowy Zheng
The Lichee Zero Plus is a core board made by Sipeed, with a microUSB
connector on it, TF slot or WSON8 SD chip, optional eMMC or SPI Flash.
It has a gold finger connector for expansion, and UART is available from
reserved pins w/ 2.54mm pitch. The board can use either SoChip S3 or
Allwinner V3L SoCs.

Add the device tree binding of the basic version of the core board --
w/o eMMC or SPI Flash, w/ TF slot or WSON8 SD, and use S3 SoC.

Signed-off-by: Icenowy Zheng 
---
No changes in v3.

Patch introduced in v2.

 Documentation/devicetree/bindings/arm/sunxi.yaml | 5 +
 1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/sunxi.yaml 
b/Documentation/devicetree/bindings/arm/sunxi.yaml
index 000a00d12d6a..48c126a7a848 100644
--- a/Documentation/devicetree/bindings/arm/sunxi.yaml
+++ b/Documentation/devicetree/bindings/arm/sunxi.yaml
@@ -353,6 +353,11 @@ properties:
   - const: licheepi,licheepi-zero
   - const: allwinner,sun8i-v3s
 
+  - description: Lichee Zero Plus (with S3, without eMMC/SPI Flash)
+items:
+  - const: sipeed,lichee-zero-plus
+  - const: allwinner,sun8i-s3
+
   - description: Linksprite PCDuino
 items:
   - const: linksprite,a10-pcduino
-- 
2.21.0



[PATCH v3 7/9] dt-bindings: vendor-prefixes: add Sipeed

2019-06-22 Thread Icenowy Zheng
Shenzhen Sipeed Technology Co., Ltd. is a company focused on development
kits, which also include the rebranded Lichee Pi series.

Add its vendor prefix binding.

Signed-off-by: Icenowy Zheng 
---
Changes in v3:
- Rebased because of the addition of sinlinx and sinovoip.

Patch introduced in v2.

 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml 
b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 92f50cac1055..bbbce4c9cc4e 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -847,6 +847,8 @@ patternProperties:
 description: Sinlinx Electronics Technology Co., LTD
   "^sinovoip,.*":
 description: SinoVoip Co., Ltd
+  "^sipeed,.*":
+description: Shenzhen Sipeed Technology Co., Ltd.
   "^sirf,.*":
 description: SiRF Technology, Inc.
   "^sis,.*":
-- 
2.21.0



[PATCH v3 1/9] pinctrl: sunxi: v3s: introduce support for V3

2019-06-22 Thread Icenowy Zheng
Introduce the GPIO pins that are only available on V3 (not on V3s) to the
V3s pinctrl driver.

Signed-off-by: Icenowy Zheng 
---
Changes in v3:
- Fixed code alignment.
- Fixed LVDS function number.

Changes in v2:
- Dropped the driver rename patch and apply the changes directly on V3s
  driver.

 drivers/pinctrl/sunxi/pinctrl-sun8i-v3s.c | 473 +-
 drivers/pinctrl/sunxi/pinctrl-sunxi.h |   2 +
 2 files changed, 366 insertions(+), 109 deletions(-)

diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-v3s.c 
b/drivers/pinctrl/sunxi/pinctrl-sun8i-v3s.c
index 6704ce8e5e3d..721c997d472b 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sun8i-v3s.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-v3s.c
@@ -1,5 +1,5 @@
 /*
- * Allwinner V3s SoCs pinctrl driver.
+ * Allwinner V3/V3s SoCs pinctrl driver.
  *
  * Copyright (C) 2016 Icenowy Zheng 
  *
@@ -28,235 +28,433 @@ static const struct sunxi_desc_pin sun8i_v3s_pins[] = {
SUNXI_PIN(SUNXI_PINCTRL_PIN(B, 0),
  SUNXI_FUNCTION(0x0, "gpio_in"),
  SUNXI_FUNCTION(0x1, "gpio_out"),
- SUNXI_FUNCTION(0x2, "uart2"), /* TX */
- SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 0)),  /* PB_EINT0 */
+ SUNXI_FUNCTION(0x2, "uart2"), /* TX */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 0)),  /* PB_EINT0 */
SUNXI_PIN(SUNXI_PINCTRL_PIN(B, 1),
  SUNXI_FUNCTION(0x0, "gpio_in"),
  SUNXI_FUNCTION(0x1, "gpio_out"),
- SUNXI_FUNCTION(0x2, "uart2"), /* RX */
- SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 1)),  /* PB_EINT1 */
+ SUNXI_FUNCTION(0x2, "uart2"), /* RX */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 1)),  /* PB_EINT1 */
SUNXI_PIN(SUNXI_PINCTRL_PIN(B, 2),
  SUNXI_FUNCTION(0x0, "gpio_in"),
  SUNXI_FUNCTION(0x1, "gpio_out"),
- SUNXI_FUNCTION(0x2, "uart2"), /* RTS */
- SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 2)),  /* PB_EINT2 */
+ SUNXI_FUNCTION(0x2, "uart2"), /* RTS */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 2)),  /* PB_EINT2 */
SUNXI_PIN(SUNXI_PINCTRL_PIN(B, 3),
  SUNXI_FUNCTION(0x0, "gpio_in"),
  SUNXI_FUNCTION(0x1, "gpio_out"),
- SUNXI_FUNCTION(0x2, "uart2"), /* D1 */
- SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 3)),  /* PB_EINT3 */
+ SUNXI_FUNCTION(0x2, "uart2"), /* D1 */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 3)),  /* PB_EINT3 */
SUNXI_PIN(SUNXI_PINCTRL_PIN(B, 4),
  SUNXI_FUNCTION(0x0, "gpio_in"),
  SUNXI_FUNCTION(0x1, "gpio_out"),
  SUNXI_FUNCTION(0x2, "pwm0"),
- SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 4)),  /* PB_EINT4 */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 4)),  /* PB_EINT4 */
SUNXI_PIN(SUNXI_PINCTRL_PIN(B, 5),
  SUNXI_FUNCTION(0x0, "gpio_in"),
  SUNXI_FUNCTION(0x1, "gpio_out"),
  SUNXI_FUNCTION(0x2, "pwm1"),
- SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 5)),  /* PB_EINT5 */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 5)),  /* PB_EINT5 */
SUNXI_PIN(SUNXI_PINCTRL_PIN(B, 6),
  SUNXI_FUNCTION(0x0, "gpio_in"),
  SUNXI_FUNCTION(0x1, "gpio_out"),
- SUNXI_FUNCTION(0x2, "i2c0"),  /* SCK */
- SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 6)),  /* PB_EINT6 */
+ SUNXI_FUNCTION(0x2, "i2c0"),  /* SCK */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 6)),  /* PB_EINT6 */
SUNXI_PIN(SUNXI_PINCTRL_PIN(B, 7),
  SUNXI_FUNCTION(0x0, "gpio_in"),
  SUNXI_FUNCTION(0x1, "gpio_out"),
- SUNXI_FUNCTION(0x2, "i2c0"),  /* SDA */
- SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 7)),  /* PB_EINT7 */
+ SUNXI_FUNCTION(0x2, "i2c0"),  /* SDA */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 7)),  /* PB_EINT7 */
SUNXI_PIN(SUNXI_PINCTRL_PIN(B, 8),
  SUNXI_FUNCTION(0x0, "gpio_in"),
  SUNXI_FUNCTION(0x1, "gpio_out"),
- SUNXI_FUNCTION(0x2, "i2c1"),  /* SDA */
- SUNXI_FUNCTION(0x3, "uart0"), /* TX */
- SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 8)),  /* PB_EINT8 */
+ SUNXI_FUNCTION(0x2, "i2c1"),  /* SDA */
+ SUNXI_FUNCTION(0x3, "uart0"), /* TX */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 8)),  /* PB_EINT8 */
SUNXI_PIN(SUNXI_PINCTRL_PIN(B, 9),
  SUNXI_FUNCTION(0x0, "gpio_in"),
  SUNXI_FUNCTION(0x1, "gpio_out"),
- SUNXI_FUNCTION(0x2, "i2c1"),  /* SCK */
- 

[PATCH v3 2/9] clk: sunxi-ng: v3s: add the missing PLL_DDR1

2019-06-22 Thread Icenowy Zheng
The user manual of V3/V3s/S3 declares a PLL_DDR1, however it was forgotten
when developing the V3s CCU driver.

Add back the missing PLL_DDR1.

Fixes: d0f11d14b0bc ("clk: sunxi-ng: add support for V3s CCU")
Signed-off-by: Icenowy Zheng 
---
No changes in v3/v2.

 drivers/clk/sunxi-ng/ccu-sun8i-v3s.c | 19 +++
 drivers/clk/sunxi-ng/ccu-sun8i-v3s.h |  6 --
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c 
b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
index cbbf06d42c2c..90a9ca5fac80 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
@@ -77,7 +77,7 @@ static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_ve_clk, "pll-ve",
BIT(28),/* lock */
0);
 
-static SUNXI_CCU_NKM_WITH_GATE_LOCK(pll_ddr_clk, "pll-ddr",
+static SUNXI_CCU_NKM_WITH_GATE_LOCK(pll_ddr0_clk, "pll-ddr0",
"osc24M", 0x020,
8, 5,   /* N */
4, 2,   /* K */
@@ -116,6 +116,14 @@ static 
SUNXI_CCU_NK_WITH_GATE_LOCK_POSTDIV(pll_periph1_clk, "pll-periph1",
   2,   /* post-div */
   0);
 
+static SUNXI_CCU_NM_WITH_GATE_LOCK(pll_ddr1_clk, "pll-ddr1",
+  "osc24M", 0x04c,
+  8, 7,/* N */
+  0, 2,/* M */
+  BIT(31), /* gate */
+  BIT(28), /* lock */
+  0);
+
 static const char * const cpu_parents[] = { "osc32k", "osc24M",
 "pll-cpu", "pll-cpu" };
 static SUNXI_CCU_MUX(cpu_clk, "cpu", cpu_parents,
@@ -303,7 +311,8 @@ static SUNXI_CCU_GATE(usb_phy0_clk, "usb-phy0", 
"osc24M",
 static SUNXI_CCU_GATE(usb_ohci0_clk,   "usb-ohci0","osc24M",
  0x0cc, BIT(16), 0);
 
-static const char * const dram_parents[] = { "pll-ddr", "pll-periph0-2x" };
+static const char * const dram_parents[] = { "pll-ddr0", "pll-ddr1",
+"pll-periph0-2x" };
 static SUNXI_CCU_M_WITH_MUX(dram_clk, "dram", dram_parents,
0x0f4, 0, 4, 20, 2, CLK_IS_CRITICAL);
 
@@ -363,10 +372,11 @@ static struct ccu_common *sun8i_v3s_ccu_clks[] = {
&pll_audio_base_clk.common,
&pll_video_clk.common,
&pll_ve_clk.common,
-   &pll_ddr_clk.common,
+   &pll_ddr0_clk.common,
&pll_periph0_clk.common,
&pll_isp_clk.common,
&pll_periph1_clk.common,
+   &pll_ddr1_clk.common,
&cpu_clk.common,
&axi_clk.common,
&ahb1_clk.common,
@@ -451,11 +461,12 @@ static struct clk_hw_onecell_data sun8i_v3s_hw_clks = {
[CLK_PLL_AUDIO_8X]  = &pll_audio_8x_clk.hw,
[CLK_PLL_VIDEO] = &pll_video_clk.common.hw,
[CLK_PLL_VE]= &pll_ve_clk.common.hw,
-   [CLK_PLL_DDR]   = &pll_ddr_clk.common.hw,
+   [CLK_PLL_DDR0]  = &pll_ddr0_clk.common.hw,
[CLK_PLL_PERIPH0]   = &pll_periph0_clk.common.hw,
[CLK_PLL_PERIPH0_2X]= &pll_periph0_2x_clk.hw,
[CLK_PLL_ISP]   = &pll_isp_clk.common.hw,
[CLK_PLL_PERIPH1]   = &pll_periph1_clk.common.hw,
+   [CLK_PLL_DDR1]  = &pll_ddr1_clk.common.hw,
[CLK_CPU]   = &cpu_clk.common.hw,
[CLK_AXI]   = &axi_clk.common.hw,
[CLK_AHB1]  = &ahb1_clk.common.hw,
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h 
b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h
index fbc1da8b4520..10af324bd6b1 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h
@@ -20,7 +20,7 @@
 #define CLK_PLL_AUDIO_8X   5
 #define CLK_PLL_VIDEO  6
 #define CLK_PLL_VE 7
-#define CLK_PLL_DDR8
+#define CLK_PLL_DDR0   8
 #define CLK_PLL_PERIPH09
 #define CLK_PLL_PERIPH0_2X 10
 #define CLK_PLL_ISP11
@@ -49,6 +49,8 @@
 
 /* And the GPU module clock is exported */
 
-#define CLK_NUMBER (CLK_MIPI_CSI + 1)
+#define CLK_PLL_DDR1   74
+
+#define CLK_NUMBER (CLK_PLL_DDR1 + 1)
 
 #endif /* _CCU_SUN8I_H3_H_ */
-- 
2.21.0



[PATCH v3 0/9] Support for Allwinner V3/S3L and Sochip S3

2019-06-22 Thread Icenowy Zheng
This patchset tries to add support for Allwinner V3/S3L and Sochip S3.

Allwinner V3/V3s/S3L and Sochip S3 share the same die, but with
different packages. V3 is BGA w/o co-packaged DDR, V3s is QFP w/ DDR2,
S3L is BGA w/ DDR2 and S3 is BGA w/ DDR3. (S3 and S3L are pin-compatible,
but because of the different DDR, the DDR voltage is different
between the two variants). Because the pin count of V3s is
restricted by the package, some pins are not bound on V3s, but
they're bound on V3/S3/S3L.

Currently the kernel is only prepared for the features available on V3s.
This patchset adds the features missing on V3s for using them on
V3/S3/S3L, and adds bindings for V3/S3/S3L. It also adds an S3 SoM by
Sipeed, called Lichee Zero Plus.

Icenowy Zheng (9):
  pinctrl: sunxi: v3s: introduce support for V3
  clk: sunxi-ng: v3s: add the missing PLL_DDR1
  dt-bindings: clk: sunxi-ccu: add compatible string for V3 CCU
  clk: sunxi-ng: v3s: add Allwinner V3 support
  dt-bindings: vendor-prefixes: add SoChip
  ARM: sunxi: dts: s3/s3l/v3: add DTSI files for S3/S3L/V3 SoCs
  dt-bindings: vendor-prefixes: add Sipeed
  dt-bindings: arm: sunxi: add binding for Lichee Zero Plus core board
  ARM: dts: sun8i: s3: add devicetree for Lichee zero plus w/ S3

 .../devicetree/bindings/arm/sunxi.yaml|   5 +
 .../clock/allwinner,sun4i-a10-ccu.yaml|   1 +
 .../devicetree/bindings/vendor-prefixes.yaml  |   4 +
 arch/arm/boot/dts/Makefile|   1 +
 .../boot/dts/sun8i-s3-lichee-zero-plus.dts|   8 +
 .../dts/sun8i-s3-s3l-lichee-zero-plus.dtsi|  44 ++
 arch/arm/boot/dts/sun8i-s3.dtsi   |   6 +
 arch/arm/boot/dts/sun8i-s3l.dtsi  |   6 +
 arch/arm/boot/dts/sun8i-v3.dtsi   |  14 +
 drivers/clk/sunxi-ng/ccu-sun8i-v3s.c  | 244 -
 drivers/clk/sunxi-ng/ccu-sun8i-v3s.h  |   6 +-
 drivers/pinctrl/sunxi/pinctrl-sun8i-v3s.c | 473 ++
 drivers/pinctrl/sunxi/pinctrl-sunxi.h |   2 +
 include/dt-bindings/clock/sun8i-v3s-ccu.h |   4 +
 include/dt-bindings/reset/sun8i-v3s-ccu.h |   3 +
 15 files changed, 704 insertions(+), 117 deletions(-)
 create mode 100644 arch/arm/boot/dts/sun8i-s3-lichee-zero-plus.dts
 create mode 100644 arch/arm/boot/dts/sun8i-s3-s3l-lichee-zero-plus.dtsi
 create mode 100644 arch/arm/boot/dts/sun8i-s3.dtsi
 create mode 100644 arch/arm/boot/dts/sun8i-s3l.dtsi
 create mode 100644 arch/arm/boot/dts/sun8i-v3.dtsi

-- 
2.21.0



Re: [PATCH v3] arm64: defconfig: Enable Panfrost and Lima drivers

2019-06-22 Thread Chanwoo Choi
Hi Krzysztof,

2019년 6월 23일 (일) 오전 4:20, Krzysztof Kozlowski 님이 작성:
>
> Enable support for Mali GPU with Panfrost and Lima drivers for:
> 1. Samsung Exynos5433 and Exynos7 (having Mali T760),
> 2. Allwiner A64 and H5 (Mali 400/450).
>
> Signed-off-by: Krzysztof Kozlowski 
>
> ---
>
> Changes since v1:
> 1. Enable Lima driver
> ---
>  arch/arm64/configs/defconfig | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
> index fbbc065415d4..3d31611368af 100644
> --- a/arch/arm64/configs/defconfig
> +++ b/arch/arm64/configs/defconfig
> @@ -518,6 +518,8 @@ CONFIG_DRM_HISI_HIBMC=m
>  CONFIG_DRM_HISI_KIRIN=m
>  CONFIG_DRM_MESON=m
>  CONFIG_DRM_PL111=m
> +CONFIG_DRM_LIMA=m
> +CONFIG_DRM_PANFROST=m
>  CONFIG_FB=y
>  CONFIG_FB_MODE_HELPERS=y
>  CONFIG_BACKLIGHT_GENERIC=m
> @@ -718,7 +720,6 @@ CONFIG_ARCH_TEGRA_194_SOC=y
>  CONFIG_ARCH_K3_AM6_SOC=y
>  CONFIG_SOC_TI=y
>  CONFIG_TI_SCI_PM_DOMAINS=y
> -CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y

The Exynos5433-tm2 board supports the exynos-bus device, which
uses the simple_ondemand governor of devfreq.
Why do you remove this configuration from the defconfig?

>  CONFIG_EXTCON_USB_GPIO=y
>  CONFIG_EXTCON_USBC_CROS_EC=y
>  CONFIG_MEMORY=y
> --
> 2.17.1
>


-- 
Best Regards,
Chanwoo Choi


Re: [PATCH v1 2/3] OPP: Add function to look up required OPP's for a given OPP

2019-06-22 Thread Chanwoo Choi
Hi,

2019년 6월 23일 (일) 오전 6:42, Saravana Kannan 님이 작성:
>
> On Sat, Jun 22, 2019 at 4:50 AM Chanwoo Choi  wrote:
> >
> > Hi,
> >
> > Absolutely, I like this approach. I think that it is necessary to make
> > the connection
> > between frequencies of devices.
>
> Happy to hear that.
>
> > But, I have a question on below.
> >
> > 2019년 6월 22일 (토) 오전 9:35, Saravana Kannan 님이 작성:
> > >
> > > Add a function that allows looking up required OPPs given a source OPP
> > > table, destination OPP table and the source OPP.
> > >
> > > Signed-off-by: Saravana Kannan 
> > > ---
> > >  drivers/opp/core.c | 54 ++
> > >  include/linux/pm_opp.h | 11 +
> > >  2 files changed, 65 insertions(+)
> > >
> > > diff --git a/drivers/opp/core.c b/drivers/opp/core.c
> > > index 74c7bdc6f463..4f7870bffbf8 100644
> > > --- a/drivers/opp/core.c
> > > +++ b/drivers/opp/core.c
> > > @@ -1830,6 +1830,60 @@ void dev_pm_opp_put_genpd_virt_dev(struct 
> > > opp_table *opp_table,
> > > dev_err(virt_dev, "Failed to find required device 
> > > entry\n");
> > >  }
> > >
> > > +/**
> > > + * dev_pm_opp_xlate_opp() - Find required OPP for src_table OPP.
> > > + * @src_table: OPP table which has dst_table as one of its required OPP 
> > > table.
> > > + * @dst_table: Required OPP table of the src_table.
> > > + * @pstate: OPP of the src_table.
> > > + *
> > > + * This function returns the OPP (present in @dst_table) pointed out by 
> > > the
> > > + * "required-opps" property of the OPP (present in @src_table).
> > > + *
> > > + * The callers are required to call dev_pm_opp_put() for the returned 
> > > OPP after
> > > + * use.
> > > + *
> > > + * Return: destination table OPP on success, otherwise NULL on errors.
> > > + */
> > > +struct dev_pm_opp *dev_pm_opp_xlate_opp(struct opp_table *src_table,
> > > +   struct opp_table *dst_table,
> > > +   struct dev_pm_opp *src_opp)
> > > +{
> > > +   struct dev_pm_opp *opp, *dest_opp = NULL;
> > > +   int i;
> > > +
> > > +   if (!src_table || !dst_table || !src_opp)
> > > +   return NULL;
> > > +
> > > +   for (i = 0; i < src_table->required_opp_count; i++) {
> > > +   if (src_table->required_opp_tables[i]->np == 
> > > dst_table->np)
> > > +   break;
> > > +   }
> > > +
> > > +   if (unlikely(i == src_table->required_opp_count)) {
> > > +   pr_err("%s: Couldn't find matching OPP table (%p: %p)\n",
> > > +  __func__, src_table, dst_table);
> > > +   return NULL;
> > > +   }
> > > +
> > > +   mutex_lock(&src_table->lock);
> > > +
> > > +   list_for_each_entry(opp, &src_table->opp_list, node) {
> > > +   if (opp == src_opp) {
> > > +   dest_opp = opp->required_opps[i];
> >
> > Correct me if I am wrong. This patch assume that 'i' index is same on 
> > between
> > [1] and [2]. But in order to guarantee this assumption, all OPP entries
> > in the same opp_table have to have the same number of 'required-opps' 
> > properties
> > and keep the sequence among 'required-opps' entries.
> >
> > [1] src_table->required_opp_tables[i]->np
> > [2] opp->required_opps[I];
> >
> > For example, three OPP entries in the 'parent_bus_opp'
> > have the different sequence of 'required-opps' and the different
> > number of 'required-opps'. Is it no problem?
> >
> > parent_bus_opp: opp_table {
> > compatible = "operating-points-v2";
> >
> > opp2 {
> > opp-hz = /bits/ 64 <20>;
> > required-opps = <&child_bus_a_opp2>, <&child_bus_b_opp2>,
> > <&child_bus_c_opp2>;
> > };
> >
> > opp1 {
> > opp-hz = /bits/ 64 <20>;
> > // change the sequence between child_bus_b_opp2  and 
> > child_bus_c_opp2
> > required-opps = <&child_bus_a_opp2>, <&child_bus_c_opp2>,
> > <&child_bus_b_opp2>
> > };
> >
> > opp0 {
> > opp-hz = /bits/ 64 <20>;
> > // missing 'child_bus_a_opp2'
> > required-opps = <&child_bus_c_opp2>, <&child_bus_b_opp2>
> > };
> >
> > }
> >
>
> I get your question. If I'm not mistaken the OPP framework DT parsing
> code makes the assumption that the required-opps list has the phandles
> in the same order for each "row" in the OPP table. It actually only
> looks at the first OPP entry to figure out the list of required OPP
> tables.

Thanks for the description. It is the limitation of 'required-opps' until now.

>
> Technically one can write code to deal with random order of the
> required-opp list, but doesn't seem like that's worth it because
> there's no need to have that order all mixed up in DT. And even if
> someone wants to add support for that, I don't think improving the DT
> parsing to handle random order would be part of this patch series.

I understand the existing ' required-opps' only consider the same sequence
of entries which are inclu

[PATCH v3] e1000e: Make watchdog use delayed work

2019-06-22 Thread Detlev Casanova
Use delayed work instead of timers to run the watchdog of the e1000e
driver.

Simplify the code with one less middle function.

Signed-off-by: Detlev Casanova 
---
 drivers/net/ethernet/intel/e1000e/e1000.h  |  5 +-
 drivers/net/ethernet/intel/e1000e/netdev.c | 54 --
 2 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h 
b/drivers/net/ethernet/intel/e1000e/e1000.h
index be13227f1697..34cd67951aec 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -186,12 +186,13 @@ struct e1000_phy_regs {
 
 /* board specific private data structure */
 struct e1000_adapter {
-   struct timer_list watchdog_timer;
struct timer_list phy_info_timer;
struct timer_list blink_timer;
 
struct work_struct reset_task;
-   struct work_struct watchdog_task;
+   struct delayed_work watchdog_task;
+
+   struct workqueue_struct *e1000_workqueue;
 
const struct e1000_info *ei;
 
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 0e09bede42a2..6fb6918bc136 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1780,7 +1780,8 @@ static irqreturn_t e1000_intr_msi(int __always_unused 
irq, void *data)
}
/* guard against interrupt when we're going down */
if (!test_bit(__E1000_DOWN, &adapter->state))
-   mod_timer(&adapter->watchdog_timer, jiffies + 1);
+   queue_delayed_work(adapter->e1000_workqueue,
+  &adapter->watchdog_task, 1);
}
 
/* Reset on uncorrectable ECC error */
@@ -1860,7 +1861,8 @@ static irqreturn_t e1000_intr(int __always_unused irq, 
void *data)
}
/* guard against interrupt when we're going down */
if (!test_bit(__E1000_DOWN, &adapter->state))
-   mod_timer(&adapter->watchdog_timer, jiffies + 1);
+   queue_delayed_work(adapter->e1000_workqueue,
+  &adapter->watchdog_task, 1);
}
 
/* Reset on uncorrectable ECC error */
@@ -1905,7 +1907,8 @@ static irqreturn_t e1000_msix_other(int __always_unused 
irq, void *data)
hw->mac.get_link_status = true;
/* guard against interrupt when we're going down */
if (!test_bit(__E1000_DOWN, &adapter->state))
-   mod_timer(&adapter->watchdog_timer, jiffies + 1);
+   queue_delayed_work(adapter->e1000_workqueue,
+  &adapter->watchdog_task, 1);
}
 
if (!test_bit(__E1000_DOWN, &adapter->state))
@@ -4278,7 +4281,6 @@ void e1000e_down(struct e1000_adapter *adapter, bool 
reset)
 
napi_synchronize(&adapter->napi);
 
-   del_timer_sync(&adapter->watchdog_timer);
del_timer_sync(&adapter->phy_info_timer);
 
spin_lock(&adapter->stats64_lock);
@@ -5150,25 +5152,11 @@ static void e1000e_check_82574_phy_workaround(struct 
e1000_adapter *adapter)
}
 }
 
-/**
- * e1000_watchdog - Timer Call-back
- * @data: pointer to adapter cast into an unsigned long
- **/
-static void e1000_watchdog(struct timer_list *t)
-{
-   struct e1000_adapter *adapter = from_timer(adapter, t, watchdog_timer);
-
-   /* Do the rest outside of interrupt context */
-   schedule_work(&adapter->watchdog_task);
-
-   /* TODO: make this use queue_delayed_work() */
-}
-
 static void e1000_watchdog_task(struct work_struct *work)
 {
struct e1000_adapter *adapter = container_of(work,
 struct e1000_adapter,
-watchdog_task);
+watchdog_task.work);
struct net_device *netdev = adapter->netdev;
struct e1000_mac_info *mac = &adapter->hw.mac;
struct e1000_phy_info *phy = &adapter->hw.phy;
@@ -5395,8 +5383,9 @@ static void e1000_watchdog_task(struct work_struct *work)
 
/* Reset the timer */
if (!test_bit(__E1000_DOWN, &adapter->state))
-   mod_timer(&adapter->watchdog_timer,
- round_jiffies(jiffies + 2 * HZ));
+   queue_delayed_work(adapter->e1000_workqueue,
+  &adapter->watchdog_task,
+  round_jiffies(2 * HZ));
 }
 
 #define E1000_TX_FLAGS_CSUM0x0001
@@ -7251,11 +7240,21 @@ static int e1000_probe(struct pci_dev *pdev, const 
struct pci_device_id *ent)
goto err_eeprom;
}
 
-   timer_setup(&adapter->watchdog_timer, e1000_watchdog, 0);
+   adapter->e1000_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0,
+ 

Re: [PATCH V34 22/29] Lock down tracing and perf kprobes when in confidentiality mode

2019-06-22 Thread Masami Hiramatsu
On Fri, 21 Jun 2019 17:03:51 -0700
Matthew Garrett  wrote:

> From: David Howells 
> 
> Disallow the creation of perf and ftrace kprobes when the kernel is
> locked down in confidentiality mode by preventing their registration.
> This prevents kprobes from being used to access kernel memory to steal
> crypto data, but continues to allow the use of kprobes from signed
> modules.

Looks (and sounds) good to me.

Acked-by: Masami Hiramatsu 

Thank you,

> 
> Reported-by: Alexei Starovoitov 
> Signed-off-by: David Howells 
> Signed-off-by: Matthew Garrett 
> Cc: Naveen N. Rao 
> Cc: Anil S Keshavamurthy 
> Cc: da...@davemloft.net
> Cc: Masami Hiramatsu 
> ---
>  include/linux/security.h | 1 +
>  kernel/trace/trace_kprobe.c  | 5 +
>  security/lockdown/lockdown.c | 1 +
>  3 files changed, 7 insertions(+)
> 
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 3875f6df2ecc..e6e3e2403474 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -96,6 +96,7 @@ enum lockdown_reason {
>   LOCKDOWN_MMIOTRACE,
>   LOCKDOWN_INTEGRITY_MAX,
>   LOCKDOWN_KCORE,
> + LOCKDOWN_KPROBES,
>   LOCKDOWN_CONFIDENTIALITY_MAX,
>  };
>  
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index 5d5129b05df7..5a76a0f79d48 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -11,6 +11,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "trace_dynevent.h"
>  #include "trace_kprobe_selftest.h"
> @@ -415,6 +416,10 @@ static int __register_trace_kprobe(struct trace_kprobe 
> *tk)
>  {
>   int i, ret;
>  
> + ret = security_locked_down(LOCKDOWN_KPROBES);
> + if (ret)
> + return ret;
> +
>   if (trace_probe_is_registered(&tk->tp))
>   return -EINVAL;
>  
> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index 4c9b324dfc55..5a08c17f224d 100644
> --- a/security/lockdown/lockdown.c
> +++ b/security/lockdown/lockdown.c
> @@ -32,6 +32,7 @@ static char 
> *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
>   [LOCKDOWN_MMIOTRACE] = "unsafe mmio",
>   [LOCKDOWN_INTEGRITY_MAX] = "integrity",
>   [LOCKDOWN_KCORE] = "/proc/kcore access",
> + [LOCKDOWN_KPROBES] = "use of kprobes",
>   [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
>  };
>  
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
> 


-- 
Masami Hiramatsu 


Re: [PATCH V34 29/29] lockdown: Print current->comm in restriction messages

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:58PM -0700, Matthew Garrett wrote:
> Print the content of current->comm in messages generated by lockdown to
> indicate a restriction that was hit.  This makes it a bit easier to find
> out what caused the message.
> 
> The message now patterned something like:
> 
> Lockdown: :  is restricted; see man kernel_lockdown.7
> 
> Signed-off-by: David Howells 
> Signed-off-by: Matthew Garrett 
> ---
>  security/lockdown/lockdown.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index 98f9ee0026d5..9ca6f442fbc7 100644
> --- a/security/lockdown/lockdown.c
> +++ b/security/lockdown/lockdown.c
> @@ -83,8 +83,8 @@ static int lockdown_is_locked_down(enum lockdown_reason 
> what)
>  {
>   if ((kernel_locked_down >= what)) {

To satisfy my paranoia, can you just add here:

if (WARN(what > LOCKDOWN_..._MAX))
return -EPERM;

With that:

Reviewed-by: Kees Cook 

-Kees

>   if (lockdown_reasons[what])
> - pr_notice("Lockdown: %s is restricted; see man 
> kernel_lockdown.7\n",
> -   lockdown_reasons[what]);
> + pr_notice("Lockdown: %s: %s is restricted; see man 
> kernel_lockdown.7\n",
> +   current->comm, lockdown_reasons[what]);
>   return -EPERM;
>   }
>  
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
> 

-- 
Kees Cook


Re: [PATCH V34 24/29] Lock down perf when in confidentiality mode

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:53PM -0700, Matthew Garrett wrote:
> From: David Howells 
> 
> Disallow the use of certain perf facilities that might allow userspace to
> access kernel data.
> 
> Signed-off-by: David Howells 
> Signed-off-by: Matthew Garrett 
> Cc: Peter Zijlstra 
> Cc: Ingo Molnar 
> Cc: Arnaldo Carvalho de Melo 
> ---
>  include/linux/security.h | 1 +
>  kernel/events/core.c | 7 +++
>  security/lockdown/lockdown.c | 1 +
>  3 files changed, 9 insertions(+)
> 
> diff --git a/include/linux/security.h b/include/linux/security.h
> index de0d37b1fe79..53ea85889a48 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -98,6 +98,7 @@ enum lockdown_reason {
>   LOCKDOWN_KCORE,
>   LOCKDOWN_KPROBES,
>   LOCKDOWN_BPF_READ,
> + LOCKDOWN_PERF,
>   LOCKDOWN_CONFIDENTIALITY_MAX,
>  };
>  
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 72d06e302e99..77f36551756e 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -10731,6 +10731,13 @@ SYSCALL_DEFINE5(perf_event_open,
>   return -EINVAL;
>   }
>  
> + err = security_locked_down(LOCKDOWN_PERF);
> + if (err && (attr.sample_type & PERF_SAMPLE_REGS_INTR))
> + /* REGS_INTR can leak data, lockdown must prevent this */
> + return err;
> + else
> + err = 0;
> +
>   /* Only privileged users can get physical addresses */
>   if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) &&
>   perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))

With moar capable() ordering fixed...

Reviewed-by: Kees Cook 

-Kees

> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index 2eea2cc13117..a7e75c614416 100644
> --- a/security/lockdown/lockdown.c
> +++ b/security/lockdown/lockdown.c
> @@ -34,6 +34,7 @@ static char 
> *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
>   [LOCKDOWN_KCORE] = "/proc/kcore access",
>   [LOCKDOWN_KPROBES] = "use of kprobes",
>   [LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM",
> + [LOCKDOWN_PERF] = "unsafe use of perf",
>   [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
>  };
>  
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
> 

-- 
Kees Cook


Re: [PATCH V34 23/29] bpf: Restrict bpf when kernel lockdown is in confidentiality mode

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:52PM -0700, Matthew Garrett wrote:
> From: David Howells 
> 
> There are some bpf functions that can be used to read kernel memory:
> bpf_probe_read, bpf_probe_write_user and bpf_trace_printk.  These allow
> private keys in kernel memory (e.g. the hibernation image signing key) to
> be read by an eBPF program and kernel memory to be altered without
> restriction. Disable them if the kernel has been locked down in
> confidentiality mode.
> 
> Suggested-by: Alexei Starovoitov 
> Signed-off-by: David Howells 

Reviewed-by: Kees Cook 

-Kees

> Signed-off-by: Matthew Garrett 
> cc: net...@vger.kernel.org
> cc: Chun-Yi Lee 
> cc: Alexei Starovoitov 
> Cc: Daniel Borkmann 
> ---
>  include/linux/security.h |  1 +
>  kernel/trace/bpf_trace.c | 20 +++-
>  security/lockdown/lockdown.c |  1 +
>  3 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/security.h b/include/linux/security.h
> index e6e3e2403474..de0d37b1fe79 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -97,6 +97,7 @@ enum lockdown_reason {
>   LOCKDOWN_INTEGRITY_MAX,
>   LOCKDOWN_KCORE,
>   LOCKDOWN_KPROBES,
> + LOCKDOWN_BPF_READ,
>   LOCKDOWN_CONFIDENTIALITY_MAX,
>  };
>  
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index d64c00afceb5..638f9b00a8df 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -137,6 +137,10 @@ BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const 
> void *, unsafe_ptr)
>  {
>   int ret;
>  
> + ret = security_locked_down(LOCKDOWN_BPF_READ);
> + if (ret)
> + return ret;
> +
>   ret = probe_kernel_read(dst, unsafe_ptr, size);
>   if (unlikely(ret < 0))
>   memset(dst, 0, size);
> @@ -156,6 +160,12 @@ static const struct bpf_func_proto bpf_probe_read_proto 
> = {
>  BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
>  u32, size)
>  {
> + int ret;
> +
> + ret = security_locked_down(LOCKDOWN_BPF_READ);
> + if (ret)
> + return ret;
> +
>   /*
>* Ensure we're in user context which is safe for the helper to
>* run. This helper has no business in a kthread.
> @@ -205,7 +215,11 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, 
> u64, arg1,
>   int fmt_cnt = 0;
>   u64 unsafe_addr;
>   char buf[64];
> - int i;
> + int i, ret;
> +
> + ret = security_locked_down(LOCKDOWN_BPF_READ);
> + if (ret)
> + return ret;
>  
>   /*
>* bpf_check()->check_func_arg()->check_stack_boundary()
> @@ -534,6 +548,10 @@ BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
>  {
>   int ret;
>  
> + ret = security_locked_down(LOCKDOWN_BPF_READ);
> + if (ret)
> + return ret;
> +
>   /*
>* The strncpy_from_unsafe() call will likely not fill the entire
>* buffer, but that's okay in this circumstance as we're probing
> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index 5a08c17f224d..2eea2cc13117 100644
> --- a/security/lockdown/lockdown.c
> +++ b/security/lockdown/lockdown.c
> @@ -33,6 +33,7 @@ static char 
> *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
>   [LOCKDOWN_INTEGRITY_MAX] = "integrity",
>   [LOCKDOWN_KCORE] = "/proc/kcore access",
>   [LOCKDOWN_KPROBES] = "use of kprobes",
> + [LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM",
>   [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
>  };
>  
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
> 

-- 
Kees Cook


Re: [PATCH V34 22/29] Lock down tracing and perf kprobes when in confidentiality mode

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:51PM -0700, Matthew Garrett wrote:
> From: David Howells 
> 
> Disallow the creation of perf and ftrace kprobes when the kernel is
> locked down in confidentiality mode by preventing their registration.
> This prevents kprobes from being used to access kernel memory to steal
> crypto data, but continues to allow the use of kprobes from signed
> modules.
> 
> Reported-by: Alexei Starovoitov 
> Signed-off-by: David Howells 
> Signed-off-by: Matthew Garrett 
> Cc: Naveen N. Rao 
> Cc: Anil S Keshavamurthy 
> Cc: da...@davemloft.net
> Cc: Masami Hiramatsu 

Reviewed-by: Kees Cook 

-Kees

> ---
>  include/linux/security.h | 1 +
>  kernel/trace/trace_kprobe.c  | 5 +
>  security/lockdown/lockdown.c | 1 +
>  3 files changed, 7 insertions(+)
> 
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 3875f6df2ecc..e6e3e2403474 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -96,6 +96,7 @@ enum lockdown_reason {
>   LOCKDOWN_MMIOTRACE,
>   LOCKDOWN_INTEGRITY_MAX,
>   LOCKDOWN_KCORE,
> + LOCKDOWN_KPROBES,
>   LOCKDOWN_CONFIDENTIALITY_MAX,
>  };
>  
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index 5d5129b05df7..5a76a0f79d48 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -11,6 +11,7 @@
>  #include 
>  #include 
>  #include 
> +#include <linux/security.h>
>  
>  #include "trace_dynevent.h"
>  #include "trace_kprobe_selftest.h"
> @@ -415,6 +416,10 @@ static int __register_trace_kprobe(struct trace_kprobe 
> *tk)
>  {
>   int i, ret;
>  
> + ret = security_locked_down(LOCKDOWN_KPROBES);
> + if (ret)
> + return ret;
> +
>   if (trace_probe_is_registered(&tk->tp))
>   return -EINVAL;
>  
> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index 4c9b324dfc55..5a08c17f224d 100644
> --- a/security/lockdown/lockdown.c
> +++ b/security/lockdown/lockdown.c
> @@ -32,6 +32,7 @@ static char 
> *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
>   [LOCKDOWN_MMIOTRACE] = "unsafe mmio",
>   [LOCKDOWN_INTEGRITY_MAX] = "integrity",
>   [LOCKDOWN_KCORE] = "/proc/kcore access",
> + [LOCKDOWN_KPROBES] = "use of kprobes",
>   [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
>  };
>  
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
> 

-- 
Kees Cook


Re: [PATCH V34 21/29] Lock down /proc/kcore

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:50PM -0700, Matthew Garrett wrote:
> From: David Howells 
> 
> Disallow access to /proc/kcore when the kernel is locked down to prevent
> access to cryptographic data. This is limited to lockdown
> confidentiality mode and is still permitted in integrity mode.
> 
> Signed-off-by: David Howells 
> Signed-off-by: Matthew Garrett 
> ---
>  fs/proc/kcore.c  | 5 +
>  include/linux/security.h | 1 +
>  security/lockdown/lockdown.c | 1 +
>  3 files changed, 7 insertions(+)
> 
> diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
> index d29d869abec1..4e95edb1e282 100644
> --- a/fs/proc/kcore.c
> +++ b/fs/proc/kcore.c
> @@ -31,6 +31,7 @@
>  #include 
>  #include 
>  #include 
> +#include <linux/security.h>
>  #include 
>  #include "internal.h"
>  
> @@ -545,6 +546,10 @@ read_kcore(struct file *file, char __user *buffer, 
> size_t buflen, loff_t *fpos)
>  
>  static int open_kcore(struct inode *inode, struct file *filp)
>  {
> + int ret = security_locked_down(LOCKDOWN_KCORE);
> +
> + if (ret)
> + return ret;
>   if (!capable(CAP_SYS_RAWIO))
>   return -EPERM;

Another capable() check ordering fix needed. With that:

Reviewed-by: Kees Cook 

-Kees

>  
> diff --git a/include/linux/security.h b/include/linux/security.h
> index c649cb91e762..3875f6df2ecc 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -95,6 +95,7 @@ enum lockdown_reason {
>   LOCKDOWN_MODULE_PARAMETERS,
>   LOCKDOWN_MMIOTRACE,
>   LOCKDOWN_INTEGRITY_MAX,
> + LOCKDOWN_KCORE,
>   LOCKDOWN_CONFIDENTIALITY_MAX,
>  };
>  
> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index cd86ed9f4d4b..4c9b324dfc55 100644
> --- a/security/lockdown/lockdown.c
> +++ b/security/lockdown/lockdown.c
> @@ -31,6 +31,7 @@ static char 
> *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
>   [LOCKDOWN_MODULE_PARAMETERS] = "unsafe module parameters",
>   [LOCKDOWN_MMIOTRACE] = "unsafe mmio",
>   [LOCKDOWN_INTEGRITY_MAX] = "integrity",
> + [LOCKDOWN_KCORE] = "/proc/kcore access",
>   [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
>  };
>  
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
> 

-- 
Kees Cook


Re: [PATCH V34 20/29] x86/mmiotrace: Lock down the testmmiotrace module

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:49PM -0700, Matthew Garrett wrote:
> From: David Howells 
> 
> The testmmiotrace module shouldn't be permitted when the kernel is locked
> down as it can be used to arbitrarily read and write MMIO space. This is
> a runtime check rather than buildtime in order to allow configurations
> where the same kernel may be run in both locked down or permissive modes
> depending on local policy.
> 
> Suggested-by: Thomas Gleixner 
> Signed-off-by: David Howells 

-Kees

> Signed-off-by: Matthew Garrett 
> cc: Thomas Gleixner 
> cc: Steven Rostedt 
> cc: Ingo Molnar 
> cc: "H. Peter Anvin" 
> cc: x...@kernel.org
> ---
>  arch/x86/mm/testmmiotrace.c  | 5 +
>  include/linux/security.h | 1 +
>  security/lockdown/lockdown.c | 1 +
>  3 files changed, 7 insertions(+)
> 
> diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
> index f6ae6830b341..6b9486baa2e9 100644
> --- a/arch/x86/mm/testmmiotrace.c
> +++ b/arch/x86/mm/testmmiotrace.c
> @@ -7,6 +7,7 @@
>  #include 
>  #include 
>  #include 
> +#include <linux/security.h>
>  
>  static unsigned long mmio_address;
>  module_param_hw(mmio_address, ulong, iomem, 0);
> @@ -114,6 +115,10 @@ static void do_test_bulk_ioremapping(void)
>  static int __init init(void)
>  {
>   unsigned long size = (read_far) ? (8 << 20) : (16 << 10);
> + int ret = security_locked_down(LOCKDOWN_MMIOTRACE);
> +
> + if (ret)
> + return ret;
>  
>   if (mmio_address == 0) {
>   pr_err("you have to use the module argument mmio_address.\n");
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 88064d7f6827..c649cb91e762 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -93,6 +93,7 @@ enum lockdown_reason {
>   LOCKDOWN_PCMCIA_CIS,
>   LOCKDOWN_TIOCSSERIAL,
>   LOCKDOWN_MODULE_PARAMETERS,
> + LOCKDOWN_MMIOTRACE,
>   LOCKDOWN_INTEGRITY_MAX,
>   LOCKDOWN_CONFIDENTIALITY_MAX,
>  };
> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index d03c4c296af7..cd86ed9f4d4b 100644
> --- a/security/lockdown/lockdown.c
> +++ b/security/lockdown/lockdown.c
> @@ -29,6 +29,7 @@ static char 
> *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
>   [LOCKDOWN_PCMCIA_CIS] = "direct PCMCIA CIS storage",
>   [LOCKDOWN_TIOCSSERIAL] = "reconfiguration of serial port IO",
>   [LOCKDOWN_MODULE_PARAMETERS] = "unsafe module parameters",
> + [LOCKDOWN_MMIOTRACE] = "unsafe mmio",
>   [LOCKDOWN_INTEGRITY_MAX] = "integrity",
>   [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
>  };
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
> 

-- 
Kees Cook


Re: [PATCH V34 19/29] Lock down module params that specify hardware parameters (eg. ioport)

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:48PM -0700, Matthew Garrett wrote:
> From: David Howells 
> 
> Provided an annotation for module parameters that specify hardware
> parameters (such as io ports, iomem addresses, irqs, dma channels, fixed
> dma buffers and other types).
> 
> Suggested-by: Alan Cox 
> Signed-off-by: David Howells 

Reviewed-by: Kees Cook 

-Kees

> Signed-off-by: Matthew Garrett 
> ---
>  include/linux/security.h |  1 +
>  kernel/params.c  | 27 ++-
>  security/lockdown/lockdown.c |  1 +
>  3 files changed, 24 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 61e3f4a62d16..88064d7f6827 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -92,6 +92,7 @@ enum lockdown_reason {
>   LOCKDOWN_ACPI_TABLES,
>   LOCKDOWN_PCMCIA_CIS,
>   LOCKDOWN_TIOCSSERIAL,
> + LOCKDOWN_MODULE_PARAMETERS,
>   LOCKDOWN_INTEGRITY_MAX,
>   LOCKDOWN_CONFIDENTIALITY_MAX,
>  };
> diff --git a/kernel/params.c b/kernel/params.c
> index ce89f757e6da..f94fe79e331d 100644
> --- a/kernel/params.c
> +++ b/kernel/params.c
> @@ -24,6 +24,7 @@
>  #include 
>  #include 
>  #include 
> +#include <linux/security.h>
>  
>  #ifdef CONFIG_SYSFS
>  /* Protects all built-in parameters, modules use their own param_lock */
> @@ -108,13 +109,19 @@ bool parameq(const char *a, const char *b)
>   return parameqn(a, b, strlen(a)+1);
>  }
>  
> -static void param_check_unsafe(const struct kernel_param *kp)
> +static bool param_check_unsafe(const struct kernel_param *kp,
> +const char *doing)
>  {
>   if (kp->flags & KERNEL_PARAM_FL_UNSAFE) {
>   pr_notice("Setting dangerous option %s - tainting kernel\n",
> kp->name);
>   add_taint(TAINT_USER, LOCKDEP_STILL_OK);
>   }
> +
> + if (kp->flags & KERNEL_PARAM_FL_HWPARAM &&
> + security_locked_down(LOCKDOWN_MODULE_PARAMETERS))
> + return false;
> + return true;
>  }
>  
>  static int parse_one(char *param,
> @@ -144,8 +151,10 @@ static int parse_one(char *param,
>   pr_debug("handling %s with %p\n", param,
>   params[i].ops->set);
>   kernel_param_lock(params[i].mod);
> - param_check_unsafe(&params[i]);
> - err = params[i].ops->set(val, &params[i]);
> + if (param_check_unsafe(&params[i], doing))
> + err = params[i].ops->set(val, &params[i]);
> + else
> + err = -EPERM;
>   kernel_param_unlock(params[i].mod);
>   return err;
>   }
> @@ -553,6 +562,12 @@ static ssize_t param_attr_show(struct module_attribute 
> *mattr,
>   return count;
>  }
>  
> +#ifdef CONFIG_MODULES
> +#define mod_name(mod) (mod)->name
> +#else
> +#define mod_name(mod) "unknown"
> +#endif
> +
>  /* sysfs always hands a nul-terminated string in buf.  We rely on that. */
>  static ssize_t param_attr_store(struct module_attribute *mattr,
>   struct module_kobject *mk,
> @@ -565,8 +580,10 @@ static ssize_t param_attr_store(struct module_attribute 
> *mattr,
>   return -EPERM;
>  
>   kernel_param_lock(mk->mod);
> - param_check_unsafe(attribute->param);
> - err = attribute->param->ops->set(buf, attribute->param);
> + if (param_check_unsafe(attribute->param, mod_name(mk->mod)))
> + err = attribute->param->ops->set(buf, attribute->param);
> + else
> + err = -EPERM;
>   kernel_param_unlock(mk->mod);
>   if (!err)
>   return len;
> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index c89046dc2155..d03c4c296af7 100644
> --- a/security/lockdown/lockdown.c
> +++ b/security/lockdown/lockdown.c
> @@ -28,6 +28,7 @@ static char 
> *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
>   [LOCKDOWN_ACPI_TABLES] = "modified ACPI tables",
>   [LOCKDOWN_PCMCIA_CIS] = "direct PCMCIA CIS storage",
>   [LOCKDOWN_TIOCSSERIAL] = "reconfiguration of serial port IO",
> + [LOCKDOWN_MODULE_PARAMETERS] = "unsafe module parameters",
>   [LOCKDOWN_INTEGRITY_MAX] = "integrity",
>   [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
>  };
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
> 

-- 
Kees Cook


Re: [PATCH V34 18/29] Lock down TIOCSSERIAL

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:47PM -0700, Matthew Garrett wrote:
> From: David Howells 
> 
> Lock down TIOCSSERIAL as that can be used to change the ioport and irq
> settings on a serial port.  This only appears to be an issue for the serial
> drivers that use the core serial code.  All other drivers seem to either
> ignore attempts to change port/irq or give an error.
> 
> Reported-by: Greg Kroah-Hartman 
> Signed-off-by: David Howells 
> Signed-off-by: Matthew Garrett 
> cc: Jiri Slaby 
> Cc: linux-ser...@vger.kernel.org
> ---
>  drivers/tty/serial/serial_core.c | 5 +
>  include/linux/security.h | 1 +
>  security/lockdown/lockdown.c | 1 +
>  3 files changed, 7 insertions(+)
> 
> diff --git a/drivers/tty/serial/serial_core.c 
> b/drivers/tty/serial/serial_core.c
> index 351843f847c0..a84f231a5df4 100644
> --- a/drivers/tty/serial/serial_core.c
> +++ b/drivers/tty/serial/serial_core.c
> @@ -22,6 +22,7 @@
>  #include 
>  #include 
>  #include 
> +#include <linux/security.h>
>  
>  #include 
>  #include 
> @@ -852,6 +853,10 @@ static int uart_set_info(struct tty_struct *tty, struct 
> tty_port *port,
>   new_flags = (__force upf_t)new_info->flags;
>   old_custom_divisor = uport->custom_divisor;
>  
> + retval = security_locked_down(LOCKDOWN_TIOCSSERIAL);
> + if (retval && (change_port || change_irq))
> + goto exit;
> +
>   if (!capable(CAP_SYS_ADMIN)) {
>   retval = -EPERM;
>   if (change_irq || change_port ||

This should be moved after the capable test. With that fixed:

Reviewed-by: Kees Cook 

-Kees

> diff --git a/include/linux/security.h b/include/linux/security.h
> index 03c125b277ca..61e3f4a62d16 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -91,6 +91,7 @@ enum lockdown_reason {
>   LOCKDOWN_MSR,
>   LOCKDOWN_ACPI_TABLES,
>   LOCKDOWN_PCMCIA_CIS,
> + LOCKDOWN_TIOCSSERIAL,
>   LOCKDOWN_INTEGRITY_MAX,
>   LOCKDOWN_CONFIDENTIALITY_MAX,
>  };
> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index 7be3e8fb5847..c89046dc2155 100644
> --- a/security/lockdown/lockdown.c
> +++ b/security/lockdown/lockdown.c
> @@ -27,6 +27,7 @@ static char 
> *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
>   [LOCKDOWN_MSR] = "raw MSR access",
>   [LOCKDOWN_ACPI_TABLES] = "modified ACPI tables",
>   [LOCKDOWN_PCMCIA_CIS] = "direct PCMCIA CIS storage",
> + [LOCKDOWN_TIOCSSERIAL] = "reconfiguration of serial port IO",
>   [LOCKDOWN_INTEGRITY_MAX] = "integrity",
>   [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
>  };
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
> 

-- 
Kees Cook


Re: [PATCH V34 16/29] acpi: Disable ACPI table override if the kernel is locked down

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:45PM -0700, Matthew Garrett wrote:
> From: Linn Crosetto 
> 
> From the kernel documentation (initrd_table_override.txt):
> 
>   If the ACPI_INITRD_TABLE_OVERRIDE compile option is true, it is possible
>   to override nearly any ACPI table provided by the BIOS with an
>   instrumented, modified one.
> 
> When lockdown is enabled, the kernel should disallow any unauthenticated
> changes to kernel space.  ACPI tables contain code invoked by the kernel,
> so do not allow ACPI tables to be overridden if the kernel is locked down.
> 
> Signed-off-by: Linn Crosetto 

Reviewed-by: Kees Cook 

-Kees

> Signed-off-by: David Howells 
> Signed-off-by: Matthew Garrett 
> cc: linux-a...@vger.kernel.org
> ---
>  drivers/acpi/tables.c | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
> index 8fccbe49612a..41d9ccd0e075 100644
> --- a/drivers/acpi/tables.c
> +++ b/drivers/acpi/tables.c
> @@ -34,6 +34,7 @@
>  #include 
>  #include 
>  #include 
> +#include <linux/security.h>
>  #include "internal.h"
>  
>  #ifdef CONFIG_ACPI_CUSTOM_DSDT
> @@ -539,6 +540,11 @@ void __init acpi_table_upgrade(void)
>   if (table_nr == 0)
>   return;
>  
> + if (security_locked_down(LOCKDOWN_ACPI_TABLES)) {
> + pr_notice("kernel is locked down, ignoring table override\n");
> + return;
> + }
> +
>   acpi_tables_addr =
>   memblock_find_in_range(0, ACPI_TABLE_UPGRADE_MAX_PHYS,
>  all_tables_size, PAGE_SIZE);
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
> 

-- 
Kees Cook


Re: [PATCH V34 17/29] Prohibit PCMCIA CIS storage when the kernel is locked down

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:46PM -0700, Matthew Garrett wrote:
> From: David Howells 
> 
> Prohibit replacement of the PCMCIA Card Information Structure when the
> kernel is locked down.
> 
> Suggested-by: Dominik Brodowski 
> Signed-off-by: David Howells 

Reviewed-by: Kees Cook 

-Kees

> Signed-off-by: Matthew Garrett 
> ---
>  drivers/pcmcia/cistpl.c  | 5 +
>  include/linux/security.h | 1 +
>  security/lockdown/lockdown.c | 1 +
>  3 files changed, 7 insertions(+)
> 
> diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c
> index ac0672b8dfca..379c53610102 100644
> --- a/drivers/pcmcia/cistpl.c
> +++ b/drivers/pcmcia/cistpl.c
> @@ -24,6 +24,7 @@
>  #include 
>  #include 
>  #include 
> +#include <linux/security.h>
>  #include 
>  #include 
>  
> @@ -1578,6 +1579,10 @@ static ssize_t pccard_store_cis(struct file *filp, 
> struct kobject *kobj,
>   struct pcmcia_socket *s;
>   int error;
>  
> + error = security_locked_down(LOCKDOWN_PCMCIA_CIS);
> + if (error)
> + return error;
> +
>   s = to_socket(container_of(kobj, struct device, kobj));
>  
>   if (off)
> diff --git a/include/linux/security.h b/include/linux/security.h
> index cc2b5ee4cadd..03c125b277ca 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -90,6 +90,7 @@ enum lockdown_reason {
>   LOCKDOWN_IOPORT,
>   LOCKDOWN_MSR,
>   LOCKDOWN_ACPI_TABLES,
> + LOCKDOWN_PCMCIA_CIS,
>   LOCKDOWN_INTEGRITY_MAX,
>   LOCKDOWN_CONFIDENTIALITY_MAX,
>  };
> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index 1725224f0024..7be3e8fb5847 100644
> --- a/security/lockdown/lockdown.c
> +++ b/security/lockdown/lockdown.c
> @@ -26,6 +26,7 @@ static char 
> *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
>   [LOCKDOWN_IOPORT] = "raw io port access",
>   [LOCKDOWN_MSR] = "raw MSR access",
>   [LOCKDOWN_ACPI_TABLES] = "modified ACPI tables",
> + [LOCKDOWN_PCMCIA_CIS] = "direct PCMCIA CIS storage",
>   [LOCKDOWN_INTEGRITY_MAX] = "integrity",
>   [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
>  };
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
> 

-- 
Kees Cook


Re: [PATCH V34 14/29] ACPI: Limit access to custom_method when the kernel is locked down

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:43PM -0700, Matthew Garrett wrote:
> From: Matthew Garrett 
> 
> custom_method effectively allows arbitrary access to system memory, making
> it possible for an attacker to circumvent restrictions on module loading.
> Disable it if the kernel is locked down.
> 
> Signed-off-by: Matthew Garrett 

Reviewed-by: Kees Cook 

-Kees

> Signed-off-by: David Howells 
> cc: linux-a...@vger.kernel.org
> ---
>  drivers/acpi/custom_method.c | 6 ++
>  include/linux/security.h | 1 +
>  security/lockdown/lockdown.c | 1 +
>  3 files changed, 8 insertions(+)
> 
> diff --git a/drivers/acpi/custom_method.c b/drivers/acpi/custom_method.c
> index aa972dc5cb7e..6e56f9f43492 100644
> --- a/drivers/acpi/custom_method.c
> +++ b/drivers/acpi/custom_method.c
> @@ -8,6 +8,7 @@
>  #include 
>  #include 
>  #include 
> +#include <linux/security.h>
>  
>  #include "internal.h"
>  
> @@ -28,6 +29,11 @@ static ssize_t cm_write(struct file *file, const char 
> __user * user_buf,
>  
>   struct acpi_table_header table;
>   acpi_status status;
> + int ret;
> +
> + ret = security_locked_down(LOCKDOWN_ACPI_TABLES);
> + if (ret)
> + return ret;
>  
>   if (!(*ppos)) {
>   /* parse the table header to get the table length */
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 30bc6f058926..cc2b5ee4cadd 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -89,6 +89,7 @@ enum lockdown_reason {
>   LOCKDOWN_PCI_ACCESS,
>   LOCKDOWN_IOPORT,
>   LOCKDOWN_MSR,
> + LOCKDOWN_ACPI_TABLES,
>   LOCKDOWN_INTEGRITY_MAX,
>   LOCKDOWN_CONFIDENTIALITY_MAX,
>  };
> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index 297a065e6261..1725224f0024 100644
> --- a/security/lockdown/lockdown.c
> +++ b/security/lockdown/lockdown.c
> @@ -25,6 +25,7 @@ static char 
> *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
>   [LOCKDOWN_PCI_ACCESS] = "direct PCI access",
>   [LOCKDOWN_IOPORT] = "raw io port access",
>   [LOCKDOWN_MSR] = "raw MSR access",
> + [LOCKDOWN_ACPI_TABLES] = "modified ACPI tables",
>   [LOCKDOWN_INTEGRITY_MAX] = "integrity",
>   [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
>  };
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
> 

-- 
Kees Cook


Re: [PATCH V34 15/29] acpi: Ignore acpi_rsdp kernel param when the kernel has been locked down

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:44PM -0700, Matthew Garrett wrote:
> From: Josh Boyer 
> 
> This option allows userspace to pass the RSDP address to the kernel, which
> makes it possible for a user to modify the workings of hardware.  Reject
> the option when the kernel is locked down.
> 
> Signed-off-by: Josh Boyer 

Reviewed-by: Kees Cook 

-Kees

> Signed-off-by: David Howells 
> Signed-off-by: Matthew Garrett 
> cc: Dave Young 
> cc: linux-a...@vger.kernel.org
> ---
>  drivers/acpi/osl.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
> index f29e427d0d1d..60cda8a0f36b 100644
> --- a/drivers/acpi/osl.c
> +++ b/drivers/acpi/osl.c
> @@ -40,6 +40,7 @@
>  #include 
>  #include 
>  #include 
> +#include <linux/security.h>
>  
>  #include 
>  #include 
> @@ -194,7 +195,7 @@ acpi_physical_address __init 
> acpi_os_get_root_pointer(void)
>   acpi_physical_address pa;
>  
>  #ifdef CONFIG_KEXEC
> - if (acpi_rsdp)
> + if (acpi_rsdp && !security_locked_down(LOCKDOWN_ACPI_TABLES))
>   return acpi_rsdp;
>  #endif
>   pa = acpi_arch_get_root_pointer();
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
> 

-- 
Kees Cook


Re: [PATCH] watchdog: convert remaining drivers to use SPDX license identifier

2019-06-22 Thread William Breathitt Gray
On Thu, Jun 20, 2019 at 09:28:46AM -0700, Guenter Roeck wrote:
> This gets rid of the unnecessary license boilerplate, and avoids
> having to deal with individual patches one by one.
> 
> No functional changes.
> 
> Signed-off-by: Guenter Roeck 
> ---
> Note: Several drivers include a paragraph such as
> 
> "Neither  nor  admit liability nor
>  provide warranty for any of this software. This material is
>  provided "AS-IS" and at no charge."
> 
> Presumably this is covered by the GPL license. However, since I am not
> an attorney, I am not sure, and I opted for leaving such paragraphs in
> place.
[...]
>  drivers/watchdog/ebc-c384_wdt.c|  9 -
[...]
> diff --git a/drivers/watchdog/ebc-c384_wdt.c b/drivers/watchdog/ebc-c384_wdt.c
> index c176f59fea28..8ef4b0df3855 100644
> --- a/drivers/watchdog/ebc-c384_wdt.c
> +++ b/drivers/watchdog/ebc-c384_wdt.c
> @@ -2,15 +2,6 @@
>  /*
>   * Watchdog timer driver for the WinSystems EBC-C384
>   * Copyright (C) 2016 William Breathitt Gray
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License, version 2, as
> - * published by the Free Software Foundation.
> - *
> - * This program is distributed in the hope that it will be useful, but
> - * WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> - * General Public License for more details.
>   */
>  #include 
>  #include 

Acked-by: William Breathitt Gray 


Re: [PATCH V34 12/29] x86: Lock down IO port access when the kernel is locked down

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:41PM -0700, Matthew Garrett wrote:
> From: Matthew Garrett 
> 
> IO port access would permit users to gain access to PCI configuration
> registers, which in turn (on a lot of hardware) give access to MMIO
> register space. This would potentially permit root to trigger arbitrary
> DMA, so lock it down by default.
> 
> This also implicitly locks down the KDADDIO, KDDELIO, KDENABIO and
> KDDISABIO console ioctls.
> 
> Signed-off-by: Matthew Garrett 

Reviewed-by: Kees Cook 

-Kees

> Signed-off-by: David Howells 
> cc: x...@kernel.org
> ---
>  arch/x86/kernel/ioport.c | 7 +--
>  include/linux/security.h | 1 +
>  security/lockdown/lockdown.c | 1 +
>  3 files changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
> index 0fe1c8782208..61a89d3c0382 100644
> --- a/arch/x86/kernel/ioport.c
> +++ b/arch/x86/kernel/ioport.c
> @@ -11,6 +11,7 @@
>  #include 
>  #include 
>  #include 
> +#include <linux/security.h>
>  #include 
>  #include 
>  #include 
> @@ -31,7 +32,8 @@ long ksys_ioperm(unsigned long from, unsigned long num, int 
> turn_on)
>  
>   if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
>   return -EINVAL;
> - if (turn_on && !capable(CAP_SYS_RAWIO))
> + if (turn_on && (!capable(CAP_SYS_RAWIO) ||
> + security_locked_down(LOCKDOWN_IOPORT)))
>   return -EPERM;
>  
>   /*
> @@ -126,7 +128,8 @@ SYSCALL_DEFINE1(iopl, unsigned int, level)
>   return -EINVAL;
>   /* Trying to gain more privileges? */
>   if (level > old) {
> - if (!capable(CAP_SYS_RAWIO))
> + if (!capable(CAP_SYS_RAWIO) ||
> + security_locked_down(LOCKDOWN_IOPORT))
>   return -EPERM;
>   }
>   regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 1b849f10dec6..60569b7e9465 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -87,6 +87,7 @@ enum lockdown_reason {
>   LOCKDOWN_KEXEC,
>   LOCKDOWN_HIBERNATION,
>   LOCKDOWN_PCI_ACCESS,
> + LOCKDOWN_IOPORT,
>   LOCKDOWN_INTEGRITY_MAX,
>   LOCKDOWN_CONFIDENTIALITY_MAX,
>  };
> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index e2ee8a16b94c..895ef3ba1b4c 100644
> --- a/security/lockdown/lockdown.c
> +++ b/security/lockdown/lockdown.c
> @@ -23,6 +23,7 @@ static char 
> *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
>   [LOCKDOWN_KEXEC] = "kexec of unsigned images",
>   [LOCKDOWN_HIBERNATION] = "hibernation",
>   [LOCKDOWN_PCI_ACCESS] = "direct PCI access",
> + [LOCKDOWN_IOPORT] = "raw io port access",
>   [LOCKDOWN_INTEGRITY_MAX] = "integrity",
>   [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
>  };
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
> 

-- 
Kees Cook


Re: [PATCH V34 10/29] hibernate: Disable when the kernel is locked down

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:39PM -0700, Matthew Garrett wrote:
> From: Josh Boyer 
> 
> There is currently no way to verify the resume image when returning
> from hibernate.  This might compromise the signed modules trust model,
> so until we can work with signed hibernate images we disable it when the
> kernel is locked down.
> 
> Signed-off-by: Josh Boyer 

Reviewed-by: Kees Cook 

-Kees

> Signed-off-by: David Howells 
> Signed-off-by: Matthew Garrett 
> Cc: r...@rjwysocki.net
> Cc: pa...@ucw.cz
> cc: linux...@vger.kernel.org
> ---
>  include/linux/security.h | 1 +
>  kernel/power/hibernate.c | 3 ++-
>  security/lockdown/lockdown.c | 1 +
>  3 files changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 00a31ab2e5ba..a051f21a1144 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -85,6 +85,7 @@ enum lockdown_reason {
>   LOCKDOWN_MODULE_SIGNATURE,
>   LOCKDOWN_DEV_MEM,
>   LOCKDOWN_KEXEC,
> + LOCKDOWN_HIBERNATION,
>   LOCKDOWN_INTEGRITY_MAX,
>   LOCKDOWN_CONFIDENTIALITY_MAX,
>  };
> diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
> index abef759de7c8..3a9cb2d3da4a 100644
> --- a/kernel/power/hibernate.c
> +++ b/kernel/power/hibernate.c
> @@ -32,6 +32,7 @@
>  #include 
>  #include 
>  #include 
> +#include <linux/security.h>
>  #include 
>  
>  #include "power.h"
> @@ -70,7 +71,7 @@ static const struct platform_hibernation_ops 
> *hibernation_ops;
>  
>  bool hibernation_available(void)
>  {
> - return (nohibernate == 0);
> + return nohibernate == 0 && !security_locked_down(LOCKDOWN_HIBERNATION);
>  }
>  
>  /**
> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index 08fcd8116db3..ce5b3da9bd09 100644
> --- a/security/lockdown/lockdown.c
> +++ b/security/lockdown/lockdown.c
> @@ -21,6 +21,7 @@ static char 
> *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
>   [LOCKDOWN_MODULE_SIGNATURE] = "unsigned module loading",
>   [LOCKDOWN_DEV_MEM] = "/dev/mem,kmem,port",
>   [LOCKDOWN_KEXEC] = "kexec of unsigned images",
> + [LOCKDOWN_HIBERNATION] = "hibernation",
>   [LOCKDOWN_INTEGRITY_MAX] = "integrity",
>   [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
>  };
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
> 

-- 
Kees Cook


Re: [PATCH V34 11/29] PCI: Lock down BAR access when the kernel is locked down

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:40PM -0700, Matthew Garrett wrote:
> From: Matthew Garrett 
> 
> Any hardware that can potentially generate DMA has to be locked down in
> order to avoid it being possible for an attacker to modify kernel code,
> allowing them to circumvent disabled module loading or module signing.
> Default to paranoid - in future we can potentially relax this for
> sufficiently IOMMU-isolated devices.
> 
> Signed-off-by: David Howells 

Reviewed-by: Kees Cook 

-Kees

> Signed-off-by: Matthew Garrett 
> Acked-by: Bjorn Helgaas 
> cc: linux-...@vger.kernel.org
> ---
>  drivers/pci/pci-sysfs.c  | 16 
>  drivers/pci/proc.c   | 14 --
>  drivers/pci/syscall.c|  4 +++-
>  include/linux/security.h |  1 +
>  security/lockdown/lockdown.c |  1 +
>  5 files changed, 33 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
> index 25794c27c7a4..e1011efb5a31 100644
> --- a/drivers/pci/pci-sysfs.c
> +++ b/drivers/pci/pci-sysfs.c
> @@ -903,6 +903,11 @@ static ssize_t pci_write_config(struct file *filp, 
> struct kobject *kobj,
>   unsigned int size = count;
>   loff_t init_off = off;
>   u8 *data = (u8 *) buf;
> + int ret;
> +
> + ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
> + if (ret)
> + return ret;
>  
>   if (off > dev->cfg_size)
>   return 0;
> @@ -1165,6 +1170,11 @@ static int pci_mmap_resource(struct kobject *kobj, 
> struct bin_attribute *attr,
>   int bar = (unsigned long)attr->private;
>   enum pci_mmap_state mmap_type;
>   struct resource *res = &pdev->resource[bar];
> + int ret;
> +
> + ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
> + if (ret)
> + return ret;
>  
>   if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(res->start))
>   return -EINVAL;
> @@ -1241,6 +1251,12 @@ static ssize_t pci_write_resource_io(struct file 
> *filp, struct kobject *kobj,
>struct bin_attribute *attr, char *buf,
>loff_t off, size_t count)
>  {
> + int ret;
> +
> + ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
> + if (ret)
> + return ret;
> +
>   return pci_resource_io(filp, kobj, attr, buf, off, count, true);
>  }
>  
> diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
> index 6fa1627ce08d..a72258d70407 100644
> --- a/drivers/pci/proc.c
> +++ b/drivers/pci/proc.c
> @@ -13,6 +13,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include "pci.h"
>  
> @@ -115,7 +116,11 @@ static ssize_t proc_bus_pci_write(struct file *file, 
> const char __user *buf,
>   struct pci_dev *dev = PDE_DATA(ino);
>   int pos = *ppos;
>   int size = dev->cfg_size;
> - int cnt;
> + int cnt, ret;
> +
> + ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
> + if (ret)
> + return ret;
>  
>   if (pos >= size)
>   return 0;
> @@ -196,6 +201,10 @@ static long proc_bus_pci_ioctl(struct file *file, 
> unsigned int cmd,
>  #endif /* HAVE_PCI_MMAP */
>   int ret = 0;
>  
> + ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
> + if (ret)
> + return ret;
> +
>   switch (cmd) {
>   case PCIIOC_CONTROLLER:
>   ret = pci_domain_nr(dev->bus);
> @@ -237,7 +246,8 @@ static int proc_bus_pci_mmap(struct file *file, struct 
> vm_area_struct *vma)
>   struct pci_filp_private *fpriv = file->private_data;
>   int i, ret, write_combine = 0, res_bit = IORESOURCE_MEM;
>  
> - if (!capable(CAP_SYS_RAWIO))
> + if (!capable(CAP_SYS_RAWIO) ||
> + security_locked_down(LOCKDOWN_PCI_ACCESS))
>   return -EPERM;
>  
>   if (fpriv->mmap_state == pci_mmap_io) {
> diff --git a/drivers/pci/syscall.c b/drivers/pci/syscall.c
> index d96626c614f5..31e39558d49d 100644
> --- a/drivers/pci/syscall.c
> +++ b/drivers/pci/syscall.c
> @@ -7,6 +7,7 @@
>  
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include "pci.h"
> @@ -90,7 +91,8 @@ SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, 
> unsigned long, dfn,
>   u32 dword;
>   int err = 0;
>  
> - if (!capable(CAP_SYS_ADMIN))
> + if (!capable(CAP_SYS_ADMIN) ||
> + security_locked_down(LOCKDOWN_PCI_ACCESS))
>   return -EPERM;
>  
>   dev = pci_get_domain_bus_and_slot(0, bus, dfn);
> diff --git a/include/linux/security.h b/include/linux/security.h
> index a051f21a1144..1b849f10dec6 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -86,6 +86,7 @@ enum lockdown_reason {
>   LOCKDOWN_DEV_MEM,
>   LOCKDOWN_KEXEC,
>   LOCKDOWN_HIBERNATION,
> + LOCKDOWN_PCI_ACCESS,
>   LOCKDOWN_INTEGRITY_MAX,
>   LOCKDOWN_CONFIDENTIALITY_MAX,
>  };
> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index ce5b3da9bd09..e2ee8a16b94c 100644
> --- a/securit

Re: [PATCH V34 05/29] Restrict /dev/{mem,kmem,port} when the kernel is locked down

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:34PM -0700, Matthew Garrett wrote:
> From: Matthew Garrett 
> 
> Allowing users to read and write to core kernel memory makes it possible
> for the kernel to be subverted, avoiding module loading restrictions, and
> also to steal cryptographic information.
> 
> Disallow /dev/mem and /dev/kmem from being opened when the kernel has
> been locked down to prevent this.
> 
> Also disallow /dev/port from being opened to prevent raw ioport access and
> thus DMA from being used to accomplish the same thing.
> 
> Signed-off-by: David Howells 
> Signed-off-by: Matthew Garrett 
> Cc: x...@kernel.org
> ---
>  drivers/char/mem.c   | 6 +-
>  include/linux/security.h | 1 +
>  security/lockdown/lockdown.c | 1 +
>  3 files changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/char/mem.c b/drivers/char/mem.c
> index b08dc50f9f26..93c02493f0fa 100644
> --- a/drivers/char/mem.c
> +++ b/drivers/char/mem.c
> @@ -29,8 +29,8 @@
>  #include 
>  #include 
>  #include 
> -
>  #include 
> +#include 
>  
>  #ifdef CONFIG_IA64
>  # include 
> @@ -786,6 +786,10 @@ static loff_t memory_lseek(struct file *file, loff_t 
> offset, int orig)
>  
>  static int open_port(struct inode *inode, struct file *filp)
>  {
> + int ret = security_locked_down(LOCKDOWN_DEV_MEM);
> +
> + if (ret)
> + return ret;
>   return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;

Usually the ordering for LSM tests tends to follow capable checks, which
allows for things like audit to generate logs for capability rejections,
etc. I'd expect this to be:

if (!capable(CAP_SYS_RAWIO))
return -EPERM;

return security_locked_down(LOCKDOWN_DEV_MEM);

With that fixed:

Reviewed-by: Kees Cook 

-Kees

>  }
>  
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 46d85cd63b06..200175c8605a 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -83,6 +83,7 @@ enum lsm_event {
>  enum lockdown_reason {
>   LOCKDOWN_NONE,
>   LOCKDOWN_MODULE_SIGNATURE,
> + LOCKDOWN_DEV_MEM,
>   LOCKDOWN_INTEGRITY_MAX,
>   LOCKDOWN_CONFIDENTIALITY_MAX,
>  };
> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index 25a3a5b0aa9c..565c87451f0f 100644
> --- a/security/lockdown/lockdown.c
> +++ b/security/lockdown/lockdown.c
> @@ -19,6 +19,7 @@ static enum lockdown_reason kernel_locked_down;
>  static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
>   [LOCKDOWN_NONE] = "none",
>   [LOCKDOWN_MODULE_SIGNATURE] = "unsigned module loading",
> + [LOCKDOWN_DEV_MEM] = "/dev/mem,kmem,port",
>   [LOCKDOWN_INTEGRITY_MAX] = "integrity",
>   [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
>  };
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
> 

-- 
Kees Cook


Re: [PATCH V34 04/29] Enforce module signatures if the kernel is locked down

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:33PM -0700, Matthew Garrett wrote:
> From: David Howells 
> 
> If the kernel is locked down, require that all modules have valid
> signatures that we can verify.
> 
> I have adjusted the errors generated:
> 
>  (1) If there's no signature (ENODATA) or we can't check it (ENOPKG,
>  ENOKEY), then:
> 
>  (a) If signatures are enforced then EKEYREJECTED is returned.
> 
>  (b) If there's no signature or we can't check it, but the kernel is
>locked down then EPERM is returned (this is then consistent with
>other lockdown cases).
> 
>  (2) If the signature is unparseable (EBADMSG, EINVAL), the signature fails
>  the check (EKEYREJECTED) or a system error occurs (eg. ENOMEM), we
>  return the error we got.
> 
> Note that the X.509 code doesn't check for key expiry as the RTC might not
> be valid or might not have been transferred to the kernel's clock yet.
> 
>  [Modified by Matthew Garrett to remove the IMA integration. This will
>   be replaced with integration with the IMA architecture policy
>   patchset.]
> 
> Signed-off-by: David Howells 
> Signed-off-by: Matthew Garrett 
> Cc: Jessica Yu 
> ---
>  include/linux/security.h |  1 +
>  kernel/module.c  | 38 +---
>  security/lockdown/lockdown.c |  1 +
>  3 files changed, 33 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/security.h b/include/linux/security.h
> index c808d344ec75..46d85cd63b06 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -82,6 +82,7 @@ enum lsm_event {
>   */
>  enum lockdown_reason {
>   LOCKDOWN_NONE,
> + LOCKDOWN_MODULE_SIGNATURE,
>   LOCKDOWN_INTEGRITY_MAX,
>   LOCKDOWN_CONFIDENTIALITY_MAX,
>  };
> diff --git a/kernel/module.c b/kernel/module.c
> index 0b9aa8ab89f0..6aa681edd660 100644
> --- a/kernel/module.c
> +++ b/kernel/module.c
> @@ -2763,8 +2763,9 @@ static inline void kmemleak_load_module(const struct 
> module *mod,
>  #ifdef CONFIG_MODULE_SIG
>  static int module_sig_check(struct load_info *info, int flags)
>  {
> - int err = -ENOKEY;
> + int ret, err = -ENODATA;
>   const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
> + const char *reason;
>   const void *mod = info->hdr;
>  
>   /*
> @@ -2779,16 +2780,39 @@ static int module_sig_check(struct load_info *info, 
> int flags)
>   err = mod_verify_sig(mod, info);
>   }
>  
> - if (!err) {
> + switch (err) {
> + case 0:
>   info->sig_ok = true;
>   return 0;
> - }
>  
> - /* Not having a signature is only an error if we're strict. */
> - if (err == -ENOKEY && !is_module_sig_enforced())
> - err = 0;
> + /* We don't permit modules to be loaded into trusted kernels
> +  * without a valid signature on them, but if we're not
> +  * enforcing, certain errors are non-fatal.
> +  */
> + case -ENODATA:
> + reason = "Loading of unsigned module";
> + goto decide;
> + case -ENOPKG:
> + reason = "Loading of module with unsupported crypto";
> + goto decide;
> + case -ENOKEY:
> + reason = "Loading of module with unavailable key";
> + decide:
> + if (is_module_sig_enforced()) {
> + pr_notice("%s is rejected\n", reason);
> + return -EKEYREJECTED;
> + }
>  
> - return err;
> + ret = security_locked_down(LOCKDOWN_MODULE_SIGNATURE);
> + return ret;

Why not just "return security_locked_down(LOCKDOWN_MODULE_SIGNATURE);"?
That would mean there is no need to add "ret". Regardless:

Reviewed-by: Kees Cook 

-Kees


> +
> + /* All other errors are fatal, including nomem, unparseable
> +  * signatures and signature check failures - even if signatures
> +  * aren't required.
> +  */
> + default:
> + return err;
> + }
>  }
>  #else /* !CONFIG_MODULE_SIG */
>  static int module_sig_check(struct load_info *info, int flags)
> diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
> index 8e39b36b8f33..25a3a5b0aa9c 100644
> --- a/security/lockdown/lockdown.c
> +++ b/security/lockdown/lockdown.c
> @@ -18,6 +18,7 @@ static enum lockdown_reason kernel_locked_down;
>  
>  static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
>   [LOCKDOWN_NONE] = "none",
> + [LOCKDOWN_MODULE_SIGNATURE] = "unsigned module loading",
>   [LOCKDOWN_INTEGRITY_MAX] = "integrity",
>   [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
>  };
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
> 

-- 
Kees Cook


Linux 5.2-rc6

2019-06-22 Thread Linus Torvalds
Uhh-uh.

I really was hoping that we'd continue to have an increasingly quiet
and shrinking rc series. But that was not to be.

rc6 is the biggest rc in number of commits we've had so far for this
5.2 cycle (obviously ignoring the merge window itself and rc1). And
it's not just because of trivial patches (although admittedly we have
those too), but we obviously had the TCP SACK/fragmentation/mss fixes
in there, and they in turn required some fixes too.

Happily we did pick up on the problem quickly - largely thanks to the
patches making it into distro kernels quickly and then causing
problems for the steam client of all things - but it's still something
that doesn't exactly make me get the warm and fuzzies at this point in
the release cycle.

I'm also doing this rc on a Saturday, because I am going to spend all
of tomorrow on a plane once again. So I'm traveling first for a
conference and then for some R&R on a liveaboard, so I'm going to have
spotty access to email for a few days, and then for a week I'll be
entirely incommunicado. So rc7 will be delayed.

I was thinking that I timed it all really well in what should be the
quietest period of the release cycle for me, and now I obviously hope
that last week really was a fluke.

Anyway, if something happens when I'm offline, Greg can presumably
step up, although he'll have the same conference travel (but
presumably at least the reverse jetlag ;)

With all that out of the way, I'm still reasonably optimistic that
we're on track for a calm final part of the release, and I don't think
there is anything particularly bad on the horizon.

And while we did have some excitement this week, _most_ of it by far
was the usual small fixes. Including the by now expected SPDX updates,
so the diffstat looks a bit messy again.

Anyway, ignoring the SPDX updates (and you should, even if they
dominate the diffstat), about a quarter of the rc6 update is
networking (the TCP fixes being a fairly small part of it - the bulk
is still network driver and other networking fixes, including bpf).
Another quarter is selftests (mostly bpf) and documentation.

The rest is other driver updates (gpu, rdma, thunderbolt, usb..), arch
updates (x86, risc-v and arm[64]), and misc other updates (overlayfs
etc).

But honestly, most of it really is pretty small (again - ignoring the
SPDX noise), so despite my misgivings I don't think we're really in
trouble.

Shortlog appended for the brave souls who want to look at details,

 Linus

---

Aaron Lewis (2):
  kvm: tests: Sort tests in the Makefile alphabetically
  tests: kvm: Check for a kernel warning

Aaron Ma (1):
  Input: elantech - enable middle button support on 2 ThinkPads

Alaa Hleihel (2):
  net/mlx5: Avoid reloading already removed devices
  net/mlx5e: Avoid detaching non-existing netdev under switchdev mode

Alakesh Haloi (1):
  selftests: bpf: fix compiler warning in flow_dissector test

Alan Tull (1):
  MAINTAINERS: fpga: hand off maintainership to Moritz

Alexander Dahl (1):
  can: usb: Kconfig: Remove duplicate menu entry

Alexander Mikhaylenko (1):
  Input: synaptics - enable SMBus on ThinkPad E480 and E580

Alexei Starovoitov (1):
  bpf, x64: fix stack layout of JITed bpf code

Amir Goldstein (3):
  ovl: fix wrong flags check in FS_IOC_FS[SG]ETXATTR ioctls
  ovl: make i_ino consistent with st_ino in more cases
  fanotify: update connector fsid cache on add mark

Anders Roxell (1):
  net: dsa: fix warning same module names

Andreas Schwab (1):
  riscv: export pm_power_off again

Andrew Jones (1):
  KVM: arm/arm64: Fix emulated ptimer irq injection

Andrey Smirnov (1):
  Input: uinput - add compat ioctl number translation for UI_*_FF_UPLOAD

Andy Gross (1):
  MAINTAINERS: Change QCOM repo location

Andy Lutomirski (1):
  x86/vdso: Prevent segfaults due to hoisted vclock reads

Andy Strohman (1):
  nl80211: fix station_info pertid memory leak

Anisse Astier (2):
  arm64: ssbd: explicitly depend on <linux/prctl.h>
  arm64/sve: <uapi/asm/ptrace.h> should not depend on <uapi/linux/prctl.h>

Anna Schumaker (1):
  Revert "SUNRPC: Declare RPC timers as TIMER_DEFERRABLE"

Anson Huang (2):
  firmware: imx: SCU irq should ONLY be enabled after SCU IPC is ready
  Input: imx_keypad - make sure keyboard can always wake up system

Anssi Hannula (1):
  can: xilinx_can: use correct bittiming_const for CAN FD core

Arnd Bergmann (6):
  firmware: trusted_foundations: add ARMv7 dependency
  ARM: ixp4xx: don't select SERIAL_OF_PLATFORM
  ARM: ixp4xx: mark ixp4xx_irq_setup as __init
  ARM: ixp4xx: include irqs.h where needed
  ovl: fix bogus -Wmaybe-unitialized warning
  habanalabs: use u64_to_user_ptr() for reading user pointers

Arthur Fabre (1):
  bpf: Fix out of bounds memory access in bpf_sk_storage

Arun Easi (1):
  scsi: qla2xxx: Fix hardlockup in abort command during driver remove

Avraham Stern (1):
  cfg80211: report measurement start TSF correctly

Re: [PATCH net-next] hinic: implement the statistical interface of ethtool

2019-06-22 Thread David Miller
From: Xue Chaojing 
Date: Thu, 20 Jun 2019 05:58:08 +

> + p = (char *)(&txq_stats) +
> + hinic_tx_queue_stats[j].offset;

Parenthesis around &txq_stats is unnecessary.

> + p = (char *)(&rxq_stats) +
> + hinic_rx_queue_stats[j].offset;

Similarly for &rxq_stats.

> + p = (char *)(net_stats) + hinic_netdev_stats[j].offset;

Similarly for net_stats.

> + p = (char *)(&vport_stats) + hinic_function_stats[j].offset;

And &vport_stats.

> + p = (char *)(port_stats) + hinic_port_stats[j].offset;

And port_stats.


Re: [PATCH V34 03/29] security: Add a static lockdown policy LSM

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:32PM -0700, Matthew Garrett wrote:
> While existing LSMs can be extended to handle lockdown policy,
> distributions generally want to be able to apply a straightforward
> static policy. This patch adds a simple LSM that can be configured to
> reject either integrity or all lockdown queries, and can be configured
> at runtime (through securityfs), boot time (via a kernel parameter) or
> build time (via a kconfig option). Based on initial code by David
> Howells.
> 
> Signed-off-by: Matthew Garrett 

Reviewed-by: Kees Cook 

-Kees

> Cc: David Howells 
> ---
>  .../admin-guide/kernel-parameters.txt |   9 +
>  include/linux/security.h  |   4 +
>  security/Kconfig  |   3 +-
>  security/Makefile |   2 +
>  security/lockdown/Kconfig |  47 +
>  security/lockdown/Makefile|   1 +
>  security/lockdown/lockdown.c  | 172 ++
>  7 files changed, 237 insertions(+), 1 deletion(-)
>  create mode 100644 security/lockdown/Kconfig
>  create mode 100644 security/lockdown/Makefile
>  create mode 100644 security/lockdown/lockdown.c
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt 
> b/Documentation/admin-guide/kernel-parameters.txt
> index 2b8ee90bb644..fa336f6cd5bc 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -2239,6 +2239,15 @@
>   lockd.nlm_udpport=M [NFS] Assign UDP port.
>   Format: 
>  
> + lockdown=   [SECURITY]
> + { integrity | confidentiality }
> + Enable the kernel lockdown feature. If set to
> + integrity, kernel features that allow userland to
> + modify the running kernel are disabled. If set to
> + confidentiality, kernel features that allow userland
> + to extract confidential information from the kernel
> + are also disabled.
> +
>   locktorture.nreaders_stress= [KNL]
>   Set the number of locking read-acquisition kthreads.
>   Defaults to being automatically set based on the
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 9eaf02e70707..c808d344ec75 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -76,6 +76,10 @@ enum lsm_event {
>   LSM_POLICY_CHANGE,
>  };
>  
> +/*
> + *  If you add to this, remember to extend lockdown_reasons in
> + *  security/lockdown/lockdown.c.
> + */
>  enum lockdown_reason {
>   LOCKDOWN_NONE,
>   LOCKDOWN_INTEGRITY_MAX,
> diff --git a/security/Kconfig b/security/Kconfig
> index 1d6463fb1450..c35aa72103df 100644
> --- a/security/Kconfig
> +++ b/security/Kconfig
> @@ -236,12 +236,13 @@ source "security/apparmor/Kconfig"
>  source "security/loadpin/Kconfig"
>  source "security/yama/Kconfig"
>  source "security/safesetid/Kconfig"
> +source "security/lockdown/Kconfig"
>  
>  source "security/integrity/Kconfig"
>  
>  config LSM
>   string "Ordered list of enabled LSMs"
> - default "yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
> + default 
> "lockdown,yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
>   help
> A comma-separated list of LSMs, in initialization order.
> Any LSMs left off this list will be ignored. This can be
> diff --git a/security/Makefile b/security/Makefile
> index c598b904938f..be1dd9d2cb2f 100644
> --- a/security/Makefile
> +++ b/security/Makefile
> @@ -11,6 +11,7 @@ subdir-$(CONFIG_SECURITY_APPARMOR)  += apparmor
>  subdir-$(CONFIG_SECURITY_YAMA)   += yama
>  subdir-$(CONFIG_SECURITY_LOADPIN)+= loadpin
>  subdir-$(CONFIG_SECURITY_SAFESETID)+= safesetid
> +subdir-$(CONFIG_SECURITY_LOCKDOWN_LSM)   += lockdown
>  
>  # always enable default capabilities
>  obj-y+= commoncap.o
> @@ -27,6 +28,7 @@ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor/
>  obj-$(CONFIG_SECURITY_YAMA)  += yama/
>  obj-$(CONFIG_SECURITY_LOADPIN)   += loadpin/
>  obj-$(CONFIG_SECURITY_SAFESETID)   += safesetid/
> +obj-$(CONFIG_SECURITY_LOCKDOWN_LSM)  += lockdown/
>  obj-$(CONFIG_CGROUP_DEVICE)  += device_cgroup.o
>  
>  # Object integrity file lists
> diff --git a/security/lockdown/Kconfig b/security/lockdown/Kconfig
> new file mode 100644
> index ..7374ba76d8eb
> --- /dev/null
> +++ b/security/lockdown/Kconfig
> @@ -0,0 +1,47 @@
> +config SECURITY_LOCKDOWN_LSM
> + bool "Basic module for enforcing kernel lockdown"
> + depends on SECURITY
> + help
> +   Build support for an LSM that enforces a coarse kernel lockdown
> +   behaviour.
> +
> +config SECURITY_LOCKDOWN_LSM_EARLY
> + bool "Enable lockdown LSM early in init"
> + depends on 

Re: [PATCH V34 02/29] security: Add a "locked down" LSM hook

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:31PM -0700, Matthew Garrett wrote:
> Add a mechanism to allow LSMs to make a policy decision around whether
> kernel functionality that would allow tampering with or examining the
> runtime state of the kernel should be permitted.
> 
> Signed-off-by: Matthew Garrett 

Acked-by: Kees Cook 

-Kees

> ---
>  include/linux/lsm_hooks.h |  2 ++
>  include/linux/security.h  | 11 +++
>  security/security.c   |  6 ++
>  3 files changed, 19 insertions(+)
> 
> diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
> index 66fd1eac7a32..df2aebc99838 100644
> --- a/include/linux/lsm_hooks.h
> +++ b/include/linux/lsm_hooks.h
> @@ -1790,6 +1790,7 @@ union security_list_options {
>   int (*bpf_prog_alloc_security)(struct bpf_prog_aux *aux);
>   void (*bpf_prog_free_security)(struct bpf_prog_aux *aux);
>  #endif /* CONFIG_BPF_SYSCALL */
> + int (*locked_down)(enum lockdown_reason what);
>  };
>  
>  struct security_hook_heads {
> @@ -2027,6 +2028,7 @@ struct security_hook_heads {
>   struct hlist_head bpf_prog_alloc_security;
>   struct hlist_head bpf_prog_free_security;
>  #endif /* CONFIG_BPF_SYSCALL */
> + struct hlist_head locked_down;
>  } __randomize_layout;
>  
>  /*
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 1bb6fb2f1523..9eaf02e70707 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -76,6 +76,12 @@ enum lsm_event {
>   LSM_POLICY_CHANGE,
>  };
>  
> +enum lockdown_reason {
> + LOCKDOWN_NONE,
> + LOCKDOWN_INTEGRITY_MAX,
> + LOCKDOWN_CONFIDENTIALITY_MAX,
> +};
> +
>  /* These functions are in security/commoncap.c */
>  extern int cap_capable(const struct cred *cred, struct user_namespace *ns,
>  int cap, unsigned int opts);
> @@ -389,6 +395,7 @@ void security_inode_invalidate_secctx(struct inode 
> *inode);
>  int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen);
>  int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen);
>  int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen);
> +int security_locked_down(enum lockdown_reason what);
>  #else /* CONFIG_SECURITY */
>  
>  static inline int call_lsm_notifier(enum lsm_event event, void *data)
> @@ -1189,6 +1196,10 @@ static inline int security_inode_getsecctx(struct 
> inode *inode, void **ctx, u32
>  {
>   return -EOPNOTSUPP;
>  }
> +static inline int security_locked_down(enum lockdown_reason what)
> +{
> + return 0;
> +}
>  #endif   /* CONFIG_SECURITY */
>  
>  #ifdef CONFIG_SECURITY_NETWORK
> diff --git a/security/security.c b/security/security.c
> index 487e1f3eb2df..553f50e9a106 100644
> --- a/security/security.c
> +++ b/security/security.c
> @@ -2382,3 +2382,9 @@ void security_bpf_prog_free(struct bpf_prog_aux *aux)
>   call_void_hook(bpf_prog_free_security, aux);
>  }
>  #endif /* CONFIG_BPF_SYSCALL */
> +
> +int security_locked_down(enum lockdown_reason what)
> +{
> + return call_int_hook(locked_down, 0, what);
> +}
> +EXPORT_SYMBOL(security_locked_down);
> -- 
> 2.22.0.410.gd8fdbe21b5-goog
> 

-- 
Kees Cook


Re: [PATCH V34 01/29] security: Support early LSMs

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 05:03:30PM -0700, Matthew Garrett wrote:
> The lockdown module is intended to allow for kernels to be locked down
> early in boot - sufficiently early that we don't have the ability to
> kmalloc() yet. Add support for early initialisation of some LSMs, and
> then add them to the list of names when we do full initialisation later.
> Early LSMs are initialised in link order and cannot be overridden via
> boot parameters, and cannot make use of kmalloc() (since the allocator
> isn't initialised yet).
> 
> Signed-off-by: Matthew Garrett 

Acked-by: Kees Cook 

-Kees

> ---
>  include/asm-generic/vmlinux.lds.h |  8 -
>  include/linux/lsm_hooks.h |  6 
>  include/linux/security.h  |  1 +
>  init/main.c   |  1 +
>  security/security.c   | 50 ++-
>  5 files changed, 57 insertions(+), 9 deletions(-)
> 
> diff --git a/include/asm-generic/vmlinux.lds.h 
> b/include/asm-generic/vmlinux.lds.h
> index f8f6f04c4453..e1963352fdb6 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -208,8 +208,13 @@
>   __start_lsm_info = .;   \
>   KEEP(*(.lsm_info.init)) \
>   __end_lsm_info = .;
> +#define EARLY_LSM_TABLE(). = ALIGN(8);   \
> + __start_early_lsm_info = .; \
> + KEEP(*(.early_lsm_info.init))   \
> + __end_early_lsm_info = .;
>  #else
>  #define LSM_TABLE()
> +#define EARLY_LSM_TABLE()
>  #endif
>  
>  #define ___OF_TABLE(cfg, name)   _OF_TABLE_##cfg(name)
> @@ -610,7 +615,8 @@
>   ACPI_PROBE_TABLE(irqchip)   \
>   ACPI_PROBE_TABLE(timer) \
>   EARLYCON_TABLE()\
> - LSM_TABLE()
> + LSM_TABLE() \
> + EARLY_LSM_TABLE()
>  
>  #define INIT_TEXT\
>   *(.init.text .init.text.*)  \
> diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
> index a240a3fc5fc4..66fd1eac7a32 100644
> --- a/include/linux/lsm_hooks.h
> +++ b/include/linux/lsm_hooks.h
> @@ -2085,12 +2085,18 @@ struct lsm_info {
>  };
>  
>  extern struct lsm_info __start_lsm_info[], __end_lsm_info[];
> +extern struct lsm_info __start_early_lsm_info[], __end_early_lsm_info[];
>  
>  #define DEFINE_LSM(lsm)  
> \
>   static struct lsm_info __lsm_##lsm  \
>   __used __section(.lsm_info.init)\
>   __aligned(sizeof(unsigned long))
>  
> +#define DEFINE_EARLY_LSM(lsm)
> \
> + static struct lsm_info __early_lsm_##lsm\
> + __used __section(.early_lsm_info.init)  \
> + __aligned(sizeof(unsigned long))
> +
>  #ifdef CONFIG_SECURITY_SELINUX_DISABLE
>  /*
>   * Assuring the safety of deleting a security module is up to
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 49f2685324b0..1bb6fb2f1523 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -194,6 +194,7 @@ int unregister_lsm_notifier(struct notifier_block *nb);
>  
>  /* prototypes */
>  extern int security_init(void);
> +extern int early_security_init(void);
>  
>  /* Security operations */
>  int security_binder_set_context_mgr(struct task_struct *mgr);
> diff --git a/init/main.c b/init/main.c
> index 598e278b46f7..f3faeb89c75f 100644
> --- a/init/main.c
> +++ b/init/main.c
> @@ -563,6 +563,7 @@ asmlinkage __visible void __init start_kernel(void)
>   boot_cpu_init();
>   page_address_init();
>   pr_notice("%s", linux_banner);
> + early_security_init();
>   setup_arch(&command_line);
>   /*
>* Set up the the initial canary and entropy after arch
> diff --git a/security/security.c b/security/security.c
> index 23cbb1a295a3..487e1f3eb2df 100644
> --- a/security/security.c
> +++ b/security/security.c
> @@ -37,6 +37,7 @@
>  
>  /* How many LSMs were built into the kernel? */
>  #define LSM_COUNT (__end_lsm_info - __start_lsm_info)
> +#define EARLY_LSM_COUNT (__end_early_lsm_info - __start_early_lsm_info)
>  
>  struct security_hook_heads security_hook_heads __lsm_ro_after_init;
>  static ATOMIC_NOTIFIER_HEAD(lsm_notifier_chain);
> @@ -281,6 +282,8 @@ static void __init ordered_lsm_parse(const char *order, 
> const char *origin)
>  static void __init lsm_early_cred(struct cred *cred);
>  static void __init lsm_early_task(struct task_struct *task);
>  
> +static int lsm_append(const char *new, char **result);
> +
>  static v

Re: [PATCH] flow_dissector: Fix vlan header offset in __skb_flow_dissect

2019-06-22 Thread David Miller
From: YueHaibing 
Date: Thu, 20 Jun 2019 00:01:32 +0800

> @@ -785,6 +785,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
>   skb && skb_vlan_tag_present(skb)) {
>   proto = skb->protocol;
>   } else {
> + if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX)
> + nhoff -=  sizeof(*vlan);

Even if this would have turned out to be the desired fix, you would need
to get rid of the extra spaces in that last statement.


Re: [net v1] net: stmmac: set IC bit when transmitting frames with HW timestamp

2019-06-22 Thread David Miller
From: Voon Weifeng 
Date: Wed, 19 Jun 2019 22:41:48 +0800

> From: Roland Hii 
> 
> When transmitting certain PTP frames, e.g. SYNC and DELAY_REQ, the
> PTP daemon, e.g. ptp4l, is polling the driver for the frame transmit
> hardware timestamp. The polling will most likely timeout if the tx
> coalesce is enabled, because the Interrupt-on-Completion (IC) bit is
> not set in the tx descriptor for those frames.
> 
> This patch will ignore the tx coalesce parameter and set the IC bit
> when transmitting PTP frames which need to report out the frame
> transmit hardware timestamp to user space.
> 
> Fixes: f748be531d70 ("net: stmmac: Rework coalesce timer and fix multi-queue 
> races")
> Signed-off-by: Roland Hii 
> Signed-off-by: Ong Boon Leong 
> Signed-off-by: Voon Weifeng 

Applied and queued up for -stable.


Re: [net v1] net: stmmac: fixed new system time seconds value calculation

2019-06-22 Thread David Miller
From: Voon Weifeng 
Date: Wed, 19 Jun 2019 22:13:48 +0800

> From: Roland Hii 
> 
> When ADDSUB bit is set, the system time seconds field is calculated as
> the complement of the seconds part of the update value.
> 
> For example, if 3.1 seconds need to be subtracted from the
> system time, this field is calculated as
> 2^32 - 3 = 4294967296 - 3 = 0x100000000 - 3 = 0xFFFFFFFD
> 
> Previously, the 0x100000000 was mistakenly written as 100000000.
> 
> This is further simplified from
>   sec = (0x100000000ULL - sec);
> to
>   sec = -sec;
> 
> Fixes: ba1ffd74df74 ("stmmac: fix PTP support for GMAC4")
> Signed-off-by: Roland Hii 
> Signed-off-by: Ong Boon Leong 
> Signed-off-by: Voon Weifeng 

Applied and queued up for -stable.


[PATCH v2] kexec: fix warning of crash_zero_bytes in crash.c

2019-06-22 Thread Tiezhu Yang
Fix the following sparse warning:

arch/x86/kernel/crash.c:59:15:
warning: symbol 'crash_zero_bytes' was not declared. Should it be static?

First, make crash_zero_bytes static. In addition, crash_zero_bytes
is only used when CONFIG_KEXEC_FILE is set, so make it available only
under CONFIG_KEXEC_FILE. Otherwise, if CONFIG_KEXEC_FILE is not set,
the following warning will appear once crash_zero_bytes is made static:

arch/x86/kernel/crash.c:59:22:
warning: ‘crash_zero_bytes’ defined but not used [-Wunused-variable]

Fixes: dd5f726076cc ("kexec: support for kexec on panic using new system call")
Signed-off-by: Tiezhu Yang 
Cc: Vivek Goyal 
---
 arch/x86/kernel/crash.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 576b2e1..f13480e 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -56,7 +56,9 @@ struct crash_memmap_data {
  */
 crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
 EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
-unsigned long crash_zero_bytes;
+#ifdef CONFIG_KEXEC_FILE
+static unsigned long crash_zero_bytes;
+#endif
 
 static inline void cpu_crash_vmclear_loaded_vmcss(void)
 {
-- 
1.8.3.1

[PATCH] x86/vdso: Give the [ph]vclock_page declarations real types

2019-06-22 Thread Andy Lutomirski
Clean up the vDSO code a bit by giving pvclock_page and hvclock_page
their actual types instead of u8[PAGE_SIZE].  This shouldn't
materially affect the generated code.

Heavily based on a patch from Linus.

Cc: Borislav Petkov 
Cc: Peter Zijlstra 
Signed-off-by: Linus Torvalds 
Signed-off-by: Andy Lutomirski 
---
 arch/x86/entry/vdso/vclock_gettime.c | 36 ++--
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/arch/x86/entry/vdso/vclock_gettime.c 
b/arch/x86/entry/vdso/vclock_gettime.c
index 4aed41f638bb..907efc5015ec 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -28,13 +28,33 @@ extern int __vdso_clock_gettime(clockid_t clock, struct 
timespec *ts);
 extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
 extern time_t __vdso_time(time_t *t);
 
+/*
+ * Declare the memory-mapped vclock data pages.  These come from hypervisors.
+ * If we ever reintroduce something like direct access to an MMIO clock like
+ * the HPET again, it will go here as well.
+ *
+ * A load from any of these pages will segfault if the clock in question is
+ * disabled, so appropriate compiler barriers and checks need to be used
+ * to prevent stray loads.
+ *
+ * These declarations MUST NOT be const.  The compiler will assume that
+ * an extern const variable has genuinely constant contents, and the
+ * resulting code won't work, since the whole point is that these pages
+ * change over time, possibly while we're accessing them.
+ */
+
 #ifdef CONFIG_PARAVIRT_CLOCK
-extern u8 pvclock_page[PAGE_SIZE]
+/*
+ * This is the vCPU 0 pvclock page.  We only use pvclock from the vDSO
+ * if the hypervisor tells us that all vCPUs can get valid data from the
+ * vCPU 0 page.
+ */
+extern struct pvclock_vsyscall_time_info pvclock_page
__attribute__((visibility("hidden")));
 #endif
 
 #ifdef CONFIG_HYPERV_TSCPAGE
-extern u8 hvclock_page[PAGE_SIZE]
+extern struct ms_hyperv_tsc_page hvclock_page
__attribute__((visibility("hidden")));
 #endif
 
@@ -69,14 +89,9 @@ notrace static long vdso_fallback_gettime(long clock, struct 
timespec *ts)
 #endif
 
 #ifdef CONFIG_PARAVIRT_CLOCK
-static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
-{
-   return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
-}
-
 static notrace u64 vread_pvclock(void)
 {
-   const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
+   const struct pvclock_vcpu_time_info *pvti = &pvclock_page.pvti;
u32 version;
u64 ret;
 
@@ -117,10 +132,7 @@ static notrace u64 vread_pvclock(void)
 #ifdef CONFIG_HYPERV_TSCPAGE
 static notrace u64 vread_hvclock(void)
 {
-   const struct ms_hyperv_tsc_page *tsc_pg =
-   (const struct ms_hyperv_tsc_page *)&hvclock_page;
-
-   return hv_read_tsc_page(tsc_pg);
+   return hv_read_tsc_page(&hvclock_page);
 }
 #endif
 
-- 
2.21.0



Re: [PATCH v3 1/2] media: vimc: stream: add missing function documentation

2019-06-22 Thread Mauro Carvalho Chehab
Em Sat, 22 Jun 2019 18:51:06 -0300
André Almeida  escreveu:

> Hello Mauro,
> 
> On 6/21/19 6:17 PM, Mauro Carvalho Chehab wrote:
> > Em Mon, 17 Jun 2019 10:32:20 -0300
> > André Almeida  escreveu:
> >  
> >> Add comments at vimc_streamer_s_stream and vimc_streamer_thread, making
> >> the vimc-stream totally documented.  
> > I'm applying it right now.
> >
> > Yet, if this is fully documented, IMO you should add it to
> > Documentation/media/v4l-drivers, replacing the comments to kernel-doc
> > markups.  
> 
> This suggestion is a great improvement and it's simple to apply to the
> source. Where do you believe we can place this at[1]? Maybe something like
> 
> 
> Source code documentation
> -------------------------
> 
> vimc-streamer
> 
> 
> .. kernel-doc:: drivers/media/platform/vimc/vimc-streamer.c
>:internal:
> 
> 
> at the end of the file?
> 
Yeah, this should be enough.

> > That would make easier for the ones to read the comments and, if someone
> > changes a function call, warnings will be produced, and the developer
> > will be warned.
> > Thanks,
> > Mauro  
> 
> Thanks,
>     André
> 
> [1]
> https://git.linuxtv.org/media_tree.git/tree/Documentation/media/v4l-drivers/vimc.rst
> 



Thanks,
Mauro


Re: [PATCH v3 1/2] media: vimc: stream: add missing function documentation

2019-06-22 Thread André Almeida
Hello Mauro,

On 6/21/19 6:17 PM, Mauro Carvalho Chehab wrote:
> Em Mon, 17 Jun 2019 10:32:20 -0300
> André Almeida  escreveu:
>
>> Add comments at vimc_streamer_s_stream and vimc_streamer_thread, making
>> the vimc-stream totally documented.
> I'm applying it right now.
>
> Yet, if this is fully documented, IMO you should add it to
> Documentation/media/v4l-drivers, replacing the comments to kernel-doc
> markups.

This suggestion is a great improvement and it's simple to apply to the
source. Where do you believe we can place this at[1]? Maybe something like


Source code documentation
-------------------------

vimc-streamer


.. kernel-doc:: drivers/media/platform/vimc/vimc-streamer.c
   :internal:


at the end of the file?

> That would make easier for the ones to read the comments and, if someone
> changes a function call, warnings will be produced, and the developer
> will be warned.
> Thanks,
> Mauro

Thanks,
    André

[1]
https://git.linuxtv.org/media_tree.git/tree/Documentation/media/v4l-drivers/vimc.rst



Re: [PATCH v1 3/3] PM / devfreq: Add required OPPs support to passive governor

2019-06-22 Thread Saravana Kannan
On Sat, Jun 22, 2019 at 5:01 AM Chanwoo Choi  wrote:
>
> Hi,
>
> Absolutely, I agree this approach.

Thanks!

> But, I add some comments on below. please check them.
>
> 2019년 6월 22일 (토) 오전 9:36, Saravana Kannan 님이 작성:
> >
> > Look at the required OPPs of the "parent" device to determine the OPP that
> > is required from the slave device managed by the passive governor. This
> > allows having mappings between a parent device and a slave device even when
> > they don't have the same number of OPPs.
> >
> > Signed-off-by: Saravana Kannan 
> > ---
> >  drivers/devfreq/governor_passive.c | 25 +++--
> >  1 file changed, 23 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/devfreq/governor_passive.c 
> > b/drivers/devfreq/governor_passive.c
> > index 3bc29acbd54e..bd4a98bb15b1 100644
> > --- a/drivers/devfreq/governor_passive.c
> > +++ b/drivers/devfreq/governor_passive.c
> > @@ -21,8 +21,9 @@ static int devfreq_passive_get_target_freq(struct devfreq 
> > *devfreq,
> > struct devfreq_passive_data *p_data
> > = (struct devfreq_passive_data *)devfreq->data;
> > struct devfreq *parent_devfreq = (struct devfreq *)p_data->parent;
> > +   struct opp_table *opp_table = NULL, *c_opp_table = NULL;
>
> In this function, the base device is passive devfreq device.
> So, I think that better to define the 'parent_opp_table' instead of 
> 'opp_table'
> indicating the OPP table of parent devfreq device. And better to define
> just 'opp_table' instead of 'c_opp_table' indicating the passive devfreq 
> device.
> - opp_table -> parent_opp_table
> - c_opp_table -> opp_table

Sounds good. I did it that way at first, but I wanted to keep the diff
simple in my first patch series. So renamed it. I can do the rename
that you suggested. Makes sense to me.

> > unsigned long child_freq = ULONG_MAX;
> > -   struct dev_pm_opp *opp;
> > +   struct dev_pm_opp *opp = NULL, *c_opp = NULL;
>
> Ditto. I think that better to define the variables as following:
> - opp -> parent_opp
> - c_opp -> opp

Will do.

>
> > int i, count, ret = 0;
> >
> > /*
> > @@ -65,7 +66,20 @@ static int devfreq_passive_get_target_freq(struct 
> > devfreq *devfreq,
> > goto out;
> > }
> >
> > -   dev_pm_opp_put(opp);
> > +   opp_table = dev_pm_opp_get_opp_table(parent_devfreq->dev.parent);
>
> devfreq_passive_get_target_freq() is called frequently for DVFS support.
> I think that you have to add 'struct opp_table *opp_table' instance to
> 'struct devfreq'
> and then get 'opp_table' instance in the devfreq_add_device().

Sounds good. I had wanted to do that anyway, but didn't think it was
part of this series. But I can add that change to this series.

> devfreq_add_device() already get the OPP information by using
> dev_pm_opp_get_suspend_opp_freq().
> You can add following code nearby dev_pm_opp_get_suspend_opp_freq() in
> devfreq_add_device().
> - devfreq->opp_table = dev_pm_opp_get_opp_table(dev);

Will do something like that.

I'll send out an updated patch series on Monday or Tuesday. Hopefully
Viresh would have replied by then to give his opinion on whether this
is okay by him.

Thanks,
Saravana


Re: [PATCH v1 2/3] OPP: Add function to look up required OPP's for a given OPP

2019-06-22 Thread Saravana Kannan
On Sat, Jun 22, 2019 at 4:50 AM Chanwoo Choi  wrote:
>
> Hi,
>
> Absolutely, I like this approach. I think that it is necessary to make
> the connection
> between frequencies of devices.

Happy to hear that.

> But, I have a question on below.
>
> 2019년 6월 22일 (토) 오전 9:35, Saravana Kannan 님이 작성:
> >
> > Add a function that allows looking up required OPPs given a source OPP
> > table, destination OPP table and the source OPP.
> >
> > Signed-off-by: Saravana Kannan 
> > ---
> >  drivers/opp/core.c | 54 ++
> >  include/linux/pm_opp.h | 11 +
> >  2 files changed, 65 insertions(+)
> >
> > diff --git a/drivers/opp/core.c b/drivers/opp/core.c
> > index 74c7bdc6f463..4f7870bffbf8 100644
> > --- a/drivers/opp/core.c
> > +++ b/drivers/opp/core.c
> > @@ -1830,6 +1830,60 @@ void dev_pm_opp_put_genpd_virt_dev(struct opp_table 
> > *opp_table,
> > dev_err(virt_dev, "Failed to find required device entry\n");
> >  }
> >
> > +/**
> > + * dev_pm_opp_xlate_opp() - Find required OPP for src_table OPP.
> > + * @src_table: OPP table which has dst_table as one of its required OPP 
> > table.
> > + * @dst_table: Required OPP table of the src_table.
> > + * @pstate: OPP of the src_table.
> > + *
> > + * This function returns the OPP (present in @dst_table) pointed out by the
> > + * "required-opps" property of the OPP (present in @src_table).
> > + *
> > + * The callers are required to call dev_pm_opp_put() for the returned OPP 
> > after
> > + * use.
> > + *
> > + * Return: destination table OPP on success, otherwise NULL on errors.
> > + */
> > +struct dev_pm_opp *dev_pm_opp_xlate_opp(struct opp_table *src_table,
> > +   struct opp_table *dst_table,
> > +   struct dev_pm_opp *src_opp)
> > +{
> > +   struct dev_pm_opp *opp, *dest_opp = NULL;
> > +   int i;
> > +
> > +   if (!src_table || !dst_table || !src_opp)
> > +   return NULL;
> > +
> > +   for (i = 0; i < src_table->required_opp_count; i++) {
> > +   if (src_table->required_opp_tables[i]->np == dst_table->np)
> > +   break;
> > +   }
> > +
> > +   if (unlikely(i == src_table->required_opp_count)) {
> > +   pr_err("%s: Couldn't find matching OPP table (%p: %p)\n",
> > +  __func__, src_table, dst_table);
> > +   return NULL;
> > +   }
> > +
> > +   mutex_lock(&src_table->lock);
> > +
> > +   list_for_each_entry(opp, &src_table->opp_list, node) {
> > +   if (opp == src_opp) {
> > +   dest_opp = opp->required_opps[i];
>
> Correct me if I am wrong. This patch assumes that the 'i' index is the same between
> [1] and [2]. But in order to guarantee this assumption, all OPP entries
> in the same opp_table have to have the same number of 'required-opps' 
> properties
> and keep the sequence among 'required-opps' entries.
>
> [1] src_table->required_opp_tables[i]->np
> [2] opp->required_opps[i];
>
> For example, three OPP entries in the 'parent_bus_opp'
> have the different sequence of 'required-opps' and the different
> number of 'required-opps'. Is it no problem?
>
> parent_bus_opp: opp_table {
> compatible = "operating-points-v2";
>
> opp2 {
> opp-hz = /bits/ 64 <20>;
> required-opps = <&child_bus_a_opp2>, <&child_bus_b_opp2>,
> <&child_bus_c_opp2>;
> };
>
> opp1 {
> opp-hz = /bits/ 64 <20>;
> // change the sequence between child_bus_b_opp2  and child_bus_c_opp2
> required-opps = <&child_bus_a_opp2>, <&child_bus_c_opp2>,
> <&child_bus_b_opp2>
> };
>
> opp0 {
> opp-hz = /bits/ 64 <20>;
> // missing 'child_bus_a_opp2'
> required-opps = <&child_bus_c_opp2>, <&child_bus_b_opp2>
> };
>
> }
>

I get your question. If I'm not mistaken the OPP framework DT parsing
code makes the assumption that the required-opps list has the phandles
in the same order for each "row" in the OPP table. It actually only
looks at the first OPP entry to figure out the list of required OPP
tables.

Technically one can write code to deal with random order of the
required-opp list, but doesn't seem like that's worth it because
there's no need to have that order all mixed up in DT. And even if
someone wants to add support for that, I don't think improving the DT
parsing to handle random order would be part of this patch series.

-Saravana

> --
> Best Regards,
> Chanwoo Choi
>
> --
> To unsubscribe from this group and stop receiving emails from it, send an 
> email to kernel-team+unsubscr...@android.com.
>


Re: [PATCH net-next 6/6] net/mssc/ocelot: Add basic Felix switch driver

2019-06-22 Thread Andrew Lunn
On Fri, Jun 21, 2019 at 06:38:52PM +0300, Claudiu Manoil wrote:
> This supports a switch core ethernet device from Microsemi
> (VSC9959) that can be integrated on different SoCs as a PCIe
> endpoint device.
> 
> The switchdev functionality is provided by the core Ocelot
> switch driver. In this regard, the current driver is an
> instance of Microsemi's Ocelot core driver.
> 
> The patch adds the PCI device driver part and defines the
> register map for the Felix switch core, as it has some
> differences in register addresses and bitfield mappings
> compared to the Ocelot switch.  Also some registers or
> bitfields present on Ocelot are not available on Felix.
> That's why this driver has its own register map instance.
> Other than that, the common registers and bitfields have the
> same functionality and share the same name.
> 
> In a few cases, some h/w operations have to be done differently
> on Felix due to missing bitfields.  This is the case for the
> switch core reset and init.  Because for this operation Ocelot
> uses some bits that are not present on Felix, the latter has to
> use a register from the global registers block (GCB) instead.
> 
> Signed-off-by: Catalin Horghidan 
> Signed-off-by: Claudiu Manoil 
> ---
>  drivers/net/ethernet/mscc/Kconfig   |   8 +
>  drivers/net/ethernet/mscc/Makefile  |   9 +-
>  drivers/net/ethernet/mscc/felix_board.c | 392 +
>  drivers/net/ethernet/mscc/felix_regs.c  | 448 
>  drivers/net/ethernet/mscc/ocelot.h  |   7 +
>  5 files changed, 862 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/net/ethernet/mscc/felix_board.c
>  create mode 100644 drivers/net/ethernet/mscc/felix_regs.c
> 
> diff --git a/drivers/net/ethernet/mscc/Kconfig 
> b/drivers/net/ethernet/mscc/Kconfig
> index bcec0587cf61..e5a7fa69307e 100644
> --- a/drivers/net/ethernet/mscc/Kconfig
> +++ b/drivers/net/ethernet/mscc/Kconfig
> @@ -29,4 +29,12 @@ config MSCC_OCELOT_SWITCH_OCELOT
> This driver supports the Ocelot network switch device as present on
> the Ocelot SoCs.
>  
> +config MSCC_FELIX_SWITCH
> + tristate "Felix switch driver"
> + depends on MSCC_OCELOT_SWITCH
> + depends on PCI
> + help
> +   This driver supports the Felix network switch device, connected as a
> +   PCI device.
> +
>  endif # NET_VENDOR_MICROSEMI
> diff --git a/drivers/net/ethernet/mscc/Makefile 
> b/drivers/net/ethernet/mscc/Makefile
> index 9a36c26095c8..81593feb2ea1 100644
> --- a/drivers/net/ethernet/mscc/Makefile
> +++ b/drivers/net/ethernet/mscc/Makefile
> @@ -1,5 +1,10 @@
>  # SPDX-License-Identifier: (GPL-2.0 OR MIT)
>  obj-$(CONFIG_MSCC_OCELOT_SWITCH) += mscc_ocelot_common.o
>  mscc_ocelot_common-y := ocelot.o ocelot_io.o
> -mscc_ocelot_common-y += ocelot_regs.o ocelot_tc.o ocelot_police.o 
> ocelot_ace.o ocelot_flower.o
> -obj-$(CONFIG_MSCC_OCELOT_SWITCH_OCELOT) += ocelot_board.o
> +mscc_ocelot_common-y += ocelot_tc.o ocelot_police.o ocelot_ace.o 
> ocelot_flower.o
> +
> +obj-$(CONFIG_MSCC_OCELOT_SWITCH_OCELOT) += mscc_ocelot.o
> +mscc_ocelot-$(CONFIG_MSCC_OCELOT_SWITCH_OCELOT) := ocelot_regs.o 
> ocelot_board.o
> +
> +obj-$(CONFIG_MSCC_FELIX_SWITCH) += mscc_felix.o
> +mscc_felix-$(CONFIG_MSCC_FELIX_SWITCH) := felix_regs.o felix_board.o
> diff --git a/drivers/net/ethernet/mscc/felix_board.c 
> b/drivers/net/ethernet/mscc/felix_board.c
> new file mode 100644
> index 000000000000..57f7a897b3ae
> --- /dev/null
> +++ b/drivers/net/ethernet/mscc/felix_board.c
> @@ -0,0 +1,392 @@
> +// SPDX-License-Identifier: (GPL-2.0 OR MIT)
> +/* Felix Switch driver
> + *
> + * Copyright 2018-2019 NXP
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include "ocelot.h"
> +
> +#define FELIX_DRV_VER_MAJ 1
> +#define FELIX_DRV_VER_MIN 0
> +
> +#define FELIX_DRV_STR"Felix Switch driver"
> +#define FELIX_DRV_VER_STR __stringify(FELIX_DRV_VER_MAJ) "." \
> +   __stringify(FELIX_DRV_VER_MIN)

Driver version strings are pretty pointless. What you really want to
know is the specific kernel version.

> +
> +#define FELIX_PORT_RES_START 0x010
> +#define FELIX_PORT_RES_SIZE  0x1

This should really be in device tree. You then get a lot closer to the
binding for mscc-ocelot, and you can reuse more of its code.

> +static void felix_release_ports(struct ocelot *ocelot)
> +{
> + struct ocelot_port *ocelot_port;
> + struct phy_device *phydev;
> + struct device_node *dn;
> + int i;
> +
> + for (i = 0; i < ocelot->num_phys_ports; i++) {
> + ocelot_port = ocelot->ports[i];
> + if (!ocelot_port || !ocelot_port->phy || !ocelot_port->dev)
> + continue;

Phys are often optional, e.g. an RGMII interface to another switch, or
an SFP port.

> +
> + phydev = ocelot_port->phy;
> + unregister_netdev(ocelot_port->dev);
> + free_netdev(ocelot_port

Re: [PATCH] structleak: disable BYREF_ALL in combination with KASAN_STACK

2019-06-22 Thread Kees Cook
On Fri, Jun 21, 2019 at 03:50:02PM +0200, Ard Biesheuvel wrote:
> On Fri, 21 Jun 2019 at 15:44, Arnd Bergmann  wrote:
> > One pattern I have seen here is temporary variables from macros or
> > inline functions whose lifetime now extends over the entire function
> > rather than just the basic block in which they are defined, see e.g.
> > lpfc_debug_dump_qe() being inlined multiple times into
> > lpfc_debug_dump_all_queues(). Each instance of the local
> > "char line_buf[LPFC_LBUF_SZ];" seems to add on to the previous
> > one now, where the behavior without the structleak plugin is that
> > they don't.

Ewww.

> Right, that seems to be due to the fact that this code
> 
> /* split the first bb where we can put the forced initializers */
> gcc_assert(single_succ_p(ENTRY_BLOCK_PTR_FOR_FN(cfun)));
> bb = single_succ(ENTRY_BLOCK_PTR_FOR_FN(cfun));
> if (!single_pred_p(bb)) {
> split_edge(single_succ_edge(ENTRY_BLOCK_PTR_FOR_FN(cfun)));
> gcc_assert(single_succ_p(ENTRY_BLOCK_PTR_FOR_FN(cfun)));
> }
> 
> puts all the initializers at the beginning of the function rather than
> inside the scope of the definition.

Do you see a sane way to improve this? I hadn't noticed that this
actually moved it up to the start of the function. :(

-- 
Kees Cook


Re: [PATCH 08/16] nfsd: escape high characters in binary data

2019-06-22 Thread Kees Cook
On Sat, Jun 22, 2019 at 03:00:58PM -0400, J. Bruce Fields wrote:
> The logic around ESCAPE_NP and the "only" string is really confusing.  I
> started assuming I could just add an ESCAPE_NONASCII flag and stick "
> and \ into the "only" string, but it doesn't work that way.

Yeah, if ESCAPE_NP isn't specified, the "only" characters are passed
through. It'd be nice to have an "add" or a clearer way to do actual
ctype subsets, etc. If there isn't an obviously clear way to refactor
it, just skip it for now and I'm happy to ack your original patch. :)


-- 
Kees Cook


[PATCH V1] scsi: ufs-bsg: complete ufs-bsg job only if no error

2019-06-22 Thread Bean Huo (beanhuo)
From: Bean Huo 

In the case where a UPIU/DME request fails to execute in the UFS device,
ufs_bsg_request() completes the failed bsg job by calling
bsg_job_done(). Meanwhile, it returns the error status to the blk-mq
layer, which triggers blk-mq to complete the request again; this
causes the panic below.

[   68.673050] Call trace:
[   68.675491]  __ll_sc___cmpxchg_case_acq_32+0x4/0x20
[   68.680369]  complete+0x28/0x70
[   68.683510]  blk_end_sync_rq+0x24/0x30
[   68.687255]  blk_mq_end_request+0xb8/0x118
[   68.691350]  bsg_job_put+0x4c/0x58
[   68.694747]  bsg_complete+0x20/0x30
[   68.698231]  blk_done_softirq+0xb4/0xe8
[   68.702066]  __do_softirq+0x154/0x3f0
[   68.705726]  run_ksoftirqd+0x4c/0x68
[   68.709298]  smpboot_thread_fn+0x22c/0x268
[   68.713394]  kthread+0x130/0x138
[   68.716619]  ret_from_fork+0x10/0x1c
[   68.720193] Code: f84107fe d65f03c0 d503201f f9800011 (885ffc10) 
[   68.726298] ---[ end trace d92825bff6326e66 ]---
[   68.730913] Kernel panic - not syncing: Fatal exception in interrupt

This patch is to fix this issue. The solution is we complete
the ufs-bsg job only if no error happened.

Signed-off-by: Bean Huo 
---
 drivers/scsi/ufs/ufs_bsg.c | 7 ---
 drivers/scsi/ufs/ufshcd.c  | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/ufs/ufs_bsg.c b/drivers/scsi/ufs/ufs_bsg.c
index 869e71f..d5516dc 100644
--- a/drivers/scsi/ufs/ufs_bsg.c
+++ b/drivers/scsi/ufs/ufs_bsg.c
@@ -122,7 +122,7 @@ static int ufs_bsg_request(struct bsg_job *job)
memcpy(&uc, &bsg_request->upiu_req.uc, UIC_CMD_SIZE);
ret = ufshcd_send_uic_cmd(hba, &uc);
if (ret)
-   dev_dbg(hba->dev,
+   dev_err(hba->dev,
"send uic cmd: error code %d\n", ret);
 
memcpy(&bsg_reply->upiu_rsp.uc, &uc, UIC_CMD_SIZE);
@@ -143,13 +143,14 @@ static int ufs_bsg_request(struct bsg_job *job)
sg_copy_from_buffer(job->request_payload.sg_list,
job->request_payload.sg_cnt,
desc_buff, desc_len);
-
kfree(desc_buff);
 
 out:
bsg_reply->result = ret;
job->reply_len = sizeof(struct ufs_bsg_reply);
-   bsg_job_done(job, ret, bsg_reply->reply_payload_rcv_len);
+   /* complete the job here only if no error */
+   if (ret == 0)
+   bsg_job_done(job, ret, bsg_reply->reply_payload_rcv_len);
 
return ret;
 }
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 04d3686..4718041 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -3776,7 +3776,7 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, 
struct uic_command *cmd)
 }
 
 /**
- * ufshcd_uic_change_pwr_mode - Perform the UIC power mode chage
+ * ufshcd_uic_change_pwr_mode - Perform the UIC power mode change
  * using DME_SET primitives.
  * @hba: per adapter instance
  * @mode: powr mode value
-- 
2.7.4


[PATCH v2] lkdtm: Check for SMEP clearing protections

2019-06-22 Thread Kees Cook
This adds an x86-specific test for pinned cr4 bits. A successful test
will validate pinning and check the ROP-style call-middle-of-function
defense, if needed. For example, in the case of native_write_cr4()
looking like this:

8171bce0 <native_write_cr4>:
8171bce0:   48 8b 35 79 46 f2 00    mov    0xf24679(%rip),%rsi
8171bce7:   48 09 f7                or     %rsi,%rdi
8171bcea:   0f 22 e7                mov    %rdi,%cr4
...
8171bd5a:   c3                      retq

The UNSET_SMEP test will jump to 8171bcea (the mov to cr4)
instead of 8171bce0 (native_write_cr4() entry) to simulate a
direct-call bypass attempt.

Expected successful results:

  # echo UNSET_SMEP > /sys/kernel/debug/provoke-crash/DIRECT
  # dmesg
  [   79.594433] lkdtm: Performing direct entry UNSET_SMEP
  [   79.596459] lkdtm: trying to clear SMEP normally
  [   79.598406] lkdtm: ok: SMEP did not get cleared
  [   79.599981] lkdtm: trying to clear SMEP with call gadget
  [   79.603421] ------------[ cut here ]------------
  [   79.603421] Attempt to unpin cr4 bits: 10; bypass attack?!
  ...
  [   79.650170] ---[ end trace 2452ca0f6126242e ]---
  [   79.650937] lkdtm: ok: SMEP removal was reverted

Signed-off-by: Kees Cook 
---
v2:
- rebase to linux-next
---
 drivers/misc/lkdtm/bugs.c  | 66 ++
 drivers/misc/lkdtm/core.c  |  1 +
 drivers/misc/lkdtm/lkdtm.h |  1 +
 3 files changed, 68 insertions(+)

diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index d9fcfd3b5af0..99a2dce1625b 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -266,3 +266,69 @@ void lkdtm_STACK_GUARD_PAGE_TRAILING(void)
 
pr_err("FAIL: accessed page after stack!\n");
 }
+
+void lkdtm_UNSET_SMEP(void)
+{
+#ifdef CONFIG_X86_64
+#define MOV_CR4_DEPTH  64
+   void (*direct_write_cr4)(unsigned long val);
+   unsigned char *insn;
+   unsigned long cr4;
+   int i;
+
+   cr4 = native_read_cr4();
+
+   if ((cr4 & X86_CR4_SMEP) != X86_CR4_SMEP) {
+   pr_err("FAIL: SMEP not in use\n");
+   return;
+   }
+   cr4 &= ~(X86_CR4_SMEP);
+
+   pr_info("trying to clear SMEP normally\n");
+   native_write_cr4(cr4);
+   if (cr4 == native_read_cr4()) {
+   pr_err("FAIL: pinning SMEP failed!\n");
+   cr4 |= X86_CR4_SMEP;
+   pr_info("restoring SMEP\n");
+   native_write_cr4(cr4);
+   return;
+   }
+   pr_info("ok: SMEP did not get cleared\n");
+
+   /*
+* To test the post-write pinning verification we need to call
+* directly into the middle of native_write_cr4() where the
+* cr4 write happens, skipping any pinning. This searches for
+* the cr4 writing instruction.
+*/
+   insn = (unsigned char *)native_write_cr4;
+   for (i = 0; i < MOV_CR4_DEPTH; i++) {
+   /* mov %rdi, %cr4 */
+   if (insn[i] == 0x0f && insn[i+1] == 0x22 && insn[i+2] == 0xe7)
+   break;
+   /* mov %rdi,%rax; mov %rax, %cr4 */
+   if (insn[i]   == 0x48 && insn[i+1] == 0x89 &&
+   insn[i+2] == 0xf8 && insn[i+3] == 0x0f &&
+   insn[i+4] == 0x22 && insn[i+5] == 0xe0)
+   break;
+   }
+   if (i >= MOV_CR4_DEPTH) {
+   pr_info("ok: cannot locate cr4 writing call gadget\n");
+   return;
+   }
+   direct_write_cr4 = (void *)(insn + i);
+
+   pr_info("trying to clear SMEP with call gadget\n");
+   direct_write_cr4(cr4);
+   if (native_read_cr4() & X86_CR4_SMEP) {
+   pr_info("ok: SMEP removal was reverted\n");
+   } else {
+   pr_err("FAIL: cleared SMEP not detected!\n");
+   cr4 |= X86_CR4_SMEP;
+   pr_info("restoring SMEP\n");
+   native_write_cr4(cr4);
+   }
+#else
+   pr_err("FAIL: this test is x86_64-only\n");
+#endif
+}
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index 945f2059d423..52e99ab9cedc 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -114,6 +114,7 @@ static const struct crashtype crashtypes[] = {
CRASHTYPE(CORRUPT_USER_DS),
CRASHTYPE(STACK_GUARD_PAGE_LEADING),
CRASHTYPE(STACK_GUARD_PAGE_TRAILING),
+   CRASHTYPE(UNSET_SMEP),
CRASHTYPE(UNALIGNED_LOAD_STORE_WRITE),
CRASHTYPE(OVERWRITE_ALLOCATION),
CRASHTYPE(WRITE_AFTER_FREE),
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index c5ae0b37587d..6a284a87a037 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -26,6 +26,7 @@ void lkdtm_CORRUPT_LIST_DEL(void);
 void lkdtm_CORRUPT_USER_DS(void);
 void lkdtm_STACK_GUARD_PAGE_LEADING(void);
 void lkdtm_STACK_GUARD_PAGE_TRAILING(void);
+void lkdtm_UNSET_SMEP(void);
 
 /* lkdtm_heap.c */
 void __init lkdtm_heap_init(void);
-- 

Re: BUG: unable to handle kernel paging request in cpuacct_account_field

2019-06-22 Thread Thomas Gleixner
On Fri, 21 Jun 2019, syzbot wrote:

Cc+: 

> Hello,
> 
> syzbot found the following crash on:
> 
> HEAD commit:abf02e29 Merge tag 'pm-5.2-rc6' of git://git.kernel.org/pu..
> git tree:   upstream
> console output: https://syzkaller.appspot.com/x/log.txt?x=164d94f6a0
> kernel config:  https://syzkaller.appspot.com/x/.config?x=e5c77f8090a3b96b
> dashboard link: https://syzkaller.appspot.com/bug?extid=a952f743523593b39174
> compiler:   gcc (GCC) 9.0.0 20181231 (experimental)
> syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=1372abc6a0
> 
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+a952f743523593b39...@syzkaller.appspotmail.com
> 
> BUG: unable to handle page fault for address: de202771d9b9
> #PF: supervisor read access in kernel mode
> #PF: error_code(0x) - not-present page
> PGD 0 P4D 0
> Thread overran stack, or stack corrupted
> Oops:  [#1] PREEMPT SMP KASAN
> CPU: 1 PID: 8777 Comm: syz-executor.5 Not tainted 5.2.0-rc5+ #38
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google
> 01/01/2011
> RIP: 0010:cpuacct_account_field+0x12b/0x2f0 kernel/sched/cpuacct.c:366
> Code: c5 60 02 75 88 48 89 fa 48 c1 ea 03 42 80 3c 32 00 0f 85 81 01 00 00 48
> 03 1c c5 60 02 75 88 4a 8d 3c 3b 48 89 f8 48 c1 e8 03 <42> 80 3c 30 00 0f 85
> 89 01 00 00 4e 01 2c 3b 49 8d bc 24 28 01 00
> RSP: 0018:8880ae909c78 EFLAGS: 00010802
> RAX: 1fffe2202771d9b9 RBX: 11013b8ecdb8 RCX: 83358d3e
> RDX: 110ea04d RSI: 83358d4c RDI: 11013b8ecdc8
> RBP: 8880ae909ca8 R08: 888090866500 R09: 0001
> R10: fbfff1141b45 R11: 888090866500 R12: 88808cfecc80
> R13: 00966a20 R14: dc00 R15: 0010
> FS:  55ff5940() GS:8880ae90() knlGS:
> CS:  0010 DS:  ES:  CR0: 80050033
> CR2: de202771d9b9 CR3: 8533e000 CR4: 001406e0
> Call Trace:
> 
> cgroup_account_cputime_field include/linux/cgroup.h:789 [inline]
> task_group_account_field kernel/sched/cputime.c:109 [inline]
> account_system_index_time+0x11d/0x390 kernel/sched/cputime.c:172
> irqtime_account_process_tick.isra.0+0x386/0x490 kernel/sched/cputime.c:389
> account_process_tick+0x27f/0x350 kernel/sched/cputime.c:484
> update_process_times+0x25/0x80 kernel/time/timer.c:1637
> tick_sched_handle+0xa2/0x190 kernel/time/tick-sched.c:167
> tick_sched_timer+0x47/0x130 kernel/time/tick-sched.c:1298
> __run_hrtimer kernel/time/hrtimer.c:1389 [inline]
> __hrtimer_run_queues+0x33b/0xdd0 kernel/time/hrtimer.c:1451
> hrtimer_interrupt+0x314/0x770 kernel/time/hrtimer.c:1509
> local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1041 [inline]
> smp_apic_timer_interrupt+0x111/0x550 arch/x86/kernel/apic/apic.c:1066
> apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:806
> 
> Modules linked in:
> CR2: de202771d9b9
> ---[ end trace 4a0799d29c250606 ]---
> RIP: 0010:cpuacct_account_field+0x12b/0x2f0 kernel/sched/cpuacct.c:366
> Code: c5 60 02 75 88 48 89 fa 48 c1 ea 03 42 80 3c 32 00 0f 85 81 01 00 00 48
> 03 1c c5 60 02 75 88 4a 8d 3c 3b 48 89 f8 48 c1 e8 03 <42> 80 3c 30 00 0f 85
> 89 01 00 00 4e 01 2c 3b 49 8d bc 24 28 01 00
> RSP: 0018:8880ae909c78 EFLAGS: 00010802
> RAX: 1fffe2202771d9b9 RBX: 11013b8ecdb8 RCX: 83358d3e
> RDX: 110ea04d RSI: 83358d4c RDI: 11013b8ecdc8
> RBP: 8880ae909ca8 R08: 888090866500 R09: 0001
> R10: fbfff1141b45 R11: 888090866500 R12: 88808cfecc80
> R13: 00966a20 R14: dc00 R15: 0010
> FS:  55ff5940() GS:8880ae90() knlGS:
> CS:  0010 DS:  ES:  CR0: 80050033
> CR2: de202771d9b9 CR3: 8533e000 CR4: 001406e0
> 
> 
> ---
> This bug is generated by a bot. It may contain errors.
> See https://goo.gl/tpsmEJ for more information about syzbot.
> syzbot engineers can be reached at syzkal...@googlegroups.com.
> 
> syzbot will keep track of this bug report. See:
> https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
> syzbot can test patches for this bug, for details see:
> https://goo.gl/tpsmEJ#testing-patches


[tip:timers/core] posix-timers: Use spin_lock_irq() in itimer_delete()

2019-06-22 Thread tip-bot for Sebastian Andrzej Siewior
Commit-ID:  7586addb99322faf4d096fc8beb140f879409212
Gitweb: https://git.kernel.org/tip/7586addb99322faf4d096fc8beb140f879409212
Author: Sebastian Andrzej Siewior 
AuthorDate: Fri, 21 Jun 2019 16:36:43 +0200
Committer:  Thomas Gleixner 
CommitDate: Sat, 22 Jun 2019 12:14:22 +0200

posix-timers: Use spin_lock_irq() in itimer_delete()

itimer_delete() uses spin_lock_irqsave() to obtain a `flags' variable
which can then be passed to unlock_timer(). It already uses spin_lock
locking for the structure instead of lock_timer() because it has a timer
which can not be removed by others at this point. The cleanup is always
performed with enabled interrupts.

Use spin_lock_irq() / spin_unlock_irq() so the `flags' variable can be
removed.

Signed-off-by: Sebastian Andrzej Siewior 
Signed-off-by: Thomas Gleixner 
Link: https://lkml.kernel.org/r/20190621143643.25649-3-bige...@linutronix.de

---
 kernel/time/posix-timers.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index caa63e58e3d8..d7f2d91acdac 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -980,18 +980,16 @@ retry_delete:
  */
 static void itimer_delete(struct k_itimer *timer)
 {
-   unsigned long flags;
-
 retry_delete:
-   spin_lock_irqsave(&timer->it_lock, flags);
+   spin_lock_irq(&timer->it_lock);
 
if (timer_delete_hook(timer) == TIMER_RETRY) {
-   unlock_timer(timer, flags);
+   spin_unlock_irq(&timer->it_lock);
goto retry_delete;
}
list_del(&timer->list);
 
-   unlock_timer(timer, flags);
+   spin_unlock_irq(&timer->it_lock);
release_posix_timer(timer, IT_ID_SET);
 }
 


[tip:timers/core] posix-timers: Remove "it_signal = NULL" assignment in itimer_delete()

2019-06-22 Thread tip-bot for Sebastian Andrzej Siewior
Commit-ID:  12063d431078be73d11cb5e48a17c6db5f0d8254
Gitweb: https://git.kernel.org/tip/12063d431078be73d11cb5e48a17c6db5f0d8254
Author: Sebastian Andrzej Siewior 
AuthorDate: Fri, 21 Jun 2019 16:36:42 +0200
Committer:  Thomas Gleixner 
CommitDate: Sat, 22 Jun 2019 12:14:22 +0200

posix-timers: Remove "it_signal = NULL" assignment in itimer_delete()

itimer_delete() is invoked during do_exit(). At this point it is the
last thread in the group dying and doing the clean up.
Since it is the last thread in the group, there can not be any other
task attempting to lock the itimer which means the NULL assignment (which
avoids lookups in __lock_timer()) is not required.

The assignment and comment were copied in commit 0e568881178ff ("[PATCH]
fix posix-timers to have proper per-process scope") from
sys_timer_delete() which was/is the syscall interface and requires the
assignment.

Remove the superfluous ->it_signal = NULL assignment.

Signed-off-by: Sebastian Andrzej Siewior 
Signed-off-by: Thomas Gleixner 
Link: https://lkml.kernel.org/r/20190621143643.25649-2-bige...@linutronix.de

---
 kernel/time/posix-timers.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 29176635991f..caa63e58e3d8 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -990,11 +990,6 @@ retry_delete:
goto retry_delete;
}
list_del(&timer->list);
-   /*
-* This keeps any tasks waiting on the spin lock from thinking
-* they got something (see the lock code above).
-*/
-   timer->it_signal = NULL;
 
unlock_timer(timer, flags);
release_posix_timer(timer, IT_ID_SET);


[tip:timers/core] timekeeping: Add missing _ns functions for coarse accessors

2019-06-22 Thread tip-bot for Jason A. Donenfeld
Commit-ID:  4c54294d01e605a9f992361b924c5d8b12822a6d
Gitweb: https://git.kernel.org/tip/4c54294d01e605a9f992361b924c5d8b12822a6d
Author: Jason A. Donenfeld 
AuthorDate: Fri, 21 Jun 2019 22:32:49 +0200
Committer:  Thomas Gleixner 
CommitDate: Sat, 22 Jun 2019 12:11:28 +0200

timekeeping: Add missing _ns functions for coarse accessors

This further unifies the accessors for the fast and coarse functions, so
that the same types of functions are available for each. There was also
a bit of confusion with the documentation, which prior advertised a
function that has never existed. Finally, the vanilla ktime_get_coarse()
was omitted from the API originally, so this fills this oversight.

Signed-off-by: Jason A. Donenfeld 
Signed-off-by: Thomas Gleixner 
Reviewed-by: Arnd Bergmann 
Link: https://lkml.kernel.org/r/20190621203249.3909-3-ja...@zx2c4.com

---
 Documentation/core-api/timekeeping.rst | 10 +++---
 include/linux/timekeeping.h| 28 
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/Documentation/core-api/timekeeping.rst 
b/Documentation/core-api/timekeeping.rst
index 4d92b1ac8024..15fc58e85ef9 100644
--- a/Documentation/core-api/timekeeping.rst
+++ b/Documentation/core-api/timekeeping.rst
@@ -99,16 +99,20 @@ Coarse and fast_ns access
 
 Some additional variants exist for more specialized cases:
 
-.. c:function:: ktime_t ktime_get_coarse_boottime( void )
+.. c:function:: ktime_t ktime_get_coarse( void )
+   ktime_t ktime_get_coarse_boottime( void )
ktime_t ktime_get_coarse_real( void )
ktime_t ktime_get_coarse_clocktai( void )
-   ktime_t ktime_get_coarse_raw( void )
+
+.. c:function:: u64 ktime_get_coarse_ns( void )
+   u64 ktime_get_coarse_boot_ns( void )
+   u64 ktime_get_coarse_real_ns( void )
+   u64 ktime_get_coarse_clocktai_ns( void )
 
 .. c:function:: void ktime_get_coarse_ts64( struct timespec64 * )
void ktime_get_coarse_boottime_ts64( struct timespec64 * )
void ktime_get_coarse_real_ts64( struct timespec64 * )
void ktime_get_coarse_clocktai_ts64( struct timespec64 * )
-   void ktime_get_coarse_raw_ts64( struct timespec64 * )
 
These are quicker than the non-coarse versions, but less accurate,
corresponding to CLOCK_MONONOTNIC_COARSE and CLOCK_REALTIME_COARSE
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index fd6123722ea8..dcffc00755f2 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -113,6 +113,34 @@ static inline ktime_t ktime_get_coarse_clocktai(void)
return ktime_get_coarse_with_offset(TK_OFFS_TAI);
 }
 
+static inline ktime_t ktime_get_coarse(void)
+{
+   struct timespec64 ts;
+
+   ktime_get_coarse_ts64(&ts);
+   return timespec64_to_ktime(ts);
+}
+
+static inline u64 ktime_get_coarse_ns(void)
+{
+   return ktime_to_ns(ktime_get_coarse());
+}
+
+static inline u64 ktime_get_coarse_real_ns(void)
+{
+   return ktime_to_ns(ktime_get_coarse_real());
+}
+
+static inline u64 ktime_get_coarse_boot_ns(void)
+{
+   return ktime_to_ns(ktime_get_coarse_boottime());
+}
+
+static inline u64 ktime_get_coarse_clocktai_ns(void)
+{
+   return ktime_to_ns(ktime_get_coarse_clocktai());
+}
+
 /**
  * ktime_mono_to_real - Convert monotonic time to clock realtime
  */


[tip:timers/core] timekeeping: Use proper clock specifier names in functions

2019-06-22 Thread tip-bot for Jason A. Donenfeld
Commit-ID:  9285ec4c8b61d4930a575081abeba2cd4f449a74
Gitweb: https://git.kernel.org/tip/9285ec4c8b61d4930a575081abeba2cd4f449a74
Author: Jason A. Donenfeld 
AuthorDate: Fri, 21 Jun 2019 22:32:48 +0200
Committer:  Thomas Gleixner 
CommitDate: Sat, 22 Jun 2019 12:11:27 +0200

timekeeping: Use proper clock specifier names in functions

This makes boot uniformly boottime and tai uniformly clocktai, to
address the remaining oversights.

Signed-off-by: Jason A. Donenfeld 
Signed-off-by: Thomas Gleixner 
Reviewed-by: Arnd Bergmann 
Link: https://lkml.kernel.org/r/20190621203249.3909-2-ja...@zx2c4.com

---
 Documentation/core-api/timekeeping.rst |  2 +-
 arch/x86/kvm/pmu.c |  4 ++--
 arch/x86/kvm/x86.c | 12 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c   |  2 +-
 drivers/iio/humidity/dht11.c   |  8 
 drivers/iio/industrialio-core.c|  4 ++--
 drivers/infiniband/hw/mlx4/alias_GUID.c|  6 +++---
 drivers/leds/trigger/ledtrig-activity.c|  2 +-
 drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c |  2 +-
 drivers/net/wireless/intel/iwlwifi/mvm/rx.c|  2 +-
 drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c  |  2 +-
 drivers/net/wireless/intel/iwlwifi/mvm/utils.c |  2 +-
 drivers/net/wireless/mac80211_hwsim.c  |  2 +-
 drivers/net/wireless/ti/wlcore/main.c  |  2 +-
 drivers/net/wireless/ti/wlcore/rx.c|  2 +-
 drivers/net/wireless/ti/wlcore/tx.c|  2 +-
 drivers/net/wireless/virt_wifi.c   |  2 +-
 include/linux/timekeeping.h|  4 ++--
 include/net/cfg80211.h |  2 +-
 kernel/bpf/syscall.c   |  2 +-
 kernel/events/core.c   |  4 ++--
 kernel/fork.c  |  2 +-
 22 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/Documentation/core-api/timekeeping.rst 
b/Documentation/core-api/timekeeping.rst
index 93cbeb9daec0..4d92b1ac8024 100644
--- a/Documentation/core-api/timekeeping.rst
+++ b/Documentation/core-api/timekeeping.rst
@@ -65,7 +65,7 @@ different format depending on what is required by the user:
 .. c:function:: u64 ktime_get_ns( void )
u64 ktime_get_boottime_ns( void )
u64 ktime_get_real_ns( void )
-   u64 ktime_get_tai_ns( void )
+   u64 ktime_get_clocktai_ns( void )
u64 ktime_get_raw_ns( void )
 
Same as the plain ktime_get functions, but returning a u64 number
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index dd745b58ffd8..1aea628ef6b8 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -264,10 +264,10 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, 
unsigned idx, u64 *data)
ctr_val = rdtsc();
break;
case VMWARE_BACKDOOR_PMC_REAL_TIME:
-   ctr_val = ktime_get_boot_ns();
+   ctr_val = ktime_get_boottime_ns();
break;
case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
-   ctr_val = ktime_get_boot_ns() +
+   ctr_val = ktime_get_boottime_ns() +
vcpu->kvm->arch.kvmclock_offset;
break;
default:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 83aefd759846..81a0914a1ec1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1731,7 +1731,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data 
*msr)
 
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
offset = kvm_compute_tsc_offset(vcpu, data);
-   ns = ktime_get_boot_ns();
+   ns = ktime_get_boottime_ns();
elapsed = ns - kvm->arch.last_tsc_nsec;
 
if (vcpu->arch.virtual_tsc_khz) {
@@ -2073,7 +2073,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
spin_lock(&ka->pvclock_gtod_sync_lock);
if (!ka->use_master_clock) {
spin_unlock(&ka->pvclock_gtod_sync_lock);
-   return ktime_get_boot_ns() + ka->kvmclock_offset;
+   return ktime_get_boottime_ns() + ka->kvmclock_offset;
}
 
hv_clock.tsc_timestamp = ka->master_cycle_now;
@@ -2089,7 +2089,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
   &hv_clock.tsc_to_system_mul);
ret = __pvclock_read_cycles(&hv_clock, rdtsc());
} else
-   ret = ktime_get_boot_ns() + ka->kvmclock_offset;
+   ret = ktime_get_boottime_ns() + ka->kvmclock_offset;
 
put_cpu();
 
@@ -2188,7 +2188,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
}
if (!use_master_clock) {
host_tsc = rdtsc();
-   kernel_ns = ktime_get_boot_ns();
+   kernel_ns = ktime_get_boot

[tip:timers/core] timekeeping: Use proper ktime_add when adding nsecs in coarse offset

2019-06-22 Thread tip-bot for Jason A. Donenfeld
Commit-ID:  0354c1a3cdf31f44b035cfad14d32282e815a572
Gitweb: https://git.kernel.org/tip/0354c1a3cdf31f44b035cfad14d32282e815a572
Author: Jason A. Donenfeld 
AuthorDate: Fri, 21 Jun 2019 22:32:47 +0200
Committer:  Thomas Gleixner 
CommitDate: Sat, 22 Jun 2019 12:11:27 +0200

timekeeping: Use proper ktime_add when adding nsecs in coarse offset

While this doesn't actually amount to a real difference, since the macro
evaluates to the same thing, every place else operates on ktime_t using
these functions, so let's not break the pattern.

Fixes: e3ff9c3678b4 ("timekeeping: Repair ktime_get_coarse*() granularity")
Signed-off-by: Jason A. Donenfeld 
Signed-off-by: Thomas Gleixner 
Reviewed-by: Arnd Bergmann 
Link: https://lkml.kernel.org/r/20190621203249.3909-1-ja...@zx2c4.com

---
 kernel/time/timekeeping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 44b726bab4bd..d911c8470149 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -819,7 +819,7 @@ ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
 
} while (read_seqcount_retry(&tk_core.seq, seq));
 
-   return base + nsecs;
+   return ktime_add_ns(base, nsecs);
 }
 EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);
 


[PATCH v3] arm64: defconfig: Enable Panfrost and Lima drivers

2019-06-22 Thread Krzysztof Kozlowski
Enable support for Mali GPU with Panfrost and Lima drivers for:
1. Samsung Exynos5433 and Exynos7 (having Mali T760),
2. Allwinner A64 and H5 (Mali 400/450).

Signed-off-by: Krzysztof Kozlowski 

---

Changes since v1:
1. Enable Lima driver
---
 arch/arm64/configs/defconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index fbbc065415d4..3d31611368af 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -518,6 +518,8 @@ CONFIG_DRM_HISI_HIBMC=m
 CONFIG_DRM_HISI_KIRIN=m
 CONFIG_DRM_MESON=m
 CONFIG_DRM_PL111=m
+CONFIG_DRM_LIMA=m
+CONFIG_DRM_PANFROST=m
 CONFIG_FB=y
 CONFIG_FB_MODE_HELPERS=y
 CONFIG_BACKLIGHT_GENERIC=m
@@ -718,7 +720,6 @@ CONFIG_ARCH_TEGRA_194_SOC=y
 CONFIG_ARCH_K3_AM6_SOC=y
 CONFIG_SOC_TI=y
 CONFIG_TI_SCI_PM_DOMAINS=y
-CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y
 CONFIG_EXTCON_USB_GPIO=y
 CONFIG_EXTCON_USBC_CROS_EC=y
 CONFIG_MEMORY=y
-- 
2.17.1



Re: [RFC PATCH RT 3/4] rcu: unlock special: Treat irq and preempt disabled the same

2019-06-22 Thread Paul E. McKenney
On Fri, Jun 21, 2019 at 05:26:06PM -0700, Paul E. McKenney wrote:
> On Thu, Jun 20, 2019 at 06:08:19PM -0500, Scott Wood wrote:
> > On Thu, 2019-06-20 at 15:25 -0700, Paul E. McKenney wrote:
> > > On Thu, Jun 20, 2019 at 04:59:30PM -0500, Scott Wood wrote:
> > > > On Thu, 2019-06-20 at 14:10 -0700, Paul E. McKenney wrote:
> > > > > On Tue, Jun 18, 2019 at 08:19:07PM -0500, Scott Wood wrote:
> > > > > > [Note: Just before posting this I noticed that the invoke_rcu_core
> > > > > > stuff
> > > > > >  is part of the latest RCU pull request, and it has a patch that
> > > > > >  addresses this in a more complicated way that appears to deal with
> > > > > > the
> > > > > >  bare irq-disabled sequence as well.
> > > > > 
> > > > > Far easier to deal with it than to debug the lack of it.  ;-)
> > > > > 
> > > > > >  Assuming we need/want to support such sequences, is the
> > > > > >  invoke_rcu_core() call actually going to result in scheduling any
> > > > > >  sooner?  resched_curr() just does the same setting of need_resched
> > > > > >  when it's the same cpu.
> > > > > > ]
> > > > > 
> > > > > Yes, invoke_rcu_core() can in some cases invoke the scheduler sooner.
> > > > > Setting the CPU-local bits might not have effect until the next
> > > > > interrupt.
> > > > 
> > > > Maybe I'm missing something, but I don't see how (in the non-use_softirq
> > > > case).  It just calls wake_up_process(), which in resched_curr() will
> > > > set
> > > > need_resched but not do an IPI-to-self.
> > > 
> > > The common non-rt case will be use_softirq.  Or are you referring
> > > specifically to this block of code in current -rcu?
> > > 
> > >   } else if (exp && irqs_were_disabled && !use_softirq &&
> > >  !t->rcu_read_unlock_special.b.deferred_qs) {
> > >   // Safe to awaken and we get no help from enabling
> > >   // irqs, unlike bh/preempt.
> > >   invoke_rcu_core();
> > 
> > Yes, that one.  If that block is removed the else path should be sufficient,
> > now that an IPI-to-self has been added.
> 
> I will give it a try and let you know what happens.

How about the following?

Thanx, Paul



commit 2fd23b1b649bf7e0754fa1dfce01e945bc62f4af
Author: Paul E. McKenney 
Date:   Sat Jun 22 12:05:54 2019 -0700

rcu: Simplify rcu_read_unlock_special() deferred wakeups

In !use_softirq runs, we clearly cannot rely on raise_softirq() and
its lightweight bit setting, so we must instead do some form of wakeup.
In the absence of a self-IPI when interrupts are disabled, these wakeups
can be delayed until the next interrupt occurs.  This means that calling
invoke_rcu_core() doesn't actually do any expediting.

In this case, it is better to take the "else" clause, which sets the
current CPU's resched bits and, if there is an expedited grace period
in flight, uses IRQ-work to force the needed self-IPI.  This commit
therefore removes the "else if" clause that calls invoke_rcu_core().

Reported-by: Scott Wood 
Signed-off-by: Paul E. McKenney 

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 841060fce33c..c631413f457f 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -629,17 +629,12 @@ static void rcu_read_unlock_special(struct task_struct *t)
// Using softirq, safe to awaken, and we get
// no help from enabling irqs, unlike bh/preempt.
raise_softirq_irqoff(RCU_SOFTIRQ);
-   } else if (exp && irqs_were_disabled && !use_softirq &&
-  !t->rcu_read_unlock_special.b.deferred_qs) {
-   // Safe to awaken and we get no help from enabling
-   // irqs, unlike bh/preempt.
-   invoke_rcu_core();
} else {
// Enabling BH or preempt does reschedule, so...
// Also if no expediting or NO_HZ_FULL, slow is OK.
set_tsk_need_resched(current);
set_preempt_need_resched();
-   if (IS_ENABLED(CONFIG_IRQ_WORK) &&
+   if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled &&
!rdp->defer_qs_iw_pending && exp) {
// Get scheduler to re-evaluate and call hooks.
// If !IRQ_WORK, FQS scan will eventually IPI.



Re: [PATCH 08/16] nfsd: escape high characters in binary data

2019-06-22 Thread J. Bruce Fields
On Fri, Jun 21, 2019 at 03:26:00PM -0700, Kees Cook wrote:
> On Fri, Jun 21, 2019 at 01:45:44PM -0400, J. Bruce Fields wrote:
> > I'm not sure who to get review from for this kind of thing.
> > 
> > Kees, you seem to be one of the only people to touch string_helpers.c
> > at all recently, any ideas?
> 
> Hi! Yeah, I'm happy to take a look. Notes below...

Thanks!

> > On Thu, Jun 20, 2019 at 10:51:07AM -0400, J. Bruce Fields wrote:
> > > From: "J. Bruce Fields" 
> > > 
> > > I'm exposing some information about NFS clients in pseudofiles.  I
> > > expect to eventually have simple tools to help read those pseudofiles.
> > > 
> > > But it's also helpful if the raw files are human-readable to the extent
> > > possible.  It aids debugging and makes them usable on systems that don't
> > > have the latest nfs-utils.
> > > 
> > > A minor challenge there is opaque client-generated protocol objects like
> > > state owners and client identifiers.  Some clients generate those to
> > > include handy information in plain ascii.  But they may also include
> > > arbitrary byte sequences.
> > > 
> > > I think the simplest approach is to limit to isprint(c) && isascii(c)
> > > and escape everything else.
> 
> Can you get the same functionality out of sprintf's %pE (escaped
> string)? If not, maybe we should expand the flags available?

Nothing against it, I just didn't want it to do that for one user,
but...

> 
>  * - 'E[achnops]' For an escaped buffer, where rules are defined by
>  * combination
>  *of the following flags (see string_escape_mem() for
>  *the
>  *details):
>  *  a - ESCAPE_ANY
>  *  c - ESCAPE_SPECIAL
>  *  h - ESCAPE_HEX
>  *  n - ESCAPE_NULL
>  *  o - ESCAPE_OCTAL
>  *  p - ESCAPE_NP
>  *  s - ESCAPE_SPACE
>  *By default ESCAPE_ANY_NP is used.
> 
> This doesn't cover escaping >0x7f and " and \
> 
> And perhaps I should rework kstrdup_quotable() to have that flag? It's
> not currently escaping non-ascii and it probably should. Maybe
> "ESCAPE_QUOTABLE" as "q"?

... but if you think there's a lot of existing users that really want
this behavior, then great.

I'll look into that.

The logic around ESCAPE_NP and the "only" string is really confusing.  I
started assuming I could just add an ESCAPE_NONASCII flag and stick "
and \ into the "only" string, but it doesn't work that way.

---b.


Re: [PATCH v5.2-rc5] Bluetooth: Fix regression with minimum encryption key size alignment

2019-06-22 Thread Marcel Holtmann
Hi Sasha,

> [This is an automated email]
> 
> This commit has been processed because it contains a "Fixes:" tag,
> fixing commit: d5bb334a8e17 Bluetooth: Align minimum encryption key size for 
> LE and BR/EDR connections.
> 
> The bot has tested the following trees: v5.1.12, v4.19.53, v4.14.128, 
> v4.9.182, v4.4.182.
> 
> v5.1.12: Build failed! Errors:
>net/bluetooth/l2cap_core.c:1356:24: error: ‘HCI_MIN_ENC_KEY_SIZE’ 
> undeclared (first use in this function); did you mean ‘SMP_MIN_ENC_KEY_SIZE’?
> 
> v4.19.53: Build failed! Errors:
>net/bluetooth/l2cap_core.c:1355:24: error: ‘HCI_MIN_ENC_KEY_SIZE’ 
> undeclared (first use in this function); did you mean ‘SMP_MIN_ENC_KEY_SIZE’?
> 
> v4.14.128: Build failed! Errors:
>net/bluetooth/l2cap_core.c:1355:24: error: ‘HCI_MIN_ENC_KEY_SIZE’ 
> undeclared (first use in this function); did you mean ‘SMP_MIN_ENC_KEY_SIZE’?
> 
> v4.9.182: Build OK!
> v4.4.182: Build OK!
> 
> How should we proceed with this patch?

either you reapply commit d5bb334a8e17 first or I have to send a version that 
combines both into a single commit for easy applying.

Regards

Marcel



Re: [PATCH v2] riscv: dts: Add DT node for SiFive FU540 Ethernet controller driver

2019-06-22 Thread Troy Benjegerdes



> On Jun 21, 2019, at 2:14 PM, Atish Patra  wrote:
> 
> On Fri, 2019-06-21 at 16:23 +0530, Yash Shah wrote:
>> DT node for SiFive FU540-C000 GEMGXL Ethernet controller driver added
>> 
>> Signed-off-by: Yash Shah 
>> ---
>> arch/riscv/boot/dts/sifive/fu540-c000.dtsi  | 16
>> 
>> arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts |  9 +
>> 2 files changed, 25 insertions(+)
>> 
>> diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
>> b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
>> index 4e8fbde..c53b4ea 100644
>> --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
>> +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
>> @@ -225,5 +225,21 @@
>>  #address-cells = <1>;
>>  #size-cells = <0>;
>>  };
>> +eth0: ethernet@1009 {
>> +compatible = "sifive,fu540-macb";
>> +interrupt-parent = <&plic0>;
>> +interrupts = <53>;
>> +reg = <0x0 0x1009 0x0 0x2000
>> +   0x0 0x100a 0x0 0x1000>;
>> +reg-names = "control";
>> +status = "disabled";
>> +local-mac-address = [00 00 00 00 00 00];
>> +clock-names = "pclk", "hclk";
>> +clocks = <&prci PRCI_CLK_GEMGXLPLL>,
>> + <&prci PRCI_CLK_GEMGXLPLL>;
>> +#address-cells = <1>;
>> +#size-cells = <0>;
>> +};
>> +
>>  };
>> };
>> diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
>> b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
>> index 4da8870..d783bf2 100644
>> --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
>> +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
>> @@ -63,3 +63,12 @@
>>  disable-wp;
>>  };
>> };
>> +
>> +&eth0 {
>> +status = "okay";
>> +phy-mode = "gmii";
>> +phy-handle = <&phy1>;
>> +phy1: ethernet-phy@0 {
>> +reg = <0>;
>> +};
>> +};
> 
> Thanks. I am able to boot Unleashed with networking enabled with this
> patch.
> 
> FWIW, 
> Tested-by: Atish Patra 
> 
> Regards,
> Atish

I am able to boot using a build from the dev/new-dts branch of
my freedom-u-sdk development tree [1] which has this patch
with an additional reset-gpios entry for the ethernet phy in the
DTS provided by the legacy U-boot[2].

Tested-by: Troy Benjegerdes 

[1] https://github.com/tmagik/freedom-u-sdk 
[2] 
https://github.com/sifive/HiFive_U-Boot/blob/081373fa3eb0ca79ba3f4a703e8e83a15135a6d1/arch/riscv/dts/hifive_u540.dts#L73

Re: [PATCH v5.2-rc5] Bluetooth: Fix regression with minimum encryption key size alignment

2019-06-22 Thread Sasha Levin
Hi,

[This is an automated email]

This commit has been processed because it contains a "Fixes:" tag,
fixing commit: d5bb334a8e17 Bluetooth: Align minimum encryption key size for LE 
and BR/EDR connections.

The bot has tested the following trees: v5.1.12, v4.19.53, v4.14.128, v4.9.182, 
v4.4.182.

v5.1.12: Build failed! Errors:
net/bluetooth/l2cap_core.c:1356:24: error: ‘HCI_MIN_ENC_KEY_SIZE’ 
undeclared (first use in this function); did you mean 
‘SMP_MIN_ENC_KEY_SIZE’?

v4.19.53: Build failed! Errors:
net/bluetooth/l2cap_core.c:1355:24: error: ‘HCI_MIN_ENC_KEY_SIZE’ 
undeclared (first use in this function); did you mean 
‘SMP_MIN_ENC_KEY_SIZE’?

v4.14.128: Build failed! Errors:
net/bluetooth/l2cap_core.c:1355:24: error: ‘HCI_MIN_ENC_KEY_SIZE’ 
undeclared (first use in this function); did you mean 
‘SMP_MIN_ENC_KEY_SIZE’?

v4.9.182: Build OK!
v4.4.182: Build OK!

How should we proceed with this patch?

--
Thanks,
Sasha


Re: [PATCH v5 02/25] mm: userfault: return VM_FAULT_RETRY on signals

2019-06-22 Thread Linus Torvalds
So I still think this all *may* be ok, but at a minimum some of the
comments are misleading, and we need more docs on what happens with
normal signals.

I'm picking on just the first one I noticed, but I think there were
other architectures with this too:

On Wed, Jun 19, 2019 at 7:20 PM Peter Xu  wrote:
>
> diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
> index 6836095251ed..3517820aea07 100644
> --- a/arch/arc/mm/fault.c
> +++ b/arch/arc/mm/fault.c
> @@ -139,17 +139,14 @@ void do_page_fault(unsigned long address, struct 
> pt_regs *regs)
>  */
> fault = handle_mm_fault(vma, address, flags);
>
> -   if (fatal_signal_pending(current)) {
> -
> +   if (unlikely((fault & VM_FAULT_RETRY) && signal_pending(current))) {
> +   if (fatal_signal_pending(current) && !user_mode(regs))
> +   goto no_context;
> /*
>  * if fault retry, mmap_sem already relinquished by core mm
>  * so OK to return to user mode (with signal handled first)
>  */
> -   if (fault & VM_FAULT_RETRY) {
> -   if (!user_mode(regs))
> -   goto no_context;
> -   return;
> -   }
> +   return;
> }

So note how the end result of this is:

 (a) if a fatal signal is pending, and we're returning to kernel mode,
we do the exception handling

 (b) otherwise, if *any* signal is pending, we'll just return and
retry the page fault

I have nothing against (a), and (b) is likely also ok, but it's worth
noting that (b) happens for kernel returns too. But the comment talks
about returning to user mode.

Is it ok to return to kernel mode when signals are pending? The signal
won't be handled, and we'll just retry the access.

Will we possibly keep retrying forever? When we take the fault again,
we'll set the FAULT_FLAG_ALLOW_RETRY again, so any fault handler that
says "if it allows retry, and signals are pending, just return" would
keep never making any progress, and we'd be stuck taking page faults
in kernel mode forever.

So I think the x86 code sequence is the much safer and more correct
one, because it will actually retry once, and set FAULT_FLAG_TRIED
(and it will clear the "FAULT_FLAG_ALLOW_RETRY" flag - but you'll
remove that clearing later in the series).

> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> index 46df4c6aae46..dcd7c1393be3 100644
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -1463,16 +1463,20 @@ void do_user_addr_fault(struct pt_regs *regs,
>  * that we made any progress. Handle this case first.
>  */
> if (unlikely(fault & VM_FAULT_RETRY)) {
> +   bool is_user = flags & FAULT_FLAG_USER;
> +
> /* Retry at most once */
> if (flags & FAULT_FLAG_ALLOW_RETRY) {
> flags &= ~FAULT_FLAG_ALLOW_RETRY;
> flags |= FAULT_FLAG_TRIED;
> +   if (is_user && signal_pending(tsk))
> +   return;
> if (!fatal_signal_pending(tsk))
> goto retry;
> }
>
> /* User mode? Just return to handle the fatal exception */
> -   if (flags & FAULT_FLAG_USER)
> +   if (is_user)
> return;
>
> /* Not returning to user mode? Handle exceptions or die: */

However, I think the real issue is that it just needs documentation
that a fault handler must not react to signal_pending() as part of the
fault handling itself (ie the VM_FAULT_RETRY can not be *because* of a
non-fatal signal), and there needs to be some guarantee of forward
progress.

At that point the "infinite page faults in kernel mode due to pending
signals" issue goes away. But it's not obvious in this patch, at
least.

   Linus


Re: d_lookup: Unable to handle kernel paging request

2019-06-22 Thread Vicente Bergas

Hi Al,
I think I have a hint of what is going on.
With the last kernel built with your sentinels at hlist_bl_*lock
it is very easy to reproduce the issue.
In fact it is so unstable that i had to connect a serial port
in order to save the kernel trace.
Unfortunately all the traces are at different addresses and
your sentinel did not trigger.

Now i am writing this email from that same buggy kernel, which is
v5.2-rc5-224-gbed3c0d84e7e.

The difference is that I changed the bootloader.
Before was booting 5.1.12 and kexec into this one.
Now booting from u-boot into this one.
I will continue booting with u-boot for some time to be sure it is
stable and confirm this is the cause.

In case it is, who is the most probable offender?
the kernel before kexec or the kernel after?

The original report was sent to you because you appeared as the maintainer
of fs/dcache.c, which appeared on the trace. Should this be redirected
somewhere else now?

Regards,
 Vicenç.

On Wednesday, June 19, 2019 7:09:40 PM CEST, Al Viro wrote:

On Wed, Jun 19, 2019 at 06:51:51PM +0200, Vicente Bergas wrote:


What's your config, BTW?  SMP and DEBUG_SPINLOCK, specifically...


Hi Al,
here it is:
https://paste.debian.net/1088517


Aha...  So LIST_BL_LOCKMASK is 1 there (same as on distro builds)...

Hell knows - how about
static inline void hlist_bl_lock(struct hlist_bl_head *b)
{
BUG_ON(((u32)READ_ONCE(*b)&~LIST_BL_LOCKMASK) == 0x0100);
bit_spin_lock(0, (unsigned long *)b);
}

and

static inline void hlist_bl_unlock(struct hlist_bl_head *b)
{
__bit_spin_unlock(0, (unsigned long *)b);
BUG_ON(((u32)READ_ONCE(*b)&~LIST_BL_LOCKMASK) == 0x0100);
}

to see if we can narrow down where that happens?




Re: [tip:x86/cpu] x86/cpu: Create Zhaoxin processors architecture support file

2019-06-22 Thread Joe Perches
On Sat, 2019-06-22 at 03:16 -0700, tip-bot for Tony W Wang-oc wrote:
> Commit-ID:  761fdd5e3327db6c646a09bab5ad48cd42680cd2
> Gitweb: 
> https://git.kernel.org/tip/761fdd5e3327db6c646a09bab5ad48cd42680cd2
> Author: Tony W Wang-oc 
> AuthorDate: Tue, 18 Jun 2019 08:37:05 +
> Committer:  Thomas Gleixner 
> CommitDate: Sat, 22 Jun 2019 11:45:57 +0200
> 
> x86/cpu: Create Zhaoxin processors architecture support file
> 
[]
> diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c
[]
> +static void init_zhaoxin_cap(struct cpuinfo_x86 *c)
> +{
> + u32  lo, hi;
> +
> + /* Test for Extended Feature Flags presence */
> + if (cpuid_eax(0xC000) >= 0xC001) {
> + u32 tmp = cpuid_edx(0xC001);
> +
> + /* Enable ACE unit, if present and disabled */
> + if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) {

trivia:

Perhaps this is more intelligible for humans to read
and it deduplicates the comment as:

if ((tmp & ACE_PRESENT) && !(tmp & ACE_ENABLED))

The compiler produces the same object code.




Re: [PATCH V10 13/15] phy: tegra: Add PCIe PIPE2UPHY support

2019-06-22 Thread Vidya Sagar

On 6/21/2019 4:53 AM, Dmitry Osipenko wrote:

20.06.2019 10:24, Kishon Vijay Abraham I пишет:



On 12/06/19 3:23 PM, Vidya Sagar wrote:

Synopsys DesignWare core based PCIe controllers in Tegra 194 SoC interface
with Universal PHY (UPHY) module through a PIPE2UPHY (P2U) module.
For each PCIe lane of a controller, there is a P2U unit instantiated at
hardware level. This driver provides support for the programming required
for each P2U that is going to be used for a PCIe controller.


One minor comment below. With that fixed

Acked-by: Kishon Vijay Abraham I 


Signed-off-by: Vidya Sagar 
---
Changes since [v9]:
* Used _relaxed() versions of readl() & writel()

Changes since [v8]:
* Made it dependent on ARCH_TEGRA_194_SOC directly instead of ARCH_TEGRA

Changes since [v7]:
* Changed P2U driver file name from pcie-p2u-tegra194.c to phy-tegra194-p2u.c

Changes since [v6]:
* None

Changes since [v5]:
* Addressed review comments from Thierry

Changes since [v4]:
* None

Changes since [v3]:
* Rebased on top of linux-next top of the tree

Changes since [v2]:
* Replaced spaces with tabs in Kconfig file
* Sorted header file inclusion alphabetically

Changes since [v1]:
* Added COMPILE_TEST in Kconfig
* Removed empty phy_ops implementations
* Modified code according to DT documentation file modifications

  drivers/phy/tegra/Kconfig|   7 ++
  drivers/phy/tegra/Makefile   |   1 +
  drivers/phy/tegra/phy-tegra194-p2u.c | 120 +++
  3 files changed, 128 insertions(+)
  create mode 100644 drivers/phy/tegra/phy-tegra194-p2u.c

diff --git a/drivers/phy/tegra/Kconfig b/drivers/phy/tegra/Kconfig
index e516967d695b..f9817c3ae85f 100644
--- a/drivers/phy/tegra/Kconfig
+++ b/drivers/phy/tegra/Kconfig
@@ -7,3 +7,10 @@ config PHY_TEGRA_XUSB
  
  	  To compile this driver as a module, choose M here: the module will

  be called phy-tegra-xusb.
+
+config PHY_TEGRA194_P2U
+   tristate "NVIDIA Tegra194 PIPE2UPHY PHY driver"
+   depends on ARCH_TEGRA_194_SOC || COMPILE_TEST
+   select GENERIC_PHY
+   help
+ Enable this to support the P2U (PIPE to UPHY) that is part of Tegra 
19x SOCs.
diff --git a/drivers/phy/tegra/Makefile b/drivers/phy/tegra/Makefile
index 64ccaeacb631..320dd389f34d 100644
--- a/drivers/phy/tegra/Makefile
+++ b/drivers/phy/tegra/Makefile
@@ -6,3 +6,4 @@ phy-tegra-xusb-$(CONFIG_ARCH_TEGRA_124_SOC) += xusb-tegra124.o
  phy-tegra-xusb-$(CONFIG_ARCH_TEGRA_132_SOC) += xusb-tegra124.o
  phy-tegra-xusb-$(CONFIG_ARCH_TEGRA_210_SOC) += xusb-tegra210.o
  phy-tegra-xusb-$(CONFIG_ARCH_TEGRA_186_SOC) += xusb-tegra186.o
+obj-$(CONFIG_PHY_TEGRA194_P2U) += phy-tegra194-p2u.o
diff --git a/drivers/phy/tegra/phy-tegra194-p2u.c 
b/drivers/phy/tegra/phy-tegra194-p2u.c
new file mode 100644
index ..df009abd2482
--- /dev/null
+++ b/drivers/phy/tegra/phy-tegra194-p2u.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * P2U (PIPE to UPHY) driver for Tegra T194 SoC
+ *
+ * Copyright (C) 2019 NVIDIA Corporation.
+ *
+ * Author: Vidya Sagar 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define P2U_PERIODIC_EQ_CTRL_GEN3  0xc0
+#define P2U_PERIODIC_EQ_CTRL_GEN3_PERIODIC_EQ_EN   BIT(0)
+#define P2U_PERIODIC_EQ_CTRL_GEN3_INIT_PRESET_EQ_TRAIN_EN  BIT(1)
+#define P2U_PERIODIC_EQ_CTRL_GEN4  0xc4
+#define P2U_PERIODIC_EQ_CTRL_GEN4_INIT_PRESET_EQ_TRAIN_EN  BIT(1)
+
+#define P2U_RX_DEBOUNCE_TIME   0xa4
+#define P2U_RX_DEBOUNCE_TIME_DEBOUNCE_TIMER_MASK   0x
+#define P2U_RX_DEBOUNCE_TIME_DEBOUNCE_TIMER_VAL160
+
+struct tegra_p2u {
+   void __iomem *base;
+};
+
+static inline void p2u_writel(struct tegra_p2u *phy, const u32 value,
+ const u32 reg)
+{
+   writel_relaxed(value, phy->base + reg);
+}
+
+static inline u32 p2u_readl(struct tegra_p2u *phy, const u32 reg)
+{
+   return readl_relaxed(phy->base + reg);
+}
+
+static int tegra_p2u_power_on(struct phy *x)
+{
+   struct tegra_p2u *phy = phy_get_drvdata(x);
+   u32 val;
+
+   val = p2u_readl(phy, P2U_PERIODIC_EQ_CTRL_GEN3);
+   val &= ~P2U_PERIODIC_EQ_CTRL_GEN3_PERIODIC_EQ_EN;
+   val |= P2U_PERIODIC_EQ_CTRL_GEN3_INIT_PRESET_EQ_TRAIN_EN;
+   p2u_writel(phy, val, P2U_PERIODIC_EQ_CTRL_GEN3);
+
+   val = p2u_readl(phy, P2U_PERIODIC_EQ_CTRL_GEN4);
+   val |= P2U_PERIODIC_EQ_CTRL_GEN4_INIT_PRESET_EQ_TRAIN_EN;
+   p2u_writel(phy, val, P2U_PERIODIC_EQ_CTRL_GEN4);
+
+   val = p2u_readl(phy, P2U_RX_DEBOUNCE_TIME);
+   val &= ~P2U_RX_DEBOUNCE_TIME_DEBOUNCE_TIMER_MASK;
+   val |= P2U_RX_DEBOUNCE_TIME_DEBOUNCE_TIMER_VAL;
+   p2u_writel(phy, val, P2U_RX_DEBOUNCE_TIME);
+
+   return 0;
+}
+
+static const struct phy_ops ops = {
+   .power_on = tegra_p2u_power_on,
+   .owner = THIS_MODULE,
+};
+
+static int tegra_p2u_probe(struct platform_device *pdev)
+{
+   struct phy_provider *phy_provider;
+   struct d

Re: [PATCH] ARM: dts: rockchip: add ethernet phy node for tinker board

2019-06-22 Thread Andrew Lunn
On Sat, Jun 22, 2019 at 11:50:10PM +0900, Katsuhiro Suzuki wrote:
> Hello,

Hi Katsuhiro

Please also report this to netdev, and the stmmac maintainers.

./scripts/get_maintainer.pl -f drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
Giuseppe Cavallaro  (supporter:STMMAC ETHERNET DRIVER)
Alexandre Torgue  (supporter:STMMAC ETHERNET DRIVER)
Jose Abreu  (supporter:STMMAC ETHERNET DRIVER)
"David S. Miller"  (odd fixer:NETWORKING DRIVERS)
Maxime Coquelin  (maintainer:ARM/STM32 ARCHITECTURE)
net...@vger.kernel.org (open list:STMMAC ETHERNET DRIVER)
linux-st...@st-md-mailman.stormreply.com (moderated list:ARM/STM32 ARCHITECTURE)
linux-arm-ker...@lists.infradead.org (moderated list:ARM/STM32 ARCHITECTURE)
linux-kernel@vger.kernel.org (open list)

> I have not bisect commit of root cause yet... Is it better to bisect
> and find problem instead of sending this patch?

My guess is that it is one of these three which broke it:

74371272f97f net: stmmac: Convert to phylink and remove phylib logic
eeef2f6b9f6e net: stmmac: Start adding phylink support
9ad372fc5aaf net: stmmac: Prepare to convert to phylink

 Andrew


Q: incorrect llist_empty() call in flush_smp_call_function_queue ?

2019-06-22 Thread Luigi Rizzo
flush_smp_call_function_queue() starts with the code below.
My impression is that the !llist_empty(head) term below is wrong
and should be replaced by !entry

llist_del_all(head) is xchg(&head->first, NULL), so it leaves the list empty;
the only chance that !llist_empty(head) is true is when a new element is
added between llist_del_all and the test of the condition, whereas judging
from the comment the intent seems to be that the warning should be printed
also when there are previous elements.

static void flush_smp_call_function_queue(bool warn_cpu_offline)
{
...
head = this_cpu_ptr(&call_single_queue);
entry = llist_del_all(head);
entry = llist_reverse_order(entry);

/* There shouldn't be any pending callbacks on an offline CPU. */
if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
 !warned && !llist_empty(head))) {
warned = true;
WARN(1, "IPI on offline CPU %d\n", smp_processor_id());

--

cheers
luigi


RE: [PATCH V7 00/11] misc: xilinx sd-fec drive

2019-06-22 Thread Dragan Cvetic



> -Original Message-
> From: Greg KH [mailto:gre...@linuxfoundation.org]
> Sent: Saturday 22 June 2019 07:02
> To: Dragan Cvetic 
> Cc: a...@arndb.de; Michal Simek ; 
> linux-arm-ker...@lists.infradead.org; robh...@kernel.org;
> mark.rutl...@arm.com; devicet...@vger.kernel.org; 
> linux-kernel@vger.kernel.org; Derek Kiernan 
> Subject: Re: [PATCH V7 00/11] misc: xilinx sd-fec drive
> 
> On Fri, Jun 21, 2019 at 05:49:45PM +, Dragan Cvetic wrote:
> >
> >
> > > -Original Message-
> > > From: Greg KH [mailto:gre...@linuxfoundation.org]
> > > Sent: Friday 21 June 2019 15:16
> > > To: Dragan Cvetic 
> > > Cc: a...@arndb.de; Michal Simek ; 
> > > linux-arm-ker...@lists.infradead.org; robh...@kernel.org;
> > > mark.rutl...@arm.com; devicet...@vger.kernel.org; 
> > > linux-kernel@vger.kernel.org; Derek Kiernan 
> > > Subject: Re: [PATCH V7 00/11] misc: xilinx sd-fec drive
> > >
> > > On Tue, Jun 11, 2019 at 06:29:34PM +0100, Dragan Cvetic wrote:
> > > > This patchset is adding the full Soft Decision Forward Error
> > > > Correction (SD-FEC) driver implementation, driver DT binding and
> > > > driver documentation.
> > > >
> > > > Forward Error Correction (FEC) codes such as Low Density Parity
> > > > Check (LDPC) and turbo codes provide a means to control errors in
> > > > data transmissions over unreliable or noisy communication
> > > > channels. The SD-FEC Integrated Block is an optimized block for
> > > > soft-decision decoding of these codes. Fixed turbo codes are
> > > > supported directly, whereas custom and standardized LDPC codes
> > > > are supported through the ability to specify the parity check
> > > > matrix through an AXI4-Lite bus or using the optional programmable
> > > > (PL)-based support logic. For the further information see
> > > > https://www.xilinx.com/support/documentation/ip_documentation/
> > > > sd_fec/v1_1/pg256-sdfec-integrated-block.pdf
> > > >
> > > > This driver is a platform device driver which supports SDFEC16
> > > > (16nm) IP. SD-FEC driver supports LDPC decoding and encoding and
> > > > Turbo code decoding. LDPC codes can be specified on
> > > > a codeword-by-codeword basis, also a custom LDPC code can be used.
> > > >
> > > > The SD-FEC driver exposes a char device interface and supports
> > > > file operations: open(), close(), poll() and ioctl(). The driver
> > > > allows only one usage of the device, open() limits the number of
> > > > driver instances. The driver also utilize Common Clock Framework
> > > > (CCF).
> > > >
> > > > The control and monitoring is supported over ioctl system call.
> > > > The features supported by ioctl():
> > > > - enable or disable data pipes to/from device
> > > > - configure the FEC algorithm parameters
> > > > - set the order of data
> > > > - provide a control of a SDFEC bypass option
> > > > - activates/deactivates SD-FEC
> > > > - collect and provide statistical data
> > > > - enable/disable interrupt mode
> > >
> > > Is there any userspace tool that talks to this device using these custom
> > > ioctls yet?
> > >
> > Tools no, but could be the customer who is using the driver.
> 
> I don't understand this.  Who has written code to talk to these
> special ioctls from userspace?  Is there a pointer to that code
> anywhere?
> 

The code that uses this driver was written by the driver maintainers; it consists of 
example applications and test code, which are not public.


> > > Doing a one-off ioctl api is always a risky thing, you are pretty much
> > > just creating brand new system calls for one piece of hardware.
> > >
> >
> > Why is that wrong and what is the risk?
> 
> You now have custom syscalls for one specific piece of hardware that you
> now have to maintain working properly for the next 40+ years.  You have
> to make sure those calls are correct and that this is the correct api to
> talk to this hardware.

This is very specific HW, it's high speed Forward Error Correction HW.
I'll be happy if I maintain this for the next 40+ years.

Actually, forgive me for asking: what architecture would spare me from maintaining 
this driver for the next 40+ years?
 

> 
> > What would you propose?
> > Definitely, I have to read about this.
> 
> What is this hardware and what is it used for?  Who will be talking to

The Soft-Decision Forward Error Correction (SD-FEC) integrated block supports 
Low Density Parity Check (LDPC) decoding and encoding and Turbo code decoding.
The SD-FEC use case is in high data rate applications such as 4G, 5G and DOCSIS3.1 
Cable Access.
A high performance SD-FEC (i.e. >1Gbps) is a block used to enable these 
systems to function in non-ideal environments.

> it from userspace?  What userspace workload uses it?  What tools need to

There will be an application which configures the HW for the use cases listed above.

Thanks

Dragan

> talk to it?  Where is the code that uses these new apis?
> 
> thanks,
> 
> greg k-h


Re: [PATCH V34 10/29] hibernate: Disable when the kernel is locked down

2019-06-22 Thread Pavel Machek
On Fri 2019-06-21 17:03:39, Matthew Garrett wrote:
> From: Josh Boyer 
> 
> There is currently no way to verify the resume image when returning
> from hibernate.  This might compromise the signed modules trust model,
> so until we can work with signed hibernate images we disable it when the
> kernel is locked down.

I keep getting these...

IIRC suse has patches to verify the images.
Pavel 



-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) 
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html


signature.asc
Description: Digital signature


Re: [PATCH] mm/gup: continue VM_FAULT_RETRY processing event for pre-faults

2019-06-22 Thread Andrea Arcangeli
Hello everyone,

On Wed, May 22, 2019 at 02:18:03PM -0700, Andrew Morton wrote:
> > arch/x86/kernel/fpu/signal.c:198:8-31:  -> gup with !pages

This simply should not have returned -EFAULT if ret < nr_pages but ret >= 0.

Instead it did:

   if (ret == nr_pages)
   goto retry;
   return -EFAULT;

That was the bug and the correct code would have been:

ret = get_user_pages_unlocked(pages=NULL)
if (ret < 0)
   return -EFAULT;
goto retry;

This eventually should have worked fine but it was less efficient
because it's still acting in a full prefault mode and it just tells
GUP that pages = NULL and so all it is trying to do is to issue the
blocking I/O after the mmap_sem has been released already.

Overall the solution applied in commit
b81ff1013eb8eef2934ca7e8cf53d553c1029e84 looks nicer.

Alternatively it could have used down_read(); get_user_pages(); which
prevents get_user_pages from dropping the mmap_sem and breaking the loop if
some blocking I/O had to be executed outside mmap_sem. But that would
have the side effect of breaking userfaultfd (uffd requires
gup_locked/unlocked and FAULT_FLAG_ALLOW_RETRY to be set in the fault
flags).

Eventually we need to allow VM_FAULT_RETRY to be returned even if
FOLL_TRIED is set, so in theory get_user_pages_unlocked(pages=NULL) in
a loop must eventually stop returning VM_FAULT_RETRY. FOLL_TRIED could
still disambiguate if VM_FAULT_RETRY should or should not be returned
so that it is returned only if it cannot be avoided
(i.e. userfaultfd case).

With gup_unlocked(pages=NULL) however all we are interested in is
to execute the blocking I/O and we don't care to map anything in the
pagetables. A later page fault has to happen anyway for sure because
pages was == NULL, it just needs to be a fast one.

> > arch/x86/mm/mpx.c:423:11-25:  -> gup with !pages

Note that get_user_pages is never affected by any change made after
the !locked check below in gup_locked:

if (!locked)
/* VM_FAULT_RETRY couldn't trigger, bypass */
return ret;

The bypass means when locked is NULL, there is a 1:1 bypass from
__get_user_pages<->get_user_pages and the VM_FAULT_RETRY dance never
runs.

get_user_pages in fact can't support userfaultfd, which makes ptrace
and core dump and the hwpoison non blocking in VM_FAULT_RETRY.

All places that must support userfaultfd must use
get_user_pages_unlocked/locked or somehow end up with
FAULT_FLAG_ALLOW_RETRY set in the fault flags.

> > virt/kvm/async_pf.c:90:1-22:  -> gup with !pages

Didn't this get slowed down with the commit
df17277b2a85c00f5710e33ce238ba4114687a28?

I mean it was a feature not a bug to skip that additional
__get_user_pages(FOLL_TRIED).

> > virt/kvm/kvm_main.c:1437:6-20:  -> gup with !pages

Like for mpx.c get_user_pages is agnostic to all these gup_locked
changes because it sets locked = NULL, it couldn't break the loop
early because it couldn't return VM_FAULT_RETRY.

> 
> OK.

Commit df17277b2a85c00f5710e33ce238ba4114687a28 is now applied.

So I think the effect it has is to make async_pf.c slower and we
didn't solve anything.

There are two __get_user_pages:

1)  ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages,
   vmas, locked);


if (called by get_user_pages)
return ret; /* bypass the whole VM_FAULT_RETRY logic */


*locked = 1;
lock_dropped = true;
down_read(&mm->mmap_sem);
2)  ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED,
   pages, NULL, NULL);


The problem introduced is that 2) is getting executed with pages==NULL
but there's no point to ever run 2) with pages = NULL.

async_pf especially uses nr_pages == 1, so it couldn't get any more
optimal than it already was.

Before df17277b2a85c00f5710e33ce238ba4114687a28 we broke the loop as
soon as the first __get_user_pages returned VM_FAULT_RETRY.

We can argue if we shouldn't have broken the loop and we should have
kept executing only the first __get_user_pages (marked "1)" above) for
the whole range, but nr_pages == 1 is common and in such case there's
no difference between the two behaviors.

The prefetch callers with nr_pages == 1, didn't even check the retval
at all:

down_read(&mm->mmap_sem);
get_user_pages_remote(NULL, mm, addr, 1, FOLL_WRITE, NULL, NULL,
&locked); pages NULL

// retval ignored

It should probably check for retval < 0... but the fault will be
retried for good later still with get_user_pages_unlocked() but with
pages != NULL, so it'll find out later if it's a segfault.

Now if we change the code to skip the second __get_user_pages it's not
clear if we can return nr_pages because we may still not have faulted
in the whole range 

Re: [GIT PULL] PCI fixes for v5.2

2019-06-22 Thread pr-tracker-bot
The pull request you sent on Sat, 22 Jun 2019 11:16:23 -0500:

> git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git 
> tags/pci-v5.2-fixes-1

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/b253d5f3ecc95c2b4e8d4a525fd754c9e32b0f6e

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker


Re: [GIT PULL] SCSI fixes for 5.2-rc5

2019-06-22 Thread pr-tracker-bot
The pull request you sent on Sat, 22 Jun 2019 08:31:49 -0700:

> git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git scsi-fixes

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/f4102766463a66026bd4af6c30cbbd01f10e6c42

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker


Re: [GIT PULL] Please pull powerpc/linux.git powerpc-5.2-5 tag

2019-06-22 Thread pr-tracker-bot
The pull request you sent on Sat, 22 Jun 2019 21:52:06 +1000:

> https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
> tags/powerpc-5.2-5

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/a8282bf087bcfb348ad97c8ed1f457bc11fd9709

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker


[PATCH V7 3/3] PCI: dwc: Export APIs to support .remove() implementation

2019-06-22 Thread Vidya Sagar
Export all configuration space access APIs and also other APIs to
support host controller drivers of DesignWare core based implementations
while adding support for the .remove() hook to build their respective drivers
as modules.

Signed-off-by: Vidya Sagar 
Acked-by: Gustavo Pimentel 
---
Changes from v6:
* None

Changes from v5:
* None

Changes from v4:
* Removed __ (underscore) from dw_pcie_{write/read}_dbi API names

Changes from v3:
* Exported only __dw_pcie_{read/write}_dbi() APIs instead of
  dw_pcie_read{l/w/b}_dbi & dw_pcie_write{l/w/b}_dbi APIs.

Changes from v2:
* Rebased on top of linux-next top of the tree branch

Changes from v1:
* s/Designware/DesignWare

 drivers/pci/controller/dwc/pcie-designware-host.c | 4 
 drivers/pci/controller/dwc/pcie-designware.c  | 4 
 2 files changed, 8 insertions(+)

diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c 
b/drivers/pci/controller/dwc/pcie-designware-host.c
index d069e4290180..f93252d0da5b 100644
--- a/drivers/pci/controller/dwc/pcie-designware-host.c
+++ b/drivers/pci/controller/dwc/pcie-designware-host.c
@@ -311,6 +311,7 @@ void dw_pcie_msi_init(struct pcie_port *pp)
dw_pcie_wr_own_conf(pp, PCIE_MSI_ADDR_HI, 4,
upper_32_bits(msi_target));
 }
+EXPORT_SYMBOL_GPL(dw_pcie_msi_init);
 
 int dw_pcie_host_init(struct pcie_port *pp)
 {
@@ -495,6 +496,7 @@ int dw_pcie_host_init(struct pcie_port *pp)
dw_pcie_free_msi(pp);
return ret;
 }
+EXPORT_SYMBOL_GPL(dw_pcie_host_init);
 
 void dw_pcie_host_deinit(struct pcie_port *pp)
 {
@@ -503,6 +505,7 @@ void dw_pcie_host_deinit(struct pcie_port *pp)
if (pci_msi_enabled() && !pp->ops->msi_host_init)
dw_pcie_free_msi(pp);
 }
+EXPORT_SYMBOL_GPL(dw_pcie_host_deinit);
 
 static int dw_pcie_access_other_conf(struct pcie_port *pp, struct pci_bus *bus,
 u32 devfn, int where, int size, u32 *val,
@@ -695,3 +698,4 @@ void dw_pcie_setup_rc(struct pcie_port *pp)
val |= PORT_LOGIC_SPEED_CHANGE;
dw_pcie_wr_own_conf(pp, PCIE_LINK_WIDTH_SPEED_CONTROL, 4, val);
 }
+EXPORT_SYMBOL_GPL(dw_pcie_setup_rc);
diff --git a/drivers/pci/controller/dwc/pcie-designware.c 
b/drivers/pci/controller/dwc/pcie-designware.c
index 0b383feb13de..dc9cdcd72ffc 100644
--- a/drivers/pci/controller/dwc/pcie-designware.c
+++ b/drivers/pci/controller/dwc/pcie-designware.c
@@ -34,6 +34,7 @@ int dw_pcie_read(void __iomem *addr, int size, u32 *val)
 
return PCIBIOS_SUCCESSFUL;
 }
+EXPORT_SYMBOL_GPL(dw_pcie_read);
 
 int dw_pcie_write(void __iomem *addr, int size, u32 val)
 {
@@ -51,6 +52,7 @@ int dw_pcie_write(void __iomem *addr, int size, u32 val)
 
return PCIBIOS_SUCCESSFUL;
 }
+EXPORT_SYMBOL_GPL(dw_pcie_write);
 
 u32 dw_pcie_read_dbi(struct dw_pcie *pci, u32 reg, size_t size)
 {
@@ -66,6 +68,7 @@ u32 dw_pcie_read_dbi(struct dw_pcie *pci, u32 reg, size_t 
size)
 
return val;
 }
+EXPORT_SYMBOL_GPL(dw_pcie_read_dbi);
 
 void dw_pcie_write_dbi(struct dw_pcie *pci, u32 reg, size_t size, u32 val)
 {
@@ -80,6 +83,7 @@ void dw_pcie_write_dbi(struct dw_pcie *pci, u32 reg, size_t 
size, u32 val)
if (ret)
dev_err(pci->dev, "Write DBI address failed\n");
 }
+EXPORT_SYMBOL_GPL(dw_pcie_write_dbi);
 
 u32 dw_pcie_read_dbi2(struct dw_pcie *pci, u32 reg, size_t size)
 {
-- 
2.17.1



[PATCH V7 2/3] PCI: dwc: Cleanup DBI,ATU read and write APIs

2019-06-22 Thread Vidya Sagar
Cleanup DBI read and write APIs by removing "__" (underscore) from their
names as there are no no-underscore versions and the underscore versions
are already doing what no-underscore versions typically do. It also removes
passing dbi/dbi2 base address as one of the arguments as the same can be
derived within the read and write APIs. Since dw_pcie_{readl/writel}_dbi()
APIs can't be used for ATU read/write as ATU base address could be
different from DBI base address, this patch attempts to implement
ATU read/write APIs using ATU base address without using
dw_pcie_{readl/writel}_dbi() APIs.

Signed-off-by: Vidya Sagar 
---
Changes from v6:
* Modified ATU read/write APIs to use implementation specific DBI read/write
  APIs if present.

Changes from v5:
* Removed passing base address as one of the arguments as the same can be 
derived within
  the API itself.
* Modified ATU read/write APIs to call dw_pcie_{write/read}() API

Changes from v4:
* This is a new patch in this series

 drivers/pci/controller/dwc/pcie-designware.c | 28 +--
 drivers/pci/controller/dwc/pcie-designware.h | 51 +---
 2 files changed, 45 insertions(+), 34 deletions(-)

diff --git a/drivers/pci/controller/dwc/pcie-designware.c 
b/drivers/pci/controller/dwc/pcie-designware.c
index 9d7c51c32b3b..0b383feb13de 100644
--- a/drivers/pci/controller/dwc/pcie-designware.c
+++ b/drivers/pci/controller/dwc/pcie-designware.c
@@ -52,64 +52,60 @@ int dw_pcie_write(void __iomem *addr, int size, u32 val)
return PCIBIOS_SUCCESSFUL;
 }
 
-u32 __dw_pcie_read_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg,
-  size_t size)
+u32 dw_pcie_read_dbi(struct dw_pcie *pci, u32 reg, size_t size)
 {
int ret;
u32 val;
 
if (pci->ops->read_dbi)
-   return pci->ops->read_dbi(pci, base, reg, size);
+   return pci->ops->read_dbi(pci, pci->dbi_base, reg, size);
 
-   ret = dw_pcie_read(base + reg, size, &val);
+   ret = dw_pcie_read(pci->dbi_base + reg, size, &val);
if (ret)
dev_err(pci->dev, "Read DBI address failed\n");
 
return val;
 }
 
-void __dw_pcie_write_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg,
-size_t size, u32 val)
+void dw_pcie_write_dbi(struct dw_pcie *pci, u32 reg, size_t size, u32 val)
 {
int ret;
 
if (pci->ops->write_dbi) {
-   pci->ops->write_dbi(pci, base, reg, size, val);
+   pci->ops->write_dbi(pci, pci->dbi_base, reg, size, val);
return;
}
 
-   ret = dw_pcie_write(base + reg, size, val);
+   ret = dw_pcie_write(pci->dbi_base + reg, size, val);
if (ret)
dev_err(pci->dev, "Write DBI address failed\n");
 }
 
-u32 __dw_pcie_read_dbi2(struct dw_pcie *pci, void __iomem *base, u32 reg,
-   size_t size)
+u32 dw_pcie_read_dbi2(struct dw_pcie *pci, u32 reg, size_t size)
 {
int ret;
u32 val;
 
if (pci->ops->read_dbi2)
-   return pci->ops->read_dbi2(pci, base, reg, size);
+   return pci->ops->read_dbi2(pci, pci->dbi_base2, reg, size);
 
-   ret = dw_pcie_read(base + reg, size, &val);
+   ret = dw_pcie_read(pci->dbi_base2 + reg, size, &val);
if (ret)
dev_err(pci->dev, "read DBI address failed\n");
 
return val;
 }
 
-void __dw_pcie_write_dbi2(struct dw_pcie *pci, void __iomem *base, u32 reg,
- size_t size, u32 val)
+void dw_pcie_write_dbi2(struct dw_pcie *pci, u32 reg, size_t size, u32 val)
 {
int ret;
 
if (pci->ops->write_dbi2) {
-   pci->ops->write_dbi2(pci, base, reg, size, val);
+   pci->ops->write_dbi2(pci, pci->dbi_base2, reg, size, val);
return;
}
 
-   ret = dw_pcie_write(base + reg, size, val);
+   ret = dw_pcie_write(pci->dbi_base2 + reg, size, val);
if (ret)
dev_err(pci->dev, "write DBI address failed\n");
 }
diff --git a/drivers/pci/controller/dwc/pcie-designware.h 
b/drivers/pci/controller/dwc/pcie-designware.h
index 14762e262758..657e25e2c789 100644
--- a/drivers/pci/controller/dwc/pcie-designware.h
+++ b/drivers/pci/controller/dwc/pcie-designware.h
@@ -254,14 +254,10 @@ struct dw_pcie {
 int dw_pcie_read(void __iomem *addr, int size, u32 *val);
 int dw_pcie_write(void __iomem *addr, int size, u32 val);
 
-u32 __dw_pcie_read_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg,
-  size_t size);
-void __dw_pcie_write_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg,
-size_t size, u32 val);
-u32 __dw_pcie_read_dbi2(struct dw_pcie *pci, void __iomem *base, u32 reg,
-   size_t size);
-void __dw_pcie_write_dbi2(struct dw_pcie *pci, void __iomem *base, u32 reg,
- size_t size, u32 val);
+u32 dw_pcie_read_dbi(struct dw_pcie *pci, u32 reg, size_t size);
+void dw_pcie_write_dbi(s

  1   2   3   >