[ndctl PATCH] ndctl, list: Add controller temperature

2018-06-13 Thread Dan Williams
Emit the controller temperature if the smart payload includes this
field.

Signed-off-by: Dan Williams 
---
 ndctl/util/json-smart.c |   10 ++
 1 file changed, 10 insertions(+)

diff --git a/ndctl/util/json-smart.c b/ndctl/util/json-smart.c
index 4020423bb8c8..9482b35a43d6 100644
--- a/ndctl/util/json-smart.c
+++ b/ndctl/util/json-smart.c
@@ -109,6 +109,16 @@ struct json_object *util_dimm_health_to_json(struct 
ndctl_dimm *dimm)
json_object_object_add(jhealth, "temperature_celsius", 
jobj);
}
 
+   if (flags & ND_SMART_CTEMP_VALID) {
+   unsigned int temp = ndctl_cmd_smart_get_ctrl_temperature(cmd);
+   double t = ndctl_decode_smart_temperature(temp);
+
+   jobj = json_object_new_double(t);
+   if (jobj)
+   json_object_object_add(jhealth,
+   "controller_temperature_celsius", jobj);
+   }
+
if (flags & ND_SMART_SPARES_VALID) {
unsigned int spares = ndctl_cmd_smart_get_spares(cmd);
 

___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [fstests PATCH] generic/223: skip when using DAX

2018-06-13 Thread Ross Zwisler
On Wed, Jun 13, 2018 at 03:25:58PM -0700, Darrick J. Wong wrote:
> On Wed, Jun 13, 2018 at 03:07:42PM -0600, Ross Zwisler wrote:
> > As of these upstream kernel commits:
> > 
> > commit 6e2608dfd934 ("xfs, dax: introduce xfs_dax_aops")
> > commit 5f0663bb4a64 ("ext4, dax: introduce ext4_dax_aops")
> > 
> > generic/223 fails on XFS and ext4 because filesystems mounted with DAX no
> > longer support bmap.  This is desired behavior and will not be fixed,
> > according to:
> > 
> > https://lists.01.org/pipermail/linux-nvdimm/2018-April/015383.html
> > 
> > So, just skip over generic/223 when using DAX so we don't throw false
> > positive test failures.
> 
> Just because we decided not to support FIBMAP on XFSDAX doesn't mean we
> should let this test bitrot. :)
> 
> Just out of curiosity, does the following patch fix g/223 for you?

Yep!  This makes generic/223 pass in my setup for both DAX and non-DAX, with
both XFS and ext4.
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [fstests PATCH] generic/223: skip when using DAX

2018-06-13 Thread Darrick J. Wong
On Wed, Jun 13, 2018 at 03:07:42PM -0600, Ross Zwisler wrote:
> As of these upstream kernel commits:
> 
> commit 6e2608dfd934 ("xfs, dax: introduce xfs_dax_aops")
> commit 5f0663bb4a64 ("ext4, dax: introduce ext4_dax_aops")
> 
> generic/223 fails on XFS and ext4 because filesystems mounted with DAX no
> longer support bmap.  This is desired behavior and will not be fixed,
> according to:
> 
> https://lists.01.org/pipermail/linux-nvdimm/2018-April/015383.html
> 
> So, just skip over generic/223 when using DAX so we don't throw false
> positive test failures.

Just because we decided not to support FIBMAP on XFSDAX doesn't mean we
should let this test bitrot. :)

Just out of curiosity, does the following patch fix g/223 for you?

--D

diff --git a/src/t_stripealign.c b/src/t_stripealign.c
index 05ed36b5..690f743a 100644
--- a/src/t_stripealign.c
+++ b/src/t_stripealign.c
@@ -17,8 +17,13 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
-#define FIBMAP  _IO(0x00, 1)/* bmap access */
+#define FIEMAP_EXTENT_ACCEPTABLE   (FIEMAP_EXTENT_LAST | \
+   FIEMAP_EXTENT_DATA_ENCRYPTED | FIEMAP_EXTENT_ENCODED | \
+   FIEMAP_EXTENT_UNWRITTEN | FIEMAP_EXTENT_MERGED | \
+   FIEMAP_EXTENT_SHARED)
 
 /*
  * If only filename given, print first block.
@@ -28,11 +33,14 @@
 
 int main(int argc, char ** argv)
 {
-   int fd;
-   int ret;
-   int sunit = 0;  /* in blocks */
-   char*filename;
-   unsigned intblock = 0;
+   struct stat sb;
+   struct fiemap   *fie;
+   struct fiemap_extent*fe;
+   int fd;
+   int ret;
+   int sunit = 0;  /* in blocks */
+   char*filename;
+   unsigned long long  block;
 
 if (argc < 3) {
 printf("Usage: %s  \n", argv[0]);
@@ -48,21 +56,63 @@ int main(int argc, char ** argv)
 return 1;
 }
 
-   ret = ioctl(fd, FIBMAP, );
-   if (ret < 0) {
+   ret = fstat(fd, );
+   if (ret) {
+   perror(filename);
close(fd);
-   perror("fibmap");
return 1;
}
 
-   close(fd);
+   fie = calloc(1, sizeof(struct fiemap) + sizeof(struct fiemap_extent));
+   if (!fie) {
+   close(fd);
+   perror("malloc");
+   return 1;
+   }
+   fie->fm_length = 1;
+   fie->fm_flags = FIEMAP_FLAG_SYNC;
+   fie->fm_extent_count = 1;
+
+   ret = ioctl(fd, FS_IOC_FIEMAP, fie);
+   if (ret < 0) {
+   unsigned intbmap = 0;
+
+   ret = ioctl(fd, FIBMAP, );
+   if (ret < 0) {
+   perror("fibmap");
+   free(fie);
+   close(fd);
+   return 1;
+   }
+   block = bmap;
+   goto check;
+   }
 
+
+   if (fie->fm_mapped_extents != 1) {
+   printf("%s: no extents?\n", filename);
+   free(fie);
+   close(fd);
+   return 1;
+   }
+   fe = >fm_extents[0];
+   if (fe->fe_flags & ~FIEMAP_EXTENT_ACCEPTABLE) {
+   printf("%s: bad flags 0x%x\n", filename, fe->fe_flags);
+   free(fie);
+   close(fd);
+   return 1;
+   }
+
+   block = fie->fm_extents[0].fe_physical / sb.st_blksize;
+check:
if (block % sunit) {
-   printf("%s: Start block %u not multiple of sunit %u\n",
+   printf("%s: Start block %llu not multiple of sunit %u\n",
filename, block, sunit);
return 1;
} else
printf("%s: well-aligned\n", filename);
+   free(fie);
+   close(fd);
 
return 0;
 }
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v2 0/2] ndctl, test: add some helper function and cleanup test script

2018-06-13 Thread Verma, Vishal L
On Tue, 2018-06-12 at 21:56 -0400, Masayoshi Mizuma wrote:
> Some test scripts have same function. So, let's cleanup
> to stop the duplication by intoducing 'test/common' file.
> Test scripts includes the common file to use the functions.
> 
> Changelog:
>   - Add SPDX style license header to the new test/common file.
>   - Fix wrong test expression in detect() on test/firmware-update.sh

Thanks for the updates, applied.

-Vishal

> 
> Masayoshi Mizuma (2):
>   ndctl, test: add some helper function for test script
>   ndctl, test: cleanup test scripts
> 
>  test/blk-exhaust.sh   | 21 +++---
>  test/btt-check.sh | 35 +++
>  test/btt-errors.sh| 20 -
>  test/btt-pad-compat.sh| 52 +
>  test/clear.sh | 23 ---
>  test/common   | 60 +++
>  test/create.sh| 21 +++---
>  test/daxdev-errors.sh | 19 +++--
>  test/firmware-update.sh   | 26 -
>  test/inject-error.sh  | 31 ++--
>  test/label-compat.sh  | 21 +++---
>  test/multi-dax.sh | 19 +++--
>  test/pmem-errors.sh   | 20 -
>  test/rescan-partitions.sh | 44 +++-
>  test/sector-mode.sh   |  5 +---
>  15 files changed, 151 insertions(+), 266 deletions(-)
>  create mode 100644 test/common
> 
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [Qemu-devel] [RFC PATCH 1/1] nvdimm: let qemu requiring section alignment of pmem resource.

2018-06-13 Thread Igor Mammedov
On Tue, 12 Jun 2018 23:04:25 +0800
Haozhong Zhang  wrote:

> On 06/11/18 19:55, Dan Williams wrote:
> > On Mon, Jun 11, 2018 at 9:26 AM, Stefan Hajnoczi  
> > wrote:  
> > > On Mon, Jun 11, 2018 at 06:54:25PM +0800, Zhang Yi wrote:  
> > >> Nvdimm driver use Memory hot-plug APIs to map it's pmem resource,
> > >> which at a section granularity.
> > >>
> > >> When QEMU emulated the vNVDIMM device, decrease the label-storage,
> > >> QEMU will put the vNVDIMMs directly next to one another in physical
> > >> address space, which means that the boundary between them won't
> > >> align to the 128 MB memory section size.  
> > >
> > > I'm having a hard time parsing this.
> > >
> > > Where does the "128 MB memory section size" come from?  ACPI?
> > > A chipset-specific value?
> > >  
> > 
> > The devm_memremap_pages() implementation use the memory hotplug core
> > to allocate the 'struct page' array/map for persistent memory. Memory
> > hotplug can only be performed in terms of sections, 128MB on x86_64.  
> 
> IIUC, it also affects the normal RAM hotplug to a Linux VM on QEMU. If
> that is the case, it will be helpful to lift this option to pc-dimm.

Default alignment on page size boundary is implemented for the reason
that QEMU has no idea about guest os alignments req. and these requirements
might vary greatly depending on guest os running.
With some guests it works just fine even with 2M alignments/dimm sizes.

So it's up to upper layers which know what guest os is running to pick
plugged dimm sizes. So if a particular linux version minimum block size
is 128, then mgmt needs to plug dimm with size which is multiple of that.
That should satisfy whatever alignment req guest os has.

In case of nvdimm we need to fix address allocation in QEMU to account
for label size which broke above rule leading to "overlap" over label
area of nvdimm which isn't mapped into guest address space, but that's
probably it.

PS:
not related to patch question.
Intel guys contributed most of the code to nvdimm and continue actively
to develop it. Can we have a designated maintainer for nvdimm part from
Intel in addition to authors who just code/merge feature and disappear
(not reachable) shortly after that?


> Thanks,
> Haozhong
> 
> > There is some limited support for allowing devm_memremap_pages() to
> > overlap 'System RAM' within a given section, but it does not currently
> > support multiple devm_memremap_pages() calls overlapping within the
> > same section. There is currently a kernel bug where we do not handle
> > this unsupported configuration gracefully. The fix will cause
> > configurations configurations that try to overlap 2 persistent memory
> > ranges in the same section to fail.
> > 
> > The proposed fix is trying to make sure that QEMU does not run afoul
> > of this constraint.
> > 
> > There is currently no line of sight to reduce the minimum memory
> > hotplug alignment size to less than 128M. Also, as other architectures
> > outside of x86_64 add devm_memremap_pages() support, the minimum
> > section alignment constraint might change and is a property of a guest
> > OS. My understanding is that some guest OSes might expect an even
> > larger persistent memory minimum alignment.
> >   

___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v11 4/7] mm, fs, dax: handle layout changes to pinned dax mappings

2018-06-13 Thread Jan Kara
On Tue 12-06-18 15:05:36, Ross Zwisler wrote:
> On Fri, May 18, 2018 at 06:35:13PM -0700, Dan Williams wrote:
> > Background:
> > 
> > get_user_pages() in the filesystem pins file backed memory pages for
> > access by devices performing dma. However, it only pins the memory pages
> > not the page-to-file offset association. If a file is truncated the
> > pages are mapped out of the file and dma may continue indefinitely into
> > a page that is owned by a device driver. This breaks coherency of the
> > file vs dma, but the assumption is that if userspace wants the
> > file-space truncated it does not matter what data is inbound from the
> > device, it is not relevant anymore. The only expectation is that dma can
> > safely continue while the filesystem reallocates the block(s).
> > 
> > Problem:
> > 
> > This expectation that dma can safely continue while the filesystem
> > changes the block map is broken by dax. With dax the target dma page
> > *is* the filesystem block. The model of leaving the page pinned for dma,
> > but truncating the file block out of the file, means that the filesytem
> > is free to reallocate a block under active dma to another file and now
> > the expected data-incoherency situation has turned into active
> > data-corruption.
> > 
> > Solution:
> > 
> > Defer all filesystem operations (fallocate(), truncate()) on a dax mode
> > file while any page/block in the file is under active dma. This solution
> > assumes that dma is transient. Cases where dma operations are known to
> > not be transient, like RDMA, have been explicitly disabled via
> > commits like 5f1d43de5416 "IB/core: disable memory registration of
> > filesystem-dax vmas".
> > 
> > The dax_layout_busy_page() routine is called by filesystems with a lock
> > held against mm faults (i_mmap_lock) to find pinned / busy dax pages.
> > The process of looking up a busy page invalidates all mappings
> > to trigger any subsequent get_user_pages() to block on i_mmap_lock.
> > The filesystem continues to call dax_layout_busy_page() until it finally
> > returns no more active pages. This approach assumes that the page
> > pinning is transient, if that assumption is violated the system would
> > have likely hung from the uncompleted I/O.
> > 
> > Cc: Jeff Moyer 
> > Cc: Dave Chinner 
> > Cc: Matthew Wilcox 
> > Cc: Alexander Viro 
> > Cc: "Darrick J. Wong" 
> > Cc: Ross Zwisler 
> > Cc: Dave Hansen 
> > Cc: Andrew Morton 
> > Reported-by: Christoph Hellwig 
> > Reviewed-by: Christoph Hellwig 
> > Reviewed-by: Jan Kara 
> > Signed-off-by: Dan Williams 
> > ---
> <>
> > @@ -492,6 +505,90 @@ static void *grab_mapping_entry(struct address_space 
> > *mapping, pgoff_t index,
> > return entry;
> >  }
> >  
> > +/**
> > + * dax_layout_busy_page - find first pinned page in @mapping
> > + * @mapping: address space to scan for a page with ref count > 1
> > + *
> > + * DAX requires ZONE_DEVICE mapped pages. These pages are never
> > + * 'onlined' to the page allocator so they are considered idle when
> > + * page->count == 1. A filesystem uses this interface to determine if
> > + * any page in the mapping is busy, i.e. for DMA, or other
> > + * get_user_pages() usages.
> > + *
> > + * It is expected that the filesystem is holding locks to block the
> > + * establishment of new mappings in this address_space. I.e. it expects
> > + * to be able to run unmap_mapping_range() and subsequently not race
> > + * mapping_mapped() becoming true.
> > + */
> > +struct page *dax_layout_busy_page(struct address_space *mapping)
> > +{
> > +   pgoff_t indices[PAGEVEC_SIZE];
> > +   struct page *page = NULL;
> > +   struct pagevec pvec;
> > +   pgoff_t index, end;
> > +   unsigned i;
> > +
> > +   /*
> > +* In the 'limited' case get_user_pages() for dax is disabled.
> > +*/
> > +   if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
> > +   return NULL;
> > +
> > +   if (!dax_mapping(mapping) || !mapping_mapped(mapping))
> > +   return NULL;
> > +
> > +   pagevec_init();
> > +   index = 0;
> > +   end = -1;
> > +
> > +   /*
> > +* If we race get_user_pages_fast() here either we'll see the
> > +* elevated page count in the pagevec_lookup and wait, or
> > +* get_user_pages_fast() will see that the page it took a reference
> > +* against is no longer mapped in the page tables and bail to the
> > +* get_user_pages() slow path.  The slow path is protected by
> > +* pte_lock() and pmd_lock(). New references are not taken without
> > +* holding those locks, and unmap_mapping_range() will not zero the
> > +* pte or pmd without holding the respective lock, so we are
> > +* guaranteed to either see new references or prevent new
> > +* references from being established.
> > +*/
> > +   unmap_mapping_range(mapping, 0, 0, 1);
> > +
> > +   while (index < end && pagevec_lookup_entries(, mapping, index,
> > +   min(end - index, (pgoff_t)PAGEVEC_SIZE),
> > +   indices)) {

Re: [PATCH v5 2/4] resource: Use list_head to link sibling resource

2018-06-13 Thread Baoquan He
On 06/12/18 at 05:10pm, Julia Lawall wrote:
> This looks wrong.  After a list iterator, the index variable points to a
> dummy structure.
> 
> julia
> 
> url:
> https://github.com/0day-ci/linux/commits/Baoquan-He/resource-Use-list_head-to-link-sibling-resource/20180612-113600
> :: branch date: 7 hours ago
> :: commit date: 7 hours ago
> 
> >> kernel/resource.c:265:17-20: ERROR: invalid reference to the index 
> >> variable of the iterator on line 253
> 
> # 
> https://github.com/0day-ci/linux/commit/e906f15906750a86913ba2b1f08bad99129d3dfc
> git remote add linux-review https://github.com/0day-ci/linux
> git remote update linux-review
> git checkout e906f15906750a86913ba2b1f08bad99129d3dfc
> vim +265 kernel/resource.c
> 
> ^1da177e4 Linus Torvalds 2005-04-16  247
> 5eeec0ec9 Yinghai Lu 2009-12-22  248  static void 
> __release_child_resources(struct resource *r)
> 5eeec0ec9 Yinghai Lu 2009-12-22  249  {
> e906f1590 Baoquan He 2018-06-12  250  struct resource *tmp, *next;
> 5eeec0ec9 Yinghai Lu 2009-12-22  251  resource_size_t size;
> 5eeec0ec9 Yinghai Lu 2009-12-22  252
> e906f1590 Baoquan He 2018-06-12 @253  list_for_each_entry_safe(tmp, 
> next, >child, sibling) {
> 5eeec0ec9 Yinghai Lu 2009-12-22  254  tmp->parent = NULL;
> e906f1590 Baoquan He 2018-06-12  255  
> INIT_LIST_HEAD(>sibling);


list_del_init(>sibling);

Thanks, Julia. Here I should use list_del_init(>list) to
replace INIT_LIST_HEAD(>sibling). 

> 5eeec0ec9 Yinghai Lu 2009-12-22  256  
> __release_child_resources(tmp);
> 5eeec0ec9 Yinghai Lu 2009-12-22  257
> 5eeec0ec9 Yinghai Lu 2009-12-22  258  printk(KERN_DEBUG 
> "release child resource %pR\n", tmp);
> 5eeec0ec9 Yinghai Lu 2009-12-22  259  /* need to restore 
> size, and keep flags */
> 5eeec0ec9 Yinghai Lu 2009-12-22  260  size = 
> resource_size(tmp);
> 5eeec0ec9 Yinghai Lu 2009-12-22  261  tmp->start = 0;
> 5eeec0ec9 Yinghai Lu 2009-12-22  262  tmp->end = size - 1;
> 5eeec0ec9 Yinghai Lu 2009-12-22  263  }
> e906f1590 Baoquan He 2018-06-12  264
> e906f1590 Baoquan He 2018-06-12 @265  INIT_LIST_HEAD(>child);
> 5eeec0ec9 Yinghai Lu 2009-12-22  266  }
> 5eeec0ec9 Yinghai Lu 2009-12-22  267
> 
> ---
> 0-DAY kernel test infrastructureOpen Source Technology Center
> https://lists.01.org/pipermail/kbuild-all   Intel Corporation
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm