[PATCH 4/4] libnvdimm/btt: implement ->set_read_only to hook into BLKROSET processing

2021-10-27 Thread Huaisheng Ye
Implement the ->set_read_only method for nd_btt.

Signed-off-by: Huaisheng Ye 
---
 drivers/nvdimm/btt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 92dec4952297..91fcdac7858f 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1514,6 +1514,7 @@ static const struct block_device_operations btt_fops = {
.submit_bio =   btt_submit_bio,
.rw_page =  btt_rw_page,
.getgeo =   btt_getgeo,
+   .set_read_only =nd_set_ro,
 };
 
 static int btt_blk_init(struct btt *btt)
-- 
2.27.0




[PATCH 3/4] libnvdimm/blk: implement ->set_read_only to hook into BLKROSET processing

2021-10-27 Thread Huaisheng Ye
Implement the ->set_read_only method for nd_blk.

Signed-off-by: Huaisheng Ye 
---
 drivers/nvdimm/blk.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 088d3dd6f6fa..9d76980ebff7 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -226,6 +226,7 @@ static int nsblk_rw_bytes(struct nd_namespace_common *ndns,
 static const struct block_device_operations nd_blk_fops = {
.owner = THIS_MODULE,
.submit_bio =  nd_blk_submit_bio,
+   .set_read_only = nd_set_ro,
 };
 
 static void nd_blk_release_disk(void *disk)
-- 
2.27.0




[PATCH 2/4] libnvdimm/pmem: implement ->set_read_only to hook into BLKROSET processing

2021-10-27 Thread Huaisheng Ye
Implement the ->set_read_only method for nd_pmem.

Signed-off-by: Huaisheng Ye 
---
 drivers/nvdimm/pmem.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index ef4950f80832..38ede1f44f5f 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -282,6 +282,7 @@ static const struct block_device_operations pmem_fops = {
.owner =THIS_MODULE,
.submit_bio =   pmem_submit_bio,
.rw_page =  pmem_rw_page,
+   .set_read_only =nd_set_ro,
 };
 
 static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
-- 
2.27.0




[PATCH 1/4] libnvdimm: add a ro state control function for nvdimm

2021-10-27 Thread Huaisheng Ye
The libndctl test fails because it cannot write to a pmem disk.
The error message is shown below:

namespace6.0: failed to write /dev/pmem6
check_namespaces: namespace6.0 validate_bdev failed
ndctl-test1 failed: -6

Commit 98f49b63e84d4ee1a5c327d0b5f4e8699f6c70fe removes set_device_ro and
e00adcadf3af7a8335026d71ab9f0e0a922191ac adds a new set_read_only method
to allow for driver-specific processing when changing the block device's
read-only state.

The nd_pmem, nd_blk and nd_btt drivers currently have no way to enable or
disable the write-protect (read-only) state. Without that, blkdev_roset only
modifies the bd_read_only field of struct block_device and returns success to
the block device ioctl. An error then occurs on the next write to the
read-only disk.

Add ro state control function in libnvdimm for this purpose.

Signed-off-by: Huaisheng Ye 
---
 drivers/nvdimm/bus.c | 17 +
 drivers/nvdimm/nd.h  |  1 +
 2 files changed, 18 insertions(+)

diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 9dc7f3edd42b..299dd5e11ae7 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -636,6 +636,23 @@ void nvdimm_check_and_set_ro(struct gendisk *disk)
 }
 EXPORT_SYMBOL(nvdimm_check_and_set_ro);
 
+int nd_set_ro(struct block_device *bdev, bool ro)
+{
+   struct gendisk *disk = bdev->bd_disk;
+   struct device *dev = disk_to_dev(disk)->parent;
+   int disk_ro = get_disk_ro(disk);
+
+   /* nothing to change with ro state */
+   if (disk_ro == ro)
+   return 0;
+
+   dev_info(dev, "set %s to read-%s\n",
+disk->disk_name, ro ? "only" : "write");
+   set_disk_ro(disk, ro);
+   return 0;
+}
+EXPORT_SYMBOL(nd_set_ro);
+
 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
char *buf)
 {
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 5467ebbb4a6b..f1cf3eb21292 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -512,6 +512,7 @@ void nvdimm_bus_lock(struct device *dev);
 void nvdimm_bus_unlock(struct device *dev);
 bool is_nvdimm_bus_locked(struct device *dev);
 void nvdimm_check_and_set_ro(struct gendisk *disk);
+int nd_set_ro(struct block_device *bdev, bool ro);
 void nvdimm_drvdata_release(struct kref *kref);
 void put_ndd(struct nvdimm_drvdata *ndd);
 int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd);
-- 
2.27.0




[PATCH 0/4] add ro state control function for nvdimm drivers

2021-10-27 Thread Huaisheng Ye
The libndctl test fails during ndctl testing because it cannot write to a
pmem disk.

The error message is shown below:

namespace6.0: failed to write /dev/pmem6
check_namespaces: namespace6.0 validate_bdev failed
ndctl-test1 failed: -6

Commit 98f49b63e84d4ee1a5c327d0b5f4e8699f6c70fe removes set_device_ro and
e00adcadf3af7a8335026d71ab9f0e0a922191ac adds a new set_read_only method
to allow for driver-specific processing when changing the block device's
read-only state.

The nd_pmem, nd_blk and nd_btt drivers currently have no way to enable or
disable the write-protect (read-only) state. Without that, blkdev_roset only
modifies the bd_read_only field of struct block_device and returns success to
the block device ioctl. An error then occurs on the next write to the
read-only disk.

Add ro state control function in libnvdimm for this purpose, and implement
set_read_only for BLKROSET.

Huaisheng Ye (4):
  libnvdimm: add a ro state control function for nvdimm
  libnvdimm/pmem: implement ->set_read_only to hook into BLKROSET
processing
  libnvdimm/blk: implement ->set_read_only to hook into BLKROSET
processing
  libnvdimm/btt: implement ->set_read_only to hook into BLKROSET
processing

 drivers/nvdimm/blk.c  |  1 +
 drivers/nvdimm/btt.c  |  1 +
 drivers/nvdimm/bus.c  | 17 +
 drivers/nvdimm/nd.h   |  1 +
 drivers/nvdimm/pmem.c |  1 +
 5 files changed, 21 insertions(+)

-- 
2.27.0




[PATCH] dm thin metadata: Remove unused local variable when create thin and snap

2020-09-15 Thread Huaisheng Ye
From: Huaisheng Ye 

The local variable details_le is not used when creating thin and snap
devices. Remove it from dm-thin-metadata, and add a NULL check on the value
pointer in btree_lookup_raw so that the memory copy is skipped when the caller
doesn't need the value.

Signed-off-by: Huaisheng Ye 
---
 drivers/md/dm-thin-metadata.c | 6 ++
 drivers/md/persistent-data/dm-btree.c | 3 ++-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index b461836..6ebb212 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1051,12 +1051,11 @@ static int __create_thin(struct dm_pool_metadata *pmd,
int r;
dm_block_t dev_root;
uint64_t key = dev;
-   struct disk_device_details details_le;
struct dm_thin_device *td;
__le64 value;
 
r = dm_btree_lookup(>details_info, pmd->details_root,
-   , _le);
+   , NULL);
if (!r)
return -EEXIST;
 
@@ -1129,12 +1128,11 @@ static int __create_snap(struct dm_pool_metadata *pmd,
dm_block_t origin_root;
uint64_t key = origin, dev_key = dev;
struct dm_thin_device *td;
-   struct disk_device_details details_le;
__le64 value;
 
/* check this device is unused */
r = dm_btree_lookup(>details_info, pmd->details_root,
-   _key, _le);
+   _key, NULL);
if (!r)
return -EEXIST;
 
diff --git a/drivers/md/persistent-data/dm-btree.c 
b/drivers/md/persistent-data/dm-btree.c
index 8aae062..ef6e78d 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -366,7 +366,8 @@ static int btree_lookup_raw(struct ro_spine *s, dm_block_t 
block, uint64_t key,
} while (!(flags & LEAF_NODE));
 
*result_key = le64_to_cpu(ro_node(s)->keys[i]);
-   memcpy(v, value_ptr(ro_node(s), i), value_size);
+   if (v)
+   memcpy(v, value_ptr(ro_node(s), i), value_size);
 
return 0;
 }
-- 
1.8.3.1



[PATCH] dm writecache: correct uncommitted_block when discarding uncommitted entry

2020-06-12 Thread Huaisheng Ye
From: Huaisheng Ye 

When an uncommitted entry is discarded, decrement wc->uncommitted_blocks
so that the count stays exact.

Signed-off-by: Huaisheng Ye 
---
 drivers/md/dm-writecache.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 4367cc7..64b4527 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -855,6 +855,8 @@ static void writecache_discard(struct dm_writecache *wc, 
sector_t start, sector_
}
discarded_something = true;
}
+   if (!writecache_entry_is_committed(wc, e))
+   wc->uncommitted_blocks--;
writecache_free_entry(wc, e);
}
 
-- 
1.8.3.1



[PATCH] dm writecache: skip writecache_wait when using pmem mode

2020-06-12 Thread Huaisheng Ye
From: Huaisheng Ye 

The bio_in_progress array is only used in SSD mode, so skip
writecache_wait_for_ios in writecache_discard when in pmem mode.

Signed-off-by: Huaisheng Ye 
---
 drivers/md/dm-writecache.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 66f3a3b..4367cc7 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -849,8 +849,10 @@ static void writecache_discard(struct dm_writecache *wc, 
sector_t start, sector_
 
if (likely(!e->write_in_progress)) {
if (!discarded_something) {
-   writecache_wait_for_ios(wc, READ);
-   writecache_wait_for_ios(wc, WRITE);
+   if (!WC_MODE_PMEM(wc)) {
+   writecache_wait_for_ios(wc, READ);
+   writecache_wait_for_ios(wc, WRITE);
+   }
discarded_something = true;
}
writecache_free_entry(wc, e);
-- 
1.8.3.1



[PATCH] dm writecache: reinitialize lru in writeback instead of endio

2020-05-30 Thread Huaisheng Ye
From: Huaisheng Ye 

Once a wc_entry has been removed from wbl->list in writeback, it is not
used again; it only waits to be freed by writecache_free_entry.

It is a little annoying that the lru of the wc_entry has to be reinitialized
in endio before writecache_free_entry is called.

Use list_del_init instead of list_del in writeback to simplify the code.

Signed-off-by: Huaisheng Ye 
---
 drivers/md/dm-writecache.c | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 7bbc21b..66f3a3b 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -1519,7 +1519,6 @@ static void __writecache_endio_pmem(struct dm_writecache 
*wc, struct list_head *
e = wb->wc_list[i];
BUG_ON(!e->write_in_progress);
e->write_in_progress = false;
-   INIT_LIST_HEAD(>lru);
if (!writecache_has_error(wc))
writecache_free_entry(wc, e);
BUG_ON(!wc->writeback_size);
@@ -1555,7 +1554,6 @@ static void __writecache_endio_ssd(struct dm_writecache 
*wc, struct list_head *l
do {
BUG_ON(!e->write_in_progress);
e->write_in_progress = false;
-   INIT_LIST_HEAD(>lru);
if (!writecache_has_error(wc))
writecache_free_entry(wc, e);
 
@@ -1654,7 +1652,7 @@ static void __writecache_writeback_pmem(struct 
dm_writecache *wc, struct writeba
while (wbl->size) {
wbl->size--;
e = container_of(wbl->list.prev, struct wc_entry, lru);
-   list_del(>lru);
+   list_del_init(>lru);
 
max_pages = e->wc_list_contiguous;
 
@@ -1685,7 +1683,7 @@ static void __writecache_writeback_pmem(struct 
dm_writecache *wc, struct writeba
if (!wc_add_block(wb, f, GFP_NOWAIT | __GFP_NOWARN))
break;
wbl->size--;
-   list_del(>lru);
+   list_del_init(>lru);
wb->wc_list[wb->wc_list_n++] = f;
e = f;
}
@@ -1712,7 +1710,7 @@ static void __writecache_writeback_ssd(struct 
dm_writecache *wc, struct writebac
 
wbl->size--;
e = container_of(wbl->list.prev, struct wc_entry, lru);
-   list_del(>lru);
+   list_del_init(>lru);
 
n_sectors = e->wc_list_contiguous << (wc->block_size_bits - 
SECTOR_SHIFT);
 
@@ -1732,7 +1730,7 @@ static void __writecache_writeback_ssd(struct 
dm_writecache *wc, struct writebac
wbl->size--;
f = container_of(wbl->list.prev, struct wc_entry, lru);
BUG_ON(f != e + 1);
-   list_del(>lru);
+   list_del_init(>lru);
e = f;
}
 
-- 
1.8.3.1



[PATCH] dm-writecache: fix typo when alloc workqueue for writeback_wq

2019-02-20 Thread Huaisheng Ye
From: Huaisheng Ye 

The workqueue's name should be "writecache-writeback" instead of 
"writecache-writeabck".

Signed-off-by: Huaisheng Ye 
---
 drivers/md/dm-writecache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 2b8cee3..f782287 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -1859,7 +1859,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned 
argc, char **argv)
goto bad;
}
 
-   wc->writeback_wq = alloc_workqueue("writecache-writeabck", 
WQ_MEM_RECLAIM, 1);
+   wc->writeback_wq = alloc_workqueue("writecache-writeback", 
WQ_MEM_RECLAIM, 1);
if (!wc->writeback_wq) {
r = -ENOMEM;
ti->error = "Could not allocate writeback workqueue";
-- 
1.8.3.1




[PATCH v3 5/5] dm-writecache: output seq_count within status

2019-01-30 Thread Huaisheng Ye
From: Huaisheng Ye 

seq_count is important for flush operations, output it within status
for debugging and analyzing code behavior.

Signed-off-by: Huaisheng Ye 
---
 drivers/md/dm-writecache.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 2c1e825..3396710 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -2231,9 +2231,9 @@ static void writecache_status(struct dm_target *ti, 
status_type_t type,
 
switch (type) {
case STATUSTYPE_INFO:
-   DMEMIT("%ld %llu %llu %llu", writecache_has_error(wc),
+   DMEMIT("%ld %llu %llu %llu %llu", writecache_has_error(wc),
   (unsigned long long)wc->n_blocks, (unsigned long 
long)wc->freelist_size,
-  (unsigned long long)wc->writeback_size);
+  (unsigned long long)wc->writeback_size, wc->seq_count);
break;
case STATUSTYPE_TABLE:
DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 'p' : 's',
-- 
1.8.3.1




[PATCH v3 4/5] Documentation/device-mapper: add optional parameter reinit

2019-01-30 Thread Huaisheng Ye
From: Huaisheng Ye 

Add intro and usage guide for reinit.

Signed-off-by: Huaisheng Ye 
---
 Documentation/device-mapper/writecache.txt | 4 
 1 file changed, 4 insertions(+)

diff --git a/Documentation/device-mapper/writecache.txt 
b/Documentation/device-mapper/writecache.txt
index 01532b3..255c68c 100644
--- a/Documentation/device-mapper/writecache.txt
+++ b/Documentation/device-mapper/writecache.txt
@@ -45,6 +45,10 @@ Constructor parameters:
afterwards
- some underlying devices perform better with fua, some
  with nofua. The user should test it
+   reinit  (by default off)
+   applicable only to persistent memory - use the REINIT flag
+   when the superblock has messy data, that would cause fn ctr
+   failed to work with invalid magic or version in the superblock
 
 Status:
 1. error indicator - 0 if there was no error, otherwise error number
-- 
1.8.3.1




[PATCH v3 3/5] dm-writecache: expand pmem_reinit for struct dm_writecache

2019-01-30 Thread Huaisheng Ye
From: Huaisheng Ye 

When persistent memory is used as the cache data device, the superblock
of the pmem sometimes has messy data stored in it. That risks making the
ctr function fail because of an invalid magic or version.

To solve this issue, add pmem_reinit as an optional parameter. When a user
takes a pmem device that holds unrelated data as the cache device, they
should pass the 'reinit' parameter; otherwise s.magic and s.version are
neither zero nor the correct MEMORY_SUPERBLOCK_MAGIC/VERSION.

Signed-off-by: Huaisheng Ye 
---
 drivers/md/dm-writecache.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index c69317c..2c1e825 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -149,6 +149,7 @@ struct dm_writecache {
 
bool pmem_mode:1;
bool writeback_fua:1;
+   bool pmem_reinit:1;
 
bool overwrote_committed:1;
bool memory_vmapped:1;
@@ -2026,6 +2027,10 @@ static int writecache_ctr(struct dm_target *ti, unsigned 
argc, char **argv)
wc->writeback_fua = false;
wc->writeback_fua_set = true;
} else goto invalid_optional;
+   } else if (!strcasecmp(string, "reinit")) {
+   if (WC_MODE_PMEM(wc))
+   wc->pmem_reinit = true;
+   else goto invalid_optional;
} else {
 invalid_optional:
r = -EINVAL;
@@ -2127,7 +2132,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned 
argc, char **argv)
ti->error = "Hardware memory error when reading superblock";
goto bad;
}
-   if (!le32_to_cpu(s.magic) && !le32_to_cpu(s.version)) {
+   if (wc->pmem_reinit || (!le32_to_cpu(s.magic) && 
!le32_to_cpu(s.version))) {
r = init_memory(wc);
if (r) {
ti->error = "Unable to initialize device";
-- 
1.8.3.1




[PATCH v3 2/5] dm-writecache: get rid of memory_data flush to writecache_flush_entry

2019-01-30 Thread Huaisheng Ye
From: Huaisheng Ye 

writecache_flush_region only does work in SSD mode.
If wc->pmem_mode is set, writecache_flush_region doesn't need to be called in
writecache_flush_entry.

Signed-off-by: Huaisheng Ye 
---
 drivers/md/dm-writecache.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 2d8e0c0..c69317c 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -688,8 +688,6 @@ static void writecache_poison_lists(struct dm_writecache 
*wc)
 static void writecache_flush_entry(struct dm_writecache *wc, struct wc_entry 
*e)
 {
writecache_flush_region(wc, memory_entry(wc, e));
-   if (WC_MODE_PMEM(wc))
-   writecache_flush_region(wc, memory_data(wc, e));
 }
 
 static bool writecache_entry_is_committed(struct dm_writecache *wc, struct 
wc_entry *e)
-- 
1.8.3.1




[PATCH v3 0/5] Optimize writecache when using pmem as cache

2019-01-30 Thread Huaisheng Ye
From: Huaisheng Ye 

This patch set applies to dm-writecache when persistent memory is used as
the cache data device.

Patches 1 and 2 remove an unused parameter and code that doesn't actually
do anything.

Patches 3 and 4 address the problem of the ctr function failing because of
an invalid magic or version, which happens when the superblock of the pmem
has messy data stored in it.

Patch 5 is used for getting the status of seq_count.

Changes Since v2: 
- seq_count is important for flush operations, output it within status
  for debugging and analyzing code behavior.
[1]: https://lkml.org/lkml/2019/1/3/43
[2]: https://lkml.org/lkml/2019/1/9/6

Huaisheng Ye (5):
  dm-writecache: remove unused size to writecache_flush_region
  dm-writecache: get rid of memory_data flush to writecache_flush_entry
  dm-writecache: expand pmem_reinit for struct dm_writecache
  Documentation/device-mapper: add optional parameter reinit
  dm-writecache: output seq_count within status

 Documentation/device-mapper/writecache.txt |  4 
 drivers/md/dm-writecache.c | 23 +--
 2 files changed, 17 insertions(+), 10 deletions(-)

-- 
1.8.3.1




[PATCH v3 1/5] dm-writecache: remove unused size to writecache_flush_region

2019-01-30 Thread Huaisheng Ye
From: Huaisheng Ye 

writecache_flush_region doesn't use size to calculate the flush region.
It uses __set_bit to mark the region in dirty_bitmap directly.

Signed-off-by: Huaisheng Ye 
---
 drivers/md/dm-writecache.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 2d50eec..2d8e0c0 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -418,7 +418,7 @@ static void writecache_flush_all_metadata(struct 
dm_writecache *wc)
memset(wc->dirty_bitmap, -1, wc->dirty_bitmap_size);
 }
 
-static void writecache_flush_region(struct dm_writecache *wc, void *ptr, 
size_t size)
+static void writecache_flush_region(struct dm_writecache *wc, void *ptr)
 {
if (!WC_MODE_PMEM(wc))
__set_bit(((char *)ptr - (char *)wc->memory_map) / 
BITMAP_GRANULARITY,
@@ -657,7 +657,7 @@ static void writecache_free_entry(struct dm_writecache *wc, 
struct wc_entry *e)
writecache_unlink(wc, e);
writecache_add_to_freelist(wc, e);
clear_seq_count(wc, e);
-   writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct 
wc_memory_entry));
+   writecache_flush_region(wc, memory_entry(wc, e));
if (unlikely(waitqueue_active(>freelist_wait)))
wake_up(>freelist_wait);
 }
@@ -687,9 +687,9 @@ static void writecache_poison_lists(struct dm_writecache 
*wc)
 
 static void writecache_flush_entry(struct dm_writecache *wc, struct wc_entry 
*e)
 {
-   writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct 
wc_memory_entry));
+   writecache_flush_region(wc, memory_entry(wc, e));
if (WC_MODE_PMEM(wc))
-   writecache_flush_region(wc, memory_data(wc, e), wc->block_size);
+   writecache_flush_region(wc, memory_data(wc, e));
 }
 
 static bool writecache_entry_is_committed(struct dm_writecache *wc, struct 
wc_entry *e)
@@ -733,7 +733,7 @@ static void writecache_flush(struct dm_writecache *wc)
 
wc->seq_count++;
pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count));
-   writecache_flush_region(wc, (wc)->seq_count, sizeof 
sb(wc)->seq_count);
+   writecache_flush_region(wc, (wc)->seq_count);
writecache_commit_flushed(wc);
 
wc->overwrote_committed = false;
@@ -1757,7 +1757,7 @@ static int init_memory(struct dm_writecache *wc)
writecache_flush_all_metadata(wc);
writecache_commit_flushed(wc);
pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC));
-   writecache_flush_region(wc, (wc)->magic, sizeof sb(wc)->magic);
+   writecache_flush_region(wc, (wc)->magic);
writecache_commit_flushed(wc);
 
return 0;
-- 
1.8.3.1




[RFC PATCH v2 4/4] Documentation/device-mapper: add optional parameter reinit

2019-01-08 Thread Huaisheng Ye
From: Huaisheng Ye 

Add intro and usage guide for reinit.

Signed-off-by: Huaisheng Ye 
---
 Documentation/device-mapper/writecache.txt | 4 
 1 file changed, 4 insertions(+)

diff --git a/Documentation/device-mapper/writecache.txt 
b/Documentation/device-mapper/writecache.txt
index 01532b3..255c68c 100644
--- a/Documentation/device-mapper/writecache.txt
+++ b/Documentation/device-mapper/writecache.txt
@@ -45,6 +45,10 @@ Constructor parameters:
afterwards
- some underlying devices perform better with fua, some
  with nofua. The user should test it
+   reinit  (by default off)
+   applicable only to persistent memory - use the REINIT flag
+   when the superblock has messy data, that would cause fn ctr
+   failed to work with invalid magic or version in the superblock
 
 Status:
 1. error indicator - 0 if there was no error, otherwise error number
-- 
1.8.3.1




[RFC PATCH v2 1/4] dm-writecache: remove unused size to writecache_flush_region

2019-01-08 Thread Huaisheng Ye
From: Huaisheng Ye 

writecache_flush_region doesn't use size to calculate the flush region.
It uses __set_bit to mark the region in dirty_bitmap directly.

Signed-off-by: Huaisheng Ye 
---
 drivers/md/dm-writecache.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 2d50eec..2d8e0c0 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -418,7 +418,7 @@ static void writecache_flush_all_metadata(struct 
dm_writecache *wc)
memset(wc->dirty_bitmap, -1, wc->dirty_bitmap_size);
 }
 
-static void writecache_flush_region(struct dm_writecache *wc, void *ptr, 
size_t size)
+static void writecache_flush_region(struct dm_writecache *wc, void *ptr)
 {
if (!WC_MODE_PMEM(wc))
__set_bit(((char *)ptr - (char *)wc->memory_map) / 
BITMAP_GRANULARITY,
@@ -657,7 +657,7 @@ static void writecache_free_entry(struct dm_writecache *wc, 
struct wc_entry *e)
writecache_unlink(wc, e);
writecache_add_to_freelist(wc, e);
clear_seq_count(wc, e);
-   writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct 
wc_memory_entry));
+   writecache_flush_region(wc, memory_entry(wc, e));
if (unlikely(waitqueue_active(>freelist_wait)))
wake_up(>freelist_wait);
 }
@@ -687,9 +687,9 @@ static void writecache_poison_lists(struct dm_writecache 
*wc)
 
 static void writecache_flush_entry(struct dm_writecache *wc, struct wc_entry 
*e)
 {
-   writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct 
wc_memory_entry));
+   writecache_flush_region(wc, memory_entry(wc, e));
if (WC_MODE_PMEM(wc))
-   writecache_flush_region(wc, memory_data(wc, e), wc->block_size);
+   writecache_flush_region(wc, memory_data(wc, e));
 }
 
 static bool writecache_entry_is_committed(struct dm_writecache *wc, struct 
wc_entry *e)
@@ -733,7 +733,7 @@ static void writecache_flush(struct dm_writecache *wc)
 
wc->seq_count++;
pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count));
-   writecache_flush_region(wc, (wc)->seq_count, sizeof 
sb(wc)->seq_count);
+   writecache_flush_region(wc, (wc)->seq_count);
writecache_commit_flushed(wc);
 
wc->overwrote_committed = false;
@@ -1757,7 +1757,7 @@ static int init_memory(struct dm_writecache *wc)
writecache_flush_all_metadata(wc);
writecache_commit_flushed(wc);
pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC));
-   writecache_flush_region(wc, (wc)->magic, sizeof sb(wc)->magic);
+   writecache_flush_region(wc, (wc)->magic);
writecache_commit_flushed(wc);
 
return 0;
-- 
1.8.3.1




[RFC PATCH v2 2/4] dm-writecache: get rid of memory_data flush to writecache_flush_entry

2019-01-08 Thread Huaisheng Ye
From: Huaisheng Ye 

writecache_flush_region only does work in SSD mode.
If wc->pmem_mode is set, writecache_flush_region doesn't need to be called in
writecache_flush_entry.

Signed-off-by: Huaisheng Ye 
---
 drivers/md/dm-writecache.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 2d8e0c0..c69317c 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -688,8 +688,6 @@ static void writecache_poison_lists(struct dm_writecache 
*wc)
 static void writecache_flush_entry(struct dm_writecache *wc, struct wc_entry 
*e)
 {
writecache_flush_region(wc, memory_entry(wc, e));
-   if (WC_MODE_PMEM(wc))
-   writecache_flush_region(wc, memory_data(wc, e));
 }
 
 static bool writecache_entry_is_committed(struct dm_writecache *wc, struct 
wc_entry *e)
-- 
1.8.3.1




[RFC PATCH v2 3/4] dm-writecache: expand pmem_reinit for struct dm_writecache

2019-01-08 Thread Huaisheng Ye
From: Huaisheng Ye 

When persistent memory is used as the cache data device, the superblock
of the pmem sometimes has messy data stored in it. That risks making the
ctr function fail because of an invalid magic or version.

To solve this issue, add pmem_reinit as an optional parameter. When a user
takes a pmem device that has unrelated data in it as the cache device, they
should pass the 'reinit' parameter; otherwise s.magic and s.version are
neither zero nor the correct MEMORY_SUPERBLOCK_MAGIC/VERSION.

Signed-off-by: Huaisheng Ye 
---
 drivers/md/dm-writecache.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index c69317c..2c1e825 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -149,6 +149,7 @@ struct dm_writecache {
 
bool pmem_mode:1;
bool writeback_fua:1;
+   bool pmem_reinit:1;
 
bool overwrote_committed:1;
bool memory_vmapped:1;
@@ -2026,6 +2027,10 @@ static int writecache_ctr(struct dm_target *ti, unsigned 
argc, char **argv)
wc->writeback_fua = false;
wc->writeback_fua_set = true;
} else goto invalid_optional;
+   } else if (!strcasecmp(string, "reinit")) {
+   if (WC_MODE_PMEM(wc))
+   wc->pmem_reinit = true;
+   else goto invalid_optional;
} else {
 invalid_optional:
r = -EINVAL;
@@ -2127,7 +2132,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned 
argc, char **argv)
ti->error = "Hardware memory error when reading superblock";
goto bad;
}
-   if (!le32_to_cpu(s.magic) && !le32_to_cpu(s.version)) {
+   if (wc->pmem_reinit || (!le32_to_cpu(s.magic) && 
!le32_to_cpu(s.version))) {
r = init_memory(wc);
if (r) {
ti->error = "Unable to initialize device";
-- 
1.8.3.1




[RFC PATCH v2 0/4] add parameter for pmem cache device init

2019-01-08 Thread Huaisheng Ye
From: Huaisheng Ye 

This patch set applies to dm-writecache when persistent memory is used as
the cache data device.

Patches 1 and 2 remove an unused parameter and code that doesn't actually
do anything.

Patches 3 and 4 address the problem of the ctr function failing because of
an invalid magic or version, which happens when the superblock of the pmem
has messy data stored in it.

Changes Since v1:
- add optional parameter reinit to avoid invalid magic or
  version.
[1]: https://lkml.org/lkml/2019/1/3/43

Huaisheng Ye (4):
  dm-writecache: remove unused size to writecache_flush_region
  dm-writecache: get rid of memory_data flush to writecache_flush_entry
  dm-writecache: expand pmem_reinit for struct dm_writecache
  Documentation/device-mapper: add optional parameter reinit

 Documentation/device-mapper/writecache.txt |  4 
 drivers/md/dm-writecache.c | 19 +++
 2 files changed, 15 insertions(+), 8 deletions(-)

-- 
1.8.3.1




[PATCH] dma-mapping: remove unused attrs parameter to dma_common_get_sgtable

2019-01-03 Thread Huaisheng Ye
From: Huaisheng Ye 

dma_common_get_sgtable has parameter attrs which is not used at all.
Remove it.

Signed-off-by: Huaisheng Ye 
Acked-by: Stefano Stabellini 
---
 drivers/xen/swiotlb-xen.c   | 2 +-
 include/linux/dma-mapping.h | 2 +-
 kernel/dma/mapping.c| 6 ++
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 989cf87..b8cb1da 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -677,7 +677,7 @@ static void xen_swiotlb_unmap_page(struct device *hwdev, 
dma_addr_t dev_addr,
   handle, size, attrs);
}
 #endif
-   return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size, attrs);
+   return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size);
 }
 
 const struct dma_map_ops xen_swiotlb_dma_ops = {
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index ba521d5..f9e7c2a 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -523,7 +523,7 @@ int dma_mmap_attrs(struct device *dev, struct 
vm_area_struct *vma,
 
 int
 dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void 
*cpu_addr,
-   dma_addr_t dma_addr, size_t size, unsigned long attrs);
+   dma_addr_t dma_addr, size_t size);
 
 int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index d7c34d2..0c28b71 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -203,8 +203,7 @@ void dmam_release_declared_memory(struct device *dev)
  * Create scatter-list for the already allocated DMA buffer.
  */
 int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
-void *cpu_addr, dma_addr_t dma_addr, size_t size,
-unsigned long attrs)
+void *cpu_addr, dma_addr_t dma_addr, size_t size)
 {
struct page *page;
int ret;
@@ -234,8 +233,7 @@ int dma_get_sgtable_attrs(struct device *dev, struct 
sg_table *sgt,
if (!dma_is_direct(ops) && ops->get_sgtable)
return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
attrs);
-   return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
-   attrs);
+   return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size);
 }
 EXPORT_SYMBOL(dma_get_sgtable_attrs);
 
-- 
1.8.3.1




[PATCH 2/2] dm-writecache: get rid of memory_data flush to writecache_flush_entry

2019-01-03 Thread Huaisheng Ye
From: Huaisheng Ye 

writecache_flush_region only does work in SSD mode.
If wc->pmem_mode is set, writecache_flush_region doesn't need to be called in
writecache_flush_entry.

Signed-off-by: Huaisheng Ye 
---
 drivers/md/dm-writecache.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 2d8e0c0..c69317c 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -688,8 +688,6 @@ static void writecache_poison_lists(struct dm_writecache 
*wc)
 static void writecache_flush_entry(struct dm_writecache *wc, struct wc_entry 
*e)
 {
writecache_flush_region(wc, memory_entry(wc, e));
-   if (WC_MODE_PMEM(wc))
-   writecache_flush_region(wc, memory_data(wc, e));
 }
 
 static bool writecache_entry_is_committed(struct dm_writecache *wc, struct 
wc_entry *e)
-- 
1.8.3.1




[PATCH 1/2] dm-writecache: remove unused size to writecache_flush_region

2019-01-03 Thread Huaisheng Ye
From: Huaisheng Ye 

writecache_flush_region doesn't use its size parameter to calculate the flush
region. It uses __set_bit to mark the region in dirty_bitmap directly.

Signed-off-by: Huaisheng Ye 
---
 drivers/md/dm-writecache.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 2d50eec..2d8e0c0 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -418,7 +418,7 @@ static void writecache_flush_all_metadata(struct 
dm_writecache *wc)
memset(wc->dirty_bitmap, -1, wc->dirty_bitmap_size);
 }
 
-static void writecache_flush_region(struct dm_writecache *wc, void *ptr, 
size_t size)
+static void writecache_flush_region(struct dm_writecache *wc, void *ptr)
 {
if (!WC_MODE_PMEM(wc))
__set_bit(((char *)ptr - (char *)wc->memory_map) / 
BITMAP_GRANULARITY,
@@ -657,7 +657,7 @@ static void writecache_free_entry(struct dm_writecache *wc, 
struct wc_entry *e)
writecache_unlink(wc, e);
writecache_add_to_freelist(wc, e);
clear_seq_count(wc, e);
-   writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct 
wc_memory_entry));
+   writecache_flush_region(wc, memory_entry(wc, e));
if (unlikely(waitqueue_active(>freelist_wait)))
wake_up(>freelist_wait);
 }
@@ -687,9 +687,9 @@ static void writecache_poison_lists(struct dm_writecache 
*wc)
 
 static void writecache_flush_entry(struct dm_writecache *wc, struct wc_entry 
*e)
 {
-   writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct 
wc_memory_entry));
+   writecache_flush_region(wc, memory_entry(wc, e));
if (WC_MODE_PMEM(wc))
-   writecache_flush_region(wc, memory_data(wc, e), wc->block_size);
+   writecache_flush_region(wc, memory_data(wc, e));
 }
 
 static bool writecache_entry_is_committed(struct dm_writecache *wc, struct 
wc_entry *e)
@@ -733,7 +733,7 @@ static void writecache_flush(struct dm_writecache *wc)
 
wc->seq_count++;
pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count));
-   writecache_flush_region(wc, (wc)->seq_count, sizeof 
sb(wc)->seq_count);
+   writecache_flush_region(wc, (wc)->seq_count);
writecache_commit_flushed(wc);
 
wc->overwrote_committed = false;
@@ -1757,7 +1757,7 @@ static int init_memory(struct dm_writecache *wc)
writecache_flush_all_metadata(wc);
writecache_commit_flushed(wc);
pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC));
-   writecache_flush_region(wc, (wc)->magic, sizeof sb(wc)->magic);
+   writecache_flush_region(wc, (wc)->magic);
writecache_commit_flushed(wc);
 
return 0;
-- 
1.8.3.1




[PATCH] dma-mapping: remove unused attrs parameter to dma_common_get_sgtable

2019-01-03 Thread Huaisheng Ye
From: Huaisheng Ye 

dma_common_get_sgtable has parameter attrs which is not used at all.
Remove it.

Signed-off-by: Huaisheng Ye 
---
 drivers/xen/swiotlb-xen.c   | 2 +-
 include/linux/dma-mapping.h | 5 ++---
 kernel/dma/mapping.c| 3 +--
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 2a7f545..2dc17a5 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -697,7 +697,7 @@ static void xen_swiotlb_unmap_page(struct device *hwdev, 
dma_addr_t dev_addr,
   handle, size, attrs);
}
 #endif
-   return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size, attrs);
+   return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size);
 }
 
 static int xen_swiotlb_mapping_error(struct device *dev, dma_addr_t dma_addr)
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index d327bdd..bbfad44 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -483,7 +483,7 @@ void *dma_common_pages_remap(struct page **pages, size_t 
size,
 
 int
 dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void 
*cpu_addr,
-   dma_addr_t dma_addr, size_t size, unsigned long attrs);
+   dma_addr_t dma_addr, size_t size);
 
 static inline int
 dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr,
@@ -495,8 +495,7 @@ void *dma_common_pages_remap(struct page **pages, size_t 
size,
if (ops->get_sgtable)
return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
attrs);
-   return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
-   attrs);
+   return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size);
 }
 
 #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0)
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 58dec7a..6b33f10 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -202,8 +202,7 @@ void dmam_release_declared_memory(struct device *dev)
  * Create scatter-list for the already allocated DMA buffer.
  */
 int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
-void *cpu_addr, dma_addr_t dma_addr, size_t size,
-unsigned long attrs)
+void *cpu_addr, dma_addr_t dma_addr, size_t size)
 {
struct page *page;
int ret;
-- 
1.8.3.1




[PATCH] drm/amd/display: fix compiler warnings about wm variable

2018-12-13 Thread Huaisheng Ye
From: Huaisheng Ye 

There are compiler warnings in the functions 'dcn10_log_hubbub_state'
and 'dcn10_get_hubbub_state'. This patch prevents the compiler from reporting
the following warnings when building amdgpu.ko.

drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_hw_sequencer.c: In 
function ‘dcn10_log_hubbub_state’:
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_hw_sequencer.c:94:9: 
warning: missing braces around initializer [-Wmissing-braces]
  struct dcn_hubbub_wm wm = {0};
 ^
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_hw_sequencer.c:94:9: 
warning: (near initialization for ‘wm.sets’) [-Wmissing-braces]

drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_hw_sequencer_debug.c: In 
function ‘dcn10_get_hubbub_state’:
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_hw_sequencer_debug.c:74:9: 
warning: missing braces around initializer [-Wmissing-braces]
  struct dcn_hubbub_wm wm = {0};
 ^
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_hw_sequencer_debug.c:74:9: 
warning: (near initialization for ‘wm.sets’) [-Wmissing-braces]

Signed-off-by: Huaisheng Ye 
---
 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c   | 3 ++-
 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index 193184a..e96933a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -91,9 +91,10 @@ static void log_mpc_crc(struct dc *dc,
 void dcn10_log_hubbub_state(struct dc *dc, struct dc_log_buffer_ctx *log_ctx)
 {
struct dc_context *dc_ctx = dc->ctx;
-   struct dcn_hubbub_wm wm = {0};
+   struct dcn_hubbub_wm wm;
int i;
 
+   memset(&wm, 0, sizeof(struct dcn_hubbub_wm));
hubbub1_wm_read_state(dc->res_pool->hubbub, &wm);
 
DTN_INFO("HUBBUB WM:  data_urgent  pte_meta_urgent"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
index 6415890..f5610ea 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
@@ -71,7 +71,7 @@ static unsigned int snprintf_count(char *pBuf, unsigned int 
bufSize, char *fmt,
 static unsigned int dcn10_get_hubbub_state(struct dc *dc, char *pBuf, unsigned 
int bufSize)
 {
struct dc_context *dc_ctx = dc->ctx;
-   struct dcn_hubbub_wm wm = {0};
+   struct dcn_hubbub_wm wm;
int i;
 
unsigned int chars_printed = 0;
@@ -80,6 +80,7 @@ static unsigned int dcn10_get_hubbub_state(struct dc *dc, 
char *pBuf, unsigned i
const uint32_t ref_clk_mhz = dc_ctx->dc->res_pool->ref_clock_inKhz / 
1000;
static const unsigned int frac = 1000;
 
+   memset(&wm, 0, sizeof(struct dcn_hubbub_wm));
hubbub1_wm_read_state(dc->res_pool->hubbub, &wm);
 
chars_printed = snprintf_count(pBuf, remaining_buffer, 
"wm_set_index,data_urgent,pte_meta_urgent,sr_enter,sr_exit,dram_clk_chanage\n");
-- 
1.8.3.1





[PATCH v2 6/6] filesystem-dax: Do not request kaddr and pfn when not required

2018-07-25 Thread Huaisheng Ye
From: Huaisheng Ye 

Some functions within fs/dax don't need the local pointer kaddr
or the variable pfn from direct_access. Pass NULL instead of a
useless pointer or variable that the caller then just throws away.

Signed-off-by: Huaisheng Ye 
---
 fs/dax.c | 13 -
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 6411928..959a533 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -647,7 +647,6 @@ static int copy_user_dax(struct block_device *bdev, struct 
dax_device *dax_dev,
 {
void *vto, *kaddr;
pgoff_t pgoff;
-   pfn_t pfn;
long rc;
int id;
 
@@ -656,7 +655,7 @@ static int copy_user_dax(struct block_device *bdev, struct 
dax_device *dax_dev,
return rc;
 
id = dax_read_lock();
-   rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), , );
+   rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), , NULL);
if (rc < 0) {
dax_read_unlock(id);
return rc;
@@ -967,7 +966,6 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, 
size_t size,
 {
const sector_t sector = dax_iomap_sector(iomap, pos);
pgoff_t pgoff;
-   void *kaddr;
int id, rc;
long length;
 
@@ -976,7 +974,7 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, 
size_t size,
return rc;
id = dax_read_lock();
length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
-  , pfnp);
+  NULL, pfnp);
if (length < 0) {
rc = length;
goto out;
@@ -1052,15 +1050,13 @@ int __dax_zero_page_range(struct block_device *bdev,
pgoff_t pgoff;
long rc, id;
void *kaddr;
-   pfn_t pfn;
 
rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, );
if (rc)
return rc;
 
id = dax_read_lock();
-   rc = dax_direct_access(dax_dev, pgoff, 1, ,
-   );
+   rc = dax_direct_access(dax_dev, pgoff, 1, , NULL);
if (rc < 0) {
dax_read_unlock(id);
return rc;
@@ -1116,7 +1112,6 @@ int __dax_zero_page_range(struct block_device *bdev,
ssize_t map_len;
pgoff_t pgoff;
void *kaddr;
-   pfn_t pfn;
 
if (fatal_signal_pending(current)) {
ret = -EINTR;
@@ -1128,7 +1123,7 @@ int __dax_zero_page_range(struct block_device *bdev,
break;
 
map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
-   , );
+   , NULL);
if (map_len < 0) {
ret = map_len;
break;
-- 
1.8.3.1




[PATCH v2 6/6] filesystem-dax: Do not request kaddr and pfn when not required

2018-07-25 Thread Huaisheng Ye
From: Huaisheng Ye 

Some functions within fs/dax don't need the local pointer kaddr
or the variable pfn from direct_access. Pass NULL instead of a
useless pointer or variable that the caller then just throws away.

Signed-off-by: Huaisheng Ye 
---
 fs/dax.c | 13 -
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 6411928..959a533 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -647,7 +647,6 @@ static int copy_user_dax(struct block_device *bdev, struct 
dax_device *dax_dev,
 {
void *vto, *kaddr;
pgoff_t pgoff;
-   pfn_t pfn;
long rc;
int id;
 
@@ -656,7 +655,7 @@ static int copy_user_dax(struct block_device *bdev, struct 
dax_device *dax_dev,
return rc;
 
id = dax_read_lock();
-   rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), , );
+   rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), , NULL);
if (rc < 0) {
dax_read_unlock(id);
return rc;
@@ -967,7 +966,6 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, 
size_t size,
 {
const sector_t sector = dax_iomap_sector(iomap, pos);
pgoff_t pgoff;
-   void *kaddr;
int id, rc;
long length;
 
@@ -976,7 +974,7 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, 
size_t size,
return rc;
id = dax_read_lock();
length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
-  , pfnp);
+  NULL, pfnp);
if (length < 0) {
rc = length;
goto out;
@@ -1052,15 +1050,13 @@ int __dax_zero_page_range(struct block_device *bdev,
pgoff_t pgoff;
long rc, id;
void *kaddr;
-   pfn_t pfn;
 
rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, );
if (rc)
return rc;
 
id = dax_read_lock();
-   rc = dax_direct_access(dax_dev, pgoff, 1, ,
-   );
+   rc = dax_direct_access(dax_dev, pgoff, 1, , NULL);
if (rc < 0) {
dax_read_unlock(id);
return rc;
@@ -1116,7 +1112,6 @@ int __dax_zero_page_range(struct block_device *bdev,
ssize_t map_len;
pgoff_t pgoff;
void *kaddr;
-   pfn_t pfn;
 
if (fatal_signal_pending(current)) {
ret = -EINTR;
@@ -1128,7 +1123,7 @@ int __dax_zero_page_range(struct block_device *bdev,
break;
 
map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
-   , );
+   , NULL);
if (map_len < 0) {
ret = map_len;
break;
-- 
1.8.3.1




[PATCH v2 5/6] md/dm-writecache: Don't request pointer dummy_addr when not required

2018-07-25 Thread Huaisheng Ye
From: Huaisheng Ye 

Function persistent_memory_claim doesn't need the local pointer
dummy_addr from direct_access. Pass NULL instead of a useless local
pointer that the caller then just throws away.

Suggested-by: Ross Zwisler 
Signed-off-by: Huaisheng Ye 
---
 drivers/md/dm-writecache.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 87107c9..9d79084 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -268,9 +268,8 @@ static int persistent_memory_claim(struct dm_writecache *wc)
i = 0;
do {
long daa;
-   void *dummy_addr;
daa = dax_direct_access(wc->ssd_dev->dax_dev, i, p - i,
-   _addr, );
+   NULL, );
if (daa <= 0) {
r = daa ? daa : -EINVAL;
goto err3;
-- 
1.8.3.1




[PATCH v2 5/6] md/dm-writecache: Don't request pointer dummy_addr when not required

2018-07-25 Thread Huaisheng Ye
From: Huaisheng Ye 

Function persistent_memory_claim doesn't need the local pointer
dummy_addr from direct_access. Pass NULL instead of a useless local
pointer that the caller then just throws away.

Suggested-by: Ross Zwisler 
Signed-off-by: Huaisheng Ye 
---
 drivers/md/dm-writecache.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 87107c9..9d79084 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -268,9 +268,8 @@ static int persistent_memory_claim(struct dm_writecache *wc)
i = 0;
do {
long daa;
-   void *dummy_addr;
daa = dax_direct_access(wc->ssd_dev->dax_dev, i, p - i,
-   _addr, );
+   NULL, );
if (daa <= 0) {
r = daa ? daa : -EINVAL;
goto err3;
-- 
1.8.3.1




[PATCH v2 3/6] tools/testing/nvdimm: kaddr and pfn can be NULL to ->direct_access()

2018-07-25 Thread Huaisheng Ye
From: Huaisheng Ye 

The mock / test version of pmem_direct_access() needs to check whether
the pointers kaddr and pfn are NULL before assigning through them. If
either is NULL, its value doesn't need to be calculated.

A NULL pointer means the caller has no need for kaddr or pfn, so this
patch allows callers to pass in NULL instead of having to pass in a
local pointer or variable that they then just throw away.

Suggested-by: Dan Williams 
Signed-off-by: Huaisheng Ye 
---
 tools/testing/nvdimm/pmem-dax.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c
index b53596a..2e7fd82 100644
--- a/tools/testing/nvdimm/pmem-dax.c
+++ b/tools/testing/nvdimm/pmem-dax.c
@@ -31,17 +31,21 @@ long __pmem_direct_access(struct pmem_device *pmem, pgoff_t 
pgoff,
if (get_nfit_res(pmem->phys_addr + offset)) {
struct page *page;
 
-   *kaddr = pmem->virt_addr + offset;
+   if (kaddr)
+   *kaddr = pmem->virt_addr + offset;
page = vmalloc_to_page(pmem->virt_addr + offset);
-   *pfn = page_to_pfn_t(page);
+   if (pfn)
+   *pfn = page_to_pfn_t(page);
pr_debug_ratelimited("%s: pmem: %p pgoff: %#lx pfn: %#lx\n",
__func__, pmem, pgoff, page_to_pfn(page));
 
return 1;
}
 
-   *kaddr = pmem->virt_addr + offset;
-   *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+   if (kaddr)
+   *kaddr = pmem->virt_addr + offset;
+   if (pfn)
+   *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
 
/*
 * If badblocks are present, limit known good range to the
-- 
1.8.3.1




[PATCH v2 3/6] tools/testing/nvdimm: kaddr and pfn can be NULL to ->direct_access()

2018-07-25 Thread Huaisheng Ye
From: Huaisheng Ye 

The mock / test version of pmem_direct_access() needs to check whether
the pointers kaddr and pfn are NULL before assigning through them. If
either is NULL, its value doesn't need to be calculated.

A NULL pointer means the caller has no need for kaddr or pfn, so this
patch allows callers to pass in NULL instead of having to pass in a
local pointer or variable that they then just throw away.

Suggested-by: Dan Williams 
Signed-off-by: Huaisheng Ye 
---
 tools/testing/nvdimm/pmem-dax.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c
index b53596a..2e7fd82 100644
--- a/tools/testing/nvdimm/pmem-dax.c
+++ b/tools/testing/nvdimm/pmem-dax.c
@@ -31,17 +31,21 @@ long __pmem_direct_access(struct pmem_device *pmem, pgoff_t 
pgoff,
if (get_nfit_res(pmem->phys_addr + offset)) {
struct page *page;
 
-   *kaddr = pmem->virt_addr + offset;
+   if (kaddr)
+   *kaddr = pmem->virt_addr + offset;
page = vmalloc_to_page(pmem->virt_addr + offset);
-   *pfn = page_to_pfn_t(page);
+   if (pfn)
+   *pfn = page_to_pfn_t(page);
pr_debug_ratelimited("%s: pmem: %p pgoff: %#lx pfn: %#lx\n",
__func__, pmem, pgoff, page_to_pfn(page));
 
return 1;
}
 
-   *kaddr = pmem->virt_addr + offset;
-   *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+   if (kaddr)
+   *kaddr = pmem->virt_addr + offset;
+   if (pfn)
+   *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
 
/*
 * If badblocks are present, limit known good range to the
-- 
1.8.3.1




[PATCH v2 4/6] dax/super: Do not request a pointer kaddr when not required

2018-07-25 Thread Huaisheng Ye
From: Huaisheng Ye 

Function __bdev_dax_supported doesn't need the local pointer kaddr
from direct_access. Pass NULL instead of a useless local pointer
that the caller then just throws away.

Signed-off-by: Huaisheng Ye 
---
 drivers/dax/super.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 45276ab..6e928f3 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -89,7 +89,6 @@ bool __bdev_dax_supported(struct block_device *bdev, int 
blocksize)
struct request_queue *q;
pgoff_t pgoff;
int err, id;
-   void *kaddr;
pfn_t pfn;
long len;
char buf[BDEVNAME_SIZE];
@@ -122,7 +121,7 @@ bool __bdev_dax_supported(struct block_device *bdev, int 
blocksize)
}
 
id = dax_read_lock();
-   len = dax_direct_access(dax_dev, pgoff, 1, , );
+   len = dax_direct_access(dax_dev, pgoff, 1, NULL, );
dax_read_unlock(id);
 
put_dax(dax_dev);
-- 
1.8.3.1




[PATCH v2 4/6] dax/super: Do not request a pointer kaddr when not required

2018-07-25 Thread Huaisheng Ye
From: Huaisheng Ye 

Function __bdev_dax_supported doesn't need the local pointer kaddr
from direct_access. Pass NULL instead of a useless local pointer
that the caller then just throws away.

Signed-off-by: Huaisheng Ye 
---
 drivers/dax/super.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 45276ab..6e928f3 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -89,7 +89,6 @@ bool __bdev_dax_supported(struct block_device *bdev, int 
blocksize)
struct request_queue *q;
pgoff_t pgoff;
int err, id;
-   void *kaddr;
pfn_t pfn;
long len;
char buf[BDEVNAME_SIZE];
@@ -122,7 +121,7 @@ bool __bdev_dax_supported(struct block_device *bdev, int 
blocksize)
}
 
id = dax_read_lock();
-   len = dax_direct_access(dax_dev, pgoff, 1, , );
+   len = dax_direct_access(dax_dev, pgoff, 1, NULL, );
dax_read_unlock(id);
 
put_dax(dax_dev);
-- 
1.8.3.1




[PATCH v2 2/6] s390, dcssblk: kaddr and pfn can be NULL to ->direct_access()

2018-07-25 Thread Huaisheng Ye
From: Huaisheng Ye 

dcssblk_direct_access() needs to check whether the pointers kaddr
and pfn are NULL before assigning through them. If either is NULL,
its value doesn't need to be calculated.

A NULL pointer means the caller has no need for kaddr or pfn, so this
patch allows callers to pass in NULL instead of having to pass in a
pointer or local variable that they then just throw away.

Signed-off-by: Huaisheng Ye 
---
 drivers/s390/block/dcssblk.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index ed60728..23e526c 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -922,9 +922,11 @@ static DEVICE_ATTR(save, S_IWUSR | S_IRUSR, 
dcssblk_save_show,
unsigned long dev_sz;
 
dev_sz = dev_info->end - dev_info->start + 1;
-   *kaddr = (void *) dev_info->start + offset;
-   *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset),
-   PFN_DEV|PFN_SPECIAL);
+   if (kaddr)
+   *kaddr = (void *) dev_info->start + offset;
+   if (pfn)
+   *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset),
+   PFN_DEV|PFN_SPECIAL);
 
return (dev_sz - offset) / PAGE_SIZE;
 }
-- 
1.8.3.1




[PATCH v2 0/6] kaddr and pfn can be NULL to ->direct_access()

2018-07-25 Thread Huaisheng Ye
From: Huaisheng Ye 

Changes since v1 [1]:
* Include the previous patches that allow pfn to be NULL.
* Reword the patch descriptions according to Christian's comment.
* According to Ross's suggestion, replace local pointer dummy_addr
  with NULL within md/dm-writecache for direct_access.

[1]: https://lkml.org/lkml/2018/7/24/199

Some functions within fs/dax, dax/super and md/dm-writecache don't
need the local pointer kaddr or the variable pfn from direct_access.
Passing NULL for kaddr or pfn to ->direct_access() is simpler and more
straightforward than supplying a useless local pointer or variable.

So every ->direct_access() implementation needs to check whether the
pointers kaddr and pfn are NULL before assigning through them. A NULL
pointer means the caller has no need for kaddr or pfn, so this patch
series allows callers to pass in NULL instead of having to pass in a
local pointer or variable that they then just throw away.

Huaisheng Ye (6):
  libnvdimm, pmem: kaddr and pfn can be NULL to ->direct_access()
  s390, dcssblk: kaddr and pfn can be NULL to ->direct_access()
  tools/testing/nvdimm: kaddr and pfn can be NULL to ->direct_access()
  dax/super: Do not request a pointer kaddr when not required
  md/dm-writecache: Don't request pointer dummy_addr when not required
  filesystem-dax: Do not request kaddr and pfn when not required

 drivers/dax/super.c |  3 +--
 drivers/md/dm-writecache.c  |  3 +--
 drivers/nvdimm/pmem.c   |  7 +--
 drivers/s390/block/dcssblk.c|  8 +---
 fs/dax.c| 13 -
 tools/testing/nvdimm/pmem-dax.c | 12 
 6 files changed, 24 insertions(+), 22 deletions(-)

-- 
1.8.3.1




[PATCH v2 2/6] s390, dcssblk: kaddr and pfn can be NULL to ->direct_access()

2018-07-25 Thread Huaisheng Ye
From: Huaisheng Ye 

dcssblk_direct_access() needs to check whether the pointers kaddr
and pfn are NULL before assigning through them. If either is NULL,
its value doesn't need to be calculated.

A NULL pointer means the caller has no need for kaddr or pfn, so this
patch allows callers to pass in NULL instead of having to pass in a
pointer or local variable that they then just throw away.

Signed-off-by: Huaisheng Ye 
---
 drivers/s390/block/dcssblk.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index ed60728..23e526c 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -922,9 +922,11 @@ static DEVICE_ATTR(save, S_IWUSR | S_IRUSR, 
dcssblk_save_show,
unsigned long dev_sz;
 
dev_sz = dev_info->end - dev_info->start + 1;
-   *kaddr = (void *) dev_info->start + offset;
-   *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset),
-   PFN_DEV|PFN_SPECIAL);
+   if (kaddr)
+   *kaddr = (void *) dev_info->start + offset;
+   if (pfn)
+   *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset),
+   PFN_DEV|PFN_SPECIAL);
 
return (dev_sz - offset) / PAGE_SIZE;
 }
-- 
1.8.3.1




[PATCH v2 0/6] kaddr and pfn can be NULL to ->direct_access()

2018-07-25 Thread Huaisheng Ye
From: Huaisheng Ye 

Changes since v1 [1]:
* Include the previous patches that allow pfn to be NULL.
* Reword the patch descriptions according to Christian's comment.
* According to Ross's suggestion, replace local pointer dummy_addr
  with NULL within md/dm-writecache for direct_access.

[1]: https://lkml.org/lkml/2018/7/24/199

Some functions within fs/dax, dax/super and md/dm-writecache don't
need the local pointer kaddr or the variable pfn from direct_access.
Passing NULL for kaddr or pfn to ->direct_access() is simpler and more
straightforward than supplying a useless local pointer or variable.

So every ->direct_access() implementation needs to check whether the
pointers kaddr and pfn are NULL before assigning through them. A NULL
pointer means the caller has no need for kaddr or pfn, so this patch
series allows callers to pass in NULL instead of having to pass in a
local pointer or variable that they then just throw away.

Huaisheng Ye (6):
  libnvdimm, pmem: kaddr and pfn can be NULL to ->direct_access()
  s390, dcssblk: kaddr and pfn can be NULL to ->direct_access()
  tools/testing/nvdimm: kaddr and pfn can be NULL to ->direct_access()
  dax/super: Do not request a pointer kaddr when not required
  md/dm-writecache: Don't request pointer dummy_addr when not required
  filesystem-dax: Do not request kaddr and pfn when not required

 drivers/dax/super.c |  3 +--
 drivers/md/dm-writecache.c  |  3 +--
 drivers/nvdimm/pmem.c   |  7 +--
 drivers/s390/block/dcssblk.c|  8 +---
 fs/dax.c| 13 -
 tools/testing/nvdimm/pmem-dax.c | 12 
 6 files changed, 24 insertions(+), 22 deletions(-)

-- 
1.8.3.1




[PATCH v2 1/6] libnvdimm, pmem: kaddr and pfn can be NULL to ->direct_access()

2018-07-25 Thread Huaisheng Ye
From: Huaisheng Ye 

pmem_direct_access() needs to check whether the pointers kaddr
and pfn are NULL before assigning through them. If either is NULL,
its value doesn't need to be calculated.

A NULL pointer means the caller has no need for kaddr or pfn, so this
patch allows callers to pass in NULL instead of having to pass in a
pointer or local variable that they then just throw away.

Signed-off-by: Huaisheng Ye 
---
 drivers/nvdimm/pmem.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 8b1fd7f..ecf9024 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -227,8 +227,11 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, 
pgoff_t pgoff,
if (unlikely(is_bad_pmem(>bb, PFN_PHYS(pgoff) / 512,
PFN_PHYS(nr_pages
return -EIO;
-   *kaddr = pmem->virt_addr + offset;
-   *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+
+   if (kaddr)
+   *kaddr = pmem->virt_addr + offset;
+   if (pfn)
+   *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
 
/*
 * If badblocks are present, limit known good range to the
-- 
1.8.3.1




[PATCH v2 1/6] libnvdimm, pmem: kaddr and pfn can be NULL to ->direct_access()

2018-07-25 Thread Huaisheng Ye
From: Huaisheng Ye 

pmem_direct_access() needs to check whether the pointers kaddr
and pfn are NULL before assigning through them. If either is NULL,
its value doesn't need to be calculated.

A NULL pointer means the caller has no need for kaddr or pfn, so this
patch allows callers to pass in NULL instead of having to pass in a
pointer or local variable that they then just throw away.

Signed-off-by: Huaisheng Ye 
---
 drivers/nvdimm/pmem.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 8b1fd7f..ecf9024 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -227,8 +227,11 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, 
pgoff_t pgoff,
if (unlikely(is_bad_pmem(>bb, PFN_PHYS(pgoff) / 512,
PFN_PHYS(nr_pages
return -EIO;
-   *kaddr = pmem->virt_addr + offset;
-   *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+
+   if (kaddr)
+   *kaddr = pmem->virt_addr + offset;
+   if (pfn)
+   *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
 
/*
 * If badblocks are present, limit known good range to the
-- 
1.8.3.1




[PATCH 4/5] filesystem-dax: Do not request a pointer kaddr when not required

2018-07-24 Thread Huaisheng Ye
From: Huaisheng Ye 

Some functions within fs/dax don't need the pointer kaddr from
direct_access. In support of allowing memmap initialization to run
in the background, elide requests for the pointer kaddr when not required.

Signed-off-by: Huaisheng Ye 
---
 fs/dax.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index aaec72de..abdb9e2 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -870,7 +870,6 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, 
size_t size,
 {
const sector_t sector = dax_iomap_sector(iomap, pos);
pgoff_t pgoff;
-   void *kaddr;
int id, rc;
long length;
 
@@ -879,7 +878,7 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, 
size_t size,
return rc;
id = dax_read_lock();
length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
-  , pfnp);
+  NULL, pfnp);
if (length < 0) {
rc = length;
goto out;
-- 
1.8.3.1




[PATCH 4/5] filesystem-dax: Do not request a pointer kaddr when not required

2018-07-24 Thread Huaisheng Ye
From: Huaisheng Ye 

Some functions within fs/dax don't need the pointer kaddr from
direct_access. In support of allowing memmap initialization to run
in the background, elide requests for the pointer kaddr when not required.

Signed-off-by: Huaisheng Ye 
---
 fs/dax.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index aaec72de..abdb9e2 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -870,7 +870,6 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, 
size_t size,
 {
const sector_t sector = dax_iomap_sector(iomap, pos);
pgoff_t pgoff;
-   void *kaddr;
int id, rc;
long length;
 
@@ -879,7 +878,7 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, 
size_t size,
return rc;
id = dax_read_lock();
length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
-  &kaddr, pfnp);
+  NULL, pfnp);
if (length < 0) {
rc = length;
goto out;
-- 
1.8.3.1




Re: [PATCH v2 0/4] Assigning NULL to gfn of dax_direct_access if useless

2018-07-04 Thread Huaisheng Ye





  On Thu, 05 Jul 2018 00:48:40 +0800 Dan Williams 
 wrote  
 > On Wed, Jul 4, 2018 at 9:38 AM, Huaisheng Ye  wrote: 
 > > From: Huaisheng Ye  
 > > 
 > > Changes since v1 [1]: 
 > > * Collect Jan's reviewed-by. 
 > > * According to Dan's suggestion, update the unit test infrastructure 
 > >   tools/testing/nvdimm/pmem-dax.c for checking the validity of gfn. 
 > > 
 > > [1]: https://lkml.org/lkml/2018/7/4/81 
 > > 
 > > --- 
 > > 
 > > Some functions within fs/dax don't need to get gfn from direct_access. 
 >  
 > Any reason you are calling it 'gfn'? I'm assuming that is a typo and 
 > it should be 'pfn'. 

Oh, sorry.
I made a mistake, it is a typo. I will correct it right now.

---
Cheers,
Huaisheng Ye



Re: [PATCH v2 0/4] Assigning NULL to gfn of dax_direct_access if useless

2018-07-04 Thread Huaisheng Ye





  On Thu, 05 Jul 2018 00:48:40 +0800 Dan Williams 
 wrote  
 > On Wed, Jul 4, 2018 at 9:38 AM, Huaisheng Ye  wrote: 
 > > From: Huaisheng Ye  
 > > 
 > > Changes since v1 [1]: 
 > > * Collect Jan's reviewed-by. 
 > > * According to Dan's suggestion, update the unit test infrastructure 
 > >   tools/testing/nvdimm/pmem-dax.c for checking the validity of gfn. 
 > > 
 > > [1]: https://lkml.org/lkml/2018/7/4/81 
 > > 
 > > --- 
 > > 
 > > Some functions within fs/dax don't need to get gfn from direct_access. 
 >  
 > Any reason you are calling it 'gfn'? I'm assuming that is a typo and 
 > it should be 'pfn'. 

Oh, sorry.
I made a mistake, it is a typo. I will correct it right now.

---
Cheers,
Huaisheng Ye



[PATCH v1] arch/x86/kernel/pci-dma: Remove useless parameter of arch_dma_alloc_attrs

2018-05-24 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

arch_dma_alloc_attrs has parameter gfp which is not used at all.
Remove it.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: "H. Peter Anvin" <h...@zytor.com>
Cc: Christoph Hellwig <h...@lst.de>
Cc: Marek Szyprowski <m.szyprow...@samsung.com>
Cc: Robin Murphy <robin.mur...@arm.com>
Cc: Konrad Rzeszutek Wilk <konrad.w...@oracle.com>
Cc: Andrew Morton <a...@linux-foundation.org>
Cc: Greg Kroah-Hartman <gre...@linuxfoundation.org>
Cc: Tom Lendacky <thomas.lenda...@amd.com>
Cc: Kate Stewart <kstew...@linuxfoundation.org>
Cc: Randy Dunlap <rdun...@infradead.org>
Cc: Michal Hocko <mho...@suse.com>
---
 arch/x86/include/asm/dma-mapping.h | 2 +-
 arch/x86/kernel/pci-dma.c  | 2 +-
 include/linux/dma-mapping.h| 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/dma-mapping.h 
b/arch/x86/include/asm/dma-mapping.h
index 89ce4bf..ef59747 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -33,7 +33,7 @@ static inline const struct dma_map_ops 
*get_arch_dma_ops(struct bus_type *bus)
 int arch_dma_supported(struct device *dev, u64 mask);
 #define arch_dma_supported arch_dma_supported
 
-bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
+bool arch_dma_alloc_attrs(struct device **dev);
 #define arch_dma_alloc_attrs arch_dma_alloc_attrs
 
 #endif
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 77625b6..94d1a49 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -76,7 +76,7 @@ void __init pci_iommu_alloc(void)
}
 }
 
-bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
+bool arch_dma_alloc_attrs(struct device **dev)
 {
if (!*dev)
*dev = &x86_dma_fallback_dev;
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f8ab1c0..c80bb09 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -502,7 +502,7 @@ void *dma_common_pages_remap(struct page **pages, size_t 
size,
 #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0)
 
 #ifndef arch_dma_alloc_attrs
-#define arch_dma_alloc_attrs(dev, flag)(true)
+#define arch_dma_alloc_attrs(dev)  (true)
 #endif
 
 static inline void *dma_alloc_attrs(struct device *dev, size_t size,
@@ -521,7 +521,7 @@ static inline void *dma_alloc_attrs(struct device *dev, 
size_t size,
/* let the implementation decide on the zone to allocate from: */
flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
 
-   if (!arch_dma_alloc_attrs(&dev, &flag))
+   if (!arch_dma_alloc_attrs(&dev))
return NULL;
if (!ops->alloc)
return NULL;
-- 
1.8.3.1




[PATCH v1] arch/x86/kernel/pci-dma: Remove useless parameter of arch_dma_alloc_attrs

2018-05-24 Thread Huaisheng Ye
From: Huaisheng Ye 

arch_dma_alloc_attrs has parameter gfp which is not used at all.
Remove it.

Signed-off-by: Huaisheng Ye 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: "H. Peter Anvin" 
Cc: Christoph Hellwig 
Cc: Marek Szyprowski 
Cc: Robin Murphy 
Cc: Konrad Rzeszutek Wilk 
Cc: Andrew Morton 
Cc: Greg Kroah-Hartman 
Cc: Tom Lendacky 
Cc: Kate Stewart 
Cc: Randy Dunlap 
Cc: Michal Hocko 
---
 arch/x86/include/asm/dma-mapping.h | 2 +-
 arch/x86/kernel/pci-dma.c  | 2 +-
 include/linux/dma-mapping.h| 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/dma-mapping.h 
b/arch/x86/include/asm/dma-mapping.h
index 89ce4bf..ef59747 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -33,7 +33,7 @@ static inline const struct dma_map_ops 
*get_arch_dma_ops(struct bus_type *bus)
 int arch_dma_supported(struct device *dev, u64 mask);
 #define arch_dma_supported arch_dma_supported
 
-bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
+bool arch_dma_alloc_attrs(struct device **dev);
 #define arch_dma_alloc_attrs arch_dma_alloc_attrs
 
 #endif
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 77625b6..94d1a49 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -76,7 +76,7 @@ void __init pci_iommu_alloc(void)
}
 }
 
-bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
+bool arch_dma_alloc_attrs(struct device **dev)
 {
if (!*dev)
*dev = &x86_dma_fallback_dev;
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f8ab1c0..c80bb09 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -502,7 +502,7 @@ void *dma_common_pages_remap(struct page **pages, size_t 
size,
 #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0)
 
 #ifndef arch_dma_alloc_attrs
-#define arch_dma_alloc_attrs(dev, flag)(true)
+#define arch_dma_alloc_attrs(dev)  (true)
 #endif
 
 static inline void *dma_alloc_attrs(struct device *dev, size_t size,
@@ -521,7 +521,7 @@ static inline void *dma_alloc_attrs(struct device *dev, 
size_t size,
/* let the implementation decide on the zone to allocate from: */
flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
 
-   if (!arch_dma_alloc_attrs(&dev, &flag))
+   if (!arch_dma_alloc_attrs(&dev))
return NULL;
if (!ops->alloc)
return NULL;
-- 
1.8.3.1




[RFC PATCH v3 6/9] mm/vmpressure: update usage of zone modifiers

2018-05-24 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Use __GFP_ZONE_MOVABLE to replace (__GFP_HIGHMEM | __GFP_MOVABLE).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from GFP
bitmasks, the bottom three bits of GFP mask is reserved for storing
encoded zone number.

__GFP_ZONE_MOVABLE contains encoded ZONE_MOVABLE and __GFP_MOVABLE flag.

With GFP_ZONE_TABLE, __GFP_HIGHMEM ORing __GFP_MOVABLE means gfp_zone
should return ZONE_MOVABLE. In order to keep that compatible with
GFP_ZONE_TABLE, replace (__GFP_HIGHMEM | __GFP_MOVABLE) with
__GFP_ZONE_MOVABLE.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Andrew Morton <a...@linux-foundation.org>
Cc: zhongjiang <zhongji...@huawei.com>
Cc: Minchan Kim <minc...@kernel.org>
Cc: Dan Carpenter <dan.carpen...@oracle.com>
Cc: David Rientjes <rient...@google.com>
Cc: Christoph Hellwig <h...@infradead.org>
---
 mm/vmpressure.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 85350ce..30a40e2 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -256,7 +256,7 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool 
tree,
 * Indirect reclaim (kswapd) sets sc->gfp_mask to GFP_KERNEL, so
 * we account it too.
 */
-   if (!(gfp & (__GFP_HIGHMEM | __GFP_MOVABLE | __GFP_IO | __GFP_FS)))
+   if (!(gfp & (__GFP_ZONE_MOVABLE | __GFP_IO | __GFP_FS)))
return;
 
/*
-- 
1.8.3.1




[RFC PATCH v3 6/9] mm/vmpressure: update usage of zone modifiers

2018-05-24 Thread Huaisheng Ye
From: Huaisheng Ye 

Use __GFP_ZONE_MOVABLE to replace (__GFP_HIGHMEM | __GFP_MOVABLE).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from GFP
bitmasks, the bottom three bits of GFP mask is reserved for storing
encoded zone number.

__GFP_ZONE_MOVABLE contains encoded ZONE_MOVABLE and __GFP_MOVABLE flag.

With GFP_ZONE_TABLE, __GFP_HIGHMEM ORing __GFP_MOVABLE means gfp_zone
should return ZONE_MOVABLE. In order to keep that compatible with
GFP_ZONE_TABLE, replace (__GFP_HIGHMEM | __GFP_MOVABLE) with
__GFP_ZONE_MOVABLE.

Signed-off-by: Huaisheng Ye 
Cc: Andrew Morton 
Cc: zhongjiang 
Cc: Minchan Kim 
Cc: Dan Carpenter 
Cc: David Rientjes 
Cc: Christoph Hellwig 
---
 mm/vmpressure.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 85350ce..30a40e2 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -256,7 +256,7 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool 
tree,
 * Indirect reclaim (kswapd) sets sc->gfp_mask to GFP_KERNEL, so
 * we account it too.
 */
-   if (!(gfp & (__GFP_HIGHMEM | __GFP_MOVABLE | __GFP_IO | __GFP_FS)))
+   if (!(gfp & (__GFP_ZONE_MOVABLE | __GFP_IO | __GFP_FS)))
return;
 
/*
-- 
1.8.3.1




[RFC PATCH v3 5/9] drivers/block/zram/zram_drv: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Use __GFP_ZONE_MOVABLE to replace (__GFP_HIGHMEM | __GFP_MOVABLE).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from GFP
bitmasks, the bottom three bits of GFP mask is reserved for storing
encoded zone number.

__GFP_ZONE_MOVABLE contains encoded ZONE_MOVABLE and __GFP_MOVABLE flag.

With GFP_ZONE_TABLE, __GFP_HIGHMEM ORing __GFP_MOVABLE means gfp_zone
should return ZONE_MOVABLE. In order to keep that compatible with
GFP_ZONE_TABLE, replace (__GFP_HIGHMEM | __GFP_MOVABLE) with
__GFP_ZONE_MOVABLE.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Minchan Kim <minc...@kernel.org>
Cc: Nitin Gupta <ngu...@vflare.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky.w...@gmail.com>
Cc: Christoph Hellwig <h...@infradead.org>
---
 drivers/block/zram/zram_drv.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 0f3fadd..1bb5ca8 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1004,14 +1004,12 @@ static int __zram_bvec_write(struct zram *zram, struct 
bio_vec *bvec,
handle = zs_malloc(zram->mem_pool, comp_len,
__GFP_KSWAPD_RECLAIM |
__GFP_NOWARN |
-   __GFP_HIGHMEM |
-   __GFP_MOVABLE);
+   __GFP_ZONE_MOVABLE);
if (!handle) {
zcomp_stream_put(zram->comp);
atomic64_inc(&zram->stats.writestall);
handle = zs_malloc(zram->mem_pool, comp_len,
-   GFP_NOIO | __GFP_HIGHMEM |
-   __GFP_MOVABLE);
+   GFP_NOIO | __GFP_ZONE_MOVABLE);
if (handle)
goto compress_again;
return -ENOMEM;
-- 
1.8.3.1



[RFC PATCH v3 5/9] drivers/block/zram/zram_drv: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye 

Use __GFP_ZONE_MOVABLE to replace (__GFP_HIGHMEM | __GFP_MOVABLE).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from GFP
bitmasks, the bottom three bits of GFP mask is reserved for storing
encoded zone number.

__GFP_ZONE_MOVABLE contains encoded ZONE_MOVABLE and __GFP_MOVABLE flag.

With GFP_ZONE_TABLE, __GFP_HIGHMEM ORing __GFP_MOVABLE means gfp_zone
should return ZONE_MOVABLE. In order to keep that compatible with
GFP_ZONE_TABLE, replace (__GFP_HIGHMEM | __GFP_MOVABLE) with
__GFP_ZONE_MOVABLE.

Signed-off-by: Huaisheng Ye 
Cc: Minchan Kim 
Cc: Nitin Gupta 
Cc: Sergey Senozhatsky 
Cc: Christoph Hellwig 
---
 drivers/block/zram/zram_drv.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 0f3fadd..1bb5ca8 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1004,14 +1004,12 @@ static int __zram_bvec_write(struct zram *zram, struct 
bio_vec *bvec,
handle = zs_malloc(zram->mem_pool, comp_len,
__GFP_KSWAPD_RECLAIM |
__GFP_NOWARN |
-   __GFP_HIGHMEM |
-   __GFP_MOVABLE);
+   __GFP_ZONE_MOVABLE);
if (!handle) {
zcomp_stream_put(zram->comp);
atomic64_inc(&zram->stats.writestall);
handle = zs_malloc(zram->mem_pool, comp_len,
-   GFP_NOIO | __GFP_HIGHMEM |
-   __GFP_MOVABLE);
+   GFP_NOIO | __GFP_ZONE_MOVABLE);
if (handle)
goto compress_again;
return -ENOMEM;
-- 
1.8.3.1



[RFC PATCH v3 9/9] arch/x86/include/asm/page.h: update usage of movableflags

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

GFP_HIGHUSER_MOVABLE is not equal to GFP_HIGHUSER | __GFP_MOVABLE;
update this code to match the patch that gets rid of GFP_ZONE_TABLE/BAD.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: "H. Peter Anvin" <h...@zytor.com>
Cc: Kate Stewart <kstew...@linuxfoundation.org>
Cc: Greg Kroah-Hartman <gre...@linuxfoundation.org>
Cc: x...@kernel.org <x...@kernel.org>
Cc: Philippe Ombredanne <pombreda...@nexb.com>
Cc: Christoph Hellwig <h...@infradead.org>
---
 arch/x86/include/asm/page.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 7555b48..a47f42d 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -35,7 +35,8 @@ static inline void copy_user_page(void *to, void *from, 
unsigned long vaddr,
 }
 
 #define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
-   alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+   alloc_page_vma((movableflags ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER) \
+   | __GFP_ZERO, vma, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
 #ifndef __pa
-- 
1.8.3.1



[RFC PATCH v3 9/9] arch/x86/include/asm/page.h: update usage of movableflags

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye 

GFP_HIGHUSER_MOVABLE is not equal to GFP_HIGHUSER | __GFP_MOVABLE;
update this code to match the patch that gets rid of GFP_ZONE_TABLE/BAD.

Signed-off-by: Huaisheng Ye 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: "H. Peter Anvin" 
Cc: Kate Stewart 
Cc: Greg Kroah-Hartman 
Cc: x...@kernel.org 
Cc: Philippe Ombredanne 
Cc: Christoph Hellwig 
---
 arch/x86/include/asm/page.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 7555b48..a47f42d 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -35,7 +35,8 @@ static inline void copy_user_page(void *to, void *from, 
unsigned long vaddr,
 }
 
 #define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
-   alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+   alloc_page_vma((movableflags ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER) \
+   | __GFP_ZERO, vma, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
 #ifndef __pa
-- 
1.8.3.1



[RFC PATCH v3 8/9] include/linux/highmem.h: update usage of movableflags

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

GFP_HIGHUSER_MOVABLE is not equal to GFP_HIGHUSER | __GFP_MOVABLE;
update this code to match the patch that gets rid of GFP_ZONE_TABLE/BAD.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Kate Stewart <kstew...@linuxfoundation.org>
Cc: Greg Kroah-Hartman <gre...@linuxfoundation.org>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Philippe Ombredanne <pombreda...@nexb.com>
Cc: Christoph Hellwig <h...@infradead.org>
---
 include/linux/highmem.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 0690679..5383c9e 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -159,8 +159,8 @@ static inline void clear_user_highpage(struct page *page, 
unsigned long vaddr)
struct vm_area_struct *vma,
unsigned long vaddr)
 {
-   struct page *page = alloc_page_vma(GFP_HIGHUSER | movableflags,
-   vma, vaddr);
+   struct page *page = alloc_page_vma(movableflags ?
+   GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER, vma, vaddr);
 
if (page)
clear_user_highpage(page, vaddr);
-- 
1.8.3.1



[RFC PATCH v3 8/9] include/linux/highmem.h: update usage of movableflags

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye 

GFP_HIGHUSER_MOVABLE is not equal to GFP_HIGHUSER | __GFP_MOVABLE;
update this code to match the patch that gets rid of GFP_ZONE_TABLE/BAD.

Signed-off-by: Huaisheng Ye 
Cc: Kate Stewart 
Cc: Greg Kroah-Hartman 
Cc: Thomas Gleixner 
Cc: Philippe Ombredanne 
Cc: Christoph Hellwig 
---
 include/linux/highmem.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 0690679..5383c9e 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -159,8 +159,8 @@ static inline void clear_user_highpage(struct page *page, 
unsigned long vaddr)
struct vm_area_struct *vma,
unsigned long vaddr)
 {
-   struct page *page = alloc_page_vma(GFP_HIGHUSER | movableflags,
-   vma, vaddr);
+   struct page *page = alloc_page_vma(movableflags ?
+   GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER, vma, vaddr);
 
if (page)
clear_user_highpage(page, vaddr);
-- 
1.8.3.1



[RFC PATCH v3 7/9] mm/zsmalloc: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Use __GFP_ZONE_MOVABLE to replace (__GFP_HIGHMEM | __GFP_MOVABLE).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from GFP
bitmasks, the bottom three bits of GFP mask is reserved for storing
encoded zone number.

__GFP_ZONE_MOVABLE contains encoded ZONE_MOVABLE and __GFP_MOVABLE flag.

With GFP_ZONE_TABLE, __GFP_HIGHMEM ORing __GFP_MOVABLE means gfp_zone
should return ZONE_MOVABLE. In order to keep that compatible with
GFP_ZONE_TABLE, use GFP_NORMAL_UNMOVABLE() to clear the bottom 4 bits of
the GFP bitmask.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Minchan Kim <minc...@kernel.org>
Cc: Nitin Gupta <ngu...@vflare.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky.w...@gmail.com>
Cc: Christoph Hellwig <h...@infradead.org>
---
 mm/zsmalloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 61cb05d..e250c69 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -345,7 +345,7 @@ static void destroy_cache(struct zs_pool *pool)
 static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
 {
return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
-   gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+   GFP_NORMAL_UNMOVABLE(gfp));
 }
 
 static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
@@ -356,7 +356,7 @@ static void cache_free_handle(struct zs_pool *pool, 
unsigned long handle)
 static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
 {
return kmem_cache_alloc(pool->zspage_cachep,
-   flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+   GFP_NORMAL_UNMOVABLE(flags));
 }
 
 static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
-- 
1.8.3.1



[RFC PATCH v3 7/9] mm/zsmalloc: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye 

Use __GFP_ZONE_MOVABLE to replace (__GFP_HIGHMEM | __GFP_MOVABLE).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from GFP
bitmasks, the bottom three bits of GFP mask is reserved for storing
encoded zone number.

__GFP_ZONE_MOVABLE contains encoded ZONE_MOVABLE and __GFP_MOVABLE flag.

With GFP_ZONE_TABLE, __GFP_HIGHMEM ORing __GFP_MOVABLE means gfp_zone
should return ZONE_MOVABLE. In order to keep that compatible with
GFP_ZONE_TABLE, use GFP_NORMAL_UNMOVABLE() to clear the bottom 4 bits of
the GFP bitmask.

Signed-off-by: Huaisheng Ye 
Cc: Minchan Kim 
Cc: Nitin Gupta 
Cc: Sergey Senozhatsky 
Cc: Christoph Hellwig 
---
 mm/zsmalloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 61cb05d..e250c69 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -345,7 +345,7 @@ static void destroy_cache(struct zs_pool *pool)
 static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
 {
return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
-   gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+   GFP_NORMAL_UNMOVABLE(gfp));
 }
 
 static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
@@ -356,7 +356,7 @@ static void cache_free_handle(struct zs_pool *pool, 
unsigned long handle)
 static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
 {
return kmem_cache_alloc(pool->zspage_cachep,
-   flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+   GFP_NORMAL_UNMOVABLE(flags));
 }
 
 static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
-- 
1.8.3.1



[RFC PATCH v3 4/9] fs/btrfs/extent_io: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Use __GFP_ZONE_MASK to replace (__GFP_DMA32 | __GFP_HIGHMEM).

In function alloc_extent_state, it is obvious that __GFP_DMA is not
the expected zone type.

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.
__GFP_DMA, __GFP_HIGHMEM and __GFP_DMA32 should not be combined with
each other by OR.

Use GFP_NORMAL() to clear the bottom 3 bits of the GFP bitmask.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Chris Mason <c...@fb.com>
Cc: Josef Bacik <jba...@fb.com>
Cc: David Sterba <dste...@suse.com>
Cc: Christoph Hellwig <h...@infradead.org>
---
 fs/btrfs/extent_io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e99b329..f41fc61 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -220,7 +220,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
 * The given mask might be not appropriate for the slab allocator,
 * drop the unsupported bits
 */
-   mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
+   mask = GFP_NORMAL(mask);
state = kmem_cache_alloc(extent_state_cache, mask);
if (!state)
return state;
-- 
1.8.3.1



[RFC PATCH v3 4/9] fs/btrfs/extent_io: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye 

Use __GFP_ZONE_MASK to replace (__GFP_DMA32 | __GFP_HIGHMEM).

In function alloc_extent_state, it is obvious that __GFP_DMA is not
the expected zone type.

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.
__GFP_DMA, __GFP_HIGHMEM and __GFP_DMA32 should not be combined with
each other by OR.

Use GFP_NORMAL() to clear the bottom 3 bits of the GFP bitmask.

Signed-off-by: Huaisheng Ye 
Cc: Chris Mason 
Cc: Josef Bacik 
Cc: David Sterba 
Cc: Christoph Hellwig 
---
 fs/btrfs/extent_io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e99b329..f41fc61 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -220,7 +220,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
 * The given mask might be not appropriate for the slab allocator,
 * drop the unsupported bits
 */
-   mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
+   mask = GFP_NORMAL(mask);
state = kmem_cache_alloc(extent_state_cache, mask);
if (!state)
return state;
-- 
1.8.3.1



[RFC PATCH v3 3/9] drivers/xen/swiotlb-xen: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Use __GFP_ZONE_MASK to replace (__GFP_DMA | __GFP_HIGHMEM).

In function xen_swiotlb_alloc_coherent, it is obvious that __GFP_DMA32
is not the expected zone type.

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.
__GFP_DMA, __GFP_HIGHMEM and __GFP_DMA32 should not be combined with
each other by OR.

Use GFP_NORMAL() to clear the bottom 3 bits of the GFP bitmask.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Konrad Rzeszutek Wilk <konrad.w...@oracle.com>
Cc: Boris Ostrovsky <boris.ostrov...@oracle.com>
Cc: Juergen Gross <jgr...@suse.com>
Cc: Christoph Hellwig <h...@infradead.org>
---
 drivers/xen/swiotlb-xen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index e1c6089..359 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -301,7 +301,7 @@ int __ref xen_swiotlb_init(int verbose, bool early)
* machine physical layout.  We can't allocate highmem
* because we can't return a pointer to it.
*/
-   flags &= ~(__GFP_DMA | __GFP_HIGHMEM);
+   flags = GFP_NORMAL(flags);
 
/* On ARM this function returns an ioremap'ped virtual address for
 * which virt_to_phys doesn't return the corresponding physical
-- 
1.8.3.1



[RFC PATCH v3 3/9] drivers/xen/swiotlb-xen: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye 

Use __GFP_ZONE_MASK to replace (__GFP_DMA | __GFP_HIGHMEM).

In function xen_swiotlb_alloc_coherent, it is obvious that __GFP_DMA32
is not the expected zone type.

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.
__GFP_DMA, __GFP_HIGHMEM and __GFP_DMA32 should not be combined with
each other by OR.

Use GFP_NORMAL() to clear the bottom 3 bits of the GFP bitmask.

Signed-off-by: Huaisheng Ye 
Cc: Konrad Rzeszutek Wilk 
Cc: Boris Ostrovsky 
Cc: Juergen Gross 
Cc: Christoph Hellwig 
---
 drivers/xen/swiotlb-xen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index e1c6089..359 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -301,7 +301,7 @@ int __ref xen_swiotlb_init(int verbose, bool early)
* machine physical layout.  We can't allocate highmem
* because we can't return a pointer to it.
*/
-   flags &= ~(__GFP_DMA | __GFP_HIGHMEM);
+   flags = GFP_NORMAL(flags);
 
/* On ARM this function returns an ioremap'ped virtual address for
 * which virt_to_phys doesn't return the corresponding physical
-- 
1.8.3.1



[RFC PATCH v3 2/9] include/linux/dma-mapping: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Use __GFP_ZONE_MASK to replace (__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.
__GFP_DMA, __GFP_HIGHMEM and __GFP_DMA32 should not be combined with
each other by OR.

Use GFP_NORMAL() to clear the bottom 3 bits of the GFP bitmask.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Christoph Hellwig <h...@lst.de>
Cc: Marek Szyprowski <m.szyprow...@samsung.com>
Cc: Robin Murphy <robin.mur...@arm.com>
Cc: Christoph Hellwig <h...@infradead.org>
---
 include/linux/dma-mapping.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f8ab1c0..8fe524d 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -519,7 +519,7 @@ static inline void *dma_alloc_attrs(struct device *dev, 
size_t size,
return cpu_addr;
 
/* let the implementation decide on the zone to allocate from: */
-   flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
+   flag = GFP_NORMAL(flag);
 
if (!arch_dma_alloc_attrs(&dev, &flag))
return NULL;
-- 
1.8.3.1



[RFC PATCH v3 2/9] include/linux/dma-mapping: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye 

Use __GFP_ZONE_MASK to replace (__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.
__GFP_DMA, __GFP_HIGHMEM and __GFP_DMA32 should not be combined with
each other by OR.

Use GFP_NORMAL() to clear the bottom 3 bits of the GFP bitmask.

Signed-off-by: Huaisheng Ye 
Cc: Christoph Hellwig 
Cc: Marek Szyprowski 
Cc: Robin Murphy 
Cc: Christoph Hellwig 
---
 include/linux/dma-mapping.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f8ab1c0..8fe524d 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -519,7 +519,7 @@ static inline void *dma_alloc_attrs(struct device *dev, 
size_t size,
return cpu_addr;
 
/* let the implementation decide on the zone to allocate from: */
-   flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
+   flag = GFP_NORMAL(flag);
 
if (!arch_dma_alloc_attrs(&dev, &flag))
return NULL;
-- 
1.8.3.1



[RFC PATCH v3 1/9] include/linux/gfp.h: get rid of GFP_ZONE_TABLE/BAD

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Replace GFP_ZONE_TABLE and GFP_ZONE_BAD with encoded zone number.

Delete ___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 from GFP bitmasks;
the bottom three bits of the GFP mask are reserved for storing the encoded
zone number.

The encoding method is XOR. Get zone number from enum zone_type,
then encode the number with ZONE_NORMAL by XOR operation.
The goal is to make sure ZONE_NORMAL can be encoded to zero. So,
the compatibility can be guaranteed, such as GFP_KERNEL and GFP_ATOMIC
can be used as before.

Reserve __GFP_MOVABLE in bit 3, so that it can continue to be used as
a flag. Same as before, __GFP_MOVABLE represents the movable migrate type
for ZONE_DMA, ZONE_DMA32, and ZONE_NORMAL. But when it is enabled with
__GFP_HIGHMEM, ZONE_MOVABLE shall be returned instead of ZONE_HIGHMEM.
__GFP_ZONE_MOVABLE is created to realize it.

With this patch, just enabling __GFP_MOVABLE and __GFP_HIGHMEM is not
enough to get ZONE_MOVABLE from gfp_zone. All subsystems should use
GFP_HIGHUSER_MOVABLE directly to achieve that.

Decode zone number directly from bottom three bits of flags in gfp_zone.
The theory of encoding and decoding is,
A ^ B ^ B = A

Suggested-by: Matthew Wilcox <wi...@infradead.org>
Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Andrew Morton <a...@linux-foundation.org>
Cc: Vlastimil Babka <vba...@suse.cz>
Cc: Michal Hocko <mho...@suse.com>
Cc: Mel Gorman <mgor...@techsingularity.net>
Cc: Kate Stewart <kstew...@linuxfoundation.org>
Cc: "Levin, Alexander (Sasha Levin)" <alexander.le...@verizon.com>
Cc: Greg Kroah-Hartman <gre...@linuxfoundation.org>
Cc: Christoph Hellwig <h...@infradead.org>
---
 include/linux/gfp.h | 107 ++--
 1 file changed, 20 insertions(+), 87 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 1a4582b..f76ccd76 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -16,9 +16,7 @@
  */
 
 /* Plain integer GFP bitmasks. Do not use this directly. */
-#define ___GFP_DMA 0x01u
-#define ___GFP_HIGHMEM 0x02u
-#define ___GFP_DMA32   0x04u
+#define ___GFP_ZONE_MASK   0x07u
 #define ___GFP_MOVABLE 0x08u
 #define ___GFP_RECLAIMABLE 0x10u
 #define ___GFP_HIGH0x20u
@@ -53,11 +51,15 @@
  * without the underscores and use them consistently. The definitions here may
  * be used in bit comparisons.
  */
-#define __GFP_DMA  ((__force gfp_t)___GFP_DMA)
-#define __GFP_HIGHMEM  ((__force gfp_t)___GFP_HIGHMEM)
-#define __GFP_DMA32((__force gfp_t)___GFP_DMA32)
+#define __GFP_DMA  ((__force gfp_t)OPT_ZONE_DMA ^ ZONE_NORMAL)
+#define __GFP_HIGHMEM  ((__force gfp_t)OPT_ZONE_HIGHMEM ^ ZONE_NORMAL)
+#define __GFP_DMA32((__force gfp_t)OPT_ZONE_DMA32 ^ ZONE_NORMAL)
 #define __GFP_MOVABLE  ((__force gfp_t)___GFP_MOVABLE)  /* ZONE_MOVABLE 
allowed */
-#define GFP_ZONEMASK   (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
+#define GFP_ZONEMASK   ((__force gfp_t)___GFP_ZONE_MASK | ___GFP_MOVABLE)
+/* bottom 3 bits of GFP bitmasks are used for zone number encoded*/
+#define __GFP_ZONE_MASK ((__force gfp_t)___GFP_ZONE_MASK)
+#define __GFP_ZONE_MOVABLE \
+   ((__force gfp_t)(ZONE_MOVABLE ^ ZONE_NORMAL) | ___GFP_MOVABLE)
 
 /*
  * Page mobility and placement hints
@@ -268,6 +270,13 @@
  *   available and will not wake kswapd/kcompactd on failure. The _LIGHT
  *   version does not attempt reclaim/compaction at all and is by default used
  *   in page fault path, while the non-light is used by khugepaged.
+ *
+ * GFP_NORMAL() is used to clear bottom 3 bits of GFP bitmask. Actually it
+ *   returns encoded ZONE_NORMAL bits.
+ *
+ * GFP_NORMAL_UNMOVABLE() is similar to GFP_NORMAL, but it clear bottom 4 bits
+ *   of GFP bitmask. Excepting the encoded ZONE_NORMAL bits, it clears MOVABLE
+ *   flags as well.
  */
 #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
 #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
@@ -279,10 +288,12 @@
 #define GFP_DMA__GFP_DMA
 #define GFP_DMA32  __GFP_DMA32
 #define GFP_HIGHUSER   (GFP_USER | __GFP_HIGHMEM)
-#define GFP_HIGHUSER_MOVABLE   (GFP_HIGHUSER | __GFP_MOVABLE)
+#define GFP_HIGHUSER_MOVABLE   (GFP_USER | __GFP_ZONE_MOVABLE)
 #define GFP_TRANSHUGE_LIGHT((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
 __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
 #define GFP_TRANSHUGE  (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
+#define GFP_NORMAL(gfp)((gfp) & ~__GFP_ZONE_MASK)
+#define GFP_NORMAL_UNMOVABLE(gfp) ((gfp) & ~GFP_ZONEMASK)
 
 /* Convert GFP flags to their corresponding migrate type */
 #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
@@ -326,87 +337,9 @@ static inline bool gfpflags_allow_blocking(const gfp_t 
gfp_flags)
 #define OPT_ZONE_DMA32 ZONE_NORMAL
 #endif
 
-/*
- * GFP_ZONE_TABLE is a wor

[RFC PATCH v3 1/9] include/linux/gfp.h: get rid of GFP_ZONE_TABLE/BAD

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye 

Replace GFP_ZONE_TABLE and GFP_ZONE_BAD with encoded zone number.

Delete ___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 from the GFP bitmasks;
the bottom three bits of the GFP mask are reserved for storing the encoded
zone number.

The encoding method is XOR. Get zone number from enum zone_type,
then encode the number with ZONE_NORMAL by XOR operation.
The goal is to make sure ZONE_NORMAL can be encoded to zero. So,
the compatibility can be guaranteed, such as GFP_KERNEL and GFP_ATOMIC
can be used as before.

Reserve __GFP_MOVABLE in bit 3, so that it can continue to be used as
a flag. Same as before, __GFP_MOVABLE represents the movable migrate type
for ZONE_DMA, ZONE_DMA32, and ZONE_NORMAL. But when it is enabled with
__GFP_HIGHMEM, ZONE_MOVABLE shall be returned instead of ZONE_HIGHMEM.
__GFP_ZONE_MOVABLE is created to realize it.

With this patch, just enabling __GFP_MOVABLE and __GFP_HIGHMEM is not
enough to get ZONE_MOVABLE from gfp_zone. All subsystems should use
GFP_HIGHUSER_MOVABLE directly to achieve that.

Decode zone number directly from bottom three bits of flags in gfp_zone.
The theory of encoding and decoding is,
A ^ B ^ B = A

Suggested-by: Matthew Wilcox 
Signed-off-by: Huaisheng Ye 
Cc: Andrew Morton 
Cc: Vlastimil Babka 
Cc: Michal Hocko 
Cc: Mel Gorman 
Cc: Kate Stewart 
Cc: "Levin, Alexander (Sasha Levin)" 
Cc: Greg Kroah-Hartman 
Cc: Christoph Hellwig 
---
 include/linux/gfp.h | 107 ++--
 1 file changed, 20 insertions(+), 87 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 1a4582b..f76ccd76 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -16,9 +16,7 @@
  */
 
 /* Plain integer GFP bitmasks. Do not use this directly. */
-#define ___GFP_DMA 0x01u
-#define ___GFP_HIGHMEM 0x02u
-#define ___GFP_DMA32   0x04u
+#define ___GFP_ZONE_MASK   0x07u
 #define ___GFP_MOVABLE 0x08u
 #define ___GFP_RECLAIMABLE 0x10u
 #define ___GFP_HIGH0x20u
@@ -53,11 +51,15 @@
  * without the underscores and use them consistently. The definitions here may
  * be used in bit comparisons.
  */
-#define __GFP_DMA  ((__force gfp_t)___GFP_DMA)
-#define __GFP_HIGHMEM  ((__force gfp_t)___GFP_HIGHMEM)
-#define __GFP_DMA32((__force gfp_t)___GFP_DMA32)
+#define __GFP_DMA  ((__force gfp_t)OPT_ZONE_DMA ^ ZONE_NORMAL)
+#define __GFP_HIGHMEM  ((__force gfp_t)OPT_ZONE_HIGHMEM ^ ZONE_NORMAL)
+#define __GFP_DMA32((__force gfp_t)OPT_ZONE_DMA32 ^ ZONE_NORMAL)
 #define __GFP_MOVABLE  ((__force gfp_t)___GFP_MOVABLE)  /* ZONE_MOVABLE 
allowed */
-#define GFP_ZONEMASK   (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
+#define GFP_ZONEMASK   ((__force gfp_t)___GFP_ZONE_MASK | ___GFP_MOVABLE)
+/* bottom 3 bits of GFP bitmasks are used for zone number encoded*/
+#define __GFP_ZONE_MASK ((__force gfp_t)___GFP_ZONE_MASK)
+#define __GFP_ZONE_MOVABLE \
+   ((__force gfp_t)(ZONE_MOVABLE ^ ZONE_NORMAL) | ___GFP_MOVABLE)
 
 /*
  * Page mobility and placement hints
@@ -268,6 +270,13 @@
  *   available and will not wake kswapd/kcompactd on failure. The _LIGHT
  *   version does not attempt reclaim/compaction at all and is by default used
  *   in page fault path, while the non-light is used by khugepaged.
+ *
+ * GFP_NORMAL() is used to clear bottom 3 bits of GFP bitmask. Actually it
+ *   returns encoded ZONE_NORMAL bits.
+ *
+ * GFP_NORMAL_UNMOVABLE() is similar to GFP_NORMAL, but it clear bottom 4 bits
+ *   of GFP bitmask. Excepting the encoded ZONE_NORMAL bits, it clears MOVABLE
+ *   flags as well.
  */
 #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
 #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
@@ -279,10 +288,12 @@
 #define GFP_DMA__GFP_DMA
 #define GFP_DMA32  __GFP_DMA32
 #define GFP_HIGHUSER   (GFP_USER | __GFP_HIGHMEM)
-#define GFP_HIGHUSER_MOVABLE   (GFP_HIGHUSER | __GFP_MOVABLE)
+#define GFP_HIGHUSER_MOVABLE   (GFP_USER | __GFP_ZONE_MOVABLE)
 #define GFP_TRANSHUGE_LIGHT((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
 __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
 #define GFP_TRANSHUGE  (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
+#define GFP_NORMAL(gfp)((gfp) & ~__GFP_ZONE_MASK)
+#define GFP_NORMAL_UNMOVABLE(gfp) ((gfp) & ~GFP_ZONEMASK)
 
 /* Convert GFP flags to their corresponding migrate type */
 #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
@@ -326,87 +337,9 @@ static inline bool gfpflags_allow_blocking(const gfp_t 
gfp_flags)
 #define OPT_ZONE_DMA32 ZONE_NORMAL
 #endif
 
-/*
- * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the
- * zone to use given the lowest 4 bits of gfp_t. Entries are GFP_ZONES_SHIFT
- * bits long and there are 16 of them to cover all possible combinations of
- * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM.
- *
- * The zone fallback order is MOVABLE=>HI

[RFC PATCH v3 0/9] get rid of GFP_ZONE_TABLE/BAD

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Changes since v2: [2]
* According to Christoph's suggestion, rebase patches to current
  mainline from v4.16.

* Follow the advice of Matthew, create macros like GFP_NORMAL and
  GFP_NORMAL_UNMOVABLE to clear bottom 3 and 4 bits of GFP bitmask.

* Delete some patches because of kernel updating.

[2]: https://marc.info/?l=linux-mm&m=152691610014027&w=2

Tested by Lenovo Thinksystem server.

Initmem setup node 0 [mem 0x1000-0x00043fff]
[0.00] On node 0 totalpages: 4111666
[0.00]   DMA zone: 64 pages used for memmap
[0.00]   DMA zone: 23 pages reserved
[0.00]   DMA zone: 3999 pages, LIFO batch:0
[0.00] mminit::memmap_init Initialising map node 0 zone 0 pfns 1 -> 
4096 
[0.00]   DMA32 zone: 10935 pages used for memmap
[0.00]   DMA32 zone: 699795 pages, LIFO batch:31
[0.00] mminit::memmap_init Initialising map node 0 zone 1 pfns 4096 -> 
1048576
[0.00]   Normal zone: 53248 pages used for memmap
[0.00]   Normal zone: 3407872 pages, LIFO batch:31
[0.00] mminit::memmap_init Initialising map node 0 zone 2 pfns 1048576 
-> 4456448
[0.00] mminit::memmap_init Initialising map node 0 zone 3 pfns 1 -> 
4456448
[0.00] Initmem setup node 1 [mem 0x00238000-0x00277fff]
[0.00] On node 1 totalpages: 4194304
[0.00]   Normal zone: 65536 pages used for memmap
[0.00]   Normal zone: 4194304 pages, LIFO batch:31
[0.00] mminit::memmap_init Initialising map node 1 zone 2 pfns 37224448 
-> 41418752
[0.00] mminit::memmap_init Initialising map node 1 zone 3 pfns 37224448 
-> 41418752
...
[0.00] mminit::zonelist general 0:DMA = 0:DMA
[0.00] mminit::zonelist general 0:DMA32 = 0:DMA32 0:DMA
[0.00] mminit::zonelist general 0:Normal = 0:Normal 0:DMA32 0:DMA 
1:Normal
[0.00] mminit::zonelist thisnode 0:DMA = 0:DMA
[0.00] mminit::zonelist thisnode 0:DMA32 = 0:DMA32 0:DMA
[0.00] mminit::zonelist thisnode 0:Normal = 0:Normal 0:DMA32 0:DMA
[0.00] mminit::zonelist general 1:Normal = 1:Normal 0:Normal 0:DMA32 
0:DMA
[0.00] mminit::zonelist thisnode 1:Normal = 1:Normal
[0.00] Built 2 zonelists, mobility grouping on.  Total pages: 8176164
[0.00] Policy zone: Normal
[0.00] Kernel command line: BOOT_IMAGE=/vmlinuz-4.17.0-rc6-gfp09+ 
root=/dev/mapper/fedora-root ro rd.lvm.lv=fedora/root rd.lvm.lv=fedora/swap 
debug 
LANG=en_US.UTF-8 mminit_loglevel=4 console=tty0 console=ttyS0,115200n8 
memblock=debug
earlyprintk=serial,0x3f8,115200

---

Replace GFP_ZONE_TABLE and GFP_ZONE_BAD with encoded zone number.

Delete ___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 from the GFP bitmasks;
the bottom three bits of the GFP mask are reserved for storing the encoded
zone number.

The encoding method is XOR. Get zone number from enum zone_type,
then encode the number with ZONE_NORMAL by XOR operation.
The goal is to make sure ZONE_NORMAL can be encoded to zero. So,
the compatibility can be guaranteed, such as GFP_KERNEL and GFP_ATOMIC
can be used as before.

Reserve __GFP_MOVABLE in bit 3, so that it can continue to be used as
a flag. Same as before, __GFP_MOVABLE represents the movable migrate type
for ZONE_DMA, ZONE_DMA32, and ZONE_NORMAL. But when it is enabled with
__GFP_HIGHMEM, ZONE_MOVABLE shall be returned instead of ZONE_HIGHMEM.
__GFP_ZONE_MOVABLE is created to realize it.

With this patch, just enabling __GFP_MOVABLE and __GFP_HIGHMEM is not
enough to get ZONE_MOVABLE from gfp_zone. All callers should use
GFP_HIGHUSER_MOVABLE or __GFP_ZONE_MOVABLE directly to achieve that.

Decode zone number directly from bottom three bits of flags in gfp_zone.
The theory of encoding and decoding is,
A ^ B ^ B = A

Changes since v1:[1]

* Create __GFP_ZONE_MOVABLE and modify GFP_HIGHUSER_MOVABLE to help
  callers to get ZONE_MOVABLE. Try to create __GFP_ZONE_MASK to mask
  lowest 3 bits of GFP bitmasks.

* Modify some callers' gfp flag to update usage of address zone
  modifiers.

* Modify inline function gfp_zone to get better performance according
  to Matthew's suggestion.

[1]: https://marc.info/?l=linux-mm&m=152596791931266&w=2

---

Huaisheng Ye (9):
  include/linux/gfp.h: get rid of GFP_ZONE_TABLE/BAD
  include/linux/dma-mapping: update usage of zone modifiers
  drivers/xen/swiotlb-xen: update usage of zone modifiers
  fs/btrfs/extent_io: update usage of zone modifiers
  drivers/block/zram/zram_drv: update usage of zone modifiers
  mm/vmpressure: update usage of zone modifiers
  mm/zsmalloc: update usage of zone modifiers
  include/linux/highmem.h: update usage of movableflags
  arch/x86/include/asm/page.h: update usage of movableflags

 arch/x86/include/asm/page.h   |   3 +-
 drivers/block/zram/zram_drv.c |   6 +--
 drivers/xen/swiotlb-xen.c |   2 +-
 fs/btrfs/extent_io.c  |   2 +-
 include/linux/dma-mapping.h   |   2 +-
 include/lin

[RFC PATCH v3 0/9] get rid of GFP_ZONE_TABLE/BAD

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye 

Changes since v2: [2]
* According to Christoph's suggestion, rebase patches to current
  mainline from v4.16.

* Follow the advice of Matthew, create macros like GFP_NORMAL and
  GFP_NORMAL_UNMOVABLE to clear bottom 3 and 4 bits of GFP bitmask.

* Delete some patches because of kernel updating.

[2]: https://marc.info/?l=linux-mm&m=152691610014027&w=2

Tested by Lenovo Thinksystem server.

Initmem setup node 0 [mem 0x1000-0x00043fff]
[0.00] On node 0 totalpages: 4111666
[0.00]   DMA zone: 64 pages used for memmap
[0.00]   DMA zone: 23 pages reserved
[0.00]   DMA zone: 3999 pages, LIFO batch:0
[0.00] mminit::memmap_init Initialising map node 0 zone 0 pfns 1 -> 
4096 
[0.00]   DMA32 zone: 10935 pages used for memmap
[0.00]   DMA32 zone: 699795 pages, LIFO batch:31
[0.00] mminit::memmap_init Initialising map node 0 zone 1 pfns 4096 -> 
1048576
[0.00]   Normal zone: 53248 pages used for memmap
[0.00]   Normal zone: 3407872 pages, LIFO batch:31
[0.00] mminit::memmap_init Initialising map node 0 zone 2 pfns 1048576 
-> 4456448
[0.00] mminit::memmap_init Initialising map node 0 zone 3 pfns 1 -> 
4456448
[0.00] Initmem setup node 1 [mem 0x00238000-0x00277fff]
[0.00] On node 1 totalpages: 4194304
[0.00]   Normal zone: 65536 pages used for memmap
[0.00]   Normal zone: 4194304 pages, LIFO batch:31
[0.00] mminit::memmap_init Initialising map node 1 zone 2 pfns 37224448 
-> 41418752
[0.00] mminit::memmap_init Initialising map node 1 zone 3 pfns 37224448 
-> 41418752
...
[0.00] mminit::zonelist general 0:DMA = 0:DMA
[0.00] mminit::zonelist general 0:DMA32 = 0:DMA32 0:DMA
[0.00] mminit::zonelist general 0:Normal = 0:Normal 0:DMA32 0:DMA 
1:Normal
[0.00] mminit::zonelist thisnode 0:DMA = 0:DMA
[0.00] mminit::zonelist thisnode 0:DMA32 = 0:DMA32 0:DMA
[0.00] mminit::zonelist thisnode 0:Normal = 0:Normal 0:DMA32 0:DMA
[0.00] mminit::zonelist general 1:Normal = 1:Normal 0:Normal 0:DMA32 
0:DMA
[0.00] mminit::zonelist thisnode 1:Normal = 1:Normal
[0.00] Built 2 zonelists, mobility grouping on.  Total pages: 8176164
[0.00] Policy zone: Normal
[0.00] Kernel command line: BOOT_IMAGE=/vmlinuz-4.17.0-rc6-gfp09+ 
root=/dev/mapper/fedora-root ro rd.lvm.lv=fedora/root rd.lvm.lv=fedora/swap 
debug 
LANG=en_US.UTF-8 mminit_loglevel=4 console=tty0 console=ttyS0,115200n8 
memblock=debug
earlyprintk=serial,0x3f8,115200

---

Replace GFP_ZONE_TABLE and GFP_ZONE_BAD with encoded zone number.

Delete ___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 from the GFP bitmasks;
the bottom three bits of the GFP mask are reserved for storing the encoded
zone number.

The encoding method is XOR. Get zone number from enum zone_type,
then encode the number with ZONE_NORMAL by XOR operation.
The goal is to make sure ZONE_NORMAL can be encoded to zero. So,
the compatibility can be guaranteed, such as GFP_KERNEL and GFP_ATOMIC
can be used as before.

Reserve __GFP_MOVABLE in bit 3, so that it can continue to be used as
a flag. Same as before, __GFP_MOVABLE represents the movable migrate type
for ZONE_DMA, ZONE_DMA32, and ZONE_NORMAL. But when it is enabled with
__GFP_HIGHMEM, ZONE_MOVABLE shall be returned instead of ZONE_HIGHMEM.
__GFP_ZONE_MOVABLE is created to realize it.

With this patch, just enabling __GFP_MOVABLE and __GFP_HIGHMEM is not
enough to get ZONE_MOVABLE from gfp_zone. All callers should use
GFP_HIGHUSER_MOVABLE or __GFP_ZONE_MOVABLE directly to achieve that.

Decode zone number directly from bottom three bits of flags in gfp_zone.
The theory of encoding and decoding is,
A ^ B ^ B = A

Changes since v1:[1]

* Create __GFP_ZONE_MOVABLE and modify GFP_HIGHUSER_MOVABLE to help
  callers to get ZONE_MOVABLE. Try to create __GFP_ZONE_MASK to mask
  lowest 3 bits of GFP bitmasks.

* Modify some callers' gfp flag to update usage of address zone
  modifiers.

* Modify inline function gfp_zone to get better performance according
  to Matthew's suggestion.

[1]: https://marc.info/?l=linux-mm&m=152596791931266&w=2

---

Huaisheng Ye (9):
  include/linux/gfp.h: get rid of GFP_ZONE_TABLE/BAD
  include/linux/dma-mapping: update usage of zone modifiers
  drivers/xen/swiotlb-xen: update usage of zone modifiers
  fs/btrfs/extent_io: update usage of zone modifiers
  drivers/block/zram/zram_drv: update usage of zone modifiers
  mm/vmpressure: update usage of zone modifiers
  mm/zsmalloc: update usage of zone modifiers
  include/linux/highmem.h: update usage of movableflags
  arch/x86/include/asm/page.h: update usage of movableflags

 arch/x86/include/asm/page.h   |   3 +-
 drivers/block/zram/zram_drv.c |   6 +--
 drivers/xen/swiotlb-xen.c |   2 +-
 fs/btrfs/extent_io.c  |   2 +-
 include/linux/dma-mapping.h   |   2 +-
 include/linux/gfp

[RFC PATCH v3 0/9] get rid of GFP_ZONE_TABLE/BAD

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Changes since v2: [2]
* According to Christoph's suggestion, rebase patches to current
  mainline from v4.16.

* Follow the advice of Matthew, create macros like GFP_NORMAL and
  GFP_NORMAL_UNMOVABLE to clear bottom 3 and 4 bits of GFP bitmask.

* Delete some patches because of kernel updating.

[2]: https://marc.info/?l=linux-mm&m=152691610014027&w=2

Tested by Lenovo Thinksystem server.

Initmem setup node 0 [mem 0x1000-0x00043fff]
[0.00] On node 0 totalpages: 4111666
[0.00]   DMA zone: 64 pages used for memmap
[0.00]   DMA zone: 23 pages reserved
[0.00]   DMA zone: 3999 pages, LIFO batch:0
[0.00] mminit::memmap_init Initialising map node 0 zone 0 pfns 1 -> 
4096 
[0.00]   DMA32 zone: 10935 pages used for memmap
[0.00]   DMA32 zone: 699795 pages, LIFO batch:31
[0.00] mminit::memmap_init Initialising map node 0 zone 1 pfns 4096 -> 
1048576
[0.00]   Normal zone: 53248 pages used for memmap
[0.00]   Normal zone: 3407872 pages, LIFO batch:31
[0.00] mminit::memmap_init Initialising map node 0 zone 2 pfns 1048576 
-> 4456448
[0.00] mminit::memmap_init Initialising map node 0 zone 3 pfns 1 -> 
4456448
[0.00] Initmem setup node 1 [mem 0x00238000-0x00277fff]
[0.00] On node 1 totalpages: 4194304
[0.00]   Normal zone: 65536 pages used for memmap
[0.00]   Normal zone: 4194304 pages, LIFO batch:31
[0.00] mminit::memmap_init Initialising map node 1 zone 2 pfns 37224448 
-> 41418752
[0.00] mminit::memmap_init Initialising map node 1 zone 3 pfns 37224448 
-> 41418752
...
[0.00] mminit::zonelist general 0:DMA = 0:DMA
[0.00] mminit::zonelist general 0:DMA32 = 0:DMA32 0:DMA
[0.00] mminit::zonelist general 0:Normal = 0:Normal 0:DMA32 0:DMA 
1:Normal
[0.00] mminit::zonelist thisnode 0:DMA = 0:DMA
[0.00] mminit::zonelist thisnode 0:DMA32 = 0:DMA32 0:DMA
[0.00] mminit::zonelist thisnode 0:Normal = 0:Normal 0:DMA32 0:DMA
[0.00] mminit::zonelist general 1:Normal = 1:Normal 0:Normal 0:DMA32 
0:DMA
[0.00] mminit::zonelist thisnode 1:Normal = 1:Normal
[0.00] Built 2 zonelists, mobility grouping on.  Total pages: 8176164
[0.00] Policy zone: Normal
[0.00] Kernel command line: BOOT_IMAGE=/vmlinuz-4.17.0-rc6-gfp09+ 
root=/dev/mapper/fedora-root ro rd.lvm.lv=fedora/root rd.lvm.lv=fedora/swap 
debug 
LANG=en_US.UTF-8 mminit_loglevel=4 console=tty0 console=ttyS0,115200n8 
memblock=debug
earlyprintk=serial,0x3f8,115200

---

Replace GFP_ZONE_TABLE and GFP_ZONE_BAD with encoded zone number.

Delete ___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 from the GFP bitmasks;
the bottom three bits of the GFP mask are reserved for storing the encoded
zone number.

The encoding method is XOR. Get zone number from enum zone_type,
then encode the number with ZONE_NORMAL by XOR operation.
The goal is to make sure ZONE_NORMAL can be encoded to zero. So,
the compatibility can be guaranteed, such as GFP_KERNEL and GFP_ATOMIC
can be used as before.

Reserve __GFP_MOVABLE in bit 3, so that it can continue to be used as
a flag. Same as before, __GFP_MOVABLE represents the movable migrate type
for ZONE_DMA, ZONE_DMA32, and ZONE_NORMAL. But when it is enabled with
__GFP_HIGHMEM, ZONE_MOVABLE shall be returned instead of ZONE_HIGHMEM.
__GFP_ZONE_MOVABLE is created to realize it.

With this patch, just enabling __GFP_MOVABLE and __GFP_HIGHMEM is not
enough to get ZONE_MOVABLE from gfp_zone. All callers should use
GFP_HIGHUSER_MOVABLE or __GFP_ZONE_MOVABLE directly to achieve that.

Decode zone number directly from bottom three bits of flags in gfp_zone.
The theory of encoding and decoding is,
A ^ B ^ B = A

Changes since v1:[1]

* Create __GFP_ZONE_MOVABLE and modify GFP_HIGHUSER_MOVABLE to help
  callers to get ZONE_MOVABLE. Try to create __GFP_ZONE_MASK to mask
  lowest 3 bits of GFP bitmasks.

* Modify some callers' gfp flag to update usage of address zone
  modifiers.

* Modify inline function gfp_zone to get better performance according
  to Matthew's suggestion.

[1]: https://marc.info/?l=linux-mm&m=152596791931266&w=2

---

Huaisheng Ye (9):
  include/linux/gfp.h: get rid of GFP_ZONE_TABLE/BAD
  include/linux/dma-mapping: update usage of zone modifiers
  drivers/xen/swiotlb-xen: update usage of zone modifiers
  fs/btrfs/extent_io: update usage of zone modifiers
  drivers/block/zram/zram_drv: update usage of zone modifiers
  mm/vmpressure: update usage of zone modifiers
  mm/zsmalloc: update usage of zone modifiers
  include/linux/highmem.h: update usage of movableflags
  arch/x86/include/asm/page.h: update usage of movableflags

 arch/x86/include/asm/page.h   |   3 +-
 drivers/block/zram/zram_drv.c |   6 +--
 drivers/xen/swiotlb-xen.c |   2 +-
 fs/btrfs/extent_io.c  |   2 +-
 include/linux/dma-mapping.h   |   2 +-
 include/lin

[RFC PATCH v3 0/9] get rid of GFP_ZONE_TABLE/BAD

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye 

Changes since v2: [2]
* According to Christoph's suggestion, rebase patches to current
  mainline from v4.16.

* Follow the advice of Matthew, create macros like GFP_NORMAL and
  GFP_NORMAL_UNMOVABLE to clear bottom 3 and 4 bits of GFP bitmask.

* Delete some patches because of kernel updating.

[2]: https://marc.info/?l=linux-mm&m=152691610014027&w=2

Tested by Lenovo Thinksystem server.

Initmem setup node 0 [mem 0x1000-0x00043fff]
[0.00] On node 0 totalpages: 4111666
[0.00]   DMA zone: 64 pages used for memmap
[0.00]   DMA zone: 23 pages reserved
[0.00]   DMA zone: 3999 pages, LIFO batch:0
[0.00] mminit::memmap_init Initialising map node 0 zone 0 pfns 1 -> 
4096 
[0.00]   DMA32 zone: 10935 pages used for memmap
[0.00]   DMA32 zone: 699795 pages, LIFO batch:31
[0.00] mminit::memmap_init Initialising map node 0 zone 1 pfns 4096 -> 
1048576
[0.00]   Normal zone: 53248 pages used for memmap
[0.00]   Normal zone: 3407872 pages, LIFO batch:31
[0.00] mminit::memmap_init Initialising map node 0 zone 2 pfns 1048576 
-> 4456448
[0.00] mminit::memmap_init Initialising map node 0 zone 3 pfns 1 -> 
4456448
[0.00] Initmem setup node 1 [mem 0x00238000-0x00277fff]
[0.00] On node 1 totalpages: 4194304
[0.00]   Normal zone: 65536 pages used for memmap
[0.00]   Normal zone: 4194304 pages, LIFO batch:31
[0.00] mminit::memmap_init Initialising map node 1 zone 2 pfns 37224448 
-> 41418752
[0.00] mminit::memmap_init Initialising map node 1 zone 3 pfns 37224448 
-> 41418752
...
[0.00] mminit::zonelist general 0:DMA = 0:DMA
[0.00] mminit::zonelist general 0:DMA32 = 0:DMA32 0:DMA
[0.00] mminit::zonelist general 0:Normal = 0:Normal 0:DMA32 0:DMA 
1:Normal
[0.00] mminit::zonelist thisnode 0:DMA = 0:DMA
[0.00] mminit::zonelist thisnode 0:DMA32 = 0:DMA32 0:DMA
[0.00] mminit::zonelist thisnode 0:Normal = 0:Normal 0:DMA32 0:DMA
[0.00] mminit::zonelist general 1:Normal = 1:Normal 0:Normal 0:DMA32 
0:DMA
[0.00] mminit::zonelist thisnode 1:Normal = 1:Normal
[0.00] Built 2 zonelists, mobility grouping on.  Total pages: 8176164
[0.00] Policy zone: Normal
[0.00] Kernel command line: BOOT_IMAGE=/vmlinuz-4.17.0-rc6-gfp09+ 
root=/dev/mapper/fedora-root ro rd.lvm.lv=fedora/root rd.lvm.lv=fedora/swap 
debug 
LANG=en_US.UTF-8 mminit_loglevel=4 console=tty0 console=ttyS0,115200n8 
memblock=debug
earlyprintk=serial,0x3f8,115200

---

Replace GFP_ZONE_TABLE and GFP_ZONE_BAD with encoded zone number.

Delete ___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 from the GFP bitmasks;
the bottom three bits of the GFP mask are reserved for storing the encoded
zone number.

The encoding method is XOR. Get zone number from enum zone_type,
then encode the number with ZONE_NORMAL by XOR operation.
The goal is to make sure ZONE_NORMAL can be encoded to zero. So,
the compatibility can be guaranteed, such as GFP_KERNEL and GFP_ATOMIC
can be used as before.

Reserve __GFP_MOVABLE in bit 3, so that it can continue to be used as
a flag. Same as before, __GFP_MOVABLE represents the movable migrate type
for ZONE_DMA, ZONE_DMA32, and ZONE_NORMAL. But when it is enabled with
__GFP_HIGHMEM, ZONE_MOVABLE shall be returned instead of ZONE_HIGHMEM.
__GFP_ZONE_MOVABLE is created to realize it.

With this patch, just enabling __GFP_MOVABLE and __GFP_HIGHMEM is not
enough to get ZONE_MOVABLE from gfp_zone. All callers should use
GFP_HIGHUSER_MOVABLE or __GFP_ZONE_MOVABLE directly to achieve that.

Decode zone number directly from bottom three bits of flags in gfp_zone.
The theory of encoding and decoding is,
A ^ B ^ B = A

Changes since v1:[1]

* Create __GFP_ZONE_MOVABLE and modify GFP_HIGHUSER_MOVABLE to help
  callers to get ZONE_MOVABLE. Try to create __GFP_ZONE_MASK to mask
  lowest 3 bits of GFP bitmasks.

* Modify some callers' gfp flag to update usage of address zone
  modifiers.

* Modify inline function gfp_zone to get better performance according
  to Matthew's suggestion.

[1]: https://marc.info/?l=linux-mm&m=152596791931266&w=2

---

Huaisheng Ye (9):
  include/linux/gfp.h: get rid of GFP_ZONE_TABLE/BAD
  include/linux/dma-mapping: update usage of zone modifiers
  drivers/xen/swiotlb-xen: update usage of zone modifiers
  fs/btrfs/extent_io: update usage of zone modifiers
  drivers/block/zram/zram_drv: update usage of zone modifiers
  mm/vmpressure: update usage of zone modifiers
  mm/zsmalloc: update usage of zone modifiers
  include/linux/highmem.h: update usage of movableflags
  arch/x86/include/asm/page.h: update usage of movableflags

 arch/x86/include/asm/page.h   |   3 +-
 drivers/block/zram/zram_drv.c |   6 +--
 drivers/xen/swiotlb-xen.c |   2 +-
 fs/btrfs/extent_io.c  |   2 +-
 include/linux/dma-mapping.h   |   2 +-
 include/linux/gfp

[RFC PATCH v3 2/9] include/linux/dma-mapping: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Use __GFP_ZONE_MASK to replace (__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing the
encoded zone number.
__GFP_DMA, __GFP_HIGHMEM and __GFP_DMA32 should not be combined with
each other by OR.

Use GFP_NORMAL() to clear the bottom 3 bits of the GFP bitmask.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Christoph Hellwig <h...@lst.de>
Cc: Marek Szyprowski <m.szyprow...@samsung.com>
Cc: Robin Murphy <robin.mur...@arm.com>
Cc: Christoph Hellwig <h...@infradead.org>
---
 include/linux/dma-mapping.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f8ab1c0..8fe524d 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -519,7 +519,7 @@ static inline void *dma_alloc_attrs(struct device *dev, 
size_t size,
return cpu_addr;
 
/* let the implementation decide on the zone to allocate from: */
-   flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
+   flag = GFP_NORMAL(flag);
 
if (!arch_dma_alloc_attrs(&dev, &flag))
return NULL;
-- 
1.8.3.1




[RFC PATCH v3 2/9] include/linux/dma-mapping: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye 

Use __GFP_ZONE_MASK to replace (__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing the
encoded zone number.
__GFP_DMA, __GFP_HIGHMEM and __GFP_DMA32 should not be combined with
each other by OR.

Use GFP_NORMAL() to clear the bottom 3 bits of the GFP bitmask.

Signed-off-by: Huaisheng Ye 
Cc: Christoph Hellwig 
Cc: Marek Szyprowski 
Cc: Robin Murphy 
Cc: Christoph Hellwig 
---
 include/linux/dma-mapping.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f8ab1c0..8fe524d 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -519,7 +519,7 @@ static inline void *dma_alloc_attrs(struct device *dev, 
size_t size,
return cpu_addr;
 
/* let the implementation decide on the zone to allocate from: */
-   flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
+   flag = GFP_NORMAL(flag);
 
if (!arch_dma_alloc_attrs(&dev, &flag))
return NULL;
-- 
1.8.3.1




[RFC PATCH v3 9/9] arch/x86/include/asm/page.h: update usage of movableflags

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

GFP_HIGHUSER_MOVABLE is not equal to GFP_HIGHUSER | __GFP_MOVABLE,
modify it to adapt to the patch getting rid of GFP_ZONE_TABLE/BAD.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: "H. Peter Anvin" <h...@zytor.com>
Cc: Kate Stewart <kstew...@linuxfoundation.org>
Cc: Greg Kroah-Hartman <gre...@linuxfoundation.org>
Cc: x...@kernel.org <x...@kernel.org>
Cc: Philippe Ombredanne <pombreda...@nexb.com>
Cc: Christoph Hellwig <h...@infradead.org>
---
 arch/x86/include/asm/page.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 7555b48..a47f42d 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -35,7 +35,8 @@ static inline void copy_user_page(void *to, void *from, 
unsigned long vaddr,
 }
 
 #define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
-   alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+   alloc_page_vma((movableflags ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER) \
+   | __GFP_ZERO, vma, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
 #ifndef __pa
-- 
1.8.3.1




[RFC PATCH v3 9/9] arch/x86/include/asm/page.h: update usage of movableflags

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye 

GFP_HIGHUSER_MOVABLE is not equal to GFP_HIGHUSER | __GFP_MOVABLE,
modify it to adapt to the patch getting rid of GFP_ZONE_TABLE/BAD.

Signed-off-by: Huaisheng Ye 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: "H. Peter Anvin" 
Cc: Kate Stewart 
Cc: Greg Kroah-Hartman 
Cc: x...@kernel.org 
Cc: Philippe Ombredanne 
Cc: Christoph Hellwig 
---
 arch/x86/include/asm/page.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 7555b48..a47f42d 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -35,7 +35,8 @@ static inline void copy_user_page(void *to, void *from, 
unsigned long vaddr,
 }
 
 #define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
-   alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+   alloc_page_vma((movableflags ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER) \
+   | __GFP_ZERO, vma, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
 #ifndef __pa
-- 
1.8.3.1




[RFC PATCH v3 4/9] fs/btrfs/extent_io: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Use __GFP_ZONE_MASK to replace (__GFP_DMA32 | __GFP_HIGHMEM).

In function alloc_extent_state, it is obvious that __GFP_DMA is not
the expected zone type.

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing the
encoded zone number.
__GFP_DMA, __GFP_HIGHMEM and __GFP_DMA32 should not be combined with
each other by OR.

Use GFP_NORMAL() to clear the bottom 3 bits of the GFP bitmask.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Chris Mason <c...@fb.com>
Cc: Josef Bacik <jba...@fb.com>
Cc: David Sterba <dste...@suse.com>
Cc: Christoph Hellwig <h...@infradead.org>
---
 fs/btrfs/extent_io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e99b329..f41fc61 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -220,7 +220,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
 * The given mask might be not appropriate for the slab allocator,
 * drop the unsupported bits
 */
-   mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
+   mask = GFP_NORMAL(mask);
state = kmem_cache_alloc(extent_state_cache, mask);
if (!state)
return state;
-- 
1.8.3.1




[RFC PATCH v3 4/9] fs/btrfs/extent_io: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye 

Use __GFP_ZONE_MASK to replace (__GFP_DMA32 | __GFP_HIGHMEM).

In function alloc_extent_state, it is obvious that __GFP_DMA is not
the expected zone type.

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.
__GFP_DMA, __GFP_HIGHMEM and __GFP_DMA32 should not be ORed with
each other.

Use GFP_NORMAL() to clear the bottom 3 bits of the GFP bitmask.

Signed-off-by: Huaisheng Ye 
Cc: Chris Mason 
Cc: Josef Bacik 
Cc: David Sterba 
Cc: Christoph Hellwig 
---
 fs/btrfs/extent_io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e99b329..f41fc61 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -220,7 +220,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
 * The given mask might be not appropriate for the slab allocator,
 * drop the unsupported bits
 */
-   mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
+   mask = GFP_NORMAL(mask);
state = kmem_cache_alloc(extent_state_cache, mask);
if (!state)
return state;
-- 
1.8.3.1




[RFC PATCH v3 8/9] include/linux/highmem.h: update usage of movableflags

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

GFP_HIGHUSER_MOVABLE is not equal to GFP_HIGHUSER | __GFP_MOVABLE, so
modify this code to adapt to the patch that gets rid of GFP_ZONE_TABLE/BAD.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Kate Stewart <kstew...@linuxfoundation.org>
Cc: Greg Kroah-Hartman <gre...@linuxfoundation.org>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Philippe Ombredanne <pombreda...@nexb.com>
Cc: Christoph Hellwig <h...@infradead.org>
---
 include/linux/highmem.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 0690679..5383c9e 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -159,8 +159,8 @@ static inline void clear_user_highpage(struct page *page, 
unsigned long vaddr)
struct vm_area_struct *vma,
unsigned long vaddr)
 {
-   struct page *page = alloc_page_vma(GFP_HIGHUSER | movableflags,
-   vma, vaddr);
+   struct page *page = alloc_page_vma(movableflags ?
+   GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER, vma, vaddr);
 
if (page)
clear_user_highpage(page, vaddr);
-- 
1.8.3.1




[RFC PATCH v3 8/9] include/linux/highmem.h: update usage of movableflags

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye 

GFP_HIGHUSER_MOVABLE is not equal to GFP_HIGHUSER | __GFP_MOVABLE, so
modify this code to adapt to the patch that gets rid of GFP_ZONE_TABLE/BAD.

Signed-off-by: Huaisheng Ye 
Cc: Kate Stewart 
Cc: Greg Kroah-Hartman 
Cc: Thomas Gleixner 
Cc: Philippe Ombredanne 
Cc: Christoph Hellwig 
---
 include/linux/highmem.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 0690679..5383c9e 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -159,8 +159,8 @@ static inline void clear_user_highpage(struct page *page, 
unsigned long vaddr)
struct vm_area_struct *vma,
unsigned long vaddr)
 {
-   struct page *page = alloc_page_vma(GFP_HIGHUSER | movableflags,
-   vma, vaddr);
+   struct page *page = alloc_page_vma(movableflags ?
+   GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER, vma, vaddr);
 
if (page)
clear_user_highpage(page, vaddr);
-- 
1.8.3.1




[RFC PATCH v3 7/9] mm/zsmalloc: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Use __GFP_ZONE_MOVABLE to replace (__GFP_HIGHMEM | __GFP_MOVABLE).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.

__GFP_ZONE_MOVABLE contains encoded ZONE_MOVABLE and __GFP_MOVABLE flag.

With GFP_ZONE_TABLE, __GFP_HIGHMEM ORed with __GFP_MOVABLE means gfp_zone
should return ZONE_MOVABLE. In order to stay compatible with
GFP_ZONE_TABLE, use GFP_NORMAL_UNMOVABLE() to clear the bottom 4 bits of
the GFP bitmask.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Minchan Kim <minc...@kernel.org>
Cc: Nitin Gupta <ngu...@vflare.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky.w...@gmail.com>
Cc: Christoph Hellwig <h...@infradead.org>
---
 mm/zsmalloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 61cb05d..e250c69 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -345,7 +345,7 @@ static void destroy_cache(struct zs_pool *pool)
 static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
 {
return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
-   gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+   GFP_NORMAL_UNMOVABLE(gfp));
 }
 
 static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
@@ -356,7 +356,7 @@ static void cache_free_handle(struct zs_pool *pool, 
unsigned long handle)
 static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
 {
return kmem_cache_alloc(pool->zspage_cachep,
-   flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+   GFP_NORMAL_UNMOVABLE(flags));
 }
 
 static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
-- 
1.8.3.1




[RFC PATCH v3 7/9] mm/zsmalloc: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye 

Use __GFP_ZONE_MOVABLE to replace (__GFP_HIGHMEM | __GFP_MOVABLE).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.

__GFP_ZONE_MOVABLE contains encoded ZONE_MOVABLE and __GFP_MOVABLE flag.

With GFP_ZONE_TABLE, __GFP_HIGHMEM ORed with __GFP_MOVABLE means gfp_zone
should return ZONE_MOVABLE. In order to stay compatible with
GFP_ZONE_TABLE, use GFP_NORMAL_UNMOVABLE() to clear the bottom 4 bits of
the GFP bitmask.

Signed-off-by: Huaisheng Ye 
Cc: Minchan Kim 
Cc: Nitin Gupta 
Cc: Sergey Senozhatsky 
Cc: Christoph Hellwig 
---
 mm/zsmalloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 61cb05d..e250c69 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -345,7 +345,7 @@ static void destroy_cache(struct zs_pool *pool)
 static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
 {
return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
-   gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+   GFP_NORMAL_UNMOVABLE(gfp));
 }
 
 static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
@@ -356,7 +356,7 @@ static void cache_free_handle(struct zs_pool *pool, 
unsigned long handle)
 static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
 {
return kmem_cache_alloc(pool->zspage_cachep,
-   flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+   GFP_NORMAL_UNMOVABLE(flags));
 }
 
 static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
-- 
1.8.3.1




[RFC PATCH v3 1/9] include/linux/gfp.h: get rid of GFP_ZONE_TABLE/BAD

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Replace GFP_ZONE_TABLE and GFP_ZONE_BAD with encoded zone number.

Delete ___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 from the GFP bitmasks;
the bottom three bits of the GFP mask are reserved for storing the encoded
zone number.

The encoding method is XOR. Get zone number from enum zone_type,
then encode the number with ZONE_NORMAL by XOR operation.
The goal is to make sure ZONE_NORMAL can be encoded to zero. So,
the compatibility can be guaranteed, such as GFP_KERNEL and GFP_ATOMIC
can be used as before.

Reserve __GFP_MOVABLE in bit 3, so that it can continue to be used as
a flag. Same as before, __GFP_MOVABLE represents the movable migrate type
for ZONE_DMA, ZONE_DMA32, and ZONE_NORMAL. But when it is enabled with
__GFP_HIGHMEM, ZONE_MOVABLE shall be returned instead of ZONE_HIGHMEM.
__GFP_ZONE_MOVABLE is created to realize it.

With this patch, just enabling __GFP_MOVABLE and __GFP_HIGHMEM is not
enough to get ZONE_MOVABLE from gfp_zone. All subsystems should use
GFP_HIGHUSER_MOVABLE directly to achieve that.

Decode zone number directly from bottom three bits of flags in gfp_zone.
The theory of encoding and decoding is,
A ^ B ^ B = A

Suggested-by: Matthew Wilcox <wi...@infradead.org>
Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Andrew Morton <a...@linux-foundation.org>
Cc: Vlastimil Babka <vba...@suse.cz>
Cc: Michal Hocko <mho...@suse.com>
Cc: Mel Gorman <mgor...@techsingularity.net>
Cc: Kate Stewart <kstew...@linuxfoundation.org>
Cc: "Levin, Alexander (Sasha Levin)" <alexander.le...@verizon.com>
Cc: Greg Kroah-Hartman <gre...@linuxfoundation.org>
Cc: Christoph Hellwig <h...@infradead.org>
---
 include/linux/gfp.h | 107 ++--
 1 file changed, 20 insertions(+), 87 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 1a4582b..f76ccd76 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -16,9 +16,7 @@
  */
 
 /* Plain integer GFP bitmasks. Do not use this directly. */
-#define ___GFP_DMA 0x01u
-#define ___GFP_HIGHMEM 0x02u
-#define ___GFP_DMA32   0x04u
+#define ___GFP_ZONE_MASK   0x07u
 #define ___GFP_MOVABLE 0x08u
 #define ___GFP_RECLAIMABLE 0x10u
 #define ___GFP_HIGH0x20u
@@ -53,11 +51,15 @@
  * without the underscores and use them consistently. The definitions here may
  * be used in bit comparisons.
  */
-#define __GFP_DMA  ((__force gfp_t)___GFP_DMA)
-#define __GFP_HIGHMEM  ((__force gfp_t)___GFP_HIGHMEM)
-#define __GFP_DMA32((__force gfp_t)___GFP_DMA32)
+#define __GFP_DMA  ((__force gfp_t)OPT_ZONE_DMA ^ ZONE_NORMAL)
+#define __GFP_HIGHMEM  ((__force gfp_t)OPT_ZONE_HIGHMEM ^ ZONE_NORMAL)
+#define __GFP_DMA32((__force gfp_t)OPT_ZONE_DMA32 ^ ZONE_NORMAL)
 #define __GFP_MOVABLE  ((__force gfp_t)___GFP_MOVABLE)  /* ZONE_MOVABLE 
allowed */
-#define GFP_ZONEMASK   (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
+#define GFP_ZONEMASK   ((__force gfp_t)___GFP_ZONE_MASK | ___GFP_MOVABLE)
+/* bottom 3 bits of GFP bitmasks are used for zone number encoded*/
+#define __GFP_ZONE_MASK ((__force gfp_t)___GFP_ZONE_MASK)
+#define __GFP_ZONE_MOVABLE \
+   ((__force gfp_t)(ZONE_MOVABLE ^ ZONE_NORMAL) | ___GFP_MOVABLE)
 
 /*
  * Page mobility and placement hints
@@ -268,6 +270,13 @@
  *   available and will not wake kswapd/kcompactd on failure. The _LIGHT
  *   version does not attempt reclaim/compaction at all and is by default used
  *   in page fault path, while the non-light is used by khugepaged.
+ *
+ * GFP_NORMAL() is used to clear bottom 3 bits of GFP bitmask. Actually it
+ *   returns encoded ZONE_NORMAL bits.
+ *
+ * GFP_NORMAL_UNMOVABLE() is similar to GFP_NORMAL, but it clear bottom 4 bits
+ *   of GFP bitmask. Excepting the encoded ZONE_NORMAL bits, it clears MOVABLE
+ *   flags as well.
  */
 #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
 #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
@@ -279,10 +288,12 @@
 #define GFP_DMA__GFP_DMA
 #define GFP_DMA32  __GFP_DMA32
 #define GFP_HIGHUSER   (GFP_USER | __GFP_HIGHMEM)
-#define GFP_HIGHUSER_MOVABLE   (GFP_HIGHUSER | __GFP_MOVABLE)
+#define GFP_HIGHUSER_MOVABLE   (GFP_USER | __GFP_ZONE_MOVABLE)
 #define GFP_TRANSHUGE_LIGHT((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
 __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
 #define GFP_TRANSHUGE  (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
+#define GFP_NORMAL(gfp)((gfp) & ~__GFP_ZONE_MASK)
+#define GFP_NORMAL_UNMOVABLE(gfp) ((gfp) & ~GFP_ZONEMASK)
 
 /* Convert GFP flags to their corresponding migrate type */
 #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
@@ -326,87 +337,9 @@ static inline bool gfpflags_allow_blocking(const gfp_t 
gfp_flags)
 #define OPT_ZONE_DMA32 ZONE_NORMAL
 #endif
 
-/*
- * GFP_ZONE_TABLE is a wor

[RFC PATCH v3 1/9] include/linux/gfp.h: get rid of GFP_ZONE_TABLE/BAD

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye 

Replace GFP_ZONE_TABLE and GFP_ZONE_BAD with encoded zone number.

Delete ___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 from the GFP bitmasks;
the bottom three bits of the GFP mask are reserved for storing the encoded
zone number.

The encoding method is XOR. Get zone number from enum zone_type,
then encode the number with ZONE_NORMAL by XOR operation.
The goal is to make sure ZONE_NORMAL can be encoded to zero. So,
the compatibility can be guaranteed, such as GFP_KERNEL and GFP_ATOMIC
can be used as before.

Reserve __GFP_MOVABLE in bit 3, so that it can continue to be used as
a flag. Same as before, __GFP_MOVABLE represents the movable migrate type
for ZONE_DMA, ZONE_DMA32, and ZONE_NORMAL. But when it is enabled with
__GFP_HIGHMEM, ZONE_MOVABLE shall be returned instead of ZONE_HIGHMEM.
__GFP_ZONE_MOVABLE is created to realize it.

With this patch, just enabling __GFP_MOVABLE and __GFP_HIGHMEM is not
enough to get ZONE_MOVABLE from gfp_zone. All subsystems should use
GFP_HIGHUSER_MOVABLE directly to achieve that.

Decode zone number directly from bottom three bits of flags in gfp_zone.
The theory of encoding and decoding is,
A ^ B ^ B = A

Suggested-by: Matthew Wilcox 
Signed-off-by: Huaisheng Ye 
Cc: Andrew Morton 
Cc: Vlastimil Babka 
Cc: Michal Hocko 
Cc: Mel Gorman 
Cc: Kate Stewart 
Cc: "Levin, Alexander (Sasha Levin)" 
Cc: Greg Kroah-Hartman 
Cc: Christoph Hellwig 
---
 include/linux/gfp.h | 107 ++--
 1 file changed, 20 insertions(+), 87 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 1a4582b..f76ccd76 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -16,9 +16,7 @@
  */
 
 /* Plain integer GFP bitmasks. Do not use this directly. */
-#define ___GFP_DMA 0x01u
-#define ___GFP_HIGHMEM 0x02u
-#define ___GFP_DMA32   0x04u
+#define ___GFP_ZONE_MASK   0x07u
 #define ___GFP_MOVABLE 0x08u
 #define ___GFP_RECLAIMABLE 0x10u
 #define ___GFP_HIGH0x20u
@@ -53,11 +51,15 @@
  * without the underscores and use them consistently. The definitions here may
  * be used in bit comparisons.
  */
-#define __GFP_DMA  ((__force gfp_t)___GFP_DMA)
-#define __GFP_HIGHMEM  ((__force gfp_t)___GFP_HIGHMEM)
-#define __GFP_DMA32((__force gfp_t)___GFP_DMA32)
+#define __GFP_DMA  ((__force gfp_t)OPT_ZONE_DMA ^ ZONE_NORMAL)
+#define __GFP_HIGHMEM  ((__force gfp_t)OPT_ZONE_HIGHMEM ^ ZONE_NORMAL)
+#define __GFP_DMA32((__force gfp_t)OPT_ZONE_DMA32 ^ ZONE_NORMAL)
 #define __GFP_MOVABLE  ((__force gfp_t)___GFP_MOVABLE)  /* ZONE_MOVABLE 
allowed */
-#define GFP_ZONEMASK   (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
+#define GFP_ZONEMASK   ((__force gfp_t)___GFP_ZONE_MASK | ___GFP_MOVABLE)
+/* bottom 3 bits of GFP bitmasks are used for zone number encoded*/
+#define __GFP_ZONE_MASK ((__force gfp_t)___GFP_ZONE_MASK)
+#define __GFP_ZONE_MOVABLE \
+   ((__force gfp_t)(ZONE_MOVABLE ^ ZONE_NORMAL) | ___GFP_MOVABLE)
 
 /*
  * Page mobility and placement hints
@@ -268,6 +270,13 @@
  *   available and will not wake kswapd/kcompactd on failure. The _LIGHT
  *   version does not attempt reclaim/compaction at all and is by default used
  *   in page fault path, while the non-light is used by khugepaged.
+ *
+ * GFP_NORMAL() is used to clear bottom 3 bits of GFP bitmask. Actually it
+ *   returns encoded ZONE_NORMAL bits.
+ *
+ * GFP_NORMAL_UNMOVABLE() is similar to GFP_NORMAL, but it clear bottom 4 bits
+ *   of GFP bitmask. Excepting the encoded ZONE_NORMAL bits, it clears MOVABLE
+ *   flags as well.
  */
 #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
 #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
@@ -279,10 +288,12 @@
 #define GFP_DMA__GFP_DMA
 #define GFP_DMA32  __GFP_DMA32
 #define GFP_HIGHUSER   (GFP_USER | __GFP_HIGHMEM)
-#define GFP_HIGHUSER_MOVABLE   (GFP_HIGHUSER | __GFP_MOVABLE)
+#define GFP_HIGHUSER_MOVABLE   (GFP_USER | __GFP_ZONE_MOVABLE)
 #define GFP_TRANSHUGE_LIGHT((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
 __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
 #define GFP_TRANSHUGE  (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
+#define GFP_NORMAL(gfp)((gfp) & ~__GFP_ZONE_MASK)
+#define GFP_NORMAL_UNMOVABLE(gfp) ((gfp) & ~GFP_ZONEMASK)
 
 /* Convert GFP flags to their corresponding migrate type */
 #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
@@ -326,87 +337,9 @@ static inline bool gfpflags_allow_blocking(const gfp_t 
gfp_flags)
 #define OPT_ZONE_DMA32 ZONE_NORMAL
 #endif
 
-/*
- * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the
- * zone to use given the lowest 4 bits of gfp_t. Entries are GFP_ZONES_SHIFT
- * bits long and there are 16 of them to cover all possible combinations of
- * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM.
- *
- * The zone fallback order is MOVABLE=>HI

[RFC PATCH v3 5/9] drivers/block/zram/zram_drv: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Use __GFP_ZONE_MOVABLE to replace (__GFP_HIGHMEM | __GFP_MOVABLE).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.

__GFP_ZONE_MOVABLE contains encoded ZONE_MOVABLE and __GFP_MOVABLE flag.

With GFP_ZONE_TABLE, __GFP_HIGHMEM ORing __GFP_MOVABLE means gfp_zone
should return ZONE_MOVABLE. In order to keep that compatible with
GFP_ZONE_TABLE, replace (__GFP_HIGHMEM | __GFP_MOVABLE) with
__GFP_ZONE_MOVABLE.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Minchan Kim <minc...@kernel.org>
Cc: Nitin Gupta <ngu...@vflare.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky.w...@gmail.com>
Cc: Christoph Hellwig <h...@infradead.org>
---
 drivers/block/zram/zram_drv.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 0f3fadd..1bb5ca8 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1004,14 +1004,12 @@ static int __zram_bvec_write(struct zram *zram, struct 
bio_vec *bvec,
handle = zs_malloc(zram->mem_pool, comp_len,
__GFP_KSWAPD_RECLAIM |
__GFP_NOWARN |
-   __GFP_HIGHMEM |
-   __GFP_MOVABLE);
+   __GFP_ZONE_MOVABLE);
if (!handle) {
zcomp_stream_put(zram->comp);
atomic64_inc(&zram->stats.writestall);
handle = zs_malloc(zram->mem_pool, comp_len,
-   GFP_NOIO | __GFP_HIGHMEM |
-   __GFP_MOVABLE);
+   GFP_NOIO | __GFP_ZONE_MOVABLE);
if (handle)
goto compress_again;
return -ENOMEM;
-- 
1.8.3.1




[RFC PATCH v3 5/9] drivers/block/zram/zram_drv: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye 

Use __GFP_ZONE_MOVABLE to replace (__GFP_HIGHMEM | __GFP_MOVABLE).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.

__GFP_ZONE_MOVABLE contains encoded ZONE_MOVABLE and __GFP_MOVABLE flag.

With GFP_ZONE_TABLE, __GFP_HIGHMEM ORing __GFP_MOVABLE means gfp_zone
should return ZONE_MOVABLE. In order to keep that compatible with
GFP_ZONE_TABLE, replace (__GFP_HIGHMEM | __GFP_MOVABLE) with
__GFP_ZONE_MOVABLE.

Signed-off-by: Huaisheng Ye 
Cc: Minchan Kim 
Cc: Nitin Gupta 
Cc: Sergey Senozhatsky 
Cc: Christoph Hellwig 
---
 drivers/block/zram/zram_drv.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 0f3fadd..1bb5ca8 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1004,14 +1004,12 @@ static int __zram_bvec_write(struct zram *zram, struct 
bio_vec *bvec,
handle = zs_malloc(zram->mem_pool, comp_len,
__GFP_KSWAPD_RECLAIM |
__GFP_NOWARN |
-   __GFP_HIGHMEM |
-   __GFP_MOVABLE);
+   __GFP_ZONE_MOVABLE);
if (!handle) {
zcomp_stream_put(zram->comp);
atomic64_inc(&zram->stats.writestall);
handle = zs_malloc(zram->mem_pool, comp_len,
-   GFP_NOIO | __GFP_HIGHMEM |
-   __GFP_MOVABLE);
+   GFP_NOIO | __GFP_ZONE_MOVABLE);
if (handle)
goto compress_again;
return -ENOMEM;
-- 
1.8.3.1




[RFC PATCH v3 3/9] drivers/xen/swiotlb-xen: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Use __GFP_ZONE_MASK to replace (__GFP_DMA | __GFP_HIGHMEM).

In function xen_swiotlb_alloc_coherent, it is obvious that __GFP_DMA32
is not the expected zone type.

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.
__GFP_DMA, __GFP_HIGHMEM and __GFP_DMA32 should not be ORed with
each other.

Use GFP_NORMAL() to clear the bottom 3 bits of the GFP bitmask.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Konrad Rzeszutek Wilk <konrad.w...@oracle.com>
Cc: Boris Ostrovsky <boris.ostrov...@oracle.com>
Cc: Juergen Gross <jgr...@suse.com>
Cc: Christoph Hellwig <h...@infradead.org>
---
 drivers/xen/swiotlb-xen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index e1c6089..359 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -301,7 +301,7 @@ int __ref xen_swiotlb_init(int verbose, bool early)
* machine physical layout.  We can't allocate highmem
* because we can't return a pointer to it.
*/
-   flags &= ~(__GFP_DMA | __GFP_HIGHMEM);
+   flags = GFP_NORMAL(flags);
 
/* On ARM this function returns an ioremap'ped virtual address for
 * which virt_to_phys doesn't return the corresponding physical
-- 
1.8.3.1




[RFC PATCH v3 3/9] drivers/xen/swiotlb-xen: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye 

Use __GFP_ZONE_MASK to replace (__GFP_DMA | __GFP_HIGHMEM).

In function xen_swiotlb_alloc_coherent, it is obvious that __GFP_DMA32
is not the expected zone type.

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.
__GFP_DMA, __GFP_HIGHMEM and __GFP_DMA32 should not be ORed with
each other.

Use GFP_NORMAL() to clear the bottom 3 bits of the GFP bitmask.

Signed-off-by: Huaisheng Ye 
Cc: Konrad Rzeszutek Wilk 
Cc: Boris Ostrovsky 
Cc: Juergen Gross 
Cc: Christoph Hellwig 
---
 drivers/xen/swiotlb-xen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index e1c6089..359 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -301,7 +301,7 @@ int __ref xen_swiotlb_init(int verbose, bool early)
* machine physical layout.  We can't allocate highmem
* because we can't return a pointer to it.
*/
-   flags &= ~(__GFP_DMA | __GFP_HIGHMEM);
+   flags = GFP_NORMAL(flags);
 
/* On ARM this function returns an ioremap'ped virtual address for
 * which virt_to_phys doesn't return the corresponding physical
-- 
1.8.3.1




[RFC PATCH v3 6/9] mm/vmpressure: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Use __GFP_ZONE_MOVABLE to replace (__GFP_HIGHMEM | __GFP_MOVABLE).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.

__GFP_ZONE_MOVABLE contains encoded ZONE_MOVABLE and __GFP_MOVABLE flag.

With GFP_ZONE_TABLE, __GFP_HIGHMEM ORing __GFP_MOVABLE means gfp_zone
should return ZONE_MOVABLE. In order to keep that compatible with
GFP_ZONE_TABLE, replace (__GFP_HIGHMEM | __GFP_MOVABLE) with
__GFP_ZONE_MOVABLE.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Andrew Morton <a...@linux-foundation.org>
Cc: zhongjiang <zhongji...@huawei.com>
Cc: Minchan Kim <minc...@kernel.org>
Cc: Dan Carpenter <dan.carpen...@oracle.com>
Cc: David Rientjes <rient...@google.com>
Cc: Christoph Hellwig <h...@infradead.org>
---
 mm/vmpressure.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 85350ce..30a40e2 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -256,7 +256,7 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool 
tree,
 * Indirect reclaim (kswapd) sets sc->gfp_mask to GFP_KERNEL, so
 * we account it too.
 */
-   if (!(gfp & (__GFP_HIGHMEM | __GFP_MOVABLE | __GFP_IO | __GFP_FS)))
+   if (!(gfp & (__GFP_ZONE_MOVABLE | __GFP_IO | __GFP_FS)))
return;
 
/*
-- 
1.8.3.1




[RFC PATCH v3 6/9] mm/vmpressure: update usage of zone modifiers

2018-05-23 Thread Huaisheng Ye
From: Huaisheng Ye 

Use __GFP_ZONE_MOVABLE to replace (__GFP_HIGHMEM | __GFP_MOVABLE).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.

__GFP_ZONE_MOVABLE contains encoded ZONE_MOVABLE and __GFP_MOVABLE flag.

With GFP_ZONE_TABLE, __GFP_HIGHMEM ORing __GFP_MOVABLE means gfp_zone
should return ZONE_MOVABLE. In order to keep that compatible with
GFP_ZONE_TABLE, replace (__GFP_HIGHMEM | __GFP_MOVABLE) with
__GFP_ZONE_MOVABLE.

Signed-off-by: Huaisheng Ye 
Cc: Andrew Morton 
Cc: zhongjiang 
Cc: Minchan Kim 
Cc: Dan Carpenter 
Cc: David Rientjes 
Cc: Christoph Hellwig 
---
 mm/vmpressure.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 85350ce..30a40e2 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -256,7 +256,7 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool 
tree,
 * Indirect reclaim (kswapd) sets sc->gfp_mask to GFP_KERNEL, so
 * we account it too.
 */
-   if (!(gfp & (__GFP_HIGHMEM | __GFP_MOVABLE | __GFP_IO | __GFP_FS)))
+   if (!(gfp & (__GFP_ZONE_MOVABLE | __GFP_IO | __GFP_FS)))
return;
 
/*
-- 
1.8.3.1




[RFC PATCH v2 01/12] include/linux/gfp.h: get rid of GFP_ZONE_TABLE/BAD

2018-05-21 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Replace GFP_ZONE_TABLE and GFP_ZONE_BAD with encoded zone number.

Delete ___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 from the GFP bitmasks;
the bottom three bits of the GFP mask are reserved for storing the encoded
zone number.

The encoding method is XOR. Get zone number from enum zone_type,
then encode the number with ZONE_NORMAL by XOR operation.
The goal is to make sure ZONE_NORMAL can be encoded to zero. So,
the compatibility can be guaranteed, such as GFP_KERNEL and GFP_ATOMIC
can be used as before.

Reserve __GFP_MOVABLE in bit 3, so that it can continue to be used as
a flag. Same as before, __GFP_MOVABLE represents the movable migrate type
for ZONE_DMA, ZONE_DMA32, and ZONE_NORMAL. But when it is enabled with
__GFP_HIGHMEM, ZONE_MOVABLE shall be returned instead of ZONE_HIGHMEM.
__GFP_ZONE_MOVABLE is created to realize it.

With this patch, just enabling __GFP_MOVABLE and __GFP_HIGHMEM is not
enough to get ZONE_MOVABLE from gfp_zone. All subsystems should use
GFP_HIGHUSER_MOVABLE directly to achieve that.

Decode zone number directly from bottom three bits of flags in gfp_zone.
The theory of encoding and decoding is,
A ^ B ^ B = A

Suggested-by: Matthew Wilcox <wi...@infradead.org>
Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Andrew Morton <a...@linux-foundation.org>
Cc: Vlastimil Babka <vba...@suse.cz>
Cc: Michal Hocko <mho...@suse.com>
Cc: Mel Gorman <mgor...@techsingularity.net>
Cc: Kate Stewart <kstew...@linuxfoundation.org>
Cc: "Levin, Alexander (Sasha Levin)" <alexander.le...@verizon.com>
Cc: Greg Kroah-Hartman <gre...@linuxfoundation.org>
---
 include/linux/gfp.h | 98 ++---
 1 file changed, 11 insertions(+), 87 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 1a4582b..ab0fb7f 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -16,9 +16,7 @@
  */
 
 /* Plain integer GFP bitmasks. Do not use this directly. */
-#define ___GFP_DMA 0x01u
-#define ___GFP_HIGHMEM 0x02u
-#define ___GFP_DMA32   0x04u
+#define ___GFP_ZONE_MASK   0x07u
 #define ___GFP_MOVABLE 0x08u
 #define ___GFP_RECLAIMABLE 0x10u
 #define ___GFP_HIGH0x20u
@@ -53,11 +51,15 @@
  * without the underscores and use them consistently. The definitions here may
  * be used in bit comparisons.
  */
-#define __GFP_DMA  ((__force gfp_t)___GFP_DMA)
-#define __GFP_HIGHMEM  ((__force gfp_t)___GFP_HIGHMEM)
-#define __GFP_DMA32((__force gfp_t)___GFP_DMA32)
+#define __GFP_DMA  ((__force gfp_t)OPT_ZONE_DMA ^ ZONE_NORMAL)
+#define __GFP_HIGHMEM  ((__force gfp_t)OPT_ZONE_HIGHMEM ^ ZONE_NORMAL)
+#define __GFP_DMA32((__force gfp_t)OPT_ZONE_DMA32 ^ ZONE_NORMAL)
 #define __GFP_MOVABLE  ((__force gfp_t)___GFP_MOVABLE)  /* ZONE_MOVABLE 
allowed */
-#define GFP_ZONEMASK   (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
+#define GFP_ZONEMASK   ((__force gfp_t)___GFP_ZONE_MASK | ___GFP_MOVABLE)
+/* bottom 3 bits of GFP bitmasks are used for zone number encoded*/
+#define __GFP_ZONE_MASK ((__force gfp_t)___GFP_ZONE_MASK)
+#define __GFP_ZONE_MOVABLE \
+   ((__force gfp_t)(ZONE_MOVABLE ^ ZONE_NORMAL) | ___GFP_MOVABLE)
 
 /*
  * Page mobility and placement hints
@@ -279,7 +281,7 @@
 #define GFP_DMA__GFP_DMA
 #define GFP_DMA32  __GFP_DMA32
 #define GFP_HIGHUSER   (GFP_USER | __GFP_HIGHMEM)
-#define GFP_HIGHUSER_MOVABLE   (GFP_HIGHUSER | __GFP_MOVABLE)
+#define GFP_HIGHUSER_MOVABLE   (GFP_USER | __GFP_ZONE_MOVABLE)
 #define GFP_TRANSHUGE_LIGHT((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
 __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
 #define GFP_TRANSHUGE  (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
@@ -326,87 +328,9 @@ static inline bool gfpflags_allow_blocking(const gfp_t 
gfp_flags)
 #define OPT_ZONE_DMA32 ZONE_NORMAL
 #endif
 
-/*
- * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the
- * zone to use given the lowest 4 bits of gfp_t. Entries are GFP_ZONES_SHIFT
- * bits long and there are 16 of them to cover all possible combinations of
- * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM.
- *
- * The zone fallback order is MOVABLE=>HIGHMEM=>NORMAL=>DMA32=>DMA.
- * But GFP_MOVABLE is not only a zone specifier but also an allocation
- * policy. Therefore __GFP_MOVABLE plus another zone selector is valid.
- * Only 1 bit of the lowest 3 bits (DMA,DMA32,HIGHMEM) can be set to "1".
- *
- *   bit   result
- *   =
- *   0x0=> NORMAL
- *   0x1=> DMA or NORMAL
- *   0x2=> HIGHMEM or NORMAL
- *   0x3=> BAD (DMA+HIGHMEM)
- *   0x4=> DMA32 or DMA or NORMAL
- *   0x5=> BAD (DMA+DMA32)
- *   0x6=> BAD (HIGHMEM+DMA32)
- *   0x7=> BAD (HIGHMEM+DMA32+DMA)
- *   0x8=> NORMA

[RFC PATCH v2 01/12] include/linux/gfp.h: get rid of GFP_ZONE_TABLE/BAD

2018-05-21 Thread Huaisheng Ye
From: Huaisheng Ye 

Replace GFP_ZONE_TABLE and GFP_ZONE_BAD with encoded zone number.

Delete ___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 from the GFP bitmasks;
the bottom three bits of the GFP mask are reserved for storing the encoded
zone number.

The encoding method is XOR. Get zone number from enum zone_type,
then encode the number with ZONE_NORMAL by XOR operation.
The goal is to make sure ZONE_NORMAL can be encoded to zero. So,
the compatibility can be guaranteed, such as GFP_KERNEL and GFP_ATOMIC
can be used as before.

Reserve __GFP_MOVABLE in bit 3, so that it can continue to be used as
a flag. Same as before, __GFP_MOVABLE represents the movable migrate type
for ZONE_DMA, ZONE_DMA32, and ZONE_NORMAL. But when it is enabled with
__GFP_HIGHMEM, ZONE_MOVABLE shall be returned instead of ZONE_HIGHMEM.
__GFP_ZONE_MOVABLE is created to realize it.

With this patch, just enabling __GFP_MOVABLE and __GFP_HIGHMEM is not
enough to get ZONE_MOVABLE from gfp_zone. All subsystems should use
GFP_HIGHUSER_MOVABLE directly to achieve that.

Decode zone number directly from bottom three bits of flags in gfp_zone.
The theory of encoding and decoding is,
A ^ B ^ B = A

Suggested-by: Matthew Wilcox 
Signed-off-by: Huaisheng Ye 
Cc: Andrew Morton 
Cc: Vlastimil Babka 
Cc: Michal Hocko 
Cc: Mel Gorman 
Cc: Kate Stewart 
Cc: "Levin, Alexander (Sasha Levin)" 
Cc: Greg Kroah-Hartman 
---
 include/linux/gfp.h | 98 ++---
 1 file changed, 11 insertions(+), 87 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 1a4582b..ab0fb7f 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -16,9 +16,7 @@
  */
 
 /* Plain integer GFP bitmasks. Do not use this directly. */
-#define ___GFP_DMA 0x01u
-#define ___GFP_HIGHMEM 0x02u
-#define ___GFP_DMA32   0x04u
+#define ___GFP_ZONE_MASK   0x07u
 #define ___GFP_MOVABLE 0x08u
 #define ___GFP_RECLAIMABLE 0x10u
 #define ___GFP_HIGH0x20u
@@ -53,11 +51,15 @@
  * without the underscores and use them consistently. The definitions here may
  * be used in bit comparisons.
  */
-#define __GFP_DMA  ((__force gfp_t)___GFP_DMA)
-#define __GFP_HIGHMEM  ((__force gfp_t)___GFP_HIGHMEM)
-#define __GFP_DMA32((__force gfp_t)___GFP_DMA32)
+#define __GFP_DMA  ((__force gfp_t)OPT_ZONE_DMA ^ ZONE_NORMAL)
+#define __GFP_HIGHMEM  ((__force gfp_t)OPT_ZONE_HIGHMEM ^ ZONE_NORMAL)
+#define __GFP_DMA32((__force gfp_t)OPT_ZONE_DMA32 ^ ZONE_NORMAL)
 #define __GFP_MOVABLE  ((__force gfp_t)___GFP_MOVABLE)  /* ZONE_MOVABLE 
allowed */
-#define GFP_ZONEMASK   (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
+#define GFP_ZONEMASK   ((__force gfp_t)___GFP_ZONE_MASK | ___GFP_MOVABLE)
+/* bottom 3 bits of GFP bitmasks are used for zone number encoded*/
+#define __GFP_ZONE_MASK ((__force gfp_t)___GFP_ZONE_MASK)
+#define __GFP_ZONE_MOVABLE \
+   ((__force gfp_t)(ZONE_MOVABLE ^ ZONE_NORMAL) | ___GFP_MOVABLE)
 
 /*
  * Page mobility and placement hints
@@ -279,7 +281,7 @@
 #define GFP_DMA__GFP_DMA
 #define GFP_DMA32  __GFP_DMA32
 #define GFP_HIGHUSER   (GFP_USER | __GFP_HIGHMEM)
-#define GFP_HIGHUSER_MOVABLE   (GFP_HIGHUSER | __GFP_MOVABLE)
+#define GFP_HIGHUSER_MOVABLE   (GFP_USER | __GFP_ZONE_MOVABLE)
 #define GFP_TRANSHUGE_LIGHT((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
 __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
 #define GFP_TRANSHUGE  (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
@@ -326,87 +328,9 @@ static inline bool gfpflags_allow_blocking(const gfp_t 
gfp_flags)
 #define OPT_ZONE_DMA32 ZONE_NORMAL
 #endif
 
-/*
- * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the
- * zone to use given the lowest 4 bits of gfp_t. Entries are GFP_ZONES_SHIFT
- * bits long and there are 16 of them to cover all possible combinations of
- * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM.
- *
- * The zone fallback order is MOVABLE=>HIGHMEM=>NORMAL=>DMA32=>DMA.
- * But GFP_MOVABLE is not only a zone specifier but also an allocation
- * policy. Therefore __GFP_MOVABLE plus another zone selector is valid.
- * Only 1 bit of the lowest 3 bits (DMA,DMA32,HIGHMEM) can be set to "1".
- *
- *   bit   result
- *   =
- *   0x0=> NORMAL
- *   0x1=> DMA or NORMAL
- *   0x2=> HIGHMEM or NORMAL
- *   0x3=> BAD (DMA+HIGHMEM)
- *   0x4=> DMA32 or DMA or NORMAL
- *   0x5=> BAD (DMA+DMA32)
- *   0x6=> BAD (HIGHMEM+DMA32)
- *   0x7=> BAD (HIGHMEM+DMA32+DMA)
- *   0x8=> NORMAL (MOVABLE+0)
- *   0x9=> DMA or NORMAL (MOVABLE+DMA)
- *   0xa=> MOVABLE (Movable is valid only if HIGHMEM is set too)
- *   0xb=> BAD (MOVABLE+HIGHMEM+DMA)
- *   0xc=> DMA32 (MOVABLE+DMA32)
- *   0xd=> BAD (MOVABLE+DMA32+DMA)
- * 

[RFC PATCH v2 04/12] drivers/iommu/amd_iommu: update usage of address zone modifiers

2018-05-21 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Use __GFP_ZONE_MASK to replace (__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.
__GFP_DMA, __GFP_HIGHMEM and __GFP_DMA32 should no longer be combined with OR.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Joerg Roedel <j...@8bytes.org>
---
 drivers/iommu/amd_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 74788fd..3921d53 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2614,7 +2614,7 @@ static void *alloc_coherent(struct device *dev, size_t 
size,
dma_dom   = to_dma_ops_domain(domain);
size  = PAGE_ALIGN(size);
dma_mask  = dev->coherent_dma_mask;
-   flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+   flag &= ~__GFP_ZONE_MASK;
flag |= __GFP_ZERO;
 
page = alloc_pages(flag | __GFP_NOWARN,  get_order(size));
-- 
1.8.3.1



[RFC PATCH v2 04/12] drivers/iommu/amd_iommu: update usage of address zone modifiers

2018-05-21 Thread Huaisheng Ye
From: Huaisheng Ye 

Use __GFP_ZONE_MASK to replace (__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.
__GFP_DMA, __GFP_HIGHMEM and __GFP_DMA32 should no longer be combined with OR.

Signed-off-by: Huaisheng Ye 
Cc: Joerg Roedel 
---
 drivers/iommu/amd_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 74788fd..3921d53 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2614,7 +2614,7 @@ static void *alloc_coherent(struct device *dev, size_t 
size,
dma_dom   = to_dma_ops_domain(domain);
size  = PAGE_ALIGN(size);
dma_mask  = dev->coherent_dma_mask;
-   flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+   flag &= ~__GFP_ZONE_MASK;
flag |= __GFP_ZERO;
 
page = alloc_pages(flag | __GFP_NOWARN,  get_order(size));
-- 
1.8.3.1



[RFC PATCH v2 10/12] mm/zsmalloc: update usage of address zone modifiers

2018-05-21 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Use __GFP_ZONE_MOVABLE to replace (__GFP_HIGHMEM | __GFP_MOVABLE).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.

__GFP_ZONE_MOVABLE contains encoded ZONE_MOVABLE and __GFP_MOVABLE flag.

With GFP_ZONE_TABLE, __GFP_HIGHMEM ORing __GFP_MOVABLE means gfp_zone
should return ZONE_MOVABLE. In order to keep that compatible with
GFP_ZONE_TABLE, replace (__GFP_HIGHMEM | __GFP_MOVABLE) with
__GFP_ZONE_MOVABLE.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Minchan Kim <minc...@kernel.org>
Cc: Nitin Gupta <ngu...@vflare.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky.w...@gmail.com>
---
 mm/zsmalloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index c301350..06b2902 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -343,7 +343,7 @@ static void destroy_cache(struct zs_pool *pool)
 static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
 {
return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
-   gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+   gfp & ~__GFP_ZONE_MOVABLE);
 }
 
 static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
@@ -354,7 +354,7 @@ static void cache_free_handle(struct zs_pool *pool, 
unsigned long handle)
 static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
 {
return kmem_cache_alloc(pool->zspage_cachep,
-   flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+   flags & ~__GFP_ZONE_MOVABLE);
 }
 
 static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
-- 
1.8.3.1



[RFC PATCH v2 10/12] mm/zsmalloc: update usage of address zone modifiers

2018-05-21 Thread Huaisheng Ye
From: Huaisheng Ye 

Use __GFP_ZONE_MOVABLE to replace (__GFP_HIGHMEM | __GFP_MOVABLE).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.

__GFP_ZONE_MOVABLE contains encoded ZONE_MOVABLE and __GFP_MOVABLE flag.

With GFP_ZONE_TABLE, __GFP_HIGHMEM ORing __GFP_MOVABLE means gfp_zone
should return ZONE_MOVABLE. In order to keep that compatible with
GFP_ZONE_TABLE, replace (__GFP_HIGHMEM | __GFP_MOVABLE) with
__GFP_ZONE_MOVABLE.

Signed-off-by: Huaisheng Ye 
Cc: Minchan Kim 
Cc: Nitin Gupta 
Cc: Sergey Senozhatsky 
---
 mm/zsmalloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index c301350..06b2902 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -343,7 +343,7 @@ static void destroy_cache(struct zs_pool *pool)
 static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
 {
return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
-   gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+   gfp & ~__GFP_ZONE_MOVABLE);
 }
 
 static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
@@ -354,7 +354,7 @@ static void cache_free_handle(struct zs_pool *pool, 
unsigned long handle)
 static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
 {
return kmem_cache_alloc(pool->zspage_cachep,
-   flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+   flags & ~__GFP_ZONE_MOVABLE);
 }
 
 static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
-- 
1.8.3.1



[RFC PATCH v2 12/12] arch/x86/include/asm/page.h: update usage of movableflags

2018-05-21 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

GFP_HIGHUSER_MOVABLE is no longer equal to GFP_HIGHUSER | __GFP_MOVABLE;
update this user to match the patch that gets rid of GFP_ZONE_TABLE/BAD.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: "H. Peter Anvin" <h...@zytor.com>
Cc: Kate Stewart <kstew...@linuxfoundation.org>
Cc: Greg Kroah-Hartman <gre...@linuxfoundation.org>
Cc: x...@kernel.org <x...@kernel.org>
Cc: Philippe Ombredanne <pombreda...@nexb.com>
---
 arch/x86/include/asm/page.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 7555b48..a47f42d 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -35,7 +35,8 @@ static inline void copy_user_page(void *to, void *from, 
unsigned long vaddr,
 }
 
 #define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
-   alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+   alloc_page_vma((movableflags ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER) \
+   | __GFP_ZERO, vma, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
 #ifndef __pa
-- 
1.8.3.1



[RFC PATCH v2 12/12] arch/x86/include/asm/page.h: update usage of movableflags

2018-05-21 Thread Huaisheng Ye
From: Huaisheng Ye 

GFP_HIGHUSER_MOVABLE is no longer equal to GFP_HIGHUSER | __GFP_MOVABLE;
update this user to match the patch that gets rid of GFP_ZONE_TABLE/BAD.

Signed-off-by: Huaisheng Ye 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: "H. Peter Anvin" 
Cc: Kate Stewart 
Cc: Greg Kroah-Hartman 
Cc: x...@kernel.org 
Cc: Philippe Ombredanne 
---
 arch/x86/include/asm/page.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 7555b48..a47f42d 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -35,7 +35,8 @@ static inline void copy_user_page(void *to, void *from, 
unsigned long vaddr,
 }
 
 #define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
-   alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+   alloc_page_vma((movableflags ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER) \
+   | __GFP_ZERO, vma, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
 #ifndef __pa
-- 
1.8.3.1



[RFC PATCH v2 05/12] include/linux/dma-mapping: update usage of address zone modifiers

2018-05-21 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Use __GFP_ZONE_MASK to replace (__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.
__GFP_DMA, __GFP_HIGHMEM and __GFP_DMA32 should no longer be combined with
each other by OR.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Christoph Hellwig <h...@lst.de>
Cc: Marek Szyprowski <m.szyprow...@samsung.com>
Cc: Robin Murphy <robin.mur...@arm.com>
---
 include/linux/dma-mapping.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index eb9eab4..3da0293 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -523,7 +523,7 @@ static inline void *dma_alloc_attrs(struct device *dev, 
size_t size,
 * decide on the way of zeroing the memory given that the memory
 * returned should always be zeroed.
 */
-   flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM | __GFP_ZERO);
+   flag &= ~(__GFP_ZONE_MASK | __GFP_ZERO);
 
if (!arch_dma_alloc_attrs(&dev, &flag))
return NULL;
-- 
1.8.3.1



[RFC PATCH v2 11/12] include/linux/highmem: update usage of movableflags

2018-05-21 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

GFP_HIGHUSER_MOVABLE is no longer equal to GFP_HIGHUSER | __GFP_MOVABLE;
update this user to match the patch that gets rid of GFP_ZONE_TABLE/BAD.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Kate Stewart <kstew...@linuxfoundation.org>
Cc: Greg Kroah-Hartman <gre...@linuxfoundation.org>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Philippe Ombredanne <pombreda...@nexb.com>
---
 include/linux/highmem.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 776f90f..da34260 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -159,8 +159,8 @@ static inline void clear_user_highpage(struct page *page, 
unsigned long vaddr)
struct vm_area_struct *vma,
unsigned long vaddr)
 {
-   struct page *page = alloc_page_vma(GFP_HIGHUSER | movableflags,
-   vma, vaddr);
+   struct page *page = alloc_page_vma(movableflags ?
+   GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER, vma, vaddr);
 
if (page)
clear_user_highpage(page, vaddr);
-- 
1.8.3.1



[RFC PATCH v2 05/12] include/linux/dma-mapping: update usage of address zone modifiers

2018-05-21 Thread Huaisheng Ye
From: Huaisheng Ye 

Use __GFP_ZONE_MASK to replace (__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.
__GFP_DMA, __GFP_HIGHMEM and __GFP_DMA32 should no longer be combined with
each other by OR.

Signed-off-by: Huaisheng Ye 
Cc: Christoph Hellwig 
Cc: Marek Szyprowski 
Cc: Robin Murphy 
---
 include/linux/dma-mapping.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index eb9eab4..3da0293 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -523,7 +523,7 @@ static inline void *dma_alloc_attrs(struct device *dev, 
size_t size,
 * decide on the way of zeroing the memory given that the memory
 * returned should always be zeroed.
 */
-   flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM | __GFP_ZERO);
+   flag &= ~(__GFP_ZONE_MASK | __GFP_ZERO);
 
if (!arch_dma_alloc_attrs(&dev, &flag))
return NULL;
-- 
1.8.3.1



[RFC PATCH v2 11/12] include/linux/highmem: update usage of movableflags

2018-05-21 Thread Huaisheng Ye
From: Huaisheng Ye 

GFP_HIGHUSER_MOVABLE is no longer equal to GFP_HIGHUSER | __GFP_MOVABLE;
update this user to match the patch that gets rid of GFP_ZONE_TABLE/BAD.

Signed-off-by: Huaisheng Ye 
Cc: Kate Stewart 
Cc: Greg Kroah-Hartman 
Cc: Thomas Gleixner 
Cc: Philippe Ombredanne 
---
 include/linux/highmem.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 776f90f..da34260 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -159,8 +159,8 @@ static inline void clear_user_highpage(struct page *page, 
unsigned long vaddr)
struct vm_area_struct *vma,
unsigned long vaddr)
 {
-   struct page *page = alloc_page_vma(GFP_HIGHUSER | movableflags,
-   vma, vaddr);
+   struct page *page = alloc_page_vma(movableflags ?
+   GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER, vma, vaddr);
 
if (page)
clear_user_highpage(page, vaddr);
-- 
1.8.3.1



[RFC PATCH v2 03/12] arch/x86/kernel/pci-calgary_64: update usage of address zone modifiers

2018-05-21 Thread Huaisheng Ye
From: Huaisheng Ye <ye...@lenovo.com>

Use __GFP_ZONE_MASK to replace (__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.
__GFP_DMA, __GFP_HIGHMEM and __GFP_DMA32 should no longer be combined with OR.

Signed-off-by: Huaisheng Ye <ye...@lenovo.com>
Cc: Muli Ben-Yehuda <mu...@mulix.org>
Cc: Jon Mason <jdma...@kudzu.us>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: "H. Peter Anvin" <h...@zytor.com>
---
 arch/x86/kernel/pci-calgary_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 35c461f..c89717d 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -445,7 +445,7 @@ static void* calgary_alloc_coherent(struct device *dev, 
size_t size,
npages = size >> PAGE_SHIFT;
order = get_order(size);
 
-   flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+   flag &= ~__GFP_ZONE_MASK;
 
/* alloc enough pages (and possibly more) */
ret = (void *)__get_free_pages(flag, order);
-- 
1.8.3.1



[RFC PATCH v2 03/12] arch/x86/kernel/pci-calgary_64: update usage of address zone modifiers

2018-05-21 Thread Huaisheng Ye
From: Huaisheng Ye 

Use __GFP_ZONE_MASK to replace (__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32).

___GFP_DMA, ___GFP_HIGHMEM and ___GFP_DMA32 have been deleted from the GFP
bitmasks; the bottom three bits of the GFP mask are reserved for storing
the encoded zone number.
__GFP_DMA, __GFP_HIGHMEM and __GFP_DMA32 should no longer be combined with OR.

Signed-off-by: Huaisheng Ye 
Cc: Muli Ben-Yehuda 
Cc: Jon Mason 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: "H. Peter Anvin" 
---
 arch/x86/kernel/pci-calgary_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 35c461f..c89717d 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -445,7 +445,7 @@ static void* calgary_alloc_coherent(struct device *dev, 
size_t size,
npages = size >> PAGE_SHIFT;
order = get_order(size);
 
-   flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+   flag &= ~__GFP_ZONE_MASK;
 
/* alloc enough pages (and possibly more) */
ret = (void *)__get_free_pages(flag, order);
-- 
1.8.3.1



  1   2   >