[PATCH v7 6/9] qcow2_format.py: Dump bitmap table serialized entries

2020-06-11 Thread Andrey Shinkevich
Add bitmap table information to the QCOW2 metadata dump.
It extends the output of the test case #291

Bitmap name   bitmap-1
...
Bitmap table   typeoffset   size
0  serialized  4718592  65536
1  serialized  4294967296   65536
2  serialized  5348033147437056 65536
3  serialized  1379227385882214465536
4  serialized  4718592  65536
5  serialized  4294967296   65536
6  serialized  4503608217305088 65536
7  serialized  1407374883553280065536

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Eric Blake 
---
 tests/qemu-iotests/291.out | 50 ++
 tests/qemu-iotests/qcow2_format.py | 40 ++
 2 files changed, 90 insertions(+)

diff --git a/tests/qemu-iotests/291.out b/tests/qemu-iotests/291.out
index d847419..595327c 100644
--- a/tests/qemu-iotests/291.out
+++ b/tests/qemu-iotests/291.out
@@ -42,6 +42,16 @@ type  1
 granularity_bits  19
 name_size 2
 extra_data_size   0
+Bitmap table   typeoffset   size
+0  serialized  5046272  65536
+1  all-zeroes  065536
+2  all-zeroes  065536
+3  all-zeroes  065536
+4  all-zeroes  065536
+5  all-zeroes  065536
+6  all-zeroes  065536
+7  all-zeroes  065536
+
 
 Bitmap name   b2
 flag  auto
@@ -53,6 +63,16 @@ type  1
 granularity_bits  16
 name_size 2
 extra_data_size   0
+Bitmap table   typeoffset   size
+0  serialized  5177344  65536
+1  all-zeroes  065536
+2  all-zeroes  065536
+3  all-zeroes  065536
+4  all-zeroes  065536
+5  all-zeroes  065536
+6  all-zeroes  065536
+7  all-zeroes  065536
+
 
 
 === Bitmap preservation not possible to non-qcow2 ===
@@ -128,6 +148,16 @@ type  1
 granularity_bits  19
 name_size 2
 extra_data_size   0
+Bitmap table   typeoffset   size
+0  serialized  4587520  65536
+1  all-zeroes  065536
+2  all-zeroes  065536
+3  all-zeroes  065536
+4  all-zeroes  065536
+5  all-zeroes  065536
+6  all-zeroes  065536
+7  all-zeroes  065536
+
 
 Bitmap name   b2
 flag  auto
@@ -139,6 +169,16 @@ type  1
 granularity_bits  16
 name_size 2
 extra_data_size   0
+Bitmap table   typeoffset   size
+0  serialized  4718592  65536
+1  serialized  4294967296   65536
+2  serialized  5348033147437056 65536
+3  serialized  1379227385882214465536
+4  serialized  4718592  65536
+5  serialized  4294967296   65536
+6  serialized  4503608217305088 65536
+7  serialized  1407374883553280065536
+
 
 Bitmap name   b0
 table size8 (bytes)
@@ -149,6 +189,16 @@ type  1
 granularity_bits  16
 name_size 2
 extra_data_size   0
+Bitmap table   typeoffset   size
+0  serialized  5242880  65536
+1  all-zeroes  065536
+2  all-zeroes  065536
+3  all-zeroes  065536
+4  all-zeroes  065536
+5  all-zeroes  065536
+6  all-zeroes  065536
+7  all-zeroes  065536
+
 
 
 === Check bitmap contents ===
diff --git a/tests/qemu-iotests/qcow2_format.py 

[PATCH v7 9/9] qcow2_format.py: support dumping metadata in JSON format

2020-06-11 Thread Andrey Shinkevich
Implementation of dumping QCOW2 image metadata.
The sample output:
{
"Header_extensions": [
{
"name": "Feature table",
"magic": 1745090647,
"length": 192,
"data_str": ""
},
{
"name": "Bitmaps",
"magic": 595929205,
"length": 24,
"data": {
"nb_bitmaps": 2,
"reserved32": 0,
"bitmap_directory_size": 64,
"bitmap_directory_offset": 1048576,
"entries": [
{
"name": "bitmap-1",
"bitmap_table_offset": 589824,
"bitmap_table_size": 1,
"flags": [
"auto"
],
"type": 1,
"granularity_bits": 16,
"name_size": 8,
"extra_data_size": 0,
"bitmap_table": {
"table_entries": [
{
"type": "serialized",
"offset": 655360
},
...

Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Andrey Shinkevich 
---
 tests/qemu-iotests/qcow2_format.py | 60 ++
 1 file changed, 60 insertions(+)

diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index bdd5f68..35daec9 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -18,6 +18,15 @@
 
 import struct
 import string
+import json
+
+
+class ComplexEncoder(json.JSONEncoder):
+def default(self, obj):
+if hasattr(obj, 'get_fields_dict'):
+return obj.get_fields_dict()
+else:
+return json.JSONEncoder.default(self, obj)
 
 
 class Qcow2Field:
@@ -95,6 +104,11 @@ class Qcow2Struct(metaclass=Qcow2StructMeta):
 self.fields_dict = self.__dict__.copy()
 
 def dump(self, dump_json=None):
+if dump_json:
+print(json.dumps(self.get_fields_dict(), indent=4,
+ cls=ComplexEncoder))
+return
+
 for f in self.fields:
 value = self.__dict__[f[2]]
 if isinstance(f[1], str):
@@ -150,6 +164,9 @@ class Qcow2BitmapExt(Qcow2Struct):
 for bm in self.bitmaps:
 bm.dump_bitmap_dir_entry(dump_json)
 
+def get_fields_dict(self):
+return self.fields_dict
+
 
 BME_FLAG_IN_USE = 1 << 0
 BME_FLAG_AUTO = 1 << 1
@@ -202,6 +219,12 @@ class Qcow2BitmapDirEntry(Qcow2Struct):
 super().dump()
 self.bitmap_table.print_bitmap_table(self.cluster_size)
 
+def get_fields_dict(self):
+bmp_name = dict(name=self.name)
+bme_dict = {**bmp_name, **self.fields_dict}
+bme_dict['flags'] = self.bitmap_flags
+return bme_dict
+
 
 class Qcow2BitmapTableEntry:
 
@@ -217,6 +240,9 @@ class Qcow2BitmapTableEntry:
 else:
 self.type = 'all-zeroes'
 
+def get_fields_dict(self):
+return dict(type=self.type, offset=self.offset)
+
 
 class Qcow2BitmapTable:
 
@@ -232,6 +258,18 @@ class Qcow2BitmapTable:
 print(f'{i:<14} {entry.type:<15} {entry.offset:<24} 
{cluster_size}')
 print("")
 
+def get_fields_dict(self):
+return dict(table_entries=self.entries)
+
+
+class Qcow2HeaderExtensionsDoc:
+
+def __init__(self, extensions):
+self.extensions = extensions
+
+def get_fields_dict(self):
+return dict(Header_extensions=self.extensions)
+
 
 QCOW2_EXT_MAGIC_BITMAPS = 0x23852875
 
@@ -247,6 +285,9 @@ class QcowHeaderExtension(Qcow2Struct):
 0x44415441: 'Data file'
 }
 
+def get_fields_dict(self):
+return self.mapping.get(self.value, "")
+
 fields = (
 ('u32', Magic, 'magic'),
 ('u32', '{}', 'length')
@@ -307,6 +348,16 @@ class QcowHeaderExtension(Qcow2Struct):
 else:
 self.obj.dump(dump_json)
 
+def get_fields_dict(self):
+ext_name = dict(name=self.Magic(self.magic))
+he_dict = {**ext_name, **self.fields_dict}
+if self.obj is not None:
+he_dict.update(data=self.obj)
+else:
+he_dict.update(data_str=self.data_str)
+
+return he_dict
+
 @classmethod
 def create(cls, magic, data):
 return QcowHeaderExtension(magic, len(data), data)
@@ -409,7 +460,16 @@ class QcowHeader(Qcow2Struct):
 fd.write(buf)
 
 def dump_extensions(self, dump_json=None):
+if dump_json:
+ext_doc = Qcow2HeaderExtensionsDoc(self.extensions)
+print(json.dumps(ext_doc.get_fields_dict(), indent=4,
+ cls=ComplexEncoder))
+return
+
 for ex in self.extensions:
   

[PATCH v7 5/9] qcow2_format.py: pass cluster size to substructures

2020-06-11 Thread Andrey Shinkevich
The cluster size of an image is a QcowHeader class member and may be
obtained by dependent extension structures, such as Qcow2BitmapExt, for
printing further bitmap table details.

Signed-off-by: Andrey Shinkevich 
---
 tests/qemu-iotests/qcow2_format.py | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index 90eff92..eb99119 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -116,6 +116,10 @@ class Qcow2BitmapExt(Qcow2Struct):
 ('u64', '{:#x}', 'bitmap_directory_offset')
 )
 
+def __init__(self, data, cluster_size):
+super().__init__(data=data)
+self.cluster_size = cluster_size
+
 def read_bitmap_directory(self, fd):
 self.bitmaps = []
 fd.seek(self.bitmap_directory_offset)
@@ -123,7 +127,8 @@ class Qcow2BitmapExt(Qcow2Struct):
 
 for n in range(self.nb_bitmaps):
 buf = fd.read(buf_size)
-dir_entry = Qcow2BitmapDirEntry(data=buf)
+dir_entry = Qcow2BitmapDirEntry(data=buf,
+cluster_size=self.cluster_size)
 fd.seek(dir_entry.extra_data_size, FROM_CURRENT)
 bitmap_name = fd.read(dir_entry.name_size)
 dir_entry.name = bitmap_name.decode('ascii')
@@ -159,8 +164,9 @@ class Qcow2BitmapDirEntry(Qcow2Struct):
 ('u32', '{}','extra_data_size')
 )
 
-def __init__(self, data):
+def __init__(self, data, cluster_size):
 super().__init__(data=data)
+self.cluster_size = cluster_size
 
 self.bitmap_table_bytes = self.bitmap_table_size \
 * struct.calcsize('Q')
@@ -205,11 +211,13 @@ class QcowHeaderExtension(Qcow2Struct):
 # then padding to next multiply of 8
 )
 
-def __init__(self, magic=None, length=None, data=None, fd=None):
+def __init__(self, magic=None, length=None, data=None, fd=None,
+ cluster_size=None):
 """
 Support both loading from fd and creation from user data.
 For fd-based creation current position in a file will be used to read
 the data.
+The cluster_size value may be obtained by dependent structures.
 
 This should be somehow refactored and functionality should be moved to
 superclass (to allow creation of any qcow2 struct), but then, fields
@@ -243,7 +251,8 @@ class QcowHeaderExtension(Qcow2Struct):
 self.data_str = data_str
 
 if self.magic == QCOW2_EXT_MAGIC_BITMAPS:
-self.obj = Qcow2BitmapExt(data=self.data)
+self.obj = Qcow2BitmapExt(data=self.data,
+  cluster_size=cluster_size)
 else:
 self.obj = None
 
@@ -318,7 +327,7 @@ class QcowHeader(Qcow2Struct):
 end = self.cluster_size
 
 while fd.tell() < end:
-ext = QcowHeaderExtension(fd=fd)
+ext = QcowHeaderExtension(fd=fd, cluster_size=self.cluster_size)
 if ext.magic == 0:
 break
 else:
-- 
1.8.3.1




[PATCH v7 2/9] qcow2: Fix capitalization of header extension constant.

2020-06-11 Thread Andrey Shinkevich
Make the capitalization of the hexadecimal numbers consistent for the
QCOW2 header extension constants in docs/interop/qcow2.txt.

Suggested-by: Eric Blake 
Signed-off-by: Andrey Shinkevich 
---
 block/qcow2.c  | 2 +-
 docs/interop/qcow2.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 0cd2e67..80dfe5f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -66,7 +66,7 @@ typedef struct {
 } QEMU_PACKED QCowExtension;
 
 #define  QCOW2_EXT_MAGIC_END 0
-#define  QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
+#define  QCOW2_EXT_MAGIC_BACKING_FORMAT 0xe2792aca
 #define  QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
 #define  QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77
 #define  QCOW2_EXT_MAGIC_BITMAPS 0x23852875
diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
index cb72346..f072e27 100644
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.txt
@@ -231,7 +231,7 @@ be stored. Each extension has a structure like the 
following:
 
 Byte  0 -  3:   Header extension type:
 0x - End of the header extension area
-0xE2792ACA - Backing file format name string
+0xe2792aca - Backing file format name string
 0x6803f857 - Feature name table
 0x23852875 - Bitmaps extension
 0x0537be77 - Full disk encryption header pointer
-- 
1.8.3.1




[PATCH v7 1/9] iotests: Fix for magic hexadecimal output in 291

2020-06-11 Thread Andrey Shinkevich
This issue was introduced in the earlier patch:
"qcow2_format: refactor QcowHeaderExtension as a subclass of
Qcow2Struct".

Signed-off-by: Andrey Shinkevich 
---
 tests/qemu-iotests/291.out | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/qemu-iotests/291.out b/tests/qemu-iotests/291.out
index 1d4f9cd..ccfcdc5 100644
--- a/tests/qemu-iotests/291.out
+++ b/tests/qemu-iotests/291.out
@@ -16,17 +16,17 @@ wrote 1048576/1048576 bytes at offset 2097152
 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 Check resulting qcow2 header extensions:
 Header extension:
-magic 3799591626 (Backing format)
+magic 0xe2792aca (Backing format)
 length5
 data  'qcow2'
 
 Header extension:
-magic 1745090647 (Feature table)
+magic 0x6803f857 (Feature table)
 length336
 data  
 
 Header extension:
-magic 595929205 (Bitmaps)
+magic 0x23852875 (Bitmaps)
 length24
 nb_bitmaps2
 reserved320
@@ -86,12 +86,12 @@ Format specific information:
 corrupt: false
 Check resulting qcow2 header extensions:
 Header extension:
-magic 1745090647 (Feature table)
+magic 0x6803f857 (Feature table)
 length336
 data  
 
 Header extension:
-magic 595929205 (Bitmaps)
+magic 0x23852875 (Bitmaps)
 length24
 nb_bitmaps3
 reserved320
-- 
1.8.3.1




[PATCH v7 0/9] iotests: Dump QCOW2 dirty bitmaps metadata

2020-06-11 Thread Andrey Shinkevich
Note: based on the Vladimir's series
  [v5 00/13] iotests: Dump QCOW2 dirty bitmaps metadata

Add dirty bitmap information to QCOW2 metadata dump in the qcow2_format.py.

v7:
  01: Fix for magic hexadecimal output in 291
  02: Bitmap table output format improvement.
  03: Incremental change in the test 291 output.

v6:
  01: Fixing capitalization of header extension constant.
  (Suggested by Eric)
  02: The cluster size global variable was discarded and is now passed as a parameter.
  03: Re-based to Vladimir's v5 series.
  04: The code of passing qcow2.py JSON format key moved to separate patch.
  05: Making dict(s) for dumping in JSON format was substituted with a copy
  of __dict__.

v5: The Vladimir's preliminary series
v4: The Vladimir's preliminary series

Andrey Shinkevich (9):
  iotests: Fix for magic hexadecimal output in 291
  qcow2: Fix capitalization of header extension constant.
  qcow2_format.py: make printable data an extension class member
  qcow2_format.py: Dump bitmap directory information
  qcow2_format.py: pass cluster size to substructures
  qcow2_format.py: Dump bitmap table serialized entries
  qcow2.py: Introduce '-j' key to dump in JSON format
  qcow2_format.py: collect fields to dump in JSON format
  qcow2_format.py: support dumping metadata in JSON format

 block/qcow2.c  |   2 +-
 docs/interop/qcow2.txt |   2 +-
 tests/qemu-iotests/291.out | 112 ++-
 tests/qemu-iotests/qcow2.py|  20 +++-
 tests/qemu-iotests/qcow2_format.py | 217 ++---
 5 files changed, 327 insertions(+), 26 deletions(-)

-- 
1.8.3.1




[PATCH v7 7/9] qcow2.py: Introduce '-j' key to dump in JSON format

2020-06-11 Thread Andrey Shinkevich
Add the command key to the qcow2.py arguments list to dump QCOW2
metadata in JSON format. Here is the suggested way to do that. The
implementation of the dump in JSON format is in the patch that follows.

Signed-off-by: Andrey Shinkevich 
---
 tests/qemu-iotests/qcow2.py| 20 +++-
 tests/qemu-iotests/qcow2_format.py | 18 +-
 2 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/tests/qemu-iotests/qcow2.py b/tests/qemu-iotests/qcow2.py
index 8c187e9..b08d8fc 100755
--- a/tests/qemu-iotests/qcow2.py
+++ b/tests/qemu-iotests/qcow2.py
@@ -24,16 +24,19 @@ from qcow2_format import (
 )
 
 
+dump_json = False
+
+
 def cmd_dump_header(fd):
 h = QcowHeader(fd)
-h.dump()
+h.dump(dump_json)
 print()
-h.dump_extensions()
+h.dump_extensions(dump_json)
 
 
 def cmd_dump_header_exts(fd):
 h = QcowHeader(fd)
-h.dump_extensions()
+h.dump_extensions(dump_json)
 
 
 def cmd_set_header(fd, name, value):
@@ -132,6 +135,11 @@ cmds = [
 
 
 def main(filename, cmd, args):
+global dump_json
+dump_json = '-j' in sys.argv
+if dump_json:
+sys.argv.remove('-j')
+args.remove('-j')
 fd = open(filename, "r+b")
 try:
 for name, handler, num_args, desc in cmds:
@@ -149,12 +157,14 @@ def main(filename, cmd, args):
 
 
 def usage():
-print("Usage: %s   [, ...]" % sys.argv[0])
+print("Usage: %s   [, ...] [, ...]" % sys.argv[0])
 print("")
 print("Supported commands:")
 for name, handler, num_args, desc in cmds:
 print("%-20s - %s" % (name, desc))
-
+print("")
+print("Supported keys:")
+print("%-20s - %s" % ('-j', 'Dump in JSON format'))
 
 if __name__ == '__main__':
 if len(sys.argv) < 3:
diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index c1c2773..a19f3b3 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -92,7 +92,7 @@ class Qcow2Struct(metaclass=Qcow2StructMeta):
 self.__dict__ = dict((field[2], values[i])
  for i, field in enumerate(self.fields))
 
-def dump(self):
+def dump(self, dump_json=None):
 for f in self.fields:
 value = self.__dict__[f[2]]
 if isinstance(f[1], str):
@@ -143,10 +143,10 @@ class Qcow2BitmapExt(Qcow2Struct):
 def load(self, fd):
 self.read_bitmap_directory(fd)
 
-def dump(self):
-super().dump()
+def dump(self, dump_json=None):
+super().dump(dump_json)
 for bm in self.bitmaps:
-bm.dump_bitmap_dir_entry()
+bm.dump_bitmap_dir_entry(dump_json)
 
 
 BME_FLAG_IN_USE = 1 << 0
@@ -190,7 +190,7 @@ class Qcow2BitmapDirEntry(Qcow2Struct):
 table = [e[0] for e in struct.iter_unpack('>Q', fd.read(table_size))]
 self.bitmap_table = Qcow2BitmapTable(table)
 
-def dump_bitmap_dir_entry(self):
+def dump_bitmap_dir_entry(self, dump_json=None):
 print()
 print(f'{"Bitmap name":<25} {self.name}')
 for fl in self.bitmap_flags:
@@ -296,13 +296,13 @@ class QcowHeaderExtension(Qcow2Struct):
 else:
 self.obj = None
 
-def dump(self):
+def dump(self, dump_json=None):
 super().dump()
 
 if self.obj is None:
 print(f'{"data":<25} {self.data_str}')
 else:
-self.obj.dump()
+self.obj.dump(dump_json)
 
 @classmethod
 def create(cls, magic, data):
@@ -405,8 +405,8 @@ class QcowHeader(Qcow2Struct):
 buf = buf[0:header_bytes-1]
 fd.write(buf)
 
-def dump_extensions(self):
+def dump_extensions(self, dump_json=None):
 for ex in self.extensions:
 print('Header extension:')
-ex.dump()
+ex.dump(dump_json)
 print()
-- 
1.8.3.1




[PATCH v7 4/9] qcow2_format.py: Dump bitmap directory information

2020-06-11 Thread Andrey Shinkevich
Read and dump entries from the bitmap directory of a QCOW2 image.
It extends the output of the test case #291.

Header extension:
magic 0x23852875 (Bitmaps)
...
Bitmap name   bitmap-1
flag  auto
table size8 (bytes)
bitmap_table_offset   0x9
bitmap_table_size 1
flags 0
type  1
granularity_bits  16
name_size 8
extra_data_size   0

Suggested-by: Kevin Wolf 
Signed-off-by: Andrey Shinkevich 
Reviewed-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/291.out | 52 ++
 tests/qemu-iotests/qcow2_format.py | 75 ++
 2 files changed, 127 insertions(+)

diff --git a/tests/qemu-iotests/291.out b/tests/qemu-iotests/291.out
index ccfcdc5..d847419 100644
--- a/tests/qemu-iotests/291.out
+++ b/tests/qemu-iotests/291.out
@@ -33,6 +33,27 @@ reserved320
 bitmap_directory_size 0x40
 bitmap_directory_offset   0x51
 
+Bitmap name   b1
+table size8 (bytes)
+bitmap_table_offset   0x4e
+bitmap_table_size 1
+flags 0
+type  1
+granularity_bits  19
+name_size 2
+extra_data_size   0
+
+Bitmap name   b2
+flag  auto
+table size8 (bytes)
+bitmap_table_offset   0x50
+bitmap_table_size 1
+flags 2
+type  1
+granularity_bits  16
+name_size 2
+extra_data_size   0
+
 
 === Bitmap preservation not possible to non-qcow2 ===
 
@@ -98,6 +119,37 @@ reserved320
 bitmap_directory_size 0x60
 bitmap_directory_offset   0x52
 
+Bitmap name   b1
+table size8 (bytes)
+bitmap_table_offset   0x47
+bitmap_table_size 1
+flags 0
+type  1
+granularity_bits  19
+name_size 2
+extra_data_size   0
+
+Bitmap name   b2
+flag  auto
+table size8 (bytes)
+bitmap_table_offset   0x49
+bitmap_table_size 1
+flags 2
+type  1
+granularity_bits  16
+name_size 2
+extra_data_size   0
+
+Bitmap name   b0
+table size8 (bytes)
+bitmap_table_offset   0x51
+bitmap_table_size 1
+flags 0
+type  1
+granularity_bits  16
+name_size 2
+extra_data_size   0
+
 
 === Check bitmap contents ===
 
diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index d4f..90eff92 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -103,6 +103,10 @@ class Qcow2Struct(metaclass=Qcow2StructMeta):
 print('{:<25} {}'.format(f[2], value_str))
 
 
+# seek relative to the current position in the file
+FROM_CURRENT = 1
+
+
 class Qcow2BitmapExt(Qcow2Struct):
 
 fields = (
@@ -112,6 +116,73 @@ class Qcow2BitmapExt(Qcow2Struct):
 ('u64', '{:#x}', 'bitmap_directory_offset')
 )
 
+def read_bitmap_directory(self, fd):
+self.bitmaps = []
+fd.seek(self.bitmap_directory_offset)
+buf_size = struct.calcsize(Qcow2BitmapDirEntry.fmt)
+
+for n in range(self.nb_bitmaps):
+buf = fd.read(buf_size)
+dir_entry = Qcow2BitmapDirEntry(data=buf)
+fd.seek(dir_entry.extra_data_size, FROM_CURRENT)
+bitmap_name = fd.read(dir_entry.name_size)
+dir_entry.name = bitmap_name.decode('ascii')
+self.bitmaps.append(dir_entry)
+entry_raw_size = dir_entry.bitmap_dir_entry_raw_size()
+shift = ((entry_raw_size + 7) & ~7) - entry_raw_size
+fd.seek(shift, FROM_CURRENT)
+
+def load(self, fd):
+self.read_bitmap_directory(fd)
+
+def dump(self):
+super().dump()
+for bm in self.bitmaps:
+bm.dump_bitmap_dir_entry()
+
+
+BME_FLAG_IN_USE = 1 << 0
+BME_FLAG_AUTO = 1 << 1
+
+
+class Qcow2BitmapDirEntry(Qcow2Struct):
+
+name = ''
+
+fields = (
+('u64', '{:#x}', 'bitmap_table_offset'),
+('u32', '{}','bitmap_table_size'),
+('u32', '{}','flags'),
+('u8',  '{}','type'),
+('u8',  '{}','granularity_bits'),
+('u16', '{}','name_size'),
+('u32', '{}','extra_data_size')
+)
+
+def __init__(self, data):
+super().__init__(data=data)
+
+self.bitmap_table_bytes = self.bitmap_table_size \
+* struct.calcsize('Q')
+
+self.bitmap_flags = []
+if (self.flags & BME_FLAG_IN_USE):
+self.bitmap_flags.append("in-use")
+if (self.flags & 

[PATCH v7 8/9] qcow2_format.py: collect fields to dump in JSON format

2020-06-11 Thread Andrey Shinkevich
As __dict__ is being extended with class members that we do not want to
print in the JSON format dump, make a shallow copy of the initial
__dict__ and extend that copy by adding the lists we do want to include
in the JSON output.

Signed-off-by: Andrey Shinkevich 
---
 tests/qemu-iotests/qcow2_format.py | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index a19f3b3..bdd5f68 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -92,6 +92,8 @@ class Qcow2Struct(metaclass=Qcow2StructMeta):
 self.__dict__ = dict((field[2], values[i])
  for i, field in enumerate(self.fields))
 
+self.fields_dict = self.__dict__.copy()
+
 def dump(self, dump_json=None):
 for f in self.fields:
 value = self.__dict__[f[2]]
@@ -102,7 +104,6 @@ class Qcow2Struct(metaclass=Qcow2StructMeta):
 
 print('{:<25} {}'.format(f[2], value_str))
 
-
 # seek relative to the current position in the file
 FROM_CURRENT = 1
 
@@ -142,6 +143,7 @@ class Qcow2BitmapExt(Qcow2Struct):
 
 def load(self, fd):
 self.read_bitmap_directory(fd)
+self.fields_dict.update(entries=self.bitmaps)
 
 def dump(self, dump_json=None):
 super().dump(dump_json)
@@ -189,6 +191,7 @@ class Qcow2BitmapDirEntry(Qcow2Struct):
 table_size = self.bitmap_table_bytes * struct.calcsize('Q')
 table = [e[0] for e in struct.iter_unpack('>Q', fd.read(table_size))]
 self.bitmap_table = Qcow2BitmapTable(table)
+self.fields_dict.update(bitmap_table=self.bitmap_table)
 
 def dump_bitmap_dir_entry(self, dump_json=None):
 print()
-- 
1.8.3.1




[PATCH v7 3/9] qcow2_format.py: make printable data an extension class member

2020-06-11 Thread Andrey Shinkevich
Let us distinguish the binary data type from the string one for the
extension data variable and keep the string as a QcowHeaderExtension
class member.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/qcow2_format.py | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index 0f65fd1..d4f 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -164,6 +164,13 @@ class QcowHeaderExtension(Qcow2Struct):
 self.data = fd.read(padded)
 assert self.data is not None
 
+data_str = self.data[:self.length]
+if all(c in string.printable.encode('ascii') for c in data_str):
+data_str = f"'{ data_str.decode('ascii') }'"
+else:
+data_str = ''
+self.data_str = data_str
+
 if self.magic == QCOW2_EXT_MAGIC_BITMAPS:
 self.obj = Qcow2BitmapExt(data=self.data)
 else:
@@ -173,12 +180,7 @@ class QcowHeaderExtension(Qcow2Struct):
 super().dump()
 
 if self.obj is None:
-data = self.data[:self.length]
-if all(c in string.printable.encode('ascii') for c in data):
-data = f"'{ data.decode('ascii') }'"
-else:
-data = ''
-print(f'{"data":<25} {data}')
+print(f'{"data":<25} {self.data_str}')
 else:
 self.obj.dump()
 
-- 
1.8.3.1




Re: [PATCH v6 1/8] qcow2: Fix capitalization of header extension constant.

2020-06-11 Thread Eric Blake

On 6/11/20 4:19 PM, Andrey Shinkevich wrote:

Make the capitalization of the hexadecimal numbers consistent for the
QCOW2 header extension constants in docs/interop/qcow2.txt.

Suggested-by: Eric Blake 
Signed-off-by: Andrey Shinkevich 
---
  block/qcow2.c  | 2 +-
  docs/interop/qcow2.txt | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)



tests/qemu-iotests/114 has an instance that could be adjusted as well 
(no change to 114.out).


With that added,
Reviewed-by: Eric Blake 

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




[PATCH v6 6/8] qcow2.py: Introduce '-j' key to dump in JSON format

2020-06-11 Thread Andrey Shinkevich
Add the command key to the qcow2.py arguments list to dump QCOW2
metadata in JSON format. Here is the suggested way to do that. The
implementation of the dump in JSON format is in the patch that follows.

Signed-off-by: Andrey Shinkevich 
---
 tests/qemu-iotests/qcow2.py| 20 +++-
 tests/qemu-iotests/qcow2_format.py | 18 +-
 2 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/tests/qemu-iotests/qcow2.py b/tests/qemu-iotests/qcow2.py
index 8c187e9..b08d8fc 100755
--- a/tests/qemu-iotests/qcow2.py
+++ b/tests/qemu-iotests/qcow2.py
@@ -24,16 +24,19 @@ from qcow2_format import (
 )
 
 
+dump_json = False
+
+
 def cmd_dump_header(fd):
 h = QcowHeader(fd)
-h.dump()
+h.dump(dump_json)
 print()
-h.dump_extensions()
+h.dump_extensions(dump_json)
 
 
 def cmd_dump_header_exts(fd):
 h = QcowHeader(fd)
-h.dump_extensions()
+h.dump_extensions(dump_json)
 
 
 def cmd_set_header(fd, name, value):
@@ -132,6 +135,11 @@ cmds = [
 
 
 def main(filename, cmd, args):
+global dump_json
+dump_json = '-j' in sys.argv
+if dump_json:
+sys.argv.remove('-j')
+args.remove('-j')
 fd = open(filename, "r+b")
 try:
 for name, handler, num_args, desc in cmds:
@@ -149,12 +157,14 @@ def main(filename, cmd, args):
 
 
 def usage():
-print("Usage: %s   [, ...]" % sys.argv[0])
+print("Usage: %s   [, ...] [, ...]" % sys.argv[0])
 print("")
 print("Supported commands:")
 for name, handler, num_args, desc in cmds:
 print("%-20s - %s" % (name, desc))
-
+print("")
+print("Supported keys:")
+print("%-20s - %s" % ('-j', 'Dump in JSON format'))
 
 if __name__ == '__main__':
 if len(sys.argv) < 3:
diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index 2e7c058..95db7a9 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -92,7 +92,7 @@ class Qcow2Struct(metaclass=Qcow2StructMeta):
 self.__dict__ = dict((field[2], values[i])
  for i, field in enumerate(self.fields))
 
-def dump(self):
+def dump(self, dump_json=None):
 for f in self.fields:
 value = self.__dict__[f[2]]
 if isinstance(f[1], str):
@@ -143,10 +143,10 @@ class Qcow2BitmapExt(Qcow2Struct):
 def load(self, fd):
 self.read_bitmap_directory(fd)
 
-def dump(self):
-super().dump()
+def dump(self, dump_json=None):
+super().dump(dump_json)
 for bm in self.bitmaps:
-bm.dump_bitmap_dir_entry()
+bm.dump_bitmap_dir_entry(dump_json)
 
 
 BME_FLAG_IN_USE = 1 << 0
@@ -190,7 +190,7 @@ class Qcow2BitmapDirEntry(Qcow2Struct):
 table = [e[0] for e in struct.iter_unpack('>Q', fd.read(table_size))]
 self.bitmap_table = Qcow2BitmapTable(table)
 
-def dump_bitmap_dir_entry(self):
+def dump_bitmap_dir_entry(self, dump_json=None):
 print()
 print(f'{"Bitmap name":<25} {self.name}')
 for fl in self.bitmap_flags:
@@ -298,13 +298,13 @@ class QcowHeaderExtension(Qcow2Struct):
 else:
 self.obj = None
 
-def dump(self):
+def dump(self, dump_json=None):
 super().dump()
 
 if self.obj is None:
 print(f'{"data":<25} {self.data_str}')
 else:
-self.obj.dump()
+self.obj.dump(dump_json)
 
 @classmethod
 def create(cls, magic, data):
@@ -407,8 +407,8 @@ class QcowHeader(Qcow2Struct):
 buf = buf[0:header_bytes-1]
 fd.write(buf)
 
-def dump_extensions(self):
+def dump_extensions(self, dump_json=None):
 for ex in self.extensions:
 print('Header extension:')
-ex.dump()
+ex.dump(dump_json)
 print()
-- 
1.8.3.1




[PATCH v6 1/8] qcow2: Fix capitalization of header extension constant.

2020-06-11 Thread Andrey Shinkevich
Make the capitalization of the hexadecimal numbers consistent for the
QCOW2 header extension constants in docs/interop/qcow2.txt.

Suggested-by: Eric Blake 
Signed-off-by: Andrey Shinkevich 
---
 block/qcow2.c  | 2 +-
 docs/interop/qcow2.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 0cd2e67..80dfe5f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -66,7 +66,7 @@ typedef struct {
 } QEMU_PACKED QCowExtension;
 
 #define  QCOW2_EXT_MAGIC_END 0
-#define  QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
+#define  QCOW2_EXT_MAGIC_BACKING_FORMAT 0xe2792aca
 #define  QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
 #define  QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77
 #define  QCOW2_EXT_MAGIC_BITMAPS 0x23852875
diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
index cb72346..f072e27 100644
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.txt
@@ -231,7 +231,7 @@ be stored. Each extension has a structure like the 
following:
 
 Byte  0 -  3:   Header extension type:
 0x - End of the header extension area
-0xE2792ACA - Backing file format name string
+0xe2792aca - Backing file format name string
 0x6803f857 - Feature name table
 0x23852875 - Bitmaps extension
 0x0537be77 - Full disk encryption header pointer
-- 
1.8.3.1




[PATCH v6 2/8] qcow2_format.py: make printable data an extension class member

2020-06-11 Thread Andrey Shinkevich
Let us distinguish the binary data type from the string one for the
extension data variable and keep the string as a QcowHeaderExtension
class member.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/qcow2_format.py | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index 0f65fd1..d4f 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -164,6 +164,13 @@ class QcowHeaderExtension(Qcow2Struct):
 self.data = fd.read(padded)
 assert self.data is not None
 
+data_str = self.data[:self.length]
+if all(c in string.printable.encode('ascii') for c in data_str):
+data_str = f"'{ data_str.decode('ascii') }'"
+else:
+data_str = ''
+self.data_str = data_str
+
 if self.magic == QCOW2_EXT_MAGIC_BITMAPS:
 self.obj = Qcow2BitmapExt(data=self.data)
 else:
@@ -173,12 +180,7 @@ class QcowHeaderExtension(Qcow2Struct):
 super().dump()
 
 if self.obj is None:
-data = self.data[:self.length]
-if all(c in string.printable.encode('ascii') for c in data):
-data = f"'{ data.decode('ascii') }'"
-else:
-data = ''
-print(f'{"data":<25} {data}')
+print(f'{"data":<25} {self.data_str}')
 else:
 self.obj.dump()
 
-- 
1.8.3.1




[PATCH v6 3/8] qcow2_format.py: Dump bitmap directory info

2020-06-11 Thread Andrey Shinkevich
Read and dump entries from the bitmap directory of a QCOW2 image with
the script qcow2.py.

Header extension:
magic 0x23852875 (Bitmaps)
...
Bitmap name   bitmap-1
flag  auto
table size8 (bytes)
bitmap_table_offset   0x9
bitmap_table_size 1
flags 0
type  1
granularity_bits  16
name_size 8
extra_data_size   0

Suggested-by: Kevin Wolf 
Signed-off-by: Andrey Shinkevich 
Reviewed-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/qcow2_format.py | 75 ++
 1 file changed, 75 insertions(+)

diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index d4f..a7868a7 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -103,6 +103,10 @@ class Qcow2Struct(metaclass=Qcow2StructMeta):
 print('{:<25} {}'.format(f[2], value_str))
 
 
+# seek relative to the current position in the file
+FROM_CURRENT = 1
+
+
 class Qcow2BitmapExt(Qcow2Struct):
 
 fields = (
@@ -112,6 +116,73 @@ class Qcow2BitmapExt(Qcow2Struct):
 ('u64', '{:#x}', 'bitmap_directory_offset')
 )
 
+def read_bitmap_directory(self, fd):
+self.bitmaps = []
+fd.seek(self.bitmap_directory_offset)
+buf_size = struct.calcsize(Qcow2BitmapDirEntry.fmt)
+
+for n in range(self.nb_bitmaps):
+buf = fd.read(buf_size)
+dir_entry = Qcow2BitmapDirEntry(data=buf)
+fd.seek(dir_entry.extra_data_size, FROM_CURRENT)
+bitmap_name = fd.read(dir_entry.name_size)
+dir_entry.name = bitmap_name.decode('ascii')
+self.bitmaps.append(dir_entry)
+entry_raw_size = dir_entry.bitmap_dir_entry_raw_size()
+shift = ((entry_raw_size + 7) & ~7) - entry_raw_size
+fd.seek(shift, FROM_CURRENT)
+
+def load(self, fd):
+self.read_bitmap_directory(fd)
+
+def dump(self):
+super().dump()
+for bm in self.bitmaps:
+bm.dump_bitmap_dir_entry()
+
+
+BME_FLAG_IN_USE = 1 << 0
+BME_FLAG_AUTO = 1 << 1
+
+
+class Qcow2BitmapDirEntry(Qcow2Struct):
+
+name = ''
+
+fields = (
+('u64', '{:#x}', 'bitmap_table_offset'),
+('u32', '{}','bitmap_table_size'),
+('u32', '{}','flags'),
+('u8',  '{}','type'),
+('u8',  '{}','granularity_bits'),
+('u16', '{}','name_size'),
+('u32', '{}','extra_data_size')
+)
+
+def __init__(self, data):
+super().__init__(data=data)
+
+self.bitmap_table_bytes = self.bitmap_table_size \
+* struct.calcsize('Q')
+
+self.bitmap_flags = []
+if (self.flags & BME_FLAG_IN_USE):
+self.bitmap_flags.append("in-use")
+if (self.flags & BME_FLAG_AUTO):
+self.bitmap_flags.append("auto")
+
+def bitmap_dir_entry_raw_size(self):
+return struct.calcsize(self.fmt) + self.name_size + \
+self.extra_data_size
+
+def dump_bitmap_dir_entry(self):
+print()
+print(f'{"Bitmap name":<25} {self.name}')
+for fl in self.bitmap_flags:
+print(f'{"flag":<25} {fl}')
+print(f'{"table size ":<25} {self.bitmap_table_bytes} {"(bytes)"}')
+super().dump()
+
 
 QCOW2_EXT_MAGIC_BITMAPS = 0x23852875
 
@@ -253,6 +324,10 @@ class QcowHeader(Qcow2Struct):
 else:
 self.extensions.append(ext)
 
+for ext in self.extensions:
+if ext.obj is not None:
+ext.obj.load(fd)
+
 def update_extensions(self, fd):
 
 fd.seek(self.header_length)
-- 
1.8.3.1




[PATCH v6 4/8] qcow2_format.py: pass cluster size to substructures

2020-06-11 Thread Andrey Shinkevich
The cluster size of an image is the QcowHeader class member and may be
obtained by dependent extension structures such as Qcow2BitmapExt for
further bitmap table details print.

Signed-off-by: Andrey Shinkevich 
---
 tests/qemu-iotests/qcow2_format.py | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index a7868a7..6589f68 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -116,6 +116,10 @@ class Qcow2BitmapExt(Qcow2Struct):
 ('u64', '{:#x}', 'bitmap_directory_offset')
 )
 
+def __init__(self, data, cluster_size):
+super().__init__(data=data)
+self.cluster_size = cluster_size
+
 def read_bitmap_directory(self, fd):
 self.bitmaps = []
 fd.seek(self.bitmap_directory_offset)
@@ -123,7 +127,8 @@ class Qcow2BitmapExt(Qcow2Struct):
 
 for n in range(self.nb_bitmaps):
 buf = fd.read(buf_size)
-dir_entry = Qcow2BitmapDirEntry(data=buf)
+dir_entry = Qcow2BitmapDirEntry(data=buf,
+cluster_size=self.cluster_size)
 fd.seek(dir_entry.extra_data_size, FROM_CURRENT)
 bitmap_name = fd.read(dir_entry.name_size)
 dir_entry.name = bitmap_name.decode('ascii')
@@ -159,8 +164,9 @@ class Qcow2BitmapDirEntry(Qcow2Struct):
 ('u32', '{}','extra_data_size')
 )
 
-def __init__(self, data):
+def __init__(self, data, cluster_size):
 super().__init__(data=data)
+self.cluster_size = cluster_size
 
 self.bitmap_table_bytes = self.bitmap_table_size \
 * struct.calcsize('Q')
@@ -205,11 +211,13 @@ class QcowHeaderExtension(Qcow2Struct):
 # then padding to next multiply of 8
 )
 
-def __init__(self, magic=None, length=None, data=None, fd=None):
+def __init__(self, magic=None, length=None, data=None, fd=None,
+ cluster_size=None):
 """
 Support both loading from fd and creation from user data.
 For fd-based creation current position in a file will be used to read
 the data.
+The cluster_size value may be obtained by dependent structures.
 
 This should be somehow refactored and functionality should be moved to
 superclass (to allow creation of any qcow2 struct), but then, fields
@@ -243,7 +251,8 @@ class QcowHeaderExtension(Qcow2Struct):
 self.data_str = data_str
 
 if self.magic == QCOW2_EXT_MAGIC_BITMAPS:
-self.obj = Qcow2BitmapExt(data=self.data)
+self.obj = Qcow2BitmapExt(data=self.data,
+  cluster_size=cluster_size)
 else:
 self.obj = None
 
@@ -318,7 +327,7 @@ class QcowHeader(Qcow2Struct):
 end = self.cluster_size
 
 while fd.tell() < end:
-ext = QcowHeaderExtension(fd=fd)
+ext = QcowHeaderExtension(fd=fd, cluster_size=self.cluster_size)
 if ext.magic == 0:
 break
 else:
-- 
1.8.3.1




[PATCH v6 0/8] iotests: Dump QCOW2 dirty bitmaps metadata

2020-06-11 Thread Andrey Shinkevich
Note: based on the Vladimir's series
  [v5 00/13] iotests: Dump QCOW2 dirty bitmaps metadata

Add dirty bitmap information to QCOW2 metadata dump in the qcow2_format.py.

v6:
  01: Fixing capitalization of header extension constant.
  (Suggested by Eric)
  02: The cluster size global variable discarded and passed as a parameter.
  03: Re-based to Vladimir's v5 series.
  04: The code of passing qcow2.py JSON format key moved to separate patch.
  05: Making dict(s) for dumping in JSON format was substituted with a copy
  of __dict__.

v5: The Vladimir's preliminary series
v4: The Vladimir's preliminary series

Andrey Shinkevich (8):
  qcow2: Fix capitalization of header extension constant.
  qcow2_format.py: make printable data an extension class member
  qcow2_format.py: Dump bitmap directory info
  qcow2_format.py: pass cluster size to substructures
  qcow2_format.py: Dump bitmap table serialized entries
  qcow2.py: Introduce '-j' key to dump in JSON format
  qcow2_format.py: collect fields to dump in JSON format
  qcow2_format.py: support dumping metadata in JSON  format

 block/qcow2.c  |   2 +-
 docs/interop/qcow2.txt |   2 +-
 tests/qemu-iotests/qcow2.py|  20 +++-
 tests/qemu-iotests/qcow2_format.py | 219 ++---
 4 files changed, 222 insertions(+), 21 deletions(-)

-- 
1.8.3.1




[PATCH v6 8/8] qcow2_format.py: support dumping metadata in JSON format

2020-06-11 Thread Andrey Shinkevich
Implementation of dumping QCOW2 image metadata.
The sample output:
{
"Header_extensions": [
{
"name": "Feature table",
"magic": 1745090647,
"length": 192,
"data_str": ""
},
{
"name": "Bitmaps",
"magic": 595929205,
"length": 24,
"data": {
"nb_bitmaps": 2,
"reserved32": 0,
"bitmap_directory_size": 64,
"bitmap_directory_offset": 1048576,
"entries": [
{
"name": "bitmap-1",
"bitmap_table_offset": 589824,
"bitmap_table_size": 1,
"flags": [
"auto"
],
"type": 1,
"granularity_bits": 16,
"name_size": 8,
"extra_data_size": 0,
"bitmap_table": {
"table_entries": [
{
"type": "serialized",
"offset": 655360
},
...

Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Andrey Shinkevich 
---
 tests/qemu-iotests/qcow2_format.py | 60 ++
 1 file changed, 60 insertions(+)

diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index bb2f13b..d87f8e0 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -18,6 +18,15 @@
 
 import struct
 import string
+import json
+
+
+class ComplexEncoder(json.JSONEncoder):
+def default(self, obj):
+if hasattr(obj, 'get_fields_dict'):
+return obj.get_fields_dict()
+else:
+return json.JSONEncoder.default(self, obj)
 
 
 class Qcow2Field:
@@ -95,6 +104,11 @@ class Qcow2Struct(metaclass=Qcow2StructMeta):
 self.fields_dict = self.__dict__.copy()
 
 def dump(self, dump_json=None):
+if dump_json:
+print(json.dumps(self.get_fields_dict(), indent=4,
+ cls=ComplexEncoder))
+return
+
 for f in self.fields:
 value = self.__dict__[f[2]]
 if isinstance(f[1], str):
@@ -150,6 +164,9 @@ class Qcow2BitmapExt(Qcow2Struct):
 for bm in self.bitmaps:
 bm.dump_bitmap_dir_entry(dump_json)
 
+def get_fields_dict(self):
+return self.fields_dict
+
 
 BME_FLAG_IN_USE = 1 << 0
 BME_FLAG_AUTO = 1 << 1
@@ -202,6 +219,12 @@ class Qcow2BitmapDirEntry(Qcow2Struct):
 super().dump()
 self.bitmap_table.print_bitmap_table(self.cluster_size)
 
+def get_fields_dict(self):
+bmp_name = dict(name=self.name)
+bme_dict = {**bmp_name, **self.fields_dict}
+bme_dict['flags'] = self.bitmap_flags
+return bme_dict
+
 
 class Qcow2BitmapTableEntry:
 
@@ -218,6 +241,9 @@ class Qcow2BitmapTableEntry:
 else:
 self.type = 'all-zeroes'
 
+def get_fields_dict(self):
+return dict(type=self.type, offset=self.offset)
+
 
 class Qcow2BitmapTable:
 
@@ -234,6 +260,18 @@ class Qcow2BitmapTable:
  cluster_size))
 print("")
 
+def get_fields_dict(self):
+return dict(table_entries=self.entries)
+
+
+class Qcow2HeaderExtensionsDoc:
+
+def __init__(self, extensions):
+self.extensions = extensions
+
+def get_fields_dict(self):
+return dict(Header_extensions=self.extensions)
+
 
 QCOW2_EXT_MAGIC_BITMAPS = 0x23852875
 
@@ -249,6 +287,9 @@ class QcowHeaderExtension(Qcow2Struct):
 0x44415441: 'Data file'
 }
 
+def get_fields_dict(self):
+return self.mapping.get(self.value, "")
+
 fields = (
 ('u32', Magic, 'magic'),
 ('u32', '{}', 'length')
@@ -309,6 +350,16 @@ class QcowHeaderExtension(Qcow2Struct):
 else:
 self.obj.dump(dump_json)
 
+def get_fields_dict(self):
+ext_name = dict(name=self.Magic(self.magic))
+he_dict = {**ext_name, **self.fields_dict}
+if self.obj is not None:
+he_dict.update(data=self.obj)
+else:
+he_dict.update(data_str=self.data_str)
+
+return he_dict
+
 @classmethod
 def create(cls, magic, data):
 return QcowHeaderExtension(magic, len(data), data)
@@ -411,7 +462,16 @@ class QcowHeader(Qcow2Struct):
 fd.write(buf)
 
 def dump_extensions(self, dump_json=None):
+if dump_json:
+ext_doc = Qcow2HeaderExtensionsDoc(self.extensions)
+print(json.dumps(ext_doc.get_fields_dict(), indent=4,
+ cls=ComplexEncoder))
+return
+
 for ex in self.extensions:
 print('Header 

[PATCH v6 5/8] qcow2_format.py: Dump bitmap table serialized entries

2020-06-11 Thread Andrey Shinkevich
Add bitmap table information to the QCOW2 metadata dump.

Bitmap name   bitmap-1
...
Bitmap tabletypeoffset  size
0   serialized  0xa 65536
1   all-zeroes  0x0 65536
2   all-zeroes  0x0 65536
3   all-zeroes  0x0 65536
4   all-zeroes  0x0 65536
5   all-zeroes  0x0 65536
6   all-zeroes  0x0 65536
7   all-zeroes  0x0 65536

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Eric Blake 
---
 tests/qemu-iotests/qcow2_format.py | 42 ++
 1 file changed, 42 insertions(+)

diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index 6589f68..2e7c058 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -137,6 +137,9 @@ class Qcow2BitmapExt(Qcow2Struct):
 shift = ((entry_raw_size + 7) & ~7) - entry_raw_size
 fd.seek(shift, FROM_CURRENT)
 
+for bm in self.bitmaps:
+bm.read_bitmap_table(fd)
+
 def load(self, fd):
 self.read_bitmap_directory(fd)
 
@@ -181,6 +184,12 @@ class Qcow2BitmapDirEntry(Qcow2Struct):
 return struct.calcsize(self.fmt) + self.name_size + \
 self.extra_data_size
 
+def read_bitmap_table(self, fd):
+fd.seek(self.bitmap_table_offset)
+table_size = self.bitmap_table_bytes * struct.calcsize('Q')
+table = [e[0] for e in struct.iter_unpack('>Q', fd.read(table_size))]
+self.bitmap_table = Qcow2BitmapTable(table)
+
 def dump_bitmap_dir_entry(self):
 print()
 print(f'{"Bitmap name":<25} {self.name}')
@@ -188,6 +197,39 @@ class Qcow2BitmapDirEntry(Qcow2Struct):
 print(f'{"flag":<25} {fl}')
 print(f'{"table size ":<25} {self.bitmap_table_bytes} {"(bytes)"}')
 super().dump()
+self.bitmap_table.print_bitmap_table(self.cluster_size)
+
+
+class Qcow2BitmapTableEntry:
+
+BME_TABLE_ENTRY_OFFSET_MASK = 0x00fffe00
+BME_TABLE_ENTRY_FLAG_ALL_ONES = 1
+bmte_type = ['all-zeroes', 'all-ones', 'serialized']
+
+def __init__(self, entry):
+self.offset = entry & self.BME_TABLE_ENTRY_OFFSET_MASK
+if self.offset:
+self.type = 'serialized'
+elif entry & self.BME_TABLE_ENTRY_FLAG_ALL_ONES:
+self.type = 'all-ones'
+else:
+self.type = 'all-zeroes'
+
+
+class Qcow2BitmapTable:
+
+def __init__(self, raw_table):
+self.entries = []
+for entry in raw_table:
+self.entries.append(Qcow2BitmapTableEntry(entry))
+
+def print_bitmap_table(self, cluster_size):
+bitmap_table = enumerate(self.entries)
+print("Bitmap table\ttype\t\toffset\t\tsize")
+for i, entry in bitmap_table:
+print("\t%-4d\t%s\t%#x\t\t%d" % (i, entry.type, entry.offset,
+ cluster_size))
+print("")
 
 
 QCOW2_EXT_MAGIC_BITMAPS = 0x23852875
-- 
1.8.3.1




[PATCH v6 7/8] qcow2_format.py: collect fields to dump in JSON format

2020-06-11 Thread Andrey Shinkevich
As __dict__ is being extended with class members we do not want to
print in JSON format dump, make a light copy of the initial __dict__
and extend the copy by adding lists we have to print in the JSON
output.

Signed-off-by: Andrey Shinkevich 
---
 tests/qemu-iotests/qcow2_format.py | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/qcow2_format.py 
b/tests/qemu-iotests/qcow2_format.py
index 95db7a9..bb2f13b 100644
--- a/tests/qemu-iotests/qcow2_format.py
+++ b/tests/qemu-iotests/qcow2_format.py
@@ -92,6 +92,8 @@ class Qcow2Struct(metaclass=Qcow2StructMeta):
 self.__dict__ = dict((field[2], values[i])
  for i, field in enumerate(self.fields))
 
+self.fields_dict = self.__dict__.copy()
+
 def dump(self, dump_json=None):
 for f in self.fields:
 value = self.__dict__[f[2]]
@@ -102,7 +104,6 @@ class Qcow2Struct(metaclass=Qcow2StructMeta):
 
 print('{:<25} {}'.format(f[2], value_str))
 
-
 # seek relative to the current position in the file
 FROM_CURRENT = 1
 
@@ -142,6 +143,7 @@ class Qcow2BitmapExt(Qcow2Struct):
 
 def load(self, fd):
 self.read_bitmap_directory(fd)
+self.fields_dict.update(entries=self.bitmaps)
 
 def dump(self, dump_json=None):
 super().dump(dump_json)
@@ -189,6 +191,7 @@ class Qcow2BitmapDirEntry(Qcow2Struct):
 table_size = self.bitmap_table_bytes * struct.calcsize('Q')
 table = [e[0] for e in struct.iter_unpack('>Q', fd.read(table_size))]
 self.bitmap_table = Qcow2BitmapTable(table)
+self.fields_dict.update(bitmap_table=self.bitmap_table)
 
 def dump_bitmap_dir_entry(self, dump_json=None):
 print()
-- 
1.8.3.1




pls consider this is [v3] Re: [PATCH 0/2] block: propagate discard alignment from format drivers to the guest

2020-06-11 Thread Denis V. Lunev
On 6/11/20 8:16 PM, Denis V. Lunev wrote:
> Nowadays SCSI drivers in guests are able to align UNMAP requests before
> sending to the device. Right now QEMU provides an ability to set
> this via "discard_granularity" property of the block device which could
> be used by management layer.
>
> Though, in particular, from the point of QEMU, there is
> pdiscard_granularity on the format driver level, f.e. on QCOW2 or iSCSI.
> It would be beneficial to pass this value as a default for this
> property.
>
> Technically this should reduce the amount of useless UNMAP requests
> from the guest to the host. Basic test confirms this. Fedora 31 guest
> during 'fstrim /' on 32 Gb disk has issued 401/415 requests with/without
> proper alignment to QEMU.
>
> Changes from v2:
> - 172 iotest fixed
>
> Changes from v1:
> - fixed typos in description
> - added machine type compatibility layer as suggested by Kevin
>
> Signed-off-by: Denis V. Lunev 
> CC: Kevin Wolf 
> CC: Max Reitz 
> CC: Eduardo Habkost 
> CC: Marcel Apfelbaum 
> CC: John Snow 
> CC: Paolo Bonzini 
> CC: Fam Zheng 
>
>
Sorry for missed v3 tag in the subject :(



[PATCH 1/2] block: propagate discard alignment from format drivers to the guest

2020-06-11 Thread Denis V. Lunev
Nowadays SCSI drivers in guests are able to align UNMAP requests before
sending to the device. Right now QEMU provides an ability to set
this via "discard_granularity" property of the block device which could
be used by management layer.

Though, in particular, from the point of QEMU, there is
pdiscard_granularity on the format driver level, f.e. on QCOW2 or iSCSI.
It would be beneficial to pass this value as a default for this
property.

Technically this should reduce the amount of useless UNMAP requests
from the guest to the host. Basic test confirms this.

Signed-off-by: Denis V. Lunev 
CC: Kevin Wolf 
CC: Max Reitz 
CC: Eduardo Habkost 
CC: Marcel Apfelbaum 
CC: John Snow 
CC: Paolo Bonzini 
CC: Fam Zheng 
---
 block/block-backend.c  | 11 +++
 hw/core/machine.c  | 15 ++-
 hw/ide/qdev.c  |  3 ++-
 hw/scsi/scsi-disk.c|  5 -
 include/hw/block/block.h   |  2 +-
 include/sysemu/block-backend.h |  6 ++
 6 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 6936b25c83..9342a475cb 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -,6 +,17 @@ int blk_probe_geometry(BlockBackend *blk, HDGeometry 
*geo)
 return bdrv_probe_geometry(blk_bs(blk), geo);
 }
 
+int blk_discard_granularity(BlockBackend *blk)
+{
+BlockDriverState *bs = blk_bs(blk);
+
+if (bs == NULL) {
+return DEFAULT_DISCARD_GRANULARITY;
+}
+
+return bs->bl.pdiscard_alignment;
+}
+
 /*
  * Updates the BlockBackendRootState object with data from the currently
  * attached BlockDriverState.
diff --git a/hw/core/machine.c b/hw/core/machine.c
index bb3a7b18b1..08a242d606 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -28,7 +28,20 @@
 #include "hw/mem/nvdimm.h"
 #include "migration/vmstate.h"
 
-GlobalProperty hw_compat_5_0[] = {};
+GlobalProperty hw_compat_5_0[] = {
+{ "ide-cd", "discard_granularity", "0x" },
+{ "ide-hd", "discard_granularity", "0x" },
+{ "ide-drive", "discard_granularity", "0x" },
+{ "scsi-hd", "discard_granularity", "0x" },
+{ "scsi-cd", "discard_granularity", "0x" },
+{ "scsi-disk", "discard_granularity", "0x" },
+{ "virtio-blk-pci", "discard_granularity", "0x" },
+{ "xen-block", "discard_granularity", "0x" },
+{ "usb-storage", "discard_granularity", "0x" },
+{ "swim-drive", "discard_granularity", "0x" },
+{ "floppy", "discard_granularity", "0x" },
+{ "nvme", "discard_granularity", "0x" },
+};
 const size_t hw_compat_5_0_len = G_N_ELEMENTS(hw_compat_5_0);
 
 GlobalProperty hw_compat_4_2[] = {
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index 06b11583f5..e515dbeb0e 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -179,7 +179,8 @@ static void ide_dev_initfn(IDEDevice *dev, IDEDriveKind 
kind, Error **errp)
 }
 }
 
-if (dev->conf.discard_granularity == -1) {
+if (dev->conf.discard_granularity == -1 ||
+dev->conf.discard_granularity == -2) {
 dev->conf.discard_granularity = 512;
 } else if (dev->conf.discard_granularity &&
dev->conf.discard_granularity != 512) {
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 387503e11b..6b809608e4 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -48,7 +48,6 @@
 #define SCSI_MAX_INQUIRY_LEN256
 #define SCSI_MAX_MODE_LEN   256
 
-#define DEFAULT_DISCARD_GRANULARITY (4 * KiB)
 #define DEFAULT_MAX_UNMAP_SIZE  (1 * GiB)
 #define DEFAULT_MAX_IO_SIZE INT_MAX /* 2 GB - 1 block */
 
@@ -2381,6 +2380,10 @@ static void scsi_realize(SCSIDevice *dev, Error **errp)
 if (s->qdev.conf.discard_granularity == -1) {
 s->qdev.conf.discard_granularity =
 MAX(s->qdev.conf.logical_block_size, DEFAULT_DISCARD_GRANULARITY);
+} else if (s->qdev.conf.discard_granularity == -2) {
+s->qdev.conf.discard_granularity =
+MAX(s->qdev.conf.logical_block_size,
+blk_discard_granularity(s->qdev.conf.blk));
 }
 
 if (!s->version) {
diff --git a/include/hw/block/block.h b/include/hw/block/block.h
index d7246f3862..53d4a38044 100644
--- a/include/hw/block/block.h
+++ b/include/hw/block/block.h
@@ -54,7 +54,7 @@ static inline unsigned int get_physical_block_exp(BlockConf 
*conf)
 DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 0),\
 DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0),\
 DEFINE_PROP_UINT32("discard_granularity", _state,   \
-   _conf.discard_granularity, -1),  \
+   _conf.discard_granularity, -2),  \
 DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce,   \
 ON_OFF_AUTO_AUTO),  \
 

[PATCH 0/2] block: propagate discard alignment from format drivers to the guest

2020-06-11 Thread Denis V. Lunev
Nowadays SCSI drivers in guests are able to align UNMAP requests before
sending to the device. Right now QEMU provides an ability to set
this via "discard_granularity" property of the block device which could
be used by management layer.

Though, in particular, from the point of QEMU, there is
pdiscard_granularity on the format driver level, f.e. on QCOW2 or iSCSI.
It would be beneficial to pass this value as a default for this
property.

Technically this should reduce the amount of useless UNMAP requests
from the guest to the host. Basic test confirms this. Fedora 31 guest
during 'fstrim /' on 32 Gb disk has issued 401/415 requests with/without
proper alignment to QEMU.

Changes from v2:
- 172 iotest fixed

Changes from v1:
- fixed typos in description
- added machine type compatibility layer as suggested by Kevin

Signed-off-by: Denis V. Lunev 
CC: Kevin Wolf 
CC: Max Reitz 
CC: Eduardo Habkost 
CC: Marcel Apfelbaum 
CC: John Snow 
CC: Paolo Bonzini 
CC: Fam Zheng 





[PATCH 2/2] iotests: fix 172 test

2020-06-11 Thread Denis V. Lunev
Default discard granularity is set to -2 now.

Signed-off-by: Denis V. Lunev 
CC: Kevin Wolf 
CC: Max Reitz 
CC: Eduardo Habkost 
CC: Marcel Apfelbaum 
CC: John Snow 
CC: Paolo Bonzini 
CC: Fam Zheng 
---
 tests/qemu-iotests/172.out | 106 ++---
 1 file changed, 53 insertions(+), 53 deletions(-)

diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out
index 7abbe82427..fb6d3efe7b 100644
--- a/tests/qemu-iotests/172.out
+++ b/tests/qemu-iotests/172.out
@@ -28,7 +28,7 @@ Testing:
 physical_block_size = 512 (0x200)
 min_io_size = 0 (0x0)
 opt_io_size = 0 (0x0)
-discard_granularity = 4294967295 (0x)
+discard_granularity = 4294967294 (0xfffe)
 write-cache = "auto"
 share-rw = false
 drive-type = "288"
@@ -58,7 +58,7 @@ Testing: -fda TEST_DIR/t.qcow2
 physical_block_size = 512 (0x200)
 min_io_size = 0 (0x0)
 opt_io_size = 0 (0x0)
-discard_granularity = 4294967295 (0x)
+discard_granularity = 4294967294 (0xfffe)
 write-cache = "auto"
 share-rw = false
 drive-type = "144"
@@ -85,7 +85,7 @@ Testing: -fdb TEST_DIR/t.qcow2
 physical_block_size = 512 (0x200)
 min_io_size = 0 (0x0)
 opt_io_size = 0 (0x0)
-discard_granularity = 4294967295 (0x)
+discard_granularity = 4294967294 (0xfffe)
 write-cache = "auto"
 share-rw = false
 drive-type = "144"
@@ -96,7 +96,7 @@ Testing: -fdb TEST_DIR/t.qcow2
 physical_block_size = 512 (0x200)
 min_io_size = 0 (0x0)
 opt_io_size = 0 (0x0)
-discard_granularity = 4294967295 (0x)
+discard_granularity = 4294967294 (0xfffe)
 write-cache = "auto"
 share-rw = false
 drive-type = "288"
@@ -123,7 +123,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2.2
 physical_block_size = 512 (0x200)
 min_io_size = 0 (0x0)
 opt_io_size = 0 (0x0)
-discard_granularity = 4294967295 (0x)
+discard_granularity = 4294967294 (0xfffe)
 write-cache = "auto"
 share-rw = false
 drive-type = "144"
@@ -134,7 +134,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2.2
 physical_block_size = 512 (0x200)
 min_io_size = 0 (0x0)
 opt_io_size = 0 (0x0)
-discard_granularity = 4294967295 (0x)
+discard_granularity = 4294967294 (0xfffe)
 write-cache = "auto"
 share-rw = false
 drive-type = "144"
@@ -164,7 +164,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2
 physical_block_size = 512 (0x200)
 min_io_size = 0 (0x0)
 opt_io_size = 0 (0x0)
-discard_granularity = 4294967295 (0x)
+discard_granularity = 4294967294 (0xfffe)
 write-cache = "auto"
 share-rw = false
 drive-type = "144"
@@ -191,7 +191,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
 physical_block_size = 512 (0x200)
 min_io_size = 0 (0x0)
 opt_io_size = 0 (0x0)
-discard_granularity = 4294967295 (0x)
+discard_granularity = 4294967294 (0xfffe)
 write-cache = "auto"
 share-rw = false
 drive-type = "144"
@@ -202,7 +202,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
 physical_block_size = 512 (0x200)
 min_io_size = 0 (0x0)
 opt_io_size = 0 (0x0)
-discard_granularity = 4294967295 (0x)
+discard_granularity = 4294967294 (0xfffe)
 write-cache = "auto"
 share-rw = false
 drive-type = "288"
@@ -229,7 +229,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive 
if=floppy,file=TEST_DIR/t
 physical_block_size = 512 (0x200)
 min_io_size = 0 (0x0)
 opt_io_size = 0 (0x0)
-discard_granularity = 4294967295 (0x)
+discard_granularity = 4294967294 (0xfffe)
 write-cache = "auto"
 share-rw = false
 drive-type = "144"
@@ -240,7 +240,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive 
if=floppy,file=TEST_DIR/t
 physical_block_size = 512 (0x200)
 min_io_size = 0 (0x0)
 opt_io_size = 

[PATCH v3 0/4] block: seriously improve savevm performance

2020-06-11 Thread Denis V. Lunev
This series do standard basic things:
- it creates intermediate buffer for all writes from QEMU migration code
  to QCOW2 image,
- this buffer is sent to disk asynchronously, allowing several writes to
  run in parallel.

In general, migration code is fantastically inefficient (by observation),
buffers are not aligned and sent with arbitrary pieces, a lot of time
less than 100 bytes at a chunk, which results in read-modify-write
operations with non-cached operations. It should also be noted that all
operations are performed into unallocated image blocks, which also suffer
due to partial writes to such new clusters.

This patch series is an implementation of idea discussed in the RFC
posted by Denis Plotnikov
https://lists.gnu.org/archive/html/qemu-devel/2020-04/msg01925.html
Results with this series over NVME are better than original code
original rfcthis
cached:  1.79s  2.38s   1.27s
non-cached:  3.29s  1.31s   0.81s

Changes from v2:
- code moved from QCOW2 level to generic block level
- created bdrv_flush_vmstate helper to fix 022, 029 tests
- added recursive for bs->file in bdrv_co_flush_vmstate (fix 267)
- fixed blk_save_vmstate helper
- fixed coroutine wait as Vladimir suggested with waiting fixes from me

Changes from v1:
- patchew warning fixed
- fixed validation that only 1 waiter is allowed in patch 1

Signed-off-by: Denis V. Lunev 
CC: Kevin Wolf 
CC: Max Reitz 
CC: Stefan Hajnoczi 
CC: Fam Zheng 
CC: Juan Quintela 
CC: "Dr. David Alan Gilbert" 
CC: Vladimir Sementsov-Ogievskiy 
CC: Denis Plotnikov 





[PATCH 1/4] migration/savevm: respect qemu_fclose() error code in save_snapshot()

2020-06-11 Thread Denis V. Lunev
qemu_fclose() could return error, f.e. if bdrv_co_flush() will return
the error.

This validation will become more important once we start waiting for
asynchronous IO operations, started from bdrv_write_vmstate(), which are
coming soon.

Signed-off-by: Denis V. Lunev 
CC: Kevin Wolf 
CC: Max Reitz 
CC: Stefan Hajnoczi 
CC: Fam Zheng 
CC: Juan Quintela 
CC: "Dr. David Alan Gilbert" 
CC: Vladimir Sementsov-Ogievskiy 
CC: Denis Plotnikov 
---
 migration/savevm.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/migration/savevm.c b/migration/savevm.c
index c00a6807d9..0ff5bb40ed 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2628,7 +2628,7 @@ int save_snapshot(const char *name, Error **errp)
 {
 BlockDriverState *bs, *bs1;
 QEMUSnapshotInfo sn1, *sn = , old_sn1, *old_sn = _sn1;
-int ret = -1;
+int ret = -1, ret2;
 QEMUFile *f;
 int saved_vm_running;
 uint64_t vm_state_size;
@@ -2712,10 +2712,14 @@ int save_snapshot(const char *name, Error **errp)
 }
 ret = qemu_savevm_state(f, errp);
 vm_state_size = qemu_ftell(f);
-qemu_fclose(f);
+ret2 = qemu_fclose(f);
 if (ret < 0) {
 goto the_end;
 }
+if (ret2 < 0) {
+ret = ret2;
+goto the_end;
+}
 
 /* The bdrv_all_create_snapshot() call that follows acquires the AioContext
  * for itself.  BDRV_POLL_WHILE() does not support nested locking because
-- 
2.17.1




[PATCH 3/4] block, migration: add bdrv_flush_vmstate helper

2020-06-11 Thread Denis V. Lunev
Right now bdrv_fclose() is just calling bdrv_flush().

The problem is that the migration code works inefficiently in block
layer terms and frequently issues very small pieces of not
properly aligned data. The block layer is capable of working this way,
but it is very slow.

This patch is a preparation for the introduction of the intermediate
buffer at block driver state. It would be beneficial to separate
conventional bdrv_flush() from closing QEMU file from migration code.

The patch also forces bdrv_flush_vmstate() operation inside
synchronous blk_save_vmstate() operation. This helper is used from
qemu-io only.

Signed-off-by: Denis V. Lunev 
CC: Kevin Wolf 
CC: Max Reitz 
CC: Stefan Hajnoczi 
CC: Fam Zheng 
CC: Juan Quintela 
CC: "Dr. David Alan Gilbert" 
CC: Vladimir Sementsov-Ogievskiy 
CC: Denis Plotnikov 
---
 block/block-backend.c |  6 +-
 block/io.c| 39 +++
 include/block/block.h |  1 +
 migration/savevm.c|  3 +++
 4 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 9342a475cb..2107ace699 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -2177,7 +2177,7 @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool 
exact,
 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
  int64_t pos, int size)
 {
-int ret;
+int ret, ret2;
 
 if (!blk_is_available(blk)) {
 return -ENOMEDIUM;
@@ -2187,6 +2187,10 @@ int blk_save_vmstate(BlockBackend *blk, const uint8_t 
*buf,
 if (ret < 0) {
 return ret;
 }
+ret2 = bdrv_flush_vmstate(blk_bs(blk));
+if (ret2 < 0) {
+return ret;
+}
 
 if (ret == size && !blk->enable_write_cache) {
 ret = bdrv_flush(blk_bs(blk));
diff --git a/block/io.c b/block/io.c
index 121ce17a49..fbf352f39d 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2725,6 +2725,45 @@ int bdrv_readv_vmstate(BlockDriverState *bs, 
QEMUIOVector *qiov, int64_t pos)
 return bdrv_rw_vmstate(bs, qiov, pos, true);
 }
 
+
+typedef struct FlushVMStateCo {
+BlockDriverState *bs;
+int ret;
+} FlushVMStateCo;
+
+static int coroutine_fn bdrv_co_flush_vmstate(BlockDriverState *bs)
+{
+return 0;
+}
+
+static void coroutine_fn bdrv_flush_vmstate_co_entry(void *opaque)
+{
+FlushVMStateCo *rwco = opaque;
+
+rwco->ret = bdrv_co_flush_vmstate(rwco->bs);
+aio_wait_kick();
+}
+
+int bdrv_flush_vmstate(BlockDriverState *bs)
+{
+Coroutine *co;
+FlushVMStateCo flush_co = {
+.bs = bs,
+.ret = NOT_DONE,
+};
+
+if (qemu_in_coroutine()) {
+/* Fast-path if already in coroutine context */
+bdrv_flush_vmstate_co_entry(_co);
+} else {
+co = qemu_coroutine_create(bdrv_flush_vmstate_co_entry, _co);
+bdrv_coroutine_enter(bs, co);
+BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE);
+}
+
+return flush_co.ret;
+}
+
 /**/
 /* async I/Os */
 
diff --git a/include/block/block.h b/include/block/block.h
index 25e299605e..024525b87d 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -572,6 +572,7 @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t 
*buf,
 
 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
   int64_t pos, int size);
+int bdrv_flush_vmstate(BlockDriverState *bs);
 
 void bdrv_img_create(const char *filename, const char *fmt,
  const char *base_filename, const char *base_fmt,
diff --git a/migration/savevm.c b/migration/savevm.c
index 0ff5bb40ed..9698c909d7 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -150,6 +150,9 @@ static ssize_t block_get_buffer(void *opaque, uint8_t *buf, 
int64_t pos,
 
 static int bdrv_fclose(void *opaque, Error **errp)
 {
+int err = bdrv_flush_vmstate(opaque);
+if (err < 0)
+return err;
 return bdrv_flush(opaque);
 }
 
-- 
2.17.1




[PATCH 2/4] block/aio_task: allow start/wait task from any coroutine

2020-06-11 Thread Denis V. Lunev
From: Vladimir Sementsov-Ogievskiy 

Currently, aio task pool assumes that there is a main coroutine, which
creates tasks and wait for them. Let's remove the restriction by using
CoQueue. Code becomes clearer, interface more obvious.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Denis V. Lunev 
CC: Kevin Wolf 
CC: Max Reitz 
CC: Stefan Hajnoczi 
CC: Fam Zheng 
CC: Juan Quintela 
CC: "Dr. David Alan Gilbert" 
CC: Vladimir Sementsov-Ogievskiy 
CC: Denis Plotnikov 
---
 block/aio_task.c | 21 ++---
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/block/aio_task.c b/block/aio_task.c
index 88989fa248..cf62e5c58b 100644
--- a/block/aio_task.c
+++ b/block/aio_task.c
@@ -27,11 +27,10 @@
 #include "block/aio_task.h"
 
 struct AioTaskPool {
-Coroutine *main_co;
 int status;
 int max_busy_tasks;
 int busy_tasks;
-bool waiting;
+CoQueue waiters;
 };
 
 static void coroutine_fn aio_task_co(void *opaque)
@@ -52,31 +51,23 @@ static void coroutine_fn aio_task_co(void *opaque)
 
 g_free(task);
 
-if (pool->waiting) {
-pool->waiting = false;
-aio_co_wake(pool->main_co);
-}
+qemu_co_queue_restart_all(>waiters);
 }
 
 void coroutine_fn aio_task_pool_wait_one(AioTaskPool *pool)
 {
 assert(pool->busy_tasks > 0);
-assert(qemu_coroutine_self() == pool->main_co);
 
-pool->waiting = true;
-qemu_coroutine_yield();
+qemu_co_queue_wait(>waiters, NULL);
 
-assert(!pool->waiting);
 assert(pool->busy_tasks < pool->max_busy_tasks);
 }
 
 void coroutine_fn aio_task_pool_wait_slot(AioTaskPool *pool)
 {
-if (pool->busy_tasks < pool->max_busy_tasks) {
-return;
+while (pool->busy_tasks >= pool->max_busy_tasks) {
+aio_task_pool_wait_one(pool);
 }
-
-aio_task_pool_wait_one(pool);
 }
 
 void coroutine_fn aio_task_pool_wait_all(AioTaskPool *pool)
@@ -98,8 +89,8 @@ AioTaskPool *coroutine_fn aio_task_pool_new(int 
max_busy_tasks)
 {
 AioTaskPool *pool = g_new0(AioTaskPool, 1);
 
-pool->main_co = qemu_coroutine_self();
 pool->max_busy_tasks = max_busy_tasks;
+qemu_co_queue_init(>waiters);
 
 return pool;
 }
-- 
2.17.1




[PATCH 4/4] block/io: improve savevm performance

2020-06-11 Thread Denis V. Lunev
This patch does 2 standard basic things:
- it creates intermediate buffer for all writes from QEMU migration code
  to block driver,
- this buffer is sent to disk asynchronously, allowing several writes to
  run in parallel.

Thus bdrv_vmstate_write() is becoming asynchronous. Completion of all
pending operations is performed in the newly introduced bdrv_flush_vmstate().

In general, migration code is fantastically inefficient (by observation),
buffers are not aligned and sent with arbitrary pieces, a lot of time
less than 100 bytes at a chunk, which results in read-modify-write
operations if target file descriptor is opened with O_DIRECT. It should
also be noted that all operations are performed into unallocated image
blocks, which also suffer due to partial writes to such new clusters
even on cached file descriptors.

Snapshot creation time (2 GB Fedora-31 VM running over NVME storage):
original fixed
cached:  1.79s   1.27s
non-cached:  3.29s   0.81s

The difference over HDD would be more significant :)

Signed-off-by: Denis V. Lunev 
CC: Kevin Wolf 
CC: Max Reitz 
CC: Stefan Hajnoczi 
CC: Fam Zheng 
CC: Juan Quintela 
CC: "Dr. David Alan Gilbert" 
CC: Vladimir Sementsov-Ogievskiy 
CC: Denis Plotnikov 
---
 block/io.c| 121 +-
 include/block/block_int.h |   8 +++
 2 files changed, 127 insertions(+), 2 deletions(-)

diff --git a/block/io.c b/block/io.c
index fbf352f39d..698f1eef76 100644
--- a/block/io.c
+++ b/block/io.c
@@ -26,6 +26,7 @@
 #include "trace.h"
 #include "sysemu/block-backend.h"
 #include "block/aio-wait.h"
+#include "block/aio_task.h"
 #include "block/blockjob.h"
 #include "block/blockjob_int.h"
 #include "block/block_int.h"
@@ -2633,6 +2634,102 @@ typedef struct BdrvVmstateCo {
 int ret;
 } BdrvVmstateCo;
 
+typedef struct BdrvVMStateTask {
+AioTask task;
+
+BlockDriverState *bs;
+int64_t offset;
+void *buf;
+size_t bytes;
+} BdrvVMStateTask;
+
+typedef struct BdrvSaveVMState {
+AioTaskPool *pool;
+BdrvVMStateTask *t;
+} BdrvSaveVMState;
+
+
+static coroutine_fn int bdrv_co_vmstate_save_task_entry(AioTask *task)
+{
+int err = 0;
+BdrvVMStateTask *t = container_of(task, BdrvVMStateTask, task);
+
+if (t->bytes != 0) {
+QEMUIOVector local_qiov;
+qemu_iovec_init_buf(_qiov, t->buf, t->bytes);
+
+bdrv_inc_in_flight(t->bs);
+err = t->bs->drv->bdrv_save_vmstate(t->bs, _qiov, t->offset);
+bdrv_dec_in_flight(t->bs);
+}
+
+qemu_vfree(t->buf);
+return err;
+}
+
+static BdrvVMStateTask *bdrv_vmstate_task_create(BlockDriverState *bs,
+ int64_t pos, size_t size)
+{
+BdrvVMStateTask *t = g_new(BdrvVMStateTask, 1);
+
+*t = (BdrvVMStateTask) {
+.task.func = bdrv_co_vmstate_save_task_entry,
+.buf = qemu_blockalign(bs, size),
+.offset = pos,
+.bs = bs,
+};
+
+return t;
+}
+
+static int bdrv_co_do_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
+   int64_t pos)
+{
+BdrvSaveVMState *state = bs->savevm_state;
+BdrvVMStateTask *t;
+size_t buf_size = MAX(bdrv_get_cluster_size(bs), 1 * MiB);
+size_t to_copy;
+size_t off;
+
+if (state == NULL) {
+state = g_new(BdrvSaveVMState, 1);
+*state = (BdrvSaveVMState) {
+.pool = aio_task_pool_new(BDRV_VMSTATE_WORKERS_MAX),
+.t = bdrv_vmstate_task_create(bs, pos, buf_size),
+};
+
+bs->savevm_state = state;
+}
+
+if (aio_task_pool_status(state->pool) < 0) {
+return aio_task_pool_status(state->pool);
+}
+
+t = state->t;
+if (t->offset + t->bytes != pos) {
+/* Normally this branch is not reachable from migration */
+return bs->drv->bdrv_save_vmstate(bs, qiov, pos);
+}
+
+off = 0;
+while (1) {
+to_copy = MIN(qiov->size - off, buf_size - t->bytes);
+qemu_iovec_to_buf(qiov, off, t->buf + t->bytes, to_copy);
+t->bytes += to_copy;
+if (t->bytes < buf_size) {
+return qiov->size;
+}
+
+aio_task_pool_start_task(state->pool, >task);
+
+pos += to_copy;
+off += to_copy;
+state->t = t = bdrv_vmstate_task_create(bs, pos, buf_size);
+}
+
+return qiov->size;
+}
+
 static int coroutine_fn
 bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
bool is_read)
@@ -2648,7 +2745,7 @@ bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector 
*qiov, int64_t pos,
 if (is_read) {
 ret = drv->bdrv_load_vmstate(bs, qiov, pos);
 } else {
-ret = drv->bdrv_save_vmstate(bs, qiov, pos);
+ret = bdrv_co_do_save_vmstate(bs, qiov, pos);
 }
 } else if (bs->file) {
 ret = bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
@@ -2733,7 +2830,27 @@ typedef struct 

[PATCH v4 4/4] block/io: auto-no-fallback for write-zeroes

2020-06-11 Thread Vladimir Sementsov-Ogievskiy
When BDRV_REQ_NO_FALLBACK is supported, the NBD driver supports a
larger request size.  Add code to try large zero requests with a
NO_FALLBACK request prior to having to split a request into chunks
according to max_pwrite_zeroes.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/io.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/block/io.c b/block/io.c
index 3fae97da2d..ad219cb220 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1778,6 +1778,7 @@ static int coroutine_fn 
bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
 int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
 bs->bl.request_alignment);
 int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
+bool auto_no_fallback = false;
 
 assert(alignment % bs->bl.request_alignment == 0);
 
@@ -1785,6 +1786,16 @@ static int coroutine_fn 
bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
 return -ENOMEDIUM;
 }
 
+if (!(flags & BDRV_REQ_NO_FALLBACK) &&
+(bs->supported_zero_flags & BDRV_REQ_NO_FALLBACK) &&
+bs->bl.max_pwrite_zeroes && bs->bl.max_pwrite_zeroes < bytes &&
+bs->bl.max_pwrite_zeroes < bs->bl.max_pwrite_zeroes_fast)
+{
+assert(drv->bdrv_co_pwrite_zeroes);
+flags |= BDRV_REQ_NO_FALLBACK;
+auto_no_fallback = true;
+}
+
 if ((flags & ~bs->supported_zero_flags) & BDRV_REQ_NO_FALLBACK) {
 return -ENOTSUP;
 }
@@ -1829,6 +1840,14 @@ static int coroutine_fn 
bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
 if (drv->bdrv_co_pwrite_zeroes) {
 ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
  flags & bs->supported_zero_flags);
+if (ret == -ENOTSUP && auto_no_fallback) {
+auto_no_fallback = false;
+flags &= ~BDRV_REQ_NO_FALLBACK;
+max_write_zeroes =
+QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pwrite_zeroes,
+ INT_MAX), alignment);
+continue;
+}
 if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
 !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
 need_flush = true;
-- 
2.21.0




[PATCH v4 2/4] block/nbd: define new max_write_zero_fast limit

2020-06-11 Thread Vladimir Sementsov-Ogievskiy
The NBD spec was recently updated to clarify that max_block doesn't
relate to NBD_CMD_WRITE_ZEROES with NBD_CMD_FLAG_FAST_ZERO (which
mirrors Qemu flag BDRV_REQ_NO_FALLBACK).

bs->bl.max_write_zero_fast is zero by default which means using
max_pwrite_zeroes. Update nbd driver to allow larger requests with
BDRV_REQ_NO_FALLBACK.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/nbd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/nbd.c b/block/nbd.c
index 4ac23c8f62..b0584cf68d 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -1956,6 +1956,7 @@ static void nbd_refresh_limits(BlockDriverState *bs, 
Error **errp)
 
 bs->bl.request_alignment = min;
 bs->bl.max_pdiscard = QEMU_ALIGN_DOWN(INT_MAX, min);
+bs->bl.max_pwrite_zeroes_fast = bs->bl.max_pdiscard;
 bs->bl.max_pwrite_zeroes = max;
 bs->bl.max_transfer = max;
 
-- 
2.21.0




[PATCH v4 1/4] block: add max_pwrite_zeroes_fast to BlockLimits

2020-06-11 Thread Vladimir Sementsov-Ogievskiy
The NBD spec was recently updated to clarify that max_block doesn't
relate to NBD_CMD_WRITE_ZEROES with NBD_CMD_FLAG_FAST_ZERO (which
mirrors Qemu flag BDRV_REQ_NO_FALLBACK). To drop the restriction we
need new max_pwrite_zeroes_fast.

Default value of new max_pwrite_zeroes_fast is zero and it means
use max_pwrite_zeroes. So this commit semantically changes nothing.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/block/block_int.h |  8 
 block/io.c| 17 -
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 791de6a59c..277e32fe31 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -626,6 +626,14 @@ typedef struct BlockLimits {
  * pwrite_zeroes_alignment. May be 0 if no inherent 32-bit limit */
 int32_t max_pwrite_zeroes;
 
+/*
+ * Maximum number of bytes that can zeroed at once if flag
+ * BDRV_REQ_NO_FALLBACK specified. Must be multiple of
+ * pwrite_zeroes_alignment.
+ * If 0, max_pwrite_zeroes is used for no-fallback case.
+ */
+int64_t max_pwrite_zeroes_fast;
+
 /* Optimal alignment for write zeroes requests in bytes. A power
  * of 2 is best but not mandatory.  Must be a multiple of
  * bl.request_alignment, and must be less than max_pwrite_zeroes
diff --git a/block/io.c b/block/io.c
index df8f2a98d4..0af62a53fd 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1774,12 +1774,13 @@ static int coroutine_fn 
bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
 bool need_flush = false;
 int head = 0;
 int tail = 0;
-
-int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
+int max_write_zeroes;
 int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
 bs->bl.request_alignment);
 int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
 
+assert(alignment % bs->bl.request_alignment == 0);
+
 if (!drv) {
 return -ENOMEDIUM;
 }
@@ -1788,12 +1789,18 @@ static int coroutine_fn 
bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
 return -ENOTSUP;
 }
 
-assert(alignment % bs->bl.request_alignment == 0);
-head = offset % alignment;
-tail = (offset + bytes) % alignment;
+if ((flags & BDRV_REQ_NO_FALLBACK) && bs->bl.max_pwrite_zeroes_fast) {
+max_write_zeroes = bs->bl.max_pwrite_zeroes_fast;
+} else {
+max_write_zeroes = bs->bl.max_pwrite_zeroes;
+}
+max_write_zeroes = MIN_NON_ZERO(max_write_zeroes, INT_MAX);
 max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
 assert(max_write_zeroes >= bs->bl.request_alignment);
 
+head = offset % alignment;
+tail = (offset + bytes) % alignment;
+
 while (bytes > 0 && !ret) {
 int num = bytes;
 
-- 
2.21.0




Re: [PATCH v8 3/4] vhost-user block device backend server

2020-06-11 Thread Stefan Hajnoczi
On Fri, Jun 05, 2020 at 07:35:37AM +0800, Coiby Xu wrote:
> +static void coroutine_fn vu_block_virtio_process_req(void *opaque)
> +{
> +struct req_data *data = opaque;
> +VuServer *server = data->server;
> +VuVirtq *vq = data->vq;
> +VuVirtqElement *elem = data->elem;
> +uint32_t type;
> +VuBlockReq *req;
> +
> +VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
> +BlockBackend *backend = vdev_blk->backend;
> +
> +struct iovec *in_iov = elem->in_sg;
> +struct iovec *out_iov = elem->out_sg;
> +unsigned in_num = elem->in_num;
> +unsigned out_num = elem->out_num;
> +/* refer to hw/block/virtio_blk.c */
> +if (elem->out_num < 1 || elem->in_num < 1) {
> +error_report("virtio-blk request missing headers");
> +free(elem);
> +return;
> +}
> +
> +req = g_new0(VuBlockReq, 1);

elem was allocated with enough space for VuBlockReq. Can this allocation
be eliminated?

  typedef struct VuBlockReq {
- VuVirtqElement *elem;
+ VuVirtqElement elem;
  int64_t sector_num;
  size_t size;
  struct virtio_blk_inhdr *in;
  struct virtio_blk_outhdr out;
  VuServer *server;
  struct VuVirtq *vq;
  } VuBlockReq;

  req = vu_queue_pop(vu_dev, vq, sizeof(*req));

> +req->server = server;
> +req->vq = vq;
> +req->elem = elem;
> +
> +if (unlikely(iov_to_buf(out_iov, out_num, 0, >out,
> +sizeof(req->out)) != sizeof(req->out))) {
> +error_report("virtio-blk request outhdr too short");
> +goto err;
> +}
> +
> +iov_discard_front(_iov, _num, sizeof(req->out));
> +
> +if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
> +error_report("virtio-blk request inhdr too short");
> +goto err;
> +}
> +
> +/* We always touch the last byte, so just see how big in_iov is.  */
> +req->in = (void *)in_iov[in_num - 1].iov_base
> +  + in_iov[in_num - 1].iov_len
> +  - sizeof(struct virtio_blk_inhdr);
> +iov_discard_back(in_iov, _num, sizeof(struct virtio_blk_inhdr));
> +
> +
> +type = le32toh(req->out.type);

This implementation assumes the request is always little-endian. This is
true for VIRTIO 1.0+ but not for older versions. Please check that
VIRTIO_F_VERSION_1 has been set.

In QEMU code the le32_to_cpu(), le64_to_cpu(), etc are common used
instead of le32toh(), etc.

> +switch (type & ~VIRTIO_BLK_T_BARRIER) {
> +case VIRTIO_BLK_T_IN:
> +case VIRTIO_BLK_T_OUT: {
> +ssize_t ret = 0;
> +bool is_write = type & VIRTIO_BLK_T_OUT;
> +req->sector_num = le64toh(req->out.sector);
> +
> +int64_t offset = req->sector_num * vdev_blk->blk_size;
> +QEMUIOVector *qiov = g_new0(QEMUIOVector, 1);

This can be allocated on the stack:

  QEMUIOVector qiov;

> +static void vhost_user_blk_server_free(VuBlockDev *vu_block_device)
> +{

I'm unsure why this is separate from vu_block_free(). Neither of these
functions actually frees VuBlockDev, so the name could be changed to
vhost_user_blk_server_stop().

> +if (!vu_block_device) {
> +return;
> +}
> +vhost_user_server_stop(_block_device->vu_server);
> +vu_block_free(vu_block_device);
> +
> +}
> +
> +/*
> + * An exported drive can serve multiple clients simultaneously,
> + * thus no need to export the same drive twice.

This comment is outdated. Only one client is served at a time.

> +static void vhost_user_blk_server_start(VuBlockDev *vu_block_device,
> +Error **errp)
> +{
> +
> +const char *name = vu_block_device->node_name;
> +SocketAddress *addr = vu_block_device->addr;
> +char *unix_socket = vu_block_device->addr->u.q_unix.path;
> +
> +if (vu_block_dev_find(name)) {
> +error_setg(errp, "Vhost-user-blk server with node-name '%s' "
> +   "has already been started",
> +   name);
> +return;
> +}

I think blk_new() permissions should prevent multiple writers. Having
multiple readers would be okay. Therefore this check can be removed.

> +
> +if (vu_block_dev_find_by_unix_socket(unix_socket)) {
> +error_setg(errp, "Vhost-user-blk server with with socket_path '%s' "
> +   "has already been started", unix_socket);
> +return;
> +}

Is it a problem if the same path is reused? I don't see an issue if the
user creates a vhost-user-blk server, connects a client, unlinks the
UNIX domain socket, and creates a new vhost-user-blk server with the
same path. It might be a little confusing but if the user wants to do
it, I don't see a reason to stop them.

> +
> +if (!vu_block_init(vu_block_device, errp)) {
> +return;
> +}
> +
> +
> +AioContext *ctx = bdrv_get_aio_context(blk_bs(vu_block_device->backend));
> +
> +if (!vhost_user_server_start(VHOST_USER_BLK_MAX_QUEUES, addr, ctx,
> + 

Re: [PATCH v8 33/34] qcow2: Assert that expand_zero_clusters_in_l1() does not support subclusters

2020-06-11 Thread Eric Blake

On 6/11/20 8:24 AM, Alberto Garcia wrote:

On Wed 10 Jun 2020 09:43:53 PM CEST, Eric Blake wrote:

On 6/10/20 10:03 AM, Alberto Garcia wrote:

This function is only used by qcow2_expand_zero_clusters() to
downgrade a qcow2 image to a previous version. It is however not
possible to downgrade an image with extended L2 entries because older
versions of qcow2 do not have this feature.


Well, it _is_ possible, but it would involve rewriting the entire
L1/L2 tables (including all internal snapshots)


Right :-) Let's try this way:

 This function is only used by qcow2_expand_zero_clusters() to
 downgrade a qcow2 image to a previous version. This would require
 transforming all extended L2 entries into normal L2 entries but
 this is not a simple task and there are no plans to implement this
 at the moment.


Works for me.

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: Clarification regarding new qemu-img convert --target-is-zero flag

2020-06-11 Thread Sam Eiderman
On Wed, Jun 10, 2020 at 7:31 PM Kevin Wolf  wrote:

> Am 10.06.2020 um 17:26 hat Sam Eiderman geschrieben:
> > Thanks for the clarification Kevin,
> >
> > Well first I want to discuss unallocated blocks.
> > From my understanding operating systems do not rely on disks to be
> > zero initialized, on the contrary, physical disks usually contain
> > garbage.
> > So an unallocated block should never be treated as zero by any real
> > world application.
>
> I think this is a dangerous assumption to make. The guest did have
> access to these unallocated blocks before, and they read as zero, so not
> writing these to the conversion target does change the virtual disk.
> Whether or not this is a harmless change for the guest depends on the
> software running in the VM.
>

I see your point


>
> > Now assuming that I only care about the allocated content of the
> > disks, I would like to save io/time zeroing out unallocated blocks.
> >
> > A real world example would be flushing a 500GB vmdk on a real SSD
> > disk, if the vmdk contained only 2GB of data, no point in writing
> > 498GB of zeroes to that SSD - reducing its lifespan for nothing.
>
> Don't pretty much all SSDs support efficient zeroing/hole punching these
> days so that the blocks would actually be deallocated on the disk level?
>
> > Now from what I understand --target-is-zero will give me this behavior
> > even though that I really use it as a "--skip-prezeroing-target"
> > (sorry for the bad name)
> > (This is only true if later *allocated zeroes* are indeed copied
> correctly)
>
> As you noticed later, it doesn't.
>
> The behaviour you want is more like -B, except that you don't have a
> backing file. If you also pass -n, the actual filename you pass isn't
> even used, so I guess '-B "" -n' should do the trick?
>
> Kevin
>
>


Re: [PATCH v8 33/34] qcow2: Assert that expand_zero_clusters_in_l1() does not support subclusters

2020-06-11 Thread Alberto Garcia
On Wed 10 Jun 2020 09:43:53 PM CEST, Eric Blake wrote:
> On 6/10/20 10:03 AM, Alberto Garcia wrote:
>> This function is only used by qcow2_expand_zero_clusters() to
>> downgrade a qcow2 image to a previous version. It is however not
>> possible to downgrade an image with extended L2 entries because older
>> versions of qcow2 do not have this feature.
>
> Well, it _is_ possible, but it would involve rewriting the entire
> L1/L2 tables (including all internal snapshots)

Right :-) Let's try this way:

This function is only used by qcow2_expand_zero_clusters() to
downgrade a qcow2 image to a previous version. This would require
transforming all extended L2 entries into normal L2 entries but
this is not a simple task and there are no plans to implement this
at the moment.

Berto



Re: [PATCH] block/aio_task: allow start/wait task from any coroutine

2020-06-11 Thread Vladimir Sementsov-Ogievskiy

11.06.2020 15:31, Denis V. Lunev wrote:

On 6/11/20 10:36 AM, Vladimir Sementsov-Ogievskiy wrote:

Currently, the aio task pool assumes that there is a main coroutine, which
creates tasks and waits for them. Let's remove the restriction by using
CoQueue. Code becomes clearer, interface more obvious.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---

Hi! Here is my counter-proposal for
"[PATCH 1/2] aio: allow to wait for coroutine pool from different coroutine"
by Denis. I'm sure that if we are going to change something here, better
is make the interface work from any coroutine without the restriction of
only-on-waiter at the moment.

(Note, that it is still not thread-safe)


  block/aio_task.c | 15 ---
  1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/block/aio_task.c b/block/aio_task.c
index 88989fa248..d48b29ff83 100644
--- a/block/aio_task.c
+++ b/block/aio_task.c
@@ -27,11 +27,10 @@
  #include "block/aio_task.h"
  
  struct AioTaskPool {

-Coroutine *main_co;
  int status;
  int max_busy_tasks;
  int busy_tasks;
-bool waiting;
+CoQueue waiters;
  };
  
  static void coroutine_fn aio_task_co(void *opaque)

@@ -52,21 +51,15 @@ static void coroutine_fn aio_task_co(void *opaque)
  
  g_free(task);
  
-if (pool->waiting) {

-pool->waiting = false;
-aio_co_wake(pool->main_co);
-}
+qemu_co_queue_next(>waiters);

nope, this will wakeup only single waiter.
the code will deadlock If there are 2 waiters for the last
entry.



Hmm, right.
The problem is that the original design combines two different things into one:
 
  1. wait for all tasks to complete

  2. wait for a free slot, to start a new task

2. should work as is, with qemu_co_queue_next() and co-queue, and for 1. we
should have separate yield-point, to be waken up when busy_tasks becomes 0.

I'll resend.



--
Best regards,
Vladimir



Re: [PATCH] block/aio_task: allow start/wait task from any coroutine

2020-06-11 Thread Denis V. Lunev
On 6/11/20 10:36 AM, Vladimir Sementsov-Ogievskiy wrote:
> Currently, aio task pool assumes that there is a main coroutine, which
> creates tasks and wait for them. Let's remove the restriction by using
> CoQueue. Code becomes clearer, interface more obvious.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>
> Hi! Here is my counter-proposal for
> "[PATCH 1/2] aio: allow to wait for coroutine pool from different coroutine"
> by Denis. I'm sure that if we are going to change something here, better
> is make the interface work from any coroutine without the restriction of
> only-on-waiter at the moment.
>
> (Note, that it is still not thread-safe)
>
>
>  block/aio_task.c | 15 ---
>  1 file changed, 4 insertions(+), 11 deletions(-)
>
> diff --git a/block/aio_task.c b/block/aio_task.c
> index 88989fa248..d48b29ff83 100644
> --- a/block/aio_task.c
> +++ b/block/aio_task.c
> @@ -27,11 +27,10 @@
>  #include "block/aio_task.h"
>  
>  struct AioTaskPool {
> -Coroutine *main_co;
>  int status;
>  int max_busy_tasks;
>  int busy_tasks;
> -bool waiting;
> +CoQueue waiters;
>  };
>  
>  static void coroutine_fn aio_task_co(void *opaque)
> @@ -52,21 +51,15 @@ static void coroutine_fn aio_task_co(void *opaque)
>  
>  g_free(task);
>  
> -if (pool->waiting) {
> -pool->waiting = false;
> -aio_co_wake(pool->main_co);
> -}
> +qemu_co_queue_next(>waiters);
nope, this will wakeup only single waiter.
the code will deadlock If there are 2 waiters for the last
entry.

You need something like qemu_co_queue_restart_all() here
at least.

Den



Re: [PATCH v7 0/9] acpi: i386 tweaks

2020-06-11 Thread Igor Mammedov
On Wed, 10 Jun 2020 17:53:46 +0200
Gerd Hoffmann  wrote:

> On Wed, Jun 10, 2020 at 10:54:26AM -0400, Michael S. Tsirkin wrote:
> > On Wed, Jun 10, 2020 at 01:40:02PM +0200, Igor Mammedov wrote:  
> > > On Wed, 10 Jun 2020 11:41:22 +0200
> > > Gerd Hoffmann  wrote:
> > >   
> > > > First batch of microvm patches, some generic acpi stuff.
> > > > Split the acpi-build.c monster, specifically split the
> > > > pc and q35 and pci bits into a separate file which we
> > > > can skip building at some point in the future.
> > > >   
> > > It looks like series is missing patch to whitelist changed ACPI tables in
> > > bios-table-test.  
> > 
> > Right. Does it pass make check?  
> 
> No, but after 'git cherry-pick 9b20a3365d73dad4ad144eab9c5827dbbb2e9f21' it 
> does.
> 
> > > Do we already have test case for microvm in bios-table-test,
> > > if not it's probably time to add it.  
> > 
> > Separately :)  
> 
> Especially as this series is just preparing cleanups and doesn't
> actually add acpi support to microvm yet.
> 
> But, yes, adding a testcase sounds useful.
Sorry for confusion, I didn't mean to do it within this series

> 
> take care,
>   Gerd
> 




Re: Clarification regarding new qemu-img convert --target-is-zero flag

2020-06-11 Thread David Edmondson
On Wednesday, 2020-06-10 at 11:21:27 -05, Eric Blake wrote:

> On 6/10/20 10:57 AM, David Edmondson wrote:
>> On Wednesday, 2020-06-10 at 10:48:52 -05, Eric Blake wrote:
>> 
>>> On 6/10/20 10:42 AM, David Edmondson wrote:
 On Wednesday, 2020-06-10 at 18:29:33 +03, Sam Eiderman wrote:

> Excuse me,
>
> Vladimir already pointed out in the first comment that it will skip
> writing real zeroes later.

 Right. That's why you want something like "--no-need-to-zero-initialise"
 (the name keeps getting longer!), which would still write zeroes to the
 blocks that should contain zeroes, as opposed to writing zeroes to
 prepare the device.
>>>
>>> Or maybe something like:
>>>
>>> qemu-img convert --skip-unallocated
>> 
>> This seems fine.
>> 
>>> which says that a pre-zeroing pass may be attempted, but it if fails,
>> 
>> This bit puzzles me. In what circumstances might we attempt but fail?
>> Does it really mean "if it can be done instantly, it will be done, but
>> not if it costs something"?
>
> A fast pre-zeroing pass is faster than writing explicit zeroes.  If such 
> a fast pass works, then you can avoid further I/O for all subsequent 
> zero sections; the unallocated sections will now happen to read as zero, 
> but that is not a problem since the content of unallocated portions is 
> not guaranteed.
>
> But if pre-zeroing is not fast, then you have to spend the extra I/O to 
> explicitly zero the portions that are allocated but read as zero, while 
> still skipping the unallocated portions.

The lack of deterministic behaviour would worry me. If the caller can't
be sure whether the unallocated portions of the device will be zeroed or
not, it feels as though the number of potential use cases is reduced.

The optimisation is focused on images where there are a significant
number of allocated zero blocks. Is that a common case? (It obviously
exists, because many images generated before "--target-is-zero" will be
like that, but perhaps they would be better cleaned by an unallocator.)

>> I'd be more inclined to go for "unallocated blocks will not be written",
>> without any attempts to pre-zero.
>
> But that can be slower, when pre-zeroing is fast.  "Unallocated blocks 
> need not be written" allows for optimizations, "unallocated blocks must 
> not be touched" does not.

"unallocated blocks may not be written" would be fine.

dme.
-- 
There is love in you.



Re: [PATCH v8 09/10] acpi: drop build_piix4_pm()

2020-06-11 Thread Philippe Mathieu-Daudé
On 6/11/20 9:29 AM, Gerd Hoffmann wrote:
> The _SB.PCI0.PX13.P13C opregion (holds isa device enable bits)
> is not used any more, remove it from DSDT.

Maybe:

"is not used any more (see previous commit), remove it from DSDT."

(if Michael can do the change when applying if you agree).

Reviewed-by: Philippe Mathieu-Daudé 

> 
> Signed-off-by: Gerd Hoffmann 
> Reviewed-by: Igor Mammedow 
> ---
>  hw/i386/acpi-build.c | 16 
>  1 file changed, 16 deletions(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 750fcf9baa37..02cf4199c2e9 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -1320,21 +1320,6 @@ static void build_q35_isa_bridge(Aml *table)
>  aml_append(table, scope);
>  }
>  
> -static void build_piix4_pm(Aml *table)
> -{
> -Aml *dev;
> -Aml *scope;
> -
> -scope =  aml_scope("_SB.PCI0");
> -dev = aml_device("PX13");
> -aml_append(dev, aml_name_decl("_ADR", aml_int(0x00010003)));
> -
> -aml_append(dev, aml_operation_region("P13C", AML_PCI_CONFIG,
> - aml_int(0x00), 0xff));
> -aml_append(scope, dev);
> -aml_append(table, scope);
> -}
> -
>  static void build_piix4_isa_bridge(Aml *table)
>  {
>  Aml *dev;
> @@ -1486,7 +1471,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>  aml_append(dsdt, sb_scope);
>  
>  build_hpet_aml(dsdt);
> -build_piix4_pm(dsdt);
>  build_piix4_isa_bridge(dsdt);
>  build_isa_devices_aml(dsdt);
>  build_piix4_pci_hotplug(dsdt);
> 




Re: [PATCH v8 10/10] acpi: q35: drop _SB.PCI0.ISA.LPCD opregion.

2020-06-11 Thread Philippe Mathieu-Daudé
On 6/11/20 11:12 AM, Michael S. Tsirkin wrote:
> On Thu, Jun 11, 2020 at 10:31:01AM +0200, Philippe Mathieu-Daudé wrote:
>> On 6/11/20 10:27 AM, Philippe Mathieu-Daudé wrote:
>>> Hi Gerd,
>>>
>>> On 6/11/20 9:29 AM, Gerd Hoffmann wrote:
 Seems to be unused.

 Signed-off-by: Gerd Hoffmann 
 Reviewed-by: Igor Mammedov 
 ---
  hw/i386/acpi-build.c | 11 ---
  1 file changed, 11 deletions(-)

 diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
 index 02cf4199c2e9..d93ea40c58b9 100644
 --- a/hw/i386/acpi-build.c
 +++ b/hw/i386/acpi-build.c
 @@ -1296,7 +1296,6 @@ static void build_q35_isa_bridge(Aml *table)
  {
  Aml *dev;
  Aml *scope;
 -Aml *field;
  
  scope =  aml_scope("_SB.PCI0");
  dev = aml_device("ISA");
 @@ -1306,16 +1305,6 @@ static void build_q35_isa_bridge(Aml *table)
  aml_append(dev, aml_operation_region("PIRQ", AML_PCI_CONFIG,
   aml_int(0x60), 0x0C));
  
 -aml_append(dev, aml_operation_region("LPCD", AML_PCI_CONFIG,
 - aml_int(0x80), 0x02));
 -field = aml_field("LPCD", AML_ANY_ACC, AML_NOLOCK, AML_PRESERVE);
 -aml_append(field, aml_named_field("COMA", 3));
 -aml_append(field, aml_reserved_field(1));
 -aml_append(field, aml_named_field("COMB", 3));
 -aml_append(field, aml_reserved_field(1));
 -aml_append(field, aml_named_field("LPTD", 2));
 -aml_append(dev, field);
 -
  aml_append(scope, dev);
  aml_append(table, scope);
  }

>>>
>>> I'm a bit confused, isn't it use to describe these
>>> devices?
>>>
>>> (qemu) info qtree
>>> bus: main-system-bus
>>>   type System
>>>   dev: q35-pcihost, id ""
>>> bus: pcie.0
>>>   type PCIE
>>>   dev: ICH9-LPC, id ""
>>> gpio-out "gsi" 24
>>> class ISA bridge, addr 00:1f.0, pci id 8086:2918 (sub 1af4:1100)
>>> bus: isa.0
>>>   type ISA
>>>   dev: port92, id ""
>>> gpio-out "a20" 1
>>>   dev: vmmouse, id ""
>>>   dev: vmport, id ""
>>>   dev: isa-parallel, id ""
>>> index = 0 (0x0)
>>> iobase = 888 (0x378)
>>> irq = 7 (0x7)
>>> chardev = "parallel0"
>>> isa irq 7
>>>   dev: isa-serial, id ""
>>> index = 1 (0x1)
>>> iobase = 760 (0x2f8)
>>> irq = 3 (0x3)
>>> chardev = "serial1"
>>> wakeup = 0 (0x0)
>>> isa irq 3
>>>   dev: isa-serial, id ""
>>> index = 0 (0x0)
>>> iobase = 1016 (0x3f8)
>>> irq = 4 (0x4)
>>> chardev = "serial0"
>>> wakeup = 0 (0x0)
>>> isa irq 4
>>>
>>
>> Ah, this patch complements the previous "acpi: drop serial/parallel
>> enable bits from dsdt", right? Maybe better to include this change
>> with the build_q35_isa_bridge() part. Like in a single patch:
>> "acpi: q35: drop lpc/serial/parallel enable bits from dsdt"
>>
>> Then keep the PIIX part of the patches.
> 
> Don't see why really.

OK, no problem on my side. Series fully reviewed!




Re: [PATCH v8 01/10] qtest: allow DSDT acpi table changes

2020-06-11 Thread Michael S. Tsirkin
On Thu, Jun 11, 2020 at 09:29:10AM +0200, Gerd Hoffmann wrote:
> Signed-off-by: Gerd Hoffmann 

OK, thanks! What is still missing is resulting ASL
differences in the expected files for reviewers.
See ./tests/qtest/bios-tables-test.c step 7.

I'm working on a tool that will show the changes
more easily so they can be included with each
change, but that will do for now.

Thanks and sorry about all the bureaucracy.

> ---
>  tests/qtest/bios-tables-test-allowed-diff.h | 17 +
>  1 file changed, 17 insertions(+)
> 
> diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
> b/tests/qtest/bios-tables-test-allowed-diff.h
> index dfb8523c8bf4..6a052c50447a 100644
> --- a/tests/qtest/bios-tables-test-allowed-diff.h
> +++ b/tests/qtest/bios-tables-test-allowed-diff.h
> @@ -1 +1,18 @@
>  /* List of comma-separated changed AML files to ignore */
> +"tests/data/acpi/pc/DSDT",
> +"tests/data/acpi/pc/DSDT.acpihmat",
> +"tests/data/acpi/pc/DSDT.bridge",
> +"tests/data/acpi/pc/DSDT.cphp",
> +"tests/data/acpi/pc/DSDT.dimmpxm",
> +"tests/data/acpi/pc/DSDT.ipmikcs",
> +"tests/data/acpi/pc/DSDT.memhp",
> +"tests/data/acpi/pc/DSDT.numamem",
> +"tests/data/acpi/q35/DSDT",
> +"tests/data/acpi/q35/DSDT.acpihmat",
> +"tests/data/acpi/q35/DSDT.bridge",
> +"tests/data/acpi/q35/DSDT.cphp",
> +"tests/data/acpi/q35/DSDT.dimmpxm",
> +"tests/data/acpi/q35/DSDT.ipmibt",
> +"tests/data/acpi/q35/DSDT.memhp",
> +"tests/data/acpi/q35/DSDT.mmio64",
> +"tests/data/acpi/q35/DSDT.numamem",
> -- 
> 2.18.4




Re: [PATCH v8 10/10] acpi: q35: drop _SB.PCI0.ISA.LPCD opregion.

2020-06-11 Thread Michael S. Tsirkin
On Thu, Jun 11, 2020 at 10:31:01AM +0200, Philippe Mathieu-Daudé wrote:
> On 6/11/20 10:27 AM, Philippe Mathieu-Daudé wrote:
> > Hi Gerd,
> > 
> > On 6/11/20 9:29 AM, Gerd Hoffmann wrote:
> >> Seems to be unused.
> >>
> >> Signed-off-by: Gerd Hoffmann 
> >> Reviewed-by: Igor Mammedov 
> >> ---
> >>  hw/i386/acpi-build.c | 11 ---
> >>  1 file changed, 11 deletions(-)
> >>
> >> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> >> index 02cf4199c2e9..d93ea40c58b9 100644
> >> --- a/hw/i386/acpi-build.c
> >> +++ b/hw/i386/acpi-build.c
> >> @@ -1296,7 +1296,6 @@ static void build_q35_isa_bridge(Aml *table)
> >>  {
> >>  Aml *dev;
> >>  Aml *scope;
> >> -Aml *field;
> >>  
> >>  scope =  aml_scope("_SB.PCI0");
> >>  dev = aml_device("ISA");
> >> @@ -1306,16 +1305,6 @@ static void build_q35_isa_bridge(Aml *table)
> >>  aml_append(dev, aml_operation_region("PIRQ", AML_PCI_CONFIG,
> >>   aml_int(0x60), 0x0C));
> >>  
> >> -aml_append(dev, aml_operation_region("LPCD", AML_PCI_CONFIG,
> >> - aml_int(0x80), 0x02));
> >> -field = aml_field("LPCD", AML_ANY_ACC, AML_NOLOCK, AML_PRESERVE);
> >> -aml_append(field, aml_named_field("COMA", 3));
> >> -aml_append(field, aml_reserved_field(1));
> >> -aml_append(field, aml_named_field("COMB", 3));
> >> -aml_append(field, aml_reserved_field(1));
> >> -aml_append(field, aml_named_field("LPTD", 2));
> >> -aml_append(dev, field);
> >> -
> >>  aml_append(scope, dev);
> >>  aml_append(table, scope);
> >>  }
> >>
> > 
> > I'm a bit confused, isn't it use to describe these
> > devices?
> > 
> > (qemu) info qtree
> > bus: main-system-bus
> >   type System
> >   dev: q35-pcihost, id ""
> > bus: pcie.0
> >   type PCIE
> >   dev: ICH9-LPC, id ""
> > gpio-out "gsi" 24
> > class ISA bridge, addr 00:1f.0, pci id 8086:2918 (sub 1af4:1100)
> > bus: isa.0
> >   type ISA
> >   dev: port92, id ""
> > gpio-out "a20" 1
> >   dev: vmmouse, id ""
> >   dev: vmport, id ""
> >   dev: isa-parallel, id ""
> > index = 0 (0x0)
> > iobase = 888 (0x378)
> > irq = 7 (0x7)
> > chardev = "parallel0"
> > isa irq 7
> >   dev: isa-serial, id ""
> > index = 1 (0x1)
> > iobase = 760 (0x2f8)
> > irq = 3 (0x3)
> > chardev = "serial1"
> > wakeup = 0 (0x0)
> > isa irq 3
> >   dev: isa-serial, id ""
> > index = 0 (0x0)
> > iobase = 1016 (0x3f8)
> > irq = 4 (0x4)
> > chardev = "serial0"
> > wakeup = 0 (0x0)
> > isa irq 4
> > 
> 
> Ah, this patch complements the previous "acpi: drop serial/parallel
> enable bits from dsdt", right? Maybe better to include this change
> with the build_q35_isa_bridge() part. Like in a single patch:
> "acpi: q35: drop lpc/serial/parallel enable bits from dsdt"
> 
> Then keep the PIIX part of the patches.

Don't see why really.

-- 
MST




Re: [PATCH 2/2] qcow2: improve savevm performance

2020-06-11 Thread Vladimir Sementsov-Ogievskiy

11.06.2020 11:25, Denis V. Lunev wrote:

On 6/11/20 11:04 AM, Vladimir Sementsov-Ogievskiy wrote:

10.06.2020 22:00, Denis V. Lunev wrote:

This patch does 2 standard basic things:
- it creates intermediate buffer for all writes from QEMU migration code
    to QCOW2 image,
- this buffer is sent to disk asynchronously, allowing several writes to
    run in parallel.

In general, migration code is fantastically inefficent (by observation),
buffers are not aligned and sent with arbitrary pieces, a lot of time
less than 100 bytes at a chunk, which results in read-modify-write
operations with non-cached operations. It should also be noted that all
operations are performed into unallocated image blocks, which also
suffer
due to partial writes to such new clusters.

Snapshot creation time (2 GB Fedora-31 VM running over NVME storage):
  original fixed
cached:  1.79s   1.27s
non-cached:  3.29s   0.81s

The difference over HDD would be more significant :)

Signed-off-by: Denis V. Lunev 


If I follow correctly, you make qcow2_save_vmstate implicitly
asynchronous:
it may return immediately after creating a task, and task is executing in
parallel.

I think, block-layer is unprepared for such behavior, it rely on the
fact that
.bdrv_save_vmstate is synchronous.

For example, look at bdrv_co_rw_vmstate(). It calls
drv->bdrv_save_vmstate
inside pair of bdrv_inc_in_flight()/bdrv_dec_in_flight(). It means
that with
this patch, we may break drained section.


Drained sections are not involved into the equation - the guest
is stopped at the moment.

If we are talking about in_flight count, it should not be a problem
even if the guest is running. We could just put inc/dec into
qcow2_co_vmstate_task_entry().


No, we at least should do "inc" in qcow2_save_vmstate, when we are inside
original generic inc/dec. Not a big deal still.




Next, it's a kind of cache for vmstate-write operation. It seems for
me that
it's not directly related to qcow2. So, we can implement it in generic
block
layer, where we can handle in_fligth requests. Can we keep
.bdrv_save_vmstate
handlers of format drivers as is, keep them synchronous, but instead
change
generic interface to be (optionally?) cached?


This has been discussed already in the previous thread. .bdrv_save_vmstate
is implemented in QCOW2 and sheepdog only. Thus other formats are
mostly non-interested.


Hmm, yes, sorry, I forgot. It seems that the sheepdog project is dead. Who knows,
is it correct?

OK, it's probably OK to make the interface optionally-async, if we add in-flight
requests correctly, but at least we should document it around .bdrv_save_vmstate
handler declaration.



--
Best regards,
Vladimir



Re: [PATCH] iotests: Add copyright line in qcow2.py

2020-06-11 Thread Stefan Hajnoczi
On Tue, Jun 09, 2020 at 03:59:44PM -0500, Eric Blake wrote:
> The file qcow2.py was originally contributed in 2012 by Kevin Wolf,
> but was not given traditional boilerplate headers at the time.  The
> missing license was just rectified (commit 16306a7b39) using the
> project-default GPLv2+, but as Vladimir is not at Red Hat, he did not
> add a Copyright line.  All earlier contributions have come from CC'd
> authors, where all but Stefan used a Red Hat address at the time of
> the contribution, and that copyright carries over to the split to
> qcow2_format.py (d5262c7124).
> 
> CC: Kevin Wolf 
> CC: Stefan Hajnoczi 
> CC: Eduardo Habkost 
> CC: Max Reitz 
> CC: Philippe Mathieu-Daudé 
> CC: Paolo Bonzini 
> Signed-off-by: Eric Blake 
> 
> ---
> Commit ids above assume my bitmaps pull request does not have to be respun...
> Based-on: <20200609205245.3548257-1-ebl...@redhat.com>
> ---
>  tests/qemu-iotests/qcow2.py| 2 ++
>  tests/qemu-iotests/qcow2_format.py | 1 +
>  2 files changed, 3 insertions(+)

The git history shows which lines were contributed by IBM and is more
detailed than a single copyright line at the top of the file. It's also
common to make smaller contributions, like the one I made here, without
adding a copyright line. In light of this I see no issue with adding a
Red Hat copyright line and there is no need to add one for IBM:

Acked-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature


Re: [PATCH v8 10/10] acpi: q35: drop _SB.PCI0.ISA.LPCD opregion.

2020-06-11 Thread Philippe Mathieu-Daudé
On 6/11/20 10:27 AM, Philippe Mathieu-Daudé wrote:
> Hi Gerd,
> 
> On 6/11/20 9:29 AM, Gerd Hoffmann wrote:
>> Seems to be unused.
>>
>> Signed-off-by: Gerd Hoffmann 
>> Reviewed-by: Igor Mammedov 
>> ---
>>  hw/i386/acpi-build.c | 11 ---
>>  1 file changed, 11 deletions(-)
>>
>> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
>> index 02cf4199c2e9..d93ea40c58b9 100644
>> --- a/hw/i386/acpi-build.c
>> +++ b/hw/i386/acpi-build.c
>> @@ -1296,7 +1296,6 @@ static void build_q35_isa_bridge(Aml *table)
>>  {
>>  Aml *dev;
>>  Aml *scope;
>> -Aml *field;
>>  
>>  scope =  aml_scope("_SB.PCI0");
>>  dev = aml_device("ISA");
>> @@ -1306,16 +1305,6 @@ static void build_q35_isa_bridge(Aml *table)
>>  aml_append(dev, aml_operation_region("PIRQ", AML_PCI_CONFIG,
>>   aml_int(0x60), 0x0C));
>>  
>> -aml_append(dev, aml_operation_region("LPCD", AML_PCI_CONFIG,
>> - aml_int(0x80), 0x02));
>> -field = aml_field("LPCD", AML_ANY_ACC, AML_NOLOCK, AML_PRESERVE);
>> -aml_append(field, aml_named_field("COMA", 3));
>> -aml_append(field, aml_reserved_field(1));
>> -aml_append(field, aml_named_field("COMB", 3));
>> -aml_append(field, aml_reserved_field(1));
>> -aml_append(field, aml_named_field("LPTD", 2));
>> -aml_append(dev, field);
>> -
>>  aml_append(scope, dev);
>>  aml_append(table, scope);
>>  }
>>
> 
> I'm a bit confused, isn't it use to describe these
> devices?
> 
> (qemu) info qtree
> bus: main-system-bus
>   type System
>   dev: q35-pcihost, id ""
> bus: pcie.0
>   type PCIE
>   dev: ICH9-LPC, id ""
> gpio-out "gsi" 24
> class ISA bridge, addr 00:1f.0, pci id 8086:2918 (sub 1af4:1100)
> bus: isa.0
>   type ISA
>   dev: port92, id ""
> gpio-out "a20" 1
>   dev: vmmouse, id ""
>   dev: vmport, id ""
>   dev: isa-parallel, id ""
> index = 0 (0x0)
> iobase = 888 (0x378)
> irq = 7 (0x7)
> chardev = "parallel0"
> isa irq 7
>   dev: isa-serial, id ""
> index = 1 (0x1)
> iobase = 760 (0x2f8)
> irq = 3 (0x3)
> chardev = "serial1"
> wakeup = 0 (0x0)
> isa irq 3
>   dev: isa-serial, id ""
> index = 0 (0x0)
> iobase = 1016 (0x3f8)
> irq = 4 (0x4)
> chardev = "serial0"
> wakeup = 0 (0x0)
> isa irq 4
> 

Ah, this patch complements the previous "acpi: drop serial/parallel
enable bits from dsdt", right? Maybe better to include this change
with the build_q35_isa_bridge() part. Like in a single patch:
"acpi: q35: drop lpc/serial/parallel enable bits from dsdt"

Then keep the PIIX part of the patches.




Re: [PATCH v8 10/10] acpi: q35: drop _SB.PCI0.ISA.LPCD opregion.

2020-06-11 Thread Philippe Mathieu-Daudé
Hi Gerd,

On 6/11/20 9:29 AM, Gerd Hoffmann wrote:
> Seems to be unused.
> 
> Signed-off-by: Gerd Hoffmann 
> Reviewed-by: Igor Mammedov 
> ---
>  hw/i386/acpi-build.c | 11 ---
>  1 file changed, 11 deletions(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 02cf4199c2e9..d93ea40c58b9 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -1296,7 +1296,6 @@ static void build_q35_isa_bridge(Aml *table)
>  {
>  Aml *dev;
>  Aml *scope;
> -Aml *field;
>  
>  scope =  aml_scope("_SB.PCI0");
>  dev = aml_device("ISA");
> @@ -1306,16 +1305,6 @@ static void build_q35_isa_bridge(Aml *table)
>  aml_append(dev, aml_operation_region("PIRQ", AML_PCI_CONFIG,
>   aml_int(0x60), 0x0C));
>  
> -aml_append(dev, aml_operation_region("LPCD", AML_PCI_CONFIG,
> - aml_int(0x80), 0x02));
> -field = aml_field("LPCD", AML_ANY_ACC, AML_NOLOCK, AML_PRESERVE);
> -aml_append(field, aml_named_field("COMA", 3));
> -aml_append(field, aml_reserved_field(1));
> -aml_append(field, aml_named_field("COMB", 3));
> -aml_append(field, aml_reserved_field(1));
> -aml_append(field, aml_named_field("LPTD", 2));
> -aml_append(dev, field);
> -
>  aml_append(scope, dev);
>  aml_append(table, scope);
>  }
> 

I'm a bit confused, isn't it used to describe
devices?

(qemu) info qtree
bus: main-system-bus
  type System
  dev: q35-pcihost, id ""
bus: pcie.0
  type PCIE
  dev: ICH9-LPC, id ""
gpio-out "gsi" 24
class ISA bridge, addr 00:1f.0, pci id 8086:2918 (sub 1af4:1100)
bus: isa.0
  type ISA
  dev: port92, id ""
gpio-out "a20" 1
  dev: vmmouse, id ""
  dev: vmport, id ""
  dev: isa-parallel, id ""
index = 0 (0x0)
iobase = 888 (0x378)
irq = 7 (0x7)
chardev = "parallel0"
isa irq 7
  dev: isa-serial, id ""
index = 1 (0x1)
iobase = 760 (0x2f8)
irq = 3 (0x3)
chardev = "serial1"
wakeup = 0 (0x0)
isa irq 3
  dev: isa-serial, id ""
index = 0 (0x0)
iobase = 1016 (0x3f8)
irq = 4 (0x4)
chardev = "serial0"
wakeup = 0 (0x0)
isa irq 4




Re: [PATCH 2/2] qcow2: improve savevm performance

2020-06-11 Thread Denis V. Lunev
On 6/11/20 11:04 AM, Vladimir Sementsov-Ogievskiy wrote:
> 10.06.2020 22:00, Denis V. Lunev wrote:
>> This patch does 2 standard basic things:
>> - it creates intermediate buffer for all writes from QEMU migration code
>>    to QCOW2 image,
>> - this buffer is sent to disk asynchronously, allowing several writes to
>>    run in parallel.
>>
>> In general, migration code is fantastically inefficent (by observation),
>> buffers are not aligned and sent with arbitrary pieces, a lot of time
>> less than 100 bytes at a chunk, which results in read-modify-write
>> operations with non-cached operations. It should also be noted that all
>> operations are performed into unallocated image blocks, which also
>> suffer
>> due to partial writes to such new clusters.
>>
>> Snapshot creation time (2 GB Fedora-31 VM running over NVME storage):
>>  original fixed
>> cached:  1.79s   1.27s
>> non-cached:  3.29s   0.81s
>>
>> The difference over HDD would be more significant :)
>>
>> Signed-off-by: Denis V. Lunev 
>
> If I follow correctly, you make qcow2_save_vmstate implicitly
> asynchronous:
> it may return immediately after creating a task, and task is executing in
> parallel.
>
> I think, block-layer is unprepared for such behavior, it rely on the
> fact that
> .bdrv_save_vmstate is synchronous.
>
> For example, look at bdrv_co_rw_vmstate(). It calls
> drv->bdrv_save_vmstate
> inside pair of bdrv_inc_in_flight()/bdrv_dec_in_flight(). It means
> that with
> this patch, we may break drained section.
>
Drained sections are not involved into the equation - the guest
is stopped at the moment.

If we are talking about in_flight count, it should not be a problem
even if the guest is running. We could just put inc/dec into
qcow2_co_vmstate_task_entry().

> Next, it's a kind of cache for vmstate-write operation. It seems for
> me that
> it's not directly related to qcow2. So, we can implement it in generic
> block
> layer, where we can handle in_fligth requests. Can we keep
> .bdrv_save_vmstate
> handlers of format drivers as is, keep them synchronous, but instead
> change
> generic interface to be (optionally?) cached?

This has been discussed already in the previous thread. .bdrv_save_vmstate
is implemented in QCOW2 and sheepdog only. Thus other formats are
mostly non-interested.

>
>> ---
>>   block/qcow2.c | 111 +-
>>   block/qcow2.h |   4 ++
>>   2 files changed, 113 insertions(+), 2 deletions(-)
>>
>> diff --git a/block/qcow2.c b/block/qcow2.c
>> index 0cd2e6757e..e6232f32e2 100644
>> --- a/block/qcow2.c
>> +++ b/block/qcow2.c
>> @@ -4797,11 +4797,43 @@ static int qcow2_make_empty(BlockDriverState
>> *bs)
>>   return ret;
>>   }
>>   +
>> +typedef struct Qcow2VMStateTask {
>> +    AioTask task;
>> +
>> +    BlockDriverState *bs;
>> +    int64_t offset;
>> +    void *buf;
>> +    size_t bytes;
>> +} Qcow2VMStateTask;
>> +
>> +typedef struct Qcow2SaveVMState {
>> +    AioTaskPool *pool;
>> +    Qcow2VMStateTask *t;
>> +} Qcow2SaveVMState;
>> +
>>   static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
>>   {
>>   BDRVQcow2State *s = bs->opaque;
>> +    Qcow2SaveVMState *state = s->savevm_state;
>>   int ret;
>>   +    if (state != NULL) {
>> +    aio_task_pool_start_task(state->pool, &state->t->task);
>> +
>> +    aio_task_pool_wait_all(state->pool);
>> +    ret = aio_task_pool_status(state->pool);
>> +
>> +    aio_task_pool_free(state->pool);
>> +    g_free(state);
>> +
>> +    s->savevm_state = NULL;
>> +
>> +    if (ret < 0) {
>> +    return ret;
>> +    }
>> +    }
>> +
>>   qemu_co_mutex_lock(>lock);
>>   ret = qcow2_write_caches(bs);
>>   qemu_co_mutex_unlock(>lock);
>> @@ -5098,14 +5130,89 @@ static int
>> qcow2_has_zero_init(BlockDriverState *bs)
>>   }
>>   }
>>   +
>> +static coroutine_fn int qcow2_co_vmstate_task_entry(AioTask *task)
>> +{
>> +    int err = 0;
>> +    Qcow2VMStateTask *t = container_of(task, Qcow2VMStateTask, task);
>> +
>> +    if (t->bytes != 0) {
>> +    QEMUIOVector local_qiov;
>> +    qemu_iovec_init_buf(_qiov, t->buf, t->bytes);
>> +    err = t->bs->drv->bdrv_co_pwritev_part(t->bs, t->offset,
>> t->bytes,
>> +   _qiov, 0, 0);
>> +    }
>> +
>> +    qemu_vfree(t->buf);
>> +    return err;
>> +}
>> +
>> +static Qcow2VMStateTask *qcow2_vmstate_task_create(BlockDriverState
>> *bs,
>> +    int64_t pos,
>> size_t size)
>> +{
>> +    BDRVQcow2State *s = bs->opaque;
>> +    Qcow2VMStateTask *t = g_new(Qcow2VMStateTask, 1);
>> +
>> +    *t = (Qcow2VMStateTask) {
>> +    .task.func = qcow2_co_vmstate_task_entry,
>> +    .buf = qemu_blockalign(bs, size),
>> +    .offset = qcow2_vm_state_offset(s) + pos,
>> +    .bs = bs,
>> +    };
>> +
>> +    return t;
>> +}
>> +
>>   static int qcow2_save_vmstate(BlockDriverState *bs, 

Re: [PATCH v8 02/10] acpi: move aml builder code for floppy device

2020-06-11 Thread Philippe Mathieu-Daudé
On 6/11/20 9:29 AM, Gerd Hoffmann wrote:
> Signed-off-by: Gerd Hoffmann 
> Reviewed-by: Igor Mammedov 

Reviewed-by: Philippe Mathieu-Daudé 

> ---
>  hw/block/fdc.c   | 83 
>  hw/i386/acpi-build.c | 83 
>  stubs/cmos.c |  7 
>  stubs/Makefile.objs  |  1 +
>  4 files changed, 91 insertions(+), 83 deletions(-)
>  create mode 100644 stubs/cmos.c
> 
> diff --git a/hw/block/fdc.c b/hw/block/fdc.c
> index c5fb9d6ece77..b4d2eaf66dcd 100644
> --- a/hw/block/fdc.c
> +++ b/hw/block/fdc.c
> @@ -32,6 +32,8 @@
>  #include "qapi/error.h"
>  #include "qemu/error-report.h"
>  #include "qemu/timer.h"
> +#include "hw/i386/pc.h"
> +#include "hw/acpi/aml-build.h"
>  #include "hw/irq.h"
>  #include "hw/isa/isa.h"
>  #include "hw/qdev-properties.h"
> @@ -2765,6 +2767,85 @@ void isa_fdc_get_drive_max_chs(FloppyDriveType type,
>  (*maxc)--;
>  }
>  
> +static Aml *build_fdinfo_aml(int idx, FloppyDriveType type)
> +{
> +Aml *dev, *fdi;
> +uint8_t maxc, maxh, maxs;
> +
> +isa_fdc_get_drive_max_chs(type, &maxc, &maxh, &maxs);
> +
> +dev = aml_device("FLP%c", 'A' + idx);
> +
> +aml_append(dev, aml_name_decl("_ADR", aml_int(idx)));
> +
> +fdi = aml_package(16);
> +aml_append(fdi, aml_int(idx));  /* Drive Number */
> +aml_append(fdi,
> +aml_int(cmos_get_fd_drive_type(type)));  /* Device Type */
> +/*
> + * the values below are the limits of the drive, and are thus independent
> + * of the inserted media
> + */
> +aml_append(fdi, aml_int(maxc));  /* Maximum Cylinder Number */
> +aml_append(fdi, aml_int(maxs));  /* Maximum Sector Number */
> +aml_append(fdi, aml_int(maxh));  /* Maximum Head Number */
> +/*
> + * SeaBIOS returns the below values for int 0x13 func 0x08 regardless of
> + * the drive type, so shall we
> + */
> +aml_append(fdi, aml_int(0xAF));  /* disk_specify_1 */
> +aml_append(fdi, aml_int(0x02));  /* disk_specify_2 */
> +aml_append(fdi, aml_int(0x25));  /* disk_motor_wait */
> +aml_append(fdi, aml_int(0x02));  /* disk_sector_siz */
> +aml_append(fdi, aml_int(0x12));  /* disk_eot */
> +aml_append(fdi, aml_int(0x1B));  /* disk_rw_gap */
> +aml_append(fdi, aml_int(0xFF));  /* disk_dtl */
> +aml_append(fdi, aml_int(0x6C));  /* disk_formt_gap */
> +aml_append(fdi, aml_int(0xF6));  /* disk_fill */
> +aml_append(fdi, aml_int(0x0F));  /* disk_head_sttl */
> +aml_append(fdi, aml_int(0x08));  /* disk_motor_strt */
> +
> +aml_append(dev, aml_name_decl("_FDI", fdi));
> +return dev;
> +}
> +
> +static void fdc_isa_build_aml(ISADevice *isadev, Aml *scope)
> +{
> +Aml *dev;
> +Aml *crs;
> +int i;
> +
> +#define ACPI_FDE_MAX_FD 4
> +uint32_t fde_buf[5] = {
> +0, 0, 0, 0, /* presence of floppy drives #0 - #3 */
> +cpu_to_le32(2)  /* tape presence (2 == never present) */
> +};
> +
> +crs = aml_resource_template();
> +aml_append(crs, aml_io(AML_DECODE16, 0x03F2, 0x03F2, 0x00, 0x04));
> +aml_append(crs, aml_io(AML_DECODE16, 0x03F7, 0x03F7, 0x00, 0x01));
> +aml_append(crs, aml_irq_no_flags(6));
> +aml_append(crs,
> +aml_dma(AML_COMPATIBILITY, AML_NOTBUSMASTER, AML_TRANSFER8, 2));
> +
> +dev = aml_device("FDC0");
> +aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0700")));
> +aml_append(dev, aml_name_decl("_CRS", crs));
> +
> +for (i = 0; i < MIN(MAX_FD, ACPI_FDE_MAX_FD); i++) {
> +FloppyDriveType type = isa_fdc_get_drive_type(isadev, i);
> +
> +if (type < FLOPPY_DRIVE_TYPE_NONE) {
> +fde_buf[i] = cpu_to_le32(1);  /* drive present */
> +aml_append(dev, build_fdinfo_aml(i, type));
> +}
> +}
> +aml_append(dev, aml_name_decl("_FDE",
> +   aml_buffer(sizeof(fde_buf), (uint8_t *)fde_buf)));
> +
> +aml_append(scope, dev);
> +}
> +
>  static const VMStateDescription vmstate_isa_fdc ={
>  .name = "fdc",
>  .version_id = 2,
> @@ -2798,11 +2879,13 @@ static Property isa_fdc_properties[] = {
>  static void isabus_fdc_class_init(ObjectClass *klass, void *data)
>  {
>  DeviceClass *dc = DEVICE_CLASS(klass);
> +ISADeviceClass *isa = ISA_DEVICE_CLASS(klass);
>  
>  dc->realize = isabus_fdc_realize;
>  dc->fw_name = "fdc";
>  dc->reset = fdctrl_external_reset_isa;
>  dc->vmsd = _isa_fdc;
> +isa->build_aml = fdc_isa_build_aml;
>  device_class_set_props(dc, isa_fdc_properties);
>  set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
>  }
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 473cbdfffd05..7726d5c0f7cb 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -937,85 +937,6 @@ static void build_hpet_aml(Aml *table)
>  aml_append(table, scope);
>  }
>  
> -static Aml *build_fdinfo_aml(int idx, FloppyDriveType type)
> -{
> -Aml *dev, *fdi;
> -uint8_t maxc, maxh, maxs;
> -
> -

Re: [PATCH 2/2] qcow2: improve savevm performance

2020-06-11 Thread Vladimir Sementsov-Ogievskiy

10.06.2020 22:00, Denis V. Lunev wrote:

This patch does 2 standard basic things:
- it creates intermediate buffer for all writes from QEMU migration code
   to QCOW2 image,
- this buffer is sent to disk asynchronously, allowing several writes to
   run in parallel.

In general, migration code is fantastically inefficient (by observation),
buffers are not aligned and sent with arbitrary pieces, a lot of the time
less than 100 bytes at a chunk, which results in read-modify-write
operations with non-cached operations. It should also be noted that all
operations are performed into unallocated image blocks, which also suffer
due to partial writes to such new clusters.

Snapshot creation time (2 GB Fedora-31 VM running over NVME storage):
 original fixed
cached:  1.79s   1.27s
non-cached:  3.29s   0.81s

The difference over HDD would be more significant :)

Signed-off-by: Denis V. Lunev 


If I follow correctly, you make qcow2_save_vmstate implicitly asynchronous:
it may return immediately after creating a task, and task is executing in
parallel.

I think, block-layer is unprepared for such behavior, it rely on the fact that
.bdrv_save_vmstate is synchronous.

For example, look at bdrv_co_rw_vmstate(). It calls drv->bdrv_save_vmstate
inside pair of bdrv_inc_in_flight()/bdrv_dec_in_flight(). It means that with
this patch, we may break drained section.

Next, it's a kind of cache for vmstate-write operation. It seems for me that
it's not directly related to qcow2. So, we can implement it in generic block
layer, where we can handle in_flight requests. Can we keep .bdrv_save_vmstate
handlers of format drivers as is, keep them synchronous, but instead change
generic interface to be (optionally?) cached?


---
  block/qcow2.c | 111 +-
  block/qcow2.h |   4 ++
  2 files changed, 113 insertions(+), 2 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 0cd2e6757e..e6232f32e2 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -4797,11 +4797,43 @@ static int qcow2_make_empty(BlockDriverState *bs)
  return ret;
  }
  
+

+typedef struct Qcow2VMStateTask {
+AioTask task;
+
+BlockDriverState *bs;
+int64_t offset;
+void *buf;
+size_t bytes;
+} Qcow2VMStateTask;
+
+typedef struct Qcow2SaveVMState {
+AioTaskPool *pool;
+Qcow2VMStateTask *t;
+} Qcow2SaveVMState;
+
  static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
  {
  BDRVQcow2State *s = bs->opaque;
+Qcow2SaveVMState *state = s->savevm_state;
  int ret;
  
+if (state != NULL) {

+aio_task_pool_start_task(state->pool, &state->t->task);
+
+aio_task_pool_wait_all(state->pool);
+ret = aio_task_pool_status(state->pool);
+
+aio_task_pool_free(state->pool);
+g_free(state);
+
+s->savevm_state = NULL;
+
+if (ret < 0) {
+return ret;
+}
+}
+
  qemu_co_mutex_lock(&s->lock);
  ret = qcow2_write_caches(bs);
  qemu_co_mutex_unlock(&s->lock);
@@ -5098,14 +5130,89 @@ static int qcow2_has_zero_init(BlockDriverState *bs)
  }
  }
  
+

+static coroutine_fn int qcow2_co_vmstate_task_entry(AioTask *task)
+{
+int err = 0;
+Qcow2VMStateTask *t = container_of(task, Qcow2VMStateTask, task);
+
+if (t->bytes != 0) {
+QEMUIOVector local_qiov;
+qemu_iovec_init_buf(&local_qiov, t->buf, t->bytes);
+err = t->bs->drv->bdrv_co_pwritev_part(t->bs, t->offset, t->bytes,
+   &local_qiov, 0, 0);
+}
+
+qemu_vfree(t->buf);
+return err;
+}
+
+static Qcow2VMStateTask *qcow2_vmstate_task_create(BlockDriverState *bs,
+int64_t pos, size_t size)
+{
+BDRVQcow2State *s = bs->opaque;
+Qcow2VMStateTask *t = g_new(Qcow2VMStateTask, 1);
+
+*t = (Qcow2VMStateTask) {
+.task.func = qcow2_co_vmstate_task_entry,
+.buf = qemu_blockalign(bs, size),
+.offset = qcow2_vm_state_offset(s) + pos,
+.bs = bs,
+};
+
+return t;
+}
+
  static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t pos)
  {
  BDRVQcow2State *s = bs->opaque;
+Qcow2SaveVMState *state = s->savevm_state;
+Qcow2VMStateTask *t;
+size_t buf_size = MAX(s->cluster_size, 1 * MiB);
+size_t to_copy;
+size_t off;
  
  BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);

-return bs->drv->bdrv_co_pwritev_part(bs, qcow2_vm_state_offset(s) + pos,
- qiov->size, qiov, 0, 0);
+
+if (state == NULL) {
+state = g_new(Qcow2SaveVMState, 1);
+*state = (Qcow2SaveVMState) {
+.pool = aio_task_pool_new(QCOW2_MAX_WORKERS),
+.t = qcow2_vmstate_task_create(bs, pos, buf_size),
+};
+
+s->savevm_state = state;
+}
+
+if (aio_task_pool_status(state->pool) != 0) {
+return 

[PATCH] block/aio_task: allow start/wait task from any coroutine

2020-06-11 Thread Vladimir Sementsov-Ogievskiy
Currently, aio task pool assumes that there is a main coroutine, which
creates tasks and wait for them. Let's remove the restriction by using
CoQueue. Code becomes clearer, interface more obvious.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---

Hi! Here is my counter-proposal for
"[PATCH 1/2] aio: allow to wait for coroutine pool from different coroutine"
by Denis. I'm sure that if we are going to change something here, better
is make the interface work from any coroutine without the restriction of
only-on-waiter at the moment.

(Note, that it is still not thread-safe)


 block/aio_task.c | 15 ---
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/block/aio_task.c b/block/aio_task.c
index 88989fa248..d48b29ff83 100644
--- a/block/aio_task.c
+++ b/block/aio_task.c
@@ -27,11 +27,10 @@
 #include "block/aio_task.h"
 
 struct AioTaskPool {
-Coroutine *main_co;
 int status;
 int max_busy_tasks;
 int busy_tasks;
-bool waiting;
+CoQueue waiters;
 };
 
 static void coroutine_fn aio_task_co(void *opaque)
@@ -52,21 +51,15 @@ static void coroutine_fn aio_task_co(void *opaque)
 
 g_free(task);
 
-if (pool->waiting) {
-pool->waiting = false;
-aio_co_wake(pool->main_co);
-}
+qemu_co_queue_next(&pool->waiters);
 }
 
 void coroutine_fn aio_task_pool_wait_one(AioTaskPool *pool)
 {
 assert(pool->busy_tasks > 0);
-assert(qemu_coroutine_self() == pool->main_co);
 
-pool->waiting = true;
-qemu_coroutine_yield();
+qemu_co_queue_wait(&pool->waiters, NULL);
 
-assert(!pool->waiting);
 assert(pool->busy_tasks < pool->max_busy_tasks);
 }
 
@@ -98,8 +91,8 @@ AioTaskPool *coroutine_fn aio_task_pool_new(int 
max_busy_tasks)
 {
 AioTaskPool *pool = g_new0(AioTaskPool, 1);
 
-pool->main_co = qemu_coroutine_self();
 pool->max_busy_tasks = max_busy_tasks;
+qemu_co_queue_init(&pool->waiters);
 
 return pool;
 }
-- 
2.21.0




[PATCH v8 10/10] acpi: q35: drop _SB.PCI0.ISA.LPCD opregion.

2020-06-11 Thread Gerd Hoffmann
Seems to be unused.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Igor Mammedov 
---
 hw/i386/acpi-build.c | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 02cf4199c2e9..d93ea40c58b9 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1296,7 +1296,6 @@ static void build_q35_isa_bridge(Aml *table)
 {
 Aml *dev;
 Aml *scope;
-Aml *field;
 
 scope =  aml_scope("_SB.PCI0");
 dev = aml_device("ISA");
@@ -1306,16 +1305,6 @@ static void build_q35_isa_bridge(Aml *table)
 aml_append(dev, aml_operation_region("PIRQ", AML_PCI_CONFIG,
  aml_int(0x60), 0x0C));
 
-aml_append(dev, aml_operation_region("LPCD", AML_PCI_CONFIG,
- aml_int(0x80), 0x02));
-field = aml_field("LPCD", AML_ANY_ACC, AML_NOLOCK, AML_PRESERVE);
-aml_append(field, aml_named_field("COMA", 3));
-aml_append(field, aml_reserved_field(1));
-aml_append(field, aml_named_field("COMB", 3));
-aml_append(field, aml_reserved_field(1));
-aml_append(field, aml_named_field("LPTD", 2));
-aml_append(dev, field);
-
 aml_append(scope, dev);
 aml_append(table, scope);
 }
-- 
2.18.4




[PATCH v8 02/10] acpi: move aml builder code for floppy device

2020-06-11 Thread Gerd Hoffmann
Signed-off-by: Gerd Hoffmann 
Reviewed-by: Igor Mammedov 
---
 hw/block/fdc.c   | 83 
 hw/i386/acpi-build.c | 83 
 stubs/cmos.c |  7 
 stubs/Makefile.objs  |  1 +
 4 files changed, 91 insertions(+), 83 deletions(-)
 create mode 100644 stubs/cmos.c

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index c5fb9d6ece77..b4d2eaf66dcd 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -32,6 +32,8 @@
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "qemu/timer.h"
+#include "hw/i386/pc.h"
+#include "hw/acpi/aml-build.h"
 #include "hw/irq.h"
 #include "hw/isa/isa.h"
 #include "hw/qdev-properties.h"
@@ -2765,6 +2767,85 @@ void isa_fdc_get_drive_max_chs(FloppyDriveType type,
 (*maxc)--;
 }
 
+static Aml *build_fdinfo_aml(int idx, FloppyDriveType type)
+{
+Aml *dev, *fdi;
+uint8_t maxc, maxh, maxs;
+
+isa_fdc_get_drive_max_chs(type, &maxc, &maxh, &maxs);
+
+dev = aml_device("FLP%c", 'A' + idx);
+
+aml_append(dev, aml_name_decl("_ADR", aml_int(idx)));
+
+fdi = aml_package(16);
+aml_append(fdi, aml_int(idx));  /* Drive Number */
+aml_append(fdi,
+aml_int(cmos_get_fd_drive_type(type)));  /* Device Type */
+/*
+ * the values below are the limits of the drive, and are thus independent
+ * of the inserted media
+ */
+aml_append(fdi, aml_int(maxc));  /* Maximum Cylinder Number */
+aml_append(fdi, aml_int(maxs));  /* Maximum Sector Number */
+aml_append(fdi, aml_int(maxh));  /* Maximum Head Number */
+/*
+ * SeaBIOS returns the below values for int 0x13 func 0x08 regardless of
+ * the drive type, so shall we
+ */
+aml_append(fdi, aml_int(0xAF));  /* disk_specify_1 */
+aml_append(fdi, aml_int(0x02));  /* disk_specify_2 */
+aml_append(fdi, aml_int(0x25));  /* disk_motor_wait */
+aml_append(fdi, aml_int(0x02));  /* disk_sector_siz */
+aml_append(fdi, aml_int(0x12));  /* disk_eot */
+aml_append(fdi, aml_int(0x1B));  /* disk_rw_gap */
+aml_append(fdi, aml_int(0xFF));  /* disk_dtl */
+aml_append(fdi, aml_int(0x6C));  /* disk_formt_gap */
+aml_append(fdi, aml_int(0xF6));  /* disk_fill */
+aml_append(fdi, aml_int(0x0F));  /* disk_head_sttl */
+aml_append(fdi, aml_int(0x08));  /* disk_motor_strt */
+
+aml_append(dev, aml_name_decl("_FDI", fdi));
+return dev;
+}
+
+static void fdc_isa_build_aml(ISADevice *isadev, Aml *scope)
+{
+Aml *dev;
+Aml *crs;
+int i;
+
+#define ACPI_FDE_MAX_FD 4
+uint32_t fde_buf[5] = {
+0, 0, 0, 0, /* presence of floppy drives #0 - #3 */
+cpu_to_le32(2)  /* tape presence (2 == never present) */
+};
+
+crs = aml_resource_template();
+aml_append(crs, aml_io(AML_DECODE16, 0x03F2, 0x03F2, 0x00, 0x04));
+aml_append(crs, aml_io(AML_DECODE16, 0x03F7, 0x03F7, 0x00, 0x01));
+aml_append(crs, aml_irq_no_flags(6));
+aml_append(crs,
+aml_dma(AML_COMPATIBILITY, AML_NOTBUSMASTER, AML_TRANSFER8, 2));
+
+dev = aml_device("FDC0");
+aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0700")));
+aml_append(dev, aml_name_decl("_CRS", crs));
+
+for (i = 0; i < MIN(MAX_FD, ACPI_FDE_MAX_FD); i++) {
+FloppyDriveType type = isa_fdc_get_drive_type(isadev, i);
+
+if (type < FLOPPY_DRIVE_TYPE_NONE) {
+fde_buf[i] = cpu_to_le32(1);  /* drive present */
+aml_append(dev, build_fdinfo_aml(i, type));
+}
+}
+aml_append(dev, aml_name_decl("_FDE",
+   aml_buffer(sizeof(fde_buf), (uint8_t *)fde_buf)));
+
+aml_append(scope, dev);
+}
+
 static const VMStateDescription vmstate_isa_fdc ={
 .name = "fdc",
 .version_id = 2,
@@ -2798,11 +2879,13 @@ static Property isa_fdc_properties[] = {
 static void isabus_fdc_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
+ISADeviceClass *isa = ISA_DEVICE_CLASS(klass);
 
 dc->realize = isabus_fdc_realize;
 dc->fw_name = "fdc";
 dc->reset = fdctrl_external_reset_isa;
 dc->vmsd = &vmstate_isa_fdc;
+isa->build_aml = fdc_isa_build_aml;
 device_class_set_props(dc, isa_fdc_properties);
 set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
 }
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 473cbdfffd05..7726d5c0f7cb 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -937,85 +937,6 @@ static void build_hpet_aml(Aml *table)
 aml_append(table, scope);
 }
 
-static Aml *build_fdinfo_aml(int idx, FloppyDriveType type)
-{
-Aml *dev, *fdi;
-uint8_t maxc, maxh, maxs;
-
-isa_fdc_get_drive_max_chs(type, &maxc, &maxh, &maxs);
-
-dev = aml_device("FLP%c", 'A' + idx);
-
-aml_append(dev, aml_name_decl("_ADR", aml_int(idx)));
-
-fdi = aml_package(16);
-aml_append(fdi, aml_int(idx));  /* Drive Number */
-aml_append(fdi,
-aml_int(cmos_get_fd_drive_type(type)));  /* Device Type */
-/*
- * the values below are the 

[PATCH v8 09/10] acpi: drop build_piix4_pm()

2020-06-11 Thread Gerd Hoffmann
The _SB.PCI0.PX13.P13C opregion (holds isa device enable bits)
is not used any more, remove it from DSDT.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Igor Mammedov 
---
 hw/i386/acpi-build.c | 16 
 1 file changed, 16 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 750fcf9baa37..02cf4199c2e9 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1320,21 +1320,6 @@ static void build_q35_isa_bridge(Aml *table)
 aml_append(table, scope);
 }
 
-static void build_piix4_pm(Aml *table)
-{
-Aml *dev;
-Aml *scope;
-
-scope =  aml_scope("_SB.PCI0");
-dev = aml_device("PX13");
-aml_append(dev, aml_name_decl("_ADR", aml_int(0x00010003)));
-
-aml_append(dev, aml_operation_region("P13C", AML_PCI_CONFIG,
- aml_int(0x00), 0xff));
-aml_append(scope, dev);
-aml_append(table, scope);
-}
-
 static void build_piix4_isa_bridge(Aml *table)
 {
 Aml *dev;
@@ -1486,7 +1471,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
 aml_append(dsdt, sb_scope);
 
 build_hpet_aml(dsdt);
-build_piix4_pm(dsdt);
 build_piix4_isa_bridge(dsdt);
 build_isa_devices_aml(dsdt);
 build_piix4_pci_hotplug(dsdt);
-- 
2.18.4




[PATCH v8 06/10] acpi: factor out fw_cfg_add_acpi_dsdt()

2020-06-11 Thread Gerd Hoffmann
Add helper function to add fw_cfg device,
also move code to hw/i386/fw_cfg.c.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Igor Mammedov 
---
 hw/i386/fw_cfg.h |  1 +
 hw/i386/acpi-build.c | 24 +---
 hw/i386/fw_cfg.c | 28 
 3 files changed, 30 insertions(+), 23 deletions(-)

diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h
index 9e742787792b..275f15c1c5e8 100644
--- a/hw/i386/fw_cfg.h
+++ b/hw/i386/fw_cfg.h
@@ -25,5 +25,6 @@ FWCfgState *fw_cfg_arch_create(MachineState *ms,
uint16_t apic_id_limit);
 void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg);
 void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg);
+void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg);
 
 #endif
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 9fed13a27333..86be45eea17c 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1759,30 +1759,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
 
 /* create fw_cfg node, unconditionally */
 {
-/* when using port i/o, the 8-bit data register *always* overlaps
- * with half of the 16-bit control register. Hence, the total size
- * of the i/o region used is FW_CFG_CTL_SIZE; when using DMA, the
- * DMA control register is located at FW_CFG_DMA_IO_BASE + 4 */
-uint8_t io_size = object_property_get_bool(OBJECT(x86ms->fw_cfg),
-   "dma_enabled", NULL) ?
-  ROUND_UP(FW_CFG_CTL_SIZE, 4) + sizeof(dma_addr_t) :
-  FW_CFG_CTL_SIZE;
-
 scope = aml_scope("\\_SB.PCI0");
-dev = aml_device("FWCF");
-
-aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0002")));
-
-/* device present, functioning, decoding, not shown in UI */
-aml_append(dev, aml_name_decl("_STA", aml_int(0xB)));
-
-crs = aml_resource_template();
-aml_append(crs,
-aml_io(AML_DECODE16, FW_CFG_IO_BASE, FW_CFG_IO_BASE, 0x01, io_size)
-);
-aml_append(dev, aml_name_decl("_CRS", crs));
-
-aml_append(scope, dev);
+fw_cfg_add_acpi_dsdt(scope, x86ms->fw_cfg);
 aml_append(dsdt, scope);
 }
 
diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c
index da60ada59462..c55abfb01abb 100644
--- a/hw/i386/fw_cfg.c
+++ b/hw/i386/fw_cfg.c
@@ -15,6 +15,7 @@
 #include "qemu/osdep.h"
 #include "sysemu/numa.h"
 #include "hw/acpi/acpi.h"
+#include "hw/acpi/aml-build.h"
 #include "hw/firmware/smbios.h"
 #include "hw/i386/fw_cfg.h"
 #include "hw/timer/hpet.h"
@@ -179,3 +180,30 @@ void fw_cfg_build_feature_control(MachineState *ms, 
FWCfgState *fw_cfg)
 *val = cpu_to_le64(feature_control_bits | FEATURE_CONTROL_LOCKED);
 fw_cfg_add_file(fw_cfg, "etc/msr_feature_control", val, sizeof(*val));
 }
+
+void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg)
+{
+/*
+ * when using port i/o, the 8-bit data register *always* overlaps
+ * with half of the 16-bit control register. Hence, the total size
+ * of the i/o region used is FW_CFG_CTL_SIZE; when using DMA, the
+ * DMA control register is located at FW_CFG_DMA_IO_BASE + 4
+ */
+Object *obj = OBJECT(fw_cfg);
+uint8_t io_size = object_property_get_bool(obj, "dma_enabled", NULL) ?
+ROUND_UP(FW_CFG_CTL_SIZE, 4) + sizeof(dma_addr_t) :
+FW_CFG_CTL_SIZE;
+Aml *dev = aml_device("FWCF");
+Aml *crs = aml_resource_template();
+
+aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0002")));
+
+/* device present, functioning, decoding, not shown in UI */
+aml_append(dev, aml_name_decl("_STA", aml_int(0xB)));
+
+aml_append(crs,
+aml_io(AML_DECODE16, FW_CFG_IO_BASE, FW_CFG_IO_BASE, 0x01, io_size));
+
+aml_append(dev, aml_name_decl("_CRS", crs));
+aml_append(scope, dev);
+}
-- 
2.18.4




[PATCH v8 00/10] acpi: i386 tweaks

2020-06-11 Thread Gerd Hoffmann
First batch of microvm patches, some generic acpi stuff.
Split the acpi-build.c monster, specifically split the
pc and q35 and pci bits into a separate file which we
can skip building at some point in the future.

v2 changes: leave acpi-build.c largely as-is, move useful
bits to other places to allow them being reused, specifically:

 * move isa device generator functions to individual isa devices.
 * move fw_cfg generator function to fw_cfg.c

v3 changes: fix rtc, support multiple lpt devices.

v4 changes:
 * drop merged patches.
 * split rtc crs change to separata patch.
 * added two cleanup patches.
 * picked up ack & review tags.

v5 changes:
 * add comment for rtc crs update.
 * add even more cleanup patches.
 * picked up ack & review tags.

v6 changes:
 * floppy: move cmos_get_fd_drive_type.
 * picked up ack & review tags.

v7 changes:
 * rebased to mst/pci branch, resolved stubs conflict.
 * dropped patches already queued up in mst/pci.
 * added missing sign-off.
 * picked up ack & review tags.

v8 changes:
 * (re-)add patch to allow acpi table changes

take care,
  Gerd

Gerd Hoffmann (10):
  qtest: allow DSDT acpi table changes
  acpi: move aml builder code for floppy device
  floppy: make isa_fdc_get_drive_max_chs static
  floppy: move cmos_get_fd_drive_type() from pc
  acpi: move aml builder code for i8042 (kbd+mouse) device
  acpi: factor out fw_cfg_add_acpi_dsdt()
  acpi: simplify build_isa_devices_aml()
  acpi: drop serial/parallel enable bits from dsdt
  acpi: drop build_piix4_pm()
  acpi: q35: drop _SB.PCI0.ISA.LPCD opregion.

 hw/i386/fw_cfg.h|   1 +
 include/hw/block/fdc.h  |   3 +-
 include/hw/i386/pc.h|   1 -
 tests/qtest/bios-tables-test-allowed-diff.h |  17 ++
 hw/block/fdc.c  | 111 +-
 hw/i386/acpi-build.c| 211 +---
 hw/i386/fw_cfg.c|  28 +++
 hw/i386/pc.c|  25 ---
 hw/input/pckbd.c|  31 +++
 stubs/cmos.c|   7 +
 stubs/Makefile.objs |   1 +
 11 files changed, 201 insertions(+), 235 deletions(-)
 create mode 100644 stubs/cmos.c

-- 
2.18.4




[PATCH v8 07/10] acpi: simplify build_isa_devices_aml()

2020-06-11 Thread Gerd Hoffmann
x86 machines can have a single ISA bus only.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Igor Mammedov 
Reviewed-by: Philippe Mathieu-Daudé 
---
 hw/i386/acpi-build.c | 15 +--
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 86be45eea17c..c8e47700fc53 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -940,19 +940,14 @@ static void build_hpet_aml(Aml *table)
 static void build_isa_devices_aml(Aml *table)
 {
 bool ambiguous;
-
-Aml *scope = aml_scope("_SB.PCI0.ISA");
 Object *obj = object_resolve_path_type("", TYPE_ISA_BUS, &ambiguous);
+Aml *scope;
 
-if (ambiguous) {
-error_report("Multiple ISA busses, unable to define IPMI ACPI data");
-} else if (!obj) {
-error_report("No ISA bus, unable to define IPMI ACPI data");
-} else {
-build_acpi_ipmi_devices(scope, BUS(obj), "\\_SB.PCI0.ISA");
-isa_build_aml(ISA_BUS(obj), scope);
-}
+assert(obj && !ambiguous);
 
+scope = aml_scope("_SB.PCI0.ISA");
+build_acpi_ipmi_devices(scope, BUS(obj), "\\_SB.PCI0.ISA");
+isa_build_aml(ISA_BUS(obj), scope);
 aml_append(table, scope);
 }
 
-- 
2.18.4




[PATCH v8 04/10] floppy: move cmos_get_fd_drive_type() from pc

2020-06-11 Thread Gerd Hoffmann
Signed-off-by: Gerd Hoffmann 
Reviewed-by: Philippe Mathieu-Daudé 
Acked-by: John Snow 
---
 include/hw/block/fdc.h |  1 +
 include/hw/i386/pc.h   |  1 -
 hw/block/fdc.c | 26 +-
 hw/i386/pc.c   | 25 -
 4 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/include/hw/block/fdc.h b/include/hw/block/fdc.h
index 5d71cf972268..479cebc0a330 100644
--- a/include/hw/block/fdc.h
+++ b/include/hw/block/fdc.h
@@ -16,5 +16,6 @@ void sun4m_fdctrl_init(qemu_irq irq, hwaddr io_base,
DriveInfo **fds, qemu_irq *fdc_tc);
 
 FloppyDriveType isa_fdc_get_drive_type(ISADevice *fdc, int i);
+int cmos_get_fd_drive_type(FloppyDriveType fd0);
 
 #endif
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 8d764f965cd3..5e3b19ab78fc 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -176,7 +176,6 @@ typedef void (*cpu_set_smm_t)(int smm, void *arg);
 void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs);
 
 ISADevice *pc_find_fdc0(void);
-int cmos_get_fd_drive_type(FloppyDriveType fd0);
 
 /* port92.c */
 #define PORT92_A20_LINE "a20"
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 8024c822cea3..ea0fb8ee15b9 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -32,7 +32,6 @@
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "qemu/timer.h"
-#include "hw/i386/pc.h"
 #include "hw/acpi/aml-build.h"
 #include "hw/irq.h"
 #include "hw/isa/isa.h"
@@ -2809,6 +2808,31 @@ static Aml *build_fdinfo_aml(int idx, FloppyDriveType 
type)
 return dev;
 }
 
+int cmos_get_fd_drive_type(FloppyDriveType fd0)
+{
+int val;
+
+switch (fd0) {
+case FLOPPY_DRIVE_TYPE_144:
+/* 1.44 Mb 3"5 drive */
+val = 4;
+break;
+case FLOPPY_DRIVE_TYPE_288:
+/* 2.88 Mb 3"5 drive */
+val = 5;
+break;
+case FLOPPY_DRIVE_TYPE_120:
+/* 1.2 Mb 5"5 drive */
+val = 2;
+break;
+case FLOPPY_DRIVE_TYPE_NONE:
+default:
+val = 0;
+break;
+}
+return val;
+}
+
 static void fdc_isa_build_aml(ISADevice *isadev, Aml *scope)
 {
 Aml *dev;
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 2128f3d6fe8b..c5db7be6d8b1 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -385,31 +385,6 @@ static uint64_t ioportF0_read(void *opaque, hwaddr addr, 
unsigned size)
 
 #define REG_EQUIPMENT_BYTE  0x14
 
-int cmos_get_fd_drive_type(FloppyDriveType fd0)
-{
-int val;
-
-switch (fd0) {
-case FLOPPY_DRIVE_TYPE_144:
-/* 1.44 Mb 3"5 drive */
-val = 4;
-break;
-case FLOPPY_DRIVE_TYPE_288:
-/* 2.88 Mb 3"5 drive */
-val = 5;
-break;
-case FLOPPY_DRIVE_TYPE_120:
-/* 1.2 Mb 5"5 drive */
-val = 2;
-break;
-case FLOPPY_DRIVE_TYPE_NONE:
-default:
-val = 0;
-break;
-}
-return val;
-}
-
 static void cmos_init_hd(ISADevice *s, int type_ofs, int info_ofs,
  int16_t cylinders, int8_t heads, int8_t sectors)
 {
-- 
2.18.4




[PATCH v8 08/10] acpi: drop serial/parallel enable bits from dsdt

2020-06-11 Thread Gerd Hoffmann
The _STA methods for COM+LPT used to reference them,
but that isn't the case any more.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Igor Mammedov 
---
 hw/i386/acpi-build.c | 23 ---
 1 file changed, 23 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index c8e47700fc53..750fcf9baa37 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1316,15 +1316,6 @@ static void build_q35_isa_bridge(Aml *table)
 aml_append(field, aml_named_field("LPTD", 2));
 aml_append(dev, field);
 
-aml_append(dev, aml_operation_region("LPCE", AML_PCI_CONFIG,
- aml_int(0x82), 0x02));
-/* enable bits */
-field = aml_field("LPCE", AML_ANY_ACC, AML_NOLOCK, AML_PRESERVE);
-aml_append(field, aml_named_field("CAEN", 1));
-aml_append(field, aml_named_field("CBEN", 1));
-aml_append(field, aml_named_field("LPEN", 1));
-aml_append(dev, field);
-
 aml_append(scope, dev);
 aml_append(table, scope);
 }
@@ -1348,7 +1339,6 @@ static void build_piix4_isa_bridge(Aml *table)
 {
 Aml *dev;
 Aml *scope;
-Aml *field;
 
 scope =  aml_scope("_SB.PCI0");
 dev = aml_device("ISA");
@@ -1357,19 +1347,6 @@ static void build_piix4_isa_bridge(Aml *table)
 /* PIIX PCI to ISA irq remapping */
 aml_append(dev, aml_operation_region("P40C", AML_PCI_CONFIG,
  aml_int(0x60), 0x04));
-/* enable bits */
-field = aml_field("^PX13.P13C", AML_ANY_ACC, AML_NOLOCK, AML_PRESERVE);
-/* Offset(0x5f),, 7, */
-aml_append(field, aml_reserved_field(0x2f8));
-aml_append(field, aml_reserved_field(7));
-aml_append(field, aml_named_field("LPEN", 1));
-/* Offset(0x67),, 3, */
-aml_append(field, aml_reserved_field(0x38));
-aml_append(field, aml_reserved_field(3));
-aml_append(field, aml_named_field("CAEN", 1));
-aml_append(field, aml_reserved_field(3));
-aml_append(field, aml_named_field("CBEN", 1));
-aml_append(dev, field);
 
 aml_append(scope, dev);
 aml_append(table, scope);
-- 
2.18.4




[PATCH v8 05/10] acpi: move aml builder code for i8042 (kbd+mouse) device

2020-06-11 Thread Gerd Hoffmann
Signed-off-by: Gerd Hoffmann 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Igor Mammedov 
---
 hw/i386/acpi-build.c | 39 ---
 hw/input/pckbd.c | 31 +++
 2 files changed, 31 insertions(+), 39 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 7726d5c0f7cb..9fed13a27333 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -937,42 +937,6 @@ static void build_hpet_aml(Aml *table)
 aml_append(table, scope);
 }
 
-static Aml *build_kbd_device_aml(void)
-{
-Aml *dev;
-Aml *crs;
-
-dev = aml_device("KBD");
-aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0303")));
-
-aml_append(dev, aml_name_decl("_STA", aml_int(0xf)));
-
-crs = aml_resource_template();
-aml_append(crs, aml_io(AML_DECODE16, 0x0060, 0x0060, 0x01, 0x01));
-aml_append(crs, aml_io(AML_DECODE16, 0x0064, 0x0064, 0x01, 0x01));
-aml_append(crs, aml_irq_no_flags(1));
-aml_append(dev, aml_name_decl("_CRS", crs));
-
-return dev;
-}
-
-static Aml *build_mouse_device_aml(void)
-{
-Aml *dev;
-Aml *crs;
-
-dev = aml_device("MOU");
-aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0F13")));
-
-aml_append(dev, aml_name_decl("_STA", aml_int(0xf)));
-
-crs = aml_resource_template();
-aml_append(crs, aml_irq_no_flags(12));
-aml_append(dev, aml_name_decl("_CRS", crs));
-
-return dev;
-}
-
 static void build_isa_devices_aml(Aml *table)
 {
 bool ambiguous;
@@ -980,9 +944,6 @@ static void build_isa_devices_aml(Aml *table)
 Aml *scope = aml_scope("_SB.PCI0.ISA");
 Object *obj = object_resolve_path_type("", TYPE_ISA_BUS, &ambiguous);
 
-aml_append(scope, build_kbd_device_aml());
-aml_append(scope, build_mouse_device_aml());
-
 if (ambiguous) {
 error_report("Multiple ISA busses, unable to define IPMI ACPI data");
 } else if (!obj) {
diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c
index 60a41303203a..29d633ca9478 100644
--- a/hw/input/pckbd.c
+++ b/hw/input/pckbd.c
@@ -26,6 +26,7 @@
 #include "qemu/log.h"
 #include "hw/isa/isa.h"
 #include "migration/vmstate.h"
+#include "hw/acpi/aml-build.h"
 #include "hw/input/ps2.h"
 #include "hw/irq.h"
 #include "hw/input/i8042.h"
@@ -561,12 +562,42 @@ static void i8042_realizefn(DeviceState *dev, Error 
**errp)
 qemu_register_reset(kbd_reset, s);
 }
 
+static void i8042_build_aml(ISADevice *isadev, Aml *scope)
+{
+Aml *kbd;
+Aml *mou;
+Aml *crs;
+
+crs = aml_resource_template();
+aml_append(crs, aml_io(AML_DECODE16, 0x0060, 0x0060, 0x01, 0x01));
+aml_append(crs, aml_io(AML_DECODE16, 0x0064, 0x0064, 0x01, 0x01));
+aml_append(crs, aml_irq_no_flags(1));
+
+kbd = aml_device("KBD");
+aml_append(kbd, aml_name_decl("_HID", aml_eisaid("PNP0303")));
+aml_append(kbd, aml_name_decl("_STA", aml_int(0xf)));
+aml_append(kbd, aml_name_decl("_CRS", crs));
+
+crs = aml_resource_template();
+aml_append(crs, aml_irq_no_flags(12));
+
+mou = aml_device("MOU");
+aml_append(mou, aml_name_decl("_HID", aml_eisaid("PNP0F13")));
+aml_append(mou, aml_name_decl("_STA", aml_int(0xf)));
+aml_append(mou, aml_name_decl("_CRS", crs));
+
+aml_append(scope, kbd);
+aml_append(scope, mou);
+}
+
 static void i8042_class_initfn(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
+ISADeviceClass *isa = ISA_DEVICE_CLASS(klass);
 
 dc->realize = i8042_realizefn;
 dc->vmsd = &vmstate_kbd_isa;
+isa->build_aml = i8042_build_aml;
 set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
 }
 
-- 
2.18.4




[PATCH v8 03/10] floppy: make isa_fdc_get_drive_max_chs static

2020-06-11 Thread Gerd Hoffmann
acpi aml generator needs this, but it is in floppy code now
so we can make the function static.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Igor Mammedov 
Reviewed-by: Philippe Mathieu-Daudé 
Acked-by: John Snow 
---
 include/hw/block/fdc.h | 2 --
 hw/block/fdc.c | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/include/hw/block/fdc.h b/include/hw/block/fdc.h
index c15ff4c62315..5d71cf972268 100644
--- a/include/hw/block/fdc.h
+++ b/include/hw/block/fdc.h
@@ -16,7 +16,5 @@ void sun4m_fdctrl_init(qemu_irq irq, hwaddr io_base,
DriveInfo **fds, qemu_irq *fdc_tc);
 
 FloppyDriveType isa_fdc_get_drive_type(ISADevice *fdc, int i);
-void isa_fdc_get_drive_max_chs(FloppyDriveType type,
-   uint8_t *maxc, uint8_t *maxh, uint8_t *maxs);
 
 #endif
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index b4d2eaf66dcd..8024c822cea3 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -2744,8 +2744,8 @@ FloppyDriveType isa_fdc_get_drive_type(ISADevice *fdc, 
int i)
 return isa->state.drives[i].drive;
 }
 
-void isa_fdc_get_drive_max_chs(FloppyDriveType type,
-   uint8_t *maxc, uint8_t *maxh, uint8_t *maxs)
+static void isa_fdc_get_drive_max_chs(FloppyDriveType type, uint8_t *maxc,
+  uint8_t *maxh, uint8_t *maxs)
 {
 const FDFormat *fdf;
 
-- 
2.18.4




[PATCH v8 01/10] qtest: allow DSDT acpi table changes

2020-06-11 Thread Gerd Hoffmann
Signed-off-by: Gerd Hoffmann 
---
 tests/qtest/bios-tables-test-allowed-diff.h | 17 +
 1 file changed, 17 insertions(+)

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8bf4..6a052c50447a 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,18 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/pc/DSDT",
+"tests/data/acpi/pc/DSDT.acpihmat",
+"tests/data/acpi/pc/DSDT.bridge",
+"tests/data/acpi/pc/DSDT.cphp",
+"tests/data/acpi/pc/DSDT.dimmpxm",
+"tests/data/acpi/pc/DSDT.ipmikcs",
+"tests/data/acpi/pc/DSDT.memhp",
+"tests/data/acpi/pc/DSDT.numamem",
+"tests/data/acpi/q35/DSDT",
+"tests/data/acpi/q35/DSDT.acpihmat",
+"tests/data/acpi/q35/DSDT.bridge",
+"tests/data/acpi/q35/DSDT.cphp",
+"tests/data/acpi/q35/DSDT.dimmpxm",
+"tests/data/acpi/q35/DSDT.ipmibt",
+"tests/data/acpi/q35/DSDT.memhp",
+"tests/data/acpi/q35/DSDT.mmio64",
+"tests/data/acpi/q35/DSDT.numamem",
-- 
2.18.4