date:20160701

[Qemu-devel] [RFC PATCH V2 3/3] filter-rewriter: rewrite tcp packet to keep secondary connection

2016-07-01 Thread Zhang Chen

We rewrite the TCP packets that the secondary guest receives and sends
when the COLO guest is a TCP server.

First, the client starts a TCP handshake with a packet carrying
seq=client_seq, ack=0, flag=SYN. The COLO primary guest gets this packet and
mirrors it (filter-mirror) to the secondary guest, which receives it through
filter-redirector.
Then the primary guest responds with a packet
(seq=primary_seq,ack=client_seq+1,flag=ACK|SYN).
The secondary guest responds with a packet
(seq=secondary_seq,ack=client_seq+1,flag=ACK|SYN).
At this point filter-rewriter saves secondary_seq in its record of the TCP
connection.
To finish the handshake, the client sends a packet
(seq=client_seq+1,ack=primary_seq+1,flag=ACK).
Here filter-rewriter learns primary_seq, rewrites the ack from primary_seq+1
to secondary_seq+1 and recalculates the checksum, so the secondary TCP
connection stays valid.

When packets are sent and received after the handshake, the client sends a
packet (seq=client_seq+1+data_len,ack=primary_seq+1,flag=ACK|PSH).
filter-rewriter rewrites the ack and sends the packet to the secondary guest.

The primary guest responds with a packet
(seq=primary_seq+1,ack=client_seq+1+data_len,flag=ACK).
The secondary guest responds with a packet
(seq=secondary_seq+1,ack=client_seq+1+data_len,flag=ACK).
We rewrite the secondary guest's seq from secondary_seq+1 to primary_seq+1,
so the TCP connection stays valid.

In the code we use offset (= secondary_seq - primary_seq)
to rewrite the seq or ack:
handle_primary_tcp_pkt: tcp_pkt->th_ack += offset;
handle_secondary_tcp_pkt: tcp_pkt->th_seq -= offset;

Signed-off-by: Zhang Chen 
Signed-off-by: Li Zhijian 
Signed-off-by: Wen Congyang 
---
 net/colo-base.h   |   2 +
 net/filter-rewriter.c | 110 +-
 trace-events  |   5 +++
 3 files changed, 115 insertions(+), 2 deletions(-)

diff --git a/net/colo-base.h b/net/colo-base.h
index 62460c5..7b32648 100644
--- a/net/colo-base.h
+++ b/net/colo-base.h
@@ -71,6 +71,8 @@ typedef struct Connection {
 uint8_t ip_proto;
 /* be used by filter-rewriter */
 colo_conn_state state;
+/* offset = secondary_seq - primary_seq */
+tcp_seq  offset;
 } Connection;
 
 uint32_t connection_key_hash(const void *opaque);
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
index c38ab24..9f63c75 100644
--- a/net/filter-rewriter.c
+++ b/net/filter-rewriter.c
@@ -21,6 +21,7 @@
 #include "qemu/main-loop.h"
 #include "qemu/iov.h"
 #include "net/checksum.h"
+#include "trace.h"
 
 #define FILTER_COLO_REWRITER(obj) \
 OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER)
@@ -64,6 +65,91 @@ static int is_tcp_packet(Packet *pkt)
 }
 }
 
+/* handle tcp packet from primary guest */
+static int handle_primary_tcp_pkt(NetFilterState *nf,
+  Connection *conn,
+  Packet *pkt)
+{
+struct tcphdr *tcp_pkt;
+static int syn_flag;
+
+tcp_pkt = (struct tcphdr *)pkt->transport_layer;
+if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
+char *sdebug, *ddebug;
+sdebug = strdup(inet_ntoa(pkt->ip->ip_src));
+ddebug = strdup(inet_ntoa(pkt->ip->ip_dst));
+trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug,
+ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
+tcp_pkt->th_flags);
+trace_colo_filter_rewriter_conn_offset(conn->offset);
+g_free(sdebug);
+g_free(ddebug);
+}
+
+if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
+/*
+ * this flag update offset func run oncs
+ * in independent tcp connection
+ */
+syn_flag = 1;
+}
+
+if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
+if (syn_flag) {
+/* offset = secondary_seq - primary seq */
+conn->offset -= (ntohl(tcp_pkt->th_ack));
+syn_flag = 0;
+
+}
+/* handle packets to the secondary from the primary */
+tcp_pkt->th_ack = htonl(ntohl(tcp_pkt->th_ack) + conn->offset + 1);
+
+net_checksum_calculate((uint8_t *)pkt->data, pkt->size);
+}
+
+return 0;
+}
+
+/* handle tcp packet from secondary guest */
+static int handle_secondary_tcp_pkt(NetFilterState *nf,
+Connection *conn,
+Packet *pkt)
+{
+struct tcphdr *tcp_pkt;
+
+tcp_pkt = (struct tcphdr *)pkt->transport_layer;
+
+if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
+char *sdebug, *ddebug;
+sdebug = strdup(inet_ntoa(pkt->ip->ip_src));
+ddebug = strdup(inet_ntoa(pkt->ip->ip_dst));
+trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug,
+ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
+tcp_pkt->th_flags);
+trace_colo_filter_rewriter_conn_offset(conn->offset);
+g_free(sdebug);
+g_free(ddebug);
+}
+
+if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
+/*
+ * save offset = secondary_seq and then
+ * in handle_primary_tcp_pkt make

[Qemu-devel] [RFC PATCH V2 2/3] filter-rewriter: track connection and parse packet

2016-07-01 Thread Zhang Chen

We use colo-base.h to track connections and parse packets.

Signed-off-by: Zhang Chen 
Signed-off-by: Li Zhijian 
Signed-off-by: Wen Congyang 
---
 net/filter-rewriter.c | 52 +++
 1 file changed, 52 insertions(+)

diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
index 08b015d..c38ab24 100644
--- a/net/filter-rewriter.c
+++ b/net/filter-rewriter.c
@@ -50,6 +50,20 @@ typedef struct RewriterState {
 uint32_t hashtable_size;
 } RewriterState;
 
+/*
+ * Return 1 on success, if return 0 means the pkt
+ * is not TCP packet
+ */
+static int is_tcp_packet(Packet *pkt)
+{
+if (!parse_packet_early(pkt) &&
+pkt->ip->ip_p == IPPROTO_TCP) {
+return 1;
+} else {
+return 0;
+}
+}
+
 static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
  NetClientState *sender,
  unsigned flags,
@@ -57,11 +71,49 @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
  int iovcnt,
  NetPacketSent *sent_cb)
 {
+RewriterState *s = FILTER_COLO_REWRITER(nf);
+Connection *conn;
+ConnectionKey key = {{ 0 } };
+Packet *pkt;
+ssize_t size = iov_size(iov, iovcnt);
+char *buf = g_malloc0(size);
+
+iov_to_buf(iov, iovcnt, 0, buf, size);
+pkt = packet_new(buf, size);
+
 /*
  * if we get tcp packet
  * we will rewrite it to make secondary guest's
  * connection established successfully
  */
+if (is_tcp_packet(pkt)) {
+if (sender == nf->netdev) {
+fill_connection_key(pkt, &key, SECONDARY);
+} else {
+fill_connection_key(pkt, &key, PRIMARY);
+}
+
+conn = connection_get(s->connection_track_table,
+  &key,
+  &s->hashtable_size);
+if (!conn->processing) {
+qemu_mutex_lock(&s->conn_list_lock);
+g_queue_push_tail(&s->conn_list, conn);
+qemu_mutex_unlock(&s->conn_list_lock);
+conn->processing = true;
+}
+
+if (sender == nf->netdev) {
+/* NET_FILTER_DIRECTION_TX */
+/* handle_primary_tcp_pkt */
+} else {
+/* NET_FILTER_DIRECTION_RX */
+/* handle_secondary_tcp_pkt */
+}
+}
+
+packet_destroy(pkt, NULL);
+pkt = NULL;
 return 0;
 }
 
-- 
2.7.4

[Qemu-devel] [RFC PATCH V2 1/3] filter-rewriter: introduce filter-rewriter initialization

2016-07-01 Thread Zhang Chen

Filter-rewriter is a part of the COLO project.
It rewrites some of the secondary guest's packets so that the
secondary guest's connections are established successfully.

usage:

colo secondary:
-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0
-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
-object filter-rewriter,id=rew0,netdev=hn0,queue=all

Signed-off-by: Zhang Chen 
Signed-off-by: Li Zhijian 
Signed-off-by: Wen Congyang 
---
 net/Makefile.objs |   1 +
 net/filter-rewriter.c | 112 ++
 qemu-options.hx   |  10 +
 vl.c  |   3 +-
 4 files changed, 125 insertions(+), 1 deletion(-)
 create mode 100644 net/filter-rewriter.c

diff --git a/net/Makefile.objs b/net/Makefile.objs
index 119589f..645bd10 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -18,3 +18,4 @@ common-obj-y += filter-buffer.o
 common-obj-y += filter-mirror.o
 common-obj-y += colo-compare.o
 common-obj-y += colo-base.o
+common-obj-y += filter-rewriter.o
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
new file mode 100644
index 000..08b015d
--- /dev/null
+++ b/net/filter-rewriter.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 FUJITSU LIMITED
+ * Copyright (c) 2016 Intel Corporation
+ *
+ * Author: Zhang Chen 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "net/colo-base.h"
+#include "net/filter.h"
+#include "net/net.h"
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi-visit.h"
+#include "qom/object.h"
+#include "qemu/main-loop.h"
+#include "qemu/iov.h"
+#include "net/checksum.h"
+
+#define FILTER_COLO_REWRITER(obj) \
+OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER)
+
+#define TYPE_FILTER_REWRITER "filter-rewriter"
+
+enum {
+PRIMARY = 0,
+SECONDARY,
+};
+
+typedef struct RewriterState {
+NetFilterState parent_obj;
+/* connection list: the connections belonged to this NIC could be found
+ * in this list.
+ * element type: Connection
+ */
+GQueue conn_list;
+NetQueue *incoming_queue;
+/* to protect conn_list */
+QemuMutex conn_list_lock;
+/* hashtable to save connection */
+GHashTable *connection_track_table;
+/* to save unprocessed_connections */
+GQueue unprocessed_connections;
+/* current hash size */
+uint32_t hashtable_size;
+} RewriterState;
+
+static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
+ NetClientState *sender,
+ unsigned flags,
+ const struct iovec *iov,
+ int iovcnt,
+ NetPacketSent *sent_cb)
+{
+/*
+ * if we get tcp packet
+ * we will rewrite it to make secondary guest's
+ * connection established successfully
+ */
+return 0;
+}
+
+static void colo_rewriter_cleanup(NetFilterState *nf)
+{
+RewriterState *s = FILTER_COLO_REWRITER(nf);
+
+qemu_mutex_destroy(&s->conn_list_lock);
+g_queue_free(&s->conn_list);
+}
+
+static void colo_rewriter_setup(NetFilterState *nf, Error **errp)
+{
+RewriterState *s = FILTER_COLO_REWRITER(nf);
+
+g_queue_init(&s->conn_list);
+qemu_mutex_init(&s->conn_list_lock);
+s->hashtable_size = 0;
+
+s->connection_track_table = g_hash_table_new_full(connection_key_hash,
+  connection_key_equal,
+  g_free,
+  connection_destroy);
+s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
+}
+
+static void colo_rewriter_class_init(ObjectClass *oc, void *data)
+{
+NetFilterClass *nfc = NETFILTER_CLASS(oc);
+
+nfc->setup = colo_rewriter_setup;
+nfc->cleanup = colo_rewriter_cleanup;
+nfc->receive_iov = colo_rewriter_receive_iov;
+}
+
+static const TypeInfo colo_rewriter_info = {
+.name = TYPE_FILTER_REWRITER,
+.parent = TYPE_NETFILTER,
+.class_init = colo_rewriter_class_init,
+.instance_size = sizeof(RewriterState),
+};
+
+static void register_types(void)
+{
+type_register_static(&colo_rewriter_info);
+}
+
+type_init(register_types);
diff --git a/qemu-options.hx b/qemu-options.hx
index 14bade5..d7ab165 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3859,6 +3859,16 @@ Create a filter-redirector we need to differ outdev id 
from indev id, id can not
 be the same. we can just use indev or outdev, but at least one of indev or 
outdev
 need to be specified.
 
+@item -object 
filter-rewriter,id=@var{id},netdev=@var{netdevid},rewriter-mode=@var{mode}[,queue=@var{all|rx|tx}]
+
+Filter-rewriter is a part of COLO project.It will rewrite some

[Qemu-devel] [RFC PATCH V2 0/3] filter-rewriter: introduce filter-rewriter

2016-07-01 Thread Zhang Chen

Filter-rewriter is a part of the COLO project,
so this patch set depends on colo-compare.
It rewrites some of the secondary guest's packets so that the
secondary guest's connections are established successfully.


v2:
  - add more comments about packet flows
  - add some trace-event
  - add seq offset ( = secondary_seq - primary_seq)

v1:
  - initial patch


Zhang Chen (3):
  filter-rewriter: introduce filter-rewriter initialization
  filter-rewriter: track connection and parse packet
  filter-rewriter: rewrite tcp packet to keep secondary connection

 net/Makefile.objs |   1 +
 net/colo-base.h   |   2 +
 net/filter-rewriter.c | 270 ++
 qemu-options.hx   |  10 ++
 trace-events  |   5 +
 vl.c  |   3 +-
 6 files changed, 290 insertions(+), 1 deletion(-)
 create mode 100644 net/filter-rewriter.c

-- 
2.7.4

[Qemu-devel] [Bug 1594239] Re: After adding more scsi disks for Aarch64 virtual machine, start the VM and got Qemu Error

2016-07-01 Thread Tom Hanson

This looks like a command line / configuration issue which results in a
name collision as Dave predicted above.

I had to piece this together out of bits of information since documentation is 
a bit sparse but the following works.  Note the explicit ID and LUN values on 
the -device declarations:
sudo qemu-system-aarch64 -enable-kvm -machine virt -cpu host -machine type=virt 
-nographic -smp 1 -m 2048 -kernel aarch64-linux-3.15rc2-buildroot.img  --append 
"console=ttyAMA0" \
  -device virtio-scsi-device,id=scsi0 \
  -device virtio-scsi-device,id=scsi1 \
  -drive file=scsi_1.img,format=raw,if=none,id=d0 \
  -device scsi-hd,bus=scsi0.0,scsi-id=0,lun=0,drive=d0 \
  -drive file=scsi_2.img,format=raw,if=none,id=d1 \
  -device scsi-hd,bus=scsi1.0,scsi-id=0,lun=1,drive=d1

Added debug shows the following (Note the LUN value of 1 for the second drive):
calculate_compat_instance_id: For [scsi-disk], Init instance_id to [0]
calculate_new_instance_id: For [0:0:0/scsi-disk], Init instance_id to [0]
calculate_compat_instance_id: For [scsi-disk], Init instance_id to [0]
calculate_compat_instance_id: Found match for [scsi-disk], incrementing 
instance_id is now [1]
calculate_new_instance_id: For [0:0:1/scsi-disk], Init instance_id to [0]

Note: even though it's on a different bus, specifying the same id & lun
will cause a collision.

If desired, the above can be simplified to use a single bus:
sudo qemu-system-aarch64 -enable-kvm -machine virt -cpu host -machine type=virt 
-nographic -smp 1 -m 2048 -kernel aarch64-linux-3.15rc2-buildroot.img  --append 
"console=ttyAMA0" \
  -device virtio-scsi-device,id=scsi0 \
  -drive file=scsi_1.img,format=raw,if=none,id=d0 \
  -device scsi-hd,bus=scsi0.0,scsi-id=0,lun=0,drive=d0 \
  -drive file=scsi_2.img,format=raw,if=none,id=d1 \
  -device scsi-hd,bus=scsi0.0,scsi-id=0,lun=1,drive=d1

Searching the web, I saw this more commonly done with virtio-scsi-pci instead 
of virtio-scsi-device (but I can't tell you why):
sudo qemu-system-aarch64 -enable-kvm -machine virt -cpu host -machine type=virt 
-nographic -smp 1 -m 2048 -kernel aarch64-linux-3.15rc2-buildroot.img  --append 
"console=ttyAMA0" \
  -device virtio-scsi-pci,id=scsi0 \
  -device virtio-scsi-pci,id=scsi1 \
  -drive file=scsi_1.img,format=raw,if=none,id=d0 \
  -device scsi-hd,bus=scsi0.0,scsi-id=0,lun=0,drive=d0 \
  -drive file=scsi_2.img,format=raw,if=none,id=d1 \
  -device scsi-hd,bus=scsi1.0,scsi-id=0,lun=1,drive=d1

Note that the name used internally now includes the bus id:
calculate_compat_instance_id: For [scsi-disk], Init instance_id to [0]
calculate_new_instance_id: For [0000:00:02.0/0:0:0/scsi-disk], Init instance_id to [0]
calculate_compat_instance_id: For [scsi-disk], Init instance_id to [0]
calculate_compat_instance_id: Found match for [scsi-disk], incrementing instance_id is now [1]
calculate_new_instance_id: For [0000:00:03.0/0:0:1/scsi-disk], Init instance_id to [0]

This means that it is now possible to use the same LUN for the drives on the 2 
different buses:
sudo qemu-system-aarch64 -enable-kvm -machine virt -cpu host -machine type=virt 
-nographic -smp 1 -m 2048 -kernel aarch64-linux-3.15rc2-buildroot.img  --append 
"console=ttyAMA0" \
  -device virtio-scsi-pci,id=scsi0 \
  -device virtio-scsi-pci,id=scsi1 \
  -drive file=scsi_1.img,format=raw,if=none,id=d0 \
  -device scsi-hd,bus=scsi0.0,scsi-id=0,lun=0,drive=d0 \
  -drive file=scsi_2.img,format=raw,if=none,id=d1 \
  -device scsi-hd,bus=scsi1.0,scsi-id=0,lun=0,drive=d1

Internally:
calculate_compat_instance_id: For [scsi-disk], Init instance_id to [0]
calculate_new_instance_id: For [0000:00:02.0/0:0:0/scsi-disk], Init instance_id to [0]
calculate_compat_instance_id: For [scsi-disk], Init instance_id to [0]
calculate_compat_instance_id: Found match for [scsi-disk], incrementing instance_id is now [1]
calculate_new_instance_id: For [0000:00:03.0/0:0:0/scsi-disk], Init instance_id to [0]

Here also, a single bus works fine as long as ID + LUN is unique:
sudo qemu-system-aarch64 -enable-kvm -machine virt -cpu host -machine type=virt 
-nographic -smp 1 -m 2048 -kernel aarch64-linux-3.15rc2-buildroot.img  --append 
"console=ttyAMA0" \
  -device virtio-scsi-pci,id=scsi0 \
  -drive file=scsi_1.img,format=raw,if=none,id=d0 \
  -device scsi-hd,bus=scsi0.0,scsi-id=0,lun=1,drive=d0 \
  -drive file=scsi_2.img,format=raw,if=none,id=d1 \
  -device scsi-hd,bus=scsi0.0,scsi-id=0,lun=5,drive=d1

Internally:
calculate_new_instance_id: For [0000:00:02.0/virtio-scsi], Init instance_id to [0]
calculate_compat_instance_id: For [scsi-disk], Init instance_id to [0]
calculate_new_instance_id: For [0000:00:02.0/0:0:1/scsi-disk], Init instance_id to [0]
calculate_compat_instance_id: For [scsi-disk], Init instance_id to [0]
calculate_compat_instance_id: Found match for [scsi-disk], incrementing instance_id is now [1]
calculate_new_instance_id: For [0000:00:02.0/0:0:5/scsi-disk], Init instance_id to [0]

-- 
You received this bug notification because you are a member of qemu

[Qemu-devel] [PATCH v8 5/5] docs: Add a generic loader explanation document

2016-07-01 Thread Alistair Francis

Signed-off-by: Alistair Francis 
---
V8:
 - Improve documentation
V6:
 - Fixup documentation
V4:
 - Re-write to be more comprehensive

 docs/generic-loader.txt | 60 +
 1 file changed, 60 insertions(+)
 create mode 100644 docs/generic-loader.txt

diff --git a/docs/generic-loader.txt b/docs/generic-loader.txt
new file mode 100644
index 000..34684fc
--- /dev/null
+++ b/docs/generic-loader.txt
@@ -0,0 +1,60 @@
+Copyright (c) 2016 Xilinx Inc.
+
+This work is licensed under the terms of the GNU GPL, version 2 or later.  See
+the COPYING file in the top-level directory.
+
+
+The 'loader' device allows the user to load multiple images or values into
+QEMU at startup.
+
+Loading Memory Values
+---------------------
+The loader device allows memory values to be set from the command line. This
+can be done by following the syntax below:
+
+-device loader,addr=<addr>,data=<data>,data-len=<data-len>
+-device loader,addr=<addr>,cpu-num=<cpu-num>
+
+<addr>      - The address to store the data or the value to use as the
+              CPU's PC.
+<data>      - The value to be written to the address. The maximum size of
+              the data is 8 bytes.
+<data-len>  - The length of the data in bytes. This argument must be
+              included if the data argument is.
+<data-be>   - Set to true if the data to be stored on the guest should be
+              written as big endian data. The default is to write little
+              endian data.
+<cpu-num>   - This will cause the CPU to be reset and the PC to be set to
+              the value of addr.
+
+For all values both hex and decimal values are allowed. By default the values
+will be parsed as decimal. To use hex values the user should prefix the number
+with a '0x'.
+
+An example of loading value 0x8000000e to address 0xfd1a0104 is:
+-device loader,addr=0xfd1a0104,data=0x8000000e,data-len=4
+
+Loading Files
+-------------
+The loader device also allows files to be loaded into memory. This can be done
+similarly to setting memory values. The syntax is shown below:
+
+-device loader,file=<file>,addr=<addr>,cpu-num=<cpu-num>,force-raw=<force-raw>
+
+<file>       - A file to be loaded into memory
+<addr>       - The addr in memory that the file should be loaded. This is
+               ignored if you are using an ELF (unless force-raw is true).
+               This is required if you aren't loading an ELF.
+<cpu-num>    - This specifies the CPU that should be used. This is an
+               optional argument and will cause the CPU's PC to be set to
+               where the image is stored. This option should only be used
+               for the boot image.
+<force-raw>  - Forces the file to be treated as a raw image. This can be
+               used to specify the load address of ELF files.
+
+For all values both hex and decimal values are allowed. By default the values
+will be parsed as decimal. To use hex values the user should prefix the number
+with a '0x'.
+
+An example of loading an ELF file which CPU0 will boot is shown below:
+-device loader,file=./images/boot.elf,cpu-num=0
-- 
2.7.4

[Qemu-devel] [PATCH v8 3/5] loader: Add AddressSpace loading support to ELFs

2016-07-01 Thread Alistair Francis

Add a new function load_elf_as() that allows the caller to specify an
AddressSpace to use when loading the ELF. The original load_elf()
function doesn't have any change in functionality.

Signed-off-by: Alistair Francis 
---
V8:
 - Introduce an RFC version of AddressSpace support

 hw/core/loader.c | 16 ++--
 include/hw/elf_ops.h |  5 +++--
 include/hw/loader.h  | 14 +-
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/hw/core/loader.c b/hw/core/loader.c
index fcbcfbf..9b25dfc 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -417,6 +417,18 @@ int load_elf(const char *filename, uint64_t 
(*translate_fn)(void *, uint64_t),
  uint64_t *highaddr, int big_endian, int elf_machine,
  int clear_lsb, int data_swab)
 {
+return load_elf_as(filename, translate_fn, translate_opaque, pentry,
+   lowaddr, highaddr, big_endian, elf_machine, clear_lsb,
+   data_swab, NULL);
+}
+
+/* return < 0 if error, otherwise the number of bytes loaded in memory */
+int load_elf_as(const char *filename,
+uint64_t (*translate_fn)(void *, uint64_t),
+void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
+uint64_t *highaddr, int big_endian, int elf_machine,
+int clear_lsb, int data_swab, AddressSpace *as)
+{
 int fd, data_order, target_data_order, must_swab, ret = ELF_LOAD_FAILED;
 uint8_t e_ident[EI_NIDENT];
 
@@ -455,11 +467,11 @@ int load_elf(const char *filename, uint64_t 
(*translate_fn)(void *, uint64_t),
 if (e_ident[EI_CLASS] == ELFCLASS64) {
 ret = load_elf64(filename, fd, translate_fn, translate_opaque, 
must_swab,
  pentry, lowaddr, highaddr, elf_machine, clear_lsb,
- data_swab);
+ data_swab, as);
 } else {
 ret = load_elf32(filename, fd, translate_fn, translate_opaque, 
must_swab,
  pentry, lowaddr, highaddr, elf_machine, clear_lsb,
- data_swab);
+ data_swab, as);
 }
 
  fail:
diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h
index 1339677..3b8c9e9 100644
--- a/include/hw/elf_ops.h
+++ b/include/hw/elf_ops.h
@@ -263,7 +263,8 @@ static int glue(load_elf, SZ)(const char *name, int fd,
   void *translate_opaque,
   int must_swab, uint64_t *pentry,
   uint64_t *lowaddr, uint64_t *highaddr,
-  int elf_machine, int clear_lsb, int data_swab)
+  int elf_machine, int clear_lsb, int data_swab,
+  AddressSpace *as)
 {
 struct elfhdr ehdr;
 struct elf_phdr *phdr = NULL, *ph;
@@ -405,7 +406,7 @@ static int glue(load_elf, SZ)(const char *name, int fd,
 snprintf(label, sizeof(label), "phdr #%d: %s", i, name);
 
 /* rom_add_elf_program() seize the ownership of 'data' */
-rom_add_elf_program(label, data, file_size, mem_size, addr, NULL);
+rom_add_elf_program(label, data, file_size, mem_size, addr, as);
 
 total_size += mem_size;
 if (addr < low)
diff --git a/include/hw/loader.h b/include/hw/loader.h
index 18eb0f2..d14eab1 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -45,7 +45,7 @@ int load_image_gzipped(const char *filename, hwaddr addr, 
uint64_t max_sz);
 #define ELF_LOAD_WRONG_ENDIAN -4
 const char *load_elf_strerror(int error);
 
-/** load_elf:
+/** load_elf_as:
  * @filename: Path of ELF file
  * @translate_fn: optional function to translate load addresses
  * @translate_opaque: opaque data passed to @translate_fn
@@ -59,6 +59,8 @@ const char *load_elf_strerror(int error);
  * @data_swab: Set to order of byte swapping for data. 0 for no swap, 1
  * for swapping bytes within halfwords, 2 for bytes within
  * words and 3 for within doublewords.
+ * @as: The AddressSpace to load the ELF to. The value of address_space_memory
+ *  is used if nothing is supplied here.
  *
  * Load an ELF file's contents to the emulated system's address space.
  * Clients may optionally specify a callback to perform address
@@ -70,6 +72,16 @@ const char *load_elf_strerror(int error);
  * their particular values for @elf_machine are set.
  */
 
+int load_elf_as(const char *filename,
+uint64_t (*translate_fn)(void *, uint64_t),
+void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
+uint64_t *highaddr, int big_endian, int elf_machine,
+int clear_lsb, int data_swab, AddressSpace *as);
+
+/** load_elf:
+ * Same as above, but doesn't allow the caller to specify an AddressSpace
+ */
+
 int load_elf(const char *filename, uint64_t (*translate_fn)(void *, uint64_t),
  void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,

[Qemu-devel] [PATCH v8 4/5] generic-loader: Add a generic loader

2016-07-01 Thread Alistair Francis

Add a generic loader to QEMU which can be used to load images or set
memory values.

Signed-off-by: Alistair Francis 
---
V8:
 - Code corrections
 - Rebase
V7:
 - Rebase
V6:
 - Add error checking
V5:
 - Rebase
V4:
 - Allow the loader to work with every architecture
 - Move the file to hw/core
 - Increase the maximum number of CPUs
 - Make the CPU operations conditional
 - Convert the cpu option to cpu-num
 - Require the user to specify endianness
V3:
 - Pass the ram_size to load_image_targphys()
V2:
 - Add maintainers entry
 - Perform bounds checking
 - Register and unregister the reset in the realise/unrealise
Changes since RFC:
 - Add BE support

 MAINTAINERS  |   6 ++
 hw/core/Makefile.objs|   2 +
 hw/core/generic-loader.c | 177 +++
 include/hw/core/generic-loader.h |  45 ++
 4 files changed, 230 insertions(+)
 create mode 100644 hw/core/generic-loader.c
 create mode 100644 include/hw/core/generic-loader.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 2ab6e3b..0077e22 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -992,6 +992,12 @@ M: Dmitry Fleytman 
 S: Maintained
 F: hw/net/e1000e*
 
+Generic Loader
+M: Alistair Francis 
+S: Maintained
+F: hw/core/generic-loader.c
+F: include/hw/core/generic-loader.h
+
 Subsystems
 --
 Audio
diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs
index 82a9ef8..ab238fa 100644
--- a/hw/core/Makefile.objs
+++ b/hw/core/Makefile.objs
@@ -16,3 +16,5 @@ common-obj-$(CONFIG_SOFTMMU) += null-machine.o
 common-obj-$(CONFIG_SOFTMMU) += loader.o
 common-obj-$(CONFIG_SOFTMMU) += qdev-properties-system.o
 common-obj-$(CONFIG_PLATFORM_BUS) += platform-bus.o
+
+obj-$(CONFIG_SOFTMMU) += generic-loader.o
diff --git a/hw/core/generic-loader.c b/hw/core/generic-loader.c
new file mode 100644
index 000..c9b0572
--- /dev/null
+++ b/hw/core/generic-loader.c
@@ -0,0 +1,177 @@
+/*
+ * Generic Loader
+ *
+ * Copyright (C) 2014 Li Guang
+ * Copyright (C) 2016 Xilinx Inc.
+ * Written by Li Guang 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+#include "qom/cpu.h"
+#include "hw/sysbus.h"
+#include "sysemu/dma.h"
+#include "hw/loader.h"
+#include "qapi/error.h"
+#include "hw/core/generic-loader.h"
+
+#define CPU_NONE 0x
+
+static void generic_loader_reset(void *opaque)
+{
+GenericLoaderState *s = GENERIC_LOADER(opaque);
+
+if (s->cpu) {
+CPUClass *cc = CPU_GET_CLASS(s->cpu);
+cpu_reset(s->cpu);
+if (cc) {
+cc->set_pc(s->cpu, s->addr);
+}
+}
+
+if (s->data_len) {
+assert(s->data_len < sizeof(s->data));
+dma_memory_write((s->cpu ? s->cpu : first_cpu)->as, s->addr, &s->data,
+ s->data_len);
+}
+}
+
+static void generic_loader_realize(DeviceState *dev, Error **errp)
+{
+GenericLoaderState *s = GENERIC_LOADER(dev);
+hwaddr entry;
+int big_endian;
+int size = 0;
+
+/* Perform some error checking on the user's options */
+if (s->data || s->data_len  || s->data_be) {
+/* User is loading memory values */
+if (s->file) {
+error_setg(errp, "Specifying a file is not supported when loading "
+   "memory values");
+return;
+} else if (s->force_raw) {
+error_setg(errp, "Specifying force raw is not supported when "
+   "loading memory values");
+return;
+} else if (!s->data || !s->data_len) {
+error_setg(errp, "Both data and data length must be specified");
+return;
+} else if (s->cpu_num) {
+error_setg(errp, "Setting data and a cpu number is not supported");
+return;
+}
+} else if (s->file || s->force_raw)  {
+/* User is loading an image */
+if (s->data || s->data_len || s->data_be) {
+error_setg(errp, "Data can not be specified when loading an "
+   "image");
+return;
+}
+} else if (s->data_len) {
+if (s->data_len > 8) {
+error_setg(errp, "data-len cannot be greate then 8 bytes");
+return;
+} else if (s->data_len > sizeof(s->data)) {
+error_setg(errp, "data-len cannot be more then the data size");
+return;
+}
+}
+
+qemu_register_reset(generic_loader_reset, dev);
+
+if (s->cpu_num != CPU_NONE) {
+s->cpu = qemu_get_cpu(s->cpu_num);
+if (!s->cpu

[Qemu-devel] [PATCH v8 2/5] loader: Allow a custom AddressSpace when loading ROMs

2016-07-01 Thread Alistair Francis

When loading ROMs allow the caller to specify an AddressSpace to use for
the load.

Signed-off-by: Alistair Francis 
---
V8:
 - Introduce an RFC version of AddressSpace loading support

 hw/core/loader.c | 18 --
 include/hw/elf_ops.h |  2 +-
 include/hw/loader.h  | 10 ++
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/hw/core/loader.c b/hw/core/loader.c
index 53e0e41..fcbcfbf 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -777,6 +777,7 @@ struct Rom {
 
 uint8_t *data;
 MemoryRegion *mr;
+AddressSpace *as;
 int isrom;
 char *fw_dir;
 char *fw_file;
@@ -833,7 +834,8 @@ static void *rom_set_mr(Rom *rom, Object *owner, const char 
*name)
 
 int rom_add_file(const char *file, const char *fw_dir,
  hwaddr addr, int32_t bootindex,
- bool option_rom, MemoryRegion *mr)
+ bool option_rom, MemoryRegion *mr,
+ AddressSpace *as)
 {
 MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
 Rom *rom;
@@ -969,7 +971,7 @@ MemoryRegion *rom_add_blob(const char *name, const void 
*blob, size_t len,
  * memory ownership of "data", so we don't have to allocate and copy the 
buffer.
  */
 int rom_add_elf_program(const char *name, void *data, size_t datasize,
-size_t romsize, hwaddr addr)
+size_t romsize, hwaddr addr, AddressSpace *as)
 {
 Rom *rom;
 
@@ -979,18 +981,19 @@ int rom_add_elf_program(const char *name, void *data, 
size_t datasize,
 rom->datasize = datasize;
 rom->romsize  = romsize;
 rom->data = data;
+rom->as   = as;
 rom_insert(rom);
 return 0;
 }
 
 int rom_add_vga(const char *file)
 {
-return rom_add_file(file, "vgaroms", 0, -1, true, NULL);
+return rom_add_file(file, "vgaroms", 0, -1, true, NULL, NULL);
 }
 
 int rom_add_option(const char *file, int32_t bootindex)
 {
-return rom_add_file(file, "genroms", 0, bootindex, true, NULL);
+return rom_add_file(file, "genroms", 0, bootindex, true, NULL, NULL);
 }
 
 static void rom_reset(void *unused)
@@ -1008,7 +1011,8 @@ static void rom_reset(void *unused)
 void *host = memory_region_get_ram_ptr(rom->mr);
 memcpy(host, rom->data, rom->datasize);
 } else {
-cpu_physical_memory_write_rom(&address_space_memory,
+cpu_physical_memory_write_rom(rom->as ? rom->as :
+&address_space_memory,
   rom->addr, rom->data, rom->datasize);
 }
 if (rom->isrom) {
@@ -1031,12 +1035,13 @@ int rom_check_and_register_reset(void)
 hwaddr addr = 0;
 MemoryRegionSection section;
 Rom *rom;
+AddressSpace *as = NULL;
 
 QTAILQ_FOREACH(rom, &roms, next) {
 if (rom->fw_file) {
 continue;
 }
-if (addr > rom->addr) {
+if ((addr > rom->addr) && (as == rom->as)) {
 fprintf(stderr, "rom: requested regions overlap "
 "(rom %s. free=0x" TARGET_FMT_plx
 ", addr=0x" TARGET_FMT_plx ")\n",
@@ -1048,6 +1053,7 @@ int rom_check_and_register_reset(void)
 section = memory_region_find(get_system_memory(), rom->addr, 1);
 rom->isrom = int128_nz(section.size) && 
memory_region_is_rom(section.mr);
 memory_region_unref(section.mr);
+as = rom->as;
 }
 qemu_register_reset(rom_reset, NULL);
 roms_loaded = 1;
diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h
index db70c11..1339677 100644
--- a/include/hw/elf_ops.h
+++ b/include/hw/elf_ops.h
@@ -405,7 +405,7 @@ static int glue(load_elf, SZ)(const char *name, int fd,
 snprintf(label, sizeof(label), "phdr #%d: %s", i, name);
 
 /* rom_add_elf_program() seize the ownership of 'data' */
-rom_add_elf_program(label, data, file_size, mem_size, addr);
+rom_add_elf_program(label, data, file_size, mem_size, addr, NULL);
 
 total_size += mem_size;
 if (addr < low)
diff --git a/include/hw/loader.h b/include/hw/loader.h
index 4879b63..18eb0f2 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -118,14 +118,14 @@ extern bool rom_file_has_mr;
 
 int rom_add_file(const char *file, const char *fw_dir,
  hwaddr addr, int32_t bootindex,
- bool option_rom, MemoryRegion *mr);
+ bool option_rom, MemoryRegion *mr, AddressSpace *as);
 MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len,
size_t max_len, hwaddr addr,
const char *fw_file_name,
FWCfgReadCallback fw_callback,
void *callback_opaque);
 int rom_add_elf_program(const char *name, void *data, size_t datasize,
-size_t romsize, hwaddr addr);
+size_t romsize, hwaddr

[Qemu-devel] [PATCH v8 0/5] Add a generic loader

2016-07-01 Thread Alistair Francis

This work is based on the original work by Li Guang with extra
features added by Peter C and myself.

The idea of this loader is to allow the user to load multiple images
or values into QEMU at startup.

Memory values can be loaded like this: -device 
loader,addr=0xfd1a0104,data=0x800e,data-len=4

Images can be loaded like this: -device loader,file=./images/u-boot.elf,cpu=0

This can be useful and we use it a lot in Xilinx to load multiple images
into a machine at creation (ATF, Kernel and DTB for example).

It can also be used to set registers.

This patch series makes the load_elf() function more generic by not
requiring an architecture. It also adds a new function load_elf_as()
which allows custom AddressSpaces when loading ELF images.

At the moment I think the AddressSpace loading support is more of an
RFC. If people agree with the way I'm doing it I will expand the support
to image types.

V8:
 - Allow custom AddressSpaces when loading images
 - Move ELF architecture handling code
 - Rebase
 - Corrections to loading code
 - Corrections to documentation
V7:
 - Fix typo in comment
 - Rebase
V6:
 - Add error checking
V5:
 - Rebase
V4:
 - Re-write documentation
 - Allow the loader to work with every architecture
 - Move the file to hw/core
 - Increase the maximum number of CPUs
 - Make the CPU operations conditional
 - Convert the cpu option to cpu-num
 - Require the user to specify endianness
V2:
 - Add an entry to the maintainers file
 - Add some documentation
 - Perform bounds checking on the data_len
 - Register and unregister the reset in the realise/unrealise
Changes since RFC:
 - Add support for BE


Alistair Francis (5):
  loader: Allow ELF loader to auto-detect the ELF arch
  loader: Allow a custom AddressSpace when loading ROMs
  loader: Add AddressSpace loading support to ELFs
  generic-loader: Add a generic loader
  docs: Add a generic loader explanation document

 MAINTAINERS  |   6 ++
 docs/generic-loader.txt  |  60 +
 hw/core/Makefile.objs|   2 +
 hw/core/generic-loader.c | 177 +++
 hw/core/loader.c |  34 ++--
 include/hw/core/generic-loader.h |  45 ++
 include/hw/elf_ops.h |  10 ++-
 include/hw/loader.h  |  24 --
 8 files changed, 343 insertions(+), 15 deletions(-)
 create mode 100644 docs/generic-loader.txt
 create mode 100644 hw/core/generic-loader.c
 create mode 100644 include/hw/core/generic-loader.h

-- 
2.7.4

[Qemu-devel] [PATCH v8 1/5] loader: Allow ELF loader to auto-detect the ELF arch

2016-07-01 Thread Alistair Francis

If the caller didn't specify an architecture for the ELF machine
the load_elf() function will auto detect it based on the ELF file.

Signed-off-by: Alistair Francis 
---
V8:
 - Move into load_elf64/load_elf32
V7:
 - Fix typo

 include/hw/elf_ops.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h
index f510e7e..db70c11 100644
--- a/include/hw/elf_ops.h
+++ b/include/hw/elf_ops.h
@@ -280,6 +280,11 @@ static int glue(load_elf, SZ)(const char *name, int fd,
 glue(bswap_ehdr, SZ)(&ehdr);
 }
 
+if (elf_machine < 1) {
+/* The caller didn't specify an ARCH, we can figure it out */
+elf_machine = ehdr.e_machine;
+}
+
 switch (elf_machine) {
 case EM_PPC64:
 if (ehdr.e_machine != EM_PPC64) {
-- 
2.7.4

Re: [Qemu-devel] Bug in virtio_net_load

2016-07-01 Thread Robin Geuze


Hey Guys,

We just tested the patch on QEMU 2.6.0 and confirmed that both 2.6.0 -> 
2.6.0 and 2.4.0 -> 2.6.0 migrations work properly.


We will be leaving a migration loop running over the weekend to verify 
that everything works as expected, but I don't expect any surprises from 
that. Thanks for the quick fix :D


Regards,

Robin Geuze

TransIP BV

On 7/1/2016 10:48, Cornelia Huck wrote:

On Thu, 30 Jun 2016 20:23:08 +0300
"Michael S. Tsirkin"  wrote:


I'm not sure what I was thinking when I applied this:
it changes load without changing save - how can this work?

The ordering implications are easy to miss :(


I am inclined to revert 1f8828ef573c83365b4a87a776daf8bcef1caa21 and
apply this instead:

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 7ed06ea..18153d5 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1499,6 +1499,16 @@ int virtio_load(VirtIODevice *vdev, QEMUFile
*f, int version_id) }
  qemu_get_be32s(f, &features);

+/*
+ * Temporarily set guest_features low bits - needed by
+ * virtio net load code testing for
VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
+ * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
+ *
+ * Note: devices should always test host features in future -
don't create
+ * new dependencies like this.

docs/virtio-migration.txt should probably talk about that as well. And
any conditional stuff needs to go into a subsection in the future.


+ */
+vdev->guest_features = features;
+
  config_len = qemu_get_be32(f);

  /*

Could you please confirm whether this helps?
Jason, Cornelia - any comments?

After staring at the code, I'm inclined to think that this will work.

virtio migration: Frying unsuspecting brains since 2008.

Re: [Qemu-devel] [PATCH 0/2] Reduce lock contention on TCG hot-path

2016-07-01 Thread Emilio G. Cota

On Fri, Jul 01, 2016 at 17:16:08 +0100, Alex Bennée wrote:
(snip)
> run 1: ret=0 (PASS), time=4.755824 (1/1)
> run 2: ret=0 (PASS), time=4.756076 (2/2)
> run 3: ret=0 (PASS), time=4.755916 (3/3)
> run 4: ret=0 (PASS), time=4.755853 (4/4)
> run 5: ret=0 (PASS), time=4.755929 (5/5)
> Results summary:
> 0: 5 times (100.00%), avg time 4.755920 (0.00 deviation)

(snip)
> run 1: ret=0 (PASS), time=9.761559 (1/1)
> run 2: ret=0 (PASS), time=9.511616 (2/2)
> run 3: ret=0 (PASS), time=9.761713 (3/3)
> run 4: ret=0 (PASS), time=10.262504 (4/4)
> run 5: ret=0 (PASS), time=9.762059 (5/5)
> Results summary:
> 0: 5 times (100.00%), avg time 9.811890 (0.060150 deviation)

This is a needless diversion, but I was explaining this stuff today
to a student so couldn't help but notice.

The computed deviations seem overly small. For instance, the corrected sample
standard deviation ( https://en.wikipedia.org/wiki/Standard_deviation )
(which is usually referred to as "standard deviation", or "error")
for the last test should be 0.2742 instead of 0.06.

How are they being computed? I tried to find the source of your script
(in the kvm-unit-tests repo) but couldn't find it.

Thanks,

Emilio

[Qemu-devel] CPU topology and ordering in ACPI MADT

2016-07-01 Thread Eduardo Habkost

I got a bug report yesterday that seems to be related to how CPUs
are ordered in 'lscpu' and /proc/cpuinfo:
https://bugzilla.redhat.com/show_bug.cgi?id=1351160

It's not an actual bug, but it's not the first time I see people
confused by CPU numbers not following socket/core/thread IDs in
lscpu and /proc/cpuinfo in the host (making CPU numbers seen in
the guest not matching CPU numbers in the host).

The ordering in /proc/cpuinfo and lscpu seems to come from the
MADT ACPI table, and on some hosts it looks completely
arbitrary[1].

Now that we will allow the APIC ID (or socket/core/thread IDs) to
be explicitly set in each VCPU, we could let management make CPU
ordering match the host exactly, for people that really want to
reproduce the host topology and get easily confused by CPU
numbers that don't match the host.

But this is a lot of data to be provided to QEMU, so I don't see it
as a useful feature unless it can be represented in the libvirt
XML configuration in a more compact way, or generated
automatically based on the host. I'm CCing libvir-list to see if
they have any ideas.

...or we could just tell users that sometimes it will be
impossible to make the CPU numbers in the guest match the ones in
the host. To be honest, I am more inclined towards this option,
but I would like to hear your opinions.


[1] Example of lscpu output from a host that doesn't follow
socket/core/thread ID on the MADT table:

# lscpu -e
CPU NODE SOCKET CORE L1d:L1i:L2:L3 ONLINE
0   10  00:0:0:0   yes
1   01  11:1:1:1   yes
2   10  22:2:2:0   yes
3   01  33:3:3:1   yes
4   10  44:4:4:0   yes
5   01  55:5:5:1   yes
6   10  66:6:6:0   yes
7   01  77:7:7:1   yes
8   10  88:8:8:0   yes
9   01  99:9:9:1   yes
10  10  10   10:10:10:0yes
11  01  11   11:11:11:1yes
12  10  00:0:0:0   yes
13  01  11:1:1:1   yes
14  10  22:2:2:0   yes
15  01  33:3:3:1   yes
16  10  44:4:4:0   yes
17  01  55:5:5:1   yes
18  10  66:6:6:0   yes
19  01  77:7:7:1   yes
20  10  88:8:8:0   yes
21  01  99:9:9:1   yes
22  10  10   10:10:10:0yes
23  01  11   11:11:11:1yes
# lscpu
Architecture:  x86_64
CPU op-mode(s):32-bit, 64-bit
Byte Order:Little Endian
CPU(s):24
On-line CPU(s) list:   0-23
Thread(s) per core:2
Core(s) per socket:6
Socket(s): 2
NUMA node(s):  2
Vendor ID: GenuineIntel
CPU family:6
Model: 44
Model name:Intel(R) Xeon(R) CPU   L5640  @ 2.27GHz
Stepping:  2
CPU MHz:   2266.000
BogoMIPS:  4533.26
Virtualization:VT-x
L1d cache: 32K
L1i cache: 32K
L2 cache:  256K
L3 cache:  12288K
NUMA node0 CPU(s): 1,3,5,7,9,11,13,15,17,19,21,23
NUMA node1 CPU(s): 0,2,4,6,8,10,12,14,16,18,20,22
# 


-- 
Eduardo

Re: [Qemu-devel] [PATCH 2/2] cpu-exec: remove tb_lock from the hot-path

2016-07-01 Thread Emilio G. Cota

On Fri, Jul 01, 2016 at 17:16:10 +0100, Alex Bennée wrote:
> Lock contention in the hot path of moving between existing patched
> TranslationBlocks is the main drag in multithreaded performance. This
> patch pushes the tb_lock() usage down to the two places that really need
> it:
> 
>   - code generation (tb_gen_code)
>   - jump patching (tb_add_jump)
> 
> The rest of the code doesn't really need to hold a lock as it is either
> using per-CPU structures, atomically updated or designed to be used in
> concurrent read situations (qht_lookup).
> 
> To keep things simple I removed the #ifdef CONFIG_USER_ONLY stuff as the
> locks become NOPs anyway until the MTTCG work is completed.

From a scalability point of view it would be better to have a single
critical section.

From a correctness point of view, we're reading tb->page_addr[1]
without holding a lock. This field is set after qht_insert(tb),
so we might read a yet-uninitialized value.

I propose to just extend the critical section, like we used to
do with tcg_lock_reset.

Emilio

Re: [Qemu-devel] [PATCH 1/2] tcg: Ensure safe tb_jmp_cache lookup out of 'tb_lock'

2016-07-01 Thread Richard Henderson


On 07/01/2016 05:17 PM, Emilio G. Cota wrote:

On Fri, Jul 01, 2016 at 17:16:09 +0100, Alex Bennée wrote:

From: Sergey Fedorov 

(snip)

@@ -333,7 +338,7 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu,
is executed. */
 cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
 tb_lock();
-tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
+tb = atomic_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]);
 if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
  tb->flags != flags)) {
 tb = tb_find_slow(cpu, pc, cs_base, flags);
diff --git a/translate-all.c b/translate-all.c
index eaa95e4..1fcfe79 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -1004,11 +1004,16 @@ void tb_phys_invalidate(TranslationBlock *tb, 
tb_page_addr_t page_addr)
 invalidate_page_bitmap(p);
 }

+/* Ensure that we won't find the TB in the shared hash table
+ * if we con't see it in CPU's local cache.


s/con't/can't/


+ * Pairs with smp_rmb() in tb_find_slow(). */
+smp_wmb();


This fence is already embedded in qht_remove, since it internally
calls seqlock_write_end() on a successful removal...


No.  There's stuff that happens after qht_remove and before this barrier: 
tb_page_remove and invalidate_page_bitmap.



r~

Re: [Qemu-devel] [PATCH 1/2] tcg: Ensure safe tb_jmp_cache lookup out of 'tb_lock'

2016-07-01 Thread Emilio G. Cota

On Fri, Jul 01, 2016 at 17:16:09 +0100, Alex Bennée wrote:
> From: Sergey Fedorov 
(snip)
> @@ -333,7 +338,7 @@ static inline TranslationBlock *tb_find_fast(CPUState 
> *cpu,
> is executed. */
>  cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
>  tb_lock();
> -tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
> +tb = atomic_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]);
>  if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
>   tb->flags != flags)) {
>  tb = tb_find_slow(cpu, pc, cs_base, flags);
> diff --git a/translate-all.c b/translate-all.c
> index eaa95e4..1fcfe79 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -1004,11 +1004,16 @@ void tb_phys_invalidate(TranslationBlock *tb, 
> tb_page_addr_t page_addr)
>  invalidate_page_bitmap(p);
>  }
>  
> +/* Ensure that we won't find the TB in the shared hash table
> + * if we con't see it in CPU's local cache.

s/con't/can't/

> + * Pairs with smp_rmb() in tb_find_slow(). */
> +smp_wmb();

This fence is already embedded in qht_remove, since it internally
calls seqlock_write_end() on a successful removal, so we could get
away with a comment instead of emitting a redundant fence.
However, if qht ever changed its implementation this would have
to be taken into account. So I'd be OK with emitting the
fence here too.

> +
>  /* remove the TB from the hash list */
>  h = tb_jmp_cache_hash_func(tb->pc);
>  CPU_FOREACH(cpu) {
>  if (cpu->tb_jmp_cache[h] == tb) {

Missing atomic_read here: if (atomic_read(cpu->tb_jmp_cache[...])) {

> -cpu->tb_jmp_cache[h] = NULL;
> +atomic_set(&cpu->tb_jmp_cache[h], NULL);

Other than that,

  Reviewed-by: Emilio G. Cota

Re: [Qemu-devel] [RFC v3 12/19] tcg: add kick timer for single-threaded vCPU emulation

2016-07-01 Thread Richard Henderson


On 06/27/2016 02:20 PM, Sergey Fedorov wrote:

On 03/06/16 23:40, Alex Bennée wrote:

diff --git a/cpus.c b/cpus.c
index 1694ce9..12e04c9 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1208,9 +1208,29 @@ static int tcg_cpu_exec(CPUState *cpu)
 return ret;
 }

+/* Single-threaded TCG
+ *
+ * In the single-threaded case each vCPU is simulated in turn. If
+ * there is more than a single vCPU we create a simple timer to kick
+ * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
+ * This is done explicitly rather than relying on side-effects
+ * elsewhere.
+ */
+static void qemu_cpu_kick_no_halt(void);
+#define TCG_KICK_FREQ (qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + \
+   NANOSECONDS_PER_SECOND / 10)


Hmm, it doesn't look nice to wrap calculation of the next timeout in a
macro and name it '*_FREQ'. I think we'd better do like this:

#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)

static inline int64_t qemu_tcg_next_kick(void)
{
return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}

and use it like this:

timer_mod(kick_timer, qemu_tcg_next_kick());


Agreed.

As an aside, surely a period of 10ns is too small.
That's on the order of 20-50 host instructions.


r~

Re: [Qemu-devel] [RFC v3 11/19] tcg: add options for enabling MTTCG

2016-07-01 Thread Richard Henderson


On 06/03/2016 01:40 PM, Alex Bennée wrote:

+bool qemu_tcg_mttcg_enabled(void)
+{
+return mttcg_enabled;
+}


Is there a good reason to expose this via function call, rather than just test 
the variable?



r~

Re: [Qemu-devel] [RFC v3 10/19] tcg: cpus rm tcg_exec_all()

2016-07-01 Thread Richard Henderson


On 06/03/2016 01:40 PM, Alex Bennée wrote:

In preparation for multi-threaded TCG we remove tcg_exec_all and move
all the CPU cycling into the main thread function. When MTTCG is enabled
we shall use a separate thread function which only handles one vCPU.

Signed-off-by: Alex Bennée 


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [RFC v3 08/19] tcg: protect TBContext with tb_lock.

2016-07-01 Thread Richard Henderson


On 06/03/2016 01:40 PM, Alex Bennée wrote:

From: KONRAD Frederic 

This protects TBContext with tb_lock to make tb_* thread safe.

We can still have issue with tb_flush in case of multithread TCG:
another CPU can be executing code during a flush.

This can be fixed later by making all other TCG threads exit before calling
tb_flush().

Signed-off-by: KONRAD Frederic 
Message-Id: <1439220437-23957-8-git-send-email-fred.kon...@greensocs.com>
Signed-off-by: Emilio G. Cota 
Signed-off-by: Paolo Bonzini 
[AJB: moved into tree, clean-up history]
Signed-off-by: Alex Bennée 

---


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [RFC PATCH 09/11] zynqmp_crf: add the clock mechanism

2016-07-01 Thread Alistair Francis

On Mon, Jun 13, 2016 at 9:27 AM,   wrote:
> From: KONRAD Frederic 
>
> This adds the pll to the zynqmp_crf and the dp_video clock output.
>
> Signed-off-by: KONRAD Frederic 
> ---
>  hw/misc/xilinx_zynqmp_crf.c | 440 
> 
>  1 file changed, 440 insertions(+)
>
> diff --git a/hw/misc/xilinx_zynqmp_crf.c b/hw/misc/xilinx_zynqmp_crf.c
> index 4c670a0..2097534 100644
> --- a/hw/misc/xilinx_zynqmp_crf.c
> +++ b/hw/misc/xilinx_zynqmp_crf.c
> @@ -30,6 +30,7 @@
>  #include "hw/register.h"
>  #include "qemu/bitops.h"
>  #include "qemu/log.h"
> +#include "qemu/qemu-clock.h"
>
>  #ifndef XILINX_CRF_APB_ERR_DEBUG
>  #define XILINX_CRF_APB_ERR_DEBUG 0
> @@ -281,6 +282,38 @@ typedef struct CRF_APB {
>
>  uint32_t regs[R_MAX];
>  RegisterInfo regs_info[R_MAX];
> +
> +/* input clocks */
> +qemu_clk pss_ref_clk;
> +qemu_clk video_clk;
> +qemu_clk pss_alt_ref_clk;
> +qemu_clk aux_refclk;
> +qemu_clk gt_crx_ref_clk;
> +
> +/* internal clocks */
> +qemu_clk apll_clk;
> +qemu_clk dpll_clk;
> +qemu_clk vpll_clk;
> +
> +/* output clocks */
> +qemu_clk acpu_clk;
> +qemu_clk dbg_trace;
> +qemu_clk dbg_fdp;
> +qemu_clk dp_video_ref;
> +qemu_clk dp_audio_ref;
> +qemu_clk dp_stc_ref;
> +qemu_clk ddr;
> +qemu_clk gpu_ref;
> +qemu_clk sata_ref;
> +qemu_clk pcie_ref;
> +qemu_clk gdma_ref;
> +qemu_clk dpdma_ref;
> +qemu_clk topsw_main;
> +qemu_clk topsw_lsbus;
> +qemu_clk dbg_tstmp;
> +qemu_clk apll_to_lpd;
> +qemu_clk dpll_to_lpd;
> +qemu_clk vpll_to_lpd;
>  } CRF_APB;
>
>  static const MemoryRegionOps crf_apb_ops = {
> @@ -325,6 +358,318 @@ static uint64_t ir_disable_prew(RegisterInfo *reg, 
> uint64_t val64)
>  return 0;
>  }
>
> +enum clk_src {
> +VIDEO_CLK = 4,
> +PSS_ALT_REF_CLK = 5,
> +AUX_REF_CLK = 6,
> +GT_CRX_REF_CLK = 7,
> +PSS_REF_CLK = 0
> +};
> +
> +static void apll_to_lpd_postw(RegisterInfo *reg, uint64_t val64)
> +{
> +CRF_APB *s = XILINX_CRF_APB(reg->opaque);
> +
> +qemu_clk_refresh(s->apll_to_lpd);
> +}
> +
> +static float apll_to_lpd_update_rate(void *opaque, float input_rate)
> +{
> +CRF_APB *s = XILINX_CRF_APB(opaque);
> +uint32_t divisor = AF_EX32(s->regs, APLL_TO_LPD_CTRL, DIVISOR0);
> +
> +if (!divisor) {
> +return 0.0f;
> +} else {
> +return input_rate / (float)divisor;
> +}
> +}
> +
> +static void dpll_to_lpd_postw(RegisterInfo *reg, uint64_t val64)
> +{
> +CRF_APB *s = XILINX_CRF_APB(reg->opaque);
> +
> +qemu_clk_refresh(s->dpll_to_lpd);
> +}
> +
> +static float dpll_to_lpd_update_rate(void *opaque, float input_rate)
> +{
> +CRF_APB *s = XILINX_CRF_APB(opaque);
> +uint32_t divisor = AF_EX32(s->regs, DPLL_TO_LPD_CTRL, DIVISOR0);
> +
> +if (!divisor) {
> +return 0.0f;
> +} else {
> +return input_rate / (float)divisor;
> +}
> +}
> +
> +static void vpll_to_lpd_postw(RegisterInfo *reg, uint64_t val64)
> +{
> +CRF_APB *s = XILINX_CRF_APB(reg->opaque);
> +
> +qemu_clk_refresh(s->vpll_to_lpd);
> +}
> +
> +static float vpll_to_lpd_update_rate(void *opaque, float input_rate)
> +{
> +CRF_APB *s = XILINX_CRF_APB(opaque);
> +uint32_t divisor = AF_EX32(s->regs, VPLL_TO_LPD_CTRL, DIVISOR0);
> +
> +if (!divisor) {
> +return 0.0f;
> +} else {
> +return input_rate / (float)divisor;
> +}
> +}
> +
> +static void apll_ctrl_postw(RegisterInfo *reg, uint64_t val64)
> +{
> +CRF_APB *s = XILINX_CRF_APB(reg->opaque);
> +uint32_t source = AF_EX32(s->regs, APLL_CTRL, BYPASS)
> +? AF_EX32(s->regs, APLL_CTRL, POST_SRC)
> +: AF_EX32(s->regs, APLL_CTRL, PRE_SRC);
> +
> +/*
> + * We must ensure that only one clock is bound to the apll internal 
> clock.
> + */
> +qemu_clk_unbound(s->pss_ref_clk, s->apll_clk);
> +qemu_clk_unbound(s->video_clk, s->apll_clk);
> +qemu_clk_unbound(s->pss_alt_ref_clk, s->apll_clk);
> +qemu_clk_unbound(s->aux_refclk, s->apll_clk);
> +qemu_clk_unbound(s->gt_crx_ref_clk, s->apll_clk);
> +
> +switch (source) {
> +case VIDEO_CLK:
> +qemu_clk_bound_clock(s->video_clk, s->apll_clk);
> +break;
> +case PSS_ALT_REF_CLK:
> +qemu_clk_bound_clock(s->pss_alt_ref_clk, s->apll_clk);
> +break;
> +case AUX_REF_CLK:
> +qemu_clk_bound_clock(s->aux_refclk, s->apll_clk);
> +break;
> +case GT_CRX_REF_CLK:
> +qemu_clk_bound_clock(s->gt_crx_ref_clk, s->apll_clk);
> +break;
> +default:
> +qemu_clk_bound_clock(s->pss_ref_clk, s->apll_clk);
> +break;
> +}
> +}
> +
> +static void dpll_ctrl_postw(RegisterInfo *reg, uint64_t val64)
> +{
> +CRF_APB *s = XILINX_CRF_APB(reg->opaque);
> +uint32_t source = AF_EX32(s->regs, DPLL_CTRL, BYPASS)
> +? AF_EX32(s->regs, DPLL_CTRL, POST_SRC)
> +: AF_EX32(s->reg

Re: [Qemu-devel] [RFC v3 03/19] translate-all: add DEBUG_LOCKING asserts

2016-07-01 Thread Richard Henderson


On 06/03/2016 01:40 PM, Alex Bennée wrote:

This adds asserts to check the locking on the various translation
engines structures. There are two sets of structures that are protected
by locks.

The first is the l1map and PageDesc structures used to track which
translation blocks are associated with which physical addresses. In
user-mode this is covered by the mmap_lock.

The second case are TB context related structures which are protected by
tb_lock which is also user-mode only.

Currently the asserts do nothing in SoftMMU mode but this will change
for MTTCG.

Signed-off-by: Alex Bennée 
---
 bsd-user/mmap.c |  5 +
 include/exec/exec-all.h |  1 +
 linux-user/mmap.c   |  5 +
 translate-all.c | 41 +
 4 files changed, 52 insertions(+)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH 2/2] cpu-exec: remove tb_lock from the hot-path

2016-07-01 Thread Richard Henderson


On 07/01/2016 09:16 AM, Alex Bennée wrote:

Lock contention in the hot path of moving between existing patched
TranslationBlocks is the main drag in multithreaded performance. This
patch pushes the tb_lock() usage down to the two places that really need
it:

  - code generation (tb_gen_code)
  - jump patching (tb_add_jump)

The rest of the code doesn't really need to hold a lock as it is either
using per-CPU structures, atomically updated or designed to be used in
concurrent read situations (qht_lookup).

To keep things simple I removed the #ifdef CONFIG_USER_ONLY stuff as the
locks become NOPs anyway until the MTTCG work is completed.

Signed-off-by: Alex Bennée 

---
v3
  - fix merge conflicts with Sergey's patch
v4
  - revert name tweaking
  - drop test jmp_list_next outside lock
  - mention lock NOPs in comments
---
 cpu-exec.c | 49 ++---
 1 file changed, 22 insertions(+), 27 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH 1/2] tcg: Ensure safe tb_jmp_cache lookup out of 'tb_lock'

2016-07-01 Thread Richard Henderson


On 07/01/2016 09:16 AM, Alex Bennée wrote:

From: Sergey Fedorov 

First, ensure atomicity of CPU's 'tb_jmp_cache' access by:
 * using atomic_read() to look up a TB when not holding 'tb_lock';
 * using atomic_write() to remove a TB from each CPU's local cache on
   TB invalidation.

Second, add some memory barriers to ensure we don't put the TB being
invalidated back to CPU's 'tb_jmp_cache'. If we fail to look up a TB in
CPU's local cache because it is being invalidated by some other thread
then it must not be found in the shared TB hash table. Otherwise we'd
put it back to CPU's local cache.

Note that this patch does *not* make CPU's TLB invalidation safe if it
is done from some other thread while the CPU is in its execution loop.

Signed-off-by: Sergey Fedorov 
Signed-off-by: Sergey Fedorov 
[AJB: fixed missing atomic set, tweak title]
Signed-off-by: Alex Bennée 

---
AJB:
  - tweak title
  - fixed missing set of tb_jmp_cache
---
 cpu-exec.c  | 9 +++--
 translate-all.c | 7 ++-
 2 files changed, 13 insertions(+), 3 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [RFC PATCH 10/11] zynqmp: add the zynqmp_crf to the platform

2016-07-01 Thread Alistair Francis

On Mon, Jun 13, 2016 at 9:27 AM,   wrote:
> From: KONRAD Frederic 
>
> This adds the zynqmp_crf to the zynqmp platform.
>
> Signed-off-by: KONRAD Frederic 
> ---
>  hw/arm/xlnx-zynqmp.c | 7 +++
>  include/hw/arm/xlnx-zynqmp.h | 1 +
>  2 files changed, 8 insertions(+)
>
> diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
> index 4d504da..a8b7669 100644
> --- a/hw/arm/xlnx-zynqmp.c
> +++ b/hw/arm/xlnx-zynqmp.c
> @@ -135,6 +135,11 @@ static void xlnx_zynqmp_init(Object *obj)
>TYPE_XILINX_SPIPS);
>  qdev_set_parent_bus(DEVICE(&s->spi[i]), sysbus_get_default());
>  }
> +
> +s->crf = object_new("xlnx.zynqmp_crf");
> +qdev_set_parent_bus(DEVICE(s->crf), sysbus_get_default());
> +object_property_add_child(obj, "xlnx.zynqmp_crf", OBJECT(s->crf),
> +  &error_abort);
>  }
>
>  static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
> @@ -366,6 +371,8 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error 
> **errp)
>&error_abort);
> g_free(bus_name);
>  }
> +
> +sysbus_mmio_map(SYS_BUS_DEVICE(s->crf), 0, 0xFD1A);

Shouldn't this be realised?

Also macro for the address.

>  }
>
>  static Property xlnx_zynqmp_props[] = {
> diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h
> index 2332596..78fed6e 100644
> --- a/include/hw/arm/xlnx-zynqmp.h
> +++ b/include/hw/arm/xlnx-zynqmp.h
> @@ -81,6 +81,7 @@ typedef struct XlnxZynqMPState {
>  SysbusAHCIState sata;
>  SDHCIState sdhci[XLNX_ZYNQMP_NUM_SDHCI];
>  XilinxSPIPS spi[XLNX_ZYNQMP_NUM_SPIS];
> +Object *crf;

Can we follow the same way as the other devices?

You'll need to split a header file out for the device then.

Thanks,

Alistair

>
>  char *boot_cpu;
>  ARMCPU *boot_cpu_ptr;
> --
> 2.5.5
>
>

Re: [Qemu-devel] [RFC PATCH 06/11] introduce fixed-clock

2016-07-01 Thread Alistair Francis

On Mon, Jun 13, 2016 at 9:27 AM,   wrote:
> From: KONRAD Frederic 
>
> This is a fixed clock device.
> It just behaves as an empty device with a parameterizable output rate.
>
> Signed-off-by: KONRAD Frederic 
> ---
>  hw/misc/Makefile.objs |  2 +
>  hw/misc/fixed-clock.c | 87 
> +++
>  include/hw/misc/fixed-clock.h | 30 +++
>  3 files changed, 119 insertions(+)
>  create mode 100644 hw/misc/fixed-clock.c
>  create mode 100644 include/hw/misc/fixed-clock.h
>
> diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
> index e504463..e8b8855 100644
> --- a/hw/misc/Makefile.objs
> +++ b/hw/misc/Makefile.objs
> @@ -52,3 +52,5 @@ obj-$(CONFIG_MIPS_ITU) += mips_itu.o
>  obj-$(CONFIG_PVPANIC) += pvpanic.o
>  obj-$(CONFIG_EDU) += edu.o
>  obj-$(CONFIG_HYPERV_TESTDEV) += hyperv_testdev.o
> +
> +obj-y += fixed-clock.o
> diff --git a/hw/misc/fixed-clock.c b/hw/misc/fixed-clock.c
> new file mode 100644
> index 000..c273a91
> --- /dev/null
> +++ b/hw/misc/fixed-clock.c
> @@ -0,0 +1,87 @@
> +/*
> + * Fixed clock
> + *
> + *  Copyright (C) 2016 : GreenSocs Ltd
> + *  http://www.greensocs.com/ , email: i...@greensocs.com
> + *
> + *  Frederic Konrad   
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation, either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with this program; if not, see .
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +#include "hw/qdev.h"
> +#include "hw/misc/fixed-clock.h"
> +#include "qemu/qemu-clock.h"
> +#include "qapi/error.h"
> +
> +/* #define DEBUG_FIXED_CLOCK */

Don't include this.

> +
> +#ifdef DEBUG_FIXED_CLOCK
> +#define DPRINTF(fmt, ...) \
> +do { printf("fixed-clock: " fmt , ## __VA_ARGS__); } while (0)

It might be better to use __func__ here.

It should also be qemu_log instead of printf().

> +#else
> +#define DPRINTF(fmt, ...) do { } while (0)
> +#endif
> +
> +typedef struct {
> +DeviceState parent_obj;
> +
> +uint32_t rate;
> +struct qemu_clk out;
> +} FixedClock;

Doesn't this need to be in the header file?

> +
> +static Property fixed_clock_properties[] = {
> +DEFINE_PROP_UINT32("rate", FixedClock, rate, 0),
> +DEFINE_PROP_END_OF_LIST()
> +};
> +
> +static void fixed_clock_realizefn(DeviceState *d, Error **errp)

dev instead of d

Thanks,

Alistair

> +{
> +FixedClock *s = FIXED_CLOCK(d);
> +
> +qemu_clk_update_rate(&s->out, s->rate);
> +}
> +
> +static void fixed_clock_instance_init(Object *obj)
> +{
> +FixedClock *s = FIXED_CLOCK(obj);
> +
> +object_initialize(&s->out, sizeof(s->out), TYPE_CLOCK);
> +qemu_clk_attach_to_device(DEVICE(obj), &s->out, "clk_out");
> +}
> +
> +static void fixed_clock_class_init(ObjectClass *klass, void *data)
> +{
> +DeviceClass *dc = DEVICE_CLASS(klass);
> +
> +dc->realize = fixed_clock_realizefn;
> +dc->props = fixed_clock_properties;
> +}
> +
> +static const TypeInfo fixed_clock_info = {
> +.name  = TYPE_FIXED_CLOCK,
> +.parent= TYPE_DEVICE,
> +.instance_size = sizeof(FixedClock),
> +.instance_init = fixed_clock_instance_init,
> +.class_init= fixed_clock_class_init,
> +};
> +
> +static void fixed_clock_register_types(void)
> +{
> +type_register_static(&fixed_clock_info);
> +}
> +
> +type_init(fixed_clock_register_types);
> diff --git a/include/hw/misc/fixed-clock.h b/include/hw/misc/fixed-clock.h
> new file mode 100644
> index 000..1376444
> --- /dev/null
> +++ b/include/hw/misc/fixed-clock.h
> @@ -0,0 +1,30 @@
> +/*
> + * Fixed clock
> + *
> + *  Copyright (C) 2016 : GreenSocs Ltd
> + *  http://www.greensocs.com/ , email: i...@greensocs.com
> + *
> + *  Frederic Konrad   
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation, either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with this program; if not, see .
> + *
> + */
> +
> +#ifndef FIXED_CLOCK_H
> +#define FIXED_CLOCK_H
> +
> +#define TYPE_FIXE

Re: [Qemu-devel] [PATCH v7 14/15] qapi: Allow anonymous branch types in flat union

2016-07-01 Thread Eric Blake

On 06/16/2016 08:33 AM, Markus Armbruster wrote:
> Eric Blake  writes:
> 
>> Recent commits added support for an anonymous type as the base
>> of a flat union; with a bit more work, we can also allow an
>> anonymous struct as a branch of a flat union.  This is probably
>> most useful when a branch adds no additional members beyond the
>> common elements of the base (that is, the branch struct is '{}'),
>> but can be used for any struct in the same way we allow for an
>> anonymous struct for a command.
>>
>> The generator has to do a bit of special-casing for the fact that
>> we do not emit a '_empty' struct nor a 'visit_type__empty_members()'
>> corresponding to the special ':empty' type; but when the branch
>> is truly empty, there's nothing to do.
> 
> Well, it could emit them, if it makes things easier.
> 
>> The testsuite gets an update to use the new feature, and to ensure
>> that we can still detect invalid collisions of QMP names.
>>
>> Signed-off-by: Eric Blake 
>>

>> @@ -1061,6 +1063,9 @@ class QAPISchemaMember(object):
>>  return '(parameter of %s)' % owner[:-4]
>>  elif owner.endswith('-base'):
>>  return '(base of %s)' % owner[:-5]
>> +elif owner.endswith('-branch'):
>> +return ('(member of %s branch %s)'
>> +% tuple(owner[:-7].split(':')))
> 
> I think we should point to the spot that puts in the colon, and back.
> 
> Do we really need the "of %s" part?
> 

If you think the message reads okay without it, then we can avoid...

>>  else:
>>  assert owner.endswith('-wrapper')
>>  # Unreachable and not implemented
>> @@ -1335,7 +1340,11 @@ class QAPISchema(object):
>>self._make_members(data, 
>> info),
>>None))
>>
>> -def _make_variant(self, case, typ):
>> +def _make_variant(self, case, typ, info, owner):
>> +if isinstance(typ, dict):
>> +typ = self._make_implicit_object_type(
>> +"%s:%s" % (owner, case), info, 'branch',
>> +self._make_members(typ, info)) or 'q_empty'
> 
> This is the spot.
> 

>> @@ -1485,7 +1494,7 @@ def c_enum_const(type_name, const_name, prefix=None):
>>  type_name = prefix
>>  return camel_to_upper(type_name) + '_' + c_name(const_name, 
>> False).upper()
>>
>> -c_name_trans = string.maketrans('.-', '__')
>> +c_name_trans = string.maketrans('.-:', '___')
> 
> Because you use the colon as separator.  Hmm.
> 

...the need to transliterate : into _.  More below[1]


>> +++ b/scripts/qapi-types.py
>> @@ -61,7 +61,8 @@ def gen_object(name, base, members, variants):
>def gen_object(name, base, members, variants):
>if name in objects_seen:
>return ''
>objects_seen.add(name)
> 
>>  ret = ''
>>  if variants:
>>  for v in variants.variants:
>> -if isinstance(v.type, QAPISchemaObjectType):
>> +if (isinstance(v.type, QAPISchemaObjectType)
>> +and not (v.type.is_implicit() and v.type.is_empty())):
>>  ret += gen_object(v.type.name, v.type.base,
>>v.type.local_members, v.type.variants)
>>
> 
> This is the recursion that ensures an object type's variant member types
> are emitted before the object type.
> 
> We can't simply .type == schema.the_empty_object_type like
> qapi-introspect.py does, because we don't have schema handy here.  Hmm.
> 
> Do we really need this change?  Note that gen_object() does nothing for
> name in objects_seen, and we do this in visit_begin():
> 
> # gen_object() is recursive, ensure it doesn't visit the empty type
> objects_seen.add(schema.the_empty_object_type.name)

Cool! Remember, these patches have been through a lot of rebase churn -
I did indeed originally have to add this check to work around the empty
type (back when it was based on v5 of the "unboxed visits" series), but
that was before incorporating your suggestions in commit 7ce106a9 of the
nicer recursion prevention (v6 of the "unboxed visits", which is what
got pulled).  You are indeed correct that even without this hunk, things
still work just fine - I just never noticed that across all the rebasing.

> 
>> @@ -123,11 +124,14 @@ def gen_variants(variants):
>>  c_name=c_name(variants.tag_member.name))
>>
>>  for var in variants.variants:
> 
> Here, we emit the C union member for a variant:
> 
>> +typ = var.type.c_unboxed_type()
>> +if (isinstance(var.type, QAPISchemaObjectType) and
>> +var.type.is_empty() and var.type.is_implicit()):
>> +typ = 'char'
>>  ret += mcgen('''
>>  %(c_type)s %(c_name)s;
>>  ''',
>> - c_type=var.type.c_unboxed_type(),
>> - c_name=c_name(var.name))
>> + c_type=typ, c_name=c_name(va

[Qemu-devel] [PATCH v2 5/7] ppc: introduce ppc_set_vcpu_dt_id()

2016-07-01 Thread Greg Kurz

This patch introduces the ppc_set_vcpu_dt_id() function. It is
currently empty but it will be used to generate cpu_dt_id out of
a cpu_index provided by the machine.

It also changes the machine types to provide cpu_index. Since all
of them keep the cpus in an array, cpu_index is simply the index
in the array.

The only exception is pseries-2.7 which supports hotplug of cpu
cores and already open codes the cpu creation. Its case will be
covered in a follow-up patch.

Suggested-by: Igor Mammedov 
Signed-off-by: Greg Kurz 
---
 hw/ppc/e500.c  |2 +-
 hw/ppc/mac_newworld.c  |2 +-
 hw/ppc/mac_oldworld.c  |2 +-
 hw/ppc/ppc.c   |   12 +++-
 hw/ppc/ppc440_bamboo.c |2 +-
 hw/ppc/prep.c  |2 +-
 hw/ppc/spapr.c |2 +-
 include/hw/ppc/ppc.h   |2 +-
 8 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index ff5d92e48dd9..461dcdc031b0 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -821,7 +821,7 @@ void ppce500_init(MachineState *machine, PPCE500Params 
*params)
 CPUState *cs;
 qemu_irq *input;
 
-cpu = ppc_cpu_init(machine->cpu_model);
+cpu = ppc_cpu_init(machine->cpu_model, i);
 if (cpu == NULL) {
 fprintf(stderr, "Unable to initialize CPU!\n");
 exit(1);
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 6ab675c498d0..888f448796e7 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -193,7 +193,7 @@ static void ppc_core99_init(MachineState *machine)
 #endif
 }
 for (i = 0; i < smp_cpus; i++) {
-cpu = ppc_cpu_init(machine->cpu_model);
+cpu = ppc_cpu_init(machine->cpu_model, i);
 if (cpu == NULL) {
 fprintf(stderr, "Unable to find PowerPC CPU definition\n");
 exit(1);
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index 77fbdfffd4e2..ceb92f820dde 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -113,7 +113,7 @@ static void ppc_heathrow_init(MachineState *machine)
 if (machine->cpu_model == NULL)
 machine->cpu_model = "G3";
 for (i = 0; i < smp_cpus; i++) {
-cpu = ppc_cpu_init(machine->cpu_model);
+cpu = ppc_cpu_init(machine->cpu_model, i);
 if (cpu == NULL) {
 fprintf(stderr, "Unable to find PowerPC CPU definition\n");
 exit(1);
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 57f4ddd073d0..dbc8ac7b3a9b 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -1352,7 +1352,12 @@ PowerPCCPU *ppc_get_vcpu_by_dt_id(int cpu_dt_id)
 return NULL;
 }
 
-PowerPCCPU *ppc_cpu_init(const char *cpu_model)
+static void ppc_set_vcpu_dt_id(PowerPCCPU *cpu, int cpu_index, Error **errp)
+{
+;
+}
+
+PowerPCCPU *ppc_cpu_init(const char *cpu_model, int cpu_index)
 {
 PowerPCCPU *cpu;
 CPUClass *cc;
@@ -1374,6 +1379,11 @@ PowerPCCPU *ppc_cpu_init(const char *cpu_model)
 
 cpu = POWERPC_CPU(object_new(object_class_get_name(oc)));
 
+ppc_set_vcpu_dt_id(cpu, cpu_index, &err);
+if (err != NULL) {
+goto out;
+}
+
 cc = CPU_CLASS(oc);
 cc->parse_features(CPU(cpu), model_pieces[1], &err);
 g_strfreev(model_pieces);
diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c
index 7f22433c8e91..86c453542a94 100644
--- a/hw/ppc/ppc440_bamboo.c
+++ b/hw/ppc/ppc440_bamboo.c
@@ -186,7 +186,7 @@ static void bamboo_init(MachineState *machine)
 if (machine->cpu_model == NULL) {
 machine->cpu_model = "440EP";
 }
-cpu = ppc_cpu_init(machine->cpu_model);
+cpu = ppc_cpu_init(machine->cpu_model, 0);
 if (cpu == NULL) {
 fprintf(stderr, "Unable to initialize CPU!\n");
 exit(1);
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index e62fe643f492..d81d0675255e 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -509,7 +509,7 @@ static void ppc_prep_init(MachineState *machine)
 if (machine->cpu_model == NULL)
 machine->cpu_model = "602";
 for (i = 0; i < smp_cpus; i++) {
-cpu = ppc_cpu_init(machine->cpu_model);
+cpu = ppc_cpu_init(machine->cpu_model, i);
 if (cpu == NULL) {
 fprintf(stderr, "Unable to find PowerPC CPU definition\n");
 exit(1);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 690ee486aa07..57dbac2106d2 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1828,7 +1828,7 @@ static void ppc_spapr_init(MachineState *machine)
 g_free(type);
 } else {
 for (i = 0; i < smp_cpus; i++) {
-PowerPCCPU *cpu = ppc_cpu_init(machine->cpu_model);
+PowerPCCPU *cpu = ppc_cpu_init(machine->cpu_model, i);
 if (cpu == NULL) {
 error_report("Unable to find PowerPC CPU definition");
 exit(1);
diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h
index a4db1db82e1b..647451c9b6ac 100644
--- a/include/hw/ppc/ppc.h
+++ b/include/hw/ppc/ppc.h
@@ -106,5 +106,5 @@ enum {
 /* ppc_booke.c */
 void ppc_booke_

[Qemu-devel] [PATCH v2 4/7] ppc: open code cpu creation for machine types

2016-07-01 Thread Greg Kurz

If we want to generate cpu_dt_id in the machine code, this must occur
before the cpu gets realized. We must open code the cpu creation to be
able to do this.

This patch just does that. It borrows some lines from previous work
from Bharata to handle the feature parsing.

Signed-off-by: Greg Kurz 
---
 hw/ppc/ppc.c |   39 ++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index dc3d214009c5..57f4ddd073d0 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -32,6 +32,7 @@
 #include "sysemu/cpus.h"
 #include "hw/timer/m48t59.h"
 #include "qemu/log.h"
+#include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "hw/loader.h"
 #include "sysemu/kvm.h"
@@ -1353,5 +1354,41 @@ PowerPCCPU *ppc_get_vcpu_by_dt_id(int cpu_dt_id)
 
 PowerPCCPU *ppc_cpu_init(const char *cpu_model)
 {
-return POWERPC_CPU(cpu_generic_init(TYPE_POWERPC_CPU, cpu_model));
+PowerPCCPU *cpu;
+CPUClass *cc;
+ObjectClass *oc;
+gchar **model_pieces;
+Error *err = NULL;
+
+model_pieces = g_strsplit(cpu_model, ",", 2);
+if (!model_pieces[0]) {
+error_report("Invalid/empty CPU model name");
+return NULL;
+}
+
+oc = cpu_class_by_name(TYPE_POWERPC_CPU, model_pieces[0]);
+if (oc == NULL) {
+error_report("Unable to find CPU definition: %s", model_pieces[0]);
+return NULL;
+}
+
+cpu = POWERPC_CPU(object_new(object_class_get_name(oc)));
+
+cc = CPU_CLASS(oc);
+cc->parse_features(CPU(cpu), model_pieces[1], &err);
+g_strfreev(model_pieces);
+if (err != NULL) {
+goto out;
+}
+
+object_property_set_bool(OBJECT(cpu), true, "realized", &err);
+
+out:
+if (err != NULL) {
+error_report_err(err);
+object_unref(OBJECT(cpu));
+return NULL;
+}
+
+return cpu;
 }

[Qemu-devel] [PATCH v2 7/7] ppc: move the cpu_dt_id logic to machine code

2016-07-01 Thread Greg Kurz

Now that every supported machine type is able to provide a cpu_index, we
can safely move all the cpu_dt_id bits to the machine code.

TODO: the cpu_dt_id logic remains the same wannabe generic one as before
because of its target code background: machine types should provide their
own cpu_dt_id logic (it is required by the future powernv machine type for
example).

Signed-off-by: Greg Kurz 
---
 hw/ppc/ppc.c|   28 +++-
 target-ppc/translate_init.c |   30 --
 2 files changed, 27 insertions(+), 31 deletions(-)

diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 12de255fb211..506b493bf43b 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -1354,7 +1354,33 @@ PowerPCCPU *ppc_get_vcpu_by_dt_id(int cpu_dt_id)
 
 void ppc_set_vcpu_dt_id(PowerPCCPU *cpu, int cpu_index, Error **errp)
 {
-;
+int max_smt = kvmppc_smt_threads();
+int vcpu_dt_id;
+
+if (smp_threads > max_smt) {
+error_setg(errp, "Cannot support more than %d threads on PPC with %s",
+   max_smt, kvm_enabled() ? "KVM" : "TCG");
+return;
+}
+if (!is_power_of_2(smp_threads)) {
+error_setg(errp, "Cannot support %d threads on PPC with %s, "
+   "threads count must be a power of 2.",
+   smp_threads, kvm_enabled() ? "KVM" : "TCG");
+return;
+}
+
+vcpu_dt_id = (cpu_index / smp_threads) * max_smt
++ (cpu_index % smp_threads);
+
+if (kvm_enabled() && !kvm_vcpu_id_is_valid(vcpu_dt_id)) {
+error_setg(errp, "Can't create CPU with id %d in KVM", vcpu_dt_id);
+error_append_hint(errp, "Adjust the number of cpus to %d "
+  "or try to raise the number of threads per core\n",
+  vcpu_dt_id * smp_threads / max_smt);
+return;
+}
+
+cpu->cpu_dt_id = vcpu_dt_id;
 }
 
 PowerPCCPU *ppc_cpu_init(const char *cpu_model, int cpu_index)
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 6706787b41a1..a54845a5be8f 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -9515,23 +9515,6 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error 
**errp)
 PowerPCCPU *cpu = POWERPC_CPU(dev);
 PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
 Error *local_err = NULL;
-#if !defined(CONFIG_USER_ONLY)
-int max_smt = kvmppc_smt_threads();
-#endif
-
-#if !defined(CONFIG_USER_ONLY)
-if (smp_threads > max_smt) {
-error_setg(errp, "Cannot support more than %d threads on PPC with %s",
-   max_smt, kvm_enabled() ? "KVM" : "TCG");
-return;
-}
-if (!is_power_of_2(smp_threads)) {
-error_setg(errp, "Cannot support %d threads on PPC with %s, "
-   "threads count must be a power of 2.",
-   smp_threads, kvm_enabled() ? "KVM" : "TCG");
-return;
-}
-#endif
 
 cpu_exec_init(cs, &local_err);
 if (local_err != NULL) {
@@ -9539,19 +9522,6 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error 
**errp)
 return;
 }
 
-#if !defined(CONFIG_USER_ONLY)
-cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt
-+ (cs->cpu_index % smp_threads);
-
-if (kvm_enabled() && !kvm_vcpu_id_is_valid(cpu->cpu_dt_id)) {
-error_setg(errp, "Can't create CPU with id %d in KVM", cpu->cpu_dt_id);
-error_append_hint(errp, "Adjust the number of cpus to %d "
-  "or try to raise the number of threads per core\n",
-  cpu->cpu_dt_id * smp_threads / max_smt);
-return;
-}
-#endif
-
 if (tcg_enabled()) {
 if (ppc_fixup_cpu(cpu) != 0) {
 error_setg(errp, "Unable to emulate selected CPU with TCG");

[Qemu-devel] [PATCH v2 3/7] ppc: different creation paths for cpus in system and user mode

2016-07-01 Thread Greg Kurz

The machine code currently uses the same cpu_ppc_init() function to
create cpus as the user mode. This function also triggers the cpu
realization.

It is okay for user mode but with system mode we may want to do other
things between initialization and realization, like generating cpu
ids for the DT for example.

With this patch, each mode has its own creation helper:
- ppc_cpu_init() is for system mode only
- cpu_init() is for user mode only

Suggested-by: Igor Mammedov 
Signed-off-by: Greg Kurz 
---
 hw/ppc/e500.c   |2 +-
 hw/ppc/mac_newworld.c   |2 +-
 hw/ppc/mac_oldworld.c   |2 +-
 hw/ppc/ppc.c|5 +
 hw/ppc/ppc440_bamboo.c  |2 +-
 hw/ppc/ppc4xx_devs.c|2 +-
 hw/ppc/prep.c   |2 +-
 hw/ppc/spapr.c  |2 +-
 hw/ppc/virtex_ml507.c   |2 +-
 include/hw/ppc/ppc.h|1 +
 target-ppc/cpu.h|5 +++--
 target-ppc/translate_init.c |5 -
 12 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 0cd534df55f8..ff5d92e48dd9 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -821,7 +821,7 @@ void ppce500_init(MachineState *machine, PPCE500Params 
*params)
 CPUState *cs;
 qemu_irq *input;
 
-cpu = cpu_ppc_init(machine->cpu_model);
+cpu = ppc_cpu_init(machine->cpu_model);
 if (cpu == NULL) {
 fprintf(stderr, "Unable to initialize CPU!\n");
 exit(1);
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 32e88b378687..6ab675c498d0 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -193,7 +193,7 @@ static void ppc_core99_init(MachineState *machine)
 #endif
 }
 for (i = 0; i < smp_cpus; i++) {
-cpu = cpu_ppc_init(machine->cpu_model);
+cpu = ppc_cpu_init(machine->cpu_model);
 if (cpu == NULL) {
 fprintf(stderr, "Unable to find PowerPC CPU definition\n");
 exit(1);
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index 447948746b1a..77fbdfffd4e2 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -113,7 +113,7 @@ static void ppc_heathrow_init(MachineState *machine)
 if (machine->cpu_model == NULL)
 machine->cpu_model = "G3";
 for (i = 0; i < smp_cpus; i++) {
-cpu = cpu_ppc_init(machine->cpu_model);
+cpu = ppc_cpu_init(machine->cpu_model);
 if (cpu == NULL) {
 fprintf(stderr, "Unable to find PowerPC CPU definition\n");
 exit(1);
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index e4252528a69d..dc3d214009c5 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -1350,3 +1350,8 @@ PowerPCCPU *ppc_get_vcpu_by_dt_id(int cpu_dt_id)
 
 return NULL;
 }
+
+PowerPCCPU *ppc_cpu_init(const char *cpu_model)
+{
+return POWERPC_CPU(cpu_generic_init(TYPE_POWERPC_CPU, cpu_model));
+}
diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c
index 5c535b18a20d..7f22433c8e91 100644
--- a/hw/ppc/ppc440_bamboo.c
+++ b/hw/ppc/ppc440_bamboo.c
@@ -186,7 +186,7 @@ static void bamboo_init(MachineState *machine)
 if (machine->cpu_model == NULL) {
 machine->cpu_model = "440EP";
 }
-cpu = cpu_ppc_init(machine->cpu_model);
+cpu = ppc_cpu_init(machine->cpu_model);
 if (cpu == NULL) {
 fprintf(stderr, "Unable to initialize CPU!\n");
 exit(1);
diff --git a/hw/ppc/ppc4xx_devs.c b/hw/ppc/ppc4xx_devs.c
index e7f413e49d08..94a24243af70 100644
--- a/hw/ppc/ppc4xx_devs.c
+++ b/hw/ppc/ppc4xx_devs.c
@@ -56,7 +56,7 @@ PowerPCCPU *ppc4xx_init(const char *cpu_model,
 CPUPPCState *env;
 
 /* init CPUs */
-cpu = cpu_ppc_init(cpu_model);
+cpu = ppc_cpu_init(cpu_model, 0);
 if (cpu == NULL) {
 fprintf(stderr, "Unable to find PowerPC %s CPU definition\n",
 cpu_model);
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index 054af1e8b481..e62fe643f492 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -509,7 +509,7 @@ static void ppc_prep_init(MachineState *machine)
 if (machine->cpu_model == NULL)
 machine->cpu_model = "602";
 for (i = 0; i < smp_cpus; i++) {
-cpu = cpu_ppc_init(machine->cpu_model);
+cpu = ppc_cpu_init(machine->cpu_model);
 if (cpu == NULL) {
 fprintf(stderr, "Unable to find PowerPC CPU definition\n");
 exit(1);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 78ebd9ee38ce..690ee486aa07 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1828,7 +1828,7 @@ static void ppc_spapr_init(MachineState *machine)
 g_free(type);
 } else {
 for (i = 0; i < smp_cpus; i++) {
-PowerPCCPU *cpu = cpu_ppc_init(machine->cpu_model);
+PowerPCCPU *cpu = ppc_cpu_init(machine->cpu_model);
 if (cpu == NULL) {
 error_report("Unable to find PowerPC CPU definition");
 exit(1);
diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c
index b97d

[Qemu-devel] [PATCH v2 1/7] spapr: Ensure thread0 of CPU core is always realized first

2016-07-01 Thread Greg Kurz

From: Bharata B Rao 

During CPU core realization, we create all the thread objects and parent
them to the core object in a loop. However, the realization of thread
objects is done separately by walking the threads of a core using
object_child_foreach(). With this, there is no guarantee on the order
in which the child thread objects get realized. Since CPU device tree
properties are currently derived from the CPU thread object, we assume
thread0 of the core to be the representative thread of the core when
creating device tree properties for the core. If thread0 is not the
first thread that gets realized, then we would end up having an
incorrect dt_id for the core and this causes hotplug failures from
the guest.

Fix this by realizing each thread object by walking the core's thread
object list thereby ensuring that thread0 and other threads are always
realized in the correct order.

Future TODO: CPU DT nodes are per-core properties and we should
ideally base the creation of CPU DT nodes on core objects rather than
the thread objects.

Signed-off-by: Bharata B Rao 
Reviewed-by: Greg Kurz 
Signed-off-by: Greg Kurz 
---
 hw/ppc/spapr_cpu_core.c |   29 -
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index a384db5204ac..70b6b0b5ee17 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -259,9 +259,9 @@ out:
 error_propagate(errp, local_err);
 }
 
-static int spapr_cpu_core_realize_child(Object *child, void *opaque)
+static void spapr_cpu_core_realize_child(Object *child, Error **errp)
 {
-Error **errp = opaque, *local_err = NULL;
+Error *local_err = NULL;
 sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 CPUState *cs = CPU(child);
 PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -269,15 +269,14 @@ static int spapr_cpu_core_realize_child(Object *child, 
void *opaque)
 object_property_set_bool(child, true, "realized", &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
-return 1;
+return;
 }
 
 spapr_cpu_init(spapr, cpu, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
-return 1;
+return;
 }
-return 0;
 }
 
 static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
@@ -287,13 +286,13 @@ static void spapr_cpu_core_realize(DeviceState *dev, 
Error **errp)
 const char *typename = object_class_get_name(sc->cpu_class);
 size_t size = object_type_get_instance_size(typename);
 Error *local_err = NULL;
-Object *obj;
-int i;
+void *obj;
+int i, j;
 
 sc->threads = g_malloc0(size * cc->nr_threads);
 for (i = 0; i < cc->nr_threads; i++) {
 char id[32];
-void *obj = sc->threads + i * size;
+obj = sc->threads + i * size;
 
 object_initialize(obj, size, typename);
 snprintf(id, sizeof(id), "thread[%d]", i);
@@ -303,12 +302,16 @@ static void spapr_cpu_core_realize(DeviceState *dev, 
Error **errp)
 }
 object_unref(obj);
 }
-object_child_foreach(OBJECT(dev), spapr_cpu_core_realize_child, 
&local_err);
-if (local_err) {
-goto err;
-} else {
-return;
+
+for (j = 0; j < cc->nr_threads; j++) {
+obj = sc->threads + j * size;
+
+spapr_cpu_core_realize_child(obj, &local_err);
+if (local_err) {
+goto err;
+}
 }
+return;
 
 err:
 while (--i >= 0) {

[Qemu-devel] [PATCH v2 6/7] spapr: use ppc_set_vcpu_dt_id() in CPU hotplug code

2016-07-01 Thread Greg Kurz

Starting with version 2.7, the pseries machine supports hotplug of
cpu cores. The implementation needs to open code cpu creation
and thus does not call ppc_cpu_init().

This patch does all the plumbing to allow pseries machine types
with version >= 2.7 to generate cpu DT ids out of the indexes
of the cores and threads in their respective arrays.

Suggested-by: Igor Mammedov 
Signed-off-by: Greg Kurz 
---
 hw/ppc/ppc.c|2 +-
 hw/ppc/spapr_cpu_core.c |   11 +--
 include/hw/ppc/ppc.h|1 +
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index dbc8ac7b3a9b..12de255fb211 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -1352,7 +1352,7 @@ PowerPCCPU *ppc_get_vcpu_by_dt_id(int cpu_dt_id)
 return NULL;
 }
 
-static void ppc_set_vcpu_dt_id(PowerPCCPU *cpu, int cpu_index, Error **errp)
+void ppc_set_vcpu_dt_id(PowerPCCPU *cpu, int cpu_index, Error **errp)
 {
 ;
 }
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 70b6b0b5ee17..475c8063f086 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -259,13 +259,20 @@ out:
 error_propagate(errp, local_err);
 }
 
-static void spapr_cpu_core_realize_child(Object *child, Error **errp)
+static void spapr_cpu_core_realize_child(Object *child, int cpu_index,
+ Error **errp)
 {
 Error *local_err = NULL;
 sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 CPUState *cs = CPU(child);
 PowerPCCPU *cpu = POWERPC_CPU(cs);
 
+ppc_set_vcpu_dt_id(cpu, cpu_index, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+
 object_property_set_bool(child, true, "realized", &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
@@ -306,7 +313,7 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error 
**errp)
 for (j = 0; j < cc->nr_threads; j++) {
 obj = sc->threads + j * size;
 
-spapr_cpu_core_realize_child(obj, &local_err);
+spapr_cpu_core_realize_child(obj, cc->core_id + j, &local_err);
 if (local_err) {
 goto err;
 }
diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h
index 647451c9b6ac..a9067f45b3f4 100644
--- a/include/hw/ppc/ppc.h
+++ b/include/hw/ppc/ppc.h
@@ -107,4 +107,5 @@ enum {
 void ppc_booke_timers_init(PowerPCCPU *cpu, uint32_t freq, uint32_t flags);
 
 PowerPCCPU *ppc_cpu_init(const char *cpu_model, int cpu_index);
+void ppc_set_vcpu_dt_id(PowerPCCPU *cpu, int cpu_index, Error **errp);
 #endif

[Qemu-devel] [PATCH v2 2/7] ppc: simplify max_smt initialization in ppc_cpu_realizefn()

2016-07-01 Thread Greg Kurz

kvmppc_smt_threads() returns 1 if KVM is not enabled.

Signed-off-by: Greg Kurz 
---
 target-ppc/translate_init.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 843f19b748fb..a06bf50b65d4 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -9516,7 +9516,7 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error 
**errp)
 PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
 Error *local_err = NULL;
 #if !defined(CONFIG_USER_ONLY)
-int max_smt = kvm_enabled() ? kvmppc_smt_threads() : 1;
+int max_smt = kvmppc_smt_threads();
 #endif
 
 #if !defined(CONFIG_USER_ONLY)

[Qemu-devel] [PATCH v2 0/7] ppc: compute cpu_dt_id in the machine code

2016-07-01 Thread Greg Kurz

This series is a sequel to the discussion on a patch from Ben's powernv
patchset:

http://patchwork.ozlabs.org/patch/597153/

Indeed, since the DT is a machine abstraction, it should definitely sit
under hw/ppc and not in the target code:
- all machine types are forced to share the same numbering logic
- user mode does not need that => there are #ifdef everywhere

So this series moves all the current numbering logic to the machine
code.

The patchset was completely re-written according to Igor's valuable
suggestions. The main change is that cpu_dt_id is now computed out
of a cpu_index provided by the machine, instead of bending the code
to use cs->cpu_index.

I resend Bharata's patch without any modification because patch 6 is
based on it (pseries-2.7 support).

I did not see any regression on pseries-2.6, pseries-2.7 and CPU hotplug.

Migration is not impacted because the new cpu_index has the same value
as cs->cpu_index, and thus gives the same guest visible cpu_dt_id.

TODO: find a way for machines to provide their own cpu_dt_id logic.

---

Bharata B Rao (1):
  spapr: Ensure thread0 of CPU core is always realized first

Greg Kurz (6):
  ppc: simplify max_smt initialization in ppc_cpu_realizefn()
  ppc: different creation paths for cpus in system and user mode
  ppc: open code cpu creation for machine types
  ppc: introduce ppc_set_vcpu_dt_id()
  spapr: use ppc_set_vcpu_dt_id() in CPU hotplug code
  ppc: move the cpu_dt_id logic to machine code


 hw/ppc/e500.c   |2 +
 hw/ppc/mac_newworld.c   |2 +
 hw/ppc/mac_oldworld.c   |2 +
 hw/ppc/ppc.c|   78 +++
 hw/ppc/ppc440_bamboo.c  |2 +
 hw/ppc/ppc4xx_devs.c|2 +
 hw/ppc/prep.c   |2 +
 hw/ppc/spapr.c  |2 +
 hw/ppc/spapr_cpu_core.c |   36 +---
 hw/ppc/virtex_ml507.c   |2 +
 include/hw/ppc/ppc.h|2 +
 target-ppc/cpu.h|5 ++-
 target-ppc/translate_init.c |   35 ---
 13 files changed, 114 insertions(+), 58 deletions(-)

--
Greg

Re: [Qemu-devel] [Bug 1594239] Re: After adding more scsi disks for Aarch64 virtual machine, start the VM and got Qemu Error

2016-07-01 Thread Tom Hanson

We may be saying the same thing, but I'd word it differently.  If a
 "device" has a "path" then it gets a se->compat (compatibility?) record.
   -  Within that record each device gets an instance_id value based on its
name.  Multiple IDs for the same name are allowed.
   -  At the "se" level each device also gets an instance id but now based
on path + name.  There can only be one instance for that combination which
requires that the path must be unique for each device name.

In this case both SCSI devices have the path "0:0:0" (chan:id:lun) which
violates the above requirement.

Looking at the debug info I noticed that for "virtio-net" the (PCI) path is
not all zeroes (0000:00:01.0).  Makes me wonder if maybe something on the
SCSI side of things should be generating valid paths.

Still digging.

On 1 July 2016 at 09:08, Dr. David Alan Gilbert 
wrote:

> Yeh I *think* the idea is that you either:
>  a) have an instance_id
> or
>  b) have a unique name
>  in which case you're also allowed to have an old compatibility
> name/instance_id to work with old code that didn't have a unique name
> (that's in se->compat)
>
> so the assert is:
>assert(!se->compat || se->instance_id == 0);
>
>  The !se->compat  corresponds to (a)
>  se->instance_id == 0 corresponds to (b)
>
> Having a unique name is a very good idea for hotplug - it lets you
> unplug the middle one and still receive a migration correctly.
>
> Dave
>
> --
> You received this bug notification because you are subscribed to the bug
> report.
> https://bugs.launchpad.net/bugs/1594239
>
> Title:
>   After adding more scsi disks for Aarch64 virtual machine, start the VM
>   and got Qemu Error
>
> Status in QEMU:
>   Confirmed
>
> Bug description:
>   Description
>   ===
>   Using virt-manager to create a VM in Aarch64, Ubuntu 16.04.
>   Add scsi disk to the VM. After add four or more scsi disks, start the VM
> and will got Qemu error.
>
>   Steps to reproduce
>   ==
>   1.Use virt-manager to create a VM.
>   2.After the VM is started, add scsi disk to the VM. They will be
> allocated to "sdb,sdc,sdd." .
>   3.If we got a disk name > sdg, virt-manager will also assign a
> virtio-scsi controller for this disk.And the VM will be shutdown.
>   4.Start the VM, will see the error log.
>
>
>   Expected result
>   ===
>   Start the vm smoothly.The added disks can work.
>
>   Actual result
>   =
>   Got the error:
>   starting domain: internal error: process exited while connecting to
> monitor: qemu-system-aarch64:
> /build/qemu-zxCwKP/qemu-2.5+dfsg/migration/savevm.c:620:
> vmstate_register_with_alias_id: Assertion `!se->compat || se->instance_id
> == 0' failed.
>   details=Traceback (most recent call last):
> File "/usr/share/virt-manager/virtManager/asyncjob.py", line 90, in
> cb_wrapper
>   callback(asyncjob, *args, **kwargs)
> File "/usr/share/virt-manager/virtManager/asyncjob.py", line 126, in
> tmpcb
>   callback(*args, **kwargs)
> File "/usr/share/virt-manager/virtManager/libvirtobject.py", line 83,
> in newfn
>   ret = fn(self, *args, **kwargs)
> File "/usr/share/virt-manager/virtManager/domain.py", line 1402, in
> startup
>   self._backend.create()
> File "/usr/local/lib/python2.7/dist-packages/libvirt.py", line 1035,
> in create
>   if ret == -1: raise libvirtError ('virDomainCreate() failed',
> dom=self)
>   libvirtError: internal error: process exited while connecting to
> monitor: qemu-system-aarch64:
> /build/qemu-zxCwKP/qemu-2.5+dfsg/migration/savevm.c:620:
> vmstate_register_with_alias_id: Assertion `!se->compat || se->instance_id
> == 0' failed.
>
>
>   Environment
>   ===
>   1. virt-manager version is 1.3.2
>
>   2. Which hypervisor did you use?
>   Libvirt+KVM
>   $ kvm --version
>   QEMU emulator version 2.5.0 (Debian 1:2.5+dfsg-5ubuntu10.1),
> Copyright (c) 2003-2008 Fabrice Bellard
>   $ libvirtd --version
>   libvirtd (libvirt) 1.3.1
>
>   3. Which storage type did you use?
>  In the host file system,all in one physics machine.
>   stack@u202154:/opt/stack/nova$ df -hl
>   Filesystem Size Used Avail Use% Mounted on
>   udev 7.8G 0 7.8G 0% /dev
>   tmpfs 1.6G 61M 1.6G 4% /run
>   /dev/sda2 917G 41G 830G 5% /
>   tmpfs 7.9G 0 7.9G 0% /dev/shm
>   tmpfs 5.0M 0 5.0M 0% /run/lock
>   tmpfs 7.9G 0 7.9G 0% /sys/fs/cgroup
>   /dev/sda1 511M 888K 511M 1% /boot/efi
>   cgmfs 100K 0 100K 0% /run/cgmanager/fs
>   tmpfs 1.6G 0 1.6G 0% /run/user/1002
>   tmpfs 1.6G 0 1.6G 0% /run/user/1000
>   tmpfs 1.6G 0 1.6G 0% /run/user/0
>
>   4. Environment information:
>  Architecture : AARCH64
>  OS: Ubuntu 16.04
>
>   The Qemu commmand of libvirt is :
>   2016-06-20 02:39:46.561+: starting up libvirt version: 1.3.1,
> package: 1ubuntu10 (William Grant  Fri, 15 Apr 2016
> 12:08:21 +1000), qemu version: 2.5.0 (Debian 1:2.5+dfsg-5ubuntu10.1),
> hostname: u202154
>   LC_ALL=C
> PATH=/usr/loca

Re: [Qemu-devel] Expensive emulation of CPU condition flags

2016-07-01 Thread Richard Henderson


On 06/30/2016 11:13 AM, Shuang Zhai wrote:

We wonder if there exists any optimization, e.g., directly mapping the
frontend flags to that of the backend? Any suggestions are appreciated.


Directly mapping frontend to backend flags is a non-starter, since not all 
backends have those flags.


There are alternate methods of emulating condition codes.  As an example, 
target-i386 and target-sparc store two values and an "operation code" value. 
The latter indicates how to treat the former.  This allows for the full 
computation of the flags to be delayed, and for the host compare-and-branch to 
be less complicated.


See also my design for an improved m68k condition code scheme:

  http://lists.nongnu.org/archive/html/qemu-devel/2016-05/msg00501.html
especially
  http://lists.nongnu.org/archive/html/qemu-devel/2016-05/msg00524.html

You're welcome to experiment with target-arm.  If you can create a scheme that 
performs better than the current, we'd like to hear about it.



r~

Re: [Qemu-devel] [PATCH v3 1/1] target-arm: Use Neon for zero checking

2016-07-01 Thread Richard Henderson


On 06/30/2016 06:45 AM, Peter Maydell wrote:

On 29 June 2016 at 09:47,   wrote:

From: Vijay 

Use Neon instructions to perform zero checking of
buffer. This helps in reducing total migration time.



diff --git a/util/cutils.c b/util/cutils.c
index 5830a68..4779403 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -184,6 +184,13 @@ int qemu_fdatasync(int fd)
 #define SPLAT(p)   _mm_set1_epi8(*(p))
 #define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF)
 #define VEC_OR(v1, v2) (_mm_or_si128(v1, v2))
+#elif __aarch64__
+#include "arm_neon.h"
+#define VECTYPE        uint64x2_t
+#define ALL_EQ(v1, v2) \
+((vgetq_lane_u64(v1, 0) == vgetq_lane_u64(v2, 0)) && \
+ (vgetq_lane_u64(v1, 1) == vgetq_lane_u64(v2, 1)))
+#define VEC_OR(v1, v2) ((v1) | (v2))


Should be '#elif defined(__aarch64__)'. I have made this
tweak and put this patch in target-arm.next.


Consider

#define VECTYPE        uint32x4_t
#define ALL_EQ(v1, v2) (vmaxvq_u32((v1) ^ (v2)) == 0)


which compiles down to

  1c:   6e211c00    eor     v0.16b, v0.16b, v1.16b
  20:   6eb0a800    umaxv   s0, v0.4s
  24:   1e260000    fmov    w0, s0
  28:   6b1f001f    cmp     w0, wzr
  2c:   1a9f17e0    cset    w0, eq
  30:   d65f03c0    ret

vs

  34:   4e083c20    mov     x0, v1.d[0]
  38:   4e083c01    mov     x1, v0.d[0]
  3c:   eb00003f    cmp     x1, x0
  40:   52800000    mov     w0, #0
  44:   54000040    b.eq    4c
  48:   d65f03c0    ret
  4c:   4e183c20    mov     x0, v1.d[1]
  50:   4e183c01    mov     x1, v0.d[1]
  54:   eb00003f    cmp     x1, x0
  58:   1a9f17e0    cset    w0, eq
  5c:   d65f03c0    ret


r~

Re: [Qemu-devel] [PATCH v5 9/9] tests: add a m25p80 test

2016-07-01 Thread Greg Kurz

On Fri, 1 Jul 2016 19:30:30 +0200
Cédric Le Goater  wrote:

> On 07/01/2016 07:18 PM, Peter Maydell wrote:
> > On 28 June 2016 at 19:24, Cédric Le Goater  wrote:  
> >> This test uses the palmetto platform and the AST2400 SPI controller to
> >> test the m25p80 flash module device model. The flash model is defined
> >> by the platform (n25q256a) and it would be nice to find a way to control
> >> it, using a property probably.
> >>
> >> Signed-off-by: Cédric Le Goater 
> >> Reviewed-by: Peter Maydell 
> >> ---
> >>  
> > 
> > This test fails on ppc64be:
> > 
> > TEST: tests/m25p80-test... (pid=65123)
> >   /arm/m25p80/read_jedec:  OK
> >   /arm/m25p80/erase_sector:OK
> >   /arm/m25p80/erase_all:   **
> > ERROR:/home/pm215/qemu/tests/m25p80-test.c:162:test_erase_all:
> > assertion failed (page[i] == 0x0):
> > (0x == 0x)
> > FAIL
> > GTester: last random seed: R02S54b2016fda21b092e18d7a23a2db86ba
> > (pid=65128)
> >   /arm/m25p80/write_page:  **
> > ERROR:/home/pm215/qemu/tests/m25p80-test.c:200:test_write_page:
> > assertion failed (page[i] == my_page_addr + i * 4): (0x ==
> > 0x0140)
> > FAIL
> > GTester: last random seed: R02S8708910d6b72f700bc41e9340a516239
> > (pid=65133)
> > FAIL: tests/m25p80-test  
> 
> yes ... I am not sure how to fix this :/ 
> 
> I started with a patch using qtest_big_endian() and I found that 
> this one was fixing the problem : 
> 
>   https://lists.gnu.org/archive/html/qemu-devel/2016-06/msg07876.html
> 
> but it feels wrong. The interesting part is that the guest fully 
> boots on a ppc64be. We need an endian shaman for this. Greg ? 
> 

Heh ! Looking at the Cc list I guess you have chances to find the
shaman you're looking for :)

Anyway, doing bswap32() systematically like in the patch mentioned above
looks weird indeed.

> Thanks,
> 
> C.

Re: [Qemu-devel] [PATCH 01/18] Use #include "..." for our own headers, <...> for others

2016-07-01 Thread Eric Blake

On 06/29/2016 09:55 AM, Markus Armbruster wrote:
> Tracked down with an ugly, brittle and probably buggy Perl script.
> 
> Also move includes converted to <...> up so they get included before
> ours where that's obviously okay.
> 
> Signed-off-by: Markus Armbruster 
> ---

> +++ b/qga/vss-win32/provider.cpp
> @@ -12,8 +12,8 @@
>  
>  #include "qemu/osdep.h"
>  #include "vss-common.h"
> -#include "inc/win2003/vscoordint.h"
> -#include "inc/win2003/vsprov.h"
> +#include 
> +#include 

Worth hoisting <> before "" in this file?

>  
>  #define VSS_TIMEOUT_MSEC (60*1000)
>  
> diff --git a/qga/vss-win32/requester.cpp b/qga/vss-win32/requester.cpp
> index 889052d..0cd2f0e 100644
> --- a/qga/vss-win32/requester.cpp
> +++ b/qga/vss-win32/requester.cpp
> @@ -13,8 +13,8 @@
>  #include "qemu/osdep.h"
>  #include "vss-common.h"
>  #include "requester.h"
> -#include "inc/win2003/vswriter.h"
> -#include "inc/win2003/vsbackup.h"
> +#include 
> +#include 

Ditto.


> +++ b/tests/tcg/xtensa/linker.ld.S
> @@ -1,4 +1,4 @@
> -#include 
> +#include "core-isa.h"
>  

Just making sure that "" vs. <> isn't going to hurt assembler files,
since the rules on preprocessing those may be different.

Reviewed-by: Eric Blake 
Tested-by: Eric Blake 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 6/6] coroutine: reduce stack size to 64kB

2016-07-01 Thread Richard Henderson


On 06/30/2016 12:37 AM, Peter Lieven wrote:

evaluation with the recently introduced maximum stack usage monitoring revealed
that the actual used stack size was never above 4kB so allocating 1MB stack
for each coroutine is a lot of wasted memory. So reduce the stack size to
64kB which should still give enough head room.

Signed-off-by: Peter Lieven 
---
 include/qemu/coroutine_int.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h
index eac323a..f84d777 100644
--- a/include/qemu/coroutine_int.h
+++ b/include/qemu/coroutine_int.h
@@ -28,7 +28,7 @@
 #include "qemu/queue.h"
 #include "qemu/coroutine.h"

-#define COROUTINE_STACK_SIZE (1 << 20)
+#define COROUTINE_STACK_SIZE (1 << 16)

 typedef enum {
 COROUTINE_YIELD = 1,



Ought we check that this is not smaller than

sysconf(_SC_THREAD_STACK_MIN)

which (for glibc at least), is 192k for ia64, 128k for aarch64, mips and tile 
(though why it is quite so high in those latter cases I don't know).



r~

Re: [Qemu-devel] [PATCH 4/6] coroutine-sigaltstack: use helper for allocating stack memory

2016-07-01 Thread Richard Henderson


On 06/30/2016 12:37 AM, Peter Lieven wrote:

Signed-off-by: Peter Lieven 
---
 util/coroutine-sigaltstack.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)



Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH 5/6] oslib-posix: add a configure switch to debug stack usage

2016-07-01 Thread Richard Henderson


On 06/30/2016 12:37 AM, Peter Lieven wrote:

+for (ptr2 = ptr; ptr2 < ptr + sz; ptr2 += sizeof(u_int32_t)) {
+*(u_int32_t *)ptr2 = 0xdeadbeaf;
+}


s/u_int32_t/uint32_t/g


r~

Re: [Qemu-devel] [PATCH 3/6] coroutine-ucontext: use helper for allocating stack memory

2016-07-01 Thread Richard Henderson


On 06/30/2016 12:37 AM, Peter Lieven wrote:

Signed-off-by: Peter Lieven 
---
 util/coroutine-ucontext.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH 2/6] coroutine: add a macro for the coroutine stack size

2016-07-01 Thread Richard Henderson


On 06/30/2016 12:37 AM, Peter Lieven wrote:

Signed-off-by: Peter Lieven 
---
 include/qemu/coroutine_int.h | 2 ++
 util/coroutine-sigaltstack.c | 2 +-
 util/coroutine-ucontext.c| 2 +-
 util/coroutine-win32.c   | 2 +-
 4 files changed, 5 insertions(+), 3 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH 1/6] oslib-posix: add helpers for stack alloc and free

2016-07-01 Thread Richard Henderson


On 07/01/2016 01:12 PM, Richard Henderson wrote:

On 06/30/2016 12:37 AM, Peter Lieven wrote:

+void *qemu_alloc_stack(size_t sz)
+{
+/* allocate sz bytes plus one extra page for a guard
+ * page at the bottom of the stack */
+void *ptr = mmap(NULL, sz + getpagesize(), PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+if (ptr == MAP_FAILED) {
+abort();
+}
+if (mmap(ptr + getpagesize(), sz, PROT_READ | PROT_WRITE,
+MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) == MAP_FAILED) {
+abort();
+}


Rare platforms now, but fwiw, this is incorrect for hppa and ia64.

For hppa, stack grows up, so the guard page needs to be at the top.

For ia64, there are two stacks, the "normal" program stack (grows down) and the 
register window stack (grows up).  The guard page goes in between.


See e.g. glibc/nptl/allocatestack.c

#ifdef NEED_SEPARATE_REGISTER_STACK
  char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
#elif _STACK_GROWS_DOWN
  char *guard = mem;
#elif _STACK_GROWS_UP
  char *guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
#endif
  if (mprotect (guard, guardsize, PROT_NONE) != 0)



r~

Re: [Qemu-devel] [PATCH 1/6] oslib-posix: add helpers for stack alloc and free

2016-07-01 Thread Richard Henderson


On 06/30/2016 12:37 AM, Peter Lieven wrote:

+void *qemu_alloc_stack(size_t sz)
+{
+/* allocate sz bytes plus one extra page for a guard
+ * page at the bottom of the stack */
+void *ptr = mmap(NULL, sz + getpagesize(), PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+if (ptr == MAP_FAILED) {
+abort();
+}
+if (mmap(ptr + getpagesize(), sz, PROT_READ | PROT_WRITE,
+MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) == MAP_FAILED) {
+abort();
+}


Why two mmap instead of mmap + mprotect?


r~

Re: [Qemu-devel] [PATCH 00/18] Clean up #include "..." vs "<...>" and header guards

2016-07-01 Thread Richard Henderson


On 06/29/2016 08:55 AM, Markus Armbruster wrote:

This series takes a good swing at two annoyances:

* We sometimes use #include "..." even for system headers, and <...>
  for our own headers.  Makes spotting the system headers harder, and
  can be confusing.  PATCH 01 cleans this up.  You've seen it before
  as "[PATCH RFC v2 1/5] Use #include "..." exactly for our own
  headers", but I've since replaced the bugs in my Perl script by less
  stupid ones, which made the patch grow.

* Our use of header guards is rather sloppy.  Sloppiness there can
  lead to confusing compilation errors.  The rest of the series cleans
  up existing header guards.  In particular, it normalizes guard
  symbols to follow a common pattern, in the hope of making clashes
  less likely.  It doesn't add new header guards.  We have almost 150
  headers without a recognizable header guard.  A few of them are for
  multiple inclusion, a few more don't need header guards because they
  don't do anything but include, but the majority probably should have
  one.  Left for another day.

Markus Armbruster (18):
  Use #include "..." for our own headers, <...> for others
  scripts: New clean-header-guards.pl
  target-*: Clean up cpu.h header guards
  linux-user: Clean up target_syscall.h header guards
  linux-user: Clean up target_cpu.h header guards
  linux-user: Clean up target_signal.h header guards
  linux-user: Clean up target_structs.h header guards
  linux-user: Clean up hostdep.h header guards
  linux-user: Fix broken header guard in syscall_defs.h
  tcg: Clean up tcg-target.h header guards
  spapr_pci: Include spapr.h instead of playing games with #error
  Drop Emacs local variables lists redundant with .dir-locals.el
  Clean up header guards that don't match their file name
  libdecnumber: Don't fool around with guards to avoid #include
  libdecnumber: Don't error out on decNumberLocal.h re-inclusion
  Clean up ill-advised or unusual header guards
  Clean up decorations and whitespace around header guards
  cris: Fix broken header guard in hw/cris/boot.h


Reviewed-by: Richard Henderson 


r~

[Qemu-devel] [Bug 1297218] Re: guest hangs after live migration due to tsc jump

2016-07-01 Thread Serge Hallyn

Conflicting experimental packages in that ppa, trying ubuntu-virt/ppa
instead.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1297218

Title:
  guest hangs after live migration due to tsc jump

Status in QEMU:
  New
Status in glusterfs package in Ubuntu:
  Invalid
Status in qemu package in Ubuntu:
  Fix Released
Status in glusterfs source package in Trusty:
  Confirmed
Status in qemu source package in Trusty:
  Confirmed

Bug description:
  We have two identical Ubuntu servers running libvirt/kvm/qemu, sharing
  a Gluster filesystem. Guests can be live migrated between them.
  However, live migration often leads to the guest being stuck at 100%
  for a while. In that case, the dmesg output for such a guest will show
  (once it recovers): Clocksource tsc unstable (delta = 662463064082
  ns). In this particular example, a guest was migrated and only after
  11 minutes (662 seconds) did it become responsive again.

  It seems that newly booted guests do not suffer from this problem,
  these can be migrated back and forth at will. After a day or so, the
  problem becomes apparent. It also seems that migrating from server A
  to server B causes much more problems than going from B back to A. If
  necessary, I can do more measurements to qualify these observations.

  The VM servers run Ubuntu 13.04 with these packages:
  Kernel: 3.8.0-35-generic x86_64
  Libvirt: 1.0.2
  Qemu: 1.4.0
  Gluster-fs: 3.4.2 (libvirt access the images via the filesystem, not using 
libgfapi yet as the Ubuntu libvirt is not linked against libgfapi).
  The interconnect between both machines (both for migration and gluster) is 
10GbE. 
  Both servers are synced to NTP and well within 1ms from one another.

  Guests are either Ubuntu 13.04 or 13.10.

  On the guests, the current_clocksource is kvm-clock.
  The XML definition of the guests only contains:   

  Now as far as I've read in the documentation of kvm-clock, it specifically 
supports live migrations, so I'm a bit surprised at these problems. There isn't 
all that much information to find on this issue, although I have found 
postings by others that seem to have run into the same issues, but without a 
solution.
  --- 
  ApportVersion: 2.14.1-0ubuntu3
  Architecture: amd64
  DistroRelease: Ubuntu 14.04
  Package: libvirt (not installed)
  ProcCmdline: BOOT_IMAGE=/boot/vmlinuz-3.13.0-24-generic 
root=UUID=1b0c3c6d-a9b8-4e84-b076-117ae267d178 ro console=ttyS1,115200n8 
BOOTIF=01-00-25-90-75-b5-c8
  ProcVersionSignature: Ubuntu 3.13.0-24.47-generic 3.13.9
  Tags:  trusty apparmor apparmor apparmor apparmor apparmor
  Uname: Linux 3.13.0-24-generic x86_64
  UpgradeStatus: No upgrade log present (probably fresh install)
  UserGroups:
   
  _MarkForUpload: True
  modified.conffile..etc.default.libvirt.bin: [modified]
  modified.conffile..etc.libvirt.libvirtd.conf: [modified]
  modified.conffile..etc.libvirt.qemu.conf: [modified]
  modified.conffile..etc.libvirt.qemu.networks.default.xml: [deleted]
  mtime.conffile..etc.default.libvirt.bin: 2014-05-12T19:07:40.020662
  mtime.conffile..etc.libvirt.libvirtd.conf: 2014-05-13T14:40:25.894837
  mtime.conffile..etc.libvirt.qemu.conf: 2014-05-12T18:58:27.885506

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1297218/+subscriptions

[Qemu-devel] [Bug 1297218] Re: guest hangs after live migration due to tsc jump

2016-07-01 Thread Serge Hallyn

Thank you.  I'm doing a test build in ppa:serge-hallyn/virt, and will
run a full regression test from there.  I'll push for SRU if that
passes.

Would you mind putting in the bug Description (at top) a concise summary
of the test case, for the SRU process?

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1297218

Title:
  guest hangs after live migration due to tsc jump

Status in QEMU:
  New
Status in glusterfs package in Ubuntu:
  Invalid
Status in qemu package in Ubuntu:
  Fix Released
Status in glusterfs source package in Trusty:
  Confirmed
Status in qemu source package in Trusty:
  Confirmed

Bug description:
  We have two identical Ubuntu servers running libvirt/kvm/qemu, sharing
  a Gluster filesystem. Guests can be live migrated between them.
  However, live migration often leads to the guest being stuck at 100%
  for a while. In that case, the dmesg output for such a guest will show
  (once it recovers): Clocksource tsc unstable (delta = 662463064082
  ns). In this particular example, a guest was migrated and only after
  11 minutes (662 seconds) did it become responsive again.

  It seems that newly booted guests do not suffer from this problem,
  these can be migrated back and forth at will. After a day or so, the
  problem becomes apparent. It also seems that migrating from server A
  to server B causes much more problems than going from B back to A. If
  necessary, I can do more measurements to qualify these observations.

  The VM servers run Ubuntu 13.04 with these packages:
  Kernel: 3.8.0-35-generic x86_64
  Libvirt: 1.0.2
  Qemu: 1.4.0
  Gluster-fs: 3.4.2 (libvirt access the images via the filesystem, not using 
libgfapi yet as the Ubuntu libvirt is not linked against libgfapi).
  The interconnect between both machines (both for migration and gluster) is 
10GbE. 
  Both servers are synced to NTP and well within 1ms from one another.

  Guests are either Ubuntu 13.04 or 13.10.

  On the guests, the current_clocksource is kvm-clock.
  The XML definition of the guests only contains:   

  Now as far as I've read in the documentation of kvm-clock, it specifically 
supports live migrations, so I'm a bit surprised at these problems. There isn't 
all that much information to find on this issue, although I have found 
postings by others that seem to have run into the same issues, but without a 
solution.
  --- 
  ApportVersion: 2.14.1-0ubuntu3
  Architecture: amd64
  DistroRelease: Ubuntu 14.04
  Package: libvirt (not installed)
  ProcCmdline: BOOT_IMAGE=/boot/vmlinuz-3.13.0-24-generic 
root=UUID=1b0c3c6d-a9b8-4e84-b076-117ae267d178 ro console=ttyS1,115200n8 
BOOTIF=01-00-25-90-75-b5-c8
  ProcVersionSignature: Ubuntu 3.13.0-24.47-generic 3.13.9
  Tags:  trusty apparmor apparmor apparmor apparmor apparmor
  Uname: Linux 3.13.0-24-generic x86_64
  UpgradeStatus: No upgrade log present (probably fresh install)
  UserGroups:
   
  _MarkForUpload: True
  modified.conffile..etc.default.libvirt.bin: [modified]
  modified.conffile..etc.libvirt.libvirtd.conf: [modified]
  modified.conffile..etc.libvirt.qemu.conf: [modified]
  modified.conffile..etc.libvirt.qemu.networks.default.xml: [deleted]
  mtime.conffile..etc.default.libvirt.bin: 2014-05-12T19:07:40.020662
  mtime.conffile..etc.libvirt.libvirtd.conf: 2014-05-13T14:40:25.894837
  mtime.conffile..etc.libvirt.qemu.conf: 2014-05-12T18:58:27.885506

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1297218/+subscriptions

Re: [Qemu-devel] [PATCH v9 00/10] 8bit AVR cores

2016-07-01 Thread Richard Henderson


On 07/01/2016 07:47 AM, Michael Rolnik wrote:

5. translation bug fixes for ADIW, SBIW, XOR instructions
6. proper handling of cpu register writes through memory


I don't see these changes in the patch set.


r~

Re: [Qemu-devel] Default for phys-addr-bits? (was Re: [PATCH 4/5] x86: Allow physical address bits to be set)

2016-07-01 Thread Dr. David Alan Gilbert

* Gerd Hoffmann (kra...@redhat.com) wrote:
> > So that's mapped at an address beyond host phys-bits.
> > And it hasn't failed/crashed etc - but I guess maybe nothing is using that 
> > 2G space?
> 
> root@fedora ~# dmesg | grep Surface
> [4.830095] [drm] qxl: 2048M of Surface memory size
> 
> qxl bar 4 (64bit) and qxl bar 1 (32bit) are the same thing.  The 64bit
> bar can be a lot larger obviously.  The 32bit bar is just an alias for
> the first portion of the 64bit bar.  So I guess qxl just falls back to
> use bar 1 instead of bar 4 because ioremap() on bar 4 fails.
> 
> > Obviously 128T is a bit silly for maxmem at the moment, however I was 
> > worrying what
> > happens with 36/39/40bit hosts, and it's not unusual to pick a maxmem 
> > that's a few TB
> > even if the VMs you're initially creating are only a handful of GB. 
> > (oVirt/RHEV seems to use
> > a 4TB default for maxmem).
> 
> Oh, ok.  Should be fixed I guess.
> 
> > Still, this only hits as a problem if you hit the combination of:
> >a) You use large PCI bars
> 
> ovmf will map all 64bit bars high, even without running out of 32bit
> address space.  And with virtio 1.0 pretty much every virtual machine
> will have 64bit bars.

OK, yes I got that working, and you're right, it does map it high;
(with recent OVMF running virtio 1.0, that needed recent guest/host kernels)
and it does fail easily as well if memory doesn't fit, so for
example:

(all on a xeon with 46 bit physical host)

specifying maxmem=1T - upstream build is hanging - but works if I specify 
phys-bits=46
so yes, it's noticing if the guest phys-bits is too small
even if the host can manage it.

It's OK if running with small amount of RAM and phys-bits=40

maxmem=64T with any phys-bits hangs.


specifying maxmem=64T with phys-bits=46 on xeon and it hangs
specifying maxmem=64T with phys-bits=48 on xeon and it hangs
specifying maxmem=32T with phys-bits=46-48 on xeon and it works

So for example we see:
  Bus  2, device   4, function 0:
SCSI controller: PCI device 1af4:1042
  IRQ 10.
  BAR1: 32 bit memory at 0x9800 [0x98000fff].
  BAR4: 64 bit prefetchable memory at 0x2008 [0x2008007f].
  id "virtio-disk0"

  and that works nicely.

Dave

> 
> cheers,
>   Gerd
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [Qemu-devel] [PATCH v2 02/15] blockjob: Decouple the ID from the device name in the BlockJob struct

2016-07-01 Thread John Snow



On 06/30/2016 09:03 AM, Alberto Garcia wrote:
> On Wed 29 Jun 2016 07:20:55 PM CEST, Max Reitz wrote:
> I thought adding a new 'ID' field was simpler. The device name is
> still a device name (where it makes sense). The default ID is
> guaranteed to be valid and guaranteed not to clash with
> user-defined IDs. The API is (in my opinion) more clear.
>
> The only problems that I can think of:
>
> - BlockJobInfo and the events expose the 'device' field which is
>   superfluous.
> - block-job-{pause,resume,...} can take an ID or a device name.

 Yes. There are two parts that I don't like about this.

 The first one is that we need additional code to keep track of the
 device name and to look it up.
>>>
>>> I think this part is negligible, but ok.
>>>
 The other, more important one is that it couples block jobs more
 tightly with a BDS:

 * What do you do with a background job that doesn't have a device name
   because it doesn't work on a BDS? Will 'device' become optional
   everywhere? But how is this less problematic for compatibility than
   returning non-device-name IDs? (To be clear, I don't think either is
   a real problem, but you can hardly dismiss one and accept the
   other.)
>>>
 * And what do you do once we allow more than one job per device? Then
   the device name isn't suitable for addressing the job any more. And
   letting the client use the device name after it started the first
   job, but not any more after it started the second one, feels wrong.
>>>
>>> Fair enough. Unless Max, Eric or someone else has something else to add
>>> I'll give it a try and see how it looks.
>>
>> Sorry for the late response, but then again I don't actually have an
>> opinion either way.
>>
>> The thing I feel most strongly about is the issue of storing a general
>> ID in a field named "device". However, as Kevin hinted at this
>> becoming irrelevant with John's work on decoupling block jobs from the
>> block layer.
> 
> I actually forgot to Cc him, I'm doing it now.
> 
> The idea is that I don't want to add anything now that is going to cause
> headaches later. Adding a new 'id' field to block jobs and keeping
> 'device' feels more natural to me, but reusing the 'device' field and
> allowing any ID set by the user requires less changes both to the code
> and the API.
> 
> Berto
> 

Reviewing everything now, sorry for being MIA, and thank you for keeping
me in the loop.

--js

[Qemu-devel] [PATCH] linux-user: Add some new blk ioctls

2016-07-01 Thread Peter Maydell

Add some new blk ioctls (these are 0x12,119 through
to 0x12,127). Several of these are used by mke2fs; this silences
the warnings:

mke2fs 1.42.12 (29-Aug-2014)
Unsupported ioctl: cmd=0x127b
Unsupported ioctl: cmd=0x127a
warning: Unable to get device geometry for /dev/loop5
Unsupported ioctl: cmd=0x127c
Unsupported ioctl: cmd=0x127c
Unsupported ioctl: cmd=0x1277

Signed-off-by: Peter Maydell 
---
The only 'unsupported ioctl' gripe in the LTP output now is the expected
one for the test case which deliberately passes an unknown ioctl number.

 linux-user/ioctls.h   | 29 +
 linux-user/syscall_defs.h | 11 +++
 2 files changed, 40 insertions(+)

diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h
index e672655..804f099 100644
--- a/linux-user/ioctls.h
+++ b/linux-user/ioctls.h
@@ -80,6 +80,35 @@
  IOCTL(BLKBSZGET, IOC_R, MK_PTR(TYPE_INT))
  IOCTL_SPECIAL(BLKPG, IOC_W, do_ioctl_blkpg,
MK_PTR(MK_STRUCT(STRUCT_blkpg_ioctl_arg)))
+
+#ifdef BLKDISCARD
+ IOCTL(BLKDISCARD, IOC_W, MK_PTR(MK_ARRAY(TYPE_ULONGLONG, 2)))
+#endif
+#ifdef BLKIOMIN
+ IOCTL(BLKIOMIN, IOC_R, MK_PTR(TYPE_INT))
+#endif
+#ifdef BLKIOOPT
+ IOCTL(BLKIOOPT, IOC_R, MK_PTR(TYPE_INT))
+#endif
+#ifdef BLKALIGNOFF
+ IOCTL(BLKALIGNOFF, IOC_R, MK_PTR(TYPE_INT))
+#endif
+#ifdef BLKPBSZGET
+ IOCTL(BLKPBSZGET, IOC_R, MK_PTR(TYPE_INT))
+#endif
+#ifdef BLKDISCARDZEROES
+ IOCTL(BLKDISCARDZEROES, IOC_R, MK_PTR(TYPE_INT))
+#endif
+#ifdef BLKSECDISCARD
+ IOCTL(BLKSECDISCARD, IOC_W, MK_PTR(MK_ARRAY(TYPE_ULONGLONG, 2)))
+#endif
+#ifdef BLKROTATIONAL
+ IOCTL(BLKROTATIONAL, IOC_R, MK_PTR(TYPE_SHORT))
+#endif
+#ifdef BLKZEROOUT
+ IOCTL(BLKZEROOUT, IOC_W, MK_PTR(MK_ARRAY(TYPE_ULONGLONG, 2)))
+#endif
+
 #ifdef FIBMAP
  IOCTL(FIBMAP, IOC_W | IOC_R, MK_PTR(TYPE_LONG))
 #endif
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index dce1bcc..6650e26 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -986,6 +986,17 @@ struct target_pollfd {
 #define TARGET_BLKGETSIZE64 TARGET_IOR(0x12,114,abi_ulong)
  /* return device size in bytes
 (u64 *arg) */
+
+#define TARGET_BLKDISCARD TARGET_IO(0x12, 119)
+#define TARGET_BLKIOMIN TARGET_IO(0x12, 120)
+#define TARGET_BLKIOOPT TARGET_IO(0x12, 121)
+#define TARGET_BLKALIGNOFF TARGET_IO(0x12, 122)
+#define TARGET_BLKPBSZGET TARGET_IO(0x12, 123)
+#define TARGET_BLKDISCARDZEROES TARGET_IO(0x12, 124)
+#define TARGET_BLKSECDISCARD TARGET_IO(0x12, 125)
+#define TARGET_BLKROTATIONAL TARGET_IO(0x12, 126)
+#define TARGET_BLKZEROOUT TARGET_IO(0x12, 127)
+
 #define TARGET_FIBMAP TARGET_IO(0x00,1)  /* bmap access */
 #define TARGET_FIGETBSZ   TARGET_IO(0x00,2)  /* get the block size used for 
bmap */
 #define TARGET_FS_IOC_FIEMAP TARGET_IOWR('f',11,struct fiemap)
-- 
1.9.1

Re: [Qemu-devel] [SeaBIOS] [RFC PATCH 2/2] serial console, input

2016-07-01 Thread Kevin O'Connor

On Fri, Jul 01, 2016 at 01:07:39PM -0400, Kevin O'Connor wrote:
> If I understand correctly, most keys are sent on the serial port as
> single bytes, but there are a few keys that are sent as multi-byte
> sequences.  There's a lot of complexity to implement buffering for
> that unusual case.  I wonder if the buffer could be avoided - I played
> with it a little and came up with the below (totally untested).  I'm
> not sure if it's an improvement.

The version below might be slightly easier to understand (still
totally untested).

-Kevin


u8 multibyte_read_count VARLOW;
u8 multibyte_read_pos VARLOW;

void
sercon_check_event(void)
{
u16 addr = GET_LOW(sercon_port);
...

// read and process data
int readdata = 0;
while (inb(addr + SEROFF_LSR) & 0x01) {
u8 byte = inb(addr + SEROFF_DATA);
readdata = 1;
int ret = sercon_check_multibyte(byte);
if (ret)
// byte part of multi-byte sequence
continue;
if (byte == 0x1b) {
// Start multi-byte sequence check
SET_LOW(multibyte_read_count, 1);
continue;
}
// Send normal key
sercon_sendkey(GET_LOW(termchr[chr].scancode), 
GET_LOW(termchr[chr].flags));
}

if (!readdata && GET_LOW(multibyte_read_count))
// Too long to read multi-byte sequence - must flush
dump_multibyte_sequence();
}

static int
sercon_check_multibyte(u8 byte)
{
int mb_count = GET_LOW(multibyte_read_count);
if (!mb_count)
// Not in a multi-byte sequence
return 0;
int mb_pos = GET_LOW(multibyte_read_pos);
while (GET_GLOBAL(termseq[mb_pos].seq[mb_count-1]) != byte) {
// Byte didn't match this sequence - find a sequence that does
mb_pos++;
if (mb_pos >= ARRAY_SIZE(termseq)
|| memcmp_far(GLOBAL_SEG, termseq[mb_pos-1].seq
  , GLOBAL_SEG, termseq[mb_pos].seq, mb_count-1) != 0)
// No match - must flush previusly queued keys
dump_multibyte_sequence();
return 0;
}
}
mb_count++;
if (!GET_GLOBAL(termseq[mb_pos].seq[mb_count-1])) {
// sequence complete - send key
sercon_sendkey(GET_GLOBAL(termseq[seq].scancode), 0);
mb_count = mb_pos = 0;
}
SET_LOW(multibyte_read_count, mb_count);
SET_LOW(multibyte_read_pos, mb_pos);
return 1;
}

static void
dump_multibyte_sequence(void)
{
sercon_sendkey(GET_LOW(termchr[0x1b].scancode), 
GET_LOW(termchr[0x1b].flags));
int i, mb_count = GET_LOW(multibyte_read_count);
for (i=0; i

[Qemu-devel] [PULL 4/4] tcg: Improve the alignment check infrastructure

2016-07-01 Thread Richard Henderson

From: Sergey Sorokin 

Some architectures (e.g. ARMv8) need the address to be aligned
to a size larger than the size of the memory access.
The current costless alignment check implementation in QEMU is
sufficient to support such a check, but we need to support
specifying an alignment size.

Signed-off-by: Sergey Sorokin 
Message-Id: <1466705806-679898-1-git-send-email-afaral...@yandex.ru>
Signed-off-by: Richard Henderson 
[rth: Assert in tcg_canonicalize_memop.  Leave get_alignment_bits
available for, though unused by, user-mode.  Retain logging difference
based on ALIGNED_ONLY.]
---
 include/exec/cpu-all.h   | 16 ++--
 softmmu_template.h   | 88 
 tcg/aarch64/tcg-target.inc.c |  9 +++--
 tcg/i386/tcg-target.inc.c| 15 +---
 tcg/ppc/tcg-target.inc.c | 14 ---
 tcg/s390/tcg-target.inc.c|  9 +++--
 tcg/tcg-op.c |  3 ++
 tcg/tcg.c| 26 +
 tcg/tcg.h| 87 ---
 9 files changed, 159 insertions(+), 108 deletions(-)

diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 6a6796d..b6a7059 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -288,14 +288,22 @@ CPUArchState *cpu_copy(CPUArchState *env);
 #if !defined(CONFIG_USER_ONLY)
 
 /* Flags stored in the low bits of the TLB virtual address.  These are
-   defined so that fast path ram access is all zeros.  */
+ * defined so that fast path ram access is all zeros.
+ * The flags all must be between TARGET_PAGE_BITS and
+ * maximum address alignment bit.
+ */
 /* Zero if TLB entry is valid.  */
-#define TLB_INVALID_MASK   (1 << 3)
+#define TLB_INVALID_MASK(1 << (TARGET_PAGE_BITS - 1))
 /* Set if TLB entry references a clean RAM page.  The iotlb entry will
contain the page physical address.  */
-#define TLB_NOTDIRTY(1 << 4)
+#define TLB_NOTDIRTY(1 << (TARGET_PAGE_BITS - 2))
 /* Set if TLB entry is an IO callback.  */
-#define TLB_MMIO(1 << 5)
+#define TLB_MMIO(1 << (TARGET_PAGE_BITS - 3))
+
+/* Use this mask to check interception with an alignment mask
+ * in a TCG backend.
+ */
+#define TLB_FLAGS_MASK  (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO)
 
 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf);
 void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf);
diff --git a/softmmu_template.h b/softmmu_template.h
index 208f808..4d378ca 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -171,20 +171,21 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, 
target_ulong addr,
 unsigned mmu_idx = get_mmuidx(oi);
 int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
 target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
+int a_bits = get_alignment_bits(get_memop(oi));
 uintptr_t haddr;
 DATA_TYPE res;
 
 /* Adjust the given return address.  */
 retaddr -= GETPC_ADJ;
 
+if (a_bits > 0 && (addr & ((1 << a_bits) - 1)) != 0) {
+cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
+ mmu_idx, retaddr);
+}
+
 /* If the TLB entry is for a different page, reload and try again.  */
 if ((addr & TARGET_PAGE_MASK)
  != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
-if ((addr & (DATA_SIZE - 1)) != 0
-&& (get_memop(oi) & MO_AMASK) == MO_ALIGN) {
-cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
- mmu_idx, retaddr);
-}
 if (!VICTIM_TLB_HIT(ADDR_READ)) {
 tlb_fill(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
  mmu_idx, retaddr);
@@ -215,10 +216,6 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, 
target_ulong addr,
 DATA_TYPE res1, res2;
 unsigned shift;
 do_unaligned_access:
-if ((get_memop(oi) & MO_AMASK) == MO_ALIGN) {
-cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
- mmu_idx, retaddr);
-}
 addr1 = addr & ~(DATA_SIZE - 1);
 addr2 = addr1 + DATA_SIZE;
 /* Note the adjustment at the beginning of the function.
@@ -232,13 +229,6 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, 
target_ulong addr,
 return res;
 }
 
-/* Handle aligned access or unaligned access in the same page.  */
-if ((addr & (DATA_SIZE - 1)) != 0
-&& (get_memop(oi) & MO_AMASK) == MO_ALIGN) {
-cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
- mmu_idx, retaddr);
-}
-
 haddr = addr + env->tlb_table[mmu_idx][index].addend;
 #if DATA_SIZE == 1
 res = glue(glue(ld, LSUFFIX), _p)((uint8_t *)haddr);
@@ -255,20 +245,21 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, 
target_ulong addr,
 unsigned mmu_idx = get_mmuidx(oi);
 int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
 target_ulong tlb

[Qemu-devel] [PULL 1/4] tcg: Fix name for high-half register

2016-07-01 Thread Richard Henderson

Reviewed-by: David Gibson 
Signed-off-by: Richard Henderson 
---
 tcg/tcg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 254427b..154ffe8 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -557,7 +557,7 @@ int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
 ts2->mem_offset = offset + (1 - bigendian) * 4;
 pstrcpy(buf, sizeof(buf), name);
 pstrcat(buf, sizeof(buf), "_1");
-ts->name = strdup(buf);
+ts2->name = strdup(buf);
 } else {
 ts->base_type = type;
 ts->type = type;
-- 
2.5.5

[Qemu-devel] [PULL 2/4] tcg: Optimize spills of constants

2016-07-01 Thread Richard Henderson

While we can store constants via constraints on INDEX_op_st_i32 et al,
we weren't able to spill constants to backing store.

Add a new backend interface, tcg_out_sti, which may store the constant
(and is allowed to fail).  Rearrange the temp_* helpers so that we only
attempt to directly store a constant when the temp is becoming dead/free.

Signed-off-by: Richard Henderson 
---
 tcg/aarch64/tcg-target.inc.c |  10 +++
 tcg/arm/tcg-target.inc.c |   6 ++
 tcg/i386/tcg-target.inc.c|  21 --
 tcg/ia64/tcg-target.inc.c|  10 +++
 tcg/mips/tcg-target.inc.c|  10 +++
 tcg/ppc/tcg-target.inc.c |   6 ++
 tcg/s390/tcg-target.inc.c|   6 ++
 tcg/sparc/tcg-target.inc.c   |  10 +++
 tcg/tcg.c| 166 ---
 tcg/tci/tcg-target.inc.c |   6 ++
 10 files changed, 169 insertions(+), 82 deletions(-)

diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 1447f7c..5ac0091 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -716,6 +716,16 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, 
TCGReg arg,
  arg, arg1, arg2);
 }
 
+static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
+   TCGReg base, intptr_t ofs)
+{
+if (val == 0) {
+tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
+return true;
+}
+return false;
+}
+
 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
TCGReg rn, unsigned int a, unsigned int b)
 {
diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index f9f54c6..172feba 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -2046,6 +2046,12 @@ static inline void tcg_out_st(TCGContext *s, TCGType 
type, TCGReg arg,
 tcg_out_st32(s, COND_AL, arg, arg1, arg2);
 }
 
+static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
+   TCGReg base, intptr_t ofs)
+{
+return false;
+}
+
 static inline void tcg_out_mov(TCGContext *s, TCGType type,
TCGReg ret, TCGReg arg)
 {
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 317484c..bc34535 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -710,12 +710,19 @@ static inline void tcg_out_st(TCGContext *s, TCGType 
type, TCGReg arg,
 tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
 }
 
-static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base,
-   tcg_target_long ofs, tcg_target_long val)
+static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
+TCGReg base, intptr_t ofs)
 {
-int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0);
-tcg_out_modrm_offset(s, opc, 0, base, ofs);
+int rexw = 0;
+if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
+if (val != (int32_t)val) {
+return false;
+}
+rexw = P_REXW;
+}
+tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs);
 tcg_out32(s, val);
+return true;
 }
 
 static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
@@ -1321,10 +1328,10 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
 ofs += 4;
 }
 
-tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi);
+tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
 ofs += 4;
 
-tcg_out_sti(s, TCG_TYPE_PTR, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
+tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
 } else {
 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
 /* The second argument is already loaded with addrlo.  */
@@ -1413,7 +1420,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
 ofs += 4;
 }
 
-tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi);
+tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
 ofs += 4;
 
 retaddr = TCG_REG_EAX;
diff --git a/tcg/ia64/tcg-target.inc.c b/tcg/ia64/tcg-target.inc.c
index 395223e..c91f392 100644
--- a/tcg/ia64/tcg-target.inc.c
+++ b/tcg/ia64/tcg-target.inc.c
@@ -973,6 +973,16 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, 
TCGReg arg,
 }
 }
 
+static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
+   TCGReg base, intptr_t ofs)
+{
+if (val == 0) {
+tcg_out_st(s, type, TCG_REG_R0, base, ofs);
+return true;
+}
+return false;
+}
+
 static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, uint64_t opc_a3,
TCGReg ret, TCGArg arg1, int const_arg1,
TCGArg arg2, int const_arg2)
diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c
index 50e98ea..2f9be48 100644
--- a/t

[Qemu-devel] [PULL 0/4] tcg (-ish) updates

2016-07-01 Thread Richard Henderson

Three tcg patches, and a build change so that we can still
use --cpu=i686 on an x86_64 host, now that linux-user has
the syscall assembly thunks.


r~


The following changes since commit 9a48e3670030148a8d00c8d4d4cd7f051c0d9f39:

  Added Bulgarian translation (2016-07-01 16:06:57 +0100)

are available in the git repository at:

  git://github.com/rth7680/qemu.git tags/pull-tcg-20160701

for you to fetch changes up to 15dab9abdedb1657ace1b3169617852299b5063f:

  tcg: Improve the alignment check infrastructure (2016-07-01 10:13:22 -0700)


Misc updates


Richard Henderson (3):
  tcg: Fix name for high-half register
  tcg: Optimize spills of constants
  build: Use $(CCAS) for compiling .S files

Sergey Sorokin (1):
  tcg: Improve the alignment check infrastructure

 configure|  13 ++-
 include/exec/cpu-all.h   |  16 +++-
 rules.mak|   7 +-
 softmmu_template.h   |  88 ++--
 tcg/aarch64/tcg-target.inc.c |  19 -
 tcg/arm/tcg-target.inc.c |   6 ++
 tcg/i386/tcg-target.inc.c|  36 +---
 tcg/ia64/tcg-target.inc.c|  10 +++
 tcg/mips/tcg-target.inc.c|  10 +++
 tcg/ppc/tcg-target.inc.c |  20 +++--
 tcg/s390/tcg-target.inc.c|  15 +++-
 tcg/sparc/tcg-target.inc.c   |  10 +++
 tcg/tcg-op.c |   3 +
 tcg/tcg.c| 194 ---
 tcg/tcg.h|  87 ---
 tcg/tci/tcg-target.inc.c |   6 ++
 16 files changed, 341 insertions(+), 199 deletions(-)

Re: [Qemu-devel] [PATCH v2] hw/ptimer: Don't wrap around counter for expired timer that uses tick handler

2016-07-01 Thread Dmitry Osipenko

On 01.07.2016 19:36, Peter Maydell wrote:
> On 30 June 2016 at 20:01, Dmitry Osipenko  wrote:
>> On 30.06.2016 18:02, Peter Maydell wrote:
>>> What I meant was: ptimer_get_count() is typically called to generate
>>> a value to return from a register. That's a separate thing, conceptually,
>>> from whether the device happens to also trigger an interrupt on timer
>>> expiry by passing a bh to ptimer_init(). So it's very odd for a detail
>>> of interrupt-on-timer-expiry (that there is a bottom half) to affect
>>> the value returned when you read the timer count register.
> 
>> In order to handle wraparound correctly, software needs to track the moment 
>> of
>> the wraparound - the interrupt. If software reads wrapped around counter 
>> value
>> before IRQ triggered (ptimer expired), then it would assume that no 
>> wraparound
>> happened and won't perform counter value correction, resulting in periodic
>> counter "jumping" backwards.
> 
> That just says you need particular behaviour between counter reads
> and IRQ triggers; it doesn't say that you need the behaviour to be
> different if the ptimer code doesn't know about the IRQ trigger.
> 

Okay, I already explained the reason for having two different behaviours - to
make polled counter value more distributed when possible. If I understand you
correctly, you don't like it because it is "odd" and I agree that it's a bit 
clumsy.

So, what are we going to do now? Would you just revert the offending commit, or
do you have some other suggestions?

I think we still need to change the returned counter value to "1" in case of the
expired timer, since it would result in deterministic behaviour across
all of the timers. However, it definitely feels like it should go into a
standalone patch, and I can include it in the next iteration of the ptimer
patches.

-- 
Dmitry

[Qemu-devel] [PULL 3/4] build: Use $(CCAS) for compiling .S files

2016-07-01 Thread Richard Henderson

We fail to pass to $(AS) all of the different flags that may be required
for a given set of CFLAGS.  Rather than figuring out the host-specific
mapping, it's better to allow the compiler driver to do that.

However, simply using $(CC) runs afoul of clang trying to build the
option roms.  C.f. 3dd46c78525a30e98c68, wherein we changed from
using $(CC) to using $(AS) in the first place.

Work around this by passing -fno-integrated-as to clang, so that we use
the external assembler, and the clang driver still passes along all of
the options that the assembler might require.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
Message-Id: <1466703558-7723-1-git-send-email-...@twiddle.net>
---
 configure | 13 ++---
 rules.mak |  7 ++-
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/configure b/configure
index 93e4c95..79af8ee 100755
--- a/configure
+++ b/configure
@@ -368,7 +368,7 @@ else
 fi
 
 ar="${AR-${cross_prefix}ar}"
-as="${AS-${cross_prefix}as}"
+ccas="${CCAS-$cc}"
 cpp="${CPP-$cc -E}"
 objcopy="${OBJCOPY-${cross_prefix}objcopy}"
 ld="${LD-${cross_prefix}ld}"
@@ -4496,6 +4496,13 @@ if test "$fortify_source" != "no"; then
   fi
 fi
 
+#
+# clang does not support the 16-bit assembly for roms
+
+if echo | $ccas -dM -E - | grep __clang__ > /dev/null 2>&1 ; then
+  ccas="$ccas -fno-integrated-as"
+fi
+
 ##
 # check if struct fsxattr is available via linux/fs.h
 
@@ -5516,7 +5523,7 @@ echo "CXX=$cxx" >> $config_host_mak
 echo "OBJCC=$objcc" >> $config_host_mak
 echo "AR=$ar" >> $config_host_mak
 echo "ARFLAGS=$ARFLAGS" >> $config_host_mak
-echo "AS=$as" >> $config_host_mak
+echo "CCAS=$ccas" >> $config_host_mak
 echo "CPP=$cpp" >> $config_host_mak
 echo "OBJCOPY=$objcopy" >> $config_host_mak
 echo "LD=$ld" >> $config_host_mak
@@ -5989,7 +5996,7 @@ for rom in seabios vgabios ; do
 config_mak=roms/$rom/config.mak
 echo "# Automatically generated by configure - do not modify" > $config_mak
 echo "SRC_PATH=$source_path/roms/$rom" >> $config_mak
-echo "AS=$as" >> $config_mak
+echo "CCAS=$ccas" >> $config_mak
 echo "CC=$cc" >> $config_mak
 echo "BCC=bcc" >> $config_mak
 echo "CPP=$cpp" >> $config_mak
diff --git a/rules.mak b/rules.mak
index 72c5955..7d7d83b 100644
--- a/rules.mak
+++ b/rules.mak
@@ -68,11 +68,8 @@ LINK = $(call quiet-command, $(LINKPROG) $(QEMU_CFLAGS) 
$(CFLAGS) $(LDFLAGS) -o
$(call process-archive-undefs, $1) \
$(version-obj-y) $(call extract-libs,$1) $(LIBS),"  LINK  
$(TARGET_DIR)$@")
 
-%.asm: %.S
-   $(call quiet-command,$(CPP) $(QEMU_INCLUDES) $(QEMU_CFLAGS) 
$(QEMU_DGFLAGS) $(CFLAGS) -o $@ $<,"  CPP   $(TARGET_DIR)$@")
-
-%.o: %.asm
-   $(call quiet-command,$(AS) $(ASFLAGS) -o $@ $<,"  AS
$(TARGET_DIR)$@")
+%.o: %.S
+   $(call quiet-command,$(CCAS) $(QEMU_INCLUDES) $(QEMU_CFLAGS) 
$(QEMU_DGFLAGS) $(CFLAGS) -c -o $@ $<,"  AS$(TARGET_DIR)$@")
 
 %.o: %.cc
$(call quiet-command,$(CXX) $(QEMU_INCLUDES) $(QEMU_CXXFLAGS) 
$(QEMU_DGFLAGS) $(CFLAGS) $($@-cflags) -c -o $@ $<,"  CXX   $(TARGET_DIR)$@")
-- 
2.5.5

Re: [Qemu-devel] [PATCH RFC v3 2/5] tests: Make check-block a phony target

2016-07-01 Thread Eric Blake

On 06/30/2016 06:53 AM, Markus Armbruster wrote:
> Got lost in commit b93b63f.
> 
> Signed-off-by: Markus Armbruster 
> ---
>  tests/Makefile.include | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Reviewed-by: Eric Blake 

> 
> diff --git a/tests/Makefile.include b/tests/Makefile.include
> index 5def161..002f15f 100644
> --- a/tests/Makefile.include
> +++ b/tests/Makefile.include
> @@ -771,7 +771,7 @@ endif
>  
>  # Consolidated targets
>  
> -.PHONY: check-qapi-schema check-qtest check-unit check-source
> +.PHONY: check-qapi-schema check-qtest check-unit check-block check-source
>  .PHONY: check check-clean
>  check-qapi-schema: $(patsubst %,check-%, $(check-qapi-schema-y))
>  check-qtest: $(patsubst %,check-qtest-%, $(QTEST_TARGETS))
> 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v5 9/9] tests: add a m25p80 test

2016-07-01 Thread Peter Maydell

On 28 June 2016 at 19:24, Cédric Le Goater  wrote:
> This test uses the palmetto platform and the AST2400 SPI controller to
> test the m25p80 flash module device model. The flash model is defined
> by the platform (n25q256a) and it would be nice to find a way to control
> it, using a property probably.
>
> Signed-off-by: Cédric Le Goater 
> Reviewed-by: Peter Maydell 
> ---
>

This test fails on ppc64be:

TEST: tests/m25p80-test... (pid=65123)
  /arm/m25p80/read_jedec:  OK
  /arm/m25p80/erase_sector:OK
  /arm/m25p80/erase_all:   **
ERROR:/home/pm215/qemu/tests/m25p80-test.c:162:test_erase_all:
assertion failed (page[i] == 0x0):
(0x == 0x)
FAIL
GTester: last random seed: R02S54b2016fda21b092e18d7a23a2db86ba
(pid=65128)
  /arm/m25p80/write_page:  **
ERROR:/home/pm215/qemu/tests/m25p80-test.c:200:test_write_page:
assertion failed (page[i] == my_page_addr + i * 4): (0x ==
0x0140)
FAIL
GTester: last random seed: R02S8708910d6b72f700bc41e9340a516239
(pid=65133)
FAIL: tests/m25p80-test

thanks
-- PMM

[Qemu-devel] [PATCH v2 24/27] target-arm: emulate aarch64's LL/SC using cmpxchg helpers

2016-07-01 Thread Richard Henderson

From: "Emilio G. Cota" 

Emulating LL/SC with cmpxchg is not correct, since it can
suffer from the ABA problem. Portable parallel code, however,
is written assuming only cmpxchg--and not LL/SC--is available.
This means that in practice emulating LL/SC with cmpxchg is
a viable alternative.

The appended patch emulates LL/SC pairs in aarch64 with cmpxchg helpers.
This works in both user and system mode. In usermode, it avoids
pausing all other CPUs to perform the LL/SC pair. The subsequent
performance and scalability improvement is significant, as the
plots below show. They plot the throughput of atomic_add-bench
compiled for ARM and executed on a 64-core x86 machine.

Hi-res plots: http://imgur.com/a/JVc8Y

atomic_add-bench: 100 ops/thread, [0,1] range

  18 ++-+--+-+--+--+--+---++
 +cmpxchg +-E--+   + +  +  +  +|
  16 ++master +-H--+  ++
 |||
  14 ++   ++
 | |   |
  12 ++|  ++
 | |   |
  10  ++
   8 ++E  ++
 |+++  |
   6 ++ | ++
 |  |  |
   4 ++ | ++
 |   | |
   2 +H++E+---++
 + | +E+++E+---+--+E+++E+--+E+--+E+++E+---+--+E|
   0 ++H-HH-+-H+-+--+--+--+---++
 0  10 2030 40 50 60
Number of threads

atomic_add-bench: 100 ops/thread, [0,2] range

  18 ++-+--+-+--+--+--+---++
 +cmpxchg +-E--+   + +  +  +  +|
  16 ++master +-H--+  ++
 | |   |
  14 ++E  ++
 | |   |
  12 ++|  ++
 |+++  |
  10 ++ | ++
   8 ++ | ++
 |  |  |
   6 ++ | ++
 |   | |
   4 ++  |++
 |  +E+--- |
   2 +H+ +E+-+++  +++  +++   ---+E+-+E+--+++
 +++++E+---+--+E+++E+--+E+---   +++   +  +E|
   0 ++H-HH-+-H+-+--+--+--+---++
 0  10 2030 40 50 60
Number of threads

   atomic_add-bench: 100 ops/thread, [0,128] range

  70 ++-+--+-+--+--+--+---++
 +cmpxchg +-E--+   + +  +  +  +|
  60 ++master +-H--+  +++---+E+-+E+--+E+
 |+E+--E---+E+---  |
 | ---+++  |
  50 ++  +++---   ++
 |  -+E+   |
  40 ++  +++  ++
 |E-   |
 |  --||
  30 ++   -- +++  ++
 |  +E+|
  20 ++E+ ++
 |E+

[Qemu-devel] [PATCH v2 22/27] target-arm: emulate LL/SC using cmpxchg helpers

2016-07-01 Thread Richard Henderson

From: "Emilio G. Cota" 

Emulating LL/SC with cmpxchg is not correct, since it can
suffer from the ABA problem. Portable parallel code, however,
is written assuming only cmpxchg--and not LL/SC--is available.
This means that in practice emulating LL/SC with cmpxchg is
a viable alternative.

The appended patch emulates LL/SC pairs in ARM with cmpxchg helpers.
This works in both user and system mode. In usermode, it avoids
pausing all other CPUs to perform the LL/SC pair. The subsequent
performance and scalability improvement is significant, as the
plots below show. They plot the throughput of atomic_add-bench
compiled for ARM and executed on a 64-core x86 machine.

Hi-res plots: http://imgur.com/a/aNQpB

   atomic_add-bench: 100 ops/thread, [0,1] range

  9 ++-+--+--+--+--+--+---++
+cmpxchg +-E--+   +  +  +  +  +|
  8 +Emaster +-H--+   ++
| ||
  7 ++E   ++
| ||
  6   ++
|  |   |
  5 ++ |  ++
  4 ++ |  ++
|  |   |
  3 ++ |  ++
|   |  |
  2 ++  | ++
|H++E+---  +++  ---+E+--+E+--+E|
  1 +++ +E+-+E+--+E+--+E+--+E+--   +++  +++   ++
++H+   ++++   +  +++    +  +  +|
  0 ++--HH-+-H+--+--+--+--+---++
0  10 20 30 40 50 60
   Number of threads

atomic_add-bench: 100 ops/thread, [0,2] range

  16 ++-+--+-+--+--+--+---++
 +cmpxchg +-E--+   + +  +  +  +|
  14 ++master +-H--+  ++
 | |   |
  12 ++|  ++
 | E   |
  10 ++|  ++
 | |   |
   8  ++
 |E+|  |
 |  |  |
   6 ++ | ++
 |   | |
   4 ++  |++
 |  +E+---   +++  +++  +++   ---+E+--+E|
   2 +H+ +E+--E---+E+-+E+--+E+--+E+--+++
 + |++++   +    +  +  +|
   0 ++H-HH-+-H+-+--+--+--+---++
 0  10 2030 40 50 60
Number of threads

   atomic_add-bench: 100 ops/thread, [0,128] range

  70 ++-+--+-+--+--+--+---++
 +cmpxchg +-E--+   + +  +     +|
  60 ++master +-H--+ E--+E+---++
 |-+E+---   +++ +++  +E|
 |+++  +++   ++|
  50 ++   +++  ---+E+-++
 |-E---|
  40 ++---+++ ++
 |   +++---|
 |  -+E+   |
  30 ++  +++  ++
 |   +E+   |
  20 ++ +++-- ++
 |  +E+

Re: [Qemu-devel] [SeaBIOS] [RFC PATCH 2/2] serial console, input

2016-07-01 Thread Kevin O'Connor

On Fri, Jul 01, 2016 at 12:54:31PM +0200, Gerd Hoffmann wrote:
> Signed-off-by: Gerd Hoffmann 
> ---
>  src/clock.c  |   1 +
>  src/serial.c | 255 
> +++
>  src/util.h   |   1 +
>  3 files changed, 257 insertions(+)
> 
> diff --git a/src/clock.c b/src/clock.c
> index e83e0f3..e44e112 100644
> --- a/src/clock.c
> +++ b/src/clock.c
> @@ -295,6 +295,7 @@ clock_update(void)
>  floppy_tick();
>  usb_check_event();
>  ps2_check_event();
> +sercon_check_event();
>  }
>  
>  // INT 08h System Timer ISR Entry Point
> diff --git a/src/serial.c b/src/serial.c
> index 74b91bb..d72dd01 100644
> --- a/src/serial.c
> +++ b/src/serial.c
> @@ -655,3 +655,258 @@ void sercon_enable(void)
>  outb(0x01, addr + 0x02);   // enable fifo
>  enable_vga_console();
>  }
> +
> +/
> + * serial input
> + /
> +
> +VARLOW u8 rx_buf[16];
> +VARLOW u8 rx_bytes;
> +
> +VARLOW struct {
> +char seq[4];
> +u8 len;
> +u8 scancode;
> +} termseq[] = {
> +{ .seq = "OP", .len = 2, .scancode = 0x3b },// F1
> +{ .seq = "OQ", .len = 2, .scancode = 0x3c },// F2
> +{ .seq = "OR", .len = 2, .scancode = 0x3d },// F3
> +{ .seq = "OS", .len = 2, .scancode = 0x3e },// F4
> +{ .seq = "[A", .len = 2, .scancode = 0xc8 },// up
> +{ .seq = "[B", .len = 2, .scancode = 0xd0 },// down
> +{ .seq = "[C", .len = 2, .scancode = 0xcd },// right
> +{ .seq = "[D", .len = 2, .scancode = 0xcb },// left
> +};

It would be preferable to mark constant data with "static VAR16"
instead of VARLOW.

> +
> +#define FLAG_CTRL  (1<<0)
> +#define FLAG_SHIFT (1<<1)
> +
> +VARLOW struct {
> +u8 flags;
> +u8 scancode;
> +} termchr[256] = {
> +[ '1'] = { .scancode = 0x02,  },

I think this table should be generated at runtime from
kbd.c:scan_to_keycode[].  Since it doesn't change at runtime,
malloc_fseg() / GET_GLOBAL() could be used instead of VARLOW.

[...]
> +static void sercon_sendkey(u8 scancode, u8 flags)
> +{
> +if (flags & FLAG_CTRL)
> +process_key(0x1d);
> +if (flags & FLAG_SHIFT)
> +process_key(0x2a);
> +
> +if (scancode & 0x80) {
> +process_key(0xe0);
> +process_key(scancode & ~0x80);
> +process_key(0xe0);
> +process_key(scancode);
> +} else {
> +process_key(scancode);
> +process_key(scancode | 0x80);
> +}
> +
> +if (flags & FLAG_SHIFT)
> +process_key(0x2a | 0x80);
> +if (flags & FLAG_CTRL)
> +process_key(0x1d | 0x80);
> +}

Is it necessary to use process_key() here instead of injecting the
keycode directly with enqueue_key()?  I think the only difference is
the CONFIG_KBD_CALL_INT15_4F stuff and I'm not sure if anything
interesting needs that.

> +
> +void VISIBLE16
> +sercon_check_event(void)

Does this need VISIBLE16?

> +{
> +u16 addr = GET_LOW(sercon_port);
> +u8 byte, scancode, flags, count = 0;
> +int seq, chr, len;
> +
> +// check to see if there is a active serial port
> +if (!addr)
> +return;
> +if (inb(addr + SEROFF_LSR) == 0xFF)
> +return;
> +
> +// flush pending output
> +sercon_flush_lazy();
> +
> +// read all available data
> +while (inb(addr + SEROFF_LSR) & 0x01) {
> +byte = inb(addr + SEROFF_DATA);
> +if (GET_LOW(rx_bytes) < sizeof(rx_buf)) {
> +SET_LOW(rx_buf[rx_bytes], byte);
> +SET_LOW(rx_bytes, GET_LOW(rx_bytes) + 1);
> +count++;
> +}
> +}
> +
> +next_char:
> +// no (more) input data
> +if (!GET_LOW(rx_bytes))
> +return;
> +
> +// lookup escape sequences
> +if (GET_LOW(rx_bytes) > 1 && GET_LOW(rx_buf[0]) == 0x1b) {
> +for (seq = 0; seq < ARRAY_SIZE(termseq); seq++) {
> +len = GET_LOW(termseq[seq].len);
> +if (GET_LOW(rx_bytes) < len + 1)
> +continue;
> +for (chr = 0; chr < len; chr++) {
> +if (GET_LOW(termseq[seq].seq[chr]) != GET_LOW(rx_buf[chr + 
> 1]))
> +break;
> +}
> +if (chr == len) {
> +scancode = GET_LOW(termseq[seq].scancode);
> +sercon_sendkey(scancode, 0);
> +shiftbuf(len + 1);
> +goto next_char;
> +}
> +}
> +}
> +
> +// Seems we got a escape sequence we didn't recognise.
> +//  -> If we received data wait for more, maybe it is just incomplete.
> +if (GET_LOW(rx_buf[0]) == 0x1b && count)
> +return;
> +
> +// Handle input as individual chars.
> +chr = GET_LOW(rx_buf[0]);
> +scancode = GET_LOW(termchr[chr].scancode);
> +flags = GET_LOW(termchr[chr].flags);
> +if (scancode)
> +sercon_sendkey(scancode, flags);
> +shiftbuf(1);
>

Re: [Qemu-devel] [PATCH v2 00/27] cmpxchg-based emulation of atomics

2016-07-01 Thread Richard Henderson

On 07/01/2016 10:04 AM, Richard Henderson wrote:
> I spent a couple evenings this week tweaking Emilio's patch set.
> 
> The first major change is to "qemu/int128.h", so that we can use
> that type in the context of a 16-byte cmpxchg.  I have yet to teach
> TCG code generation about this type, so it's really only usable
> from other helper functions.  But that's still an improvement over
> having to return two uint64_t by reference.
> 
> The second major change is to funnel atomic operation generation
> through functions in tcg-op.c.  There we can test whether or not
> we're generating code in a parallel context and require atomic
> operations.  This also centralizes the helper functions so that we
> don't have to have the same sets in every target.
> 
> The third major change is providing a mechanism by which we can
> trap on atomic operations that we do not support, exit the cpu loop,
> stop the world, and then re-execute the instruction in a serial context.
> This is obviously something that will need to be filled in further
> as MTTCG progresses.
> 
> This is minimally tested, but it is good enough to boot Fedora 24 x86-64,
> even with the softmmu single-step stubbed out.  Perhaps unsurprisingly,
> Fedora does not attempt an unaligned atomic operation.

I should have mentioned -- this was based on my tcg-next branch, for which I
just sent a pull request (in particular, Sergey's alignment improvement patch).

I pushed my patchset to

  git://github.com/rth7680/qemu.git atomic-2

for ease of browsing.


r~

[Qemu-devel] [PATCH v2 25/27] linux-user: remove handling of ARM's EXCP_STREX

2016-07-01 Thread Richard Henderson

From: "Emilio G. Cota" 

The exception is not emitted anymore.

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-29-git-send-email-c...@braap.org>
---
 linux-user/main.c | 93 ---
 1 file changed, 93 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 54df300..f4fc460 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -648,94 +648,6 @@ do_kernel_trap(CPUARMState *env)
 return 0;
 }
 
-/* Store exclusive handling for AArch32 */
-static int do_strex(CPUARMState *env)
-{
-uint64_t val;
-int size;
-int rc = 1;
-int segv = 0;
-uint32_t addr;
-start_exclusive();
-if (env->exclusive_addr != env->exclusive_test) {
-goto fail;
-}
-/* We know we're always AArch32 so the address is in uint32_t range
- * unless it was the -1 exclusive-monitor-lost value (which won't
- * match exclusive_test above).
- */
-assert(extract64(env->exclusive_addr, 32, 32) == 0);
-addr = env->exclusive_addr;
-size = env->exclusive_info & 0xf;
-switch (size) {
-case 0:
-segv = get_user_u8(val, addr);
-break;
-case 1:
-segv = get_user_data_u16(val, addr, env);
-break;
-case 2:
-case 3:
-segv = get_user_data_u32(val, addr, env);
-break;
-default:
-abort();
-}
-if (segv) {
-env->exception.vaddress = addr;
-goto done;
-}
-if (size == 3) {
-uint32_t valhi;
-segv = get_user_data_u32(valhi, addr + 4, env);
-if (segv) {
-env->exception.vaddress = addr + 4;
-goto done;
-}
-if (arm_cpu_bswap_data(env)) {
-val = deposit64((uint64_t)valhi, 32, 32, val);
-} else {
-val = deposit64(val, 32, 32, valhi);
-}
-}
-if (val != env->exclusive_val) {
-goto fail;
-}
-
-val = env->regs[(env->exclusive_info >> 8) & 0xf];
-switch (size) {
-case 0:
-segv = put_user_u8(val, addr);
-break;
-case 1:
-segv = put_user_data_u16(val, addr, env);
-break;
-case 2:
-case 3:
-segv = put_user_data_u32(val, addr, env);
-break;
-}
-if (segv) {
-env->exception.vaddress = addr;
-goto done;
-}
-if (size == 3) {
-val = env->regs[(env->exclusive_info >> 12) & 0xf];
-segv = put_user_data_u32(val, addr + 4, env);
-if (segv) {
-env->exception.vaddress = addr + 4;
-goto done;
-}
-}
-rc = 0;
-fail:
-env->regs[15] += 4;
-env->regs[(env->exclusive_info >> 4) & 0xf] = rc;
-done:
-end_exclusive();
-return segv;
-}
-
 void cpu_loop(CPUARMState *env)
 {
 CPUState *cs = CPU(arm_env_get_cpu(env));
@@ -905,11 +817,6 @@ void cpu_loop(CPUARMState *env)
 case EXCP_INTERRUPT:
 /* just indicate that signals should be handled asap */
 break;
-case EXCP_STREX:
-if (!do_strex(env)) {
-break;
-}
-/* fall through for segv */
 case EXCP_PREFETCH_ABORT:
 case EXCP_DATA_ABORT:
 addr = env->exception.vaddress;
-- 
2.5.5

[Qemu-devel] [PATCH v2 27/27] target-arm: remove EXCP_STREX + cpu_exclusive_{test, info}

2016-07-01 Thread Richard Henderson

From: "Emilio G. Cota" 

The exception is not emitted anymore; remove it and the associated
TCG variables.

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-31-git-send-email-c...@braap.org>
---
 target-arm/cpu.h   | 17 ++---
 target-arm/internals.h |  4 +---
 target-arm/translate.c | 10 --
 target-arm/translate.h |  4 
 4 files changed, 7 insertions(+), 28 deletions(-)

diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 7938ddc..0b2ed28 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -46,13 +46,12 @@
 #define EXCP_BKPT7
 #define EXCP_EXCEPTION_EXIT  8   /* Return from v7M exception.  */
 #define EXCP_KERNEL_TRAP 9   /* Jumped to kernel code page.  */
-#define EXCP_STREX  10
-#define EXCP_HVC11   /* HyperVisor Call */
-#define EXCP_HYP_TRAP   12
-#define EXCP_SMC13   /* Secure Monitor Call */
-#define EXCP_VIRQ   14
-#define EXCP_VFIQ   15
-#define EXCP_SEMIHOST   16   /* semihosting call (A64 only) */
+#define EXCP_HVC10   /* HyperVisor Call */
+#define EXCP_HYP_TRAP   11
+#define EXCP_SMC12   /* Secure Monitor Call */
+#define EXCP_VIRQ   13
+#define EXCP_VFIQ   14
+#define EXCP_SEMIHOST   15   /* semihosting call (A64 only) */
 
 #define ARMV7M_EXCP_RESET   1
 #define ARMV7M_EXCP_NMI 2
@@ -475,10 +474,6 @@ typedef struct CPUARMState {
 uint64_t exclusive_addr;
 uint64_t exclusive_val;
 uint64_t exclusive_high;
-#if defined(CONFIG_USER_ONLY)
-uint64_t exclusive_test;
-uint32_t exclusive_info;
-#endif
 
 /* iwMMXt coprocessor state.  */
 struct {
diff --git a/target-arm/internals.h b/target-arm/internals.h
index 466be0b..5ab3b28 100644
--- a/target-arm/internals.h
+++ b/target-arm/internals.h
@@ -46,8 +46,7 @@ static inline bool excp_is_internal(int excp)
 || excp == EXCP_HALTED
 || excp == EXCP_EXCEPTION_EXIT
 || excp == EXCP_KERNEL_TRAP
-|| excp == EXCP_SEMIHOST
-|| excp == EXCP_STREX;
+|| excp == EXCP_SEMIHOST;
 }
 
 /* Exception names for debug logging; note that not all of these
@@ -63,7 +62,6 @@ static const char * const excnames[] = {
 [EXCP_BKPT] = "Breakpoint",
 [EXCP_EXCEPTION_EXIT] = "QEMU v7M exception exit",
 [EXCP_KERNEL_TRAP] = "QEMU intercept of kernel commpage",
-[EXCP_STREX] = "QEMU intercept of STREX",
 [EXCP_HVC] = "Hypervisor Call",
 [EXCP_HYP_TRAP] = "Hypervisor Trap",
 [EXCP_SMC] = "Secure Monitor Call",
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 2b3c34f..e8e8502 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -64,10 +64,6 @@ static TCGv_i32 cpu_R[16];
 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
 TCGv_i64 cpu_exclusive_addr;
 TCGv_i64 cpu_exclusive_val;
-#ifdef CONFIG_USER_ONLY
-TCGv_i64 cpu_exclusive_test;
-TCGv_i32 cpu_exclusive_info;
-#endif
 
 /* FIXME:  These should be removed.  */
 static TCGv_i32 cpu_F0s, cpu_F1s;
@@ -101,12 +97,6 @@ void arm_translate_init(void)
 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
 cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
 offsetof(CPUARMState, exclusive_val), "exclusive_val");
-#ifdef CONFIG_USER_ONLY
-cpu_exclusive_test = tcg_global_mem_new_i64(cpu_env,
-offsetof(CPUARMState, exclusive_test), "exclusive_test");
-cpu_exclusive_info = tcg_global_mem_new_i32(cpu_env,
-offsetof(CPUARMState, exclusive_info), "exclusive_info");
-#endif
 
 a64_translate_init();
 }
diff --git a/target-arm/translate.h b/target-arm/translate.h
index dbd7ac8..d4e205e 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -77,10 +77,6 @@ extern TCGv_env cpu_env;
 extern TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;
 extern TCGv_i64 cpu_exclusive_addr;
 extern TCGv_i64 cpu_exclusive_val;
-#ifdef CONFIG_USER_ONLY
-extern TCGv_i64 cpu_exclusive_test;
-extern TCGv_i32 cpu_exclusive_info;
-#endif
 
 static inline int arm_dc_feature(DisasContext *dc, int feature)
 {
-- 
2.5.5

[Qemu-devel] [PATCH v2 09/27] tcg: Add atomic helpers

2016-07-01 Thread Richard Henderson

Add all of cmpxchg, op_fetch, fetch_op, and xchg.
Handle both endian-ness, and sizes up to 8.
Handle expanding non-atomically, when emulating in serial.

Signed-off-by: Richard Henderson 
---
 Makefile.objs  |   1 -
 Makefile.target|   1 +
 atomic_template.h  | 220 
 cputlb.c   |   3 +-
 softmmu_template.h | 178 -
 tcg-runtime.c  |  27 +++--
 tcg/tcg-op.c   | 324 +
 tcg/tcg-op.h   |  44 
 tcg/tcg-runtime.h  |  75 +
 tcg/tcg.h  |  53 +
 10 files changed, 909 insertions(+), 17 deletions(-)
 create mode 100644 atomic_template.h

diff --git a/Makefile.objs b/Makefile.objs
index 7f1f0a3..f40bdfd 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -88,7 +88,6 @@ endif
 
 ###
 # Target-independent parts used in system and user emulation
-common-obj-y += tcg-runtime.o
 common-obj-y += hw/
 common-obj-y += qom/
 common-obj-y += disas/
diff --git a/Makefile.target b/Makefile.target
index d720b3e..0ca9ed6 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -94,6 +94,7 @@ obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
 obj-y += fpu/softfloat.o
 obj-y += target-$(TARGET_BASE_ARCH)/
 obj-y += disas.o
+obj-y += tcg-runtime.o
 obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
 obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
 
diff --git a/atomic_template.h b/atomic_template.h
new file mode 100644
index 0000000..a755853
--- /dev/null
+++ b/atomic_template.h
@@ -0,0 +1,220 @@
+/*
+ * Atomic helper templates
+ * Included from tcg-runtime.c.
+ *
+ * Copyright (c) 2016 Red Hat, Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define DATA_SIZE (1 << SHIFT)
+
+#if DATA_SIZE == 8
+#define SUFFIX q
+#define DATA_TYPE  uint64_t
+#define ABI_TYPE   uint64_t
+#define BSWAP  bswap64
+#elif DATA_SIZE == 4
+#define SUFFIX l
+#define DATA_TYPE  uint32_t
+#define ABI_TYPE   uint32_t
+#define BSWAP  bswap32
+#elif DATA_SIZE == 2
+#define SUFFIX w
+#define DATA_TYPE  uint16_t
+#define ABI_TYPE   uint32_t
+#define BSWAP  bswap16
+#elif DATA_SIZE == 1
+#define SUFFIX b
+#define DATA_TYPE  uint8_t
+#define ABI_TYPE   uint32_t
+#else
+#error unsupported data size
+#endif
+
+#ifdef CONFIG_USER_ONLY
+
+#if DATA_SIZE == 1
+# define HE_SUFFIX
+#elif defined(HOST_WORDS_BIGENDIAN)
+# define HE_SUFFIX  _be
+# define RE_SUFFIX  _le
+#else
+# define HE_SUFFIX  _le
+# define RE_SUFFIX  _be
+#endif
+
+ABI_TYPE HELPER(glue(glue(atomic_cmpxchg, SUFFIX), HE_SUFFIX))
+(target_ulong addr, ABI_TYPE cmpv, ABI_TYPE newv)
+{
+DATA_TYPE *haddr = g2h(addr);
+return atomic_cmpxchg(haddr, cmpv, newv);
+}
+
+#define GEN_ATOMIC_HELPER_HE(NAME)  \
+ABI_TYPE HELPER(glue(glue(atomic_##NAME, SUFFIX), HE_SUFFIX))   \
+(target_ulong addr, ABI_TYPE val)   \
+{   \
+DATA_TYPE *haddr = g2h(addr);   \
+return atomic_##NAME(haddr, val);   \
+}   \
+
+GEN_ATOMIC_HELPER_HE(fetch_add)
+GEN_ATOMIC_HELPER_HE(fetch_and)
+GEN_ATOMIC_HELPER_HE(fetch_or)
+GEN_ATOMIC_HELPER_HE(fetch_xor)
+GEN_ATOMIC_HELPER_HE(add_fetch)
+GEN_ATOMIC_HELPER_HE(and_fetch)
+GEN_ATOMIC_HELPER_HE(or_fetch)
+GEN_ATOMIC_HELPER_HE(xor_fetch)
+GEN_ATOMIC_HELPER_HE(xchg)
+
+#undef GEN_ATOMIC_HELPER_HE
+
+#if DATA_SIZE > 1
+
+ABI_TYPE HELPER(glue(glue(atomic_cmpxchg, SUFFIX), RE_SUFFIX))
+(target_ulong addr, ABI_TYPE cmpv, ABI_TYPE newv)
+{
+DATA_TYPE *haddr = g2h(addr);
+return BSWAP(atomic_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv)));
+}
+
+#define GEN_ATOMIC_HELPER_RE(NAME)  \
+ABI_TYPE HELPER(glue(glue(atomic_##NAME, SUFFIX), RE_SUFFIX))   \
+(target_ulong addr, ABI_TYPE val)   \
+{   \
+DATA_TYPE *haddr = g2h(addr);   \
+return BSWAP(atomic_##NAME(haddr, BSWAP(val))); \
+}
+
+GEN_ATOMIC_HELPER_RE(fetch_and)
+GEN_ATOMIC_HELPER_RE(fetch_or)
+GEN

[Qemu-devel] [PATCH v2 23/27] target-arm: emulate SWP with atomic_xchg helper

2016-07-01 Thread Richard Henderson

From: "Emilio G. Cota" 

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-25-git-send-email-c...@braap.org>
Signed-off-by: Richard Henderson 
---
 target-arm/translate.c | 25 +
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 680635c..2b3c34f 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -8741,25 +8741,26 @@ static void disas_arm_insn(DisasContext *s, unsigned 
int insn)
 }
 tcg_temp_free_i32(addr);
 } else {
+TCGv taddr;
+TCGMemOp opc = s->be_data;
+
 /* SWP instruction */
 rm = (insn) & 0xf;
 
-/* ??? This is not really atomic.  However we know
-   we never have multiple CPUs running in parallel,
-   so it is good enough.  */
-addr = load_reg(s, rn);
-tmp = load_reg(s, rm);
-tmp2 = tcg_temp_new_i32();
 if (insn & (1 << 22)) {
-gen_aa32_ld8u(s, tmp2, addr, get_mem_index(s));
-gen_aa32_st8(s, tmp, addr, get_mem_index(s));
+opc |= MO_UB;
 } else {
-gen_aa32_ld32u(s, tmp2, addr, get_mem_index(s));
-gen_aa32_st32(s, tmp, addr, get_mem_index(s));
+opc |= MO_UL | MO_ALIGN;
 }
-tcg_temp_free_i32(tmp);
+
+addr = load_reg(s, rn);
+taddr = gen_aa32_addr(s, addr, opc);
 tcg_temp_free_i32(addr);
-store_reg(s, rd, tmp2);
+
+tmp = load_reg(s, rm);
+tcg_gen_atomic_xchg_i32(tmp, taddr, tmp,
+get_mem_index(s), opc);
+store_reg(s, rd, tmp);
 }
 }
 } else {
-- 
2.5.5

Re: [Qemu-devel] [PATCH v5 9/9] tests: add a m25p80 test

2016-07-01 Thread Cédric Le Goater

On 07/01/2016 07:18 PM, Peter Maydell wrote:
> On 28 June 2016 at 19:24, Cédric Le Goater  wrote:
>> This test uses the palmetto platform and the AST2400 SPI controller to
>> test the m25p80 flash module device model. The flash model is defined
>> by the platform (n25q256a) and it would be nice to find way to control
>> it, using a property probably.
>>
>> Signed-off-by: Cédric Le Goater 
>> Reviewed-by: Peter Maydell 
>> ---
>>
> 
> This test fails on ppc64be:
> 
> TEST: tests/m25p80-test... (pid=65123)
>   /arm/m25p80/read_jedec:  OK
>   /arm/m25p80/erase_sector:OK
>   /arm/m25p80/erase_all:   **
> ERROR:/home/pm215/qemu/tests/m25p80-test.c:162:test_erase_all:
> assertion failed (page[i] == 0x0):
> (0x == 0x)
> FAIL
> GTester: last random seed: R02S54b2016fda21b092e18d7a23a2db86ba
> (pid=65128)
>   /arm/m25p80/write_page:  **
> ERROR:/home/pm215/qemu/tests/m25p80-test.c:200:test_write_page:
> assertion failed (page[i] == my_page_addr + i * 4): (0x ==
> 0x0140)
> FAIL
> GTester: last random seed: R02S8708910d6b72f700bc41e9340a516239
> (pid=65133)
> FAIL: tests/m25p80-test

yes ... I am not sure how to fix this :/ 

I started with a patch using qtest_big_endian() and I found that 
this one was fixing the problem : 

https://lists.gnu.org/archive/html/qemu-devel/2016-06/msg07876.html

but it feels wrong. The interesting part is that the guest fully 
boots on a ppc64be. We need an endian shaman for this. Greg ? 

Thanks,

C.

[Qemu-devel] [PATCH v2 20/27] tests: add atomic_add-bench

2016-07-01 Thread Richard Henderson

From: "Emilio G. Cota" 

With this microbenchmark we can measure the overhead of emulating atomic
instructions with a configurable degree of contention.

The benchmark spawns $n threads, each performing $o atomic ops (additions)
in a loop. Each atomic operation is performed on a different cache line
(assuming lines are 64b long) that is randomly selected from a range [0, $r).

[ Note: each $foo corresponds to a -foo flag ]

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-20-git-send-email-c...@braap.org>
---
 tests/.gitignore |   1 +
 tests/Makefile.include   |   4 +-
 tests/atomic_add-bench.c | 180 +++
 3 files changed, 184 insertions(+), 1 deletion(-)
 create mode 100644 tests/atomic_add-bench.c

diff --git a/tests/.gitignore b/tests/.gitignore
index 840ea39..52488a0 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -1,3 +1,4 @@
+atomic_add-bench
 check-qdict
 check-qfloat
 check-qint
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 6c09962..7421778 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -408,7 +408,8 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o 
tests/check-qdict.o \
tests/test-opts-visitor.o tests/test-qmp-event.o \
tests/rcutorture.o tests/test-rcu-list.o \
tests/test-qdist.o \
-   tests/test-qht.o tests/qht-bench.o tests/test-qht-par.o
+   tests/test-qht.o tests/qht-bench.o tests/test-qht-par.o \
+   tests/atomic_add-bench.o
 
 $(test-obj-y): QEMU_INCLUDES += -Itests
 QEMU_CFLAGS += -I$(SRC_PATH)/tests
@@ -451,6 +452,7 @@ tests/test-qdist$(EXESUF): tests/test-qdist.o 
$(test-util-obj-y)
 tests/test-qht$(EXESUF): tests/test-qht.o $(test-util-obj-y)
 tests/test-qht-par$(EXESUF): tests/test-qht-par.o tests/qht-bench$(EXESUF) 
$(test-util-obj-y)
 tests/qht-bench$(EXESUF): tests/qht-bench.o $(test-util-obj-y)
+tests/atomic_add-bench$(EXESUF): tests/atomic_add-bench.o $(test-util-obj-y)
 
 tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \
hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\
diff --git a/tests/atomic_add-bench.c b/tests/atomic_add-bench.c
new file mode 100644
index 0000000..5bbecf6
--- /dev/null
+++ b/tests/atomic_add-bench.c
@@ -0,0 +1,180 @@
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+#include "qemu/host-utils.h"
+#include "qemu/processor.h"
+
+struct thread_info {
+uint64_t r;
+} QEMU_ALIGNED(64);
+
+struct count {
+unsigned long val;
+} QEMU_ALIGNED(64);
+
+static QemuThread *threads;
+static struct thread_info *th_info;
+static unsigned int n_threads = 1;
+static unsigned int n_ready_threads;
+static struct count *counts;
+static unsigned long n_ops = 1;
+static double duration;
+static unsigned int range = 1;
+static bool test_start;
+
+static const char commands_string[] =
+" -n = number of threads\n"
+" -o = number of ops per thread\n"
+" -r = range (will be rounded up to pow2)";
+
+static void usage_complete(char *argv[])
+{
+fprintf(stderr, "Usage: %s [options]\n", argv[0]);
+fprintf(stderr, "options:\n%s\n", commands_string);
+}
+
+/*
+ * From: https://en.wikipedia.org/wiki/Xorshift
+ * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only
+ * guaranteed to be >= INT_MAX).
+ */
+static uint64_t xorshift64star(uint64_t x)
+{
+x ^= x >> 12; /* a */
+x ^= x << 25; /* b */
+x ^= x >> 27; /* c */
+return x * UINT64_C(2685821657736338717);
+}
+
+static void *thread_func(void *arg)
+{
+struct thread_info *info = arg;
+unsigned long i;
+
+atomic_inc(&n_ready_threads);
+while (!atomic_mb_read(&test_start)) {
+cpu_relax();
+}
+
+for (i = 0; i < n_ops; i++) {
+unsigned int index;
+
+info->r = xorshift64star(info->r);
+index = info->r & (range - 1);
+atomic_inc(&counts[index].val);
+}
+return NULL;
+}
+
+static inline
+uint64_t ts_subtract(const struct timespec *a, const struct timespec *b)
+{
+uint64_t ns;
+
+ns = (b->tv_sec - a->tv_sec) * 1000000000ULL;
+ns += (b->tv_nsec - a->tv_nsec);
+return ns;
+}
+
+static void run_test(void)
+{
+unsigned int i;
+struct timespec ts_start, ts_end;
+
+while (atomic_read(&n_ready_threads) != n_threads) {
+cpu_relax();
+}
+atomic_mb_set(&test_start, true);
+
+clock_gettime(CLOCK_MONOTONIC, &ts_start);
+for (i = 0; i < n_threads; i++) {
+qemu_thread_join(&threads[i]);
+}
+clock_gettime(CLOCK_MONOTONIC, &ts_end);
+duration = ts_subtract(&ts_start, &ts_end) / 1e9;
+}
+
+static void create_threads(void)
+{
+unsigned int i;
+
+threads = g_new(QemuThread, n_threads);
+th_info = g_new(struct thread_info, n_threads);
+counts = qemu_memalign(64, sizeof(*counts) * range);
+
+for (i = 0; i < n_threads; i++) {
+struct thread_info *info = &th_info[i];
+
+info->r = (i + 1) ^ time(NULL);
+qemu_thread_create(&th

[Qemu-devel] [PATCH v2 12/27] target-i386: emulate LOCK'ed OP instructions using atomic helpers

2016-07-01 Thread Richard Henderson

From: "Emilio G. Cota" 

[rth: Eliminate some unnecessary temporaries.]

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-13-git-send-email-c...@braap.org>
Signed-off-by: Richard Henderson 
---
 target-i386/translate.c | 76 +
 1 file changed, 58 insertions(+), 18 deletions(-)

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 2244f38..58bc954 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1258,55 +1258,95 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp 
ot, int d)
 {
 if (d != OR_TMP0) {
 gen_op_mov_v_reg(ot, cpu_T0, d);
-} else {
+} else if (!(s1->prefix & PREFIX_LOCK)) {
 gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
 }
 switch(op) {
 case OP_ADCL:
 gen_compute_eflags_c(s1, cpu_tmp4);
-tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
-tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
-gen_op_st_rm_T0_A0(s1, ot, d);
+if (s1->prefix & PREFIX_LOCK) {
+tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1);
+tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+s1->mem_index, ot | MO_LE);
+} else {
+tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
+tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
+gen_op_st_rm_T0_A0(s1, ot, d);
+}
 gen_op_update3_cc(cpu_tmp4);
 set_cc_op(s1, CC_OP_ADCB + ot);
 break;
 case OP_SBBL:
 gen_compute_eflags_c(s1, cpu_tmp4);
-tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
-tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
-gen_op_st_rm_T0_A0(s1, ot, d);
+if (s1->prefix & PREFIX_LOCK) {
+tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4);
+tcg_gen_neg_tl(cpu_T0, cpu_T0);
+tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+s1->mem_index, ot | MO_LE);
+} else {
+tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
+tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
+gen_op_st_rm_T0_A0(s1, ot, d);
+}
 gen_op_update3_cc(cpu_tmp4);
 set_cc_op(s1, CC_OP_SBBB + ot);
 break;
 case OP_ADDL:
-tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
-gen_op_st_rm_T0_A0(s1, ot, d);
+if (s1->prefix & PREFIX_LOCK) {
+tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+s1->mem_index, ot | MO_LE);
+} else {
+tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
+gen_op_st_rm_T0_A0(s1, ot, d);
+}
 gen_op_update2_cc();
 set_cc_op(s1, CC_OP_ADDB + ot);
 break;
 case OP_SUBL:
-tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
-tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
-gen_op_st_rm_T0_A0(s1, ot, d);
+if (s1->prefix & PREFIX_LOCK) {
+tcg_gen_neg_tl(cpu_T0, cpu_T1);
+tcg_gen_atomic_fetch_add_tl(cpu_cc_srcT, cpu_A0, cpu_T0,
+s1->mem_index, ot | MO_LE);
+tcg_gen_sub_tl(cpu_T0, cpu_cc_srcT, cpu_T1);
+} else {
+tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
+tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
+gen_op_st_rm_T0_A0(s1, ot, d);
+}
 gen_op_update2_cc();
 set_cc_op(s1, CC_OP_SUBB + ot);
 break;
 default:
 case OP_ANDL:
-tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
-gen_op_st_rm_T0_A0(s1, ot, d);
+if (s1->prefix & PREFIX_LOCK) {
+tcg_gen_atomic_and_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+s1->mem_index, ot | MO_LE);
+} else {
+tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
+gen_op_st_rm_T0_A0(s1, ot, d);
+}
 gen_op_update1_cc();
 set_cc_op(s1, CC_OP_LOGICB + ot);
 break;
 case OP_ORL:
-tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
-gen_op_st_rm_T0_A0(s1, ot, d);
+if (s1->prefix & PREFIX_LOCK) {
+tcg_gen_atomic_or_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+   s1->mem_index, ot | MO_LE);
+} else {
+tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
+gen_op_st_rm_T0_A0(s1, ot, d);
+}
 gen_op_update1_cc();
 set_cc_op(s1, CC_OP_LOGICB + ot);
 break;
 case OP_XORL:
-tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
-gen_op_st_rm_T0_A0(s1, ot, d);
+if (s1->prefix & PREFIX_LOCK) {
+tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+s1->mem_index, ot | MO_LE);
+} else {
+tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
+gen_op_st_rm_T0_A0(s1, ot, d);
+}
 gen_op_update1_cc();
 set_cc_op(s1, CC_OP_LOGICB + ot);
 break;
-- 
2.5.5

[Qemu-devel] [PATCH v2 18/27] target-i386: emulate XCHG using atomic helper

2016-07-01 Thread Richard Henderson

From: "Emilio G. Cota" 

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-19-git-send-email-c...@braap.org>
Signed-off-by: Richard Henderson 
---
 target-i386/translate.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/target-i386/translate.c b/target-i386/translate.c
index be097a6..525c445 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -5556,12 +5556,8 @@ static target_ulong disas_insn(CPUX86State *env, 
DisasContext *s,
 gen_lea_modrm(env, s, modrm);
 gen_op_mov_v_reg(ot, cpu_T0, reg);
 /* for xchg, lock is implicit */
-if (!(prefixes & PREFIX_LOCK))
-gen_helper_lock();
-gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
-gen_op_st_v(s, ot, cpu_T0, cpu_A0);
-if (!(prefixes & PREFIX_LOCK))
-gen_helper_unlock();
+tcg_gen_atomic_xchg_tl(cpu_T1, cpu_A0, cpu_T0,
+   s->mem_index, ot | MO_LE);
 gen_op_mov_reg_v(ot, reg, cpu_T1);
 }
 break;
-- 
2.5.5

Re: [Qemu-devel] [PATCH v5 9/9] tests: add a m25p80 test

2016-07-01 Thread Peter Maydell

On 1 July 2016 at 18:18, Peter Maydell  wrote:
> On 28 June 2016 at 19:24, Cédric Le Goater  wrote:
>> This test uses the palmetto platform and the AST2400 SPI controller to
>> test the m25p80 flash module device model. The flash model is defined
>> by the platform (n25q256a) and it would be nice to find way to control
>> it, using a property probably.
>>
>> Signed-off-by: Cédric Le Goater 
>> Reviewed-by: Peter Maydell 
>> ---
>>
>
> This test fails on ppc64be:
>
> TEST: tests/m25p80-test... (pid=65123)
>   /arm/m25p80/read_jedec:  OK
>   /arm/m25p80/erase_sector:OK
>   /arm/m25p80/erase_all:   **
> ERROR:/home/pm215/qemu/tests/m25p80-test.c:162:test_erase_all:
> assertion failed (page[i] == 0x0):
> (0x == 0x)
> FAIL
> GTester: last random seed: R02S54b2016fda21b092e18d7a23a2db86ba
> (pid=65128)
>   /arm/m25p80/write_page:  **
> ERROR:/home/pm215/qemu/tests/m25p80-test.c:200:test_write_page:
> assertion failed (page[i] == my_page_addr + i * 4): (0x ==
> 0x0140)
> FAIL
> GTester: last random seed: R02S8708910d6b72f700bc41e9340a516239
> (pid=65133)
> FAIL: tests/m25p80-test

I'm going to take the easy approach of just dropping this patch;
please fix and resend it.

thanks
-- PMM

[Qemu-devel] [PATCH v2 26/27] linux-user: remove handling of aarch64's EXCP_STREX

2016-07-01 Thread Richard Henderson

From: "Emilio G. Cota" 

The exception is not emitted anymore.

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-30-git-send-email-c...@braap.org>
---
 linux-user/main.c | 125 --
 1 file changed, 125 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index f4fc460..f2f7422 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -865,124 +865,6 @@ void cpu_loop(CPUARMState *env)
 
 #else
 
-/*
- * Handle AArch64 store-release exclusive
- *
- * rs = gets the status result of store exclusive
- * rt = is the register that is stored
- * rt2 = is the second register store (in STP)
- *
- */
-static int do_strex_a64(CPUARMState *env)
-{
-uint64_t val;
-int size;
-bool is_pair;
-int rc = 1;
-int segv = 0;
-uint64_t addr;
-int rs, rt, rt2;
-
-start_exclusive();
-/* size | is_pair << 2 | (rs << 4) | (rt << 9) | (rt2 << 14)); */
-size = extract32(env->exclusive_info, 0, 2);
-is_pair = extract32(env->exclusive_info, 2, 1);
-rs = extract32(env->exclusive_info, 4, 5);
-rt = extract32(env->exclusive_info, 9, 5);
-rt2 = extract32(env->exclusive_info, 14, 5);
-
-addr = env->exclusive_addr;
-
-if (addr != env->exclusive_test) {
-goto finish;
-}
-
-switch (size) {
-case 0:
-segv = get_user_u8(val, addr);
-break;
-case 1:
-segv = get_user_u16(val, addr);
-break;
-case 2:
-segv = get_user_u32(val, addr);
-break;
-case 3:
-segv = get_user_u64(val, addr);
-break;
-default:
-abort();
-}
-if (segv) {
-env->exception.vaddress = addr;
-goto error;
-}
-if (val != env->exclusive_val) {
-goto finish;
-}
-if (is_pair) {
-if (size == 2) {
-segv = get_user_u32(val, addr + 4);
-} else {
-segv = get_user_u64(val, addr + 8);
-}
-if (segv) {
-env->exception.vaddress = addr + (size == 2 ? 4 : 8);
-goto error;
-}
-if (val != env->exclusive_high) {
-goto finish;
-}
-}
-/* handle the zero register */
-val = rt == 31 ? 0 : env->xregs[rt];
-switch (size) {
-case 0:
-segv = put_user_u8(val, addr);
-break;
-case 1:
-segv = put_user_u16(val, addr);
-break;
-case 2:
-segv = put_user_u32(val, addr);
-break;
-case 3:
-segv = put_user_u64(val, addr);
-break;
-}
-if (segv) {
-goto error;
-}
-if (is_pair) {
-/* handle the zero register */
-val = rt2 == 31 ? 0 : env->xregs[rt2];
-if (size == 2) {
-segv = put_user_u32(val, addr + 4);
-} else {
-segv = put_user_u64(val, addr + 8);
-}
-if (segv) {
-env->exception.vaddress = addr + (size == 2 ? 4 : 8);
-goto error;
-}
-}
-rc = 0;
-finish:
-env->pc += 4;
-/* rs == 31 encodes a write to the ZR, thus throwing away
- * the status return. This is rather silly but valid.
- */
-if (rs < 31) {
-env->xregs[rs] = rc;
-}
-error:
-/* instruction faulted, PC does not advance */
-/* either way a strex releases any exclusive lock we have */
-env->exclusive_addr = -1;
-end_exclusive();
-return segv;
-}
-
 /* AArch64 main loop */
 void cpu_loop(CPUARMState *env)
 {
@@ -1023,11 +905,6 @@ void cpu_loop(CPUARMState *env)
 info._sifields._sigfault._addr = env->pc;
 queue_signal(env, info.si_signo, &info);
 break;
-case EXCP_STREX:
-if (!do_strex_a64(env)) {
-break;
-}
-/* fall through for segv */
 case EXCP_PREFETCH_ABORT:
 case EXCP_DATA_ABORT:
 info.si_signo = TARGET_SIGSEGV;
@@ -1063,8 +940,6 @@ void cpu_loop(CPUARMState *env)
 process_pending_signals(env);
 /* Exception return on AArch64 always clears the exclusive monitor,
  * so any return to running guest code implies this.
- * A strex (successful or otherwise) also clears the monitor, so
- * we don't need to specialcase EXCP_STREX.
  */
 env->exclusive_addr = -1;
 }
-- 
2.5.5

[Qemu-devel] [PATCH v2 10/27] tcg: Add atomic128 helpers

2016-07-01 Thread Richard Henderson

Force the use of cmpxchg16b on x86_64.

Wikipedia suggests that only very old AMD64 (circa 2004) did not have
this instruction.  Further, it's required by Windows 8 so no new cpus
will ever omit it.

If we truly care about these, then we could check this at startup time
and then avoid executing paths that use it.

Signed-off-by: Richard Henderson 
---
 configure |  29 -
 cputlb.c  |   6 +++
 include/qemu/int128.h |   6 +++
 softmmu_template.h| 110 +-
 tcg/tcg.h |  22 ++
 5 files changed, 144 insertions(+), 29 deletions(-)

diff --git a/configure b/configure
index 59ea124..586abd6 100755
--- a/configure
+++ b/configure
@@ -1201,7 +1201,10 @@ case "$cpu" in
cc_i386='$(CC) -m32'
;;
 x86_64)
-   CPU_CFLAGS="-m64"
+   # ??? Only extremely old AMD cpus do not have cmpxchg16b.
+   # If we truly care, we should simply detect this case at
+   # runtime and generate the fallback to serial emulation.
+   CPU_CFLAGS="-m64 -mcx16"
LDFLAGS="-m64 $LDFLAGS"
cc_i386='$(CC) -m32'
;;
@@ -4434,6 +4437,26 @@ if compile_prog "" "" ; then
 int128=yes
 fi
 
+#
+# See if 128-bit atomic operations are supported.
+
+atomic128=no
+if test "$int128" = "yes"; then
+  cat > $TMPC << EOF
+int main(void)
+{
+  unsigned __int128 x = 0, y = 0;
+  y = __atomic_load_16(&x, 0);
+  __atomic_store_16(&x, y, 0);
+  __atomic_compare_exchange_16(&x, &y, x, 0, 0, 0);
+  return 0;
+}
+EOF
+  if compile_prog "" "" ; then
+atomic128=yes
+  fi
+fi
+
 
 # check if getauxval is available.
 
@@ -5383,6 +5406,10 @@ if test "$int128" = "yes" ; then
   echo "CONFIG_INT128=y" >> $config_host_mak
 fi
 
+if test "$atomic128" = "yes" ; then
+  echo "CONFIG_ATOMIC128=y" >> $config_host_mak
+fi
+
 if test "$getauxval" = "yes" ; then
   echo "CONFIG_GETAUXVAL=y" >> $config_host_mak
 fi
diff --git a/cputlb.c b/cputlb.c
index 5272456..660f824 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -510,6 +510,12 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, 
target_ulong addr)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+
+#ifdef CONFIG_ATOMIC128
+#define SHIFT 4
+#include "softmmu_template.h"
+#endif
+
 #undef MMUSUFFIX
 
 #define MMUSUFFIX _cmmu
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index ab67275..5819da4 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -2,6 +2,7 @@
 #define INT128_H
 
 #ifdef CONFIG_INT128
+#include "qemu/bswap.h"
 
 typedef __int128 Int128;
 
@@ -137,6 +138,11 @@ static inline void int128_subfrom(Int128 *a, Int128 b)
 *a -= b;
 }
 
+static inline Int128 bswap128(Int128 a)
+{
+return int128_make128(bswap64(int128_gethi(a)), bswap64(int128_getlo(a)));
+}
+
 #else /* !CONFIG_INT128 */
 
 /* Here we are catering to the ABI of the host.  If the host returns
diff --git a/softmmu_template.h b/softmmu_template.h
index 76712b9..0a9f49b 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -27,25 +27,30 @@
 
 #define DATA_SIZE (1 << SHIFT)
 
-#if DATA_SIZE == 8
-#define SUFFIX q
-#define LSUFFIX q
-#define SDATA_TYPE  int64_t
+#if DATA_SIZE == 16
+#define SUFFIX o
+#define LSUFFIX o
+#define SDATA_TYPE Int128
+#define DATA_TYPE  Int128
+#elif DATA_SIZE == 8
+#define SUFFIX q
+#define LSUFFIX q
+#define SDATA_TYPE int64_t
 #define DATA_TYPE  uint64_t
 #elif DATA_SIZE == 4
-#define SUFFIX l
-#define LSUFFIX l
-#define SDATA_TYPE  int32_t
+#define SUFFIX l
+#define LSUFFIX l
+#define SDATA_TYPE int32_t
 #define DATA_TYPE  uint32_t
 #elif DATA_SIZE == 2
-#define SUFFIX w
-#define LSUFFIX uw
-#define SDATA_TYPE  int16_t
+#define SUFFIX w
+#define LSUFFIX uw
+#define SDATA_TYPE int16_t
 #define DATA_TYPE  uint16_t
 #elif DATA_SIZE == 1
-#define SUFFIX b
-#define LSUFFIX ub
-#define SDATA_TYPE  int8_t
+#define SUFFIX b
+#define LSUFFIX ub
+#define SDATA_TYPE int8_t
 #define DATA_TYPE  uint8_t
 #else
 #error unsupported data size
@@ -56,7 +61,7 @@
to the register size of the host.  This is tcg_target_long, except in the
case of a 32-bit host and 64-bit data, and for that we always have
uint64_t.  Don't bother with this widened value for SOFTMMU_CODE_ACCESS.  */
-#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE == 8
+#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE >= 8
 # define WORD_TYPE  DATA_TYPE
 # define USUFFIX SUFFIX
 #else
@@ -73,7 +78,9 @@
 #define ADDR_READ addr_read
 #endif
 
-#if DATA_SIZE == 8
+#if DATA_SIZE == 16
+# define BSWAP(X)  bswap128(X)
+#elif DATA_SIZE == 8
 # define BSWAP(X)  bswap64(X)
 #elif DATA_SIZE == 4
 # define BSWAP(X)  bswap32(X)
@@ -140,6 +147,7 @@
 vidx >= 0;\
 })
 
+#if DATA_SIZE < 16
 #ifndef SOFTMMU_CODE_ACCESS
 static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState

[Qemu-devel] [PATCH v2 14/27] target-i386: emulate LOCK'ed NOT using atomic helper

2016-07-01 Thread Richard Henderson

From: "Emilio G. Cota" 

[rth: Avoid qemu_load that's redundant with the atomic op.]

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-15-git-send-email-c...@braap.org>
Signed-off-by: Richard Henderson 
---
 target-i386/translate.c | 26 --
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 3f10ff0..78eadee 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -4675,10 +4675,15 @@ static target_ulong disas_insn(CPUX86State *env, 
DisasContext *s,
 rm = (modrm & 7) | REX_B(s);
 op = (modrm >> 3) & 7;
 if (mod != 3) {
-if (op == 0)
+if (op == 0) {
 s->rip_offset = insn_const_size(ot);
+}
 gen_lea_modrm(env, s, modrm);
-gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+/* For those below that handle locked memory, don't load here.  */
+if (!(s->prefix & PREFIX_LOCK)
+|| op != 2) {
+gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+}
 } else {
 gen_op_mov_v_reg(ot, cpu_T0, rm);
 }
@@ -4691,11 +4696,20 @@ static target_ulong disas_insn(CPUX86State *env, 
DisasContext *s,
 set_cc_op(s, CC_OP_LOGICB + ot);
 break;
 case 2: /* not */
-tcg_gen_not_tl(cpu_T0, cpu_T0);
-if (mod != 3) {
-gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+if (s->prefix & PREFIX_LOCK) {
+if (mod == 3) {
+goto illegal_op;
+}
+tcg_gen_movi_tl(cpu_T0, ~0);
+tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+s->mem_index, ot | MO_LE);
 } else {
-gen_op_mov_reg_v(ot, rm, cpu_T0);
+tcg_gen_not_tl(cpu_T0, cpu_T0);
+if (mod != 3) {
+gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+} else {
+gen_op_mov_reg_v(ot, rm, cpu_T0);
+}
 }
 break;
 case 3: /* neg */
-- 
2.5.5

[Qemu-devel] [PATCH v2 13/27] target-i386: emulate LOCK'ed INC using atomic helper

2016-07-01 Thread Richard Henderson

From: "Emilio G. Cota" 

[rth: Merge gen_inc_locked back into gen_inc to share cc update.]

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-14-git-send-email-c...@braap.org>
Signed-off-by: Richard Henderson 
---
 target-i386/translate.c | 24 +---
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 58bc954..3f10ff0 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1362,21 +1362,23 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp 
ot, int d)
 /* if d == OR_TMP0, it means memory operand (address in A0) */
 static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
 {
-if (d != OR_TMP0) {
-gen_op_mov_v_reg(ot, cpu_T0, d);
+if (s1->prefix & PREFIX_LOCK) {
+tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1);
+tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+s1->mem_index, ot | MO_LE);
 } else {
-gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
+if (d != OR_TMP0) {
+gen_op_mov_v_reg(ot, cpu_T0, d);
+} else {
+gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
+}
+tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1));
+gen_op_st_rm_T0_A0(s1, ot, d);
 }
+
 gen_compute_eflags_c(s1, cpu_cc_src);
-if (c > 0) {
-tcg_gen_addi_tl(cpu_T0, cpu_T0, 1);
-set_cc_op(s1, CC_OP_INCB + ot);
-} else {
-tcg_gen_addi_tl(cpu_T0, cpu_T0, -1);
-set_cc_op(s1, CC_OP_DECB + ot);
-}
-gen_op_st_rm_T0_A0(s1, ot, d);
 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
 }
 
 static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
-- 
2.5.5

[Qemu-devel] [PATCH v2 21/27] target-arm: Rearrange aa32 load and store functions

2016-07-01 Thread Richard Henderson

Stop specializing on TARGET_LONG_BITS == 32; unconditionally allocate
a temp and expand with tcg_gen_extu_i32_tl.  Split out gen_aa32_addr,
gen_aa32_frob64, gen_aa32_ld_i32 and gen_aa32_st_i32 as separate interfaces.

Signed-off-by: Richard Henderson 
---
 target-arm/translate.c | 171 +++--
 1 file changed, 66 insertions(+), 105 deletions(-)

diff --git a/target-arm/translate.c b/target-arm/translate.c
index bd5d5cb..1b5bf87 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -926,145 +926,106 @@ static inline void store_reg_from_load(DisasContext *s, 
int reg, TCGv_i32 var)
  * These functions work like tcg_gen_qemu_{ld,st}* except
  * that the address argument is TCGv_i32 rather than TCGv.
  */
-#if TARGET_LONG_BITS == 32
 
-#define DO_GEN_LD(SUFF, OPC, BE32_XOR)   \
-static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val,  \
- TCGv_i32 addr, int index)   \
-{\
-TCGMemOp opc = (OPC) | s->be_data;   \
-/* Not needed for user-mode BE32, where we use MO_BE instead.  */\
-if (!IS_USER_ONLY && s->sctlr_b && BE32_XOR) {   \
-TCGv addr_be = tcg_temp_new();   \
-tcg_gen_xori_i32(addr_be, addr, BE32_XOR);   \
-tcg_gen_qemu_ld_i32(val, addr_be, index, opc);   \
-tcg_temp_free(addr_be);  \
-return;  \
-}\
-tcg_gen_qemu_ld_i32(val, addr, index, opc);  \
-}
-
-#define DO_GEN_ST(SUFF, OPC, BE32_XOR)   \
-static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val,  \
- TCGv_i32 addr, int index)   \
-{\
-TCGMemOp opc = (OPC) | s->be_data;   \
-/* Not needed for user-mode BE32, where we use MO_BE instead.  */\
-if (!IS_USER_ONLY && s->sctlr_b && BE32_XOR) {   \
-TCGv addr_be = tcg_temp_new();   \
-tcg_gen_xori_i32(addr_be, addr, BE32_XOR);   \
-tcg_gen_qemu_st_i32(val, addr_be, index, opc);   \
-tcg_temp_free(addr_be);  \
-return;  \
-}\
-tcg_gen_qemu_st_i32(val, addr, index, opc);  \
-}
-
-static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
- TCGv_i32 addr, int index)
+static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, TCGMemOp op)
 {
-TCGMemOp opc = MO_Q | s->be_data;
-tcg_gen_qemu_ld_i64(val, addr, index, opc);
+TCGv addr = tcg_temp_new();
+tcg_gen_extu_i32_tl(addr, a32);
+
 /* Not needed for user-mode BE32, where we use MO_BE instead.  */
-if (!IS_USER_ONLY && s->sctlr_b) {
-tcg_gen_rotri_i64(val, val, 32);
+if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
+tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
 }
+return addr;
 }
 
-static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
- TCGv_i32 addr, int index)
+static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
+int index, TCGMemOp opc)
 {
-TCGMemOp opc = MO_Q | s->be_data;
-/* Not needed for user-mode BE32, where we use MO_BE instead.  */
-if (!IS_USER_ONLY && s->sctlr_b) {
-TCGv_i64 tmp = tcg_temp_new_i64();
-tcg_gen_rotri_i64(tmp, val, 32);
-tcg_gen_qemu_st_i64(tmp, addr, index, opc);
-tcg_temp_free_i64(tmp);
-return;
-}
-tcg_gen_qemu_st_i64(val, addr, index, opc);
+TCGv addr = gen_aa32_addr(s, a32, opc);
+tcg_gen_qemu_ld_i32(val, addr, index, opc);
+tcg_temp_free(addr);
 }
 
-#else
+static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
+int index, TCGMemOp opc)
+{
+TCGv addr = gen_aa32_addr(s, a32, opc);
+tcg_gen_qemu_st_i32(val, addr, index, opc);
+tcg_temp_free(addr);
+}
 
-#define DO_GEN_LD(SUFF, OPC, BE32_XOR)   \
+#define DO_GEN_LD(SUFF, OPC) \
 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val,  \
- TCGv_i32 addr, int index)   \
+ TCGv

[Qemu-devel] [PATCH v2 07/27] tcg: Add EXCP_ATOMIC

2016-07-01 Thread Richard Henderson

When we cannot emulate an atomic operation within a parallel
context, this exception allows us to stop the world and try
again in a serial context.

Signed-off-by: Richard Henderson 
---
 cpu-exec-common.c   |  6 +
 cpu-exec.c  | 23 +++
 cpus.c  |  6 +
 include/exec/cpu-all.h  |  1 +
 include/exec/exec-all.h |  1 +
 include/qemu-common.h   |  1 +
 linux-user/main.c   | 59 -
 tcg/tcg.h   |  1 +
 translate-all.c |  1 +
 9 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/cpu-exec-common.c b/cpu-exec-common.c
index 0cb4ae6..767d9c6 100644
--- a/cpu-exec-common.c
+++ b/cpu-exec-common.c
@@ -77,3 +77,9 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
 }
 siglongjmp(cpu->jmp_env, 1);
 }
+
+void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc)
+{
+cpu->exception_index = EXCP_ATOMIC;
+cpu_loop_exit_restore(cpu, pc);
+}
diff --git a/cpu-exec.c b/cpu-exec.c
index b840e1d..041f8b7 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -225,6 +225,29 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
 }
 #endif
 
+void cpu_exec_step(CPUState *cpu)
+{
+CPUArchState *env = (CPUArchState *)cpu->env_ptr;
+TranslationBlock *tb;
+target_ulong cs_base, pc;
+uint32_t flags;
+bool old_tb_flushed;
+
+old_tb_flushed = cpu->tb_flushed;
+cpu->tb_flushed = false;
+
+cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+tb = tb_gen_code(cpu, pc, cs_base, flags,
+ 1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
+tb->orig_tb = NULL;
+cpu->tb_flushed |= old_tb_flushed;
+/* execute the generated code */
+trace_exec_tb_nocache(tb, pc);
+cpu_tb_exec(cpu, tb);
+tb_phys_invalidate(tb, -1);
+tb_free(tb);
+}
+
 struct tb_desc {
 target_ulong pc;
 target_ulong cs_base;
diff --git a/cpus.c b/cpus.c
index 84c3520..a01bbbd 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1575,6 +1575,12 @@ static void tcg_exec_all(void)
 if (r == EXCP_DEBUG) {
 cpu_handle_guest_debug(cpu);
 break;
+} else if (r == EXCP_ATOMIC) {
+/* ??? When we begin running cpus in parallel, we should
+   stop all cpus, clear parallel_cpus, and interpret a
+   single insn with cpu_exec_step.  In the meantime,
+   we should never get here.  */
+abort();
 }
 } else if (cpu->stop || cpu->stopped) {
 if (cpu->unplug) {
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 8007abb..d2aac4b 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -31,6 +31,7 @@
 #define EXCP_DEBUG  0x10002 /* cpu stopped after a breakpoint or 
singlestep */
 #define EXCP_HALTED 0x10003 /* cpu is halted (waiting for external event) 
*/
 #define EXCP_YIELD  0x10004 /* cpu wants to yield timeslice to another */
+#define EXCP_ATOMIC 0x10005 /* stop-the-world and emulate atomic */
 
 /* some important defines:
  *
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index c1f59fa..ec72c5a 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -59,6 +59,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 void cpu_exec_init(CPUState *cpu, Error **errp);
 void QEMU_NORETURN cpu_loop_exit(CPUState *cpu);
 void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc);
+void QEMU_NORETURN cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc);
 
 #if !defined(CONFIG_USER_ONLY)
 void cpu_reloading_memory_map(void);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 1f2cb94..77e379d 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -76,6 +76,7 @@ void tcg_exec_init(unsigned long tb_size);
 bool tcg_enabled(void);
 
 void cpu_exec_init_all(void);
+void cpu_exec_step(CPUState *cpu);
 
 /**
  * Sends a (part of) iovec down a socket, yielding when the socket is full, or
diff --git a/linux-user/main.c b/linux-user/main.c
index 78d8d04..54df300 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -179,13 +179,25 @@ static inline void start_exclusive(void)
 }
 
 /* Finish an exclusive operation.  */
-static inline void __attribute__((unused)) end_exclusive(void)
+static inline void end_exclusive(void)
 {
 pending_cpus = 0;
 pthread_cond_broadcast(&exclusive_resume);
 pthread_mutex_unlock(&exclusive_lock);
 }
 
+static void step_atomic(CPUState *cpu)
+{
+start_exclusive();
+
+/* Since we got here, we know that parallel_cpus must be true.  */
+parallel_cpus = false;
+cpu_exec_step(cpu);
+parallel_cpus = true;
+
+end_exclusive();
+}
+
 /* Wait for exclusive ops to finish, and begin cpu execution.  */
 static inline void cpu_exec_start(CPUState *cpu)
 {
@@ -437,6 +449,9 @@ void cpu_loop(CPUX86State *env)
   }
 }
 break;
+case EXCP_ATOMIC:
+

[Qemu-devel] [PATCH v2 19/27] target-i386: remove helper_lock()

2016-07-01 Thread Richard Henderson

From: "Emilio G. Cota" 

It's been superseded by the atomic helpers.

The use of the atomic helpers provides a significant performance and scalability
improvement. Below is the result of running the atomic_add-bench microbenchmark
with:
 $ x86_64-linux-user/qemu-x86_64 tests/atomic_add-bench -o 500 -r $r -n $n
where $n is the number of threads and $r is the allowed range for the
additions.

The scenarios measured are:
- atomic: implements x86's ADDL with the atomic_add helper (i.e. this patchset)
- cmpxchg: implements x86's ADDL with a TCG loop using the cmpxchg helper
- master: before this patchset

Results sorted in ascending range, i.e. descending degree of contention.
Y axis is Throughput in Mops/s. Tests are run on an AMD machine with 64
Opteron 6376 cores.

atomic_add-bench: 500 ops/thread, [0,1] range

  25 ++-+--+-+--+--+--+---++
 + atomic +-E--+   + +  +  +  +|
 |cmpxchg +-H--+   |
  20 +Emaster +-N--+  ++
 |||
 |++   |
 |||
  15 +++  ++
 |N|   |
 |+|   |
  10 ++|  ++
 |+|+  |
 | |-+E+--+++  ---+E+--+E+--+E+-+E+--+E|
 |+E+E+- +++ +E+--+E+--|
   5 ++|+ ++
 |+N+H+--- +++ |
 N+--+H+++++   +  +++  --++H+--+H+--+H+++H+---+--- |
   0 ++-+-H+---H-+--+--+--+---H+
 0  10 2030 40 50 60
Number of threads

atomic_add-bench: 500 ops/thread, [0,2] range

  25 ++-+--+-+--+--+--+---++
 ++atomic +-E--+   + +  +  +  +|
 |cmpxchg +-H--+   |
  20 ++master +-N--+  ++
 |E|   |
 |++   |
 ||E   |
  15 ++|  ++
 |N||  |
 |+||   ---+E+--+E+-+E+--+E|
  10 ++| |---+E+--+E+-+E+---+++  +++
 ||H+E+--+E+-- |
 |+|
 | ||  |
   5 ++|+H+--  +++++
 |+N+-  ---+H+--+H+--  |
 +  +N+--+H+++H+---+--+H+++H+---+  ++H+---+--+H|
   0 ++-+--+-+--+--+--+---++
 0  10 2030 40 50 60
Number of threads

atomic_add-bench: 500 ops/thread, [0,8] range

  40 ++-+--+-+--+--+--+---++
 ++atomic +-E--+   + +  +  +  +|
  35 +cmpxchg +-H--+  ++
 | master +-N--+   ---+E+--+E+--+E+-+E+--+E|
  30 ++|   ---+E+--   +++ ++
 | |-+E+---|
  25 ++E +++  ++
 |+ -+E+   |
  20 +E+ E-- +++  ++
 |H|+++|
 |+|   +H+---  |
  15 ++H+   ---+++  +H+-- ++
 |N++H+-- +++---+H+--++|

[Qemu-devel] [PATCH v2 00/27] cmpxchg-based emulation of atomics

2016-07-01 Thread Richard Henderson

I spent a couple evenings this week tweaking Emilio's patch set.

The first major change is to "qemu/int128.h", so that we can use
that type in the context of a 16-byte cmpxchg.  I have yet to teach
TCG code generation about this type, so it's really only usable
from other helper functions.  But that's still an improvement over
having to return two uint64_t by reference.

The second major change is to funnel atomic operation generation
through functions in tcg-op.c.  There we can test whether or not
we're generating code in a parallel context and require atomic
operations.  This also centralizes the helper functions so that we
don't have to have the same sets in every target.

The third major change is providing a mechanism by which we can
trap on atomic operations that we do not support, exit the cpu loop,
stop the world, and then re-execute the instruction in a serial context.
This is obviously something that will need to be filled in further
as MTTCG progresses.

This is minimally tested, but it is good enough to boot Fedora 24 x86-64,
even with the softmmu single-step stubbed out.  Perhaps unsurprisingly,
Fedora does not attempt an unaligned atomic operation.

Comments?


r~


Emilio G. Cota (18):
  atomics: add atomic_xor
  atomics: add atomic_op_fetch variants
  target-i386: emulate LOCK'ed cmpxchg using cmpxchg helpers
  target-i386: emulate LOCK'ed OP instructions using atomic helpers
  target-i386: emulate LOCK'ed INC using atomic helper
  target-i386: emulate LOCK'ed NOT using atomic helper
  target-i386: emulate LOCK'ed NEG using cmpxchg helper
  target-i386: emulate LOCK'ed XADD using atomic helper
  target-i386: emulate LOCK'ed BTX ops using atomic helpers
  target-i386: emulate XCHG using atomic helper
  target-i386: remove helper_lock()
  tests: add atomic_add-bench
  target-arm: emulate LL/SC using cmpxchg helpers
  target-arm: emulate SWP with atomic_xchg helper
  target-arm: emulate aarch64's LL/SC using cmpxchg helpers
  linux-user: remove handling of ARM's EXCP_STREX
  linux-user: remove handling of aarch64's EXCP_STREX
  target-arm: remove EXCP_STREX + cpu_exclusive_{test, info}

Richard Henderson (9):
  exec: Avoid direct references to Int128 parts
  int128: Use __int128 if available
  int128: Add int128_make128
  int128: Use complex numbers if advisable
  tcg: Add EXCP_ATOMIC
  HACK: Always enable parallel_cpus
  tcg: Add atomic helpers
  tcg: Add atomic128 helpers
  target-arm: Rearrange aa32 load and store functions

 Makefile.objs  |   1 -
 Makefile.target|   1 +
 atomic_template.h  | 220 ++
 configure  |  29 +++-
 cpu-exec-common.c  |   6 +
 cpu-exec.c |  23 +++
 cpus.c |   6 +
 cputlb.c   |   9 +-
 exec.c |   4 +-
 include/exec/cpu-all.h |   1 +
 include/exec/exec-all.h|   1 +
 include/qemu-common.h  |   1 +
 include/qemu/atomic.h  |  21 +++
 include/qemu/int128.h  | 261 ++-
 linux-user/main.c  | 277 +++--
 softmmu_template.h | 264 +---
 target-arm/cpu.h   |  17 +--
 target-arm/helper-a64.c| 135 
 target-arm/helper-a64.h|   2 +
 target-arm/internals.h |   4 +-
 target-arm/translate-a64.c | 106 ++---
 target-arm/translate.c | 342 +++--
 target-arm/translate.h |   4 -
 target-i386/helper.h   |   2 -
 target-i386/mem_helper.c   | 127 ---
 target-i386/translate.c| 374 -
 tcg-runtime.c  |  27 ++--
 tcg/tcg-op.c   | 324 +++
 tcg/tcg-op.h   |  44 ++
 tcg/tcg-runtime.h  |  75 +
 tcg/tcg.h  |  76 +
 tests/.gitignore   |   1 +
 tests/Makefile.include |   4 +-
 tests/atomic_add-bench.c   | 180 ++
 translate-all.c|   1 +
 35 files changed, 2189 insertions(+), 781 deletions(-)
 create mode 100644 atomic_template.h
 create mode 100644 tests/atomic_add-bench.c

-- 
2.5.5

[Qemu-devel] [PATCH v2 15/27] target-i386: emulate LOCK'ed NEG using cmpxchg helper

2016-07-01 Thread Richard Henderson

From: "Emilio G. Cota" 

[rth: Move redundant qemu_load out of cmpxchg loop.]

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-16-git-send-email-c...@braap.org>
Signed-off-by: Richard Henderson 
---
 target-i386/translate.c | 38 ++
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 78eadee..19d6f1e 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -4713,11 +4713,41 @@ static target_ulong disas_insn(CPUX86State *env, 
DisasContext *s,
 }
 break;
 case 3: /* neg */
-tcg_gen_neg_tl(cpu_T0, cpu_T0);
-if (mod != 3) {
-gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+if (s->prefix & PREFIX_LOCK) {
+TCGLabel *label1;
+TCGv a0, t0, t1, t2;
+
+if (mod == 3) {
+goto illegal_op;
+}
+a0 = tcg_temp_local_new();
+t0 = tcg_temp_local_new();
+label1 = gen_new_label();
+
+tcg_gen_mov_tl(a0, cpu_A0);
+tcg_gen_mov_tl(t0, cpu_T0);
+
+gen_set_label(label1);
+t1 = tcg_temp_new();
+t2 = tcg_temp_new();
+tcg_gen_mov_tl(t2, t0);
+tcg_gen_neg_tl(t1, t0);
+tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
+  s->mem_index, ot | MO_LE);
+tcg_temp_free(t1);
+tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
+
+tcg_temp_free(t2);
+tcg_temp_free(a0);
+tcg_gen_mov_tl(cpu_T0, t0);
+tcg_temp_free(t0);
 } else {
-gen_op_mov_reg_v(ot, rm, cpu_T0);
+tcg_gen_neg_tl(cpu_T0, cpu_T0);
+if (mod != 3) {
+gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+} else {
+gen_op_mov_reg_v(ot, rm, cpu_T0);
+}
 }
 gen_op_update_neg_cc();
 set_cc_op(s, CC_OP_SUBB + ot);
-- 
2.5.5

[Qemu-devel] [PATCH v2 17/27] target-i386: emulate LOCK'ed BTX ops using atomic helpers

2016-07-01 Thread Richard Henderson

From: "Emilio G. Cota" 

[rth: Avoid redundant qemu_ld in locked case.  Fix previously unnoticed
incorrect zero-extension of address in register-offset case.]

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-18-git-send-email-c...@braap.org>
Signed-off-by: Richard Henderson 
---
 target-i386/translate.c | 87 -
 1 file changed, 57 insertions(+), 30 deletions(-)

diff --git a/target-i386/translate.c b/target-i386/translate.c
index dc132a1..be097a6 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -6647,7 +6647,9 @@ static target_ulong disas_insn(CPUX86State *env, 
DisasContext *s,
 if (mod != 3) {
 s->rip_offset = 1;
 gen_lea_modrm(env, s, modrm);
-gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+if (!(s->prefix & PREFIX_LOCK)) {
+gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+}
 } else {
 gen_op_mov_v_reg(ot, cpu_T0, rm);
 }
@@ -6677,44 +6679,69 @@ static target_ulong disas_insn(CPUX86State *env, 
DisasContext *s,
 rm = (modrm & 7) | REX_B(s);
 gen_op_mov_v_reg(MO_32, cpu_T1, reg);
 if (mod != 3) {
-gen_lea_modrm(env, s, modrm);
+AddressParts a = gen_lea_modrm_0(env, s, modrm);
 /* specific case: we need to add a displacement */
 gen_exts(ot, cpu_T1);
 tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
 tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
-tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
-gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+tcg_gen_add_tl(cpu_A0, gen_lea_modrm_1(a), cpu_tmp0);
+gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
+if (!(s->prefix & PREFIX_LOCK)) {
+gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+}
 } else {
 gen_op_mov_v_reg(ot, cpu_T0, rm);
 }
 bt_op:
 tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
-tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
-switch(op) {
-case 0:
-break;
-case 1:
-tcg_gen_movi_tl(cpu_tmp0, 1);
-tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
-tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
-break;
-case 2:
-tcg_gen_movi_tl(cpu_tmp0, 1);
-tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
-tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0);
-break;
-default:
-case 3:
-tcg_gen_movi_tl(cpu_tmp0, 1);
-tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
-tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0);
-break;
-}
-if (op != 0) {
-if (mod != 3) {
-gen_op_st_v(s, ot, cpu_T0, cpu_A0);
-} else {
-gen_op_mov_reg_v(ot, rm, cpu_T0);
+tcg_gen_movi_tl(cpu_tmp0, 1);
+tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
+if (s->prefix & PREFIX_LOCK) {
+switch (op) {
+case 0: /* bt */
+/* Needs no atomic ops; we surpressed the normal
+   memory load for LOCK above so do it now.  */
+gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+break;
+case 1: /* bts */
+tcg_gen_atomic_fetch_or_tl(cpu_T0, cpu_A0, cpu_tmp0,
+   s->mem_index, ot | MO_LE);
+break;
+case 2: /* btr */
+tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
+tcg_gen_atomic_fetch_and_tl(cpu_T0, cpu_A0, cpu_tmp0,
+s->mem_index, ot | MO_LE);
+break;
+default:
+case 3: /* btc */
+tcg_gen_atomic_fetch_xor_tl(cpu_T0, cpu_A0, cpu_tmp0,
+s->mem_index, ot | MO_LE);
+break;
+}
+tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
+} else {
+tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
+switch (op) {
+case 0: /* bt */
+/* Data already loaded; nothing to do.  */
+break;
+case 1: /* bts */
+tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
+break;
+case 2: /* btr */
+tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0);
+break;
+default:
+case 3: /* btc */
+tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0);
+break;
+}
+if (op != 0) {
+if (mod != 3) {
+gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+} else {
+gen_op_mov_reg_v(ot, rm, cpu_T0);
+}
 }
 }
 
-- 
2.5.5

[Qemu-devel] [PATCH v2 16/27] target-i386: emulate LOCK'ed XADD using atomic helper

2016-07-01 Thread Richard Henderson

From: "Emilio G. Cota" 

[rth: Move load of reg value to common location.]

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-17-git-send-email-c...@braap.org>
Signed-off-by: Richard Henderson 
---
 target-i386/translate.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 19d6f1e..dc132a1 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -5135,19 +5135,24 @@ static target_ulong disas_insn(CPUX86State *env, 
DisasContext *s,
 modrm = cpu_ldub_code(env, s->pc++);
 reg = ((modrm >> 3) & 7) | rex_r;
 mod = (modrm >> 6) & 3;
+gen_op_mov_v_reg(ot, cpu_T0, reg);
 if (mod == 3) {
 rm = (modrm & 7) | REX_B(s);
-gen_op_mov_v_reg(ot, cpu_T0, reg);
 gen_op_mov_v_reg(ot, cpu_T1, rm);
 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
 gen_op_mov_reg_v(ot, reg, cpu_T1);
 gen_op_mov_reg_v(ot, rm, cpu_T0);
 } else {
 gen_lea_modrm(env, s, modrm);
-gen_op_mov_v_reg(ot, cpu_T0, reg);
-gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
-tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
-gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+if (s->prefix & PREFIX_LOCK) {
+tcg_gen_atomic_fetch_add_tl(cpu_T1, cpu_A0, cpu_T0,
+s->mem_index, ot | MO_LE);
+tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
+} else {
+gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
+gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+}
 gen_op_mov_reg_v(ot, reg, cpu_T1);
 }
 gen_op_update2_cc();
-- 
2.5.5

[Qemu-devel] [PATCH v2 06/27] int128: Use complex numbers if advisable

2016-07-01 Thread Richard Henderson

If __int128 is not supported, prefer a base type that is
returned in registers rather than memory.

Signed-off-by: Richard Henderson 
---
 include/qemu/int128.h | 110 +++---
 1 file changed, 69 insertions(+), 41 deletions(-)

diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index 67440fa..ab67275 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -139,27 +139,37 @@ static inline void int128_subfrom(Int128 *a, Int128 b)
 
 #else /* !CONFIG_INT128 */
 
-typedef struct Int128 Int128;
+/* Here we are catering to the ABI of the host.  If the host returns
+   64-bit complex in registers, but the 128-bit structure in memory,
+   then choose the complex representation.  */
+#if defined(__GNUC__) \
+&& (defined(__powerpc__) || defined(__sparc__)) \
+&& !defined(CONFIG_TCG_INTERPRETER)
+typedef _Complex unsigned long long Int128;
 
-struct Int128 {
-uint64_t lo;
-int64_t hi;
-};
+static inline Int128 int128_make128(uint64_t lo, uint64_t hi)
+{
+return lo + 1i * hi;
+}
 
-static inline Int128 int128_make64(uint64_t a)
+static inline uint64_t int128_getlo(Int128 a)
 {
-return (Int128) { a, 0 };
+return __real__ a;
 }
 
-static inline Int128 int128_make128(uint64_t lo, uint64_t hi)
+static inline int64_t int128_gethi(Int128 a)
 {
-return (Int128) { lo, hi };
+return __imag__ a;
 }
+#else
+typedef struct Int128 {
+uint64_t lo;
+int64_t hi;
+} Int128;
 
-static inline uint64_t int128_get64(Int128 a)
+static inline Int128 int128_make128(uint64_t lo, uint64_t hi)
 {
-assert(!a.hi);
-return a.lo;
+return (Int128) { lo, hi };
 }
 
 static inline uint64_t int128_getlo(Int128 a)
@@ -171,78 +181,92 @@ static inline int64_t int128_gethi(Int128 a)
 {
 return a.hi;
 }
+#endif /* complex 128 */
+
+static inline Int128 int128_make64(uint64_t a)
+{
+return int128_make128(a, 0);
+}
+
+static inline uint64_t int128_get64(Int128 a)
+{
+assert(int128_gethi(a) == 0);
+return int128_getlo(a);
+}
 
 static inline Int128 int128_zero(void)
 {
-return int128_make64(0);
+return int128_make128(0, 0);
 }
 
 static inline Int128 int128_one(void)
 {
-return int128_make64(1);
+return int128_make128(1, 0);
 }
 
 static inline Int128 int128_2_64(void)
 {
-return (Int128) { 0, 1 };
+return int128_make128(0, 1);
 }
 
 static inline Int128 int128_exts64(int64_t a)
 {
-return (Int128) { .lo = a, .hi = (a < 0) ? -1 : 0 };
+return int128_make128(a, a < 0 ? -1 : 0);
 }
 
 static inline Int128 int128_and(Int128 a, Int128 b)
 {
-return (Int128) { a.lo & b.lo, a.hi & b.hi };
+uint64_t al = int128_getlo(a), bl = int128_getlo(b);
+uint64_t ah = int128_gethi(a), bh = int128_gethi(b);
+
+return int128_make128(al & bl, ah & bh);
 }
 
 static inline Int128 int128_rshift(Int128 a, int n)
 {
-int64_t h;
-if (!n) {
-return a;
-}
-h = a.hi >> (n & 63);
-if (n >= 64) {
+uint64_t al = int128_getlo(a), ah = int128_gethi(a);
+int64_t h = ((int64_t)ah) >> (n & 63);
+if (n & 64) {
 return int128_make128(h, h >> 63);
 } else {
-return int128_make128((a.lo >> n) | ((uint64_t)a.hi << (64 - n)), h);
+return int128_make128((al >> n) | (ah << (64 - n)), h);
 }
 }
 
 static inline Int128 int128_add(Int128 a, Int128 b)
 {
-uint64_t lo = a.lo + b.lo;
+uint64_t al = int128_getlo(a), bl = int128_getlo(b);
+uint64_t ah = int128_gethi(a), bh = int128_gethi(b);
+uint64_t lo = al + bl;
 
-/* a.lo <= a.lo + b.lo < a.lo + k (k is the base, 2^64).  Hence,
- * a.lo + b.lo >= k implies 0 <= lo = a.lo + b.lo - k < a.lo.
- * Similarly, a.lo + b.lo < k implies a.lo <= lo = a.lo + b.lo < k.
- *
- * So the carry is lo < a.lo.
- */
-return int128_make128(lo, (uint64_t)a.hi + b.hi + (lo < a.lo));
+return int128_make128(lo, ah + bh + (lo < al));
 }
 
-static inline Int128 int128_neg(Int128 a)
+static inline Int128 int128_sub(Int128 a, Int128 b)
 {
-uint64_t lo = -a.lo;
-return int128_make128(lo, ~(uint64_t)a.hi + !lo);
+uint64_t al = int128_getlo(a), bl = int128_getlo(b);
+uint64_t ah = int128_gethi(a), bh = int128_gethi(b);
+
+return int128_make128(al - bl, ah - bh - (al < bl));
 }
 
-static inline Int128 int128_sub(Int128 a, Int128 b)
+static inline Int128 int128_neg(Int128 a)
 {
-return int128_make128(a.lo - b.lo, (uint64_t)a.hi - b.hi - (a.lo < b.lo));
+uint64_t al = int128_getlo(a), ah = int128_gethi(a);
+return int128_make128(-al, ~ah + !al);
 }
 
 static inline bool int128_nonneg(Int128 a)
 {
-return a.hi >= 0;
+return int128_gethi(a) >= 0;
 }
 
 static inline bool int128_eq(Int128 a, Int128 b)
 {
-return a.lo == b.lo && a.hi == b.hi;
+uint64_t al = int128_getlo(a), bl = int128_getlo(b);
+uint64_t ah = int128_gethi(a), bh = int128_gethi(b);
+
+return al == bl && ah == bh;
 }
 
 static inline bool int128_ne(Int128 a, Int128 b)
@@ -252,7 +27

[Qemu-devel] [PATCH v2 11/27] target-i386: emulate LOCK'ed cmpxchg using cmpxchg helpers

2016-07-01 Thread Richard Henderson

From: "Emilio G. Cota" 

The diff here is uglier than necessary. All this does is to turn

FOO

into:

if (s->prefix & PREFIX_LOCK) {
  BAR
} else {
  FOO
}

where FOO is the original implementation of an unlocked cmpxchg.

[rth: Adjust unlocked cmpxchg to use movcond instead of branches.
Adjust helpers to use atomic helpers.]

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-6-git-send-email-c...@braap.org>
Signed-off-by: Richard Henderson 
---
 target-i386/mem_helper.c | 96 ++--
 target-i386/translate.c  | 87 +--
 2 files changed, 120 insertions(+), 63 deletions(-)

diff --git a/target-i386/mem_helper.c b/target-i386/mem_helper.c
index c2f4769..5c0558f 100644
--- a/target-i386/mem_helper.c
+++ b/target-i386/mem_helper.c
@@ -22,6 +22,8 @@
 #include "exec/helper-proto.h"
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
+#include "qemu/int128.h"
+#include "tcg.h"
 
 /* broken thread support */
 
@@ -58,20 +60,39 @@ void helper_lock_init(void)
 
 void helper_cmpxchg8b(CPUX86State *env, target_ulong a0)
 {
-uint64_t d;
+uintptr_t ra = GETPC();
+uint64_t oldv, cmpv, newv;
 int eflags;
 
 eflags = cpu_cc_compute_all(env, CC_OP);
-d = cpu_ldq_data_ra(env, a0, GETPC());
-if (d == (((uint64_t)env->regs[R_EDX] << 32) | 
(uint32_t)env->regs[R_EAX])) {
-cpu_stq_data_ra(env, a0, ((uint64_t)env->regs[R_ECX] << 32)
-  | (uint32_t)env->regs[R_EBX], GETPC());
-eflags |= CC_Z;
+
+cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]);
+newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]);
+
+if (parallel_cpus) {
+#ifdef CONFIG_USER_ONLY
+uint64_t *haddr = g2h(a0);
+cmpv = cpu_to_le64(cmpv);
+newv = cpu_to_le64(newv);
+oldv = atomic_cmpxchg(haddr, cmpv, newv);
+oldv = le64_to_cpu(oldv);
+#else
+int mem_idx = cpu_mmu_index(env, false);
+TCGMemOpIdx oi = make_memop_idx(MO_TEQ, mem_idx);
+oldv = helper_atomic_cmpxchgq_le_mmu(env, a0, cmpv, newv, oi, ra);
+#endif
 } else {
+oldv = cpu_ldq_data_ra(env, a0, ra);
+newv = (cmpv == oldv ? newv : oldv);
 /* always do the store */
-cpu_stq_data_ra(env, a0, d, GETPC());
-env->regs[R_EDX] = (uint32_t)(d >> 32);
-env->regs[R_EAX] = (uint32_t)d;
+cpu_stq_data_ra(env, a0, newv, ra);
+}
+
+if (oldv == cmpv) {
+eflags |= CC_Z;
+} else {
+env->regs[R_EAX] = (uint32_t)oldv;
+env->regs[R_EDX] = (uint32_t)(oldv >> 32);
 eflags &= ~CC_Z;
 }
 CC_SRC = eflags;
@@ -80,25 +101,60 @@ void helper_cmpxchg8b(CPUX86State *env, target_ulong a0)
 #ifdef TARGET_X86_64
 void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
 {
-uint64_t d0, d1;
+uintptr_t ra = GETPC();
+Int128 oldv, cmpv, newv;
 int eflags;
+bool success;
 
 if ((a0 & 0xf) != 0) {
 raise_exception_ra(env, EXCP0D_GPF, GETPC());
 }
 eflags = cpu_cc_compute_all(env, CC_OP);
-d0 = cpu_ldq_data_ra(env, a0, GETPC());
-d1 = cpu_ldq_data_ra(env, a0 + 8, GETPC());
-if (d0 == env->regs[R_EAX] && d1 == env->regs[R_EDX]) {
-cpu_stq_data_ra(env, a0, env->regs[R_EBX], GETPC());
-cpu_stq_data_ra(env, a0 + 8, env->regs[R_ECX], GETPC());
+
+cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
+newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]);
+
+if (parallel_cpus) {
+#ifndef CONFIG_ATOMIC128
+cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+#elif defined(CONFIG_USER_ONLY)
+Int128 *haddr = g2h(a0);
+oldv = cmpv;
+#ifdef HOST_WORDS_BIGENDIAN
+oldv = bswap128(oldv);
+newv = bswap128(newv);
+#endif
+success = __atomic_compare_exchange_16(haddr, &oldv, newv, false,
+   __ATOMIC_SEQ_CST,
+   __ATOMIC_SEQ_CST);
+#ifdef HOST_WORDS_BIGENDIAN
+oldv = bswap128(oldv);
+#endif
+#else
+int mem_idx = cpu_mmu_index(env, false);
+TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+oldv = helper_atomic_cmpxchgo_le_mmu(env, a0, cmpv, newv, oi, ra);
+success = int128_eq(oldv, cmpv);
+#endif
+} else {
+uint64_t o0 = cpu_ldq_data_ra(env, a0 + 0, ra);
+uint64_t o1 = cpu_ldq_data_ra(env, a0 + 8, ra);
+
+oldv = int128_make128(o0, o1);
+success = int128_eq(oldv, cmpv);
+if (!success) {
+newv = oldv;
+}
+
+cpu_stq_data_ra(env, a0 + 0, int128_getlo(newv), ra);
+cpu_stq_data_ra(env, a0 + 8, int128_gethi(newv), ra);
+}
+
+if (success) {
 eflags |= CC_Z;
 } else {
-/* always do the store */
-cpu_stq_data_ra(env, a0, d0, GETPC());
-cpu_stq_data_ra(env, a0 + 8, d1, GETPC());
-env->regs[R_EDX] = d1;
-env

[Qemu-devel] [PATCH v2 01/27] atomics: add atomic_xor

2016-07-01 Thread Richard Henderson

From: "Emilio G. Cota" 

This paves the way for upcoming work.

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-8-git-send-email-c...@braap.org>
---
 include/qemu/atomic.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index 7a59096..a5531da 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -161,6 +161,7 @@
 #define atomic_fetch_sub(ptr, n) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST)
 #define atomic_fetch_and(ptr, n) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST)
 #define atomic_fetch_or(ptr, n)  __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST)
+#define atomic_fetch_xor(ptr, n) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST)
 
 /* And even shorter names that return void.  */
 #define atomic_inc(ptr)((void) __atomic_fetch_add(ptr, 1, 
__ATOMIC_SEQ_CST))
@@ -169,6 +170,7 @@
 #define atomic_sub(ptr, n) ((void) __atomic_fetch_sub(ptr, n, 
__ATOMIC_SEQ_CST))
 #define atomic_and(ptr, n) ((void) __atomic_fetch_and(ptr, n, 
__ATOMIC_SEQ_CST))
 #define atomic_or(ptr, n)  ((void) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST))
+#define atomic_xor(ptr, n) ((void) __atomic_fetch_xor(ptr, n, 
__ATOMIC_SEQ_CST))
 
 #else /* __ATOMIC_RELAXED */
 
@@ -355,6 +357,7 @@
 #define atomic_fetch_sub   __sync_fetch_and_sub
 #define atomic_fetch_and   __sync_fetch_and_and
 #define atomic_fetch_or__sync_fetch_and_or
+#define atomic_fetch_xor   __sync_fetch_and_xor
 #define atomic_cmpxchg __sync_val_compare_and_swap
 
 /* And even shorter names that return void.  */
@@ -364,6 +367,7 @@
 #define atomic_sub(ptr, n) ((void) __sync_fetch_and_sub(ptr, n))
 #define atomic_and(ptr, n) ((void) __sync_fetch_and_and(ptr, n))
 #define atomic_or(ptr, n)  ((void) __sync_fetch_and_or(ptr, n))
+#define atomic_xor(ptr, n) ((void) __sync_fetch_and_xor(ptr, n))
 
 #endif /* __ATOMIC_RELAXED */
 #endif /* __QEMU_ATOMIC_H */
-- 
2.5.5

[Qemu-devel] [PATCH v2 02/27] atomics: add atomic_op_fetch variants

2016-07-01 Thread Richard Henderson

From: "Emilio G. Cota" 

This paves the way for upcoming work.

Signed-off-by: Emilio G. Cota 
Message-Id: <1467054136-10430-9-git-send-email-c...@braap.org>
---
 include/qemu/atomic.h | 17 +
 1 file changed, 17 insertions(+)

diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index a5531da..d75bfde 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -163,6 +163,14 @@
 #define atomic_fetch_or(ptr, n)  __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST)
 #define atomic_fetch_xor(ptr, n) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST)
 
+#define atomic_inc_fetch(ptr)__atomic_add_fetch(ptr, 1, __ATOMIC_SEQ_CST)
+#define atomic_dec_fetch(ptr)__atomic_sub_fetch(ptr, 1, __ATOMIC_SEQ_CST)
+#define atomic_add_fetch(ptr, n) __atomic_add_fetch(ptr, n, __ATOMIC_SEQ_CST)
+#define atomic_sub_fetch(ptr, n) __atomic_sub_fetch(ptr, n, __ATOMIC_SEQ_CST)
+#define atomic_and_fetch(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_SEQ_CST)
+#define atomic_or_fetch(ptr, n)  __atomic_or_fetch(ptr, n, __ATOMIC_SEQ_CST)
+#define atomic_xor_fetch(ptr, n) __atomic_xor_fetch(ptr, n, __ATOMIC_SEQ_CST)
+
 /* And even shorter names that return void.  */
 #define atomic_inc(ptr)((void) __atomic_fetch_add(ptr, 1, 
__ATOMIC_SEQ_CST))
 #define atomic_dec(ptr)((void) __atomic_fetch_sub(ptr, 1, 
__ATOMIC_SEQ_CST))
@@ -358,6 +366,15 @@
 #define atomic_fetch_and   __sync_fetch_and_and
 #define atomic_fetch_or__sync_fetch_and_or
 #define atomic_fetch_xor   __sync_fetch_and_xor
+
+#define atomic_inc_fetch(ptr)  __sync_add_and_fetch(ptr, 1)
+#define atomic_dec_fetch(ptr)  __sync_add_and_fetch(ptr, -1)
+#define atomic_add_fetch   __sync_add_and_fetch
+#define atomic_sub_fetch   __sync_sub_and_fetch
+#define atomic_and_fetch   __sync_and_and_fetch
+#define atomic_or_fetch__sync_or_and_fetch
+#define atomic_xor_fetch   __sync_xor_and_fetch
+
 #define atomic_cmpxchg __sync_val_compare_and_swap
 
 /* And even shorter names that return void.  */
-- 
2.5.5

[Qemu-devel] [PATCH v2 03/27] exec: Avoid direct references to Int128 parts

2016-07-01 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 exec.c|  4 ++--
 include/qemu/int128.h | 10 ++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/exec.c b/exec.c
index 0122ef7..806e2fe 100644
--- a/exec.c
+++ b/exec.c
@@ -318,9 +318,9 @@ static inline bool section_covers_addr(const 
MemoryRegionSection *section,
 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
  * the section must cover the entire address space.
  */
-return section->size.hi ||
+return int128_gethi(section->size) ||
range_covers_byte(section->offset_within_address_space,
- section->size.lo, addr);
+ int128_getlo(section->size), addr);
 }
 
 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index c598881..52aaf99 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -20,6 +20,16 @@ static inline uint64_t int128_get64(Int128 a)
 return a.lo;
 }
 
+static inline uint64_t int128_getlo(Int128 a)
+{
+return a.lo;
+}
+
+static inline int64_t int128_gethi(Int128 a)
+{
+return a.hi;
+}
+
 static inline Int128 int128_zero(void)
 {
 return int128_make64(0);
-- 
2.5.5

[Qemu-devel] [PATCH v2 04/27] int128: Use __int128 if available

2016-07-01 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 include/qemu/int128.h | 135 +-
 1 file changed, 134 insertions(+), 1 deletion(-)

diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index 52aaf99..08f1db1 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -1,6 +1,138 @@
 #ifndef INT128_H
 #define INT128_H
 
+#ifdef CONFIG_INT128
+
+typedef __int128 Int128;
+
+static inline Int128 int128_make64(uint64_t a)
+{
+return a;
+}
+
+static inline uint64_t int128_get64(Int128 a)
+{
+uint64_t r = a;
+assert(r == a);
+return r;
+}
+
+static inline uint64_t int128_getlo(Int128 a)
+{
+return a;
+}
+
+static inline int64_t int128_gethi(Int128 a)
+{
+return a >> 64;
+}
+
+static inline Int128 int128_zero(void)
+{
+return 0;
+}
+
+static inline Int128 int128_one(void)
+{
+return 1;
+}
+
+static inline Int128 int128_2_64(void)
+{
+return (Int128)1 << 64;
+}
+
+static inline Int128 int128_exts64(int64_t a)
+{
+return a;
+}
+
+static inline Int128 int128_and(Int128 a, Int128 b)
+{
+return a & b;
+}
+
+static inline Int128 int128_rshift(Int128 a, int n)
+{
+return a >> n;
+}
+
+static inline Int128 int128_add(Int128 a, Int128 b)
+{
+return a + b;
+}
+
+static inline Int128 int128_neg(Int128 a)
+{
+return -a;
+}
+
+static inline Int128 int128_sub(Int128 a, Int128 b)
+{
+return a - b;
+}
+
+static inline bool int128_nonneg(Int128 a)
+{
+return a >= 0;
+}
+
+static inline bool int128_eq(Int128 a, Int128 b)
+{
+return a == b;
+}
+
+static inline bool int128_ne(Int128 a, Int128 b)
+{
+return a != b;
+}
+
+static inline bool int128_ge(Int128 a, Int128 b)
+{
+return a >= b;
+}
+
+static inline bool int128_lt(Int128 a, Int128 b)
+{
+return a < b;
+}
+
+static inline bool int128_le(Int128 a, Int128 b)
+{
+return a <= b;
+}
+
+static inline bool int128_gt(Int128 a, Int128 b)
+{
+return a > b;
+}
+
+static inline bool int128_nz(Int128 a)
+{
+return a != 0;
+}
+
+static inline Int128 int128_min(Int128 a, Int128 b)
+{
+return a < b ? a : b;
+}
+
+static inline Int128 int128_max(Int128 a, Int128 b)
+{
+return a > b ? a : b;
+}
+
+static inline void int128_addto(Int128 *a, Int128 b)
+{
+*a += b;
+}
+
+static inline void int128_subfrom(Int128 *a, Int128 b)
+{
+*a -= b;
+}
+
+#else /* !CONFIG_INT128 */
 
 typedef struct Int128 Int128;
 
@@ -153,4 +285,5 @@ static inline void int128_subfrom(Int128 *a, Int128 b)
 *a = int128_sub(*a, b);
 }
 
-#endif
+#endif /* CONFIG_INT128 */
+#endif /* INT128_H */
-- 
2.5.5

[Qemu-devel] [PATCH v2 08/27] HACK: Always enable parallel_cpus

2016-07-01 Thread Richard Henderson

This is really just a placeholder for an actual
command-line switch for mttcg.
---
 translate-all.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/translate-all.c b/translate-all.c
index 99ae7f9..a10fa06 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -119,7 +119,7 @@ static void *l1_map[V_L1_SIZE];
 
 /* code generation context */
 TCGContext tcg_ctx;
-bool parallel_cpus;
+bool parallel_cpus = 1;
 
 /* translation block context */
 #ifdef CONFIG_USER_ONLY
-- 
2.5.5

Re: [Qemu-devel] [PATCH v5 7/9] ast2400: add SPI flash slaves

2016-07-01 Thread Peter Maydell

On 1 July 2016 at 17:44, Cédric Le Goater  wrote:
> I have some extra patches to use a rom device and boot from flash0.
> That is for next week.

We're in softfreeze now, so really I should stop
taking non-bugfix patches, though for a new board
with missing stuff that prevents boot there's a little
flexibility.

thanks
-- PMM

[Qemu-devel] [PULL 2/2] 9p: synth: drop v9fs_ prefix

2016-07-01 Thread Greg Kurz

To have shorter lines and be consistent with other fs devices.

Acked-by: Cédric Le Goater 
Signed-off-by: Greg Kurz 
---
 hw/9pfs/9p-synth.c | 200 ++---
 1 file changed, 100 insertions(+), 100 deletions(-)

diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c
index 73c8be816bbb..4b6d4e6a3f1c 100644
--- a/hw/9pfs/9p-synth.c
+++ b/hw/9pfs/9p-synth.c
@@ -21,19 +21,19 @@
 #include "qemu/cutils.h"
 
 /* Root node for synth file system */
-static V9fsSynthNode v9fs_synth_root = {
+static V9fsSynthNode synth_root = {
 .name = "/",
 .actual_attr = {
 .mode = 0555 | S_IFDIR,
 .nlink = 1,
 },
-.attr = &v9fs_synth_root.actual_attr,
+.attr = &synth_root.actual_attr,
 };
 
-static QemuMutex  v9fs_synth_mutex;
-static int v9fs_synth_node_count;
+static QemuMutex  synth_mutex;
+static int synth_node_count;
 /* set to 1 when the synth fs is ready */
-static int v9fs_synth_fs;
+static int synth_fs;
 
 static V9fsSynthNode *v9fs_add_dir_node(V9fsSynthNode *parent, int mode,
 const char *name,
@@ -69,16 +69,16 @@ int qemu_v9fs_synth_mkdir(V9fsSynthNode *parent, int mode,
 int ret;
 V9fsSynthNode *node, *tmp;
 
-if (!v9fs_synth_fs) {
+if (!synth_fs) {
 return EAGAIN;
 }
 if (!name || (strlen(name) >= NAME_MAX)) {
 return EINVAL;
 }
 if (!parent) {
-parent = &v9fs_synth_root;
+parent = &synth_root;
 }
-qemu_mutex_lock(&v9fs_synth_mutex);
+qemu_mutex_lock(&synth_mutex);
 QLIST_FOREACH(tmp, &parent->child, sibling) {
 if (!strcmp(tmp->name, name)) {
 ret = EEXIST;
@@ -86,7 +86,7 @@ int qemu_v9fs_synth_mkdir(V9fsSynthNode *parent, int mode,
 }
 }
 /* Add the name */
-node = v9fs_add_dir_node(parent, mode, name, NULL, 
v9fs_synth_node_count++);
+node = v9fs_add_dir_node(parent, mode, name, NULL, synth_node_count++);
 v9fs_add_dir_node(node, parent->attr->mode, "..",
   parent->attr, parent->attr->inode);
 v9fs_add_dir_node(node, node->attr->mode, ".",
@@ -94,7 +94,7 @@ int qemu_v9fs_synth_mkdir(V9fsSynthNode *parent, int mode,
 *result = node;
 ret = 0;
 err_out:
-qemu_mutex_unlock(&v9fs_synth_mutex);
+qemu_mutex_unlock(&synth_mutex);
 return ret;
 }
 
@@ -105,17 +105,17 @@ int qemu_v9fs_synth_add_file(V9fsSynthNode *parent, int 
mode,
 int ret;
 V9fsSynthNode *node, *tmp;
 
-if (!v9fs_synth_fs) {
+if (!synth_fs) {
 return EAGAIN;
 }
 if (!name || (strlen(name) >= NAME_MAX)) {
 return EINVAL;
 }
 if (!parent) {
-parent = &v9fs_synth_root;
+parent = &synth_root;
 }
 
-qemu_mutex_lock(&v9fs_synth_mutex);
+qemu_mutex_lock(&synth_mutex);
 QLIST_FOREACH(tmp, &parent->child, sibling) {
 if (!strcmp(tmp->name, name)) {
 ret = EEXIST;
@@ -126,7 +126,7 @@ int qemu_v9fs_synth_add_file(V9fsSynthNode *parent, int 
mode,
 mode = ((mode & 0777) | S_IFREG);
 node = g_malloc0(sizeof(V9fsSynthNode));
 node->attr = &node->actual_attr;
-node->attr->inode  = v9fs_synth_node_count++;
+node->attr->inode  = synth_node_count++;
 node->attr->nlink  = 1;
 node->attr->read   = read;
 node->attr->write  = write;
@@ -136,11 +136,11 @@ int qemu_v9fs_synth_add_file(V9fsSynthNode *parent, int 
mode,
 QLIST_INSERT_HEAD_RCU(&parent->child, node, sibling);
 ret = 0;
 err_out:
-qemu_mutex_unlock(&v9fs_synth_mutex);
+qemu_mutex_unlock(&synth_mutex);
 return ret;
 }
 
-static void v9fs_synth_fill_statbuf(V9fsSynthNode *node, struct stat *stbuf)
+static void synth_fill_statbuf(V9fsSynthNode *node, struct stat *stbuf)
 {
 stbuf->st_dev = 0;
 stbuf->st_ino = node->attr->inode;
@@ -157,24 +157,24 @@ static void v9fs_synth_fill_statbuf(V9fsSynthNode *node, 
struct stat *stbuf)
 stbuf->st_ctime = 0;
 }
 
-static int v9fs_synth_lstat(FsContext *fs_ctx,
+static int synth_lstat(FsContext *fs_ctx,
 V9fsPath *fs_path, struct stat *stbuf)
 {
 V9fsSynthNode *node = *(V9fsSynthNode **)fs_path->data;
 
-v9fs_synth_fill_statbuf(node, stbuf);
+synth_fill_statbuf(node, stbuf);
 return 0;
 }
 
-static int v9fs_synth_fstat(FsContext *fs_ctx, int fid_type,
+static int synth_fstat(FsContext *fs_ctx, int fid_type,
 V9fsFidOpenState *fs, struct stat *stbuf)
 {
 V9fsSynthOpenState *synth_open = fs->private;
-v9fs_synth_fill_statbuf(synth_open->node, stbuf);
+synth_fill_statbuf(synth_open->node, stbuf);
 return 0;
 }
 
-static int v9fs_synth_opendir(FsContext *ctx,
+static int synth_opendir(FsContext *ctx,
  V9fsPath *fs_path, V9fsFidOpenState *fs)
 {
 V9fsSynthOpenState *synth_open;
@@ -187,7 +187,7 @@ static int v9fs_synth_opendir(FsContext *ctx,
 return 0;
 }
 
-static int v9fs_synth_closedir(FsContext *ctx,

[Qemu-devel] [PULL 1/2] 9p: don't include <sys/uio.h>

2016-07-01 Thread Greg Kurz

From: Greg Kurz 

The <sys/uio.h> system header doesn't exist on all host platforms. Code
should include "qemu/osdep.h" instead to avoid build breaks on platforms
that don't define CONFIG_IOVEC (like win32, if it is to support 9p one day).

Acked-by: Cédric Le Goater 
Acked-by: Michael Fritscher 
Signed-off-by: Greg Kurz 
---
 fsdev/9p-iov-marshal.c | 1 -
 fsdev/9p-marshal.c | 1 -
 fsdev/file-op-9p.h | 1 -
 3 files changed, 3 deletions(-)

diff --git a/fsdev/9p-iov-marshal.c b/fsdev/9p-iov-marshal.c
index fce1ee9e5531..584082b5d61a 100644
--- a/fsdev/9p-iov-marshal.c
+++ b/fsdev/9p-iov-marshal.c
@@ -14,7 +14,6 @@
 #include "qemu/osdep.h"
 #include 
 #include 
-#include <sys/uio.h>
 
 #include "9p-iov-marshal.h"
 #include "qemu/bswap.h"
diff --git a/fsdev/9p-marshal.c b/fsdev/9p-marshal.c
index f56ef0e60c0f..238dbf21b1d5 100644
--- a/fsdev/9p-marshal.c
+++ b/fsdev/9p-marshal.c
@@ -15,7 +15,6 @@
 #include 
 #include 
 #include 
-#include <sys/uio.h>
 
 #include "9p-marshal.h"
 
diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h
index 55614949740d..b1338ba06c95 100644
--- a/fsdev/file-op-9p.h
+++ b/fsdev/file-op-9p.h
@@ -14,7 +14,6 @@
 #define _FILEOP_H
 #include 
 #include 
-#include <sys/uio.h>
 #include 
 
 #define SM_LOCAL_MODE_BITS    0600
-- 
2.5.5

[Qemu-devel] [PULL 0/2] 9p patches for 2.7

2016-07-01 Thread Greg Kurz

The following changes since commit 94e31093ff34ead50dc3970699a4e36582fb3f17:

  Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20160630.0' 
into staging (2016-07-01 11:52:14 +0100)

are available in the git repository at:

  https://github.com/gkurz/qemu.git tags/for-upstream

for you to fetch changes up to b05528b53354f7373fe15a8225cc475a6a66101a:

  9p: synth: drop v9fs_ prefix (2016-07-01 14:38:54 +0200)


Only trivial fixes.


Greg Kurz (2):
  9p: don't include <sys/uio.h>
  9p: synth: drop v9fs_ prefix

 fsdev/9p-iov-marshal.c |   1 -
 fsdev/9p-marshal.c |   1 -
 fsdev/file-op-9p.h |   1 -
 hw/9pfs/9p-synth.c | 200 -
 4 files changed, 100 insertions(+), 103 deletions(-)
-- 
2.5.5

Re: [Qemu-devel] [RFC 7/8] cpu-exec-common: Introduce async_safe_run_on_cpu()

2016-07-01 Thread Sergey Fedorov

On 01/07/16 19:29, Alvise Rigo wrote:
> Hi Sergey,
>
> On Mon, Jun 20, 2016 at 12:28 AM, Sergey Fedorov
>  wrote:
>> diff --git a/cpu-exec-common.c b/cpu-exec-common.c
>> index 8184e0662cbd..3056324738f8 100644
>> --- a/cpu-exec-common.c
>> +++ b/cpu-exec-common.c
>> @@ -25,6 +25,7 @@
>>
>>  bool exit_request;
>>  CPUState *tcg_current_cpu;
>> +int tcg_pending_cpus;
>>
>>  /* exit the current TB, but without causing any exception to be raised */
>>  void cpu_loop_exit_noexc(CPUState *cpu)
>> @@ -78,6 +79,15 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
>>  siglongjmp(cpu->jmp_env, 1);
>>  }
>>
>> +static int safe_work_pending;
>> +
>> +void wait_safe_cpu_work(void)
>> +{
>> +while (atomic_mb_read(&safe_work_pending) > 0) {
>> +wait_cpu_work();
>> +}
>> +}
>> +
> Is this piece of code deadlock-safe once we are in mttcg mode?

It is supposed to be deadlock-safe.

> What happens when two threads call simultaneously async_safe_run_on_cpu?
>

In this case each thread will roughly:
 - exit its execution loop;
 - take BQL;
 - decrement 'tcg_pending_cpus', signal 'qemu_work_cond' if zero;
 - start processing its work queue;
 - on encountering safe work, wait on 'qemu_work_cond' for
'tcg_pending_cpus' to become zero;
 - reacquire BQL;
 - process the safe work;
 - decrement 'safe_work_pending', signal 'qemu_work_cond' if zero;
 - when finished processing work, wait on 'qemu_work_cond' for
'safe_work_pending' to become zero;
 - reacquire BQL;
 - continue execution (releasing BQL).

Hope this will help.

Kind regards,
Sergey.

1 2 3 >

1 - 100 of 225 matches

Mail list logo