date:20220729

[PATCH v3 3/3] dump: use jobs framework for dump guest memory

2022-07-29 Thread Hogan Wang via

There's no way to cancel the current executing dump process, lead to the
virtual machine manager daemon((e.g. libvirtd) cannot restore the dump
job after daemon restart.

When caller pass the 'job-id' argument, create a job for dump process.
And then caller can use job-cancel QMP command to cancel the detached
dump process, use job-dismiss QMP command to release job object.

Examples:
Start dump job:
{"execute": "dump-guest-memory", "arguments": { "job-id": "dump-guest-memory",
"protocol": "file:/tmp/vm.dump",
"paging": false,
"format": "elf",
"detach": true
  }}

Cancel dump job:
{"execute": "job-cancel", "arguments": { "id": "dump-guest-memory" }}

Dismiss dump job:
{"execute": "job-dismiss", "arguments": { "id": "dump-guest-memory" }}

Signed-off-by: Hogan Wang 
---
 dump/dump.c | 76 +
 1 file changed, 76 insertions(+)

diff --git a/dump/dump.c b/dump/dump.c
index cec9be30b4..3f4ed8e7a7 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -98,6 +98,7 @@ static int dump_cleanup(DumpState *s)
 {
 guest_phys_blocks_free(>guest_phys_blocks);
 memory_mapping_list_free(>list);
+s->job = NULL;
 close(s->fd);
 g_free(s->guest_note);
 s->guest_note = NULL;
@@ -1542,6 +1543,14 @@ static void get_max_mapnr(DumpState *s)
 
 static DumpState dump_state_global = { .status = DUMP_STATUS_NONE };
 
+typedef struct DumpJob {
+Job common;
+DumpState *state;
+Coroutine *co;
+Error **errp;
+} DumpJob;
+
+
 static void dump_state_prepare(DumpState *s)
 {
 /* zero the struct, setting status to active */
@@ -1894,6 +1903,64 @@ DumpQueryResult *qmp_query_dump(Error **errp)
 return result;
 }
 
+static void *dump_job_thread(void *opaque)
+{
+DumpJob *job = (DumpJob *)opaque;
+job_progress_set_remaining(>common, 1);
+dump_process(job->state, job->errp);
+job_progress_update(>common, 1);
+aio_co_wake(job->co);
+return NULL;
+}
+
+static void dump_sync_job_bh(void *opaque)
+{
+dump_job_thread(opaque);
+}
+
+static int coroutine_fn dump_guest_memory_job_run(Job *job, Error **errp)
+{
+DumpJob *s = container_of(job, DumpJob, common);
+DumpState *state = _state_global;
+
+s->errp = errp;
+s->co = qemu_coroutine_self();
+
+if (state->detached) {
+/* detached dump */
+qemu_thread_create(>state->dump_thread, "dump_thread",
+   dump_job_thread, job, QEMU_THREAD_DETACHED);
+} else {
+aio_bh_schedule_oneshot(qemu_get_aio_context(),
+dump_sync_job_bh, job);
+}
+qemu_coroutine_yield();
+return qatomic_read(>status) == DUMP_STATUS_COMPLETED ? 0 : -1;
+}
+
+static const JobDriver dump_guest_memory_job_driver = {
+.instance_size = sizeof(DumpJob),
+.job_type  = JOB_TYPE_DUMP_GUEST_MEMORY,
+.run   = dump_guest_memory_job_run,
+};
+
+static void dump_job_start(DumpState *state, const char *job_id,
+   bool detach, Error **errp)
+{
+DumpJob *job;
+
+job = job_create(job_id, _guest_memory_job_driver, NULL,
+ qemu_get_aio_context(), JOB_MANUAL_DISMISS,
+ NULL, NULL, errp);
+if (!job) {
+return;
+}
+state->detached = detach;
+state->job = >common;
+job->state = state;
+job_start(>common);
+}
+
 void qmp_dump_guest_memory(bool paging, const char *file,
bool has_job_id, const char *job_id,
bool has_detach, bool detach,
@@ -2010,6 +2077,15 @@ void qmp_dump_guest_memory(bool paging, const char *file,
 return;
 }
 
+if (has_job_id) {
+dump_job_start(s, job_id, detach_p, errp);
+if (*errp) {
+qatomic_set(>status, DUMP_STATUS_FAILED);
+dump_cleanup(s);
+}
+return;
+}
+
 if (detach_p) {
 /* detached dump */
 s->detached = true;
-- 
2.33.0

[PATCH v3 2/3] job: introduce dump guest memory job

2022-07-29 Thread Hogan Wang via

There's no way to cancel the current executing dump process, lead to the
virtual machine manager daemon((e.g. libvirtd) cannot restore the dump
job after daemon restart.

Introduce dump guest memory job type, and add an optional 'job-id'
argument for dump-guest-memory QMP to make use of jobs framework.

Signed-off-by: Hogan Wang 
---
 dump/dump-hmp-cmds.c | 12 ++--
 dump/dump.c  |  1 +
 qapi/dump.json   |  6 +-
 qapi/job.json|  5 -
 4 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/dump/dump-hmp-cmds.c b/dump/dump-hmp-cmds.c
index e5053b04cd..ba28a5e631 100644
--- a/dump/dump-hmp-cmds.c
+++ b/dump/dump-hmp-cmds.c
@@ -24,9 +24,11 @@ void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict)
 bool has_begin = qdict_haskey(qdict, "begin");
 bool has_length = qdict_haskey(qdict, "length");
 bool has_detach = qdict_haskey(qdict, "detach");
+bool has_job_id = qdict_haskey(qdict, "job-id");
 int64_t begin = 0;
 int64_t length = 0;
 bool detach = false;
+const char *job_id = NULL;
 enum DumpGuestMemoryFormat dump_format = DUMP_GUEST_MEMORY_FORMAT_ELF;
 char *prot;
 
@@ -62,10 +64,16 @@ void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict)
 detach = qdict_get_bool(qdict, "detach");
 }
 
+if (has_job_id) {
+job_id = qdict_get_str(qdict, "job-id");
+}
+
 prot = g_strconcat("file:", file, NULL);
 
-qmp_dump_guest_memory(paging, prot, true, detach, has_begin, begin,
-  has_length, length, true, dump_format, );
+qmp_dump_guest_memory(paging, prot, has_job_id, job_id,
+  true, detach, has_begin, begin,
+  has_length, length, true, dump_format,
+  );
 hmp_handle_error(mon, err);
 g_free(prot);
 }
diff --git a/dump/dump.c b/dump/dump.c
index a57c580b12..cec9be30b4 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -1895,6 +1895,7 @@ DumpQueryResult *qmp_query_dump(Error **errp)
 }
 
 void qmp_dump_guest_memory(bool paging, const char *file,
+   bool has_job_id, const char *job_id,
bool has_detach, bool detach,
bool has_begin, int64_t begin, bool has_length,
int64_t length, bool has_format,
diff --git a/qapi/dump.json b/qapi/dump.json
index 90859c5483..5209d0b74f 100644
--- a/qapi/dump.json
+++ b/qapi/dump.json
@@ -59,6 +59,9 @@
 #2. fd: the protocol starts with "fd:", and the following string
 #   is the fd's name.
 #
+# @job-id: identifier for the newly-created memory dump job. If
+#  omitted, use 'memory-guest-dump' by default. (Since 7.2)
+#
 # @detach: if true, QMP will return immediately rather than
 #  waiting for the dump to finish. The user can track progress
 #  using "query-dump". (since 2.6).
@@ -88,7 +91,8 @@
 #
 ##
 { 'command': 'dump-guest-memory',
-  'data': { 'paging': 'bool', 'protocol': 'str', '*detach': 'bool',
+  'data': { 'paging': 'bool', 'protocol': 'str',
+'*job-id': 'str', '*detach': 'bool',
 '*begin': 'int', '*length': 'int',
 '*format': 'DumpGuestMemoryFormat'} }
 
diff --git a/qapi/job.json b/qapi/job.json
index d5f84e9615..e14d2290a5 100644
--- a/qapi/job.json
+++ b/qapi/job.json
@@ -28,11 +28,14 @@
 #
 # @snapshot-delete: snapshot delete job type, see "snapshot-delete" (since 6.0)
 #
+# @dump-guest-memory: dump guest memory job type, see "dump-guest-memory" 
(since 7.2)
+#
 # Since: 1.7
 ##
 { 'enum': 'JobType',
   'data': ['commit', 'stream', 'mirror', 'backup', 'create', 'amend',
-   'snapshot-load', 'snapshot-save', 'snapshot-delete'] }
+   'snapshot-load', 'snapshot-save', 'snapshot-delete',
+   'dump-guest-memory'] }
 
 ##
 # @JobStatus:
-- 
2.33.0

[PATCH v3 1/3] dump: support cancel dump process

2022-07-29 Thread Hogan Wang via

Break saving pages or dump iterate when dump job in cancel state,
make sure dump process exits as soon as possible.

Signed-off-by: Hogan Wang 
---
 dump/dump.c   | 23 +++
 include/sysemu/dump.h |  2 ++
 2 files changed, 25 insertions(+)

diff --git a/dump/dump.c b/dump/dump.c
index 4d9658ffa2..a57c580b12 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -54,6 +54,8 @@ static Error *dump_migration_blocker;
   DIV_ROUND_UP((name_size), 4) +\
   DIV_ROUND_UP((desc_size), 4)) * 4)
 
+static bool dump_cancelling(void);
+
 static inline bool dump_is_64bit(DumpState *s)
 {
 return s->dump_info.d_class == ELFCLASS64;
@@ -118,6 +120,10 @@ static int fd_write_vmcore(const void *buf, size_t size, 
void *opaque)
 DumpState *s = opaque;
 size_t written_size;
 
+if (dump_cancelling()) {
+return -ECANCELED;
+}
+
 written_size = qemu_write_full(s->fd, buf, size);
 if (written_size != size) {
 return -errno;
@@ -627,6 +633,10 @@ static void dump_iterate(DumpState *s, Error **errp)
 
 do {
 block = s->next_block;
+if (dump_cancelling()) {
+error_setg(errp, "dump: job cancelled");
+return;
+}
 
 size = block->target_end - block->target_start;
 if (s->has_filter) {
@@ -1321,6 +1331,10 @@ static void write_dump_pages(DumpState *s, Error **errp)
  * first page of page section
  */
 while (get_next_page(_iter, _iter, , s)) {
+if (dump_cancelling()) {
+error_setg(errp, "dump: job cancelled");
+goto out;
+}
 /* check zero page */
 if (buffer_is_zero(buf, s->dump_info.page_size)) {
 ret = write_cache(_desc, _zero, sizeof(PageDescriptor),
@@ -1540,6 +1554,15 @@ bool qemu_system_dump_in_progress(void)
 return (qatomic_read(>status) == DUMP_STATUS_ACTIVE);
 }
 
+static bool dump_cancelling(void)
+{
+DumpState *state = _state_global;
+if (state->job && job_is_cancelled(state->job)) {
+return true;
+}
+return false;
+}
+
 /* calculate total size of memory to be dumped (taking filter into
  * acoount.) */
 static int64_t dump_calculate_size(DumpState *s)
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index ffc2ea1072..41bdbe595f 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -15,6 +15,7 @@
 #define DUMP_H
 
 #include "qapi/qapi-types-dump.h"
+#include "qemu/job.h"
 
 #define MAKEDUMPFILE_SIGNATURE  "makedumpfile"
 #define MAX_SIZE_MDF_HEADER (4096) /* max size of makedumpfile_header 
*/
@@ -154,6 +155,7 @@ typedef struct DumpState {
 GuestPhysBlockList guest_phys_blocks;
 ArchDumpInfo dump_info;
 MemoryMappingList list;
+Job *job;
 uint32_t phdr_num;
 uint32_t shdr_num;
 bool resume;
-- 
2.33.0

[PATCH] target/mips: Advance pc after semihosting exception

2022-07-29 Thread Richard Henderson

Delay generating the exception until after we know the
insn length, and record that length in env->error_code.

Fixes: 8ec7e3c53d4 ("target/mips: Use an exception for semihosting")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1126
Signed-off-by: Richard Henderson 
---
 target/mips/tcg/translate.h   |  4 
 target/mips/tcg/sysemu/tlb_helper.c   |  1 +
 target/mips/tcg/translate.c   | 10 +-
 target/mips/tcg/micromips_translate.c.inc |  6 +++---
 target/mips/tcg/mips16e_translate.c.inc   |  2 +-
 target/mips/tcg/nanomips_translate.c.inc  |  4 ++--
 6 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/target/mips/tcg/translate.h b/target/mips/tcg/translate.h
index 55053226ae..69f85841d2 100644
--- a/target/mips/tcg/translate.h
+++ b/target/mips/tcg/translate.h
@@ -51,6 +51,10 @@ typedef struct DisasContext {
 int gi;
 } DisasContext;
 
+#define DISAS_STOP   DISAS_TARGET_0
+#define DISAS_EXIT   DISAS_TARGET_1
+#define DISAS_SEMIHOST   DISAS_TARGET_2
+
 /* MIPS major opcodes */
 #define MASK_OP_MAJOR(op)   (op & (0x3F << 26))
 
diff --git a/target/mips/tcg/sysemu/tlb_helper.c 
b/target/mips/tcg/sysemu/tlb_helper.c
index 57ffad2902..9d16859c0a 100644
--- a/target/mips/tcg/sysemu/tlb_helper.c
+++ b/target/mips/tcg/sysemu/tlb_helper.c
@@ -1056,6 +1056,7 @@ void mips_cpu_do_interrupt(CPUState *cs)
 case EXCP_SEMIHOST:
 cs->exception_index = EXCP_NONE;
 mips_semihosting(env);
+env->active_tc.PC += env->error_code;
 return;
 case EXCP_DSS:
 env->CP0_Debug |= 1 << CP0DB_DSS;
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
index 1f6a779808..de1511baaf 100644
--- a/target/mips/tcg/translate.c
+++ b/target/mips/tcg/translate.c
@@ -1213,9 +1213,6 @@ TCGv_i64 fpu_f64[32];
 
 #include "exec/gen-icount.h"
 
-#define DISAS_STOP   DISAS_TARGET_0
-#define DISAS_EXIT   DISAS_TARGET_1
-
 static const char regnames_HI[][4] = {
 "HI0", "HI1", "HI2", "HI3",
 };
@@ -13902,7 +13899,7 @@ static void decode_opc_special_r6(CPUMIPSState *env, 
DisasContext *ctx)
 break;
 case R6_OPC_SDBBP:
 if (is_uhi(extract32(ctx->opcode, 6, 20))) {
-generate_exception_end(ctx, EXCP_SEMIHOST);
+ctx->base.is_jmp = DISAS_SEMIHOST;
 } else {
 if (ctx->hflags & MIPS_HFLAG_SBRI) {
 gen_reserved_instruction(ctx);
@@ -14314,7 +14311,7 @@ static void decode_opc_special2_legacy(CPUMIPSState 
*env, DisasContext *ctx)
 break;
 case OPC_SDBBP:
 if (is_uhi(extract32(ctx->opcode, 6, 20))) {
-generate_exception_end(ctx, EXCP_SEMIHOST);
+ctx->base.is_jmp = DISAS_SEMIHOST;
 } else {
 /*
  * XXX: not clear which exception should be raised
@@ -16098,6 +16095,9 @@ static void mips_tr_translate_insn(DisasContextBase 
*dcbase, CPUState *cs)
 if (is_slot) {
 gen_branch(ctx, insn_bytes);
 }
+if (ctx->base.is_jmp == DISAS_SEMIHOST) {
+generate_exception_err(ctx, EXCP_SEMIHOST, insn_bytes);
+}
 ctx->base.pc_next += insn_bytes;
 
 if (ctx->base.is_jmp != DISAS_NEXT) {
diff --git a/target/mips/tcg/micromips_translate.c.inc 
b/target/mips/tcg/micromips_translate.c.inc
index 274caf2c3c..b2c696f891 100644
--- a/target/mips/tcg/micromips_translate.c.inc
+++ b/target/mips/tcg/micromips_translate.c.inc
@@ -826,7 +826,7 @@ static void gen_pool16c_insn(DisasContext *ctx)
 break;
 case SDBBP16:
 if (is_uhi(extract32(ctx->opcode, 0, 4))) {
-generate_exception_end(ctx, EXCP_SEMIHOST);
+ctx->base.is_jmp = DISAS_SEMIHOST;
 } else {
 /*
  * XXX: not clear which exception should be raised
@@ -942,7 +942,7 @@ static void gen_pool16c_r6_insn(DisasContext *ctx)
 case R6_SDBBP16:
 /* SDBBP16 */
 if (is_uhi(extract32(ctx->opcode, 6, 4))) {
-generate_exception_end(ctx, EXCP_SEMIHOST);
+ctx->base.is_jmp = DISAS_SEMIHOST;
 } else {
 if (ctx->hflags & MIPS_HFLAG_SBRI) {
 generate_exception(ctx, EXCP_RI);
@@ -1311,7 +1311,7 @@ static void gen_pool32axf(CPUMIPSState *env, DisasContext 
*ctx, int rt, int rs)
 break;
 case SDBBP:
 if (is_uhi(extract32(ctx->opcode, 16, 10))) {
-generate_exception_end(ctx, EXCP_SEMIHOST);
+ctx->base.is_jmp = DISAS_SEMIHOST;
 } else {
 check_insn(ctx, ISA_MIPS_R1);
 if (ctx->hflags & MIPS_HFLAG_SBRI) {
diff --git a/target/mips/tcg/mips16e_translate.c.inc 
b/target/mips/tcg/mips16e_translate.c.inc
index 0a3ba252e4..7568933e23 100644
--- a/target/mips/tcg/mips16e_translate.c.inc
+++ b/target/mips/tcg/mips16e_translate.c.inc
@@ -952,7 +952,7 @@ static int decode_ase_mips16e(CPUMIPSState *env, 
DisasContext *ctx)
 break;

[PULL 0/2] minor loongarch cleanups

2022-07-29 Thread Richard Henderson

As this is a new target this release, and these patches
are so minor, I'm applying these as a bug fix.

r~

Xiaojuan Yang (2):
  hw/loongarch: Rename file 'loongson3.XXX' to 'virt.XXX'
  hw/loongarch: Change macro name 'LS7A_XXX' to 'VIRT_XXX'

 include/hw/loongarch/virt.h  |  8 ++--
 include/hw/pci-host/ls7a.h   | 43 ++---
 hw/loongarch/acpi-build.c| 18 -
 hw/loongarch/{loongson3.c => virt.c} | 56 ++--
 MAINTAINERS  |  2 +-
 hw/loongarch/meson.build |  2 +-
 target/loongarch/README  |  2 +-
 7 files changed, 64 insertions(+), 67 deletions(-)
 rename hw/loongarch/{loongson3.c => virt.c} (93%)

-- 
2.34.1

Re: [PULL 0/2] minor loongarch cleanups

2022-07-29 Thread Richard Henderson


On 7/29/22 17:37, Richard Henderson wrote:

As this is a new target this release, and these patches
are so minor, I'm applying these as a bug fix.

r~


Dangit, impatience

  https://gitlab.com/rth7680/qemu.git pull-la-20220729

r~



Xiaojuan Yang (2):
   hw/loongarch: Rename file 'loongson3.XXX' to 'virt.XXX'
   hw/loongarch: Change macro name 'LS7A_XXX' to 'VIRT_XXX'

  include/hw/loongarch/virt.h  |  8 ++--
  include/hw/pci-host/ls7a.h   | 43 ++---
  hw/loongarch/acpi-build.c| 18 -
  hw/loongarch/{loongson3.c => virt.c} | 56 ++--
  MAINTAINERS  |  2 +-
  hw/loongarch/meson.build |  2 +-
  target/loongarch/README  |  2 +-
  7 files changed, 64 insertions(+), 67 deletions(-)
  rename hw/loongarch/{loongson3.c => virt.c} (93%)

[PULL 2/2] hw/loongarch: Change macro name 'LS7A_XXX' to 'VIRT_XXX'

2022-07-29 Thread Richard Henderson

From: Xiaojuan Yang 

Change macro name 'LS7A_XXX' to 'VIRT_XXX', as the loongarch
virt machinue use the GPEX bridge instead of LS7A bridge. So
the macro name should keep consistency.

Signed-off-by: Xiaojuan Yang 
Message-Id: <20220729073018.27037-3-yangxiaoj...@loongson.cn>
Signed-off-by: Richard Henderson 
---
 include/hw/loongarch/virt.h |  8 +++---
 include/hw/pci-host/ls7a.h  | 43 +---
 hw/loongarch/acpi-build.c   | 18 ++--
 hw/loongarch/virt.c | 56 ++---
 4 files changed, 61 insertions(+), 64 deletions(-)

diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h
index f4f24df428..92b84de1c5 100644
--- a/include/hw/loongarch/virt.h
+++ b/include/hw/loongarch/virt.h
@@ -15,8 +15,8 @@
 
 #define LOONGARCH_MAX_VCPUS 4
 
-#define LOONGARCH_ISA_IO_BASE   0x1800UL
-#define LOONGARCH_ISA_IO_SIZE   0x0004000
+#define VIRT_ISA_IO_BASE0x1800UL
+#define VIRT_ISA_IO_SIZE0x0004000
 #define VIRT_FWCFG_BASE 0x1e02UL
 #define VIRT_BIOS_BASE  0x1c00UL
 #define VIRT_BIOS_SIZE  (4 * MiB)
@@ -28,8 +28,8 @@
 #define VIRT_GED_MEM_ADDR   (VIRT_GED_EVT_ADDR + ACPI_GED_EVT_SEL_LEN)
 #define VIRT_GED_REG_ADDR   (VIRT_GED_MEM_ADDR + MEMORY_HOTPLUG_IO_LEN)
 
-#define LA_FDT_BASE 0x1c40
-#define LA_FDT_SIZE 0x10
+#define VIRT_FDT_BASE   0x1c40
+#define VIRT_FDT_SIZE   0x10
 
 struct LoongArchMachineState {
 /*< private >*/
diff --git a/include/hw/pci-host/ls7a.h b/include/hw/pci-host/ls7a.h
index 0fdc86b973..cdde0af1f8 100644
--- a/include/hw/pci-host/ls7a.h
+++ b/include/hw/pci-host/ls7a.h
@@ -15,34 +15,31 @@
 #include "qemu/range.h"
 #include "qom/object.h"
 
-#define LS7A_PCI_MEM_BASE0x4000UL
-#define LS7A_PCI_MEM_SIZE0x4000UL
-#define LS7A_PCI_IO_OFFSET  0x4000
-#define LS_PCIECFG_BASE 0x2000
-#define LS_PCIECFG_SIZE 0x0800
-#define LS7A_PCI_IO_BASE0x18004000UL
-#define LS7A_PCI_IO_SIZE0xC000
+#define VIRT_PCI_MEM_BASE0x4000UL
+#define VIRT_PCI_MEM_SIZE0x4000UL
+#define VIRT_PCI_IO_OFFSET   0x4000
+#define VIRT_PCI_CFG_BASE0x2000
+#define VIRT_PCI_CFG_SIZE0x0800
+#define VIRT_PCI_IO_BASE 0x18004000UL
+#define VIRT_PCI_IO_SIZE 0xC000
 
-#define LS7A_PCI_MEM_BASE   0x4000UL
-#define LS7A_PCI_MEM_SIZE   0x4000UL
-
-#define LS7A_PCH_REG_BASE   0x1000UL
-#define LS7A_IOAPIC_REG_BASE(LS7A_PCH_REG_BASE)
-#define LS7A_PCH_MSI_ADDR_LOW   0x2FF0UL
+#define VIRT_PCH_REG_BASE0x1000UL
+#define VIRT_IOAPIC_REG_BASE (VIRT_PCH_REG_BASE)
+#define VIRT_PCH_MSI_ADDR_LOW0x2FF0UL
 
 /*
  * According to the kernel pch irq start from 64 offset
  * 0 ~ 16 irqs used for non-pci device while 16 ~ 64 irqs
  * used for pci device.
  */
-#define PCH_PIC_IRQ_OFFSET  64
-#define LS7A_DEVICE_IRQS16
-#define LS7A_PCI_IRQS   48
-#define LS7A_UART_IRQ   (PCH_PIC_IRQ_OFFSET + 2)
-#define LS7A_UART_BASE  0x1fe001e0
-#define LS7A_RTC_IRQ(PCH_PIC_IRQ_OFFSET + 3)
-#define LS7A_MISC_REG_BASE  (LS7A_PCH_REG_BASE + 0x0008)
-#define LS7A_RTC_REG_BASE   (LS7A_MISC_REG_BASE + 0x00050100)
-#define LS7A_RTC_LEN0x100
-#define LS7A_SCI_IRQ(PCH_PIC_IRQ_OFFSET + 4)
+#define PCH_PIC_IRQ_OFFSET   64
+#define VIRT_DEVICE_IRQS 16
+#define VIRT_PCI_IRQS48
+#define VIRT_UART_IRQ(PCH_PIC_IRQ_OFFSET + 2)
+#define VIRT_UART_BASE   0x1fe001e0
+#define VIRT_RTC_IRQ (PCH_PIC_IRQ_OFFSET + 3)
+#define VIRT_MISC_REG_BASE   (VIRT_PCH_REG_BASE + 0x0008)
+#define VIRT_RTC_REG_BASE(VIRT_MISC_REG_BASE + 0x00050100)
+#define VIRT_RTC_LEN 0x100
+#define VIRT_SCI_IRQ (PCH_PIC_IRQ_OFFSET + 4)
 #endif
diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c
index b95b83b079..4b4529a3fb 100644
--- a/hw/loongarch/acpi-build.c
+++ b/hw/loongarch/acpi-build.c
@@ -135,7 +135,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, 
LoongArchMachineState *lams)
 build_append_int_noprefix(table_data, 21, 1);/* Type */
 build_append_int_noprefix(table_data, 19, 1);/* Length */
 build_append_int_noprefix(table_data, 1, 1); /* Version */
-build_append_int_noprefix(table_data, LS7A_PCH_MSI_ADDR_LOW, 8);/* Address 
*/
+build_append_int_noprefix(table_data, VIRT_PCH_MSI_ADDR_LOW, 8);/* Address 
*/
 build_append_int_noprefix(table_data, 0x40, 4);  /* Start */
 build_append_int_noprefix(table_data, 0xc0, 4);  /* Count */
 
@@ -143,7 +143,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, 
LoongArchMachineState *lams)
 build_append_int_noprefix(table_data, 22, 1);/* Type */
 build_append_int_noprefix(table_data, 17, 1);/* Length */

Re: [PATCH v1 0/2] Change 'loongson3.XXX' file name and 'LS7A_XXX' macro name

2022-07-29 Thread Richard Henderson


On 7/29/22 00:30, Xiaojuan Yang wrote:

This series change 'loongson3.XXX' file name and 'LS7A_XXX' macro name.

Changes for v1:
1. Rename 'loongson3.c' to 'virt.c' and change the meson.build file.
2. Rename 'loongson3.rst' to 'virt.rst'.
3. Change macro name 'LS7A_XXX' to 'VIRT_XXX'.

Xiaojuan Yang (2):
   hw/loongarch: Rename file 'loongson3.XXX' to 'virt.XXX'
   hw/loongarch: Change macro name 'LS7A_XXX' to 'VIRT_XXX'


Queued, thanks.

r~

[PULL 1/2] hw/loongarch: Rename file 'loongson3.XXX' to 'virt.XXX'

2022-07-29 Thread Richard Henderson

From: Xiaojuan Yang 

1. Rename 'loongson3.c' to 'virt.c' and change the meson.build file.
2. Rename 'loongson3.rst' to 'virt.rst'.

Signed-off-by: Xiaojuan Yang 
Message-Id: <20220729073018.27037-2-yangxiaoj...@loongson.cn>
Signed-off-by: Richard Henderson 
---
 hw/loongarch/{loongson3.c => virt.c} | 0
 MAINTAINERS  | 2 +-
 hw/loongarch/meson.build | 2 +-
 target/loongarch/README  | 2 +-
 4 files changed, 3 insertions(+), 3 deletions(-)
 rename hw/loongarch/{loongson3.c => virt.c} (100%)

diff --git a/hw/loongarch/loongson3.c b/hw/loongarch/virt.c
similarity index 100%
rename from hw/loongarch/loongson3.c
rename to hw/loongarch/virt.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 6af9cd985c..5ce4227ff6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1129,7 +1129,7 @@ Virt
 M: Xiaojuan Yang 
 M: Song Gao 
 S: Maintained
-F: docs/system/loongarch/loongson3.rst
+F: docs/system/loongarch/virt.rst
 F: configs/targets/loongarch64-softmmu.mak
 F: configs/devices/loongarch64-softmmu/default.mak
 F: hw/loongarch/
diff --git a/hw/loongarch/meson.build b/hw/loongarch/meson.build
index 6a2a1b18e5..c0421502ab 100644
--- a/hw/loongarch/meson.build
+++ b/hw/loongarch/meson.build
@@ -2,7 +2,7 @@ loongarch_ss = ss.source_set()
 loongarch_ss.add(files(
 'fw_cfg.c',
 ))
-loongarch_ss.add(when: 'CONFIG_LOONGARCH_VIRT', if_true: 
[files('loongson3.c'), fdt])
+loongarch_ss.add(when: 'CONFIG_LOONGARCH_VIRT', if_true: [files('virt.c'), 
fdt])
 loongarch_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi-build.c'))
 
 hw_arch += {'loongarch': loongarch_ss}
diff --git a/target/loongarch/README b/target/loongarch/README
index 9f5edd10c8..1823375d04 100644
--- a/target/loongarch/README
+++ b/target/loongarch/README
@@ -15,7 +15,7 @@
   3A5000 support multiple interrupt cascading while here we just emulate the 
extioi interrupt
   cascading. LS7A1000 host bridge support multiple devices, such as sata, 
gmac, uart, rtc
   and so on. But we just realize the rtc. Others use the qemu common devices. 
It does not affect
-  the general use. We also introduced the emulation of devices at 
docs/system/loongarch/loongson3.rst.
+  the general use. We also introduced the emulation of devices at 
docs/system/loongarch/virt.rst.
 
   This version only supports running binary files in ELF format, and does not 
depend on BIOS and kernel file.
   You can compile the test program with 'make & make check-tcg' and run the 
test case with the following command:
-- 
2.34.1

Re: [PATCH v3 08/11] target/openrisc: Enable MTTCG

2022-07-29 Thread Richard Henderson


On 7/29/22 16:01, Stafford Horne wrote:

This patch enables multithread TCG for OpenRISC.  Since the or1k shared
syncrhonized timer can be updated from each vCPU via helpers we use a
mutex to synchronize updates.

Signed-off-by: Stafford Horne
---
Since v2:
  - Removed cpu_openrisc_timer_has_advanced lock optimization, measuring 
revealed
it did not help much.

  configs/targets/or1k-softmmu.mak | 1 +
  target/openrisc/cpu.h| 2 ++
  target/openrisc/sys_helper.c | 7 ++-
  3 files changed, 9 insertions(+), 1 deletion(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH v3 06/11] hw/openrisc: Initialize timer time at startup

2022-07-29 Thread Richard Henderson


On 7/29/22 16:01, Stafford Horne wrote:

The last_clk time was initialized at zero, this means when we calculate
the first delta we will calculate 0 vs current time which could cause
unnecessary hops.

This patch moves timer initialization to the cpu reset.  There are two
resets registered here:

  1. Per cpu timer mask (ttmr) reset.
  2. Global cpu timer (last_clk and ttcr) reset, attached to the first
 cpu only.

Signed-off-by: Stafford Horne
---
Since v2:
  - Moved timer init from init to reset suggested by Richard

  hw/openrisc/cputimer.c | 22 +-
  1 file changed, 21 insertions(+), 1 deletion(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH v3 03/11] goldfish_rtc: Add big-endian property

2022-07-29 Thread Richard Henderson


On 7/29/22 16:01, Stafford Horne wrote:

Add a new property "big-endian" to allow configuring the RTC as either
little or big endian, the default is little endian.

Currently overriding the default to big endian is only used by the m68k
virt platform.  New platforms should prefer to use little endian and not
set this.

Cc: Laurent Vivier
Reviewed-by: Anup Patel
Signed-off-by: Stafford Horne
---
Since v2:
  - Added Reviewed-by
  - Changed from enum property to boolean as suggested by Richard


Reviewed-by: Richard Henderson 

r~

[PATCH v3 11/11] docs/system: openrisc: Add OpenRISC documentation

2022-07-29 Thread Stafford Horne

Reviewed-by: Richard Henderson 
Signed-off-by: Stafford Horne 
---
Since v2;
 - Added Reviewed-by

 docs/system/openrisc/cpu-features.rst | 15 ++
 docs/system/openrisc/emulation.rst| 17 +++
 docs/system/openrisc/or1k-sim.rst | 43 
 docs/system/openrisc/virt.rst | 50 +++
 docs/system/target-openrisc.rst   | 72 +++
 docs/system/targets.rst   |  1 +
 6 files changed, 198 insertions(+)
 create mode 100644 docs/system/openrisc/cpu-features.rst
 create mode 100644 docs/system/openrisc/emulation.rst
 create mode 100644 docs/system/openrisc/or1k-sim.rst
 create mode 100644 docs/system/openrisc/virt.rst
 create mode 100644 docs/system/target-openrisc.rst

diff --git a/docs/system/openrisc/cpu-features.rst 
b/docs/system/openrisc/cpu-features.rst
new file mode 100644
index 00..aeb65e22ff
--- /dev/null
+++ b/docs/system/openrisc/cpu-features.rst
@@ -0,0 +1,15 @@
+CPU Features
+
+
+The QEMU emulation of the OpenRISC architecture provides following built in
+features.
+
+- Shadow GPRs
+- MMU TLB with 128 entries, 1 way
+- Power Management (PM)
+- Programmable Interrupt Controller (PIC)
+- Tick Timer
+
+These features are on by default and the presence can be confirmed by checking
+the contents of the Unit Presence Register (``UPR``) and CPU Configuration
+Register (``CPUCFGR``).
diff --git a/docs/system/openrisc/emulation.rst 
b/docs/system/openrisc/emulation.rst
new file mode 100644
index 00..0af898ab20
--- /dev/null
+++ b/docs/system/openrisc/emulation.rst
@@ -0,0 +1,17 @@
+OpenRISC 1000 CPU architecture support
+==
+
+QEMU's TCG emulation includes support for the OpenRISC or1200 implementation of
+the OpenRISC 1000 cpu architecture.
+
+The or1200 cpu also has support for the following instruction subsets:
+
+- ORBIS32 (OpenRISC Basic Instruction Set)
+- ORFPX32 (OpenRISC Floating-Point eXtension)
+
+In addition to the instruction subsets the QEMU TCG emulation also has support
+for most Class II (optional) instructions.
+
+For information on all OpenRISC instructions please refer to the latest
+architecture manual available on the OpenRISC website in the
+`OpenRISC Architecture `_ section.
diff --git a/docs/system/openrisc/or1k-sim.rst 
b/docs/system/openrisc/or1k-sim.rst
new file mode 100644
index 00..ef10439737
--- /dev/null
+++ b/docs/system/openrisc/or1k-sim.rst
@@ -0,0 +1,43 @@
+Or1ksim board
+=
+
+The QEMU Or1ksim machine emulates the standard OpenRISC board simulator which 
is
+also the standard SoC configuration.
+
+Supported devices
+-
+
+ * 16550A UART
+ * ETHOC Ethernet controller
+ * SMP (OpenRISC multicore using ompic)
+
+Boot options
+
+
+The Or1ksim machine can be started using the ``-kernel`` and ``-initrd`` 
options
+to load a Linux kernel and optional disk image.
+
+.. code-block:: bash
+
+  $ qemu-system-or1k -cpu or1220 -M or1k-sim -nographic \
+-kernel vmlinux \
+-initrd initramfs.cpio.gz \
+-m 128
+
+Linux guest kernel configuration
+
+
+The 'or1ksim_defconfig' for Linux openrisc kernels includes the right
+drivers for the or1ksim machine.  If you would like to run an SMP system
+choose the 'simple_smp_defconfig' config.
+
+Hardware configuration information
+""
+
+The ``or1k-sim`` board automatically generates a device tree blob ("dtb")
+which it passes to the guest. This provides information about the
+addresses, interrupt lines and other configuration of the various devices
+in the system.
+
+The location of the DTB will be passed in register ``r3`` to the guest 
operating
+system.
diff --git a/docs/system/openrisc/virt.rst b/docs/system/openrisc/virt.rst
new file mode 100644
index 00..2fe61ac942
--- /dev/null
+++ b/docs/system/openrisc/virt.rst
@@ -0,0 +1,50 @@
+'virt' generic virtual platform
+===
+
+The ``virt`` board is a platform which does not correspond to any
+real hardware; it is designed for use in virtual machines.
+It is the recommended board type if you simply want to run
+a guest such as Linux and do not care about reproducing the
+idiosyncrasies and limitations of a particular bit of real-world
+hardware.
+
+Supported devices
+-
+
+ * PCI/PCIe devices
+ * 8 virtio-mmio transport devices
+ * 16550A UART
+ * Goldfish RTC
+ * SiFive Test device for poweroff and reboot
+ * SMP (OpenRISC multicore using ompic)
+
+Boot options
+
+
+The virt machine can be started using the ``-kernel`` and ``-initrd`` options
+to load a Linux kernel and optional disk image. For example:
+
+.. code-block:: bash
+
+  $ qemu-system-or1k -cpu or1220 -M or1k-sim -nographic \
+-device virtio-net-device,netdev=user -netdev 
user,id=user,net=10.9.0.1/24,host=10.9.0.100 \
+-device

[PATCH v3 08/11] target/openrisc: Enable MTTCG

2022-07-29 Thread Stafford Horne

This patch enables multithread TCG for OpenRISC.  Since the or1k shared
syncrhonized timer can be updated from each vCPU via helpers we use a
mutex to synchronize updates.

Signed-off-by: Stafford Horne 
---
Since v2:
 - Removed cpu_openrisc_timer_has_advanced lock optimization, measuring revealed
   it did not help much.

 configs/targets/or1k-softmmu.mak | 1 +
 target/openrisc/cpu.h| 2 ++
 target/openrisc/sys_helper.c | 7 ++-
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/configs/targets/or1k-softmmu.mak b/configs/targets/or1k-softmmu.mak
index 263e970870..432f855a30 100644
--- a/configs/targets/or1k-softmmu.mak
+++ b/configs/targets/or1k-softmmu.mak
@@ -1,3 +1,4 @@
 TARGET_ARCH=openrisc
+TARGET_SUPPORTS_MTTCG=y
 TARGET_BIG_ENDIAN=y
 TARGET_NEED_FDT=y
diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
index b9584f10d4..1d5efa5ca2 100644
--- a/target/openrisc/cpu.h
+++ b/target/openrisc/cpu.h
@@ -25,6 +25,8 @@
 #include "hw/core/cpu.h"
 #include "qom/object.h"
 
+#define TCG_GUEST_DEFAULT_MO (0)
+
 #define TYPE_OPENRISC_CPU "or1k-cpu"
 
 OBJECT_DECLARE_CPU_TYPE(OpenRISCCPU, OpenRISCCPUClass, OPENRISC_CPU)
diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c
index 48674231e7..da88ad9e77 100644
--- a/target/openrisc/sys_helper.c
+++ b/target/openrisc/sys_helper.c
@@ -145,6 +145,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, 
target_ulong rb)
 break;
 case TO_SPR(10, 0): /* TTMR */
 {
+qemu_mutex_lock_iothread();
 if ((env->ttmr & TTMR_M) ^ (rb & TTMR_M)) {
 switch (rb & TTMR_M) {
 case TIMER_NONE:
@@ -168,14 +169,16 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong 
spr, target_ulong rb)
 env->ttmr = rb & ~TTMR_IP;
 cs->interrupt_request &= ~CPU_INTERRUPT_TIMER;
 }
-
 cpu_openrisc_timer_update(cpu);
+qemu_mutex_unlock_iothread();
 }
 break;
 
 case TO_SPR(10, 1): /* TTCR */
+qemu_mutex_lock_iothread();
 cpu_openrisc_count_set(cpu, rb);
 cpu_openrisc_timer_update(cpu);
+qemu_mutex_unlock_iothread();
 break;
 #endif
 
@@ -303,7 +306,9 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env, 
target_ulong rd,
 return env->ttmr;
 
 case TO_SPR(10, 1): /* TTCR */
+qemu_mutex_lock_iothread();
 cpu_openrisc_count_update(cpu);
+qemu_mutex_unlock_iothread();
 return cpu_openrisc_count_get(cpu);
 #endif
 
-- 
2.37.1

[PATCH v3 03/11] goldfish_rtc: Add big-endian property

2022-07-29 Thread Stafford Horne

Add a new property "big-endian" to allow configuring the RTC as either
little or big endian, the default is little endian.

Currently overriding the default to big endian is only used by the m68k
virt platform.  New platforms should prefer to use little endian and not
set this.

Cc: Laurent Vivier 
Reviewed-by: Anup Patel 
Signed-off-by: Stafford Horne 
---
Since v2:
 - Added Reviewed-by
 - Changed from enum property to boolean as suggested by Richard

 hw/m68k/virt.c|  1 +
 hw/rtc/goldfish_rtc.c | 37 ++-
 include/hw/rtc/goldfish_rtc.h |  2 ++
 3 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/hw/m68k/virt.c b/hw/m68k/virt.c
index 0aa383fa6b..c7a6c766e3 100644
--- a/hw/m68k/virt.c
+++ b/hw/m68k/virt.c
@@ -173,6 +173,7 @@ static void virt_init(MachineState *machine)
 io_base = VIRT_GF_RTC_MMIO_BASE;
 for (i = 0; i < VIRT_GF_RTC_NB; i++) {
 dev = qdev_new(TYPE_GOLDFISH_RTC);
+qdev_prop_set_bit(dev, "big-endian", true);
 sysbus = SYS_BUS_DEVICE(dev);
 sysbus_realize_and_unref(sysbus, _fatal);
 sysbus_mmio_map(sysbus, 0, io_base);
diff --git a/hw/rtc/goldfish_rtc.c b/hw/rtc/goldfish_rtc.c
index 35e493be31..19a56402a0 100644
--- a/hw/rtc/goldfish_rtc.c
+++ b/hw/rtc/goldfish_rtc.c
@@ -216,14 +216,25 @@ static int goldfish_rtc_post_load(void *opaque, int 
version_id)
 return 0;
 }
 
-static const MemoryRegionOps goldfish_rtc_ops = {
-.read = goldfish_rtc_read,
-.write = goldfish_rtc_write,
-.endianness = DEVICE_NATIVE_ENDIAN,
-.valid = {
-.min_access_size = 4,
-.max_access_size = 4
-}
+static const MemoryRegionOps goldfish_rtc_ops[2] = {
+[false] = {
+.read = goldfish_rtc_read,
+.write = goldfish_rtc_write,
+.endianness = DEVICE_LITTLE_ENDIAN,
+.valid = {
+.min_access_size = 4,
+.max_access_size = 4
+}
+},
+[true] = {
+.read = goldfish_rtc_read,
+.write = goldfish_rtc_write,
+.endianness = DEVICE_BIG_ENDIAN,
+.valid = {
+.min_access_size = 4,
+.max_access_size = 4
+}
+},
 };
 
 static const VMStateDescription goldfish_rtc_vmstate = {
@@ -265,7 +276,8 @@ static void goldfish_rtc_realize(DeviceState *d, Error 
**errp)
 SysBusDevice *dev = SYS_BUS_DEVICE(d);
 GoldfishRTCState *s = GOLDFISH_RTC(d);
 
-memory_region_init_io(>iomem, OBJECT(s), _rtc_ops, s,
+memory_region_init_io(>iomem, OBJECT(s),
+  _rtc_ops[s->big_endian], s,
   "goldfish_rtc", 0x24);
 sysbus_init_mmio(dev, >iomem);
 
@@ -274,10 +286,17 @@ static void goldfish_rtc_realize(DeviceState *d, Error 
**errp)
 s->timer = timer_new_ns(rtc_clock, goldfish_rtc_interrupt, s);
 }
 
+static Property goldfish_rtc_properties[] = {
+DEFINE_PROP_BOOL("big-endian", GoldfishRTCState, big_endian,
+  false),
+DEFINE_PROP_END_OF_LIST(),
+};
+
 static void goldfish_rtc_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
 
+device_class_set_props(dc, goldfish_rtc_properties);
 dc->realize = goldfish_rtc_realize;
 dc->reset = goldfish_rtc_reset;
 dc->vmsd = _rtc_vmstate;
diff --git a/include/hw/rtc/goldfish_rtc.h b/include/hw/rtc/goldfish_rtc.h
index 79ca7daf5d..162be33863 100644
--- a/include/hw/rtc/goldfish_rtc.h
+++ b/include/hw/rtc/goldfish_rtc.h
@@ -42,6 +42,8 @@ struct GoldfishRTCState {
 uint32_t irq_pending;
 uint32_t irq_enabled;
 uint32_t time_high;
+
+bool big_endian;
 };
 
 #endif
-- 
2.37.1

[PATCH v3 10/11] hw/openrisc: virt: pass random seed to fdt

2022-07-29 Thread Stafford Horne

From: "Jason A. Donenfeld" 

If the FDT contains /chosen/rng-seed, then the Linux RNG will use it to
initialize early. Set this using the usual guest random number
generation function. This is confirmed to successfully initialize the
RNG on Linux 5.19-rc2.

Signed-off-by: Jason A. Donenfeld 
Reviewed-by: Richard Henderson 
Signed-off-by: Stafford Horne 
---
Since v2:
 - No changes

 hw/openrisc/virt.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/hw/openrisc/virt.c b/hw/openrisc/virt.c
index 9a78234a28..f8a68a6a6b 100644
--- a/hw/openrisc/virt.c
+++ b/hw/openrisc/virt.c
@@ -8,6 +8,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/error-report.h"
+#include "qemu/guest-random.h"
 #include "qapi/error.h"
 #include "cpu.h"
 #include "exec/address-spaces.h"
@@ -130,6 +131,7 @@ static void openrisc_create_fdt(OR1KVirtState *state,
 void *fdt;
 int cpu;
 char *nodename;
+uint8_t rng_seed[32];
 
 fdt = state->fdt = create_device_tree(>fdt_size);
 if (!fdt) {
@@ -186,6 +188,10 @@ static void openrisc_create_fdt(OR1KVirtState *state,
 qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", cmdline);
 }
 
+/* Pass seed to RNG. */
+qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed));
+qemu_fdt_setprop(fdt, "/chosen", "rng-seed", rng_seed, sizeof(rng_seed));
+
 /* Create aliases node for use by devices. */
 qemu_fdt_add_subnode(fdt, "/aliases");
 }
-- 
2.37.1

[PATCH v3 07/11] target/openrisc: Add interrupted CPU to log

2022-07-29 Thread Stafford Horne

When we are tracing it's helpful to know which CPU's are getting
interrupted, add that detail to the log line.

Reviewed-by: Richard Henderson 
Signed-off-by: Stafford Horne 
---
Since v2:
 - Added Reviewed-by

 target/openrisc/interrupt.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/target/openrisc/interrupt.c b/target/openrisc/interrupt.c
index e5724f5371..c31c6f12c4 100644
--- a/target/openrisc/interrupt.c
+++ b/target/openrisc/interrupt.c
@@ -83,7 +83,9 @@ void openrisc_cpu_do_interrupt(CPUState *cs)
 [EXCP_TRAP] = "TRAP",
 };
 
-qemu_log_mask(CPU_LOG_INT, "INT: %s\n", int_name[exception]);
+qemu_log_mask(CPU_LOG_INT, "CPU: %d INT: %s\n",
+  cs->cpu_index,
+  int_name[exception]);
 
 hwaddr vect_pc = exception << 8;
 if (env->cpucfgr & CPUCFGR_EVBARP) {
-- 
2.37.1

[PATCH v3 05/11] hw/openrisc: Add PCI bus support to virt

2022-07-29 Thread Stafford Horne

This is mostly borrowed from xtensa and riscv as examples.  The
create_pcie_irq_map swizzle function is almost and exact copy
but here we use a single cell interrupt, possibly we can make
this generic.

Signed-off-by: Stafford Horne 
---
Since v2:
 - No changes

 hw/openrisc/Kconfig |   3 +
 hw/openrisc/virt.c  | 160 ++--
 2 files changed, 157 insertions(+), 6 deletions(-)

diff --git a/hw/openrisc/Kconfig b/hw/openrisc/Kconfig
index 202134668e..97af258b55 100644
--- a/hw/openrisc/Kconfig
+++ b/hw/openrisc/Kconfig
@@ -7,8 +7,11 @@ config OR1K_SIM
 
 config OR1K_VIRT
 bool
+imply PCI_DEVICES
 imply VIRTIO_VGA
 imply TEST_DEVICES
+select PCI
+select PCI_EXPRESS_GENERIC_BRIDGE
 select GOLDFISH_RTC
 select SERIAL
 select SIFIVE_TEST
diff --git a/hw/openrisc/virt.c b/hw/openrisc/virt.c
index 54f2732a6b..9a78234a28 100644
--- a/hw/openrisc/virt.c
+++ b/hw/openrisc/virt.c
@@ -17,6 +17,8 @@
 #include "hw/core/split-irq.h"
 #include "hw/openrisc/boot.h"
 #include "hw/misc/sifive_test.h"
+#include "hw/pci/pci.h"
+#include "hw/pci-host/gpex.h"
 #include "hw/qdev-properties.h"
 #include "hw/rtc/goldfish_rtc.h"
 #include "hw/sysbus.h"
@@ -47,6 +49,9 @@ typedef struct OR1KVirtState {
 
 enum {
 VIRT_DRAM,
+VIRT_ECAM,
+VIRT_MMIO,
+VIRT_PIO,
 VIRT_TEST,
 VIRT_RTC,
 VIRT_VIRTIO,
@@ -60,6 +65,7 @@ enum {
 VIRT_RTC_IRQ = 3,
 VIRT_VIRTIO_IRQ = 4, /* to 12 */
 VIRTIO_COUNT = 8,
+VIRT_PCI_IRQ_BASE = 13, /* to 17 */
 };
 
 static const struct MemmapEntry {
@@ -72,6 +78,9 @@ static const struct MemmapEntry {
 [VIRT_RTC] =   { 0x96005000, 0x1000 },
 [VIRT_VIRTIO] ={ 0x9700, 0x1000 },
 [VIRT_OMPIC] = { 0x9800, VIRT_CPUS_MAX * 8 },
+[VIRT_ECAM] =  { 0x9e00,  0x100 },
+[VIRT_PIO] =   { 0x9f00,  0x100 },
+[VIRT_MMIO] =  { 0xa000, 0x1000 },
 };
 
 static struct openrisc_boot_info {
@@ -115,12 +124,12 @@ static qemu_irq get_per_cpu_irq(OpenRISCCPU *cpus[], int 
num_cpus, int irq_pin)
 static void openrisc_create_fdt(OR1KVirtState *state,
 const struct MemmapEntry *memmap,
 int num_cpus, uint64_t mem_size,
-const char *cmdline)
+const char *cmdline,
+int32_t *pic_phandle)
 {
 void *fdt;
 int cpu;
 char *nodename;
-int pic_ph;
 
 fdt = state->fdt = create_device_tree(>fdt_size);
 if (!fdt) {
@@ -163,14 +172,14 @@ static void openrisc_create_fdt(OR1KVirtState *state,
 
 nodename = (char *)"/pic";
 qemu_fdt_add_subnode(fdt, nodename);
-pic_ph = qemu_fdt_alloc_phandle(fdt);
+*pic_phandle = qemu_fdt_alloc_phandle(fdt);
 qemu_fdt_setprop_string(fdt, nodename, "compatible",
 "opencores,or1k-pic-level");
 qemu_fdt_setprop_cell(fdt, nodename, "#interrupt-cells", 1);
 qemu_fdt_setprop(fdt, nodename, "interrupt-controller", NULL, 0);
-qemu_fdt_setprop_cell(fdt, nodename, "phandle", pic_ph);
+qemu_fdt_setprop_cell(fdt, nodename, "phandle", *pic_phandle);
 
-qemu_fdt_setprop_cell(fdt, "/", "interrupt-parent", pic_ph);
+qemu_fdt_setprop_cell(fdt, "/", "interrupt-parent", *pic_phandle);
 
 qemu_fdt_add_subnode(fdt, "/chosen");
 if (cmdline) {
@@ -275,6 +284,7 @@ static void openrisc_virt_test_init(OR1KVirtState *state, 
hwaddr base,
 g_free(nodename);
 
 }
+
 static void openrisc_virt_rtc_init(OR1KVirtState *state, hwaddr base,
hwaddr size, int num_cpus,
OpenRISCCPU *cpus[], int irq_pin)
@@ -296,6 +306,134 @@ static void openrisc_virt_rtc_init(OR1KVirtState *state, 
hwaddr base,
 g_free(nodename);
 
 }
+
+static void create_pcie_irq_map(void *fdt, char *nodename, int irq_base,
+uint32_t irqchip_phandle)
+{
+int pin, dev;
+uint32_t irq_map_stride = 0;
+uint32_t full_irq_map[GPEX_NUM_IRQS * GPEX_NUM_IRQS * 6] = {};
+uint32_t *irq_map = full_irq_map;
+
+/*
+ * This code creates a standard swizzle of interrupts such that
+ * each device's first interrupt is based on it's PCI_SLOT number.
+ * (See pci_swizzle_map_irq_fn())
+ *
+ * We only need one entry per interrupt in the table (not one per
+ * possible slot) seeing the interrupt-map-mask will allow the table
+ * to wrap to any number of devices.
+ */
+for (dev = 0; dev < GPEX_NUM_IRQS; dev++) {
+int devfn = dev << 3;
+
+for (pin = 0; pin < GPEX_NUM_IRQS; pin++) {
+int irq_nr = irq_base + ((pin + PCI_SLOT(devfn)) % GPEX_NUM_IRQS);
+int i = 0;
+
+/* Fill PCI address cells */
+irq_map[i++] = cpu_to_be32(devfn << 8);
+irq_map[i++] = 0;
+irq_map[i++] = 0;
+
+/* Fill PCI Interrupt

[PATCH v3 02/11] target/openrisc: Fix memory reading in debugger

2022-07-29 Thread Stafford Horne

In commit f0655423ca ("target/openrisc: Reorg tlb lookup") data and
instruction TLB reads were combined.  This, broke debugger reads where
we first tried to map using the data tlb then fall back to the
instruction tlb.

This patch replicates this logic by first requesting a PAGE_READ
protection mapping then falling back to PAGE_EXEC.

Reviewed-by: Richard Henderson 
Signed-off-by: Stafford Horne 
---
Since v2:
 - No changes, added Reviewed-by

 target/openrisc/mmu.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/target/openrisc/mmu.c b/target/openrisc/mmu.c
index d7e1320998..0b8afdbacf 100644
--- a/target/openrisc/mmu.c
+++ b/target/openrisc/mmu.c
@@ -148,7 +148,13 @@ hwaddr openrisc_cpu_get_phys_page_debug(CPUState *cs, 
vaddr addr)
 case SR_DME | SR_IME:
 /* The mmu is definitely enabled.  */
 excp = get_phys_mmu(cpu, _addr, , addr,
-PAGE_EXEC | PAGE_READ | PAGE_WRITE,
+PAGE_READ,
+(sr & SR_SM) != 0);
+if (!excp) {
+return phys_addr;
+}
+excp = get_phys_mmu(cpu, _addr, , addr,
+PAGE_EXEC,
 (sr & SR_SM) != 0);
 return excp ? -1 : phys_addr;
 
-- 
2.37.1

[PATCH v3 01/11] hw/openrisc: Split re-usable boot time apis out to boot.c

2022-07-29 Thread Stafford Horne

These will be shared with the virt platform.

Reviewed-by: Richard Henderson 
Signed-off-by: Stafford Horne 
---
Since v2:
 - No changes

 hw/openrisc/boot.c | 117 +
 hw/openrisc/meson.build|   1 +
 hw/openrisc/openrisc_sim.c | 106 ++---
 include/hw/openrisc/boot.h |  34 +++
 4 files changed, 158 insertions(+), 100 deletions(-)
 create mode 100644 hw/openrisc/boot.c
 create mode 100644 include/hw/openrisc/boot.h

diff --git a/hw/openrisc/boot.c b/hw/openrisc/boot.c
new file mode 100644
index 00..ca773b385e
--- /dev/null
+++ b/hw/openrisc/boot.c
@@ -0,0 +1,117 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * QEMU OpenRISC boot helpers.
+ *
+ * (c) 2022 Stafford Horne 
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/cpu-defs.h"
+#include "elf.h"
+#include "hw/loader.h"
+#include "hw/openrisc/boot.h"
+#include "sysemu/device_tree.h"
+#include "sysemu/qtest.h"
+
+#include 
+
+#define KERNEL_LOAD_ADDR 0x100
+
+hwaddr openrisc_load_kernel(ram_addr_t ram_size,
+const char *kernel_filename,
+uint32_t *bootstrap_pc)
+{
+long kernel_size;
+uint64_t elf_entry;
+uint64_t high_addr;
+hwaddr entry;
+
+if (kernel_filename && !qtest_enabled()) {
+kernel_size = load_elf(kernel_filename, NULL, NULL, NULL,
+   _entry, NULL, _addr, NULL, 1,
+   EM_OPENRISC, 1, 0);
+entry = elf_entry;
+if (kernel_size < 0) {
+kernel_size = load_uimage(kernel_filename,
+  , NULL, NULL, NULL, NULL);
+high_addr = entry + kernel_size;
+}
+if (kernel_size < 0) {
+kernel_size = load_image_targphys(kernel_filename,
+  KERNEL_LOAD_ADDR,
+  ram_size - KERNEL_LOAD_ADDR);
+high_addr = KERNEL_LOAD_ADDR + kernel_size;
+}
+
+if (entry <= 0) {
+entry = KERNEL_LOAD_ADDR;
+}
+
+if (kernel_size < 0) {
+error_report("couldn't load the kernel '%s'", kernel_filename);
+exit(1);
+}
+*bootstrap_pc = entry;
+
+return high_addr;
+}
+return 0;
+}
+
+hwaddr openrisc_load_initrd(void *fdt, const char *filename,
+hwaddr load_start, uint64_t mem_size)
+{
+int size;
+hwaddr start;
+
+/* We put the initrd right after the kernel; page aligned. */
+start = TARGET_PAGE_ALIGN(load_start);
+
+size = load_ramdisk(filename, start, mem_size - start);
+if (size < 0) {
+size = load_image_targphys(filename, start, mem_size - start);
+if (size < 0) {
+error_report("could not load ramdisk '%s'", filename);
+exit(1);
+}
+}
+
+if (fdt) {
+qemu_fdt_setprop_cell(fdt, "/chosen",
+  "linux,initrd-start", start);
+qemu_fdt_setprop_cell(fdt, "/chosen",
+  "linux,initrd-end", start + size);
+}
+
+return start + size;
+}
+
+uint32_t openrisc_load_fdt(void *fdt, hwaddr load_start,
+   uint64_t mem_size)
+{
+uint32_t fdt_addr;
+int ret;
+int fdtsize = fdt_totalsize(fdt);
+
+if (fdtsize <= 0) {
+error_report("invalid device-tree");
+exit(1);
+}
+
+/* We put fdt right after the kernel and/or initrd. */
+fdt_addr = TARGET_PAGE_ALIGN(load_start);
+
+ret = fdt_pack(fdt);
+/* Should only fail if we've built a corrupted tree */
+g_assert(ret == 0);
+/* copy in the device tree */
+qemu_fdt_dumpdtb(fdt, fdtsize);
+
+rom_add_blob_fixed_as("fdt", fdt, fdtsize, fdt_addr,
+  _space_memory);
+
+return fdt_addr;
+}
+
diff --git a/hw/openrisc/meson.build b/hw/openrisc/meson.build
index ec48172c9d..ab563820c5 100644
--- a/hw/openrisc/meson.build
+++ b/hw/openrisc/meson.build
@@ -1,5 +1,6 @@
 openrisc_ss = ss.source_set()
 openrisc_ss.add(files('cputimer.c'))
+openrisc_ss.add(files('boot.c'))
 openrisc_ss.add(when: 'CONFIG_OR1K_SIM', if_true: [files('openrisc_sim.c'), 
fdt])
 
 hw_arch += {'openrisc': openrisc_ss}
diff --git a/hw/openrisc/openrisc_sim.c b/hw/openrisc/openrisc_sim.c
index 35adce17ac..35da123aef 100644
--- a/hw/openrisc/openrisc_sim.c
+++ b/hw/openrisc/openrisc_sim.c
@@ -24,10 +24,9 @@
 #include "cpu.h"
 #include "hw/irq.h"
 #include "hw/boards.h"
-#include "elf.h"
 #include "hw/char/serial.h"
 #include "net/net.h"
-#include "hw/loader.h"
+#include "hw/openrisc/boot.h"
 #include "hw/qdev-properties.h"
 #include "exec/address-spaces.h"
 #include "sysemu/device_tree.h"
@@ -283,101 +282,6 @@ static void openrisc_sim_serial_init(Or1ksimState *state, 
hwaddr base,
 g_free(nodename);
 }
 
-static hwaddr openrisc_load_kernel(ram_addr_t ram_size,
-

[PATCH v3 09/11] target/openrisc: Interrupt handling fixes

2022-07-29 Thread Stafford Horne

When running SMP systems we sometimes were seeing lockups where
IPI interrupts were being raised by never handled.

This looks to be caused by 2 issues in the openrisc interrupt handling
logic.

 1. After clearing an interrupt the openrisc_cpu_set_irq handler will
always clear PICSR.  This is not correct as masked interrupts
should still be visible in PICSR.
 2. After setting PICMR (mask register) and exposed interrupts should
cause an interrupt to be raised.  This was not being done so add it.

This patch fixes both issues.

Reviewed-by: Richard Henderson 
Signed-off-by: Stafford Horne 
---
Since v2:
 - Added Reviewed-by

 target/openrisc/cpu.c| 1 -
 target/openrisc/sys_helper.c | 7 +++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
index 41d1b2a24a..cb9f35f408 100644
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@@ -98,7 +98,6 @@ static void openrisc_cpu_set_irq(void *opaque, int irq, int 
level)
 cpu_interrupt(cs, CPU_INTERRUPT_HARD);
 } else {
 cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
-cpu->env.picsr = 0;
 }
 }
 #endif
diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c
index da88ad9e77..09b3c97d7c 100644
--- a/target/openrisc/sys_helper.c
+++ b/target/openrisc/sys_helper.c
@@ -139,6 +139,13 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong 
spr, target_ulong rb)
 break;
 case TO_SPR(9, 0):  /* PICMR */
 env->picmr = rb;
+qemu_mutex_lock_iothread();
+if (env->picsr & env->picmr) {
+cpu_interrupt(cs, CPU_INTERRUPT_HARD);
+} else {
+cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
+}
+qemu_mutex_unlock_iothread();
 break;
 case TO_SPR(9, 2):  /* PICSR */
 env->picsr &= ~rb;
-- 
2.37.1

[PATCH v3 06/11] hw/openrisc: Initialize timer time at startup

2022-07-29 Thread Stafford Horne

The last_clk time was initialized at zero, this means when we calculate
the first delta we will calculate 0 vs current time which could cause
unnecessary hops.

This patch moves timer initialization to the cpu reset.  There are two
resets registered here:

 1. Per cpu timer mask (ttmr) reset.
 2. Global cpu timer (last_clk and ttcr) reset, attached to the first
cpu only.

Signed-off-by: Stafford Horne 
---
Since v2:
 - Moved timer init from init to reset suggested by Richard

 hw/openrisc/cputimer.c | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/hw/openrisc/cputimer.c b/hw/openrisc/cputimer.c
index 93268815d8..10163b391b 100644
--- a/hw/openrisc/cputimer.c
+++ b/hw/openrisc/cputimer.c
@@ -22,6 +22,7 @@
 #include "cpu.h"
 #include "migration/vmstate.h"
 #include "qemu/timer.h"
+#include "sysemu/reset.h"
 
 #define TIMER_PERIOD 50 /* 50 ns period for 20 MHz timer */
 
@@ -122,6 +123,24 @@ static void openrisc_timer_cb(void *opaque)
 qemu_cpu_kick(CPU(cpu));
 }
 
+/* Reset the per CPU counter state. */
+static void openrisc_count_reset(void *opaque)
+{
+OpenRISCCPU *cpu = opaque;
+
+if (cpu->env.is_counting) {
+cpu_openrisc_count_stop(cpu);
+}
+cpu->env.ttmr = 0x;
+}
+
+/* Reset the global timer state. */
+static void openrisc_timer_reset(void *opaque)
+{
+or1k_timer->ttcr = 0x;
+or1k_timer->last_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+}
+
 static const VMStateDescription vmstate_or1k_timer = {
 .name = "or1k_timer",
 .version_id = 1,
@@ -136,10 +155,11 @@ static const VMStateDescription vmstate_or1k_timer = {
 void cpu_openrisc_clock_init(OpenRISCCPU *cpu)
 {
 cpu->env.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, _timer_cb, cpu);
-cpu->env.ttmr = 0x;
 
+qemu_register_reset(openrisc_count_reset, cpu);
 if (or1k_timer == NULL) {
 or1k_timer = g_new0(OR1KTimerState, 1);
+qemu_register_reset(openrisc_timer_reset, cpu);
 vmstate_register(NULL, 0, _or1k_timer, or1k_timer);
 }
 }
-- 
2.37.1

[PATCH v3 04/11] hw/openrisc: Add the OpenRISC virtual machine

2022-07-29 Thread Stafford Horne

This patch adds the OpenRISC virtual machine 'virt' for OpenRISC.  This
platform allows for a convenient CI platform for toolchain, software
ports and the OpenRISC linux kernel port.

Much of this has been sourced from the m68k and riscv virt platforms.

The platform provides:
 - OpenRISC SMP with up to 4 cpus
 - A virtio bus with up to 8 devices
 - Standard ns16550a serial
 - Goldfish RTC
 - SiFive TEST device for poweroff and reboot
 - Generated Device Tree to automatically configure the guest kernel

Signed-off-by: Stafford Horne 
---
Since v2:
 - No changes

 configs/devices/or1k-softmmu/default.mak |   1 +
 hw/openrisc/Kconfig  |   9 +
 hw/openrisc/meson.build  |   1 +
 hw/openrisc/virt.c   | 417 +++
 4 files changed, 428 insertions(+)
 create mode 100644 hw/openrisc/virt.c

diff --git a/configs/devices/or1k-softmmu/default.mak 
b/configs/devices/or1k-softmmu/default.mak
index 168101c39a..89c39e3123 100644
--- a/configs/devices/or1k-softmmu/default.mak
+++ b/configs/devices/or1k-softmmu/default.mak
@@ -3,3 +3,4 @@
 # Boards:
 #
 CONFIG_OR1K_SIM=y
+CONFIG_OR1K_VIRT=y
diff --git a/hw/openrisc/Kconfig b/hw/openrisc/Kconfig
index 8f284f3ba0..202134668e 100644
--- a/hw/openrisc/Kconfig
+++ b/hw/openrisc/Kconfig
@@ -4,3 +4,12 @@ config OR1K_SIM
 select OPENCORES_ETH
 select OMPIC
 select SPLIT_IRQ
+
+config OR1K_VIRT
+bool
+imply VIRTIO_VGA
+imply TEST_DEVICES
+select GOLDFISH_RTC
+select SERIAL
+select SIFIVE_TEST
+select VIRTIO_MMIO
diff --git a/hw/openrisc/meson.build b/hw/openrisc/meson.build
index ab563820c5..2dbc6365bb 100644
--- a/hw/openrisc/meson.build
+++ b/hw/openrisc/meson.build
@@ -2,5 +2,6 @@ openrisc_ss = ss.source_set()
 openrisc_ss.add(files('cputimer.c'))
 openrisc_ss.add(files('boot.c'))
 openrisc_ss.add(when: 'CONFIG_OR1K_SIM', if_true: [files('openrisc_sim.c'), 
fdt])
+openrisc_ss.add(when: 'CONFIG_OR1K_VIRT', if_true: [files('virt.c'), fdt])
 
 hw_arch += {'openrisc': openrisc_ss}
diff --git a/hw/openrisc/virt.c b/hw/openrisc/virt.c
new file mode 100644
index 00..54f2732a6b
--- /dev/null
+++ b/hw/openrisc/virt.c
@@ -0,0 +1,417 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * OpenRISC QEMU virtual machine.
+ *
+ * (c) 2022 Stafford Horne 
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "cpu.h"
+#include "exec/address-spaces.h"
+#include "hw/irq.h"
+#include "hw/boards.h"
+#include "hw/char/serial.h"
+#include "hw/core/split-irq.h"
+#include "hw/openrisc/boot.h"
+#include "hw/misc/sifive_test.h"
+#include "hw/qdev-properties.h"
+#include "hw/rtc/goldfish_rtc.h"
+#include "hw/sysbus.h"
+#include "hw/virtio/virtio-mmio.h"
+#include "sysemu/device_tree.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/qtest.h"
+#include "sysemu/reset.h"
+
+#include 
+
+#define VIRT_CPUS_MAX 4
+#define VIRT_CLK_MHZ 2000
+
+#define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt")
+#define VIRT_MACHINE(obj) \
+OBJECT_CHECK(OR1KVirtState, (obj), TYPE_VIRT_MACHINE)
+
+typedef struct OR1KVirtState {
+/*< private >*/
+MachineState parent_obj;
+
+/*< public >*/
+void *fdt;
+int fdt_size;
+
+} OR1KVirtState;
+
+enum {
+VIRT_DRAM,
+VIRT_TEST,
+VIRT_RTC,
+VIRT_VIRTIO,
+VIRT_UART,
+VIRT_OMPIC,
+};
+
+enum {
+VIRT_OMPIC_IRQ = 1,
+VIRT_UART_IRQ = 2,
+VIRT_RTC_IRQ = 3,
+VIRT_VIRTIO_IRQ = 4, /* to 12 */
+VIRTIO_COUNT = 8,
+};
+
+static const struct MemmapEntry {
+hwaddr base;
+hwaddr size;
+} virt_memmap[] = {
+[VIRT_DRAM] =  { 0x,  0 },
+[VIRT_UART] =  { 0x9000,  0x100 },
+[VIRT_TEST] =  { 0x9600,0x8 },
+[VIRT_RTC] =   { 0x96005000, 0x1000 },
+[VIRT_VIRTIO] ={ 0x9700, 0x1000 },
+[VIRT_OMPIC] = { 0x9800, VIRT_CPUS_MAX * 8 },
+};
+
+static struct openrisc_boot_info {
+uint32_t bootstrap_pc;
+uint32_t fdt_addr;
+} boot_info;
+
+static void main_cpu_reset(void *opaque)
+{
+OpenRISCCPU *cpu = opaque;
+CPUState *cs = CPU(cpu);
+
+cpu_reset(CPU(cpu));
+
+cpu_set_pc(cs, boot_info.bootstrap_pc);
+cpu_set_gpr(>env, 3, boot_info.fdt_addr);
+}
+
+static qemu_irq get_cpu_irq(OpenRISCCPU *cpus[], int cpunum, int irq_pin)
+{
+return qdev_get_gpio_in_named(DEVICE(cpus[cpunum]), "IRQ", irq_pin);
+}
+
+static qemu_irq get_per_cpu_irq(OpenRISCCPU *cpus[], int num_cpus, int irq_pin)
+{
+int i;
+
+if (num_cpus > 1) {
+DeviceState *splitter = qdev_new(TYPE_SPLIT_IRQ);
+qdev_prop_set_uint32(splitter, "num-lines", num_cpus);
+qdev_realize_and_unref(splitter, NULL, _fatal);
+for (i = 0; i < num_cpus; i++) {
+qdev_connect_gpio_out(splitter, i, get_cpu_irq(cpus, i, irq_pin));
+}
+return qdev_get_gpio_in(splitter, 0);
+} else {
+return get_cpu_irq(cpus, 0, irq_pin);
+}
+}
+

[PATCH v3 00/11] OpenRISC Virtual Machine

2022-07-29 Thread Stafford Horne

Hello,

This is the OpenRISC Virtual Machine plaform which we are now using for OpenRISC
CI such as the wireguard testing that Jason has been working on.  I also have
recently used it to test glibc 2.36 and it worked well. Previous glibc testsuite
runs on my FPGA board took about 3 days, running on qemu virt now takes 6 hours.

The first few patches help get OpenRISC QEMU ready for the virtual machine.
There is one bug fix for GDB debugging there too.

Next we have the Virt patch followed by a separate patch to add PCI support
which is split out because it's a bit easier to review that way I thought.  The
next few patches are fixes to get the Multicore platform stable, such as adding
MTTCG support and fixing some interrupt and timer related bugs.

The platform is relatively stable now, but every few boots we get about 10
second hangs.  However, overall this is much more stable than the SMP support we
had before.  So I want to submit this for review and maybe upstream it before
tracking down these last issues which might take significant more time.

This is being tested with the or1k-5.20-updates kernel branch here:

  https://github.com/stffrdhrn/linux/commits/or1k-5.20-updates

  This tree has support for: OpenRISC PCI and virt_defconfig and an irqchip bug
  fix.

Changes since v2:
 - Changed goldfish_rtc endian property to boolean
 - Moved or1k timer init from init to reset
 - Removed cpu_openrisc_timer_has_advanced lock optimization in MTTCG patch,
   measuring revealed it did not help much.
Changes since v1:
 - Dropped semihosting support
 - Added PCI support
 - Added OpenRISC documentation
 - Added OpenRISC support for MTTCG
 - Support Configurating Goldfish RTC endianness
 - Added a few bug fix patches

Jason A. Donenfeld (1):
  hw/openrisc: virt: pass random seed to fdt

Stafford Horne (10):
  hw/openrisc: Split re-usable boot time apis out to boot.c
  target/openrisc: Fix memory reading in debugger
  goldfish_rtc: Add big-endian property
  hw/openrisc: Add the OpenRISC virtual machine
  hw/openrisc: Add PCI bus support to virt
  hw/openrisc: Initialize timer time at startup
  target/openrisc: Add interrupted CPU to log
  target/openrisc: Enable MTTCG
  target/openrisc: Interrupt handling fixes
  docs/system: openrisc: Add OpenRISC documentation

 configs/devices/or1k-softmmu/default.mak |   1 +
 configs/targets/or1k-softmmu.mak |   1 +
 docs/system/openrisc/cpu-features.rst|  15 +
 docs/system/openrisc/emulation.rst   |  17 +
 docs/system/openrisc/or1k-sim.rst|  43 ++
 docs/system/openrisc/virt.rst|  50 ++
 docs/system/target-openrisc.rst  |  72 +++
 docs/system/targets.rst  |   1 +
 hw/m68k/virt.c   |   1 +
 hw/openrisc/Kconfig  |  12 +
 hw/openrisc/boot.c   | 117 +
 hw/openrisc/cputimer.c   |  22 +-
 hw/openrisc/meson.build  |   2 +
 hw/openrisc/openrisc_sim.c   | 106 +
 hw/openrisc/virt.c   | 571 +++
 hw/rtc/goldfish_rtc.c|  37 +-
 include/hw/openrisc/boot.h   |  34 ++
 include/hw/rtc/goldfish_rtc.h|   2 +
 target/openrisc/cpu.c|   1 -
 target/openrisc/cpu.h|   2 +
 target/openrisc/interrupt.c  |   4 +-
 target/openrisc/mmu.c|   8 +-
 target/openrisc/sys_helper.c |  14 +-
 23 files changed, 1019 insertions(+), 114 deletions(-)
 create mode 100644 docs/system/openrisc/cpu-features.rst
 create mode 100644 docs/system/openrisc/emulation.rst
 create mode 100644 docs/system/openrisc/or1k-sim.rst
 create mode 100644 docs/system/openrisc/virt.rst
 create mode 100644 docs/system/target-openrisc.rst
 create mode 100644 hw/openrisc/boot.c
 create mode 100644 hw/openrisc/virt.c
 create mode 100644 include/hw/openrisc/boot.h

-- 
2.37.1

Re: [PATCH v2] linux-user: Use memfd for open syscall emulation

2022-07-29 Thread Richard Henderson


On 7/29/22 14:19, Rainer Müller wrote:

On 29/07/2022 18.01, Richard Henderson wrote:

On 7/29/22 08:49, Rainer Müller wrote:

+    /* create temporary file to map stat to */
+    tmpdir = getenv("TMPDIR");
+    if (!tmpdir)
+    tmpdir = "/tmp";
+    snprintf(filename, sizeof(filename),
"%s/qemu-open.XX", tmpdir);
+    fd = mkstemp(filename);
+    if (fd < 0) {
+    return fd;
+    }


We've been using g_file_open_tmp elsewhere; probably good to follow suit
here.


That seemed reasonable at first, but with regards to error handling it
gets a bit complicated.

The suggested g_file_open_tmp() would leave us with a GError only, but
to return something meaningful to the caller we must set errno in this
context. As far as I can see, there is no way to convert back to an
errno from GError.

With g_file_open_tmp() we could always set the same generic errno, but
that would hide the real cause completely. I debugged this problem with
this message that was confusing, but at least it gave away a hint:
   cat: can't open '/proc/self/stat': Read-only file system


That's a good reason.  Let's leave this alone then.

Reviewed-by: Richard Henderson 


r~

Re: [PATCH for-7.1? 0/2] Re-enable ppc32 as a linux-user host

2022-07-29 Thread Richard Henderson


On 7/29/22 13:44, Daniel Henrique Barboza wrote:

The last Debian that supports ppc32 was Debian 8:

https://www.debian.org/ports/powerpc/#powerpc

"Debian on 32-bit PowerPC (powerpc)
It first became an official release architecture with Debian GNU/Linux 2.2
(potato) and had retained that status until the publication of Debian 9 
(stretch).
The last supported release for 32-bit PowerPC is Debian 8 (jessie)"


It has been moved out of general support but remains in ports[1].

I was able to install a debian sid chroot with this today[2].
which should be good enough to continue compile-testing.
And Mark has hardware for actual testing.  Given that almost
all of the code overlaps with ppc64, I'm ok keeping it now.


r~


[1] http://ftp.de.debian.org/debian-ports/pool-powerpc/
[2] https://github.com/vivier/linux-user-test-scrips.git

Re: [PATCH v2] linux-user: Use memfd for open syscall emulation

2022-07-29 Thread Rainer Müller

On 29/07/2022 18.01, Richard Henderson wrote:
> On 7/29/22 08:49, Rainer Müller wrote:
>> +    /* create temporary file to map stat to */
>> +    tmpdir = getenv("TMPDIR");
>> +    if (!tmpdir)
>> +    tmpdir = "/tmp";
>> +    snprintf(filename, sizeof(filename),
>> "%s/qemu-open.XX", tmpdir);
>> +    fd = mkstemp(filename);
>> +    if (fd < 0) {
>> +    return fd;
>> +    }
> 
> We've been using g_file_open_tmp elsewhere; probably good to follow suit
> here.

That seemed reasonable at first, but with regards to error handling it
gets a bit complicated.

The suggested g_file_open_tmp() would leave us with a GError only, but
to return something meaningful to the caller we must set errno in this
context. As far as I can see, there is no way to convert back to an
errno from GError.

With g_file_open_tmp() we could always set the same generic errno, but
that would hide the real cause completely. I debugged this problem with
this message that was confusing, but at least it gave away a hint:
  cat: can't open '/proc/self/stat': Read-only file system

The other option would be to g_assert_true(fd >= 0) and kill the process
in case opening the temporary file failed. This also feels wrong, as the
caller could still recover from this state and continue.

Rainer

Re: [PATCH v7 12/14] KVM: Handle page fault for private memory

2022-07-29 Thread Sean Christopherson

On Wed, Jul 06, 2022, Chao Peng wrote:
> A page fault can carry the private/shared information for
> KVM_MEM_PRIVATE memslot, this can be filled by architecture code(like
> TDX code). To handle page fault for such access, KVM maps the page only
> when this private property matches the host's view on the page.
> 
> For a successful match, private pfn is obtained with memfile_notifier
> callbacks from private fd and shared pfn is obtained with existing
> get_user_pages.
> 
> For a failed match, KVM causes a KVM_EXIT_MEMORY_FAULT exit to
> userspace. Userspace then can convert memory between private/shared from
> host's view then retry the access.
> 
> Co-developed-by: Yu Zhang 
> Signed-off-by: Yu Zhang 
> Signed-off-by: Chao Peng 
> ---
>  arch/x86/kvm/mmu/mmu.c  | 60 -
>  arch/x86/kvm/mmu/mmu_internal.h | 18 ++
>  arch/x86/kvm/mmu/mmutrace.h |  1 +
>  include/linux/kvm_host.h| 35 ++-
>  4 files changed, 112 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 545eb74305fe..27dbdd4fe8d1 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3004,6 +3004,9 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
>   if (max_level == PG_LEVEL_4K)
>   return PG_LEVEL_4K;
>  
> + if (kvm_mem_is_private(kvm, gfn))
> + return max_level;
> +
>   host_level = host_pfn_mapping_level(kvm, gfn, pfn, slot);
>   return min(host_level, max_level);
>  }
> @@ -4101,10 +4104,52 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 
> struct kvm_async_pf *work)
>   kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
>  }
>  
> +static inline u8 order_to_level(int order)
> +{
> + enum pg_level level;
> +
> + for (level = KVM_MAX_HUGEPAGE_LEVEL; level > PG_LEVEL_4K; level--)

Curly braces needed for the for-loop.

And I think it makes sense to take in the fault->max_level, that way this is
slightly more performant when the guest mapping is smaller than the host, e.g.

for (level = max_level; level > PG_LEVEL_4K; level--)
...

return level;

Though I think I'd vote to avoid a loop entirely and do:

BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);

if (order > ???)
return PG_LEVEL_1G;

if (order > ???)
return PG_LEVEL_2M;

return PG_LEVEL_4K;


> + if (order >= page_level_shift(level) - PAGE_SHIFT)
> + return level;
> + return level;
> +}
> +
> +static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
> +struct kvm_page_fault *fault)
> +{
> + int order;
> + struct kvm_memory_slot *slot = fault->slot;
> + bool private_exist = kvm_mem_is_private(vcpu->kvm, fault->gfn);
> +
> + if (fault->is_private != private_exist) {
> + vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
> + if (fault->is_private)
> + vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE;
> + else
> + vcpu->run->memory.flags = 0;
> + vcpu->run->memory.padding = 0;
> + vcpu->run->memory.gpa = fault->gfn << PAGE_SHIFT;
> + vcpu->run->memory.size = PAGE_SIZE;
> + return RET_PF_USER;
> + }
> +
> + if (fault->is_private) {
> + if (kvm_private_mem_get_pfn(slot, fault->gfn, >pfn, 
> ))
> + return RET_PF_RETRY;
> + fault->max_level = min(order_to_level(order), fault->max_level);
> + fault->map_writable = !(slot->flags & KVM_MEM_READONLY);
> + return RET_PF_FIXED;
> + }
> +
> + /* Fault is shared, fallthrough. */
> + return RET_PF_CONTINUE;
> +}
> +
>  static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault 
> *fault)
>  {
>   struct kvm_memory_slot *slot = fault->slot;
>   bool async;
> + int r;
>  
>   /*
>* Retry the page fault if the gfn hit a memslot that is being deleted
> @@ -4133,6 +4178,12 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, 
> struct kvm_page_fault *fault)
>   return RET_PF_EMULATE;
>   }
>  
> + if (kvm_slot_can_be_private(slot)) {
> + r = kvm_faultin_pfn_private(vcpu, fault);
> + if (r != RET_PF_CONTINUE)
> + return r == RET_PF_FIXED ? RET_PF_CONTINUE : r;

I apologize if I've given you conflicting feedback in the past.  Now that this
returns RET_PF_* directly, I definitely think it makes sense to do:

if (kvm_slot_can_be_private(slot) &&
fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
if (fault->is_private)
vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE;
else

Re: [PATCH for-7.1? 0/2] Re-enable ppc32 as a linux-user host

2022-07-29 Thread Daniel Henrique Barboza





On 7/29/22 14:21, Richard Henderson wrote:

This is, technically, a regression from 6.2, so it's not
implausible to apply before rc1.  Thoughts?



In gitlab #1097 the author comments that:

https://gitlab.com/qemu-project/qemu/-/issues/1097#note_1044810483

"there are several distributions still available on 32-bit powerpc, e.g.
Adélie Linux, for now still Void Linux, afaik Debian and OpenSUSE also
still build packages"

I checked these claims. Latest version OpenSuse LEAP doesn't support
ppc32 bits:

https://get.opensuse.org/leap/15.4/#download

The last Debian that supports ppc32 was Debian 8:

https://www.debian.org/ports/powerpc/#powerpc

"Debian on 32-bit PowerPC (powerpc)
It first became an official release architecture with Debian GNU/Linux 2.2
(potato) and had retained that status until the publication of Debian 9 
(stretch).
The last supported release for 32-bit PowerPC is Debian 8 (jessie)"

And Void Linux doesn't seem to support any PowerPC flavor:

https://voidlinux.org/download/

Adélie Linux supports ppc32. I can also add that FreeBSD also supports ppc32.

Checking about/build-platforms.rst I can see that we would only somewhat
care for FreeBSD here, since Debian 8 is already out of our support
window.

All that said, I don't have strong feelings against re-enabling it, specially
because this build issue was deliberated caused by us.

However, after re-enabling it, I would only care about build bugs that are
reproduced on ppc32 FreeBSD.



Daniel




r~


Richard Henderson (2):
   common-user/host/ppc: Implement safe-syscall.inc.S
   linux-user: Implment host/ppc/host-signal.h

  linux-user/include/host/ppc/host-signal.h |  39 
  common-user/host/ppc/safe-syscall.inc.S   | 107 ++
  2 files changed, 146 insertions(+)
  create mode 100644 linux-user/include/host/ppc/host-signal.h
  create mode 100644 common-user/host/ppc/safe-syscall.inc.S

[PATCH for-7.1?] linux-user/riscv: Align signal frame to 16 bytes

2022-07-29 Thread Richard Henderson

Follow the kernel's alignment, as we already noted.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1093
Signed-off-by: Richard Henderson 
---
 linux-user/riscv/signal.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/linux-user/riscv/signal.c b/linux-user/riscv/signal.c
index 296e39fbf0..eaa168199a 100644
--- a/linux-user/riscv/signal.c
+++ b/linux-user/riscv/signal.c
@@ -64,9 +64,7 @@ static abi_ulong get_sigframe(struct target_sigaction *ka,
 
 /* This is the X/Open sanctioned signal stack switching.  */
 sp = target_sigsp(sp, ka) - framesize;
-
-/* XXX: kernel aligns with 0xf ? */
-sp &= ~3UL; /* align sp on 4-byte boundary */
+sp &= ~0xf;
 
 return sp;
 }
-- 
2.34.1

[PATCH] linux-user: Implement faccessat2

2022-07-29 Thread Richard Henderson

Split out do_faccessat2 helper, and use it for
accessat and faccessat as well.

Signed-off-by: Richard Henderson 
---
Will we ever have a system libc for which __NR_faccessat2 is present,
but faccessat() does not try faccessat2 first?

r~
---
 linux-user/syscall.c | 44 +++-
 1 file changed, 31 insertions(+), 13 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index b27a6552aa..acd8452048 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8530,6 +8530,30 @@ static int do_getdents64(abi_long dirfd, abi_long arg2, 
abi_long count)
 _syscall2(int, pivot_root, const char *, new_root, const char *, put_old)
 #endif
 
+static int do_faccessat2(int dirfd, abi_ptr pathname, int mode, int flags)
+{
+char *p = lock_user_string(pathname);
+bool nosys = true;
+int ret;
+
+if (!p) {
+return -TARGET_EFAULT;
+}
+
+/* Use the raw host syscall if possible, in case we have an old libc. */
+#ifdef __NR_faccessat2
+ret = syscall(__NR_faccessat2, dirfd, p, mode, flags);
+nosys = ret < 0 && errno == ENOSYS;
+#endif
+/* If we don't have the syscall, defer to libc emulation. */
+if (nosys) {
+ret = faccessat(dirfd, p, mode, flags);
+}
+
+unlock_user(p, pathname, 0);
+return get_errno(ret);
+}
+
 /* This is an internal helper for do_syscall so that it is easier
  * to have a single return point, so that actions, such as logging
  * of syscall results, can be performed.
@@ -9058,21 +9082,15 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int 
num, abi_long arg1,
 #endif
 #ifdef TARGET_NR_access
 case TARGET_NR_access:
-if (!(p = lock_user_string(arg1))) {
-return -TARGET_EFAULT;
-}
-ret = get_errno(access(path(p), arg2));
-unlock_user(p, arg1, 0);
-return ret;
+return do_faccessat2(AT_FDCWD, arg1, arg2, 0);
 #endif
-#if defined(TARGET_NR_faccessat) && defined(__NR_faccessat)
+#ifdef TARGET_NR_faccessat
 case TARGET_NR_faccessat:
-if (!(p = lock_user_string(arg2))) {
-return -TARGET_EFAULT;
-}
-ret = get_errno(faccessat(arg1, p, arg3, 0));
-unlock_user(p, arg2, 0);
-return ret;
+return do_faccessat2(arg1, arg2, arg3, 0);
+#endif
+#ifdef TARGET_NR_faccessat2
+case TARGET_NR_faccessat2:
+return do_faccessat2(arg1, arg2, arg3, arg4);
 #endif
 #ifdef TARGET_NR_nice /* not on alpha */
 case TARGET_NR_nice:
-- 
2.34.1

Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-07-29 Thread Sean Christopherson

On Mon, Jul 25, 2022, Chao Peng wrote:
> On Thu, Jul 21, 2022 at 05:58:50PM +, Sean Christopherson wrote:
> > On Thu, Jul 21, 2022, Chao Peng wrote:
> > > On Thu, Jul 21, 2022 at 03:34:59PM +0800, Wei Wang wrote:
> > > > 
> > > > 
> > > > On 7/21/22 00:21, Sean Christopherson wrote:
> > > > Maybe you could tag it with cgs for all the confidential guest support
> > > > related stuff: e.g. kvm_vm_ioctl_set_cgs_mem()
> > > > 
> > > > bool is_private = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION;
> > > > ...
> > > > kvm_vm_ioctl_set_cgs_mem(, is_private)
> > > 
> > > If we plan to widely use such abbr. through KVM (e.g. it's well known),
> > > I'm fine.
> > 
> > I'd prefer to stay away from "confidential guest", and away from any 
> > VM-scoped
> > name for that matter.  User-unmappable memmory has use cases beyond hiding 
> > guest
> > state from the host, e.g. userspace could use inaccessible/unmappable 
> > memory to
> > harden itself against unintentional access to guest memory.
> > 
> > > I actually use mem_attr in patch: https://lkml.org/lkml/2022/7/20/610
> > > But I also don't quite like it, it's so generic and sounds say nothing.
> > > 
> > > But I do want a name can cover future usages other than just 
> > > private/shared (pKVM for example may have a third state).
> > 
> > I don't think there can be a third top-level state.  Memory is either 
> > private to
> > the guest or it's not.  There can be sub-states, e.g. memory could be 
> > selectively
> > shared or encrypted with a different key, in which case we'd need metadata 
> > to
> > track that state.
> > 
> > Though that begs the question of whether or not private_fd is the correct
> > terminology.  E.g. if guest memory is backed by a memfd that can't be 
> > mapped by
> > userspace (currently F_SEAL_INACCESSIBLE), but something else in the kernel 
> > plugs
> > that memory into a device or another VM, then arguably that memory is 
> > shared,
> > especially the multi-VM scenario.
> > 
> > For TDX and SNP "private vs. shared" is likely the correct terminology 
> > given the
> > current specs, but for generic KVM it's probably better to align with 
> > whatever
> > terminology is used for memfd.  "inaccessible_fd" and 
> > "user_inaccessible_fd" are
> > a bit odd since the fd itself is accesible.
> > 
> > What about "user_unmappable"?  E.g.
> > 
> >   F_SEAL_USER_UNMAPPABLE, MFD_USER_UNMAPPABLE, 
> > KVM_HAS_USER_UNMAPPABLE_MEMORY,
> >   MEMFILE_F_USER_INACCESSIBLE, user_unmappable_fd, etc...
> 
> For KVM I also think user_unmappable looks better than 'private', e.g.
> user_unmappable_fd/KVM_HAS_USER_UNMAPPABLE_MEMORY sounds more
> appropriate names. For memfd however, I don't feel that strong to change
> it from current 'inaccessible' to 'user_unmappable', one of the reason
> is it's not just about unmappable, but actually also inaccessible
> through direct ioctls like read()/write().

Heh, I _knew_ there had to be a catch.  I agree that INACCESSIBLE is better for
memfd.

Re: [PATCH v7 09/14] KVM: Extend the memslot to support fd-based private memory

2022-07-29 Thread Sean Christopherson

On Wed, Jul 06, 2022, Chao Peng wrote:
> @@ -1332,9 +1332,18 @@ yet and must be cleared on entry.
>   __u64 userspace_addr; /* start of the userspace allocated memory */
>};
>  
> +  struct kvm_userspace_memory_region_ext {
> + struct kvm_userspace_memory_region region;
> + __u64 private_offset;
> + __u32 private_fd;
> + __u32 pad1;
> + __u64 pad2[14];
> +};
> +
>/* for kvm_memory_region::flags */
>#define KVM_MEM_LOG_DIRTY_PAGES(1UL << 0)
>#define KVM_MEM_READONLY   (1UL << 1)
> +  #define KVM_MEM_PRIVATE(1UL << 2)

Very belatedly following up on prior feedback...

  | I think a flag is still needed, the problem is private_fd can be safely
  | accessed only when this flag is set, e.g. without this flag, we can't
  | copy_from_user these new fields since they don't exist for previous
  | kvm_userspace_memory_region callers.

I forgot about that aspect of things.  We don't technically need a dedicated
PRIVATE flag to handle that, but it does seem to be the least awful soltuion.
We could either add a generic KVM_MEM_EXTENDED_REGION or an entirely new
ioctl(), e.g. KVM_SET_USER_MEMORY_REGION2, but in both approaches there's a 
decent
chance that we'll end up needed individual "this field is valid" flags anways.

E.g. if KVM requires pad1 and pad2 to be zero to carve out future extensions,
then we're right back here if some future extension needs to treat '0' as a 
legal
input.

TL;DR: adding KVM_MEM_PRIVATE still seems like the best approach.

> @@ -4631,14 +4658,35 @@ static long kvm_vm_ioctl(struct file *filp,
>   break;
>   }
>   case KVM_SET_USER_MEMORY_REGION: {
> - struct kvm_userspace_memory_region kvm_userspace_mem;
> + struct kvm_user_mem_region mem;
> + unsigned long size;
> + u32 flags;
> +
> + kvm_sanity_check_user_mem_region_alias();
> +
> + memset(, 0, sizeof(mem));
>  
>   r = -EFAULT;
> - if (copy_from_user(_userspace_mem, argp,
> - sizeof(kvm_userspace_mem)))
> +
> + if (get_user(flags,
> + (u32 __user *)(argp + offsetof(typeof(mem), flags
> + goto out;

Indentation is funky.  It's hard to massage this into something short and
readable  What about capturing the offset separately?  E.g.

struct kvm_user_mem_region mem;
unsigned int flags_offset = offsetof(typeof(mem), flags));
unsigned long size;
u32 flags;

kvm_sanity_check_user_mem_region_alias();

memset(, 0, sizeof(mem));

r = -EFAULT;
if (get_user(flags, (u32 __user *)(argp + flags_offset)))
goto out;

But this can actually be punted until KVM_MEM_PRIVATE is fully supported.  As of
this patch, KVM doesn't read the extended size, so I believe the diff for this
patch can simply be:

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index da263c370d00..5194beb7b52f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4640,6 +4640,10 @@ static long kvm_vm_ioctl(struct file *filp,
sizeof(kvm_userspace_mem)))
goto out;

+   r = -EINVAL;
+   if (mem.flags & KVM_MEM_PRIVATE)
+   goto out;
+
r = kvm_vm_ioctl_set_memory_region(kvm, _userspace_mem);
break;
}

Re: [PATCH v4 11/17] dump/dump: Add section string table support

2022-07-29 Thread Janis Schoetterl-Glausch

On 7/26/22 11:22, Janosch Frank wrote:
> As sections don't have a type like the notes do we need another way to

Having a string table seems like a good idea to me, as we don't know
the requirements any architecture might have, but sections do have sh_type.
Could we use one of those, e.g. one of the processor specific ones?
Is
SHT_PROGBITS
The section holds information defined by the program,
whose format and meaning are determined solely by the program.
appropriate for us? It seems to me that our program is the guest and
it doesn't determine the meta information on how to decrypt the dump.

> determine their contents. The string table allows us to assign each
> section an identification string which architectures can then use to
> tag their sections with.
> 
> There will be no string table if the architecture doesn't add custom
> sections which are introduced in a following patch.
> 
> Signed-off-by: Janosch Frank 
> ---
>  dump/dump.c   | 81 ++-
>  include/sysemu/dump.h |  1 +
>  2 files changed, 81 insertions(+), 1 deletion(-)
> 
> diff --git a/dump/dump.c b/dump/dump.c
> index 3cf846d0a0..298a1e923f 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c

[...]

>  static void prepare_elf_section_hdrs(DumpState *s)
>  {
>  size_t len, sizeof_shdr;
> +Elf64_Ehdr *hdr64 = s->elf_header;
> +Elf32_Ehdr *hdr32 = s->elf_header;
> +void *buff_hdr;
>  
>  /*
>   * Section ordering:
>   * - HDR zero (if needed)
> + * - String table hdr
>   */
>  sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
>  len = sizeof_shdr * s->shdr_num;
>  s->elf_section_hdrs = g_malloc0(len);
> +buff_hdr = s->elf_section_hdrs;
>  
>  /* Write special section first */
>  if (s->phdr_num == PN_XNUM) {
>  prepare_elf_section_hdr_zero(s);
> +buff_hdr += sizeof_shdr;
> +}
> +
> +if (s->shdr_num < 2) {
> +return;
> +}
> +
> +/*
> + * String table needs to be last section since strings are added
> + * via arch_sections_write_hdr().
> + */
> +write_elf_section_hdr_string(s, buff_hdr);
> +if (dump_is_64bit(s)) {
> +hdr64->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1);
> +} else {
> +hdr32->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1);
>  }

Might want to assert e_shstrndx < SHN_LORESERVE (0xff00) or handle that case 
correctly.

>  }
>  
> @@ -401,11 +453,18 @@ static void write_elf_sections(DumpState *s, Error 
> **errp)
>  {
>  int ret;
>  
> -/* Write section zero */
> +/* Write section zero and arch sections */

Since that doesn't concern the string section it seems wrong to change this in 
this patch.

>  ret = fd_write_vmcore(s->elf_section_data, s->elf_section_data_size, s);
>  if (ret < 0) {
>  error_setg_errno(errp, -ret, "dump: failed to write section data");
>  }
> +
> +/* Write string table data */
> +ret = fd_write_vmcore(s->string_table_buf->data,
> +  s->string_table_buf->len, s);
> +if (ret < 0) {
> +error_setg_errno(errp, -ret, "dump: failed to write string table 
> data");
> +}
>  }
>  
>  static void write_data(DumpState *s, void *buf, int length, Error **errp)
> @@ -713,6 +772,7 @@ static void create_vmcore(DumpState *s, Error **errp)
>  return;
>  }
>  
> +/* Iterate over memory and dump it to file */

This should be going into the patch introducing dump_end.

>  dump_iterate(s, errp);
>  if (*errp) {
>  return;
> @@ -1695,6 +1755,13 @@ static void dump_init(DumpState *s, int fd, bool 
> has_format,
>  s->has_filter = has_filter;
>  s->begin = begin;
>  s->length = length;
> +/* First index is 0, it's the special null name */
> +s->string_table_buf = g_array_new(FALSE, TRUE, 1);
> +/*
> + * Allocate the null name, due to the clearing option set to true
> + * it will be 0.
> + */
> +g_array_set_size(s->string_table_buf, 1);
>  
>  memory_mapping_list_init(>list);
>  
> @@ -1855,6 +1922,18 @@ static void dump_init(DumpState *s, int fd, bool 
> has_format,
>  }
>  }
>  
> +/*
> + * calculate shdr_num and elf_section_data_size so we know the offsets 
> and
> + * sizes of all parts.
> + *
> + * If phdr_num overflowed we have at least one section header
> + * More sections/hdrs can be added by the architectures
> + */

This needs to be adjusted like we talked about, i.e. adding the special 0 
section unless
it already exists.

> +if (s->shdr_num > 1) {
> +/* Reserve the string table */
> +s->shdr_num += 1;
> +}
> +
>  tmp = (s->phdr_num == PN_XNUM) ? s->sh_info : s->phdr_num;
>  if (dump_is_64bit(s)) {
>  s->shdr_offset = sizeof(Elf64_Ehdr);
> diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
> index 696e6c67d6..299b611180 100644
> ---

Re: [PATCH v2] ci: Upgrade msys2 release to 20220603

2022-07-29 Thread Richard Henderson


On 7/28/22 13:04, Yonggang Luo wrote:

Signed-off-by: Yonggang Luo 
---
  .cirrus.yml  | 2 +-
  .gitlab-ci.d/windows.yml | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)


Thanks.  Applied to master as a hot-fix.


r~

Re: [PATCH v7 08/14] KVM: Rename mmu_notifier_*

2022-07-29 Thread Sean Christopherson

On Wed, Jul 06, 2022, Chao Peng wrote:
> The sync mechanism between mmu_notifier and page fault handler employs
> fields mmu_notifier_seq/count and mmu_notifier_range_start/end. For the
> to be added private memory, there is the same mechanism needed but not
> rely on mmu_notifier (It uses new introduced memfile_notifier). This
> patch renames the existing fields and related helper functions to a
> neutral name mmu_updating_* so private memory can reuse.

mmu_updating_* is too broad of a term, e.g. page faults and many other 
operations
also update the mmu.  Although the name most definitely came from the 
mmu_notifier,
it's not completely inaccurate for other sources, e.g. KVM's MMU is still being
notified of something, even if the source is not the actual mmu_notifier.

If we really want a different name, I'd vote for nomenclature that captures the
invalidation aspect, which is really what the variables are all trackng, e.g.

  mmu_invalidate_seq
  mmu_invalidate_in_progress
  mmu_invalidate_range_start
  mmu_invalidate_range_end

Re: [PATCH v4 10/17] dump: Swap segment and section header locations

2022-07-29 Thread Janis Schoetterl-Glausch

You swapped the headers in patch 8, you just fixing up the elf header in this 
patch, right?
Also I don't understand the reason for swapping the headers.
And the comment diagram in dump_begin still reflects the old ordering.

On 7/26/22 11:22, Janosch Frank wrote:
> For the upcoming string table and arch section support we need to
> modify the elf layout a bit. Instead of the segments, i.e. the guest's
> memory contents, beeing the last area the section data will live at
> the end of the file. This will allow us to write the section data
> after all guest memory has been dumped which is important for the s390
> PV dump support.
> 
> Signed-off-by: Janosch Frank 
> ---
>  dump/dump.c   | 21 -
>  include/sysemu/dump.h |  1 +
>  2 files changed, 13 insertions(+), 9 deletions(-)
> 
> diff --git a/dump/dump.c b/dump/dump.c
> index a6bb7bfa21..3cf846d0a0 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c
> @@ -588,6 +588,9 @@ static void dump_begin(DumpState *s, Error **errp)
>   *   --
>   *   |  memory |
>   *   --
> + *   |  sectn data |
> + *   --
> +
>   *
>   * we only know where the memory is saved after we write elf note into
>   * vmcore.
> @@ -1852,18 +1855,18 @@ static void dump_init(DumpState *s, int fd, bool 
> has_format,
>  }
>  }
>  
> +tmp = (s->phdr_num == PN_XNUM) ? s->sh_info : s->phdr_num;

You don't need this, do you? s->phdr_num is the correct value, it's the value
in the elf header that gets adjusted.

>  if (dump_is_64bit(s)) {
> -s->phdr_offset = sizeof(Elf64_Ehdr);
> -s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num;
> -s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
> -s->memory_offset = s->note_offset + s->note_size;
> +s->shdr_offset = sizeof(Elf64_Ehdr);
> +s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
> +s->note_offset = s->phdr_offset + sizeof(Elf64_Phdr) * tmp;
>  } else {
> -
> -s->phdr_offset = sizeof(Elf32_Ehdr);
> -s->shdr_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num;
> -s->note_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num;
> -s->memory_offset = s->note_offset + s->note_size;
> +s->shdr_offset = sizeof(Elf32_Ehdr);
> +s->phdr_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num;
> +s->note_offset = s->phdr_offset + sizeof(Elf32_Phdr) * tmp;
>  }
> +s->memory_offset = s->note_offset + s->note_size;
> +s->section_offset = s->memory_offset + s->total_size;
>  
>  return;
>  
> diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
> index 3937afe0f9..696e6c67d6 100644
> --- a/include/sysemu/dump.h
> +++ b/include/sysemu/dump.h
> @@ -172,6 +172,7 @@ typedef struct DumpState {
>  hwaddr shdr_offset;
>  hwaddr phdr_offset;
>  hwaddr note_offset;
> +hwaddr section_offset;
>  
>  void *elf_header;
>  void *elf_section_hdrs;

Re: [PATCH v4 05/17] dump: Cleanup and annotate guest memory related DumpState struct members

2022-07-29 Thread Janis Schoetterl-Glausch

On 7/26/22 11:22, Janosch Frank wrote:
> We can safely remove next_block and start as both of them aren't used
> anymore due to the block iteration re-work.
> 
> Also we add comments to the remaining guest memory related struct
> members and a comment on top to group them.
> 
> Signed-off-by: Janosch Frank 

Reviewed-by: Janis Schoetterl-Glausch 

> ---
>  include/sysemu/dump.h | 9 -
>  1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
> index 6ce3c24197..24ebb02660 100644
> --- a/include/sysemu/dump.h
> +++ b/include/sysemu/dump.h
> @@ -166,11 +166,10 @@ typedef struct DumpState {
>  hwaddr memory_offset;
>  int fd;
>  
> -GuestPhysBlock *next_block;
> -ram_addr_t start;
> -bool has_filter;
> -int64_t begin;
> -int64_t length;
> +/* Guest memory related data */
> +bool has_filter;   /* Are we dumping parts of the memory? */

Maybe  /* Are we dumping part of the memory only? */

> +int64_t begin; /* Start address of the chunk we want to dump 
> */
> +int64_t length;/* Length of the dump we want to dump */
>  
>  uint8_t *note_buf;  /* buffer for notes */
>  size_t note_buf_offset; /* the writing place in note_buf */

Re: [PATCH v4 07/17] dump: Allocate header

2022-07-29 Thread Janis Schoetterl-Glausch

On 7/26/22 11:22, Janosch Frank wrote:
> Allocating the header lets us write it at a later time and hence also
> allows us to change section and segment table offsets until we
> finally write it.
> 
Where are you making use of this?
You set e_shstrndx in prepare_elf_section_hdrs, but that is not required, is it?
As far as I understand we know shdr_num after dump_init so that could be moved.

Re: [PATCH 1/1] block: add missed block_acct_setup with new block device init procedure

2022-07-29 Thread Kevin Wolf

Am 29.07.2022 um 14:36 hat Denis V. Lunev geschrieben:
> On 29.07.2022 11:13, Kevin Wolf wrote:
> > Am 28.07.2022 um 21:27 hat Denis V. Lunev geschrieben:
> > > On 28.07.2022 16:42, Vladimir Sementsov-Ogievskiy wrote:
> > > > On 7/11/22 14:07, Denis V. Lunev wrote:
> > > > > Commit 5f76a7aac156ca75680dad5df4a385fd0b58f6b1 is looking harmless 
> > > > > from
> > > > > the first glance, but it has changed things a lot. 'libvirt' uses it 
> > > > > to
> > > > > detect that it should follow new initialization way and this changes
> > > > > things considerably. With this procedure followed, blockdev_init() is
> > > > > not called anymore and thus block_acct_setup() helper is not called.
> > > > I'm not sure that 5f76a7aac156ca is really the corner stone.. But yes,
> > > > libvirt moved to "blockdev era", which means that we don't use old
> > > > -drive,
> > > > instead block nodes are created by -blockdev / qmp: blockdev-add, and
> > > > attached
> > > > to block devices by node-name.
> > > > 
> > > git bisected, thus I am sure here
> > > 
> > > 
> > > > And if I understand correctly blockdev_init() is called only on -drive
> > > > path.
> > > > 
> > > > I have some questions:
> > > > 
> > > > 1. After this patch, don't we call block_acct_setup() twice on old path
> > > > with -drive? That seems safe as block_acct_setup just assign fields of
> > > > BlockAcctStats.. But that's doesn't look good.
> > > > 
> > > hmmm
> > I don't think it's actually correct because then a value that was
> > explicitly set with -drive will by overridden by the default provided by
> > the device.
> > 
> > A possible solution would be to switch the defaults in the BlockBackend
> > initialisation back to true, and then have a ON_OFF_AUTO property in the
> > devices to allow overriding the default from -drive. With -blockdev, the
> > BlockBackend default will be hard coded to true and the options of the
> > devices will be the only way to change it.
> > 
> > > > 2. Do we really need these options? Could we instead just enable
> > > > accounting invalid and failed ops unconditionally? I doubt that someone
> > > > will learn that these new options appeared and will use them to disable
> > > > the failed/invalid accounting again.
> > > > 
> > > I can move assignment of these fields to true int
> > > block_acct_init() and forget about "configurable"
> > > items in new path. I do not think that somebody
> > > ever has these options set.
> > Well, whether anyone uses the option is a different question. I don't
> > know. But it has existed for many years.
> I have said about very small patch like the following
> 
> iris ~/src/qemu $ git diff
> diff --git a/block/accounting.c b/block/accounting.c
> index 2030851d79..c20d6ba9a0 100644
> --- a/block/accounting.c
> +++ b/block/accounting.c
> @@ -38,6 +38,8 @@ void block_acct_init(BlockAcctStats *stats)
>  if (qtest_enabled()) {
>  clock_type = QEMU_CLOCK_VIRTUAL;
>  }
> +    stats->account_invalid = true;
> +    stats->account_failed = true;
>  }

Yes, this looks good to me and we'll need it either way, even if we add
the ON_OFF_AUTO property to devices (because we need to set the right
default for 'auto').

>  void block_acct_setup(BlockAcctStats *stats, bool account_invalid,
> iris ~/src/qemu $
> 
> but your proposal with ON_OFF_AUTO will work for me too.
> 
> The real question - do we really need to publish this option
> for the external to configure it?

As I said above, I don't know if anyone uses the option.

It would be needed for full feature parity of -blockdev with -drive,
but if the option isn't used by anyone, maybe full feature parity isn't
something we even want.

> > > The real question in this patch is that this initialization
> > > was a precondition for old good "long IO" report
> > > configuration, which should be "enableable".
> > > 
> > > But  we could move this option to "tracked request"
> > > layer only and this will solve my puzzle. So, I'll move
> > > "long IO report" to tracked request level only and will
> > > create an option for it on bdrv_ level and will avoid
> > > it on blk_ accounting.
> > > 
> > > What do you think?
> > I'm not sure what you mean by "long IO report". Don't these switches
> > just change which kind of operations are counted into statistics rather
> > than changing the structure of the report?
> > 
> > Conceptually, I would like accounting on the block node level, but it's
> > not what we have been doing, so it would be a big change.
> > 
> I have to say sorry again. I have found this place once I have
> reverted to my very old series discussed here + some late
> additions on top of it done by Vladimir.
> https://lists.defectivebydesign.org/archive/html/qemu-devel/2020-07/msg03772.html

Oh, we never merged this upstream it seems?

> I will definitely have to come back to this later.
> 
> Den

Kevin

Re: [PATCH] hw/intc: Handle software disabling of APIC correctly

2022-07-29 Thread Jay Khandkar

On Fri, Jul 29, 2022 at 06:09:01PM +0100, Peter Maydell wrote:
> On Tue, 12 Jul 2022 at 19:38, Jay Khandkar  wrote:
> >
> > When the local APIC is in a software disabled state, all local interrupt
> > sources must be masked and all attempts to unmask them should be
> > ignored. Currently, we don't do either. Fix this by handling it
> > correctly in apic_mem_write().
> >
> > Signed-off-by: Jay Khandkar 
> > ---
> >  hw/intc/apic.c | 16 +---
> >  1 file changed, 13 insertions(+), 3 deletions(-)
> >
> > diff --git a/hw/intc/apic.c b/hw/intc/apic.c
> > index 3df11c34d6..493c70af62 100644
> > --- a/hw/intc/apic.c
> > +++ b/hw/intc/apic.c
> > @@ -792,9 +792,16 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
> > uint64_t val,
> >  s->dest_mode = val >> 28;
> >  break;
> >  case 0x0f:
> > -s->spurious_vec = val & 0x1ff;
> > -apic_update_irq(s);
> > -break;
> > +{
> > +s->spurious_vec = val & 0x1ff;
> > +if (!(val & APIC_SPURIO_ENABLED)) {
> > +for (int i = 0; i < APIC_LVT_NB; i++) {
> > +s->lvt[i] |= APIC_LVT_MASKED;
> > +}
> > +}
> > +apic_update_irq(s);
> > +break;
> > +}
> 
> What are the braces for here ? There's no local variable declaration...
> 
> thanks
> -- PMM
You are right, the braces are unnecessary for that part. I just put them in to
create a neat visually separate block. Can get rid of them.

Thanks,
Jay

Re: [RFC 0/3] Add Generic SPI GPIO model

2022-07-29 Thread Peter Delevoryas

On Fri, Jul 29, 2022 at 03:25:55PM +0200, Cédric Le Goater wrote:
> Hello Iris,
> 
> On 7/29/22 01:23, Iris Chen wrote:
> > Hey everyone,
> > 
> > I have been working on a project to add support for SPI-based TPMs in QEMU.
> > Currently, most of our vboot platforms using a SPI-based TPM use the Linux
> > SPI-GPIO driver to "bit-bang" the SPI protocol. This is because the Aspeed
> > SPI controller (modeled in QEMU under hw/ssi/aspeed_smc.c) has an 
> > implementation
> > deficiency that prevents bi-directional operations.
> aspeed_smc models the Aspeed FMC/SPI controllers which have a well defined
> HW interface. Your model proposal adds support for a new SPI controller
> using bitbang GPIOs. These are really two differents models. I don't see
> how you could reuse aspeed_smc for this purpose.
> 
> or you mean that Linux is using the SPI-GPIO driver because the Linux
> Aspeed SMC driver doesn't match the need ? It is true that the Linux
> driver is not generic, it deals with flash devices only. But that's
> another problem.
> 
> > Thus, in order to connect
> > a TPM to this bus, my patch implements a QEMU SPI-GPIO driver (as the QEMU
> > counterpart of the Linux SPI-GPIO driver).
> > 
> > As we use SPI-based TPMs on many of our BMCs for the secure-boot 
> > implementation,
> > I have already tested this implementation locally with our Yosemite-v3.5 
> > platform
> > and Facebook-OpenBMC. This model was tested by connecting a generic SPI-NOR 
> > (m25p80
> > for example) to the Yosemite-v3.5 SPI bus containing the TPM.
> > 
> > This patch is an RFC because I have several questions about design. 
> > Although the
> > model is working, I understand there are many areas where the design 
> > decision
> > is not deal (ie. abstracting hard coded GPIO values). Below are some 
> > details of the
> > patch and specific areas where I would appreciate feedback on how to make 
> > this better:
> > hw/arm/aspeed.c:
> > I am not sure where the best place is to instantiate the spi_gpio besides 
> > the
> > aspeed_machine_init.
> 
> The SPI GPIO device would be a platform device and not a SoC device.
> Hence, it should be instantiated at the machine level, like the I2C
> device are, using properties to let the model know about the GPIOs
> that should be driven to implement the SPI protocol.

Agreed, should be like an I2C device.

> 
> Ideally, the state of the GPIO controller pins and the SPI GPIO state
> should be shared. I think that's what you are trying to do that with
> attribute 'controller_state' in your patch ? But, the way it is done
> today, is too tightly coupled (names) with the Aspeed GPIO model to
> be generic.
> 
> I think we need an intermediate QOM interface, or a base class, to
> implement an abstract SPI GPIO model and an Aspeed SPI GPIO model
> on top which would be linked to the Aspeed GPIO model of the SoC
> in use.

Disagree, I feel like we can accomplish this without inheritance.

> 
> Or we could introduce some kind of generic GPIO controller that
> we would link the SPI GPIO model with (using a property). The
> Aspeed GPIO model would be of that kind and the SPI GPIO model
> would be able to drive the pins using a common interface.
> That's another way to do it.

Agree, I would like to go in this direction if at all possible.

> 
> > Could we add the ability to instantiate it on the command line?
> 
> It might be complex to identify the QOM object to use as the GPIO
> controller from the command line since it is on the SoC and not
> a platform device. In that case, an Aspeed SPI GPIO model would
> be a better approach. we  would have to peek in the machine/soc to
> get a link on the Aspeed GPIO model in the realize routine of the
> Aspeed SPI GPIO model.

Hrrrm perhaps, I feel like it shouldn't be that hard though.

- Get a pointer to the QOM object that holds the GPIO's using object
  path or ID. Something similar to '-device ftgmac100,netdev='
  right?
- Get the GPIO's by name from the QOM object.

In this situation, I think we should be able to get the GPIO controller
object, and then get the IRQ's by the "sysbus-irq[126]"/etc name.

We could refactor the aspeed_gpio.c model to name the IRQ's like the
properties,.  to use "gpioX4" instead of "sysbus-irq[*]".

> 
> > m25p80_transfer8_ex in hw/block/m25p80.c:
> > Existing SPI model assumed that the output byte is fully formed, can be 
> > passed to
> > the SPI device, and the input byte can be returned, all in one operation. 
> > With
> > SPI-GPIO we don't have the input byte until all 8 bits of the output have 
> > been
> > shifted out, so we have to prime the MISO with bogus values (0xFF).
> 
> That's fine I think. We do something similar for dummies.
> 
> > MOSI pin in spi_gpio: the mosi pin is not included and we poll the realtime 
> > value
> > of the gpio for input bits to prevent bugs with caching the mosi value. It 
> > was discovered
> > during testing that when using the mosi pin as the input pin, the mosi 
> > value was not
> > being

Re: [PULL 0/6] Fixes for QEMU 7.1-rc1

2022-07-29 Thread Richard Henderson


On 7/29/22 08:04, Paolo Bonzini wrote:

The following changes since commit 7b17a1a841fc2336eba53afade9cadb14bd3dd9a:

   Update version for v7.1.0-rc0 release (2022-07-26 18:03:16 -0700)

are available in the Git repository at:

   https://gitlab.com/bonzini/qemu.git tags/for-upstream

for you to fetch changes up to ebc55f523c2f406e30ec8fad77bd3b9aad5d4579:

   configure: pass correct cflags to container-based cross compilers 
(2022-07-29 00:22:19 +0200)


* Misc build system bugfixes
* Fix CGA 2-color graphics


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/7.1 as 
appropriate.


r~





Claudio Fontana (1):
   stubs: update replay-tools to match replay.h types

Cornelia Huck (1):
   kvm: don't use perror() without useful errno

Paolo Bonzini (3):
   ui: dbus-display requires CONFIG_GBM
   vga: fix incorrect line height in 640x200x2 mode
   configure: pass correct cflags to container-based cross compilers

Richard Henderson (1):
   configure: Fix ppc container_cross_cc substitution

  accel/kvm/kvm-all.c  | 2 +-
  configure| 3 +--
  hw/display/vga.c | 3 ++-
  meson.build  | 4 ++--
  stubs/replay-tools.c | 9 +
  target/arm/kvm.c | 2 +-
  ui/meson.build   | 2 +-
  7 files changed, 13 insertions(+), 12 deletions(-)

[PATCH 1/2] common-user/host/ppc: Implement safe-syscall.inc.S

2022-07-29 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 common-user/host/ppc/safe-syscall.inc.S | 107 
 1 file changed, 107 insertions(+)
 create mode 100644 common-user/host/ppc/safe-syscall.inc.S

diff --git a/common-user/host/ppc/safe-syscall.inc.S 
b/common-user/host/ppc/safe-syscall.inc.S
new file mode 100644
index 00..0851f6c0b8
--- /dev/null
+++ b/common-user/host/ppc/safe-syscall.inc.S
@@ -0,0 +1,107 @@
+/*
+ * safe-syscall.inc.S : host-specific assembly fragment
+ * to handle signals occurring at the same time as system calls.
+ * This is intended to be included by common-user/safe-syscall.S
+ *
+ * Copyright (C) 2022 Linaro, Ltd.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+/*
+ * Standardize on the _CALL_FOO symbols used by GCC:
+ * Apple XCode does not define _CALL_DARWIN.
+ * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV (32-bit).
+ */
+#if !defined(_CALL_SYSV) && \
+!defined(_CALL_DARWIN) && \
+!defined(_CALL_AIX) && \
+!defined(_CALL_ELF)
+# if defined(__APPLE__)
+#  define _CALL_DARWIN
+# elif defined(__ELF__) && TCG_TARGET_REG_BITS == 32
+#  define _CALL_SYSV
+# else
+#  error "Unknown ABI"
+# endif
+#endif 
+
+#ifndef _CALL_SYSV
+# error "Unsupported ABI"
+#endif
+
+
+.global safe_syscall_base
+.global safe_syscall_start
+.global safe_syscall_end
+.type   safe_syscall_base, @function
+
+.text
+
+/*
+ * This is the entry point for making a system call. The calling
+ * convention here is that of a C varargs function with the
+ * first argument an 'int *' to the signal_pending flag, the
+ * second one the system call number (as a 'long'), and all further
+ * arguments being syscall arguments (also 'long').
+ */
+safe_syscall_base:
+.cfi_startproc
+stwu1, -8(1)
+.cfi_def_cfa_offset 8
+stw 30, 4(1)
+.cfi_offset 30, -4
+
+/*
+ * We enter with r3 == _pending
+ *   r4 == syscall number
+ *   r5 ... r10 == syscall arguments
+ *   and return the result in r3
+ * and the syscall instruction needs
+ *   r0 == syscall number
+ *   r3 ... r8 == syscall arguments
+ *   and returns the result in r3
+ * Shuffle everything around appropriately.
+ */
+mr  30, 3   /* signal_pending */
+mr  0, 4/* syscall number */
+mr  3, 5/* syscall arguments */
+mr  4, 6
+mr  5, 7
+mr  6, 8
+mr  7, 9
+mr  8, 10
+
+/*
+ * This next sequence of code works in conjunction with the
+ * rewind_if_safe_syscall_function(). If a signal is taken
+ * and the interrupted PC is anywhere between 'safe_syscall_start'
+ * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
+ * The code sequence must therefore be able to cope with this, and
+ * the syscall instruction must be the final one in the sequence.
+ */
+safe_syscall_start:
+/* if signal_pending is non-zero, don't do the call */
+lwz 12, 0(30)
+cmpwi   0, 12, 0
+bne-2f
+sc
+safe_syscall_end:
+/* code path when we did execute the syscall */
+lwz 30, 4(1)/* restore r30 */
+addi1, 1, 8 /* restore stack */
+.cfi_restore 30
+.cfi_def_cfa_offset 0
+bnslr+  /* return on success */
+b   safe_syscall_set_errno_tail
+
+/* code path when we didn't execute the syscall */
+2:  lwz 30, 4(1)
+addi1, 1, 8
+addi3, 0, QEMU_ERESTARTSYS
+b   safe_syscall_set_errno_tail
+
+.cfi_endproc
+
+.size   safe_syscall_base, .-safe_syscall_base
-- 
2.34.1

[PATCH 2/2] linux-user: Implment host/ppc/host-signal.h

2022-07-29 Thread Richard Henderson

This commit re-enables ppc32 as a linux-user host,
as existance of the directory is noted by configure.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1097
Signed-off-by: Richard Henderson 
---
 linux-user/include/host/ppc/host-signal.h | 39 +++
 1 file changed, 39 insertions(+)
 create mode 100644 linux-user/include/host/ppc/host-signal.h

diff --git a/linux-user/include/host/ppc/host-signal.h 
b/linux-user/include/host/ppc/host-signal.h
new file mode 100644
index 00..de25c803f5
--- /dev/null
+++ b/linux-user/include/host/ppc/host-signal.h
@@ -0,0 +1,39 @@
+/*
+ * host-signal.h: signal info dependent on the host architecture
+ *
+ * Copyright (c) 2022 Linaro Ltd.
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef PPC_HOST_SIGNAL_H
+#define PPC_HOST_SIGNAL_H
+
+#include 
+
+/* The third argument to a SA_SIGINFO handler is ucontext_t. */
+typedef ucontext_t host_sigcontext;
+
+static inline uintptr_t host_signal_pc(host_sigcontext *uc)
+{
+return uc->uc_mcontext.regs->nip;
+}
+
+static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc)
+{
+uc->uc_mcontext.regs->nip = pc;
+}
+
+static inline void *host_signal_mask(host_sigcontext *uc)
+{
+return >uc_sigmask;
+}
+
+static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc)
+{
+return uc->uc_mcontext.regs->trap != 0x400
+&& (uc->uc_mcontext.regs->dsisr & 0x0200);
+}
+
+#endif
-- 
2.34.1

[PATCH for-7.1? 0/2] Re-enable ppc32 as a linux-user host

2022-07-29 Thread Richard Henderson

This is, technically, a regression from 6.2, so it's not
implausible to apply before rc1.  Thoughts?


r~


Richard Henderson (2):
  common-user/host/ppc: Implement safe-syscall.inc.S
  linux-user: Implment host/ppc/host-signal.h

 linux-user/include/host/ppc/host-signal.h |  39 
 common-user/host/ppc/safe-syscall.inc.S   | 107 ++
 2 files changed, 146 insertions(+)
 create mode 100644 linux-user/include/host/ppc/host-signal.h
 create mode 100644 common-user/host/ppc/safe-syscall.inc.S

-- 
2.34.1

Re: [PATCH v4 09/17] dump: Reorder struct DumpState

2022-07-29 Thread Janis Schoetterl-Glausch

On 7/26/22 11:22, Janosch Frank wrote:
> Let's move ELF related members into one block and guest memory related
> ones into another to improve readability.
> 
> Signed-off-by: Janosch Frank 
> Reviewed-by: Richard Henderson 
> Reviewed-by: Marc-André Lureau 
> ---
>  include/sysemu/dump.h | 16 +---
>  1 file changed, 9 insertions(+), 7 deletions(-)
> 
> diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
> index 686555f908..3937afe0f9 100644
> --- a/include/sysemu/dump.h
> +++ b/include/sysemu/dump.h
> @@ -154,15 +154,8 @@ typedef struct DumpState {
>  GuestPhysBlockList guest_phys_blocks;
>  ArchDumpInfo dump_info;
>  MemoryMappingList list;
> -uint32_t phdr_num;
> -uint32_t shdr_num;
>  bool resume;
>  bool detached;
> -ssize_t note_size;
> -hwaddr shdr_offset;
> -hwaddr phdr_offset;
> -hwaddr section_offset;
> -hwaddr note_offset;
>  hwaddr memory_offset;
>  int fd;
>  
> @@ -171,6 +164,15 @@ typedef struct DumpState {
>  int64_t begin; /* Start address of the chunk we want to dump 
> */
>  int64_t length;/* Length of the dump we want to dump */
>  
> +/* Elf dump related data */
> +uint32_t phdr_num;
> +uint32_t shdr_num;
> +uint32_t sh_info;

Why is this ^ here?

> +ssize_t note_size;
> +hwaddr shdr_offset;
> +hwaddr phdr_offset;
> +hwaddr note_offset;
> +
>  void *elf_header;
>  void *elf_section_hdrs;
>  uint64_t elf_section_data_size;

Re: [PATCH v4 08/17] dump: Split write of section headers and data and add a prepare step

2022-07-29 Thread Janis Schoetterl-Glausch

On 7/26/22 11:22, Janosch Frank wrote:
> By splitting the writing of the section headers and (future) section
> data we prepare for the addition of a string table section and
> architecture sections.
> 
> At the same time we move the writing of the section to the end of the
> dump process. This allows the upcoming architecture section code to
> add data after all of the common dump data has been written.
> 
> Signed-off-by: Janosch Frank 
> ---
>  dump/dump.c   | 112 --
>  include/sysemu/dump.h |   4 ++
>  2 files changed, 90 insertions(+), 26 deletions(-)
> 
> diff --git a/dump/dump.c b/dump/dump.c
> index 8a7ce95610..a6bb7bfa21 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c
> @@ -342,30 +342,69 @@ static void write_elf_phdr_note(DumpState *s, Error 
> **errp)
>  }
>  }
>  

[...]

> +
> +static void prepare_elf_section_hdrs(DumpState *s)
> +{
> +size_t len, sizeof_shdr;
> +
> +/*
> + * Section ordering:
> + * - HDR zero (if needed)
> + */
> +sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
> +len = sizeof_shdr * s->shdr_num;
> +s->elf_section_hdrs = g_malloc0(len);
> +
> +/* Write special section first */
> +if (s->phdr_num == PN_XNUM) {

Should be >= right?

> +prepare_elf_section_hdr_zero(s);
> +}
> +}
> +
[...]

Re: [PATCH] hw/intc: Handle software disabling of APIC correctly

2022-07-29 Thread Peter Maydell

On Tue, 12 Jul 2022 at 19:38, Jay Khandkar  wrote:
>
> When the local APIC is in a software disabled state, all local interrupt
> sources must be masked and all attempts to unmask them should be
> ignored. Currently, we don't do either. Fix this by handling it
> correctly in apic_mem_write().
>
> Signed-off-by: Jay Khandkar 
> ---
>  hw/intc/apic.c | 16 +---
>  1 file changed, 13 insertions(+), 3 deletions(-)
>
> diff --git a/hw/intc/apic.c b/hw/intc/apic.c
> index 3df11c34d6..493c70af62 100644
> --- a/hw/intc/apic.c
> +++ b/hw/intc/apic.c
> @@ -792,9 +792,16 @@ static void apic_mem_write(void *opaque, hwaddr addr, 
> uint64_t val,
>  s->dest_mode = val >> 28;
>  break;
>  case 0x0f:
> -s->spurious_vec = val & 0x1ff;
> -apic_update_irq(s);
> -break;
> +{
> +s->spurious_vec = val & 0x1ff;
> +if (!(val & APIC_SPURIO_ENABLED)) {
> +for (int i = 0; i < APIC_LVT_NB; i++) {
> +s->lvt[i] |= APIC_LVT_MASKED;
> +}
> +}
> +apic_update_irq(s);
> +break;
> +}

What are the braces for here ? There's no local variable declaration...

thanks
-- PMM

Re: [PATCH 2/3] Hexagon (tests/tcg/hexagon) Fix alignment in load_unpack.c

2022-07-29 Thread Richard Henderson


On 7/18/22 16:03, Taylor Simpson wrote:

The increment used in :brev tests was causing unaligned addresses
Change the increment and the relevant expected values

Signed-off-by: Taylor Simpson 
---
  tests/tcg/hexagon/load_unpack.c | 14 +++---
  1 file changed, 7 insertions(+), 7 deletions(-)


Acked-by: Richard Henderson 

r~



diff --git a/tests/tcg/hexagon/load_unpack.c b/tests/tcg/hexagon/load_unpack.c
index 3575a37a28..4aa26fc388 100644
--- a/tests/tcg/hexagon/load_unpack.c
+++ b/tests/tcg/hexagon/load_unpack.c
@@ -245,7 +245,7 @@ TEST_pr(loadbsw4_pr, long long, S, 4, 0xff00ff00LL,
   */
  #define BxW_LOAD_pbr(SZ, RES, PTR) \
  __asm__( \
-"r4 = #(1 << (16 - 3))\n\t" \
+"r4 = #(1 << (16 - 4))\n\t" \
  "m0 = r4\n\t" \
  "%0 = mem" #SZ "(%1++m0:brev)\n\t" \
  : "=r"(RES), "+r"(PTR) \
@@ -273,15 +273,15 @@ void test_##NAME(void) \
  }
  
  TEST_pbr(loadbzw2_pbr, int, Z, 0x,

-0x00020081, 0x00060085, 0x00040083, 0x00080087)
+0x00020081, 0x000a0089, 0x00060085, 0x000e008d)
  TEST_pbr(loadbsw2_pbr, int, S, 0xff00,
-0x00020081, 0x00060085, 0x00040083, 0x00080087)
+0x00020081, 0x000aff89, 0x0006ff85, 0x000eff8d)
  TEST_pbr(loadbzw4_pbr, long long, Z, 0xLL,
-0x0004008300020081LL, 0x0008008700060085LL,
-0x0006008500040083LL, 0x000a008900080087LL)
+0x0004008300020081LL, 0x000c008b000a0089LL,
+0x0008008700060085LL, 0x0010008f000e008dLL)
  TEST_pbr(loadbsw4_pbr, long long, S, 0xff00ff00LL,
-0x0004008300020081LL, 0x0008008700060085LL,
-0x0006008500040083LL, 0x000a008900080087LL)
+0x0004008300020081LL, 0x000cff8b000aff89LL,
+0x0008ff870006ff85LL, 0x0010ff8f000eff8dLL)
  
  /*

Re: [PATCH 3/3] Hexagon (tests/tcg/hexagon) reference file for float_convd

2022-07-29 Thread Richard Henderson


On 7/18/22 16:03, Taylor Simpson wrote:

The test is in tests/tcg/multiarch/float_convd.c

Signed-off-by: Taylor Simpson
---
  tests/tcg/hexagon/float_convd.ref | 988 ++
  1 file changed, 988 insertions(+)
  create mode 100644 tests/tcg/hexagon/float_convd.ref


Acked-by: Richard Henderson 

r~

Re: [PATCH 1/3] Hexagon (target/hexagon) make VyV operands use a unique temp

2022-07-29 Thread Richard Henderson


On 7/18/22 16:03, Taylor Simpson wrote:

VyV operand is only used in the vshuff and vdeal instructions.  These
instructions write to both VyV and VxV operands.  In the case where
both operands are the same register, we need a separate location for
VyV.  We use the existing vtmp field in CPUHexagonState.

Test case added in tests/tcg/hexagon/hvx_misc.c

Signed-off-by: Taylor Simpson 


Reviewed-by: Richard Henderson 

r~

Re: [PATCH] hw/intc: Handle software disabling of APIC correctly

2022-07-29 Thread Jay Khandkar

Ping?

On Tue, 12 Jul 2022, 19:49 Jay Khandkar,  wrote:

> When the local APIC is in a software disabled state, all local interrupt
> sources must be masked and all attempts to unmask them should be
> ignored. Currently, we don't do either. Fix this by handling it
> correctly in apic_mem_write().
>
> Signed-off-by: Jay Khandkar 
> ---
>  hw/intc/apic.c | 16 +---
>  1 file changed, 13 insertions(+), 3 deletions(-)
>
> diff --git a/hw/intc/apic.c b/hw/intc/apic.c
> index 3df11c34d6..493c70af62 100644
> --- a/hw/intc/apic.c
> +++ b/hw/intc/apic.c
> @@ -792,9 +792,16 @@ static void apic_mem_write(void *opaque, hwaddr addr,
> uint64_t val,
>  s->dest_mode = val >> 28;
>  break;
>  case 0x0f:
> -s->spurious_vec = val & 0x1ff;
> -apic_update_irq(s);
> -break;
> +{
> +s->spurious_vec = val & 0x1ff;
> +if (!(val & APIC_SPURIO_ENABLED)) {
> +for (int i = 0; i < APIC_LVT_NB; i++) {
> +s->lvt[i] |= APIC_LVT_MASKED;
> +}
> +}
> +apic_update_irq(s);
> +break;
> +}
>  case 0x10 ... 0x17:
>  case 0x18 ... 0x1f:
>  case 0x20 ... 0x27:
> @@ -812,6 +819,9 @@ static void apic_mem_write(void *opaque, hwaddr addr,
> uint64_t val,
>  case 0x32 ... 0x37:
>  {
>  int n = index - 0x32;
> +if (!(s->spurious_vec & APIC_SPURIO_ENABLED)) {
> +val |= APIC_LVT_MASKED;
> +}
>  s->lvt[n] = val;
>  if (n == APIC_LVT_TIMER) {
>  apic_timer_update(s,
> qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
> --
> 2.37.0
>
>

Re: [PATCH RESEND] tests/tcg/linux-test: Fix random hangs in test_socket

2022-07-29 Thread Thomas Huth


On 25/07/2022 16.42, Ilya Leoshkevich wrote:

test_socket hangs randomly in connect(), especially when run without
qemu. Apparently the reason is that linux started treating backlog
value of 0 literally instead of rounding it up since v4.4 (commit
ef547f2ac16b).

So set it to 1 instead.

Signed-off-by: Ilya Leoshkevich 
---

This is a rebase of the previous submission:
https://lists.gnu.org/archive/html/qemu-devel/2021-06/msg00095.html

  tests/tcg/multiarch/linux/linux-test.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tcg/multiarch/linux/linux-test.c 
b/tests/tcg/multiarch/linux/linux-test.c
index 019d8175ca..5a2a4f2258 100644
--- a/tests/tcg/multiarch/linux/linux-test.c
+++ b/tests/tcg/multiarch/linux/linux-test.c
@@ -263,7 +263,7 @@ static int server_socket(void)
  sockaddr.sin_port = htons(0); /* choose random ephemeral port) */
  sockaddr.sin_addr.s_addr = 0;
  chk_error(bind(fd, (struct sockaddr *), sizeof(sockaddr)));
-chk_error(listen(fd, 0));
+chk_error(listen(fd, 1));
  return fd;
  
  }


Not really my turf, but if there are no other takers and no complatins, I 
can take it through my testing branch:


https://gitlab.com/thuth/qemu/-/commits/testing-next

 Thomas

Re: [PATCH 12/15] ipmi: Add an SMBus IPMI interface

2022-07-29 Thread Peter Maydell

On Fri, 29 Jul 2022 at 16:56, Corey Minyard  wrote:
>
> On Tue, Jun 28, 2022 at 05:21:44PM +0100, Peter Maydell wrote:
> > On Thu, 19 Sept 2019 at 22:39,  wrote:
> > >
> > > From: Corey Minyard 
> > >
> > > Signed-off-by: Corey Minyard 
> > > ---
> >
>
> Thank you for the ping.  Comments inline...

> > ...calling memcpy() with argument 1 being a pointer that points
> > one past the end of the array. Even though len will be 0 and
> > we won't memcpy() anything, this is (depending on how you choose
> > to intepret things the C standard doesn't come right out and state
> > explicitly) undefined behaviour, because memcpy() wants to be passed
> > valid pointers, even if you ask it to do no work with a zero len.
> >
> > This isn't going to be a visible bug in practical terms, but it would
> > make Coverity happy if we either (a) rejected a request with an empty
> > length or else (b) skipped the memcpy(). I don't know enough about
> > IPMI to know which is better.
>
> Hmm.  In some cases you have to accept a zero-length packet (as
> described in the comments), but if you said:
>
>   if (len > 0)
>   memcpy(sid->inmsg + sid->inlen, buf, len);
>
> would that make Coverity happy?  I was under the impression that if you
> passed zero into len, you could pass anything into the data on a memcpy.
> But apparently not; I can make this change.

Yes, putting an if() around the memcpy() will be enough to avoid
the undefined behaviour. (NB that you want braces {} on it ;-))

thanks
-- PMM

[PATCH] hyperv: fix SynIC SINT assertion failure on guest reset

2022-07-29 Thread Maciej S. Szmigiero

From: "Maciej S. Szmigiero" 

Resetting a guest that has Hyper-V VMBus support enabled triggers a QEMU
assertion failure:
hw/hyperv/hyperv.c:131: synic_reset: Assertion 
`QLIST_EMPTY(>sint_routes)' failed.

This happens both on normal guest reboot or when using "system_reset" HMP
command.

The failing assertion was introduced by commit 64ddecc88bcf ("hyperv: SControl 
is optional to enable SynIc")
to catch dangling SINT routes on SynIC reset.

The root cause of this problem is that the SynIC itself is reset before
devices using SINT routes have chance to clean up these routes.

Since there seems to be no existing mechanism to force reset callbacks (or
methods) to be executed in specific order let's use a similar method that
is already used to reset another interrupt controller (APIC) after devices
have been reset - by invoking the SynIC reset from the machine reset
handler via a new "after_reset" X86 CPU method.

Fixes: 64ddecc88bcf ("hyperv: SControl is optional to enable SynIc") # exposed 
the bug
Signed-off-by: Maciej S. Szmigiero 
---
 hw/i386/pc.c   |  6 ++
 target/i386/cpu-qom.h  |  2 ++
 target/i386/cpu.c  | 10 ++
 target/i386/kvm/hyperv.c   |  4 
 target/i386/kvm/kvm.c  | 20 +++-
 target/i386/kvm/kvm_i386.h |  1 +
 6 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 7280c02ce3..6a76d320f6 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1847,6 +1847,7 @@ static void pc_machine_reset(MachineState *machine)
 {
 CPUState *cs;
 X86CPU *cpu;
+const X86CPUClass *cpuc;
 
 qemu_devices_reset();
 
@@ -1855,6 +1856,11 @@ static void pc_machine_reset(MachineState *machine)
  */
 CPU_FOREACH(cs) {
 cpu = X86_CPU(cs);
+cpuc = X86_CPU_GET_CLASS(cpu);
+
+if (cpuc->after_reset) {
+cpuc->after_reset(cpu);
+}
 
 if (cpu->apic_state) {
 device_legacy_reset(cpu->apic_state);
diff --git a/target/i386/cpu-qom.h b/target/i386/cpu-qom.h
index c557a522e1..339d23006a 100644
--- a/target/i386/cpu-qom.h
+++ b/target/i386/cpu-qom.h
@@ -43,6 +43,7 @@ typedef struct X86CPUModel X86CPUModel;
  * @static_model: See CpuDefinitionInfo::static
  * @parent_realize: The parent class' realize handler.
  * @parent_reset: The parent class' reset handler.
+ * @after_reset: Reset handler to be called only after all other devices have 
been reset.
  *
  * An x86 CPU model or family.
  */
@@ -68,6 +69,7 @@ struct X86CPUClass {
 DeviceRealize parent_realize;
 DeviceUnrealize parent_unrealize;
 DeviceReset parent_reset;
+void (*after_reset)(X86CPU *cpu);
 };
 
 
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 6a57ef13af..a7b5945efd 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6029,6 +6029,15 @@ static void x86_cpu_reset(DeviceState *dev)
 #endif
 }
 
+static void x86_cpu_after_reset(X86CPU *cpu)
+{
+#ifndef CONFIG_USER_ONLY
+if (kvm_enabled()) {
+kvm_arch_after_reset_vcpu(cpu);
+}
+#endif
+}
+
 static void mce_init(X86CPU *cpu)
 {
 CPUX86State *cenv = >env;
@@ -7094,6 +7103,7 @@ static void x86_cpu_common_class_init(ObjectClass *oc, 
void *data)
 device_class_set_props(dc, x86_cpu_properties);
 
 device_class_set_parent_reset(dc, x86_cpu_reset, >parent_reset);
+xcc->after_reset = x86_cpu_after_reset;
 cc->reset_dump_flags = CPU_DUMP_FPU | CPU_DUMP_CCOP;
 
 cc->class_by_name = x86_cpu_class_by_name;
diff --git a/target/i386/kvm/hyperv.c b/target/i386/kvm/hyperv.c
index 9026ef3a81..e3ac978648 100644
--- a/target/i386/kvm/hyperv.c
+++ b/target/i386/kvm/hyperv.c
@@ -23,6 +23,10 @@ int hyperv_x86_synic_add(X86CPU *cpu)
 return 0;
 }
 
+/*
+ * All devices possibly using SynIC have to be reset before calling this to let
+ * them remove their SINT routes first.
+ */
 void hyperv_x86_synic_reset(X86CPU *cpu)
 {
 hyperv_synic_reset(CPU(cpu));
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index f148a6d52f..00736abeea 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -2188,18 +2188,28 @@ void kvm_arch_reset_vcpu(X86CPU *cpu)
 env->mp_state = KVM_MP_STATE_RUNNABLE;
 }
 
+/* enabled by default */
+env->poll_control_msr = 1;
+
+sev_es_set_reset_vector(CPU(cpu));
+}
+
+void kvm_arch_after_reset_vcpu(X86CPU *cpu)
+{
+CPUX86State *env = >env;
+int i;
+
+/*
+ * Reset SynIC after all other devices have been reset to let them remove
+ * their SINT routes first.
+ */
 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) {
-int i;
 for (i = 0; i < ARRAY_SIZE(env->msr_hv_synic_sint); i++) {
 env->msr_hv_synic_sint[i] = HV_SINT_MASKED;
 }
 
 hyperv_x86_synic_reset(cpu);
 }
-/* enabled by default */
-env->poll_control_msr = 1;
-
-sev_es_set_reset_vector(CPU(cpu));
 }
 
 void kvm_arch_do_init_vcpu(X86CPU *cpu)
diff --git a/target/i386/kvm/kvm_i386.h

Re: [PATCH v2] linux-user: Use memfd for open syscall emulation

2022-07-29 Thread Richard Henderson


On 7/29/22 08:49, Rainer Müller wrote:

+/* create temporary file to map stat to */
+tmpdir = getenv("TMPDIR");
+if (!tmpdir)
+tmpdir = "/tmp";
+snprintf(filename, sizeof(filename), "%s/qemu-open.XX", 
tmpdir);
+fd = mkstemp(filename);
+if (fd < 0) {
+return fd;
+}


We've been using g_file_open_tmp elsewhere; probably good to follow suit here.


r~

[PATCH] hw/net/rocker: Avoid undefined shifts with more than 31 ports

2022-07-29 Thread Peter Maydell

In rocker_port_phys_link_status() and rocker_port_phys_enable_read()
we construct a 64-bit value with one bit per front-panel port.
However we accidentally do the shift as 32-bit arithmetic, which
means that if there are more than 31 front-panel ports this is
undefined behaviour.

Fix the problem by ensuring we use 64-bit arithmetic for the whole
calculation. (We won't ever shift off the 64-bit value because
ROCKER_FP_PORTS_MAX is 62.)

Resolves: Coverity CID 1487121, 1487160
Signed-off-by: Peter Maydell 
---
 hw/net/rocker/rocker.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c
index 31f2340fb91..d8f3f16fe87 100644
--- a/hw/net/rocker/rocker.c
+++ b/hw/net/rocker/rocker.c
@@ -1010,7 +1010,7 @@ static uint64_t rocker_port_phys_link_status(Rocker *r)
 FpPort *port = r->fp_port[i];
 
 if (fp_port_get_link_up(port)) {
-status |= 1 << (i + 1);
+status |= 1ULL << (i + 1);
 }
 }
 return status;
@@ -1025,7 +1025,7 @@ static uint64_t rocker_port_phys_enable_read(Rocker *r)
 FpPort *port = r->fp_port[i];
 
 if (fp_port_enabled(port)) {
-ret |= 1 << (i + 1);
+ret |= 1ULL << (i + 1);
 }
 }
 return ret;
-- 
2.25.1

Re: [PATCH 12/15] ipmi: Add an SMBus IPMI interface

2022-07-29 Thread Corey Minyard

On Tue, Jun 28, 2022 at 05:21:44PM +0100, Peter Maydell wrote:
> On Thu, 19 Sept 2019 at 22:39,  wrote:
> >
> > From: Corey Minyard 
> >
> > Signed-off-by: Corey Minyard 
> > ---
> 

Thank you for the ping.  Comments inline...

> Very old patch, but Coverity has decided it doesn't like something
> in this function that's still basically the same in the current codebase
> (CID 1487146):
> 
> > +static int ipmi_write_data(SMBusDevice *dev, uint8_t *buf, uint8_t len)
> > +{
> > +SMBusIPMIDevice *sid = SMBUS_IPMI(dev);
> > +bool send = false;
> > +uint8_t cmd;
> > +int ret = 0;
> > +
> > +/* length is guaranteed to be >= 1. */
> > +cmd = *buf++;
> > +len--;
> > +
> > +/* Handle read request, which don't have any data in the write part. */
> > +switch (cmd) {
> > +case SSIF_IPMI_RESPONSE:
> > +sid->currblk = 0;
> > +ret = ipmi_load_readbuf(sid);
> > +break;
> > +
> > +case SSIF_IPMI_MULTI_PART_RESPONSE_MIDDLE:
> > +sid->currblk++;
> > +ret = ipmi_load_readbuf(sid);
> > +break;
> > +
> > +case SSIF_IPMI_MULTI_PART_RETRY:
> > +if (len >= 1) {
> > +sid->currblk = buf[0];
> > +ret = ipmi_load_readbuf(sid);
> > +} else {
> > +ret = -1;
> > +}
> > +break;
> > +
> > +default:
> > +break;
> > +}
> > +
> > +/* This should be a message write, make the length is there and 
> > correct. */
> > +if (len >= 1) {
> > +if (*buf != len - 1 || *buf > MAX_SSIF_IPMI_MSG_CHUNK) {
> > +return -1; /* Bogus message */
> > +}
> > +buf++;
> > +len--;
> > +}
> 
> After all of this preamble, len can be zero...
> 
> > +
> > +switch (cmd) {
> > +case SSIF_IPMI_REQUEST:
> > +send = true;
> > +/* FALLTHRU */
> > +case SSIF_IPMI_MULTI_PART_REQUEST_START:
> > +if (len < 2) {
> > +return -1; /* Bogus. */
> > +}
> > +memcpy(sid->inmsg, buf, len);
> > +sid->inlen = len;
> > +break;
> > +
> > +case SSIF_IPMI_MULTI_PART_REQUEST_END:
> > +send = true;
> > +/* FALLTHRU */
> > +case SSIF_IPMI_MULTI_PART_REQUEST_MIDDLE:
> > +if (!sid->inlen) {
> > +return -1; /* Bogus. */
> > +}
> > +if (sid->inlen + len > MAX_SSIF_IPMI_MSG_SIZE) {
> > +sid->inlen = 0; /* Discard the message. */
> > +return -1; /* Bogus. */
> > +}
> 
> ...this error checking on the values of the 'middle' request
> means that after one 'middle' request we can end up with
> sid->inlen == MAX_SSIF_IPMI_MSG_SIZE (ie we filled the
> entire sid->inmsg[] array).
> 
> But then if we get another 'middle' request with len == 0,
> that will pass this error checking because (sid->inlen + len == MSG_SIZE)
> and fall through into...
> 
> > +if (len < 32) {
> > +/*
> > + * Special hack, a multi-part middle that is less than 32 bytes
> > + * marks the end of a message.  The specification is fairly
> > + * confusing, so some systems to this, even sending a zero
> > + * length end message to mark the end.
> > + */
> > +send = true;
> > +}
> > +memcpy(sid->inmsg + sid->inlen, buf, len);
> 
> ...calling memcpy() with argument 1 being a pointer that points
> one past the end of the array. Even though len will be 0 and
> we won't memcpy() anything, this is (depending on how you choose
> to intepret things the C standard doesn't come right out and state
> explicitly) undefined behaviour, because memcpy() wants to be passed
> valid pointers, even if you ask it to do no work with a zero len.
> 
> This isn't going to be a visible bug in practical terms, but it would
> make Coverity happy if we either (a) rejected a request with an empty
> length or else (b) skipped the memcpy(). I don't know enough about
> IPMI to know which is better.

Hmm.  In some cases you have to accept a zero-length packet (as
described in the comments), but if you said:

  if (len > 0)
  memcpy(sid->inmsg + sid->inlen, buf, len);

would that make Coverity happy?  I was under the impression that if you
passed zero into len, you could pass anything into the data on a memcpy.
But apparently not; I can make this change.

-corey

> 
> > +sid->inlen += len;
> > +break;
> > +}
> > +
> > +if (send && sid->inlen) {
> > +smbus_ipmi_send_msg(sid);
> > +}
> > +
> > +return ret;
> > +}
> 
> thanks
> -- PMM
>

Re: [PULL 02/60] semihosting: Return failure from softmmu-uaccess.h functions

2022-07-29 Thread Richard Henderson


On 7/29/22 07:31, Peter Maydell wrote:

On Tue, 28 Jun 2022 at 05:54, Richard Henderson
 wrote:


We were reporting unconditional success for these functions;
pass on any failure from cpu_memory_rw_debug.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 


So, this commit makes us check the cpu_memory_rw_debug()
return value in the softmmu_lock_user() function; but Coverity
points out (CID 1490294) that we still aren't checking the
return value from when we call it in softmmu_unlock_user()...

What, if anything, should we do about that? In particular,
I think that for semihosting calls that write to guest memory,
if the guest passes us a pointer to read-only memory we're
not going to find out about that until unlock time.


Not even then, because address_space_write_rom will in fact write to rom, nevermind 
virtual page permissions.  Moreover, there are no failure conditions from 
address_space_write_rom.  It skips non-{ram,rom} and always returns OK.


It's probable that we should be using cpu_memory_rw_debug at all, but should be using a 
higher-level interface, something that (1) checks the virtual page permissions and (2) 
probably rejects semihosting to mmio.


But in the short term, I think we can just ignore the warning.


r~

[PATCH v2] linux-user: Use memfd for open syscall emulation

2022-07-29 Thread Rainer Müller

For certain paths in /proc, the open syscall is intercepted and the
returned file descriptor points to a temporary file with emulated
contents.

If TMPDIR is not accessible or writable for the current user (for
example in a read-only mounted chroot or container) tools such as ps
from procps may fail unexpectedly. Trying to read one of these paths
such as /proc/self/stat would return an error such as ENOENT or EROFS.

To relax the requirement on a writable TMPDIR, use memfd_create()
instead to create an anonymous file and return its file descriptor.

Signed-off-by: Rainer Müller 
---
v2: no more #ifdefs, use stub from util/memfd.c with ENOSYS fallback,
tested with 'strace -e fault=memfd_create'
---
 linux-user/syscall.c | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 991b85e6b4..7b55726f25 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8269,16 +8269,22 @@ static int do_openat(CPUArchState *cpu_env, int dirfd, 
const char *pathname, int
 char filename[PATH_MAX];
 int fd, r;
 
-/* create temporary file to map stat to */
-tmpdir = getenv("TMPDIR");
-if (!tmpdir)
-tmpdir = "/tmp";
-snprintf(filename, sizeof(filename), "%s/qemu-open.XX", tmpdir);
-fd = mkstemp(filename);
+fd = memfd_create("qemu-open", 0);
 if (fd < 0) {
-return fd;
+if (errno != ENOSYS) {
+return fd;
+}
+/* create temporary file to map stat to */
+tmpdir = getenv("TMPDIR");
+if (!tmpdir)
+tmpdir = "/tmp";
+snprintf(filename, sizeof(filename), "%s/qemu-open.XX", 
tmpdir);
+fd = mkstemp(filename);
+if (fd < 0) {
+return fd;
+}
+unlink(filename);
 }
-unlink(filename);
 
 if ((r = fake_open->fill(cpu_env, fd))) {
 int e = errno;
-- 
2.25.1

[PATCH v1] dirtylimit: Fix overflow when computing MB

2022-07-29 Thread huangy81

From: Hyman Huang(黄勇) 

Coverity points out a overflow problem when computing MB,
dirty_ring_size and TARGET_PAGE_SIZE are both 32 bits,
multiplication will be done as a 32-bit operation, which
could overflow. Simplify the formula.

Meanwhile, fix spelling mistake of variable name.

Reported-by: Peter Maydell 
Signed-off-by: Peter Maydell 
Signed-off-by: Richard Henderson 
Signed-off-by: Hyman Huang(黄勇) 
---
 softmmu/dirtylimit.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index 8d98cb7..1423225 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -236,14 +236,14 @@ static inline int64_t 
dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
 {
 static uint64_t max_dirtyrate;
 uint32_t dirty_ring_size = kvm_dirty_ring_size();
-uint64_t dirty_ring_size_meory_MB =
-dirty_ring_size * TARGET_PAGE_SIZE >> 20;
+uint32_t dirty_ring_size_memory_MB =
+dirty_ring_size >> (20 - TARGET_PAGE_BITS);
 
 if (max_dirtyrate < dirtyrate) {
 max_dirtyrate = dirtyrate;
 }
 
-return dirty_ring_size_meory_MB * 100 / max_dirtyrate;
+return dirty_ring_size_memory_MB * 100ULL / max_dirtyrate;
 }
 
 static inline bool dirtylimit_done(uint64_t quota,
-- 
1.8.3.1

Re: [PULL 00/13] testing, semihosting and doc fixes

2022-07-29 Thread Richard Henderson


On 7/29/22 02:19, Alex Bennée wrote:

The following changes since commit cc42559ab129a15554cc485ea9265e34dde7ab5b:

   Merge tag 'pull-ppc-20220728' of https://gitlab.com/danielhb/qemu into 
staging (2022-07-28 15:06:42 -0700)

are available in the Git repository at:

   https://github.com/stsquad/qemu.git tags/pull-testing-next-290722-1

for you to fetch changes up to 1235cf7d315b415fc2e4aa81815fda6ce96518c4:

   qemu-options: bring the kernel and image options together (2022-07-29 
09:48:01 +0100)


Testing, semihosting and doc fixes:

   - update to latest libvirt-ci
   - echo testlog.txt on failed cirrus runs
   - drop containers-layer2 stage
   - update handling of symlinks on windows builds
   - return 0 for failure of semihosting console write
   - don't copy unused buffer after semihost operation
   - check for errors in semihosting args
   - fix buffer handling for semihosting TMPNAM
   - add qapi exit-failure PanicAction
   - add lowcore unaligned access test to s390x
   - fix documentation of OBJECT_DECLARE_SIMPLE_TYPE
   - expand documentation on booting code


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/7.1 as 
appropriate.


r~





Alex Bennée (2):
   docs/devel: fix description of OBJECT_DECLARE_SIMPLE_TYPE
   qemu-options: bring the kernel and image options together

Bin Meng (2):
   .cirrus.yml: Change winsymlinks to 'native'
   .gitlab-ci.d/windows.yml: Enable native Windows symlink

Daniel P. Berrangé (3):
   tests: refresh to latest libvirt-ci module
   gitlab: show testlog.txt contents when cirrus/custom-runner jobs fail
   gitlab: drop 'containers-layer2' stage

Ilya Leoshkevich (2):
   qapi: Add exit-failure PanicAction
   tests/tcg/s390x: Test unaligned accesses to lowcore

Peter Maydell (4):
   semihosting: Don't return negative values on 
qemu_semihosting_console_write() failure
   semihosting: Don't copy buffer after console_write()
   semihosting: Check for errors on SET_ARG()
   semihosting: Fix handling of buffer in TARGET_SYS_TMPNAM

  docs/devel/qom.rst |  3 +-
  qapi/run-state.json|  5 +-
  include/sysemu/sysemu.h|  2 +-
  semihosting/arm-compat-semi.c  | 29 +--
  semihosting/console.c  |  3 +-
  semihosting/syscalls.c |  2 +-
  softmmu/main.c |  6 +-
  softmmu/runstate.c | 17 +++-
  .cirrus.yml|  2 +-
  .gitlab-ci.d/cirrus/build.yml  |  3 +-
  .gitlab-ci.d/cirrus/freebsd-12.vars|  3 +-
  .gitlab-ci.d/cirrus/freebsd-13.vars|  3 +-
  .gitlab-ci.d/cirrus/macos-11.vars  |  4 +-
  .gitlab-ci.d/container-cross.yml   | 24 +++---
  .../custom-runners/centos-stream-8-x86_64.yml  |  2 +
  .../custom-runners/ubuntu-20.04-aarch32.yml|  2 +
  .../custom-runners/ubuntu-20.04-aarch64.yml| 12 +++
  .gitlab-ci.d/custom-runners/ubuntu-20.04-s390x.yml | 12 +++
  .gitlab-ci.d/stages.yml|  1 -
  .gitlab-ci.d/windows.yml   |  2 +
  qemu-options.hx| 98 +-
  tests/docker/dockerfiles/alpine.docker |  4 +-
  tests/docker/dockerfiles/centos8.docker|  6 +-
  tests/docker/dockerfiles/debian-amd64.docker   |  2 +
  tests/docker/dockerfiles/debian-arm64-cross.docker |  2 +
  tests/docker/dockerfiles/debian-armel-cross.docker |  2 +
  tests/docker/dockerfiles/debian-armhf-cross.docker |  2 +
  .../dockerfiles/debian-mips64el-cross.docker   |  2 +
  .../docker/dockerfiles/debian-mipsel-cross.docker  |  2 +
  .../docker/dockerfiles/debian-ppc64el-cross.docker |  2 +
  tests/docker/dockerfiles/debian-s390x-cross.docker |  2 +
  tests/docker/dockerfiles/fedora.docker |  3 +-
  tests/docker/dockerfiles/opensuse-leap.docker  |  7 +-
  tests/docker/dockerfiles/ubuntu2004.docker |  2 +
  tests/lcitool/libvirt-ci   |  2 +-
  tests/lcitool/projects/qemu.yml|  6 +-
  tests/lcitool/refresh  |  4 +-
  tests/tcg/s390x/Makefile.softmmu-target|  9 ++
  tests/tcg/s390x/unaligned-lowcore.S| 19 +
  39 files changed, 243 insertions(+), 70 deletions(-)
  create mode 100644 tests/tcg/s390x/Makefile.softmmu-target
  create mode 100644 tests/tcg/s390x/unaligned-lowcore.S

Re: [PATCH 12/15] ipmi: Add an SMBus IPMI interface

2022-07-29 Thread Peter Maydell

On Tue, 28 Jun 2022 at 17:21, Peter Maydell  wrote:
>
> On Thu, 19 Sept 2019 at 22:39,  wrote:
> >
> > From: Corey Minyard 
> >
> > Signed-off-by: Corey Minyard 
> > ---
>
> Very old patch, but Coverity has decided it doesn't like something
> in this function that's still basically the same in the current codebase
> (CID 1487146):

Ping ?

> > +static int ipmi_write_data(SMBusDevice *dev, uint8_t *buf, uint8_t len)
> > +{
> > +SMBusIPMIDevice *sid = SMBUS_IPMI(dev);
> > +bool send = false;
> > +uint8_t cmd;
> > +int ret = 0;
> > +
> > +/* length is guaranteed to be >= 1. */
> > +cmd = *buf++;
> > +len--;
> > +
> > +/* Handle read request, which don't have any data in the write part. */
> > +switch (cmd) {
> > +case SSIF_IPMI_RESPONSE:
> > +sid->currblk = 0;
> > +ret = ipmi_load_readbuf(sid);
> > +break;
> > +
> > +case SSIF_IPMI_MULTI_PART_RESPONSE_MIDDLE:
> > +sid->currblk++;
> > +ret = ipmi_load_readbuf(sid);
> > +break;
> > +
> > +case SSIF_IPMI_MULTI_PART_RETRY:
> > +if (len >= 1) {
> > +sid->currblk = buf[0];
> > +ret = ipmi_load_readbuf(sid);
> > +} else {
> > +ret = -1;
> > +}
> > +break;
> > +
> > +default:
> > +break;
> > +}
> > +
> > +/* This should be a message write, make the length is there and 
> > correct. */
> > +if (len >= 1) {
> > +if (*buf != len - 1 || *buf > MAX_SSIF_IPMI_MSG_CHUNK) {
> > +return -1; /* Bogus message */
> > +}
> > +buf++;
> > +len--;
> > +}
>
> After all of this preamble, len can be zero...
>
> > +
> > +switch (cmd) {
> > +case SSIF_IPMI_REQUEST:
> > +send = true;
> > +/* FALLTHRU */
> > +case SSIF_IPMI_MULTI_PART_REQUEST_START:
> > +if (len < 2) {
> > +return -1; /* Bogus. */
> > +}
> > +memcpy(sid->inmsg, buf, len);
> > +sid->inlen = len;
> > +break;
> > +
> > +case SSIF_IPMI_MULTI_PART_REQUEST_END:
> > +send = true;
> > +/* FALLTHRU */
> > +case SSIF_IPMI_MULTI_PART_REQUEST_MIDDLE:
> > +if (!sid->inlen) {
> > +return -1; /* Bogus. */
> > +}
> > +if (sid->inlen + len > MAX_SSIF_IPMI_MSG_SIZE) {
> > +sid->inlen = 0; /* Discard the message. */
> > +return -1; /* Bogus. */
> > +}
>
> ...this error checking on the values of the 'middle' request
> means that after one 'middle' request we can end up with
> sid->inlen == MAX_SSIF_IPMI_MSG_SIZE (ie we filled the
> entire sid->inmsg[] array).
>
> But then if we get another 'middle' request with len == 0,
> that will pass this error checking because (sid->inlen + len == MSG_SIZE)
> and fall through into...
>
> > +if (len < 32) {
> > +/*
> > + * Special hack, a multi-part middle that is less than 32 bytes
> > + * marks the end of a message.  The specification is fairly
> > + * confusing, so some systems to this, even sending a zero
> > + * length end message to mark the end.
> > + */
> > +send = true;
> > +}
> > +memcpy(sid->inmsg + sid->inlen, buf, len);
>
> ...calling memcpy() with argument 1 being a pointer that points
> one past the end of the array. Even though len will be 0 and
> we won't memcpy() anything, this is (depending on how you choose
> to intepret things the C standard doesn't come right out and state
> explicitly) undefined behaviour, because memcpy() wants to be passed
> valid pointers, even if you ask it to do no work with a zero len.
>
> This isn't going to be a visible bug in practical terms, but it would
> make Coverity happy if we either (a) rejected a request with an empty
> length or else (b) skipped the memcpy(). I don't know enough about
> IPMI to know which is better.

thanks
-- PMM

Re: [PATCH v4 08/17] dump: Split write of section headers and data and add a prepare step

2022-07-29 Thread Janis Schoetterl-Glausch

On 7/26/22 11:22, Janosch Frank wrote:
> By splitting the writing of the section headers and (future) section
> data we prepare for the addition of a string table section and
> architecture sections.
> 
> At the same time we move the writing of the section to the end of the
> dump process. This allows the upcoming architecture section code to
> add data after all of the common dump data has been written.
> 
> Signed-off-by: Janosch Frank 
> ---
>  dump/dump.c   | 112 --
>  include/sysemu/dump.h |   4 ++
>  2 files changed, 90 insertions(+), 26 deletions(-)
> 
> diff --git a/dump/dump.c b/dump/dump.c
> index 8a7ce95610..a6bb7bfa21 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c
> @@ -342,30 +342,69 @@ static void write_elf_phdr_note(DumpState *s, Error 
> **errp)
>  }
>  }
>  
> -static void write_elf_section(DumpState *s, int type, Error **errp)
> +static size_t prepare_elf_section_hdr_zero(DumpState *s)

You don't use the return value and it is well known, the length of a section
header entry.

>  {
> -Elf32_Shdr shdr32;
> -Elf64_Shdr shdr64;
> -int shdr_size;
> -void *shdr;
> -int ret;
> +if (dump_is_64bit(s)) {
> +Elf64_Shdr *shdr64 = s->elf_section_hdrs;
>  
> -if (type == 0) {
> -shdr_size = sizeof(Elf32_Shdr);
> -memset(, 0, shdr_size);
> -shdr32.sh_info = cpu_to_dump32(s, s->phdr_num);
> -shdr = 
> +shdr64->sh_info = cpu_to_dump32(s, s->phdr_num);
>  } else {
> -shdr_size = sizeof(Elf64_Shdr);
> -memset(, 0, shdr_size);
> -shdr64.sh_info = cpu_to_dump32(s, s->phdr_num);
> -shdr = 
> +Elf32_Shdr *shdr32 = s->elf_section_hdrs;
> +
> +shdr32->sh_info = cpu_to_dump32(s, s->phdr_num);
>  }
>  
> -ret = fd_write_vmcore(shdr, shdr_size, s);
> +return dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
> +}
> +
[...]
> +
> +static void write_elf_sections(DumpState *s, Error **errp)
> +{
> +int ret;
> +
> +/* Write section zero */
> +ret = fd_write_vmcore(s->elf_section_data, s->elf_section_data_size, s);
> +if (ret < 0) {
> +error_setg_errno(errp, -ret, "dump: failed to write section data");
>  }
>  }
>  
> @@ -557,12 +596,22 @@ static void dump_begin(DumpState *s, Error **errp)
>  /* Write elf header to buffer */
>  prepare_elf_header(s);
>  
> +prepare_elf_sections(s, errp);
> +if (*errp) {
> +return;
> +}
> +
>  /* Start to write stuff into file descriptor */
>  write_elf_header(s, errp);
>  if (*errp) {
>  return;
>  }
>  
> +write_elf_section_headers(s, errp);
> +if (*errp) {
> +return;
> +}
> +
>  /* write PT_NOTE to vmcore */
>  write_elf_phdr_note(s, errp);
>  if (*errp) {
> @@ -575,14 +624,6 @@ static void dump_begin(DumpState *s, Error **errp)
>  return;
>  }
>  
> -/* write section to vmcore */
> -if (s->shdr_num) {
> -write_elf_section(s, 1, errp);
> -if (*errp) {
> -return;
> -}
> -}
> -
>  /* write notes to vmcore */
>  write_elf_notes(s, errp);
>  }
> @@ -647,6 +688,19 @@ static void dump_iterate(DumpState *s, Error **errp)
>  }
>  }
>  
> +static void dump_end(DumpState *s, Error **errp)
> +{

This function doesn't do anything yet, does it?
Not sure if I like splitting the patches like that, but squashing them will 
blow up the patch size.
Maybe you should just mention in the patch description which functionality 
remains
dormant and under what circumstances it will become active.

> +ERRP_GUARD();
> +
> +if (!s->elf_section_data_size) {
> +return;
> +}
> +s->elf_section_data = g_malloc0(s->elf_section_data_size);
> +
> +/* write sections to vmcore */
> +write_elf_sections(s, errp);
> +}
> +
>  static void create_vmcore(DumpState *s, Error **errp)
>  {
>  ERRP_GUARD();
> @@ -657,6 +711,12 @@ static void create_vmcore(DumpState *s, Error **errp)
>  }
>  
>  dump_iterate(s, errp);
> +if (*errp) {
> +return;
> +}
> +
> +/* Write section data after memory has been dumped */
> +dump_end(s, errp);
>  }
>  
>  static int write_start_flat_header(int fd)
> diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
> index 7f9a848573..686555f908 100644
> --- a/include/sysemu/dump.h
> +++ b/include/sysemu/dump.h
> @@ -172,6 +172,10 @@ typedef struct DumpState {
>  int64_t length;/* Length of the dump we want to dump */
>  
>  void *elf_header;
> +void *elf_section_hdrs;
> +uint64_t elf_section_data_size;
> +void *elf_section_data;
> +
>  uint8_t *note_buf;  /* buffer for notes */
>  size_t note_buf_offset; /* the writing place in note_buf */
>  uint32_t nr_cpus;   /* number of guest's cpu */

Re: [PATCH v2 0/3] Fix some coverity issues on VDUSE

2022-07-29 Thread Peter Maydell

On Wed, 6 Jul 2022 at 11:18, Xie Yongji  wrote:
>
> This series fixes some issues reported by coverity.
>
> Patch 1 fixes a incorrect function name.
>
> Patch 2 fixes Coverity CID 1490224.
>
> Patch 3 fixes Coverity CID 1490226, 1490223.
>
> V1 to V2:
> - Drop the patch to fix Coverity CID 1490222, 1490227 [Markus]
> - Add some commit log to explain why we don't use g_strlcpy() [Markus]
>
> Xie Yongji (3):
>   libvduse: Fix the incorrect function name
>   libvduse: Replace strcpy() with strncpy()
>   libvduse: Pass positive value to strerror()
>
>  subprojects/libvduse/libvduse.c | 13 +++--
>  1 file changed, 7 insertions(+), 6 deletions(-)

Hi -- these patches still don't seem to be in git. What
is the plan for getting them in ?

thanks
-- PMM

Re: [PATCH] dirtylimit: Fix overflow when computing MB

2022-07-29 Thread Peter Maydell

On Fri, 29 Jul 2022 at 16:17,  wrote:
>
> From: Hyman Huang(黄勇) 
>
> Coverity points out a overflow problem when computing MB,
> dirty_ring_size and TARGET_PAGE_SIZE are both 32 bits,
> multiplication will be done as a 32-bit operation, which
> could overflow. Simplify the formula.
>
> Meanwhile, fix spelling mistake of variable name.
>
> Reported-by: Peter Maydell 
> Signed-off-by: Richard Henderson 
> Signed-off-by: Hyman Huang(黄勇) 
> ---
>  softmmu/dirtylimit.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
> index 8d98cb7..ab62f29 100644
> --- a/softmmu/dirtylimit.c
> +++ b/softmmu/dirtylimit.c
> @@ -236,14 +236,14 @@ static inline int64_t 
> dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
>  {
>  static uint64_t max_dirtyrate;
>  uint32_t dirty_ring_size = kvm_dirty_ring_size();
> -uint64_t dirty_ring_size_meory_MB =
> -dirty_ring_size * TARGET_PAGE_SIZE >> 20;
> +uint32_t dirty_ring_size_memory_MB =
> +dirty_ring_size >> (20 - TARGET_PAGE_BITS);
>
>  if (max_dirtyrate < dirtyrate) {
>  max_dirtyrate = dirtyrate;
>  }
>
> -return dirty_ring_size_meory_MB * 100 / max_dirtyrate;
> +return dirty_ring_size_memory_MB * 100 / max_dirtyrate;

Now you've changed dirty_ring_size_memory_MB to 32 bits,
this multiplication is going to be done at 32 bit
precision and can overflow. Adding 'ULL' to the '100'
is one way to fix that.

thanks
-- PMM

[PATCH] dirtylimit: Fix overflow when computing MB

2022-07-29 Thread huangy81

From: Hyman Huang(黄勇) 

Coverity points out a overflow problem when computing MB,
dirty_ring_size and TARGET_PAGE_SIZE are both 32 bits,
multiplication will be done as a 32-bit operation, which
could overflow. Simplify the formula.

Meanwhile, fix spelling mistake of variable name.

Reported-by: Peter Maydell 
Signed-off-by: Richard Henderson 
Signed-off-by: Hyman Huang(黄勇) 
---
 softmmu/dirtylimit.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index 8d98cb7..ab62f29 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -236,14 +236,14 @@ static inline int64_t 
dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
 {
 static uint64_t max_dirtyrate;
 uint32_t dirty_ring_size = kvm_dirty_ring_size();
-uint64_t dirty_ring_size_meory_MB =
-dirty_ring_size * TARGET_PAGE_SIZE >> 20;
+uint32_t dirty_ring_size_memory_MB =
+dirty_ring_size >> (20 - TARGET_PAGE_BITS);
 
 if (max_dirtyrate < dirtyrate) {
 max_dirtyrate = dirtyrate;
 }
 
-return dirty_ring_size_meory_MB * 100 / max_dirtyrate;
+return dirty_ring_size_memory_MB * 100 / max_dirtyrate;
 }
 
 static inline bool dirtylimit_done(uint64_t quota,
-- 
1.8.3.1

Re: [RFC 0/3] Add Generic SPI GPIO model

2022-07-29 Thread Cédric Le Goater


On 7/29/22 16:38, Patrick Williams wrote:

On Fri, Jul 29, 2022 at 03:25:55PM +0200, Cédric Le Goater wrote:

Hello Iris,

On 7/29/22 01:23, Iris Chen wrote:

Currently, most of our vboot platforms using a SPI-based TPM use the Linux
SPI-GPIO driver to "bit-bang" the SPI protocol. This is because the Aspeed
SPI controller (modeled in QEMU under hw/ssi/aspeed_smc.c) has an implementation
deficiency that prevents bi-directional operations.

aspeed_smc models the Aspeed FMC/SPI controllers which have a well defined
HW interface. Your model proposal adds support for a new SPI controller
using bitbang GPIOs. These are really two differents models. I don't see
how you could reuse aspeed_smc for this purpose.


(I don't think there was anything here that proposed reusing the QEMU
  aspeed_smc model, but it might have been poorly worded).


yeah. That's fine. I was trying to see if we could ease Iris work with
a fix in some driver/model but I didn't understand where.


or you mean that Linux is using the SPI-GPIO driver because the Linux
Aspeed SMC driver doesn't match the need ? It is true that the Linux
driver is not generic, it deals with flash devices only. But that's
another problem.


We actually mean that the _hardware_ has a deficiency, not any of the
code for it.  The Aspeed SPI controller has a single byte FIFO in its
implementation, which can only read or write in a single operation.  It
is *impossible* to perform bi-directional access with it (ie. you cannot
write the MOSI and read the MISO in the same transaction).  This is
broken for many SPI protocols, other than flash devices, including the one
used for TPMs.

In order to connect to SPI-based TPMs on an Aspeed chip, you have to use
the SPI-GPIO method.


Ok. Thanks for the clarification.

C.

Re: [PULL 06/30] softmmu/dirtylimit: Implement virtual CPU throttle

2022-07-29 Thread Hyman





在 2022/7/29 22:14, Richard Henderson 写道:

On 7/29/22 06:31, Peter Maydell wrote:

On Wed, 20 Jul 2022 at 12:30, Dr. David Alan Gilbert (git)
 wrote:


From: Hyman Huang(黄勇) 

Setup a negative feedback system when vCPU thread
handling KVM_EXIT_DIRTY_RING_FULL exit by introducing
throttle_us_per_full field in struct CPUState. Sleep
throttle_us_per_full microseconds to throttle vCPU
if dirtylimit is in service.

Signed-off-by: Hyman Huang(黄勇) 
Reviewed-by: Peter Xu 
Message-Id: 
<977e808e03a1cef5151cae75984658b6821be618.1656177590.git.huang...@chinatelecom.cn> 


Signed-off-by: Dr. David Alan Gilbert 



Hi; Coverity points out a problem with this code (CID 1490787):

Thanks for pointing out this bug.  I'm making a access request for
https://scan.coverity.com so that coverity problem can be found once new
series be posted. Hoping this bug doesn't appear anymore. :)


+static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t 
dirtyrate)

+{
+    static uint64_t max_dirtyrate;
+    uint32_t dirty_ring_size = kvm_dirty_ring_size();
+    uint64_t dirty_ring_size_meory_MB =
+    dirty_ring_size * TARGET_PAGE_SIZE >> 20;


Because dirty_ring_size and TARGET_PAGE_SIZE are both 32 bits,
this multiplication will be done as a 32-bit operation,
which could overflow. You should cast one of the operands
to uint64_t to ensure that the operation is done as a 64 bit
multiplication.


To compute MB, you don't need multiplication:

   dirty_ring_size >> (20 - TARGET_PAGE_BITS)

In addition, why the mismatch in type?  dirty_ring_size_memory_MB can 
never be larger than dirty_ring_size.



r~


I'll post bugfix patch later as your suggestion, please review, thanks.


Side note: typo in the variable name: should be 'memory'.



+    if (max_dirtyrate < dirtyrate) {
+    max_dirtyrate = dirtyrate;
+    }
+
+    return dirty_ring_size_meory_MB * 100 / max_dirtyrate;
+}


thanks
-- PMM

[PULL 3/6] stubs: update replay-tools to match replay.h types

2022-07-29 Thread Paolo Bonzini

From: Claudio Fontana 

detected with GCC 13 [-Werror=enum-int-mismatch]

Solves Issue #1096.

Signed-off-by: Claudio Fontana 
Cc: Pavel Dovgalyuk 
Reviewed-by: Thomas Huth 
Message-Id: <20220704075832.31537-1-cfont...@suse.de>
Signed-off-by: Paolo Bonzini 
---
 stubs/replay-tools.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/stubs/replay-tools.c b/stubs/replay-tools.c
index 43296b3d4e..f2e72bb225 100644
--- a/stubs/replay-tools.c
+++ b/stubs/replay-tools.c
@@ -7,13 +7,14 @@ bool replay_events_enabled(void)
 return false;
 }
 
-int64_t replay_save_clock(unsigned int kind, int64_t clock, int64_t raw_icount)
+int64_t replay_save_clock(ReplayClockKind kind,
+  int64_t clock, int64_t raw_icount)
 {
 abort();
 return 0;
 }
 
-int64_t replay_read_clock(unsigned int kind, int64_t raw_icount)
+int64_t replay_read_clock(ReplayClockKind kind, int64_t raw_icount)
 {
 abort();
 return 0;
@@ -48,11 +49,11 @@ void replay_mutex_unlock(void)
 {
 }
 
-void replay_register_char_driver(Chardev *chr)
+void replay_register_char_driver(struct Chardev *chr)
 {
 }
 
-void replay_chr_be_write(Chardev *s, uint8_t *buf, int len)
+void replay_chr_be_write(struct Chardev *s, uint8_t *buf, int len)
 {
 abort();
 }
-- 
2.36.1

[PULL 5/6] kvm: don't use perror() without useful errno

2022-07-29 Thread Paolo Bonzini

From: Cornelia Huck 

perror() is designed to append the decoded errno value to a
string. This, however, only makes sense if we called something that
actually sets errno prior to that.

For the callers that check for split irqchip support that is not the
case, and we end up with confusing error messages that end in
"success". Use error_report() instead.

Signed-off-by: Cornelia Huck 
Message-Id: <20220728142446.438177-1-coh...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 accel/kvm/kvm-all.c | 2 +-
 target/arm/kvm.c| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index f165074e99..645f0a249a 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2265,7 +2265,7 @@ static void kvm_irqchip_create(KVMState *s)
 ret = kvm_arch_irqchip_create(s);
 if (ret == 0) {
 if (s->kernel_irqchip_split == ON_OFF_AUTO_ON) {
-perror("Split IRQ chip mode not supported.");
+error_report("Split IRQ chip mode not supported.");
 exit(1);
 } else {
 ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 4339e1cd6e..e5c1bd50d2 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -959,7 +959,7 @@ void kvm_arch_init_irq_routing(KVMState *s)
 int kvm_arch_irqchip_create(KVMState *s)
 {
 if (kvm_kernel_irqchip_split()) {
-perror("-machine kernel_irqchip=split is not supported on ARM.");
+error_report("-machine kernel_irqchip=split is not supported on ARM.");
 exit(1);
 }
 
-- 
2.36.1

[PULL 0/6] Fixes for QEMU 7.1-rc1

2022-07-29 Thread Paolo Bonzini

The following changes since commit 7b17a1a841fc2336eba53afade9cadb14bd3dd9a:

  Update version for v7.1.0-rc0 release (2022-07-26 18:03:16 -0700)

are available in the Git repository at:

  https://gitlab.com/bonzini/qemu.git tags/for-upstream

for you to fetch changes up to ebc55f523c2f406e30ec8fad77bd3b9aad5d4579:

  configure: pass correct cflags to container-based cross compilers (2022-07-29 
00:22:19 +0200)


* Misc build system bugfixes
* Fix CGA 2-color graphics


Claudio Fontana (1):
  stubs: update replay-tools to match replay.h types

Cornelia Huck (1):
  kvm: don't use perror() without useful errno

Paolo Bonzini (3):
  ui: dbus-display requires CONFIG_GBM
  vga: fix incorrect line height in 640x200x2 mode
  configure: pass correct cflags to container-based cross compilers

Richard Henderson (1):
  configure: Fix ppc container_cross_cc substitution

 accel/kvm/kvm-all.c  | 2 +-
 configure| 3 +--
 hw/display/vga.c | 3 ++-
 meson.build  | 4 ++--
 stubs/replay-tools.c | 9 +
 target/arm/kvm.c | 2 +-
 ui/meson.build   | 2 +-
 7 files changed, 13 insertions(+), 12 deletions(-)
-- 
2.36.1

[PULL 6/6] configure: pass correct cflags to container-based cross compilers

2022-07-29 Thread Paolo Bonzini

probe_target_compiler returns nonempty $target_cc for installed toolchains
and $container_cross_cc for container-based toolchains.  In both cases
however the flags (coming from $cross_cc_cflags_${target_arch}) must be
in $target_cflags.

Therefore, do not clear them prior to returning from probe_target_compiler.

Reported-by: Taylor Simpson 
Fixes: 92e288fcfb ("build: try both native and cross compilers", 2022-07-08)
Signed-off-by: Paolo Bonzini 
---
 configure | 1 -
 1 file changed, 1 deletion(-)

diff --git a/configure b/configure
index c4c02b8438..72ab03f11a 100755
--- a/configure
+++ b/configure
@@ -2173,7 +2173,6 @@ probe_target_compiler() {
 build_static=
 target_cc=
 target_ccas=
-target_cflags=
 target_ar=
 target_as=
 target_ld=
-- 
2.36.1

[PULL 4/6] configure: Fix ppc container_cross_cc substitution

2022-07-29 Thread Paolo Bonzini

From: Richard Henderson 

When moving this code out of probe_target_compiler(), we failed to adjust
the variable in which the target is located, resulting in e.g.
powerpc64-linux-user-linux-gnu-gcc-10

Fixes: cd362defbbd ("tests/tcg: merge configure.sh back into main configure 
script")
Signed-off-by: Richard Henderson 
Message-Id: <20220728183901.1290113-1-richard.hender...@linaro.org>
Signed-off-by: Paolo Bonzini 
---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index 2c19329d58..c4c02b8438 100755
--- a/configure
+++ b/configure
@@ -2028,7 +2028,7 @@ probe_target_compiler() {
 ;;
   ppc64|ppc64le)
 container_image=debian-powerpc-test-cross
-container_cross_prefix=powerpc${1#ppc}-linux-gnu-
+container_cross_prefix=powerpc${target_arch#ppc}-linux-gnu-
 container_cross_cc=${container_cross_prefix}gcc-10
 ;;
   riscv64)
-- 
2.36.1

[PULL 1/6] ui: dbus-display requires CONFIG_GBM

2022-07-29 Thread Paolo Bonzini

Without CONFIG_GBM, compiling dbus-display fails with

../ui/dbus.c: In function ‘dbus_create_context’:
../ui/dbus.c:47:20: error: ‘qemu_egl_rn_ctx’ undeclared (first use in this 
function); did you mean ‘qemu_egl_init_ctx’?
   47 |qemu_egl_rn_ctx);
  |^~~
  |qemu_egl_init_ctx
../ui/dbus.c:47:20: note: each undeclared identifier is reported only once for 
each function it appears in

and many other similar errors, because include/ui/egl-helpers.h only has
these declaration if gbm is found on the system.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1108
Reviewed-by: Marc-André Lureau 
Reviewed-by: Daniel P. Berrangé 
Signed-off-by: Paolo Bonzini 
---
 meson.build| 4 ++--
 ui/meson.build | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/meson.build b/meson.build
index 75aaca8462..294e9a8f32 100644
--- a/meson.build
+++ b/meson.build
@@ -1677,8 +1677,8 @@ dbus_display = get_option('dbus_display') \
error_message: '-display dbus requires --enable-modules') \
   .require(gdbus_codegen.found(),
error_message: '-display dbus requires gdbus-codegen') \
-  .require(opengl.found(),
-   error_message: '-display dbus requires epoxy/egl') \
+  .require(opengl.found() and gbm.found(),
+   error_message: '-display dbus requires epoxy/egl and gbm') \
   .allowed()
 
 have_virtfs = get_option('virtfs') \
diff --git a/ui/meson.build b/ui/meson.build
index e9f48c5315..ec13949776 100644
--- a/ui/meson.build
+++ b/ui/meson.build
@@ -81,7 +81,7 @@ if dbus_display
   '--interface-prefix', 'org.qemu.',
   '--c-namespace', 'QemuDBus',
   '--generate-c-code', '@BASENAME@'])
-  dbus_ss.add(when: [gio, pixman, opengl],
+  dbus_ss.add(when: [gio, pixman, opengl, gbm],
   if_true: [files(
 'dbus-chardev.c',
 'dbus-clipboard.c',
-- 
2.36.1

[PULL 2/6] vga: fix incorrect line height in 640x200x2 mode

2022-07-29 Thread Paolo Bonzini

When in CGA modes, QEMU wants to ignore the maximum scan field (bits 0..4) of
the maximum scan length register in the CRTC.  It is not clear why this is
needed---for example, Bochs ignores bit 7 instead.  The issue is that the
CGA modes are not detected correctly, and in particular mode 6 results in
multi_scan==3 according to how SeaBIOS programs it.  The right way to check
for CGA graphics modes is to check whether bit 13 of the address is special
cased by the CRT controller to achieve line interleaving, i.e. whether bit 0
of the CRTC mode control register is clear.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1020
Reported-by: Korneliusz Osmenda 
Signed-off-by: Paolo Bonzini 
---
 hw/display/vga.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/display/vga.c b/hw/display/vga.c
index 5dca2d1528..50ecb1ad02 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -1514,9 +1514,10 @@ static void vga_draw_graphic(VGACommonState *s, int 
full_update)
 force_shadow = true;
 }
 
+/* bits 5-6: 0 = 16-color mode, 1 = 4-color mode, 2 = 256-color mode.  */
 shift_control = (s->gr[VGA_GFX_MODE] >> 5) & 3;
 double_scan = (s->cr[VGA_CRTC_MAX_SCAN] >> 7);
-if (shift_control != 1) {
+if (s->cr[VGA_CRTC_MODE] & 1) {
 multi_scan = (((s->cr[VGA_CRTC_MAX_SCAN] & 0x1f) + 1) << double_scan)
 - 1;
 } else {
-- 
2.36.1

Re: [RFC 0/3] Add Generic SPI GPIO model

2022-07-29 Thread Patrick Williams

On Fri, Jul 29, 2022 at 03:25:55PM +0200, Cédric Le Goater wrote:
> Hello Iris,
> 
> On 7/29/22 01:23, Iris Chen wrote:

> > MOSI pin in spi_gpio: the mosi pin is not included and we poll the realtime 
> > value
> > of the gpio for input bits to prevent bugs with caching the mosi value. It 
> > was discovered
> > during testing that when using the mosi pin as the input pin, the mosi 
> > value was not
> > being updated due to a kernel and aspeed_gpio model optimization. 
> 
> ah. We need help from Andrew ! the model should have a mosi pin .

This discussion about SMC reminded me of something that might be leading
to the issues we're seeing.  Our hardware implementation uses the same
GPIOs as one of the SMCs and doesn't use the SMC.  It could be that both
QEMU models (the SPI-GPIO and the SMC) are trying to grab the same
GPIOs.

-- 
Patrick Williams

signature.asc
Description: PGP signature

Re: [RFC 0/3] Add Generic SPI GPIO model

2022-07-29 Thread Patrick Williams

On Fri, Jul 29, 2022 at 03:25:55PM +0200, Cédric Le Goater wrote:
> Hello Iris,
> 
> On 7/29/22 01:23, Iris Chen wrote:
> > Currently, most of our vboot platforms using a SPI-based TPM use the Linux
> > SPI-GPIO driver to "bit-bang" the SPI protocol. This is because the Aspeed
> > SPI controller (modeled in QEMU under hw/ssi/aspeed_smc.c) has an 
> > implementation
> > deficiency that prevents bi-directional operations. 
> aspeed_smc models the Aspeed FMC/SPI controllers which have a well defined
> HW interface. Your model proposal adds support for a new SPI controller
> using bitbang GPIOs. These are really two differents models. I don't see
> how you could reuse aspeed_smc for this purpose.

(I don't think there was anything here that proposed reusing the QEMU
 aspeed_smc model, but it might have been poorly worded).

> or you mean that Linux is using the SPI-GPIO driver because the Linux
> Aspeed SMC driver doesn't match the need ? It is true that the Linux
> driver is not generic, it deals with flash devices only. But that's
> another problem.

We actually mean that the _hardware_ has a deficiency, not any of the
code for it.  The Aspeed SPI controller has a single byte FIFO in its
implementation, which can only read or write in a single operation.  It
is *impossible* to perform bi-directional access with it (ie. you cannot
write the MOSI and read the MISO in the same transaction).  This is
broken for many SPI protocols, other than flash devices, including the one
used for TPMs.

In order to connect to SPI-based TPMs on an Aspeed chip, you have to use
the SPI-GPIO method.

-- 
Patrick Williams

signature.asc
Description: PGP signature

Re: [PULL 02/60] semihosting: Return failure from softmmu-uaccess.h functions

2022-07-29 Thread Peter Maydell

On Tue, 28 Jun 2022 at 05:54, Richard Henderson
 wrote:
>
> We were reporting unconditional success for these functions;
> pass on any failure from cpu_memory_rw_debug.
>
> Reviewed-by: Peter Maydell 
> Signed-off-by: Richard Henderson 

So, this commit makes us check the cpu_memory_rw_debug()
return value in the softmmu_lock_user() function; but Coverity
points out (CID 1490294) that we still aren't checking the
return value from when we call it in softmmu_unlock_user()...

What, if anything, should we do about that? In particular,
I think that for semihosting calls that write to guest memory,
if the guest passes us a pointer to read-only memory we're
not going to find out about that until unlock time.

thanks
-- PMM

Re: [PATCH] disas/nanomips: Convert nanoMIPS disassembler to C

2022-07-29 Thread Peter Maydell

On Fri, 29 Jul 2022 at 15:13, Milica Lazarevic
 wrote:
>
> C++ features like class, exception handling and function overloading
> have been removed and replaced with equivalent C code.
>
> Signed-off-by: Milica Lazarevic 
> ---
> Please see the discussion about why converting it here:
> https://lists.nongnu.org/archive/html/qemu-devel/2022-06/msg01803.html
>
> The validity of the disassembler after refactoring has been tested with
> the QEMU emulator version 6.0.1. With the most recent version, there is a
> problem with the executing nanoMIPS programs in the semihosting mode. The
> issue is reported here: https://gitlab.com/qemu-project/qemu/-/issues/1126
> We're currently working on fixing this.
>
>  disas/meson.build  |2 +-
>  disas/{nanomips.cpp => nanomips.c} | 8407 ++--
>  disas/nanomips.h   | 1076 
>  3 files changed, 4154 insertions(+), 5331 deletions(-)
>  rename disas/{nanomips.cpp => nanomips.c} (73%)
>  delete mode 100644 disas/nanomips.h

Is it possible to break this down into smaller pieces so it isn't
one single enormous 5000 line patch ?

I guess partial conversion is likely to run into compilation
difficulties mid-series; if so we could do "disable the
disassembler; convert it; reenable it".

The rationale here is code review -- reviewing a single huge
patch is essentially impossible, so it needs to be broken
down into coherent smaller pieces to be reviewable.

thanks
-- PMM

Re: [PULL 06/30] softmmu/dirtylimit: Implement virtual CPU throttle

2022-07-29 Thread Richard Henderson


On 7/29/22 06:31, Peter Maydell wrote:

On Wed, 20 Jul 2022 at 12:30, Dr. David Alan Gilbert (git)
 wrote:


From: Hyman Huang(黄勇) 

Setup a negative feedback system when vCPU thread
handling KVM_EXIT_DIRTY_RING_FULL exit by introducing
throttle_us_per_full field in struct CPUState. Sleep
throttle_us_per_full microseconds to throttle vCPU
if dirtylimit is in service.

Signed-off-by: Hyman Huang(黄勇) 
Reviewed-by: Peter Xu 
Message-Id: 
<977e808e03a1cef5151cae75984658b6821be618.1656177590.git.huang...@chinatelecom.cn>
Signed-off-by: Dr. David Alan Gilbert 



Hi; Coverity points out a problem with this code (CID 1490787):


+static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
+{
+static uint64_t max_dirtyrate;
+uint32_t dirty_ring_size = kvm_dirty_ring_size();
+uint64_t dirty_ring_size_meory_MB =
+dirty_ring_size * TARGET_PAGE_SIZE >> 20;


Because dirty_ring_size and TARGET_PAGE_SIZE are both 32 bits,
this multiplication will be done as a 32-bit operation,
which could overflow. You should cast one of the operands
to uint64_t to ensure that the operation is done as a 64 bit
multiplication.


To compute MB, you don't need multiplication:

  dirty_ring_size >> (20 - TARGET_PAGE_BITS)

In addition, why the mismatch in type?  dirty_ring_size_memory_MB can never be larger than 
dirty_ring_size.



r~



Side note: typo in the variable name: should be 'memory'.



+if (max_dirtyrate < dirtyrate) {
+max_dirtyrate = dirtyrate;
+}
+
+return dirty_ring_size_meory_MB * 100 / max_dirtyrate;
+}


thanks
-- PMM

Re: [PULL V2 19/25] vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs

2022-07-29 Thread Peter Maydell

On Wed, 20 Jul 2022 at 10:04, Jason Wang  wrote:
>
> From: Eugenio Pérez 
>
> To know the device features is needed for CVQ SVQ, so SVQ knows if it
> can handle all commands or not. Extract from
> vhost_vdpa_get_max_queue_pairs so we can reuse it.
>
> Signed-off-by: Eugenio Pérez 
> Acked-by: Jason Wang 
> Reviewed-by: Michael S. Tsirkin 
> Signed-off-by: Jason Wang 

Hi; this change introduces a resource leak in the new
error-exit return path in net_init_vhost_vdpa(). Spotted
by Coverity, CID 1490785.

> @@ -517,10 +521,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const 
> char *name,
>  NetClientState *peer, Error **errp)
>  {
>  const NetdevVhostVDPAOptions *opts;
> +uint64_t features;
>  int vdpa_device_fd;
>  g_autofree NetClientState **ncs = NULL;
>  NetClientState *nc;
> -int queue_pairs, i, has_cvq = 0;
> +int queue_pairs, r, i, has_cvq = 0;
>
>  assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>  opts = >u.vhost_vdpa;
> @@ -534,7 +539,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
> *name,
>  return -errno;
>  }
>
> -queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd,
> +r = vhost_vdpa_get_features(vdpa_device_fd, , errp);
> +if (unlikely(r < 0)) {
> +return r;

At this point in the code we have allocated the file descriptor
vdpa_device_fd, but this return path fails to close it.

> +}
> +
> +queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
>   _cvq, errp);
>  if (queue_pairs < 0) {
>  qemu_close(vdpa_device_fd);

Compare this pre-existing error-exit path, which correctly
calls qemu_close() on the fd.

Related question: is this function supposed to return -1 on
failure, or negative-errno ? At the moment it has a mix
of both. I think that the sole caller only really wants "<0 on
error", in which case the error-exit code paths could probably
be tidied up so that instead of explicitly calling
qemu_close() and returning r, queue_pairs, or whatever
they got back from the function they just called, they
could just 'goto err_svq' which will do the "close the fd
and return -1" work. Better still, by initializing 'i'
to 0 at the top of the function (and naming it something
clearer, ideally), all the code paths after the initial
qemu_open() succeeds could be made to use 'goto err'
for the error-exit case.

thanks
-- PMM

Re: virtio: why no full reset on virtio_set_status 0 ?

2022-07-29 Thread Claudio Fontana

On 7/29/22 15:21, Alex Bennée wrote:
> 
> Claudio Fontana  writes:
> 
>> On 7/29/22 12:13, Michael S. Tsirkin wrote:
>>> On Fri, Jul 29, 2022 at 11:46:05AM +0200, Claudio Fontana wrote:
>> @@ -2025,7 +2031,6 @@ void virtio_reset(void *opaque)
>>  VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
>>  int i;
>>  
>> -virtio_set_status(vdev, 0);
>>  if (current_cpu) {
>>  /* Guest initiated reset */
>>  vdev->device_endian = virtio_current_cpu_endian();
>> -- 
>> 2.26.2
>
> As you say this is incomplete ... bout could you share a bit more
> of what issue does this address?
>

 Hi, the problem I am trying to address is a segfault in OVS/dpdk that 
 looks like this:
>>>
>>> Sorry I was not clear. What I mean is, you don't yet know why does removing
>>> virtio_set_status call here prevent the crash in ovs, do you?
>>>
>>
>> I have no idea. Trying to collect logs to figure things out, but as
>> mentioned the logs easily hide the issue.
>> Likely there is just more to study here.
> 
> Given the OVS is going off on a NULL ptr deref could it just be it's not
> handling the disabling/reenabling of the virtqueues as you pause and
> restart properly? I could certainly imagine a backend jumping the gun to
> read a queue going very wrong if the current queue state is disabled.
> 

In this case both the ovs buf_addr and buf_iova are NULL, which is a nice case 
as they are more detectable,
however I also have segfaults where the addresses are just garbage.

I wonder whether it's possible that given the fact that the guest is going away 
without notification (SIGKILL),
as the guest driver resets the device and communicates with QEMU, QEMU adapts 
the state without notifying ovs,
so ovs happily tries to dequeue data from memory that isn't there. But I am 
just guessing.

I am still studying the qemu vhost user side and ovs/dpdk side to try to 
understand how this whole thing works.

Thanks,

CLaudio

Re: [PULL V2 25/25] net/colo.c: fix segmentation fault when packet is not parsed correctly

2022-07-29 Thread Peter Maydell

On Wed, 20 Jul 2022 at 10:04, Jason Wang  wrote:
>
> From: Zhang Chen 
>
> When COLO use only one vnet_hdr_support parameter between
> filter-redirector and filter-mirror(or colo-compare), COLO will crash
> with segmentation fault. Back track as follow:
>
> Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
> 0x55cb200b in eth_get_l2_hdr_length (p=0x0)
> at /home/tao/project/COLO/colo-qemu/include/net/eth.h:296
> 296 uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)->h_proto);
> (gdb) bt
> 0  0x55cb200b in eth_get_l2_hdr_length (p=0x0)
> at /home/tao/project/COLO/colo-qemu/include/net/eth.h:296
> 1  0x55cb22b4 in parse_packet_early (pkt=0x56a44840) at
> net/colo.c:49
> 2  0x55cb2b91 in is_tcp_packet (pkt=0x56a44840) at
> net/filter-rewriter.c:63
>
> So wrong vnet_hdr_len will cause pkt->data become NULL. Add check to
> raise error and add trace-events to track vnet_hdr_len.
>
> Signed-off-by: Tao Xu 
> Signed-off-by: Zhang Chen 
> Reviewed-by: Li Zhijian 
> Signed-off-by: Jason Wang 

Hi -- Coverity points out that there's a problem with this fix
(CID 1490786):

> @@ -46,7 +46,14 @@ int parse_packet_early(Packet *pkt)
>  static const uint8_t vlan[] = {0x81, 0x00};
>  uint8_t *data = pkt->data + pkt->vnet_hdr_len;

data here is set to pkt->data + pkt->vnet_hdr_len.
If pkt->data is NULL then this addition is C undefined behaviour,
and if pkt->data is not NULL but the integer added is such
that the pointer ends up not pointing within data, then that
is also C undefined behaviour...

>  uint16_t l3_proto;
> -ssize_t l2hdr_len = eth_get_l2_hdr_length(data);
> +ssize_t l2hdr_len;
> +
> +if (data == NULL) {

...so the compiler is allowed to assume that data cannot be NULL
here, and this entire error checking clause is logically dead code.

If you're trying to check whether vnet_hdr_len is valid, you
need to do that as an integer arithmetic check before you start
using it for pointer arithmetic. And you probably want to be
checking against some kind of bound, not just for whether the
result is going to be NULL.

Overall this looks kinda sketchy and could probably use more
detailed code review. Where does the bogus vnet_hdr_len come from in
the first place? Is this data from the guest, or from the network
(ie uncontrolled)?

> +trace_colo_proxy_main_vnet_info("This packet is not parsed 
> correctly, "
> +"pkt->vnet_hdr_len", 
> pkt->vnet_hdr_len);
> +return 1;
> +}

thanks
-- PMM

Re: [RFC 0/3] Add Generic SPI GPIO model

2022-07-29 Thread Cédric Le Goater


Hello Iris,

On 7/29/22 01:23, Iris Chen wrote:

Hey everyone,

I have been working on a project to add support for SPI-based TPMs in QEMU.
Currently, most of our vboot platforms using a SPI-based TPM use the Linux
SPI-GPIO driver to "bit-bang" the SPI protocol. This is because the Aspeed
SPI controller (modeled in QEMU under hw/ssi/aspeed_smc.c) has an implementation
deficiency that prevents bi-directional operations. 

aspeed_smc models the Aspeed FMC/SPI controllers which have a well defined
HW interface. Your model proposal adds support for a new SPI controller
using bitbang GPIOs. These are really two differents models. I don't see
how you could reuse aspeed_smc for this purpose.

or you mean that Linux is using the SPI-GPIO driver because the Linux
Aspeed SMC driver doesn't match the need ? It is true that the Linux
driver is not generic, it deals with flash devices only. But that's
another problem.


Thus, in order to connect
a TPM to this bus, my patch implements a QEMU SPI-GPIO driver (as the QEMU
counterpart of the Linux SPI-GPIO driver).

As we use SPI-based TPMs on many of our BMCs for the secure-boot implementation,
I have already tested this implementation locally with our Yosemite-v3.5 
platform
and Facebook-OpenBMC. This model was tested by connecting a generic SPI-NOR 
(m25p80
for example) to the Yosemite-v3.5 SPI bus containing the TPM.

This patch is an RFC because I have several questions about design. Although the
model is working, I understand there are many areas where the design decision
is not deal (ie. abstracting hard coded GPIO values). Below are some details of 
the
patch and specific areas where I would appreciate feedback on how to make this 
better:
  
hw/arm/aspeed.c:

I am not sure where the best place is to instantiate the spi_gpio besides the
aspeed_machine_init. 


The SPI GPIO device would be a platform device and not a SoC device.
Hence, it should be instantiated at the machine level, like the I2C
device are, using properties to let the model know about the GPIOs
that should be driven to implement the SPI protocol.

Ideally, the state of the GPIO controller pins and the SPI GPIO state
should be shared. I think that's what you are trying to do that with
attribute 'controller_state' in your patch ? But, the way it is done
today, is too tightly coupled (names) with the Aspeed GPIO model to
be generic.

I think we need an intermediate QOM interface, or a base class, to
implement an abstract SPI GPIO model and an Aspeed SPI GPIO model
on top which would be linked to the Aspeed GPIO model of the SoC
in use.

Or we could introduce some kind of generic GPIO controller that
we would link the SPI GPIO model with (using a property). The
Aspeed GPIO model would be of that kind and the SPI GPIO model
would be able to drive the pins using a common interface.
That's another way to do it.


Could we add the ability to instantiate it on the command line?


It might be complex to identify the QOM object to use as the GPIO
controller from the command line since it is on the SoC and not
a platform device. In that case, an Aspeed SPI GPIO model would
be a better approach. we  would have to peek in the machine/soc to
get a link on the Aspeed GPIO model in the realize routine of the
Aspeed SPI GPIO model.


m25p80_transfer8_ex in hw/block/m25p80.c:
Existing SPI model assumed that the output byte is fully formed, can be passed 
to
the SPI device, and the input byte can be returned, all in one operation. With
SPI-GPIO we don't have the input byte until all 8 bits of the output have been
shifted out, so we have to prime the MISO with bogus values (0xFF).


That's fine I think. We do something similar for dummies.


MOSI pin in spi_gpio: the mosi pin is not included and we poll the realtime 
value
of the gpio for input bits to prevent bugs with caching the mosi value. It was 
discovered
during testing that when using the mosi pin as the input pin, the mosi value 
was not
being updated due to a kernel and aspeed_gpio model optimization. 


ah. We need help from Andrew ! the model should have a mosi pin .

Thanks,

C.


Thus, here we are
reading the value directly from the gpio controller instead of waiting for the 
push.

I realize there are Aspeed specifics in the spi_gpio model. To make this 
extensible,
is it preferred to make this into a base class and have our Aspeed SPI GPIO 
extend
this or we could set up params to pass in the constructor?

Thanks for your review and any direction here would be helpful :)

Iris Chen (3):
   hw: m25p80: add prereading ability in transfer8
   hw: spi_gpio: add spi gpio model
   hw: aspeed: hook up the spi gpio model

  hw/arm/Kconfig|   1 +
  hw/arm/aspeed.c   |   5 ++
  hw/block/m25p80.c |  29 ++-
  hw/ssi/Kconfig|   4 +
  hw/ssi/meson.build|   1 +
  hw/ssi/spi_gpio.c | 166 ++
  hw/ssi/ssi.c  |   4 -
  include/hw/ssi/spi_gpio.h |  38 +

Re: [PATCH v10 05/21] job.c: add job_lock/unlock while keeping job.h intact

2022-07-29 Thread Kevin Wolf

Am 25.07.2022 um 09:38 hat Emanuele Giuseppe Esposito geschrieben:
> With "intact" we mean that all job.h functions implicitly
> take the lock. Therefore API callers are unmodified.
> 
> This means that:
> - many static functions that will be always called with job lock held
>   become _locked, and call _locked functions
> - all public functions take the lock internally if needed, and call _locked
>   functions
> - all public functions called internally by other functions in job.c will 
> have a
>   _locked counterpart (sometimes public), to avoid deadlocks (job lock 
> already taken).
>   These functions are not used for now.
> - some public functions called only from exernal files (not job.c) do not
>   have _locked() counterpart and take the lock inside. Others won't need
>   the lock at all because use fields only set at initialization and
>   never modified.
> 
> job_{lock/unlock} is independent from real_job_{lock/unlock}.
> 
> Note: at this stage, job_{lock/unlock} and job lock guard macros
> are *nop*
> 
> Reviewed-by: Vladimir Sementsov-Ogievskiy 
> Signed-off-by: Emanuele Giuseppe Esposito 

Would be nice to fix the access Vladimir found, but I think it's not
actually a bug because we know that nobody else is going to write to
job->ret. So with or without that fix:

Reviewed-by: Kevin Wolf

[Bug 1921664] Re: Coroutines are racy for risc64 emu on arm64 - crash on Assertion

2022-07-29 Thread Thomas Huth

Upstream QEMU bugs are now tracked on https://gitlab.com/qemu-
project/qemu/-/issues - so if you can reproduce it with the latest
version from upstream QEMU, please report it there.

** No longer affects: qemu

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1921664

Title:
  Coroutines are racy for risc64 emu on arm64 - crash on Assertion

Status in qemu package in Ubuntu:
  Incomplete

Bug description:
  Note: this could as well be "riscv64 on arm64" for being slow@slow and affect
  other architectures as well.

  The following case triggers on a Raspberry Pi4 running with arm64 on
  Ubuntu 21.04 [1][2]. It might trigger on other environments as well,
  but that is what we have seen it so far.

 $ wget 
https://github.com/carlosedp/riscv-bringup/releases/download/v1.0/UbuntuFocal-riscv64-QemuVM.tar.gz
 $ tar xzf UbuntuFocal-riscv64-QemuVM.tar.gz
 $ ./run_riscvVM.sh
  (wait ~2 minutes)
 [ OK ] Reached target Local File Systems (Pre).
 [ OK ] Reached target Local File Systems.
  Starting udev Kernel Device Manager...
  qemu-system-riscv64: ../../util/qemu-coroutine-lock.c:57: 
qemu_co_queue_wait_impl: Assertion `qemu_in_coroutine()' failed.

  This is often, but not 100% reproducible and the cases differ slightly we
  see either of:
  - qemu-system-riscv64: ../../util/qemu-coroutine-lock.c:57: 
qemu_co_queue_wait_impl: Assertion `qemu_in_coroutine()' failed.
  - qemu-system-riscv64: ../../block/aio_task.c:64: aio_task_pool_wait_one: 
Assertion `qemu_coroutine_self() == pool->main_co' failed.

  Rebuilding working cases has shown to make them fail, as well as rebulding
  (or even reinstalling) bad cases has made them work. Also the same builds on
  different arm64 CPUs behave differently. TL;DR: The full list of conditions
  influencing good/bad case here are not yet known.

  [1]: 
https://ubuntu.com/tutorials/how-to-install-ubuntu-on-your-raspberry-pi#1-overview
  [2]: 
http://cdimage.ubuntu.com/daily-preinstalled/pending/hirsute-preinstalled-desktop-arm64+raspi.img.xz

  
  --- --- original report --- ---

  I regularly run a RISC-V (RV64GC) QEMU VM, but an update a few days
  ago broke it.  Now when I launch it, it hits an assertion:

  OpenSBI v0.6
     _  _
    / __ \  / |  _ \_   _|
   | |  | |_ __   ___ _ __ | (___ | |_) || |
   | |  | | '_ \ / _ \ '_ \ \___ \|  _ < | |
   | |__| | |_) |  __/ | | |) | |_) || |_
    \/| .__/ \___|_| |_|_/|/_|
  | |
  |_|

  ...
  Found /boot/extlinux/extlinux.conf
  Retrieving file: /boot/extlinux/extlinux.conf
  618 bytes read in 2 ms (301.8 KiB/s)
  RISC-V Qemu Boot Options
  1:  Linux kernel-5.5.0-dirty
  2:  Linux kernel-5.5.0-dirty (recovery mode)
  Enter choice: 1:Linux kernel-5.5.0-dirty
  Retrieving file: /boot/initrd.img-5.5.0-dirty
  qemu-system-riscv64: ../../block/aio_task.c:64: aio_task_pool_wait_one: 
Assertion `qemu_coroutine_self() == pool->main_co' failed.
  ./run.sh: line 31:  1604 Aborted (core dumped) 
qemu-system-riscv64 -machine virt -nographic -smp 8 -m 8G -bios fw_payload.bin 
-device virtio-blk-devi
  ce,drive=hd0 -object rng-random,filename=/dev/urandom,id=rng0 -device 
virtio-rng-device,rng=rng0 -drive 
file=riscv64-UbuntuFocal-qemu.qcow2,format=qcow2,id=hd0 -devi
  ce virtio-net-device,netdev=usernet -netdev user,id=usernet,$ports

  Interestingly this doesn't happen on the AMD64 version of Ubuntu 21.04
  (fully updated).

  Think you have everything already, but just in case:

  $ lsb_release -rd
  Description:Ubuntu Hirsute Hippo (development branch)
  Release:21.04

  $ uname -a
  Linux minimacvm 5.11.0-11-generic #12-Ubuntu SMP Mon Mar 1 19:27:36 UTC 2021 
aarch64 aarch64 aarch64 GNU/Linux
  (note this is a VM running on macOS/M1)

  $ apt-cache policy qemu
  qemu:
    Installed: 1:5.2+dfsg-9ubuntu1
    Candidate: 1:5.2+dfsg-9ubuntu1
    Version table:
   *** 1:5.2+dfsg-9ubuntu1 500
  500 http://ports.ubuntu.com/ubuntu-ports hirsute/universe arm64 
Packages
  100 /var/lib/dpkg/status

  ProblemType: Bug
  DistroRelease: Ubuntu 21.04
  Package: qemu 1:5.2+dfsg-9ubuntu1
  ProcVersionSignature: Ubuntu 5.11.0-11.12-generic 5.11.0
  Uname: Linux 5.11.0-11-generic aarch64
  ApportVersion: 2.20.11-0ubuntu61
  Architecture: arm64
  CasperMD5CheckResult: unknown
  CurrentDmesg:
   Error: command ['pkexec', 'dmesg'] failed with exit code 127: 
polkit-agent-helper-1: error response to PolicyKit daemon: 
GDBus.Error:org.freedesktop.PolicyKit1.Error.Failed: No session for cookie
   Error executing command as another user: Not authorized

   This incident has been reported.
  Date: Mon Mar 29 02:33:25 2021
  Dependencies:

  KvmCmdLine: COMMAND STAT  EUID  RUID PIDPPID %CPU COMMAND
  Lspci-vt:
   -[:00]-+-00.0  Apple Inc. Device f020
  +-01.0

Re: [PULL 06/30] softmmu/dirtylimit: Implement virtual CPU throttle

2022-07-29 Thread Peter Maydell

On Wed, 20 Jul 2022 at 12:30, Dr. David Alan Gilbert (git)
 wrote:
>
> From: Hyman Huang(黄勇) 
>
> Setup a negative feedback system when vCPU thread
> handling KVM_EXIT_DIRTY_RING_FULL exit by introducing
> throttle_us_per_full field in struct CPUState. Sleep
> throttle_us_per_full microseconds to throttle vCPU
> if dirtylimit is in service.
>
> Signed-off-by: Hyman Huang(黄勇) 
> Reviewed-by: Peter Xu 
> Message-Id: 
> <977e808e03a1cef5151cae75984658b6821be618.1656177590.git.huang...@chinatelecom.cn>
> Signed-off-by: Dr. David Alan Gilbert 


Hi; Coverity points out a problem with this code (CID 1490787):

> +static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
> +{
> +static uint64_t max_dirtyrate;
> +uint32_t dirty_ring_size = kvm_dirty_ring_size();
> +uint64_t dirty_ring_size_meory_MB =
> +dirty_ring_size * TARGET_PAGE_SIZE >> 20;

Because dirty_ring_size and TARGET_PAGE_SIZE are both 32 bits,
this multiplication will be done as a 32-bit operation,
which could overflow. You should cast one of the operands
to uint64_t to ensure that the operation is done as a 64 bit
multiplication.

Side note: typo in the variable name: should be 'memory'.


> +if (max_dirtyrate < dirtyrate) {
> +max_dirtyrate = dirtyrate;
> +}
> +
> +return dirty_ring_size_meory_MB * 100 / max_dirtyrate;
> +}

thanks
-- PMM

Re: [PATCH] linux-user: Do not treat madvise()'s advice as a bitmask

2022-07-29 Thread Laurent Vivier


Le 25/07/2022 à 15:41, Ilya Leoshkevich a écrit :

Advice is enum, not flags. Doing (advice & MADV_DONTNEED) also matches
e.g. MADV_MERGEABLE.

Signed-off-by: Ilya Leoshkevich 
---
  linux-user/mmap.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index 4e7a6be6ee..edceaca4a8 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -891,7 +891,7 @@ abi_long target_madvise(abi_ulong start, abi_ulong len_in, 
int advice)
   * anonymous mappings. In this case passthrough is safe, so do it.
   */
  mmap_lock();
-if ((advice & MADV_DONTNEED) &&
+if (advice == MADV_DONTNEED &&
  can_passthrough_madv_dontneed(start, end)) {
  ret = get_errno(madvise(g2h_untagged(start), len, MADV_DONTNEED));
  }



Applied to my linux-user-for-7.1 branch.

Thanks,
Laurent

Re: [PATCH v2] ci: Upgrade msys2 release to 20220603

2022-07-29 Thread Alex Bennée



Yonggang Luo  writes:

> Signed-off-by: Yonggang Luo 
> ---
>  .cirrus.yml  | 2 +-
>  .gitlab-ci.d/windows.yml | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)

Queued to testing/next, thanks.


-- 
Alex Bennée

Re: virtio: why no full reset on virtio_set_status 0 ?

2022-07-29 Thread Alex Bennée

Claudio Fontana  writes:

> On 7/29/22 12:13, Michael S. Tsirkin wrote:
>> On Fri, Jul 29, 2022 at 11:46:05AM +0200, Claudio Fontana wrote:
> @@ -2025,7 +2031,6 @@ void virtio_reset(void *opaque)
>  VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
>  int i;
>  
> -virtio_set_status(vdev, 0);
>  if (current_cpu) {
>  /* Guest initiated reset */
>  vdev->device_endian = virtio_current_cpu_endian();
> -- 
> 2.26.2

 As you say this is incomplete ... bout could you share a bit more
 of what issue does this address?

>>>
>>> Hi, the problem I am trying to address is a segfault in OVS/dpdk that looks 
>>> like this:
>> 
>> Sorry I was not clear. What I mean is, you don't yet know why does removing
>> virtio_set_status call here prevent the crash in ovs, do you?
>> 
>
> I have no idea. Trying to collect logs to figure things out, but as
> mentioned the logs easily hide the issue.
> Likely there is just more to study here.

Given the OVS is going off on a NULL ptr deref could it just be it's not
handling the disabling/reenabling of the virtqueues as you pause and
restart properly? I could certainly imagine a backend jumping the gun to
read a queue going very wrong if the current queue state is disabled.

-- 
Alex Bennée

[RFC v2 10/10] Fix some calls from coroutine_fn to no_coroutine_fn

2022-07-29 Thread Alberto Faria

These calls were found by static-analyzer.py.

Not all occurrences of this problem were fixed.

Signed-off-by: Alberto Faria 
---
 block/commit.c |  2 +-
 block/io.c |  4 ++--
 block/mirror.c |  4 ++--
 block/parallels.c  | 28 ++--
 block/qcow.c   | 10 +-
 block/qcow2-snapshot.c |  6 +++---
 block/qcow2.c  | 24 
 block/qed-table.c  |  2 +-
 block/qed.c| 12 ++--
 block/vdi.c| 17 +
 block/vhdx.c   |  8 
 block/vmdk.c   | 11 ++-
 blockdev.c |  2 +-
 13 files changed, 66 insertions(+), 64 deletions(-)

diff --git a/block/commit.c b/block/commit.c
index 38571510cb..945945de05 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -135,7 +135,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
 }
 
 if (base_len < len) {
-ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, 0, NULL);
+ret = blk_co_truncate(s->base, len, false, PREALLOC_MODE_OFF, 0, NULL);
 if (ret) {
 return ret;
 }
diff --git a/block/io.c b/block/io.c
index c2ed14cedb..a7d16eae02 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2736,8 +2736,8 @@ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState 
*bs, int64_t offset,
 return 1;
 }
 
-ret = bdrv_common_block_status_above(bs, NULL, false, false, offset,
- bytes, , NULL, NULL, NULL);
+ret = bdrv_co_common_block_status_above(bs, NULL, false, false, offset,
+bytes, , NULL, NULL, NULL);
 
 if (ret < 0) {
 return ret;
diff --git a/block/mirror.c b/block/mirror.c
index 3c4ab1159d..3cbb610118 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -921,8 +921,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
  * active layer. */
 if (s->base == blk_bs(s->target)) {
 if (s->bdev_length > target_length) {
-ret = blk_truncate(s->target, s->bdev_length, false,
-   PREALLOC_MODE_OFF, 0, NULL);
+ret = blk_co_truncate(s->target, s->bdev_length, false,
+  PREALLOC_MODE_OFF, 0, NULL);
 if (ret < 0) {
 goto immediate_exit;
 }
diff --git a/block/parallels.c b/block/parallels.c
index a229c06f25..46baea6387 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -204,18 +204,18 @@ static int64_t allocate_clusters(BlockDriverState *bs, 
int64_t sector_num,
  * force the safer-but-slower fallocate.
  */
 if (s->prealloc_mode == PRL_PREALLOC_MODE_TRUNCATE) {
-ret = bdrv_truncate(bs->file,
-(s->data_end + space) << BDRV_SECTOR_BITS,
-false, PREALLOC_MODE_OFF, BDRV_REQ_ZERO_WRITE,
-NULL);
+ret = bdrv_co_truncate(bs->file,
+   (s->data_end + space) << BDRV_SECTOR_BITS,
+   false, PREALLOC_MODE_OFF,
+   BDRV_REQ_ZERO_WRITE, NULL);
 if (ret == -ENOTSUP) {
 s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
 }
 }
 if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
-ret = bdrv_pwrite_zeroes(bs->file,
- s->data_end << BDRV_SECTOR_BITS,
- space << BDRV_SECTOR_BITS, 0);
+ret = bdrv_co_pwrite_zeroes(bs->file,
+s->data_end << BDRV_SECTOR_BITS,
+space << BDRV_SECTOR_BITS, 0);
 }
 if (ret < 0) {
 return ret;
@@ -277,8 +277,8 @@ static coroutine_fn int 
parallels_co_flush_to_os(BlockDriverState *bs)
 if (off + to_write > s->header_size) {
 to_write = s->header_size - off;
 }
-ret = bdrv_pwrite(bs->file, off, to_write, (uint8_t *)s->header + off,
-  0);
+ret = bdrv_co_pwrite(bs->file, off, to_write,
+ (uint8_t *)s->header + off, 0);
 if (ret < 0) {
 qemu_co_mutex_unlock(>lock);
 return ret;
@@ -503,8 +503,8 @@ static int coroutine_fn parallels_co_check(BlockDriverState 
*bs,
  * In order to really repair the image, we must shrink it.
  * That means we have to pass exact=true.
  */
-ret = bdrv_truncate(bs->file, res->image_end_offset, true,
-PREALLOC_MODE_OFF, 0, _err);
+ret = bdrv_co_truncate(bs->file, res->image_end_offset, true,
+   PREALLOC_MODE_OFF, 0, _err);
 if (ret < 0) {
 error_report_err(local_err);

[RFC v2 08/10] Fix some bad coroutine_fn indirect calls and pointer assignments

2022-07-29 Thread Alberto Faria

These problems were found by static-analyzer.py.

Not all occurrences of these problems were fixed.

Signed-off-by: Alberto Faria 
---
 block/backup.c   |  2 +-
 include/block/block_int-common.h | 12 +---
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index b2b649e305..6a9ad97a53 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -309,7 +309,7 @@ static void coroutine_fn backup_pause(Job *job)
 }
 }
 
-static void coroutine_fn backup_set_speed(BlockJob *job, int64_t speed)
+static void backup_set_speed(BlockJob *job, int64_t speed)
 {
 BackupBlockJob *s = container_of(job, BackupBlockJob, common);
 
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 8947abab76..16c45d1262 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -731,13 +731,11 @@ struct BlockDriver {
 void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
 
 bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
-bool (*bdrv_co_can_store_new_dirty_bitmap)(BlockDriverState *bs,
-   const char *name,
-   uint32_t granularity,
-   Error **errp);
-int (*bdrv_co_remove_persistent_dirty_bitmap)(BlockDriverState *bs,
-  const char *name,
-  Error **errp);
+bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)(
+BlockDriverState *bs, const char *name, uint32_t granularity,
+Error **errp);
+int coroutine_fn (*bdrv_co_remove_persistent_dirty_bitmap)(
+BlockDriverState *bs, const char *name, Error **errp);
 };
 
 static inline bool block_driver_can_compress(BlockDriver *drv)
-- 
2.37.1

[PATCH] virtiofsd: Disable killpriv_v2 by default

2022-07-29 Thread Vivek Goyal

We are having bunch of issues with killpriv_v2 enabled by default. First
of all it relies on clearing suid/sgid bits as needed by dropping
capability CAP_FSETID. This does not work for remote filesystems like
NFS (and possibly others). 

Secondly, we are noticing other issues related to clearing of SGID
which leads to failures for xfstests generic/355 and generic/193.

Thirdly, there are other issues w.r.t caching of metadata (suid/sgid)
bits in fuse client with killpriv_v2 enabled. Guest can cache that
data for sometime even if cleared on server.

Second and Third issue are fixable. Just that it might take a little
while to get it fixed in kernel. First one will probably not see
any movement for a long time.

Given these issues, killpriv_v2 does not seem to be a good candidate
for enabling by default. We have already disabled it by default in
rust version of virtiofsd.

Hence this patch disabled killpriv_v2 by default. User can choose to
enable it by passing option "-o killpriv_v2".

Signed-off-by: Vivek Goyal 
---
 tools/virtiofsd/passthrough_ll.c |   13 ++---
 1 file changed, 2 insertions(+), 11 deletions(-)

Index: rhvgoyal-qemu/tools/virtiofsd/passthrough_ll.c
===
--- rhvgoyal-qemu.orig/tools/virtiofsd/passthrough_ll.c 2022-07-29 
08:19:05.925119947 -0400
+++ rhvgoyal-qemu/tools/virtiofsd/passthrough_ll.c  2022-07-29 
08:27:08.048049096 -0400
@@ -767,19 +767,10 @@ static void lo_init(void *userdata, stru
 fuse_log(FUSE_LOG_DEBUG, "lo_init: enabling killpriv_v2\n");
 conn->want |= FUSE_CAP_HANDLE_KILLPRIV_V2;
 lo->killpriv_v2 = 1;
-} else if (lo->user_killpriv_v2 == -1 &&
-   conn->capable & FUSE_CAP_HANDLE_KILLPRIV_V2) {
-/*
- * User did not specify a value for killpriv_v2. By default enable it
- * if connection offers this capability
- */
-fuse_log(FUSE_LOG_DEBUG, "lo_init: enabling killpriv_v2\n");
-conn->want |= FUSE_CAP_HANDLE_KILLPRIV_V2;
-lo->killpriv_v2 = 1;
 } else {
 /*
- * Either user specified to disable killpriv_v2, or connection does
- * not offer this capability. Disable killpriv_v2 in both the cases
+ * Either user specified to disable killpriv_v2, or did not
+ * specify anything. Disable killpriv_v2 in both the cases.
  */
 fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling killpriv_v2\n");
 conn->want &= ~FUSE_CAP_HANDLE_KILLPRIV_V2;

[RFC v2 07/10] static-analyzer: Enforce coroutine_fn restrictions on function pointers

2022-07-29 Thread Alberto Faria

Extend static-analyzer.py's "coroutine_fn" check to enforce coroutine_fn
restrictions on function pointer operations.

Invalid operations include assigning a coroutine_fn value to a
non-coroutine_fn function pointer, and invoking a coroutine_fn function
pointer from a non-coroutine_fn function.

Signed-off-by: Alberto Faria 
---
 static_analyzer/__init__.py |  27 
 static_analyzer/coroutine_fn.py | 115 ++--
 2 files changed, 138 insertions(+), 4 deletions(-)

diff --git a/static_analyzer/__init__.py b/static_analyzer/__init__.py
index 5abdbd21a3..90992d3500 100644
--- a/static_analyzer/__init__.py
+++ b/static_analyzer/__init__.py
@@ -24,6 +24,8 @@
 Cursor,
 CursorKind,
 SourceLocation,
+SourceRange,
+TokenGroup,
 TranslationUnit,
 TypeKind,
 conf,
@@ -117,6 +119,31 @@ def actual_visitor(node: Cursor, parent: Cursor, 
client_data: None) -> int:
 # Node predicates
 
 
+def is_binary_operator(node: Cursor, operator: str) -> bool:
+return (
+node.kind == CursorKind.BINARY_OPERATOR
+and get_binary_operator_spelling(node) == operator
+)
+
+
+def get_binary_operator_spelling(node: Cursor) -> Optional[str]:
+
+assert node.kind == CursorKind.BINARY_OPERATOR
+
+[left, right] = node.get_children()
+
+op_range = SourceRange.from_locations(left.extent.end, right.extent.start)
+
+tokens = list(TokenGroup.get_tokens(node.translation_unit, op_range))
+if not tokens:
+# Can occur when left and right children extents overlap due to
+# misparsing.
+return None
+
+[op_token, *_] = tokens
+return op_token.spelling
+
+
 def might_have_attribute(node: Cursor, attr: Union[CursorKind, str]) -> bool:
 """
 Check whether any of `node`'s children are an attribute of the given kind,
diff --git a/static_analyzer/coroutine_fn.py b/static_analyzer/coroutine_fn.py
index f70a3167eb..a16dcbeb52 100644
--- a/static_analyzer/coroutine_fn.py
+++ b/static_analyzer/coroutine_fn.py
@@ -8,6 +8,7 @@
 check,
 is_annotated_with,
 is_annotation,
+is_binary_operator,
 is_comma_wrapper,
 visit,
 )
@@ -22,6 +23,7 @@ def check_coroutine_fn(context: CheckContext) -> None:
 def visitor(node: Cursor) -> VisitorResult:
 
 validate_annotations(context, node)
+check_function_pointers(context, node)
 
 if node.kind == CursorKind.FUNCTION_DECL and node.is_definition():
 check_direct_calls(context, node)
@@ -91,6 +93,83 @@ def visitor(node: Cursor) -> VisitorResult:
 visit(caller, visitor)
 
 
+def check_function_pointers(context: CheckContext, node: Cursor) -> None:
+
+# What we would really like is to associate annotation attributes with 
types
+# directly, but that doesn't seem possible. Instead, we have to look at the
+# relevant variable/field/parameter declarations, and follow typedefs.
+
+# This doesn't check all possible ways of assigning to a coroutine_fn
+# field/variable/parameter. That would probably be too much work.
+
+# TODO: Check struct/union/array initialization.
+# TODO: Check assignment to struct/union/array fields.
+
+# check initialization of variables using coroutine_fn values
+
+if node.kind == CursorKind.VAR_DECL:
+
+children = [
+c
+for c in node.get_children()
+if c.kind
+not in [
+CursorKind.ANNOTATE_ATTR,
+CursorKind.INIT_LIST_EXPR,
+CursorKind.TYPE_REF,
+CursorKind.UNEXPOSED_ATTR,
+]
+]
+
+if (
+len(children) == 1
+and not is_coroutine_fn(node)
+and is_coroutine_fn(children[0])
+):
+context.report(node, "assigning coroutine_fn to non-coroutine_fn")
+
+# check initialization of fields using coroutine_fn values
+
+# TODO: This only checks designator initializers.
+
+if node.kind == CursorKind.INIT_LIST_EXPR:
+
+for initializer in filter(
+lambda n: n.kind == CursorKind.UNEXPOSED_EXPR,
+node.get_children(),
+):
+
+children = list(initializer.get_children())
+
+if (
+len(children) == 2
+and children[0].kind == CursorKind.MEMBER_REF
+and not is_coroutine_fn(children[0].referenced)
+and is_coroutine_fn(children[1])
+):
+context.report(
+initializer,
+"assigning coroutine_fn to non-coroutine_fn",
+)
+
+# check assignments of coroutine_fn values to variables or fields
+
+if is_binary_operator(node, "="):
+
+[left, right] = node.get_children()
+
+if (
+left.kind
+in [
+CursorKind.DECL_REF_EXPR,
+CursorKind.MEMBER_REF_EXPR,
+]
+and not

[RFC v2 09/10] block: Add no_coroutine_fn marker

2022-07-29 Thread Alberto Faria

When applied to a function, it advertises that it should not be called
from coroutine_fn functions.

Make generated_co_wrapper evaluate to no_coroutine_fn, as coroutine_fn
functions should instead directly call the coroutine_fn that backs the
generated_co_wrapper.

Add a "no_coroutine_fn" check to static-analyzer.py that enforces
no_coroutine_fn rules.

Signed-off-by: Alberto Faria 
---
 include/block/block-common.h   |   2 +-
 include/qemu/coroutine.h   |  12 
 static_analyzer/no_coroutine_fn.py | 111 +
 3 files changed, 124 insertions(+), 1 deletion(-)
 create mode 100644 static_analyzer/no_coroutine_fn.py

diff --git a/include/block/block-common.h b/include/block/block-common.h
index fdb7306e78..4b60c8c6f2 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -42,7 +42,7 @@
  *
  * Read more in docs/devel/block-coroutine-wrapper.rst
  */
-#define generated_co_wrapper
+#define generated_co_wrapper no_coroutine_fn
 
 /* block.c */
 typedef struct BlockDriver BlockDriver;
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index 26445b3176..c61dd2d3f7 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -48,6 +48,18 @@
 #define coroutine_fn
 #endif
 
+/**
+ * Mark a function that should never be called from a coroutine context
+ *
+ * This typically means that there is an analogous, coroutine_fn function that
+ * should be used instead.
+ */
+#ifdef __clang__
+#define no_coroutine_fn __attribute__((__annotate__("no_coroutine_fn")))
+#else
+#define no_coroutine_fn
+#endif
+
 /**
  * This can wrap a call to a coroutine_fn from a non-coroutine_fn function and
  * suppress the static analyzer's complaints.
diff --git a/static_analyzer/no_coroutine_fn.py 
b/static_analyzer/no_coroutine_fn.py
new file mode 100644
index 00..1d0b93bb62
--- /dev/null
+++ b/static_analyzer/no_coroutine_fn.py
@@ -0,0 +1,111 @@
+#  
#
+
+from clang.cindex import Cursor, CursorKind  # type: ignore
+
+from static_analyzer import (
+CheckContext,
+VisitorResult,
+check,
+is_annotation,
+is_annotated_with,
+visit,
+)
+from static_analyzer.coroutine_fn import is_coroutine_fn
+
+#  
#
+
+
+@check("no_coroutine_fn")
+def check_no_coroutine_fn(context: CheckContext) -> None:
+"""Reports violations of no_coroutine_fn rules."""
+
+def visitor(node: Cursor) -> VisitorResult:
+
+validate_annotations(context, node)
+
+if node.kind == CursorKind.FUNCTION_DECL and node.is_definition():
+check_calls(context, node)
+return VisitorResult.CONTINUE
+
+return VisitorResult.RECURSE
+
+visit(context.translation_unit.cursor, visitor)
+
+
+def validate_annotations(context: CheckContext, node: Cursor) -> None:
+
+# validate annotation usage
+
+if is_annotation(node, "no_coroutine_fn") and (
+node.parent is None or not is_valid_no_coroutine_fn_usage(node.parent)
+):
+context.report(node, "invalid no_coroutine_fn usage")
+
+# reject re-declarations with inconsistent annotations
+
+if node.kind == CursorKind.FUNCTION_DECL and is_no_coroutine_fn(
+node
+) != is_no_coroutine_fn(node.canonical):
+
+context.report(
+node,
+f"no_coroutine_fn annotation disagreement with"
+f" {context.format_location(node.canonical)}",
+)
+
+
+def check_calls(context: CheckContext, caller: Cursor) -> None:
+"""
+Reject calls from coroutine_fn to no_coroutine_fn.
+
+Assumes that `caller` is a function definition.
+"""
+
+if is_coroutine_fn(caller):
+
+def visitor(node: Cursor) -> VisitorResult:
+
+# We can get some "calls" that are actually things like top-level
+# macro invocations for which `node.referenced` is None.
+
+if (
+node.kind == CursorKind.CALL_EXPR
+and node.referenced is not None
+and is_no_coroutine_fn(node.referenced.canonical)
+):
+context.report(
+node,
+f"coroutine_fn calls no_coroutine_fn function"
+f" {node.referenced.spelling}()",
+)
+
+return VisitorResult.RECURSE
+
+visit(caller, visitor)
+
+
+#  
#
+
+
+def is_valid_no_coroutine_fn_usage(parent: Cursor) -> bool:
+"""
+Checks if an occurrence of `no_coroutine_fn` represented by a node with
+parent `parent` appears at a valid point in the AST. This is the case if 
the
+parent is a function declaration/definition.
+"""
+
+return parent.kind == CursorKind.FUNCTION_DECL
+
+
+def is_no_coroutine_fn(node: Cursor) -> bool:
+"""
+Checks whether

[RFC v2 06/10] Fix some direct calls from non-coroutine_fn to coroutine_fn

2022-07-29 Thread Alberto Faria

In some cases we need to use a different function, in others we need to
make the caller a coroutine_fn, and in others still we need to wrap
calls to coroutines in __allow_coroutine_fn_call().

Also fix coroutine_fn annotation disagreements between several
declarations of the same function.

These problems were found by static-analyzer.py.

Not all occurrences of these problems were fixed.

Signed-off-by: Alberto Faria 
---
 block.c|  2 +-
 block/dirty-bitmap.c   |  6 --
 block/io.c | 18 +++---
 block/monitor/block-hmp-cmds.c |  2 +-
 block/nvme.c   |  3 ++-
 block/qcow2.c  | 14 +++---
 block/qcow2.h  | 14 +++---
 block/qed.c|  2 +-
 block/quorum.c |  2 +-
 block/ssh.c|  6 +++---
 block/throttle-groups.c|  3 ++-
 include/block/block-hmp-cmds.h |  2 +-
 include/block/block-io.h   |  5 +++--
 include/qemu/coroutine.h   | 18 ++
 14 files changed, 54 insertions(+), 43 deletions(-)

diff --git a/block.c b/block.c
index bc85f46eed..9f3814cbaa 100644
--- a/block.c
+++ b/block.c
@@ -561,7 +561,7 @@ int bdrv_create(BlockDriver *drv, const char* filename,
 
 if (qemu_in_coroutine()) {
 /* Fast-path if already in coroutine context */
-bdrv_create_co_entry();
+__allow_coroutine_fn_call(bdrv_create_co_entry());
 } else {
 co = qemu_coroutine_create(bdrv_create_co_entry, );
 qemu_coroutine_enter(co);
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index bf3dc0512a..ccf46c0b1f 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -419,7 +419,8 @@ int bdrv_remove_persistent_dirty_bitmap(BlockDriverState 
*bs, const char *name,
 Error **errp)
 {
 if (qemu_in_coroutine()) {
-return bdrv_co_remove_persistent_dirty_bitmap(bs, name, errp);
+return __allow_coroutine_fn_call(
+bdrv_co_remove_persistent_dirty_bitmap(bs, name, errp));
 } else {
 Coroutine *co;
 BdrvRemovePersistentDirtyBitmapCo s = {
@@ -495,7 +496,8 @@ bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, 
const char *name,
 {
 IO_CODE();
 if (qemu_in_coroutine()) {
-return bdrv_co_can_store_new_dirty_bitmap(bs, name, granularity, errp);
+return __allow_coroutine_fn_call(
+bdrv_co_can_store_new_dirty_bitmap(bs, name, granularity, errp));
 } else {
 Coroutine *co;
 BdrvCanStoreNewDirtyBitmapCo s = {
diff --git a/block/io.c b/block/io.c
index 853ed44289..c2ed14cedb 100644
--- a/block/io.c
+++ b/block/io.c
@@ -449,8 +449,9 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, 
bool recursive,
 BdrvChild *child, *next;
 
 if (qemu_in_coroutine()) {
-bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
-   poll, NULL);
+__allow_coroutine_fn_call(
+bdrv_co_yield_to_drain(bs, true, recursive, parent,
+   ignore_bds_parents, poll, NULL));
 return;
 }
 
@@ -516,8 +517,10 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool 
recursive,
 assert(drained_end_counter != NULL);
 
 if (qemu_in_coroutine()) {
-bdrv_co_yield_to_drain(bs, false, recursive, parent, 
ignore_bds_parents,
-   false, drained_end_counter);
+__allow_coroutine_fn_call(
+bdrv_co_yield_to_drain(bs, false, recursive, parent,
+   ignore_bds_parents, false,
+   drained_end_counter));
 return;
 }
 assert(bs->quiesce_counter > 0);
@@ -643,7 +646,8 @@ void bdrv_drain_all_begin(void)
 GLOBAL_STATE_CODE();
 
 if (qemu_in_coroutine()) {
-bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
+__allow_coroutine_fn_call(
+bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL));
 return;
 }
 
@@ -2742,8 +2746,8 @@ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState 
*bs, int64_t offset,
 return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO);
 }
 
-int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
-   int64_t bytes, int64_t *pnum)
+int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
+  int64_t *pnum)
 {
 int ret;
 int64_t dummy;
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index bfb3c043a0..b5ba9281f0 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -489,7 +489,7 @@ void hmp_nbd_server_stop(Monitor *mon, const QDict *qdict)
 hmp_handle_error(mon, err);
 }
 
-void hmp_block_resize(Monitor *mon, const QDict *qdict)
+void coroutine_fn hmp_block_resize(Monitor *mon,

[RFC v2 05/10] static-analyzer: Enforce coroutine_fn restrictions for direct calls

2022-07-29 Thread Alberto Faria

Add a "coroutine_fn" check to static-analyzer.py that ensures that
non-coroutine_fn functions don't perform direct calls to coroutine_fn
functions.

For the few cases where this must happen, introduce an
__allow_coroutine_fn_call() macro that wraps offending calls and
overrides the static analyzer.

Signed-off-by: Alberto Faria 
---
 include/qemu/coroutine.h|  13 +++
 static_analyzer/__init__.py |  46 -
 static_analyzer/coroutine_fn.py | 173 
 3 files changed, 231 insertions(+), 1 deletion(-)
 create mode 100644 static_analyzer/coroutine_fn.py

diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index 08c5bb3c76..40a4037525 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -42,7 +42,20 @@
  *   
  *   }
  */
+#ifdef __clang__
+#define coroutine_fn __attribute__((__annotate__("coroutine_fn")))
+#else
 #define coroutine_fn
+#endif
+
+/**
+ * This can wrap a call to a coroutine_fn from a non-coroutine_fn function and
+ * suppress the static analyzer's complaints.
+ *
+ * You don't want to use this.
+ */
+#define __allow_coroutine_fn_call(call) \
+((void)"__allow_coroutine_fn_call", call)
 
 typedef struct Coroutine Coroutine;
 
diff --git a/static_analyzer/__init__.py b/static_analyzer/__init__.py
index 36028724b1..5abdbd21a3 100644
--- a/static_analyzer/__init__.py
+++ b/static_analyzer/__init__.py
@@ -23,8 +23,9 @@
 from clang.cindex import (  # type: ignore
 Cursor,
 CursorKind,
-TranslationUnit,
 SourceLocation,
+TranslationUnit,
+TypeKind,
 conf,
 )
 
@@ -146,6 +147,49 @@ def matcher(n: Cursor) -> bool:
 return any(map(matcher, node.get_children()))
 
 
+def is_annotated_with(node: Cursor, annotation: str) -> bool:
+return any(is_annotation(c, annotation) for c in node.get_children())
+
+
+def is_annotation(node: Cursor, annotation: str) -> bool:
+return node.kind == CursorKind.ANNOTATE_ATTR and node.spelling == 
annotation
+
+
+def is_comma_wrapper(node: Cursor, literal: str) -> bool:
+"""
+Check if `node` is a "comma-wrapper" with the given string literal.
+
+A "comma-wrapper" is the pattern `((void)string_literal, expr)`. The `expr`
+is said to be "comma-wrapped".
+"""
+
+# TODO: Do we need to check that the operator is `,`? Is there another
+# operator that can combine void and an expr?
+
+if node.kind != CursorKind.BINARY_OPERATOR:
+return False
+
+[left, _right] = node.get_children()
+
+if (
+left.kind != CursorKind.CSTYLE_CAST_EXPR
+or left.type.kind != TypeKind.VOID
+):
+return False
+
+[unexposed_expr] = left.get_children()
+
+if unexposed_expr.kind != CursorKind.UNEXPOSED_EXPR:
+return False
+
+[string_literal] = unexposed_expr.get_children()
+
+return (
+string_literal.kind == CursorKind.STRING_LITERAL
+and string_literal.spelling == f'"{literal}"'
+)
+
+
 #  
#
 # Checks
 
diff --git a/static_analyzer/coroutine_fn.py b/static_analyzer/coroutine_fn.py
new file mode 100644
index 00..f70a3167eb
--- /dev/null
+++ b/static_analyzer/coroutine_fn.py
@@ -0,0 +1,173 @@
+#  
#
+
+from clang.cindex import Cursor, CursorKind, TypeKind  # type: ignore
+
+from static_analyzer import (
+CheckContext,
+VisitorResult,
+check,
+is_annotated_with,
+is_annotation,
+is_comma_wrapper,
+visit,
+)
+
+#  
#
+
+
+@check("coroutine_fn")
+def check_coroutine_fn(context: CheckContext) -> None:
+"""Reports violations of coroutine_fn rules."""
+
+def visitor(node: Cursor) -> VisitorResult:
+
+validate_annotations(context, node)
+
+if node.kind == CursorKind.FUNCTION_DECL and node.is_definition():
+check_direct_calls(context, node)
+return VisitorResult.CONTINUE
+
+return VisitorResult.RECURSE
+
+visit(context.translation_unit.cursor, visitor)
+
+
+def validate_annotations(context: CheckContext, node: Cursor) -> None:
+
+# validate annotation usage
+
+if is_annotation(node, "coroutine_fn") and (
+node.parent is None or not is_valid_coroutine_fn_usage(node.parent)
+):
+context.report(node, "invalid coroutine_fn usage")
+
+if is_comma_wrapper(
+node, "__allow_coroutine_fn_call"
+) and not is_valid_allow_coroutine_fn_call_usage(node):
+context.report(node, "invalid __allow_coroutine_fn_call usage")
+
+# reject re-declarations with inconsistent annotations
+
+if node.kind == CursorKind.FUNCTION_DECL and is_coroutine_fn(
+node
+) != is_coroutine_fn(node.canonical):
+context.report(
+node,
+f"coroutine_fn annotation disagreement with"
+f"

[RFC v2 04/10] static-analyzer: Avoid reanalyzing unmodified translation units

2022-07-29 Thread Alberto Faria

For each translation unit, run each check only if any of the translation
unit's files has been modified since the last time the check ran and
passed without reporting problems.

Signed-off-by: Alberto Faria 
---
 static-analyzer.py | 240 -
 1 file changed, 217 insertions(+), 23 deletions(-)

diff --git a/static-analyzer.py b/static-analyzer.py
index 16e331000d..140760a93e 100755
--- a/static-analyzer.py
+++ b/static-analyzer.py
@@ -25,6 +25,8 @@
 from traceback import print_exc
 from typing import (
 Callable,
+Dict,
+FrozenSet,
 Iterable,
 Iterator,
 List,
@@ -32,6 +34,7 @@
 NoReturn,
 Optional,
 Sequence,
+Tuple,
 Union,
 )
 
@@ -61,9 +64,19 @@ def parse_args() -> Namespace:
 build configuration. Note that a single .c file may give rise to
 several translation units.
 
+For each translation unit, each check is run only if any its files
+has been modified since the last time the check ran and passed
+without reporting problems.
+
 You should build QEMU before running this, since some translation
 units depend on files that are generated during the build. If you
 don't, you'll get errors, but should never get false negatives.
+Also, translation units that haven't been built will always be
+reanalyzed, even they haven't been modified, because we cant't know
+what their dependencies are until they are built. (TODO: This is
+rather annoying since `make all` does not actually build every
+translation unit. Should this script trigger an actual full build
+somehow as a first step?)
 """
 ),
 epilog=textwrap.dedent(
@@ -111,6 +124,16 @@ def parse_args() -> Namespace:
 ),
 )
 
+parser.add_argument(
+"-f",
+"--force",
+action="store_true",
+help=(
+"Analyze translation units even if they haven't been modified 
since"
+" the last analysis."
+),
+)
+
 parser.add_argument(
 "-j",
 "--jobs",
@@ -220,12 +243,17 @@ def test() -> int:
 
 def analyze(args: Namespace) -> int:
 
-tr_units = get_translation_units(args)
+analysis_timestamp = time.time()
+
+# load log and get translation units
+
+log = AnalysisLog.load(args.build_dir)
+(tr_units, num_up_to_date_tr_units) = get_translation_units(args, log)
 
 # analyze translation units
 
 start_time = time.monotonic()
-results: List[bool] = []
+results: List[AnalysisResults] = []
 
 if len(tr_units) == 1:
 progress_suffix = " of 1 translation unit...\033[0m\r"
@@ -237,7 +265,7 @@ def print_progress() -> None:
 
 print_progress()
 
-def collect_results(results_iter: Iterable[bool]) -> None:
+def collect_results(results_iter: Iterable[AnalysisResults]) -> None:
 if sys.stdout.isatty():
 for r in results_iter:
 results.append(r)
@@ -246,27 +274,41 @@ def collect_results(results_iter: Iterable[bool]) -> None:
 for r in results_iter:
 results.append(r)
 
-if tr_units:
+try:
 
-if args.threads == 1:
+if tr_units:
 
-collect_results(map(analyze_translation_unit, tr_units))
+if args.threads == 1:
 
-else:
+collect_results(map(analyze_translation_unit, tr_units))
 
-# Mimic Python's default pool.map() chunk size, but limit it to
-# 5 to avoid very big chunks when analyzing thousands of
-# translation units.
-chunk_size = min(5, -(-len(tr_units) // (args.threads * 4)))
+else:
 
-with Pool(processes=args.threads) as pool:
-collect_results(
-pool.imap_unordered(
-analyze_translation_unit, tr_units, chunk_size
+# Mimic Python's default pool.map() chunk size, but limit it to
+# 5 to avoid very big chunks when analyzing thousands of
+# translation units.
+chunk_size = min(5, -(-len(tr_units) // (args.threads * 4)))
+
+with Pool(processes=args.threads) as pool:
+collect_results(
+pool.imap_unordered(
+analyze_translation_unit, tr_units, chunk_size
+)
 )
-)
 
-end_time = time.monotonic()
+end_time = time.monotonic()
+
+finally:
+
+# update analysis timestamps for passed checks for each translation 
unit
+# (even if the static analyzer failed or was interrupted)
+
+for r in results:
+log.set_last_analysis_time(
+r.tr_unit_object_file, r.passed_check_names, analysis_timestamp
+)
+
+log.save()

1 2 >

1 - 100 of 137 matches

Mail list logo